code.delx.au - gnu-emacs/blob - src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   1. Preamble
  25   2. Emacs' internal format (emacs-mule) handlers
  26   3. ISO2022 handlers
  27   4. Shift-JIS and BIG5 handlers
  28   5. End-of-line handlers
  29   6. C library functions
  30   7. Emacs Lisp library functions
  31   8. Post-amble
  32
  33 */
  34
  35 /*** GENERAL NOTE on CODING SYSTEM ***
  36
  37   Coding system is an encoding mechanism of one or more character
  38   sets.  Here's a list of coding systems which Emacs can handle.  When
  39   we say "decode", it means converting some other coding system to
  40   Emacs' internal format (emacs-mule), and when we say "encode",
  41   it means converting the coding system emacs-mule to some other
  42   coding system.
  43
  44   0. Emacs' internal format (emacs-mule)
  45
  46   Emacs itself holds a multi-lingual character in a buffer and a string
  47   in a special format.  Details are described in section 2.
  48
  49   1. ISO2022
  50
  51   The most famous coding system for multiple character sets.  X's
  52   Compound Text, various EUCs (Extended Unix Code), and coding
  53   systems used in Internet communication such as ISO-2022-JP are
  54   all variants of ISO2022.  Details are described in section 3.
  55
  56   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  57
  58   A coding system to encode character sets: ASCII, JISX0201, and
  59   JISX0208.  Widely used for PC's in Japan.  Details are described in
  60   section 4.
  61
  62   3. BIG5
  63
  64   A coding system to encode character sets: ASCII and Big5.  Widely
  65   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  66   described in section 4.  In this file, when we write "BIG5"
  67   (all uppercase), we mean the coding system, and when we write
  68   "Big5" (capitalized), we mean the character set.
  69
  70   4. Raw text
  71
  72   A coding system for a text containing random 8-bit code.  Emacs does
  73   no code conversion on such a text except for end-of-line format.
  74
  75   5. Other
  76
  77   If a user wants to read/write a text encoded in a coding system not
  78   listed above, he can supply a decoder and an encoder for it in CCL
  79   (Code Conversion Language) programs.  Emacs executes the CCL program
  80   while reading/writing.
  81
  82   Emacs represents a coding system by a Lisp symbol that has a property
  83   `coding-system'.  But, before actually using the coding system, the
  84   information about it is set in a structure of type `struct
  85   coding_system' for rapid processing.  See section 6 for more details.
  86
  87 */
  88
  89 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  90
  91   How end-of-line of a text is encoded depends on a system.  For
  92   instance, Unix's format is just one byte of `line-feed' code,
  93   whereas DOS's format is two-byte sequence of `carriage-return' and
  94   `line-feed' codes.  MacOS's format is usually one byte of
  95   `carriage-return'.
  96
  97   Since text characters encoding and end-of-line encoding are
  98   independent, any coding system described above can take
  99   any format of end-of-line.  So, Emacs has information of format of
 100   end-of-line in each coding-system.  See section 6 for more details.
 101
 102 */
 103
 104 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 105
 106   These functions check if a text between SRC and SRC_END is encoded
 107   in the coding system category XXX.  Each returns an integer value in
 108   which appropriate flag bits for the category XXX are set.  The flag
 109   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 110   template of these functions.  */
 111 #if 0
 112 int
 113 detect_coding_emacs_mule (src, src_end)
 114      unsigned char *src, *src_end;
 115 {
 116   ...
 117 }
 118 #endif
 119
 120 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 121
 122   These functions decode SRC_BYTES length text at SOURCE encoded in
 123   CODING to Emacs' internal format (emacs-mule).  The resulting text
 124   goes to a place pointed to by DESTINATION, the length of which
 125   should not exceed DST_BYTES.  These functions set the information of
 126   original and decoded texts in the members produced, produced_char,
 127   consumed, and consumed_char of the structure *CODING.
 128
 129   The return value is an integer (CODING_FINISH_XXX) indicating how
 130   the decoding finished.
 131
 132   DST_BYTES zero means that source area and destination area are
 133   overlapped, which means that we can produce a decoded text until it
 134   reaches the head of the not-yet-decoded source text.
 135
 136   Below is a template of these functions.  */
 137 #if 0
 138 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 139      struct coding_system *coding;
 140      unsigned char *source, *destination;
 141      int src_bytes, dst_bytes;
 142 {
 143   ...
 144 }
 145 #endif
 146
 147 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 148
 149   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 150   internal format (emacs-mule) to CODING.  The resulting text goes to
 151   a place pointed to by DESTINATION, the length of which should not
 152   exceed DST_BYTES.  These functions set the information of
 153   original and encoded texts in the members produced, produced_char,
 154   consumed, and consumed_char of the structure *CODING.
 155
 156   The return value is an integer (CODING_FINISH_XXX) indicating how
 157   the encoding finished.
 158
 159   DST_BYTES zero means that source area and destination area are
 160   overlapped, which means that we can produce an encoded text until it
 161   reaches the head of the not-yet-encoded source text.
 162
 163   Below is a template of these functions.  */
 164 #if 0
 165 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 166      struct coding_system *coding;
 167      unsigned char *source, *destination;
 168      int src_bytes, dst_bytes;
 169 {
 170   ...
 171 }
 172 #endif
 173
 174 /*** COMMONLY USED MACROS ***/
 175
 176 /* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and
 177    THREE_MORE_BYTES safely get one, two, and three bytes from the
 178    source text respectively.  If there are not enough bytes in the
 179    source, they jump to `label_end_of_loop'.  The caller should set
 180    variables `src' and `src_end' to appropriate areas in advance.  */
 181
 182 #define ONE_MORE_BYTE(c1)       /* Fetch the next source byte into C1.  */ \
 183   do {                          \
 184     if (src < src_end)          /* One byte is still available.  */ \
 185       c1 = *src++;              \
 186     else                        /* Source exhausted; `src' is unchanged.  */ \
 187       goto label_end_of_loop;   \
 188   } while (0)
 189
 190 #define TWO_MORE_BYTES(c1, c2)  /* Fetch the next two source bytes.  */ \
 191   do {                          \
 192     if (src + 1 < src_end)      /* Both bytes must be available ...  */ \
 193       c1 = *src++, c2 = *src++; \
 194     else                        /* ... else `src' is not advanced at all.  */ \
 195       goto label_end_of_loop;   \
 196   } while (0)
 197
 198 #define THREE_MORE_BYTES(c1, c2, c3)  /* Fetch the next three source bytes.  */ \
 199   do {                                          \
 200     if (src + 2 < src_end)      /* All three bytes must be available ...  */ \
 201       c1 = *src++, c2 = *src++, c3 = *src++;    \
 202     else                        /* ... else `src' is not advanced at all.  */ \
 203       goto label_end_of_loop;                   \
 204   } while (0)
 205
 206 /* The following three macros DECODE_CHARACTER_ASCII,
 207    DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 put
 208    the multi-byte form of a character of each class at the place
 209    pointed by `dst'.  The caller should set the variable `dst' to
 210    point to an appropriate area and the variable `coding' to point to
 211    the coding-system of the currently decoding text in advance.  */
 212
 213 /* Decode one ASCII character C: store its internal form at `dst' and
 214    advance `dst'.  The form depends on `coding->composing'.  */
 215 #define DECODE_CHARACTER_ASCII(c)                               \
 216   do {                                                          \
 217     if (COMPOSING_P (coding->composing)) /* Composition component: */ \
 218       *dst++ = 0xA0, *dst++ = (c) | 0x80; /* 0xA0, then C with MSB set.  */ \
 219     else                                 /* Ordinary character: */ \
 220       {                                                         \
 221         *dst++ = (c);                    /* one byte, stored as is.  */ \
 222         coding->produced_char++;         /* Counted only in this branch.  */ \
 223       }                                                         \
 224   } while (0)
 225
 226 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
 227    position-code is C.  Stores at `dst' the base leading-code, the
 228    extended leading-code if it is nonzero, and C with the MSB set.  */
 229 #define DECODE_CHARACTER_DIMENSION1(charset, c)                         \
 230   do {                                                                  \
 231     unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);   \
 232     if (COMPOSING_P (coding->composing))                                \
 233       *dst++ = leading_code + 0x20; /* Component: leading-code + 0x20.  */ \
 234     else                                                                \
 235       {                                                                 \
 236         *dst++ = leading_code;                                          \
 237         coding->produced_char++; /* Counted only outside composition.  */ \
 238       }                                                                 \
 239     if (leading_code = CHARSET_LEADING_CODE_EXT (charset)) /* `=' is intended.  */ \
 240       *dst++ = leading_code; /* Emitted only when nonzero.  */            \
 241     *dst++ = (c) | 0x80;                                                \
 242   } while (0)
 243
 244 /* Decode one DIMENSION2 character whose charset is CHARSET and whose
 245    position-codes are C1 and C2.  The output is what
 246    DECODE_CHARACTER_DIMENSION1 stores for C1, followed by C2 | 0x80.  */
 247 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2)    \
 248   do {                                                  \
 249     DECODE_CHARACTER_DIMENSION1 (charset, c1);          \
 250     *dst++ = (c2) | 0x80; /* Second position-code, MSB set.  */ \
 251   } while (0)
 252
 253 \f
 254 /*** 1. Preamble ***/
 255
 256 #include <stdio.h>
 257
 258 #ifdef emacs
 259
 260 #include <config.h>
 261 #include "lisp.h"
 262 #include "buffer.h"
 263 #include "charset.h"
 264 #include "ccl.h"
 265 #include "coding.h"
 266 #include "window.h"
 267
 268 #else  /* not emacs */
 269
 270 #include "mulelib.h"
 271
 272 #endif /* not emacs */
 273
 274 Lisp_Object Qcoding_system, Qeol_type; /* NOTE(review): Q* presumably Lisp symbols -- confirm.  */
 275 Lisp_Object Qbuffer_file_coding_system;
 276 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 277 Lisp_Object Qno_conversion, Qundecided;
 278 Lisp_Object Qcoding_system_history;
 279 Lisp_Object Qsafe_charsets;
 280 /* The next two are only declared here (`extern'); they are defined elsewhere.  */
 281 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 282 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 283 Lisp_Object Qstart_process, Qopen_network_stream;
 284 Lisp_Object Qtarget_idx;
 285
 286 Lisp_Object Vselect_safe_coding_system_function;
 287
 288 /* Mnemonic characters for the Unix, DOS, and Mac end-of-line formats.  */
 289 int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 290 /* Mnemonic character to indicate that the end-of-line format is not
 291    yet decided.  */
 292 int eol_mnemonic_undecided;
 293
 294 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 295    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 296 int system_eol_type;
 297
 298 #ifdef emacs
 299
 300 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 301
 302 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 303
 304 /* The coding systems emacs-mule and raw-text convert only the
 305    end-of-line format.  */
 306 Lisp_Object Qemacs_mule, Qraw_text;
 307
 308 /* Coding systems are handed between Emacs Lisp programs and C internal
 309    routines by the following three variables.  */
 310 /* Coding system for reading files and receiving data from a process.  */
 311 Lisp_Object Vcoding_system_for_read;
 312 /* Coding system for writing files and sending data to a process.  */
 313 Lisp_Object Vcoding_system_for_write;
 314 /* Coding system actually used in the latest I/O.  */
 315 Lisp_Object Vlast_coding_system_used;
 316
 317 /* A vector of length 256 which contains information about special
 318    Latin codes (especially for dealing with Microsoft code).  */
 319 Lisp_Object Vlatin_extra_code_table;
 320
 321 /* Flag to inhibit code conversion of end-of-line format.  */
 322 int inhibit_eol_conversion;
 323
 324 /* Coding system to be used to encode text for terminal display.  */
 325 struct coding_system terminal_coding;
 326
 327 /* Coding system to be used to encode text for terminal display when
 328    the terminal coding system is nil.  */
 329 struct coding_system safe_terminal_coding;
 330
 331 /* Coding system of the text sent from the terminal keyboard.  */
 332 struct coding_system keyboard_coding;
 333
 334 Lisp_Object Vfile_coding_system_alist;
 335 Lisp_Object Vprocess_coding_system_alist;
 336 Lisp_Object Vnetwork_coding_system_alist;
 337
 338 #endif /* emacs */
 339
 340 Lisp_Object Qcoding_category, Qcoding_category_index;
 341
 342 /* List of symbols `coding-category-xxx' ordered by priority.  */
 343 Lisp_Object Vcoding_category_list;
 344
 345 /* Table of coding categories (Lisp symbols).  */
 346 Lisp_Object Vcoding_category_table;
 347 /* Table of the symbol name of each coding category.  NOTE(review): the
 348    order presumably has to match CODING_CATEGORY_IDX_XXX -- confirm.  */
 349 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 350   "coding-category-emacs-mule",
 351   "coding-category-sjis",
 352   "coding-category-iso-7",
 353   "coding-category-iso-7-tight",
 354   "coding-category-iso-8-1",
 355   "coding-category-iso-8-2",
 356   "coding-category-iso-7-else",
 357   "coding-category-iso-8-else",
 358   "coding-category-big5",
 359   "coding-category-raw-text",
 360   "coding-category-binary"
 361 };
 362
 363 /* Table of pointers to the coding system corresponding to each coding
 364    category.  Having no initializer, its entries start out as NULL.  */
 365 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 366
 367 /* Flag to tell if we look up the unification table on character code
 368    conversion.  */
 369 Lisp_Object Venable_character_unification;
 370 /* Standard unification table to look up on decoding (reading).  */
 371 Lisp_Object Vstandard_character_unification_table_for_decode;
 372 /* Standard unification table to look up on encoding (writing).  */
 373 Lisp_Object Vstandard_character_unification_table_for_encode;
 374
 375 Lisp_Object Qcharacter_unification_table;
 376 Lisp_Object Qcharacter_unification_table_for_decode;
 377 Lisp_Object Qcharacter_unification_table_for_encode;
 378
 379 /* Alist of charsets vs revision number.  */
 380 Lisp_Object Vcharset_revision_alist;
 381
 382 /* Default coding systems used for process I/O.  */
 383 Lisp_Object Vdefault_process_coding_system;
 384
 385 \f
 386 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 387
 388 /* Emacs' internal format for encoding multiple character sets is a
 389    kind of multi-byte encoding, i.e. characters are encoded by
 390    variable-length sequences of one-byte codes.  ASCII characters
 391    and control characters (e.g. `tab', `newline') are represented by
 392    one-byte sequences which are their ASCII codes, in the range 0x00
 393    through 0x7F.  The other characters are represented by a sequence
 394    of `base leading-code', optional `extended leading-code', and one
 395    or two `position-code's.  The length of the sequence is determined
 396    by the base leading-code.  Leading-code takes the range 0x80
 397    through 0x9F, whereas extended leading-code and position-code take
 398    the range 0xA0 through 0xFF.  See `charset.h' for more details
 399    about leading-code and position-code.
 400
 401    There's one exception to this rule.  Special leading-code
 402    `leading-code-composition' denotes that the following several
 403    characters should be composed into one character.  Leading-codes of
 404    components (except for ASCII) are added 0x20.  An ASCII character
 405    component is represented by a 2-byte sequence of `0xA0' and
 406    `ASCII-code + 0x80'.  See also the comments in `charset.h' for the
 407    details of composite character.  Hence, we can summarize the code
 408    range as follows:
 409
 410    --- CODE RANGE of Emacs' internal format ---
 411    (character set)      (range)
 412    ASCII                0x00 .. 0x7F
 413    ELSE (1st byte)      0x80 .. 0x9F
 414         (rest bytes)    0xA0 .. 0xFF
 415    ---------------------------------------------
 416
 417   */
 418
 419 enum emacs_code_class_type emacs_code_class[256]; /* Indexed by byte value.  */
 420
 421 /* Go to the next statement only if *SRC is accessible and the code is
 422    0xA0 or greater (the range 0xA0..0xFF); SRC is advanced past it.  */
 423 #define CHECK_CODE_RANGE_A0_FF  \
 424   do {                          \
 425     if (src >= src_end)         /* No byte left to check.  */ \
 426       goto label_end_of_switch; \
 427     else if (*src++ < 0xA0)     /* Below 0xA0: the enclosing ...  */ \
 428       return 0;                 /* ... function returns 0.  */ \
 429   } while (0)
 430
 431 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 432    Check if the text between SRC and SRC_END is encoded in Emacs'
 433    internal format.  If it is, return CODING_CATEGORY_MASK_EMACS_MULE,
 434    else return 0.  A sequence cut short by SRC_END is not rejected.  */
 435 int
 436 detect_coding_emacs_mule (src, src_end)
 437      unsigned char *src, *src_end;
 438 {
 439   unsigned char c;
 440   int composing = 0;            /* Nonzero inside a composition sequence.  */
 441
 442   while (src < src_end)
 443     {
 444       c = *src++;
 445
 446       if (composing)
 447         {
 448           if (c < 0xA0)         /* Not a component byte: composition ends.  */
 449             composing = 0;
 450           else                  /* Undo the 0x20 added to component codes.  */
 451             c -= 0x20;
 452         }
 453
 454       switch (emacs_code_class[c])
 455         {
 456         case EMACS_ascii_code:
 457         case EMACS_linefeed_code:
 458           break;
 459
 460         case EMACS_control_code:
 461           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 462             return 0;           /* ESC, SI and SO disqualify the text.  */
 463           break;
 464
 465         case EMACS_invalid_code:
 466           return 0;
 467
 468         case EMACS_leading_code_composition: /* c == 0x80 */
 469           if (composing)        /* The byte was 0xA0 before the adjustment.  */
 470             CHECK_CODE_RANGE_A0_FF;
 471           else
 472             composing = 1;
 473           break;
 474
 475         case EMACS_leading_code_4:
 476           CHECK_CODE_RANGE_A0_FF;
 477           /* fall down to check it two more times ...  */
 478
 479         case EMACS_leading_code_3:
 480           CHECK_CODE_RANGE_A0_FF;
 481           /* fall down to check it one more time ...  */
 482
 483         case EMACS_leading_code_2:
 484           CHECK_CODE_RANGE_A0_FF;
 485           break;
 486
 487         default:
 488         label_end_of_switch:    /* CHECK_CODE_RANGE_A0_FF jumps here at SRC_END.  */
 489           break;
 490         }
 491     }
 492   return CODING_CATEGORY_MASK_EMACS_MULE;
 493 }
 494
 495 \f
 496 /*** 3. ISO2022 handlers ***/
 497
 498 /* The following note describes the coding system ISO2022 briefly.
 499    Since the intention of this note is to help in understanding of
 500    the programs in this file, some parts are NOT ACCURATE or OVERLY
 501    SIMPLIFIED.  For the thorough understanding, please refer to the
 502    original document of ISO2022.
 503
 504    ISO2022 provides many mechanisms to encode several character sets
 505    in 7-bit and 8-bit environments.  If one chooses a 7-bit environment,
 506    all text is encoded by codes of less than 128.  This may make the
 507    encoded text a little bit longer, but the text gets more stability
 508    to pass through several gateways (some of them strip off the MSB).
 509
 510    There are two kinds of character set: control character set and
 511    graphic character set.  The former contains control characters such
 512    as `newline' and `escape' to provide control functions (control
 513    functions are provided also by escape sequences).  The latter
 514    contains graphic characters such as 'A' and '-'.  Emacs recognizes
 515    two control character sets and many graphic character sets.
 516
 517    Graphic character sets are classified into one of the following
 518    four classes, DIMENSION1_CHARS94, DIMENSION1_CHARS96,
 519    DIMENSION2_CHARS94, DIMENSION2_CHARS96 according to the number of
 520    bytes (DIMENSION) and the number of characters in one dimension
 521    (CHARS) of the set.  In addition, each character set is assigned an
 522    identification tag (called "final character" and denoted as <F>
 523    here after) which is unique in each class.  <F> of each character
 524    set is decided by ECMA(*) when it is registered in ISO.  Code range
 525    of <F> is 0x30..0x7F (0x30..0x3F are for private use only).
 526
 527    Note (*): ECMA = European Computer Manufacturers Association
 528
 529    Here are examples of graphic character set [NAME(<F>)]:
 530         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 531         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 532         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 533         o DIMENSION2_CHARS96 -- none for the moment
 534
 535    A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
 536         C0 [0x00..0x1F] -- control character plane 0
 537         GL [0x20..0x7F] -- graphic character plane 0
 538         C1 [0x80..0x9F] -- control character plane 1
 539         GR [0xA0..0xFF] -- graphic character plane 1
 540
 541    A control character set is directly designated and invoked to C0 or
 542    C1 by an escape sequence.  The most common case is that ISO646's
 543    control character set is designated/invoked to C0 and ISO6429's
 544    control character set is designated/invoked to C1, and usually
 545    these designations/invocations are omitted in a coded text.  With
 546    7-bit environment, only C0 can be used, and a control character for
 547    C1 is encoded by an appropriate escape sequence to fit in the
 548    environment.  All control characters for C1 have corresponding
 549    escape sequences defined.
 550
 551    A graphic character set is at first designated to one of four
 552    graphic registers (G0 through G3), then these graphic registers are
 553    invoked to GL or GR.  These designations and invocations can be
 554    done independently.  The most common case is that G0 is invoked to
 555    GL, G1 is invoked to GR, and ASCII is designated to G0, and usually
 556    these invocations and designations are omitted in a coded text.
 557    With 7-bit environment, only GL can be used.
 558
 559    When a graphic character set of CHARS94 is invoked to GL, code 0x20
 560    and 0x7F of GL area work as control characters SPACE and DEL
 561    respectively, and code 0xA0 and 0xFF of GR area should not be used.
 562
 563    There are two ways of invocation: locking-shift and single-shift.
 564    With locking-shift, the invocation lasts until the next different
 565    invocation, whereas with single-shift, the invocation works only
 566    for the following character and doesn't affect locking-shift.
 567    Invocations are done by the following control characters or escape
 568    sequences.
 569
 570    ----------------------------------------------------------------------
 571    function             control char    escape sequence description
 572    ----------------------------------------------------------------------
 573    SI  (shift-in)               0x0F    none            invoke G0 to GL
 574    SO  (shift-out)              0x0E    none            invoke G1 to GL
 575    LS2 (locking-shift-2)        none    ESC 'n'         invoke G2 into GL
 576    LS3 (locking-shift-3)        none    ESC 'o'         invoke G3 into GL
 577    SS2 (single-shift-2)         0x8E    ESC 'N'         invoke G2 into GL
 578    SS3 (single-shift-3)         0x8F    ESC 'O'         invoke G3 into GL
 579    ----------------------------------------------------------------------
 580    The first four are for locking-shift.  Control characters for these
 581    functions are defined by macros ISO_CODE_XXX in `coding.h'.
 582
 583    Designations are done by the following escape sequences.
 584    ----------------------------------------------------------------------
 585    escape sequence      description
 586    ----------------------------------------------------------------------
 587    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 588    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 589    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 590    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 591    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 592    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 593    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 594    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 595    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 596    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 597    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 598    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 599    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 600    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 601    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 602    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 603    ----------------------------------------------------------------------
 604
 605    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 606    of dimension 1, chars 94, and final character <F>, and etc.
 607
 608    Note (*): Although these designations are not allowed in ISO2022,
 609    Emacs accepts them on decoding, and produces them on encoding
 610    CHARS96 character set in a coding system which is characterized as
 611    7-bit environment, non-locking-shift, and non-single-shift.
 612
 613    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 614    '(' can be omitted.  We call this as "short-form" here after.
 615
 616    Now you may notice that there are a lot of ways for encoding the
 617    same multilingual text in ISO2022.  Actually, there exist many
 618    coding systems such as Compound Text (used in X's inter client
 619    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
 620    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
 621    localized platforms), and all of these are variants of ISO2022.
 622
 623    In addition to the above, Emacs handles two more kinds of escape
 624    sequences: ISO6429's direction specification and Emacs' private
 625    sequence for specifying character composition.
 626
 627    ISO6429's direction specification takes the following format:
 628         o CSI ']'      -- end of the current direction
 629         o CSI '0' ']'  -- end of the current direction
 630         o CSI '1' ']'  -- start of left-to-right text
 631         o CSI '2' ']'  -- start of right-to-left text
 632    The control character CSI (0x9B: control sequence introducer) is
 633    abbreviated to the escape sequence ESC '[' in 7-bit environment.
 634
 635    Character composition specification takes the following format:
 636         o ESC '0' -- start character composition
 637         o ESC '1' -- end character composition
 638    Since these are not standard escape sequences of any ISO, the use
 639    of them for these meaning is restricted to Emacs only.  */
 640
 641 enum iso_code_class_type iso_code_class[256]; /* Presumably indexed by byte value -- confirm.  */
 642 /* Nonzero if category IDX has a coding system and it accepts CHARSET.  */
 643 #define CHARSET_OK(idx, charset)                        \
 644   (coding_system_table[idx] /* Entries may be NULL: never dereference one.  */ \
 645    && (coding_system_table[idx]->safe_charsets[charset] /* Listed as safe ...  */ \
 646        || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], charset) \
 647            != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))) /* ... or designation requested.  */
 648 /* Nonzero if category IDX has a coding system whose initial designation 1 is >= 0.  */
 649 #define SHIFT_OUT_OK(idx) (coding_system_table[idx] /* May be NULL; check first.  */ \
 650   && CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 651
 652 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 653    Check if a text is encoded in ISO2022.  If it is, returns an
 654    integer in which appropriate flag bits any of:
 655         CODING_CATEGORY_MASK_ISO_7
 656         CODING_CATEGORY_MASK_ISO_7_TIGHT
 657         CODING_CATEGORY_MASK_ISO_8_1
 658         CODING_CATEGORY_MASK_ISO_8_2
 659         CODING_CATEGORY_MASK_ISO_7_ELSE
 660         CODING_CATEGORY_MASK_ISO_8_ELSE
 661    are set.  If a code which should never appear in ISO2022 is found,
 662    returns 0.  */
 663
 664 int
 665 detect_coding_iso2022 (src, src_end)
 666      unsigned char *src, *src_end;
 667 {
 668   int mask = CODING_CATEGORY_MASK_ISO;
 669   int mask_found = 0;
 670   int reg[4], shift_out = 0;
 671   int c, c1, i, charset;
 672
 673   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
 674   while (mask && src < src_end)
 675     {
 676       c = *src++;
 677       switch (c)
 678         {
 679         case ISO_CODE_ESC:
 680           if (src >= src_end)
 681             break;
 682           c = *src++;
 683           if (c >= '(' && c <= '/')
 684             {
 685               /* Designation sequence for a charset of dimension 1.  */
 686               if (src >= src_end)
 687                 break;
 688               c1 = *src++;
 689               if (c1 < ' ' || c1 >= 0x80
 690                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 691                 /* Invalid designation sequence.  Just ignore.  */
 692                 break;
 693               reg[(c - '(') % 4] = charset;
 694             }
 695           else if (c == '$')
 696             {
 697               /* Designation sequence for a charset of dimension 2.  */
 698               if (src >= src_end)
 699                 break;
 700               c = *src++;
 701               if (c >= '@' && c <= 'B')
 702                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 703                 reg[0] = charset = iso_charset_table[1][0][c];
 704               else if (c >= '(' && c <= '/')
 705                 {
 706                   if (src >= src_end)
 707                     break;
 708                   c1 = *src++;
 709                   if (c1 < ' ' || c1 >= 0x80
 710                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 711                     /* Invalid designation sequence.  Just ignore.  */
 712                     break;
 713                   reg[(c - '(') % 4] = charset;
 714                 }
 715               else
 716                 /* Invalid designation sequence.  Just ignore.  */
 717                 break;
 718             }
 719           else if (c == 'N' || c == 'n')
 720             {
 721               if (shift_out == 0
 722                   && (reg[1] >= 0
 723                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 724                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 725                 {
 726                   /* Locking shift out.  */
 727                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 728                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 729                   shift_out = 1;
 730                 }
 731               break;
 732             }
 733           else if (c == 'O' || c == 'o')
 734             {
 735               if (shift_out == 1)
 736                 {
 737                   /* Locking shift in.  */
 738                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 739                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 740                   shift_out = 0;
 741                 }
 742               break;
 743             }
 744           else if (c == '0' || c == '1' || c == '2')
 745             /* Start/end composition.  Just ignore.  */
 746             break;
 747           else
 748             /* Invalid escape sequence.  Just ignore.  */
 749             break;
 750
 751           /* We found a valid designation sequence for CHARSET.  */
 752           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 753           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
 754             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 755           else
 756             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 757           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
 758             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 759           else
 760             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 761           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
 762             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 763           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
 764             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 765           break;
 766
 767         case ISO_CODE_SO:
 768           if (shift_out == 0
 769               && (reg[1] >= 0
 770                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 771                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 772             {
 773               /* Locking shift out.  */
 774               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 775               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 776             }
 777           break;
 778
 779         case ISO_CODE_SI:
 780           if (shift_out == 1)
 781             {
 782               /* Locking shift in.  */
 783               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 784               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 785             }
 786           break;
 787
 788         case ISO_CODE_CSI:
 789         case ISO_CODE_SS2:
 790         case ISO_CODE_SS3:
 791           {
 792             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
 793
 794             if (c != ISO_CODE_CSI)
 795               {
 796                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 797                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 798                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 799                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 800                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 801                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 802               }
 803             if (VECTORP (Vlatin_extra_code_table)
 804                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 805               {
 806                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 807                     & CODING_FLAG_ISO_LATIN_EXTRA)
 808                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 809                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 810                     & CODING_FLAG_ISO_LATIN_EXTRA)
 811                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 812               }
 813             mask &= newmask;
 814             mask_found |= newmask;
 815           }
 816           break;
 817
 818         default:
 819           if (c < 0x80)
 820             break;
 821           else if (c < 0xA0)
 822             {
 823               if (VECTORP (Vlatin_extra_code_table)
 824                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 825                 {
 826                   int newmask = 0;
 827
 828                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 829                       & CODING_FLAG_ISO_LATIN_EXTRA)
 830                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 831                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 832                       & CODING_FLAG_ISO_LATIN_EXTRA)
 833                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 834                   mask &= newmask;
 835                   mask_found |= newmask;
 836                 }
 837               else
 838                 return 0;
 839             }
 840           else
 841             {
 842               unsigned char *src_begin = src;
 843
 844               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
 845                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
 846               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
 847               while (src < src_end && *src >= 0xA0)
 848                 src++;
 849               if ((src - src_begin - 1) & 1 && src < src_end)
 850                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
 851               else
 852                 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
 853             }
 854           break;
 855         }
 856     }
 857
 858   return (mask & mask_found);
 859 }
 860
/* Decode a character of which charset is CHARSET and the 1st position
   code is C1, and store the result through DST.

   This macro is not self-contained: it expands in terms of the
   variables `coding', `src', `dst', `c2' and `unification_table' of
   the function in which it is used.

   If CHARSET is not negative and its dimension is 2, the 2nd position
   code is fetched from SRC and set to C2 by ONE_MORE_BYTE.  If the
   class of that byte (with the 8th bit masked off) is neither
   ISO_0x20_or_0x7F nor ISO_graphic_plane_0, SRC is moved back by one
   byte and a space is used as C2 instead.  Then, if
   `unification_table' is not nil and unify_char returns a
   non-negative character, SPLIT_CHAR is applied to that character
   with the charset to decode, C1, and C2 as its remaining arguments.

   If CHARSET is negative, it means that we are decoding ill formed
   text, and what we can do is just to read C1 as is.

   When COMPOSING_HEAD_P (coding->composing) holds,
   LEADING_CODE_COMPOSITION (followed by 0xFF when
   COMPOSING_WITH_RULE_P also holds) is stored first and
   `coding->composing' is incremented by 2.  After the character is
   stored, `coding->composing' is set to COMPOSING_WITH_RULE_RULE if
   COMPOSING_WITH_RULE_P holds.  */

#define DECODE_ISO_CHARACTER(charset, c1)                               \
  do {                                                                  \
    int c_alt, charset_alt = (charset);                                 \
    if (COMPOSING_HEAD_P (coding->composing))                           \
      {                                                                 \
        *dst++ = LEADING_CODE_COMPOSITION;                              \
        if (COMPOSING_WITH_RULE_P (coding->composing))                  \
          /* To tell composition rules are embedded.  */                \
          *dst++ = 0xFF;                                                \
        coding->composing += 2;                                         \
      }                                                                 \
    if ((charset) >= 0)                                                 \
      {                                                                 \
        if (CHARSET_DIMENSION (charset) == 2)                           \
          {                                                             \
            ONE_MORE_BYTE (c2);                                         \
            if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F         \
                && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0)  \
              {                                                         \
                src--;                                                  \
                c2 = ' ';                                               \
              }                                                         \
          }                                                             \
        if (!NILP (unification_table)                                   \
            && ((c_alt = unify_char (unification_table,                 \
                                     -1, (charset), c1, c2)) >= 0))     \
          SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
      }                                                                 \
    if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
      DECODE_CHARACTER_ASCII (c1);                                      \
    else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
      DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
    else                                                                \
      DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
    if (COMPOSING_WITH_RULE_P (coding->composing))                      \
      /* To tell a composition rule follows.  */                        \
      coding->composing = COMPOSING_WITH_RULE_RULE;                     \
  } while (0)
 905
 906 /* Set designation state into CODING.  */
 907 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
 908   do {                                                                     \
 909     int charset = ISO_CHARSET_TABLE (make_number (dimension),              \
 910                                      make_number (chars),                  \
 911                                      make_number (final_char));            \
 912     if (charset >= 0                                                       \
 913         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
 914             || coding->safe_charsets[charset]))                            \
 915       {                                                                    \
 916         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
 917             && reg == 0                                                    \
 918             && charset == CHARSET_ASCII)                                   \
 919           {                                                                \
 920             /* We should insert this designation sequence as is so         \
 921                that it is surely written back to a file.  */               \
 922             coding->spec.iso2022.last_invalid_designation_register = -1;   \
 923             goto label_invalid_code;                                       \
 924           }                                                                \
 925         coding->spec.iso2022.last_invalid_designation_register = -1;       \
 926         if ((coding->mode & CODING_MODE_DIRECTION)                         \
 927             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
 928           charset = CHARSET_REVERSE_CHARSET (charset);                     \
 929         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
 930       }                                                                    \
 931     else                                                                   \
 932       {                                                                    \
 933         coding->spec.iso2022.last_invalid_designation_register = reg;      \
 934         goto label_invalid_code;                                           \
 935       }                                                                    \
 936   } while (0)
 937
 938 /* Check if the current composing sequence contains only valid codes.
 939    If the composing sequence doesn't end before SRC_END, return -1.
 940    Else, if it contains only valid codes, return 0.
 941    Else return the length of the composing sequence.  */
 942
 943 int check_composing_code (coding, src, src_end)
 944      struct coding_system *coding;
 945      unsigned char *src, *src_end;
 946 {
 947   unsigned char *src_start = src;
 948   int invalid_code_found = 0;
 949   int charset, c, c1, dim;
 950
 951   while (src < src_end)
 952     {
 953       if (*src++ != ISO_CODE_ESC) continue;
 954       if (src >= src_end) break;
 955       if ((c = *src++) == '1') /* end of compsition */
 956         return (invalid_code_found ? src - src_start : 0);
 957       if (src + 2 >= src_end) break;
 958       if (!coding->flags & CODING_FLAG_ISO_DESIGNATION)
 959         invalid_code_found = 1;
 960       else
 961         {
 962           dim = 0;
 963           if (c == '$')
 964             {
 965               dim = 1;
 966               c = (*src >= '@' && *src <= 'B') ? '(' : *src++;
 967             }
 968           if (c >= '(' && c <= '/')
 969             {
 970               c1 = *src++;
 971               if ((c1 < ' ' || c1 >= 0x80)
 972                   || (charset = iso_charset_table[dim][c >= ','][c1]) < 0
 973                   || ! coding->safe_charsets[charset]
 974                   || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
 975                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 976                 invalid_code_found = 1;
 977             }
 978           else
 979             invalid_code_found = 1;
 980         }
 981     }
 982   return ((coding->mode & CODING_MODE_LAST_BLOCK) ? src_end - src_start : -1);
 983 }
 984
 985 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
 986
 987 int
 988 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
 989      struct coding_system *coding;
 990      unsigned char *source, *destination;
 991      int src_bytes, dst_bytes;
 992 {
 993   unsigned char *src = source;
 994   unsigned char *src_end = source + src_bytes;
 995   unsigned char *dst = destination;
 996   unsigned char *dst_end = destination + dst_bytes;
 997   /* Since the maximum bytes produced by each loop is 7, we subtract 6
 998      from DST_END to assure that overflow checking is necessary only
 999      at the head of loop.  */
1000   unsigned char *adjusted_dst_end = dst_end - 6;
1001   int charset;
1002   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1003   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1004   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1005   Lisp_Object unification_table
1006     = coding->character_unification_table_for_decode;
1007   int result = CODING_FINISH_NORMAL;
1008
1009   if (!NILP (Venable_character_unification) && NILP (unification_table))
1010     unification_table = Vstandard_character_unification_table_for_decode;
1011
1012   coding->produced_char = 0;
1013   coding->fake_multibyte = 0;
1014   while (src < src_end && (dst_bytes
1015                            ? (dst < adjusted_dst_end)
1016                            : (dst < src - 6)))
1017     {
1018       /* SRC_BASE remembers the start position in source in each loop.
1019          The loop will be exited when there's not enough source text
1020          to analyze long escape sequence or 2-byte code (within macros
1021          ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
1022          to SRC_BASE before exiting.  */
1023       unsigned char *src_base = src;
1024       int c1 = *src++, c2;
1025
1026       switch (iso_code_class [c1])
1027         {
1028         case ISO_0x20_or_0x7F:
1029           if (!coding->composing
1030               && (charset0 < 0 || CHARSET_CHARS (charset0) == 94))
1031             {
1032               /* This is SPACE or DEL.  */
1033               *dst++ = c1;
1034               coding->produced_char++;
1035               break;
1036             }
1037           /* This is a graphic character, we fall down ...  */
1038
1039         case ISO_graphic_plane_0:
1040           if (coding->composing == COMPOSING_WITH_RULE_RULE)
1041             {
1042               /* This is a composition rule.  */
1043               *dst++ = c1 | 0x80;
1044               coding->composing = COMPOSING_WITH_RULE_TAIL;
1045             }
1046           else
1047             DECODE_ISO_CHARACTER (charset0, c1);
1048           break;
1049
1050         case ISO_0xA0_or_0xFF:
1051           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1052               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1053             goto label_invalid_code;
1054           /* This is a graphic character, we fall down ... */
1055
1056         case ISO_graphic_plane_1:
1057           if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1058             goto label_invalid_code;
1059           else
1060             DECODE_ISO_CHARACTER (charset1, c1);
1061           break;
1062
1063         case ISO_control_code:
1064           /* All ISO2022 control characters in this class have the
1065              same representation in Emacs internal format.  */
1066           if (c1 == '\n'
1067               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1068               && (coding->eol_type == CODING_EOL_CR
1069                   || coding->eol_type == CODING_EOL_CRLF))
1070             {
1071               result = CODING_FINISH_INCONSISTENT_EOL;
1072               goto label_end_of_loop_2;
1073             }
1074           *dst++ = c1;
1075           coding->produced_char++;
1076           break;
1077
1078         case ISO_carriage_return:
1079           if (coding->eol_type == CODING_EOL_CR)
1080             *dst++ = '\n';
1081           else if (coding->eol_type == CODING_EOL_CRLF)
1082             {
1083               ONE_MORE_BYTE (c1);
1084               if (c1 == ISO_CODE_LF)
1085                 *dst++ = '\n';
1086               else
1087                 {
1088                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1089                     {
1090                       result = CODING_FINISH_INCONSISTENT_EOL;
1091                       goto label_end_of_loop_2;
1092                     }
1093                   src--;
1094                   *dst++ = '\r';
1095                 }
1096             }
1097           else
1098             *dst++ = c1;
1099           coding->produced_char++;
1100           break;
1101
1102         case ISO_shift_out:
1103           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1104               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1105             goto label_invalid_code;
1106           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1107           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1108           break;
1109
1110         case ISO_shift_in:
1111           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1112             goto label_invalid_code;
1113           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1114           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1115           break;
1116
1117         case ISO_single_shift_2_7:
1118         case ISO_single_shift_2:
1119           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1120             goto label_invalid_code;
1121           /* SS2 is handled as an escape sequence of ESC 'N' */
1122           c1 = 'N';
1123           goto label_escape_sequence;
1124
1125         case ISO_single_shift_3:
1126           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1127             goto label_invalid_code;
1128           /* SS2 is handled as an escape sequence of ESC 'O' */
1129           c1 = 'O';
1130           goto label_escape_sequence;
1131
1132         case ISO_control_sequence_introducer:
1133           /* CSI is handled as an escape sequence of ESC '[' ...  */
1134           c1 = '[';
1135           goto label_escape_sequence;
1136
1137         case ISO_escape:
1138           ONE_MORE_BYTE (c1);
1139         label_escape_sequence:
1140           /* Escape sequences handled by Emacs are invocation,
1141              designation, direction specification, and character
1142              composition specification.  */
1143           switch (c1)
1144             {
1145             case '&':           /* revision of following character set */
1146               ONE_MORE_BYTE (c1);
1147               if (!(c1 >= '@' && c1 <= '~'))
1148                 goto label_invalid_code;
1149               ONE_MORE_BYTE (c1);
1150               if (c1 != ISO_CODE_ESC)
1151                 goto label_invalid_code;
1152               ONE_MORE_BYTE (c1);
1153               goto label_escape_sequence;
1154
1155             case '$':           /* designation of 2-byte character set */
1156               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1157                 goto label_invalid_code;
1158               ONE_MORE_BYTE (c1);
1159               if (c1 >= '@' && c1 <= 'B')
1160                 {       /* designation of JISX0208.1978, GB2312.1980,
1161                                    or JISX0208.1980 */
1162                   DECODE_DESIGNATION (0, 2, 94, c1);
1163                 }
1164               else if (c1 >= 0x28 && c1 <= 0x2B)
1165                 {       /* designation of DIMENSION2_CHARS94 character set */
1166                   ONE_MORE_BYTE (c2);
1167                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1168                 }
1169               else if (c1 >= 0x2C && c1 <= 0x2F)
1170                 {       /* designation of DIMENSION2_CHARS96 character set */
1171                   ONE_MORE_BYTE (c2);
1172                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1173                 }
1174               else
1175                 goto label_invalid_code;
1176               break;
1177
1178             case 'n':           /* invocation of locking-shift-2 */
1179               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1180                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1181                 goto label_invalid_code;
1182               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1183               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1184               break;
1185
1186             case 'o':           /* invocation of locking-shift-3 */
1187               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1188                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1189                 goto label_invalid_code;
1190               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1191               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1192               break;
1193
1194             case 'N':           /* invocation of single-shift-2 */
1195               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1196                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1197                 goto label_invalid_code;
1198               ONE_MORE_BYTE (c1);
1199               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1200               DECODE_ISO_CHARACTER (charset, c1);
1201               break;
1202
1203             case 'O':           /* invocation of single-shift-3 */
1204               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1205                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1206                 goto label_invalid_code;
1207               ONE_MORE_BYTE (c1);
1208               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1209               DECODE_ISO_CHARACTER (charset, c1);
1210               break;
1211
1212             case '0': case '2': /* start composing */
1213               /* Before processing composing, we must be sure that all
1214                  characters being composed are supported by CODING.
1215                  If not, we must give up composing and insert the
1216                  bunch of codes for composing as is without decoding.  */
1217               {
1218                 int result1;
1219
1220                 result1 = check_composing_code (coding, src, src_end);
1221                 if (result1 == 0)
1222                   coding->composing = (c1 == '0'
1223                                        ? COMPOSING_NO_RULE_HEAD
1224                                        : COMPOSING_WITH_RULE_HEAD);
1225                 else if (result1 > 0)
1226                   {
1227                     if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst)
1228                       {
1229                         bcopy (src_base, dst, result1 + 2);
1230                         src += result1;
1231                         dst += result1 + 2;
1232                         coding->produced_char += result1 + 2;
1233                       }
1234                     else
1235                       {
1236                         result = CODING_FINISH_INSUFFICIENT_DST;
1237                         goto label_end_of_loop_2;
1238                       }
1239                   }
1240                 else
1241                   goto label_end_of_loop;
1242               }
1243               break;
1244
1245             case '1':           /* end composing */
1246               coding->composing = COMPOSING_NO;
1247               coding->produced_char++;
1248               break;
1249
1250             case '[':           /* specification of direction */
1251               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1252                 goto label_invalid_code;
1253               /* For the moment, nested direction is not supported.
1254                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1255                  left-to-right, and nozero means right-to-left.  */
1256               ONE_MORE_BYTE (c1);
1257               switch (c1)
1258                 {
1259                 case ']':       /* end of the current direction */
1260                   coding->mode &= ~CODING_MODE_DIRECTION;
1261
1262                 case '0':       /* end of the current direction */
1263                 case '1':       /* start of left-to-right direction */
1264                   ONE_MORE_BYTE (c1);
1265                   if (c1 == ']')
1266                     coding->mode &= ~CODING_MODE_DIRECTION;
1267                   else
1268                     goto label_invalid_code;
1269                   break;
1270
1271                 case '2':       /* start of right-to-left direction */
1272                   ONE_MORE_BYTE (c1);
1273                   if (c1 == ']')
1274                     coding->mode |= CODING_MODE_DIRECTION;
1275                   else
1276                     goto label_invalid_code;
1277                   break;
1278
1279                 default:
1280                   goto label_invalid_code;
1281                 }
1282               break;
1283
1284             default:
1285               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1286                 goto label_invalid_code;
1287               if (c1 >= 0x28 && c1 <= 0x2B)
1288                 {       /* designation of DIMENSION1_CHARS94 character set */
1289                   ONE_MORE_BYTE (c2);
1290                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1291                 }
1292               else if (c1 >= 0x2C && c1 <= 0x2F)
1293                 {       /* designation of DIMENSION1_CHARS96 character set */
1294                   ONE_MORE_BYTE (c2);
1295                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1296                 }
1297               else
1298                 {
1299                   goto label_invalid_code;
1300                 }
1301             }
1302           /* We must update these variables now.  */
1303           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1304           charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1305           break;
1306
1307         label_invalid_code:
1308           while (src_base < src)
1309             *dst++ = *src_base++;
1310           coding->fake_multibyte = 1;
1311         }
1312       continue;
1313
1314     label_end_of_loop:
1315       result = CODING_FINISH_INSUFFICIENT_SRC;
1316     label_end_of_loop_2:
1317       src = src_base;
1318       break;
1319     }
1320
1321   if (src < src_end)
1322     {
1323       if (result == CODING_FINISH_NORMAL)
1324         result = CODING_FINISH_INSUFFICIENT_DST;
1325       else if (result != CODING_FINISH_INCONSISTENT_EOL
1326                && coding->mode & CODING_MODE_LAST_BLOCK)
1327         {
1328           /* This is the last block of the text to be decoded.  We had
1329              better just flush out all remaining codes in the text
1330              although they are not valid characters.  */
1331           src_bytes = src_end - src;
1332           if (dst_bytes && (dst_end - dst < src_bytes))
1333             src_bytes = dst_end - dst;
1334           bcopy (src, dst, src_bytes);
1335           dst += src_bytes;
1336           src += src_bytes;
1337           coding->fake_multibyte = 1;
1338         }
1339     }
1340
1341   coding->consumed = coding->consumed_char = src - source;
1342   coding->produced = dst - destination;
1343   return result;
1344 }
1345
1346 /* ISO2022 encoding stuff.  */
1347
1348 /*
1349    It is not enough to say just "ISO2022" on encoding, we have to
1350    specify more details.  In Emacs, each coding system of ISO2022
1351    variant has the following specifications:
1352         1. Initial designation to G0 thru G3.
1353         2. Allows short-form designation?
1354         3. ASCII should be designated to G0 before control characters?
1355         4. ASCII should be designated to G0 at end of line?
1356         5. 7-bit environment or 8-bit environment?
1357         6. Use locking-shift?
1358         7. Use Single-shift?
1359    And the following two are only for Japanese:
1360         8. Use ASCII in place of JIS0201-1976-Roman?
1361         9. Use JISX0208-1983 in place of JISX0208-1978?
1362    These specifications are encoded in `coding->flags' as flag bits
1363    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1364    details.
1365 */
1366
/* Produce codes (escape sequence) for designating CHARSET to graphic
   register REG.  If <final-char> of CHARSET is '@', 'A', or 'B' and
   the coding system CODING allows, produce designation sequence of
   short-form.

   The codes are stored through `dst', a variable of the function
   using this macro.  If the revision number of CHARSET in CODING is
   less than 255, the sequence `ESC & <'@' + revision>' is produced
   first.  The short-form (no intermediate character after `ESC $')
   is used only for a 2-dimensional charset of 94 characters
   designated to register 0 when CODING_FLAG_ISO_SHORT_FORM is set.
   At last, CHARSET is recorded as the current designation of REG in
   CODING.

   REG must be in the range 0..3.  CHARSET, REG, and CODING are
   evaluated more than once; REG and CODING are parenthesized where
   they are used in expressions so that any expression can be given
   safely.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
    char *intermediate_char_94 = "()*+";                                \
    char *intermediate_char_96 = ",-./";                                \
    int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
    if (revision < 255)                                                 \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + revision;                                        \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (CHARSET_DIMENSION (charset) == 1)                               \
      {                                                                 \
        if (CHARSET_CHARS (charset) == 94)                              \
          *dst++ = (unsigned char) (intermediate_char_94[(reg)]);       \
        else                                                            \
          *dst++ = (unsigned char) (intermediate_char_96[(reg)]);       \
      }                                                                 \
    else                                                                \
      {                                                                 \
        *dst++ = '$';                                                   \
        if (CHARSET_CHARS (charset) == 94)                              \
          {                                                             \
            if (! ((coding)->flags & CODING_FLAG_ISO_SHORT_FORM)        \
                || (reg) != 0                                           \
                || final_char < '@' || final_char > 'B')                \
              *dst++ = (unsigned char) (intermediate_char_94[(reg)]);   \
          }                                                             \
        else                                                            \
          *dst++ = (unsigned char) (intermediate_char_96[(reg)]);       \
      }                                                                 \
    *dst++ = final_char;                                                \
    CODING_SPEC_ISO_DESIGNATION (coding, (reg)) = charset;              \
  } while (0)
1408
/* Emit an ISO2022 single-shift function at DST: single-shift-2
   (ENCODE_SINGLE_SHIFT_2) or single-shift-3 (ENCODE_SINGLE_SHIFT_3).

   Both macros use the variables `coding' and `dst' of the expanding
   function.  If CODING_FLAG_ISO_SEVEN_BITS is set in coding->flags,
   the two-byte escape sequence (ESC N or ESC O) is produced.
   Otherwise the single control code ISO_CODE_SS2 or ISO_CODE_SS3 is
   produced and coding->fake_multibyte is set to 1.  In both cases
   the single-shifting state of CODING is turned on afterwards.  */

#define ENCODE_SINGLE_SHIFT_2                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      {                                                         \
        /* Control code form.  */                               \
        *dst++ = ISO_CODE_SS2;                                  \
        coding->fake_multibyte = 1;                             \
      }                                                         \
    else                                                        \
      {                                                         \
        /* Escape sequence form: ESC N.  */                     \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'N';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)

#define ENCODE_SINGLE_SHIFT_3                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      {                                                         \
        /* Control code form.  */                               \
        *dst++ = ISO_CODE_SS3;                                  \
        coding->fake_multibyte = 1;                             \
      }                                                         \
    else                                                        \
      {                                                         \
        /* Escape sequence form: ESC O.  */                     \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'O';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
1436
/* Emit an ISO2022 locking-shift function at DST and record in CODING
   which graphic register is now invoked to graphic plane 0.  The
   macros use the variables `coding' and `dst' of the expanding
   function.

     ENCODE_SHIFT_IN         ISO_CODE_SI   register 0
     ENCODE_SHIFT_OUT        ISO_CODE_SO   register 1
     ENCODE_LOCKING_SHIFT_2  ESC n         register 2
     ENCODE_LOCKING_SHIFT_3  ESC o         register 3  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    *dst++ = ISO_CODE_SI;                               \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;         \
  } while (0)

#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    *dst++ = ISO_CODE_SO;                               \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;         \
  } while (0)

#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'n';                                       \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;         \
  } while (0)

#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'o';                                       \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;         \
  } while (0)
1464
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   The macro uses the variables `coding' and `dst' of the expanding
   function.  It is a loop: each round tries, in this order,
     - a pending single shift (C1 is written with the 8th bit cleared
       if CODING_FLAG_ISO_SEVEN_BITS is set, with the 8th bit set
       otherwise, and the single-shifting state is cleared),
     - CHARSET being the charset of graphic plane 0 (8th bit cleared),
     - CHARSET being the charset of graphic plane 1 (8th bit set),
     - CHARSET being unsafe under CODING_FLAG_ISO_SAFE (one or two
       CODING_INHIBIT_CHARACTER_SUBSTITUTION bytes are written),
   and otherwise calls encode_invocation_designation and goes round
   again.

   CHARSET and C1 are parenthesized in the expansion so that
   expression arguments such as `a | b' are handled correctly.  They
   may be evaluated more than once, so they must be free of side
   effects.  */


#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          *dst++ = (c1) & 0x7F;                                         \
        else                                                            \
          *dst++ = (c1) | 0x80;                                         \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))    \
      {                                                                 \
        *dst++ = (c1) & 0x7F;                                           \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))    \
      {                                                                 \
        *dst++ = (c1) | 0x80;                                           \
        break;                                                          \
      }                                                                 \
    else if (coding->flags & CODING_FLAG_ISO_SAFE                       \
             && !coding->safe_charsets[(charset)])                      \
      {                                                                 \
        /* We should not encode this character, instead produce one or  \
           two `?'s.  */                                                \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
        if (CHARSET_WIDTH ((charset)) == 2)                             \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation ((charset), coding, dst);     \
  } while (1)
1508
/* Produce codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are also produced in advance if necessary.

   The macro uses the variables `coding' and `dst' of the expanding
   function and works like ENCODE_ISO_CHARACTER_DIMENSION1, except
   that two bytes (C1 then C2) are written for the character.

   CHARSET, C1 and C2 are parenthesized in the expansion so that
   expression arguments such as `a | b' are handled correctly.  They
   may be evaluated more than once, so they must be free of side
   effects.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          *dst++ = (c1) & 0x7F, *dst++ = (c2) & 0x7F;                   \
        else                                                            \
          *dst++ = (c1) | 0x80, *dst++ = (c2) | 0x80;                   \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))    \
      {                                                                 \
        *dst++ = (c1) & 0x7F, *dst++ = (c2) & 0x7F;                     \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))    \
      {                                                                 \
        *dst++ = (c1) | 0x80, *dst++ = (c2) | 0x80;                     \
        break;                                                          \
      }                                                                 \
    else if (coding->flags & CODING_FLAG_ISO_SAFE                       \
             && !coding->safe_charsets[(charset)])                      \
      {                                                                 \
        /* We should not encode this character, instead produce one or  \
           two `?'s.  */                                                \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
        if (CHARSET_WIDTH ((charset)) == 2)                             \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation ((charset), coding, dst);     \
  } while (1)
1551
/* Encode one character of CHARSET whose position-codes are C1 and C2
   (C2 is a dummy for a DIMENSION1 charset).

   The macro uses the variables `coding', `dst' and
   `unification_table' of the expanding function.  If
   UNIFICATION_TABLE is non-nil and unify_char finds a replacement
   for the character, the replacement is split back into a charset
   and into C1 and C2, so C1 and C2 must be variables.

   A one-dimensional character goes through
   ENCODE_ISO_CHARACTER_DIMENSION1; if CHARSET itself (not the unified
   charset) is ASCII and CODING_FLAG_ISO_USE_ROMAN is set,
   charset_latin_jisx0201 is used instead.  Any other character goes
   through ENCODE_ISO_CHARACTER_DIMENSION2; if CHARSET itself is
   charset_jisx0208 and CODING_FLAG_ISO_USE_OLDJIS is set,
   charset_jisx0208_1978 is used instead.

   coding->consumed_char is incremented unless a composition is in
   progress.  */
#define ENCODE_ISO_CHARACTER(charset, c1, c2)                             \
  do {                                                                    \
    int c_alt = -1, charset_alt = charset;                                \
    if (!NILP (unification_table))                                        \
      c_alt = unify_char (unification_table, -1, charset, c1, c2);        \
    if (c_alt >= 0)                                                       \
      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                            \
    if (CHARSET_DIMENSION (charset_alt) != 1)                             \
      {                                                                   \
        if (charset == charset_jisx0208                                   \
            && (coding->flags & CODING_FLAG_ISO_USE_OLDJIS))              \
          charset_alt = charset_jisx0208_1978;                            \
        ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);            \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        if (charset == CHARSET_ASCII                                      \
            && (coding->flags & CODING_FLAG_ISO_USE_ROMAN))               \
          charset_alt = charset_latin_jisx0201;                           \
        ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);                \
      }                                                                   \
    if (! COMPOSING_P (coding->composing))                                \
      coding->consumed_char++;                                            \
  } while (0)
1578
1579 /* Produce designation and invocation codes at a place pointed by DST
1580    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
1581    Return new DST.  */
1582
1583 unsigned char *
1584 encode_invocation_designation (charset, coding, dst)
1585      int charset;
1586      struct coding_system *coding;
1587      unsigned char *dst;
1588 {
1589   int reg;                      /* graphic register number */
1590
1591   /* At first, check designations.  */
1592   for (reg = 0; reg < 4; reg++)
1593     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1594       break;
1595
1596   if (reg >= 4)
1597     {
1598       /* CHARSET is not yet designated to any graphic registers.  */
1599       /* At first check the requested designation.  */
1600       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1601       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1602         /* Since CHARSET requests no special designation, designate it
1603            to graphic register 0.  */
1604         reg = 0;
1605
1606       ENCODE_DESIGNATION (charset, reg, coding);
1607     }
1608
1609   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1610       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1611     {
1612       /* Since the graphic register REG is not invoked to any graphic
1613          planes, invoke it to graphic plane 0.  */
1614       switch (reg)
1615         {
1616         case 0:                 /* graphic register 0 */
1617           ENCODE_SHIFT_IN;
1618           break;
1619
1620         case 1:                 /* graphic register 1 */
1621           ENCODE_SHIFT_OUT;
1622           break;
1623
1624         case 2:                 /* graphic register 2 */
1625           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1626             ENCODE_SINGLE_SHIFT_2;
1627           else
1628             ENCODE_LOCKING_SHIFT_2;
1629           break;
1630
1631         case 3:                 /* graphic register 3 */
1632           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1633             ENCODE_SINGLE_SHIFT_3;
1634           else
1635             ENCODE_LOCKING_SHIFT_3;
1636           break;
1637         }
1638     }
1639   return dst;
1640 }
1641
/* The following three macros produce codes for indicating composition:
   ESC 0 starts a composition without rule, ESC 2 starts a composition
   with rule, and ESC 1 ends a composition.  They write through the
   variable `dst' of the expanding function.  Each expansion is a
   single parenthesized expression whose value is the last byte
   written.  */
#define ENCODE_COMPOSITION_NO_RULE_START \
  (*dst++ = ISO_CODE_ESC, *dst++ = '0')
#define ENCODE_COMPOSITION_WITH_RULE_START \
  (*dst++ = ISO_CODE_ESC, *dst++ = '2')
#define ENCODE_COMPOSITION_END \
  (*dst++ = ISO_CODE_ESC, *dst++ = '1')
1646
/* The following three macros produce codes for indicating direction
   of text.  They use the variables `coding' and `dst' of the
   expanding function and must be used as statements.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER produces ESC [ if
   CODING_FLAG_ISO_SEVEN_BITS is set in coding->flags (tested as a bit,
   like every other use of that flag, so other flags may be set too),
   and the single code ISO_CODE_CSI otherwise.

   ENCODE_DIRECTION_R2L and ENCODE_DIRECTION_L2R produce the
   introducer followed by `2]' and `0]' respectively.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

/* The introducer is a statement, not an expression, so it cannot be
   an operand of the comma operator; wrap the sequence in a block.  */
#define ENCODE_DIRECTION_R2L                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '2', *dst++ = ']';                 \
  } while (0)

#define ENCODE_DIRECTION_L2R                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '0', *dst++ = ']';                 \
  } while (0)
1662
/* Produce codes for designation and invocation to reset the graphic
   planes and registers to initial state.

   The macro uses the variables `coding' and `dst' of the expanding
   function.  A shift-in is produced if graphic plane 0 does not
   invoke graphic register 0.  Then every graphic register that has
   an initial designation (a value >= 0) different from its current
   designation is designated to its initial charset again.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                     \
  do {                                                                      \
    int reg = 0;                                                            \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
      ENCODE_SHIFT_IN;                                                      \
    while (reg < 4)                                                         \
      {                                                                     \
        int initial_charset_                                                \
          = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg);              \
        if (initial_charset_ >= 0                                           \
            && CODING_SPEC_ISO_DESIGNATION (coding, reg) != initial_charset_) \
          ENCODE_DESIGNATION (initial_charset_, reg, coding);               \
        reg++;                                                              \
      }                                                                     \
  } while (0)
1677
1678 /* Produce designation sequences of charsets in the line started from
1679    SRC to a place pointed by *DSTP, and update DSTP.
1680
1681    If the current block ends before any end-of-line, we may fail to
1682    find all the necessary designations.  */
1683
1684 encode_designation_at_bol (coding, table, src, src_end, dstp)
1685      struct coding_system *coding;
1686      Lisp_Object table;
1687      unsigned char *src, *src_end, **dstp;
1688 {
1689   int charset, c, found = 0, reg;
1690   /* Table of charsets to be designated to each graphic register.  */
1691   int r[4];
1692   unsigned char *dst = *dstp;
1693
1694   for (reg = 0; reg < 4; reg++)
1695     r[reg] = -1;
1696
1697   while (src < src_end && *src != '\n' && found < 4)
1698     {
1699       int bytes = BYTES_BY_CHAR_HEAD (*src);
1700
1701       if (NILP (table))
1702         charset = CHARSET_AT (src);
1703       else
1704         {
1705           int c_alt;
1706           unsigned char c1, c2;
1707
1708           SPLIT_STRING(src, bytes, charset, c1, c2);
1709           if ((c_alt = unify_char (table, -1, charset, c1, c2)) >= 0)
1710             charset = CHAR_CHARSET (c_alt);
1711         }
1712
1713       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1714       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
1715         {
1716           found++;
1717           r[reg] = charset;
1718         }
1719
1720       src += bytes;
1721     }
1722
1723   if (found)
1724     {
1725       for (reg = 0; reg < 4; reg++)
1726         if (r[reg] >= 0
1727             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
1728           ENCODE_DESIGNATION (r[reg], reg, coding);
1729       *dstp = dst;
1730     }
1731 }
1732
1733 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".

        Encode the SRC_BYTES bytes at SOURCE into ISO2022 codes written at
        DESTINATION, following the flags and the designation/invocation
        state kept in *CODING.

        On return coding->consumed holds the number of source bytes used,
        coding->produced and coding->produced_char both hold the number of
        bytes written, and coding->consumed_char holds the count kept by
        the loop below.  coding->fake_multibyte is cleared on entry.

        The value is CODING_FINISH_NORMAL, CODING_FINISH_INSUFFICIENT_SRC
        (the loop was left through `label_end_of_loop'), or
        CODING_FINISH_INSUFFICIENT_DST (the loop stopped with source left
        over while the result was still normal).  */
1734
1735 int
1736 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1737      struct coding_system *coding;
1738      unsigned char *source, *destination;
1739      int src_bytes, dst_bytes;
1740 {
1741   unsigned char *src = source;
1742   unsigned char *src_end = source + src_bytes;
1743   unsigned char *dst = destination;
1744   unsigned char *dst_end = destination + dst_bytes;
1745   /* Since the maximum bytes produced by each loop is 20, we subtract 19
1746      from DST_END to assure overflow checking is necessary only at the
1747      head of loop.  */
1748   unsigned char *adjusted_dst_end = dst_end - 19;
1749   Lisp_Object unification_table
1750       = coding->character_unification_table_for_encode;
1751   int result = CODING_FINISH_NORMAL;
1752
     /* Fall back to the standard table when unification is enabled but
        CODING has no table of its own.  */
1753   if (!NILP (Venable_character_unification) && NILP (unification_table))
1754     unification_table = Vstandard_character_unification_table_for_encode;
1755
1756   coding->consumed_char = 0;
1757   coding->fake_multibyte = 0;
     /* When DST_BYTES is zero the loop instead keeps DST at least 19
        bytes behind SRC -- presumably the case of encoding in place in
        one buffer; TODO confirm against the callers.  */
1758   while (src < src_end && (dst_bytes
1759                            ? (dst < adjusted_dst_end)
1760                            : (dst < src - 19)))
1761     {
1762       /* SRC_BASE remembers the start position in source in each loop.
1763          The loop will be exited when there's not enough source text
1764          to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1765          TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
1766          reset to SRC_BASE before exiting.  */
1767       unsigned char *src_base = src;
1768       int charset, c1, c2, c3, c4;
1769
1770       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1771           && CODING_SPEC_ISO_BOL (coding))
1772         {
1773           /* We have to produce designation sequences if any now.  */
1774           encode_designation_at_bol (coding, unification_table,
1775                                      src, src_end, &dst);
1776           CODING_SPEC_ISO_BOL (coding) = 0;
1777         }
1778
1779       c1 = *src++;
1780       /* If we are seeing a component of a composite character, we are
1781          seeing a leading-code encoded irregularly for composition, or
1782          a composition rule if composing with rule.  We must set C1 to
1783          a normal leading-code or an ASCII code.  If we are not seeing
1784          a composite character, we must reset composition,
1785          designation, and invocation states.  */
1786       if (COMPOSING_P (coding->composing))
1787         {
1788           if (c1 < 0xA0)
1789             {
1790               /* We are not in a composite character any longer.  */
1791               coding->composing = COMPOSING_NO;
1792               ENCODE_RESET_PLANE_AND_REGISTER;
1793               ENCODE_COMPOSITION_END;
1794             }
1795           else
1796             {
1797               if (coding->composing == COMPOSING_WITH_RULE_RULE)
1798                 {
                     /* C1 is a composition rule: write it with the 8th
                        bit cleared and expect a component next.  */
1799                   *dst++ = c1 & 0x7F;
1800                   coding->composing = COMPOSING_WITH_RULE_HEAD;
1801                   continue;
1802                 }
1803               else if (coding->composing == COMPOSING_WITH_RULE_HEAD)
1804                 coding->composing = COMPOSING_WITH_RULE_RULE;
1805               if (c1 == 0xA0)
1806                 {
1807                   /* This is an ASCII component.  */
1808                   ONE_MORE_BYTE (c1);
1809                   c1 &= 0x7F;
1810                 }
1811               else
1812                 /* This is a leading-code of non ASCII component.  */
1813                 c1 -= 0x20;
1814             }
1815         }
1816
1817       /* Now encode one character.  C1 is a control character, an
1818          ASCII character, or a leading-code of multi-byte character.  */
1819       switch (emacs_code_class[c1])
1820         {
1821         case EMACS_ascii_code:
1822           ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
1823           break;
1824
1825         case EMACS_control_code:
1826           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1827             ENCODE_RESET_PLANE_AND_REGISTER;
1828           *dst++ = c1;
1829           coding->consumed_char++;
1830           break;
1831
1832         case EMACS_carriage_return_code:
1833           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
1834             {
1835               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1836                 ENCODE_RESET_PLANE_AND_REGISTER;
1837               *dst++ = c1;
1838               coding->consumed_char++;
1839               break;
1840             }
1841           /* fall down to treat '\r' as '\n' ...  */
1842
1843         case EMACS_linefeed_code:
1844           if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
1845             ENCODE_RESET_PLANE_AND_REGISTER;
               /* Copy the initial designations over the current ones
                  without producing any code.  */
1846           if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
1847             bcopy (coding->spec.iso2022.initial_designation,
1848                    coding->spec.iso2022.current_designation,
1849                    sizeof coding->spec.iso2022.initial_designation);
               /* Write the end-of-line in the form selected by
                  coding->eol_type: LF, CR LF, or else CR alone.  */
1850           if (coding->eol_type == CODING_EOL_LF
1851               || coding->eol_type == CODING_EOL_UNDECIDED)
1852             *dst++ = ISO_CODE_LF;
1853           else if (coding->eol_type == CODING_EOL_CRLF)
1854             *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
1855           else
1856             *dst++ = ISO_CODE_CR;
1857           CODING_SPEC_ISO_BOL (coding) = 1;
1858           coding->consumed_char++;
1859           break;
1860
1861         case EMACS_leading_code_2:
1862           ONE_MORE_BYTE (c2);
1863           if (c2 < 0xA0)
1864             {
1865               /* invalid sequence */
1866               *dst++ = c1;
1867               *dst++ = c2;
1868               coding->consumed_char += 2;
1869             }
1870           else
1871             ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
1872           break;
1873
1874         case EMACS_leading_code_3:
1875           TWO_MORE_BYTES (c2, c3);
1876           if (c2 < 0xA0 || c3 < 0xA0)
1877             {
1878               /* invalid sequence */
1879               *dst++ = c1;
1880               *dst++ = c2;
1881               *dst++ = c3;
1882               coding->consumed_char += 3;
1883             }
               /* Below LEADING_CODE_PRIVATE_11, C1 itself is passed as
                  the charset; otherwise C2 is passed as the charset and
                  C3 as the only position-code.  */
1884           else if (c1 < LEADING_CODE_PRIVATE_11)
1885             ENCODE_ISO_CHARACTER (c1, c2, c3);
1886           else
1887             ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
1888           break;
1889
1890         case EMACS_leading_code_4:
1891           THREE_MORE_BYTES (c2, c3, c4);
1892           if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
1893             {
1894               /* invalid sequence */
1895               *dst++ = c1;
1896               *dst++ = c2;
1897               *dst++ = c3;
1898               *dst++ = c4;
1899               coding->consumed_char += 4;
1900             }
1901           else
1902             ENCODE_ISO_CHARACTER (c2, c3, c4);
1903           break;
1904
1905         case EMACS_leading_code_composition:
1906           ONE_MORE_BYTE (c2);
1907           if (c2 < 0xA0)
1908             {
1909               /* invalid sequence */
1910               *dst++ = c1;
1911               *dst++ = c2;
1912               coding->consumed_char += 2;
1913             }
1914           else if (c2 == 0xFF)
1915             {
                   /* 0xFF after the composition leading-code starts a
                      composition with rule.  */
1916               ENCODE_RESET_PLANE_AND_REGISTER;
1917               coding->composing = COMPOSING_WITH_RULE_HEAD;
1918               ENCODE_COMPOSITION_WITH_RULE_START;
1919               coding->consumed_char++;
1920             }
1921           else
1922             {
1923               ENCODE_RESET_PLANE_AND_REGISTER;
1924               /* Rewind one byte because it is a character code of
1925                  composition elements.  */
1926               src--;
1927               coding->composing = COMPOSING_NO_RULE_HEAD;
1928               ENCODE_COMPOSITION_NO_RULE_START;
1929               coding->consumed_char++;
1930             }
1931           break;
1932
1933         case EMACS_invalid_code:
1934           *dst++ = c1;
1935           coding->consumed_char++;
1936           break;
1937         }
1938       continue;
         /* No `goto' to this label is visible in this function; per the
            comment at the head of the loop it is presumably the target
            of ONE_MORE_BYTE, TWO_MORE_BYTES and THREE_MORE_BYTES.  */
1939     label_end_of_loop:
1940       result = CODING_FINISH_INSUFFICIENT_SRC;
1941       src = src_base;
1942       break;
1943     }
1944
     /* NOTE(review): the reset below is reached only when SRC < SRC_END
        and RESULT is not CODING_FINISH_NORMAL, i.e. in the insufficient
        source case.  When the last block is consumed completely
        (SRC == SRC_END) no reset is produced, which looks at odds with
        the comment inside the branch -- confirm whether the reset
        should be done whenever CODING_MODE_LAST_BLOCK is set.  */
1945   if (src < src_end)
1946     {
1947       if (result == CODING_FINISH_NORMAL)
1948         result = CODING_FINISH_INSUFFICIENT_DST;
1949       else
1950         /* If this is the last block of the text to be encoded, we
1951            must reset graphic planes and registers to the initial
1952            state, and flush out the carryover if any.  */
1953         if (coding->mode & CODING_MODE_LAST_BLOCK)
1954           ENCODE_RESET_PLANE_AND_REGISTER;
1955     }
1956
1957   coding->consumed = src - source;
1958   coding->produced = coding->produced_char = dst - destination;
1959   return result;
1960 }
1961
1962 \f
1963 /*** 4. SJIS and BIG5 handlers ***/
1964
1965 /* Although SJIS and BIG5 are not ISO coding systems, they are used
1966    quite widely.  So, for the moment, Emacs supports them directly in
1967    C code.  But, in the future, they may be supported only by CCL.  */
1968
1969 /* SJIS is a coding system encoding three character sets: ASCII, the
1970    right half of JISX0201-Kana, and JISX0208.  An ASCII character is
1971    encoded as is.  A character of charset katakana-jisx0201 is encoded
1972    as "position-code + 0x80".  A character of charset japanese-jisx0208
1973    is encoded in two bytes, but the two position-codes are divided and
1974    shifted so that they fit in the ranges below.
1975
1976    --- CODE RANGE of SJIS ---
1977    (character set)      (range)
1978    ASCII                0x00 .. 0x7F
1979    KATAKANA-JISX0201    0xA0 .. 0xDF
1980    JISX0208 (1st byte)  0x80 .. 0x9F and 0xE0 .. 0xFF
1981             (2nd byte)  0x40 .. 0xFF
1982    -------------------------------
1983
1984 */
1985
1986 /* BIG5 is a coding system encoding two character sets: ASCII and
1987    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
1988    character set and each of its characters is encoded in two bytes.
1989
1990    --- CODE RANGE of BIG5 ---
1991    (character set)      (range)
1992    ASCII                0x00 .. 0x7F
1993    Big5 (1st byte)      0xA1 .. 0xFE
1994         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
1995    --------------------------
1996
1997    Since the number of characters in Big5 is larger than maximum
1998    characters in Emacs' charset (96x96), it can't be handled as one
1999    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2000    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2001    contains frequently used characters and the latter contains less
2002    frequently used characters.  */
2003
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   DECODE_BIG5 reads B1 and B2 and stores into CHARSET, C1 and C2.
   ENCODE_BIG5 reads CHARSET, C1 and C2 and stores into B1 and B2.
   The stored-to arguments must be lvalues.  All arguments are
   parenthesized in the expansions, so expressions such as `c & 0x7F'
   may be passed.  Arguments may be evaluated more than once, so they
   must be free of side effects.  */

/* Number of Big5 characters which have the same code in 1st byte.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* A 2nd byte below 0x7F is counted from 0x40; any other 2nd byte is
   counted from 0x62, which puts 0xA1 right after 0x7E.  A 1st byte
   of 0xC9 or more selects `charset_big5_2', whose numbering restarts
   from the row of 0xC9.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* The inverse of DECODE_BIG5.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2036
/* Decode one character of CHARSET whose position-codes are C1 and C2.

   The macro uses the variable `unification_table' of the expanding
   function.  If UNIFICATION_TABLE is non-nil and unify_char finds a
   replacement for the character, the replacement is split back into
   a charset and into C1 and C2, so C1 and C2 must be variables.

   The character is then handed to DECODE_CHARACTER_ASCII if the
   resulting charset is CHARSET_ASCII or negative, to
   DECODE_CHARACTER_DIMENSION1 if the charset has dimension 1, and to
   DECODE_CHARACTER_DIMENSION2 otherwise.  */
#define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                     \
  do {                                                                  \
    int charset_alt = (charset);                                        \
    if (!NILP (unification_table))                                      \
      {                                                                 \
        int c_alt = unify_char (unification_table,                      \
                                -1, (charset), c1, c2);                 \
        if (c_alt >= 0)                                                 \
          SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
      }                                                                 \
    if (charset_alt != CHARSET_ASCII && charset_alt >= 0)               \
      {                                                                 \
        if (CHARSET_DIMENSION (charset_alt) == 1)                       \
          DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                \
        else                                                            \
          DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);            \
      }                                                                 \
    else                                                                \
      DECODE_CHARACTER_ASCII (c1);                                      \
  } while (0)
2051
/* Encode the character of charset CHARSET whose position codes are C1
   and C2, storing the result through `dst' and counting one consumed
   character in `coding'.  The expansion site must provide `dst',
   `coding', `sjis_p' and `unification_table'.  C1 and C2 must be
   lvalues because they are modified.

   When `unification_table' is non-nil and maps the character, the
   mapped character is encoded instead.  ASCII is emitted as is.  In
   SJIS mode JISX0201-Kana is emitted as one byte and JISX0208 as a
   two-byte SJIS code; in BIG5 mode the two Big5 charsets are emitted
   as a two-byte Big5 code.  Any other character is emitted unencoded
   as its charset followed by its position codes, which takes up to
   three bytes.

   The macro expands to a single statement without a trailing
   semicolon, so it can be used safely in an if/else.  */

#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                       \
  do {                                                                    \
    int c_alt, charset_alt;                                               \
    if (!NILP (unification_table)                                         \
        && ((c_alt = unify_char (unification_table, -1, (charset),        \
                                 c1, c2))                                 \
            >= 0))                                                        \
      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                            \
    else                                                                  \
      charset_alt = (charset);                                            \
    if (charset_alt == charset_ascii)                                     \
      *dst++ = c1;                                                        \
    else if (CHARSET_DIMENSION (charset_alt) == 1)                        \
      {                                                                   \
        if (sjis_p && charset_alt == charset_katakana_jisx0201)           \
          *dst++ = c1;                                                    \
        else                                                              \
          {                                                               \
            *dst++ = charset_alt, *dst++ = c1;                            \
            coding->fake_multibyte = 1;                                   \
          }                                                               \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        c1 &= 0x7F, c2 &= 0x7F;                                           \
        if (sjis_p && charset_alt == charset_jisx0208)                    \
          {                                                               \
            unsigned char s1, s2;                                         \
                                                                          \
            ENCODE_SJIS (c1, c2, s1, s2);                                 \
            *dst++ = s1, *dst++ = s2;                                     \
            coding->fake_multibyte = 1;                                   \
          }                                                               \
        else if (!sjis_p                                                  \
                 && (charset_alt == charset_big5_1                        \
                     || charset_alt == charset_big5_2))                   \
          {                                                               \
            unsigned char b1, b2;                                         \
                                                                          \
            ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);                    \
            *dst++ = b1, *dst++ = b2;                                     \
          }                                                               \
        else                                                              \
          {                                                               \
            *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;               \
            coding->fake_multibyte = 1;                                   \
          }                                                               \
      }                                                                   \
    coding->consumed_char++;                                              \
  } while (0)
2101
2102 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2103    Check if a text is encoded in SJIS.  If it is, return
2104    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2105
2106 int
2107 detect_coding_sjis (src, src_end)
2108      unsigned char *src, *src_end;
2109 {
2110   unsigned char c;
2111
2112   while (src < src_end)
2113     {
2114       c = *src++;
2115       if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
2116         {
2117           if (src < src_end && *src++ < 0x40)
2118             return 0;
2119         }
2120     }
2121   return CODING_CATEGORY_MASK_SJIS;
2122 }
2123
2124 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2125    Check if a text is encoded in BIG5.  If it is, return
2126    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2127
2128 int
2129 detect_coding_big5 (src, src_end)
2130      unsigned char *src, *src_end;
2131 {
2132   unsigned char c;
2133
2134   while (src < src_end)
2135     {
2136       c = *src++;
2137       if (c >= 0xA1)
2138         {
2139           if (src >= src_end)
2140             break;
2141           c = *src++;
2142           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2143             return 0;
2144         }
2145     }
2146   return CODING_CATEGORY_MASK_BIG5;
2147 }
2148
2149 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2150    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2151
2152 int
2153 decode_coding_sjis_big5 (coding, source, destination,
2154                          src_bytes, dst_bytes, sjis_p)
2155      struct coding_system *coding;
2156      unsigned char *source, *destination;
2157      int src_bytes, dst_bytes;
2158      int sjis_p;
2159 {
2160   unsigned char *src = source;
2161   unsigned char *src_end = source + src_bytes;
2162   unsigned char *dst = destination;
2163   unsigned char *dst_end = destination + dst_bytes;
2164   /* Since the maximum bytes produced by each loop is 4, we subtract 3
2165      from DST_END to assure overflow checking is necessary only at the
2166      head of loop.  */
2167   unsigned char *adjusted_dst_end = dst_end - 3;
2168   Lisp_Object unification_table
2169       = coding->character_unification_table_for_decode;
2170   int result = CODING_FINISH_NORMAL;
2171
2172   if (!NILP (Venable_character_unification) && NILP (unification_table))
2173     unification_table = Vstandard_character_unification_table_for_decode;
2174
2175   coding->produced_char = 0;
2176   coding->fake_multibyte = 0;
2177   while (src < src_end && (dst_bytes
2178                            ? (dst < adjusted_dst_end)
2179                            : (dst < src - 3)))
2180     {
2181       /* SRC_BASE remembers the start position in source in each loop.
2182          The loop will be exited when there's not enough source text
2183          to analyze two-byte character (within macro ONE_MORE_BYTE).
2184          In that case, SRC is reset to SRC_BASE before exiting.  */
2185       unsigned char *src_base = src;
2186       unsigned char c1 = *src++, c2, c3, c4;
2187
2188       if (c1 < 0x20)
2189         {
2190           if (c1 == '\r')
2191             {
2192               if (coding->eol_type == CODING_EOL_CRLF)
2193                 {
2194                   ONE_MORE_BYTE (c2);
2195                   if (c2 == '\n')
2196                     *dst++ = c2;
2197                   else if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2198                     {
2199                       result = CODING_FINISH_INCONSISTENT_EOL;
2200                       goto label_end_of_loop_2;
2201                     }
2202                   else
2203                     /* To process C2 again, SRC is subtracted by 1.  */
2204                     *dst++ = c1, src--;
2205                 }
2206               else if (coding->eol_type == CODING_EOL_CR)
2207                 *dst++ = '\n';
2208               else
2209                 *dst++ = c1;
2210             }
2211           else if (c1 == '\n'
2212                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2213                    && (coding->eol_type == CODING_EOL_CR
2214                        || coding->eol_type == CODING_EOL_CRLF))
2215             {
2216               result = CODING_FINISH_INCONSISTENT_EOL;
2217               goto label_end_of_loop_2;
2218             }
2219           else
2220             *dst++ = c1;
2221           coding->produced_char++;
2222         }
2223       else if (c1 < 0x80)
2224         DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2225       else if (c1 < 0xA0)
2226         {
2227           /* SJIS -> JISX0208 */
2228           if (sjis_p)
2229             {
2230               ONE_MORE_BYTE (c2);
2231               if (c2 >= 0x40)
2232                 {
2233                   DECODE_SJIS (c1, c2, c3, c4);
2234                   DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2235                 }
2236               else
2237                 goto label_invalid_code_2;
2238             }
2239           else
2240             goto label_invalid_code_1;
2241         }
2242       else if (c1 < 0xE0)
2243         {
2244           /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */
2245           if (sjis_p)
2246             DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
2247                                         /* dummy */ c2);
2248           else
2249             {
2250               int charset;
2251
2252               ONE_MORE_BYTE (c2);
2253               if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2254                 {
2255                   DECODE_BIG5 (c1, c2, charset, c3, c4);
2256                   DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2257                 }
2258               else
2259                 goto label_invalid_code_2;
2260             }
2261         }
2262       else                      /* C1 >= 0xE0 */
2263         {
2264           /* SJIS -> JISX0208, BIG5 -> Big5 */
2265           if (sjis_p)
2266             {
2267               ONE_MORE_BYTE (c2);
2268               if (c2 >= 0x40)
2269                 {
2270                   DECODE_SJIS (c1, c2, c3, c4);
2271                   DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2272                 }
2273               else
2274                 goto label_invalid_code_2;
2275             }
2276           else
2277             {
2278               int charset;
2279
2280               ONE_MORE_BYTE (c2);
2281               if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2282                 {
2283                   DECODE_BIG5 (c1, c2, charset, c3, c4);
2284                   DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2285                 }
2286               else
2287                 goto label_invalid_code_2;
2288             }
2289         }
2290       continue;
2291
2292     label_invalid_code_1:
2293       *dst++ = c1;
2294       coding->produced_char++;
2295       coding->fake_multibyte = 1;
2296       continue;
2297
2298     label_invalid_code_2:
2299       *dst++ = c1; *dst++= c2;
2300       coding->produced_char += 2;
2301       coding->fake_multibyte = 1;
2302       continue;
2303
2304     label_end_of_loop:
2305       result = CODING_FINISH_INSUFFICIENT_SRC;
2306     label_end_of_loop_2:
2307       src = src_base;
2308       break;
2309     }
2310
2311   if (src < src_end)
2312     {
2313       if (result == CODING_FINISH_NORMAL)
2314         result = CODING_FINISH_INSUFFICIENT_DST;
2315       else if (result != CODING_FINISH_INCONSISTENT_EOL
2316                && coding->mode & CODING_MODE_LAST_BLOCK)
2317         {
2318           src_bytes = src_end - src;
2319           if (dst_bytes && (dst_end - dst < src_bytes))
2320             src_bytes = dst_end - dst;
2321           bcopy (dst, src, src_bytes);
2322           src += src_bytes;
2323           dst += src_bytes;
2324           coding->fake_multibyte = 1;
2325         }
2326     }
2327
2328   coding->consumed = coding->consumed_char = src - source;
2329   coding->produced = dst - destination;
2330   return result;
2331 }
2332
2333 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2334    This function can encode `charset_ascii', `charset_katakana_jisx0201',
2335    `charset_jisx0208', `charset_big5_1', and `charset_big5-2'.  We are
2336    sure that all these charsets are registered as official charset
2337    (i.e. do not have extended leading-codes).  Characters of other
2338    charsets are produced without any encoding.  If SJIS_P is 1, encode
2339    SJIS text, else encode BIG5 text.  */
2340
2341 int
2342 encode_coding_sjis_big5 (coding, source, destination,
2343                          src_bytes, dst_bytes, sjis_p)
2344      struct coding_system *coding;
2345      unsigned char *source, *destination;
2346      int src_bytes, dst_bytes;
2347      int sjis_p;
2348 {
2349   unsigned char *src = source;
2350   unsigned char *src_end = source + src_bytes;
2351   unsigned char *dst = destination;
2352   unsigned char *dst_end = destination + dst_bytes;
2353   /* Since the maximum bytes produced by each loop is 2, we subtract 1
2354      from DST_END to assure overflow checking is necessary only at the
2355      head of loop.  */
2356   unsigned char *adjusted_dst_end = dst_end - 1;
2357   Lisp_Object unification_table
2358       = coding->character_unification_table_for_encode;
2359   int result = CODING_FINISH_NORMAL;
2360
2361   if (!NILP (Venable_character_unification) && NILP (unification_table))
2362     unification_table = Vstandard_character_unification_table_for_encode;
2363
2364   coding->consumed_char = 0;
2365   coding->fake_multibyte = 0;
2366   while (src < src_end && (dst_bytes
2367                            ? (dst < adjusted_dst_end)
2368                            : (dst < src - 1)))
2369     {
2370       /* SRC_BASE remembers the start position in source in each loop.
2371          The loop will be exited when there's not enough source text
2372          to analyze multi-byte codes (within macros ONE_MORE_BYTE and
2373          TWO_MORE_BYTES).  In that case, SRC is reset to SRC_BASE
2374          before exiting.  */
2375       unsigned char *src_base = src;
2376       unsigned char c1 = *src++, c2, c3, c4;
2377
2378       if (coding->composing)
2379         {
2380           if (c1 == 0xA0)
2381             {
2382               ONE_MORE_BYTE (c1);
2383               c1 &= 0x7F;
2384             }
2385           else if (c1 >= 0xA0)
2386             c1 -= 0x20;
2387           else
2388             coding->composing = 0;
2389         }
2390
2391       switch (emacs_code_class[c1])
2392         {
2393         case EMACS_ascii_code:
2394           ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2395           break;
2396
2397         case EMACS_control_code:
2398           *dst++ = c1;
2399           coding->consumed_char++;
2400           break;
2401
2402         case EMACS_carriage_return_code:
2403           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2404             {
2405               *dst++ = c1;
2406               coding->consumed_char++;
2407               break;
2408             }
2409           /* fall down to treat '\r' as '\n' ...  */
2410
2411         case EMACS_linefeed_code:
2412           if (coding->eol_type == CODING_EOL_LF
2413               || coding->eol_type == CODING_EOL_UNDECIDED)
2414             *dst++ = '\n';
2415           else if (coding->eol_type == CODING_EOL_CRLF)
2416             *dst++ = '\r', *dst++ = '\n';
2417           else
2418             *dst++ = '\r';
2419           coding->consumed_char++;
2420           break;
2421
2422         case EMACS_leading_code_2:
2423           ONE_MORE_BYTE (c2);
2424           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
2425           break;
2426
2427         case EMACS_leading_code_3:
2428           TWO_MORE_BYTES (c2, c3);
2429           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
2430           break;
2431
2432         case EMACS_leading_code_4:
2433           THREE_MORE_BYTES (c2, c3, c4);
2434           ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
2435           break;
2436
2437         case EMACS_leading_code_composition:
2438           coding->composing = 1;
2439           break;
2440
2441         default:                /* i.e. case EMACS_invalid_code: */
2442           *dst++ = c1;
2443           coding->consumed_char++;
2444         }
2445       continue;
2446
2447     label_end_of_loop:
2448       result = CODING_FINISH_INSUFFICIENT_SRC;
2449       src = src_base;
2450       break;
2451     }
2452
2453   if (result == CODING_FINISH_NORMAL
2454       && src < src_end)
2455     result = CODING_FINISH_INSUFFICIENT_DST;
2456   coding->consumed = src - source;
2457   coding->produced = coding->produced_char = dst - destination;
2458   return result;
2459 }
2460
2461 \f
2462 /*** 5. End-of-line handlers ***/
2463
2464 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2465    This function is called only when `coding->eol_type' is
2466    CODING_EOL_CRLF or CODING_EOL_CR.  */
2467
2468 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2469      struct coding_system *coding;
2470      unsigned char *source, *destination;
2471      int src_bytes, dst_bytes;
2472 {
2473   unsigned char *src = source;
2474   unsigned char *src_end = source + src_bytes;
2475   unsigned char *dst = destination;
2476   unsigned char *dst_end = destination + dst_bytes;
2477   unsigned char c;
2478   int result = CODING_FINISH_NORMAL;
2479
2480   coding->fake_multibyte = 0;
2481
2482   if (src_bytes <= 0)
2483     return result;
2484
2485   switch (coding->eol_type)
2486     {
2487     case CODING_EOL_CRLF:
2488       {
2489         /* Since the maximum bytes produced by each loop is 2, we
2490            subtract 1 from DST_END to assure overflow checking is
2491            necessary only at the head of loop.  */
2492         unsigned char *adjusted_dst_end = dst_end - 1;
2493
2494         while (src < src_end && (dst_bytes
2495                                  ? (dst < adjusted_dst_end)
2496                                  : (dst < src - 1)))
2497           {
2498             unsigned char *src_base = src;
2499
2500             c = *src++;
2501             if (c == '\r')
2502               {
2503                 ONE_MORE_BYTE (c);
2504                 if (c != '\n')
2505                   {
2506                     if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2507                       {
2508                         result = CODING_FINISH_INCONSISTENT_EOL;
2509                         goto label_end_of_loop_2;
2510                       }
2511                     *dst++ = '\r';
2512                     if (BASE_LEADING_CODE_P (c))
2513                       coding->fake_multibyte = 1;
2514                   }
2515                 *dst++ = c;
2516               }
2517             else if (c == '\n'
2518                      && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2519               {
2520                 result = CODING_FINISH_INCONSISTENT_EOL;
2521                 goto label_end_of_loop_2;
2522               }
2523             else
2524               {
2525                 *dst++ = c;
2526                 if (BASE_LEADING_CODE_P (c))
2527                   coding->fake_multibyte = 1;
2528               }
2529             continue;
2530
2531           label_end_of_loop:
2532             result = CODING_FINISH_INSUFFICIENT_SRC;
2533           label_end_of_loop_2:
2534             src = src_base;
2535             break;
2536           }
2537         if (result == CODING_FINISH_NORMAL
2538             && src < src_end)
2539           result = CODING_FINISH_INSUFFICIENT_DST;
2540       }
2541       break;
2542
2543     case CODING_EOL_CR:
2544       if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2545         {
2546           while (src < src_end)
2547             {
2548               if ((c = *src++) == '\n')
2549                 break;
2550               if (BASE_LEADING_CODE_P (c))
2551                 coding->fake_multibyte = 1;
2552             }
2553           if (*--src == '\n')
2554             {
2555               src_bytes = src - source;
2556               result = CODING_FINISH_INCONSISTENT_EOL;
2557             }
2558         }
2559       if (dst_bytes && src_bytes > dst_bytes)
2560         {
2561           result = CODING_FINISH_INSUFFICIENT_DST;
2562           src_bytes = dst_bytes;
2563         }
2564       if (dst_bytes)
2565         bcopy (source, destination, src_bytes);
2566       else
2567         safe_bcopy (source, destination, src_bytes);
2568       src = source + src_bytes;
2569       while (src_bytes--) if (*dst++ == '\r') dst[-1] = '\n';
2570       break;
2571
2572     default:                    /* i.e. case: CODING_EOL_LF */
2573       if (dst_bytes && src_bytes > dst_bytes)
2574         {
2575           result = CODING_FINISH_INSUFFICIENT_DST;
2576           src_bytes = dst_bytes;
2577         }
2578       if (dst_bytes)
2579         bcopy (source, destination, src_bytes);
2580       else
2581         safe_bcopy (source, destination, src_bytes);
2582       src += src_bytes;
2583       dst += dst_bytes;
2584       coding->fake_multibyte = 1;
2585       break;
2586     }
2587
2588   coding->consumed = coding->consumed_char = src - source;
2589   coding->produced = coding->produced_char = dst - destination;
2590   return result;
2591 }
2592
2593 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2594    format of end-of-line according to `coding->eol_type'.  If
2595    `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code
2596    '\r' in source text also means end-of-line.  */
2597
2598 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2599      struct coding_system *coding;
2600      unsigned char *source, *destination;
2601      int src_bytes, dst_bytes;
2602 {
2603   unsigned char *src = source;
2604   unsigned char *dst = destination;
2605   int result = CODING_FINISH_NORMAL;
2606
2607   coding->fake_multibyte = 0;
2608
2609   if (coding->eol_type == CODING_EOL_CRLF)
2610     {
2611       unsigned char c;
2612       unsigned char *src_end = source + src_bytes;
2613       unsigned char *dst_end = destination + dst_bytes;
2614       /* Since the maximum bytes produced by each loop is 2, we
2615          subtract 1 from DST_END to assure overflow checking is
2616          necessary only at the head of loop.  */
2617       unsigned char *adjusted_dst_end = dst_end - 1;
2618
2619       while (src < src_end && (dst_bytes
2620                                ? (dst < adjusted_dst_end)
2621                                : (dst < src - 1)))
2622         {
2623           c = *src++;
2624           if (c == '\n'
2625               || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)))
2626             *dst++ = '\r', *dst++ = '\n';
2627           else
2628             {
2629               *dst++ = c;
2630               if (BASE_LEADING_CODE_P (c))
2631                 coding->fake_multibyte = 1;
2632             }
2633         }
2634       if (src < src_end)
2635         result = CODING_FINISH_INSUFFICIENT_DST;
2636     }
2637   else
2638     {
2639       unsigned char c;
2640
2641       if (dst_bytes && src_bytes > dst_bytes)
2642         {
2643           src_bytes = dst_bytes;
2644           result = CODING_FINISH_INSUFFICIENT_DST;
2645         }
2646       if (dst_bytes)
2647         bcopy (source, destination, src_bytes);
2648       else
2649         {
2650           safe_bcopy (source, destination, src_bytes);
2651           dst_bytes = src_bytes;
2652         }
2653       if (coding->eol_type == CODING_EOL_CRLF)
2654         {
2655           while (src_bytes--)
2656             {
2657               if ((c = *dst++) == '\n')
2658                 dst[-1] = '\r';
2659               else if (BASE_LEADING_CODE_P (c))
2660                   coding->fake_multibyte = 1;
2661             }
2662         }
2663       else
2664         {
2665           if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2666             {
2667               while (src_bytes--)
2668                 if (*dst++ == '\r') dst[-1] = '\n';
2669             }
2670           coding->fake_multibyte = 1;
2671         }
2672       src = source + dst_bytes;
2673       dst = destination + dst_bytes;
2674     }
2675
2676   coding->consumed = coding->consumed_char = src - source;
2677   coding->produced = coding->produced_char = dst - destination;
2678   return result;
2679 }
2680
2681 \f
2682 /*** 6. C library functions ***/
2683
2684 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
2685    has a property `coding-system'.  The value of this property is a
2686    vector of length 5 (called the coding-vector).  Among the elements of
2687    this vector, the first (element[0]) and the fifth (element[4])
2688    carry important information for decoding/encoding.  Before
2689    decoding/encoding, this information should be set in fields of a
2690    structure of type `coding_system'.
2691
2692    A value of property `coding-system' can be a symbol of another
2693    subsidiary coding-system.  In that case, Emacs gets coding-vector
2694    from that symbol.
2695
2696    `element[0]' contains information to be set in `coding->type'.  The
2697    value and its meaning is as follows:
2698
2699    0 -- coding_type_emacs_mule
2700    1 -- coding_type_sjis
2701    2 -- coding_type_iso2022
2702    3 -- coding_type_big5
2703    4 -- coding_type_ccl encoder/decoder written in CCL
2704    nil -- coding_type_no_conversion
2705    t -- coding_type_undecided (automatic conversion on decoding,
2706                                no-conversion on encoding)
2707
2708    `element[4]' contains information to be set in `coding->flags' and
2709    `coding->spec'.  The meaning varies by `coding->type'.
2710
2711    If `coding->type' is `coding_type_iso2022', element[4] is a vector
2712    of length 32 (of which the first 13 sub-elements are used now).
2713    Meanings of these sub-elements are:
2714
2715    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2716         If the value is an integer of valid charset, the charset is
2717         assumed to be designated to graphic register N initially.
2718
2719         If the value is negative, it is the negated value of a charset
2720         that reserves graphic register N, which means that the charset
2721         is not designated initially but should be designated to graphic
2722         register N just before encoding a character in that charset.
2723
2724         If the value is nil, graphic register N is never used on
2725         encoding.
2726
2727    sub-element[N] where N is 4 through 11: to be set in `coding->flags'
2728         Each value takes t or nil.  See the section ISO2022 of
2729         `coding.h' for more information.
2730
2731    If `coding->type' is `coding_type_big5', element[4] is t to denote
2732    BIG5-ETen or nil to denote BIG5-HKU.
2733
2734    If `coding->type' takes the other value, element[4] is ignored.
2735
2736    Emacs Lisp's coding system also carries information about format of
2737    end-of-line in a value of property `eol-type'.  If the value is
2738    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2739    means CODING_EOL_CR.  If it is not integer, it should be a vector
2740    of subsidiary coding systems of which property `eol-type' has one
2741    of above values.
2742
2743 */
2744
2745 /* Extract information for decoding/encoding from CODING_SYSTEM
2746    and set it in CODING.  If CODING_SYSTEM is invalid, CODING
2747    is set up so that no conversion is necessary and -1 is returned;
2748    otherwise 0 is returned.  */
2749
2750 int
2751 setup_coding_system (coding_system, coding)
2752      Lisp_Object coding_system;
2753      struct coding_system *coding;
2754 {
2755   Lisp_Object coding_spec, coding_type, eol_type, plist;
2756   Lisp_Object val;
2757   int i;
2758
2759   /* Initialize some fields required for all kinds of coding systems.  */
2760   coding->symbol = coding_system;
2761   coding->common_flags = 0;
2762   coding->mode = 0;
2763   coding->heading_ascii = -1;
2764   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2765   coding_spec = Fget (coding_system, Qcoding_system);
2766   if (!VECTORP (coding_spec)
2767       || XVECTOR (coding_spec)->size != 5
2768       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2769     goto label_invalid_coding_system;
2770
2771   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2772   if (VECTORP (eol_type))
2773     {
2774       coding->eol_type = CODING_EOL_UNDECIDED;
2775       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2776     }
2777   else if (XFASTINT (eol_type) == 1)
2778     {
2779       coding->eol_type = CODING_EOL_CRLF;
2780       coding->common_flags
2781         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2782     }
2783   else if (XFASTINT (eol_type) == 2)
2784     {
2785       coding->eol_type = CODING_EOL_CR;
2786       coding->common_flags
2787         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2788     }
2789   else
2790     coding->eol_type = CODING_EOL_LF;
2791
2792   coding_type = XVECTOR (coding_spec)->contents[0];
2793   /* Try short cut.  */
2794   if (SYMBOLP (coding_type))
2795     {
2796       if (EQ (coding_type, Qt))
2797         {
2798           coding->type = coding_type_undecided;
2799           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
2800         }
2801       else
2802         coding->type = coding_type_no_conversion;
2803       return 0;
2804     }
2805
2806   /* Initialize remaining fields.  */
2807   coding->composing = 0;
2808   coding->character_unification_table_for_decode = Qnil;
2809   coding->character_unification_table_for_encode = Qnil;
2810
2811   /* Get values of coding system properties:
2812      `post-read-conversion', `pre-write-conversion',
2813      `character-unification-table-for-decode',
2814      `character-unification-table-for-encode'.  */
2815   plist = XVECTOR (coding_spec)->contents[3];
2816   coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
2817   coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
2818   val = Fplist_get (plist, Qcharacter_unification_table_for_decode);
2819   if (SYMBOLP (val))
2820     val = Fget (val, Qcharacter_unification_table_for_decode);
2821   coding->character_unification_table_for_decode
2822     = CHAR_TABLE_P (val) ? val : Qnil;
2823   val = Fplist_get (plist, Qcharacter_unification_table_for_encode);
2824   if (SYMBOLP (val))
2825     val = Fget (val, Qcharacter_unification_table_for_encode);
2826   coding->character_unification_table_for_encode
2827     = CHAR_TABLE_P (val) ? val : Qnil;
2828   val = Fplist_get (plist, Qcoding_category);
2829   if (!NILP (val))
2830     {
2831       val = Fget (val, Qcoding_category_index);
2832       if (INTEGERP (val))
2833         coding->category_idx = XINT (val);
2834       else
2835         goto label_invalid_coding_system;
2836     }
2837   else
2838     goto label_invalid_coding_system;
2839
2840   val = Fplist_get (plist, Qsafe_charsets);
2841   if (EQ (val, Qt))
2842     {
2843       for (i = 0; i <= MAX_CHARSET; i++)
2844         coding->safe_charsets[i] = 1;
2845     }
2846   else
2847     {
2848       bzero (coding->safe_charsets, MAX_CHARSET + 1);
2849       while (CONSP (val))
2850         {
2851           if ((i = get_charset_id (XCONS (val)->car)) >= 0)
2852             coding->safe_charsets[i] = 1;
2853           val = XCONS (val)->cdr;
2854         }
2855     }
2856
2857   switch (XFASTINT (coding_type))
2858     {
2859     case 0:
2860       coding->type = coding_type_emacs_mule;
2861       if (!NILP (coding->post_read_conversion))
2862         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
2863       if (!NILP (coding->pre_write_conversion))
2864         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
2865       break;
2866
2867     case 1:
2868       coding->type = coding_type_sjis;
2869       coding->common_flags
2870         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2871       break;
2872
2873     case 2:
2874       coding->type = coding_type_iso2022;
2875       coding->common_flags
2876         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2877       {
2878         Lisp_Object val, temp;
2879         Lisp_Object *flags;
2880         int i, charset, reg_bits = 0;
2881
2882         val = XVECTOR (coding_spec)->contents[4];
2883
2884         if (!VECTORP (val) || XVECTOR (val)->size != 32)
2885           goto label_invalid_coding_system;
2886
2887         flags = XVECTOR (val)->contents;
2888         coding->flags
2889           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
2890              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
2891              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
2892              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
2893              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
2894              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
2895              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
2896              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
2897              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
2898              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
2899              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
2900              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
2901              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
2902              );
2903
2904         /* Invoke graphic register 0 to plane 0.  */
2905         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
2906         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
2907         CODING_SPEC_ISO_INVOCATION (coding, 1)
2908           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
2909         /* Not single shifting at first.  */
2910         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
2911         /* Beginning of buffer should also be regarded as bol. */
2912         CODING_SPEC_ISO_BOL (coding) = 1;
2913
2914         for (charset = 0; charset <= MAX_CHARSET; charset++)
2915           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
2916         val = Vcharset_revision_alist;
2917         while (CONSP (val))
2918           {
2919             charset = get_charset_id (Fcar_safe (XCONS (val)->car));
2920             if (charset >= 0
2921                 && (temp = Fcdr_safe (XCONS (val)->car), INTEGERP (temp))
2922                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
2923               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
2924             val = XCONS (val)->cdr;
2925           }
2926
2927         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
2928            FLAGS[REG] can be one of below:
2929                 integer CHARSET: CHARSET occupies register I,
2930                 t: designate nothing to REG initially, but can be used
2931                   by any charsets,
2932                 list of integer, nil, or t: designate the first
2933                   element (if integer) to REG initially, the remaining
2934                   elements (if integer) is designated to REG on request,
2935                   if an element is t, REG can be used by any charsets,
2936                 nil: REG is never used.  */
2937         for (charset = 0; charset <= MAX_CHARSET; charset++)
2938           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2939             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
2940         for (i = 0; i < 4; i++)
2941           {
2942             if (INTEGERP (flags[i])
2943                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
2944                 || (charset = get_charset_id (flags[i])) >= 0)
2945               {
2946                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2947                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
2948               }
2949             else if (EQ (flags[i], Qt))
2950               {
2951                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2952                 reg_bits |= 1 << i;
2953                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2954               }
2955             else if (CONSP (flags[i]))
2956               {
2957                 Lisp_Object tail = flags[i];
2958
2959                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2960                 if (INTEGERP (XCONS (tail)->car)
2961                     && (charset = XINT (XCONS (tail)->car),
2962                         CHARSET_VALID_P (charset))
2963                     || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2964                   {
2965                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2966                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
2967                   }
2968                 else
2969                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2970                 tail = XCONS (tail)->cdr;
2971                 while (CONSP (tail))
2972                   {
2973                     if (INTEGERP (XCONS (tail)->car)
2974                         && (charset = XINT (XCONS (tail)->car),
2975                             CHARSET_VALID_P (charset))
2976                         || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2977                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2978                         = i;
2979                     else if (EQ (XCONS (tail)->car, Qt))
2980                       reg_bits |= 1 << i;
2981                     tail = XCONS (tail)->cdr;
2982                   }
2983               }
2984             else
2985               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2986
2987             CODING_SPEC_ISO_DESIGNATION (coding, i)
2988               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
2989           }
2990
2991         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
2992           {
2993             /* REG 1 can be used only by locking shift in 7-bit env.  */
2994             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
2995               reg_bits &= ~2;
2996             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
2997               /* Without any shifting, only REG 0 and 1 can be used.  */
2998               reg_bits &= 3;
2999           }
3000
3001         if (reg_bits)
3002           for (charset = 0; charset <= MAX_CHARSET; charset++)
3003             {
3004               if (CHARSET_VALID_P (charset))
3005                 {
3006                   /* There exist some default graphic registers to be
3007                      used CHARSET.  */
3008
3009                   /* We had better avoid designating a charset of
3010                      CHARS96 to REG 0 as far as possible.  */
3011                   if (CHARSET_CHARS (charset) == 96)
3012                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3013                       = (reg_bits & 2
3014                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3015                   else
3016                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3017                       = (reg_bits & 1
3018                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3019                 }
3020             }
3021       }
3022       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3023       coding->spec.iso2022.last_invalid_designation_register = -1;
3024       break;
3025
3026     case 3:
3027       coding->type = coding_type_big5;
3028       coding->common_flags
3029         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3030       coding->flags
3031         = (NILP (XVECTOR (coding_spec)->contents[4])
3032            ? CODING_FLAG_BIG5_HKU
3033            : CODING_FLAG_BIG5_ETEN);
3034       break;
3035
3036     case 4:
3037       coding->type = coding_type_ccl;
3038       coding->common_flags
3039         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3040       {
3041         Lisp_Object val = XVECTOR (coding_spec)->contents[4];
3042         if (CONSP  (val)
3043             && VECTORP (XCONS (val)->car)
3044             && VECTORP (XCONS (val)->cdr))
3045           {
3046             setup_ccl_program (&(coding->spec.ccl.decoder), XCONS (val)->car);
3047             setup_ccl_program (&(coding->spec.ccl.encoder), XCONS (val)->cdr);
3048           }
3049         else
3050           goto label_invalid_coding_system;
3051       }
3052       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3053       break;
3054
3055     case 5:
3056       coding->type = coding_type_raw_text;
3057       break;
3058
3059     default:
3060       goto label_invalid_coding_system;
3061     }
3062   return 0;
3063
3064  label_invalid_coding_system:
3065   coding->type = coding_type_no_conversion;
3066   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3067   coding->common_flags = 0;
3068   coding->eol_type = CODING_EOL_LF;
3069   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3070   return -1;
3071 }
3072
3073 /* Emacs has a mechanism to automatically detect a coding system if it
3074    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3075    it's impossible to distinguish some coding systems accurately
3076    because they use the same range of codes.  So, at first, coding
3077    systems are categorized into the following categories:
3078
3079    o coding-category-emacs-mule
3080
3081         The category for a coding system which has the same code range
3082         as Emacs' internal format.  Assigned the coding-system (Lisp
3083         symbol) `emacs-mule' by default.
3084
3085    o coding-category-sjis
3086
3087         The category for a coding system which has the same code range
3088         as SJIS.  Assigned the coding-system (Lisp
3089         symbol) `japanese-shift-jis' by default.
3090
3091    o coding-category-iso-7
3092
3093         The category for a coding system which has the same code range
3094         as ISO2022 of 7-bit environment.  This doesn't use any locking
3095         shift and single shift functions.  This can encode/decode all
3096         charsets.  Assigned the coding-system (Lisp symbol)
3097         `iso-2022-7bit' by default.
3098
3099    o coding-category-iso-7-tight
3100
3101         Same as coding-category-iso-7 except that this can
3102         encode/decode only the specified charsets.
3103
3104    o coding-category-iso-8-1
3105
3106         The category for a coding system which has the same code range
3107         as ISO2022 of 8-bit environment and graphic plane 1 used only
3108         for DIMENSION1 charset.  This doesn't use any locking shift
3109         and single shift functions.  Assigned the coding-system (Lisp
3110         symbol) `iso-latin-1' by default.
3111
3112    o coding-category-iso-8-2
3113
3114         The category for a coding system which has the same code range
3115         as ISO2022 of 8-bit environment and graphic plane 1 used only
3116         for DIMENSION2 charset.  This doesn't use any locking shift
3117         and single shift functions.  Assigned the coding-system (Lisp
3118         symbol) `japanese-iso-8bit' by default.
3119
3120    o coding-category-iso-7-else
3121
3122         The category for a coding system which has the same code range
3123         as ISO2022 of 7-bit environment but uses locking shift or
3124         single shift functions.  Assigned the coding-system (Lisp
3125         symbol) `iso-2022-7bit-lock' by default.
3126
3127    o coding-category-iso-8-else
3128
3129         The category for a coding system which has the same code range
3130         as ISO2022 of 8-bit environment but uses locking shift or
3131         single shift functions.  Assigned the coding-system (Lisp
3132         symbol) `iso-2022-8bit-ss2' by default.
3133
3134    o coding-category-big5
3135
3136         The category for a coding system which has the same code range
3137         as BIG5.  Assigned the coding-system (Lisp symbol)
3138         `cn-big5' by default.
3139
3140    o coding-category-binary
3141
3142         The category for a coding system not categorized in any of the
3143         above.  Assigned the coding-system (Lisp symbol)
3144         `no-conversion' by default.
3145
3146    Each of them is a Lisp symbol whose value is the actual
3147    `coding-system' (which is also a Lisp symbol) assigned by a user.
3148    What Emacs does actually is to detect a category of coding system.
3149    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3150    decide only one possible category, it selects a category of the
3151    highest priority.  Priorities of categories are also specified by a
3152    user in a Lisp variable `coding-category-list'.
3153
3154 */
3155
3156 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3157    If it detects possible coding systems, return an integer in which
3158    appropriate flag bits are set.  Flag bits are defined by macros
3159    CODING_CATEGORY_MASK_XXX in `coding.h'.
3160
3161    How many ASCII characters are at the head is returned as *SKIP.  */
3162
3163 static int
3164 detect_coding_mask (source, src_bytes, priorities, skip)
3165      unsigned char *source;
3166      int src_bytes, *priorities, *skip;
3167 {
3168   register unsigned char c;
3169   unsigned char *src = source, *src_end = source + src_bytes;
3170   unsigned int mask = (CODING_CATEGORY_MASK_ISO_7BIT
3171                        | CODING_CATEGORY_MASK_ISO_SHIFT);
3172   int i;
3173
3174   /* At first, skip all ASCII characters and control characters except
3175      for three ISO2022 specific control characters.  */
3176  label_loop_detect_coding:
3177   while (src < src_end)
3178     {
3179       c = *src;
3180       if (c >= 0x80
3181           || ((mask & CODING_CATEGORY_MASK_ISO_7BIT)
3182               && c == ISO_CODE_ESC)
3183           || ((mask & CODING_CATEGORY_MASK_ISO_SHIFT)
3184               && (c == ISO_CODE_SI || c == ISO_CODE_SO)))
3185         break;
3186       src++;
3187     }
3188   *skip = src - source;
3189
3190   if (src >= src_end)
3191     /* We found nothing other than ASCII.  There's nothing to do.  */
3192     return 0;
3193
3194   /* The text seems to be encoded in some multilingual coding system.
3195      Now, try to find in which coding system the text is encoded.  */
3196   if (c < 0x80)
3197     {
3198       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3199       /* C is an ISO2022 specific control code of C0.  */
3200       mask = detect_coding_iso2022 (src, src_end);
3201       if (mask == 0)
3202         {
3203           /* No valid ISO2022 code follows C.  Try again.  */
3204           src++;
3205           mask = (c != ISO_CODE_ESC
3206                   ? CODING_CATEGORY_MASK_ISO_7BIT
3207                   : CODING_CATEGORY_MASK_ISO_SHIFT);
3208           goto label_loop_detect_coding;
3209         }
3210       if (priorities)
3211         goto label_return_highest_only;
3212     }
3213   else
3214     {
3215       int try;
3216
3217       if (c < 0xA0)
3218         {
3219           /* C is the first byte of SJIS character code,
3220              or a leading-code of Emacs' internal format (emacs-mule).  */
3221           try = CODING_CATEGORY_MASK_SJIS | CODING_CATEGORY_MASK_EMACS_MULE;
3222
3223           /* Or, if C is a special latin extra code,
3224              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3225              or is an ISO2022 control-sequence-introducer (CSI),
3226              we should also consider the possibility of ISO2022 codings.  */
3227           if ((VECTORP (Vlatin_extra_code_table)
3228                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3229               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3230               || (c == ISO_CODE_CSI
3231                   && (src < src_end
3232                       && (*src == ']'
3233                           || ((*src == '0' || *src == '1' || *src == '2')
3234                               && src + 1 < src_end
3235                               && src[1] == ']')))))
3236             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3237                      | CODING_CATEGORY_MASK_ISO_8BIT);
3238         }
3239       else
3240         /* C is a character of ISO2022 in graphic plane right,
3241            or a SJIS's 1-byte character code (i.e. JISX0201),
3242            or the first byte of BIG5's 2-byte code.  */
3243         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3244                 | CODING_CATEGORY_MASK_ISO_8BIT
3245                 | CODING_CATEGORY_MASK_SJIS
3246                 | CODING_CATEGORY_MASK_BIG5);
3247
3248       mask = 0;
3249       if (priorities)
3250         {
3251           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3252             {
3253               priorities[i] &= try;
3254               if (priorities[i] & CODING_CATEGORY_MASK_ISO)
3255                 mask = detect_coding_iso2022 (src, src_end);
3256               else if (priorities[i] & CODING_CATEGORY_MASK_SJIS)
3257                 mask = detect_coding_sjis (src, src_end);
3258               else if (priorities[i] & CODING_CATEGORY_MASK_BIG5)
3259                 mask = detect_coding_big5 (src, src_end);
3260               else if (priorities[i] & CODING_CATEGORY_MASK_EMACS_MULE)
3261                 mask = detect_coding_emacs_mule (src, src_end);
3262               if (mask)
3263                 goto label_return_highest_only;
3264             }
3265           return CODING_CATEGORY_MASK_RAW_TEXT;
3266         }
3267       if (try & CODING_CATEGORY_MASK_ISO)
3268         mask |= detect_coding_iso2022 (src, src_end);
3269       if (try & CODING_CATEGORY_MASK_SJIS)
3270         mask |= detect_coding_sjis (src, src_end);
3271       if (try & CODING_CATEGORY_MASK_BIG5)
3272         mask |= detect_coding_big5 (src, src_end);
3273       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3274         mask |= detect_coding_emacs_mule (src, src_end);
3275     }
3276   return (mask | CODING_CATEGORY_MASK_RAW_TEXT);
3277
3278  label_return_highest_only:
3279   for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3280     {
3281       if (mask & priorities[i])
3282         return priorities[i];
3283     }
3284   return CODING_CATEGORY_MASK_RAW_TEXT;
3285 }
3286
3287 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3288    The information of the detected coding system is set in CODING.  */
3289
3290 void
3291 detect_coding (coding, src, src_bytes)
3292      struct coding_system *coding;
3293      unsigned char *src;
3294      int src_bytes;
3295 {
3296   unsigned int idx;
3297   int skip, mask, i;
3298   int priorities[CODING_CATEGORY_IDX_MAX];
3299   Lisp_Object val = Vcoding_category_list;
3300
3301   i = 0;
3302   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
3303     {
3304       if (! SYMBOLP (XCONS (val)->car))
3305         break;
3306       idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
3307       if (idx >= CODING_CATEGORY_IDX_MAX)
3308         break;
3309       priorities[i++] = (1 << idx);
3310       val = XCONS (val)->cdr;
3311     }
3312   /* If coding-category-list is valid and contains all coding
3313      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
3314      the following code saves Emacs from craching.  */
3315   while (i < CODING_CATEGORY_IDX_MAX)
3316     priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
3317
3318   mask = detect_coding_mask (src, src_bytes, priorities, &skip);
3319   coding->heading_ascii = skip;
3320
3321   if (!mask) return;
3322
3323   /* We found a single coding system of the highest priority in MASK.  */
3324   idx = 0;
3325   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3326   if (! mask)
3327     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3328
3329   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3330
3331   if (coding->eol_type != CODING_EOL_UNDECIDED)
3332     {
3333       Lisp_Object tmp = Fget (val, Qeol_type);
3334
3335       if (VECTORP (tmp))
3336         val = XVECTOR (tmp)->contents[coding->eol_type];
3337     }
3338   setup_coding_system (val, coding);
3339   /* Set this again because setup_coding_system reset this member.  */
3340   coding->heading_ascii = skip;
3341 }
3342
3343 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3344    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3345    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3346
3347    How many non-eol characters are at the head is returned as *SKIP.  */
3348
3349 #define MAX_EOL_CHECK_COUNT 3
3350
3351 static int
3352 detect_eol_type (source, src_bytes, skip)
3353      unsigned char *source;
3354      int src_bytes, *skip;
3355 {
3356   unsigned char *src = source, *src_end = src + src_bytes;
3357   unsigned char c;
3358   int total = 0;                /* How many end-of-lines are found so far.  */
3359   int eol_type = CODING_EOL_UNDECIDED;
3360   int this_eol_type;
3361
3362   *skip = 0;
3363
3364   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3365     {
3366       c = *src++;
3367       if (c == '\n' || c == '\r')
3368         {
3369           if (*skip == 0)
3370             *skip = src - 1 - source;
3371           total++;
3372           if (c == '\n')
3373             this_eol_type = CODING_EOL_LF;
3374           else if (src >= src_end || *src != '\n')
3375             this_eol_type = CODING_EOL_CR;
3376           else
3377             this_eol_type = CODING_EOL_CRLF, src++;
3378
3379           if (eol_type == CODING_EOL_UNDECIDED)
3380             /* This is the first end-of-line.  */
3381             eol_type = this_eol_type;
3382           else if (eol_type != this_eol_type)
3383             {
3384               /* The found type is different from what found before.  */
3385               eol_type = CODING_EOL_INCONSISTENT;
3386               break;
3387             }
3388         }
3389     }
3390
3391   if (*skip == 0)
3392     *skip = src_end - source;
3393   return eol_type;
3394 }
3395
3396 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3397    is encoded.  If it detects an appropriate format of end-of-line, it
3398    sets the information in *CODING.  */
3399
3400 void
3401 detect_eol (coding, src, src_bytes)
3402      struct coding_system *coding;
3403      unsigned char *src;
3404      int src_bytes;
3405 {
3406   Lisp_Object val;
3407   int skip;
3408   int eol_type = detect_eol_type (src, src_bytes, &skip);
3409
3410   if (coding->heading_ascii > skip)
3411     coding->heading_ascii = skip;
3412   else
3413     skip = coding->heading_ascii;
3414
3415   if (eol_type == CODING_EOL_UNDECIDED)
3416     return;
3417   if (eol_type == CODING_EOL_INCONSISTENT)
3418     {
3419 #if 0
3420       /* This code is suppressed until we find a better way to
3421          distinguish raw text file and binary file.  */
3422
3423       /* If we have already detected that the coding is raw-text, the
3424          coding should actually be no-conversion.  */
3425       if (coding->type == coding_type_raw_text)
3426         {
3427           setup_coding_system (Qno_conversion, coding);
3428           return;
3429         }
3430       /* Else, let's decode only text code anyway.  */
3431 #endif /* 0 */
3432       eol_type = CODING_EOL_LF;
3433     }
3434
3435   val = Fget (coding->symbol, Qeol_type);
3436   if (VECTORP (val) && XVECTOR (val)->size == 3)
3437     {
3438       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3439       coding->heading_ascii = skip;
3440     }
3441 }
3442
/* Extra bytes added to the estimated size of a conversion buffer.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Return the magnification factor of buffer size for decoding by
   CODING (a pointer to struct coding_system): 3 for ISO2022, 2 for
   SJIS and BIG5, 1 for raw-text, the value recorded in the decoder
   for CCL, and 2 for the others.  CODING is evaluated several
   times.  */
#define DECODING_BUFFER_MAG(coding)                                      \
  ((coding)->type == coding_type_iso2022                                 \
   ? 3                                                                   \
   : (((coding)->type == coding_type_sjis                                \
       || (coding)->type == coding_type_big5)                            \
      ? 2                                                                \
      : ((coding)->type == coding_type_raw_text                          \
         ? 1                                                             \
         : ((coding)->type == coding_type_ccl                            \
            ? (coding)->spec.ccl.decoder.buf_magnification               \
            : 2))))
3455
3456 /* Return maximum size (bytes) of a buffer enough for decoding
3457    SRC_BYTES of text encoded in CODING.  */
3458
3459 int
3460 decoding_buffer_size (coding, src_bytes)
3461      struct coding_system *coding;
3462      int src_bytes;
3463 {
3464   return (src_bytes * DECODING_BUFFER_MAG (coding)
3465           + CONVERSION_BUFFER_EXTRA_ROOM);
3466 }
3467
3468 /* Return maximum size (bytes) of a buffer enough for encoding
3469    SRC_BYTES of text to CODING.  */
3470
3471 int
3472 encoding_buffer_size (coding, src_bytes)
3473      struct coding_system *coding;
3474      int src_bytes;
3475 {
3476   int magnification;
3477
3478   if (coding->type == coding_type_ccl)
3479     magnification = coding->spec.ccl.encoder.buf_magnification;
3480   else
3481     magnification = 3;
3482
3483   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3484 }
3485
#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
#endif

/* The working buffer returned by get_conversion_buffer, and the
   number of bytes currently allocated for it.  */
char *conversion_buffer;
int conversion_buffer_size;

/* Return a pointer to a SIZE bytes of buffer to be used for encoding
   or decoding.  Sufficient memory is allocated automatically: if the
   current buffer is smaller than SIZE, it is replaced by a new one
   whose size is the current size doubled repeatedly until it reaches
   SIZE.  The old contents are not preserved in that case.

   The returned pointer is `conversion_buffer' itself, thus it is
   invalidated by the next call which has to enlarge the buffer.

   NOTE(review): this comment used to say that NULL is returned if we
   run out of memory, but the value of xmalloc is not checked here;
   confirm what xmalloc does on failure.  */

char *
get_conversion_buffer (size)
     int size;
{
  if (size > conversion_buffer_size)
    {
      char *buf;
      int real_size = conversion_buffer_size * 2;

      /* If no buffer has been allocated yet, REAL_SIZE is zero and
         doubling it below would never reach SIZE.  */
      if (real_size <= 0)
        real_size = MINIMUM_CONVERSION_BUFFER_SIZE;
      while (real_size < size) real_size *= 2;
      buf = (char *) xmalloc (real_size);
      xfree (conversion_buffer);
      conversion_buffer = buf;
      conversion_buffer_size = real_size;
    }
  return conversion_buffer;
}
3514
3515 int
3516 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3517      struct coding_system *coding;
3518      unsigned char *source, *destination;
3519      int src_bytes, dst_bytes, encodep;
3520 {
3521   struct ccl_program *ccl
3522     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3523   int result;
3524
3525   coding->produced = ccl_driver (ccl, source, destination,
3526                                  src_bytes, dst_bytes, &(coding->consumed));
3527   if (encodep)
3528     {
3529       coding->produced_char = coding->produced;
3530       coding->consumed_char
3531         = multibyte_chars_in_text (source, coding->consumed);
3532     }
3533   else
3534     {
3535       coding->produced_char
3536         = multibyte_chars_in_text (destination, coding->produced);
3537       coding->consumed_char = coding->consumed;
3538     }
3539   switch (ccl->status)
3540     {
3541     case CCL_STAT_SUSPEND_BY_SRC:
3542       result = CODING_FINISH_INSUFFICIENT_SRC;
3543       break;
3544     case CCL_STAT_SUSPEND_BY_DST:
3545       result = CODING_FINISH_INSUFFICIENT_DST;
3546       break;
3547     default:
3548       result = CODING_FINISH_NORMAL;
3549       break;
3550     }
3551   return result;
3552 }
3553
3554 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
3555    decoding, it may detect coding system and format of end-of-line if
3556    those are not yet decided.  */
3557
3558 int
3559 decode_coding (coding, source, destination, src_bytes, dst_bytes)
3560      struct coding_system *coding;
3561      unsigned char *source, *destination;
3562      int src_bytes, dst_bytes;
3563 {
3564   int result;
3565
3566   if (src_bytes <= 0)
3567     {
3568       coding->produced = coding->produced_char = 0;
3569       coding->consumed = coding->consumed_char = 0;
3570       coding->fake_multibyte = 0;
3571       return CODING_FINISH_NORMAL;
3572     }
3573
3574   if (coding->type == coding_type_undecided)
3575     detect_coding (coding, source, src_bytes);
3576
3577   if (coding->eol_type == CODING_EOL_UNDECIDED)
3578     detect_eol (coding, source, src_bytes);
3579
3580   switch (coding->type)
3581     {
3582     case coding_type_emacs_mule:
3583     case coding_type_undecided:
3584     case coding_type_raw_text:
3585       if (coding->eol_type == CODING_EOL_LF
3586           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3587         goto label_no_conversion;
3588       result = decode_eol (coding, source, destination, src_bytes, dst_bytes);
3589       break;
3590
3591     case coding_type_sjis:
3592       result = decode_coding_sjis_big5 (coding, source, destination,
3593                                         src_bytes, dst_bytes, 1);
3594       break;
3595
3596     case coding_type_iso2022:
3597       result = decode_coding_iso2022 (coding, source, destination,
3598                                       src_bytes, dst_bytes);
3599       break;
3600
3601     case coding_type_big5:
3602       result = decode_coding_sjis_big5 (coding, source, destination,
3603                                         src_bytes, dst_bytes, 0);
3604       break;
3605
3606     case coding_type_ccl:
3607       result = ccl_coding_driver (coding, source, destination,
3608                                   src_bytes, dst_bytes, 0);
3609       break;
3610
3611     default:                    /* i.e. case coding_type_no_conversion: */
3612     label_no_conversion:
3613       if (dst_bytes && src_bytes > dst_bytes)
3614         {
3615           coding->produced = dst_bytes;
3616           result = CODING_FINISH_INSUFFICIENT_DST;
3617         }
3618       else
3619         {
3620           coding->produced = src_bytes;
3621           result = CODING_FINISH_NORMAL;
3622         }
3623       if (dst_bytes)
3624         bcopy (source, destination, coding->produced);
3625       else
3626         safe_bcopy (source, destination, coding->produced);
3627       coding->fake_multibyte = 1;
3628       coding->consumed
3629         = coding->consumed_char = coding->produced_char = coding->produced;
3630       break;
3631     }
3632
3633   return result;
3634 }
3635
3636 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  */
3637
3638 int
3639 encode_coding (coding, source, destination, src_bytes, dst_bytes)
3640      struct coding_system *coding;
3641      unsigned char *source, *destination;
3642      int src_bytes, dst_bytes;
3643 {
3644   int result;
3645
3646   if (src_bytes <= 0)
3647     {
3648       coding->produced = coding->produced_char = 0;
3649       coding->consumed = coding->consumed_char = 0;
3650       coding->fake_multibyte = 0;
3651       return CODING_FINISH_NORMAL;
3652     }
3653
3654   switch (coding->type)
3655     {
3656     case coding_type_emacs_mule:
3657     case coding_type_undecided:
3658     case coding_type_raw_text:
3659       if (coding->eol_type == CODING_EOL_LF
3660           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3661         goto label_no_conversion;
3662       result = encode_eol (coding, source, destination, src_bytes, dst_bytes);
3663       break;
3664
3665     case coding_type_sjis:
3666       result = encode_coding_sjis_big5 (coding, source, destination,
3667                                         src_bytes, dst_bytes, 1);
3668       break;
3669
3670     case coding_type_iso2022:
3671       result = encode_coding_iso2022 (coding, source, destination,
3672                                       src_bytes, dst_bytes);
3673       break;
3674
3675     case coding_type_big5:
3676       result = encode_coding_sjis_big5 (coding, source, destination,
3677                                         src_bytes, dst_bytes, 0);
3678       break;
3679
3680     case coding_type_ccl:
3681       result = ccl_coding_driver (coding, source, destination,
3682                                   src_bytes, dst_bytes, 1);
3683       break;
3684
3685     default:                    /* i.e. case coding_type_no_conversion: */
3686     label_no_conversion:
3687       if (dst_bytes && src_bytes > dst_bytes)
3688         {
3689           coding->produced = dst_bytes;
3690           result = CODING_FINISH_INSUFFICIENT_DST;
3691         }
3692       else
3693         {
3694           coding->produced = src_bytes;
3695           result = CODING_FINISH_NORMAL;
3696         }
3697       if (dst_bytes)
3698         bcopy (source, destination, coding->produced);
3699       else
3700         safe_bcopy (source, destination, coding->produced);
3701       if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3702         {
3703           unsigned char *p = destination, *pend = p + coding->produced;
3704           while (p < pend)
3705             if (*p++ == '\015') p[-1] = '\n';
3706         }
3707       coding->fake_multibyte = 1;
3708       coding->consumed
3709         = coding->consumed_char = coding->produced_char = coding->produced;
3710       break;
3711     }
3712
3713   return result;
3714 }
3715
3716 /* Scan text in the region between *BEG and *END (byte positions),
3717    skip characters which we don't have to decode by coding system
3718    CODING at the head and tail, then set *BEG and *END to the region
3719    of the text we actually have to convert.  The caller should move
3720    the gap out of the region in advance.
3721
3722    If STR is not NULL, *BEG and *END are indices into STR.  */
3723
3724 static void
3725 shrink_decoding_region (beg, end, coding, str)
3726      int *beg, *end;
3727      struct coding_system *coding;
3728      unsigned char *str;
3729 {
3730   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
3731   int eol_conversion;
3732
3733   if (coding->type == coding_type_ccl
3734       || coding->type == coding_type_undecided
3735       || !NILP (coding->post_read_conversion))
3736     {
3737       /* We can't skip any data.  */
3738       return;
3739     }
3740   else if (coding->type == coding_type_no_conversion)
3741     {
3742       /* We need no conversion, but don't have to skip any data here.
3743          Decoding routine handles them effectively anyway.  */
3744       return;
3745     }
3746
3747   if (coding->heading_ascii >= 0)
3748     /* Detection routine has already found how much we can skip at the
3749        head.  */
3750     *beg += coding->heading_ascii;
3751
3752   if (str)
3753     {
3754       begp_orig = begp = str + *beg;
3755       endp_orig = endp = str + *end;
3756     }
3757   else
3758     {
3759       begp_orig = begp = BYTE_POS_ADDR (*beg);
3760       endp_orig = endp = begp + *end - *beg;
3761     }
3762
3763   eol_conversion = (coding->eol_type != CODING_EOL_LF);
3764
3765   switch (coding->type)
3766     {
3767     case coding_type_emacs_mule:
3768     case coding_type_raw_text:
3769       if (eol_conversion)
3770         {
3771           if (coding->heading_ascii < 0)
3772             while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
3773           while (begp < endp && *(endp - 1) != '\r' && *(endp - 1) < 0x80)
3774             endp--;
3775         }
3776       else
3777         begp = endp;
3778       break;
3779
3780     case coding_type_sjis:
3781     case coding_type_big5:
3782       /* We can skip all ASCII characters at the head.  */
3783       if (coding->heading_ascii < 0)
3784         {
3785           if (eol_conversion)
3786             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
3787           else
3788             while (begp < endp && *begp < 0x80) begp++;
3789         }
3790       /* We can skip all ASCII characters at the tail except for the
3791          second byte of SJIS or BIG5 code.  */
3792       if (eol_conversion)
3793         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
3794       else
3795         while (begp < endp && endp[-1] < 0x80) endp--;
3796       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
3797         endp++;
3798       break;
3799
3800     default:            /* i.e. case coding_type_iso2022: */
3801       if (coding->heading_ascii < 0)
3802         {
3803           /* We can skip all ASCII characters at the head except for a
3804              few control codes.  */
3805           while (begp < endp && (c = *begp) < 0x80
3806                  && c != ISO_CODE_CR && c != ISO_CODE_SO
3807                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
3808                  && (!eol_conversion || c != ISO_CODE_LF))
3809             begp++;
3810         }
3811       switch (coding->category_idx)
3812         {
3813         case CODING_CATEGORY_IDX_ISO_8_1:
3814         case CODING_CATEGORY_IDX_ISO_8_2:
3815           /* We can skip all ASCII characters at the tail.  */
3816           if (eol_conversion)
3817             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
3818           else
3819             while (begp < endp && endp[-1] < 0x80) endp--;
3820           break;
3821
3822         case CODING_CATEGORY_IDX_ISO_7:
3823         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
3824           /* We can skip all charactes at the tail except for ESC and
3825              the following 2-byte at the tail.  */
3826           if (eol_conversion)
3827             while (begp < endp
3828                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC && c != '\r')
3829               endp--;
3830           else
3831             while (begp < endp
3832                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC)
3833               endp--;
3834           if (begp < endp && endp[-1] == ISO_CODE_ESC)
3835             {
3836               if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
3837                 /* This is an ASCII designation sequence.  We can
3838                     surely skip the tail.  */
3839                 endp += 2;
3840               else
3841                 /* Hmmm, we can't skip the tail.  */
3842                 endp = endp_orig;
3843             }
3844         }
3845     }
3846   *beg += begp - begp_orig;
3847   *end += endp - endp_orig;
3848   return;
3849 }
3850
3851 /* Like shrink_decoding_region but for encoding.  */
3852
3853 static void
3854 shrink_encoding_region (beg, end, coding, str)
3855      int *beg, *end;
3856      struct coding_system *coding;
3857      unsigned char *str;
3858 {
3859   unsigned char *begp_orig, *begp, *endp_orig, *endp;
3860   int eol_conversion;
3861
3862   if (coding->type == coding_type_ccl)
3863     /* We can't skip any data.  */
3864     return;
3865   else if (coding->type == coding_type_no_conversion)
3866     {
3867       /* We need no conversion.  */
3868       *beg = *end;
3869       return;
3870     }
3871
3872   if (str)
3873     {
3874       begp_orig = begp = str + *beg;
3875       endp_orig = endp = str + *end;
3876     }
3877   else
3878     {
3879       begp_orig = begp = BYTE_POS_ADDR (*beg);
3880       endp_orig = endp = begp + *end - *beg;
3881     }
3882
3883   eol_conversion = (coding->eol_type == CODING_EOL_CR
3884                     || coding->eol_type == CODING_EOL_CRLF);
3885
3886   /* Here, we don't have to check coding->pre_write_conversion because
3887      the caller is expected to have handled it already.  */
3888   switch (coding->type)
3889     {
3890     case coding_type_undecided:
3891     case coding_type_emacs_mule:
3892     case coding_type_raw_text:
3893       if (eol_conversion)
3894         {
3895           while (begp < endp && *begp != '\n') begp++;
3896           while (begp < endp && endp[-1] != '\n') endp--;
3897         }
3898       else
3899         begp = endp;
3900       break;
3901
3902     case coding_type_iso2022:
3903       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3904         {
3905           unsigned char *bol = begp;
3906           while (begp < endp && *begp < 0x80)
3907             {
3908               begp++;
3909               if (begp[-1] == '\n')
3910                 bol = begp;
3911             }
3912           begp = bol;
3913           goto label_skip_tail;
3914         }
3915       /* fall down ... */
3916
3917     default:
3918       /* We can skip all ASCII characters at the head and tail.  */
3919       if (eol_conversion)
3920         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
3921       else
3922         while (begp < endp && *begp < 0x80) begp++;
3923     label_skip_tail:
3924       if (eol_conversion)
3925         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
3926       else
3927         while (begp < endp && *(endp - 1) < 0x80) endp--;
3928       break;
3929     }
3930
3931   *beg += begp - begp_orig;
3932   *end += endp - endp_orig;
3933   return;
3934 }
3935
3936 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
3937    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
3938    coding system CODING, and return the status code of code conversion
3939    (currently, this value has no meaning).
3940
3941    How many characters (and bytes) are converted to how many
3942    characters (and bytes) are recorded in members of the structure
3943    CODING.
3944
3945    If REPLACE is nonzero, we do various things as if the original text
3946    is deleted and a new text is inserted.  See the comments in
3947    replace_range (insdel.c) to know what we are doing.  */
3948
3949 int
3950 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
3951      int from, from_byte, to, to_byte, encodep, replace;
3952      struct coding_system *coding;
3953 {
3954   int len = to - from, len_byte = to_byte - from_byte;
3955   int require, inserted, inserted_byte;
3956   int head_skip, tail_skip, total_skip;
3957   Lisp_Object saved_coding_symbol = Qnil;
3958   int multibyte = !NILP (current_buffer->enable_multibyte_characters);
3959   int first = 1;
3960   int fake_multibyte = 0;
3961   unsigned char *src, *dst;
3962   Lisp_Object deletion = Qnil;
3963
3964   if (replace)
3965     {
3966       int saved_from = from;
3967
3968       prepare_to_modify_buffer (from, to, &from);
3969       if (saved_from != from)
3970         {
3971           to = from + len;
3972           if (multibyte)
3973             from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
3974           else
3975             from_byte = from, to_byte = to;
3976           len_byte = to_byte - from_byte;
3977         }
3978     }
3979
3980   if (! encodep && CODING_REQUIRE_DETECTION (coding))
3981     {
3982       /* We must detect encoding of text and eol format.  */
3983
3984       if (from < GPT && to > GPT)
3985         move_gap_both (from, from_byte);
3986       if (coding->type == coding_type_undecided)
3987         {
3988           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
3989           if (coding->type == coding_type_undecided)
3990             /* It seems that the text contains only ASCII, but we
3991                should not left it undecided because the deeper
3992                decoding routine (decode_coding) tries to detect the
3993                encodings again in vain.  */
3994             coding->type = coding_type_emacs_mule;
3995         }
3996       if (coding->eol_type == CODING_EOL_UNDECIDED)
3997         {
3998           saved_coding_symbol = coding->symbol;
3999           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4000           if (coding->eol_type == CODING_EOL_UNDECIDED)
4001             coding->eol_type = CODING_EOL_LF;
4002           /* We had better recover the original eol format if we
4003              encounter an inconsitent eol format while decoding.  */
4004           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4005         }
4006     }
4007
4008   coding->consumed_char = len, coding->consumed = len_byte;
4009
4010   if (encodep
4011       ? ! CODING_REQUIRE_ENCODING (coding)
4012       : ! CODING_REQUIRE_DECODING (coding))
4013     {
4014       coding->produced = len_byte;
4015       if (multibyte
4016           && ! replace
4017           /* See the comment of the member heading_ascii in coding.h.  */
4018           && coding->heading_ascii < len_byte)
4019         {
4020           /* We still may have to combine byte at the head and the
4021              tail of the text in the region.  */
4022           if (from < GPT && GPT < to)
4023             move_gap_both (to, to_byte);
4024           len = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte);
4025           adjust_after_insert (from, from_byte, to, to_byte, len);
4026           coding->produced_char = len;
4027         }
4028       else
4029         coding->produced_char = len_byte;
4030       return 0;
4031     }
4032
4033   /* Now we convert the text.  */
4034
4035   /* For encoding, we must process pre-write-conversion in advance.  */
4036   if (encodep
4037       && ! NILP (coding->pre_write_conversion)
4038       && SYMBOLP (coding->pre_write_conversion)
4039       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4040     {
4041       /* The function in pre-write-conversion may put a new text in a
4042          new buffer.  */
4043       struct buffer *prev = current_buffer, *new;
4044
4045       call2 (coding->pre_write_conversion, from, to);
4046       if (current_buffer != prev)
4047         {
4048           len = ZV - BEGV;
4049           new = current_buffer;
4050           set_buffer_internal_1 (prev);
4051           del_range_2 (from, from_byte, to, to_byte);
4052           insert_from_buffer (new, BEG, len, 0);
4053           to = from + len;
4054           to_byte = multibyte ? CHAR_TO_BYTE (to) : to;
4055           len_byte = to_byte - from_byte;
4056         }
4057     }
4058
4059   if (replace)
4060     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4061
4062   /* Try to skip the heading and tailing ASCIIs.  */
4063   {
4064     int from_byte_orig = from_byte, to_byte_orig = to_byte;
4065
4066     if (from < GPT && GPT < to)
4067       move_gap_both (from, from_byte);
4068     if (encodep)
4069       shrink_encoding_region (&from_byte, &to_byte, coding, NULL);
4070     else
4071       shrink_decoding_region (&from_byte, &to_byte, coding, NULL);
4072     if (from_byte == to_byte)
4073       {
4074         coding->produced = len_byte;
4075         coding->produced_char = multibyte ? len : len_byte;
4076         if (!replace)
4077           /* We must record and adjust for this new text now.  */
4078           adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4079         return 0;
4080       }
4081
4082     head_skip = from_byte - from_byte_orig;
4083     tail_skip = to_byte_orig - to_byte;
4084     total_skip = head_skip + tail_skip;
4085     from += head_skip;
4086     to -= tail_skip;
4087     len -= total_skip; len_byte -= total_skip;
4088   }
4089
4090   /* For converion, we must put the gap before the text in addition to
4091      making the gap larger for efficient decoding.  The required gap
4092      size starts from 2000 which is the magic number used in make_gap.
4093      But, after one batch of conversion, it will be incremented if we
4094      find that it is not enough .  */
4095   require = 2000;
4096
4097   if (GAP_SIZE  < require)
4098     make_gap (require - GAP_SIZE);
4099   move_gap_both (from, from_byte);
4100
4101   if (GPT - BEG < beg_unchanged)
4102     beg_unchanged = GPT - BEG;
4103   if (Z - GPT < end_unchanged)
4104     end_unchanged = Z - GPT;
4105
4106   inserted = inserted_byte = 0;
4107   src = GAP_END_ADDR, dst = GPT_ADDR;
4108
4109   GAP_SIZE += len_byte;
4110   ZV -= len;
4111   Z -= len;
4112   ZV_BYTE -= len_byte;
4113   Z_BYTE -= len_byte;
4114
4115   for (;;)
4116     {
4117       int result;
4118
4119       /* The buffer memory is changed from:
4120          +--------+converted-text+---------+-------original-text------+---+
4121          |<-from->|<--inserted-->|---------|<-----------len---------->|---|
4122                   |<------------------- GAP_SIZE -------------------->|  */
4123       if (encodep)
4124         result = encode_coding (coding, src, dst, len_byte, 0);
4125       else
4126         result = decode_coding (coding, src, dst, len_byte, 0);
4127       /* to:
4128          +--------+-------converted-text--------+--+---original-text--+---+
4129          |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
4130                   |<------------------- GAP_SIZE -------------------->|  */
4131       if (coding->fake_multibyte)
4132         fake_multibyte = 1;
4133
4134       if (!encodep && !multibyte)
4135         coding->produced_char = coding->produced;
4136       inserted += coding->produced_char;
4137       inserted_byte += coding->produced;
4138       len_byte -= coding->consumed;
4139       src += coding->consumed;
4140       dst += inserted_byte;
4141
4142       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4143         {
4144           unsigned char *pend = dst, *p = pend - inserted_byte;
4145
4146           /* Encode LFs back to the original eol format (CR or CRLF).  */
4147           if (coding->eol_type == CODING_EOL_CR)
4148             {
4149               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
4150             }
4151           else
4152             {
4153               int count = 0;
4154
4155               while (p < pend) if (*p++ == '\n') count++;
4156               if (src - dst < count)
4157                 {
4158                   /* We don't have sufficient room for putting LFs
4159                      back to CRLF.  We must record converted and
4160                      not-yet-converted text back to the buffer
4161                      content, enlarge the gap, then record them out of
4162                      the buffer contents again.  */
4163                   int add = len_byte + inserted_byte;
4164
4165                   GAP_SIZE -= add;
4166                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4167                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
4168                   make_gap (count - GAP_SIZE);
4169                   GAP_SIZE += add;
4170                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4171                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4172                   /* Don't forget to update SRC, DST, and PEND.  */
4173                   src = GAP_END_ADDR - len_byte;
4174                   dst = GPT_ADDR + inserted_byte;
4175                   pend = dst;
4176                 }
4177               inserted += count;
4178               inserted_byte += count;
4179               coding->produced += count;
4180               p = dst = pend + count;
4181               while (count)
4182                 {
4183                   *--p = *--pend;
4184                   if (*p == '\n') count--, *--p = '\r';
4185                 }
4186             }
4187
4188           /* Suppress eol-format conversion in the further conversion.  */
4189           coding->eol_type = CODING_EOL_LF;
4190
4191           /* Restore the original symbol.  */
4192           coding->symbol = saved_coding_symbol;
4193
4194           continue;
4195         }
4196       if (len_byte <= 0)
4197         break;
4198       if (result == CODING_FINISH_INSUFFICIENT_SRC)
4199         {
4200           /* The source text ends in invalid codes.  Let's just
4201              make them valid buffer contents, and finish conversion.  */
4202           inserted += len_byte;
4203           inserted_byte += len_byte;
4204           while (len_byte--)
4205             *src++ = *dst++;
4206           fake_multibyte = 1;
4207           break;
4208         }
4209       if (first)
4210         {
4211           /* We have just done the first batch of conversion which was
4212              stoped because of insufficient gap.  Let's reconsider the
4213              required gap size (i.e. SRT - DST) now.
4214
4215              We have converted ORIG bytes (== coding->consumed) into
4216              NEW bytes (coding->produced).  To convert the remaining
4217              LEN bytes, we may need REQUIRE bytes of gap, where:
4218                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
4219                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
4220              Here, we are sure that NEW >= ORIG.  */
4221           float ratio = coding->produced - coding->consumed;
4222           ratio /= coding->consumed;
4223           require = len_byte * ratio;
4224           first = 0;
4225         }
4226       if ((src - dst) < (require + 2000))
4227         {
4228           /* See the comment above the previous call of make_gap.  */
4229           int add = len_byte + inserted_byte;
4230
4231           GAP_SIZE -= add;
4232           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4233           GPT += inserted_byte; GPT_BYTE += inserted_byte;
4234           make_gap (require + 2000);
4235           GAP_SIZE += add;
4236           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4237           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4238           /* Don't forget to update SRC, DST.  */
4239           src = GAP_END_ADDR - len_byte;
4240           dst = GPT_ADDR + inserted_byte;
4241         }
4242     }
4243   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
4244
4245   if (multibyte
4246       && (fake_multibyte
4247           || !encodep && (to - from) != (to_byte - from_byte)))
4248     inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
4249
4250   /* If we have shrinked the conversion area, adjust it now.  */
4251   if (total_skip > 0)
4252     {
4253       if (tail_skip > 0)
4254         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
4255       inserted += total_skip; inserted_byte += total_skip;
4256       GAP_SIZE += total_skip;
4257       GPT -= head_skip; GPT_BYTE -= head_skip;
4258       ZV -= total_skip; ZV_BYTE -= total_skip;
4259       Z -= total_skip; Z_BYTE -= total_skip;
4260       from -= head_skip; from_byte -= head_skip;
4261       to += tail_skip; to_byte += tail_skip;
4262     }
4263
4264   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
4265
4266   if (! encodep && ! NILP (coding->post_read_conversion))
4267     {
4268       Lisp_Object val;
4269       int orig_inserted = inserted, pos = PT;
4270
4271       if (from != pos)
4272         temp_set_point_both (current_buffer, from, from_byte);
4273       val = call1 (coding->post_read_conversion, make_number (inserted));
4274       if (! NILP (val))
4275         {
4276           CHECK_NUMBER (val, 0);
4277           inserted = XFASTINT (val);
4278         }
4279       if (pos >= from + orig_inserted)
4280         temp_set_point (current_buffer, pos + (inserted - orig_inserted));
4281     }
4282
4283   signal_after_change (from, to - from, inserted);
4284
4285   {
4286     coding->consumed = to_byte - from_byte;
4287     coding->consumed_char = to - from;
4288     coding->produced = inserted_byte;
4289     coding->produced_char = inserted;
4290   }
4291
4292   return 0;
4293 }
4294
4295 Lisp_Object
4296 code_convert_string (str, coding, encodep, nocopy)
4297      Lisp_Object str;
4298      struct coding_system *coding;
4299      int encodep, nocopy;
4300 {
4301   int len;
4302   char *buf;
4303   int from = 0, to = XSTRING (str)->size;
4304   int to_byte = STRING_BYTES (XSTRING (str));
4305   struct gcpro gcpro1;
4306   Lisp_Object saved_coding_symbol = Qnil;
4307   int result;
4308
4309   if (encodep && !NILP (coding->pre_write_conversion)
4310       || !encodep && !NILP (coding->post_read_conversion))
4311     {
4312       /* Since we have to call Lisp functions which assume target text
4313          is in a buffer, after setting a temporary buffer, call
4314          code_convert_region.  */
4315       int count = specpdl_ptr - specpdl;
4316       struct buffer *prev = current_buffer;
4317
4318       record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
4319       temp_output_buffer_setup (" *code-converting-work*");
4320       set_buffer_internal (XBUFFER (Vstandard_output));
4321       if (encodep)
4322         insert_from_string (str, 0, 0, to, to_byte, 0);
4323       else
4324         {
4325           /* We must insert the contents of STR as is without
4326              unibyte<->multibyte conversion.  */
4327           current_buffer->enable_multibyte_characters = Qnil;
4328           insert_from_string (str, 0, 0, to_byte, to_byte, 0);
4329           current_buffer->enable_multibyte_characters = Qt;
4330         }
4331       code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1);
4332       if (encodep)
4333         /* We must return the buffer contents as unibyte string.  */
4334         current_buffer->enable_multibyte_characters = Qnil;
4335       str = make_buffer_string (BEGV, ZV, 0);
4336       set_buffer_internal (prev);
4337       return unbind_to (count, str);
4338     }
4339
4340   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4341     {
4342       /* See the comments in code_convert_region.  */
4343       if (coding->type == coding_type_undecided)
4344         {
4345           detect_coding (coding, XSTRING (str)->data, to_byte);
4346           if (coding->type == coding_type_undecided)
4347             coding->type = coding_type_emacs_mule;
4348         }
4349       if (coding->eol_type == CODING_EOL_UNDECIDED)
4350         {
4351           saved_coding_symbol = coding->symbol;
4352           detect_eol (coding, XSTRING (str)->data, to_byte);
4353           if (coding->eol_type == CODING_EOL_UNDECIDED)
4354             coding->eol_type = CODING_EOL_LF;
4355           /* We had better recover the original eol format if we
4356              encounter an inconsitent eol format while decoding.  */
4357           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4358         }
4359     }
4360
4361   if (encodep
4362       ? ! CODING_REQUIRE_ENCODING (coding)
4363       : ! CODING_REQUIRE_DECODING (coding))
4364     from = to_byte;
4365   else
4366     {
4367       /* Try to skip the heading and tailing ASCIIs.  */
4368       if (encodep)
4369         shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4370       else
4371         shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4372     }
4373   if (from == to_byte)
4374     return (nocopy ? str : Fcopy_sequence (str));
4375
4376   if (encodep)
4377     len = encoding_buffer_size (coding, to_byte - from);
4378   else
4379     len = decoding_buffer_size (coding, to_byte - from);
4380   len += from + STRING_BYTES (XSTRING (str)) - to_byte;
4381   GCPRO1 (str);
4382   buf = get_conversion_buffer (len);
4383   UNGCPRO;
4384
4385   if (from > 0)
4386     bcopy (XSTRING (str)->data, buf, from);
4387   result = (encodep
4388             ? encode_coding (coding, XSTRING (str)->data + from,
4389                              buf + from, to_byte - from, len)
4390             : decode_coding (coding, XSTRING (str)->data + from,
4391                              buf + from, to - from, len));
4392   if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4393     {
4394       /* We simple try to decode the whole string again but without
4395          eol-conversion this time.  */
4396       coding->eol_type = CODING_EOL_LF;
4397       coding->symbol = saved_coding_symbol;
4398       return code_convert_string (str, coding, encodep, nocopy);
4399     }
4400
4401   bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
4402          STRING_BYTES (XSTRING (str)) - to_byte);
4403
4404   len = from + STRING_BYTES (XSTRING (str)) - to_byte;
4405   if (encodep)
4406     str = make_unibyte_string (buf, len + coding->produced);
4407   else
4408     str = make_string_from_bytes (buf, len + coding->produced_char,
4409                                   len + coding->produced);
4410   return str;
4411 }
4412
4413 \f
4414 #ifdef emacs
4415 /*** 7. Emacs Lisp library functions ***/
4416
4417 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
4418   "Return t if OBJECT is nil or a coding-system.\n\
4419 See the documentation of `make-coding-system' for information\n\
4420 about coding-system objects.")
4421   (obj)
4422      Lisp_Object obj;
4423 {
4424   if (NILP (obj))
4425     return Qt;
4426   if (!SYMBOLP (obj))
4427     return Qnil;
4428   /* Get coding-spec vector for OBJ.  */
4429   obj = Fget (obj, Qcoding_system);
4430   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
4431           ? Qt : Qnil);
4432 }
4433
4434 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
4435        Sread_non_nil_coding_system, 1, 1, 0,
4436   "Read a coding system from the minibuffer, prompting with string PROMPT.")
4437   (prompt)
4438      Lisp_Object prompt;
4439 {
4440   Lisp_Object val;
4441   do
4442     {
4443       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4444                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
4445     }
4446   while (XSTRING (val)->size == 0);
4447   return (Fintern (val, Qnil));
4448 }
4449
4450 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
4451   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
4452 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
4453   (prompt, default_coding_system)
4454      Lisp_Object prompt, default_coding_system;
4455 {
4456   Lisp_Object val;
4457   if (SYMBOLP (default_coding_system))
4458     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4459   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4460                           Qt, Qnil, Qcoding_system_history,
4461                           default_coding_system, Qnil);
4462   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4463 }
4464
4465 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
4466        1, 1, 0,
4467   "Check validity of CODING-SYSTEM.\n\
4468 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
4469 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
4470 The value of property should be a vector of length 5.")
4471   (coding_system)
4472      Lisp_Object coding_system;
4473 {
4474   CHECK_SYMBOL (coding_system, 0);
4475   if (!NILP (Fcoding_system_p (coding_system)))
4476     return coding_system;
4477   while (1)
4478     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4479 }
4480 \f
4481 Lisp_Object
4482 detect_coding_system (src, src_bytes, highest)
4483      unsigned char *src;
4484      int src_bytes, highest;
4485 {
4486   int coding_mask, eol_type;
4487   Lisp_Object val, tmp;
4488   int dummy;
4489
4490   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
4491   eol_type  = detect_eol_type (src, src_bytes, &dummy);
4492   if (eol_type == CODING_EOL_INCONSISTENT)
4493     eol_type == CODING_EOL_UNDECIDED;
4494
4495   if (!coding_mask)
4496     {
4497       val = Qundecided;
4498       if (eol_type != CODING_EOL_UNDECIDED)
4499         {
4500           Lisp_Object val2;
4501           val2 = Fget (Qundecided, Qeol_type);
4502           if (VECTORP (val2))
4503             val = XVECTOR (val2)->contents[eol_type];
4504         }
4505       return val;
4506     }
4507
4508   /* At first, gather possible coding systems in VAL.  */
4509   val = Qnil;
4510   for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4511     {
4512       int idx
4513         = XFASTINT (Fget (XCONS (tmp)->car, Qcoding_category_index));
4514       if (coding_mask & (1 << idx))
4515         {
4516           val = Fcons (Fsymbol_value (XCONS (tmp)->car), val);
4517           if (highest)
4518             break;
4519         }
4520     }
4521   if (!highest)
4522     val = Fnreverse (val);
4523
4524   /* Then, substitute the elements by subsidiary coding systems.  */
4525   for (tmp = val; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4526     {
4527       if (eol_type != CODING_EOL_UNDECIDED)
4528         {
4529           Lisp_Object eol;
4530           eol = Fget (XCONS (tmp)->car, Qeol_type);
4531           if (VECTORP (eol))
4532             XCONS (tmp)->car = XVECTOR (eol)->contents[eol_type];
4533         }
4534     }
4535   return (highest ? XCONS (val)->car : val);
4536 }
4537
4538 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
4539        2, 3, 0,
4540   "Detect coding system of the text in the region between START and END.\n\
4541 Return a list of possible coding systems ordered by priority.\n\
4542 \n\
4543 If only ASCII characters are found, it returns `undecided'\n\
4544 or its subsidiary coding system according to a detected end-of-line format.\n\
4545 \n\
4546 If optional argument HIGHEST is non-nil, return the coding system of\n\
4547 highest priority.")
4548   (start, end, highest)
4549      Lisp_Object start, end, highest;
4550 {
4551   int from, to;
4552   int from_byte, to_byte;
4553
4554   CHECK_NUMBER_COERCE_MARKER (start, 0);
4555   CHECK_NUMBER_COERCE_MARKER (end, 1);
4556
4557   validate_region (&start, &end);
4558   from = XINT (start), to = XINT (end);
4559   from_byte = CHAR_TO_BYTE (from);
4560   to_byte = CHAR_TO_BYTE (to);
4561
4562   if (from < GPT && to >= GPT)
4563     move_gap_both (to, to_byte);
4564
4565   return detect_coding_system (BYTE_POS_ADDR (from_byte),
4566                                to_byte - from_byte,
4567                                !NILP (highest));
4568 }
4569
4570 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
4571        1, 2, 0,
4572   "Detect coding system of the text in STRING.\n\
4573 Return a list of possible coding systems ordered by priority.\n\
4574 \n\
4575 If only ASCII characters are found, it returns `undecided'\n\
4576 or its subsidiary coding system according to a detected end-of-line format.\n\
4577 \n\
4578 If optional argument HIGHEST is non-nil, return the coding system of\n\
4579 highest priority.")
4580   (string, highest)
4581      Lisp_Object string, highest;
4582 {
4583   CHECK_STRING (string, 0);
4584
4585   return detect_coding_system (XSTRING (string)->data,
4586                                STRING_BYTES (XSTRING (string)),
4587                                !NILP (highest));
4588 }
4589
4590 Lisp_Object
4591 code_convert_region1 (start, end, coding_system, encodep)
4592      Lisp_Object start, end, coding_system;
4593      int encodep;
4594 {
4595   struct coding_system coding;
4596   int from, to, len;
4597
4598   CHECK_NUMBER_COERCE_MARKER (start, 0);
4599   CHECK_NUMBER_COERCE_MARKER (end, 1);
4600   CHECK_SYMBOL (coding_system, 2);
4601
4602   validate_region (&start, &end);
4603   from = XFASTINT (start);
4604   to = XFASTINT (end);
4605
4606   if (NILP (coding_system))
4607     return make_number (to - from);
4608
4609   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4610     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4611
4612   coding.mode |= CODING_MODE_LAST_BLOCK;
4613   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
4614                        &coding, encodep, 1);
4615   return make_number (coding.produced_char);
4616 }
4617
4618 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
4619        3, 3, "r\nzCoding system: ",
4620   "Decode the current region by specified coding system.\n\
4621 When called from a program, takes three arguments:\n\
4622 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4623 Return length of decoded text.")
4624   (start, end, coding_system)
4625      Lisp_Object start, end, coding_system;
4626 {
4627   return code_convert_region1 (start, end, coding_system, 0);
4628 }
4629
4630 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
4631        3, 3, "r\nzCoding system: ",
4632   "Encode the current region by specified coding system.\n\
4633 When called from a program, takes three arguments:\n\
4634 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4635 Return length of encoded text.")
4636   (start, end, coding_system)
4637      Lisp_Object start, end, coding_system;
4638 {
4639   return code_convert_region1 (start, end, coding_system, 1);
4640 }
4641
4642 Lisp_Object
4643 code_convert_string1 (string, coding_system, nocopy, encodep)
4644      Lisp_Object string, coding_system, nocopy;
4645      int encodep;
4646 {
4647   struct coding_system coding;
4648
4649   CHECK_STRING (string, 0);
4650   CHECK_SYMBOL (coding_system, 1);
4651
4652   if (NILP (coding_system))
4653     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
4654
4655   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4656     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4657
4658   coding.mode |= CODING_MODE_LAST_BLOCK;
4659   return code_convert_string (string, &coding, encodep, !NILP (nocopy));
4660 }
4661
4662 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
4663        2, 3, 0,
4664   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
4665 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4666 if the decoding operation is trivial.")
4667   (string, coding_system, nocopy)
4668      Lisp_Object string, coding_system, nocopy;
4669 {
4670   return code_convert_string1(string, coding_system, nocopy, 0);
4671 }
4672
4673 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
4674        2, 3, 0,
4675   "Encode STRING to CODING-SYSTEM, and return the result.\n\
4676 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4677 if the encoding operation is trivial.")
4678   (string, coding_system, nocopy)
4679      Lisp_Object string, coding_system, nocopy;
4680 {
4681   return code_convert_string1(string, coding_system, nocopy, 1);
4682 }
4683
4684 \f
4685 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
4686   "Decode a JISX0208 character of shift-jis encoding.\n\
4687 CODE is the character code in SJIS.\n\
4688 Return the corresponding character.")
4689   (code)
4690      Lisp_Object code;
4691 {
4692   unsigned char c1, c2, s1, s2;
4693   Lisp_Object val;
4694
4695   CHECK_NUMBER (code, 0);
4696   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
4697   DECODE_SJIS (s1, s2, c1, c2);
4698   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
4699   return val;
4700 }
4701
4702 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
4703   "Encode a JISX0208 character CHAR to SJIS coding system.\n\
4704 Return the corresponding character code in SJIS.")
4705   (ch)
4706      Lisp_Object ch;
4707 {
4708   int charset, c1, c2, s1, s2;
4709   Lisp_Object val;
4710
4711   CHECK_NUMBER (ch, 0);
4712   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4713   if (charset == charset_jisx0208)
4714     {
4715       ENCODE_SJIS (c1, c2, s1, s2);
4716       XSETFASTINT (val, (s1 << 8) | s2);
4717     }
4718   else
4719     XSETFASTINT (val, 0);
4720   return val;
4721 }
4722
4723 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
4724   "Decode a Big5 character CODE of BIG5 coding system.\n\
4725 CODE is the character code in BIG5.\n\
4726 Return the corresponding character.")
4727   (code)
4728      Lisp_Object code;
4729 {
4730   int charset;
4731   unsigned char b1, b2, c1, c2;
4732   Lisp_Object val;
4733
4734   CHECK_NUMBER (code, 0);
4735   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
4736   DECODE_BIG5 (b1, b2, charset, c1, c2);
4737   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
4738   return val;
4739 }
4740
4741 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
4742   "Encode the Big5 character CHAR to BIG5 coding system.\n\
4743 Return the corresponding character code in Big5.")
4744   (ch)
4745      Lisp_Object ch;
4746 {
4747   int charset, c1, c2, b1, b2;
4748   Lisp_Object val;
4749
4750   CHECK_NUMBER (ch, 0);
4751   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4752   if (charset == charset_big5_1 || charset == charset_big5_2)
4753     {
4754       ENCODE_BIG5 (charset, c1, c2, b1, b2);
4755       XSETFASTINT (val, (b1 << 8) | b2);
4756     }
4757   else
4758     XSETFASTINT (val, 0);
4759   return val;
4760 }
4761 \f
4762 DEFUN ("set-terminal-coding-system-internal",
4763        Fset_terminal_coding_system_internal,
4764        Sset_terminal_coding_system_internal, 1, 1, 0, "")
4765   (coding_system)
4766      Lisp_Object coding_system;
4767 {
4768   CHECK_SYMBOL (coding_system, 0);
4769   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
4770   /* We had better not send unsafe characters to terminal.  */
4771   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
4772
4773   return Qnil;
4774 }
4775
4776 DEFUN ("set-safe-terminal-coding-system-internal",
4777        Fset_safe_terminal_coding_system_internal,
4778        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
4779   (coding_system)
4780      Lisp_Object coding_system;
4781 {
4782   CHECK_SYMBOL (coding_system, 0);
4783   setup_coding_system (Fcheck_coding_system (coding_system),
4784                        &safe_terminal_coding);
4785   return Qnil;
4786 }
4787
4788 DEFUN ("terminal-coding-system",
4789        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
4790   "Return coding system specified for terminal output.")
4791   ()
4792 {
4793   return terminal_coding.symbol;
4794 }
4795
4796 DEFUN ("set-keyboard-coding-system-internal",
4797        Fset_keyboard_coding_system_internal,
4798        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
4799   (coding_system)
4800      Lisp_Object coding_system;
4801 {
4802   CHECK_SYMBOL (coding_system, 0);
4803   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
4804   return Qnil;
4805 }
4806
4807 DEFUN ("keyboard-coding-system",
4808        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
4809   "Return coding system specified for decoding keyboard input.")
4810   ()
4811 {
4812   return keyboard_coding.symbol;
4813 }
4814
4815 \f
4816 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
4817        Sfind_operation_coding_system,  1, MANY, 0,
4818   "Choose a coding system for an operation based on the target name.\n\
4819 The value names a pair of coding systems: (DECODING-SYSTEM ENCODING-SYSTEM).\n\
4820 DECODING-SYSTEM is the coding system to use for decoding\n\
4821 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
4822 for encoding (in case OPERATION does encoding).\n\
4823 \n\
4824 The first argument OPERATION specifies an I/O primitive:\n\
4825   For file I/O, `insert-file-contents' or `write-region'.\n\
4826   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
4827   For network I/O, `open-network-stream'.\n\
4828 \n\
4829 The remaining arguments should be the same arguments that were passed\n\
4830 to the primitive.  Depending on which primitive, one of those arguments\n\
4831 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
4832 whichever argument specifies the file name is TARGET.\n\
4833 \n\
4834 TARGET has a meaning which depends on OPERATION:\n\
4835   For file I/O, TARGET is a file name.\n\
4836   For process I/O, TARGET is a process name.\n\
4837   For network I/O, TARGET is a service name or a port number\n\
4838 \n\
4839 This function looks up what specified for TARGET in,\n\
4840 `file-coding-system-alist', `process-coding-system-alist',\n\
4841 or `network-coding-system-alist' depending on OPERATION.\n\
4842 They may specify a coding system, a cons of coding systems,\n\
4843 or a function symbol to call.\n\
4844 In the last case, we call the function with one argument,\n\
4845 which is a list of all the arguments given to this function.")
4846   (nargs, args)
4847      int nargs;
4848      Lisp_Object *args;
4849 {
4850   Lisp_Object operation, target_idx, target, val;
4851   register Lisp_Object chain;
4852
4853   if (nargs < 2)
4854     error ("Too few arguments");
4855   operation = args[0];
4856   if (!SYMBOLP (operation)
4857       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
4858     error ("Invalid first arguement");
4859   if (nargs < 1 + XINT (target_idx))
4860     error ("Too few arguments for operation: %s",
4861            XSYMBOL (operation)->name->data);
4862   target = args[XINT (target_idx) + 1];
4863   if (!(STRINGP (target)
4864         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
4865     error ("Invalid %dth argument", XINT (target_idx) + 1);
4866
4867   chain = ((EQ (operation, Qinsert_file_contents)
4868             || EQ (operation, Qwrite_region))
4869            ? Vfile_coding_system_alist
4870            : (EQ (operation, Qopen_network_stream)
4871               ? Vnetwork_coding_system_alist
4872               : Vprocess_coding_system_alist));
4873   if (NILP (chain))
4874     return Qnil;
4875
4876   for (; CONSP (chain); chain = XCONS (chain)->cdr)
4877     {
4878       Lisp_Object elt;
4879       elt = XCONS (chain)->car;
4880
4881       if (CONSP (elt)
4882           && ((STRINGP (target)
4883                && STRINGP (XCONS (elt)->car)
4884                && fast_string_match (XCONS (elt)->car, target) >= 0)
4885               || (INTEGERP (target) && EQ (target, XCONS (elt)->car))))
4886         {
4887           val = XCONS (elt)->cdr;
4888           /* Here, if VAL is both a valid coding system and a valid
4889              function symbol, we return VAL as a coding system.  */
4890           if (CONSP (val))
4891             return val;
4892           if (! SYMBOLP (val))
4893             return Qnil;
4894           if (! NILP (Fcoding_system_p (val)))
4895             return Fcons (val, val);
4896           if (! NILP (Ffboundp (val)))
4897             {
4898               val = call1 (val, Flist (nargs, args));
4899               if (CONSP (val))
4900                 return val;
4901               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
4902                 return Fcons (val, val);
4903             }
4904           return Qnil;
4905         }
4906     }
4907   return Qnil;
4908 }
4909
4910 DEFUN ("update-iso-coding-systems", Fupdate_iso_coding_systems,
4911        Supdate_iso_coding_systems, 0, 0, 0,
4912   "Update internal database for ISO2022 based coding systems.\n\
4913 When values of the following coding categories are changed, you must\n\
4914 call this function:\n\
4915   coding-category-iso-7, coding-category-iso-7-tight,\n\
4916   coding-category-iso-8-1, coding-category-iso-8-2,\n\
4917   coding-category-iso-7-else, coding-category-iso-8-else")
4918   ()
4919 {
4920   int i;
4921
4922   for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_ISO_8_ELSE;
4923        i++)
4924     {
4925       if (! coding_system_table[i])
4926         coding_system_table[i]
4927           = (struct coding_system *) xmalloc (sizeof (struct coding_system));
4928       setup_coding_system
4929         (XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value,
4930          coding_system_table[i]);
4931     }
4932   return Qnil;
4933 }
4934
4935 #endif /* emacs */
4936
4937 \f
4938 /*** 8. Post-amble ***/
4939
4940 init_coding_once ()
4941 {
4942   int i;
4943
4944   /* Emacs' internal format specific initialize routine.  */
4945   for (i = 0; i <= 0x20; i++)
4946     emacs_code_class[i] = EMACS_control_code;
4947   emacs_code_class[0x0A] = EMACS_linefeed_code;
4948   emacs_code_class[0x0D] = EMACS_carriage_return_code;
4949   for (i = 0x21 ; i < 0x7F; i++)
4950     emacs_code_class[i] = EMACS_ascii_code;
4951   emacs_code_class[0x7F] = EMACS_control_code;
4952   emacs_code_class[0x80] = EMACS_leading_code_composition;
4953   for (i = 0x81; i < 0xFF; i++)
4954     emacs_code_class[i] = EMACS_invalid_code;
4955   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
4956   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
4957   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
4958   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
4959
4960   /* ISO2022 specific initialize routine.  */
4961   for (i = 0; i < 0x20; i++)
4962     iso_code_class[i] = ISO_control_code;
4963   for (i = 0x21; i < 0x7F; i++)
4964     iso_code_class[i] = ISO_graphic_plane_0;
4965   for (i = 0x80; i < 0xA0; i++)
4966     iso_code_class[i] = ISO_control_code;
4967   for (i = 0xA1; i < 0xFF; i++)
4968     iso_code_class[i] = ISO_graphic_plane_1;
4969   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
4970   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
4971   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
4972   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
4973   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
4974   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
4975   iso_code_class[ISO_CODE_ESC] = ISO_escape;
4976   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
4977   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
4978   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
4979
4980   conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
4981   conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
4982
4983   setup_coding_system (Qnil, &keyboard_coding);
4984   setup_coding_system (Qnil, &terminal_coding);
4985   setup_coding_system (Qnil, &safe_terminal_coding);
4986
4987   bzero (coding_system_table, sizeof coding_system_table);
4988
4989 #if defined (MSDOS) || defined (WINDOWSNT)
4990   system_eol_type = CODING_EOL_CRLF;
4991 #else
4992   system_eol_type = CODING_EOL_LF;
4993 #endif
4994 }
4995
4996 #ifdef emacs
4997
4998 syms_of_coding ()
4999 {
5000   Qtarget_idx = intern ("target-idx");
5001   staticpro (&Qtarget_idx);
5002
5003   Qcoding_system_history = intern ("coding-system-history");
5004   staticpro (&Qcoding_system_history);
5005   Fset (Qcoding_system_history, Qnil);
5006
5007   /* Target FILENAME is the first argument.  */
5008   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
5009   /* Target FILENAME is the third argument.  */
5010   Fput (Qwrite_region, Qtarget_idx, make_number (2));
5011
5012   Qcall_process = intern ("call-process");
5013   staticpro (&Qcall_process);
5014   /* Target PROGRAM is the first argument.  */
5015   Fput (Qcall_process, Qtarget_idx, make_number (0));
5016
5017   Qcall_process_region = intern ("call-process-region");
5018   staticpro (&Qcall_process_region);
5019   /* Target PROGRAM is the third argument.  */
5020   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
5021
5022   Qstart_process = intern ("start-process");
5023   staticpro (&Qstart_process);
5024   /* Target PROGRAM is the third argument.  */
5025   Fput (Qstart_process, Qtarget_idx, make_number (2));
5026
5027   Qopen_network_stream = intern ("open-network-stream");
5028   staticpro (&Qopen_network_stream);
5029   /* Target SERVICE is the fourth argument.  */
5030   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
5031
5032   Qcoding_system = intern ("coding-system");
5033   staticpro (&Qcoding_system);
5034
5035   Qeol_type = intern ("eol-type");
5036   staticpro (&Qeol_type);
5037
5038   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
5039   staticpro (&Qbuffer_file_coding_system);
5040
5041   Qpost_read_conversion = intern ("post-read-conversion");
5042   staticpro (&Qpost_read_conversion);
5043
5044   Qpre_write_conversion = intern ("pre-write-conversion");
5045   staticpro (&Qpre_write_conversion);
5046
5047   Qno_conversion = intern ("no-conversion");
5048   staticpro (&Qno_conversion);
5049
5050   Qundecided = intern ("undecided");
5051   staticpro (&Qundecided);
5052
5053   Qcoding_system_p = intern ("coding-system-p");
5054   staticpro (&Qcoding_system_p);
5055
5056   Qcoding_system_error = intern ("coding-system-error");
5057   staticpro (&Qcoding_system_error);
5058
5059   Fput (Qcoding_system_error, Qerror_conditions,
5060         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
5061   Fput (Qcoding_system_error, Qerror_message,
5062         build_string ("Invalid coding system"));
5063
5064   Qcoding_category = intern ("coding-category");
5065   staticpro (&Qcoding_category);
5066   Qcoding_category_index = intern ("coding-category-index");
5067   staticpro (&Qcoding_category_index);
5068
5069   Vcoding_category_table
5070     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
5071   staticpro (&Vcoding_category_table);
5072   {
5073     int i;
5074     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
5075       {
5076         XVECTOR (Vcoding_category_table)->contents[i]
5077           = intern (coding_category_name[i]);
5078         Fput (XVECTOR (Vcoding_category_table)->contents[i],
5079               Qcoding_category_index, make_number (i));
5080       }
5081   }
5082
5083   Qcharacter_unification_table = intern ("character-unification-table");
5084   staticpro (&Qcharacter_unification_table);
5085   Fput (Qcharacter_unification_table, Qchar_table_extra_slots,
5086         make_number (0));
5087
5088   Qcharacter_unification_table_for_decode
5089     = intern ("character-unification-table-for-decode");
5090   staticpro (&Qcharacter_unification_table_for_decode);
5091
5092   Qcharacter_unification_table_for_encode
5093     = intern ("character-unification-table-for-encode");
5094   staticpro (&Qcharacter_unification_table_for_encode);
5095
5096   Qsafe_charsets = intern ("safe-charsets");
5097   staticpro (&Qsafe_charsets);
5098
5099   Qemacs_mule = intern ("emacs-mule");
5100   staticpro (&Qemacs_mule);
5101
5102   Qraw_text = intern ("raw-text");
5103   staticpro (&Qraw_text);
5104
5105   defsubr (&Scoding_system_p);
5106   defsubr (&Sread_coding_system);
5107   defsubr (&Sread_non_nil_coding_system);
5108   defsubr (&Scheck_coding_system);
5109   defsubr (&Sdetect_coding_region);
5110   defsubr (&Sdetect_coding_string);
5111   defsubr (&Sdecode_coding_region);
5112   defsubr (&Sencode_coding_region);
5113   defsubr (&Sdecode_coding_string);
5114   defsubr (&Sencode_coding_string);
5115   defsubr (&Sdecode_sjis_char);
5116   defsubr (&Sencode_sjis_char);
5117   defsubr (&Sdecode_big5_char);
5118   defsubr (&Sencode_big5_char);
5119   defsubr (&Sset_terminal_coding_system_internal);
5120   defsubr (&Sset_safe_terminal_coding_system_internal);
5121   defsubr (&Sterminal_coding_system);
5122   defsubr (&Sset_keyboard_coding_system_internal);
5123   defsubr (&Skeyboard_coding_system);
5124   defsubr (&Sfind_operation_coding_system);
5125   defsubr (&Supdate_iso_coding_systems);
5126
5127   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
5128     "List of coding systems.\n\
5129 \n\
5130 Do not alter the value of this variable manually.  This variable should be\n\
5131 updated by the functions `make-coding-system' and\n\
5132 `define-coding-system-alias'.");
5133   Vcoding_system_list = Qnil;
5134
5135   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
5136     "Alist of coding system names.\n\
5137 Each element is one element list of coding system name.\n\
5138 This variable is given to `completing-read' as TABLE argument.\n\
5139 \n\
5140 Do not alter the value of this variable manually.  This variable should be\n\
5141 updated by the functions `make-coding-system' and\n\
5142 `define-coding-system-alias'.");
5143   Vcoding_system_alist = Qnil;
5144
5145   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
5146     "List of coding-categories (symbols) ordered by priority.");
5147   {
5148     int i;
5149
5150     Vcoding_category_list = Qnil;
5151     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
5152       Vcoding_category_list
5153         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
5154                  Vcoding_category_list);
5155   }
5156
5157   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
5158     "Specify the coding system for read operations.\n\
5159 It is useful to bind this variable with `let', but do not set it globally.\n\
5160 If the value is a coding system, it is used for decoding on read operation.\n\
5161 If not, an appropriate element is used from one of the coding system alists:\n\
5162 There are three such tables, `file-coding-system-alist',\n\
5163 `process-coding-system-alist', and `network-coding-system-alist'.");
5164   Vcoding_system_for_read = Qnil;
5165
5166   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
5167     "Specify the coding system for write operations.\n\
5168 It is useful to bind this variable with `let', but do not set it globally.\n\
5169 If the value is a coding system, it is used for encoding on write operation.\n\
5170 If not, an appropriate element is used from one of the coding system alists:\n\
5171 There are three such tables, `file-coding-system-alist',\n\
5172 `process-coding-system-alist', and `network-coding-system-alist'.");
5173   Vcoding_system_for_write = Qnil;
5174
5175   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
5176     "Coding system used in the latest file or process I/O.");
5177   Vlast_coding_system_used = Qnil;
5178
5179   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
5180     "*Non-nil inhibit code conversion of end-of-line format in any cases.");
5181   inhibit_eol_conversion = 0;
5182
5183   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
5184     "Alist to decide a coding system to use for a file I/O operation.\n\
5185 The format is ((PATTERN . VAL) ...),\n\
5186 where PATTERN is a regular expression matching a file name,\n\
5187 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5188 If VAL is a coding system, it is used for both decoding and encoding\n\
5189 the file contents.\n\
5190 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5191 and the cdr part is used for encoding.\n\
5192 If VAL is a function symbol, the function must return a coding system\n\
5193 or a cons of coding systems which are used as above.\n\
5194 \n\
5195 See also the function `find-operation-coding-system'.");
5196   Vfile_coding_system_alist = Qnil;
5197
5198   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
5199     "Alist to decide a coding system to use for a process I/O operation.\n\
5200 The format is ((PATTERN . VAL) ...),\n\
5201 where PATTERN is a regular expression matching a program name,\n\
5202 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5203 If VAL is a coding system, it is used for both decoding what received\n\
5204 from the program and encoding what sent to the program.\n\
5205 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5206 and the cdr part is used for encoding.\n\
5207 If VAL is a function symbol, the function must return a coding system\n\
5208 or a cons of coding systems which are used as above.\n\
5209 \n\
5210 See also the function `find-operation-coding-system'.");
5211   Vprocess_coding_system_alist = Qnil;
5212
5213   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
5214     "Alist to decide a coding system to use for a network I/O operation.\n\
5215 The format is ((PATTERN . VAL) ...),\n\
5216 where PATTERN is a regular expression matching a network service name\n\
5217 or is a port number to connect to,\n\
5218 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5219 If VAL is a coding system, it is used for both decoding what received\n\
5220 from the network stream and encoding what sent to the network stream.\n\
5221 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5222 and the cdr part is used for encoding.\n\
5223 If VAL is a function symbol, the function must return a coding system\n\
5224 or a cons of coding systems which are used as above.\n\
5225 \n\
5226 See also the function `find-operation-coding-system'.");
5227   Vnetwork_coding_system_alist = Qnil;
5228
5229   DEFVAR_INT ("eol-mnemonic-unix", &eol_mnemonic_unix,
5230     "Mnemonic character indicating UNIX-like end-of-line format (i.e. LF) .");
5231   eol_mnemonic_unix = ':';
5232
5233   DEFVAR_INT ("eol-mnemonic-dos", &eol_mnemonic_dos,
5234     "Mnemonic character indicating DOS-like end-of-line format (i.e. CRLF).");
5235   eol_mnemonic_dos = '\\';
5236
5237   DEFVAR_INT ("eol-mnemonic-mac", &eol_mnemonic_mac,
5238     "Mnemonic character indicating MAC-like end-of-line format (i.e. CR).");
5239   eol_mnemonic_mac = '/';
5240
5241   DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
5242     "Mnemonic character indicating end-of-line format is not yet decided.");
5243   eol_mnemonic_undecided = ':';
5244
5245   DEFVAR_LISP ("enable-character-unification", &Venable_character_unification,
5246     "Non-nil means ISO 2022 encoder/decoder do character unification.");
5247   Venable_character_unification = Qt;
5248
5249   DEFVAR_LISP ("standard-character-unification-table-for-decode",
5250     &Vstandard_character_unification_table_for_decode,
5251     "Table for unifying characters when reading.");
5252   Vstandard_character_unification_table_for_decode = Qnil;
5253
5254   DEFVAR_LISP ("standard-character-unification-table-for-encode",
5255     &Vstandard_character_unification_table_for_encode,
5256     "Table for unifying characters when writing.");
5257   Vstandard_character_unification_table_for_encode = Qnil;
5258
5259   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
5260     "Alist of charsets vs revision numbers.\n\
5261 While encoding, if a charset (car part of an element) is found,\n\
5262 designate it with the escape sequence identifing revision (cdr part of the element).");
5263   Vcharset_revision_alist = Qnil;
5264
5265   DEFVAR_LISP ("default-process-coding-system",
5266                &Vdefault_process_coding_system,
5267     "Cons of coding systems used for process I/O by default.\n\
5268 The car part is used for decoding a process output,\n\
5269 the cdr part is used for encoding a text to be sent to a process.");
5270   Vdefault_process_coding_system = Qnil;
5271
5272   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
5273     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
5274 This is a vector of length 256.\n\
5275 If Nth element is non-nil, the existence of code N in a file\n\
5276 \(or output of subprocess) doesn't prevent it to be detected as\n\
5277 a coding system of ISO 2022 variant which has a flag\n\
5278 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
5279 or reading output of a subprocess.\n\
5280 Only 128th through 159th elements has a meaning.");
5281   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
5282
5283   DEFVAR_LISP ("select-safe-coding-system-function",
5284                &Vselect_safe_coding_system_function,
5285     "Function to call to select safe coding system for encoding a text.\n\
5286 \n\
5287 If set, this function is called to force a user to select a proper\n\
5288 coding system which can encode the text in the case that a default\n\
5289 coding system used in each operation can't encode the text.\n\
5290 \n\
5291 The default value is `select-safe-codign-system' (which see).");
5292   Vselect_safe_coding_system_function = Qnil;
5293
5294 }
5295
5296 #endif /* emacs */