code.delx.au - gnu-emacs/blob - src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   1. Preamble
  25   2. Emacs' internal format (emacs-mule) handlers
  26   3. ISO2022 handlers
  27   4. Shift-JIS and BIG5 handlers
  28   5. CCL handlers
  29   6. End-of-line handlers
  30   7. C library functions
  31   8. Emacs Lisp library functions
  32   9. Post-amble
  33
  34 */
  35
  36 /*** GENERAL NOTE on CODING SYSTEM ***
  37
  38   Coding system is an encoding mechanism of one or more character
  39   sets.  Here's a list of coding systems which Emacs can handle.  When
  40   we say "decode", it means converting some other coding system to
  41   Emacs' internal format (emacs-mule), and when we say "encode",
  42   it means converting the coding system emacs-mule to some other
  43   coding system.
  44
  45   0. Emacs' internal format (emacs-mule)
  46
  47   Emacs itself holds a multi-lingual character in a buffer and a string
  48   in a special format.  Details are described in section 2.
  49
  50   1. ISO2022
  51
  52   The most famous coding system for multiple character sets.  X's
  53   Compound Text, various EUCs (Extended Unix Code), and coding
  54   systems used in Internet communication such as ISO-2022-JP are
  55   all variants of ISO2022.  Details are described in section 3.
  56
  57   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  58
  59   A coding system to encode character sets: ASCII, JISX0201, and
  60   JISX0208.  Widely used for PC's in Japan.  Details are described in
  61   section 4.
  62
  63   3. BIG5
  64
  65   A coding system to encode character sets: ASCII and Big5.  Widely
  66   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  67   described in section 4.  In this file, when we write "BIG5"
  68   (all uppercase), we mean the coding system, and when we write
  69   "Big5" (capitalized), we mean the character set.
  70
  71   4. Raw text
  72
  73   A coding system for a text containing random 8-bit code.  Emacs does
  74   no code conversion on such a text except for end-of-line format.
  75
  76   5. Other
  77
  78   If a user wants to read/write a text encoded in a coding system not
  79   listed above, he can supply a decoder and an encoder for it in CCL
  80   (Code Conversion Language) programs.  Emacs executes the CCL program
  81   while reading/writing.
  82
  83   Emacs represents a coding system by a Lisp symbol that has a property
  84   `coding-system'.  But, before actually using the coding system, the
  85   information about it is set in a structure of type `struct
  86   coding_system' for rapid processing.  See section 6 for more details.
  87
  88 */
  89
  90 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  91
  92   How end-of-line of a text is encoded depends on a system.  For
  93   instance, Unix's format is just one byte of `line-feed' code,
  94   whereas DOS's format is two-byte sequence of `carriage-return' and
  95   `line-feed' codes.  MacOS's format is usually one byte of
  96   `carriage-return'.
  97
  98   Since text characters encoding and end-of-line encoding are
  99   independent, any coding system described above can take
 100   any format of end-of-line.  So, Emacs has information of format of
 101   end-of-line in each coding-system.  See section 6 for more details.
 102
 103 */
 104
 105 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 106
 107   These functions check if a text between SRC and SRC_END is encoded
 108   in the coding system category XXX.  Each returns an integer value in
 109   which appropriate flag bits for the category XXX are set.  The flag
 110   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 111   template of these functions.  */
 112 #if 0
 113 int
 114 detect_coding_emacs_mule (src, src_end)
 115      unsigned char *src, *src_end;
 116 {
 117   ...
 118 }
 119 #endif
 120
 121 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 122
 123   These functions decode SRC_BYTES length text at SOURCE encoded in
 124   CODING to Emacs' internal format (emacs-mule).  The resulting text
 125   goes to a place pointed to by DESTINATION, the length of which
 126   should not exceed DST_BYTES.  These functions set the information of
 127   original and decoded texts in the members produced, produced_char,
 128   consumed, and consumed_char of the structure *CODING.
 129
 130   The return value is an integer (CODING_FINISH_XXX) indicating how
 131   the decoding finished.
 132
 133   DST_BYTES zero means that source area and destination area are
 134   overlapped, which means that we can produce a decoded text until it
 135   reaches the head of the not-yet-decoded source text.
 136
 137   Below is a template of these functions.  */
 138 #if 0
 139 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 140      struct coding_system *coding;
 141      unsigned char *source, *destination;
 142      int src_bytes, dst_bytes;
 143 {
 144   ...
 145 }
 146 #endif
 147
 148 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 149
 150   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 151   internal format (emacs-mule) to CODING.  The resulting text goes to
 152   a place pointed to by DESTINATION, the length of which should not
 153   exceed DST_BYTES.  These functions set the information of
 154   original and encoded texts in the members produced, produced_char,
 155   consumed, and consumed_char of the structure *CODING.
 156
 157   The return value is an integer (CODING_FINISH_XXX) indicating how
 158   the encoding finished.
 159
 160   DST_BYTES zero means that source area and destination area are
 161   overlapped, which means that we can produce an encoded text until it
 162   reaches the head of the not-yet-encoded source text.
 163
 164   Below is a template of these functions.  */
 165 #if 0
 166 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 167      struct coding_system *coding;
 168      unsigned char *source, *destination;
 169      int src_bytes, dst_bytes;
 170 {
 171   ...
 172 }
 173 #endif
 174
 175 /*** COMMONLY USED MACROS ***/
 176
 177 /* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and
 178    THREE_MORE_BYTES safely get one, two, and three bytes from the
 179    source text respectively.  If there are not enough bytes in the
 180    source, they jump to `label_end_of_loop'.  The caller should set
 181    variables `src' and `src_end' to appropriate areas in advance.
        Each macro fetches all of its bytes or none of them: when enough
        bytes remain, the arguments (which must be assignable) receive
        the bytes in order and `src' is advanced past them; otherwise
        `src' is left where it was before the jump.  The expansion refers
        to `src', `src_end' and the label `label_end_of_loop' by name, so
        all three must be visible at the point of use.  */
 182
 183 #define ONE_MORE_BYTE(c1)       \
 184   do {                          \
 185     if (src < src_end)          \
 186       c1 = *src++;              \
 187     else                        \
 188       goto label_end_of_loop;   \
 189   } while (0)
 190
     /* The test `src + 1 < src_end' succeeds when at least two bytes
        remain before `src_end'.  */
 191 #define TWO_MORE_BYTES(c1, c2)  \
 192   do {                          \
 193     if (src + 1 < src_end)      \
 194       c1 = *src++, c2 = *src++; \
 195     else                        \
 196       goto label_end_of_loop;   \
 197   } while (0)
 198
     /* The test `src + 2 < src_end' succeeds when at least three bytes
        remain before `src_end'.  */
 199 #define THREE_MORE_BYTES(c1, c2, c3)            \
 200   do {                                          \
 201     if (src + 2 < src_end)                      \
 202       c1 = *src++, c2 = *src++, c3 = *src++;    \
 203     else                                        \
 204       goto label_end_of_loop;                   \
 205   } while (0)
 206
 207 /* The following three macros DECODE_CHARACTER_ASCII,
 208    DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 put
 209    the multi-byte form of a character of each class at the place
 210    pointed by `dst'.  The caller should set the variable `dst' to
 211    point to an appropriate area and the variable `coding' to point to
 212    the coding-system of the currently decoding text in advance.
        Each macro advances `dst' past the bytes it stores and adds one
        to `coding->produced_char'.  None of them checks how much room is
        left at `dst'.  */
 213
 214 /* Decode one ASCII character C.  The byte stored is C with its most
        significant bit cleared (C & 0x7F).  */
 215
 216 #define DECODE_CHARACTER_ASCII(c)       \
 217   do {                                  \
 218     *dst++ = (c) & 0x7F;                \
 219     coding->produced_char++;            \
 220   } while (0)
 221
 222 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
 223    position-code is C.  The bytes stored are, in order: the value of
        CHARSET_LEADING_CODE_BASE (CHARSET); the value of
        CHARSET_LEADING_CODE_EXT (CHARSET), but only when that value is
        greater than zero; and C with its most significant bit set
        (C | 0x80).  CHARSET appears twice in the expansion, so it should
        be an expression without side effects.  */
 224
 225 #define DECODE_CHARACTER_DIMENSION1(charset, c)                         \
 226   do {                                                                  \
 227     unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);   \
 228                                                                         \
 229     *dst++ = leading_code;                                              \
 230     if ((leading_code = CHARSET_LEADING_CODE_EXT (charset)) > 0)        \
 231       *dst++ = leading_code;                                            \
 232     *dst++ = (c) | 0x80;                                                \
 233     coding->produced_char++;                                            \
 234   } while (0)
 235
 236 /* Decode one DIMENSION2 character whose charset is CHARSET and whose
 237    position-codes are C1 and C2.  This expands
        DECODE_CHARACTER_DIMENSION1 for C1, which is where the character
        is counted, and then appends C2 with its most significant bit
        set.  */
 238
 239 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2)    \
 240   do {                                                  \
 241     DECODE_CHARACTER_DIMENSION1 (charset, c1);          \
 242     *dst++ = (c2) | 0x80;                               \
 243   } while (0)
 244
 245 \f
 246 /*** 1. Preamble ***/
 247
 248 #ifdef emacs
 249 #include <config.h>
 250 #endif
 251
 252 #include <stdio.h>
 253
 254 #ifdef emacs
 255
 256 #include "lisp.h"
 257 #include "buffer.h"
 258 #include "charset.h"
 259 #include "composite.h"
 260 #include "ccl.h"
 261 #include "coding.h"
 262 #include "window.h"
 263
 264 #else  /* not emacs */
 265
 266 #include "mulelib.h"
 267
 268 #endif /* not emacs */
 269
 270 Lisp_Object Qcoding_system, Qeol_type;
 271 Lisp_Object Qbuffer_file_coding_system;
 272 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 273 Lisp_Object Qno_conversion, Qundecided;
 274 Lisp_Object Qcoding_system_history;
 275 Lisp_Object Qsafe_charsets;
 276 Lisp_Object Qvalid_codes;
 277
     /* Qinsert_file_contents and Qwrite_region are only declared here
        (`extern'), so their definitions are in another file.  The other
        objects in this group are defined in this file.
        NOTE(review): the Q-prefixed objects presumably hold Lisp symbols
        and the V-prefixed ones the values of Lisp variables; where they
        are initialized is not visible in this part of the file --
        confirm.  */
 278 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 279 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 280 Lisp_Object Qstart_process, Qopen_network_stream;
 281 Lisp_Object Qtarget_idx;
 282
 283 Lisp_Object Vselect_safe_coding_system_function;
 284
 285 /* Mnemonic string for each format of end-of-line.  */
 286 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 287 /* Mnemonic string to indicate that the format of end-of-line is not
 288    yet decided.  */
 289 Lisp_Object eol_mnemonic_undecided;
 290
 291 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 292    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 293 int system_eol_type;
 294
 295 #ifdef emacs
     /* Everything down to the matching #endif is compiled only when the
        macro `emacs' is defined.  */
 296
 297 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 298
 299 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 300
 301 /* Coding systems emacs-mule and raw-text are for converting only the
 302    end-of-line format.  */
 303 Lisp_Object Qemacs_mule, Qraw_text;
 304
 305 /* Coding-systems are handed between Emacs Lisp programs and C internal
 306    routines by the following three variables.  */
 307 /* Coding-system for reading files and receiving data from process.  */
 308 Lisp_Object Vcoding_system_for_read;
 309 /* Coding-system for writing files and sending data to process.  */
 310 Lisp_Object Vcoding_system_for_write;
 311 /* Coding-system actually used in the latest I/O.  */
 312 Lisp_Object Vlast_coding_system_used;
 313
 314 /* A vector of length 256 which contains information about special
 315    Latin codes (especially for dealing with Microsoft codes).  */
 316 Lisp_Object Vlatin_extra_code_table;
 317
 318 /* Flag to inhibit code conversion of end-of-line format.  */
 319 int inhibit_eol_conversion;
 320
 321 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 322 int inherit_process_coding_system;
 323
 324 /* Coding system to be used to encode text for terminal display.  */
 325 struct coding_system terminal_coding;
 326
 327 /* Coding system to be used to encode text for terminal display when
 328    terminal coding system is nil.  */
 329 struct coding_system safe_terminal_coding;
 330
 331 /* Coding system of what is sent from terminal keyboard.  */
 332 struct coding_system keyboard_coding;
 333
 334 /* Default coding system to be used to write a file.  */
 335 struct coding_system default_buffer_file_coding;
 336
     /* NOTE(review): the three alists below carry no description here.
        Judging by their names they presumably associate files, processes
        and network connections with coding systems -- confirm against
        the place where they are exposed to Lisp.  */
 337 Lisp_Object Vfile_coding_system_alist;
 338 Lisp_Object Vprocess_coding_system_alist;
 339 Lisp_Object Vnetwork_coding_system_alist;
 340
 341 Lisp_Object Vlocale_coding_system;
 342
 343 #endif /* emacs */
 344
 345 Lisp_Object Qcoding_category, Qcoding_category_index;
 346
 347 /* List of symbols `coding-category-xxx' ordered by priority.  */
 348 Lisp_Object Vcoding_category_list;
 349
 350 /* Table of coding categories (Lisp symbols).  */
 351 Lisp_Object Vcoding_category_table;
 352
 353 /* Table of the symbol name of each coding-category.  The array is
        declared with CODING_CATEGORY_IDX_MAX elements and has twelve
        initializers; any element beyond the twelfth would be a null
        pointer.
        NOTE(review): the names are presumably listed in the order of the
        CODING_CATEGORY_IDX_XXX constants, so the two lists have to be
        kept in step -- confirm against coding.h.  */
 354 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 355   "coding-category-emacs-mule",
 356   "coding-category-sjis",
 357   "coding-category-iso-7",
 358   "coding-category-iso-7-tight",
 359   "coding-category-iso-8-1",
 360   "coding-category-iso-8-2",
 361   "coding-category-iso-7-else",
 362   "coding-category-iso-8-else",
 363   "coding-category-ccl",
 364   "coding-category-big5",
 365   "coding-category-raw-text",
 366   "coding-category-binary"
 367 };
 368
 369 /* Table of pointers to coding systems corresponding to each coding
 370    category.  The macro CHARSET_OK tests an entry for a null pointer
        before using it, so entries are not assumed to be always set.  */
 371 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 372
 373 /* Table of coding category masks.  Nth element is a mask for a coding
 374    category of which priority is Nth.  The table is private to this
        file (`static').  */
 375 static
 376 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 377
 378 /* Flag to tell if we look up a translation table on character code
 379    conversion.  It is kept as a Lisp object, not as a C integer.  */
 380 Lisp_Object Venable_character_translation;
 381 /* Standard translation table to look up on decoding (reading).  */
 382 Lisp_Object Vstandard_translation_table_for_decode;
 383 /* Standard translation table to look up on encoding (writing).  */
 384 Lisp_Object Vstandard_translation_table_for_encode;
 385
 386 Lisp_Object Qtranslation_table;
 387 Lisp_Object Qtranslation_table_id;
 388 Lisp_Object Qtranslation_table_for_decode;
 389 Lisp_Object Qtranslation_table_for_encode;
 390
 391 /* Alist of charsets vs revision number.  */
 392 Lisp_Object Vcharset_revision_alist;
 393
 394 /* Default coding systems used for process I/O.  */
 395 Lisp_Object Vdefault_process_coding_system;
 396
 397 /* File-scope flag (`static') to tell that we can't call
 398    post-read-conversion and pre-write-conversion functions.  Usually
 399    the value is zero, but it is set to 1 temporarily while such
 400    functions are running.  This is to avoid infinitely recursive calls.  */
 401 static int inhibit_pre_post_conversion;
 402
 403 \f
 404 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 405
 406 /* Emacs' internal format for encoding multiple character sets is a
 407    kind of multi-byte encoding, i.e. characters are encoded by
 408    variable-length sequences of one-byte codes.  ASCII characters
 409    and control characters (e.g. `tab', `newline') are represented by
 410    one-byte sequences which are their ASCII codes, in the range 0x00
 411    through 0x7F.  The other characters are represented by a sequence
 412    of `base leading-code', optional `extended leading-code', and one
 413    or two `position-code's.  The length of the sequence is determined
 414    by the base leading-code.  Leading-code takes the range 0x80
 415    through 0x9F, whereas extended leading-code and position-code take
 416    the range 0xA0 through 0xFF.  See `charset.h' for more details
 417    about leading-code and position-code.
 418
 419    --- CODE RANGE of Emacs' internal format ---
 420    (character set)      (range)
 421    ASCII                0x00 .. 0x7F
 422    ELSE (1st byte)      0x81 .. 0x9F
 423         (rest bytes)    0xA0 .. 0xFF
 424    ---------------------------------------------
 425
 426   */
 427
 428 enum emacs_code_class_type emacs_code_class[256];
 429
 430 /* Go to the next statement only if *SRC is accessible and the code is
 431    not less than 0xA0 (0xA0 itself is accepted); SRC is then advanced
        past that byte.  If SRC has already reached SRC_END, jump to
        `label_end_of_switch' without reading anything.  If the byte is
        less than 0xA0, make the enclosing function return 0.  The
        expansion uses `src', `src_end' and `label_end_of_switch' by
        name, so it can only be used where all three are visible.  */
 432 #define CHECK_CODE_RANGE_A0_FF  \
 433   do {                          \
 434     if (src >= src_end)         \
 435       goto label_end_of_switch; \
 436     else if (*src++ < 0xA0)     \
 437       return 0;                 \
 438   } while (0)
 439
 440 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 441    Check if a text is encoded in Emacs' internal format.  If it is,
 442    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.
        The bytes from SRC up to, but not including, SRC_END are scanned.
        Scanning stops with 0 at the first byte that is not acceptable
        where it stands.  A multi-byte sequence cut short by SRC_END is
        not counted as an error, and an empty text yields
        CODING_CATEGORY_MASK_EMACS_MULE.  */
 443
 444 int
 445 detect_coding_emacs_mule (src, src_end)
 446      unsigned char *src, *src_end;
 447 {
 448   unsigned char c;
       /* Nonzero while the bytes following an old composition leading
          code (0x80) are being scanned.  */
 449   int composing = 0;
 450
 451   while (src < src_end)
 452     {
 453       c = *src++;
 454
           /* Within a composition, a byte of 0xA0 or more is lowered by
              0x20 before it is classified, and any smaller byte ends the
              composition and is classified as it is.  */
 455       if (composing)
 456         {
 457           if (c < 0xA0)
 458             composing = 0;
 459           else
 460             c -= 0x20;
 461         }
 462
 463       switch (emacs_code_class[c])
 464         {
 465         case EMACS_ascii_code:
 466         case EMACS_linefeed_code:
 467           break;
 468
 469         case EMACS_control_code:
               /* NOTE(review): ESC, SI and SO are presumably rejected
                  because they introduce ISO2022 escape and shift
                  sequences -- confirm.  */
 470           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 471             return 0;
 472           break;
 473
 474         case EMACS_invalid_code:
 475           return 0;
 476
               /* The three cases below share code by falling through: a
                  leading code of class 4 must be followed by three bytes
                  of 0xA0 or more, class 3 by two, and class 2 by one.  */
 477         case EMACS_leading_code_4:
 478           CHECK_CODE_RANGE_A0_FF;
 479           /* fall through to check it two more times ...  */
 480
 481         case EMACS_leading_code_3:
 482           CHECK_CODE_RANGE_A0_FF;
 483           /* fall through to check it one more time ...  */
 484
 485         case EMACS_leading_code_2:
 486           CHECK_CODE_RANGE_A0_FF;
 487           break;
 488
               /* NOTE(review): this label is a plain number, not an
                  enumerator.  It can only match if the class stored in
                  emacs_code_class for the composition leading code has
                  the value 0x80 -- confirm against the enum in coding.h.  */
 489         case 0x80:      /* Old leading code for a composite character.  */
 490           if (composing)
 491             CHECK_CODE_RANGE_A0_FF;
 492           else
 493             composing = 1;
 494           break;
 495
 496         default:
               /* CHECK_CODE_RANGE_A0_FF jumps here when the text ends in
                  the middle of a sequence; the loop condition then ends
                  the scan.  */
 497         label_end_of_switch:
 498           break;
 499         }
 500     }
 501   return CODING_CATEGORY_MASK_EMACS_MULE;
 502 }
 503
 504 \f
 505 /*** 3. ISO2022 handlers ***/
 506
 507 /* The following note describes the coding system ISO2022 briefly.
 508    Since the intention of this note is to help understand the
 509    functions in this file, some parts are NOT ACCURATE or OVERLY
 510    SIMPLIFIED.  For thorough understanding, please refer to the
 511    original document of ISO2022.
 512
 513    ISO2022 provides many mechanisms to encode several character sets
 514    in 7-bit and 8-bit environments.  For 7-bit environments, all text
 515    is encoded using bytes less than 128.  This may make the encoded
 516    text a little bit longer, but the text passes more easily through
 517    several gateways, some of which strip off the MSB (Most Significant Bit).
 518
 519    There are two kinds of character sets: control character set and
 520    graphic character set.  The former contains control characters such
 521    as `newline' and `escape' to provide control functions (control
 522    functions are also provided by escape sequences).  The latter
 523    contains graphic characters such as 'A' and '-'.  Emacs recognizes
 524    two control character sets and many graphic character sets.
 525
 526    Graphic character sets are classified into one of the following
 527    four classes, according to the number of bytes (DIMENSION) and
 528    number of characters in one dimension (CHARS) of the set:
 529    - DIMENSION1_CHARS94
 530    - DIMENSION1_CHARS96
 531    - DIMENSION2_CHARS94
 532    - DIMENSION2_CHARS96
 533
 534    In addition, each character set is assigned an identification tag,
 535    unique for each set, called "final character" (denoted as <F>
 536    hereafter).  The <F> of each character set is decided by ECMA(*)
 537    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
 538    (0x30..0x3F are for private use only).
 539
 540    Note (*): ECMA = European Computer Manufacturers Association
 541
 542    Here are examples of graphic character set [NAME(<F>)]:
 543         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 544         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 545         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 546         o DIMENSION2_CHARS96 -- none for the moment
 547
 548    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
 549         C0 [0x00..0x1F] -- control character plane 0
 550         GL [0x20..0x7F] -- graphic character plane 0
 551         C1 [0x80..0x9F] -- control character plane 1
 552         GR [0xA0..0xFF] -- graphic character plane 1
 553
 554    A control character set is directly designated and invoked to C0 or
 555    C1 by an escape sequence.  The most common case is that:
 556    - ISO646's  control character set is designated/invoked to C0, and
 557    - ISO6429's control character set is designated/invoked to C1,
 558    and usually these designations/invocations are omitted in encoded
 559    text.  In a 7-bit environment, only C0 can be used, and a control
 560    character for C1 is encoded by an appropriate escape sequence to
 561    fit into the environment.  All control characters for C1 are
 562    defined to have corresponding escape sequences.
 563
 564    A graphic character set is at first designated to one of four
 565    graphic registers (G0 through G3), then these graphic registers are
 566    invoked to GL or GR.  These designations and invocations can be
 567    done independently.  The most common case is that G0 is invoked to
 568    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
 569    these invocations and designations are omitted in encoded text.
 570    In a 7-bit environment, only GL can be used.
 571
 572    When a graphic character set of CHARS94 is invoked to GL, codes
 573    0x20 and 0x7F of the GL area work as control characters SPACE and
 574    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
 575    be used.
 576
 577    There are two ways of invocation: locking-shift and single-shift.
 578    With locking-shift, the invocation lasts until the next different
 579    invocation, whereas with single-shift, the invocation affects the
 580    following character only and doesn't affect the locking-shift
 581    state.  Invocations are done by the following control characters or
 582    escape sequences:
 583
 584    ----------------------------------------------------------------------
 585    abbrev  function                  cntrl escape seq   description
 586    ----------------------------------------------------------------------
 587    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
 588    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
 589    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
 590    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
 591    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
 592    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
 593    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
 594    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
 595    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
 596    ----------------------------------------------------------------------
 597    (*) These are not used by any known coding system.
 598
 599    Control characters for these functions are defined by macros
 600    ISO_CODE_XXX in `coding.h'.
 601
 602    Designations are done by the following escape sequences:
 603    ----------------------------------------------------------------------
 604    escape sequence      description
 605    ----------------------------------------------------------------------
 606    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 607    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 608    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 609    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 610    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 611    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 612    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 613    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 614    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 615    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 616    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 617    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 618    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 619    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 620    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 621    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 622    ----------------------------------------------------------------------
 623
 624    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 625    of dimension 1, chars 94, and final character <F>, etc...
 626
 627    Note (*): Although these designations are not allowed in ISO2022,
 628    Emacs accepts them on decoding, and produces them on encoding
 629    CHARS96 character sets in a coding system which is characterized as
 630    7-bit environment, non-locking-shift, and non-single-shift.
 631
 632    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 633    '(' can be omitted.  We refer to this as "short-form" hereafter.
 634
 635    Now you may notice that there are a lot of ways for encoding the
 636    same multilingual text in ISO2022.  Actually, there exist many
 637    coding systems such as Compound Text (used in X11's inter client
 638    communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
 639    (used in Korean internet), EUC (Extended UNIX Code, used in Asian
 640    localized platforms), and all of these are variants of ISO2022.
 641
 642    In addition to the above, Emacs handles two more kinds of escape
 643    sequences: ISO6429's direction specification and Emacs' private
 644    sequence for specifying character composition.
 645
 646    ISO6429's direction specification takes the following form:
 647         o CSI ']'      -- end of the current direction
 648         o CSI '0' ']'  -- end of the current direction
 649         o CSI '1' ']'  -- start of left-to-right text
 650         o CSI '2' ']'  -- start of right-to-left text
 651    The control character CSI (0x9B: control sequence introducer) is
 652    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
 653
 654    Character composition specification takes the following form:
 655         o ESC '0' -- start relative composition
 656         o ESC '1' -- end composition
 657         o ESC '2' -- start rule-base composition (*)
 658         o ESC '3' -- start relative composition with alternate chars  (**)
 659         o ESC '4' -- start rule-base composition with alternate chars  (**)
 660    Since these are not standard escape sequences of any ISO standard,
 661    the use of them with these meanings is restricted to Emacs only.
 662
 663    (*) This form is used only in Emacs 20.5 and the older versions,
 664    but the newer versions can safely decode it.
 665    (**) This form is used only in Emacs 21.1 and the newer versions,
 666    and the older versions can't decode it.
 667
 668    Here's a list of example usages of these composition escape
 669    sequences (categorized by `enum composition_method').
 670
 671    COMPOSITION_RELATIVE:
 672         ESC 0 CHAR [ CHAR ] ESC 1
 673    COMPOSITION_WITH_RULE:
 674         ESC 2 CHAR [ RULE CHAR ] ESC 1
 675    COMPOSITION_WITH_ALTCHARS:
 676         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
 677    COMPOSITION_WITH_RULE_ALTCHARS:
 678         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
 679
 680 enum iso_code_class_type iso_code_class[256];
 681
     /* Nonzero if category index IDX has a coding system registered in
        coding_system_table, and that coding system either has a nonzero
        entry for CHARSET in its `safe_charsets' table or has a requested
        designation for CHARSET (that is, anything other than
        CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION).  A null table entry
        makes the result zero before the entry is used.  IDX and CHARSET
        both appear more than once in the expansion, so they should be
        expressions without side effects.  */
 682 #define CHARSET_OK(idx, charset)                                \
 683   (coding_system_table[idx]                                     \
 684    && (coding_system_table[idx]->safe_charsets[charset]         \
 685        || (CODING_SPEC_ISO_REQUESTED_DESIGNATION                \
 686             (coding_system_table[idx], charset)                 \
 687            != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)))
 688
     /* Nonzero if CODING_SPEC_ISO_INITIAL_DESIGNATION, applied to the
        coding system of category index IDX with second argument 1,
        yields a value that is not negative.  Unlike CHARSET_OK, this
        macro does not test coding_system_table[IDX] for a null pointer
        first.
        NOTE(review): the argument 1 presumably selects graphic register
        G1, the one that shift-out (SO) invokes -- confirm against
        coding.h.  */
 689 #define SHIFT_OUT_OK(idx) \
 690   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 691
 692 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 693    Check if the text between SRC and SRC_END is encoded in ISO2022.
 694    If it is, return an integer in which any of these flag bits:
 695         CODING_CATEGORY_MASK_ISO_7
 696         CODING_CATEGORY_MASK_ISO_7_TIGHT
 697         CODING_CATEGORY_MASK_ISO_8_1
 698         CODING_CATEGORY_MASK_ISO_8_2
 699         CODING_CATEGORY_MASK_ISO_7_ELSE
 700         CODING_CATEGORY_MASK_ISO_8_ELSE
 701    are set.  If a code which should never appear in ISO2022 is found,
 702    return 0.  */
 703
 704 int
 705 detect_coding_iso2022 (src, src_end)
 706      unsigned char *src, *src_end;
 707 {
 708   int mask = CODING_CATEGORY_MASK_ISO;  /* Categories not yet ruled out.  */
 709   int mask_found = 0;   /* Categories some byte sequence has suggested.  */
 710   int reg[4], shift_out = 0, single_shifting = 0;
 711   int c, c1, i, charset;
 712   /* NOTE(review): SHIFT_OUT is never assigned below, and I is unused.  */
 713   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
 714   while (mask && src < src_end)
 715     {
 716       c = *src++;
 717       switch (c)
 718         {
 719         case ISO_CODE_ESC:
 720           single_shifting = 0;
 721           if (src >= src_end)
 722             break;
 723           c = *src++;
 724           if (c >= '(' && c <= '/')
 725             {
 726               /* Designation sequence for a charset of dimension 1.  */
 727               if (src >= src_end)
 728                 break;
 729               c1 = *src++;
 730               if (c1 < ' ' || c1 >= 0x80
 731                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 732                 /* Invalid designation sequence.  Just ignore.  */
 733                 break;
 734               reg[(c - '(') % 4] = charset;
 735             }
 736           else if (c == '$')
 737             {
 738               /* Designation sequence for a charset of dimension 2.  */
 739               if (src >= src_end)
 740                 break;
 741               c = *src++;
 742               if (c >= '@' && c <= 'B')
 743                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 744                 reg[0] = charset = iso_charset_table[1][0][c];
 745               else if (c >= '(' && c <= '/')
 746                 {
 747                   if (src >= src_end)
 748                     break;
 749                   c1 = *src++;
 750                   if (c1 < ' ' || c1 >= 0x80
 751                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 752                     /* Invalid designation sequence.  Just ignore.  */
 753                     break;
 754                   reg[(c - '(') % 4] = charset;
 755                 }
 756               else
 757                 /* Invalid designation sequence.  Just ignore.  */
 758                 break;
 759             }
 760           else if (c == 'N' || c == 'O')
 761             {
 762               /* ESC <Fe> for SS2 or SS3.  */
 763               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
 764               break;
 765             }
 766           else if (c >= '0' && c <= '4')
 767             {
 768               /* ESC <Fp> for start/end composition.  */
 769               mask_found |= CODING_CATEGORY_MASK_ISO;
 770               break;
 771             }
 772           else
 773             /* Invalid escape sequence.  Just ignore.  */
 774             break;
 775           /* NOTE(review): ESC $ @..B reaches here without a CHARSET >= 0 check.  */
 776           /* We found a valid designation sequence for CHARSET.  */
 777           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 778           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
 779             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 780           else
 781             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 782           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
 783             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 784           else
 785             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 786           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
 787             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
 788           else
 789             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 790           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
 791             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
 792           else
 793             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 794           break;
 795
 796         case ISO_CODE_SO:
 797           single_shifting = 0;
 798           if (shift_out == 0
 799               && (reg[1] >= 0
 800                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 801                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 802             {
 803               /* Locking shift out.  */
 804               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 805               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 806             }
 807           break;
 808
 809         case ISO_CODE_SI:
 810           single_shifting = 0;
 811           if (shift_out == 1)
 812             {
 813               /* Locking shift in.  NOTE(review): dead, SHIFT_OUT stays 0.  */
 814               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 815               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 816             }
 817           break;
 818
 819         case ISO_CODE_CSI:
 820           single_shifting = 0;  /* Fall through to the shared code.  */
 821         case ISO_CODE_SS2:
 822         case ISO_CODE_SS3:
 823           {
 824             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
 825
 826             if (c != ISO_CODE_CSI)
 827               {
 828                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 829                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 830                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 831                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 832                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 833                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 834                 single_shifting = 1;
 835               }
 836             if (VECTORP (Vlatin_extra_code_table)
 837                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 838               {
 839                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 840                     & CODING_FLAG_ISO_LATIN_EXTRA)
 841                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 842                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 843                     & CODING_FLAG_ISO_LATIN_EXTRA)
 844                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 845               }
 846             mask &= newmask;
 847             mask_found |= newmask;
 848           }
 849           break;
 850
 851         default:
 852           if (c < 0x80)
 853             {
 854               single_shifting = 0;
 855               break;
 856             }
 857           else if (c < 0xA0)
 858             {
 859               single_shifting = 0;
 860               if (VECTORP (Vlatin_extra_code_table)
 861                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 862                 {
 863                   int newmask = 0;
 864
 865                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 866                       & CODING_FLAG_ISO_LATIN_EXTRA)
 867                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 868                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 869                       & CODING_FLAG_ISO_LATIN_EXTRA)
 870                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 871                   mask &= newmask;
 872                   mask_found |= newmask;
 873                 }
 874               else
 875                 return 0;
 876             }
 877           else
 878             {
 879               unsigned char *src_begin = src;
 880
 881               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
 882                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
 883               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
 884               /* Check the length of succeeding codes of the range
 885                  0xA0..0xFF.  If the run is odd-length and ends before
 886                  SRC_END, we exclude CODING_CATEGORY_MASK_ISO_8_2.  We
 887                  can check this only when we are not single shifting.  */
 888               if (!single_shifting)
 889                 {
 890                   while (src < src_end && *src >= 0xA0)
 891                     src++;
 892                   if ((src - src_begin - 1) & 1 && src < src_end)
 893                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
 894                   else
 895                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
 896                 }
 897             }
 898           break;
 899         }
 900     }
 901
 902   return (mask & mask_found);
 903 }
 904
 905 /* Decode a character of which charset is CHARSET and the 1st position
 906    code is C1.  If dimension of CHARSET is 2, the 2nd position code is
 907    fetched from SRC and set to C2; if its class is neither
 908    ISO_0x20_or_0x7F nor ISO_graphic_plane_0, SRC is backed up by one
 909    and C1 is handled as ASCII.  If CHARSET is negative, it means that
 910    we are decoding ill formed text, and what we can do is just to read
 911    C1 as is.  While composing, the character (maybe an ALTCHAR) goes
 912    through the DECODE_CHARACTER_* macros only for the RELATIVE and
 913    WITH_RULE methods, and into coding->cmp_data->data unless RELATIVE.  */
 914 /* Expands with SRC, C2, CODING and TRANSLATION_TABLE of the caller.  */
 915 #define DECODE_ISO_CHARACTER(charset, c1)                                 \
 916   do {                                                                    \
 917     int c_alt = -1, charset_alt = (charset);                              \
 918     if (charset_alt >= 0)                                                 \
 919       {                                                                   \
 920         if (CHARSET_DIMENSION (charset_alt) == 2)                         \
 921           {                                                               \
 922             ONE_MORE_BYTE (c2);                                           \
 923             if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F           \
 924                 && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0)    \
 925               {                                                           \
 926                 src--;   /* Give the byte back.  */                       \
 927                 charset_alt = CHARSET_ASCII;                              \
 928               }                                                           \
 929           }                                                               \
 930         if (!NILP (translation_table)                                     \
 931             && ((c_alt = translate_char (translation_table,               \
 932                                          -1, charset_alt, c1, c2)) >= 0)) \
 933           SPLIT_CHAR (c_alt, charset_alt, c1, c2);                        \
 934       }                                                                   \
 935     if (! COMPOSING_P (coding)                                            \
 936         || coding->composing == COMPOSITION_RELATIVE                      \
 937         || coding->composing == COMPOSITION_WITH_RULE)                    \
 938       {                                                                   \
 939         if (charset_alt == CHARSET_ASCII || charset_alt < 0)              \
 940           DECODE_CHARACTER_ASCII (c1);                                    \
 941         else if (CHARSET_DIMENSION (charset_alt) == 1)                    \
 942           DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                  \
 943         else                                                              \
 944           DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);              \
 945       }                                                                   \
 946     if (COMPOSING_P (coding)                                              \
 947         && coding->composing != COMPOSITION_RELATIVE)                     \
 948       {                                                                   \
 949         if (c_alt < 0)                                                    \
 950           c_alt = MAKE_CHAR (charset_alt, c1, c2);                        \
 951         CODING_ADD_COMPOSITION_COMPONENT (coding, c_alt);                 \
 952         coding->composition_rule_follows                                  \
 953           = coding->composing != COMPOSITION_WITH_ALTCHARS;               \
 954       }                                                                   \
 955   } while (0)
 956
 957 /* Set designation state into CODING, or jump to label_invalid_code.  */
 958 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
 959   do {                                                                     \
 960     int charset;                                                           \
 961     /* Final bytes below '0' or above 0x7F are invalid.  */                \
 962     if (final_char < '0' || final_char >= 128)                             \
 963       goto label_invalid_code;                                             \
 964     charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
 965                                  make_number (chars),                      \
 966                                  make_number (final_char));                \
 967     if (charset >= 0                                                       \
 968         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
 969             || coding->safe_charsets[charset]))                            \
 970       {                                                                    \
 971         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
 972             && reg == 0                                                    \
 973             && charset == CHARSET_ASCII)                                   \
 974           {                                                                \
 975             /* We should insert this designation sequence as is so         \
 976                that it is surely written back to a file.  */               \
 977             coding->spec.iso2022.last_invalid_designation_register = -1;   \
 978             goto label_invalid_code;                                       \
 979           }                                                                \
 980         coding->spec.iso2022.last_invalid_designation_register = -1;       \
 981         if ((coding->mode & CODING_MODE_DIRECTION)                         \
 982             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
 983           charset = CHARSET_REVERSE_CHARSET (charset);                     \
 984         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
 985       }                                                                    \
 986     else                                                                   \
 987       {                                                                    \
 988         coding->spec.iso2022.last_invalid_designation_register = reg;      \
 989         goto label_invalid_code;                                           \
 990       }                                                                    \
 991   } while (0)
 992
 993 /* Allocate one more block for composition information of CODING and
 994    link it after the block CODING currently points to, if any.  */
 995 /* CHAR_OFFSET is saved in the new block, which becomes current.  */
 996 static void
 997 coding_allocate_composition_data (coding, char_offset)
 998      struct coding_system *coding;
 999      int char_offset;
1000 {
1001   struct composition_data *fresh
1002     = (struct composition_data *) xmalloc (sizeof *fresh);
1003   struct composition_data *last = coding->cmp_data;
1004
1005   fresh->prev = last, fresh->next = NULL;
1006   fresh->used = 0;
1007   fresh->char_offset = char_offset;
1008   if (last != NULL)
1009     last->next = fresh;
1010   coding->cmp_data_start = 0;
1011   coding->cmp_data = fresh;
1012 }
1013
1014 /* Record the starting position START and METHOD of one composition.  */
1015 /* data[0] is set to -1 here and data[2] is left unset.  */
1016 #define CODING_ADD_COMPOSITION_START(coding, start, method)     \
1017   do {                                                          \
1018     struct composition_data *cmp_data = (coding)->cmp_data;     \
1019     int *data = cmp_data->data + cmp_data->used;                \
1020     (coding)->cmp_data_start = cmp_data->used;                  \
1021     data[0] = -1;                                               \
1022     data[1] = cmp_data->char_offset + (start);                  \
1023     data[3] = (int) (method);                                   \
1024     cmp_data->used += 4;                                        \
1025   } while (0)
1026
1027 /* Record the ending position END of the current composition.  */
1028 /* Also stores in data[0] the length of the record that now ends.  */
1029 #define CODING_ADD_COMPOSITION_END(coding, end)                 \
1030   do {                                                          \
1031     struct composition_data *cmp_data = (coding)->cmp_data;     \
1032     int *data = cmp_data->data + (coding)->cmp_data_start;      \
1033     data[0] = cmp_data->used - (coding)->cmp_data_start;        \
1034     data[2] = cmp_data->char_offset + (end);                    \
1035   } while (0)
1036
1037 /* Record one COMPONENT (alternate character or composition rule).  */
1038 /* NOTE(review): no bounds check against COMPOSITION_DATA_SIZE here.  */
1039 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component)     \
1040   ((coding)->cmp_data->data[(coding)->cmp_data->used++] = (component))
1041
1042 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.  */
1043 /* C1 is the byte after ESC.  Uses DST, RESULT and CODING of the caller.  */
1044 #define DECODE_COMPOSITION_START(c1)                                    \
1045   do {                                                                  \
1046     if (coding->composing == COMPOSITION_DISABLED)                      \
1047       {                                                                 \
1048         *dst++ = ISO_CODE_ESC;                                          \
1049         *dst++ = c1 & 0x7f;                                             \
1050         coding->produced_char += 2;                                     \
1051       }                                                                 \
1052     else if (!COMPOSING_P (coding))                                     \
1053       {                                                                 \
1054         /* This is surely the start of a composition.  We must be sure  \
1055            that coding->cmp_data has enough space to store the          \
1056            information about the composition.  If not, terminate the    \
1057            current decoding loop, allocate one more memory block for    \
1058            coding->cmp_data in the caller, then start the decoding      \
1059            loop again.  We can't allocate memory here directly because  \
1060            it may cause buffer/string relocation.  */                   \
1061         if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH  \
1062             >= COMPOSITION_DATA_SIZE)                                   \
1063           {                                                             \
1064             result = CODING_FINISH_INSUFFICIENT_CMP;                    \
1065             goto label_end_of_loop_2;                                   \
1066           }                                                             \
1067         coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE           \
1068                              : c1 == '2' ? COMPOSITION_WITH_RULE        \
1069                              : c1 == '3' ? COMPOSITION_WITH_ALTCHARS    \
1070                              : COMPOSITION_WITH_RULE_ALTCHARS);         \
1071         CODING_ADD_COMPOSITION_START (coding, coding->produced_char,    \
1072                                       coding->composing);               \
1073         coding->composition_rule_follows = 0;                           \
1074       }                                                                 \
1075     else                                                                \
1076       {                                                                 \
1077         /* We are already handling a composition.  If the method is     \
1078            one of the following two, the codes after this escape        \
1079            sequence are actual characters stored in a buffer.  */       \
1080         if (coding->composing == COMPOSITION_WITH_ALTCHARS              \
1081             || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)     \
1082           {                                                             \
1083             coding->composing = COMPOSITION_RELATIVE;                   \
1084             coding->composition_rule_follows = 0;                       \
1085           }                                                             \
1086       }                                                                 \
1087   } while (0)
1088
1089 /* Handle composition end sequence ESC 1.  */
1090 /* If composition is disabled, ESC and C1 are copied to DST instead.  */
1091 #define DECODE_COMPOSITION_END(c1)                                      \
1092   do {                                                                  \
1093     if (coding->composing == COMPOSITION_DISABLED)                      \
1094       {                                                                 \
1095         *dst++ = ISO_CODE_ESC;                                          \
1096         *dst++ = c1;                                                    \
1097         coding->produced_char += 2;                                     \
1098       }                                                                 \
1099     else                                                                \
1100       {                                                                 \
1101         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
1102         coding->composing = COMPOSITION_NO;                             \
1103       }                                                                 \
1104   } while (0)
1105
1106 /* Decode a composition rule from the byte C1 (and maybe one more byte
1107    from SRC) and store one encoded composition rule in
1108    coding->cmp_data.  C1 is modified: 32 is subtracted from it.  */
1109 /* A reduced C1 of 93 or more stores rule 0; C2 is not range-checked.  */
1110 #define DECODE_COMPOSITION_RULE(c1)                                     \
1111   do {                                                                  \
1112     int rule = 0;                                                       \
1113     (c1) -= 32;                                                         \
1114     if (c1 < 81)                /* old format (before ver.21) */        \
1115       {                                                                 \
1116         int gref = (c1) / 9;                                            \
1117         int nref = (c1) % 9;                                            \
1118         if (gref == 4) gref = 10;                                       \
1119         if (nref == 4) nref = 10;                                       \
1120         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
1121       }                                                                 \
1122     else if (c1 < 93)           /* new format (after ver.21) */         \
1123       {                                                                 \
1124         ONE_MORE_BYTE (c2);                                             \
1125         rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);              \
1126       }                                                                 \
1127     CODING_ADD_COMPOSITION_COMPONENT (coding, rule);                    \
1128     coding->composition_rule_follows = 0;                               \
1129   } while (0)
1130
1131
1132 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1133
1134 int
1135 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1136      struct coding_system *coding;
1137      unsigned char *source, *destination;
1138      int src_bytes, dst_bytes;
1139 {
1140   unsigned char *src = source;
1141   unsigned char *src_end = source + src_bytes;
1142   unsigned char *dst = destination;
1143   unsigned char *dst_end = destination + dst_bytes;
1144   /* Since the maximum bytes produced by each loop is 7, we subtract 6
1145      from DST_END to assure that overflow checking is necessary only
1146      at the head of loop.  */
1147   unsigned char *adjusted_dst_end = dst_end - 6;
1148   int charset;
1149   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1150   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1151   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1152   Lisp_Object translation_table
1153     = coding->translation_table_for_decode;
1154   int result = CODING_FINISH_NORMAL;
1155
1156   if (!NILP (Venable_character_translation) && NILP (translation_table))
1157     translation_table = Vstandard_translation_table_for_decode;
1158
1159   coding->produced_char = 0;
1160   coding->fake_multibyte = 0;
1161   while (src < src_end && (dst_bytes
1162                            ? (dst < adjusted_dst_end)
1163                            : (dst < src - 6)))
1164     {
1165       /* SRC_BASE remembers the start position in source in each loop.
1166          The loop will be exited when there's not enough source text
1167          to analyze long escape sequence or 2-byte code (within macros
1168          ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
1169          to SRC_BASE before exiting.  */
1170       unsigned char *src_base = src;
1171       int c1 = *src++, c2;
1172
1173       /* We produce no character or one character.  */
1174       switch (iso_code_class [c1])
1175         {
1176         case ISO_0x20_or_0x7F:
1177           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1178             {
1179               DECODE_COMPOSITION_RULE (c1);
1180               break;
1181             }
1182           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1183             {
1184               /* This is SPACE or DEL.  */
1185               *dst++ = c1;
1186               coding->produced_char++;
1187               break;
1188             }
1189           /* This is a graphic character, we fall down ...  */
1190
1191         case ISO_graphic_plane_0:
1192           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1193             DECODE_COMPOSITION_RULE (c1);
1194           else
1195             DECODE_ISO_CHARACTER (charset0, c1);
1196           break;
1197
1198         case ISO_0xA0_or_0xFF:
1199           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1200               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1201             goto label_invalid_code;
1202           /* This is a graphic character, we fall down ... */
1203
1204         case ISO_graphic_plane_1:
1205           if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1206             goto label_invalid_code;
1207           DECODE_ISO_CHARACTER (charset1, c1);
1208           break;
1209
1210         case ISO_control_code:
1211           if (COMPOSING_P (coding))
1212             DECODE_COMPOSITION_END ('1');
1213
1214           /* All ISO2022 control characters in this class have the
1215              same representation in Emacs internal format.  */
1216           if (c1 == '\n'
1217               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1218               && (coding->eol_type == CODING_EOL_CR
1219                   || coding->eol_type == CODING_EOL_CRLF))
1220             {
1221               result = CODING_FINISH_INCONSISTENT_EOL;
1222               goto label_end_of_loop_2;
1223             }
1224           *dst++ = c1;
1225           coding->produced_char++;
1226           break;
1227
1228         case ISO_carriage_return:
1229           if (COMPOSING_P (coding))
1230             DECODE_COMPOSITION_END ('1');
1231
1232           if (coding->eol_type == CODING_EOL_CR)
1233             *dst++ = '\n';
1234           else if (coding->eol_type == CODING_EOL_CRLF)
1235             {
1236               ONE_MORE_BYTE (c1);
1237               if (c1 == ISO_CODE_LF)
1238                 *dst++ = '\n';
1239               else
1240                 {
1241                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1242                     {
1243                       result = CODING_FINISH_INCONSISTENT_EOL;
1244                       goto label_end_of_loop_2;
1245                     }
1246                   src--;
1247                   *dst++ = '\r';
1248                 }
1249             }
1250           else
1251             *dst++ = c1;
1252           coding->produced_char++;
1253           break;
1254
1255         case ISO_shift_out:
1256           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1257               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1258             goto label_invalid_code;
1259           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1260           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1261           break;
1262
1263         case ISO_shift_in:
1264           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1265             goto label_invalid_code;
1266           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1267           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1268           break;
1269
1270         case ISO_single_shift_2_7:
1271         case ISO_single_shift_2:
1272           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1273             goto label_invalid_code;
1274           /* SS2 is handled as an escape sequence of ESC 'N' */
1275           c1 = 'N';
1276           goto label_escape_sequence;
1277
1278         case ISO_single_shift_3:
1279           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1280             goto label_invalid_code;
1281           /* SS2 is handled as an escape sequence of ESC 'O' */
1282           c1 = 'O';
1283           goto label_escape_sequence;
1284
1285         case ISO_control_sequence_introducer:
1286           /* CSI is handled as an escape sequence of ESC '[' ...  */
1287           c1 = '[';
1288           goto label_escape_sequence;
1289
1290         case ISO_escape:
1291           ONE_MORE_BYTE (c1);
1292         label_escape_sequence:
1293           /* Escape sequences handled by Emacs are invocation,
1294              designation, direction specification, and character
1295              composition specification.  */
1296           switch (c1)
1297             {
1298             case '&':           /* revision of following character set */
1299               ONE_MORE_BYTE (c1);
1300               if (!(c1 >= '@' && c1 <= '~'))
1301                 goto label_invalid_code;
1302               ONE_MORE_BYTE (c1);
1303               if (c1 != ISO_CODE_ESC)
1304                 goto label_invalid_code;
1305               ONE_MORE_BYTE (c1);
1306               goto label_escape_sequence;
1307
1308             case '$':           /* designation of 2-byte character set */
1309               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1310                 goto label_invalid_code;
1311               ONE_MORE_BYTE (c1);
1312               if (c1 >= '@' && c1 <= 'B')
1313                 {       /* designation of JISX0208.1978, GB2312.1980,
1314                            or JISX0208.1980 */
1315                   DECODE_DESIGNATION (0, 2, 94, c1);
1316                 }
1317               else if (c1 >= 0x28 && c1 <= 0x2B)
1318                 {       /* designation of DIMENSION2_CHARS94 character set */
1319                   ONE_MORE_BYTE (c2);
1320                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1321                 }
1322               else if (c1 >= 0x2C && c1 <= 0x2F)
1323                 {       /* designation of DIMENSION2_CHARS96 character set */
1324                   ONE_MORE_BYTE (c2);
1325                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1326                 }
1327               else
1328                 goto label_invalid_code;
1329               break;
1330
1331             case 'n':           /* invocation of locking-shift-2 */
1332               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1333                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1334                 goto label_invalid_code;
1335               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1336               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1337               break;
1338
1339             case 'o':           /* invocation of locking-shift-3 */
1340               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1341                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1342                 goto label_invalid_code;
1343               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1344               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1345               break;
1346
1347             case 'N':           /* invocation of single-shift-2 */
1348               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1349                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1350                 goto label_invalid_code;
1351               ONE_MORE_BYTE (c1);
1352               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1353               DECODE_ISO_CHARACTER (charset, c1);
1354               break;
1355
1356             case 'O':           /* invocation of single-shift-3 */
1357               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1358                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1359                 goto label_invalid_code;
1360               ONE_MORE_BYTE (c1);
1361               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1362               DECODE_ISO_CHARACTER (charset, c1);
1363               break;
1364
1365             case '0': case '2': case '3': case '4': /* start composition */
1366               DECODE_COMPOSITION_START (c1);
1367               break;
1368
1369             case '1':           /* end composition */
1370               DECODE_COMPOSITION_END (c1);
1371               break;
1372
1373             case '[':           /* specification of direction */
1374               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1375                 goto label_invalid_code;
1376               /* For the moment, nested direction is not supported.
1377                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1378                  left-to-right, and nonzero means right-to-left.  */
1379               ONE_MORE_BYTE (c1);
1380               switch (c1)
1381                 {
1382                 case ']':       /* end of the current direction */
1383                   coding->mode &= ~CODING_MODE_DIRECTION;
1384
1385                 case '0':       /* end of the current direction */
1386                 case '1':       /* start of left-to-right direction */
1387                   ONE_MORE_BYTE (c1);
1388                   if (c1 == ']')
1389                     coding->mode &= ~CODING_MODE_DIRECTION;
1390                   else
1391                     goto label_invalid_code;
1392                   break;
1393
1394                 case '2':       /* start of right-to-left direction */
1395                   ONE_MORE_BYTE (c1);
1396                   if (c1 == ']')
1397                     coding->mode |= CODING_MODE_DIRECTION;
1398                   else
1399                     goto label_invalid_code;
1400                   break;
1401
1402                 default:
1403                   goto label_invalid_code;
1404                 }
1405               break;
1406
1407             default:
1408               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1409                 goto label_invalid_code;
1410               if (c1 >= 0x28 && c1 <= 0x2B)
1411                 {       /* designation of DIMENSION1_CHARS94 character set */
1412                   ONE_MORE_BYTE (c2);
1413                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1414                 }
1415               else if (c1 >= 0x2C && c1 <= 0x2F)
1416                 {       /* designation of DIMENSION1_CHARS96 character set */
1417                   ONE_MORE_BYTE (c2);
1418                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1419                 }
1420               else
1421                 {
1422                   goto label_invalid_code;
1423                 }
1424             }
1425           /* We must update these variables now.  */
1426           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1427           charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1428           break;
1429
1430         label_invalid_code:
1431           if (COMPOSING_P (coding))
1432             DECODE_COMPOSITION_END ('1');
1433           coding->produced_char += src - src_base;
1434           while (src_base < src)
1435             *dst++ = (*src_base++) & 0x7F;
1436         }
1437       continue;
1438
1439     label_end_of_loop:
1440       result = CODING_FINISH_INSUFFICIENT_SRC;
1441     label_end_of_loop_2:
1442       src = src_base;
1443       break;
1444     }
1445
1446   if (src < src_end)
1447     {
1448       if (result == CODING_FINISH_NORMAL)
1449         result = CODING_FINISH_INSUFFICIENT_DST;
1450       else if (result != CODING_FINISH_INCONSISTENT_EOL
1451                && coding->mode & CODING_MODE_LAST_BLOCK)
1452         {
1453           /* This is the last block of the text to be decoded.  We had
1454              better just flush out all remaining codes in the text
1455              although they are not valid characters.  */
1456           if (COMPOSING_P (coding))
1457             DECODE_COMPOSITION_END ('1');
1458           src_bytes = src_end - src;
1459           if (dst_bytes && (dst_end - dst < src_end - src))
1460             src_end = src + (dst_end - dst);
1461           coding->produced_char += src_end - src;
1462           while (src < src_end)
1463             *dst++ = (*src++) & 0x7F;
1464         }
1465     }
1466
1467   coding->consumed = coding->consumed_char = src - source;
1468   coding->produced = dst - destination;
1469   return result;
1470 }
1471
1472 /* ISO2022 encoding stuff.  */
1473
1474 /*
1475    It is not enough to say just "ISO2022" on encoding, we have to
1476    specify more details.  In Emacs, each coding system of ISO2022
1477    variant has the following specifications:
1478         1. Initial designation to G0 thru G3.
1479         2. Allows short-form designation?
1480         3. ASCII should be designated to G0 before control characters?
1481         4. ASCII should be designated to G0 at end of line?
1482         5. 7-bit environment or 8-bit environment?
1483         6. Use locking-shift?
1484         7. Use Single-shift?
1485    And the following two are only for Japanese:
1486         8. Use ASCII in place of JIS0201-1976-Roman?
1487         9. Use JISX0208-1983 in place of JISX0208-1978?
1488    These specifications are encoded in `coding->flags' as flag bits
1489    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1490    details.
1491 */
1492
/* Emit at DST the escape sequence that designates CHARSET to graphic
   register REG, then record CHARSET as the current designation of
   REG in CODING.  DST is taken from the enclosing scope and is
   advanced past the bytes written.

   If the revision number registered for CHARSET is less than 255, a
   revision sequence `ESC & <'@' + revision>' is emitted first.  The
   designation itself is ESC, then `$' for a charset whose dimension
   is not 1, then an intermediate byte selected by REG (one of "()*+"
   for 94-char sets, one of ",-./" otherwise), then the final
   character of CHARSET.  The intermediate byte is left out
   (short-form) only for a multi-dimension 94-char set designated to
   register 0 whose final character is '@', 'A', or 'B', and only
   when CODING has CODING_FLAG_ISO_SHORT_FORM set.

   CHARSET and REG are evaluated more than once.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                           \
  do {                                                                     \
    unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);           \
    int revision = CODING_SPEC_ISO_REVISION_NUMBER (coding, charset);      \
    /* Candidate intermediate bytes, indexed by register number.  */       \
    const char *intermediate_chars                                         \
      = (CHARSET_CHARS (charset) == 94 ? "()*+" : ",-./");                 \
    int emit_intermediate = 1;                                             \
                                                                           \
    if (revision < 255)                                                    \
      {                                                                    \
        *dst++ = ISO_CODE_ESC;                                             \
        *dst++ = '&';                                                      \
        *dst++ = '@' + revision;                                           \
      }                                                                    \
    *dst++ = ISO_CODE_ESC;                                                 \
    if (CHARSET_DIMENSION (charset) != 1)                                  \
      {                                                                    \
        *dst++ = '$';                                                      \
        /* Short-form designation drops the intermediate byte.  */         \
        if (CHARSET_CHARS (charset) == 94                                  \
            && (coding->flags & CODING_FLAG_ISO_SHORT_FORM)                \
            && (reg) == 0                                                  \
            && final_char >= '@' && final_char <= 'B')                     \
          emit_intermediate = 0;                                           \
      }                                                                    \
    if (emit_intermediate)                                                 \
      *dst++ = (unsigned char) intermediate_chars[reg];                    \
    *dst++ = final_char;                                                   \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                   \
  } while (0)
1534
/* The next two macros emit the ISO2022 single-shift functions
   (single-shift-2 and single-shift-3) at DST.  When CODING has
   CODING_FLAG_ISO_SEVEN_BITS set, the function is written as a
   two-byte escape sequence (ESC N or ESC O); otherwise the one-byte
   code ISO_CODE_SS2 or ISO_CODE_SS3 is written and
   `coding->fake_multibyte' is set.  In both cases the single-shifting
   state of CODING is turned on.  `coding' and `dst' come from the
   enclosing scope.  */

#define ENCODE_SINGLE_SHIFT_2                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      {                                                         \
        *dst++ = ISO_CODE_SS2;                                  \
        coding->fake_multibyte = 1;                             \
      }                                                         \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'N';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)

#define ENCODE_SINGLE_SHIFT_3                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      {                                                         \
        *dst++ = ISO_CODE_SS3;                                  \
        coding->fake_multibyte = 1;                             \
      }                                                         \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'O';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
1562
/* The next four macros emit the ISO2022 locking-shift functions at
   DST and record in CODING which graphic register is now invoked to
   graphic plane 0: shift-in selects register 0, shift-out register 1,
   locking-shift-2 register 2, and locking-shift-3 register 3.
   Shift-in and shift-out are single control codes; the other two are
   the escape sequences ESC n and ESC o.  `coding' and `dst' come from
   the enclosing scope.  */

#define ENCODE_SHIFT_IN                         \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
    *dst++ = ISO_CODE_SI;                       \
  } while (0)

#define ENCODE_SHIFT_OUT                        \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
    *dst++ = ISO_CODE_SO;                       \
  } while (0)

#define ENCODE_LOCKING_SHIFT_2                  \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
    *dst++ = ISO_CODE_ESC;                      \
    *dst++ = 'n';                               \
  } while (0)

#define ENCODE_LOCKING_SHIFT_3                  \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
    *dst++ = ISO_CODE_ESC;                      \
    *dst++ = 'o';                               \
  } while (0)
1590
/* Emit at DST the code for a DIMENSION1 character of character set
   CHARSET whose position-code is C1.  `coding' and `dst' come from
   the enclosing scope.

   The macro loops until one of these cases produces output:
     - single-shifting is pending: emit C1 (7-bit or 8-bit form
       according to CODING_FLAG_ISO_SEVEN_BITS) and clear the state;
     - CHARSET is invoked to graphic plane 0: emit C1 with the high
       bit cleared;
     - CHARSET is invoked to graphic plane 1: emit C1 with the high
       bit set;
     - CODING_FLAG_ISO_SAFE is set and CHARSET is not a safe charset:
       emit one or two substitution characters instead.
   Otherwise designation and/or invocation codes are produced by
   encode_invocation_designation and the checks are repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        *dst++ = ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)          \
                  ? ((c1) & 0x7F)                                       \
                  : ((c1) | 0x80));                                     \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))         \
      {                                                                 \
        *dst++ = (c1) & 0x7F;                                           \
        break;                                                          \
      }                                                                 \
    if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))         \
      {                                                                 \
        *dst++ = (c1) | 0x80;                                           \
        break;                                                          \
      }                                                                 \
    if ((coding->flags & CODING_FLAG_ISO_SAFE)                          \
        && !coding->safe_charsets[charset])                             \
      {                                                                 \
        /* This character must not be encoded; substitute it, using     \
           two substitution characters for a width-2 charset.  */       \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
        if (CHARSET_WIDTH (charset) == 2)                               \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to either graphic plane yet.  Designate   \
       and/or invoke it, then go round again to emit the character.  */ \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
1634
/* Emit at DST the codes for a DIMENSION2 character of character set
   CHARSET whose position-codes are C1 and C2.  `coding' and `dst'
   come from the enclosing scope.

   The cases are tried in this order, looping until one of them
   produces output: pending single-shift (both bytes in 7-bit or
   8-bit form according to CODING_FLAG_ISO_SEVEN_BITS, then the state
   is cleared), CHARSET invoked to graphic plane 0 (high bits
   cleared), CHARSET invoked to graphic plane 1 (high bits set), and
   CHARSET rejected by the safe-charsets check (substitution
   characters are emitted).  If none applies,
   encode_invocation_designation produces the needed designation
   and/or invocation codes and the checks are repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          {                                                             \
            *dst++ = (c1) & 0x7F;                                       \
            *dst++ = (c2) & 0x7F;                                       \
          }                                                             \
        else                                                            \
          {                                                             \
            *dst++ = (c1) | 0x80;                                       \
            *dst++ = (c2) | 0x80;                                       \
          }                                                             \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))         \
      {                                                                 \
        *dst++ = (c1) & 0x7F;                                           \
        *dst++ = (c2) & 0x7F;                                           \
        break;                                                          \
      }                                                                 \
    if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))         \
      {                                                                 \
        *dst++ = (c1) | 0x80;                                           \
        *dst++ = (c2) | 0x80;                                           \
        break;                                                          \
      }                                                                 \
    if ((coding->flags & CODING_FLAG_ISO_SAFE)                          \
        && !coding->safe_charsets[charset])                             \
      {                                                                 \
        /* This character must not be encoded; substitute it, using     \
           two substitution characters for a width-2 charset.  */       \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
        if (CHARSET_WIDTH (charset) == 2)                               \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to either graphic plane yet.  Designate   \
       and/or invoke it, then go round again to emit the character.  */ \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
1677
/* Emit at DST the codes for one character given as CHARSET and
   position-codes C1 and C2, and count it in `coding->consumed_char'.
   `coding', `dst' and `translation_table' come from the enclosing
   scope.

   If `translation_table' is non-nil and translate_char returns a
   non-negative character, that character is split by SPLIT_CHAR into
   a replacement charset, C1 and C2 (so C1 and C2 must be variables
   the macro may overwrite).  A defined charset is emitted through
   ENCODE_ISO_CHARACTER_DIMENSION1 or ENCODE_ISO_CHARACTER_DIMENSION2;
   before that, an original CHARSET of ASCII is replaced by
   charset_latin_jisx0201 under CODING_FLAG_ISO_USE_ROMAN, and an
   original CHARSET of charset_jisx0208 by charset_jisx0208_1978 under
   CODING_FLAG_ISO_USE_OLDJIS.  For an undefined charset the values of
   CHARSET, C1 and (if non-zero) C2 are written out as raw bytes,
   masked to 7 bits when CODING_FLAG_ISO_SEVEN_BITS is set.  */

#define ENCODE_ISO_CHARACTER(charset, c1, c2)                           \
  do {                                                                  \
    int c_alt = -1, charset_alt = charset;                              \
                                                                        \
    if (! NILP (translation_table))                                     \
      c_alt = translate_char (translation_table, -1, charset, c1, c2);  \
    if (c_alt >= 0)                                                     \
      {                                                                 \
        SPLIT_CHAR (c_alt, charset_alt, c1, c2);                        \
      }                                                                 \
                                                                        \
    if (! CHARSET_DEFINED_P (charset_alt))                              \
      {                                                                 \
        int seven_bits = coding->flags & CODING_FLAG_ISO_SEVEN_BITS;    \
                                                                        \
        *dst++ = seven_bits ? (charset & 0x7f) : charset;               \
        *dst++ = seven_bits ? (c1 & 0x7f) : c1;                         \
        if (c2)                                                         \
          *dst++ = seven_bits ? (c2 & 0x7f) : c2;                       \
      }                                                                 \
    else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
      {                                                                 \
        /* The test is on the original CHARSET, not the translated      \
           one.  */                                                     \
        if (charset == CHARSET_ASCII                                    \
            && (coding->flags & CODING_FLAG_ISO_USE_ROMAN))             \
          charset_alt = charset_latin_jisx0201;                         \
        ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);              \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if (charset == charset_jisx0208                                 \
            && (coding->flags & CODING_FLAG_ISO_USE_OLDJIS))            \
          charset_alt = charset_jisx0208_1978;                          \
        ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);          \
      }                                                                 \
    coding->consumed_char++;                                            \
  } while (0)
1725
1726 /* Produce designation and invocation codes at a place pointed by DST
1727    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
1728    Return new DST.  */
1729
1730 unsigned char *
1731 encode_invocation_designation (charset, coding, dst)
1732      int charset;
1733      struct coding_system *coding;
1734      unsigned char *dst;
1735 {
1736   int reg;                      /* graphic register number */
1737
1738   /* At first, check designations.  */
1739   for (reg = 0; reg < 4; reg++)
1740     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1741       break;
1742
1743   if (reg >= 4)
1744     {
1745       /* CHARSET is not yet designated to any graphic registers.  */
1746       /* At first check the requested designation.  */
1747       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1748       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1749         /* Since CHARSET requests no special designation, designate it
1750            to graphic register 0.  */
1751         reg = 0;
1752
1753       ENCODE_DESIGNATION (charset, reg, coding);
1754     }
1755
1756   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1757       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1758     {
1759       /* Since the graphic register REG is not invoked to any graphic
1760          planes, invoke it to graphic plane 0.  */
1761       switch (reg)
1762         {
1763         case 0:                 /* graphic register 0 */
1764           ENCODE_SHIFT_IN;
1765           break;
1766
1767         case 1:                 /* graphic register 1 */
1768           ENCODE_SHIFT_OUT;
1769           break;
1770
1771         case 2:                 /* graphic register 2 */
1772           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1773             ENCODE_SINGLE_SHIFT_2;
1774           else
1775             ENCODE_LOCKING_SHIFT_2;
1776           break;
1777
1778         case 3:                 /* graphic register 3 */
1779           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1780             ENCODE_SINGLE_SHIFT_3;
1781           else
1782             ENCODE_LOCKING_SHIFT_3;
1783           break;
1784         }
1785     }
1786   return dst;
1787 }
1788
/* Emit at DST the 2-byte code for the encoded composition rule RULE.
   RULE is handed to COMPOSITION_DECODE_RULE together with two local
   variables; the first byte written is 32 + 81 plus the first of
   them, the second byte is 32 plus the other.  `dst' comes from the
   enclosing scope.  */

#define ENCODE_COMPOSITION_RULE(rule)                   \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    COMPOSITION_DECODE_RULE (rule, gref, nref);         \
    dst[0] = 32 + 81 + gref;                            \
    dst[1] = 32 + nref;                                 \
    dst += 2;                                           \
  } while (0)
1798
/* Emit at DST the escape sequence that opens a composition: ESC 0 for
   a relative composition, ESC 3 for a composition with alternate
   characters, and ESC 4 for any other method.  DATA points to an
   array of integers describing the composition (see the comment in
   coding.h for its format); DATA[3] is stored as the composing state
   of CODING.  For the non-relative methods, the component index of
   CODING is set to four entries past `cmp_data_start' and
   `composition_rule_follows' is cleared.  `dst' comes from the
   enclosing scope.  */

#define ENCODE_COMPOSITION_START(coding, data)                          \
  do {                                                                  \
    coding->composing = data[3];                                        \
    *dst++ = ISO_CODE_ESC;                                              \
    if (coding->composing != COMPOSITION_RELATIVE)                      \
      {                                                                 \
        if (coding->composing == COMPOSITION_WITH_ALTCHARS)             \
          *dst++ = '3';                                                 \
        else                                                            \
          *dst++ = '4';                                                 \
        coding->composition_rule_follows = 0;                           \
        coding->cmp_data_index = coding->cmp_data_start + 4;            \
      }                                                                 \
    else                                                                \
      *dst++ = '0';                                                     \
  } while (0)
1818
/* Emit at DST the sequence ESC 1 that closes the current composition
   and mark CODING as no longer composing.  `cmp_data_start' of CODING
   is advanced by DATA[0]; if that reaches the used size of the
   current composition data block and a next block exists, CODING
   moves on to the start of that next block.  `dst' comes from the
   enclosing scope.  */

#define ENCODE_COMPOSITION_END(coding, data)                    \
  do {                                                          \
    *dst++ = ISO_CODE_ESC;                                      \
    *dst++ = '1';                                               \
    coding->composing = COMPOSITION_NO;                         \
    coding->cmp_data_start += data[0];                          \
    if (coding->cmp_data_start == coding->cmp_data->used)       \
      {                                                         \
        if (coding->cmp_data->next)                             \
          {                                                     \
            coding->cmp_data = coding->cmp_data->next;          \
            coding->cmp_data_start = 0;                         \
          }                                                     \
      }                                                         \
  } while (0)
1834
/* Emit at DST the composition start sequence ESC 0 and switch CODING
   to the relative-composition state.  Here the sequence does not
   open a new composition: it marks that the components (alternate
   chars and composition rules) of the composition have just been
   produced and that the actual text follows in SRC.  `dst' comes from
   the enclosing scope.  */

#define ENCODE_COMPOSITION_FAKE_START(coding)   \
  do {                                          \
    coding->composing = COMPOSITION_RELATIVE;   \
    dst[0] = ISO_CODE_ESC;                      \
    dst[1] = '0';                               \
    dst += 2;                                   \
  } while (0)
1846
/* The following three macros produce codes for indicating direction
   of text.  All of them write at DST and read `coding' from the
   enclosing scope, and each expands to a single statement.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER emits the two bytes ESC [ when
   CODING has CODING_FLAG_ISO_SEVEN_BITS set, and the single byte
   ISO_CODE_CSI otherwise.  The flag is tested as a bit of
   `coding->flags', as everywhere else in this file; comparing the
   whole flag word for equality would fail whenever any other flag is
   set as well.

   ENCODE_DIRECTION_R2L and ENCODE_DIRECTION_L2R emit the introducer
   followed by `2 ]' and `0 ]' respectively.  They are statement
   macros because the introducer is a `do ... while (0)' statement
   and therefore cannot be an operand of the comma operator.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

#define ENCODE_DIRECTION_R2L                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '2';                               \
    *dst++ = ']';                               \
  } while (0)

#define ENCODE_DIRECTION_L2R                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '0';                               \
    *dst++ = ']';                               \
  } while (0)
1862
/* Emit at DST the invocation and designation codes that bring the
   graphic planes and registers of CODING back to their initial state.
   A shift-in is produced unless register 0 is already invoked to
   graphic plane 0.  Then each of the four registers that has an
   initial designation (a non-negative value) different from its
   current designation is re-designated to the initial charset.
   `coding' and `dst' come from the enclosing scope.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                     \
  do {                                                                      \
    int reg = 0;                                                            \
                                                                            \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
      {                                                                     \
        ENCODE_SHIFT_IN;                                                    \
      }                                                                     \
    while (reg < 4)                                                         \
      {                                                                     \
        if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0)         \
          {                                                                 \
            if (CODING_SPEC_ISO_DESIGNATION (coding, reg)                   \
                != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg))       \
              ENCODE_DESIGNATION                                            \
                (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg),         \
                 reg, coding);                                              \
          }                                                                 \
        reg++;                                                              \
      }                                                                     \
  } while (0)
1877
1878 /* Produce designation sequences of charsets in the line started from
1879    SRC to a place pointed by *DSTP, and update DSTP.
1880
1881    If the current block ends before any end-of-line, we may fail to
1882    find all the necessary designations.  */
1883
1884 void
1885 encode_designation_at_bol (coding, table, src, src_end, dstp)
1886      struct coding_system *coding;
1887      Lisp_Object table;
1888      unsigned char *src, *src_end, **dstp;
1889 {
1890   int charset, c, found = 0, reg;
1891   /* Table of charsets to be designated to each graphic register.  */
1892   int r[4];
1893   unsigned char *dst = *dstp;
1894
1895   for (reg = 0; reg < 4; reg++)
1896     r[reg] = -1;
1897
1898   while (src < src_end && *src != '\n' && found < 4)
1899     {
1900       int bytes = BYTES_BY_CHAR_HEAD (*src);
1901
1902       if (NILP (table))
1903         charset = CHARSET_AT (src);
1904       else
1905         {
1906           int c_alt;
1907           unsigned char c1, c2;
1908
1909           SPLIT_STRING(src, bytes, charset, c1, c2);
1910           if ((c_alt = translate_char (table, -1, charset, c1, c2)) >= 0)
1911             charset = CHAR_CHARSET (c_alt);
1912         }
1913
1914       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1915       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
1916         {
1917           found++;
1918           r[reg] = charset;
1919         }
1920
1921       src += bytes;
1922     }
1923
1924   if (found)
1925     {
1926       for (reg = 0; reg < 4; reg++)
1927         if (r[reg] >= 0
1928             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
1929           ENCODE_DESIGNATION (r[reg], reg, coding);
1930       *dstp = dst;
1931     }
1932 }
1933
1934 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
        Encode the SRC_BYTES bytes at SOURCE according to CODING and
        store the result at DESTINATION.  When DST_BYTES is nonzero it is
        the size of the destination; when it is zero the loop instead
        continues only while DST is more than 13 bytes behind SRC.
        CODING->consumed_char is counted while encoding, and
        CODING->consumed, CODING->produced and CODING->produced_char are
        set on exit.  The value starts as CODING_FINISH_NORMAL and
        becomes CODING_FINISH_INSUFFICIENT_SRC when a multi-byte
        character is cut off at the end of the source, or
        CODING_FINISH_INSUFFICIENT_DST when the loop stops with source
        still left.  */
1935
1936 int
1937 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1938      struct coding_system *coding;
1939      unsigned char *source, *destination;
1940      int src_bytes, dst_bytes;
1941 {
1942   unsigned char *src = source;
1943   unsigned char *src_end = source + src_bytes;
1944   unsigned char *dst = destination;
1945   unsigned char *dst_end = destination + dst_bytes;
1946   /* Since the maximum bytes produced by each loop is 14, we subtract 13
1947      from DST_END to assure overflow checking is necessary only at the
1948      head of loop.  */
1949   unsigned char *adjusted_dst_end = dst_end - 13;
1950   Lisp_Object translation_table
1951       = coding->translation_table_for_encode;
1952   int result = CODING_FINISH_NORMAL;
1953
       /* Use the standard table when character translation is enabled but
          CODING has no table of its own.  */
1954   if (!NILP (Venable_character_translation) && NILP (translation_table))
1955     translation_table = Vstandard_translation_table_for_encode;
1956
1957   coding->consumed_char = 0;
1958   coding->fake_multibyte = 0;
1959   while (src < src_end && (dst_bytes
1960                            ? (dst < adjusted_dst_end)
1961                            : (dst < src - 13)))
1962     {
1963       /* SRC_BASE remembers the start position in source in each loop.
1964          The loop will be exited when there's not enough source text
1965          to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1966          TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
1967          reset to SRC_BASE before exiting.  */
1968       unsigned char *src_base = src;
1969       int charset, c1, c2, c3, c4;
1970
1971       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1972           && CODING_SPEC_ISO_BOL (coding))
1973         {
1974           /* We have to produce designation sequences if any now.  */
1975           encode_designation_at_bol (coding, translation_table,
1976                                      src, src_end, &dst);
1977           CODING_SPEC_ISO_BOL (coding) = 0;
1978         }
1979
1980       /* Check composition start and end.  */
1981       if (coding->composing != COMPOSITION_DISABLED
1982           && coding->cmp_data_start < coding->cmp_data->used)
1983         {
1984           struct composition_data *cmp_data = coding->cmp_data;
1985           int *data = cmp_data->data + coding->cmp_data_start;
1986           int this_pos = cmp_data->char_offset + coding->consumed_char;
1987
1988           if (coding->composing == COMPOSITION_RELATIVE)
1989             {
1990               if (this_pos == data[2])
1991                 {
1992                   ENCODE_COMPOSITION_END (coding, data);
                       /* CMP_DATA and DATA are reloaded from CODING here;
                          presumably the macro above may advance them --
                          confirm against its definition.  */
1993                   cmp_data = coding->cmp_data;
1994                   data = cmp_data->data + coding->cmp_data_start;
1995                 }
1996             }
1997           else if (COMPOSING_P (coding))
1998             {
1999               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR  */
2000               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2001                 /* We have consumed components of the composition.
2002                    What follows in SRC is the compositions's base
2003                    text.  */
2004                 ENCODE_COMPOSITION_FAKE_START (coding);
2005               else
2006                 {
2007                   int c = cmp_data->data[coding->cmp_data_index++];
2008                   if (coding->composition_rule_follows)
2009                     {
2010                       ENCODE_COMPOSITION_RULE (c);
2011                       coding->composition_rule_follows = 0;
2012                     }
2013                   else
2014                     {
2015                       SPLIT_CHAR (c, charset, c1, c2);
2016                       ENCODE_ISO_CHARACTER (charset, c1, c2);
2017                       /* But, we didn't consume a character in SRC.  */
2018                       coding->consumed_char--;
2019                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2020                         coding->composition_rule_follows = 1;
2021                     }
2022                   continue;
2023                 }
2024             }
2025           if (!COMPOSING_P (coding))
2026             {
2027               if (this_pos == data[1])
2028                 {
2029                   ENCODE_COMPOSITION_START (coding, data);
2030                   continue;
2031                 }
2032             }
2033         }
2034
2035       c1 = *src++;
2036       /* Now encode one character.  C1 is a control character, an
2037          ASCII character, or a leading-code of multi-byte character.  */
2038       switch (emacs_code_class[c1])
2039         {
2040         case EMACS_ascii_code:
2041           c2 = 0;
2042           ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
2043           break;
2044
2045         case EMACS_control_code:
2046           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2047             ENCODE_RESET_PLANE_AND_REGISTER;
2048           *dst++ = c1;
2049           coding->consumed_char++;
2050           break;
2051
2052         case EMACS_carriage_return_code:
2053           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2054             {
2055               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2056                 ENCODE_RESET_PLANE_AND_REGISTER;
2057               *dst++ = c1;
2058               coding->consumed_char++;
2059               break;
2060             }
2061           /* fall down to treat '\r' as '\n' ...  */
2062
2063         case EMACS_linefeed_code:
2064           if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2065             ENCODE_RESET_PLANE_AND_REGISTER;
               /* Restore the current designations from the initial ones.  */
2066           if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2067             bcopy (coding->spec.iso2022.initial_designation,
2068                    coding->spec.iso2022.current_designation,
2069                    sizeof coding->spec.iso2022.initial_designation);
               /* Emit the end-of-line sequence selected by CODING->eol_type:
                  LF (also for the undecided type), CR LF, or CR.  */
2070           if (coding->eol_type == CODING_EOL_LF
2071               || coding->eol_type == CODING_EOL_UNDECIDED)
2072             *dst++ = ISO_CODE_LF;
2073           else if (coding->eol_type == CODING_EOL_CRLF)
2074             *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2075           else
2076             *dst++ = ISO_CODE_CR;
2077           CODING_SPEC_ISO_BOL (coding) = 1;
2078           coding->consumed_char++;
2079           break;
2080
2081         case EMACS_leading_code_2:
2082           ONE_MORE_BYTE (c2);
2083           c3 = 0;
2084           if (c2 < 0xA0)
2085             {
2086               /* invalid sequence: emit C1 alone and re-read C2 next time */
2087               *dst++ = c1;
2088               src--;
2089               coding->consumed_char++;
2090             }
2091           else
2092             ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
2093           break;
2094
2095         case EMACS_leading_code_3:
2096           TWO_MORE_BYTES (c2, c3);
2097           c4 = 0;
               /* For a valid sequence, C1 is passed as the charset when it
                  is below LEADING_CODE_PRIVATE_11; otherwise C2 is passed
                  as the charset and C3 as the only position code.  */
2098           if (c2 < 0xA0 || c3 < 0xA0)
2099             {
2100               /* invalid sequence */
2101               *dst++ = c1;
2102               src -= 2;
2103               coding->consumed_char++;
2104             }
2105           else if (c1 < LEADING_CODE_PRIVATE_11)
2106             ENCODE_ISO_CHARACTER (c1, c2, c3);
2107           else
2108             ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
2109           break;
2110
2111         case EMACS_leading_code_4:
2112           THREE_MORE_BYTES (c2, c3, c4);
2113           if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
2114             {
2115               /* invalid sequence */
2116               *dst++ = c1;
2117               src -= 3;
2118               coding->consumed_char++;
2119             }
2120           else
2121             ENCODE_ISO_CHARACTER (c2, c3, c4);
2122           break;
2123
2124         case EMACS_invalid_code:
2125           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2126             ENCODE_RESET_PLANE_AND_REGISTER;
2127           *dst++ = c1;
2128           coding->consumed_char++;
2129           break;
2130         }
2131       continue;
           /* No goto to this label is visible in this function; presumably
              the *_MORE_BYTE(S) macros jump here when SRC runs out.  */
2132     label_end_of_loop:
2133       result = CODING_FINISH_INSUFFICIENT_SRC;
2134       src = src_base;
2135       break;
2136     }
2137
2138   if (src < src_end && result == CODING_FINISH_NORMAL)
2139     result = CODING_FINISH_INSUFFICIENT_DST;
2140
2141   /* If this is the last block of the text to be encoded, we must
2142      reset graphic planes and registers to the initial state, and
2143      flush out the carryover if any.  */
2144   if (coding->mode & CODING_MODE_LAST_BLOCK)
2145     {
2146       ENCODE_RESET_PLANE_AND_REGISTER;
           /* A composition is still in progress: emit ESC '1' (presumably
              the sequence that ends a composition -- confirm against
              ENCODE_COMPOSITION_END).  */
2147       if (COMPOSING_P (coding))
2148         *dst++ = ISO_CODE_ESC, *dst++ = '1';
2149       if (result == CODING_FINISH_INSUFFICIENT_SRC)
2150         {
               /* Copy the unconsumed source bytes as they are, as far as
                  the destination has room.  */
2151           while (src < src_end && dst < dst_end)
2152             *dst++ = *src++;
2153         }
2154     }
2155   coding->consumed = src - source;
2156   coding->produced = coding->produced_char = dst - destination;
2157   return result;
2158 }
2159
2160 \f
2161 /*** 4. SJIS and BIG5 handlers ***/
2162
2163 /* Although SJIS and BIG5 are not ISO's coding system, they are used
2164    quite widely.  So, for the moment, Emacs supports them in the bare
2165    C code.  But, in the future, they may be supported only by CCL.  */
2166
2167 /* SJIS is a coding system encoding three character sets: ASCII, right
2168    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
2169    as is.  A character of charset katakana-jisx0201 is encoded by
2170    "position-code + 0x80".  A character of charset japanese-jisx0208
2171    is encoded in two bytes, but its two position-codes are divided and
2172    shifted so that they fit in the ranges below.
2173
2174    --- CODE RANGE of SJIS ---
2175    (character set)      (range)
2176    ASCII                0x00 .. 0x7F
2177    KATAKANA-JISX0201    0xA0 .. 0xDF
2178    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2179             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2180    -------------------------------
2181
2182 */
2183
2184 /* BIG5 is a coding system encoding two character sets: ASCII and
2185    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2186    character set and is encoded in two bytes.
2187
2188    --- CODE RANGE of BIG5 ---
2189    (character set)      (range)
2190    ASCII                0x00 .. 0x7F
2191    Big5 (1st byte)      0xA1 .. 0xFE
2192         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2193    --------------------------
2194
2195    Since the number of characters in Big5 is larger than maximum
2196    characters in Emacs' charset (96x96), it can't be handled as one
2197    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2198    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2199    contains frequently used characters and the latter contains less
2200    frequently used characters.  */
2201
2202 /* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
2203    are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
2204    C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
2205    format.  CHARSET is `charset_big5_1' or `charset_big5_2'.  */
2206
2207 /* Number of Big5 characters which have the same code in 1st byte:
        0xFF - 0xA1 codes for the 2nd-byte range 0xA1 .. 0xFE plus
        0x7F - 0x40 codes for the 2nd-byte range 0x40 .. 0x7E.  */
2208 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2209
     /* Convert the Big5 byte pair B1, B2 into CHARSET and the position
        codes C1, C2.  The pair is first turned into a linear index:
        (B1 - 0xA1) rows of BIG5_SAME_ROW codes plus the column, which is
        B2 - 0x40 for B2 below 0x7F and B2 - 0x62 otherwise.  If B1 is
        below 0xC9, CHARSET is set to charset_big5_1; otherwise it is set
        to charset_big5_2 and the index is rebased by (0xC9 - 0xA1) rows.
        C1 and C2 are then the quotient and remainder of the index by
        (0xFF - 0xA1), each offset by 0x21.
        The arguments are expanded without parentheses and B1 and B2 are
        evaluated more than once; CHARSET, C1 and C2 are assigned to.  */
2210 #define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
2211   do {                                                                  \
2212     unsigned int temp                                                   \
2213       = (b1 - 0xA1) * BIG5_SAME_ROW + b2 - (b2 < 0x7F ? 0x40 : 0x62);   \
2214     if (b1 < 0xC9)                                                      \
2215       charset = charset_big5_1;                                         \
2216     else                                                                \
2217       {                                                                 \
2218         charset = charset_big5_2;                                       \
2219         temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
2220       }                                                                 \
2221     c1 = temp / (0xFF - 0xA1) + 0x21;                                   \
2222     c2 = temp % (0xFF - 0xA1) + 0x21;                                   \
2223   } while (0)
2224
     /* Convert CHARSET and the position codes C1, C2 into the Big5 byte
        pair B1, B2.  The linear index is (C1 - 0x21) * (0xFF - 0xA1)
        + (C2 - 0x21), advanced by (0xC9 - 0xA1) rows of BIG5_SAME_ROW
        codes when CHARSET is charset_big5_2.  B1 is the row plus 0xA1.
        B2 is the column within the row, offset by 0x40 when the column
        is below 0x3F and by 0x62 otherwise.
        The arguments are expanded without parentheses; B1 and B2 are
        assigned to, and B2 is read back after its first assignment.  */
2225 #define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
2226   do {                                                                  \
2227     unsigned int temp = (c1 - 0x21) * (0xFF - 0xA1) + (c2 - 0x21);      \
2228     if (charset == charset_big5_2)                                      \
2229       temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
2230     b1 = temp / BIG5_SAME_ROW + 0xA1;                                   \
2231     b2 = temp % BIG5_SAME_ROW;                                          \
2232     b2 += b2 < 0x3F ? 0x40 : 0x62;                                      \
2233   } while (0)
2234
     /* Produce the decoded character given by CHARSET and the position
        codes C1, C2.  This macro reads the variable `translation_table'
        of the place where it is expanded: if that is not nil and
        translate_char returns a non-negative character for (CHARSET, C1,
        C2), that character is split by SPLIT_CHAR into CHARSET_ALT, C1
        and C2, replacing the original values (so C1 and C2 presumably
        have to be variables -- SPLIT_CHAR is defined elsewhere).
        The character is then handed to DECODE_CHARACTER_ASCII when
        CHARSET_ALT is CHARSET_ASCII or negative, to
        DECODE_CHARACTER_DIMENSION1 when its dimension is 1, and to
        DECODE_CHARACTER_DIMENSION2 otherwise.  */
2235 #define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                     \
2236   do {                                                                  \
2237     int c_alt, charset_alt = (charset);                                 \
2238     if (!NILP (translation_table)                                       \
2239         && ((c_alt = translate_char (translation_table,                 \
2240                                      -1, (charset), c1, c2)) >= 0))     \
2241       SPLIT_CHAR (c_alt, charset_alt, c1, c2);                          \
2242     if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
2243       DECODE_CHARACTER_ASCII (c1);                                      \
2244     else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
2245       DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
2246     else                                                                \
2247       DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
2248   } while (0)
2249
     /* Store at DST the SJIS or BIG5 encoding of the character given by
        CHARSET and the position codes C1, C2, and count it in
        CODING->consumed_char.  This macro uses the variables
        `translation_table', `dst', `coding' and `sjis_p' of the place
        where it is expanded.
        If `translation_table' is not nil and translate_char returns a
        non-negative character, that character is split by SPLIT_CHAR
        into CHARSET_ALT, C1 and C2 and encoded instead.  Then:
          - ASCII: one byte, C1.
          - dimension 1: for SJIS, katakana-jisx0201 is stored as C1 and
            latin-jisx0201 as C1 & 0x7F; anything else is stored as the
            two bytes CHARSET_ALT, C1 with CODING->fake_multibyte set.
          - otherwise C1 and C2 are masked with 0x7F.  jisx0208 (or
            jisx0208_1978) under SJIS goes through ENCODE_SJIS and
            big5_1/big5_2 under BIG5 through ENCODE_BIG5, two bytes
            each; anything else is stored as the three bytes
            CHARSET_ALT, C1, C2 with CODING->fake_multibyte set.
        So one expansion stores at most three bytes.  */
2250 #define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)             \
2251   do {                                                          \
2252     int c_alt, charset_alt;                                     \
2253     if (!NILP (translation_table)                               \
2254         && ((c_alt = translate_char (translation_table, -1,     \
2255                                      charset, c1, c2))          \
2256             >= 0))                                              \
2257       SPLIT_CHAR (c_alt, charset_alt, c1, c2);                  \
2258     else                                                        \
2259       charset_alt = charset;                                    \
2260     if (charset_alt == charset_ascii)                           \
2261       *dst++ = c1;                                              \
2262     else if (CHARSET_DIMENSION (charset_alt) == 1)              \
2263       {                                                         \
2264         if (sjis_p && charset_alt == charset_katakana_jisx0201) \
2265           *dst++ = c1;                                          \
2266         else if (sjis_p && charset_alt == charset_latin_jisx0201) \
2267           *dst++ = c1 & 0x7F;                                   \
2268         else                                                    \
2269           {                                                     \
2270             *dst++ = charset_alt, *dst++ = c1;                  \
2271             coding->fake_multibyte = 1;                         \
2272           }                                                     \
2273       }                                                         \
2274     else                                                        \
2275       {                                                         \
2276         c1 &= 0x7F, c2 &= 0x7F;                                 \
2277         if (sjis_p && (charset_alt == charset_jisx0208          \
2278                        || charset_alt == charset_jisx0208_1978))\
2279           {                                                     \
2280             unsigned char s1, s2;                               \
2281                                                                 \
2282             ENCODE_SJIS (c1, c2, s1, s2);                       \
2283             *dst++ = s1, *dst++ = s2;                           \
2284             coding->fake_multibyte = 1;                         \
2285           }                                                     \
2286         else if (!sjis_p                                        \
2287                  && (charset_alt == charset_big5_1              \
2288                      || charset_alt == charset_big5_2))         \
2289           {                                                     \
2290             unsigned char b1, b2;                               \
2291                                                                 \
2292             ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);          \
2293             *dst++ = b1, *dst++ = b2;                           \
2294           }                                                     \
2295         else                                                    \
2296           {                                                     \
2297             *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;     \
2298             coding->fake_multibyte = 1;                         \
2299           }                                                     \
2300       }                                                         \
2301     coding->consumed_char++;                                    \
2302   } while (0)
2303
2304 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2305    Scan the bytes in [SRC, SRC_END).  Return 0 as soon as a byte in
2306    0x80..0x9F or >= 0xE0 is followed by a byte below 0x40, else
2307    return CODING_CATEGORY_MASK_SJIS.  */
2308
2309 int
2310 detect_coding_sjis (src, src_end)
2311      unsigned char *src, *src_end;
2312 {
2313   while (src < src_end)
2314     {
2315       unsigned char lead = *src++;
2316
2317       /* Any other byte is accepted on its own.  */
2318       if (lead < 0x80 || (lead >= 0xA0 && lead < 0xE0))
2319         continue;
2320       if (src < src_end && *src++ < 0x40)
2321         return 0;
2322     }
2323   return CODING_CATEGORY_MASK_SJIS;
2324 }
2325
2326 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2327    Scan the bytes in [SRC, SRC_END).  Return 0 as soon as a byte
2328    >= 0xA1 is followed by a byte below 0x40 or in 0x7F..0xA0, else
2329    return CODING_CATEGORY_MASK_BIG5.  */
2330
2331 int
2332 detect_coding_big5 (src, src_end)
2333      unsigned char *src, *src_end;
2334 {
2335   while (src < src_end)
2336     {
2337       unsigned char trail;
2338       /* Bytes below 0xA1 are accepted on their own.  */
2339       if (*src++ < 0xA1)
2340         continue;
2341       /* A 1st byte that ends the text is accepted as well.  */
2342       if (! (src < src_end))
2343         break;
2344       trail = *src++;
2345       if (trail < 0x40 || (trail > 0x7E && trail < 0xA1))
2346         return 0;
2347     }
2348   return CODING_CATEGORY_MASK_BIG5;
2349 }
2350
2351 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2352    If SJIS_P is 1, decode SJIS text, else decode BIG5 text.
        Bytes below 0x20 are copied, with end-of-line conversion according
        to CODING->eol_type; bytes below 0x80 are handed to
        DECODE_SJIS_BIG5_CHARACTER as ASCII; other bytes are decoded as
        JISX0208 or JISX0201-Kana (SJIS) or as Big5 (BIG5).  Bytes that do
        not form a valid code are copied as they are and
        CODING->fake_multibyte is set.
        When DST_BYTES is nonzero it is the size of the destination.
        The value is CODING_FINISH_NORMAL, CODING_FINISH_INSUFFICIENT_SRC,
        CODING_FINISH_INSUFFICIENT_DST or CODING_FINISH_INCONSISTENT_EOL.  */
2353
2354 int
2355 decode_coding_sjis_big5 (coding, source, destination,
2356                          src_bytes, dst_bytes, sjis_p)
2357      struct coding_system *coding;
2358      unsigned char *source, *destination;
2359      int src_bytes, dst_bytes;
2360      int sjis_p;
2361 {
2362   unsigned char *src = source;
2363   unsigned char *src_end = source + src_bytes;
2364   unsigned char *dst = destination;
2365   unsigned char *dst_end = destination + dst_bytes;
2366   /* Since the maximum bytes produced by each loop is 4, we subtract 3
2367      from DST_END to assure overflow checking is necessary only at the
2368      head of loop.  */
2369   unsigned char *adjusted_dst_end = dst_end - 3;
2370   Lisp_Object translation_table
2371       = coding->translation_table_for_decode;
2372   int result = CODING_FINISH_NORMAL;
2373
       /* Use the standard table when character translation is enabled but
          CODING has no table of its own.  */
2374   if (!NILP (Venable_character_translation) && NILP (translation_table))
2375     translation_table = Vstandard_translation_table_for_decode;
2376
2377   coding->produced_char = 0;
2378   coding->fake_multibyte = 0;
2379   while (src < src_end && (dst_bytes
2380                            ? (dst < adjusted_dst_end)
2381                            : (dst < src - 3)))
2382     {
2383       /* SRC_BASE remembers the start position in source in each loop.
2384          The loop will be exited when there's not enough source text
2385          to analyze two-byte character (within macro ONE_MORE_BYTE).
2386          In that case, SRC is reset to SRC_BASE before exiting.  */
2387       unsigned char *src_base = src;
2388       unsigned char c1 = *src++, c2, c3, c4;
2389
2390       if (c1 < 0x20)
2391         {
2392           if (c1 == '\r')
2393             {
2394               if (coding->eol_type == CODING_EOL_CRLF)
2395                 {
2396                   ONE_MORE_BYTE (c2);
2397                   if (c2 == '\n')
2398                     *dst++ = c2;
2399                   else if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2400                     {
2401                       result = CODING_FINISH_INCONSISTENT_EOL;
2402                       goto label_end_of_loop_2;
2403                     }
2404                   else
2405                     /* To process C2 again, SRC is subtracted by 1.  */
2406                     *dst++ = c1, src--;
2407                 }
2408               else if (coding->eol_type == CODING_EOL_CR)
2409                 *dst++ = '\n';
2410               else
2411                 *dst++ = c1;
2412             }
2413           else if (c1 == '\n'
2414                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2415                    && (coding->eol_type == CODING_EOL_CR
2416                        || coding->eol_type == CODING_EOL_CRLF))
2417             {
2418               result = CODING_FINISH_INCONSISTENT_EOL;
2419               goto label_end_of_loop_2;
2420             }
2421           else
2422             *dst++ = c1;
2423           coding->produced_char++;
2424         }
2425       else if (c1 < 0x80)
2426         {
2427           c2 = 0;               /* avoid warning */
2428           DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2429         }
2430       else
2431         {
2432           if (sjis_p)
2433             {
2434               if (c1 < 0xA0 || (c1 >= 0xE0 && c1 < 0xF0))
2435                 {
2436                   /* SJIS -> JISX0208 */
2437                   ONE_MORE_BYTE (c2);
2438                   if (c2 >= 0x40 && c2 != 0x7F && c2 <= 0xFC)
2439                     {
2440                       DECODE_SJIS (c1, c2, c3, c4);
2441                       DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2442                     }
2443                   else
2444                     goto label_invalid_code_2;
2445                 }
2446               else if (c1 < 0xE0)
2447                 /* SJIS -> JISX0201-Kana */
2448                 {
2449                   c2 = 0;       /* avoid warning */
2450                   DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
2451                                               /* dummy */ c2);
2452                 }
2453               else
2454                 goto label_invalid_code_1;
2455             }
2456           else
2457             {
2458               /* BIG5 -> Big5 */
2459               if (c1 >= 0xA1 && c1 <= 0xFE)
2460                 {
2461                   ONE_MORE_BYTE (c2);
2462                   if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2463                     {
2464                       int charset;
2465
2466                       DECODE_BIG5 (c1, c2, charset, c3, c4);
2467                       DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2468                     }
2469                   else
2470                     goto label_invalid_code_2;
2471                 }
2472               else
2473                 goto label_invalid_code_1;
2474             }
2475         }
2476       continue;
2477
           /* C1 alone is not a valid code: copy it as is.  */
2478     label_invalid_code_1:
2479       *dst++ = c1;
2480       coding->produced_char++;
2481       coding->fake_multibyte = 1;
2482       continue;
2483
           /* C1 followed by C2 is not a valid code: copy both as they are.  */
2484     label_invalid_code_2:
2485       *dst++ = c1; *dst++= c2;
2486       coding->produced_char += 2;
2487       coding->fake_multibyte = 1;
2488       continue;
2489
           /* No goto to label_end_of_loop is visible in this function;
              presumably ONE_MORE_BYTE jumps there when SRC runs out.  */
2490     label_end_of_loop:
2491       result = CODING_FINISH_INSUFFICIENT_SRC;
2492     label_end_of_loop_2:
2493       src = src_base;
2494       break;
2495     }
2496
2497   if (src < src_end)
2498     {
2499       if (result == CODING_FINISH_NORMAL)
2500         result = CODING_FINISH_INSUFFICIENT_DST;
2501       else if (result != CODING_FINISH_INCONSISTENT_EOL
2502                && coding->mode & CODING_MODE_LAST_BLOCK)
2503         {
               /* This is the last block and the source ends in the middle
                  of a character.  Flush the remaining source bytes to the
                  destination as they are, as far as there is room.  bcopy
                  takes the source first and the destination second.  */
2504           src_bytes = src_end - src;
2505           if (dst_bytes && (dst_end - dst < src_bytes))
2506             src_bytes = dst_end - dst;
2507           bcopy (src, dst, src_bytes);
2508           src += src_bytes;
2509           dst += src_bytes;
2510           coding->fake_multibyte = 1;
2511         }
2512     }
2513
2514   coding->consumed = coding->consumed_char = src - source;
2515   coding->produced = dst - destination;
2516   return result;
2517 }
2518
2519 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2520    This function can encode `charset_ascii', `charset_katakana_jisx0201',
2521    `charset_jisx0208', `charset_big5_1', and `charset_big5_2'.  We are
2522    sure that all these charsets are registered as official charset
2523    (i.e. do not have extended leading-codes).  Characters of other
2524    charsets are produced without any encoding.  If SJIS_P is 1, encode
2525    SJIS text, else encode BIG5 text.
        When DST_BYTES is nonzero it is the size of the destination.
        CODING->consumed_char is counted while encoding, and
        CODING->consumed, CODING->produced and CODING->produced_char are
        set on exit.  The value is CODING_FINISH_NORMAL,
        CODING_FINISH_INSUFFICIENT_SRC or CODING_FINISH_INSUFFICIENT_DST.  */
2526
2527 int
2528 encode_coding_sjis_big5 (coding, source, destination,
2529                          src_bytes, dst_bytes, sjis_p)
2530      struct coding_system *coding;
2531      unsigned char *source, *destination;
2532      int src_bytes, dst_bytes;
2533      int sjis_p;
2534 {
2535   unsigned char *src = source;
2536   unsigned char *src_end = source + src_bytes;
2537   unsigned char *dst = destination;
2538   unsigned char *dst_end = destination + dst_bytes;
2539   /* Each loop may store up to 3 bytes (see the last branch of
2540      ENCODE_SJIS_BIG5_CHARACTER), so we subtract 2 from DST_END to
2541      assure overflow checking is necessary only at the head of loop.  */
2542   unsigned char *adjusted_dst_end = dst_end - 2;
2543   Lisp_Object translation_table
2544       = coding->translation_table_for_encode;
2545   int result = CODING_FINISH_NORMAL;
2546
       /* Use the standard table when character translation is enabled but
          CODING has no table of its own.  */
2547   if (!NILP (Venable_character_translation) && NILP (translation_table))
2548     translation_table = Vstandard_translation_table_for_encode;
2549
2550   coding->consumed_char = 0;
2551   coding->fake_multibyte = 0;
       /* With DST_BYTES zero, DST is only compared against SRC.  Each loop
          reads at least one byte before it stores at most three, so a
          margin of two bytes keeps DST from passing SRC.  */
2552   while (src < src_end && (dst_bytes
2553                            ? (dst < adjusted_dst_end)
2554                            : (dst < src - 1)))
2555     {
2556       /* SRC_BASE remembers the start position in source in each loop.
2557          The loop will be exited when there's not enough source text
2558          to analyze multi-byte codes (within macros ONE_MORE_BYTE and
2559          TWO_MORE_BYTES).  In that case, SRC is reset to SRC_BASE
2560          before exiting.  */
2561       unsigned char *src_base = src;
           /* C2 and C3 start as 0 because they are passed as dummy position
              codes below and ENCODE_SJIS_BIG5_CHARACTER reads them.  */
2562       unsigned char c1 = *src++, c2 = 0, c3 = 0, c4;
2563
2564       switch (emacs_code_class[c1])
2565         {
2566         case EMACS_ascii_code:
2567           ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2568           break;
2569
2570         case EMACS_control_code:
2571           *dst++ = c1;
2572           coding->consumed_char++;
2573           break;
2574
2575         case EMACS_carriage_return_code:
2576           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2577             {
2578               *dst++ = c1;
2579               coding->consumed_char++;
2580               break;
2581             }
2582           /* fall down to treat '\r' as '\n' ...  */
2583
2584         case EMACS_linefeed_code:
               /* Emit the end-of-line sequence selected by CODING->eol_type:
                  LF (also for the undecided type), CR LF, or CR.  */
2585           if (coding->eol_type == CODING_EOL_LF
2586               || coding->eol_type == CODING_EOL_UNDECIDED)
2587             *dst++ = '\n';
2588           else if (coding->eol_type == CODING_EOL_CRLF)
2589             *dst++ = '\r', *dst++ = '\n';
2590           else
2591             *dst++ = '\r';
2592           coding->consumed_char++;
2593           break;
2594
2595         case EMACS_leading_code_2:
2596           ONE_MORE_BYTE (c2);
2597           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
2598           break;
2599
2600         case EMACS_leading_code_3:
2601           TWO_MORE_BYTES (c2, c3);
2602           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
2603           break;
2604
2605         case EMACS_leading_code_4:
2606           THREE_MORE_BYTES (c2, c3, c4);
2607           ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
2608           break;
2609
2610         default:                /* i.e. case EMACS_invalid_code: */
2611           *dst++ = c1;
2612           coding->consumed_char++;
2613         }
2614       continue;
2615
           /* No goto to this label is visible in this function; presumably
              the *_MORE_BYTE(S) macros jump here when SRC runs out.  */
2616     label_end_of_loop:
2617       result = CODING_FINISH_INSUFFICIENT_SRC;
2618       src = src_base;
2619       break;
2620     }
2621
2622   if (result == CODING_FINISH_NORMAL
2623       && src < src_end)
2624     result = CODING_FINISH_INSUFFICIENT_DST;
2625   coding->consumed = src - source;
2626   coding->produced = coding->produced_char = dst - destination;
2627   return result;
2628 }
2629
2630 \f
2631 /*** 5. CCL handlers ***/
2632
2633 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2634    Return CODING_CATEGORY_MASK_CCL if a coding system is assigned to
2635    coding-category-ccl and every byte in [SRC, SRC_END) has a nonzero
2636    entry in that coding system's valid_codes table; else return 0.  */
2637
2638 int
2639 detect_coding_ccl (src, src_end)
2640      unsigned char *src, *src_end;
2641 {
2642   unsigned char *table;
2643
2644   /* Without a coding system for the category, nothing can match.  */
2645   if (coding_system_table[CODING_CATEGORY_IDX_CCL] == 0)
2646     return 0;
2647
2648   table = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
2649   for (; src < src_end; src++)
2650     {
2651       if (table[*src] == 0)
2652         return 0;
2653     }
2654   return CODING_CATEGORY_MASK_CCL;
2655 }
2656
2657 \f
2658 /*** 6. End-of-line handlers ***/
2659
2660 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2661    This function is called only when `coding->eol_type' is
2662    CODING_EOL_CRLF or CODING_EOL_CR.  */
2663
2664 int
2665 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2666      struct coding_system *coding;
2667      unsigned char *source, *destination;
2668      int src_bytes, dst_bytes;
2669 {
2670   unsigned char *src = source;
2671   unsigned char *src_end = source + src_bytes;
2672   unsigned char *dst = destination;
2673   unsigned char *dst_end = destination + dst_bytes;
2674   unsigned char c;
2675   int result = CODING_FINISH_NORMAL;
2676
2677   coding->fake_multibyte = 0;
2678
2679   if (src_bytes <= 0)
2680     {
2681       coding->produced = coding->produced_char = 0;
2682       coding->consumed = coding->consumed_char = 0;
2683       return result;
2684     }
2685
2686   switch (coding->eol_type)
2687     {
2688     case CODING_EOL_CRLF:
2689       {
2690         /* Since the maximum bytes produced by each loop is 2, we
2691            subtract 1 from DST_END to assure overflow checking is
2692            necessary only at the head of loop.  */
2693         unsigned char *adjusted_dst_end = dst_end - 1;
2694
2695         while (src < src_end && (dst_bytes
2696                                  ? (dst < adjusted_dst_end)
2697                                  : (dst < src - 1)))
2698           {
2699             unsigned char *src_base = src;
2700
2701             c = *src++;
2702             if (c == '\r')
2703               {
2704                 ONE_MORE_BYTE (c);
2705                 if (c == '\n')
2706                   *dst++ = c;
2707                 else
2708                   {
2709                     if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2710                       {
2711                         result = CODING_FINISH_INCONSISTENT_EOL;
2712                         goto label_end_of_loop_2;
2713                       }
2714                     src--;
2715                     *dst++ = '\r';
2716                     if (BASE_LEADING_CODE_P (c))
2717                       coding->fake_multibyte = 1;
2718                   }
2719               }
2720             else if (c == '\n'
2721                      && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2722               {
2723                 result = CODING_FINISH_INCONSISTENT_EOL;
2724                 goto label_end_of_loop_2;
2725               }
2726             else
2727               {
2728                 *dst++ = c;
2729                 if (BASE_LEADING_CODE_P (c))
2730                   coding->fake_multibyte = 1;
2731               }
2732             continue;
2733
2734           label_end_of_loop:
2735             result = CODING_FINISH_INSUFFICIENT_SRC;
2736           label_end_of_loop_2:
2737             src = src_base;
2738             break;
2739           }
2740         if (src < src_end)
2741           {
2742             if (result == CODING_FINISH_NORMAL)
2743               result = CODING_FINISH_INSUFFICIENT_DST;
2744             else if (result != CODING_FINISH_INCONSISTENT_EOL
2745                      && coding->mode & CODING_MODE_LAST_BLOCK)
2746               {
2747                 /* This is the last block of the text to be decoded.
2748                    We flush out all remaining codes.  */
2749                 src_bytes = src_end - src;
2750                 if (dst_bytes && (dst_end - dst < src_bytes))
2751                   src_bytes = dst_end - dst;
2752                 bcopy (src, dst, src_bytes);
2753                 dst += src_bytes;
2754                 src += src_bytes;
2755               }
2756           }
2757       }
2758       break;
2759
2760     case CODING_EOL_CR:
2761       if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2762         {
2763           while (src < src_end)
2764             {
2765               if ((c = *src++) == '\n')
2766                 break;
2767               if (BASE_LEADING_CODE_P (c))
2768                 coding->fake_multibyte = 1;
2769             }
2770           if (*--src == '\n')
2771             {
2772               src_bytes = src - source;
2773               result = CODING_FINISH_INCONSISTENT_EOL;
2774             }
2775         }
2776       if (dst_bytes && src_bytes > dst_bytes)
2777         {
2778           result = CODING_FINISH_INSUFFICIENT_DST;
2779           src_bytes = dst_bytes;
2780         }
2781       if (dst_bytes)
2782         bcopy (source, destination, src_bytes);
2783       else
2784         safe_bcopy (source, destination, src_bytes);
2785       src = source + src_bytes;
2786       while (src_bytes--) if (*dst++ == '\r') dst[-1] = '\n';
2787       break;
2788
2789     default:                    /* i.e. case: CODING_EOL_LF */
2790       if (dst_bytes && src_bytes > dst_bytes)
2791         {
2792           result = CODING_FINISH_INSUFFICIENT_DST;
2793           src_bytes = dst_bytes;
2794         }
2795       if (dst_bytes)
2796         bcopy (source, destination, src_bytes);
2797       else
2798         safe_bcopy (source, destination, src_bytes);
2799       src += src_bytes;
2800       dst += src_bytes;
2801       coding->fake_multibyte = 1;
2802       break;
2803     }
2804
2805   coding->consumed = coding->consumed_char = src - source;
2806   coding->produced = coding->produced_char = dst - destination;
2807   return result;
2808 }
2809
2810 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2811    format of end-of-line according to `coding->eol_type'.  If
2812    `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code
2813    '\r' in source text also means end-of-line.  */
2814
2815 int
2816 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2817      struct coding_system *coding;
2818      unsigned char *source, *destination;
2819      int src_bytes, dst_bytes;
2820 {
2821   unsigned char *src = source;
2822   unsigned char *dst = destination;
2823   int result = CODING_FINISH_NORMAL;
2824
2825   coding->fake_multibyte = 0;
2826
2827   if (coding->eol_type == CODING_EOL_CRLF)
2828     {
2829       unsigned char c;
2830       unsigned char *src_end = source + src_bytes;
2831       unsigned char *dst_end = destination + dst_bytes;
2832       /* Since the maximum bytes produced by each loop is 2, we
2833          subtract 1 from DST_END to assure overflow checking is
2834          necessary only at the head of loop.  */
2835       unsigned char *adjusted_dst_end = dst_end - 1;
2836
2837       while (src < src_end && (dst_bytes
2838                                ? (dst < adjusted_dst_end)
2839                                : (dst < src - 1)))
2840         {
2841           c = *src++;
2842           if (c == '\n'
2843               || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)))
2844             *dst++ = '\r', *dst++ = '\n';
2845           else
2846             {
2847               *dst++ = c;
2848               if (BASE_LEADING_CODE_P (c))
2849                 coding->fake_multibyte = 1;
2850             }
2851         }
2852       if (src < src_end)
2853         result = CODING_FINISH_INSUFFICIENT_DST;
2854     }
2855   else
2856     {
2857       unsigned char c;
2858
2859       if (dst_bytes && src_bytes > dst_bytes)
2860         {
2861           src_bytes = dst_bytes;
2862           result = CODING_FINISH_INSUFFICIENT_DST;
2863         }
2864       if (dst_bytes)
2865         bcopy (source, destination, src_bytes);
2866       else
2867         safe_bcopy (source, destination, src_bytes);
2868       dst_bytes = src_bytes;
2869       if (coding->eol_type == CODING_EOL_CR)
2870         {
2871           while (src_bytes--)
2872             {
2873               if ((c = *dst++) == '\n')
2874                 dst[-1] = '\r';
2875               else if (BASE_LEADING_CODE_P (c))
2876                 coding->fake_multibyte = 1;
2877             }
2878         }
2879       else
2880         {
2881           if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2882             {
2883               while (src_bytes--)
2884                 if (*dst++ == '\r') dst[-1] = '\n';
2885             }
2886           coding->fake_multibyte = 1;
2887         }
2888       src = source + dst_bytes;
2889       dst = destination + dst_bytes;
2890     }
2891
2892   coding->consumed = coding->consumed_char = src - source;
2893   coding->produced = coding->produced_char = dst - destination;
2894   return result;
2895 }
2896
2897 \f
2898 /*** 7. C library functions ***/
2899
2900 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
2901    has a property `coding-system'.  The value of this property is a
2902    vector of length 5 (called as coding-vector).  Among elements of
2903    this vector, the first (element[0]) and the fifth (element[4])
2904    carry important information for decoding/encoding.  Before
2905    decoding/encoding, this information should be set in fields of a
2906    structure of type `coding_system'.
2907
2908    A value of property `coding-system' can be a symbol of another
2909    subsidiary coding-system.  In that case, Emacs gets coding-vector
2910    from that symbol.
2911
2912    `element[0]' contains information to be set in `coding->type'.  The
2913    value and its meaning is as follows:
2914
2915    0 -- coding_type_emacs_mule
2916    1 -- coding_type_sjis
2917    2 -- coding_type_iso2022
2918    3 -- coding_type_big5
2919    4 -- coding_type_ccl encoder/decoder written in CCL
2920    nil -- coding_type_no_conversion
2921    t -- coding_type_undecided (automatic conversion on decoding,
2922                                no-conversion on encoding)
2923
2924    `element[4]' contains information to be set in `coding->flags' and
2925    `coding->spec'.  The meaning varies by `coding->type'.
2926
2927    If `coding->type' is `coding_type_iso2022', element[4] is a vector
2928    of length 32 (of which the first 17 sub-elements are used now).
2929    Meanings of these sub-elements are:
2930
2931    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2932         If the value is an integer of valid charset, the charset is
2933         assumed to be designated to graphic register N initially.
2934
2935         If the value is minus, it is a minus value of charset which
2936         reserves graphic register N, which means that the charset is
2937         not designated initially but should be designated to graphic
2938         register N just before encoding a character in that charset.
2939
2940         If the value is nil, graphic register N is never used on
2941         encoding.
2942
2943    sub-element[N] where N is 4 through 16: to be set in `coding->flags'
2944         Each value takes t or nil.  See the section ISO2022 of
2945         `coding.h' for more information.
2946
2947    If `coding->type' is `coding_type_big5', element[4] is t to denote
2948    BIG5-ETen or nil to denote BIG5-HKU.
2949
2950    If `coding->type' takes the other value, element[4] is ignored.
2951
2952    Emacs Lisp's coding system also carries information about format of
2953    end-of-line in a value of property `eol-type'.  If the value is
2954    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2955    means CODING_EOL_CR.  If it is not integer, it should be a vector
2956    of subsidiary coding systems of which property `eol-type' has one
2957    of above values.
2958
2959 */
2960
2961 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
2962    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
2963    is setup so that no conversion is necessary and return -1, else
2964    return 0.  */
2965
2966 int
2967 setup_coding_system (coding_system, coding)
2968      Lisp_Object coding_system;
2969      struct coding_system *coding;
2970 {
2971   Lisp_Object coding_spec, coding_type, eol_type, plist;
2972   Lisp_Object val;
2973   int i;
2974
2975   /* Initialize some fields required for all kinds of coding systems.  */
2976   coding->symbol = coding_system;
2977   coding->common_flags = 0;
2978   coding->mode = 0;
2979   coding->heading_ascii = -1;
2980   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2981   coding->composing = COMPOSITION_DISABLED;
2982   coding->cmp_data = NULL;
2983
2984   if (NILP (coding_system))
2985     goto label_invalid_coding_system;
2986
2987   coding_spec = Fget (coding_system, Qcoding_system);
2988
2989   if (!VECTORP (coding_spec)
2990       || XVECTOR (coding_spec)->size != 5
2991       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2992     goto label_invalid_coding_system;
2993
2994   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2995   if (VECTORP (eol_type))
2996     {
2997       coding->eol_type = CODING_EOL_UNDECIDED;
2998       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2999     }
3000   else if (XFASTINT (eol_type) == 1)
3001     {
3002       coding->eol_type = CODING_EOL_CRLF;
3003       coding->common_flags
3004         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3005     }
3006   else if (XFASTINT (eol_type) == 2)
3007     {
3008       coding->eol_type = CODING_EOL_CR;
3009       coding->common_flags
3010         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3011     }
3012   else
3013     coding->eol_type = CODING_EOL_LF;
3014
3015   coding_type = XVECTOR (coding_spec)->contents[0];
3016   /* Try short cut.  */
3017   if (SYMBOLP (coding_type))
3018     {
3019       if (EQ (coding_type, Qt))
3020         {
3021           coding->type = coding_type_undecided;
3022           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
3023         }
3024       else
3025         coding->type = coding_type_no_conversion;
3026       return 0;
3027     }
3028
3029   /* Get values of coding system properties:
3030      `post-read-conversion', `pre-write-conversion',
3031      `translation-table-for-decode', `translation-table-for-encode'.  */
3032   plist = XVECTOR (coding_spec)->contents[3];
3033   /* Pre & post conversion functions should be disabled if
3034      inhibit_eol_conversion is nozero.  This is the case that a code
3035      conversion function is called while those functions are running.  */
3036   if (! inhibit_pre_post_conversion)
3037     {
3038       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
3039       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
3040     }
3041   val = Fplist_get (plist, Qtranslation_table_for_decode);
3042   if (SYMBOLP (val))
3043     val = Fget (val, Qtranslation_table_for_decode);
3044   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
3045   val = Fplist_get (plist, Qtranslation_table_for_encode);
3046   if (SYMBOLP (val))
3047     val = Fget (val, Qtranslation_table_for_encode);
3048   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
3049   val = Fplist_get (plist, Qcoding_category);
3050   if (!NILP (val))
3051     {
3052       val = Fget (val, Qcoding_category_index);
3053       if (INTEGERP (val))
3054         coding->category_idx = XINT (val);
3055       else
3056         goto label_invalid_coding_system;
3057     }
3058   else
3059     goto label_invalid_coding_system;
3060
3061   val = Fplist_get (plist, Qsafe_charsets);
3062   if (EQ (val, Qt))
3063     {
3064       for (i = 0; i <= MAX_CHARSET; i++)
3065         coding->safe_charsets[i] = 1;
3066     }
3067   else
3068     {
3069       bzero (coding->safe_charsets, MAX_CHARSET + 1);
3070       while (CONSP (val))
3071         {
3072           if ((i = get_charset_id (XCAR (val))) >= 0)
3073             coding->safe_charsets[i] = 1;
3074           val = XCDR (val);
3075         }
3076     }
3077
3078   /* If the coding system has non-nil `composition' property, enable
3079      composition handling.  */
3080   val = Fplist_get (plist, Qcomposition);
3081   if (!NILP (val))
3082     coding->composing = COMPOSITION_NO;
3083
3084   switch (XFASTINT (coding_type))
3085     {
3086     case 0:
3087       coding->type = coding_type_emacs_mule;
3088       if (!NILP (coding->post_read_conversion))
3089         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3090       if (!NILP (coding->pre_write_conversion))
3091         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3092       break;
3093
3094     case 1:
3095       coding->type = coding_type_sjis;
3096       coding->common_flags
3097         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3098       break;
3099
3100     case 2:
3101       coding->type = coding_type_iso2022;
3102       coding->common_flags
3103         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3104       {
3105         Lisp_Object val, temp;
3106         Lisp_Object *flags;
3107         int i, charset, reg_bits = 0;
3108
3109         val = XVECTOR (coding_spec)->contents[4];
3110
3111         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3112           goto label_invalid_coding_system;
3113
3114         flags = XVECTOR (val)->contents;
3115         coding->flags
3116           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3117              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3118              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3119              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3120              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3121              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3122              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3123              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3124              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3125              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3126              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3127              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3128              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3129              );
3130
3131         /* Invoke graphic register 0 to plane 0.  */
3132         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3133         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3134         CODING_SPEC_ISO_INVOCATION (coding, 1)
3135           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3136         /* Not single shifting at first.  */
3137         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3138         /* Beginning of buffer should also be regarded as bol. */
3139         CODING_SPEC_ISO_BOL (coding) = 1;
3140
3141         for (charset = 0; charset <= MAX_CHARSET; charset++)
3142           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3143         val = Vcharset_revision_alist;
3144         while (CONSP (val))
3145           {
3146             charset = get_charset_id (Fcar_safe (XCAR (val)));
3147             if (charset >= 0
3148                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3149                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3150               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3151             val = XCDR (val);
3152           }
3153
3154         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
3155            FLAGS[REG] can be one of below:
3156                 integer CHARSET: CHARSET occupies register I,
3157                 t: designate nothing to REG initially, but can be used
3158                   by any charsets,
3159                 list of integer, nil, or t: designate the first
3160                   element (if integer) to REG initially, the remaining
3161                   elements (if integer) is designated to REG on request,
3162                   if an element is t, REG can be used by any charsets,
3163                 nil: REG is never used.  */
3164         for (charset = 0; charset <= MAX_CHARSET; charset++)
3165           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3166             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3167         for (i = 0; i < 4; i++)
3168           {
3169             if (INTEGERP (flags[i])
3170                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
3171                 || (charset = get_charset_id (flags[i])) >= 0)
3172               {
3173                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3174                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3175               }
3176             else if (EQ (flags[i], Qt))
3177               {
3178                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3179                 reg_bits |= 1 << i;
3180                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3181               }
3182             else if (CONSP (flags[i]))
3183               {
3184                 Lisp_Object tail;
3185                 tail = flags[i];
3186
3187                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3188                 if (INTEGERP (XCAR (tail))
3189                     && (charset = XINT (XCAR (tail)),
3190                         CHARSET_VALID_P (charset))
3191                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3192                   {
3193                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3194                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3195                   }
3196                 else
3197                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3198                 tail = XCDR (tail);
3199                 while (CONSP (tail))
3200                   {
3201                     if (INTEGERP (XCAR (tail))
3202                         && (charset = XINT (XCAR (tail)),
3203                             CHARSET_VALID_P (charset))
3204                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3205                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3206                         = i;
3207                     else if (EQ (XCAR (tail), Qt))
3208                       reg_bits |= 1 << i;
3209                     tail = XCDR (tail);
3210                   }
3211               }
3212             else
3213               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3214
3215             CODING_SPEC_ISO_DESIGNATION (coding, i)
3216               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3217           }
3218
3219         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3220           {
3221             /* REG 1 can be used only by locking shift in 7-bit env.  */
3222             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3223               reg_bits &= ~2;
3224             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3225               /* Without any shifting, only REG 0 and 1 can be used.  */
3226               reg_bits &= 3;
3227           }
3228
3229         if (reg_bits)
3230           for (charset = 0; charset <= MAX_CHARSET; charset++)
3231             {
3232               if (CHARSET_VALID_P (charset))
3233                 {
3234                   /* There exist some default graphic registers to be
3235                      used CHARSET.  */
3236
3237                   /* We had better avoid designating a charset of
3238                      CHARS96 to REG 0 as far as possible.  */
3239                   if (CHARSET_CHARS (charset) == 96)
3240                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3241                       = (reg_bits & 2
3242                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3243                   else
3244                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3245                       = (reg_bits & 1
3246                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3247                 }
3248             }
3249       }
3250       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3251       coding->spec.iso2022.last_invalid_designation_register = -1;
3252       break;
3253
3254     case 3:
3255       coding->type = coding_type_big5;
3256       coding->common_flags
3257         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3258       coding->flags
3259         = (NILP (XVECTOR (coding_spec)->contents[4])
3260            ? CODING_FLAG_BIG5_HKU
3261            : CODING_FLAG_BIG5_ETEN);
3262       break;
3263
3264     case 4:
3265       coding->type = coding_type_ccl;
3266       coding->common_flags
3267         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3268       {
3269         val = XVECTOR (coding_spec)->contents[4];
3270         if (! CONSP (val)
3271             || setup_ccl_program (&(coding->spec.ccl.decoder),
3272                                   XCAR (val)) < 0
3273             || setup_ccl_program (&(coding->spec.ccl.encoder),
3274                                   XCDR (val)) < 0)
3275           goto label_invalid_coding_system;
3276
3277         bzero (coding->spec.ccl.valid_codes, 256);
3278         val = Fplist_get (plist, Qvalid_codes);
3279         if (CONSP (val))
3280           {
3281             Lisp_Object this;
3282
3283             for (; CONSP (val); val = XCDR (val))
3284               {
3285                 this = XCAR (val);
3286                 if (INTEGERP (this)
3287                     && XINT (this) >= 0 && XINT (this) < 256)
3288                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3289                 else if (CONSP (this)
3290                          && INTEGERP (XCAR (this))
3291                          && INTEGERP (XCDR (this)))
3292                   {
3293                     int start = XINT (XCAR (this));
3294                     int end = XINT (XCDR (this));
3295
3296                     if (start >= 0 && start <= end && end < 256)
3297                       while (start <= end)
3298                         coding->spec.ccl.valid_codes[start++] = 1;
3299                   }
3300               }
3301           }
3302       }
3303       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3304       break;
3305
3306     case 5:
3307       coding->type = coding_type_raw_text;
3308       break;
3309
3310     default:
3311       goto label_invalid_coding_system;
3312     }
3313   return 0;
3314
3315  label_invalid_coding_system:
3316   coding->type = coding_type_no_conversion;
3317   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3318   coding->common_flags = 0;
3319   coding->eol_type = CODING_EOL_LF;
3320   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3321   return -1;
3322 }
3323
3324 /* Free memory blocks allocated for storing composition information.  */
3325
3326 void
3327 coding_free_composition_data (coding)
3328      struct coding_system *coding;
3329 {
3330   struct composition_data *cmp_data = coding->cmp_data, *next;
3331
3332   if (!cmp_data)
3333     return;
3334   /* Memory blocks are chained.  At first, rewind to the first, then,
3335      free blocks one by one.  */
3336   while (cmp_data->prev)
3337     cmp_data = cmp_data->prev;
3338   while (cmp_data)
3339     {
3340       next = cmp_data->next;
3341       xfree (cmp_data);
3342       cmp_data = next;
3343     }
3344   coding->cmp_data = NULL;
3345 }
3346
3347 /* Set `char_offset' member of all memory blocks pointed by
3348    coding->cmp_data to POS.  */
3349
3350 void
3351 coding_adjust_composition_offset (coding, pos)
3352      struct coding_system *coding;
3353      int pos;
3354 {
3355   struct composition_data *cmp_data;
3356
3357   for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next)
3358     cmp_data->char_offset = pos;
3359 }
3360
3361 /* Setup raw-text or one of its subsidiaries in the structure
3362    coding_system CODING according to the already setup value eol_type
3363    in CODING.  CODING should be setup for some coding system in
3364    advance.  */
3365
3366 void
3367 setup_raw_text_coding_system (coding)
3368      struct coding_system *coding;
3369 {
3370   if (coding->type != coding_type_raw_text)
3371     {
3372       coding->symbol = Qraw_text;
3373       coding->type = coding_type_raw_text;
3374       if (coding->eol_type != CODING_EOL_UNDECIDED)
3375         {
3376           Lisp_Object subsidiaries;
3377           subsidiaries = Fget (Qraw_text, Qeol_type);
3378
3379           if (VECTORP (subsidiaries)
3380               && XVECTOR (subsidiaries)->size == 3)
3381             coding->symbol
3382               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3383         }
3384       setup_coding_system (coding->symbol, coding);
3385     }
3386   return;
3387 }
3388
3389 /* Emacs has a mechanism to automatically detect a coding system if it
3390    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3391    it's impossible to distinguish some coding systems accurately
3392    because they use the same range of codes.  So, at first, coding
3393    systems are categorized into the following categories:
3394
3395    o coding-category-emacs-mule
3396
3397         The category for a coding system which has the same code range
3398         as Emacs' internal format.  Assigned the coding-system (Lisp
3399         symbol) `emacs-mule' by default.
3400
3401    o coding-category-sjis
3402
3403         The category for a coding system which has the same code range
3404         as SJIS.  Assigned the coding-system (Lisp
3405         symbol) `japanese-shift-jis' by default.
3406
3407    o coding-category-iso-7
3408
3409         The category for a coding system which has the same code range
3410         as ISO2022 of 7-bit environment.  This doesn't use any locking
3411         shift and single shift functions.  This can encode/decode all
3412         charsets.  Assigned the coding-system (Lisp symbol)
3413         `iso-2022-7bit' by default.
3414
3415    o coding-category-iso-7-tight
3416
3417         Same as coding-category-iso-7 except that this can
3418         encode/decode only the specified charsets.
3419
3420    o coding-category-iso-8-1
3421
3422         The category for a coding system which has the same code range
3423         as ISO2022 of 8-bit environment and graphic plane 1 used only
3424         for DIMENSION1 charset.  This doesn't use any locking shift
3425         and single shift functions.  Assigned the coding-system (Lisp
3426         symbol) `iso-latin-1' by default.
3427
3428    o coding-category-iso-8-2
3429
3430         The category for a coding system which has the same code range
3431         as ISO2022 of 8-bit environment and graphic plane 1 used only
3432         for DIMENSION2 charset.  This doesn't use any locking shift
3433         and single shift functions.  Assigned the coding-system (Lisp
3434         symbol) `japanese-iso-8bit' by default.
3435
3436    o coding-category-iso-7-else
3437
3438         The category for a coding system which has the same code range
3439         as ISO2022 of 7-bit environment but uses locking shift or
3440         single shift functions.  Assigned the coding-system (Lisp
3441         symbol) `iso-2022-7bit-lock' by default.
3442
3443    o coding-category-iso-8-else
3444
3445         The category for a coding system which has the same code range
3446         as ISO2022 of 8-bit environment but uses locking shift or
3447         single shift functions.  Assigned the coding-system (Lisp
3448         symbol) `iso-2022-8bit-ss2' by default.
3449
3450    o coding-category-big5
3451
3452         The category for a coding system which has the same code range
3453         as BIG5.  Assigned the coding-system (Lisp symbol)
3454         `cn-big5' by default.
3455
3456    o coding-category-ccl
3457
3458         The category for a coding system of which encoder/decoder is
3459         written in CCL programs.  The default value is nil, i.e., no
3460         coding system is assigned.
3461
3462    o coding-category-binary
3463
3464         The category for a coding system not categorized in any of the
3465         above.  Assigned the coding-system (Lisp symbol)
3466         `no-conversion' by default.
3467
3468    Each of them is a Lisp symbol whose value is the actual
3469    `coding-system' (also a Lisp symbol) assigned by a user.
3470    What Emacs does actually is to detect a category of coding system.
3471    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3472    decide only one possible category, it selects a category of the
3473    highest priority.  Priorities of categories are also specified by a
3474    user in a Lisp variable `coding-category-list'.
3475
3476 */
3477
/* Table indexed by a byte value.  detect_coding_mask skips leading
   bytes whose entry is nonzero, and resets or sets the entries of
   ISO_CODE_SO, ISO_CODE_SI and ISO_CODE_ESC while it runs.  */
static int ascii_skip_code[256];
3480
3481 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3482    If it detects possible coding systems, return an integer in which
3483    appropriate flag bits are set.  Flag bits are defined by macros
3484    CODING_CATEGORY_MASK_XXX in `coding.h'.
3485
3486    How many ASCII characters are at the head is returned as *SKIP.  */
3487
3488 static int
3489 detect_coding_mask (source, src_bytes, priorities, skip)
3490      unsigned char *source;
3491      int src_bytes, *priorities, *skip;
3492 {
3493   register unsigned char c;
3494   unsigned char *src = source, *src_end = source + src_bytes;
3495   unsigned int mask;
3496   int i;
3497
3498   /* At first, skip all ASCII characters and control characters except
3499      for three ISO2022 specific control characters.  */
3500   ascii_skip_code[ISO_CODE_SO] = 0;
3501   ascii_skip_code[ISO_CODE_SI] = 0;
3502   ascii_skip_code[ISO_CODE_ESC] = 0;
3503
3504  label_loop_detect_coding:
3505   while (src < src_end && ascii_skip_code[*src]) src++;
3506   *skip = src - source;
3507
3508   if (src >= src_end)
3509     /* We found nothing other than ASCII.  There's nothing to do.  */
3510     return 0;
3511
3512   c = *src;
3513   /* The text seems to be encoded in some multilingual coding system.
3514      Now, try to find in which coding system the text is encoded.  */
3515   if (c < 0x80)
3516     {
3517       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3518       /* C is an ISO2022 specific control code of C0.  */
3519       mask = detect_coding_iso2022 (src, src_end);
3520       if (mask == 0)
3521         {
3522           /* No valid ISO2022 code follows C.  Try again.  */
3523           src++;
3524           if (c == ISO_CODE_ESC)
3525             ascii_skip_code[ISO_CODE_ESC] = 1;
3526           else
3527             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3528           goto label_loop_detect_coding;
3529         }
3530       if (priorities)
3531         goto label_return_highest_only;
3532     }
3533   else
3534     {
3535       int try;
3536
3537       if (c < 0xA0)
3538         {
3539           /* C is the first byte of SJIS character code,
3540              or a leading-code of Emacs' internal format (emacs-mule).  */
3541           try = CODING_CATEGORY_MASK_SJIS | CODING_CATEGORY_MASK_EMACS_MULE;
3542
3543           /* Or, if C is a special latin extra code,
3544              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3545              or is an ISO2022 control-sequence-introducer (CSI),
3546              we should also consider the possibility of ISO2022 codings.  */
3547           if ((VECTORP (Vlatin_extra_code_table)
3548                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3549               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3550               || (c == ISO_CODE_CSI
3551                   && (src < src_end
3552                       && (*src == ']'
3553                           || ((*src == '0' || *src == '1' || *src == '2')
3554                               && src + 1 < src_end
3555                               && src[1] == ']')))))
3556             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3557                      | CODING_CATEGORY_MASK_ISO_8BIT);
3558         }
3559       else
3560         /* C is a character of ISO2022 in graphic plane right,
3561            or a SJIS's 1-byte character code (i.e. JISX0201),
3562            or the first byte of BIG5's 2-byte code.  */
3563         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3564                 | CODING_CATEGORY_MASK_ISO_8BIT
3565                 | CODING_CATEGORY_MASK_SJIS
3566                 | CODING_CATEGORY_MASK_BIG5);
3567
3568       /* Or, we may have to consider the possibility of CCL.  */
3569       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
3570           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
3571               ->spec.ccl.valid_codes)[c])
3572         try |= CODING_CATEGORY_MASK_CCL;
3573
3574       mask = 0;
3575       if (priorities)
3576         {
3577           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3578             {
3579               if (priorities[i] & try & CODING_CATEGORY_MASK_ISO)
3580                 mask = detect_coding_iso2022 (src, src_end);
3581               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
3582                 mask = detect_coding_sjis (src, src_end);
3583               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
3584                 mask = detect_coding_big5 (src, src_end);
3585               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
3586                 mask = detect_coding_emacs_mule (src, src_end);
3587               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
3588                 mask = detect_coding_ccl (src, src_end);
3589               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
3590                 mask = CODING_CATEGORY_MASK_RAW_TEXT;
3591               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
3592                 mask = CODING_CATEGORY_MASK_BINARY;
3593               if (mask)
3594                 goto label_return_highest_only;
3595             }
3596           return CODING_CATEGORY_MASK_RAW_TEXT;
3597         }
3598       if (try & CODING_CATEGORY_MASK_ISO)
3599         mask |= detect_coding_iso2022 (src, src_end);
3600       if (try & CODING_CATEGORY_MASK_SJIS)
3601         mask |= detect_coding_sjis (src, src_end);
3602       if (try & CODING_CATEGORY_MASK_BIG5)
3603         mask |= detect_coding_big5 (src, src_end);
3604       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3605         mask |= detect_coding_emacs_mule (src, src_end);
3606       if (try & CODING_CATEGORY_MASK_CCL)
3607         mask |= detect_coding_ccl (src, src_end);
3608     }
3609   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
3610
3611  label_return_highest_only:
3612   for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3613     {
3614       if (mask & priorities[i])
3615         return priorities[i];
3616     }
3617   return CODING_CATEGORY_MASK_RAW_TEXT;
3618 }
3619
3620 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3621    The information of the detected coding system is set in CODING.  */
3622
3623 void
3624 detect_coding (coding, src, src_bytes)
3625      struct coding_system *coding;
3626      unsigned char *src;
3627      int src_bytes;
3628 {
3629   unsigned int idx;
3630   int skip, mask, i;
3631   Lisp_Object val;
3632
3633   val = Vcoding_category_list;
3634   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
3635   coding->heading_ascii = skip;
3636
3637   if (!mask) return;
3638
3639   /* We found a single coding system of the highest priority in MASK.  */
3640   idx = 0;
3641   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3642   if (! mask)
3643     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3644
3645   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3646
3647   if (coding->eol_type != CODING_EOL_UNDECIDED)
3648     {
3649       Lisp_Object tmp;
3650
3651       tmp = Fget (val, Qeol_type);
3652       if (VECTORP (tmp))
3653         val = XVECTOR (tmp)->contents[coding->eol_type];
3654     }
3655   setup_coding_system (val, coding);
3656   /* Set this again because setup_coding_system reset this member.  */
3657   coding->heading_ascii = skip;
3658 }
3659
3660 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3661    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3662    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3663
3664    How many non-eol characters are at the head is returned as *SKIP.  */
3665
3666 #define MAX_EOL_CHECK_COUNT 3
3667
3668 static int
3669 detect_eol_type (source, src_bytes, skip)
3670      unsigned char *source;
3671      int src_bytes, *skip;
3672 {
3673   unsigned char *src = source, *src_end = src + src_bytes;
3674   unsigned char c;
3675   int total = 0;                /* How many end-of-lines are found so far.  */
3676   int eol_type = CODING_EOL_UNDECIDED;
3677   int this_eol_type;
3678
3679   *skip = 0;
3680
3681   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3682     {
3683       c = *src++;
3684       if (c == '\n' || c == '\r')
3685         {
3686           if (*skip == 0)
3687             *skip = src - 1 - source;
3688           total++;
3689           if (c == '\n')
3690             this_eol_type = CODING_EOL_LF;
3691           else if (src >= src_end || *src != '\n')
3692             this_eol_type = CODING_EOL_CR;
3693           else
3694             this_eol_type = CODING_EOL_CRLF, src++;
3695
3696           if (eol_type == CODING_EOL_UNDECIDED)
3697             /* This is the first end-of-line.  */
3698             eol_type = this_eol_type;
3699           else if (eol_type != this_eol_type)
3700             {
3701               /* The found type is different from what found before.  */
3702               eol_type = CODING_EOL_INCONSISTENT;
3703               break;
3704             }
3705         }
3706     }
3707
3708   if (*skip == 0)
3709     *skip = src_end - source;
3710   return eol_type;
3711 }
3712
3713 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3714    is encoded.  If it detects an appropriate format of end-of-line, it
3715    sets the information in *CODING.  */
3716
3717 void
3718 detect_eol (coding, src, src_bytes)
3719      struct coding_system *coding;
3720      unsigned char *src;
3721      int src_bytes;
3722 {
3723   Lisp_Object val;
3724   int skip;
3725   int eol_type = detect_eol_type (src, src_bytes, &skip);
3726
3727   if (coding->heading_ascii > skip)
3728     coding->heading_ascii = skip;
3729   else
3730     skip = coding->heading_ascii;
3731
3732   if (eol_type == CODING_EOL_UNDECIDED)
3733     return;
3734   if (eol_type == CODING_EOL_INCONSISTENT)
3735     {
3736 #if 0
3737       /* This code is suppressed until we find a better way to
3738          distinguish raw text file and binary file.  */
3739
3740       /* If we have already detected that the coding is raw-text, the
3741          coding should actually be no-conversion.  */
3742       if (coding->type == coding_type_raw_text)
3743         {
3744           setup_coding_system (Qno_conversion, coding);
3745           return;
3746         }
3747       /* Else, let's decode only text code anyway.  */
3748 #endif /* 0 */
3749       eol_type = CODING_EOL_LF;
3750     }
3751
3752   val = Fget (coding->symbol, Qeol_type);
3753   if (VECTORP (val) && XVECTOR (val)->size == 3)
3754     {
3755       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3756       coding->heading_ascii = skip;
3757     }
3758 }
3759
3760 #define CONVERSION_BUFFER_EXTRA_ROOM 256
3761
3762 #define DECODING_BUFFER_MAG(coding)                                          \
3763   (coding->type == coding_type_iso2022                                       \
3764    ? 3                                                                       \
3765    : ((coding->type == coding_type_sjis || coding->type == coding_type_big5) \
3766       ? 2                                                                    \
3767       : (coding->type == coding_type_raw_text                                \
3768          ? 1                                                                 \
3769          : (coding->type == coding_type_ccl                                  \
3770             ? coding->spec.ccl.decoder.buf_magnification                     \
3771             : 2))))
3772
3773 /* Return maximum size (bytes) of a buffer enough for decoding
3774    SRC_BYTES of text encoded in CODING.  */
3775
3776 int
3777 decoding_buffer_size (coding, src_bytes)
3778      struct coding_system *coding;
3779      int src_bytes;
3780 {
3781   return (src_bytes * DECODING_BUFFER_MAG (coding)
3782           + CONVERSION_BUFFER_EXTRA_ROOM);
3783 }
3784
3785 /* Return maximum size (bytes) of a buffer enough for encoding
3786    SRC_BYTES of text to CODING.  */
3787
3788 int
3789 encoding_buffer_size (coding, src_bytes)
3790      struct coding_system *coding;
3791      int src_bytes;
3792 {
3793   int magnification;
3794
3795   if (coding->type == coding_type_ccl)
3796     magnification = coding->spec.ccl.encoder.buf_magnification;
3797   else
3798     magnification = 3;
3799
3800   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3801 }
3802
#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
#endif

/* The shared conversion buffer and its current size in bytes.  */
char *conversion_buffer;
int conversion_buffer_size;

/* Return a pointer to a SIZE bytes of buffer to be used for encoding
   or decoding.  Sufficient memory is allocated automatically: when
   SIZE exceeds the current buffer, a new buffer is allocated whose
   size is the current size doubled until it reaches SIZE, and the
   old buffer is freed WITHOUT copying its contents.

   NOTE(review): the result is whatever xmalloc yields; whether NULL
   can actually be returned when memory runs out depends on xmalloc's
   behavior, which is defined elsewhere.  */

char *
get_conversion_buffer (size)
     int size;
{
  if (size > conversion_buffer_size)
    {
      char *buf;
      /* Start from the minimum size if no buffer has been set up yet;
         doubling a size of zero would never reach SIZE.  */
      int real_size = (conversion_buffer_size > 0
                       ? conversion_buffer_size * 2
                       : MINIMUM_CONVERSION_BUFFER_SIZE);

      while (real_size < size) real_size *= 2;
      buf = (char *) xmalloc (real_size);
      xfree (conversion_buffer);
      conversion_buffer = buf;
      conversion_buffer_size = real_size;
    }
  return conversion_buffer;
}
3831
3832 int
3833 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3834      struct coding_system *coding;
3835      unsigned char *source, *destination;
3836      int src_bytes, dst_bytes, encodep;
3837 {
3838   struct ccl_program *ccl
3839     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3840   int result;
3841
3842   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
3843
3844   coding->produced = ccl_driver (ccl, source, destination,
3845                                  src_bytes, dst_bytes, &(coding->consumed));
3846   coding->produced_char
3847     = (encodep
3848        ? coding->produced
3849        : multibyte_chars_in_text (destination, coding->produced));
3850   coding->consumed_char
3851     = multibyte_chars_in_text (source, coding->consumed);
3852
3853   switch (ccl->status)
3854     {
3855     case CCL_STAT_SUSPEND_BY_SRC:
3856       result = CODING_FINISH_INSUFFICIENT_SRC;
3857       break;
3858     case CCL_STAT_SUSPEND_BY_DST:
3859       result = CODING_FINISH_INSUFFICIENT_DST;
3860       break;
3861     case CCL_STAT_QUIT:
3862     case CCL_STAT_INVALID_CMD:
3863       result = CODING_FINISH_INTERRUPT;
3864       break;
3865     default:
3866       result = CODING_FINISH_NORMAL;
3867       break;
3868     }
3869   return result;
3870 }
3871
3872 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
3873    decoding, it may detect coding system and format of end-of-line if
3874    those are not yet decided.
3875
3876    This function does not make full use of DESTINATION buffer.  For
3877    instance, if coding->type is coding_type_iso2022, it uses only
3878    (DST_BYTES - 7) bytes of DESTINATION buffer.  In the case that
3879    DST_BYTES is decided by the function decoding_buffer_size, it
3880    contains extra 256 bytes (defined by CONVERSION_BUFFER_EXTRA_ROOM).
3881    So, this function can decode the full SOURCE.  But, in the other
3882    case, if you want to avoid carry over, you must supply at least 7
3883    bytes more area in DESTINATION buffer than expected maximum bytes
3884    that will be produced by this function.  */
3885
3886 int
3887 decode_coding (coding, source, destination, src_bytes, dst_bytes)
3888      struct coding_system *coding;
3889      unsigned char *source, *destination;
3890      int src_bytes, dst_bytes;
3891 {
3892   int result;
3893
3894   if (src_bytes <= 0
3895       && coding->type != coding_type_ccl
3896       && ! (coding->mode & CODING_MODE_LAST_BLOCK
3897             && CODING_REQUIRE_FLUSHING (coding)))
3898     {
3899       coding->produced = coding->produced_char = 0;
3900       coding->consumed = coding->consumed_char = 0;
3901       coding->fake_multibyte = 0;
3902       return CODING_FINISH_NORMAL;
3903     }
3904
3905   if (coding->type == coding_type_undecided)
3906     detect_coding (coding, source, src_bytes);
3907
3908   if (coding->eol_type == CODING_EOL_UNDECIDED)
3909     detect_eol (coding, source, src_bytes);
3910
3911   switch (coding->type)
3912     {
3913     case coding_type_emacs_mule:
3914     case coding_type_undecided:
3915     case coding_type_raw_text:
3916       if (coding->eol_type == CODING_EOL_LF
3917           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3918         goto label_no_conversion;
3919       result = decode_eol (coding, source, destination, src_bytes, dst_bytes);
3920       break;
3921
3922     case coding_type_sjis:
3923       result = decode_coding_sjis_big5 (coding, source, destination,
3924                                         src_bytes, dst_bytes, 1);
3925       break;
3926
3927     case coding_type_iso2022:
3928       result = decode_coding_iso2022 (coding, source, destination,
3929                                       src_bytes, dst_bytes);
3930       break;
3931
3932     case coding_type_big5:
3933       result = decode_coding_sjis_big5 (coding, source, destination,
3934                                         src_bytes, dst_bytes, 0);
3935       break;
3936
3937     case coding_type_ccl:
3938       result = ccl_coding_driver (coding, source, destination,
3939                                   src_bytes, dst_bytes, 0);
3940       break;
3941
3942     default:                    /* i.e. case coding_type_no_conversion: */
3943     label_no_conversion:
3944       if (dst_bytes && src_bytes > dst_bytes)
3945         {
3946           coding->produced = dst_bytes;
3947           result = CODING_FINISH_INSUFFICIENT_DST;
3948         }
3949       else
3950         {
3951           coding->produced = src_bytes;
3952           result = CODING_FINISH_NORMAL;
3953         }
3954       if (dst_bytes)
3955         bcopy (source, destination, coding->produced);
3956       else
3957         safe_bcopy (source, destination, coding->produced);
3958       coding->fake_multibyte = 1;
3959       coding->consumed
3960         = coding->consumed_char = coding->produced_char = coding->produced;
3961       break;
3962     }
3963
3964   return result;
3965 }
3966
3967 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".
3968
3969    This function does not make full use of DESTINATION buffer.  For
3970    instance, if coding->type is coding_type_iso2022, it uses only
3971    (DST_BYTES - 20) bytes of DESTINATION buffer.  In the case that
3972    DST_BYTES is decided by the function encoding_buffer_size, it
3973    contains extra 256 bytes (defined by CONVERSION_BUFFER_EXTRA_ROOM).
3974    So, this function can encode the full SOURCE.  But, in the other
3975    case, if you want to avoid carry over, you must supply at least 20
3976    bytes more area in DESTINATION buffer than expected maximum bytes
3977    that will be produced by this function.  */
3978
3979 int
3980 encode_coding (coding, source, destination, src_bytes, dst_bytes)
3981      struct coding_system *coding;
3982      unsigned char *source, *destination;
3983      int src_bytes, dst_bytes;
3984 {
3985   int result;
3986
3987   if (src_bytes <= 0
3988       && ! (coding->mode & CODING_MODE_LAST_BLOCK
3989             && CODING_REQUIRE_FLUSHING (coding)))
3990     {
3991       coding->produced = coding->produced_char = 0;
3992       coding->consumed = coding->consumed_char = 0;
3993       coding->fake_multibyte = 0;
3994       return CODING_FINISH_NORMAL;
3995     }
3996
3997   switch (coding->type)
3998     {
3999     case coding_type_emacs_mule:
4000     case coding_type_undecided:
4001     case coding_type_raw_text:
4002       if (coding->eol_type == CODING_EOL_LF
4003           ||  coding->eol_type == CODING_EOL_UNDECIDED)
4004         goto label_no_conversion;
4005       result = encode_eol (coding, source, destination, src_bytes, dst_bytes);
4006       break;
4007
4008     case coding_type_sjis:
4009       result = encode_coding_sjis_big5 (coding, source, destination,
4010                                         src_bytes, dst_bytes, 1);
4011       break;
4012
4013     case coding_type_iso2022:
4014       result = encode_coding_iso2022 (coding, source, destination,
4015                                       src_bytes, dst_bytes);
4016       break;
4017
4018     case coding_type_big5:
4019       result = encode_coding_sjis_big5 (coding, source, destination,
4020                                         src_bytes, dst_bytes, 0);
4021       break;
4022
4023     case coding_type_ccl:
4024       result = ccl_coding_driver (coding, source, destination,
4025                                   src_bytes, dst_bytes, 1);
4026       break;
4027
4028     default:                    /* i.e. case coding_type_no_conversion: */
4029     label_no_conversion:
4030       if (dst_bytes && src_bytes > dst_bytes)
4031         {
4032           coding->produced = dst_bytes;
4033           result = CODING_FINISH_INSUFFICIENT_DST;
4034         }
4035       else
4036         {
4037           coding->produced = src_bytes;
4038           result = CODING_FINISH_NORMAL;
4039         }
4040       if (dst_bytes)
4041         bcopy (source, destination, coding->produced);
4042       else
4043         safe_bcopy (source, destination, coding->produced);
4044       if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
4045         {
4046           unsigned char *p = destination, *pend = p + coding->produced;
4047           while (p < pend)
4048             if (*p++ == '\015') p[-1] = '\n';
4049         }
4050       coding->fake_multibyte = 1;
4051       coding->consumed
4052         = coding->consumed_char = coding->produced_char = coding->produced;
4053       break;
4054     }
4055
4056   return result;
4057 }
4058
4059 /* Scan text in the region between *BEG and *END (byte positions),
4060    skip characters which we don't have to decode by coding system
4061    CODING at the head and tail, then set *BEG and *END to the region
4062    of the text we actually have to convert.  The caller should move
4063    the gap out of the region in advance.
4064
4065    If STR is not NULL, *BEG and *END are indices into STR.  */
4066
4067 static void
4068 shrink_decoding_region (beg, end, coding, str)
4069      int *beg, *end;
4070      struct coding_system *coding;
4071      unsigned char *str;
4072 {
4073   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
4074   int eol_conversion;
4075   Lisp_Object translation_table;
4076
4077   if (coding->type == coding_type_ccl
4078       || coding->type == coding_type_undecided
4079       || !NILP (coding->post_read_conversion))
4080     {
4081       /* We can't skip any data.  */
4082       return;
4083     }
4084   else if (coding->type == coding_type_no_conversion)
4085     {
4086       /* We need no conversion, but don't have to skip any data here.
4087          Decoding routine handles them effectively anyway.  */
4088       return;
4089     }
4090
4091   translation_table = coding->translation_table_for_decode;
4092   if (NILP (translation_table) && !NILP (Venable_character_translation))
4093     translation_table = Vstandard_translation_table_for_decode;
4094   if (CHAR_TABLE_P (translation_table))
4095     {
4096       int i;
4097       for (i = 0; i < 128; i++)
4098         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4099           break;
4100       if (i < 128)
4101         /* Some ASCII character should be tranlsated.  We give up
4102            shrinking.  */
4103         return;
4104     }
4105
4106   eol_conversion = (coding->eol_type != CODING_EOL_LF);
4107
4108   if ((! eol_conversion) && (coding->heading_ascii >= 0))
4109     /* Detection routine has already found how much we can skip at the
4110        head.  */
4111     *beg += coding->heading_ascii;
4112
4113   if (str)
4114     {
4115       begp_orig = begp = str + *beg;
4116       endp_orig = endp = str + *end;
4117     }
4118   else
4119     {
4120       begp_orig = begp = BYTE_POS_ADDR (*beg);
4121       endp_orig = endp = begp + *end - *beg;
4122     }
4123
4124   switch (coding->type)
4125     {
4126     case coding_type_emacs_mule:
4127     case coding_type_raw_text:
4128       if (eol_conversion)
4129         {
4130           if (coding->heading_ascii < 0)
4131             while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
4132           while (begp < endp && endp[-1] != '\r' && endp[-1] < 0x80)
4133             endp--;
4134           /* Do not consider LF as ascii if preceded by CR, since that
4135              confuses eol decoding. */
4136           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4137             endp++;
4138         }
4139       else
4140         begp = endp;
4141       break;
4142
4143     case coding_type_sjis:
4144     case coding_type_big5:
4145       /* We can skip all ASCII characters at the head.  */
4146       if (coding->heading_ascii < 0)
4147         {
4148           if (eol_conversion)
4149             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
4150           else
4151             while (begp < endp && *begp < 0x80) begp++;
4152         }
4153       /* We can skip all ASCII characters at the tail except for the
4154          second byte of SJIS or BIG5 code.  */
4155       if (eol_conversion)
4156         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
4157       else
4158         while (begp < endp && endp[-1] < 0x80) endp--;
4159       /* Do not consider LF as ascii if preceded by CR, since that
4160          confuses eol decoding. */
4161       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4162         endp++;
4163       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
4164         endp++;
4165       break;
4166
4167     default:            /* i.e. case coding_type_iso2022: */
4168       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4169         /* We can't skip any data.  */
4170         break;
4171       if (coding->heading_ascii < 0)
4172         {
4173           /* We can skip all ASCII characters at the head except for a
4174              few control codes.  */
4175           while (begp < endp && (c = *begp) < 0x80
4176                  && c != ISO_CODE_CR && c != ISO_CODE_SO
4177                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
4178                  && (!eol_conversion || c != ISO_CODE_LF))
4179             begp++;
4180         }
4181       switch (coding->category_idx)
4182         {
4183         case CODING_CATEGORY_IDX_ISO_8_1:
4184         case CODING_CATEGORY_IDX_ISO_8_2:
4185           /* We can skip all ASCII characters at the tail.  */
4186           if (eol_conversion)
4187             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
4188           else
4189             while (begp < endp && endp[-1] < 0x80) endp--;
4190           /* Do not consider LF as ascii if preceded by CR, since that
4191              confuses eol decoding. */
4192           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4193             endp++;
4194           break;
4195
4196         case CODING_CATEGORY_IDX_ISO_7:
4197         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
4198           {
4199             /* We can skip all charactes at the tail except for 8-bit
4200                codes and ESC and the following 2-byte at the tail.  */
4201             unsigned char *eight_bit = NULL;
4202
4203             if (eol_conversion)
4204               while (begp < endp
4205                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
4206                 {
4207                   if (!eight_bit && c & 0x80) eight_bit = endp;
4208                   endp--;
4209                 }
4210             else
4211               while (begp < endp
4212                      && (c = endp[-1]) != ISO_CODE_ESC)
4213                 {
4214                   if (!eight_bit && c & 0x80) eight_bit = endp;
4215                   endp--;
4216                 }
4217             /* Do not consider LF as ascii if preceded by CR, since that
4218                confuses eol decoding. */
4219             if (begp < endp && endp < endp_orig
4220                 && endp[-1] == '\r' && endp[0] == '\n')
4221               endp++;
4222             if (begp < endp && endp[-1] == ISO_CODE_ESC)
4223               {
4224                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
4225                   /* This is an ASCII designation sequence.  We can
4226                      surely skip the tail.  But, if we have
4227                      encountered an 8-bit code, skip only the codes
4228                      after that.  */
4229                   endp = eight_bit ? eight_bit : endp + 2;
4230                 else
4231                   /* Hmmm, we can't skip the tail.  */
4232                   endp = endp_orig;
4233               }
4234             else if (eight_bit)
4235               endp = eight_bit;
4236           }
4237         }
4238     }
4239   *beg += begp - begp_orig;
4240   *end += endp - endp_orig;
4241   return;
4242 }
4243
4244 /* Like shrink_decoding_region but for encoding.  */
4245
4246 static void
4247 shrink_encoding_region (beg, end, coding, str)
4248      int *beg, *end;
4249      struct coding_system *coding;
4250      unsigned char *str;
4251 {
4252   unsigned char *begp_orig, *begp, *endp_orig, *endp;
4253   int eol_conversion;
4254   Lisp_Object translation_table;
4255
4256   if (coding->type == coding_type_ccl)
4257     /* We can't skip any data.  */
4258     return;
4259   else if (coding->type == coding_type_no_conversion)
4260     {
4261       /* We need no conversion.  */
4262       *beg = *end;
4263       return;
4264     }
4265
4266   translation_table = coding->translation_table_for_encode;
4267   if (NILP (translation_table) && !NILP (Venable_character_translation))
4268     translation_table = Vstandard_translation_table_for_encode;
4269   if (CHAR_TABLE_P (translation_table))
4270     {
4271       int i;
4272       for (i = 0; i < 128; i++)
4273         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4274           break;
4275       if (i < 128)
4276         /* Some ASCII character should be tranlsated.  We give up
4277            shrinking.  */
4278         return;
4279     }
4280
4281   if (str)
4282     {
4283       begp_orig = begp = str + *beg;
4284       endp_orig = endp = str + *end;
4285     }
4286   else
4287     {
4288       begp_orig = begp = BYTE_POS_ADDR (*beg);
4289       endp_orig = endp = begp + *end - *beg;
4290     }
4291
4292   eol_conversion = (coding->eol_type == CODING_EOL_CR
4293                     || coding->eol_type == CODING_EOL_CRLF);
4294
4295   /* Here, we don't have to check coding->pre_write_conversion because
4296      the caller is expected to have handled it already.  */
4297   switch (coding->type)
4298     {
4299     case coding_type_undecided:
4300     case coding_type_emacs_mule:
4301     case coding_type_raw_text:
4302       if (eol_conversion)
4303         {
4304           while (begp < endp && *begp != '\n') begp++;
4305           while (begp < endp && endp[-1] != '\n') endp--;
4306         }
4307       else
4308         begp = endp;
4309       break;
4310
4311     case coding_type_iso2022:
4312       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4313         /* We can't skip any data.  */
4314         break;
4315       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
4316         {
4317           unsigned char *bol = begp;
4318           while (begp < endp && *begp < 0x80)
4319             {
4320               begp++;
4321               if (begp[-1] == '\n')
4322                 bol = begp;
4323             }
4324           begp = bol;
4325           goto label_skip_tail;
4326         }
4327       /* fall down ... */
4328
4329     default:
4330       /* We can skip all ASCII characters at the head and tail.  */
4331       if (eol_conversion)
4332         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
4333       else
4334         while (begp < endp && *begp < 0x80) begp++;
4335     label_skip_tail:
4336       if (eol_conversion)
4337         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
4338       else
4339         while (begp < endp && *(endp - 1) < 0x80) endp--;
4340       break;
4341     }
4342
4343   *beg += begp - begp_orig;
4344   *end += endp - endp_orig;
4345   return;
4346 }
4347
/* Shrinking a conversion region has some overhead of its own, so
   regions not longer than this many bytes are left as they are.  */
static int shrink_conversion_region_threshhold = 1024;

/* Shrink the region *BEG..*END for CODING, with the encoding routine
   if ENCODEP is nonzero and the decoding routine otherwise, provided
   that the region is longer than the threshold above.  */
#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
  do {                                                                  \
    if (*(end) - *(beg) > shrink_conversion_region_threshhold)          \
      {                                                                 \
        if (encodep)                                                    \
          shrink_encoding_region (beg, end, coding, str);               \
        else                                                            \
          shrink_decoding_region (beg, end, coding, str);               \
      }                                                                 \
  } while (0)
4361
4362 static Lisp_Object
4363 code_convert_region_unwind (dummy)
4364      Lisp_Object dummy;
4365 {
4366   inhibit_pre_post_conversion = 0;
4367   return Qnil;
4368 }
4369
4370 /* Store information about all compositions in the range FROM and TO
4371    of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
4372    buffer or a string, defaults to the current buffer.  */
4373
4374 void
4375 coding_save_composition (coding, from, to, obj)
4376      struct coding_system *coding;
4377      int from, to;
4378      Lisp_Object obj;
4379 {
4380   Lisp_Object prop;
4381   int start, end;
4382
4383   coding->composing = COMPOSITION_DISABLED;
4384   if (!find_composition (from, to, &start, &end, &prop, obj)
4385       || end > to)
4386     return;
4387   if (start < from
4388       && (!find_composition (end, to, &start, &end, &prop, obj)
4389           || end > to))
4390     return;
4391   coding->composing = COMPOSITION_NO;
4392   coding_allocate_composition_data (coding, from);
4393   do
4394     {
4395       if (COMPOSITION_VALID_P (start, end, prop))
4396         {
4397           enum composition_method method = COMPOSITION_METHOD (prop);
4398           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
4399               >= COMPOSITION_DATA_SIZE)
4400             coding_allocate_composition_data (coding, from);
4401           /* For relative composition, we remember start and end
4402              positions, for the other compositions, we also remember
4403              components.  */
4404           CODING_ADD_COMPOSITION_START (coding, start - from, method);
4405           if (method != COMPOSITION_RELATIVE)
4406             {
4407               /* We must store a*/
4408               Lisp_Object val, ch;
4409
4410               val = COMPOSITION_COMPONENTS (prop);
4411               if (CONSP (val))
4412                 while (CONSP (val))
4413                   {
4414                     ch = XCAR (val), val = XCDR (val);
4415                     CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
4416                   }
4417               else if (VECTORP (val) || STRINGP (val))
4418                 {
4419                   int len = (VECTORP (val)
4420                              ? XVECTOR (val)->size : XSTRING (val)->size);
4421                   int i;
4422                   for (i = 0; i < len; i++)
4423                     {
4424                       ch = (STRINGP (val)
4425                             ? Faref (val, make_number (i))
4426                             : XVECTOR (val)->contents[i]);
4427                       CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
4428                     }
4429                 }
4430               else              /* INTEGERP (val) */
4431                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
4432             }
4433           CODING_ADD_COMPOSITION_END (coding, end - from);
4434         }
4435       start = end;
4436     }
4437   while (start < to
4438          && find_composition (start, to, &start, &end, &prop, obj)
4439          && end <= to);
4440
4441   /* Make coding->cmp_data point to the first memory block.  */
4442   while (coding->cmp_data->prev)
4443     coding->cmp_data = coding->cmp_data->prev;
4444   coding->cmp_data_start = 0;
4445 }
4446
4447 /* Reflect the saved information about compositions to OBJ.
4448    CODING->cmp_data points to a memory block for the informaiton.  OBJ
4449    is a buffer or a string, defaults to the current buffer.  */
4450
4451 static void
4452 coding_restore_composition (coding, obj)
4453      struct coding_system *coding;
4454      Lisp_Object obj;
4455 {
4456   struct composition_data *cmp_data = coding->cmp_data;
4457
4458   if (!cmp_data)
4459     return;
4460
4461   while (cmp_data->prev)
4462     cmp_data = cmp_data->prev;
4463
4464   while (cmp_data)
4465     {
4466       int i;
4467
4468       for (i = 0; i < cmp_data->used; i += cmp_data->data[i])
4469         {
4470           int *data = cmp_data->data + i;
4471           enum composition_method method = (enum composition_method) data[3];
4472           Lisp_Object components;
4473
4474           if (method == COMPOSITION_RELATIVE)
4475             components = Qnil;
4476           else
4477             {
4478               int len = data[0] - 4, j;
4479               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
4480
4481               for (j = 0; j < len; j++)
4482                 args[j] = make_number (data[4 + j]);
4483               components = (method == COMPOSITION_WITH_ALTCHARS
4484                             ? Fstring (len, args) : Fvector (len, args));
4485             }
4486           compose_text (data[1], data[2], components, Qnil, obj);
4487         }
4488       cmp_data = cmp_data->next;
4489     }
4490 }
4491
4492 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
4493    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
4494    coding system CODING, and return the status code of code conversion
4495    (currently, this value has no meaning).
4496
4497    How many characters (and bytes) are converted to how many
4498    characters (and bytes) are recorded in members of the structure
4499    CODING.
4500
4501    If REPLACE is nonzero, we do various things as if the original text
4502    is deleted and a new text is inserted.  See the comments in
4503    replace_range (insdel.c) to know what we are doing.  */
4504
4505 int
4506 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
4507      int from, from_byte, to, to_byte, encodep, replace;
4508      struct coding_system *coding;
4509 {
4510   int len = to - from, len_byte = to_byte - from_byte;
4511   int require, inserted, inserted_byte;
4512   int head_skip, tail_skip, total_skip;
4513   Lisp_Object saved_coding_symbol;
4514   int multibyte = !NILP (current_buffer->enable_multibyte_characters);
4515   int first = 1;
4516   int fake_multibyte = 0;
4517   unsigned char *src, *dst;
4518   Lisp_Object deletion;
4519   int orig_point = PT, orig_len = len;
4520   int prev_Z;
4521
4522   deletion = Qnil;
4523   saved_coding_symbol = Qnil;
4524
4525   if (from < PT && PT < to)
4526     {
4527       TEMP_SET_PT_BOTH (from, from_byte);
4528       orig_point = from;
4529     }
4530
4531   if (replace)
4532     {
4533       int saved_from = from;
4534
4535       prepare_to_modify_buffer (from, to, &from);
4536       if (saved_from != from)
4537         {
4538           to = from + len;
4539           if (multibyte)
4540             from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
4541           else
4542             from_byte = from, to_byte = to;
4543           len_byte = to_byte - from_byte;
4544         }
4545     }
4546
4547   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4548     {
4549       /* We must detect encoding of text and eol format.  */
4550
4551       if (from < GPT && to > GPT)
4552         move_gap_both (from, from_byte);
4553       if (coding->type == coding_type_undecided)
4554         {
4555           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
4556           if (coding->type == coding_type_undecided)
4557             /* It seems that the text contains only ASCII, but we
4558                should not left it undecided because the deeper
4559                decoding routine (decode_coding) tries to detect the
4560                encodings again in vain.  */
4561             coding->type = coding_type_emacs_mule;
4562         }
4563       if (coding->eol_type == CODING_EOL_UNDECIDED)
4564         {
4565           saved_coding_symbol = coding->symbol;
4566           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4567           if (coding->eol_type == CODING_EOL_UNDECIDED)
4568             coding->eol_type = CODING_EOL_LF;
4569           /* We had better recover the original eol format if we
4570              encounter an inconsitent eol format while decoding.  */
4571           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4572         }
4573     }
4574
4575   if (encodep
4576       ? ! CODING_REQUIRE_ENCODING (coding)
4577       : ! CODING_REQUIRE_DECODING (coding))
4578     {
4579       coding->consumed_char = len;
4580       coding->consumed = len_byte;
4581       coding->produced = len_byte;
4582       if (multibyte
4583           && ! replace
4584           /* See the comment of the member heading_ascii in coding.h.  */
4585           && coding->heading_ascii < len_byte)
4586         {
4587           /* We still may have to combine byte at the head and the
4588              tail of the text in the region.  */
4589           if (from < GPT && GPT < to)
4590             move_gap_both (to, to_byte);
4591           len = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte);
4592           adjust_after_insert (from, from_byte, to, to_byte, len);
4593           coding->produced_char = len;
4594         }
4595       else
4596         {
4597           if (!replace)
4598             adjust_after_insert (from, from_byte, to, to_byte, len_byte);
4599           coding->produced_char = len_byte;
4600         }
4601       return 0;
4602     }
4603
4604   /* Now we convert the text.  */
4605
4606   /* For encoding, we must process pre-write-conversion in advance.  */
4607   if (encodep
4608       && ! NILP (coding->pre_write_conversion)
4609       && SYMBOLP (coding->pre_write_conversion)
4610       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4611     {
4612       /* The function in pre-write-conversion may put a new text in a
4613          new buffer.  */
4614       struct buffer *prev = current_buffer;
4615       Lisp_Object new;
4616       int count = specpdl_ptr - specpdl;
4617
4618       record_unwind_protect (code_convert_region_unwind, Qnil);
4619       /* We should not call any more pre-write/post-read-conversion
4620          functions while this pre-write-conversion is running.  */
4621       inhibit_pre_post_conversion = 1;
4622       call2 (coding->pre_write_conversion,
4623              make_number (from), make_number (to));
4624       inhibit_pre_post_conversion = 0;
4625       /* Discard the unwind protect.  */
4626       specpdl_ptr--;
4627
4628       if (current_buffer != prev)
4629         {
4630           len = ZV - BEGV;
4631           new = Fcurrent_buffer ();
4632           set_buffer_internal_1 (prev);
4633           del_range_2 (from, from_byte, to, to_byte, 0);
4634           TEMP_SET_PT_BOTH (from, from_byte);
4635           insert_from_buffer (XBUFFER (new), 1, len, 0);
4636           Fkill_buffer (new);
4637           if (orig_point >= to)
4638             orig_point += len - orig_len;
4639           else if (orig_point > from)
4640             orig_point = from;
4641           orig_len = len;
4642           to = from + len;
4643           from_byte = multibyte ? CHAR_TO_BYTE (from) : from_byte;
4644           to_byte = multibyte ? CHAR_TO_BYTE (to) : to;
4645           len_byte = to_byte - from_byte;
4646           TEMP_SET_PT_BOTH (from, from_byte);
4647         }
4648     }
4649
4650   if (replace)
4651     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4652
4653   if (coding->composing != COMPOSITION_DISABLED)
4654     {
4655       if (encodep)
4656         coding_save_composition (coding, from, to, Fcurrent_buffer ());
4657       else
4658         coding_allocate_composition_data (coding, from);
4659     }
4660
4661   /* For conversion by CCL program and for encoding with composition
4662      handling, we can't skip any character because we may convert or
4663      compose even ASCII characters.  */
4664   if (coding->type != coding_type_ccl
4665       && (!encodep || coding->cmp_data == NULL))
4666     {
4667       /* Try to skip the heading and tailing ASCIIs.  */
4668       int from_byte_orig = from_byte, to_byte_orig = to_byte;
4669
4670       if (from < GPT && GPT < to)
4671         move_gap_both (from, from_byte);
4672       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
4673       if (from_byte == to_byte
4674           && (encodep || NILP (coding->post_read_conversion))
4675           && ! CODING_REQUIRE_FLUSHING (coding))
4676         {
4677           coding->produced = len_byte;
4678           coding->produced_char = multibyte ? len : len_byte;
4679           if (!replace)
4680             /* We must record and adjust for this new text now.  */
4681             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4682           return 0;
4683         }
4684
4685       head_skip = from_byte - from_byte_orig;
4686       tail_skip = to_byte_orig - to_byte;
4687       total_skip = head_skip + tail_skip;
4688       from += head_skip;
4689       to -= tail_skip;
4690       len -= total_skip; len_byte -= total_skip;
4691
4692       if (coding->cmp_data)
4693         coding->cmp_data->char_offset = from;
4694     }
4695
4696   /* The code conversion routine can not preserve text properties for
4697      now.  So, we must remove all text properties in the region.
4698      Here, we must suppress all modification hooks.  */
4699   if (replace)
4700     {
4701       int saved_inhibit_modification_hooks = inhibit_modification_hooks;
4702       inhibit_modification_hooks = 1;
4703       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
4704       inhibit_modification_hooks = saved_inhibit_modification_hooks;
4705     }
4706
4707   /* For converion, we must put the gap before the text in addition to
4708      making the gap larger for efficient decoding.  The required gap
4709      size starts from 2000 which is the magic number used in make_gap.
4710      But, after one batch of conversion, it will be incremented if we
4711      find that it is not enough .  */
4712   require = 2000;
4713
4714   if (GAP_SIZE  < require)
4715     make_gap (require - GAP_SIZE);
4716   move_gap_both (from, from_byte);
4717
4718   inserted = inserted_byte = 0;
4719
4720   GAP_SIZE += len_byte;
4721   ZV -= len;
4722   Z -= len;
4723   ZV_BYTE -= len_byte;
4724   Z_BYTE -= len_byte;
4725
4726   if (GPT - BEG < BEG_UNCHANGED)
4727     BEG_UNCHANGED = GPT - BEG;
4728   if (Z - GPT < END_UNCHANGED)
4729     END_UNCHANGED = Z - GPT;
4730
4731   for (;;)
4732     {
4733       int result;
4734
4735       /* The buffer memory is now:
4736          +--------+converted-text+---------+-------original-text------+---+
4737          |<-from->|<--inserted-->|---------|<-----------len---------->|---|
4738                   |<------------------- GAP_SIZE -------------------->|  */
4739       src = GAP_END_ADDR - len_byte;
4740       dst = GPT_ADDR + inserted_byte;
4741
4742       if (encodep)
4743         result = encode_coding (coding, src, dst, len_byte, 0);
4744       else
4745         result = decode_coding (coding, src, dst, len_byte, 0);
4746
4747       /* The buffer memory is now:
4748          +--------+-------converted-text--------+--+---original-text--+---+
4749          |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
4750                   |<------------------- GAP_SIZE -------------------->|  */
4751
4752       if (coding->fake_multibyte)
4753         fake_multibyte = 1;
4754
4755       if (!encodep && !multibyte)
4756         coding->produced_char = coding->produced;
4757       inserted += coding->produced_char;
4758       inserted_byte += coding->produced;
4759       len_byte -= coding->consumed;
4760
4761       if (result == CODING_FINISH_INSUFFICIENT_CMP)
4762         {
4763           coding_allocate_composition_data (coding, from + inserted);
4764           continue;
4765         }
4766
4767       src += coding->consumed;
4768       dst += coding->produced;
4769
4770       if (result == CODING_FINISH_NORMAL)
4771         {
4772           src += len_byte;
4773           break;
4774         }
4775       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4776         {
4777           unsigned char *pend = dst, *p = pend - inserted_byte;
4778           Lisp_Object eol_type;
4779
4780           /* Encode LFs back to the original eol format (CR or CRLF).  */
4781           if (coding->eol_type == CODING_EOL_CR)
4782             {
4783               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
4784             }
4785           else
4786             {
4787               int count = 0;
4788
4789               while (p < pend) if (*p++ == '\n') count++;
4790               if (src - dst < count)
4791                 {
4792                   /* We don't have sufficient room for encoding LFs
4793                      back to CRLF.  We must record converted and
4794                      not-yet-converted text back to the buffer
4795                      content, enlarge the gap, then record them out of
4796                      the buffer contents again.  */
4797                   int add = len_byte + inserted_byte;
4798
4799                   GAP_SIZE -= add;
4800                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4801                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
4802                   make_gap (count - GAP_SIZE);
4803                   GAP_SIZE += add;
4804                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4805                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4806                   /* Don't forget to update SRC, DST, and PEND.  */
4807                   src = GAP_END_ADDR - len_byte;
4808                   dst = GPT_ADDR + inserted_byte;
4809                   pend = dst;
4810                 }
4811               inserted += count;
4812               inserted_byte += count;
4813               coding->produced += count;
4814               p = dst = pend + count;
4815               while (count)
4816                 {
4817                   *--p = *--pend;
4818                   if (*p == '\n') count--, *--p = '\r';
4819                 }
4820             }
4821
4822           /* Suppress eol-format conversion in the further conversion.  */
4823           coding->eol_type = CODING_EOL_LF;
4824
4825           /* Set the coding system symbol to that for Unix-like EOL.  */
4826           eol_type = Fget (saved_coding_symbol, Qeol_type);
4827           if (VECTORP (eol_type)
4828               && XVECTOR (eol_type)->size == 3
4829               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
4830             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
4831           else
4832             coding->symbol = saved_coding_symbol;
4833
4834           continue;
4835         }
4836       if (len_byte <= 0)
4837         {
4838           if (coding->type != coding_type_ccl
4839               || coding->mode & CODING_MODE_LAST_BLOCK)
4840             break;
4841           coding->mode |= CODING_MODE_LAST_BLOCK;
4842           continue;
4843         }
4844       if (result == CODING_FINISH_INSUFFICIENT_SRC)
4845         {
4846           /* The source text ends in invalid codes.  Let's just
4847              make them valid buffer contents, and finish conversion.  */
4848           inserted += len_byte;
4849           inserted_byte += len_byte;
4850           while (len_byte--)
4851             *dst++ = *src++;
4852           fake_multibyte = 1;
4853           break;
4854         }
4855       if (result == CODING_FINISH_INTERRUPT)
4856         {
4857           /* The conversion procedure was interrupted by a user.  */
4858           fake_multibyte = 1;
4859           break;
4860         }
4861       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
4862       if (coding->consumed < 1)
4863         {
4864           /* It's quite strange to require more memory without
4865              consuming any bytes.  Perhaps CCL program bug.  */
4866           fake_multibyte = 1;
4867           break;
4868         }
4869       if (first)
4870         {
4871           /* We have just done the first batch of conversion which was
4872              stoped because of insufficient gap.  Let's reconsider the
4873              required gap size (i.e. SRT - DST) now.
4874
4875              We have converted ORIG bytes (== coding->consumed) into
4876              NEW bytes (coding->produced).  To convert the remaining
4877              LEN bytes, we may need REQUIRE bytes of gap, where:
4878                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
4879                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
4880              Here, we are sure that NEW >= ORIG.  */
4881           float ratio = coding->produced - coding->consumed;
4882           ratio /= coding->consumed;
4883           require = len_byte * ratio;
4884           first = 0;
4885         }
4886       if ((src - dst) < (require + 2000))
4887         {
4888           /* See the comment above the previous call of make_gap.  */
4889           int add = len_byte + inserted_byte;
4890
4891           GAP_SIZE -= add;
4892           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4893           GPT += inserted_byte; GPT_BYTE += inserted_byte;
4894           make_gap (require + 2000);
4895           GAP_SIZE += add;
4896           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4897           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4898         }
4899     }
4900   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
4901
4902   if (multibyte
4903       && (encodep
4904           || fake_multibyte
4905           || (to - from) != (to_byte - from_byte)))
4906     inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
4907
4908   /* If we have shrinked the conversion area, adjust it now.  */
4909   if (total_skip > 0)
4910     {
4911       if (tail_skip > 0)
4912         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
4913       inserted += total_skip; inserted_byte += total_skip;
4914       GAP_SIZE += total_skip;
4915       GPT -= head_skip; GPT_BYTE -= head_skip;
4916       ZV -= total_skip; ZV_BYTE -= total_skip;
4917       Z -= total_skip; Z_BYTE -= total_skip;
4918       from -= head_skip; from_byte -= head_skip;
4919       to += tail_skip; to_byte += tail_skip;
4920     }
4921
4922   prev_Z = Z;
4923   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
4924   inserted = Z - prev_Z;
4925
4926   if (!encodep && coding->cmp_data && coding->cmp_data->used)
4927     coding_restore_composition (coding, Fcurrent_buffer ());
4928   coding_free_composition_data (coding);
4929
4930   if (! encodep && ! NILP (coding->post_read_conversion))
4931     {
4932       Lisp_Object val;
4933       int count = specpdl_ptr - specpdl;
4934
4935       if (from != PT)
4936         TEMP_SET_PT_BOTH (from, from_byte);
4937       prev_Z = Z;
4938       record_unwind_protect (code_convert_region_unwind, Qnil);
4939       /* We should not call any more pre-write/post-read-conversion
4940          functions while this post-read-conversion is running.  */
4941       inhibit_pre_post_conversion = 1;
4942       val = call1 (coding->post_read_conversion, make_number (inserted));
4943       inhibit_pre_post_conversion = 0;
4944       /* Discard the unwind protect.  */
4945       specpdl_ptr--;
4946       CHECK_NUMBER (val, 0);
4947       inserted += Z - prev_Z;
4948     }
4949
4950   if (orig_point >= from)
4951     {
4952       if (orig_point >= from + orig_len)
4953         orig_point += inserted - orig_len;
4954       else
4955         orig_point = from;
4956       TEMP_SET_PT (orig_point);
4957     }
4958
4959   if (replace)
4960     {
4961       signal_after_change (from, to - from, inserted);
4962       update_compositions (from, from + inserted, CHECK_BORDER);
4963     }
4964
4965   {
4966     coding->consumed = to_byte - from_byte;
4967     coding->consumed_char = to - from;
4968     coding->produced = inserted_byte;
4969     coding->produced_char = inserted;
4970   }
4971
4972   return 0;
4973 }
4974
4975 Lisp_Object
4976 code_convert_string (str, coding, encodep, nocopy)
4977      Lisp_Object str;
4978      struct coding_system *coding;
4979      int encodep, nocopy;
4980 {
4981   int len;
4982   char *buf;
4983   int from = 0, to = XSTRING (str)->size;
4984   int to_byte = STRING_BYTES (XSTRING (str));
4985   struct gcpro gcpro1;
4986   Lisp_Object saved_coding_symbol;
4987   int result;
4988
4989   saved_coding_symbol = Qnil;
4990   if ((encodep && !NILP (coding->pre_write_conversion)
4991        || !encodep && !NILP (coding->post_read_conversion)))
4992     {
4993       /* Since we have to call Lisp functions which assume target text
4994          is in a buffer, after setting a temporary buffer, call
4995          code_convert_region.  */
4996       int count = specpdl_ptr - specpdl;
4997       struct buffer *prev = current_buffer;
4998       int multibyte = STRING_MULTIBYTE (str);
4999
5000       record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
5001       record_unwind_protect (code_convert_region_unwind, Qnil);
5002       inhibit_pre_post_conversion = 1;
5003       GCPRO1 (str);
5004       temp_output_buffer_setup (" *code-converting-work*");
5005       set_buffer_internal (XBUFFER (Vstandard_output));
5006       /* We must insert the contents of STR as is without
5007          unibyte<->multibyte conversion.  For that, we adjust the
5008          multibyteness of the working buffer to that of STR.  */
5009       Ferase_buffer ();         /* for safety */
5010       current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
5011       insert_from_string (str, 0, 0, to, to_byte, 0);
5012       UNGCPRO;
5013       code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1);
5014       /* Make a unibyte string if we are encoding, otherwise make a
5015          multibyte string.  */
5016       Fset_buffer_multibyte (encodep ? Qnil : Qt);
5017       str = make_buffer_string (BEGV, ZV, 0);
5018       return unbind_to (count, str);
5019     }
5020
5021   if (! encodep && CODING_REQUIRE_DETECTION (coding))
5022     {
5023       /* See the comments in code_convert_region.  */
5024       if (coding->type == coding_type_undecided)
5025         {
5026           detect_coding (coding, XSTRING (str)->data, to_byte);
5027           if (coding->type == coding_type_undecided)
5028             coding->type = coding_type_emacs_mule;
5029         }
5030       if (coding->eol_type == CODING_EOL_UNDECIDED)
5031         {
5032           saved_coding_symbol = coding->symbol;
5033           detect_eol (coding, XSTRING (str)->data, to_byte);
5034           if (coding->eol_type == CODING_EOL_UNDECIDED)
5035             coding->eol_type = CODING_EOL_LF;
5036           /* We had better recover the original eol format if we
5037              encounter an inconsitent eol format while decoding.  */
5038           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5039         }
5040     }
5041
5042   if (encodep
5043       ? ! CODING_REQUIRE_ENCODING (coding)
5044       : ! CODING_REQUIRE_DECODING (coding))
5045     return (nocopy ? str : Fcopy_sequence (str));
5046
5047   if (coding->composing != COMPOSITION_DISABLED)
5048     {
5049       if (encodep)
5050         coding_save_composition (coding, from, to, str);
5051       else
5052         coding_allocate_composition_data (coding, from);
5053     }
5054
5055   /* For conversion by CCL program and for encoding with composition
5056      handling, we can't skip any character because we may convert or
5057      compose even ASCII characters.  */
5058   if (coding->type != coding_type_ccl
5059       && (!encodep || coding->cmp_data == NULL))
5060     {
5061       /* Try to skip the heading and tailing ASCIIs.  */
5062       int from_orig = from;
5063
5064       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5065                                 encodep);
5066       if (from == to_byte)
5067         return (nocopy ? str : Fcopy_sequence (str));
5068
5069       if (coding->cmp_data)
5070         coding->cmp_data->char_offset = from;
5071     }
5072
5073   if (encodep)
5074     len = encoding_buffer_size (coding, to_byte - from);
5075   else
5076     len = decoding_buffer_size (coding, to_byte - from);
5077   len += from + STRING_BYTES (XSTRING (str)) - to_byte;
5078   GCPRO1 (str);
5079   buf = get_conversion_buffer (len);
5080   UNGCPRO;
5081
5082   if (from > 0)
5083     bcopy (XSTRING (str)->data, buf, from);
5084   result = (encodep
5085             ? encode_coding (coding, XSTRING (str)->data + from,
5086                              buf + from, to_byte - from, len)
5087             : decode_coding (coding, XSTRING (str)->data + from,
5088                              buf + from, to_byte - from, len));
5089   if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5090     {
5091       /* We simply try to decode the whole string again but without
5092          eol-conversion this time.  */
5093       coding->eol_type = CODING_EOL_LF;
5094       coding->symbol = saved_coding_symbol;
5095       coding_free_composition_data (coding);
5096       return code_convert_string (str, coding, encodep, nocopy);
5097     }
5098
5099   bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
5100          STRING_BYTES (XSTRING (str)) - to_byte);
5101
5102   len = from + STRING_BYTES (XSTRING (str)) - to_byte;
5103   if (encodep)
5104     str = make_unibyte_string (buf, len + coding->produced);
5105   else
5106     {
5107       int chars= (coding->fake_multibyte
5108                   ? multibyte_chars_in_text (buf + from, coding->produced)
5109                   : coding->produced_char);
5110       str = make_multibyte_string (buf, len + chars, len + coding->produced);
5111     }
5112
5113   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5114     coding_restore_composition (coding, str);
5115
5116   coding_free_composition_data (coding);
5117   return str;
5118 }
5119
5120 \f
5121 #ifdef emacs
5122 /*** 8. Emacs Lisp library functions ***/
5123
5124 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
5125   "Return t if OBJECT is nil or a coding-system.\n\
5126 See the documentation of `make-coding-system' for information\n\
5127 about coding-system objects.")
5128   (obj)
5129      Lisp_Object obj;
5130 {
5131   if (NILP (obj))
5132     return Qt;
5133   if (!SYMBOLP (obj))
5134     return Qnil;
5135   /* Get coding-spec vector for OBJ.  */
5136   obj = Fget (obj, Qcoding_system);
5137   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
5138           ? Qt : Qnil);
5139 }
5140
5141 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
5142        Sread_non_nil_coding_system, 1, 1, 0,
5143   "Read a coding system from the minibuffer, prompting with string PROMPT.")
5144   (prompt)
5145      Lisp_Object prompt;
5146 {
5147   Lisp_Object val;
5148   do
5149     {
5150       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
5151                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
5152     }
5153   while (XSTRING (val)->size == 0);
5154   return (Fintern (val, Qnil));
5155 }
5156
5157 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
5158   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
5159 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
5160   (prompt, default_coding_system)
5161      Lisp_Object prompt, default_coding_system;
5162 {
5163   Lisp_Object val;
5164   if (SYMBOLP (default_coding_system))
5165     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
5166   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
5167                           Qt, Qnil, Qcoding_system_history,
5168                           default_coding_system, Qnil);
5169   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
5170 }
5171
5172 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
5173        1, 1, 0,
5174   "Check validity of CODING-SYSTEM.\n\
5175 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
5176 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
5177 The value of property should be a vector of length 5.")
5178   (coding_system)
5179      Lisp_Object coding_system;
5180 {
5181   CHECK_SYMBOL (coding_system, 0);
5182   if (!NILP (Fcoding_system_p (coding_system)))
5183     return coding_system;
5184   while (1)
5185     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
5186 }
5187 \f
5188 Lisp_Object
5189 detect_coding_system (src, src_bytes, highest)
5190      unsigned char *src;
5191      int src_bytes, highest;
5192 {
5193   int coding_mask, eol_type;
5194   Lisp_Object val, tmp;
5195   int dummy;
5196
5197   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
5198   eol_type  = detect_eol_type (src, src_bytes, &dummy);
5199   if (eol_type == CODING_EOL_INCONSISTENT)
5200     eol_type = CODING_EOL_UNDECIDED;
5201
5202   if (!coding_mask)
5203     {
5204       val = Qundecided;
5205       if (eol_type != CODING_EOL_UNDECIDED)
5206         {
5207           Lisp_Object val2;
5208           val2 = Fget (Qundecided, Qeol_type);
5209           if (VECTORP (val2))
5210             val = XVECTOR (val2)->contents[eol_type];
5211         }
5212       return (highest ? val : Fcons (val, Qnil));
5213     }
5214
5215   /* At first, gather possible coding systems in VAL.  */
5216   val = Qnil;
5217   for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCDR (tmp))
5218     {
5219       int idx
5220         = XFASTINT (Fget (XCAR (tmp), Qcoding_category_index));
5221       if (coding_mask & (1 << idx))
5222         {
5223           val = Fcons (Fsymbol_value (XCAR (tmp)), val);
5224           if (highest)
5225             break;
5226         }
5227     }
5228   if (!highest)
5229     val = Fnreverse (val);
5230
5231   /* Then, replace the elements with subsidiary coding systems.  */
5232   for (tmp = val; !NILP (tmp); tmp = XCDR (tmp))
5233     {
5234       if (eol_type != CODING_EOL_UNDECIDED
5235           && eol_type != CODING_EOL_INCONSISTENT)
5236         {
5237           Lisp_Object eol;
5238           eol = Fget (XCAR (tmp), Qeol_type);
5239           if (VECTORP (eol))
5240             XCAR (tmp) = XVECTOR (eol)->contents[eol_type];
5241         }
5242     }
5243   return (highest ? XCAR (val) : val);
5244 }
5245
5246 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
5247        2, 3, 0,
5248   "Detect coding system of the text in the region between START and END.\n\
5249 Return a list of possible coding systems ordered by priority.\n\
5250 \n\
5251 If only ASCII characters are found, it returns a list of single element\n\
5252 `undecided' or its subsidiary coding system according to a detected\n\
5253 end-of-line format.\n\
5254 \n\
5255 If optional argument HIGHEST is non-nil, return the coding system of\n\
5256 highest priority.")
5257   (start, end, highest)
5258      Lisp_Object start, end, highest;
5259 {
5260   int from, to;
5261   int from_byte, to_byte;
5262
5263   CHECK_NUMBER_COERCE_MARKER (start, 0);
5264   CHECK_NUMBER_COERCE_MARKER (end, 1);
5265
5266   validate_region (&start, &end);
5267   from = XINT (start), to = XINT (end);
5268   from_byte = CHAR_TO_BYTE (from);
5269   to_byte = CHAR_TO_BYTE (to);
5270
5271   if (from < GPT && to >= GPT)
5272     move_gap_both (to, to_byte);
5273
5274   return detect_coding_system (BYTE_POS_ADDR (from_byte),
5275                                to_byte - from_byte,
5276                                !NILP (highest));
5277 }
5278
5279 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
5280        1, 2, 0,
5281   "Detect coding system of the text in STRING.\n\
5282 Return a list of possible coding systems ordered by priority.\n\
5283 \n\
5284 If only ASCII characters are found, it returns a list of single element\n\
5285 `undecided' or its subsidiary coding system according to a detected\n\
5286 end-of-line format.\n\
5287 \n\
5288 If optional argument HIGHEST is non-nil, return the coding system of\n\
5289 highest priority.")
5290   (string, highest)
5291      Lisp_Object string, highest;
5292 {
5293   CHECK_STRING (string, 0);
5294
5295   return detect_coding_system (XSTRING (string)->data,
5296                                STRING_BYTES (XSTRING (string)),
5297                                !NILP (highest));
5298 }
5299
5300 Lisp_Object
5301 code_convert_region1 (start, end, coding_system, encodep)
5302      Lisp_Object start, end, coding_system;
5303      int encodep;
5304 {
5305   struct coding_system coding;
5306   int from, to, len;
5307
5308   CHECK_NUMBER_COERCE_MARKER (start, 0);
5309   CHECK_NUMBER_COERCE_MARKER (end, 1);
5310   CHECK_SYMBOL (coding_system, 2);
5311
5312   validate_region (&start, &end);
5313   from = XFASTINT (start);
5314   to = XFASTINT (end);
5315
5316   if (NILP (coding_system))
5317     return make_number (to - from);
5318
5319   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5320     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5321
5322   coding.mode |= CODING_MODE_LAST_BLOCK;
5323   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
5324                        &coding, encodep, 1);
5325   Vlast_coding_system_used = coding.symbol;
5326   return make_number (coding.produced_char);
5327 }
5328
5329 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
5330        3, 3, "r\nzCoding system: ",
5331   "Decode the current region by specified coding system.\n\
5332 When called from a program, takes three arguments:\n\
5333 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
5334 This function sets `last-coding-system-used' to the precise coding system\n\
5335 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5336 not fully specified.)\n\
5337 It returns the length of the decoded text.")
5338   (start, end, coding_system)
5339      Lisp_Object start, end, coding_system;
5340 {
5341   return code_convert_region1 (start, end, coding_system, 0);
5342 }
5343
5344 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
5345        3, 3, "r\nzCoding system: ",
5346   "Encode the current region by specified coding system.\n\
5347 When called from a program, takes three arguments:\n\
5348 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
5349 This function sets `last-coding-system-used' to the precise coding system\n\
5350 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5351 not fully specified.)\n\
5352 It returns the length of the encoded text.")
5353   (start, end, coding_system)
5354      Lisp_Object start, end, coding_system;
5355 {
5356   return code_convert_region1 (start, end, coding_system, 1);
5357 }
5358
5359 Lisp_Object
5360 code_convert_string1 (string, coding_system, nocopy, encodep)
5361      Lisp_Object string, coding_system, nocopy;
5362      int encodep;
5363 {
5364   struct coding_system coding;
5365
5366   CHECK_STRING (string, 0);
5367   CHECK_SYMBOL (coding_system, 1);
5368
5369   if (NILP (coding_system))
5370     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
5371
5372   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5373     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5374
5375   coding.mode |= CODING_MODE_LAST_BLOCK;
5376   string = code_convert_string (string, &coding, encodep, !NILP (nocopy));
5377   Vlast_coding_system_used = coding.symbol;
5378
5379   return string;
5380 }
5381
5382 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
5383        2, 3, 0,
5384   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
5385 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5386 if the decoding operation is trivial.\n\
5387 This function sets `last-coding-system-used' to the precise coding system\n\
5388 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5389 not fully specified.)")
5390   (string, coding_system, nocopy)
5391      Lisp_Object string, coding_system, nocopy;
5392 {
5393   return code_convert_string1 (string, coding_system, nocopy, 0);
5394 }
5395
5396 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
5397        2, 3, 0,
5398   "Encode STRING to CODING-SYSTEM, and return the result.\n\
5399 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5400 if the encoding operation is trivial.\n\
5401 This function sets `last-coding-system-used' to the precise coding system\n\
5402 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5403 not fully specified.)")
5404   (string, coding_system, nocopy)
5405      Lisp_Object string, coding_system, nocopy;
5406 {
5407   return code_convert_string1 (string, coding_system, nocopy, 1);
5408 }
5409
5410 /* Encode or decode STRING according to CODING_SYSTEM.
5411    Do not set Vlast_coding_system_used.
5412
5413    This function is called only from macros DECODE_FILE and
5414    ENCODE_FILE, thus we ignore character composition.  */
5415
5416 Lisp_Object
5417 code_convert_string_norecord (string, coding_system, encodep)
5418      Lisp_Object string, coding_system;
5419      int encodep;
5420 {
5421   struct coding_system coding;
5422
5423   CHECK_STRING (string, 0);
5424   CHECK_SYMBOL (coding_system, 1);
5425
5426   if (NILP (coding_system))
5427     return string;
5428
5429   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5430     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5431
5432   coding.composing = COMPOSITION_DISABLED;
5433   coding.mode |= CODING_MODE_LAST_BLOCK;
5434   return code_convert_string (string, &coding, encodep, Qt);
5435 }
5436 \f
5437 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
5438   "Decode a Japanese character which has CODE in shift_jis encoding.\n\
5439 Return the corresponding character.")
5440   (code)
5441      Lisp_Object code;
5442 {
5443   unsigned char c1, c2, s1, s2;
5444   Lisp_Object val;
5445
5446   CHECK_NUMBER (code, 0);
5447   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
5448   if (s1 == 0)
5449     {
5450       if (s2 < 0x80)
5451         XSETFASTINT (val, s2);
5452       else if (s2 >= 0xA0 || s2 <= 0xDF)
5453         XSETFASTINT (val,
5454                      MAKE_NON_ASCII_CHAR (charset_katakana_jisx0201, s2, 0));
5455       else
5456         error ("Invalid Shift JIS code: %x", XFASTINT (code));
5457     }
5458   else
5459     {
5460       if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF)
5461           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
5462         error ("Invalid Shift JIS code: %x", XFASTINT (code));
5463       DECODE_SJIS (s1, s2, c1, c2);
5464       XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
5465     }
5466   return val;
5467 }
5468
5469 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
5470   "Encode a Japanese character CHAR to shift_jis encoding.\n\
5471 Return the corresponding code in SJIS.")
5472   (ch)
5473      Lisp_Object ch;
5474 {
5475   int charset, c1, c2, s1, s2;
5476   Lisp_Object val;
5477
5478   CHECK_NUMBER (ch, 0);
5479   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
5480   if (charset == CHARSET_ASCII)
5481     {
5482       val = ch;
5483     }
5484   else if (charset == charset_jisx0208
5485            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
5486     {
5487       ENCODE_SJIS (c1, c2, s1, s2);
5488       XSETFASTINT (val, (s1 << 8) | s2);
5489     }
5490   else if (charset == charset_katakana_jisx0201
5491            && c1 > 0x20 && c2 < 0xE0)
5492     {
5493       XSETFASTINT (val, c1 | 0x80);
5494     }
5495   else
5496     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
5497   return val;
5498 }
5499
5500 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
5501   "Decode a Big5 character which has CODE in BIG5 coding system.\n\
5502 Return the corresponding character.")
5503   (code)
5504      Lisp_Object code;
5505 {
5506   int charset;
5507   unsigned char b1, b2, c1, c2;
5508   Lisp_Object val;
5509
5510   CHECK_NUMBER (code, 0);
5511   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
5512   if (b1 == 0)
5513     {
5514       if (b2 >= 0x80)
5515         error ("Invalid BIG5 code: %x", XFASTINT (code));
5516       val = code;
5517     }
5518   else
5519     {
5520       if ((b1 < 0xA1 || b1 > 0xFE)
5521           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
5522         error ("Invalid BIG5 code: %x", XFASTINT (code));
5523       DECODE_BIG5 (b1, b2, charset, c1, c2);
5524       XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
5525     }
5526   return val;
5527 }
5528
5529 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
5530   "Encode the Big5 character CHAR to BIG5 coding system.\n\
5531 Return the corresponding character code in Big5.")
5532   (ch)
5533      Lisp_Object ch;
5534 {
5535   int charset, c1, c2, b1, b2;
5536   Lisp_Object val;
5537
5538   CHECK_NUMBER (ch, 0);
5539   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
5540   if (charset == CHARSET_ASCII)
5541     {
5542       val = ch;
5543     }
5544   else if ((charset == charset_big5_1
5545             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
5546            || (charset == charset_big5_2
5547                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
5548     {
5549       ENCODE_BIG5 (charset, c1, c2, b1, b2);
5550       XSETFASTINT (val, (b1 << 8) | b2);
5551     }
5552   else
5553     error ("Can't encode to Big5: %d", XFASTINT (ch));
5554   return val;
5555 }
5556 \f
5557 DEFUN ("set-terminal-coding-system-internal",
5558        Fset_terminal_coding_system_internal,
5559        Sset_terminal_coding_system_internal, 1, 1, 0, "")
5560   (coding_system)
5561      Lisp_Object coding_system;
5562 {
5563   CHECK_SYMBOL (coding_system, 0);
5564   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
5565   /* We had better not send unsafe characters to terminal.  */
5566   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
5567   /* Characer composition should be disabled.  */
5568   terminal_coding.composing = COMPOSITION_DISABLED;
5569   return Qnil;
5570 }
5571
5572 DEFUN ("set-safe-terminal-coding-system-internal",
5573        Fset_safe_terminal_coding_system_internal,
5574        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
5575   (coding_system)
5576      Lisp_Object coding_system;
5577 {
5578   CHECK_SYMBOL (coding_system, 0);
5579   setup_coding_system (Fcheck_coding_system (coding_system),
5580                        &safe_terminal_coding);
5581   /* Characer composition should be disabled.  */
5582   safe_terminal_coding.composing = COMPOSITION_DISABLED;
5583   return Qnil;
5584 }
5585
5586 DEFUN ("terminal-coding-system",
5587        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
5588   "Return coding system specified for terminal output.")
5589   ()
5590 {
5591   return terminal_coding.symbol;
5592 }
5593
5594 DEFUN ("set-keyboard-coding-system-internal",
5595        Fset_keyboard_coding_system_internal,
5596        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
5597   (coding_system)
5598      Lisp_Object coding_system;
5599 {
5600   CHECK_SYMBOL (coding_system, 0);
5601   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
5602   /* Characer composition should be disabled.  */
5603   keyboard_coding.composing = COMPOSITION_DISABLED;
5604   return Qnil;
5605 }
5606
5607 DEFUN ("keyboard-coding-system",
5608        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
5609   "Return coding system specified for decoding keyboard input.")
5610   ()
5611 {
5612   return keyboard_coding.symbol;
5613 }
5614
5615 \f
5616 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
5617        Sfind_operation_coding_system,  1, MANY, 0,
5618   "Choose a coding system for an operation based on the target name.\n\
5619 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
5620 DECODING-SYSTEM is the coding system to use for decoding\n\
5621 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
5622 for encoding (in case OPERATION does encoding).\n\
5623 \n\
5624 The first argument OPERATION specifies an I/O primitive:\n\
5625   For file I/O, `insert-file-contents' or `write-region'.\n\
5626   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
5627   For network I/O, `open-network-stream'.\n\
5628 \n\
5629 The remaining arguments should be the same arguments that were passed\n\
5630 to the primitive.  Depending on which primitive, one of those arguments\n\
5631 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
5632 whichever argument specifies the file name is TARGET.\n\
5633 \n\
5634 TARGET has a meaning which depends on OPERATION:\n\
5635   For file I/O, TARGET is a file name.\n\
5636   For process I/O, TARGET is a process name.\n\
5637   For network I/O, TARGET is a service name or a port number\n\
5638 \n\
5639 This function looks up what specified for TARGET in,\n\
5640 `file-coding-system-alist', `process-coding-system-alist',\n\
5641 or `network-coding-system-alist' depending on OPERATION.\n\
5642 They may specify a coding system, a cons of coding systems,\n\
5643 or a function symbol to call.\n\
5644 In the last case, we call the function with one argument,\n\
5645 which is a list of all the arguments given to this function.")
5646   (nargs, args)
5647      int nargs;
5648      Lisp_Object *args;
5649 {
5650   Lisp_Object operation, target_idx, target, val;
5651   register Lisp_Object chain;
5652
5653   if (nargs < 2)
5654     error ("Too few arguments");
5655   operation = args[0];
5656   if (!SYMBOLP (operation)
5657       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
5658     error ("Invalid first arguement");
5659   if (nargs < 1 + XINT (target_idx))
5660     error ("Too few arguments for operation: %s",
5661            XSYMBOL (operation)->name->data);
5662   target = args[XINT (target_idx) + 1];
5663   if (!(STRINGP (target)
5664         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
5665     error ("Invalid %dth argument", XINT (target_idx) + 1);
5666
5667   chain = ((EQ (operation, Qinsert_file_contents)
5668             || EQ (operation, Qwrite_region))
5669            ? Vfile_coding_system_alist
5670            : (EQ (operation, Qopen_network_stream)
5671               ? Vnetwork_coding_system_alist
5672               : Vprocess_coding_system_alist));
5673   if (NILP (chain))
5674     return Qnil;
5675
5676   for (; CONSP (chain); chain = XCDR (chain))
5677     {
5678       Lisp_Object elt;
5679       elt = XCAR (chain);
5680
5681       if (CONSP (elt)
5682           && ((STRINGP (target)
5683                && STRINGP (XCAR (elt))
5684                && fast_string_match (XCAR (elt), target) >= 0)
5685               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
5686         {
5687           val = XCDR (elt);
5688           /* Here, if VAL is both a valid coding system and a valid
5689              function symbol, we return VAL as a coding system.  */
5690           if (CONSP (val))
5691             return val;
5692           if (! SYMBOLP (val))
5693             return Qnil;
5694           if (! NILP (Fcoding_system_p (val)))
5695             return Fcons (val, val);
5696           if (! NILP (Ffboundp (val)))
5697             {
5698               val = call1 (val, Flist (nargs, args));
5699               if (CONSP (val))
5700                 return val;
5701               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
5702                 return Fcons (val, val);
5703             }
5704           return Qnil;
5705         }
5706     }
5707   return Qnil;
5708 }
5709
5710 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
5711        Supdate_coding_systems_internal, 0, 0, 0,
5712   "Update internal database for ISO2022 and CCL based coding systems.\n\
5713 When values of the following coding categories are changed, you must\n\
5714 call this function:\n\
5715   coding-category-iso-7, coding-category-iso-7-tight,\n\
5716   coding-category-iso-8-1, coding-category-iso-8-2,\n\
5717   coding-category-iso-7-else, coding-category-iso-8-else,\n\
5718   coding-category-ccl")
5719   ()
5720 {
5721   int i;
5722
5723   for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_CCL; i++)
5724     {
5725       Lisp_Object val;
5726
5727       val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
5728       if (!NILP (val))
5729         {
5730           if (! coding_system_table[i])
5731             coding_system_table[i] = ((struct coding_system *)
5732                                       xmalloc (sizeof (struct coding_system)));
5733           setup_coding_system (val, coding_system_table[i]);
5734         }
5735       else if (coding_system_table[i])
5736         {
5737           xfree (coding_system_table[i]);
5738           coding_system_table[i] = NULL;
5739         }
5740     }
5741
5742   return Qnil;
5743 }
5744
5745 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
5746        Sset_coding_priority_internal, 0, 0, 0,
5747   "Update internal database for the current value of `coding-category-list'.\n\
5748 This function is internal use only.")
5749   ()
5750 {
5751   int i = 0, idx;
5752   Lisp_Object val;
5753
5754   val = Vcoding_category_list;
5755
5756   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
5757     {
5758       if (! SYMBOLP (XCAR (val)))
5759         break;
5760       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
5761       if (idx >= CODING_CATEGORY_IDX_MAX)
5762         break;
5763       coding_priorities[i++] = (1 << idx);
5764       val = XCDR (val);
5765     }
5766   /* If coding-category-list is valid and contains all coding
5767      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
5768      the following code saves Emacs from craching.  */
5769   while (i < CODING_CATEGORY_IDX_MAX)
5770     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
5771
5772   return Qnil;
5773 }
5774
5775 #endif /* emacs */
5776
5777 \f
5778 /*** 9. Post-amble ***/
5779
5780 void
5781 init_coding ()
5782 {
5783   conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
5784 }
5785
5786 void
5787 init_coding_once ()
5788 {
5789   int i;
5790
5791   /* Emacs' internal format specific initialize routine.  */
5792   for (i = 0; i <= 0x20; i++)
5793     emacs_code_class[i] = EMACS_control_code;
5794   emacs_code_class[0x0A] = EMACS_linefeed_code;
5795   emacs_code_class[0x0D] = EMACS_carriage_return_code;
5796   for (i = 0x21 ; i < 0x7F; i++)
5797     emacs_code_class[i] = EMACS_ascii_code;
5798   emacs_code_class[0x7F] = EMACS_control_code;
5799   for (i = 0x80; i < 0xFF; i++)
5800     emacs_code_class[i] = EMACS_invalid_code;
5801   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
5802   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
5803   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
5804   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
5805
5806   /* ISO2022 specific initialize routine.  */
5807   for (i = 0; i < 0x20; i++)
5808     iso_code_class[i] = ISO_control_code;
5809   for (i = 0x21; i < 0x7F; i++)
5810     iso_code_class[i] = ISO_graphic_plane_0;
5811   for (i = 0x80; i < 0xA0; i++)
5812     iso_code_class[i] = ISO_control_code;
5813   for (i = 0xA1; i < 0xFF; i++)
5814     iso_code_class[i] = ISO_graphic_plane_1;
5815   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
5816   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
5817   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
5818   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
5819   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
5820   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
5821   iso_code_class[ISO_CODE_ESC] = ISO_escape;
5822   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
5823   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
5824   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
5825
5826   conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
5827
5828   setup_coding_system (Qnil, &keyboard_coding);
5829   setup_coding_system (Qnil, &terminal_coding);
5830   setup_coding_system (Qnil, &safe_terminal_coding);
5831   setup_coding_system (Qnil, &default_buffer_file_coding);
5832
5833   bzero (coding_system_table, sizeof coding_system_table);
5834
5835   bzero (ascii_skip_code, sizeof ascii_skip_code);
5836   for (i = 0; i < 128; i++)
5837     ascii_skip_code[i] = 1;
5838
5839 #if defined (MSDOS) || defined (WINDOWSNT)
5840   system_eol_type = CODING_EOL_CRLF;
5841 #else
5842   system_eol_type = CODING_EOL_LF;
5843 #endif
5844
5845   inhibit_pre_post_conversion = 0;
5846 }
5847
5848 #ifdef emacs
5849
5850 void
5851 syms_of_coding ()
5852 {
5853   Qtarget_idx = intern ("target-idx");
5854   staticpro (&Qtarget_idx);
5855
5856   Qcoding_system_history = intern ("coding-system-history");
5857   staticpro (&Qcoding_system_history);
5858   Fset (Qcoding_system_history, Qnil);
5859
5860   /* Target FILENAME is the first argument.  */
5861   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
5862   /* Target FILENAME is the third argument.  */
5863   Fput (Qwrite_region, Qtarget_idx, make_number (2));
5864
5865   Qcall_process = intern ("call-process");
5866   staticpro (&Qcall_process);
5867   /* Target PROGRAM is the first argument.  */
5868   Fput (Qcall_process, Qtarget_idx, make_number (0));
5869
5870   Qcall_process_region = intern ("call-process-region");
5871   staticpro (&Qcall_process_region);
5872   /* Target PROGRAM is the third argument.  */
5873   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
5874
5875   Qstart_process = intern ("start-process");
5876   staticpro (&Qstart_process);
5877   /* Target PROGRAM is the third argument.  */
5878   Fput (Qstart_process, Qtarget_idx, make_number (2));
5879
5880   Qopen_network_stream = intern ("open-network-stream");
5881   staticpro (&Qopen_network_stream);
5882   /* Target SERVICE is the fourth argument.  */
5883   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
5884
5885   Qcoding_system = intern ("coding-system");
5886   staticpro (&Qcoding_system);
5887
5888   Qeol_type = intern ("eol-type");
5889   staticpro (&Qeol_type);
5890
5891   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
5892   staticpro (&Qbuffer_file_coding_system);
5893
5894   Qpost_read_conversion = intern ("post-read-conversion");
5895   staticpro (&Qpost_read_conversion);
5896
5897   Qpre_write_conversion = intern ("pre-write-conversion");
5898   staticpro (&Qpre_write_conversion);
5899
5900   Qno_conversion = intern ("no-conversion");
5901   staticpro (&Qno_conversion);
5902
5903   Qundecided = intern ("undecided");
5904   staticpro (&Qundecided);
5905
5906   Qcoding_system_p = intern ("coding-system-p");
5907   staticpro (&Qcoding_system_p);
5908
5909   Qcoding_system_error = intern ("coding-system-error");
5910   staticpro (&Qcoding_system_error);
5911
5912   Fput (Qcoding_system_error, Qerror_conditions,
5913         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
5914   Fput (Qcoding_system_error, Qerror_message,
5915         build_string ("Invalid coding system"));
5916
5917   Qcoding_category = intern ("coding-category");
5918   staticpro (&Qcoding_category);
5919   Qcoding_category_index = intern ("coding-category-index");
5920   staticpro (&Qcoding_category_index);
5921
5922   Vcoding_category_table
5923     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
5924   staticpro (&Vcoding_category_table);
5925   {
5926     int i;
5927     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
5928       {
5929         XVECTOR (Vcoding_category_table)->contents[i]
5930           = intern (coding_category_name[i]);
5931         Fput (XVECTOR (Vcoding_category_table)->contents[i],
5932               Qcoding_category_index, make_number (i));
5933       }
5934   }
5935
5936   Qtranslation_table = intern ("translation-table");
5937   staticpro (&Qtranslation_table);
5938   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
5939
5940   Qtranslation_table_id = intern ("translation-table-id");
5941   staticpro (&Qtranslation_table_id);
5942
5943   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
5944   staticpro (&Qtranslation_table_for_decode);
5945
5946   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
5947   staticpro (&Qtranslation_table_for_encode);
5948
5949   Qsafe_charsets = intern ("safe-charsets");
5950   staticpro (&Qsafe_charsets);
5951
5952   Qvalid_codes = intern ("valid-codes");
5953   staticpro (&Qvalid_codes);
5954
5955   Qemacs_mule = intern ("emacs-mule");
5956   staticpro (&Qemacs_mule);
5957
5958   Qraw_text = intern ("raw-text");
5959   staticpro (&Qraw_text);
5960
5961   defsubr (&Scoding_system_p);
5962   defsubr (&Sread_coding_system);
5963   defsubr (&Sread_non_nil_coding_system);
5964   defsubr (&Scheck_coding_system);
5965   defsubr (&Sdetect_coding_region);
5966   defsubr (&Sdetect_coding_string);
5967   defsubr (&Sdecode_coding_region);
5968   defsubr (&Sencode_coding_region);
5969   defsubr (&Sdecode_coding_string);
5970   defsubr (&Sencode_coding_string);
5971   defsubr (&Sdecode_sjis_char);
5972   defsubr (&Sencode_sjis_char);
5973   defsubr (&Sdecode_big5_char);
5974   defsubr (&Sencode_big5_char);
5975   defsubr (&Sset_terminal_coding_system_internal);
5976   defsubr (&Sset_safe_terminal_coding_system_internal);
5977   defsubr (&Sterminal_coding_system);
5978   defsubr (&Sset_keyboard_coding_system_internal);
5979   defsubr (&Skeyboard_coding_system);
5980   defsubr (&Sfind_operation_coding_system);
5981   defsubr (&Supdate_coding_systems_internal);
5982   defsubr (&Sset_coding_priority_internal);
5983
5984   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
5985     "List of coding systems.\n\
5986 \n\
5987 Do not alter the value of this variable manually.  This variable should be\n\
5988 updated by the functions `make-coding-system' and\n\
5989 `define-coding-system-alias'.");
5990   Vcoding_system_list = Qnil;
5991
5992   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
5993     "Alist of coding system names.\n\
5994 Each element is one element list of coding system name.\n\
5995 This variable is given to `completing-read' as TABLE argument.\n\
5996 \n\
5997 Do not alter the value of this variable manually.  This variable should be\n\
5998 updated by the functions `make-coding-system' and\n\
5999 `define-coding-system-alias'.");
6000   Vcoding_system_alist = Qnil;
6001
6002   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
6003     "List of coding-categories (symbols) ordered by priority.");
6004   {
6005     int i;
6006
6007     Vcoding_category_list = Qnil;
6008     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
6009       Vcoding_category_list
6010         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
6011                  Vcoding_category_list);
6012   }
6013
6014   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
6015     "Specify the coding system for read operations.\n\
6016 It is useful to bind this variable with `let', but do not set it globally.\n\
6017 If the value is a coding system, it is used for decoding on read operation.\n\
6018 If not, an appropriate element is used from one of the coding system alists:\n\
6019 There are three such tables, `file-coding-system-alist',\n\
6020 `process-coding-system-alist', and `network-coding-system-alist'.");
6021   Vcoding_system_for_read = Qnil;
6022
6023   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
6024     "Specify the coding system for write operations.\n\
6025 Programs bind this variable with `let', but you should not set it globally.\n\
6026 If the value is a coding system, it is used for encoding of output,\n\
6027 when writing it to a file and when sending it to a file or subprocess.\n\
6028 \n\
6029 If this does not specify a coding system, an appropriate element\n\
6030 is used from one of the coding system alists:\n\
6031 There are three such tables, `file-coding-system-alist',\n\
6032 `process-coding-system-alist', and `network-coding-system-alist'.\n\
6033 For output to files, if the above procedure does not specify a coding system,\n\
6034 the value of `buffer-file-coding-system' is used.");
6035   Vcoding_system_for_write = Qnil;
6036
6037   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
6038     "Coding system used in the latest file or process I/O.");
6039   Vlast_coding_system_used = Qnil;
6040
6041   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
6042     "*Non-nil means always inhibit code conversion of end-of-line format.\n\
6043 See info node `Coding Systems' and info node `Text and Binary' concerning\n\
6044 such conversion.");
6045   inhibit_eol_conversion = 0;
6046
6047   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
6048     "Non-nil means process buffer inherits coding system of process output.\n\
6049 Bind it to t if the process output is to be treated as if it were a file\n\
6050 read from some filesystem.");
6051   inherit_process_coding_system = 0;
6052
6053   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
6054     "Alist to decide a coding system to use for a file I/O operation.\n\
6055 The format is ((PATTERN . VAL) ...),\n\
6056 where PATTERN is a regular expression matching a file name,\n\
6057 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6058 If VAL is a coding system, it is used for both decoding and encoding\n\
6059 the file contents.\n\
6060 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6061 and the cdr part is used for encoding.\n\
6062 If VAL is a function symbol, the function must return a coding system\n\
6063 or a cons of coding systems which are used as above.\n\
6064 \n\
6065 See also the function `find-operation-coding-system'\n\
6066 and the variable `auto-coding-alist'.");
6067   Vfile_coding_system_alist = Qnil;
6068
6069   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
6070     "Alist to decide a coding system to use for a process I/O operation.\n\
6071 The format is ((PATTERN . VAL) ...),\n\
6072 where PATTERN is a regular expression matching a program name,\n\
6073 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6074 If VAL is a coding system, it is used for both decoding what received\n\
6075 from the program and encoding what sent to the program.\n\
6076 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6077 and the cdr part is used for encoding.\n\
6078 If VAL is a function symbol, the function must return a coding system\n\
6079 or a cons of coding systems which are used as above.\n\
6080 \n\
6081 See also the function `find-operation-coding-system'.");
6082   Vprocess_coding_system_alist = Qnil;
6083
6084   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
6085     "Alist to decide a coding system to use for a network I/O operation.\n\
6086 The format is ((PATTERN . VAL) ...),\n\
6087 where PATTERN is a regular expression matching a network service name\n\
6088 or is a port number to connect to,\n\
6089 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6090 If VAL is a coding system, it is used for both decoding what received\n\
6091 from the network stream and encoding what sent to the network stream.\n\
6092 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6093 and the cdr part is used for encoding.\n\
6094 If VAL is a function symbol, the function must return a coding system\n\
6095 or a cons of coding systems which are used as above.\n\
6096 \n\
6097 See also the function `find-operation-coding-system'.");
6098   Vnetwork_coding_system_alist = Qnil;
6099
6100   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
6101     "Coding system to use with system messages.");
6102   Vlocale_coding_system = Qnil;
6103
6104   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
6105     "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
6106   eol_mnemonic_unix = build_string (":");
6107
6108   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
6109     "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
6110   eol_mnemonic_dos = build_string ("\\");
6111
6112   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
6113     "*String displayed in mode line for MAC-like (CR) end-of-line format.");
6114   eol_mnemonic_mac = build_string ("/");
6115
6116   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
6117     "*String displayed in mode line when end-of-line format is not yet determined.");
6118   eol_mnemonic_undecided = build_string (":");
6119
6120   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
6121     "*Non-nil enables character translation while encoding and decoding.");
6122   Venable_character_translation = Qt;
6123
6124   DEFVAR_LISP ("standard-translation-table-for-decode",
6125     &Vstandard_translation_table_for_decode,
6126     "Table for translating characters while decoding.");
6127   Vstandard_translation_table_for_decode = Qnil;
6128
6129   DEFVAR_LISP ("standard-translation-table-for-encode",
6130     &Vstandard_translation_table_for_encode,
6131     "Table for translationg characters while encoding.");
6132   Vstandard_translation_table_for_encode = Qnil;
6133
6134   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
6135     "Alist of charsets vs revision numbers.\n\
6136 While encoding, if a charset (car part of an element) is found,\n\
6137 designate it with the escape sequence identifing revision (cdr part of the element).");
6138   Vcharset_revision_alist = Qnil;
6139
6140   DEFVAR_LISP ("default-process-coding-system",
6141                &Vdefault_process_coding_system,
6142     "Cons of coding systems used for process I/O by default.\n\
6143 The car part is used for decoding a process output,\n\
6144 the cdr part is used for encoding a text to be sent to a process.");
6145   Vdefault_process_coding_system = Qnil;
6146
6147   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
6148     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
6149 This is a vector of length 256.\n\
6150 If Nth element is non-nil, the existence of code N in a file\n\
6151 \(or output of subprocess) doesn't prevent it to be detected as\n\
6152 a coding system of ISO 2022 variant which has a flag\n\
6153 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
6154 or reading output of a subprocess.\n\
6155 Only 128th through 159th elements has a meaning.");
6156   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
6157
6158   DEFVAR_LISP ("select-safe-coding-system-function",
6159                &Vselect_safe_coding_system_function,
6160     "Function to call to select safe coding system for encoding a text.\n\
6161 \n\
6162 If set, this function is called to force a user to select a proper\n\
6163 coding system which can encode the text in the case that a default\n\
6164 coding system used in each operation can't encode the text.\n\
6165 \n\
6166 The default value is `select-safe-coding-system' (which see).");
6167   Vselect_safe_coding_system_function = Qnil;
6168
6169 }
6170
6171 char *
6172 emacs_strerror (error_number)
6173      int error_number;
6174 {
6175   char *str;
6176
6177   synchronize_system_messages_locale ();
6178   str = strerror (error_number);
6179
6180   if (! NILP (Vlocale_coding_system))
6181     {
6182       Lisp_Object dec = code_convert_string_norecord (build_string (str),
6183                                                       Vlocale_coding_system,
6184                                                       0);
6185       str = (char *) XSTRING (dec)->data;
6186     }
6187
6188   return str;
6189 }
6190
6191 #endif /* emacs */