code.delx.au - gnu-emacs/blob - src/coding.c

   1 /* Coding system handler (conversion, detection, etc.).
   2    Copyright (C) 1995,97,1998,2002,2003  Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001,2002,2003  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23 /*** TABLE OF CONTENTS ***
  24
  25   0. General comments
  26   1. Preamble
  27   2. Emacs' internal format (emacs-mule) handlers
  28   3. ISO2022 handlers
  29   4. Shift-JIS and BIG5 handlers
  30   5. CCL handlers
  31   6. End-of-line handlers
  32   7. C library functions
  33   8. Emacs Lisp library functions
  34   9. Post-amble
  35
  36 */
  37
  38 /*** 0. General comments ***/
  39
  40
  41 /*** GENERAL NOTE on CODING SYSTEMS ***
  42
  43   A coding system is an encoding mechanism for one or more character
  44   sets.  Here's a list of coding systems which Emacs can handle.  When
  45   we say "decode", it means converting some other coding system to
  46   Emacs' internal format (emacs-mule), and when we say "encode",
  47   it means converting the coding system emacs-mule to some other
  48   coding system.
  49
  50   0. Emacs' internal format (emacs-mule)
  51
  52   Emacs itself holds a multi-lingual character in buffers and strings
  53   in a special format.  Details are described in section 2.
  54
  55   1. ISO2022
  56
  57   The most famous coding system for multiple character sets.  X's
  58   Compound Text, various EUCs (Extended Unix Code), and coding
  59   systems used in Internet communication such as ISO-2022-JP are
  60   all variants of ISO2022.  Details are described in section 3.
  61
  62   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  63
  64   A coding system to encode character sets: ASCII, JISX0201, and
  65   JISX0208.  Widely used for PC's in Japan.  Details are described in
  66   section 4.
  67
  68   3. BIG5
  69
  70   A coding system to encode the character sets ASCII and Big5.  Widely
  71   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  72   described in section 4.  In this file, when we write "BIG5"
  73   (all uppercase), we mean the coding system, and when we write
  74   "Big5" (capitalized), we mean the character set.
  75
  76   4. Raw text
  77
  78   A coding system for text containing random 8-bit code.  Emacs does
  79   no code conversion on such text except for end-of-line format.
  80
  81   5. Other
  82
  83   If a user wants to read/write text encoded in a coding system not
  84   listed above, he can supply a decoder and an encoder for it as CCL
  85   (Code Conversion Language) programs.  Emacs executes the CCL program
  86   while reading/writing.
  87
  88   Emacs represents a coding system by a Lisp symbol that has a property
  89   `coding-system'.  But, before actually using the coding system, the
  90   information about it is set in a structure of type `struct
  91   coding_system' for rapid processing.  See section 6 for more details.
  92
  93 */
  94
  95 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  96
  97   How end-of-line of text is encoded depends on the operating system.
  98   For instance, Unix's format is just one byte of `line-feed' code,
  99   whereas DOS's format is two-byte sequence of `carriage-return' and
 100   `line-feed' codes.  MacOS's format is usually one byte of
 101   `carriage-return'.
 102
 103   Since text character encoding and end-of-line encoding are
 104   independent, any coding system described above can have any
 105   end-of-line format.  So Emacs has information about end-of-line
 106   format in each coding-system.  See section 6 for more details.
 107
 108 */
 109
 110 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 111
 112   These functions check if a text between SRC and SRC_END is encoded
 113   in the coding system category XXX.  Each returns an integer value in
 114   which appropriate flag bits for the category XXX are set.  The flag
 115   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 116   template for these functions.  If MULTIBYTEP is nonzero, 8-bit codes
 117   of the range 0x80..0x9F are in multibyte form.  */
 118 #if 0
 119 int
 120 detect_coding_emacs_mule (src, src_end, multibytep)
 121      unsigned char *src, *src_end;
 122      int multibytep;
 123 {
 124   ...
 125 }
 126 #endif
 127
 128 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 129
 130   These functions decode SRC_BYTES length of unibyte text at SOURCE
 131   encoded in CODING to Emacs' internal format.  The resulting
 132   multibyte text goes to a place pointed to by DESTINATION, the length
 133   of which should not exceed DST_BYTES.
 134
 135   These functions set the information about original and decoded texts
 136   in the members `produced', `produced_char', `consumed', and
 137   `consumed_char' of the structure *CODING.  They also set the member
 138   `result' to one of CODING_FINISH_XXX indicating how the decoding
 139   finished.
 140
 141   DST_BYTES zero means that the source area and destination area are
 142   overlapped, which means that we can produce a decoded text until it
 143   reaches the head of the not-yet-decoded source text.
 144
 145   Below is a template for these functions.  */
 146 #if 0
 147 static void
 148 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 149      struct coding_system *coding;
 150      unsigned char *source, *destination;
 151      int src_bytes, dst_bytes;
 152 {
 153   ...
 154 }
 155 #endif
 156
 157 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 158
 159   These functions encode SRC_BYTES length text at SOURCE from Emacs'
 160   internal multibyte format to CODING.  The resulting unibyte text
 161   goes to a place pointed to by DESTINATION, the length of which
 162   should not exceed DST_BYTES.
 163
 164   These functions set the information about original and encoded texts
 165   in the members `produced', `produced_char', `consumed', and
 166   `consumed_char' of the structure *CODING.  They also set the member
 167   `result' to one of CODING_FINISH_XXX indicating how the encoding
 168   finished.
 169
 170   DST_BYTES zero means that the source area and destination area are
 171   overlapped, which means that we can produce encoded text until it
  172   reaches the head of the not-yet-encoded source text.
 173
 174   Below is a template for these functions.  */
 175 #if 0
 176 static void
 177 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 178      struct coding_system *coding;
 179      unsigned char *source, *destination;
 180      int src_bytes, dst_bytes;
 181 {
 182   ...
 183 }
 184 #endif
 185
 186 /*** COMMONLY USED MACROS ***/
 187
 188 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
  189    get one and two bytes from the source text respectively.
 190    If there are not enough bytes in the source, they jump to
 191    `label_end_of_loop'.  The caller should set variables `coding',
 192    `src' and `src_end' to appropriate pointer in advance.  These
 193    macros are called from decoding routines `decode_coding_XXX', thus
 194    it is assumed that the source text is unibyte.  */
 195
  196 #define ONE_MORE_BYTE(c1)   /* Fetch the next source byte into C1.  */ \
  197   do {                                                          \
  198     if (src >= src_end)     /* Source is exhausted.  */         \
  199       {                                                         \
  200         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
  201         goto label_end_of_loop;                                 \
  202       }                                                         \
  203     c1 = *src++;            /* Consume exactly one byte.  */    \
  204   } while (0)
 205
  206 #define TWO_MORE_BYTES(c1, c2)  /* Fetch the next two source bytes into C1 and C2.  */ \
  207   do {                                                          \
  208     if (src + 1 >= src_end) /* Fewer than two bytes remain; consume nothing.  */ \
  209       {                                                         \
  210         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
  211         goto label_end_of_loop;                                 \
  212       }                                                         \
  213     c1 = *src++;                                                \
  214     c2 = *src++;                                                \
  215   } while (0)
 216
 217
 218 /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
 219    form if MULTIBYTEP is nonzero.  */
 220
  221 #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep)           \
  222   do {                                                          \
  223     if (src >= src_end)     /* Source is exhausted.  */         \
  224       {                                                         \
  225         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
  226         goto label_end_of_loop;                                 \
  227       }                                                         \
  228     c1 = *src++;                                                \
  229     if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL)  /* Two-byte form: the 8-bit code is the next byte minus 0x20.  */ \
  230       c1 = *src++ - 0x20;  /* NOTE(review): this read is not checked against src_end; presumably relies on well-formed multibyte text -- confirm.  */ \
  231   } while (0)
 232
 233 /* Set C to the next character at the source text pointed by `src'.
 234    If there are not enough characters in the source, jump to
  235    `label_end_of_loop'.  The caller should set variables `coding',
 236    `src', `src_end', and `translation_table' to appropriate pointers
 237    in advance.  This macro is used in encoding routines
 238    `encode_coding_XXX', thus it assumes that the source text is in
 239    multibyte form except for 8-bit characters.  8-bit characters are
 240    in multibyte form if coding->src_multibyte is nonzero, else they
 241    are represented by a single byte.  */
 242
  243 #define ONE_MORE_CHAR(c)  /* Fetch the next source character into C, translated if a table is given.  */ \
  244   do {  /* LEN and BYTES are local to this block and hide same-named caller variables here.  */ \
  245     int len = src_end - src;  /* Bytes remaining in the source.  */ \
  246     int bytes;                /* Byte length of the character being fetched.  */ \
  247     if (len <= 0)                                               \
  248       {                                                         \
  249         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
  250         goto label_end_of_loop;                                 \
  251       }                                                         \
  252     if (coding->src_multibyte                                   \
  253         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
  254       c = STRING_CHAR_AND_LENGTH (src, len, bytes);  /* Read a multibyte sequence.  */ \
  255     else                                                        \
  256       c = *src, bytes = 1;    /* Otherwise take the single byte as the character.  */ \
  257     if (!NILP (translation_table))                              \
  258       c = translate_char (translation_table, c, -1, 0, 0);      \
  259     src += bytes;             /* Advance past the character just read.  */ \
  260   } while (0)
 261
 262
 263 /* Produce a multibyte form of character C to `dst'.  Jump to
 264    `label_end_of_loop' if there's not enough space at `dst'.
 265
 266    If we are now in the middle of a composition sequence, the decoded
 267    character may be ALTCHAR (for the current composition).  In that
 268    case, the character goes to coding->cmp_data->data instead of
 269    `dst'.
 270
 271    This macro is used in decoding routines.  */
 272
  273 #define EMIT_CHAR(c)                                                    \
  274   do {  /* Step 1: write C to `dst' unless composing by a method other than RELATIVE or WITH_RULE.  */ \
  275     if (! COMPOSING_P (coding)                                          \
  276         || coding->composing == COMPOSITION_RELATIVE                    \
  277         || coding->composing == COMPOSITION_WITH_RULE)                  \
  278       {                                                                 \
  279         int bytes = CHAR_BYTES (c);                                     \
  280         if ((dst + bytes) > (dst_bytes ? dst_end : src))  /* With DST_BYTES zero the limit is the unread source at `src'.  */ \
  281           {                                                             \
  282             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
  283             goto label_end_of_loop;                                     \
  284           }                                                             \
  285         dst += CHAR_STRING (c, dst);                                    \
  286         coding->produced_char++;                                        \
  287       }                                                                 \
  288     /* Step 2: while composing (except RELATIVE), also record C as a composition component.  */ \
  289     if (COMPOSING_P (coding)                                            \
  290         && coding->composing != COMPOSITION_RELATIVE)                   \
  291       {                                                                 \
  292         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
  293         coding->composition_rule_follows  /* Set unless the method is WITH_ALTCHARS.  */ \
  294           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
  295       }                                                                 \
  296   } while (0)
  297 /* Store the single byte C at `dst' and advance `dst'.  If there is no room,
  298    set coding->result to CODING_FINISH_INSUFFICIENT_DST and jump to `label_end_of_loop'.  */
  299 #define EMIT_ONE_BYTE(c)                                        \
  300   do {                                                          \
  301     if (dst >= (dst_bytes ? dst_end : src))  /* With DST_BYTES zero the limit is `src'.  */ \
  302       {                                                         \
  303         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
  304         goto label_end_of_loop;                                 \
  305       }                                                         \
  306     *dst++ = c;                                                 \
  307   } while (0)
  308 /* Store bytes C1 and C2 at `dst', or store nothing and jump to `label_end_of_loop' if both do not fit.  */
  309 #define EMIT_TWO_BYTES(c1, c2)                                  \
  310   do {                                                          \
  311     if (dst + 2 > (dst_bytes ? dst_end : src))  /* With DST_BYTES zero the limit is `src'.  */ \
  312       {                                                         \
  313         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
  314         goto label_end_of_loop;                                 \
  315       }                                                         \
  316     *dst++ = c1, *dst++ = c2;                                   \
  317   } while (0)
  318 /* Copy the bytes from FROM up to (not including) TO to `dst'; FROM itself is advanced to TO.  */
  319 #define EMIT_BYTES(from, to)                                    \
  320   do {                                                          \
  321     if (dst + (to - from) > (dst_bytes ? dst_end : src))  /* Whole range must fit; nothing is copied otherwise.  */ \
  322       {                                                         \
  323         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
  324         goto label_end_of_loop;                                 \
  325       }                                                         \
  326     while (from < to)                                           \
  327       *dst++ = *from++;  /* FROM must be a modifiable lvalue.  */ \
  328   } while (0)
 329
 330 \f
 331 /*** 1. Preamble ***/
 332
 333 #ifdef emacs
 334 #include <config.h>
 335 #endif
 336
 337 #include <stdio.h>
 338
 339 #ifdef emacs
 340
 341 #include "lisp.h"
 342 #include "buffer.h"
 343 #include "charset.h"
 344 #include "composite.h"
 345 #include "ccl.h"
 346 #include "coding.h"
 347 #include "window.h"
 348 #include "intervals.h"
 349 #include "frame.h"
 350 #include "termhooks.h"
 351
 352 #else  /* not emacs */
 353
 354 #include "mulelib.h"
 355
 356 #endif /* not emacs */
  357 /* Lisp objects.  coding_safe_chars passes Qcoding_system to Fget and Qsafe_chars to Fplist_get.  NOTE(review): the rest are presumably symbols set up elsewhere -- confirm.  */
  358 Lisp_Object Qcoding_system, Qeol_type;
  359 Lisp_Object Qbuffer_file_coding_system;
  360 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
  361 Lisp_Object Qno_conversion, Qundecided;
  362 Lisp_Object Qcoding_system_history;
  363 Lisp_Object Qsafe_chars;
  364 Lisp_Object Qvalid_codes;
  365 /* Qinsert_file_contents and Qwrite_region are only declared here; they are defined in another file.  */
  366 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
  367 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
  368 Lisp_Object Qstart_process, Qopen_network_stream;
  369 Lisp_Object Qtarget_idx;
 370
  371 /* If a symbol has this property, evaluate the value to define the
  372    symbol as a coding system.  */
  373 Lisp_Object Qcoding_system_define_form;
  374 /* NOTE(review): presumably holds a Lisp function for selecting a safe coding system; set outside this view -- confirm.  */
  375 Lisp_Object Vselect_safe_coding_system_function;
  376 /* NOTE(review): integer flag; its meaning is not visible in this part of the file.  */
  377 int coding_system_require_warning;
 378
  379 /* Mnemonic strings for the Unix, DOS, and Mac end-of-line formats.  */
  380 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
  381 /* Mnemonic string to indicate that the end-of-line format is not yet
  382    decided.  */
  383 Lisp_Object eol_mnemonic_undecided;
  384
  385 /* End-of-line format decided by the system.  This is CODING_EOL_LF on
  386    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
  387 int system_eol_type;
 388
  389 #ifdef emacs
  390
  391 /* Information about which coding system is safe for which chars.
  392    The value has the form (GENERIC-LIST . NON-GENERIC-ALIST).
  393
  394    GENERIC-LIST is a list of generic coding systems which can encode
  395    any characters.
  396
  397    NON-GENERIC-ALIST is an alist of non-generic coding systems vs the
  398    corresponding char table that contains safe chars.  */
  399 Lisp_Object Vcoding_system_safe_chars;
  400 /* NOTE(review): presumably the list and the alist of defined coding systems -- confirm.  */
  401 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
  402 /* NOTE(review): presumably a predicate symbol and an error symbol for coding systems -- confirm.  */
  403 Lisp_Object Qcoding_system_p, Qcoding_system_error;
  404
  405 /* Coding systems emacs-mule and raw-text are for converting only
  406    end-of-line format.  */
  407 Lisp_Object Qemacs_mule, Qraw_text;
  408 /* NOTE(review): presumably the symbol naming the utf-8 coding system -- confirm.  */
  409 Lisp_Object Qutf_8;
  410
  411 /* Coding-systems are handed between Emacs Lisp programs and C internal
  412    routines by the following three variables.  */
  413 /* Coding-system for reading files and receiving data from process.  */
  414 Lisp_Object Vcoding_system_for_read;
  415 /* Coding-system for writing files and sending data to process.  */
  416 Lisp_Object Vcoding_system_for_write;
  417 /* Coding-system actually used in the latest I/O.  */
  418 Lisp_Object Vlast_coding_system_used;
  419
  420 /* A vector of length 256 which contains information about special
  421    Latin codes (especially for dealing with Microsoft codes).  */
  422 Lisp_Object Vlatin_extra_code_table;
  423
  424 /* Flag to inhibit code conversion of end-of-line format.  */
  425 int inhibit_eol_conversion;
  426
  427 /* Flag to inhibit ISO2022 escape sequence detection.  */
  428 int inhibit_iso_escape_detection;
  429
  430 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
  431 int inherit_process_coding_system;
  432
  433 /* Coding system to be used to encode text for terminal display when
  434    terminal coding system is nil.  */
  435 struct coding_system safe_terminal_coding;
  436
  437 /* Default coding system to be used to write a file.  */
  438 struct coding_system default_buffer_file_coding;
  439 /* NOTE(review): presumably alists selecting coding systems for file, process, and network I/O respectively -- confirm.  */
  440 Lisp_Object Vfile_coding_system_alist;
  441 Lisp_Object Vprocess_coding_system_alist;
  442 Lisp_Object Vnetwork_coding_system_alist;
  443 /* NOTE(review): presumably the coding system used for locale-dependent text -- confirm.  */
  444 Lisp_Object Vlocale_coding_system;
  445
  446 #endif /* emacs */
  447 /* NOTE(review): presumably property names attached to the coding-category symbols -- confirm.  */
  448 Lisp_Object Qcoding_category, Qcoding_category_index;
  449
  450 /* List of symbols `coding-category-xxx' ordered by priority.  */
  451 Lisp_Object Vcoding_category_list;
  452
  453 /* Table of coding categories (Lisp symbols).  */
  454 Lisp_Object Vcoding_category_table;
 455
  456 /* Names of the symbols for the coding categories (15 are listed).  NOTE(review): the order presumably follows the CODING_CATEGORY_IDX_xxx values -- confirm against coding.h.  */
  457 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
  458   "coding-category-emacs-mule",
  459   "coding-category-sjis",
  460   "coding-category-iso-7",
  461   "coding-category-iso-7-tight",
  462   "coding-category-iso-8-1",
  463   "coding-category-iso-8-2",
  464   "coding-category-iso-7-else",
  465   "coding-category-iso-8-else",
  466   "coding-category-ccl",
  467   "coding-category-big5",
  468   "coding-category-utf-8",
  469   "coding-category-utf-16-be",
  470   "coding-category-utf-16-le",
  471   "coding-category-raw-text",
  472   "coding-category-binary"
  473 };
 474
  475 /* Table of pointers to the coding systems corresponding to each coding
  476    category.  */
  477 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
  478
  479 /* Table of coding category masks.  The Nth element is the mask for the
  480    coding category whose priority is Nth.  */
  481 static
  482 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 483
  484 /* Flag to tell if we look up a translation table on character code
  485    conversion.  */
  486 Lisp_Object Venable_character_translation;
  487 /* Standard translation table to look up on decoding (reading).  */
  488 Lisp_Object Vstandard_translation_table_for_decode;
  489 /* Standard translation table to look up on encoding (writing).  */
  490 Lisp_Object Vstandard_translation_table_for_encode;
  491 /* NOTE(review): presumably symbols/property names related to translation tables; set up elsewhere -- confirm.  */
  492 Lisp_Object Qtranslation_table;
  493 Lisp_Object Qtranslation_table_id;
  494 Lisp_Object Qtranslation_table_for_decode;
  495 Lisp_Object Qtranslation_table_for_encode;
  496
  497 /* Alist of charsets vs revision number.  */
  498 Lisp_Object Vcharset_revision_alist;
  499
  500 /* Default coding systems used for process I/O.  */
  501 Lisp_Object Vdefault_process_coding_system;
  502
  503 /* Char table for translating Quail and self-inserting input.  */
  504 Lisp_Object Vtranslation_table_for_input;
 505
  506 /* Global flag to tell that we can't call post-read-conversion and
  507    pre-write-conversion functions.  Usually the value is zero, but it
  508    is set to 1 temporarily while such functions are running.  This is
  509    to avoid infinite recursion.  */
  510 static int inhibit_pre_post_conversion;
  511 /* NOTE(review): presumably a symbol or property name; its use is not visible in this part of the file.  */
  512 Lisp_Object Qchar_coding_system;
 513
 514 /* Return `safe-chars' property of CODING_SYSTEM (symbol).  Don't check
 515    its validity.  */
 516
 517 Lisp_Object
 518 coding_safe_chars (coding_system)
 519      Lisp_Object coding_system;
 520 {
 521   Lisp_Object coding_spec, plist, safe_chars;
 522
 523   coding_spec = Fget (coding_system, Qcoding_system);
 524   plist = XVECTOR (coding_spec)->contents[3];
 525   safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
 526   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 527 }
  528 /* Nonzero if SAFE_CHARS is Qt, or if the char table SAFE_CHARS has a non-nil entry for character C.  */
  529 #define CODING_SAFE_CHAR_P(safe_chars, c) \
  530   (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
 531
 532 \f
 533 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 534
 535 /* Emacs' internal format for representation of multiple character
 536    sets is a kind of multi-byte encoding, i.e. characters are
 537    represented by variable-length sequences of one-byte codes.
 538
 539    ASCII characters and control characters (e.g. `tab', `newline') are
 540    represented by one-byte sequences which are their ASCII codes, in
 541    the range 0x00 through 0x7F.
 542
 543    8-bit characters of the range 0x80..0x9F are represented by
 544    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 545    code + 0x20).
 546
 547    8-bit characters of the range 0xA0..0xFF are represented by
 548    one-byte sequences which are their 8-bit code.
 549
 550    The other characters are represented by a sequence of `base
 551    leading-code', optional `extended leading-code', and one or two
 552    `position-code's.  The length of the sequence is determined by the
 553    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
 554    whereas extended leading-code and position-code take the range 0xA0
 555    through 0xFF.  See `charset.h' for more details about leading-code
 556    and position-code.
 557
 558    --- CODE RANGE of Emacs' internal format ---
 559    character set        range
 560    -------------        -----
 561    ascii                0x00..0x7F
 562    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
  563    eight-bit-graphic    0xA0..0xFF
 564    ELSE                 0x81..0x9D + [0xA0..0xFF]+
 565    ---------------------------------------------
 566
 567    As this is the internal character representation, the format is
 568    usually not used externally (i.e. in a file or in a data sent to a
 569    process).  But, it is possible to have a text externally in this
 570    format (i.e. by encoding by the coding system `emacs-mule').
 571
 572    In that case, a sequence of one-byte codes has a slightly different
 573    form.
 574
 575    Firstly, all characters in eight-bit-control are represented by
 576    one-byte sequences which are their 8-bit code.
 577
 578    Next, character composition data are represented by the byte
 579    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
 580    where,
 581         METHOD is 0xF0 plus one of composition method (enum
 582         composition_method),
 583
 584         BYTES is 0xA0 plus the byte length of these composition data,
 585
 586         CHARS is 0xA0 plus the number of characters composed by these
 587         data,
 588
 589         COMPONENTs are characters of multibyte form or composition
  590         rules encoded by two bytes of ASCII codes.
 591
 592    In addition, for backward compatibility, the following formats are
 593    also recognized as composition data on decoding.
 594
 595    0x80 MSEQ ...
 596    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
 597
 598    Here,
  599         MSEQ is a multibyte form but in this special format:
 600           ASCII: 0xA0 ASCII_CODE+0x80,
 601           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
 602         RULE is a one byte code of the range 0xA0..0xF0 that
 603         represents a composition rule.
 604   */
  605 /* Table with 256 entries.  NOTE(review): presumably indexed by byte value to classify bytes of emacs-mule text; filled in elsewhere -- confirm.  */
  606 enum emacs_code_class_type emacs_code_class[256];
 607
 608 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 609    Check if a text is encoded in Emacs' internal format.  If it is,
 610    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.  */
 611
  612 static int
  613 detect_coding_emacs_mule (src, src_end, multibytep)
  614       unsigned char *src, *src_end;
  615       int multibytep;
  616 {
  617   unsigned char c;
  618   int composing = 0;  /* Nonzero from a 0x80 byte until a byte below 0xA0 is read.  */
  619   /* Dummy so that ONE_MORE_BYTE_CHECK_MULTIBYTE has a `coding' whose `result' it can set.  */
  620   struct coding_system dummy_coding;
  621   struct coding_system *coding = &dummy_coding;
  622
  623   while (1)  /* Left only by `return 0' or by the macro's jump to label_end_of_loop.  */
  624     {
  625       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
  626
  627       if (composing)
  628         {
  629           if (c < 0xA0)
  630             composing = 0;  /* A byte below 0xA0 ends the composition.  */
  631           else if (c == 0xA0)  /* 0xA0 prefixes an ASCII component stored as ASCII_CODE+0x80.  */
  632             {
  633               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
  634               c &= 0x7F;  /* Recover the ASCII code.  */
  635             }
  636           else
  637             c -= 0x20;  /* Undo the +0x20 offset used for component bytes.  */
  638         }
  639
  640       if (c < 0x20)
  641         {
  642           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
  643             return 0;  /* These control codes make the text count as not emacs-mule.  */
  644         }
  645       else if (c >= 0x80 && c < 0xA0)
  646         {
  647           if (c == 0x80)
  648             /* Old leading code for a composite character.  */
  649             composing = 1;
  650           else
  651             {
  652               unsigned char *src_base = src - 1;  /* The byte just read.  */
  653               int bytes;  /* NOTE(review): presumably set by the macro below to the sequence length -- confirm against charset.h.  */
  654
  655               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
  656                                                bytes))
  657                 return 0;  /* Not a valid multibyte sequence.  */
  658               src = src_base + bytes;  /* Skip the whole sequence.  */
  659             }
  660         }
  661     }
  662  label_end_of_loop:  /* Reached when the source ran out without a mismatch.  */
  663   return CODING_CATEGORY_MASK_EMACS_MULE;
  664 }
 665
 666
 667 /* Record the starting position START and METHOD of one composition.  */
 668
  669 #define CODING_ADD_COMPOSITION_START(coding, start, method)     \
  670   do {                                                          \
  671     struct composition_data *cmp_data = coding->cmp_data;       \
  672     int *data = cmp_data->data + cmp_data->used;  /* First free slot.  */ \
  673     coding->cmp_data_start = cmp_data->used;  /* Remember where this composition's record begins.  */ \
  674     data[0] = -1;  /* Placeholder; CODING_ADD_COMPOSITION_END stores the record length here.  */ \
  675     data[1] = cmp_data->char_offset + start;                    \
  676     data[3] = (int) method;                                     \
  677     cmp_data->used += 4;  /* Four header slots; data[2] is filled in by CODING_ADD_COMPOSITION_END.  */ \
  678   } while (0)
 679
 680 /* Record the ending position END of the current composition.  */
 681
  682 #define CODING_ADD_COMPOSITION_END(coding, end)                 \
  683   do {                                                          \
  684     struct composition_data *cmp_data = coding->cmp_data;       \
  685     int *data = cmp_data->data + coding->cmp_data_start;  /* Start of the current composition's record.  */ \
  686     data[0] = cmp_data->used - coding->cmp_data_start;  /* Number of slots used by this record.  */ \
  687     data[2] = cmp_data->char_offset + end;  /* Ending position.  */ \
  688   } while (0)
 689
 690 /* Record one COMPONENT (alternate character or composition rule).  */
 691
  692 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component)             \
  693   do {                                                                  \
  694     coding->cmp_data->data[coding->cmp_data->used++] = component;  /* Append COMPONENT.  */ \
  695     if (coding->cmp_data->used - coding->cmp_data_start                 \
  696         == COMPOSITION_DATA_MAX_BUNCH_LENGTH)  /* The record has reached its maximum length.  */ \
  697       {                                                                 \
  698         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);  /* Close it at the current character count.  */ \
  699         coding->composing = COMPOSITION_NO;                             \
  700       }                                                                 \
  701   } while (0)
 702
 703
  704 /* Get one byte from the data pointed to by SRC and increment SRC.  If SRC
  705    is not less than SRC_END, yield -1 without incrementing SRC.  */
  706
  707 #define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++)
 708
 709
 710 /* Decode one character appearing as a component of an Emacs 20 style
 711    composition sequence at SRC.  Set C to that character, store its
 712    multibyte form at P, and advance P past the stored bytes.  If no
 713    valid character is found (or SRC has reached SRC_END), set C to -1.  */
 714
 715 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p)                \
 716   do {                                                          \
 717     int bytes;                                                  \
 718                                                                 \
 719     c = SAFE_ONE_MORE_BYTE ();                                  \
 720     if (c < 0)                                                  \
 721       break;                                                    \
 722     if (CHAR_HEAD_P (c))                                        \
 723       c = -1;                                                   \
 724     else if (c == 0xA0) /* Next byte is the char + 0xA0.  */    \
 725       {                                                         \
 726         c = SAFE_ONE_MORE_BYTE ();                              \
 727         if (c < 0xA0)                                           \
 728           c = -1;                                               \
 729         else                                                    \
 730           {                                                     \
 731             c -= 0xA0;                                          \
 732             *p++ = c;                                           \
 733           }                                                     \
 734       }                                                         \
 735     else if (BASE_LEADING_CODE_P (c - 0x20))                    \
 736       {                                                         \
 737         unsigned char *p0 = p;                                  \
 738                                                                 \
 739         c -= 0x20; /* Recover the real leading code.  */        \
 740         *p++ = c;                                               \
 741         bytes = BYTES_BY_CHAR_HEAD (c);                         \
 742         while (--bytes)                                         \
 743           {                                                     \
 744             c = SAFE_ONE_MORE_BYTE ();                          \
 745             if (c < 0)                                          \
 746               break;                                            \
 747             *p++ = c;                                           \
 748           }                                                     \
 749         if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)      \
 750             || (coding->flags /* We are recovering a file.  */  \
 751                 && p0[0] == LEADING_CODE_8_BIT_CONTROL          \
 752                 && ! CHAR_HEAD_P (p0[1])))                      \
 753           c = STRING_CHAR (p0, bytes);                          \
 754         else                                                    \
 755           c = -1;                                               \
 756       }                                                         \
 757     else                                                        \
 758       c = -1;                                                   \
 759   } while (0)
 760
 761
 762 /* Decode a composition rule appearing as a component of an Emacs 20
 763    style composition sequence at SRC.  Set C to the rule, or to -1 if
 764    no valid rule is found.  Assigns the caller's `gref' and `nref'.  */
 765
 766 #define DECODE_EMACS_MULE_COMPOSITION_RULE(c)           \
 767   do {                                                  \
 768     c = SAFE_ONE_MORE_BYTE ();                          \
 769     c -= 0xA0;                                          \
 770     if (c < 0 || c >= 81) /* Valid: 9 * 9 codes.  */    \
 771       c = -1;                                           \
 772     else                                                \
 773       {                                                 \
 774         gref = c / 9, nref = c % 9;                     \
 775         c = COMPOSITION_ENCODE_RULE (gref, nref);       \
 776       }                                                 \
 777   } while (0)
 778
 779
 780 /* Decode composition sequence encoded by `emacs-mule' at the source
 781    pointed by SRC.  SRC_END is the end of source.  Store information
 782    of the composition in CODING->cmp_data.
 783
 784    For backward compatibility, decode also a composition sequence of
 785    Emacs 20 style.  In that case, the composition sequence contains
 786    characters that should be extracted into a buffer or string.  Store
 787    those characters at *DESTINATION in multibyte form and advance it.
 788
 789    Return 0 if we encounter an invalid byte sequence (this includes a
 790    composed-character count less than 1).  Return -1 if we encounter
 791    an insufficient source or destination, or insufficient space in
 792    CODING->cmp_data.  Otherwise, return the number of bytes consumed
 793    in the source.
 794 */
 795 static INLINE int
 796 decode_composition_emacs_mule (coding, src, src_end,
 797                                destination, dst_end, dst_bytes)
 798      struct coding_system *coding;
 799      unsigned char *src, *src_end, **destination, *dst_end;
 800      int dst_bytes;
 801 {
 802   unsigned char *dst = *destination;
 803   int method, data_len, nchars;
 804   unsigned char *src_base = src++; /* SRC_BASE keeps the start; SRC skips the first byte.  */
 805   /* Store components of composition.  */
 806   int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
 807   int ncomponent;
 808   /* Store multibyte form of characters to be composed.  This is for
 809      Emacs 20 style composition sequence.  */
 810   unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH];
 811   unsigned char *bufp = buf;
 812   int c, i, gref, nref;
 813
 814   if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
 815       >= COMPOSITION_DATA_SIZE)
 816     {
 817       coding->result = CODING_FINISH_INSUFFICIENT_CMP;
 818       return -1;
 819     }
 820
 821   ONE_MORE_BYTE (c);
 822   if (c - 0xF0 >= COMPOSITION_RELATIVE
 823            && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
 824     {
 825       int with_rule;
 826
 827       method = c - 0xF0;
 828       with_rule = (method == COMPOSITION_WITH_RULE
 829                    || method == COMPOSITION_WITH_RULE_ALTCHARS);
 830       ONE_MORE_BYTE (c);
 831       data_len = c - 0xA0;     /* Length of the whole sequence.  */
 832       if (data_len < 4
 833           || src_base + data_len > src_end)
 834         return 0;
 835       ONE_MORE_BYTE (c);
 836       nchars = c - 0xA0;       /* Number of composed characters.  */
 837       if (nchars < 1)          /* Must cover at least one character.  */
 838         return 0;
 839       for (ncomponent = 0; src < src_base + data_len; ncomponent++)
 840         {
 841           /* If it is longer than this, it can't be valid.  */
 842           if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
 843             return 0;
 844
 845           if (ncomponent % 2 && with_rule)
 846             {
 847               ONE_MORE_BYTE (gref);
 848               gref -= 32;
 849               ONE_MORE_BYTE (nref);
 850               nref -= 32;
 851               c = COMPOSITION_ENCODE_RULE (gref, nref);
 852             }
 853           else
 854             {
 855               int bytes;
 856               if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
 857                   || (coding->flags /* We are recovering a file.  */
 858                       && src[0] == LEADING_CODE_8_BIT_CONTROL
 859                       && ! CHAR_HEAD_P (src[1])))
 860                 c = STRING_CHAR (src, bytes);
 861               else
 862                 c = *src, bytes = 1;
 863               src += bytes;
 864             }
 865           component[ncomponent] = c;
 866         }
 867     }
 868   else
 869     {
 870       /* This may be an old Emacs 20 style format.  See the comment at
 871          the section 2 of this file.  */
 872       while (src < src_end && !CHAR_HEAD_P (*src)) src++;
 873       if (src == src_end
 874           && !(coding->mode & CODING_MODE_LAST_BLOCK))
 875         goto label_end_of_loop;
 876
 877       src_end = src;           /* Limit decoding to this sequence.  */
 878       src = src_base + 1;
 879       if (c < 0xC0)
 880         {
 881           method = COMPOSITION_RELATIVE;
 882           for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;)
 883             {
 884               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 885               if (c < 0)
 886                 break;
 887               component[ncomponent++] = c;
 888             }
 889           if (ncomponent < 2)
 890             return 0;
 891           nchars = ncomponent;
 892         }
 893       else if (c == 0xFF)
 894         {
 895           method = COMPOSITION_WITH_RULE;
 896           src++;
 897           DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 898           if (c < 0)
 899             return 0;
 900           component[0] = c;
 901           for (ncomponent = 1;
 902                ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;)
 903             {
 904               DECODE_EMACS_MULE_COMPOSITION_RULE (c);
 905               if (c < 0)
 906                 break;
 907               component[ncomponent++] = c;
 908               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 909               if (c < 0)
 910                 break;
 911               component[ncomponent++] = c;
 912             }
 913           if (ncomponent < 3)
 914             return 0;
 915           nchars = (ncomponent + 1) / 2;
 916         }
 917       else
 918         return 0;
 919     }
 920
 921   if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
 922     {
 923       CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
 924       for (i = 0; i < ncomponent; i++)
 925         CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
 926       CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
 927       if (buf < bufp)
 928         {
 929           unsigned char *p = buf;
 930           EMIT_BYTES (p, bufp);
 931           *destination += bufp - buf;
 932           coding->produced_char += nchars;
 933         }
 934       return (src - src_base);
 935     }
 936  label_end_of_loop:
 937   return -1;
 938 }
 939
 940 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
 941    Handles EOL bytes, composition data starting by 0x80, and characters.  */
 942 static void
 943 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 944      struct coding_system *coding;
 945      unsigned char *source, *destination;
 946      int src_bytes, dst_bytes;
 947 {
 948   unsigned char *src = source;
 949   unsigned char *src_end = source + src_bytes;
 950   unsigned char *dst = destination;
 951   unsigned char *dst_end = destination + dst_bytes;
 952   /* SRC_BASE remembers the start position in source in each loop.
 953      The loop will be exited when there's not enough source code, or
 954      when there's not enough destination area to produce a
 955      character.  */
 956   unsigned char *src_base;
 957
 958   coding->produced_char = 0;
 959   while ((src_base = src) < src_end)
 960     {
 961       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
 962       int bytes;
 963
 964       if (*src == '\r')
 965         {
 966           int c = *src++;
 967
 968           if (coding->eol_type == CODING_EOL_CR)
 969             c = '\n';
 970           else if (coding->eol_type == CODING_EOL_CRLF)
 971             {
 972               ONE_MORE_BYTE (c);
 973               if (c != '\n')
 974                 {
 975                   src--;
 976                   c = '\r';
 977                 }
 978             }
 979           *dst++ = c;          /* NOTE(review): stored without a DST limit check.  */
 980           coding->produced_char++;
 981           continue;
 982         }
 983       else if (*src == '\n')
 984         {
 985           if ((coding->eol_type == CODING_EOL_CR
 986                || coding->eol_type == CODING_EOL_CRLF)
 987               && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 988             {
 989               coding->result = CODING_FINISH_INCONSISTENT_EOL;
 990               goto label_end_of_loop;
 991             }
 992           *dst++ = *src++;     /* NOTE(review): likewise unchecked.  */
 993           coding->produced_char++;
 994           continue;
 995         }
 996       else if (*src == 0x80 && coding->cmp_data)
 997         {
 998           /* Start of composition data.  */
 999           int consumed  = decode_composition_emacs_mule (coding, src, src_end,
1000                                                          &dst, dst_end,
1001                                                          dst_bytes);
1002           if (consumed < 0)
1003             goto label_end_of_loop;
1004           else if (consumed > 0)
1005             {
1006               src += consumed;
1007               continue;
1008             }
1009           bytes = CHAR_STRING (*src, tmp); /* Invalid composition: emit 0x80 itself.  */
1010           p = tmp;
1011           src++;
1012         }
1013       else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
1014                || (coding->flags /* We are recovering a file.  */
1015                    && src[0] == LEADING_CODE_8_BIT_CONTROL
1016                    && ! CHAR_HEAD_P (src[1])))
1017         {
1018           p = src;
1019           src += bytes;
1020         }
1021       else
1022         {
1023           int i, c;
1024
1025           bytes = BYTES_BY_CHAR_HEAD (*src);
1026           src++;
1027           for (i = 1; i < bytes; i++)
1028             {
1029               ONE_MORE_BYTE (c);
1030               if (CHAR_HEAD_P (c))
1031                 break;
1032             }
1033           if (i < bytes)       /* Cut short: emit the first byte alone.  */
1034             {
1035               bytes = CHAR_STRING (*src_base, tmp);
1036               p = tmp;
1037               src = src_base + 1;
1038             }
1039           else
1040             {
1041               p = src_base;
1042             }
1043         }
1044       if (dst + bytes >= (dst_bytes ? dst_end : src))
1045         {
1046           coding->result = CODING_FINISH_INSUFFICIENT_DST;
1047           break;
1048         }
1049       while (bytes--) *dst++ = *p++;
1050       coding->produced_char++;
1051     }
1052  label_end_of_loop:
1053   coding->consumed = coding->consumed_char = src_base - source;
1054   coding->produced = dst - destination;
1055 }
1056
1057
1058 /* Encode composition data stored at DATA into a special byte sequence
1059    starting by 0x80 and store it at the caller's DST (on insufficient
1060    room, go to label_end_of_loop).  Update CODING->cmp_data_start and
1061    maybe CODING->cmp_data for the next call.  */
1062 #define ENCODE_COMPOSITION_EMACS_MULE(coding, data)                     \
1063   do {                                                                  \
1064     unsigned char buf[1024], *p0 = buf, *p;                             \
1065     int len = data[0];                                                  \
1066     int i;                                                              \
1067                                                                         \
1068     buf[0] = 0x80;                                                      \
1069     buf[1] = 0xF0 + data[3];    /* METHOD */                            \
1070     buf[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */           \
1071     p = buf + 4;                                                        \
1072     if (data[3] == COMPOSITION_WITH_RULE                                \
1073         || data[3] == COMPOSITION_WITH_RULE_ALTCHARS)                   \
1074       {                                                                 \
1075         p += CHAR_STRING (data[4], p);                                  \
1076         for (i = 5; i < len; i += 2)                                    \
1077           {                                                             \
1078             int gref, nref;                                             \
1079              COMPOSITION_DECODE_RULE (data[i], gref, nref);             \
1080             *p++ = 0x20 + gref;                                         \
1081             *p++ = 0x20 + nref;                                         \
1082             p += CHAR_STRING (data[i + 1], p);                          \
1083           }                                                             \
1084       }                                                                 \
1085     else                                                                \
1086       {                                                                 \
1087         for (i = 4; i < len; i++)                                       \
1088           p += CHAR_STRING (data[i], p);                                \
1089       }                                                                 \
1090     buf[2] = 0xA0 + (p - buf);  /* COMPONENTS-BYTES */                  \
1091                                                                         \
1092     if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src))              \
1093       { /* NOTE(review): (p - buf) already counts the header.  */       \
1094         coding->result = CODING_FINISH_INSUFFICIENT_DST;                \
1095         goto label_end_of_loop;                                         \
1096       }                                                                 \
1097     while (p0 < p)                                                      \
1098       *dst++ = *p0++;                                                   \
1099     coding->cmp_data_start += data[0];                                  \
1100     if (coding->cmp_data_start == coding->cmp_data->used                \
1101         && coding->cmp_data->next)                                      \
1102       {                                                                 \
1103         coding->cmp_data = coding->cmp_data->next;                      \
1104         coding->cmp_data_start = 0;                                     \
1105       }                                                                 \
1106   } while (0)
1107
1108
1109 static void encode_eol P_ ((struct coding_system *, const unsigned char *,
1110                             unsigned char *, int, int));
1111 /* Encode SOURCE as emacs-mule into DESTINATION, with composition data and EOL conversion.  */
1112 static void
1113 encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
1114      struct coding_system *coding;
1115      unsigned char *source, *destination;
1116      int src_bytes, dst_bytes;
1117 {
1118   unsigned char *src = source;
1119   unsigned char *src_end = source + src_bytes;
1120   unsigned char *dst = destination;
1121   unsigned char *dst_end = destination + dst_bytes;
1122   unsigned char *src_base;
1123   int c;
1124   int char_offset;
1125   int *data;
1126
1127   Lisp_Object translation_table; /* NOTE(review): not named again below; presumably used by ONE_MORE_CHAR.  */
1128
1129   translation_table = Qnil;
1130
1131   /* Optimization for the case that there's no composition.  */
1132   if (!coding->cmp_data || coding->cmp_data->used == 0)
1133     {
1134       encode_eol (coding, source, destination, src_bytes, dst_bytes);
1135       return;
1136     }
1137
1138   char_offset = coding->cmp_data->char_offset;
1139   data = coding->cmp_data->data + coding->cmp_data_start;
1140   while (1)                    /* No break below; left only by a goto from the macros.  */
1141     {
1142       src_base = src;
1143
1144       /* If SRC starts a composition, encode the information about the
1145          composition in advance.  */
1146       if (coding->cmp_data_start < coding->cmp_data->used
1147           && char_offset + coding->consumed_char == data[1])
1148         {
1149           ENCODE_COMPOSITION_EMACS_MULE (coding, data);
1150           char_offset = coding->cmp_data->char_offset;
1151           data = coding->cmp_data->data + coding->cmp_data_start;
1152         }
1153
1154       ONE_MORE_CHAR (c);
1155       if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF
1156                         || coding->eol_type == CODING_EOL_CR))
1157         {
1158           if (coding->eol_type == CODING_EOL_CRLF)
1159             EMIT_TWO_BYTES ('\r', c);
1160           else
1161             EMIT_ONE_BYTE ('\r');
1162         }
1163       else if (SINGLE_BYTE_CHAR_P (c))
1164         {
1165           if (coding->flags && ! ASCII_BYTE_P (c))
1166             {
1167               /* As we are auto saving, retain the multibyte form for
1168                  8-bit chars.  */
1169               unsigned char buf[MAX_MULTIBYTE_LENGTH];
1170               int bytes = CHAR_STRING (c, buf);
1171
1172               if (bytes == 1)
1173                 EMIT_ONE_BYTE (buf[0]);
1174               else
1175                 EMIT_TWO_BYTES (buf[0], buf[1]);
1176             }
1177           else
1178             EMIT_ONE_BYTE (c);
1179         }
1180       else
1181         EMIT_BYTES (src_base, src); /* Copy the multibyte form unchanged.  */
1182       coding->consumed_char++;
1183     }
1184  label_end_of_loop:
1185   coding->consumed = src_base - source;
1186   coding->produced = coding->produced_char = dst - destination;
1187   return;
1188 }
1189
1190 \f
1191 /*** 3. ISO2022 handlers ***/
1192
1193 /* The following note describes the coding system ISO2022 briefly.
1194    Since the intention of this note is to help understand the
1195    functions in this file, some parts are NOT ACCURATE or are OVERLY
1196    SIMPLIFIED.  For thorough understanding, please refer to the
1197    original document of ISO2022.  This is equivalent to the standard
1198    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
1199
1200    ISO2022 provides many mechanisms to encode several character sets
1201    in 7-bit and 8-bit environments.  For 7-bit environments, all text
1202    is encoded using bytes less than 128.  This may make the encoded
1203    text a little bit longer, but the text passes more easily through
1204    several types of gateway, some of which strip off the MSB (Most
1205    Significant Bit).
1206
1207    There are two kinds of character sets: control character sets and
1208    graphic character sets.  The former contain control characters such
1209    as `newline' and `escape' to provide control functions (control
1210    functions are also provided by escape sequences).  The latter
1211    contain graphic characters such as 'A' and '-'.  Emacs recognizes
1212    two control character sets and many graphic character sets.
1213
1214    Graphic character sets are classified into one of the following
1215    four classes, according to the number of bytes (DIMENSION) and
1216    number of characters in one dimension (CHARS) of the set:
1217    - DIMENSION1_CHARS94
1218    - DIMENSION1_CHARS96
1219    - DIMENSION2_CHARS94
1220    - DIMENSION2_CHARS96
1221
1222    In addition, each character set is assigned an identification tag,
1223    unique for each set, called the "final character" (denoted as <F>
1224    hereafter).  The <F> of each character set is decided by ECMA(*)
1225    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
1226    (0x30..0x3F are for private use only).
1227
1228    Note (*): ECMA = European Computer Manufacturers Association
1229
1230    Here are examples of graphic character sets [NAME(<F>)]:
1231         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
1232         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
1233         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
1234         o DIMENSION2_CHARS96 -- none for the moment
1235
1236    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
1237         C0 [0x00..0x1F] -- control character plane 0
1238         GL [0x20..0x7F] -- graphic character plane 0
1239         C1 [0x80..0x9F] -- control character plane 1
1240         GR [0xA0..0xFF] -- graphic character plane 1
1241
1242    A control character set is directly designated and invoked to C0 or
1243    C1 by an escape sequence.  The most common case is that:
1244    - ISO646's  control character set is designated/invoked to C0, and
1245    - ISO6429's control character set is designated/invoked to C1,
1246    and usually these designations/invocations are omitted in encoded
1247    text.  In a 7-bit environment, only C0 can be used, and a control
1248    character for C1 is encoded by an appropriate escape sequence to
1249    fit into the environment.  All control characters for C1 are
1250    defined to have corresponding escape sequences.
1251
1252    A graphic character set is at first designated to one of four
1253    graphic registers (G0 through G3), then these graphic registers are
1254    invoked to GL or GR.  These designations and invocations can be
1255    done independently.  The most common case is that G0 is invoked to
1256    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
1257    these invocations and designations are omitted in encoded text.
1258    In a 7-bit environment, only GL can be used.
1259
1260    When a graphic character set of CHARS94 is invoked to GL, codes
1261    0x20 and 0x7F of the GL area work as control characters SPACE and
1262    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
1263    be used.
1264
1265    There are two ways of invocation: locking-shift and single-shift.
1266    With locking-shift, the invocation lasts until the next different
1267    invocation, whereas with single-shift, the invocation affects the
1268    following character only and doesn't affect the locking-shift
1269    state.  Invocations are done by the following control characters or
1270    escape sequences:
1271
1272    ----------------------------------------------------------------------
1273    abbrev  function                  cntrl escape seq   description
1274    ----------------------------------------------------------------------
1275    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
1276    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
1277    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
1278    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
1279    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
1280    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
1281    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
1282    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
1283    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
1284    ----------------------------------------------------------------------
1285    (*) These are not used by any known coding system.
1286
1287    Control characters for these functions are defined by macros
1288    ISO_CODE_XXX in `coding.h'.
1289
1290    Designations are done by the following escape sequences:
1291    ----------------------------------------------------------------------
1292    escape sequence      description
1293    ----------------------------------------------------------------------
1294    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
1295    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
1296    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
1297    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
1298    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
1299    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
1300    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
1301    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
1302    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
1303    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
1304    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
1305    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
1306    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
1307    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
1308    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
1309    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
1310    ----------------------------------------------------------------------
1311
1312    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
1313    of dimension 1, chars 94, and final character <F>, etc...
1314
1315    Note (*): Although these designations are not allowed in ISO2022,
1316    Emacs accepts them on decoding, and produces them on encoding
1317    CHARS96 character sets in a coding system which is characterized as
1318    7-bit environment, non-locking-shift, and non-single-shift.
1319
1320    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
1321    '(' can be omitted.  We refer to this as "short-form" hereafter.
1322
1323    Now you may notice that there are a lot of ways of encoding the
1324    same multilingual text in ISO2022.  Actually, there exist many
1325    coding systems such as Compound Text (used in X11's inter-client
1326    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
1327    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
1328    localized platforms), and all of these are variants of ISO2022.
1329
1330    In addition to the above, Emacs handles two more kinds of escape
1331    sequences: ISO6429's direction specification and Emacs' private
1332    sequence for specifying character composition.
1333
1334    ISO6429's direction specification takes the following form:
1335         o CSI ']'      -- end of the current direction
1336         o CSI '0' ']'  -- end of the current direction
1337         o CSI '1' ']'  -- start of left-to-right text
1338         o CSI '2' ']'  -- start of right-to-left text
1339    The control character CSI (0x9B: control sequence introducer) is
1340    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
1341
1342    Character composition specification takes the following form:
1343         o ESC '0' -- start relative composition
1344         o ESC '1' -- end composition
1345         o ESC '2' -- start rule-base composition (*)
1346         o ESC '3' -- start relative composition with alternate chars  (**)
1347         o ESC '4' -- start rule-base composition with alternate chars  (**)
1348   Since these are not standard escape sequences of any ISO standard,
1349   the use of them with these meanings is restricted to Emacs only.
1350
1351   (*) This form is used only in Emacs 20.5 and older versions,
1352   but the newer versions can safely decode it.
1353   (**) This form is used only in Emacs 21.1 and newer versions,
1354   and the older versions can't decode it.
1355
1356   Here's a list of example usages of these composition escape
1357   sequences (categorized by `enum composition_method').
1358
1359   COMPOSITION_RELATIVE:
1360         ESC 0 CHAR [ CHAR ] ESC 1
1361   COMPOSITION_WITH_RULE:
1362         ESC 2 CHAR [ RULE CHAR ] ESC 1
1363   COMPOSITION_WITH_ALTCHARS:
1364         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
1365   COMPOSITION_WITH_RULE_ALTCHARS:
1366         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
1367
1368 enum iso_code_class_type iso_code_class[256];
1369 /* Nonzero if coding system IDX exists, C is a safe char for it (or CHARSET is ASCII), and it requests a designation for CHARSET.  Assigns the caller's `safe_chars'.  */
1370 #define CHARSET_OK(idx, charset, c)                                     \
1371   (coding_system_table[idx]                                             \
1372    && (charset == CHARSET_ASCII                                         \
1373        || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \
1374            CODING_SAFE_CHAR_P (safe_chars, c)))                         \
1375    && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
1376                                               charset)                  \
1377        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
1378 /* Nonzero if coding system IDX has an initial designation (>= 0) for register 1 (presumably G1 -- confirm).  */
1379 #define SHIFT_OUT_OK(idx) \
1380   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
1381 /* Nonzero unless composition is disabled for coding system IDX.  */
1382 #define COMPOSITION_OK(idx)     \
1383   (coding_system_table[idx]->composing != COMPOSITION_DISABLED)
1384
1385 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1386    Check if a text is encoded in ISO2022.  If it is, return an
1387    integer in which the appropriate flag bits among:
1388         CODING_CATEGORY_MASK_ISO_7
1389         CODING_CATEGORY_MASK_ISO_7_TIGHT
1390         CODING_CATEGORY_MASK_ISO_8_1
1391         CODING_CATEGORY_MASK_ISO_8_2
1392         CODING_CATEGORY_MASK_ISO_7_ELSE
1393         CODING_CATEGORY_MASK_ISO_8_ELSE
1394    are set.  If a code which should never appear in ISO2022 is found,
1395    return 0.  */
1396
1397 static int
1398 detect_coding_iso2022 (src, src_end, multibytep)
1399      unsigned char *src, *src_end;
1400      int multibytep;
1401 {
1402   int mask = CODING_CATEGORY_MASK_ISO;
1403   int mask_found = 0;
1404   int reg[4], shift_out = 0, single_shifting = 0;
1405   int c, c1, charset;
1406   /* Dummy for ONE_MORE_BYTE.  */
1407   struct coding_system dummy_coding;
1408   struct coding_system *coding = &dummy_coding;
1409   Lisp_Object safe_chars;
1410
1411   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
1412   while (mask && src < src_end)
1413     {
1414       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1415     retry:
1416       switch (c)
1417         {
1418         case ISO_CODE_ESC:
1419           if (inhibit_iso_escape_detection)
1420             break;
1421           single_shifting = 0;
1422           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1423           if (c >= '(' && c <= '/')
1424             {
1425               /* Designation sequence for a charset of dimension 1.  */
1426               ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1427               if (c1 < ' ' || c1 >= 0x80
1428                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
1429                 /* Invalid designation sequence.  Just ignore.  */
1430                 break;
1431               reg[(c - '(') % 4] = charset;
1432             }
1433           else if (c == '$')
1434             {
1435               /* Designation sequence for a charset of dimension 2.  */
1436               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1437               if (c >= '@' && c <= 'B')
1438                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
1439                 reg[0] = charset = iso_charset_table[1][0][c];
1440               else if (c >= '(' && c <= '/')
1441                 {
1442                   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1443                   if (c1 < ' ' || c1 >= 0x80
1444                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
1445                     /* Invalid designation sequence.  Just ignore.  */
1446                     break;
1447                   reg[(c - '(') % 4] = charset;
1448                 }
1449               else
1450                 /* Invalid designation sequence.  Just ignore.  */
1451                 break;
1452             }
1453           else if (c == 'N' || c == 'O')
1454             {
1455               /* ESC <Fe> for SS2 or SS3.  */
1456               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
1457               break;
1458             }
1459           else if (c >= '0' && c <= '4')
1460             {
1461               /* ESC <Fp> for start/end composition.  */
1462               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7))
1463                 mask_found |= CODING_CATEGORY_MASK_ISO_7;
1464               else
1465                 mask &= ~CODING_CATEGORY_MASK_ISO_7;
1466               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT))
1467                 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1468               else
1469                 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1470               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1))
1471                 mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1472               else
1473                 mask &= ~CODING_CATEGORY_MASK_ISO_8_1;
1474               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2))
1475                 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1476               else
1477                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1478               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE))
1479                 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1480               else
1481                 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1482               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))
1483                 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1484               else
1485                 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1486               break;
1487             }
1488           else
1489             /* Invalid escape sequence.  Just ignore.  */
1490             break;
1491
1492           /* We found a valid designation sequence for CHARSET.  */
1493           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
1494           c = MAKE_CHAR (charset, 0, 0);
1495           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
1496             mask_found |= CODING_CATEGORY_MASK_ISO_7;
1497           else
1498             mask &= ~CODING_CATEGORY_MASK_ISO_7;
1499           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
1500             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1501           else
1502             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1503           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
1504             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1505           else
1506             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1507           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
1508             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1509           else
1510             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1511           break;
1512
1513         case ISO_CODE_SO:
1514           if (inhibit_iso_escape_detection)
1515             break;
1516           single_shifting = 0;
1517           if (shift_out == 0
1518               && (reg[1] >= 0
1519                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
1520                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
1521             {
1522               /* Locking shift out.  */
1523               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1524               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1525             }
1526           break;
1527
1528         case ISO_CODE_SI:
1529           if (inhibit_iso_escape_detection)
1530             break;
1531           single_shifting = 0;
1532           if (shift_out == 1)
1533             {
1534               /* Locking shift in.  */
1535               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1536               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1537             }
1538           break;
1539
1540         case ISO_CODE_CSI:
1541           single_shifting = 0;
1542         case ISO_CODE_SS2:
1543         case ISO_CODE_SS3:
1544           {
1545             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
1546
1547             if (inhibit_iso_escape_detection)
1548               break;
1549             if (c != ISO_CODE_CSI)
1550               {
1551                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1552                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1553                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1554                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1555                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1556                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1557                 single_shifting = 1;
1558               }
1559             if (VECTORP (Vlatin_extra_code_table)
1560                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1561               {
1562                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1563                     & CODING_FLAG_ISO_LATIN_EXTRA)
1564                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1565                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1566                     & CODING_FLAG_ISO_LATIN_EXTRA)
1567                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1568               }
1569             mask &= newmask;
1570             mask_found |= newmask;
1571           }
1572           break;
1573
1574         default:
1575           if (c < 0x80)
1576             {
1577               single_shifting = 0;
1578               break;
1579             }
1580           else if (c < 0xA0)
1581             {
1582               single_shifting = 0;
1583               if (VECTORP (Vlatin_extra_code_table)
1584                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1585                 {
1586                   int newmask = 0;
1587
1588                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1589                       & CODING_FLAG_ISO_LATIN_EXTRA)
1590                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1591                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1592                       & CODING_FLAG_ISO_LATIN_EXTRA)
1593                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1594                   mask &= newmask;
1595                   mask_found |= newmask;
1596                 }
1597               else
1598                 return 0;
1599             }
1600           else
1601             {
1602               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1603                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1604               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1605               /* Check the length of succeeding codes of the range
1606                  0xA0..0FF.  If the byte length is odd, we exclude
1607                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1608                  when we are not single shifting.  */
1609               if (!single_shifting
1610                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1611                 {
1612                   int i = 1;
1613
1614                   c = -1;
1615                   while (src < src_end)
1616                     {
1617                       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1618                       if (c < 0xA0)
1619                         break;
1620                       i++;
1621                     }
1622
1623                   if (i & 1 && src < src_end)
1624                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1625                   else
1626                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1627                   if (c >= 0)
1628                     /* This means that we have read one extra byte.  */
1629                     goto retry;
1630                 }
1631             }
1632           break;
1633         }
1634     }
1635  label_end_of_loop:
1636   return (mask & mask_found);
1637 }
1638
/* Build the character of charset CHARSET whose 1st position code is C1
   and whose 2nd position code is C2, and yield its character code.
   When the variable `translation_table' is non-nil, yield instead the
   code obtained by translating that character through the table.  */

#define DECODE_ISO_CHARACTER(charset, c1, c2)                   \
  (!NILP (translation_table)                                    \
   ? translate_char (translation_table, -1, charset, c1, c2)    \
   : MAKE_CHAR (charset, c1, c2))
1648
1649 /* Set designation state into CODING.
        Look up the charset that ISO_CHARSET_TABLE gives for DIMENSION,
        CHARS and FINAL_CHAR and record it as the charset designated to
        register REG of CODING.  When CODING_MODE_DIRECTION is set in
        coding->mode and the charset has a reverse charset, the reverse
        charset is recorded instead.

        Control jumps to label_invalid_code at the expansion site when
        FINAL_CHAR is outside '0'..127, when no charset is found, when
        the charset is neither the one requested for REG nor accepted by
        CODING_SAFE_CHAR_P, or when ASCII is designated to register 0
        while last_invalid_designation_register is 0.

        The expansion site must provide the variables `coding' and
        `safe_chars'.
        NOTE(review): REG and FINAL_CHAR are expanded unparenthesized and
        more than once, so pass side-effect-free expressions.  */
1650 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
1651   do {                                                                     \
1652     int charset, c;                                                        \
1653                                                                            \
1654     if (final_char < '0' || final_char >= 128)                             \
1655       goto label_invalid_code;                                             \
1656     charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
1657                                  make_number (chars),                      \
1658                                  make_number (final_char));                \
1659     c = MAKE_CHAR (charset, 0, 0);                                         \
1660     if (charset >= 0                                                       \
1661         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
1662             || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
1663       {                                                                    \
1664         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
1665             && reg == 0                                                    \
1666             && charset == CHARSET_ASCII)                                   \
1667           {                                                                \
1668             /* We should insert this designation sequence as is so         \
1669                that it is surely written back to a file.  */               \
1670             coding->spec.iso2022.last_invalid_designation_register = -1;   \
1671             goto label_invalid_code;                                       \
1672           }                                                                \
1673         coding->spec.iso2022.last_invalid_designation_register = -1;       \
1674         if ((coding->mode & CODING_MODE_DIRECTION)                         \
1675             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
1676           charset = CHARSET_REVERSE_CHARSET (charset);                     \
1677         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
1678       }                                                                    \
1679     else                                                                   \
1680       {                                                                    \
1681         coding->spec.iso2022.last_invalid_designation_register = reg;      \
1682         goto label_invalid_code;                                           \
1683       }                                                                    \
1684   } while (0)
1685
1686 /* Allocate a memory block for storing information about compositions.
1687    The block is chained to the already allocated blocks.  */
1688
1689 void
1690 coding_allocate_composition_data (coding, char_offset)
1691      struct coding_system *coding;
1692      int char_offset;
1693 {
1694   struct composition_data *cmp_data
1695     = (struct composition_data *) xmalloc (sizeof *cmp_data);
1696
1697   cmp_data->char_offset = char_offset;
1698   cmp_data->used = 0;
1699   cmp_data->prev = coding->cmp_data;
1700   cmp_data->next = NULL;
1701   if (coding->cmp_data)
1702     coding->cmp_data->next = cmp_data;
1703   coding->cmp_data = cmp_data;
1704   coding->cmp_data_start = 0;
1705   coding->composing = COMPOSITION_NO;
1706 }
1707
1708 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
1709    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
1710    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
1711    ESC 3 : altchar composition :  ESC 3 ALT ... ESC 0 CHAR ... ESC 1
1712    ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1

        C1 is the byte that followed ESC.  The expansion site must
        provide `coding', `dst' and the label label_end_of_loop.

        When composition is disabled for CODING, ESC and C1 & 0x7f are
        copied to DST and counted as two produced characters.
        NOTE(review): no check against the end of the destination is
        visible in this macro -- presumably the expansion site
        guarantees room; confirm.  */
1714
1715 #define DECODE_COMPOSITION_START(c1)                                       \
1716   do {                                                                     \
1717     if (coding->composing == COMPOSITION_DISABLED)                         \
1718       {                                                                    \
1719         *dst++ = ISO_CODE_ESC;                                             \
1720         *dst++ = c1 & 0x7f;                                                \
1721         coding->produced_char += 2;                                        \
1722       }                                                                    \
1723     else if (!COMPOSING_P (coding))                                        \
1724       {                                                                    \
1725         /* This is surely the start of a composition.  We must be sure     \
1726            that coding->cmp_data has enough space to store the             \
1727            information about the composition.  If not, terminate the       \
1728            current decoding loop, allocate one more memory block for       \
1729            coding->cmp_data in the caller, then start the decoding         \
1730            loop again.  We can't allocate memory here directly because     \
1731            it may cause buffer/string relocation.  */                      \
1732         if (!coding->cmp_data                                              \
1733             || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1734                 >= COMPOSITION_DATA_SIZE))                                 \
1735           {                                                                \
1736             coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
1737             goto label_end_of_loop;                                        \
1738           }                                                                \
1739         coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE              \
1740                              : c1 == '2' ? COMPOSITION_WITH_RULE           \
1741                              : c1 == '3' ? COMPOSITION_WITH_ALTCHARS       \
1742                              : COMPOSITION_WITH_RULE_ALTCHARS);            \
1743         CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
1744                                       coding->composing);                  \
1745         coding->composition_rule_follows = 0;                              \
1746       }                                                                    \
1747     else                                                                   \
1748       {                                                                    \
1749         /* We are already handling a composition.  If the method is        \
1750            the following two, the codes following the current escape       \
1751            sequence are actual characters stored in a buffer.  */          \
1752         if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
1753             || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
1754           {                                                                \
1755             coding->composing = COMPOSITION_RELATIVE;                      \
1756             coding->composition_rule_follows = 0;                          \
1757           }                                                                \
1758       }                                                                    \
1759   } while (0)
1760
1761 /* Handle composition end sequence ESC 1.
        C1 is the byte that followed ESC.  The expansion site must
        provide `coding' and `dst'.  If no composition is in progress,
        ESC and C1 are copied to DST and counted as two produced
        characters; otherwise the end of the composition is recorded at
        the current produced-character count and coding->composing is
        reset to COMPOSITION_NO.  */
1762
1763 #define DECODE_COMPOSITION_END(c1)                                      \
1764   do {                                                                  \
1765     if (! COMPOSING_P (coding))                                         \
1766       {                                                                 \
1767         *dst++ = ISO_CODE_ESC;                                          \
1768         *dst++ = c1;                                                    \
1769         coding->produced_char += 2;                                     \
1770       }                                                                 \
1771     else                                                                \
1772       {                                                                 \
1773         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
1774         coding->composing = COMPOSITION_NO;                             \
1775       }                                                                 \
1776   } while (0)
1777
1778 /* Decode a composition rule from the byte C1 (and maybe one more byte
1779    from SRC) and store one encoded composition rule in
1780    coding->cmp_data.

        C1 must be a modifiable variable: 32 is subtracted from it in
        place.  A resulting value below 81 is the one-byte old format,
        split into two references as C1 / 9 and C1 % 9, with a reference
        of 4 replaced by 10.  A value from 81 to 92 is the two-byte new
        format; the second byte is read with ONE_MORE_BYTE into the
        variable `c2' of the expansion site.  For any other value the
        rule stored is 0.  Afterwards composition_rule_follows is
        cleared.  */
1781
1782 #define DECODE_COMPOSITION_RULE(c1)                                     \
1783   do {                                                                  \
1784     int rule = 0;                                                       \
1785     (c1) -= 32;                                                         \
1786     if (c1 < 81)                /* old format (before ver.21) */        \
1787       {                                                                 \
1788         int gref = (c1) / 9;                                            \
1789         int nref = (c1) % 9;                                            \
1790         if (gref == 4) gref = 10;                                       \
1791         if (nref == 4) nref = 10;                                       \
1792         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
1793       }                                                                 \
1794     else if (c1 < 93)           /* new format (after ver.21) */         \
1795       {                                                                 \
1796         ONE_MORE_BYTE (c2);                                             \
1797         rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);              \
1798       }                                                                 \
1799     CODING_ADD_COMPOSITION_COMPONENT (coding, rule);                    \
1800     coding->composition_rule_follows = 0;                               \
1801   } while (0)
1802
1803
1804 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1805
1806 static void
1807 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1808      struct coding_system *coding;
1809      unsigned char *source, *destination;
1810      int src_bytes, dst_bytes;
1811 {
1812   unsigned char *src = source;
1813   unsigned char *src_end = source + src_bytes;
1814   unsigned char *dst = destination;
1815   unsigned char *dst_end = destination + dst_bytes;
1816   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1817   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1818   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1819   /* SRC_BASE remembers the start position in source in each loop.
1820      The loop will be exited when there's not enough source code
1821      (within macro ONE_MORE_BYTE), or when there's not enough
1822      destination area to produce a character (within macro
1823      EMIT_CHAR).  */
1824   unsigned char *src_base;
1825   int c, charset;
1826   Lisp_Object translation_table;
1827   Lisp_Object safe_chars;
1828
1829   safe_chars = coding_safe_chars (coding->symbol);
1830
1831   if (NILP (Venable_character_translation))
1832     translation_table = Qnil;
1833   else
1834     {
1835       translation_table = coding->translation_table_for_decode;
1836       if (NILP (translation_table))
1837         translation_table = Vstandard_translation_table_for_decode;
1838     }
1839
1840   coding->result = CODING_FINISH_NORMAL;
1841
1842   while (1)
1843     {
1844       int c1, c2 = 0;
1845
1846       src_base = src;
1847       ONE_MORE_BYTE (c1);
1848
1849       /* We produce no character or one character.  */
1850       switch (iso_code_class [c1])
1851         {
1852         case ISO_0x20_or_0x7F:
1853           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1854             {
1855               DECODE_COMPOSITION_RULE (c1);
1856               continue;
1857             }
1858           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1859             {
1860               /* This is SPACE or DEL.  */
1861               charset = CHARSET_ASCII;
1862               break;
1863             }
1864           /* This is a graphic character, we fall down ...  */
1865
1866         case ISO_graphic_plane_0:
1867           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1868             {
1869               DECODE_COMPOSITION_RULE (c1);
1870               continue;
1871             }
1872           charset = charset0;
1873           break;
1874
1875         case ISO_0xA0_or_0xFF:
1876           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1877               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1878             goto label_invalid_code;
1879           /* This is a graphic character, we fall down ... */
1880
1881         case ISO_graphic_plane_1:
1882           if (charset1 < 0)
1883             goto label_invalid_code;
1884           charset = charset1;
1885           break;
1886
1887         case ISO_control_0:
1888           if (COMPOSING_P (coding))
1889             DECODE_COMPOSITION_END ('1');
1890
1891           /* All ISO2022 control characters in this class have the
1892              same representation in Emacs internal format.  */
1893           if (c1 == '\n'
1894               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1895               && (coding->eol_type == CODING_EOL_CR
1896                   || coding->eol_type == CODING_EOL_CRLF))
1897             {
1898               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1899               goto label_end_of_loop;
1900             }
1901           charset = CHARSET_ASCII;
1902           break;
1903
1904         case ISO_control_1:
1905           if (COMPOSING_P (coding))
1906             DECODE_COMPOSITION_END ('1');
1907           goto label_invalid_code;
1908
1909         case ISO_carriage_return:
1910           if (COMPOSING_P (coding))
1911             DECODE_COMPOSITION_END ('1');
1912
1913           if (coding->eol_type == CODING_EOL_CR)
1914             c1 = '\n';
1915           else if (coding->eol_type == CODING_EOL_CRLF)
1916             {
1917               ONE_MORE_BYTE (c1);
1918               if (c1 != ISO_CODE_LF)
1919                 {
1920                   src--;
1921                   c1 = '\r';
1922                 }
1923             }
1924           charset = CHARSET_ASCII;
1925           break;
1926
1927         case ISO_shift_out:
1928           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1929               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1930             goto label_invalid_code;
1931           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1932           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1933           continue;
1934
1935         case ISO_shift_in:
1936           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1937             goto label_invalid_code;
1938           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1939           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1940           continue;
1941
1942         case ISO_single_shift_2_7:
1943         case ISO_single_shift_2:
1944           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1945             goto label_invalid_code;
1946           /* SS2 is handled as an escape sequence of ESC 'N' */
1947           c1 = 'N';
1948           goto label_escape_sequence;
1949
1950         case ISO_single_shift_3:
1951           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1952             goto label_invalid_code;
1953           /* SS3 is handled as an escape sequence of ESC 'O' */
1954           c1 = 'O';
1955           goto label_escape_sequence;
1956
1957         case ISO_control_sequence_introducer:
1958           /* CSI is handled as an escape sequence of ESC '[' ...  */
1959           c1 = '[';
1960           goto label_escape_sequence;
1961
1962         case ISO_escape:
1963           ONE_MORE_BYTE (c1);
1964         label_escape_sequence:
1965           /* Escape sequences handled by Emacs are invocation,
1966              designation, direction specification, and character
1967              composition specification.  */
1968           switch (c1)
1969             {
1970             case '&':           /* revision of following character set */
1971               ONE_MORE_BYTE (c1);
1972               if (!(c1 >= '@' && c1 <= '~'))
1973                 goto label_invalid_code;
1974               ONE_MORE_BYTE (c1);
1975               if (c1 != ISO_CODE_ESC)
1976                 goto label_invalid_code;
1977               ONE_MORE_BYTE (c1);
1978               goto label_escape_sequence;
1979
1980             case '$':           /* designation of 2-byte character set */
1981               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1982                 goto label_invalid_code;
1983               ONE_MORE_BYTE (c1);
1984               if (c1 >= '@' && c1 <= 'B')
1985                 {       /* designation of JISX0208.1978, GB2312.1980,
1986                            or JISX0208.1980 */
1987                   DECODE_DESIGNATION (0, 2, 94, c1);
1988                 }
1989               else if (c1 >= 0x28 && c1 <= 0x2B)
1990                 {       /* designation of DIMENSION2_CHARS94 character set */
1991                   ONE_MORE_BYTE (c2);
1992                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1993                 }
1994               else if (c1 >= 0x2C && c1 <= 0x2F)
1995                 {       /* designation of DIMENSION2_CHARS96 character set */
1996                   ONE_MORE_BYTE (c2);
1997                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1998                 }
1999               else
2000                 goto label_invalid_code;
2001               /* We must update these variables now.  */
2002               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2003               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
2004               continue;
2005
2006             case 'n':           /* invocation of locking-shift-2 */
2007               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
2008                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
2009                 goto label_invalid_code;
2010               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
2011               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2012               continue;
2013
2014             case 'o':           /* invocation of locking-shift-3 */
2015               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
2016                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
2017                 goto label_invalid_code;
2018               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
2019               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2020               continue;
2021
2022             case 'N':           /* invocation of single-shift-2 */
2023               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2024                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
2025                 goto label_invalid_code;
2026               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
2027               ONE_MORE_BYTE (c1);
2028               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2029                 goto label_invalid_code;
2030               break;
2031
2032             case 'O':           /* invocation of single-shift-3 */
2033               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2034                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
2035                 goto label_invalid_code;
2036               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
2037               ONE_MORE_BYTE (c1);
2038               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2039                 goto label_invalid_code;
2040               break;
2041
2042             case '0': case '2': case '3': case '4': /* start composition */
2043               DECODE_COMPOSITION_START (c1);
2044               continue;
2045
2046             case '1':           /* end composition */
2047               DECODE_COMPOSITION_END (c1);
2048               continue;
2049
2050             case '[':           /* specification of direction */
2051               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
2052                 goto label_invalid_code;
2053               /* For the moment, nested direction is not supported.
2054                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
2055                  left-to-right, and nonzero means right-to-left.  */
2056               ONE_MORE_BYTE (c1);
2057               switch (c1)
2058                 {
2059                 case ']':       /* end of the current direction */
2060                   coding->mode &= ~CODING_MODE_DIRECTION;
2061
2062                 case '0':       /* end of the current direction */
2063                 case '1':       /* start of left-to-right direction */
2064                   ONE_MORE_BYTE (c1);
2065                   if (c1 == ']')
2066                     coding->mode &= ~CODING_MODE_DIRECTION;
2067                   else
2068                     goto label_invalid_code;
2069                   break;
2070
2071                 case '2':       /* start of right-to-left direction */
2072                   ONE_MORE_BYTE (c1);
2073                   if (c1 == ']')
2074                     coding->mode |= CODING_MODE_DIRECTION;
2075                   else
2076                     goto label_invalid_code;
2077                   break;
2078
2079                 default:
2080                   goto label_invalid_code;
2081                 }
2082               continue;
2083
2084             case '%':
2085               if (COMPOSING_P (coding))
2086                 DECODE_COMPOSITION_END ('1');
2087               ONE_MORE_BYTE (c1);
2088               if (c1 == '/')
2089                 {
2090                   /* CTEXT extended segment:
2091                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
2092                      We keep these bytes as is for the moment.
2093                      They may be decoded by post-read-conversion.  */
2094                   int dim, M, L;
2095                   int size, required;
2096                   int produced_chars;
2097
2098                   ONE_MORE_BYTE (dim);
2099                   ONE_MORE_BYTE (M);
2100                   ONE_MORE_BYTE (L);
2101                   size = ((M - 128) * 128) + (L - 128);
2102                   required = 8 + size * 2;
2103                   if (dst + required > (dst_bytes ? dst_end : src))
2104                     goto label_end_of_loop;
2105                   *dst++ = ISO_CODE_ESC;
2106                   *dst++ = '%';
2107                   *dst++ = '/';
2108                   *dst++ = dim;
2109                   produced_chars = 4;
2110                   dst += CHAR_STRING (M, dst), produced_chars++;
2111                   dst += CHAR_STRING (L, dst), produced_chars++;
2112                   while (size-- > 0)
2113                     {
2114                       ONE_MORE_BYTE (c1);
2115                       dst += CHAR_STRING (c1, dst), produced_chars++;
2116                     }
2117                   coding->produced_char += produced_chars;
2118                 }
2119               else if (c1 == 'G')
2120                 {
2121                   unsigned char *d = dst;
2122                   int produced_chars;
2123
2124                   /* XFree86 extension for embedding UTF-8 in CTEXT:
2125                      ESC % G --UTF-8-BYTES-- ESC % @
2126                      We keep these bytes as is for the moment.
2127                      They may be decoded by post-read-conversion.  */
2128                   if (d + 6 > (dst_bytes ? dst_end : src))
2129                     goto label_end_of_loop;
2130                   *d++ = ISO_CODE_ESC;
2131                   *d++ = '%';
2132                   *d++ = 'G';
2133                   produced_chars = 3;
2134                   while (d + 1 < (dst_bytes ? dst_end : src))
2135                     {
2136                       ONE_MORE_BYTE (c1);
2137                       if (c1 == ISO_CODE_ESC
2138                           && src + 1 < src_end
2139                           && src[0] == '%'
2140                           && src[1] == '@')
2141                         {
2142                           src += 2;
2143                           break;
2144                         }
2145                       d += CHAR_STRING (c1, d), produced_chars++;
2146                     }
2147                   if (d + 3 > (dst_bytes ? dst_end : src))
2148                     goto label_end_of_loop;
2149                   *d++ = ISO_CODE_ESC;
2150                   *d++ = '%';
2151                   *d++ = '@';
2152                   dst = d;
2153                   coding->produced_char += produced_chars + 3;
2154                 }
2155               else
2156                 goto label_invalid_code;
2157               continue;
2158
2159             default:
2160               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
2161                 goto label_invalid_code;
2162               if (c1 >= 0x28 && c1 <= 0x2B)
2163                 {       /* designation of DIMENSION1_CHARS94 character set */
2164                   ONE_MORE_BYTE (c2);
2165                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
2166                 }
2167               else if (c1 >= 0x2C && c1 <= 0x2F)
2168                 {       /* designation of DIMENSION1_CHARS96 character set */
2169                   ONE_MORE_BYTE (c2);
2170                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
2171                 }
2172               else
2173                 goto label_invalid_code;
2174               /* We must update these variables now.  */
2175               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2176               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
2177               continue;
2178             }
2179         }
2180
2181       /* Now we know CHARSET and 1st position code C1 of a character.
2182          Produce a multibyte sequence for that character while getting
2183          2nd position code C2 if necessary.  */
2184       if (CHARSET_DIMENSION (charset) == 2)
2185         {
2186           ONE_MORE_BYTE (c2);
2187           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
2188             /* C2 is not in a valid range.  */
2189             goto label_invalid_code;
2190         }
2191       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2192       EMIT_CHAR (c);
2193       continue;
2194
2195     label_invalid_code:
2196       coding->errors++;
2197       if (COMPOSING_P (coding))
2198         DECODE_COMPOSITION_END ('1');
2199       src = src_base;
2200       c = *src++;
2201       EMIT_CHAR (c);
2202     }
2203
2204  label_end_of_loop:
2205   coding->consumed = coding->consumed_char = src_base - source;
2206   coding->produced = dst - destination;
2207   return;
2208 }
2209
2210
2211 /* ISO2022 encoding stuff.  */
2212
2213 /*
2214    It is not enough to say just "ISO2022" on encoding, we have to
2215    specify more details.  In Emacs, each ISO2022 coding system
2216    variant has the following specifications:
2217         1. Initial designation to G0 through G3.
2218         2. Allows short-form designation?
2219         3. ASCII should be designated to G0 before control characters?
2220         4. ASCII should be designated to G0 at end of line?
2221         5. 7-bit environment or 8-bit environment?
2222         6. Use locking-shift?
2223         7. Use Single-shift?
2224    And the following two are only for Japanese:
2225         8. Use ASCII in place of JIS0201-1976-Roman?
2226         9. Use JISX0208-1983 in place of JISX0208-1978?
2227    These specifications are encoded in `coding->flags' as flag bits
2228    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
2229    details.
2230 */
2231
/* Write at DST the escape sequence that designates CHARSET to graphic
   register REG, advance DST past it, and record the designation in
   CODING.  A revision number below 255 is announced first with
   `ESC & <'@' + revision>'.  The short form (no intermediate
   character) is used for a 94-character set whose dimension is not 1,
   designated to register 0, whose <final-char> is '@', 'A', or 'B',
   provided CODING has CODING_FLAG_ISO_SHORT_FORM set.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
    char *intermediate_char_94 = "()*+";                                \
    char *intermediate_char_96 = ",-./";                                \
    int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
                                                                        \
    /* Revision announcement, only for revision numbers below 255.  */  \
    if (revision < 255)                                                 \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + revision;                                        \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    /* `$' marks a charset whose dimension is not 1.  */                \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      *dst++ = '$';                                                     \
    /* Intermediate character selecting REG; it is left out only in    \
       the short-form case described above.  */                         \
    if (CHARSET_CHARS (charset) != 94)                                  \
      *dst++ = (unsigned char) (intermediate_char_96[reg]);             \
    else if (CHARSET_DIMENSION (charset) == 1                           \
             || ! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)          \
             || reg != 0                                                \
             || final_char < '@' || final_char > 'B')                   \
      *dst++ = (unsigned char) (intermediate_char_94[reg]);             \
    *dst++ = final_char;                                                \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
  } while (0)
2274
/* The following two macros write the ISO2022 single-shift-2 and
   single-shift-3 functions at DST: the escape sequence (`ESC N' or
   `ESC O') when CODING has CODING_FLAG_ISO_SEVEN_BITS set, the
   control character SS2 or SS3 otherwise.  Both then flag CODING as
   single-shifting.  */

#define ENCODE_SINGLE_SHIFT_2                           \
  do {                                                  \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)) \
      *dst++ = ISO_CODE_SS2;                            \
    else                                                \
      {                                                 \
        *dst++ = ISO_CODE_ESC;                          \
        *dst++ = 'N';                                   \
      }                                                 \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
  } while (0)

#define ENCODE_SINGLE_SHIFT_3                           \
  do {                                                  \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)) \
      *dst++ = ISO_CODE_SS3;                            \
    else                                                \
      {                                                 \
        *dst++ = ISO_CODE_ESC;                          \
        *dst++ = 'O';                                   \
      }                                                 \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
  } while (0)
2296
/* The following four macros write the ISO2022 locking-shift functions
   at DST (SI, SO, `ESC n', and `ESC o' respectively) and record in
   CODING that graphic plane 0 now invokes graphic register 0, 1, 2,
   or 3.  */

#define ENCODE_SHIFT_IN                         \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
    *dst++ = ISO_CODE_SI;                       \
  } while (0)

#define ENCODE_SHIFT_OUT                        \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
    *dst++ = ISO_CODE_SO;                       \
  } while (0)

#define ENCODE_LOCKING_SHIFT_2                  \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
    *dst++ = ISO_CODE_ESC;                      \
    *dst++ = 'n';                               \
  } while (0)

#define ENCODE_LOCKING_SHIFT_3                  \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
    *dst++ = ISO_CODE_ESC;                      \
    *dst++ = 'o';                               \
  } while (0)
2324
/* Write at DST the code for a DIMENSION1 character of character set
   CHARSET whose position-code is C1.  The byte is written with the
   high bit cleared or set according to how CHARSET is reachable
   (pending single-shift, graphic plane 0, or graphic plane 1).  When
   CHARSET is reachable in none of these ways, the needed designation
   and invocation codes are written first and the check is repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    /* A single-shift was just produced: this byte completes it.  */    \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        *dst++ = ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)          \
                  ? (c1 & 0x7F) : (c1 | 0x80));                         \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    /* CHARSET is invoked to graphic plane 0.  */                       \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    /* CHARSET is invoked to graphic plane 1.  */                       \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to any graphic plane yet.  Invoke it     \
       (designating it to some graphic register first if needed),      \
       then go around again to produce the character itself.  */        \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
2357
/* Write at DST the codes for a DIMENSION2 character of character set
   CHARSET whose position-codes are C1 and C2.  Both bytes are written
   with the high bit cleared or set according to how CHARSET is
   reachable (pending single-shift, graphic plane 0, or graphic plane
   1).  When CHARSET is reachable in none of these ways, the needed
   designation and invocation codes are written first and the check is
   repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    /* A single-shift was just produced: these bytes complete it.  */   \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          {                                                             \
            *dst++ = c1 & 0x7F;                                         \
            *dst++ = c2 & 0x7F;                                         \
          }                                                             \
        else                                                            \
          {                                                             \
            *dst++ = c1 | 0x80;                                         \
            *dst++ = c2 | 0x80;                                         \
          }                                                             \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    /* CHARSET is invoked to graphic plane 0.  */                       \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        *dst++ = c2 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    /* CHARSET is invoked to graphic plane 1.  */                       \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        *dst++ = c2 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to any graphic plane yet.  Invoke it     \
       (designating it to some graphic register first if needed),      \
       then go around again to produce the character itself.  */        \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
2390
/* Encode character C at DST.  C is split into a charset and position
   codes.  A character of an undefined charset has its position codes
   written as they are (the second one only when it is not negative).
   Otherwise the character goes through the DIMENSION1 or DIMENSION2
   macro, after replacing ASCII by charset_latin_jisx0201 when CODING
   has CODING_FLAG_ISO_USE_ROMAN set, and charset_jisx0208 by
   charset_jisx0208_1978 when it has CODING_FLAG_ISO_USE_OLDJIS set.  */

#define ENCODE_ISO_CHARACTER(c)                                 \
  do {                                                          \
    int charset, c1, c2;                                        \
                                                                \
    SPLIT_CHAR (c, charset, c1, c2);                            \
    if (! CHARSET_DEFINED_P (charset))                          \
      {                                                         \
        *dst++ = c1;                                            \
        if (c2 >= 0)                                            \
          *dst++ = c2;                                          \
      }                                                         \
    else if (CHARSET_DIMENSION (charset) == 1)                  \
      {                                                         \
        if (charset == CHARSET_ASCII                            \
            && (coding->flags & CODING_FLAG_ISO_USE_ROMAN))     \
          charset = charset_latin_jisx0201;                     \
        ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1);          \
      }                                                         \
    else                                                        \
      {                                                         \
        if (charset == charset_jisx0208                         \
            && (coding->flags & CODING_FLAG_ISO_USE_OLDJIS))    \
          charset = charset_jisx0208_1978;                      \
        ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2);      \
      }                                                         \
  } while (0)
2420
2421
/* Instead of encoding character C, encode CODING_REPLACEMENT_CHARACTER
   in its place: twice when the width of C's charset exceeds 1, once
   otherwise.  */

#define ENCODE_UNSAFE_CHARACTER(c)                              \
  do {                                                          \
    int repeat = (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1) ? 2 : 1; \
                                                                \
    while (repeat-- > 0)                                        \
      ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER);      \
  } while (0)
2430
2431
2432 /* Produce designation and invocation codes at a place pointed by DST
2433    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
2434    Return new DST.  */
2435
2436 unsigned char *
2437 encode_invocation_designation (charset, coding, dst)
2438      int charset;
2439      struct coding_system *coding;
2440      unsigned char *dst;
2441 {
2442   int reg;                      /* graphic register number */
2443
2444   /* At first, check designations.  */
2445   for (reg = 0; reg < 4; reg++)
2446     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
2447       break;
2448
2449   if (reg >= 4)
2450     {
2451       /* CHARSET is not yet designated to any graphic registers.  */
2452       /* At first check the requested designation.  */
2453       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2454       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
2455         /* Since CHARSET requests no special designation, designate it
2456            to graphic register 0.  */
2457         reg = 0;
2458
2459       ENCODE_DESIGNATION (charset, reg, coding);
2460     }
2461
2462   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
2463       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
2464     {
2465       /* Since the graphic register REG is not invoked to any graphic
2466          planes, invoke it to graphic plane 0.  */
2467       switch (reg)
2468         {
2469         case 0:                 /* graphic register 0 */
2470           ENCODE_SHIFT_IN;
2471           break;
2472
2473         case 1:                 /* graphic register 1 */
2474           ENCODE_SHIFT_OUT;
2475           break;
2476
2477         case 2:                 /* graphic register 2 */
2478           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2479             ENCODE_SINGLE_SHIFT_2;
2480           else
2481             ENCODE_LOCKING_SHIFT_2;
2482           break;
2483
2484         case 3:                 /* graphic register 3 */
2485           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2486             ENCODE_SINGLE_SHIFT_3;
2487           else
2488             ENCODE_LOCKING_SHIFT_3;
2489           break;
2490         }
2491     }
2492
2493   return dst;
2494 }
2495
/* Write at DST the 2-byte code for encoded composition rule RULE:
   RULE is decoded into its two reference values, which are stored as
   `32 + 81 + gref' followed by `32 + nref'.  */

#define ENCODE_COMPOSITION_RULE(rule)           \
  do {                                          \
    int gref, nref;                             \
                                                \
    COMPOSITION_DECODE_RULE (rule, gref, nref); \
    dst[0] = 32 + 81 + gref;                    \
    dst[1] = 32 + nref;                         \
    dst += 2;                                   \
  } while (0)
2505
/* Write at DST the code that starts a composition sequence (ESC 0,
   ESC 3, or ESC 4) and set up CODING for it.  DATA points to an
   array of integers describing the composition; DATA[3] is taken as
   the composition method.  See the comment in coding.h for the
   format of DATA.  */

#define ENCODE_COMPOSITION_START(coding, data)                          \
  do {                                                                  \
    coding->composing = data[3];                                        \
    *dst++ = ISO_CODE_ESC;                                              \
    if (coding->composing != COMPOSITION_RELATIVE)                      \
      {                                                                 \
        if (coding->composing == COMPOSITION_WITH_ALTCHARS)             \
          *dst++ = '3';                                                 \
        else                                                            \
          *dst++ = '4';                                                 \
        /* Components are read starting at the fifth element.  */       \
        coding->cmp_data_index = coding->cmp_data_start + 4;            \
        coding->composition_rule_follows = 0;                           \
      }                                                                 \
    else                                                                \
      *dst++ = '0';                                                     \
  } while (0)
2525
/* Write at DST the code that ends the current composition (ESC 1),
   step CODING past the composition's data (DATA[0] elements), and
   move on to the next composition data block once the current one is
   used up and a next one exists.  */

#define ENCODE_COMPOSITION_END(coding, data)                    \
  do {                                                          \
    dst[0] = ISO_CODE_ESC;                                      \
    dst[1] = '1';                                               \
    dst += 2;                                                   \
    coding->cmp_data_start += data[0];                          \
    coding->composing = COMPOSITION_NO;                         \
    if (coding->cmp_data_start == coding->cmp_data->used)       \
      {                                                         \
        if (coding->cmp_data->next)                             \
          {                                                     \
            coding->cmp_data = coding->cmp_data->next;          \
            coding->cmp_data_start = 0;                         \
          }                                                     \
      }                                                         \
  } while (0)
2541
/* Write the composition start sequence ESC 0 at DST and switch CODING
   to COMPOSITION_RELATIVE.  The sequence does not begin a new
   composition here: it marks that the components (alternate chars
   and composition rules) of the current composition have just been
   produced and that the actual text follows in SRC.  */

#define ENCODE_COMPOSITION_FAKE_START(coding)   \
  do {                                          \
    coding->composing = COMPOSITION_RELATIVE;   \
    dst[0] = ISO_CODE_ESC;                      \
    dst[1] = '0';                               \
    dst += 2;                                   \
  } while (0)
2553
/* The following three macros produce codes for indicating direction
   of text.  Each expands to one complete statement (to be followed by
   `;'), writes through DST, and reads CODING from the place where it
   is expanded.  */

/* Produce a control sequence introducer: the two bytes `ESC [' when
   CODING has CODING_FLAG_ISO_SEVEN_BITS set, the single byte CSI
   otherwise.  The flag is tested with `&', as everywhere else in this
   file, because `coding->flags' carries other flag bits too; an
   equality test would pick `ESC [' only when no other flag is set.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

/* Produce `CSI 2 ]' (start of right-to-left direction).  The
   introducer macro is a statement, so it is expanded as one here
   rather than being used as an operand of the comma operator.  */
#define ENCODE_DIRECTION_R2L                            \
  do {                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                 \
    *dst++ = '2';                                       \
    *dst++ = ']';                                       \
  } while (0)

/* Produce `CSI 0 ]' (end of the current direction).  */
#define ENCODE_DIRECTION_L2R                            \
  do {                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                 \
    *dst++ = '0';                                       \
    *dst++ = ']';                                       \
  } while (0)
2569
/* Write at DST the invocation and designation codes that bring the
   graphic planes and registers back to their initial state: shift-in
   when graphic plane 0 does not invoke graphic register 0, then a
   designation for every register whose initial designation is not
   negative and differs from its current designation.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                     \
  do {                                                                      \
    int reg = 0;                                                            \
                                                                            \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
      ENCODE_SHIFT_IN;                                                      \
    while (reg < 4)                                                         \
      {                                                                     \
        if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0          \
            && (CODING_SPEC_ISO_DESIGNATION (coding, reg)                   \
                != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)))      \
          ENCODE_DESIGNATION                                                \
            (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
        reg++;                                                              \
      }                                                                     \
  } while (0)
2584
2585 /* Produce designation sequences of charsets in the line started from
2586    SRC to a place pointed by DST, and return updated DST.
2587
2588    If the current block ends before any end-of-line, we may fail to
2589    find all the necessary designations.  */
2590
2591 static unsigned char *
2592 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
2593      struct coding_system *coding;
2594      Lisp_Object translation_table;
2595      unsigned char *src, *src_end, *dst;
2596 {
2597   int charset, c, found = 0, reg;
2598   /* Table of charsets to be designated to each graphic register.  */
2599   int r[4];
2600
2601   for (reg = 0; reg < 4; reg++)
2602     r[reg] = -1;
2603
2604   while (found < 4)
2605     {
2606       ONE_MORE_CHAR (c);
2607       if (c == '\n')
2608         break;
2609
2610       charset = CHAR_CHARSET (c);
2611       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2612       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
2613         {
2614           found++;
2615           r[reg] = charset;
2616         }
2617     }
2618
2619  label_end_of_loop:
2620   if (found)
2621     {
2622       for (reg = 0; reg < 4; reg++)
2623         if (r[reg] >= 0
2624             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
2625           ENCODE_DESIGNATION (r[reg], reg, coding);
2626     }
2627
2628   return dst;
2629 }
2630
2631 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
2632
2633 static void
2634 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2635      struct coding_system *coding;
2636      unsigned char *source, *destination;
2637      int src_bytes, dst_bytes;
2638 {
2639   unsigned char *src = source;
2640   unsigned char *src_end = source + src_bytes;
2641   unsigned char *dst = destination;
2642   unsigned char *dst_end = destination + dst_bytes;
2643   /* Since the maximum bytes produced by each loop is 20, we subtract 19
2644      from DST_END to assure overflow checking is necessary only at the
2645      head of loop.  */
2646   unsigned char *adjusted_dst_end = dst_end - 19;
2647   /* SRC_BASE remembers the start position in source in each loop.
2648      The loop will be exited when there's not enough source text to
2649      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2650      there's not enough destination area to produce encoded codes
2651      (within macro EMIT_BYTES).  */
2652   unsigned char *src_base;
2653   int c;
2654   Lisp_Object translation_table;
2655   Lisp_Object safe_chars;
2656
2657   if (coding->flags & CODING_FLAG_ISO_SAFE)
2658     coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
2659
2660   safe_chars = coding_safe_chars (coding->symbol);
2661
2662   if (NILP (Venable_character_translation))
2663     translation_table = Qnil;
2664   else
2665     {
2666       translation_table = coding->translation_table_for_encode;
2667       if (NILP (translation_table))
2668         translation_table = Vstandard_translation_table_for_encode;
2669     }
2670
2671   coding->consumed_char = 0;
2672   coding->errors = 0;
2673   while (1)
2674     {
2675       src_base = src;
2676
2677       if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2678         {
2679           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2680           break;
2681         }
2682
2683       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2684           && CODING_SPEC_ISO_BOL (coding))
2685         {
2686           /* We have to produce designation sequences if any now.  */
2687           dst = encode_designation_at_bol (coding, translation_table,
2688                                            src, src_end, dst);
2689           CODING_SPEC_ISO_BOL (coding) = 0;
2690         }
2691
2692       /* Check composition start and end.  */
2693       if (coding->composing != COMPOSITION_DISABLED
2694           && coding->cmp_data_start < coding->cmp_data->used)
2695         {
2696           struct composition_data *cmp_data = coding->cmp_data;
2697           int *data = cmp_data->data + coding->cmp_data_start;
2698           int this_pos = cmp_data->char_offset + coding->consumed_char;
2699
2700           if (coding->composing == COMPOSITION_RELATIVE)
2701             {
2702               if (this_pos == data[2])
2703                 {
2704                   ENCODE_COMPOSITION_END (coding, data);
2705                   cmp_data = coding->cmp_data;
2706                   data = cmp_data->data + coding->cmp_data_start;
2707                 }
2708             }
2709           else if (COMPOSING_P (coding))
2710             {
2711               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR  */
2712               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2713                 /* We have consumed components of the composition.
2714                    What follows in SRC is the composition's base
2715                    text.  */
2716                 ENCODE_COMPOSITION_FAKE_START (coding);
2717               else
2718                 {
2719                   int c = cmp_data->data[coding->cmp_data_index++];
2720                   if (coding->composition_rule_follows)
2721                     {
2722                       ENCODE_COMPOSITION_RULE (c);
2723                       coding->composition_rule_follows = 0;
2724                     }
2725                   else
2726                     {
2727                       if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
2728                           && ! CODING_SAFE_CHAR_P (safe_chars, c))
2729                         ENCODE_UNSAFE_CHARACTER (c);
2730                       else
2731                         ENCODE_ISO_CHARACTER (c);
2732                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2733                         coding->composition_rule_follows = 1;
2734                     }
2735                   continue;
2736                 }
2737             }
2738           if (!COMPOSING_P (coding))
2739             {
2740               if (this_pos == data[1])
2741                 {
2742                   ENCODE_COMPOSITION_START (coding, data);
2743                   continue;
2744                 }
2745             }
2746         }
2747
2748       ONE_MORE_CHAR (c);
2749
2750       /* Now encode the character C.  */
2751       if (c < 0x20 || c == 0x7F)
2752         {
2753           if (c == '\r')
2754             {
2755               if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2756                 {
2757                   if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2758                     ENCODE_RESET_PLANE_AND_REGISTER;
2759                   *dst++ = c;
2760                   continue;
2761                 }
2762               /* fall down to treat '\r' as '\n' ...  */
2763               c = '\n';
2764             }
2765           if (c == '\n')
2766             {
2767               if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2768                 ENCODE_RESET_PLANE_AND_REGISTER;
2769               if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2770                 bcopy (coding->spec.iso2022.initial_designation,
2771                        coding->spec.iso2022.current_designation,
2772                        sizeof coding->spec.iso2022.initial_designation);
2773               if (coding->eol_type == CODING_EOL_LF
2774                   || coding->eol_type == CODING_EOL_UNDECIDED)
2775                 *dst++ = ISO_CODE_LF;
2776               else if (coding->eol_type == CODING_EOL_CRLF)
2777                 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2778               else
2779                 *dst++ = ISO_CODE_CR;
2780               CODING_SPEC_ISO_BOL (coding) = 1;
2781             }
2782           else
2783             {
2784               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2785                 ENCODE_RESET_PLANE_AND_REGISTER;
2786               *dst++ = c;
2787             }
2788         }
2789       else if (ASCII_BYTE_P (c))
2790         ENCODE_ISO_CHARACTER (c);
2791       else if (SINGLE_BYTE_CHAR_P (c))
2792         {
2793           *dst++ = c;
2794           coding->errors++;
2795         }
2796       else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
2797                && ! CODING_SAFE_CHAR_P (safe_chars, c))
2798         ENCODE_UNSAFE_CHARACTER (c);
2799       else
2800         ENCODE_ISO_CHARACTER (c);
2801
2802       coding->consumed_char++;
2803     }
2804
2805  label_end_of_loop:
2806   coding->consumed = src_base - source;
2807   coding->produced = coding->produced_char = dst - destination;
2808 }
2809
2810 \f
2811 /*** 4. SJIS and BIG5 handlers ***/
2812
2813 /* Although SJIS and BIG5 are not ISO coding systems, they are used
2814    quite widely.  So, for the moment, Emacs supports them in the bare
2815    C code.  But, in the future, they may be supported only by CCL.  */
2816
2817 /* SJIS is a coding system encoding three character sets: ASCII, right
2818    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
2819    as is.  A character of charset katakana-jisx0201 is encoded by
2820    "position-code + 0x80".  A character of charset japanese-jisx0208
2821    is encoded in 2-byte but two position-codes are divided and shifted
2822    so that it fits in the range below.
2823
2824    --- CODE RANGE of SJIS ---
2825    (character set)      (range)
2826    ASCII                0x00 .. 0x7F
2827    KATAKANA-JISX0201    0xA1 .. 0xDF
2828    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2829             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2830    -------------------------------
2831
2832 */
2833
2834 /* BIG5 is a coding system encoding two character sets: ASCII and
2835    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2836    character set and is encoded in two bytes.
2837
2838    --- CODE RANGE of BIG5 ---
2839    (character set)      (range)
2840    ASCII                0x00 .. 0x7F
2841    Big5 (1st byte)      0xA1 .. 0xFE
2842         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2843    --------------------------
2844
2845    Since the number of characters in Big5 is larger than the maximum
2846    number of characters in an Emacs charset (96x96), it can't be handled as one
2847    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2848    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2849    contains frequently used characters and the latter contains less
2850    frequently used characters.  */
2851
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   Every argument is parenthesized in the expansions, so callers may
   pass arbitrary expressions (e.g. `byte & 0xFF').  All inputs are
   read before the first position-code output is stored, so the same
   variables may be passed as both input and output.  Input arguments
   are evaluated more than once; do not pass expressions with side
   effects.  */

/* Number of Big5 characters which have the same code in 1st byte.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Convert the BIG5 byte pair (B1, B2) into CHARSET and the Emacs
   position-codes (C1, C2).  Lead bytes below 0xC9 belong to
   charset_big5_1, the others to charset_big5_2.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Convert CHARSET and the Emacs position-codes (C1, C2) into the BIG5
   byte pair (B1, B2).  This is the inverse of DECODE_BIG5.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    /* Offsets 0..0x3E map to 0x40..0x7E, the rest to 0xA1..0xFE.  */   \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2884
2885 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2886    Check if a text is encoded in SJIS.  If it is, return
2887    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2888
2889 static int
2890 detect_coding_sjis (src, src_end, multibytep)
2891      unsigned char *src, *src_end;
2892      int multibytep;
2893 {
2894   int c;
2895   /* Dummy for ONE_MORE_BYTE.  */
2896   struct coding_system dummy_coding;
2897   struct coding_system *coding = &dummy_coding;
2898
2899   while (1)
2900     {
2901       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2902       if (c < 0x80)
2903         continue;
2904       if (c == 0x80 || c == 0xA0 || c > 0xEF)
2905         return 0;
2906       if (c <= 0x9F || c >= 0xE0)
2907         {
2908           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2909           if (c < 0x40 || c == 0x7F || c > 0xFC)
2910             return 0;
2911         }
2912     }
2913  label_end_of_loop:
2914   return CODING_CATEGORY_MASK_SJIS;
2915 }
2916
2917 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2918    Check if a text is encoded in BIG5.  If it is, return
2919    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2920
2921 static int
2922 detect_coding_big5 (src, src_end, multibytep)
2923      unsigned char *src, *src_end;
2924      int multibytep;
2925 {
2926   int c;
2927   /* Dummy for ONE_MORE_BYTE.  */
2928   struct coding_system dummy_coding;
2929   struct coding_system *coding = &dummy_coding;
2930
2931   while (1)
2932     {
2933       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2934       if (c < 0x80)
2935         continue;
2936       if (c < 0xA1 || c > 0xFE)
2937         return 0;
2938       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2939       if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
2940         return 0;
2941     }
2942  label_end_of_loop:
2943   return CODING_CATEGORY_MASK_BIG5;
2944 }
2945
2946 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2947    Check if a text is encoded in UTF-8.  If it is, return
2948    CODING_CATEGORY_MASK_UTF_8, else return 0.  */
2949
2950 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
2951 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
2952 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
2953 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
2954 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
2955 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
2956 #define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC)
2957
2958 static int
2959 detect_coding_utf_8 (src, src_end, multibytep)
2960      unsigned char *src, *src_end;
2961      int multibytep;
2962 {
2963   unsigned char c;
2964   int seq_maybe_bytes;
2965   /* Dummy for ONE_MORE_BYTE.  */
2966   struct coding_system dummy_coding;
2967   struct coding_system *coding = &dummy_coding;
2968
2969   while (1)
2970     {
2971       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2972       if (UTF_8_1_OCTET_P (c))
2973         continue;
2974       else if (UTF_8_2_OCTET_LEADING_P (c))
2975         seq_maybe_bytes = 1;
2976       else if (UTF_8_3_OCTET_LEADING_P (c))
2977         seq_maybe_bytes = 2;
2978       else if (UTF_8_4_OCTET_LEADING_P (c))
2979         seq_maybe_bytes = 3;
2980       else if (UTF_8_5_OCTET_LEADING_P (c))
2981         seq_maybe_bytes = 4;
2982       else if (UTF_8_6_OCTET_LEADING_P (c))
2983         seq_maybe_bytes = 5;
2984       else
2985         return 0;
2986
2987       do
2988         {
2989           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2990           if (!UTF_8_EXTRA_OCTET_P (c))
2991             return 0;
2992           seq_maybe_bytes--;
2993         }
2994       while (seq_maybe_bytes > 0);
2995     }
2996
2997  label_end_of_loop:
2998   return CODING_CATEGORY_MASK_UTF_8;
2999 }
3000
3001 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3002    Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
3003    Little Endian (otherwise).  If it is, return
3004    CODING_CATEGORY_MASK_UTF_16_BE or CODING_CATEGORY_MASK_UTF_16_LE,
3005    else return 0.  */
3006
/* Nonzero if the 16-bit code unit VAL is one of the two values 0xFFFE
   and 0xFFFF.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if VAL is a high (leading) surrogate, 0xD800..0xDBFF.  The
   top six bits select the surrogate block, so they are all compared;
   masking with 0xD800 alone would also accept values such as 0xDC00
   and 0xFFFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if VAL is a low (trailing) surrogate, 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
3016
3017 static int
3018 detect_coding_utf_16 (src, src_end, multibytep)
3019      unsigned char *src, *src_end;
3020      int multibytep;
3021 {
3022   unsigned char c1, c2;
3023   /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE.  */
3024   struct coding_system dummy_coding;
3025   struct coding_system *coding = &dummy_coding;
3026
3027   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
3028   ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
3029
3030   if ((c1 == 0xFF) && (c2 == 0xFE))
3031     return CODING_CATEGORY_MASK_UTF_16_LE;
3032   else if ((c1 == 0xFE) && (c2 == 0xFF))
3033     return CODING_CATEGORY_MASK_UTF_16_BE;
3034
3035  label_end_of_loop:
3036   return 0;
3037 }
3038
3039 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3040    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
3041
3042 static void
3043 decode_coding_sjis_big5 (coding, source, destination,
3044                          src_bytes, dst_bytes, sjis_p)
3045      struct coding_system *coding;
3046      unsigned char *source, *destination;
3047      int src_bytes, dst_bytes;
3048      int sjis_p;
3049 {
3050   unsigned char *src = source;
3051   unsigned char *src_end = source + src_bytes;
3052   unsigned char *dst = destination;
3053   unsigned char *dst_end = destination + dst_bytes;
3054   /* SRC_BASE remembers the start position in source in each loop.
3055      The loop will be exited when there's not enough source code
3056      (within macro ONE_MORE_BYTE), or when there's not enough
3057      destination area to produce a character (within macro
3058      EMIT_CHAR).  */
3059   unsigned char *src_base;
3060   Lisp_Object translation_table;
3061
3062   if (NILP (Venable_character_translation))
3063     translation_table = Qnil;
3064   else
3065     {
3066       translation_table = coding->translation_table_for_decode;
3067       if (NILP (translation_table))
3068         translation_table = Vstandard_translation_table_for_decode;
3069     }
3070
3071   coding->produced_char = 0;
3072   while (1)
3073     {
3074       int c, charset, c1, c2 = 0;
3075
3076       src_base = src;
3077       ONE_MORE_BYTE (c1);
3078
3079       if (c1 < 0x80)
3080         {
3081           charset = CHARSET_ASCII;
3082           if (c1 < 0x20)
3083             {
3084               if (c1 == '\r')
3085                 {
3086                   if (coding->eol_type == CODING_EOL_CRLF)
3087                     {
3088                       ONE_MORE_BYTE (c2);
3089                       if (c2 == '\n')
3090                         c1 = c2;
3091                       else
3092                         /* To process C2 again, SRC is subtracted by 1.  */
3093                         src--;
3094                     }
3095                   else if (coding->eol_type == CODING_EOL_CR)
3096                     c1 = '\n';
3097                 }
3098               else if (c1 == '\n'
3099                        && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3100                        && (coding->eol_type == CODING_EOL_CR
3101                            || coding->eol_type == CODING_EOL_CRLF))
3102                 {
3103                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3104                   goto label_end_of_loop;
3105                 }
3106             }
3107         }
3108       else
3109         {
3110           if (sjis_p)
3111             {
3112               if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF)
3113                 goto label_invalid_code;
3114               if (c1 <= 0x9F || c1 >= 0xE0)
3115                 {
3116                   /* SJIS -> JISX0208 */
3117                   ONE_MORE_BYTE (c2);
3118                   if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
3119                     goto label_invalid_code;
3120                   DECODE_SJIS (c1, c2, c1, c2);
3121                   charset = charset_jisx0208;
3122                 }
3123               else
3124                 /* SJIS -> JISX0201-Kana */
3125                 charset = charset_katakana_jisx0201;
3126             }
3127           else
3128             {
3129               /* BIG5 -> Big5 */
3130               if (c1 < 0xA0 || c1 > 0xFE)
3131                 goto label_invalid_code;
3132               ONE_MORE_BYTE (c2);
3133               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
3134                 goto label_invalid_code;
3135               DECODE_BIG5 (c1, c2, charset, c1, c2);
3136             }
3137         }
3138
3139       c = DECODE_ISO_CHARACTER (charset, c1, c2);
3140       EMIT_CHAR (c);
3141       continue;
3142
3143     label_invalid_code:
3144       coding->errors++;
3145       src = src_base;
3146       c = *src++;
3147       EMIT_CHAR (c);
3148     }
3149
3150  label_end_of_loop:
3151   coding->consumed = coding->consumed_char = src_base - source;
3152   coding->produced = dst - destination;
3153   return;
3154 }
3155
3156 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3157    This function can encode charsets `ascii', `katakana-jisx0201',
3158    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
3159    are sure that all these charsets are registered as official charset
3160    (i.e. do not have extended leading-codes).  Characters of other
3161    charsets are produced without any encoding.  If SJIS_P is 1, encode
3162    SJIS text, else encode BIG5 text.  */
3163
3164 static void
3165 encode_coding_sjis_big5 (coding, source, destination,
3166                          src_bytes, dst_bytes, sjis_p)
3167      struct coding_system *coding;
3168      unsigned char *source, *destination;
3169      int src_bytes, dst_bytes;
3170      int sjis_p;
3171 {
3172   unsigned char *src = source;
3173   unsigned char *src_end = source + src_bytes;
3174   unsigned char *dst = destination;
3175   unsigned char *dst_end = destination + dst_bytes;
3176   /* SRC_BASE remembers the start position in source in each loop.
3177      The loop will be exited when there's not enough source text to
3178      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3179      there's not enough destination area to produce encoded codes
3180      (within macro EMIT_BYTES).  */
3181   unsigned char *src_base;
3182   Lisp_Object translation_table;
3183
3184   if (NILP (Venable_character_translation))
3185     translation_table = Qnil;
3186   else
3187     {
3188       translation_table = coding->translation_table_for_encode;
3189       if (NILP (translation_table))
3190         translation_table = Vstandard_translation_table_for_encode;
3191     }
3192
3193   while (1)
3194     {
3195       int c, charset, c1, c2;
3196
3197       src_base = src;
3198       ONE_MORE_CHAR (c);
3199
3200       /* Now encode the character C.  */
3201       if (SINGLE_BYTE_CHAR_P (c))
3202         {
3203           switch (c)
3204             {
3205             case '\r':
3206               if (!(coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
3207                 {
3208                   EMIT_ONE_BYTE (c);
3209                   break;
3210                 }
3211               c = '\n';
3212             case '\n':
3213               if (coding->eol_type == CODING_EOL_CRLF)
3214                 {
3215                   EMIT_TWO_BYTES ('\r', c);
3216                   break;
3217                 }
3218               else if (coding->eol_type == CODING_EOL_CR)
3219                 c = '\r';
3220             default:
3221               EMIT_ONE_BYTE (c);
3222             }
3223         }
3224       else
3225         {
3226           SPLIT_CHAR (c, charset, c1, c2);
3227           if (sjis_p)
3228             {
3229               if (charset == charset_jisx0208
3230                   || charset == charset_jisx0208_1978)
3231                 {
3232                   ENCODE_SJIS (c1, c2, c1, c2);
3233                   EMIT_TWO_BYTES (c1, c2);
3234                 }
3235               else if (charset == charset_katakana_jisx0201)
3236                 EMIT_ONE_BYTE (c1 | 0x80);
3237               else if (charset == charset_latin_jisx0201)
3238                 EMIT_ONE_BYTE (c1);
3239               else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3240                 {
3241                   EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3242                   if (CHARSET_WIDTH (charset) > 1)
3243                     EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3244                 }
3245               else
3246                 /* There's no way other than producing the internal
3247                    codes as is.  */
3248                 EMIT_BYTES (src_base, src);
3249             }
3250           else
3251             {
3252               if (charset == charset_big5_1 || charset == charset_big5_2)
3253                 {
3254                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
3255                   EMIT_TWO_BYTES (c1, c2);
3256                 }
3257               else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3258                 {
3259                   EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3260                   if (CHARSET_WIDTH (charset) > 1)
3261                     EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3262                 }
3263               else
3264                 /* There's no way other than producing the internal
3265                    codes as is.  */
3266                 EMIT_BYTES (src_base, src);
3267             }
3268         }
3269       coding->consumed_char++;
3270     }
3271
3272  label_end_of_loop:
3273   coding->consumed = src_base - source;
3274   coding->produced = coding->produced_char = dst - destination;
3275 }
3276
3277 \f
3278 /*** 5. CCL handlers ***/
3279
3280 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3281    Check if a text is encoded in a coding system of which
3282    encoder/decoder are written in CCL program.  If it is, return
3283    CODING_CATEGORY_MASK_CCL, else return 0.  */
3284
3285 static int
3286 detect_coding_ccl (src, src_end, multibytep)
3287      unsigned char *src, *src_end;
3288      int multibytep;
3289 {
3290   unsigned char *valid;
3291   int c;
3292   /* Dummy for ONE_MORE_BYTE.  */
3293   struct coding_system dummy_coding;
3294   struct coding_system *coding = &dummy_coding;
3295
3296   /* No coding system is assigned to coding-category-ccl.  */
3297   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
3298     return 0;
3299
3300   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
3301   while (1)
3302     {
3303       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
3304       if (! valid[c])
3305         return 0;
3306     }
3307  label_end_of_loop:
3308   return CODING_CATEGORY_MASK_CCL;
3309 }
3310
3311 \f
3312 /*** 6. End-of-line handlers ***/
3313
3314 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3315
3316 static void
3317 decode_eol (coding, source, destination, src_bytes, dst_bytes)
3318      struct coding_system *coding;
3319      unsigned char *source, *destination;
3320      int src_bytes, dst_bytes;
3321 {
3322   unsigned char *src = source;
3323   unsigned char *dst = destination;
3324   unsigned char *src_end = src + src_bytes;
3325   unsigned char *dst_end = dst + dst_bytes;
3326   Lisp_Object translation_table;
3327   /* SRC_BASE remembers the start position in source in each loop.
3328      The loop will be exited when there's not enough source code
3329      (within macro ONE_MORE_BYTE), or when there's not enough
3330      destination area to produce a character (within macro
3331      EMIT_CHAR).  */
3332   unsigned char *src_base;
3333   int c;
3334
3335   translation_table = Qnil;
3336   switch (coding->eol_type)
3337     {
3338     case CODING_EOL_CRLF:
3339       while (1)
3340         {
3341           src_base = src;
3342           ONE_MORE_BYTE (c);
3343           if (c == '\r')
3344             {
3345               ONE_MORE_BYTE (c);
3346               if (c != '\n')
3347                 {
3348                   src--;
3349                   c = '\r';
3350                 }
3351             }
3352           else if (c == '\n'
3353                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
3354             {
3355               coding->result = CODING_FINISH_INCONSISTENT_EOL;
3356               goto label_end_of_loop;
3357             }
3358           EMIT_CHAR (c);
3359         }
3360       break;
3361
3362     case CODING_EOL_CR:
3363       while (1)
3364         {
3365           src_base = src;
3366           ONE_MORE_BYTE (c);
3367           if (c == '\n')
3368             {
3369               if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3370                 {
3371                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3372                   goto label_end_of_loop;
3373                 }
3374             }
3375           else if (c == '\r')
3376             c = '\n';
3377           EMIT_CHAR (c);
3378         }
3379       break;
3380
3381     default:                    /* no need for EOL handling */
3382       while (1)
3383         {
3384           src_base = src;
3385           ONE_MORE_BYTE (c);
3386           EMIT_CHAR (c);
3387         }
3388     }
3389
3390  label_end_of_loop:
3391   coding->consumed = coding->consumed_char = src_base - source;
3392   coding->produced = dst - destination;
3393   return;
3394 }
3395
3396 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
3397    format of end-of-line according to `coding->eol_type'.  It also
3398    convert multibyte form 8-bit characters to unibyte if
3399    CODING->src_multibyte is nonzero.  If `coding->mode &
3400    CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
3401    also means end-of-line.  */
3402
3403 static void
3404 encode_eol (coding, source, destination, src_bytes, dst_bytes)
3405      struct coding_system *coding;
3406      const unsigned char *source;
3407      unsigned char *destination;
3408      int src_bytes, dst_bytes;
3409 {
3410   const unsigned char *src = source;
3411   unsigned char *dst = destination;
3412   const unsigned char *src_end = src + src_bytes;
3413   unsigned char *dst_end = dst + dst_bytes;
3414   Lisp_Object translation_table;
3415   /* SRC_BASE remembers the start position in source in each loop.
3416      The loop will be exited when there's not enough source text to
3417      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3418      there's not enough destination area to produce encoded codes
3419      (within macro EMIT_BYTES).  */
3420   const unsigned char *src_base;
3421   unsigned char *tmp;
3422   int c;
3423   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
3424
3425   translation_table = Qnil;
3426   if (coding->src_multibyte
3427       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
3428     {
3429       src_end--;
3430       src_bytes--;
3431       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
3432     }
3433
3434   if (coding->eol_type == CODING_EOL_CRLF)
3435     {
3436       while (src < src_end)
3437         {
3438           src_base = src;
3439           c = *src++;
3440           if (c >= 0x20)
3441             EMIT_ONE_BYTE (c);
3442           else if (c == '\n' || (c == '\r' && selective_display))
3443             EMIT_TWO_BYTES ('\r', '\n');
3444           else
3445             EMIT_ONE_BYTE (c);
3446         }
3447       src_base = src;
3448     label_end_of_loop:
3449       ;
3450     }
3451   else
3452     {
3453       if (!dst_bytes || src_bytes <= dst_bytes)
3454         {
3455           safe_bcopy (src, dst, src_bytes);
3456           src_base = src_end;
3457           dst += src_bytes;
3458         }
3459       else
3460         {
3461           if (coding->src_multibyte
3462               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
3463             dst_bytes--;
3464           safe_bcopy (src, dst, dst_bytes);
3465           src_base = src + dst_bytes;
3466           dst = destination + dst_bytes;
3467           coding->result = CODING_FINISH_INSUFFICIENT_DST;
3468         }
3469       if (coding->eol_type == CODING_EOL_CR)
3470         {
3471           for (tmp = destination; tmp < dst; tmp++)
3472             if (*tmp == '\n') *tmp = '\r';
3473         }
3474       else if (selective_display)
3475         {
3476           for (tmp = destination; tmp < dst; tmp++)
3477             if (*tmp == '\r') *tmp = '\n';
3478         }
3479     }
3480   if (coding->src_multibyte)
3481     dst = destination + str_as_unibyte (destination, dst - destination);
3482
3483   coding->consumed = src_base - source;
3484   coding->produced = dst - destination;
3485   coding->produced_char = coding->produced;
3486 }
3487
3488 \f
3489 /*** 7. C library functions ***/
3490
3491 /* In Emacs Lisp, a coding system is represented by a Lisp symbol which
3492    has a property `coding-system'.  The value of this property is a
3493    vector of length 5 (called the coding-vector).  Among elements of
3494    this vector, the first (element[0]) and the fifth (element[4])
3495    carry important information for decoding/encoding.  Before
3496    decoding/encoding, this information should be set in fields of a
3497    structure of type `coding_system'.
3498
3499    The value of the property `coding-system' can be a symbol of another
3500    subsidiary coding-system.  In that case, Emacs gets coding-vector
3501    from that symbol.
3502
3503    `element[0]' contains information to be set in `coding->type'.  The
3504    value and its meaning is as follows:
3505
3506    0 -- coding_type_emacs_mule
3507    1 -- coding_type_sjis
3508    2 -- coding_type_iso2022
3509    3 -- coding_type_big5
3510    4 -- coding_type_ccl encoder/decoder written in CCL
3511    nil -- coding_type_no_conversion
3512    t -- coding_type_undecided (automatic conversion on decoding,
3513                                no-conversion on encoding)
3514
3515    `element[4]' contains information to be set in `coding->flags' and
3516    `coding->spec'.  The meaning varies by `coding->type'.
3517
3518    If `coding->type' is `coding_type_iso2022', element[4] is a vector
3519    of length 32 (of which the first 13 sub-elements are used now).
3520    Meanings of these sub-elements are:
3521
3522    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
3523         If the value is an integer of valid charset, the charset is
3524         assumed to be designated to graphic register N initially.
3525
3526         If the value is negative, it is the negated value of a charset which
3527         reserves graphic register N, which means that the charset is
3528         not designated initially but should be designated to graphic
3529         register N just before encoding a character in that charset.
3530
3531         If the value is nil, graphic register N is never used on
3532         encoding.
3533
3534    sub-element[N] where N is 4 through 11: to be set in `coding->flags'
3535         Each value takes t or nil.  See the section ISO2022 of
3536         `coding.h' for more information.
3537
3538    If `coding->type' is `coding_type_big5', element[4] is t to denote
3539    BIG5-ETen or nil to denote BIG5-HKU.
3540
3541    If `coding->type' takes the other value, element[4] is ignored.
3542
3543    Emacs Lisp's coding systems also carry information about format of
3544    end-of-line in a value of property `eol-type'.  If the value is
3545    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
3546    means CODING_EOL_CR.  If it is not integer, it should be a vector
3547    of subsidiary coding systems of which property `eol-type' has one
3548    of the above values.
3549
3550 */
3551
3552 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
3553    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
3554    is setup so that no conversion is necessary and return -1, else
3555    return 0.  */
3556
3557 int
3558 setup_coding_system (coding_system, coding)
3559      Lisp_Object coding_system;
3560      struct coding_system *coding;
3561 {
3562   Lisp_Object coding_spec, coding_type, eol_type, plist;
3563   Lisp_Object val;
3564
3565   /* At first, zero clear all members.  */
3566   bzero (coding, sizeof (struct coding_system));
3567
3568   /* Initialize some fields required for all kinds of coding systems.  */
3569   coding->symbol = coding_system;
3570   coding->heading_ascii = -1;
3571   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
3572   coding->composing = COMPOSITION_DISABLED;
3573   coding->cmp_data = NULL;
3574
3575   if (NILP (coding_system))
3576     goto label_invalid_coding_system;
3577
3578   coding_spec = Fget (coding_system, Qcoding_system);
3579
3580   if (!VECTORP (coding_spec)
3581       || XVECTOR (coding_spec)->size != 5
3582       || !CONSP (XVECTOR (coding_spec)->contents[3]))
3583     goto label_invalid_coding_system;
3584
3585   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
3586   if (VECTORP (eol_type))
3587     {
3588       coding->eol_type = CODING_EOL_UNDECIDED;
3589       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
3590     }
3591   else if (XFASTINT (eol_type) == 1)
3592     {
3593       coding->eol_type = CODING_EOL_CRLF;
3594       coding->common_flags
3595         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3596     }
3597   else if (XFASTINT (eol_type) == 2)
3598     {
3599       coding->eol_type = CODING_EOL_CR;
3600       coding->common_flags
3601         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3602     }
3603   else
3604     coding->eol_type = CODING_EOL_LF;
3605
3606   coding_type = XVECTOR (coding_spec)->contents[0];
3607   /* Try short cut.  */
3608   if (SYMBOLP (coding_type))
3609     {
3610       if (EQ (coding_type, Qt))
3611         {
3612           coding->type = coding_type_undecided;
3613           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
3614         }
3615       else
3616         coding->type = coding_type_no_conversion;
3617       /* Initialize this member.  Any thing other than
3618          CODING_CATEGORY_IDX_UTF_16_BE and
3619          CODING_CATEGORY_IDX_UTF_16_LE are ok because they have
3620          special treatment in detect_eol.  */
3621       coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
3622
3623       return 0;
3624     }
3625
3626   /* Get values of coding system properties:
3627      `post-read-conversion', `pre-write-conversion',
3628      `translation-table-for-decode', `translation-table-for-encode'.  */
3629   plist = XVECTOR (coding_spec)->contents[3];
3630   /* Pre & post conversion functions should be disabled if
3631      inhibit_eol_conversion is nonzero.  This is the case that a code
3632      conversion function is called while those functions are running.  */
3633   if (! inhibit_pre_post_conversion)
3634     {
3635       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
3636       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
3637     }
3638   val = Fplist_get (plist, Qtranslation_table_for_decode);
3639   if (SYMBOLP (val))
3640     val = Fget (val, Qtranslation_table_for_decode);
3641   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
3642   val = Fplist_get (plist, Qtranslation_table_for_encode);
3643   if (SYMBOLP (val))
3644     val = Fget (val, Qtranslation_table_for_encode);
3645   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
3646   val = Fplist_get (plist, Qcoding_category);
3647   if (!NILP (val))
3648     {
3649       val = Fget (val, Qcoding_category_index);
3650       if (INTEGERP (val))
3651         coding->category_idx = XINT (val);
3652       else
3653         goto label_invalid_coding_system;
3654     }
3655   else
3656     goto label_invalid_coding_system;
3657
3658   /* If the coding system has non-nil `composition' property, enable
3659      composition handling.  */
3660   val = Fplist_get (plist, Qcomposition);
3661   if (!NILP (val))
3662     coding->composing = COMPOSITION_NO;
3663
3664   switch (XFASTINT (coding_type))
3665     {
3666     case 0:
3667       coding->type = coding_type_emacs_mule;
3668       coding->common_flags
3669         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3670       if (!NILP (coding->post_read_conversion))
3671         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3672       if (!NILP (coding->pre_write_conversion))
3673         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3674       break;
3675
3676     case 1:
3677       coding->type = coding_type_sjis;
3678       coding->common_flags
3679         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3680       break;
3681
3682     case 2:
3683       coding->type = coding_type_iso2022;
3684       coding->common_flags
3685         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3686       {
3687         Lisp_Object val, temp;
3688         Lisp_Object *flags;
3689         int i, charset, reg_bits = 0;
3690
3691         val = XVECTOR (coding_spec)->contents[4];
3692
3693         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3694           goto label_invalid_coding_system;
3695
3696         flags = XVECTOR (val)->contents;
3697         coding->flags
3698           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3699              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3700              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3701              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3702              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3703              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3704              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3705              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3706              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3707              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3708              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3709              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3710              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3711              );
3712
3713         /* Invoke graphic register 0 to plane 0.  */
3714         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3715         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3716         CODING_SPEC_ISO_INVOCATION (coding, 1)
3717           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3718         /* Not single shifting at first.  */
3719         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3720         /* Beginning of buffer should also be regarded as bol. */
3721         CODING_SPEC_ISO_BOL (coding) = 1;
3722
3723         for (charset = 0; charset <= MAX_CHARSET; charset++)
3724           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3725         val = Vcharset_revision_alist;
3726         while (CONSP (val))
3727           {
3728             charset = get_charset_id (Fcar_safe (XCAR (val)));
3729             if (charset >= 0
3730                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3731                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3732               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3733             val = XCDR (val);
3734           }
3735
3736         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
3737            FLAGS[REG] can be one of below:
3738                 integer CHARSET: CHARSET occupies register I,
3739                 t: designate nothing to REG initially, but can be used
3740                   by any charsets,
3741                 list of integer, nil, or t: designate the first
3742                   element (if integer) to REG initially, the remaining
3743                   elements (if integer) is designated to REG on request,
3744                   if an element is t, REG can be used by any charsets,
3745                 nil: REG is never used.  */
3746         for (charset = 0; charset <= MAX_CHARSET; charset++)
3747           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3748             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3749         for (i = 0; i < 4; i++)
3750           {
3751             if ((INTEGERP (flags[i])
3752                  && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)))
3753                 || (charset = get_charset_id (flags[i])) >= 0)
3754               {
3755                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3756                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3757               }
3758             else if (EQ (flags[i], Qt))
3759               {
3760                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3761                 reg_bits |= 1 << i;
3762                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3763               }
3764             else if (CONSP (flags[i]))
3765               {
3766                 Lisp_Object tail;
3767                 tail = flags[i];
3768
3769                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3770                 if ((INTEGERP (XCAR (tail))
3771                      && (charset = XINT (XCAR (tail)),
3772                          CHARSET_VALID_P (charset)))
3773                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3774                   {
3775                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3776                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3777                   }
3778                 else
3779                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3780                 tail = XCDR (tail);
3781                 while (CONSP (tail))
3782                   {
3783                     if ((INTEGERP (XCAR (tail))
3784                          && (charset = XINT (XCAR (tail)),
3785                              CHARSET_VALID_P (charset)))
3786                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3787                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3788                         = i;
3789                     else if (EQ (XCAR (tail), Qt))
3790                       reg_bits |= 1 << i;
3791                     tail = XCDR (tail);
3792                   }
3793               }
3794             else
3795               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3796
3797             CODING_SPEC_ISO_DESIGNATION (coding, i)
3798               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3799           }
3800
3801         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3802           {
3803             /* REG 1 can be used only by locking shift in 7-bit env.  */
3804             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3805               reg_bits &= ~2;
3806             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3807               /* Without any shifting, only REG 0 and 1 can be used.  */
3808               reg_bits &= 3;
3809           }
3810
3811         if (reg_bits)
3812           for (charset = 0; charset <= MAX_CHARSET; charset++)
3813             {
3814               if (CHARSET_DEFINED_P (charset)
3815                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3816                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3817                 {
3818                   /* There exist some default graphic registers to be
3819                      used by CHARSET.  */
3820
3821                   /* We had better avoid designating a charset of
3822                      CHARS96 to REG 0 as far as possible.  */
3823                   if (CHARSET_CHARS (charset) == 96)
3824                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3825                       = (reg_bits & 2
3826                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3827                   else
3828                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3829                       = (reg_bits & 1
3830                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3831                 }
3832             }
3833       }
3834       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3835       coding->spec.iso2022.last_invalid_designation_register = -1;
3836       break;
3837
3838     case 3:
3839       coding->type = coding_type_big5;
3840       coding->common_flags
3841         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3842       coding->flags
3843         = (NILP (XVECTOR (coding_spec)->contents[4])
3844            ? CODING_FLAG_BIG5_HKU
3845            : CODING_FLAG_BIG5_ETEN);
3846       break;
3847
3848     case 4:
3849       coding->type = coding_type_ccl;
3850       coding->common_flags
3851         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3852       {
3853         val = XVECTOR (coding_spec)->contents[4];
3854         if (! CONSP (val)
3855             || setup_ccl_program (&(coding->spec.ccl.decoder),
3856                                   XCAR (val)) < 0
3857             || setup_ccl_program (&(coding->spec.ccl.encoder),
3858                                   XCDR (val)) < 0)
3859           goto label_invalid_coding_system;
3860
3861         bzero (coding->spec.ccl.valid_codes, 256);
3862         val = Fplist_get (plist, Qvalid_codes);
3863         if (CONSP (val))
3864           {
3865             Lisp_Object this;
3866
3867             for (; CONSP (val); val = XCDR (val))
3868               {
3869                 this = XCAR (val);
3870                 if (INTEGERP (this)
3871                     && XINT (this) >= 0 && XINT (this) < 256)
3872                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3873                 else if (CONSP (this)
3874                          && INTEGERP (XCAR (this))
3875                          && INTEGERP (XCDR (this)))
3876                   {
3877                     int start = XINT (XCAR (this));
3878                     int end = XINT (XCDR (this));
3879
3880                     if (start >= 0 && start <= end && end < 256)
3881                       while (start <= end)
3882                         coding->spec.ccl.valid_codes[start++] = 1;
3883                   }
3884               }
3885           }
3886       }
3887       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3888       coding->spec.ccl.cr_carryover = 0;
3889       coding->spec.ccl.eight_bit_carryover[0] = 0;
3890       break;
3891
3892     case 5:
3893       coding->type = coding_type_raw_text;
3894       break;
3895
3896     default:
3897       goto label_invalid_coding_system;
3898     }
3899   return 0;
3900
3901  label_invalid_coding_system:
3902   coding->type = coding_type_no_conversion;
3903   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3904   coding->common_flags = 0;
3905   coding->eol_type = CODING_EOL_LF;
3906   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3907   return -1;
3908 }
3909
3910 /* Free memory blocks allocated for storing composition information.  */
3911
3912 void
3913 coding_free_composition_data (coding)
3914      struct coding_system *coding;
3915 {
3916   struct composition_data *cmp_data = coding->cmp_data, *next;
3917
3918   if (!cmp_data)
3919     return;
3920   /* Memory blocks are chained.  At first, rewind to the first, then,
3921      free blocks one by one.  */
3922   while (cmp_data->prev)
3923     cmp_data = cmp_data->prev;
3924   while (cmp_data)
3925     {
3926       next = cmp_data->next;
3927       xfree (cmp_data);
3928       cmp_data = next;
3929     }
3930   coding->cmp_data = NULL;
3931 }
3932
3933 /* Set `char_offset' member of all memory blocks pointed by
3934    coding->cmp_data to POS.  */
3935
3936 void
3937 coding_adjust_composition_offset (coding, pos)
3938      struct coding_system *coding;
3939      int pos;
3940 {
3941   struct composition_data *cmp_data;
3942
3943   for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next)
3944     cmp_data->char_offset = pos;
3945 }
3946
3947 /* Setup raw-text or one of its subsidiaries in the structure
3948    coding_system CODING according to the already setup value eol_type
3949    in CODING.  CODING should be setup for some coding system in
3950    advance.  */
3951
3952 void
3953 setup_raw_text_coding_system (coding)
3954      struct coding_system *coding;
3955 {
3956   if (coding->type != coding_type_raw_text)
3957     {
3958       coding->symbol = Qraw_text;
3959       coding->type = coding_type_raw_text;
3960       if (coding->eol_type != CODING_EOL_UNDECIDED)
3961         {
3962           Lisp_Object subsidiaries;
3963           subsidiaries = Fget (Qraw_text, Qeol_type);
3964
3965           if (VECTORP (subsidiaries)
3966               && XVECTOR (subsidiaries)->size == 3)
3967             coding->symbol
3968               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3969         }
3970       setup_coding_system (coding->symbol, coding);
3971     }
3972   return;
3973 }
3974
/* Emacs has a mechanism to automatically detect the coding system of
   a text.  But, it's impossible to distinguish some coding systems
   accurately because they use the same range of codes.  So, at
   first, coding systems are classified into the categories listed
   below:
3980
3981    o coding-category-emacs-mule
3982
3983         The category for a coding system which has the same code range
3984         as Emacs' internal format.  Assigned the coding-system (Lisp
3985         symbol) `emacs-mule' by default.
3986
3987    o coding-category-sjis
3988
3989         The category for a coding system which has the same code range
3990         as SJIS.  Assigned the coding-system (Lisp
3991         symbol) `japanese-shift-jis' by default.
3992
3993    o coding-category-iso-7
3994
3995         The category for a coding system which has the same code range
3996         as ISO2022 of 7-bit environment.  This doesn't use any locking
3997         shift and single shift functions.  This can encode/decode all
3998         charsets.  Assigned the coding-system (Lisp symbol)
3999         `iso-2022-7bit' by default.
4000
4001    o coding-category-iso-7-tight
4002
4003         Same as coding-category-iso-7 except that this can
4004         encode/decode only the specified charsets.
4005
4006    o coding-category-iso-8-1
4007
4008         The category for a coding system which has the same code range
4009         as ISO2022 of 8-bit environment and graphic plane 1 used only
4010         for DIMENSION1 charset.  This doesn't use any locking shift
4011         and single shift functions.  Assigned the coding-system (Lisp
4012         symbol) `iso-latin-1' by default.
4013
4014    o coding-category-iso-8-2
4015
4016         The category for a coding system which has the same code range
4017         as ISO2022 of 8-bit environment and graphic plane 1 used only
4018         for DIMENSION2 charset.  This doesn't use any locking shift
4019         and single shift functions.  Assigned the coding-system (Lisp
4020         symbol) `japanese-iso-8bit' by default.
4021
4022    o coding-category-iso-7-else
4023
4024         The category for a coding system which has the same code range
4025         as ISO2022 of 7-bit environment but uses locking shift or
4026         single shift functions.  Assigned the coding-system (Lisp
4027         symbol) `iso-2022-7bit-lock' by default.
4028
4029    o coding-category-iso-8-else
4030
4031         The category for a coding system which has the same code range
4032         as ISO2022 of 8-bit environment but uses locking shift or
4033         single shift functions.  Assigned the coding-system (Lisp
4034         symbol) `iso-2022-8bit-ss2' by default.
4035
4036    o coding-category-big5
4037
4038         The category for a coding system which has the same code range
4039         as BIG5.  Assigned the coding-system (Lisp symbol)
4040         `cn-big5' by default.
4041
4042    o coding-category-utf-8
4043
4044         The category for a coding system which has the same code range
4045         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
4046         symbol) `utf-8' by default.
4047
4048    o coding-category-utf-16-be
4049
4050         The category for a coding system in which a text has an
4051         Unicode signature (cf. Unicode Standard) in the order of BIG
4052         endian at the head.  Assigned the coding-system (Lisp symbol)
4053         `utf-16-be' by default.
4054
4055    o coding-category-utf-16-le
4056
4057         The category for a coding system in which a text has an
4058         Unicode signature (cf. Unicode Standard) in the order of
4059         LITTLE endian at the head.  Assigned the coding-system (Lisp
4060         symbol) `utf-16-le' by default.
4061
4062    o coding-category-ccl
4063
4064         The category for a coding system of which encoder/decoder is
4065         written in CCL programs.  The default value is nil, i.e., no
4066         coding system is assigned.
4067
4068    o coding-category-binary
4069
4070         The category for a coding system not categorized in any of the
4071         above.  Assigned the coding-system (Lisp symbol)
4072         `no-conversion' by default.
4073
4074    Each of them is a Lisp symbol and the value is an actual
4075    `coding-system' (this is also a Lisp symbol) assigned by a user.
4076    What Emacs does actually is to detect a category of coding system.
4077    Then, it uses a `coding-system' assigned to it.  If Emacs can't
4078    decide a single possible category, it selects a category of the
4079    highest priority.  Priorities of categories are also specified by a
4080    user in a Lisp variable `coding-category-list'.
4081
4082 */
4083
/* Table indexed by a byte value.  detect_coding_mask skips the bytes
   at the head of a text whose entry is nonzero.  It clears the
   entries of ISO_CODE_SO, ISO_CODE_SI and ISO_CODE_ESC on each call
   and sets them again once such a code turns out not to start a
   valid ISO2022 sequence.  The other entries are presumably set up
   at initialization time (not shown here -- confirm).  */
static int ascii_skip_code[256];
4086
4087 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
4088    If it detects possible coding systems, return an integer in which
4089    appropriate flag bits are set.  Flag bits are defined by macros
4090    CODING_CATEGORY_MASK_XXX in `coding.h'.  If PRIORITIES is non-NULL,
4091    it should point the table `coding_priorities'.  In that case, only
4092    the flag bit for a coding system of the highest priority is set in
4093    the returned value.  If MULTIBYTEP is nonzero, 8-bit codes of the
4094    range 0x80..0x9F are in multibyte form.
4095
4096    How many ASCII characters are at the head is returned as *SKIP.  */
4097
4098 static int
4099 detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
4100      unsigned char *source;
4101      int src_bytes, *priorities, *skip;
4102      int multibytep;
4103 {
4104   register unsigned char c;
4105   unsigned char *src = source, *src_end = source + src_bytes;
4106   unsigned int mask, utf16_examined_p, iso2022_examined_p;
4107   int i;
4108
4109   /* At first, skip all ASCII characters and control characters except
4110      for three ISO2022 specific control characters.  */
4111   ascii_skip_code[ISO_CODE_SO] = 0;
4112   ascii_skip_code[ISO_CODE_SI] = 0;
4113   ascii_skip_code[ISO_CODE_ESC] = 0;
4114
4115  label_loop_detect_coding:
4116   while (src < src_end && ascii_skip_code[*src]) src++;
4117   *skip = src - source;
4118
4119   if (src >= src_end)
4120     /* We found nothing other than ASCII.  There's nothing to do.  */
4121     return 0;
4122
4123   c = *src;
4124   /* The text seems to be encoded in some multilingual coding system.
4125      Now, try to find in which coding system the text is encoded.  */
4126   if (c < 0x80)
4127     {
4128       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
4129       /* C is an ISO2022 specific control code of C0.  */
4130       mask = detect_coding_iso2022 (src, src_end, multibytep);
4131       if (mask == 0)
4132         {
4133           /* No valid ISO2022 code follows C.  Try again.  */
4134           src++;
4135           if (c == ISO_CODE_ESC)
4136             ascii_skip_code[ISO_CODE_ESC] = 1;
4137           else
4138             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
4139           goto label_loop_detect_coding;
4140         }
4141       if (priorities)
4142         {
4143           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4144             {
4145               if (mask & priorities[i])
4146                 return priorities[i];
4147             }
4148           return CODING_CATEGORY_MASK_RAW_TEXT;
4149         }
4150     }
4151   else
4152     {
4153       int try;
4154
4155       if (multibytep && c == LEADING_CODE_8_BIT_CONTROL)
4156         c = src[1] - 0x20;
4157
4158       if (c < 0xA0)
4159         {
4160           /* C is the first byte of SJIS character code,
4161              or a leading-code of Emacs' internal format (emacs-mule),
4162              or the first byte of UTF-16.  */
4163           try = (CODING_CATEGORY_MASK_SJIS
4164                   | CODING_CATEGORY_MASK_EMACS_MULE
4165                   | CODING_CATEGORY_MASK_UTF_16_BE
4166                   | CODING_CATEGORY_MASK_UTF_16_LE);
4167
4168           /* Or, if C is a special latin extra code,
4169              or is an ISO2022 specific control code of C1 (SS2 or SS3),
4170              or is an ISO2022 control-sequence-introducer (CSI),
4171              we should also consider the possibility of ISO2022 codings.  */
4172           if ((VECTORP (Vlatin_extra_code_table)
4173                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
4174               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
4175               || (c == ISO_CODE_CSI
4176                   && (src < src_end
4177                       && (*src == ']'
4178                           || ((*src == '0' || *src == '1' || *src == '2')
4179                               && src + 1 < src_end
4180                               && src[1] == ']')))))
4181             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
4182                      | CODING_CATEGORY_MASK_ISO_8BIT);
4183         }
4184       else
4185         /* C is a character of ISO2022 in graphic plane right,
4186            or a SJIS's 1-byte character code (i.e. JISX0201),
4187            or the first byte of BIG5's 2-byte code,
4188            or the first byte of UTF-8/16.  */
4189         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
4190                 | CODING_CATEGORY_MASK_ISO_8BIT
4191                 | CODING_CATEGORY_MASK_SJIS
4192                 | CODING_CATEGORY_MASK_BIG5
4193                 | CODING_CATEGORY_MASK_UTF_8
4194                 | CODING_CATEGORY_MASK_UTF_16_BE
4195                 | CODING_CATEGORY_MASK_UTF_16_LE);
4196
4197       /* Or, we may have to consider the possibility of CCL.  */
4198       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
4199           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
4200               ->spec.ccl.valid_codes)[c])
4201         try |= CODING_CATEGORY_MASK_CCL;
4202
4203       mask = 0;
4204       utf16_examined_p = iso2022_examined_p = 0;
4205       if (priorities)
4206         {
4207           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4208             {
4209               if (!iso2022_examined_p
4210                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
4211                 {
4212                   mask |= detect_coding_iso2022 (src, src_end, multibytep);
4213                   iso2022_examined_p = 1;
4214                 }
4215               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
4216                 mask |= detect_coding_sjis (src, src_end, multibytep);
4217               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
4218                 mask |= detect_coding_utf_8 (src, src_end, multibytep);
4219               else if (!utf16_examined_p
4220                        && (priorities[i] & try &
4221                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
4222                 {
4223                   mask |= detect_coding_utf_16 (src, src_end, multibytep);
4224                   utf16_examined_p = 1;
4225                 }
4226               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
4227                 mask |= detect_coding_big5 (src, src_end, multibytep);
4228               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
4229                 mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4230               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
4231                 mask |= detect_coding_ccl (src, src_end, multibytep);
4232               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
4233                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
4234               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
4235                 mask |= CODING_CATEGORY_MASK_BINARY;
4236               if (mask & priorities[i])
4237                 return priorities[i];
4238             }
4239           return CODING_CATEGORY_MASK_RAW_TEXT;
4240         }
4241       if (try & CODING_CATEGORY_MASK_ISO)
4242         mask |= detect_coding_iso2022 (src, src_end, multibytep);
4243       if (try & CODING_CATEGORY_MASK_SJIS)
4244         mask |= detect_coding_sjis (src, src_end, multibytep);
4245       if (try & CODING_CATEGORY_MASK_BIG5)
4246         mask |= detect_coding_big5 (src, src_end, multibytep);
4247       if (try & CODING_CATEGORY_MASK_UTF_8)
4248         mask |= detect_coding_utf_8 (src, src_end, multibytep);
4249       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
4250         mask |= detect_coding_utf_16 (src, src_end, multibytep);
4251       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
4252         mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4253       if (try & CODING_CATEGORY_MASK_CCL)
4254         mask |= detect_coding_ccl (src, src_end, multibytep);
4255     }
4256   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
4257 }
4258
4259 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
4260    The information of the detected coding system is set in CODING.  */
4261
4262 void
4263 detect_coding (coding, src, src_bytes)
4264      struct coding_system *coding;
4265      const unsigned char *src;
4266      int src_bytes;
4267 {
4268   unsigned int idx;
4269   int skip, mask;
4270   Lisp_Object val;
4271
4272   val = Vcoding_category_list;
4273   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip,
4274                              coding->src_multibyte);
4275   coding->heading_ascii = skip;
4276
4277   if (!mask) return;
4278
4279   /* We found a single coding system of the highest priority in MASK.  */
4280   idx = 0;
4281   while (mask && ! (mask & 1)) mask >>= 1, idx++;
4282   if (! mask)
4283     idx = CODING_CATEGORY_IDX_RAW_TEXT;
4284
4285   val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[idx]);
4286
4287   if (coding->eol_type != CODING_EOL_UNDECIDED)
4288     {
4289       Lisp_Object tmp;
4290
4291       tmp = Fget (val, Qeol_type);
4292       if (VECTORP (tmp))
4293         val = XVECTOR (tmp)->contents[coding->eol_type];
4294     }
4295
4296   /* Setup this new coding system while preserving some slots.  */
4297   {
4298     int src_multibyte = coding->src_multibyte;
4299     int dst_multibyte = coding->dst_multibyte;
4300
4301     setup_coding_system (val, coding);
4302     coding->src_multibyte = src_multibyte;
4303     coding->dst_multibyte = dst_multibyte;
4304     coding->heading_ascii = skip;
4305   }
4306 }
4307
4308 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
4309    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
4310    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
4311
4312    How many non-eol characters are at the head is returned as *SKIP.  */
4313
4314 #define MAX_EOL_CHECK_COUNT 3
4315
4316 static int
4317 detect_eol_type (source, src_bytes, skip)
4318      unsigned char *source;
4319      int src_bytes, *skip;
4320 {
4321   unsigned char *src = source, *src_end = src + src_bytes;
4322   unsigned char c;
4323   int total = 0;                /* How many end-of-lines are found so far.  */
4324   int eol_type = CODING_EOL_UNDECIDED;
4325   int this_eol_type;
4326
4327   *skip = 0;
4328
4329   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
4330     {
4331       c = *src++;
4332       if (c == '\n' || c == '\r')
4333         {
4334           if (*skip == 0)
4335             *skip = src - 1 - source;
4336           total++;
4337           if (c == '\n')
4338             this_eol_type = CODING_EOL_LF;
4339           else if (src >= src_end || *src != '\n')
4340             this_eol_type = CODING_EOL_CR;
4341           else
4342             this_eol_type = CODING_EOL_CRLF, src++;
4343
4344           if (eol_type == CODING_EOL_UNDECIDED)
4345             /* This is the first end-of-line.  */
4346             eol_type = this_eol_type;
4347           else if (eol_type != this_eol_type)
4348             {
4349               /* The found type is different from what found before.  */
4350               eol_type = CODING_EOL_INCONSISTENT;
4351               break;
4352             }
4353         }
4354     }
4355
4356   if (*skip == 0)
4357     *skip = src_end - source;
4358   return eol_type;
4359 }
4360
4361 /* Like detect_eol_type, but detect EOL type in 2-octet
4362    big-endian/little-endian format for coding systems utf-16-be and
4363    utf-16-le.  */
4364
4365 static int
4366 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
4367      unsigned char *source;
4368      int src_bytes, *skip, big_endian_p;
4369 {
4370   unsigned char *src = source, *src_end = src + src_bytes;
4371   unsigned int c1, c2;
4372   int total = 0;                /* How many end-of-lines are found so far.  */
4373   int eol_type = CODING_EOL_UNDECIDED;
4374   int this_eol_type;
4375   int msb, lsb;
4376
4377   if (big_endian_p)
4378     msb = 0, lsb = 1;
4379   else
4380     msb = 1, lsb = 0;
4381
4382   *skip = 0;
4383
4384   while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
4385     {
4386       c1 = (src[msb] << 8) | (src[lsb]);
4387       src += 2;
4388
4389       if (c1 == '\n' || c1 == '\r')
4390         {
4391           if (*skip == 0)
4392             *skip = src - 2 - source;
4393           total++;
4394           if (c1 == '\n')
4395             {
4396               this_eol_type = CODING_EOL_LF;
4397             }
4398           else
4399             {
4400               if ((src + 1) >= src_end)
4401                 {
4402                   this_eol_type = CODING_EOL_CR;
4403                 }
4404               else
4405                 {
4406                   c2 = (src[msb] << 8) | (src[lsb]);
4407                   if (c2 == '\n')
4408                     this_eol_type = CODING_EOL_CRLF, src += 2;
4409                   else
4410                     this_eol_type = CODING_EOL_CR;
4411                 }
4412             }
4413
4414           if (eol_type == CODING_EOL_UNDECIDED)
4415             /* This is the first end-of-line.  */
4416             eol_type = this_eol_type;
4417           else if (eol_type != this_eol_type)
4418             {
4419               /* The found type is different from what found before.  */
4420               eol_type = CODING_EOL_INCONSISTENT;
4421               break;
4422             }
4423         }
4424     }
4425
4426   if (*skip == 0)
4427     *skip = src_end - source;
4428   return eol_type;
4429 }
4430
4431 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
4432    is encoded.  If it detects an appropriate format of end-of-line, it
4433    sets the information in *CODING.  */
4434
4435 void
4436 detect_eol (coding, src, src_bytes)
4437      struct coding_system *coding;
4438      const unsigned char *src;
4439      int src_bytes;
4440 {
4441   Lisp_Object val;
4442   int skip;
4443   int eol_type;
4444
4445   switch (coding->category_idx)
4446     {
4447     case CODING_CATEGORY_IDX_UTF_16_BE:
4448       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
4449       break;
4450     case CODING_CATEGORY_IDX_UTF_16_LE:
4451       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
4452       break;
4453     default:
4454       eol_type = detect_eol_type (src, src_bytes, &skip);
4455       break;
4456     }
4457
4458   if (coding->heading_ascii > skip)
4459     coding->heading_ascii = skip;
4460   else
4461     skip = coding->heading_ascii;
4462
4463   if (eol_type == CODING_EOL_UNDECIDED)
4464     return;
4465   if (eol_type == CODING_EOL_INCONSISTENT)
4466     {
4467 #if 0
4468       /* This code is suppressed until we find a better way to
4469          distinguish raw text file and binary file.  */
4470
4471       /* If we have already detected that the coding is raw-text, the
4472          coding should actually be no-conversion.  */
4473       if (coding->type == coding_type_raw_text)
4474         {
4475           setup_coding_system (Qno_conversion, coding);
4476           return;
4477         }
4478       /* Else, let's decode only text code anyway.  */
4479 #endif /* 0 */
4480       eol_type = CODING_EOL_LF;
4481     }
4482
4483   val = Fget (coding->symbol, Qeol_type);
4484   if (VECTORP (val) && XVECTOR (val)->size == 3)
4485     {
4486       int src_multibyte = coding->src_multibyte;
4487       int dst_multibyte = coding->dst_multibyte;
4488       struct composition_data *cmp_data = coding->cmp_data;
4489
4490       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
4491       coding->src_multibyte = src_multibyte;
4492       coding->dst_multibyte = dst_multibyte;
4493       coding->heading_ascii = skip;
4494       coding->cmp_data = cmp_data;
4495     }
4496 }
4497
/* Slack (in bytes) added to every conversion buffer size estimate.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which decoded text may grow relative to its source:
   3 for ISO 2022, the CCL decoder's own buf_magnification for CCL,
   2 otherwise.  CODING is a pointer to struct coding_system; it is
   evaluated more than once, so it must have no side effects.  */
#define DECODING_BUFFER_MAG(coding)                     \
  ((coding)->type == coding_type_iso2022                \
   ? 3                                                  \
   : ((coding)->type == coding_type_ccl                 \
      ? (coding)->spec.ccl.decoder.buf_magnification    \
      : 2))
4506
4507 /* Return maximum size (bytes) of a buffer enough for decoding
4508    SRC_BYTES of text encoded in CODING.  */
4509
4510 int
4511 decoding_buffer_size (coding, src_bytes)
4512      struct coding_system *coding;
4513      int src_bytes;
4514 {
4515   return (src_bytes * DECODING_BUFFER_MAG (coding)
4516           + CONVERSION_BUFFER_EXTRA_ROOM);
4517 }
4518
4519 /* Return maximum size (bytes) of a buffer enough for encoding
4520    SRC_BYTES of text to CODING.  */
4521
4522 int
4523 encoding_buffer_size (coding, src_bytes)
4524      struct coding_system *coding;
4525      int src_bytes;
4526 {
4527   int magnification;
4528
4529   if (coding->type == coding_type_ccl)
4530     {
4531       magnification = coding->spec.ccl.encoder.buf_magnification;
4532       if (coding->eol_type == CODING_EOL_CRLF)
4533         magnification *= 2;
4534     }
4535   else if (CODING_REQUIRE_ENCODING (coding))
4536     magnification = 3;
4537   else
4538     magnification = 1;
4539
4540   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
4541 }
4542
/* Scratch buffer used while converting text.  It is set up by
   allocate_conversion_buffer, enlarged by extend_conversion_buffer
   and released by free_conversion_buffer.  */
struct conversion_buffer
{
  /* Number of bytes allocated for DATA.  */
  int size;
  /* 1 if DATA came from alloca, 0 if it came from the heap.  */
  int on_stack;
  /* The storage itself.  */
  unsigned char *data;
};
4550
/* Allocate LEN bytes of memory for BUF (struct conversion_buffer,
   passed as an lvalue, not a pointer).  Requests smaller than
   MAX_ALLOCA are served with alloca, larger ones with xmalloc;
   BUF.on_stack records which.

   This has to be a macro: alloca storage belongs to the frame of the
   function that expands it.  LEN is evaluated several times, so it
   must have no side effects.  */
#define allocate_conversion_buffer(buf, len)            \
  do {                                                  \
    if ((len) < MAX_ALLOCA)                             \
      {                                                 \
        (buf).data = (unsigned char *) alloca (len);    \
        (buf).on_stack = 1;                             \
      }                                                 \
    else                                                \
      {                                                 \
        (buf).data = (unsigned char *) xmalloc (len);   \
        (buf).on_stack = 0;                             \
      }                                                 \
    (buf).size = (len);                                 \
  } while (0)
4566
4567 /* Double the allocated memory for *BUF.  */
4568 static void
4569 extend_conversion_buffer (buf)
4570      struct conversion_buffer *buf;
4571 {
4572   if (buf->on_stack)
4573     {
4574       unsigned char *save = buf->data;
4575       buf->data = (unsigned char *) xmalloc (buf->size * 2);
4576       bcopy (save, buf->data, buf->size);
4577       buf->on_stack = 0;
4578     }
4579   else
4580     {
4581       buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
4582     }
4583   buf->size *= 2;
4584 }
4585
4586 /* Free the allocated memory for BUF if it is not on stack.  */
4587 static void
4588 free_conversion_buffer (buf)
4589      struct conversion_buffer *buf;
4590 {
4591   if (!buf->on_stack)
4592     xfree (buf->data);
4593 }
4594
4595 int
4596 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
4597      struct coding_system *coding;
4598      unsigned char *source, *destination;
4599      int src_bytes, dst_bytes, encodep;
4600 {
4601   struct ccl_program *ccl
4602     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
4603   unsigned char *dst = destination;
4604
4605   ccl->suppress_error = coding->suppress_error;
4606   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4607   if (encodep)
4608     {
4609       /* On encoding, EOL format is converted within ccl_driver.  For
4610          that, setup proper information in the structure CCL.  */
4611       ccl->eol_type = coding->eol_type;
4612       if (ccl->eol_type ==CODING_EOL_UNDECIDED)
4613         ccl->eol_type = CODING_EOL_LF;
4614       ccl->cr_consumed = coding->spec.ccl.cr_carryover;
4615       ccl->eight_bit_control = coding->dst_multibyte;
4616     }
4617   else
4618     ccl->eight_bit_control = 1;
4619   ccl->multibyte = coding->src_multibyte;
4620   if (coding->spec.ccl.eight_bit_carryover[0] != 0)
4621     {
4622       /* Move carryover bytes to DESTINATION.  */
4623       unsigned char *p = coding->spec.ccl.eight_bit_carryover;
4624       while (*p)
4625         *dst++ = *p++;
4626       coding->spec.ccl.eight_bit_carryover[0] = 0;
4627       if (dst_bytes)
4628         dst_bytes -= dst - destination;
4629     }
4630
4631   coding->produced = (ccl_driver (ccl, source, dst, src_bytes, dst_bytes,
4632                                   &(coding->consumed))
4633                       + dst - destination);
4634
4635   if (encodep)
4636     {
4637       coding->produced_char = coding->produced;
4638       coding->spec.ccl.cr_carryover = ccl->cr_consumed;
4639     }
4640   else if (!ccl->eight_bit_control)
4641     {
4642       /* The produced bytes forms a valid multibyte sequence. */
4643       coding->produced_char
4644         = multibyte_chars_in_text (destination, coding->produced);
4645       coding->spec.ccl.eight_bit_carryover[0] = 0;
4646     }
4647   else
4648     {
4649       /* On decoding, the destination should always multibyte.  But,
4650          CCL program might have been generated an invalid multibyte
4651          sequence.  Here we make such a sequence valid as
4652          multibyte.  */
4653       int bytes
4654         = dst_bytes ? dst_bytes : source + coding->consumed - destination;
4655
4656       if ((coding->consumed < src_bytes
4657            || !ccl->last_block)
4658           && coding->produced >= 1
4659           && destination[coding->produced - 1] >= 0x80)
4660         {
4661           /* We should not convert the tailing 8-bit codes to
4662              multibyte form even if they doesn't form a valid
4663              multibyte sequence.  They may form a valid sequence in
4664              the next call.  */
4665           int carryover = 0;
4666
4667           if (destination[coding->produced - 1] < 0xA0)
4668             carryover = 1;
4669           else if (coding->produced >= 2)
4670             {
4671               if (destination[coding->produced - 2] >= 0x80)
4672                 {
4673                   if (destination[coding->produced - 2] < 0xA0)
4674                     carryover = 2;
4675                   else if (coding->produced >= 3
4676                            && destination[coding->produced - 3] >= 0x80
4677                            && destination[coding->produced - 3] < 0xA0)
4678                     carryover = 3;
4679                 }
4680             }
4681           if (carryover > 0)
4682             {
4683               BCOPY_SHORT (destination + coding->produced - carryover,
4684                            coding->spec.ccl.eight_bit_carryover,
4685                            carryover);
4686               coding->spec.ccl.eight_bit_carryover[carryover] = 0;
4687               coding->produced -= carryover;
4688             }
4689         }
4690       coding->produced = str_as_multibyte (destination, bytes,
4691                                            coding->produced,
4692                                            &(coding->produced_char));
4693     }
4694
4695   switch (ccl->status)
4696     {
4697     case CCL_STAT_SUSPEND_BY_SRC:
4698       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
4699       break;
4700     case CCL_STAT_SUSPEND_BY_DST:
4701       coding->result = CODING_FINISH_INSUFFICIENT_DST;
4702       break;
4703     case CCL_STAT_QUIT:
4704     case CCL_STAT_INVALID_CMD:
4705       coding->result = CODING_FINISH_INTERRUPT;
4706       break;
4707     default:
4708       coding->result = CODING_FINISH_NORMAL;
4709       break;
4710     }
4711   return coding->result;
4712 }
4713
4714 /* Decode EOL format of the text at PTR of BYTES length destructively
4715    according to CODING->eol_type.  This is called after the CCL
4716    program produced a decoded text at PTR.  If we do CRLF->LF
4717    conversion, update CODING->produced and CODING->produced_char.  */
4718
4719 static void
4720 decode_eol_post_ccl (coding, ptr, bytes)
4721      struct coding_system *coding;
4722      unsigned char *ptr;
4723      int bytes;
4724 {
4725   Lisp_Object val, saved_coding_symbol;
4726   unsigned char *pend = ptr + bytes;
4727   int dummy;
4728
4729   /* Remember the current coding system symbol.  We set it back when
4730      an inconsistent EOL is found so that `last-coding-system-used' is
4731      set to the coding system that doesn't specify EOL conversion.  */
4732   saved_coding_symbol = coding->symbol;
4733
4734   coding->spec.ccl.cr_carryover = 0;
4735   if (coding->eol_type == CODING_EOL_UNDECIDED)
4736     {
4737       /* Here, to avoid the call of setup_coding_system, we directly
4738          call detect_eol_type.  */
4739       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
4740       if (coding->eol_type == CODING_EOL_INCONSISTENT)
4741         coding->eol_type = CODING_EOL_LF;
4742       if (coding->eol_type != CODING_EOL_UNDECIDED)
4743         {
4744           val = Fget (coding->symbol, Qeol_type);
4745           if (VECTORP (val) && XVECTOR (val)->size == 3)
4746             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4747         }
4748       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4749     }
4750
4751   if (coding->eol_type == CODING_EOL_LF
4752       || coding->eol_type == CODING_EOL_UNDECIDED)
4753     {
4754       /* We have nothing to do.  */
4755       ptr = pend;
4756     }
4757   else if (coding->eol_type == CODING_EOL_CRLF)
4758     {
4759       unsigned char *pstart = ptr, *p = ptr;
4760
4761       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4762           && *(pend - 1) == '\r')
4763         {
4764           /* If the last character is CR, we can't handle it here
4765              because LF will be in the not-yet-decoded source text.
4766              Record that the CR is not yet processed.  */
4767           coding->spec.ccl.cr_carryover = 1;
4768           coding->produced--;
4769           coding->produced_char--;
4770           pend--;
4771         }
4772       while (ptr < pend)
4773         {
4774           if (*ptr == '\r')
4775             {
4776               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4777                 {
4778                   *p++ = '\n';
4779                   ptr += 2;
4780                 }
4781               else
4782                 {
4783                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4784                     goto undo_eol_conversion;
4785                   *p++ = *ptr++;
4786                 }
4787             }
4788           else if (*ptr == '\n'
4789                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4790             goto undo_eol_conversion;
4791           else
4792             *p++ = *ptr++;
4793           continue;
4794
4795         undo_eol_conversion:
4796           /* We have faced with inconsistent EOL format at PTR.
4797              Convert all LFs before PTR back to CRLFs.  */
4798           for (p--, ptr--; p >= pstart; p--)
4799             {
4800               if (*p == '\n')
4801                 *ptr-- = '\n', *ptr-- = '\r';
4802               else
4803                 *ptr-- = *p;
4804             }
4805           /*  If carryover is recorded, cancel it because we don't
4806               convert CRLF anymore.  */
4807           if (coding->spec.ccl.cr_carryover)
4808             {
4809               coding->spec.ccl.cr_carryover = 0;
4810               coding->produced++;
4811               coding->produced_char++;
4812               pend++;
4813             }
4814           p = ptr = pend;
4815           coding->eol_type = CODING_EOL_LF;
4816           coding->symbol = saved_coding_symbol;
4817         }
4818       if (p < pend)
4819         {
4820           /* As each two-byte sequence CRLF was converted to LF, (PEND
4821              - P) is the number of deleted characters.  */
4822           coding->produced -= pend - p;
4823           coding->produced_char -= pend - p;
4824         }
4825     }
4826   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4827     {
4828       unsigned char *p = ptr;
4829
4830       for (; ptr < pend; ptr++)
4831         {
4832           if (*ptr == '\r')
4833             *ptr = '\n';
4834           else if (*ptr == '\n'
4835                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4836             {
4837               for (; p < ptr; p++)
4838                 {
4839                   if (*p == '\n')
4840                     *p = '\r';
4841                 }
4842               ptr = pend;
4843               coding->eol_type = CODING_EOL_LF;
4844               coding->symbol = saved_coding_symbol;
4845             }
4846         }
4847     }
4848 }
4849
4850 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
4851    decoding, it may detect coding system and format of end-of-line if
4852    those are not yet decided.  The source should be unibyte, the
4853    result is multibyte if CODING->dst_multibyte is nonzero, else
4854    unibyte.  */
4855
4856 int
4857 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4858      struct coding_system *coding;
4859      const unsigned char *source;
4860      unsigned char *destination;
4861      int src_bytes, dst_bytes;
4862 {
4863   int extra = 0;
4864
4865   if (coding->type == coding_type_undecided)
4866     detect_coding (coding, source, src_bytes);
4867
4868   if (coding->eol_type == CODING_EOL_UNDECIDED
4869       && coding->type != coding_type_ccl)
4870     {
4871       detect_eol (coding, source, src_bytes);
4872       /* We had better recover the original eol format if we
4873          encounter an inconsistent eol format while decoding.  */
4874       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4875     }
4876
4877   coding->produced = coding->produced_char = 0;
4878   coding->consumed = coding->consumed_char = 0;
4879   coding->errors = 0;
4880   coding->result = CODING_FINISH_NORMAL;
4881
4882   switch (coding->type)
4883     {
4884     case coding_type_sjis:
4885       decode_coding_sjis_big5 (coding, source, destination,
4886                                src_bytes, dst_bytes, 1);
4887       break;
4888
4889     case coding_type_iso2022:
4890       decode_coding_iso2022 (coding, source, destination,
4891                              src_bytes, dst_bytes);
4892       break;
4893
4894     case coding_type_big5:
4895       decode_coding_sjis_big5 (coding, source, destination,
4896                                src_bytes, dst_bytes, 0);
4897       break;
4898
4899     case coding_type_emacs_mule:
4900       decode_coding_emacs_mule (coding, source, destination,
4901                                 src_bytes, dst_bytes);
4902       break;
4903
4904     case coding_type_ccl:
4905       if (coding->spec.ccl.cr_carryover)
4906         {
4907           /* Put the CR which was not processed by the previous call
4908              of decode_eol_post_ccl in DESTINATION.  It will be
4909              decoded together with the following LF by the call to
4910              decode_eol_post_ccl below.  */
4911           *destination = '\r';
4912           coding->produced++;
4913           coding->produced_char++;
4914           dst_bytes--;
4915           extra = coding->spec.ccl.cr_carryover;
4916         }
4917       ccl_coding_driver (coding, source, destination + extra,
4918                          src_bytes, dst_bytes, 0);
4919       if (coding->eol_type != CODING_EOL_LF)
4920         {
4921           coding->produced += extra;
4922           coding->produced_char += extra;
4923           decode_eol_post_ccl (coding, destination, coding->produced);
4924         }
4925       break;
4926
4927     default:
4928       decode_eol (coding, source, destination, src_bytes, dst_bytes);
4929     }
4930
4931   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4932       && coding->mode & CODING_MODE_LAST_BLOCK
4933       && coding->consumed == src_bytes)
4934     coding->result = CODING_FINISH_NORMAL;
4935
4936   if (coding->mode & CODING_MODE_LAST_BLOCK
4937       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4938     {
4939       const unsigned char *src = source + coding->consumed;
4940       unsigned char *dst = destination + coding->produced;
4941
4942       src_bytes -= coding->consumed;
4943       coding->errors++;
4944       if (COMPOSING_P (coding))
4945         DECODE_COMPOSITION_END ('1');
4946       while (src_bytes--)
4947         {
4948           int c = *src++;
4949           dst += CHAR_STRING (c, dst);
4950           coding->produced_char++;
4951         }
4952       coding->consumed = coding->consumed_char = src - source;
4953       coding->produced = dst - destination;
4954       coding->result = CODING_FINISH_NORMAL;
4955     }
4956
4957   if (!coding->dst_multibyte)
4958     {
4959       coding->produced = str_as_unibyte (destination, coding->produced);
4960       coding->produced_char = coding->produced;
4961     }
4962
4963   return coding->result;
4964 }
4965
4966 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
4967    multibyteness of the source is CODING->src_multibyte, the
4968    multibyteness of the result is always unibyte.  */
4969
4970 int
4971 encode_coding (coding, source, destination, src_bytes, dst_bytes)
4972      struct coding_system *coding;
4973      const unsigned char *source;
4974      unsigned char *destination;
4975      int src_bytes, dst_bytes;
4976 {
4977   coding->produced = coding->produced_char = 0;
4978   coding->consumed = coding->consumed_char = 0;
4979   coding->errors = 0;
4980   coding->result = CODING_FINISH_NORMAL;
4981
4982   switch (coding->type)
4983     {
4984     case coding_type_sjis:
4985       encode_coding_sjis_big5 (coding, source, destination,
4986                                src_bytes, dst_bytes, 1);
4987       break;
4988
4989     case coding_type_iso2022:
4990       encode_coding_iso2022 (coding, source, destination,
4991                              src_bytes, dst_bytes);
4992       break;
4993
4994     case coding_type_big5:
4995       encode_coding_sjis_big5 (coding, source, destination,
4996                                src_bytes, dst_bytes, 0);
4997       break;
4998
4999     case coding_type_emacs_mule:
5000       encode_coding_emacs_mule (coding, source, destination,
5001                                 src_bytes, dst_bytes);
5002       break;
5003
5004     case coding_type_ccl:
5005       ccl_coding_driver (coding, source, destination,
5006                          src_bytes, dst_bytes, 1);
5007       break;
5008
5009     default:
5010       encode_eol (coding, source, destination, src_bytes, dst_bytes);
5011     }
5012
5013   if (coding->mode & CODING_MODE_LAST_BLOCK
5014       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
5015     {
5016       const unsigned char *src = source + coding->consumed;
5017       unsigned char *dst = destination + coding->produced;
5018
5019       if (coding->type == coding_type_iso2022)
5020         ENCODE_RESET_PLANE_AND_REGISTER;
5021       if (COMPOSING_P (coding))
5022         *dst++ = ISO_CODE_ESC, *dst++ = '1';
5023       if (coding->consumed < src_bytes)
5024         {
5025           int len = src_bytes - coding->consumed;
5026
5027           BCOPY_SHORT (src, dst, len);
5028           if (coding->src_multibyte)
5029             len = str_as_unibyte (dst, len);
5030           dst += len;
5031           coding->consumed = src_bytes;
5032         }
5033       coding->produced = coding->produced_char = dst - destination;
5034       coding->result = CODING_FINISH_NORMAL;
5035     }
5036
5037   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
5038       && coding->consumed == src_bytes)
5039     coding->result = CODING_FINISH_NORMAL;
5040
5041   return coding->result;
5042 }
5043
5044 /* Scan text in the region between *BEG and *END (byte positions),
5045    skip characters which we don't have to decode by coding system
5046    CODING at the head and tail, then set *BEG and *END to the region
5047    of the text we actually have to convert.  The caller should move
5048    the gap out of the region in advance if the region is from a
5049    buffer.
5050
5051    If STR is not NULL, *BEG and *END are indices into STR.  */
5052
5053 static void
5054 shrink_decoding_region (beg, end, coding, str)
5055      int *beg, *end;
5056      struct coding_system *coding;
5057      unsigned char *str;
5058 {
5059   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
5060   int eol_conversion;
5061   Lisp_Object translation_table;
5062
5063   if (coding->type == coding_type_ccl
5064       || coding->type == coding_type_undecided
5065       || coding->eol_type != CODING_EOL_LF
5066       || !NILP (coding->post_read_conversion)
5067       || coding->composing != COMPOSITION_DISABLED)
5068     {
5069       /* We can't skip any data.  */
5070       return;
5071     }
5072   if (coding->type == coding_type_no_conversion
5073       || coding->type == coding_type_raw_text
5074       || coding->type == coding_type_emacs_mule)
5075     {
5076       /* We need no conversion, but don't have to skip any data here.
5077          Decoding routine handles them effectively anyway.  */
5078       return;
5079     }
5080
5081   translation_table = coding->translation_table_for_decode;
5082   if (NILP (translation_table) && !NILP (Venable_character_translation))
5083     translation_table = Vstandard_translation_table_for_decode;
5084   if (CHAR_TABLE_P (translation_table))
5085     {
5086       int i;
5087       for (i = 0; i < 128; i++)
5088         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5089           break;
5090       if (i < 128)
5091         /* Some ASCII character should be translated.  We give up
5092            shrinking.  */
5093         return;
5094     }
5095
5096   if (coding->heading_ascii >= 0)
5097     /* Detection routine has already found how much we can skip at the
5098        head.  */
5099     *beg += coding->heading_ascii;
5100
5101   if (str)
5102     {
5103       begp_orig = begp = str + *beg;
5104       endp_orig = endp = str + *end;
5105     }
5106   else
5107     {
5108       begp_orig = begp = BYTE_POS_ADDR (*beg);
5109       endp_orig = endp = begp + *end - *beg;
5110     }
5111
5112   eol_conversion = (coding->eol_type == CODING_EOL_CR
5113                     || coding->eol_type == CODING_EOL_CRLF);
5114
5115   switch (coding->type)
5116     {
5117     case coding_type_sjis:
5118     case coding_type_big5:
5119       /* We can skip all ASCII characters at the head.  */
5120       if (coding->heading_ascii < 0)
5121         {
5122           if (eol_conversion)
5123             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
5124           else
5125             while (begp < endp && *begp < 0x80) begp++;
5126         }
5127       /* We can skip all ASCII characters at the tail except for the
5128          second byte of SJIS or BIG5 code.  */
5129       if (eol_conversion)
5130         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
5131       else
5132         while (begp < endp && endp[-1] < 0x80) endp--;
5133       /* Do not consider LF as ascii if preceded by CR, since that
5134          confuses eol decoding. */
5135       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
5136         endp++;
5137       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
5138         endp++;
5139       break;
5140
5141     case coding_type_iso2022:
5142       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5143         /* We can't skip any data.  */
5144         break;
5145       if (coding->heading_ascii < 0)
5146         {
5147           /* We can skip all ASCII characters at the head except for a
5148              few control codes.  */
5149           while (begp < endp && (c = *begp) < 0x80
5150                  && c != ISO_CODE_CR && c != ISO_CODE_SO
5151                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
5152                  && (!eol_conversion || c != ISO_CODE_LF))
5153             begp++;
5154         }
5155       switch (coding->category_idx)
5156         {
5157         case CODING_CATEGORY_IDX_ISO_8_1:
5158         case CODING_CATEGORY_IDX_ISO_8_2:
5159           /* We can skip all ASCII characters at the tail.  */
5160           if (eol_conversion)
5161             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
5162           else
5163             while (begp < endp && endp[-1] < 0x80) endp--;
5164           /* Do not consider LF as ascii if preceded by CR, since that
5165              confuses eol decoding. */
5166           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
5167             endp++;
5168           break;
5169
5170         case CODING_CATEGORY_IDX_ISO_7:
5171         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
5172           {
5173             /* We can skip all characters at the tail except for 8-bit
5174                codes and ESC and the following 2-byte at the tail.  */
5175             unsigned char *eight_bit = NULL;
5176
5177             if (eol_conversion)
5178               while (begp < endp
5179                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
5180                 {
5181                   if (!eight_bit && c & 0x80) eight_bit = endp;
5182                   endp--;
5183                 }
5184             else
5185               while (begp < endp
5186                      && (c = endp[-1]) != ISO_CODE_ESC)
5187                 {
5188                   if (!eight_bit && c & 0x80) eight_bit = endp;
5189                   endp--;
5190                 }
5191             /* Do not consider LF as ascii if preceded by CR, since that
5192                confuses eol decoding. */
5193             if (begp < endp && endp < endp_orig
5194                 && endp[-1] == '\r' && endp[0] == '\n')
5195               endp++;
5196             if (begp < endp && endp[-1] == ISO_CODE_ESC)
5197               {
5198                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
5199                   /* This is an ASCII designation sequence.  We can
5200                      surely skip the tail.  But, if we have
5201                      encountered an 8-bit code, skip only the codes
5202                      after that.  */
5203                   endp = eight_bit ? eight_bit : endp + 2;
5204                 else
5205                   /* Hmmm, we can't skip the tail.  */
5206                   endp = endp_orig;
5207               }
5208             else if (eight_bit)
5209               endp = eight_bit;
5210           }
5211         }
5212       break;
5213
5214     default:
5215       abort ();
5216     }
5217   *beg += begp - begp_orig;
5218   *end += endp - endp_orig;
5219   return;
5220 }
5221
5222 /* Like shrink_decoding_region but for encoding.
        Skip bytes below 0x80 at the head and tail of the region and
        store the narrowed bounds back into *BEG and *END.  If STR is
        non-NULL, *BEG and *END are offsets into STR; otherwise the region
        is addressed through BYTE_POS_ADDR (*BEG).  The region is left
        untouched for CCL coding systems, for eol types CR and CRLF, when
        CODING->cmp_data already holds data, for the no-conversion,
        raw-text, emacs-mule and undecided types, and when the translation
        table in effect has a non-nil entry for any code below 128.  */
5223
5224 static void
5225 shrink_encoding_region (beg, end, coding, str)
5226      int *beg, *end;
5227      struct coding_system *coding;
5228      unsigned char *str;
5229 {
5230   unsigned char *begp_orig, *begp, *endp_orig, *endp;
5231   int eol_conversion;
5232   Lisp_Object translation_table;
5233
5234   if (coding->type == coding_type_ccl
5235       || coding->eol_type == CODING_EOL_CRLF
5236       || coding->eol_type == CODING_EOL_CR
5237       || (coding->cmp_data && coding->cmp_data->used > 0))
5238     {
5239       /* We can't skip any data.  */
5240       return;
5241     }
5242   if (coding->type == coding_type_no_conversion
5243       || coding->type == coding_type_raw_text
5244       || coding->type == coding_type_emacs_mule
5245       || coding->type == coding_type_undecided)
5246     {
5247       /* We need no conversion, but don't have to skip any data here.
5248          Encoding routine handles them effectively anyway.  */
5249       return;
5250     }
5251
       /* Use CODING's own table for encoding; fall back to the standard one
          only when CODING has none and character translation is enabled.  */
5252   translation_table = coding->translation_table_for_encode;
5253   if (NILP (translation_table) && !NILP (Venable_character_translation))
5254     translation_table = Vstandard_translation_table_for_encode;
5255   if (CHAR_TABLE_P (translation_table))
5256     {
5257       int i;
5258       for (i = 0; i < 128; i++)
5259         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5260           break;
5261       if (i < 128)
5262         /* Some ASCII character should be translated.  We give up
5263            shrinking.  */
5264         return;
5265     }
5266
       /* Set up pointers to the first byte and to the byte just after the
          last byte of the region; the *_orig copies are kept so that the
          amount skipped can be computed at the end.  */
5267   if (str)
5268     {
5269       begp_orig = begp = str + *beg;
5270       endp_orig = endp = str + *end;
5271     }
5272   else
5273     {
5274       begp_orig = begp = BYTE_POS_ADDR (*beg);
5275       endp_orig = endp = begp + *end - *beg;
5276     }
5277
       /* NOTE(review): the first early return above already rejects
          CODING_EOL_CR and CODING_EOL_CRLF, so this looks like it is always
          zero here and the eol_conversion branches below are not taken.  */
5278   eol_conversion = (coding->eol_type == CODING_EOL_CR
5279                     || coding->eol_type == CODING_EOL_CRLF);
5280
5281   /* Here, we don't have to check coding->pre_write_conversion because
5282      the caller is expected to have handled it already.  */
5283   switch (coding->type)
5284     {
5285     case coding_type_iso2022:
5286       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5287         /* We can't skip any data.  */
5288         break;
5289       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
5290         {
               /* Scan over the leading bytes below 0x80, remembering the
                  position just after the last newline seen, and skip only
                  up to that position (presumably so that the encoder still
                  starts at a beginning of line -- TODO confirm).  */
5291           unsigned char *bol = begp;
5292           while (begp < endp && *begp < 0x80)
5293             {
5294               begp++;
5295               if (begp[-1] == '\n')
5296                 bol = begp;
5297             }
5298           begp = bol;
5299           goto label_skip_tail;
5300         }
5301       /* fall down ... */
5302
5303     case coding_type_sjis:
5304     case coding_type_big5:
5305       /* We can skip all ASCII characters at the head and tail.  */
5306       if (eol_conversion)
5307         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
5308       else
5309         while (begp < endp && *begp < 0x80) begp++;
5310     label_skip_tail:
5311       if (eol_conversion)
5312         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
5313       else
5314         while (begp < endp && *(endp - 1) < 0x80) endp--;
5315       break;
5316
5317     default:
5318       abort ();
5319     }
5320
       /* Apply the distance each pointer moved to the caller's positions.  */
5321   *beg += begp - begp_orig;
5322   *end += endp - endp_orig;
5323   return;
5324 }
5325
5326 /* As shrinking the conversion region requires some overhead, we don't
5327    try shrinking unless the length of the conversion region exceeds
5328    this value.  */
5329 static int shrink_conversion_region_threshhold = 1024;
5330
     /* Shrink the conversion region *BEG .. *END for CODING by calling
        shrink_encoding_region if ENCODEP is nonzero and
        shrink_decoding_region otherwise.  Nothing is done unless
        *END - *BEG is greater than shrink_conversion_region_threshhold.
        BEG and END must be pointers; STR is handed to the callee as is.  */
5331 #define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
5332   do {                                                                  \
5333     if (*(end) - *(beg) > shrink_conversion_region_threshhold)          \
5334       {                                                                 \
5335         if (encodep) shrink_encoding_region (beg, end, coding, str);    \
5336         else shrink_decoding_region (beg, end, coding, str);            \
5337       }                                                                 \
5338   } while (0)
5339
/* Handler registered with record_unwind_protect around calls to
   pre-write-conversion and post-read-conversion functions.  It clears
   inhibit_pre_post_conversion and sets Vlast_coding_system_used to ARG
   (the value that was passed when the handler was registered).  Always
   returns Qnil.  */
5340 static Lisp_Object
5341 code_convert_region_unwind (arg)
5342      Lisp_Object arg;
5343 {
5344   inhibit_pre_post_conversion = 0;
5345   Vlast_coding_system_used = arg;
5346   return Qnil;
5347 }
5348
5349 /* Store information about all compositions in the range FROM and TO
5350    of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
5351    buffer or a string, defaults to the current buffer.  Positions are
        stored relative to FROM.  Nothing is stored if CODING->composing is
        COMPOSITION_DISABLED.  */
5352
5353 void
5354 coding_save_composition (coding, from, to, obj)
5355      struct coding_system *coding;
5356      int from, to;
5357      Lisp_Object obj;
5358 {
5359   Lisp_Object prop;
5360   int start, end;
5361
5362   if (coding->composing == COMPOSITION_DISABLED)
5363     return;
5364   if (!coding->cmp_data)
5365     coding_allocate_composition_data (coding, from);
       /* Give up if no composition is found, or if the one found extends
          beyond TO.  */
5366   if (!find_composition (from, to, &start, &end, &prop, obj)
5367       || end > to)
5368     return;
       /* The composition found begins before FROM; search again from its
          end for the next one.  */
5369   if (start < from
5370       && (!find_composition (end, to, &start, &end, &prop, obj)
5371           || end > to))
5372     return;
5373   coding->composing = COMPOSITION_NO;
5374   do
5375     {
5376       if (COMPOSITION_VALID_P (start, end, prop))
5377         {
5378           enum composition_method method = COMPOSITION_METHOD (prop);
               /* Call the allocator again when the current block may not
                  have room for one more maximum-length record
                  (presumably this chains a fresh block -- TODO confirm).  */
5379           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
5380               >= COMPOSITION_DATA_SIZE)
5381             coding_allocate_composition_data (coding, from);
5382           /* For relative composition, we remember start and end
5383              positions, for the other compositions, we also remember
5384              components.  */
5385           CODING_ADD_COMPOSITION_START (coding, start - from, method);
5386           if (method != COMPOSITION_RELATIVE)
5387             {
5388               /* We must store all the components.  They are held in
                      a list, a vector, a string, or a single integer.  */
5389               Lisp_Object val, ch;
5390
5391               val = COMPOSITION_COMPONENTS (prop);
5392               if (CONSP (val))
5393                 while (CONSP (val))
5394                   {
5395                     ch = XCAR (val), val = XCDR (val);
5396                     CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5397                   }
5398               else if (VECTORP (val) || STRINGP (val))
5399                 {
5400                   int len = (VECTORP (val)
5401                              ? XVECTOR (val)->size : SCHARS (val));
5402                   int i;
5403                   for (i = 0; i < len; i++)
5404                     {
5405                       ch = (STRINGP (val)
5406                             ? Faref (val, make_number (i))
5407                             : XVECTOR (val)->contents[i]);
5408                       CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5409                     }
5410                 }
5411               else              /* INTEGERP (val) */
5412                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
5413             }
5414           CODING_ADD_COMPOSITION_END (coding, end - from);
5415         }
           /* Continue the search just after this composition.  */
5416       start = end;
5417     }
5418   while (start < to
5419          && find_composition (start, to, &start, &end, &prop, obj)
5420          && end <= to);
5421
5422   /* Make coding->cmp_data point to the first memory block.  */
5423   while (coding->cmp_data->prev)
5424     coding->cmp_data = coding->cmp_data->prev;
5425   coding->cmp_data_start = 0;
5426 }
5427
5428 /* Reflect the saved information about compositions to OBJ.
5429    CODING->cmp_data points to a memory block for the information.  OBJ
5430    is a buffer or a string, defaults to the current buffer.

        All blocks of the chain are processed, starting from the first
        one.  Each record in a block is a run of ints: DATA[0] is the
        record length (it is also the step to the next record), DATA[1]
        and DATA[2] are handed to compose_text as the range to compose,
        DATA[3] is the composition method, and DATA[4] onward are the
        components.  Processing of a block stops at the first record that
        looks invalid.  */
5431
5432 void
5433 coding_restore_composition (coding, obj)
5434      struct coding_system *coding;
5435      Lisp_Object obj;
5436 {
5437   struct composition_data *cmp_data = coding->cmp_data;
5438
5439   if (!cmp_data)
5440     return;
5441
       /* Rewind to the first block of the chain.  */
5442   while (cmp_data->prev)
5443     cmp_data = cmp_data->prev;
5444
5445   while (cmp_data)
5446     {
5447       int i;
5448
5449       for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
5450            i += cmp_data->data[i])
5451         {
5452           int *data = cmp_data->data + i;
5453           enum composition_method method = (enum composition_method) data[3];
5454           Lisp_Object components;
5455
5456           if (data[0] < 0 || i + data[0] > cmp_data->used)
5457             /* Invalid composition data.  */
5458             break;
5459
5460           if (method == COMPOSITION_RELATIVE)
5461             components = Qnil;
5462           else
5463             {
5464               int len = data[0] - 4, j;
5465               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5466
                   /* With rules, the component count is made odd by
                      dropping the last entry when it is even.  */
5467               if (method == COMPOSITION_WITH_RULE_ALTCHARS
5468                   && len % 2 == 0)
5469                 len --;
5470               if (len < 1 || len > (int) (sizeof args / sizeof args[0]))
5471                 /* Invalid composition data: no component at all, or
                        more components than ARGS can hold (filling ARGS
                        would overrun the stack array).  */
5472                 break;
5473               for (j = 0; j < len; j++)
5474                 args[j] = make_number (data[4 + j]);
5475               components = (method == COMPOSITION_WITH_ALTCHARS
5476                             ? Fstring (len, args)
5477                             : Fvector (len, args));
5478             }
5479           compose_text (data[1], data[2], components, Qnil, obj);
5480         }
5481       cmp_data = cmp_data->next;
5482     }
5483 }
5484
5485 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
5486    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
5487    coding system CODING, and return the status code of code conversion
5488    (currently always 0; this value has no meaning).
5489
5490    How many characters (and bytes) are converted to how many
5491    characters (and bytes) are recorded in members of the structure
5492    CODING.
5493
5494    If REPLACE is nonzero, we do various things as if the original text
5495    is deleted and a new text is inserted.  See the comments in
5496    replace_range (insdel.c) to know what we are doing.
5497
5498    If REPLACE is zero, it is assumed that the source text is unibyte.
5499    Otherwise, it is assumed that the source text is multibyte.  */
5500
5501 int
5502 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
5503      int from, from_byte, to, to_byte, encodep, replace;
5504      struct coding_system *coding;
5505 {
5506   int len = to - from, len_byte = to_byte - from_byte;
5507   int nchars_del = 0, nbytes_del = 0;
5508   int require, inserted, inserted_byte;
5509   int head_skip, tail_skip, total_skip = 0;
5510   Lisp_Object saved_coding_symbol;
5511   int first = 1;
5512   unsigned char *src, *dst;
5513   Lisp_Object deletion;
5514   int orig_point = PT, orig_len = len;
5515   int prev_Z;
5516   int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
5517
5518   deletion = Qnil;
5519   saved_coding_symbol = coding->symbol;
5520
       /* If point is strictly inside the region, move it to FROM for the
          duration of the conversion.  */
5521   if (from < PT && PT < to)
5522     {
5523       TEMP_SET_PT_BOTH (from, from_byte);
5524       orig_point = from;
5525     }
5526
5527   if (replace)
5528     {
5529       int saved_from = from;
5530       int saved_inhibit_modification_hooks;
5531
5532       prepare_to_modify_buffer (from, to, &from);
5533       if (saved_from != from)
5534         {
5535           to = from + len;
5536           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
5537           len_byte = to_byte - from_byte;
5538         }
5539
5540       /* The code conversion routine can not preserve text properties
5541          for now.  So, we must remove all text properties in the
5542          region.  Here, we must suppress all modification hooks.  */
5543       saved_inhibit_modification_hooks = inhibit_modification_hooks;
5544       inhibit_modification_hooks = 1;
5545       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
5546       inhibit_modification_hooks = saved_inhibit_modification_hooks;
5547     }
5548
5549   if (! encodep && CODING_REQUIRE_DETECTION (coding))
5550     {
5551       /* We must detect encoding of text and eol format.  */
5552
5553       if (from < GPT && to > GPT)
5554         move_gap_both (from, from_byte);
5555       if (coding->type == coding_type_undecided)
5556         {
5557           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
5558           if (coding->type == coding_type_undecided)
5559             {
5560               /* It seems that the text contains only ASCII, but we
5561                  should not leave it undecided because the deeper
5562                  decoding routine (decode_coding) tries to detect the
5563                  encodings again in vain.  */
5564               coding->type = coding_type_emacs_mule;
5565               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
5566               /* As emacs-mule decoder will handle composition, we
5567                  need this setting to allocate coding->cmp_data
5568                  later.  */
5569               coding->composing = COMPOSITION_NO;
5570             }
5571         }
5572       if (coding->eol_type == CODING_EOL_UNDECIDED
5573           && coding->type != coding_type_ccl)
5574         {
5575           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
5576           if (coding->eol_type == CODING_EOL_UNDECIDED)
5577             coding->eol_type = CODING_EOL_LF;
5578           /* We had better recover the original eol format if we
5579              encounter an inconsistent eol format while decoding.  */
5580           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5581         }
5582     }
5583
5584   /* Now we convert the text.  */
5585
5586   /* For encoding, we must process pre-write-conversion in advance.  */
5587   if (! inhibit_pre_post_conversion
5588       && encodep
5589       && SYMBOLP (coding->pre_write_conversion)
5590       && ! NILP (Ffboundp (coding->pre_write_conversion)))
5591     {
5592       /* The function in pre-write-conversion may put a new text in a
5593          new buffer.  */
5594       struct buffer *prev = current_buffer;
5595       Lisp_Object new;
5596
5597       record_unwind_protect (code_convert_region_unwind,
5598                              Vlast_coding_system_used);
5599       /* We should not call any more pre-write/post-read-conversion
5600          functions while this pre-write-conversion is running.  */
5601       inhibit_pre_post_conversion = 1;
5602       call2 (coding->pre_write_conversion,
5603              make_number (from), make_number (to));
5604       inhibit_pre_post_conversion = 0;
5605       /* Discard the unwind protect.  */
5606       specpdl_ptr--;
5607
5608       if (current_buffer != prev)
5609         {
               /* Replace the region in the original buffer with the whole
                  accessible text of the new buffer, then kill the new
                  buffer and recompute the region bounds.  */
5610           len = ZV - BEGV;
5611           new = Fcurrent_buffer ();
5612           set_buffer_internal_1 (prev);
5613           del_range_2 (from, from_byte, to, to_byte, 0);
5614           TEMP_SET_PT_BOTH (from, from_byte);
5615           insert_from_buffer (XBUFFER (new), 1, len, 0);
5616           Fkill_buffer (new);
5617           if (orig_point >= to)
5618             orig_point += len - orig_len;
5619           else if (orig_point > from)
5620             orig_point = from;
5621           orig_len = len;
5622           to = from + len;
5623           from_byte = CHAR_TO_BYTE (from);
5624           to_byte = CHAR_TO_BYTE (to);
5625           len_byte = to_byte - from_byte;
5626           TEMP_SET_PT_BOTH (from, from_byte);
5627         }
5628     }
5629
5630   if (replace)
5631     {
           /* Remember what is being replaced: the text itself when undo
              is enabled, otherwise only its size.  */
5632       if (! EQ (current_buffer->undo_list, Qt))
5633         deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
5634       else
5635         {
5636           nchars_del = to - from;
5637           nbytes_del = to_byte - from_byte;
5638         }
5639     }
5640
5641   if (coding->composing != COMPOSITION_DISABLED)
5642     {
5643       if (encodep)
5644         coding_save_composition (coding, from, to, Fcurrent_buffer ());
5645       else
5646         coding_allocate_composition_data (coding, from);
5647     }
5648
5649   /* Try to skip the leading and trailing ASCII characters.  */
5650   if (coding->type != coding_type_ccl)
5651     {
5652       int from_byte_orig = from_byte, to_byte_orig = to_byte;
5653
5654       if (from < GPT && GPT < to)
5655         move_gap_both (from, from_byte);
5656       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
5657       if (from_byte == to_byte
5658           && (encodep || NILP (coding->post_read_conversion))
5659           && ! CODING_REQUIRE_FLUSHING (coding))
5660         {
               /* The whole region was skipped: nothing is left to
                  convert.  */
5661           coding->produced = len_byte;
5662           coding->produced_char = len;
5663           if (!replace)
5664             /* We must record and adjust for this new text now.  */
5665             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
               /* Release the composition data set up above; the normal
                  exit path frees it too, and it would leak here.  */
               coding_free_composition_data (coding);
5666           return 0;
5667         }
5668
5669       head_skip = from_byte - from_byte_orig;
5670       tail_skip = to_byte_orig - to_byte;
5671       total_skip = head_skip + tail_skip;
5672       from += head_skip;
5673       to -= tail_skip;
5674       len -= total_skip; len_byte -= total_skip;
5675     }
5676
5677   /* For conversion, we must put the gap before the text in addition to
5678      making the gap larger for efficient decoding.  The required gap
5679      size starts from 2000 which is the magic number used in make_gap.
5680      But, after one batch of conversion, it will be incremented if we
5681      find that it is not enough.  */
5682   require = 2000;
5683
5684   if (GAP_SIZE  < require)
5685     make_gap (require - GAP_SIZE);
5686   move_gap_both (from, from_byte);
5687
5688   inserted = inserted_byte = 0;
5689
       /* Treat the source text as part of the gap: it now sits at the end
          of the gap and is no longer counted in the buffer text.  */
5690   GAP_SIZE += len_byte;
5691   ZV -= len;
5692   Z -= len;
5693   ZV_BYTE -= len_byte;
5694   Z_BYTE -= len_byte;
5695
5696   if (GPT - BEG < BEG_UNCHANGED)
5697     BEG_UNCHANGED = GPT - BEG;
5698   if (Z - GPT < END_UNCHANGED)
5699     END_UNCHANGED = Z - GPT;
5700
5701   if (!encodep && coding->src_multibyte)
5702     {
5703       /* Decoding routines expects that the source text is unibyte.
5704          We must convert 8-bit characters of multibyte form to
5705          unibyte.  */
5706       int len_byte_orig = len_byte;
5707       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
5708       if (len_byte < len_byte_orig)
5709         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
5710                     len_byte);
5711       coding->src_multibyte = 0;
5712     }
5713
5714   for (;;)
5715     {
5716       int result;
5717
5718       /* The buffer memory is now:
5719          +--------+converted-text+---------+-------original-text-------+---+
5720          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
5721                   |<---------------------- GAP ----------------------->|  */
5722       src = GAP_END_ADDR - len_byte;
5723       dst = GPT_ADDR + inserted_byte;
5724
5725       if (encodep)
5726         result = encode_coding (coding, src, dst, len_byte, 0);
5727       else
5728         {
5729           if (coding->composing != COMPOSITION_DISABLED)
5730             coding->cmp_data->char_offset = from + inserted;
5731           result = decode_coding (coding, src, dst, len_byte, 0);
5732         }
5733
5734       /* The buffer memory is now:
5735          +--------+-------converted-text----+--+------original-text----+---+
5736          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
5737                   |<---------------------- GAP ----------------------->|  */
5738
5739       inserted += coding->produced_char;
5740       inserted_byte += coding->produced;
5741       len_byte -= coding->consumed;
5742
5743       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5744         {
5745           coding_allocate_composition_data (coding, from + inserted);
5746           continue;
5747         }
5748
5749       src += coding->consumed;
5750       dst += coding->produced;
5751
5752       if (result == CODING_FINISH_NORMAL)
5753         {
5754           src += len_byte;
5755           break;
5756         }
5757       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5758         {
5759           unsigned char *pend = dst, *p = pend - inserted_byte;
5760           Lisp_Object eol_type;
5761
5762           /* Encode LFs back to the original eol format (CR or CRLF).  */
5763           if (coding->eol_type == CODING_EOL_CR)
5764             {
5765               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
5766             }
5767           else
5768             {
5769               int count = 0;
5770
5771               while (p < pend) if (*p++ == '\n') count++;
5772               if (src - dst < count)
5773                 {
5774                   /* We don't have sufficient room for encoding LFs
5775                      back to CRLF.  We must record converted and
5776                      not-yet-converted text back to the buffer
5777                      content, enlarge the gap, then record them out of
5778                      the buffer contents again.  */
5779                   int add = len_byte + inserted_byte;
5780
5781                   GAP_SIZE -= add;
5782                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5783                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
5784                   make_gap (count - GAP_SIZE);
5785                   GAP_SIZE += add;
5786                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5787                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5788                   /* Don't forget to update SRC, DST, and PEND.  */
5789                   src = GAP_END_ADDR - len_byte;
5790                   dst = GPT_ADDR + inserted_byte;
5791                   pend = dst;
5792                 }
5793               inserted += count;
5794               inserted_byte += count;
5795               coding->produced += count;
                   /* Shift the converted text up from its end, putting a
                      CR in front of each LF, until COUNT LFs are done.  */
5796               p = dst = pend + count;
5797               while (count)
5798                 {
5799                   *--p = *--pend;
5800                   if (*p == '\n') count--, *--p = '\r';
5801                 }
5802             }
5803
5804           /* Suppress eol-format conversion in the further conversion.  */
5805           coding->eol_type = CODING_EOL_LF;
5806
5807           /* Set the coding system symbol to that for Unix-like EOL.  */
5808           eol_type = Fget (saved_coding_symbol, Qeol_type);
5809           if (VECTORP (eol_type)
5810               && XVECTOR (eol_type)->size == 3
5811               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5812             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5813           else
5814             coding->symbol = saved_coding_symbol;
5815
5816           continue;
5817         }
5818       if (len_byte <= 0)
5819         {
               /* All source bytes are consumed.  A CCL coding system gets
                  one more pass with CODING_MODE_LAST_BLOCK set.  */
5820           if (coding->type != coding_type_ccl
5821               || coding->mode & CODING_MODE_LAST_BLOCK)
5822             break;
5823           coding->mode |= CODING_MODE_LAST_BLOCK;
5824           continue;
5825         }
5826       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5827         {
5828           /* The source text ends in invalid codes.  Let's just
5829              make them valid buffer contents, and finish conversion.  */
5830           if (multibyte_p)
5831             {
5832               unsigned char *start = dst;
5833
5834               inserted += len_byte;
5835               while (len_byte--)
5836                 {
5837                   int c = *src++;
5838                   dst += CHAR_STRING (c, dst);
5839                 }
5840
5841               inserted_byte += dst - start;
5842             }
5843           else
5844             {
5845               inserted += len_byte;
5846               inserted_byte += len_byte;
5847               while (len_byte--)
5848                 *dst++ = *src++;
5849             }
5850           break;
5851         }
5852       if (result == CODING_FINISH_INTERRUPT)
5853         {
5854           /* The conversion procedure was interrupted by a user.  */
5855           break;
5856         }
5857       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5858       if (coding->consumed < 1)
5859         {
5860           /* It's quite strange to require more memory without
5861              consuming any bytes.  Perhaps CCL program bug.  */
5862           break;
5863         }
5864       if (first)
5865         {
5866           /* We have just done the first batch of conversion which was
5867              stopped because of insufficient gap.  Let's reconsider the
5868              required gap size (i.e. SRC - DST) now.
5869
5870              We have converted ORIG bytes (== coding->consumed) into
5871              NEW bytes (coding->produced).  To convert the remaining
5872              LEN bytes, we may need REQUIRE bytes of gap, where:
5873                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5874                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5875              Here, we are sure that NEW >= ORIG.  */
5876           float ratio;
5877
5878           if (coding->produced <= coding->consumed)
5879             {
5880               /* This happens because of CCL-based coding system with
5881                  eol-type CRLF.  */
5882               require = 0;
5883             }
5884           else
5885             {
                   /* Divide in floating point.  Integer division would
                      truncate the ratio, giving 0 whenever NEW is less
                      than twice ORIG.  */
5886               ratio = (float) (coding->produced - coding->consumed) / coding->consumed;
5887               require = len_byte * ratio;
5888             }
5889           first = 0;
5890         }
5891       if ((src - dst) < (require + 2000))
5892         {
5893           /* See the comment above the previous call of make_gap.  */
5894           int add = len_byte + inserted_byte;
5895
5896           GAP_SIZE -= add;
5897           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5898           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5899           make_gap (require + 2000);
5900           GAP_SIZE += add;
5901           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5902           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5903         }
5904     }
5905   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5906
5907   if (encodep && coding->dst_multibyte)
5908     {
5909       /* The output is unibyte.  We must convert 8-bit characters to
5910          multibyte form.  */
5911       if (inserted_byte * 2 > GAP_SIZE)
5912         {
5913           GAP_SIZE -= inserted_byte;
5914           ZV += inserted_byte; Z += inserted_byte;
5915           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5916           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5917           make_gap (inserted_byte - GAP_SIZE);
5918           GAP_SIZE += inserted_byte;
5919           ZV -= inserted_byte; Z -= inserted_byte;
5920           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5921           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5922         }
5923       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5924     }
5925
5926   /* If we shrank the conversion area, adjust it now.  */
5927   if (total_skip > 0)
5928     {
5929       if (tail_skip > 0)
5930         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5931       inserted += total_skip; inserted_byte += total_skip;
5932       GAP_SIZE += total_skip;
5933       GPT -= head_skip; GPT_BYTE -= head_skip;
5934       ZV -= total_skip; ZV_BYTE -= total_skip;
5935       Z -= total_skip; Z_BYTE -= total_skip;
5936       from -= head_skip; from_byte -= head_skip;
5937       to += tail_skip; to_byte += tail_skip;
5938     }
5939
       /* The adjust_after_replace* call may change Z; recompute INSERTED
          as the resulting change in Z.  */
5940   prev_Z = Z;
5941   if (! EQ (current_buffer->undo_list, Qt))
5942     adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5943   else
5944     adjust_after_replace_noundo (from, from_byte, nchars_del, nbytes_del,
5945                                  inserted, inserted_byte);
5946   inserted = Z - prev_Z;
5947
5948   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5949     coding_restore_composition (coding, Fcurrent_buffer ());
5950   coding_free_composition_data (coding);
5951
5952   if (! inhibit_pre_post_conversion
5953       && ! encodep && ! NILP (coding->post_read_conversion))
5954     {
5955       Lisp_Object val;
5956       Lisp_Object saved_coding_system;
5957
5958       if (from != PT)
5959         TEMP_SET_PT_BOTH (from, from_byte);
5960       prev_Z = Z;
5961       record_unwind_protect (code_convert_region_unwind,
5962                              Vlast_coding_system_used);
5963       saved_coding_system = Vlast_coding_system_used;
5964       Vlast_coding_system_used = coding->symbol;
5965       /* We should not call any more pre-write/post-read-conversion
5966          functions while this post-read-conversion is running.  */
5967       inhibit_pre_post_conversion = 1;
5968       val = call1 (coding->post_read_conversion, make_number (inserted));
5969       inhibit_pre_post_conversion = 0;
5970       coding->symbol = Vlast_coding_system_used;
5971       Vlast_coding_system_used = saved_coding_system;
5972       /* Discard the unwind protect.  */
5973       specpdl_ptr--;
5974       CHECK_NUMBER (val);
5975       inserted += Z - prev_Z;
5976     }
5977
       /* Restore point: shift it by the change in length if it was at or
          after the end of the original text, else put it at FROM.  */
5978   if (orig_point >= from)
5979     {
5980       if (orig_point >= from + orig_len)
5981         orig_point += inserted - orig_len;
5982       else
5983         orig_point = from;
5984       TEMP_SET_PT (orig_point);
5985     }
5986
5987   if (replace)
5988     {
5989       signal_after_change (from, to - from, inserted);
5990       update_compositions (from, from + inserted, CHECK_BORDER);
5991     }
5992
5993   {
5994     coding->consumed = to_byte - from_byte;
5995     coding->consumed_char = to - from;
5996     coding->produced = inserted_byte;
5997     coding->produced_char = inserted;
5998   }
5999
6000   return 0;
6001 }
6002
/* Run a conversion function of CODING on the text of STR and return
   the result as a string.  If ENCODEP is nonzero, call
   CODING->pre_write_conversion with the bounds of the text; otherwise
   call CODING->post_read_conversion with the length of the text.  The
   text is processed in the buffer " *code-converting-work*", whose
   multibyteness is set to that of STR.  For the post-read case,
   CODING->symbol is updated from Vlast_coding_system_used after the
   call.  The current buffer is restored by the unwind handlers
   registered here; Vdeactivate_mark is restored explicitly.  */
6003 Lisp_Object
6004 run_pre_post_conversion_on_str (str, coding, encodep)
6005      Lisp_Object str;
6006      struct coding_system *coding;
6007      int encodep;
6008 {
6009   int count = SPECPDL_INDEX ();
6010   struct gcpro gcpro1, gcpro2;
6011   int multibyte = STRING_MULTIBYTE (str);
6012   Lisp_Object buffer;
6013   struct buffer *buf;
6014   Lisp_Object old_deactivate_mark;
6015
       /* Both handlers run when unbind_to is called at the end (or on a
          non-local exit): the first switches back to the buffer that is
          current now, the second clears inhibit_pre_post_conversion and
          restores Vlast_coding_system_used.  */
6016   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
6017   record_unwind_protect (code_convert_region_unwind,
6018                          Vlast_coding_system_used);
6019   /* It is not crucial to specbind this.  */
6020   old_deactivate_mark = Vdeactivate_mark;
6021   GCPRO2 (str, old_deactivate_mark);
6022
6023   buffer = Fget_buffer_create (build_string (" *code-converting-work*"));
6024   buf = XBUFFER (buffer);
6025
       /* Reset the state of the work buffer before using it.  Undo
          recording is turned off (undo_list is Qt).  */
6026   delete_all_overlays (buf);
6027   buf->directory = current_buffer->directory;
6028   buf->read_only = Qnil;
6029   buf->filename = Qnil;
6030   buf->undo_list = Qt;
6031   eassert (buf->overlays_before == NULL);
6032   eassert (buf->overlays_after == NULL);
6033
6034   set_buffer_internal (buf);
6035   /* We must insert the contents of STR as is without
6036      unibyte<->multibyte conversion.  For that, we adjust the
6037      multibyteness of the working buffer to that of STR.  */
6038   Ferase_buffer ();
6039   buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
6040
6041   insert_from_string (str, 0, 0,
6042                       SCHARS (str), SBYTES (str), 0);
6043   UNGCPRO;
       /* Keep the conversion function from triggering further
          pre-write/post-read conversions while it runs.  */
6044   inhibit_pre_post_conversion = 1;
6045   if (encodep)
6046     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
6047   else
6048     {
6049       Vlast_coding_system_used = coding->symbol;
6050       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
6051       call1 (coding->post_read_conversion, make_number (Z - BEG));
6052       coding->symbol = Vlast_coding_system_used;
6053     }
6054   inhibit_pre_post_conversion = 0;
6055   Vdeactivate_mark = old_deactivate_mark;
       /* Take the whole text of the work buffer as the result.  */
6056   str = make_buffer_string (BEG, Z, 1);
6057   return unbind_to (count, str);
6058 }
6059
6060 Lisp_Object
6061 decode_coding_string (str, coding, nocopy)
6062      Lisp_Object str;
6063      struct coding_system *coding;
6064      int nocopy;
6065 {
6066   int len;
6067   struct conversion_buffer buf;
6068   int from, to_byte;
6069   Lisp_Object saved_coding_symbol;
6070   int result;
6071   int require_decoding;
6072   int shrinked_bytes = 0;
6073   Lisp_Object newstr;
6074   int consumed, consumed_char, produced, produced_char;
6075
6076   from = 0;
6077   to_byte = SBYTES (str);
6078
6079   saved_coding_symbol = coding->symbol;
6080   coding->src_multibyte = STRING_MULTIBYTE (str);
6081   coding->dst_multibyte = 1;
6082   if (CODING_REQUIRE_DETECTION (coding))
6083     {
6084       /* See the comments in code_convert_region.  */
6085       if (coding->type == coding_type_undecided)
6086         {
6087           detect_coding (coding, SDATA (str), to_byte);
6088           if (coding->type == coding_type_undecided)
6089             {
6090               coding->type = coding_type_emacs_mule;
6091               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
6092               /* As emacs-mule decoder will handle composition, we
6093                  need this setting to allocate coding->cmp_data
6094                  later.  */
6095               coding->composing = COMPOSITION_NO;
6096             }
6097         }
6098       if (coding->eol_type == CODING_EOL_UNDECIDED
6099           && coding->type != coding_type_ccl)
6100         {
6101           saved_coding_symbol = coding->symbol;
6102           detect_eol (coding, SDATA (str), to_byte);
6103           if (coding->eol_type == CODING_EOL_UNDECIDED)
6104             coding->eol_type = CODING_EOL_LF;
6105           /* We had better recover the original eol format if we
6106              encounter an inconsistent eol format while decoding.  */
6107           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
6108         }
6109     }
6110
6111   if (coding->type == coding_type_no_conversion
6112       || coding->type == coding_type_raw_text)
6113     coding->dst_multibyte = 0;
6114
6115   require_decoding = CODING_REQUIRE_DECODING (coding);
6116
6117   if (STRING_MULTIBYTE (str))
6118     {
6119       /* Decoding routines expect the source text to be unibyte.  */
6120       str = Fstring_as_unibyte (str);
6121       to_byte = SBYTES (str);
6122       nocopy = 1;
6123       coding->src_multibyte = 0;
6124     }
6125
6126   /* Try to skip the heading and tailing ASCIIs.  */
6127   if (require_decoding && coding->type != coding_type_ccl)
6128     {
6129       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
6130                                 0);
6131       if (from == to_byte)
6132         require_decoding = 0;
6133       shrinked_bytes = from + (SBYTES (str) - to_byte);
6134     }
6135
6136   if (!require_decoding
6137       && !(SYMBOLP (coding->post_read_conversion)
6138            && !NILP (Ffboundp (coding->post_read_conversion))))
6139     {
6140       coding->consumed = SBYTES (str);
6141       coding->consumed_char = SCHARS (str);
6142       if (coding->dst_multibyte)
6143         {
6144           str = Fstring_as_multibyte (str);
6145           nocopy = 1;
6146         }
6147       coding->produced = SBYTES (str);
6148       coding->produced_char = SCHARS (str);
6149       return (nocopy ? str : Fcopy_sequence (str));
6150     }
6151
6152   if (coding->composing != COMPOSITION_DISABLED)
6153     coding_allocate_composition_data (coding, from);
6154   len = decoding_buffer_size (coding, to_byte - from);
6155   allocate_conversion_buffer (buf, len);
6156
6157   consumed = consumed_char = produced = produced_char = 0;
6158   while (1)
6159     {
6160       result = decode_coding (coding, SDATA (str) + from + consumed,
6161                               buf.data + produced, to_byte - from - consumed,
6162                               buf.size - produced);
6163       consumed += coding->consumed;
6164       consumed_char += coding->consumed_char;
6165       produced += coding->produced;
6166       produced_char += coding->produced_char;
6167       if (result == CODING_FINISH_NORMAL
6168           || (result == CODING_FINISH_INSUFFICIENT_SRC
6169               && coding->consumed == 0))
6170         break;
6171       if (result == CODING_FINISH_INSUFFICIENT_CMP)
6172         coding_allocate_composition_data (coding, from + produced_char);
6173       else if (result == CODING_FINISH_INSUFFICIENT_DST)
6174         extend_conversion_buffer (&buf);
6175       else if (result == CODING_FINISH_INCONSISTENT_EOL)
6176         {
6177           Lisp_Object eol_type;
6178
6179           /* Recover the original EOL format.  */
6180           if (coding->eol_type == CODING_EOL_CR)
6181             {
6182               unsigned char *p;
6183               for (p = buf.data; p < buf.data + produced; p++)
6184                 if (*p == '\n') *p = '\r';
6185             }
6186           else if (coding->eol_type == CODING_EOL_CRLF)
6187             {
6188               int num_eol = 0;
6189               unsigned char *p0, *p1;
6190               for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
6191                 if (*p0 == '\n') num_eol++;
6192               if (produced + num_eol >= buf.size)
6193                 extend_conversion_buffer (&buf);
6194               for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
6195                 {
6196                   *--p1 = *--p0;
6197                   if (*p0 == '\n') *--p1 = '\r';
6198                 }
6199               produced += num_eol;
6200               produced_char += num_eol;
6201             }
6202           /* Suppress eol-format conversion in the further conversion.  */
6203           coding->eol_type = CODING_EOL_LF;
6204
6205           /* Set the coding system symbol to that for Unix-like EOL.  */
6206           eol_type = Fget (saved_coding_symbol, Qeol_type);
6207           if (VECTORP (eol_type)
6208               && XVECTOR (eol_type)->size == 3
6209               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
6210             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
6211           else
6212             coding->symbol = saved_coding_symbol;
6213
6214
6215         }
6216     }
6217
6218   coding->consumed = consumed;
6219   coding->consumed_char = consumed_char;
6220   coding->produced = produced;
6221   coding->produced_char = produced_char;
6222
6223   if (coding->dst_multibyte)
6224     newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
6225                                            produced + shrinked_bytes);
6226   else
6227     newstr = make_uninit_string (produced + shrinked_bytes);
6228   if (from > 0)
6229     STRING_COPYIN (newstr, 0, SDATA (str), from);
6230   STRING_COPYIN (newstr, from, buf.data, produced);
6231   if (shrinked_bytes > from)
6232     STRING_COPYIN (newstr, from + produced,
6233                    SDATA (str) + to_byte,
6234                    shrinked_bytes - from);
6235   free_conversion_buffer (&buf);
6236
6237   coding->consumed += shrinked_bytes;
6238   coding->consumed_char += shrinked_bytes;
6239   coding->produced += shrinked_bytes;
6240   coding->produced_char += shrinked_bytes;
6241
6242   if (coding->cmp_data && coding->cmp_data->used)
6243     coding_restore_composition (coding, newstr);
6244   coding_free_composition_data (coding);
6245
6246   if (SYMBOLP (coding->post_read_conversion)
6247       && !NILP (Ffboundp (coding->post_read_conversion)))
6248     newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
6249
6250   return newstr;
6251 }
6252
6253 Lisp_Object
6254 encode_coding_string (str, coding, nocopy)
6255      Lisp_Object str;
6256      struct coding_system *coding;
6257      int nocopy;
6258 {
6259   int len;
6260   struct conversion_buffer buf;
6261   int from, to, to_byte;
6262   int result;
6263   int shrinked_bytes = 0;
6264   Lisp_Object newstr;
6265   int consumed, consumed_char, produced, produced_char;
6266
6267   if (SYMBOLP (coding->pre_write_conversion)
6268       && !NILP (Ffboundp (coding->pre_write_conversion)))
6269     str = run_pre_post_conversion_on_str (str, coding, 1);
6270
6271   from = 0;
6272   to = SCHARS (str);
6273   to_byte = SBYTES (str);
6274
6275   /* Encoding routines determine the multibyteness of the source text
6276      by coding->src_multibyte.  */
6277   coding->src_multibyte = STRING_MULTIBYTE (str);
6278   coding->dst_multibyte = 0;
6279   if (! CODING_REQUIRE_ENCODING (coding))
6280     {
6281       coding->consumed = SBYTES (str);
6282       coding->consumed_char = SCHARS (str);
6283       if (STRING_MULTIBYTE (str))
6284         {
6285           str = Fstring_as_unibyte (str);
6286           nocopy = 1;
6287         }
6288       coding->produced = SBYTES (str);
6289       coding->produced_char = SCHARS (str);
6290       return (nocopy ? str : Fcopy_sequence (str));
6291     }
6292
6293   if (coding->composing != COMPOSITION_DISABLED)
6294     coding_save_composition (coding, from, to, str);
6295
6296   /* Try to skip the heading and tailing ASCIIs.  */
6297   if (coding->type != coding_type_ccl)
6298     {
6299       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
6300                                 1);
6301       if (from == to_byte)
6302         return (nocopy ? str : Fcopy_sequence (str));
6303       shrinked_bytes = from + (SBYTES (str) - to_byte);
6304     }
6305
6306   len = encoding_buffer_size (coding, to_byte - from);
6307   allocate_conversion_buffer (buf, len);
6308
6309   consumed = consumed_char = produced = produced_char = 0;
6310   while (1)
6311     {
6312       result = encode_coding (coding, SDATA (str) + from + consumed,
6313                               buf.data + produced, to_byte - from - consumed,
6314                               buf.size - produced);
6315       consumed += coding->consumed;
6316       consumed_char += coding->consumed_char;
6317       produced += coding->produced;
6318       produced_char += coding->produced_char;
6319       if (result == CODING_FINISH_NORMAL
6320           || result == CODING_FINISH_INTERRUPT
6321           || (result == CODING_FINISH_INSUFFICIENT_SRC
6322               && coding->consumed == 0))
6323         break;
6324       /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
6325       extend_conversion_buffer (&buf);
6326     }
6327
6328   coding->consumed = consumed;
6329   coding->consumed_char = consumed_char;
6330   coding->produced = produced;
6331   coding->produced_char = produced_char;
6332
6333   newstr = make_uninit_string (produced + shrinked_bytes);
6334   if (from > 0)
6335     STRING_COPYIN (newstr, 0, SDATA (str), from);
6336   STRING_COPYIN (newstr, from, buf.data, produced);
6337   if (shrinked_bytes > from)
6338     STRING_COPYIN (newstr, from + produced,
6339                    SDATA (str) + to_byte,
6340                    shrinked_bytes - from);
6341
6342   free_conversion_buffer (&buf);
6343   coding_free_composition_data (coding);
6344
6345   return newstr;
6346 }
6347
6348 \f
6349 #ifdef emacs
6350 /*** 8. Emacs Lisp library functions ***/
6351
6352 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6353        doc: /* Return t if OBJECT is nil or a coding-system.
6354 See the documentation of `make-coding-system' for information
6355 about coding-system objects.  */)
6356      (obj)
6357      Lisp_Object obj;
6358 {
6359   if (NILP (obj))
6360     return Qt;
6361   if (!SYMBOLP (obj))
6362     return Qnil;
6363   if (! NILP (Fget (obj, Qcoding_system_define_form)))
6364     return Qt;
6365   /* Get coding-spec vector for OBJ.  */
6366   obj = Fget (obj, Qcoding_system);
6367   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
6368           ? Qt : Qnil);
6369 }
6370
6371 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6372        Sread_non_nil_coding_system, 1, 1, 0,
6373        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6374      (prompt)
6375      Lisp_Object prompt;
6376 {
6377   Lisp_Object val;
6378   do
6379     {
6380       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6381                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6382     }
6383   while (SCHARS (val) == 0);
6384   return (Fintern (val, Qnil));
6385 }
6386
6387 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6388        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6389 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6390      (prompt, default_coding_system)
6391      Lisp_Object prompt, default_coding_system;
6392 {
6393   Lisp_Object val;
6394   if (SYMBOLP (default_coding_system))
6395     default_coding_system = SYMBOL_NAME (default_coding_system);
6396   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6397                           Qt, Qnil, Qcoding_system_history,
6398                           default_coding_system, Qnil);
6399   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
6400 }
6401
6402 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6403        1, 1, 0,
6404        doc: /* Check validity of CODING-SYSTEM.
6405 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6406 It is valid if it is nil or a symbol with a non-nil `coding-system' property.
6407 The value of this property should be a vector of length 5.  */)
6408      (coding_system)
6409      Lisp_Object coding_system;
6410 {
6411   Lisp_Object define_form;
6412
6413   define_form = Fget (coding_system, Qcoding_system_define_form);
6414   if (! NILP (define_form))
6415     {
6416       Fput (coding_system, Qcoding_system_define_form, Qnil);
6417       safe_eval (define_form);
6418     }
6419   if (!NILP (Fcoding_system_p (coding_system)))
6420     return coding_system;
6421   while (1)
6422     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6423 }
6424 \f
6425 Lisp_Object
6426 detect_coding_system (src, src_bytes, highest, multibytep)
6427      const unsigned char *src;
6428      int src_bytes, highest;
6429      int multibytep;
6430 {
6431   int coding_mask, eol_type;
6432   Lisp_Object val, tmp;
6433   int dummy;
6434
6435   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep);
6436   eol_type  = detect_eol_type (src, src_bytes, &dummy);
6437   if (eol_type == CODING_EOL_INCONSISTENT)
6438     eol_type = CODING_EOL_UNDECIDED;
6439
6440   if (!coding_mask)
6441     {
6442       val = Qundecided;
6443       if (eol_type != CODING_EOL_UNDECIDED)
6444         {
6445           Lisp_Object val2;
6446           val2 = Fget (Qundecided, Qeol_type);
6447           if (VECTORP (val2))
6448             val = XVECTOR (val2)->contents[eol_type];
6449         }
6450       return (highest ? val : Fcons (val, Qnil));
6451     }
6452
6453   /* At first, gather possible coding systems in VAL.  */
6454   val = Qnil;
6455   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
6456     {
6457       Lisp_Object category_val, category_index;
6458
6459       category_index = Fget (XCAR (tmp), Qcoding_category_index);
6460       category_val = Fsymbol_value (XCAR (tmp));
6461       if (!NILP (category_val)
6462           && NATNUMP (category_index)
6463           && (coding_mask & (1 << XFASTINT (category_index))))
6464         {
6465           val = Fcons (category_val, val);
6466           if (highest)
6467             break;
6468         }
6469     }
6470   if (!highest)
6471     val = Fnreverse (val);
6472
6473   /* Then, replace the elements with subsidiary coding systems.  */
6474   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
6475     {
6476       if (eol_type != CODING_EOL_UNDECIDED
6477           && eol_type != CODING_EOL_INCONSISTENT)
6478         {
6479           Lisp_Object eol;
6480           eol = Fget (XCAR (tmp), Qeol_type);
6481           if (VECTORP (eol))
6482             XSETCAR (tmp, XVECTOR (eol)->contents[eol_type]);
6483         }
6484     }
6485   return (highest ? XCAR (val) : val);
6486 }
6487
6488 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6489        2, 3, 0,
6490        doc: /* Detect how the byte sequence in the region is encoded.
6491 Return a list of possible coding systems used on decoding a byte
6492 sequence containing the bytes in the region between START and END when
6493 the coding system `undecided' is specified.  The list is ordered by
6494 priority decided in the current language environment.
6495
6496 If only ASCII characters are found, it returns a list of single element
6497 `undecided' or its subsidiary coding system according to a detected
6498 end-of-line format.
6499
6500 If optional argument HIGHEST is non-nil, return the coding system of
6501 highest priority.  */)
6502      (start, end, highest)
6503      Lisp_Object start, end, highest;
6504 {
6505   int from, to;
6506   int from_byte, to_byte;
6507   int include_anchor_byte = 0;
6508
6509   CHECK_NUMBER_COERCE_MARKER (start);
6510   CHECK_NUMBER_COERCE_MARKER (end);
6511
6512   validate_region (&start, &end);
6513   from = XINT (start), to = XINT (end);
6514   from_byte = CHAR_TO_BYTE (from);
6515   to_byte = CHAR_TO_BYTE (to);
6516
6517   if (from < GPT && to >= GPT)
6518     move_gap_both (to, to_byte);
6519   /* If we an anchor byte `\0' follows the region, we include it in
6520      the detecting source.  Then code detectors can handle the tailing
6521      byte sequence more accurately.
6522
6523      Fix me: This is not a perfect solution.  It is better that we
6524      add one more argument, say LAST_BLOCK, to all detect_coding_XXX.
6525   */
6526   if (to == Z || (to == GPT && GAP_SIZE > 0))
6527     include_anchor_byte = 1;
6528   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6529                                to_byte - from_byte + include_anchor_byte,
6530                                !NILP (highest),
6531                                !NILP (current_buffer
6532                                       ->enable_multibyte_characters));
6533 }
6534
6535 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6536        1, 2, 0,
6537        doc: /* Detect how the byte sequence in STRING is encoded.
6538 Return a list of possible coding systems used on decoding a byte
6539 sequence containing the bytes in STRING when the coding system
6540 `undecided' is specified.  The list is ordered by priority decided in
6541 the current language environment.
6542
6543 If only ASCII characters are found, it returns a list of single element
6544 `undecided' or its subsidiary coding system according to a detected
6545 end-of-line format.
6546
6547 If optional argument HIGHEST is non-nil, return the coding system of
6548 highest priority.  */)
6549      (string, highest)
6550      Lisp_Object string, highest;
6551 {
6552   CHECK_STRING (string);
6553
6554   return detect_coding_system (SDATA (string),
6555                                /* "+ 1" is to include the anchor byte
6556                                   `\0'.  With this, code detectors can
6557                                   handle the tailing bytes more
6558                                   accurately.  */
6559                                SBYTES (string) + 1,
6560                                !NILP (highest),
6561                                STRING_MULTIBYTE (string));
6562 }
6563
6564 /*  Subroutine for Fsafe_coding_systems_region_internal.
6565
6566     Return a list of coding systems that safely encode the multibyte
6567     text between P and PEND.  SAFE_CODINGS, if non-nil, is an alist of
6568     possible coding systems.  If it is nil, it means that we have not
6569     yet found any coding systems.
6570
6571     WORK_TABLE a char-table of which element is set to t once the
6572     element is looked up.
6573
6574     If a non-ASCII single byte char is found, set
6575     *single_byte_char_found to 1.  */
6576
6577 static Lisp_Object
6578 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
6579      unsigned char *p, *pend;
6580      Lisp_Object safe_codings, work_table;
6581      int *single_byte_char_found;
6582 {
6583   int c, len;
6584   Lisp_Object val, ch;
6585   Lisp_Object prev, tail;
6586
6587   if (NILP (safe_codings))
6588     goto done_safe_codings;
6589   while (p < pend)
6590     {
6591       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6592       p += len;
6593       if (ASCII_BYTE_P (c))
6594         /* We can ignore ASCII characters here.  */
6595         continue;
6596       if (SINGLE_BYTE_CHAR_P (c))
6597         *single_byte_char_found = 1;
6598       /* Check the safe coding systems for C.  */
6599       ch = make_number (c);
6600       val = Faref (work_table, ch);
6601       if (EQ (val, Qt))
6602         /* This element was already checked.  Ignore it.  */
6603         continue;
6604       /* Remember that we checked this element.  */
6605       Faset (work_table, ch, Qt);
6606
6607       for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail))
6608         {
6609           Lisp_Object elt, translation_table, hash_table, accept_latin_extra;
6610           int encodable;
6611
6612           elt = XCAR (tail);
6613           if (CONSP (XCDR (elt)))
6614             {
6615               /* This entry has this format now:
6616                  ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6617                           ACCEPT-LATIN-EXTRA ) */
6618               val = XCDR (elt);
6619               encodable = ! NILP (Faref (XCAR (val), ch));
6620               if (! encodable)
6621                 {
6622                   val = XCDR (val);
6623                   translation_table = XCAR (val);
6624                   hash_table = XCAR (XCDR (val));
6625                   accept_latin_extra = XCAR (XCDR (XCDR (val)));
6626                 }
6627             }
6628           else
6629             {
6630               /* This entry has this format now: ( CODING . SAFE-CHARS) */
6631               encodable = ! NILP (Faref (XCDR (elt), ch));
6632               if (! encodable)
6633                 {
6634                   /* Transform the format to:
6635                      ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6636                        ACCEPT-LATIN-EXTRA )  */
6637                   val = Fget (XCAR (elt), Qcoding_system);
6638                   translation_table
6639                     = Fplist_get (AREF (val, 3),
6640                                   Qtranslation_table_for_encode);
6641                   if (SYMBOLP (translation_table))
6642                     translation_table = Fget (translation_table,
6643                                               Qtranslation_table);
6644                   hash_table
6645                     = (CHAR_TABLE_P (translation_table)
6646                        ? XCHAR_TABLE (translation_table)->extras[1]
6647                        : Qnil);
6648                   accept_latin_extra
6649                     = ((EQ (AREF (val, 0), make_number (2))
6650                         && VECTORP (AREF (val, 4)))
6651                        ? AREF (AREF (val, 4), 16)
6652                        : Qnil);
6653                   XSETCAR (tail, list5 (XCAR (elt), XCDR (elt),
6654                                         translation_table, hash_table,
6655                                         accept_latin_extra));
6656                 }
6657             }
6658
6659           if (! encodable
6660               && ((CHAR_TABLE_P (translation_table)
6661                    && ! NILP (Faref (translation_table, ch)))
6662                   || (HASH_TABLE_P (hash_table)
6663                       && ! NILP (Fgethash (ch, hash_table, Qnil)))
6664                   || (SINGLE_BYTE_CHAR_P (c)
6665                       && ! NILP (accept_latin_extra)
6666                       && VECTORP (Vlatin_extra_code_table)
6667                       && ! NILP (AREF (Vlatin_extra_code_table, c)))))
6668             encodable = 1;
6669           if (encodable)
6670             prev = tail;
6671           else
6672             {
6673               /* Exclude this coding system from SAFE_CODINGS.  */
6674               if (EQ (tail, safe_codings))
6675                 {
6676                   safe_codings = XCDR (safe_codings);
6677                   if (NILP (safe_codings))
6678                     goto done_safe_codings;
6679                 }
6680               else
6681                 XSETCDR (prev, XCDR (tail));
6682             }
6683         }
6684     }
6685
6686  done_safe_codings:
6687   /* If the above loop was terminated before P reaches PEND, it means
6688      SAFE_CODINGS was set to nil.  If we have not yet found an
6689      non-ASCII single-byte char, check it now.  */
6690   if (! *single_byte_char_found)
6691     while (p < pend)
6692       {
6693         c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6694         p += len;
6695         if (! ASCII_BYTE_P (c)
6696             && SINGLE_BYTE_CHAR_P (c))
6697           {
6698             *single_byte_char_found = 1;
6699             break;
6700           }
6701       }
6702   return safe_codings;
6703 }
6704
6705 DEFUN ("find-coding-systems-region-internal",
6706        Ffind_coding_systems_region_internal,
6707        Sfind_coding_systems_region_internal, 2, 2, 0,
6708        doc: /* Internal use only.  */)
6709      (start, end)
6710      Lisp_Object start, end;
6711 {
6712   Lisp_Object work_table, safe_codings;
6713   int non_ascii_p = 0;
6714   int single_byte_char_found = 0;
6715   const unsigned char *p1, *p1end, *p2, *p2end, *p;
6716
6717   if (STRINGP (start))
6718     {
6719       if (!STRING_MULTIBYTE (start))
6720         return Qt;
6721       p1 = SDATA (start), p1end = p1 + SBYTES (start);
6722       p2 = p2end = p1end;
6723       if (SCHARS (start) != SBYTES (start))
6724         non_ascii_p = 1;
6725     }
6726   else
6727     {
6728       int from, to, stop;
6729
6730       CHECK_NUMBER_COERCE_MARKER (start);
6731       CHECK_NUMBER_COERCE_MARKER (end);
6732       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6733         args_out_of_range (start, end);
6734       if (NILP (current_buffer->enable_multibyte_characters))
6735         return Qt;
6736       from = CHAR_TO_BYTE (XINT (start));
6737       to = CHAR_TO_BYTE (XINT (end));
6738       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
6739       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
6740       if (stop == to)
6741         p2 = p2end = p1end;
6742       else
6743         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
6744       if (XINT (end) - XINT (start) != to - from)
6745         non_ascii_p = 1;
6746     }
6747
6748   if (!non_ascii_p)
6749     {
6750       /* We are sure that the text contains no multibyte character.
6751          Check if it contains eight-bit-graphic.  */
6752       p = p1;
6753       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
6754       if (p == p1end)
6755         {
6756           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
6757           if (p == p2end)
6758             return Qt;
6759         }
6760     }
6761
6762   /* The text contains non-ASCII characters.  */
6763
6764   work_table = Fmake_char_table (Qchar_coding_system, Qnil);
6765   safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
6766
6767   safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table,
6768                                     &single_byte_char_found);
6769   if (p2 < p2end)
6770     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
6771                                       &single_byte_char_found);
6772   if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars)))
6773     safe_codings = Qt;
6774   else
6775     {
6776       /* Turn safe_codings to a list of coding systems... */
6777       Lisp_Object val;
6778
6779       if (single_byte_char_found)
6780         /* ... and append these for eight-bit chars.  */
6781         val = Fcons (Qraw_text,
6782                      Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil)));
6783       else
6784         /* ... and append generic coding systems.  */
6785         val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars));
6786
6787       for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings))
6788         val = Fcons (XCAR (XCAR (safe_codings)), val);
6789       safe_codings = val;
6790     }
6791
6792   return safe_codings;
6793 }
6794
6795
6796 /* Search from position POS for such characters that are unencodable
6797    accoding to SAFE_CHARS, and return a list of their positions.  P
6798    points where in the memory the character at POS exists.  Limit the
6799    search at PEND or when Nth unencodable characters are found.
6800
6801    If SAFE_CHARS is a char table, an element for an unencodable
6802    character is nil.
6803
6804    If SAFE_CHARS is nil, all non-ASCII characters are unencodable.
6805
6806    Otherwise, SAFE_CHARS is t, and only eight-bit-contrl and
6807    eight-bit-graphic characters are unencodable.  */
6808
6809 static Lisp_Object
6810 unencodable_char_position (safe_chars, pos, p, pend, n)
6811      Lisp_Object safe_chars;
6812      int pos;
6813      unsigned char *p, *pend;
6814      int n;
6815 {
6816   Lisp_Object pos_list;
6817
6818   pos_list = Qnil;
6819   while (p < pend)
6820     {
6821       int len;
6822       int c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len);
6823
6824       if (c >= 128
6825           && (CHAR_TABLE_P (safe_chars)
6826               ? NILP (CHAR_TABLE_REF (safe_chars, c))
6827               : (NILP (safe_chars) || c < 256)))
6828         {
6829           pos_list = Fcons (make_number (pos), pos_list);
6830           if (--n <= 0)
6831             break;
6832         }
6833       pos++;
6834       p += len;
6835     }
6836   return Fnreverse (pos_list);
6837 }
6838
6839
6840 DEFUN ("unencodable-char-position", Funencodable_char_position,
6841        Sunencodable_char_position, 3, 5, 0,
6842        doc: /*
6843 Return position of first un-encodable character in a region.
6844 START and END specfiy the region and CODING-SYSTEM specifies the
6845 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
6846
6847 If optional 4th argument COUNT is non-nil, it specifies at most how
6848 many un-encodable characters to search.  In this case, the value is a
6849 list of positions.
6850
6851 If optional 5th argument STRING is non-nil, it is a string to search
6852 for un-encodable characters.  In that case, START and END are indexes
6853 to the string.  */)
6854      (start, end, coding_system, count, string)
6855      Lisp_Object start, end, coding_system, count, string;
6856 {
6857   int n;
6858   Lisp_Object safe_chars;
6859   struct coding_system coding;
6860   Lisp_Object positions;
6861   int from, to;
6862   unsigned char *p, *pend;
6863
6864   if (NILP (string))
6865     {
6866       validate_region (&start, &end);
6867       from = XINT (start);
6868       to = XINT (end);
6869       if (NILP (current_buffer->enable_multibyte_characters))
6870         return Qnil;
6871       p = CHAR_POS_ADDR (from);
6872       if (to == GPT)
6873         pend = GPT_ADDR;
6874       else
6875         pend = CHAR_POS_ADDR (to);
6876     }
6877   else
6878     {
6879       CHECK_STRING (string);
6880       CHECK_NATNUM (start);
6881       CHECK_NATNUM (end);
6882       from = XINT (start);
6883       to = XINT (end);
6884       if (from > to
6885           || to > SCHARS (string))
6886         args_out_of_range_3 (string, start, end);
6887       if (! STRING_MULTIBYTE (string))
6888         return Qnil;
6889       p = SDATA (string) + string_char_to_byte (string, from);
6890       pend = SDATA (string) + string_char_to_byte (string, to);
6891     }
6892
6893   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
6894
6895   if (NILP (count))
6896     n = 1;
6897   else
6898     {
6899       CHECK_NATNUM (count);
6900       n = XINT (count);
6901     }
6902
6903   if (coding.type == coding_type_no_conversion
6904       || coding.type == coding_type_raw_text)
6905     return Qnil;
6906
6907   if (coding.type == coding_type_undecided)
6908     safe_chars = Qnil;
6909   else
6910     safe_chars = coding_safe_chars (coding_system);
6911
6912   if (STRINGP (string)
6913       || from >= GPT || to <= GPT)
6914     positions = unencodable_char_position (safe_chars, from, p, pend, n);
6915   else
6916     {
6917       Lisp_Object args[2];
6918
6919       args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n);
6920       n -= XINT (Flength (args[0]));
6921       if (n <= 0)
6922         positions = args[0];
6923       else
6924         {
6925           args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR,
6926                                                pend, n);
6927           positions = Fappend (2, args);
6928         }
6929     }
6930
6931   return  (NILP (count) ? Fcar (positions) : positions);
6932 }
6933
6934
6935 Lisp_Object
6936 code_convert_region1 (start, end, coding_system, encodep)
6937      Lisp_Object start, end, coding_system;
6938      int encodep;
6939 {
6940   struct coding_system coding;
6941   int from, to;
6942
6943   CHECK_NUMBER_COERCE_MARKER (start);
6944   CHECK_NUMBER_COERCE_MARKER (end);
6945   CHECK_SYMBOL (coding_system);
6946
6947   validate_region (&start, &end);
6948   from = XFASTINT (start);
6949   to = XFASTINT (end);
6950
6951   if (NILP (coding_system))
6952     return make_number (to - from);
6953
6954   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6955     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
6956
6957   coding.mode |= CODING_MODE_LAST_BLOCK;
6958   coding.src_multibyte = coding.dst_multibyte
6959     = !NILP (current_buffer->enable_multibyte_characters);
6960   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
6961                        &coding, encodep, 1);
6962   Vlast_coding_system_used = coding.symbol;
6963   return make_number (coding.produced_char);
6964 }
6965
6966 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
6967        3, 3, "r\nzCoding system: ",
6968        doc: /* Decode the current region from the specified coding system.
6969 When called from a program, takes three arguments:
6970 START, END, and CODING-SYSTEM.  START and END are buffer positions.
6971 This function sets `last-coding-system-used' to the precise coding system
6972 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6973 not fully specified.)
6974 It returns the length of the decoded text.  */)
6975      (start, end, coding_system)
6976      Lisp_Object start, end, coding_system;
6977 {
6978   return code_convert_region1 (start, end, coding_system, 0);
6979 }
6980
6981 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
6982        3, 3, "r\nzCoding system: ",
6983        doc: /* Encode the current region into the specified coding system.
6984 When called from a program, takes three arguments:
6985 START, END, and CODING-SYSTEM.  START and END are buffer positions.
6986 This function sets `last-coding-system-used' to the precise coding system
6987 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6988 not fully specified.)
6989 It returns the length of the encoded text.  */)
6990      (start, end, coding_system)
6991      Lisp_Object start, end, coding_system;
6992 {
6993   return code_convert_region1 (start, end, coding_system, 1);
6994 }
6995
6996 Lisp_Object
6997 code_convert_string1 (string, coding_system, nocopy, encodep)
6998      Lisp_Object string, coding_system, nocopy;
6999      int encodep;
7000 {
7001   struct coding_system coding;
7002
7003   CHECK_STRING (string);
7004   CHECK_SYMBOL (coding_system);
7005
7006   if (NILP (coding_system))
7007     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
7008
7009   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
7010     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
7011
7012   coding.mode |= CODING_MODE_LAST_BLOCK;
7013   string = (encodep
7014             ? encode_coding_string (string, &coding, !NILP (nocopy))
7015             : decode_coding_string (string, &coding, !NILP (nocopy)));
7016   Vlast_coding_system_used = coding.symbol;
7017
7018   return string;
7019 }
7020
7021 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7022        2, 3, 0,
7023        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7024 Optional arg NOCOPY non-nil means it is OK to return STRING itself
7025 if the decoding operation is trivial.
7026 This function sets `last-coding-system-used' to the precise coding system
7027 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7028 not fully specified.)  */)
7029      (string, coding_system, nocopy)
7030      Lisp_Object string, coding_system, nocopy;
7031 {
7032   return code_convert_string1 (string, coding_system, nocopy, 0);
7033 }
7034
7035 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7036        2, 3, 0,
7037        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7038 Optional arg NOCOPY non-nil means it is OK to return STRING itself
7039 if the encoding operation is trivial.
7040 This function sets `last-coding-system-used' to the precise coding system
7041 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7042 not fully specified.)  */)
7043      (string, coding_system, nocopy)
7044      Lisp_Object string, coding_system, nocopy;
7045 {
7046   return code_convert_string1 (string, coding_system, nocopy, 1);
7047 }
7048
7049 /* Encode or decode STRING according to CODING_SYSTEM.
7050    Do not set Vlast_coding_system_used.
7051
7052    This function is called only from macros DECODE_FILE and
7053    ENCODE_FILE, thus we ignore character composition.  */
7054
7055 Lisp_Object
7056 code_convert_string_norecord (string, coding_system, encodep)
7057      Lisp_Object string, coding_system;
7058      int encodep;
7059 {
7060   struct coding_system coding;
7061
7062   CHECK_STRING (string);
7063   CHECK_SYMBOL (coding_system);
7064
7065   if (NILP (coding_system))
7066     return string;
7067
7068   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
7069     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
7070
7071   coding.composing = COMPOSITION_DISABLED;
7072   coding.mode |= CODING_MODE_LAST_BLOCK;
7073   return (encodep
7074           ? encode_coding_string (string, &coding, 1)
7075           : decode_coding_string (string, &coding, 1));
7076 }
7077 \f
7078 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7079        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7080 Return the corresponding character.  */)
7081      (code)
7082      Lisp_Object code;
7083 {
7084   unsigned char c1, c2, s1, s2;
7085   Lisp_Object val;
7086
7087   CHECK_NUMBER (code);
7088   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
7089   if (s1 == 0)
7090     {
7091       if (s2 < 0x80)
7092         XSETFASTINT (val, s2);
7093       else if (s2 >= 0xA0 || s2 <= 0xDF)
7094         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
7095       else
7096         error ("Invalid Shift JIS code: %x", XFASTINT (code));
7097     }
7098   else
7099     {
7100       if ((s1 < 0x80 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF)
7101           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
7102         error ("Invalid Shift JIS code: %x", XFASTINT (code));
7103       DECODE_SJIS (s1, s2, c1, c2);
7104       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
7105     }
7106   return val;
7107 }
7108
7109 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7110        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7111 Return the corresponding code in SJIS.  */)
7112      (ch)
7113      Lisp_Object ch;
7114 {
7115   int charset, c1, c2, s1, s2;
7116   Lisp_Object val;
7117
7118   CHECK_NUMBER (ch);
7119   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
7120   if (charset == CHARSET_ASCII)
7121     {
7122       val = ch;
7123     }
7124   else if (charset == charset_jisx0208
7125            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
7126     {
7127       ENCODE_SJIS (c1, c2, s1, s2);
7128       XSETFASTINT (val, (s1 << 8) | s2);
7129     }
7130   else if (charset == charset_katakana_jisx0201
7131            && c1 > 0x20 && c2 < 0xE0)
7132     {
7133       XSETFASTINT (val, c1 | 0x80);
7134     }
7135   else
7136     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
7137   return val;
7138 }
7139
7140 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7141        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7142 Return the corresponding character.  */)
7143      (code)
7144      Lisp_Object code;
7145 {
7146   int charset;
7147   unsigned char b1, b2, c1, c2;
7148   Lisp_Object val;
7149
7150   CHECK_NUMBER (code);
7151   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
7152   if (b1 == 0)
7153     {
7154       if (b2 >= 0x80)
7155         error ("Invalid BIG5 code: %x", XFASTINT (code));
7156       val = code;
7157     }
7158   else
7159     {
7160       if ((b1 < 0xA1 || b1 > 0xFE)
7161           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
7162         error ("Invalid BIG5 code: %x", XFASTINT (code));
7163       DECODE_BIG5 (b1, b2, charset, c1, c2);
7164       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
7165     }
7166   return val;
7167 }
7168
7169 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7170        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7171 Return the corresponding character code in Big5.  */)
7172      (ch)
7173      Lisp_Object ch;
7174 {
7175   int charset, c1, c2, b1, b2;
7176   Lisp_Object val;
7177
7178   CHECK_NUMBER (ch);
7179   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
7180   if (charset == CHARSET_ASCII)
7181     {
7182       val = ch;
7183     }
7184   else if ((charset == charset_big5_1
7185             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
7186            || (charset == charset_big5_2
7187                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
7188     {
7189       ENCODE_BIG5 (charset, c1, c2, b1, b2);
7190       XSETFASTINT (val, (b1 << 8) | b2);
7191     }
7192   else
7193     error ("Can't encode to Big5: %d", XFASTINT (ch));
7194   return val;
7195 }
7196 \f
7197 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
7198        Sset_terminal_coding_system_internal, 1, 1, 0,
7199        doc: /* Internal use only.  */)
7200      (coding_system)
7201      Lisp_Object coding_system;
7202 {
7203   struct coding_system *terminal_coding = FRAME_TERMINAL_CODING (SELECTED_FRAME ());
7204   CHECK_SYMBOL (coding_system);
7205   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
7206   /* We had better not send unsafe characters to terminal.  */
7207   terminal_coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
7208   /* Character composition should be disabled.  */
7209   terminal_coding->composing = COMPOSITION_DISABLED;
7210   /* Error notification should be suppressed.  */
7211   terminal_coding->suppress_error = 1;
7212   terminal_coding->src_multibyte = 1;
7213   terminal_coding->dst_multibyte = 0;
7214   return Qnil;
7215 }
7216
7217 DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal,
7218        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7219        doc: /* Internal use only.  */)
7220      (coding_system)
7221      Lisp_Object coding_system;
7222 {
7223   CHECK_SYMBOL (coding_system);
7224   setup_coding_system (Fcheck_coding_system (coding_system),
7225                        &safe_terminal_coding);
7226   /* Character composition should be disabled.  */
7227   safe_terminal_coding.composing = COMPOSITION_DISABLED;
7228   /* Error notification should be suppressed.  */
7229   safe_terminal_coding.suppress_error = 1;
7230   safe_terminal_coding.src_multibyte = 1;
7231   safe_terminal_coding.dst_multibyte = 0;
7232   return Qnil;
7233 }
7234
7235 DEFUN ("terminal-coding-system", Fterminal_coding_system,
7236        Sterminal_coding_system, 0, 0, 0,
7237        doc: /* Return coding system specified for terminal output.  */)
7238      ()
7239 {
7240   return FRAME_TERMINAL_CODING (SELECTED_FRAME ())->symbol;
7241 }
7242
7243 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
7244        Sset_keyboard_coding_system_internal, 1, 1, 0,
7245        doc: /* Internal use only.  */)
7246      (coding_system)
7247      Lisp_Object coding_system;
7248 {
7249   CHECK_SYMBOL (coding_system);
7250   setup_coding_system (Fcheck_coding_system (coding_system),
7251                        FRAME_KEYBOARD_CODING (SELECTED_FRAME ()));
7252   /* Character composition should be disabled.  */
7253   FRAME_KEYBOARD_CODING (SELECTED_FRAME ())->composing = COMPOSITION_DISABLED;
7254   return Qnil;
7255 }
7256
7257 DEFUN ("keyboard-coding-system", Fkeyboard_coding_system,
7258        Skeyboard_coding_system, 0, 0, 0,
7259        doc: /* Return coding system specified for decoding keyboard input.  */)
7260      ()
7261 {
7262   return FRAME_KEYBOARD_CODING (SELECTED_FRAME ())->symbol;
7263 }
7264
7265 \f
7266 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7267        Sfind_operation_coding_system,  1, MANY, 0,
7268        doc: /* Choose a coding system for an operation based on the target name.
7269 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7270 DECODING-SYSTEM is the coding system to use for decoding
7271 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7272 for encoding (in case OPERATION does encoding).
7273
7274 The first argument OPERATION specifies an I/O primitive:
7275   For file I/O, `insert-file-contents' or `write-region'.
7276   For process I/O, `call-process', `call-process-region', or `start-process'.
7277   For network I/O, `open-network-stream'.
7278
7279 The remaining arguments should be the same arguments that were passed
7280 to the primitive.  Depending on which primitive, one of those arguments
7281 is selected as the TARGET.  For example, if OPERATION does file I/O,
7282 whichever argument specifies the file name is TARGET.
7283
7284 TARGET has a meaning which depends on OPERATION:
7285   For file I/O, TARGET is a file name.
7286   For process I/O, TARGET is a process name.
7287   For network I/O, TARGET is a service name or a port number
7288
7289 This function looks up what specified for TARGET in,
7290 `file-coding-system-alist', `process-coding-system-alist',
7291 or `network-coding-system-alist' depending on OPERATION.
7292 They may specify a coding system, a cons of coding systems,
7293 or a function symbol to call.
7294 In the last case, we call the function with one argument,
7295 which is a list of all the arguments given to this function.
7296
7297 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
7298      (nargs, args)
7299      int nargs;
7300      Lisp_Object *args;
7301 {
7302   Lisp_Object operation, target_idx, target, val;
7303   register Lisp_Object chain;
7304
7305   if (nargs < 2)
7306     error ("Too few arguments");
7307   operation = args[0];
7308   if (!SYMBOLP (operation)
7309       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7310     error ("Invalid first argument");
7311   if (nargs < 1 + XINT (target_idx))
7312     error ("Too few arguments for operation: %s",
7313            SDATA (SYMBOL_NAME (operation)));
7314   /* For write-region, if the 6th argument (i.e. VISIT, the 5th
7315      argument to write-region) is string, it must be treated as a
7316      target file name.  */
7317   if (EQ (operation, Qwrite_region)
7318       && nargs > 5
7319       && STRINGP (args[5]))
7320     target_idx = make_number (4);
7321   target = args[XINT (target_idx) + 1];
7322   if (!(STRINGP (target)
7323         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7324     error ("Invalid argument %d", XINT (target_idx) + 1);
7325
7326   chain = ((EQ (operation, Qinsert_file_contents)
7327             || EQ (operation, Qwrite_region))
7328            ? Vfile_coding_system_alist
7329            : (EQ (operation, Qopen_network_stream)
7330               ? Vnetwork_coding_system_alist
7331               : Vprocess_coding_system_alist));
7332   if (NILP (chain))
7333     return Qnil;
7334
7335   for (; CONSP (chain); chain = XCDR (chain))
7336     {
7337       Lisp_Object elt;
7338       elt = XCAR (chain);
7339
7340       if (CONSP (elt)
7341           && ((STRINGP (target)
7342                && STRINGP (XCAR (elt))
7343                && fast_string_match (XCAR (elt), target) >= 0)
7344               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7345         {
7346           val = XCDR (elt);
7347           /* Here, if VAL is both a valid coding system and a valid
7348              function symbol, we return VAL as a coding system.  */
7349           if (CONSP (val))
7350             return val;
7351           if (! SYMBOLP (val))
7352             return Qnil;
7353           if (! NILP (Fcoding_system_p (val)))
7354             return Fcons (val, val);
7355           if (! NILP (Ffboundp (val)))
7356             {
7357               val = call1 (val, Flist (nargs, args));
7358               if (CONSP (val))
7359                 return val;
7360               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7361                 return Fcons (val, val);
7362             }
7363           return Qnil;
7364         }
7365     }
7366   return Qnil;
7367 }
7368
7369 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
7370        Supdate_coding_systems_internal, 0, 0, 0,
7371        doc: /* Update internal database for ISO2022 and CCL based coding systems.
7372 When values of any coding categories are changed, you must
7373 call this function.  */)
7374      ()
7375 {
7376   int i;
7377
7378   for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
7379     {
7380       Lisp_Object val;
7381
7382       val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[i]);
7383       if (!NILP (val))
7384         {
7385           if (! coding_system_table[i])
7386             coding_system_table[i] = ((struct coding_system *)
7387                                       xmalloc (sizeof (struct coding_system)));
7388           setup_coding_system (val, coding_system_table[i]);
7389         }
7390       else if (coding_system_table[i])
7391         {
7392           xfree (coding_system_table[i]);
7393           coding_system_table[i] = NULL;
7394         }
7395     }
7396
7397   return Qnil;
7398 }
7399
7400 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
7401        Sset_coding_priority_internal, 0, 0, 0,
7402        doc: /* Update internal database for the current value of `coding-category-list'.
7403 This function is internal use only.  */)
7404      ()
7405 {
7406   int i = 0, idx;
7407   Lisp_Object val;
7408
7409   val = Vcoding_category_list;
7410
7411   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
7412     {
7413       if (! SYMBOLP (XCAR (val)))
7414         break;
7415       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
7416       if (idx >= CODING_CATEGORY_IDX_MAX)
7417         break;
7418       coding_priorities[i++] = (1 << idx);
7419       val = XCDR (val);
7420     }
7421   /* If coding-category-list is valid and contains all coding
7422      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
7423      the following code saves Emacs from crashing.  */
7424   while (i < CODING_CATEGORY_IDX_MAX)
7425     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
7426
7427   return Qnil;
7428 }
7429
7430 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7431        Sdefine_coding_system_internal, 1, 1, 0,
7432        doc: /* Register CODING-SYSTEM as a base coding system.
7433 This function is internal use only.  */)
7434      (coding_system)
7435      Lisp_Object coding_system;
7436 {
7437   Lisp_Object safe_chars, slot;
7438
7439   if (NILP (Fcheck_coding_system (coding_system)))
7440     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
7441   safe_chars = coding_safe_chars (coding_system);
7442   if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
7443     error ("No valid safe-chars property for %s",
7444            SDATA (SYMBOL_NAME (coding_system)));
7445   if (EQ (safe_chars, Qt))
7446     {
7447       if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
7448         XSETCAR (Vcoding_system_safe_chars,
7449                  Fcons (coding_system, XCAR (Vcoding_system_safe_chars)));
7450     }
7451   else
7452     {
7453       slot = Fassq (coding_system, XCDR (Vcoding_system_safe_chars));
7454       if (NILP (slot))
7455         XSETCDR (Vcoding_system_safe_chars,
7456                  nconc2 (XCDR (Vcoding_system_safe_chars),
7457                          Fcons (Fcons (coding_system, safe_chars), Qnil)));
7458       else
7459         XSETCDR (slot, safe_chars);
7460     }
7461   return Qnil;
7462 }
7463
7464 #endif /* emacs */
7465
7466 \f
7467 /*** 9. Post-amble ***/
7468
7469 void
7470 init_coding_once ()
7471 {
7472   int i;
7473
7474   /* Emacs' internal format specific initialize routine.  */
7475   for (i = 0; i <= 0x20; i++)
7476     emacs_code_class[i] = EMACS_control_code;
7477   emacs_code_class[0x0A] = EMACS_linefeed_code;
7478   emacs_code_class[0x0D] = EMACS_carriage_return_code;
7479   for (i = 0x21 ; i < 0x7F; i++)
7480     emacs_code_class[i] = EMACS_ascii_code;
7481   emacs_code_class[0x7F] = EMACS_control_code;
7482   for (i = 0x80; i < 0xFF; i++)
7483     emacs_code_class[i] = EMACS_invalid_code;
7484   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
7485   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
7486   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
7487   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
7488
7489   /* ISO2022 specific initialize routine.  */
7490   for (i = 0; i < 0x20; i++)
7491     iso_code_class[i] = ISO_control_0;
7492   for (i = 0x21; i < 0x7F; i++)
7493     iso_code_class[i] = ISO_graphic_plane_0;
7494   for (i = 0x80; i < 0xA0; i++)
7495     iso_code_class[i] = ISO_control_1;
7496   for (i = 0xA1; i < 0xFF; i++)
7497     iso_code_class[i] = ISO_graphic_plane_1;
7498   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7499   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7500   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7501   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7502   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7503   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7504   iso_code_class[ISO_CODE_ESC] = ISO_escape;
7505   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7506   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7507   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7508
7509   setup_coding_system (Qnil, &safe_terminal_coding);
7510   setup_coding_system (Qnil, &default_buffer_file_coding);
7511
7512   bzero (coding_system_table, sizeof coding_system_table);
7513
7514   bzero (ascii_skip_code, sizeof ascii_skip_code);
7515   for (i = 0; i < 128; i++)
7516     ascii_skip_code[i] = 1;
7517
7518 #if defined (MSDOS) || defined (WINDOWSNT)
7519   system_eol_type = CODING_EOL_CRLF;
7520 #else
7521   system_eol_type = CODING_EOL_LF;
7522 #endif
7523
7524   inhibit_pre_post_conversion = 0;
7525 }
7526
7527 #ifdef emacs
7528
7529 void
7530 syms_of_coding ()
7531 {
7532   Qtarget_idx = intern ("target-idx");
7533   staticpro (&Qtarget_idx);
7534
7535   Qcoding_system_history = intern ("coding-system-history");
7536   staticpro (&Qcoding_system_history);
7537   Fset (Qcoding_system_history, Qnil);
7538
7539   /* Target FILENAME is the first argument.  */
7540   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
7541   /* Target FILENAME is the third argument.  */
7542   Fput (Qwrite_region, Qtarget_idx, make_number (2));
7543
7544   Qcall_process = intern ("call-process");
7545   staticpro (&Qcall_process);
7546   /* Target PROGRAM is the first argument.  */
7547   Fput (Qcall_process, Qtarget_idx, make_number (0));
7548
7549   Qcall_process_region = intern ("call-process-region");
7550   staticpro (&Qcall_process_region);
7551   /* Target PROGRAM is the third argument.  */
7552   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7553
7554   Qstart_process = intern ("start-process");
7555   staticpro (&Qstart_process);
7556   /* Target PROGRAM is the third argument.  */
7557   Fput (Qstart_process, Qtarget_idx, make_number (2));
7558
7559   Qopen_network_stream = intern ("open-network-stream");
7560   staticpro (&Qopen_network_stream);
7561   /* Target SERVICE is the fourth argument.  */
7562   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7563
7564   Qcoding_system = intern ("coding-system");
7565   staticpro (&Qcoding_system);
7566
7567   Qeol_type = intern ("eol-type");
7568   staticpro (&Qeol_type);
7569
7570   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
7571   staticpro (&Qbuffer_file_coding_system);
7572
7573   Qpost_read_conversion = intern ("post-read-conversion");
7574   staticpro (&Qpost_read_conversion);
7575
7576   Qpre_write_conversion = intern ("pre-write-conversion");
7577   staticpro (&Qpre_write_conversion);
7578
7579   Qno_conversion = intern ("no-conversion");
7580   staticpro (&Qno_conversion);
7581
7582   Qundecided = intern ("undecided");
7583   staticpro (&Qundecided);
7584
7585   Qcoding_system_p = intern ("coding-system-p");
7586   staticpro (&Qcoding_system_p);
7587
7588   Qcoding_system_error = intern ("coding-system-error");
7589   staticpro (&Qcoding_system_error);
7590
7591   Fput (Qcoding_system_error, Qerror_conditions,
7592         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7593   Fput (Qcoding_system_error, Qerror_message,
7594         build_string ("Invalid coding system"));
7595
7596   Qcoding_category = intern ("coding-category");
7597   staticpro (&Qcoding_category);
7598   Qcoding_category_index = intern ("coding-category-index");
7599   staticpro (&Qcoding_category_index);
7600
7601   Vcoding_category_table
7602     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
7603   staticpro (&Vcoding_category_table);
7604   {
7605     int i;
7606     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
7607       {
7608         XVECTOR (Vcoding_category_table)->contents[i]
7609           = intern (coding_category_name[i]);
7610         Fput (XVECTOR (Vcoding_category_table)->contents[i],
7611               Qcoding_category_index, make_number (i));
7612       }
7613   }
7614
7615   Vcoding_system_safe_chars = Fcons (Qnil, Qnil);
7616   staticpro (&Vcoding_system_safe_chars);
7617
7618   Qtranslation_table = intern ("translation-table");
7619   staticpro (&Qtranslation_table);
7620   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
7621
7622   Qtranslation_table_id = intern ("translation-table-id");
7623   staticpro (&Qtranslation_table_id);
7624
7625   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
7626   staticpro (&Qtranslation_table_for_decode);
7627
7628   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
7629   staticpro (&Qtranslation_table_for_encode);
7630
7631   Qsafe_chars = intern ("safe-chars");
7632   staticpro (&Qsafe_chars);
7633
7634   Qchar_coding_system = intern ("char-coding-system");
7635   staticpro (&Qchar_coding_system);
7636
7637   /* Intern this now in case it isn't already done.
7638      Setting this variable twice is harmless.
7639      But don't staticpro it here--that is done in alloc.c.  */
7640   Qchar_table_extra_slots = intern ("char-table-extra-slots");
7641   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
7642   Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0));
7643
7644   Qvalid_codes = intern ("valid-codes");
7645   staticpro (&Qvalid_codes);
7646
7647   Qemacs_mule = intern ("emacs-mule");
7648   staticpro (&Qemacs_mule);
7649
7650   Qraw_text = intern ("raw-text");
7651   staticpro (&Qraw_text);
7652
7653   Qutf_8 = intern ("utf-8");
7654   staticpro (&Qutf_8);
7655
7656   Qcoding_system_define_form = intern ("coding-system-define-form");
7657   staticpro (&Qcoding_system_define_form);
7658
7659   defsubr (&Scoding_system_p);
7660   defsubr (&Sread_coding_system);
7661   defsubr (&Sread_non_nil_coding_system);
7662   defsubr (&Scheck_coding_system);
7663   defsubr (&Sdetect_coding_region);
7664   defsubr (&Sdetect_coding_string);
7665   defsubr (&Sfind_coding_systems_region_internal);
7666   defsubr (&Sunencodable_char_position);
7667   defsubr (&Sdecode_coding_region);
7668   defsubr (&Sencode_coding_region);
7669   defsubr (&Sdecode_coding_string);
7670   defsubr (&Sencode_coding_string);
7671   defsubr (&Sdecode_sjis_char);
7672   defsubr (&Sencode_sjis_char);
7673   defsubr (&Sdecode_big5_char);
7674   defsubr (&Sencode_big5_char);
7675   defsubr (&Sset_terminal_coding_system_internal);
7676   defsubr (&Sset_safe_terminal_coding_system_internal);
7677   defsubr (&Sterminal_coding_system);
7678   defsubr (&Sset_keyboard_coding_system_internal);
7679   defsubr (&Skeyboard_coding_system);
7680   defsubr (&Sfind_operation_coding_system);
7681   defsubr (&Supdate_coding_systems_internal);
7682   defsubr (&Sset_coding_priority_internal);
7683   defsubr (&Sdefine_coding_system_internal);
7684
7685   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
7686                doc: /* List of coding systems.
7687
7688 Do not alter the value of this variable manually.  This variable should be
7689 updated by the functions `make-coding-system' and
7690 `define-coding-system-alias'.  */);
7691   Vcoding_system_list = Qnil;
7692
7693   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
7694                doc: /* Alist of coding system names.
7695 Each element is a one-element list containing a coding system name.
7696 This variable is given to `completing-read' as TABLE argument.
7697
7698 Do not alter the value of this variable manually.  This variable should be
7699 updated by the functions `make-coding-system' and
7700 `define-coding-system-alias'.  */);
7701   Vcoding_system_alist = Qnil;
7702
7703   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
7704                doc: /* List of coding-categories (symbols) ordered by priority.
7705
7706 On detecting a coding system, Emacs tries code detection algorithms
7707 associated with each coding-category one by one in this order.  When
7708 one algorithm agrees with a byte sequence of source text, the coding
7709 system bound to the corresponding coding-category is selected.  */);
7710   {
7711     int i;
7712
7713     Vcoding_category_list = Qnil;
7714     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
7715       Vcoding_category_list
7716         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
7717                  Vcoding_category_list);
7718   }
7719
7720   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
7721                doc: /* Specify the coding system for read operations.
7722 It is useful to bind this variable with `let', but do not set it globally.
7723 If the value is a coding system, it is used for decoding on read operation.
7724 If not, an appropriate element is used from one of the coding system alists:
7725 There are three such tables, `file-coding-system-alist',
7726 `process-coding-system-alist', and `network-coding-system-alist'.  */);
7727   Vcoding_system_for_read = Qnil;
7728
7729   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
7730                doc: /* Specify the coding system for write operations.
7731 Programs bind this variable with `let', but you should not set it globally.
7732 If the value is a coding system, it is used for encoding of output,
7733 when writing it to a file and when sending it to a file or subprocess.
7734
7735 If this does not specify a coding system, an appropriate element
7736 is used from one of the coding system alists:
7737 There are three such tables, `file-coding-system-alist',
7738 `process-coding-system-alist', and `network-coding-system-alist'.
7739 For output to files, if the above procedure does not specify a coding system,
7740 the value of `buffer-file-coding-system' is used.  */);
7741   Vcoding_system_for_write = Qnil;
7742
7743   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
7744                doc: /* Coding system used in the latest file or process I/O.
7745 Also set by `encode-coding-region', `decode-coding-region',
7746 `encode-coding-string' and `decode-coding-string'.  */);
7747   Vlast_coding_system_used = Qnil;
7748
7749   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
7750                doc: /* *Non-nil means always inhibit code conversion of end-of-line format.
7751 See info node `Coding Systems' and info node `Text and Binary' concerning
7752 such conversion.  */);
7753   inhibit_eol_conversion = 0;
7754
7755   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
7756                doc: /* Non-nil means process buffer inherits coding system of process output.
7757 Bind it to t if the process output is to be treated as if it were a file
7758 read from some filesystem.  */);
7759   inherit_process_coding_system = 0;
7760
7761   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
7762                doc: /* Alist to decide a coding system to use for a file I/O operation.
7763 The format is ((PATTERN . VAL) ...),
7764 where PATTERN is a regular expression matching a file name,
7765 VAL is a coding system, a cons of coding systems, or a function symbol.
7766 If VAL is a coding system, it is used for both decoding and encoding
7767 the file contents.
7768 If VAL is a cons of coding systems, the car part is used for decoding,
7769 and the cdr part is used for encoding.
7770 If VAL is a function symbol, the function must return a coding system
7771 or a cons of coding systems which are used as above.  The function gets
7772 the arguments with which `find-operation-coding-system' was called.
7773
7774 See also the function `find-operation-coding-system'
7775 and the variable `auto-coding-alist'.  */);
7776   Vfile_coding_system_alist = Qnil;
7777
7778   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
7779     doc: /* Alist to decide a coding system to use for a process I/O operation.
7780 The format is ((PATTERN . VAL) ...),
7781 where PATTERN is a regular expression matching a program name,
7782 VAL is a coding system, a cons of coding systems, or a function symbol.
7783 If VAL is a coding system, it is used for both decoding what is received
7784 from the program and encoding what is sent to the program.
7785 If VAL is a cons of coding systems, the car part is used for decoding,
7786 and the cdr part is used for encoding.
7787 If VAL is a function symbol, the function must return a coding system
7788 or a cons of coding systems which are used as above.
7789
7790 See also the function `find-operation-coding-system'.  */);
7791   Vprocess_coding_system_alist = Qnil;
7792
7793   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
7794     doc: /* Alist to decide a coding system to use for a network I/O operation.
7795 The format is ((PATTERN . VAL) ...),
7796 where PATTERN is a regular expression matching a network service name
7797 or is a port number to connect to,
7798 VAL is a coding system, a cons of coding systems, or a function symbol.
7799 If VAL is a coding system, it is used for both decoding what is received
7800 from the network stream and encoding what is sent to the network stream.
7801 If VAL is a cons of coding systems, the car part is used for decoding,
7802 and the cdr part is used for encoding.
7803 If VAL is a function symbol, the function must return a coding system
7804 or a cons of coding systems which are used as above.
7805
7806 See also the function `find-operation-coding-system'.  */);
7807   Vnetwork_coding_system_alist = Qnil;
7808
7809   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
7810                doc: /* Coding system to use with system messages.
7811 Also used for decoding keyboard input on X Window system.  */);
7812   Vlocale_coding_system = Qnil;
7813
7814   /* The eol mnemonics are reset in startup.el system-dependently.  */
7815   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
7816                doc: /* *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
7817   eol_mnemonic_unix = build_string (":");
7818
7819   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
7820                doc: /* *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
7821   eol_mnemonic_dos = build_string ("\\");
7822
7823   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
7824                doc: /* *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
7825   eol_mnemonic_mac = build_string ("/");
7826
7827   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
7828                doc: /* *String displayed in mode line when end-of-line format is not yet determined.  */);
7829   eol_mnemonic_undecided = build_string (":");
7830
7831   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
7832                doc: /* *Non-nil enables character translation while encoding and decoding.  */);
7833   Venable_character_translation = Qt;
7834
7835   DEFVAR_LISP ("standard-translation-table-for-decode",
7836                &Vstandard_translation_table_for_decode,
7837                doc: /* Table for translating characters while decoding.  */);
7838   Vstandard_translation_table_for_decode = Qnil;
7839
7840   DEFVAR_LISP ("standard-translation-table-for-encode",
7841                &Vstandard_translation_table_for_encode,
7842                doc: /* Table for translating characters while encoding.  */);
7843   Vstandard_translation_table_for_encode = Qnil;
7844
7845   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
7846                doc: /* Alist of charsets vs revision numbers.
7847 While encoding, if a charset (car part of an element) is found,
7848 designate it with the escape sequence identifying revision (cdr part of the element).  */);
7849   Vcharset_revision_alist = Qnil;
7850
7851   DEFVAR_LISP ("default-process-coding-system",
7852                &Vdefault_process_coding_system,
7853                doc: /* Cons of coding systems used for process I/O by default.
7854 The car part is used for decoding a process output,
7855 the cdr part is used for encoding a text to be sent to a process.  */);
7856   Vdefault_process_coding_system = Qnil;
7857
7858   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
7859                doc: /* Table of extra Latin codes in the range 128..159 (inclusive).
7860 This is a vector of length 256.
7861 If Nth element is non-nil, the existence of code N in a file
7862 \(or output of subprocess) doesn't prevent it from being detected as
7863 a coding system of ISO 2022 variant which has a flag
7864 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
7865 or reading output of a subprocess.
7866 Only the 128th through 159th elements have a meaning.  */);
7867   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
7868
7869   DEFVAR_LISP ("select-safe-coding-system-function",
7870                &Vselect_safe_coding_system_function,
7871                doc: /* Function to call to select safe coding system for encoding a text.
7872
7873 If set, this function is called to force a user to select a proper
7874 coding system which can encode the text in the case that a default
7875 coding system used in each operation can't encode the text.
7876
7877 The default value is `select-safe-coding-system' (which see).  */);
7878   Vselect_safe_coding_system_function = Qnil;
7879
7880   DEFVAR_BOOL ("coding-system-require-warning",
7881                &coding_system_require_warning,
7882                doc: /* Internal use only.
7883 If non-nil, on writing a file, `select-safe-coding-system-function' is
7884 called even if `coding-system-for-write' is non-nil.  The command
7885 `universal-coding-system-argument' binds this variable to t temporarily.  */);
7886   coding_system_require_warning = 0;
7887
7888
7889   DEFVAR_BOOL ("inhibit-iso-escape-detection",
7890                &inhibit_iso_escape_detection,
7891                doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
7892
7893 By default, on reading a file, Emacs tries to detect how the text is
7894 encoded.  This code detection is sensitive to escape sequences.  If
7895 the sequence is valid as ISO2022, the code is determined as one of
7896 the ISO2022 encodings, and the file is decoded by the corresponding
7897 coding system (e.g. `iso-2022-7bit').
7898
7899 However, there may be a case that you want to read escape sequences in
7900 a file as is.  In such a case, you can set this variable to non-nil.
7901 Then, as the code detection ignores any escape sequences, no file is
7902 detected as encoded in some ISO2022 encoding.  The result is that all
7903 escape sequences become visible in a buffer.
7904
7905 The default value is nil, and it is strongly recommended not to change
7906 it.  That is because many Emacs Lisp source files that contain
7907 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
7908 in Emacs's distribution, and they won't be decoded correctly on
7909 reading if you suppress escape sequence detection.
7910
7911 The other way to read escape sequences in a file without decoding is
7912 to explicitly specify some coding system that doesn't use ISO2022's
7913 escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument].  */);
7914   inhibit_iso_escape_detection = 0;
7915
7916   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
7917                doc: /* Char table for translating self-inserting characters.
7918 This is applied to the result of input methods, not their input.  See also
7919 `keyboard-translate-table'.  */);
7920     Vtranslation_table_for_input = Qnil;
7921 }
7922
7923 char *
7924 emacs_strerror (error_number)
7925      int error_number;
7926 {
7927   char *str;
7928
7929   synchronize_system_messages_locale ();
7930   str = strerror (error_number);
7931
7932   if (! NILP (Vlocale_coding_system))
7933     {
7934       Lisp_Object dec = code_convert_string_norecord (build_string (str),
7935                                                       Vlocale_coding_system,
7936                                                       0);
7937       str = (char *) SDATA (dec);
7938     }
7939
7940   return str;
7941 }
7942
7943 #endif /* emacs */
7944
7945 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
7946    (do not change this comment) */