code.delx.au - gnu-emacs/blob - src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software: you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation, either version 3 of the License, or
  17 (at your option) any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  26
  27 /*** TABLE OF CONTENTS ***
  28
  29   0. General comments
  30   1. Preamble
  31   2. Emacs' internal format (emacs-utf-8) handlers
  32   3. UTF-8 handlers
  33   4. UTF-16 handlers
  34   5. Charset-base coding systems handlers
  35   6. emacs-mule (old Emacs' internal format) handlers
  36   7. ISO2022 handlers
  37   8. Shift-JIS and BIG5 handlers
  38   9. CCL handlers
  39   10. C library functions
  40   11. Emacs Lisp library functions
  41   12. Postamble
  42
  43 */
  44
  45 /*** 0. General comments ***
  46
  47
  48 CODING SYSTEM
  49
  50   A coding system is an object for an encoding mechanism that contains
  51   information about how to convert byte sequences to character
  52   sequences and vice versa.  When we say "decode", it means converting
  53   a byte sequence of a specific coding system into a character
  54   sequence that is represented by Emacs' internal coding system
  55   `emacs-utf-8', and when we say "encode", it means converting a
  56   character sequence of emacs-utf-8 to a byte sequence of a specific
  57   coding system.
  58
  59   In Emacs Lisp, a coding system is represented by a Lisp symbol.  At
  60   the C level, a coding system is represented by a vector of attributes
  61   stored in the hash table Vcoding_system_hash_table.  The conversion
  62   from a coding system symbol to its attributes vector is done by
  63   looking up Vcoding_system_hash_table by the symbol.
  64
  65   Coding systems are classified into the following types depending on
  66   the encoding mechanism.  Here's a brief description of the types.
  67
  68   o UTF-8
  69
  70   o UTF-16
  71
  72   o Charset-base coding system
  73
  74   A coding system defined by one or more (coded) character sets.
  75   Decoding and encoding are done by a code converter defined for each
  76   character set.
  77
  78   o Old Emacs internal format (emacs-mule)
  79
  80   The coding system adopted by old versions of Emacs (20 and 21).
  81
  82   o ISO2022-base coding system
  83
  84   The most famous coding system for multiple character sets.  X's
  85   Compound Text, various EUCs (Extended Unix Code), and coding systems
  86   used in the Internet communication such as ISO-2022-JP are all
  87   variants of ISO2022.
  88
  89   o SJIS (or Shift-JIS or MS-Kanji-Code)
  90
  91   A coding system to encode character sets: ASCII, JISX0201, and
  92   JISX0208.  Widely used for PC's in Japan.  Details are described in
  93   section 8.
  94
  95   o BIG5
  96
  97   A coding system to encode character sets: ASCII and Big5.  Widely
  98   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  99   described in section 8.  In this file, when we write "big5" (all
 100   lowercase), we mean the coding system, and when we write "Big5"
 101   (capitalized), we mean the character set.
 102
 103   o CCL
 104
 105   If a user wants to decode/encode text encoded in a coding system
 106   not listed above, he can supply a decoder and an encoder for it in
 107   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 108   program while decoding/encoding.
 109
 110   o Raw-text
 111
 112   A coding system for text containing raw eight-bit data.  Emacs
 113   treats each byte of source text as a character (except for
 114   end-of-line conversion).
 115
 116   o No-conversion
 117
 118   Like raw text, but don't do end-of-line conversion.
 119
 120
 121 END-OF-LINE FORMAT
 122
 123   How text end-of-line is encoded depends on operating system.  For
 124   instance, Unix's format is just one byte of LF (line-feed) code,
 125   whereas DOS's format is two-byte sequence of `carriage-return' and
 126   `line-feed' codes.  MacOS's format is usually one byte of
 127   `carriage-return'.
 128
 129   Since text character encoding and end-of-line encoding are
 130   independent, any coding system described above can take any format
 131   of end-of-line (except for no-conversion).
 132
 133 STRUCT CODING_SYSTEM
 134
 135   Before using a coding system for code conversion (i.e. decoding and
 136   encoding), we setup a structure of type `struct coding_system'.
 137   This structure keeps various information about a specific code
 138   conversion (e.g. the location of source and destination data).
 139
 140 */
 141
 142 /* COMMON MACROS */
 143
 144
 145 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 146
 147   These functions check if a byte sequence specified as a source in
 148   CODING conforms to the format of XXX, and update the members of
 149   DETECT_INFO.
 150
 151   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 152
 153   Below is the template of these functions.  */
 154
 155 #if 0
     /* Template only (never compiled).  Scan the source bytes of CODING.
        On a byte that does not conform to XXX, mark the category as
        rejected in DETECT_INFO and return 0.  When the source runs out
        without such a byte, merge what was found into DETECT_INFO->found
        and return 1.  */
 156 static int
 157 detect_coding_XXX (coding, detect_info)
 158      struct coding_system *coding;
 159      struct coding_detection_info *detect_info;
 160 {
 161   const unsigned char *src = coding->source;
 162   const unsigned char *src_end = coding->source + coding->src_bytes;
 163   int multibytep = coding->src_multibyte;
 164   int consumed_chars = 0;
 165   int found = 0;
       /* The elided declarations must include `c' and `src_base', which
          ONE_MORE_BYTE refers to.  */
 166   ...;
 167
 168   while (1)
 169     {
 170       /* Get one byte from the source.  If the source is exhausted, jump
 171          to no_more_source:.  */
 172       ONE_MORE_BYTE (c);
 173
 174       if (! __C_conforms_to_XXX___ (c))
 175         break;
           /* Remember the category only when C is positive evidence for
              XXX, not merely compatible with it.  */
 176       if (__C_strongly_suggests_XXX__ (c))
 177         found = CATEGORY_MASK_XXX;
 178     }
 179   /* The byte sequence is invalid for XXX.  */
 180   detect_info->rejected |= CATEGORY_MASK_XXX;
 181   return 0;
 182
 183  no_more_source:
 184   /* The source was exhausted successfully.  */
 185   detect_info->found |= found;
 186   return 1;
 187 }
 188 #endif
 189
 190 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 191
 192   These functions decode a byte sequence specified as a source by
 193   CODING.  The resulting multibyte text goes to a place pointed to by
 194   CODING->charbuf, the length of which should not exceed
 195   CODING->charbuf_size.
 196
 197   These functions set the information of original and decoded texts in
 198   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 199   They also set CODING->result to one of CODING_RESULT_XXX indicating
 200   how the decoding is finished.
 201
 202   Below is the template of these functions.  */
 203
 204 #if 0
     /* Template only (never compiled).  Decode source bytes of CODING,
        starting after the bytes already consumed, into CODING->charbuf,
        then record how much was consumed and produced.  */
 205 static void
 206 decode_coding_XXXX (coding)
 207      struct coding_system *coding;
 208 {
 209   const unsigned char *src = coding->source + coding->consumed;
 210   const unsigned char *src_end = coding->source + coding->src_bytes;
 211   /* SRC_BASE remembers the start position in source in each loop.
 212      The loop will be exited when there's not enough source text, or
 213      when there's no room in CHARBUF for a decoded character.  */
 214   const unsigned char *src_base;
 215   /* A buffer to produce decoded characters.  */
 216   int *charbuf = coding->charbuf + coding->charbuf_used;
 217   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 218   int multibytep = coding->src_multibyte;
 219
 220   while (1)
 221     {
 222       src_base = src;
 223       if (charbuf >= charbuf_end)
 224         /* No more room to produce a decoded character.  */
 225         break;
 226       ONE_MORE_BYTE (c);
 227       /* Decode it. */
 228     }
 229
 230  no_more_source:
 231   if (src_base < src_end
 232       && coding->mode & CODING_MODE_LAST_BLOCK)
 233     /* If the source ends by partial bytes to construct a character,
 234        treat them as eight-bit raw data.  */
 235     while (src_base < src_end && charbuf < charbuf_end)
 236       *charbuf++ = *src_base++;
 237   /* Remember how many bytes and characters we consumed.  If the
 238      source is multibyte, the bytes and chars are not identical.  */
 239   coding->consumed = coding->consumed_char = src_base - coding->source;
 240   /* Remember how many characters we produced.  */
 241   coding->charbuf_used = charbuf - coding->charbuf;
 242 }
 243 #endif
 244
 245 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 246
 247   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 248   internal multibyte format by CODING.  The resulting byte sequence
 249   goes to a place pointed to by DESTINATION, the length of which
 250   should not exceed DST_BYTES.
 251
 252   These functions set the information of original and encoded texts in
 253   the members produced, produced_char, consumed, and consumed_char of
 254   the structure *CODING.  They also set the member result to one of
 255   CODING_RESULT_XXX indicating how the encoding finished.
 256
 257   DST_BYTES zero means that the source area and the destination area
 258   overlap, which means that we can produce encoded text only until it
 259   reaches the head of the not-yet-encoded source text.
 260
 261   Below is a template of these functions.  */
 262 #if 0
     /* Template only (never compiled).  Encode the characters in
        CODING->charbuf into CODING->destination, stopping early when
        fewer than _MAX_BYTES_PRODUCED_IN_LOOP_ bytes of room remain, then
        record how much was produced.  */
 263 static void
 264 encode_coding_XXX (coding)
 265      struct coding_system *coding;
 266 {
 267   int multibytep = coding->dst_multibyte;
 268   int *charbuf = coding->charbuf;
       /* CHARBUF is a plain `int *', so the end is computed from it
          directly.  */
 269   int *charbuf_end = charbuf + coding->charbuf_used;
 270   unsigned char *dst = coding->destination + coding->produced;
 271   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 272   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 273   int produced_chars = 0;
 274
 275   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 276     {
 277       int c = *charbuf;
 278       /* Encode C into DST, and increment DST.  */
 279     }
 280  label_no_more_destination:
 281   /* How many chars and bytes we produced.  */
 282   coding->produced_char += produced_chars;
 283   coding->produced = dst - coding->destination;
 284 }
 285 #endif
 286
 287 \f
 288 /*** 1. Preamble ***/
 289
 290 #include <config.h>
 291 #include <stdio.h>
 292
 293 #include "lisp.h"
 294 #include "buffer.h"
 295 #include "character.h"
 296 #include "charset.h"
 297 #include "ccl.h"
 298 #include "composite.h"
 299 #include "coding.h"
 300 #include "window.h"
 301 #include "frame.h"
 302 #include "termhooks.h"
 303
     /* NOTE(review): presumably the hash table that the general comments
        at the top of this file describe, mapping a coding system symbol
        to its vector of attributes -- confirm against its users.  */
 304 Lisp_Object Vcoding_system_hash_table;
 305
     /* Lisp objects used by this file.  The ones declared `extern' are
        defined in other files.  NOTE(review): by Emacs naming convention
        Q... variables hold symbols and QC... variables hold keywords --
        confirm where they are initialized.  */
 306 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 307 Lisp_Object Qunix, Qdos;
 308 extern Lisp_Object Qmac;        /* frame.c */
 309 Lisp_Object Qbuffer_file_coding_system;
 310 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 311 Lisp_Object Qdefault_char;
 312 Lisp_Object Qno_conversion, Qundecided;
 313 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 314 Lisp_Object Qbig, Qlittle;
 315 Lisp_Object Qcoding_system_history;
 316 Lisp_Object Qvalid_codes;
     /* NOTE(review): `QCdefalut_char' looks like a misspelling of
        `QCdefault_char'.  Renaming it requires changing every user of
        the variable, which cannot be done from this declaration alone.  */
 317 Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
 318 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 319 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 320 Lisp_Object QCascii_compatible_p;
 321
 322 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 323 Lisp_Object Qcall_process, Qcall_process_region;
 324 Lisp_Object Qstart_process, Qopen_network_stream;
 325 Lisp_Object Qtarget_idx;
 326
     /* NOTE(review): these names parallel the CODING_RESULT_XXX codes
        mentioned in the notes above; presumably one symbol per result --
        confirm.  */
 327 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 328 Lisp_Object Qinterrupted, Qinsufficient_memory;
 329
 330 extern Lisp_Object Qcompletion_ignore_case;
 331
 332 /* If a symbol has this property, evaluate the value to define the
 333    symbol as a coding system.  */
 334 static Lisp_Object Qcoding_system_define_form;
 335
 336 int coding_system_require_warning;
 337
 338 Lisp_Object Vselect_safe_coding_system_function;
 339
 340 /* Mnemonic string for each format of end-of-line.  */
 341 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 342 /* Mnemonic string to indicate that the format of end-of-line is not
 343    yet decided.  */
 344 Lisp_Object eol_mnemonic_undecided;
 345
 346 /* Format of end-of-line decided by the system.  This is Qunix on
 347    Unix and Mac, Qdos on DOS/Windows.
 348    This has an effect only for external encoding (i.e. for output to
 349    file and process), not for in-buffer or Lisp string encoding.  */
 350 static Lisp_Object system_eol_type;
 351
 352 #ifdef emacs
     /* Everything down to the matching #endif is compiled only when the
        macro `emacs' is defined.  */
 353
 354 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 355
 356 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 357
 358 /* Coding systems emacs-mule and raw-text are for converting only the
 359    end-of-line format.  */
 360 Lisp_Object Qemacs_mule, Qraw_text;
 361 Lisp_Object Qutf_8_emacs;
 362
 363 /* Coding-systems are handed between Emacs Lisp programs and C internal
 364    routines by the following three variables.  */
 365 /* Coding-system for reading files and receiving data from process.  */
 366 Lisp_Object Vcoding_system_for_read;
 367 /* Coding-system for writing files and sending data to process.  */
 368 Lisp_Object Vcoding_system_for_write;
 369 /* Coding-system actually used in the latest I/O.  */
 370 Lisp_Object Vlast_coding_system_used;
 371 /* Set to non-nil when an error is detected during code conversion.  */
 372 Lisp_Object Vlast_code_conversion_error;
 373 /* A vector of length 256 which contains information about special
 374    Latin codes (especially for dealing with Microsoft codes).  */
 375 Lisp_Object Vlatin_extra_code_table;
 376
 377 /* Flag to inhibit code conversion of end-of-line format.  */
 378 int inhibit_eol_conversion;
 379
 380 /* Flag to inhibit ISO2022 escape sequence detection.  */
 381 int inhibit_iso_escape_detection;
 382
 383 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 384 int inherit_process_coding_system;
 385
 386 /* Coding system to be used to encode text for terminal display when
 387    terminal coding system is nil.  */
 388 struct coding_system safe_terminal_coding;
 389
     /* NOTE(review): presumably alists that select a coding system for
        file, process and network I/O respectively -- confirm against
        the places that define and consult them.  */
 390 Lisp_Object Vfile_coding_system_alist;
 391 Lisp_Object Vprocess_coding_system_alist;
 392 Lisp_Object Vnetwork_coding_system_alist;
 393
 394 Lisp_Object Vlocale_coding_system;
 395
 396 #endif /* emacs */
 397
 398 /* Flag to tell whether translation tables are looked up during
 399    character code conversion.  */
 400 Lisp_Object Venable_character_translation;
 401 /* Standard translation table to look up on decoding (reading).  */
 402 Lisp_Object Vstandard_translation_table_for_decode;
 403 /* Standard translation table to look up on encoding (writing).  */
 404 Lisp_Object Vstandard_translation_table_for_encode;
 405
 406 Lisp_Object Qtranslation_table;
 407 Lisp_Object Qtranslation_table_id;
 408 Lisp_Object Qtranslation_table_for_decode;
 409 Lisp_Object Qtranslation_table_for_encode;
 410
 411 /* Alist mapping charsets to revision numbers.  */
 412 static Lisp_Object Vcharset_revision_table;
 413
 414 /* Default coding systems used for process I/O.  */
 415 Lisp_Object Vdefault_process_coding_system;
 416
 417 /* Char table for translating Quail and self-inserting input.  */
 418 Lisp_Object Vtranslation_table_for_input;
 419
 420 /* Two special coding systems.  NOTE(review): presumably the SJIS and
        BIG5 coding systems described in the general comments at the top
        of this file -- confirm.  */
 421 Lisp_Object Vsjis_coding_system;
 422 Lisp_Object Vbig5_coding_system;
 423
 424 /* ISO2022 section */
 425
     /* Element REG of the `coding_attr_iso_initial' attribute of CODING's
        attribute vector, converted to a C integer with XINT.  */
 426 #define CODING_ISO_INITIAL(coding, reg)                 \
 427   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 428                      coding_attr_iso_initial),          \
 429                reg)))
 430
 431
     /* Return CODING->safe_charsets[CHARSET_ID] if CHARSET_ID does not
        exceed CODING->max_charset_id, otherwise -1.  CHARSET_ID is
        parenthesized so that an expression argument (e.g. `a | b') is
        compared and indexed as a whole.  It is evaluated twice, so it
        must not have side effects.  */
 432 #define CODING_ISO_REQUEST(coding, charset_id)  \
 433   (((charset_id) <= (coding)->max_charset_id    \
 434     ? (coding)->safe_charsets[(charset_id)]     \
 435     : -1))
 436
 437
     /* Accessors for the ISO2022-specific members kept in
        CODING->spec.iso_2022: the flags, the current designation of
        register REG, the current invocation of PLANE, the
        single-shifting member and the bol member.  */
 438 #define CODING_ISO_FLAGS(coding)        \
 439   ((coding)->spec.iso_2022.flags)
 440 #define CODING_ISO_DESIGNATION(coding, reg)     \
 441   ((coding)->spec.iso_2022.current_designation[reg])
 442 #define CODING_ISO_INVOCATION(coding, plane)    \
 443   ((coding)->spec.iso_2022.current_invocation[plane])
 444 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 445   ((coding)->spec.iso_2022.single_shifting)
 446 #define CODING_ISO_BOL(coding)  \
 447   ((coding)->spec.iso_2022.bol)
     /* The current designation of the register that is currently invoked
        to PLANE, i.e. current_designation[current_invocation[PLANE]].  */
 448 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 449   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 450
 451 /* Control characters of ISO2022.  Single-shift-2 has two codes:
        ISO_CODE_SS2_7 (0x19) for 7-bit code and ISO_CODE_SS2 (0x8E).  */
 452                         /* code */      /* function */
 453 #define ISO_CODE_LF     0x0A            /* line-feed */
 454 #define ISO_CODE_CR     0x0D            /* carriage-return */
 455 #define ISO_CODE_SO     0x0E            /* shift-out */
 456 #define ISO_CODE_SI     0x0F            /* shift-in */
 457 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 458 #define ISO_CODE_ESC    0x1B            /* escape */
 459 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 460 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 461 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 462
 463 /* Every 1-byte code of ISO2022 is classified into one of the
 464    following classes.  */
 465 enum iso_code_class_type
 466   {
 467     ISO_control_0,              /* Control codes in the range
 468                                    0x00..0x1F and 0x7F, except for the
 469                                    following 4 codes.  */
 470     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 471     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 472     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 473     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
 474     ISO_control_1,              /* Control codes in the range
 475                                    0x80..0x9F, except for the
 476                                    following 3 codes.  */
 477     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 478     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 479     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 480     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
         /* NOTE(review): 0x7F is named both here and under ISO_control_0.
            Which class it really gets is decided where the
            classification table is filled in -- confirm there.  */
 481     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 482     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 483     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 484   };
 485
 486 /** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
 487     `iso-flags' attribute of an iso2022 coding system.  */
 488
 489 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 490    instead of the correct short-form sequence (e.g. ESC $ A).  */
 491 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 492
 493 /* If set, reset graphic planes and registers at end-of-line to the
 494    initial state.  */
 495 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 496
 497 /* If set, reset graphic planes and registers before any control
 498    characters to the initial state.  */
 499 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 500
 501 /* If set, encode by 7-bit environment.  */
 502 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 503
 504 /* If set, use locking-shift function.  */
 505 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 506
 507 /* If set, use single-shift function.  Overrides
 508    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 509 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 510
 511 /* If set, use designation escape sequence.  */
 512 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 513
 514 /* If set, produce revision number sequence.  */
 515 #define CODING_ISO_FLAG_REVISION        0x0080
 516
 517 /* If set, produce ISO6429's direction specifying sequence.  */
 518 #define CODING_ISO_FLAG_DIRECTION       0x0100
 519
 520 /* If set, assume designation states are reset at beginning of line on
 521    output.  */
 522 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 523
 524 /* If set, designation sequence should be placed at beginning of line
 525    on output.  */
 526 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 527
 528 /* If set, do not encode unsafe characters on output.  */
 529 #define CODING_ISO_FLAG_SAFE            0x0800
 530
 531 /* If set, extra latin codes (128..159) are accepted as a valid code
 532    on input.  */
 533 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 534
     /* NOTE(review): the five flags below carry no description here;
        take their meaning from the code that tests them.  The values
        jump from 0x10000 to 0x100000, leaving 0x20000..0x80000 unused.  */
 535 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 536
 537 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 538
 539 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 540
 541 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 542
 543 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 544
 545 /* A character to be produced on output if encoding of the original
 546    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 547 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 548
 549 /* UTF-8 section.  Accessor for the utf_8_bom member of
        CODING->spec.  */
 550 #define CODING_UTF_8_BOM(coding)        \
 551   ((coding)->spec.utf_8_bom)
 552
 553 /* UTF-16 section.  Accessors for the bom, endian and surrogate
        members of CODING->spec.utf_16.  */
 554 #define CODING_UTF_16_BOM(coding)       \
 555   ((coding)->spec.utf_16.bom)
 556
 557 #define CODING_UTF_16_ENDIAN(coding)    \
 558   ((coding)->spec.utf_16.endian)
 559
 560 #define CODING_UTF_16_SURROGATE(coding) \
 561   ((coding)->spec.utf_16.surrogate)
 562
 563
 564 /* CCL section.  Each macro fetches one attribute (ccl_decoder,
        ccl_encoder, ccl_valids) from the attribute vector of CODING's
        id.  CODING_CCL_VALIDS applies SDATA to the attribute; the other
        two yield the attribute itself.  */
 565 #define CODING_CCL_DECODER(coding)      \
 566   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 567 #define CODING_CCL_ENCODER(coding)      \
 568   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 569 #define CODING_CCL_VALIDS(coding)                                          \
 570   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 571
 572 /* Index for each coding category in `coding_categories'.  The
        enumerator values are also used as bit positions by the
        CATEGORY_MASK_XXX macros, and coding_category_max is the size of
        the per-category arrays, so the order matters.  */
 573
 574 enum coding_category
 575   {
 576     coding_category_iso_7,
 577     coding_category_iso_7_tight,
 578     coding_category_iso_8_1,
 579     coding_category_iso_8_2,
 580     coding_category_iso_7_else,
 581     coding_category_iso_8_else,
 582     coding_category_utf_8_auto,
 583     coding_category_utf_8_nosig,
 584     coding_category_utf_8_sig,
 585     coding_category_utf_16_auto,
 586     coding_category_utf_16_be,
 587     coding_category_utf_16_le,
 588     coding_category_utf_16_be_nosig,
 589     coding_category_utf_16_le_nosig,
 590     coding_category_charset,
 591     coding_category_sjis,
 592     coding_category_big5,
 593     coding_category_ccl,
 594     coding_category_emacs_mule,
 595     /* All above are targets of code detection.  */
 596     coding_category_raw_text,
 597     coding_category_undecided,
 598     coding_category_max
 599   };
 600
 601 /* Definitions of flag bits used in detect_coding_XXXX.  Each mask is
        the single bit whose position is the matching enumerator of
        `enum coding_category'.  No mask is defined for
        coding_category_undecided.  */
 602 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 603 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 604 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 605 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 606 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 607 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 608 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 609 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 610 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 611 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 612 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 613 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 614 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 615 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 616 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 617 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 618 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 619 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 620 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 621 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 622
 623 /* This value is returned if detect_coding_mask () finds nothing other
 624    than ASCII characters.  It is the union of the masks of all
        categories from iso_7 through emacs_mule; CATEGORY_MASK_RAW_TEXT
        is not included.  */
 625 #define CATEGORY_MASK_ANY               \
 626   (CATEGORY_MASK_ISO_7                  \
 627    | CATEGORY_MASK_ISO_7_TIGHT          \
 628    | CATEGORY_MASK_ISO_8_1              \
 629    | CATEGORY_MASK_ISO_8_2              \
 630    | CATEGORY_MASK_ISO_7_ELSE           \
 631    | CATEGORY_MASK_ISO_8_ELSE           \
 632    | CATEGORY_MASK_UTF_8_AUTO           \
 633    | CATEGORY_MASK_UTF_8_NOSIG          \
 634    | CATEGORY_MASK_UTF_8_SIG            \
 635    | CATEGORY_MASK_UTF_16_AUTO          \
 636    | CATEGORY_MASK_UTF_16_BE            \
 637    | CATEGORY_MASK_UTF_16_LE            \
 638    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 639    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 640    | CATEGORY_MASK_CHARSET              \
 641    | CATEGORY_MASK_SJIS                 \
 642    | CATEGORY_MASK_BIG5                 \
 643    | CATEGORY_MASK_CCL                  \
 644    | CATEGORY_MASK_EMACS_MULE)
 645
 646
     /* The two 7-bit ISO categories (iso_7 and iso_7_tight).  */
 647 #define CATEGORY_MASK_ISO_7BIT \
 648   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 649
     /* The two 8-bit ISO categories (iso_8_1 and iso_8_2).  */
 650 #define CATEGORY_MASK_ISO_8BIT \
 651   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 652
     /* The two `else' ISO categories (iso_7_else and iso_8_else).  */
 653 #define CATEGORY_MASK_ISO_ELSE \
 654   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 655
     /* All ISO categories except iso_8_1 and iso_8_2.  NOTE(review):
        presumably the categories that can be identified by escape
        sequences -- confirm against the ISO2022 detector.  */
 656 #define CATEGORY_MASK_ISO_ESCAPE        \
 657   (CATEGORY_MASK_ISO_7                  \
 658    | CATEGORY_MASK_ISO_7_TIGHT          \
 659    | CATEGORY_MASK_ISO_7_ELSE           \
 660    | CATEGORY_MASK_ISO_8_ELSE)
 661
     /* All six ISO categories.  */
 662 #define CATEGORY_MASK_ISO       \
 663   (  CATEGORY_MASK_ISO_7BIT     \
 664      | CATEGORY_MASK_ISO_8BIT   \
 665      | CATEGORY_MASK_ISO_ELSE)
 666
     /* All five UTF-16 categories.  */
 667 #define CATEGORY_MASK_UTF_16            \
 668   (CATEGORY_MASK_UTF_16_AUTO            \
 669    | CATEGORY_MASK_UTF_16_BE            \
 670    | CATEGORY_MASK_UTF_16_LE            \
 671    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 672    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 673
     /* All three UTF-8 categories.  */
 674 #define CATEGORY_MASK_UTF_8     \
 675   (CATEGORY_MASK_UTF_8_AUTO     \
 676    | CATEGORY_MASK_UTF_8_NOSIG  \
 677    | CATEGORY_MASK_UTF_8_SIG)
 678
 679 /* List of symbols `coding-category-xxx' ordered by priority.  This
 680    variable is exposed to Emacs Lisp.  */
 681 static Lisp_Object Vcoding_category_list;
 682
 683 /* Table of coding categories (Lisp symbols).  This variable is for
 684    internal use only.  */
 685 static Lisp_Object Vcoding_category_table;
 686
 687 /* Table of coding-categories ordered by priority.  It has
        coding_category_max elements.  */
 688 static enum coding_category coding_priorities[coding_category_max];
 689
 690 /* Nth element is a coding context for the coding system bound to the
 691    Nth coding category.  */
 692 static struct coding_system coding_categories[coding_category_max];
 693
 694 /*** Commonly used macros and functions ***/
 695
     /* Fallback definitions of `min' and `max', used only when no earlier
        definition of the macro exists.  Each argument may be evaluated
        twice, so arguments must not have side effects.  */
 696 #ifndef min
 697 #define min(a, b) ((a) < (b) ? (a) : (b))
 698 #endif
 699 #ifndef max
 700 #define max(a, b) ((a) > (b) ? (a) : (b))
 701 #endif
 702
     /* Set ATTRS to the attribute vector for CODING's id, and
        CHARSET_LIST to the charset list taken from those attributes.
        Both ATTRS and CHARSET_LIST must be assignable.  */
 703 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 704   do {                                                  \
 705     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 706     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 707   } while (0)
 708
 709
 710 /* Safely get one byte from the source text pointed by SRC which ends
 711    at SRC_END, and set C to that byte.  If there are not enough bytes
 712    in the source, it jumps to `no_more_source'.  If multibytep is
 713    nonzero, and a multibyte character is found at SRC, set C to the
 714    negative value of the character code.  The caller should declare
 715    and set these variables appropriately in advance:
 716         src, src_end, src_base, multibytep, consumed_chars, coding */
     /* Details of the expansion:
        - When the source is exhausted and SRC is beyond SRC_BASE,
          CODING_RESULT_INSUFFICIENT_SRC is recorded before the jump.
        - When multibytep is nonzero and the byte is 0xC0 or 0xC1, it is
          combined with the following byte into one value,
          ((C & 1) << 6) | next byte.  NOTE(review): presumably the
          two-byte multibyte form of a raw eight-bit byte -- confirm.
          The following byte is read without a check against SRC_END.
        - For any other byte with the high bit set, the whole character
          is re-read with string_char, C becomes its negated code, and
          CODING_RESULT_INVALID_SRC is recorded.
        - CONSUMED_CHARS is incremented once whenever a value is stored
          in C.  */
 717
 718 #define ONE_MORE_BYTE(c)                                \
 719   do {                                                  \
 720     if (src == src_end)                                 \
 721       {                                                 \
 722         if (src_base < src)                             \
 723           record_conversion_result                      \
 724             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 725         goto no_more_source;                            \
 726       }                                                 \
 727     c = *src++;                                         \
 728     if (multibytep && (c & 0x80))                       \
 729       {                                                 \
 730         if ((c & 0xFE) == 0xC0)                         \
 731           c = ((c & 1) << 6) | *src++;                  \
 732         else                                            \
 733           {                                             \
 734             src--;                                      \
 735             c = - string_char (src, &src, NULL);        \
 736             record_conversion_result                    \
 737               (coding, CODING_RESULT_INVALID_SRC);      \
 738           }                                             \
 739       }                                                 \
 740     consumed_chars++;                                   \
 741   } while (0)
 742
 743
     /* Like ONE_MORE_BYTE, but without the check for the end of the
        source: it never jumps to `no_more_source' and does not refer to
        SRC_END or SRC_BASE.  The caller must already know that the
        needed bytes are available at SRC, and must declare and set:
             src, multibytep, consumed_chars, coding */
 744 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 745   do {                                                  \
 746     c = *src++;                                         \
 747     if (multibytep && (c & 0x80))                       \
 748       {                                                 \
 749         if ((c & 0xFE) == 0xC0)                         \
 750           c = ((c & 1) << 6) | *src++;                  \
 751         else                                            \
 752           {                                             \
 753             src--;                                      \
 754             c = - string_char (src, &src, NULL);        \
 755             record_conversion_result                    \
 756               (coding, CODING_RESULT_INVALID_SRC);      \
 757           }                                             \
 758       }                                                 \
 759     consumed_chars++;                                   \
 760   } while (0)
 761
 762
 763 /* Store a byte C in the place pointed by DST and increment DST to the
 764    next free point, and increment PRODUCED_CHARS.  The caller should
 765    assure that C is 0..127, and declare and set the variables `dst'
 766    and `produced_chars' appropriately in advance.
 767 */
 768
 769
 770 #define EMIT_ONE_ASCII_BYTE(c)  \
 771   do {                          \
 772     produced_chars++;           \
 773     *dst++ = (c);               \
 774   } while (0)
 775
 776
 /* Like EMIT_ONE_ASCII_BYTE, but store two bytes: C1 first, then C2.
    PRODUCED_CHARS grows by two before either byte is stored.  */

 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
   do                                    \
     {                                   \
       produced_chars += 2;              \
       *dst++ = (c1);                    \
       *dst++ = (c2);                    \
     }                                   \
   while (0)
 784
 785
 /* Count one more produced character in PRODUCED_CHARS and store the
    byte C at DST, advancing DST.  If MULTIBYTEP is zero the byte is
    stored as is.  Otherwise it is stored in multibyte form: a value
    of 0x80 or above is first converted with BYTE8_TO_CHAR, and the
    result is written with CHAR_STRING_ADVANCE.  The caller must
    declare and set the variables `dst' and `multibytep' appropriately
    in advance.  */

 #define EMIT_ONE_BYTE(c)                \
   do {                                  \
     produced_chars++;                   \
     if (! multibytep)                   \
       *dst++ = (c);                     \
     else                                \
       {                                 \
         int ch = (c);                   \
                                         \
         if (! (ch < 0x80))              \
           ch = BYTE8_TO_CHAR (ch);      \
         CHAR_STRING_ADVANCE (ch, dst);  \
       }                                 \
   } while (0)
 805
 806
 /* Like EMIT_ONE_BYTE, but emit two bytes: C1 first, then C2.
    PRODUCED_CHARS grows by two before either byte is stored.  */

 #define EMIT_TWO_BYTES(c1, c2)          \
   do {                                  \
     produced_chars += 2;                \
     if (! multibytep)                   \
       {                                 \
         *dst++ = (c1);                  \
         *dst++ = (c2);                  \
       }                                 \
     else                                \
       {                                 \
         int ch = (c1);                  \
                                         \
         if (! (ch < 0x80))              \
           ch = BYTE8_TO_CHAR (ch);      \
         CHAR_STRING_ADVANCE (ch, dst);  \
                                         \
         ch = (c2);                      \
         if (! (ch < 0x80))              \
           ch = BYTE8_TO_CHAR (ch);      \
         CHAR_STRING_ADVANCE (ch, dst);  \
       }                                 \
   } while (0)
 831
 832
 /* Emit the three bytes C1, C2 and C3 in that order: C1 through
    EMIT_ONE_BYTE, then C2 and C3 through EMIT_TWO_BYTES.  */
 #define EMIT_THREE_BYTES(c1, c2, c3)    \
   do                                    \
     {                                   \
       EMIT_ONE_BYTE (c1);               \
       EMIT_TWO_BYTES (c2, c3);          \
     }                                   \
   while (0)
 838
 839
 /* Emit the four bytes C1, C2, C3 and C4 in that order, as two
    consecutive EMIT_TWO_BYTES pairs.  */
 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
   do                                            \
     {                                           \
       EMIT_TWO_BYTES (c1, c2);                  \
       EMIT_TWO_BYTES (c3, c4);                  \
     }                                           \
   while (0)
 845
 846
 847 /* Prototypes for static functions (parameter lists wrapped in P_).  */
 848 static void record_conversion_result P_ ((struct coding_system *coding,
 849                                           enum coding_result_code result));
 850 static int detect_coding_utf_8 P_ ((struct coding_system *,
 851                                     struct coding_detection_info *info));
 852 static void decode_coding_utf_8 P_ ((struct coding_system *));
 853 static int encode_coding_utf_8 P_ ((struct coding_system *));
 854 /* UTF-16 detector, decoder and encoder.  */
 855 static int detect_coding_utf_16 P_ ((struct coding_system *,
 856                                      struct coding_detection_info *info));
 857 static void decode_coding_utf_16 P_ ((struct coding_system *));
 858 static int encode_coding_utf_16 P_ ((struct coding_system *));
 859 /* ISO2022 detector, decoder and encoder.  */
 860 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 861                                        struct coding_detection_info *info));
 862 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 863 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 864 /* emacs-mule detector, decoder and encoder.  */
 865 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 866                                          struct coding_detection_info *info));
 867 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 868 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 869 /* Shift-JIS detector, decoder and encoder.  */
 870 static int detect_coding_sjis P_ ((struct coding_system *,
 871                                    struct coding_detection_info *info));
 872 static void decode_coding_sjis P_ ((struct coding_system *));
 873 static int encode_coding_sjis P_ ((struct coding_system *));
 874 /* BIG5 detector, decoder and encoder.  */
 875 static int detect_coding_big5 P_ ((struct coding_system *,
 876                                    struct coding_detection_info *info));
 877 static void decode_coding_big5 P_ ((struct coding_system *));
 878 static int encode_coding_big5 P_ ((struct coding_system *));
 879 /* CCL detector, decoder and encoder.  */
 880 static int detect_coding_ccl P_ ((struct coding_system *,
 881                                   struct coding_detection_info *info));
 882 static void decode_coding_ccl P_ ((struct coding_system *));
 883 static int encode_coding_ccl P_ ((struct coding_system *));
 884 /* Raw-text decoder and encoder (no detector is declared here).  */
 885 static void decode_coding_raw_text P_ ((struct coding_system *));
 886 static int encode_coding_raw_text P_ ((struct coding_system *));
 887 /* Source/destination bookkeeping and general conversion helpers.  */
 888 static void coding_set_source P_ ((struct coding_system *));
 889 static void coding_set_destination P_ ((struct coding_system *));
 890 static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
 891 static void coding_alloc_by_making_gap P_ ((struct coding_system *,
 892                                             EMACS_INT, EMACS_INT));
 893 static unsigned char *alloc_destination P_ ((struct coding_system *,
 894                                              EMACS_INT, unsigned char *));
 895 static void setup_iso_safe_charsets P_ ((Lisp_Object));
 896 static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
 897                                                      int *, int *,
 898                                                      unsigned char *));
 899 static int detect_eol P_ ((const unsigned char *,
 900                            EMACS_INT, enum coding_category));
 901 static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
 902 static void decode_eol P_ ((struct coding_system *));
 903 static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
 904 static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
 905                                         int, int *, int *));
 906 static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
 907 static INLINE void produce_composition P_ ((struct coding_system *, int *,
 908                                             EMACS_INT));
 909 static INLINE void produce_charset P_ ((struct coding_system *, int *,
 910                                         EMACS_INT));
 911 static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
 912 static int decode_coding P_ ((struct coding_system *));
 913 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
 914                                                       struct coding_system *,
 915                                                       int *, EMACS_INT *));
 916 static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
 917                                                   struct coding_system *,
 918                                                   int *, EMACS_INT *));
 919 static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
 920 static int encode_coding P_ ((struct coding_system *));
 921 static Lisp_Object make_conversion_work_buffer P_ ((int));
 922 static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
 923 static INLINE int char_encodable_p P_ ((int, Lisp_Object));
 924 static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
 925
 926 static void
 927 record_conversion_result (struct coding_system *coding,
 928                           enum coding_result_code result)
 929 {
 930   coding->result = result;
 931   switch (result)
 932     {
 933     case CODING_RESULT_INSUFFICIENT_SRC:
 934       Vlast_code_conversion_error = Qinsufficient_source;
 935       break;
 936     case CODING_RESULT_INCONSISTENT_EOL:
 937       Vlast_code_conversion_error = Qinconsistent_eol;
 938       break;
 939     case CODING_RESULT_INVALID_SRC:
 940       Vlast_code_conversion_error = Qinvalid_source;
 941       break;
 942     case CODING_RESULT_INTERRUPT:
 943       Vlast_code_conversion_error = Qinterrupted;
 944       break;
 945     case CODING_RESULT_INSUFFICIENT_MEM:
 946       Vlast_code_conversion_error = Qinsufficient_memory;
 947       break;
 948     default:
 949       Vlast_code_conversion_error = intern ("Unknown error");
 950     }
 951 }
 952
 /* Decode CODE of CHARSET into C with DECODE_CHAR.  charset_map_loaded
    is cleared beforehand; if it is nonzero afterwards,
    coding_set_source is called to recompute CODING->source, and SRC,
    SRC_BASE and SRC_END are shifted by the distance CODING->source
    moved.  */
 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
   do {                                                                       \
     charset_map_loaded = 0;                                                  \
     c = DECODE_CHAR (charset, code);                                         \
     if (charset_map_loaded)                                                  \
       {                                                                      \
         const unsigned char *old_source = coding->source;                    \
         EMACS_INT moved_by;                                                  \
                                                                              \
         coding_set_source (coding);                                          \
         moved_by = coding->source - old_source;                              \
         src += moved_by;                                                     \
         src_base += moved_by;                                                \
         src_end += moved_by;                                                 \
       }                                                                      \
   } while (0)
 969
 970
 /* Make sure that more than BYTES bytes of room remain at dst.  If
    dst + BYTES reaches or passes dst_end, enlarge coding->destination
    with alloc_destination by BYTES plus the number of charbuf elements
    not yet processed, then update dst and dst_end.  We don't have to
    take care of coding->source which will be relocated.  It is
    handled by calling coding_set_source in encode_coding.  */

 #define ASSURE_DESTINATION(bytes)                               \
   do {                                                          \
     if (! (dst + (bytes) < dst_end))                            \
       {                                                         \
         int grow_by = charbuf_end - charbuf + (bytes);          \
                                                                 \
         dst = alloc_destination (coding, grow_by, dst);         \
         dst_end = coding->destination + coding->dst_bytes;      \
       }                                                         \
   } while (0)
 986
 987
 /* Store multibyte form of the character C in P, and advance P to the
    end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
    never calls MAYBE_UNIFY_CHAR.

    The form is chosen by comparing C with MAX_1_BYTE_CHAR through
    MAX_5_BYTE_CHAR; anything larger is written with BYTE8_STRING.
    Every use of C is parenthesized so that an expression argument
    (for instance `a | b') is encoded as a whole.  C and P are each
    evaluated several times, so neither may have side effects.  */

 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)              \
   do {                                                  \
     if ((c) <= MAX_1_BYTE_CHAR)                         \
       *(p)++ = (c);                                     \
     else if ((c) <= MAX_2_BYTE_CHAR)                    \
       *(p)++ = (0xC0 | ((c) >> 6)),                     \
         *(p)++ = (0x80 | ((c) & 0x3F));                 \
     else if ((c) <= MAX_3_BYTE_CHAR)                    \
       *(p)++ = (0xE0 | ((c) >> 12)),                    \
         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
         *(p)++ = (0x80 | ((c) & 0x3F));                 \
     else if ((c) <= MAX_4_BYTE_CHAR)                    \
       *(p)++ = (0xF0 | ((c) >> 18)),                    \
         *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
         *(p)++ = (0x80 | ((c) & 0x3F));                 \
     else if ((c) <= MAX_5_BYTE_CHAR)                    \
       *(p)++ = 0xF8,                                    \
         *(p)++ = (0x80 | (((c) >> 18) & 0x0F)),         \
         *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
         *(p)++ = (0x80 | ((c) & 0x3F));                 \
     else                                                \
       (p) += BYTE8_STRING ((c) - 0x3FFF80, p);          \
   } while (0)
1017
1018
1019 /* Return the character code of character whose multibyte form is at
1020    P, and advance P to the end of the multibyte form.  This is like
1021    STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.

        The length of the form (1 to 5 bytes) is chosen from the high
        bits of the first byte alone; the following bytes are masked and
        merged without being validated.  In the 5-byte case the first
        byte contributes nothing to the result.  A 2-byte form whose
        first byte is below 0xC2 gets 0x3FFF80 ORed into the result;
        this presumably maps it into the raw 8-bit byte range (confirm
        against BYTE8_TO_CHAR).  P is evaluated several times and is
        modified, so it must be an lvalue without side effects.  */
1022
1023 #define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
1024   (!((p)[0] & 0x80)                                             \
1025    ? *(p)++                                                     \
1026    : ! ((p)[0] & 0x20)                                          \
1027    ? ((p) += 2,                                                 \
1028       ((((p)[-2] & 0x1F) << 6)                                  \
1029        | ((p)[-1] & 0x3F)                                       \
1030        | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
1031    : ! ((p)[0] & 0x10)                                          \
1032    ? ((p) += 3,                                                 \
1033       ((((p)[-3] & 0x0F) << 12)                                 \
1034        | (((p)[-2] & 0x3F) << 6)                                \
1035        | ((p)[-1] & 0x3F)))                                     \
1036    : ! ((p)[0] & 0x08)                                          \
1037    ? ((p) += 4,                                                 \
1038       ((((p)[-4] & 0xF) << 18)                                  \
1039        | (((p)[-3] & 0x3F) << 12)                               \
1040        | (((p)[-2] & 0x3F) << 6)                                \
1041        | ((p)[-1] & 0x3F)))                                     \
1042    : ((p) += 5,                                                 \
1043       ((((p)[-4] & 0x3F) << 18)                                 \
1044        | (((p)[-3] & 0x3F) << 12)                               \
1045        | (((p)[-2] & 0x3F) << 6)                                \
1046        | ((p)[-1] & 0x3F))))
1047
1048
1049 static void
1050 coding_set_source (coding)
1051      struct coding_system *coding;
1052 {
1053   if (BUFFERP (coding->src_object))
1054     {
1055       struct buffer *buf = XBUFFER (coding->src_object);
1056
1057       if (coding->src_pos < 0)
1058         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1059       else
1060         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1061     }
1062   else if (STRINGP (coding->src_object))
1063     {
1064       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1065     }
1066   else
1067     /* Otherwise, the source is C string and is never relocated
1068        automatically.  Thus we don't have to update anything.  */
1069     ;
1070 }
1071
1072 static void
1073 coding_set_destination (coding)
1074      struct coding_system *coding;
1075 {
1076   if (BUFFERP (coding->dst_object))
1077     {
1078       if (coding->src_pos < 0)
1079         {
1080           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1081           coding->dst_bytes = (GAP_END_ADDR
1082                                - (coding->src_bytes - coding->consumed)
1083                                - coding->destination);
1084         }
1085       else
1086         {
1087           /* We are sure that coding->dst_pos_byte is before the gap
1088              of the buffer. */
1089           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1090                                  + coding->dst_pos_byte - BEG_BYTE);
1091           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1092                                - coding->destination);
1093         }
1094     }
1095   else
1096     /* Otherwise, the destination is C string and is never relocated
1097        automatically.  Thus we don't have to update anything.  */
1098     ;
1099 }
1100
1101
1102 static void
1103 coding_alloc_by_realloc (coding, bytes)
1104      struct coding_system *coding;
1105      EMACS_INT bytes;
1106 {
1107   coding->destination = (unsigned char *) xrealloc (coding->destination,
1108                                                     coding->dst_bytes + bytes);
1109   coding->dst_bytes += bytes;
1110 }
1111
1112 static void
1113 coding_alloc_by_making_gap (coding, gap_head_used, bytes)
1114      struct coding_system *coding;
1115      EMACS_INT gap_head_used, bytes;
1116 {
1117   if (EQ (coding->src_object, coding->dst_object))
1118     {
1119       /* The gap may contain the produced data at the head and not-yet
1120          consumed data at the tail.  To preserve those data, we at
1121          first make the gap size to zero, then increase the gap
1122          size.  */
1123       EMACS_INT add = GAP_SIZE;
1124
1125       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1126       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1127       make_gap (bytes);
1128       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1129       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1130     }
1131   else
1132     {
1133       Lisp_Object this_buffer;
1134
1135       this_buffer = Fcurrent_buffer ();
1136       set_buffer_internal (XBUFFER (coding->dst_object));
1137       make_gap (bytes);
1138       set_buffer_internal (XBUFFER (this_buffer));
1139     }
1140 }
1141
1142
1143 static unsigned char *
1144 alloc_destination (coding, nbytes, dst)
1145      struct coding_system *coding;
1146      EMACS_INT nbytes;
1147      unsigned char *dst;
1148 {
1149   EMACS_INT offset = dst - coding->destination;
1150
1151   if (BUFFERP (coding->dst_object))
1152     {
1153       struct buffer *buf = XBUFFER (coding->dst_object);
1154
1155       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1156     }
1157   else
1158     coding_alloc_by_realloc (coding, nbytes);
1159   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1160   coding_set_destination (coding);
1161   dst = coding->destination + offset;
1162   return dst;
1163 }
1164
1165 /** Macros for annotations.  */
1166
1167 /* Maximum length of annotation data (sum of annotations for
1168    composition and charset).  The two 4s match the four elements
        that ADD_COMPOSITION_DATA and ADD_CHARSET_DATA each store.
        NOTE(review): the middle term presumably bounds the optional
        composition components and rules -- confirm against the code
        that appends them.  */
1169 #define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1170
1171 /* An annotation data is stored in the array coding->charbuf in this
1172    format:
1173      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1174    LENGTH is the number of elements in the annotation.
1175    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1176    NCHARS is the number of characters in the text annotated.
1177
1178    The format of the following elements depends on ANNOTATION_MASK.
1179
1180    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1181    follow:
1182      ... METHOD [ COMPOSITION-COMPONENTS ... ]
1183    METHOD is one of enum composition_method.
1184    Optional COMPOSITION-COMPONENTS are characters and composition
1185    rules.
1186
1187    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1188    follows.  */
1189
/* Store the three-element annotation header -LEN, MASK, NCHARS at
   BUF, advancing BUF past it, and set coding->annotated.  The caller
   must have `coding' in scope.  The expansion deliberately does not
   end in a semicolon, so the macro can be used as the body of an
   unbraced `if' that is followed by `else'.  */
#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1197
/* Store a composition annotation at BUF: the header written by
   ADD_ANNOTATION_DATA (length 4, CODING_ANNOTATE_COMPOSITION_MASK,
   NCHARS) followed by METHOD.  BUF is advanced past all four
   elements.  BUF and METHOD are parenthesized so that an lvalue
   expression such as `*pp' advances the intended pointer.  */
#define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (method);                                                    \
  } while (0)
1203
1204
/* Store a charset annotation at BUF: the header written by
   ADD_ANNOTATION_DATA (length 4, CODING_ANNOTATE_CHARSET_MASK,
   NCHARS) followed by the charset ID.  BUF is advanced past all four
   elements.  BUF and ID are parenthesized so that an lvalue
   expression such as `*pp' advances the intended pointer.  */
#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1210
1211 \f
1212 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1213
1214
1215
1216 \f
1217 /*** 3. UTF-8 ***/
1218
1219 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1220    Check if a text is encoded in UTF-8.  If it is, return 1, else
1221    return 0.  */
1222
1223 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)  /* below 0x80: one octet */
1224 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)  /* 10xxxxxx */
1225 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)  /* 110xxxxx */
1226 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)  /* 1110xxxx */
1227 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)  /* 11110xxx */
1228 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)  /* 111110xx */
1229 /* The byte order mark and the three octets of its UTF-8 form.  */
1230 #define UTF_BOM 0xFEFF
1231 #define UTF_8_BOM_1 0xEF
1232 #define UTF_8_BOM_2 0xBB
1233 #define UTF_8_BOM_3 0xBF
1234
1235 static int
1236 detect_coding_utf_8 (coding, detect_info)
1237      struct coding_system *coding;
1238      struct coding_detection_info *detect_info;
1239 {
1240   const unsigned char *src = coding->source, *src_base;
1241   const unsigned char *src_end = coding->source + coding->src_bytes;
1242   int multibytep = coding->src_multibyte;
1243   int consumed_chars = 0;
1244   int bom_found = 0;
1245   int found = 0;  /* Set below, but never read in this function.  */
1246   /* Record that the UTF-8 categories have been checked.  */
1247   detect_info->checked |= CATEGORY_MASK_UTF_8;
1248   /* A coding system of this category is always ASCII compatible.  */
1249   src += coding->head_ascii;
1250   /* Examine one sequence per iteration.  A malformed sequence leaves
          the loop with `break' and rejects UTF-8.  NOTE(review): the
          label `no_more_source' has no explicit goto here; it is
          presumably reached from inside ONE_MORE_BYTE when the source
          runs out (the head of that macro is outside this view).  */
1251   while (1)
1252     {
1253       int c, c1, c2, c3, c4;
1254       /* Remember where the sequence being examined starts.  */
1255       src_base = src;
1256       ONE_MORE_BYTE (c);
1257       if (c < 0 || UTF_8_1_OCTET_P (c))
1258         continue;
1259       ONE_MORE_BYTE (c1);
1260       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1261         break;
1262       if (UTF_8_2_OCTET_LEADING_P (c))
1263         {
1264           found = 1;
1265           continue;
1266         }
1267       ONE_MORE_BYTE (c2);
1268       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1269         break;
1270       if (UTF_8_3_OCTET_LEADING_P (c))
1271         {
1272           found = 1;
               /* EF BB BF at the very start of the source may be a BOM.  */
1273           if (src_base == coding->source
1274               && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1275             bom_found = 1;
1276           continue;
1277         }
1278       ONE_MORE_BYTE (c3);
1279       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1280         break;
1281       if (UTF_8_4_OCTET_LEADING_P (c))
1282         {
1283           found = 1;
1284           continue;
1285         }
1286       ONE_MORE_BYTE (c4);
1287       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1288         break;
1289       if (UTF_8_5_OCTET_LEADING_P (c))
1290         {
1291           found = 1;
1292           continue;
1293         }
1294       break;
1295     }
1296   detect_info->rejected |= CATEGORY_MASK_UTF_8;  /* Malformed sequence.  */
1297   return 0;
1298
1299  no_more_source:
       /* src_base < src means the source ended after the start of a
          sequence; in the last block that is a truncated sequence.  */
1300   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1301     {
1302       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1303       return 0;
1304     }
1305   if (bom_found)
1306     {
1307       /* The first character 0xFEFF doesn't necessarily mean a BOM.  */
1308       detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1309     }
1310   else
1311     {
1312       detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
1313       detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
1314     }
1315   return 1;
1316 }
1317
1318
1319 static void
1320 decode_coding_utf_8 (coding)
1321      struct coding_system *coding;
1322 {
1323   const unsigned char *src = coding->source + coding->consumed;
1324   const unsigned char *src_end = coding->source + coding->src_bytes;
1325   const unsigned char *src_base;
1326   int *charbuf = coding->charbuf + coding->charbuf_used;
1327   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1328   int consumed_chars = 0, consumed_chars_base;
1329   int multibytep = coding->src_multibyte;
1330   enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1331   Lisp_Object attr, charset_list;
1332   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1333   int byte_after_cr = -1;
1334
1335   CODING_GET_INFO (coding, attr, charset_list);
1336
1337   if (bom != utf_without_bom)
1338     {
1339       int c1, c2, c3;
1340
1341       src_base = src;
1342       ONE_MORE_BYTE (c1);
1343       if (! UTF_8_3_OCTET_LEADING_P (c1))
1344         src = src_base;
1345       else
1346         {
1347           ONE_MORE_BYTE (c2);
1348           if (! UTF_8_EXTRA_OCTET_P (c2))
1349             src = src_base;
1350           else
1351             {
1352               ONE_MORE_BYTE (c3);
1353               if (! UTF_8_EXTRA_OCTET_P (c3))
1354                 src = src_base;
1355               else
1356                 {
1357                   if ((c1 != UTF_8_BOM_1)
1358                       || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1359                     src = src_base;
1360                   else
1361                     CODING_UTF_8_BOM (coding) = utf_without_bom;
1362                 }
1363             }
1364         }
1365     }
1366   CODING_UTF_8_BOM (coding) = utf_without_bom;
1367
1368
1369
1370   while (1)
1371     {
1372       int c, c1, c2, c3, c4, c5;
1373
1374       src_base = src;
1375       consumed_chars_base = consumed_chars;
1376
1377       if (charbuf >= charbuf_end)
1378         break;
1379
1380       if (byte_after_cr >= 0)
1381         c1 = byte_after_cr, byte_after_cr = -1;
1382       else
1383         ONE_MORE_BYTE (c1);
1384       if (c1 < 0)
1385         {
1386           c = - c1;
1387         }
1388       else if (UTF_8_1_OCTET_P(c1))
1389         {
1390           if (eol_crlf && c1 == '\r')
1391             ONE_MORE_BYTE (byte_after_cr);
1392           c = c1;
1393         }
1394       else
1395         {
1396           ONE_MORE_BYTE (c2);
1397           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1398             goto invalid_code;
1399           if (UTF_8_2_OCTET_LEADING_P (c1))
1400             {
1401               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1402               /* Reject overlong sequences here and below.  Encoders
1403                  producing them are incorrect, they can be misleading,
1404                  and they mess up read/write invariance.  */
1405               if (c < 128)
1406                 goto invalid_code;
1407             }
1408           else
1409             {
1410               ONE_MORE_BYTE (c3);
1411               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1412                 goto invalid_code;
1413               if (UTF_8_3_OCTET_LEADING_P (c1))
1414                 {
1415                   c = (((c1 & 0xF) << 12)
1416                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1417                   if (c < 0x800
1418                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1419                     goto invalid_code;
1420                 }
1421               else
1422                 {
1423                   ONE_MORE_BYTE (c4);
1424                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1425                     goto invalid_code;
1426                   if (UTF_8_4_OCTET_LEADING_P (c1))
1427                     {
1428                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1429                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1430                     if (c < 0x10000)
1431                       goto invalid_code;
1432                     }
1433                   else
1434                     {
1435                       ONE_MORE_BYTE (c5);
1436                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1437                         goto invalid_code;
1438                       if (UTF_8_5_OCTET_LEADING_P (c1))
1439                         {
1440                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1441                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1442                                | (c5 & 0x3F));
1443                           if ((c > MAX_CHAR) || (c < 0x200000))
1444                             goto invalid_code;
1445                         }
1446                       else
1447                         goto invalid_code;
1448                     }
1449                 }
1450             }
1451         }
1452
1453       *charbuf++ = c;
1454       continue;
1455
1456     invalid_code:
1457       src = src_base;
1458       consumed_chars = consumed_chars_base;
1459       ONE_MORE_BYTE (c);
1460       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1461       coding->errors++;
1462     }
1463
1464  no_more_source:
1465   coding->consumed_char += consumed_chars_base;
1466   coding->consumed = src_base - coding->source;
1467   coding->charbuf_used = charbuf - coding->charbuf;
1468 }
1469
1470
1471 static int
1472 encode_coding_utf_8 (coding)
1473      struct coding_system *coding;
1474 {
1475   int multibytep = coding->dst_multibyte;
1476   int *charbuf = coding->charbuf;
1477   int *charbuf_end = charbuf + coding->charbuf_used;
1478   unsigned char *dst = coding->destination + coding->produced;
1479   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1480   int produced_chars = 0;
1481   int c;
1482
1483   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1484     {
1485       ASSURE_DESTINATION (3);
1486       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1487       CODING_UTF_8_BOM (coding) = utf_without_bom;
1488     }
1489
1490   if (multibytep)
1491     {
1492       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1493
1494       while (charbuf < charbuf_end)
1495         {
1496           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1497
1498           ASSURE_DESTINATION (safe_room);
1499           c = *charbuf++;
1500           if (CHAR_BYTE8_P (c))
1501             {
1502               c = CHAR_TO_BYTE8 (c);
1503               EMIT_ONE_BYTE (c);
1504             }
1505           else
1506             {
1507               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1508               for (p = str; p < pend; p++)
1509                 EMIT_ONE_BYTE (*p);
1510             }
1511         }
1512     }
1513   else
1514     {
1515       int safe_room = MAX_MULTIBYTE_LENGTH;
1516
1517       while (charbuf < charbuf_end)
1518         {
1519           ASSURE_DESTINATION (safe_room);
1520           c = *charbuf++;
1521           if (CHAR_BYTE8_P (c))
1522             *dst++ = CHAR_TO_BYTE8 (c);
1523           else
1524             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1525           produced_chars++;
1526         }
1527     }
1528   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1529   coding->produced_char += produced_chars;
1530   coding->produced = dst - coding->destination;
1531   return 0;
1532 }
1533
1534
1535 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1536    Check if a text is encoded in one of UTF-16 based coding systems.
1537    If it is, return 1, else return 0.  */
1538
1539 #define UTF_16_HIGH_SURROGATE_P(val) \
1540   (((val) & 0xFC00) == 0xD800)  /* low 16 bits in 0xD800..0xDBFF */
1541
1542 #define UTF_16_LOW_SURROGATE_P(val) \
1543   (((val) & 0xFC00) == 0xDC00)  /* low 16 bits in 0xDC00..0xDFFF */
1544
1545 #define UTF_16_INVALID_P(val)   \
1546   (((val) == 0xFFFE)            \
1547    || ((val) == 0xFFFF)         \
1548    || UTF_16_LOW_SURROGATE_P (val))  /* 0xFFFE, 0xFFFF or a low surrogate */
1549
1550
1551 static int
1552 detect_coding_utf_16 (coding, detect_info)
1553      struct coding_system *coding;
1554      struct coding_detection_info *detect_info;
1555 {
1556   const unsigned char *src = coding->source, *src_base = src;
1557   const unsigned char *src_end = coding->source + coding->src_bytes;
1558   int multibytep = coding->src_multibyte;
1559   int consumed_chars = 0;
1560   int c1, c2;
1561
1562   detect_info->checked |= CATEGORY_MASK_UTF_16;
1563   if (coding->mode & CODING_MODE_LAST_BLOCK
1564       && (coding->src_chars & 1))
1565     {
1566       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1567       return 0;
1568     }
1569
1570   ONE_MORE_BYTE (c1);
1571   ONE_MORE_BYTE (c2);
1572   if ((c1 == 0xFF) && (c2 == 0xFE))
1573     {
1574       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1575                              | CATEGORY_MASK_UTF_16_AUTO);
1576       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1577                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1578                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1579     }
1580   else if ((c1 == 0xFE) && (c2 == 0xFF))
1581     {
1582       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1583                              | CATEGORY_MASK_UTF_16_AUTO);
1584       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1585                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1586                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1587     }
1588   else
1589     {
1590       /* We check the dispersion of Eth and Oth bytes where E is even and
1591          O is odd.  If both are high, we assume binary data.*/
1592       unsigned char e[256], o[256];
1593       unsigned e_num = 1, o_num = 1;
1594
1595       memset (e, 0, 256);
1596       memset (o, 0, 256);
1597       e[c1] = 1;
1598       o[c2] = 1;
1599
1600       detect_info->rejected
1601         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1602
1603       while (1)
1604         {
1605           ONE_MORE_BYTE (c1);
1606           ONE_MORE_BYTE (c2);
1607           if (! e[c1])
1608             {
1609               e[c1] = 1;
1610               e_num++;
1611               if (e_num >= 128)
1612                 break;
1613             }
1614           if (! o[c2])
1615             {
1616               o[c1] = 1;
1617               o_num++;
1618               if (o_num >= 128)
1619                 break;
1620             }
1621         }
1622       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1623       return 0;
1624     }
1625
1626  no_more_source:
1627   return 1;
1628 }
1629
1630 static void
1631 decode_coding_utf_16 (coding)
1632      struct coding_system *coding;
1633 {
1634   const unsigned char *src = coding->source + coding->consumed;
1635   const unsigned char *src_end = coding->source + coding->src_bytes;
1636   const unsigned char *src_base;
1637   int *charbuf = coding->charbuf + coding->charbuf_used;
1638   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1639   int consumed_chars = 0, consumed_chars_base;
1640   int multibytep = coding->src_multibyte;
1641   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1642   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1643   int surrogate = CODING_UTF_16_SURROGATE (coding);
1644   Lisp_Object attr, charset_list;
1645   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1646   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1647
1648   CODING_GET_INFO (coding, attr, charset_list);
1649
1650   if (bom == utf_with_bom)
1651     {
1652       int c, c1, c2;
1653
1654       src_base = src;
1655       ONE_MORE_BYTE (c1);
1656       ONE_MORE_BYTE (c2);
1657       c = (c1 << 8) | c2;
1658
1659       if (endian == utf_16_big_endian
1660           ? c != 0xFEFF : c != 0xFFFE)
1661         {
1662           /* The first two bytes are not BOM.  Treat them as bytes
1663              for a normal character.  */
1664           src = src_base;
1665           coding->errors++;
1666         }
1667       CODING_UTF_16_BOM (coding) = utf_without_bom;
1668     }
1669   else if (bom == utf_detect_bom)
1670     {
1671       /* We have already tried to detect BOM and failed in
1672          detect_coding.  */
1673       CODING_UTF_16_BOM (coding) = utf_without_bom;
1674     }
1675
1676   while (1)
1677     {
1678       int c, c1, c2;
1679
1680       src_base = src;
1681       consumed_chars_base = consumed_chars;
1682
1683       if (charbuf + 2 >= charbuf_end)
1684         break;
1685
1686       if (byte_after_cr1 >= 0)
1687         c1 = byte_after_cr1, byte_after_cr1 = -1;
1688       else
1689         ONE_MORE_BYTE (c1);
1690       if (c1 < 0)
1691         {
1692           *charbuf++ = -c1;
1693           continue;
1694         }
1695       if (byte_after_cr2 >= 0)
1696         c2 = byte_after_cr2, byte_after_cr2 = -1;
1697       else
1698         ONE_MORE_BYTE (c2);
1699       if (c2 < 0)
1700         {
1701           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1702           *charbuf++ = -c2;
1703           continue;
1704         }
1705       c = (endian == utf_16_big_endian
1706            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1707
1708       if (surrogate)
1709         {
1710           if (! UTF_16_LOW_SURROGATE_P (c))
1711             {
1712               if (endian == utf_16_big_endian)
1713                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1714               else
1715                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1716               *charbuf++ = c1;
1717               *charbuf++ = c2;
1718               coding->errors++;
1719               if (UTF_16_HIGH_SURROGATE_P (c))
1720                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1721               else
1722                 *charbuf++ = c;
1723             }
1724           else
1725             {
1726               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1727               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1728               *charbuf++ = 0x10000 + c;
1729             }
1730         }
1731       else
1732         {
1733           if (UTF_16_HIGH_SURROGATE_P (c))
1734             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1735           else
1736             {
1737               if (eol_crlf && c == '\r')
1738                 {
1739                   ONE_MORE_BYTE (byte_after_cr1);
1740                   ONE_MORE_BYTE (byte_after_cr2);
1741                 }
1742               *charbuf++ = c;
1743             }
1744         }
1745     }
1746
1747  no_more_source:
1748   coding->consumed_char += consumed_chars_base;
1749   coding->consumed = src_base - coding->source;
1750   coding->charbuf_used = charbuf - coding->charbuf;
1751 }
1752
1753 static int
1754 encode_coding_utf_16 (coding)
1755      struct coding_system *coding;
1756 {
1757   int multibytep = coding->dst_multibyte;
1758   int *charbuf = coding->charbuf;
1759   int *charbuf_end = charbuf + coding->charbuf_used;
1760   unsigned char *dst = coding->destination + coding->produced;
1761   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1762   int safe_room = 8;
1763   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1764   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1765   int produced_chars = 0;
1766   Lisp_Object attrs, charset_list;
1767   int c;
1768
1769   CODING_GET_INFO (coding, attrs, charset_list);
1770
1771   if (bom != utf_without_bom)
1772     {
1773       ASSURE_DESTINATION (safe_room);
1774       if (big_endian)
1775         EMIT_TWO_BYTES (0xFE, 0xFF);
1776       else
1777         EMIT_TWO_BYTES (0xFF, 0xFE);
1778       CODING_UTF_16_BOM (coding) = utf_without_bom;
1779     }
1780
1781   while (charbuf < charbuf_end)
1782     {
1783       ASSURE_DESTINATION (safe_room);
1784       c = *charbuf++;
1785       if (c >= MAX_UNICODE_CHAR)
1786         c = coding->default_char;
1787
1788       if (c < 0x10000)
1789         {
1790           if (big_endian)
1791             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1792           else
1793             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1794         }
1795       else
1796         {
1797           int c1, c2;
1798
1799           c -= 0x10000;
1800           c1 = (c >> 10) + 0xD800;
1801           c2 = (c & 0x3FF) + 0xDC00;
1802           if (big_endian)
1803             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1804           else
1805             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1806         }
1807     }
1808   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1809   coding->produced = dst - coding->destination;
1810   coding->produced_char += produced_chars;
1811   return 0;
1812 }
1813
1814 \f
1815 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1816
1817 /* Emacs' internal format for representation of multiple character
1818    sets is a kind of multi-byte encoding, i.e. characters are
1819    represented by variable-length sequences of one-byte codes.
1820
1821    ASCII characters and control characters (e.g. `tab', `newline') are
1822    represented by one-byte sequences which are their ASCII codes, in
1823    the range 0x00 through 0x7F.
1824
1825    8-bit characters of the range 0x80..0x9F are represented by
1826    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1827    code + 0x20).
1828
1829    8-bit characters of the range 0xA0..0xFF are represented by
1830    one-byte sequences which are their 8-bit code.
1831
1832    The other characters are represented by a sequence of `base
1833    leading-code', optional `extended leading-code', and one or two
1834    `position-code's.  The length of the sequence is determined by the
1835    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1836    whereas extended leading-code and position-code take the range 0xA0
1837    through 0xFF.  See `charset.h' for more details about leading-code
1838    and position-code.
1839
1840    --- CODE RANGE of Emacs' internal format ---
1841    character set        range
1842    -------------        -----
1843    ascii                0x00..0x7F
1844    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1845    eight-bit-graphic    0xA0..0xFF
1846    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1847    ---------------------------------------------
1848
1849    As this is the internal character representation, the format is
1850    usually not used externally (i.e. in a file or in a data sent to a
1851    process).  But, it is possible to have a text externally in this
1852    format (i.e. by encoding by the coding system `emacs-mule').
1853
1854    In that case, a sequence of one-byte codes has a slightly different
1855    form.
1856
1857    At first, all characters in eight-bit-control are represented by
1858    one-byte sequences which are their 8-bit code.
1859
1860    Next, character composition data are represented by the byte
1861    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1862    where,
1863         METHOD is 0xF2 plus one of the composition methods (enum
1864         composition_method),
1865
1866         BYTES is 0xA0 plus the byte length of this composition data,
1867
1868         CHARS is 0xA0 plus the number of characters composed by this
1869         data,
1870
1871         COMPONENTs are characters in multibyte form or composition
1872         rules encoded as two bytes of ASCII codes.
1873
1874    In addition, for backward compatibility, the following formats are
1875    also recognized as composition data on decoding.
1876
1877    0x80 MSEQ ...
1878    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1879
1880    Here,
1881         MSEQ is a multibyte form but in these special format:
1882           ASCII: 0xA0 ASCII_CODE+0x80,
1883           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1884         RULE is a one byte code of the range 0xA0..0xF0 that
1885         represents a composition rule.
1886   */
1887
1888 char emacs_mule_bytes[256];
1889
1890 int
1891 emacs_mule_char (coding, src, nbytes, nchars, id)
1892      struct coding_system *coding;
1893      const unsigned char *src;
1894      int *nbytes, *nchars, *id;
1895 {
1896   const unsigned char *src_end = coding->source + coding->src_bytes;
1897   const unsigned char *src_base = src;
1898   int multibytep = coding->src_multibyte;
1899   struct charset *charset;
1900   unsigned code;
1901   int c;
1902   int consumed_chars = 0;
1903
1904   ONE_MORE_BYTE (c);
1905   if (c < 0)
1906     {
1907       c = -c;
1908       charset = emacs_mule_charset[0];
1909     }
1910   else
1911     {
1912       if (c >= 0xA0)
1913         {
1914           /* Old style component character of a composition.  */
1915           if (c == 0xA0)
1916             {
1917               ONE_MORE_BYTE (c);
1918               c -= 0x80;
1919             }
1920           else
1921             c -= 0x20;
1922         }
1923
1924       switch (emacs_mule_bytes[c])
1925         {
1926         case 2:
1927           if (! (charset = emacs_mule_charset[c]))
1928             goto invalid_code;
1929           ONE_MORE_BYTE (c);
1930           if (c < 0xA0)
1931             goto invalid_code;
1932           code = c & 0x7F;
1933           break;
1934
1935         case 3:
1936           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1937               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1938             {
1939               ONE_MORE_BYTE (c);
1940               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
1941                 goto invalid_code;
1942               ONE_MORE_BYTE (c);
1943               if (c < 0xA0)
1944                 goto invalid_code;
1945               code = c & 0x7F;
1946             }
1947           else
1948             {
1949               if (! (charset = emacs_mule_charset[c]))
1950                 goto invalid_code;
1951               ONE_MORE_BYTE (c);
1952               if (c < 0xA0)
1953                 goto invalid_code;
1954               code = (c & 0x7F) << 8;
1955               ONE_MORE_BYTE (c);
1956               if (c < 0xA0)
1957                 goto invalid_code;
1958               code |= c & 0x7F;
1959             }
1960           break;
1961
1962         case 4:
1963           ONE_MORE_BYTE (c);
1964           if (c < 0 || ! (charset = emacs_mule_charset[c]))
1965             goto invalid_code;
1966           ONE_MORE_BYTE (c);
1967           if (c < 0xA0)
1968             goto invalid_code;
1969           code = (c & 0x7F) << 8;
1970           ONE_MORE_BYTE (c);
1971           if (c < 0xA0)
1972             goto invalid_code;
1973           code |= c & 0x7F;
1974           break;
1975
1976         case 1:
1977           code = c;
1978           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1979                                      ? charset_ascii : charset_eight_bit);
1980           break;
1981
1982         default:
1983           abort ();
1984         }
1985       c = DECODE_CHAR (charset, code);
1986       if (c < 0)
1987         goto invalid_code;
1988     }
1989   *nbytes = src - src_base;
1990   *nchars = consumed_chars;
1991   if (id)
1992     *id = charset->id;
1993   return c;
1994
1995  no_more_source:
1996   return -2;
1997
1998  invalid_code:
1999   return -1;
2000 }
2001
2002
2003 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2004    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
2005    else return 0.  */
2006
2007 static int
2008 detect_coding_emacs_mule (coding, detect_info)
2009      struct coding_system *coding;
2010      struct coding_detection_info *detect_info;
2011 {
2012   const unsigned char *src = coding->source, *src_base;
2013   const unsigned char *src_end = coding->source + coding->src_bytes;
2014   int multibytep = coding->src_multibyte;
2015   int consumed_chars = 0;
2016   int c;
2017   int found = 0;
2018
2019   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
2020   /* A coding system of this category is always ASCII compatible.  */
2021   src += coding->head_ascii;
2022
2023   while (1)
2024     {
2025       src_base = src;
2026       ONE_MORE_BYTE (c);
2027       if (c < 0)
2028         continue;
2029       if (c == 0x80)
2030         {
2031           /* Perhaps the start of composite character.  We simple skip
2032              it because analyzing it is too heavy for detecting.  But,
2033              at least, we check that the composite character
2034              constitutes of more than 4 bytes.  */
2035           const unsigned char *src_base;
2036
2037         repeat:
2038           src_base = src;
2039           do
2040             {
2041               ONE_MORE_BYTE (c);
2042             }
2043           while (c >= 0xA0);
2044
2045           if (src - src_base <= 4)
2046             break;
2047           found = CATEGORY_MASK_EMACS_MULE;
2048           if (c == 0x80)
2049             goto repeat;
2050         }
2051
2052       if (c < 0x80)
2053         {
2054           if (c < 0x20
2055               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2056             break;
2057         }
2058       else
2059         {
2060           int more_bytes = emacs_mule_bytes[*src_base] - 1;
2061
2062           while (more_bytes > 0)
2063             {
2064               ONE_MORE_BYTE (c);
2065               if (c < 0xA0)
2066                 {
2067                   src--;        /* Unread the last byte.  */
2068                   break;
2069                 }
2070               more_bytes--;
2071             }
2072           if (more_bytes != 0)
2073             break;
2074           found = CATEGORY_MASK_EMACS_MULE;
2075         }
2076     }
2077   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2078   return 0;
2079
2080  no_more_source:
2081   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2082     {
2083       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2084       return 0;
2085     }
2086   detect_info->found |= found;
2087   return 1;
2088 }
2089
2090
2091 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2092
2093 /* Decode one component character of an Emacs 20/21 style composition
2094    sequence at SRC.  On success store the character in *BUF, increment
2095    BUF, advance SRC past it and add the number of consumed characters
2096    to CONSUMED_CHARS.  Do nothing if SRC is already at SRC_END or if
2097    emacs_mule_char returns -2 (source exhausted).  On any other
2098    negative result jump to the label `invalid_code'.
        The macro expects `coding', `src', `src_end' and `consumed_chars'
        to be variables, and `invalid_code' a label, of the function that
        expands it.  Note that the `break's below only leave this macro's
        own do-while, so the expanding code is not told when nothing was
        decoded.  */
2099
2100 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
2101   do                                                            \
2102     {                                                           \
2103       int c;                                                    \
2104       int nbytes, nchars;                                       \
2105                                                                 \
2106       if (src == src_end)                                       \
2107         break;                                                  \
2108       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
2109       if (c < 0)                                                \
2110         {                                                       \
2111           if (c == -2)                                          \
2112             break;                                              \
2113           goto invalid_code;                                    \
2114         }                                                       \
2115       *buf++ = c;                                               \
2116       src += nbytes;                                            \
2117       consumed_chars += nchars;                                 \
2118     }                                                           \
2119   while (0)
2120
2121
2122 /* Decode a composition rule that is a component of an Emacs 20 style
2123    composition sequence at SRC.  The rule is a single byte; after
2124    subtracting 0xA0 it must be in the range 0..80 and is split into
2125    GREF (value / 9) and NREF (value % 9).  Store
        COMPOSITION_ENCODE_RULE (GREF, NREF) in *BUF and increment BUF.
        If SRC is at or beyond SRC_END, or the value is out of range, jump
        to the label `invalid_code' of the expanding function.  */
2126
2127 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
2128   do {                                                  \
2129     int c, gref, nref;                                  \
2130                                                         \
2131     if (src >= src_end)                                 \
2132       goto invalid_code;                                \
2133     ONE_MORE_BYTE_NO_CHECK (c);                         \
2134     c -= 0xA0;                                          \
2135     if (c < 0 || c >= 81)                               \
2136       goto invalid_code;                                \
2137                                                         \
2138     gref = c / 9, nref = c % 9;                         \
2139     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
2140   } while (0)
2141
2142
2143 /* Decode a composition rule that is a component of an Emacs 21 style
2144    composition sequence at SRC.  The rule takes two bytes: the first
2145    minus 0x20 is GREF, the second minus 0x20 is NREF.  Store
2146    COMPOSITION_ENCODE_RULE (GREF, NREF) in *BUF and increment BUF.
        If fewer than two bytes remain before SRC_END, or GREF or NREF is
        outside 0..80, jump to the label `invalid_code' of the expanding
        function.
        NOTE(review): the Emacs 20 variant above limits GREF and NREF to
        0..8 each (value / 9 and value % 9), whereas this one accepts up
        to 80 for each -- confirm that the wider range is intended.  */
2147
2148 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
2149   do {                                                  \
2150     int gref, nref;                                     \
2151                                                         \
2152     if (src + 1>= src_end)                              \
2153       goto invalid_code;                                \
2154     ONE_MORE_BYTE_NO_CHECK (gref);                      \
2155     gref -= 0x20;                                       \
2156     ONE_MORE_BYTE_NO_CHECK (nref);                      \
2157     nref -= 0x20;                                       \
2158     if (gref < 0 || gref >= 81                          \
2159         || nref < 0 || nref >= 81)                      \
2160       goto invalid_code;                                \
2161     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
2162   } while (0)
2163
2164
2165 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
2166   do {                                                                  \
2167     /* Emacs 21 style format.  On entry C holds the METHOD byte, i.e.   \
2168        the composition method plus 0xF2.  The next two bytes are        \
2169        BYTES + 0xA0 and CHARS + 0xA0, where BYTES is the byte length of \
2170        this composition information and CHARS is the number of          \
            characters composed by this composition.  A negative byte or    \
            BYTES < 3 jumps to `invalid_code'.                               \
                                                                             \
            ADD_COMPOSITION_DATA is then invoked with CHARS and the method. \
            For every method except COMPOSITION_RELATIVE the components     \
            that follow are decoded into CHARBUF: characters only for       \
            COMPOSITION_WITH_ALTCHARS, otherwise characters alternating     \
            with Emacs 21 style rules.  Finally the number of decoded       \
            components is subtracted from the first element stored at the   \
            old CHARBUF position (presumably a length field written by      \
            ADD_COMPOSITION_DATA -- confirm).                                \
                                                                             \
            NOTE(review): when the source is exhausted,                      \
            DECODE_EMACS_MULE_COMPOSITION_CHAR only leaves its own           \
            do-while, so CONSUMED_CHARS does not advance; confirm how the    \
            while loop below terminates then for                             \
            COMPOSITION_WITH_ALTCHARS.                                       \
            NOTE(review): that loop only exits once its condition is         \
            false, so the identical test right after it appears never to     \
            be true.  */                                                     \
2171     enum composition_method method = c - 0xF2;                          \
2172     int *charbuf_base = charbuf;                                        \
2173     int consumed_chars_limit;                                           \
2174     int nbytes, nchars;                                                 \
2175                                                                         \
2176     ONE_MORE_BYTE (c);                                                  \
2177     if (c < 0)                                                          \
2178       goto invalid_code;                                                \
2179     nbytes = c - 0xA0;                                                  \
2180     if (nbytes < 3)                                                     \
2181       goto invalid_code;                                                \
2182     ONE_MORE_BYTE (c);                                                  \
2183     if (c < 0)                                                          \
2184       goto invalid_code;                                                \
2185     nchars = c - 0xA0;                                                  \
2186     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
2187     consumed_chars_limit = consumed_chars_base + nbytes;                \
2188     if (method != COMPOSITION_RELATIVE)                                 \
2189       {                                                                 \
2190         int i = 0;                                                      \
2191         while (consumed_chars < consumed_chars_limit)                   \
2192           {                                                             \
2193             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
2194               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
2195             else                                                        \
2196               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
2197             i++;                                                        \
2198           }                                                             \
2199         if (consumed_chars < consumed_chars_limit)                      \
2200           goto invalid_code;                                            \
2201         charbuf_base[0] -= i;                                           \
2202       }                                                                 \
2203   } while (0)
2204
2205
2206 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)                    \
2207   do {                                                                  \
2208     /* Emacs 20 style format for relative composition: 0x80 followed    \
            by the component characters.  SRC is first rewound to SRC_BASE  \
            and the 0x80 byte is read again into C.  */                      \
2209     /* Collect the decoded component characters in COMPONENTS while     \
            the next source byte is >= 0xA0, running the loop at most        \
            MAX_COMPOSITION_COMPONENTS times.  If the loop ran fewer than    \
            two times, jump to `invalid_code'.  Otherwise invoke             \
            ADD_COMPOSITION_DATA and copy the collected characters to        \
            CHARBUF.                                                         \
            NOTE(review): the loop condition reads *SRC without comparing    \
            SRC against SRC_END -- confirm that the byte at SRC_END is       \
            readable.  */                                                    \
2210     enum composition_method method = COMPOSITION_RELATIVE;              \
2211     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];                 \
2212     int *buf = components;                                              \
2213     int i, j;                                                           \
2214                                                                         \
2215     src = src_base;                                                     \
2216     ONE_MORE_BYTE (c);          /* skip 0x80 */                         \
2217     for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++)    \
2218       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                         \
2219     if (i < 2)                                                          \
2220       goto invalid_code;                                                \
2221     ADD_COMPOSITION_DATA (charbuf, i, method);                          \
2222     for (j = 0; j < i; j++)                                             \
2223       *charbuf++ = components[j];                                       \
2224   } while (0)
2225
2226
2227 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
2228   do {                                                          \
2229     /* Emacs 20 style format for rule-base composition.  The    \
            argument C is not used by this macro.  */                \
2230     /* Collect in COMPONENTS a first character and then, while  \
            the next source byte is >= 0xA0, pairs of an Emacs 20    \
            style rule and a character, so characters sit at even    \
            and rules at odd indexes.  I counts the characters.      \
            Jump to `invalid_code' if there is no pair at all or if  \
            an even number of elements was collected, and to         \
            `no_more_source' if CHARBUF lacks room.  After           \
            ADD_COMPOSITION_DATA, all 2*I-1 elements are copied to   \
            CHARBUF, that count is subtracted from the first element \
            stored at the old CHARBUF position (presumably a length  \
            field -- confirm), and the characters alone are copied   \
            once more.                                               \
            NOTE(review): the loop reads *SRC without comparing SRC  \
            against SRC_END -- confirm that this is safe.  */        \
2231     enum composition_method method = COMPOSITION_WITH_RULE;     \
2232     int *charbuf_base = charbuf;                                \
2233     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
2234     int *buf = components;                                      \
2235     int i, j;                                                   \
2236                                                                 \
2237     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
2238     for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++)            \
2239       {                                                         \
2240         if (*src < 0xA0)                                        \
2241           break;                                                \
2242         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
2243         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
2244       }                                                         \
2245     if (i <= 1 || (buf - components) % 2 == 0)                  \
2246       goto invalid_code;                                        \
2247     if (charbuf + i + (i / 2) + 1 >= charbuf_end)               \
2248       goto no_more_source;                                      \
2249     ADD_COMPOSITION_DATA (charbuf, i, method);                  \
2250     i = i * 2 - 1;                                              \
2251     for (j = 0; j < i; j++)                                     \
2252       *charbuf++ = components[j];                               \
2253     charbuf_base[0] -= i;                                       \
2254     for (j = 0; j < i; j += 2)                                  \
2255       *charbuf++ = components[j];                               \
2256   } while (0)
2257
2258
2259 static void
2260 decode_coding_emacs_mule (coding)
2261      struct coding_system *coding;
2262 {
2263   const unsigned char *src = coding->source + coding->consumed;
2264   const unsigned char *src_end = coding->source + coding->src_bytes;
2265   const unsigned char *src_base;
2266   int *charbuf = coding->charbuf + coding->charbuf_used;
2267   int *charbuf_end
2268     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
2269   int consumed_chars = 0, consumed_chars_base;
2270   int multibytep = coding->src_multibyte;
2271   Lisp_Object attrs, charset_list;
2272   int char_offset = coding->produced_char;
2273   int last_offset = char_offset;
2274   int last_id = charset_ascii;
2275   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2276   int byte_after_cr = -1;
2277
2278   CODING_GET_INFO (coding, attrs, charset_list);
2279
2280   while (1)
2281     {
2282       int c;
2283
2284       src_base = src;
2285       consumed_chars_base = consumed_chars;
2286
2287       if (charbuf >= charbuf_end)
2288         break;
2289
2290       if (byte_after_cr >= 0)
2291         c = byte_after_cr, byte_after_cr = -1;
2292       else
2293         ONE_MORE_BYTE (c);
2294       if (c < 0)
2295         {
2296           *charbuf++ = -c;
2297           char_offset++;
2298         }
2299       else if (c < 0x80)
2300         {
2301           if (eol_crlf && c == '\r')
2302             ONE_MORE_BYTE (byte_after_cr);
2303           *charbuf++ = c;
2304           char_offset++;
2305         }
2306       else if (c == 0x80)
2307         {
2308           ONE_MORE_BYTE (c);
2309           if (c < 0)
2310             goto invalid_code;
2311           if (c - 0xF2 >= COMPOSITION_RELATIVE
2312               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2313             DECODE_EMACS_MULE_21_COMPOSITION (c);
2314           else if (c < 0xC0)
2315             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2316           else if (c == 0xFF)
2317             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2318           else
2319             goto invalid_code;
2320         }
2321       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2322         {
2323           int nbytes, nchars;
2324           int id;
2325
2326           src = src_base;
2327           consumed_chars = consumed_chars_base;
2328           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2329           if (c < 0)
2330             {
2331               if (c == -2)
2332                 break;
2333               goto invalid_code;
2334             }
2335           if (last_id != id)
2336             {
2337               if (last_id != charset_ascii)
2338                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2339               last_id = id;
2340               last_offset = char_offset;
2341             }
2342           *charbuf++ = c;
2343           src += nbytes;
2344           consumed_chars += nchars;
2345           char_offset++;
2346         }
2347       else
2348         goto invalid_code;
2349       continue;
2350
2351     invalid_code:
2352       src = src_base;
2353       consumed_chars = consumed_chars_base;
2354       ONE_MORE_BYTE (c);
2355       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2356       char_offset++;
2357       coding->errors++;
2358     }
2359
2360  no_more_source:
2361   if (last_id != charset_ascii)
2362     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2363   coding->consumed_char += consumed_chars_base;
2364   coding->consumed = src_base - coding->source;
2365   coding->charbuf_used = charbuf - coding->charbuf;
2366 }
2367
2368
/* Store in CODES (an array of at least two bytes) the leading code(s)
   that introduce a character of the emacs-mule charset ID.

   An ID below 0xA0 is itself the single leading code, and CODES[1]
   is set to 0.  Any other ID is stored in CODES[1], preceded in
   CODES[0] by a prefix byte chosen by the range of ID:
     0xA0..0xDF -> 0x9A,  0xE0..0xEF -> 0x9B,
     0xF0..0xF4 -> 0x9C,  0xF5 and up -> 0x9D.

   ID and CODES are evaluated more than once, so pass expressions
   without side effects.  The expansion has no trailing semicolon; the
   caller supplies it, which keeps the macro usable as the body of an
   unbraced `if ... else'.  */

#define EMACS_MULE_LEADING_CODES(id, codes)             \
  do {                                                  \
    if ((id) < 0xA0)                                    \
      (codes)[0] = (id), (codes)[1] = 0;                \
    else if ((id) < 0xE0)                               \
      (codes)[0] = 0x9A, (codes)[1] = (id);             \
    else if ((id) < 0xF0)                               \
      (codes)[0] = 0x9B, (codes)[1] = (id);             \
    else if ((id) < 0xF5)                               \
      (codes)[0] = 0x9C, (codes)[1] = (id);             \
    else                                                \
      (codes)[0] = 0x9D, (codes)[1] = (id);             \
  } while (0)
2382
2383
2384 static int
2385 encode_coding_emacs_mule (coding)
2386      struct coding_system *coding;
2387 {
2388   int multibytep = coding->dst_multibyte;
2389   int *charbuf = coding->charbuf;
2390   int *charbuf_end = charbuf + coding->charbuf_used;
2391   unsigned char *dst = coding->destination + coding->produced;
2392   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2393   int safe_room = 8;
2394   int produced_chars = 0;
2395   Lisp_Object attrs, charset_list;
2396   int c;
2397   int preferred_charset_id = -1;
2398
2399   CODING_GET_INFO (coding, attrs, charset_list);
2400   if (! EQ (charset_list, Vemacs_mule_charset_list))
2401     {
2402       CODING_ATTR_CHARSET_LIST (attrs)
2403         = charset_list = Vemacs_mule_charset_list;
2404     }
2405
2406   while (charbuf < charbuf_end)
2407     {
2408       ASSURE_DESTINATION (safe_room);
2409       c = *charbuf++;
2410
2411       if (c < 0)
2412         {
2413           /* Handle an annotation.  */
2414           switch (*charbuf)
2415             {
2416             case CODING_ANNOTATE_COMPOSITION_MASK:
2417               /* Not yet implemented.  */
2418               break;
2419             case CODING_ANNOTATE_CHARSET_MASK:
2420               preferred_charset_id = charbuf[3];
2421               if (preferred_charset_id >= 0
2422                   && NILP (Fmemq (make_number (preferred_charset_id),
2423                                   charset_list)))
2424                 preferred_charset_id = -1;
2425               break;
2426             default:
2427               abort ();
2428             }
2429           charbuf += -c - 1;
2430           continue;
2431         }
2432
2433       if (ASCII_CHAR_P (c))
2434         EMIT_ONE_ASCII_BYTE (c);
2435       else if (CHAR_BYTE8_P (c))
2436         {
2437           c = CHAR_TO_BYTE8 (c);
2438           EMIT_ONE_BYTE (c);
2439         }
2440       else
2441         {
2442           struct charset *charset;
2443           unsigned code;
2444           int dimension;
2445           int emacs_mule_id;
2446           unsigned char leading_codes[2];
2447
2448           if (preferred_charset_id >= 0)
2449             {
2450               charset = CHARSET_FROM_ID (preferred_charset_id);
2451               if (! CHAR_CHARSET_P (c, charset))
2452                 charset = char_charset (c, charset_list, NULL);
2453             }
2454           else
2455             charset = char_charset (c, charset_list, &code);
2456           if (! charset)
2457             {
2458               c = coding->default_char;
2459               if (ASCII_CHAR_P (c))
2460                 {
2461                   EMIT_ONE_ASCII_BYTE (c);
2462                   continue;
2463                 }
2464               charset = char_charset (c, charset_list, &code);
2465             }
2466           dimension = CHARSET_DIMENSION (charset);
2467           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2468           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2469           EMIT_ONE_BYTE (leading_codes[0]);
2470           if (leading_codes[1])
2471             EMIT_ONE_BYTE (leading_codes[1]);
2472           if (dimension == 1)
2473             EMIT_ONE_BYTE (code | 0x80);
2474           else
2475             {
2476               code |= 0x8080;
2477               EMIT_ONE_BYTE (code >> 8);
2478               EMIT_ONE_BYTE (code & 0xFF);
2479             }
2480         }
2481     }
2482   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2483   coding->produced_char += produced_chars;
2484   coding->produced = dst - coding->destination;
2485   return 0;
2486 }
2487
2488 \f
2489 /*** 7. ISO2022 handlers ***/
2490
2491 /* The following note describes the coding system ISO2022 briefly.
2492    Since the intention of this note is to help understand the
2493    functions in this file, some parts are NOT ACCURATE or are OVERLY
2494    SIMPLIFIED.  For thorough understanding, please refer to the
2495    original document of ISO2022.  This is equivalent to the standard
2496    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2497
2498    ISO2022 provides many mechanisms to encode several character sets
2499    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2500    is encoded using bytes less than 128.  This may make the encoded
2501    text a little bit longer, but the text passes more easily through
2502    several types of gateway, some of which strip off the MSB (Most
2503    Significant Bit).
2504
2505    There are two kinds of character sets: control character sets and
2506    graphic character sets.  The former contain control characters such
2507    as `newline' and `escape' to provide control functions (control
2508    functions are also provided by escape sequences).  The latter
2509    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2510    two control character sets and many graphic character sets.
2511
2512    Graphic character sets are classified into one of the following
2513    four classes, according to the number of bytes (DIMENSION) and
2514    number of characters in one dimension (CHARS) of the set:
2515    - DIMENSION1_CHARS94
2516    - DIMENSION1_CHARS96
2517    - DIMENSION2_CHARS94
2518    - DIMENSION2_CHARS96
2519
2520    In addition, each character set is assigned an identification tag,
2521    unique for each set, called the "final character" (denoted as <F>
2522    hereafter).  The <F> of each character set is decided by ECMA(*)
2523    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2524    (0x30..0x3F are for private use only).
2525
2526    Note (*): ECMA = European Computer Manufacturers Association
2527
2528    Here are examples of graphic character sets [NAME(<F>)]:
2529         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2530         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2531         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2532         o DIMENSION2_CHARS96 -- none for the moment
2533
2534    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2535         C0 [0x00..0x1F] -- control character plane 0
2536         GL [0x20..0x7F] -- graphic character plane 0
2537         C1 [0x80..0x9F] -- control character plane 1
2538         GR [0xA0..0xFF] -- graphic character plane 1
2539
2540    A control character set is directly designated and invoked to C0 or
2541    C1 by an escape sequence.  The most common case is that:
2542    - ISO646's  control character set is designated/invoked to C0, and
2543    - ISO6429's control character set is designated/invoked to C1,
2544    and usually these designations/invocations are omitted in encoded
2545    text.  In a 7-bit environment, only C0 can be used, and a control
2546    character for C1 is encoded by an appropriate escape sequence to
2547    fit into the environment.  All control characters for C1 are
2548    defined to have corresponding escape sequences.
2549
2550    A graphic character set is at first designated to one of four
2551    graphic registers (G0 through G3), then these graphic registers are
2552    invoked to GL or GR.  These designations and invocations can be
2553    done independently.  The most common case is that G0 is invoked to
2554    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2555    these invocations and designations are omitted in encoded text.
2556    In a 7-bit environment, only GL can be used.
2557
2558    When a graphic character set of CHARS94 is invoked to GL, codes
2559    0x20 and 0x7F of the GL area work as control characters SPACE and
2560    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2561    be used.
2562
2563    There are two ways of invocation: locking-shift and single-shift.
2564    With locking-shift, the invocation lasts until the next different
2565    invocation, whereas with single-shift, the invocation affects the
2566    following character only and doesn't affect the locking-shift
2567    state.  Invocations are done by the following control characters or
2568    escape sequences:
2569
2570    ----------------------------------------------------------------------
2571    abbrev  function                  cntrl escape seq   description
2572    ----------------------------------------------------------------------
2573    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2574    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2575    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2576    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2577    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2578    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2579    LS3R    (locking-shift-3 right)   none  ESC '|'      invoke G3 into GR (*)
2580    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2581    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2582    ----------------------------------------------------------------------
2583    (*) These are not used by any known coding system.
2584
2585    Control characters for these functions are defined by macros
2586    ISO_CODE_XXX in `coding.h'.
2587
2588    Designations are done by the following escape sequences:
2589    ----------------------------------------------------------------------
2590    escape sequence      description
2591    ----------------------------------------------------------------------
2592    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2593    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2594    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2595    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2596    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2597    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2598    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2599    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2600    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2601    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2602    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2603    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2604    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2605    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2606    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2607    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2608    ----------------------------------------------------------------------
2609
2610    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2611    of dimension 1, chars 94, and final character <F>, etc...
2612
2613    Note (*): Although these designations are not allowed in ISO2022,
2614    Emacs accepts them on decoding, and produces them on encoding
2615    CHARS96 character sets in a coding system which is characterized as
2616    7-bit environment, non-locking-shift, and non-single-shift.
2617
2618    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2619    '(' must be omitted.  We refer to this as "short-form" hereafter.
2620
2621    Now you may notice that there are a lot of ways of encoding the
2622    same multilingual text in ISO2022.  Actually, there exist many
2623    coding systems such as Compound Text (used in X11's inter client
2624    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2625    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2626    localized platforms), and all of these are variants of ISO2022.
2627
2628    In addition to the above, Emacs handles two more kinds of escape
2629    sequences: ISO6429's direction specification and Emacs' private
2630    sequence for specifying character composition.
2631
2632    ISO6429's direction specification takes the following form:
2633         o CSI ']'      -- end of the current direction
2634         o CSI '0' ']'  -- end of the current direction
2635         o CSI '1' ']'  -- start of left-to-right text
2636         o CSI '2' ']'  -- start of right-to-left text
2637    The control character CSI (0x9B: control sequence introducer) is
2638    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2639
2640    Character composition specification takes the following form:
2641         o ESC '0' -- start relative composition
2642         o ESC '1' -- end composition
2643         o ESC '2' -- start rule-base composition (*)
2644         o ESC '3' -- start relative composition with alternate chars  (**)
2645         o ESC '4' -- start rule-base composition with alternate chars  (**)
2646   Since these are not standard escape sequences of any ISO standard,
2647   the use of them with these meanings is restricted to Emacs only.
2648
2649   (*) This form is used only in Emacs 20.7 and older versions,
2650   but newer versions can safely decode it.
2651   (**) This form is used only in Emacs 21.1 and newer versions,
2652   and older versions can't decode it.
2653
2654   Here's a list of example usages of these composition escape
2655   sequences (categorized by `enum composition_method').
2656
2657   COMPOSITION_RELATIVE:
2658         ESC 0 CHAR [ CHAR ] ESC 1
2659   COMPOSITION_WITH_RULE:
2660         ESC 2 CHAR [ RULE CHAR ] ESC 1
2661   COMPOSITION_WITH_ALTCHARS:
2662         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2663   COMPOSITION_WITH_RULE_ALTCHARS:
2664         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2665
/* Table with one `enum iso_code_class_type' entry for each of the 256
   possible byte values.  It is only declared here; presumably it is
   filled in by initialization code elsewhere in this file (TODO
   confirm).  */
enum iso_code_class_type iso_code_class[256];
2667
/* Non-zero if the charset ID can be handled by CODING, i.e. ID lies in
   0..CODING->max_charset_id and its entry in CODING->safe_charsets is
   not 255, the value setup_iso_safe_charsets uses to fill the table
   before storing register numbers for the supported charsets.

   The entry is compared as an unsigned byte so that the result does
   not depend on whether plain `char' is signed, and a negative ID is
   rejected instead of being used as an index.  ID is evaluated more
   than once.  */
#define SAFE_CHARSET_P(coding, id)                              \
  ((id) >= 0                                                    \
   && (id) <= (coding)->max_charset_id                          \
   && (unsigned char) (coding)->safe_charsets[id] != 255)
2671
2672
/* Non-zero if CODING_ISO_INITIAL for index 1 is non-negative in the
   coding system stored at coding_categories[CATEGORY].  Presumably
   index 1 is graphic register G1, i.e. the test asks whether some
   charset is initially designated to G1 so that shift-out is
   meaningful (TODO confirm against CODING_ISO_INITIAL).  */
#define SHIFT_OUT_OK(category)  \
  (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2675
2676 static void
2677 setup_iso_safe_charsets (attrs)
2678      Lisp_Object attrs;
2679 {
2680   Lisp_Object charset_list, safe_charsets;
2681   Lisp_Object request;
2682   Lisp_Object reg_usage;
2683   Lisp_Object tail;
2684   int reg94, reg96;
2685   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2686   int max_charset_id;
2687
2688   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2689   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2690       && ! EQ (charset_list, Viso_2022_charset_list))
2691     {
2692       CODING_ATTR_CHARSET_LIST (attrs)
2693         = charset_list = Viso_2022_charset_list;
2694       ASET (attrs, coding_attr_safe_charsets, Qnil);
2695     }
2696
2697   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2698     return;
2699
2700   max_charset_id = 0;
2701   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2702     {
2703       int id = XINT (XCAR (tail));
2704       if (max_charset_id < id)
2705         max_charset_id = id;
2706     }
2707
2708   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2709                                 make_number (255));
2710   request = AREF (attrs, coding_attr_iso_request);
2711   reg_usage = AREF (attrs, coding_attr_iso_usage);
2712   reg94 = XINT (XCAR (reg_usage));
2713   reg96 = XINT (XCDR (reg_usage));
2714
2715   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2716     {
2717       Lisp_Object id;
2718       Lisp_Object reg;
2719       struct charset *charset;
2720
2721       id = XCAR (tail);
2722       charset = CHARSET_FROM_ID (XINT (id));
2723       reg = Fcdr (Fassq (id, request));
2724       if (! NILP (reg))
2725         SSET (safe_charsets, XINT (id), XINT (reg));
2726       else if (charset->iso_chars_96)
2727         {
2728           if (reg96 < 4)
2729             SSET (safe_charsets, XINT (id), reg96);
2730         }
2731       else
2732         {
2733           if (reg94 < 4)
2734             SSET (safe_charsets, XINT (id), reg94);
2735         }
2736     }
2737   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2738 }
2739
2740
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text is encoded in one of ISO-2022 based coding systems.
   If it is, return 1, else return 0.

   The source bytes of CODING are scanned (after skipping
   CODING->head_ascii bytes) and two bit masks of ISO categories are
   accumulated: `found' for categories supported by what was seen and
   `rejected' for categories ruled out.  When the scan reaches the
   `no_more_source' label, both masks are merged into DETECT_INFO
   (categories that are rejected are removed from `found') and 1 is
   returned.  If every ISO category becomes rejected before that, all
   of CATEGORY_MASK_ISO is marked rejected in DETECT_INFO and 0 is
   returned.  */

static int
detect_coding_iso_2022 (coding, detect_info)
     struct coding_system *coding;
     struct coding_detection_info *detect_info;
{
  /* Several of these locals (`src', `src_end', `multibytep',
     `consumed_chars', ...) are presumably referenced by name from the
     ONE_MORE_BYTE macro, which is defined outside this function.  */
  const unsigned char *src = coding->source, *src_base = src;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  int multibytep = coding->src_multibyte;
  int single_shifting = 0;
  int id;
  int c, c1;
  int consumed_chars = 0;
  int i;
  int rejected = 0;
  int found = 0;

  detect_info->checked |= CATEGORY_MASK_ISO;

  /* Make the safe-charsets table of every ISO category usable by
     SAFE_CHARSET_P below.  */
  for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
    {
      struct coding_system *this = &(coding_categories[i]);
      Lisp_Object attrs, val;

      if (this->id < 0)
        continue;
      attrs = CODING_ID_ATTRS (this->id);
      /* NOTE(review): this compares the safe-charsets attribute with a
         charset list, whereas setup_iso_safe_charsets compares the
         charset-list attribute with Viso_2022_charset_list.  Confirm
         which attribute is intended here.  */
      if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
          && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
        setup_iso_safe_charsets (attrs);
      val = CODING_ATTR_SAFE_CHARSETS (attrs);
      this->max_charset_id = SCHARS (val) - 1;
      this->safe_charsets = (char *) SDATA (val);
    }

  /* A coding system of this category is always ASCII compatible.  */
  src += coding->head_ascii;

  while (rejected != CATEGORY_MASK_ISO)
    {
      src_base = src;
      /* Presumably ONE_MORE_BYTE jumps to `no_more_source' when the
         source is exhausted; no explicit goto to that label appears
         in this function.  */
      ONE_MORE_BYTE (c);
      switch (c)
        {
        case ISO_CODE_ESC:
          if (inhibit_iso_escape_detection)
            break;
          single_shifting = 0;
          ONE_MORE_BYTE (c);
          if (c >= '(' && c <= '/')
            {
              /* Designation sequence for a charset of dimension 1.  */
              ONE_MORE_BYTE (c1);
              /* Intermediate bytes ',' through '/' select the second
                 sub-table (the CHARS96 designations in the table of
                 escape sequences above).  */
              if (c1 < ' ' || c1 >= 0x80
                  || (id = iso_charset_table[0][c >= ','][c1]) < 0)
                /* Invalid designation sequence.  Just ignore.  */
                break;
            }
          else if (c == '$')
            {
              /* Designation sequence for a charset of dimension 2.  */
              ONE_MORE_BYTE (c);
              if (c >= '@' && c <= 'B')
                /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
                id = iso_charset_table[1][0][c];
              else if (c >= '(' && c <= '/')
                {
                  ONE_MORE_BYTE (c1);
                  if (c1 < ' ' || c1 >= 0x80
                      || (id = iso_charset_table[1][c >= ','][c1]) < 0)
                    /* Invalid designation sequence.  Just ignore.  */
                    break;
                }
              else
                /* Invalid designation sequence.  Just ignore it.  */
                break;
            }
          else if (c == 'N' || c == 'O')
            {
              /* ESC <Fe> for SS2 or SS3.  */
              single_shifting = 1;
              rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
              break;
            }
          else if (c >= '0' && c <= '4')
            {
              /* ESC <Fp> for start/end composition.  */
              found |= CATEGORY_MASK_ISO;
              break;
            }
          else
            {
              /* Invalid escape sequence.  Just ignore it.  */
              break;
            }

          /* We found a valid designation sequence for CHARSET.  */
          /* Only the two designation branches above fall through to
             here, with ID set to the designated charset.  Each
             category is marked found or rejected according to whether
             it can handle that charset.  */
          rejected |= CATEGORY_MASK_ISO_8BIT;
          if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
                              id))
            found |= CATEGORY_MASK_ISO_7;
          else
            rejected |= CATEGORY_MASK_ISO_7;
          if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
                              id))
            found |= CATEGORY_MASK_ISO_7_TIGHT;
          else
            rejected |= CATEGORY_MASK_ISO_7_TIGHT;
          if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
                              id))
            found |= CATEGORY_MASK_ISO_7_ELSE;
          else
            rejected |= CATEGORY_MASK_ISO_7_ELSE;
          if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
                              id))
            found |= CATEGORY_MASK_ISO_8_ELSE;
          else
            rejected |= CATEGORY_MASK_ISO_8_ELSE;
          break;

        case ISO_CODE_SO:
        case ISO_CODE_SI:
          /* Locking shift out/in.  */
          if (inhibit_iso_escape_detection)
            break;
          single_shifting = 0;
          rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
          break;

        case ISO_CODE_CSI:
          /* Control sequence introducer.  */
          single_shifting = 0;
          rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
          found |= CATEGORY_MASK_ISO_8_ELSE;
          goto check_extra_latin;

        case ISO_CODE_SS2:
        case ISO_CODE_SS3:
          /* Single shift.   */
          if (inhibit_iso_escape_detection)
            break;
          single_shifting = 0;
          rejected |= CATEGORY_MASK_ISO_7BIT;
          if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
              & CODING_ISO_FLAG_SINGLE_SHIFT)
            found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
          if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
              & CODING_ISO_FLAG_SINGLE_SHIFT)
            found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
          if (single_shifting)
            break;
          /* Neither 8-bit category uses single shifts, so treat the
             byte like any other code in 0x80..0x9F.  */
          goto check_extra_latin;

        default:
          /* A negative value is skipped without affecting the masks.  */
          if (c < 0)
            continue;
          if (c < 0x80)
            {
              single_shifting = 0;
              break;
            }
          if (c >= 0xA0)
            {
              rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
              found |= CATEGORY_MASK_ISO_8_1;
              /* Check the length of succeeding codes of the range
                 0xA0..0xFF.  If the byte length is even, we include
                 CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
                 only when we are not single shifting.  */
              if (! single_shifting
                  && ! (rejected & CATEGORY_MASK_ISO_8_2))
                {
                  /* Length of the run, counting the byte already read.
                     This `i' shadows the function-level `i'.  */
                  int i = 1;
                  while (src < src_end)
                    {
                      ONE_MORE_BYTE (c);
                      if (c < 0xA0)
                        break;
                      i++;
                    }

                  /* An odd run rejects ISO_8_2 only when the run ended
                     before the end of the source.  */
                  if (i & 1 && src < src_end)
                    rejected |= CATEGORY_MASK_ISO_8_2;
                  else
                    found |= CATEGORY_MASK_ISO_8_2;
                }
              break;
            }
          /* Here C is in 0x80..0x9F; the CSI and SS2/SS3 cases also
             jump to the label below.  */
        check_extra_latin:
          single_shifting = 0;
          if (! VECTORP (Vlatin_extra_code_table)
              || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
            {
              /* C is not listed as an extra Latin code: reject every
                 ISO category, which also ends the scan loop.  */
              rejected = CATEGORY_MASK_ISO;
              break;
            }
          if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
              & CODING_ISO_FLAG_LATIN_EXTRA)
            found |= CATEGORY_MASK_ISO_8_1;
          else
            rejected |= CATEGORY_MASK_ISO_8_1;
          rejected |= CATEGORY_MASK_ISO_8_2;
        }
    }
  /* The loop ended because every ISO category was rejected.  */
  detect_info->rejected |= CATEGORY_MASK_ISO;
  return 0;

 no_more_source:
  detect_info->rejected |= rejected;
  detect_info->found |= (found & ~rejected);
  return 1;
}
2956
2957
/* Set designation state into CODING.  Set CHARS_96 to -1 if the
   escape sequence should be kept.

   REG is the graphic register being designated; DIM, CHARS_96 and
   FINAL are looked up with ISO_CHARSET_TABLE to find the charset.
   The variable `coding' is taken from the scope where the macro is
   expanded, and CHARS_96 must be an lvalue because it is assigned.

   If FINAL is below '0' or not below 128, if no charset is found, or
   if the charset is not safe for `coding', the designation of REG is
   set to -2, CHARS_96 is set to -1, and the macro finishes (the
   `break' leaves the enclosing do-while).

   Otherwise the designation of REG becomes the charset found, except
   that JISX0201-Roman is replaced by ASCII when
   CODING_ISO_FLAG_USE_ROMAN is set and JISX0208.1978 is replaced by
   JISX0208 when CODING_ISO_FLAG_USE_OLDJIS is set.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
  do {                                                                  \
    int id, prev;                                                       \
                                                                        \
    if (final < '0' || final >= 128                                     \
        || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
        || !SAFE_CHARSET_P (coding, id))                                \
      {                                                                 \
        CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
        chars_96 = -1;                                                  \
        break;                                                          \
      }                                                                 \
    prev = CODING_ISO_DESIGNATION (coding, reg);                        \
    if (id == charset_jisx0201_roman)                                   \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
          id = charset_ascii;                                           \
      }                                                                 \
    else if (id == charset_jisx0208_1978)                               \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
          id = charset_jisx0208;                                        \
      }                                                                 \
    CODING_ISO_DESIGNATION (coding, reg) = id;                          \
    /* If there was an invalid designation to REG previously, and this  \
       designation is ASCII to REG, we should keep this designation     \
       sequence.  */                                                    \
    if (prev == -2 && id == charset_ascii)                              \
      chars_96 = -1;                                                    \
  } while (0)
2990
2991
/* If a composition is in progress, abandon it: copy the entries
   collected so far in `components' to `charbuf' and reset
   `composition_state' to COMPOSING_NO.  Does nothing when
   `composition_state' is already COMPOSING_NO.

   For COMPOSITION_RELATIVE and COMPOSITION_WITH_ALTCHARS every entry
   is copied and `char_offset' advances by `component_idx'.  For the
   other methods only the entries at even indexes are copied
   (presumably the odd ones are composition rules -- TODO confirm) and
   `char_offset' advances by component_idx / 2 + 1.

   Jumps to `no_more_source' when `charbuf' has no room for
   `component_idx' more elements.  All variables named above, together
   with `charbuf_end' and `method', come from the scope where the
   macro is expanded.  */
#define MAYBE_FINISH_COMPOSITION()                              \
  do {                                                          \
    int i;                                                      \
    if (composition_state == COMPOSING_NO)                      \
      break;                                                    \
    /* It is assured that we have enough room for producing     \
       characters stored in the table `components'.  */         \
    if (charbuf + component_idx > charbuf_end)                  \
      goto no_more_source;                                      \
    composition_state = COMPOSING_NO;                           \
    if (method == COMPOSITION_RELATIVE                          \
        || method == COMPOSITION_WITH_ALTCHARS)                 \
      {                                                         \
        for (i = 0; i < component_idx; i++)                     \
          *charbuf++ = components[i];                           \
        char_offset += component_idx;                           \
      }                                                         \
    else                                                        \
      {                                                         \
        for (i = 0; i < component_idx; i += 2)                  \
          *charbuf++ = components[i];                           \
        char_offset += (component_idx / 2) + 1;                 \
      }                                                         \
  } while (0)
3016
3017
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   C1 is the character that followed ESC.

   When C1 is '0' and `composition_state' is COMPOSING_COMPONENT_RULE,
   the sequence does not start a new composition: the number of
   components read so far is saved in `component_len' and the state
   becomes COMPOSING_CHAR.

   Otherwise any composition in progress is finished with
   MAYBE_FINISH_COMPOSITION and the rest of the source is searched
   for the terminating ESC 1.  If it is not found,
   CODING_RESULT_INSUFFICIENT_SRC is recorded and control jumps to
   `no_more_source'; the same jump is taken, without recording a
   result, when `charbuf' has no room for MAX_COMPOSITION_COMPONENTS
   more elements.  If it is found, `method' is set from C1, the state
   becomes COMPOSING_CHAR (C1 <= '2') or COMPOSING_COMPONENT_CHAR, and
   `component_idx' and `component_len' are reset to 0.

   NOTE(review): when `src' already equals `src_end' the search loop
   does not run and P is not equal to `src_end - 1', so the sequence
   is accepted as a composition start although no ESC 1 was seen.
   Confirm whether the test should be `p >= src_end - 1'.

   The macro uses `src', `src_end', `charbuf', `charbuf_end',
   `coding', `method', `composition_state', `component_idx' and
   `component_len' from the scope where it is expanded.
  */

#define DECODE_COMPOSITION_START(c1)                                    \
  do {                                                                  \
    if (c1 == '0'                                                       \
        && composition_state == COMPOSING_COMPONENT_RULE)               \
      {                                                                 \
        component_len = component_idx;                                  \
        composition_state = COMPOSING_CHAR;                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        const unsigned char *p;                                         \
                                                                        \
        MAYBE_FINISH_COMPOSITION ();                                    \
        if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
          goto no_more_source;                                          \
        for (p = src; p < src_end - 1; p++)                             \
          if (*p == ISO_CODE_ESC && p[1] == '1')                        \
            break;                                                      \
        if (p == src_end - 1)                                           \
          {                                                             \
            /* The current composition doesn't end in the current       \
               source.  */                                              \
            record_conversion_result                                    \
              (coding, CODING_RESULT_INSUFFICIENT_SRC);                 \
            goto no_more_source;                                        \
          }                                                             \
                                                                        \
        /* This is surely the start of a composition.  */               \
        method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
                  : c1 == '2' ? COMPOSITION_WITH_RULE                   \
                  : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
                  : COMPOSITION_WITH_RULE_ALTCHARS);                    \
        composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
                             : COMPOSING_COMPONENT_CHAR);               \
        component_idx = component_len = 0;                              \
      }                                                                 \
  } while (0)
3062
3063
/* Handle the composition end sequence ESC 1.

   Emit the composition annotation into CHARBUF, followed (for every
   method but COMPOSITION_RELATIVE) by the collected components, and
   then the characters of the composition themselves.  CHAR_OFFSET is
   advanced once per character emitted in that last step.  */

#define DECODE_COMPOSITION_END()                                        \
  do {                                                                  \
    int *annotation_head = charbuf;                                     \
    int composed_count;                                                 \
    int step;                                                           \
    int k;                                                              \
                                                                        \
    if (component_len > 0)                                              \
      composed_count = component_idx - component_len;                   \
    else if (method == COMPOSITION_RELATIVE)                            \
      composed_count = component_idx;                                   \
    else                                                                \
      composed_count = (component_idx + 1) / 2;                         \
                                                                        \
    ADD_COMPOSITION_DATA (charbuf, composed_count, method);             \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        int spec_len = component_len;                                   \
                                                                        \
        if (spec_len == 0)                                              \
          spec_len = component_idx;                                     \
        for (k = 0; k < spec_len; k++)                                  \
          *charbuf++ = components[k];                                   \
        /* Rewrite the first annotation element now that the            \
           components have been appended.  */                           \
        *annotation_head = annotation_head - charbuf;                   \
      }                                                                 \
                                                                        \
    /* With rules, characters sit at every other slot starting at 0;    \
       otherwise they follow the COMPONENT_LEN leading components.  */  \
    if (method == COMPOSITION_WITH_RULE)                                \
      k = 0, step = 2;                                                  \
    else                                                                \
      k = component_len, step = 1;                                      \
    while (k < component_idx)                                           \
      {                                                                 \
        *charbuf++ = components[k];                                     \
        k += step;                                                      \
        char_offset++;                                                  \
      }                                                                 \
    coding->annotated = 1;                                              \
    composition_state = COMPOSING_NO;                                   \
  } while (0)
3094
3095
/* Decode a composition rule from the byte C1 (and, for the new
   format, one more byte read from the source into C2) and store the
   encoded rule back in C1.  C1 is set to 0 when the byte is outside
   both rule formats.  */

#define DECODE_COMPOSITION_RULE(c1)                                     \
  do {                                                                  \
    (c1) -= 32;                                                         \
    if (c1 >= 93)                                                       \
      c1 = 0;                                                           \
    else if (c1 >= 81)          /* new format (after ver.21) */         \
      {                                                                 \
        ONE_MORE_BYTE (c2);                                             \
        c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
      }                                                                 \
    else                        /* old format (before ver.21) */        \
      {                                                                 \
        int new_pt = (c1) % 9;                                          \
        int global_pt = (c1) / 9;                                       \
                                                                        \
        /* Value 4 is remapped to 10 in either position.  */            \
        if (new_pt == 4)                                                \
          new_pt = 10;                                                  \
        if (global_pt == 4)                                             \
          global_pt = 10;                                               \
        c1 = COMPOSITION_ENCODE_RULE (global_pt, new_pt);               \
      }                                                                 \
  } while (0)
3119
3120
3121 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3122
3123 static void
3124 decode_coding_iso_2022 (coding)
3125      struct coding_system *coding;
3126 {
3127   const unsigned char *src = coding->source + coding->consumed;
3128   const unsigned char *src_end = coding->source + coding->src_bytes;
3129   const unsigned char *src_base;
3130   int *charbuf = coding->charbuf + coding->charbuf_used;
3131   int *charbuf_end
3132     = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
3133   int consumed_chars = 0, consumed_chars_base;
3134   int multibytep = coding->src_multibyte;
3135   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3136   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3137   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3138   int charset_id_2, charset_id_3;
3139   struct charset *charset;
3140   int c;
3141   /* For handling composition sequence.  */
3142 #define COMPOSING_NO                    0
3143 #define COMPOSING_CHAR                  1
3144 #define COMPOSING_RULE                  2
3145 #define COMPOSING_COMPONENT_CHAR        3
3146 #define COMPOSING_COMPONENT_RULE        4
3147
3148   int composition_state = COMPOSING_NO;
3149   enum composition_method method;
3150   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
3151   int component_idx;
3152   int component_len;
3153   Lisp_Object attrs, charset_list;
3154   int char_offset = coding->produced_char;
3155   int last_offset = char_offset;
3156   int last_id = charset_ascii;
3157   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3158   int byte_after_cr = -1;
3159
3160   CODING_GET_INFO (coding, attrs, charset_list);
3161   setup_iso_safe_charsets (attrs);
3162   /* Charset list may have been changed.  */
3163   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3164   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3165
3166   while (1)
3167     {
3168       int c1, c2;
3169
3170       src_base = src;
3171       consumed_chars_base = consumed_chars;
3172
3173       if (charbuf >= charbuf_end)
3174         break;
3175
3176       if (byte_after_cr >= 0)
3177         c1 = byte_after_cr, byte_after_cr = -1;
3178       else
3179         ONE_MORE_BYTE (c1);
3180       if (c1 < 0)
3181         goto invalid_code;
3182
3183       /* We produce at most one character.  */
3184       switch (iso_code_class [c1])
3185         {
3186         case ISO_0x20_or_0x7F:
3187           if (composition_state != COMPOSING_NO)
3188             {
3189               if (composition_state == COMPOSING_RULE
3190                   || composition_state == COMPOSING_COMPONENT_RULE)
3191                 {
3192                   DECODE_COMPOSITION_RULE (c1);
3193                   components[component_idx++] = c1;
3194                   composition_state--;
3195                   continue;
3196                 }
3197             }
3198           if (charset_id_0 < 0
3199               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3200             /* This is SPACE or DEL.  */
3201             charset = CHARSET_FROM_ID (charset_ascii);
3202           else
3203             charset = CHARSET_FROM_ID (charset_id_0);
3204           break;
3205
3206         case ISO_graphic_plane_0:
3207           if (composition_state != COMPOSING_NO)
3208             {
3209               if (composition_state == COMPOSING_RULE
3210                   || composition_state == COMPOSING_COMPONENT_RULE)
3211                 {
3212                   DECODE_COMPOSITION_RULE (c1);
3213                   components[component_idx++] = c1;
3214                   composition_state--;
3215                   continue;
3216                 }
3217             }
3218           if (charset_id_0 < 0)
3219             charset = CHARSET_FROM_ID (charset_ascii);
3220           else
3221             charset = CHARSET_FROM_ID (charset_id_0);
3222           break;
3223
3224         case ISO_0xA0_or_0xFF:
3225           if (charset_id_1 < 0
3226               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3227               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3228             goto invalid_code;
3229           /* This is a graphic character, we fall down ... */
3230
3231         case ISO_graphic_plane_1:
3232           if (charset_id_1 < 0)
3233             goto invalid_code;
3234           charset = CHARSET_FROM_ID (charset_id_1);
3235           break;
3236
3237         case ISO_control_0:
3238           if (eol_crlf && c1 == '\r')
3239             ONE_MORE_BYTE (byte_after_cr);
3240           MAYBE_FINISH_COMPOSITION ();
3241           charset = CHARSET_FROM_ID (charset_ascii);
3242           break;
3243
3244         case ISO_control_1:
3245           MAYBE_FINISH_COMPOSITION ();
3246           goto invalid_code;
3247
3248         case ISO_shift_out:
3249           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3250               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3251             goto invalid_code;
3252           CODING_ISO_INVOCATION (coding, 0) = 1;
3253           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3254           continue;
3255
3256         case ISO_shift_in:
3257           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3258             goto invalid_code;
3259           CODING_ISO_INVOCATION (coding, 0) = 0;
3260           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3261           continue;
3262
3263         case ISO_single_shift_2_7:
3264         case ISO_single_shift_2:
3265           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3266             goto invalid_code;
3267           /* SS2 is handled as an escape sequence of ESC 'N' */
3268           c1 = 'N';
3269           goto label_escape_sequence;
3270
3271         case ISO_single_shift_3:
3272           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3273             goto invalid_code;
3274           /* SS2 is handled as an escape sequence of ESC 'O' */
3275           c1 = 'O';
3276           goto label_escape_sequence;
3277
3278         case ISO_control_sequence_introducer:
3279           /* CSI is handled as an escape sequence of ESC '[' ...  */
3280           c1 = '[';
3281           goto label_escape_sequence;
3282
3283         case ISO_escape:
3284           ONE_MORE_BYTE (c1);
3285         label_escape_sequence:
3286           /* Escape sequences handled here are invocation,
3287              designation, direction specification, and character
3288              composition specification.  */
3289           switch (c1)
3290             {
3291             case '&':           /* revision of following character set */
3292               ONE_MORE_BYTE (c1);
3293               if (!(c1 >= '@' && c1 <= '~'))
3294                 goto invalid_code;
3295               ONE_MORE_BYTE (c1);
3296               if (c1 != ISO_CODE_ESC)
3297                 goto invalid_code;
3298               ONE_MORE_BYTE (c1);
3299               goto label_escape_sequence;
3300
3301             case '$':           /* designation of 2-byte character set */
3302               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3303                 goto invalid_code;
3304               {
3305                 int reg, chars96;
3306
3307                 ONE_MORE_BYTE (c1);
3308                 if (c1 >= '@' && c1 <= 'B')
3309                   {     /* designation of JISX0208.1978, GB2312.1980,
3310                            or JISX0208.1980 */
3311                     reg = 0, chars96 = 0;
3312                   }
3313                 else if (c1 >= 0x28 && c1 <= 0x2B)
3314                   { /* designation of DIMENSION2_CHARS94 character set */
3315                     reg = c1 - 0x28, chars96 = 0;
3316                     ONE_MORE_BYTE (c1);
3317                   }
3318                 else if (c1 >= 0x2C && c1 <= 0x2F)
3319                   { /* designation of DIMENSION2_CHARS96 character set */
3320                     reg = c1 - 0x2C, chars96 = 1;
3321                     ONE_MORE_BYTE (c1);
3322                   }
3323                 else
3324                   goto invalid_code;
3325                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3326                 /* We must update these variables now.  */
3327                 if (reg == 0)
3328                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3329                 else if (reg == 1)
3330                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3331                 if (chars96 < 0)
3332                   goto invalid_code;
3333               }
3334               continue;
3335
3336             case 'n':           /* invocation of locking-shift-2 */
3337               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3338                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3339                 goto invalid_code;
3340               CODING_ISO_INVOCATION (coding, 0) = 2;
3341               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3342               continue;
3343
3344             case 'o':           /* invocation of locking-shift-3 */
3345               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3346                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3347                 goto invalid_code;
3348               CODING_ISO_INVOCATION (coding, 0) = 3;
3349               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3350               continue;
3351
3352             case 'N':           /* invocation of single-shift-2 */
3353               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3354                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3355                 goto invalid_code;
3356               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3357               if (charset_id_2 < 0)
3358                 charset = CHARSET_FROM_ID (charset_ascii);
3359               else
3360                 charset = CHARSET_FROM_ID (charset_id_2);
3361               ONE_MORE_BYTE (c1);
3362               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3363                 goto invalid_code;
3364               break;
3365
3366             case 'O':           /* invocation of single-shift-3 */
3367               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3368                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3369                 goto invalid_code;
3370               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3371               if (charset_id_3 < 0)
3372                 charset = CHARSET_FROM_ID (charset_ascii);
3373               else
3374                 charset = CHARSET_FROM_ID (charset_id_3);
3375               ONE_MORE_BYTE (c1);
3376               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3377                 goto invalid_code;
3378               break;
3379
3380             case '0': case '2': case '3': case '4': /* start composition */
3381               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3382                 goto invalid_code;
3383               DECODE_COMPOSITION_START (c1);
3384               continue;
3385
3386             case '1':           /* end composition */
3387               if (composition_state == COMPOSING_NO)
3388                 goto invalid_code;
3389               DECODE_COMPOSITION_END ();
3390               continue;
3391
3392             case '[':           /* specification of direction */
3393               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3394                 goto invalid_code;
3395               /* For the moment, nested direction is not supported.
3396                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3397                  left-to-right, and nozero means right-to-left.  */
3398               ONE_MORE_BYTE (c1);
3399               switch (c1)
3400                 {
3401                 case ']':       /* end of the current direction */
3402                   coding->mode &= ~CODING_MODE_DIRECTION;
3403
3404                 case '0':       /* end of the current direction */
3405                 case '1':       /* start of left-to-right direction */
3406                   ONE_MORE_BYTE (c1);
3407                   if (c1 == ']')
3408                     coding->mode &= ~CODING_MODE_DIRECTION;
3409                   else
3410                     goto invalid_code;
3411                   break;
3412
3413                 case '2':       /* start of right-to-left direction */
3414                   ONE_MORE_BYTE (c1);
3415                   if (c1 == ']')
3416                     coding->mode |= CODING_MODE_DIRECTION;
3417                   else
3418                     goto invalid_code;
3419                   break;
3420
3421                 default:
3422                   goto invalid_code;
3423                 }
3424               continue;
3425
3426             case '%':
3427               ONE_MORE_BYTE (c1);
3428               if (c1 == '/')
3429                 {
3430                   /* CTEXT extended segment:
3431                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3432                      We keep these bytes as is for the moment.
3433                      They may be decoded by post-read-conversion.  */
3434                   int dim, M, L;
3435                   int size;
3436
3437                   ONE_MORE_BYTE (dim);
3438                   ONE_MORE_BYTE (M);
3439                   ONE_MORE_BYTE (L);
3440                   size = ((M - 128) * 128) + (L - 128);
3441                   if (charbuf + 8 + size > charbuf_end)
3442                     goto break_loop;
3443                   *charbuf++ = ISO_CODE_ESC;
3444                   *charbuf++ = '%';
3445                   *charbuf++ = '/';
3446                   *charbuf++ = dim;
3447                   *charbuf++ = BYTE8_TO_CHAR (M);
3448                   *charbuf++ = BYTE8_TO_CHAR (L);
3449                   while (size-- > 0)
3450                     {
3451                       ONE_MORE_BYTE (c1);
3452                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3453                     }
3454                 }
3455               else if (c1 == 'G')
3456                 {
3457                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3458                      ESC % G --UTF-8-BYTES-- ESC % @
3459                      We keep these bytes as is for the moment.
3460                      They may be decoded by post-read-conversion.  */
3461                   int *p = charbuf;
3462
3463                   if (p + 6 > charbuf_end)
3464                     goto break_loop;
3465                   *p++ = ISO_CODE_ESC;
3466                   *p++ = '%';
3467                   *p++ = 'G';
3468                   while (p < charbuf_end)
3469                     {
3470                       ONE_MORE_BYTE (c1);
3471                       if (c1 == ISO_CODE_ESC
3472                           && src + 1 < src_end
3473                           && src[0] == '%'
3474                           && src[1] == '@')
3475                         {
3476                           src += 2;
3477                           break;
3478                         }
3479                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3480                     }
3481                   if (p + 3 > charbuf_end)
3482                     goto break_loop;
3483                   *p++ = ISO_CODE_ESC;
3484                   *p++ = '%';
3485                   *p++ = '@';
3486                   charbuf = p;
3487                 }
3488               else
3489                 goto invalid_code;
3490               continue;
3491               break;
3492
3493             default:
3494               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3495                 goto invalid_code;
3496               {
3497                 int reg, chars96;
3498
3499                 if (c1 >= 0x28 && c1 <= 0x2B)
3500                   { /* designation of DIMENSION1_CHARS94 character set */
3501                     reg = c1 - 0x28, chars96 = 0;
3502                     ONE_MORE_BYTE (c1);
3503                   }
3504                 else if (c1 >= 0x2C && c1 <= 0x2F)
3505                   { /* designation of DIMENSION1_CHARS96 character set */
3506                     reg = c1 - 0x2C, chars96 = 1;
3507                     ONE_MORE_BYTE (c1);
3508                   }
3509                 else
3510                   goto invalid_code;
3511                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3512                 /* We must update these variables now.  */
3513                 if (reg == 0)
3514                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3515                 else if (reg == 1)
3516                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3517                 if (chars96 < 0)
3518                   goto invalid_code;
3519               }
3520               continue;
3521             }
3522         }
3523
3524       if (charset->id != charset_ascii
3525           && last_id != charset->id)
3526         {
3527           if (last_id != charset_ascii)
3528             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3529           last_id = charset->id;
3530           last_offset = char_offset;
3531         }
3532
3533       /* Now we know CHARSET and 1st position code C1 of a character.
3534          Produce a decoded character while getting 2nd position code
3535          C2 if necessary.  */
3536       c1 &= 0x7F;
3537       if (CHARSET_DIMENSION (charset) > 1)
3538         {
3539           ONE_MORE_BYTE (c2);
3540           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3541             /* C2 is not in a valid range.  */
3542             goto invalid_code;
3543           c1 = (c1 << 8) | (c2 & 0x7F);
3544           if (CHARSET_DIMENSION (charset) > 2)
3545             {
3546               ONE_MORE_BYTE (c2);
3547               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3548                 /* C2 is not in a valid range.  */
3549                 goto invalid_code;
3550               c1 = (c1 << 8) | (c2 & 0x7F);
3551             }
3552         }
3553
3554       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3555       if (c < 0)
3556         {
3557           MAYBE_FINISH_COMPOSITION ();
3558           for (; src_base < src; src_base++, char_offset++)
3559             {
3560               if (ASCII_BYTE_P (*src_base))
3561                 *charbuf++ = *src_base;
3562               else
3563                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3564             }
3565         }
3566       else if (composition_state == COMPOSING_NO)
3567         {
3568           *charbuf++ = c;
3569           char_offset++;
3570         }
3571       else
3572         {
3573           components[component_idx++] = c;
3574           if (method == COMPOSITION_WITH_RULE
3575               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3576                   && composition_state == COMPOSING_COMPONENT_CHAR))
3577             composition_state++;
3578         }
3579       continue;
3580
3581     invalid_code:
3582       MAYBE_FINISH_COMPOSITION ();
3583       src = src_base;
3584       consumed_chars = consumed_chars_base;
3585       ONE_MORE_BYTE (c);
3586       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3587       char_offset++;
3588       coding->errors++;
3589       continue;
3590
3591     break_loop:
3592       break;
3593     }
3594
3595  no_more_source:
3596   if (last_id != charset_ascii)
3597     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3598   coding->consumed_char += consumed_chars_base;
3599   coding->consumed = src_base - coding->source;
3600   coding->charbuf_used = charbuf - coding->charbuf;
3601 }
3602
3603
3604 /* ISO2022 encoding stuff.  */
3605
3606 /*
3607    It is not enough to say just "ISO2022" on encoding, we have to
3608    specify more details.  In Emacs, each coding system of ISO2022
3609    variant has the following specifications:
3610         1. Initial designation to G0 thru G3.
3611         2. Allows short-form designation?
3612         3. ASCII should be designated to G0 before control characters?
3613         4. ASCII should be designated to G0 at end of line?
3614         5. 7-bit environment or 8-bit environment?
3615         6. Use locking-shift?
3616         7. Use Single-shift?
3617    And the following two are only for Japanese:
3618         8. Use ASCII in place of JIS0201-1976-Roman?
3619         9. Use JISX0208-1983 in place of JISX0208-1978?
3620    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3621    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3622    details.
3623 */
3624
/* Emit the escape sequence that designates CHARSET to graphic
   register REG, and record the designation in CODING.

   The sequence is ESC, an optional '$' for a multi-dimension charset,
   an intermediate character selected by REG and by whether CHARSET
   has 96 characters, and the final character of CHARSET.  It is
   preceded by ESC '&' <revision> when CODING has
   CODING_ISO_FLAG_REVISION set and CHARSET has a non-negative
   revision.  For a multi-dimension 94-character charset designated
   to register 0 whose final character is '@', 'A', or 'B', the
   intermediate character is omitted (short form) unless CODING has
   CODING_ISO_FLAG_LONG_FORM set.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char iso_final = CHARSET_ISO_FINAL (charset);              \
    const char *inter94 = "()*+";                                       \
    const char *inter96 = ",-./";                                       \
    int iso_rev = -1;                                                   \
    int inter;                                                          \
                                                                        \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
      iso_rev = CHARSET_ISO_REVISION (charset);                         \
    if (iso_rev >= 0)                                                   \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + iso_rev);                                  \
      }                                                                 \
                                                                        \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          EMIT_ONE_ASCII_BYTE (inter96[reg]);                           \
        else if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM) \
                 || reg != 0                                            \
                 || ! (iso_final >= '@' && iso_final <= 'B'))           \
          EMIT_ONE_ASCII_BYTE (inter94[reg]);                           \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          inter = inter96[reg];                                         \
        else                                                            \
          inter = inter94[reg];                                         \
        EMIT_ONE_ASCII_BYTE (inter);                                    \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (iso_final);                                    \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
3672
3673
3674 /* The following two macros produce codes (control character or escape
3675    sequence) for ISO2022 single-shift functions (single-shift-2 and
3676    single-shift-3).  */
3677
/* Emit single-shift-2: the single byte ISO_CODE_SS2, or ESC 'N' when
   CODING is restricted to seven bits.  Then mark CODING as being in
   single-shifting state.  */
#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3686
3687
/* Emit single-shift-3: the single byte ISO_CODE_SS3, or ESC 'O' when
   CODING is restricted to seven bits.  Then mark CODING as being in
   single-shifting state.  */
#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3696
3697
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3).  Each one also
   records in CODING which graphic register is now invoked to graphic
   plane 0.  */

/* Shift-in: invoke G0.  */
#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;              \
  } while (0)


/* Shift-out: invoke G1.  */
#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;              \
  } while (0)


/* Locking-shift-2 (ESC 'n'): invoke G2.  */
#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
    CODING_ISO_INVOCATION (coding, 0) = 2;              \
  } while (0)


/* Locking-shift-3 (ESC 'o'): invoke G3.  The final byte must be 'o';
   ESC 'n' is locking-shift-2 and would make a decoder invoke G2.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
3728
3729
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be a modifiable `struct charset *' expression: when
   the coding system uses JISX0201-Roman in place of ASCII, it is
   replaced by that charset.  C1 is parenthesized at every use so
   that an argument containing low-precedence operators is masked as
   a whole, as ENCODE_ISO_CHARACTER_DIMENSION2 already does.  The
   macro uses `coding', `dst' and `produced_chars' from the place
   where it is expanded.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single shift is pending; it covers this character only.  */ \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
3772
3773
/* Emit a two-byte (DIMENSION2) character of CHARSET whose position
   codes are C1 and C2.  When CHARSET is invoked to neither graphic
   plane, the necessary designation and invocation codes are emitted
   first and the attempt is repeated.

   CHARSET must be a modifiable `struct charset *' expression: when
   the coding system uses the old JIS charset, JISX0208 is replaced
   by JISX0208-1978.  The macro uses `coding', `dst' and
   `produced_chars' from the place where it is expanded.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if (id == charset_jisx0208                                          \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A pending single shift covers this character only.  */       \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        else                                                            \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))                   \
      {                                                                 \
        /* Invoked to plane 0: seven-bit codes.  */                     \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))                   \
      {                                                                 \
        /* Invoked to plane 1: codes with the high bit set.  */         \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
        break;                                                          \
      }                                                                 \
    /* Not invoked yet: designate and/or invoke CHARSET, then go        \
       round the loop again to produce the character itself.  */        \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
3816
3817
/* Encode character C in CHARSET: obtain its code point with
   ENCODE_CHAR, then hand it to the one-byte encoder, or split it
   into high and low bytes for the two-byte encoder when the
   dimension of CHARSET is not 1.  */
#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do {                                                                     \
    int code = ENCODE_CHAR ((charset), (c));                               \
                                                                           \
    if (CHARSET_DIMENSION (charset) != 1)                                  \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
    else                                                                   \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
  } while (0)
3827
3828
3829 /* Produce designation and invocation codes at a place pointed by DST
3830    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3831    Return new DST.  */
3832
3833 unsigned char *
3834 encode_invocation_designation (charset, coding, dst, p_nchars)
3835      struct charset *charset;
3836      struct coding_system *coding;
3837      unsigned char *dst;
3838      int *p_nchars;
3839 {
3840   int multibytep = coding->dst_multibyte;
3841   int produced_chars = *p_nchars;
3842   int reg;                      /* graphic register number */
3843   int id = CHARSET_ID (charset);
3844
3845   /* At first, check designations.  */
3846   for (reg = 0; reg < 4; reg++)
3847     if (id == CODING_ISO_DESIGNATION (coding, reg))
3848       break;
3849
3850   if (reg >= 4)
3851     {
3852       /* CHARSET is not yet designated to any graphic registers.  */
3853       /* At first check the requested designation.  */
3854       reg = CODING_ISO_REQUEST (coding, id);
3855       if (reg < 0)
3856         /* Since CHARSET requests no special designation, designate it
3857            to graphic register 0.  */
3858         reg = 0;
3859
3860       ENCODE_DESIGNATION (charset, reg, coding);
3861     }
3862
3863   if (CODING_ISO_INVOCATION (coding, 0) != reg
3864       && CODING_ISO_INVOCATION (coding, 1) != reg)
3865     {
3866       /* Since the graphic register REG is not invoked to any graphic
3867          planes, invoke it to graphic plane 0.  */
3868       switch (reg)
3869         {
3870         case 0:                 /* graphic register 0 */
3871           ENCODE_SHIFT_IN;
3872           break;
3873
3874         case 1:                 /* graphic register 1 */
3875           ENCODE_SHIFT_OUT;
3876           break;
3877
3878         case 2:                 /* graphic register 2 */
3879           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3880             ENCODE_SINGLE_SHIFT_2;
3881           else
3882             ENCODE_LOCKING_SHIFT_2;
3883           break;
3884
3885         case 3:                 /* graphic register 3 */
3886           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3887             ENCODE_SINGLE_SHIFT_3;
3888           else
3889             ENCODE_LOCKING_SHIFT_3;
3890           break;
3891         }
3892     }
3893
3894   *p_nchars = produced_chars;
3895   return dst;
3896 }
3897
3898 /* The following three macros produce codes for indicating direction
3899    of text.  */
/* Emit the control sequence introducer: ESC [ when the coding system
   is restricted to seven bits, the single byte CSI otherwise.  The
   seven-bit flag is tested as one bit of the flag word, as the other
   encoding macros do; comparing the whole word for equality would
   pick the eight-bit form whenever any other flag is also set.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)
3907
3908
/* Emit the control sequence for right-to-left text: the control
   sequence introducer followed by "2]".
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is written without an argument list; a trailing `(dst)' would be
   left after its `while (0)' and would not compile.  */
#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)
3914
3915
/* Emit the control sequence for left-to-right text: the control
   sequence introducer followed by "0]".
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is written without an argument list; a trailing `(dst)' would be
   left after its `while (0)' and would not compile.  */
#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
3921
3922
/* Bring the graphic planes and registers back to their initial
   state: shift in if graphic plane 0 does not have register 0
   invoked, then re-designate every register that has an initial
   charset but currently holds a different one.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int reg;                                                            \
    struct charset *charset;                                            \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    for (reg = 0; reg < 4; reg++)                                       \
      {                                                                 \
        /* No initial charset for this register: leave it alone.  */    \
        if (CODING_ISO_INITIAL (coding, reg) < 0)                       \
          continue;                                                     \
        /* Already holding its initial charset.  */                     \
        if (CODING_ISO_DESIGNATION (coding, reg)                        \
            == CODING_ISO_INITIAL (coding, reg))                        \
          continue;                                                     \
        charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg));   \
        ENCODE_DESIGNATION (charset, reg, coding);                      \
      }                                                                 \
  } while (0)
3941
3942
3943 /* Produce designation sequences of charsets in the line started from
3944    SRC to a place pointed by DST, and return updated DST.
3945
3946    If the current block ends before any end-of-line, we may fail to
3947    find all the necessary designations.  */
3948
3949 static unsigned char *
3950 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3951      struct coding_system *coding;
3952      int *charbuf, *charbuf_end;
3953      unsigned char *dst;
3954 {
3955   struct charset *charset;
3956   /* Table of charsets to be designated to each graphic register.  */
3957   int r[4];
3958   int c, found = 0, reg;
3959   int produced_chars = 0;
3960   int multibytep = coding->dst_multibyte;
3961   Lisp_Object attrs;
3962   Lisp_Object charset_list;
3963
3964   attrs = CODING_ID_ATTRS (coding->id);
3965   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3966   if (EQ (charset_list, Qiso_2022))
3967     charset_list = Viso_2022_charset_list;
3968
3969   for (reg = 0; reg < 4; reg++)
3970     r[reg] = -1;
3971
3972   while (found < 4)
3973     {
3974       int id;
3975
3976       c = *charbuf++;
3977       if (c == '\n')
3978         break;
3979       charset = char_charset (c, charset_list, NULL);
3980       id = CHARSET_ID (charset);
3981       reg = CODING_ISO_REQUEST (coding, id);
3982       if (reg >= 0 && r[reg] < 0)
3983         {
3984           found++;
3985           r[reg] = id;
3986         }
3987     }
3988
3989   if (found)
3990     {
3991       for (reg = 0; reg < 4; reg++)
3992         if (r[reg] >= 0
3993             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3994           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3995     }
3996
3997   return dst;
3998 }
3999
4000 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
     Encode the CODING->charbuf_used elements of CODING->charbuf as
     ISO-2022 bytes, storing them from CODING->destination +
     CODING->produced.  A negative element starts an annotation, which
     is consumed without producing any output.  Before returning, save
     the beginning-of-line designation state with CODING_ISO_BOL and
     update CODING->produced and CODING->produced_char.  Always return
     0.  */
4001
4002 static int
4003 encode_coding_iso_2022 (coding)
4004      struct coding_system *coding;
4005 {
       /* NOTE(review): MULTIBYTEP and DST_END are never named again in
          this function; presumably the EMIT_* and ASSURE_DESTINATION
          macros expand to code that uses them -- confirm against the
          macro definitions.  */
4006   int multibytep = coding->dst_multibyte;
4007   int *charbuf = coding->charbuf;
4008   int *charbuf_end = charbuf + coding->charbuf_used;
4009   unsigned char *dst = coding->destination + coding->produced;
4010   unsigned char *dst_end = coding->destination + coding->dst_bytes;
       /* Amount passed to ASSURE_DESTINATION before each element is
          processed.  */
4011   int safe_room = 16;
       /* Nonzero if designation sequences must be produced before the
          next character: the coding system designates at beginning of
          line and the saved state says we are at one.  */
4012   int bol_designation
4013     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4014        && CODING_ISO_BOL (coding));
4015   int produced_chars = 0;
4016   Lisp_Object attrs, eol_type, charset_list;
4017   int ascii_compatible;
4018   int c;
       /* Charset ID taken from the latest charset annotation, or -1 if
          there is none to honor.  */
4019   int preferred_charset_id = -1;
4020
4021   CODING_GET_INFO (coding, attrs, charset_list);
4022   eol_type = CODING_ID_EOL_TYPE (coding->id);
       /* NOTE(review): a vector here presumably means the EOL format is
          not decided yet -- confirm.  It is handled like Qunix.  */
4023   if (VECTORP (eol_type))
4024     eol_type = Qunix;
4025
4026   setup_iso_safe_charsets (attrs);
4027   /* Charset list may have been changed.  */
4028   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4029   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
4030
4031   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4032
4033   while (charbuf < charbuf_end)
4034     {
4035       ASSURE_DESTINATION (safe_room);
4036
4037       if (bol_designation)
4038         {
4039           unsigned char *dst_prev = dst;
4040
4041           /* We have to produce designation sequences if any now.  */
4042           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4043           bol_designation = 0;
4044           /* We are sure that designation sequences are all ASCII bytes.  */
4045           produced_chars += dst - dst_prev;
4046         }
4047
4048       c = *charbuf++;
4049
4050       if (c < 0)
4051         {
4052           /* Handle an annotation.  */
               /* C is the negated length of the annotation, counting
                  this element; *CHARBUF is the annotation type.  */
4053           switch (*charbuf)
4054             {
4055             case CODING_ANNOTATE_COMPOSITION_MASK:
4056               /* Not yet implemented.  */
4057               break;
4058             case CODING_ANNOTATE_CHARSET_MASK:
                   /* CHARBUF[2] is the annotated charset ID.  Ignore
                      it unless it is a member of this coding system's
                      charset list.  */
4059               preferred_charset_id = charbuf[2];
4060               if (preferred_charset_id >= 0
4061                   && NILP (Fmemq (make_number (preferred_charset_id),
4062                                   charset_list)))
4063                 preferred_charset_id = -1;
4064               break;
4065             default:
4066               abort ();
4067             }
               /* Skip the rest of the annotation.  */
4068           charbuf += -c - 1;
4069           continue;
4070         }
4071
4072       /* Now encode the character C.  */
4073       if (c < 0x20 || c == 0x7F)
4074         {
               /* Control character.  At an end of line (newline, or
                  carriage return when the EOL type is Qmac) the shift
                  state is reset and/or the recorded designations are
                  reinitialized, as the coding system flags request.  */
4075           if (c == '\n'
4076               || (c == '\r' && EQ (eol_type, Qmac)))
4077             {
4078               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4079                 ENCODE_RESET_PLANE_AND_REGISTER ();
4080               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4081                 {
4082                   int i;
4083
                       /* Only the recorded state changes here; no
                          bytes are produced.  */
4084                   for (i = 0; i < 4; i++)
4085                     CODING_ISO_DESIGNATION (coding, i)
4086                       = CODING_ISO_INITIAL (coding, i);
4087                 }
                   /* The next element starts a line, so designate
                      there if the coding system asks for that.  */
4088               bol_designation
4089                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4090             }
4091           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4092             ENCODE_RESET_PLANE_AND_REGISTER ();
4093           EMIT_ONE_ASCII_BYTE (c);
4094         }
4095       else if (ASCII_CHAR_P (c))
4096         {
               /* An ASCII-compatible coding system emits ASCII as is;
                  otherwise ASCII goes through the normal designation
                  and invocation machinery.  */
4097           if (ascii_compatible)
4098             EMIT_ONE_ASCII_BYTE (c);
4099           else
4100             {
4101               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4102               ENCODE_ISO_CHARACTER (charset, c);
4103             }
4104         }
4105       else if (CHAR_BYTE8_P (c))
4106         {
               /* A raw-byte character is emitted as its byte value.  */
4107           c = CHAR_TO_BYTE8 (c);
4108           EMIT_ONE_BYTE (c);
4109         }
4110       else
4111         {
4112           struct charset *charset;
4113
               /* Use the annotated charset when it contains C, else
                  the charset char_charset picks from the charset
                  list.  */
4114           if (preferred_charset_id >= 0)
4115             {
4116               charset = CHARSET_FROM_ID (preferred_charset_id);
4117               if (! CHAR_CHARSET_P (c, charset))
4118                 charset = char_charset (c, charset_list, NULL);
4119             }
4120           else
4121             charset = char_charset (c, charset_list, NULL);
4122           if (!charset)
4123             {
                   /* No charset can encode C.  In safe-encoding mode
                      substitute CODING_INHIBIT_CHARACTER_SUBSTITUTION
                      encoded as ASCII, otherwise the coding system's
                      default character.
                      NOTE(review): the second char_charset result is
                      used without a null check -- confirm that the
                      default character is always encodable.  */
4124               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4125                 {
4126                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4127                   charset = CHARSET_FROM_ID (charset_ascii);
4128                 }
4129               else
4130                 {
4131                   c = coding->default_char;
4132                   charset = char_charset (c, charset_list, NULL);
4133                 }
4134             }
4135           ENCODE_ISO_CHARACTER (charset, c);
4136         }
4137     }
4138
       /* At the end of the last block, reset the state if the coding
          system resets at end of line.  */
4139   if (coding->mode & CODING_MODE_LAST_BLOCK
4140       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4141     {
4142       ASSURE_DESTINATION (safe_room);
4143       ENCODE_RESET_PLANE_AND_REGISTER ();
4144     }
4145   record_conversion_result (coding, CODING_RESULT_SUCCESS);
       /* Remember whether the next block starts at a beginning of
          line that still needs designation.  */
4146   CODING_ISO_BOL (coding) = bol_designation;
4147   coding->produced_char += produced_chars;
4148   coding->produced = dst - coding->destination;
4149   return 0;
4150 }
4151
4152 \f
4153 /*** 8. SJIS and BIG5 handlers ***/
4154
4155 /* Although SJIS and BIG5 are not ISO coding systems, they are used
4156    quite widely.  So, for the moment, Emacs supports them directly in
4157    C code.  But, in the future, they may be supported only by CCL.  */
4158
4159 /* SJIS is a coding system encoding three character sets: ASCII, right
4160    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
4161    as is.  A character of charset katakana-jisx0201 is encoded by
4162    "position-code + 0x80".  A character of charset japanese-jisx0208
4163    is encoded in two bytes, but the two position-codes are divided and
4164    shifted so that they fit in the ranges below.
4165
4166    --- CODE RANGE of SJIS ---
4167    (character set)      (range)
4168    ASCII                0x00 .. 0x7F
4169    KATAKANA-JISX0201    0xA0 .. 0xDF
4170    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4171             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4172    -------------------------------
4173
4174 */
4175
4176 /* BIG5 is a coding system encoding two character sets: ASCII and
4177    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
4178    character set and is encoded in two bytes.
4179
4180    --- CODE RANGE of BIG5 ---
4181    (character set)      (range)
4182    ASCII                0x00 .. 0x7F
4183    Big5 (1st byte)      0xA1 .. 0xFE
4184         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4185    --------------------------
4186
4187   */
4188
4189 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4190    Check if a text is encoded in SJIS.  If it is, return
4191    CATEGORY_MASK_SJIS, else return 0.  */
4192
4193 static int
4194 detect_coding_sjis (coding, detect_info)
4195      struct coding_system *coding;
4196      struct coding_detection_info *detect_info;
4197 {
4198   const unsigned char *src = coding->source, *src_base;
4199   const unsigned char *src_end = coding->source + coding->src_bytes;
4200   int multibytep = coding->src_multibyte;
4201   int consumed_chars = 0;
4202   int found = 0;
4203   int c;
4204
4205   detect_info->checked |= CATEGORY_MASK_SJIS;
4206   /* A coding system of this category is always ASCII compatible.  */
4207   src += coding->head_ascii;
4208
4209   while (1)
4210     {
4211       src_base = src;
4212       ONE_MORE_BYTE (c);
4213       if (c < 0x80)
4214         continue;
4215       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4216         {
4217           ONE_MORE_BYTE (c);
4218           if (c < 0x40 || c == 0x7F || c > 0xFC)
4219             break;
4220           found = CATEGORY_MASK_SJIS;
4221         }
4222       else if (c >= 0xA0 && c < 0xE0)
4223         found = CATEGORY_MASK_SJIS;
4224       else
4225         break;
4226     }
4227   detect_info->rejected |= CATEGORY_MASK_SJIS;
4228   return 0;
4229
4230  no_more_source:
4231   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4232     {
4233       detect_info->rejected |= CATEGORY_MASK_SJIS;
4234       return 0;
4235     }
4236   detect_info->found |= found;
4237   return 1;
4238 }
4239
4240 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4241    Check if a text is encoded in BIG5.  If it is, return
4242    CATEGORY_MASK_BIG5, else return 0.  */
4243
4244 static int
4245 detect_coding_big5 (coding, detect_info)
4246      struct coding_system *coding;
4247      struct coding_detection_info *detect_info;
4248 {
4249   const unsigned char *src = coding->source, *src_base;
4250   const unsigned char *src_end = coding->source + coding->src_bytes;
4251   int multibytep = coding->src_multibyte;
4252   int consumed_chars = 0;
4253   int found = 0;
4254   int c;
4255
4256   detect_info->checked |= CATEGORY_MASK_BIG5;
4257   /* A coding system of this category is always ASCII compatible.  */
4258   src += coding->head_ascii;
4259
4260   while (1)
4261     {
4262       src_base = src;
4263       ONE_MORE_BYTE (c);
4264       if (c < 0x80)
4265         continue;
4266       if (c >= 0xA1)
4267         {
4268           ONE_MORE_BYTE (c);
4269           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4270             return 0;
4271           found = CATEGORY_MASK_BIG5;
4272         }
4273       else
4274         break;
4275     }
4276   detect_info->rejected |= CATEGORY_MASK_BIG5;
4277   return 0;
4278
4279  no_more_source:
4280   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4281     {
4282       detect_info->rejected |= CATEGORY_MASK_BIG5;
4283       return 0;
4284     }
4285   detect_info->found |= found;
4286   return 1;
4287 }
4288
4289 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4290    Decode SJIS text.  Bytes are read from CODING->source +
     CODING->consumed and the decoded characters are appended to
     CODING->charbuf.  A byte that does not start a valid sequence is
     stored as a single raw-byte character and counted in
     CODING->errors.  On return CODING->consumed,
     CODING->consumed_char and CODING->charbuf_used are updated.  */
4291
4292 static void
4293 decode_coding_sjis (coding)
4294      struct coding_system *coding;
4295 {
4296   const unsigned char *src = coding->source + coding->consumed;
4297   const unsigned char *src_end = coding->source + coding->src_bytes;
       /* Start of the sequence being decoded.  Used to rewind on an
          invalid code and to report how much was really consumed.  */
4298   const unsigned char *src_base;
4299   int *charbuf = coding->charbuf + coding->charbuf_used;
       /* Stop MAX_ANNOTATION_LENGTH elements before the real end of
          the buffer; presumably this keeps room for the charset
          annotation added after the loop.  */
4300   int *charbuf_end
4301     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4302   int consumed_chars = 0, consumed_chars_base;
       /* NOTE(review): MULTIBYTEP is never named again in this
          function; presumably ONE_MORE_BYTE uses it -- confirm.  */
4303   int multibytep = coding->src_multibyte;
4304   struct charset *charset_roman, *charset_kanji, *charset_kana;
4305   struct charset *charset_kanji2;
4306   Lisp_Object attrs, charset_list, val;
       /* CHAR_OFFSET counts decoded characters.  LAST_ID is the
          charset of the current run of non-ASCII characters, which
          started at LAST_OFFSET.  */
4307   int char_offset = coding->produced_char;
4308   int last_offset = char_offset;
4309   int last_id = charset_ascii;
4310   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* Byte read ahead after a carriage return when the EOL type is
          Qdos, or -1 if there is none.  It is processed on the next
          iteration.  */
4311   int byte_after_cr = -1;
4312
4313   CODING_GET_INFO (coding, attrs, charset_list);
4314
       /* The charset list gives, in this order, the roman, kana and
          kanji charsets, optionally followed by a second kanji
          charset.  */
4315   val = charset_list;
4316   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4317   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4318   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4319   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4320
4321   while (1)
4322     {
4323       int c, c1;
4324       struct charset *charset;
4325
4326       src_base = src;
4327       consumed_chars_base = consumed_chars;
4328
           /* The character buffer is full.  */
4329       if (charbuf >= charbuf_end)
4330         break;
4331
4332       if (byte_after_cr >= 0)
4333         c = byte_after_cr, byte_after_cr = -1;
4334       else
4335         ONE_MORE_BYTE (c);
           /* NOTE(review): a negative value from ONE_MORE_BYTE is
              presumably a non-byte element of a multibyte source --
              confirm.  It is handled as an invalid code.  */
4336       if (c < 0)
4337         goto invalid_code;
4338       if (c < 0x80)
4339         {
               /* With DOS end-of-line, read one byte ahead after a
                  carriage return.  */
4340           if (eol_crlf && c == '\r')
4341             ONE_MORE_BYTE (byte_after_cr);
4342           charset = charset_roman;
4343         }
4344       else if (c == 0x80 || c == 0xA0)
4345         goto invalid_code;
4346       else if (c >= 0xA1 && c <= 0xDF)
4347         {
4348           /* SJIS -> JISX0201-Kana */
4349           c &= 0x7F;
4350           charset = charset_kana;
4351         }
4352       else if (c <= 0xEF)
4353         {
               /* Here C is in 0x81..0x9F or 0xE0..0xEF: the first
                  byte of a two-byte code.  */
4354           /* SJIS -> JISX0208 */
4355           ONE_MORE_BYTE (c1);
4356           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4357             goto invalid_code;
4358           c = (c << 8) | c1;
4359           SJIS_TO_JIS (c);
4360           charset = charset_kanji;
4361         }
4362       else if (c <= 0xFC && charset_kanji2)
4363         {
               /* Here C is in 0xF0..0xFC.  These first bytes are
                  valid only when a second kanji charset is given.  */
4364           /* SJIS -> JISX0213-2 */
4365           ONE_MORE_BYTE (c1);
4366           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4367             goto invalid_code;
4368           c = (c << 8) | c1;
4369           SJIS_TO_JIS2 (c);
4370           charset = charset_kanji2;
4371         }
4372       else
4373         goto invalid_code;
           /* A run of another non-ASCII charset starts here.  Record
              the run that just ended, unless it was ASCII.  */
4374       if (charset->id != charset_ascii
4375           && last_id != charset->id)
4376         {
4377           if (last_id != charset_ascii)
4378             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4379           last_id = charset->id;
4380           last_offset = char_offset;
4381         }
           /* Presumably converts code C of CHARSET to a character,
              stored back into C.  */
4382       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4383       *charbuf++ = c;
4384       char_offset++;
4385       continue;
4386
4387     invalid_code:
           /* Go back to the start of the sequence and take just its
              first byte, stored as a raw-byte character (or as -C
              when ONE_MORE_BYTE gives a negative value).  */
4388       src = src_base;
4389       consumed_chars = consumed_chars_base;
4390       ONE_MORE_BYTE (c);
4391       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4392       char_offset++;
4393       coding->errors++;
4394     }
4395
4396  no_more_source:
       /* Record the last run of a non-ASCII charset.  Only what
          precedes SRC_BASE counts as consumed, so an incomplete
          sequence at the end is left for the next call.  */
4397   if (last_id != charset_ascii)
4398     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4399   coding->consumed_char += consumed_chars_base;
4400   coding->consumed = src_base - coding->source;
4401   coding->charbuf_used = charbuf - coding->charbuf;
4402 }
4403
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
   Decode BIG5 text.  Bytes are read from CODING->source +
   CODING->consumed and the decoded characters are appended to
   CODING->charbuf.  A byte that does not start a valid sequence is
   stored as a single raw-byte character and counted in
   CODING->errors.  On return CODING->consumed,
   CODING->consumed_char and CODING->charbuf_used are updated.  */
4404 static void
4405 decode_coding_big5 (coding)
4406      struct coding_system *coding;
4407 {
4408   const unsigned char *src = coding->source + coding->consumed;
4409   const unsigned char *src_end = coding->source + coding->src_bytes;
       /* Start of the sequence being decoded.  Used to rewind on an
          invalid code and to report how much was really consumed.  */
4410   const unsigned char *src_base;
4411   int *charbuf = coding->charbuf + coding->charbuf_used;
       /* Stop MAX_ANNOTATION_LENGTH elements before the real end of
          the buffer; presumably this keeps room for the charset
          annotation added after the loop.  */
4412   int *charbuf_end
4413     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4414   int consumed_chars = 0, consumed_chars_base;
       /* NOTE(review): MULTIBYTEP is never named again in this
          function; presumably ONE_MORE_BYTE uses it -- confirm.  */
4415   int multibytep = coding->src_multibyte;
4416   struct charset *charset_roman, *charset_big5;
4417   Lisp_Object attrs, charset_list, val;
       /* CHAR_OFFSET counts decoded characters.  LAST_ID is the
          charset of the current run of non-ASCII characters, which
          started at LAST_OFFSET.  */
4418   int char_offset = coding->produced_char;
4419   int last_offset = char_offset;
4420   int last_id = charset_ascii;
4421   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* Byte read ahead after a carriage return when the EOL type is
          Qdos, or -1 if there is none.  It is processed on the next
          iteration.  */
4422   int byte_after_cr = -1;
4423
4424   CODING_GET_INFO (coding, attrs, charset_list);
       /* The charset list gives the roman charset first, then the
          Big5 charset.  */
4425   val = charset_list;
4426   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4427   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4428
4429   while (1)
4430     {
4431       int c, c1;
4432       struct charset *charset;
4433
4434       src_base = src;
4435       consumed_chars_base = consumed_chars;
4436
           /* The character buffer is full.  */
4437       if (charbuf >= charbuf_end)
4438         break;
4439
4440       if (byte_after_cr >= 0)
4441         c = byte_after_cr, byte_after_cr = -1;
4442       else
4443         ONE_MORE_BYTE (c);
4444
           /* NOTE(review): a negative value from ONE_MORE_BYTE is
              presumably a non-byte element of a multibyte source --
              confirm.  It is handled as an invalid code.  */
4445       if (c < 0)
4446         goto invalid_code;
4447       if (c < 0x80)
4448         {
               /* With DOS end-of-line, read one byte ahead after a
                  carriage return.  */
4449           if (eol_crlf && c == '\r')
4450             ONE_MORE_BYTE (byte_after_cr);
4451           charset = charset_roman;
4452         }
4453       else
4454         {
4455           /* BIG5 -> Big5 */
               /* The first byte must be in 0xA1..0xFE, the second in
                  0x40..0x7E or 0xA1..0xFE.  */
4456           if (c < 0xA1 || c > 0xFE)
4457             goto invalid_code;
4458           ONE_MORE_BYTE (c1);
4459           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4460             goto invalid_code;
4461           c = c << 8 | c1;
4462           charset = charset_big5;
4463         }
           /* A run of another non-ASCII charset starts here.  Record
              the run that just ended, unless it was ASCII.  */
4464       if (charset->id != charset_ascii
4465           && last_id != charset->id)
4466         {
4467           if (last_id != charset_ascii)
4468             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4469           last_id = charset->id;
4470           last_offset = char_offset;
4471         }
           /* Presumably converts code C of CHARSET to a character,
              stored back into C.  */
4472       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4473       *charbuf++ = c;
4474       char_offset++;
4475       continue;
4476
4477     invalid_code:
           /* Go back to the start of the sequence and take just its
              first byte, stored as a raw-byte character (or as -C
              when ONE_MORE_BYTE gives a negative value).  */
4478       src = src_base;
4479       consumed_chars = consumed_chars_base;
4480       ONE_MORE_BYTE (c);
4481       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4482       char_offset++;
4483       coding->errors++;
4484     }
4485
4486  no_more_source:
       /* Record the last run of a non-ASCII charset.  Only what
          precedes SRC_BASE counts as consumed, so an incomplete
          sequence at the end is left for the next call.  */
4487   if (last_id != charset_ascii)
4488     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4489   coding->consumed_char += consumed_chars_base;
4490   coding->consumed = src_base - coding->source;
4491   coding->charbuf_used = charbuf - coding->charbuf;
4492 }
4493
4494 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4495    This function can encode charsets `ascii', `katakana-jisx0201',
4496    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4497    are sure that all these charsets are registered as official charset
4498    (i.e. do not have extended leading-codes).  Characters of other
4499    charsets are produced without any encoding.  If SJIS_P is 1, encode
4500    SJIS text, else encode BIG5 text.  */
4501
4502 static int
4503 encode_coding_sjis (coding)
4504      struct coding_system *coding;
4505 {
4506   int multibytep = coding->dst_multibyte;
4507   int *charbuf = coding->charbuf;
4508   int *charbuf_end = charbuf + coding->charbuf_used;
4509   unsigned char *dst = coding->destination + coding->produced;
4510   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4511   int safe_room = 4;
4512   int produced_chars = 0;
4513   Lisp_Object attrs, charset_list, val;
4514   int ascii_compatible;
4515   struct charset *charset_roman, *charset_kanji, *charset_kana;
4516   struct charset *charset_kanji2;
4517   int c;
4518
4519   CODING_GET_INFO (coding, attrs, charset_list);
4520   val = charset_list;
4521   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4522   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4523   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4524   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4525
4526   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4527
4528   while (charbuf < charbuf_end)
4529     {
4530       ASSURE_DESTINATION (safe_room);
4531       c = *charbuf++;
4532       /* Now encode the character C.  */
4533       if (ASCII_CHAR_P (c) && ascii_compatible)
4534         EMIT_ONE_ASCII_BYTE (c);
4535       else if (CHAR_BYTE8_P (c))
4536         {
4537           c = CHAR_TO_BYTE8 (c);
4538           EMIT_ONE_BYTE (c);
4539         }
4540       else
4541         {
4542           unsigned code;
4543           struct charset *charset = char_charset (c, charset_list, &code);
4544
4545           if (!charset)
4546             {
4547               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4548                 {
4549                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4550                   charset = CHARSET_FROM_ID (charset_ascii);
4551                 }
4552               else
4553                 {
4554                   c = coding->default_char;
4555                   charset = char_charset (c, charset_list, &code);
4556                 }
4557             }
4558           if (code == CHARSET_INVALID_CODE (charset))
4559             abort ();
4560           if (charset == charset_kanji)
4561             {
4562               int c1, c2;
4563               JIS_TO_SJIS (code);
4564               c1 = code >> 8, c2 = code & 0xFF;
4565               EMIT_TWO_BYTES (c1, c2);
4566             }
4567           else if (charset == charset_kana)
4568             EMIT_ONE_BYTE (code | 0x80);
4569           else if (charset_kanji2 && charset == charset_kanji2)
4570             {
4571               int c1, c2;
4572
4573               c1 = code >> 8;
4574               if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4575                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4576                 {
4577                   JIS_TO_SJIS2 (code);
4578                   c1 = code >> 8, c2 = code & 0xFF;
4579                   EMIT_TWO_BYTES (c1, c2);
4580                 }
4581               else
4582                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4583             }
4584           else
4585             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4586         }
4587     }
4588   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4589   coding->produced_char += produced_chars;
4590   coding->produced = dst - coding->destination;
4591   return 0;
4592 }
4593
4594 static int
4595 encode_coding_big5 (coding)
4596      struct coding_system *coding;
4597 {
4598   int multibytep = coding->dst_multibyte;
4599   int *charbuf = coding->charbuf;
4600   int *charbuf_end = charbuf + coding->charbuf_used;
4601   unsigned char *dst = coding->destination + coding->produced;
4602   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4603   int safe_room = 4;
4604   int produced_chars = 0;
4605   Lisp_Object attrs, charset_list, val;
4606   int ascii_compatible;
4607   struct charset *charset_roman, *charset_big5;
4608   int c;
4609
4610   CODING_GET_INFO (coding, attrs, charset_list);
4611   val = charset_list;
4612   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4613   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4614   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4615
4616   while (charbuf < charbuf_end)
4617     {
4618       ASSURE_DESTINATION (safe_room);
4619       c = *charbuf++;
4620       /* Now encode the character C.  */
4621       if (ASCII_CHAR_P (c) && ascii_compatible)
4622         EMIT_ONE_ASCII_BYTE (c);
4623       else if (CHAR_BYTE8_P (c))
4624         {
4625           c = CHAR_TO_BYTE8 (c);
4626           EMIT_ONE_BYTE (c);
4627         }
4628       else
4629         {
4630           unsigned code;
4631           struct charset *charset = char_charset (c, charset_list, &code);
4632
4633           if (! charset)
4634             {
4635               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4636                 {
4637                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4638                   charset = CHARSET_FROM_ID (charset_ascii);
4639                 }
4640               else
4641                 {
4642                   c = coding->default_char;
4643                   charset = char_charset (c, charset_list, &code);
4644                 }
4645             }
4646           if (code == CHARSET_INVALID_CODE (charset))
4647             abort ();
4648           if (charset == charset_big5)
4649             {
4650               int c1, c2;
4651
4652               c1 = code >> 8, c2 = code & 0xFF;
4653               EMIT_TWO_BYTES (c1, c2);
4654             }
4655           else
4656             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4657         }
4658     }
4659   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4660   coding->produced_char += produced_chars;
4661   coding->produced = dst - coding->destination;
4662   return 0;
4663 }
4664
4665 \f
4666 /*** 9. CCL handlers ***/
4667
4668 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4669    Check if a text is encoded in a coding system of which
4670    encoder/decoder are written in CCL program.  If it is, return
4671    CATEGORY_MASK_CCL, else return 0.  */
4672
4673 static int
4674 detect_coding_ccl (coding, detect_info)
4675      struct coding_system *coding;
4676      struct coding_detection_info *detect_info;
4677 {
4678   const unsigned char *src = coding->source, *src_base;
4679   const unsigned char *src_end = coding->source + coding->src_bytes;
4680   int multibytep = coding->src_multibyte;
4681   int consumed_chars = 0;
4682   int found = 0;
4683   unsigned char *valids;
4684   int head_ascii = coding->head_ascii;
4685   Lisp_Object attrs;
4686
4687   detect_info->checked |= CATEGORY_MASK_CCL;
4688
4689   coding = &coding_categories[coding_category_ccl];
4690   valids = CODING_CCL_VALIDS (coding);
4691   attrs = CODING_ID_ATTRS (coding->id);
4692   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4693     src += head_ascii;
4694
4695   while (1)
4696     {
4697       int c;
4698
4699       src_base = src;
4700       ONE_MORE_BYTE (c);
4701       if (c < 0 || ! valids[c])
4702         break;
4703       if ((valids[c] > 1))
4704         found = CATEGORY_MASK_CCL;
4705     }
4706   detect_info->rejected |= CATEGORY_MASK_CCL;
4707   return 0;
4708
4709  no_more_source:
4710   detect_info->found |= found;
4711   return 1;
4712 }
4713
4714 static void
4715 decode_coding_ccl (coding)
4716      struct coding_system *coding;
4717 {
4718   const unsigned char *src = coding->source + coding->consumed;
4719   const unsigned char *src_end = coding->source + coding->src_bytes;
4720   int *charbuf = coding->charbuf + coding->charbuf_used;
4721   int *charbuf_end = coding->charbuf + coding->charbuf_size;
4722   int consumed_chars = 0;
4723   int multibytep = coding->src_multibyte;
4724   struct ccl_program ccl;
4725   int source_charbuf[1024];
4726   int source_byteidx[1024];
4727   Lisp_Object attrs, charset_list;
4728
4729   CODING_GET_INFO (coding, attrs, charset_list);
4730   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4731
4732   while (src < src_end)
4733     {
4734       const unsigned char *p = src;
4735       int *source, *source_end;
4736       int i = 0;
4737
4738       if (multibytep)
4739         while (i < 1024 && p < src_end)
4740           {
4741             source_byteidx[i] = p - src;
4742             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4743           }
4744       else
4745         while (i < 1024 && p < src_end)
4746           source_charbuf[i++] = *p++;
4747
4748       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4749         ccl.last_block = 1;
4750
4751       source = source_charbuf;
4752       source_end = source + i;
4753       while (source < source_end)
4754         {
4755           ccl_driver (&ccl, source, charbuf,
4756                       source_end - source, charbuf_end - charbuf,
4757                       charset_list);
4758           source += ccl.consumed;
4759           charbuf += ccl.produced;
4760           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4761             break;
4762         }
4763       if (source < source_end)
4764         src += source_byteidx[source - source_charbuf];
4765       else
4766         src = p;
4767       consumed_chars += source - source_charbuf;
4768
4769       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4770           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4771         break;
4772     }
4773
4774   switch (ccl.status)
4775     {
4776     case CCL_STAT_SUSPEND_BY_SRC:
4777       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4778       break;
4779     case CCL_STAT_SUSPEND_BY_DST:
4780       break;
4781     case CCL_STAT_QUIT:
4782     case CCL_STAT_INVALID_CMD:
4783       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4784       break;
4785     default:
4786       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4787       break;
4788     }
4789   coding->consumed_char += consumed_chars;
4790   coding->consumed = src - coding->source;
4791   coding->charbuf_used = charbuf - coding->charbuf;
4792 }
4793
4794 static int
4795 encode_coding_ccl (coding)
4796      struct coding_system *coding;
4797 {
4798   struct ccl_program ccl;
4799   int multibytep = coding->dst_multibyte;
4800   int *charbuf = coding->charbuf;
4801   int *charbuf_end = charbuf + coding->charbuf_used;
4802   unsigned char *dst = coding->destination + coding->produced;
4803   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4804   int destination_charbuf[1024];
4805   int i, produced_chars = 0;
4806   Lisp_Object attrs, charset_list;
4807
4808   CODING_GET_INFO (coding, attrs, charset_list);
4809   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4810
4811   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4812   ccl.dst_multibyte = coding->dst_multibyte;
4813
4814   while (charbuf < charbuf_end)
4815     {
4816       ccl_driver (&ccl, charbuf, destination_charbuf,
4817                   charbuf_end - charbuf, 1024, charset_list);
4818       if (multibytep)
4819         {
4820           ASSURE_DESTINATION (ccl.produced * 2);
4821           for (i = 0; i < ccl.produced; i++)
4822             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4823         }
4824       else
4825         {
4826           ASSURE_DESTINATION (ccl.produced);
4827           for (i = 0; i < ccl.produced; i++)
4828             *dst++ = destination_charbuf[i] & 0xFF;
4829           produced_chars += ccl.produced;
4830         }
4831       charbuf += ccl.consumed;
4832       if (ccl.status == CCL_STAT_QUIT
4833           || ccl.status == CCL_STAT_INVALID_CMD)
4834         break;
4835     }
4836
4837   switch (ccl.status)
4838     {
4839     case CCL_STAT_SUSPEND_BY_SRC:
4840       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4841       break;
4842     case CCL_STAT_SUSPEND_BY_DST:
4843       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
4844       break;
4845     case CCL_STAT_QUIT:
4846     case CCL_STAT_INVALID_CMD:
4847       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4848       break;
4849     default:
4850       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4851       break;
4852     }
4853
4854   coding->produced_char += produced_chars;
4855   coding->produced = dst - coding->destination;
4856   return 0;
4857 }
4858
4859
4860 \f
4861 /*** 10, 11. no-conversion handlers ***/
4862
4863 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4864
4865 static void
4866 decode_coding_raw_text (coding)
4867      struct coding_system *coding;
4868 {
4869   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4870
4871   coding->chars_at_source = 1;
4872   coding->consumed_char = coding->src_chars;
4873   coding->consumed = coding->src_bytes;
4874   if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
4875     {
4876       coding->consumed_char--;
4877       coding->consumed--;
4878       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4879     }
4880   else
4881     record_conversion_result (coding, CODING_RESULT_SUCCESS);
4882 }
4883
4884 static int
4885 encode_coding_raw_text (coding)
4886      struct coding_system *coding;
4887 {
4888   int multibytep = coding->dst_multibyte;
4889   int *charbuf = coding->charbuf;
4890   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4891   unsigned char *dst = coding->destination + coding->produced;
4892   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4893   int produced_chars = 0;
4894   int c;
4895
4896   if (multibytep)
4897     {
4898       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4899
4900       if (coding->src_multibyte)
4901         while (charbuf < charbuf_end)
4902           {
4903             ASSURE_DESTINATION (safe_room);
4904             c = *charbuf++;
4905             if (ASCII_CHAR_P (c))
4906               EMIT_ONE_ASCII_BYTE (c);
4907             else if (CHAR_BYTE8_P (c))
4908               {
4909                 c = CHAR_TO_BYTE8 (c);
4910                 EMIT_ONE_BYTE (c);
4911               }
4912             else
4913               {
4914                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4915
4916                 CHAR_STRING_ADVANCE (c, p1);
4917                 while (p0 < p1)
4918                   {
4919                     EMIT_ONE_BYTE (*p0);
4920                     p0++;
4921                   }
4922               }
4923           }
4924       else
4925         while (charbuf < charbuf_end)
4926           {
4927             ASSURE_DESTINATION (safe_room);
4928             c = *charbuf++;
4929             EMIT_ONE_BYTE (c);
4930           }
4931     }
4932   else
4933     {
4934       if (coding->src_multibyte)
4935         {
4936           int safe_room = MAX_MULTIBYTE_LENGTH;
4937
4938           while (charbuf < charbuf_end)
4939             {
4940               ASSURE_DESTINATION (safe_room);
4941               c = *charbuf++;
4942               if (ASCII_CHAR_P (c))
4943                 *dst++ = c;
4944               else if (CHAR_BYTE8_P (c))
4945                 *dst++ = CHAR_TO_BYTE8 (c);
4946               else
4947                 CHAR_STRING_ADVANCE (c, dst);
4948             }
4949         }
4950       else
4951         {
4952           ASSURE_DESTINATION (charbuf_end - charbuf);
4953           while (charbuf < charbuf_end && dst < dst_end)
4954             *dst++ = *charbuf++;
4955         }
4956       produced_chars = dst - (coding->destination + coding->produced);
4957     }
4958   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4959   coding->produced_char += produced_chars;
4960   coding->produced = dst - coding->destination;
4961   return 0;
4962 }
4963
4964 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4965    Check if a text is encoded in a charset-based coding system.  If it
4966    is, return 1, else return 0.  */
4967
4968 static int
4969 detect_coding_charset (coding, detect_info)
4970      struct coding_system *coding;
4971      struct coding_detection_info *detect_info;
4972 {
4973   const unsigned char *src = coding->source, *src_base;
4974   const unsigned char *src_end = coding->source + coding->src_bytes;
4975   int multibytep = coding->src_multibyte;
4976   int consumed_chars = 0;
4977   Lisp_Object attrs, valids;
4978   int found = 0;
4979   int head_ascii = coding->head_ascii;
4980
4981   detect_info->checked |= CATEGORY_MASK_CHARSET;
4982
4983   coding = &coding_categories[coding_category_charset];
4984   attrs = CODING_ID_ATTRS (coding->id);
4985   valids = AREF (attrs, coding_attr_charset_valids);
4986
4987   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4988     src += head_ascii;
4989
4990   while (1)
4991     {
4992       int c;
4993       Lisp_Object val;
4994       struct charset *charset;
4995       int dim, idx;
4996
4997       src_base = src;
4998       ONE_MORE_BYTE (c);
4999       if (c < 0)
5000         continue;
5001       val = AREF (valids, c);
5002       if (NILP (val))
5003         break;
5004       if (c >= 0x80)
5005         found = CATEGORY_MASK_CHARSET;
5006       if (INTEGERP (val))
5007         {
5008           charset = CHARSET_FROM_ID (XFASTINT (val));
5009           dim = CHARSET_DIMENSION (charset);
5010           for (idx = 1; idx < dim; idx++)
5011             {
5012               if (src == src_end)
5013                 goto too_short;
5014               ONE_MORE_BYTE (c);
5015               if (c < charset->code_space[(dim - 1 - idx) * 2]
5016                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5017                 break;
5018             }
5019           if (idx < dim)
5020             break;
5021         }
5022       else
5023         {
5024           idx = 1;
5025           for (; CONSP (val); val = XCDR (val))
5026             {
5027               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5028               dim = CHARSET_DIMENSION (charset);
5029               while (idx < dim)
5030                 {
5031                   if (src == src_end)
5032                     goto too_short;
5033                   ONE_MORE_BYTE (c);
5034                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5035                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5036                     break;
5037                   idx++;
5038                 }
5039               if (idx == dim)
5040                 {
5041                   val = Qnil;
5042                   break;
5043                 }
5044             }
5045           if (CONSP (val))
5046             break;
5047         }
5048     }
5049  too_short:
5050   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5051   return 0;
5052
5053  no_more_source:
5054   detect_info->found |= found;
5055   return 1;
5056 }
5057
5058 static void
5059 decode_coding_charset (coding)
5060      struct coding_system *coding;
5061 {
5062   const unsigned char *src = coding->source + coding->consumed;
5063   const unsigned char *src_end = coding->source + coding->src_bytes;
5064   const unsigned char *src_base;
5065   int *charbuf = coding->charbuf + coding->charbuf_used;
5066   int *charbuf_end
5067     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
5068   int consumed_chars = 0, consumed_chars_base;
5069   int multibytep = coding->src_multibyte;
5070   Lisp_Object attrs, charset_list, valids;
5071   int char_offset = coding->produced_char;
5072   int last_offset = char_offset;
5073   int last_id = charset_ascii;
5074   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5075   int byte_after_cr = -1;
5076
5077   CODING_GET_INFO (coding, attrs, charset_list);
5078   valids = AREF (attrs, coding_attr_charset_valids);
5079
5080   while (1)
5081     {
5082       int c;
5083       Lisp_Object val;
5084       struct charset *charset;
5085       int dim;
5086       int len = 1;
5087       unsigned code;
5088
5089       src_base = src;
5090       consumed_chars_base = consumed_chars;
5091
5092       if (charbuf >= charbuf_end)
5093         break;
5094
5095       if (byte_after_cr >= 0)
5096         {
5097           c = byte_after_cr;
5098           byte_after_cr = -1;
5099         }
5100       else
5101         {
5102           ONE_MORE_BYTE (c);
5103           if (eol_crlf && c == '\r')
5104             ONE_MORE_BYTE (byte_after_cr);
5105         }
5106       if (c < 0)
5107         goto invalid_code;
5108       code = c;
5109
5110       val = AREF (valids, c);
5111       if (NILP (val))
5112         goto invalid_code;
5113       if (INTEGERP (val))
5114         {
5115           charset = CHARSET_FROM_ID (XFASTINT (val));
5116           dim = CHARSET_DIMENSION (charset);
5117           while (len < dim)
5118             {
5119               ONE_MORE_BYTE (c);
5120               code = (code << 8) | c;
5121               len++;
5122             }
5123           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5124                               charset, code, c);
5125         }
5126       else
5127         {
5128           /* VAL is a list of charset IDs.  It is assured that the
5129              list is sorted by charset dimensions (smaller one
5130              comes first).  */
5131           while (CONSP (val))
5132             {
5133               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5134               dim = CHARSET_DIMENSION (charset);
5135               while (len < dim)
5136                 {
5137                   ONE_MORE_BYTE (c);
5138                   code = (code << 8) | c;
5139                   len++;
5140                 }
5141               CODING_DECODE_CHAR (coding, src, src_base,
5142                                   src_end, charset, code, c);
5143               if (c >= 0)
5144                 break;
5145               val = XCDR (val);
5146             }
5147         }
5148       if (c < 0)
5149         goto invalid_code;
5150       if (charset->id != charset_ascii
5151           && last_id != charset->id)
5152         {
5153           if (last_id != charset_ascii)
5154             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5155           last_id = charset->id;
5156           last_offset = char_offset;
5157         }
5158
5159       *charbuf++ = c;
5160       char_offset++;
5161       continue;
5162
5163     invalid_code:
5164       src = src_base;
5165       consumed_chars = consumed_chars_base;
5166       ONE_MORE_BYTE (c);
5167       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5168       char_offset++;
5169       coding->errors++;
5170     }
5171
5172  no_more_source:
5173   if (last_id != charset_ascii)
5174     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5175   coding->consumed_char += consumed_chars_base;
5176   coding->consumed = src_base - coding->source;
5177   coding->charbuf_used = charbuf - coding->charbuf;
5178 }
5179
5180 static int
5181 encode_coding_charset (coding)
5182      struct coding_system *coding;
5183 {
5184   int multibytep = coding->dst_multibyte;
5185   int *charbuf = coding->charbuf;
5186   int *charbuf_end = charbuf + coding->charbuf_used;
5187   unsigned char *dst = coding->destination + coding->produced;
5188   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5189   int safe_room = MAX_MULTIBYTE_LENGTH;
5190   int produced_chars = 0;
5191   Lisp_Object attrs, charset_list;
5192   int ascii_compatible;
5193   int c;
5194
5195   CODING_GET_INFO (coding, attrs, charset_list);
5196   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5197
5198   while (charbuf < charbuf_end)
5199     {
5200       struct charset *charset;
5201       unsigned code;
5202
5203       ASSURE_DESTINATION (safe_room);
5204       c = *charbuf++;
5205       if (ascii_compatible && ASCII_CHAR_P (c))
5206         EMIT_ONE_ASCII_BYTE (c);
5207       else if (CHAR_BYTE8_P (c))
5208         {
5209           c = CHAR_TO_BYTE8 (c);
5210           EMIT_ONE_BYTE (c);
5211         }
5212       else
5213         {
5214           charset = char_charset (c, charset_list, &code);
5215           if (charset)
5216             {
5217               if (CHARSET_DIMENSION (charset) == 1)
5218                 EMIT_ONE_BYTE (code);
5219               else if (CHARSET_DIMENSION (charset) == 2)
5220                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5221               else if (CHARSET_DIMENSION (charset) == 3)
5222                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5223               else
5224                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5225                                  (code >> 8) & 0xFF, code & 0xFF);
5226             }
5227           else
5228             {
5229               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5230                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5231               else
5232                 c = coding->default_char;
5233               EMIT_ONE_BYTE (c);
5234             }
5235         }
5236     }
5237
5238   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5239   coding->produced_char += produced_chars;
5240   coding->produced = dst - coding->destination;
5241   return 0;
5242 }
5243
5244 \f
5245 /*** 10. C library functions ***/
5246
5247 /* Setup coding context CODING from information about CODING_SYSTEM.
5248    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5249    CODING_SYSTEM is invalid, signal an error.  */
5250
5251 void
5252 setup_coding_system (coding_system, coding)
5253      Lisp_Object coding_system;
5254      struct coding_system *coding;
5255 {
5256   Lisp_Object attrs;
5257   Lisp_Object eol_type;
5258   Lisp_Object coding_type;
5259   Lisp_Object val;
5260
5261   if (NILP (coding_system))
5262     coding_system = Qundecided;
5263
5264   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5265
5266   attrs = CODING_ID_ATTRS (coding->id);
5267   eol_type = CODING_ID_EOL_TYPE (coding->id);
5268
5269   coding->mode = 0;
5270   coding->head_ascii = -1;
5271   if (VECTORP (eol_type))
5272     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5273                             | CODING_REQUIRE_DETECTION_MASK);
5274   else if (! EQ (eol_type, Qunix))
5275     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5276                             | CODING_REQUIRE_ENCODING_MASK);
5277   else
5278     coding->common_flags = 0;
5279   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5280     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5281   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5282     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5283   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5284     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5285
5286   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5287   coding->max_charset_id = SCHARS (val) - 1;
5288   coding->safe_charsets = (char *) SDATA (val);
5289   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5290
5291   coding_type = CODING_ATTR_TYPE (attrs);
5292   if (EQ (coding_type, Qundecided))
5293     {
5294       coding->detector = NULL;
5295       coding->decoder = decode_coding_raw_text;
5296       coding->encoder = encode_coding_raw_text;
5297       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5298     }
5299   else if (EQ (coding_type, Qiso_2022))
5300     {
5301       int i;
5302       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5303
5304       /* Invoke graphic register 0 to plane 0.  */
5305       CODING_ISO_INVOCATION (coding, 0) = 0;
5306       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5307       CODING_ISO_INVOCATION (coding, 1)
5308         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5309       /* Setup the initial status of designation.  */
5310       for (i = 0; i < 4; i++)
5311         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5312       /* Not single shifting initially.  */
5313       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5314       /* Beginning of buffer should also be regarded as bol. */
5315       CODING_ISO_BOL (coding) = 1;
5316       coding->detector = detect_coding_iso_2022;
5317       coding->decoder = decode_coding_iso_2022;
5318       coding->encoder = encode_coding_iso_2022;
5319       if (flags & CODING_ISO_FLAG_SAFE)
5320         coding->mode |= CODING_MODE_SAFE_ENCODING;
5321       coding->common_flags
5322         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5323             | CODING_REQUIRE_FLUSHING_MASK);
5324       if (flags & CODING_ISO_FLAG_COMPOSITION)
5325         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5326       if (flags & CODING_ISO_FLAG_DESIGNATION)
5327         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5328       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5329         {
5330           setup_iso_safe_charsets (attrs);
5331           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5332           coding->max_charset_id = SCHARS (val) - 1;
5333           coding->safe_charsets = (char *) SDATA (val);
5334         }
5335       CODING_ISO_FLAGS (coding) = flags;
5336     }
5337   else if (EQ (coding_type, Qcharset))
5338     {
5339       coding->detector = detect_coding_charset;
5340       coding->decoder = decode_coding_charset;
5341       coding->encoder = encode_coding_charset;
5342       coding->common_flags
5343         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5344     }
5345   else if (EQ (coding_type, Qutf_8))
5346     {
5347       val = AREF (attrs, coding_attr_utf_bom);
5348       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5349                                    : EQ (val, Qt) ? utf_with_bom
5350                                    : utf_without_bom);
5351       coding->detector = detect_coding_utf_8;
5352       coding->decoder = decode_coding_utf_8;
5353       coding->encoder = encode_coding_utf_8;
5354       coding->common_flags
5355         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5356       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5357         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5358     }
5359   else if (EQ (coding_type, Qutf_16))
5360     {
5361       val = AREF (attrs, coding_attr_utf_bom);
5362       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5363                                     : EQ (val, Qt) ? utf_with_bom
5364                                     : utf_without_bom);
5365       val = AREF (attrs, coding_attr_utf_16_endian);
5366       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5367                                        : utf_16_little_endian);
5368       CODING_UTF_16_SURROGATE (coding) = 0;
5369       coding->detector = detect_coding_utf_16;
5370       coding->decoder = decode_coding_utf_16;
5371       coding->encoder = encode_coding_utf_16;
5372       coding->common_flags
5373         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5374       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5375         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5376     }
5377   else if (EQ (coding_type, Qccl))
5378     {
5379       coding->detector = detect_coding_ccl;
5380       coding->decoder = decode_coding_ccl;
5381       coding->encoder = encode_coding_ccl;
5382       coding->common_flags
5383         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5384             | CODING_REQUIRE_FLUSHING_MASK);
5385     }
5386   else if (EQ (coding_type, Qemacs_mule))
5387     {
5388       coding->detector = detect_coding_emacs_mule;
5389       coding->decoder = decode_coding_emacs_mule;
5390       coding->encoder = encode_coding_emacs_mule;
5391       coding->common_flags
5392         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5393       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5394           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5395         {
5396           Lisp_Object tail, safe_charsets;
5397           int max_charset_id = 0;
5398
5399           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5400                tail = XCDR (tail))
5401             if (max_charset_id < XFASTINT (XCAR (tail)))
5402               max_charset_id = XFASTINT (XCAR (tail));
5403           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5404                                         make_number (255));
5405           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5406                tail = XCDR (tail))
5407             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5408           coding->max_charset_id = max_charset_id;
5409           coding->safe_charsets = (char *) SDATA (safe_charsets);
5410         }
5411     }
5412   else if (EQ (coding_type, Qshift_jis))
5413     {
5414       coding->detector = detect_coding_sjis;
5415       coding->decoder = decode_coding_sjis;
5416       coding->encoder = encode_coding_sjis;
5417       coding->common_flags
5418         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5419     }
5420   else if (EQ (coding_type, Qbig5))
5421     {
5422       coding->detector = detect_coding_big5;
5423       coding->decoder = decode_coding_big5;
5424       coding->encoder = encode_coding_big5;
5425       coding->common_flags
5426         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5427     }
5428   else                          /* EQ (coding_type, Qraw_text) */
5429     {
5430       coding->detector = NULL;
5431       coding->decoder = decode_coding_raw_text;
5432       coding->encoder = encode_coding_raw_text;
5433       if (! EQ (eol_type, Qunix))
5434         {
5435           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5436           if (! VECTORP (eol_type))
5437             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5438         }
5439
5440     }
5441
5442   return;
5443 }
5444
5445 /* Return a list of charsets supported by CODING.  */
5446
5447 Lisp_Object
5448 coding_charset_list (coding)
5449      struct coding_system *coding;
5450 {
5451   Lisp_Object attrs, charset_list;
5452
5453   CODING_GET_INFO (coding, attrs, charset_list);
5454   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5455     {
5456       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5457
5458       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5459         charset_list = Viso_2022_charset_list;
5460     }
5461   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5462     {
5463       charset_list = Vemacs_mule_charset_list;
5464     }
5465   return charset_list;
5466 }
5467
5468
5469 /* Return raw-text or one of its subsidiaries that has the same
5470    eol_type as CODING-SYSTEM.  */
5471
5472 Lisp_Object
5473 raw_text_coding_system (coding_system)
5474      Lisp_Object coding_system;
5475 {
5476   Lisp_Object spec, attrs;
5477   Lisp_Object eol_type, raw_text_eol_type;
5478
5479   if (NILP (coding_system))
5480     return Qraw_text;
5481   spec = CODING_SYSTEM_SPEC (coding_system);
5482   attrs = AREF (spec, 0);
5483
5484   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5485     return coding_system;
5486
5487   eol_type = AREF (spec, 2);
5488   if (VECTORP (eol_type))
5489     return Qraw_text;
5490   spec = CODING_SYSTEM_SPEC (Qraw_text);
5491   raw_text_eol_type = AREF (spec, 2);
5492   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5493           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5494           : AREF (raw_text_eol_type, 2));
5495 }
5496
5497
5498 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5499    does, return one of the subsidiary that has the same eol-spec as
5500    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
5501    inherit end-of-line format from the system's setting
5502    (system_eol_type).  */
5503
5504 Lisp_Object
5505 coding_inherit_eol_type (coding_system, parent)
5506      Lisp_Object coding_system, parent;
5507 {
5508   Lisp_Object spec, eol_type;
5509
5510   if (NILP (coding_system))
5511     coding_system = Qraw_text;
5512   spec = CODING_SYSTEM_SPEC (coding_system);
5513   eol_type = AREF (spec, 2);
5514   if (VECTORP (eol_type))
5515     {
5516       Lisp_Object parent_eol_type;
5517
5518       if (! NILP (parent))
5519         {
5520           Lisp_Object parent_spec;
5521
5522           parent_spec = CODING_SYSTEM_SPEC (parent);
5523           parent_eol_type = AREF (parent_spec, 2);
5524         }
5525       else
5526         parent_eol_type = system_eol_type;
5527       if (EQ (parent_eol_type, Qunix))
5528         coding_system = AREF (eol_type, 0);
5529       else if (EQ (parent_eol_type, Qdos))
5530         coding_system = AREF (eol_type, 1);
5531       else if (EQ (parent_eol_type, Qmac))
5532         coding_system = AREF (eol_type, 2);
5533     }
5534   return coding_system;
5535 }
5536
5537 /* Emacs has a mechanism to automatically detect a coding system if it
5538    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5539    it's impossible to distinguish some coding systems accurately
5540    because they use the same range of codes.  So, at first, coding
5541    systems are classified into the following categories:
5542
5543    o coding-category-emacs-mule
5544
5545         The category for a coding system which has the same code range
5546         as Emacs' internal format.  Assigned the coding-system (Lisp
5547         symbol) `emacs-mule' by default.
5548
5549    o coding-category-sjis
5550
5551         The category for a coding system which has the same code range
5552         as SJIS.  Assigned the coding-system (Lisp
5553         symbol) `japanese-shift-jis' by default.
5554
5555    o coding-category-iso-7
5556
5557         The category for a coding system which has the same code range
5558         as ISO2022 of 7-bit environment.  This doesn't use any locking
5559         shift and single shift functions.  This can encode/decode all
5560         charsets.  Assigned the coding-system (Lisp symbol)
5561         `iso-2022-7bit' by default.
5562
5563    o coding-category-iso-7-tight
5564
5565         Same as coding-category-iso-7 except that this can
5566         encode/decode only the specified charsets.
5567
5568    o coding-category-iso-8-1
5569
5570         The category for a coding system which has the same code range
5571         as ISO2022 of 8-bit environment and graphic plane 1 used only
5572         for DIMENSION1 charset.  This doesn't use any locking shift
5573         and single shift functions.  Assigned the coding-system (Lisp
5574         symbol) `iso-latin-1' by default.
5575
5576    o coding-category-iso-8-2
5577
5578         The category for a coding system which has the same code range
5579         as ISO2022 of 8-bit environment and graphic plane 1 used only
5580         for DIMENSION2 charset.  This doesn't use any locking shift
5581         and single shift functions.  Assigned the coding-system (Lisp
5582         symbol) `japanese-iso-8bit' by default.
5583
5584    o coding-category-iso-7-else
5585
5586         The category for a coding system which has the same code range
5587         as ISO2022 of 7-bit environment but uses locking shift or
5588         single shift functions.  Assigned the coding-system (Lisp
5589         symbol) `iso-2022-7bit-lock' by default.
5590
5591    o coding-category-iso-8-else
5592
5593         The category for a coding system which has the same code range
5594         as ISO2022 of 8-bit environment but uses locking shift or
5595         single shift functions.  Assigned the coding-system (Lisp
5596         symbol) `iso-2022-8bit-ss2' by default.
5597
5598    o coding-category-big5
5599
5600         The category for a coding system which has the same code range
5601         as BIG5.  Assigned the coding-system (Lisp symbol)
5602         `cn-big5' by default.
5603
5604    o coding-category-utf-8
5605
5606         The category for a coding system which has the same code range
5607         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
5608         symbol) `utf-8' by default.
5609
5610    o coding-category-utf-16-be
5611
5612         The category for a coding system in which a text has an
5613         Unicode signature (cf. Unicode Standard) in the order of BIG
5614         endian at the head.  Assigned the coding-system (Lisp symbol)
5615         `utf-16-be' by default.
5616
5617    o coding-category-utf-16-le
5618
5619         The category for a coding system in which a text has an
5620         Unicode signature (cf. Unicode Standard) in the order of
5621         LITTLE endian at the head.  Assigned the coding-system (Lisp
5622         symbol) `utf-16-le' by default.
5623
5624    o coding-category-ccl
5625
5626         The category for a coding system of which encoder/decoder is
5627         written in CCL programs.  The default value is nil, i.e., no
5628         coding system is assigned.
5629
5630    o coding-category-binary
5631
5632         The category for a coding system not categorized in any of the
5633         above.  Assigned the coding-system (Lisp symbol)
5634         `no-conversion' by default.
5635
5636    Each of them is a Lisp symbol whose value is an actual
5637    `coding-system' (this is also a Lisp symbol) assigned by a user.
5638    What Emacs does actually is to detect a category of coding system.
5639    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5640    decide only one possible category, it selects a category of the
5641    highest priority.  Priorities of categories are also specified by a
5642    user in a Lisp variable `coding-category-list'.
5643
5644 */
5645
5646 #define EOL_SEEN_NONE   0
5647 #define EOL_SEEN_LF     1
5648 #define EOL_SEEN_CR     2
5649 #define EOL_SEEN_CRLF   4
5650
5651 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5652    SOURCE is encoded.  If CATEGORY is one of
5653    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5654    two-byte, else they are encoded by one-byte.
5655
5656    Return one of EOL_SEEN_XXX.  */
5657
5658 #define MAX_EOL_CHECK_COUNT 3
5659
5660 static int
5661 detect_eol (source, src_bytes, category)
5662      const unsigned char *source;
5663      EMACS_INT src_bytes;
5664      enum coding_category category;
5665 {
5666   const unsigned char *src = source, *src_end = src + src_bytes;
5667   unsigned char c;
5668   int total  = 0;
5669   int eol_seen = EOL_SEEN_NONE;
5670
5671   if ((1 << category) & CATEGORY_MASK_UTF_16)
5672     {
5673       int msb, lsb;
5674
5675       msb = category == (coding_category_utf_16_le
5676                          | coding_category_utf_16_le_nosig);
5677       lsb = 1 - msb;
5678
5679       while (src + 1 < src_end)
5680         {
5681           c = src[lsb];
5682           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5683             {
5684               int this_eol;
5685
5686               if (c == '\n')
5687                 this_eol = EOL_SEEN_LF;
5688               else if (src + 3 >= src_end
5689                        || src[msb + 2] != 0
5690                        || src[lsb + 2] != '\n')
5691                 this_eol = EOL_SEEN_CR;
5692               else
5693                 this_eol = EOL_SEEN_CRLF;
5694
5695               if (eol_seen == EOL_SEEN_NONE)
5696                 /* This is the first end-of-line.  */
5697                 eol_seen = this_eol;
5698               else if (eol_seen != this_eol)
5699                 {
5700                   /* The found type is different from what found before.  */
5701                   eol_seen = EOL_SEEN_LF;
5702                   break;
5703                 }
5704               if (++total == MAX_EOL_CHECK_COUNT)
5705                 break;
5706             }
5707           src += 2;
5708         }
5709     }
5710   else
5711     {
5712       while (src < src_end)
5713         {
5714           c = *src++;
5715           if (c == '\n' || c == '\r')
5716             {
5717               int this_eol;
5718
5719               if (c == '\n')
5720                 this_eol = EOL_SEEN_LF;
5721               else if (src >= src_end || *src != '\n')
5722                 this_eol = EOL_SEEN_CR;
5723               else
5724                 this_eol = EOL_SEEN_CRLF, src++;
5725
5726               if (eol_seen == EOL_SEEN_NONE)
5727                 /* This is the first end-of-line.  */
5728                 eol_seen = this_eol;
5729               else if (eol_seen != this_eol)
5730                 {
5731                   /* The found type is different from what found before.  */
5732                   eol_seen = EOL_SEEN_LF;
5733                   break;
5734                 }
5735               if (++total == MAX_EOL_CHECK_COUNT)
5736                 break;
5737             }
5738         }
5739     }
5740   return eol_seen;
5741 }
5742
5743
5744 static Lisp_Object
5745 adjust_coding_eol_type (coding, eol_seen)
5746      struct coding_system *coding;
5747      int eol_seen;
5748 {
5749   Lisp_Object eol_type;
5750
5751   eol_type = CODING_ID_EOL_TYPE (coding->id);
5752   if (eol_seen & EOL_SEEN_LF)
5753     {
5754       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5755       eol_type = Qunix;
5756     }
5757   else if (eol_seen & EOL_SEEN_CRLF)
5758     {
5759       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5760       eol_type = Qdos;
5761     }
5762   else if (eol_seen & EOL_SEEN_CR)
5763     {
5764       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5765       eol_type = Qmac;
5766     }
5767   return eol_type;
5768 }
5769
5770 /* Detect how a text specified in CODING is encoded.  If a coding
5771    system is detected, update fields of CODING by the detected coding
5772    system.  */
5773
5774 void
5775 detect_coding (coding)
5776      struct coding_system *coding;
5777 {
5778   const unsigned char *src, *src_end;
5779
5780   coding->consumed = coding->consumed_char = 0;
5781   coding->produced = coding->produced_char = 0;
5782   coding_set_source (coding);
5783
5784   src_end = coding->source + coding->src_bytes;
5785
5786   /* If we have not yet decided the text encoding type, detect it
5787      now.  */
5788   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5789     {
5790       int c, i;
5791       struct coding_detection_info detect_info;
5792       int null_byte_found = 0, eight_bit_found = 0;
5793
5794       detect_info.checked = detect_info.found = detect_info.rejected = 0;
5795       coding->head_ascii = -1;
5796       for (src = coding->source; src < src_end; src++)
5797         {
5798           c = *src;
5799           if (c & 0x80)
5800             {
5801               eight_bit_found = 1;
5802               if (coding->head_ascii < 0)
5803                 coding->head_ascii = src - coding->source;
5804               if (null_byte_found)
5805                 break;
5806             }
5807           else if (c < 0x20)
5808             {
5809               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
5810                   && ! inhibit_iso_escape_detection
5811                   && ! detect_info.checked)
5812                 {
5813                   if (coding->head_ascii < 0)
5814                     coding->head_ascii = src - coding->source;
5815                   if (detect_coding_iso_2022 (coding, &detect_info))
5816                     {
5817                       /* We have scanned the whole data.  */
5818                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
5819                         /* We didn't find an 8-bit code.  We may have
5820                            found a null-byte, but it's very rare that
5821                            a binary file confirm to ISO-2022.  */
5822                         src = src_end;
5823                       break;
5824                     }
5825                 }
5826               else if (! c)
5827                 {
5828                   null_byte_found = 1;
5829                   if (eight_bit_found)
5830                     break;
5831                 }
5832             }
5833         }
5834       if (coding->head_ascii < 0)
5835         coding->head_ascii = src - coding->source;
5836
5837       if (null_byte_found || eight_bit_found
5838           || coding->head_ascii < coding->src_bytes
5839           || detect_info.found)
5840         {
5841           enum coding_category category;
5842           struct coding_system *this;
5843
5844           if (coding->head_ascii == coding->src_bytes)
5845             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
5846             for (i = 0; i < coding_category_raw_text; i++)
5847               {
5848                 category = coding_priorities[i];
5849                 this = coding_categories + category;
5850                 if (detect_info.found & (1 << category))
5851                   break;
5852               }
5853           else
5854             {
5855               if (null_byte_found)
5856                 {
5857                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
5858                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
5859                 }
5860               for (i = 0; i < coding_category_raw_text; i++)
5861                 {
5862                   category = coding_priorities[i];
5863                   this = coding_categories + category;
5864                   if (this->id < 0)
5865                     {
5866                       /* No coding system of this category is defined.  */
5867                       detect_info.rejected |= (1 << category);
5868                     }
5869                   else if (category >= coding_category_raw_text)
5870                     continue;
5871                   else if (detect_info.checked & (1 << category))
5872                     {
5873                       if (detect_info.found & (1 << category))
5874                         break;
5875                     }
5876                   else if ((*(this->detector)) (coding, &detect_info)
5877                            && detect_info.found & (1 << category))
5878                     {
5879                       if (category == coding_category_utf_16_auto)
5880                         {
5881                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5882                             category = coding_category_utf_16_le;
5883                           else
5884                             category = coding_category_utf_16_be;
5885                         }
5886                       break;
5887                     }
5888                 }
5889
5890               if (i < coding_category_raw_text)
5891                 setup_coding_system (CODING_ID_NAME (this->id), coding);
5892               else if (null_byte_found)
5893                 setup_coding_system (Qno_conversion, coding);
5894               else if ((detect_info.rejected & CATEGORY_MASK_ANY)
5895                        == CATEGORY_MASK_ANY)
5896                 setup_coding_system (Qraw_text, coding);
5897               else if (detect_info.rejected)
5898                 for (i = 0; i < coding_category_raw_text; i++)
5899                   if (! (detect_info.rejected & (1 << coding_priorities[i])))
5900                     {
5901                       this = coding_categories + coding_priorities[i];
5902                       setup_coding_system (CODING_ID_NAME (this->id), coding);
5903                       break;
5904                     }
5905             }
5906         }
5907     }
5908   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5909            == coding_category_utf_8_auto)
5910     {
5911       Lisp_Object coding_systems;
5912       struct coding_detection_info detect_info;
5913
5914       coding_systems
5915         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
5916       detect_info.found = detect_info.rejected = 0;
5917       coding->head_ascii = 0;
5918       if (CONSP (coding_systems)
5919           && detect_coding_utf_8 (coding, &detect_info))
5920         {
5921           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
5922             setup_coding_system (XCAR (coding_systems), coding);
5923           else
5924             setup_coding_system (XCDR (coding_systems), coding);
5925         }
5926     }
5927   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5928            == coding_category_utf_16_auto)
5929     {
5930       Lisp_Object coding_systems;
5931       struct coding_detection_info detect_info;
5932
5933       coding_systems
5934         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
5935       detect_info.found = detect_info.rejected = 0;
5936       coding->head_ascii = 0;
5937       if (CONSP (coding_systems)
5938           && detect_coding_utf_16 (coding, &detect_info))
5939         {
5940           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5941             setup_coding_system (XCAR (coding_systems), coding);
5942           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
5943             setup_coding_system (XCDR (coding_systems), coding);
5944         }
5945     }
5946 }
5947
5948
5949 static void
5950 decode_eol (coding)
5951      struct coding_system *coding;
5952 {
5953   Lisp_Object eol_type;
5954   unsigned char *p, *pbeg, *pend;
5955
5956   eol_type = CODING_ID_EOL_TYPE (coding->id);
5957   if (EQ (eol_type, Qunix))
5958     return;
5959
5960   if (NILP (coding->dst_object))
5961     pbeg = coding->destination;
5962   else
5963     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5964   pend = pbeg + coding->produced;
5965
5966   if (VECTORP (eol_type))
5967     {
5968       int eol_seen = EOL_SEEN_NONE;
5969
5970       for (p = pbeg; p < pend; p++)
5971         {
5972           if (*p == '\n')
5973             eol_seen |= EOL_SEEN_LF;
5974           else if (*p == '\r')
5975             {
5976               if (p + 1 < pend && *(p + 1) == '\n')
5977                 {
5978                   eol_seen |= EOL_SEEN_CRLF;
5979                   p++;
5980                 }
5981               else
5982                 eol_seen |= EOL_SEEN_CR;
5983             }
5984         }
5985       if (eol_seen != EOL_SEEN_NONE
5986           && eol_seen != EOL_SEEN_LF
5987           && eol_seen != EOL_SEEN_CRLF
5988           && eol_seen != EOL_SEEN_CR)
5989         eol_seen = EOL_SEEN_LF;
5990       if (eol_seen != EOL_SEEN_NONE)
5991         eol_type = adjust_coding_eol_type (coding, eol_seen);
5992     }
5993
5994   if (EQ (eol_type, Qmac))
5995     {
5996       for (p = pbeg; p < pend; p++)
5997         if (*p == '\r')
5998           *p = '\n';
5999     }
6000   else if (EQ (eol_type, Qdos))
6001     {
6002       int n = 0;
6003
6004       if (NILP (coding->dst_object))
6005         {
6006           /* Start deleting '\r' from the tail to minimize the memory
6007              movement.  */
6008           for (p = pend - 2; p >= pbeg; p--)
6009             if (*p == '\r')
6010               {
6011                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
6012                 n++;
6013               }
6014         }
6015       else
6016         {
6017           int pos_byte = coding->dst_pos_byte;
6018           int pos = coding->dst_pos;
6019           int pos_end = pos + coding->produced_char - 1;
6020
6021           while (pos < pos_end)
6022             {
6023               p = BYTE_POS_ADDR (pos_byte);
6024               if (*p == '\r' && p[1] == '\n')
6025                 {
6026                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6027                   n++;
6028                   pos_end--;
6029                 }
6030               pos++;
6031               if (coding->dst_multibyte)
6032                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6033               else
6034                 pos_byte++;
6035             }
6036         }
6037       coding->produced -= n;
6038       coding->produced_char -= n;
6039     }
6040 }
6041
6042
6043 /* Return a translation table (or list of them) from coding system
6044    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6045    decoding (ENCODEP is zero). */
6046
6047 static Lisp_Object
6048 get_translation_table (attrs, encodep, max_lookup)
6049      Lisp_Object attrs;
6050      int encodep, *max_lookup;
6051 {
6052   Lisp_Object standard, translation_table;
6053   Lisp_Object val;
6054
6055   if (encodep)
6056     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6057       standard = Vstandard_translation_table_for_encode;
6058   else
6059     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6060       standard = Vstandard_translation_table_for_decode;
6061   if (NILP (translation_table))
6062     translation_table = standard;
6063   else
6064     {
6065       if (SYMBOLP (translation_table))
6066         translation_table = Fget (translation_table, Qtranslation_table);
6067       else if (CONSP (translation_table))
6068         {
6069           translation_table = Fcopy_sequence (translation_table);
6070           for (val = translation_table; CONSP (val); val = XCDR (val))
6071             if (SYMBOLP (XCAR (val)))
6072               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6073         }
6074       if (CHAR_TABLE_P (standard))
6075         {
6076           if (CONSP (translation_table))
6077             translation_table = nconc2 (translation_table,
6078                                         Fcons (standard, Qnil));
6079           else
6080             translation_table = Fcons (translation_table,
6081                                        Fcons (standard, Qnil));
6082         }
6083     }
6084
6085   if (max_lookup)
6086     {
6087       *max_lookup = 1;
6088       if (CHAR_TABLE_P (translation_table)
6089           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6090         {
6091           val = XCHAR_TABLE (translation_table)->extras[1];
6092           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6093             *max_lookup = XFASTINT (val);
6094         }
6095       else if (CONSP (translation_table))
6096         {
6097           Lisp_Object tail, val;
6098
6099           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6100             if (CHAR_TABLE_P (XCAR (tail))
6101                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6102               {
6103                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6104                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6105                   *max_lookup = XFASTINT (val);
6106               }
6107         }
6108     }
6109   return translation_table;
6110 }
6111
/* Look up character C in TABLE, which is a char-table or a list of
   char-tables.  C and TRANS must be lvalues.

   If a lookup yields a character, C is replaced by that character
   and TRANS is set to nil.  Otherwise TRANS is left holding the
   looked-up value.  For a list, the tables are consulted in order,
   each with the current value of C, and the search stops at the
   first value that is neither nil nor a character.  */
#define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
  do {                                                          \
    trans = Qnil;                                               \
    if (CHAR_TABLE_P (table))                                   \
      {                                                         \
        trans = CHAR_TABLE_REF (table, c);                      \
        if (CHARACTERP (trans))                                 \
          {                                                     \
            c = XFASTINT (trans);                               \
            trans = Qnil;                                       \
          }                                                     \
      }                                                         \
    else if (CONSP (table))                                     \
      {                                                         \
        Lisp_Object tail;                                       \
                                                                \
        for (tail = table; CONSP (tail); tail = XCDR (tail))    \
          {                                                     \
            if (! CHAR_TABLE_P (XCAR (tail)))                   \
              continue;                                         \
            trans = CHAR_TABLE_REF (XCAR (tail), c);            \
            if (CHARACTERP (trans))                             \
              {                                                 \
                c = XFASTINT (trans);                           \
                trans = Qnil;                                   \
              }                                                 \
            else if (! NILP (trans))                            \
              break;                                            \
          }                                                     \
      }                                                         \
  } while (0)
6136
6137
6138 static Lisp_Object
6139 get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
6140      Lisp_Object val;
6141      int *buf, *buf_end;
6142      int last_block;
6143      int *from_nchars, *to_nchars;
6144 {
6145   /* VAL is TO or (([FROM-CHAR ...] .  TO) ...) where TO is TO-CHAR or
6146      [TO-CHAR ...].  */
6147   if (CONSP (val))
6148     {
6149       Lisp_Object from, tail;
6150       int i, len;
6151
6152       for (tail = val; CONSP (tail); tail = XCDR (tail))
6153         {
6154           val = XCAR (tail);
6155           from = XCAR (val);
6156           len = ASIZE (from);
6157           for (i = 0; i < len; i++)
6158             {
6159               if (buf + i == buf_end)
6160                 {
6161                   if (! last_block)
6162                     return Qt;
6163                   break;
6164                 }
6165               if (XINT (AREF (from, i)) != buf[i])
6166                 break;
6167             }
6168           if (i == len)
6169             {
6170               val = XCDR (val);
6171               *from_nchars = len;
6172               break;
6173             }
6174         }
6175       if (! CONSP (tail))
6176         return Qnil;
6177     }
6178   if (VECTORP (val))
6179     *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
6180   else
6181     *buf = XINT (val);
6182   return val;
6183 }
6184
6185
6186 static int
6187 produce_chars (coding, translation_table, last_block)
6188      struct coding_system *coding;
6189      Lisp_Object translation_table;
6190      int last_block;
6191 {
6192   unsigned char *dst = coding->destination + coding->produced;
6193   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6194   EMACS_INT produced;
6195   EMACS_INT produced_chars = 0;
6196   int carryover = 0;
6197
6198   if (! coding->chars_at_source)
6199     {
6200       /* Source characters are in coding->charbuf.  */
6201       int *buf = coding->charbuf;
6202       int *buf_end = buf + coding->charbuf_used;
6203
6204       if (EQ (coding->src_object, coding->dst_object))
6205         {
6206           coding_set_source (coding);
6207           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6208         }
6209
6210       while (buf < buf_end)
6211         {
6212           int c = *buf, i;
6213
6214           if (c >= 0)
6215             {
6216               int from_nchars = 1, to_nchars = 1;
6217               Lisp_Object trans = Qnil;
6218
6219               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6220               if (! NILP (trans))
6221                 {
6222                   trans = get_translation (trans, buf, buf_end, last_block,
6223                                            &from_nchars, &to_nchars);
6224                   if (EQ (trans, Qt))
6225                     break;
6226                   c = *buf;
6227                 }
6228
6229               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6230                 {
6231                   dst = alloc_destination (coding,
6232                                            buf_end - buf
6233                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6234                                            dst);
6235                   if (EQ (coding->src_object, coding->dst_object))
6236                     {
6237                       coding_set_source (coding);
6238                       dst_end = ((unsigned char *) coding->source) + coding->consumed;
6239                     }
6240                   else
6241                     dst_end = coding->destination + coding->dst_bytes;
6242                 }
6243
6244               for (i = 0; i < to_nchars; i++)
6245                 {
6246                   if (i > 0)
6247                     c = XINT (AREF (trans, i));
6248                   if (coding->dst_multibyte
6249                       || ! CHAR_BYTE8_P (c))
6250                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6251                   else
6252                     *dst++ = CHAR_TO_BYTE8 (c);
6253                 }
6254               produced_chars += to_nchars;
6255               *buf++ = to_nchars;
6256               while (--from_nchars > 0)
6257                 *buf++ = 0;
6258             }
6259           else
6260             /* This is an annotation datum.  (-C) is the length.  */
6261             buf += -c;
6262         }
6263       carryover = buf_end - buf;
6264     }
6265   else
6266     {
6267       /* Source characters are at coding->source.  */
6268       const unsigned char *src = coding->source;
6269       const unsigned char *src_end = src + coding->consumed;
6270
6271       if (EQ (coding->dst_object, coding->src_object))
6272         dst_end = (unsigned char *) src;
6273       if (coding->src_multibyte != coding->dst_multibyte)
6274         {
6275           if (coding->src_multibyte)
6276             {
6277               int multibytep = 1;
6278               EMACS_INT consumed_chars;
6279
6280               while (1)
6281                 {
6282                   const unsigned char *src_base = src;
6283                   int c;
6284
6285                   ONE_MORE_BYTE (c);
6286                   if (dst == dst_end)
6287                     {
6288                       if (EQ (coding->src_object, coding->dst_object))
6289                         dst_end = (unsigned char *) src;
6290                       if (dst == dst_end)
6291                         {
6292                           EMACS_INT offset = src - coding->source;
6293
6294                           dst = alloc_destination (coding, src_end - src + 1,
6295                                                    dst);
6296                           dst_end = coding->destination + coding->dst_bytes;
6297                           coding_set_source (coding);
6298                           src = coding->source + offset;
6299                           src_end = coding->source + coding->src_bytes;
6300                           if (EQ (coding->src_object, coding->dst_object))
6301                             dst_end = (unsigned char *) src;
6302                         }
6303                     }
6304                   *dst++ = c;
6305                   produced_chars++;
6306                 }
6307             no_more_source:
6308               ;
6309             }
6310           else
6311             while (src < src_end)
6312               {
6313                 int multibytep = 1;
6314                 int c = *src++;
6315
6316                 if (dst >= dst_end - 1)
6317                   {
6318                     if (EQ (coding->src_object, coding->dst_object))
6319                       dst_end = (unsigned char *) src;
6320                     if (dst >= dst_end - 1)
6321                       {
6322                         EMACS_INT offset = src - coding->source;
6323                         EMACS_INT more_bytes;
6324
6325                         if (EQ (coding->src_object, coding->dst_object))
6326                           more_bytes = ((src_end - src) / 2) + 2;
6327                         else
6328                           more_bytes = src_end - src + 2;
6329                         dst = alloc_destination (coding, more_bytes, dst);
6330                         dst_end = coding->destination + coding->dst_bytes;
6331                         coding_set_source (coding);
6332                         src = coding->source + offset;
6333                         src_end = coding->source + coding->src_bytes;
6334                         if (EQ (coding->src_object, coding->dst_object))
6335                           dst_end = (unsigned char *) src;
6336                       }
6337                   }
6338                 EMIT_ONE_BYTE (c);
6339               }
6340         }
6341       else
6342         {
6343           if (!EQ (coding->src_object, coding->dst_object))
6344             {
6345               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6346
6347               if (require > 0)
6348                 {
6349                   EMACS_INT offset = src - coding->source;
6350
6351                   dst = alloc_destination (coding, require, dst);
6352                   coding_set_source (coding);
6353                   src = coding->source + offset;
6354                   src_end = coding->source + coding->src_bytes;
6355                 }
6356             }
6357           produced_chars = coding->consumed_char;
6358           while (src < src_end)
6359             *dst++ = *src++;
6360         }
6361     }
6362
6363   produced = dst - (coding->destination + coding->produced);
6364   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6365     insert_from_gap (produced_chars, produced);
6366   coding->produced += produced;
6367   coding->produced_char += produced_chars;
6368   return carryover;
6369 }
6370
6371 /* Compose text in CODING->object according to the annotation data at
6372    CHARBUF.  CHARBUF is an array:
6373      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
6374  */
6375
6376 static INLINE void
6377 produce_composition (coding, charbuf, pos)
6378      struct coding_system *coding;
6379      int *charbuf;
6380      EMACS_INT pos;
6381 {
6382   int len;
6383   EMACS_INT to;
6384   enum composition_method method;
6385   Lisp_Object components;
6386
6387   len = -charbuf[0];
6388   to = pos + charbuf[2];
6389   if (to <= pos)
6390     return;
6391   method = (enum composition_method) (charbuf[3]);
6392
6393   if (method == COMPOSITION_RELATIVE)
6394     components = Qnil;
6395   else if (method >= COMPOSITION_WITH_RULE
6396            && method <= COMPOSITION_WITH_RULE_ALTCHARS)
6397     {
6398       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6399       int i;
6400
6401       len -= 4;
6402       charbuf += 4;
6403       for (i = 0; i < len; i++)
6404         {
6405           args[i] = make_number (charbuf[i]);
6406           if (charbuf[i] < 0)
6407             return;
6408         }
6409       components = (method == COMPOSITION_WITH_ALTCHARS
6410                     ? Fstring (len, args) : Fvector (len, args));
6411     }
6412   else
6413     return;
6414   compose_text (pos, to, components, Qnil, coding->dst_object);
6415 }
6416
6417
6418 /* Put `charset' property on text in CODING->object according to
6419    the annotation data at CHARBUF.  CHARBUF is an array:
6420      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6421  */
6422
6423 static INLINE void
6424 produce_charset (coding, charbuf, pos)
6425      struct coding_system *coding;
6426      int *charbuf;
6427      EMACS_INT pos;
6428 {
6429   EMACS_INT from = pos - charbuf[2];
6430   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6431
6432   Fput_text_property (make_number (from), make_number (pos),
6433                       Qcharset, CHARSET_NAME (charset),
6434                       coding->dst_object);
6435 }
6436
6437
/* Number of `int' elements that ALLOC_CONVERSION_WORK_AREA first tries
   to allocate for the conversion work area.  */
#define CHARBUF_SIZE 0x4000

/* Allocate the conversion work area CODING->charbuf on the caller's
   stack with alloca and record its element count in
   CODING->charbuf_size.  The first request is for CHARBUF_SIZE
   elements; each time alloca yields NULL the request is halved, as
   long as it stays above 1024 elements.

   CAUTION: this macro contains a `return'.  If no area could be
   allocated, it records CODING_RESULT_INSUFFICIENT_MEM and makes the
   *calling function* return CODING->result, so use it only where that
   value is a valid return value.  CODING is evaluated several times.

   NOTE(review): many alloca implementations never return NULL, in
   which case the retry loop runs exactly once -- confirm for the
   supported platforms.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    (coding)->charbuf = NULL;                                           \
    while (size > 1024)                                                 \
      {                                                                 \
        (coding)->charbuf = (int *) alloca (sizeof (int) * size);       \
        if ((coding)->charbuf)                                          \
          break;                                                        \
        size >>= 1;                                                     \
      }                                                                 \
    if (! (coding)->charbuf)                                            \
      {                                                                 \
        record_conversion_result ((coding),                             \
                                  CODING_RESULT_INSUFFICIENT_MEM);      \
        return (coding)->result;                                        \
      }                                                                 \
    (coding)->charbuf_size = size;                                      \
  } while (0)
6459
6460
6461 static void
6462 produce_annotation (coding, pos)
6463      struct coding_system *coding;
6464      EMACS_INT pos;
6465 {
6466   int *charbuf = coding->charbuf;
6467   int *charbuf_end = charbuf + coding->charbuf_used;
6468
6469   if (NILP (coding->dst_object))
6470     return;
6471
6472   while (charbuf < charbuf_end)
6473     {
6474       if (*charbuf >= 0)
6475         pos += *charbuf++;
6476       else
6477         {
6478           int len = -*charbuf;
6479           switch (charbuf[1])
6480             {
6481             case CODING_ANNOTATE_COMPOSITION_MASK:
6482               produce_composition (coding, charbuf, pos);
6483               break;
6484             case CODING_ANNOTATE_CHARSET_MASK:
6485               produce_charset (coding, charbuf, pos);
6486               break;
6487             default:
6488               abort ();
6489             }
6490           charbuf += len;
6491         }
6492     }
6493 }
6494
/* Decode the data at CODING->src_object into CODING->dst_object.
   CODING->src_object is a buffer, a string, or nil.
   CODING->dst_object is a buffer.

   If CODING->src_object is a buffer, it must be the current buffer.
   In this case, if CODING->src_pos is positive, it is a position of
   the source text in the buffer, otherwise, the source text is in the
   gap area of the buffer, and CODING->src_pos specifies the offset of
   the text from GPT (which must be the same as PT).  If this is the
   same buffer as CODING->dst_object, CODING->src_pos must be
   negative.

   If CODING->src_object is a string, CODING->src_pos is an index to
   that string.

   If CODING->src_object is nil, CODING->source must already point to
   the non-relocatable memory area.  In this case, CODING->src_pos is
   an offset from CODING->source.

   The decoded data is inserted at the current point of the buffer
   CODING->dst_object.

   Return CODING->result.  Note that ALLOC_CONVERSION_WORK_AREA makes
   this function return early if it fails to allocate the work area.
*/

static int
decode_coding (coding)
     struct coding_system *coding;
{
  Lisp_Object attrs;
  Lisp_Object undo_list;
  Lisp_Object translation_table;
  int carryover;
  int i;

  /* If the source text is in a buffer and straddles the gap, move the
     gap to the start of the source text.  */
  if (BUFFERP (coding->src_object)
      && coding->src_pos > 0
      && coding->src_pos < GPT
      && coding->src_pos + coding->src_chars > GPT)
    move_gap_both (coding->src_pos, coding->src_pos_byte);

  /* Make the destination buffer current with the gap at point, and
     switch undo recording off (undo_list = Qt) while decoding; the
     saved list is restored at the end of this function.  */
  undo_list = Qt;
  if (BUFFERP (coding->dst_object))
    {
      if (current_buffer != XBUFFER (coding->dst_object))
        set_buffer_internal (XBUFFER (coding->dst_object));
      if (GPT != PT)
        move_gap_both (PT, PT_BYTE);
      undo_list = current_buffer->undo_list;
      current_buffer->undo_list = Qt;
    }

  /* Reset the progress counters and the result state.  */
  coding->consumed = coding->consumed_char = 0;
  coding->produced = coding->produced_char = 0;
  coding->chars_at_source = 0;
  record_conversion_result (coding, CODING_RESULT_SUCCESS);
  coding->errors = 0;

  /* May return from this function on allocation failure.  */
  ALLOC_CONVERSION_WORK_AREA (coding);

  attrs = CODING_ID_ATTRS (coding->id);
  translation_table = get_translation_table (attrs, 0, NULL);

  /* Main loop: each round runs the decoder, which fills
     coding->charbuf after the CARRYOVER elements kept from the
     previous round, and then lets produce_chars write them to the
     destination.  produce_chars returns the number of trailing
     elements it left unprocessed; those are moved to the front of
     coding->charbuf for the next round.  */
  carryover = 0;
  do
    {
      /* Destination position at which this round's text starts; used
         as the base position for annotations.  */
      EMACS_INT pos = coding->dst_pos + coding->produced_char;

      coding_set_source (coding);
      coding->annotated = 0;
      coding->charbuf_used = carryover;
      (*(coding->decoder)) (coding);
      coding_set_destination (coding);
      carryover = produce_chars (coding, translation_table, 0);
      if (coding->annotated)
        produce_annotation (coding, pos);
      for (i = 0; i < carryover; i++)
        coding->charbuf[i]
          = coding->charbuf[coding->charbuf_used - carryover + i];
    }
  while (coding->consumed < coding->src_bytes
         && (coding->result == CODING_RESULT_SUCCESS
             || coding->result == CODING_RESULT_INVALID_SRC));

  /* Produce whatever is still carried over, this time passing 1 as
     the last argument of produce_chars (presumably "last block" --
     confirm against its definition).  */
  if (carryover > 0)
    {
      coding_set_destination (coding);
      coding->charbuf_used = carryover;
      produce_chars (coding, translation_table, 1);
    }

  /* Deal with source bytes the decoder did not consume.  */
  coding->carryover_bytes = 0;
  if (coding->consumed < coding->src_bytes)
    {
      int nbytes = coding->src_bytes - coding->consumed;
      const unsigned char *src;

      coding_set_source (coding);
      coding_set_destination (coding);
      src = coding->source + coding->consumed;

      if (coding->mode & CODING_MODE_LAST_BLOCK)
        {
          /* Flush out unprocessed data as binary chars.  We are sure
             that the number of data is less than the size of
             coding->charbuf.  */
          coding->charbuf_used = 0;
          while (nbytes-- > 0)
            {
              int c = *src++;

              /* Bytes with the high bit set become raw 8-bit
                 characters.  */
              if (c & 0x80)
                c = BYTE8_TO_CHAR (c);
              coding->charbuf[coding->charbuf_used++] = c;
            }
          /* No translation table is applied to these chars.  */
          produce_chars (coding, Qnil, 1);
        }
      else
        {
          /* Record unprocessed bytes in coding->carryover.  We are
             sure that the number of data is less than the size of
             coding->carryover.  */
          unsigned char *p = coding->carryover;

          coding->carryover_bytes = nbytes;
          while (nbytes-- > 0)
            *p++ = *src++;
        }
      /* Either way, all source bytes now count as consumed.  */
      coding->consumed = coding->src_bytes;
    }

  /* End-of-line conversion is needed unless the eol type is unix.  */
  if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
    decode_eol (coding);
  /* Restore undo recording and record the whole insertion at once.  */
  if (BUFFERP (coding->dst_object))
    {
      current_buffer->undo_list = undo_list;
      record_insert (coding->dst_pos, coding->produced_char);
    }
  return coding->result;
}
6633
6634
6635 /* Extract an annotation datum from a composition starting at POS and
6636    ending before LIMIT of CODING->src_object (buffer or string), store
6637    the data in BUF, set *STOP to a starting position of the next
6638    composition (if any) or to LIMIT, and return the address of the
6639    next element of BUF.
6640
6641    If such an annotation is not found, set *STOP to a starting
6642    position of a composition after POS (if any) or to LIMIT, and
6643    return BUF.  */
6644
6645 static INLINE int *
6646 handle_composition_annotation (pos, limit, coding, buf, stop)
6647      EMACS_INT pos, limit;
6648      struct coding_system *coding;
6649      int *buf;
6650      EMACS_INT *stop;
6651 {
6652   EMACS_INT start, end;
6653   Lisp_Object prop;
6654
6655   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6656       || end > limit)
6657     *stop = limit;
6658   else if (start > pos)
6659     *stop = start;
6660   else
6661     {
6662       if (start == pos)
6663         {
6664           /* We found a composition.  Store the corresponding
6665              annotation data in BUF.  */
6666           int *head = buf;
6667           enum composition_method method = COMPOSITION_METHOD (prop);
6668           int nchars = COMPOSITION_LENGTH (prop);
6669
6670           ADD_COMPOSITION_DATA (buf, nchars, method);
6671           if (method != COMPOSITION_RELATIVE)
6672             {
6673               Lisp_Object components;
6674               int len, i, i_byte;
6675
6676               components = COMPOSITION_COMPONENTS (prop);
6677               if (VECTORP (components))
6678                 {
6679                   len = XVECTOR (components)->size;
6680                   for (i = 0; i < len; i++)
6681                     *buf++ = XINT (AREF (components, i));
6682                 }
6683               else if (STRINGP (components))
6684                 {
6685                   len = SCHARS (components);
6686                   i = i_byte = 0;
6687                   while (i < len)
6688                     {
6689                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6690                       buf++;
6691                     }
6692                 }
6693               else if (INTEGERP (components))
6694                 {
6695                   len = 1;
6696                   *buf++ = XINT (components);
6697                 }
6698               else if (CONSP (components))
6699                 {
6700                   for (len = 0; CONSP (components);
6701                        len++, components = XCDR (components))
6702                     *buf++ = XINT (XCAR (components));
6703                 }
6704               else
6705                 abort ();
6706               *head -= len;
6707             }
6708         }
6709
6710       if (find_composition (end, limit, &start, &end, &prop,
6711                             coding->src_object)
6712           && end <= limit)
6713         *stop = start;
6714       else
6715         *stop = limit;
6716     }
6717   return buf;
6718 }
6719
6720
6721 /* Extract an annotation datum from a text property `charset' at POS of
6722    CODING->src_object (buffer of string), store the data in BUF, set
6723    *STOP to the position where the value of `charset' property changes
6724    (limiting by LIMIT), and return the address of the next element of
6725    BUF.
6726
6727    If the property value is nil, set *STOP to the position where the
6728    property value is non-nil (limiting by LIMIT), and return BUF.  */
6729
6730 static INLINE int *
6731 handle_charset_annotation (pos, limit, coding, buf, stop)
6732      EMACS_INT pos, limit;
6733      struct coding_system *coding;
6734      int *buf;
6735      EMACS_INT *stop;
6736 {
6737   Lisp_Object val, next;
6738   int id;
6739
6740   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6741   if (! NILP (val) && CHARSETP (val))
6742     id = XINT (CHARSET_SYMBOL_ID (val));
6743   else
6744     id = -1;
6745   ADD_CHARSET_DATA (buf, 0, id);
6746   next = Fnext_single_property_change (make_number (pos), Qcharset,
6747                                        coding->src_object,
6748                                        make_number (limit));
6749   *stop = XINT (next);
6750   return buf;
6751 }
6752
6753
/* Read characters from CODING's source, starting after the
   CODING->consumed bytes already processed, and store them in
   CODING->charbuf for the encoder.  Along the way, convert line ends
   according to the coding system's eol type, insert annotation data,
   and apply TRANSLATION_TABLE (unless it is nil).  MAX_LOOKUP is the
   number of characters gathered for one translation lookup.

   On return CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used describe how far the source was read and how
   many elements were stored; CODING->chars_at_source is cleared.  */

static void
consume_chars (coding, translation_table, max_lookup)
     struct coding_system *coding;
     Lisp_Object translation_table;
     int max_lookup;
{
  int *buf = coding->charbuf;
  int *buf_end = coding->charbuf + coding->charbuf_size;
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  EMACS_INT pos = coding->src_pos + coding->consumed_char;
  EMACS_INT end_pos = coding->src_pos + coding->src_chars;
  int multibytep = coding->src_multibyte;
  Lisp_Object eol_type;
  int c;
  EMACS_INT stop, stop_composition, stop_charset;
  int *lookup_buf = NULL;

  /* The lookup buffer is needed only when translating.  */
  if (! NILP (translation_table))
    lookup_buf = alloca (sizeof (int) * max_lookup);

  /* A vector eol type is treated the same as unix here.  */
  eol_type = CODING_ID_EOL_TYPE (coding->id);
  if (VECTORP (eol_type))
    eol_type = Qunix;

  /* Note: composition handling is not yet implemented.  */
  coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;

  /* STOP is the next position at which an annotation handler must be
     run.  With no source object there is nothing to annotate.  */
  if (NILP (coding->src_object))
    stop = stop_composition = stop_charset = end_pos;
  else
    {
      if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
        stop = stop_composition = pos;
      else
        stop = stop_composition = end_pos;
      if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
        stop = stop_charset = pos;
      else
        stop_charset = end_pos;
    }

  /* Compensate for CRLF and conversion.  Keep headroom at the end of
     the buffer: one element plus MAX_ANNOTATION_LENGTH, so that one
     loop iteration can store more than a single element.  */
  buf_end -= 1 + MAX_ANNOTATION_LENGTH;
  while (buf < buf_end)
    {
      Lisp_Object trans;

      if (pos == stop)
        {
          if (pos == end_pos)
            break;
          /* Run whichever handlers are due here, then recompute the
             nearest stop position.  */
          if (pos == stop_composition)
            buf = handle_composition_annotation (pos, end_pos, coding,
                                                 buf, &stop_composition);
          if (pos == stop_charset)
            buf = handle_charset_annotation (pos, end_pos, coding,
                                             buf, &stop_charset);
          stop = (stop_composition < stop_charset
                  ? stop_composition : stop_charset);
        }

      /* Fetch the next character C and advance SRC and POS.  */
      if (! multibytep)
        {
          EMACS_INT bytes;

          /* Unibyte source.  For raw-text take the byte as is.
             Otherwise a valid multibyte sequence is read as one
             character, with POS advanced by its byte length; any
             other byte becomes a raw 8-bit character.  */
          if (coding->encoder == encode_coding_raw_text)
            c = *src++, pos++;
          else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
            c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
          else
            c = BYTE8_TO_CHAR (*src), src++, pos++;
        }
      else
        c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
      /* In selective-display mode a CR is handled as a newline.  */
      if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
        c = '\n';
      if (! EQ (eol_type, Qunix))
        {
          if (c == '\n')
            {
              /* DOS: store a CR in front of the newline.  Any other
                 non-unix eol type: replace the newline by CR.  */
              if (EQ (eol_type, Qdos))
                *buf++ = '\r';
              else
                c = '\r';
            }
        }

      trans = Qnil;
      LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
      if (NILP (trans))
        *buf++ = c;
      else
        {
          int from_nchars = 1, to_nchars = 1;
          int *lookup_buf_end;
          const unsigned char *p = src;
          int i;

          /* Gather C and up to MAX_LOOKUP - 1 following characters
             (peeked via P, without advancing SRC) for the lookup.  */
          lookup_buf[0] = c;
          for (i = 1; i < max_lookup && p < src_end; i++)
            lookup_buf[i] = STRING_CHAR_ADVANCE (p);
          lookup_buf_end = lookup_buf + i;
          trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
                                   &from_nchars, &to_nchars);
          /* NOTE(review): SRC and POS have already moved past C here
             (and a CR may already have been stored for DOS eol), yet
             C itself is not stored before this break -- confirm that
             no character can be lost on this path.  */
          if (EQ (trans, Qt)
              || buf + to_nchars > buf_end)
            break;
          /* The first result is taken from lookup_buf[0], the rest
             from TRANS.  */
          *buf++ = *lookup_buf;
          for (i = 1; i < to_nchars; i++)
            *buf++ = XINT (AREF (trans, i));
          /* Skip the additional source characters that took part in
             the translation.  */
          for (i = 1; i < from_nchars; i++, pos++)
            src += MULTIBYTE_LENGTH_NO_CHECK (src);
        }
    }

  coding->consumed = src - coding->source;
  coding->consumed_char = pos - coding->src_pos;
  coding->charbuf_used = buf - coding->charbuf;
  coding->chars_at_source = 0;
}
6875
6876
6877 /* Encode the text at CODING->src_object into CODING->dst_object.
6878    CODING->src_object is a buffer or a string.
6879    CODING->dst_object is a buffer or nil.
6880
6881    If CODING->src_object is a buffer, it must be the current buffer.
6882    In this case, if CODING->src_pos is positive, it is a position of
6883    the source text in the buffer, otherwise. the source text is in the
6884    gap area of the buffer, and coding->src_pos specifies the offset of
6885    the text from GPT (which must be the same as PT).  If this is the
6886    same buffer as CODING->dst_object, CODING->src_pos must be
6887    negative and CODING should not have `pre-write-conversion'.
6888
6889    If CODING->src_object is a string, CODING should not have
6890    `pre-write-conversion'.
6891
6892    If CODING->dst_object is a buffer, the encoded data is inserted at
6893    the current point of that buffer.
6894
6895    If CODING->dst_object is nil, the encoded data is placed at the
6896    memory area specified by CODING->destination.  */
6897
6898 static int
6899 encode_coding (coding)
6900      struct coding_system *coding;
6901 {
6902   Lisp_Object attrs;
6903   Lisp_Object translation_table;
6904   int max_lookup;
6905
6906   attrs = CODING_ID_ATTRS (coding->id);
6907   if (coding->encoder == encode_coding_raw_text)
6908     translation_table = Qnil, max_lookup = 0;
6909   else
6910     translation_table = get_translation_table (attrs, 1, &max_lookup);
6911
6912   if (BUFFERP (coding->dst_object))
6913     {
6914       set_buffer_internal (XBUFFER (coding->dst_object));
6915       coding->dst_multibyte
6916         = ! NILP (current_buffer->enable_multibyte_characters);
6917     }
6918
6919   coding->consumed = coding->consumed_char = 0;
6920   coding->produced = coding->produced_char = 0;
6921   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6922   coding->errors = 0;
6923
6924   ALLOC_CONVERSION_WORK_AREA (coding);
6925
6926   do {
6927     coding_set_source (coding);
6928     consume_chars (coding, translation_table, max_lookup);
6929     coding_set_destination (coding);
6930     (*(coding->encoder)) (coding);
6931   } while (coding->consumed_char < coding->src_chars);
6932
6933   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
6934     insert_from_gap (coding->produced_char, coding->produced);
6935
6936   return (coding->result);
6937 }
6938
6939
/* Name (or base name) of the work buffers used for code conversion.  */
static Lisp_Object Vcode_conversion_workbuf_name;

/* The working buffer used by the top level conversion.  Once it is
   created, it is never destroyed.  It has the name
   Vcode_conversion_workbuf_name.  The other working buffers are
   destroyed after the use is finished, and their names are modified
   versions of Vcode_conversion_workbuf_name.  */
static Lisp_Object Vcode_conversion_reused_workbuf;

/* Nonzero while Vcode_conversion_reused_workbuf is in use.  This is
   not a strict 0/1 flag: make_conversion_work_buffer increments it on
   every call, and code_conversion_restore resets it to 0 when the
   reused buffer is released.  */
static int reused_workbuf_in_use;
6952
6953
6954 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
6955    multibyteness of returning buffer.  */
6956
6957 static Lisp_Object
6958 make_conversion_work_buffer (multibyte)
6959      int multibyte;
6960 {
6961   Lisp_Object name, workbuf;
6962   struct buffer *current;
6963
6964   if (reused_workbuf_in_use++)
6965     {
6966       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6967       workbuf = Fget_buffer_create (name);
6968     }
6969   else
6970     {
6971       name = Vcode_conversion_workbuf_name;
6972       workbuf = Fget_buffer_create (name);
6973       if (NILP (Vcode_conversion_reused_workbuf))
6974         Vcode_conversion_reused_workbuf = workbuf;
6975     }
6976   current = current_buffer;
6977   set_buffer_internal (XBUFFER (workbuf));
6978   Ferase_buffer ();
6979   current_buffer->undo_list = Qt;
6980   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
6981   set_buffer_internal (current);
6982   return workbuf;
6983 }
6984
6985
6986 static Lisp_Object
6987 code_conversion_restore (arg)
6988      Lisp_Object arg;
6989 {
6990   Lisp_Object current, workbuf;
6991   struct gcpro gcpro1;
6992
6993   GCPRO1 (arg);
6994   current = XCAR (arg);
6995   workbuf = XCDR (arg);
6996   if (! NILP (workbuf))
6997     {
6998       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6999         reused_workbuf_in_use = 0;
7000       else if (! NILP (Fbuffer_live_p (workbuf)))
7001         Fkill_buffer (workbuf);
7002     }
7003   set_buffer_internal (XBUFFER (current));
7004   UNGCPRO;
7005   return Qnil;
7006 }
7007
7008 Lisp_Object
7009 code_conversion_save (with_work_buf, multibyte)
7010      int with_work_buf, multibyte;
7011 {
7012   Lisp_Object workbuf = Qnil;
7013
7014   if (with_work_buf)
7015     workbuf = make_conversion_work_buffer (multibyte);
7016   record_unwind_protect (code_conversion_restore,
7017                          Fcons (Fcurrent_buffer (), workbuf));
7018   return workbuf;
7019 }
7020
7021 int
7022 decode_coding_gap (coding, chars, bytes)
7023      struct coding_system *coding;
7024      EMACS_INT chars, bytes;
7025 {
7026   int count = specpdl_ptr - specpdl;
7027   Lisp_Object attrs;
7028
7029   code_conversion_save (0, 0);
7030
7031   coding->src_object = Fcurrent_buffer ();
7032   coding->src_chars = chars;
7033   coding->src_bytes = bytes;
7034   coding->src_pos = -chars;
7035   coding->src_pos_byte = -bytes;
7036   coding->src_multibyte = chars < bytes;
7037   coding->dst_object = coding->src_object;
7038   coding->dst_pos = PT;
7039   coding->dst_pos_byte = PT_BYTE;
7040   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
7041
7042   if (CODING_REQUIRE_DETECTION (coding))
7043     detect_coding (coding);
7044
7045   coding->mode |= CODING_MODE_LAST_BLOCK;
7046   current_buffer->text->inhibit_shrinking = 1;
7047   decode_coding (coding);
7048   current_buffer->text->inhibit_shrinking = 0;
7049
7050   attrs = CODING_ID_ATTRS (coding->id);
7051   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7052     {
7053       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7054       Lisp_Object val;
7055
7056       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7057       val = call1 (CODING_ATTR_POST_READ (attrs),
7058                    make_number (coding->produced_char));
7059       CHECK_NATNUM (val);
7060       coding->produced_char += Z - prev_Z;
7061       coding->produced += Z_BYTE - prev_Z_BYTE;
7062     }
7063
7064   unbind_to (count, Qnil);
7065   return coding->result;
7066 }
7067
7068 int
7069 encode_coding_gap (coding, chars, bytes)
7070      struct coding_system *coding;
7071      EMACS_INT chars, bytes;
7072 {
7073   int count = specpdl_ptr - specpdl;
7074
7075   code_conversion_save (0, 0);
7076
7077   coding->src_object = Fcurrent_buffer ();
7078   coding->src_chars = chars;
7079   coding->src_bytes = bytes;
7080   coding->src_pos = -chars;
7081   coding->src_pos_byte = -bytes;
7082   coding->src_multibyte = chars < bytes;
7083   coding->dst_object = coding->src_object;
7084   coding->dst_pos = PT;
7085   coding->dst_pos_byte = PT_BYTE;
7086
7087   encode_coding (coding);
7088
7089   unbind_to (count, Qnil);
7090   return coding->result;
7091 }
7092
7093
7094 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7095    SRC_OBJECT into DST_OBJECT by coding context CODING.
7096
7097    SRC_OBJECT is a buffer, a string, or Qnil.
7098
7099    If it is a buffer, the text is at point of the buffer.  FROM and TO
7100    are positions in the buffer.
7101
7102    If it is a string, the text is at the beginning of the string.
7103    FROM and TO are indices to the string.
7104
7105    If it is nil, the text is at coding->source.  FROM and TO are
7106    indices to coding->source.
7107
7108    DST_OBJECT is a buffer, Qt, or Qnil.
7109
7110    If it is a buffer, the decoded text is inserted at point of the
7111    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7112    is deleted.
7113
7114    If it is Qt, a string is made from the decoded text, and
7115    set in CODING->dst_object.
7116
7117    If it is Qnil, the decoded text is stored at CODING->destination.
7118    The caller must allocate CODING->dst_bytes bytes at
7119    CODING->destination by xmalloc.  If the decoded text is longer than
7120    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7121  */
7122
7123 void
7124 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7125                       dst_object)
7126      struct coding_system *coding;
7127      Lisp_Object src_object;
7128      EMACS_INT from, from_byte, to, to_byte;
7129      Lisp_Object dst_object;
7130 {
7131   int count = specpdl_ptr - specpdl;
7132   unsigned char *destination;
7133   EMACS_INT dst_bytes;
7134   EMACS_INT chars = to - from;
7135   EMACS_INT bytes = to_byte - from_byte;
7136   Lisp_Object attrs;
7137   int saved_pt = -1, saved_pt_byte;
7138   int need_marker_adjustment = 0;
7139   Lisp_Object old_deactivate_mark;
7140
7141   old_deactivate_mark = Vdeactivate_mark;
7142
7143   if (NILP (dst_object))
7144     {
7145       destination = coding->destination;
7146       dst_bytes = coding->dst_bytes;
7147     }
7148
7149   coding->src_object = src_object;
7150   coding->src_chars = chars;
7151   coding->src_bytes = bytes;
7152   coding->src_multibyte = chars < bytes;
7153
7154   if (STRINGP (src_object))
7155     {
7156       coding->src_pos = from;
7157       coding->src_pos_byte = from_byte;
7158     }
7159   else if (BUFFERP (src_object))
7160     {
7161       set_buffer_internal (XBUFFER (src_object));
7162       if (from != GPT)
7163         move_gap_both (from, from_byte);
7164       if (EQ (src_object, dst_object))
7165         {
7166           struct Lisp_Marker *tail;
7167
7168           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7169             {
7170               tail->need_adjustment
7171                 = tail->charpos == (tail->insertion_type ? from : to);
7172               need_marker_adjustment |= tail->need_adjustment;
7173             }
7174           saved_pt = PT, saved_pt_byte = PT_BYTE;
7175           TEMP_SET_PT_BOTH (from, from_byte);
7176           current_buffer->text->inhibit_shrinking = 1;
7177           del_range_both (from, from_byte, to, to_byte, 1);
7178           coding->src_pos = -chars;
7179           coding->src_pos_byte = -bytes;
7180         }
7181       else
7182         {
7183           coding->src_pos = from;
7184           coding->src_pos_byte = from_byte;
7185         }
7186     }
7187
7188   if (CODING_REQUIRE_DETECTION (coding))
7189     detect_coding (coding);
7190   attrs = CODING_ID_ATTRS (coding->id);
7191
7192   if (EQ (dst_object, Qt)
7193       || (! NILP (CODING_ATTR_POST_READ (attrs))
7194           && NILP (dst_object)))
7195     {
7196       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7197       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7198       coding->dst_pos = BEG;
7199       coding->dst_pos_byte = BEG_BYTE;
7200     }
7201   else if (BUFFERP (dst_object))
7202     {
7203       code_conversion_save (0, 0);
7204       coding->dst_object = dst_object;
7205       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7206       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7207       coding->dst_multibyte
7208         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7209     }
7210   else
7211     {
7212       code_conversion_save (0, 0);
7213       coding->dst_object = Qnil;
7214       /* Most callers presume this will return a multibyte result, and they
7215          won't use `binary' or `raw-text' anyway, so let's not worry about
7216          CODING_FOR_UNIBYTE.  */
7217       coding->dst_multibyte = 1;
7218     }
7219
7220   decode_coding (coding);
7221
7222   if (BUFFERP (coding->dst_object))
7223     set_buffer_internal (XBUFFER (coding->dst_object));
7224
7225   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7226     {
7227       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7228       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7229       Lisp_Object val;
7230
7231       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7232       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7233               old_deactivate_mark);
7234       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7235                         make_number (coding->produced_char));
7236       UNGCPRO;
7237       CHECK_NATNUM (val);
7238       coding->produced_char += Z - prev_Z;
7239       coding->produced += Z_BYTE - prev_Z_BYTE;
7240     }
7241
7242   if (EQ (dst_object, Qt))
7243     {
7244       coding->dst_object = Fbuffer_string ();
7245     }
7246   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7247     {
7248       set_buffer_internal (XBUFFER (coding->dst_object));
7249       if (dst_bytes < coding->produced)
7250         {
7251           destination = xrealloc (destination, coding->produced);
7252           if (! destination)
7253             {
7254               record_conversion_result (coding,
7255                                         CODING_RESULT_INSUFFICIENT_DST);
7256               unbind_to (count, Qnil);
7257               return;
7258             }
7259           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7260             move_gap_both (BEGV, BEGV_BYTE);
7261           bcopy (BEGV_ADDR, destination, coding->produced);
7262           coding->destination = destination;
7263         }
7264     }
7265
7266   if (saved_pt >= 0)
7267     {
7268       /* This is the case of:
7269          (BUFFERP (src_object) && EQ (src_object, dst_object))
7270          As we have moved PT while replacing the original buffer
7271          contents, we must recover it now.  */
7272       set_buffer_internal (XBUFFER (src_object));
7273       current_buffer->text->inhibit_shrinking = 0;
7274       if (saved_pt < from)
7275         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7276       else if (saved_pt < from + chars)
7277         TEMP_SET_PT_BOTH (from, from_byte);
7278       else if (! NILP (current_buffer->enable_multibyte_characters))
7279         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7280                           saved_pt_byte + (coding->produced - bytes));
7281       else
7282         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7283                           saved_pt_byte + (coding->produced - bytes));
7284
7285       if (need_marker_adjustment)
7286         {
7287           struct Lisp_Marker *tail;
7288
7289           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7290             if (tail->need_adjustment)
7291               {
7292                 tail->need_adjustment = 0;
7293                 if (tail->insertion_type)
7294                   {
7295                     tail->bytepos = from_byte;
7296                     tail->charpos = from;
7297                   }
7298                 else
7299                   {
7300                     tail->bytepos = from_byte + coding->produced;
7301                     tail->charpos
7302                       = (NILP (current_buffer->enable_multibyte_characters)
7303                          ? tail->bytepos : from + coding->produced_char);
7304                   }
7305               }
7306         }
7307     }
7308
7309   Vdeactivate_mark = old_deactivate_mark;
7310   unbind_to (count, coding->dst_object);
7311 }
7312
7313
7314 void
7315 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7316                       dst_object)
7317      struct coding_system *coding;
7318      Lisp_Object src_object;
7319      EMACS_INT from, from_byte, to, to_byte;
7320      Lisp_Object dst_object;
7321 {
7322   int count = specpdl_ptr - specpdl;
7323   EMACS_INT chars = to - from;
7324   EMACS_INT bytes = to_byte - from_byte;
7325   Lisp_Object attrs;
7326   int saved_pt = -1, saved_pt_byte;
7327   int need_marker_adjustment = 0;
7328   int kill_src_buffer = 0;
7329   Lisp_Object old_deactivate_mark;
7330
7331   old_deactivate_mark = Vdeactivate_mark;
7332
7333   coding->src_object = src_object;
7334   coding->src_chars = chars;
7335   coding->src_bytes = bytes;
7336   coding->src_multibyte = chars < bytes;
7337
7338   attrs = CODING_ID_ATTRS (coding->id);
7339
7340   if (EQ (src_object, dst_object))
7341     {
7342       struct Lisp_Marker *tail;
7343
7344       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7345         {
7346           tail->need_adjustment
7347             = tail->charpos == (tail->insertion_type ? from : to);
7348           need_marker_adjustment |= tail->need_adjustment;
7349         }
7350     }
7351
7352   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7353     {
7354       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7355       set_buffer_internal (XBUFFER (coding->src_object));
7356       if (STRINGP (src_object))
7357         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7358       else if (BUFFERP (src_object))
7359         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7360       else
7361         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7362
7363       if (EQ (src_object, dst_object))
7364         {
7365           set_buffer_internal (XBUFFER (src_object));
7366           saved_pt = PT, saved_pt_byte = PT_BYTE;
7367           del_range_both (from, from_byte, to, to_byte, 1);
7368           set_buffer_internal (XBUFFER (coding->src_object));
7369         }
7370
7371       {
7372         Lisp_Object args[3];
7373         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7374
7375         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7376                 old_deactivate_mark);
7377         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7378         args[1] = make_number (BEG);
7379         args[2] = make_number (Z);
7380         safe_call (3, args);
7381         UNGCPRO;
7382       }
7383       if (XBUFFER (coding->src_object) != current_buffer)
7384         kill_src_buffer = 1;
7385       coding->src_object = Fcurrent_buffer ();
7386       if (BEG != GPT)
7387         move_gap_both (BEG, BEG_BYTE);
7388       coding->src_chars = Z - BEG;
7389       coding->src_bytes = Z_BYTE - BEG_BYTE;
7390       coding->src_pos = BEG;
7391       coding->src_pos_byte = BEG_BYTE;
7392       coding->src_multibyte = Z < Z_BYTE;
7393     }
7394   else if (STRINGP (src_object))
7395     {
7396       code_conversion_save (0, 0);
7397       coding->src_pos = from;
7398       coding->src_pos_byte = from_byte;
7399     }
7400   else if (BUFFERP (src_object))
7401     {
7402       code_conversion_save (0, 0);
7403       set_buffer_internal (XBUFFER (src_object));
7404       if (EQ (src_object, dst_object))
7405         {
7406           saved_pt = PT, saved_pt_byte = PT_BYTE;
7407           coding->src_object = del_range_1 (from, to, 1, 1);
7408           coding->src_pos = 0;
7409           coding->src_pos_byte = 0;
7410         }
7411       else
7412         {
7413           if (from < GPT && to >= GPT)
7414             move_gap_both (from, from_byte);
7415           coding->src_pos = from;
7416           coding->src_pos_byte = from_byte;
7417         }
7418     }
7419   else
7420     code_conversion_save (0, 0);
7421
7422   if (BUFFERP (dst_object))
7423     {
7424       coding->dst_object = dst_object;
7425       if (EQ (src_object, dst_object))
7426         {
7427           coding->dst_pos = from;
7428           coding->dst_pos_byte = from_byte;
7429         }
7430       else
7431         {
7432           struct buffer *current = current_buffer;
7433
7434           set_buffer_temp (XBUFFER (dst_object));
7435           coding->dst_pos = PT;
7436           coding->dst_pos_byte = PT_BYTE;
7437           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7438           set_buffer_temp (current);
7439         }
7440       coding->dst_multibyte
7441         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7442     }
7443   else if (EQ (dst_object, Qt))
7444     {
7445       coding->dst_object = Qnil;
7446       coding->dst_bytes = coding->src_chars;
7447       if (coding->dst_bytes == 0)
7448         coding->dst_bytes = 1;
7449       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
7450       coding->dst_multibyte = 0;
7451     }
7452   else
7453     {
7454       coding->dst_object = Qnil;
7455       coding->dst_multibyte = 0;
7456     }
7457
7458   encode_coding (coding);
7459
7460   if (EQ (dst_object, Qt))
7461     {
7462       if (BUFFERP (coding->dst_object))
7463         coding->dst_object = Fbuffer_string ();
7464       else
7465         {
7466           coding->dst_object
7467             = make_unibyte_string ((char *) coding->destination,
7468                                    coding->produced);
7469           xfree (coding->destination);
7470         }
7471     }
7472
7473   if (saved_pt >= 0)
7474     {
7475       /* This is the case of:
7476          (BUFFERP (src_object) && EQ (src_object, dst_object))
7477          As we have moved PT while replacing the original buffer
7478          contents, we must recover it now.  */
7479       set_buffer_internal (XBUFFER (src_object));
7480       if (saved_pt < from)
7481         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7482       else if (saved_pt < from + chars)
7483         TEMP_SET_PT_BOTH (from, from_byte);
7484       else if (! NILP (current_buffer->enable_multibyte_characters))
7485         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7486                           saved_pt_byte + (coding->produced - bytes));
7487       else
7488         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7489                           saved_pt_byte + (coding->produced - bytes));
7490
7491       if (need_marker_adjustment)
7492         {
7493           struct Lisp_Marker *tail;
7494
7495           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7496             if (tail->need_adjustment)
7497               {
7498                 tail->need_adjustment = 0;
7499                 if (tail->insertion_type)
7500                   {
7501                     tail->bytepos = from_byte;
7502                     tail->charpos = from;
7503                   }
7504                 else
7505                   {
7506                     tail->bytepos = from_byte + coding->produced;
7507                     tail->charpos
7508                       = (NILP (current_buffer->enable_multibyte_characters)
7509                          ? tail->bytepos : from + coding->produced_char);
7510                   }
7511               }
7512         }
7513     }
7514
7515   if (kill_src_buffer)
7516     Fkill_buffer (coding->src_object);
7517
7518   Vdeactivate_mark = old_deactivate_mark;
7519   unbind_to (count, Qnil);
7520 }
7521
7522
7523 Lisp_Object
7524 preferred_coding_system ()
7525 {
7526   int id = coding_categories[coding_priorities[0]].id;
7527
7528   return CODING_ID_NAME (id);
7529 }
7530
7531 \f
7532 #ifdef emacs
7533 /*** 8. Emacs Lisp library functions ***/
7534
7535 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
7536        doc: /* Return t if OBJECT is nil or a coding-system.
7537 See the documentation of `define-coding-system' for information
7538 about coding-system objects.  */)
7539      (object)
7540      Lisp_Object object;
7541 {
7542   if (NILP (object)
7543       || CODING_SYSTEM_ID (object) >= 0)
7544     return Qt;
7545   if (! SYMBOLP (object)
7546       || NILP (Fget (object, Qcoding_system_define_form)))
7547     return Qnil;
7548   return Qt;
7549 }
7550
7551 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
7552        Sread_non_nil_coding_system, 1, 1, 0,
7553        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
7554      (prompt)
7555      Lisp_Object prompt;
7556 {
7557   Lisp_Object val;
7558   do
7559     {
7560       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7561                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
7562     }
7563   while (SCHARS (val) == 0);
7564   return (Fintern (val, Qnil));
7565 }
7566
7567 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
7568        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
7569 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
7570 Ignores case when completing coding systems (all Emacs coding systems
7571 are lower-case).  */)
7572      (prompt, default_coding_system)
7573      Lisp_Object prompt, default_coding_system;
7574 {
7575   Lisp_Object val;
7576   int count = SPECPDL_INDEX ();
7577
7578   if (SYMBOLP (default_coding_system))
7579     default_coding_system = SYMBOL_NAME (default_coding_system);
7580   specbind (Qcompletion_ignore_case, Qt);
7581   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7582                           Qt, Qnil, Qcoding_system_history,
7583                           default_coding_system, Qnil);
7584   unbind_to (count, Qnil);
7585   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
7586 }
7587
7588 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7589        1, 1, 0,
7590        doc: /* Check validity of CODING-SYSTEM.
7591 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7592 It is valid if it is nil or a symbol defined as a coding system by the
7593 function `define-coding-system'.  */)
7594   (coding_system)
7595      Lisp_Object coding_system;
7596 {
7597   Lisp_Object define_form;
7598
7599   define_form = Fget (coding_system, Qcoding_system_define_form);
7600   if (! NILP (define_form))
7601     {
7602       Fput (coding_system, Qcoding_system_define_form, Qnil);
7603       safe_eval (define_form);
7604     }
7605   if (!NILP (Fcoding_system_p (coding_system)))
7606     return coding_system;
7607   xsignal1 (Qcoding_system_error, coding_system);
7608 }
7609
7610 \f
7611 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
7612    HIGHEST is nonzero, return the coding system of the highest
7613    priority among the detected coding systems.  Otherwize return a
7614    list of detected coding systems sorted by their priorities.  If
7615    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7616    multibyte form but contains only ASCII and eight-bit chars.
7617    Otherwise, the bytes are raw bytes.
7618
7619    CODING-SYSTEM controls the detection as below:
7620
7621    If it is nil, detect both text-format and eol-format.  If the
7622    text-format part of CODING-SYSTEM is already specified
7623    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
7624    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7625    detect only text-format.  */
7626
7627 Lisp_Object
7628 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7629                       coding_system)
7630      const unsigned char *src;
7631      EMACS_INT src_chars, src_bytes;
7632      int highest;
7633      int multibytep;
7634      Lisp_Object coding_system;
7635 {
7636   const unsigned char *src_end = src + src_bytes;
7637   Lisp_Object attrs, eol_type;
7638   Lisp_Object val;
7639   struct coding_system coding;
7640   int id;
7641   struct coding_detection_info detect_info;
7642   enum coding_category base_category;
7643   int null_byte_found = 0, eight_bit_found = 0;
7644
7645   if (NILP (coding_system))
7646     coding_system = Qundecided;
7647   setup_coding_system (coding_system, &coding);
7648   attrs = CODING_ID_ATTRS (coding.id);
7649   eol_type = CODING_ID_EOL_TYPE (coding.id);
7650   coding_system = CODING_ATTR_BASE_NAME (attrs);
7651
7652   coding.source = src;
7653   coding.src_chars = src_chars;
7654   coding.src_bytes = src_bytes;
7655   coding.src_multibyte = multibytep;
7656   coding.consumed = 0;
7657   coding.mode |= CODING_MODE_LAST_BLOCK;
7658
7659   detect_info.checked = detect_info.found = detect_info.rejected = 0;
7660
7661   /* At first, detect text-format if necessary.  */
7662   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7663   if (base_category == coding_category_undecided)
7664     {
7665       enum coding_category category;
7666       struct coding_system *this;
7667       int c, i;
7668
7669       coding.head_ascii = -1;
7670       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
7671       for (; src < src_end; src++)
7672         {
7673           c = *src;
7674           if (c & 0x80)
7675             {
7676               eight_bit_found = 1;
7677               if (coding.head_ascii < 0)
7678                 coding.head_ascii = src - coding.source;
7679               if (null_byte_found)
7680                 break;
7681             }
7682           if (c < 0x20)
7683             {
7684               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
7685                   && ! inhibit_iso_escape_detection
7686                   && ! detect_info.checked)
7687                 {
7688                   if (coding.head_ascii < 0)
7689                     coding.head_ascii = src - coding.source;
7690                   if (detect_coding_iso_2022 (&coding, &detect_info))
7691                     {
7692                       /* We have scanned the whole data.  */
7693                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
7694                         /* We didn't find an 8-bit code.  We may have
7695                            found a null-byte, but it's very rare that
7696                            a binary file confirm to ISO-2022.  */
7697                         src = src_end;
7698                       break;
7699                     }
7700                 }
7701               else if (! c)
7702                 {
7703                   null_byte_found = 1;
7704                   if (eight_bit_found)
7705                     break;
7706                 }
7707             }
7708         }
7709       if (coding.head_ascii < 0)
7710         coding.head_ascii = src - coding.source;
7711
7712       if (null_byte_found || eight_bit_found
7713           || coding.head_ascii < coding.src_bytes
7714           || detect_info.found)
7715         {
7716           if (coding.head_ascii == coding.src_bytes)
7717             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
7718             for (i = 0; i < coding_category_raw_text; i++)
7719               {
7720                 category = coding_priorities[i];
7721                 this = coding_categories + category;
7722                 if (detect_info.found & (1 << category))
7723                   break;
7724               }
7725           else
7726             {
7727               if (null_byte_found)
7728                 {
7729                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
7730                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
7731                 }
7732               for (i = 0; i < coding_category_raw_text; i++)
7733                 {
7734                   category = coding_priorities[i];
7735                   this = coding_categories + category;
7736
7737                   if (this->id < 0)
7738                     {
7739                       /* No coding system of this category is defined.  */
7740                       detect_info.rejected |= (1 << category);
7741                     }
7742                   else if (category >= coding_category_raw_text)
7743                     continue;
7744                   else if (detect_info.checked & (1 << category))
7745                     {
7746                       if (highest
7747                           && (detect_info.found & (1 << category)))
7748                         break;
7749                     }
7750                   else if ((*(this->detector)) (&coding, &detect_info)
7751                            && highest
7752                            && (detect_info.found & (1 << category)))
7753                     {
7754                       if (category == coding_category_utf_16_auto)
7755                         {
7756                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7757                             category = coding_category_utf_16_le;
7758                           else
7759                             category = coding_category_utf_16_be;
7760                         }
7761                       break;
7762                     }
7763                 }
7764             }
7765         }
7766
7767       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY)
7768         {
7769           detect_info.found = CATEGORY_MASK_RAW_TEXT;
7770           id = coding_categories[coding_category_raw_text].id;
7771           val = Fcons (make_number (id), Qnil);
7772         }
7773       else if (! detect_info.rejected && ! detect_info.found)
7774         {
7775           detect_info.found = CATEGORY_MASK_ANY;
7776           id = coding_categories[coding_category_undecided].id;
7777           val = Fcons (make_number (id), Qnil);
7778         }
7779       else if (highest)
7780         {
7781           if (detect_info.found)
7782             {
7783               detect_info.found = 1 << category;
7784               val = Fcons (make_number (this->id), Qnil);
7785             }
7786           else
7787             for (i = 0; i < coding_category_raw_text; i++)
7788               if (! (detect_info.rejected & (1 << coding_priorities[i])))
7789                 {
7790                   detect_info.found = 1 << coding_priorities[i];
7791                   id = coding_categories[coding_priorities[i]].id;
7792                   val = Fcons (make_number (id), Qnil);
7793                   break;
7794                 }
7795         }
7796       else
7797         {
7798           int mask = detect_info.rejected | detect_info.found;
7799           int found = 0;
7800           val = Qnil;
7801
7802           for (i = coding_category_raw_text - 1; i >= 0; i--)
7803             {
7804               category = coding_priorities[i];
7805               if (! (mask & (1 << category)))
7806                 {
7807                   found |= 1 << category;
7808                   id = coding_categories[category].id;
7809                   if (id >= 0)
7810                     val = Fcons (make_number (id), val);
7811                 }
7812             }
7813           for (i = coding_category_raw_text - 1; i >= 0; i--)
7814             {
7815               category = coding_priorities[i];
7816               if (detect_info.found & (1 << category))
7817                 {
7818                   id = coding_categories[category].id;
7819                   val = Fcons (make_number (id), val);
7820                 }
7821             }
7822           detect_info.found |= found;
7823         }
7824     }
7825   else if (base_category == coding_category_utf_8_auto)
7826     {
7827       if (detect_coding_utf_8 (&coding, &detect_info))
7828         {
7829           struct coding_system *this;
7830
7831           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
7832             this = coding_categories + coding_category_utf_8_sig;
7833           else
7834             this = coding_categories + coding_category_utf_8_nosig;
7835           val = Fcons (make_number (this->id), Qnil);
7836         }
7837     }
7838   else if (base_category == coding_category_utf_16_auto)
7839     {
7840       if (detect_coding_utf_16 (&coding, &detect_info))
7841         {
7842           struct coding_system *this;
7843
7844           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7845             this = coding_categories + coding_category_utf_16_le;
7846           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7847             this = coding_categories + coding_category_utf_16_be;
7848           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7849             this = coding_categories + coding_category_utf_16_be_nosig;
7850           else
7851             this = coding_categories + coding_category_utf_16_le_nosig;
7852           val = Fcons (make_number (this->id), Qnil);
7853         }
7854     }
7855   else
7856     {
7857       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
7858       val = Fcons (make_number (coding.id), Qnil);
7859     }
7860
7861   /* Then, detect eol-format if necessary.  */
7862   {
7863     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
7864     Lisp_Object tail;
7865
7866     if (VECTORP (eol_type))
7867       {
7868         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
7869           {
7870             if (null_byte_found)
7871               normal_eol = EOL_SEEN_LF;
7872             else
7873               normal_eol = detect_eol (coding.source, src_bytes,
7874                                        coding_category_raw_text);
7875           }
7876         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7877                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
7878           utf_16_be_eol = detect_eol (coding.source, src_bytes,
7879                                       coding_category_utf_16_be);
7880         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7881                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
7882           utf_16_le_eol = detect_eol (coding.source, src_bytes,
7883                                       coding_category_utf_16_le);
7884       }
7885     else
7886       {
7887         if (EQ (eol_type, Qunix))
7888           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7889         else if (EQ (eol_type, Qdos))
7890           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7891         else
7892           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7893       }
7894
7895     for (tail = val; CONSP (tail); tail = XCDR (tail))
7896       {
7897         enum coding_category category;
7898         int this_eol;
7899
7900         id = XINT (XCAR (tail));
7901         attrs = CODING_ID_ATTRS (id);
7902         category = XINT (CODING_ATTR_CATEGORY (attrs));
7903         eol_type = CODING_ID_EOL_TYPE (id);
7904         if (VECTORP (eol_type))
7905           {
7906             if (category == coding_category_utf_16_be
7907                 || category == coding_category_utf_16_be_nosig)
7908               this_eol = utf_16_be_eol;
7909             else if (category == coding_category_utf_16_le
7910                      || category == coding_category_utf_16_le_nosig)
7911               this_eol = utf_16_le_eol;
7912             else
7913               this_eol = normal_eol;
7914
7915             if (this_eol == EOL_SEEN_LF)
7916               XSETCAR (tail, AREF (eol_type, 0));
7917             else if (this_eol == EOL_SEEN_CRLF)
7918               XSETCAR (tail, AREF (eol_type, 1));
7919             else if (this_eol == EOL_SEEN_CR)
7920               XSETCAR (tail, AREF (eol_type, 2));
7921             else
7922               XSETCAR (tail, CODING_ID_NAME (id));
7923           }
7924         else
7925           XSETCAR (tail, CODING_ID_NAME (id));
7926       }
7927   }
7928
7929   return (highest ? XCAR (val) : val);
7930 }
7931
7932
7933 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7934        2, 3, 0,
7935        doc: /* Detect coding system of the text in the region between START and END.
7936 Return a list of possible coding systems ordered by priority.
7937
7938 If only ASCII characters are found (except for such ISO-2022 control
7939 characters as ESC), it returns a list of single element `undecided'
7940 or its subsidiary coding system according to a detected end-of-line
7941 format.
7942
7943 If optional argument HIGHEST is non-nil, return the coding system of
7944 highest priority.  */)
7945      (start, end, highest)
7946      Lisp_Object start, end, highest;
7947 {
7948   int from, to;
7949   int from_byte, to_byte;
7950
7951   CHECK_NUMBER_COERCE_MARKER (start);
7952   CHECK_NUMBER_COERCE_MARKER (end);
7953
7954   validate_region (&start, &end);
7955   from = XINT (start), to = XINT (end);
7956   from_byte = CHAR_TO_BYTE (from);
7957   to_byte = CHAR_TO_BYTE (to);
7958
7959   if (from < GPT && to >= GPT)
7960     move_gap_both (to, to_byte);
7961
7962   return detect_coding_system (BYTE_POS_ADDR (from_byte),
7963                                to - from, to_byte - from_byte,
7964                                !NILP (highest),
7965                                !NILP (current_buffer
7966                                       ->enable_multibyte_characters),
7967                                Qnil);
7968 }
7969
7970 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7971        1, 2, 0,
7972        doc: /* Detect coding system of the text in STRING.
7973 Return a list of possible coding systems ordered by priority.
7974
7975 If only ASCII characters are found (except for such ISO-2022 control
7976 characters as ESC), it returns a list of single element `undecided'
7977 or its subsidiary coding system according to a detected end-of-line
7978 format.
7979
7980 If optional argument HIGHEST is non-nil, return the coding system of
7981 highest priority.  */)
7982      (string, highest)
7983      Lisp_Object string, highest;
7984 {
7985   CHECK_STRING (string);
7986
7987   return detect_coding_system (SDATA (string),
7988                                SCHARS (string), SBYTES (string),
7989                                !NILP (highest), STRING_MULTIBYTE (string),
7990                                Qnil);
7991 }
7992
7993
7994 static INLINE int
7995 char_encodable_p (c, attrs)
7996      int c;
7997      Lisp_Object attrs;
7998 {
7999   Lisp_Object tail;
8000   struct charset *charset;
8001   Lisp_Object translation_table;
8002
8003   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8004   if (! NILP (translation_table))
8005     c = translate_char (translation_table, c);
8006   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8007        CONSP (tail); tail = XCDR (tail))
8008     {
8009       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8010       if (CHAR_CHARSET_P (c, charset))
8011         break;
8012     }
8013   return (! NILP (tail));
8014 }
8015
8016
/* Return a list of coding systems that safely encode the text between
   START and END.  If EXCLUDE is non-nil, it is a list of coding
   systems not to check.  The returned list doesn't contain any such
   coding systems.  In any case, if the text contains only ASCII or is
   unibyte, return t.

   START may also be a string; in that case the whole string is
   examined and END is not consulted.  */

DEFUN ("find-coding-systems-region-internal",
       Ffind_coding_systems_region_internal,
       Sfind_coding_systems_region_internal, 2, 3, 0,
       doc: /* Internal use only.  */)
     (start, end, exclude)
     Lisp_Object start, end, exclude;
{
  Lisp_Object coding_attrs_list, safe_codings;
  EMACS_INT start_byte, end_byte;
  const unsigned char *p, *pbeg, *pend;
  int c;
  Lisp_Object tail, elt;

  if (STRINGP (start))
    {
      /* A unibyte string, or one whose character count equals its
         byte count, needs no per-character check.  */
      if (!STRING_MULTIBYTE (start)
          || SCHARS (start) == SBYTES (start))
        return Qt;
      start_byte = 0;
      end_byte = SBYTES (start);
    }
  else
    {
      CHECK_NUMBER_COERCE_MARKER (start);
      CHECK_NUMBER_COERCE_MARKER (end);
      if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
        args_out_of_range (start, end);
      if (NILP (current_buffer->enable_multibyte_characters))
        return Qt;
      start_byte = CHAR_TO_BYTE (XINT (start));
      end_byte = CHAR_TO_BYTE (XINT (end));
      /* Same shortcut as for strings: one byte per character.  */
      if (XINT (end) - XINT (start) == end_byte - start_byte)
        return Qt;

      /* The scan below walks one flat byte range, so if the gap lies
         strictly inside the region move it to whichever end of the
         region is nearer.  */
      if (XINT (start) < GPT && XINT (end) > GPT)
        {
          if ((GPT - XINT (start)) < (XINT (end) - GPT))
            move_gap_both (XINT (start), start_byte);
          else
            move_gap_both (XINT (end), end_byte);
        }
    }

  /* Collect the attribute vectors of the candidates: every member of
     Vcoding_system_list that is not in EXCLUDE, is its own base name,
     and is not of type `undecided'.  The translation-table slot of
     each collected vector is overwritten with the result of
     get_translation_table; char_encodable_p reads that slot.  */
  coding_attrs_list = Qnil;
  for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
    if (NILP (exclude)
        || NILP (Fmemq (XCAR (tail), exclude)))
      {
        Lisp_Object attrs;

        attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
        if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
            && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
          {
            ASET (attrs, coding_attr_trans_tbl,
                  get_translation_table (attrs, 1, NULL));
            coding_attrs_list = Fcons (attrs, coding_attrs_list);
          }
      }

  if (STRINGP (start))
    p = pbeg = SDATA (start);
  else
    p = pbeg = BYTE_POS_ADDR (start_byte);
  pend = p + (end_byte - start_byte);

  /* Trim ASCII bytes from both ends of the range.  */
  while (p < pend && ASCII_BYTE_P (*p)) p++;
  while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;

  while (p < pend)
    {
      if (ASCII_BYTE_P (*p))
        p++;
      else
        {
          c = STRING_CHAR_ADVANCE (p);

          /* Prune, destructively, every candidate that cannot encode
             C.  A cell that has a successor is overwritten with the
             successor's car and cdr and then examined again (TAIL is
             not advanced); the last cell cannot be spliced out that
             way, so its car is set to nil and skipped from then on.  */
          charset_map_loaded = 0;
          for (tail = coding_attrs_list; CONSP (tail);)
            {
              elt = XCAR (tail);
              if (NILP (elt))
                tail = XCDR (tail);
              else if (char_encodable_p (c, elt))
                tail = XCDR (tail);
              else if (CONSP (XCDR (tail)))
                {
                  XSETCAR (tail, XCAR (XCDR (tail)));
                  XSETCDR (tail, XCDR (XCDR (tail)));
                }
              else
                {
                  XSETCAR (tail, Qnil);
                  tail = XCDR (tail);
                }
            }
          /* charset_map_loaded was cleared above; if it is now set,
             rebuild P and PEND from a freshly fetched base address.
             NOTE(review): presumably loading a charset map can
             relocate string or buffer text -- confirm.  */
          if (charset_map_loaded)
            {
              EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;

              if (STRINGP (start))
                pbeg = SDATA (start);
              else
                pbeg = BYTE_POS_ADDR (start_byte);
              p = pbeg + p_offset;
              pend = pbeg + pend_offset;
            }
        }
    }

  /* `raw-text' and `no-conversion' are always part of the result; the
     base names of the surviving candidates are consed in front.  */
  safe_codings = list2 (Qraw_text, Qno_conversion);
  for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
    if (! NILP (XCAR (tail)))
      safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);

  return safe_codings;
}
8140
8141
DEFUN ("unencodable-char-position", Funencodable_char_position,
       Sunencodable_char_position, 3, 5, 0,
       doc: /*
Return position of first un-encodable character in a region.
START and END specify the region and CODING-SYSTEM specifies the
encoding to check.  Return nil if CODING-SYSTEM does encode the region.

If optional 4th argument COUNT is non-nil, it specifies at most how
many un-encodable characters to search.  In this case, the value is a
list of positions.

If optional 5th argument STRING is non-nil, it is a string to search
for un-encodable characters.  In that case, START and END are indexes
to the string.  */)
     (start, end, coding_system, count, string)
     Lisp_Object start, end, coding_system, count, string;
{
  int n;
  struct coding_system coding;
  Lisp_Object attrs, charset_list, translation_table;
  Lisp_Object positions;
  int from, to;
  const unsigned char *p, *stop, *pend;
  int ascii_compatible;

  setup_coding_system (Fcheck_coding_system (coding_system), &coding);
  attrs = CODING_ID_ATTRS (coding.id);
  /* A coding system of type `raw-text' is never reported as failing.  */
  if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
    return Qnil;
  ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
  charset_list = CODING_ATTR_CHARSET_LIST (attrs);
  translation_table = get_translation_table (attrs, 1, NULL);

  if (NILP (string))
    {
      /* Buffer case.  */
      validate_region (&start, &end);
      from = XINT (start);
      to = XINT (end);
      /* Nothing to report for a unibyte buffer, or for a region with
         one byte per character when the coding system is
         ASCII-compatible.  */
      if (NILP (current_buffer->enable_multibyte_characters)
          || (ascii_compatible
              && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
        return Qnil;
      p = CHAR_POS_ADDR (from);
      pend = CHAR_POS_ADDR (to);
      /* The gap is not moved here.  When it is after FROM and not
         after TO, the scan first stops at GPT_ADDR and the loop below
         continues from GAP_END_ADDR.  */
      if (from < GPT && to >= GPT)
        stop = GPT_ADDR;
      else
        stop = pend;
    }
  else
    {
      /* String case: START and END are character indexes into STRING.  */
      CHECK_STRING (string);
      CHECK_NATNUM (start);
      CHECK_NATNUM (end);
      from = XINT (start);
      to = XINT (end);
      if (from > to
          || to > SCHARS (string))
        args_out_of_range_3 (string, start, end);
      if (! STRING_MULTIBYTE (string))
        return Qnil;
      p = SDATA (string) + string_char_to_byte (string, from);
      stop = pend = SDATA (string) + string_char_to_byte (string, to);
      if (ascii_compatible && (to - from) == (pend - p))
        return Qnil;
    }

  /* N is the number of positions still to collect.
     NOTE(review): with COUNT equal to 0, N is decremented below zero
     and never compares equal to 0, so every un-encodable position in
     the range is collected -- confirm that this is intended.  */
  if (NILP (count))
    n = 1;
  else
    {
      CHECK_NATNUM (count);
      n = XINT (count);
    }

  positions = Qnil;
  while (1)
    {
      int c;

      /* FROM is kept equal to the character position of P.  */
      if (ascii_compatible)
        while (p < stop && ASCII_BYTE_P (*p))
          p++, from++;
      if (p >= stop)
        {
          if (p >= pend)
            break;
          /* Reached the gap: continue with the text after it.  */
          stop = pend;
          p = GAP_END_ADDR;
        }

      c = STRING_CHAR_ADVANCE (p);
      /* C is un-encodable when char_charset finds no charset of
         CHARSET_LIST for the translated character.  ASCII characters
         are exempt for an ASCII-compatible coding system.  */
      if (! (ASCII_CHAR_P (c) && ascii_compatible)
          && ! char_charset (translate_char (translation_table, c),
                             charset_list, NULL))
        {
          positions = Fcons (make_number (from), positions);
          n--;
          if (n == 0)
            break;
        }

      from++;
    }

  /* Without COUNT the value is a single position (or nil); with COUNT
     it is the list of positions in increasing order.  */
  return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
}
8249
8250
8251 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8252        Scheck_coding_systems_region, 3, 3, 0,
8253        doc: /* Check if the region is encodable by coding systems.
8254
8255 START and END are buffer positions specifying the region.
8256 CODING-SYSTEM-LIST is a list of coding systems to check.
8257
8258 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8259 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8260 whole region, POS0, POS1, ... are buffer positions where non-encodable
8261 characters are found.
8262
8263 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8264 value is nil.
8265
8266 START may be a string.  In that case, check if the string is
8267 encodable, and the value contains indices to the string instead of
8268 buffer positions.  END is ignored.  */)
8269      (start, end, coding_system_list)
8270      Lisp_Object start, end, coding_system_list;
8271 {
8272   Lisp_Object list;
8273   EMACS_INT start_byte, end_byte;
8274   int pos;
8275   const unsigned char *p, *pbeg, *pend;
8276   int c;
8277   Lisp_Object tail, elt, attrs;
8278
8279   if (STRINGP (start))
8280     {
8281       if (!STRING_MULTIBYTE (start)
8282           && SCHARS (start) != SBYTES (start))
8283         return Qnil;
8284       start_byte = 0;
8285       end_byte = SBYTES (start);
8286       pos = 0;
8287     }
8288   else
8289     {
8290       CHECK_NUMBER_COERCE_MARKER (start);
8291       CHECK_NUMBER_COERCE_MARKER (end);
8292       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8293         args_out_of_range (start, end);
8294       if (NILP (current_buffer->enable_multibyte_characters))
8295         return Qnil;
8296       start_byte = CHAR_TO_BYTE (XINT (start));
8297       end_byte = CHAR_TO_BYTE (XINT (end));
8298       if (XINT (end) - XINT (start) == end_byte - start_byte)
8299         return Qt;
8300
8301       if (XINT (start) < GPT && XINT (end) > GPT)
8302         {
8303           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8304             move_gap_both (XINT (start), start_byte);
8305           else
8306             move_gap_both (XINT (end), end_byte);
8307         }
8308       pos = XINT (start);
8309     }
8310
8311   list = Qnil;
8312   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8313     {
8314       elt = XCAR (tail);
8315       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8316       ASET (attrs, coding_attr_trans_tbl,
8317             get_translation_table (attrs, 1, NULL));
8318       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8319     }
8320
8321   if (STRINGP (start))
8322     p = pbeg = SDATA (start);
8323   else
8324     p = pbeg = BYTE_POS_ADDR (start_byte);
8325   pend = p + (end_byte - start_byte);
8326
8327   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8328   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8329
8330   while (p < pend)
8331     {
8332       if (ASCII_BYTE_P (*p))
8333         p++;
8334       else
8335         {
8336           c = STRING_CHAR_ADVANCE (p);
8337
8338           charset_map_loaded = 0;
8339           for (tail = list; CONSP (tail); tail = XCDR (tail))
8340             {
8341               elt = XCDR (XCAR (tail));
8342               if (! char_encodable_p (c, XCAR (elt)))
8343                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8344             }
8345           if (charset_map_loaded)
8346             {
8347               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8348
8349               if (STRINGP (start))
8350                 pbeg = SDATA (start);
8351               else
8352                 pbeg = BYTE_POS_ADDR (start_byte);
8353               p = pbeg + p_offset;
8354               pend = pbeg + pend_offset;
8355             }
8356         }
8357       pos++;
8358     }
8359
8360   tail = list;
8361   list = Qnil;
8362   for (; CONSP (tail); tail = XCDR (tail))
8363     {
8364       elt = XCAR (tail);
8365       if (CONSP (XCDR (XCDR (elt))))
8366         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8367                       list);
8368     }
8369
8370   return list;
8371 }
8372
8373
8374 Lisp_Object
8375 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
8376      Lisp_Object start, end, coding_system, dst_object;
8377      int encodep, norecord;
8378 {
8379   struct coding_system coding;
8380   EMACS_INT from, from_byte, to, to_byte;
8381   Lisp_Object src_object;
8382
8383   CHECK_NUMBER_COERCE_MARKER (start);
8384   CHECK_NUMBER_COERCE_MARKER (end);
8385   if (NILP (coding_system))
8386     coding_system = Qno_conversion;
8387   else
8388     CHECK_CODING_SYSTEM (coding_system);
8389   src_object = Fcurrent_buffer ();
8390   if (NILP (dst_object))
8391     dst_object = src_object;
8392   else if (! EQ (dst_object, Qt))
8393     CHECK_BUFFER (dst_object);
8394
8395   validate_region (&start, &end);
8396   from = XFASTINT (start);
8397   from_byte = CHAR_TO_BYTE (from);
8398   to = XFASTINT (end);
8399   to_byte = CHAR_TO_BYTE (to);
8400
8401   setup_coding_system (coding_system, &coding);
8402   coding.mode |= CODING_MODE_LAST_BLOCK;
8403
8404   if (encodep)
8405     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8406                           dst_object);
8407   else
8408     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8409                           dst_object);
8410   if (! norecord)
8411     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8412
8413   return (BUFFERP (dst_object)
8414           ? make_number (coding.produced_char)
8415           : coding.dst_object);
8416 }
8417
8418
DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
       3, 4, "r\nzCoding system: ",
       doc: /* Decode the current region from the specified coding system.
When called from a program, takes four arguments:
        START, END, CODING-SYSTEM, and DESTINATION.
START and END are buffer positions.

Optional 4th argument DESTINATION specifies where the decoded text goes.
If nil, the region between START and END is replaced by the decoded text.
If buffer, the decoded text is inserted in the buffer.
In those cases, the length of the decoded text is returned.
If DESTINATION is t, the decoded text is returned.

This function sets `last-coding-system-used' to the precise coding system
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
not fully specified.)  */)
     (start, end, coding_system, destination)
     Lisp_Object start, end, coding_system, destination;
{
  /* Last two arguments: encodep = 0 (decode), norecord = 0 (do record
     the coding system used).  */
  return code_convert_region (start, end, coding_system, destination, 0, 0);
}
8440
DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
       3, 4, "r\nzCoding system: ",
       doc: /* Encode the current region by specified coding system.
When called from a program, takes four arguments:
        START, END, CODING-SYSTEM and DESTINATION.
START and END are buffer positions.

Optional 4th argument DESTINATION specifies where the encoded text goes.
If nil, the region between START and END is replaced by the encoded text.
If buffer, the encoded text is inserted in the buffer.
In those cases, the length of the encoded text is returned.
If DESTINATION is t, the encoded text is returned.

This function sets `last-coding-system-used' to the precise coding system
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
not fully specified.)  */)
  (start, end, coding_system, destination)
     Lisp_Object start, end, coding_system, destination;
{
  /* Last two arguments: encodep = 1 (encode), norecord = 0 (do record
     the coding system used).  */
  return code_convert_region (start, end, coding_system, destination, 1, 0);
}
8462
8463 Lisp_Object
8464 code_convert_string (string, coding_system, dst_object,
8465                      encodep, nocopy, norecord)
8466      Lisp_Object string, coding_system, dst_object;
8467      int encodep, nocopy, norecord;
8468 {
8469   struct coding_system coding;
8470   EMACS_INT chars, bytes;
8471
8472   CHECK_STRING (string);
8473   if (NILP (coding_system))
8474     {
8475       if (! norecord)
8476         Vlast_coding_system_used = Qno_conversion;
8477       if (NILP (dst_object))
8478         return (nocopy ? Fcopy_sequence (string) : string);
8479     }
8480
8481   if (NILP (coding_system))
8482     coding_system = Qno_conversion;
8483   else
8484     CHECK_CODING_SYSTEM (coding_system);
8485   if (NILP (dst_object))
8486     dst_object = Qt;
8487   else if (! EQ (dst_object, Qt))
8488     CHECK_BUFFER (dst_object);
8489
8490   setup_coding_system (coding_system, &coding);
8491   coding.mode |= CODING_MODE_LAST_BLOCK;
8492   chars = SCHARS (string);
8493   bytes = SBYTES (string);
8494   if (encodep)
8495     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8496   else
8497     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8498   if (! norecord)
8499     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8500
8501   return (BUFFERP (dst_object)
8502           ? make_number (coding.produced_char)
8503           : coding.dst_object);
8504 }
8505
8506
/* Encode (if ENCODEP is non-zero) or decode STRING according to
   CODING_SYSTEM, and return the result as a string.
   Do not set Vlast_coding_system_used.

   This function is called only from macros DECODE_FILE and
   ENCODE_FILE, thus we ignore character composition.  */

Lisp_Object
code_convert_string_norecord (string, coding_system, encodep)
     Lisp_Object string, coding_system;
     int encodep;
{
  /* dst_object = Qt (return a string), nocopy = 0, norecord = 1.  */
  return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
}
8520
8521
DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
       2, 4, 0,
       doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.

Optional third arg NOCOPY non-nil means it is OK to return STRING itself
if the decoding operation is trivial.

Optional fourth arg BUFFER non-nil means that the decoded text is
inserted in BUFFER instead of returned as a string.  In this case,
the return value is the length of the decoded text.

This function sets `last-coding-system-used' to the precise coding system
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
not fully specified.)  */)
  (string, coding_system, nocopy, buffer)
     Lisp_Object string, coding_system, nocopy, buffer;
{
  /* Trailing arguments: encodep = 0 (decode), NOCOPY reduced to a C
     boolean, norecord = 0 (do record the coding system used).  */
  return code_convert_string (string, coding_system, buffer,
                              0, ! NILP (nocopy), 0);
}
8542
8543 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
8544        2, 4, 0,
8545        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
8546
8547 Optional third arg NOCOPY non-nil means it is OK to return STRING
8548 itself if the encoding operation is trivial.
8549
8550 Optional fourth arg BUFFER non-nil means that the encoded text is
8551 inserted in BUFFER instead of returned as a string.  In this case,
8552 the return value is the length of the encoded text.
8553
8554 This function sets `last-coding-system-used' to the precise coding system
8555 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8556 not fully specified.)  */)
8557      (string, coding_system, nocopy, buffer)
8558      Lisp_Object string, coding_system, nocopy, buffer;
8559 {
8560   return code_convert_string (string, coding_system, buffer,
8561                               1, ! NILP (nocopy), 1);
8562 }
8563
8564 \f
8565 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
8566        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
8567 Return the corresponding character.  */)
8568      (code)
8569      Lisp_Object code;
8570 {
8571   Lisp_Object spec, attrs, val;
8572   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
8573   int c;
8574
8575   CHECK_NATNUM (code);
8576   c = XFASTINT (code);
8577   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8578   attrs = AREF (spec, 0);
8579
8580   if (ASCII_BYTE_P (c)
8581       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8582     return code;
8583
8584   val = CODING_ATTR_CHARSET_LIST (attrs);
8585   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8586   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8587   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
8588
8589   if (c <= 0x7F)
8590     charset = charset_roman;
8591   else if (c >= 0xA0 && c < 0xDF)
8592     {
8593       charset = charset_kana;
8594       c -= 0x80;
8595     }
8596   else
8597     {
8598       int s1 = c >> 8, s2 = c & 0xFF;
8599
8600       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
8601           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
8602         error ("Invalid code: %d", code);
8603       SJIS_TO_JIS (c);
8604       charset = charset_kanji;
8605     }
8606   c = DECODE_CHAR (charset, c);
8607   if (c < 0)
8608     error ("Invalid code: %d", code);
8609   return make_number (c);
8610 }
8611
8612
8613 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8614        doc: /* Encode a Japanese character CH to shift_jis encoding.
8615 Return the corresponding code in SJIS.  */)
8616      (ch)
8617     Lisp_Object ch;
8618 {
8619   Lisp_Object spec, attrs, charset_list;
8620   int c;
8621   struct charset *charset;
8622   unsigned code;
8623
8624   CHECK_CHARACTER (ch);
8625   c = XFASTINT (ch);
8626   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8627   attrs = AREF (spec, 0);
8628
8629   if (ASCII_CHAR_P (c)
8630       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8631     return ch;
8632
8633   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8634   charset = char_charset (c, charset_list, &code);
8635   if (code == CHARSET_INVALID_CODE (charset))
8636     error ("Can't encode by shift_jis encoding: %d", c);
8637   JIS_TO_SJIS (code);
8638
8639   return make_number (code);
8640 }
8641
8642 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
8643        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
8644 Return the corresponding character.  */)
8645      (code)
8646      Lisp_Object code;
8647 {
8648   Lisp_Object spec, attrs, val;
8649   struct charset *charset_roman, *charset_big5, *charset;
8650   int c;
8651
8652   CHECK_NATNUM (code);
8653   c = XFASTINT (code);
8654   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8655   attrs = AREF (spec, 0);
8656
8657   if (ASCII_BYTE_P (c)
8658       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8659     return code;
8660
8661   val = CODING_ATTR_CHARSET_LIST (attrs);
8662   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8663   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
8664
8665   if (c <= 0x7F)
8666     charset = charset_roman;
8667   else
8668     {
8669       int b1 = c >> 8, b2 = c & 0x7F;
8670       if (b1 < 0xA1 || b1 > 0xFE
8671           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8672         error ("Invalid code: %d", code);
8673       charset = charset_big5;
8674     }
8675   c = DECODE_CHAR (charset, (unsigned )c);
8676   if (c < 0)
8677     error ("Invalid code: %d", code);
8678   return make_number (c);
8679 }
8680
8681 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8682        doc: /* Encode the Big5 character CH to BIG5 coding system.
8683 Return the corresponding character code in Big5.  */)
8684      (ch)
8685      Lisp_Object ch;
8686 {
8687   Lisp_Object spec, attrs, charset_list;
8688   struct charset *charset;
8689   int c;
8690   unsigned code;
8691
8692   CHECK_CHARACTER (ch);
8693   c = XFASTINT (ch);
8694   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8695   attrs = AREF (spec, 0);
8696   if (ASCII_CHAR_P (c)
8697       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8698     return ch;
8699
8700   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8701   charset = char_charset (c, charset_list, &code);
8702   if (code == CHARSET_INVALID_CODE (charset))
8703     error ("Can't encode by Big5 encoding: %d", c);
8704
8705   return make_number (code);
8706 }
8707
8708 \f
8709 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
8710        Sset_terminal_coding_system_internal, 1, 2, 0,
8711        doc: /* Internal use only.  */)
8712      (coding_system, terminal)
8713      Lisp_Object coding_system;
8714      Lisp_Object terminal;
8715 {
8716   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8717   CHECK_SYMBOL (coding_system);
8718   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
8719   /* We had better not send unsafe characters to terminal.  */
8720   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
8721   /* Characer composition should be disabled.  */
8722   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8723   terminal_coding->src_multibyte = 1;
8724   terminal_coding->dst_multibyte = 0;
8725   return Qnil;
8726 }
8727
8728 DEFUN ("set-safe-terminal-coding-system-internal",
8729        Fset_safe_terminal_coding_system_internal,
8730        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
8731        doc: /* Internal use only.  */)
8732      (coding_system)
8733      Lisp_Object coding_system;
8734 {
8735   CHECK_SYMBOL (coding_system);
8736   setup_coding_system (Fcheck_coding_system (coding_system),
8737                        &safe_terminal_coding);
8738   /* Characer composition should be disabled.  */
8739   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8740   safe_terminal_coding.src_multibyte = 1;
8741   safe_terminal_coding.dst_multibyte = 0;
8742   return Qnil;
8743 }
8744
8745 DEFUN ("terminal-coding-system", Fterminal_coding_system,
8746        Sterminal_coding_system, 0, 1, 0,
8747        doc: /* Return coding system specified for terminal output on the given terminal.
8748 TERMINAL may be a terminal id, a frame, or nil for the selected
8749 frame's terminal device.  */)
8750      (terminal)
8751      Lisp_Object terminal;
8752 {
8753   struct coding_system *terminal_coding
8754     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8755   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
8756
8757   /* For backward compatibility, return nil if it is `undecided'. */
8758   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
8759 }
8760
8761 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
8762        Sset_keyboard_coding_system_internal, 1, 2, 0,
8763        doc: /* Internal use only.  */)
8764      (coding_system, terminal)
8765      Lisp_Object coding_system;
8766      Lisp_Object terminal;
8767 {
8768   struct terminal *t = get_terminal (terminal, 1);
8769   CHECK_SYMBOL (coding_system);
8770   setup_coding_system (Fcheck_coding_system (coding_system),
8771                        TERMINAL_KEYBOARD_CODING (t));
8772   /* Characer composition should be disabled.  */
8773   TERMINAL_KEYBOARD_CODING (t)->common_flags
8774     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8775   return Qnil;
8776 }
8777
8778 DEFUN ("keyboard-coding-system",
8779        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
8780        doc: /* Return coding system specified for decoding keyboard input.  */)
8781      (terminal)
8782      Lisp_Object terminal;
8783 {
8784   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
8785                          (get_terminal (terminal, 1))->id);
8786 }
8787
8788 \f
8789 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8790        Sfind_operation_coding_system,  1, MANY, 0,
8791        doc: /* Choose a coding system for an operation based on the target name.
8792 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8793 DECODING-SYSTEM is the coding system to use for decoding
8794 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8795 for encoding (in case OPERATION does encoding).
8796
8797 The first argument OPERATION specifies an I/O primitive:
8798   For file I/O, `insert-file-contents' or `write-region'.
8799   For process I/O, `call-process', `call-process-region', or `start-process'.
8800   For network I/O, `open-network-stream'.
8801
8802 The remaining arguments should be the same arguments that were passed
8803 to the primitive.  Depending on which primitive, one of those arguments
8804 is selected as the TARGET.  For example, if OPERATION does file I/O,
8805 whichever argument specifies the file name is TARGET.
8806
8807 TARGET has a meaning which depends on OPERATION:
8808   For file I/O, TARGET is a file name (except for the special case below).
8809   For process I/O, TARGET is a process name.
8810   For network I/O, TARGET is a service name or a port number.
8811
8812 This function looks up what is specified for TARGET in
8813 `file-coding-system-alist', `process-coding-system-alist',
8814 or `network-coding-system-alist' depending on OPERATION.
8815 They may specify a coding system, a cons of coding systems,
8816 or a function symbol to call.
8817 In the last case, we call the function with one argument,
8818 which is a list of all the arguments given to this function.
8819 If the function can't decide a coding system, it can return
8820 `undecided' so that the normal code-detection is performed.
8821
8822 If OPERATION is `insert-file-contents', the argument corresponding to
8823 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
8824 file name to look up, and BUFFER is a buffer that contains the file's
8825 contents (not yet decoded).  If `file-coding-system-alist' specifies a
8826 function to call for FILENAME, that function should examine the
8827 contents of BUFFER instead of reading the file.
8828
8829 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
8830      (nargs, args)
8831      int nargs;
8832      Lisp_Object *args;
8833 {
8834   Lisp_Object operation, target_idx, target, val;
8835   register Lisp_Object chain;
8836
8837   if (nargs < 2)
8838     error ("Too few arguments");
8839   operation = args[0];
8840   if (!SYMBOLP (operation)
8841       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
8842     error ("Invalid first argument");
8843   if (nargs < 1 + XINT (target_idx))
8844     error ("Too few arguments for operation: %s",
8845            SDATA (SYMBOL_NAME (operation)));
8846   target = args[XINT (target_idx) + 1];
8847   if (!(STRINGP (target)
8848         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
8849             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
8850         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
8851     error ("Invalid %dth argument", XINT (target_idx) + 1);
8852   if (CONSP (target))
8853     target = XCAR (target);
8854
8855   chain = ((EQ (operation, Qinsert_file_contents)
8856             || EQ (operation, Qwrite_region))
8857            ? Vfile_coding_system_alist
8858            : (EQ (operation, Qopen_network_stream)
8859               ? Vnetwork_coding_system_alist
8860               : Vprocess_coding_system_alist));
8861   if (NILP (chain))
8862     return Qnil;
8863
8864   for (; CONSP (chain); chain = XCDR (chain))
8865     {
8866       Lisp_Object elt;
8867
8868       elt = XCAR (chain);
8869       if (CONSP (elt)
8870           && ((STRINGP (target)
8871                && STRINGP (XCAR (elt))
8872                && fast_string_match (XCAR (elt), target) >= 0)
8873               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
8874         {
8875           val = XCDR (elt);
8876           /* Here, if VAL is both a valid coding system and a valid
8877              function symbol, we return VAL as a coding system.  */
8878           if (CONSP (val))
8879             return val;
8880           if (! SYMBOLP (val))
8881             return Qnil;
8882           if (! NILP (Fcoding_system_p (val)))
8883             return Fcons (val, val);
8884           if (! NILP (Ffboundp (val)))
8885             {
8886               /* We use call1 rather than safe_call1
8887                  so as to get bug reports about functions called here
8888                  which don't handle the current interface.  */
8889               val = call1 (val, Flist (nargs, args));
8890               if (CONSP (val))
8891                 return val;
8892               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8893                 return Fcons (val, val);
8894             }
8895           return Qnil;
8896         }
8897     }
8898   return Qnil;
8899 }
8900
8901 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
8902        Sset_coding_system_priority, 0, MANY, 0,
8903        doc: /* Assign higher priority to the coding systems given as arguments.
8904 If multiple coding systems belong to the same category,
8905 all but the first one are ignored.
8906
8907 usage: (set-coding-system-priority &rest coding-systems)  */)
8908      (nargs, args)
8909      int nargs;
8910      Lisp_Object *args;
8911 {
8912   int i, j;
8913   int changed[coding_category_max];
8914   enum coding_category priorities[coding_category_max];
8915
8916   bzero (changed, sizeof changed);
8917
8918   for (i = j = 0; i < nargs; i++)
8919     {
8920       enum coding_category category;
8921       Lisp_Object spec, attrs;
8922
8923       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8924       attrs = AREF (spec, 0);
8925       category = XINT (CODING_ATTR_CATEGORY (attrs));
8926       if (changed[category])
8927         /* Ignore this coding system because a coding system of the
8928            same category already had a higher priority.  */
8929         continue;
8930       changed[category] = 1;
8931       priorities[j++] = category;
8932       if (coding_categories[category].id >= 0
8933           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8934         setup_coding_system (args[i], &coding_categories[category]);
8935       Fset (AREF (Vcoding_category_table, category), args[i]);
8936     }
8937
8938   /* Now we have decided top J priorities.  Reflect the order of the
8939      original priorities to the remaining priorities.  */
8940
8941   for (i = j, j = 0; i < coding_category_max; i++, j++)
8942     {
8943       while (j < coding_category_max
8944              && changed[coding_priorities[j]])
8945         j++;
8946       if (j == coding_category_max)
8947         abort ();
8948       priorities[i] = coding_priorities[j];
8949     }
8950
8951   bcopy (priorities, coding_priorities, sizeof priorities);
8952
8953   /* Update `coding-category-list'.  */
8954   Vcoding_category_list = Qnil;
8955   for (i = coding_category_max - 1; i >= 0; i--)
8956     Vcoding_category_list
8957       = Fcons (AREF (Vcoding_category_table, priorities[i]),
8958                Vcoding_category_list);
8959
8960   return Qnil;
8961 }
8962
8963 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8964        Scoding_system_priority_list, 0, 1, 0,
8965        doc: /* Return a list of coding systems ordered by their priorities.
8966 HIGHESTP non-nil means just return the highest priority one.  */)
8967      (highestp)
8968      Lisp_Object highestp;
8969 {
8970   int i;
8971   Lisp_Object val;
8972
8973   for (i = 0, val = Qnil; i < coding_category_max; i++)
8974     {
8975       enum coding_category category = coding_priorities[i];
8976       int id = coding_categories[category].id;
8977       Lisp_Object attrs;
8978
8979       if (id < 0)
8980         continue;
8981       attrs = CODING_ID_ATTRS (id);
8982       if (! NILP (highestp))
8983         return CODING_ATTR_BASE_NAME (attrs);
8984       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8985     }
8986   return Fnreverse (val);
8987 }
8988
8989 static char *suffixes[] = { "-unix", "-dos", "-mac" };
8990
8991 static Lisp_Object
8992 make_subsidiaries (base)
8993      Lisp_Object base;
8994 {
8995   Lisp_Object subsidiaries;
8996   int base_name_len = SBYTES (SYMBOL_NAME (base));
8997   char *buf = (char *) alloca (base_name_len + 6);
8998   int i;
8999
9000   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
9001   subsidiaries = Fmake_vector (make_number (3), Qnil);
9002   for (i = 0; i < 3; i++)
9003     {
9004       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
9005       ASET (subsidiaries, i, intern (buf));
9006     }
9007   return subsidiaries;
9008 }
9009
9010
/* Primitive behind `define-coding-system-internal'.

   ARGS is indexed by the coding_arg_* constants.  The function
   validates the arguments common to all coding types and stores them
   in a fresh attribute vector ATTRS, then handles the arguments that
   are specific to the coding type and chooses CATEGORY, and finally
   registers NAME (plus three eol subsidiaries when no eol-type was
   given) in Vcoding_system_hash_table, Vcoding_system_list and
   Vcoding_system_alist.  Supplying fewer arguments than the coding
   type needs signals Qwrong_number_of_arguments (label short_args).
   Returns Qnil.  */
9011 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9012        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9013        doc: /* For internal use only.
9014 usage: (define-coding-system-internal ...)  */)
9015      (nargs, args)
9016      int nargs;
9017      Lisp_Object *args;
9018 {
9019   Lisp_Object name;
9020   Lisp_Object spec_vec;         /* [ ATTRS ALIASES EOL_TYPE ] */
9021   Lisp_Object attrs;            /* Vector of attributes.  */
9022   Lisp_Object eol_type;
9023   Lisp_Object aliases;
9024   Lisp_Object coding_type, charset_list, safe_charsets;
9025   enum coding_category category;
9026   Lisp_Object tail, val;
9027   int max_charset_id = 0;
9028   int i;
9029
9030   if (nargs < coding_arg_max)
9031     goto short_args;
9032
9033   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9034
9035   name = args[coding_arg_name];
9036   CHECK_SYMBOL (name);
9037   CODING_ATTR_BASE_NAME (attrs) = name;
9038
       /* The mnemonic may be either a string or a character.  */
9039   val = args[coding_arg_mnemonic];
9040   if (! STRINGP (val))
9041     CHECK_CHARACTER (val);
9042   CODING_ATTR_MNEMONIC (attrs) = val;
9043
9044   coding_type = args[coding_arg_coding_type];
9045   CHECK_SYMBOL (coding_type);
9046   CODING_ATTR_TYPE (attrs) = coding_type;
9047
       /* CHARSET_LIST is either a symbol or a list of charsets.
          Qiso_2022 and Qemacs_mule stand for the corresponding global
          lists and are accepted only together with the coding type of
          the same name.  An explicit list is copied and each charset
          in the copy is replaced by its numeric ID.  MAX_CHARSET_ID
          records the largest ID met on either path.  */
9048   charset_list = args[coding_arg_charset_list];
9049   if (SYMBOLP (charset_list))
9050     {
9051       if (EQ (charset_list, Qiso_2022))
9052         {
9053           if (! EQ (coding_type, Qiso_2022))
9054             error ("Invalid charset-list");
9055           charset_list = Viso_2022_charset_list;
9056         }
9057       else if (EQ (charset_list, Qemacs_mule))
9058         {
9059           if (! EQ (coding_type, Qemacs_mule))
9060             error ("Invalid charset-list");
9061           charset_list = Vemacs_mule_charset_list;
9062         }
9063       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9064         if (max_charset_id < XFASTINT (XCAR (tail)))
9065           max_charset_id = XFASTINT (XCAR (tail));
9066     }
9067   else
9068     {
9069       charset_list = Fcopy_sequence (charset_list);
9070       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9071         {
9072           struct charset *charset;
9073
9074           val = XCAR (tail);
9075           CHECK_CHARSET_GET_CHARSET (val, charset);
               /* iso-2022 needs a charset with an ISO final byte and
                  emacs-mule one with an emacs-mule ID; other coding
                  types accept any charset here.  */
9076           if (EQ (coding_type, Qiso_2022)
9077               ? CHARSET_ISO_FINAL (charset) < 0
9078               : EQ (coding_type, Qemacs_mule)
9079               ? CHARSET_EMACS_MULE_ID (charset) < 0
9080               : 0)
9081             error ("Can't handle charset `%s'",
9082                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9083
9084           XSETCAR (tail, make_number (charset->id));
9085           if (max_charset_id < charset->id)
9086             max_charset_id = charset->id;
9087         }
9088     }
9089   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9090
       /* SAFE_CHARSETS is a string indexed by charset ID: every byte
          starts as 255 and is set to 0 for each charset in
          CHARSET_LIST.  */
9091   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
9092                                 make_number (255));
9093   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9094     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9095   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9096
9097   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9098
       /* A translation table may be a char-table, a cons, or a symbol.  */
9099   val = args[coding_arg_decode_translation_table];
9100   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9101     CHECK_SYMBOL (val);
9102   CODING_ATTR_DECODE_TBL (attrs) = val;
9103
9104   val = args[coding_arg_encode_translation_table];
9105   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9106     CHECK_SYMBOL (val);
9107   CODING_ATTR_ENCODE_TBL (attrs) = val;
9108
9109   val = args[coding_arg_post_read_conversion];
9110   CHECK_SYMBOL (val);
9111   CODING_ATTR_POST_READ (attrs) = val;
9112
9113   val = args[coding_arg_pre_write_conversion];
9114   CHECK_SYMBOL (val);
9115   CODING_ATTR_PRE_WRITE (attrs) = val;
9116
       /* A nil default character means a space.  */
9117   val = args[coding_arg_default_char];
9118   if (NILP (val))
9119     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9120   else
9121     {
9122       CHECK_CHARACTER (val);
9123       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9124     }
9125
       /* Normalize the for-unibyte flag to exactly Qnil or Qt.  */
9126   val = args[coding_arg_for_unibyte];
9127   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9128
9129   val = args[coding_arg_plist];
9130   CHECK_LIST (val);
9131   CODING_ATTR_PLIST (attrs) = val;
9132
       /* Type-specific part.  Each branch below checks the extra
          arguments of its coding type, fills the matching attribute
          slots and chooses CATEGORY; an unknown type is an error.  */
9133   if (EQ (coding_type, Qcharset))
9134     {
9135       /* Generate a lisp vector of 256 elements.  Each element is nil,
9136          integer, or a list of charset IDs.
9137
9138          If Nth element is nil, the byte code N is invalid in this
9139          coding system.
9140
9141          If Nth element is a number NUM, N is the first byte of a
9142          charset whose ID is NUM.
9143
9144          If Nth element is a list of charset IDs, N is the first byte
9145          of one of them.  The list is sorted by dimensions of the
9146          charsets.  A charset of smaller dimension comes first.  */
9147       val = Fmake_vector (make_number (256), Qnil);
9148
9149       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9150         {
9151           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9152           int dim = CHARSET_DIMENSION (charset);
               /* IDX selects the code_space entry pair used below as
                  the inclusive byte range of this charset.  */
9153           int idx = (dim - 1) * 4;
9154
9155           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9156             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9157
9158           for (i = charset->code_space[idx];
9159                i <= charset->code_space[idx + 1]; i++)
9160             {
9161               Lisp_Object tmp, tmp2;
9162               int dim2;
9163
9164               tmp = AREF (val, i);
9165               if (NILP (tmp))
9166                 tmp = XCAR (tail);
9167               else if (NUMBERP (tmp))
9168                 {
                       /* Second charset for this byte: turn the slot
                          into a two-element list ordered by dimension.  */
9169                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9170                   if (dim < dim2)
9171                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9172                   else
9173                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9174                 }
9175               else
9176                 {
                       /* Find the first list element whose dimension is
                          larger than DIM and insert in front of it, or
                          append when there is none.  */
9177                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9178                     {
9179                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9180                       if (dim < dim2)
9181                         break;
9182                     }
9183                   if (NILP (tmp2))
9184                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9185                   else
9186                     {
9187                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9188                       XSETCAR (tmp2, XCAR (tail));
9189                     }
9190                 }
9191               ASET (val, i, tmp);
9192             }
9193         }
9194       ASET (attrs, coding_attr_charset_valids, val);
9195       category = coding_category_charset;
9196     }
9197   else if (EQ (coding_type, Qccl))
9198     {
9199       Lisp_Object valids;
9200
9201       if (nargs < coding_arg_ccl_max)
9202         goto short_args;
9203
           /* A CCL program given as a vector is copied before it is
              stored.  */
9204       val = args[coding_arg_ccl_decoder];
9205       CHECK_CCL_PROGRAM (val);
9206       if (VECTORP (val))
9207         val = Fcopy_sequence (val);
9208       ASET (attrs, coding_attr_ccl_decoder, val);
9209
9210       val = args[coding_arg_ccl_encoder];
9211       CHECK_CCL_PROGRAM (val);
9212       if (VECTORP (val))
9213         val = Fcopy_sequence (val);
9214       ASET (attrs, coding_attr_ccl_encoder, val);
9215
           /* VALIDS is a 256-byte string; byte N becomes 1 when N is
              listed either directly or inside a (FROM . TO) range.  All
              values must lie within 0..255.  */
9216       val = args[coding_arg_ccl_valids];
9217       valids = Fmake_string (make_number (256), make_number (0));
9218       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9219         {
9220           int from, to;
9221
9222           val = Fcar (tail);
9223           if (INTEGERP (val))
9224             {
9225               from = to = XINT (val);
9226               if (from < 0 || from > 255)
9227                 args_out_of_range_3 (val, make_number (0), make_number (255));
9228             }
9229           else
9230             {
9231               CHECK_CONS (val);
9232               CHECK_NATNUM_CAR (val);
9233               CHECK_NATNUM_CDR (val);
9234               from = XINT (XCAR (val));
9235               if (from > 255)
9236                 args_out_of_range_3 (XCAR (val),
9237                                      make_number (0), make_number (255));
9238               to = XINT (XCDR (val));
9239               if (to < from || to > 255)
9240                 args_out_of_range_3 (XCDR (val),
9241                                      XCAR (val), make_number (255));
9242             }
9243           for (i = from; i <= to; i++)
9244             SSET (valids, i, 1);
9245         }
9246       ASET (attrs, coding_attr_ccl_valids, valids);
9247
9248       category = coding_category_ccl;
9249     }
9250   else if (EQ (coding_type, Qutf_16))
9251     {
9252       Lisp_Object bom, endian;
9253
9254       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9255
9256       if (nargs < coding_arg_utf16_max)
9257         goto short_args;
9258
           /* BOM is nil, t, or a cons of two coding systems.  */
9259       bom = args[coding_arg_utf16_bom];
9260       if (! NILP (bom) && ! EQ (bom, Qt))
9261         {
9262           CHECK_CONS (bom);
9263           val = XCAR (bom);
9264           CHECK_CODING_SYSTEM (val);
9265           val = XCDR (bom);
9266           CHECK_CODING_SYSTEM (val);
9267         }
9268       ASET (attrs, coding_attr_utf_bom, bom);
9269
           /* A nil endian defaults to Qbig.  */
9270       endian = args[coding_arg_utf16_endian];
9271       CHECK_SYMBOL (endian);
9272       if (NILP (endian))
9273         endian = Qbig;
9274       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9275         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9276       ASET (attrs, coding_attr_utf_16_endian, endian);
9277
           /* A cons BOM selects the auto category; otherwise the
              category depends on whether BOM is nil and on ENDIAN.  */
9278       category = (CONSP (bom)
9279                   ? coding_category_utf_16_auto
9280                   : NILP (bom)
9281                   ? (EQ (endian, Qbig)
9282                      ? coding_category_utf_16_be_nosig
9283                      : coding_category_utf_16_le_nosig)
9284                   : (EQ (endian, Qbig)
9285                      ? coding_category_utf_16_be
9286                      : coding_category_utf_16_le));
9287     }
9288   else if (EQ (coding_type, Qiso_2022))
9289     {
9290       Lisp_Object initial, reg_usage, request, flags;
           /* NOTE(review): this I shadows the function-level I; after
              the assignment further down it holds the integer value of
              FLAGS.  */
9291       int i;
9292
9293       if (nargs < coding_arg_iso2022_max)
9294         goto short_args;
9295
           /* INITIAL is a copy of the argument vector in which the
              first four elements are replaced by a charset ID, or by
              -1 where the element was nil.  */
9296       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9297       CHECK_VECTOR (initial);
9298       for (i = 0; i < 4; i++)
9299         {
9300           val = Faref (initial, make_number (i));
9301           if (! NILP (val))
9302             {
9303               struct charset *charset;
9304
9305               CHECK_CHARSET_GET_CHARSET (val, charset);
9306               ASET (initial, i, make_number (CHARSET_ID (charset)));
9307               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9308                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9309             }
9310           else
9311             ASET (initial, i, make_number (-1));
9312         }
9313
9314       reg_usage = args[coding_arg_iso2022_reg_usage];
9315       CHECK_CONS (reg_usage);
9316       CHECK_NUMBER_CAR (reg_usage);
9317       CHECK_NUMBER_CDR (reg_usage);
9318
           /* Each REQUEST element is (CHARSET . REGISTER) with REGISTER
              below 4; its car is replaced by the charset ID.  Only the
              top-level list is copied, so the XSETCAR below writes into
              the cons cells of the caller's list.  */
9319       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9320       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9321         {
9322           int id;
9323           Lisp_Object tmp;
9324
9325           val = Fcar (tail);
9326           CHECK_CONS (val);
9327           tmp = XCAR (val);
9328           CHECK_CHARSET_GET_ID (tmp, id);
9329           CHECK_NATNUM_CDR (val);
9330           if (XINT (XCDR (val)) >= 4)
9331             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
9332           XSETCAR (val, make_number (id));
9333         }
9334
           /* When the charset list argument was the symbol Qiso_2022,
              the stored FLAGS additionally get
              CODING_ISO_FLAG_FULL_SUPPORT; I keeps the flags as given.  */
9335       flags = args[coding_arg_iso2022_flags];
9336       CHECK_NATNUM (flags);
9337       i = XINT (flags);
9338       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9339         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9340
9341       ASET (attrs, coding_attr_iso_initial, initial);
9342       ASET (attrs, coding_attr_iso_usage, reg_usage);
9343       ASET (attrs, coding_attr_iso_request, request);
9344       ASET (attrs, coding_attr_iso_flags, flags);
9345       setup_iso_safe_charsets (attrs);
9346
9347       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9348         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9349                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9350                     ? coding_category_iso_7_else
9351                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9352                     ? coding_category_iso_7
9353                     : coding_category_iso_7_tight);
9354       else
9355         {
9356           int id = XINT (AREF (initial, 1));
9357
9358           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9359                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9360                        || id < 0)
9361                       ? coding_category_iso_8_else
9362                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9363                       ? coding_category_iso_8_1
9364                       : coding_category_iso_8_2);
9365         }
           /* Only the iso_8_1 and iso_8_2 categories keep the ASCII
              compatibility flag computed above.  */
9366       if (category != coding_category_iso_8_1
9367           && category != coding_category_iso_8_2)
9368         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9369     }
9370   else if (EQ (coding_type, Qemacs_mule))
9371     {
9372       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9373         ASET (attrs, coding_attr_emacs_mule_full, Qt);
9374       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9375       category = coding_category_emacs_mule;
9376     }
9377   else if (EQ (coding_type, Qshift_jis))
9378     {
9379
9380       struct charset *charset;
9381
           /* Expected charsets, in order: two of dimension one, one of
              dimension two, and optionally a fourth of dimension two.
              CHARSET_LIST is advanced locally; the list stored in ATTRS
              above is unaffected.  */
9382       if (XINT (Flength (charset_list)) != 3
9383           && XINT (Flength (charset_list)) != 4)
9384         error ("There should be three or four charsets");
9385
9386       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9387       if (CHARSET_DIMENSION (charset) != 1)
9388         error ("Dimension of charset %s is not one",
9389                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9390       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9391         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9392
9393       charset_list = XCDR (charset_list);
9394       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9395       if (CHARSET_DIMENSION (charset) != 1)
9396         error ("Dimension of charset %s is not one",
9397                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9398
9399       charset_list = XCDR (charset_list);
9400       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9401       if (CHARSET_DIMENSION (charset) != 2)
9402         error ("Dimension of charset %s is not two",
9403                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9404
9405       charset_list = XCDR (charset_list);
9406       if (! NILP (charset_list))
9407         {
9408           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9409           if (CHARSET_DIMENSION (charset) != 2)
9410             error ("Dimension of charset %s is not two",
9411                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9412         }
9413
9414       category = coding_category_sjis;
9415       Vsjis_coding_system = name;
9416     }
9417   else if (EQ (coding_type, Qbig5))
9418     {
9419       struct charset *charset;
9420
           /* Exactly two charsets: dimension one, then dimension two.  */
9421       if (XINT (Flength (charset_list)) != 2)
9422         error ("There should be just two charsets");
9423
9424       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9425       if (CHARSET_DIMENSION (charset) != 1)
9426         error ("Dimension of charset %s is not one",
9427                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9428       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9429         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9430
9431       charset_list = XCDR (charset_list);
9432       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9433       if (CHARSET_DIMENSION (charset) != 2)
9434         error ("Dimension of charset %s is not two",
9435                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9436
9437       category = coding_category_big5;
9438       Vbig5_coding_system = name;
9439     }
9440   else if (EQ (coding_type, Qraw_text))
9441     {
9442       category = coding_category_raw_text;
9443       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9444     }
9445   else if (EQ (coding_type, Qutf_8))
9446     {
9447       Lisp_Object bom;
9448
9449       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9450
9451       if (nargs < coding_arg_utf8_max)
9452         goto short_args;
9453
           /* BOM is nil, t, or a cons of two coding systems.  */
9454       bom = args[coding_arg_utf8_bom];
9455       if (! NILP (bom) && ! EQ (bom, Qt))
9456         {
9457           CHECK_CONS (bom);
9458           val = XCAR (bom);
9459           CHECK_CODING_SYSTEM (val);
9460           val = XCDR (bom);
9461           CHECK_CODING_SYSTEM (val);
9462         }
9463       ASET (attrs, coding_attr_utf_bom, bom);
9464
9465       category = (CONSP (bom) ? coding_category_utf_8_auto
9466                   : NILP (bom) ? coding_category_utf_8_nosig
9467                   : coding_category_utf_8_sig);
9468     }
9469   else if (EQ (coding_type, Qundecided))
9470     category = coding_category_undecided;
9471   else
9472     error ("Invalid coding system type: %s",
9473            SDATA (SYMBOL_NAME (coding_type)));
9474
       /* Record the category, and also push the category symbol and
          the final ASCII compatibility flag onto the front of the
          property list.  */
9475   CODING_ATTR_CATEGORY (attrs) = make_number (category);
9476   CODING_ATTR_PLIST (attrs)
9477     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
9478                                 CODING_ATTR_PLIST (attrs)));
9479   CODING_ATTR_PLIST (attrs)
9480     = Fcons (QCascii_compatible_p,
9481              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
9482                     CODING_ATTR_PLIST (attrs)));
9483
       /* The eol-type argument must be nil, Qunix, Qdos or Qmac.  */
9484   eol_type = args[coding_arg_eol_type];
9485   if (! NILP (eol_type)
9486       && ! EQ (eol_type, Qunix)
9487       && ! EQ (eol_type, Qdos)
9488       && ! EQ (eol_type, Qmac))
9489     error ("Invalid eol-type");
9490
9491   aliases = Fcons (name, Qnil);
9492
       /* With a nil eol-type, EOL_TYPE becomes the vector of the three
          subsidiary names.  Each subsidiary gets its own spec vector
          that shares ATTRS with the base coding system and is
          registered in the hash table, the list and the alist.  */
9493   if (NILP (eol_type))
9494     {
9495       eol_type = make_subsidiaries (name);
9496       for (i = 0; i < 3; i++)
9497         {
9498           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
9499
9500           this_name = AREF (eol_type, i);
9501           this_aliases = Fcons (this_name, Qnil);
9502           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
9503           this_spec = Fmake_vector (make_number (3), attrs);
9504           ASET (this_spec, 1, this_aliases);
9505           ASET (this_spec, 2, this_eol_type);
9506           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
9507           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
9508           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
9509           if (NILP (val))
9510             Vcoding_system_alist
9511               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
9512                        Vcoding_system_alist);
9513         }
9514     }
9515
       /* Fmake_vector fills all three slots with ATTRS; slots 1 and 2
          are then overwritten with the alias list and the eol type.  */
9516   spec_vec = Fmake_vector (make_number (3), attrs);
9517   ASET (spec_vec, 1, aliases);
9518   ASET (spec_vec, 2, eol_type);
9519
9520   Fputhash (name, spec_vec, Vcoding_system_hash_table);
9521   Vcoding_system_list = Fcons (name, Vcoding_system_list);
9522   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
9523   if (NILP (val))
9524     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
9525                                   Vcoding_system_alist);
9526
       /* Set up the coding structure of CATEGORY when it has none yet
          (negative id) or when it already belongs to NAME.  */
9527   {
9528     int id = coding_categories[category].id;
9529
9530     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
9531       setup_coding_system (name, &coding_categories[category]);
9532   }
9533
9534   return Qnil;
9535
9536  short_args:
9537   return Fsignal (Qwrong_number_of_arguments,
9538                   Fcons (intern ("define-coding-system-internal"),
9539                          make_number (nargs)));
9540 }
9541
9542
9543 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
9544        3, 3, 0,
9545        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
9546   (coding_system, prop, val)
9547      Lisp_Object coding_system, prop, val;
9548 {
9549   Lisp_Object spec, attrs;
9550
9551   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9552   attrs = AREF (spec, 0);
9553   if (EQ (prop, QCmnemonic))
9554     {
9555       if (! STRINGP (val))
9556         CHECK_CHARACTER (val);
9557       CODING_ATTR_MNEMONIC (attrs) = val;
9558     }
9559   else if (EQ (prop, QCdefalut_char))
9560     {
9561       if (NILP (val))
9562         val = make_number (' ');
9563       else
9564         CHECK_CHARACTER (val);
9565       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9566     }
9567   else if (EQ (prop, QCdecode_translation_table))
9568     {
9569       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9570         CHECK_SYMBOL (val);
9571       CODING_ATTR_DECODE_TBL (attrs) = val;
9572     }
9573   else if (EQ (prop, QCencode_translation_table))
9574     {
9575       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9576         CHECK_SYMBOL (val);
9577       CODING_ATTR_ENCODE_TBL (attrs) = val;
9578     }
9579   else if (EQ (prop, QCpost_read_conversion))
9580     {
9581       CHECK_SYMBOL (val);
9582       CODING_ATTR_POST_READ (attrs) = val;
9583     }
9584   else if (EQ (prop, QCpre_write_conversion))
9585     {
9586       CHECK_SYMBOL (val);
9587       CODING_ATTR_PRE_WRITE (attrs) = val;
9588     }
9589   else if (EQ (prop, QCascii_compatible_p))
9590     {
9591       CODING_ATTR_ASCII_COMPAT (attrs) = val;
9592     }
9593
9594   CODING_ATTR_PLIST (attrs)
9595     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
9596   return val;
9597 }
9598
9599
9600 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
9601        Sdefine_coding_system_alias, 2, 2, 0,
9602        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
9603      (alias, coding_system)
9604      Lisp_Object alias, coding_system;
9605 {
9606   Lisp_Object spec, aliases, eol_type, val;
9607
9608   CHECK_SYMBOL (alias);
9609   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9610   aliases = AREF (spec, 1);
9611   /* ALIASES should be a list of length more than zero, and the first
9612      element is a base coding system.  Append ALIAS at the tail of the
9613      list.  */
9614   while (!NILP (XCDR (aliases)))
9615     aliases = XCDR (aliases);
9616   XSETCDR (aliases, Fcons (alias, Qnil));
9617
9618   eol_type = AREF (spec, 2);
9619   if (VECTORP (eol_type))
9620     {
9621       Lisp_Object subsidiaries;
9622       int i;
9623
9624       subsidiaries = make_subsidiaries (alias);
9625       for (i = 0; i < 3; i++)
9626         Fdefine_coding_system_alias (AREF (subsidiaries, i),
9627                                      AREF (eol_type, i));
9628     }
9629
9630   Fputhash (alias, spec, Vcoding_system_hash_table);
9631   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
9632   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
9633   if (NILP (val))
9634     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
9635                                   Vcoding_system_alist);
9636
9637   return Qnil;
9638 }
9639
9640 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
9641        1, 1, 0,
9642        doc: /* Return the base of CODING-SYSTEM.
9643 Any alias or subsidiary coding system is not a base coding system.  */)
9644   (coding_system)
9645      Lisp_Object coding_system;
9646 {
9647   Lisp_Object spec, attrs;
9648
9649   if (NILP (coding_system))
9650     return (Qno_conversion);
9651   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9652   attrs = AREF (spec, 0);
9653   return CODING_ATTR_BASE_NAME (attrs);
9654 }
9655
9656 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
9657        1, 1, 0,
9658        doc: "Return the property list of CODING-SYSTEM.")
9659      (coding_system)
9660      Lisp_Object coding_system;
9661 {
9662   Lisp_Object spec, attrs;
9663
9664   if (NILP (coding_system))
9665     coding_system = Qno_conversion;
9666   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9667   attrs = AREF (spec, 0);
9668   return CODING_ATTR_PLIST (attrs);
9669 }
9670
9671
9672 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
9673        1, 1, 0,
9674        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
9675      (coding_system)
9676      Lisp_Object coding_system;
9677 {
9678   Lisp_Object spec;
9679
9680   if (NILP (coding_system))
9681     coding_system = Qno_conversion;
9682   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9683   return AREF (spec, 1);
9684 }
9685
9686 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
9687        Scoding_system_eol_type, 1, 1, 0,
9688        doc: /* Return eol-type of CODING-SYSTEM.
9689 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
9690
9691 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
9692 and CR respectively.
9693
9694 A vector value indicates that a format of end-of-line should be
9695 detected automatically.  Nth element of the vector is the subsidiary
9696 coding system whose eol-type is N.  */)
9697      (coding_system)
9698      Lisp_Object coding_system;
9699 {
9700   Lisp_Object spec, eol_type;
9701   int n;
9702
9703   if (NILP (coding_system))
9704     coding_system = Qno_conversion;
9705   if (! CODING_SYSTEM_P (coding_system))
9706     return Qnil;
9707   spec = CODING_SYSTEM_SPEC (coding_system);
9708   eol_type = AREF (spec, 2);
9709   if (VECTORP (eol_type))
9710     return Fcopy_sequence (eol_type);
9711   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9712   return make_number (n);
9713 }
9714
9715 #endif /* emacs */
9716
9717 \f
/*** 12. Postamble ***/
9719
9720 void
9721 init_coding_once ()
9722 {
9723   int i;
9724
9725   for (i = 0; i < coding_category_max; i++)
9726     {
9727       coding_categories[i].id = -1;
9728       coding_priorities[i] = i;
9729     }
9730
9731   /* ISO2022 specific initialize routine.  */
9732   for (i = 0; i < 0x20; i++)
9733     iso_code_class[i] = ISO_control_0;
9734   for (i = 0x21; i < 0x7F; i++)
9735     iso_code_class[i] = ISO_graphic_plane_0;
9736   for (i = 0x80; i < 0xA0; i++)
9737     iso_code_class[i] = ISO_control_1;
9738   for (i = 0xA1; i < 0xFF; i++)
9739     iso_code_class[i] = ISO_graphic_plane_1;
9740   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9741   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
9742   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9743   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9744   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9745   iso_code_class[ISO_CODE_ESC] = ISO_escape;
9746   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9747   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9748   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9749
9750   for (i = 0; i < 256; i++)
9751     {
9752       emacs_mule_bytes[i] = 1;
9753     }
9754   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9755   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9756   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9757   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
9758 }
9759
9760 #ifdef emacs
9761
9762 void
9763 syms_of_coding ()
9764 {
9765   staticpro (&Vcoding_system_hash_table);
9766   {
9767     Lisp_Object args[2];
9768     args[0] = QCtest;
9769     args[1] = Qeq;
9770     Vcoding_system_hash_table = Fmake_hash_table (2, args);
9771   }
9772
9773   staticpro (&Vsjis_coding_system);
9774   Vsjis_coding_system = Qnil;
9775
9776   staticpro (&Vbig5_coding_system);
9777   Vbig5_coding_system = Qnil;
9778
9779   staticpro (&Vcode_conversion_reused_workbuf);
9780   Vcode_conversion_reused_workbuf = Qnil;
9781
9782   staticpro (&Vcode_conversion_workbuf_name);
9783   Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
9784
9785   reused_workbuf_in_use = 0;
9786
9787   DEFSYM (Qcharset, "charset");
9788   DEFSYM (Qtarget_idx, "target-idx");
9789   DEFSYM (Qcoding_system_history, "coding-system-history");
9790   Fset (Qcoding_system_history, Qnil);
9791
9792   /* Target FILENAME is the first argument.  */
9793   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9794   /* Target FILENAME is the third argument.  */
9795   Fput (Qwrite_region, Qtarget_idx, make_number (2));
9796
9797   DEFSYM (Qcall_process, "call-process");
9798   /* Target PROGRAM is the first argument.  */
9799   Fput (Qcall_process, Qtarget_idx, make_number (0));
9800
9801   DEFSYM (Qcall_process_region, "call-process-region");
9802   /* Target PROGRAM is the third argument.  */
9803   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9804
9805   DEFSYM (Qstart_process, "start-process");
9806   /* Target PROGRAM is the third argument.  */
9807   Fput (Qstart_process, Qtarget_idx, make_number (2));
9808
9809   DEFSYM (Qopen_network_stream, "open-network-stream");
9810   /* Target SERVICE is the fourth argument.  */
9811   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9812
9813   DEFSYM (Qcoding_system, "coding-system");
9814   DEFSYM (Qcoding_aliases, "coding-aliases");
9815
9816   DEFSYM (Qeol_type, "eol-type");
9817   DEFSYM (Qunix, "unix");
9818   DEFSYM (Qdos, "dos");
9819
9820   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9821   DEFSYM (Qpost_read_conversion, "post-read-conversion");
9822   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9823   DEFSYM (Qdefault_char, "default-char");
9824   DEFSYM (Qundecided, "undecided");
9825   DEFSYM (Qno_conversion, "no-conversion");
9826   DEFSYM (Qraw_text, "raw-text");
9827
9828   DEFSYM (Qiso_2022, "iso-2022");
9829
9830   DEFSYM (Qutf_8, "utf-8");
9831   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
9832
9833   DEFSYM (Qutf_16, "utf-16");
9834   DEFSYM (Qbig, "big");
9835   DEFSYM (Qlittle, "little");
9836
9837   DEFSYM (Qshift_jis, "shift-jis");
9838   DEFSYM (Qbig5, "big5");
9839
9840   DEFSYM (Qcoding_system_p, "coding-system-p");
9841
9842   DEFSYM (Qcoding_system_error, "coding-system-error");
9843   Fput (Qcoding_system_error, Qerror_conditions,
9844         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9845   Fput (Qcoding_system_error, Qerror_message,
9846         build_string ("Invalid coding system"));
9847
9848   /* Intern this now in case it isn't already done.
9849      Setting this variable twice is harmless.
9850      But don't staticpro it here--that is done in alloc.c.  */
9851   Qchar_table_extra_slots = intern ("char-table-extra-slots");
9852
9853   DEFSYM (Qtranslation_table, "translation-table");
9854   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
9855   DEFSYM (Qtranslation_table_id, "translation-table-id");
9856   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9857   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
9858
9859   DEFSYM (Qvalid_codes, "valid-codes");
9860
9861   DEFSYM (Qemacs_mule, "emacs-mule");
9862
9863   DEFSYM (QCcategory, ":category");
9864   DEFSYM (QCmnemonic, ":mnemonic");
9865   DEFSYM (QCdefalut_char, ":default-char");
9866   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9867   DEFSYM (QCencode_translation_table, ":encode-translation-table");
9868   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9869   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
9870   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
9871
9872   Vcoding_category_table
9873     = Fmake_vector (make_number (coding_category_max), Qnil);
9874   staticpro (&Vcoding_category_table);
9875   /* Followings are target of code detection.  */
9876   ASET (Vcoding_category_table, coding_category_iso_7,
9877         intern ("coding-category-iso-7"));
9878   ASET (Vcoding_category_table, coding_category_iso_7_tight,
9879         intern ("coding-category-iso-7-tight"));
9880   ASET (Vcoding_category_table, coding_category_iso_8_1,
9881         intern ("coding-category-iso-8-1"));
9882   ASET (Vcoding_category_table, coding_category_iso_8_2,
9883         intern ("coding-category-iso-8-2"));
9884   ASET (Vcoding_category_table, coding_category_iso_7_else,
9885         intern ("coding-category-iso-7-else"));
9886   ASET (Vcoding_category_table, coding_category_iso_8_else,
9887         intern ("coding-category-iso-8-else"));
9888   ASET (Vcoding_category_table, coding_category_utf_8_auto,
9889         intern ("coding-category-utf-8-auto"));
9890   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
9891         intern ("coding-category-utf-8"));
9892   ASET (Vcoding_category_table, coding_category_utf_8_sig,
9893         intern ("coding-category-utf-8-sig"));
9894   ASET (Vcoding_category_table, coding_category_utf_16_be,
9895         intern ("coding-category-utf-16-be"));
9896   ASET (Vcoding_category_table, coding_category_utf_16_auto,
9897         intern ("coding-category-utf-16-auto"));
9898   ASET (Vcoding_category_table, coding_category_utf_16_le,
9899         intern ("coding-category-utf-16-le"));
9900   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9901         intern ("coding-category-utf-16-be-nosig"));
9902   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9903         intern ("coding-category-utf-16-le-nosig"));
9904   ASET (Vcoding_category_table, coding_category_charset,
9905         intern ("coding-category-charset"));
9906   ASET (Vcoding_category_table, coding_category_sjis,
9907         intern ("coding-category-sjis"));
9908   ASET (Vcoding_category_table, coding_category_big5,
9909         intern ("coding-category-big5"));
9910   ASET (Vcoding_category_table, coding_category_ccl,
9911         intern ("coding-category-ccl"));
9912   ASET (Vcoding_category_table, coding_category_emacs_mule,
9913         intern ("coding-category-emacs-mule"));
9914   /* Followings are NOT target of code detection.  */
9915   ASET (Vcoding_category_table, coding_category_raw_text,
9916         intern ("coding-category-raw-text"));
9917   ASET (Vcoding_category_table, coding_category_undecided,
9918         intern ("coding-category-undecided"));
9919
9920   DEFSYM (Qinsufficient_source, "insufficient-source");
9921   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
9922   DEFSYM (Qinvalid_source, "invalid-source");
9923   DEFSYM (Qinterrupted, "interrupted");
9924   DEFSYM (Qinsufficient_memory, "insufficient-memory");
9925   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
9926
9927   defsubr (&Scoding_system_p);
9928   defsubr (&Sread_coding_system);
9929   defsubr (&Sread_non_nil_coding_system);
9930   defsubr (&Scheck_coding_system);
9931   defsubr (&Sdetect_coding_region);
9932   defsubr (&Sdetect_coding_string);
9933   defsubr (&Sfind_coding_systems_region_internal);
9934   defsubr (&Sunencodable_char_position);
9935   defsubr (&Scheck_coding_systems_region);
9936   defsubr (&Sdecode_coding_region);
9937   defsubr (&Sencode_coding_region);
9938   defsubr (&Sdecode_coding_string);
9939   defsubr (&Sencode_coding_string);
9940   defsubr (&Sdecode_sjis_char);
9941   defsubr (&Sencode_sjis_char);
9942   defsubr (&Sdecode_big5_char);
9943   defsubr (&Sencode_big5_char);
9944   defsubr (&Sset_terminal_coding_system_internal);
9945   defsubr (&Sset_safe_terminal_coding_system_internal);
9946   defsubr (&Sterminal_coding_system);
9947   defsubr (&Sset_keyboard_coding_system_internal);
9948   defsubr (&Skeyboard_coding_system);
9949   defsubr (&Sfind_operation_coding_system);
9950   defsubr (&Sset_coding_system_priority);
9951   defsubr (&Sdefine_coding_system_internal);
9952   defsubr (&Sdefine_coding_system_alias);
9953   defsubr (&Scoding_system_put);
9954   defsubr (&Scoding_system_base);
9955   defsubr (&Scoding_system_plist);
9956   defsubr (&Scoding_system_aliases);
9957   defsubr (&Scoding_system_eol_type);
9958   defsubr (&Scoding_system_priority_list);
9959
9960   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
9961                doc: /* List of coding systems.
9962
9963 Do not alter the value of this variable manually.  This variable should be
9964 updated by the functions `define-coding-system' and
9965 `define-coding-system-alias'.  */);
9966   Vcoding_system_list = Qnil;
9967
9968   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
9969                doc: /* Alist of coding system names.
9970 Each element is one element list of coding system name.
9971 This variable is given to `completing-read' as COLLECTION argument.
9972
9973 Do not alter the value of this variable manually.  This variable should be
9974 updated by the functions `make-coding-system' and
9975 `define-coding-system-alias'.  */);
9976   Vcoding_system_alist = Qnil;
9977
9978   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
9979                doc: /* List of coding-categories (symbols) ordered by priority.
9980
9981 On detecting a coding system, Emacs tries code detection algorithms
9982 associated with each coding-category one by one in this order.  When
9983 one algorithm agrees with a byte sequence of source text, the coding
9984 system bound to the corresponding coding-category is selected.
9985
9986 Don't modify this variable directly, but use `set-coding-priority'.  */);
9987   {
9988     int i;
9989
9990     Vcoding_category_list = Qnil;
9991     for (i = coding_category_max - 1; i >= 0; i--)
9992       Vcoding_category_list
9993         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
9994                  Vcoding_category_list);
9995   }
9996
9997   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
9998                doc: /* Specify the coding system for read operations.
9999 It is useful to bind this variable with `let', but do not set it globally.
10000 If the value is a coding system, it is used for decoding on read operation.
10001 If not, an appropriate element is used from one of the coding system alists.
10002 There are three such tables: `file-coding-system-alist',
10003 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10004   Vcoding_system_for_read = Qnil;
10005
10006   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
10007                doc: /* Specify the coding system for write operations.
10008 Programs bind this variable with `let', but you should not set it globally.
10009 If the value is a coding system, it is used for encoding of output,
10010 when writing it to a file and when sending it to a file or subprocess.
10011
10012 If this does not specify a coding system, an appropriate element
10013 is used from one of the coding system alists.
10014 There are three such tables: `file-coding-system-alist',
10015 `process-coding-system-alist', and `network-coding-system-alist'.
10016 For output to files, if the above procedure does not specify a coding system,
10017 the value of `buffer-file-coding-system' is used.  */);
10018   Vcoding_system_for_write = Qnil;
10019
10020   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
10021                doc: /*
10022 Coding system used in the latest file or process I/O.  */);
10023   Vlast_coding_system_used = Qnil;
10024
10025   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10026                doc: /*
10027 Error status of the last code conversion.
10028
10029 When an error was detected in the last code conversion, this variable
10030 is set to one of the following symbols.
10031   `insufficient-source'
10032   `inconsistent-eol'
10033   `invalid-source'
10034   `interrupted'
10035   `insufficient-memory'
10036 When no error was detected, the value doesn't change.  So, to check
10037 the error status of a code conversion by this variable, you must
10038 explicitly set this variable to nil before performing code
10039 conversion.  */);
10040   Vlast_code_conversion_error = Qnil;
10041
10042   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
10043                doc: /*
10044 *Non-nil means always inhibit code conversion of end-of-line format.
10045 See info node `Coding Systems' and info node `Text and Binary' concerning
10046 such conversion.  */);
10047   inhibit_eol_conversion = 0;
10048
10049   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
10050                doc: /*
10051 Non-nil means process buffer inherits coding system of process output.
10052 Bind it to t if the process output is to be treated as if it were a file
10053 read from some filesystem.  */);
10054   inherit_process_coding_system = 0;
10055
10056   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
10057                doc: /*
10058 Alist to decide a coding system to use for a file I/O operation.
10059 The format is ((PATTERN . VAL) ...),
10060 where PATTERN is a regular expression matching a file name,
10061 VAL is a coding system, a cons of coding systems, or a function symbol.
10062 If VAL is a coding system, it is used for both decoding and encoding
10063 the file contents.
10064 If VAL is a cons of coding systems, the car part is used for decoding,
10065 and the cdr part is used for encoding.
10066 If VAL is a function symbol, the function must return a coding system
10067 or a cons of coding systems which are used as above.  The function is
10068 called with an argument that is a list of the arguments with which
10069 `find-operation-coding-system' was called.  If the function can't decide
10070 a coding system, it can return `undecided' so that the normal
10071 code-detection is performed.
10072
10073 See also the function `find-operation-coding-system'
10074 and the variable `auto-coding-alist'.  */);
10075   Vfile_coding_system_alist = Qnil;
10076
10077   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
10078                doc: /*
10079 Alist to decide a coding system to use for a process I/O operation.
10080 The format is ((PATTERN . VAL) ...),
10081 where PATTERN is a regular expression matching a program name,
10082 VAL is a coding system, a cons of coding systems, or a function symbol.
10083 If VAL is a coding system, it is used for both decoding what received
10084 from the program and encoding what sent to the program.
10085 If VAL is a cons of coding systems, the car part is used for decoding,
10086 and the cdr part is used for encoding.
10087 If VAL is a function symbol, the function must return a coding system
10088 or a cons of coding systems which are used as above.
10089
10090 See also the function `find-operation-coding-system'.  */);
10091   Vprocess_coding_system_alist = Qnil;
10092
10093   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
10094                doc: /*
10095 Alist to decide a coding system to use for a network I/O operation.
10096 The format is ((PATTERN . VAL) ...),
10097 where PATTERN is a regular expression matching a network service name
10098 or is a port number to connect to,
10099 VAL is a coding system, a cons of coding systems, or a function symbol.
10100 If VAL is a coding system, it is used for both decoding what received
10101 from the network stream and encoding what sent to the network stream.
10102 If VAL is a cons of coding systems, the car part is used for decoding,
10103 and the cdr part is used for encoding.
10104 If VAL is a function symbol, the function must return a coding system
10105 or a cons of coding systems which are used as above.
10106
10107 See also the function `find-operation-coding-system'.  */);
10108   Vnetwork_coding_system_alist = Qnil;
10109
10110   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
10111                doc: /* Coding system to use with system messages.
10112 Also used for decoding keyboard input on X Window system.  */);
10113   Vlocale_coding_system = Qnil;
10114
10115   /* The eol mnemonics are reset in startup.el system-dependently.  */
10116   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
10117                doc: /*
10118 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10119   eol_mnemonic_unix = build_string (":");
10120
10121   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
10122                doc: /*
10123 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10124   eol_mnemonic_dos = build_string ("\\");
10125
10126   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
10127                doc: /*
10128 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10129   eol_mnemonic_mac = build_string ("/");
10130
10131   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
10132                doc: /*
10133 *String displayed in mode line when end-of-line format is not yet determined.  */);
10134   eol_mnemonic_undecided = build_string (":");
10135
10136   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
10137                doc: /*
10138 *Non-nil enables character translation while encoding and decoding.  */);
10139   Venable_character_translation = Qt;
10140
10141   DEFVAR_LISP ("standard-translation-table-for-decode",
10142                &Vstandard_translation_table_for_decode,
10143                doc: /* Table for translating characters while decoding.  */);
10144   Vstandard_translation_table_for_decode = Qnil;
10145
10146   DEFVAR_LISP ("standard-translation-table-for-encode",
10147                &Vstandard_translation_table_for_encode,
10148                doc: /* Table for translating characters while encoding.  */);
10149   Vstandard_translation_table_for_encode = Qnil;
10150
10151   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
10152                doc: /* Alist of charsets vs revision numbers.
10153 While encoding, if a charset (car part of an element) is found,
10154 designate it with the escape sequence identifying revision (cdr part
10155 of the element).  */);
10156   Vcharset_revision_table = Qnil;
10157
10158   DEFVAR_LISP ("default-process-coding-system",
10159                &Vdefault_process_coding_system,
10160                doc: /* Cons of coding systems used for process I/O by default.
10161 The car part is used for decoding a process output,
10162 the cdr part is used for encoding a text to be sent to a process.  */);
10163   Vdefault_process_coding_system = Qnil;
10164
10165   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
10166                doc: /*
10167 Table of extra Latin codes in the range 128..159 (inclusive).
10168 This is a vector of length 256.
10169 If Nth element is non-nil, the existence of code N in a file
10170 \(or output of subprocess) doesn't prevent it to be detected as
10171 a coding system of ISO 2022 variant which has a flag
10172 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10173 or reading output of a subprocess.
10174 Only 128th through 159th elements have a meaning.  */);
10175   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10176
10177   DEFVAR_LISP ("select-safe-coding-system-function",
10178                &Vselect_safe_coding_system_function,
10179                doc: /*
10180 Function to call to select safe coding system for encoding a text.
10181
10182 If set, this function is called to force a user to select a proper
10183 coding system which can encode the text in the case that a default
10184 coding system used in each operation can't encode the text.  The
10185 function should take care that the buffer is not modified while
10186 the coding system is being selected.
10187
10188 The default value is `select-safe-coding-system' (which see).  */);
10189   Vselect_safe_coding_system_function = Qnil;
10190
10191   DEFVAR_BOOL ("coding-system-require-warning",
10192                &coding_system_require_warning,
10193                doc: /* Internal use only.
10194 If non-nil, on writing a file, `select-safe-coding-system-function' is
10195 called even if `coding-system-for-write' is non-nil.  The command
10196 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10197   coding_system_require_warning = 0;
10198
10199
10200   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10201                &inhibit_iso_escape_detection,
10202                doc: /*
10203 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
10204
10205 By default, on reading a file, Emacs tries to detect how the text is
10206 encoded.  This code detection is sensitive to escape sequences.  If
10207 the sequence is valid as ISO2022, the code is determined as one of
10208 the ISO2022 encodings, and the file is decoded by the corresponding
10209 coding system (e.g. `iso-2022-7bit').
10210
10211 However, there may be a case that you want to read escape sequences in
10212 a file as is.  In such a case, you can set this variable to non-nil.
10213 Then, as the code detection ignores any escape sequences, no file is
10214 detected as encoded in some ISO2022 encoding.  The result is that all
10215 escape sequences become visible in a buffer.
10216
10217 The default value is nil, and it is strongly recommended not to change
10218 it.  That is because many Emacs Lisp source files that contain
10219 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10220 in Emacs's distribution, and they won't be decoded correctly on
10221 reading if you suppress escape sequence detection.
10222
10223 The other way to read escape sequences in a file without decoding is
10224 to explicitly specify some coding system that doesn't use ISO2022's
10225 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
10226   inhibit_iso_escape_detection = 0;
10227
10228   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
10229                doc: /* Char table for translating self-inserting characters.
10230 This is applied to the result of input methods, not their input.
10231 See also `keyboard-translate-table'.  */);
10232     Vtranslation_table_for_input = Qnil;
10233
10234   {
10235     Lisp_Object args[coding_arg_max];
10236     Lisp_Object plist[16];
10237     int i;
10238
10239     for (i = 0; i < coding_arg_max; i++)
10240       args[i] = Qnil;
10241
10242     plist[0] = intern (":name");
10243     plist[1] = args[coding_arg_name] = Qno_conversion;
10244     plist[2] = intern (":mnemonic");
10245     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10246     plist[4] = intern (":coding-type");
10247     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10248     plist[6] = intern (":ascii-compatible-p");
10249     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10250     plist[8] = intern (":default-char");
10251     plist[9] = args[coding_arg_default_char] = make_number (0);
10252     plist[10] = intern (":for-unibyte");
10253     plist[11] = args[coding_arg_for_unibyte] = Qt;
10254     plist[12] = intern (":docstring");
10255     plist[13] = build_string ("Do no conversion.\n\
10256 \n\
10257 When you visit a file with this coding, the file is read into a\n\
10258 unibyte buffer as is, thus each byte of a file is treated as a\n\
10259 character.");
10260     plist[14] = intern (":eol-type");
10261     plist[15] = args[coding_arg_eol_type] = Qunix;
10262     args[coding_arg_plist] = Flist (16, plist);
10263     Fdefine_coding_system_internal (coding_arg_max, args);
10264
10265     plist[1] = args[coding_arg_name] = Qundecided;
10266     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10267     plist[5] = args[coding_arg_coding_type] = Qundecided;
10268     /* This is already set.
10269        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10270     plist[8] = intern (":charset-list");
10271     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10272     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10273     plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
10274     plist[15] = args[coding_arg_eol_type] = Qnil;
10275     args[coding_arg_plist] = Flist (16, plist);
10276     Fdefine_coding_system_internal (coding_arg_max, args);
10277   }
10278
10279   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10280
10281   {
10282     int i;
10283
10284     for (i = 0; i < coding_category_max; i++)
10285       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10286   }
10287 #if defined (MSDOS) || defined (WINDOWSNT)
10288   system_eol_type = Qdos;
10289 #else
10290   system_eol_type = Qunix;
10291 #endif
10292   staticpro (&system_eol_type);
10293 }
10294
10295 char *
10296 emacs_strerror (error_number)
10297      int error_number;
10298 {
10299   char *str;
10300
10301   synchronize_system_messages_locale ();
10302   str = strerror (error_number);
10303
10304   if (! NILP (Vlocale_coding_system))
10305     {
10306       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10307                                                       Vlocale_coding_system,
10308                                                       0);
10309       str = (char *) SDATA (dec);
10310     }
10311
10312   return str;
10313 }
10314
10315 #endif /* emacs */
10316
10317 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10318    (do not change this comment) */