1 /* Coding system handler (conversion, detection, etc).
2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
3 2006 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1997, 1998, 2002, 2003, 2004, 2005
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H14PRO021
8 National Institute of Advanced Industrial Science and Technology (AIST)
9 Registration Number H13PRO009
11 This file is part of GNU Emacs.
13 GNU Emacs is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2, or (at your option)
18 GNU Emacs is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with GNU Emacs; see the file COPYING. If not, write to
25 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26 Boston, MA 02110-1301, USA. */
28 /*** TABLE OF CONTENTS ***
32 2. Emacs' internal format (emacs-utf-8) handlers
35 5. Charset-base coding systems handlers
36 6. emacs-mule (old Emacs' internal format) handlers
38 8. Shift-JIS and BIG5 handlers
40 10. C library functions
41 11. Emacs Lisp library functions
46 /*** 0. General comments ***
51 A coding system is an object for an encoding mechanism that contains
52 information about how to convert byte sequences to character
53 sequences and vice versa. When we say "decode", it means converting
54 a byte sequence of a specific coding system into a character
55 sequence that is represented by Emacs' internal coding system
56 `emacs-utf-8', and when we say "encode", it means converting a
57 character sequence of emacs-utf-8 to a byte sequence of a specific
60 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
61 C level, a coding system is represented by a vector of attributes
62 stored in the hash table Vcoding_system_hash_table. The conversion from
63 coding system symbol to attributes vector is done by looking up
64 Vcoding_system_hash_table by the symbol.
66 Coding systems are classified into the following types depending on
67 the encoding mechanism. Here's a brief description of the types.
73 o Charset-base coding system
75 A coding system defined by one or more (coded) character sets.
76 Decoding and encoding are done by a code converter defined for each
79 o Old Emacs internal format (emacs-mule)
81 The coding system adopted by old versions of Emacs (20 and 21).
83 o ISO2022-base coding system
85 The most famous coding system for multiple character sets. X's
86 Compound Text, various EUCs (Extended Unix Code), and coding systems
87 used in the Internet communication such as ISO-2022-JP are all
90 o SJIS (or Shift-JIS or MS-Kanji-Code)
92 A coding system to encode character sets: ASCII, JISX0201, and
93 JISX0208. Widely used for PC's in Japan. Details are described in
98 A coding system to encode character sets: ASCII and Big5. Widely
99 used for Chinese (mainly in Taiwan and Hong Kong). Details are
100 described in section 8. In this file, when we write "big5" (all
101 lowercase), we mean the coding system, and when we write "Big5"
102 (capitalized), we mean the character set.
106 If a user wants to decode/encode text encoded in a coding system
107 not listed above, he can supply a decoder and an encoder for it in
108 CCL (Code Conversion Language) programs. Emacs executes the CCL
109 program while decoding/encoding.
113 A coding system for text containing raw eight-bit data. Emacs
114 treats each byte of source text as a character (except for
115 end-of-line conversion).
119 Like raw text, but don't do end-of-line conversion.
124 How text end-of-line is encoded depends on the operating system. For
125 instance, Unix's format is just one byte of LF (line-feed) code,
126 whereas DOS's format is two-byte sequence of `carriage-return' and
127 `line-feed' codes. MacOS's format is usually one byte of
130 Since text character encoding and end-of-line encoding are
131 independent, any coding system described above can take any format
132 of end-of-line (except for no-conversion).
136 Before using a coding system for code conversion (i.e. decoding and
137 encoding), we setup a structure of type `struct coding_system'.
138 This structure keeps various information about a specific code
139 conversion (e.g. the location of source and destination data).
146 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
148 These functions check if a byte sequence specified as a source in
149 CODING conforms to the format of XXX, and update the members of
152 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
154 Below is the template of these functions. */
158 detect_coding_XXX (coding
, detect_info
)
159 struct coding_system
*coding
;
160 struct coding_detection_info
*detect_info
;
162 const unsigned char *src
= coding
->source
;
163 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
164 int multibytep
= coding
->src_multibyte
;
165 int consumed_chars
= 0;
171 /* Get one byte from the source. If the source is exhausted, jump
172 to no_more_source:. */
175 if (! __C_conforms_to_XXX___ (c
))
177 if (! __C_strongly_suggests_XXX__ (c
))
178 found
= CATEGORY_MASK_XXX
;
180 /* The byte sequence is invalid for XXX. */
181 detect_info
->rejected
|= CATEGORY_MASK_XXX
;
185 /* The source was exhausted successfully. */
186 detect_info
->found
|= found
;
191 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
193 These functions decode a byte sequence specified as a source by
194 CODING. The resulting multibyte text goes to a place pointed to by
195 CODING->charbuf, the length of which should not exceed
196 CODING->charbuf_size;
198 These functions set the information of original and decoded texts in
199 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
200 They also set CODING->result to one of CODING_RESULT_XXX indicating
201 how the decoding is finished.
203 Below is the template of these functions. */
207 decode_coding_XXXX (coding
)
208 struct coding_system
*coding
;
210 const unsigned char *src
= coding
->source
+ coding
->consumed
;
211 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
212 /* SRC_BASE remembers the start position in source in each loop.
213 The loop will be exited when there's not enough source code, or
214 when there's no room in CHARBUF for a decoded character. */
215 const unsigned char *src_base
;
216 /* A buffer to produce decoded characters. */
217 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
218 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_size
;
219 int multibytep
= coding
->src_multibyte
;
224 if (charbuf
>= charbuf_end
)
225 /* No more room to produce a decoded character. */
232 if (src_base
< src_end
233 && coding
->mode
& CODING_MODE_LAST_BLOCK
)
234 /* If the source ends by partial bytes to construct a character,
235 treat them as eight-bit raw data. */
236 while (src_base
< src_end
&& charbuf
< charbuf_end
)
237 *charbuf
++ = *src_base
++;
238 /* Remember how many bytes and characters we consumed. If the
239 source is multibyte, the bytes and chars are not identical. */
240 coding
->consumed
= coding
->consumed_char
= src_base
- coding
->source
;
241 /* Remember how many characters we produced. */
242 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
246 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
248 These functions encode SRC_BYTES length text at SOURCE of Emacs'
249 internal multibyte format by CODING. The resulting byte sequence
250 goes to a place pointed to by DESTINATION, the length of which
251 should not exceed DST_BYTES.
253 These functions set the information of original and encoded texts in
254 the members produced, produced_char, consumed, and consumed_char of
255 the structure *CODING. They also set the member result to one of
256 CODING_RESULT_XXX indicating how the encoding finished.
258 DST_BYTES zero means that source area and destination area are
259 overlapped, which means that we can produce an encoded text until it
260 reaches the head of the not-yet-encoded source text.
262 Below is a template of these functions. */
265 encode_coding_XXX (coding
)
266 struct coding_system
*coding
;
268 int multibytep
= coding
->dst_multibyte
;
269 int *charbuf
= coding
->charbuf
;
270 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_used
;
271 unsigned char *dst
= coding
->destination
+ coding
->produced
;
272 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
273 unsigned char *adjusted_dst_end
= dst_end
- _MAX_BYTES_PRODUCED_IN_LOOP_
;
274 int produced_chars
= 0;
276 for (; charbuf
< charbuf_end
&& dst
< adjusted_dst_end
; charbuf
++)
279 /* Encode C into DST, and increment DST. */
281 label_no_more_destination
:
282 /* How many chars and bytes we produced. */
283 coding
->produced_char
+= produced_chars
;
284 coding
->produced
= dst
- coding
->destination
;
289 /*** 1. Preamble ***/
296 #include "character.h"
299 #include "composite.h"
303 Lisp_Object Vcoding_system_hash_table
;
305 Lisp_Object Qcoding_system
, Qcoding_aliases
, Qeol_type
;
306 Lisp_Object Qunix
, Qdos
;
307 extern Lisp_Object Qmac
; /* frame.c */
308 Lisp_Object Qbuffer_file_coding_system
;
309 Lisp_Object Qpost_read_conversion
, Qpre_write_conversion
;
310 Lisp_Object Qdefault_char
;
311 Lisp_Object Qno_conversion
, Qundecided
;
312 Lisp_Object Qcharset
, Qiso_2022
, Qutf_8
, Qutf_16
, Qshift_jis
, Qbig5
;
313 Lisp_Object Qbig
, Qlittle
;
314 Lisp_Object Qcoding_system_history
;
315 Lisp_Object Qvalid_codes
;
316 Lisp_Object QCcategory
, QCmnemonic
, QCdefalut_char
;
317 Lisp_Object QCdecode_translation_table
, QCencode_translation_table
;
318 Lisp_Object QCpost_read_conversion
, QCpre_write_conversion
;
319 Lisp_Object QCascii_compatible_p
;
321 extern Lisp_Object Qinsert_file_contents
, Qwrite_region
;
322 Lisp_Object Qcall_process
, Qcall_process_region
;
323 Lisp_Object Qstart_process
, Qopen_network_stream
;
324 Lisp_Object Qtarget_idx
;
326 Lisp_Object Qinsufficient_source
, Qinconsistent_eol
, Qinvalid_source
;
327 Lisp_Object Qinterrupted
, Qinsufficient_memory
;
329 /* If a symbol has this property, evaluate the value to define the
330 symbol as a coding system. */
331 static Lisp_Object Qcoding_system_define_form
;
333 int coding_system_require_warning
;
335 Lisp_Object Vselect_safe_coding_system_function
;
337 /* Mnemonic string for each format of end-of-line. */
338 Lisp_Object eol_mnemonic_unix
, eol_mnemonic_dos
, eol_mnemonic_mac
;
339 /* Mnemonic string to indicate format of end-of-line is not yet
341 Lisp_Object eol_mnemonic_undecided
;
343 /* Format of end-of-line decided by system. This is Qunix on
344 Unix and Mac, Qdos on DOS/Windows.
345 This has an effect only for external encoding (i.e. for output to
346 file and process), not for in-buffer or Lisp string encoding. */
347 static Lisp_Object system_eol_type
;
351 Lisp_Object Vcoding_system_list
, Vcoding_system_alist
;
353 Lisp_Object Qcoding_system_p
, Qcoding_system_error
;
355 /* Coding system emacs-mule and raw-text are for converting only
356 end-of-line format. */
357 Lisp_Object Qemacs_mule
, Qraw_text
;
358 Lisp_Object Qutf_8_emacs
;
360 /* Coding-systems are handed between Emacs Lisp programs and C internal
361 routines by the following three variables. */
362 /* Coding-system for reading files and receiving data from process. */
363 Lisp_Object Vcoding_system_for_read
;
364 /* Coding-system for writing files and sending data to process. */
365 Lisp_Object Vcoding_system_for_write
;
366 /* Coding-system actually used in the latest I/O. */
367 Lisp_Object Vlast_coding_system_used
;
368 /* Set to non-nil when an error is detected during code conversion. */
369 Lisp_Object Vlast_code_conversion_error
;
370 /* A vector of length 256 which contains information about special
371 Latin codes (especially for dealing with Microsoft codes). */
372 Lisp_Object Vlatin_extra_code_table
;
374 /* Flag to inhibit code conversion of end-of-line format. */
375 int inhibit_eol_conversion
;
377 /* Flag to inhibit ISO2022 escape sequence detection. */
378 int inhibit_iso_escape_detection
;
380 /* Flag to make buffer-file-coding-system inherit from process-coding. */
381 int inherit_process_coding_system
;
383 /* Coding system to be used to encode text for terminal display. */
384 struct coding_system terminal_coding
;
386 /* Coding system to be used to encode text for terminal display when
387 terminal coding system is nil. */
388 struct coding_system safe_terminal_coding
;
390 /* Coding system of what is sent from terminal keyboard. */
391 struct coding_system keyboard_coding
;
393 Lisp_Object Vfile_coding_system_alist
;
394 Lisp_Object Vprocess_coding_system_alist
;
395 Lisp_Object Vnetwork_coding_system_alist
;
397 Lisp_Object Vlocale_coding_system
;
401 /* Flag to tell if we look up translation table on character code
403 Lisp_Object Venable_character_translation
;
404 /* Standard translation table to look up on decoding (reading). */
405 Lisp_Object Vstandard_translation_table_for_decode
;
406 /* Standard translation table to look up on encoding (writing). */
407 Lisp_Object Vstandard_translation_table_for_encode
;
409 Lisp_Object Qtranslation_table
;
410 Lisp_Object Qtranslation_table_id
;
411 Lisp_Object Qtranslation_table_for_decode
;
412 Lisp_Object Qtranslation_table_for_encode
;
414 /* Alist of charsets vs revision number. */
415 static Lisp_Object Vcharset_revision_table
;
417 /* Default coding systems used for process I/O. */
418 Lisp_Object Vdefault_process_coding_system
;
420 /* Char table for translating Quail and self-inserting input. */
421 Lisp_Object Vtranslation_table_for_input
;
423 /* Two special coding systems. */
424 Lisp_Object Vsjis_coding_system
;
425 Lisp_Object Vbig5_coding_system
;
427 /* ISO2022 section */
429 #define CODING_ISO_INITIAL(coding, reg) \
430 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
431 coding_attr_iso_initial), \
435 #define CODING_ISO_REQUEST(coding, charset_id) \
436 ((charset_id <= (coding)->max_charset_id \
437 ? (coding)->safe_charsets[charset_id] \
/* Accessors for the ISO-2022 specific state kept in
   CODING->spec.iso_2022.  CODING is a pointer to a struct
   coding_system.  Each accessor expands to an lvalue expression.  */

/* The `flags' member of CODING's ISO-2022 state.  */
441 #define CODING_ISO_FLAGS(coding) \
442 ((coding)->spec.iso_2022.flags)
/* Element REG of the `current_designation' array.  */
443 #define CODING_ISO_DESIGNATION(coding, reg) \
444 ((coding)->spec.iso_2022.current_designation[reg])
/* Element PLANE of the `current_invocation' array.  */
445 #define CODING_ISO_INVOCATION(coding, plane) \
446 ((coding)->spec.iso_2022.current_invocation[plane])
/* The `single_shifting' member of CODING's ISO-2022 state.  */
447 #define CODING_ISO_SINGLE_SHIFTING(coding) \
448 ((coding)->spec.iso_2022.single_shifting)
/* The `bol' member of CODING's ISO-2022 state.  */
449 #define CODING_ISO_BOL(coding) \
450 ((coding)->spec.iso_2022.bol)
/* The designation entry selected by the current invocation of PLANE:
   the invocation value for PLANE is used as the index into the
   designation array.  */
451 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \
452 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
454 /* Control characters of ISO2022. */
455 /* code */ /* function */
456 #define ISO_CODE_LF 0x0A /* line-feed */
457 #define ISO_CODE_CR 0x0D /* carriage-return */
458 #define ISO_CODE_SO 0x0E /* shift-out */
459 #define ISO_CODE_SI 0x0F /* shift-in */
460 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
461 #define ISO_CODE_ESC 0x1B /* escape */
462 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
463 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
464 #define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
466 /* All code (1-byte) of ISO2022 is classified into one of the
468 enum iso_code_class_type
470 ISO_control_0
, /* Control codes in the range
471 0x00..0x1F and 0x7F, except for the
472 following 4 codes. */
473 ISO_shift_out
, /* ISO_CODE_SO (0x0E) */
474 ISO_shift_in
, /* ISO_CODE_SI (0x0F) */
475 ISO_single_shift_2_7
, /* ISO_CODE_SS2_7 (0x19) */
476 ISO_escape
, /* ISO_CODE_ESC (0x1B) */
477 ISO_control_1
, /* Control codes in the range
478 0x80..0x9F, except for the
479 following 3 codes. */
480 ISO_single_shift_2
, /* ISO_CODE_SS2 (0x8E) */
481 ISO_single_shift_3
, /* ISO_CODE_SS3 (0x8F) */
482 ISO_control_sequence_introducer
, /* ISO_CODE_CSI (0x9B) */
483 ISO_0x20_or_0x7F
, /* Codes of the values 0x20 or 0x7F. */
484 ISO_graphic_plane_0
, /* Graphic codes in the range 0x21..0x7E. */
485 ISO_0xA0_or_0xFF
, /* Codes of the values 0xA0 or 0xFF. */
486 ISO_graphic_plane_1
/* Graphic codes in the range 0xA1..0xFE. */
489 /** Each CODING_ISO_FLAG_XXX macro defines a flag bit of the
490 `iso-flags' attribute of an iso2022 coding system. */
492 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
493 instead of the correct short-form sequence (e.g. ESC $ A). */
494 #define CODING_ISO_FLAG_LONG_FORM 0x0001
496 /* If set, reset graphic planes and registers at end-of-line to the
498 #define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
500 /* If set, reset graphic planes and registers before any control
501 characters to the initial state. */
502 #define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
504 /* If set, encode by 7-bit environment. */
505 #define CODING_ISO_FLAG_SEVEN_BITS 0x0008
507 /* If set, use locking-shift function. */
508 #define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
510 /* If set, use single-shift function. Overwrite
511 CODING_ISO_FLAG_LOCKING_SHIFT. */
512 #define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
514 /* If set, use designation escape sequence. */
515 #define CODING_ISO_FLAG_DESIGNATION 0x0040
517 /* If set, produce revision number sequence. */
518 #define CODING_ISO_FLAG_REVISION 0x0080
520 /* If set, produce ISO6429's direction specifying sequence. */
521 #define CODING_ISO_FLAG_DIRECTION 0x0100
523 /* If set, assume designation states are reset at beginning of line on
525 #define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
527 /* If set, designation sequence should be placed at beginning of line
529 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
531 /* If set, do not encode unsafe characters on output. */
532 #define CODING_ISO_FLAG_SAFE 0x0800
534 /* If set, extra latin codes (128..159) are accepted as a valid code
536 #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
538 #define CODING_ISO_FLAG_COMPOSITION 0x2000
540 #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
542 #define CODING_ISO_FLAG_USE_ROMAN 0x8000
544 #define CODING_ISO_FLAG_USE_OLDJIS 0x10000
546 #define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
548 /* A character to be produced on output if encoding of the original
549 character is prohibited by CODING_ISO_FLAG_SAFE. */
550 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
/* Accessors for the UTF-16 specific state kept in
   CODING->spec.utf_16.  CODING is a pointer to a struct
   coding_system.  */

/* The `bom' member of CODING's UTF-16 state.  */
554 #define CODING_UTF_16_BOM(coding) \
555 ((coding)->spec.utf_16.bom)

/* The `endian' member of CODING's UTF-16 state.  */
557 #define CODING_UTF_16_ENDIAN(coding) \
558 ((coding)->spec.utf_16.endian)

/* The `surrogate' member of CODING's UTF-16 state.  */
560 #define CODING_UTF_16_SURROGATE(coding) \
561 ((coding)->spec.utf_16.surrogate)
/* Accessors for the CCL related attributes of CODING.  Each one
   fetches an element of the attribute vector obtained with
   CODING_ID_ATTRS from CODING->id.  */

/* The element at index coding_attr_ccl_decoder.  */
565 #define CODING_CCL_DECODER(coding) \
566 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
/* The element at index coding_attr_ccl_encoder.  */
567 #define CODING_CCL_ENCODER(coding) \
568 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
/* The data (via SDATA) of the element at index
   coding_attr_ccl_valids.  */
569 #define CODING_CCL_VALIDS(coding) \
570 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
572 /* Index for each coding category in `coding_categories' */
576 coding_category_iso_7
,
577 coding_category_iso_7_tight
,
578 coding_category_iso_8_1
,
579 coding_category_iso_8_2
,
580 coding_category_iso_7_else
,
581 coding_category_iso_8_else
,
582 coding_category_utf_8
,
583 coding_category_utf_16_auto
,
584 coding_category_utf_16_be
,
585 coding_category_utf_16_le
,
586 coding_category_utf_16_be_nosig
,
587 coding_category_utf_16_le_nosig
,
588 coding_category_charset
,
589 coding_category_sjis
,
590 coding_category_big5
,
592 coding_category_emacs_mule
,
593 /* All above are targets of code detection. */
594 coding_category_raw_text
,
595 coding_category_undecided
,
599 /* Definitions of flag bits used in detect_coding_XXXX. */
600 #define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
601 #define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
602 #define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
603 #define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
604 #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
605 #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
606 #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
607 #define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
608 #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
609 #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
610 #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
611 #define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
612 #define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
613 #define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
614 #define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
615 #define CATEGORY_MASK_CCL (1 << coding_category_ccl)
616 #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
617 #define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
619 /* This value is returned if detect_coding_mask () finds nothing other
620 than ASCII characters. */
621 #define CATEGORY_MASK_ANY \
622 (CATEGORY_MASK_ISO_7 \
623 | CATEGORY_MASK_ISO_7_TIGHT \
624 | CATEGORY_MASK_ISO_8_1 \
625 | CATEGORY_MASK_ISO_8_2 \
626 | CATEGORY_MASK_ISO_7_ELSE \
627 | CATEGORY_MASK_ISO_8_ELSE \
628 | CATEGORY_MASK_UTF_8 \
629 | CATEGORY_MASK_UTF_16_BE \
630 | CATEGORY_MASK_UTF_16_LE \
631 | CATEGORY_MASK_UTF_16_BE_NOSIG \
632 | CATEGORY_MASK_UTF_16_LE_NOSIG \
633 | CATEGORY_MASK_CHARSET \
634 | CATEGORY_MASK_SJIS \
635 | CATEGORY_MASK_BIG5 \
636 | CATEGORY_MASK_CCL \
637 | CATEGORY_MASK_EMACS_MULE)
/* Composite masks: each one is the bitwise OR of several of the
   single-category CATEGORY_MASK_XXX bits.  */

/* The two 7-bit ISO categories (iso_7 and iso_7_tight).  */
640 #define CATEGORY_MASK_ISO_7BIT \
641 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)

/* The two 8-bit ISO categories (iso_8_1 and iso_8_2).  */
643 #define CATEGORY_MASK_ISO_8BIT \
644 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)

/* The two `else' ISO categories (iso_7_else and iso_8_else).  */
646 #define CATEGORY_MASK_ISO_ELSE \
647 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)

/* Every ISO category except iso_8_1 and iso_8_2.
   NOTE(review): presumably the categories that can be recognized by
   an escape sequence -- confirm against detect_coding_iso_2022.  */
649 #define CATEGORY_MASK_ISO_ESCAPE \
650 (CATEGORY_MASK_ISO_7 \
651 | CATEGORY_MASK_ISO_7_TIGHT \
652 | CATEGORY_MASK_ISO_7_ELSE \
653 | CATEGORY_MASK_ISO_8_ELSE)

/* All six ISO categories.  */
655 #define CATEGORY_MASK_ISO \
656 ( CATEGORY_MASK_ISO_7BIT \
657 | CATEGORY_MASK_ISO_8BIT \
658 | CATEGORY_MASK_ISO_ELSE)

/* The big-endian and little-endian UTF-16 categories, with and
   without signature.  CATEGORY_MASK_UTF_16_AUTO is not included.  */
660 #define CATEGORY_MASK_UTF_16 \
661 (CATEGORY_MASK_UTF_16_BE \
662 | CATEGORY_MASK_UTF_16_LE \
663 | CATEGORY_MASK_UTF_16_BE_NOSIG \
664 | CATEGORY_MASK_UTF_16_LE_NOSIG)
667 /* List of symbols `coding-category-xxx' ordered by priority. This
668 variable is exposed to Emacs Lisp. */
669 static Lisp_Object Vcoding_category_list
;
671 /* Table of coding categories (Lisp symbols). This variable is for
673 static Lisp_Object Vcoding_category_table
;
675 /* Table of coding-categories ordered by priority. */
676 static enum coding_category coding_priorities
[coding_category_max
];
678 /* Nth element is a coding context for the coding system bound to the
679 Nth coding category. */
680 static struct coding_system coding_categories
[coding_category_max
];
682 /*** Commonly used macros and functions ***/
/* The smaller of A and B.  The selected argument is evaluated twice,
   so do not pass expressions with side effects.  */
685 #define min(a, b) ((a) < (b) ? (a) : (b))
/* The larger of A and B.  The selected argument is evaluated twice,
   so do not pass expressions with side effects.  */
688 #define max(a, b) ((a) > (b) ? (a) : (b))
691 #define CODING_GET_INFO(coding, attrs, charset_list) \
693 (attrs) = CODING_ID_ATTRS ((coding)->id); \
694 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
698 /* Safely get one byte from the source text pointed by SRC which ends
699 at SRC_END, and set C to that byte. If there are not enough bytes
700 in the source, it jumps to `no_more_source'. If multibytep is
701 nonzero, and a multibyte character is found at SRC, set C to the
702 negative value of the character code. The caller should declare
703 and set these variables appropriately in advance:
704 src, src_end, multibytep */
706 #define ONE_MORE_BYTE(c) \
708 if (src == src_end) \
710 if (src_base < src) \
711 record_conversion_result \
712 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
713 goto no_more_source; \
716 if (multibytep && (c & 0x80)) \
718 if ((c & 0xFE) == 0xC0) \
719 c = ((c & 1) << 6) | *src++; \
723 c = - string_char (src, &src, NULL); \
724 record_conversion_result \
725 (coding, CODING_RESULT_INVALID_SRC); \
732 #define ONE_MORE_BYTE_NO_CHECK(c) \
735 if (multibytep && (c & 0x80)) \
737 if ((c & 0xFE) == 0xC0) \
738 c = ((c & 1) << 6) | *src++; \
742 c = - string_char (src, &src, NULL); \
743 record_conversion_result \
744 (coding, CODING_RESULT_INVALID_SRC); \
751 /* Store a byte C in the place pointed by DST and increment DST to the
752 next free point, and increment PRODUCED_CHARS. The caller should
753 assure that C is 0..127, and declare and set the variable `dst'
754 appropriately in advance.
758 #define EMIT_ONE_ASCII_BYTE(c) \
765 /* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */
767 #define EMIT_TWO_ASCII_BYTES(c1, c2) \
769 produced_chars += 2; \
770 *dst++ = (c1), *dst++ = (c2); \
774 /* Store a byte C in the place pointed by DST and increment DST to the
775 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
776 nonzero, store in an appropriate multibyte form. The caller should
777 declare and set the variables `dst' and `multibytep' appropriately
780 #define EMIT_ONE_BYTE(c) \
787 ch = BYTE8_TO_CHAR (ch); \
788 CHAR_STRING_ADVANCE (ch, dst); \
795 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
797 #define EMIT_TWO_BYTES(c1, c2) \
799 produced_chars += 2; \
806 ch = BYTE8_TO_CHAR (ch); \
807 CHAR_STRING_ADVANCE (ch, dst); \
810 ch = BYTE8_TO_CHAR (ch); \
811 CHAR_STRING_ADVANCE (ch, dst); \
821 #define EMIT_THREE_BYTES(c1, c2, c3) \
823 EMIT_ONE_BYTE (c1); \
824 EMIT_TWO_BYTES (c2, c3); \
828 #define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
830 EMIT_TWO_BYTES (c1, c2); \
831 EMIT_TWO_BYTES (c3, c4); \
835 /* Prototypes for static functions. */
836 static void record_conversion_result
P_ ((struct coding_system
*coding
,
837 enum coding_result_code result
));
838 static int detect_coding_utf_8
P_ ((struct coding_system
*,
839 struct coding_detection_info
*info
));
840 static void decode_coding_utf_8
P_ ((struct coding_system
*));
841 static int encode_coding_utf_8
P_ ((struct coding_system
*));
843 static int detect_coding_utf_16
P_ ((struct coding_system
*,
844 struct coding_detection_info
*info
));
845 static void decode_coding_utf_16
P_ ((struct coding_system
*));
846 static int encode_coding_utf_16
P_ ((struct coding_system
*));
848 static int detect_coding_iso_2022
P_ ((struct coding_system
*,
849 struct coding_detection_info
*info
));
850 static void decode_coding_iso_2022
P_ ((struct coding_system
*));
851 static int encode_coding_iso_2022
P_ ((struct coding_system
*));
853 static int detect_coding_emacs_mule
P_ ((struct coding_system
*,
854 struct coding_detection_info
*info
));
855 static void decode_coding_emacs_mule
P_ ((struct coding_system
*));
856 static int encode_coding_emacs_mule
P_ ((struct coding_system
*));
858 static int detect_coding_sjis
P_ ((struct coding_system
*,
859 struct coding_detection_info
*info
));
860 static void decode_coding_sjis
P_ ((struct coding_system
*));
861 static int encode_coding_sjis
P_ ((struct coding_system
*));
863 static int detect_coding_big5
P_ ((struct coding_system
*,
864 struct coding_detection_info
*info
));
865 static void decode_coding_big5
P_ ((struct coding_system
*));
866 static int encode_coding_big5
P_ ((struct coding_system
*));
868 static int detect_coding_ccl
P_ ((struct coding_system
*,
869 struct coding_detection_info
*info
));
870 static void decode_coding_ccl
P_ ((struct coding_system
*));
871 static int encode_coding_ccl
P_ ((struct coding_system
*));
/* Forward declarations of file-local (static) helpers.  Each parameter
   list is wrapped in P_ (( ... )).
   NOTE(review): several declarations in this listing stop after an
   opening parameter (coding_alloc_by_making_gap, alloc_destination,
   encode_designation_at_bol, detect_eol, get_translation,
   produce_composition, produce_charset); their continuation lines
   appear to be missing from this copy and should be restored from the
   pristine file before this is compiled.  */
873 static void decode_coding_raw_text
P_ ((struct coding_system
*));
874 static int encode_coding_raw_text
P_ ((struct coding_system
*));
876 static void coding_set_source
P_ ((struct coding_system
*));
877 static void coding_set_destination
P_ ((struct coding_system
*));
878 static void coding_alloc_by_realloc
P_ ((struct coding_system
*, EMACS_INT
));
879 static void coding_alloc_by_making_gap
P_ ((struct coding_system
*,
881 static unsigned char *alloc_destination
P_ ((struct coding_system
*,
882 EMACS_INT
, unsigned char *));
883 static void setup_iso_safe_charsets
P_ ((Lisp_Object
));
884 static unsigned char *encode_designation_at_bol
P_ ((struct coding_system
*,
887 static int detect_eol
P_ ((const unsigned char *,
888 EMACS_INT
, enum coding_category
));
889 static Lisp_Object adjust_coding_eol_type
P_ ((struct coding_system
*, int));
890 static void decode_eol
P_ ((struct coding_system
*));
891 static Lisp_Object get_translation_table
P_ ((Lisp_Object
, int, int *));
892 static Lisp_Object get_translation
P_ ((Lisp_Object
, int *, int *,
894 static int produce_chars
P_ ((struct coding_system
*, Lisp_Object
, int));
895 static INLINE
void produce_composition
P_ ((struct coding_system
*, int *,
897 static INLINE
void produce_charset
P_ ((struct coding_system
*, int *,
899 static void produce_annotation
P_ ((struct coding_system
*, EMACS_INT
));
900 static int decode_coding
P_ ((struct coding_system
*));
901 static INLINE
int *handle_composition_annotation
P_ ((EMACS_INT
, EMACS_INT
,
902 struct coding_system
*,
903 int *, EMACS_INT
*));
904 static INLINE
int *handle_charset_annotation
P_ ((EMACS_INT
, EMACS_INT
,
905 struct coding_system
*,
906 int *, EMACS_INT
*));
907 static void consume_chars
P_ ((struct coding_system
*, Lisp_Object
, int));
908 static int encode_coding
P_ ((struct coding_system
*));
909 static Lisp_Object make_conversion_work_buffer
P_ ((int));
910 static Lisp_Object code_conversion_restore
P_ ((Lisp_Object
));
911 static INLINE
int char_encodable_p
P_ ((int, Lisp_Object
));
912 static Lisp_Object make_subsidiaries
P_ ((Lisp_Object
));
/* Record RESULT as the outcome of the current conversion: store it in
   CODING->result and set Vlast_code_conversion_error according to
   RESULT (Qinsufficient_source, Qinconsistent_eol, Qinvalid_source,
   Qinterrupted or Qinsufficient_memory).  The last assignment interns
   the symbol named "Unknown error"; presumably that is the default
   case -- confirm.
   NOTE(review): the return type, the switch header, the break
   statements and all braces are absent from this listing.  */
915 record_conversion_result (struct coding_system
*coding
,
916 enum coding_result_code result
)
918 coding
->result
= result
;
921 case CODING_RESULT_INSUFFICIENT_SRC
:
922 Vlast_code_conversion_error
= Qinsufficient_source
;
924 case CODING_RESULT_INCONSISTENT_EOL
:
925 Vlast_code_conversion_error
= Qinconsistent_eol
;
927 case CODING_RESULT_INVALID_SRC
:
928 Vlast_code_conversion_error
= Qinvalid_source
;
930 case CODING_RESULT_INTERRUPT
:
931 Vlast_code_conversion_error
= Qinterrupted
;
933 case CODING_RESULT_INSUFFICIENT_MEM
:
934 Vlast_code_conversion_error
= Qinsufficient_memory
;
937 Vlast_code_conversion_error
= intern ("Unknown error");
/* Decode CODE of CHARSET into the character C with DECODE_CHAR.
   charset_map_loaded is cleared beforehand; if it is set afterwards,
   coding->source is recomputed with coding_set_source and the
   distance it moved is added to SRC_BASE.
   NOTE(review): SRC and SRC_END are macro parameters but no visible
   line adjusts them; presumably they are shifted by the same offset
   on lines missing from this listing -- confirm.  */
941 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
943 charset_map_loaded = 0; \
944 c = DECODE_CHAR (charset, code); \
945 if (charset_map_loaded) \
947 const unsigned char *orig = coding->source; \
950 coding_set_source (coding); \
951 offset = coding->source - orig; \
953 src_base += offset; \
/* Make sure BYTES more bytes can be stored at DST.  When
   DST + BYTES reaches or passes DST_END, call alloc_destination asking
   for room for the rest of the character buffer
   (charbuf_end - charbuf) plus BYTES, then recompute DST_END from
   coding->destination and coding->dst_bytes.  The macro uses the
   variables coding, dst, dst_end, charbuf and charbuf_end of the
   expansion site.  */
959 #define ASSURE_DESTINATION(bytes) \
961 if (dst + (bytes) >= dst_end) \
963 int more_bytes = charbuf_end - charbuf + (bytes); \
965 dst = alloc_destination (coding, more_bytes, dst); \
966 dst_end = coding->destination + coding->dst_bytes; \
/* Recompute CODING->source from CODING->src_object.
   If the source object is a buffer, the address is
   BUF_GAP_END_ADDR (buf) + src_pos_byte when CODING->src_pos is
   negative, and BUF_BYTE_ADDRESS (buf, src_pos_byte) in the other
   visible assignment.  If it is a string, the address is the string
   data plus src_pos_byte.  For any other source nothing is changed.
   NOTE(review): the `else' between the two buffer assignments and the
   braces are not visible in this listing.  */
973 coding_set_source (coding
)
974 struct coding_system
*coding
;
976 if (BUFFERP (coding
->src_object
))
978 struct buffer
*buf
= XBUFFER (coding
->src_object
);
980 if (coding
->src_pos
< 0)
981 coding
->source
= BUF_GAP_END_ADDR (buf
) + coding
->src_pos_byte
;
983 coding
->source
= BUF_BYTE_ADDRESS (buf
, coding
->src_pos_byte
);
985 else if (STRINGP (coding
->src_object
))
987 coding
->source
= SDATA (coding
->src_object
) + coding
->src_pos_byte
;
990 /* Otherwise, the source is C string and is never relocated
991 automatically. Thus we don't have to update anything. */
/* Recompute CODING->destination and CODING->dst_bytes when
   CODING->dst_object is a buffer.
   When CODING->src_pos is negative, the destination is
   BEG_ADDR + dst_pos_byte - 1 and the available size is what lies
   between it and GAP_END_ADDR minus the not-yet-consumed source bytes
   (src_bytes - consumed).  In the other visible assignment the
   destination is computed from BUF_BEG_ADDR of the destination buffer
   and the size extends to that buffer's BUF_GAP_END_ADDR.
   For a non-buffer destination nothing is changed.
   NOTE(review): the `else' lines, the braces and the tail of the
   "We are sure that ..." comment are missing from this listing.  */
996 coding_set_destination (coding
)
997 struct coding_system
*coding
;
999 if (BUFFERP (coding
->dst_object
))
1001 if (coding
->src_pos
< 0)
1003 coding
->destination
= BEG_ADDR
+ coding
->dst_pos_byte
- 1;
1004 coding
->dst_bytes
= (GAP_END_ADDR
1005 - (coding
->src_bytes
- coding
->consumed
)
1006 - coding
->destination
);
1010 /* We are sure that coding->dst_pos_byte is before the gap
1012 coding
->destination
= (BUF_BEG_ADDR (XBUFFER (coding
->dst_object
))
1013 + coding
->dst_pos_byte
- 1);
1014 coding
->dst_bytes
= (BUF_GAP_END_ADDR (XBUFFER (coding
->dst_object
))
1015 - coding
->destination
);
1019 /* Otherwise, the destination is C string and is never relocated
1020 automatically. Thus we don't have to update anything. */
/* Enlarge the destination of CODING by BYTES bytes: CODING->destination
   is replaced by the result of xrealloc with the size
   CODING->dst_bytes + BYTES, and CODING->dst_bytes is increased by
   BYTES.
   NOTE(review): the declaration of BYTES and the braces are not
   visible in this listing.  */
1026 coding_alloc_by_realloc (coding
, bytes
)
1027 struct coding_system
*coding
;
1030 coding
->destination
= (unsigned char *) xrealloc (coding
->destination
,
1031 coding
->dst_bytes
+ bytes
);
1032 coding
->dst_bytes
+= bytes
;
/* Enlarge the destination of CODING when it is a buffer.
   If the destination buffer is also the source object, the
   not-yet-consumed source bytes (src_bytes - consumed) are first
   accounted as buffer text: GAP_SIZE is reduced and ZV, Z, ZV_BYTE and
   Z_BYTE are increased by that amount, and the same adjustment is
   undone afterwards.  In the other visible path the current buffer is
   remembered, the destination buffer is made current, and the
   remembered buffer is made current again.
   NOTE(review): the statement executed between each adjust/restore
   pair (presumably the call that actually enlarges the gap by BYTES)
   is missing from this listing, as are the declaration of BYTES, the
   `else' and the braces -- confirm against the pristine file.  */
1036 coding_alloc_by_making_gap (coding
, bytes
)
1037 struct coding_system
*coding
;
1040 if (BUFFERP (coding
->dst_object
)
1041 && EQ (coding
->src_object
, coding
->dst_object
))
1043 EMACS_INT add
= coding
->src_bytes
- coding
->consumed
;
1045 GAP_SIZE
-= add
; ZV
+= add
; Z
+= add
; ZV_BYTE
+= add
; Z_BYTE
+= add
;
1047 GAP_SIZE
+= add
; ZV
-= add
; Z
-= add
; ZV_BYTE
-= add
; Z_BYTE
-= add
;
1051 Lisp_Object this_buffer
;
1053 this_buffer
= Fcurrent_buffer ();
1054 set_buffer_internal (XBUFFER (coding
->dst_object
));
1056 set_buffer_internal (XBUFFER (this_buffer
));
/* Make room for NBYTES more bytes in the destination of CODING.
   DST points into the current destination; its offset from
   CODING->destination is remembered before the destination is
   enlarged (coding_alloc_by_making_gap for a buffer destination,
   coding_alloc_by_realloc in the other visible call), the result code
   is reset to CODING_RESULT_SUCCESS, coding_set_destination refreshes
   the destination fields, and DST is recomputed at the same offset in
   the new destination.
   NOTE(review): the declarations of NBYTES and DST, the `else', and
   the return statement (presumably `return dst;') are missing from
   this listing.  */
1061 static unsigned char *
1062 alloc_destination (coding
, nbytes
, dst
)
1063 struct coding_system
*coding
;
1067 EMACS_INT offset
= dst
- coding
->destination
;
1069 if (BUFFERP (coding
->dst_object
))
1070 coding_alloc_by_making_gap (coding
, nbytes
);
1072 coding_alloc_by_realloc (coding
, nbytes
);
1073 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
1074 coding_set_destination (coding
);
1075 dst
= coding
->destination
+ offset
;
/* Annotation helpers.  ADD_ANNOTATION_DATA stores -LEN, MASK and NCHARS
   into BUF (advancing it by three elements) and sets
   coding->annotated.  ADD_COMPOSITION_DATA and ADD_CHARSET_DATA both
   start by calling it with a length of 4 and the corresponding
   CODING_ANNOTATE_..._MASK.
   NOTE(review): the lines of those two macros that store METHOD and ID
   are missing from this listing.  */
1079 /** Macros for annotations. */
1081 /* Maximum length of annotation data (sum of annotations for
1082 composition and charset). */
1083 #define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1085 /* An annotation data is stored in the array coding->charbuf in this
1087 [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1088 LENGTH is the number of elements in the annotation.
1089 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1090 NCHARS is the number of characters in the text annotated.
1092 The format of the following elements depends on ANNOTATION_MASK.
1094 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1096 ... METHOD [ COMPOSITION-COMPONENTS ... ]
1097 METHOD is one of enum composition_method.
1098 Optional COMPOSITION-COMPONENTS are characters and composition
1101 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1104 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars) \
1106 *(buf)++ = -(len); \
1107 *(buf)++ = (mask); \
1108 *(buf)++ = (nchars); \
1109 coding->annotated = 1; \
1112 #define ADD_COMPOSITION_DATA(buf, nchars, method) \
1114 ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1119 #define ADD_CHARSET_DATA(buf, nchars, id) \
1121 ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
1126 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1133 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1134 Check if a text is encoded in UTF-8. If it is, return 1, else
return 0. */
/* Classification of a single UTF-8 octet C by its high bits.  C must
   be a non-negative byte value (0..0xFF); each macro evaluates C
   exactly once.

   UTF_8_1_OCTET_P          0xxxxxxx  a complete one-octet character
   UTF_8_EXTRA_OCTET_P      10xxxxxx  a trailing (continuation) octet
   UTF_8_n_OCTET_LEADING_P            the first octet of an n-octet
                                      sequence (n = 2..5):
                                      110xxxxx, 1110xxxx, 11110xxx,
                                      111110xx.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
#define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
/* Detector for the UTF-8 category.  Scanning starts at CODING->source
   after skipping CODING->head_ascii bytes, and CATEGORY_MASK_UTF_8 is
   always added to DETECT_INFO->checked.
   For each sequence the bytes c, c1 .. c4 are examined: a negative c
   or a one-octet c is tested first; every further byte must be an
   extra (10xxxxxx) octet, and once the leading octet c matches the
   2-, 3-, 4- or 5-octet leading pattern the local `found' is set to
   CATEGORY_MASK_UTF_8.  Two visible statements add
   CATEGORY_MASK_UTF_8 to DETECT_INFO->rejected; the second is guarded
   by a partial sequence (src_base < src) in the last block
   (CODING_MODE_LAST_BLOCK).  The final visible statement ORs `found'
   into DETECT_INFO->found.
   NOTE(review): the loop header, the byte-fetching statements, the
   declaration of `found', the labels and the return statements are
   missing from this listing.  */
1145 detect_coding_utf_8 (coding
, detect_info
)
1146 struct coding_system
*coding
;
1147 struct coding_detection_info
*detect_info
;
1149 const unsigned char *src
= coding
->source
, *src_base
;
1150 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1151 int multibytep
= coding
->src_multibyte
;
1152 int consumed_chars
= 0;
1155 detect_info
->checked
|= CATEGORY_MASK_UTF_8
;
1156 /* A coding system of this category is always ASCII compatible. */
1157 src
+= coding
->head_ascii
;
1161 int c
, c1
, c2
, c3
, c4
;
1165 if (c
< 0 || UTF_8_1_OCTET_P (c
))
1168 if (c1
< 0 || ! UTF_8_EXTRA_OCTET_P (c1
))
1170 if (UTF_8_2_OCTET_LEADING_P (c
))
1172 found
= CATEGORY_MASK_UTF_8
;
1176 if (c2
< 0 || ! UTF_8_EXTRA_OCTET_P (c2
))
1178 if (UTF_8_3_OCTET_LEADING_P (c
))
1180 found
= CATEGORY_MASK_UTF_8
;
1184 if (c3
< 0 || ! UTF_8_EXTRA_OCTET_P (c3
))
1186 if (UTF_8_4_OCTET_LEADING_P (c
))
1188 found
= CATEGORY_MASK_UTF_8
;
1192 if (c4
< 0 || ! UTF_8_EXTRA_OCTET_P (c4
))
1194 if (UTF_8_5_OCTET_LEADING_P (c
))
1196 found
= CATEGORY_MASK_UTF_8
;
1201 detect_info
->rejected
|= CATEGORY_MASK_UTF_8
;
1205 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
1207 detect_info
->rejected
|= CATEGORY_MASK_UTF_8
;
1210 detect_info
->found
|= found
;
/* Decode UTF-8 bytes of CODING into CODING->charbuf.
   Reading starts at CODING->source + CODING->consumed; characters are
   appended at CODING->charbuf + CODING->charbuf_used and the loop
   tests for a full character buffer (charbuf >= charbuf_end).
   A character is assembled from the leading octet c1 and the extra
   octets c2 .. c5 using the usual masks and shifts for 2- to 5-octet
   sequences.  According to the comment below, overlong sequences are
   rejected; the visible checks also reject the surrogate range
   0xD800..0xDFFF for 3-octet sequences and, for 5-octet sequences,
   values above MAX_CHAR or below 0x200000.
   In the recovery path consumed_chars is reset to
   consumed_chars_base and a single byte is stored, unchanged if ASCII
   and through BYTE8_TO_CHAR otherwise.
   On exit CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated.
   NOTE(review): the loop header, byte-fetching statements, several
   conditions, labels and braces are missing from this listing.  */
1216 decode_coding_utf_8 (coding
)
1217 struct coding_system
*coding
;
1219 const unsigned char *src
= coding
->source
+ coding
->consumed
;
1220 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1221 const unsigned char *src_base
;
1222 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
1223 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_size
;
1224 int consumed_chars
= 0, consumed_chars_base
;
1225 int multibytep
= coding
->src_multibyte
;
1226 Lisp_Object attr
, charset_list
;
1228 CODING_GET_INFO (coding
, attr
, charset_list
);
1232 int c
, c1
, c2
, c3
, c4
, c5
;
1235 consumed_chars_base
= consumed_chars
;
1237 if (charbuf
>= charbuf_end
)
1245 else if (UTF_8_1_OCTET_P(c1
))
1252 if (c2
< 0 || ! UTF_8_EXTRA_OCTET_P (c2
))
1254 if (UTF_8_2_OCTET_LEADING_P (c1
))
1256 c
= ((c1
& 0x1F) << 6) | (c2
& 0x3F);
1257 /* Reject overlong sequences here and below. Encoders
1258 producing them are incorrect, they can be misleading,
1259 and they mess up read/write invariance. */
1266 if (c3
< 0 || ! UTF_8_EXTRA_OCTET_P (c3
))
1268 if (UTF_8_3_OCTET_LEADING_P (c1
))
1270 c
= (((c1
& 0xF) << 12)
1271 | ((c2
& 0x3F) << 6) | (c3
& 0x3F));
1273 || (c
>= 0xd800 && c
< 0xe000)) /* surrogates (invalid) */
1279 if (c4
< 0 || ! UTF_8_EXTRA_OCTET_P (c4
))
1281 if (UTF_8_4_OCTET_LEADING_P (c1
))
1283 c
= (((c1
& 0x7) << 18) | ((c2
& 0x3F) << 12)
1284 | ((c3
& 0x3F) << 6) | (c4
& 0x3F));
1291 if (c5
< 0 || ! UTF_8_EXTRA_OCTET_P (c5
))
1293 if (UTF_8_5_OCTET_LEADING_P (c1
))
1295 c
= (((c1
& 0x3) << 24) | ((c2
& 0x3F) << 18)
1296 | ((c3
& 0x3F) << 12) | ((c4
& 0x3F) << 6)
1298 if ((c
> MAX_CHAR
) || (c
< 0x200000))
1313 consumed_chars
= consumed_chars_base
;
1315 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
1320 coding
->consumed_char
+= consumed_chars_base
;
1321 coding
->consumed
= src_base
- coding
->source
;
1322 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode the characters in CODING->charbuf (charbuf_used elements) as
   UTF-8 into the destination of CODING, starting at
   CODING->destination + CODING->produced.
   Two loops are visible.  The first reserves
   MAX_MULTIBYTE_LENGTH * 2 bytes per character with
   ASSURE_DESTINATION, converts the character into the local buffer
   STR with CHAR_STRING_ADVANCE and then walks over the bytes of STR.
   The second reserves MAX_MULTIBYTE_LENGTH bytes and writes directly
   to DST with CHAR_STRING.  In both loops a character for which
   CHAR_BYTE8_P holds is turned back into its byte with CHAR_TO_BYTE8.
   Finally the result is recorded as CODING_RESULT_SUCCESS and
   CODING->produced_char and CODING->produced are updated.
   NOTE(review): presumably `multibytep' selects between the two loops;
   the test itself, the statements fetching C from the character
   buffer, the emit statements and the return are missing from this
   listing -- confirm.  */
1327 encode_coding_utf_8 (coding
)
1328 struct coding_system
*coding
;
1330 int multibytep
= coding
->dst_multibyte
;
1331 int *charbuf
= coding
->charbuf
;
1332 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1333 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1334 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1335 int produced_chars
= 0;
1340 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
1342 while (charbuf
< charbuf_end
)
1344 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p
, *pend
= str
;
1346 ASSURE_DESTINATION (safe_room
);
1348 if (CHAR_BYTE8_P (c
))
1350 c
= CHAR_TO_BYTE8 (c
);
1355 CHAR_STRING_ADVANCE (c
, pend
);
1356 for (p
= str
; p
< pend
; p
++)
1363 int safe_room
= MAX_MULTIBYTE_LENGTH
;
1365 while (charbuf
< charbuf_end
)
1367 ASSURE_DESTINATION (safe_room
);
1369 if (CHAR_BYTE8_P (c
))
1370 *dst
++ = CHAR_TO_BYTE8 (c
);
1372 dst
+= CHAR_STRING (c
, dst
);
1376 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
1377 coding
->produced_char
+= produced_chars
;
1378 coding
->produced
= dst
- coding
->destination
;
1383 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1384 Check if a text is encoded in one of UTF-16 based coding systems.
1385 If it is, return 1, else return 0. */
/* Classification of a 16-bit UTF-16 code unit VAL.

   UTF_16_HIGH_SURROGATE_P  VAL is in 0xD800..0xDBFF, the first unit of
                            a surrogate pair.
   UTF_16_LOW_SURROGATE_P   VAL is in 0xDC00..0xDFFF, the second unit
                            of a surrogate pair.
   UTF_16_INVALID_P         VAL cannot start a character: it is 0xFFFE,
                            0xFFFF, or a low surrogate.

   UTF_16_INVALID_P evaluates VAL more than once, so VAL must not have
   side effects.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)

#define UTF_16_INVALID_P(val) \
  (((val) == 0xFFFE) \
   || ((val) == 0xFFFF) \
   || UTF_16_LOW_SURROGATE_P (val))
/* Detector for the UTF-16 categories.  CATEGORY_MASK_UTF_16 is always
   added to DETECT_INFO->checked.
   If this is the last block (CODING_MODE_LAST_BLOCK) and
   CODING->src_chars is odd, the whole UTF-16 category is rejected.
   Otherwise the first two bytes c1, c2 are inspected:
     0xFF 0xFE  little-endian signature: UTF_16_LE and UTF_16_AUTO are
                found; UTF_16_BE and both NOSIG categories are rejected.
     0xFE 0xFF  big-endian signature: UTF_16_BE and UTF_16_AUTO are
                found; UTF_16_LE and both NOSIG categories are rejected.
     any other pair of non-negative bytes: UTF_16_BE and UTF_16_LE
                (the categories that require a signature) are rejected.
   NOTE(review): the statements that read c1 and c2, the labels and the
   return statements are missing from this listing.  */
1400 detect_coding_utf_16 (coding
, detect_info
)
1401 struct coding_system
*coding
;
1402 struct coding_detection_info
*detect_info
;
1404 const unsigned char *src
= coding
->source
, *src_base
= src
;
1405 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1406 int multibytep
= coding
->src_multibyte
;
1407 int consumed_chars
= 0;
1410 detect_info
->checked
|= CATEGORY_MASK_UTF_16
;
1411 if (coding
->mode
& CODING_MODE_LAST_BLOCK
1412 && (coding
->src_chars
& 1))
1414 detect_info
->rejected
|= CATEGORY_MASK_UTF_16
;
1420 if ((c1
== 0xFF) && (c2
== 0xFE))
1422 detect_info
->found
|= (CATEGORY_MASK_UTF_16_LE
1423 | CATEGORY_MASK_UTF_16_AUTO
);
1424 detect_info
->rejected
|= (CATEGORY_MASK_UTF_16_BE
1425 | CATEGORY_MASK_UTF_16_BE_NOSIG
1426 | CATEGORY_MASK_UTF_16_LE_NOSIG
);
1428 else if ((c1
== 0xFE) && (c2
== 0xFF))
1430 detect_info
->found
|= (CATEGORY_MASK_UTF_16_BE
1431 | CATEGORY_MASK_UTF_16_AUTO
);
1432 detect_info
->rejected
|= (CATEGORY_MASK_UTF_16_LE
1433 | CATEGORY_MASK_UTF_16_BE_NOSIG
1434 | CATEGORY_MASK_UTF_16_LE_NOSIG
);
1436 else if (c1
>= 0 && c2
>= 0)
1438 detect_info
->rejected
1439 |= (CATEGORY_MASK_UTF_16_BE
| CATEGORY_MASK_UTF_16_LE
);
/* Decode UTF-16 bytes of CODING into CODING->charbuf.
   The BOM mode, the endianness and a pending surrogate are read from
   CODING with CODING_UTF_16_BOM, CODING_UTF_16_ENDIAN and
   CODING_UTF_16_SURROGATE.
   With utf_16_with_bom the first code unit is compared with 0xFEFF
   (big endian) or 0xFFFE (the other case); in either BOM branch the
   mode is then switched to utf_16_without_bom so that the check is not
   repeated.
   In the main loop the buffer test `charbuf + 2 >= charbuf_end' keeps
   room for two elements per iteration.  A code unit C is built from
   the bytes c1, c2 according to the endianness.  While a surrogate is
   pending, a C that is not a low surrogate makes the two bytes of the
   pending surrogate be re-derived into c1, c2 (and a byte is stored
   unchanged if ASCII, through BYTE8_TO_CHAR otherwise); a low
   surrogate is combined as
   0x10000 + (((surrogate - 0xD800) << 10) | (c - 0xDC00)).
   A high surrogate is remembered in CODING for the next unit.
   On exit CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated.
   NOTE(review): the byte-fetching statements, loop header, several
   branches and the tail of the "We have already tried ..." comment are
   missing from this listing.  */
1446 decode_coding_utf_16 (coding
)
1447 struct coding_system
*coding
;
1449 const unsigned char *src
= coding
->source
+ coding
->consumed
;
1450 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1451 const unsigned char *src_base
;
1452 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
1453 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_size
;
1454 int consumed_chars
= 0, consumed_chars_base
;
1455 int multibytep
= coding
->src_multibyte
;
1456 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1457 enum utf_16_endian_type endian
= CODING_UTF_16_ENDIAN (coding
);
1458 int surrogate
= CODING_UTF_16_SURROGATE (coding
);
1459 Lisp_Object attr
, charset_list
;
1461 CODING_GET_INFO (coding
, attr
, charset_list
);
1463 if (bom
== utf_16_with_bom
)
1472 if (endian
== utf_16_big_endian
1473 ? c
!= 0xFEFF : c
!= 0xFFFE)
1475 /* The first two bytes are not BOM. Treat them as bytes
1476 for a normal character. */
1480 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1482 else if (bom
== utf_16_detect_bom
)
1484 /* We have already tried to detect BOM and failed in
1486 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1494 consumed_chars_base
= consumed_chars
;
1496 if (charbuf
+ 2 >= charbuf_end
)
1508 *charbuf
++ = ASCII_BYTE_P (c1
) ? c1
: BYTE8_TO_CHAR (c1
);
1512 c
= (endian
== utf_16_big_endian
1513 ? ((c1
<< 8) | c2
) : ((c2
<< 8) | c1
));
1516 if (! UTF_16_LOW_SURROGATE_P (c
))
1518 if (endian
== utf_16_big_endian
)
1519 c1
= surrogate
>> 8, c2
= surrogate
& 0xFF;
1521 c1
= surrogate
& 0xFF, c2
= surrogate
>> 8;
1525 if (UTF_16_HIGH_SURROGATE_P (c
))
1526 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1532 c
= ((surrogate
- 0xD800) << 10) | (c
- 0xDC00);
1533 CODING_UTF_16_SURROGATE (coding
) = surrogate
= 0;
1534 *charbuf
++ = 0x10000 + c
;
1539 if (UTF_16_HIGH_SURROGATE_P (c
))
1540 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1547 coding
->consumed_char
+= consumed_chars_base
;
1548 coding
->consumed
= src_base
- coding
->source
;
1549 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode the characters in CODING->charbuf as UTF-16 into the
   destination of CODING.
   Unless the BOM mode is utf_16_without_bom, a signature is emitted
   first (bytes 0xFE 0xFF or 0xFF 0xFE) and the mode is switched to
   utf_16_without_bom so that it is written only once.
   For each character, ASSURE_DESTINATION reserves SAFE_ROOM bytes.  A
   character that is >= MAX_UNICODE_CHAR is replaced by
   CODING->default_char.  A character is then written either as one
   code unit (EMIT_TWO_BYTES, high byte first or low byte first) or as
   a surrogate pair c1 = (c >> 10) + 0xD800, c2 = (c & 0x3FF) + 0xDC00
   (EMIT_FOUR_BYTES, again in either byte order).
   Finally the result is recorded as CODING_RESULT_SUCCESS and
   CODING->produced and CODING->produced_char are updated.
   NOTE(review): presumably `big_endian' selects the byte order and C
   is reduced by 0x10000 before the surrogate computation; the
   definition of SAFE_ROOM, those tests, and the statements fetching C
   are missing from this listing -- confirm.
   NOTE(review): `c >= MAX_UNICODE_CHAR' also substitutes the default
   character for a character equal to MAX_UNICODE_CHAR itself; confirm
   whether `>' was intended.  */
1553 encode_coding_utf_16 (coding
)
1554 struct coding_system
*coding
;
1556 int multibytep
= coding
->dst_multibyte
;
1557 int *charbuf
= coding
->charbuf
;
1558 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1559 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1560 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1562 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1563 int big_endian
= CODING_UTF_16_ENDIAN (coding
) == utf_16_big_endian
;
1564 int produced_chars
= 0;
1565 Lisp_Object attrs
, charset_list
;
1568 CODING_GET_INFO (coding
, attrs
, charset_list
);
1570 if (bom
!= utf_16_without_bom
)
1572 ASSURE_DESTINATION (safe_room
);
1574 EMIT_TWO_BYTES (0xFE, 0xFF);
1576 EMIT_TWO_BYTES (0xFF, 0xFE);
1577 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1580 while (charbuf
< charbuf_end
)
1582 ASSURE_DESTINATION (safe_room
);
1584 if (c
>= MAX_UNICODE_CHAR
)
1585 c
= coding
->default_char
;
1590 EMIT_TWO_BYTES (c
>> 8, c
& 0xFF);
1592 EMIT_TWO_BYTES (c
& 0xFF, c
>> 8);
1599 c1
= (c
>> 10) + 0xD800;
1600 c2
= (c
& 0x3FF) + 0xDC00;
1602 EMIT_FOUR_BYTES (c1
>> 8, c1
& 0xFF, c2
>> 8, c2
& 0xFF);
1604 EMIT_FOUR_BYTES (c1
& 0xFF, c1
>> 8, c2
& 0xFF, c2
>> 8);
1607 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
1608 coding
->produced
= dst
- coding
->destination
;
1609 coding
->produced_char
+= produced_chars
;
1614 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1616 /* Emacs' internal format for representation of multiple character
1617 sets is a kind of multi-byte encoding, i.e. characters are
1618 represented by variable-length sequences of one-byte codes.
1620 ASCII characters and control characters (e.g. `tab', `newline') are
1621 represented by one-byte sequences which are their ASCII codes, in
1622 the range 0x00 through 0x7F.
1624 8-bit characters of the range 0x80..0x9F are represented by
1625 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
code + 0x20).
1628 8-bit characters of the range 0xA0..0xFF are represented by
1629 one-byte sequences which are their 8-bit code.
1631 The other characters are represented by a sequence of `base
1632 leading-code', optional `extended leading-code', and one or two
1633 `position-code's. The length of the sequence is determined by the
1634 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1635 whereas extended leading-code and position-code take the range 0xA0
1636 through 0xFF. See `charset.h' for more details about leading-code
1639 --- CODE RANGE of Emacs' internal format ---
1643 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1644 eight-bit-graphic 0xA0..0xBF
1645 ELSE 0x81..0x9D + [0xA0..0xFF]+
1646 ---------------------------------------------
1648 As this is the internal character representation, the format is
1649 usually not used externally (i.e. in a file or in a data sent to a
1650 process). But, it is possible to have a text externally in this
1651 format (i.e. by encoding by the coding system `emacs-mule').
1653 In that case, a sequence of one-byte codes has a slightly different
1656 At first, all characters in eight-bit-control are represented by
1657 one-byte sequences which are their 8-bit code.
1659 Next, character composition data are represented by the byte
1660 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1662 METHOD is 0xF2 plus one of composition method (enum
1663 composition_method),
1665 BYTES is 0xA0 plus a byte length of this composition data,
1667 CHARS is 0xA0 plus a number of characters composed by this
data,
1670 COMPONENTs are characters of multibyte form or composition
1671 rules encoded by two-byte of ASCII codes.
1673 In addition, for backward compatibility, the following formats are
1674 also recognized as composition data on decoding.
1677 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1680 MSEQ is a multibyte form but in these special format:
1681 ASCII: 0xA0 ASCII_CODE+0x80,
1682 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1683 RULE is a one byte code of the range 0xA0..0xF0 that
1684 represents a composition rule.
/* Table indexed by a byte value (256 entries).  emacs_mule_char
   switches on the entry for a leading byte, and
   detect_coding_emacs_mule uses `entry - 1' as the number of bytes
   that must follow a leading byte; so the entry is presumably the
   total byte length of the sequence started by that byte -- confirm
   where the table is initialized.  */
1687 char emacs_mule_bytes
[256];
/* Decode one character in emacs-mule form starting at SRC.
   The length class of the sequence is taken from
   emacs_mule_bytes[leading byte]; the charset is looked up in
   emacs_mule_charset[] (indexed by the leading byte, or by the second
   byte after EMACS_MULE_LEADING_CODE_PRIVATE_11/12 and in the longest
   form), and the position bytes are combined into CODE, the first of
   two position bytes going to bits 8 and up.  In one visible path the
   charset is charset_ascii or charset_eight_bit depending on whether
   CODE is an ASCII byte.  The character is then obtained with
   DECODE_CHAR (charset, code).
   The number of bytes and of characters consumed are stored in
   *NBYTES and *NCHARS.
   NOTE(review): the return type, byte-fetching statements, case
   labels, error exits, the store through ID and the return statements
   are missing from this listing.  */
1690 emacs_mule_char (coding
, src
, nbytes
, nchars
, id
)
1691 struct coding_system
*coding
;
1692 const unsigned char *src
;
1693 int *nbytes
, *nchars
, *id
;
1695 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1696 const unsigned char *src_base
= src
;
1697 int multibytep
= coding
->src_multibyte
;
1698 struct charset
*charset
;
1701 int consumed_chars
= 0;
1707 charset
= emacs_mule_charset
[0];
1713 /* Old style component character of a composition. */
1723 switch (emacs_mule_bytes
[c
])
1726 if (! (charset
= emacs_mule_charset
[c
]))
1735 if (c
== EMACS_MULE_LEADING_CODE_PRIVATE_11
1736 || c
== EMACS_MULE_LEADING_CODE_PRIVATE_12
)
1739 if (c
< 0xA0 || ! (charset
= emacs_mule_charset
[c
]))
1748 if (! (charset
= emacs_mule_charset
[c
]))
1753 code
= (c
& 0x7F) << 8;
1763 if (c
< 0 || ! (charset
= emacs_mule_charset
[c
]))
1768 code
= (c
& 0x7F) << 8;
1777 charset
= CHARSET_FROM_ID (ASCII_BYTE_P (code
)
1778 ? charset_ascii
: charset_eight_bit
);
1784 c
= DECODE_CHAR (charset
, code
);
1788 *nbytes
= src
- src_base
;
1789 *nchars
= consumed_chars
;
1802 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1803 Check if a text is encoded in `emacs-mule'. If it is, return 1,
else return 0. */
/* Detector for the emacs-mule category.  CATEGORY_MASK_EMACS_MULE is
   always added to DETECT_INFO->checked and scanning starts after
   CODING->head_ascii bytes.
   A possible composition sequence is only skipped, but it must span
   more than 4 bytes.  One visible test looks for ISO_CODE_ESC,
   ISO_CODE_SI and ISO_CODE_SO.  For a multibyte sequence the number of
   bytes still expected is emacs_mule_bytes[leading byte] - 1; if the
   count is not exhausted (more_bytes != 0) the visible code leaves the
   normal path.  `found' is set to CATEGORY_MASK_EMACS_MULE after a
   sequence is accepted and ORed into DETECT_INFO->found at the end;
   two visible statements add the category to DETECT_INFO->rejected,
   the second guarded by a partial sequence in the last block.
   NOTE(review): the loop headers, byte-fetching statements, the
   declaration of `found', labels and return statements are missing
   from this listing.  */
1807 detect_coding_emacs_mule (coding
, detect_info
)
1808 struct coding_system
*coding
;
1809 struct coding_detection_info
*detect_info
;
1811 const unsigned char *src
= coding
->source
, *src_base
;
1812 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1813 int multibytep
= coding
->src_multibyte
;
1814 int consumed_chars
= 0;
1818 detect_info
->checked
|= CATEGORY_MASK_EMACS_MULE
;
1819 /* A coding system of this category is always ASCII compatible. */
1820 src
+= coding
->head_ascii
;
1830 /* Perhaps the start of composite character. We simply skip
1831 it because analyzing it is too heavy for detecting. But,
1832 at least, we check that the composite character
1833 consists of more than 4 bytes. */
1834 const unsigned char *src_base
;
1844 if (src
- src_base
<= 4)
1846 found
= CATEGORY_MASK_EMACS_MULE
;
1854 && (c
== ISO_CODE_ESC
|| c
== ISO_CODE_SI
|| c
== ISO_CODE_SO
))
1859 int more_bytes
= emacs_mule_bytes
[*src_base
] - 1;
1861 while (more_bytes
> 0)
1866 src
--; /* Unread the last byte. */
1871 if (more_bytes
!= 0)
1873 found
= CATEGORY_MASK_EMACS_MULE
;
1876 detect_info
->rejected
|= CATEGORY_MASK_EMACS_MULE
;
1880 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
1882 detect_info
->rejected
|= CATEGORY_MASK_EMACS_MULE
;
1885 detect_info
->found
|= found
;
1890 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
/* NOTE(review): in this listing the comment below has lost its last
   line and the macro has lost several body lines; the visible part
   tests SRC against SRC_END, decodes with emacs_mule_char, can jump to
   the label invalid_code, and adds the decoded character count to
   consumed_chars.  */
1892 /* Decode a character represented as a component of composition
1893 sequence of Emacs 20/21 style at SRC. Set C to that character and
1894 update SRC to the head of next character (or an encoded composition
1895 rule). If SRC doesn't point to a composition component, set C to -1.
1896 If SRC points to an invalid byte sequence, global exit by a return
1899 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1903 int nbytes, nchars; \
1905 if (src == src_end) \
1907 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1912 goto invalid_code; \
1916 consumed_chars += nchars; \
/* NOTE(review): the visible part of this macro reads one byte (after
   checking that SRC has not reached SRC_END), requires the resulting
   value to be in 0..80, splits it into GREF = C / 9 and NREF = C % 9,
   and stores COMPOSITION_ENCODE_RULE (gref, nref) through BUF.  The
   adjustment applied to C between reading and range-checking, and the
   end of the comment below, are missing from this listing.  */
1921 /* Decode a composition rule represented as a component of composition
1922 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1923 and increment BUF. If SRC points to an invalid byte sequence, set C
1926 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
1928 int c, gref, nref; \
1930 if (src >= src_end) \
1931 goto invalid_code; \
1932 ONE_MORE_BYTE_NO_CHECK (c); \
1934 if (c < 0 || c >= 81) \
1935 goto invalid_code; \
1937 gref = c / 9, nref = c % 9; \
1938 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
/* NOTE(review): the visible part of this macro requires two bytes to
   be available, reads them into GREF and NREF, requires both values to
   be in 0..80, and stores COMPOSITION_ENCODE_RULE (gref, nref) through
   BUF.  The declarations of GREF and NREF, any adjustment applied to
   them after reading, and the end of the comment below are missing
   from this listing.  */
1942 /* Decode a composition rule represented as a component of composition
1943 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1944 and increment BUF. If SRC points to an invalid byte sequence, set C
1947 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1951 if (src + 1>= src_end) \
1952 goto invalid_code; \
1953 ONE_MORE_BYTE_NO_CHECK (gref); \
1955 ONE_MORE_BYTE_NO_CHECK (nref); \
1957 if (gref < 0 || gref >= 81 \
1958 || nref < 0 || nref >= 81) \
1959 goto invalid_code; \
1960 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
/* Decode an Emacs 21 style composition whose METHOD byte is C.
   The method is C - 0xF2; the next two bytes give NBYTES and NCHARS,
   each after subtracting 0xA0.  A composition annotation header is
   appended to CHARBUF with ADD_COMPOSITION_DATA.  Unless the method is
   COMPOSITION_RELATIVE, components are decoded until consumed_chars
   reaches consumed_chars_base + NBYTES: odd-numbered components are
   rules (DECODE_EMACS_MULE_COMPOSITION_RULE_21) except for
   COMPOSITION_WITH_ALTCHARS, the others are characters.  The stored
   (negative) annotation length at CHARBUF_BASE[0] is then decreased by
   the number of components I.
   NOTE(review): the range checks guarding the visible
   `goto invalid_code' lines and the declaration/update of I are
   missing from this listing.  */
1964 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1966 /* Emacs 21 style format. The first three bytes at SRC are \
1967 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1968 the byte length of this composition information, CHARS is the \
1969 number of characters composed by this composition. */ \
1970 enum composition_method method = c - 0xF2; \
1971 int *charbuf_base = charbuf; \
1972 int consumed_chars_limit; \
1973 int nbytes, nchars; \
1975 ONE_MORE_BYTE (c); \
1977 goto invalid_code; \
1978 nbytes = c - 0xA0; \
1980 goto invalid_code; \
1981 ONE_MORE_BYTE (c); \
1983 goto invalid_code; \
1984 nchars = c - 0xA0; \
1985 ADD_COMPOSITION_DATA (charbuf, nchars, method); \
1986 consumed_chars_limit = consumed_chars_base + nbytes; \
1987 if (method != COMPOSITION_RELATIVE) \
1990 while (consumed_chars < consumed_chars_limit) \
1992 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1993 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
1995 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1998 if (consumed_chars < consumed_chars_limit) \
1999 goto invalid_code; \
2000 charbuf_base[0] -= i; \
/* Decode an Emacs 20 style relative composition.
   One byte is skipped, then up to MAX_COMPOSITION_COMPONENTS component
   characters are decoded into the local array COMPONENTS while the
   byte at SRC is >= 0xA0.  A COMPOSITION_RELATIVE annotation header
   for I characters is appended to CHARBUF, followed by the I
   characters themselves.
   NOTE(review): the loop condition reads *SRC before any visible
   comparison of SRC with SRC_END; check in the pristine file that the
   read cannot happen at the end of the source.  The declarations of I
   and J and the test guarding `goto invalid_code' are missing from
   this listing.  */
2005 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
2007 /* Emacs 20 style format for relative composition. */ \
2008 /* Store multibyte form of characters to be composed. */ \
2009 enum composition_method method = COMPOSITION_RELATIVE; \
2010 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
2011 int *buf = components; \
2015 ONE_MORE_BYTE (c); /* skip 0x80 */ \
2016 for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++) \
2017 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
2019 goto invalid_code; \
2020 ADD_COMPOSITION_DATA (charbuf, i, method); \
2021 for (j = 0; j < i; j++) \
2022 *charbuf++ = components[j]; \
/* Decode an Emacs 20 style rule-based composition.
   A first character and then up to MAX_COMPOSITION_COMPONENTS - 1
   (rule, character) pairs are decoded into the local array COMPONENTS.
   The sequence is invalid if no pair was read (I <= 1) or if the
   number of stored components is even.  If CHARBUF lacks room the
   macro jumps to no_more_source.  Otherwise a COMPOSITION_WITH_RULE
   annotation header is appended, followed by the components; the
   stored annotation length at CHARBUF_BASE[0] is decreased by I, and
   finally every second component (the characters) is appended again.
   NOTE(review): the declarations of I and J, the loop-exit test, and
   any recomputation of I between ADD_COMPOSITION_DATA and the copy
   loops are missing from this listing.  */
2026 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
2028 /* Emacs 20 style format for rule-base composition. */ \
2029 /* Store multibyte form of characters to be composed. */ \
2030 enum composition_method method = COMPOSITION_WITH_RULE; \
2031 int *charbuf_base = charbuf; \
2032 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
2033 int *buf = components; \
2036 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
2037 for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++) \
2041 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
2042 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
2044 if (i <= 1 || (buf - components) % 2 == 0) \
2045 goto invalid_code; \
2046 if (charbuf + i + (i / 2) + 1 >= charbuf_end) \
2047 goto no_more_source; \
2048 ADD_COMPOSITION_DATA (charbuf, i, method); \
2050 for (j = 0; j < i; j++) \
2051 *charbuf++ = components[j]; \
2052 charbuf_base[0] -= i; \
2053 for (j = 0; j < i; j += 2) \
2054 *charbuf++ = components[j]; \
/* Decode emacs-mule bytes of CODING into CODING->charbuf.
   The end of the usable character buffer is placed
   MAX_ANNOTATION_LENGTH elements before the real end, which leaves
   room for annotations.
   A METHOD byte C with C - 0xF2 between COMPOSITION_RELATIVE and
   COMPOSITION_WITH_RULE_ALTCHARS starts an Emacs 21 composition; the
   two Emacs 20 composition forms are handled by the other two
   composition macros.  A leading byte below 0xA0 whose
   emacs_mule_bytes entry is greater than 1 starts a multibyte
   character, decoded with emacs_mule_char, which also reports the
   charset id.  Before last_offset is advanced, a charset annotation
   covering char_offset - last_offset characters is emitted with
   ADD_CHARSET_DATA if last_id is not charset_ascii; the same is done
   once more at the end.
   In the recovery path consumed_chars is reset and a single byte is
   stored, unchanged if ASCII and through BYTE8_TO_CHAR otherwise.
   On exit CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated.
   NOTE(review): the first line of the charbuf_end declaration, the
   loop header, byte-fetching statements, several conditions and the
   labels are missing from this listing.  */
2059 decode_coding_emacs_mule (coding
)
2060 struct coding_system
*coding
;
2062 const unsigned char *src
= coding
->source
+ coding
->consumed
;
2063 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2064 const unsigned char *src_base
;
2065 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
2067 = coding
->charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
2068 int consumed_chars
= 0, consumed_chars_base
;
2069 int multibytep
= coding
->src_multibyte
;
2070 Lisp_Object attrs
, charset_list
;
2071 int char_offset
= coding
->produced_char
;
2072 int last_offset
= char_offset
;
2073 int last_id
= charset_ascii
;
2075 CODING_GET_INFO (coding
, attrs
, charset_list
);
2082 consumed_chars_base
= consumed_chars
;
2084 if (charbuf
>= charbuf_end
)
2103 if (c
- 0xF2 >= COMPOSITION_RELATIVE
2104 && c
- 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS
)
2105 DECODE_EMACS_MULE_21_COMPOSITION (c
);
2107 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c
);
2109 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c
);
2113 else if (c
< 0xA0 && emacs_mule_bytes
[c
] > 1)
2119 consumed_chars
= consumed_chars_base
;
2120 c
= emacs_mule_char (coding
, src
, &nbytes
, &nchars
, &id
);
2129 if (last_id
!= charset_ascii
)
2130 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
2132 last_offset
= char_offset
;
2136 consumed_chars
+= nchars
;
2145 consumed_chars
= consumed_chars_base
;
2147 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
2153 if (last_id
!= charset_ascii
)
2154 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
2155 coding
->consumed_char
+= consumed_chars_base
;
2156 coding
->consumed
= src_base
- coding
->source
;
2157 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Compute the leading code(s) for the emacs-mule charset ID and store
   them in CODES[0] and CODES[1]; CODES[1] is 0 when a single leading
   code suffices.  In the visible branches an ID is emitted as its own
   leading code, or as a second code after the extended leading code
   0x9A (ID < 0xE0), 0x9B (ID < 0xF0), 0x9C (ID < 0xF5) or 0x9D.
   NOTE(review): the first condition (the bound below which ID is its
   own leading code) and the final `else' are missing from this
   listing.  */
2161 #define EMACS_MULE_LEADING_CODES(id, codes) \
2164 codes[0] = id, codes[1] = 0; \
2165 else if (id < 0xE0) \
2166 codes[0] = 0x9A, codes[1] = id; \
2167 else if (id < 0xF0) \
2168 codes[0] = 0x9B, codes[1] = id; \
2169 else if (id < 0xF5) \
2170 codes[0] = 0x9C, codes[1] = id; \
2172 codes[0] = 0x9D, codes[1] = id; \
/* Encode the characters in CODING->charbuf in emacs-mule form into the
   destination of CODING.
   If the charset list of the coding system is not
   Vemacs_mule_charset_list, both the attribute and the local variable
   are set to that list.
   Annotations in the character buffer are handled first: composition
   annotations are not implemented yet; a charset annotation supplies a
   preferred charset id (element 3), which is reset to -1 when the
   visible Fmemq test on it yields nil.
   An ASCII character is emitted as one byte; an eight-bit character is
   converted with CHAR_TO_BYTE8.  For any other character the charset
   is the preferred one if it contains the character, otherwise the
   result of char_charset on the charset list.  In one visible path the
   character is replaced by CODING->default_char, which is emitted
   directly if ASCII and looked up again otherwise.  The leading codes
   computed by EMACS_MULE_LEADING_CODES are emitted, followed by the
   code as one byte (code | 0x80) or as two bytes (high, then low).
   Finally the result is recorded as CODING_RESULT_SUCCESS and
   CODING->produced_char and CODING->produced are updated.
   NOTE(review): the definition of SAFE_ROOM, the statements fetching C
   from the character buffer, the declarations of CODE, DIMENSION and
   EMACS_MULE_ID, several conditions and the braces are missing from
   this listing.  The visible call
   `char_charset (c, charset_list, NULL)' does not fill CODE although
   CODE is emitted later; check in the pristine file how CODE is
   obtained on that path.  */
2177 encode_coding_emacs_mule (coding
)
2178 struct coding_system
*coding
;
2180 int multibytep
= coding
->dst_multibyte
;
2181 int *charbuf
= coding
->charbuf
;
2182 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
2183 unsigned char *dst
= coding
->destination
+ coding
->produced
;
2184 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
2186 int produced_chars
= 0;
2187 Lisp_Object attrs
, charset_list
;
2189 int preferred_charset_id
= -1;
2191 CODING_GET_INFO (coding
, attrs
, charset_list
);
2192 if (! EQ (charset_list
, Vemacs_mule_charset_list
))
2194 CODING_ATTR_CHARSET_LIST (attrs
)
2195 = charset_list
= Vemacs_mule_charset_list
;
2198 while (charbuf
< charbuf_end
)
2200 ASSURE_DESTINATION (safe_room
);
2205 /* Handle an annotation. */
2208 case CODING_ANNOTATE_COMPOSITION_MASK
:
2209 /* Not yet implemented. */
2211 case CODING_ANNOTATE_CHARSET_MASK
:
2212 preferred_charset_id
= charbuf
[3];
2213 if (preferred_charset_id
>= 0
2214 && NILP (Fmemq (make_number (preferred_charset_id
),
2216 preferred_charset_id
= -1;
2225 if (ASCII_CHAR_P (c
))
2226 EMIT_ONE_ASCII_BYTE (c
);
2227 else if (CHAR_BYTE8_P (c
))
2229 c
= CHAR_TO_BYTE8 (c
);
2234 struct charset
*charset
;
2238 unsigned char leading_codes
[2];
2240 if (preferred_charset_id
>= 0)
2242 charset
= CHARSET_FROM_ID (preferred_charset_id
);
2243 if (! CHAR_CHARSET_P (c
, charset
))
2244 charset
= char_charset (c
, charset_list
, NULL
);
2247 charset
= char_charset (c
, charset_list
, &code
);
2250 c
= coding
->default_char
;
2251 if (ASCII_CHAR_P (c
))
2253 EMIT_ONE_ASCII_BYTE (c
);
2256 charset
= char_charset (c
, charset_list
, &code
);
2258 dimension
= CHARSET_DIMENSION (charset
);
2259 emacs_mule_id
= CHARSET_EMACS_MULE_ID (charset
);
2260 EMACS_MULE_LEADING_CODES (emacs_mule_id
, leading_codes
);
2261 EMIT_ONE_BYTE (leading_codes
[0]);
2262 if (leading_codes
[1])
2263 EMIT_ONE_BYTE (leading_codes
[1]);
2265 EMIT_ONE_BYTE (code
| 0x80);
2269 EMIT_ONE_BYTE (code
>> 8);
2270 EMIT_ONE_BYTE (code
& 0xFF);
2274 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
2275 coding
->produced_char
+= produced_chars
;
2276 coding
->produced
= dst
- coding
->destination
;
2281 /*** 7. ISO2022 handlers ***/
2283 /* The following note describes the coding system ISO2022 briefly.
2284 Since the intention of this note is to help understand the
2285 functions in this file, some parts are NOT ACCURATE or are OVERLY
2286 SIMPLIFIED. For thorough understanding, please refer to the
2287 original document of ISO2022. This is equivalent to the standard
2288 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2290 ISO2022 provides many mechanisms to encode several character sets
2291 in 7-bit and 8-bit environments. For 7-bit environments, all text
2292 is encoded using bytes less than 128. This may make the encoded
2293 text a little bit longer, but the text passes more easily through
2294 several types of gateway, some of which strip off the MSB (Most
Significant Bit).
2297 There are two kinds of character sets: control character sets and
2298 graphic character sets. The former contain control characters such
2299 as `newline' and `escape' to provide control functions (control
2300 functions are also provided by escape sequences). The latter
2301 contain graphic characters such as 'A' and '-'. Emacs recognizes
2302 two control character sets and many graphic character sets.
2304 Graphic character sets are classified into one of the following
2305 four classes, according to the number of bytes (DIMENSION) and
2306 number of characters in one dimension (CHARS) of the set:
2307 - DIMENSION1_CHARS94
2308 - DIMENSION1_CHARS96
2309 - DIMENSION2_CHARS94
2310 - DIMENSION2_CHARS96
2312 In addition, each character set is assigned an identification tag,
2313 unique for each set, called the "final character" (denoted as <F>
2314 hereafter). The <F> of each character set is decided by ECMA(*)
2315 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2316 (0x30..0x3F are for private use only).
2318 Note (*): ECMA = European Computer Manufacturers Association
2320 Here are examples of graphic character sets [NAME(<F>)]:
2321 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2322 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2323 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2324 o DIMENSION2_CHARS96 -- none for the moment
2326 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2327 C0 [0x00..0x1F] -- control character plane 0
2328 GL [0x20..0x7F] -- graphic character plane 0
2329 C1 [0x80..0x9F] -- control character plane 1
2330 GR [0xA0..0xFF] -- graphic character plane 1
2332 A control character set is directly designated and invoked to C0 or
2333 C1 by an escape sequence. The most common case is that:
2334 - ISO646's control character set is designated/invoked to C0, and
2335 - ISO6429's control character set is designated/invoked to C1,
2336 and usually these designations/invocations are omitted in encoded
2337 text. In a 7-bit environment, only C0 can be used, and a control
2338 character for C1 is encoded by an appropriate escape sequence to
2339 fit into the environment. All control characters for C1 are
2340 defined to have corresponding escape sequences.
2342 A graphic character set is at first designated to one of four
2343 graphic registers (G0 through G3), then these graphic registers are
2344 invoked to GL or GR. These designations and invocations can be
2345 done independently. The most common case is that G0 is invoked to
2346 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2347 these invocations and designations are omitted in encoded text.
2348 In a 7-bit environment, only GL can be used.
2350 When a graphic character set of CHARS94 is invoked to GL, codes
2351 0x20 and 0x7F of the GL area work as control characters SPACE and
2352 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not be used.
2355 There are two ways of invocation: locking-shift and single-shift.
2356 With locking-shift, the invocation lasts until the next different
2357 invocation, whereas with single-shift, the invocation affects the
2358 following character only and doesn't affect the locking-shift
2359 state. Invocations are done by the following control characters or escape sequences:
2362 ----------------------------------------------------------------------
2363 abbrev function cntrl escape seq description
2364 ----------------------------------------------------------------------
2365 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2366 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2367 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2368 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2369 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2370 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2371 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2372 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2373 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
2374 ----------------------------------------------------------------------
2375 (*) These are not used by any known coding system.
2377 Control characters for these functions are defined by macros
2378 ISO_CODE_XXX in `coding.h'.
2380 Designations are done by the following escape sequences:
2381 ----------------------------------------------------------------------
2382 escape sequence description
2383 ----------------------------------------------------------------------
2384 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2385 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2386 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2387 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2388 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2389 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2390 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2391 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2392 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2393 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2394 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2395 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2396 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2397 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2398 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2399 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2400 ----------------------------------------------------------------------
2402 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2403 of dimension 1, chars 94, and final character <F>, etc...
2405 Note (*): Although these designations are not allowed in ISO2022,
2406 Emacs accepts them on decoding, and produces them on encoding
2407 CHARS96 character sets in a coding system which is characterized as
2408 7-bit environment, non-locking-shift, and non-single-shift.
2410 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2411 '(' must be omitted. We refer to this as "short-form" hereafter.
2413 Now you may notice that there are a lot of ways of encoding the
2414 same multilingual text in ISO2022. Actually, there exist many
2415 coding systems such as Compound Text (used in X11's inter client
2416 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2417 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2418 localized platforms), and all of these are variants of ISO2022.
2420 In addition to the above, Emacs handles two more kinds of escape
2421 sequences: ISO6429's direction specification and Emacs' private
2422 sequence for specifying character composition.
2424 ISO6429's direction specification takes the following form:
2425 o CSI ']' -- end of the current direction
2426 o CSI '0' ']' -- end of the current direction
2427 o CSI '1' ']' -- start of left-to-right text
2428 o CSI '2' ']' -- start of right-to-left text
2429 The control character CSI (0x9B: control sequence introducer) is
2430 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2432 Character composition specification takes the following form:
2433 o ESC '0' -- start relative composition
2434 o ESC '1' -- end composition
2435 o ESC '2' -- start rule-base composition (*)
2436 o ESC '3' -- start relative composition with alternate chars (**)
2437 o ESC '4' -- start rule-base composition with alternate chars (**)
2438 Since these are not standard escape sequences of any ISO standard,
2439 the use of them with these meanings is restricted to Emacs only.
2441 (*) This form is used only in Emacs 20.7 and older versions,
2442 but newer versions can safely decode it.
2443 (**) This form is used only in Emacs 21.1 and newer versions,
2444 and older versions can't decode it.
2446 Here's a list of example usages of these composition escape
2447 sequences (categorized by `enum composition_method').
2449 COMPOSITION_RELATIVE:
2450 ESC 0 CHAR [ CHAR ] ESC 1
2451 COMPOSITION_WITH_RULE:
2452 ESC 2 CHAR [ RULE CHAR ] ESC 1
2453 COMPOSITION_WITH_ALTCHARS:
2454 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2455 COMPOSITION_WITH_RULE_ALTCHARS:
2456 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
/* Table of `enum iso_code_class_type' values with 256 entries.
   decode_coding_iso_2022 dispatches on `iso_code_class[c1]' for the
   code C1 it is decoding, with cases such as ISO_graphic_plane_0 and
   ISO_single_shift_2.  NOTE(review): presumably one entry per
   possible byte value -- confirm where the table is filled in.  */
2458 enum iso_code_class_type iso_code_class
[256];
/* Return nonzero if the charset whose id is ID may be handled by
   CODING.

   CODING must point to an object with an integer member
   `max_charset_id' and a byte table `safe_charsets' indexed by
   charset id.  ID is safe when it lies in [0, max_charset_id] and
   its table entry is non-negative when viewed as a signed byte.

   The table is accessed through a `char *', and whether plain `char'
   is signed is implementation-defined.  The entry is therefore cast
   to `signed char' explicitly; otherwise, on platforms where `char'
   is unsigned, the `>= 0' test would be true for every entry.
   NOTE(review): the "unsafe" marker is presumably the fill value
   used when setup_iso_safe_charsets creates the table -- confirm.

   ID is evaluated more than once; pass a side-effect-free
   expression.  */
#define SAFE_CHARSET_P(coding, id) \
  ((id) >= 0 \
   && (id) <= (coding)->max_charset_id \
   && (signed char) (coding)->safe_charsets[id] >= 0)
/* Nonzero unless CODING_ISO_INITIAL yields a negative value for
   entry 1 of the coding system stored for CATEGORY in
   coding_categories.  NOTE(review): presumably entry 1 is the charset
   initially designated to G1, i.e. whether a shift-out can be
   honored -- confirm against the definition of CODING_ISO_INITIAL.  */
#define SHIFT_OUT_OK(category) \
  (! (CODING_ISO_INITIAL (&coding_categories[category], 1) < 0))
/* Build the safe-charsets string for the ISO-2022 coding system
   whose attribute vector is ATTRS and store it in the
   coding_attr_safe_charsets slot of ATTRS.

   Steps visible in this text:
   - If the coding_attr_iso_flags value has
     CODING_ISO_FLAG_FULL_SUPPORT set and the charset list of ATTRS is
     not Viso_2022_charset_list, the charset list is replaced by
     Viso_2022_charset_list and the safe-charsets slot is reset to
     Qnil.
   - The safe-charsets slot is then tested with STRINGP.
     NOTE(review): the statement guarded by that test is not visible
     here; presumably the function returns early when the table
     already exists -- confirm.
   - The largest charset id in the list determines the length
     (max id + 1) of the string made with Fmake_string.
   - For each charset id in the list, a register number is stored at
     index ID with SSET: the value associated with ID in the
     coding_attr_iso_request alist, or REG96 (the cdr of
     coding_attr_iso_usage) in the branch for charsets with
     `iso_chars_96' set, or REG94 (the car).  NOTE(review): the
     conditions selecting between these stores are partly missing
     from this text.  */
2469 setup_iso_safe_charsets (attrs
)
2472 Lisp_Object charset_list
, safe_charsets
;
2473 Lisp_Object request
;
2474 Lisp_Object reg_usage
;
2477 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
2480 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
2481 if ((flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
2482 && ! EQ (charset_list
, Viso_2022_charset_list
))
2484 CODING_ATTR_CHARSET_LIST (attrs
)
2485 = charset_list
= Viso_2022_charset_list
;
2486 ASET (attrs
, coding_attr_safe_charsets
, Qnil
);
2489 if (STRINGP (AREF (attrs
, coding_attr_safe_charsets
)))
2493 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2495 int id
= XINT (XCAR (tail
));
2496 if (max_charset_id
< id
)
2497 max_charset_id
= id
;
2500 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
2502 request
= AREF (attrs
, coding_attr_iso_request
);
2503 reg_usage
= AREF (attrs
, coding_attr_iso_usage
);
2504 reg94
= XINT (XCAR (reg_usage
));
2505 reg96
= XINT (XCDR (reg_usage
));
2507 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2511 struct charset
*charset
;
2514 charset
= CHARSET_FROM_ID (XINT (id
));
2515 reg
= Fcdr (Fassq (id
, request
));
2517 SSET (safe_charsets
, XINT (id
), XINT (reg
));
2518 else if (charset
->iso_chars_96
)
2521 SSET (safe_charsets
, XINT (id
), reg96
);
2526 SSET (safe_charsets
, XINT (id
), reg94
);
2529 ASET (attrs
, coding_attr_safe_charsets
, safe_charsets
);
/* Detector for the ISO-2022 family of coding categories.

   CODING supplies the bytes to examine (`source', `src_bytes',
   `head_ascii', `src_multibyte'); DETECT_INFO receives the verdict.

   The function marks CATEGORY_MASK_ISO as checked, refreshes
   `max_charset_id' and `safe_charsets' of every category from
   coding_category_iso_7 through coding_category_iso_8_else, skips the
   leading ASCII bytes counted by `head_ascii', and then scans until
   every ISO category has been rejected.  Escape sequences, shift
   codes, CSI, single shifts and bytes of the upper half add category
   masks to the local sets FOUND and REJECTED.  On the normal exit
   path REJECTED is merged into DETECT_INFO->rejected and the
   categories found but not rejected into DETECT_INFO->found.

   NOTE(review): many statements (declarations, braces, the byte
   fetches, `break'/`return' lines) are missing from this text, so the
   exact control flow between the fragments below must be confirmed
   against the complete source.  */
2533 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2534 Check if a text is encoded in one of ISO-2022 based coding systems.
2535 If it is, return 1, else return 0. */
2538 detect_coding_iso_2022 (coding
, detect_info
)
2539 struct coding_system
*coding
;
2540 struct coding_detection_info
*detect_info
;
2542 const unsigned char *src
= coding
->source
, *src_base
= src
;
2543 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2544 int multibytep
= coding
->src_multibyte
;
2545 int single_shifting
= 0;
2548 int consumed_chars
= 0;
2553 detect_info
->checked
|= CATEGORY_MASK_ISO
;
2555 for (i
= coding_category_iso_7
; i
<= coding_category_iso_8_else
; i
++)
2557 struct coding_system
*this = &(coding_categories
[i
]);
2558 Lisp_Object attrs
, val
;
2560 attrs
= CODING_ID_ATTRS (this->id
);
2561 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2562 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs
), Viso_2022_charset_list
))
2563 setup_iso_safe_charsets (attrs
);
2564 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
2565 this->max_charset_id
= SCHARS (val
) - 1;
2566 this->safe_charsets
= (char *) SDATA (val
);
2569 /* A coding system of this category is always ASCII compatible. */
2570 src
+= coding
->head_ascii
;
2572 while (rejected
!= CATEGORY_MASK_ISO
)
2579 if (inhibit_iso_escape_detection
)
2581 single_shifting
= 0;
2583 if (c
>= '(' && c
<= '/')
2585 /* Designation sequence for a charset of dimension 1. */
2587 if (c1
< ' ' || c1
>= 0x80
2588 || (id
= iso_charset_table
[0][c
>= ','][c1
]) < 0)
2589 /* Invalid designation sequence. Just ignore. */
2594 /* Designation sequence for a charset of dimension 2. */
2596 if (c
>= '@' && c
<= 'B')
2597 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
2598 id
= iso_charset_table
[1][0][c
];
2599 else if (c
>= '(' && c
<= '/')
2602 if (c1
< ' ' || c1
>= 0x80
2603 || (id
= iso_charset_table
[1][c
>= ','][c1
]) < 0)
2604 /* Invalid designation sequence. Just ignore. */
2608 /* Invalid designation sequence. Just ignore it. */
2611 else if (c
== 'N' || c
== 'O')
2613 /* ESC <Fe> for SS2 or SS3. */
2614 single_shifting
= 1;
2615 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_8BIT
;
2618 else if (c
>= '0' && c
<= '4')
2620 /* ESC <Fp> for start/end composition. */
2621 found
|= CATEGORY_MASK_ISO
;
2626 /* Invalid escape sequence. Just ignore it. */
2630 /* We found a valid designation sequence for CHARSET. */
2631 rejected
|= CATEGORY_MASK_ISO_8BIT
;
2632 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7
],
2634 found
|= CATEGORY_MASK_ISO_7
;
2636 rejected
|= CATEGORY_MASK_ISO_7
;
2637 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_tight
],
2639 found
|= CATEGORY_MASK_ISO_7_TIGHT
;
2641 rejected
|= CATEGORY_MASK_ISO_7_TIGHT
;
2642 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_else
],
2644 found
|= CATEGORY_MASK_ISO_7_ELSE
;
2646 rejected
|= CATEGORY_MASK_ISO_7_ELSE
;
2647 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_8_else
],
2649 found
|= CATEGORY_MASK_ISO_8_ELSE
;
2651 rejected
|= CATEGORY_MASK_ISO_8_ELSE
;
2656 /* Locking shift out/in. */
2657 if (inhibit_iso_escape_detection
)
2659 single_shifting
= 0;
2660 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_8BIT
;
2661 found
|= CATEGORY_MASK_ISO_ELSE
;
2665 /* Control sequence introducer. */
2666 single_shifting
= 0;
2667 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_7_ELSE
;
2668 found
|= CATEGORY_MASK_ISO_8_ELSE
;
2669 goto check_extra_latin
;
2674 if (inhibit_iso_escape_detection
)
2676 single_shifting
= 0;
2677 rejected
|= CATEGORY_MASK_ISO_7BIT
;
2678 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2679 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2680 found
|= CATEGORY_MASK_ISO_8_1
, single_shifting
= 1;
2681 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2682 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2683 found
|= CATEGORY_MASK_ISO_8_2
, single_shifting
= 1;
2684 if (single_shifting
)
2686 goto check_extra_latin
;
2693 single_shifting
= 0;
2698 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_7_ELSE
;
2699 found
|= CATEGORY_MASK_ISO_8_1
;
2700 /* Check the length of succeeding codes of the range
2701 0xA0..0xFF. If the byte length is even, we include
2702 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2703 only when we are not single shifting. */
2704 if (! single_shifting
2705 && ! (rejected
& CATEGORY_MASK_ISO_8_2
))
2708 while (src
< src_end
)
2716 if (i
& 1 && src
< src_end
)
2717 rejected
|= CATEGORY_MASK_ISO_8_2
;
2719 found
|= CATEGORY_MASK_ISO_8_2
;
2724 single_shifting
= 0;
2725 if (! VECTORP (Vlatin_extra_code_table
)
2726 || NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
2728 rejected
= CATEGORY_MASK_ISO
;
2731 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2732 & CODING_ISO_FLAG_LATIN_EXTRA
)
2733 found
|= CATEGORY_MASK_ISO_8_1
;
2735 rejected
|= CATEGORY_MASK_ISO_8_1
;
2736 rejected
|= CATEGORY_MASK_ISO_8_2
;
2739 detect_info
->rejected
|= CATEGORY_MASK_ISO
;
2743 detect_info
->rejected
|= rejected
;
2744 detect_info
->found
|= (found
& ~rejected
);
/* DECODE_DESIGNATION (REG, DIM, CHARS_96, FINAL): record in `coding'
   that the charset looked up by ISO_CHARSET_TABLE (DIM, CHARS_96,
   FINAL) is designated to graphic register REG.

   - FINAL outside '0'..127, a negative table entry, or a charset that
     fails SAFE_CHARSET_P stores -2 as the designation of REG.
   - charset_jisx0201_roman is replaced by charset_ascii when
     CODING_ISO_FLAG_USE_ROMAN is set, and charset_jisx0208_1978 by
     charset_jisx0208 when CODING_ISO_FLAG_USE_OLDJIS is set.
   - PREV holds the previous designation of REG; the final test
     (previous designation -2 and new charset ASCII) selects the case
     in which the sequence is kept.

   The macro uses the variable `coding' of the expansion site.
   NOTE(review): the declarations of ID and PREV, the block
   delimiters and the statements that set CHARS_96 are missing from
   this text.  */
2749 /* Set designation state into CODING. Set CHARS_96 to -1 if the
2750 escape sequence should be kept. */
2751 #define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2755 if (final < '0' || final >= 128 \
2756 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2757 || !SAFE_CHARSET_P (coding, id)) \
2759 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2763 prev = CODING_ISO_DESIGNATION (coding, reg); \
2764 if (id == charset_jisx0201_roman) \
2766 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
2767 id = charset_ascii; \
2769 else if (id == charset_jisx0208_1978) \
2771 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
2772 id = charset_jisx0208; \
2774 CODING_ISO_DESIGNATION (coding, reg) = id; \
2775 /* If there was an invalid designation to REG previously, and this \
2776 designation is ASCII to REG, we should keep this designation \
2778 if (prev == -2 && id == charset_ascii) \
/* MAYBE_FINISH_COMPOSITION (): flush a composition that is still
   being collected, emitting its characters as ordinary text.

   The macro first tests for COMPOSING_NO (NOTE(review): the statement
   guarded by that test is missing here; presumably nothing is done in
   that case).  Otherwise it jumps to `no_more_source' if
   `component_idx' more entries do not fit before `charbuf_end',
   resets `composition_state' to COMPOSING_NO, and copies entries of
   `components' to `charbuf': every entry for COMPOSITION_RELATIVE and
   COMPOSITION_WITH_ALTCHARS, every second entry in the other branch.
   `char_offset' is advanced accordingly.

   Uses `composition_state', `method', `components', `component_idx',
   `charbuf', `charbuf_end', `char_offset' and `i' of the expansion
   site.  */
2783 #define MAYBE_FINISH_COMPOSITION() \
2786 if (composition_state == COMPOSING_NO) \
2788 /* It is assured that we have enough room for producing \
2789 characters stored in the table `components'. */ \
2790 if (charbuf + component_idx > charbuf_end) \
2791 goto no_more_source; \
2792 composition_state = COMPOSING_NO; \
2793 if (method == COMPOSITION_RELATIVE \
2794 || method == COMPOSITION_WITH_ALTCHARS) \
2796 for (i = 0; i < component_idx; i++) \
2797 *charbuf++ = components[i]; \
2798 char_offset += component_idx; \
2802 for (i = 0; i < component_idx; i += 2) \
2803 *charbuf++ = components[i]; \
2804 char_offset += (component_idx / 2) + 1; \
/* DECODE_COMPOSITION_START (C1): process a composition start
   sequence whose final byte is C1.

   Visible behavior:
   - In the COMPOSING_COMPONENT_RULE branch the components collected
     so far are frozen (`component_len = component_idx') and the state
     becomes COMPOSING_CHAR.
   - Otherwise any composition in progress is flushed with
     MAYBE_FINISH_COMPOSITION, room for MAX_COMPOSITION_COMPONENTS
     entries in `charbuf' is required, and the source is scanned for
     the terminating ESC '1'.  If none is found before `src_end',
     CODING_RESULT_INSUFFICIENT_SRC is recorded and control goes to
     `no_more_source'.
   - When the terminator exists, `method' is chosen from C1 ('0', '2',
     '3', anything else), `composition_state' becomes COMPOSING_CHAR
     for C1 <= '2' and COMPOSING_COMPONENT_CHAR otherwise, and both
     component counters are cleared.

   NOTE(review): the first part of the leading condition and several
   delimiters are missing from this text.  */
2809 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2810 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2811 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2812 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2813 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2816 #define DECODE_COMPOSITION_START(c1) \
2819 && composition_state == COMPOSING_COMPONENT_RULE) \
2821 component_len = component_idx; \
2822 composition_state = COMPOSING_CHAR; \
2826 const unsigned char *p; \
2828 MAYBE_FINISH_COMPOSITION (); \
2829 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2830 goto no_more_source; \
2831 for (p = src; p < src_end - 1; p++) \
2832 if (*p == ISO_CODE_ESC && p[1] == '1') \
2834 if (p == src_end - 1) \
2836 /* The current composition doesn't end in the current \
2838 record_conversion_result \
2839 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
2840 goto no_more_source; \
2843 /* This is surely the start of a composition. */ \
2844 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2845 : c1 == '2' ? COMPOSITION_WITH_RULE \
2846 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2847 : COMPOSITION_WITH_RULE_ALTCHARS); \
2848 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2849 : COMPOSING_COMPONENT_CHAR); \
2850 component_idx = component_len = 0; \
/* DECODE_COMPOSITION_END (): finish the composition being collected
   and write it to `charbuf'.

   NCHARS is the number of composed characters: the entries stored
   after the first `component_len' ones when alternate components
   were given, all entries for COMPOSITION_RELATIVE, and otherwise
   (component_idx + 1) / 2.  ADD_COMPOSITION_DATA writes the
   annotation; for methods other than COMPOSITION_RELATIVE the
   component entries follow it and the slot remembered in
   SAVED_CHARBUF is set to `saved_charbuf - charbuf'.  The composed
   characters are then appended (every second entry for
   COMPOSITION_WITH_RULE), advancing `char_offset' once per character.
   Finally `coding->annotated' is set and the state returns to
   COMPOSING_NO.

   NOTE(review): block delimiters and `else' lines are missing from
   this text.  */
2855 /* Handle composition end sequence ESC 1. */
2857 #define DECODE_COMPOSITION_END() \
2859 int nchars = (component_len > 0 ? component_idx - component_len \
2860 : method == COMPOSITION_RELATIVE ? component_idx \
2861 : (component_idx + 1) / 2); \
2863 int *saved_charbuf = charbuf; \
2865 ADD_COMPOSITION_DATA (charbuf, nchars, method); \
2866 if (method != COMPOSITION_RELATIVE) \
2868 if (component_len == 0) \
2869 for (i = 0; i < component_idx; i++) \
2870 *charbuf++ = components[i]; \
2872 for (i = 0; i < component_len; i++) \
2873 *charbuf++ = components[i]; \
2874 *saved_charbuf = saved_charbuf - charbuf; \
2876 if (method == COMPOSITION_WITH_RULE) \
2877 for (i = 0; i < component_idx; i += 2, char_offset++) \
2878 *charbuf++ = components[i]; \
2880 for (i = component_len; i < component_idx; i++, char_offset++) \
2881 *charbuf++ = components[i]; \
2882 coding->annotated = 1; \
2883 composition_state = COMPOSING_NO; \
/* DECODE_COMPOSITION_RULE (C1): replace C1, which must be an lvalue,
   by the value of COMPOSITION_ENCODE_RULE for the rule it denotes.

   - C1 < 81 is the single-byte format: the two reference points are
     C1 / 9 and C1 % 9, with the value 4 remapped to 10.
   - 81 <= C1 < 93 is the two-byte format: a second byte C2 is
     fetched with ONE_MORE_BYTE and the rule is built from C1 - 81
     and C2 - 32.

   NOTE(review): the lines before the first test and after the second
   branch (including what happens for C1 >= 93) are missing from this
   text.  */
2887 /* Decode a composition rule from the byte C1 (and maybe one more byte
2888 from SRC) and store one encoded composition rule in
2889 coding->cmp_data. */
2891 #define DECODE_COMPOSITION_RULE(c1) \
2894 if (c1 < 81) /* old format (before ver.21) */ \
2896 int gref = (c1) / 9; \
2897 int nref = (c1) % 9; \
2898 if (gref == 4) gref = 10; \
2899 if (nref == 4) nref = 10; \
2900 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
2902 else if (c1 < 93) /* new format (after ver.21) */ \
2904 ONE_MORE_BYTE (c2); \
2905 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
/* Decoder for ISO-2022 based coding systems.

   Input is taken from CODING->source, starting at CODING->consumed
   and ending at CODING->src_bytes.  Decoded characters and
   annotations are appended to CODING->charbuf starting at
   CODING->charbuf_used.  On exit CODING->consumed_char,
   CODING->consumed and CODING->charbuf_used are updated.

   CHARSET_ID_0 and CHARSET_ID_1 cache the charsets currently invoked
   to graphic planes 0 and 1 and are refreshed after every designation
   or invocation.  Each code is dispatched on iso_code_class[c1].
   Escape sequences handle designation, locking and single shifts,
   composition, direction and the CTEXT / UTF-8 extended segments.
   Runs of characters of one non-ASCII charset are recorded with
   ADD_CHARSET_DATA.

   The direction sequence check is parenthesized so that a coding
   system without CODING_ISO_FLAG_DIRECTION takes the branch guarded
   by the test.

   NOTE(review): many statements (braces, byte fetches, `break' and
   `goto' lines, some declarations) are missing from this text; the
   fragments are kept exactly as found.  */
2912 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2915 decode_coding_iso_2022 (coding
)
2916 struct coding_system
*coding
;
2918 const unsigned char *src
= coding
->source
+ coding
->consumed
;
2919 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2920 const unsigned char *src_base
;
2921 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
2923 = coding
->charbuf
+ coding
->charbuf_size
- 4 - MAX_ANNOTATION_LENGTH
;
2924 int consumed_chars
= 0, consumed_chars_base
;
2925 int multibytep
= coding
->src_multibyte
;
2926 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2927 int charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2928 int charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
2929 int charset_id_2
, charset_id_3
;
2930 struct charset
*charset
;
2932 /* For handling composition sequence. */
2933 #define COMPOSING_NO 0
2934 #define COMPOSING_CHAR 1
2935 #define COMPOSING_RULE 2
2936 #define COMPOSING_COMPONENT_CHAR 3
2937 #define COMPOSING_COMPONENT_RULE 4
2939 int composition_state
= COMPOSING_NO
;
2940 enum composition_method method
;
2941 int components
[MAX_COMPOSITION_COMPONENTS
* 2 + 1];
2944 Lisp_Object attrs
, charset_list
;
2945 int char_offset
= coding
->produced_char
;
2946 int last_offset
= char_offset
;
2947 int last_id
= charset_ascii
;
2949 CODING_GET_INFO (coding
, attrs
, charset_list
);
2950 setup_iso_safe_charsets (attrs
);
2957 consumed_chars_base
= consumed_chars
;
2959 if (charbuf
>= charbuf_end
)
2966 /* We produce at most one character. */
2967 switch (iso_code_class
[c1
])
2969 case ISO_0x20_or_0x7F
:
2970 if (composition_state
!= COMPOSING_NO
)
2972 if (composition_state
== COMPOSING_RULE
2973 || composition_state
== COMPOSING_COMPONENT_RULE
)
2975 DECODE_COMPOSITION_RULE (c1
);
2976 components
[component_idx
++] = c1
;
2977 composition_state
--;
2981 if (charset_id_0
< 0
2982 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0
)))
2983 /* This is SPACE or DEL. */
2984 charset
= CHARSET_FROM_ID (charset_ascii
);
2986 charset
= CHARSET_FROM_ID (charset_id_0
);
2989 case ISO_graphic_plane_0
:
2990 if (composition_state
!= COMPOSING_NO
)
2992 if (composition_state
== COMPOSING_RULE
2993 || composition_state
== COMPOSING_COMPONENT_RULE
)
2995 DECODE_COMPOSITION_RULE (c1
);
2996 components
[component_idx
++] = c1
;
2997 composition_state
--;
3001 if (charset_id_0
< 0)
3002 charset
= CHARSET_FROM_ID (charset_ascii
);
3004 charset
= CHARSET_FROM_ID (charset_id_0
);
3007 case ISO_0xA0_or_0xFF
:
3008 if (charset_id_1
< 0
3009 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1
))
3010 || CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SEVEN_BITS
)
3012 /* This is a graphic character, we fall down ... */
3014 case ISO_graphic_plane_1
:
3015 if (charset_id_1
< 0)
3017 charset
= CHARSET_FROM_ID (charset_id_1
);
3021 MAYBE_FINISH_COMPOSITION ();
3022 charset
= CHARSET_FROM_ID (charset_ascii
);
3026 MAYBE_FINISH_COMPOSITION ();
3030 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3031 || CODING_ISO_DESIGNATION (coding
, 1) < 0)
3033 CODING_ISO_INVOCATION (coding
, 0) = 1;
3034 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3038 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
))
3040 CODING_ISO_INVOCATION (coding
, 0) = 0;
3041 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3044 case ISO_single_shift_2_7
:
3045 case ISO_single_shift_2
:
3046 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
3048 /* SS2 is handled as an escape sequence of ESC 'N' */
3050 goto label_escape_sequence
;
3052 case ISO_single_shift_3
:
3053 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
3055 /* SS3 is handled as an escape sequence of ESC 'O' */
3057 goto label_escape_sequence
;
3059 case ISO_control_sequence_introducer
:
3060 /* CSI is handled as an escape sequence of ESC '[' ... */
3062 goto label_escape_sequence
;
3066 label_escape_sequence
:
3067 /* Escape sequences handled here are invocation,
3068 designation, direction specification, and character
3069 composition specification. */
3072 case '&': /* revision of following character set */
3074 if (!(c1
>= '@' && c1
<= '~'))
3077 if (c1
!= ISO_CODE_ESC
)
3080 goto label_escape_sequence
;
3082 case '$': /* designation of 2-byte character set */
3083 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
3089 if (c1
>= '@' && c1
<= 'B')
3090 { /* designation of JISX0208.1978, GB2312.1980,
3092 reg
= 0, chars96
= 0;
3094 else if (c1
>= 0x28 && c1
<= 0x2B)
3095 { /* designation of DIMENSION2_CHARS94 character set */
3096 reg
= c1
- 0x28, chars96
= 0;
3099 else if (c1
>= 0x2C && c1
<= 0x2F)
3100 { /* designation of DIMENSION2_CHARS96 character set */
3101 reg
= c1
- 0x2C, chars96
= 1;
3106 DECODE_DESIGNATION (reg
, 2, chars96
, c1
);
3107 /* We must update these variables now. */
3109 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3111 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
3117 case 'n': /* invocation of locking-shift-2 */
3118 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3119 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
3121 CODING_ISO_INVOCATION (coding
, 0) = 2;
3122 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3125 case 'o': /* invocation of locking-shift-3 */
3126 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3127 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
3129 CODING_ISO_INVOCATION (coding
, 0) = 3;
3130 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3133 case 'N': /* invocation of single-shift-2 */
3134 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3135 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
3137 charset_id_2
= CODING_ISO_DESIGNATION (coding
, 2);
3138 if (charset_id_2
< 0)
3139 charset
= CHARSET_FROM_ID (charset_ascii
);
3141 charset
= CHARSET_FROM_ID (charset_id_2
);
3143 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
3147 case 'O': /* invocation of single-shift-3 */
3148 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3149 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
3151 charset_id_3
= CODING_ISO_DESIGNATION (coding
, 3);
3152 if (charset_id_3
< 0)
3153 charset
= CHARSET_FROM_ID (charset_ascii
);
3155 charset
= CHARSET_FROM_ID (charset_id_3
);
3157 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
3161 case '0': case '2': case '3': case '4': /* start composition */
3162 if (! (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
))
3164 DECODE_COMPOSITION_START (c1
);
3167 case '1': /* end composition */
3168 if (composition_state
== COMPOSING_NO
)
3170 DECODE_COMPOSITION_END ();
3173 case '[': /* specification of direction */
/* The mask test must be parenthesized: `! FLAGS & BIT' negates FLAGS
   first and therefore never detects a cleared
   CODING_ISO_FLAG_DIRECTION.  */
3174 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DIRECTION
))
3176 /* For the moment, nested direction is not supported.
3177 So, `coding->mode & CODING_MODE_DIRECTION' zero means
3178 left-to-right, and nonzero means right-to-left. */
3182 case ']': /* end of the current direction */
3183 coding
->mode
&= ~CODING_MODE_DIRECTION
;
3185 case '0': /* end of the current direction */
3186 case '1': /* start of left-to-right direction */
3189 coding
->mode
&= ~CODING_MODE_DIRECTION
;
3194 case '2': /* start of right-to-left direction */
3197 coding
->mode
|= CODING_MODE_DIRECTION
;
3211 /* CTEXT extended segment:
3212 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3213 We keep these bytes as is for the moment.
3214 They may be decoded by post-read-conversion. */
3218 ONE_MORE_BYTE (dim
);
3221 size
= ((M
- 128) * 128) + (L
- 128);
3222 if (charbuf
+ 8 + size
> charbuf_end
)
3224 *charbuf
++ = ISO_CODE_ESC
;
3228 *charbuf
++ = BYTE8_TO_CHAR (M
);
3229 *charbuf
++ = BYTE8_TO_CHAR (L
);
3233 *charbuf
++ = ASCII_BYTE_P (c1
) ? c1
: BYTE8_TO_CHAR (c1
);
3238 /* XFree86 extension for embedding UTF-8 in CTEXT:
3239 ESC % G --UTF-8-BYTES-- ESC % @
3240 We keep these bytes as is for the moment.
3241 They may be decoded by post-read-conversion. */
3244 if (p
+ 6 > charbuf_end
)
3246 *p
++ = ISO_CODE_ESC
;
3249 while (p
< charbuf_end
)
3252 if (c1
== ISO_CODE_ESC
3253 && src
+ 1 < src_end
3260 *p
++ = ASCII_BYTE_P (c1
) ? c1
: BYTE8_TO_CHAR (c1
);
3262 if (p
+ 3 > charbuf_end
)
3264 *p
++ = ISO_CODE_ESC
;
3275 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
3280 if (c1
>= 0x28 && c1
<= 0x2B)
3281 { /* designation of DIMENSION1_CHARS94 character set */
3282 reg
= c1
- 0x28, chars96
= 0;
3285 else if (c1
>= 0x2C && c1
<= 0x2F)
3286 { /* designation of DIMENSION1_CHARS96 character set */
3287 reg
= c1
- 0x2C, chars96
= 1;
3292 DECODE_DESIGNATION (reg
, 1, chars96
, c1
);
3293 /* We must update these variables now. */
3295 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3297 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
3305 if (charset
->id
!= charset_ascii
3306 && last_id
!= charset
->id
)
3308 if (last_id
!= charset_ascii
)
3309 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
3310 last_id
= charset
->id
;
3311 last_offset
= char_offset
;
3314 /* Now we know CHARSET and 1st position code C1 of a character.
3315 Produce a decoded character while getting 2nd position code
3318 if (CHARSET_DIMENSION (charset
) > 1)
3321 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
3322 /* C2 is not in a valid range. */
3324 c1
= (c1
<< 8) | (c2
& 0x7F);
3325 if (CHARSET_DIMENSION (charset
) > 2)
3328 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
3329 /* C2 is not in a valid range. */
3331 c1
= (c1
<< 8) | (c2
& 0x7F);
3335 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c1
, c
);
3338 MAYBE_FINISH_COMPOSITION ();
3339 for (; src_base
< src
; src_base
++, char_offset
++)
3341 if (ASCII_BYTE_P (*src_base
))
3342 *charbuf
++ = *src_base
;
3344 *charbuf
++ = BYTE8_TO_CHAR (*src_base
);
3347 else if (composition_state
== COMPOSING_NO
)
3354 components
[component_idx
++] = c
;
3355 if (method
== COMPOSITION_WITH_RULE
3356 || (method
== COMPOSITION_WITH_RULE_ALTCHARS
3357 && composition_state
== COMPOSING_COMPONENT_CHAR
))
3358 composition_state
++;
3363 MAYBE_FINISH_COMPOSITION ();
3365 consumed_chars
= consumed_chars_base
;
3367 *charbuf
++ = c
< 0 ? -c
: ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
3377 if (last_id
!= charset_ascii
)
3378 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
3379 coding
->consumed_char
+= consumed_chars_base
;
3380 coding
->consumed
= src_base
- coding
->source
;
3381 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
3385 /* ISO2022 encoding stuff. */
3388 /* It is not enough to say just "ISO2022" on encoding, we have to
3389 specify more details. In Emacs, each coding system of ISO2022
3390 variant has the following specifications:
3391 1. Initial designation to G0 thru G3.
3392 2. Allows short-form designation?
3393 3. ASCII should be designated to G0 before control characters?
3394 4. ASCII should be designated to G0 at end of line?
3395 5. 7-bit environment or 8-bit environment?
3396 6. Use locking-shift?
3397 7. Use Single-shift?
3398 And the following two are only for Japanese:
3399 8. Use ASCII in place of JIS0201-1976-Roman?
3400 9. Use JISX0208-1983 in place of JISX0208-1978?
3401 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3402 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more details. */
/* ENCODE_DESIGNATION (CHARSET, REG, CODING): emit the escape sequence
   that designates CHARSET to graphic register REG and record the
   designation in CODING.

   Output, in order:
   - ESC '&' and the byte '@' + revision, only when
     CODING_ISO_FLAG_REVISION is set and the revision of CHARSET is
     not negative;
   - ESC;
   - for a charset of dimension 1, the REG-th character of "()*+"
     (94-character sets) or of ",-./" (96-character sets);
   - otherwise '$', followed for 94-character sets by the REG-th
     character of "()*+" only if CODING_ISO_FLAG_LONG_FORM is set or
     the final character is outside '@'..'B' (NOTE(review): one more
     alternative of this condition is missing from this text), and for
     96-character sets by the REG-th character of ",-./";
   - the final character of CHARSET.

   REG indexes four-character strings, so it is expected to be 0..3.
   NOTE(review): the declaration of the variable `c' and the block
   delimiters are missing from this text.  */
3406 /* Produce codes (escape sequence) for designating CHARSET to graphic
3407 register REG at DST, and increment DST. If <final-char> of CHARSET is
3408 '@', 'A', or 'B' and the coding system CODING allows, produce
3409 designation sequence of short-form. */
3411 #define ENCODE_DESIGNATION(charset, reg, coding) \
3413 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
3414 char *intermediate_char_94 = "()*+"; \
3415 char *intermediate_char_96 = ",-./"; \
3416 int revision = -1; \
3419 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3420 revision = CHARSET_ISO_REVISION (charset); \
3422 if (revision >= 0) \
3424 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3425 EMIT_ONE_BYTE ('@' + revision); \
3427 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
3428 if (CHARSET_DIMENSION (charset) == 1) \
3430 if (! CHARSET_ISO_CHARS_96 (charset)) \
3431 c = intermediate_char_94[reg]; \
3433 c = intermediate_char_96[reg]; \
3434 EMIT_ONE_ASCII_BYTE (c); \
3438 EMIT_ONE_ASCII_BYTE ('$'); \
3439 if (! CHARSET_ISO_CHARS_96 (charset)) \
3441 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
3443 || final_char < '@' || final_char > 'B') \
3444 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
3447 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
3449 EMIT_ONE_ASCII_BYTE (final_char); \
3451 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
3455 /* The following two macros produce codes (control character or escape
3456 sequence) for ISO2022 single-shift functions (single-shift-2 and single-shift-3). */
/* ENCODE_SINGLE_SHIFT_2: emit single-shift-2 and mark `coding' as
   single-shifting.  With CODING_ISO_FLAG_SEVEN_BITS the two-byte form
   ESC 'N' is emitted; the byte ISO_CODE_SS2 is the other form.
   NOTE(review): the line separating the two emits (presumably `else')
   and the enclosing block delimiters are missing from this text.
   Uses the variable `coding' of the expansion site.  */
3459 #define ENCODE_SINGLE_SHIFT_2 \
3461 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3462 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3464 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3465 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
/* ENCODE_SINGLE_SHIFT_3: emit single-shift-3 and mark `coding' as
   single-shifting.  With CODING_ISO_FLAG_SEVEN_BITS the two-byte form
   ESC 'O' is emitted; the byte ISO_CODE_SS3 is the other form.
   NOTE(review): the line separating the two emits (presumably `else')
   and the enclosing block delimiters are missing from this text.
   Uses the variable `coding' of the expansion site.  */
3469 #define ENCODE_SINGLE_SHIFT_3 \
3471 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3472 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3474 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3475 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3479 /* The following four macros produce codes (control character or
3480 escape sequence) for ISO2022 locking-shift functions (shift-in,
3481 shift-out, locking-shift-2, and locking-shift-3). */
/* ENCODE_SHIFT_IN: emit the shift-in control ISO_CODE_SI and record
   that graphic register 0 is now invoked to graphic plane 0.  Uses the
   variable `coding' of the expansion site.  NOTE(review): the
   enclosing block delimiters are missing from this text.  */
3483 #define ENCODE_SHIFT_IN \
3485 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3486 CODING_ISO_INVOCATION (coding, 0) = 0; \
/* ENCODE_SHIFT_OUT: emit the shift-out control ISO_CODE_SO and record
   that graphic register 1 is now invoked to graphic plane 0.  Uses the
   variable `coding' of the expansion site.  NOTE(review): the
   enclosing block delimiters are missing from this text.  */
3490 #define ENCODE_SHIFT_OUT \
3492 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3493 CODING_ISO_INVOCATION (coding, 0) = 1; \
/* ENCODE_LOCKING_SHIFT_2: emit ESC 'n' (locking-shift-2) and record
   that graphic register 2 is now invoked to graphic plane 0.  Uses the
   variable `coding' of the expansion site.  NOTE(review): the
   enclosing block delimiters are missing from this text.  */
3497 #define ENCODE_LOCKING_SHIFT_2 \
3499 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3500 CODING_ISO_INVOCATION (coding, 0) = 2; \
/* ENCODE_LOCKING_SHIFT_3: emit the locking-shift-3 sequence ESC 'o'
   and record that graphic register 3 is now invoked to graphic
   plane 0.

   ESC 'n' is locking-shift-2; emitting it here would make a decoder
   invoke G2 while the encoder believes G3 is invoked.

   Uses the variable `coding' of the expansion site.  */
#define ENCODE_LOCKING_SHIFT_3 \
  do { \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o'); \
    CODING_ISO_INVOCATION (coding, 0) = 3; \
  } while (0)
/* ENCODE_ISO_CHARACTER_DIMENSION1 (CHARSET, C1): emit the one-byte
   code C1 of CHARSET.

   - When CODING_ISO_FLAG_USE_ROMAN is set, ASCII is replaced by
     charset_jisx0201_roman; CHARSET is assigned, so it must be an
     lvalue.
   - While single-shifting, C1 is emitted with the high bit cleared
     (CODING_ISO_FLAG_SEVEN_BITS) or set, and the single-shift state is
     reset.
   - If the charset is invoked to graphic plane 0, C1 & 0x7F is
     emitted; if it is invoked to graphic plane 1, C1 | 0x80.
   - Otherwise encode_invocation_designation is called and its result
     stored in `dst'.

   Uses `coding' and `dst' of the expansion site.  NOTE(review): the
   loop construct, `break' lines, `else' lines and the end of the
   encode_invocation_designation call are missing from this text.  */
3511 /* Produce codes for a DIMENSION1 character whose character set is
3512 CHARSET and whose position-code is C1. Designation and invocation
3513 sequences are also produced in advance if necessary. */
3515 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3517 int id = CHARSET_ID (charset); \
3519 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
3520 && id == charset_ascii) \
3522 id = charset_jisx0201_roman; \
3523 charset = CHARSET_FROM_ID (id); \
3526 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3528 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3529 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3531 EMIT_ONE_BYTE (c1 | 0x80); \
3532 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3535 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3537 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3540 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3542 EMIT_ONE_BYTE (c1 | 0x80); \
3546 /* Since CHARSET is not yet invoked to any graphic planes, we \
3547 must invoke it, or, at first, designate it to some graphic \
3548 register. Then repeat the loop to actually produce the \
3550 dst = encode_invocation_designation (charset, coding, dst, \
3555 /* Produce codes for a DIMENSION2 character whose character set is
3556 CHARSET and whose position-codes are C1 and C2. Designation and
3557 invocation codes are also produced in advance if necessary. */
3559 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3561 int id = CHARSET_ID (charset); \
3563 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
3564 && id == charset_jisx0208) \
3566 id = charset_jisx0208_1978; \
3567 charset = CHARSET_FROM_ID (id); \
3570 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3572 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3573 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3575 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3576 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3579 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3581 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3584 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3586 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3590 /* Since CHARSET is not yet invoked to any graphic planes, we \
3591 must invoke it, or, at first, designate it to some graphic \
3592 register. Then repeat the loop to actually produce the \
3594 dst = encode_invocation_designation (charset, coding, dst, \
3599 #define ENCODE_ISO_CHARACTER(charset, c) \
3601 int code = ENCODE_CHAR ((charset),(c)); \
3603 if (CHARSET_DIMENSION (charset) == 1) \
3604 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3606 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
/* Produce designation and invocation codes at a place pointed by DST
   to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
   Return the updated DST.  */
3615 encode_invocation_designation (charset
, coding
, dst
, p_nchars
)
3616 struct charset
*charset
;
3617 struct coding_system
*coding
;
3621 int multibytep
= coding
->dst_multibyte
;
3622 int produced_chars
= *p_nchars
;
3623 int reg
; /* graphic register number */
3624 int id
= CHARSET_ID (charset
);
3626 /* At first, check designations. */
3627 for (reg
= 0; reg
< 4; reg
++)
3628 if (id
== CODING_ISO_DESIGNATION (coding
, reg
))
3633 /* CHARSET is not yet designated to any graphic registers. */
3634 /* At first check the requested designation. */
3635 reg
= CODING_ISO_REQUEST (coding
, id
);
3637 /* Since CHARSET requests no special designation, designate it
3638 to graphic register 0. */
3641 ENCODE_DESIGNATION (charset
, reg
, coding
);
3644 if (CODING_ISO_INVOCATION (coding
, 0) != reg
3645 && CODING_ISO_INVOCATION (coding
, 1) != reg
)
3647 /* Since the graphic register REG is not invoked to any graphic
3648 planes, invoke it to graphic plane 0. */
3651 case 0: /* graphic register 0 */
3655 case 1: /* graphic register 1 */
3659 case 2: /* graphic register 2 */
3660 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3661 ENCODE_SINGLE_SHIFT_2
;
3663 ENCODE_LOCKING_SHIFT_2
;
3666 case 3: /* graphic register 3 */
3667 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3668 ENCODE_SINGLE_SHIFT_3
;
3670 ENCODE_LOCKING_SHIFT_3
;
3675 *p_nchars
= produced_chars
;
/* The following three macros produce codes for indicating direction
   of text.  */
/* Produce a control sequence introducer: the two-byte escape sequence
   ESC '[' when the coding system has the seven-bit flag set, otherwise
   the single byte ISO_CODE_CSI.

   The flag word is a bit mask, so it is tested with `&' like every
   other CODING_ISO_FLAG_* check; comparing with `==' would only match
   when SEVEN_BITS is the sole flag set.

   `coding' must be in scope at the expansion site, together with
   whatever destination state the EMIT_* macros rely on.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER				\
  do {									\
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)	\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');				\
    else								\
      EMIT_ONE_BYTE (ISO_CODE_CSI);					\
  } while (0)


/* Produce the right-to-left direction sequence: CSI followed by the
   bytes '2' and ']'.  ENCODE_CONTROL_SEQUENCE_INTRODUCER is an
   object-like macro, so it is used without an argument list.  */
#define ENCODE_DIRECTION_R2L()			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    EMIT_TWO_ASCII_BYTES ('2', ']');		\
  } while (0)


/* Produce the left-to-right direction sequence: CSI followed by the
   bytes '0' and ']'.  */
#define ENCODE_DIRECTION_L2R()			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    EMIT_TWO_ASCII_BYTES ('0', ']');		\
  } while (0)
3704 /* Produce codes for designation and invocation to reset the graphic
3705 planes and registers to initial state. */
3706 #define ENCODE_RESET_PLANE_AND_REGISTER() \
3709 struct charset *charset; \
3711 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3713 for (reg = 0; reg < 4; reg++) \
3714 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3715 && (CODING_ISO_DESIGNATION (coding, reg) \
3716 != CODING_ISO_INITIAL (coding, reg))) \
3718 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3719 ENCODE_DESIGNATION (charset, reg, coding); \
3724 /* Produce designation sequences of charsets in the line started from
3725 SRC to a place pointed by DST, and return updated DST.
3727 If the current block ends before any end-of-line, we may fail to
3728 find all the necessary designations. */
3730 static unsigned char *
3731 encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
)
3732 struct coding_system
*coding
;
3733 int *charbuf
, *charbuf_end
;
3736 struct charset
*charset
;
3737 /* Table of charsets to be designated to each graphic register. */
3739 int c
, found
= 0, reg
;
3740 int produced_chars
= 0;
3741 int multibytep
= coding
->dst_multibyte
;
3743 Lisp_Object charset_list
;
3745 attrs
= CODING_ID_ATTRS (coding
->id
);
3746 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
3747 if (EQ (charset_list
, Qiso_2022
))
3748 charset_list
= Viso_2022_charset_list
;
3750 for (reg
= 0; reg
< 4; reg
++)
3760 charset
= char_charset (c
, charset_list
, NULL
);
3761 id
= CHARSET_ID (charset
);
3762 reg
= CODING_ISO_REQUEST (coding
, id
);
3763 if (reg
>= 0 && r
[reg
] < 0)
3772 for (reg
= 0; reg
< 4; reg
++)
3774 && CODING_ISO_DESIGNATION (coding
, reg
) != r
[reg
])
3775 ENCODE_DESIGNATION (CHARSET_FROM_ID (r
[reg
]), reg
, coding
);
3781 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3784 encode_coding_iso_2022 (coding
)
3785 struct coding_system
*coding
;
3787 int multibytep
= coding
->dst_multibyte
;
3788 int *charbuf
= coding
->charbuf
;
3789 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
3790 unsigned char *dst
= coding
->destination
+ coding
->produced
;
3791 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
3794 = (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3795 && CODING_ISO_BOL (coding
));
3796 int produced_chars
= 0;
3797 Lisp_Object attrs
, eol_type
, charset_list
;
3798 int ascii_compatible
;
3800 int preferred_charset_id
= -1;
3802 CODING_GET_INFO (coding
, attrs
, charset_list
);
3803 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
3804 if (VECTORP (eol_type
))
3807 setup_iso_safe_charsets (attrs
);
3808 /* Charset list may have been changed. */
3809 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
); \
3810 coding
->safe_charsets
= (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs
));
3812 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
3814 while (charbuf
< charbuf_end
)
3816 ASSURE_DESTINATION (safe_room
);
3818 if (bol_designation
)
3820 unsigned char *dst_prev
= dst
;
3822 /* We have to produce designation sequences if any now. */
3823 dst
= encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
);
3824 bol_designation
= 0;
3825 /* We are sure that designation sequences are all ASCII bytes. */
3826 produced_chars
+= dst
- dst_prev
;
3833 /* Handle an annotation. */
3836 case CODING_ANNOTATE_COMPOSITION_MASK
:
3837 /* Not yet implemented. */
3839 case CODING_ANNOTATE_CHARSET_MASK
:
3840 preferred_charset_id
= charbuf
[2];
3841 if (preferred_charset_id
>= 0
3842 && NILP (Fmemq (make_number (preferred_charset_id
),
3844 preferred_charset_id
= -1;
3853 /* Now encode the character C. */
3854 if (c
< 0x20 || c
== 0x7F)
3857 || (c
== '\r' && EQ (eol_type
, Qmac
)))
3859 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3860 ENCODE_RESET_PLANE_AND_REGISTER ();
3861 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_INIT_AT_BOL
)
3865 for (i
= 0; i
< 4; i
++)
3866 CODING_ISO_DESIGNATION (coding
, i
)
3867 = CODING_ISO_INITIAL (coding
, i
);
3870 = CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
;
3872 else if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_CNTL
)
3873 ENCODE_RESET_PLANE_AND_REGISTER ();
3874 EMIT_ONE_ASCII_BYTE (c
);
3876 else if (ASCII_CHAR_P (c
))
3878 if (ascii_compatible
)
3879 EMIT_ONE_ASCII_BYTE (c
);
3882 struct charset
*charset
= CHARSET_FROM_ID (charset_ascii
);
3883 ENCODE_ISO_CHARACTER (charset
, c
);
3886 else if (CHAR_BYTE8_P (c
))
3888 c
= CHAR_TO_BYTE8 (c
);
3893 struct charset
*charset
;
3895 if (preferred_charset_id
>= 0)
3897 charset
= CHARSET_FROM_ID (preferred_charset_id
);
3898 if (! CHAR_CHARSET_P (c
, charset
))
3899 charset
= char_charset (c
, charset_list
, NULL
);
3902 charset
= char_charset (c
, charset_list
, NULL
);
3905 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
3907 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
3908 charset
= CHARSET_FROM_ID (charset_ascii
);
3912 c
= coding
->default_char
;
3913 charset
= char_charset (c
, charset_list
, NULL
);
3916 ENCODE_ISO_CHARACTER (charset
, c
);
3920 if (coding
->mode
& CODING_MODE_LAST_BLOCK
3921 && CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3923 ASSURE_DESTINATION (safe_room
);
3924 ENCODE_RESET_PLANE_AND_REGISTER ();
3926 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
3927 CODING_ISO_BOL (coding
) = bol_designation
;
3928 coding
->produced_char
+= produced_chars
;
3929 coding
->produced
= dst
- coding
->destination
;
/*** 8. SJIS and BIG5 handlers ***/
3936 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3937 quite widely. So, for the moment, Emacs supports them in the bare
3938 C code. But, in the future, they may be supported only by CCL. */
3940 /* SJIS is a coding system encoding three character sets: ASCII, right
3941 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3942 as is. A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, but the two position-codes are divided and
   shifted so that they fit in the range below.
3947 --- CODE RANGE of SJIS ---
3948 (character set) (range)
3950 KATAKANA-JISX0201 0xA0 .. 0xDF
3951 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
3952 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
3953 -------------------------------
3957 /* BIG5 is a coding system encoding two character sets: ASCII and
3958 Big5. An ASCII character is encoded as is. Big5 is a two-byte
3959 character set and is encoded in two-byte.
3961 --- CODE RANGE of BIG5 ---
3962 (character set) (range)
3964 Big5 (1st byte) 0xA1 .. 0xFE
3965 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3966 --------------------------
3970 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3971 Check if a text is encoded in SJIS. If it is, return
3972 CATEGORY_MASK_SJIS, else return 0. */
3975 detect_coding_sjis (coding
, detect_info
)
3976 struct coding_system
*coding
;
3977 struct coding_detection_info
*detect_info
;
3979 const unsigned char *src
= coding
->source
, *src_base
;
3980 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3981 int multibytep
= coding
->src_multibyte
;
3982 int consumed_chars
= 0;
3986 detect_info
->checked
|= CATEGORY_MASK_SJIS
;
3987 /* A coding system of this category is always ASCII compatible. */
3988 src
+= coding
->head_ascii
;
3996 if ((c
>= 0x81 && c
<= 0x9F) || (c
>= 0xE0 && c
<= 0xEF))
3999 if (c
< 0x40 || c
== 0x7F || c
> 0xFC)
4001 found
= CATEGORY_MASK_SJIS
;
4003 else if (c
>= 0xA0 && c
< 0xE0)
4004 found
= CATEGORY_MASK_SJIS
;
4008 detect_info
->rejected
|= CATEGORY_MASK_SJIS
;
4012 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
4014 detect_info
->rejected
|= CATEGORY_MASK_SJIS
;
4017 detect_info
->found
|= found
;
4021 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4022 Check if a text is encoded in BIG5. If it is, return
4023 CATEGORY_MASK_BIG5, else return 0. */
4026 detect_coding_big5 (coding
, detect_info
)
4027 struct coding_system
*coding
;
4028 struct coding_detection_info
*detect_info
;
4030 const unsigned char *src
= coding
->source
, *src_base
;
4031 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4032 int multibytep
= coding
->src_multibyte
;
4033 int consumed_chars
= 0;
4037 detect_info
->checked
|= CATEGORY_MASK_BIG5
;
4038 /* A coding system of this category is always ASCII compatible. */
4039 src
+= coding
->head_ascii
;
4050 if (c
< 0x40 || (c
>= 0x7F && c
<= 0xA0))
4052 found
= CATEGORY_MASK_BIG5
;
4057 detect_info
->rejected
|= CATEGORY_MASK_BIG5
;
4061 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
4063 detect_info
->rejected
|= CATEGORY_MASK_BIG5
;
4066 detect_info
->found
|= found
;
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
   Decode SJIS text.  BIG5 text is decoded by decode_coding_big5.  */
4074 decode_coding_sjis (coding
)
4075 struct coding_system
*coding
;
4077 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4078 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4079 const unsigned char *src_base
;
4080 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
4082 = coding
->charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4083 int consumed_chars
= 0, consumed_chars_base
;
4084 int multibytep
= coding
->src_multibyte
;
4085 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
4086 struct charset
*charset_kanji2
;
4087 Lisp_Object attrs
, charset_list
, val
;
4088 int char_offset
= coding
->produced_char
;
4089 int last_offset
= char_offset
;
4090 int last_id
= charset_ascii
;
4092 CODING_GET_INFO (coding
, attrs
, charset_list
);
4095 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4096 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4097 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4098 charset_kanji2
= NILP (val
) ? NULL
: CHARSET_FROM_ID (XINT (XCAR (val
)));
4103 struct charset
*charset
;
4106 consumed_chars_base
= consumed_chars
;
4108 if (charbuf
>= charbuf_end
)
4115 charset
= charset_roman
;
4116 else if (c
== 0x80 || c
== 0xA0)
4118 else if (c
>= 0xA1 && c
<= 0xDF)
4120 /* SJIS -> JISX0201-Kana */
4122 charset
= charset_kana
;
4126 /* SJIS -> JISX0208 */
4128 if (c1
< 0x40 || c1
== 0x7F || c1
> 0xFC)
4132 charset
= charset_kanji
;
4134 else if (c
<= 0xFC && charset_kanji2
)
4136 /* SJIS -> JISX0213-2 */
4138 if (c1
< 0x40 || c1
== 0x7F || c1
> 0xFC)
4142 charset
= charset_kanji2
;
4146 if (charset
->id
!= charset_ascii
4147 && last_id
!= charset
->id
)
4149 if (last_id
!= charset_ascii
)
4150 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
4151 last_id
= charset
->id
;
4152 last_offset
= char_offset
;
4154 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
4161 consumed_chars
= consumed_chars_base
;
4163 *charbuf
++ = c
< 0 ? -c
: BYTE8_TO_CHAR (c
);
4169 if (last_id
!= charset_ascii
)
4170 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
4171 coding
->consumed_char
+= consumed_chars_base
;
4172 coding
->consumed
= src_base
- coding
->source
;
4173 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4177 decode_coding_big5 (coding
)
4178 struct coding_system
*coding
;
4180 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4181 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4182 const unsigned char *src_base
;
4183 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
4185 = coding
->charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4186 int consumed_chars
= 0, consumed_chars_base
;
4187 int multibytep
= coding
->src_multibyte
;
4188 struct charset
*charset_roman
, *charset_big5
;
4189 Lisp_Object attrs
, charset_list
, val
;
4190 int char_offset
= coding
->produced_char
;
4191 int last_offset
= char_offset
;
4192 int last_id
= charset_ascii
;
4194 CODING_GET_INFO (coding
, attrs
, charset_list
);
4196 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4197 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4202 struct charset
*charset
;
4205 consumed_chars_base
= consumed_chars
;
4207 if (charbuf
>= charbuf_end
)
4215 charset
= charset_roman
;
4219 if (c
< 0xA1 || c
> 0xFE)
4222 if (c1
< 0x40 || (c1
> 0x7E && c1
< 0xA1) || c1
> 0xFE)
4225 charset
= charset_big5
;
4227 if (charset
->id
!= charset_ascii
4228 && last_id
!= charset
->id
)
4230 if (last_id
!= charset_ascii
)
4231 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
4232 last_id
= charset
->id
;
4233 last_offset
= char_offset
;
4235 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
4242 consumed_chars
= consumed_chars_base
;
4244 *charbuf
++ = c
< 0 ? -c
: BYTE8_TO_CHAR (c
);
4250 if (last_id
!= charset_ascii
)
4251 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
4252 coding
->consumed_char
+= consumed_chars_base
;
4253 coding
->consumed
= src_base
- coding
->source
;
4254 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
   This function can encode charsets `ascii', `katakana-jisx0201',
   `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
   are sure that all these charsets are registered as official charset
   (i.e. do not have extended leading-codes).  Characters of other
   charsets are produced without any encoding.  This function encodes
   SJIS text; BIG5 text is encoded by encode_coding_big5.  */
4266 encode_coding_sjis (coding
)
4267 struct coding_system
*coding
;
4269 int multibytep
= coding
->dst_multibyte
;
4270 int *charbuf
= coding
->charbuf
;
4271 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4272 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4273 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4275 int produced_chars
= 0;
4276 Lisp_Object attrs
, charset_list
, val
;
4277 int ascii_compatible
;
4278 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
4279 struct charset
*charset_kanji2
;
4282 CODING_GET_INFO (coding
, attrs
, charset_list
);
4284 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4285 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4286 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4287 charset_kanji2
= NILP (val
) ? NULL
: CHARSET_FROM_ID (XINT (XCAR (val
)));
4289 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4291 while (charbuf
< charbuf_end
)
4293 ASSURE_DESTINATION (safe_room
);
4295 /* Now encode the character C. */
4296 if (ASCII_CHAR_P (c
) && ascii_compatible
)
4297 EMIT_ONE_ASCII_BYTE (c
);
4298 else if (CHAR_BYTE8_P (c
))
4300 c
= CHAR_TO_BYTE8 (c
);
4306 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
4310 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4312 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4313 charset
= CHARSET_FROM_ID (charset_ascii
);
4317 c
= coding
->default_char
;
4318 charset
= char_charset (c
, charset_list
, &code
);
4321 if (code
== CHARSET_INVALID_CODE (charset
))
4323 if (charset
== charset_kanji
)
4327 c1
= code
>> 8, c2
= code
& 0xFF;
4328 EMIT_TWO_BYTES (c1
, c2
);
4330 else if (charset
== charset_kana
)
4331 EMIT_ONE_BYTE (code
| 0x80);
4332 else if (charset_kanji2
&& charset
== charset_kanji2
)
4337 if (c1
== 0x21 || (c1
>= 0x23 && c1
< 0x25)
4338 || (c1
>= 0x2C && c1
<= 0x2F) || c1
>= 0x6E)
4340 JIS_TO_SJIS2 (code
);
4341 c1
= code
>> 8, c2
= code
& 0xFF;
4342 EMIT_TWO_BYTES (c1
, c2
);
4345 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4348 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4351 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4352 coding
->produced_char
+= produced_chars
;
4353 coding
->produced
= dst
- coding
->destination
;
4358 encode_coding_big5 (coding
)
4359 struct coding_system
*coding
;
4361 int multibytep
= coding
->dst_multibyte
;
4362 int *charbuf
= coding
->charbuf
;
4363 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4364 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4365 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4367 int produced_chars
= 0;
4368 Lisp_Object attrs
, charset_list
, val
;
4369 int ascii_compatible
;
4370 struct charset
*charset_roman
, *charset_big5
;
4373 CODING_GET_INFO (coding
, attrs
, charset_list
);
4375 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4376 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4377 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4379 while (charbuf
< charbuf_end
)
4381 ASSURE_DESTINATION (safe_room
);
4383 /* Now encode the character C. */
4384 if (ASCII_CHAR_P (c
) && ascii_compatible
)
4385 EMIT_ONE_ASCII_BYTE (c
);
4386 else if (CHAR_BYTE8_P (c
))
4388 c
= CHAR_TO_BYTE8 (c
);
4394 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
4398 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4400 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4401 charset
= CHARSET_FROM_ID (charset_ascii
);
4405 c
= coding
->default_char
;
4406 charset
= char_charset (c
, charset_list
, &code
);
4409 if (code
== CHARSET_INVALID_CODE (charset
))
4411 if (charset
== charset_big5
)
4415 c1
= code
>> 8, c2
= code
& 0xFF;
4416 EMIT_TWO_BYTES (c1
, c2
);
4419 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4422 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4423 coding
->produced_char
+= produced_chars
;
4424 coding
->produced
= dst
- coding
->destination
;
4429 /*** 10. CCL handlers ***/
4431 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4432 Check if a text is encoded in a coding system of which
4433 encoder/decoder are written in CCL program. If it is, return
4434 CATEGORY_MASK_CCL, else return 0. */
4437 detect_coding_ccl (coding
, detect_info
)
4438 struct coding_system
*coding
;
4439 struct coding_detection_info
*detect_info
;
4441 const unsigned char *src
= coding
->source
, *src_base
;
4442 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4443 int multibytep
= coding
->src_multibyte
;
4444 int consumed_chars
= 0;
4446 unsigned char *valids
;
4447 int head_ascii
= coding
->head_ascii
;
4450 detect_info
->checked
|= CATEGORY_MASK_CCL
;
4452 coding
= &coding_categories
[coding_category_ccl
];
4453 valids
= CODING_CCL_VALIDS (coding
);
4454 attrs
= CODING_ID_ATTRS (coding
->id
);
4455 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
4464 if (c
< 0 || ! valids
[c
])
4466 if ((valids
[c
] > 1))
4467 found
= CATEGORY_MASK_CCL
;
4469 detect_info
->rejected
|= CATEGORY_MASK_CCL
;
4473 detect_info
->found
|= found
;
4478 decode_coding_ccl (coding
)
4479 struct coding_system
*coding
;
4481 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4482 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4483 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
4484 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_size
;
4485 int consumed_chars
= 0;
4486 int multibytep
= coding
->src_multibyte
;
4487 struct ccl_program ccl
;
4488 int source_charbuf
[1024];
4489 int source_byteidx
[1024];
4490 Lisp_Object attrs
, charset_list
;
4492 CODING_GET_INFO (coding
, attrs
, charset_list
);
4493 setup_ccl_program (&ccl
, CODING_CCL_DECODER (coding
));
4495 while (src
< src_end
)
4497 const unsigned char *p
= src
;
4498 int *source
, *source_end
;
4502 while (i
< 1024 && p
< src_end
)
4504 source_byteidx
[i
] = p
- src
;
4505 source_charbuf
[i
++] = STRING_CHAR_ADVANCE (p
);
4508 while (i
< 1024 && p
< src_end
)
4509 source_charbuf
[i
++] = *p
++;
4511 if (p
== src_end
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
4514 source
= source_charbuf
;
4515 source_end
= source
+ i
;
4516 while (source
< source_end
)
4518 ccl_driver (&ccl
, source
, charbuf
,
4519 source_end
- source
, charbuf_end
- charbuf
,
4521 source
+= ccl
.consumed
;
4522 charbuf
+= ccl
.produced
;
4523 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_DST
)
4526 if (source
< source_end
)
4527 src
+= source_byteidx
[source
- source_charbuf
];
4530 consumed_chars
+= source
- source_charbuf
;
4532 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_SRC
4533 && ccl
.status
!= CODING_RESULT_INSUFFICIENT_SRC
)
4539 case CCL_STAT_SUSPEND_BY_SRC
:
4540 record_conversion_result (coding
, CODING_RESULT_INSUFFICIENT_SRC
);
4542 case CCL_STAT_SUSPEND_BY_DST
:
4545 case CCL_STAT_INVALID_CMD
:
4546 record_conversion_result (coding
, CODING_RESULT_INTERRUPT
);
4549 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4552 coding
->consumed_char
+= consumed_chars
;
4553 coding
->consumed
= src
- coding
->source
;
4554 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4558 encode_coding_ccl (coding
)
4559 struct coding_system
*coding
;
4561 struct ccl_program ccl
;
4562 int multibytep
= coding
->dst_multibyte
;
4563 int *charbuf
= coding
->charbuf
;
4564 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4565 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4566 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4567 int destination_charbuf
[1024];
4568 int i
, produced_chars
= 0;
4569 Lisp_Object attrs
, charset_list
;
4571 CODING_GET_INFO (coding
, attrs
, charset_list
);
4572 setup_ccl_program (&ccl
, CODING_CCL_ENCODER (coding
));
4574 ccl
.last_block
= coding
->mode
& CODING_MODE_LAST_BLOCK
;
4575 ccl
.dst_multibyte
= coding
->dst_multibyte
;
4577 while (charbuf
< charbuf_end
)
4579 ccl_driver (&ccl
, charbuf
, destination_charbuf
,
4580 charbuf_end
- charbuf
, 1024, charset_list
);
4583 ASSURE_DESTINATION (ccl
.produced
* 2);
4584 for (i
= 0; i
< ccl
.produced
; i
++)
4585 EMIT_ONE_BYTE (destination_charbuf
[i
] & 0xFF);
4589 ASSURE_DESTINATION (ccl
.produced
);
4590 for (i
= 0; i
< ccl
.produced
; i
++)
4591 *dst
++ = destination_charbuf
[i
] & 0xFF;
4592 produced_chars
+= ccl
.produced
;
4594 charbuf
+= ccl
.consumed
;
4595 if (ccl
.status
== CCL_STAT_QUIT
4596 || ccl
.status
== CCL_STAT_INVALID_CMD
)
4602 case CCL_STAT_SUSPEND_BY_SRC
:
4603 record_conversion_result (coding
, CODING_RESULT_INSUFFICIENT_SRC
);
4605 case CCL_STAT_SUSPEND_BY_DST
:
4606 record_conversion_result (coding
, CODING_RESULT_INSUFFICIENT_DST
);
4609 case CCL_STAT_INVALID_CMD
:
4610 record_conversion_result (coding
, CODING_RESULT_INTERRUPT
);
4613 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4617 coding
->produced_char
+= produced_chars
;
4618 coding
->produced
= dst
- coding
->destination
;
4624 /*** 10, 11. no-conversion handlers ***/
4626 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4629 decode_coding_raw_text (coding
)
4630 struct coding_system
*coding
;
4632 coding
->chars_at_source
= 1;
4633 coding
->consumed_char
= 0;
4634 coding
->consumed
= 0;
4635 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4639 encode_coding_raw_text (coding
)
4640 struct coding_system
*coding
;
4642 int multibytep
= coding
->dst_multibyte
;
4643 int *charbuf
= coding
->charbuf
;
4644 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_used
;
4645 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4646 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4647 int produced_chars
= 0;
4652 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
4654 if (coding
->src_multibyte
)
4655 while (charbuf
< charbuf_end
)
4657 ASSURE_DESTINATION (safe_room
);
4659 if (ASCII_CHAR_P (c
))
4660 EMIT_ONE_ASCII_BYTE (c
);
4661 else if (CHAR_BYTE8_P (c
))
4663 c
= CHAR_TO_BYTE8 (c
);
4668 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p0
= str
, *p1
= str
;
4670 CHAR_STRING_ADVANCE (c
, p1
);
4673 EMIT_ONE_BYTE (*p0
);
4679 while (charbuf
< charbuf_end
)
4681 ASSURE_DESTINATION (safe_room
);
4688 if (coding
->src_multibyte
)
4690 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4692 while (charbuf
< charbuf_end
)
4694 ASSURE_DESTINATION (safe_room
);
4696 if (ASCII_CHAR_P (c
))
4698 else if (CHAR_BYTE8_P (c
))
4699 *dst
++ = CHAR_TO_BYTE8 (c
);
4701 CHAR_STRING_ADVANCE (c
, dst
);
4707 ASSURE_DESTINATION (charbuf_end
- charbuf
);
4708 while (charbuf
< charbuf_end
&& dst
< dst_end
)
4709 *dst
++ = *charbuf
++;
4710 produced_chars
= dst
- (coding
->destination
+ coding
->dst_bytes
);
4713 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4714 coding
->produced_char
+= produced_chars
;
4715 coding
->produced
= dst
- coding
->destination
;
4719 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4720 Check if a text is encoded in a charset-based coding system. If it
4721 is, return 1, else return 0. */
4724 detect_coding_charset (coding
, detect_info
)
4725 struct coding_system
*coding
;
4726 struct coding_detection_info
*detect_info
;
4728 const unsigned char *src
= coding
->source
, *src_base
;
4729 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4730 int multibytep
= coding
->src_multibyte
;
4731 int consumed_chars
= 0;
4732 Lisp_Object attrs
, valids
;
4735 detect_info
->checked
|= CATEGORY_MASK_CHARSET
;
4737 coding
= &coding_categories
[coding_category_charset
];
4738 attrs
= CODING_ID_ATTRS (coding
->id
);
4739 valids
= AREF (attrs
, coding_attr_charset_valids
);
4741 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
4742 src
+= coding
->head_ascii
;
4752 if (NILP (AREF (valids
, c
)))
4755 found
= CATEGORY_MASK_CHARSET
;
4757 detect_info
->rejected
|= CATEGORY_MASK_CHARSET
;
4761 detect_info
->found
|= found
;
4766 decode_coding_charset (coding
)
4767 struct coding_system
*coding
;
4769 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4770 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4771 const unsigned char *src_base
;
4772 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
4774 = coding
->charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4775 int consumed_chars
= 0, consumed_chars_base
;
4776 int multibytep
= coding
->src_multibyte
;
4777 Lisp_Object attrs
, charset_list
, valids
;
4778 int char_offset
= coding
->produced_char
;
4779 int last_offset
= char_offset
;
4780 int last_id
= charset_ascii
;
4782 CODING_GET_INFO (coding
, attrs
, charset_list
);
4783 valids
= AREF (attrs
, coding_attr_charset_valids
);
4789 struct charset
*charset
;
4795 consumed_chars_base
= consumed_chars
;
4797 if (charbuf
>= charbuf_end
)
4805 val
= AREF (valids
, c
);
4810 charset
= CHARSET_FROM_ID (XFASTINT (val
));
4811 dim
= CHARSET_DIMENSION (charset
);
4815 code
= (code
<< 8) | c
;
4818 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
,
4823 /* VAL is a list of charset IDs. It is assured that the
4824 list is sorted by charset dimensions (smaller one
4828 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (val
)));
4829 dim
= CHARSET_DIMENSION (charset
);
4833 code
= (code
<< 8) | c
;
4836 CODING_DECODE_CHAR (coding
, src
, src_base
,
4837 src_end
, charset
, code
, c
);
4845 if (charset
->id
!= charset_ascii
4846 && last_id
!= charset
->id
)
4848 if (last_id
!= charset_ascii
)
4849 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
4850 last_id
= charset
->id
;
4851 last_offset
= char_offset
;
4860 consumed_chars
= consumed_chars_base
;
4862 *charbuf
++ = c
< 0 ? -c
: ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
4868 if (last_id
!= charset_ascii
)
4869 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
4870 coding
->consumed_char
+= consumed_chars_base
;
4871 coding
->consumed
= src_base
- coding
->source
;
4872 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4876 encode_coding_charset (coding
)
4877 struct coding_system
*coding
;
4879 int multibytep
= coding
->dst_multibyte
;
4880 int *charbuf
= coding
->charbuf
;
4881 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4882 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4883 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4884 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4885 int produced_chars
= 0;
4886 Lisp_Object attrs
, charset_list
;
4887 int ascii_compatible
;
4890 CODING_GET_INFO (coding
, attrs
, charset_list
);
4891 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4893 while (charbuf
< charbuf_end
)
4895 struct charset
*charset
;
4898 ASSURE_DESTINATION (safe_room
);
4900 if (ascii_compatible
&& ASCII_CHAR_P (c
))
4901 EMIT_ONE_ASCII_BYTE (c
);
4902 else if (CHAR_BYTE8_P (c
))
4904 c
= CHAR_TO_BYTE8 (c
);
4909 charset
= char_charset (c
, charset_list
, &code
);
4912 if (CHARSET_DIMENSION (charset
) == 1)
4913 EMIT_ONE_BYTE (code
);
4914 else if (CHARSET_DIMENSION (charset
) == 2)
4915 EMIT_TWO_BYTES (code
>> 8, code
& 0xFF);
4916 else if (CHARSET_DIMENSION (charset
) == 3)
4917 EMIT_THREE_BYTES (code
>> 16, (code
>> 8) & 0xFF, code
& 0xFF);
4919 EMIT_FOUR_BYTES (code
>> 24, (code
>> 16) & 0xFF,
4920 (code
>> 8) & 0xFF, code
& 0xFF);
4924 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4925 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4927 c
= coding
->default_char
;
4933 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4934 coding
->produced_char
+= produced_chars
;
4935 coding
->produced
= dst
- coding
->destination
;
4940 /*** 10. C library functions ***/
4942 /* Setup coding context CODING from information about CODING_SYSTEM.
4943 If CODING_SYSTEM is nil, `undecided' is assumed. If
4944 CODING_SYSTEM is invalid, signal an error. */
4947 setup_coding_system (coding_system
, coding
)
4948 Lisp_Object coding_system
;
4949 struct coding_system
*coding
;
4952 Lisp_Object eol_type
;
4953 Lisp_Object coding_type
;
4956 if (NILP (coding_system
))
4957 coding_system
= Qundecided
;
4959 CHECK_CODING_SYSTEM_GET_ID (coding_system
, coding
->id
);
4961 attrs
= CODING_ID_ATTRS (coding
->id
);
4962 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
4965 coding
->head_ascii
= -1;
4966 coding
->common_flags
4967 = (VECTORP (eol_type
) ? CODING_REQUIRE_DETECTION_MASK
: 0);
4968 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
4969 coding
->common_flags
|= CODING_REQUIRE_DECODING_MASK
;
4970 if (! NILP (CODING_ATTR_PRE_WRITE (attrs
)))
4971 coding
->common_flags
|= CODING_REQUIRE_ENCODING_MASK
;
4972 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs
)))
4973 coding
->common_flags
|= CODING_FOR_UNIBYTE_MASK
;
4975 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4976 coding
->max_charset_id
= SCHARS (val
) - 1;
4977 coding
->safe_charsets
= (char *) SDATA (val
);
4978 coding
->default_char
= XINT (CODING_ATTR_DEFAULT_CHAR (attrs
));
4980 coding_type
= CODING_ATTR_TYPE (attrs
);
4981 if (EQ (coding_type
, Qundecided
))
4983 coding
->detector
= NULL
;
4984 coding
->decoder
= decode_coding_raw_text
;
4985 coding
->encoder
= encode_coding_raw_text
;
4986 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
4988 else if (EQ (coding_type
, Qiso_2022
))
4991 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
4993 /* Invoke graphic register 0 to plane 0. */
4994 CODING_ISO_INVOCATION (coding
, 0) = 0;
4995 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4996 CODING_ISO_INVOCATION (coding
, 1)
4997 = (flags
& CODING_ISO_FLAG_SEVEN_BITS
? -1 : 1);
4998 /* Setup the initial status of designation. */
4999 for (i
= 0; i
< 4; i
++)
5000 CODING_ISO_DESIGNATION (coding
, i
) = CODING_ISO_INITIAL (coding
, i
);
5001 /* Not single shifting initially. */
5002 CODING_ISO_SINGLE_SHIFTING (coding
) = 0;
5003 /* Beginning of buffer should also be regarded as bol. */
5004 CODING_ISO_BOL (coding
) = 1;
5005 coding
->detector
= detect_coding_iso_2022
;
5006 coding
->decoder
= decode_coding_iso_2022
;
5007 coding
->encoder
= encode_coding_iso_2022
;
5008 if (flags
& CODING_ISO_FLAG_SAFE
)
5009 coding
->mode
|= CODING_MODE_SAFE_ENCODING
;
5010 coding
->common_flags
5011 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
5012 | CODING_REQUIRE_FLUSHING_MASK
);
5013 if (flags
& CODING_ISO_FLAG_COMPOSITION
)
5014 coding
->common_flags
|= CODING_ANNOTATE_COMPOSITION_MASK
;
5015 if (flags
& CODING_ISO_FLAG_DESIGNATION
)
5016 coding
->common_flags
|= CODING_ANNOTATE_CHARSET_MASK
;
5017 if (flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
5019 setup_iso_safe_charsets (attrs
);
5020 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
5021 coding
->max_charset_id
= SCHARS (val
) - 1;
5022 coding
->safe_charsets
= (char *) SDATA (val
);
5024 CODING_ISO_FLAGS (coding
) = flags
;
5026 else if (EQ (coding_type
, Qcharset
))
5028 coding
->detector
= detect_coding_charset
;
5029 coding
->decoder
= decode_coding_charset
;
5030 coding
->encoder
= encode_coding_charset
;
5031 coding
->common_flags
5032 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
5034 else if (EQ (coding_type
, Qutf_8
))
5036 coding
->detector
= detect_coding_utf_8
;
5037 coding
->decoder
= decode_coding_utf_8
;
5038 coding
->encoder
= encode_coding_utf_8
;
5039 coding
->common_flags
5040 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
5042 else if (EQ (coding_type
, Qutf_16
))
5044 val
= AREF (attrs
, coding_attr_utf_16_bom
);
5045 CODING_UTF_16_BOM (coding
) = (CONSP (val
) ? utf_16_detect_bom
5046 : EQ (val
, Qt
) ? utf_16_with_bom
5047 : utf_16_without_bom
);
5048 val
= AREF (attrs
, coding_attr_utf_16_endian
);
5049 CODING_UTF_16_ENDIAN (coding
) = (EQ (val
, Qbig
) ? utf_16_big_endian
5050 : utf_16_little_endian
);
5051 CODING_UTF_16_SURROGATE (coding
) = 0;
5052 coding
->detector
= detect_coding_utf_16
;
5053 coding
->decoder
= decode_coding_utf_16
;
5054 coding
->encoder
= encode_coding_utf_16
;
5055 coding
->common_flags
5056 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
5057 if (CODING_UTF_16_BOM (coding
) == utf_16_detect_bom
)
5058 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
5060 else if (EQ (coding_type
, Qccl
))
5062 coding
->detector
= detect_coding_ccl
;
5063 coding
->decoder
= decode_coding_ccl
;
5064 coding
->encoder
= encode_coding_ccl
;
5065 coding
->common_flags
5066 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
5067 | CODING_REQUIRE_FLUSHING_MASK
);
5069 else if (EQ (coding_type
, Qemacs_mule
))
5071 coding
->detector
= detect_coding_emacs_mule
;
5072 coding
->decoder
= decode_coding_emacs_mule
;
5073 coding
->encoder
= encode_coding_emacs_mule
;
5074 coding
->common_flags
5075 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
5076 if (! NILP (AREF (attrs
, coding_attr_emacs_mule_full
))
5077 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs
), Vemacs_mule_charset_list
))
5079 Lisp_Object tail
, safe_charsets
;
5080 int max_charset_id
= 0;
5082 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
5084 if (max_charset_id
< XFASTINT (XCAR (tail
)))
5085 max_charset_id
= XFASTINT (XCAR (tail
));
5086 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
5088 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
5090 SSET (safe_charsets
, XFASTINT (XCAR (tail
)), 0);
5091 coding
->max_charset_id
= max_charset_id
;
5092 coding
->safe_charsets
= (char *) SDATA (safe_charsets
);
5095 else if (EQ (coding_type
, Qshift_jis
))
5097 coding
->detector
= detect_coding_sjis
;
5098 coding
->decoder
= decode_coding_sjis
;
5099 coding
->encoder
= encode_coding_sjis
;
5100 coding
->common_flags
5101 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
5103 else if (EQ (coding_type
, Qbig5
))
5105 coding
->detector
= detect_coding_big5
;
5106 coding
->decoder
= decode_coding_big5
;
5107 coding
->encoder
= encode_coding_big5
;
5108 coding
->common_flags
5109 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
5111 else /* EQ (coding_type, Qraw_text) */
5113 coding
->detector
= NULL
;
5114 coding
->decoder
= decode_coding_raw_text
;
5115 coding
->encoder
= encode_coding_raw_text
;
5116 if (! EQ (eol_type
, Qunix
))
5118 coding
->common_flags
|= CODING_REQUIRE_DECODING_MASK
;
5119 if (! VECTORP (eol_type
))
5120 coding
->common_flags
|= CODING_REQUIRE_ENCODING_MASK
;
5128 /* Return a list of charsets supported by CODING. */
5131 coding_charset_list (coding
)
5132 struct coding_system
*coding
;
5134 Lisp_Object attrs
, charset_list
;
5136 CODING_GET_INFO (coding
, attrs
, charset_list
);
5137 if (EQ (CODING_ATTR_TYPE (attrs
), Qiso_2022
))
5139 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
5141 if (flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
5142 charset_list
= Viso_2022_charset_list
;
5144 else if (EQ (CODING_ATTR_TYPE (attrs
), Qemacs_mule
))
5146 charset_list
= Vemacs_mule_charset_list
;
5148 return charset_list
;
5152 /* Return raw-text or one of its subsidiaries that has the same
5153 eol_type as CODING-SYSTEM. */
5156 raw_text_coding_system (coding_system
)
5157 Lisp_Object coding_system
;
5159 Lisp_Object spec
, attrs
;
5160 Lisp_Object eol_type
, raw_text_eol_type
;
5162 if (NILP (coding_system
))
5164 spec
= CODING_SYSTEM_SPEC (coding_system
);
5165 attrs
= AREF (spec
, 0);
5167 if (EQ (CODING_ATTR_TYPE (attrs
), Qraw_text
))
5168 return coding_system
;
5170 eol_type
= AREF (spec
, 2);
5171 if (VECTORP (eol_type
))
5173 spec
= CODING_SYSTEM_SPEC (Qraw_text
);
5174 raw_text_eol_type
= AREF (spec
, 2);
5175 return (EQ (eol_type
, Qunix
) ? AREF (raw_text_eol_type
, 0)
5176 : EQ (eol_type
, Qdos
) ? AREF (raw_text_eol_type
, 1)
5177 : AREF (raw_text_eol_type
, 2));
5181 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5182 does, return one of the subsidiary that has the same eol-spec as
5183 PARENT. Otherwise, return CODING_SYSTEM. If PARENT is nil,
5184 inherit end-of-line format from the system's setting
5185 (system_eol_type). */
5188 coding_inherit_eol_type (coding_system
, parent
)
5189 Lisp_Object coding_system
, parent
;
5191 Lisp_Object spec
, eol_type
;
5193 if (NILP (coding_system
))
5194 coding_system
= Qraw_text
;
5195 spec
= CODING_SYSTEM_SPEC (coding_system
);
5196 eol_type
= AREF (spec
, 2);
5197 if (VECTORP (eol_type
))
5199 Lisp_Object parent_eol_type
;
5201 if (! NILP (parent
))
5203 Lisp_Object parent_spec
;
5206 = CODING_SYSTEM_SPEC (buffer_defaults
.buffer_file_coding_system
);
5207 parent_eol_type
= AREF (parent_spec
, 2);
5210 parent_eol_type
= system_eol_type
;
5211 if (EQ (parent_eol_type
, Qunix
))
5212 coding_system
= AREF (eol_type
, 0);
5213 else if (EQ (parent_eol_type
, Qdos
))
5214 coding_system
= AREF (eol_type
, 1);
5215 else if (EQ (parent_eol_type
, Qmac
))
5216 coding_system
= AREF (eol_type
, 2);
5218 return coding_system
;
5221 /* Emacs has a mechanism to automatically detect a coding system if it
5222 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5223 it's impossible to distinguish some coding systems accurately
5224 because they use the same range of codes. So, at first, coding
5225 systems are categorized into the following categories:
5227 o coding-category-emacs-mule
5229 The category for a coding system which has the same code range
5230 as Emacs' internal format. Assigned the coding-system (Lisp
5231 symbol) `emacs-mule' by default.
5233 o coding-category-sjis
5235 The category for a coding system which has the same code range
5236 as SJIS. Assigned the coding-system (Lisp
5237 symbol) `japanese-shift-jis' by default.
5239 o coding-category-iso-7
5241 The category for a coding system which has the same code range
5242 as ISO2022 of 7-bit environment. This doesn't use any locking
5243 shift and single shift functions. This can encode/decode all
5244 charsets. Assigned the coding-system (Lisp symbol)
5245 `iso-2022-7bit' by default.
5247 o coding-category-iso-7-tight
5249 Same as coding-category-iso-7 except that this can
5250 encode/decode only the specified charsets.
5252 o coding-category-iso-8-1
5254 The category for a coding system which has the same code range
5255 as ISO2022 of 8-bit environment and graphic plane 1 used only
5256 for DIMENSION1 charset. This doesn't use any locking shift
5257 and single shift functions. Assigned the coding-system (Lisp
5258 symbol) `iso-latin-1' by default.
5260 o coding-category-iso-8-2
5262 The category for a coding system which has the same code range
5263 as ISO2022 of 8-bit environment and graphic plane 1 used only
5264 for DIMENSION2 charset. This doesn't use any locking shift
5265 and single shift functions. Assigned the coding-system (Lisp
5266 symbol) `japanese-iso-8bit' by default.
5268 o coding-category-iso-7-else
5270 The category for a coding system which has the same code range
5271 as ISO2022 of 7-bit environment but uses locking shift or
5272 single shift functions. Assigned the coding-system (Lisp
5273 symbol) `iso-2022-7bit-lock' by default.
5275 o coding-category-iso-8-else
5277 The category for a coding system which has the same code range
5278 as ISO2022 of 8-bit environment but uses locking shift or
5279 single shift functions. Assigned the coding-system (Lisp
5280 symbol) `iso-2022-8bit-ss2' by default.
5282 o coding-category-big5
5284 The category for a coding system which has the same code range
5285 as BIG5. Assigned the coding-system (Lisp symbol)
5286 `cn-big5' by default.
5288 o coding-category-utf-8
5290 The category for a coding system which has the same code range
5291 as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
5292 symbol) `utf-8' by default.
5294 o coding-category-utf-16-be
5296 The category for a coding system in which a text has an
5297 Unicode signature (cf. Unicode Standard) in the order of BIG
5298 endian at the head. Assigned the coding-system (Lisp symbol)
5299 `utf-16-be' by default.
5301 o coding-category-utf-16-le
5303 The category for a coding system in which a text has an
5304 Unicode signature (cf. Unicode Standard) in the order of
5305 LITTLE endian at the head. Assigned the coding-system (Lisp
5306 symbol) `utf-16-le' by default.
5308 o coding-category-ccl
5310 The category for a coding system of which encoder/decoder is
5311 written in CCL programs. The default value is nil, i.e., no
5312 coding system is assigned.
5314 o coding-category-binary
5316 The category for a coding system not categorized in any of the
5317 above. Assigned the coding-system (Lisp symbol)
5318 `no-conversion' by default.
5320 Each of them is a Lisp symbol and the value is an actual
5321 `coding-system's (this is also a Lisp symbol) assigned by a user.
5322 What Emacs does actually is to detect a category of coding system.
5323 Then, it uses a `coding-system' assigned to it. If Emacs can't
5324 decide only one possible category, it selects a category of the
5325 highest priority. Priorities of categories are also specified by a
5326 user in a Lisp variable `coding-category-list'.
5330 #define EOL_SEEN_NONE 0
5331 #define EOL_SEEN_LF 1
5332 #define EOL_SEEN_CR 2
5333 #define EOL_SEEN_CRLF 4
5335 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5336 SOURCE is encoded. If CATEGORY is one of
5337 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5338 two-byte, else they are encoded by one-byte.
5340 Return one of EOL_SEEN_XXX. */
5342 #define MAX_EOL_CHECK_COUNT 3
5345 detect_eol (source
, src_bytes
, category
)
5346 const unsigned char *source
;
5347 EMACS_INT src_bytes
;
5348 enum coding_category category
;
5350 const unsigned char *src
= source
, *src_end
= src
+ src_bytes
;
5353 int eol_seen
= EOL_SEEN_NONE
;
5355 if ((1 << category
) & CATEGORY_MASK_UTF_16
)
5359 msb
= category
== (coding_category_utf_16_le
5360 | coding_category_utf_16_le_nosig
);
5363 while (src
+ 1 < src_end
)
5366 if (src
[msb
] == 0 && (c
== '\n' || c
== '\r'))
5371 this_eol
= EOL_SEEN_LF
;
5372 else if (src
+ 3 >= src_end
5373 || src
[msb
+ 2] != 0
5374 || src
[lsb
+ 2] != '\n')
5375 this_eol
= EOL_SEEN_CR
;
5377 this_eol
= EOL_SEEN_CRLF
;
5379 if (eol_seen
== EOL_SEEN_NONE
)
5380 /* This is the first end-of-line. */
5381 eol_seen
= this_eol
;
5382 else if (eol_seen
!= this_eol
)
5384 /* The found type is different from what found before. */
5385 eol_seen
= EOL_SEEN_LF
;
5388 if (++total
== MAX_EOL_CHECK_COUNT
)
5396 while (src
< src_end
)
5399 if (c
== '\n' || c
== '\r')
5404 this_eol
= EOL_SEEN_LF
;
5405 else if (src
>= src_end
|| *src
!= '\n')
5406 this_eol
= EOL_SEEN_CR
;
5408 this_eol
= EOL_SEEN_CRLF
, src
++;
5410 if (eol_seen
== EOL_SEEN_NONE
)
5411 /* This is the first end-of-line. */
5412 eol_seen
= this_eol
;
5413 else if (eol_seen
!= this_eol
)
5415 /* The found type is different from what found before. */
5416 eol_seen
= EOL_SEEN_LF
;
5419 if (++total
== MAX_EOL_CHECK_COUNT
)
5429 adjust_coding_eol_type (coding
, eol_seen
)
5430 struct coding_system
*coding
;
5433 Lisp_Object eol_type
;
5435 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5436 if (eol_seen
& EOL_SEEN_LF
)
5438 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 0));
5441 else if (eol_seen
& EOL_SEEN_CRLF
)
5443 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 1));
5446 else if (eol_seen
& EOL_SEEN_CR
)
5448 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 2));
5454 /* Detect how a text specified in CODING is encoded. If a coding
5455 system is detected, update fields of CODING by the detected coding
5459 detect_coding (coding
)
5460 struct coding_system
*coding
;
5462 const unsigned char *src
, *src_end
;
5464 coding
->consumed
= coding
->consumed_char
= 0;
5465 coding
->produced
= coding
->produced_char
= 0;
5466 coding_set_source (coding
);
5468 src_end
= coding
->source
+ coding
->src_bytes
;
5470 /* If we have not yet decided the text encoding type, detect it
5472 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding
->id
)), Qundecided
))
5475 struct coding_detection_info detect_info
;
5477 detect_info
.checked
= detect_info
.found
= detect_info
.rejected
= 0;
5478 for (i
= 0, src
= coding
->source
; src
< src_end
; i
++, src
++)
5484 && (c
== ISO_CODE_ESC
|| c
== ISO_CODE_SI
|| c
== ISO_CODE_SO
)
5485 && ! inhibit_iso_escape_detection
5486 && ! detect_info
.checked
)
5488 coding
->head_ascii
= src
- (coding
->source
+ coding
->consumed
);
5489 if (detect_coding_iso_2022 (coding
, &detect_info
))
5491 /* We have scanned the whole data. */
5492 if (! (detect_info
.rejected
& CATEGORY_MASK_ISO_7_ELSE
))
5493 /* We didn't find an 8-bit code. */
5499 coding
->head_ascii
= src
- (coding
->source
+ coding
->consumed
);
5501 if (coding
->head_ascii
< coding
->src_bytes
5502 || detect_info
.found
)
5504 enum coding_category category
;
5505 struct coding_system
*this;
5507 if (coding
->head_ascii
== coding
->src_bytes
)
5508 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
5509 for (i
= 0; i
< coding_category_raw_text
; i
++)
5511 category
= coding_priorities
[i
];
5512 this = coding_categories
+ category
;
5513 if (detect_info
.found
& (1 << category
))
5517 for (i
= 0; i
< coding_category_raw_text
; i
++)
5519 category
= coding_priorities
[i
];
5520 this = coding_categories
+ category
;
5523 /* No coding system of this category is defined. */
5524 detect_info
.rejected
|= (1 << category
);
5526 else if (category
>= coding_category_raw_text
)
5528 else if (detect_info
.checked
& (1 << category
))
5530 if (detect_info
.found
& (1 << category
))
5533 else if ((*(this->detector
)) (coding
, &detect_info
)
5534 && detect_info
.found
& (1 << category
))
5536 if (category
== coding_category_utf_16_auto
)
5538 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
5539 category
= coding_category_utf_16_le
;
5541 category
= coding_category_utf_16_be
;
5547 if (i
< coding_category_raw_text
)
5548 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
5549 else if (detect_info
.rejected
== CATEGORY_MASK_ANY
)
5550 setup_coding_system (Qraw_text
, coding
);
5551 else if (detect_info
.rejected
)
5552 for (i
= 0; i
< coding_category_raw_text
; i
++)
5553 if (! (detect_info
.rejected
& (1 << coding_priorities
[i
])))
5555 this = coding_categories
+ coding_priorities
[i
];
5556 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
5561 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding
->id
)))
5562 == coding_category_utf_16_auto
)
5564 Lisp_Object coding_systems
;
5565 struct coding_detection_info detect_info
;
5568 = AREF (CODING_ID_ATTRS (coding
->id
), coding_attr_utf_16_bom
);
5569 detect_info
.found
= detect_info
.rejected
= 0;
5570 if (CONSP (coding_systems
)
5571 && detect_coding_utf_16 (coding
, &detect_info
))
5573 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
5574 setup_coding_system (XCAR (coding_systems
), coding
);
5575 else if (detect_info
.found
& CATEGORY_MASK_UTF_16_BE
)
5576 setup_coding_system (XCDR (coding_systems
), coding
);
5584 struct coding_system
*coding
;
5586 Lisp_Object eol_type
;
5587 unsigned char *p
, *pbeg
, *pend
;
5589 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5590 if (EQ (eol_type
, Qunix
))
5593 if (NILP (coding
->dst_object
))
5594 pbeg
= coding
->destination
;
5596 pbeg
= BYTE_POS_ADDR (coding
->dst_pos_byte
);
5597 pend
= pbeg
+ coding
->produced
;
5599 if (VECTORP (eol_type
))
5601 int eol_seen
= EOL_SEEN_NONE
;
5603 for (p
= pbeg
; p
< pend
; p
++)
5606 eol_seen
|= EOL_SEEN_LF
;
5607 else if (*p
== '\r')
5609 if (p
+ 1 < pend
&& *(p
+ 1) == '\n')
5611 eol_seen
|= EOL_SEEN_CRLF
;
5615 eol_seen
|= EOL_SEEN_CR
;
5618 if (eol_seen
!= EOL_SEEN_NONE
5619 && eol_seen
!= EOL_SEEN_LF
5620 && eol_seen
!= EOL_SEEN_CRLF
5621 && eol_seen
!= EOL_SEEN_CR
)
5622 eol_seen
= EOL_SEEN_LF
;
5623 if (eol_seen
!= EOL_SEEN_NONE
)
5624 eol_type
= adjust_coding_eol_type (coding
, eol_seen
);
5627 if (EQ (eol_type
, Qmac
))
5629 for (p
= pbeg
; p
< pend
; p
++)
5633 else if (EQ (eol_type
, Qdos
))
5637 if (NILP (coding
->dst_object
))
5639 /* Start deleting '\r' from the tail to minimize the memory
5641 for (p
= pend
- 2; p
>= pbeg
; p
--)
5644 safe_bcopy ((char *) (p
+ 1), (char *) p
, pend
-- - p
- 1);
5650 int pos_byte
= coding
->dst_pos_byte
;
5651 int pos
= coding
->dst_pos
;
5652 int pos_end
= pos
+ coding
->produced_char
- 1;
5654 while (pos
< pos_end
)
5656 p
= BYTE_POS_ADDR (pos_byte
);
5657 if (*p
== '\r' && p
[1] == '\n')
5659 del_range_2 (pos
, pos_byte
, pos
+ 1, pos_byte
+ 1, 0);
5664 pos_byte
+= BYTES_BY_CHAR_HEAD (*p
);
5667 coding
->produced
-= n
;
5668 coding
->produced_char
-= n
;
5673 /* Return a translation table (or list of them) from coding system
5674 attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5675 decoding (ENCODEP is zero). */
5678 get_translation_table (attrs
, encodep
, max_lookup
)
5680 int encodep
, *max_lookup
;
5682 Lisp_Object standard
, translation_table
;
5686 translation_table
= CODING_ATTR_ENCODE_TBL (attrs
),
5687 standard
= Vstandard_translation_table_for_encode
;
5689 translation_table
= CODING_ATTR_DECODE_TBL (attrs
),
5690 standard
= Vstandard_translation_table_for_decode
;
5691 if (NILP (translation_table
))
5692 translation_table
= standard
;
5695 if (SYMBOLP (translation_table
))
5696 translation_table
= Fget (translation_table
, Qtranslation_table
);
5697 else if (CONSP (translation_table
))
5699 translation_table
= Fcopy_sequence (translation_table
);
5700 for (val
= translation_table
; CONSP (val
); val
= XCDR (val
))
5701 if (SYMBOLP (XCAR (val
)))
5702 XSETCAR (val
, Fget (XCAR (val
), Qtranslation_table
));
5704 if (CHAR_TABLE_P (standard
))
5706 if (CONSP (translation_table
))
5707 translation_table
= nconc2 (translation_table
,
5708 Fcons (standard
, Qnil
));
5710 translation_table
= Fcons (translation_table
,
5711 Fcons (standard
, Qnil
));
5718 if (CHAR_TABLE_P (translation_table
)
5719 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table
)) > 1)
5721 val
= XCHAR_TABLE (translation_table
)->extras
[1];
5722 if (NATNUMP (val
) && *max_lookup
< XFASTINT (val
))
5723 *max_lookup
= XFASTINT (val
);
5725 else if (CONSP (translation_table
))
5727 Lisp_Object tail
, val
;
5729 for (tail
= translation_table
; CONSP (tail
); tail
= XCDR (tail
))
5730 if (CHAR_TABLE_P (XCAR (tail
))
5731 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail
))) > 1)
5733 val
= XCHAR_TABLE (XCAR (tail
))->extras
[1];
5734 if (NATNUMP (val
) && *max_lookup
< XFASTINT (val
))
5735 *max_lookup
= XFASTINT (val
);
5739 return translation_table
;
5742 #define LOOKUP_TRANSLATION_TABLE(table, c, trans) \
5745 if (CHAR_TABLE_P (table)) \
5747 trans = CHAR_TABLE_REF (table, c); \
5748 if (CHARACTERP (trans)) \
5749 c = XFASTINT (trans), trans = Qnil; \
5751 else if (CONSP (table)) \
5755 for (tail = table; CONSP (tail); tail = XCDR (tail)) \
5756 if (CHAR_TABLE_P (XCAR (tail))) \
5758 trans = CHAR_TABLE_REF (XCAR (tail), c); \
5759 if (CHARACTERP (trans)) \
5760 c = XFASTINT (trans), trans = Qnil; \
5761 else if (! NILP (trans)) \
5769 get_translation (val
, buf
, buf_end
, last_block
, from_nchars
, to_nchars
)
5773 int *from_nchars
, *to_nchars
;
5775 /* VAL is TO or (([FROM-CHAR ...] . TO) ...) where TO is TO-CHAR or
5779 Lisp_Object from
, tail
;
5782 for (tail
= val
; CONSP (tail
); tail
= XCDR (tail
))
5787 for (i
= 0; i
< len
; i
++)
5789 if (buf
+ i
== buf_end
)
5795 if (XINT (AREF (from
, i
)) != buf
[i
])
5809 *buf
= XINT (AREF (val
, 0)), *to_nchars
= ASIZE (val
);
5817 produce_chars (coding
, translation_table
, last_block
)
5818 struct coding_system
*coding
;
5819 Lisp_Object translation_table
;
5822 unsigned char *dst
= coding
->destination
+ coding
->produced
;
5823 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
5825 int produced_chars
= 0;
5828 if (! coding
->chars_at_source
)
5830 /* Characters are in coding->charbuf. */
5831 int *buf
= coding
->charbuf
;
5832 int *buf_end
= buf
+ coding
->charbuf_used
;
5834 if (BUFFERP (coding
->src_object
)
5835 && EQ (coding
->src_object
, coding
->dst_object
))
5836 dst_end
= ((unsigned char *) coding
->source
) + coding
->consumed
;
5838 while (buf
< buf_end
)
5844 int from_nchars
= 1, to_nchars
= 1;
5845 Lisp_Object trans
= Qnil
;
5847 LOOKUP_TRANSLATION_TABLE (translation_table
, c
, trans
);
5850 trans
= get_translation (trans
, buf
, buf_end
, last_block
,
5851 &from_nchars
, &to_nchars
);
5857 if (dst
+ MAX_MULTIBYTE_LENGTH
* to_nchars
> dst_end
)
5859 dst
= alloc_destination (coding
,
5861 + MAX_MULTIBYTE_LENGTH
* to_nchars
,
5863 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5866 for (i
= 0; i
< to_nchars
; i
++)
5869 c
= XINT (AREF (trans
, i
));
5870 if (coding
->dst_multibyte
5871 || ! CHAR_BYTE8_P (c
))
5872 CHAR_STRING_ADVANCE (c
, dst
);
5874 *dst
++ = CHAR_TO_BYTE8 (c
);
5876 produced_chars
+= to_nchars
;
5878 while (--from_nchars
> 0)
5882 /* This is an annotation datum. (-C) is the length. */
5885 carryover
= buf_end
- buf
;
5889 const unsigned char *src
= coding
->source
;
5890 const unsigned char *src_end
= src
+ coding
->src_bytes
;
5891 Lisp_Object eol_type
;
5893 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5895 if (coding
->src_multibyte
!= coding
->dst_multibyte
)
5897 if (coding
->src_multibyte
)
5904 const unsigned char *src_base
= src
;
5910 if (EQ (eol_type
, Qdos
))
5914 record_conversion_result
5915 (coding
, CODING_RESULT_INSUFFICIENT_SRC
);
5916 goto no_more_source
;
5921 else if (EQ (eol_type
, Qmac
))
5926 coding
->consumed
= src
- coding
->source
;
5928 if (EQ (coding
->src_object
, coding
->dst_object
))
5929 dst_end
= (unsigned char *) src
;
5932 dst
= alloc_destination (coding
, src_end
- src
+ 1,
5934 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5935 coding_set_source (coding
);
5936 src
= coding
->source
+ coding
->consumed
;
5937 src_end
= coding
->source
+ coding
->src_bytes
;
5947 while (src
< src_end
)
5954 if (EQ (eol_type
, Qdos
))
5960 else if (EQ (eol_type
, Qmac
))
5963 if (dst
>= dst_end
- 1)
5965 coding
->consumed
= src
- coding
->source
;
5967 if (EQ (coding
->src_object
, coding
->dst_object
))
5968 dst_end
= (unsigned char *) src
;
5969 if (dst
>= dst_end
- 1)
5971 dst
= alloc_destination (coding
, src_end
- src
+ 2,
5973 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5974 coding_set_source (coding
);
5975 src
= coding
->source
+ coding
->consumed
;
5976 src_end
= coding
->source
+ coding
->src_bytes
;
5984 if (!EQ (coding
->src_object
, coding
->dst_object
))
5986 int require
= coding
->src_bytes
- coding
->dst_bytes
;
5990 EMACS_INT offset
= src
- coding
->source
;
5992 dst
= alloc_destination (coding
, require
, dst
);
5993 coding_set_source (coding
);
5994 src
= coding
->source
+ offset
;
5995 src_end
= coding
->source
+ coding
->src_bytes
;
5998 produced_chars
= coding
->src_chars
;
5999 while (src
< src_end
)
6005 if (EQ (eol_type
, Qdos
))
6012 else if (EQ (eol_type
, Qmac
))
6018 coding
->consumed
= coding
->src_bytes
;
6019 coding
->consumed_char
= coding
->src_chars
;
6022 produced
= dst
- (coding
->destination
+ coding
->produced
);
6023 if (BUFFERP (coding
->dst_object
))
6024 insert_from_gap (produced_chars
, produced
);
6025 coding
->produced
+= produced
;
6026 coding
->produced_char
+= produced_chars
;
6030 /* Compose text in CODING->object according to the annotation data at
6031 CHARBUF. CHARBUF is an array:
6032 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
6036 produce_composition (coding
, charbuf
, pos
)
6037 struct coding_system
*coding
;
6043 enum composition_method method
;
6044 Lisp_Object components
;
6047 to
= pos
+ charbuf
[2];
6050 method
= (enum composition_method
) (charbuf
[3]);
6052 if (method
== COMPOSITION_RELATIVE
)
6054 else if (method
>= COMPOSITION_WITH_RULE
6055 && method
<= COMPOSITION_WITH_RULE_ALTCHARS
)
6057 Lisp_Object args
[MAX_COMPOSITION_COMPONENTS
* 2 - 1];
6062 for (i
= 0; i
< len
; i
++)
6064 args
[i
] = make_number (charbuf
[i
]);
6068 components
= (method
== COMPOSITION_WITH_ALTCHARS
6069 ? Fstring (len
, args
) : Fvector (len
, args
));
6073 compose_text (pos
, to
, components
, Qnil
, coding
->dst_object
);
6077 /* Put `charset' property on text in CODING->object according to
6078 the annotation data at CHARBUF. CHARBUF is an array:
6079 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6083 produce_charset (coding
, charbuf
, pos
)
6084 struct coding_system
*coding
;
6088 EMACS_INT from
= pos
- charbuf
[2];
6089 struct charset
*charset
= CHARSET_FROM_ID (charbuf
[3]);
6091 Fput_text_property (make_number (from
), make_number (pos
),
6092 Qcharset
, CHARSET_NAME (charset
),
6093 coding
->dst_object
);
/* Initial number of `int' elements tried for a conversion work
   area.  */
#define CHARBUF_SIZE 0x4000

/* Allocate CODING->charbuf with alloca and record its element count
   in CODING->charbuf_size.  The size starts at CHARBUF_SIZE and is
   halved after each failed attempt while it is still above 1024.

   This must be a macro: the area lives in the stack frame of the
   calling function, and on failure the macro records
   CODING_RESULT_INSUFFICIENT_MEM and makes that function return
   CODING->result.

   NOTE(review): alloca normally does not report failure by returning
   NULL, so the retry loop is presumably only defensive -- confirm.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    (coding)->charbuf = NULL;                                           \
    while (size > 1024)                                                 \
      {                                                                 \
        (coding)->charbuf = (int *) alloca (sizeof (int) * size);       \
        if ((coding)->charbuf)                                          \
          break;                                                        \
        size >>= 1;                                                     \
      }                                                                 \
    if (! (coding)->charbuf)                                            \
      {                                                                 \
        record_conversion_result ((coding),                             \
                                  CODING_RESULT_INSUFFICIENT_MEM);      \
        return (coding)->result;                                        \
      }                                                                 \
    (coding)->charbuf_size = size;                                      \
  } while (0)
6121 produce_annotation (coding
, pos
)
6122 struct coding_system
*coding
;
6125 int *charbuf
= coding
->charbuf
;
6126 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
6128 if (NILP (coding
->dst_object
))
6131 while (charbuf
< charbuf_end
)
6137 int len
= -*charbuf
;
6140 case CODING_ANNOTATE_COMPOSITION_MASK
:
6141 produce_composition (coding
, charbuf
, pos
);
6143 case CODING_ANNOTATE_CHARSET_MASK
:
6144 produce_charset (coding
, charbuf
, pos
);
6154 /* Decode the data at CODING->src_object into CODING->dst_object.
6155 CODING->src_object is a buffer, a string, or nil.
6156 CODING->dst_object is a buffer.
6158 If CODING->src_object is a buffer, it must be the current buffer.
6159 In this case, if CODING->src_pos is positive, it is a position of
6160 the source text in the buffer, otherwise, the source text is in the
6161 gap area of the buffer, and CODING->src_pos specifies the offset of
6162 the text from GPT (which must be the same as PT). If this is the
6163 same buffer as CODING->dst_object, CODING->src_pos must be
6166 If CODING->src_object is a string, CODING->src_pos is an index to
6169 If CODING->src_object is nil, CODING->source must already point to
6170 the non-relocatable memory area. In this case, CODING->src_pos is
6171 an offset from CODING->source.
6173 The decoded data is inserted at the current point of the buffer
/* Run the decoder of CODING over the whole source, producing
   characters into the destination in rounds, and return
   CODING->result.  */
6178 decode_coding (coding
)
6179 struct coding_system
*coding
;
6182 Lisp_Object undo_list
;
6183 Lisp_Object translation_table
;
/* The source is buffer text that starts before the gap and ends
   after it: move the gap to the start of the source (presumably so
   that the source bytes are contiguous -- TODO confirm).  */
6187 if (BUFFERP (coding
->src_object
)
6188 && coding
->src_pos
> 0
6189 && coding
->src_pos
< GPT
6190 && coding
->src_pos
+ coding
->src_chars
> GPT
)
6191 move_gap_both (coding
->src_pos
, coding
->src_pos_byte
);
/* Decoding into a buffer: make it current, move the gap to point,
   and set undo_list to Qt while text is produced.  The previous
   undo list is kept in UNDO_LIST and put back at the end of this
   function.  */
6194 if (BUFFERP (coding
->dst_object
))
6196 if (current_buffer
!= XBUFFER (coding
->dst_object
))
6197 set_buffer_internal (XBUFFER (coding
->dst_object
));
6199 move_gap_both (PT
, PT_BYTE
);
6200 undo_list
= current_buffer
->undo_list
;
6201 current_buffer
->undo_list
= Qt
;
/* Reset the progress counters and the result code for this call.  */
6204 coding
->consumed
= coding
->consumed_char
= 0;
6205 coding
->produced
= coding
->produced_char
= 0;
6206 coding
->chars_at_source
= 0;
6207 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
/* This macro can make the function return early with
   CODING->result when no work area can be allocated.  */
6210 ALLOC_CONVERSION_WORK_AREA (coding
);
6212 attrs
= CODING_ID_ATTRS (coding
->id
);
6213 translation_table
= get_translation_table (attrs
, 0, NULL
);
/* One decoding round.  POS is the destination position at which
   the output of this round starts; produce_annotation receives it
   when the decoder has flagged annotations.  */
6218 EMACS_INT pos
= coding
->dst_pos
+ coding
->produced_char
;
6220 coding_set_source (coding
);
6221 coding
->annotated
= 0;
6222 coding
->charbuf_used
= carryover
;
6223 (*(coding
->decoder
)) (coding
);
6224 coding_set_destination (coding
);
/* The value of produce_chars is kept as CARRYOVER: that many
   elements are copied from the tail of charbuf (the target of the
   copy is not shown here; presumably the head of charbuf), and the
   next round starts with charbuf_used set to CARRYOVER.  */
6225 carryover
= produce_chars (coding
, translation_table
, 0);
6226 if (coding
->annotated
)
6227 produce_annotation (coding
, pos
);
6228 for (i
= 0; i
< carryover
; i
++)
6230 = coding
->charbuf
[coding
->charbuf_used
- carryover
+ i
];
/* Keep going while source bytes remain and the result is still
   SUCCESS or INVALID_SRC.  */
6232 while (coding
->consumed
< coding
->src_bytes
6233 && (coding
->result
== CODING_RESULT_SUCCESS
6234 || coding
->result
== CODING_RESULT_INVALID_SRC
));
/* Produce whatever is still carried over.  Note the third argument
   is 1 here, not 0 as in the loop (presumably a "last block" flag
   -- TODO confirm).  */
6238 coding_set_destination (coding
);
6239 coding
->charbuf_used
= carryover
;
6240 produce_chars (coding
, translation_table
, 1);
/* Handle source bytes the decoder did not consume.  With
   CODING_MODE_LAST_BLOCK set they are pushed through charbuf and
   produce_chars (using BYTE8_TO_CHAR); otherwise their count is
   stored in CODING->carryover_bytes and P is pointed at
   CODING->carryover.  Either way CODING->consumed ends up equal to
   CODING->src_bytes.  */
6243 coding
->carryover_bytes
= 0;
6244 if (coding
->consumed
< coding
->src_bytes
)
6246 int nbytes
= coding
->src_bytes
- coding
->consumed
;
6247 const unsigned char *src
;
6249 coding_set_source (coding
);
6250 coding_set_destination (coding
);
6251 src
= coding
->source
+ coding
->consumed
;
6253 if (coding
->mode
& CODING_MODE_LAST_BLOCK
)
6255 /* Flush out unprocessed data as binary chars. We are sure
6256 that the number of data is less than the size of
6258 coding
->charbuf_used
= 0;
6259 while (nbytes
-- > 0)
6264 c
= BYTE8_TO_CHAR (c
);
6265 coding
->charbuf
[coding
->charbuf_used
++] = c
;
6267 produce_chars (coding
, Qnil
, 1);
6271 /* Record unprocessed bytes in coding->carryover. We are
6272 sure that the number of data is less than the size of
6273 coding->carryover. */
6274 unsigned char *p
= coding
->carryover
;
6276 coding
->carryover_bytes
= nbytes
;
6277 while (nbytes
-- > 0)
6280 coding
->consumed
= coding
->src_bytes
;
/* Every EOL type other than Qunix gets a separate decode_eol
   pass.  */
6283 if (! EQ (CODING_ID_EOL_TYPE (coding
->id
), Qunix
))
6284 decode_eol (coding
);
/* Put back the undo list saved at the top, then record the
   produced text (CODING->produced_char characters at
   CODING->dst_pos) with a single record_insert call.  */
6285 if (BUFFERP (coding
->dst_object
))
6287 current_buffer
->undo_list
= undo_list
;
6288 record_insert (coding
->dst_pos
, coding
->produced_char
);
6290 return coding
->result
;
6294 /* Extract an annotation datum from a composition starting at POS and
6295 ending before LIMIT of CODING->src_object (buffer or string), store
6296 the data in BUF, set *STOP to a starting position of the next
6297 composition (if any) or to LIMIT, and return the address of the
6298 next element of BUF.
6300 If such an annotation is not found, set *STOP to a starting
6301 position of a composition after POS (if any) or to LIMIT, and
/* Store in BUF the annotation data for a composition that starts
   at POS in CODING->src_object, searching no further than LIMIT.
   See the comment preceding this definition for the meaning of
   *STOP and of the return value.  */
6305 handle_composition_annotation (pos
, limit
, coding
, buf
, stop
)
6306 EMACS_INT pos
, limit
;
6307 struct coding_system
*coding
;
6311 EMACS_INT start
, end
;
/* Search [POS, LIMIT) of the source object for a composition.
   START, END and PROP are output arguments of find_composition
   (presumably the bounds and the property value -- TODO confirm).  */
6314 if (! find_composition (pos
, limit
, &start
, &end
, &prop
, coding
->src_object
)
/* A composition exists, but it begins after POS.  */
6317 else if (start
> pos
)
6323 /* We found a composition. Store the corresponding
6324 annotation data in BUF. */
6326 enum composition_method method
= COMPOSITION_METHOD (prop
);
6327 int nchars
= COMPOSITION_LENGTH (prop
);
6329 ADD_COMPOSITION_DATA (buf
, nchars
, method
);
/* Every method except COMPOSITION_RELATIVE also stores the
   components.  They may be given as a vector, a string, a single
   integer, or a list; each element is appended to BUF.  */
6330 if (method
!= COMPOSITION_RELATIVE
)
6332 Lisp_Object components
;
6335 components
= COMPOSITION_COMPONENTS (prop
);
6336 if (VECTORP (components
))
6338 len
= XVECTOR (components
)->size
;
6339 for (i
= 0; i
< len
; i
++)
6340 *buf
++ = XINT (AREF (components
, i
));
6342 else if (STRINGP (components
))
6344 len
= SCHARS (components
);
6348 FETCH_STRING_CHAR_ADVANCE (*buf
, components
, i
, i_byte
);
6352 else if (INTEGERP (components
))
6355 *buf
++ = XINT (components
);
6357 else if (CONSP (components
))
6359 for (len
= 0; CONSP (components
);
6360 len
++, components
= XCDR (components
))
6361 *buf
++ = XINT (XCAR (components
));
/* Search again, starting at the END of the composition just
   handled.  */
6369 if (find_composition (end
, limit
, &start
, &end
, &prop
,
6380 /* Extract an annotation datum from a text property `charset' at POS of
6381 CODING->src_object (buffer or string), store the data in BUF, set
6382 *STOP to the position where the value of `charset' property changes
6383 (limiting by LIMIT), and return the address of the next element of
6386 If the property value is nil, set *STOP to the position where the
6387 property value is non-nil (limiting by LIMIT), and return BUF. */
/* Store in BUF the annotation data for the `charset' text property
   found at POS of CODING->src_object, and set *STOP to the next
   position at which that property changes, bounded by LIMIT.  */
6390 handle_charset_annotation (pos
, limit
, coding
, buf
, stop
)
6391 EMACS_INT pos
, limit
;
6392 struct coding_system
*coding
;
6396 Lisp_Object val
, next
;
6399 val
= Fget_text_property (make_number (pos
), Qcharset
, coding
->src_object
);
/* ID is taken from the property only when the value is non-nil and
   satisfies CHARSETP; what ID is otherwise is not shown here.  */
6400 if (! NILP (val
) && CHARSETP (val
))
6401 id
= XINT (CHARSET_SYMBOL_ID (val
));
/* The middle argument is the constant 0 here (presumably the
   character count, to be filled in later -- TODO confirm).  */
6404 ADD_CHARSET_DATA (buf
, 0, id
);
6405 next
= Fnext_single_property_change (make_number (pos
), Qcharset
,
6407 make_number (limit
));
6408 *stop
= XINT (next
);
/* Read characters from CODING->source, continuing after the
   CODING->consumed bytes already read, and store them in
   CODING->charbuf for the encoder.  Annotation data is added at
   positions where a composition or `charset' property begins, EOL
   conversion is applied, and characters are translated through
   TRANSLATION_TABLE when it is non-nil.  MAX_LOOKUP is the number of
   characters the translation lookup buffer can hold.

   On exit CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used are updated, and CODING->chars_at_source is
   set to 0.  */
6414 consume_chars (coding
, translation_table
, max_lookup
)
6415 struct coding_system
*coding
;
6416 Lisp_Object translation_table
;
6419 int *buf
= coding
->charbuf
;
6420 int *buf_end
= coding
->charbuf
+ coding
->charbuf_size
;
6421 const unsigned char *src
= coding
->source
+ coding
->consumed
;
6422 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
6423 EMACS_INT pos
= coding
->src_pos
+ coding
->consumed_char
;
6424 EMACS_INT end_pos
= coding
->src_pos
+ coding
->src_chars
;
6425 int multibytep
= coding
->src_multibyte
;
6426 Lisp_Object eol_type
;
6428 EMACS_INT stop
, stop_composition
, stop_charset
;
6429 int *lookup_buf
= NULL
;
/* The lookup buffer is only allocated when a translation table is
   in effect.  */
6431 if (! NILP (translation_table
))
6432 lookup_buf
= alloca (sizeof (int) * max_lookup
);
/* A vector-valued EOL type is special-cased; the statement guarded
   by this test is not shown here.  */
6434 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
6435 if (VECTORP (eol_type
))
6438 /* Note: composition handling is not yet implemented. */
6439 coding
->common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
/* Initialize the stop positions.  Without a source object all of
   them are END_POS.  Otherwise a stop is set to POS for each kind
   of annotation that is enabled in CODING->common_flags, so that it
   is examined on the first iteration, and to END_POS for each kind
   that is disabled.  */
6441 if (NILP (coding
->src_object
))
6442 stop
= stop_composition
= stop_charset
= end_pos
;
6445 if (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
)
6446 stop
= stop_composition
= pos
;
6448 stop
= stop_composition
= end_pos
;
6449 if (coding
->common_flags
& CODING_ANNOTATE_CHARSET_MASK
)
6450 stop
= stop_charset
= pos
;
6452 stop_charset
= end_pos
;
6455 /* Compensate for CRLF and conversion. */
/* That is, keep 1 + MAX_ANNOTATION_LENGTH elements in reserve at
   the end of charbuf.  */
6456 buf_end
-= 1 + MAX_ANNOTATION_LENGTH
;
6457 while (buf
< buf_end
)
/* At a stop position: collect the annotation(s) that begin here,
   then recompute STOP as the nearer of the two next stops.  */
6465 if (pos
== stop_composition
)
6466 buf
= handle_composition_annotation (pos
, end_pos
, coding
,
6467 buf
, &stop_composition
);
6468 if (pos
== stop_charset
)
6469 buf
= handle_charset_annotation (pos
, end_pos
, coding
,
6470 buf
, &stop_charset
);
6471 stop
= (stop_composition
< stop_charset
6472 ? stop_composition
: stop_charset
);
/* Fetch the next character C.  The visible alternatives are: the
   raw-text encoder (statement not shown here), a byte sequence for
   which MULTIBYTE_LENGTH is positive, a single byte converted with
   BYTE8_TO_CHAR, and a plain STRING_CHAR_ADVANCE.  */
6479 if (coding
->encoder
== encode_coding_raw_text
)
6481 else if ((bytes
= MULTIBYTE_LENGTH (src
, src_end
)) > 0)
6482 c
= STRING_CHAR_ADVANCE (src
), pos
+= bytes
;
6484 c
= BYTE8_TO_CHAR (*src
), src
++, pos
++;
6487 c
= STRING_CHAR_ADVANCE (src
), pos
++;
/* EOL handling: a CR under CODING_MODE_SELECTIVE_DISPLAY, and any
   EOL type other than Qunix (with Qdos distinguished), are
   special-cased; the statements involved are not shown here.  */
6488 if ((c
== '\r') && (coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
))
6490 if (! EQ (eol_type
, Qunix
))
6494 if (EQ (eol_type
, Qdos
))
/* Look C up in the translation table; the outcome is left in
   TRANS.  */
6502 LOOKUP_TRANSLATION_TABLE (translation_table
, c
, trans
);
6507 int from_nchars
= 1, to_nchars
= 1;
6508 int *lookup_buf_end
;
6509 const unsigned char *p
= src
;
/* Read ahead up to MAX_LOOKUP characters into LOOKUP_BUF without
   consuming them (P is a copy of SRC), so that get_translation can
   match a multi-character sequence.  */
6513 for (i
= 1; i
< max_lookup
&& p
< src_end
; i
++)
6514 lookup_buf
[i
] = STRING_CHAR_ADVANCE (p
);
6515 lookup_buf_end
= lookup_buf
+ i
;
6516 trans
= get_translation (trans
, lookup_buf
, lookup_buf_end
, 1,
6517 &from_nchars
, &to_nchars
);
/* One of the conditions tested here is that the TO_NCHARS output
   characters do not fit before BUF_END.  */
6519 || buf
+ to_nchars
> buf_end
)
6521 *buf
++ = *lookup_buf
;
6522 for (i
= 1; i
< to_nchars
; i
++)
6523 *buf
++ = XINT (AREF (trans
, i
));
/* Skip the remaining FROM_NCHARS - 1 source characters that the
   translation has consumed.  */
6524 for (i
= 1; i
< from_nchars
; i
++, pos
++)
6525 src
+= MULTIBYTE_LENGTH_NO_CHECK (src
);
/* Publish the progress made back into CODING.  */
6529 coding
->consumed
= src
- coding
->source
;
6530 coding
->consumed_char
= pos
- coding
->src_pos
;
6531 coding
->charbuf_used
= buf
- coding
->charbuf
;
6532 coding
->chars_at_source
= 0;
6536 /* Encode the text at CODING->src_object into CODING->dst_object.
6537 CODING->src_object is a buffer or a string.
6538 CODING->dst_object is a buffer or nil.
6540 If CODING->src_object is a buffer, it must be the current buffer.
6541 In this case, if CODING->src_pos is positive, it is a position of
6542 the source text in the buffer, otherwise, the source text is in the
6543 gap area of the buffer, and coding->src_pos specifies the offset of
6544 the text from GPT (which must be the same as PT). If this is the
6545 same buffer as CODING->dst_object, CODING->src_pos must be
6546 negative and CODING should not have `pre-write-conversion'.
6548 If CODING->src_object is a string, CODING should not have
6549 `pre-write-conversion'.
6551 If CODING->dst_object is a buffer, the encoded data is inserted at
6552 the current point of that buffer.
6554 If CODING->dst_object is nil, the encoded data is placed at the
6555 memory area specified by CODING->destination. */
/* Run the encoder of CODING over the whole source in rounds and
   return CODING->result.  */
6558 encode_coding (coding
)
6559 struct coding_system
*coding
;
6562 Lisp_Object translation_table
;
6565 attrs
= CODING_ID_ATTRS (coding
->id
);
/* The raw-text encoder uses no translation table and no lookup
   buffer; any other encoder gets the table of the coding system,
   and get_translation_table also sets MAX_LOOKUP.  */
6566 if (coding
->encoder
== encode_coding_raw_text
)
6567 translation_table
= Qnil
, max_lookup
= 0;
6569 translation_table
= get_translation_table (attrs
, 1, &max_lookup
);
/* When encoding into a buffer, make it current and take the
   multibyteness of the destination from it.  */
6571 if (BUFFERP (coding
->dst_object
))
6573 set_buffer_internal (XBUFFER (coding
->dst_object
));
6574 coding
->dst_multibyte
6575 = ! NILP (current_buffer
->enable_multibyte_characters
);
/* Reset the progress counters and the result code for this call.  */
6578 coding
->consumed
= coding
->consumed_char
= 0;
6579 coding
->produced
= coding
->produced_char
= 0;
6580 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
/* This macro can make the function return early with
   CODING->result when no work area can be allocated.  */
6583 ALLOC_CONVERSION_WORK_AREA (coding
);
/* One round: consume_chars fills charbuf from the source, then the
   encoder is called.  Repeat until every source character has been
   consumed.  */
6586 coding_set_source (coding
);
6587 consume_chars (coding
, translation_table
, max_lookup
);
6588 coding_set_destination (coding
);
6589 (*(coding
->encoder
)) (coding
);
6590 } while (coding
->consumed_char
< coding
->src_chars
);
/* For a buffer destination, insert_from_gap is given the produced
   character and byte counts (presumably the encoder wrote its
   output into the gap -- TODO confirm).  */
6592 if (BUFFERP (coding
->dst_object
))
6593 insert_from_gap (coding
->produced_char
, coding
->produced
);
6595 return (coding
->result
);
6599 /* Name (or base name) of work buffer for code conversion. */
6600 static Lisp_Object Vcode_conversion_workbuf_name
;
6602 /* A working buffer used by the top level conversion. Once it is
6603 created, it is never destroyed. It has the name
6604 Vcode_conversion_workbuf_name. The other working buffers are
6605 destroyed after the use is finished, and their names are modified
6606 versions of Vcode_conversion_workbuf_name. */
6607 static Lisp_Object Vcode_conversion_reused_workbuf
;
6609 /* 1 iff Vcode_conversion_reused_workbuf is already in use. */
/* More precisely: nonzero iff it is in use.  The flag is
   post-incremented by make_conversion_work_buffer on every call and
   reset to 0 by code_conversion_restore, so it can exceed 1 while
   conversions are nested.  */
6610 static int reused_workbuf_in_use
;
6613 /* Return a working buffer for code conversion. MULTIBYTE specifies
6614 whether the returned buffer is multibyte. */
/* Get or create a work buffer, disable undo recording in it
   (undo_list = Qt), and set its multibyteness from MULTIBYTE.  The
   current buffer is switched to the work buffer for this setup and
   switched back before the end.  */
6617 make_conversion_work_buffer (multibyte
)
6620 Lisp_Object name
, workbuf
;
6621 struct buffer
*current
;
/* The flag is tested and then incremented.  If it was already
   nonzero, a buffer with a freshly generated name is used;
   otherwise the buffer named Vcode_conversion_workbuf_name is used.
   NOTE(review): Vcode_conversion_reused_workbuf is only assigned
   while it is nil.  If that buffer is ever killed, the variable
   keeps referring to the dead buffer while WORKBUF is a new one, so
   the EQ test in code_conversion_restore would no longer match --
   confirm whether this can happen.  */
6623 if (reused_workbuf_in_use
++)
6625 name
= Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name
, Qnil
);
6626 workbuf
= Fget_buffer_create (name
);
6630 name
= Vcode_conversion_workbuf_name
;
6631 workbuf
= Fget_buffer_create (name
);
6632 if (NILP (Vcode_conversion_reused_workbuf
))
6633 Vcode_conversion_reused_workbuf
= workbuf
;
/* Configure the work buffer with it temporarily current.  */
6635 current
= current_buffer
;
6636 set_buffer_internal (XBUFFER (workbuf
));
6638 current_buffer
->undo_list
= Qt
;
6639 current_buffer
->enable_multibyte_characters
= multibyte
? Qt
: Qnil
;
6640 set_buffer_internal (current
);
/* Unwind handler registered by code_conversion_save.  ARG is a
   cons (BUFFER . WORKBUF): BUFFER is the buffer to make current
   again, and WORKBUF is the work buffer to release, or nil.  */
6646 code_conversion_restore (arg
)
6649 Lisp_Object current
, workbuf
;
6650 struct gcpro gcpro1
;
6653 current
= XCAR (arg
);
6654 workbuf
= XCDR (arg
);
/* Release the work buffer: the reused one is only marked as free
   again; any other work buffer is killed if it is still live.  */
6655 if (! NILP (workbuf
))
6657 if (EQ (workbuf
, Vcode_conversion_reused_workbuf
))
6658 reused_workbuf_in_use
= 0;
6659 else if (! NILP (Fbuffer_live_p (workbuf
)))
6660 Fkill_buffer (workbuf
);
/* NOTE(review): CURRENT is made current without a liveness test
   like the one applied to WORKBUF above -- confirm it cannot have
   been killed in the meantime.  */
6662 set_buffer_internal (XBUFFER (current
));
/* Register code_conversion_restore as an unwind handler whose
   argument is the cons (current buffer . WORKBUF).  WORKBUF starts
   as nil and can be replaced by a work buffer made with
   make_conversion_work_buffer (MULTIBYTE); the test that selects
   this is not shown here (presumably WITH_WORK_BUF being nonzero --
   TODO confirm).  */
6668 code_conversion_save (with_work_buf
, multibyte
)
6669 int with_work_buf
, multibyte
;
6671 Lisp_Object workbuf
= Qnil
;
6674 workbuf
= make_conversion_work_buffer (multibyte
);
6675 record_unwind_protect (code_conversion_restore
,
6676 Fcons (Fcurrent_buffer (), workbuf
));
/* Decode CHARS characters (BYTES bytes) of source text with CODING,
   using the current buffer as both source and destination, and
   return CODING->result.  The source position is given as the
   negative offsets -CHARS / -BYTES; the output goes to point.  */
6681 decode_coding_gap (coding
, chars
, bytes
)
6682 struct coding_system
*coding
;
6683 EMACS_INT chars
, bytes
;
/* Remember the specpdl depth so that the unwind entry pushed by
   code_conversion_save can be popped at the end.  */
6685 int count
= specpdl_ptr
- specpdl
;
6688 code_conversion_save (0, 0);
6690 coding
->src_object
= Fcurrent_buffer ();
6691 coding
->src_chars
= chars
;
6692 coding
->src_bytes
= bytes
;
6693 coding
->src_pos
= -chars
;
6694 coding
->src_pos_byte
= -bytes
;
/* The source counts as multibyte iff it has fewer characters than
   bytes.  */
6695 coding
->src_multibyte
= chars
< bytes
;
6696 coding
->dst_object
= coding
->src_object
;
6697 coding
->dst_pos
= PT
;
6698 coding
->dst_pos_byte
= PT_BYTE
;
6699 coding
->dst_multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
6701 if (CODING_REQUIRE_DETECTION (coding
))
6702 detect_coding (coding
);
/* The text is always treated as the last block.  */
6704 coding
->mode
|= CODING_MODE_LAST_BLOCK
;
6705 decode_coding (coding
);
/* Call the post-read function of the coding system, if any, with
   point at the start of the decoded text and the number of
   produced characters as argument.  The produced counts are then
   adjusted by the amount the buffer size (Z, Z_BYTE) changed.
   NOTE(review): this uses call1, whereas decode_coding_object uses
   safe_call1 for the same purpose -- confirm the difference is
   intended.  */
6707 attrs
= CODING_ID_ATTRS (coding
->id
);
6708 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
6710 EMACS_INT prev_Z
= Z
, prev_Z_BYTE
= Z_BYTE
;
6713 TEMP_SET_PT_BOTH (coding
->dst_pos
, coding
->dst_pos_byte
);
6714 val
= call1 (CODING_ATTR_POST_READ (attrs
),
6715 make_number (coding
->produced_char
));
6717 coding
->produced_char
+= Z
- prev_Z
;
6718 coding
->produced
+= Z_BYTE
- prev_Z_BYTE
;
6721 unbind_to (count
, Qnil
);
6722 return coding
->result
;
6726 encode_coding_gap (coding
, chars
, bytes
)
6727 struct coding_system
*coding
;
6728 EMACS_INT chars
, bytes
;
6730 int count
= specpdl_ptr
- specpdl
;
6732 code_conversion_save (0, 0);
6734 coding
->src_object
= Fcurrent_buffer ();
6735 coding
->src_chars
= chars
;
6736 coding
->src_bytes
= bytes
;
6737 coding
->src_pos
= -chars
;
6738 coding
->src_pos_byte
= -bytes
;
6739 coding
->src_multibyte
= chars
< bytes
;
6740 coding
->dst_object
= coding
->src_object
;
6741 coding
->dst_pos
= PT
;
6742 coding
->dst_pos_byte
= PT_BYTE
;
6744 encode_coding (coding
);
6746 unbind_to (count
, Qnil
);
6747 return coding
->result
;
6751 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6752 SRC_OBJECT into DST_OBJECT by coding context CODING.
6754 SRC_OBJECT is a buffer, a string, or Qnil.
6756 If it is a buffer, the text is at point of the buffer. FROM and TO
6757 are positions in the buffer.
6759 If it is a string, the text is at the beginning of the string.
6760 FROM and TO are indices to the string.
6762 If it is nil, the text is at coding->source. FROM and TO are
6763 indices to coding->source.
6765 DST_OBJECT is a buffer, Qt, or Qnil.
6767 If it is a buffer, the decoded text is inserted at point of the
6768 buffer. If the buffer is the same as SRC_OBJECT, the source text
6771 If it is Qt, a string is made from the decoded text, and
6772 set in CODING->dst_object.
6774 If it is Qnil, the decoded text is stored at CODING->destination.
6775 The caller must allocate CODING->dst_bytes bytes at
6776 CODING->destination by xmalloc. If the decoded text is longer than
6777 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
/* Set up CODING for the source range FROM..TO of SRC_OBJECT and for
   the destination DST_OBJECT, run decode_coding, apply the
   post-read function of the coding system if there is one, and
   deliver the result in the form DST_OBJECT asks for.  See the
   comment preceding this definition for the meaning of the
   arguments.  */
6781 decode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
6783 struct coding_system
*coding
;
6784 Lisp_Object src_object
;
6785 EMACS_INT from
, from_byte
, to
, to_byte
;
6786 Lisp_Object dst_object
;
/* Remember the specpdl depth so that the unwind entry pushed by
   code_conversion_save can be popped at the end.  */
6788 int count
= specpdl_ptr
- specpdl
;
6789 unsigned char *destination
;
6790 EMACS_INT dst_bytes
;
6791 EMACS_INT chars
= to
- from
;
6792 EMACS_INT bytes
= to_byte
- from_byte
;
/* SAVED_PT stays -1 unless point is saved below, which happens only
   when source and destination are the same buffer.
   NOTE(review): these are `int' although they hold PT / PT_BYTE
   and are compared with the EMACS_INT positions FROM and
   FROM + CHARS -- confirm no truncation is possible.  */
6795 int saved_pt
= -1, saved_pt_byte
;
6797 buffer
= Fcurrent_buffer ();
/* With a nil DST_OBJECT the caller supplies the destination
   memory; keep its address and size.  */
6799 if (NILP (dst_object
))
6801 destination
= coding
->destination
;
6802 dst_bytes
= coding
->dst_bytes
;
6805 coding
->src_object
= src_object
;
6806 coding
->src_chars
= chars
;
6807 coding
->src_bytes
= bytes
;
/* The source counts as multibyte iff it has fewer characters than
   bytes.  */
6808 coding
->src_multibyte
= chars
< bytes
;
6810 if (STRINGP (src_object
))
6812 coding
->src_pos
= from
;
6813 coding
->src_pos_byte
= from_byte
;
6815 else if (BUFFERP (src_object
))
6817 set_buffer_internal (XBUFFER (src_object
));
6819 move_gap_both (from
, from_byte
);
/* Decoding a buffer into itself: save point, delete the source
   range, and describe the source by the negative offsets -CHARS /
   -BYTES instead of a buffer position.  */
6820 if (EQ (src_object
, dst_object
))
6822 saved_pt
= PT
, saved_pt_byte
= PT_BYTE
;
6823 TEMP_SET_PT_BOTH (from
, from_byte
);
6824 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6825 coding
->src_pos
= -chars
;
6826 coding
->src_pos_byte
= -bytes
;
6830 coding
->src_pos
= from
;
6831 coding
->src_pos_byte
= from_byte
;
6835 if (CODING_REQUIRE_DETECTION (coding
))
6836 detect_coding (coding
);
6837 attrs
= CODING_ID_ATTRS (coding
->id
);
/* Choose the destination.  A multibyte work buffer is used when a
   string is requested (Qt), and also when the result should go to
   memory (nil) but a post-read function has to run on it first.  */
6839 if (EQ (dst_object
, Qt
)
6840 || (! NILP (CODING_ATTR_POST_READ (attrs
))
6841 && NILP (dst_object
)))
6843 coding
->dst_object
= code_conversion_save (1, 1);
6844 coding
->dst_pos
= BEG
;
6845 coding
->dst_pos_byte
= BEG_BYTE
;
6846 coding
->dst_multibyte
= 1;
/* Decode into the given buffer at its point.  */
6848 else if (BUFFERP (dst_object
))
6850 code_conversion_save (0, 0);
6851 coding
->dst_object
= dst_object
;
6852 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6853 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6854 coding
->dst_multibyte
6855 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
/* Decode straight into memory (no destination object).  */
6859 code_conversion_save (0, 0);
6860 coding
->dst_object
= Qnil
;
6861 coding
->dst_multibyte
= 1;
6864 decode_coding (coding
);
6866 if (BUFFERP (coding
->dst_object
))
6867 set_buffer_internal (XBUFFER (coding
->dst_object
));
/* Call the post-read function, if any, with point at the start of
   the decoded text and the number of produced characters as
   argument.  The produced counts are then adjusted by the amount
   the buffer size (Z, Z_BYTE) changed.  */
6869 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
6871 struct gcpro gcpro1
, gcpro2
;
6872 EMACS_INT prev_Z
= Z
, prev_Z_BYTE
= Z_BYTE
;
6875 TEMP_SET_PT_BOTH (coding
->dst_pos
, coding
->dst_pos_byte
);
6876 GCPRO2 (coding
->src_object
, coding
->dst_object
);
6877 val
= safe_call1 (CODING_ATTR_POST_READ (attrs
),
6878 make_number (coding
->produced_char
));
6881 coding
->produced_char
+= Z
- prev_Z
;
6882 coding
->produced
+= Z_BYTE
- prev_Z_BYTE
;
/* A string was requested: the result is the text of the current
   (work) buffer.  */
6885 if (EQ (dst_object
, Qt
))
6887 coding
->dst_object
= Fbuffer_string ();
/* The result was requested in memory but was produced in a work
   buffer: grow the caller's memory area if it is smaller than the
   produced byte count, and copy the buffer text into it.
   NOTE(review): the statements that delimit the `dst_bytes <
   produced' case are not shown here -- confirm the copy is also
   done when the area is already large enough.  */
6889 else if (NILP (dst_object
) && BUFFERP (coding
->dst_object
))
6891 set_buffer_internal (XBUFFER (coding
->dst_object
));
6892 if (dst_bytes
< coding
->produced
)
6895 = (unsigned char *) xrealloc (destination
, coding
->produced
);
6898 record_conversion_result (coding
,
6899 CODING_RESULT_INSUFFICIENT_DST
);
6900 unbind_to (count
, Qnil
);
/* Make the text starting at BEGV contiguous before copying.  */
6903 if (BEGV
< GPT
&& GPT
< BEGV
+ coding
->produced_char
)
6904 move_gap_both (BEGV
, BEGV_BYTE
);
6905 bcopy (BEGV_ADDR
, destination
, coding
->produced
);
6906 coding
->destination
= destination
;
6912 /* This is the case of:
6913 (BUFFERP (src_object) && EQ (src_object, dst_object))
6914 As we have moved PT while replacing the original buffer
6915 contents, we must recover it now. */
6916 set_buffer_internal (XBUFFER (src_object
));
/* Point before the replaced range is restored unchanged; point
   inside the range goes to FROM; point after the range is shifted
   by the size change (in a unibyte buffer the byte difference is
   used for the character position as well).  */
6917 if (saved_pt
< from
)
6918 TEMP_SET_PT_BOTH (saved_pt
, saved_pt_byte
);
6919 else if (saved_pt
< from
+ chars
)
6920 TEMP_SET_PT_BOTH (from
, from_byte
);
6921 else if (! NILP (current_buffer
->enable_multibyte_characters
))
6922 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced_char
- chars
),
6923 saved_pt_byte
+ (coding
->produced
- bytes
));
6925 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced
- bytes
),
6926 saved_pt_byte
+ (coding
->produced
- bytes
));
6929 unbind_to (count
, coding
->dst_object
);
/* Set up CODING for the source range FROM..TO of SRC_OBJECT and for
   the destination DST_OBJECT, run the pre-write function of the
   coding system if there is one, run encode_coding, and deliver the
   result in the form DST_OBJECT asks for (a buffer, a unibyte
   string when it is Qt, or memory at CODING->destination).  */
6934 encode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
6936 struct coding_system
*coding
;
6937 Lisp_Object src_object
;
6938 EMACS_INT from
, from_byte
, to
, to_byte
;
6939 Lisp_Object dst_object
;
/* Remember the specpdl depth so that the unwind entry pushed by
   code_conversion_save can be popped at the end.  */
6941 int count
= specpdl_ptr
- specpdl
;
6942 EMACS_INT chars
= to
- from
;
6943 EMACS_INT bytes
= to_byte
- from_byte
;
/* SAVED_PT stays -1 unless point is saved below, which happens only
   when source and destination are the same buffer.
   NOTE(review): these are `int' although they hold PT / PT_BYTE
   and are compared with the EMACS_INT positions FROM and
   FROM + CHARS -- confirm no truncation is possible.  */
6946 int saved_pt
= -1, saved_pt_byte
;
6947 int kill_src_buffer
= 0;
6949 buffer
= Fcurrent_buffer ();
6951 coding
->src_object
= src_object
;
6952 coding
->src_chars
= chars
;
6953 coding
->src_bytes
= bytes
;
/* The source counts as multibyte iff it has fewer characters than
   bytes.  */
6954 coding
->src_multibyte
= chars
< bytes
;
6956 attrs
= CODING_ID_ATTRS (coding
->id
);
/* A pre-write function exists: copy the source text into a work
   buffer, let the function edit it, and encode the outcome.  */
6958 if (! NILP (CODING_ATTR_PRE_WRITE (attrs
)))
6960 coding
->src_object
= code_conversion_save (1, coding
->src_multibyte
);
6961 set_buffer_internal (XBUFFER (coding
->src_object
));
6962 if (STRINGP (src_object
))
6963 insert_from_string (src_object
, from
, from_byte
, chars
, bytes
, 0);
6964 else if (BUFFERP (src_object
))
6965 insert_from_buffer (XBUFFER (src_object
), from
, chars
, 0);
6967 insert_1_both (coding
->source
+ from
, chars
, bytes
, 0, 0, 0);
/* Encoding a buffer into itself: save point and delete the source
   range, which has already been copied to the work buffer.  */
6969 if (EQ (src_object
, dst_object
))
6971 set_buffer_internal (XBUFFER (src_object
));
6972 saved_pt
= PT
, saved_pt_byte
= PT_BYTE
;
6973 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6974 set_buffer_internal (XBUFFER (coding
->src_object
));
/* Call the pre-write function on the whole work buffer, with BEG
   and Z as arguments.  */
6978 Lisp_Object args
[3];
6980 args
[0] = CODING_ATTR_PRE_WRITE (attrs
);
6981 args
[1] = make_number (BEG
);
6982 args
[2] = make_number (Z
);
6983 safe_call (3, args
);
/* If the function left a different buffer current, that buffer
   becomes the source and is killed once encoding is done.  */
6985 if (XBUFFER (coding
->src_object
) != current_buffer
)
6986 kill_src_buffer
= 1;
6987 coding
->src_object
= Fcurrent_buffer ();
/* Describe the entire text of the now-current buffer as the
   source.  */
6989 move_gap_both (BEG
, BEG_BYTE
);
6990 coding
->src_chars
= Z
- BEG
;
6991 coding
->src_bytes
= Z_BYTE
- BEG_BYTE
;
6992 coding
->src_pos
= BEG
;
6993 coding
->src_pos_byte
= BEG_BYTE
;
6994 coding
->src_multibyte
= Z
< Z_BYTE
;
/* No pre-write function, string source.  */
6996 else if (STRINGP (src_object
))
6998 code_conversion_save (0, 0);
6999 coding
->src_pos
= from
;
7000 coding
->src_pos_byte
= from_byte
;
/* No pre-write function, buffer source.  */
7002 else if (BUFFERP (src_object
))
7004 code_conversion_save (0, 0);
7005 set_buffer_internal (XBUFFER (src_object
));
/* Encoding a buffer into itself: the value returned by del_range_1
   for the deleted range becomes the source object, read from
   position 0.  */
7006 if (EQ (src_object
, dst_object
))
7008 saved_pt
= PT
, saved_pt_byte
= PT_BYTE
;
7009 coding
->src_object
= del_range_1 (from
, to
, 1, 1);
7010 coding
->src_pos
= 0;
7011 coding
->src_pos_byte
= 0;
/* Otherwise, if the source range includes the gap position, move
   the gap to the start of the range.  */
7015 if (from
< GPT
&& to
>= GPT
)
7016 move_gap_both (from
, from_byte
);
7017 coding
->src_pos
= from
;
7018 coding
->src_pos_byte
= from_byte
;
7022 code_conversion_save (0, 0);
/* Choose the destination.  For a buffer, output goes to FROM when
   the buffer is also the source, else to that buffer's point.  */
7024 if (BUFFERP (dst_object
))
7026 coding
->dst_object
= dst_object
;
7027 if (EQ (src_object
, dst_object
))
7029 coding
->dst_pos
= from
;
7030 coding
->dst_pos_byte
= from_byte
;
7034 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
7035 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
7037 coding
->dst_multibyte
7038 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
/* A string was requested: encode into freshly allocated memory
   whose initial size is the source character count (at least 1).  */
7040 else if (EQ (dst_object
, Qt
))
7042 coding
->dst_object
= Qnil
;
7043 coding
->dst_bytes
= coding
->src_chars
;
7044 if (coding
->dst_bytes
== 0)
7045 coding
->dst_bytes
= 1;
7046 coding
->destination
= (unsigned char *) xmalloc (coding
->dst_bytes
);
7047 coding
->dst_multibyte
= 0;
/* Otherwise encode into memory with a unibyte destination.  */
7051 coding
->dst_object
= Qnil
;
7052 coding
->dst_multibyte
= 0;
7055 encode_coding (coding
);
/* For a string request, turn the output into a Lisp string: the
   buffer text if the destination ended up being a buffer, otherwise
   a unibyte string made from the allocated memory, which is then
   freed.  */
7057 if (EQ (dst_object
, Qt
))
7059 if (BUFFERP (coding
->dst_object
))
7060 coding
->dst_object
= Fbuffer_string ();
7064 = make_unibyte_string ((char *) coding
->destination
,
7066 xfree (coding
->destination
);
7072 /* This is the case of:
7073 (BUFFERP (src_object) && EQ (src_object, dst_object))
7074 As we have moved PT while replacing the original buffer
7075 contents, we must recover it now. */
7076 set_buffer_internal (XBUFFER (src_object
));
/* Point before the replaced range is restored unchanged; point
   inside the range goes to FROM; point after the range is shifted
   by the size change (in a unibyte buffer the byte difference is
   used for the character position as well).  */
7077 if (saved_pt
< from
)
7078 TEMP_SET_PT_BOTH (saved_pt
, saved_pt_byte
);
7079 else if (saved_pt
< from
+ chars
)
7080 TEMP_SET_PT_BOTH (from
, from_byte
);
7081 else if (! NILP (current_buffer
->enable_multibyte_characters
))
7082 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced_char
- chars
),
7083 saved_pt_byte
+ (coding
->produced
- bytes
));
7085 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced
- bytes
),
7086 saved_pt_byte
+ (coding
->produced
- bytes
));
/* Kill the buffer that the pre-write function switched to, if
   any, then pop the unwind entry.  */
7089 if (kill_src_buffer
)
7090 Fkill_buffer (coding
->src_object
);
7091 unbind_to (count
, Qnil
);
7096 preferred_coding_system ()
7098 int id
= coding_categories
[coding_priorities
[0]].id
;
7100 return CODING_ID_NAME (id
);
7105 /*** 11. Emacs Lisp library functions ***/
/* Lisp predicate `coding-system-p'.  Two tests are visible below:
   whether CODING_SYSTEM_ID of the argument is non-negative, and
   whether the argument has no `coding-system-define-form' property.
   The values returned for each outcome are not shown here.  */
7107 DEFUN ("coding-system-p", Fcoding_system_p
, Scoding_system_p
, 1, 1, 0,
7108 doc
: /* Return t if OBJECT is nil or a coding-system.
7109 See the documentation of `define-coding-system' for information
7110 about coding-system objects. */)
7115 || CODING_SYSTEM_ID (obj
) >= 0)
7118 || NILP (Fget (obj
, Qcoding_system_define_form
)))
7123 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system
,
7124 Sread_non_nil_coding_system
, 1, 1, 0,
7125 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
7132 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
7133 Qt
, Qnil
, Qcoding_system_history
, Qnil
, Qnil
);
7135 while (SCHARS (val
) == 0);
7136 return (Fintern (val
, Qnil
));
7139 DEFUN ("read-coding-system", Fread_coding_system
, Sread_coding_system
, 1, 2, 0,
7140 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT.
7141 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
7142 (prompt
, default_coding_system
)
7143 Lisp_Object prompt
, default_coding_system
;
7146 if (SYMBOLP (default_coding_system
))
7147 XSETSTRING (default_coding_system
, XPNTR (SYMBOL_NAME (default_coding_system
)));
7148 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
7149 Qt
, Qnil
, Qcoding_system_history
,
7150 default_coding_system
, Qnil
);
7151 return (SCHARS (val
) == 0 ? Qnil
: Fintern (val
, Qnil
));
/* Lisp function `check-coding-system'.  Return CODING-SYSTEM when
   Fcoding_system_p accepts it; otherwise signal
   `coding-system-error' with CODING-SYSTEM as data.  */
7154 DEFUN ("check-coding-system", Fcheck_coding_system
, Scheck_coding_system
,
7156 doc
: /* Check validity of CODING-SYSTEM.
7157 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7158 It is valid if it is nil or a symbol defined as a coding system by the
7159 function `define-coding-system'. */)
7161 Lisp_Object coding_system
;
7163 Lisp_Object define_form
;
/* A pending definition form stored on the symbol is evaluated
   first.  The property is cleared before the evaluation, so the
   form is run at most once.  */
7165 define_form
= Fget (coding_system
, Qcoding_system_define_form
);
7166 if (! NILP (define_form
))
7168 Fput (coding_system
, Qcoding_system_define_form
, Qnil
);
7169 safe_eval (define_form
);
7171 if (!NILP (Fcoding_system_p (coding_system
)))
7172 return coding_system
;
7174 Fsignal (Qcoding_system_error
, Fcons (coding_system
, Qnil
));
7178 /* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
7179 HIGHEST is nonzero, return the coding system of the highest
7180 priority among the detected coding systems. Otherwise return a
7181 list of detected coding systems sorted by their priorities. If
7182 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7183 multibyte form but contain only ASCII and eight-bit chars.
7184 Otherwise, the bytes are raw bytes.
7186 CODING-SYSTEM controls the detection as below:
7188 If it is nil, detect both text-format and eol-format. If the
7189 text-format part of CODING-SYSTEM is already specified
7190 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
7191 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7192 detect only text-format. */
7195 detect_coding_system (src
, src_chars
, src_bytes
, highest
, multibytep
,
7197 const unsigned char *src
;
7198 int src_chars
, src_bytes
, highest
;
7200 Lisp_Object coding_system
;
7202 const unsigned char *src_end
= src
+ src_bytes
;
7203 Lisp_Object attrs
, eol_type
;
7205 struct coding_system coding
;
7207 struct coding_detection_info detect_info
;
7208 enum coding_category base_category
;
7210 if (NILP (coding_system
))
7211 coding_system
= Qundecided
;
7212 setup_coding_system (coding_system
, &coding
);
7213 attrs
= CODING_ID_ATTRS (coding
.id
);
7214 eol_type
= CODING_ID_EOL_TYPE (coding
.id
);
7215 coding_system
= CODING_ATTR_BASE_NAME (attrs
);
7217 coding
.source
= src
;
7218 coding
.src_chars
= src_chars
;
7219 coding
.src_bytes
= src_bytes
;
7220 coding
.src_multibyte
= multibytep
;
7221 coding
.consumed
= 0;
7222 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
7224 detect_info
.checked
= detect_info
.found
= detect_info
.rejected
= 0;
7226 /* At first, detect text-format if necessary. */
7227 base_category
= XINT (CODING_ATTR_CATEGORY (attrs
));
7228 if (base_category
== coding_category_undecided
)
7230 enum coding_category category
;
7231 struct coding_system
*this;
7234 /* Skip all ASCII bytes except for a few ISO2022 controls. */
7235 for (i
= 0; src
< src_end
; i
++, src
++)
7241 && (c
== ISO_CODE_ESC
|| c
== ISO_CODE_SI
|| c
== ISO_CODE_SO
)
7242 && inhibit_iso_escape_detection
)
7244 coding
.head_ascii
= src
- coding
.source
;
7245 if (detect_coding_iso_2022 (&coding
, &detect_info
))
7247 /* We have scanned the whole data. */
7248 if (! (detect_info
.rejected
& CATEGORY_MASK_ISO_7_ELSE
))
7249 /* We didn't find an 8-bit code. */
7255 coding
.head_ascii
= src
- coding
.source
;
7258 || detect_info
.found
)
7261 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
7262 for (i
= 0; i
< coding_category_raw_text
; i
++)
7264 category
= coding_priorities
[i
];
7265 if (detect_info
.found
& (1 << category
))
7269 for (i
= 0; i
< coding_category_raw_text
; i
++)
7271 category
= coding_priorities
[i
];
7272 this = coding_categories
+ category
;
7276 /* No coding system of this category is defined. */
7277 detect_info
.rejected
|= (1 << category
);
7279 else if (category
>= coding_category_raw_text
)
7281 else if (detect_info
.checked
& (1 << category
))
7284 && (detect_info
.found
& (1 << category
)))
7289 if ((*(this->detector
)) (&coding
, &detect_info
)
7291 && (detect_info
.found
& (1 << category
)))
7293 if (category
== coding_category_utf_16_auto
)
7295 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
7296 category
= coding_category_utf_16_le
;
7298 category
= coding_category_utf_16_be
;
7306 if (detect_info
.rejected
== CATEGORY_MASK_ANY
)
7308 detect_info
.found
= CATEGORY_MASK_RAW_TEXT
;
7309 id
= coding_categories
[coding_category_raw_text
].id
;
7310 val
= Fcons (make_number (id
), Qnil
);
7312 else if (! detect_info
.rejected
&& ! detect_info
.found
)
7314 detect_info
.found
= CATEGORY_MASK_ANY
;
7315 id
= coding_categories
[coding_category_undecided
].id
;
7316 val
= Fcons (make_number (id
), Qnil
);
7320 if (detect_info
.found
)
7322 detect_info
.found
= 1 << category
;
7323 val
= Fcons (make_number (this->id
), Qnil
);
7326 for (i
= 0; i
< coding_category_raw_text
; i
++)
7327 if (! (detect_info
.rejected
& (1 << coding_priorities
[i
])))
7329 detect_info
.found
= 1 << coding_priorities
[i
];
7330 id
= coding_categories
[coding_priorities
[i
]].id
;
7331 val
= Fcons (make_number (id
), Qnil
);
7337 int mask
= detect_info
.rejected
| detect_info
.found
;
7341 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
7343 category
= coding_priorities
[i
];
7344 if (! (mask
& (1 << category
)))
7346 found
|= 1 << category
;
7347 id
= coding_categories
[category
].id
;
7348 val
= Fcons (make_number (id
), val
);
7351 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
7353 category
= coding_priorities
[i
];
7354 if (detect_info
.found
& (1 << category
))
7356 id
= coding_categories
[category
].id
;
7357 val
= Fcons (make_number (id
), val
);
7360 detect_info
.found
|= found
;
7363 else if (base_category
== coding_category_utf_16_auto
)
7365 if (detect_coding_utf_16 (&coding
, &detect_info
))
7367 struct coding_system
*this;
7369 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
7370 this = coding_categories
+ coding_category_utf_16_le
;
7371 else if (detect_info
.found
& CATEGORY_MASK_UTF_16_BE
)
7372 this = coding_categories
+ coding_category_utf_16_be
;
7373 else if (detect_info
.rejected
& CATEGORY_MASK_UTF_16_LE_NOSIG
)
7374 this = coding_categories
+ coding_category_utf_16_be_nosig
;
7376 this = coding_categories
+ coding_category_utf_16_le_nosig
;
7377 val
= Fcons (make_number (this->id
), Qnil
);
7382 detect_info
.found
= 1 << XINT (CODING_ATTR_CATEGORY (attrs
));
7383 val
= Fcons (make_number (coding
.id
), Qnil
);
7386 /* Then, detect eol-format if necessary. */
7388 int normal_eol
= -1, utf_16_be_eol
= -1, utf_16_le_eol
;
7391 if (VECTORP (eol_type
))
7393 if (detect_info
.found
& ~CATEGORY_MASK_UTF_16
)
7394 normal_eol
= detect_eol (coding
.source
, src_bytes
,
7395 coding_category_raw_text
);
7396 if (detect_info
.found
& (CATEGORY_MASK_UTF_16_BE
7397 | CATEGORY_MASK_UTF_16_BE_NOSIG
))
7398 utf_16_be_eol
= detect_eol (coding
.source
, src_bytes
,
7399 coding_category_utf_16_be
);
7400 if (detect_info
.found
& (CATEGORY_MASK_UTF_16_LE
7401 | CATEGORY_MASK_UTF_16_LE_NOSIG
))
7402 utf_16_le_eol
= detect_eol (coding
.source
, src_bytes
,
7403 coding_category_utf_16_le
);
7407 if (EQ (eol_type
, Qunix
))
7408 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_LF
;
7409 else if (EQ (eol_type
, Qdos
))
7410 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_CRLF
;
7412 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_CR
;
7415 for (tail
= val
; CONSP (tail
); tail
= XCDR (tail
))
7417 enum coding_category category
;
7420 id
= XINT (XCAR (tail
));
7421 attrs
= CODING_ID_ATTRS (id
);
7422 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
7423 eol_type
= CODING_ID_EOL_TYPE (id
);
7424 if (VECTORP (eol_type
))
7426 if (category
== coding_category_utf_16_be
7427 || category
== coding_category_utf_16_be_nosig
)
7428 this_eol
= utf_16_be_eol
;
7429 else if (category
== coding_category_utf_16_le
7430 || category
== coding_category_utf_16_le_nosig
)
7431 this_eol
= utf_16_le_eol
;
7433 this_eol
= normal_eol
;
7435 if (this_eol
== EOL_SEEN_LF
)
7436 XSETCAR (tail
, AREF (eol_type
, 0));
7437 else if (this_eol
== EOL_SEEN_CRLF
)
7438 XSETCAR (tail
, AREF (eol_type
, 1));
7439 else if (this_eol
== EOL_SEEN_CR
)
7440 XSETCAR (tail
, AREF (eol_type
, 2));
7442 XSETCAR (tail
, CODING_ID_NAME (id
));
7445 XSETCAR (tail
, CODING_ID_NAME (id
));
7449 return (highest
? XCAR (val
) : val
);
/* Lisp primitive `detect-coding-region'.  Checks START and END
   (markers are coerced to numbers), validates them as a region,
   converts both character positions to byte positions, and passes the
   address of the region text together with its character and byte
   lengths to detect_coding_system.  */
7453 DEFUN ("detect-coding-region", Fdetect_coding_region
, Sdetect_coding_region
,
7455 doc
: /* Detect coding system of the text in the region between START and END.
7456 Return a list of possible coding systems ordered by priority.
7458 If only ASCII characters are found, it returns a list of single element
7459 `undecided' or its subsidiary coding system according to a detected
7462 If optional argument HIGHEST is non-nil, return the coding system of
7463 highest priority. */)
7464 (start
, end
, highest
)
7465 Lisp_Object start
, end
, highest
;
7468 int from_byte
, to_byte
;
7470 CHECK_NUMBER_COERCE_MARKER (start
);
7471 CHECK_NUMBER_COERCE_MARKER (end
);
7473 validate_region (&start
, &end
);
7474 from
= XINT (start
), to
= XINT (end
);
7475 from_byte
= CHAR_TO_BYTE (from
);
7476 to_byte
= CHAR_TO_BYTE (to
);
/* When the gap position falls inside [FROM, TO], move the gap to TO
   before taking the address of the text; presumably this makes the
   bytes of the region contiguous -- confirm.  */
7478 if (from
< GPT
&& to
>= GPT
)
7479 move_gap_both (to
, to_byte
);
7481 return detect_coding_system (BYTE_POS_ADDR (from_byte
),
7482 to
- from
, to_byte
- from_byte
,
7484 !NILP (current_buffer
7485 ->enable_multibyte_characters
),
/* Lisp primitive `detect-coding-string'.  Verifies that STRING is a
   string and passes its data pointer, character count, byte count and
   multibyteness to detect_coding_system; HIGHEST is forwarded as a C
   truth value.  */
7489 DEFUN ("detect-coding-string", Fdetect_coding_string
, Sdetect_coding_string
,
7491 doc
: /* Detect coding system of the text in STRING.
7492 Return a list of possible coding systems ordered by priority.
7494 If only ASCII characters are found, it returns a list of single element
7495 `undecided' or its subsidiary coding system according to a detected
7498 If optional argument HIGHEST is non-nil, return the coding system of
7499 highest priority. */)
7501 Lisp_Object string
, highest
;
7503 CHECK_STRING (string
);
7505 return detect_coding_system (SDATA (string
),
7506 SCHARS (string
), SBYTES (string
),
7507 !NILP (highest
), STRING_MULTIBYTE (string
),
/* Test whether character C is covered by the charset list stored in
   the coding-system attribute vector ATTRS.  If ATTRS carries a
   translation table, C is translated through it first.  The value is
   non-zero when the charset loop leaves TAIL non-nil (the statement
   that exits the loop on a match is not visible in this excerpt).  */
7513 char_encodable_p (c
, attrs
)
7518 struct charset
*charset
;
7519 Lisp_Object translation_table
;
7521 translation_table
= CODING_ATTR_TRANS_TBL (attrs
);
7522 if (! NILP (translation_table
))
7523 c
= translate_char (translation_table
, c
);
/* Walk the charset ids of ATTRS and test C against each charset.  */
7524 for (tail
= CODING_ATTR_CHARSET_LIST (attrs
);
7525 CONSP (tail
); tail
= XCDR (tail
))
7527 charset
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
7528 if (CHAR_CHARSET_P (c
, charset
))
7531 return (! NILP (tail
));
7535 /* Return a list of coding systems that safely encode the text between
7536 START and END. If EXCLUDE is non-nil, it is a list of coding
7537 systems not to check. The returned list doesn't contain any such
7538 coding systems. In any case, if the text contains only ASCII or is
7539 unibyte, return t. */
7541 DEFUN ("find-coding-systems-region-internal",
7542 Ffind_coding_systems_region_internal
,
7543 Sfind_coding_systems_region_internal
, 2, 3, 0,
7544 doc
: /* Internal use only. */)
7545 (start
, end
, exclude
)
7546 Lisp_Object start
, end
, exclude
;
7548 Lisp_Object coding_attrs_list
, safe_codings
;
7549 EMACS_INT start_byte
, end_byte
;
7550 const unsigned char *p
, *pbeg
, *pend
;
7552 Lisp_Object tail
, elt
;
/* START may be a string, in which case the string itself is the text
   to examine and END_BYTE is its byte length.  Otherwise START and END
   are buffer positions that are range-checked against BEG and Z.  */
7554 if (STRINGP (start
))
7556 if (!STRING_MULTIBYTE (start
)
7557 || SCHARS (start
) == SBYTES (start
))
7560 end_byte
= SBYTES (start
);
7564 CHECK_NUMBER_COERCE_MARKER (start
);
7565 CHECK_NUMBER_COERCE_MARKER (end
);
7566 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
7567 args_out_of_range (start
, end
);
7568 if (NILP (current_buffer
->enable_multibyte_characters
))
7570 start_byte
= CHAR_TO_BYTE (XINT (start
));
7571 end_byte
= CHAR_TO_BYTE (XINT (end
));
7572 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
/* The gap lies strictly inside the region: move it to START when START
   is the nearer end, and (in the branch whose keyword is not visible
   here) to END.  */
7575 if (XINT (start
) < GPT
&& XINT (end
) > GPT
)
7577 if ((GPT
- XINT (start
)) < (XINT (end
) - GPT
))
7578 move_gap_both (XINT (start
), start_byte
);
7580 move_gap_both (XINT (end
), end_byte
);
/* Collect the attribute vectors of the candidate coding systems.  An
   entry of Vcoding_system_list is kept only when its name equals its
   own base name and its type is not `undecided'; the Fmemq test
   filters against EXCLUDE.  The translation-table slot of each kept
   vector is refreshed from get_translation_table first.  */
7584 coding_attrs_list
= Qnil
;
7585 for (tail
= Vcoding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
7587 || NILP (Fmemq (XCAR (tail
), exclude
)))
7591 attrs
= AREF (CODING_SYSTEM_SPEC (XCAR (tail
)), 0);
7592 if (EQ (XCAR (tail
), CODING_ATTR_BASE_NAME (attrs
))
7593 && ! EQ (CODING_ATTR_TYPE (attrs
), Qundecided
))
7595 ASET (attrs
, coding_attr_trans_tbl
,
7596 get_translation_table (attrs
, 1, NULL
));
7597 coding_attrs_list
= Fcons (attrs
, coding_attrs_list
);
7601 if (STRINGP (start
))
7602 p
= pbeg
= SDATA (start
);
7604 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
7605 pend
= p
+ (end_byte
- start_byte
);
/* Trim ASCII bytes from the head and from the tail of the text.  */
7607 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++;
7608 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
7612 if (ASCII_BYTE_P (*p
))
7616 c
= STRING_CHAR_ADVANCE (p
);
/* charset_map_loaded is cleared before the candidates are tested and
   examined afterwards (see below).  */
7618 charset_map_loaded
= 0;
7619 for (tail
= coding_attrs_list
; CONSP (tail
);)
7624 else if (char_encodable_p (c
, elt
))
7626 else if (CONSP (XCDR (tail
)))
/* Remove the current candidate in place by copying the next cell's car
   and cdr into this cell; when there is no next cell the car is set to
   nil instead.  */
7628 XSETCAR (tail
, XCAR (XCDR (tail
)));
7629 XSETCDR (tail
, XCDR (XCDR (tail
)));
7633 XSETCAR (tail
, Qnil
);
/* If charset_map_loaded became non-zero, recompute P and PEND from
   their offsets relative to a freshly fetched PBEG; presumably loading
   a charset map can relocate the text -- confirm.  */
7637 if (charset_map_loaded
)
7639 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
7641 if (STRINGP (start
))
7642 pbeg
= SDATA (start
);
7644 pbeg
= BYTE_POS_ADDR (start_byte
);
7645 p
= pbeg
+ p_offset
;
7646 pend
= pbeg
+ pend_offset
;
/* Build the result: `raw-text' and `no-conversion' are always present,
   and the base name of every candidate whose list cell is still
   non-nil is consed in front of them.  */
7651 safe_codings
= list2 (Qraw_text
, Qno_conversion
);
7652 for (tail
= coding_attrs_list
; CONSP (tail
); tail
= XCDR (tail
))
7653 if (! NILP (XCAR (tail
)))
7654 safe_codings
= Fcons (CODING_ATTR_BASE_NAME (XCAR (tail
)), safe_codings
);
7656 return safe_codings
;
/* Lisp primitive `unencodable-char-position'.  Sets up CODING-SYSTEM,
   then scans either the buffer region START..END or, when STRING is
   given, the characters START..END of STRING, and records the
   positions of characters that the coding system's charset list does
   not cover.  */
7660 DEFUN ("unencodable-char-position", Funencodable_char_position
,
7661 Sunencodable_char_position
, 3, 5, 0,
7663 Return position of first un-encodable character in a region.
7664 START and END specfiy the region and CODING-SYSTEM specifies the
7665 encoding to check. Return nil if CODING-SYSTEM does encode the region.
7667 If optional 4th argument COUNT is non-nil, it specifies at most how
7668 many un-encodable characters to search. In this case, the value is a
7671 If optional 5th argument STRING is non-nil, it is a string to search
7672 for un-encodable characters. In that case, START and END are indexes
7674 (start
, end
, coding_system
, count
, string
)
7675 Lisp_Object start
, end
, coding_system
, count
, string
;
7678 struct coding_system coding
;
7679 Lisp_Object attrs
, charset_list
, translation_table
;
7680 Lisp_Object positions
;
7682 const unsigned char *p
, *stop
, *pend
;
7683 int ascii_compatible
;
/* Fcheck_coding_system validates the argument before the coding
   structure is initialized from it.  */
7685 setup_coding_system (Fcheck_coding_system (coding_system
), &coding
);
7686 attrs
= CODING_ID_ATTRS (coding
.id
);
7687 if (EQ (CODING_ATTR_TYPE (attrs
), Qraw_text
))
7689 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
7690 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7691 translation_table
= get_translation_table (attrs
, 1, NULL
);
7695 validate_region (&start
, &end
);
7696 from
= XINT (start
);
/* Buffer case: tests for a unibyte buffer, or an ASCII-compatible
   coding system with a region whose character length equals its byte
   length.  */
7698 if (NILP (current_buffer
->enable_multibyte_characters
)
7699 || (ascii_compatible
7700 && (to
- from
) == (CHAR_TO_BYTE (to
) - (CHAR_TO_BYTE (from
)))))
7702 p
= CHAR_POS_ADDR (from
);
7703 pend
= CHAR_POS_ADDR (to
);
7704 if (from
< GPT
&& to
>= GPT
)
/* String case: START and END are checked as indexes into STRING.  */
7711 CHECK_STRING (string
);
7712 CHECK_NATNUM (start
);
7714 from
= XINT (start
);
7717 || to
> SCHARS (string
))
7718 args_out_of_range_3 (string
, start
, end
);
7719 if (! STRING_MULTIBYTE (string
))
7721 p
= SDATA (string
) + string_char_to_byte (string
, from
);
7722 stop
= pend
= SDATA (string
) + string_char_to_byte (string
, to
);
7723 if (ascii_compatible
&& (to
- from
) == (pend
- p
))
7731 CHECK_NATNUM (count
);
/* With an ASCII-compatible coding system, runs of ASCII bytes are
   stepped over without decoding a character.  */
7740 if (ascii_compatible
)
7741 while (p
< stop
&& ASCII_BYTE_P (*p
))
7751 c
= STRING_CHAR_ADVANCE (p
);
/* C is recorded unless it is ASCII under an ASCII-compatible coding
   system, or char_charset finds a charset in CHARSET_LIST for its
   translated value.  */
7752 if (! (ASCII_CHAR_P (c
) && ascii_compatible
)
7753 && ! char_charset (translate_char (translation_table
, c
),
7754 charset_list
, NULL
))
7756 positions
= Fcons (make_number (from
), positions
);
/* POSITIONS is built by consing at the front: without COUNT its car is
   returned, otherwise the whole list in reversed order.  */
7765 return (NILP (count
) ? Fcar (positions
) : Fnreverse (positions
));
7769 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region
,
7770 Scheck_coding_systems_region
, 3, 3, 0,
7771 doc
: /* Check if the region is encodable by coding systems.
7773 START and END are buffer positions specifying the region.
7774 CODING-SYSTEM-LIST is a list of coding systems to check.
7776 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7777 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
7778 whole region, POS0, POS1, ... are buffer positions where non-encodable
7779 characters are found.
7781 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7784 START may be a string. In that case, check if the string is
7785 encodable, and the value contains indices to the string instead of
7786 buffer positions. END is ignored. */)
7787 (start
, end
, coding_system_list
)
7788 Lisp_Object start
, end
, coding_system_list
;
7791 EMACS_INT start_byte
, end_byte
;
7793 const unsigned char *p
, *pbeg
, *pend
;
7795 Lisp_Object tail
, elt
, attrs
;
/* NOTE(review): find-coding-systems-region-internal guards the
   equivalent string shortcut with
   `!STRING_MULTIBYTE (start) || SCHARS (start) == SBYTES (start)',
   whereas the test below uses `&&' and `!='.  Confirm that the
   difference is intended.  */
7797 if (STRINGP (start
))
7799 if (!STRING_MULTIBYTE (start
)
7800 && SCHARS (start
) != SBYTES (start
))
7803 end_byte
= SBYTES (start
);
7808 CHECK_NUMBER_COERCE_MARKER (start
);
7809 CHECK_NUMBER_COERCE_MARKER (end
);
7810 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
7811 args_out_of_range (start
, end
);
7812 if (NILP (current_buffer
->enable_multibyte_characters
))
7814 start_byte
= CHAR_TO_BYTE (XINT (start
));
7815 end_byte
= CHAR_TO_BYTE (XINT (end
));
7816 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
/* The gap lies strictly inside the region: move it to START when START
   is the nearer end, and (in the branch whose keyword is not visible
   here) to END.  */
7819 if (XINT (start
) < GPT
&& XINT (end
) > GPT
)
7821 if ((GPT
- XINT (start
)) < (XINT (end
) - GPT
))
7822 move_gap_both (XINT (start
), start_byte
);
7824 move_gap_both (XINT (end
), end_byte
);
/* Build LIST with one element (NAME ATTRS) per requested coding
   system; un-encodable positions are later consed in after ATTRS.  */
7830 for (tail
= coding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
7833 attrs
= AREF (CODING_SYSTEM_SPEC (elt
), 0);
7834 ASET (attrs
, coding_attr_trans_tbl
,
7835 get_translation_table (attrs
, 1, NULL
));
7836 list
= Fcons (Fcons (elt
, Fcons (attrs
, Qnil
)), list
);
7839 if (STRINGP (start
))
7840 p
= pbeg
= SDATA (start
);
7842 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
7843 pend
= p
+ (end_byte
- start_byte
);
/* Trim ASCII bytes from both ends; POS advances together with P so it
   keeps tracking the position of the byte at P.  */
7845 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++, pos
++;
7846 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
7850 if (ASCII_BYTE_P (*p
))
7854 c
= STRING_CHAR_ADVANCE (p
);
7856 charset_map_loaded
= 0;
/* For each coding system that cannot encode C, push POS onto the
   position list stored after its ATTRS.  */
7857 for (tail
= list
; CONSP (tail
); tail
= XCDR (tail
))
7859 elt
= XCDR (XCAR (tail
));
7860 if (! char_encodable_p (c
, XCAR (elt
)))
7861 XSETCDR (elt
, Fcons (make_number (pos
), XCDR (elt
)));
/* If charset_map_loaded became non-zero, recompute P and PEND from
   their offsets relative to a freshly fetched PBEG; presumably loading
   a charset map can relocate the text -- confirm.  */
7863 if (charset_map_loaded
)
7865 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
7867 if (STRINGP (start
))
7868 pbeg
= SDATA (start
);
7870 pbeg
= BYTE_POS_ADDR (start_byte
);
7871 p
= pbeg
+ p_offset
;
7872 pend
= pbeg
+ pend_offset
;
/* Keep only the coding systems that recorded at least one position,
   and put each position list back into the order of recording.  */
7880 for (; CONSP (tail
); tail
= XCDR (tail
))
7883 if (CONSP (XCDR (XCDR (elt
))))
7884 list
= Fcons (Fcons (XCAR (elt
), Fnreverse (XCDR (XCDR (elt
)))),
/* Convert the text between START and END of the current buffer with
   CODING_SYSTEM, calling encode_coding_object or decode_coding_object
   (the test that selects between them, presumably on ENCODEP, is not
   visible in this excerpt).

   A nil CODING_SYSTEM is replaced by `no-conversion'.  A nil
   DST_OBJECT means the current buffer itself; otherwise DST_OBJECT
   must be t or a buffer.  Vlast_coding_system_used is set to the name
   of the coding system actually used (presumably only when NORECORD is
   zero -- the guard is not visible here).

   The value is the number of produced characters when DST_OBJECT is a
   buffer, and coding.dst_object otherwise.  */
7893 code_convert_region (start
, end
, coding_system
, dst_object
, encodep
, norecord
)
7894 Lisp_Object start
, end
, coding_system
, dst_object
;
7895 int encodep
, norecord
;
7897 struct coding_system coding
;
7898 EMACS_INT from
, from_byte
, to
, to_byte
;
7899 Lisp_Object src_object
;
7901 CHECK_NUMBER_COERCE_MARKER (start
);
7902 CHECK_NUMBER_COERCE_MARKER (end
);
7903 if (NILP (coding_system
))
7904 coding_system
= Qno_conversion
;
7906 CHECK_CODING_SYSTEM (coding_system
);
/* The source is always the current buffer.  */
7907 src_object
= Fcurrent_buffer ();
7908 if (NILP (dst_object
))
7909 dst_object
= src_object
;
7910 else if (! EQ (dst_object
, Qt
))
7911 CHECK_BUFFER (dst_object
);
7913 validate_region (&start
, &end
);
7914 from
= XFASTINT (start
);
7915 from_byte
= CHAR_TO_BYTE (from
);
7916 to
= XFASTINT (end
);
7917 to_byte
= CHAR_TO_BYTE (to
);
7919 setup_coding_system (coding_system
, &coding
);
/* The whole region is converted in one call, so it is flagged as the
   last block.  */
7920 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
7923 encode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
7926 decode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
7929 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
7931 return (BUFFERP (dst_object
)
7932 ? make_number (coding
.produced_char
)
7933 : coding
.dst_object
);
/* Lisp primitive `decode-coding-region': a thin wrapper that calls
   code_convert_region with ENCODEP = 0 and NORECORD = 0.  */
7937 DEFUN ("decode-coding-region", Fdecode_coding_region
, Sdecode_coding_region
,
7938 3, 4, "r\nzCoding system: ",
7939 doc
: /* Decode the current region from the specified coding system.
7940 When called from a program, takes four arguments:
7941 START, END, CODING-SYSTEM, and DESTINATION.
7942 START and END are buffer positions.
7944 Optional 4th argument DESTINATION specifies where the decoded text goes.
7945 If nil, the region between START and END is replaced by the decoded text.
7946 If buffer, the decoded text is inserted in the buffer.
7947 If t, the decoded text is returned.
7949 This function sets `last-coding-system-used' to the precise coding system
7950 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7951 not fully specified.)
7952 It returns the length of the decoded text. */)
7953 (start
, end
, coding_system
, destination
)
7954 Lisp_Object start
, end
, coding_system
, destination
;
7956 return code_convert_region (start
, end
, coding_system
, destination
, 0, 0);
/* Lisp primitive `encode-coding-region': a thin wrapper that calls
   code_convert_region with ENCODEP = 1 and NORECORD = 0.  */
7959 DEFUN ("encode-coding-region", Fencode_coding_region
, Sencode_coding_region
,
7960 3, 4, "r\nzCoding system: ",
7961 doc
: /* Encode the current region by specified coding system.
7962 When called from a program, takes three arguments:
7963 START, END, and CODING-SYSTEM. START and END are buffer positions.
7965 Optional 4th argument DESTINATION specifies where the encoded text goes.
7966 If nil, the region between START and END is replaced by the encoded text.
7967 If buffer, the encoded text is inserted in the buffer.
7968 If t, the encoded text is returned.
7970 This function sets `last-coding-system-used' to the precise coding system
7971 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7972 not fully specified.)
7973 It returns the length of the encoded text. */)
7974 (start
, end
, coding_system
, destination
)
7975 Lisp_Object start
, end
, coding_system
, destination
;
7977 return code_convert_region (start
, end
, coding_system
, destination
, 1, 0);
/* Convert STRING with CODING_SYSTEM, calling encode_coding_object or
   decode_coding_object on the whole string (the test that selects
   between them, presumably on ENCODEP, is not visible in this
   excerpt).

   DST_OBJECT is nil, t or a buffer; any other value is rejected by
   CHECK_BUFFER.  Vlast_coding_system_used is set to the name of the
   coding system actually used (presumably only when NORECORD is zero
   -- the guard is not visible here).

   The value is the number of produced characters when DST_OBJECT is a
   buffer, and coding.dst_object otherwise.  */
7981 code_convert_string (string
, coding_system
, dst_object
,
7982 encodep
, nocopy
, norecord
)
7983 Lisp_Object string
, coding_system
, dst_object
;
7984 int encodep
, nocopy
, norecord
;
7986 struct coding_system coding
;
7987 EMACS_INT chars
, bytes
;
7989 CHECK_STRING (string
);
/* Shortcut for a nil CODING_SYSTEM: `no-conversion' is recorded and,
   when there is no destination object, STRING or a copy of it is
   returned without any conversion.
   NOTE(review): as written, a non-zero NOCOPY returns a copy and a
   zero NOCOPY returns STRING itself, while the doc strings of
   decode-coding-string and encode-coding-string describe NOCOPY
   non-nil as permission to return STRING itself.  Confirm the sense of
   this test.  */
7990 if (NILP (coding_system
))
7993 Vlast_coding_system_used
= Qno_conversion
;
7994 if (NILP (dst_object
))
7995 return (nocopy
? Fcopy_sequence (string
) : string
);
7998 if (NILP (coding_system
))
7999 coding_system
= Qno_conversion
;
8001 CHECK_CODING_SYSTEM (coding_system
);
8002 if (NILP (dst_object
))
8004 else if (! EQ (dst_object
, Qt
))
8005 CHECK_BUFFER (dst_object
);
8007 setup_coding_system (coding_system
, &coding
);
/* The whole string is converted in one call, so it is flagged as the
   last block.  */
8008 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
8009 chars
= SCHARS (string
);
8010 bytes
= SBYTES (string
);
8012 encode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
8014 decode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
8016 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
8018 return (BUFFERP (dst_object
)
8019 ? make_number (coding
.produced_char
)
8020 : coding
.dst_object
);
8024 /* Encode or decode STRING according to CODING_SYSTEM.
8025 Do not set Vlast_coding_system_used.
8027 This function is called only from macros DECODE_FILE and
8028 ENCODE_FILE, thus we ignore character composition. */
8031 code_convert_string_norecord (string
, coding_system
, encodep
)
8032 Lisp_Object string
, coding_system
;
/* Delegate to code_convert_string with DST_OBJECT = t, NOCOPY = 0 and
   NORECORD = 1.  */
8035 return code_convert_string (string
, coding_system
, Qt
, encodep
, 0, 1);
/* Lisp primitive `decode-coding-string': calls code_convert_string
   with BUFFER as the destination object, ENCODEP = 0, NOCOPY taken
   from the Lisp argument, and NORECORD = 0.  */
8039 DEFUN ("decode-coding-string", Fdecode_coding_string
, Sdecode_coding_string
,
8041 doc
: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
8043 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
8044 if the decoding operation is trivial.
8046 Optional fourth arg BUFFER non-nil means that the decoded text is
8047 inserted in BUFFER instead of returned as a string. In this case,
8048 the return value is BUFFER.
8050 This function sets `last-coding-system-used' to the precise coding system
8051 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8052 not fully specified.) */)
8053 (string
, coding_system
, nocopy
, buffer
)
8054 Lisp_Object string
, coding_system
, nocopy
, buffer
;
8056 return code_convert_string (string
, coding_system
, buffer
,
8057 0, ! NILP (nocopy
), 0);
/* Lisp primitive `encode-coding-string': calls code_convert_string
   with BUFFER as the destination object, ENCODEP = 1 and NOCOPY taken
   from the Lisp argument.
   NOTE(review): the last argument (NORECORD) is 1 here but 0 in
   decode-coding-string, although the doc string below says that
   `last-coding-system-used' is set.  Confirm which is intended.  */
8060 DEFUN ("encode-coding-string", Fencode_coding_string
, Sencode_coding_string
,
8062 doc
: /* Encode STRING to CODING-SYSTEM, and return the result.
8064 Optional third arg NOCOPY non-nil means it is OK to return STRING
8065 itself if the encoding operation is trivial.
8067 Optional fourth arg BUFFER non-nil means that the encoded text is
8068 inserted in BUFFER instead of returned as a string. In this case,
8069 the return value is BUFFER.
8071 This function sets `last-coding-system-used' to the precise coding system
8072 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8073 not fully specified.) */)
8074 (string
, coding_system
, nocopy
, buffer
)
8075 Lisp_Object string
, coding_system
, nocopy
, buffer
;
8077 return code_convert_string (string
, coding_system
, buffer
,
8078 1, ! NILP (nocopy
), 1);
/* Lisp primitive `decode-sjis-char'.  Picks one of the first three
   charsets in the charset list of Vsjis_coding_system according to the
   value of CODE, decodes CODE with that charset, and returns the
   resulting character as a Lisp integer.  */
8082 DEFUN ("decode-sjis-char", Fdecode_sjis_char
, Sdecode_sjis_char
, 1, 1, 0,
8083 doc
: /* Decode a Japanese character which has CODE in shift_jis encoding.
8084 Return the corresponding character. */)
8088 Lisp_Object spec
, attrs
, val
;
8089 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
, *charset
;
8092 CHECK_NATNUM (code
);
8093 c
= XFASTINT (code
);
8094 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
8095 attrs
= AREF (spec
, 0);
8097 if (ASCII_BYTE_P (c
)
8098 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
/* The charset list is read positionally: first the roman charset,
   then kana, then kanji.  */
8101 val
= CODING_ATTR_CHARSET_LIST (attrs
);
8102 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
8103 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
8104 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
8107 charset
= charset_roman
;
8108 else if (c
>= 0xA0 && c
< 0xDF)
8110 charset
= charset_kana
;
/* Remaining codes are split into a high byte S1 and a low byte S2,
   and both are range-checked before the kanji charset is chosen.
   NOTE(review): the error calls pass CODE itself, not C, for the %d
   conversion -- confirm that this prints the intended value.  */
8115 int s1
= c
>> 8, s2
= c
& 0xFF;
8117 if (s1
< 0x81 || (s1
> 0x9F && s1
< 0xE0) || s1
> 0xEF
8118 || s2
< 0x40 || s2
== 0x7F || s2
> 0xFC)
8119 error ("Invalid code: %d", code
);
8121 charset
= charset_kanji
;
8123 c
= DECODE_CHAR (charset
, c
);
8125 error ("Invalid code: %d", code
);
8126 return make_number (c
);
/* Lisp primitive `encode-sjis-char'.  Looks the character up in the
   charset list of Vsjis_coding_system with char_charset and returns
   the code point it reports, signalling an error when that code is the
   charset's invalid-code marker.  */
8130 DEFUN ("encode-sjis-char", Fencode_sjis_char
, Sencode_sjis_char
, 1, 1, 0,
8131 doc
: /* Encode a Japanese character CHAR to shift_jis encoding.
8132 Return the corresponding code in SJIS. */)
8136 Lisp_Object spec
, attrs
, charset_list
;
8138 struct charset
*charset
;
8141 CHECK_CHARACTER (ch
);
8143 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
8144 attrs
= AREF (spec
, 0);
/* ASCII characters are treated specially when the coding system is
   ASCII compatible (the statement taken in that case is not visible in
   this excerpt).  */
8146 if (ASCII_CHAR_P (c
)
8147 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
8150 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
8151 charset
= char_charset (c
, charset_list
, &code
);
8152 if (code
== CHARSET_INVALID_CODE (charset
))
8153 error ("Can't encode by shift_jis encoding: %d", c
);
8156 return make_number (code
);
/* Lisp primitive `decode-big5-char'.  Picks one of the first two
   charsets in the charset list of Vbig5_coding_system according to the
   value of CODE, decodes CODE with that charset, and returns the
   resulting character as a Lisp integer.  */
8159 DEFUN ("decode-big5-char", Fdecode_big5_char
, Sdecode_big5_char
, 1, 1, 0,
8160 doc
: /* Decode a Big5 character which has CODE in BIG5 coding system.
8161 Return the corresponding character. */)
8165 Lisp_Object spec
, attrs
, val
;
8166 struct charset
*charset_roman
, *charset_big5
, *charset
;
8169 CHECK_NATNUM (code
);
8170 c
= XFASTINT (code
);
8171 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
8172 attrs
= AREF (spec
, 0);
8174 if (ASCII_BYTE_P (c
)
8175 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
/* The charset list is read positionally: first the roman charset,
   then the Big5 charset.  */
8178 val
= CODING_ATTR_CHARSET_LIST (attrs
);
8179 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
8180 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
8183 charset
= charset_roman
;
/* NOTE(review): B2 is masked with 0x7F, so it can never exceed 0x7F;
   the `b2 > 0xFE' test and the upper part of the 0x7E..0xA1 test below
   are then unreachable.  decode-sjis-char masks its low byte with
   0xFF.  Confirm whether 0xFF was intended here.  The error calls pass
   CODE itself, not C, for the %d conversion -- confirm as well.  */
8186 int b1
= c
>> 8, b2
= c
& 0x7F;
8187 if (b1
< 0xA1 || b1
> 0xFE
8188 || b2
< 0x40 || (b2
> 0x7E && b2
< 0xA1) || b2
> 0xFE)
8189 error ("Invalid code: %d", code
);
8190 charset
= charset_big5
;
8192 c
= DECODE_CHAR (charset
, (unsigned )c
);
8194 error ("Invalid code: %d", code
);
8195 return make_number (c
);
/* Lisp primitive `encode-big5-char'.  Looks the character up in the
   charset list of Vbig5_coding_system with char_charset and returns
   the code point it reports, signalling an error when that code is the
   charset's invalid-code marker.  */
8198 DEFUN ("encode-big5-char", Fencode_big5_char
, Sencode_big5_char
, 1, 1, 0,
8199 doc
: /* Encode the Big5 character CHAR to BIG5 coding system.
8200 Return the corresponding character code in Big5. */)
8204 Lisp_Object spec
, attrs
, charset_list
;
8205 struct charset
*charset
;
8209 CHECK_CHARACTER (ch
);
8211 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
8212 attrs
= AREF (spec
, 0);
/* ASCII characters are treated specially when the coding system is
   ASCII compatible (the statement taken in that case is not visible in
   this excerpt).  */
8213 if (ASCII_CHAR_P (c
)
8214 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
8217 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
8218 charset
= char_charset (c
, charset_list
, &code
);
8219 if (code
== CHARSET_INVALID_CODE (charset
))
8220 error ("Can't encode by Big5 encoding: %d", c
);
8222 return make_number (code
);
/* Lisp primitive `set-terminal-coding-system-internal'.  Checks that
   CODING_SYSTEM is a symbol naming a coding system, sets up a coding
   structure from it, and then adjusts the mode, composition and
   multibyte flags of terminal_coding.  */
8226 DEFUN ("set-terminal-coding-system-internal",
8227 Fset_terminal_coding_system_internal
,
8228 Sset_terminal_coding_system_internal
, 1, 1, 0,
8229 doc
: /* Internal use only. */)
8231 Lisp_Object coding_system
;
8233 CHECK_SYMBOL (coding_system
);
8234 setup_coding_system (Fcheck_coding_system (coding_system
),
8237 /* We had better not send unsafe characters to terminal. */
8238 terminal_coding
.mode
|= CODING_MODE_SAFE_ENCODING
;
8239 /* Character composition should be disabled. */
8240 terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
/* Source is flagged multibyte and destination unibyte.  */
8241 terminal_coding
.src_multibyte
= 1;
8242 terminal_coding
.dst_multibyte
= 0;
/* Lisp primitive `set-safe-terminal-coding-system-internal'.  Checks
   that CODING_SYSTEM is a symbol naming a coding system, sets up
   safe_terminal_coding from it, and then adjusts its composition and
   multibyte flags.  */
8246 DEFUN ("set-safe-terminal-coding-system-internal",
8247 Fset_safe_terminal_coding_system_internal
,
8248 Sset_safe_terminal_coding_system_internal
, 1, 1, 0,
8249 doc
: /* Internal use only. */)
8251 Lisp_Object coding_system
;
8253 CHECK_SYMBOL (coding_system
);
8254 setup_coding_system (Fcheck_coding_system (coding_system
),
8255 &safe_terminal_coding
);
8256 /* Character composition should be disabled. */
8257 safe_terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
/* Source is flagged multibyte and destination unibyte.  */
8258 safe_terminal_coding
.src_multibyte
= 1;
8259 safe_terminal_coding
.dst_multibyte
= 0;
/* Lisp primitive `terminal-coding-system'.  Returns the name recorded
   for terminal_coding's id, except that `undecided' is reported as
   nil.  */
8263 DEFUN ("terminal-coding-system",
8264 Fterminal_coding_system
, Sterminal_coding_system
, 0, 0, 0,
8265 doc
: /* Return coding system specified for terminal output. */)
8268 Lisp_Object coding_system
;
8270 coding_system
= CODING_ID_NAME (terminal_coding
.id
);
8271 /* For backward compatibility, return nil if it is `undecided'. */
8272 return (! EQ (coding_system
, Qundecided
) ? coding_system
: Qnil
);
/* Lisp primitive `set-keyboard-coding-system-internal'.  Checks that
   CODING_SYSTEM is a symbol naming a coding system, sets up a coding
   structure from it, and clears the composition-annotation flag of
   keyboard_coding.  */
8275 DEFUN ("set-keyboard-coding-system-internal",
8276 Fset_keyboard_coding_system_internal
,
8277 Sset_keyboard_coding_system_internal
, 1, 1, 0,
8278 doc
: /* Internal use only. */)
8280 Lisp_Object coding_system
;
8282 CHECK_SYMBOL (coding_system
);
8283 setup_coding_system (Fcheck_coding_system (coding_system
),
8285 /* Character composition should be disabled. */
8286 keyboard_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
/* Lisp primitive `keyboard-coding-system'.  Returns the name recorded
   for keyboard_coding's id.  */
8290 DEFUN ("keyboard-coding-system",
8291 Fkeyboard_coding_system
, Skeyboard_coding_system
, 0, 0, 0,
8292 doc
: /* Return coding system specified for decoding keyboard input. */)
8295 return CODING_ID_NAME (keyboard_coding
.id
);
8299 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system
,
8300 Sfind_operation_coding_system
, 1, MANY
, 0,
8301 doc
: /* Choose a coding system for an operation based on the target name.
8302 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8303 DECODING-SYSTEM is the coding system to use for decoding
8304 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8305 for encoding (in case OPERATION does encoding).
8307 The first argument OPERATION specifies an I/O primitive:
8308 For file I/O, `insert-file-contents' or `write-region'.
8309 For process I/O, `call-process', `call-process-region', or `start-process'.
8310 For network I/O, `open-network-stream'.
8312 The remaining arguments should be the same arguments that were passed
8313 to the primitive. Depending on which primitive, one of those arguments
8314 is selected as the TARGET. For example, if OPERATION does file I/O,
8315 whichever argument specifies the file name is TARGET.
8317 TARGET has a meaning which depends on OPERATION:
8318 For file I/O, TARGET is a file name (except for the special case below).
8319 For process I/O, TARGET is a process name.
8320 For network I/O, TARGET is a service name or a port number
8322 This function looks up what is specified for TARGET in
8323 `file-coding-system-alist', `process-coding-system-alist',
8324 or `network-coding-system-alist' depending on OPERATION.
8325 They may specify a coding system, a cons of coding systems,
8326 or a function symbol to call.
8327 In the last case, we call the function with one argument,
8328 which is a list of all the arguments given to this function.
8330 If OPERATION is `insert-file-contents', the argument corresponding to
8331 TARGET may be a cons (FILENAME . BUFFER). In that case, FILENAME is a
8332 file name to look up, and BUFFER is a buffer that contains the file's
8333 contents (not yet decoded). If `file-coding-system-alist' specifies a
8334 function to call for FILENAME, that function should examine the
8335 contents of BUFFER instead of reading the file.
8337 usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
8342 Lisp_Object operation
, target_idx
, target
, val
;
8343 register Lisp_Object chain
;
8346 error ("Too few arguments");
/* OPERATION must be a symbol whose Qtarget_idx property is an integer;
   that integer, plus one, is the index in ARGS of the TARGET
   argument.  */
8347 operation
= args
[0];
8348 if (!SYMBOLP (operation
)
8349 || !INTEGERP (target_idx
= Fget (operation
, Qtarget_idx
)))
8350 error ("Invalid first arguement");
8351 if (nargs
< 1 + XINT (target_idx
))
8352 error ("Too few arguments for operation: %s",
8353 SDATA (SYMBOL_NAME (operation
)));
8354 target
= args
[XINT (target_idx
) + 1];
/* TARGET must be a string; an integer is accepted only for
   `open-network-stream'.  */
8355 if (!(STRINGP (target
)
8356 || (EQ (operation
, Qopen_network_stream
) && INTEGERP (target
))))
8357 error ("Invalid %dth argument", XINT (target_idx
) + 1);
/* Choose the alist to search: the file alist for
   `insert-file-contents' and `write-region', the network alist for
   `open-network-stream', and the process alist for everything
   else.  */
8359 chain
= ((EQ (operation
, Qinsert_file_contents
)
8360 || EQ (operation
, Qwrite_region
))
8361 ? Vfile_coding_system_alist
8362 : (EQ (operation
, Qopen_network_stream
)
8363 ? Vnetwork_coding_system_alist
8364 : Vprocess_coding_system_alist
));
/* An alist element matches when its car is a string that matches a
   string TARGET through fast_string_match, or is EQ to an integer
   TARGET.  */
8368 for (; CONSP (chain
); chain
= XCDR (chain
))
8374 && ((STRINGP (target
)
8375 && STRINGP (XCAR (elt
))
8376 && fast_string_match (XCAR (elt
), target
) >= 0)
8377 || (INTEGERP (target
) && EQ (target
, XCAR (elt
)))))
8380 /* Here, if VAL is both a valid coding system and a valid
8381 function symbol, we return VAL as a coding system. */
8384 if (! SYMBOLP (val
))
8386 if (! NILP (Fcoding_system_p (val
)))
8387 return Fcons (val
, val
);
/* Otherwise, a function-bound symbol is called through safe_call1
   with the list of all arguments; a coding-system symbol it returns is
   used for both decoding and encoding.  */
8388 if (! NILP (Ffboundp (val
)))
8390 val
= safe_call1 (val
, Flist (nargs
, args
));
8393 if (SYMBOLP (val
) && ! NILP (Fcoding_system_p (val
)))
8394 return Fcons (val
, val
);
/* Lisp primitive `set-coding-system-priority'.  Moves the categories
   of the coding systems given as arguments to the front of
   coding_priorities, in argument order, keeps the remaining
   categories in their previous relative order, and rebuilds
   Vcoding_category_list to match.  */
8402 DEFUN ("set-coding-system-priority", Fset_coding_system_priority
,
8403 Sset_coding_system_priority
, 0, MANY
, 0,
8404 doc
: /* Assign higher priority to the coding systems given as arguments.
8405 If multiple coding systems belong to the same category,
8406 all but the first one are ignored.
8408 usage: (set-coding-system-priority ...) */)
/* CHANGED[cat] is non-zero once category CAT has been given a new
   priority; PRIORITIES receives the new ordering.  */
8414 int changed
[coding_category_max
];
8415 enum coding_category priorities
[coding_category_max
];
8417 bzero (changed
, sizeof changed
);
8419 for (i
= j
= 0; i
< nargs
; i
++)
8421 enum coding_category category
;
8422 Lisp_Object spec
, attrs
;
8424 CHECK_CODING_SYSTEM_GET_SPEC (args
[i
], spec
);
8425 attrs
= AREF (spec
, 0);
8426 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
8427 if (changed
[category
])
8428 /* Ignore this coding system because a coding system of the
8429 same category already had a higher priority. */
8431 changed
[category
] = 1;
8432 priorities
[j
++] = category
;
/* If the category already has a coding system set up and it is a
   different one, set the category's coding structure up again from
   this argument.  The category's symbol in Vcoding_category_table is
   set to the argument in any case.  */
8433 if (coding_categories
[category
].id
>= 0
8434 && ! EQ (args
[i
], CODING_ID_NAME (coding_categories
[category
].id
)))
8435 setup_coding_system (args
[i
], &coding_categories
[category
]);
8436 Fset (AREF (Vcoding_category_table
, category
), args
[i
]);
8439 /* Now we have decided top J priorities. Reflect the order of the
8440 original priorities to the remaining priorities. */
/* I indexes the slot being filled in PRIORITIES; J scans the old
   coding_priorities, stepping over categories already marked in
   CHANGED.  */
8442 for (i
= j
, j
= 0; i
< coding_category_max
; i
++, j
++)
8444 while (j
< coding_category_max
8445 && changed
[coding_priorities
[j
]])
8447 if (j
== coding_category_max
)
8449 priorities
[i
] = coding_priorities
[j
];
8452 bcopy (priorities
, coding_priorities
, sizeof priorities
);
8454 /* Update `coding-category-list'. */
/* The list is consed from the lowest priority upwards, so the highest
   priority category ends up first.  */
8455 Vcoding_category_list
= Qnil
;
8456 for (i
= coding_category_max
- 1; i
>= 0; i
--)
8457 Vcoding_category_list
8458 = Fcons (AREF (Vcoding_category_table
, priorities
[i
]),
8459 Vcoding_category_list
);
/* Lisp primitive `coding-system-priority-list'.  Walks
   coding_priorities from the first entry on and collects the base
   name of the coding system attached to each category; with HIGHESTP
   non-nil the first base name reached is returned at once.  */
8464 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list
,
8465 Scoding_system_priority_list
, 0, 1, 0,
8466 doc
: /* Return a list of coding systems ordered by their priorities.
8467 HIGHESTP non-nil means just return the highest priority one. */)
8469 Lisp_Object highestp
;
8474 for (i
= 0, val
= Qnil
; i
< coding_category_max
; i
++)
8476 enum coding_category category
= coding_priorities
[i
];
8477 int id
= coding_categories
[category
].id
;
8482 attrs
= CODING_ID_ATTRS (id
);
8483 if (! NILP (highestp
))
8484 return CODING_ATTR_BASE_NAME (attrs
);
8485 val
= Fcons (CODING_ATTR_BASE_NAME (attrs
), val
);
/* VAL was built by consing at the front, so reverse it to restore
   priority order.  */
8487 return Fnreverse (val
);
/* Name suffixes of the three subsidiary coding systems, in the order
   in which make_subsidiaries stores them.  */
8490 static char *suffixes
[] = { "-unix", "-dos", "-mac" };
/* Return a vector of three symbols whose names are the name of the
   symbol BASE followed by each entry of `suffixes'.  */
8493 make_subsidiaries (base
)
8496 Lisp_Object subsidiaries
;
8497 int base_name_len
= SBYTES (SYMBOL_NAME (base
));
/* 6 extra bytes: room for the longest suffix ("-unix", 5 bytes) plus
   its terminating NUL.  */
8498 char *buf
= (char *) alloca (base_name_len
+ 6);
8501 bcopy (SDATA (SYMBOL_NAME (base
)), buf
, base_name_len
);
8502 subsidiaries
= Fmake_vector (make_number (3), Qnil
);
8503 for (i
= 0; i
< 3; i
++)
/* Overwrite the tail of BUF with this suffix, including its NUL, and
   intern the resulting name.  */
8505 bcopy (suffixes
[i
], buf
+ base_name_len
, strlen (suffixes
[i
]) + 1);
8506 ASET (subsidiaries
, i
, intern (buf
));
8508 return subsidiaries
;
/* Lisp primitive define-coding-system-internal.

   Builds the attribute vector ATTRS from ARGS (indexed by the
   coding_arg_* constants), validates the arguments specific to the
   coding type, determines the coding category, and registers NAME in
   Vcoding_system_hash_table, Vcoding_system_list and
   Vcoding_system_alist.  When the eol-type argument is nil, three
   subsidiary coding systems sharing ATTRS are registered as well.
   Too few arguments end in a wrong-number-of-arguments signal (see
   the last statement).  */
8512 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal
,
8513 Sdefine_coding_system_internal
, coding_arg_max
, MANY
, 0,
8514 doc
: /* For internal use only.
8515 usage: (define-coding-system-internal ...) */)
8521 Lisp_Object spec_vec
; /* [ ATTRS ALIASES EOL_TYPE ] */
8522 Lisp_Object attrs
; /* Vector of attributes. */
8523 Lisp_Object eol_type
;
8524 Lisp_Object aliases
;
8525 Lisp_Object coding_type
, charset_list
, safe_charsets
;
8526 enum coding_category category
;
8527 Lisp_Object tail
, val
;
8528 int max_charset_id
= 0;
/* NARGS must cover at least the arguments common to every coding type.  */
8531 if (nargs
< coding_arg_max
)
/* Fresh attribute vector, every slot nil.  */
8534 attrs
= Fmake_vector (make_number (coding_attr_last_index
), Qnil
);
8536 name
= args
[coding_arg_name
];
8537 CHECK_SYMBOL (name
);
8538 CODING_ATTR_BASE_NAME (attrs
) = name
;
/* The mnemonic may be a string; anything else must be a character.  */
8540 val
= args
[coding_arg_mnemonic
];
8541 if (! STRINGP (val
))
8542 CHECK_CHARACTER (val
);
8543 CODING_ATTR_MNEMONIC (attrs
) = val
;
8545 coding_type
= args
[coding_arg_coding_type
];
8546 CHECK_SYMBOL (coding_type
);
8547 CODING_ATTR_TYPE (attrs
) = coding_type
;
/* A symbol given as the charset list (iso-2022 or emacs-mule) selects
   the corresponding predefined list and must match the coding type.  */
8549 charset_list
= args
[coding_arg_charset_list
];
8550 if (SYMBOLP (charset_list
))
8552 if (EQ (charset_list
, Qiso_2022
))
8554 if (! EQ (coding_type
, Qiso_2022
))
8555 error ("Invalid charset-list");
8556 charset_list
= Viso_2022_charset_list
;
8558 else if (EQ (charset_list
, Qemacs_mule
))
8560 if (! EQ (coding_type
, Qemacs_mule
))
8561 error ("Invalid charset-list");
8562 charset_list
= Vemacs_mule_charset_list
;
/* Find the largest charset ID in the list.  */
8564 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
8565 if (max_charset_id
< XFASTINT (XCAR (tail
)))
8566 max_charset_id
= XFASTINT (XCAR (tail
));
/* Work on a copy: each element is overwritten with the ID of the
   charset it names.  */
8570 charset_list
= Fcopy_sequence (charset_list
);
8571 for (tail
= charset_list
; !NILP (tail
); tail
= Fcdr (tail
))
8573 struct charset
*charset
;
8576 CHECK_CHARSET_GET_CHARSET (val
, charset
);
/* An iso-2022 coding system needs a charset with a non-negative ISO
   final value, an emacs-mule one a non-negative emacs-mule ID.  */
8577 if (EQ (coding_type
, Qiso_2022
)
8578 ? CHARSET_ISO_FINAL (charset
) < 0
8579 : EQ (coding_type
, Qemacs_mule
)
8580 ? CHARSET_EMACS_MULE_ID (charset
) < 0
8582 error ("Can't handle charset `%s'",
8583 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8585 XSETCAR (tail
, make_number (charset
->id
));
8586 if (max_charset_id
< charset
->id
)
8587 max_charset_id
= charset
->id
;
8590 CODING_ATTR_CHARSET_LIST (attrs
) = charset_list
;
/* SAFE_CHARSETS is a string with one entry per charset ID up to the
   largest one seen; the entry of every listed charset is set to 0.  */
8592 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
8594 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
8595 SSET (safe_charsets
, XFASTINT (XCAR (tail
)), 0);
8596 CODING_ATTR_SAFE_CHARSETS (attrs
) = safe_charsets
;
8598 CODING_ATTR_ASCII_COMPAT (attrs
) = args
[coding_arg_ascii_compatible_p
];
/* Translation tables for decoding and for encoding.  */
8600 val
= args
[coding_arg_decode_translation_table
];
8601 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8603 CODING_ATTR_DECODE_TBL (attrs
) = val
;
8605 val
= args
[coding_arg_encode_translation_table
];
8606 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8608 CODING_ATTR_ENCODE_TBL (attrs
) = val
;
/* The post-read-conversion and pre-write-conversion arguments.  */
8610 val
= args
[coding_arg_post_read_conversion
];
8612 CODING_ATTR_POST_READ (attrs
) = val
;
8614 val
= args
[coding_arg_pre_write_conversion
];
8616 CODING_ATTR_PRE_WRITE (attrs
) = val
;
/* Default character: one branch stores a space, the other stores VAL
   after checking that it is a character.  */
8618 val
= args
[coding_arg_default_char
];
8620 CODING_ATTR_DEFAULT_CHAR (attrs
) = make_number (' ');
8623 CHECK_CHARACTER (val
);
8624 CODING_ATTR_DEFAULT_CHAR (attrs
) = val
;
/* Normalized to exactly nil or t.  */
8627 val
= args
[coding_arg_for_unibyte
];
8628 CODING_ATTR_FOR_UNIBYTE (attrs
) = NILP (val
) ? Qnil
: Qt
;
8630 val
= args
[coding_arg_plist
];
8632 CODING_ATTR_PLIST (attrs
) = val
;
8634 if (EQ (coding_type
, Qcharset
))
8636 /* Generate a lisp vector of 256 elements. Each element is nil,
8637 integer, or a list of charset IDs.
8639 If Nth element is nil, the byte code N is invalid in this
8642 If Nth element is a number NUM, N is the first byte of a
8643 charset whose ID is NUM.
8645 If Nth element is a list of charset IDs, N is the first byte
8646 of one of them. The list is sorted by dimensions of the
8647 charsets. A charset of smaller dimension comes first. */
8648 val
= Fmake_vector (make_number (256), Qnil
);
8650 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
8652 struct charset
*charset
= CHARSET_FROM_ID (XFASTINT (XCAR (tail
)));
8653 int dim
= CHARSET_DIMENSION (charset
);
8654 int idx
= (dim
- 1) * 4;
/* One ASCII compatible charset is enough to mark the whole coding
   system as ASCII compatible.  */
8656 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8657 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
/* Walk the byte range code_space[IDX] .. code_space[IDX + 1] and
   record this charset in VAL for each such byte.  */
8659 for (i
= charset
->code_space
[idx
];
8660 i
<= charset
->code_space
[idx
+ 1]; i
++)
8662 Lisp_Object tmp
, tmp2
;
8665 tmp
= AREF (val
, i
);
8668 else if (NUMBERP (tmp
))
8670 dim2
= CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp
)));
8672 tmp
= Fcons (XCAR (tail
), Fcons (tmp
, Qnil
));
8674 tmp
= Fcons (tmp
, Fcons (XCAR (tail
), Qnil
));
8678 for (tmp2
= tmp
; CONSP (tmp2
); tmp2
= XCDR (tmp2
))
8680 dim2
= CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2
))));
8685 tmp
= nconc2 (tmp
, Fcons (XCAR (tail
), Qnil
));
/* Insert before TMP2 in place: duplicate the cell of TMP2 behind it,
   then overwrite its car with the new charset ID.  */
8688 XSETCDR (tmp2
, Fcons (XCAR (tmp2
), XCDR (tmp2
)));
8689 XSETCAR (tmp2
, XCAR (tail
));
8695 ASET (attrs
, coding_attr_charset_valids
, val
);
8696 category
= coding_category_charset
;
/* CCL based coding system: takes a decoder program, an encoder
   program and a list of valid codes.  */
8698 else if (EQ (coding_type
, Qccl
))
8702 if (nargs
< coding_arg_ccl_max
)
8705 val
= args
[coding_arg_ccl_decoder
];
8706 CHECK_CCL_PROGRAM (val
);
8708 val
= Fcopy_sequence (val
);
8709 ASET (attrs
, coding_attr_ccl_decoder
, val
);
8711 val
= args
[coding_arg_ccl_encoder
];
8712 CHECK_CCL_PROGRAM (val
);
8714 val
= Fcopy_sequence (val
);
8715 ASET (attrs
, coding_attr_ccl_encoder
, val
);
/* VALIDS is a 256-byte string, initially all 0.  Each entry of the
   valid-codes argument is either a single code or a cons FROM . TO;
   both forms are range-checked against 0..255.  */
8717 val
= args
[coding_arg_ccl_valids
];
8718 valids
= Fmake_string (make_number (256), make_number (0));
8719 for (tail
= val
; !NILP (tail
); tail
= Fcdr (tail
))
8726 from
= to
= XINT (val
);
8727 if (from
< 0 || from
> 255)
8728 args_out_of_range_3 (val
, make_number (0), make_number (255));
8733 CHECK_NATNUM_CAR (val
);
8734 CHECK_NATNUM_CDR (val
);
8735 from
= XINT (XCAR (val
));
8737 args_out_of_range_3 (XCAR (val
),
8738 make_number (0), make_number (255));
8739 to
= XINT (XCDR (val
));
8740 if (to
< from
|| to
> 255)
8741 args_out_of_range_3 (XCDR (val
),
8742 XCAR (val
), make_number (255));
/* Flag every code in FROM..TO as valid.  */
8744 for (i
= from
; i
<= to
; i
++)
8745 SSET (valids
, i
, 1);
8747 ASET (attrs
, coding_attr_ccl_valids
, valids
);
8749 category
= coding_category_ccl
;
/* UTF-16: always marked as not ASCII compatible; takes a BOM
   specification and an endianness.  */
8751 else if (EQ (coding_type
, Qutf_16
))
8753 Lisp_Object bom
, endian
;
8755 CODING_ATTR_ASCII_COMPAT (attrs
) = Qnil
;
8757 if (nargs
< coding_arg_utf16_max
)
8760 bom
= args
[coding_arg_utf16_bom
];
8761 if (! NILP (bom
) && ! EQ (bom
, Qt
))
8765 CHECK_CODING_SYSTEM (val
);
8767 CHECK_CODING_SYSTEM (val
);
8769 ASET (attrs
, coding_attr_utf_16_bom
, bom
);
8771 endian
= args
[coding_arg_utf16_endian
];
8772 CHECK_SYMBOL (endian
);
8775 else if (! EQ (endian
, Qbig
) && ! EQ (endian
, Qlittle
))
8776 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian
)));
8777 ASET (attrs
, coding_attr_utf_16_endian
, endian
);
/* The category follows from BOM and ENDIAN: a cons BOM selects the
   auto category, otherwise one of the be/le variants is chosen.  */
8779 category
= (CONSP (bom
)
8780 ? coding_category_utf_16_auto
8782 ? (EQ (endian
, Qbig
)
8783 ? coding_category_utf_16_be_nosig
8784 : coding_category_utf_16_le_nosig
)
8785 : (EQ (endian
, Qbig
)
8786 ? coding_category_utf_16_be
8787 : coding_category_utf_16_le
));
/* ISO 2022: takes initial designations, register usage, designation
   requests and a flag word.  */
8789 else if (EQ (coding_type
, Qiso_2022
))
8791 Lisp_Object initial
, reg_usage
, request
, flags
;
8794 if (nargs
< coding_arg_iso2022_max
)
/* Each of the first four elements of the copied INITIAL vector is
   replaced by a charset ID, or by -1.  */
8797 initial
= Fcopy_sequence (args
[coding_arg_iso2022_initial
]);
8798 CHECK_VECTOR (initial
);
8799 for (i
= 0; i
< 4; i
++)
8801 val
= Faref (initial
, make_number (i
));
8804 struct charset
*charset
;
8806 CHECK_CHARSET_GET_CHARSET (val
, charset
);
8807 ASET (initial
, i
, make_number (CHARSET_ID (charset
)));
8808 if (i
== 0 && CHARSET_ASCII_COMPATIBLE_P (charset
))
8809 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8812 ASET (initial
, i
, make_number (-1));
/* REG_USAGE must be a cons of two numbers.  */
8815 reg_usage
= args
[coding_arg_iso2022_reg_usage
];
8816 CHECK_CONS (reg_usage
);
8817 CHECK_NUMBER_CAR (reg_usage
);
8818 CHECK_NUMBER_CDR (reg_usage
);
/* REQUEST is copied; in each entry the car is replaced by a charset
   ID and the cdr must be a graphic register number below 4.  */
8820 request
= Fcopy_sequence (args
[coding_arg_iso2022_request
]);
8821 for (tail
= request
; ! NILP (tail
); tail
= Fcdr (tail
))
8829 CHECK_CHARSET_GET_ID (tmp
, id
);
8830 CHECK_NATNUM_CDR (val
);
8831 if (XINT (XCDR (val
)) >= 4)
8832 error ("Invalid graphic register number: %d", XINT (XCDR (val
)));
8833 XSETCAR (val
, make_number (id
));
8836 flags
= args
[coding_arg_iso2022_flags
];
8837 CHECK_NATNUM (flags
);
/* The symbol iso-2022 as charset list argument turns on
   CODING_ISO_FLAG_FULL_SUPPORT.  */
8839 if (EQ (args
[coding_arg_charset_list
], Qiso_2022
))
8840 flags
= make_number (i
| CODING_ISO_FLAG_FULL_SUPPORT
);
8842 ASET (attrs
, coding_attr_iso_initial
, initial
);
8843 ASET (attrs
, coding_attr_iso_usage
, reg_usage
);
8844 ASET (attrs
, coding_attr_iso_request
, request
);
8845 ASET (attrs
, coding_attr_iso_flags
, flags
);
8846 setup_iso_safe_charsets (attrs
);
/* Seven-bit flag set: choose among the iso-7 categories.  */
8848 if (i
& CODING_ISO_FLAG_SEVEN_BITS
)
8849 category
= ((i
& (CODING_ISO_FLAG_LOCKING_SHIFT
8850 | CODING_ISO_FLAG_SINGLE_SHIFT
))
8851 ? coding_category_iso_7_else
8852 : EQ (args
[coding_arg_charset_list
], Qiso_2022
)
8853 ? coding_category_iso_7
8854 : coding_category_iso_7_tight
);
/* The iso-8 categories; the dimension of the charset in slot 1 of
   INITIAL separates iso-8-1 from iso-8-2.  */
8857 int id
= XINT (AREF (initial
, 1));
8859 category
= (((i
& CODING_ISO_FLAG_LOCKING_SHIFT
)
8860 || EQ (args
[coding_arg_charset_list
], Qiso_2022
)
8862 ? coding_category_iso_8_else
8863 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id
)) == 1)
8864 ? coding_category_iso_8_1
8865 : coding_category_iso_8_2
);
/* Every category other than iso-8-1 and iso-8-2 is forced to be not
   ASCII compatible.  */
8867 if (category
!= coding_category_iso_8_1
8868 && category
!= coding_category_iso_8_2
)
8869 CODING_ATTR_ASCII_COMPAT (attrs
) = Qnil
;
/* emacs-mule: always ASCII compatible; the symbol emacs-mule as
   charset list argument sets the emacs_mule_full attribute.  */
8871 else if (EQ (coding_type
, Qemacs_mule
))
8873 if (EQ (args
[coding_arg_charset_list
], Qemacs_mule
))
8874 ASET (attrs
, coding_attr_emacs_mule_full
, Qt
);
8875 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8876 category
= coding_category_emacs_mule
;
/* Shift-JIS: three or four charsets, the first two of dimension one
   and the remaining one or two of dimension two.  */
8878 else if (EQ (coding_type
, Qshift_jis
))
8881 struct charset
*charset
;
8883 if (XINT (Flength (charset_list
)) != 3
8884 && XINT (Flength (charset_list
)) != 4)
8885 error ("There should be three or four charsets");
8887 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8888 if (CHARSET_DIMENSION (charset
) != 1)
8889 error ("Dimension of charset %s is not one",
8890 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8891 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8892 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8894 charset_list
= XCDR (charset_list
);
8895 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8896 if (CHARSET_DIMENSION (charset
) != 1)
8897 error ("Dimension of charset %s is not one",
8898 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8900 charset_list
= XCDR (charset_list
);
8901 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8902 if (CHARSET_DIMENSION (charset
) != 2)
8903 error ("Dimension of charset %s is not two",
8904 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8906 charset_list
= XCDR (charset_list
);
8907 if (! NILP (charset_list
))
8909 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8910 if (CHARSET_DIMENSION (charset
) != 2)
8911 error ("Dimension of charset %s is not two",
8912 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
/* NAME is remembered in Vsjis_coding_system.  */
8915 category
= coding_category_sjis
;
8916 Vsjis_coding_system
= name
;
/* Big5: exactly two charsets, of dimension one and two.  */
8918 else if (EQ (coding_type
, Qbig5
))
8920 struct charset
*charset
;
8922 if (XINT (Flength (charset_list
)) != 2)
8923 error ("There should be just two charsets");
8925 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8926 if (CHARSET_DIMENSION (charset
) != 1)
8927 error ("Dimension of charset %s is not one",
8928 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8929 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8930 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8932 charset_list
= XCDR (charset_list
);
8933 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8934 if (CHARSET_DIMENSION (charset
) != 2)
8935 error ("Dimension of charset %s is not two",
8936 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
/* NAME is remembered in Vbig5_coding_system.  */
8938 category
= coding_category_big5
;
8939 Vbig5_coding_system
= name
;
/* raw-text and utf-8 are ASCII compatible and need no further
   arguments; undecided only sets the category.  */
8941 else if (EQ (coding_type
, Qraw_text
))
8943 category
= coding_category_raw_text
;
8944 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8946 else if (EQ (coding_type
, Qutf_8
))
8948 category
= coding_category_utf_8
;
8949 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8951 else if (EQ (coding_type
, Qundecided
))
8952 category
= coding_category_undecided
;
8954 error ("Invalid coding system type: %s",
8955 SDATA (SYMBOL_NAME (coding_type
)));
/* Store the category as an attribute, and put both the category
   symbol and the ascii-compatible-p flag at the front of the
   property list.  */
8957 CODING_ATTR_CATEGORY (attrs
) = make_number (category
);
8958 CODING_ATTR_PLIST (attrs
)
8959 = Fcons (QCcategory
, Fcons (AREF (Vcoding_category_table
, category
),
8960 CODING_ATTR_PLIST (attrs
)));
8961 CODING_ATTR_PLIST (attrs
)
8962 = Fcons (QCascii_compatible_p
,
8963 Fcons (CODING_ATTR_ASCII_COMPAT (attrs
),
8964 CODING_ATTR_PLIST (attrs
)));
/* The eol-type argument must be nil, unix, dos or mac.  */
8966 eol_type
= args
[coding_arg_eol_type
];
8967 if (! NILP (eol_type
)
8968 && ! EQ (eol_type
, Qunix
)
8969 && ! EQ (eol_type
, Qdos
)
8970 && ! EQ (eol_type
, Qmac
))
8971 error ("Invalid eol-type");
/* The alias list starts out holding just NAME.  */
8973 aliases
= Fcons (name
, Qnil
);
/* No explicit eol-type: EOL_TYPE becomes the vector of the three
   subsidiary names, and each subsidiary is registered with its own
   spec that shares ATTRS.  */
8975 if (NILP (eol_type
))
8977 eol_type
= make_subsidiaries (name
);
8978 for (i
= 0; i
< 3; i
++)
8980 Lisp_Object this_spec
, this_name
, this_aliases
, this_eol_type
;
8982 this_name
= AREF (eol_type
, i
);
8983 this_aliases
= Fcons (this_name
, Qnil
);
8984 this_eol_type
= (i
== 0 ? Qunix
: i
== 1 ? Qdos
: Qmac
);
/* ATTRS is the fill value, so slot 0 keeps it; slots 1 and 2 are
   overwritten right below.  */
8985 this_spec
= Fmake_vector (make_number (3), attrs
);
8986 ASET (this_spec
, 1, this_aliases
);
8987 ASET (this_spec
, 2, this_eol_type
);
8988 Fputhash (this_name
, this_spec
, Vcoding_system_hash_table
);
8989 Vcoding_system_list
= Fcons (this_name
, Vcoding_system_list
);
8990 val
= Fassoc (Fsymbol_name (this_name
), Vcoding_system_alist
);
8992 Vcoding_system_alist
8993 = Fcons (Fcons (Fsymbol_name (this_name
), Qnil
),
8994 Vcoding_system_alist
);
/* Spec of NAME itself: [ ATTRS ALIASES EOL_TYPE ].  */
8998 spec_vec
= Fmake_vector (make_number (3), attrs
);
8999 ASET (spec_vec
, 1, aliases
);
9000 ASET (spec_vec
, 2, eol_type
);
9002 Fputhash (name
, spec_vec
, Vcoding_system_hash_table
);
9003 Vcoding_system_list
= Fcons (name
, Vcoding_system_list
);
9004 val
= Fassoc (Fsymbol_name (name
), Vcoding_system_alist
);
9006 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (name
), Qnil
),
9007 Vcoding_system_alist
);
/* Set up the coding system of this category when the category has
   none yet (negative ID) or is currently bound to NAME.  */
9010 int id
= coding_categories
[category
].id
;
9012 if (id
< 0 || EQ (name
, CODING_ID_NAME (id
)))
9013 setup_coding_system (name
, &coding_categories
[category
]);
/* Error exit: signal wrong-number-of-arguments with the function
   name and NARGS as data.  */
9019 return Fsignal (Qwrong_number_of_arguments
,
9020 Fcons (intern ("define-coding-system-internal"),
9021 make_number (nargs
)));
/* Lisp primitive coding-system-put.

   Fetches the attribute vector (slot 0 of the spec) of CODING_SYSTEM.
   For the keyword properties tested below, VAL is also stored in the
   matching attribute slot, after a type check where one is shown.
   The property list attribute is updated through Fplist_put.  */
9025 DEFUN ("coding-system-put", Fcoding_system_put
, Scoding_system_put
,
9027 doc
: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
9028 (coding_system
, prop
, val
)
9029 Lisp_Object coding_system
, prop
, val
;
9031 Lisp_Object spec
, attrs
;
9033 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
9034 attrs
= AREF (spec
, 0);
9035 if (EQ (prop
, QCmnemonic
))
/* Same rule as at definition time: a string, or else a character.  */
9037 if (! STRINGP (val
))
9038 CHECK_CHARACTER (val
);
9039 CODING_ATTR_MNEMONIC (attrs
) = val
;
/* NOTE: the C identifier is spelled QCdefalut_char (sic); it is the
   symbol whose name is :default-char.  */
9041 else if (EQ (prop
, QCdefalut_char
))
9044 val
= make_number (' ');
9046 CHECK_CHARACTER (val
);
9047 CODING_ATTR_DEFAULT_CHAR (attrs
) = val
;
9049 else if (EQ (prop
, QCdecode_translation_table
))
9051 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
9053 CODING_ATTR_DECODE_TBL (attrs
) = val
;
9055 else if (EQ (prop
, QCencode_translation_table
))
9057 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
9059 CODING_ATTR_ENCODE_TBL (attrs
) = val
;
9061 else if (EQ (prop
, QCpost_read_conversion
))
9064 CODING_ATTR_POST_READ (attrs
) = val
;
9066 else if (EQ (prop
, QCpre_write_conversion
))
9069 CODING_ATTR_PRE_WRITE (attrs
) = val
;
9071 else if (EQ (prop
, QCascii_compatible_p
))
9073 CODING_ATTR_ASCII_COMPAT (attrs
) = val
;
/* Record PROP and VAL in the property list attribute.  */
9076 CODING_ATTR_PLIST (attrs
)
9077 = Fplist_put (CODING_ATTR_PLIST (attrs
), prop
, val
);
/* Lisp primitive define-coding-system-alias (exactly two arguments).

   ALIAS must be a symbol and CODING_SYSTEM an existing coding system.
   The steps visible below are:
   - walk to the last cell of the alias list (slot 1 of the spec) and
     append ALIAS there;
   - when the eol-type (slot 2 of the spec) is a vector, build the
     three subsidiary names of ALIAS with make_subsidiaries and define
     each one, recursively, as an alias of the matching element of
     that vector;
   - register ALIAS under the very same spec object in
     Vcoding_system_hash_table and add it to Vcoding_system_list and
     Vcoding_system_alist.

   NOTE(review): in this copy of the file the comment that starts with
   ALISES further down has lost its closing line; confirm against the
   pristine source before editing anything below that point.  */
9082 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias
,
9083 Sdefine_coding_system_alias
, 2, 2, 0,
9084 doc
: /* Define ALIAS as an alias for CODING-SYSTEM. */)
9085 (alias
, coding_system
)
9086 Lisp_Object alias
, coding_system
;
9088 Lisp_Object spec
, aliases
, eol_type
, val
;
9090 CHECK_SYMBOL (alias
);
9091 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
9092 aliases
= AREF (spec
, 1);
9093 /* ALISES should be a list of length more than zero, and the first
9094 element is a base coding system. Append ALIAS at the tail of the
9096 while (!NILP (XCDR (aliases
)))
9097 aliases
= XCDR (aliases
);
9098 XSETCDR (aliases
, Fcons (alias
, Qnil
));
9100 eol_type
= AREF (spec
, 2);
9101 if (VECTORP (eol_type
))
9103 Lisp_Object subsidiaries
;
9106 subsidiaries
= make_subsidiaries (alias
);
9107 for (i
= 0; i
< 3; i
++)
9108 Fdefine_coding_system_alias (AREF (subsidiaries
, i
),
9109 AREF (eol_type
, i
));
9112 Fputhash (alias
, spec
, Vcoding_system_hash_table
);
9113 Vcoding_system_list
= Fcons (alias
, Vcoding_system_list
);
9114 val
= Fassoc (Fsymbol_name (alias
), Vcoding_system_alist
);
9116 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (alias
), Qnil
),
9117 Vcoding_system_alist
);
/* Lisp primitive coding-system-base.
   Returns the base name stored in the attribute vector of
   CODING_SYSTEM; a nil argument yields Qno_conversion directly.  */
9122 DEFUN ("coding-system-base", Fcoding_system_base
, Scoding_system_base
,
9124 doc
: /* Return the base of CODING-SYSTEM.
9125 Any alias or subsidiary coding system is not a base coding system. */)
9127 Lisp_Object coding_system
;
9129 Lisp_Object spec
, attrs
;
/* nil needs no lookup: the answer is no-conversion.  */
9131 if (NILP (coding_system
))
9132 return (Qno_conversion
);
9133 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
/* Slot 0 of the spec is the attribute vector.  */
9134 attrs
= AREF (spec
, 0);
9135 return CODING_ATTR_BASE_NAME (attrs
);
/* Lisp primitive coding-system-plist.
   Returns the property list attribute of CODING_SYSTEM; nil is
   treated as no-conversion.

   NOTE(review): the doc below is a string literal, whereas the
   neighbouring DEFUNs use a doc comment; confirm that the
   documentation extraction handles this form.  */
9138 DEFUN ("coding-system-plist", Fcoding_system_plist
, Scoding_system_plist
,
9140 doc
: "Return the property list of CODING-SYSTEM.")
9142 Lisp_Object coding_system
;
9144 Lisp_Object spec
, attrs
;
/* A nil argument stands for no-conversion.  */
9146 if (NILP (coding_system
))
9147 coding_system
= Qno_conversion
;
9148 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
/* Slot 0 of the spec is the attribute vector.  */
9149 attrs
= AREF (spec
, 0);
9150 return CODING_ATTR_PLIST (attrs
);
/* Lisp primitive coding-system-aliases.
   Returns slot 1 of the spec of CODING_SYSTEM, which holds its alias
   list; nil is treated as no-conversion.  The list object itself is
   returned, not a copy.  */
9154 DEFUN ("coding-system-aliases", Fcoding_system_aliases
, Scoding_system_aliases
,
9156 doc
: /* Return the list of aliases of CODING-SYSTEM. */)
9158 Lisp_Object coding_system
;
/* A nil argument stands for no-conversion.  */
9162 if (NILP (coding_system
))
9163 coding_system
= Qno_conversion
;
9164 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
9165 return AREF (spec
, 1);
/* Lisp primitive coding-system-eol-type (exactly one argument).
   Reads slot 2 of the spec of CODING_SYSTEM.  A vector there is
   returned as a fresh copy; otherwise the eol symbol is translated
   to the integer 0, 1 or 2.  nil is treated as no-conversion.  */
9168 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type
,
9169 Scoding_system_eol_type
, 1, 1, 0,
9170 doc
: /* Return eol-type of CODING-SYSTEM.
9171 An eol-type is integer 0, 1, 2, or a vector of coding systems.
9173 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
9174 and CR respectively.
9176 A vector value indicates that a format of end-of-line should be
9177 detected automatically. Nth element of the vector is the subsidiary
9178 coding system whose eol-type is N. */)
9180 Lisp_Object coding_system
;
9182 Lisp_Object spec
, eol_type
;
/* A nil argument stands for no-conversion.  */
9185 if (NILP (coding_system
))
9186 coding_system
= Qno_conversion
;
/* An unknown coding system is tested for with a predicate here
   rather than with the CHECK_ macro used by the sibling primitives.  */
9187 if (! CODING_SYSTEM_P (coding_system
))
9189 spec
= CODING_SYSTEM_SPEC (coding_system
);
9190 eol_type
= AREF (spec
, 2);
/* Hand out a copy so that the stored vector is not shared.  */
9191 if (VECTORP (eol_type
))
9192 return Fcopy_sequence (eol_type
);
/* unix -> 0, dos -> 1, any other value -> 2.  */
9193 n
= EQ (eol_type
, Qunix
) ? 0 : EQ (eol_type
, Qdos
) ? 1 : 2;
9194 return make_number (n
);
9200 /*** 9. Post-amble ***/
/* Table initialization.  NOTE(review): the header of the enclosing
   function is not present in this copy of the file.

   Every coding category starts with the ID -1 and the priority order
   is the numeric order of the categories.  */
9207 for (i
= 0; i
< coding_category_max
; i
++)
9209 coding_categories
[i
].id
= -1;
9210 coding_priorities
[i
] = i
;
9213 /* ISO2022 specific initialize routine. */
/* The four range loops leave out 0x20, 0x7F, 0xA0 and 0xFF; those
   four bytes get their own classes right after the loops.  */
9214 for (i
= 0; i
< 0x20; i
++)
9215 iso_code_class
[i
] = ISO_control_0
;
9216 for (i
= 0x21; i
< 0x7F; i
++)
9217 iso_code_class
[i
] = ISO_graphic_plane_0
;
9218 for (i
= 0x80; i
< 0xA0; i
++)
9219 iso_code_class
[i
] = ISO_control_1
;
9220 for (i
= 0xA1; i
< 0xFF; i
++)
9221 iso_code_class
[i
] = ISO_graphic_plane_1
;
9222 iso_code_class
[0x20] = iso_code_class
[0x7F] = ISO_0x20_or_0x7F
;
9223 iso_code_class
[0xA0] = iso_code_class
[0xFF] = ISO_0xA0_or_0xFF
;
/* Individual codes with a class of their own; these assignments
   override the class given by the range loops above.  */
9224 iso_code_class
[ISO_CODE_SO
] = ISO_shift_out
;
9225 iso_code_class
[ISO_CODE_SI
] = ISO_shift_in
;
9226 iso_code_class
[ISO_CODE_SS2_7
] = ISO_single_shift_2_7
;
9227 iso_code_class
[ISO_CODE_ESC
] = ISO_escape
;
9228 iso_code_class
[ISO_CODE_SS2
] = ISO_single_shift_2
;
9229 iso_code_class
[ISO_CODE_SS3
] = ISO_single_shift_3
;
9230 iso_code_class
[ISO_CODE_CSI
] = ISO_control_sequence_introducer
;
/* emacs_mule_bytes[] is indexed by byte value: 1 is stored in the
   loop, and the four private leading codes then get 3 or 4.  */
9232 for (i
= 0; i
< 256; i
++)
9234 emacs_mule_bytes
[i
] = 1;
9236 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_11
] = 3;
9237 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_12
] = 3;
9238 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_21
] = 4;
9239 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_22
] = 4;
9247 staticpro (&Vcoding_system_hash_table
);
9249 Lisp_Object args
[2];
9252 Vcoding_system_hash_table
= Fmake_hash_table (2, args
);
9255 staticpro (&Vsjis_coding_system
);
9256 Vsjis_coding_system
= Qnil
;
9258 staticpro (&Vbig5_coding_system
);
9259 Vbig5_coding_system
= Qnil
;
9261 staticpro (&Vcode_conversion_reused_workbuf
);
9262 Vcode_conversion_reused_workbuf
= Qnil
;
9264 staticpro (&Vcode_conversion_workbuf_name
);
9265 Vcode_conversion_workbuf_name
= build_string (" *code-conversion-work*");
9267 reused_workbuf_in_use
= 0;
9269 DEFSYM (Qcharset
, "charset");
9270 DEFSYM (Qtarget_idx
, "target-idx");
9271 DEFSYM (Qcoding_system_history
, "coding-system-history");
9272 Fset (Qcoding_system_history
, Qnil
);
9274 /* Target FILENAME is the first argument. */
9275 Fput (Qinsert_file_contents
, Qtarget_idx
, make_number (0));
9276 /* Target FILENAME is the third argument. */
9277 Fput (Qwrite_region
, Qtarget_idx
, make_number (2));
9279 DEFSYM (Qcall_process
, "call-process");
9280 /* Target PROGRAM is the first argument. */
9281 Fput (Qcall_process
, Qtarget_idx
, make_number (0));
9283 DEFSYM (Qcall_process_region
, "call-process-region");
9284 /* Target PROGRAM is the third argument. */
9285 Fput (Qcall_process_region
, Qtarget_idx
, make_number (2));
9287 DEFSYM (Qstart_process
, "start-process");
9288 /* Target PROGRAM is the third argument. */
9289 Fput (Qstart_process
, Qtarget_idx
, make_number (2));
9291 DEFSYM (Qopen_network_stream
, "open-network-stream");
9292 /* Target SERVICE is the fourth argument. */
9293 Fput (Qopen_network_stream
, Qtarget_idx
, make_number (3));
9295 DEFSYM (Qcoding_system
, "coding-system");
9296 DEFSYM (Qcoding_aliases
, "coding-aliases");
9298 DEFSYM (Qeol_type
, "eol-type");
9299 DEFSYM (Qunix
, "unix");
9300 DEFSYM (Qdos
, "dos");
9302 DEFSYM (Qbuffer_file_coding_system
, "buffer-file-coding-system");
9303 DEFSYM (Qpost_read_conversion
, "post-read-conversion");
9304 DEFSYM (Qpre_write_conversion
, "pre-write-conversion");
9305 DEFSYM (Qdefault_char
, "default-char");
9306 DEFSYM (Qundecided
, "undecided");
9307 DEFSYM (Qno_conversion
, "no-conversion");
9308 DEFSYM (Qraw_text
, "raw-text");
9310 DEFSYM (Qiso_2022
, "iso-2022");
9312 DEFSYM (Qutf_8
, "utf-8");
9313 DEFSYM (Qutf_8_emacs
, "utf-8-emacs");
9315 DEFSYM (Qutf_16
, "utf-16");
9316 DEFSYM (Qbig
, "big");
9317 DEFSYM (Qlittle
, "little");
9319 DEFSYM (Qshift_jis
, "shift-jis");
9320 DEFSYM (Qbig5
, "big5");
9322 DEFSYM (Qcoding_system_p
, "coding-system-p");
9324 DEFSYM (Qcoding_system_error
, "coding-system-error");
9325 Fput (Qcoding_system_error
, Qerror_conditions
,
9326 Fcons (Qcoding_system_error
, Fcons (Qerror
, Qnil
)));
9327 Fput (Qcoding_system_error
, Qerror_message
,
9328 build_string ("Invalid coding system"));
9330 /* Intern this now in case it isn't already done.
9331 Setting this variable twice is harmless.
9332 But don't staticpro it here--that is done in alloc.c. */
9333 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
9335 DEFSYM (Qtranslation_table
, "translation-table");
9336 Fput (Qtranslation_table
, Qchar_table_extra_slots
, make_number (2));
9337 DEFSYM (Qtranslation_table_id
, "translation-table-id");
9338 DEFSYM (Qtranslation_table_for_decode
, "translation-table-for-decode");
9339 DEFSYM (Qtranslation_table_for_encode
, "translation-table-for-encode");
9341 DEFSYM (Qvalid_codes
, "valid-codes");
9343 DEFSYM (Qemacs_mule
, "emacs-mule");
9345 DEFSYM (QCcategory
, ":category");
9346 DEFSYM (QCmnemonic
, ":mnemonic");
9347 DEFSYM (QCdefalut_char
, ":default-char");
9348 DEFSYM (QCdecode_translation_table
, ":decode-translation-table");
9349 DEFSYM (QCencode_translation_table
, ":encode-translation-table");
9350 DEFSYM (QCpost_read_conversion
, ":post-read-conversion");
9351 DEFSYM (QCpre_write_conversion
, ":pre-write-conversion");
9352 DEFSYM (QCascii_compatible_p
, ":ascii-compatible-p");
9354 Vcoding_category_table
9355 = Fmake_vector (make_number (coding_category_max
), Qnil
);
9356 staticpro (&Vcoding_category_table
);
9357 /* The following are targets of code detection. */
9358 ASET (Vcoding_category_table
, coding_category_iso_7
,
9359 intern ("coding-category-iso-7"));
9360 ASET (Vcoding_category_table
, coding_category_iso_7_tight
,
9361 intern ("coding-category-iso-7-tight"));
9362 ASET (Vcoding_category_table
, coding_category_iso_8_1
,
9363 intern ("coding-category-iso-8-1"));
9364 ASET (Vcoding_category_table
, coding_category_iso_8_2
,
9365 intern ("coding-category-iso-8-2"));
9366 ASET (Vcoding_category_table
, coding_category_iso_7_else
,
9367 intern ("coding-category-iso-7-else"));
9368 ASET (Vcoding_category_table
, coding_category_iso_8_else
,
9369 intern ("coding-category-iso-8-else"));
9370 ASET (Vcoding_category_table
, coding_category_utf_8
,
9371 intern ("coding-category-utf-8"));
9372 ASET (Vcoding_category_table
, coding_category_utf_16_be
,
9373 intern ("coding-category-utf-16-be"));
9374 ASET (Vcoding_category_table
, coding_category_utf_16_auto
,
9375 intern ("coding-category-utf-16-auto"));
9376 ASET (Vcoding_category_table
, coding_category_utf_16_le
,
9377 intern ("coding-category-utf-16-le"));
9378 ASET (Vcoding_category_table
, coding_category_utf_16_be_nosig
,
9379 intern ("coding-category-utf-16-be-nosig"));
9380 ASET (Vcoding_category_table
, coding_category_utf_16_le_nosig
,
9381 intern ("coding-category-utf-16-le-nosig"));
9382 ASET (Vcoding_category_table
, coding_category_charset
,
9383 intern ("coding-category-charset"));
9384 ASET (Vcoding_category_table
, coding_category_sjis
,
9385 intern ("coding-category-sjis"));
9386 ASET (Vcoding_category_table
, coding_category_big5
,
9387 intern ("coding-category-big5"));
9388 ASET (Vcoding_category_table
, coding_category_ccl
,
9389 intern ("coding-category-ccl"));
9390 ASET (Vcoding_category_table
, coding_category_emacs_mule
,
9391 intern ("coding-category-emacs-mule"));
9392 /* The following are NOT targets of code detection. */
9393 ASET (Vcoding_category_table
, coding_category_raw_text
,
9394 intern ("coding-category-raw-text"));
9395 ASET (Vcoding_category_table
, coding_category_undecided
,
9396 intern ("coding-category-undecided"));
9398 DEFSYM (Qinsufficient_source
, "insufficient-source");
9399 DEFSYM (Qinconsistent_eol
, "inconsistent-eol");
9400 DEFSYM (Qinvalid_source
, "invalid-source");
9401 DEFSYM (Qinterrupted
, "interrupted");
9402 DEFSYM (Qinsufficient_memory
, "insufficient-memory");
9403 DEFSYM (Qcoding_system_define_form
, "coding-system-define-form");
9405 defsubr (&Scoding_system_p
);
9406 defsubr (&Sread_coding_system
);
9407 defsubr (&Sread_non_nil_coding_system
);
9408 defsubr (&Scheck_coding_system
);
9409 defsubr (&Sdetect_coding_region
);
9410 defsubr (&Sdetect_coding_string
);
9411 defsubr (&Sfind_coding_systems_region_internal
);
9412 defsubr (&Sunencodable_char_position
);
9413 defsubr (&Scheck_coding_systems_region
);
9414 defsubr (&Sdecode_coding_region
);
9415 defsubr (&Sencode_coding_region
);
9416 defsubr (&Sdecode_coding_string
);
9417 defsubr (&Sencode_coding_string
);
9418 defsubr (&Sdecode_sjis_char
);
9419 defsubr (&Sencode_sjis_char
);
9420 defsubr (&Sdecode_big5_char
);
9421 defsubr (&Sencode_big5_char
);
9422 defsubr (&Sset_terminal_coding_system_internal
);
9423 defsubr (&Sset_safe_terminal_coding_system_internal
);
9424 defsubr (&Sterminal_coding_system
);
9425 defsubr (&Sset_keyboard_coding_system_internal
);
9426 defsubr (&Skeyboard_coding_system
);
9427 defsubr (&Sfind_operation_coding_system
);
9428 defsubr (&Sset_coding_system_priority
);
9429 defsubr (&Sdefine_coding_system_internal
);
9430 defsubr (&Sdefine_coding_system_alias
);
9431 defsubr (&Scoding_system_put
);
9432 defsubr (&Scoding_system_base
);
9433 defsubr (&Scoding_system_plist
);
9434 defsubr (&Scoding_system_aliases
);
9435 defsubr (&Scoding_system_eol_type
);
9436 defsubr (&Scoding_system_priority_list
);
9438 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list
,
9439 doc
: /* List of coding systems.
9441 Do not alter the value of this variable manually. This variable should be
9442 updated by the functions `define-coding-system' and
9443 `define-coding-system-alias'. */);
9444 Vcoding_system_list
= Qnil
;
9446 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist
,
9447 doc
: /* Alist of coding system names.
9448 Each element is a one-element list containing a coding system name.
9449 This variable is given to `completing-read' as TABLE argument.
9451 Do not alter the value of this variable manually. This variable should be
9452 updated by the functions `define-coding-system' and
9453 `define-coding-system-alias'. */);
9454 Vcoding_system_alist
= Qnil
;
9456 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list
,
9457 doc
: /* List of coding-categories (symbols) ordered by priority.
9459 On detecting a coding system, Emacs tries code detection algorithms
9460 associated with each coding-category one by one in this order. When
9461 one algorithm agrees with a byte sequence of source text, the coding
9462 system bound to the corresponding coding-category is selected.
9464 Don't modify this variable directly, but use `set-coding-system-priority'. */);
9468 Vcoding_category_list
= Qnil
;
9469 for (i
= coding_category_max
- 1; i
>= 0; i
--)
9470 Vcoding_category_list
9471 = Fcons (XVECTOR (Vcoding_category_table
)->contents
[i
],
9472 Vcoding_category_list
);
9475 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read
,
9476 doc
: /* Specify the coding system for read operations.
9477 It is useful to bind this variable with `let', but do not set it globally.
9478 If the value is a coding system, it is used for decoding on read operation.
9479 If not, an appropriate element is used from one of the coding system alists:
9480 There are three such tables, `file-coding-system-alist',
9481 `process-coding-system-alist', and `network-coding-system-alist'. */);
9482 Vcoding_system_for_read
= Qnil
;
9484 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write
,
9485 doc
: /* Specify the coding system for write operations.
9486 Programs bind this variable with `let', but you should not set it globally.
9487 If the value is a coding system, it is used for encoding of output,
9488 when writing it to a file and when sending it to a file or subprocess.
9490 If this does not specify a coding system, an appropriate element
9491 is used from one of the coding system alists:
9492 There are three such tables, `file-coding-system-alist',
9493 `process-coding-system-alist', and `network-coding-system-alist'.
9494 For output to files, if the above procedure does not specify a coding system,
9495 the value of `buffer-file-coding-system' is used. */);
9496 Vcoding_system_for_write
= Qnil
;
9498 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used
,
9500 Coding system used in the latest file or process I/O. */);
9501 Vlast_coding_system_used
= Qnil
;
9503 DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error
,
9505 Error status of the last code conversion.
9507 When an error was detected in the last code conversion, this variable
9508 is set to one of the following symbols.
9509 `insufficient-source'
9513 `insufficient-memory'
9514 When no error was detected, the value doesn't change. So, to check
9515 the error status of a code conversion by this variable, you must
9516 explicitly set this variable to nil before performing code
9518 Vlast_code_conversion_error
= Qnil
;
9520 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion
,
9522 *Non-nil means always inhibit code conversion of end-of-line format.
9523 See info node `Coding Systems' and info node `Text and Binary' concerning
9524 such conversion. */);
9525 inhibit_eol_conversion
= 0;
9527 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system
,
9529 Non-nil means process buffer inherits coding system of process output.
9530 Bind it to t if the process output is to be treated as if it were a file
9531 read from some filesystem. */);
9532 inherit_process_coding_system
= 0;
9534 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist
,
9536 Alist to decide a coding system to use for a file I/O operation.
9537 The format is ((PATTERN . VAL) ...),
9538 where PATTERN is a regular expression matching a file name,
9539 VAL is a coding system, a cons of coding systems, or a function symbol.
9540 If VAL is a coding system, it is used for both decoding and encoding
9542 If VAL is a cons of coding systems, the car part is used for decoding,
9543 and the cdr part is used for encoding.
9544 If VAL is a function symbol, the function must return a coding system
9545 or a cons of coding systems which are used as above. The function gets
9546 the arguments with which `find-operation-coding-system' was called.
9548 See also the function `find-operation-coding-system'
9549 and the variable `auto-coding-alist'. */);
9550 Vfile_coding_system_alist
= Qnil
;
9552 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist
,
9554 Alist to decide a coding system to use for a process I/O operation.
9555 The format is ((PATTERN . VAL) ...),
9556 where PATTERN is a regular expression matching a program name,
9557 VAL is a coding system, a cons of coding systems, or a function symbol.
9558 If VAL is a coding system, it is used for both decoding what is received
9559 from the program and encoding what is sent to the program.
9560 If VAL is a cons of coding systems, the car part is used for decoding,
9561 and the cdr part is used for encoding.
9562 If VAL is a function symbol, the function must return a coding system
9563 or a cons of coding systems which are used as above.
9565 See also the function `find-operation-coding-system'. */);
9566 Vprocess_coding_system_alist
= Qnil
;
9568 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist
,
9570 Alist to decide a coding system to use for a network I/O operation.
9571 The format is ((PATTERN . VAL) ...),
9572 where PATTERN is a regular expression matching a network service name
9573 or is a port number to connect to,
9574 VAL is a coding system, a cons of coding systems, or a function symbol.
9575 If VAL is a coding system, it is used for both decoding what is received
9576 from the network stream and encoding what is sent to the network stream.
9577 If VAL is a cons of coding systems, the car part is used for decoding,
9578 and the cdr part is used for encoding.
9579 If VAL is a function symbol, the function must return a coding system
9580 or a cons of coding systems which are used as above.
9582 See also the function `find-operation-coding-system'. */);
9583 Vnetwork_coding_system_alist
= Qnil
;
9585 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system
,
9586 doc
: /* Coding system to use with system messages.
9587 Also used for decoding keyboard input on X Window system. */);
9588 Vlocale_coding_system
= Qnil
;
9590 /* The eol mnemonics are reset in startup.el system-dependently. */
9591 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix
,
9593 *String displayed in mode line for UNIX-like (LF) end-of-line format. */);
9594 eol_mnemonic_unix
= build_string (":");
9596 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos
,
9598 *String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
9599 eol_mnemonic_dos
= build_string ("\\");
9601 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac
,
9603 *String displayed in mode line for MAC-like (CR) end-of-line format. */);
9604 eol_mnemonic_mac
= build_string ("/");
9606 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided
,
9608 *String displayed in mode line when end-of-line format is not yet determined. */);
9609 eol_mnemonic_undecided
= build_string (":");
9611 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation
,
9613 *Non-nil enables character translation while encoding and decoding. */);
9614 Venable_character_translation
= Qt
;
9616 DEFVAR_LISP ("standard-translation-table-for-decode",
9617 &Vstandard_translation_table_for_decode
,
9618 doc
: /* Table for translating characters while decoding. */);
9619 Vstandard_translation_table_for_decode
= Qnil
;
9621 DEFVAR_LISP ("standard-translation-table-for-encode",
9622 &Vstandard_translation_table_for_encode
,
9623 doc
: /* Table for translating characters while encoding. */);
9624 Vstandard_translation_table_for_encode
= Qnil
;
9626 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table
,
9627 doc
: /* Alist of charsets vs revision numbers.
9628 While encoding, if a charset (car part of an element) is found,
9629 designate it with the escape sequence identifying revision (cdr part
9630 of the element). */);
9631 Vcharset_revision_table
= Qnil
;
9633 DEFVAR_LISP ("default-process-coding-system",
9634 &Vdefault_process_coding_system
,
9635 doc
: /* Cons of coding systems used for process I/O by default.
9636 The car part is used for decoding a process output,
9637 the cdr part is used for encoding a text to be sent to a process. */);
9638 Vdefault_process_coding_system
= Qnil
;
9640 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table
,
9642 Table of extra Latin codes in the range 128..159 (inclusive).
9643 This is a vector of length 256.
9644 If the Nth element is non-nil, the existence of code N in a file
9645 \(or output of subprocess) doesn't prevent it from being detected as
9646 a coding system of ISO 2022 variant which has a flag
9647 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9648 or reading output of a subprocess.
9649 Only the 128th through 159th elements have a meaning. */);
9650 Vlatin_extra_code_table
= Fmake_vector (make_number (256), Qnil
);
9652 DEFVAR_LISP ("select-safe-coding-system-function",
9653 &Vselect_safe_coding_system_function
,
9655 Function to call to select safe coding system for encoding a text.
9657 If set, this function is called to force a user to select a proper
9658 coding system which can encode the text in the case that a default
9659 coding system used in each operation can't encode the text.
9661 The default value is `select-safe-coding-system' (which see). */);
9662 Vselect_safe_coding_system_function
= Qnil
;
9664 DEFVAR_BOOL ("coding-system-require-warning",
9665 &coding_system_require_warning
,
9666 doc
: /* Internal use only.
9667 If non-nil, on writing a file, `select-safe-coding-system-function' is
9668 called even if `coding-system-for-write' is non-nil. The command
9669 `universal-coding-system-argument' binds this variable to t temporarily. */);
9670 coding_system_require_warning
= 0;
9673 DEFVAR_BOOL ("inhibit-iso-escape-detection",
9674 &inhibit_iso_escape_detection
,
9676 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
9678 By default, on reading a file, Emacs tries to detect how the text is
9679 encoded. This code detection is sensitive to escape sequences. If
9680 the sequence is valid as ISO2022, the code is determined as one of
9681 the ISO2022 encodings, and the file is decoded by the corresponding
9682 coding system (e.g. `iso-2022-7bit').
9684 However, there may be a case that you want to read escape sequences in
9685 a file as is. In such a case, you can set this variable to non-nil.
9686 Then, as the code detection ignores any escape sequences, no file is
9687 detected as encoded in some ISO2022 encoding. The result is that all
9688 escape sequences become visible in a buffer.
9690 The default value is nil, and it is strongly recommended not to change
9691 it. That is because many Emacs Lisp source files that contain
9692 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9693 in Emacs's distribution, and they won't be decoded correctly on
9694 reading if you suppress escape sequence detection.
9696 The other way to read escape sequences in a file without decoding is
9697 to explicitly specify some coding system that doesn't use ISO2022's
9698 escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument]. */);
9699 inhibit_iso_escape_detection
= 0;
9701 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input
,
9702 doc
: /* Char table for translating self-inserting characters.
9703 This is applied to the result of input methods, not their input. See also
9704 `keyboard-translate-table'. */);
9705 Vtranslation_table_for_input
= Qnil
;
9708 Lisp_Object args
[coding_arg_max
];
9709 Lisp_Object plist
[16];
9712 for (i
= 0; i
< coding_arg_max
; i
++)
9715 plist
[0] = intern (":name");
9716 plist
[1] = args
[coding_arg_name
] = Qno_conversion
;
9717 plist
[2] = intern (":mnemonic");
9718 plist
[3] = args
[coding_arg_mnemonic
] = make_number ('=');
9719 plist
[4] = intern (":coding-type");
9720 plist
[5] = args
[coding_arg_coding_type
] = Qraw_text
;
9721 plist
[6] = intern (":ascii-compatible-p");
9722 plist
[7] = args
[coding_arg_ascii_compatible_p
] = Qt
;
9723 plist
[8] = intern (":default-char");
9724 plist
[9] = args
[coding_arg_default_char
] = make_number (0);
9725 plist
[10] = intern (":for-unibyte");
9726 plist
[11] = args
[coding_arg_for_unibyte
] = Qt
;
9727 plist
[12] = intern (":docstring");
9728 plist
[13] = build_string ("Do no conversion.\n\
9730 When you visit a file with this coding, the file is read into a\n\
9731 unibyte buffer as is, thus each byte of a file is treated as a\n\
9733 plist
[14] = intern (":eol-type");
9734 plist
[15] = args
[coding_arg_eol_type
] = Qunix
;
9735 args
[coding_arg_plist
] = Flist (16, plist
);
9736 Fdefine_coding_system_internal (coding_arg_max
, args
);
9738 plist
[1] = args
[coding_arg_name
] = Qundecided
;
9739 plist
[3] = args
[coding_arg_mnemonic
] = make_number ('-');
9740 plist
[5] = args
[coding_arg_coding_type
] = Qundecided
;
9741 /* This is already set.
9742 plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
9743 plist
[8] = intern (":charset-list");
9744 plist
[9] = args
[coding_arg_charset_list
] = Fcons (Qascii
, Qnil
);
9745 plist
[11] = args
[coding_arg_for_unibyte
] = Qnil
;
9746 plist
[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
9747 plist
[15] = args
[coding_arg_eol_type
] = Qnil
;
9748 args
[coding_arg_plist
] = Flist (16, plist
);
9749 Fdefine_coding_system_internal (coding_arg_max
, args
);
9752 setup_coding_system (Qno_conversion
, &keyboard_coding
);
9753 setup_coding_system (Qundecided
, &terminal_coding
);
9754 setup_coding_system (Qno_conversion
, &safe_terminal_coding
);
9759 for (i
= 0; i
< coding_category_max
; i
++)
9760 Fset (AREF (Vcoding_category_table
, i
), Qno_conversion
);
9762 #if defined (MSDOS) || defined (WINDOWSNT)
9763 system_eol_type
= Qdos
;
9765 system_eol_type
= Qunix
;
9767 staticpro (&system_eol_type
);
9771 emacs_strerror (error_number
)
9776 synchronize_system_messages_locale ();
9777 str
= strerror (error_number
);
9779 if (! NILP (Vlocale_coding_system
))
9781 Lisp_Object dec
= code_convert_string_norecord (build_string (str
),
9782 Vlocale_coding_system
,
9784 str
= (char *) SDATA (dec
);
9792 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
9793 (do not change this comment) */