1 /* Coding system handler (conversion, detection, etc.).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /*** TABLE OF CONTENTS ***
26 2. Emacs' internal format (emacs-mule) handlers
28 4. Shift-JIS and BIG5 handlers
30 6. End-of-line handlers
31 7. C library functions
32 8. Emacs Lisp library functions
37 /*** 0. General comments ***/
40 /*** GENERAL NOTE on CODING SYSTEM ***
42 Coding system is an encoding mechanism of one or more character
43 sets. Here's a list of coding systems which Emacs can handle. When
44 we say "decode", it means converting some other coding system to
45 Emacs' internal format (emacs-mule), and when we say "encode",
46 it means converting the coding system emacs-mule to some other
49 0. Emacs' internal format (emacs-mule)
51 Emacs itself holds a multi-lingual character in a buffer and a string
52 in a special format. Details are described in section 2.
56 The most famous coding system for multiple character sets. X's
57 Compound Text, various EUCs (Extended Unix Code), and coding
58 systems used in Internet communication such as ISO-2022-JP are
59 all variants of ISO2022. Details are described in section 3.
61 2. SJIS (or Shift-JIS or MS-Kanji-Code)
63 A coding system to encode character sets: ASCII, JISX0201, and
64 JISX0208. Widely used for PC's in Japan. Details are described in
69 A coding system to encode character sets: ASCII and Big5. Widely
70 used by Chinese (mainly in Taiwan and Hong Kong). Details are
71 described in section 4. In this file, when we write "BIG5"
72 (all uppercase), we mean the coding system, and when we write
73 "Big5" (capitalized), we mean the character set.
77 A coding system for a text containing random 8-bit code. Emacs does
78 no code conversion on such a text except for end-of-line format.
82 If a user wants to read/write a text encoded in a coding system not
83 listed above, he can supply a decoder and an encoder for it in CCL
84 (Code Conversion Language) programs. Emacs executes the CCL program
85 while reading/writing.
87 Emacs represents a coding system by a Lisp symbol that has a property
88 `coding-system'. But, before actually using the coding system, the
89 information about it is set in a structure of type `struct
90 coding_system' for rapid processing. See section 6 for more details.
94 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
96 How the end-of-line of a text is encoded depends on the system. For
97 instance, Unix's format is just one byte of `line-feed' code,
98 whereas DOS's format is two-byte sequence of `carriage-return' and
99 `line-feed' codes. MacOS's format is usually one byte of
102 Since text characters encoding and end-of-line encoding are
103 independent, any coding system described above can take
104 any format of end-of-line. So, Emacs has information of format of
105 end-of-line in each coding-system. See section 6 for more details.
109 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
111 These functions check if a text between SRC and SRC_END is encoded
112 in the coding system category XXX. Each returns an integer value in
113 which appropriate flag bits for the category XXX are set. The flag
114 bits are defined in macros CODING_CATEGORY_MASK_XXX. Below is the
115 template of these functions. */
118 detect_coding_emacs_mule (src
, src_end
)
119 unsigned char *src
, *src_end
;
125 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
127 These functions decode SRC_BYTES length of unibyte text at SOURCE
128 encoded in CODING to Emacs' internal format. The resulting
129 multibyte text goes to a place pointed to by DESTINATION, the length
130 of which should not exceed DST_BYTES.
132 These functions set the information of original and decoded texts in
133 the members produced, produced_char, consumed, and consumed_char of
134 the structure *CODING. They also set the member result to one of
135 CODING_FINISH_XXX indicating how the decoding finished.
137 DST_BYTES zero means that source area and destination area are
138 overlapped, which means that we can produce a decoded text until it
139 reaches the head of the not-yet-decoded source text.
141 Below is a template of these functions. */
144 decode_coding_XXX (coding
, source
, destination
, src_bytes
, dst_bytes
)
145 struct coding_system
*coding
;
146 unsigned char *source
, *destination
;
147 int src_bytes
, dst_bytes
;
153 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
155 These functions encode SRC_BYTES length text at SOURCE of Emacs'
156 internal multibyte format to CODING. The resulting unibyte text
157 goes to a place pointed to by DESTINATION, the length of which
158 should not exceed DST_BYTES.
160 These functions set the information of original and encoded texts in
161 the members produced, produced_char, consumed, and consumed_char of
162 the structure *CODING. They also set the member result to one of
163 CODING_FINISH_XXX indicating how the encoding finished.
165 DST_BYTES zero means that source area and destination area are
166 overlapped, which means that we can produce an encoded text until it
167 reaches the head of the not-yet-encoded source text.
169 Below is a template of these functions. */
172 encode_coding_XXX (coding
, source
, destination
, src_bytes
, dst_bytes
)
173 struct coding_system
*coding
;
174 unsigned char *source
, *destination
;
175 int src_bytes
, dst_bytes
;
181 /*** COMMONLY USED MACROS ***/
183 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
184 get one and two bytes from the source text respectively.
185 If there are not enough bytes in the source, they jump to
186 `label_end_of_loop'. The caller should set variables `coding',
187 `src' and `src_end' to appropriate pointer in advance. These
188 macros are called from decoding routines `decode_coding_XXX', thus
189 it is assumed that the source text is unibyte. */
191 #define ONE_MORE_BYTE(c1) \
193 if (src >= src_end) \
195 coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
196 goto label_end_of_loop; \
201 #define TWO_MORE_BYTES(c1, c2) \
203 if (src + 1 >= src_end) \
205 coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
206 goto label_end_of_loop; \
213 /* Set C to the next character at the source text pointed by `src'.
214 If there are not enough characters in the source, jump to
215 `label_end_of_loop'. The caller should set variables `coding',
216 `src', `src_end', and `translation_table' to appropriate pointers
217 in advance. This macro is used in encoding routines
218 `encode_coding_XXX', thus it assumes that the source text is in
219 multibyte form except for 8-bit characters. 8-bit characters are
220 in multibyte form if coding->src_multibyte is nonzero, else they
221 are represented by a single byte. */
223 #define ONE_MORE_CHAR(c) \
225 int len = src_end - src; \
229 coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
230 goto label_end_of_loop; \
232 if (coding->src_multibyte \
233 || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes)) \
234 c = STRING_CHAR_AND_LENGTH (src, len, bytes); \
236 c = *src, bytes = 1; \
237 if (!NILP (translation_table)) \
238 c = translate_char (translation_table, c, 0, 0, 0); \
243 /* Produce a multibyte form of character C to `dst'. Jump to
244 `label_end_of_loop' if there's not enough space at `dst'.
246 If we are now in the middle of composition sequence, the decoded
247 character may be ALTCHAR (for the current composition). In that
248 case, the character goes to coding->cmp_data->data instead of
251 This macro is used in decoding routines. */
253 #define EMIT_CHAR(c) \
255 if (! COMPOSING_P (coding) \
256 || coding->composing == COMPOSITION_RELATIVE \
257 || coding->composing == COMPOSITION_WITH_RULE) \
259 int bytes = CHAR_BYTES (c); \
260 if ((dst + bytes) > (dst_bytes ? dst_end : src)) \
262 coding->result = CODING_FINISH_INSUFFICIENT_DST; \
263 goto label_end_of_loop; \
265 dst += CHAR_STRING (c, dst); \
266 coding->produced_char++; \
269 if (COMPOSING_P (coding) \
270 && coding->composing != COMPOSITION_RELATIVE) \
272 CODING_ADD_COMPOSITION_COMPONENT (coding, c); \
273 coding->composition_rule_follows \
274 = coding->composing != COMPOSITION_WITH_ALTCHARS; \
279 #define EMIT_ONE_BYTE(c) \
281 if (dst >= (dst_bytes ? dst_end : src)) \
283 coding->result = CODING_FINISH_INSUFFICIENT_DST; \
284 goto label_end_of_loop; \
289 #define EMIT_TWO_BYTES(c1, c2) \
291 if (dst + 2 > (dst_bytes ? dst_end : src)) \
293 coding->result = CODING_FINISH_INSUFFICIENT_DST; \
294 goto label_end_of_loop; \
296 *dst++ = c1, *dst++ = c2; \
299 #define EMIT_BYTES(from, to) \
301 if (dst + (to - from) > (dst_bytes ? dst_end : src)) \
303 coding->result = CODING_FINISH_INSUFFICIENT_DST; \
304 goto label_end_of_loop; \
311 /*** 1. Preamble ***/
324 #include "composite.h"
329 #else /* not emacs */
333 #endif /* not emacs */
335 Lisp_Object Qcoding_system
, Qeol_type
;
336 Lisp_Object Qbuffer_file_coding_system
;
337 Lisp_Object Qpost_read_conversion
, Qpre_write_conversion
;
338 Lisp_Object Qno_conversion
, Qundecided
;
339 Lisp_Object Qcoding_system_history
;
340 Lisp_Object Qsafe_charsets
;
341 Lisp_Object Qvalid_codes
;
343 extern Lisp_Object Qinsert_file_contents
, Qwrite_region
;
344 Lisp_Object Qcall_process
, Qcall_process_region
, Qprocess_argument
;
345 Lisp_Object Qstart_process
, Qopen_network_stream
;
346 Lisp_Object Qtarget_idx
;
348 Lisp_Object Vselect_safe_coding_system_function
;
350 /* Mnemonic string for each format of end-of-line. */
351 Lisp_Object eol_mnemonic_unix
, eol_mnemonic_dos
, eol_mnemonic_mac
;
352 /* Mnemonic string to indicate format of end-of-line is not yet
354 Lisp_Object eol_mnemonic_undecided
;
356 /* Format of end-of-line decided by system. This is CODING_EOL_LF on
357 Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac. */
362 Lisp_Object Vcoding_system_list
, Vcoding_system_alist
;
364 Lisp_Object Qcoding_system_p
, Qcoding_system_error
;
366 /* Coding system emacs-mule and raw-text are for converting only
367 end-of-line format. */
368 Lisp_Object Qemacs_mule
, Qraw_text
;
370 /* Coding-systems are handed between Emacs Lisp programs and C internal
371 routines by the following three variables. */
372 /* Coding-system for reading files and receiving data from process. */
373 Lisp_Object Vcoding_system_for_read
;
374 /* Coding-system for writing files and sending data to process. */
375 Lisp_Object Vcoding_system_for_write
;
376 /* Coding-system actually used in the latest I/O. */
377 Lisp_Object Vlast_coding_system_used
;
379 /* A vector of length 256 which contains information about special
380 Latin codes (especially for dealing with Microsoft codes). */
381 Lisp_Object Vlatin_extra_code_table
;
383 /* Flag to inhibit code conversion of end-of-line format. */
384 int inhibit_eol_conversion
;
386 /* Flag to make buffer-file-coding-system inherit from process-coding. */
387 int inherit_process_coding_system
;
389 /* Coding system to be used to encode text for terminal display. */
390 struct coding_system terminal_coding
;
392 /* Coding system to be used to encode text for terminal display when
393 terminal coding system is nil. */
394 struct coding_system safe_terminal_coding
;
396 /* Coding system of what is sent from terminal keyboard. */
397 struct coding_system keyboard_coding
;
399 /* Default coding system to be used to write a file. */
400 struct coding_system default_buffer_file_coding
;
402 Lisp_Object Vfile_coding_system_alist
;
403 Lisp_Object Vprocess_coding_system_alist
;
404 Lisp_Object Vnetwork_coding_system_alist
;
406 Lisp_Object Vlocale_coding_system
;
410 Lisp_Object Qcoding_category
, Qcoding_category_index
;
412 /* List of symbols `coding-category-xxx' ordered by priority. */
413 Lisp_Object Vcoding_category_list
;
415 /* Table of coding categories (Lisp symbols). */
416 Lisp_Object Vcoding_category_table
;
418 /* Table of names of symbol for each coding-category. */
419 char *coding_category_name
[CODING_CATEGORY_IDX_MAX
] = {
420 "coding-category-emacs-mule",
421 "coding-category-sjis",
422 "coding-category-iso-7",
423 "coding-category-iso-7-tight",
424 "coding-category-iso-8-1",
425 "coding-category-iso-8-2",
426 "coding-category-iso-7-else",
427 "coding-category-iso-8-else",
428 "coding-category-ccl",
429 "coding-category-big5",
430 "coding-category-utf-8",
431 "coding-category-utf-16-be",
432 "coding-category-utf-16-le",
433 "coding-category-raw-text",
434 "coding-category-binary"
437 /* Table of pointers to coding systems corresponding to each coding
439 struct coding_system
*coding_system_table
[CODING_CATEGORY_IDX_MAX
];
441 /* Table of coding category masks. Nth element is a mask for a coding
442 category of which priority is Nth. */
444 int coding_priorities
[CODING_CATEGORY_IDX_MAX
];
446 /* Flag to tell if we look up translation table on character code
448 Lisp_Object Venable_character_translation
;
449 /* Standard translation table to look up on decoding (reading). */
450 Lisp_Object Vstandard_translation_table_for_decode
;
451 /* Standard translation table to look up on encoding (writing). */
452 Lisp_Object Vstandard_translation_table_for_encode
;
454 Lisp_Object Qtranslation_table
;
455 Lisp_Object Qtranslation_table_id
;
456 Lisp_Object Qtranslation_table_for_decode
;
457 Lisp_Object Qtranslation_table_for_encode
;
459 /* Alist of charsets vs revision number. */
460 Lisp_Object Vcharset_revision_alist
;
462 /* Default coding systems used for process I/O. */
463 Lisp_Object Vdefault_process_coding_system
;
465 /* Global flag to tell that we can't call post-read-conversion and
466 pre-write-conversion functions. Usually the value is zero, but it
467 is set to 1 temporarily while such functions are running. This is
468 to avoid infinite recursive call. */
469 static int inhibit_pre_post_conversion
;
472 /*** 2. Emacs internal format (emacs-mule) handlers ***/
474 /* Emacs' internal format for encoding multiple character sets is a
475 kind of multi-byte encoding, i.e. characters are encoded by
476 variable-length sequences of one-byte codes.
478 ASCII characters and control characters (e.g. `tab', `newline') are
479 represented by one-byte sequences which are their ASCII codes, in
480 the range 0x00 through 0x7F.
482 8-bit characters of the range 0x80..0x9F are represented by
483 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
486 8-bit characters of the range 0xA0..0xFF are represented by
487 one-byte sequences which are their 8-bit code.
489 The other characters are represented by a sequence of `base
490 leading-code', optional `extended leading-code', and one or two
491 `position-code's. The length of the sequence is determined by the
492 base leading-code. Leading-code takes the range 0x80 through 0x9F,
493 whereas extended leading-code and position-code take the range 0xA0
494 through 0xFF. See `charset.h' for more details about leading-code
497 --- CODE RANGE of Emacs' internal format ---
501 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
502 eight-bit-graphic 0xA0..0xFF
503 ELSE 0x81..0x9F + [0xA0..0xFF]+
504 ---------------------------------------------
508 enum emacs_code_class_type emacs_code_class
[256];
510 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
511 Check if a text is encoded in Emacs' internal format. If it is,
512 return CODING_CATEGORY_MASK_EMACS_MULE, else return 0. */
515 detect_coding_emacs_mule (src
, src_end
)
516 unsigned char *src
, *src_end
;
520 /* Dummy for ONE_MORE_BYTE. */
521 struct coding_system dummy_coding
;
522 struct coding_system
*coding
= &dummy_coding
;
543 if (c
== ISO_CODE_ESC
|| c
== ISO_CODE_SI
|| c
== ISO_CODE_SO
)
546 else if (c
>= 0x80 && c
< 0xA0)
549 /* Old leading code for a composite character. */
553 unsigned char *src_base
= src
- 1;
556 if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base
, src_end
- src_base
,
559 src
= src_base
+ bytes
;
564 return CODING_CATEGORY_MASK_EMACS_MULE
;
568 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
571 decode_coding_emacs_mule (coding
, source
, destination
, src_bytes
, dst_bytes
)
572 struct coding_system
*coding
;
573 unsigned char *source
, *destination
;
574 int src_bytes
, dst_bytes
;
576 unsigned char *src
= source
;
577 unsigned char *src_end
= source
+ src_bytes
;
578 unsigned char *dst
= destination
;
579 unsigned char *dst_end
= destination
+ dst_bytes
;
580 /* SRC_BASE remembers the start position in source in each loop.
581 The loop will be exited when there's not enough source code, or
582 when there's not enough destination area to produce a
584 unsigned char *src_base
;
586 coding
->produced_char
= 0;
587 while ((src_base
= src
) < src_end
)
589 unsigned char tmp
[MAX_MULTIBYTE_LENGTH
], *p
;
592 if (UNIBYTE_STR_AS_MULTIBYTE_P (src
, src_end
- src
, bytes
))
599 bytes
= CHAR_STRING (*src
, tmp
);
603 if (dst
+ bytes
>= (dst_bytes
? dst_end
: src
))
605 coding
->result
= CODING_FINISH_INSUFFICIENT_DST
;
608 while (bytes
--) *dst
++ = *p
++;
609 coding
->produced_char
++;
611 coding
->consumed
= coding
->consumed_char
= src_base
- source
;
612 coding
->produced
= dst
- destination
;
/* Encoding to emacs-mule is delegated entirely to encode_eol, which is
   called with the same five arguments.  */
615 #define encode_coding_emacs_mule(coding, source, destination, src_bytes, dst_bytes) \
616 encode_eol (coding, source, destination, src_bytes, dst_bytes)
620 /*** 3. ISO2022 handlers ***/
622 /* The following note describes the coding system ISO2022 briefly.
623 Since the intention of this note is to help understand the
624 functions in this file, some parts are NOT ACCURATE or OVERLY
625 SIMPLIFIED. For thorough understanding, please refer to the
626 original document of ISO2022.
628 ISO2022 provides many mechanisms to encode several character sets
629 in 7-bit and 8-bit environments. For 7-bit environments, all text
630 is encoded using bytes less than 128. This may make the encoded
631 text a little bit longer, but the text passes more easily through
632 several gateways, some of which strip off MSB (Most Significant Bit).
634 There are two kinds of character sets: control character set and
635 graphic character set. The former contains control characters such
636 as `newline' and `escape' to provide control functions (control
637 functions are also provided by escape sequences). The latter
638 contains graphic characters such as 'A' and '-'. Emacs recognizes
639 two control character sets and many graphic character sets.
641 Graphic character sets are classified into one of the following
642 four classes, according to the number of bytes (DIMENSION) and
643 number of characters in one dimension (CHARS) of the set:
649 In addition, each character set is assigned an identification tag,
650 unique for each set, called "final character" (denoted as <F>
651 hereafter). The <F> of each character set is decided by ECMA(*)
652 when it is registered in ISO. The code range of <F> is 0x30..0x7F
653 (0x30..0x3F are for private use only).
655 Note (*): ECMA = European Computer Manufacturers Association
657 Here are examples of graphic character set [NAME(<F>)]:
658 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
659 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
660 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
661 o DIMENSION2_CHARS96 -- none for the moment
663 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
664 C0 [0x00..0x1F] -- control character plane 0
665 GL [0x20..0x7F] -- graphic character plane 0
666 C1 [0x80..0x9F] -- control character plane 1
667 GR [0xA0..0xFF] -- graphic character plane 1
669 A control character set is directly designated and invoked to C0 or
670 C1 by an escape sequence. The most common case is that:
671 - ISO646's control character set is designated/invoked to C0, and
672 - ISO6429's control character set is designated/invoked to C1,
673 and usually these designations/invocations are omitted in encoded
674 text. In a 7-bit environment, only C0 can be used, and a control
675 character for C1 is encoded by an appropriate escape sequence to
676 fit into the environment. All control characters for C1 are
677 defined to have corresponding escape sequences.
679 A graphic character set is at first designated to one of four
680 graphic registers (G0 through G3), then these graphic registers are
681 invoked to GL or GR. These designations and invocations can be
682 done independently. The most common case is that G0 is invoked to
683 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
684 these invocations and designations are omitted in encoded text.
685 In a 7-bit environment, only GL can be used.
687 When a graphic character set of CHARS94 is invoked to GL, codes
688 0x20 and 0x7F of the GL area work as control characters SPACE and
689 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
692 There are two ways of invocation: locking-shift and single-shift.
693 With locking-shift, the invocation lasts until the next different
694 invocation, whereas with single-shift, the invocation affects the
695 following character only and doesn't affect the locking-shift
696 state. Invocations are done by the following control characters or
699 ----------------------------------------------------------------------
700 abbrev function cntrl escape seq description
701 ----------------------------------------------------------------------
702 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
703 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
704 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
705 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
706 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
707 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
708 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
709 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
710 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
711 ----------------------------------------------------------------------
712 (*) These are not used by any known coding system.
714 Control characters for these functions are defined by macros
715 ISO_CODE_XXX in `coding.h'.
717 Designations are done by the following escape sequences:
718 ----------------------------------------------------------------------
719 escape sequence description
720 ----------------------------------------------------------------------
721 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
722 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
723 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
724 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
725 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
726 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
727 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
728 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
729 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
730 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
731 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
732 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
733 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
734 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
735 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
736 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
737 ----------------------------------------------------------------------
739 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
740 of dimension 1, chars 94, and final character <F>, etc...
742 Note (*): Although these designations are not allowed in ISO2022,
743 Emacs accepts them on decoding, and produces them on encoding
744 CHARS96 character sets in a coding system which is characterized as
745 7-bit environment, non-locking-shift, and non-single-shift.
747 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
748 '(' can be omitted. We refer to this as "short-form" hereafter.
750 Now you may notice that there are a lot of ways for encoding the
751 same multilingual text in ISO2022. Actually, there exist many
752 coding systems such as Compound Text (used in X11's inter client
753 communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
754 (used in Korean internet), EUC (Extended UNIX Code, used in Asian
755 localized platforms), and all of these are variants of ISO2022.
757 In addition to the above, Emacs handles two more kinds of escape
758 sequences: ISO6429's direction specification and Emacs' private
759 sequence for specifying character composition.
761 ISO6429's direction specification takes the following form:
762 o CSI ']' -- end of the current direction
763 o CSI '0' ']' -- end of the current direction
764 o CSI '1' ']' -- start of left-to-right text
765 o CSI '2' ']' -- start of right-to-left text
766 The control character CSI (0x9B: control sequence introducer) is
767 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
769 Character composition specification takes the following form:
770 o ESC '0' -- start relative composition
771 o ESC '1' -- end composition
772 o ESC '2' -- start rule-base composition (*)
773 o ESC '3' -- start relative composition with alternate chars (**)
774 o ESC '4' -- start rule-base composition with alternate chars (**)
775 Since these are not standard escape sequences of any ISO standard,
776 the use of them for these meanings is restricted to Emacs only.
778 (*) This form is used only in Emacs 20.5 and the older versions,
779 but the newer versions can safely decode it.
780 (**) This form is used only in Emacs 21.1 and the newer versions,
781 and the older versions can't decode it.
783 Here's a list of example usages of these composition escape
784 sequences (categorized by `enum composition_method').
786 COMPOSITION_RELATIVE:
787 ESC 0 CHAR [ CHAR ] ESC 1
788 COMPOSITION_WITH_RULE:
789 ESC 2 CHAR [ RULE CHAR ] ESC 1
790 COMPOSITION_WITH_ALTCHARS:
791 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
792 COMPOSITION_WITH_RULE_ALTCHARS:
793 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
795 enum iso_code_class_type iso_code_class
[256];
/* Nonzero if coding_system_table[IDX] is non-null and CHARSET is
   acceptable to that coding system: either its safe_charsets[CHARSET]
   entry is nonzero, or its requested designation for CHARSET is
   something other than CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION.  */
797 #define CHARSET_OK(idx, charset) \
798 (coding_system_table[idx] \
799 && (coding_system_table[idx]->safe_charsets[charset] \
800 || (CODING_SPEC_ISO_REQUESTED_DESIGNATION \
801 (coding_system_table[idx], charset) \
802 != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)))
/* Nonzero if the initial designation recorded for register 1 in
   coding_system_table[IDX] is non-negative.  Unlike CHARSET_OK, this
   macro does not itself test coding_system_table[IDX] for null.
   NOTE(review): presumably a non-negative value means some charset is
   initially designated to G1, making shift-out meaningful -- confirm
   against the definition in coding.h.  */
804 #define SHIFT_OUT_OK(idx) \
805 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
807 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
808 Check if a text is encoded in ISO2022. If it is, return an
809 integer in which any of the following flag bits:
810 CODING_CATEGORY_MASK_ISO_7
811 CODING_CATEGORY_MASK_ISO_7_TIGHT
812 CODING_CATEGORY_MASK_ISO_8_1
813 CODING_CATEGORY_MASK_ISO_8_2
814 CODING_CATEGORY_MASK_ISO_7_ELSE
815 CODING_CATEGORY_MASK_ISO_8_ELSE
816 are set. If a code which should never appear in ISO2022 is found,
820 detect_coding_iso2022 (src
, src_end
)
821 unsigned char *src
, *src_end
;
823 int mask
= CODING_CATEGORY_MASK_ISO
;
825 int reg
[4], shift_out
= 0, single_shifting
= 0;
826 int c
, c1
, i
, charset
;
827 /* Dummy for ONE_MORE_BYTE. */
828 struct coding_system dummy_coding
;
829 struct coding_system
*coding
= &dummy_coding
;
831 reg
[0] = CHARSET_ASCII
, reg
[1] = reg
[2] = reg
[3] = -1;
832 while (mask
&& src
< src_end
)
840 if (c
>= '(' && c
<= '/')
842 /* Designation sequence for a charset of dimension 1. */
844 if (c1
< ' ' || c1
>= 0x80
845 || (charset
= iso_charset_table
[0][c
>= ','][c1
]) < 0)
846 /* Invalid designation sequence. Just ignore. */
848 reg
[(c
- '(') % 4] = charset
;
852 /* Designation sequence for a charset of dimension 2. */
854 if (c
>= '@' && c
<= 'B')
855 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
856 reg
[0] = charset
= iso_charset_table
[1][0][c
];
857 else if (c
>= '(' && c
<= '/')
860 if (c1
< ' ' || c1
>= 0x80
861 || (charset
= iso_charset_table
[1][c
>= ','][c1
]) < 0)
862 /* Invalid designation sequence. Just ignore. */
864 reg
[(c
- '(') % 4] = charset
;
867 /* Invalid designation sequence. Just ignore. */
870 else if (c
== 'N' || c
== 'O')
872 /* ESC <Fe> for SS2 or SS3. */
873 mask
&= CODING_CATEGORY_MASK_ISO_7_ELSE
;
876 else if (c
>= '0' && c
<= '4')
878 /* ESC <Fp> for start/end composition. */
879 mask_found
|= CODING_CATEGORY_MASK_ISO
;
883 /* Invalid escape sequence. Just ignore. */
886 /* We found a valid designation sequence for CHARSET. */
887 mask
&= ~CODING_CATEGORY_MASK_ISO_8BIT
;
888 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7
, charset
))
889 mask_found
|= CODING_CATEGORY_MASK_ISO_7
;
891 mask
&= ~CODING_CATEGORY_MASK_ISO_7
;
892 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT
, charset
))
893 mask_found
|= CODING_CATEGORY_MASK_ISO_7_TIGHT
;
895 mask
&= ~CODING_CATEGORY_MASK_ISO_7_TIGHT
;
896 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE
, charset
))
897 mask_found
|= CODING_CATEGORY_MASK_ISO_7_ELSE
;
899 mask
&= ~CODING_CATEGORY_MASK_ISO_7_ELSE
;
900 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE
, charset
))
901 mask_found
|= CODING_CATEGORY_MASK_ISO_8_ELSE
;
903 mask
&= ~CODING_CATEGORY_MASK_ISO_8_ELSE
;
910 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE
)
911 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE
)))
913 /* Locking shift out. */
914 mask
&= ~CODING_CATEGORY_MASK_ISO_7BIT
;
915 mask_found
|= CODING_CATEGORY_MASK_ISO_SHIFT
;
923 /* Locking shift in. */
924 mask
&= ~CODING_CATEGORY_MASK_ISO_7BIT
;
925 mask_found
|= CODING_CATEGORY_MASK_ISO_SHIFT
;
934 int newmask
= CODING_CATEGORY_MASK_ISO_8_ELSE
;
936 if (c
!= ISO_CODE_CSI
)
938 if (coding_system_table
[CODING_CATEGORY_IDX_ISO_8_1
]->flags
939 & CODING_FLAG_ISO_SINGLE_SHIFT
)
940 newmask
|= CODING_CATEGORY_MASK_ISO_8_1
;
941 if (coding_system_table
[CODING_CATEGORY_IDX_ISO_8_2
]->flags
942 & CODING_FLAG_ISO_SINGLE_SHIFT
)
943 newmask
|= CODING_CATEGORY_MASK_ISO_8_2
;
946 if (VECTORP (Vlatin_extra_code_table
)
947 && !NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
949 if (coding_system_table
[CODING_CATEGORY_IDX_ISO_8_1
]->flags
950 & CODING_FLAG_ISO_LATIN_EXTRA
)
951 newmask
|= CODING_CATEGORY_MASK_ISO_8_1
;
952 if (coding_system_table
[CODING_CATEGORY_IDX_ISO_8_2
]->flags
953 & CODING_FLAG_ISO_LATIN_EXTRA
)
954 newmask
|= CODING_CATEGORY_MASK_ISO_8_2
;
957 mask_found
|= newmask
;
970 if (VECTORP (Vlatin_extra_code_table
)
971 && !NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
975 if (coding_system_table
[CODING_CATEGORY_IDX_ISO_8_1
]->flags
976 & CODING_FLAG_ISO_LATIN_EXTRA
)
977 newmask
|= CODING_CATEGORY_MASK_ISO_8_1
;
978 if (coding_system_table
[CODING_CATEGORY_IDX_ISO_8_2
]->flags
979 & CODING_FLAG_ISO_LATIN_EXTRA
)
980 newmask
|= CODING_CATEGORY_MASK_ISO_8_2
;
982 mask_found
|= newmask
;
989 mask
&= ~(CODING_CATEGORY_MASK_ISO_7BIT
990 | CODING_CATEGORY_MASK_ISO_7_ELSE
);
991 mask_found
|= CODING_CATEGORY_MASK_ISO_8_1
;
992 /* Check the length of succeeding codes of the range
993 0xA0..0xFF. If the byte length is odd, we exclude
994 CODING_CATEGORY_MASK_ISO_8_2. We can check this only
995 when we are not single shifting. */
997 && mask
& CODING_CATEGORY_MASK_ISO_8_2
)
1000 while (src
< src_end
)
1008 if (i
& 1 && src
< src_end
)
1009 mask
&= ~CODING_CATEGORY_MASK_ISO_8_2
;
1011 mask_found
|= CODING_CATEGORY_MASK_ISO_8_2
;
1018 return (mask
& mask_found
);
1021 /* Decode a character of which charset is CHARSET, the 1st position
1022 code is C1, the 2nd position code is C2, and return the decoded
1023 character code. If the variable `translation_table' is non-nil,
1024 return the translated code. */
1026 #define DECODE_ISO_CHARACTER(charset, c1, c2) \
1027 (NILP (translation_table) \
1028 ? MAKE_CHAR (charset, c1, c2) \
1029 : translate_char (translation_table, -1, charset, c1, c2))
1031 /* Set designation state into CODING. */
1032 #define DECODE_DESIGNATION(reg, dimension, chars, final_char) \
1036 if (final_char < '0' || final_char >= 128) \
1037 goto label_invalid_code; \
1038 charset = ISO_CHARSET_TABLE (make_number (dimension), \
1039 make_number (chars), \
1040 make_number (final_char)); \
1042 && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
1043 || coding->safe_charsets[charset])) \
1045 if (coding->spec.iso2022.last_invalid_designation_register == 0 \
1047 && charset == CHARSET_ASCII) \
1049 /* We should insert this designation sequence as is so \
1050 that it is surely written back to a file. */ \
1051 coding->spec.iso2022.last_invalid_designation_register = -1; \
1052 goto label_invalid_code; \
1054 coding->spec.iso2022.last_invalid_designation_register = -1; \
1055 if ((coding->mode & CODING_MODE_DIRECTION) \
1056 && CHARSET_REVERSE_CHARSET (charset) >= 0) \
1057 charset = CHARSET_REVERSE_CHARSET (charset); \
1058 CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \
1062 coding->spec.iso2022.last_invalid_designation_register = reg; \
1063 goto label_invalid_code; \
1067 /* Allocate a memory block for storing information about compositions.
1068 The block is chained to the already allocated blocks.

   The new block is obtained with xmalloc and tagged with CHAR_OFFSET.
   It is linked after the block currently referenced by
   CODING->cmp_data (if there is one) and then becomes the current
   block.  CODING->cmp_data_start, which indexes into the current
   block's data, is reset to 0.  */
1071 coding_allocate_composition_data (coding
, char_offset
)
1072 struct coding_system
*coding
;
1075 struct composition_data
*cmp_data
1076 = (struct composition_data
*) xmalloc (sizeof *cmp_data
);
1078 cmp_data
->char_offset
= char_offset
;
/* Append the new block to the doubly-linked chain: its predecessor is
   the block that was current so far (possibly none), and it has no
   successor yet.  */
1080 cmp_data
->prev
= coding
->cmp_data
;
1081 cmp_data
->next
= NULL
;
/* Only patch the forward link when a previous block exists.  */
1082 if (coding
->cmp_data
)
1083 coding
->cmp_data
->next
= cmp_data
;
/* The new block becomes the current one, and recording restarts at
   the beginning of its data.  */
1084 coding
->cmp_data
= cmp_data
;
1085 coding
->cmp_data_start
= 0;
1088 /* Record the starting position START and METHOD of one composition.
   A new record is begun at the first unused slot of the current
   block's data array, and that index is remembered in
   CODING->cmp_data_start.  Slot 1 of the record receives the block's
   char_offset plus START, slot 3 receives METHOD, and four slots are
   reserved for the record header by advancing `used' by 4.  */
1090 #define CODING_ADD_COMPOSITION_START(coding, start, method) \
1092 struct composition_data *cmp_data = coding->cmp_data; \
1093 int *data = cmp_data->data + cmp_data->used; \
1094 coding->cmp_data_start = cmp_data->used; \
1096 data[1] = cmp_data->char_offset + start; \
1097 data[3] = (int) method; \
1098 cmp_data->used += 4; \
1101 /* Record the ending position END of the current composition.
   The record being closed is the one that begins at index
   CODING->cmp_data_start of the current block.  Slot 0 receives the
   number of ints stored since the record began (`used' minus the
   record's start index), and slot 2 receives the block's char_offset
   plus END.  */
1103 #define CODING_ADD_COMPOSITION_END(coding, end) \
1105 struct composition_data *cmp_data = coding->cmp_data; \
1106 int *data = cmp_data->data + coding->cmp_data_start; \
1107 data[0] = cmp_data->used - coding->cmp_data_start; \
1108 data[2] = cmp_data->char_offset + end; \
1111 /* Record one COMPONENT (alternate character or composition rule).
   COMPONENT is stored in the next free slot of the current block's
   data array and the block's `used' count is advanced by one.  The
   macro itself performs no capacity check.  */
1113 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \
1114 (coding->cmp_data->data[coding->cmp_data->used++] = component)
1116 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   C1 is the byte that followed ESC.

   When composition is disabled, ESC and C1 (masked to 7 bits) are
   written to DST and CODING->produced_char is advanced by 2.

   When no composition is in progress, C1 selects the method: '0' is
   relative, '2' is with rule, '3' is with alternate chars, and any
   other value is with rule and alternate chars.  A new composition
   record is then started at the current produced-character position.
   If the current composition block is missing or too full, the macro
   sets CODING->result to CODING_FINISH_INSUFFICIENT_CMP and jumps to
   label_end_of_loop instead.

   When a composition with alternate chars is already in progress, the
   method is switched to COMPOSITION_RELATIVE.  */
1118 #define DECODE_COMPOSITION_START(c1) \
1120 if (coding->composing == COMPOSITION_DISABLED) \
1122 *dst++ = ISO_CODE_ESC; \
1123 *dst++ = c1 & 0x7f; \
1124 coding->produced_char += 2; \
1126 else if (!COMPOSING_P (coding)) \
1128 /* This is surely the start of a composition. We must be sure \
1129 that coding->cmp_data has enough space to store the \
1130 information about the composition. If not, terminate the \
1131 current decoding loop, allocate one more memory block for \
1132 coding->cmp_data in the caller, then start the decoding \
1133 loop again. We can't allocate memory here directly because \
1134 it may cause buffer/string relocation. */ \
1135 if (!coding->cmp_data \
1136 || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1137 >= COMPOSITION_DATA_SIZE)) \
1139 coding->result = CODING_FINISH_INSUFFICIENT_CMP; \
1140 goto label_end_of_loop; \
1142 coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE \
1143 : c1 == '2' ? COMPOSITION_WITH_RULE \
1144 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
1145 : COMPOSITION_WITH_RULE_ALTCHARS); \
1146 CODING_ADD_COMPOSITION_START (coding, coding->produced_char, \
1147 coding->composing); \
1148 coding->composition_rule_follows = 0; \
1152 /* We are already handling a composition. If the method is \
1153 one of the following two, the codes following the current escape \
1154 sequence are actual characters stored in a buffer. */ \
1155 if (coding->composing == COMPOSITION_WITH_ALTCHARS \
1156 || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) \
1158 coding->composing = COMPOSITION_RELATIVE; \
1159 coding->composition_rule_follows = 0; \
1164 /* Handle composition end sequence ESC 1.
   When composition is disabled, ISO_CODE_ESC is written to DST and
   CODING->produced_char is advanced by 2.  The macro also closes the
   current composition record at the current produced-character
   position and sets CODING->composing to COMPOSITION_NO.
   NOTE(review): closing the record is presumably the alternative
   branch of the COMPOSITION_DISABLED test; the branch keywords are
   not visible in this excerpt, so confirm against the full macro.  */
1166 #define DECODE_COMPOSITION_END(c1) \
1168 if (coding->composing == COMPOSITION_DISABLED) \
1170 *dst++ = ISO_CODE_ESC; \
1172 coding->produced_char += 2; \
1176 CODING_ADD_COMPOSITION_END (coding, coding->produced_char); \
1177 coding->composing = COMPOSITION_NO; \
1181 /* Decode a composition rule from the byte C1 (and maybe one more byte
1182 from SRC) and store one encoded composition rule in
1183 coding->cmp_data. */
1185 #define DECODE_COMPOSITION_RULE(c1) \
1189 if (c1 < 81) /* old format (before ver.21) */ \
1191 int gref = (c1) / 9; \
1192 int nref = (c1) % 9; \
1193 if (gref == 4) gref = 10; \
1194 if (nref == 4) nref = 10; \
1195 rule = COMPOSITION_ENCODE_RULE (gref, nref); \
1197 else if (c1 < 93) /* new format (after ver.21) */ \
1199 ONE_MORE_BYTE (c2); \
1200 rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
1202 CODING_ADD_COMPOSITION_COMPONENT (coding, rule); \
1203 coding->composition_rule_follows = 0; \
1207 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1210 decode_coding_iso2022 (coding
, source
, destination
, src_bytes
, dst_bytes
)
1211 struct coding_system
*coding
;
1212 unsigned char *source
, *destination
;
1213 int src_bytes
, dst_bytes
;
1215 unsigned char *src
= source
;
1216 unsigned char *src_end
= source
+ src_bytes
;
1217 unsigned char *dst
= destination
;
1218 unsigned char *dst_end
= destination
+ dst_bytes
;
1219 /* Charsets invoked to graphic plane 0 and 1 respectively. */
1220 int charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1221 int charset1
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 1);
1222 /* SRC_BASE remembers the start position in source in each loop.
1223 The loop will be exited when there's not enough source code
1224 (within macro ONE_MORE_BYTE), or when there's not enough
1225 destination area to produce a character (within macro
1227 unsigned char *src_base
;
1229 Lisp_Object translation_table
;
1231 if (NILP (Venable_character_translation
))
1232 translation_table
= Qnil
;
1235 translation_table
= coding
->translation_table_for_decode
;
1236 if (NILP (translation_table
))
1237 translation_table
= Vstandard_translation_table_for_decode
;
1240 coding
->result
= CODING_FINISH_NORMAL
;
1249 /* We produce no character or one character. */
1250 switch (iso_code_class
[c1
])
1252 case ISO_0x20_or_0x7F
:
1253 if (COMPOSING_P (coding
) && coding
->composition_rule_follows
)
1255 DECODE_COMPOSITION_RULE (c1
);
1258 if (charset0
< 0 || CHARSET_CHARS (charset0
) == 94)
1260 /* This is SPACE or DEL. */
1261 charset
= CHARSET_ASCII
;
1264 /* This is a graphic character, we fall down ... */
1266 case ISO_graphic_plane_0
:
1267 if (COMPOSING_P (coding
) && coding
->composition_rule_follows
)
1269 DECODE_COMPOSITION_RULE (c1
);
1275 case ISO_0xA0_or_0xFF
:
1276 if (charset1
< 0 || CHARSET_CHARS (charset1
) == 94
1277 || coding
->flags
& CODING_FLAG_ISO_SEVEN_BITS
)
1278 goto label_invalid_code
;
1279 /* This is a graphic character, we fall down ... */
1281 case ISO_graphic_plane_1
:
1283 goto label_invalid_code
;
1288 if (COMPOSING_P (coding
))
1289 DECODE_COMPOSITION_END ('1');
1291 /* All ISO2022 control characters in this class have the
1292 same representation in Emacs internal format. */
1294 && (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
1295 && (coding
->eol_type
== CODING_EOL_CR
1296 || coding
->eol_type
== CODING_EOL_CRLF
))
1298 coding
->result
= CODING_FINISH_INCONSISTENT_EOL
;
1299 goto label_end_of_loop
;
1301 charset
= CHARSET_ASCII
;
1305 if (COMPOSING_P (coding
))
1306 DECODE_COMPOSITION_END ('1');
1307 goto label_invalid_code
;
1309 case ISO_carriage_return
:
1310 if (COMPOSING_P (coding
))
1311 DECODE_COMPOSITION_END ('1');
1313 if (coding
->eol_type
== CODING_EOL_CR
)
1315 else if (coding
->eol_type
== CODING_EOL_CRLF
)
1318 if (c1
!= ISO_CODE_LF
)
1320 if (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
1322 coding
->result
= CODING_FINISH_INCONSISTENT_EOL
;
1323 goto label_end_of_loop
;
1329 charset
= CHARSET_ASCII
;
1333 if (! (coding
->flags
& CODING_FLAG_ISO_LOCKING_SHIFT
)
1334 || CODING_SPEC_ISO_DESIGNATION (coding
, 1) < 0)
1335 goto label_invalid_code
;
1336 CODING_SPEC_ISO_INVOCATION (coding
, 0) = 1;
1337 charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1341 if (! (coding
->flags
& CODING_FLAG_ISO_LOCKING_SHIFT
))
1342 goto label_invalid_code
;
1343 CODING_SPEC_ISO_INVOCATION (coding
, 0) = 0;
1344 charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1347 case ISO_single_shift_2_7
:
1348 case ISO_single_shift_2
:
1349 if (! (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
))
1350 goto label_invalid_code
;
1351 /* SS2 is handled as an escape sequence of ESC 'N' */
1353 goto label_escape_sequence
;
1355 case ISO_single_shift_3
:
1356 if (! (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
))
1357 goto label_invalid_code
;
1358 /* SS3 is handled as an escape sequence of ESC 'O' */
1360 goto label_escape_sequence
;
1362 case ISO_control_sequence_introducer
:
1363 /* CSI is handled as an escape sequence of ESC '[' ... */
1365 goto label_escape_sequence
;
1369 label_escape_sequence
:
1370 /* Escape sequences handled by Emacs are invocation,
1371 designation, direction specification, and character
1372 composition specification. */
1375 case '&': /* revision of following character set */
1377 if (!(c1
>= '@' && c1
<= '~'))
1378 goto label_invalid_code
;
1380 if (c1
!= ISO_CODE_ESC
)
1381 goto label_invalid_code
;
1383 goto label_escape_sequence
;
1385 case '$': /* designation of 2-byte character set */
1386 if (! (coding
->flags
& CODING_FLAG_ISO_DESIGNATION
))
1387 goto label_invalid_code
;
1389 if (c1
>= '@' && c1
<= 'B')
1390 { /* designation of JISX0208.1978, GB2312.1980,
1392 DECODE_DESIGNATION (0, 2, 94, c1
);
1394 else if (c1
>= 0x28 && c1
<= 0x2B)
1395 { /* designation of DIMENSION2_CHARS94 character set */
1397 DECODE_DESIGNATION (c1
- 0x28, 2, 94, c2
);
1399 else if (c1
>= 0x2C && c1
<= 0x2F)
1400 { /* designation of DIMENSION2_CHARS96 character set */
1402 DECODE_DESIGNATION (c1
- 0x2C, 2, 96, c2
);
1405 goto label_invalid_code
;
1406 /* We must update these variables now. */
1407 charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1408 charset1
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 1);
1411 case 'n': /* invocation of locking-shift-2 */
1412 if (! (coding
->flags
& CODING_FLAG_ISO_LOCKING_SHIFT
)
1413 || CODING_SPEC_ISO_DESIGNATION (coding
, 2) < 0)
1414 goto label_invalid_code
;
1415 CODING_SPEC_ISO_INVOCATION (coding
, 0) = 2;
1416 charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1419 case 'o': /* invocation of locking-shift-3 */
1420 if (! (coding
->flags
& CODING_FLAG_ISO_LOCKING_SHIFT
)
1421 || CODING_SPEC_ISO_DESIGNATION (coding
, 3) < 0)
1422 goto label_invalid_code
;
1423 CODING_SPEC_ISO_INVOCATION (coding
, 0) = 3;
1424 charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1427 case 'N': /* invocation of single-shift-2 */
1428 if (! (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
)
1429 || CODING_SPEC_ISO_DESIGNATION (coding
, 2) < 0)
1430 goto label_invalid_code
;
1431 charset
= CODING_SPEC_ISO_DESIGNATION (coding
, 2);
1435 case 'O': /* invocation of single-shift-3 */
1436 if (! (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
)
1437 || CODING_SPEC_ISO_DESIGNATION (coding
, 3) < 0)
1438 goto label_invalid_code
;
1439 charset
= CODING_SPEC_ISO_DESIGNATION (coding
, 3);
1443 case '0': case '2': case '3': case '4': /* start composition */
1444 DECODE_COMPOSITION_START (c1
);
1447 case '1': /* end composition */
1448 DECODE_COMPOSITION_END (c1
);
1451 case '[': /* specification of direction */
1452 if (coding
->flags
& CODING_FLAG_ISO_NO_DIRECTION
)
1453 goto label_invalid_code
;
1454 /* For the moment, nested direction is not supported.
1455 So, `coding->mode & CODING_MODE_DIRECTION' zero means
1456 left-to-right, and nonzero means right-to-left. */
1460 case ']': /* end of the current direction */
1461 coding
->mode
&= ~CODING_MODE_DIRECTION
;
1463 case '0': /* end of the current direction */
1464 case '1': /* start of left-to-right direction */
1467 coding
->mode
&= ~CODING_MODE_DIRECTION
;
1469 goto label_invalid_code
;
1472 case '2': /* start of right-to-left direction */
1475 coding
->mode
|= CODING_MODE_DIRECTION
;
1477 goto label_invalid_code
;
1481 goto label_invalid_code
;
1486 if (! (coding
->flags
& CODING_FLAG_ISO_DESIGNATION
))
1487 goto label_invalid_code
;
1488 if (c1
>= 0x28 && c1
<= 0x2B)
1489 { /* designation of DIMENSION1_CHARS94 character set */
1491 DECODE_DESIGNATION (c1
- 0x28, 1, 94, c2
);
1493 else if (c1
>= 0x2C && c1
<= 0x2F)
1494 { /* designation of DIMENSION1_CHARS96 character set */
1496 DECODE_DESIGNATION (c1
- 0x2C, 1, 96, c2
);
1499 goto label_invalid_code
;
1500 /* We must update these variables now. */
1501 charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1502 charset1
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 1);
1507 /* Now we know CHARSET and 1st position code C1 of a character.
1508 Produce a multibyte sequence for that character while getting
1509 2nd position code C2 if necessary. */
1510 if (CHARSET_DIMENSION (charset
) == 2)
1513 if (c1
< 0x80 ? c2
< 0x20 || c2
>= 0x80 : c2
< 0xA0)
1514 /* C2 is not in a valid range. */
1515 goto label_invalid_code
;
1517 c
= DECODE_ISO_CHARACTER (charset
, c1
, c2
);
1523 if (COMPOSING_P (coding
))
1524 DECODE_COMPOSITION_END ('1');
1531 coding
->consumed
= coding
->consumed_char
= src_base
- source
;
1532 coding
->produced
= dst
- destination
;
1537 /* ISO2022 encoding stuff. */
1540 It is not enough to say just "ISO2022" on encoding, we have to
1541 specify more details. In Emacs, each coding system of ISO2022
1542 variant has the following specifications:
1543 1. Initial designation to G0 thru G3.
1544 2. Allows short-form designation?
1545 3. ASCII should be designated to G0 before control characters?
1546 4. ASCII should be designated to G0 at end of line?
1547 5. 7-bit environment or 8-bit environment?
1548 6. Use locking-shift?
1549 7. Use Single-shift?
1550 And the following two are only for Japanese:
1551 8. Use ASCII in place of JIS0201-1976-Roman?
1552 9. Use JISX0208-1983 in place of JISX0208-1978?
1553 These specifications are encoded in `coding->flags' as flag bits
1554 defined by macros CODING_FLAG_ISO_XXX. See `coding.h' for more
1558 /* Produce codes (escape sequence) for designating CHARSET to graphic
1559 register REG at DST, and increment DST. If <final-char> of CHARSET is
1560 '@', 'A', or 'B' and the coding system CODING allows, produce
1561 designation sequence of short-form. */
1563 #define ENCODE_DESIGNATION(charset, reg, coding) \
1565 unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset); \
1566 char *intermediate_char_94 = "()*+"; \
1567 char *intermediate_char_96 = ",-./"; \
1568 int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset); \
1570 if (revision < 255) \
1572 *dst++ = ISO_CODE_ESC; \
1574 *dst++ = '@' + revision; \
1576 *dst++ = ISO_CODE_ESC; \
1577 if (CHARSET_DIMENSION (charset) == 1) \
1579 if (CHARSET_CHARS (charset) == 94) \
1580 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
1582 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
1587 if (CHARSET_CHARS (charset) == 94) \
1589 if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM) \
1591 || final_char < '@' || final_char > 'B') \
1592 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
1595 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
1597 *dst++ = final_char; \
1598 CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \
1601 /* The following two macros produce codes (control character or escape
1602 sequence) for ISO2022 single-shift functions (single-shift-2 and
1605 #define ENCODE_SINGLE_SHIFT_2 \
1607 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1608 *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \
1610 *dst++ = ISO_CODE_SS2; \
1611 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
1614 #define ENCODE_SINGLE_SHIFT_3 \
1616 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1617 *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \
1619 *dst++ = ISO_CODE_SS3; \
1620 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
1623 /* The following four macros produce codes (control character or
1624 escape sequence) for ISO2022 locking-shift functions (shift-in,
1625 shift-out, locking-shift-2, and locking-shift-3). */
1627 #define ENCODE_SHIFT_IN \
1629 *dst++ = ISO_CODE_SI; \
1630 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
1633 #define ENCODE_SHIFT_OUT \
1635 *dst++ = ISO_CODE_SO; \
1636 CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
1639 #define ENCODE_LOCKING_SHIFT_2 \
1641 *dst++ = ISO_CODE_ESC, *dst++ = 'n'; \
1642 CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
1645 #define ENCODE_LOCKING_SHIFT_3 \
1647 *dst++ = ISO_CODE_ESC, *dst++ = 'o'; \
1648 CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
1651 /* Produce codes for a DIMENSION1 character whose character set is
1652 CHARSET and whose position-code is C1. Designation and invocation
1653 sequences are also produced in advance if necessary. */
1655 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
1657 if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \
1659 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1660 *dst++ = c1 & 0x7F; \
1662 *dst++ = c1 | 0x80; \
1663 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; \
1666 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)) \
1668 *dst++ = c1 & 0x7F; \
1671 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \
1673 *dst++ = c1 | 0x80; \
1676 else if (coding->flags & CODING_FLAG_ISO_SAFE \
1677 && !coding->safe_charsets[charset]) \
1679 /* We should not encode this character, instead produce one or \
1681 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1682 if (CHARSET_WIDTH (charset) == 2) \
1683 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1687 /* Since CHARSET is not yet invoked to any graphic planes, we \
1688 must invoke it, or, at first, designate it to some graphic \
1689 register. Then repeat the loop to actually produce the \
1691 dst = encode_invocation_designation (charset, coding, dst); \
1694 /* Produce codes for a DIMENSION2 character whose character set is
1695 CHARSET and whose position-codes are C1 and C2. Designation and
1696 invocation codes are also produced in advance if necessary. */
1698 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
1700 if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \
1702 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1703 *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F; \
1705 *dst++ = c1 | 0x80, *dst++ = c2 | 0x80; \
1706 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; \
1709 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)) \
1711 *dst++ = c1 & 0x7F, *dst++= c2 & 0x7F; \
1714 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \
1716 *dst++ = c1 | 0x80, *dst++= c2 | 0x80; \
1719 else if (coding->flags & CODING_FLAG_ISO_SAFE \
1720 && !coding->safe_charsets[charset]) \
1722 /* We should not encode this character, instead produce one or \
1724 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1725 if (CHARSET_WIDTH (charset) == 2) \
1726 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1730 /* Since CHARSET is not yet invoked to any graphic planes, we \
1731 must invoke it, or, at first, designate it to some graphic \
1732 register. Then repeat the loop to actually produce the \
1734 dst = encode_invocation_designation (charset, coding, dst); \
1737 #define ENCODE_ISO_CHARACTER(charset, c1, c2) \
1739 int alt_charset = charset; \
1741 if (CHARSET_DEFINED_P (charset)) \
1743 if (CHARSET_DIMENSION (charset) == 1) \
1745 if (charset == CHARSET_ASCII \
1746 && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \
1747 alt_charset = charset_latin_jisx0201; \
1748 ENCODE_ISO_CHARACTER_DIMENSION1 (alt_charset, c1); \
1752 if (charset == charset_jisx0208 \
1753 && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \
1754 alt_charset = charset_jisx0208_1978; \
1755 ENCODE_ISO_CHARACTER_DIMENSION2 (alt_charset, c1, c2); \
1766 /* Produce designation and invocation codes at a place pointed by DST
1767 to use CHARSET. The element `spec.iso2022' of *CODING is updated.
1771 encode_invocation_designation (charset
, coding
, dst
)
1773 struct coding_system
*coding
;
1776 int reg
; /* graphic register number */
1778 /* At first, check designations. */
1779 for (reg
= 0; reg
< 4; reg
++)
1780 if (charset
== CODING_SPEC_ISO_DESIGNATION (coding
, reg
))
1785 /* CHARSET is not yet designated to any graphic registers. */
1786 /* At first check the requested designation. */
1787 reg
= CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
);
1788 if (reg
== CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION
)
1789 /* Since CHARSET requests no special designation, designate it
1790 to graphic register 0. */
1793 ENCODE_DESIGNATION (charset
, reg
, coding
);
1796 if (CODING_SPEC_ISO_INVOCATION (coding
, 0) != reg
1797 && CODING_SPEC_ISO_INVOCATION (coding
, 1) != reg
)
1799 /* Since the graphic register REG is not invoked to any graphic
1800 planes, invoke it to graphic plane 0. */
1803 case 0: /* graphic register 0 */
1807 case 1: /* graphic register 1 */
1811 case 2: /* graphic register 2 */
1812 if (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
)
1813 ENCODE_SINGLE_SHIFT_2
;
1815 ENCODE_LOCKING_SHIFT_2
;
1818 case 3: /* graphic register 3 */
1819 if (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
)
1820 ENCODE_SINGLE_SHIFT_3
;
1822 ENCODE_LOCKING_SHIFT_3
;
1830 /* Produce 2-byte codes for encoded composition rule RULE. */
1832 #define ENCODE_COMPOSITION_RULE(rule) \
1835 COMPOSITION_DECODE_RULE (rule, gref, nref); \
1836 *dst++ = 32 + 81 + gref; \
1837 *dst++ = 32 + nref; \
1840 /* Produce codes for indicating the start of a composition sequence
1841 (ESC 0, ESC 3, or ESC 4). DATA points to an array of integers
1842 which specify information about the composition. See the comment
1843 in coding.h for the format of DATA. */
1845 #define ENCODE_COMPOSITION_START(coding, data) \
1847 coding->composing = data[3]; \
1848 *dst++ = ISO_CODE_ESC; \
1849 if (coding->composing == COMPOSITION_RELATIVE) \
1853 *dst++ = (coding->composing == COMPOSITION_WITH_ALTCHARS \
1855 coding->cmp_data_index = coding->cmp_data_start + 4; \
1856 coding->composition_rule_follows = 0; \
1860 /* Produce codes for indicating the end of the current composition. */
1862 #define ENCODE_COMPOSITION_END(coding, data) \
1864 *dst++ = ISO_CODE_ESC; \
1866 coding->cmp_data_start += data[0]; \
1867 coding->composing = COMPOSITION_NO; \
1868 if (coding->cmp_data_start == coding->cmp_data->used \
1869 && coding->cmp_data->next) \
1871 coding->cmp_data = coding->cmp_data->next; \
1872 coding->cmp_data_start = 0; \
1876 /* Produce composition start sequence ESC 0. Here, this sequence
1877 doesn't mean the start of a new composition but means that we have
1878 just produced components (alternate chars and composition rules) of
1879 the composition and the actual text follows in SRC. */
1881 #define ENCODE_COMPOSITION_FAKE_START(coding) \
1883 *dst++ = ISO_CODE_ESC; \
1885 coding->composing = COMPOSITION_RELATIVE; \
1888 /* The following three macros produce codes for indicating direction
1890 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
/* Produce a control sequence introducer at DST: the two-byte form \
   ESC '[' when the coding system is flagged as 7-bit, otherwise the \
   single byte ISO_CODE_CSI.  The SEVEN_BITS bit must be tested with \
   `&', as the single-shift macros do: `coding->flags' carries other \
   flag bits too, so an equality test would pick the 8-bit byte for \
   a 7-bit coding system whenever any other flag is set. */ \
1892 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1893 *dst++ = ISO_CODE_ESC, *dst++ = '['; \
1895 *dst++ = ISO_CODE_CSI; \
1898 #define ENCODE_DIRECTION_R2L \
1899 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst), *dst++ = '2', *dst++ = ']'
1901 #define ENCODE_DIRECTION_L2R \
1902 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst), *dst++ = '0', *dst++ = ']'
1904 /* Produce codes for designation and invocation to reset the graphic
1905 planes and registers to initial state. */
1906 #define ENCODE_RESET_PLANE_AND_REGISTER \
1909 if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0) \
1911 for (reg = 0; reg < 4; reg++) \
1912 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0 \
1913 && (CODING_SPEC_ISO_DESIGNATION (coding, reg) \
1914 != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg))) \
1915 ENCODE_DESIGNATION \
1916 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
1919 /* Produce designation sequences of charsets in the line started from
1920 SRC to a place pointed by DST, and return updated DST.
1922 If the current block ends before any end-of-line, we may fail to
1923 find all the necessary designations. */
1925 static unsigned char *
1926 encode_designation_at_bol (coding
, translation_table
, src
, src_end
, dst
)
1927 struct coding_system
*coding
;
1928 Lisp_Object translation_table
;
1929 unsigned char *src
, *src_end
, *dst
;
1931 int charset
, c
, found
= 0, reg
;
1932 /* Table of charsets to be designated to each graphic register. */
1935 for (reg
= 0; reg
< 4; reg
++)
1944 charset
= CHAR_CHARSET (c
);
1945 reg
= CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
);
1946 if (reg
!= CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION
&& r
[reg
] < 0)
1956 for (reg
= 0; reg
< 4; reg
++)
1958 && CODING_SPEC_ISO_DESIGNATION (coding
, reg
) != r
[reg
])
1959 ENCODE_DESIGNATION (r
[reg
], reg
, coding
);
1965 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
1968 encode_coding_iso2022 (coding
, source
, destination
, src_bytes
, dst_bytes
)
1969 struct coding_system
*coding
;
1970 unsigned char *source
, *destination
;
1971 int src_bytes
, dst_bytes
;
1973 unsigned char *src
= source
;
1974 unsigned char *src_end
= source
+ src_bytes
;
1975 unsigned char *dst
= destination
;
1976 unsigned char *dst_end
= destination
+ dst_bytes
;
1977 /* Since the maximum bytes produced by each loop is 20, we subtract 19
1978 from DST_END to assure overflow checking is necessary only at the
1980 unsigned char *adjusted_dst_end
= dst_end
- 19;
1981 /* SRC_BASE remembers the start position in source in each loop.
1982 The loop will be exited when there's not enough source text to
1983 analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
1984 there's not enough destination area to produce encoded codes
1985 (within macro EMIT_BYTES). */
1986 unsigned char *src_base
;
1988 Lisp_Object translation_table
;
1990 if (NILP (Venable_character_translation
))
1991 translation_table
= Qnil
;
1994 translation_table
= coding
->translation_table_for_encode
;
1995 if (NILP (translation_table
))
1996 translation_table
= Vstandard_translation_table_for_encode
;
1999 coding
->consumed_char
= 0;
2003 int charset
, c1
, c2
;
2007 if (dst
>= (dst_bytes
? adjusted_dst_end
: (src
- 19)))
2009 coding
->result
= CODING_FINISH_INSUFFICIENT_DST
;
2013 if (coding
->flags
& CODING_FLAG_ISO_DESIGNATE_AT_BOL
2014 && CODING_SPEC_ISO_BOL (coding
))
2016 /* We have to produce designation sequences if any now. */
2017 dst
= encode_designation_at_bol (coding
, translation_table
,
2019 CODING_SPEC_ISO_BOL (coding
) = 0;
2022 /* Check composition start and end. */
2023 if (coding
->composing
!= COMPOSITION_DISABLED
2024 && coding
->cmp_data_start
< coding
->cmp_data
->used
)
2026 struct composition_data
*cmp_data
= coding
->cmp_data
;
2027 int *data
= cmp_data
->data
+ coding
->cmp_data_start
;
2028 int this_pos
= cmp_data
->char_offset
+ coding
->consumed_char
;
2030 if (coding
->composing
== COMPOSITION_RELATIVE
)
2032 if (this_pos
== data
[2])
2034 ENCODE_COMPOSITION_END (coding
, data
);
2035 cmp_data
= coding
->cmp_data
;
2036 data
= cmp_data
->data
+ coding
->cmp_data_start
;
2039 else if (COMPOSING_P (coding
))
2041 /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHARS */
2042 if (coding
->cmp_data_index
== coding
->cmp_data_start
+ data
[0])
2043 /* We have consumed components of the composition.
2044 What follows in SRC is the composition's base
2046 ENCODE_COMPOSITION_FAKE_START (coding
);
2049 int c
= cmp_data
->data
[coding
->cmp_data_index
++];
2050 if (coding
->composition_rule_follows
)
2052 ENCODE_COMPOSITION_RULE (c
);
2053 coding
->composition_rule_follows
= 0;
2057 SPLIT_CHAR (c
, charset
, c1
, c2
);
2058 ENCODE_ISO_CHARACTER (charset
, c1
, c2
);
2059 if (coding
->composing
== COMPOSITION_WITH_RULE_ALTCHARS
)
2060 coding
->composition_rule_follows
= 1;
2065 if (!COMPOSING_P (coding
))
2067 if (this_pos
== data
[1])
2069 ENCODE_COMPOSITION_START (coding
, data
);
2077 /* Now encode the character C. */
2078 if (c
< 0x20 || c
== 0x7F)
2082 if (! (coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
))
2084 if (coding
->flags
& CODING_FLAG_ISO_RESET_AT_CNTL
)
2085 ENCODE_RESET_PLANE_AND_REGISTER
;
2089 /* fall down to treat '\r' as '\n' ... */
2094 if (coding
->flags
& CODING_FLAG_ISO_RESET_AT_EOL
)
2095 ENCODE_RESET_PLANE_AND_REGISTER
;
2096 if (coding
->flags
& CODING_FLAG_ISO_INIT_AT_BOL
)
2097 bcopy (coding
->spec
.iso2022
.initial_designation
,
2098 coding
->spec
.iso2022
.current_designation
,
2099 sizeof coding
->spec
.iso2022
.initial_designation
);
2100 if (coding
->eol_type
== CODING_EOL_LF
2101 || coding
->eol_type
== CODING_EOL_UNDECIDED
)
2102 *dst
++ = ISO_CODE_LF
;
2103 else if (coding
->eol_type
== CODING_EOL_CRLF
)
2104 *dst
++ = ISO_CODE_CR
, *dst
++ = ISO_CODE_LF
;
2106 *dst
++ = ISO_CODE_CR
;
2107 CODING_SPEC_ISO_BOL (coding
) = 1;
2111 if (coding
->flags
& CODING_FLAG_ISO_RESET_AT_CNTL
)
2112 ENCODE_RESET_PLANE_AND_REGISTER
;
2116 else if (ASCII_BYTE_P (c
))
2117 ENCODE_ISO_CHARACTER (CHARSET_ASCII
, c
, /* dummy */ c1
);
2118 else if (SINGLE_BYTE_CHAR_P (c
))
2125 SPLIT_CHAR (c
, charset
, c1
, c2
);
2126 ENCODE_ISO_CHARACTER (charset
, c1
, c2
);
2129 coding
->consumed_char
++;
2133 coding
->consumed
= src_base
- source
;
2134 coding
->produced
= coding
->produced_char
= dst
- destination
;
2138 /*** 4. SJIS and BIG5 handlers ***/
2140 /* Although SJIS and BIG5 are not ISO's coding system, they are used
2141 quite widely. So, for the moment, Emacs supports them in the bare
2142 C code. But, in the future, they may be supported only by CCL. */
2144 /* SJIS is a coding system encoding three character sets: ASCII, right
2145 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
2146 as is. A character of charset katakana-jisx0201 is encoded by
2147 "position-code + 0x80". A character of charset japanese-jisx0208
2148 is encoded in 2-byte but two position-codes are divided and shifted
2149 so that they fit in the range below.
2151 --- CODE RANGE of SJIS ---
2152 (character set) (range)
2154 KATAKANA-JISX0201 0xA0 .. 0xDF
2155 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
2156 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
2157 -------------------------------
2161 /* BIG5 is a coding system encoding two character sets: ASCII and
2162 Big5. An ASCII character is encoded as is. Big5 is a two-byte
2163 character set and is encoded in two bytes.
2165 --- CODE RANGE of BIG5 ---
2166 (character set) (range)
2168 Big5 (1st byte) 0xA1 .. 0xFE
2169 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
2170 --------------------------
2172 Since the number of characters in Big5 is larger than maximum
2173 characters in Emacs' charset (96x96), it can't be handled as one
2174 charset. So, in Emacs, Big5 is divided into two: `charset-big5-1'
2175 and `charset-big5-2'. Both are DIMENSION2 and CHARS94. The former
2176 contains frequently used characters and the latter contains less
2177 frequently used characters. */
2179 /* Macros to decode or encode a character of Big5 in BIG5. B1 and B2
2180 are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
2181 C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
2182 format. CHARSET is `charset_big5_1' or `charset_big5_2'. */
2184 /* Number of Big5 characters which have the same code in 1st byte. */
/* The value is 157: (0xFF - 0xA1) = 94 second-byte codes in the range
   0xA1..0xFE plus (0x7F - 0x40) = 63 second-byte codes in the range
   0x40..0x7E.  DECODE_BIG5 and ENCODE_BIG5 use it as the row length
   when converting between a (B1, B2) pair and a linear index. */
2185 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2187 #define DECODE_BIG5(b1, b2, charset, c1, c2) \
2190 = (b1 - 0xA1) * BIG5_SAME_ROW + b2 - (b2 < 0x7F ? 0x40 : 0x62); \
2192 charset = charset_big5_1; \
2195 charset = charset_big5_2; \
2196 temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW; \
2198 c1 = temp / (0xFF - 0xA1) + 0x21; \
2199 c2 = temp % (0xFF - 0xA1) + 0x21; \
2202 #define ENCODE_BIG5(charset, c1, c2, b1, b2) \
2204 unsigned int temp = (c1 - 0x21) * (0xFF - 0xA1) + (c2 - 0x21); \
2205 if (charset == charset_big5_2) \
2206 temp += BIG5_SAME_ROW * (0xC9 - 0xA1); \
2207 b1 = temp / BIG5_SAME_ROW + 0xA1; \
2208 b2 = temp % BIG5_SAME_ROW; \
2209 b2 += b2 < 0x3F ? 0x40 : 0x62; \
2212 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2213 Check if a text is encoded in SJIS. If it is, return
2214 CODING_CATEGORY_MASK_SJIS, else return 0. */
2217 detect_coding_sjis (src
, src_end
)
2218 unsigned char *src
, *src_end
;
2221 /* Dummy for ONE_MORE_BYTE. */
2222 struct coding_system dummy_coding
;
2223 struct coding_system
*coding
= &dummy_coding
;
2228 if ((c
>= 0x80 && c
< 0xA0) || c
>= 0xE0)
2236 return CODING_CATEGORY_MASK_SJIS
;
2239 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2240 Check if a text is encoded in BIG5. If it is, return
2241 CODING_CATEGORY_MASK_BIG5, else return 0. */
2244 detect_coding_big5 (src
, src_end
)
2245 unsigned char *src
, *src_end
;
2248 /* Dummy for ONE_MORE_BYTE. */
2249 struct coding_system dummy_coding
;
2250 struct coding_system
*coding
= &dummy_coding
;
2258 if (c
< 0x40 || (c
>= 0x7F && c
<= 0xA0))
2263 return CODING_CATEGORY_MASK_BIG5
;
2266 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2267 Check if a text is encoded in UTF-8. If it is, return
2268 CODING_CATEGORY_MASK_UTF_8, else return 0. */
/* Predicates on a single octet C, used by detect_coding_utf_8 below.
   Each one classifies C purely by its high-order bits:

     UTF_8_1_OCTET_P		0xxxxxxx  (C < 0x80), a complete one-octet code
     UTF_8_EXTRA_OCTET_P	10xxxxxx  a trailing octet of a longer sequence
     UTF_8_2_OCTET_LEADING_P	110xxxxx  first octet, 1 trailing octet follows
     UTF_8_3_OCTET_LEADING_P	1110xxxx  first octet, 2 trailing octets follow
     UTF_8_4_OCTET_LEADING_P	11110xxx  first octet, 3 trailing octets follow
     UTF_8_5_OCTET_LEADING_P	111110xx  first octet, 4 trailing octets follow
     UTF_8_6_OCTET_LEADING_P	1111110x  first octet, 5 trailing octets follow

   Every macro mentions its argument exactly once, fully parenthesized. */
2270 #define UTF_8_1_OCTET_P(c) ((c) < 0x80)
2271 #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
2272 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
2273 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
2274 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
2275 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
2276 #define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC)
2279 detect_coding_utf_8 (src
, src_end
)
2280 unsigned char *src
, *src_end
;
2283 int seq_maybe_bytes
;
2284 /* Dummy for ONE_MORE_BYTE. */
2285 struct coding_system dummy_coding
;
2286 struct coding_system
*coding
= &dummy_coding
;
2291 if (UTF_8_1_OCTET_P (c
))
2293 else if (UTF_8_2_OCTET_LEADING_P (c
))
2294 seq_maybe_bytes
= 1;
2295 else if (UTF_8_3_OCTET_LEADING_P (c
))
2296 seq_maybe_bytes
= 2;
2297 else if (UTF_8_4_OCTET_LEADING_P (c
))
2298 seq_maybe_bytes
= 3;
2299 else if (UTF_8_5_OCTET_LEADING_P (c
))
2300 seq_maybe_bytes
= 4;
2301 else if (UTF_8_6_OCTET_LEADING_P (c
))
2302 seq_maybe_bytes
= 5;
2309 if (!UTF_8_EXTRA_OCTET_P (c
))
2313 while (seq_maybe_bytes
> 0);
2317 return CODING_CATEGORY_MASK_UTF_8
;
2320 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2321 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
2322 Little Endian (otherwise). If it is, return
2323 CODING_CATEGORY_MASK_UTF_16_BE or CODING_CATEGORY_MASK_UTF_16_LE,
/* Classifiers for one 16-bit UTF-16 code unit VAL.  Each macro
   mentions VAL fully parenthesized, so any integer expression may be
   passed.  */

/* Non-zero if VAL can never be a character: 0xFFFE (the byte-swapped
   form of the 0xFEFF signature) or 0xFFFF.  */
#define UTF_16_INVALID_P(val)	\
  (((val) == 0xFFFE)		\
   || ((val) == 0xFFFF))

/* Non-zero if VAL is a high (leading) surrogate, 0xD800..0xDBFF.
   The upper six bits identify the surrogate half, so the mask must be
   0xFC00.  Masking with 0xD800 would also accept the low surrogates
   0xDC00..0xDFFF and every value in 0xF800..0xFFFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Non-zero if VAL is a low (trailing) surrogate, 0xDC00..0xDFFF.
   Masking with 0xDC00 would also accept 0xFC00..0xFFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2337 detect_coding_utf_16 (src
, src_end
)
2338 unsigned char *src
, *src_end
;
2340 unsigned char c1
, c2
;
2341 /* Dummy for TWO_MORE_BYTES. */
2342 struct coding_system dummy_coding
;
2343 struct coding_system
*coding
= &dummy_coding
;
2345 TWO_MORE_BYTES (c1
, c2
);
2347 if ((c1
== 0xFF) && (c2
== 0xFE))
2348 return CODING_CATEGORY_MASK_UTF_16_LE
;
2349 else if ((c1
== 0xFE) && (c2
== 0xFF))
2350 return CODING_CATEGORY_MASK_UTF_16_BE
;
2356 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2357 If SJIS_P is 1, decode SJIS text, else decode BIG5 text. */
2360 decode_coding_sjis_big5 (coding
, source
, destination
,
2361 src_bytes
, dst_bytes
, sjis_p
)
2362 struct coding_system
*coding
;
2363 unsigned char *source
, *destination
;
2364 int src_bytes
, dst_bytes
;
2367 unsigned char *src
= source
;
2368 unsigned char *src_end
= source
+ src_bytes
;
2369 unsigned char *dst
= destination
;
2370 unsigned char *dst_end
= destination
+ dst_bytes
;
2371 /* SRC_BASE remembers the start position in source in each loop.
2372 The loop will be exited when there's not enough source code
2373 (within macro ONE_MORE_BYTE), or when there's not enough
2374 destination area to produce a character (within macro
2376 unsigned char *src_base
;
2377 Lisp_Object translation_table
;
2379 if (NILP (Venable_character_translation
))
2380 translation_table
= Qnil
;
2383 translation_table
= coding
->translation_table_for_decode
;
2384 if (NILP (translation_table
))
2385 translation_table
= Vstandard_translation_table_for_decode
;
2388 coding
->produced_char
= 0;
2391 int c
, charset
, c1
, c2
;
2398 charset
= CHARSET_ASCII
;
2403 if (coding
->eol_type
== CODING_EOL_CRLF
)
2408 else if (coding
->mode
2409 & CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
2411 coding
->result
= CODING_FINISH_INCONSISTENT_EOL
;
2412 goto label_end_of_loop
;
2415 /* To process C2 again, SRC is subtracted by 1. */
2418 else if (coding
->eol_type
== CODING_EOL_CR
)
2422 && (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
2423 && (coding
->eol_type
== CODING_EOL_CR
2424 || coding
->eol_type
== CODING_EOL_CRLF
))
2426 coding
->result
= CODING_FINISH_INCONSISTENT_EOL
;
2427 goto label_end_of_loop
;
2436 goto label_invalid_code
;
2437 if (c1
< 0xA0 || c1
>= 0xE0)
2439 /* SJIS -> JISX0208 */
2441 if (c2
< 0x40 || c2
== 0x7F || c2
> 0xFC)
2442 goto label_invalid_code
;
2443 DECODE_SJIS (c1
, c2
, c1
, c2
);
2444 charset
= charset_jisx0208
;
2447 /* SJIS -> JISX0201-Kana */
2448 charset
= charset_katakana_jisx0201
;
2453 if (c1
< 0xA1 || c1
> 0xFE)
2454 goto label_invalid_code
;
2456 if (c2
< 0x40 || (c2
> 0x7E && c2
< 0xA1) || c2
> 0xFE)
2457 goto label_invalid_code
;
2458 DECODE_BIG5 (c1
, c2
, charset
, c1
, c2
);
2462 c
= DECODE_ISO_CHARACTER (charset
, c1
, c2
);
2474 coding
->consumed
= coding
->consumed_char
= src_base
- source
;
2475 coding
->produced
= dst
- destination
;
2479 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2480 This function can encode charsets `ascii', `katakana-jisx0201',
2481 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
2482 are sure that all these charsets are registered as official charset
2483 (i.e. do not have extended leading-codes). Characters of other
2484 charsets are produced without any encoding. If SJIS_P is 1, encode
2485 SJIS text, else encode BIG5 text. */
2488 encode_coding_sjis_big5 (coding
, source
, destination
,
2489 src_bytes
, dst_bytes
, sjis_p
)
2490 struct coding_system
*coding
;
2491 unsigned char *source
, *destination
;
2492 int src_bytes
, dst_bytes
;
2495 unsigned char *src
= source
;
2496 unsigned char *src_end
= source
+ src_bytes
;
2497 unsigned char *dst
= destination
;
2498 unsigned char *dst_end
= destination
+ dst_bytes
;
2499 /* SRC_BASE remembers the start position in source in each loop.
2500 The loop will be exited when there's not enough source text to
2501 analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2502 there's not enough destination area to produce encoded codes
2503 (within macro EMIT_BYTES). */
2504 unsigned char *src_base
;
2505 Lisp_Object translation_table
;
2507 if (NILP (Venable_character_translation
))
2508 translation_table
= Qnil
;
2511 translation_table
= coding
->translation_table_for_decode
;
2512 if (NILP (translation_table
))
2513 translation_table
= Vstandard_translation_table_for_decode
;
2518 int c
, charset
, c1
, c2
;
2523 /* Now encode the character C. */
2524 if (SINGLE_BYTE_CHAR_P (c
))
2529 if (!coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
)
2536 if (coding
->eol_type
== CODING_EOL_CRLF
)
2538 EMIT_TWO_BYTES ('\r', c
);
2541 else if (coding
->eol_type
== CODING_EOL_CR
)
2549 SPLIT_CHAR (c
, charset
, c1
, c2
);
2552 if (charset
== charset_jisx0208
2553 || charset
== charset_jisx0208_1978
)
2555 ENCODE_SJIS (c1
, c2
, c1
, c2
);
2556 EMIT_TWO_BYTES (c1
, c2
);
2558 else if (charset
== charset_latin_jisx0201
)
2561 /* There's no way other than producing the internal
2563 EMIT_BYTES (src_base
, src
);
2567 if (charset
== charset_big5_1
|| charset
== charset_big5_2
)
2569 ENCODE_BIG5 (charset
, c1
, c2
, c1
, c2
);
2570 EMIT_TWO_BYTES (c1
, c2
);
2573 /* There's no way other than producing the internal
2575 EMIT_BYTES (src_base
, src
);
2578 coding
->consumed_char
++;
2582 coding
->consumed
= src_base
- source
;
2583 coding
->produced
= coding
->produced_char
= dst
- destination
;
2587 /*** 5. CCL handlers ***/
2589 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2590 Check if a text is encoded in a coding system of which
2591 encoder/decoder are written in CCL program. If it is, return
2592 CODING_CATEGORY_MASK_CCL, else return 0. */
2595 detect_coding_ccl (src
, src_end
)
2596 unsigned char *src
, *src_end
;
2598 unsigned char *valid
;
2600 /* Dummy for ONE_MORE_BYTE. */
2601 struct coding_system dummy_coding
;
2602 struct coding_system
*coding
= &dummy_coding
;
2604 /* No coding system is assigned to coding-category-ccl. */
2605 if (!coding_system_table
[CODING_CATEGORY_IDX_CCL
])
2608 valid
= coding_system_table
[CODING_CATEGORY_IDX_CCL
]->spec
.ccl
.valid_codes
;
2616 return CODING_CATEGORY_MASK_CCL
;
2620 /*** 6. End-of-line handlers ***/
2622 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2625 decode_eol (coding
, source
, destination
, src_bytes
, dst_bytes
)
2626 struct coding_system
*coding
;
2627 unsigned char *source
, *destination
;
2628 int src_bytes
, dst_bytes
;
2630 unsigned char *src
= source
;
2631 unsigned char *dst
= destination
;
2632 unsigned char *src_end
= src
+ src_bytes
;
2633 unsigned char *dst_end
= dst
+ dst_bytes
;
2634 Lisp_Object translation_table
;
2635 /* SRC_BASE remembers the start position in source in each loop.
2636 The loop will be exited when there's not enough source code
2637 (within macro ONE_MORE_BYTE), or when there's not enough
2638 destination area to produce a character (within macro
2640 unsigned char *src_base
;
2643 translation_table
= Qnil
;
2644 switch (coding
->eol_type
)
2646 case CODING_EOL_CRLF
:
2656 if (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
2658 coding
->result
= CODING_FINISH_INCONSISTENT_EOL
;
2659 goto label_end_of_loop
;
2666 && (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
))
2668 coding
->result
= CODING_FINISH_INCONSISTENT_EOL
;
2669 goto label_end_of_loop
;
2682 if (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
2684 coding
->result
= CODING_FINISH_INCONSISTENT_EOL
;
2685 goto label_end_of_loop
;
2694 default: /* no need for EOL handling */
2704 coding
->consumed
= coding
->consumed_char
= src_base
- source
;
2705 coding
->produced
= dst
- destination
;
2709 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions". Encode
2710 format of end-of-line according to `coding->eol_type'. It also
2711 converts multibyte form 8-bit characters to unibyte if
2712 CODING->src_multibyte is nonzero. If `coding->mode &
2713 CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
2714 also means end-of-line. */
2717 encode_eol (coding
, source
, destination
, src_bytes
, dst_bytes
)
2718 struct coding_system
*coding
;
2719 unsigned char *source
, *destination
;
2720 int src_bytes
, dst_bytes
;
2722 unsigned char *src
= source
;
2723 unsigned char *dst
= destination
;
2724 unsigned char *src_end
= src
+ src_bytes
;
2725 unsigned char *dst_end
= dst
+ dst_bytes
;
2726 Lisp_Object translation_table
;
2727 /* SRC_BASE remembers the start position in source in each loop.
2728 The loop will be exited when there's not enough source text to
2729 analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2730 there's not enough destination area to produce encoded codes
2731 (within macro EMIT_BYTES). */
2732 unsigned char *src_base
;
2734 int selective_display
= coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
;
2736 translation_table
= Qnil
;
2737 if (coding
->src_multibyte
2738 && *(src_end
- 1) == LEADING_CODE_8_BIT_CONTROL
)
2742 coding
->result
= CODING_FINISH_INSUFFICIENT_SRC
;
2745 if (coding
->eol_type
== CODING_EOL_CRLF
)
2747 while (src
< src_end
)
2753 else if (c
== '\n' || (c
== '\r' && selective_display
))
2754 EMIT_TWO_BYTES ('\r', '\n');
2764 if (src_bytes
<= dst_bytes
)
2766 safe_bcopy (src
, dst
, src_bytes
);
2772 if (coding
->src_multibyte
2773 && *(src
+ dst_bytes
- 1) == LEADING_CODE_8_BIT_CONTROL
)
2775 safe_bcopy (src
, dst
, dst_bytes
);
2776 src_base
= src
+ dst_bytes
;
2777 dst
= destination
+ dst_bytes
;
2778 coding
->result
= CODING_FINISH_INSUFFICIENT_DST
;
2780 if (coding
->eol_type
== CODING_EOL_CR
)
2782 for (src
= destination
; src
< dst
; src
++)
2783 if (*src
== '\n') *src
= '\r';
2785 else if (selective_display
)
2787 for (src
= destination
; src
< dst
; src
++)
2788 if (*src
== '\r') *src
= '\n';
2791 if (coding
->src_multibyte
)
2792 dst
= destination
+ str_as_unibyte (destination
, dst
- destination
);
2794 coding
->consumed
= src_base
- source
;
2795 coding
->produced
= dst
- destination
;
2799 /*** 7. C library functions ***/
2801 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
2802 has a property `coding-system'. The value of this property is a
2803 vector of length 5 (called as coding-vector). Among elements of
2804 this vector, the first (element[0]) and the fifth (element[4])
2805 carry important information for decoding/encoding. Before
2806 decoding/encoding, this information should be set in fields of a
2807 structure of type `coding_system'.
2809 A value of property `coding-system' can be a symbol of another
2810 subsidiary coding-system. In that case, Emacs gets coding-vector
2813 `element[0]' contains information to be set in `coding->type'. The
2814 value and its meaning is as follows:
2816 0 -- coding_type_emacs_mule
2817 1 -- coding_type_sjis
2818 2 -- coding_type_iso2022
2819 3 -- coding_type_big5
2820 4 -- coding_type_ccl encoder/decoder written in CCL
2821 nil -- coding_type_no_conversion
2822 t -- coding_type_undecided (automatic conversion on decoding,
2823 no-conversion on encoding)
2825 `element[4]' contains information to be set in `coding->flags' and
2826 `coding->spec'. The meaning varies by `coding->type'.
2828 If `coding->type' is `coding_type_iso2022', element[4] is a vector
2829 of length 32 (of which the first 17 sub-elements are used now).
2830 Meanings of these sub-elements are:
2832 sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2833 If the value is an integer of valid charset, the charset is
2834 assumed to be designated to graphic register N initially.
2836 If the value is minus, it is a minus value of charset which
2837 reserves graphic register N, which means that the charset is
2838 not designated initially but should be designated to graphic
2839 register N just before encoding a character in that charset.
2841 If the value is nil, graphic register N is never used on
2844 sub-element[N] where N is 4 through 16: to be set in `coding->flags'
2845 Each value takes t or nil. See the section ISO2022 of
2846 `coding.h' for more information.
2848 If `coding->type' is `coding_type_big5', element[4] is t to denote
2849 BIG5-ETen or nil to denote BIG5-HKU.
2851 If `coding->type' takes the other value, element[4] is ignored.
2853 Emacs Lisp's coding system also carries information about format of
2854 end-of-line in a value of property `eol-type'. If the value is
2855 integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2856 means CODING_EOL_CR. If it is not integer, it should be a vector
2857 of subsidiary coding systems of which property `eol-type' has one
2862 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
2863 and set it in CODING. If CODING_SYSTEM_SYMBOL is invalid, CODING
2864 is setup so that no conversion is necessary and return -1, else
2868 setup_coding_system (coding_system
, coding
)
2869 Lisp_Object coding_system
;
2870 struct coding_system
*coding
;
2872 Lisp_Object coding_spec
, coding_type
, eol_type
, plist
;
2876 /* Initialize some fields required for all kinds of coding systems. */
2877 coding
->symbol
= coding_system
;
2878 coding
->common_flags
= 0;
2880 coding
->heading_ascii
= -1;
2881 coding
->post_read_conversion
= coding
->pre_write_conversion
= Qnil
;
2882 coding
->composing
= COMPOSITION_DISABLED
;
2883 coding
->cmp_data
= NULL
;
2885 if (NILP (coding_system
))
2886 goto label_invalid_coding_system
;
2888 coding_spec
= Fget (coding_system
, Qcoding_system
);
2890 if (!VECTORP (coding_spec
)
2891 || XVECTOR (coding_spec
)->size
!= 5
2892 || !CONSP (XVECTOR (coding_spec
)->contents
[3]))
2893 goto label_invalid_coding_system
;
2895 eol_type
= inhibit_eol_conversion
? Qnil
: Fget (coding_system
, Qeol_type
);
2896 if (VECTORP (eol_type
))
2898 coding
->eol_type
= CODING_EOL_UNDECIDED
;
2899 coding
->common_flags
= CODING_REQUIRE_DETECTION_MASK
;
2901 else if (XFASTINT (eol_type
) == 1)
2903 coding
->eol_type
= CODING_EOL_CRLF
;
2904 coding
->common_flags
2905 = CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
;
2907 else if (XFASTINT (eol_type
) == 2)
2909 coding
->eol_type
= CODING_EOL_CR
;
2910 coding
->common_flags
2911 = CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
;
2914 coding
->eol_type
= CODING_EOL_LF
;
2916 coding_type
= XVECTOR (coding_spec
)->contents
[0];
2917 /* Try short cut. */
2918 if (SYMBOLP (coding_type
))
2920 if (EQ (coding_type
, Qt
))
2922 coding
->type
= coding_type_undecided
;
2923 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
2926 coding
->type
= coding_type_no_conversion
;
2930 /* Get values of coding system properties:
2931 `post-read-conversion', `pre-write-conversion',
2932 `translation-table-for-decode', `translation-table-for-encode'. */
2933 plist
= XVECTOR (coding_spec
)->contents
[3];
2934 /* Pre & post conversion functions should be disabled if
2935 inhibit_pre_post_conversion is nonzero. This is the case when a code
2936 conversion function is called while those functions are running. */
2937 if (! inhibit_pre_post_conversion
)
2939 coding
->post_read_conversion
= Fplist_get (plist
, Qpost_read_conversion
);
2940 coding
->pre_write_conversion
= Fplist_get (plist
, Qpre_write_conversion
);
2942 val
= Fplist_get (plist
, Qtranslation_table_for_decode
);
2944 val
= Fget (val
, Qtranslation_table_for_decode
);
2945 coding
->translation_table_for_decode
= CHAR_TABLE_P (val
) ? val
: Qnil
;
2946 val
= Fplist_get (plist
, Qtranslation_table_for_encode
);
2948 val
= Fget (val
, Qtranslation_table_for_encode
);
2949 coding
->translation_table_for_encode
= CHAR_TABLE_P (val
) ? val
: Qnil
;
2950 val
= Fplist_get (plist
, Qcoding_category
);
2953 val
= Fget (val
, Qcoding_category_index
);
2955 coding
->category_idx
= XINT (val
);
2957 goto label_invalid_coding_system
;
2960 goto label_invalid_coding_system
;
2962 val
= Fplist_get (plist
, Qsafe_charsets
);
2965 for (i
= 0; i
<= MAX_CHARSET
; i
++)
2966 coding
->safe_charsets
[i
] = 1;
2970 bzero (coding
->safe_charsets
, MAX_CHARSET
+ 1);
2973 if ((i
= get_charset_id (XCAR (val
))) >= 0)
2974 coding
->safe_charsets
[i
] = 1;
2979 /* If the coding system has non-nil `composition' property, enable
2980 composition handling. */
2981 val
= Fplist_get (plist
, Qcomposition
);
2983 coding
->composing
= COMPOSITION_NO
;
2985 switch (XFASTINT (coding_type
))
2988 coding
->type
= coding_type_emacs_mule
;
2989 if (!NILP (coding
->post_read_conversion
))
2990 coding
->common_flags
|= CODING_REQUIRE_DECODING_MASK
;
2991 if (!NILP (coding
->pre_write_conversion
))
2992 coding
->common_flags
|= CODING_REQUIRE_ENCODING_MASK
;
2996 coding
->type
= coding_type_sjis
;
2997 coding
->common_flags
2998 |= CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
;
3002 coding
->type
= coding_type_iso2022
;
3003 coding
->common_flags
3004 |= CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
;
3006 Lisp_Object val
, temp
;
3008 int i
, charset
, reg_bits
= 0;
3010 val
= XVECTOR (coding_spec
)->contents
[4];
3012 if (!VECTORP (val
) || XVECTOR (val
)->size
!= 32)
3013 goto label_invalid_coding_system
;
3015 flags
= XVECTOR (val
)->contents
;
3017 = ((NILP (flags
[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM
)
3018 | (NILP (flags
[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL
)
3019 | (NILP (flags
[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL
)
3020 | (NILP (flags
[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS
)
3021 | (NILP (flags
[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT
)
3022 | (NILP (flags
[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT
)
3023 | (NILP (flags
[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN
)
3024 | (NILP (flags
[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS
)
3025 | (NILP (flags
[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION
)
3026 | (NILP (flags
[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL
)
3027 | (NILP (flags
[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL
)
3028 | (NILP (flags
[15]) ? 0 : CODING_FLAG_ISO_SAFE
)
3029 | (NILP (flags
[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA
)
3032 /* Invoke graphic register 0 to plane 0. */
3033 CODING_SPEC_ISO_INVOCATION (coding
, 0) = 0;
3034 /* Invoke graphic register 1 to plane 1 if we can use full 8-bit. */
3035 CODING_SPEC_ISO_INVOCATION (coding
, 1)
3036 = (coding
->flags
& CODING_FLAG_ISO_SEVEN_BITS
? -1 : 1);
3037 /* Not single shifting at first. */
3038 CODING_SPEC_ISO_SINGLE_SHIFTING (coding
) = 0;
3039 /* Beginning of buffer should also be regarded as bol. */
3040 CODING_SPEC_ISO_BOL (coding
) = 1;
3042 for (charset
= 0; charset
<= MAX_CHARSET
; charset
++)
3043 CODING_SPEC_ISO_REVISION_NUMBER (coding
, charset
) = 255;
3044 val
= Vcharset_revision_alist
;
3047 charset
= get_charset_id (Fcar_safe (XCAR (val
)));
3049 && (temp
= Fcdr_safe (XCAR (val
)), INTEGERP (temp
))
3050 && (i
= XINT (temp
), (i
>= 0 && (i
+ '@') < 128)))
3051 CODING_SPEC_ISO_REVISION_NUMBER (coding
, charset
) = i
;
3055 /* Check FLAGS[REG] (REG = 0, 1, 2, 3) and decide designations.
3056 FLAGS[REG] can be one of below:
3057 integer CHARSET: CHARSET occupies register I,
3058 t: designate nothing to REG initially, but can be used
3060 list of integer, nil, or t: designate the first
3061 element (if integer) to REG initially, the remaining
3062 elements (if integer) is designated to REG on request,
3063 if an element is t, REG can be used by any charsets,
3064 nil: REG is never used. */
3065 for (charset
= 0; charset
<= MAX_CHARSET
; charset
++)
3066 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
)
3067 = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION
;
3068 for (i
= 0; i
< 4; i
++)
3070 if (INTEGERP (flags
[i
])
3071 && (charset
= XINT (flags
[i
]), CHARSET_VALID_P (charset
))
3072 || (charset
= get_charset_id (flags
[i
])) >= 0)
3074 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, i
) = charset
;
3075 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
) = i
;
3077 else if (EQ (flags
[i
], Qt
))
3079 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, i
) = -1;
3081 coding
->flags
|= CODING_FLAG_ISO_DESIGNATION
;
3083 else if (CONSP (flags
[i
]))
3088 coding
->flags
|= CODING_FLAG_ISO_DESIGNATION
;
3089 if (INTEGERP (XCAR (tail
))
3090 && (charset
= XINT (XCAR (tail
)),
3091 CHARSET_VALID_P (charset
))
3092 || (charset
= get_charset_id (XCAR (tail
))) >= 0)
3094 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, i
) = charset
;
3095 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
) =i
;
3098 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, i
) = -1;
3100 while (CONSP (tail
))
3102 if (INTEGERP (XCAR (tail
))
3103 && (charset
= XINT (XCAR (tail
)),
3104 CHARSET_VALID_P (charset
))
3105 || (charset
= get_charset_id (XCAR (tail
))) >= 0)
3106 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
)
3108 else if (EQ (XCAR (tail
), Qt
))
3114 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, i
) = -1;
3116 CODING_SPEC_ISO_DESIGNATION (coding
, i
)
3117 = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, i
);
3120 if (reg_bits
&& ! (coding
->flags
& CODING_FLAG_ISO_LOCKING_SHIFT
))
3122 /* REG 1 can be used only by locking shift in 7-bit env. */
3123 if (coding
->flags
& CODING_FLAG_ISO_SEVEN_BITS
)
3125 if (! (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
))
3126 /* Without any shifting, only REG 0 and 1 can be used. */
3131 for (charset
= 0; charset
<= MAX_CHARSET
; charset
++)
3133 if (CHARSET_VALID_P (charset
))
3135 /* There exist some default graphic registers to be
3138 /* We had better avoid designating a charset of
3139 CHARS96 to REG 0 as far as possible. */
3140 if (CHARSET_CHARS (charset
) == 96)
3141 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
)
3143 ? 1 : (reg_bits
& 4 ? 2 : (reg_bits
& 8 ? 3 : 0)));
3145 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
)
3147 ? 0 : (reg_bits
& 2 ? 1 : (reg_bits
& 4 ? 2 : 3)));
3151 coding
->common_flags
|= CODING_REQUIRE_FLUSHING_MASK
;
3152 coding
->spec
.iso2022
.last_invalid_designation_register
= -1;
3156 coding
->type
= coding_type_big5
;
3157 coding
->common_flags
3158 |= CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
;
3160 = (NILP (XVECTOR (coding_spec
)->contents
[4])
3161 ? CODING_FLAG_BIG5_HKU
3162 : CODING_FLAG_BIG5_ETEN
);
3166 coding
->type
= coding_type_ccl
;
3167 coding
->common_flags
3168 |= CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
;
3170 val
= XVECTOR (coding_spec
)->contents
[4];
3172 || setup_ccl_program (&(coding
->spec
.ccl
.decoder
),
3174 || setup_ccl_program (&(coding
->spec
.ccl
.encoder
),
3176 goto label_invalid_coding_system
;
3178 bzero (coding
->spec
.ccl
.valid_codes
, 256);
3179 val
= Fplist_get (plist
, Qvalid_codes
);
3184 for (; CONSP (val
); val
= XCDR (val
))
3188 && XINT (this) >= 0 && XINT (this) < 256)
3189 coding
->spec
.ccl
.valid_codes
[XINT (this)] = 1;
3190 else if (CONSP (this)
3191 && INTEGERP (XCAR (this))
3192 && INTEGERP (XCDR (this)))
3194 int start
= XINT (XCAR (this));
3195 int end
= XINT (XCDR (this));
3197 if (start
>= 0 && start
<= end
&& end
< 256)
3198 while (start
<= end
)
3199 coding
->spec
.ccl
.valid_codes
[start
++] = 1;
3204 coding
->common_flags
|= CODING_REQUIRE_FLUSHING_MASK
;
3205 coding
->spec
.ccl
.cr_carryover
= 0;
3209 coding
->type
= coding_type_raw_text
;
3213 goto label_invalid_coding_system
;
3217 label_invalid_coding_system
:
3218 coding
->type
= coding_type_no_conversion
;
3219 coding
->category_idx
= CODING_CATEGORY_IDX_BINARY
;
3220 coding
->common_flags
= 0;
3221 coding
->eol_type
= CODING_EOL_LF
;
3222 coding
->pre_write_conversion
= coding
->post_read_conversion
= Qnil
;
3226 /* Free memory blocks allocated for storing composition information. */
3229 coding_free_composition_data (coding
)
3230 struct coding_system
*coding
;
3232 struct composition_data
*cmp_data
= coding
->cmp_data
, *next
;
3236 /* Memory blocks are chained. At first, rewind to the first, then,
3237 free blocks one by one. */
3238 while (cmp_data
->prev
)
3239 cmp_data
= cmp_data
->prev
;
3242 next
= cmp_data
->next
;
3246 coding
->cmp_data
= NULL
;
3249 /* Set `char_offset' member of all memory blocks pointed by
3250 coding->cmp_data to POS. */
3253 coding_adjust_composition_offset (coding
, pos
)
3254 struct coding_system
*coding
;
3257 struct composition_data
*cmp_data
;
3259 for (cmp_data
= coding
->cmp_data
; cmp_data
; cmp_data
= cmp_data
->next
)
3260 cmp_data
->char_offset
= pos
;
3263 /* Setup raw-text or one of its subsidiaries in the structure
3264 coding_system CODING according to the already setup value eol_type
3265 in CODING. CODING should be setup for some coding system in
3269 setup_raw_text_coding_system (coding
)
3270 struct coding_system
*coding
;
3272 if (coding
->type
!= coding_type_raw_text
)
3274 coding
->symbol
= Qraw_text
;
3275 coding
->type
= coding_type_raw_text
;
3276 if (coding
->eol_type
!= CODING_EOL_UNDECIDED
)
3278 Lisp_Object subsidiaries
;
3279 subsidiaries
= Fget (Qraw_text
, Qeol_type
);
3281 if (VECTORP (subsidiaries
)
3282 && XVECTOR (subsidiaries
)->size
== 3)
3284 = XVECTOR (subsidiaries
)->contents
[coding
->eol_type
];
3286 setup_coding_system (coding
->symbol
, coding
);
3291 /* Emacs has a mechanism to automatically detect a coding system if it
3292 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
3293 it's impossible to distinguish some coding systems accurately
3294 because they use the same range of codes. So, at first, coding
3295 systems are grouped into the following categories:
3297 o coding-category-emacs-mule
3299 The category for a coding system which has the same code range
3300 as Emacs' internal format. Assigned the coding-system (Lisp
3301 symbol) `emacs-mule' by default.
3303 o coding-category-sjis
3305 The category for a coding system which has the same code range
3306 as SJIS. Assigned the coding-system (Lisp
3307 symbol) `japanese-shift-jis' by default.
3309 o coding-category-iso-7
3311 The category for a coding system which has the same code range
3312 as ISO2022 of 7-bit environment. This doesn't use any locking
3313 shift and single shift functions. This can encode/decode all
3314 charsets. Assigned the coding-system (Lisp symbol)
3315 `iso-2022-7bit' by default.
3317 o coding-category-iso-7-tight
3319 Same as coding-category-iso-7 except that this can
3320 encode/decode only the specified charsets.
3322 o coding-category-iso-8-1
3324 The category for a coding system which has the same code range
3325 as ISO2022 of 8-bit environment and graphic plane 1 used only
3326 for DIMENSION1 charset. This doesn't use any locking shift
3327 and single shift functions. Assigned the coding-system (Lisp
3328 symbol) `iso-latin-1' by default.
3330 o coding-category-iso-8-2
3332 The category for a coding system which has the same code range
3333 as ISO2022 of 8-bit environment and graphic plane 1 used only
3334 for DIMENSION2 charset. This doesn't use any locking shift
3335 and single shift functions. Assigned the coding-system (Lisp
3336 symbol) `japanese-iso-8bit' by default.
3338 o coding-category-iso-7-else
3340 The category for a coding system which has the same code range
3341 as ISO2022 of 7-bit environment but uses locking shift or
3342 single shift functions. Assigned the coding-system (Lisp
3343 symbol) `iso-2022-7bit-lock' by default.
3345 o coding-category-iso-8-else
3347 The category for a coding system which has the same code range
3348 as ISO2022 of 8-bit environment but uses locking shift or
3349 single shift functions. Assigned the coding-system (Lisp
3350 symbol) `iso-2022-8bit-ss2' by default.
3352 o coding-category-big5
3354 The category for a coding system which has the same code range
3355 as BIG5. Assigned the coding-system (Lisp symbol)
3356 `cn-big5' by default.
3358 o coding-category-utf-8
3360 The category for a coding system which has the same code range
3361 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
3362 symbol) `utf-8' by default.
3364 o coding-category-utf-16-be
3366 The category for a coding system in which a text has an
3367 Unicode signature (cf. Unicode Standard) in the order of BIG
3368 endian at the head. Assigned the coding-system (Lisp symbol)
3369 `utf-16-be' by default.
3371 o coding-category-utf-16-le
3373 The category for a coding system in which a text has an
3374 Unicode signature (cf. Unicode Standard) in the order of
3375 LITTLE endian at the head. Assigned the coding-system (Lisp
3376 symbol) `utf-16-le' by default.
3378 o coding-category-ccl
3380 The category for a coding system of which encoder/decoder is
3381 written in CCL programs. The default value is nil, i.e., no
3382 coding system is assigned.
3384 o coding-category-binary
3386 The category for a coding system not categorized in any of the
3387 above. Assigned the coding-system (Lisp symbol)
3388 `no-conversion' by default.
3390 Each of them is a Lisp symbol and the value is an actual
3391 `coding-system' (this is also a Lisp symbol) assigned by a user.
3392 What Emacs does actually is to detect a category of coding system.
3393 Then, it uses a `coding-system' assigned to it. If Emacs can't
3394 decide on a single possible category, it selects the category of the
3395 highest priority. Priorities of categories are also specified by a
3396 user in a Lisp variable `coding-category-list'.
3401 int ascii_skip_code
[256];
3403 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3404 If it detects possible coding systems, return an integer in which
3405 appropriate flag bits are set. Flag bits are defined by macros
3406 CODING_CATEGORY_MASK_XXX in `coding.h'. If PRIORITIES is non-NULL,
3407 it should point to the table `coding_priorities'. In that case, only
3408 the flag bit for a coding system of the highest priority is set in
3411 How many ASCII characters are at the head is returned as *SKIP. */
3414 detect_coding_mask (source
, src_bytes
, priorities
, skip
)
3415 unsigned char *source
;
3416 int src_bytes
, *priorities
, *skip
;
3418 register unsigned char c
;
3419 unsigned char *src
= source
, *src_end
= source
+ src_bytes
;
3420 unsigned int mask
, utf16_examined_p
, iso2022_examined_p
;
3423 /* At first, skip all ASCII characters and control characters except
3424 for three ISO2022 specific control characters. */
3425 ascii_skip_code
[ISO_CODE_SO
] = 0;
3426 ascii_skip_code
[ISO_CODE_SI
] = 0;
3427 ascii_skip_code
[ISO_CODE_ESC
] = 0;
3429 label_loop_detect_coding
:
3430 while (src
< src_end
&& ascii_skip_code
[*src
]) src
++;
3431 *skip
= src
- source
;
3434 /* We found nothing other than ASCII. There's nothing to do. */
3438 /* The text seems to be encoded in some multilingual coding system.
3439 Now, try to find in which coding system the text is encoded. */
3442 /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3443 /* C is an ISO2022 specific control code of C0. */
3444 mask
= detect_coding_iso2022 (src
, src_end
);
3447 /* No valid ISO2022 code follows C. Try again. */
3449 if (c
== ISO_CODE_ESC
)
3450 ascii_skip_code
[ISO_CODE_ESC
] = 1;
3452 ascii_skip_code
[ISO_CODE_SO
] = ascii_skip_code
[ISO_CODE_SI
] = 1;
3453 goto label_loop_detect_coding
;
3457 for (i
= 0; i
< CODING_CATEGORY_IDX_MAX
; i
++)
3459 if (mask
& priorities
[i
])
3460 return priorities
[i
];
3462 return CODING_CATEGORY_MASK_RAW_TEXT
;
3471 /* C is the first byte of SJIS character code,
3472 or a leading-code of Emacs' internal format (emacs-mule),
3473 or the first byte of UTF-16. */
3474 try = (CODING_CATEGORY_MASK_SJIS
3475 | CODING_CATEGORY_MASK_EMACS_MULE
3476 | CODING_CATEGORY_MASK_UTF_16_BE
3477 | CODING_CATEGORY_MASK_UTF_16_LE
);
3479 /* Or, if C is a special latin extra code,
3480 or is an ISO2022 specific control code of C1 (SS2 or SS3),
3481 or is an ISO2022 control-sequence-introducer (CSI),
3482 we should also consider the possibility of ISO2022 codings. */
3483 if ((VECTORP (Vlatin_extra_code_table
)
3484 && !NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
3485 || (c
== ISO_CODE_SS2
|| c
== ISO_CODE_SS3
)
3486 || (c
== ISO_CODE_CSI
3489 || ((*src
== '0' || *src
== '1' || *src
== '2')
3490 && src
+ 1 < src_end
3491 && src
[1] == ']')))))
3492 try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3493 | CODING_CATEGORY_MASK_ISO_8BIT
);
3496 /* C is a character of ISO2022 in graphic plane right,
3497 or a SJIS's 1-byte character code (i.e. JISX0201),
3498 or the first byte of BIG5's 2-byte code,
3499 or the first byte of UTF-8/16. */
3500 try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3501 | CODING_CATEGORY_MASK_ISO_8BIT
3502 | CODING_CATEGORY_MASK_SJIS
3503 | CODING_CATEGORY_MASK_BIG5
3504 | CODING_CATEGORY_MASK_UTF_8
3505 | CODING_CATEGORY_MASK_UTF_16_BE
3506 | CODING_CATEGORY_MASK_UTF_16_LE
);
3508 /* Or, we may have to consider the possibility of CCL. */
3509 if (coding_system_table
[CODING_CATEGORY_IDX_CCL
]
3510 && (coding_system_table
[CODING_CATEGORY_IDX_CCL
]
3511 ->spec
.ccl
.valid_codes
)[c
])
3512 try |= CODING_CATEGORY_MASK_CCL
;
3515 utf16_examined_p
= iso2022_examined_p
= 0;
3518 for (i
= 0; i
< CODING_CATEGORY_IDX_MAX
; i
++)
3520 if (!iso2022_examined_p
3521 && (priorities
[i
] & try & CODING_CATEGORY_MASK_ISO
))
3523 mask
|= detect_coding_iso2022 (src
, src_end
);
3524 iso2022_examined_p
= 1;
3526 else if (priorities
[i
] & try & CODING_CATEGORY_MASK_SJIS
)
3527 mask
|= detect_coding_sjis (src
, src_end
);
3528 else if (priorities
[i
] & try & CODING_CATEGORY_MASK_UTF_8
)
3529 mask
|= detect_coding_utf_8 (src
, src_end
);
3530 else if (!utf16_examined_p
3531 && (priorities
[i
] & try &
3532 CODING_CATEGORY_MASK_UTF_16_BE_LE
))
3534 mask
|= detect_coding_utf_16 (src
, src_end
);
3535 utf16_examined_p
= 1;
3537 else if (priorities
[i
] & try & CODING_CATEGORY_MASK_BIG5
)
3538 mask
|= detect_coding_big5 (src
, src_end
);
3539 else if (priorities
[i
] & try & CODING_CATEGORY_MASK_EMACS_MULE
)
3540 mask
|= detect_coding_emacs_mule (src
, src_end
);
3541 else if (priorities
[i
] & try & CODING_CATEGORY_MASK_CCL
)
3542 mask
|= detect_coding_ccl (src
, src_end
);
3543 else if (priorities
[i
] & CODING_CATEGORY_MASK_RAW_TEXT
)
3544 mask
|= CODING_CATEGORY_MASK_RAW_TEXT
;
3545 else if (priorities
[i
] & CODING_CATEGORY_MASK_BINARY
)
3546 mask
|= CODING_CATEGORY_MASK_BINARY
;
3547 if (mask
& priorities
[i
])
3548 return priorities
[i
];
3550 return CODING_CATEGORY_MASK_RAW_TEXT
;
3552 if (try & CODING_CATEGORY_MASK_ISO
)
3553 mask
|= detect_coding_iso2022 (src
, src_end
);
3554 if (try & CODING_CATEGORY_MASK_SJIS
)
3555 mask
|= detect_coding_sjis (src
, src_end
);
3556 if (try & CODING_CATEGORY_MASK_BIG5
)
3557 mask
|= detect_coding_big5 (src
, src_end
);
3558 if (try & CODING_CATEGORY_MASK_UTF_8
)
3559 mask
|= detect_coding_utf_8 (src
, src_end
);
3560 if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE
)
3561 mask
|= detect_coding_utf_16 (src
, src_end
);
3562 if (try & CODING_CATEGORY_MASK_EMACS_MULE
)
3563 mask
|= detect_coding_emacs_mule (src
, src_end
);
3564 if (try & CODING_CATEGORY_MASK_CCL
)
3565 mask
|= detect_coding_ccl (src
, src_end
);
3567 return (mask
| CODING_CATEGORY_MASK_RAW_TEXT
| CODING_CATEGORY_MASK_BINARY
);
3570 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3571 The information of the detected coding system is set in CODING. */
3574 detect_coding (coding
, src
, src_bytes
)
3575 struct coding_system
*coding
;
3583 val
= Vcoding_category_list
;
3584 mask
= detect_coding_mask (src
, src_bytes
, coding_priorities
, &skip
);
3585 coding
->heading_ascii
= skip
;
3589 /* We found a single coding system of the highest priority in MASK. */
3591 while (mask
&& ! (mask
& 1)) mask
>>= 1, idx
++;
3593 idx
= CODING_CATEGORY_IDX_RAW_TEXT
;
3595 val
= XSYMBOL (XVECTOR (Vcoding_category_table
)->contents
[idx
])->value
;
3597 if (coding
->eol_type
!= CODING_EOL_UNDECIDED
)
3601 tmp
= Fget (val
, Qeol_type
);
3603 val
= XVECTOR (tmp
)->contents
[coding
->eol_type
];
3606 /* Setup this new coding system while preserving some slots. */
3608 int src_multibyte
= coding
->src_multibyte
;
3609 int dst_multibyte
= coding
->dst_multibyte
;
3611 setup_coding_system (val
, coding
);
3612 coding
->src_multibyte
= src_multibyte
;
3613 coding
->dst_multibyte
= dst_multibyte
;
3614 coding
->heading_ascii
= skip
;
3618 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3619 SOURCE is encoded. Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3620 CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3622 How many non-eol characters are at the head is returned as *SKIP. */
3624 #define MAX_EOL_CHECK_COUNT 3
3627 detect_eol_type (source
, src_bytes
, skip
)
3628 unsigned char *source
;
3629 int src_bytes
, *skip
;
3631 unsigned char *src
= source
, *src_end
= src
+ src_bytes
;
3633 int total
= 0; /* How many end-of-lines are found so far. */
3634 int eol_type
= CODING_EOL_UNDECIDED
;
3639 while (src
< src_end
&& total
< MAX_EOL_CHECK_COUNT
)
3642 if (c
== '\n' || c
== '\r')
3645 *skip
= src
- 1 - source
;
3648 this_eol_type
= CODING_EOL_LF
;
3649 else if (src
>= src_end
|| *src
!= '\n')
3650 this_eol_type
= CODING_EOL_CR
;
3652 this_eol_type
= CODING_EOL_CRLF
, src
++;
3654 if (eol_type
== CODING_EOL_UNDECIDED
)
3655 /* This is the first end-of-line. */
3656 eol_type
= this_eol_type
;
3657 else if (eol_type
!= this_eol_type
)
3659 /* The found type is different from what found before. */
3660 eol_type
= CODING_EOL_INCONSISTENT
;
3667 *skip
= src_end
- source
;
3671 /* Like detect_eol_type, but detect EOL type in 2-octet
3672 big-endian/little-endian format for coding systems utf-16-be and
3676 detect_eol_type_in_2_octet_form (source
, src_bytes
, skip
, big_endian_p
)
3677 unsigned char *source
;
3678 int src_bytes
, *skip
;
3680 unsigned char *src
= source
, *src_end
= src
+ src_bytes
;
3681 unsigned int c1
, c2
;
3682 int total
= 0; /* How many end-of-lines are found so far. */
3683 int eol_type
= CODING_EOL_UNDECIDED
;
3694 while ((src
+ 1) < src_end
&& total
< MAX_EOL_CHECK_COUNT
)
3696 c1
= (src
[msb
] << 8) | (src
[lsb
]);
3699 if (c1
== '\n' || c1
== '\r')
3702 *skip
= src
- 2 - source
;
3706 this_eol_type
= CODING_EOL_LF
;
3710 if ((src
+ 1) >= src_end
)
3712 this_eol_type
= CODING_EOL_CR
;
3716 c2
= (src
[msb
] << 8) | (src
[lsb
]);
3718 this_eol_type
= CODING_EOL_CRLF
, src
+= 2;
3720 this_eol_type
= CODING_EOL_CR
;
3724 if (eol_type
== CODING_EOL_UNDECIDED
)
3725 /* This is the first end-of-line. */
3726 eol_type
= this_eol_type
;
3727 else if (eol_type
!= this_eol_type
)
3729 /* The found type is different from what found before. */
3730 eol_type
= CODING_EOL_INCONSISTENT
;
3737 *skip
= src_end
- source
;
3741 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3742 is encoded. If it detects an appropriate format of end-of-line, it
3743 sets the information in *CODING. */
3746 detect_eol (coding
, src
, src_bytes
)
3747 struct coding_system
*coding
;
3755 switch (coding
->category_idx
)
3757 case CODING_CATEGORY_IDX_UTF_16_BE
:
3758 eol_type
= detect_eol_type_in_2_octet_form (src
, src_bytes
, &skip
, 1);
3760 case CODING_CATEGORY_IDX_UTF_16_LE
:
3761 eol_type
= detect_eol_type_in_2_octet_form (src
, src_bytes
, &skip
, 0);
3764 eol_type
= detect_eol_type (src
, src_bytes
, &skip
);
3768 if (coding
->heading_ascii
> skip
)
3769 coding
->heading_ascii
= skip
;
3771 skip
= coding
->heading_ascii
;
3773 if (eol_type
== CODING_EOL_UNDECIDED
)
3775 if (eol_type
== CODING_EOL_INCONSISTENT
)
3778 /* This code is suppressed until we find a better way to
3779 distinguish raw text file and binary file. */
3781 /* If we have already detected that the coding is raw-text, the
3782 coding should actually be no-conversion. */
3783 if (coding
->type
== coding_type_raw_text
)
3785 setup_coding_system (Qno_conversion
, coding
);
3788 /* Else, let's decode only text code anyway. */
3790 eol_type
= CODING_EOL_LF
;
3793 val
= Fget (coding
->symbol
, Qeol_type
);
3794 if (VECTORP (val
) && XVECTOR (val
)->size
== 3)
3796 int src_multibyte
= coding
->src_multibyte
;
3797 int dst_multibyte
= coding
->dst_multibyte
;
3799 setup_coding_system (XVECTOR (val
)->contents
[eol_type
], coding
);
3800 coding
->src_multibyte
= src_multibyte
;
3801 coding
->dst_multibyte
= dst_multibyte
;
3802 coding
->heading_ascii
= skip
;
3806 #define CONVERSION_BUFFER_EXTRA_ROOM 256
3808 #define DECODING_BUFFER_MAG(coding) \
3809 (coding->type == coding_type_iso2022 \
3811 : (coding->type == coding_type_ccl \
3812 ? coding->spec.ccl.decoder.buf_magnification \
3815 /* Return maximum size (bytes) of a buffer enough for decoding
3816 SRC_BYTES of text encoded in CODING. */
3819 decoding_buffer_size (coding
, src_bytes
)
3820 struct coding_system
*coding
;
3823 return (src_bytes
* DECODING_BUFFER_MAG (coding
)
3824 + CONVERSION_BUFFER_EXTRA_ROOM
);
3827 /* Return maximum size (bytes) of a buffer enough for encoding
3828 SRC_BYTES of text to CODING. */
3831 encoding_buffer_size (coding
, src_bytes
)
3832 struct coding_system
*coding
;
3837 if (coding
->type
== coding_type_ccl
)
3838 magnification
= coding
->spec
.ccl
.encoder
.buf_magnification
;
3839 else if (CODING_REQUIRE_ENCODING (coding
))
3844 return (src_bytes
* magnification
+ CONVERSION_BUFFER_EXTRA_ROOM
);
3847 #ifndef MINIMUM_CONVERSION_BUFFER_SIZE
3848 #define MINIMUM_CONVERSION_BUFFER_SIZE 1024
3851 char *conversion_buffer
;
3852 int conversion_buffer_size
;
3854 /* Return a pointer to a SIZE bytes of buffer to be used for encoding
3855 or decoding. Sufficient memory is allocated automatically. If we
3856 run out of memory, return NULL. */
3859 get_conversion_buffer (size
)
3862 if (size
> conversion_buffer_size
)
3865 int real_size
= conversion_buffer_size
* 2;
3867 while (real_size
< size
) real_size
*= 2;
3868 buf
= (char *) xmalloc (real_size
);
3869 xfree (conversion_buffer
);
3870 conversion_buffer
= buf
;
3871 conversion_buffer_size
= real_size
;
3873 return conversion_buffer
;
3877 ccl_coding_driver (coding
, source
, destination
, src_bytes
, dst_bytes
, encodep
)
3878 struct coding_system
*coding
;
3879 unsigned char *source
, *destination
;
3880 int src_bytes
, dst_bytes
, encodep
;
3882 struct ccl_program
*ccl
3883 = encodep
? &coding
->spec
.ccl
.encoder
: &coding
->spec
.ccl
.decoder
;
3886 ccl
->last_block
= coding
->mode
& CODING_MODE_LAST_BLOCK
;
3888 ccl
->eol_type
= coding
->eol_type
;
3889 coding
->produced
= ccl_driver (ccl
, source
, destination
,
3890 src_bytes
, dst_bytes
, &(coding
->consumed
));
3892 coding
->produced_char
= coding
->produced
;
3896 = dst_bytes
? dst_bytes
: source
+ coding
->consumed
- destination
;
3897 coding
->produced
= str_as_multibyte (destination
, bytes
,
3899 &(coding
->produced_char
));
3902 switch (ccl
->status
)
3904 case CCL_STAT_SUSPEND_BY_SRC
:
3905 result
= CODING_FINISH_INSUFFICIENT_SRC
;
3907 case CCL_STAT_SUSPEND_BY_DST
:
3908 result
= CODING_FINISH_INSUFFICIENT_DST
;
3911 case CCL_STAT_INVALID_CMD
:
3912 result
= CODING_FINISH_INTERRUPT
;
3915 result
= CODING_FINISH_NORMAL
;
3921 /* Decode EOL format of the text at PTR of BYTES length destructively
3922 according to CODING->eol_type. This is called after the CCL
3923 program produced a decoded text at PTR. If we do CRLF->LF
3924 conversion, update CODING->produced and CODING->produced_char. */
3927 decode_eol_post_ccl (coding
, ptr
, bytes
)
3928 struct coding_system
*coding
;
3932 Lisp_Object val
, saved_coding_symbol
;
3933 unsigned char *pend
= ptr
+ bytes
;
3936 /* Remember the current coding system symbol. We set it back when
3937 an inconsistent EOL is found so that `last-coding-system-used' is
3938 set to the coding system that doesn't specify EOL conversion. */
3939 saved_coding_symbol
= coding
->symbol
;
3941 coding
->spec
.ccl
.cr_carryover
= 0;
3942 if (coding
->eol_type
== CODING_EOL_UNDECIDED
)
3944 /* Here, to avoid the call of setup_coding_system, we directly
3945 call detect_eol_type. */
3946 coding
->eol_type
= detect_eol_type (ptr
, bytes
, &dummy
);
3947 if (coding
->eol_type
== CODING_EOL_INCONSISTENT
)
3948 coding
->eol_type
= CODING_EOL_LF
;
3949 if (coding
->eol_type
!= CODING_EOL_UNDECIDED
)
3951 val
= Fget (coding
->symbol
, Qeol_type
);
3952 if (VECTORP (val
) && XVECTOR (val
)->size
== 3)
3953 coding
->symbol
= XVECTOR (val
)->contents
[coding
->eol_type
];
3955 coding
->mode
|= CODING_MODE_INHIBIT_INCONSISTENT_EOL
;
3958 if (coding
->eol_type
== CODING_EOL_LF
3959 || coding
->eol_type
== CODING_EOL_UNDECIDED
)
3961 /* We have nothing to do. */
3964 else if (coding
->eol_type
== CODING_EOL_CRLF
)
3966 unsigned char *pstart
= ptr
, *p
= ptr
;
3968 if (! (coding
->mode
& CODING_MODE_LAST_BLOCK
)
3969 && *(pend
- 1) == '\r')
3971 /* If the last character is CR, we can't handle it here
3972 because LF will be in the not-yet-decoded source text.
3973 Record that the CR is not yet processed. */
3974 coding
->spec
.ccl
.cr_carryover
= 1;
3976 coding
->produced_char
--;
3983 if (ptr
+ 1 < pend
&& *(ptr
+ 1) == '\n')
3990 if (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
3991 goto undo_eol_conversion
;
3995 else if (*ptr
== '\n'
3996 && coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
3997 goto undo_eol_conversion
;
4002 undo_eol_conversion
:
4003 /* We have encountered an inconsistent EOL format at PTR.
4004 Convert all LFs before PTR back to CRLFs. */
4005 for (p
--, ptr
--; p
>= pstart
; p
--)
4008 *ptr
-- = '\n', *ptr
-- = '\r';
4012 /* If carryover is recorded, cancel it because we don't
4013 convert CRLF anymore. */
4014 if (coding
->spec
.ccl
.cr_carryover
)
4016 coding
->spec
.ccl
.cr_carryover
= 0;
4018 coding
->produced_char
++;
4022 coding
->eol_type
= CODING_EOL_LF
;
4023 coding
->symbol
= saved_coding_symbol
;
4027 /* As each two-byte sequence CRLF was converted to LF, (PEND
4028 - P) is the number of deleted characters. */
4029 coding
->produced
-= pend
- p
;
4030 coding
->produced_char
-= pend
- p
;
4033 else /* i.e. coding->eol_type == CODING_EOL_CR */
4035 unsigned char *p
= ptr
;
4037 for (; ptr
< pend
; ptr
++)
4041 else if (*ptr
== '\n'
4042 && coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
4044 for (; p
< ptr
; p
++)
4050 coding
->eol_type
= CODING_EOL_LF
;
4051 coding
->symbol
= saved_coding_symbol
;
4057 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before
4058 decoding, it may detect coding system and format of end-of-line if
4059 those are not yet decided. The source should be unibyte, the
4060 result is multibyte if CODING->dst_multibyte is nonzero, else
4064 decode_coding (coding
, source
, destination
, src_bytes
, dst_bytes
)
4065 struct coding_system
*coding
;
4066 unsigned char *source
, *destination
;
4067 int src_bytes
, dst_bytes
;
4069 if (coding
->type
== coding_type_undecided
)
4070 detect_coding (coding
, source
, src_bytes
);
4072 if (coding
->eol_type
== CODING_EOL_UNDECIDED
4073 && coding
->type
!= coding_type_ccl
)
4074 detect_eol (coding
, source
, src_bytes
);
4076 coding
->produced
= coding
->produced_char
= 0;
4077 coding
->consumed
= coding
->consumed_char
= 0;
4079 coding
->result
= CODING_FINISH_NORMAL
;
4081 switch (coding
->type
)
4083 case coding_type_sjis
:
4084 decode_coding_sjis_big5 (coding
, source
, destination
,
4085 src_bytes
, dst_bytes
, 1);
4088 case coding_type_iso2022
:
4089 decode_coding_iso2022 (coding
, source
, destination
,
4090 src_bytes
, dst_bytes
);
4093 case coding_type_big5
:
4094 decode_coding_sjis_big5 (coding
, source
, destination
,
4095 src_bytes
, dst_bytes
, 0);
4098 case coding_type_emacs_mule
:
4099 decode_coding_emacs_mule (coding
, source
, destination
,
4100 src_bytes
, dst_bytes
);
4103 case coding_type_ccl
:
4104 if (coding
->spec
.ccl
.cr_carryover
)
4106 /* Set the CR which is not processed by the previous call of
4107 decode_eol_post_ccl in DESTINATION. */
4108 *destination
= '\r';
4110 coding
->produced_char
++;
4113 ccl_coding_driver (coding
, source
,
4114 destination
+ coding
->spec
.ccl
.cr_carryover
,
4115 src_bytes
, dst_bytes
, 0);
4116 if (coding
->eol_type
!= CODING_EOL_LF
)
4117 decode_eol_post_ccl (coding
, destination
, coding
->produced
);
4121 decode_eol (coding
, source
, destination
, src_bytes
, dst_bytes
);
4124 if (coding
->result
== CODING_FINISH_INSUFFICIENT_SRC
4125 && coding
->consumed
== src_bytes
)
4126 coding
->result
= CODING_FINISH_NORMAL
;
4128 if (coding
->mode
& CODING_MODE_LAST_BLOCK
4129 && coding
->result
== CODING_FINISH_INSUFFICIENT_SRC
)
4131 unsigned char *src
= source
+ coding
->consumed
;
4132 unsigned char *dst
= destination
+ coding
->produced
;
4134 src_bytes
-= coding
->consumed
;
4136 if (COMPOSING_P (coding
))
4137 DECODE_COMPOSITION_END ('1');
4141 dst
+= CHAR_STRING (c
, dst
);
4142 coding
->produced_char
++;
4144 coding
->consumed
= coding
->consumed_char
= src
- source
;
4145 coding
->produced
= dst
- destination
;
4148 if (!coding
->dst_multibyte
)
4150 coding
->produced
= str_as_unibyte (destination
, coding
->produced
);
4151 coding
->produced_char
= coding
->produced
;
4154 return coding
->result
;
4157 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions". The
4158 multibyteness of the source is CODING->src_multibyte, the
4159 multibyteness of the result is always unibyte. */
4162 encode_coding (coding
, source
, destination
, src_bytes
, dst_bytes
)
4163 struct coding_system
*coding
;
4164 unsigned char *source
, *destination
;
4165 int src_bytes
, dst_bytes
;
4167 coding
->produced
= coding
->produced_char
= 0;
4168 coding
->consumed
= coding
->consumed_char
= 0;
4170 coding
->result
= CODING_FINISH_NORMAL
;
4172 switch (coding
->type
)
4174 case coding_type_sjis
:
4175 encode_coding_sjis_big5 (coding
, source
, destination
,
4176 src_bytes
, dst_bytes
, 1);
4179 case coding_type_iso2022
:
4180 encode_coding_iso2022 (coding
, source
, destination
,
4181 src_bytes
, dst_bytes
);
4184 case coding_type_big5
:
4185 encode_coding_sjis_big5 (coding
, source
, destination
,
4186 src_bytes
, dst_bytes
, 0);
4189 case coding_type_emacs_mule
:
4190 encode_coding_emacs_mule (coding
, source
, destination
,
4191 src_bytes
, dst_bytes
);
4194 case coding_type_ccl
:
4195 ccl_coding_driver (coding
, source
, destination
,
4196 src_bytes
, dst_bytes
, 1);
4200 encode_eol (coding
, source
, destination
, src_bytes
, dst_bytes
);
4203 if (coding
->result
== CODING_FINISH_INSUFFICIENT_SRC
4204 && coding
->consumed
== src_bytes
)
4205 coding
->result
= CODING_FINISH_NORMAL
;
4207 if (coding
->mode
& CODING_MODE_LAST_BLOCK
)
4209 unsigned char *src
= source
+ coding
->consumed
;
4210 unsigned char *src_end
= src
+ src_bytes
;
4211 unsigned char *dst
= destination
+ coding
->produced
;
4213 if (coding
->type
== coding_type_iso2022
)
4214 ENCODE_RESET_PLANE_AND_REGISTER
;
4215 if (COMPOSING_P (coding
))
4216 *dst
++ = ISO_CODE_ESC
, *dst
++ = '1';
4217 if (coding
->consumed
< src_bytes
)
4219 int len
= src_bytes
- coding
->consumed
;
4221 BCOPY_SHORT (source
+ coding
->consumed
, dst
, len
);
4222 if (coding
->src_multibyte
)
4223 len
= str_as_unibyte (dst
, len
);
4225 coding
->consumed
= src_bytes
;
4227 coding
->produced
= coding
->produced_char
= dst
- destination
;
4230 return coding
->result
;
4233 /* Scan text in the region between *BEG and *END (byte positions),
4234 skip characters which we don't have to decode by coding system
4235 CODING at the head and tail, then set *BEG and *END to the region
4236 of the text we actually have to convert. The caller should move
4237 the gap out of the region in advance if the region is from a
4240 If STR is not NULL, *BEG and *END are indices into STR. */
4243 shrink_decoding_region (beg
, end
, coding
, str
)
4245 struct coding_system
*coding
;
4248 unsigned char *begp_orig
, *begp
, *endp_orig
, *endp
, c
;
4250 Lisp_Object translation_table
;
4252 if (coding
->type
== coding_type_ccl
4253 || coding
->type
== coding_type_undecided
4254 || coding
->eol_type
!= CODING_EOL_LF
4255 || !NILP (coding
->post_read_conversion
)
4256 || coding
->composing
!= COMPOSITION_DISABLED
)
4258 /* We can't skip any data. */
4261 if (coding
->type
== coding_type_no_conversion
4262 || coding
->type
== coding_type_raw_text
4263 || coding
->type
== coding_type_emacs_mule
)
4265 /* We need no conversion, but don't have to skip any data here.
4266 Decoding routine handles them effectively anyway. */
4270 translation_table
= coding
->translation_table_for_decode
;
4271 if (NILP (translation_table
) && !NILP (Venable_character_translation
))
4272 translation_table
= Vstandard_translation_table_for_decode
;
4273 if (CHAR_TABLE_P (translation_table
))
4276 for (i
= 0; i
< 128; i
++)
4277 if (!NILP (CHAR_TABLE_REF (translation_table
, i
)))
4280 /* Some ASCII character should be translated. We give up
4285 if (coding
->heading_ascii
>= 0)
4286 /* Detection routine has already found how much we can skip at the
4288 *beg
+= coding
->heading_ascii
;
4292 begp_orig
= begp
= str
+ *beg
;
4293 endp_orig
= endp
= str
+ *end
;
4297 begp_orig
= begp
= BYTE_POS_ADDR (*beg
);
4298 endp_orig
= endp
= begp
+ *end
- *beg
;
4301 eol_conversion
= (coding
->eol_type
== CODING_EOL_CR
4302 || coding
->eol_type
== CODING_EOL_CRLF
);
4304 switch (coding
->type
)
4306 case coding_type_sjis
:
4307 case coding_type_big5
:
4308 /* We can skip all ASCII characters at the head. */
4309 if (coding
->heading_ascii
< 0)
4312 while (begp
< endp
&& *begp
< 0x80 && *begp
!= '\r') begp
++;
4314 while (begp
< endp
&& *begp
< 0x80) begp
++;
4316 /* We can skip all ASCII characters at the tail except for the
4317 second byte of SJIS or BIG5 code. */
4319 while (begp
< endp
&& endp
[-1] < 0x80 && endp
[-1] != '\r') endp
--;
4321 while (begp
< endp
&& endp
[-1] < 0x80) endp
--;
4322 /* Do not consider LF as ascii if preceded by CR, since that
4323 confuses eol decoding. */
4324 if (begp
< endp
&& endp
< endp_orig
&& endp
[-1] == '\r' && endp
[0] == '\n')
4326 if (begp
< endp
&& endp
< endp_orig
&& endp
[-1] >= 0x80)
4330 case coding_type_iso2022
:
4331 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, 0) != CHARSET_ASCII
)
4332 /* We can't skip any data. */
4334 if (coding
->heading_ascii
< 0)
4336 /* We can skip all ASCII characters at the head except for a
4337 few control codes. */
4338 while (begp
< endp
&& (c
= *begp
) < 0x80
4339 && c
!= ISO_CODE_CR
&& c
!= ISO_CODE_SO
4340 && c
!= ISO_CODE_SI
&& c
!= ISO_CODE_ESC
4341 && (!eol_conversion
|| c
!= ISO_CODE_LF
))
4344 switch (coding
->category_idx
)
4346 case CODING_CATEGORY_IDX_ISO_8_1
:
4347 case CODING_CATEGORY_IDX_ISO_8_2
:
4348 /* We can skip all ASCII characters at the tail. */
4350 while (begp
< endp
&& (c
= endp
[-1]) < 0x80 && c
!= '\r') endp
--;
4352 while (begp
< endp
&& endp
[-1] < 0x80) endp
--;
4353 /* Do not consider LF as ascii if preceded by CR, since that
4354 confuses eol decoding. */
4355 if (begp
< endp
&& endp
< endp_orig
&& endp
[-1] == '\r' && endp
[0] == '\n')
4359 case CODING_CATEGORY_IDX_ISO_7
:
4360 case CODING_CATEGORY_IDX_ISO_7_TIGHT
:
4362 /* We can skip all characters at the tail except for 8-bit
4363 codes and ESC and the following 2-byte at the tail. */
4364 unsigned char *eight_bit
= NULL
;
4368 && (c
= endp
[-1]) != ISO_CODE_ESC
&& c
!= '\r')
4370 if (!eight_bit
&& c
& 0x80) eight_bit
= endp
;
4375 && (c
= endp
[-1]) != ISO_CODE_ESC
)
4377 if (!eight_bit
&& c
& 0x80) eight_bit
= endp
;
4380 /* Do not consider LF as ascii if preceded by CR, since that
4381 confuses eol decoding. */
4382 if (begp
< endp
&& endp
< endp_orig
4383 && endp
[-1] == '\r' && endp
[0] == '\n')
4385 if (begp
< endp
&& endp
[-1] == ISO_CODE_ESC
)
4387 if (endp
+ 1 < endp_orig
&& end
[0] == '(' && end
[1] == 'B')
4388 /* This is an ASCII designation sequence. We can
4389 surely skip the tail. But, if we have
4390 encountered an 8-bit code, skip only the codes
4392 endp
= eight_bit
? eight_bit
: endp
+ 2;
4394 /* Hmmm, we can't skip the tail. */
4406 *beg
+= begp
- begp_orig
;
4407 *end
+= endp
- endp_orig
;
4411 /* Like shrink_decoding_region but for encoding. */
4414 shrink_encoding_region (beg
, end
, coding
, str
)
4416 struct coding_system
*coding
;
4419 unsigned char *begp_orig
, *begp
, *endp_orig
, *endp
;
4421 Lisp_Object translation_table
;
4423 if (coding
->type
== coding_type_ccl
4424 || coding
->eol_type
== CODING_EOL_CRLF
4425 || coding
->eol_type
== CODING_EOL_CR
4426 || coding
->cmp_data
&& coding
->cmp_data
->used
> 0)
4428 /* We can't skip any data. */
4431 if (coding
->type
== coding_type_no_conversion
4432 || coding
->type
== coding_type_raw_text
4433 || coding
->type
== coding_type_emacs_mule
4434 || coding
->type
== coding_type_undecided
)
4436 /* We need no conversion, but don't have to skip any data here.
4437 Encoding routine handles them effectively anyway. */
4441 translation_table
= coding
->translation_table_for_encode
;
4442 if (NILP (translation_table
) && !NILP (Venable_character_translation
))
4443 translation_table
= Vstandard_translation_table_for_encode
;
4444 if (CHAR_TABLE_P (translation_table
))
4447 for (i
= 0; i
< 128; i
++)
4448 if (!NILP (CHAR_TABLE_REF (translation_table
, i
)))
4451 /* Some ASCII character should be translated. We give up
4458 begp_orig
= begp
= str
+ *beg
;
4459 endp_orig
= endp
= str
+ *end
;
4463 begp_orig
= begp
= BYTE_POS_ADDR (*beg
);
4464 endp_orig
= endp
= begp
+ *end
- *beg
;
4467 eol_conversion
= (coding
->eol_type
== CODING_EOL_CR
4468 || coding
->eol_type
== CODING_EOL_CRLF
);
4470 /* Here, we don't have to check coding->pre_write_conversion because
4471 the caller is expected to have handled it already. */
4472 switch (coding
->type
)
4474 case coding_type_iso2022
:
4475 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, 0) != CHARSET_ASCII
)
4476 /* We can't skip any data. */
4478 if (coding
->flags
& CODING_FLAG_ISO_DESIGNATE_AT_BOL
)
4480 unsigned char *bol
= begp
;
4481 while (begp
< endp
&& *begp
< 0x80)
4484 if (begp
[-1] == '\n')
4488 goto label_skip_tail
;
4492 case coding_type_sjis
:
4493 case coding_type_big5
:
4494 /* We can skip all ASCII characters at the head and tail. */
4496 while (begp
< endp
&& *begp
< 0x80 && *begp
!= '\n') begp
++;
4498 while (begp
< endp
&& *begp
< 0x80) begp
++;
4501 while (begp
< endp
&& endp
[-1] < 0x80 && endp
[-1] != '\n') endp
--;
4503 while (begp
< endp
&& *(endp
- 1) < 0x80) endp
--;
4510 *beg
+= begp
- begp_orig
;
4511 *end
+= endp
- endp_orig
;
4515 /* As shrinking conversion region requires some overhead, we don't try
4516 shrinking if the length of conversion region is less than this
4518 static int shrink_conversion_region_threshhold
= 1024;
4520 #define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep) \
4522 if (*(end) - *(beg) > shrink_conversion_region_threshhold) \
4524 if (encodep) shrink_encoding_region (beg, end, coding, str); \
4525 else shrink_decoding_region (beg, end, coding, str); \
4530 code_convert_region_unwind (dummy
)
4533 inhibit_pre_post_conversion
= 0;
4537 /* Store information about all compositions in the range FROM and TO
4538 of OBJ in memory blocks pointed by CODING->cmp_data. OBJ is a
4539 buffer or a string, defaults to the current buffer. */
4542 coding_save_composition (coding
, from
, to
, obj
)
4543 struct coding_system
*coding
;
4550 if (coding
->composing
== COMPOSITION_DISABLED
)
4552 if (!coding
->cmp_data
)
4553 coding_allocate_composition_data (coding
, from
);
4554 if (!find_composition (from
, to
, &start
, &end
, &prop
, obj
)
4558 && (!find_composition (end
, to
, &start
, &end
, &prop
, obj
)
4561 coding
->composing
= COMPOSITION_NO
;
4564 if (COMPOSITION_VALID_P (start
, end
, prop
))
4566 enum composition_method method
= COMPOSITION_METHOD (prop
);
4567 if (coding
->cmp_data
->used
+ COMPOSITION_DATA_MAX_BUNCH_LENGTH
4568 >= COMPOSITION_DATA_SIZE
)
4569 coding_allocate_composition_data (coding
, from
);
4570 /* For relative composition, we remember start and end
4571 positions, for the other compositions, we also remember
4573 CODING_ADD_COMPOSITION_START (coding
, start
- from
, method
);
4574 if (method
!= COMPOSITION_RELATIVE
)
4576 /* We must store the composition components as well. */
4577 Lisp_Object val
, ch
;
4579 val
= COMPOSITION_COMPONENTS (prop
);
4583 ch
= XCAR (val
), val
= XCDR (val
);
4584 CODING_ADD_COMPOSITION_COMPONENT (coding
, XINT (ch
));
4586 else if (VECTORP (val
) || STRINGP (val
))
4588 int len
= (VECTORP (val
)
4589 ? XVECTOR (val
)->size
: XSTRING (val
)->size
);
4591 for (i
= 0; i
< len
; i
++)
4594 ? Faref (val
, make_number (i
))
4595 : XVECTOR (val
)->contents
[i
]);
4596 CODING_ADD_COMPOSITION_COMPONENT (coding
, XINT (ch
));
4599 else /* INTEGERP (val) */
4600 CODING_ADD_COMPOSITION_COMPONENT (coding
, XINT (val
));
4602 CODING_ADD_COMPOSITION_END (coding
, end
- from
);
4607 && find_composition (start
, to
, &start
, &end
, &prop
, obj
)
4610 /* Make coding->cmp_data point to the first memory block. */
4611 while (coding
->cmp_data
->prev
)
4612 coding
->cmp_data
= coding
->cmp_data
->prev
;
4613 coding
->cmp_data_start
= 0;
4616 /* Reflect the saved information about compositions to OBJ.
4617 CODING->cmp_data points to a memory block for the information. OBJ
4618 is a buffer or a string, defaults to the current buffer.

   The chain of blocks is first rewound through ->prev and then
   followed through ->next.  Inside a block each record is laid out as
   an int array: data[0] is the record length (used as the stride of
   the scanning loop), data[1] and data[2] are the two positions passed
   to compose_text, data[3] is the composition method, and data[4...]
   are the component codes (data[0] - 4 of them).  Components are
   packed with Fstring for COMPOSITION_WITH_ALTCHARS and with Fvector
   for the other non-relative methods.  */
4621 coding_restore_composition (coding
, obj
)
4622 struct coding_system
*coding
;
4625 struct composition_data
*cmp_data
= coding
->cmp_data
;
4630 while (cmp_data
->prev
)
4631 cmp_data
= cmp_data
->prev
;
4637 for (i
= 0; i
< cmp_data
->used
; i
+= cmp_data
->data
[i
])
4639 int *data
= cmp_data
->data
+ i
;
4640 enum composition_method method
= (enum composition_method
) data
[3];
4641 Lisp_Object components
;
4643 if (method
== COMPOSITION_RELATIVE
)
4647 int len
= data
[0] - 4, j
;
4648 Lisp_Object args
[MAX_COMPOSITION_COMPONENTS
* 2 - 1];
4650 for (j
= 0; j
< len
; j
++)
4651 args
[j
] = make_number (data
[4 + j
]);
4652 components
= (method
== COMPOSITION_WITH_ALTCHARS
4653 ? Fstring (len
, args
) : Fvector (len
, args
));
4655 compose_text (data
[1], data
[2], components
, Qnil
, obj
);
4657 cmp_data
= cmp_data
->next
;
4661 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
4662 text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
4663 coding system CODING, and return the status code of code conversion
4664 (currently, this value has no meaning).
4666 How many characters (and bytes) are converted to how many
4667 characters (and bytes) are recorded in members of the structure CODING
   (consumed, consumed_char, produced and produced_char).
4670 If REPLACE is nonzero, we do various things as if the original text
4671 is deleted and a new text is inserted. See the comments in
4672 replace_range (insdel.c) to know what we are doing.
4674 If REPLACE is zero, it is assumed that the source text is unibyte.
4675 Otherwise, it is assumed that the source text is multibyte. */
4678 code_convert_region (from
, from_byte
, to
, to_byte
, coding
, encodep
, replace
)
4679 int from
, from_byte
, to
, to_byte
, encodep
, replace
;
4680 struct coding_system
*coding
;
4682 int len
= to
- from
, len_byte
= to_byte
- from_byte
;
4683 int require
, inserted
, inserted_byte
;
4684 int head_skip
, tail_skip
, total_skip
= 0;
4685 Lisp_Object saved_coding_symbol
;
4687 unsigned char *src
, *dst
;
4688 Lisp_Object deletion
;
4689 int orig_point
= PT
, orig_len
= len
;
4691 int multibyte_p
= !NILP (current_buffer
->enable_multibyte_characters
);
4693 coding
->src_multibyte
= replace
&& multibyte_p
;
4694 coding
->dst_multibyte
= multibyte_p
;
4697 saved_coding_symbol
= Qnil
;
/* If point lies strictly inside the region, temporarily move it to
   FROM.  */
4699 if (from
< PT
&& PT
< to
)
4701 TEMP_SET_PT_BOTH (from
, from_byte
);
4707 int saved_from
= from
;
4709 prepare_to_modify_buffer (from
, to
, &from
);
4710 if (saved_from
!= from
)
4713 from_byte
= CHAR_TO_BYTE (from
), to_byte
= CHAR_TO_BYTE (to
);
4714 len_byte
= to_byte
- from_byte
;
4718 if (! encodep
&& CODING_REQUIRE_DETECTION (coding
))
4720 /* We must detect encoding of text and eol format. */
4722 if (from
< GPT
&& to
> GPT
)
4723 move_gap_both (from
, from_byte
);
4724 if (coding
->type
== coding_type_undecided
)
4726 detect_coding (coding
, BYTE_POS_ADDR (from_byte
), len_byte
);
4727 if (coding
->type
== coding_type_undecided
)
4728 /* It seems that the text contains only ASCII, but we
4729 should not leave it undecided because the deeper
4730 decoding routine (decode_coding) tries to detect the
4731 encodings again in vain. */
4732 coding
->type
= coding_type_emacs_mule
;
4734 if (coding
->eol_type
== CODING_EOL_UNDECIDED
4735 && coding
->type
!= coding_type_ccl
)
4737 saved_coding_symbol
= coding
->symbol
;
4738 detect_eol (coding
, BYTE_POS_ADDR (from_byte
), len_byte
);
4739 if (coding
->eol_type
== CODING_EOL_UNDECIDED
)
4740 coding
->eol_type
= CODING_EOL_LF
;
4741 /* We had better recover the original eol format if we
4742 encounter an inconsistent eol format while decoding. */
4743 coding
->mode
|= CODING_MODE_INHIBIT_INCONSISTENT_EOL
;
4747 /* Now we convert the text. */
4749 /* For encoding, we must process pre-write-conversion in advance. */
4750 if (! inhibit_pre_post_conversion
4752 && SYMBOLP (coding
->pre_write_conversion
)
4753 && ! NILP (Ffboundp (coding
->pre_write_conversion
)))
4755 /* The function in pre-write-conversion may put a new text in a
4757 struct buffer
*prev
= current_buffer
;
4759 int count
= specpdl_ptr
- specpdl
;
4761 record_unwind_protect (code_convert_region_unwind
, Qnil
);
4762 /* We should not call any more pre-write/post-read-conversion
4763 functions while this pre-write-conversion is running. */
4764 inhibit_pre_post_conversion
= 1;
4765 call2 (coding
->pre_write_conversion
,
4766 make_number (from
), make_number (to
));
4767 inhibit_pre_post_conversion
= 0;
4768 /* Discard the unwind protect. */
4771 if (current_buffer
!= prev
)
4774 new = Fcurrent_buffer ();
4775 set_buffer_internal_1 (prev
);
4776 del_range_2 (from
, from_byte
, to
, to_byte
, 0);
4777 TEMP_SET_PT_BOTH (from
, from_byte
);
4778 insert_from_buffer (XBUFFER (new), 1, len
, 0);
4780 if (orig_point
>= to
)
4781 orig_point
+= len
- orig_len
;
4782 else if (orig_point
> from
)
4786 from_byte
= CHAR_TO_BYTE (from
);
4787 to_byte
= CHAR_TO_BYTE (to
);
4788 len_byte
= to_byte
- from_byte
;
4789 TEMP_SET_PT_BOTH (from
, from_byte
);
4794 deletion
= make_buffer_string_both (from
, from_byte
, to
, to_byte
, 1);
4796 if (coding
->composing
!= COMPOSITION_DISABLED
)
4799 coding_save_composition (coding
, from
, to
, Fcurrent_buffer ());
4801 coding_allocate_composition_data (coding
, from
);
4804 /* Try to skip the heading and trailing ASCIIs. */
4805 if (coding
->type
!= coding_type_ccl
)
4807 int from_byte_orig
= from_byte
, to_byte_orig
= to_byte
;
4809 if (from
< GPT
&& GPT
< to
)
4810 move_gap_both (from
, from_byte
);
4811 SHRINK_CONVERSION_REGION (&from_byte
, &to_byte
, coding
, NULL
, encodep
);
4812 if (from_byte
== to_byte
4813 && (encodep
|| NILP (coding
->post_read_conversion
))
4814 && ! CODING_REQUIRE_FLUSHING (coding
))
4816 coding
->produced
= len_byte
;
4817 coding
->produced_char
= len
;
4819 /* We must record and adjust for this new text now. */
4820 adjust_after_insert (from
, from_byte_orig
, to
, to_byte_orig
, len
);
4824 head_skip
= from_byte
- from_byte_orig
;
4825 tail_skip
= to_byte_orig
- to_byte
;
4826 total_skip
= head_skip
+ tail_skip
;
4829 len
-= total_skip
; len_byte
-= total_skip
;
4832 /* The code conversion routine can not preserve text properties for
4833 now. So, we must remove all text properties in the region.
4834 Here, we must suppress all modification hooks. */
4837 int saved_inhibit_modification_hooks
= inhibit_modification_hooks
;
4838 inhibit_modification_hooks
= 1;
4839 Fset_text_properties (make_number (from
), make_number (to
), Qnil
, Qnil
);
4840 inhibit_modification_hooks
= saved_inhibit_modification_hooks
;
4843 /* For conversion, we must put the gap before the text in addition to
4844 making the gap larger for efficient decoding. The required gap
4845 size starts from 2000 which is the magic number used in make_gap.
4846 But, after one batch of conversion, it will be incremented if we
4847 find that it is not enough. */
4850 if (GAP_SIZE
< require
)
4851 make_gap (require
- GAP_SIZE
);
4852 move_gap_both (from
, from_byte
);
4854 inserted
= inserted_byte
= 0;
4856 GAP_SIZE
+= len_byte
;
4859 ZV_BYTE
-= len_byte
;
4862 if (GPT
- BEG
< BEG_UNCHANGED
)
4863 BEG_UNCHANGED
= GPT
- BEG
;
4864 if (Z
- GPT
< END_UNCHANGED
)
4865 END_UNCHANGED
= Z
- GPT
;
4867 if (!encodep
&& coding
->src_multibyte
)
4869 /* Decoding routines expect that the source text is unibyte.
4870 We must convert 8-bit characters of multibyte form to
4872 int len_byte_orig
= len_byte
;
4873 len_byte
= str_as_unibyte (GAP_END_ADDR
- len_byte
, len_byte
);
4874 if (len_byte
< len_byte_orig
)
4875 safe_bcopy (GAP_END_ADDR
- len_byte_orig
, GAP_END_ADDR
- len_byte
,
4877 coding
->src_multibyte
= 0;
4884 /* The buffer memory is now:
4885 +--------+converted-text+---------+-------original-text-------+---+
4886 |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
4887 |<---------------------- GAP ----------------------->| */
4888 src
= GAP_END_ADDR
- len_byte
;
4889 dst
= GPT_ADDR
+ inserted_byte
;
4892 result
= encode_coding (coding
, src
, dst
, len_byte
, 0);
4894 result
= decode_coding (coding
, src
, dst
, len_byte
, 0);
4896 /* The buffer memory is now:
4897 +--------+-------converted-text----+--+------original-text----+---+
4898 |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
4899 |<---------------------- GAP ----------------------->| */
4901 inserted
+= coding
->produced_char
;
4902 inserted_byte
+= coding
->produced
;
4903 len_byte
-= coding
->consumed
;
/* The converter ran out of composition data space: allocate another
   block, starting at position FROM + INSERTED.  */
4905 if (result
== CODING_FINISH_INSUFFICIENT_CMP
)
4907 coding_allocate_composition_data (coding
, from
+ inserted
);
4911 src
+= coding
->consumed
;
4912 dst
+= coding
->produced
;
4914 if (result
== CODING_FINISH_NORMAL
)
4919 if (! encodep
&& result
== CODING_FINISH_INCONSISTENT_EOL
)
4921 unsigned char *pend
= dst
, *p
= pend
- inserted_byte
;
4922 Lisp_Object eol_type
;
4924 /* Encode LFs back to the original eol format (CR or CRLF). */
4925 if (coding
->eol_type
== CODING_EOL_CR
)
4927 while (p
< pend
) if (*p
++ == '\n') p
[-1] = '\r';
4933 while (p
< pend
) if (*p
++ == '\n') count
++;
4934 if (src
- dst
< count
)
4936 /* We don't have sufficient room for encoding LFs
4937 back to CRLF. We must record converted and
4938 not-yet-converted text back to the buffer
4939 content, enlarge the gap, then record them out of
4940 the buffer contents again. */
4941 int add
= len_byte
+ inserted_byte
;
4944 ZV
+= add
; Z
+= add
; ZV_BYTE
+= add
; Z_BYTE
+= add
;
4945 GPT
+= inserted_byte
; GPT_BYTE
+= inserted_byte
;
4946 make_gap (count
- GAP_SIZE
);
4948 ZV
-= add
; Z
-= add
; ZV_BYTE
-= add
; Z_BYTE
-= add
;
4949 GPT
-= inserted_byte
; GPT_BYTE
-= inserted_byte
;
4950 /* Don't forget to update SRC, DST, and PEND. */
4951 src
= GAP_END_ADDR
- len_byte
;
4952 dst
= GPT_ADDR
+ inserted_byte
;
4956 inserted_byte
+= count
;
4957 coding
->produced
+= count
;
4958 p
= dst
= pend
+ count
;
4962 if (*p
== '\n') count
--, *--p
= '\r';
4966 /* Suppress eol-format conversion in the further conversion. */
4967 coding
->eol_type
= CODING_EOL_LF
;
4969 /* Set the coding system symbol to that for Unix-like EOL. */
4970 eol_type
= Fget (saved_coding_symbol
, Qeol_type
);
4971 if (VECTORP (eol_type
)
4972 && XVECTOR (eol_type
)->size
== 3
4973 && SYMBOLP (XVECTOR (eol_type
)->contents
[CODING_EOL_LF
]))
4974 coding
->symbol
= XVECTOR (eol_type
)->contents
[CODING_EOL_LF
];
4976 coding
->symbol
= saved_coding_symbol
;
4982 if (coding
->type
!= coding_type_ccl
4983 || coding
->mode
& CODING_MODE_LAST_BLOCK
)
4985 coding
->mode
|= CODING_MODE_LAST_BLOCK
;
4988 if (result
== CODING_FINISH_INSUFFICIENT_SRC
)
4990 /* The source text ends in invalid codes. Let's just
4991 make them valid buffer contents, and finish conversion. */
4992 inserted
+= len_byte
;
4993 inserted_byte
+= len_byte
;
4998 if (result
== CODING_FINISH_INTERRUPT
)
5000 /* The conversion procedure was interrupted by a user. */
5003 /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST */
5004 if (coding
->consumed
< 1)
5006 /* It's quite strange to require more memory without
5007 consuming any bytes. Perhaps CCL program bug. */
5012 /* We have just done the first batch of conversion which was
5013 stopped because of insufficient gap. Let's reconsider the
5014 required gap size (i.e. SRC - DST) now.
5016 We have converted ORIG bytes (== coding->consumed) into
5017 NEW bytes (coding->produced). To convert the remaining
5018 LEN bytes, we may need REQUIRE bytes of gap, where:
5019 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5020 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5021 Here, we are sure that NEW >= ORIG. */
5022 float ratio
= coding
->produced
- coding
->consumed
;
5023 ratio
/= coding
->consumed
;
5024 require
= len_byte
* ratio
;
5027 if ((src
- dst
) < (require
+ 2000))
5029 /* See the comment above the previous call of make_gap. */
5030 int add
= len_byte
+ inserted_byte
;
5033 ZV
+= add
; Z
+= add
; ZV_BYTE
+= add
; Z_BYTE
+= add
;
5034 GPT
+= inserted_byte
; GPT_BYTE
+= inserted_byte
;
5035 make_gap (require
+ 2000);
5037 ZV
-= add
; Z
-= add
; ZV_BYTE
-= add
; Z_BYTE
-= add
;
5038 GPT
-= inserted_byte
; GPT_BYTE
-= inserted_byte
;
5041 if (src
- dst
> 0) *dst
= 0; /* Put an anchor. */
5043 if (encodep
&& coding
->dst_multibyte
)
5045 /* The output is unibyte. We must convert 8-bit characters to
5047 if (inserted_byte
* 2 > GAP_SIZE
)
5049 GAP_SIZE
-= inserted_byte
;
5050 ZV
+= inserted_byte
; Z
+= inserted_byte
;
5051 ZV_BYTE
+= inserted_byte
; Z_BYTE
+= inserted_byte
;
5052 GPT
+= inserted_byte
; GPT_BYTE
+= inserted_byte
;
5053 make_gap (inserted_byte
- GAP_SIZE
);
5054 GAP_SIZE
+= inserted_byte
;
5055 ZV
-= inserted_byte
; Z
-= inserted_byte
;
5056 ZV_BYTE
-= inserted_byte
; Z_BYTE
-= inserted_byte
;
5057 GPT
-= inserted_byte
; GPT_BYTE
-= inserted_byte
;
5059 inserted_byte
= str_to_multibyte (GPT_ADDR
, GAP_SIZE
, inserted_byte
);
5062 /* If we have shrunk the conversion area, adjust it now. */
5066 safe_bcopy (GAP_END_ADDR
, GPT_ADDR
+ inserted_byte
, tail_skip
);
5067 inserted
+= total_skip
; inserted_byte
+= total_skip
;
5068 GAP_SIZE
+= total_skip
;
5069 GPT
-= head_skip
; GPT_BYTE
-= head_skip
;
5070 ZV
-= total_skip
; ZV_BYTE
-= total_skip
;
5071 Z
-= total_skip
; Z_BYTE
-= total_skip
;
5072 from
-= head_skip
; from_byte
-= head_skip
;
5073 to
+= tail_skip
; to_byte
+= tail_skip
;
5077 adjust_after_replace (from
, from_byte
, deletion
, inserted
, inserted_byte
);
5078 inserted
= Z
- prev_Z
;
5080 if (!encodep
&& coding
->cmp_data
&& coding
->cmp_data
->used
)
5081 coding_restore_composition (coding
, Fcurrent_buffer ());
5082 coding_free_composition_data (coding
);
5084 if (! inhibit_pre_post_conversion
5085 && ! encodep
&& ! NILP (coding
->post_read_conversion
))
5088 int count
= specpdl_ptr
- specpdl
;
5091 TEMP_SET_PT_BOTH (from
, from_byte
);
5093 record_unwind_protect (code_convert_region_unwind
, Qnil
);
5094 /* We should not call any more pre-write/post-read-conversion
5095 functions while this post-read-conversion is running. */
5096 inhibit_pre_post_conversion
= 1;
5097 val
= call1 (coding
->post_read_conversion
, make_number (inserted
));
5098 inhibit_pre_post_conversion
= 0;
5099 /* Discard the unwind protect. */
5101 CHECK_NUMBER (val
, 0);
5102 inserted
+= Z
- prev_Z
;
5105 if (orig_point
>= from
)
5107 if (orig_point
>= from
+ orig_len
)
5108 orig_point
+= inserted
- orig_len
;
5111 TEMP_SET_PT (orig_point
);
5116 signal_after_change (from
, to
- from
, inserted
);
5117 update_compositions (from
, from
+ inserted
, CHECK_BORDER
);
/* Publish the overall byte and character counts of this conversion
   in CODING.  */
5121 coding
->consumed
= to_byte
- from_byte
;
5122 coding
->consumed_char
= to
- from
;
5123 coding
->produced
= inserted_byte
;
5124 coding
->produced_char
= inserted
;
/* Run a pre-write-conversion or post-read-conversion function of
   CODING on the text of the string STR and return the resulting text
   as a string.

   STR is inserted into a temporary work buffer whose multibyteness is
   set to that of STR.  The code visible here calls
   CODING->pre_write_conversion with (BEG, Z) and
   CODING->post_read_conversion with (Z - BEG) after moving point to
   BEG; the test selecting between the two is not visible in this
   chunk (presumably ENCODEP -- confirm).  inhibit_pre_post_conversion
   is 1 while the function runs.  The previous current buffer is
   restored through the unwind-protect stack by unbind_to.  */
5131 run_pre_post_conversion_on_str (str
, coding
, encodep
)
5133 struct coding_system
*coding
;
5136 int count
= specpdl_ptr
- specpdl
;
5137 struct gcpro gcpro1
;
5138 struct buffer
*prev
= current_buffer
;
5139 int multibyte
= STRING_MULTIBYTE (str
);
5141 record_unwind_protect (Fset_buffer
, Fcurrent_buffer ());
5142 record_unwind_protect (code_convert_region_unwind
, Qnil
);
5144 temp_output_buffer_setup (" *code-converting-work*");
5145 set_buffer_internal (XBUFFER (Vstandard_output
));
5146 /* We must insert the contents of STR as is without
5147 unibyte<->multibyte conversion. For that, we adjust the
5148 multibyteness of the working buffer to that of STR. */
5150 current_buffer
->enable_multibyte_characters
= multibyte
? Qt
: Qnil
;
5151 insert_from_string (str
, 0, 0,
5152 XSTRING (str
)->size
, STRING_BYTES (XSTRING (str
)), 0);
5154 inhibit_pre_post_conversion
= 1;
5156 call2 (coding
->pre_write_conversion
, make_number (BEG
), make_number (Z
));
5159 TEMP_SET_PT_BOTH (BEG
, BEG_BYTE
);
5160 call1 (coding
->post_read_conversion
, make_number (Z
- BEG
));
5162 inhibit_pre_post_conversion
= 0;
5163 str
= make_buffer_string (BEG
, Z
, 0);
5164 return unbind_to (count
, str
);
/* Decode the string STR by the coding system CODING and return the
   decoded text as a multibyte string.

   Coding type and eol format are detected first when CODING requires
   detection.  When CODING needs no decoding, or when skipping leading
   and trailing ASCII leaves nothing to convert, STR itself is
   returned if NOCOPY is nonzero, otherwise a copy made with
   Fcopy_sequence.  If decoding stops with
   CODING_FINISH_INCONSISTENT_EOL, the whole string is decoded again
   with eol conversion turned off.  Composition data collected while
   decoding is applied to the result, and CODING->post_read_conversion
   is run on it when that is a symbol with a function definition.  */
5168 decode_coding_string (str
, coding
, nocopy
)
5170 struct coding_system
*coding
;
5175 int from
, to
, to_byte
;
5176 struct gcpro gcpro1
;
5177 Lisp_Object saved_coding_symbol
;
5181 to
= XSTRING (str
)->size
;
5182 to_byte
= STRING_BYTES (XSTRING (str
));
5184 saved_coding_symbol
= Qnil
;
5185 if (CODING_REQUIRE_DETECTION (coding
))
5187 /* See the comments in code_convert_region. */
5188 if (coding
->type
== coding_type_undecided
)
5190 detect_coding (coding
, XSTRING (str
)->data
, to_byte
);
5191 if (coding
->type
== coding_type_undecided
)
5192 coding
->type
= coding_type_emacs_mule
;
5194 if (coding
->eol_type
== CODING_EOL_UNDECIDED
5195 && coding
->type
!= coding_type_ccl
)
5197 saved_coding_symbol
= coding
->symbol
;
5198 detect_eol (coding
, XSTRING (str
)->data
, to_byte
);
5199 if (coding
->eol_type
== CODING_EOL_UNDECIDED
)
5200 coding
->eol_type
= CODING_EOL_LF
;
5201 /* We had better recover the original eol format if we
5202 encounter an inconsistent eol format while decoding. */
5203 coding
->mode
|= CODING_MODE_INHIBIT_INCONSISTENT_EOL
;
5207 if (! CODING_REQUIRE_DECODING (coding
))
5209 if (!STRING_MULTIBYTE (str
))
5211 str
= Fstring_as_multibyte (str
);
5214 return (nocopy
? str
: Fcopy_sequence (str
));
5217 if (STRING_MULTIBYTE (str
))
5219 /* Decoding routines expect the source text to be unibyte. */
5220 str
= Fstring_as_unibyte (str
);
5221 to_byte
= STRING_BYTES (XSTRING (str
));
5223 coding
->src_multibyte
= 0;
5225 coding
->dst_multibyte
= 1;
5227 if (coding
->composing
!= COMPOSITION_DISABLED
)
5228 coding_allocate_composition_data (coding
, from
);
5230 /* Try to skip the heading and trailing ASCIIs. */
5231 if (coding
->type
!= coding_type_ccl
)
5233 int from_orig
= from
;
5235 SHRINK_CONVERSION_REGION (&from
, &to_byte
, coding
, XSTRING (str
)->data
,
5237 if (from
== to_byte
)
5238 return (nocopy
? str
: Fcopy_sequence (str
));
5241 len
= decoding_buffer_size (coding
, to_byte
- from
);
5242 len
+= from
+ STRING_BYTES (XSTRING (str
)) - to_byte
;
5244 buf
= get_conversion_buffer (len
);
5248 bcopy (XSTRING (str
)->data
, buf
, from
);
5249 result
= decode_coding (coding
, XSTRING (str
)->data
+ from
,
5250 buf
+ from
, to_byte
- from
, len
);
5251 if (result
== CODING_FINISH_INCONSISTENT_EOL
)
5253 /* We simply try to decode the whole string again but without
5254 eol-conversion this time. */
5255 coding
->eol_type
= CODING_EOL_LF
;
5256 coding
->symbol
= saved_coding_symbol
;
5257 coding_free_composition_data (coding
);
5258 return decode_coding_string (str
, coding
, nocopy
);
5261 bcopy (XSTRING (str
)->data
+ to_byte
, buf
+ from
+ coding
->produced
,
5262 STRING_BYTES (XSTRING (str
)) - to_byte
);
5264 len
= from
+ STRING_BYTES (XSTRING (str
)) - to_byte
;
5265 str
= make_multibyte_string (buf
, len
+ coding
->produced_char
,
5266 len
+ coding
->produced
);
5268 if (coding
->cmp_data
&& coding
->cmp_data
->used
)
5269 coding_restore_composition (coding
, str
);
5270 coding_free_composition_data (coding
);
5272 if (SYMBOLP (coding
->post_read_conversion
)
5273 && !NILP (Ffboundp (coding
->post_read_conversion
)))
5274 str
= run_pre_post_conversion_on_str (str
, coding
, 0);
/* Encode the string STR by the coding system CODING and return the
   encoded text as a unibyte string.

   CODING->pre_write_conversion is run on STR first when it is a symbol
   with a function definition.  When CODING needs no encoding, or when
   skipping leading and trailing ASCII leaves nothing to convert, STR
   itself is returned if NOCOPY is nonzero, otherwise a copy made with
   Fcopy_sequence.  Otherwise the unconverted head, the encoded middle
   and the unconverted tail are assembled in a conversion buffer and
   turned into the result with make_unibyte_string.  */
5280 encode_coding_string (str
, coding
, nocopy
)
5282 struct coding_system
*coding
;
5287 int from
, to
, to_byte
;
5288 struct gcpro gcpro1
;
5289 Lisp_Object saved_coding_symbol
;
5292 if (SYMBOLP (coding
->pre_write_conversion
)
5293 && !NILP (Ffboundp (coding
->pre_write_conversion
)))
5294 str
= run_pre_post_conversion_on_str (str
, coding
, 1);
5297 to
= XSTRING (str
)->size
;
5298 to_byte
= STRING_BYTES (XSTRING (str
));
5300 saved_coding_symbol
= Qnil
;
5301 if (! CODING_REQUIRE_ENCODING (coding
))
5303 if (STRING_MULTIBYTE (str
))
5305 str
= Fstring_as_unibyte (str
);
5308 return (nocopy
? str
: Fcopy_sequence (str
));
5311 /* Encoding routines determine the multibyteness of the source text
5312 by coding->src_multibyte. */
5313 coding
->src_multibyte
= STRING_MULTIBYTE (str
);
5314 coding
->dst_multibyte
= 0;
5316 if (coding
->composing
!= COMPOSITION_DISABLED
)
5317 coding_save_composition (coding
, from
, to
, str
);
5319 /* Try to skip the heading and trailing ASCIIs. */
5320 if (coding
->type
!= coding_type_ccl
)
5322 int from_orig
= from
;
5324 SHRINK_CONVERSION_REGION (&from
, &to_byte
, coding
, XSTRING (str
)->data
,
5326 if (from
== to_byte
)
5327 return (nocopy
? str
: Fcopy_sequence (str
));
5330 len
= encoding_buffer_size (coding
, to_byte
- from
);
5331 len
+= from
+ STRING_BYTES (XSTRING (str
)) - to_byte
;
5333 buf
= get_conversion_buffer (len
);
5337 bcopy (XSTRING (str
)->data
, buf
, from
);
5338 result
= encode_coding (coding
, XSTRING (str
)->data
+ from
,
5339 buf
+ from
, to_byte
- from
, len
);
5340 bcopy (XSTRING (str
)->data
+ to_byte
, buf
+ from
+ coding
->produced
,
5341 STRING_BYTES (XSTRING (str
)) - to_byte
);
5343 len
= from
+ STRING_BYTES (XSTRING (str
)) - to_byte
;
5344 str
= make_unibyte_string (buf
, len
+ coding
->produced
);
5345 coding_free_composition_data (coding
);
5352 /*** 8. Emacs Lisp library functions ***/
/* Lisp primitive `coding-system-p' (exactly one argument).  The code
   visible here fetches the `coding-system' property of OBJ with Fget
   and tests that it is a vector of size 5.  */
5354 DEFUN ("coding-system-p", Fcoding_system_p
, Scoding_system_p
, 1, 1, 0,
5355 "Return t if OBJECT is nil or a coding-system.\n\
5356 See the documentation of `make-coding-system' for information\n\
5357 about coding-system objects.")
5365 /* Get coding-spec vector for OBJ. */
5366 obj
= Fget (obj
, Qcoding_system
);
5367 return ((VECTORP (obj
) && XVECTOR (obj
)->size
== 5)
/* Lisp primitive `read-non-nil-coding-system' (exactly one argument).
   Reads a name with Fcompleting_read over Vcoding_system_alist
   (require-match t, history Qcoding_system_history), repeating while
   the string read is empty, and returns the symbol interned from
   it.  */
5371 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system
,
5372 Sread_non_nil_coding_system
, 1, 1, 0,
5373 "Read a coding system from the minibuffer, prompting with string PROMPT.")
5380 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
5381 Qt
, Qnil
, Qcoding_system_history
, Qnil
, Qnil
);
5383 while (XSTRING (val
)->size
== 0);
5384 return (Fintern (val
, Qnil
));
/* Lisp primitive `read-coding-system' (one or two arguments).  A
   symbol given as DEFAULT-CODING-SYSTEM is replaced by its name
   string before being handed to Fcompleting_read as the default.
   Returns Qnil when the string read is empty, otherwise the symbol
   interned from it.  */
5387 DEFUN ("read-coding-system", Fread_coding_system
, Sread_coding_system
, 1, 2, 0,
5388 "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
5389 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
5390 (prompt
, default_coding_system
)
5391 Lisp_Object prompt
, default_coding_system
;
5394 if (SYMBOLP (default_coding_system
))
5395 XSETSTRING (default_coding_system
, XSYMBOL (default_coding_system
)->name
);
5396 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
5397 Qt
, Qnil
, Qcoding_system_history
,
5398 default_coding_system
, Qnil
);
5399 return (XSTRING (val
)->size
== 0 ? Qnil
: Fintern (val
, Qnil
));
/* Lisp primitive `check-coding-system'.  Requires CODING-SYSTEM to be
   a symbol (CHECK_SYMBOL), returns it unchanged when Fcoding_system_p
   accepts it, and otherwise signals Qcoding_system_error with the
   list (CODING-SYSTEM) as error data.  */
5402 DEFUN ("check-coding-system", Fcheck_coding_system
, Scheck_coding_system
,
5404 "Check validity of CODING-SYSTEM.\n\
5405 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
5406 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
5407 The value of property should be a vector of length 5.")
5409 Lisp_Object coding_system
;
5411 CHECK_SYMBOL (coding_system
, 0);
5412 if (!NILP (Fcoding_system_p (coding_system
)))
5413 return coding_system
;
5415 Fsignal (Qcoding_system_error
, Fcons (coding_system
, Qnil
));
/* Detect how the SRC_BYTES bytes at SRC are encoded.

   The category mask comes from detect_coding_mask and the eol format
   from detect_eol_type; CODING_EOL_INCONSISTENT is treated as
   CODING_EOL_UNDECIDED.  Candidates are gathered from
   Vcoding_category_list (categories whose symbol value is non-nil and
   whose index bit is set in the mask) and, when an eol format was
   detected, each is replaced by the matching element of its
   `eol-type' vector.

   Returns a list of coding systems, or only a single coding system
   when HIGHEST is nonzero.  */
5419 detect_coding_system (src
, src_bytes
, highest
)
5421 int src_bytes
, highest
;
5423 int coding_mask
, eol_type
;
5424 Lisp_Object val
, tmp
;
5427 coding_mask
= detect_coding_mask (src
, src_bytes
, NULL
, &dummy
);
5428 eol_type
= detect_eol_type (src
, src_bytes
, &dummy
);
5429 if (eol_type
== CODING_EOL_INCONSISTENT
)
5430 eol_type
= CODING_EOL_UNDECIDED
;
5435 if (eol_type
!= CODING_EOL_UNDECIDED
)
5438 val2
= Fget (Qundecided
, Qeol_type
);
5440 val
= XVECTOR (val2
)->contents
[eol_type
];
5442 return (highest
? val
: Fcons (val
, Qnil
));
5445 /* At first, gather possible coding systems in VAL. */
5447 for (tmp
= Vcoding_category_list
; CONSP (tmp
); tmp
= XCDR (tmp
))
5449 Lisp_Object category_val
, category_index
;
5451 category_index
= Fget (XCAR (tmp
), Qcoding_category_index
);
5452 category_val
= Fsymbol_value (XCAR (tmp
));
5453 if (!NILP (category_val
)
5454 && NATNUMP (category_index
)
5455 && (coding_mask
& (1 << XFASTINT (category_index
))))
5457 val
= Fcons (category_val
, val
);
5463 val
= Fnreverse (val
);
5465 /* Then, replace the elements with subsidiary coding systems. */
5466 for (tmp
= val
; CONSP (tmp
); tmp
= XCDR (tmp
))
5468 if (eol_type
!= CODING_EOL_UNDECIDED
5469 && eol_type
!= CODING_EOL_INCONSISTENT
)
5472 eol
= Fget (XCAR (tmp
), Qeol_type
);
5474 XCAR (tmp
) = XVECTOR (eol
)->contents
[eol_type
];
5477 return (highest
? XCAR (val
) : val
);
/* Lisp primitive `detect-coding-region'.  Validates START and END,
   converts them to byte positions, moves the gap to TO when the
   region straddles it (FROM < GPT <= TO) so the bytes are contiguous,
   and hands the bytes of the region to detect_coding_system.  */
5480 DEFUN ("detect-coding-region", Fdetect_coding_region
, Sdetect_coding_region
,
5482 "Detect coding system of the text in the region between START and END.\n\
5483 Return a list of possible coding systems ordered by priority.\n\
5485 If only ASCII characters are found, it returns a list of single element\n\
5486 `undecided' or its subsidiary coding system according to a detected\n\
5487 end-of-line format.\n\
5489 If optional argument HIGHEST is non-nil, return the coding system of\n\
5491 (start
, end
, highest
)
5492 Lisp_Object start
, end
, highest
;
5495 int from_byte
, to_byte
;
5497 CHECK_NUMBER_COERCE_MARKER (start
, 0);
5498 CHECK_NUMBER_COERCE_MARKER (end
, 1);
5500 validate_region (&start
, &end
);
5501 from
= XINT (start
), to
= XINT (end
);
5502 from_byte
= CHAR_TO_BYTE (from
);
5503 to_byte
= CHAR_TO_BYTE (to
);
5505 if (from
< GPT
&& to
>= GPT
)
5506 move_gap_both (to
, to_byte
);
5508 return detect_coding_system (BYTE_POS_ADDR (from_byte
),
5509 to_byte
- from_byte
,
/* Lisp primitive `detect-coding-string'.  Requires STRING to be a
   string (CHECK_STRING) and hands its data and byte length to
   detect_coding_system.  */
5513 DEFUN ("detect-coding-string", Fdetect_coding_string
, Sdetect_coding_string
,
5515 "Detect coding system of the text in STRING.\n\
5516 Return a list of possible coding systems ordered by priority.\n\
5518 If only ASCII characters are found, it returns a list of single element\n\
5519 `undecided' or its subsidiary coding system according to a detected\n\
5520 end-of-line format.\n\
5522 If optional argument HIGHEST is non-nil, return the coding system of\n\
5525 Lisp_Object string
, highest
;
5527 CHECK_STRING (string
, 0);
5529 return detect_coding_system (XSTRING (string
)->data
,
5530 STRING_BYTES (XSTRING (string
)),
/* Common worker for Fdecode_coding_region (ENCODEP == 0) and
   Fencode_coding_region (ENCODEP == 1).

   START and END are validated as a region and CODING_SYSTEM must be a
   symbol.  A nil CODING_SYSTEM means no conversion: the length of the
   region is returned.  Otherwise the coding system is set up (an
   error is signaled if that fails), flagged with
   CODING_MODE_LAST_BLOCK, and code_convert_region is called with
   REPLACE == 1.  Vlast_coding_system_used is set to the symbol of the
   coding system used, and the number of characters produced is
   returned.  */
5535 code_convert_region1 (start
, end
, coding_system
, encodep
)
5536 Lisp_Object start
, end
, coding_system
;
5539 struct coding_system coding
;
5542 CHECK_NUMBER_COERCE_MARKER (start
, 0);
5543 CHECK_NUMBER_COERCE_MARKER (end
, 1);
5544 CHECK_SYMBOL (coding_system
, 2);
5546 validate_region (&start
, &end
);
5547 from
= XFASTINT (start
);
5548 to
= XFASTINT (end
);
5550 if (NILP (coding_system
))
5551 return make_number (to
- from
);
5553 if (setup_coding_system (Fcheck_coding_system (coding_system
), &coding
) < 0)
5554 error ("Invalid coding system: %s", XSYMBOL (coding_system
)->name
->data
);
5556 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
5557 coding
.src_multibyte
= coding
.dst_multibyte
5558 = !NILP (current_buffer
->enable_multibyte_characters
);
5559 code_convert_region (from
, CHAR_TO_BYTE (from
), to
, CHAR_TO_BYTE (to
),
5560 &coding
, encodep
, 1);
5561 Vlast_coding_system_used
= coding
.symbol
;
5562 return make_number (coding
.produced_char
);
/* Lisp primitive `decode-coding-region'; an interactive command
   (interactive spec "r\nzCoding system: ").  Thin wrapper around
   code_convert_region1 with ENCODEP == 0.  */
5565 DEFUN ("decode-coding-region", Fdecode_coding_region
, Sdecode_coding_region
,
5566 3, 3, "r\nzCoding system: ",
5567 "Decode the current region by specified coding system.\n\
5568 When called from a program, takes three arguments:\n\
5569 START, END, and CODING-SYSTEM. START and END are buffer positions.\n\
5570 This function sets `last-coding-system-used' to the precise coding system\n\
5571 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5572 not fully specified.)\n\
5573 It returns the length of the decoded text.")
5574 (start
, end
, coding_system
)
5575 Lisp_Object start
, end
, coding_system
;
5577 return code_convert_region1 (start
, end
, coding_system
, 0);
/* Lisp primitive `encode-coding-region'; an interactive command
   (interactive spec "r\nzCoding system: ").  Thin wrapper around
   code_convert_region1 with ENCODEP == 1.  */
5580 DEFUN ("encode-coding-region", Fencode_coding_region
, Sencode_coding_region
,
5581 3, 3, "r\nzCoding system: ",
5582 "Encode the current region by specified coding system.\n\
5583 When called from a program, takes three arguments:\n\
5584 START, END, and CODING-SYSTEM. START and END are buffer positions.\n\
5585 This function sets `last-coding-system-used' to the precise coding system\n\
5586 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5587 not fully specified.)\n\
5588 It returns the length of the encoded text.")
5589 (start
, end
, coding_system
)
5590 Lisp_Object start
, end
, coding_system
;
5592 return code_convert_region1 (start
, end
, coding_system
, 1);
/* Common worker for Fdecode_coding_string (ENCODEP == 0) and
   Fencode_coding_string (ENCODEP == 1).

   STRING must be a string and CODING_SYSTEM a symbol.  A nil
   CODING_SYSTEM means no conversion: STRING is returned as is when
   NOCOPY is non-nil, otherwise a copy of it.  Otherwise the coding
   system is set up (an error is signaled if that fails), flagged with
   CODING_MODE_LAST_BLOCK, and the string is converted with
   encode_coding_string or decode_coding_string.
   Vlast_coding_system_used is set to the symbol of the coding system
   used.  */
5596 code_convert_string1 (string
, coding_system
, nocopy
, encodep
)
5597 Lisp_Object string
, coding_system
, nocopy
;
5600 struct coding_system coding
;
5602 CHECK_STRING (string
, 0);
5603 CHECK_SYMBOL (coding_system
, 1);
5605 if (NILP (coding_system
))
5606 return (NILP (nocopy
) ? Fcopy_sequence (string
) : string
);
5608 if (setup_coding_system (Fcheck_coding_system (coding_system
), &coding
) < 0)
5609 error ("Invalid coding system: %s", XSYMBOL (coding_system
)->name
->data
);
5611 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
5613 ? encode_coding_string (string
, &coding
, !NILP (nocopy
))
5614 : decode_coding_string (string
, &coding
, !NILP (nocopy
)));
5615 Vlast_coding_system_used
= coding
.symbol
;
/* Lisp primitive `decode-coding-string'.  Thin wrapper around
   code_convert_string1 with ENCODEP == 0.  */
5620 DEFUN ("decode-coding-string", Fdecode_coding_string
, Sdecode_coding_string
,
5622 "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
5623 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5624 if the decoding operation is trivial.\n\
5625 This function sets `last-coding-system-used' to the precise coding system\n\
5626 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5627 not fully specified.)")
5628 (string
, coding_system
, nocopy
)
5629 Lisp_Object string
, coding_system
, nocopy
;
5631 return code_convert_string1 (string
, coding_system
, nocopy
, 0);
/* Lisp primitive `encode-coding-string'.  Thin wrapper around
   code_convert_string1 with ENCODEP == 1.  */
5634 DEFUN ("encode-coding-string", Fencode_coding_string
, Sencode_coding_string
,
5636 "Encode STRING to CODING-SYSTEM, and return the result.\n\
5637 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5638 if the encoding operation is trivial.\n\
5639 This function sets `last-coding-system-used' to the precise coding system\n\
5640 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5641 not fully specified.)")
5642 (string
, coding_system
, nocopy
)
5643 Lisp_Object string
, coding_system
, nocopy
;
5645 return code_convert_string1 (string
, coding_system
, nocopy
, 1);
5648 /* Encode or decode STRING according to CODING_SYSTEM.
5649 Do not set Vlast_coding_system_used.
5651 This function is called only from macros DECODE_FILE and
5652 ENCODE_FILE, thus we ignore character composition.

   Composition is ignored by setting coding.composing to
   COMPOSITION_DISABLED before converting.  The conversion routines
   are called with NOCOPY == 1, so the value may be STRING itself.  An
   error is signaled when CODING_SYSTEM cannot be set up.  */
5655 code_convert_string_norecord (string
, coding_system
, encodep
)
5656 Lisp_Object string
, coding_system
;
5659 struct coding_system coding
;
5661 CHECK_STRING (string
, 0);
5662 CHECK_SYMBOL (coding_system
, 1);
5664 if (NILP (coding_system
))
5667 if (setup_coding_system (Fcheck_coding_system (coding_system
), &coding
) < 0)
5668 error ("Invalid coding system: %s", XSYMBOL (coding_system
)->name
->data
);
5670 coding
.composing
= COMPOSITION_DISABLED
;
5671 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
5673 ? encode_coding_string (string
, &coding
, 1)
5674 : decode_coding_string (string
, &coding
, 1));
5677 DEFUN ("decode-sjis-char", Fdecode_sjis_char
, Sdecode_sjis_char
, 1, 1, 0,
5678 "Decode a Japanese character which has CODE in shift_jis encoding.\n\
5679 Return the corresponding character.")
5683 unsigned char c1
, c2
, s1
, s2
;
5686 CHECK_NUMBER (code
, 0);
5687 s1
= (XFASTINT (code
)) >> 8, s2
= (XFASTINT (code
)) & 0xFF;
5691 XSETFASTINT (val
, s2
);
5692 else if (s2
>= 0xA0 || s2
<= 0xDF)
5693 XSETFASTINT (val
, MAKE_CHAR (charset_katakana_jisx0201
, s2
, 0));
5695 error ("Invalid Shift JIS code: %x", XFASTINT (code
));
5699 if ((s1
< 0x80 || s1
> 0x9F && s1
< 0xE0 || s1
> 0xEF)
5700 || (s2
< 0x40 || s2
== 0x7F || s2
> 0xFC))
5701 error ("Invalid Shift JIS code: %x", XFASTINT (code
));
5702 DECODE_SJIS (s1
, s2
, c1
, c2
);
5703 XSETFASTINT (val
, MAKE_CHAR (charset_jisx0208
, c1
, c2
));
5708 DEFUN ("encode-sjis-char", Fencode_sjis_char
, Sencode_sjis_char
, 1, 1, 0,
5709 "Encode a Japanese character CHAR to shift_jis encoding.\n\
5710 Return the corresponding code in SJIS.")
5714 int charset
, c1
, c2
, s1
, s2
;
5717 CHECK_NUMBER (ch
, 0);
5718 SPLIT_CHAR (XFASTINT (ch
), charset
, c1
, c2
);
5719 if (charset
== CHARSET_ASCII
)
5723 else if (charset
== charset_jisx0208
5724 && c1
> 0x20 && c1
< 0x7F && c2
> 0x20 && c2
< 0x7F)
5726 ENCODE_SJIS (c1
, c2
, s1
, s2
);
5727 XSETFASTINT (val
, (s1
<< 8) | s2
);
5729 else if (charset
== charset_katakana_jisx0201
5730 && c1
> 0x20 && c2
< 0xE0)
5732 XSETFASTINT (val
, c1
| 0x80);
5735 error ("Can't encode to shift_jis: %d", XFASTINT (ch
));
5739 DEFUN ("decode-big5-char", Fdecode_big5_char
, Sdecode_big5_char
, 1, 1, 0,
5740 "Decode a Big5 character which has CODE in BIG5 coding system.\n\
5741 Return the corresponding character.")
5746 unsigned char b1
, b2
, c1
, c2
;
5749 CHECK_NUMBER (code
, 0);
5750 b1
= (XFASTINT (code
)) >> 8, b2
= (XFASTINT (code
)) & 0xFF;
5754 error ("Invalid BIG5 code: %x", XFASTINT (code
));
5759 if ((b1
< 0xA1 || b1
> 0xFE)
5760 || (b2
< 0x40 || (b2
> 0x7E && b2
< 0xA1) || b2
> 0xFE))
5761 error ("Invalid BIG5 code: %x", XFASTINT (code
));
5762 DECODE_BIG5 (b1
, b2
, charset
, c1
, c2
);
5763 XSETFASTINT (val
, MAKE_CHAR (charset
, c1
, c2
));
5768 DEFUN ("encode-big5-char", Fencode_big5_char
, Sencode_big5_char
, 1, 1, 0,
5769 "Encode the Big5 character CHAR to BIG5 coding system.\n\
5770 Return the corresponding character code in Big5.")
5774 int charset
, c1
, c2
, b1
, b2
;
5777 CHECK_NUMBER (ch
, 0);
5778 SPLIT_CHAR (XFASTINT (ch
), charset
, c1
, c2
);
5779 if (charset
== CHARSET_ASCII
)
5783 else if ((charset
== charset_big5_1
5784 && (XFASTINT (ch
) >= 0x250a1 && XFASTINT (ch
) <= 0x271ec))
5785 || (charset
== charset_big5_2
5786 && XFASTINT (ch
) >= 0x290a1 && XFASTINT (ch
) <= 0x2bdb2))
5788 ENCODE_BIG5 (charset
, c1
, c2
, b1
, b2
);
5789 XSETFASTINT (val
, (b1
<< 8) | b2
);
5792 error ("Can't encode to Big5: %d", XFASTINT (ch
));
5796 DEFUN ("set-terminal-coding-system-internal",
5797 Fset_terminal_coding_system_internal
,
5798 Sset_terminal_coding_system_internal
, 1, 1, 0, "")
5800 Lisp_Object coding_system
;
5802 CHECK_SYMBOL (coding_system
, 0);
5803 setup_coding_system (Fcheck_coding_system (coding_system
), &terminal_coding
);
5804 /* We had better not send unsafe characters to terminal. */
5805 terminal_coding
.flags
|= CODING_FLAG_ISO_SAFE
;
5806 /* Characer composition should be disabled. */
5807 terminal_coding
.composing
= COMPOSITION_DISABLED
;
5808 terminal_coding
.src_multibyte
= 1;
5809 terminal_coding
.dst_multibyte
= 0;
5813 DEFUN ("set-safe-terminal-coding-system-internal",
5814 Fset_safe_terminal_coding_system_internal
,
5815 Sset_safe_terminal_coding_system_internal
, 1, 1, 0, "")
5817 Lisp_Object coding_system
;
5819 CHECK_SYMBOL (coding_system
, 0);
5820 setup_coding_system (Fcheck_coding_system (coding_system
),
5821 &safe_terminal_coding
);
5822 /* Characer composition should be disabled. */
5823 safe_terminal_coding
.composing
= COMPOSITION_DISABLED
;
5824 safe_terminal_coding
.src_multibyte
= 1;
5825 safe_terminal_coding
.dst_multibyte
= 0;
5829 DEFUN ("terminal-coding-system",
5830 Fterminal_coding_system
, Sterminal_coding_system
, 0, 0, 0,
5831 "Return coding system specified for terminal output.")
5834 return terminal_coding
.symbol
;
5837 DEFUN ("set-keyboard-coding-system-internal",
5838 Fset_keyboard_coding_system_internal
,
5839 Sset_keyboard_coding_system_internal
, 1, 1, 0, "")
5841 Lisp_Object coding_system
;
5843 CHECK_SYMBOL (coding_system
, 0);
5844 setup_coding_system (Fcheck_coding_system (coding_system
), &keyboard_coding
);
5845 /* Characer composition should be disabled. */
5846 keyboard_coding
.composing
= COMPOSITION_DISABLED
;
5850 DEFUN ("keyboard-coding-system",
5851 Fkeyboard_coding_system
, Skeyboard_coding_system
, 0, 0, 0,
5852 "Return coding system specified for decoding keyboard input.")
5855 return keyboard_coding
.symbol
;
5859 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system
,
5860 Sfind_operation_coding_system
, 1, MANY
, 0,
5861 "Choose a coding system for an operation based on the target name.\n\
5862 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
5863 DECODING-SYSTEM is the coding system to use for decoding\n\
5864 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
5865 for encoding (in case OPERATION does encoding).\n\
5867 The first argument OPERATION specifies an I/O primitive:\n\
5868 For file I/O, `insert-file-contents' or `write-region'.\n\
5869 For process I/O, `call-process', `call-process-region', or `start-process'.\n\
5870 For network I/O, `open-network-stream'.\n\
5872 The remaining arguments should be the same arguments that were passed\n\
5873 to the primitive. Depending on which primitive, one of those arguments\n\
5874 is selected as the TARGET. For example, if OPERATION does file I/O,\n\
5875 whichever argument specifies the file name is TARGET.\n\
5877 TARGET has a meaning which depends on OPERATION:\n\
5878 For file I/O, TARGET is a file name.\n\
5879 For process I/O, TARGET is a process name.\n\
5880 For network I/O, TARGET is a service name or a port number\n\
5882 This function looks up what specified for TARGET in,\n\
5883 `file-coding-system-alist', `process-coding-system-alist',\n\
5884 or `network-coding-system-alist' depending on OPERATION.\n\
5885 They may specify a coding system, a cons of coding systems,\n\
5886 or a function symbol to call.\n\
5887 In the last case, we call the function with one argument,\n\
5888 which is a list of all the arguments given to this function.")
5893 Lisp_Object operation
, target_idx
, target
, val
;
5894 register Lisp_Object chain
;
5897 error ("Too few arguments");
5898 operation
= args
[0];
5899 if (!SYMBOLP (operation
)
5900 || !INTEGERP (target_idx
= Fget (operation
, Qtarget_idx
)))
5901 error ("Invalid first arguement");
5902 if (nargs
< 1 + XINT (target_idx
))
5903 error ("Too few arguments for operation: %s",
5904 XSYMBOL (operation
)->name
->data
);
5905 target
= args
[XINT (target_idx
) + 1];
5906 if (!(STRINGP (target
)
5907 || (EQ (operation
, Qopen_network_stream
) && INTEGERP (target
))))
5908 error ("Invalid %dth argument", XINT (target_idx
) + 1);
5910 chain
= ((EQ (operation
, Qinsert_file_contents
)
5911 || EQ (operation
, Qwrite_region
))
5912 ? Vfile_coding_system_alist
5913 : (EQ (operation
, Qopen_network_stream
)
5914 ? Vnetwork_coding_system_alist
5915 : Vprocess_coding_system_alist
));
5919 for (; CONSP (chain
); chain
= XCDR (chain
))
5925 && ((STRINGP (target
)
5926 && STRINGP (XCAR (elt
))
5927 && fast_string_match (XCAR (elt
), target
) >= 0)
5928 || (INTEGERP (target
) && EQ (target
, XCAR (elt
)))))
5931 /* Here, if VAL is both a valid coding system and a valid
5932 function symbol, we return VAL as a coding system. */
5935 if (! SYMBOLP (val
))
5937 if (! NILP (Fcoding_system_p (val
)))
5938 return Fcons (val
, val
);
5939 if (! NILP (Ffboundp (val
)))
5941 val
= call1 (val
, Flist (nargs
, args
));
5944 if (SYMBOLP (val
) && ! NILP (Fcoding_system_p (val
)))
5945 return Fcons (val
, val
);
5953 DEFUN ("update-coding-systems-internal", Fupdate_coding_systems_internal
,
5954 Supdate_coding_systems_internal
, 0, 0, 0,
5955 "Update internal database for ISO2022 and CCL based coding systems.\n\
5956 When values of any coding categories are changed, you must\n\
5957 call this function")
5962 for (i
= CODING_CATEGORY_IDX_EMACS_MULE
; i
< CODING_CATEGORY_IDX_MAX
; i
++)
5966 val
= XSYMBOL (XVECTOR (Vcoding_category_table
)->contents
[i
])->value
;
5969 if (! coding_system_table
[i
])
5970 coding_system_table
[i
] = ((struct coding_system
*)
5971 xmalloc (sizeof (struct coding_system
)));
5972 setup_coding_system (val
, coding_system_table
[i
]);
5974 else if (coding_system_table
[i
])
5976 xfree (coding_system_table
[i
]);
5977 coding_system_table
[i
] = NULL
;
5984 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal
,
5985 Sset_coding_priority_internal
, 0, 0, 0,
5986 "Update internal database for the current value of `coding-category-list'.\n\
5987 This function is internal use only.")
5993 val
= Vcoding_category_list
;
5995 while (CONSP (val
) && i
< CODING_CATEGORY_IDX_MAX
)
5997 if (! SYMBOLP (XCAR (val
)))
5999 idx
= XFASTINT (Fget (XCAR (val
), Qcoding_category_index
));
6000 if (idx
>= CODING_CATEGORY_IDX_MAX
)
6002 coding_priorities
[i
++] = (1 << idx
);
6005 /* If coding-category-list is valid and contains all coding
6006 categories, `i' should be CODING_CATEGORY_IDX_MAX now. If not,
6007 the following code saves Emacs from crashing. */
6008 while (i
< CODING_CATEGORY_IDX_MAX
)
6009 coding_priorities
[i
++] = CODING_CATEGORY_MASK_RAW_TEXT
;
6017 /*** 9. Post-amble ***/
6022 conversion_buffer
= (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE
);
6030 /* Emacs' internal format specific initialize routine. */
6031 for (i
= 0; i
<= 0x20; i
++)
6032 emacs_code_class
[i
] = EMACS_control_code
;
6033 emacs_code_class
[0x0A] = EMACS_linefeed_code
;
6034 emacs_code_class
[0x0D] = EMACS_carriage_return_code
;
6035 for (i
= 0x21 ; i
< 0x7F; i
++)
6036 emacs_code_class
[i
] = EMACS_ascii_code
;
6037 emacs_code_class
[0x7F] = EMACS_control_code
;
6038 for (i
= 0x80; i
< 0xFF; i
++)
6039 emacs_code_class
[i
] = EMACS_invalid_code
;
6040 emacs_code_class
[LEADING_CODE_PRIVATE_11
] = EMACS_leading_code_3
;
6041 emacs_code_class
[LEADING_CODE_PRIVATE_12
] = EMACS_leading_code_3
;
6042 emacs_code_class
[LEADING_CODE_PRIVATE_21
] = EMACS_leading_code_4
;
6043 emacs_code_class
[LEADING_CODE_PRIVATE_22
] = EMACS_leading_code_4
;
6045 /* ISO2022 specific initialize routine. */
6046 for (i
= 0; i
< 0x20; i
++)
6047 iso_code_class
[i
] = ISO_control_0
;
6048 for (i
= 0x21; i
< 0x7F; i
++)
6049 iso_code_class
[i
] = ISO_graphic_plane_0
;
6050 for (i
= 0x80; i
< 0xA0; i
++)
6051 iso_code_class
[i
] = ISO_control_1
;
6052 for (i
= 0xA1; i
< 0xFF; i
++)
6053 iso_code_class
[i
] = ISO_graphic_plane_1
;
6054 iso_code_class
[0x20] = iso_code_class
[0x7F] = ISO_0x20_or_0x7F
;
6055 iso_code_class
[0xA0] = iso_code_class
[0xFF] = ISO_0xA0_or_0xFF
;
6056 iso_code_class
[ISO_CODE_CR
] = ISO_carriage_return
;
6057 iso_code_class
[ISO_CODE_SO
] = ISO_shift_out
;
6058 iso_code_class
[ISO_CODE_SI
] = ISO_shift_in
;
6059 iso_code_class
[ISO_CODE_SS2_7
] = ISO_single_shift_2_7
;
6060 iso_code_class
[ISO_CODE_ESC
] = ISO_escape
;
6061 iso_code_class
[ISO_CODE_SS2
] = ISO_single_shift_2
;
6062 iso_code_class
[ISO_CODE_SS3
] = ISO_single_shift_3
;
6063 iso_code_class
[ISO_CODE_CSI
] = ISO_control_sequence_introducer
;
6065 conversion_buffer_size
= MINIMUM_CONVERSION_BUFFER_SIZE
;
6067 setup_coding_system (Qnil
, &keyboard_coding
);
6068 setup_coding_system (Qnil
, &terminal_coding
);
6069 setup_coding_system (Qnil
, &safe_terminal_coding
);
6070 setup_coding_system (Qnil
, &default_buffer_file_coding
);
6072 bzero (coding_system_table
, sizeof coding_system_table
);
6074 bzero (ascii_skip_code
, sizeof ascii_skip_code
);
6075 for (i
= 0; i
< 128; i
++)
6076 ascii_skip_code
[i
] = 1;
6078 #if defined (MSDOS) || defined (WINDOWSNT)
6079 system_eol_type
= CODING_EOL_CRLF
;
6081 system_eol_type
= CODING_EOL_LF
;
6084 inhibit_pre_post_conversion
= 0;
6092 Qtarget_idx
= intern ("target-idx");
6093 staticpro (&Qtarget_idx
);
6095 Qcoding_system_history
= intern ("coding-system-history");
6096 staticpro (&Qcoding_system_history
);
6097 Fset (Qcoding_system_history
, Qnil
);
6099 /* Target FILENAME is the first argument. */
6100 Fput (Qinsert_file_contents
, Qtarget_idx
, make_number (0));
6101 /* Target FILENAME is the third argument. */
6102 Fput (Qwrite_region
, Qtarget_idx
, make_number (2));
6104 Qcall_process
= intern ("call-process");
6105 staticpro (&Qcall_process
);
6106 /* Target PROGRAM is the first argument. */
6107 Fput (Qcall_process
, Qtarget_idx
, make_number (0));
6109 Qcall_process_region
= intern ("call-process-region");
6110 staticpro (&Qcall_process_region
);
6111 /* Target PROGRAM is the third argument. */
6112 Fput (Qcall_process_region
, Qtarget_idx
, make_number (2));
6114 Qstart_process
= intern ("start-process");
6115 staticpro (&Qstart_process
);
6116 /* Target PROGRAM is the third argument. */
6117 Fput (Qstart_process
, Qtarget_idx
, make_number (2));
6119 Qopen_network_stream
= intern ("open-network-stream");
6120 staticpro (&Qopen_network_stream
);
6121 /* Target SERVICE is the fourth argument. */
6122 Fput (Qopen_network_stream
, Qtarget_idx
, make_number (3));
6124 Qcoding_system
= intern ("coding-system");
6125 staticpro (&Qcoding_system
);
6127 Qeol_type
= intern ("eol-type");
6128 staticpro (&Qeol_type
);
6130 Qbuffer_file_coding_system
= intern ("buffer-file-coding-system");
6131 staticpro (&Qbuffer_file_coding_system
);
6133 Qpost_read_conversion
= intern ("post-read-conversion");
6134 staticpro (&Qpost_read_conversion
);
6136 Qpre_write_conversion
= intern ("pre-write-conversion");
6137 staticpro (&Qpre_write_conversion
);
6139 Qno_conversion
= intern ("no-conversion");
6140 staticpro (&Qno_conversion
);
6142 Qundecided
= intern ("undecided");
6143 staticpro (&Qundecided
);
6145 Qcoding_system_p
= intern ("coding-system-p");
6146 staticpro (&Qcoding_system_p
);
6148 Qcoding_system_error
= intern ("coding-system-error");
6149 staticpro (&Qcoding_system_error
);
6151 Fput (Qcoding_system_error
, Qerror_conditions
,
6152 Fcons (Qcoding_system_error
, Fcons (Qerror
, Qnil
)));
6153 Fput (Qcoding_system_error
, Qerror_message
,
6154 build_string ("Invalid coding system"));
6156 Qcoding_category
= intern ("coding-category");
6157 staticpro (&Qcoding_category
);
6158 Qcoding_category_index
= intern ("coding-category-index");
6159 staticpro (&Qcoding_category_index
);
6161 Vcoding_category_table
6162 = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX
), Qnil
);
6163 staticpro (&Vcoding_category_table
);
6166 for (i
= 0; i
< CODING_CATEGORY_IDX_MAX
; i
++)
6168 XVECTOR (Vcoding_category_table
)->contents
[i
]
6169 = intern (coding_category_name
[i
]);
6170 Fput (XVECTOR (Vcoding_category_table
)->contents
[i
],
6171 Qcoding_category_index
, make_number (i
));
6175 Qtranslation_table
= intern ("translation-table");
6176 staticpro (&Qtranslation_table
);
6177 Fput (Qtranslation_table
, Qchar_table_extra_slots
, make_number (1));
6179 Qtranslation_table_id
= intern ("translation-table-id");
6180 staticpro (&Qtranslation_table_id
);
6182 Qtranslation_table_for_decode
= intern ("translation-table-for-decode");
6183 staticpro (&Qtranslation_table_for_decode
);
6185 Qtranslation_table_for_encode
= intern ("translation-table-for-encode");
6186 staticpro (&Qtranslation_table_for_encode
);
6188 Qsafe_charsets
= intern ("safe-charsets");
6189 staticpro (&Qsafe_charsets
);
6191 Qvalid_codes
= intern ("valid-codes");
6192 staticpro (&Qvalid_codes
);
6194 Qemacs_mule
= intern ("emacs-mule");
6195 staticpro (&Qemacs_mule
);
6197 Qraw_text
= intern ("raw-text");
6198 staticpro (&Qraw_text
);
6200 defsubr (&Scoding_system_p
);
6201 defsubr (&Sread_coding_system
);
6202 defsubr (&Sread_non_nil_coding_system
);
6203 defsubr (&Scheck_coding_system
);
6204 defsubr (&Sdetect_coding_region
);
6205 defsubr (&Sdetect_coding_string
);
6206 defsubr (&Sdecode_coding_region
);
6207 defsubr (&Sencode_coding_region
);
6208 defsubr (&Sdecode_coding_string
);
6209 defsubr (&Sencode_coding_string
);
6210 defsubr (&Sdecode_sjis_char
);
6211 defsubr (&Sencode_sjis_char
);
6212 defsubr (&Sdecode_big5_char
);
6213 defsubr (&Sencode_big5_char
);
6214 defsubr (&Sset_terminal_coding_system_internal
);
6215 defsubr (&Sset_safe_terminal_coding_system_internal
);
6216 defsubr (&Sterminal_coding_system
);
6217 defsubr (&Sset_keyboard_coding_system_internal
);
6218 defsubr (&Skeyboard_coding_system
);
6219 defsubr (&Sfind_operation_coding_system
);
6220 defsubr (&Supdate_coding_systems_internal
);
6221 defsubr (&Sset_coding_priority_internal
);
6223 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list
,
6224 "List of coding systems.\n\
6226 Do not alter the value of this variable manually. This variable should be\n\
6227 updated by the functions `make-coding-system' and\n\
6228 `define-coding-system-alias'.");
6229 Vcoding_system_list
= Qnil
;
6231 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist
,
6232 "Alist of coding system names.\n\
6233 Each element is one element list of coding system name.\n\
6234 This variable is given to `completing-read' as TABLE argument.\n\
6236 Do not alter the value of this variable manually. This variable should be\n\
6237 updated by the functions `make-coding-system' and\n\
6238 `define-coding-system-alias'.");
6239 Vcoding_system_alist
= Qnil
;
6241 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list
,
6242 "List of coding-categories (symbols) ordered by priority.");
6246 Vcoding_category_list
= Qnil
;
6247 for (i
= CODING_CATEGORY_IDX_MAX
- 1; i
>= 0; i
--)
6248 Vcoding_category_list
6249 = Fcons (XVECTOR (Vcoding_category_table
)->contents
[i
],
6250 Vcoding_category_list
);
6253 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read
,
6254 "Specify the coding system for read operations.\n\
6255 It is useful to bind this variable with `let', but do not set it globally.\n\
6256 If the value is a coding system, it is used for decoding on read operation.\n\
6257 If not, an appropriate element is used from one of the coding system alists:\n\
6258 There are three such tables, `file-coding-system-alist',\n\
6259 `process-coding-system-alist', and `network-coding-system-alist'.");
6260 Vcoding_system_for_read
= Qnil
;
6262 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write
,
6263 "Specify the coding system for write operations.\n\
6264 Programs bind this variable with `let', but you should not set it globally.\n\
6265 If the value is a coding system, it is used for encoding of output,\n\
6266 when writing it to a file and when sending it to a file or subprocess.\n\
6268 If this does not specify a coding system, an appropriate element\n\
6269 is used from one of the coding system alists:\n\
6270 There are three such tables, `file-coding-system-alist',\n\
6271 `process-coding-system-alist', and `network-coding-system-alist'.\n\
6272 For output to files, if the above procedure does not specify a coding system,\n\
6273 the value of `buffer-file-coding-system' is used.");
6274 Vcoding_system_for_write
= Qnil
;
6276 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used
,
6277 "Coding system used in the latest file or process I/O.");
6278 Vlast_coding_system_used
= Qnil
;
6280 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion
,
6281 "*Non-nil means always inhibit code conversion of end-of-line format.\n\
6282 See info node `Coding Systems' and info node `Text and Binary' concerning\n\
6284 inhibit_eol_conversion
= 0;
6286 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system
,
6287 "Non-nil means process buffer inherits coding system of process output.\n\
6288 Bind it to t if the process output is to be treated as if it were a file\n\
6289 read from some filesystem.");
6290 inherit_process_coding_system
= 0;
6292 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist
,
6293 "Alist to decide a coding system to use for a file I/O operation.\n\
6294 The format is ((PATTERN . VAL) ...),\n\
6295 where PATTERN is a regular expression matching a file name,\n\
6296 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6297 If VAL is a coding system, it is used for both decoding and encoding\n\
6298 the file contents.\n\
6299 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6300 and the cdr part is used for encoding.\n\
6301 If VAL is a function symbol, the function must return a coding system\n\
6302 or a cons of coding systems which are used as above.\n\
6304 See also the function `find-operation-coding-system'\n\
6305 and the variable `auto-coding-alist'.");
6306 Vfile_coding_system_alist
= Qnil
;
6308 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist
,
6309 "Alist to decide a coding system to use for a process I/O operation.\n\
6310 The format is ((PATTERN . VAL) ...),\n\
6311 where PATTERN is a regular expression matching a program name,\n\
6312 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6313 If VAL is a coding system, it is used for both decoding what received\n\
6314 from the program and encoding what sent to the program.\n\
6315 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6316 and the cdr part is used for encoding.\n\
6317 If VAL is a function symbol, the function must return a coding system\n\
6318 or a cons of coding systems which are used as above.\n\
6320 See also the function `find-operation-coding-system'.");
6321 Vprocess_coding_system_alist
= Qnil
;
6323 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist
,
6324 "Alist to decide a coding system to use for a network I/O operation.\n\
6325 The format is ((PATTERN . VAL) ...),\n\
6326 where PATTERN is a regular expression matching a network service name\n\
6327 or is a port number to connect to,\n\
6328 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6329 If VAL is a coding system, it is used for both decoding what received\n\
6330 from the network stream and encoding what sent to the network stream.\n\
6331 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6332 and the cdr part is used for encoding.\n\
6333 If VAL is a function symbol, the function must return a coding system\n\
6334 or a cons of coding systems which are used as above.\n\
6336 See also the function `find-operation-coding-system'.");
6337 Vnetwork_coding_system_alist
= Qnil
;
6339 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system
,
6340 "Coding system to use with system messages.");
6341 Vlocale_coding_system
= Qnil
;
6343 /* The eol mnemonics are reset in startup.el system-dependently. */
6344 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix
,
6345 "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
6346 eol_mnemonic_unix
= build_string (":");
6348 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos
,
6349 "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
6350 eol_mnemonic_dos
= build_string ("\\");
6352 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac
,
6353 "*String displayed in mode line for MAC-like (CR) end-of-line format.");
6354 eol_mnemonic_mac
= build_string ("/");
6356 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided
,
6357 "*String displayed in mode line when end-of-line format is not yet determined.");
6358 eol_mnemonic_undecided
= build_string (":");
6360 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation
,
6361 "*Non-nil enables character translation while encoding and decoding.");
6362 Venable_character_translation
= Qt
;
6364 DEFVAR_LISP ("standard-translation-table-for-decode",
6365 &Vstandard_translation_table_for_decode
,
6366 "Table for translating characters while decoding.");
6367 Vstandard_translation_table_for_decode
= Qnil
;
6369 DEFVAR_LISP ("standard-translation-table-for-encode",
6370 &Vstandard_translation_table_for_encode
,
6371 "Table for translationg characters while encoding.");
6372 Vstandard_translation_table_for_encode
= Qnil
;
6374 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist
,
6375 "Alist of charsets vs revision numbers.\n\
6376 While encoding, if a charset (car part of an element) is found,\n\
6377 designate it with the escape sequence identifing revision (cdr part of the element).");
6378 Vcharset_revision_alist
= Qnil
;
6380 DEFVAR_LISP ("default-process-coding-system",
6381 &Vdefault_process_coding_system
,
6382 "Cons of coding systems used for process I/O by default.\n\
6383 The car part is used for decoding a process output,\n\
6384 the cdr part is used for encoding a text to be sent to a process.");
6385 Vdefault_process_coding_system
= Qnil
;
6387 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table
,
6388 "Table of extra Latin codes in the range 128..159 (inclusive).\n\
6389 This is a vector of length 256.\n\
6390 If Nth element is non-nil, the existence of code N in a file\n\
6391 \(or output of subprocess) doesn't prevent it to be detected as\n\
6392 a coding system of ISO 2022 variant which has a flag\n\
6393 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
6394 or reading output of a subprocess.\n\
6395 Only 128th through 159th elements has a meaning.");
6396 Vlatin_extra_code_table
= Fmake_vector (make_number (256), Qnil
);
6398 DEFVAR_LISP ("select-safe-coding-system-function",
6399 &Vselect_safe_coding_system_function
,
6400 "Function to call to select safe coding system for encoding a text.\n\
6402 If set, this function is called to force a user to select a proper\n\
6403 coding system which can encode the text in the case that a default\n\
6404 coding system used in each operation can't encode the text.\n\
6406 The default value is `select-safe-coding-system' (which see).");
6407 Vselect_safe_coding_system_function
= Qnil
;
6412 emacs_strerror (error_number
)
6417 synchronize_system_messages_locale ();
6418 str
= strerror (error_number
);
6420 if (! NILP (Vlocale_coding_system
))
6422 Lisp_Object dec
= code_convert_string_norecord (build_string (str
),
6423 Vlocale_coding_system
,
6425 str
= (char *) XSTRING (dec
)->data
;