/* Header for coding system handler.
- Ver.1.0
- Copyright (C) 1995 Free Software Foundation, Inc.
- Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
+ Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
+ Licensed to the Free Software Foundation.
This file is part of GNU Emacs.
#include "../src/ccl.h"
#endif
-/*** EMACS' INTERNAL FORMAT section ***/
+/*** EMACS' INTERNAL FORMAT (emacs-mule) section ***/
/* All code (1-byte) of Emacs' internal format is classified into one
of the following. See also `charset.h'. */
/* If set, do not produce ISO6429's direction specifying sequence. */
#define CODING_FLAG_ISO_NO_DIRECTION 0x0100
+/* If set, assume designation states are reset at beginning of line on
+ output. */
+#define CODING_FLAG_ISO_INIT_AT_BOL 0x0200
+
+/* If set, designation sequence should be placed at beginning of line
+ on output. */
+#define CODING_FLAG_ISO_DESIGNATE_AT_BOL 0x0400
+
+/* If set, do not encode unsafe characters on output. */
+#define CODING_FLAG_ISO_SAFE 0x0800
+
+/* If set, extra latin codes (128..159) are accepted as a valid code
+ on input. */
+#define CODING_FLAG_ISO_LATIN_EXTRA 0x1000
+
+/* If set, use designation escape sequence. */
+#define CODING_FLAG_ISO_DESIGNATION 0x10000
+
+/* A character to be produced on output if encoding of the original
+ character is prohibited by CODING_FLAG_ISO_SAFE. */
+#define CODING_INHIBIT_CHARACTER_SUBSTITUTION 077 /* 077 == `?' */
+
/* Structure of the field `spec.iso2022' in the structure `coding_system'. */
struct iso2022_spec
{
/* A charset initially designated to each graphic register. */
int initial_designation[4];
+ /* If not -1, it is a graphic register specified in an invalid
+ designation sequence. */
+ int last_invalid_designation_register;
+
/* A graphic register to which each charset should be designated. */
- int requested_designation[MAX_CHARSET];
+ unsigned char requested_designation[MAX_CHARSET + 1];
+
+ /* A revision number to be specified for each charset on encoding.
+ The value 255 means no revision number for the corresponding
+ charset. */
+ unsigned char charset_revision_number[MAX_CHARSET + 1];
/* Set to 1 temporarily only when graphic register 2 or 3 is invoked
by single-shift while encoding. */
int single_shifting;
+
+ /* Set to 1 temporarily only when processing at beginning of line. */
+ int bol;
};
/* Macros to access each field in the structure `spec.iso2022'. */
#define CODING_SPEC_ISO_INVOCATION(coding, plane) \
- coding->spec.iso2022.current_invocation[plane]
+ (coding)->spec.iso2022.current_invocation[plane]
#define CODING_SPEC_ISO_DESIGNATION(coding, reg) \
- coding->spec.iso2022.current_designation[reg]
+ (coding)->spec.iso2022.current_designation[reg]
#define CODING_SPEC_ISO_INITIAL_DESIGNATION(coding, reg) \
- coding->spec.iso2022.initial_designation[reg]
+ (coding)->spec.iso2022.initial_designation[reg]
#define CODING_SPEC_ISO_REQUESTED_DESIGNATION(coding, charset) \
- coding->spec.iso2022.requested_designation[charset]
-
-/* Set to 1 temporarily only when encoding a character with
- single-shift function. */
+ (coding)->spec.iso2022.requested_designation[charset]
+#define CODING_SPEC_ISO_REVISION_NUMBER(coding, charset) \
+ (coding)->spec.iso2022.charset_revision_number[charset]
#define CODING_SPEC_ISO_SINGLE_SHIFTING(coding) \
- coding->spec.iso2022.single_shifting
+ (coding)->spec.iso2022.single_shifting
+#define CODING_SPEC_ISO_BOL(coding) \
+ (coding)->spec.iso2022.bol
+
+/* A value which may appear in
+ coding->spec.iso2022.requested_designation indicating that the
+ corresponding charset does not request any graphic register to be
+ designated. */
+#define CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION 4
/* Return a charset which is currently designated to the graphic plane
PLANE in the coding-system CODING. */
-#define CODING_SPEC_ISO_PLANE_CHARSET(coding, plane) \
- CODING_SPEC_ISO_DESIGNATION \
- (coding, CODING_SPEC_ISO_INVOCATION (coding, plane))
+#define CODING_SPEC_ISO_PLANE_CHARSET(coding, plane) \
+ ((CODING_SPEC_ISO_INVOCATION (coding, plane) < 0) \
+ ? -1 \
+ : CODING_SPEC_ISO_DESIGNATION (coding, \
+ CODING_SPEC_ISO_INVOCATION (coding, plane)))
/*** BIG5 section ***/
coding_type_no_conversion, /* A coding system which requires no
conversion for reading and writing
including end-of-line format. */
- coding_type_internal, /* A coding system used in Emacs'
+ coding_type_emacs_mule, /* A coding system used in Emacs'
buffer and string. Requires no
conversion for reading and writing
except for end-of-line format. */
- coding_type_automatic, /* A coding system which requires
+ coding_type_undecided, /* A coding system which requires
automatic detection of a real
coding system. */
coding_type_sjis, /* SJIS coding system for Japanese. */
coding_type_iso2022, /* Any coding system of ISO2022
variants. */
coding_type_big5, /* BIG5 coding system for Chinese. */
- coding_type_ccl /* The coding system of which decoder
+ coding_type_ccl, /* The coding system of which decoder
and encoder are written in CCL. */
+ coding_type_raw_text /* A coding system for a text
+ containing random 8-bit code which
+ does not require code conversion
+ except for end-of-line format. */
};
/* Formats of end-of-line. */
#define CODING_EOL_CRLF 1 /* Sequence of carriage-return and
line-feed. */
#define CODING_EOL_CR 2 /* Carriage-return only. */
-#define CODING_EOL_AUTOMATIC 3 /* This value is used to denote the
+#define CODING_EOL_UNDECIDED 3 /* This value is used to denote the
eol-type is not yet decided. */
+#define CODING_EOL_INCONSISTENT 4 /* This value is used to denote the
+ eol-type is not consistent
+ through the file. */
/* Character composition status while encoding/decoding. */
#define COMPOSING_NO 0 /* not composing */
/* 1 iff composing with embedded composition rule. */
#define COMPOSING_WITH_RULE_P(composing) ((composing) & 1)
+/* Macros used for the member finish_status of the struct
+ coding_system. */
+#define CODING_FINISH_NORMAL 0
+#define CODING_FINISH_INSUFFICIENT_SRC 1
+#define CODING_FINISH_INSUFFICIENT_DST 2
+#define CODING_FINISH_INCONSISTENT_EOL 3
+
+/* Macros used for the member `mode' of the struct coding_system. */
+
+/* If set, recover the original CR or LF of the already decoded text
+ when the decoding routine encounters an inconsistent eol format. */
+#define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01
+
+/* If set, the decoding/encoding routines treat the current data as
+ the last block of the whole text to be converted, and do
+ appropriate finishing job. */
+#define CODING_MODE_LAST_BLOCK 0x02
+
+/* If set, it means that the current source text is in a buffer which
+ enables selective display. */
+#define CODING_MODE_SELECTIVE_DISPLAY 0x04
+
+/* This flag is used by the decoding/encoding routines on the fly. If
+ set, it means that right-to-left text is being processed. */
+#define CODING_MODE_DIRECTION 0x08
+
struct coding_system
{
/* Type of the coding system. */
enum coding_type type;
- /* If the coding system requires specific code to be attached at the
- tail of converted text, this value should be set to `1'. */
- int require_flushing;
+ /* Type of end-of-line format (LF, CRLF, or CR) of the coding system. */
+ int eol_type;
+
+ /* Flag bits of the coding system. The meaning of each bit is common
+ to all types of coding systems. */
+ unsigned int common_flags;
/* Flag bits of the coding system. The meaning of each bit depends
on the type of the coding system. */
unsigned int flags;
- /* Type of end-of-line format (LF, CRLF, or CR) of the coding system. */
- int eol_type;
+ /* Mode bits of the coding system. See the comments of the macros
+ CODING_MODE_XXX. */
+ unsigned int mode;
- /* Non-zero means that the current source text is the last block of the
- whole text to be converted. */
- int last_block;
+ /* Table of safe character sets for this coding system. If the Nth
+ element is 0, the charset of ID N is not a safe character set.
+ Such a character set is not encoded when CODING_FLAG_ISO_SAFE is
+ set. */
+ unsigned char safe_charsets[MAX_CHARSET + 1];
/* Non-zero means that characters are being composed currently while
decoding or encoding. See macros COMPOSING_XXXX above for the
meaning of each non-zero value. */
int composing;
- /* 0 (left-to-right) or 1 (right-to-left): the direction of the text
- being processed currently. */
- int direction;
-
- /* Non-zero means that the current source text is in a buffer which
- enables selective display. */
- int selective;
-
/* Detailed information specific to each type of coding system. */
union spec
{
struct ccl_spec ccl; /* Defined in ccl.h. */
} spec;
+ /* Index number of coding category of the coding system. */
+ int category_idx;
+
+ /* How many heading bytes we can skip for decoding. This is set to
+ -1 in setup_coding_system, and updated by detect_coding. So,
+ when this is equal to the byte length of the text being
+ converted, we can skip the actual conversion process. */
+ int heading_ascii;
+
+ /* The following members are set by encoding/decoding routine. */
+ int produced, produced_char, consumed, consumed_char;
+
+ /* Encoding routines set this to 1 when they produce a byte sequence
+ which can be parsed as a multibyte character. Decoding routines
+ set this to 1 when they encounter an invalid code and, as the
+ result, produce an unexpected multibyte character. */
+ int fake_multibyte;
+
+ /* The following members are all Lisp symbols. We don't have to
+ protect them from GC because the current garbage collection
+ doesn't relocate Lisp symbols. But, when it is changed, we must
+ find a way to protect them. */
+
/* Backward pointer to the Lisp symbol of the coding system. */
Lisp_Object symbol;
/* Lisp function (symbol) to be called after decoding to do
- additional conversion. */
+ additional conversion, or nil. */
Lisp_Object post_read_conversion;
/* Lisp function (symbol) to be called before encoding to do
- additional conversion. */
+ additional conversion, or nil. */
Lisp_Object pre_write_conversion;
- /* Carryover yielded by decoding/encoding incomplete source. No
- coding-system yields more than 7-byte of carryover. This does
- not include a text which is not processed because of short of
- output buffer. */
- char carryover[8];
-
- /* Actual data length in the above array. */
- int carryover_size;
+ /* Character translation tables to look up, or nil. */
+ Lisp_Object character_translation_table_for_decode;
+ Lisp_Object character_translation_table_for_encode;
};
-/* Return 1 if the coding-system CODING requires conversion of
- representation of a visible character (text). */
-#define CODING_REQUIRE_TEXT_CONVERSION(coding) \
- ((coding)->type != coding_type_no_conversion \
- && (coding)->type != coding_type_internal)
+#define CODING_REQUIRE_FLUSHING_MASK 1
+#define CODING_REQUIRE_DECODING_MASK 2
+#define CODING_REQUIRE_ENCODING_MASK 4
+#define CODING_REQUIRE_DETECTION_MASK 8
+
+/* Return 1 if the coding system CODING requires specific code to be
+ attached at the tail of converted text. */
+#define CODING_REQUIRE_FLUSHING(coding) \
+ ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)
+
+/* Return non-zero if the coding system CODING requires code conversion on
+ decoding. */
+#define CODING_REQUIRE_DECODING(coding) \
+ ((coding)->common_flags & CODING_REQUIRE_DECODING_MASK)
+
+/* Return non-zero if the coding system CODING requires code conversion on
+ encoding. */
+#define CODING_REQUIRE_ENCODING(coding) \
+ ((coding)->common_flags & CODING_REQUIRE_ENCODING_MASK)
-/* Return 1 if the coding-system CODING requires conversion of the
- format of end-of-line. */
-#define CODING_REQUIRE_EOL_CONVERSION(coding) \
- ((coding)->eol_type != CODING_EOL_AUTOMATIC \
- && (coding)->eol_type != CODING_EOL_LF)
+/* Return non-zero if the coding system CODING requires some kind of code
+ detection. */
+#define CODING_REQUIRE_DETECTION(coding) \
+ ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)
-/* Return 1 if the coding-system CODING requires some conversion. */
-#define CODING_REQUIRE_CONVERSION(coding) \
- (CODING_REQUIRE_TEXT_CONVERSION (coding) \
- || CODING_REQUIRE_EOL_CONVERSION (coding))
+#define CODING_MAY_REQUIRE_DECODING(coding) \
+ ((coding)->common_flags \
+ & (CODING_REQUIRE_DETECTION_MASK | CODING_REQUIRE_DECODING_MASK))
/* Index for each coding category in `coding_category_table' */
-#define CODING_CATEGORY_IDX_INTERNAL 0
+#define CODING_CATEGORY_IDX_EMACS_MULE 0
#define CODING_CATEGORY_IDX_SJIS 1
#define CODING_CATEGORY_IDX_ISO_7 2
-#define CODING_CATEGORY_IDX_ISO_8_1 3
-#define CODING_CATEGORY_IDX_ISO_8_2 4
-#define CODING_CATEGORY_IDX_ISO_ELSE 5
-#define CODING_CATEGORY_IDX_BIG5 6
-#define CODING_CATEGORY_IDX_BINARY 7
-#define CODING_CATEGORY_IDX_MAX 8
+#define CODING_CATEGORY_IDX_ISO_7_TIGHT 3
+#define CODING_CATEGORY_IDX_ISO_8_1 4
+#define CODING_CATEGORY_IDX_ISO_8_2 5
+#define CODING_CATEGORY_IDX_ISO_7_ELSE 6
+#define CODING_CATEGORY_IDX_ISO_8_ELSE 7
+#define CODING_CATEGORY_IDX_BIG5 8
+#define CODING_CATEGORY_IDX_RAW_TEXT 9
+#define CODING_CATEGORY_IDX_BINARY 10
+#define CODING_CATEGORY_IDX_MAX 11
/* Definitions of flag bits returned by the function
detect_coding_mask (). */
-#define CODING_CATEGORY_MASK_INTERNAL (1 << CODING_CATEGORY_IDX_INTERNAL)
+#define CODING_CATEGORY_MASK_EMACS_MULE (1 << CODING_CATEGORY_IDX_EMACS_MULE)
#define CODING_CATEGORY_MASK_SJIS (1 << CODING_CATEGORY_IDX_SJIS)
#define CODING_CATEGORY_MASK_ISO_7 (1 << CODING_CATEGORY_IDX_ISO_7)
+#define CODING_CATEGORY_MASK_ISO_7_TIGHT (1 << CODING_CATEGORY_IDX_ISO_7_TIGHT)
#define CODING_CATEGORY_MASK_ISO_8_1 (1 << CODING_CATEGORY_IDX_ISO_8_1)
#define CODING_CATEGORY_MASK_ISO_8_2 (1 << CODING_CATEGORY_IDX_ISO_8_2)
-#define CODING_CATEGORY_MASK_ISO_ELSE (1 << CODING_CATEGORY_IDX_ISO_ELSE)
+#define CODING_CATEGORY_MASK_ISO_7_ELSE (1 << CODING_CATEGORY_IDX_ISO_7_ELSE)
+#define CODING_CATEGORY_MASK_ISO_8_ELSE (1 << CODING_CATEGORY_IDX_ISO_8_ELSE)
#define CODING_CATEGORY_MASK_BIG5 (1 << CODING_CATEGORY_IDX_BIG5)
+#define CODING_CATEGORY_MASK_RAW_TEXT (1 << CODING_CATEGORY_IDX_RAW_TEXT)
+#define CODING_CATEGORY_MASK_BINARY (1 << CODING_CATEGORY_IDX_BINARY)
/* This value is returned if detect_coding_mask () finds nothing other
than ASCII characters. */
#define CODING_CATEGORY_MASK_ANY \
- ( CODING_CATEGORY_MASK_INTERNAL \
+ ( CODING_CATEGORY_MASK_EMACS_MULE \
| CODING_CATEGORY_MASK_SJIS \
| CODING_CATEGORY_MASK_ISO_7 \
+ | CODING_CATEGORY_MASK_ISO_7_TIGHT \
| CODING_CATEGORY_MASK_ISO_8_1 \
| CODING_CATEGORY_MASK_ISO_8_2 \
- | CODING_CATEGORY_MASK_ISO_ELSE \
+ | CODING_CATEGORY_MASK_ISO_7_ELSE \
+ | CODING_CATEGORY_MASK_ISO_8_ELSE \
| CODING_CATEGORY_MASK_BIG5)
+#define CODING_CATEGORY_MASK_ISO_7BIT \
+ (CODING_CATEGORY_MASK_ISO_7 | CODING_CATEGORY_MASK_ISO_7_TIGHT)
+
+#define CODING_CATEGORY_MASK_ISO_8BIT \
+ (CODING_CATEGORY_MASK_ISO_8_1 | CODING_CATEGORY_MASK_ISO_8_2)
+
+#define CODING_CATEGORY_MASK_ISO_SHIFT \
+ (CODING_CATEGORY_MASK_ISO_7_ELSE | CODING_CATEGORY_MASK_ISO_8_ELSE)
+
+#define CODING_CATEGORY_MASK_ISO \
+ ( CODING_CATEGORY_MASK_ISO_7BIT \
+ | CODING_CATEGORY_MASK_ISO_SHIFT \
+ | CODING_CATEGORY_MASK_ISO_8BIT)
+
/* Macros to decode or encode a character of JISX0208 in SJIS. S1 and
S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding
system. C1 and C2 are the 1st and 2nd position codes of Emacs'
s2 = c2 + 0x7E; \
} while (0)
+/* Encode the file name NAME using the specified coding system
+ for file names, if any.  Vfile_name_coding_system is consulted
+ first and Vdefault_file_name_coding_system second; each is used
+ only when it is non-nil and its XFASTINT value is non-zero.  When
+ neither qualifies, the result is NAME itself, unencoded.  */
+#define ENCODE_FILE(name) \
+ (! NILP (Vfile_name_coding_system) \
+ && XFASTINT (Vfile_name_coding_system) != 0 \
+ ? Fencode_coding_string (name, Vfile_name_coding_system, Qt) \
+ : (! NILP (Vdefault_file_name_coding_system) \
+ && XFASTINT (Vdefault_file_name_coding_system) \
+ ? Fencode_coding_string (name, Vdefault_file_name_coding_system, Qt) \
+ : name))
+
+/* Decode the file name NAME using the specified coding system
+ for file names, if any.  Vfile_name_coding_system is consulted
+ first and Vdefault_file_name_coding_system second; each is used
+ only when it is non-nil and its XFASTINT value is non-zero.  When
+ neither qualifies, the result is NAME itself, undecoded.  */
+#define DECODE_FILE(name) \
+ (! NILP (Vfile_name_coding_system) \
+ && XFASTINT (Vfile_name_coding_system) != 0 \
+ ? Fdecode_coding_string (name, Vfile_name_coding_system, Qt) \
+ : (! NILP (Vdefault_file_name_coding_system) \
+ && XFASTINT (Vdefault_file_name_coding_system) \
+ ? Fdecode_coding_string (name, Vdefault_file_name_coding_system, Qt) \
+ : name))
+
/* Extern declarations. */
-extern int decode_coding (), encode_coding ();
-extern int decoding_buffer_size (), encoding_buffer_size ();
+extern int decode_coding P_ ((struct coding_system *, unsigned char *,
+ unsigned char *, int, int));
+extern int encode_coding P_ ((struct coding_system *, unsigned char *,
+ unsigned char *, int, int));
+extern int code_convert_region P_ ((int, int, int, int, struct coding_system *,
+ int, int));
+extern int decoding_buffer_size P_ ((struct coding_system *, int));
+extern int encoding_buffer_size P_ ((struct coding_system *, int));
+extern void detect_coding P_ ((struct coding_system *, unsigned char *, int));
+extern void detect_eol P_ ((struct coding_system *, unsigned char *, int));
extern int conversion_buffer_size;
-extern char *conversion_buffer, *get_conversion_buffer ();
-extern Lisp_Object Fcheck_coding_system ();
+extern char *conversion_buffer;
+extern char *get_conversion_buffer P_ ((int));
+extern int setup_coding_system P_ ((Lisp_Object, struct coding_system *));
extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index;
+extern Lisp_Object Qraw_text;
extern Lisp_Object Qbuffer_file_coding_system;
extern Lisp_Object Vcoding_category_list;
+extern Lisp_Object Qcharacter_translation_table;
+extern Lisp_Object Qcharacter_translation_table_id;
+
/* Mnemonic character to indicate each type of end-of-line. */
extern int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
/* Mnemonic character to indicate type of end-of-line is not yet decided. */
extern int eol_mnemonic_undecided;
-/* Table of coding-systems currently assigned to each coding-category. */
-extern Lisp_Object coding_category_table[CODING_CATEGORY_IDX_MAX];
-/* Table of names of symbol for each coding-category. */
-extern char *coding_category_name[CODING_CATEGORY_IDX_MAX];
-
#ifdef emacs
extern Lisp_Object Qfile_coding_system;
extern Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
/* Coding-system actually used in the latest I/O. */
extern Lisp_Object Vlast_coding_system_used;
+/* If non-zero, process buffer inherits the coding system used to decode
+ the subprocess output. */
+extern int inherit_process_coding_system;
+
/* Coding-system to be used for encoding terminal output. This
structure contains information of a coding-system specified by the
function `set-terminal-coding-system'. */
extern struct coding_system terminal_coding;
+/* Coding system to be used to encode text for terminal display when
+ terminal coding system is nil. */
+extern struct coding_system safe_terminal_coding;
+
/* Coding-system of what is sent from terminal keyboard. This
structure contains information of a coding-system specified by the
function `set-keyboard-coding-system'. */
extern struct coding_system keyboard_coding;
-extern Lisp_Object Vcoding_system_alist;
+/* Default coding systems used for process I/O. */
+extern Lisp_Object Vdefault_process_coding_system;
+
+/* Function to call to force a user to select a proper coding
+ system. */
+extern Lisp_Object Vselect_safe_coding_system_function;
+
+/* Coding system for file names, or nil if none. */
+extern Lisp_Object Vfile_name_coding_system;
+/* Coding system for file names used only when
+ Vfile_name_coding_system is nil. */
+extern Lisp_Object Vdefault_file_name_coding_system;
#endif
#endif /* _CODING_H */