X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/64abe701f107204aa473ca3c780e28f57c3e3569..cdd2341c84d7049cbd5f3f864766f76d2f279322:/src/coding.h diff --git a/src/coding.h b/src/coding.h index 15a1669329..fb24c17040 100644 --- a/src/coding.h +++ b/src/coding.h @@ -1,6 +1,9 @@ /* Header for coding system handler. - Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. - Licensed to the Free Software Foundation. + Copyright (C) 2002, 2003, 2004, 2005, + 2006 Free Software Foundation, Inc. + Copyright (C) 1995, 1997, 1998, 2000 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H14PRO021 This file is part of GNU Emacs. @@ -16,15 +19,13 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU Emacs; see the file COPYING. If not, write to -the Free Software Foundation, Inc., 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ +the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ -#ifndef _CODING_H -#define _CODING_H +#ifndef EMACS_CODING_H +#define EMACS_CODING_H -#ifndef _CCL_H -#include "../src/ccl.h" -#endif +#include "ccl.h" /*** EMACS' INTERNAL FORMAT (emacs-mule) section ***/ @@ -40,8 +41,6 @@ enum emacs_code_class_type EMACS_carriage_return_code, /* 0x0D (carriage-return) to be used in selective display mode. */ EMACS_ascii_code, /* ASCII characters. */ - EMACS_leading_code_composition, /* Leading code of a composite - character. */ EMACS_leading_code_2, /* Base leading code of official TYPE9N character. */ EMACS_leading_code_3, /* Base leading code of private TYPE9N @@ -74,15 +73,17 @@ extern enum emacs_code_class_type emacs_code_class[256]; followings. */ enum iso_code_class_type { - ISO_control_code, /* Control codes in the range - 0x00..0x1F, 0x7F, and 0x80..0x9F, - except for the following seven - codes. 
*/ + ISO_control_0, /* Control codes in the range + 0x00..0x1F and 0x7F, except for the + following 5 codes. */ ISO_carriage_return, /* ISO_CODE_CR (0x0D) */ ISO_shift_out, /* ISO_CODE_SO (0x0E) */ ISO_shift_in, /* ISO_CODE_SI (0x0F) */ ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */ ISO_escape, /* ISO_CODE_ESC (0x1B) */ + ISO_control_1, /* Control codes in the range + 0x80..0x9F, except for the + following 3 codes. */ ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */ ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */ ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */ @@ -135,7 +136,7 @@ enum iso_code_class_type on output. */ #define CODING_FLAG_ISO_DESIGNATE_AT_BOL 0x0400 -/* If set, do not encode unsafe charactes on output. */ +/* If set, do not encode unsafe characters on output. */ #define CODING_FLAG_ISO_SAFE 0x0800 /* If set, extra latin codes (128..159) are accepted as a valid code @@ -146,8 +147,9 @@ enum iso_code_class_type #define CODING_FLAG_ISO_DESIGNATION 0x10000 /* A character to be produced on output if encoding of the original - character is prohibited by CODING_FLAG_ISO_SAFE. */ -#define CODING_INHIBIT_CHARACTER_SUBSTITUTION 077 /* 077 == `?' */ + character is inhibited by CODING_MODE_INHIBIT_UNENCODABLE_CHAR. + It must be an ASCII character. */ +#define CODING_REPLACEMENT_CHARACTER '?' /* Structure of the field `spec.iso2022' in the structure `coding_system'. */ struct iso2022_spec @@ -243,7 +245,7 @@ enum coding_type coding_type_ccl, /* The coding system of which decoder and encoder are written in CCL. */ coding_type_raw_text /* A coding system for a text - containing ramdom 8-bit code which + containing random 8-bit code which does not require code conversion except for end-of-line format. */ }; @@ -260,28 +262,69 @@ enum coding_type eol-type is not consistent through the file. */ -/* Character composition status while encoding/decoding. 
*/ -#define COMPOSING_NO 0 /* not composing */ -#define COMPOSING_WITH_RULE_HEAD 1 /* 1st char of with-rule composing follow */ -#define COMPOSING_NO_RULE_HEAD 2 /* 1st char of no-rule composing follow */ -#define COMPOSING_WITH_RULE_TAIL 3 /* Nth char of with-rule composing follow */ -#define COMPOSING_NO_RULE_TAIL 4 /* Nth char of no-rule composing follow */ -#define COMPOSING_WITH_RULE_RULE 5 /* composition rule follow */ - /* 1 iff composing. */ -#define COMPOSING_P(composing) (composing) -/* 1 iff 1st char of composing element follows. */ -#define COMPOSING_HEAD_P(composing) \ - ((composing) && (composing) <= COMPOSING_NO_RULE_HEAD) -/* 1 iff composing with embeded composition rule. */ -#define COMPOSING_WITH_RULE_P(composing) ((composing) & 1) - -/* Macros used for the member finish_status of the struct +#define COMPOSING_P(coding) ((int) coding->composing > (int) COMPOSITION_NO) + +#define COMPOSITION_DATA_SIZE 4080 +#define COMPOSITION_DATA_MAX_BUNCH_LENGTH (4 + MAX_COMPOSITION_COMPONENTS*2) + +/* Data structure to hold information about compositions of text that + is being decoded or encoded. ISO 2022 base code conversion routines + handle special ESC sequences for composition specification. But, + they can't get/put such information directly from/to a buffer in + the deepest place. So, they store or retrieve the information + through this structure. + + The decoder stores the information in this structure when it meets + ESC sequences for composition while decoding codes, then, after all + text codes are decoded, puts `composition' properties on the text + by referring to the structure. + + The encoder at first stores the information of a text to be + encoded, then, while encoding codes, generates ESC sequences for + composition at proper places by referring to the structure. */ + +struct composition_data +{ + /* The character position of the first character to be encoded or + decoded. START and END (see below) are relative to this + position. 
*/ + int char_offset; + + /* The composition data. These elements are repeated for each + composition: + LENGTH START END METHOD [ COMPONENT ... ] + where, + LENGTH is the number of elements for this composition. + + START and END are starting and ending character positions of + the composition relative to `char_offset'. + + METHOD is one of `enum composing_status' specifying the way of + composition. + + COMPONENT is a character or an encoded composition rule. */ + int data[COMPOSITION_DATA_SIZE]; + + /* The number of elements in `data' currently used. */ + int used; + + /* Pointers to the previous and next structures. When `data' is + filled up, another structure is allocated and linked in `next'. + The new structure has a backward link to this structure in `prev'. + The number of chained structures depends on how many compositions + the text being encoded or decoded contains. */ + struct composition_data *prev, *next; +}; + +/* Macros used for the member `result' of the struct coding_system. */ #define CODING_FINISH_NORMAL 0 #define CODING_FINISH_INSUFFICIENT_SRC 1 #define CODING_FINISH_INSUFFICIENT_DST 2 #define CODING_FINISH_INCONSISTENT_EOL 3 +#define CODING_FINISH_INSUFFICIENT_CMP 4 +#define CODING_FINISH_INTERRUPT 5 /* Macros used for the member `mode' of the struct coding_system. */ @@ -291,16 +334,19 @@ enum coding_type /* If set, the decoding/encoding routines treat the current data as the last block of the whole text to be converted, and do - appropriate fisishing job. */ + appropriate finishing job. */ #define CODING_MODE_LAST_BLOCK 0x02 /* If set, it means that the current source text is in a buffer which enables selective display. */ #define CODING_MODE_SELECTIVE_DISPLAY 0x04 +/* If set, replace unencodable characters by `?' on encoding. */ +#define CODING_MODE_INHIBIT_UNENCODABLE_CHAR 0x08 + /* This flag is used by the decoding/encoding routines on the fly. If set, it means that right-to-left text is being processed. 
*/ -#define CODING_MODE_DIRECTION 0x08 +#define CODING_MODE_DIRECTION 0x10 struct coding_system { @@ -322,17 +368,24 @@ struct coding_system CODING_MODE_XXX. */ unsigned int mode; - /* Table of safe character sets for this coding system. If the Nth - element is 0, the charset of ID N is not a safe character set. - Such a character set is not encoded when CODING_ISO_FLAG_SAFE is - set. */ - unsigned char safe_charsets[MAX_CHARSET + 1]; - - /* Non-zero means that characters are being composed currently while - decoding or encoding. See macros COMPOSING_XXXX above for the - meaing of each non-zero value. */ + /* The current status of composition handling. */ int composing; + /* 1 iff the next character is a composition rule. */ + int composition_rule_follows; + + /* Information about compositions is stored here on decoding and set + in advance on encoding. */ + struct composition_data *cmp_data; + + /* Index to cmp_data->data for the first element for the current + composition. */ + int cmp_data_start; + + /* Index to cmp_data->data for the current element for the current + composition. */ + int cmp_data_index; + /* Detailed information specific to each type of coding system. */ union spec { @@ -343,6 +396,13 @@ struct coding_system /* Index number of coding category of the coding system. */ int category_idx; + /* The following two members specify how characters 128..159 are + represented in source and destination text respectively. 1 means + they are represented by 2-byte sequence, 0 means they are + represented by 1-byte as is (see the comment in charset.h). */ + unsigned src_multibyte : 1; + unsigned dst_multibyte : 1; + /* How many heading bytes we can skip for decoding. This is set to -1 in setup_coding_system, and updated by detect_coding. So, when this is equal to the byte length of the text being @@ -352,11 +412,15 @@ struct coding_system /* The following members are set by encoding/decoding routine. 
*/ int produced, produced_char, consumed, consumed_char; - /* Encoding routines set this to 1 when they produce a byte sequence - which can be parsed as a multibyte character. Decoding routines - set this to 1 when they encounter an invalid code and, as the - result, produce an unexpected multibyte character. */ - int fake_multibyte; + /* Number of error source data found in a decoding routine. */ + int errors; + + /* Finish status of code conversion. It should be one of macros + CODING_FINISH_XXXX. */ + int result; + + /* If nonzero, suppress error notification. */ + int suppress_error; /* The following members are all Lisp symbols. We don't have to protect them from GC because the current garbage collection @@ -392,21 +456,25 @@ struct coding_system /* Return 1 if the coding system CODING requires code conversion on decoding. */ #define CODING_REQUIRE_DECODING(coding) \ - ((coding)->common_flags & CODING_REQUIRE_DECODING_MASK) + ((coding)->dst_multibyte \ + || (coding)->common_flags & CODING_REQUIRE_DECODING_MASK) /* Return 1 if the coding system CODING requires code conversion on encoding. */ #define CODING_REQUIRE_ENCODING(coding) \ - ((coding)->common_flags & CODING_REQUIRE_ENCODING_MASK) + ((coding)->src_multibyte \ + || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK) /* Return 1 if the coding system CODING requires some kind of code detection. */ #define CODING_REQUIRE_DETECTION(coding) \ ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK) +/* Return 1 if the coding system CODING requires code conversion on + decoding or some kind of code detection. 
*/ #define CODING_MAY_REQUIRE_DECODING(coding) \ - ((coding)->common_flags \ - & (CODING_REQUIRE_DETECTION_MASK | CODING_REQUIRE_DECODING_MASK)) + (CODING_REQUIRE_DECODING (coding) \ + || CODING_REQUIRE_DETECTION (coding)) /* Index for each coding category in `coding_category_table' */ #define CODING_CATEGORY_IDX_EMACS_MULE 0 @@ -417,10 +485,14 @@ struct coding_system #define CODING_CATEGORY_IDX_ISO_8_2 5 #define CODING_CATEGORY_IDX_ISO_7_ELSE 6 #define CODING_CATEGORY_IDX_ISO_8_ELSE 7 -#define CODING_CATEGORY_IDX_BIG5 8 -#define CODING_CATEGORY_IDX_RAW_TEXT 9 -#define CODING_CATEGORY_IDX_BINARY 10 -#define CODING_CATEGORY_IDX_MAX 11 +#define CODING_CATEGORY_IDX_CCL 8 +#define CODING_CATEGORY_IDX_BIG5 9 +#define CODING_CATEGORY_IDX_UTF_8 10 +#define CODING_CATEGORY_IDX_UTF_16_BE 11 +#define CODING_CATEGORY_IDX_UTF_16_LE 12 +#define CODING_CATEGORY_IDX_RAW_TEXT 13 +#define CODING_CATEGORY_IDX_BINARY 14 +#define CODING_CATEGORY_IDX_MAX 15 /* Definitions of flag bits returned by the function detect_coding_mask (). 
*/ @@ -432,7 +504,11 @@ struct coding_system #define CODING_CATEGORY_MASK_ISO_8_2 (1 << CODING_CATEGORY_IDX_ISO_8_2) #define CODING_CATEGORY_MASK_ISO_7_ELSE (1 << CODING_CATEGORY_IDX_ISO_7_ELSE) #define CODING_CATEGORY_MASK_ISO_8_ELSE (1 << CODING_CATEGORY_IDX_ISO_8_ELSE) +#define CODING_CATEGORY_MASK_CCL (1 << CODING_CATEGORY_IDX_CCL) #define CODING_CATEGORY_MASK_BIG5 (1 << CODING_CATEGORY_IDX_BIG5) +#define CODING_CATEGORY_MASK_UTF_8 (1 << CODING_CATEGORY_IDX_UTF_8) +#define CODING_CATEGORY_MASK_UTF_16_BE (1 << CODING_CATEGORY_IDX_UTF_16_BE) +#define CODING_CATEGORY_MASK_UTF_16_LE (1 << CODING_CATEGORY_IDX_UTF_16_LE) #define CODING_CATEGORY_MASK_RAW_TEXT (1 << CODING_CATEGORY_IDX_RAW_TEXT) #define CODING_CATEGORY_MASK_BINARY (1 << CODING_CATEGORY_IDX_BINARY) @@ -447,7 +523,11 @@ struct coding_system | CODING_CATEGORY_MASK_ISO_8_2 \ | CODING_CATEGORY_MASK_ISO_7_ELSE \ | CODING_CATEGORY_MASK_ISO_8_ELSE \ - | CODING_CATEGORY_MASK_BIG5) + | CODING_CATEGORY_MASK_CCL \ + | CODING_CATEGORY_MASK_BIG5 \ + | CODING_CATEGORY_MASK_UTF_8 \ + | CODING_CATEGORY_MASK_UTF_16_BE \ + | CODING_CATEGORY_MASK_UTF_16_LE) #define CODING_CATEGORY_MASK_ISO_7BIT \ (CODING_CATEGORY_MASK_ISO_7 | CODING_CATEGORY_MASK_ISO_7_TIGHT) @@ -463,6 +543,9 @@ struct coding_system | CODING_CATEGORY_MASK_ISO_SHIFT \ | CODING_CATEGORY_MASK_ISO_8BIT) +#define CODING_CATEGORY_MASK_UTF_16_BE_LE \ + (CODING_CATEGORY_MASK_UTF_16_BE | CODING_CATEGORY_MASK_UTF_16_LE) + /* Macros to decode or encode a character of JISX0208 in SJIS. S1 and S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding system. C1 and C2 are the 1st and 2nd position codes of Emacs' @@ -492,10 +575,10 @@ struct coding_system for file names, if any. */ #define ENCODE_FILE(name) \ (! NILP (Vfile_name_coding_system) \ - && XFASTINT (Vfile_name_coding_system) != 0 \ + && !EQ (Vfile_name_coding_system, make_number (0)) \ ? code_convert_string_norecord (name, Vfile_name_coding_system, 1) \ : (! 
NILP (Vdefault_file_name_coding_system) \ - && XFASTINT (Vdefault_file_name_coding_system) != 0 \ + && !EQ (Vdefault_file_name_coding_system, make_number (0)) \ ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \ : name)) @@ -503,46 +586,93 @@ struct coding_system for file names, if any. */ #define DECODE_FILE(name) \ (! NILP (Vfile_name_coding_system) \ - && XFASTINT (Vfile_name_coding_system) != 0 \ + && !EQ (Vfile_name_coding_system, make_number (0)) \ ? code_convert_string_norecord (name, Vfile_name_coding_system, 0) \ : (! NILP (Vdefault_file_name_coding_system) \ - && XFASTINT (Vdefault_file_name_coding_system) != 0 \ + && !EQ (Vdefault_file_name_coding_system, make_number (0)) \ ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \ : name)) +/* Encode the string STR using the specified coding system + for system functions, if any. */ +#define ENCODE_SYSTEM(str) \ + (! NILP (Vlocale_coding_system) \ + && !EQ (Vlocale_coding_system, make_number (0)) \ + ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \ + : str) + +/* Decode the string STR using the specified coding system + for system functions, if any. */ +#define DECODE_SYSTEM(str) \ + (! NILP (Vlocale_coding_system) \ + && !EQ (Vlocale_coding_system, make_number (0)) \ + ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \ + : str) + +#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1) + /* Extern declarations. 
*/ -extern int decode_coding P_ ((struct coding_system *, unsigned char *, +extern int decode_coding P_ ((struct coding_system *, const unsigned char *, unsigned char *, int, int)); -extern int encode_coding P_ ((struct coding_system *, unsigned char *, +extern int encode_coding P_ ((struct coding_system *, const unsigned char *, unsigned char *, int, int)); +extern void coding_save_composition P_ ((struct coding_system *, int, int, + Lisp_Object)); +extern void coding_free_composition_data P_ ((struct coding_system *)); +extern void coding_adjust_composition_offset P_ ((struct coding_system *, + int)); +extern void coding_allocate_composition_data P_ ((struct coding_system *, + int)); +extern void coding_restore_composition P_ ((struct coding_system *, + Lisp_Object)); extern int code_convert_region P_ ((int, int, int, int, struct coding_system *, int, int)); +extern Lisp_Object run_pre_post_conversion_on_str P_ ((Lisp_Object, + struct coding_system *, + int)); +extern void run_pre_write_conversin_on_c_str P_ ((unsigned char **, int *, + int, int, + struct coding_system *)); + extern int decoding_buffer_size P_ ((struct coding_system *, int)); extern int encoding_buffer_size P_ ((struct coding_system *, int)); -extern void detect_coding P_ ((struct coding_system *, unsigned char *, int)); -extern void detect_eol P_ ((struct coding_system *, unsigned char *, int)); -extern int conversion_buffer_size; -extern char *conversion_buffer; -extern char *get_conversion_buffer P_ ((int)); +extern void detect_coding P_ ((struct coding_system *, const unsigned char *, + int)); +extern void detect_eol P_ ((struct coding_system *, const unsigned char *, + int)); extern int setup_coding_system P_ ((Lisp_Object, struct coding_system *)); +extern Lisp_Object code_convert_string P_ ((Lisp_Object, + struct coding_system *, int, int)); +extern Lisp_Object code_convert_string1 P_ ((Lisp_Object, Lisp_Object, + Lisp_Object, int)); +extern Lisp_Object code_convert_string_norecord P_ 
((Lisp_Object, Lisp_Object, + int)); extern void setup_raw_text_coding_system P_ ((struct coding_system *)); +extern Lisp_Object encode_coding_string P_ ((Lisp_Object, + struct coding_system *, int)); +extern Lisp_Object decode_coding_string P_ ((Lisp_Object, + struct coding_system *, int)); extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index; extern Lisp_Object Qraw_text, Qemacs_mule; extern Lisp_Object Qbuffer_file_coding_system; extern Lisp_Object Vcoding_category_list; +extern Lisp_Object Qutf_8; extern Lisp_Object Qtranslation_table; extern Lisp_Object Qtranslation_table_id; -/* Mnemonic character to indicate each type of end-of-line. */ -extern int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac; -/* Mnemonic character to indicate type of end-of-line is not yet decided. */ -extern int eol_mnemonic_undecided; +/* Mnemonic strings to indicate each type of end-of-line. */ +extern Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac; +/* Mnemonic string to indicate type of end-of-line is not yet decided. */ +extern Lisp_Object eol_mnemonic_undecided; #ifdef emacs extern Lisp_Object Qfile_coding_system; -extern Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; +extern Lisp_Object Qcall_process, Qcall_process_region; extern Lisp_Object Qstart_process, Qopen_network_stream; +extern Lisp_Object Qwrite_region; + +extern char *emacs_strerror P_ ((int)); /* Coding-system for reading files and receiving data from process. */ extern Lisp_Object Vcoding_system_for_read; @@ -550,6 +680,8 @@ extern Lisp_Object Vcoding_system_for_read; extern Lisp_Object Vcoding_system_for_write; /* Coding-system actually used in the latest I/O. */ extern Lisp_Object Vlast_coding_system_used; +/* Coding-system to use with system messages (e.g. strerror). */ +extern Lisp_Object Vlocale_coding_system; /* If non-zero, process buffer inherits the coding system used to decode the subprocess output. 
*/ @@ -569,19 +701,33 @@ extern struct coding_system safe_terminal_coding; function `set-keyboard-coding-system'. */ extern struct coding_system keyboard_coding; +/* Default coding system to be used to write a file. */ +extern struct coding_system default_buffer_file_coding; + /* Default coding systems used for process I/O. */ extern Lisp_Object Vdefault_process_coding_system; -/* Function to call to force a user to force select a propert coding +/* Function to call to force a user to force select a proper coding system. */ extern Lisp_Object Vselect_safe_coding_system_function; +/* If nonzero, on writing a file, Vselect_safe_coding_system_function + is called even if Vcoding_system_for_write is non-nil. */ +extern int coding_system_require_warning; + /* Coding system for file names, or nil if none. */ extern Lisp_Object Vfile_name_coding_system; /* Coding system for file names used only when Vfile_name_coding_system is nil. */ extern Lisp_Object Vdefault_file_name_coding_system; + #endif -#endif /* _CODING_H */ +/* Error signaled when there's a problem with detecting coding system */ +extern Lisp_Object Qcoding_system_error; + +#endif /* EMACS_CODING_H */ + +/* arch-tag: 2bc3b4fa-6870-4f64-8135-b962b2d290e4 + (do not change this comment) */