X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/8000e212ef3cefdf190624c9607def66bda2bf8a..03da5d089a8ed035cec443a27259e7d21487a22e:/src/coding.h diff --git a/src/coding.h b/src/coding.h index 12c14da4d0..1ea1dafd54 100644 --- a/src/coding.h +++ b/src/coding.h @@ -1,6 +1,7 @@ /* Header for coding system handler. Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. Licensed to the Free Software Foundation. + Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. This file is part of GNU Emacs. @@ -16,15 +17,13 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU Emacs; see the file COPYING. If not, write to -the Free Software Foundation, Inc., 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ +the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ -#ifndef _CODING_H -#define _CODING_H +#ifndef EMACS_CODING_H +#define EMACS_CODING_H -#ifndef _CCL_H #include "ccl.h" -#endif /*** EMACS' INTERNAL FORMAT (emacs-mule) section ***/ @@ -72,15 +71,17 @@ extern enum emacs_code_class_type emacs_code_class[256]; followings. */ enum iso_code_class_type { - ISO_control_code, /* Control codes in the range - 0x00..0x1F, 0x7F, and 0x80..0x9F, - except for the following seven - codes. */ + ISO_control_0, /* Control codes in the range + 0x00..0x1F and 0x7F, except for the + following 5 codes. */ ISO_carriage_return, /* ISO_CODE_CR (0x0D) */ ISO_shift_out, /* ISO_CODE_SO (0x0E) */ ISO_shift_in, /* ISO_CODE_SI (0x0F) */ ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */ ISO_escape, /* ISO_CODE_SO (0x1B) */ + ISO_control_1, /* Control codes in the range + 0x80..0x9F, except for the + following 3 codes. */ ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */ ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */ ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */ @@ -133,7 +134,7 @@ enum iso_code_class_type on output. 
*/ #define CODING_FLAG_ISO_DESIGNATE_AT_BOL 0x0400 -/* If set, do not encode unsafe charactes on output. */ +/* If set, do not encode unsafe characters on output. */ #define CODING_FLAG_ISO_SAFE 0x0800 /* If set, extra latin codes (128..159) are accepted as a valid code @@ -144,8 +145,9 @@ enum iso_code_class_type #define CODING_FLAG_ISO_DESIGNATION 0x10000 /* A character to be produced on output if encoding of the original - character is prohibited by CODING_FLAG_ISO_SAFE. */ -#define CODING_INHIBIT_CHARACTER_SUBSTITUTION 077 /* 077 == `?' */ + character is inhibited by CODING_MODE_INHIBIT_UNENCODABLE_CHAR. + It must be an ASCII character. */ +#define CODING_REPLACEMENT_CHARACTER '?' /* Structure of the field `spec.iso2022' in the structure `coding_system'. */ struct iso2022_spec @@ -241,7 +243,7 @@ enum coding_type coding_type_ccl, /* The coding system of which decoder and encoder are written in CCL. */ coding_type_raw_text /* A coding system for a text - containing ramdom 8-bit code which + containing random 8-bit code which does not require code conversion except for end-of-line format. */ }; @@ -274,11 +276,11 @@ enum coding_type The encoder stores the information in this structure when it meets ESC sequences for composition while encoding codes, then, after all text codes are encoded, puts `composition' properties on the text - by refering the structure. + by referring to the structure. The decoder at first stores the information of a text to be decoded, then, while decoding codes, generates ESC sequences for - composition at proper places by refering the structure. */ + composition at proper places by referring to the structure. */ struct composition_data { @@ -296,7 +298,7 @@ struct composition_data START and END are starting and ending character positions of the composition relative to `char_offset'. - METHOD is one of `enum cmposing_status' specifying the way of + METHOD is one of `enum composing_status' specifying the way of composition. 
COMPONENT is a character or an encoded composition rule. */ @@ -307,13 +309,13 @@ struct composition_data /* Pointers to the previous and next structures. When `data' is filled up, another structure is allocated and linked in `next'. - The new struture has backward link to this struture in `prev'. - The number of chaind structures depends on how many compositions + The new structure has backward link to this structure in `prev'. + The number of chained structures depends on how many compositions the text being encoded or decoded contains. */ struct composition_data *prev, *next; }; -/* Macros used for the member finish_status of the struct +/* Macros used for the member `result' of the struct coding_system. */ #define CODING_FINISH_NORMAL 0 #define CODING_FINISH_INSUFFICIENT_SRC 1 @@ -330,16 +332,19 @@ struct composition_data /* If set, the decoding/encoding routines treat the current data as the last block of the whole text to be converted, and do - appropriate fisishing job. */ + appropriate finishing job. */ #define CODING_MODE_LAST_BLOCK 0x02 /* If set, it means that the current source text is in a buffer which enables selective display. */ #define CODING_MODE_SELECTIVE_DISPLAY 0x04 +/* If set, replace unencodable characters by `?' on encoding. */ +#define CODING_MODE_INHIBIT_UNENCODABLE_CHAR 0x08 + /* This flag is used by the decoding/encoding routines on the fly. If set, it means that right-to-left text is being processed. */ -#define CODING_MODE_DIRECTION 0x08 +#define CODING_MODE_DIRECTION 0x10 struct coding_system { @@ -361,12 +366,6 @@ struct coding_system CODING_MODE_XXX. */ unsigned int mode; - /* Table of safe character sets for this coding system. If the Nth - element is 0, the charset of ID N is not a safe character set. - Such a character set is not encoded when CODING_ISO_FLAG_SAFE is - set. */ - unsigned char safe_charsets[MAX_CHARSET + 1]; - /* The current status of composition handling. 
*/ int composing; @@ -395,6 +394,13 @@ struct coding_system /* Index number of coding category of the coding system. */ int category_idx; + /* The following two members specify how characters 128..159 are + represented in source and destination text respectively. 1 means + they are represented by 2-byte sequence, 0 means they are + represented by 1-byte as is (see the comment in charset.h). */ + unsigned src_multibyte : 1; + unsigned dst_multibyte : 1; + /* How may heading bytes we can skip for decoding. This is set to -1 in setup_coding_system, and updated by detect_coding. So, when this is equal to the byte length of the text being @@ -404,11 +410,15 @@ struct coding_system /* The following members are set by encoding/decoding routine. */ int produced, produced_char, consumed, consumed_char; - /* Encoding routines set this to 1 when they produce a byte sequence - which can be parsed as a multibyte character. Decoding routines - set this to 1 when they encounter an invalid code and, as the - result, produce an unexpected multibyte character. */ - int fake_multibyte; + /* Number of error source data found in a decoding routine. */ + int errors; + + /* Finish status of code conversion. It should be one of macros + CODING_FINISH_XXXX. */ + int result; + + /* If nonzero, suppress error notification. */ + int suppress_error; /* The following members are all Lisp symbols. We don't have to protect them from GC because the current garbage collection @@ -444,21 +454,25 @@ struct coding_system /* Return 1 if the coding system CODING requires code conversion on decoding. */ #define CODING_REQUIRE_DECODING(coding) \ - ((coding)->common_flags & CODING_REQUIRE_DECODING_MASK) + ((coding)->dst_multibyte \ + || (coding)->common_flags & CODING_REQUIRE_DECODING_MASK) /* Return 1 if the coding system CODING requires code conversion on encoding. 
*/ #define CODING_REQUIRE_ENCODING(coding) \ - ((coding)->common_flags & CODING_REQUIRE_ENCODING_MASK) + ((coding)->src_multibyte \ + || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK) /* Return 1 if the coding system CODING requires some kind of code detection. */ #define CODING_REQUIRE_DETECTION(coding) \ ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK) +/* Return 1 if the coding system CODING requires code conversion on + decoding or some kind of code detection. */ #define CODING_MAY_REQUIRE_DECODING(coding) \ - ((coding)->common_flags \ - & (CODING_REQUIRE_DETECTION_MASK | CODING_REQUIRE_DECODING_MASK)) + (CODING_REQUIRE_DECODING (coding) \ + || CODING_REQUIRE_DETECTION (coding)) /* Index for each coding category in `coding_category_table' */ #define CODING_CATEGORY_IDX_EMACS_MULE 0 @@ -559,10 +573,10 @@ struct coding_system for file names, if any. */ #define ENCODE_FILE(name) \ (! NILP (Vfile_name_coding_system) \ - && XFASTINT (Vfile_name_coding_system) != 0 \ + && !EQ (Vfile_name_coding_system, make_number (0)) \ ? code_convert_string_norecord (name, Vfile_name_coding_system, 1) \ : (! NILP (Vdefault_file_name_coding_system) \ - && XFASTINT (Vdefault_file_name_coding_system) != 0 \ + && !EQ (Vdefault_file_name_coding_system, make_number (0)) \ ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \ : name)) @@ -570,40 +584,77 @@ struct coding_system for file names, if any. */ #define DECODE_FILE(name) \ (! NILP (Vfile_name_coding_system) \ - && XFASTINT (Vfile_name_coding_system) != 0 \ + && !EQ (Vfile_name_coding_system, make_number (0)) \ ? code_convert_string_norecord (name, Vfile_name_coding_system, 0) \ : (! NILP (Vdefault_file_name_coding_system) \ - && XFASTINT (Vdefault_file_name_coding_system) != 0 \ + && !EQ (Vdefault_file_name_coding_system, make_number (0)) \ ? 
code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \ : name)) +/* Encode the string STR using the specified coding system + for system functions, if any. */ +#define ENCODE_SYSTEM(str) \ + (! NILP (Vlocale_coding_system) \ + && !EQ (Vlocale_coding_system, make_number (0)) \ + ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \ + : str) + +/* Decode the string STR using the specified coding system + for system functions, if any. */ +#define DECODE_SYSTEM(str) \ + (! NILP (Vlocale_coding_system) \ + && !EQ (Vlocale_coding_system, make_number (0)) \ + ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \ + : str) + +#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1) + /* Extern declarations. */ -extern int decode_coding P_ ((struct coding_system *, unsigned char *, +extern int decode_coding P_ ((struct coding_system *, const unsigned char *, unsigned char *, int, int)); -extern int encode_coding P_ ((struct coding_system *, unsigned char *, +extern int encode_coding P_ ((struct coding_system *, const unsigned char *, unsigned char *, int, int)); extern void coding_save_composition P_ ((struct coding_system *, int, int, Lisp_Object)); extern void coding_free_composition_data P_ ((struct coding_system *)); extern void coding_adjust_composition_offset P_ ((struct coding_system *, int)); +extern void coding_allocate_composition_data P_ ((struct coding_system *, + int)); +extern void coding_restore_composition P_ ((struct coding_system *, + Lisp_Object)); extern int code_convert_region P_ ((int, int, int, int, struct coding_system *, int, int)); +extern Lisp_Object run_pre_post_conversion_on_str P_ ((Lisp_Object, + struct coding_system *, + int)); +extern void run_pre_write_conversin_on_c_str P_ ((unsigned char **, int *, + int, int, + struct coding_system *)); + extern int decoding_buffer_size P_ ((struct coding_system *, int)); extern int encoding_buffer_size P_ ((struct coding_system *, int)); -extern 
void detect_coding P_ ((struct coding_system *, unsigned char *, int)); -extern void detect_eol P_ ((struct coding_system *, unsigned char *, int)); -extern int conversion_buffer_size; -extern char *conversion_buffer; -extern char *get_conversion_buffer P_ ((int)); +extern void detect_coding P_ ((struct coding_system *, const unsigned char *, + int)); +extern void detect_eol P_ ((struct coding_system *, const unsigned char *, + int)); extern int setup_coding_system P_ ((Lisp_Object, struct coding_system *)); extern Lisp_Object code_convert_string P_ ((Lisp_Object, struct coding_system *, int, int)); +extern Lisp_Object code_convert_string1 P_ ((Lisp_Object, Lisp_Object, + Lisp_Object, int)); +extern Lisp_Object code_convert_string_norecord P_ ((Lisp_Object, Lisp_Object, + int)); extern void setup_raw_text_coding_system P_ ((struct coding_system *)); +extern Lisp_Object encode_coding_string P_ ((Lisp_Object, + struct coding_system *, int)); +extern Lisp_Object decode_coding_string P_ ((Lisp_Object, + struct coding_system *, int)); extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index; extern Lisp_Object Qraw_text, Qemacs_mule; extern Lisp_Object Qbuffer_file_coding_system; extern Lisp_Object Vcoding_category_list; +extern Lisp_Object Qutf_8; extern Lisp_Object Qtranslation_table; extern Lisp_Object Qtranslation_table_id; @@ -615,8 +666,9 @@ extern Lisp_Object eol_mnemonic_undecided; #ifdef emacs extern Lisp_Object Qfile_coding_system; -extern Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; +extern Lisp_Object Qcall_process, Qcall_process_region; extern Lisp_Object Qstart_process, Qopen_network_stream; +extern Lisp_Object Qwrite_region; extern char *emacs_strerror P_ ((int)); @@ -653,16 +705,27 @@ extern struct coding_system default_buffer_file_coding; /* Default coding systems used for process I/O. 
*/ extern Lisp_Object Vdefault_process_coding_system; -/* Function to call to force a user to force select a propert coding +/* Function to call to force a user to force select a proper coding system. */ extern Lisp_Object Vselect_safe_coding_system_function; +/* If nonzero, on writing a file, Vselect_safe_coding_system_function + is called even if Vcoding_system_for_write is non-nil. */ +extern int coding_system_require_warning; + /* Coding system for file names, or nil if none. */ extern Lisp_Object Vfile_name_coding_system; /* Coding system for file names used only when Vfile_name_coding_system is nil. */ extern Lisp_Object Vdefault_file_name_coding_system; + #endif -#endif /* _CODING_H */ +/* Error signaled when there's a problem with detecting coding system */ +extern Lisp_Object Qcoding_system_error; + +#endif /* EMACS_CODING_H */ + +/* arch-tag: 2bc3b4fa-6870-4f64-8135-b962b2d290e4 + (do not change this comment) */