X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/28b6b84d4e91cf39c85f194c29b8e044ae6bac87..2fdec80c2cebf486bc708c5a59b0cd52def5285b:/src/coding.h diff --git a/src/coding.h b/src/coding.h index eb95fa13dd..a2f9cbc578 100644 --- a/src/coding.h +++ b/src/coding.h @@ -1,5 +1,5 @@ /* Header for coding system handler. - Copyright (C) 2001-2013 Free Software Foundation, Inc. + Copyright (C) 2001-2015 Free Software Foundation, Inc. Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 National Institute of Advanced Industrial Science and Technology (AIST) @@ -76,6 +76,14 @@ enum define_coding_ccl_arg_index coding_arg_ccl_max }; +enum define_coding_undecided_arg_index + { + coding_arg_undecided_inhibit_null_byte_detection = coding_arg_max, + coding_arg_undecided_inhibit_iso_escape_detection, + coding_arg_undecided_prefer_utf_8, + coding_arg_undecided_max + }; + /* Hash table for all coding systems. Keys are coding system symbols and values are spec vectors of the corresponding coding system. A spec vector has the form [ ATTRS ALIASES EOL-TYPE ]. ATTRS is a @@ -158,6 +166,10 @@ enum coding_attr_index coding_attr_emacs_mule_full, + coding_attr_undecided_inhibit_null_byte_detection, + coding_attr_undecided_inhibit_iso_escape_detection, + coding_attr_undecided_prefer_utf_8, + coding_attr_last_index }; @@ -214,7 +226,7 @@ enum coding_attr_index hash_lookup (XHASH_TABLE (Vcoding_system_hash_table), \ coding_system_symbol, NULL) -/* Return 1 if CODING_SYSTEM_SYMBOL is a coding system. */ +/* Return true if CODING_SYSTEM_SYMBOL is a coding system. */ #define CODING_SYSTEM_P(coding_system_symbol) \ (CODING_SYSTEM_ID (coding_system_symbol) >= 0 \ @@ -228,7 +240,7 @@ enum coding_attr_index if (CODING_SYSTEM_ID (x) < 0 \ && NILP (Fcheck_coding_system (x))) \ wrong_type_argument (Qcoding_system_p, (x)); \ - } while (0) + } while (false) /* Check if X is a coding system or not. If it is, set SEPC to the @@ -244,7 +256,7 @@ enum coding_attr_index } \ if (NILP (spec)) \ wrong_type_argument (Qcoding_system_p, (x)); \ - } while (0) + } while (false) /* Check if X is a coding system or not. If it is, set ID to the @@ -261,7 +273,7 @@ enum coding_attr_index } \ if (id < 0) \ wrong_type_argument (Qcoding_system_p, (x)); \ - } while (0) + } while (false) /*** GENERAL section ***/ @@ -272,37 +284,31 @@ enum coding_result_code CODING_RESULT_SUCCESS, CODING_RESULT_INSUFFICIENT_SRC, CODING_RESULT_INSUFFICIENT_DST, - CODING_RESULT_INCONSISTENT_EOL, CODING_RESULT_INVALID_SRC, - CODING_RESULT_INTERRUPT, - CODING_RESULT_INSUFFICIENT_MEM + CODING_RESULT_INTERRUPT }; /* Macros used for the member `mode' of the struct coding_system. */ -/* If set, recover the original CR or LF of the already decoded text - when the decoding routine encounters an inconsistent eol format. */ -#define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01 - /* If set, the decoding/encoding routines treat the current data as the last block of the whole text to be converted, and do the appropriate finishing job. */ -#define CODING_MODE_LAST_BLOCK 0x02 +#define CODING_MODE_LAST_BLOCK 0x01 /* If set, it means that the current source text is in a buffer which enables selective display. */ -#define CODING_MODE_SELECTIVE_DISPLAY 0x04 +#define CODING_MODE_SELECTIVE_DISPLAY 0x02 /* This flag is used by the decoding/encoding routines on the fly. If set, it means that right-to-left text is being processed. */ -#define CODING_MODE_DIRECTION 0x08 +#define CODING_MODE_DIRECTION 0x04 -#define CODING_MODE_FIXED_DESTINATION 0x10 +#define CODING_MODE_FIXED_DESTINATION 0x08 /* If set, it means that the encoding routines produces some safe ASCII characters (usually '?') for unsupported characters. */ -#define CODING_MODE_SAFE_ENCODING 0x20 +#define CODING_MODE_SAFE_ENCODING 0x10 /* For handling composition sequence. */ #include "composite.h" @@ -355,13 +361,13 @@ struct iso_2022_spec /* True temporarily only when graphic register 2 or 3 is invoked by single-shift while encoding. */ - unsigned single_shifting : 1; + bool_bf single_shifting : 1; /* True temporarily only when processing at beginning of line. */ - unsigned bol : 1; + bool_bf bol : 1; /* If true, we are now scanning embedded UTF-8 sequence. */ - unsigned embedded_utf_8 : 1; + bool_bf embedded_utf_8 : 1; /* The current composition. */ struct composition_status cmp_status; @@ -372,7 +378,18 @@ struct emacs_mule_spec struct composition_status cmp_status; }; -struct ccl_spec; +struct undecided_spec +{ + /* Inhibit null byte detection. 1 means always inhibit, + -1 means do not inhibit, 0 means rely on user variable. */ + int inhibit_nbd; + + /* Inhibit ISO escape detection. -1, 0, 1 as above. */ + int inhibit_ied; + + /* Prefer UTF-8 when the input could be other encodings. */ + bool prefer_utf_8; +}; enum utf_bom_type { @@ -431,29 +448,38 @@ struct coding_system struct utf_16_spec utf_16; enum utf_bom_type utf_8_bom; struct emacs_mule_spec emacs_mule; + struct undecided_spec undecided; } spec; int max_charset_id; unsigned char *safe_charsets; /* The following two members specify how binary 8-bit code 128..255 - are represented in source and destination text respectively. 1 - means they are represented by 2-byte sequence, 0 means they are + are represented in source and destination text respectively. True + means they are represented by 2-byte sequence, false means they are represented by 1-byte as is (see the comment in character.h). */ - unsigned src_multibyte : 1; - unsigned dst_multibyte : 1; + bool_bf src_multibyte : 1; + bool_bf dst_multibyte : 1; /* How may heading bytes we can skip for decoding. This is set to -1 in setup_coding_system, and updated by detect_coding. So, when this is equal to the byte length of the text being - converted, we can skip the actual conversion process. */ + converted, we can skip the actual conversion process except for + the eol format. */ ptrdiff_t head_ascii; + /* How many bytes/chars at the source are detected as valid utf-8 + sequence. Set by detect_coding_utf_8. */ + ptrdiff_t detected_utf8_bytes, detected_utf8_chars; + + /* Used internally in coding.c. See the comment of detect_ascii. */ + int eol_seen; + /* The following members are set by encoding/decoding routine. */ ptrdiff_t produced, produced_char, consumed, consumed_char; /* Number of error source data found in a decoding routine. */ - int errors; + ptrdiff_t errors; /* Store the positions of error source data. */ ptrdiff_t *error_positions; @@ -486,10 +512,14 @@ struct coding_system /* True if the source of conversion is not in the member `charbuf', but at `src_object'. */ - unsigned chars_at_source : 1; + bool_bf chars_at_source : 1; + + /* Nonzero if the result of conversion is in `destination' + buffer rather than in `dst_object'. */ + bool_bf raw_destination : 1; - /* Set to 1 if charbuf contains an annotation. */ - unsigned annotated : 1; + /* Set to true if charbuf contains an annotation. */ + bool_bf annotated : 1; unsigned char carryover[64]; int carryover_bytes; @@ -515,28 +545,29 @@ struct coding_system #define CODING_REQUIRE_DETECTION_MASK 0x1000 #define CODING_RESET_AT_BOL_MASK 0x2000 -/* Return 1 if the coding context CODING requires annotation +/* Return nonzero if the coding context CODING requires annotation handling. */ #define CODING_REQUIRE_ANNOTATION(coding) \ ((coding)->common_flags & CODING_ANNOTATION_MASK) -/* Return 1 if the coding context CODING prefers decoding into unibyte. */ +/* Return nonzero if the coding context CODING prefers decoding into + unibyte. */ #define CODING_FOR_UNIBYTE(coding) \ ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK) -/* Return 1 if the coding context CODING requires specific code to be +/* Return nonzero if the coding context CODING requires specific code to be attached at the tail of converted text. */ #define CODING_REQUIRE_FLUSHING(coding) \ ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK) -/* Return 1 if the coding context CODING requires code conversion on +/* Return nonzero if the coding context CODING requires code conversion on decoding. */ #define CODING_REQUIRE_DECODING(coding) \ ((coding)->dst_multibyte \ || (coding)->common_flags & CODING_REQUIRE_DECODING_MASK) -/* Return 1 if the coding context CODING requires code conversion on +/* Return nonzero if the coding context CODING requires code conversion on encoding. The non-multibyte part of the condition is to support encoding of unibyte strings/buffers generated by string-as-unibyte or @@ -547,12 +578,12 @@ struct coding_system || (coding)->mode & CODING_MODE_SELECTIVE_DISPLAY) -/* Return 1 if the coding context CODING requires some kind of code +/* Return nonzero if the coding context CODING requires some kind of code detection. */ #define CODING_REQUIRE_DETECTION(coding) \ ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK) -/* Return 1 if the coding context CODING requires code conversion on +/* Return nonzero if the coding context CODING requires code conversion on decoding or some kind of code detection. */ #define CODING_MAY_REQUIRE_DECODING(coding) \ (CODING_REQUIRE_DECODING (coding) \ @@ -576,7 +607,7 @@ struct coding_system (j1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1), \ j2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F)); \ (code) = (j1 << 8) | j2; \ - } while (0) + } while (false) #define SJIS_TO_JIS2(code) \ do { \ @@ -601,7 +632,7 @@ struct coding_system j2 = s2 - ((s2 >= 0x7F ? 0x20 : 0x1F)); \ } \ (code) = (j1 << 8) | j2; \ - } while (0) + } while (false) #define JIS_TO_SJIS(code) \ @@ -616,7 +647,7 @@ struct coding_system (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x70 : 0xB0), \ s2 = j2 + 0x7E); \ (code) = (s1 << 8) | s2; \ - } while (0) + } while (false) #define JIS_TO_SJIS2(code) \ do { \ @@ -640,44 +671,32 @@ struct coding_system s2 = j2 + 0x7E; \ } \ (code) = (s1 << 8) | s2; \ - } while (0) + } while (false) /* Encode the file name NAME using the specified coding system for file names, if any. */ -#define ENCODE_FILE(name) \ - (! NILP (Vfile_name_coding_system) \ - ? code_convert_string_norecord (name, Vfile_name_coding_system, 1) \ - : (! NILP (Vdefault_file_name_coding_system) \ - ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \ - : name)) - +#define ENCODE_FILE(NAME) encode_file_name (NAME) /* Decode the file name NAME using the specified coding system for file names, if any. */ -#define DECODE_FILE(name) \ - (! NILP (Vfile_name_coding_system) \ - ? code_convert_string_norecord (name, Vfile_name_coding_system, 0) \ - : (! NILP (Vdefault_file_name_coding_system) \ - ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \ - : name)) - +#define DECODE_FILE(NAME) decode_file_name (NAME) /* Encode the string STR using the specified coding system for system functions, if any. */ #define ENCODE_SYSTEM(str) \ (! NILP (Vlocale_coding_system) \ - ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \ + ? code_convert_string_norecord (str, Vlocale_coding_system, true) \ : str) /* Decode the string STR using the specified coding system for system functions, if any. */ #define DECODE_SYSTEM(str) \ (! NILP (Vlocale_coding_system) \ - ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \ + ? code_convert_string_norecord (str, Vlocale_coding_system, false) \ : str) /* Note that this encodes utf-8, not utf-8-emacs, so it's not a no-op. */ -#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1) +#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, true) /* Extern declarations. */ extern Lisp_Object code_conversion_save (bool, bool); @@ -688,6 +707,8 @@ extern Lisp_Object code_convert_string (Lisp_Object, Lisp_Object, Lisp_Object, bool, bool, bool); extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object, bool); +extern Lisp_Object encode_file_name (Lisp_Object); +extern Lisp_Object decode_file_name (Lisp_Object); extern Lisp_Object raw_text_coding_system (Lisp_Object); extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object); extern Lisp_Object complement_process_encoding_system (Lisp_Object); @@ -721,26 +742,13 @@ extern wchar_t *to_unicode (Lisp_Object str, Lisp_Object *buf); failure modes. STR itself is not modified. */ extern Lisp_Object from_unicode (Lisp_Object str); +/* Convert WSTR to an Emacs string. */ +extern Lisp_Object from_unicode_buffer (const wchar_t* wstr); + #endif /* WINDOWSNT || CYGWIN */ /* Macros for backward compatibility. */ -#define decode_coding_region(coding, from, to) \ - decode_coding_object (coding, Fcurrent_buffer (), \ - from, CHAR_TO_BYTE (from), \ - to, CHAR_TO_BYTE (to), Fcurrent_buffer ()) - - -#define encode_coding_region(coding, from, to) \ - encode_coding_object (coding, Fcurrent_buffer (), \ - from, CHAR_TO_BYTE (from), \ - to, CHAR_TO_BYTE (to), Fcurrent_buffer ()) - - -#define decode_coding_string(coding, string, nocopy) \ - decode_coding_object (coding, string, 0, 0, SCHARS (string), \ - SBYTES (string), Qt) - #define encode_coding_string(coding, string, nocopy) \ (STRING_MULTIBYTE(string) ? \ (encode_coding_object (coding, string, 0, 0, SCHARS (string), \ @@ -754,7 +762,7 @@ extern Lisp_Object from_unicode (Lisp_Object str); (coding)->src_chars = (coding)->src_bytes = (bytes); \ decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes), \ (dst_object)); \ - } while (0) + } while (false) extern Lisp_Object preferred_coding_system (void);