/* Header for coding system handler.
- Copyright (C) 2001-2012 Free Software Foundation, Inc.
+ Copyright (C) 2001-2013 Free Software Foundation, Inc.
Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011
National Institute of Advanced Industrial Science and Technology (AIST)
coding_arg_ccl_max
};
+enum define_coding_undecided_arg_index
+ {
+ coding_arg_undecided_inhibit_null_byte_detection = coding_arg_max, /* numbering continues from coding_arg_max */
+ coding_arg_undecided_inhibit_iso_escape_detection,
+ coding_arg_undecided_prefer_utf_8,
+ coding_arg_undecided_max /* one past the last index above */
+ };
+
/* Hash table for all coding systems. Keys are coding system symbols
and values are spec vectors of the corresponding coding system. A
spec vector has the form [ ATTRS ALIASES EOL-TYPE ]. ATTRS is a
coding_attr_emacs_mule_full,
+ coding_attr_undecided_inhibit_null_byte_detection,
+ coding_attr_undecided_inhibit_iso_escape_detection,
+ coding_attr_undecided_prefer_utf_8,
+
coding_attr_last_index
};
CODING_RESULT_SUCCESS,
CODING_RESULT_INSUFFICIENT_SRC,
CODING_RESULT_INSUFFICIENT_DST,
- CODING_RESULT_INCONSISTENT_EOL,
CODING_RESULT_INVALID_SRC,
- CODING_RESULT_INTERRUPT,
- CODING_RESULT_INSUFFICIENT_MEM
+ CODING_RESULT_INTERRUPT
};
/* Macros used for the member `mode' of the struct coding_system. */
-/* If set, recover the original CR or LF of the already decoded text
- when the decoding routine encounters an inconsistent eol format. */
-#define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01
-
/* If set, the decoding/encoding routines treat the current data as
the last block of the whole text to be converted, and do the
appropriate finishing job. */
-#define CODING_MODE_LAST_BLOCK 0x02
+#define CODING_MODE_LAST_BLOCK 0x01
/* If set, it means that the current source text is in a buffer which
enables selective display. */
-#define CODING_MODE_SELECTIVE_DISPLAY 0x04
+#define CODING_MODE_SELECTIVE_DISPLAY 0x02
/* This flag is used by the decoding/encoding routines on the fly. If
set, it means that right-to-left text is being processed. */
-#define CODING_MODE_DIRECTION 0x08
+#define CODING_MODE_DIRECTION 0x04
-#define CODING_MODE_FIXED_DESTINATION 0x10
+#define CODING_MODE_FIXED_DESTINATION 0x08
/* If set, it means that the encoding routines produce some safe
ASCII characters (usually '?') for unsupported characters. */
-#define CODING_MODE_SAFE_ENCODING 0x20
+#define CODING_MODE_SAFE_ENCODING 0x10
/* For handling composition sequence. */
#include "composite.h"
struct composition_status cmp_status;
};
-struct ccl_spec;
+struct undecided_spec /* Detection settings: two tri-state inhibit flags and a UTF-8 preference. */
+{
+ /* Tri-state control of null byte detection: 1 means always inhibit,
+ -1 means do not inhibit, 0 means rely on the user variable. */
+ int inhibit_nbd;
+
+ /* Tri-state control of ISO escape detection, using the same 1, -1
+ and 0 meanings as inhibit_nbd. */
+ int inhibit_ied;
+
+ /* True means prefer UTF-8 when the input could also be in other
+ encodings. */
+ bool prefer_utf_8;
+};
enum utf_bom_type
{
struct utf_16_spec utf_16;
enum utf_bom_type utf_8_bom;
struct emacs_mule_spec emacs_mule;
+ struct undecided_spec undecided;
} spec;
int max_charset_id;
/* How many heading bytes we can skip for decoding. This is set to
-1 in setup_coding_system, and updated by detect_coding. So,
when this is equal to the byte length of the text being
- converted, we can skip the actual conversion process. */
+ converted, we can skip the actual conversion process except for
+ the eol format. */
ptrdiff_t head_ascii;
+ ptrdiff_t detected_utf8_chars;
+
+ /* Used internally in coding.c. See the comment of detect_ascii. */
+ int eol_seen;
+
/* The following members are set by encoding/decoding routine. */
ptrdiff_t produced, produced_char, consumed, consumed_char;
`charbuf', but at `src_object'. */
unsigned chars_at_source : 1;
+ /* Nonzero if the result of conversion is in `destination'
+ buffer rather than in `dst_object'. */
+ unsigned raw_destination : 1;
+
/* Set to 1 if charbuf contains an annotation. */
unsigned annotated : 1;
(code) = (s1 << 8) | s2; \
} while (0)
-/* Encode the file name NAME using the specified coding system
- for file names, if any. */
-#define ENCODE_FILE(name) \
- (! NILP (Vfile_name_coding_system) \
- ? code_convert_string_norecord (name, Vfile_name_coding_system, 1) \
- : (! NILP (Vdefault_file_name_coding_system) \
- ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \
- : name))
+/* Encode the file name NAME using the specified coding system for
+ file names, if any. If NAME is a unibyte string, return NAME.
+ Otherwise Vfile_name_coding_system is used when non-nil, then
+ Vdefault_file_name_coding_system when non-nil; if both are nil,
+ NAME is returned unchanged. NAME is expanded more than once, so
+ the argument must not have side effects. */
+#define ENCODE_FILE(name) \
+ (! STRING_MULTIBYTE (name) \
+ ? name \
+ : (! NILP (Vfile_name_coding_system) \
+ ? code_convert_string_norecord (name, Vfile_name_coding_system, 1) \
+ : (! NILP (Vdefault_file_name_coding_system) \
+ ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \
+ : name)))
/* Decode the file name NAME using the specified coding system
failure modes. STR itself is not modified. */
extern Lisp_Object from_unicode (Lisp_Object str);
+/* Convert WSTR to an Emacs string. NOTE(review): no length is
+ passed, so WSTR is presumably NUL-terminated -- confirm against
+ the definition. */
+extern Lisp_Object from_unicode_buffer (const wchar_t* wstr);
+
#endif /* WINDOWSNT || CYGWIN */
/* Macros for backward compatibility. */
-#define decode_coding_region(coding, from, to) \
- decode_coding_object (coding, Fcurrent_buffer (), \
- from, CHAR_TO_BYTE (from), \
- to, CHAR_TO_BYTE (to), Fcurrent_buffer ())
-
-
-#define encode_coding_region(coding, from, to) \
- encode_coding_object (coding, Fcurrent_buffer (), \
- from, CHAR_TO_BYTE (from), \
- to, CHAR_TO_BYTE (to), Fcurrent_buffer ())
-
-
-#define decode_coding_string(coding, string, nocopy) \
- decode_coding_object (coding, string, 0, 0, SCHARS (string), \
- SBYTES (string), Qt)
-
#define encode_coding_string(coding, string, nocopy) \
(STRING_MULTIBYTE(string) ? \
(encode_coding_object (coding, string, 0, 0, SCHARS (string), \