/* Coding system handler (conversion, detection, etc).
Copyright (C) 2001, 2002, 2003, 2004, 2005,
- 2006, 2007, 2008 Free Software Foundation, Inc.
+ 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
- 2005, 2006, 2007, 2008
+ 2005, 2006, 2007, 2008, 2009, 2010
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
Copyright (C) 2003
#include <config.h>
#include <stdio.h>
+#include <setjmp.h>
#include "lisp.h"
#include "buffer.h"
Lisp_Object Qbig, Qlittle;
Lisp_Object Qcoding_system_history;
Lisp_Object Qvalid_codes;
-Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
+Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
Lisp_Object QCdecode_translation_table, QCencode_translation_table;
Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
Lisp_Object QCascii_compatible_p;
/* Flag to inhibit ISO2022 escape sequence detection. */
int inhibit_iso_escape_detection;
+/* Flag to inhibit detection of binary files through null bytes. */
+int inhibit_null_byte_detection;
+
/* Flag to make buffer-file-coding-system inherit from process-coding. */
int inherit_process_coding_system;
reg)))
-#define CODING_ISO_REQUEST(coding, charset_id) \
- ((charset_id <= (coding)->max_charset_id \
- ? (coding)->safe_charsets[charset_id] \
+#define CODING_ISO_REQUEST(coding, charset_id) \
+ (((charset_id) <= (coding)->max_charset_id \
+ ? ((coding)->safe_charsets[charset_id] != 255 \
+ ? (coding)->safe_charsets[charset_id] \
+ : -1) \
: -1))
((coding)->spec.iso_2022.bol)
#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
+#define CODING_ISO_CMP_STATUS(coding) \
+ (&(coding)->spec.iso_2022.cmp_status)
+#define CODING_ISO_EXTSEGMENT_LEN(coding) \
+ ((coding)->spec.iso_2022.ctext_extended_segment_len)
+#define CODING_ISO_EMBEDDED_UTF_8(coding) \
+ ((coding)->spec.iso_2022.embedded_utf_8)
/* Control characters of ISO2022. */
/* code */ /* function */
consumed_chars++; \
} while (0)
+/* Safely get two bytes from the source text pointed by SRC which ends
+ at SRC_END, and set C1 and C2 to those bytes while skipping the
+ heading multibyte characters. If there are not enough bytes in the
+ source, it jumps to `no_more_source'. If multibytep is nonzero, a
+ byte 0xC0 or 0xC1 is combined with the byte that follows it into a
+ single value (note that this following byte is fetched without
+ comparing SRC against SRC_END). Any other multibyte character
+ found for C1 is skipped and C1 is read again; one found for C2 is
+ not skipped, and C2 is set to -1 instead. The caller should
+ declare and set these variables appropriately in advance:
+ src, src_end, multibytep
+ The caller must also provide the label `no_more_source'.
+ It is intended that this macro is used in detect_coding_utf_16. */
+
+#define TWO_MORE_BYTES(c1, c2) \
+ do { \
+ do { \
+ if (src == src_end) \
+ goto no_more_source; \
+ c1 = *src++; \
+ if (multibytep && (c1 & 0x80)) \
+ { \
+ if ((c1 & 0xFE) == 0xC0) \
+ c1 = ((c1 & 1) << 6) | *src++; \
+ else \
+ { \
+ src += BYTES_BY_CHAR_HEAD (c1) - 1; \
+ c1 = -1; \
+ } \
+ } \
+ } while (c1 < 0); \
+ if (src == src_end) \
+ goto no_more_source; \
+ c2 = *src++; \
+ if (multibytep && (c2 & 0x80)) \
+ { \
+ if ((c2 & 0xFE) == 0xC0) \
+ c2 = ((c2 & 1) << 6) | *src++; \
+ else \
+ c2 = -1; \
+ } \
+ } while (0)
+
#define ONE_MORE_BYTE_NO_CHECK(c) \
do { \
static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
static void decode_eol P_ ((struct coding_system *));
static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
-static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
- int, int *, int *));
+static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *));
static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
-static INLINE void produce_composition P_ ((struct coding_system *, int *,
- EMACS_INT));
static INLINE void produce_charset P_ ((struct coding_system *, int *,
EMACS_INT));
static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
case CODING_RESULT_INSUFFICIENT_MEM:
Vlast_code_conversion_error = Qinsufficient_memory;
break;
+ case CODING_RESULT_INSUFFICIENT_DST:
+ /* Don't record this error in Vlast_code_conversion_error
+ because it happens just temporarily and is resolved when the
+ whole conversion is finished. */
+ break;
+ case CODING_RESULT_SUCCESS:
+ break;
default:
Vlast_code_conversion_error = intern ("Unknown error");
}
}
+/* This wrapper macro is used to preserve validity of pointers into
+ buffer text across calls to decode_char, which could cause
+ relocation of buffers if it loads a charset map, because loading a
+ charset map allocates large structures. */
#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
do { \
charset_map_loaded = 0; \
}
else
coding_alloc_by_realloc (coding, nbytes);
- record_conversion_result (coding, CODING_RESULT_SUCCESS);
coding_set_destination (coding);
dst = coding->destination + offset;
return dst;
/** Macros for annotations. */
-/* Maximum length of annotation data (sum of annotations for
- composition and charset). */
-#define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
-
/* An annotation data is stored in the array coding->charbuf in this
format:
[ -LENGTH ANNOTATION_MASK NCHARS ... ]
In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
follow:
- ... METHOD [ COMPOSITION-COMPONENTS ... ]
+ ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
+
+ NBYTES is the number of bytes specified in the header part of
+ old-style emacs-mule encoding, or 0 for the other kind of
+ composition.
+
METHOD is one of enum composition_method.
+
Optional COMPOSITION-COMPONENTS are characters and composition
rules.
In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
- follows. */
+ follows.
+
+ If ANNOTATION_MASK is 0, this annotation is just a space holder to
+ recover from an invalid annotation, and should be skipped by
+ produce_annotation. */
+
+/* Maximum length of the header of annotation data. */
+#define MAX_ANNOTATION_LENGTH 5
#define ADD_ANNOTATION_DATA(buf, len, mask, nchars) \
do { \
coding->annotated = 1; \
} while (0);
-#define ADD_COMPOSITION_DATA(buf, nchars, method) \
+#define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method) \
do { \
- ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
+ ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
+ *buf++ = nbytes; \
*buf++ = method; \
} while (0)
const unsigned char *src_base;
int *charbuf = coding->charbuf + coding->charbuf_used;
int *charbuf_end = coding->charbuf + coding->charbuf_size;
- int consumed_chars = 0, consumed_chars_base;
+ int consumed_chars = 0, consumed_chars_base = 0;
int multibytep = coding->src_multibyte;
enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
Lisp_Object attr, charset_list;
- int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+ int eol_crlf =
+ !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
int byte_after_cr = -1;
CODING_GET_INFO (coding, attr, charset_list);
src = src_base;
else
{
- ONE_MORE_BYTE (c2);
+ ONE_MORE_BYTE (c2);
if (! UTF_8_EXTRA_OCTET_P (c2))
src = src_base;
else
{
- ONE_MORE_BYTE (c3);
+ ONE_MORE_BYTE (c3);
if (! UTF_8_EXTRA_OCTET_P (c3))
src = src_base;
else
consumed_chars_base = consumed_chars;
if (charbuf >= charbuf_end)
- break;
+ {
+ if (byte_after_cr >= 0)
+ src_base--;
+ break;
+ }
if (byte_after_cr >= 0)
c1 = byte_after_cr, byte_after_cr = -1;
return 0;
}
- ONE_MORE_BYTE (c1);
- ONE_MORE_BYTE (c2);
+ TWO_MORE_BYTES (c1, c2);
if ((c1 == 0xFF) && (c2 == 0xFE))
{
detect_info->found |= (CATEGORY_MASK_UTF_16_LE
| CATEGORY_MASK_UTF_16_BE_NOSIG
| CATEGORY_MASK_UTF_16_LE_NOSIG);
}
+ else if (c2 < 0)
+ {
+ detect_info->rejected |= CATEGORY_MASK_UTF_16;
+ return 0;
+ }
else
{
/* We check the dispersion of Eth and Oth bytes where E is even and
e[c1] = 1;
o[c2] = 1;
- detect_info->rejected
- |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
+ detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
+ |CATEGORY_MASK_UTF_16_BE
+ | CATEGORY_MASK_UTF_16_LE);
- while (1)
+ while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
+ != CATEGORY_MASK_UTF_16)
{
- ONE_MORE_BYTE (c1);
- ONE_MORE_BYTE (c2);
+ TWO_MORE_BYTES (c1, c2);
+ if (c2 < 0)
+ break;
if (! e[c1])
{
e[c1] = 1;
e_num++;
if (e_num >= 128)
- break;
+ detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
}
if (! o[c2])
{
- o[c1] = 1;
+ o[c2] = 1;
o_num++;
if (o_num >= 128)
- break;
+ detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
}
}
- detect_info->rejected |= CATEGORY_MASK_UTF_16;
return 0;
}
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
int *charbuf = coding->charbuf + coding->charbuf_used;
- int *charbuf_end = coding->charbuf + coding->charbuf_size;
- int consumed_chars = 0, consumed_chars_base;
+ /* We may produce at most 3 chars in one loop. */
+ int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
+ int consumed_chars = 0, consumed_chars_base = 0;
int multibytep = coding->src_multibyte;
enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
int surrogate = CODING_UTF_16_SURROGATE (coding);
Lisp_Object attr, charset_list;
- int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+ int eol_crlf =
+ !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
int byte_after_cr1 = -1, byte_after_cr2 = -1;
CODING_GET_INFO (coding, attr, charset_list);
src_base = src;
consumed_chars_base = consumed_chars;
- if (charbuf + 2 >= charbuf_end)
- break;
+ if (charbuf >= charbuf_end)
+ {
+ if (byte_after_cr1 >= 0)
+ src_base -= 2;
+ break;
+ }
if (byte_after_cr1 >= 0)
c1 = byte_after_cr1, byte_after_cr1 = -1;
{
ASSURE_DESTINATION (safe_room);
c = *charbuf++;
- if (c >= MAX_UNICODE_CHAR)
+ if (c > MAX_UNICODE_CHAR)
c = coding->default_char;
if (c < 0x10000)
Next, character composition data are represented by the byte
sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
where,
- METHOD is 0xF0 plus one of composition method (enum
+ METHOD is 0xF2 plus one of composition method (enum
composition_method),
BYTES is 0xA0 plus a byte length of this composition data,
- CHARS is 0x20 plus a number of characters composed by this
+ CHARS is 0xA0 plus a number of characters composed by this
data,
COMPONENTs are characters of multibyte form or composition
char emacs_mule_bytes[256];
+
+/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
+   Check if a text is encoded in `emacs-mule'.  If it is, return 1,
+   else return 0.
+
+   CODING supplies the source text (source, src_bytes, head_ascii,
+   src_multibyte and mode are read).  CATEGORY_MASK_EMACS_MULE is
+   always added to DETECT_INFO->checked.  On return 1 it is also added
+   to DETECT_INFO->found if at least one complete multibyte sequence
+   or composition sequence was seen.  On return 0 it is added to
+   DETECT_INFO->rejected; this happens for an invalid sequence, for
+   ESC, SI or SO, and for a sequence cut short by the end of the last
+   block.  */
+
+static int
+detect_coding_emacs_mule (coding, detect_info)
+     struct coding_system *coding;
+     struct coding_detection_info *detect_info;
+{
+  const unsigned char *src = coding->source, *src_base;
+  const unsigned char *src_end = coding->source + coding->src_bytes;
+  /* MULTIBYTEP and CONSUMED_CHARS are not used directly here; they
+     are presumably required by ONE_MORE_BYTE -- confirm against the
+     macro definition.  */
+  int multibytep = coding->src_multibyte;
+  int consumed_chars = 0;
+  int c;
+  int found = 0;
+
+  detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
+  /* A coding system of this category is always ASCII compatible.  */
+  src += coding->head_ascii;
+
+  while (1)
+    {
+      /* Remember where this sequence starts so that `no_more_source'
+         can tell whether the source ended in the middle of it.  */
+      src_base = src;
+      ONE_MORE_BYTE (c);
+      if (c < 0)
+        continue;
+      if (c == 0x80)
+        {
+          /* Perhaps the start of composite character.  We simply skip
+             it because analyzing it is too heavy for detecting.  But,
+             at least, we check that the composite character
+             consists of more than 4 bytes.  */
+          const unsigned char *src_start;
+
+        repeat:
+          src_start = src;
+          do
+            {
+              ONE_MORE_BYTE (c);
+            }
+          while (c >= 0xA0);
+
+          if (src - src_start <= 4)
+            break;
+          found = CATEGORY_MASK_EMACS_MULE;
+          if (c == 0x80)
+            goto repeat;
+        }
+
+      if (c < 0x80)
+        {
+          /* These control codes are rejected for this category.  */
+          if (c < 0x20
+              && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
+            break;
+        }
+      else
+        {
+          /* Look up the sequence length by C, the leading byte just
+             read.  *SRC_BASE must not be used here: after a
+             composition sequence it still points at the 0x80 starter,
+             not at this leading byte.  */
+          int more_bytes = emacs_mule_bytes[c] - 1;
+
+          while (more_bytes > 0)
+            {
+              ONE_MORE_BYTE (c);
+              if (c < 0xA0)
+                {
+                  src--;	/* Unread the last byte.  */
+                  break;
+                }
+              more_bytes--;
+            }
+          /* A trailing byte below 0xA0 ended the sequence too early.  */
+          if (more_bytes != 0)
+            break;
+          found = CATEGORY_MASK_EMACS_MULE;
+        }
+    }
+  detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
+  return 0;
+
+ no_more_source:
+  /* An incomplete sequence is an error only at the end of the last
+     block.  */
+  if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
+    {
+      detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
+      return 0;
+    }
+  detect_info->found |= found;
+  return 1;
+}
+
+
+/* Parse emacs-mule multibyte sequence at SRC and return the decoded
+ character. If CMP_STATUS indicates that we must expect MSEQ or
+ RULE described above, decode it and return the negative value of
+ the decoded character or rule. If an invalid byte is found, return
+ -1. If SRC is too short, return -2. */
+
int
-emacs_mule_char (coding, src, nbytes, nchars, id)
+emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status)
struct coding_system *coding;
const unsigned char *src;
int *nbytes, *nchars, *id;
+ struct composition_status *cmp_status;
{
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base = src;
unsigned code;
int c;
int consumed_chars = 0;
+ int mseq_found = 0;
ONE_MORE_BYTE (c);
if (c < 0)
{
if (c >= 0xA0)
{
- /* Old style component character of a composition. */
- if (c == 0xA0)
+ if (cmp_status->state != COMPOSING_NO
+ && cmp_status->old_form)
{
- ONE_MORE_BYTE (c);
- c -= 0x80;
+ if (cmp_status->state == COMPOSING_CHAR)
+ {
+ if (c == 0xA0)
+ {
+ ONE_MORE_BYTE (c);
+ c -= 0x80;
+ if (c < 0)
+ goto invalid_code;
+ }
+ else
+ c -= 0x20;
+ mseq_found = 1;
+ }
+ else
+ {
+ *nbytes = src - src_base;
+ *nchars = consumed_chars;
+ return -c;
+ }
}
else
- c -= 0x20;
+ goto invalid_code;
}
switch (emacs_mule_bytes[c])
default:
abort ();
}
- c = DECODE_CHAR (charset, code);
+ CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, code, c);
if (c < 0)
goto invalid_code;
}
*nchars = consumed_chars;
if (id)
*id = charset->id;
- return c;
+ return (mseq_found ? -c : c);
no_more_source:
return -2;
}
-/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
- Check if a text is encoded in `emacs-mule'. If it is, return 1,
- else return 0. */
+/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
-static int
-detect_coding_emacs_mule (coding, detect_info)
- struct coding_system *coding;
- struct coding_detection_info *detect_info;
-{
- const unsigned char *src = coding->source, *src_base;
- const unsigned char *src_end = coding->source + coding->src_bytes;
- int multibytep = coding->src_multibyte;
- int consumed_chars = 0;
- int c;
- int found = 0;
+/* Handle these composition sequences ('|': the end of header elements,
+ BYTES and CHARS >= 0xA0):
- detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
- /* A coding system of this category is always ASCII compatible. */
- src += coding->head_ascii;
+ (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
+ (2) altchar composition: 0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
+ (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
- while (1)
- {
- src_base = src;
- ONE_MORE_BYTE (c);
- if (c < 0)
- continue;
- if (c == 0x80)
- {
- /* Perhaps the start of composite character. We simple skip
- it because analyzing it is too heavy for detecting. But,
- at least, we check that the composite character
- constitutes of more than 4 bytes. */
- const unsigned char *src_base;
+ and these old forms:
+
+ (4) relative composition: 0x80 | MSEQ ... MSEQ
+ (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
- repeat:
- src_base = src;
- do
- {
- ONE_MORE_BYTE (c);
- }
- while (c >= 0xA0);
+ When the starter 0x80 and the following header elements are found,
+ this annotation header is produced.
- if (src - src_base <= 4)
- break;
- found = CATEGORY_MASK_EMACS_MULE;
- if (c == 0x80)
- goto repeat;
- }
+ [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
- if (c < 0x80)
- {
- if (c < 0x20
- && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
- break;
- }
- else
- {
- int more_bytes = emacs_mule_bytes[*src_base] - 1;
+ NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
+ NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
- while (more_bytes > 0)
- {
- ONE_MORE_BYTE (c);
- if (c < 0xA0)
- {
- src--; /* Unread the last byte. */
- break;
- }
- more_bytes--;
- }
- if (more_bytes != 0)
- break;
- found = CATEGORY_MASK_EMACS_MULE;
- }
- }
- detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
- return 0;
+ Then, upon reading the following elements, these codes are produced
+ until the composition end is found:
- no_more_source:
- if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
- {
- detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
- return 0;
- }
- detect_info->found |= found;
- return 1;
-}
+ (1) CHAR ... CHAR
+ (2) ALT ... ALT CHAR ... CHAR
+ (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
+ (4) CHAR ... CHAR
+ (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
+ When the composition end is found, LENGTH and NCHARS in the
+ annotation header are updated as below:
-/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
+ (1) LENGTH: unchanged, NCHARS: unchanged
+ (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
+ (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
+ (4) LENGTH: unchanged, NCHARS: number of CHARs
+ (5) LENGTH: unchanged, NCHARS: number of CHARs
-/* Decode a character represented as a component of composition
- sequence of Emacs 20/21 style at SRC. Set C to that character and
- update SRC to the head of next character (or an encoded composition
- rule). If SRC doesn't points a composition component, set C to -1.
- If SRC points an invalid byte sequence, global exit by a return
- value 0. */
-
-#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
- do \
- { \
- int c; \
- int nbytes, nchars; \
- \
- if (src == src_end) \
- break; \
- c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
- if (c < 0) \
- { \
- if (c == -2) \
- break; \
- goto invalid_code; \
- } \
- *buf++ = c; \
- src += nbytes; \
- consumed_chars += nchars; \
- } \
- while (0)
-
-
-/* Decode a composition rule represented as a component of composition
- sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
- and increment BUF. If SRC points an invalid byte sequence, set C
- to -1. */
-
-#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
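+ If an error is found while composing, the annotation header is
+ changed back to the original composition header, padded to the
+ five-element header length, as below:
+
+ (1),(2),(3) [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
+ (5) [ 0x80 0xFF -3 0 METHOD ]
+
+ In (5), the trailing [ -3 0 METHOD ] is a space holder annotation
+ (its ANNOTATION_MASK is 0) that takes up the rest of the header.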
+
+ and the sequence [ -2 DECODED-RULE ] is changed to the original
+ byte sequence as below:
+ o the original byte sequence is B: [ B -1 ]
+ o the original byte sequence is B1 B2: [ B1 B2 ]
+
+ Most of the routines are implemented by macros because many
+ variables and labels in the caller decode_coding_emacs_mule must be
+ accessible, and they are usually called just once (thus they don't
+ increase the size of the compiled object). */
+
+/* Decode a composition rule represented by C as a component of
+ composition sequence of Emacs 20 style. Set RULE to the decoded
+ rule. */
+
+#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule) \
do { \
- int c, gref, nref; \
- \
- if (src >= src_end) \
- goto invalid_code; \
- ONE_MORE_BYTE_NO_CHECK (c); \
+ int gref, nref; \
+ \
c -= 0xA0; \
if (c < 0 || c >= 81) \
goto invalid_code; \
- \
gref = c / 9, nref = c % 9; \
- *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
+ if (gref == 4) gref = 10; \
+ if (nref == 4) nref = 10; \
+ rule = COMPOSITION_ENCODE_RULE (gref, nref); \
} while (0)
-/* Decode a composition rule represented as a component of composition
- sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
- and increment BUF. If SRC points an invalid byte sequence, set C
- to -1. */
+/* Decode a composition rule represented by C and the following byte
+ at SRC as a component of composition sequence of Emacs 21 style.
+ Set RULE to the decoded rule. */
-#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
+#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule) \
do { \
int gref, nref; \
- \
- if (src + 1>= src_end) \
+ \
+ gref = c - 0x20; \
+ if (gref < 0 || gref >= 81) \
goto invalid_code; \
- ONE_MORE_BYTE_NO_CHECK (gref); \
- gref -= 0x20; \
- ONE_MORE_BYTE_NO_CHECK (nref); \
- nref -= 0x20; \
- if (gref < 0 || gref >= 81 \
- || nref < 0 || nref >= 81) \
+ ONE_MORE_BYTE (c); \
+ nref = c - 0x20; \
+ if (nref < 0 || nref >= 81) \
goto invalid_code; \
- *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
+ rule = COMPOSITION_ENCODE_RULE (gref, nref); \
} while (0)
-#define DECODE_EMACS_MULE_21_COMPOSITION(c) \
+/* Start of Emacs 21 style format. The three header bytes are
+ (METHOD + 0xF2), (BYTES + 0xA0), (CHARS + 0xA0), where BYTES is the
+ byte length of this composition information, CHARS is the number of
+ characters composed by this composition. The first of them has
+ already been read into C; the other two are read from SRC. */
+
+#define DECODE_EMACS_MULE_21_COMPOSITION() \
do { \
- /* Emacs 21 style format. The first three bytes at SRC are \
- (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
- the byte length of this composition information, CHARS is the \
- number of characters composed by this composition. */ \
enum composition_method method = c - 0xF2; \
int *charbuf_base = charbuf; \
- int consumed_chars_limit; \
int nbytes, nchars; \
- \
+ \
ONE_MORE_BYTE (c); \
if (c < 0) \
goto invalid_code; \
nbytes = c - 0xA0; \
- if (nbytes < 3) \
+ if (nbytes < 3 || (method == COMPOSITION_RELATIVE && nbytes != 4)) \
goto invalid_code; \
ONE_MORE_BYTE (c); \
- if (c < 0) \
- goto invalid_code; \
nchars = c - 0xA0; \
- ADD_COMPOSITION_DATA (charbuf, nchars, method); \
- consumed_chars_limit = consumed_chars_base + nbytes; \
- if (method != COMPOSITION_RELATIVE) \
- { \
- int i = 0; \
- while (consumed_chars < consumed_chars_limit) \
- { \
- if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
- DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
- else \
- DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
- i++; \
- } \
- if (consumed_chars < consumed_chars_limit) \
- goto invalid_code; \
- charbuf_base[0] -= i; \
- } \
+ if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS) \
+ goto invalid_code; \
+ cmp_status->old_form = 0; \
+ cmp_status->method = method; \
+ if (method == COMPOSITION_RELATIVE) \
+ cmp_status->state = COMPOSING_CHAR; \
+ else \
+ cmp_status->state = COMPOSING_COMPONENT_CHAR; \
+ cmp_status->length = MAX_ANNOTATION_LENGTH; \
+ cmp_status->nchars = nchars; \
+ cmp_status->ncomps = nbytes - 4; \
+ ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method); \
} while (0)
-#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
- do { \
- /* Emacs 20 style format for relative composition. */ \
- /* Store multibyte form of characters to be composed. */ \
- enum composition_method method = COMPOSITION_RELATIVE; \
- int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
- int *buf = components; \
- int i, j; \
- \
- src = src_base; \
- ONE_MORE_BYTE (c); /* skip 0x80 */ \
- for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++) \
- DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
- if (i < 2) \
- goto invalid_code; \
- ADD_COMPOSITION_DATA (charbuf, i, method); \
- for (j = 0; j < i; j++) \
- *charbuf++ = components[j]; \
+/* Start of Emacs 20 style format for relative composition. */
+
+#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION() \
+ do { \
+ cmp_status->old_form = 1; \
+ cmp_status->method = COMPOSITION_RELATIVE; \
+ cmp_status->state = COMPOSING_CHAR; \
+ cmp_status->length = MAX_ANNOTATION_LENGTH; \
+ cmp_status->nchars = cmp_status->ncomps = 0; \
+ ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method); \
} while (0)
-#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
+/* Start of Emacs 20 style format for rule-base composition. */
+
+#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION() \
do { \
- /* Emacs 20 style format for rule-base composition. */ \
- /* Store multibyte form of characters to be composed. */ \
- enum composition_method method = COMPOSITION_WITH_RULE; \
- int *charbuf_base = charbuf; \
- int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
- int *buf = components; \
- int i, j; \
+ cmp_status->old_form = 1; \
+ cmp_status->method = COMPOSITION_WITH_RULE; \
+ cmp_status->state = COMPOSING_CHAR; \
+ cmp_status->length = MAX_ANNOTATION_LENGTH; \
+ cmp_status->nchars = cmp_status->ncomps = 0; \
+ ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method); \
+ } while (0)
+
+/* Read into C the byte that follows a composition starter 0x80 and
+ begin the kind of composition it announces. A byte from
+ 0xF2 + COMPOSITION_RELATIVE to 0xF2 + COMPOSITION_WITH_RULE_ALTCHARS
+ starts the Emacs 21 style format. A byte in the range 0xA0..0xBF
+ starts the Emacs 20 style relative composition; SRC is then moved
+ back so that the same byte is read again as a composition
+ component. The byte 0xFF starts the Emacs 20 style rule-base
+ composition. For any other byte, or if C is negative, jump to
+ `invalid_code'. The caller must provide the variables C and SRC
+ and the label `invalid_code', together with whatever ONE_MORE_BYTE
+ and the three macros invoked here require. */
+#define DECODE_EMACS_MULE_COMPOSITION_START() \
+ do { \
+ const unsigned char *current_src = src; \
+ \
+ ONE_MORE_BYTE (c); \
+ if (c < 0) \
+ goto invalid_code; \
+ if (c - 0xF2 >= COMPOSITION_RELATIVE \
+ && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS) \
+ DECODE_EMACS_MULE_21_COMPOSITION (); \
+ else if (c < 0xA0) \
+ goto invalid_code; \
+ else if (c < 0xC0) \
+ { \
+ DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (); \
+ /* Re-read C as a composition component. */ \
+ src = current_src; \
+ } \
+ else if (c == 0xFF) \
+ DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (); \
+ else \
+ goto invalid_code; \
+ } while (0)
+
+#define EMACS_MULE_COMPOSITION_END() \
+ do { \
+ int idx = - cmp_status->length; \
\
- DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
- for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++) \
- { \
- if (*src < 0xA0) \
- break; \
- DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
- DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
- } \
- if (i <= 1 || (buf - components) % 2 == 0) \
- goto invalid_code; \
- if (charbuf + i + (i / 2) + 1 >= charbuf_end) \
- goto no_more_source; \
- ADD_COMPOSITION_DATA (charbuf, i, method); \
- i = i * 2 - 1; \
- for (j = 0; j < i; j++) \
- *charbuf++ = components[j]; \
- charbuf_base[0] -= i; \
- for (j = 0; j < i; j += 2) \
- *charbuf++ = components[j]; \
+ if (cmp_status->old_form) \
+ charbuf[idx + 2] = cmp_status->nchars; \
+ else if (cmp_status->method > COMPOSITION_RELATIVE) \
+ charbuf[idx] = charbuf[idx + 2] - cmp_status->length; \
+ cmp_status->state = COMPOSING_NO; \
+ } while (0)
+
+/* Terminate the composition tracked by CMP_STATUS before its normal
+ end is reached. CHARBUF points just past the last element stored
+ for the composition, so CHARBUF[-CMP_STATUS->length] is the first
+ element of its annotation header. An old-form composition that
+ already holds at least one character is kept, and its character
+ count is stored in the header. Otherwise the header is overwritten
+ with the bytes of the original composition header, stored as
+ BYTE8_TO_CHAR values. In both cases CMP_STATUS->state is set to
+ COMPOSING_NO. Return the count of new characters, which
+ EMACS_MULE_MAYBE_FINISH_COMPOSITION adds to its char_offset. */
+static int
+emacs_mule_finish_composition (charbuf, cmp_status)
+ int *charbuf;
+ struct composition_status *cmp_status;
+{
+ int idx = - cmp_status->length; /* Index of the header's first element, relative to CHARBUF. */
+ int new_chars;
+
+ if (cmp_status->old_form && cmp_status->nchars > 0)
+ {
+ charbuf[idx + 2] = cmp_status->nchars; /* The NCHARS slot of the header. */
+ new_chars = 0;
+ if (cmp_status->method == COMPOSITION_WITH_RULE
+ && cmp_status->state == COMPOSING_CHAR)
+ {
+ /* The last rule was invalid. */
+ int rule = charbuf[-1] + 0xA0; /* NOTE(review): presumably meant to rebuild the original rule byte -- confirm. */
+
+ charbuf[-2] = BYTE8_TO_CHAR (rule); /* Overwrites the element before the stored rule. */
+ charbuf[-1] = -1;
+ new_chars = 1;
+ }
+ }
+ else
+ {
+ charbuf[idx++] = BYTE8_TO_CHAR (0x80); /* The composition starter byte. */
+
+ if (cmp_status->method == COMPOSITION_WITH_RULE)
+ {
+ charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
+ charbuf[idx++] = -3; /* This and the next element overwrite the header's third and fourth slots. */
+ charbuf[idx++] = 0; /* The fifth header slot is left as it was. */
+ new_chars = 1;
+ }
+ else
+ {
+ int nchars = charbuf[idx + 1] + 0xA0; /* Read before the slots below are overwritten. */
+ int nbytes = charbuf[idx + 2] + 0xA0;
+
+ charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
+ charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
+ charbuf[idx++] = BYTE8_TO_CHAR (nchars);
+ charbuf[idx++] = -1; /* Fills the last slot of the five-element header. */
+ new_chars = 4;
+ }
+ }
+ cmp_status->state = COMPOSING_NO;
+ return new_chars;
+}
+
+#define EMACS_MULE_MAYBE_FINISH_COMPOSITION() \
+ do { \
+ if (cmp_status->state != COMPOSING_NO) \
+ char_offset += emacs_mule_finish_composition (charbuf, cmp_status); \
} while (0)
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
int *charbuf = coding->charbuf + coding->charbuf_used;
+ /* We may produce two annotations (charset and composition) in one
+ loop and one more charset annotation at the end. */
int *charbuf_end
- = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+ = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
int consumed_chars = 0, consumed_chars_base;
int multibytep = coding->src_multibyte;
Lisp_Object attrs, charset_list;
int char_offset = coding->produced_char;
int last_offset = char_offset;
int last_id = charset_ascii;
- int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+ int eol_crlf =
+ !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
int byte_after_cr = -1;
+ struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
CODING_GET_INFO (coding, attrs, charset_list);
+ if (cmp_status->state != COMPOSING_NO)
+ {
+ int i;
+
+ for (i = 0; i < cmp_status->length; i++)
+ *charbuf++ = cmp_status->carryover[i];
+ coding->annotated = 1;
+ }
+
while (1)
{
- int c;
+ int c, id;
src_base = src;
consumed_chars_base = consumed_chars;
if (charbuf >= charbuf_end)
- break;
+ {
+ if (byte_after_cr >= 0)
+ src_base--;
+ break;
+ }
if (byte_after_cr >= 0)
c = byte_after_cr, byte_after_cr = -1;
else
ONE_MORE_BYTE (c);
- if (c < 0)
+
+ if (c < 0 || c == 0x80)
{
- *charbuf++ = -c;
- char_offset++;
+ EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
+ if (c < 0)
+ {
+ *charbuf++ = -c;
+ char_offset++;
+ }
+ else
+ DECODE_EMACS_MULE_COMPOSITION_START ();
+ continue;
}
- else if (c < 0x80)
+
+ if (c < 0x80)
{
if (eol_crlf && c == '\r')
ONE_MORE_BYTE (byte_after_cr);
- *charbuf++ = c;
- char_offset++;
- }
- else if (c == 0x80)
- {
- ONE_MORE_BYTE (c);
- if (c < 0)
- goto invalid_code;
- if (c - 0xF2 >= COMPOSITION_RELATIVE
- && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
- DECODE_EMACS_MULE_21_COMPOSITION (c);
- else if (c < 0xC0)
- DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
- else if (c == 0xFF)
- DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
- else
- goto invalid_code;
+ id = charset_ascii;
+ if (cmp_status->state != COMPOSING_NO)
+ {
+ if (cmp_status->old_form)
+ EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
+ else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
+ cmp_status->ncomps--;
+ }
}
- else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
+ else
{
- int nbytes, nchars;
- int id;
-
- src = src_base;
- consumed_chars = consumed_chars_base;
- c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
+ int nchars, nbytes;
+ /* emacs_mule_char can load a charset map from a file, which
+ allocates a large structure and might cause buffer text
+ to be relocated as a result. Thus, we need to remember the
+ original pointer to buffer text, and fix up all related
+ pointers after the call. */
+ const unsigned char *orig = coding->source;
+ EMACS_INT offset;
+
+ c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
+ cmp_status);
+ offset = coding->source - orig;
+ if (offset)
+ {
+ src += offset;
+ src_base += offset;
+ src_end += offset;
+ }
if (c < 0)
{
+ if (c == -1)
+ goto invalid_code;
if (c == -2)
break;
- goto invalid_code;
}
+ src = src_base + nbytes;
+ consumed_chars = consumed_chars_base + nchars;
+ if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
+ cmp_status->ncomps -= nchars;
+ }
+
+ /* Now if C >= 0, we found a normally encoded character, if C <
+ 0, we found an old-style composition component character or
+ rule. */
+
+ if (cmp_status->state == COMPOSING_NO)
+ {
if (last_id != id)
{
if (last_id != charset_ascii)
- ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
+ last_id);
last_id = id;
last_offset = char_offset;
}
*charbuf++ = c;
- src += nbytes;
- consumed_chars += nchars;
char_offset++;
}
- else
- goto invalid_code;
+ else if (cmp_status->state == COMPOSING_CHAR)
+ {
+ if (cmp_status->old_form)
+ {
+ if (c >= 0)
+ {
+ EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
+ *charbuf++ = c;
+ char_offset++;
+ }
+ else
+ {
+ *charbuf++ = -c;
+ cmp_status->nchars++;
+ cmp_status->length++;
+ if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
+ EMACS_MULE_COMPOSITION_END ();
+ else if (cmp_status->method == COMPOSITION_WITH_RULE)
+ cmp_status->state = COMPOSING_RULE;
+ }
+ }
+ else
+ {
+ *charbuf++ = c;
+ cmp_status->length++;
+ cmp_status->nchars--;
+ if (cmp_status->nchars == 0)
+ EMACS_MULE_COMPOSITION_END ();
+ }
+ }
+ else if (cmp_status->state == COMPOSING_RULE)
+ {
+ int rule;
+
+ if (c >= 0)
+ {
+ EMACS_MULE_COMPOSITION_END ();
+ *charbuf++ = c;
+ char_offset++;
+ }
+ else
+ {
+ c = -c;
+ DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
+ if (rule < 0)
+ goto invalid_code;
+ *charbuf++ = -2;
+ *charbuf++ = rule;
+ cmp_status->length += 2;
+ cmp_status->state = COMPOSING_CHAR;
+ }
+ }
+ else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
+ {
+ *charbuf++ = c;
+ cmp_status->length++;
+ if (cmp_status->ncomps == 0)
+ cmp_status->state = COMPOSING_CHAR;
+ else if (cmp_status->ncomps > 0)
+ {
+ if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
+ cmp_status->state = COMPOSING_COMPONENT_RULE;
+ }
+ else
+ EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
+ }
+ else /* COMPOSING_COMPONENT_RULE */
+ {
+ int rule;
+
+ DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
+ if (rule < 0)
+ goto invalid_code;
+ *charbuf++ = -2;
+ *charbuf++ = rule;
+ cmp_status->length += 2;
+ cmp_status->ncomps--;
+ if (cmp_status->ncomps > 0)
+ cmp_status->state = COMPOSING_COMPONENT_CHAR;
+ else
+ EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
+ }
+ continue;
+
+ retry:
+ src = src_base;
+ consumed_chars = consumed_chars_base;
continue;
invalid_code:
+ EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
src = src_base;
consumed_chars = consumed_chars_base;
ONE_MORE_BYTE (c);
}
no_more_source:
+ if (cmp_status->state != COMPOSING_NO)
+ {
+ if (coding->mode & CODING_MODE_LAST_BLOCK)
+ EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
+ else
+ {
+ int i;
+
+ charbuf -= cmp_status->length;
+ for (i = 0; i < cmp_status->length; i++)
+ cmp_status->carryover[i] = charbuf[i];
+ }
+ }
if (last_id != charset_ascii)
ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
coding->consumed_char += consumed_chars_base;
if (preferred_charset_id >= 0)
{
charset = CHARSET_FROM_ID (preferred_charset_id);
- if (! CHAR_CHARSET_P (c, charset))
- charset = char_charset (c, charset_list, NULL);
+ if (CHAR_CHARSET_P (c, charset))
+ code = ENCODE_CHAR (charset, c);
+ else
+ charset = char_charset (c, charset_list, &code);
}
else
charset = char_charset (c, charset_list, &code);
#define SAFE_CHARSET_P(coding, id) \
((id) <= (coding)->max_charset_id \
- && (coding)->safe_charsets[id] >= 0)
+ && (coding)->safe_charsets[id] != 255)
#define SHIFT_OUT_OK(category) \
max_charset_id = id;
}
- safe_charsets = Fmake_string (make_number (max_charset_id + 1),
- make_number (255));
+ safe_charsets = make_uninit_string (max_charset_id + 1);
+ memset (SDATA (safe_charsets), 255, max_charset_id + 1);
request = AREF (attrs, coding_attr_iso_request);
reg_usage = AREF (attrs, coding_attr_iso_usage);
reg94 = XINT (XCAR (reg_usage));
int i;
int rejected = 0;
int found = 0;
+ int composition_count = -1;
detect_info->checked |= CATEGORY_MASK_ISO;
continue;
attrs = CODING_ID_ATTRS (this->id);
if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
- && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
+ && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
setup_iso_safe_charsets (attrs);
val = CODING_ATTR_SAFE_CHARSETS (attrs);
this->max_charset_id = SCHARS (val) - 1;
- this->safe_charsets = (char *) SDATA (val);
+ this->safe_charsets = SDATA (val);
}
/* A coding system of this category is always ASCII compatible. */
rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
break;
}
+ else if (c == '1')
+ {
+ /* End of composition. */
+ if (composition_count < 0
+ || composition_count > MAX_COMPOSITION_COMPONENTS)
+ /* Invalid */
+ break;
+ composition_count = -1;
+ found |= CATEGORY_MASK_ISO;
+ }
else if (c >= '0' && c <= '4')
{
/* ESC <Fp> for start/end composition. */
- found |= CATEGORY_MASK_ISO;
+ composition_count = 0;
break;
}
else
continue;
if (c < 0x80)
{
+ if (composition_count >= 0)
+ composition_count++;
single_shifting = 0;
break;
}
int i = 1;
while (src < src_end)
{
+ src_base = src;
ONE_MORE_BYTE (c);
if (c < 0xA0)
- break;
+ {
+ src = src_base;
+ break;
+ }
i++;
}
if (i & 1 && src < src_end)
- rejected |= CATEGORY_MASK_ISO_8_2;
+ {
+ rejected |= CATEGORY_MASK_ISO_8_2;
+ if (composition_count >= 0)
+ composition_count += i;
+ }
else
- found |= CATEGORY_MASK_ISO_8_2;
+ {
+ found |= CATEGORY_MASK_ISO_8_2;
+ if (composition_count >= 0)
+ composition_count += i / 2;
+ }
}
break;
}
} while (0)
-#define MAYBE_FINISH_COMPOSITION() \
- do { \
- int i; \
- if (composition_state == COMPOSING_NO) \
- break; \
- /* It is assured that we have enough room for producing \
- characters stored in the table `components'. */ \
- if (charbuf + component_idx > charbuf_end) \
- goto no_more_source; \
- composition_state = COMPOSING_NO; \
- if (method == COMPOSITION_RELATIVE \
- || method == COMPOSITION_WITH_ALTCHARS) \
- { \
- for (i = 0; i < component_idx; i++) \
- *charbuf++ = components[i]; \
- char_offset += component_idx; \
- } \
- else \
- { \
- for (i = 0; i < component_idx; i += 2) \
- *charbuf++ = components[i]; \
- char_offset += (component_idx / 2) + 1; \
- } \
- } while (0)
+/* Handle these composition sequences (ALT: alternate char):
+ (1) relative composition: ESC 0 CHAR ... ESC 1
+ (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
+ (3) altchar composition: ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
+ (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
-/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
- ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
- ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
- ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
- ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
- */
+ When the start sequence (ESC 0/2/3/4) is found, this annotation
+ header is produced.
+
+ [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
+
+ Then, upon reading CHAR or RULE (one or two bytes), these codes are
+ produced until the end sequence (ESC 1) is found:
+
+ (1) CHAR ... CHAR
+ (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
+ (3) ALT ... ALT -1 -1 CHAR ... CHAR
+ (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
+
+ When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
+ annotation header are updated as below:
-#define DECODE_COMPOSITION_START(c1) \
+ (1) LENGTH: unchanged, NCHARS: number of CHARs
+ (2) LENGTH: unchanged, NCHARS: number of CHARs
+ (3) LENGTH: += number of ALTs + 2, NCHARS: number of CHARs
+ (4) LENGTH: += number of ALTs * 3, NCHARS: number of CHARs
+
+ If an error is found while composing, the annotation header is
+ changed to:
+
+ [ ESC '0'/'2'/'3'/'4' -2 0 -1 ]
+
+ and the sequence [ -2 DECODED-RULE ] is changed to the original
+ byte sequence as below:
+ o the original byte sequence is B: [ B -1 ]
+ o the original byte sequence is B1 B2: [ B1 B2 ]
+ and the sequence [ -1 -1 ] is changed to the original byte
+ sequence:
+ [ ESC '0' ]
+*/
+
+/* Decode a composition rule C1 and maybe one more byte from the
+ source, and set RULE to the encoded composition rule, NBYTES to the
+ length of the composition rule. If the rule is invalid, set RULE
+ to some negative value.
+
+ C1 is not a macro argument; it is read from the scope in which the
+ macro is expanded. When C1 - 32 is negative, RULE is left negative
+ and NBYTES is not assigned. When C1 - 32 is below 81 the rule is in
+ the old one-byte format. Otherwise it is treated as the new
+ two-byte format: the second byte is fetched with ONE_MORE_BYTE, and
+ a non-negative result gets 0x100 added to it. */
+
+#define DECODE_COMPOSITION_RULE(rule, nbytes) \
do { \
- if (c1 == '0' \
- && composition_state == COMPOSING_COMPONENT_RULE) \
+ rule = c1 - 32; \
+ if (rule < 0) \
+ break; \
+ if (rule < 81) /* old format (before ver.21) */ \
{ \
- component_len = component_idx; \
- composition_state = COMPOSING_CHAR; \
+ int gref = (rule) / 9; \
+ int nref = (rule) % 9; \
+ if (gref == 4) gref = 10; \
+ if (nref == 4) nref = 10; \
+ rule = COMPOSITION_ENCODE_RULE (gref, nref); \
+ nbytes = 1; \
} \
- else \
+ else /* new format (after ver.21) */ \
{ \
- const unsigned char *p; \
+ int c; \
\
- MAYBE_FINISH_COMPOSITION (); \
- if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
- goto no_more_source; \
- for (p = src; p < src_end - 1; p++) \
- if (*p == ISO_CODE_ESC && p[1] == '1') \
- break; \
- if (p == src_end - 1) \
- { \
- /* The current composition doesn't end in the current \
- source. */ \
- record_conversion_result \
- (coding, CODING_RESULT_INSUFFICIENT_SRC); \
- goto no_more_source; \
- } \
- \
- /* This is surely the start of a composition. */ \
- method = (c1 == '0' ? COMPOSITION_RELATIVE \
- : c1 == '2' ? COMPOSITION_WITH_RULE \
- : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
- : COMPOSITION_WITH_RULE_ALTCHARS); \
- composition_state = (c1 <= '2' ? COMPOSING_CHAR \
- : COMPOSING_COMPONENT_CHAR); \
- component_idx = component_len = 0; \
+ ONE_MORE_BYTE (c); \
+ rule = COMPOSITION_ENCODE_RULE (rule - 81, c - 32); \
+ if (rule >= 0) \
+ rule += 0x100; /* to distinguish it from the old format */ \
+ nbytes = 2; \
} \
} while (0)
+
+/* Re-encode the decoded composition rule RULE into its original byte
+ form, stored in place at charbuf[idx] and charbuf[idx + 1].
+
+ A RULE below 0x100 is in the old one-byte format: the byte is
+ stored at charbuf[idx], -1 is stored at charbuf[idx + 1], and
+ new_chars is incremented by one. Otherwise RULE is in the new
+ two-byte format: both bytes are stored and new_chars is incremented
+ by two.
+
+ The names charbuf, idx and new_chars are taken from the scope in
+ which the macro is expanded. RULE is evaluated more than once; it
+ is parenthesized so that any expression may be passed safely. */
+#define ENCODE_COMPOSITION_RULE(rule) \
+ do { \
+ int gref = ((rule) % 0x100) / 12, nref = ((rule) % 0x100) % 12; \
+ \
+ if ((rule) < 0x100) /* old format */ \
+ { \
+ if (gref == 10) gref = 4; \
+ if (nref == 10) nref = 4; \
+ charbuf[idx] = 32 + gref * 9 + nref; \
+ charbuf[idx + 1] = -1; \
+ new_chars++; \
+ } \
+ else /* new format */ \
+ { \
+ charbuf[idx] = 32 + 81 + gref; \
+ charbuf[idx + 1] = 32 + nref; \
+ new_chars += 2; \
+ } \
+ } while (0)
+
+/* Finish the current composition as invalid.
+
+ CHARBUF points just past the codes produced so far for the
+ composition described by CMP_STATUS; the CMP_STATUS->length entries
+ before it are rewritten in place. The first five of those entries
+ are overwritten with ESC, the start character of the method ('0',
+ '2', '3' or '4'), -2, 0 and -1. For methods at or above
+ COMPOSITION_WITH_RULE the remaining entries are scanned: each
+ [ -2 RULE ] pair is re-encoded by ENCODE_COMPOSITION_RULE, and each
+ -1 entry together with the entry after it is replaced by ESC '0'.
+
+ Set CMP_STATUS->state to COMPOSING_NO and return CMP_STATUS->nchars
+ plus the counts added for the re-encoded rules and ESC '0' pairs. */
+
+static int finish_composition P_ ((int *, struct composition_status *));
+
+static int
+finish_composition (charbuf, cmp_status)
+ int *charbuf;
+ struct composition_status *cmp_status;
+{
+ int idx = - cmp_status->length;
+ int new_chars;
+
+ /* Recover the original ESC sequence */
+ charbuf[idx++] = ISO_CODE_ESC;
+ charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
+ : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
+ : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
+ /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
+ : '4');
+ charbuf[idx++] = -2;
+ charbuf[idx++] = 0;
+ charbuf[idx++] = -1;
+ new_chars = cmp_status->nchars;
+ if (cmp_status->method >= COMPOSITION_WITH_RULE)
+ for (; idx < 0; idx++)
+ {
+ int elt = charbuf[idx];
+
+ if (elt == -2)
+ {
+ ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
+ idx++;
+ }
+ else if (elt == -1)
+ {
+ charbuf[idx++] = ISO_CODE_ESC;
+ charbuf[idx] = '0';
+ new_chars += 2;
+ }
+ }
+ cmp_status->state = COMPOSING_NO;
+ return new_chars;
+}
+
+/* If characters are under composition (cmp_status->state is not
+ COMPOSING_NO), finish the composition with finish_composition and
+ add its return value to char_offset. The names cmp_status, charbuf
+ and char_offset are taken from the scope in which the macro is
+ expanded. */
+#define MAYBE_FINISH_COMPOSITION() \
+ do { \
+ if (cmp_status->state != COMPOSING_NO) \
+ char_offset += finish_composition (charbuf, cmp_status); \
+ } while (0)
+
+/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
+
+ ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
+ ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
+ ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
+ ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
+
+ Produce this annotation sequence now:
+
+ [ -LENGTH(==-4) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) METHOD ]
+
+ NOTE(review): ADD_COMPOSITION_DATA is called below with two zero
+ arguments before the method, and cmp_status->length is then set to
+ MAX_ANNOTATION_LENGTH, so the header presumably has one more
+ element than listed here -- confirm against ADD_COMPOSITION_DATA.
+
+ ESC 0 does not start a new composition when it arrives in state
+ COMPOSING_COMPONENT_CHAR of an altchar composition or in state
+ COMPOSING_COMPONENT_RULE of an alt&rule composition: the pair
+ [ -1 -1 ] is stored instead and the state becomes COMPOSING_CHAR.
+ In every other case any composition in progress is first finished
+ with MAYBE_FINISH_COMPOSITION, then the method, state and counters
+ in cmp_status are reset and coding->annotated is set.
+*/
+
+#define DECODE_COMPOSITION_START(c1) \
+ do { \
+ if (c1 == '0' \
+ && ((cmp_status->state == COMPOSING_COMPONENT_CHAR \
+ && cmp_status->method == COMPOSITION_WITH_ALTCHARS) \
+ || (cmp_status->state == COMPOSING_COMPONENT_RULE \
+ && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \
+ { \
+ *charbuf++ = -1; \
+ *charbuf++= -1; \
+ cmp_status->state = COMPOSING_CHAR; \
+ cmp_status->length += 2; \
+ } \
+ else \
+ { \
+ MAYBE_FINISH_COMPOSITION (); \
+ cmp_status->method = (c1 == '0' ? COMPOSITION_RELATIVE \
+ : c1 == '2' ? COMPOSITION_WITH_RULE \
+ : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
+ : COMPOSITION_WITH_RULE_ALTCHARS); \
+ cmp_status->state \
+ = (c1 <= '2' ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR); \
+ ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method); \
+ cmp_status->length = MAX_ANNOTATION_LENGTH; \
+ cmp_status->nchars = cmp_status->ncomps = 0; \
+ coding->annotated = 1; \
+ } \
+ } while (0)
+
-/* Handle compositoin end sequence ESC 1. */
+/* Handle composition end sequence ESC 1.
+
+ The sequence is rejected -- the composition is finished as invalid
+ and control jumps to invalid_code -- when no composed char has been
+ read, or when the state does not fit the method: a rulebase
+ composition must not end in state COMPOSING_CHAR, and every other
+ method must end in it.
+
+ Otherwise the annotation header, which starts cmp_status->length
+ entries back from charbuf, is completed: its first entry is
+ decreased by ncomps + 2 (altchar) or ncomps * 3 (alt&rule), its
+ third entry is set to nchars, char_offset is advanced by nchars and
+ the state becomes COMPOSING_NO. */
#define DECODE_COMPOSITION_END() \
do { \
- int nchars = (component_len > 0 ? component_idx - component_len \
- : method == COMPOSITION_RELATIVE ? component_idx \
- : (component_idx + 1) / 2); \
- int i; \
- int *saved_charbuf = charbuf; \
- \
- ADD_COMPOSITION_DATA (charbuf, nchars, method); \
- if (method != COMPOSITION_RELATIVE) \
+ if (cmp_status->nchars == 0 \
+ || ((cmp_status->state == COMPOSING_CHAR) \
+ == (cmp_status->method == COMPOSITION_WITH_RULE))) \
{ \
- if (component_len == 0) \
- for (i = 0; i < component_idx; i++) \
- *charbuf++ = components[i]; \
- else \
- for (i = 0; i < component_len; i++) \
- *charbuf++ = components[i]; \
- *saved_charbuf = saved_charbuf - charbuf; \
+ MAYBE_FINISH_COMPOSITION (); \
+ goto invalid_code; \
} \
- if (method == COMPOSITION_WITH_RULE) \
- for (i = 0; i < component_idx; i += 2, char_offset++) \
- *charbuf++ = components[i]; \
- else \
- for (i = component_len; i < component_idx; i++, char_offset++) \
- *charbuf++ = components[i]; \
- coding->annotated = 1; \
- composition_state = COMPOSING_NO; \
+ if (cmp_status->method == COMPOSITION_WITH_ALTCHARS) \
+ charbuf[- cmp_status->length] -= cmp_status->ncomps + 2; \
+ else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS) \
+ charbuf[- cmp_status->length] -= cmp_status->ncomps * 3; \
+ charbuf[- cmp_status->length + 2] = cmp_status->nchars; \
+ char_offset += cmp_status->nchars; \
+ cmp_status->state = COMPOSING_NO; \
} while (0)
+/* Store a composition rule RULE in charbuf, and update cmp_status.
+ Two entries are stored, the marker -2 followed by RULE, and
+ cmp_status->length grows by two. The state is then decremented,
+ which presumably steps from a ..._RULE state back to the matching
+ ..._CHAR state -- confirm against the COMPOSING_* definitions. */
+
+#define STORE_COMPOSITION_RULE(rule) \
+ do { \
+ *charbuf++ = -2; \
+ *charbuf++ = rule; \
+ cmp_status->length += 2; \
+ cmp_status->state--; \
+ } while (0)
-/* Decode a composition rule from the byte C1 (and maybe one more byte
- from SRC) and store one encoded composition rule in
- coding->cmp_data. */
+/* Store a composed char or a component char C in charbuf, and update
+ cmp_status. In state COMPOSING_CHAR the char is counted in nchars,
+ otherwise in ncomps. When the method is rulebase composition, or
+ alt&rule composition while in state COMPOSING_COMPONENT_CHAR, the
+ state is incremented afterwards. */
-#define DECODE_COMPOSITION_RULE(c1) \
+#define STORE_COMPOSITION_CHAR(c) \
do { \
- (c1) -= 32; \
- if (c1 < 81) /* old format (before ver.21) */ \
- { \
- int gref = (c1) / 9; \
- int nref = (c1) % 9; \
- if (gref == 4) gref = 10; \
- if (nref == 4) nref = 10; \
- c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
- } \
- else if (c1 < 93) /* new format (after ver.21) */ \
- { \
- ONE_MORE_BYTE (c2); \
- c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
- } \
+ *charbuf++ = (c); \
+ cmp_status->length++; \
+ if (cmp_status->state == COMPOSING_CHAR) \
+ cmp_status->nchars++; \
else \
- c1 = 0; \
+ cmp_status->ncomps++; \
+ if (cmp_status->method == COMPOSITION_WITH_RULE \
+ || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS \
+ && cmp_status->state == COMPOSING_COMPONENT_CHAR)) \
+ cmp_status->state++; \
} while (0)
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
int *charbuf = coding->charbuf + coding->charbuf_used;
+ /* We may produce two annotations (charset and composition) in one
+ loop and one more charset annotation at the end. */
int *charbuf_end
- = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
+ = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
int consumed_chars = 0, consumed_chars_base;
int multibytep = coding->src_multibyte;
/* Charsets invoked to graphic plane 0 and 1 respectively. */
int charset_id_2, charset_id_3;
struct charset *charset;
int c;
- /* For handling composition sequence. */
-#define COMPOSING_NO 0
-#define COMPOSING_CHAR 1
-#define COMPOSING_RULE 2
-#define COMPOSING_COMPONENT_CHAR 3
-#define COMPOSING_COMPONENT_RULE 4
-
- int composition_state = COMPOSING_NO;
- enum composition_method method;
- int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
- int component_idx;
- int component_len;
+ struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
Lisp_Object attrs, charset_list;
int char_offset = coding->produced_char;
int last_offset = char_offset;
int last_id = charset_ascii;
- int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+ int eol_crlf =
+ !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
int byte_after_cr = -1;
+ int i;
CODING_GET_INFO (coding, attrs, charset_list);
setup_iso_safe_charsets (attrs);
/* Charset list may have been changed. */
charset_list = CODING_ATTR_CHARSET_LIST (attrs);
- coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
+ coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
+
+ if (cmp_status->state != COMPOSING_NO)
+ {
+ for (i = 0; i < cmp_status->length; i++)
+ *charbuf++ = cmp_status->carryover[i];
+ coding->annotated = 1;
+ }
while (1)
{
- int c1, c2;
+ int c1, c2, c3;
src_base = src;
consumed_chars_base = consumed_chars;
if (charbuf >= charbuf_end)
- break;
+ {
+ if (byte_after_cr >= 0)
+ src_base--;
+ break;
+ }
if (byte_after_cr >= 0)
c1 = byte_after_cr, byte_after_cr = -1;
if (c1 < 0)
goto invalid_code;
- /* We produce at most one character. */
- switch (iso_code_class [c1])
+ if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
{
- case ISO_0x20_or_0x7F:
- if (composition_state != COMPOSING_NO)
+ *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
+ char_offset++;
+ CODING_ISO_EXTSEGMENT_LEN (coding)--;
+ continue;
+ }
+
+ if (CODING_ISO_EMBEDDED_UTF_8 (coding))
+ {
+ if (c1 == ISO_CODE_ESC)
{
- if (composition_state == COMPOSING_RULE
- || composition_state == COMPOSING_COMPONENT_RULE)
+ if (src + 1 >= src_end)
+ goto no_more_source;
+ *charbuf++ = ISO_CODE_ESC;
+ char_offset++;
+ if (src[0] == '%' && src[1] == '@')
{
- DECODE_COMPOSITION_RULE (c1);
- components[component_idx++] = c1;
- composition_state--;
- continue;
+ src += 2;
+ consumed_chars += 2;
+ char_offset += 2;
+ /* We are sure charbuf can contain two more chars. */
+ *charbuf++ = '%';
+ *charbuf++ = '@';
+ CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
}
}
+ else
+ {
+ *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
+ char_offset++;
+ }
+ continue;
+ }
+
+ if ((cmp_status->state == COMPOSING_RULE
+ || cmp_status->state == COMPOSING_COMPONENT_RULE)
+ && c1 != ISO_CODE_ESC)
+ {
+ int rule, nbytes;
+
+ DECODE_COMPOSITION_RULE (rule, nbytes);
+ if (rule < 0)
+ goto invalid_code;
+ STORE_COMPOSITION_RULE (rule);
+ continue;
+ }
+
+ /* We produce at most one character. */
+ switch (iso_code_class [c1])
+ {
+ case ISO_0x20_or_0x7F:
if (charset_id_0 < 0
|| ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
/* This is SPACE or DEL. */
break;
case ISO_graphic_plane_0:
- if (composition_state != COMPOSING_NO)
- {
- if (composition_state == COMPOSING_RULE
- || composition_state == COMPOSING_COMPONENT_RULE)
- {
- DECODE_COMPOSITION_RULE (c1);
- components[component_idx++] = c1;
- composition_state--;
- continue;
- }
- }
if (charset_id_0 < 0)
charset = CHARSET_FROM_ID (charset_ascii);
else
break;
case ISO_control_1:
- MAYBE_FINISH_COMPOSITION ();
goto invalid_code;
case ISO_shift_out:
continue;
case ISO_single_shift_2_7:
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
+ goto invalid_code;
case ISO_single_shift_2:
if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
goto invalid_code;
case '0': case '2': case '3': case '4': /* start composition */
if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
goto invalid_code;
+ if (last_id != charset_ascii)
+ {
+ ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
+ last_id = charset_ascii;
+ last_offset = char_offset;
+ }
DECODE_COMPOSITION_START (c1);
continue;
case '1': /* end composition */
- if (composition_state == COMPOSING_NO)
+ if (cmp_status->state == COMPOSING_NO)
goto invalid_code;
DECODE_COMPOSITION_END ();
continue;
case '[': /* specification of direction */
- if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
goto invalid_code;
/* For the moment, nested direction is not supported.
So, `coding->mode & CODING_MODE_DIRECTION' zero means
int size;
ONE_MORE_BYTE (dim);
+ if (dim < 0 || dim > 4)
+ goto invalid_code;
ONE_MORE_BYTE (M);
+ if (M < 128)
+ goto invalid_code;
ONE_MORE_BYTE (L);
+ if (L < 128)
+ goto invalid_code;
size = ((M - 128) * 128) + (L - 128);
- if (charbuf + 8 + size > charbuf_end)
+ if (charbuf + 6 > charbuf_end)
goto break_loop;
*charbuf++ = ISO_CODE_ESC;
*charbuf++ = '%';
*charbuf++ = dim;
*charbuf++ = BYTE8_TO_CHAR (M);
*charbuf++ = BYTE8_TO_CHAR (L);
- while (size-- > 0)
- {
- ONE_MORE_BYTE (c1);
- *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
- }
+ CODING_ISO_EXTSEGMENT_LEN (coding) = size;
}
else if (c1 == 'G')
{
ESC % G --UTF-8-BYTES-- ESC % @
We keep these bytes as is for the moment.
They may be decoded by post-read-conversion. */
- int *p = charbuf;
-
- if (p + 6 > charbuf_end)
- goto break_loop;
- *p++ = ISO_CODE_ESC;
- *p++ = '%';
- *p++ = 'G';
- while (p < charbuf_end)
- {
- ONE_MORE_BYTE (c1);
- if (c1 == ISO_CODE_ESC
- && src + 1 < src_end
- && src[0] == '%'
- && src[1] == '@')
- {
- src += 2;
- break;
- }
- *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
- }
- if (p + 3 > charbuf_end)
+ if (charbuf + 3 > charbuf_end)
goto break_loop;
- *p++ = ISO_CODE_ESC;
- *p++ = '%';
- *p++ = '@';
- charbuf = p;
+ *charbuf++ = ISO_CODE_ESC;
+ *charbuf++ = '%';
+ *charbuf++ = 'G';
+ CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
}
else
goto invalid_code;
}
}
- if (charset->id != charset_ascii
+ if (cmp_status->state == COMPOSING_NO
+ && charset->id != charset_ascii
&& last_id != charset->id)
{
if (last_id != charset_ascii)
}
/* Now we know CHARSET and 1st position code C1 of a character.
- Produce a decoded character while getting 2nd position code
- C2 if necessary. */
- c1 &= 0x7F;
+ Produce a decoded character while getting 2nd and 3rd
+ position codes C2, C3 if necessary. */
if (CHARSET_DIMENSION (charset) > 1)
{
ONE_MORE_BYTE (c2);
- if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
+ if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
+ || ((c1 & 0x80) != (c2 & 0x80)))
/* C2 is not in a valid range. */
goto invalid_code;
- c1 = (c1 << 8) | (c2 & 0x7F);
- if (CHARSET_DIMENSION (charset) > 2)
+ if (CHARSET_DIMENSION (charset) == 2)
+ c1 = (c1 << 8) | c2;
+ else
{
- ONE_MORE_BYTE (c2);
- if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
- /* C2 is not in a valid range. */
+ ONE_MORE_BYTE (c3);
+ if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
+ || ((c1 & 0x80) != (c3 & 0x80)))
+ /* C3 is not in a valid range. */
goto invalid_code;
- c1 = (c1 << 8) | (c2 & 0x7F);
+ /* Pack all three position codes; C3, validated just above,
+ is the lowest byte. */
+ c1 = (c1 << 16) | (c2 << 8) | c3;
}
}
-
+ c1 &= 0x7F7F7F;
CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
if (c < 0)
{
*charbuf++ = BYTE8_TO_CHAR (*src_base);
}
}
- else if (composition_state == COMPOSING_NO)
+ else if (cmp_status->state == COMPOSING_NO)
{
*charbuf++ = c;
char_offset++;
}
- else
+ else if ((cmp_status->state == COMPOSING_CHAR
+ ? cmp_status->nchars
+ : cmp_status->ncomps)
+ >= MAX_COMPOSITION_COMPONENTS)
{
- components[component_idx++] = c;
- if (method == COMPOSITION_WITH_RULE
- || (method == COMPOSITION_WITH_RULE_ALTCHARS
- && composition_state == COMPOSING_COMPONENT_CHAR))
- composition_state++;
+ /* Too long composition. */
+ MAYBE_FINISH_COMPOSITION ();
+ *charbuf++ = c;
+ char_offset++;
}
+ else
+ STORE_COMPOSITION_CHAR (c);
continue;
invalid_code:
}
no_more_source:
- if (last_id != charset_ascii)
+ if (cmp_status->state != COMPOSING_NO)
+ {
+ if (coding->mode & CODING_MODE_LAST_BLOCK)
+ MAYBE_FINISH_COMPOSITION ();
+ else
+ {
+ charbuf -= cmp_status->length;
+ for (i = 0; i < cmp_status->length; i++)
+ cmp_status->carryover[i] = charbuf[i];
+ }
+ }
+ else if (last_id != charset_ascii)
ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
coding->consumed_char += consumed_chars_base;
coding->consumed = src_base - coding->source;
int preferred_charset_id = -1;
CODING_GET_INFO (coding, attrs, charset_list);
- eol_type = CODING_ID_EOL_TYPE (coding->id);
+ eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
if (VECTORP (eol_type))
eol_type = Qunix;
setup_iso_safe_charsets (attrs);
/* Charset list may have been changed. */
charset_list = CODING_ATTR_CHARSET_LIST (attrs);
- coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
+ coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
int consumed_chars = 0;
int found = 0;
int c;
+ Lisp_Object attrs, charset_list;
+ int max_first_byte_of_2_byte_code;
+
+ CODING_GET_INFO (coding, attrs, charset_list);
+ max_first_byte_of_2_byte_code
+ = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
detect_info->checked |= CATEGORY_MASK_SJIS;
/* A coding system of this category is always ASCII compatible. */
ONE_MORE_BYTE (c);
if (c < 0x80)
continue;
- if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
+ if ((c >= 0x81 && c <= 0x9F)
+ || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
{
ONE_MORE_BYTE (c);
if (c < 0x40 || c == 0x7F || c > 0xFC)
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
int *charbuf = coding->charbuf + coding->charbuf_used;
+ /* We may produce one charset annotation in one loop and one more at
+ the end. */
int *charbuf_end
- = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+ = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
int consumed_chars = 0, consumed_chars_base;
int multibytep = coding->src_multibyte;
struct charset *charset_roman, *charset_kanji, *charset_kana;
int char_offset = coding->produced_char;
int last_offset = char_offset;
int last_id = charset_ascii;
- int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+ int eol_crlf =
+ !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
int byte_after_cr = -1;
CODING_GET_INFO (coding, attrs, charset_list);
consumed_chars_base = consumed_chars;
if (charbuf >= charbuf_end)
- break;
+ {
+ if (byte_after_cr >= 0)
+ src_base--;
+ break;
+ }
if (byte_after_cr >= 0)
c = byte_after_cr, byte_after_cr = -1;
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
int *charbuf = coding->charbuf + coding->charbuf_used;
+ /* We may produce one charset annotation in one loop and one more at
+ the end. */
int *charbuf_end
- = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+ = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
int consumed_chars = 0, consumed_chars_base;
int multibytep = coding->src_multibyte;
struct charset *charset_roman, *charset_big5;
int char_offset = coding->produced_char;
int last_offset = char_offset;
int last_id = charset_ascii;
- int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+ int eol_crlf =
+ !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
int byte_after_cr = -1;
CODING_GET_INFO (coding, attrs, charset_list);
consumed_chars_base = consumed_chars;
if (charbuf >= charbuf_end)
- break;
+ {
+ if (byte_after_cr >= 0)
+ src_base--;
+ break;
+ }
if (byte_after_cr >= 0)
c = byte_after_cr, byte_after_cr = -1;
int c1, c2;
c1 = code >> 8;
- if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
+ if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
+ || c1 == 0x28
|| (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
{
JIS_TO_SJIS2 (code);
int *charbuf_end = coding->charbuf + coding->charbuf_size;
int consumed_chars = 0;
int multibytep = coding->src_multibyte;
- struct ccl_program ccl;
+ struct ccl_program *ccl = &coding->spec.ccl->ccl;
int source_charbuf[1024];
- int source_byteidx[1024];
+ int source_byteidx[1025];
Lisp_Object attrs, charset_list;
CODING_GET_INFO (coding, attrs, charset_list);
- setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
- while (src < src_end)
+ while (1)
{
const unsigned char *p = src;
- int *source, *source_end;
int i = 0;
if (multibytep)
- while (i < 1024 && p < src_end)
- {
- source_byteidx[i] = p - src;
- source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
- }
+ {
+ while (i < 1024 && p < src_end)
+ {
+ source_byteidx[i] = p - src;
+ source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
+ }
+ source_byteidx[i] = p - src;
+ }
else
while (i < 1024 && p < src_end)
source_charbuf[i++] = *p++;
if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
- ccl.last_block = 1;
-
- source = source_charbuf;
- source_end = source + i;
- while (source < source_end)
- {
- ccl_driver (&ccl, source, charbuf,
- source_end - source, charbuf_end - charbuf,
- charset_list);
- source += ccl.consumed;
- charbuf += ccl.produced;
- if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
- break;
- }
- if (source < source_end)
- src += source_byteidx[source - source_charbuf];
+ ccl->last_block = 1;
+ ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
+ charset_list);
+ charbuf += ccl->produced;
+ if (multibytep)
+ src += source_byteidx[ccl->consumed];
else
- src = p;
- consumed_chars += source - source_charbuf;
-
- if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
- && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
+ src += ccl->consumed;
+ consumed_chars += ccl->consumed;
+ if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
break;
}
- switch (ccl.status)
+ switch (ccl->status)
{
case CCL_STAT_SUSPEND_BY_SRC:
record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
break;
case CCL_STAT_SUSPEND_BY_DST:
+ record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
break;
case CCL_STAT_QUIT:
case CCL_STAT_INVALID_CMD:
encode_coding_ccl (coding)
struct coding_system *coding;
{
- struct ccl_program ccl;
+ struct ccl_program *ccl = &coding->spec.ccl->ccl;
int multibytep = coding->dst_multibyte;
int *charbuf = coding->charbuf;
int *charbuf_end = charbuf + coding->charbuf_used;
Lisp_Object attrs, charset_list;
CODING_GET_INFO (coding, attrs, charset_list);
- setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
-
- ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
- ccl.dst_multibyte = coding->dst_multibyte;
+ if (coding->consumed_char == coding->src_chars
+ && coding->mode & CODING_MODE_LAST_BLOCK)
+ ccl->last_block = 1;
while (charbuf < charbuf_end)
{
- ccl_driver (&ccl, charbuf, destination_charbuf,
+ ccl_driver (ccl, charbuf, destination_charbuf,
charbuf_end - charbuf, 1024, charset_list);
if (multibytep)
{
- ASSURE_DESTINATION (ccl.produced * 2);
- for (i = 0; i < ccl.produced; i++)
+ ASSURE_DESTINATION (ccl->produced * 2);
+ for (i = 0; i < ccl->produced; i++)
EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
}
else
{
- ASSURE_DESTINATION (ccl.produced);
- for (i = 0; i < ccl.produced; i++)
+ ASSURE_DESTINATION (ccl->produced);
+ for (i = 0; i < ccl->produced; i++)
*dst++ = destination_charbuf[i] & 0xFF;
- produced_chars += ccl.produced;
+ produced_chars += ccl->produced;
}
- charbuf += ccl.consumed;
- if (ccl.status == CCL_STAT_QUIT
- || ccl.status == CCL_STAT_INVALID_CMD)
+ charbuf += ccl->consumed;
+ if (ccl->status == CCL_STAT_QUIT
+ || ccl->status == CCL_STAT_INVALID_CMD)
break;
}
- switch (ccl.status)
+ switch (ccl->status)
{
case CCL_STAT_SUSPEND_BY_SRC:
record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
decode_coding_raw_text (coding)
struct coding_system *coding;
{
- int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+ int eol_crlf =
+ !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
coding->chars_at_source = 1;
coding->consumed_char = coding->src_chars;
const unsigned char *src_end = coding->source + coding->src_bytes;
int multibytep = coding->src_multibyte;
int consumed_chars = 0;
- Lisp_Object attrs, valids;
+ Lisp_Object attrs, valids, name;
int found = 0;
int head_ascii = coding->head_ascii;
+ int check_latin_extra = 0;
detect_info->checked |= CATEGORY_MASK_CHARSET;
coding = &coding_categories[coding_category_charset];
attrs = CODING_ID_ATTRS (coding->id);
valids = AREF (attrs, coding_attr_charset_valids);
+ name = CODING_ID_NAME (coding->id);
+ if (strncmp ((char *) SDATA (SYMBOL_NAME (name)),
+ "iso-8859-", sizeof ("iso-8859-") - 1) == 0
+ || strncmp ((char *) SDATA (SYMBOL_NAME (name)),
+ "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
+ check_latin_extra = 1;
if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
src += head_ascii;
if (NILP (val))
break;
if (c >= 0x80)
- found = CATEGORY_MASK_CHARSET;
+ {
+ if (c < 0xA0
+ && check_latin_extra
+ && (!VECTORP (Vlatin_extra_code_table)
+ || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])))
+ break;
+ found = CATEGORY_MASK_CHARSET;
+ }
if (INTEGERP (val))
{
charset = CHARSET_FROM_ID (XFASTINT (val));
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
int *charbuf = coding->charbuf + coding->charbuf_used;
+ /* We may produce one charset annotation in one loop and one more at
+ the end. */
int *charbuf_end
- = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+ = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
int consumed_chars = 0, consumed_chars_base;
int multibytep = coding->src_multibyte;
Lisp_Object attrs, charset_list, valids;
int char_offset = coding->produced_char;
int last_offset = char_offset;
int last_id = charset_ascii;
- int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+ int eol_crlf =
+ !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
int byte_after_cr = -1;
CODING_GET_INFO (coding, attrs, charset_list);
consumed_chars_base = consumed_chars;
if (charbuf >= charbuf_end)
- break;
+ {
+ if (byte_after_cr >= 0)
+ src_base--;
+ break;
+ }
if (byte_after_cr >= 0)
{
code = c;
val = AREF (valids, c);
- if (NILP (val))
+ if (! INTEGERP (val) && ! CONSP (val))
goto invalid_code;
if (INTEGERP (val))
{
CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
attrs = CODING_ID_ATTRS (coding->id);
- eol_type = CODING_ID_EOL_TYPE (coding->id);
+ eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
coding->mode = 0;
coding->head_ascii = -1;
val = CODING_ATTR_SAFE_CHARSETS (attrs);
coding->max_charset_id = SCHARS (val) - 1;
- coding->safe_charsets = (char *) SDATA (val);
+ coding->safe_charsets = SDATA (val);
coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
+ coding->carryover_bytes = 0;
coding_type = CODING_ATTR_TYPE (attrs);
if (EQ (coding_type, Qundecided))
setup_iso_safe_charsets (attrs);
val = CODING_ATTR_SAFE_CHARSETS (attrs);
coding->max_charset_id = SCHARS (val) - 1;
- coding->safe_charsets = (char *) SDATA (val);
+ coding->safe_charsets = SDATA (val);
}
CODING_ISO_FLAGS (coding) = flags;
+ CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
+ CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
+ CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
+ CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
}
else if (EQ (coding_type, Qcharset))
{
coding->encoder = encode_coding_emacs_mule;
coding->common_flags
|= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
+ coding->spec.emacs_mule.full_support = 1;
if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
&& ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
{
tail = XCDR (tail))
if (max_charset_id < XFASTINT (XCAR (tail)))
max_charset_id = XFASTINT (XCAR (tail));
- safe_charsets = Fmake_string (make_number (max_charset_id + 1),
- make_number (255));
+ safe_charsets = make_uninit_string (max_charset_id + 1);
+ memset (SDATA (safe_charsets), 255, max_charset_id + 1);
for (tail = Vemacs_mule_charset_list; CONSP (tail);
tail = XCDR (tail))
SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
coding->max_charset_id = max_charset_id;
- coding->safe_charsets = (char *) SDATA (safe_charsets);
+ coding->safe_charsets = SDATA (safe_charsets);
+ coding->spec.emacs_mule.full_support = 1;
}
+ coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
+ coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
}
else if (EQ (coding_type, Qshift_jis))
{
}
+/* Return a list of charsets supported by CODING-SYSTEM.
+
+ CODING_SYSTEM is converted to a coding system id with
+ CHECK_CODING_SYSTEM_GET_ID (presumably this signals an error when
+ CODING_SYSTEM is not a valid coding system -- confirm against the
+ macro definition), and the attribute vector of that id is consulted.
+ The value depends on the type attribute:
+
+ - type Qiso_2022 with CODING_ISO_FLAG_FULL_SUPPORT set in the
+ coding_attr_iso_flags attribute: Viso_2022_charset_list;
+ - type Qiso_2022 without that flag: the charset list stored in the
+ attributes of CODING-SYSTEM;
+ - type Qemacs_mule: Vemacs_mule_charset_list, unconditionally;
+ - any other type: the charset list stored in the attributes of
+ CODING-SYSTEM.
+
+ The list is returned as is; this function does not copy it. */
+
+Lisp_Object
+coding_system_charset_list (coding_system)
+ Lisp_Object coding_system;
+{
+ int id;
+ Lisp_Object attrs, charset_list;
+
+ CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
+ attrs = CODING_ID_ATTRS (id);
+
+ if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
+ {
+ int flags = XINT (AREF (attrs, coding_attr_iso_flags));
+
+ if (flags & CODING_ISO_FLAG_FULL_SUPPORT) /* Global list instead of the per-coding-system one. */
+ charset_list = Viso_2022_charset_list;
+ else
+ charset_list = CODING_ATTR_CHARSET_LIST (attrs);
+ }
+ else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
+ {
+ charset_list = Vemacs_mule_charset_list; /* The coding system's own charset list attribute is not consulted here. */
+ }
+ else
+ {
+ charset_list = CODING_ATTR_CHARSET_LIST (attrs);
+ }
+ return charset_list;
+}
+
+
/* Return raw-text or one of its subsidiaries that has the same
eol_type as CODING-SYSTEM. */
|| src[lsb + 2] != '\n')
this_eol = EOL_SEEN_CR;
else
- this_eol = EOL_SEEN_CRLF;
+ {
+ this_eol = EOL_SEEN_CRLF;
+ src += 2;
+ }
if (eol_seen == EOL_SEEN_NONE)
/* This is the first end-of-line. */
eol_seen = this_eol;
else if (eol_seen != this_eol)
{
- /* The found type is different from what found before. */
- eol_seen = EOL_SEEN_LF;
- break;
+ /* The found type is different from what found before.
+ Allow for stray ^M characters in DOS EOL files. */
+ if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
+ || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
+ eol_seen = EOL_SEEN_CRLF;
+ else
+ {
+ eol_seen = EOL_SEEN_LF;
+ break;
+ }
}
if (++total == MAX_EOL_CHECK_COUNT)
break;
eol_seen = this_eol;
else if (eol_seen != this_eol)
{
- /* The found type is different from what found before. */
- eol_seen = EOL_SEEN_LF;
- break;
+ /* The found type is different from what found before.
+ Allow for stray ^M characters in DOS EOL files. */
+ if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
+ || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
+ eol_seen = EOL_SEEN_CRLF;
+ else
+ {
+ eol_seen = EOL_SEEN_LF;
+ break;
+ }
}
if (++total == MAX_EOL_CHECK_COUNT)
break;
struct coding_system *coding;
{
const unsigned char *src, *src_end;
+ int saved_mode = coding->mode;
coding->consumed = coding->consumed_char = 0;
coding->produced = coding->produced_char = 0;
{
/* We didn't find an 8-bit code. We may
have found a null-byte, but it's very
- rare that a binary file confirm to
+ rare that a binary file conforms to
ISO-2022. */
src = src_end;
coding->head_ascii = src - coding->source;
break;
}
}
- else if (! c)
+ else if (! c && !inhibit_null_byte_detection)
{
null_byte_found = 1;
if (eight_bit_found)
setup_coding_system (XCDR (coding_systems), coding);
}
}
+ coding->mode = saved_mode;
}
unsigned char *p, *pbeg, *pend;
eol_type = CODING_ID_EOL_TYPE (coding->id);
- if (EQ (eol_type, Qunix))
+ if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
return;
if (NILP (coding->dst_object))
eol_seen |= EOL_SEEN_CR;
}
}
- if (eol_seen != EOL_SEEN_NONE
+ /* Handle DOS-style EOLs in a file with stray ^M characters. */
+ if ((eol_seen & EOL_SEEN_CRLF) != 0
+ && (eol_seen & EOL_SEEN_CR) != 0
+ && (eol_seen & EOL_SEEN_LF) == 0)
+ eol_seen = EOL_SEEN_CRLF;
+ else if (eol_seen != EOL_SEEN_NONE
&& eol_seen != EOL_SEEN_LF
&& eol_seen != EOL_SEEN_CRLF
&& eol_seen != EOL_SEEN_CR)
Lisp_Object standard, translation_table;
Lisp_Object val;
+ if (NILP (Venable_character_translation))
+ {
+ if (max_lookup)
+ *max_lookup = 0;
+ return Qnil;
+ }
if (encodep)
translation_table = CODING_ATTR_ENCODE_TBL (attrs),
standard = Vstandard_translation_table_for_encode;
} while (0)
+/* Return a translation of character(s) at BUF according to TRANS.
+ TRANS is TO-CHAR or ((FROM . TO) ...) where
+ FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
+ The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
+ translation is found, and Qnil if not found.
+ If BUF is too short to lookup characters in FROM, return Qt. */
+
static Lisp_Object
-get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
- Lisp_Object val;
+get_translation (trans, buf, buf_end)
+ Lisp_Object trans;
int *buf, *buf_end;
- int last_block;
- int *from_nchars, *to_nchars;
{
- /* VAL is TO or (([FROM-CHAR ...] . TO) ...) where TO is TO-CHAR or
- [TO-CHAR ...]. */
- if (CONSP (val))
+
+ if (INTEGERP (trans))
+ return trans;
+ for (; CONSP (trans); trans = XCDR (trans))
{
- Lisp_Object from, tail;
- int i, len;
+ Lisp_Object val = XCAR (trans);
+ Lisp_Object from = XCAR (val);
+ int len = ASIZE (from);
+ int i;
- for (tail = val; CONSP (tail); tail = XCDR (tail))
+ for (i = 0; i < len; i++)
{
- val = XCAR (tail);
- from = XCAR (val);
- len = ASIZE (from);
- for (i = 0; i < len; i++)
- {
- if (buf + i == buf_end)
- {
- if (! last_block)
- return Qt;
- break;
- }
- if (XINT (AREF (from, i)) != buf[i])
- break;
- }
- if (i == len)
- {
- val = XCDR (val);
- *from_nchars = len;
- break;
- }
+ if (buf + i == buf_end)
+ return Qt;
+ if (XINT (AREF (from, i)) != buf[i])
+ break;
}
- if (! CONSP (tail))
- return Qnil;
+ if (i == len)
+ return val;
}
- if (VECTORP (val))
- *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
- else
- *buf = XINT (val);
- return val;
+ return Qnil;
}
LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
if (! NILP (trans))
{
- trans = get_translation (trans, buf, buf_end, last_block,
- &from_nchars, &to_nchars);
- if (EQ (trans, Qt))
+ trans = get_translation (trans, buf, buf_end);
+ if (INTEGERP (trans))
+ c = XINT (trans);
+ else if (CONSP (trans))
+ {
+ from_nchars = ASIZE (XCAR (trans));
+ trans = XCDR (trans);
+ if (INTEGERP (trans))
+ c = XINT (trans);
+ else
+ {
+ to_nchars = ASIZE (trans);
+ c = XINT (AREF (trans, 0));
+ }
+ }
+ else if (EQ (trans, Qt) && ! last_block)
break;
- c = *buf;
}
if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
if (EQ (coding->src_object, coding->dst_object))
{
coding_set_source (coding);
- dst_end = ((unsigned char *) coding->source) + coding->consumed;
+ dst_end = (((unsigned char *) coding->source)
+ + coding->consumed);
}
else
dst_end = coding->destination + coding->dst_bytes;
*dst++ = CHAR_TO_BYTE8 (c);
}
produced_chars += to_nchars;
- *buf++ = to_nchars;
- while (--from_nchars > 0)
- *buf++ = 0;
+ buf += from_nchars;
}
else
/* This is an annotation datum. (-C) is the length. */
if (coding->src_multibyte)
{
int multibytep = 1;
- EMACS_INT consumed_chars;
+ EMACS_INT consumed_chars = 0;
while (1)
{
/* Compose text in CODING->object according to the annotation data at
CHARBUF. CHARBUF is an array:
- [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
+ [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
*/
static INLINE void
enum composition_method method;
Lisp_Object components;
- len = -charbuf[0];
+ len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
to = pos + charbuf[2];
- if (to <= pos)
- return;
- method = (enum composition_method) (charbuf[3]);
+ method = (enum composition_method) (charbuf[4]);
if (method == COMPOSITION_RELATIVE)
components = Qnil;
- else if (method >= COMPOSITION_WITH_RULE
- && method <= COMPOSITION_WITH_RULE_ALTCHARS)
+ else
{
Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
- int i;
+ int i, j;
- len -= 4;
- charbuf += 4;
- for (i = 0; i < len; i++)
+ if (method == COMPOSITION_WITH_RULE)
+ len = charbuf[2] * 3 - 2;
+ charbuf += MAX_ANNOTATION_LENGTH;
+ /* charbuf = [ CHAR ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
+ for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
{
- args[i] = make_number (charbuf[i]);
- if (charbuf[i] < 0)
- return;
+ if (charbuf[i] >= 0)
+ args[j] = make_number (charbuf[i]);
+ else
+ {
+ i++;
+ args[j] = make_number (charbuf[i] % 0x100);
+ }
}
- components = (method == COMPOSITION_WITH_ALTCHARS
- ? Fstring (len, args) : Fvector (len, args));
+ components = (i == j ? Fstring (j, args) : Fvector (j, args));
}
- else
- return;
compose_text (pos, to, components, Qnil, coding->dst_object);
}
#define ALLOC_CONVERSION_WORK_AREA(coding) \
do { \
- int size = CHARBUF_SIZE;; \
+ int size = CHARBUF_SIZE; \
\
coding->charbuf = NULL; \
while (size > 1024) \
while (charbuf < charbuf_end)
{
if (*charbuf >= 0)
- pos += *charbuf++;
+ pos++, charbuf++;
else
{
int len = -*charbuf;
- switch (charbuf[1])
- {
- case CODING_ANNOTATE_COMPOSITION_MASK:
- produce_composition (coding, charbuf, pos);
- break;
- case CODING_ANNOTATE_CHARSET_MASK:
- produce_charset (coding, charbuf, pos);
- break;
- default:
- abort ();
- }
+
+ if (len > 2)
+ switch (charbuf[1])
+ {
+ case CODING_ANNOTATE_COMPOSITION_MASK:
+ produce_composition (coding, charbuf, pos);
+ break;
+ case CODING_ANNOTATE_CHARSET_MASK:
+ produce_charset (coding, charbuf, pos);
+ break;
+ }
charbuf += len;
}
}
Lisp_Object attrs;
Lisp_Object undo_list;
Lisp_Object translation_table;
+ struct ccl_spec cclspec;
int carryover;
int i;
translation_table = get_translation_table (attrs, 0, NULL);
carryover = 0;
+ if (coding->decoder == decode_coding_ccl)
+ {
+ coding->spec.ccl = &cclspec;
+ setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
+ }
do
{
EMACS_INT pos = coding->dst_pos + coding->produced_char;
coding->charbuf[i]
= coding->charbuf[coding->charbuf_used - carryover + i];
}
- while (coding->consumed < coding->src_bytes
- && (coding->result == CODING_RESULT_SUCCESS
- || coding->result == CODING_RESULT_INVALID_SRC));
+ while (coding->result == CODING_RESULT_INSUFFICIENT_DST
+ || (coding->consumed < coding->src_bytes
+ && (coding->result == CODING_RESULT_SUCCESS
+ || coding->result == CODING_RESULT_INVALID_SRC)));
if (carryover > 0)
{
that the number of data is less than the size of
coding->charbuf. */
coding->charbuf_used = 0;
+ coding->chars_at_source = 0;
+
while (nbytes-- > 0)
{
int c = *src++;
coding->carryover. */
unsigned char *p = coding->carryover;
+ if (nbytes > sizeof coding->carryover)
+ nbytes = sizeof coding->carryover;
coding->carryover_bytes = nbytes;
while (nbytes-- > 0)
*p++ = *src++;
coding->consumed = coding->src_bytes;
}
- if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
+ if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
+ && !inhibit_eol_conversion)
decode_eol (coding);
if (BUFFERP (coding->dst_object))
{
enum composition_method method = COMPOSITION_METHOD (prop);
int nchars = COMPOSITION_LENGTH (prop);
- ADD_COMPOSITION_DATA (buf, nchars, method);
+ ADD_COMPOSITION_DATA (buf, nchars, 0, method);
if (method != COMPOSITION_RELATIVE)
{
Lisp_Object components;
if (! NILP (translation_table))
lookup_buf = alloca (sizeof (int) * max_lookup);
- eol_type = CODING_ID_EOL_TYPE (coding->id);
+ eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
if (VECTORP (eol_type))
eol_type = Qunix;
{
EMACS_INT bytes;
- if (coding->encoder == encode_coding_raw_text)
+ if (coding->encoder == encode_coding_raw_text
+ || coding->encoder == encode_coding_ccl)
c = *src++, pos++;
else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
for (i = 1; i < max_lookup && p < src_end; i++)
lookup_buf[i] = STRING_CHAR_ADVANCE (p);
lookup_buf_end = lookup_buf + i;
- trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
- &from_nchars, &to_nchars);
- if (EQ (trans, Qt)
- || buf + to_nchars > buf_end)
+ trans = get_translation (trans, lookup_buf, lookup_buf_end);
+ if (INTEGERP (trans))
+ c = XINT (trans);
+ else if (CONSP (trans))
+ {
+ from_nchars = ASIZE (XCAR (trans));
+ trans = XCDR (trans);
+ if (INTEGERP (trans))
+ c = XINT (trans);
+ else
+ {
+ to_nchars = ASIZE (trans);
+ if (buf + to_nchars > buf_end)
+ break;
+ c = XINT (AREF (trans, 0));
+ }
+ }
+ else
break;
- *buf++ = *lookup_buf;
+ *buf++ = c;
for (i = 1; i < to_nchars; i++)
*buf++ = XINT (AREF (trans, i));
for (i = 1; i < from_nchars; i++, pos++)
Lisp_Object attrs;
Lisp_Object translation_table;
int max_lookup;
+ struct ccl_spec cclspec;
attrs = CODING_ID_ATTRS (coding->id);
if (coding->encoder == encode_coding_raw_text)
ALLOC_CONVERSION_WORK_AREA (coding);
+ if (coding->encoder == encode_coding_ccl)
+ {
+ coding->spec.ccl = &cclspec;
+ setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding));
+ }
do {
coding_set_source (coding);
consume_chars (coding, translation_table, max_lookup);
}
else
{
- name = Vcode_conversion_workbuf_name;
- workbuf = Fget_buffer_create (name);
- if (NILP (Vcode_conversion_reused_workbuf))
- Vcode_conversion_reused_workbuf = workbuf;
+ if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
+ Vcode_conversion_reused_workbuf
+ = Fget_buffer_create (Vcode_conversion_workbuf_name);
+ workbuf = Vcode_conversion_reused_workbuf;
}
current = current_buffer;
set_buffer_internal (XBUFFER (workbuf));
+ /* We can't allow modification hooks to run in the work buffer. For
+ instance, directory_files_internal assumes that file decoding
+ doesn't compile new regexps. */
+ Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
Ferase_buffer ();
current_buffer->undo_list = Qt;
current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
if (! destination)
{
record_conversion_result (coding,
- CODING_RESULT_INSUFFICIENT_DST);
+ CODING_RESULT_INSUFFICIENT_MEM);
unbind_to (count, Qnil);
return;
}
{
const unsigned char *src_end = src + src_bytes;
Lisp_Object attrs, eol_type;
- Lisp_Object val;
+ Lisp_Object val = Qnil;
struct coding_system coding;
int id;
struct coding_detection_info detect_info;
break;
}
}
- else if (! c)
+ else if (! c && !inhibit_null_byte_detection)
{
null_byte_found = 1;
if (eight_bit_found)
}
}
- if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY)
+ if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
+ || null_byte_found)
{
detect_info.found = CATEGORY_MASK_RAW_TEXT;
- id = coding_categories[coding_category_raw_text].id;
+ id = CODING_SYSTEM_ID (Qno_conversion);
val = Fcons (make_number (id), Qnil);
}
else if (! detect_info.rejected && ! detect_info.found)
{
int mask = detect_info.rejected | detect_info.found;
int found = 0;
- val = Qnil;
for (i = coding_category_raw_text - 1; i >= 0; i--)
{
/* Then, detect eol-format if necessary. */
{
- int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
+ int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
Lisp_Object tail;
if (VECTORP (eol_type))
}
}
- return (highest ? XCAR (val) : val);
+ return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
}
2, 3, 0,
doc: /* Detect coding system of the text in the region between START and END.
Return a list of possible coding systems ordered by priority.
+The coding systems to try and their priorities follows what
+the function `coding-system-priority-list' (which see) returns.
If only ASCII characters are found (except for such ISO-2022 control
characters as ESC), it returns a list of single element `undecided'
1, 2, 0,
doc: /* Detect coding system of the text in STRING.
Return a list of possible coding systems ordered by priority.
+The coding systems to try and their priorities follows what
+the function `coding-system-priority-list' (which see) returns.
If only ASCII characters are found (except for such ISO-2022 control
characters as ESC), it returns a list of single element `undecided'
EMACS_INT start_byte, end_byte;
const unsigned char *p, *pbeg, *pend;
int c;
- Lisp_Object tail, elt;
+ Lisp_Object tail, elt, work_table;
if (STRINGP (start))
{
while (p < pend && ASCII_BYTE_P (*p)) p++;
while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
+ work_table = Fmake_char_table (Qnil, Qnil);
while (p < pend)
{
if (ASCII_BYTE_P (*p))
else
{
c = STRING_CHAR_ADVANCE (p);
+ if (!NILP (char_table_ref (work_table, c)))
+ /* This character was already checked. Ignore it. */
+ continue;
charset_map_loaded = 0;
for (tail = coding_attrs_list; CONSP (tail);)
p = pbeg + p_offset;
pend = pbeg + pend_offset;
}
+ char_table_set (work_table, c, Qt);
}
}
START may be a string. In that case, check if the string is
encodable, and the value contains indices to the string instead of
-buffer positions. END is ignored. */)
+buffer positions. END is ignored.
+
+If the current buffer (or START if it is a string) is unibyte, the value
+is nil. */)
(start, end, coding_system_list)
Lisp_Object start, end, coding_system_list;
{
if (STRINGP (start))
{
if (!STRING_MULTIBYTE (start)
- && SCHARS (start) != SBYTES (start))
+ || SCHARS (start) == SBYTES (start))
return Qnil;
start_byte = 0;
end_byte = SBYTES (start);
start_byte = CHAR_TO_BYTE (XINT (start));
end_byte = CHAR_TO_BYTE (XINT (end));
if (XINT (end) - XINT (start) == end_byte - start_byte)
- return Qt;
+ return Qnil;
if (XINT (start) < GPT && XINT (end) > GPT)
{
Optional 4th arguments DESTINATION specifies where the decoded text goes.
If nil, the region between START and END is replaced by the decoded text.
-If buffer, the decoded text is inserted in the buffer.
+If buffer, the decoded text is inserted in that buffer after point (point
+does not move).
In those cases, the length of the decoded text is returned.
If DESTINATION is t, the decoded text is returned.
Optional 4th arguments DESTINATION specifies where the encoded text goes.
If nil, the region between START and END is replace by the encoded text.
-If buffer, the encoded text is inserted in the buffer.
+If buffer, the encoded text is inserted in that buffer after point (point
+does not move).
In those cases, the length of the encoded text is returned.
If DESTINATION is t, the encoded text is returned.
if the decoding operation is trivial.
Optional fourth arg BUFFER non-nil means that the decoded text is
-inserted in BUFFER instead of returned as a string. In this case,
-the return value is the length of the decoded text.
+inserted in that buffer after point (point does not move). In this
+case, the return value is the length of the decoded text.
This function sets `last-coding-system-used' to the precise coding system
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
itself if the encoding operation is trivial.
Optional fourth arg BUFFER non-nil means that the encoded text is
-inserted in BUFFER instead of returned as a string. In this case,
-the return value is the length of the encoded text.
+inserted in that buffer after point (point does not move). In this
+case, the return value is the length of the encoded text.
This function sets `last-coding-system-used' to the precise coding system
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
DEFUN ("terminal-coding-system", Fterminal_coding_system,
Sterminal_coding_system, 0, 1, 0,
doc: /* Return coding system specified for terminal output on the given terminal.
-TERMINAL may be a terminal id, a frame, or nil for the selected
+TERMINAL may be a terminal object, a frame, or nil for the selected
frame's terminal device. */)
(terminal)
Lisp_Object terminal;
{
struct terminal *t = get_terminal (terminal, 1);
CHECK_SYMBOL (coding_system);
- setup_coding_system (Fcheck_coding_system (coding_system),
- TERMINAL_KEYBOARD_CODING (t));
+ if (NILP (coding_system))
+ coding_system = Qno_conversion;
+ else
+ Fcheck_coding_system (coding_system);
+ setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
/* Characer composition should be disabled. */
TERMINAL_KEYBOARD_CODING (t)->common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK;
DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
Scoding_system_priority_list, 0, 1, 0,
doc: /* Return a list of coding systems ordered by their priorities.
+The list contains a subset of coding systems; i.e. coding systems
+assigned to each coding category (see `coding-category-list').
+
HIGHESTP non-nil means just return the highest priority one. */)
(highestp)
Lisp_Object highestp;
return Fnreverse (val);
}
-static char *suffixes[] = { "-unix", "-dos", "-mac" };
+static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
static Lisp_Object
make_subsidiaries (base)
}
CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
- safe_charsets = Fmake_string (make_number (max_charset_id + 1),
- make_number (255));
+ safe_charsets = make_uninit_string (max_charset_id + 1);
+ memset (SDATA (safe_charsets), 255, max_charset_id + 1);
for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
CHECK_CHARACTER (val);
CODING_ATTR_MNEMONIC (attrs) = val;
}
- else if (EQ (prop, QCdefalut_char))
+ else if (EQ (prop, QCdefault_char))
{
if (NILP (val))
val = make_number (' ');
Vcode_conversion_reused_workbuf = Qnil;
staticpro (&Vcode_conversion_workbuf_name);
- Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
+ Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
reused_workbuf_in_use = 0;
DEFSYM (Qcoding_system_error, "coding-system-error");
Fput (Qcoding_system_error, Qerror_conditions,
- Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
+ pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
Fput (Qcoding_system_error, Qerror_message,
- build_string ("Invalid coding system"));
+ make_pure_c_string ("Invalid coding system"));
/* Intern this now in case it isn't already done.
Setting this variable twice is harmless.
But don't staticpro it here--that is done in alloc.c. */
- Qchar_table_extra_slots = intern ("char-table-extra-slots");
+ Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
DEFSYM (Qtranslation_table, "translation-table");
Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
DEFSYM (QCcategory, ":category");
DEFSYM (QCmnemonic, ":mnemonic");
- DEFSYM (QCdefalut_char, ":default-char");
+ DEFSYM (QCdefault_char, ":default-char");
DEFSYM (QCdecode_translation_table, ":decode-translation-table");
DEFSYM (QCencode_translation_table, ":encode-translation-table");
DEFSYM (QCpost_read_conversion, ":post-read-conversion");
staticpro (&Vcoding_category_table);
/* The following are targets of code detection. */
ASET (Vcoding_category_table, coding_category_iso_7,
- intern ("coding-category-iso-7"));
+ intern_c_string ("coding-category-iso-7"));
ASET (Vcoding_category_table, coding_category_iso_7_tight,
- intern ("coding-category-iso-7-tight"));
+ intern_c_string ("coding-category-iso-7-tight"));
ASET (Vcoding_category_table, coding_category_iso_8_1,
- intern ("coding-category-iso-8-1"));
+ intern_c_string ("coding-category-iso-8-1"));
ASET (Vcoding_category_table, coding_category_iso_8_2,
- intern ("coding-category-iso-8-2"));
+ intern_c_string ("coding-category-iso-8-2"));
ASET (Vcoding_category_table, coding_category_iso_7_else,
- intern ("coding-category-iso-7-else"));
+ intern_c_string ("coding-category-iso-7-else"));
ASET (Vcoding_category_table, coding_category_iso_8_else,
- intern ("coding-category-iso-8-else"));
+ intern_c_string ("coding-category-iso-8-else"));
ASET (Vcoding_category_table, coding_category_utf_8_auto,
- intern ("coding-category-utf-8-auto"));
+ intern_c_string ("coding-category-utf-8-auto"));
ASET (Vcoding_category_table, coding_category_utf_8_nosig,
- intern ("coding-category-utf-8"));
+ intern_c_string ("coding-category-utf-8"));
ASET (Vcoding_category_table, coding_category_utf_8_sig,
- intern ("coding-category-utf-8-sig"));
+ intern_c_string ("coding-category-utf-8-sig"));
ASET (Vcoding_category_table, coding_category_utf_16_be,
- intern ("coding-category-utf-16-be"));
+ intern_c_string ("coding-category-utf-16-be"));
ASET (Vcoding_category_table, coding_category_utf_16_auto,
- intern ("coding-category-utf-16-auto"));
+ intern_c_string ("coding-category-utf-16-auto"));
ASET (Vcoding_category_table, coding_category_utf_16_le,
- intern ("coding-category-utf-16-le"));
+ intern_c_string ("coding-category-utf-16-le"));
ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
- intern ("coding-category-utf-16-be-nosig"));
+ intern_c_string ("coding-category-utf-16-be-nosig"));
ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
- intern ("coding-category-utf-16-le-nosig"));
+ intern_c_string ("coding-category-utf-16-le-nosig"));
ASET (Vcoding_category_table, coding_category_charset,
- intern ("coding-category-charset"));
+ intern_c_string ("coding-category-charset"));
ASET (Vcoding_category_table, coding_category_sjis,
- intern ("coding-category-sjis"));
+ intern_c_string ("coding-category-sjis"));
ASET (Vcoding_category_table, coding_category_big5,
- intern ("coding-category-big5"));
+ intern_c_string ("coding-category-big5"));
ASET (Vcoding_category_table, coding_category_ccl,
- intern ("coding-category-ccl"));
+ intern_c_string ("coding-category-ccl"));
ASET (Vcoding_category_table, coding_category_emacs_mule,
- intern ("coding-category-emacs-mule"));
+ intern_c_string ("coding-category-emacs-mule"));
/* The following are NOT targets of code detection. */
ASET (Vcoding_category_table, coding_category_raw_text,
- intern ("coding-category-raw-text"));
+ intern_c_string ("coding-category-raw-text"));
ASET (Vcoding_category_table, coding_category_undecided,
- intern ("coding-category-undecided"));
+ intern_c_string ("coding-category-undecided"));
DEFSYM (Qinsufficient_source, "insufficient-source");
DEFSYM (Qinconsistent_eol, "inconsistent-eol");
DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
doc: /*
*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
- eol_mnemonic_unix = build_string (":");
+ eol_mnemonic_unix = make_pure_c_string (":");
DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
doc: /*
*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
- eol_mnemonic_dos = build_string ("\\");
+ eol_mnemonic_dos = make_pure_c_string ("\\");
DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
doc: /*
*String displayed in mode line for MAC-like (CR) end-of-line format. */);
- eol_mnemonic_mac = build_string ("/");
+ eol_mnemonic_mac = make_pure_c_string ("/");
DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
doc: /*
*String displayed in mode line when end-of-line format is not yet determined. */);
- eol_mnemonic_undecided = build_string (":");
+ eol_mnemonic_undecided = make_pure_c_string (":");
DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
doc: /*
DEFVAR_BOOL ("inhibit-iso-escape-detection",
&inhibit_iso_escape_detection,
doc: /*
-If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
+If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
-By default, on reading a file, Emacs tries to detect how the text is
-encoded. This code detection is sensitive to escape sequences. If
-the sequence is valid as ISO2022, the code is determined as one of
-the ISO2022 encodings, and the file is decoded by the corresponding
-coding system (e.g. `iso-2022-7bit').
+When Emacs reads text, it tries to detect how the text is encoded.
+This code detection is sensitive to escape sequences. If Emacs sees
+a valid ISO-2022 escape sequence, it assumes the text is encoded in one
+of the ISO-2022 encodings, and decodes text by the corresponding coding
+system (e.g. `iso-2022-7bit').
However, there may be a case that you want to read escape sequences in
a file as is. In such a case, you can set this variable to non-nil.
-Then, as the code detection ignores any escape sequences, no file is
-detected as encoded in some ISO2022 encoding. The result is that all
+Then the code detection will ignore any escape sequences, and no text is
+detected as encoded in some ISO-2022 encoding. The result is that all
escape sequences become visible in a buffer.
The default value is nil, and it is strongly recommended not to change
reading if you suppress escape sequence detection.
The other way to read escape sequences in a file without decoding is
-to explicitly specify some coding system that doesn't use ISO2022's
+to explicitly specify some coding system that doesn't use ISO-2022
escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument]. */);
inhibit_iso_escape_detection = 0;
+ DEFVAR_BOOL ("inhibit-null-byte-detection",
+ &inhibit_null_byte_detection,
+ doc: /* If non-nil, Emacs ignores null bytes on code detection.
+By default, Emacs treats text that contains null bytes as binary data,
+and does not attempt to decode it. The effect is as if you specified
+`no-conversion' for reading that text.
+
+Set this to non-nil when regular text happens to include null bytes.
+Examples are Index nodes of Info files and null-byte delimited output
+from GNU Find and GNU Grep. Emacs will then ignore the null bytes and
+decode text as usual. */);
+ inhibit_null_byte_detection = 0;
+
DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
doc: /* Char table for translating self-inserting characters.
This is applied to the result of input methods, not their input.
-See also `keyboard-translate-table'. */);
+See also `keyboard-translate-table'.
+
+Use of this variable for character code unification was rendered
+obsolete in Emacs 23.1 and later, since Unicode is now the basis of
+internal character representation. */);
Vtranslation_table_for_input = Qnil;
{
for (i = 0; i < coding_arg_max; i++)
args[i] = Qnil;
- plist[0] = intern (":name");
+ plist[0] = intern_c_string (":name");
plist[1] = args[coding_arg_name] = Qno_conversion;
- plist[2] = intern (":mnemonic");
+ plist[2] = intern_c_string (":mnemonic");
plist[3] = args[coding_arg_mnemonic] = make_number ('=');
- plist[4] = intern (":coding-type");
+ plist[4] = intern_c_string (":coding-type");
plist[5] = args[coding_arg_coding_type] = Qraw_text;
- plist[6] = intern (":ascii-compatible-p");
+ plist[6] = intern_c_string (":ascii-compatible-p");
plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
- plist[8] = intern (":default-char");
+ plist[8] = intern_c_string (":default-char");
plist[9] = args[coding_arg_default_char] = make_number (0);
- plist[10] = intern (":for-unibyte");
+ plist[10] = intern_c_string (":for-unibyte");
plist[11] = args[coding_arg_for_unibyte] = Qt;
- plist[12] = intern (":docstring");
- plist[13] = build_string ("Do no conversion.\n\
+ plist[12] = intern_c_string (":docstring");
+ plist[13] = make_pure_c_string ("Do no conversion.\n\
\n\
When you visit a file with this coding, the file is read into a\n\
unibyte buffer as is, thus each byte of a file is treated as a\n\
character.");
- plist[14] = intern (":eol-type");
+ plist[14] = intern_c_string (":eol-type");
plist[15] = args[coding_arg_eol_type] = Qunix;
args[coding_arg_plist] = Flist (16, plist);
Fdefine_coding_system_internal (coding_arg_max, args);
plist[5] = args[coding_arg_coding_type] = Qundecided;
/* This is already set.
plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
- plist[8] = intern (":charset-list");
+ plist[8] = intern_c_string (":charset-list");
plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
plist[11] = args[coding_arg_for_unibyte] = Qnil;
- plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
+ plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
plist[15] = args[coding_arg_eol_type] = Qnil;
args[coding_arg_plist] = Flist (16, plist);
Fdefine_coding_system_internal (coding_arg_max, args);