"coding-category-iso-7",
"coding-category-iso-8-1",
"coding-category-iso-8-2",
- "coding-category-iso-else",
+ "coding-category-iso-7-else",
+ "coding-category-iso-8-else",
"coding-category-big5",
"coding-category-binary"
};
CODING_CATEGORY_MASK_ISO_7
CODING_CATEGORY_MASK_ISO_8_1
CODING_CATEGORY_MASK_ISO_8_2
- CODING_CATEGORY_MASK_ISO_ELSE
+ CODING_CATEGORY_MASK_ISO_7_ELSE
+ CODING_CATEGORY_MASK_ISO_8_ELSE
are set. If a code which should never appear in ISO2022 is found,
returns 0. */
int mask = (CODING_CATEGORY_MASK_ISO_7
| CODING_CATEGORY_MASK_ISO_8_1
| CODING_CATEGORY_MASK_ISO_8_2
- | CODING_CATEGORY_MASK_ISO_ELSE);
+ | CODING_CATEGORY_MASK_ISO_7_ELSE
+ | CODING_CATEGORY_MASK_ISO_8_ELSE
+ );
int g1 = 0; /* 1 iff designating to G1. */
int c, i;
if (c == ')' || (c == '$' && *src == ')'))
{
g1 = 1;
- mask &= ~CODING_CATEGORY_MASK_ISO_7;
+ mask &= ~(CODING_CATEGORY_MASK_ISO_7
+ | CODING_CATEGORY_MASK_ISO_7_ELSE);
}
src++;
break;
}
else if (c == 'N' || c == 'O' || c == 'n' || c == 'o')
- return CODING_CATEGORY_MASK_ISO_ELSE;
+ mask &= (CODING_CATEGORY_MASK_ISO_7_ELSE
+ | CODING_CATEGORY_MASK_ISO_8_ELSE);
break;
case ISO_CODE_SO:
if (g1)
- return CODING_CATEGORY_MASK_ISO_ELSE;
+ mask &= (CODING_CATEGORY_MASK_ISO_7_ELSE
+ | CODING_CATEGORY_MASK_ISO_8_ELSE);
break;
case ISO_CODE_CSI:
case ISO_CODE_SS2:
case ISO_CODE_SS3:
- mask &= ~CODING_CATEGORY_MASK_ISO_7;
+ mask &= ~(CODING_CATEGORY_MASK_ISO_7
+ | CODING_CATEGORY_MASK_ISO_7_ELSE);
break;
default:
return 0;
else
{
- int count = 1;
+ unsigned char *src_begin = src;
- mask &= ~CODING_CATEGORY_MASK_ISO_7;
+ mask &= ~(CODING_CATEGORY_MASK_ISO_7
+ | CODING_CATEGORY_MASK_ISO_7_ELSE);
while (src < src_end && *src >= 0xA0)
- count++, src++;
- if (count & 1 && src < src_end)
+ src++;
+ if ((src - src_begin - 1) & 1 && src < src_end)
mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
}
break;
case EMACS_leading_code_2:
ONE_MORE_BYTE (c2);
- ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
+ if (c2 < 0xA0)
+ {
+ /* invalid sequence */
+ *dst++ = c1;
+ *dst++ = c2;
+ }
+ else
+ ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
break;
case EMACS_leading_code_3:
TWO_MORE_BYTES (c2, c3);
- if (c1 < LEADING_CODE_PRIVATE_11)
+ if (c2 < 0xA0 || c3 < 0xA0)
+ {
+ /* invalid sequence */
+ *dst++ = c1;
+ *dst++ = c2;
+ *dst++ = c3;
+ }
+ else if (c1 < LEADING_CODE_PRIVATE_11)
ENCODE_ISO_CHARACTER (c1, c2, c3);
else
ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
case EMACS_leading_code_4:
THREE_MORE_BYTES (c2, c3, c4);
- ENCODE_ISO_CHARACTER (c2, c3, c4);
+ if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
+ {
+ /* invalid sequence */
+ *dst++ = c1;
+ *dst++ = c2;
+ *dst++ = c3;
+ *dst++ = c4;
+ }
+ else
+ ENCODE_ISO_CHARACTER (c2, c3, c4);
break;
case EMACS_leading_code_composition:
- ONE_MORE_BYTE (c1);
- if (c1 == 0xFF)
+ ONE_MORE_BYTE (c2);
+ if (c2 < 0xA0)
+ {
+ /* invalid sequence */
+ *dst++ = c1;
+ *dst++ = c2;
+ }
+ else if (c2 == 0xFF)
{
coding->composing = COMPOSING_WITH_RULE_HEAD;
ENCODE_COMPOSITION_WITH_RULE_START;
}
continue;
label_end_of_loop:
- coding->carryover_size = src - src_base;
+ /* We reach here because the source data does not end at a
+ character boundary. */
+ coding->carryover_size = src_end - src_base;
bcopy (src_base, coding->carryover, coding->carryover_size);
+ src = src_end;
break;
}
continue;
label_end_of_loop:
- coding->carryover_size = src - src_base;
+ coding->carryover_size = src_end - src_base;
bcopy (src_base, coding->carryover, coding->carryover_size);
- src = src_base;
+ src = src_end;
break;
}
The category for a coding system which has the same code range
as SJIS. Assigned the coding-system (Lisp
- symbol) `shift-jis' by default.
+ symbol) `japanese-shift-jis' by default.
o coding-category-iso-7
The category for a coding system which has the same code range
- as ISO2022 of 7-bit environment. Assigned the coding-system
- (Lisp symbol) `iso-2022-7' by default.
+ as ISO2022 of 7-bit environment. This doesn't use any locking
+ shift and single shift functions. Assigned the coding-system
+ (Lisp symbol) `iso-2022-7bit' by default.
o coding-category-iso-8-1
The category for a coding system which has the same code range
as ISO2022 of 8-bit environment and graphic plane 1 used only
- for DIMENSION1 charset. Assigned the coding-system (Lisp
- symbol) `iso-8859-1' by default.
+ for DIMENSION1 charset. This doesn't use any locking shift
+ and single shift functions. Assigned the coding-system (Lisp
+ symbol) `iso-latin-1' by default.
o coding-category-iso-8-2
The category for a coding system which has the same code range
as ISO2022 of 8-bit environment and graphic plane 1 used only
- for DIMENSION2 charset. Assigned the coding-system (Lisp
- symbol) `euc-japan' by default.
+ for DIMENSION2 charset. This doesn't use any locking shift
+ and single shift functions. Assigned the coding-system (Lisp
+ symbol) `japanese-iso-8bit' by default.
- o coding-category-iso-else
+ o coding-category-iso-7-else
The category for a coding system which has the same code range
- as ISO2022 but not belongs to any of the above three
- categories. Assigned the coding-system (Lisp symbol)
- `iso-2022-ss2-7' by default.
+ as ISO2022 of 7-bit environment but uses locking shift or
+ single shift functions. Assigned the coding-system (Lisp
+ symbol) `iso-2022-7bit-lock' by default.
+
+ o coding-category-iso-8-else
+
+ The category for a coding system which has the same code range
+ as ISO2022 of 8-bit environment but uses locking shift or
+ single shift functions. Assigned the coding-system (Lisp
+ symbol) `iso-2022-8bit-ss2' by default.
o coding-category-big5
/* No valid ISO2022 code follows C. Try again. */
goto label_loop_detect_coding;
}
- else if (c == ISO_CODE_SS2 || c == ISO_CODE_SS3 || c == ISO_CODE_CSI)
+ else if (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
/* C is an ISO2022 specific control code of C1,
or the first byte of SJIS's 2-byte character code,
or a leading code of Emacs. */
| detect_coding_sjis (src, src_end)
| detect_coding_emacs_mule (src, src_end));
+ else if (c == ISO_CODE_CSI
+ && (src < src_end
+ && (*src == ']'
+ || (src + 1 < src_end
+ && src[1] == ']'
+ && (*src == '0' || *src == '1' || *src == '2')))))
+ /* C is an ISO2022's control-sequence-introducer. */
+ mask = (detect_coding_iso2022 (src, src_end)
+ | detect_coding_sjis (src, src_end)
+ | detect_coding_emacs_mule (src, src_end));
+
else if (c < 0xA0)
/* C is the first byte of SJIS character code,
or a leading-code of Emacs. */
{
int produced;
- coding->carryover_size = 0;
switch (coding->type)
{
case coding_type_no_conversion:
return Qnil;
if (! NILP (Fcoding_system_p (val)))
return Fcons (val, val);
- if (!NILP (Fboundp (val)))
+ if (!NILP (Ffboundp (val)))
return call1 (val, Flist (nargs, args));
return Qnil;
}
DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
"A variable of internal use only.\n\
If the value is a coding system, it is used for decoding on read operation.\n\
-If not, an appropriate element in `coding-system-alist' (which see) is used.");
+If not, an appropriate element is used from one of the coding system alists:\n\
+There are three of such tables, `file-coding-system-alist',\n\
+`process-coding-system-alist', and `network-coding-system-alist'.");
Vcoding_system_for_read = Qnil;
DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
"A variable of internal use only.\n\
If the value is a coding system, it is used for encoding on write operation.\n\
-If not, an appropriate element in `coding-system-alist' (which see) is used.");
+If not, an appropriate element is used from one of the coding system alists:\n\
+There are three of such tables, `file-coding-system-alist',\n\
+`process-coding-system-alist', and `network-coding-system-alist'.");
Vcoding_system_for_write = Qnil;
DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
- "Coding-system used in the latest file or process I/O.");
+ "Coding system used in the latest file or process I/O.");
Vlast_coding_system_used = Qnil;
DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,