/* Coding system handler (conversion, detection, and etc).
Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
Licensed to the Free Software Foundation.
+ Copyright (C) 2001 Free Software Foundation, Inc.
This file is part of GNU Emacs.
In that case, a sequence of one-byte codes has a slightly different
form.
- At first, all characters in eight-bit-control are represented by
+ Firstly, all characters in eight-bit-control are represented by
one-byte sequences which are their 8-bit code.
Next, character composition data are represented by the byte
METHOD is 0xF0 plus one of composition method (enum
composition_method),
- BYTES is 0x20 plus a byte length of this composition data,
+ BYTES is 0xA0 plus the byte length of these composition data,
- CHARS is 0x20 plus a number of characters composed by this
+ CHARS is 0xA0 plus the number of characters composed by these
data,
COMPONENTs are characters of multibyte form or composition
if (reg_bits)
for (charset = 0; charset <= MAX_CHARSET; charset++)
{
- if (CHARSET_VALID_P (charset)
+ if (CHARSET_DEFINED_P (charset)
&& (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
== CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
{
if (! mask)
idx = CODING_CATEGORY_IDX_RAW_TEXT;
- val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
+ val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[idx]);
if (coding->eol_type != CODING_EOL_UNDECIDED)
{
{
/* The source text ends in invalid codes. Let's just
make them valid buffer contents, and finish conversion. */
- inserted += len_byte;
- inserted_byte += len_byte;
- while (len_byte--)
- *dst++ = *src++;
+ if (multibyte_p)
+ {
+ unsigned char *start = dst;
+
+ inserted += len_byte;
+ while (len_byte--)
+ {
+ int c = *src++;
+ dst += CHAR_STRING (c, dst);
+ }
+
+ inserted_byte += dst - start;
+ }
+ else
+ {
+ inserted += len_byte;
+ inserted_byte += len_byte;
+ while (len_byte--)
+ *dst++ = *src++;
+ }
break;
}
if (result == CODING_FINISH_INTERRUPT)
if (from < GPT && to >= GPT)
move_gap_both (to, to_byte);
+ /* If an anchor byte `\0' follows the region, we include it in
+ the detecting source. Then code detectors can handle the trailing
+ byte sequence more accurately.
+
+ FIXME: This is not a perfect solution. It would be better to
+ add one more argument, say LAST_BLOCK, to all detect_coding_XXX.
+ */
if (to == Z || (to == GPT && GAP_SIZE > 0))
include_anchor_byte = 1;
return detect_coding_system (BYTE_POS_ADDR (from_byte),
- /* "+ include_anchor_byteq" is to
- include the anchor byte `\0'. With
- this, code detectors can check if
- tailing bytes are valid. */
to_byte - from_byte + include_anchor_byte,
!NILP (highest),
!NILP (current_buffer
return detect_coding_system (XSTRING (string)->data,
/* "+ 1" is to include the anchor byte
`\0'. With this, code detectors can
- check if tailing bytes are
- valid. */
+ handle the trailing bytes more
+ accurately. */
STRING_BYTES (XSTRING (string)) + 1,
!NILP (highest),
STRING_MULTIBYTE (string));
safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
&single_byte_char_found);
- if (!single_byte_char_found)
+ if (EQ (safe_codings, Qt))
+ ; /* Nothing to be done. */
+ else if (!single_byte_char_found)
{
/* Append generic coding systems. */
Lisp_Object args[2];
{
Lisp_Object val;
- val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
+ val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[i]);
if (!NILP (val))
{
if (! coding_system_table[i])