/* Coding system handler (conversion, detection, and etc).
- Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
+ Copyright (C) 1995, 1997, 1998, 2002 Electrotechnical Laboratory, JAPAN.
Licensed to the Free Software Foundation.
- Copyright (C) 2001 Free Software Foundation, Inc.
+ Copyright (C) 2001,2002 Free Software Foundation, Inc.
This file is part of GNU Emacs.
all variants of ISO2022. Details are described in section 3.
2. SJIS (or Shift-JIS or MS-Kanji-Code)
-
+
A coding system to encode character sets: ASCII, JISX0201, and
JISX0208. Widely used for PC's in Japan. Details are described in
section 4.
Lisp_Object Vselect_safe_coding_system_function;
+int coding_system_require_warning;
+
/* Mnemonic string for each format of end-of-line. */
Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
/* Mnemonic string to indicate format of end-of-line is not yet
#ifdef emacs
+/* Information about which coding system is safe for which chars.
+ The value has the form (GENERIC-LIST . NON-GENERIC-ALIST).
+
+ GENERIC-LIST is a list of generic coding systems which can encode
+ any characters.
+
+ NON-GENERIC-ALIST is an alist of non generic coding systems vs the
+ corresponding char table that contains safe chars. */
+Lisp_Object Vcoding_system_safe_chars;
+
Lisp_Object Vcoding_system_list, Vcoding_system_alist;
Lisp_Object Qcoding_system_p, Qcoding_system_error;
/* Default coding systems used for process I/O. */
Lisp_Object Vdefault_process_coding_system;
+/* Char table for translating Quail and self-inserting input. */
+Lisp_Object Vtranslation_table_for_input;
+
/* Global flag to tell that we can't call post-read-conversion and
pre-write-conversion functions. Usually the value is zero, but it
is set to 1 temporarily while such functions are running. This is
to avoid infinite recursive call. */
static int inhibit_pre_post_conversion;
-/* Char-table containing safe coding systems of each character. */
-Lisp_Object Vchar_coding_system_table;
Lisp_Object Qchar_coding_system;
-/* Return `safe-chars' property of coding system CODING. Don't check
- validity of CODING. */
+/* Return `safe-chars' property of CODING_SYSTEM (symbol). Don't check
+ its validity. */
Lisp_Object
-coding_safe_chars (coding)
- struct coding_system *coding;
+coding_safe_chars (coding_system)
+ Lisp_Object coding_system;
{
Lisp_Object coding_spec, plist, safe_chars;
-
- coding_spec = Fget (coding->symbol, Qcoding_system);
+
+ coding_spec = Fget (coding_system, Qcoding_system);
plist = XVECTOR (coding_spec)->contents[3];
safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
/* Record one COMPONENT (alternate character or composition rule). */
-#define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \
- (coding->cmp_data->data[coding->cmp_data->used++] = component)
+#define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \
+ do { \
+ coding->cmp_data->data[coding->cmp_data->used++] = component; \
+ if (coding->cmp_data->used - coding->cmp_data_start \
+ == COMPOSITION_DATA_MAX_BUNCH_LENGTH) \
+ { \
+ CODING_ADD_COMPOSITION_END (coding, coding->produced_char); \
+ coding->composing = COMPOSITION_NO; \
+ } \
+ } while (0)
/* Get one byte from a data pointed by SRC and increment SRC. If SRC
return 0;
for (ncomponent = 0; src < src_base + data_len; ncomponent++)
{
+ /* If it is longer than this, it can't be valid. */
+ if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
+ return 0;
+
if (ncomponent % 2 && with_rule)
{
ONE_MORE_BYTE (gref);
CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
for (i = 0; i < ncomponent; i++)
CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
- CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
+ CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
if (buf < bufp)
{
unsigned char *p = buf;
ONE_MORE_BYTE (c);
if (c != '\n')
{
- if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- {
- coding->result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop;
- }
src--;
c = '\r';
}
coding->produced_char++;
continue;
}
- else if (*src == 0x80)
+ else if (*src == 0x80 && coding->cmp_data)
{
/* Start of composition data. */
int consumed = decode_composition_emacs_mule (coding, src, src_end,
coding->cmp_data_start = 0; \
} \
} while (0)
-
-static void encode_eol P_ ((struct coding_system *, unsigned char *,
+
+static void encode_eol P_ ((struct coding_system *, const unsigned char *,
unsigned char *, int, int));
static void
#define CHARSET_OK(idx, charset, c) \
(coding_system_table[idx] \
&& (charset == CHARSET_ASCII \
- || (safe_chars = coding_safe_chars (coding_system_table[idx]), \
+ || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \
CODING_SAFE_CHAR_P (safe_chars, c))) \
&& (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
charset) \
#define SHIFT_OUT_OK(idx) \
(CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
+#define COMPOSITION_OK(idx) \
+ (coding_system_table[idx]->composing != COMPOSITION_DISABLED)
+
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
Check if a text is encoded in ISO2022. If it is, return an
integer in which appropriate flag bits any of:
while (mask && src < src_end)
{
ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ retry:
switch (c)
{
case ISO_CODE_ESC:
else if (c >= '0' && c <= '4')
{
/* ESC <Fp> for start/end composition. */
- mask_found |= CODING_CATEGORY_MASK_ISO;
+ if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7))
+ mask_found |= CODING_CATEGORY_MASK_ISO_7;
+ else
+ mask &= ~CODING_CATEGORY_MASK_ISO_7;
+ if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT))
+ mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
+ else
+ mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
+ if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1))
+ mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
+ else
+ mask &= ~CODING_CATEGORY_MASK_ISO_8_1;
+ if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2))
+ mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
+ else
+ mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
+ if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE))
+ mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
+ else
+ mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
+ if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))
+ mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
+ else
+ mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
break;
}
else
mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
}
break;
-
+
case ISO_CODE_SI:
if (inhibit_iso_escape_detection)
break;
&& mask & CODING_CATEGORY_MASK_ISO_8_2)
{
int i = 1;
+
+ c = -1;
while (src < src_end)
{
ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
else
mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
+ if (c >= 0)
+ /* This means that we have read one extra byte. */
+ goto retry;
}
}
break;
#define DECODE_COMPOSITION_END(c1) \
do { \
- if (coding->composing == COMPOSITION_DISABLED) \
+ if (! COMPOSING_P (coding)) \
{ \
*dst++ = ISO_CODE_ESC; \
*dst++ = c1; \
Lisp_Object translation_table;
Lisp_Object safe_chars;
- safe_chars = coding_safe_chars (coding);
+ safe_chars = coding_safe_chars (coding->symbol);
if (NILP (Venable_character_translation))
translation_table = Qnil;
ONE_MORE_BYTE (c1);
if (c1 != ISO_CODE_LF)
{
- if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- {
- coding->result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop;
- }
src--;
c1 = '\r';
}
ONE_MORE_CHAR (c);
if (c == '\n')
break;
-
+
charset = CHAR_CHARSET (c);
reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
Lisp_Object translation_table;
Lisp_Object safe_chars;
- safe_chars = coding_safe_chars (coding);
+ safe_chars = coding_safe_chars (coding->symbol);
if (NILP (Venable_character_translation))
translation_table = Qnil;
*dst++ = ISO_CODE_CR;
CODING_SPEC_ISO_BOL (coding) = 1;
}
- else
+ else
{
if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
ENCODE_RESET_PLANE_AND_REGISTER;
/* Macros to decode or encode a character of Big5 in BIG5. B1 and B2
are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
- C1 and C2 are the 1st and 2nd position-codes of of Emacs' internal
+ C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
format. CHARSET is `charset_big5_1' or `charset_big5_2'. */
/* Number of Big5 characters which have the same code in 1st byte. */
ONE_MORE_BYTE (c2);
if (c2 == '\n')
c1 = c2;
- else if (coding->mode
- & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- {
- coding->result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop;
- }
else
/* To process C2 again, SRC is subtracted by 1. */
src--;
src_base = src;
ONE_MORE_CHAR (c);
-
+
/* Now encode the character C. */
if (SINGLE_BYTE_CHAR_P (c))
{
switch (c)
{
case '\r':
- if (!coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
+ if (!(coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
{
EMIT_ONE_BYTE (c);
break;
ONE_MORE_BYTE (c);
if (c != '\n')
{
- if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- {
- coding->result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop;
- }
src--;
c = '\r';
}
static void
encode_eol (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
- unsigned char *src = source;
+ const unsigned char *src = source;
unsigned char *dst = destination;
- unsigned char *src_end = src + src_bytes;
+ const unsigned char *src_end = src + src_bytes;
unsigned char *dst_end = dst + dst_bytes;
Lisp_Object translation_table;
/* SRC_BASE remembers the start position in source in each loop.
analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
there's not enough destination area to produce encoded codes
(within macro EMIT_BYTES). */
- unsigned char *src_base;
+ const unsigned char *src_base;
+ unsigned char *tmp;
int c;
int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
}
if (coding->eol_type == CODING_EOL_CR)
{
- for (src = destination; src < dst; src++)
- if (*src == '\n') *src = '\r';
+ for (tmp = destination; tmp < dst; tmp++)
+ if (*tmp == '\n') *tmp = '\r';
}
else if (selective_display)
{
- for (src = destination; src < dst; src++)
- if (*src == '\r') *src = '\n';
+ for (tmp = destination; tmp < dst; tmp++)
+ if (*tmp == '\r') *tmp = '\n';
}
}
if (coding->src_multibyte)
If the value is nil, graphic register N is never used on
encoding.
-
+
sub-element[N] where N is 4 through 11: to be set in `coding->flags'
Each value takes t or nil. See the section ISO2022 of
`coding.h' for more information.
}
else
goto label_invalid_coding_system;
-
+
/* If the coding system has non-nil `composition' property, enable
composition handling. */
val = Fplist_get (plist, Qcomposition);
coding->type = coding_type_emacs_mule;
coding->common_flags
|= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
- coding->composing = COMPOSITION_NO;
if (!NILP (coding->post_read_conversion))
coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
if (!NILP (coding->pre_write_conversion))
= CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
for (i = 0; i < 4; i++)
{
- if (INTEGERP (flags[i])
- && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
+ if ((INTEGERP (flags[i])
+ && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)))
|| (charset = get_charset_id (flags[i])) >= 0)
{
CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
tail = flags[i];
coding->flags |= CODING_FLAG_ISO_DESIGNATION;
- if (INTEGERP (XCAR (tail))
- && (charset = XINT (XCAR (tail)),
- CHARSET_VALID_P (charset))
+ if ((INTEGERP (XCAR (tail))
+ && (charset = XINT (XCAR (tail)),
+ CHARSET_VALID_P (charset)))
|| (charset = get_charset_id (XCAR (tail))) >= 0)
{
CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
tail = XCDR (tail);
while (CONSP (tail))
{
- if (INTEGERP (XCAR (tail))
- && (charset = XINT (XCAR (tail)),
- CHARSET_VALID_P (charset))
+ if ((INTEGERP (XCAR (tail))
+ && (charset = XINT (XCAR (tail)),
+ CHARSET_VALID_P (charset)))
|| (charset = get_charset_id (XCAR (tail))) >= 0)
CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
= i;
}
else
CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
-
+
CODING_SPEC_ISO_DESIGNATION (coding, i)
= CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
}
| CODING_CATEGORY_MASK_UTF_16_LE);
/* Or, if C is a special latin extra code,
- or is an ISO2022 specific control code of C1 (SS2 or SS3),
+ or is an ISO2022 specific control code of C1 (SS2 or SS3),
or is an ISO2022 control-sequence-introducer (CSI),
we should also consider the possibility of ISO2022 codings. */
if ((VECTORP (Vlatin_extra_code_table)
if (!iso2022_examined_p
&& (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
{
- mask |= detect_coding_iso2022 (src, src_end);
+ mask |= detect_coding_iso2022 (src, src_end, multibytep);
iso2022_examined_p = 1;
}
else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
void
detect_coding (coding, src, src_bytes)
struct coding_system *coding;
- unsigned char *src;
+ const unsigned char *src;
int src_bytes;
{
unsigned int idx;
void
detect_eol (coding, src, src_bytes)
struct coding_system *coding;
- unsigned char *src;
+ const unsigned char *src;
int src_bytes;
{
Lisp_Object val;
{
int src_multibyte = coding->src_multibyte;
int dst_multibyte = coding->dst_multibyte;
+ struct composition_data *cmp_data = coding->cmp_data;
setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
coding->src_multibyte = src_multibyte;
coding->dst_multibyte = dst_multibyte;
coding->heading_ascii = skip;
+ coding->cmp_data = cmp_data;
}
}
{
/* If the last character is CR, we can't handle it here
because LF will be in the not-yet-decoded source text.
- Recorded that the CR is not yet processed. */
+ Record that the CR is not yet processed. */
coding->spec.ccl.cr_carryover = 1;
coding->produced--;
coding->produced_char--;
int
decode_coding (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
+ int extra = 0;
+
if (coding->type == coding_type_undecided)
detect_coding (coding, source, src_bytes);
case coding_type_ccl:
if (coding->spec.ccl.cr_carryover)
{
- /* Set the CR which is not processed by the previous call of
- decode_eol_post_ccl in DESTINATION. */
+ /* Put the CR which was not processed by the previous call
+ of decode_eol_post_ccl in DESTINATION. It will be
+ decoded together with the following LF by the call to
+ decode_eol_post_ccl below. */
*destination = '\r';
coding->produced++;
coding->produced_char++;
dst_bytes--;
+ extra = coding->spec.ccl.cr_carryover;
}
- ccl_coding_driver (coding, source,
- destination + coding->spec.ccl.cr_carryover,
+ ccl_coding_driver (coding, source, destination + extra,
src_bytes, dst_bytes, 0);
if (coding->eol_type != CODING_EOL_LF)
- decode_eol_post_ccl (coding, destination, coding->produced);
+ {
+ coding->produced += extra;
+ coding->produced_char += extra;
+ decode_eol_post_ccl (coding, destination, coding->produced);
+ }
break;
default:
if (coding->mode & CODING_MODE_LAST_BLOCK
&& coding->result == CODING_FINISH_INSUFFICIENT_SRC)
{
- unsigned char *src = source + coding->consumed;
+ const unsigned char *src = source + coding->consumed;
unsigned char *dst = destination + coding->produced;
src_bytes -= coding->consumed;
int
encode_coding (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
coding->produced = coding->produced_char = 0;
if (coding->mode & CODING_MODE_LAST_BLOCK
&& coding->result == CODING_FINISH_INSUFFICIENT_SRC)
{
- unsigned char *src = source + coding->consumed;
+ const unsigned char *src = source + coding->consumed;
unsigned char *dst = destination + coding->produced;
if (coding->type == coding_type_iso2022)
if (coding->type == coding_type_ccl
|| coding->eol_type == CODING_EOL_CRLF
|| coding->eol_type == CODING_EOL_CR
- || coding->cmp_data && coding->cmp_data->used > 0)
+ || (coding->cmp_data && coding->cmp_data->used > 0))
{
/* We can't skip any data. */
return;
break;
if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
{
- unsigned char *bol = begp;
+ unsigned char *bol = begp;
while (begp < endp && *begp < 0x80)
{
begp++;
else if (VECTORP (val) || STRINGP (val))
{
int len = (VECTORP (val)
- ? XVECTOR (val)->size : XSTRING (val)->size);
+ ? XVECTOR (val)->size : SCHARS (val));
int i;
for (i = 0; i < len; i++)
{
int len = data[0] - 4, j;
Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
+ if (method == COMPOSITION_WITH_RULE_ALTCHARS
+ && len % 2 == 0)
+ len --;
for (j = 0; j < len; j++)
args[j] = make_number (data[4 + j]);
components = (method == COMPOSITION_WITH_ALTCHARS
struct coding_system *coding;
{
int len = to - from, len_byte = to_byte - from_byte;
+ int nchars_del = 0, nbytes_del = 0;
int require, inserted, inserted_byte;
int head_skip, tail_skip, total_skip = 0;
Lisp_Object saved_coding_symbol;
}
if (replace)
- deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
+ {
+ if (! EQ (current_buffer->undo_list, Qt))
+ deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
+ else
+ {
+ nchars_del = to - from;
+ nbytes_del = to_byte - from_byte;
+ }
+ }
if (coding->composing != COMPOSITION_DISABLED)
{
if (encodep)
result = encode_coding (coding, src, dst, len_byte, 0);
else
- result = decode_coding (coding, src, dst, len_byte, 0);
+ {
+ if (coding->composing != COMPOSITION_DISABLED)
+ coding->cmp_data->char_offset = from + inserted;
+ result = decode_coding (coding, src, dst, len_byte, 0);
+ }
/* The buffer memory is now:
+--------+-------converted-text----+--+------original-text----+---+
coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
else
coding->symbol = saved_coding_symbol;
-
+
continue;
}
if (len_byte <= 0)
if (multibyte_p)
{
unsigned char *start = dst;
-
+
inserted += len_byte;
while (len_byte--)
{
REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
Here, we are sure that NEW >= ORIG. */
- float ratio = coding->produced - coding->consumed;
- ratio /= coding->consumed;
- require = len_byte * ratio;
+ float ratio;
+
+ if (coding->produced <= coding->consumed)
+ {
+ /* This happens because of CCL-based coding system with
+ eol-type CRLF. */
+ require = 0;
+ }
+ else
+ {
+ ratio = (coding->produced - coding->consumed) / coding->consumed;
+ require = len_byte * ratio;
+ }
first = 0;
}
if ((src - dst) < (require + 2000))
inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
}
- /* If we shrank the conversion area, adjust it now. */
+ /* If we shrank the conversion area, adjust it now. */
if (total_skip > 0)
{
if (tail_skip > 0)
}
prev_Z = Z;
- adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
+ if (! EQ (current_buffer->undo_list, Qt))
+ adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
+ else
+ adjust_after_replace_noundo (from, from_byte, nchars_del, nbytes_del,
+ inserted, inserted_byte);
inserted = Z - prev_Z;
if (!encodep && coding->cmp_data && coding->cmp_data->used)
struct coding_system *coding;
int encodep;
{
- int count = specpdl_ptr - specpdl;
- struct gcpro gcpro1;
+ int count = SPECPDL_INDEX ();
+ struct gcpro gcpro1, gcpro2;
int multibyte = STRING_MULTIBYTE (str);
+ Lisp_Object buffer;
+ struct buffer *buf;
+ Lisp_Object old_deactivate_mark;
record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
record_unwind_protect (code_convert_region_unwind, Qnil);
- GCPRO1 (str);
- temp_output_buffer_setup (" *code-converting-work*");
- set_buffer_internal (XBUFFER (Vstandard_output));
+ /* It is not crucial to specbind this. */
+ old_deactivate_mark = Vdeactivate_mark;
+ GCPRO2 (str, old_deactivate_mark);
+
+ buffer = Fget_buffer_create (build_string (" *code-converting-work*"));
+ buf = XBUFFER (buffer);
+
+ buf->directory = current_buffer->directory;
+ buf->read_only = Qnil;
+ buf->filename = Qnil;
+ buf->undo_list = Qt;
+ buf->overlays_before = Qnil;
+ buf->overlays_after = Qnil;
+
+ set_buffer_internal (buf);
/* We must insert the contents of STR as is without
unibyte<->multibyte conversion. For that, we adjust the
multibyteness of the working buffer to that of STR. */
Ferase_buffer ();
- current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
+ buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
+
insert_from_string (str, 0, 0,
- XSTRING (str)->size, STRING_BYTES (XSTRING (str)), 0);
+ SCHARS (str), SBYTES (str), 0);
UNGCPRO;
inhibit_pre_post_conversion = 1;
if (encodep)
call1 (coding->post_read_conversion, make_number (Z - BEG));
}
inhibit_pre_post_conversion = 0;
+ Vdeactivate_mark = old_deactivate_mark;
str = make_buffer_string (BEG, Z, 1);
return unbind_to (count, str);
}
int len;
struct conversion_buffer buf;
int from, to_byte;
- struct gcpro gcpro1;
Lisp_Object saved_coding_symbol;
int result;
int require_decoding;
int consumed, consumed_char, produced, produced_char;
from = 0;
- to_byte = STRING_BYTES (XSTRING (str));
+ to_byte = SBYTES (str);
saved_coding_symbol = coding->symbol;
coding->src_multibyte = STRING_MULTIBYTE (str);
/* See the comments in code_convert_region. */
if (coding->type == coding_type_undecided)
{
- detect_coding (coding, XSTRING (str)->data, to_byte);
+ detect_coding (coding, SDATA (str), to_byte);
if (coding->type == coding_type_undecided)
{
coding->type = coding_type_emacs_mule;
&& coding->type != coding_type_ccl)
{
saved_coding_symbol = coding->symbol;
- detect_eol (coding, XSTRING (str)->data, to_byte);
+ detect_eol (coding, SDATA (str), to_byte);
if (coding->eol_type == CODING_EOL_UNDECIDED)
coding->eol_type = CODING_EOL_LF;
/* We had better recover the original eol format if we
{
/* Decoding routines expect the source text to be unibyte. */
str = Fstring_as_unibyte (str);
- to_byte = STRING_BYTES (XSTRING (str));
+ to_byte = SBYTES (str);
nocopy = 1;
coding->src_multibyte = 0;
}
/* Try to skip the heading and tailing ASCIIs. */
if (require_decoding && coding->type != coding_type_ccl)
{
- SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
+ SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
0);
if (from == to_byte)
require_decoding = 0;
- shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
+ shrinked_bytes = from + (SBYTES (str) - to_byte);
}
if (!require_decoding)
{
- coding->consumed = STRING_BYTES (XSTRING (str));
- coding->consumed_char = XSTRING (str)->size;
+ coding->consumed = SBYTES (str);
+ coding->consumed_char = SCHARS (str);
if (coding->dst_multibyte)
{
str = Fstring_as_multibyte (str);
nocopy = 1;
}
- coding->produced = STRING_BYTES (XSTRING (str));
- coding->produced_char = XSTRING (str)->size;
+ coding->produced = SBYTES (str);
+ coding->produced_char = SCHARS (str);
return (nocopy ? str : Fcopy_sequence (str));
}
consumed = consumed_char = produced = produced_char = 0;
while (1)
{
- result = decode_coding (coding, XSTRING (str)->data + from + consumed,
+ result = decode_coding (coding, SDATA (str) + from + consumed,
buf.data + produced, to_byte - from - consumed,
buf.size - produced);
consumed += coding->consumed;
}
produced += num_eol;
produced_char += num_eol;
- }
+ }
/* Suppress eol-format conversion in the further conversion. */
coding->eol_type = CODING_EOL_LF;
else
newstr = make_uninit_string (produced + shrinked_bytes);
if (from > 0)
- bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
- bcopy (buf.data, XSTRING (newstr)->data + from, produced);
+ STRING_COPYIN (newstr, 0, SDATA (str), from);
+ STRING_COPYIN (newstr, from, buf.data, produced);
if (shrinked_bytes > from)
- bcopy (XSTRING (str)->data + to_byte,
- XSTRING (newstr)->data + from + produced,
- shrinked_bytes - from);
+ STRING_COPYIN (newstr, from + produced,
+ SDATA (str) + to_byte,
+ shrinked_bytes - from);
free_conversion_buffer (&buf);
if (coding->cmp_data && coding->cmp_data->used)
str = run_pre_post_conversion_on_str (str, coding, 1);
from = 0;
- to = XSTRING (str)->size;
- to_byte = STRING_BYTES (XSTRING (str));
+ to = SCHARS (str);
+ to_byte = SBYTES (str);
/* Encoding routines determine the multibyteness of the source text
by coding->src_multibyte. */
coding->dst_multibyte = 0;
if (! CODING_REQUIRE_ENCODING (coding))
{
- coding->consumed = STRING_BYTES (XSTRING (str));
- coding->consumed_char = XSTRING (str)->size;
+ coding->consumed = SBYTES (str);
+ coding->consumed_char = SCHARS (str);
if (STRING_MULTIBYTE (str))
{
str = Fstring_as_unibyte (str);
nocopy = 1;
}
- coding->produced = STRING_BYTES (XSTRING (str));
- coding->produced_char = XSTRING (str)->size;
+ coding->produced = SBYTES (str);
+ coding->produced_char = SCHARS (str);
return (nocopy ? str : Fcopy_sequence (str));
}
/* Try to skip the heading and tailing ASCIIs. */
if (coding->type != coding_type_ccl)
{
- SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
+ SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
1);
if (from == to_byte)
return (nocopy ? str : Fcopy_sequence (str));
- shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
+ shrinked_bytes = from + (SBYTES (str) - to_byte);
}
len = encoding_buffer_size (coding, to_byte - from);
consumed = consumed_char = produced = produced_char = 0;
while (1)
{
- result = encode_coding (coding, XSTRING (str)->data + from + consumed,
+ result = encode_coding (coding, SDATA (str) + from + consumed,
buf.data + produced, to_byte - from - consumed,
buf.size - produced);
consumed += coding->consumed;
newstr = make_uninit_string (produced + shrinked_bytes);
if (from > 0)
- bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
- bcopy (buf.data, XSTRING (newstr)->data + from, produced);
+ STRING_COPYIN (newstr, 0, SDATA (str), from);
+ STRING_COPYIN (newstr, from, buf.data, produced);
if (shrinked_bytes > from)
- bcopy (XSTRING (str)->data + to_byte,
- XSTRING (newstr)->data + from + produced,
- shrinked_bytes - from);
+ STRING_COPYIN (newstr, from + produced,
+ SDATA (str) + to_byte,
+ shrinked_bytes - from);
free_conversion_buffer (&buf);
coding_free_composition_data (coding);
val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
}
- while (XSTRING (val)->size == 0);
+ while (SCHARS (val) == 0);
return (Fintern (val, Qnil));
}
{
Lisp_Object val;
if (SYMBOLP (default_coding_system))
- XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
+ default_coding_system = SYMBOL_NAME (default_coding_system);
val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
Qt, Qnil, Qcoding_system_history,
default_coding_system, Qnil);
- return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
+ return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
}
DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
\f
Lisp_Object
detect_coding_system (src, src_bytes, highest, multibytep)
- unsigned char *src;
+ const unsigned char *src;
int src_bytes, highest;
int multibytep;
{
}
}
return (highest ? XCAR (val) : val);
-}
+}
DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
2, 3, 0,
- doc: /* Detect coding system of the text in the region between START and END.
-Return a list of possible coding systems ordered by priority.
+ doc: /* Detect how the byte sequence in the region is encoded.
+Return a list of possible coding systems used on decoding a byte
+sequence containing the bytes in the region between START and END when
+the coding system `undecided' is specified. The list is ordered by
+priority decided in the current language environment.
If only ASCII characters are found, it returns a list of single element
`undecided' or its subsidiary coding system according to a detected
the detecting source. Then code detectors can handle the tailing
byte sequence more accurately.
- Fix me: This is not an perfect solution. It is better that we
+ Fix me: This is not a perfect solution. It is better that we
add one more argument, say LAST_BLOCK, to all detect_coding_XXX.
*/
if (to == Z || (to == GPT && GAP_SIZE > 0))
DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
1, 2, 0,
- doc: /* Detect coding system of the text in STRING.
-Return a list of possible coding systems ordered by priority.
+ doc: /* Detect how the byte sequence in STRING is encoded.
+Return a list of possible coding systems used on decoding a byte
+sequence containing the bytes in STRING when the coding system
+`undecided' is specified. The list is ordered by priority decided in
+the current language environment.
If only ASCII characters are found, it returns a list of single element
`undecided' or its subsidiary coding system according to a detected
{
CHECK_STRING (string);
- return detect_coding_system (XSTRING (string)->data,
+ return detect_coding_system (SDATA (string),
/* "+ 1" is to include the anchor byte
`\0'. With this, code detectors can
handle the tailing bytes more
accurately. */
- STRING_BYTES (XSTRING (string)) + 1,
+ SBYTES (string) + 1,
!NILP (highest),
STRING_MULTIBYTE (string));
}
-/* Return an intersection of lists L1 and L2. */
-
-static Lisp_Object
-intersection (l1, l2)
- Lisp_Object l1, l2;
-{
- Lisp_Object val;
-
- for (val = Qnil; CONSP (l1); l1 = XCDR (l1))
- {
- if (!NILP (Fmemq (XCAR (l1), l2)))
- val = Fcons (XCAR (l1), val);
- }
- return val;
-}
-
-
/* Subroutine for Fsafe_coding_systems_region_internal.
Return a list of coding systems that safely encode the multibyte
Lisp_Object safe_codings, work_table;
int *single_byte_char_found;
{
- int c, len, idx;
- Lisp_Object val;
+ int c, len, i;
+ Lisp_Object val, ch;
+ Lisp_Object prev, tail;
while (p < pend)
{
if (SINGLE_BYTE_CHAR_P (c))
*single_byte_char_found = 1;
if (NILP (safe_codings))
+ /* Already all coding systems are excluded. */
continue;
/* Check the safe coding systems for C. */
- val = char_table_ref_and_index (work_table, c, &idx);
+ ch = make_number (c);
+ val = Faref (work_table, ch);
if (EQ (val, Qt))
/* This element was already checked. Ignore it. */
continue;
/* Remember that we checked this element. */
- CHAR_TABLE_SET (work_table, make_number (idx), Qt);
+ Faset (work_table, ch, Qt);
- /* If there are some safe coding systems for C and we have
- already found the other set of coding systems for the
- different characters, get the intersection of them. */
- if (!EQ (safe_codings, Qt) && !NILP (val))
- val = intersection (safe_codings, val);
- safe_codings = val;
+ for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail))
+ {
+ val = XCAR (tail);
+ if (NILP (Faref (XCDR (val), ch)))
+ {
+ /* Exclued this coding system from SAFE_CODINGS. */
+ if (EQ (tail, safe_codings))
+ safe_codings = XCDR (safe_codings);
+ else
+ XSETCDR (prev, XCDR (tail));
+ }
+ else
+ prev = tail;
+ }
}
return safe_codings;
}
-
-/* Return a list of coding systems that safely encode the text between
- START and END. If the text contains only ASCII or is unibyte,
- return t. */
-
DEFUN ("find-coding-systems-region-internal",
Ffind_coding_systems_region_internal,
Sfind_coding_systems_region_internal, 2, 2, 0,
Lisp_Object work_table, safe_codings;
int non_ascii_p = 0;
int single_byte_char_found = 0;
- unsigned char *p1, *p1end, *p2, *p2end, *p;
+ const unsigned char *p1, *p1end, *p2, *p2end, *p;
if (STRINGP (start))
{
if (!STRING_MULTIBYTE (start))
return Qt;
- p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start));
+ p1 = SDATA (start), p1end = p1 + SBYTES (start);
p2 = p2end = p1end;
- if (XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
+ if (SCHARS (start) != SBYTES (start))
non_ascii_p = 1;
}
else
for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
if (p == p1end)
{
- for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
+ for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
if (p == p2end)
return Qt;
}
}
/* The text contains non-ASCII characters. */
- work_table = Fcopy_sequence (Vchar_coding_system_table);
- safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
+
+ work_table = Fmake_char_table (Qchar_coding_system, Qnil);
+ safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
+
+ safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table,
&single_byte_char_found);
if (p2 < p2end)
safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
&single_byte_char_found);
+ if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars)))
+ safe_codings = Qt;
+ else
+ {
+ /* Turn safe_codings to a list of coding systems... */
+ Lisp_Object val;
+
+ if (single_byte_char_found)
+ /* ... and append these for eight-bit chars. */
+ val = Fcons (Qraw_text,
+ Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil)));
+ else
+ /* ... and append generic coding systems. */
+ val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars));
+
+ for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings))
+ val = Fcons (XCAR (XCAR (safe_codings)), val);
+ safe_codings = val;
+ }
+
+ return safe_codings;
+}
+
+
+/* Search from position POS for such characters that are unencodable
+ accoding to SAFE_CHARS, and return a list of their positions. P
+ points where in the memory the character at POS exists. Limit the
+ search at PEND or when Nth unencodable characters are found.
+
+ If SAFE_CHARS is a char table, an element for an unencodable
+ character is nil.
+
+ If SAFE_CHARS is nil, all non-ASCII characters are unencodable.
+
+ Otherwise, SAFE_CHARS is t, and only eight-bit-contrl and
+ eight-bit-graphic characters are unencodable. */
- if (EQ (safe_codings, Qt))
- ; /* Nothing to be done. */
- else if (!single_byte_char_found)
+static Lisp_Object
+unencodable_char_position (safe_chars, pos, p, pend, n)
+ Lisp_Object safe_chars;
+ int pos;
+ unsigned char *p, *pend;
+ int n;
+{
+ Lisp_Object pos_list;
+
+ pos_list = Qnil;
+ while (p < pend)
{
- /* Append generic coding systems. */
- Lisp_Object args[2];
- args[0] = safe_codings;
- args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
- make_number (0));
- safe_codings = Fappend (2, args);
+ int len;
+ int c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len);
+
+ if (c >= 128
+ && (CHAR_TABLE_P (safe_chars)
+ ? NILP (CHAR_TABLE_REF (safe_chars, c))
+ : (NILP (safe_chars) || c < 256)))
+ {
+ pos_list = Fcons (make_number (pos), pos_list);
+ if (--n <= 0)
+ break;
+ }
+ pos++;
+ p += len;
+ }
+ return Fnreverse (pos_list);
+}
+
+
+DEFUN ("unencodable-char-position", Funencodable_char_position,
+ Sunencodable_char_position, 3, 5, 0,
+ doc: /*
+Return position of first un-encodable character in a region.
+START and END specfiy the region and CODING-SYSTEM specifies the
+encoding to check. Return nil if CODING-SYSTEM does encode the region.
+
+If optional 4th argument COUNT is non-nil, it specifies at most how
+many un-encodable characters to search. In this case, the value is a
+list of positions.
+
+If optional 5th argument STRING is non-nil, it is a string to search
+for un-encodable characters. In that case, START and END are indexes
+to the string. */)
+ (start, end, coding_system, count, string)
+ Lisp_Object start, end, coding_system, count, string;
+{
+ int n;
+ Lisp_Object safe_chars;
+ struct coding_system coding;
+ Lisp_Object positions;
+ int from, to;
+ unsigned char *p, *pend;
+
+ if (NILP (string))
+ {
+ validate_region (&start, &end);
+ from = XINT (start);
+ to = XINT (end);
+ if (NILP (current_buffer->enable_multibyte_characters))
+ return Qnil;
+ p = CHAR_POS_ADDR (from);
+ if (to == GPT)
+ pend = GPT_ADDR;
+ else
+ pend = CHAR_POS_ADDR (to);
}
else
- safe_codings = Fcons (Qraw_text,
- Fcons (Qemacs_mule,
- Fcons (Qno_conversion, safe_codings)));
- return safe_codings;
+ {
+ CHECK_STRING (string);
+ CHECK_NATNUM (start);
+ CHECK_NATNUM (end);
+ from = XINT (start);
+ to = XINT (end);
+ if (from > to
+ || to > SCHARS (string))
+ args_out_of_range_3 (string, start, end);
+ if (! STRING_MULTIBYTE (string))
+ return Qnil;
+ p = SDATA (string) + string_char_to_byte (string, from);
+ pend = SDATA (string) + string_char_to_byte (string, to);
+ }
+
+ setup_coding_system (Fcheck_coding_system (coding_system), &coding);
+
+ if (NILP (count))
+ n = 1;
+ else
+ {
+ CHECK_NATNUM (count);
+ n = XINT (count);
+ }
+
+ if (coding.type == coding_type_no_conversion
+ || coding.type == coding_type_raw_text)
+ return Qnil;
+
+ if (coding.type == coding_type_undecided)
+ safe_chars = Qnil;
+ else
+ safe_chars = coding_safe_chars (coding_system);
+
+ if (STRINGP (string)
+ || from >= GPT || to <= GPT)
+ positions = unencodable_char_position (safe_chars, from, p, pend, n);
+ else
+ {
+ Lisp_Object args[2];
+
+ args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n);
+ n -= XINT (Flength (args[0]));
+ if (n <= 0)
+ positions = args[0];
+ else
+ {
+ args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR,
+ pend, n);
+ positions = Fappend (2, args);
+ }
+ }
+
+ return (NILP (count) ? Fcar (positions) : positions);
}
return make_number (to - from);
if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
- error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
+ error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
coding.mode |= CODING_MODE_LAST_BLOCK;
coding.src_multibyte = coding.dst_multibyte
return (NILP (nocopy) ? Fcopy_sequence (string) : string);
if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
- error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
+ error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
coding.mode |= CODING_MODE_LAST_BLOCK;
string = (encodep
return string;
if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
- error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
+ error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
coding.composing = COMPOSITION_DISABLED;
coding.mode |= CODING_MODE_LAST_BLOCK;
}
else
{
- if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF)
+ if ((s1 < 0x80 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF)
|| (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
error ("Invalid Shift JIS code: %x", XFASTINT (code));
DECODE_SJIS (s1, s2, c1, c2);
return val;
}
\f
-DEFUN ("set-terminal-coding-system-internal",
- Fset_terminal_coding_system_internal,
+DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
Sset_terminal_coding_system_internal, 1, 1, 0,
doc: /* Internal use only. */)
(coding_system)
return Qnil;
}
-DEFUN ("set-safe-terminal-coding-system-internal",
- Fset_safe_terminal_coding_system_internal,
+DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal,
Sset_safe_terminal_coding_system_internal, 1, 1, 0,
doc: /* Internal use only. */)
(coding_system)
return Qnil;
}
-DEFUN ("terminal-coding-system",
- Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
+DEFUN ("terminal-coding-system", Fterminal_coding_system,
+ Sterminal_coding_system, 0, 0, 0,
doc: /* Return coding system specified for terminal output. */)
()
{
return terminal_coding.symbol;
}
-DEFUN ("set-keyboard-coding-system-internal",
- Fset_keyboard_coding_system_internal,
+DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
Sset_keyboard_coding_system_internal, 1, 1, 0,
doc: /* Internal use only. */)
(coding_system)
return Qnil;
}
-DEFUN ("keyboard-coding-system",
- Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
+DEFUN ("keyboard-coding-system", Fkeyboard_coding_system,
+ Skeyboard_coding_system, 0, 0, 0,
doc: /* Return coding system specified for decoding keyboard input. */)
()
{
error ("Invalid first argument");
if (nargs < 1 + XINT (target_idx))
error ("Too few arguments for operation: %s",
- XSYMBOL (operation)->name->data);
+ SDATA (SYMBOL_NAME (operation)));
+ /* For write-region, if the 6th argument (i.e. VISIT, the 5th
+ argument to write-region) is string, it must be treated as a
+ target file name. */
+ if (EQ (operation, Qwrite_region)
+ && nargs > 5
+ && STRINGP (args[5]))
+ target_idx = make_number (4);
target = args[XINT (target_idx) + 1];
if (!(STRINGP (target)
|| (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
return Qnil;
}
+DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
+ Sdefine_coding_system_internal, 1, 1, 0,
+ doc: /* Register CODING-SYSTEM as a base coding system.
+This function is internal use only. */)
+ (coding_system)
+ Lisp_Object coding_system;
+{
+ Lisp_Object safe_chars, slot;
+
+ if (NILP (Fcheck_coding_system (coding_system)))
+ Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
+ safe_chars = coding_safe_chars (coding_system);
+ if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
+ error ("No valid safe-chars property for %s",
+ SDATA (SYMBOL_NAME (coding_system)));
+ if (EQ (safe_chars, Qt))
+ {
+ if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
+ XSETCAR (Vcoding_system_safe_chars,
+ Fcons (coding_system, XCAR (Vcoding_system_safe_chars)));
+ }
+ else
+ {
+ slot = Fassq (coding_system, XCDR (Vcoding_system_safe_chars));
+ if (NILP (slot))
+ XSETCDR (Vcoding_system_safe_chars,
+ nconc2 (XCDR (Vcoding_system_safe_chars),
+ Fcons (Fcons (coding_system, safe_chars), Qnil)));
+ else
+ XSETCDR (slot, safe_chars);
+ }
+ return Qnil;
+}
+
#endif /* emacs */
\f
{
int i;
- /* Emacs' internal format specific initialize routine. */
+ /* Emacs' internal format specific initialize routine. */
for (i = 0; i <= 0x20; i++)
emacs_code_class[i] = EMACS_control_code;
emacs_code_class[0x0A] = EMACS_linefeed_code;
}
}
+ Vcoding_system_safe_chars = Fcons (Qnil, Qnil);
+ staticpro (&Vcoding_system_safe_chars);
+
Qtranslation_table = intern ("translation-table");
staticpro (&Qtranslation_table);
Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
But don't staticpro it here--that is done in alloc.c. */
Qchar_table_extra_slots = intern ("char-table-extra-slots");
Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
- Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1));
+ Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0));
Qvalid_codes = intern ("valid-codes");
staticpro (&Qvalid_codes);
defsubr (&Sdetect_coding_region);
defsubr (&Sdetect_coding_string);
defsubr (&Sfind_coding_systems_region_internal);
+ defsubr (&Sunencodable_char_position);
defsubr (&Sdecode_coding_region);
defsubr (&Sencode_coding_region);
defsubr (&Sdecode_coding_string);
defsubr (&Sfind_operation_coding_system);
defsubr (&Supdate_coding_systems_internal);
defsubr (&Sset_coding_priority_internal);
+ defsubr (&Sdefine_coding_system_internal);
DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
doc: /* List of coding systems.
If VAL is a cons of coding systems, the car part is used for decoding,
and the cdr part is used for encoding.
If VAL is a function symbol, the function must return a coding system
-or a cons of coding systems which are used as above.
+or a cons of coding systems which are used as above. The function gets
+the arguments with which `find-operation-coding-system' was called.
See also the function `find-operation-coding-system'
and the variable `auto-coding-alist'. */);
The default value is `select-safe-coding-system' (which see). */);
Vselect_safe_coding_system_function = Qnil;
- DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
- doc: /* Char-table containing safe coding systems of each characters.
-Each element doesn't include such generic coding systems that can
-encode any characters. They are in the first extra slot. */);
- Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
+ DEFVAR_BOOL ("coding-system-require-warning",
+ &coding_system_require_warning,
+ doc: /* Internal use only.
+If non-nil, on writing a file, `select-safe-coding-system-function' is
+called even if `coding-system-for-write' is non-nil. The command
+`universal-coding-system-argument' binds this variable to t temporarily. */);
+ coding_system_require_warning = 0;
+
DEFVAR_BOOL ("inhibit-iso-escape-detection",
&inhibit_iso_escape_detection,
to explicitly specify some coding system that doesn't use ISO2022's
escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
inhibit_iso_escape_detection = 0;
+
+ DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
+ doc: /* Char table for translating self-inserting characters.
+This is applied to the result of input methods, not their input. See also
+`keyboard-translate-table'. */);
+ Vtranslation_table_for_input = Qnil;
}
char *
Lisp_Object dec = code_convert_string_norecord (build_string (str),
Vlocale_coding_system,
0);
- str = (char *) XSTRING (dec)->data;
+ str = (char *) SDATA (dec);
}
return str;