/* Coding system handler (conversion, detection, and etc).
- Copyright (C) 1995, 1997, 1998, 2002 Electrotechnical Laboratory, JAPAN.
+ Copyright (C) 1995,97,1998,2002,2003 Electrotechnical Laboratory, JAPAN.
Licensed to the Free Software Foundation.
- Copyright (C) 2001 Free Software Foundation, Inc.
+ Copyright (C) 2001,2002,2003 Free Software Foundation, Inc.
This file is part of GNU Emacs.
You should have received a copy of the GNU General Public License
along with GNU Emacs; see the file COPYING. If not, write to
-the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA. */
+the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
/*** TABLE OF CONTENTS ***
static void
decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
...
#include "ccl.h"
#include "coding.h"
#include "window.h"
+#include "intervals.h"
#else /* not emacs */
Lisp_Object Qvalid_codes;
extern Lisp_Object Qinsert_file_contents, Qwrite_region;
-Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
+Lisp_Object Qcall_process, Qcall_process_region;
Lisp_Object Qstart_process, Qopen_network_stream;
Lisp_Object Qtarget_idx;
+/* If a symbol has this property, evaluate the value to define the
+ symbol as a coding system. */
+Lisp_Object Qcoding_system_define_form;
+
Lisp_Object Vselect_safe_coding_system_function;
+int coding_system_require_warning;
+
/* Mnemonic string for each format of end-of-line. */
Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
/* Mnemonic string to indicate format of end-of-line is not yet
#ifdef emacs
+/* Information about which coding system is safe for which chars.
+ The value has the form (GENERIC-LIST . NON-GENERIC-ALIST).
+
+ GENERIC-LIST is a list of generic coding systems which can encode
+ any characters.
+
+ NON-GENERIC-ALIST is an alist of non generic coding systems vs the
+ corresponding char table that contains safe chars. */
+Lisp_Object Vcoding_system_safe_chars;
+
Lisp_Object Vcoding_system_list, Vcoding_system_alist;
Lisp_Object Qcoding_system_p, Qcoding_system_error;
end-of-line format. */
Lisp_Object Qemacs_mule, Qraw_text;
+Lisp_Object Qutf_8;
+
/* Coding-systems are handed between Emacs Lisp programs and C internal
routines by the following three variables. */
/* Coding-system for reading files and receiving data from process. */
/* Default coding systems used for process I/O. */
Lisp_Object Vdefault_process_coding_system;
+/* Char table for translating Quail and self-inserting input. */
+Lisp_Object Vtranslation_table_for_input;
+
/* Global flag to tell that we can't call post-read-conversion and
pre-write-conversion functions. Usually the value is zero, but it
is set to 1 temporarily while such functions are running. This is
to avoid infinite recursive call. */
static int inhibit_pre_post_conversion;
-/* Char-table containing safe coding systems of each character. */
-Lisp_Object Vchar_coding_system_table;
Lisp_Object Qchar_coding_system;
-/* Return `safe-chars' property of coding system CODING. Don't check
- validity of CODING. */
+/* Return `safe-chars' property of CODING_SYSTEM (symbol). Don't check
+ its validity. */
Lisp_Object
-coding_safe_chars (coding)
- struct coding_system *coding;
+coding_safe_chars (coding_system)
+ Lisp_Object coding_system;
{
Lisp_Object coding_spec, plist, safe_chars;
- coding_spec = Fget (coding->symbol, Qcoding_system);
+ coding_spec = Fget (coding_system, Qcoding_system);
plist = XVECTOR (coding_spec)->contents[3];
safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
/* Record one COMPONENT (alternate character or composition rule). */
-#define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \
- (coding->cmp_data->data[coding->cmp_data->used++] = component)
+#define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \
+ do { \
+ coding->cmp_data->data[coding->cmp_data->used++] = component; \
+ if (coding->cmp_data->used - coding->cmp_data_start \
+ == COMPOSITION_DATA_MAX_BUNCH_LENGTH) \
+ { \
+ CODING_ADD_COMPOSITION_END (coding, coding->produced_char); \
+ coding->composing = COMPOSITION_NO; \
+ } \
+ } while (0)
/* Get one byte from a data pointed by SRC and increment SRC. If SRC
#define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p) \
do { \
int bytes; \
- \
+ \
c = SAFE_ONE_MORE_BYTE (); \
if (c < 0) \
break; \
break; \
*p++ = c; \
} \
- if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)) \
+ if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes) \
+ || (coding->flags /* We are recovering a file. */ \
+ && p0[0] == LEADING_CODE_8_BIT_CONTROL \
+ && ! CHAR_HEAD_P (p0[1]))) \
c = STRING_CHAR (p0, bytes); \
else \
c = -1; \
decode_composition_emacs_mule (coding, src, src_end,
destination, dst_end, dst_bytes)
struct coding_system *coding;
- unsigned char *src, *src_end, **destination, *dst_end;
+ const unsigned char *src, *src_end;
+ unsigned char **destination, *dst_end;
int dst_bytes;
{
unsigned char *dst = *destination;
int method, data_len, nchars;
- unsigned char *src_base = src++;
+ const unsigned char *src_base = src++;
/* Store components of composition. */
int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
int ncomponent;
else
{
int bytes;
- if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+ if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
+ || (coding->flags /* We are recovering a file. */
+ && src[0] == LEADING_CODE_8_BIT_CONTROL
+ && ! CHAR_HEAD_P (src[1])))
c = STRING_CHAR (src, bytes);
else
c = *src, bytes = 1;
static void
decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
+ const unsigned char *src = source;
+ const unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
/* SRC_BASE remembers the start position in source in each loop.
The loop will be exited when there's not enough source code, or
when there's not enough destination area to produce a
character. */
- unsigned char *src_base;
+ const unsigned char *src_base;
coding->produced_char = 0;
while ((src_base = src) < src_end)
{
- unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
+ unsigned char tmp[MAX_MULTIBYTE_LENGTH];
+ const unsigned char *p;
int bytes;
if (*src == '\r')
ONE_MORE_BYTE (c);
if (c != '\n')
{
- if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- {
- coding->result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop;
- }
src--;
c = '\r';
}
coding->produced_char++;
continue;
}
- else if (*src == 0x80)
+ else if (*src == 0x80 && coding->cmp_data)
{
/* Start of composition data. */
int consumed = decode_composition_emacs_mule (coding, src, src_end,
p = tmp;
src++;
}
- else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+ else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
+ || (coding->flags /* We are recovering a file. */
+ && src[0] == LEADING_CODE_8_BIT_CONTROL
+ && ! CHAR_HEAD_P (src[1])))
{
p = src;
src += bytes;
}
else
{
- bytes = CHAR_STRING (*src, tmp);
- p = tmp;
+ int i, c;
+
+ bytes = BYTES_BY_CHAR_HEAD (*src);
src++;
+ for (i = 1; i < bytes; i++)
+ {
+ ONE_MORE_BYTE (c);
+ if (CHAR_HEAD_P (c))
+ break;
+ }
+ if (i < bytes)
+ {
+ bytes = CHAR_STRING (*src_base, tmp);
+ p = tmp;
+ src = src_base + 1;
+ }
+ else
+ {
+ p = src_base;
+ }
}
if (dst + bytes >= (dst_bytes ? dst_end : src))
{
} while (0)
-static void encode_eol P_ ((struct coding_system *, unsigned char *,
+static void encode_eol P_ ((struct coding_system *, const unsigned char *,
unsigned char *, int, int));
static void
encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
+ const unsigned char *src = source;
+ const unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
- unsigned char *src_base;
+ const unsigned char *src_base;
int c;
int char_offset;
int *data;
EMIT_ONE_BYTE ('\r');
}
else if (SINGLE_BYTE_CHAR_P (c))
- EMIT_ONE_BYTE (c);
+ {
+ if (coding->flags && ! ASCII_BYTE_P (c))
+ {
+ /* As we are auto saving, retain the multibyte form for
+ 8-bit chars. */
+ unsigned char buf[MAX_MULTIBYTE_LENGTH];
+ int bytes = CHAR_STRING (c, buf);
+
+ if (bytes == 1)
+ EMIT_ONE_BYTE (buf[0]);
+ else
+ EMIT_TWO_BYTES (buf[0], buf[1]);
+ }
+ else
+ EMIT_ONE_BYTE (c);
+ }
else
EMIT_BYTES (src_base, src);
coding->consumed_char++;
#define CHARSET_OK(idx, charset, c) \
(coding_system_table[idx] \
&& (charset == CHARSET_ASCII \
- || (safe_chars = coding_safe_chars (coding_system_table[idx]), \
+ || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \
CODING_SAFE_CHAR_P (safe_chars, c))) \
&& (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
charset) \
#define SHIFT_OUT_OK(idx) \
(CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
+#define COMPOSITION_OK(idx) \
+ (coding_system_table[idx]->composing != COMPOSITION_DISABLED)
+
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
Check if a text is encoded in ISO2022. If it is, return an
integer in which appropriate flag bits any of:
while (mask && src < src_end)
{
ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ retry:
switch (c)
{
case ISO_CODE_ESC:
else if (c >= '0' && c <= '4')
{
/* ESC <Fp> for start/end composition. */
- mask_found |= CODING_CATEGORY_MASK_ISO;
+ if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7))
+ mask_found |= CODING_CATEGORY_MASK_ISO_7;
+ else
+ mask &= ~CODING_CATEGORY_MASK_ISO_7;
+ if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT))
+ mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
+ else
+ mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
+ if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1))
+ mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
+ else
+ mask &= ~CODING_CATEGORY_MASK_ISO_8_1;
+ if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2))
+ mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
+ else
+ mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
+ if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE))
+ mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
+ else
+ mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
+ if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))
+ mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
+ else
+ mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
break;
}
else
&& mask & CODING_CATEGORY_MASK_ISO_8_2)
{
int i = 1;
+
+ c = -1;
while (src < src_end)
{
ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
else
mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
+ if (c >= 0)
+ /* This means that we have read one extra byte. */
+ goto retry;
}
}
break;
coding->cmp_data->next = cmp_data;
coding->cmp_data = cmp_data;
coding->cmp_data_start = 0;
+ coding->composing = COMPOSITION_NO;
}
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
static void
decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
+ const unsigned char *src = source;
+ const unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
/* Charsets invoked to graphic plane 0 and 1 respectively. */
(within macro ONE_MORE_BYTE), or when there's not enough
destination area to produce a character (within macro
EMIT_CHAR). */
- unsigned char *src_base;
+ const unsigned char *src_base;
int c, charset;
Lisp_Object translation_table;
Lisp_Object safe_chars;
- safe_chars = coding_safe_chars (coding);
+ safe_chars = coding_safe_chars (coding->symbol);
if (NILP (Venable_character_translation))
translation_table = Qnil;
while (1)
{
- int c1, c2;
+ int c1, c2 = 0;
src_base = src;
ONE_MORE_BYTE (c1);
ONE_MORE_BYTE (c1);
if (c1 != ISO_CODE_LF)
{
- if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- {
- coding->result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop;
- }
src--;
c1 = '\r';
}
}
continue;
+ case '%':
+ if (COMPOSING_P (coding))
+ DECODE_COMPOSITION_END ('1');
+ ONE_MORE_BYTE (c1);
+ if (c1 == '/')
+ {
+ /* CTEXT extended segment:
+ ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
+ We keep these bytes as is for the moment.
+ They may be decoded by post-read-conversion. */
+ int dim, M, L;
+ int size, required;
+ int produced_chars;
+
+ ONE_MORE_BYTE (dim);
+ ONE_MORE_BYTE (M);
+ ONE_MORE_BYTE (L);
+ size = ((M - 128) * 128) + (L - 128);
+ required = 8 + size * 2;
+ if (dst + required > (dst_bytes ? dst_end : src))
+ goto label_end_of_loop;
+ *dst++ = ISO_CODE_ESC;
+ *dst++ = '%';
+ *dst++ = '/';
+ *dst++ = dim;
+ produced_chars = 4;
+ dst += CHAR_STRING (M, dst), produced_chars++;
+ dst += CHAR_STRING (L, dst), produced_chars++;
+ while (size-- > 0)
+ {
+ ONE_MORE_BYTE (c1);
+ dst += CHAR_STRING (c1, dst), produced_chars++;
+ }
+ coding->produced_char += produced_chars;
+ }
+ else if (c1 == 'G')
+ {
+ unsigned char *d = dst;
+ int produced_chars;
+
+ /* XFree86 extension for embedding UTF-8 in CTEXT:
+ ESC % G --UTF-8-BYTES-- ESC % @
+ We keep these bytes as is for the moment.
+ They may be decoded by post-read-conversion. */
+ if (d + 6 > (dst_bytes ? dst_end : src))
+ goto label_end_of_loop;
+ *d++ = ISO_CODE_ESC;
+ *d++ = '%';
+ *d++ = 'G';
+ produced_chars = 3;
+ while (d + 1 < (dst_bytes ? dst_end : src))
+ {
+ ONE_MORE_BYTE (c1);
+ if (c1 == ISO_CODE_ESC
+ && src + 1 < src_end
+ && src[0] == '%'
+ && src[1] == '@')
+ {
+ src += 2;
+ break;
+ }
+ d += CHAR_STRING (c1, d), produced_chars++;
+ }
+ if (d + 3 > (dst_bytes ? dst_end : src))
+ goto label_end_of_loop;
+ *d++ = ISO_CODE_ESC;
+ *d++ = '%';
+ *d++ = '@';
+ dst = d;
+ coding->produced_char += produced_chars + 3;
+ }
+ else
+ goto label_invalid_code;
+ continue;
+
default:
if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
goto label_invalid_code;
DECODE_COMPOSITION_END ('1');
src = src_base;
c = *src++;
+ if (! NILP (translation_table))
+ c = translate_char (translation_table, c, 0, 0, 0);
EMIT_CHAR (c);
}
/* Instead of encoding character C, produce one or two `?'s. */
-#define ENCODE_UNSAFE_CHARACTER(c) \
- do { \
- ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \
- if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1) \
- ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \
+#define ENCODE_UNSAFE_CHARACTER(c) \
+ do { \
+ ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER); \
+ if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1) \
+ ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER); \
} while (0)
encode_designation_at_bol (coding, translation_table, src, src_end, dst)
struct coding_system *coding;
Lisp_Object translation_table;
- unsigned char *src, *src_end, *dst;
+ const unsigned char *src, *src_end;
+ unsigned char *dst;
{
int charset, c, found = 0, reg;
/* Table of charsets to be designated to each graphic register. */
static void
encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
+ const unsigned char *src = source;
+ const unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
/* Since the maximum bytes produced by each loop is 20, we subtract 19
analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
there's not enough destination area to produce encoded codes
(within macro EMIT_BYTES). */
- unsigned char *src_base;
+ const unsigned char *src_base;
int c;
Lisp_Object translation_table;
Lisp_Object safe_chars;
- safe_chars = coding_safe_chars (coding);
+ if (coding->flags & CODING_FLAG_ISO_SAFE)
+ coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
+
+ safe_chars = coding_safe_chars (coding->symbol);
if (NILP (Venable_character_translation))
translation_table = Qnil;
}
else
{
- if (coding->flags & CODING_FLAG_ISO_SAFE
+ if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
&& ! CODING_SAFE_CHAR_P (safe_chars, c))
ENCODE_UNSAFE_CHARACTER (c);
else
*dst++ = c;
coding->errors++;
}
- else if (coding->flags & CODING_FLAG_ISO_SAFE
+ else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
&& ! CODING_SAFE_CHAR_P (safe_chars, c))
ENCODE_UNSAFE_CHARACTER (c);
else
/* Macros to decode or encode a character of Big5 in BIG5. B1 and B2
are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
- C1 and C2 are the 1st and 2nd position-codes of of Emacs' internal
+ C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
format. CHARSET is `charset_big5_1' or `charset_big5_2'. */
/* Number of Big5 characters which have the same code in 1st byte. */
int multibytep;
{
unsigned char c1, c2;
- /* Dummy for TWO_MORE_BYTES. */
+ /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE. */
struct coding_system dummy_coding;
struct coding_system *coding = &dummy_coding;
decode_coding_sjis_big5 (coding, source, destination,
src_bytes, dst_bytes, sjis_p)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
int sjis_p;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
+ const unsigned char *src = source;
+ const unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
/* SRC_BASE remembers the start position in source in each loop.
(within macro ONE_MORE_BYTE), or when there's not enough
destination area to produce a character (within macro
EMIT_CHAR). */
- unsigned char *src_base;
+ const unsigned char *src_base;
Lisp_Object translation_table;
if (NILP (Venable_character_translation))
coding->produced_char = 0;
while (1)
{
- int c, charset, c1, c2;
+ int c, charset, c1, c2 = 0;
src_base = src;
ONE_MORE_BYTE (c1);
ONE_MORE_BYTE (c2);
if (c2 == '\n')
c1 = c2;
- else if (coding->mode
- & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- {
- coding->result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop;
- }
else
/* To process C2 again, SRC is subtracted by 1. */
src--;
EMIT_ONE_BYTE (c1 | 0x80);
else if (charset == charset_latin_jisx0201)
EMIT_ONE_BYTE (c1);
+ else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
+ {
+ EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
+ if (CHARSET_WIDTH (charset) > 1)
+ EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
+ }
else
/* There's no way other than producing the internal
codes as is. */
ENCODE_BIG5 (charset, c1, c2, c1, c2);
EMIT_TWO_BYTES (c1, c2);
}
+ else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
+ {
+ EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
+ if (CHARSET_WIDTH (charset) > 1)
+ EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
+ }
else
/* There's no way other than producing the internal
codes as is. */
static void
decode_eol (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
- unsigned char *src = source;
+ const unsigned char *src = source;
unsigned char *dst = destination;
- unsigned char *src_end = src + src_bytes;
+ const unsigned char *src_end = src + src_bytes;
unsigned char *dst_end = dst + dst_bytes;
Lisp_Object translation_table;
/* SRC_BASE remembers the start position in source in each loop.
(within macro ONE_MORE_BYTE), or when there's not enough
destination area to produce a character (within macro
EMIT_CHAR). */
- unsigned char *src_base;
+ const unsigned char *src_base;
int c;
translation_table = Qnil;
ONE_MORE_BYTE (c);
if (c != '\n')
{
- if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- {
- coding->result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop;
- }
src--;
c = '\r';
}
static void
encode_eol (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
- unsigned char *src = source;
+ const unsigned char *src = source;
unsigned char *dst = destination;
- unsigned char *src_end = src + src_bytes;
+ const unsigned char *src_end = src + src_bytes;
unsigned char *dst_end = dst + dst_bytes;
Lisp_Object translation_table;
/* SRC_BASE remembers the start position in source in each loop.
analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
there's not enough destination area to produce encoded codes
(within macro EMIT_BYTES). */
- unsigned char *src_base;
+ const unsigned char *src_base;
+ unsigned char *tmp;
int c;
int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
}
if (coding->eol_type == CODING_EOL_CR)
{
- for (src = destination; src < dst; src++)
- if (*src == '\n') *src = '\r';
+ for (tmp = destination; tmp < dst; tmp++)
+ if (*tmp == '\n') *tmp = '\r';
}
else if (selective_display)
{
- for (src = destination; src < dst; src++)
- if (*src == '\r') *src = '\n';
+ for (tmp = destination; tmp < dst; tmp++)
+ if (*tmp == '\r') *tmp = '\n';
}
}
if (coding->src_multibyte)
coding->type = coding_type_emacs_mule;
coding->common_flags
|= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
- coding->composing = COMPOSITION_NO;
if (!NILP (coding->post_read_conversion))
coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
if (!NILP (coding->pre_write_conversion))
o coding-category-utf-8
The category for a coding system which has the same code range
- as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
+ as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
symbol) `utf-8' by default.
o coding-category-utf-16-be
void
detect_coding (coding, src, src_bytes)
struct coding_system *coding;
- unsigned char *src;
+ const unsigned char *src;
int src_bytes;
{
unsigned int idx;
void
detect_eol (coding, src, src_bytes)
struct coding_system *coding;
- unsigned char *src;
+ const unsigned char *src;
int src_bytes;
{
Lisp_Object val;
{
int src_multibyte = coding->src_multibyte;
int dst_multibyte = coding->dst_multibyte;
+ struct composition_data *cmp_data = coding->cmp_data;
setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
coding->src_multibyte = src_multibyte;
coding->dst_multibyte = dst_multibyte;
coding->heading_ascii = skip;
+ coding->cmp_data = cmp_data;
}
}
int magnification;
if (coding->type == coding_type_ccl)
- magnification = coding->spec.ccl.encoder.buf_magnification;
+ {
+ magnification = coding->spec.ccl.encoder.buf_magnification;
+ if (coding->eol_type == CODING_EOL_CRLF)
+ magnification *= 2;
+ }
else if (CODING_REQUIRE_ENCODING (coding))
magnification = 3;
else
unsigned char *data;
};
-/* Don't use alloca for allocating memory space larger than this, lest
- we overflow their stack. */
-#define MAX_ALLOCA 16*1024
-
/* Allocate LEN bytes of memory for BUF (struct conversion_buffer). */
#define allocate_conversion_buffer(buf, len) \
do { \
if (ccl->eol_type ==CODING_EOL_UNDECIDED)
ccl->eol_type = CODING_EOL_LF;
ccl->cr_consumed = coding->spec.ccl.cr_carryover;
+ ccl->eight_bit_control = coding->dst_multibyte;
}
+ else
+ ccl->eight_bit_control = 1;
ccl->multibyte = coding->src_multibyte;
if (coding->spec.ccl.eight_bit_carryover[0] != 0)
{
{
/* If the last character is CR, we can't handle it here
because LF will be in the not-yet-decoded source text.
- Recorded that the CR is not yet processed. */
+ Record that the CR is not yet processed. */
coding->spec.ccl.cr_carryover = 1;
coding->produced--;
coding->produced_char--;
int
decode_coding (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
+ int extra = 0;
+
if (coding->type == coding_type_undecided)
detect_coding (coding, source, src_bytes);
case coding_type_ccl:
if (coding->spec.ccl.cr_carryover)
{
- /* Set the CR which is not processed by the previous call of
- decode_eol_post_ccl in DESTINATION. */
+ /* Put the CR which was not processed by the previous call
+ of decode_eol_post_ccl in DESTINATION. It will be
+ decoded together with the following LF by the call to
+ decode_eol_post_ccl below. */
*destination = '\r';
coding->produced++;
coding->produced_char++;
dst_bytes--;
+ extra = coding->spec.ccl.cr_carryover;
}
- ccl_coding_driver (coding, source,
- destination + coding->spec.ccl.cr_carryover,
+ ccl_coding_driver (coding, source, destination + extra,
src_bytes, dst_bytes, 0);
if (coding->eol_type != CODING_EOL_LF)
- decode_eol_post_ccl (coding, destination, coding->produced);
+ {
+ coding->produced += extra;
+ coding->produced_char += extra;
+ decode_eol_post_ccl (coding, destination, coding->produced);
+ }
break;
default:
if (coding->mode & CODING_MODE_LAST_BLOCK
&& coding->result == CODING_FINISH_INSUFFICIENT_SRC)
{
- unsigned char *src = source + coding->consumed;
+ const unsigned char *src = source + coding->consumed;
unsigned char *dst = destination + coding->produced;
src_bytes -= coding->consumed;
int
encode_coding (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
coding->produced = coding->produced_char = 0;
if (coding->mode & CODING_MODE_LAST_BLOCK
&& coding->result == CODING_FINISH_INSUFFICIENT_SRC)
{
- unsigned char *src = source + coding->consumed;
+ const unsigned char *src = source + coding->consumed;
unsigned char *dst = destination + coding->produced;
if (coding->type == coding_type_iso2022)
} \
} while (0)
+/* ARG is (CODING . BUFFER) where CODING is what to be set in
+ Vlast_coding_system_used and BUFFER if non-nil is a buffer to
+ kill. */
static Lisp_Object
-code_convert_region_unwind (dummy)
- Lisp_Object dummy;
+code_convert_region_unwind (arg)
+ Lisp_Object arg;
{
inhibit_pre_post_conversion = 0;
+ Vlast_coding_system_used = XCAR (arg);
+ if (! NILP (XCDR (arg)))
+ Fkill_buffer (XCDR (arg));
return Qnil;
}
else if (VECTORP (val) || STRINGP (val))
{
int len = (VECTORP (val)
- ? XVECTOR (val)->size : XSTRING (val)->size);
+ ? XVECTOR (val)->size : SCHARS (val));
int i;
for (i = 0; i < len; i++)
{
enum composition_method method = (enum composition_method) data[3];
Lisp_Object components;
+ if (data[0] < 0 || i + data[0] > cmp_data->used)
+ /* Invalid composition data. */
+ break;
+
if (method == COMPOSITION_RELATIVE)
components = Qnil;
else
int len = data[0] - 4, j;
Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
+ if (method == COMPOSITION_WITH_RULE_ALTCHARS
+ && len % 2 == 0)
+ len --;
+ if (len < 1)
+ /* Invalid composition data. */
+ break;
for (j = 0; j < len; j++)
args[j] = make_number (data[4 + j]);
components = (method == COMPOSITION_WITH_ALTCHARS
- ? Fstring (len, args) : Fvector (len, args));
+ ? Fstring (len, args)
+ : Fvector (len, args));
}
compose_text (data[1], data[2], components, Qnil, obj);
}
struct buffer *prev = current_buffer;
Lisp_Object new;
- record_unwind_protect (code_convert_region_unwind, Qnil);
+ record_unwind_protect (code_convert_region_unwind,
+ Fcons (Vlast_coding_system_used, Qnil));
/* We should not call any more pre-write/post-read-conversion
functions while this pre-write-conversion is running. */
inhibit_pre_post_conversion = 1;
coding_allocate_composition_data (coding, from);
}
- /* Try to skip the heading and tailing ASCIIs. */
- if (coding->type != coding_type_ccl)
+ /* Try to skip the heading and tailing ASCIIs. We can't skip them
+ if we must run CCL program or there are compositions to
+ encode. */
+ if (coding->type != coding_type_ccl
+ && (! coding->cmp_data || coding->cmp_data->used == 0))
{
int from_byte_orig = from_byte, to_byte_orig = to_byte;
if (!replace)
/* We must record and adjust for this new text now. */
adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
+ coding_free_composition_data (coding);
return 0;
}
REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
Here, we are sure that NEW >= ORIG. */
- float ratio = coding->produced - coding->consumed;
- ratio /= coding->consumed;
- require = len_byte * ratio;
+
+ if (coding->produced <= coding->consumed)
+ {
+ /* This happens because of CCL-based coding system with
+ eol-type CRLF. */
+ require = 0;
+ }
+ else
+ {
+ float ratio = coding->produced - coding->consumed;
+ ratio /= coding->consumed;
+ require = len_byte * ratio;
+ }
first = 0;
}
if ((src - dst) < (require + 2000))
&& ! encodep && ! NILP (coding->post_read_conversion))
{
Lisp_Object val;
+ Lisp_Object saved_coding_system;
if (from != PT)
TEMP_SET_PT_BOTH (from, from_byte);
prev_Z = Z;
- record_unwind_protect (code_convert_region_unwind, Qnil);
+ record_unwind_protect (code_convert_region_unwind,
+ Fcons (Vlast_coding_system_used, Qnil));
+ saved_coding_system = Vlast_coding_system_used;
+ Vlast_coding_system_used = coding->symbol;
/* We should not call any more pre-write/post-read-conversion
functions while this post-read-conversion is running. */
inhibit_pre_post_conversion = 1;
val = call1 (coding->post_read_conversion, make_number (inserted));
inhibit_pre_post_conversion = 0;
+ coding->symbol = Vlast_coding_system_used;
+ Vlast_coding_system_used = saved_coding_system;
/* Discard the unwind protect. */
specpdl_ptr--;
CHECK_NUMBER (val);
return 0;
}
-Lisp_Object
-run_pre_post_conversion_on_str (str, coding, encodep)
- Lisp_Object str;
- struct coding_system *coding;
- int encodep;
+/* Name (or base name) of work buffer for code conversion. */
+static Lisp_Object Vcode_conversion_workbuf_name;
+
+/* Set the current buffer to the working buffer prepared for
+ code-conversion. MULTIBYTE specifies the multibyteness of the
+ buffer. Return the buffer we set if it must be killed after use.
+ Otherwise return Qnil. */
+
+static Lisp_Object
+set_conversion_work_buffer (multibyte)
+ int multibyte;
{
- int count = specpdl_ptr - specpdl;
- struct gcpro gcpro1;
- int multibyte = STRING_MULTIBYTE (str);
- Lisp_Object buffer;
+ Lisp_Object buffer, buffer_to_kill;
struct buffer *buf;
- record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
- record_unwind_protect (code_convert_region_unwind, Qnil);
- GCPRO1 (str);
-
- buffer = Fget_buffer_create (build_string (" *code-converting-work*"));
+ buffer = Fget_buffer_create (Vcode_conversion_workbuf_name);
buf = XBUFFER (buffer);
+ if (buf == current_buffer)
+ {
+ /* As we are already in the work buffer, we must generate a new
+ buffer for the work. */
+ Lisp_Object name;
+
+ name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
+ buffer = buffer_to_kill = Fget_buffer_create (name);
+ buf = XBUFFER (buffer);
+ }
+ else
+ buffer_to_kill = Qnil;
+ delete_all_overlays (buf);
buf->directory = current_buffer->directory;
buf->read_only = Qnil;
buf->filename = Qnil;
buf->undo_list = Qt;
- buf->overlays_before = Qnil;
- buf->overlays_after = Qnil;
-
+ eassert (buf->overlays_before == NULL);
+ eassert (buf->overlays_after == NULL);
set_buffer_internal (buf);
+ if (BEG != BEGV || Z != ZV)
+ Fwiden ();
+ del_range_2 (BEG, BEG_BYTE, Z, Z_BYTE, 0);
+ buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
+ return buffer_to_kill;
+}
+
+Lisp_Object
+run_pre_post_conversion_on_str (str, coding, encodep)
+ Lisp_Object str;
+ struct coding_system *coding;
+ int encodep;
+{
+ int count = SPECPDL_INDEX ();
+ struct gcpro gcpro1, gcpro2;
+ int multibyte = STRING_MULTIBYTE (str);
+ Lisp_Object old_deactivate_mark;
+ Lisp_Object buffer_to_kill;
+
+ record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
+ /* It is not crucial to specbind this. */
+ old_deactivate_mark = Vdeactivate_mark;
+ GCPRO2 (str, old_deactivate_mark);
+
/* We must insert the contents of STR as is without
unibyte<->multibyte conversion. For that, we adjust the
multibyteness of the working buffer to that of STR. */
- Ferase_buffer ();
- buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
+ buffer_to_kill = set_conversion_work_buffer (multibyte);
+ record_unwind_protect (code_convert_region_unwind,
+ Fcons (Vlast_coding_system_used, buffer_to_kill));
insert_from_string (str, 0, 0,
- XSTRING (str)->size, STRING_BYTES (XSTRING (str)), 0);
+ SCHARS (str), SBYTES (str), 0);
UNGCPRO;
inhibit_pre_post_conversion = 1;
if (encodep)
call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
else
{
+ Vlast_coding_system_used = coding->symbol;
TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
call1 (coding->post_read_conversion, make_number (Z - BEG));
+ coding->symbol = Vlast_coding_system_used;
}
inhibit_pre_post_conversion = 0;
+ Vdeactivate_mark = old_deactivate_mark;
str = make_buffer_string (BEG, Z, 1);
return unbind_to (count, str);
}
+
+/* Run pre-write-conversion function of CODING on NCHARS/NBYTES
+ text in *STR. *SIZE is the allocated bytes for STR. As it
+ is intended that this function is called from encode_terminal_code,
+ the pre-write-conversion function is run by safe_call and thus
+ "Error during redisplay: ..." is logged when an error occurs.
+
+ Store the resulting text in *STR and set CODING->produced_char and
+ CODING->produced to the number of characters and bytes
+ respectively. If the size of *STR is too small, enlarge it by
+ xrealloc and update *STR and *SIZE. */
+
+void
+run_pre_write_conversin_on_c_str (str, size, nchars, nbytes, coding)
+ unsigned char **str;
+ int *size, nchars, nbytes;
+ struct coding_system *coding;
+{
+ struct gcpro gcpro1, gcpro2;
+ struct buffer *cur = current_buffer;
+ Lisp_Object old_deactivate_mark, old_last_coding_system_used;
+ Lisp_Object args[3];
+ Lisp_Object buffer_to_kill;
+
+ /* It is not crucial to specbind this. */
+ old_deactivate_mark = Vdeactivate_mark;
+ old_last_coding_system_used = Vlast_coding_system_used;
+ GCPRO2 (old_deactivate_mark, old_last_coding_system_used);
+
+ /* We must insert the contents of STR as is without
+ unibyte<->multibyte conversion. For that, we adjust the
+ multibyteness of the working buffer to that of STR. */
+ buffer_to_kill = set_conversion_work_buffer (coding->src_multibyte);
+ insert_1_both (*str, nchars, nbytes, 0, 0, 0);
+ UNGCPRO;
+ inhibit_pre_post_conversion = 1;
+ args[0] = coding->pre_write_conversion;
+ args[1] = make_number (BEG);
+ args[2] = make_number (Z);
+ safe_call (3, args);
+ inhibit_pre_post_conversion = 0;
+ Vdeactivate_mark = old_deactivate_mark;
+ Vlast_coding_system_used = old_last_coding_system_used;
+ coding->produced_char = Z - BEG;
+ coding->produced = Z_BYTE - BEG_BYTE;
+ if (coding->produced > *size)
+ {
+ *size = coding->produced;
+ *str = xrealloc (*str, *size);
+ }
+ if (BEG < GPT && GPT < Z)
+ move_gap (BEG);
+ bcopy (BEG_ADDR, *str, coding->produced);
+ coding->src_multibyte
+ = ! NILP (current_buffer->enable_multibyte_characters);
+ set_buffer_internal (cur);
+ if (! NILP (buffer_to_kill))
+ Fkill_buffer (buffer_to_kill);
+}
+
+
Lisp_Object
decode_coding_string (str, coding, nocopy)
Lisp_Object str;
int consumed, consumed_char, produced, produced_char;
from = 0;
- to_byte = STRING_BYTES (XSTRING (str));
+ to_byte = SBYTES (str);
saved_coding_symbol = coding->symbol;
coding->src_multibyte = STRING_MULTIBYTE (str);
/* See the comments in code_convert_region. */
if (coding->type == coding_type_undecided)
{
- detect_coding (coding, XSTRING (str)->data, to_byte);
+ detect_coding (coding, SDATA (str), to_byte);
if (coding->type == coding_type_undecided)
{
coding->type = coding_type_emacs_mule;
&& coding->type != coding_type_ccl)
{
saved_coding_symbol = coding->symbol;
- detect_eol (coding, XSTRING (str)->data, to_byte);
+ detect_eol (coding, SDATA (str), to_byte);
if (coding->eol_type == CODING_EOL_UNDECIDED)
coding->eol_type = CODING_EOL_LF;
/* We had better recover the original eol format if we
{
/* Decoding routines expect the source text to be unibyte. */
str = Fstring_as_unibyte (str);
- to_byte = STRING_BYTES (XSTRING (str));
+ to_byte = SBYTES (str);
nocopy = 1;
coding->src_multibyte = 0;
}
/* Try to skip the heading and tailing ASCIIs. */
if (require_decoding && coding->type != coding_type_ccl)
{
- SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
+ SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
0);
if (from == to_byte)
require_decoding = 0;
- shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
+ shrinked_bytes = from + (SBYTES (str) - to_byte);
}
- if (!require_decoding)
+ if (!require_decoding
+ && !(SYMBOLP (coding->post_read_conversion)
+ && !NILP (Ffboundp (coding->post_read_conversion))))
{
- coding->consumed = STRING_BYTES (XSTRING (str));
- coding->consumed_char = XSTRING (str)->size;
+ coding->consumed = SBYTES (str);
+ coding->consumed_char = SCHARS (str);
if (coding->dst_multibyte)
{
str = Fstring_as_multibyte (str);
nocopy = 1;
}
- coding->produced = STRING_BYTES (XSTRING (str));
- coding->produced_char = XSTRING (str)->size;
+ coding->produced = SBYTES (str);
+ coding->produced_char = SCHARS (str);
return (nocopy ? str : Fcopy_sequence (str));
}
consumed = consumed_char = produced = produced_char = 0;
while (1)
{
- result = decode_coding (coding, XSTRING (str)->data + from + consumed,
+ result = decode_coding (coding, SDATA (str) + from + consumed,
buf.data + produced, to_byte - from - consumed,
buf.size - produced);
consumed += coding->consumed;
produced += coding->produced;
produced_char += coding->produced_char;
if (result == CODING_FINISH_NORMAL
+ || result == CODING_FINISH_INTERRUPT
|| (result == CODING_FINISH_INSUFFICIENT_SRC
&& coding->consumed == 0))
break;
else
newstr = make_uninit_string (produced + shrinked_bytes);
if (from > 0)
- bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
- bcopy (buf.data, XSTRING (newstr)->data + from, produced);
+ STRING_COPYIN (newstr, 0, SDATA (str), from);
+ STRING_COPYIN (newstr, from, buf.data, produced);
if (shrinked_bytes > from)
- bcopy (XSTRING (str)->data + to_byte,
- XSTRING (newstr)->data + from + produced,
- shrinked_bytes - from);
+ STRING_COPYIN (newstr, from + produced,
+ SDATA (str) + to_byte,
+ shrinked_bytes - from);
free_conversion_buffer (&buf);
+ coding->consumed += shrinked_bytes;
+ coding->consumed_char += shrinked_bytes;
+ coding->produced += shrinked_bytes;
+ coding->produced_char += shrinked_bytes;
+
if (coding->cmp_data && coding->cmp_data->used)
coding_restore_composition (coding, newstr);
coding_free_composition_data (coding);
if (SYMBOLP (coding->pre_write_conversion)
&& !NILP (Ffboundp (coding->pre_write_conversion)))
- str = run_pre_post_conversion_on_str (str, coding, 1);
+ {
+ str = run_pre_post_conversion_on_str (str, coding, 1);
+ /* As STR is just newly generated, we don't have to copy it
+ anymore. */
+ nocopy = 1;
+ }
from = 0;
- to = XSTRING (str)->size;
- to_byte = STRING_BYTES (XSTRING (str));
+ to = SCHARS (str);
+ to_byte = SBYTES (str);
/* Encoding routines determine the multibyteness of the source text
by coding->src_multibyte. */
- coding->src_multibyte = STRING_MULTIBYTE (str);
+ coding->src_multibyte = SCHARS (str) < SBYTES (str);
coding->dst_multibyte = 0;
if (! CODING_REQUIRE_ENCODING (coding))
- {
- coding->consumed = STRING_BYTES (XSTRING (str));
- coding->consumed_char = XSTRING (str)->size;
- if (STRING_MULTIBYTE (str))
- {
- str = Fstring_as_unibyte (str);
- nocopy = 1;
- }
- coding->produced = STRING_BYTES (XSTRING (str));
- coding->produced_char = XSTRING (str)->size;
- return (nocopy ? str : Fcopy_sequence (str));
- }
+ goto no_need_of_encoding;
if (coding->composing != COMPOSITION_DISABLED)
coding_save_composition (coding, from, to, str);
- /* Try to skip the heading and tailing ASCIIs. */
- if (coding->type != coding_type_ccl)
+ /* Try to skip the heading and tailing ASCIIs. We can't skip them
+ if we must run CCL program or there are compositions to
+ encode. */
+ if (coding->type != coding_type_ccl
+ && (! coding->cmp_data || coding->cmp_data->used == 0))
{
- SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
+ SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
1);
if (from == to_byte)
- return (nocopy ? str : Fcopy_sequence (str));
- shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
+ {
+ coding_free_composition_data (coding);
+ goto no_need_of_encoding;
+ }
+ shrinked_bytes = from + (SBYTES (str) - to_byte);
}
len = encoding_buffer_size (coding, to_byte - from);
consumed = consumed_char = produced = produced_char = 0;
while (1)
{
- result = encode_coding (coding, XSTRING (str)->data + from + consumed,
+ result = encode_coding (coding, SDATA (str) + from + consumed,
buf.data + produced, to_byte - from - consumed,
buf.size - produced);
consumed += coding->consumed;
produced += coding->produced;
produced_char += coding->produced_char;
if (result == CODING_FINISH_NORMAL
+ || result == CODING_FINISH_INTERRUPT
|| (result == CODING_FINISH_INSUFFICIENT_SRC
&& coding->consumed == 0))
break;
newstr = make_uninit_string (produced + shrinked_bytes);
if (from > 0)
- bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
- bcopy (buf.data, XSTRING (newstr)->data + from, produced);
+ STRING_COPYIN (newstr, 0, SDATA (str), from);
+ STRING_COPYIN (newstr, from, buf.data, produced);
if (shrinked_bytes > from)
- bcopy (XSTRING (str)->data + to_byte,
- XSTRING (newstr)->data + from + produced,
- shrinked_bytes - from);
+ STRING_COPYIN (newstr, from + produced,
+ SDATA (str) + to_byte,
+ shrinked_bytes - from);
free_conversion_buffer (&buf);
coding_free_composition_data (coding);
return newstr;
+
+ no_need_of_encoding:
+ coding->consumed = SBYTES (str);
+ coding->consumed_char = SCHARS (str);
+ if (STRING_MULTIBYTE (str))
+ {
+ if (nocopy)
+ /* We are sure that STR doesn't contain a multibyte
+ character. */
+ STRING_SET_UNIBYTE (str);
+ else
+ {
+ str = Fstring_as_unibyte (str);
+ nocopy = 1;
+ }
+ }
+ coding->produced = SBYTES (str);
+ coding->produced_char = SCHARS (str);
+ return (nocopy ? str : Fcopy_sequence (str));
}
\f
return Qt;
if (!SYMBOLP (obj))
return Qnil;
+ if (! NILP (Fget (obj, Qcoding_system_define_form)))
+ return Qt;
/* Get coding-spec vector for OBJ. */
obj = Fget (obj, Qcoding_system);
return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
}
- while (XSTRING (val)->size == 0);
+ while (SCHARS (val) == 0);
return (Fintern (val, Qnil));
}
{
Lisp_Object val;
if (SYMBOLP (default_coding_system))
- XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
+ default_coding_system = SYMBOL_NAME (default_coding_system);
val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
Qt, Qnil, Qcoding_system_history,
default_coding_system, Qnil);
- return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
+ return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
}
DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
1, 1, 0,
doc: /* Check validity of CODING-SYSTEM.
If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
-It is valid if it is a symbol with a non-nil `coding-system' property.
-The value of property should be a vector of length 5. */)
+It is valid if it is nil or a symbol with a non-nil `coding-system' property.
+The value of this property should be a vector of length 5. */)
(coding_system)
Lisp_Object coding_system;
{
- CHECK_SYMBOL (coding_system);
+ Lisp_Object define_form;
+
+ define_form = Fget (coding_system, Qcoding_system_define_form);
+ if (! NILP (define_form))
+ {
+ Fput (coding_system, Qcoding_system_define_form, Qnil);
+ safe_eval (define_form);
+ }
if (!NILP (Fcoding_system_p (coding_system)))
return coding_system;
while (1)
\f
Lisp_Object
detect_coding_system (src, src_bytes, highest, multibytep)
- unsigned char *src;
+ const unsigned char *src;
int src_bytes, highest;
int multibytep;
{
DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
2, 3, 0,
- doc: /* Detect coding system of the text in the region between START and END.
-Return a list of possible coding systems ordered by priority.
+ doc: /* Detect how the byte sequence in the region is encoded.
+Return a list of possible coding systems used on decoding a byte
+sequence containing the bytes in the region between START and END when
+the coding system `undecided' is specified. The list is ordered by
+priority decided in the current language environment.
If only ASCII characters are found, it returns a list of single element
`undecided' or its subsidiary coding system according to a detected
the detecting source. Then code detectors can handle the tailing
byte sequence more accurately.
- Fix me: This is not an perfect solution. It is better that we
+ Fix me: This is not a perfect solution. It is better that we
add one more argument, say LAST_BLOCK, to all detect_coding_XXX.
*/
if (to == Z || (to == GPT && GAP_SIZE > 0))
DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
1, 2, 0,
- doc: /* Detect coding system of the text in STRING.
-Return a list of possible coding systems ordered by priority.
+ doc: /* Detect how the byte sequence in STRING is encoded.
+Return a list of possible coding systems used on decoding a byte
+sequence containing the bytes in STRING when the coding system
+`undecided' is specified. The list is ordered by priority decided in
+the current language environment.
If only ASCII characters are found, it returns a list of single element
`undecided' or its subsidiary coding system according to a detected
{
CHECK_STRING (string);
- return detect_coding_system (XSTRING (string)->data,
+ return detect_coding_system (SDATA (string),
/* "+ 1" is to include the anchor byte
`\0'. With this, code detectors can
handle the tailing bytes more
accurately. */
- STRING_BYTES (XSTRING (string)) + 1,
+ SBYTES (string) + 1,
!NILP (highest),
STRING_MULTIBYTE (string));
}
-/* Return an intersection of lists L1 and L2. */
-
-static Lisp_Object
-intersection (l1, l2)
- Lisp_Object l1, l2;
-{
- Lisp_Object val;
-
- for (val = Qnil; CONSP (l1); l1 = XCDR (l1))
- {
- if (!NILP (Fmemq (XCAR (l1), l2)))
- val = Fcons (XCAR (l1), val);
- }
- return val;
-}
-
-
-/* Subroutine for Fsafe_coding_systems_region_internal.
+/* Subroutine for Ffind_coding_systems_region_internal.
Return a list of coding systems that safely encode the multibyte
- text between P and PEND. SAFE_CODINGS, if non-nil, is a list of
+ text between P and PEND. SAFE_CODINGS, if non-nil, is an alist of
possible coding systems. If it is nil, it means that we have not
yet found any coding systems.
- WORK_TABLE is a copy of the char-table Vchar_coding_system_table. An
- element of WORK_TABLE is set to t once the element is looked up.
+ WORK_TABLE a char-table of which element is set to t once the
+ element is looked up.
If a non-ASCII single byte char is found, set
*single_byte_char_found to 1. */
Lisp_Object safe_codings, work_table;
int *single_byte_char_found;
{
- int c, len, idx;
- Lisp_Object val;
+ int c, len;
+ Lisp_Object val, ch;
+ Lisp_Object prev, tail;
+ if (NILP (safe_codings))
+ goto done_safe_codings;
while (p < pend)
{
c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
continue;
if (SINGLE_BYTE_CHAR_P (c))
*single_byte_char_found = 1;
- if (NILP (safe_codings))
- continue;
/* Check the safe coding systems for C. */
- val = char_table_ref_and_index (work_table, c, &idx);
+ ch = make_number (c);
+ val = Faref (work_table, ch);
if (EQ (val, Qt))
/* This element was already checked. Ignore it. */
continue;
/* Remember that we checked this element. */
- CHAR_TABLE_SET (work_table, make_number (idx), Qt);
+ Faset (work_table, ch, Qt);
- /* If there are some safe coding systems for C and we have
- already found the other set of coding systems for the
- different characters, get the intersection of them. */
- if (!EQ (safe_codings, Qt) && !NILP (val))
- val = intersection (safe_codings, val);
- safe_codings = val;
+ for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail))
+ {
+ Lisp_Object elt, translation_table, hash_table, accept_latin_extra;
+ int encodable;
+
+ elt = XCAR (tail);
+ if (CONSP (XCDR (elt)))
+ {
+ /* This entry has this format now:
+ ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
+ ACCEPT-LATIN-EXTRA ) */
+ val = XCDR (elt);
+ encodable = ! NILP (Faref (XCAR (val), ch));
+ if (! encodable)
+ {
+ val = XCDR (val);
+ translation_table = XCAR (val);
+ hash_table = XCAR (XCDR (val));
+ accept_latin_extra = XCAR (XCDR (XCDR (val)));
+ }
+ }
+ else
+ {
+ /* This entry has this format now: ( CODING . SAFE-CHARS) */
+ encodable = ! NILP (Faref (XCDR (elt), ch));
+ if (! encodable)
+ {
+ /* Transform the format to:
+ ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
+ ACCEPT-LATIN-EXTRA ) */
+ val = Fget (XCAR (elt), Qcoding_system);
+ translation_table
+ = Fplist_get (AREF (val, 3),
+ Qtranslation_table_for_encode);
+ if (SYMBOLP (translation_table))
+ translation_table = Fget (translation_table,
+ Qtranslation_table);
+ hash_table
+ = (CHAR_TABLE_P (translation_table)
+ ? XCHAR_TABLE (translation_table)->extras[1]
+ : Qnil);
+ accept_latin_extra
+ = ((EQ (AREF (val, 0), make_number (2))
+ && VECTORP (AREF (val, 4)))
+ ? AREF (AREF (val, 4), 16)
+ : Qnil);
+ XSETCAR (tail, list5 (XCAR (elt), XCDR (elt),
+ translation_table, hash_table,
+ accept_latin_extra));
+ }
+ }
+
+ if (! encodable
+ && ((CHAR_TABLE_P (translation_table)
+ && ! NILP (Faref (translation_table, ch)))
+ || (HASH_TABLE_P (hash_table)
+ && ! NILP (Fgethash (ch, hash_table, Qnil)))
+ || (SINGLE_BYTE_CHAR_P (c)
+ && ! NILP (accept_latin_extra)
+ && VECTORP (Vlatin_extra_code_table)
+ && ! NILP (AREF (Vlatin_extra_code_table, c)))))
+ encodable = 1;
+ if (encodable)
+ prev = tail;
+ else
+ {
+ /* Exclude this coding system from SAFE_CODINGS. */
+ if (EQ (tail, safe_codings))
+ {
+ safe_codings = XCDR (safe_codings);
+ if (NILP (safe_codings))
+ goto done_safe_codings;
+ }
+ else
+ XSETCDR (prev, XCDR (tail));
+ }
+ }
}
+
+ done_safe_codings:
+ /* If the above loop was terminated before P reaches PEND, it means
+ SAFE_CODINGS was set to nil. If we have not yet found an
+ non-ASCII single-byte char, check it now. */
+ if (! *single_byte_char_found)
+ while (p < pend)
+ {
+ c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
+ p += len;
+ if (! ASCII_BYTE_P (c)
+ && SINGLE_BYTE_CHAR_P (c))
+ {
+ *single_byte_char_found = 1;
+ break;
+ }
+ }
return safe_codings;
}
-
-/* Return a list of coding systems that safely encode the text between
- START and END. If the text contains only ASCII or is unibyte,
- return t. */
-
DEFUN ("find-coding-systems-region-internal",
Ffind_coding_systems_region_internal,
Sfind_coding_systems_region_internal, 2, 2, 0,
Lisp_Object work_table, safe_codings;
int non_ascii_p = 0;
int single_byte_char_found = 0;
- unsigned char *p1, *p1end, *p2, *p2end, *p;
+ const unsigned char *p1, *p1end, *p2, *p2end, *p;
if (STRINGP (start))
{
if (!STRING_MULTIBYTE (start))
return Qt;
- p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start));
+ p1 = SDATA (start), p1end = p1 + SBYTES (start);
p2 = p2end = p1end;
- if (XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
+ if (SCHARS (start) != SBYTES (start))
non_ascii_p = 1;
}
else
}
/* The text contains non-ASCII characters. */
- work_table = Fcopy_sequence (Vchar_coding_system_table);
- safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
+
+ work_table = Fmake_char_table (Qchar_coding_system, Qnil);
+ safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
+
+ safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table,
&single_byte_char_found);
if (p2 < p2end)
safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
&single_byte_char_found);
+ if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars)))
+ safe_codings = Qt;
+ else
+ {
+ /* Turn safe_codings to a list of coding systems... */
+ Lisp_Object val;
- if (EQ (safe_codings, Qt))
- ; /* Nothing to be done. */
- else if (!single_byte_char_found)
+ if (single_byte_char_found)
+ /* ... and append these for eight-bit chars. */
+ val = Fcons (Qraw_text,
+ Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil)));
+ else
+ /* ... and append generic coding systems. */
+ val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars));
+
+ for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings))
+ val = Fcons (XCAR (XCAR (safe_codings)), val);
+ safe_codings = val;
+ }
+
+ return safe_codings;
+}
+
+
+/* Search from position POS for such characters that are unencodable
+ accoding to SAFE_CHARS, and return a list of their positions. P
+ points where in the memory the character at POS exists. Limit the
+ search at PEND or when Nth unencodable characters are found.
+
+ If SAFE_CHARS is a char table, an element for an unencodable
+ character is nil.
+
+ If SAFE_CHARS is nil, all non-ASCII characters are unencodable.
+
+ Otherwise, SAFE_CHARS is t, and only eight-bit-contrl and
+ eight-bit-graphic characters are unencodable. */
+
+static Lisp_Object
+unencodable_char_position (safe_chars, pos, p, pend, n)
+ Lisp_Object safe_chars;
+ int pos;
+ unsigned char *p, *pend;
+ int n;
+{
+ Lisp_Object pos_list;
+
+ pos_list = Qnil;
+ while (p < pend)
{
- /* Append generic coding systems. */
- Lisp_Object args[2];
- args[0] = safe_codings;
- args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
- make_number (0));
- safe_codings = Fappend (2, args);
+ int len;
+ int c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len);
+
+ if (c >= 128
+ && (CHAR_TABLE_P (safe_chars)
+ ? NILP (CHAR_TABLE_REF (safe_chars, c))
+ : (NILP (safe_chars) || c < 256)))
+ {
+ pos_list = Fcons (make_number (pos), pos_list);
+ if (--n <= 0)
+ break;
+ }
+ pos++;
+ p += len;
+ }
+ return Fnreverse (pos_list);
+}
+
+
+DEFUN ("unencodable-char-position", Funencodable_char_position,
+ Sunencodable_char_position, 3, 5, 0,
+ doc: /*
+Return position of first un-encodable character in a region.
+START and END specfiy the region and CODING-SYSTEM specifies the
+encoding to check. Return nil if CODING-SYSTEM does encode the region.
+
+If optional 4th argument COUNT is non-nil, it specifies at most how
+many un-encodable characters to search. In this case, the value is a
+list of positions.
+
+If optional 5th argument STRING is non-nil, it is a string to search
+for un-encodable characters. In that case, START and END are indexes
+to the string. */)
+ (start, end, coding_system, count, string)
+ Lisp_Object start, end, coding_system, count, string;
+{
+ int n;
+ Lisp_Object safe_chars;
+ struct coding_system coding;
+ Lisp_Object positions;
+ int from, to;
+ unsigned char *p, *pend;
+
+ if (NILP (string))
+ {
+ validate_region (&start, &end);
+ from = XINT (start);
+ to = XINT (end);
+ if (NILP (current_buffer->enable_multibyte_characters))
+ return Qnil;
+ p = CHAR_POS_ADDR (from);
+ if (to == GPT)
+ pend = GPT_ADDR;
+ else
+ pend = CHAR_POS_ADDR (to);
}
else
- safe_codings = Fcons (Qraw_text,
- Fcons (Qemacs_mule,
- Fcons (Qno_conversion, safe_codings)));
- return safe_codings;
+ {
+ CHECK_STRING (string);
+ CHECK_NATNUM (start);
+ CHECK_NATNUM (end);
+ from = XINT (start);
+ to = XINT (end);
+ if (from > to
+ || to > SCHARS (string))
+ args_out_of_range_3 (string, start, end);
+ if (! STRING_MULTIBYTE (string))
+ return Qnil;
+ p = SDATA (string) + string_char_to_byte (string, from);
+ pend = SDATA (string) + string_char_to_byte (string, to);
+ }
+
+ setup_coding_system (Fcheck_coding_system (coding_system), &coding);
+
+ if (NILP (count))
+ n = 1;
+ else
+ {
+ CHECK_NATNUM (count);
+ n = XINT (count);
+ }
+
+ if (coding.type == coding_type_no_conversion
+ || coding.type == coding_type_raw_text)
+ return Qnil;
+
+ if (coding.type == coding_type_undecided)
+ safe_chars = Qnil;
+ else
+ safe_chars = coding_safe_chars (coding_system);
+
+ if (STRINGP (string)
+ || from >= GPT || to <= GPT)
+ positions = unencodable_char_position (safe_chars, from, p, pend, n);
+ else
+ {
+ Lisp_Object args[2];
+
+ args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n);
+ n -= XINT (Flength (args[0]));
+ if (n <= 0)
+ positions = args[0];
+ else
+ {
+ args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR,
+ pend, n);
+ positions = Fappend (2, args);
+ }
+ }
+
+ return (NILP (count) ? Fcar (positions) : positions);
}
return make_number (to - from);
if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
- error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
+ error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
coding.mode |= CODING_MODE_LAST_BLOCK;
coding.src_multibyte = coding.dst_multibyte
return (NILP (nocopy) ? Fcopy_sequence (string) : string);
if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
- error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
+ error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
coding.mode |= CODING_MODE_LAST_BLOCK;
string = (encodep
return string;
if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
- error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
+ error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
coding.composing = COMPOSITION_DISABLED;
coding.mode |= CODING_MODE_LAST_BLOCK;
return val;
}
\f
-DEFUN ("set-terminal-coding-system-internal",
- Fset_terminal_coding_system_internal,
+DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
Sset_terminal_coding_system_internal, 1, 1, 0,
doc: /* Internal use only. */)
(coding_system)
CHECK_SYMBOL (coding_system);
setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
/* We had better not send unsafe characters to terminal. */
- terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
+ terminal_coding.mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
/* Character composition should be disabled. */
terminal_coding.composing = COMPOSITION_DISABLED;
/* Error notification should be suppressed. */
return Qnil;
}
-DEFUN ("set-safe-terminal-coding-system-internal",
- Fset_safe_terminal_coding_system_internal,
+DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal,
Sset_safe_terminal_coding_system_internal, 1, 1, 0,
doc: /* Internal use only. */)
(coding_system)
/* Character composition should be disabled. */
safe_terminal_coding.composing = COMPOSITION_DISABLED;
/* Error notification should be suppressed. */
- terminal_coding.suppress_error = 1;
+ safe_terminal_coding.suppress_error = 1;
safe_terminal_coding.src_multibyte = 1;
safe_terminal_coding.dst_multibyte = 0;
return Qnil;
}
-DEFUN ("terminal-coding-system",
- Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
+DEFUN ("terminal-coding-system", Fterminal_coding_system,
+ Sterminal_coding_system, 0, 0, 0,
doc: /* Return coding system specified for terminal output. */)
()
{
return terminal_coding.symbol;
}
-DEFUN ("set-keyboard-coding-system-internal",
- Fset_keyboard_coding_system_internal,
+DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
Sset_keyboard_coding_system_internal, 1, 1, 0,
doc: /* Internal use only. */)
(coding_system)
return Qnil;
}
-DEFUN ("keyboard-coding-system",
- Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
+DEFUN ("keyboard-coding-system", Fkeyboard_coding_system,
+ Skeyboard_coding_system, 0, 0, 0,
doc: /* Return coding system specified for decoding keyboard input. */)
()
{
error ("Invalid first argument");
if (nargs < 1 + XINT (target_idx))
error ("Too few arguments for operation: %s",
- XSYMBOL (operation)->name->data);
+ SDATA (SYMBOL_NAME (operation)));
+ /* For write-region, if the 6th argument (i.e. VISIT, the 5th
+ argument to write-region) is string, it must be treated as a
+ target file name. */
+ if (EQ (operation, Qwrite_region)
+ && nargs > 5
+ && STRINGP (args[5]))
+ target_idx = make_number (4);
target = args[XINT (target_idx) + 1];
if (!(STRINGP (target)
|| (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
return Qnil;
}
+DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
+ Sdefine_coding_system_internal, 1, 1, 0,
+ doc: /* Register CODING-SYSTEM as a base coding system.
+This function is internal use only. */)
+ (coding_system)
+ Lisp_Object coding_system;
+{
+ Lisp_Object safe_chars, slot;
+
+ if (NILP (Fcheck_coding_system (coding_system)))
+ Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
+ safe_chars = coding_safe_chars (coding_system);
+ if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
+ error ("No valid safe-chars property for %s",
+ SDATA (SYMBOL_NAME (coding_system)));
+ if (EQ (safe_chars, Qt))
+ {
+ if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
+ XSETCAR (Vcoding_system_safe_chars,
+ Fcons (coding_system, XCAR (Vcoding_system_safe_chars)));
+ }
+ else
+ {
+ slot = Fassq (coding_system, XCDR (Vcoding_system_safe_chars));
+ if (NILP (slot))
+ XSETCDR (Vcoding_system_safe_chars,
+ nconc2 (XCDR (Vcoding_system_safe_chars),
+ Fcons (Fcons (coding_system, safe_chars), Qnil)));
+ else
+ XSETCDR (slot, safe_chars);
+ }
+ return Qnil;
+}
+
#endif /* emacs */
\f
void
syms_of_coding ()
{
+ staticpro (&Vcode_conversion_workbuf_name);
+ Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
+
Qtarget_idx = intern ("target-idx");
staticpro (&Qtarget_idx);
}
}
+ Vcoding_system_safe_chars = Fcons (Qnil, Qnil);
+ staticpro (&Vcoding_system_safe_chars);
+
Qtranslation_table = intern ("translation-table");
staticpro (&Qtranslation_table);
- Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
+ Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
Qtranslation_table_id = intern ("translation-table-id");
staticpro (&Qtranslation_table_id);
But don't staticpro it here--that is done in alloc.c. */
Qchar_table_extra_slots = intern ("char-table-extra-slots");
Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
- Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (2));
+ Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0));
Qvalid_codes = intern ("valid-codes");
staticpro (&Qvalid_codes);
Qraw_text = intern ("raw-text");
staticpro (&Qraw_text);
+ Qutf_8 = intern ("utf-8");
+ staticpro (&Qutf_8);
+
+ Qcoding_system_define_form = intern ("coding-system-define-form");
+ staticpro (&Qcoding_system_define_form);
+
defsubr (&Scoding_system_p);
defsubr (&Sread_coding_system);
defsubr (&Sread_non_nil_coding_system);
defsubr (&Sdetect_coding_region);
defsubr (&Sdetect_coding_string);
defsubr (&Sfind_coding_systems_region_internal);
+ defsubr (&Sunencodable_char_position);
defsubr (&Sdecode_coding_region);
defsubr (&Sencode_coding_region);
defsubr (&Sdecode_coding_string);
defsubr (&Sfind_operation_coding_system);
defsubr (&Supdate_coding_systems_internal);
defsubr (&Sset_coding_priority_internal);
+ defsubr (&Sdefine_coding_system_internal);
DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
doc: /* List of coding systems.
On detecting a coding system, Emacs tries code detection algorithms
associated with each coding-category one by one in this order. When
one algorithm agrees with a byte sequence of source text, the coding
-system bound to the corresponding coding-category is selected. */);
+system bound to the corresponding coding-category is selected.
+
+Don't modify this variable directly, but use `set-coding-priority'. */);
{
int i;
Vcoding_system_for_write = Qnil;
DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
- doc: /* Coding system used in the latest file or process I/O. */);
+ doc: /* Coding system used in the latest file or process I/O.
+Also set by `encode-coding-region', `decode-coding-region',
+`encode-coding-string' and `decode-coding-string'. */);
Vlast_coding_system_used = Qnil;
DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
The default value is `select-safe-coding-system' (which see). */);
Vselect_safe_coding_system_function = Qnil;
- DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
- doc: /* Char-table containing safe coding systems of each characters.
-Each element doesn't include such generic coding systems that can
-encode any characters. They are in the first extra slot. */);
- Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
+ DEFVAR_BOOL ("coding-system-require-warning",
+ &coding_system_require_warning,
+ doc: /* Internal use only.
+If non-nil, on writing a file, `select-safe-coding-system-function' is
+called even if `coding-system-for-write' is non-nil. The command
+`universal-coding-system-argument' binds this variable to t temporarily. */);
+ coding_system_require_warning = 0;
+
DEFVAR_BOOL ("inhibit-iso-escape-detection",
&inhibit_iso_escape_detection,
to explicitly specify some coding system that doesn't use ISO2022's
escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
inhibit_iso_escape_detection = 0;
+
+ DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
+ doc: /* Char table for translating self-inserting characters.
+This is applied to the result of input methods, not their input. See also
+`keyboard-translate-table'. */);
+ Vtranslation_table_for_input = Qnil;
}
char *
Lisp_Object dec = code_convert_string_norecord (build_string (str),
Vlocale_coding_system,
0);
- str = (char *) XSTRING (dec)->data;
+ str = (char *) SDATA (dec);
}
return str;
#endif /* emacs */
+/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
+ (do not change this comment) */