Nth coding category. */
static struct coding_system coding_categories[coding_category_max];
-/*** Commonly used macros and functions ***/
-
-#ifndef min
-#define min(a, b) ((a) < (b) ? (a) : (b))
-#endif
-#ifndef max
-#define max(a, b) ((a) > (b) ? (a) : (b))
-#endif
-
/* Encode a flag that can be nil, something else, or t as -1, 0, 1. */
static int
XSETCDR (x, tmp);
}
+/* True if CODING's destination can be grown, i.e. if CODING's
+   dst_object is either a string or a buffer.  */
+
+static bool
+growable_destination (struct coding_system *coding)
+{
+ return STRINGP (coding->dst_object) || BUFFERP (coding->dst_object);
+}
+
/* Safely get one byte from the source text pointed to by SRC, which ends
at SRC_END, and set C to that byte. If there are not enough bytes
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
-/* Unlike the other detect_coding_XXX, this function counts number of
- characters and check EOL format. */
+/* Unlike the other detect_coding_XXX, this function counts the number
+ of characters and checks the EOL format. */
static bool
detect_coding_utf_8 (struct coding_system *coding,
src = src_base;
consumed_chars = consumed_chars_base;
ONE_MORE_BYTE (c);
- *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
- coding->errors++;
+ *charbuf++ = ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
}
no_more_source:
*dst++ = CHAR_TO_BYTE8 (c);
else
CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
- produced_chars++;
}
+ produced_chars = dst - (coding->destination + coding->produced);
}
record_conversion_result (coding, CODING_RESULT_SUCCESS);
coding->produced_char += produced_chars;
/* The first two bytes are not BOM. Treat them as bytes
for a normal character. */
src = src_base;
- coding->errors++;
}
CODING_UTF_16_BOM (coding) = utf_without_bom;
}
ONE_MORE_BYTE (c2);
if (c2 < 0)
{
- *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
+ *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
*charbuf++ = -c2;
continue;
}
c1 = surrogate & 0xFF, c2 = surrogate >> 8;
*charbuf++ = c1;
*charbuf++ = c2;
- coding->errors++;
if (UTF_16_HIGH_SURROGATE_P (c))
CODING_UTF_16_SURROGATE (coding) = surrogate = c;
else
case 1:
code = c;
- charset_ID = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit;
+ charset_ID = ASCII_CHAR_P (code) ? charset_ascii : charset_eight_bit;
break;
default:
src = src_base;
consumed_chars = consumed_chars_base;
ONE_MORE_BYTE (c);
- *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ *charbuf++ = ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
char_offset++;
- coding->errors++;
}
no_more_source:
ONE_MORE_BYTE (c1);
if (c1 < ' ' || c1 >= 0x80
|| (id = iso_charset_table[0][c >= ','][c1]) < 0)
- /* Invalid designation sequence. Just ignore. */
- break;
+ {
+ /* Invalid designation sequence. Just ignore. */
+ if (c1 >= 0x80)
+ rejected |= (CATEGORY_MASK_ISO_7BIT
+ | CATEGORY_MASK_ISO_7_ELSE);
+ break;
+ }
}
else if (c == '$')
{
ONE_MORE_BYTE (c1);
if (c1 < ' ' || c1 >= 0x80
|| (id = iso_charset_table[1][c >= ','][c1]) < 0)
- /* Invalid designation sequence. Just ignore. */
- break;
+ {
+ /* Invalid designation sequence. Just ignore. */
+ if (c1 >= 0x80)
+ rejected |= (CATEGORY_MASK_ISO_7BIT
+ | CATEGORY_MASK_ISO_7_ELSE);
+ break;
+ }
}
else
- /* Invalid designation sequence. Just ignore it. */
- break;
+ {
+ /* Invalid designation sequence. Just ignore it. */
+ if (c >= 0x80)
+ rejected |= (CATEGORY_MASK_ISO_7BIT
+ | CATEGORY_MASK_ISO_7_ELSE);
+ break;
+ }
}
else
{
/* Invalid escape sequence. Just ignore it. */
+ if (c >= 0x80)
+ rejected |= (CATEGORY_MASK_ISO_7BIT
+ | CATEGORY_MASK_ISO_7_ELSE);
break;
}
if (inhibit_iso_escape_detection)
break;
single_shifting = 0;
- rejected |= CATEGORY_MASK_ISO_7BIT;
+ rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
& CODING_ISO_FLAG_SINGLE_SHIFT)
{
single_shifting = 0;
break;
}
+ rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
if (c >= 0xA0)
{
- rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
found |= CATEGORY_MASK_ISO_8_1;
/* Check the length of succeeding codes of the range
0xA0..0xFF. If the byte length is even, we include
if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
{
- *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
+ *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
char_offset++;
CODING_ISO_EXTSEGMENT_LEN (coding)--;
continue;
}
else
{
- *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
+ *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
char_offset++;
}
continue;
MAYBE_FINISH_COMPOSITION ();
for (; src_base < src; src_base++, char_offset++)
{
- if (ASCII_BYTE_P (*src_base))
+ if (ASCII_CHAR_P (*src_base))
*charbuf++ = *src_base;
else
*charbuf++ = BYTE8_TO_CHAR (*src_base);
src = src_base;
consumed_chars = consumed_chars_base;
ONE_MORE_BYTE (c);
- *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ *charbuf++ = c < 0 ? -c : ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
char_offset++;
- coding->errors++;
/* Reset the invocation and designation status to the safest
one; i.e. designate ASCII to the graphic register 0, and
invoke that register to the graphic plane 0. This typically
ONE_MORE_BYTE (c);
*charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
char_offset++;
- coding->errors++;
}
no_more_source:
ONE_MORE_BYTE (c);
*charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
char_offset++;
- coding->errors++;
}
no_more_source:
src = src_base;
consumed_chars = consumed_chars_base;
ONE_MORE_BYTE (c);
- *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ *charbuf++ = c < 0 ? -c : ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
char_offset++;
- coding->errors++;
}
no_more_source:
}
+/* MAX_LOOKUP's maximum value. MAX_LOOKUP is an int and so cannot
+ exceed INT_MAX. Also, MAX_LOOKUP is multiplied by sizeof (int) for
+ alloca, so it cannot exceed MAX_ALLOCA / sizeof (int). Values read
+ from a translation table's extras[1] slot are clamped to this bound
+ before they are stored in *max_lookup. */
+enum { MAX_LOOKUP_MAX = min (INT_MAX, MAX_ALLOCA / sizeof (int)) };
+
/* Return a translation table (or list of them) from coding system
attribute vector ATTRS for encoding (if ENCODEP) or decoding (if
not ENCODEP). */
{
val = XCHAR_TABLE (translation_table)->extras[1];
if (NATNUMP (val) && *max_lookup < XFASTINT (val))
- *max_lookup = XFASTINT (val);
+ *max_lookup = min (XFASTINT (val), MAX_LOOKUP_MAX);
}
else if (CONSP (translation_table))
{
{
Lisp_Object tailval = XCHAR_TABLE (XCAR (tail))->extras[1];
if (NATNUMP (tailval) && *max_lookup < XFASTINT (tailval))
- *max_lookup = XFASTINT (tailval);
+ *max_lookup = min (XFASTINT (tailval), MAX_LOOKUP_MAX);
}
}
}
int *buf = coding->charbuf;
int *buf_end = buf + coding->charbuf_used;
- if (EQ (coding->src_object, coding->dst_object))
+ if (EQ (coding->src_object, coding->dst_object)
+ && ! NILP (coding->dst_object))
{
+ eassert (growable_destination (coding));
coding_set_source (coding);
dst_end = ((unsigned char *) coding->source) + coding->consumed;
}
if ((dst_end - dst) / MAX_MULTIBYTE_LENGTH < to_nchars)
{
+ eassert (growable_destination (coding));
if (((min (PTRDIFF_MAX, SIZE_MAX) - (buf_end - buf))
/ MAX_MULTIBYTE_LENGTH)
< to_nchars)
const unsigned char *src_end = src + coding->consumed;
if (EQ (coding->dst_object, coding->src_object))
- dst_end = (unsigned char *) src;
+ {
+ eassert (growable_destination (coding));
+ dst_end = (unsigned char *) src;
+ }
if (coding->src_multibyte != coding->dst_multibyte)
{
if (coding->src_multibyte)
ONE_MORE_BYTE (c);
if (dst == dst_end)
{
+ eassert (growable_destination (coding));
if (EQ (coding->src_object, coding->dst_object))
dst_end = (unsigned char *) src;
if (dst == dst_end)
if (dst >= dst_end - 1)
{
+ eassert (growable_destination (coding));
if (EQ (coding->src_object, coding->dst_object))
dst_end = (unsigned char *) src;
if (dst >= dst_end - 1)
coding->dst_object);
}
+#define MAX_CHARBUF_SIZE 0x4000
+/* How many units decoding functions expect in coding->charbuf at
+ most. Currently, decode_coding_emacs_mule expects the following
+ size, and that is the largest value. ALLOC_CONVERSION_WORK_AREA
+ adds this amount to the requested size and then caps the total at
+ MAX_CHARBUF_SIZE units. */
+#define MAX_CHARBUF_EXTRA_SIZE ((MAX_ANNOTATION_LENGTH * 3) + 1)
-#define CHARBUF_SIZE 0x4000
-
-#define ALLOC_CONVERSION_WORK_AREA(coding) \
- do { \
- coding->charbuf = SAFE_ALLOCA (CHARBUF_SIZE * sizeof (int)); \
- coding->charbuf_size = CHARBUF_SIZE; \
+#define ALLOC_CONVERSION_WORK_AREA(coding, size) \
+ do { \
+ ptrdiff_t units = min ((size) + MAX_CHARBUF_EXTRA_SIZE, \
+ MAX_CHARBUF_SIZE); \
+ coding->charbuf = SAFE_ALLOCA (units * sizeof (int)); \
+ coding->charbuf_size = units; \
} while (0)
-
static void
produce_annotation (struct coding_system *coding, ptrdiff_t pos)
{
coding->produced = coding->produced_char = 0;
coding->chars_at_source = 0;
record_conversion_result (coding, CODING_RESULT_SUCCESS);
- coding->errors = 0;
- ALLOC_CONVERSION_WORK_AREA (coding);
+ ALLOC_CONVERSION_WORK_AREA (coding, coding->src_bytes);
attrs = CODING_ID_ATTRS (coding->id);
translation_table = get_translation_table (attrs, 0, NULL);
coding->consumed = coding->consumed_char = 0;
coding->produced = coding->produced_char = 0;
record_conversion_result (coding, CODING_RESULT_SUCCESS);
- coding->errors = 0;
- ALLOC_CONVERSION_WORK_AREA (coding);
+ ALLOC_CONVERSION_WORK_AREA (coding, coding->src_chars);
if (coding->encoder == encode_coding_ccl)
{
p = pbeg = BYTE_POS_ADDR (start_byte);
pend = p + (end_byte - start_byte);
- while (p < pend && ASCII_BYTE_P (*p)) p++;
- while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
+ while (p < pend && ASCII_CHAR_P (*p)) p++;
+ while (p < pend && ASCII_CHAR_P (*(pend - 1))) pend--;
work_table = Fmake_char_table (Qnil, Qnil);
while (p < pend)
{
- if (ASCII_BYTE_P (*p))
+ if (ASCII_CHAR_P (*p))
p++;
else
{
DEFUN ("unencodable-char-position", Funencodable_char_position,
Sunencodable_char_position, 3, 5, 0,
- doc: /*
-Return position of first un-encodable character in a region.
+ doc: /* Return position of first un-encodable character in a region.
START and END specify the region and CODING-SYSTEM specifies the
encoding to check. Return nil if CODING-SYSTEM does encode the region.
If optional 5th argument STRING is non-nil, it is a string to search
for un-encodable characters. In that case, START and END are indexes
-to the string. */)
- (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object count, Lisp_Object string)
+to the string and treated as in `substring'. */)
+ (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system,
+ Lisp_Object count, Lisp_Object string)
{
EMACS_INT n;
struct coding_system coding;
else
{
CHECK_STRING (string);
- CHECK_NATNUM (start);
- CHECK_NATNUM (end);
- if (! (XINT (start) <= XINT (end) && XINT (end) <= SCHARS (string)))
- args_out_of_range_3 (string, start, end);
- from = XINT (start);
- to = XINT (end);
+ validate_subarray (string, start, end, SCHARS (string), &from, &to);
if (! STRING_MULTIBYTE (string))
return Qnil;
p = SDATA (string) + string_char_to_byte (string, from);
int c;
if (ascii_compatible)
- while (p < stop && ASCII_BYTE_P (*p))
+ while (p < stop && ASCII_CHAR_P (*p))
p++, from++;
if (p >= stop)
{
p = pbeg = BYTE_POS_ADDR (start_byte);
pend = p + (end_byte - start_byte);
- while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
- while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
+ while (p < pend && ASCII_CHAR_P (*p)) p++, pos++;
+ while (p < pend && ASCII_CHAR_P (*(pend - 1))) pend--;
while (p < pend)
{
- if (ASCII_BYTE_P (*p))
+ if (ASCII_CHAR_P (*p))
p++;
else
{
CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
attrs = AREF (spec, 0);
- if (ASCII_BYTE_P (ch)
+ if (ASCII_CHAR_P (ch)
&& ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
return code;
CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
attrs = AREF (spec, 0);
- if (ASCII_BYTE_P (ch)
+ if (ASCII_CHAR_P (ch)
&& ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
return code;
doc: /* Internal use only. */)
(Lisp_Object coding_system, Lisp_Object terminal)
{
- struct terminal *term = get_terminal (terminal, 1);
+ struct terminal *term = decode_live_terminal (terminal);
struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (term);
CHECK_SYMBOL (coding_system);
setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
(Lisp_Object terminal)
{
struct coding_system *terminal_coding
- = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
+ = TERMINAL_TERMINAL_CODING (decode_live_terminal (terminal));
Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
/* For backward compatibility, return nil if it is `undecided'. */
doc: /* Internal use only. */)
(Lisp_Object coding_system, Lisp_Object terminal)
{
- struct terminal *t = get_terminal (terminal, 1);
+ struct terminal *t = decode_live_terminal (terminal);
CHECK_SYMBOL (coding_system);
if (NILP (coding_system))
coding_system = Qno_conversion;
(Lisp_Object terminal)
{
return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
- (get_terminal (terminal, 1))->id);
+ (decode_live_terminal (terminal))->id);
}
\f
{
Lisp_Object subsidiaries;
ptrdiff_t base_name_len = SBYTES (SYMBOL_NAME (base));
- char *buf = alloca (base_name_len + 6);
+ USE_SAFE_ALLOCA;
+ char *buf = SAFE_ALLOCA (base_name_len + 6);
int i;
memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len);
strcpy (buf + base_name_len, suffixes[i]);
ASET (subsidiaries, i, intern (buf));
}
+ SAFE_FREE ();
return subsidiaries;
}
DEFVAR_BOOL ("disable-ascii-optimization", disable_ascii_optimization,
doc: /* If non-nil, Emacs does not optimize code decoder for ASCII files.
-Internal use only. Removed after the experimental optimizer gets stable. */);
+Internal use only. Remove after the experimental optimizer becomes stable. */);
disable_ascii_optimization = 0;
DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,