/* Coding system handler (conversion, detection, etc).
- Copyright (C) 2001-2014 Free Software Foundation, Inc.
+ Copyright (C) 2001-2015 Free Software Foundation, Inc.
Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011
National Institute of Advanced Industrial Science and Technology (AIST)
Nth coding category. */
static struct coding_system coding_categories[coding_category_max];
-/*** Commonly used macros and functions ***/
-
-#ifndef min
-#define min(a, b) ((a) < (b) ? (a) : (b))
-#endif
-#ifndef max
-#define max(a, b) ((a) > (b) ? (a) : (b))
-#endif
-
/* Encode a flag that can be nil, something else, or t as -1, 0, 1. */
static int
XSETCDR (x, tmp);
}
+/* True if CODING's destination can be grown, i.e. if CODING->dst_object
+ is a Lisp string or a buffer. Any other dst_object (nil included)
+ yields false. This only inspects CODING; it does not modify it. */
+
+static bool
+growable_destination (struct coding_system *coding)
+{
+ return STRINGP (coding->dst_object) || BUFFERP (coding->dst_object);
+}
+
/* Safely get one byte from the source text pointed by SRC which ends
at SRC_END, and set C to that byte. If there are not enough bytes
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
-/* Unlike the other detect_coding_XXX, this function counts number of
- characters and check EOL format. */
+/* Unlike the other detect_coding_XXX, this function counts the number
+ of characters and checks the EOL format. */
static bool
detect_coding_utf_8 (struct coding_system *coding,
src = src_base;
consumed_chars = consumed_chars_base;
ONE_MORE_BYTE (c);
- *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
- coding->errors++;
+ *charbuf++ = ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
}
no_more_source:
*dst++ = CHAR_TO_BYTE8 (c);
else
CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
- produced_chars++;
}
+ produced_chars = dst - (coding->destination + coding->produced);
}
record_conversion_result (coding, CODING_RESULT_SUCCESS);
coding->produced_char += produced_chars;
/* The first two bytes are not BOM. Treat them as bytes
for a normal character. */
src = src_base;
- coding->errors++;
}
CODING_UTF_16_BOM (coding) = utf_without_bom;
}
ONE_MORE_BYTE (c2);
if (c2 < 0)
{
- *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
+ *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
*charbuf++ = -c2;
continue;
}
c1 = surrogate & 0xFF, c2 = surrogate >> 8;
*charbuf++ = c1;
*charbuf++ = c2;
- coding->errors++;
if (UTF_16_HIGH_SURROGATE_P (c))
CODING_UTF_16_SURROGATE (coding) = surrogate = c;
else
case 1:
code = c;
- charset_ID = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit;
+ charset_ID = ASCII_CHAR_P (code) ? charset_ascii : charset_eight_bit;
break;
default:
src = src_base;
consumed_chars = consumed_chars_base;
ONE_MORE_BYTE (c);
- *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ *charbuf++ = ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
char_offset++;
- coding->errors++;
}
no_more_source:
ONE_MORE_BYTE (c1);
if (c1 < ' ' || c1 >= 0x80
|| (id = iso_charset_table[0][c >= ','][c1]) < 0)
- /* Invalid designation sequence. Just ignore. */
- break;
+ {
+ /* Invalid designation sequence. Just ignore. */
+ if (c1 >= 0x80)
+ rejected |= (CATEGORY_MASK_ISO_7BIT
+ | CATEGORY_MASK_ISO_7_ELSE);
+ break;
+ }
}
else if (c == '$')
{
ONE_MORE_BYTE (c1);
if (c1 < ' ' || c1 >= 0x80
|| (id = iso_charset_table[1][c >= ','][c1]) < 0)
- /* Invalid designation sequence. Just ignore. */
- break;
+ {
+ /* Invalid designation sequence. Just ignore. */
+ if (c1 >= 0x80)
+ rejected |= (CATEGORY_MASK_ISO_7BIT
+ | CATEGORY_MASK_ISO_7_ELSE);
+ break;
+ }
}
else
- /* Invalid designation sequence. Just ignore it. */
- break;
+ {
+ /* Invalid designation sequence. Just ignore it. */
+ if (c >= 0x80)
+ rejected |= (CATEGORY_MASK_ISO_7BIT
+ | CATEGORY_MASK_ISO_7_ELSE);
+ break;
+ }
}
else
{
/* Invalid escape sequence. Just ignore it. */
+ if (c >= 0x80)
+ rejected |= (CATEGORY_MASK_ISO_7BIT
+ | CATEGORY_MASK_ISO_7_ELSE);
break;
}
if (inhibit_iso_escape_detection)
break;
single_shifting = 0;
- rejected |= CATEGORY_MASK_ISO_7BIT;
+ rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
& CODING_ISO_FLAG_SINGLE_SHIFT)
{
single_shifting = 0;
break;
}
+ rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
if (c >= 0xA0)
{
- rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
found |= CATEGORY_MASK_ISO_8_1;
/* Check the length of succeeding codes of the range
0xA0..0xFF. If the byte length is even, we include
if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
{
- *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
+ *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
char_offset++;
CODING_ISO_EXTSEGMENT_LEN (coding)--;
continue;
}
else
{
- *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
+ *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
char_offset++;
}
continue;
MAYBE_FINISH_COMPOSITION ();
for (; src_base < src; src_base++, char_offset++)
{
- if (ASCII_BYTE_P (*src_base))
+ if (ASCII_CHAR_P (*src_base))
*charbuf++ = *src_base;
else
*charbuf++ = BYTE8_TO_CHAR (*src_base);
src = src_base;
consumed_chars = consumed_chars_base;
ONE_MORE_BYTE (c);
- *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ *charbuf++ = c < 0 ? -c : ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
char_offset++;
- coding->errors++;
/* Reset the invocation and designation status to the safest
one; i.e. designate ASCII to the graphic register 0, and
invoke that register to the graphic plane 0. This typically
ONE_MORE_BYTE (c);
*charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
char_offset++;
- coding->errors++;
}
no_more_source:
ONE_MORE_BYTE (c);
*charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
char_offset++;
- coding->errors++;
}
no_more_source:
src = src_base;
consumed_chars = consumed_chars_base;
ONE_MORE_BYTE (c);
- *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ *charbuf++ = c < 0 ? -c : ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
char_offset++;
- coding->errors++;
}
no_more_source:
}
+/* MAX_LOOKUP's maximum value. MAX_LOOKUP is an int and so cannot
+ exceed INT_MAX. Also, MAX_LOOKUP is multiplied by sizeof (int) for
+ alloca, so it cannot exceed MAX_ALLOCA / sizeof (int).
+
+ This constant is the upper clamp applied when *MAX_LOOKUP is raised
+ to the count stored in a translation table's extras[1] slot, so a
+ large value in that slot cannot produce an oversized request. */
+enum { MAX_LOOKUP_MAX = min (INT_MAX, MAX_ALLOCA / sizeof (int)) };
+
/* Return a translation table (or list of them) from coding system
attribute vector ATTRS for encoding (if ENCODEP) or decoding (if
not ENCODEP). */
{
val = XCHAR_TABLE (translation_table)->extras[1];
if (NATNUMP (val) && *max_lookup < XFASTINT (val))
- *max_lookup = XFASTINT (val);
+ *max_lookup = min (XFASTINT (val), MAX_LOOKUP_MAX);
}
else if (CONSP (translation_table))
{
{
Lisp_Object tailval = XCHAR_TABLE (XCAR (tail))->extras[1];
if (NATNUMP (tailval) && *max_lookup < XFASTINT (tailval))
- *max_lookup = XFASTINT (tailval);
+ *max_lookup = min (XFASTINT (tailval), MAX_LOOKUP_MAX);
}
}
}
int *buf = coding->charbuf;
int *buf_end = buf + coding->charbuf_used;
- if (EQ (coding->src_object, coding->dst_object))
+ if (EQ (coding->src_object, coding->dst_object)
+ && ! NILP (coding->dst_object))
{
+ eassert (growable_destination (coding));
coding_set_source (coding);
dst_end = ((unsigned char *) coding->source) + coding->consumed;
}
if ((dst_end - dst) / MAX_MULTIBYTE_LENGTH < to_nchars)
{
+ eassert (growable_destination (coding));
if (((min (PTRDIFF_MAX, SIZE_MAX) - (buf_end - buf))
/ MAX_MULTIBYTE_LENGTH)
< to_nchars)
const unsigned char *src_end = src + coding->consumed;
if (EQ (coding->dst_object, coding->src_object))
- dst_end = (unsigned char *) src;
+ {
+ eassert (growable_destination (coding));
+ dst_end = (unsigned char *) src;
+ }
if (coding->src_multibyte != coding->dst_multibyte)
{
if (coding->src_multibyte)
ONE_MORE_BYTE (c);
if (dst == dst_end)
{
+ eassert (growable_destination (coding));
if (EQ (coding->src_object, coding->dst_object))
dst_end = (unsigned char *) src;
if (dst == dst_end)
if (dst >= dst_end - 1)
{
+ eassert (growable_destination (coding));
if (EQ (coding->src_object, coding->dst_object))
dst_end = (unsigned char *) src;
if (dst >= dst_end - 1)
coding->dst_object);
}
+#define MAX_CHARBUF_SIZE 0x4000
+/* How many units decoding functions expect in coding->charbuf at
+ most. Currently, decode_coding_emacs_mule expects the following
+ size, and that is the largest value.
+
+ ALLOC_CONVERSION_WORK_AREA (CODING, SIZE) below adds this amount to
+ SIZE and caps the sum at MAX_CHARBUF_SIZE. The result is the
+ number of int-sized units obtained from SAFE_ALLOCA for
+ CODING->charbuf, and the same number is stored in
+ CODING->charbuf_size. So the work area now scales with the amount
+ of source text instead of always being MAX_CHARBUF_SIZE units.
+ NOTE(review): SAFE_ALLOCA presumably requires USE_SAFE_ALLOCA in
+ the calling function and a matching SAFE_FREE -- confirm at each
+ call site. */
+#define MAX_CHARBUF_EXTRA_SIZE ((MAX_ANNOTATION_LENGTH * 3) + 1)
-#define CHARBUF_SIZE 0x4000
-
-#define ALLOC_CONVERSION_WORK_AREA(coding) \
- do { \
- coding->charbuf = SAFE_ALLOCA (CHARBUF_SIZE * sizeof (int)); \
- coding->charbuf_size = CHARBUF_SIZE; \
+#define ALLOC_CONVERSION_WORK_AREA(coding, size) \
+ do { \
+ ptrdiff_t units = min ((size) + MAX_CHARBUF_EXTRA_SIZE, \
+ MAX_CHARBUF_SIZE); \
+ coding->charbuf = SAFE_ALLOCA (units * sizeof (int)); \
+ coding->charbuf_size = units; \
} while (0)
-
static void
produce_annotation (struct coding_system *coding, ptrdiff_t pos)
{
coding->produced = coding->produced_char = 0;
coding->chars_at_source = 0;
record_conversion_result (coding, CODING_RESULT_SUCCESS);
- coding->errors = 0;
- ALLOC_CONVERSION_WORK_AREA (coding);
+ ALLOC_CONVERSION_WORK_AREA (coding, coding->src_bytes);
attrs = CODING_ID_ATTRS (coding->id);
translation_table = get_translation_table (attrs, 0, NULL);
coding->consumed = coding->consumed_char = 0;
coding->produced = coding->produced_char = 0;
record_conversion_result (coding, CODING_RESULT_SUCCESS);
- coding->errors = 0;
- ALLOC_CONVERSION_WORK_AREA (coding);
+ ALLOC_CONVERSION_WORK_AREA (coding, coding->src_chars);
if (coding->encoder == encode_coding_ccl)
{
p = pbeg = BYTE_POS_ADDR (start_byte);
pend = p + (end_byte - start_byte);
- while (p < pend && ASCII_BYTE_P (*p)) p++;
- while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
+ while (p < pend && ASCII_CHAR_P (*p)) p++;
+ while (p < pend && ASCII_CHAR_P (*(pend - 1))) pend--;
work_table = Fmake_char_table (Qnil, Qnil);
while (p < pend)
{
- if (ASCII_BYTE_P (*p))
+ if (ASCII_CHAR_P (*p))
p++;
else
{
DEFUN ("unencodable-char-position", Funencodable_char_position,
Sunencodable_char_position, 3, 5, 0,
- doc: /*
-Return position of first un-encodable character in a region.
+ doc: /* Return position of first un-encodable character in a region.
START and END specify the region and CODING-SYSTEM specifies the
encoding to check. Return nil if CODING-SYSTEM does encode the region.
If optional 5th argument STRING is non-nil, it is a string to search
for un-encodable characters. In that case, START and END are indexes
-to the string. */)
- (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object count, Lisp_Object string)
+to the string and treated as in `substring'. */)
+ (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system,
+ Lisp_Object count, Lisp_Object string)
{
EMACS_INT n;
struct coding_system coding;
else
{
CHECK_STRING (string);
- CHECK_NATNUM (start);
- CHECK_NATNUM (end);
- if (! (XINT (start) <= XINT (end) && XINT (end) <= SCHARS (string)))
- args_out_of_range_3 (string, start, end);
- from = XINT (start);
- to = XINT (end);
+ validate_subarray (string, start, end, SCHARS (string), &from, &to);
if (! STRING_MULTIBYTE (string))
return Qnil;
p = SDATA (string) + string_char_to_byte (string, from);
int c;
if (ascii_compatible)
- while (p < stop && ASCII_BYTE_P (*p))
+ while (p < stop && ASCII_CHAR_P (*p))
p++, from++;
if (p >= stop)
{
p = pbeg = BYTE_POS_ADDR (start_byte);
pend = p + (end_byte - start_byte);
- while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
- while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
+ while (p < pend && ASCII_CHAR_P (*p)) p++, pos++;
+ while (p < pend && ASCII_CHAR_P (*(pend - 1))) pend--;
while (p < pend)
{
- if (ASCII_BYTE_P (*p))
+ if (ASCII_CHAR_P (*p))
p++;
else
{
CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
attrs = AREF (spec, 0);
- if (ASCII_BYTE_P (ch)
+ if (ASCII_CHAR_P (ch)
&& ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
return code;
CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
attrs = AREF (spec, 0);
- if (ASCII_BYTE_P (ch)
+ if (ASCII_CHAR_P (ch)
&& ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
return code;
doc: /* Internal use only. */)
(Lisp_Object coding_system, Lisp_Object terminal)
{
- struct terminal *term = get_terminal (terminal, 1);
+ struct terminal *term = decode_live_terminal (terminal);
struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (term);
CHECK_SYMBOL (coding_system);
setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
(Lisp_Object terminal)
{
struct coding_system *terminal_coding
- = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
+ = TERMINAL_TERMINAL_CODING (decode_live_terminal (terminal));
Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
/* For backward compatibility, return nil if it is `undecided'. */
doc: /* Internal use only. */)
(Lisp_Object coding_system, Lisp_Object terminal)
{
- struct terminal *t = get_terminal (terminal, 1);
+ struct terminal *t = decode_live_terminal (terminal);
CHECK_SYMBOL (coding_system);
if (NILP (coding_system))
coding_system = Qno_conversion;
(Lisp_Object terminal)
{
return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
- (get_terminal (terminal, 1))->id);
+ (decode_live_terminal (terminal))->id);
}
\f
{
Lisp_Object subsidiaries;
ptrdiff_t base_name_len = SBYTES (SYMBOL_NAME (base));
- char *buf = alloca (base_name_len + 6);
+ USE_SAFE_ALLOCA;
+ char *buf = SAFE_ALLOCA (base_name_len + 6);
int i;
memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len);
strcpy (buf + base_name_len, suffixes[i]);
ASET (subsidiaries, i, intern (buf));
}
+ SAFE_FREE ();
return subsidiaries;
}
DEFVAR_BOOL ("disable-ascii-optimization", disable_ascii_optimization,
doc: /* If non-nil, Emacs does not optimize code decoder for ASCII files.
-Internal use only. Removed after the experimental optimizer gets stable. */);
+Internal use only. Remove after the experimental optimizer becomes stable. */);
disable_ascii_optimization = 0;
DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,