/* Coding system handler (conversion, detection, etc).
- Copyright (C) 2001-2013 Free Software Foundation, Inc.
+ Copyright (C) 2001-2014 Free Software Foundation, Inc.
Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011
National Institute of Advanced Industrial Science and Technology (AIST)
#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
+#define CODING_ISO_FLAG_LEVEL_4 0x20000
+
#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
/* A character to be produced on output if encoding of the original
break;
}
+ /* In the simple case, rapidly handle ordinary characters */
+ if (multibytep && ! eol_dos
+ && charbuf < charbuf_end - 6 && src < src_end - 6)
+ {
+ while (charbuf < charbuf_end - 6 && src < src_end - 6)
+ {
+ c1 = *src;
+ if (c1 & 0x80)
+ break;
+ src++;
+ consumed_chars++;
+ *charbuf++ = c1;
+
+ c1 = *src;
+ if (c1 & 0x80)
+ break;
+ src++;
+ consumed_chars++;
+ *charbuf++ = c1;
+
+ c1 = *src;
+ if (c1 & 0x80)
+ break;
+ src++;
+ consumed_chars++;
+ *charbuf++ = c1;
+
+ c1 = *src;
+ if (c1 & 0x80)
+ break;
+ src++;
+ consumed_chars++;
+ *charbuf++ = c1;
+ }
+ /* If we handled at least one character, restart the main loop. */
+ if (src != src_base)
+ continue;
+ }
+
if (byte_after_cr >= 0)
c1 = byte_after_cr, byte_after_cr = -1;
else
else
charset = CHARSET_FROM_ID (charset_id_2);
ONE_MORE_BYTE (c1);
- if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
+ if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)
+ || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
+ && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4)
+ ? c1 >= 0x80 : c1 < 0x80)))
goto invalid_code;
break;
else
charset = CHARSET_FROM_ID (charset_id_3);
ONE_MORE_BYTE (c1);
- if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
+ if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)
+ || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
+ && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4)
+ ? c1 >= 0x80 : c1 < 0x80)))
goto invalid_code;
break;
source_charbuf[i++] = *p++;
if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
- ccl->last_block = 1;
+ ccl->last_block = true;
/* As ccl_driver calls DECODE_CHAR, buffer may be relocated. */
charset_map_loaded = 0;
ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
CODING_GET_INFO (coding, attrs, charset_list);
if (coding->consumed_char == coding->src_chars
&& coding->mode & CODING_MODE_LAST_BLOCK)
- ccl->last_block = 1;
+ ccl->last_block = true;
do
{
coding->safe_charsets = SDATA (val);
coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
coding->carryover_bytes = 0;
+ coding->raw_destination = 0;
coding_type = CODING_ATTR_TYPE (attrs);
if (EQ (coding_type, Qundecided))
if (CHAR_TABLE_P (standard))
{
if (CONSP (translation_table))
- translation_table = nconc2 (translation_table,
- Fcons (standard, Qnil));
+ translation_table = nconc2 (translation_table, list1 (standard));
else
- translation_table = Fcons (translation_table,
- Fcons (standard, Qnil));
+ translation_table = list2 (translation_table, standard);
}
}
/* We found a composition. Store the corresponding
annotation data in BUF. */
int *head = buf;
- enum composition_method method = COMPOSITION_METHOD (prop);
+ enum composition_method method = composition_method (prop);
int nchars = COMPOSITION_LENGTH (prop);
ADD_COMPOSITION_DATA (buf, nchars, 0, method);
}
-static Lisp_Object
+static void
code_conversion_restore (Lisp_Object arg)
{
Lisp_Object current, workbuf;
}
set_buffer_internal (XBUFFER (current));
UNGCPRO;
- return Qnil;
}
Lisp_Object
{
if (BUFFERP (coding->dst_object))
coding->dst_object = Fbuffer_string ();
+ else if (coding->raw_destination)
+ /* This is used to avoid creating huge Lisp string.
+ NOTE: caller who sets `raw_destination' is also
+ responsible for freeing `destination' buffer. */
+ coding->dst_object = Qnil;
else
{
coding->dst_object
{
detect_info.found = CATEGORY_MASK_RAW_TEXT;
id = CODING_SYSTEM_ID (Qno_conversion);
- val = Fcons (make_number (id), Qnil);
+ val = list1 (make_number (id));
}
else if (! detect_info.rejected && ! detect_info.found)
{
detect_info.found = CATEGORY_MASK_ANY;
id = coding_categories[coding_category_undecided].id;
- val = Fcons (make_number (id), Qnil);
+ val = list1 (make_number (id));
}
else if (highest)
{
if (detect_info.found)
{
detect_info.found = 1 << category;
- val = Fcons (make_number (this->id), Qnil);
+ val = list1 (make_number (this->id));
}
else
for (i = 0; i < coding_category_raw_text; i++)
{
detect_info.found = 1 << coding_priorities[i];
id = coding_categories[coding_priorities[i]].id;
- val = Fcons (make_number (id), Qnil);
+ val = list1 (make_number (id));
break;
}
}
found |= 1 << category;
id = coding_categories[category].id;
if (id >= 0)
- val = Fcons (make_number (id), val);
+ val = list1 (make_number (id));
}
}
for (i = coding_category_raw_text - 1; i >= 0; i--)
this = coding_categories + coding_category_utf_8_sig;
else
this = coding_categories + coding_category_utf_8_nosig;
- val = Fcons (make_number (this->id), Qnil);
+ val = list1 (make_number (this->id));
}
}
else if (base_category == coding_category_utf_16_auto)
this = coding_categories + coding_category_utf_16_be_nosig;
else
this = coding_categories + coding_category_utf_16_le_nosig;
- val = Fcons (make_number (this->id), Qnil);
+ val = list1 (make_number (this->id));
}
}
else
{
detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
- val = Fcons (make_number (coding.id), Qnil);
+ val = list1 (make_number (coding.id));
}
/* Then, detect eol-format if necessary. */
attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
ASET (attrs, coding_attr_trans_tbl,
get_translation_table (attrs, 1, NULL));
- list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
+ list = Fcons (list2 (elt, attrs), list);
}
if (STRINGP (start))
setup_coding_system (coding_system, &coding);
coding.mode |= CODING_MODE_LAST_BLOCK;
+ if (BUFFERP (dst_object) && !EQ (dst_object, src_object))
+ {
+ struct buffer *buf = XBUFFER (dst_object);
+ ptrdiff_t buf_pt = BUF_PT (buf);
+
+ invalidate_buffer_caches (buf, buf_pt, buf_pt);
+ }
+
if (encodep)
encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
dst_object);
coding.mode |= CODING_MODE_LAST_BLOCK;
chars = SCHARS (string);
bytes = SBYTES (string);
+
+ if (BUFFERP (dst_object))
+ {
+ struct buffer *buf = XBUFFER (dst_object);
+ ptrdiff_t buf_pt = BUF_PT (buf);
+
+ invalidate_buffer_caches (buf, buf_pt, buf_pt);
+ }
+
if (encodep)
encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
else
return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
}
+/* Encode or decode a file name, to or from a unibyte string suitable
+ for passing to C library functions. */
+Lisp_Object
+decode_file_name (Lisp_Object fname)
+{
+#ifdef WINDOWSNT
+ /* The w32 build pretends to use UTF-8 for file-name encoding, and
+ converts the file names either to UTF-16LE or to the system ANSI
+ codepage internally, depending on the underlying OS; see w32.c. */
+ if (! NILP (Fcoding_system_p (Qutf_8)))
+ return code_convert_string_norecord (fname, Qutf_8, 0);
+ return fname;
+#else /* !WINDOWSNT */
+ if (! NILP (Vfile_name_coding_system))
+ return code_convert_string_norecord (fname, Vfile_name_coding_system, 0);
+ else if (! NILP (Vdefault_file_name_coding_system))
+ return code_convert_string_norecord (fname,
+ Vdefault_file_name_coding_system, 0);
+ else
+ return fname;
+#endif
+}
+
+Lisp_Object
+encode_file_name (Lisp_Object fname)
+{
+ /* This is especially important during bootstrap and dumping, when
+ file-name encoding is not yet known, and therefore any non-ASCII
+ file names are unibyte strings, and could only be thrashed if we
+ try to encode them. */
+ if (!STRING_MULTIBYTE (fname))
+ return fname;
+#ifdef WINDOWSNT
+ /* The w32 build pretends to use UTF-8 for file-name encoding, and
+ converts the file names either to UTF-16LE or to the system ANSI
+ codepage internally, depending on the underlying OS; see w32.c. */
+ if (! NILP (Fcoding_system_p (Qutf_8)))
+ return code_convert_string_norecord (fname, Qutf_8, 1);
+ return fname;
+#else /* !WINDOWSNT */
+ if (! NILP (Vfile_name_coding_system))
+ return code_convert_string_norecord (fname, Vfile_name_coding_system, 1);
+ else if (! NILP (Vdefault_file_name_coding_system))
+ return code_convert_string_norecord (fname,
+ Vdefault_file_name_coding_system, 1);
+ else
+ return fname;
+#endif
+}
DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
2, 4, 0,
tset_charset_list
(term, (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK
? coding_charset_list (terminal_coding)
- : Fcons (make_number (charset_ascii), Qnil)));
+ : list1 (make_number (charset_ascii))));
return Qnil;
}
{
dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
if (dim < dim2)
- tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
+ tmp = list2 (XCAR (tail), tmp);
else
- tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
+ tmp = list2 (tmp, XCAR (tail));
}
else
{
break;
}
if (NILP (tmp2))
- tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
+ tmp = nconc2 (tmp, list1 (XCAR (tail)));
else
{
XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
&& ! EQ (eol_type, Qmac))
error ("Invalid eol-type");
- aliases = Fcons (name, Qnil);
+ aliases = list1 (name);
if (NILP (eol_type))
{
Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
this_name = AREF (eol_type, i);
- this_aliases = Fcons (this_name, Qnil);
+ this_aliases = list1 (this_name);
this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
this_spec = make_uninit_vector (3);
ASET (this_spec, 0, attrs);
list. */
while (!NILP (XCDR (aliases)))
aliases = XCDR (aliases);
- XSETCDR (aliases, Fcons (alias, Qnil));
+ XSETCDR (aliases, list1 (alias));
eol_type = AREF (spec, 2);
if (VECTORP (eol_type))
Fput (Qcoding_system_error, Qerror_message,
build_pure_c_string ("Invalid coding system"));
- /* Intern this now in case it isn't already done.
- Setting this variable twice is harmless.
- But don't staticpro it here--that is done in alloc.c. */
- Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
-
DEFSYM (Qtranslation_table, "translation-table");
Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
DEFSYM (Qtranslation_table_id, "translation-table-id");