/* Coding system handler (conversion, detection, etc).
Copyright (C) 2001, 2002, 2003, 2004, 2005,
- 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+ 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
- 2005, 2006, 2007, 2008, 2009
+ 2005, 2006, 2007, 2008, 2009, 2010
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
Copyright (C) 2003
#include <config.h>
#include <stdio.h>
+#include <setjmp.h>
#include "lisp.h"
#include "buffer.h"
case CODING_RESULT_INSUFFICIENT_MEM:
Vlast_code_conversion_error = Qinsufficient_memory;
break;
+ case CODING_RESULT_SUCCESS:
+ break;
default:
Vlast_code_conversion_error = intern ("Unknown error");
}
}
else
coding_alloc_by_realloc (coding, nbytes);
- record_conversion_result (coding, CODING_RESULT_SUCCESS);
coding_set_destination (coding);
dst = coding->destination + offset;
return dst;
e[c1] = 1;
o[c2] = 1;
- detect_info->rejected
- |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
+ detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
+ |CATEGORY_MASK_UTF_16_BE
+ | CATEGORY_MASK_UTF_16_LE);
- while (1)
+ while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
+ != CATEGORY_MASK_UTF_16)
{
TWO_MORE_BYTES (c1, c2);
if (c2 < 0)
e[c1] = 1;
e_num++;
if (e_num >= 128)
- break;
+ detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
}
if (! o[c2])
{
- o[c1] = 1;
+ o[c2] = 1;
o_num++;
if (o_num >= 128)
- break;
+ detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
}
}
- detect_info->rejected |= CATEGORY_MASK_UTF_16;
return 0;
}
{
ASSURE_DESTINATION (safe_room);
c = *charbuf++;
- if (c >= MAX_UNICODE_CHAR)
+ if (c > MAX_UNICODE_CHAR)
c = coding->default_char;
if (c < 0x10000)
int i = 1;
while (src < src_end)
{
+ src_base = src;
ONE_MORE_BYTE (c);
if (c < 0xA0)
- break;
+ {
+ src = src_base;
+ break;
+ }
i++;
}
while (1)
{
- int c1, c2;
+ int c1, c2, c3;
src_base = src;
consumed_chars_base = consumed_chars;
continue;
case ISO_single_shift_2_7:
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
+ goto invalid_code;
case ISO_single_shift_2:
if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
goto invalid_code;
continue;
case '[': /* specification of direction */
- if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
goto invalid_code;
/* For the moment, nested direction is not supported.
So, `coding->mode & CODING_MODE_DIRECTION' zero means
}
/* Now we know CHARSET and 1st position code C1 of a character.
- Produce a decoded character while getting 2nd position code
- C2 if necessary. */
- c1 &= 0x7F;
+ Produce a decoded character while getting 2nd and 3rd
+ position codes C2, C3 if necessary. */
if (CHARSET_DIMENSION (charset) > 1)
{
ONE_MORE_BYTE (c2);
- if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
+ if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
+ || ((c1 & 0x80) != (c2 & 0x80)))
/* C2 is not in a valid range. */
goto invalid_code;
- c1 = (c1 << 8) | (c2 & 0x7F);
- if (CHARSET_DIMENSION (charset) > 2)
+ if (CHARSET_DIMENSION (charset) == 2)
+ c1 = (c1 << 8) | c2;
+ else
{
- ONE_MORE_BYTE (c2);
- if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
- /* C2 is not in a valid range. */
+ ONE_MORE_BYTE (c3);
+ if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
+ || ((c1 & 0x80) != (c3 & 0x80)))
+ /* C3 is not in a valid range. */
goto invalid_code;
- c1 = (c1 << 8) | (c2 & 0x7F);
+ /* Pack the three position codes, with C3 in the low byte. */
+ c1 = (c1 << 16) | (c2 << 8) | c3;
}
}
-
+ /* Clear the 8th bit of every packed position code in one step. */
+ c1 &= 0x7F7F7F;
CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
if (c < 0)
{
int consumed_chars = 0;
int found = 0;
int c;
+ Lisp_Object attrs, charset_list;
+ int max_first_byte_of_2_byte_code;
+
+ CODING_GET_INFO (coding, attrs, charset_list);
+ max_first_byte_of_2_byte_code
+ = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
detect_info->checked |= CATEGORY_MASK_SJIS;
/* A coding system of this category is always ASCII compatible. */
ONE_MORE_BYTE (c);
if (c < 0x80)
continue;
- if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
+ if ((c >= 0x81 && c <= 0x9F)
+ || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
{
ONE_MORE_BYTE (c);
if (c < 0x40 || c == 0x7F || c > 0xFC)
int c1, c2;
c1 = code >> 8;
- if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
+ if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
+ || c1 == 0x28
|| (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
{
JIS_TO_SJIS2 (code);
coding->max_charset_id = SCHARS (val) - 1;
coding->safe_charsets = SDATA (val);
coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
+ coding->carryover_bytes = 0;
coding_type = CODING_ATTR_TYPE (attrs);
if (EQ (coding_type, Qundecided))
Lisp_Object standard, translation_table;
Lisp_Object val;
+ if (NILP (Venable_character_translation))
+ {
+ if (max_lookup)
+ *max_lookup = 0;
+ return Qnil;
+ }
if (encodep)
translation_table = CODING_ATTR_ENCODE_TBL (attrs),
standard = Vstandard_translation_table_for_encode;
{
EMACS_INT bytes;
- if (coding->encoder == encode_coding_raw_text)
+ if (coding->encoder == encode_coding_raw_text
+ || coding->encoder == encode_coding_ccl)
c = *src++, pos++;
else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
EMACS_INT start_byte, end_byte;
const unsigned char *p, *pbeg, *pend;
int c;
- Lisp_Object tail, elt;
+ Lisp_Object tail, elt, work_table;
if (STRINGP (start))
{
while (p < pend && ASCII_BYTE_P (*p)) p++;
while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
+ work_table = Fmake_char_table (Qnil, Qnil);
while (p < pend)
{
if (ASCII_BYTE_P (*p))
else
{
c = STRING_CHAR_ADVANCE (p);
+ if (!NILP (char_table_ref (work_table, c)))
+ /* This character was already checked. Ignore it. */
+ continue;
charset_map_loaded = 0;
for (tail = coding_attrs_list; CONSP (tail);)
p = pbeg + p_offset;
pend = pbeg + pend_offset;
}
+ char_table_set (work_table, c, Qt);
}
}
{
struct terminal *t = get_terminal (terminal, 1);
CHECK_SYMBOL (coding_system);
- setup_coding_system (Fcheck_coding_system (coding_system),
- TERMINAL_KEYBOARD_CODING (t));
+ if (NILP (coding_system))
+ coding_system = Qno_conversion;
+ else
+ Fcheck_coding_system (coding_system);
+ setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
/* Characer composition should be disabled. */
TERMINAL_KEYBOARD_CODING (t)->common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK;
return Fnreverse (val);
}
-static char *suffixes[] = { "-unix", "-dos", "-mac" };
+static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
static Lisp_Object
make_subsidiaries (base)
Vcode_conversion_reused_workbuf = Qnil;
staticpro (&Vcode_conversion_workbuf_name);
- Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
+ Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
reused_workbuf_in_use = 0;
DEFSYM (Qcoding_system_error, "coding-system-error");
Fput (Qcoding_system_error, Qerror_conditions,
- Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
+ pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
Fput (Qcoding_system_error, Qerror_message,
- build_string ("Invalid coding system"));
+ make_pure_c_string ("Invalid coding system"));
/* Intern this now in case it isn't already done.
Setting this variable twice is harmless.
But don't staticpro it here--that is done in alloc.c. */
- Qchar_table_extra_slots = intern ("char-table-extra-slots");
+ Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
DEFSYM (Qtranslation_table, "translation-table");
Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
staticpro (&Vcoding_category_table);
/* Followings are target of code detection. */
ASET (Vcoding_category_table, coding_category_iso_7,
- intern ("coding-category-iso-7"));
+ intern_c_string ("coding-category-iso-7"));
ASET (Vcoding_category_table, coding_category_iso_7_tight,
- intern ("coding-category-iso-7-tight"));
+ intern_c_string ("coding-category-iso-7-tight"));
ASET (Vcoding_category_table, coding_category_iso_8_1,
- intern ("coding-category-iso-8-1"));
+ intern_c_string ("coding-category-iso-8-1"));
ASET (Vcoding_category_table, coding_category_iso_8_2,
- intern ("coding-category-iso-8-2"));
+ intern_c_string ("coding-category-iso-8-2"));
ASET (Vcoding_category_table, coding_category_iso_7_else,
- intern ("coding-category-iso-7-else"));
+ intern_c_string ("coding-category-iso-7-else"));
ASET (Vcoding_category_table, coding_category_iso_8_else,
- intern ("coding-category-iso-8-else"));
+ intern_c_string ("coding-category-iso-8-else"));
ASET (Vcoding_category_table, coding_category_utf_8_auto,
- intern ("coding-category-utf-8-auto"));
+ intern_c_string ("coding-category-utf-8-auto"));
ASET (Vcoding_category_table, coding_category_utf_8_nosig,
- intern ("coding-category-utf-8"));
+ intern_c_string ("coding-category-utf-8"));
ASET (Vcoding_category_table, coding_category_utf_8_sig,
- intern ("coding-category-utf-8-sig"));
+ intern_c_string ("coding-category-utf-8-sig"));
ASET (Vcoding_category_table, coding_category_utf_16_be,
- intern ("coding-category-utf-16-be"));
+ intern_c_string ("coding-category-utf-16-be"));
ASET (Vcoding_category_table, coding_category_utf_16_auto,
- intern ("coding-category-utf-16-auto"));
+ intern_c_string ("coding-category-utf-16-auto"));
ASET (Vcoding_category_table, coding_category_utf_16_le,
- intern ("coding-category-utf-16-le"));
+ intern_c_string ("coding-category-utf-16-le"));
ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
- intern ("coding-category-utf-16-be-nosig"));
+ intern_c_string ("coding-category-utf-16-be-nosig"));
ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
- intern ("coding-category-utf-16-le-nosig"));
+ intern_c_string ("coding-category-utf-16-le-nosig"));
ASET (Vcoding_category_table, coding_category_charset,
- intern ("coding-category-charset"));
+ intern_c_string ("coding-category-charset"));
ASET (Vcoding_category_table, coding_category_sjis,
- intern ("coding-category-sjis"));
+ intern_c_string ("coding-category-sjis"));
ASET (Vcoding_category_table, coding_category_big5,
- intern ("coding-category-big5"));
+ intern_c_string ("coding-category-big5"));
ASET (Vcoding_category_table, coding_category_ccl,
- intern ("coding-category-ccl"));
+ intern_c_string ("coding-category-ccl"));
ASET (Vcoding_category_table, coding_category_emacs_mule,
- intern ("coding-category-emacs-mule"));
+ intern_c_string ("coding-category-emacs-mule"));
/* Followings are NOT target of code detection. */
ASET (Vcoding_category_table, coding_category_raw_text,
- intern ("coding-category-raw-text"));
+ intern_c_string ("coding-category-raw-text"));
ASET (Vcoding_category_table, coding_category_undecided,
- intern ("coding-category-undecided"));
+ intern_c_string ("coding-category-undecided"));
DEFSYM (Qinsufficient_source, "insufficient-source");
DEFSYM (Qinconsistent_eol, "inconsistent-eol");
DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
doc: /*
*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
- eol_mnemonic_unix = build_string (":");
+ eol_mnemonic_unix = make_pure_c_string (":");
DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
doc: /*
*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
- eol_mnemonic_dos = build_string ("\\");
+ eol_mnemonic_dos = make_pure_c_string ("\\");
DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
doc: /*
*String displayed in mode line for MAC-like (CR) end-of-line format. */);
- eol_mnemonic_mac = build_string ("/");
+ eol_mnemonic_mac = make_pure_c_string ("/");
DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
doc: /*
*String displayed in mode line when end-of-line format is not yet determined. */);
- eol_mnemonic_undecided = build_string (":");
+ eol_mnemonic_undecided = make_pure_c_string (":");
DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
doc: /*
for (i = 0; i < coding_arg_max; i++)
args[i] = Qnil;
- plist[0] = intern (":name");
+ plist[0] = intern_c_string (":name");
plist[1] = args[coding_arg_name] = Qno_conversion;
- plist[2] = intern (":mnemonic");
+ plist[2] = intern_c_string (":mnemonic");
plist[3] = args[coding_arg_mnemonic] = make_number ('=');
- plist[4] = intern (":coding-type");
+ plist[4] = intern_c_string (":coding-type");
plist[5] = args[coding_arg_coding_type] = Qraw_text;
- plist[6] = intern (":ascii-compatible-p");
+ plist[6] = intern_c_string (":ascii-compatible-p");
plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
- plist[8] = intern (":default-char");
+ plist[8] = intern_c_string (":default-char");
plist[9] = args[coding_arg_default_char] = make_number (0);
- plist[10] = intern (":for-unibyte");
+ plist[10] = intern_c_string (":for-unibyte");
plist[11] = args[coding_arg_for_unibyte] = Qt;
- plist[12] = intern (":docstring");
- plist[13] = build_string ("Do no conversion.\n\
+ plist[12] = intern_c_string (":docstring");
+ plist[13] = make_pure_c_string ("Do no conversion.\n\
\n\
When you visit a file with this coding, the file is read into a\n\
unibyte buffer as is, thus each byte of a file is treated as a\n\
character.");
- plist[14] = intern (":eol-type");
+ plist[14] = intern_c_string (":eol-type");
plist[15] = args[coding_arg_eol_type] = Qunix;
args[coding_arg_plist] = Flist (16, plist);
Fdefine_coding_system_internal (coding_arg_max, args);
plist[5] = args[coding_arg_coding_type] = Qundecided;
/* This is already set.
plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
- plist[8] = intern (":charset-list");
+ plist[8] = intern_c_string (":charset-list");
plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
plist[11] = args[coding_arg_for_unibyte] = Qnil;
- plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
+ plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
plist[15] = args[coding_arg_eol_type] = Qnil;
args[coding_arg_plist] = Flist (16, plist);
Fdefine_coding_system_internal (coding_arg_max, args);