/* Coding system handler (conversion, detection, and etc).
Copyright (C) 2001, 2002, 2003, 2004, 2005,
- 2006 Free Software Foundation, Inc.
- Copyright (C) 1995, 1997, 1998, 2002, 2003, 2004, 2005
+ 2006, 2007 Free Software Foundation, Inc.
+ Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
GNU Emacs is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
+the Free Software Foundation; either version 3, or (at your option)
any later version.
GNU Emacs is distributed in the hope that it will be useful,
/* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
- form if MULTIBYTEP is nonzero. */
+ form if MULTIBYTEP is nonzero. In addition, if SRC is not less
+ than SRC_END, return with RET. */
-#define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep) \
+#define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep, ret) \
do { \
if (src >= src_end) \
{ \
coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
- goto label_end_of_loop; \
+ return ret; \
} \
c1 = *src++; \
if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL) \
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
-
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep,
+ CODING_CATEGORY_MASK_EMACS_MULE);
if (composing)
{
if (c < 0xA0)
composing = 0;
else if (c == 0xA0)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
c &= 0x7F;
}
else
}
}
}
- label_end_of_loop:
- return CODING_CATEGORY_MASK_EMACS_MULE;
}
Lisp_Object safe_chars;
reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
- while (mask && src < src_end)
+ while (mask)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found);
retry:
switch (c)
{
if (inhibit_iso_escape_detection)
break;
single_shifting = 0;
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found);
if (c >= '(' && c <= '/')
{
/* Designation sequence for a charset of dimension 1. */
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep, mask & mask_found);
if (c1 < ' ' || c1 >= 0x80
|| (charset = iso_charset_table[0][c >= ','][c1]) < 0)
/* Invalid designation sequence. Just ignore. */
else if (c == '$')
{
/* Designation sequence for a charset of dimension 2. */
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found);
if (c >= '@' && c <= 'B')
/* Designation for JISX0208.1978, GB2312, or JISX0208. */
reg[0] = charset = iso_charset_table[1][0][c];
else if (c >= '(' && c <= '/')
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep,
+ mask & mask_found);
if (c1 < ' ' || c1 >= 0x80
|| (charset = iso_charset_table[1][c >= ','][c1]) < 0)
/* Invalid designation sequence. Just ignore. */
c = -1;
while (src < src_end)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep,
+ mask & mask_found);
if (c < 0xA0)
break;
i++;
break;
}
}
- label_end_of_loop:
return (mask & mask_found);
}
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_SJIS);
if (c < 0x80)
continue;
if (c == 0x80 || c == 0xA0 || c > 0xEF)
return 0;
if (c <= 0x9F || c >= 0xE0)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
if (c < 0x40 || c == 0x7F || c > 0xFC)
return 0;
}
}
- label_end_of_loop:
- return CODING_CATEGORY_MASK_SJIS;
}
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_BIG5);
if (c < 0x80)
continue;
if (c < 0xA1 || c > 0xFE)
return 0;
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
return 0;
}
- label_end_of_loop:
- return CODING_CATEGORY_MASK_BIG5;
}
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_UTF_8);
if (UTF_8_1_OCTET_P (c))
continue;
else if (UTF_8_2_OCTET_LEADING_P (c))
do
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
if (!UTF_8_EXTRA_OCTET_P (c))
return 0;
seq_maybe_bytes--;
}
while (seq_maybe_bytes > 0);
}
-
- label_end_of_loop:
- return CODING_CATEGORY_MASK_UTF_8;
}
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
struct coding_system dummy_coding;
struct coding_system *coding = &dummy_coding;
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep, 0);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep, 0);
if ((c1 == 0xFF) && (c2 == 0xFE))
return CODING_CATEGORY_MASK_UTF_16_LE;
else if ((c1 == 0xFE) && (c2 == 0xFF))
return CODING_CATEGORY_MASK_UTF_16_BE;
-
- label_end_of_loop:
return 0;
}
valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_CCL);
if (! valid[c])
return 0;
}
- label_end_of_loop:
- return CODING_CATEGORY_MASK_CCL;
}
\f
inhibit_modification_hooks = saved_inhibit_modification_hooks;
}
+ coding->heading_ascii = 0;
+
if (! encodep && CODING_REQUIRE_DETECTION (coding))
{
/* We must detect encoding of text and eol format. */
/* As we are already in the work buffer, we must generate a new
buffer for the work. */
Lisp_Object name;
-
+
name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
buffer = buffer_to_kill = Fget_buffer_create (name);
buf = XBUFFER (buffer);
saved_coding_symbol = coding->symbol;
coding->src_multibyte = STRING_MULTIBYTE (str);
coding->dst_multibyte = 1;
+ coding->heading_ascii = 0;
+
if (CODING_REQUIRE_DETECTION (coding))
{
/* See the comments in code_convert_region. */
/* Try to skip the heading and tailing ASCIIs. We can't skip them
if we must run CCL program or there are compositions to
encode. */
+ coding->heading_ascii = 0;
if (coding->type != coding_type_ccl
&& (! coding->cmp_data || coding->cmp_data->used == 0))
{
}
if (!NILP (Fcoding_system_p (coding_system)))
return coding_system;
- while (1)
- Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
+ xsignal1 (Qcoding_system_error, coding_system);
}
\f
Lisp_Object
the coding system `undecided' is specified. The list is ordered by
priority decided in the current language environment.
-If only ASCII characters are found, it returns a list of single element
+If only ASCII characters are found (except for such ISO-2022 control
+characters ISO-2022 as ESC), it returns a list of single element
`undecided' or its subsidiary coding system according to a detected
end-of-line format.
`undecided' is specified. The list is ordered by priority decided in
the current language environment.
-If only ASCII characters are found, it returns a list of single element
+If only ASCII characters are found (except for such ISO-2022 control
+characters ISO-2022 as ESC), it returns a list of single element
`undecided' or its subsidiary coding system according to a detected
end-of-line format.
}
DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
- doc: /* Encode a Japanese character CHAR to shift_jis encoding.
+ doc: /* Encode a Japanese character CH to shift_jis encoding.
Return the corresponding code in SJIS. */)
(ch)
Lisp_Object ch;
}
DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
- doc: /* Encode the Big5 character CHAR to BIG5 coding system.
+ doc: /* Encode the Big5 character CH to BIG5 coding system.
Return the corresponding character code in Big5. */)
(ch)
Lisp_Object ch;
whichever argument specifies the file name is TARGET.
TARGET has a meaning which depends on OPERATION:
- For file I/O, TARGET is a file name.
+ For file I/O, TARGET is a file name (except for the special case below).
For process I/O, TARGET is a process name.
For network I/O, TARGET is a service name or a port number
or a function symbol to call.
In the last case, we call the function with one argument,
which is a list of all the arguments given to this function.
+If the function can't decide a coding system, it can return
+`undecided' so that the normal code-detection is performed.
+
+If OPERATION is `insert-file-contents', the argument corresponding to
+TARGET may be a cons (FILENAME . BUFFER). In that case, FILENAME is a
+file name to look up, and BUFFER is a buffer that contains the file's
+contents (not yet decoded). If `file-coding-system-alist' specifies a
+function to call for FILENAME, that function should examine the
+contents of BUFFER instead of reading the file.
-usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
+usage: (find-operation-coding-system OPERATION ARGUMENTS...) */)
(nargs, args)
int nargs;
Lisp_Object *args;
target_idx = make_number (4);
target = args[XINT (target_idx) + 1];
if (!(STRINGP (target)
+ || (EQ (operation, Qinsert_file_contents) && CONSP (target)
+ && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
|| (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
error ("Invalid argument %d", XINT (target_idx) + 1);
+ if (CONSP (target))
+ target = XCAR (target);
chain = ((EQ (operation, Qinsert_file_contents)
|| EQ (operation, Qwrite_region))
return Fcons (val, val);
if (! NILP (Ffboundp (val)))
{
+ /* We use call1 rather than safe_call1
+ so as to get bug reports about functions called here
+ which don't handle the current interface. */
val = call1 (val, Flist (nargs, args));
if (CONSP (val))
return val;
Lisp_Object safe_chars, slot;
if (NILP (Fcheck_coding_system (coding_system)))
- Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
+ xsignal1 (Qcoding_system_error, coding_system);
+
safe_chars = coding_safe_chars (coding_system);
if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
error ("No valid safe-chars property for %s",
SDATA (SYMBOL_NAME (coding_system)));
+
if (EQ (safe_chars, Qt))
{
if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
If VAL is a cons of coding systems, the car part is used for decoding,
and the cdr part is used for encoding.
If VAL is a function symbol, the function must return a coding system
-or a cons of coding systems which are used as above. The function gets
-the arguments with which `find-operation-coding-system' was called.
+or a cons of coding systems which are used as above. The function is
+called with an argument that is a list of the arguments with which
+`find-operation-coding-system' was called. If the function can't decide
+a coding system, it can return `undecided' so that the normal
+code-detection is performed.
See also the function `find-operation-coding-system'
and the variable `auto-coding-alist'. */);