/* Coding system handler (conversion, detection, and etc).
Copyright (C) 2001, 2002, 2003, 2004, 2005,
- 2006 Free Software Foundation, Inc.
- Copyright (C) 1995, 1997, 1998, 2002, 2003, 2004, 2005
+ 2006, 2007 Free Software Foundation, Inc.
+ Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
GNU Emacs is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
+the Free Software Foundation; either version 3, or (at your option)
any later version.
GNU Emacs is distributed in the hope that it will be useful,
/* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
- form if MULTIBYTEP is nonzero. */
+ form if MULTIBYTEP is nonzero. In addition, if no source byte remains
+ (SRC >= SRC_END), return RET from the enclosing function. */
-#define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep) \
+#define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep, ret) \
do { \
if (src >= src_end) \
{ \
coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
- goto label_end_of_loop; \
+ return ret; \
} \
c1 = *src++; \
if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL) \
#include "coding.h"
#include "window.h"
#include "intervals.h"
+#include "frame.h"
+#include "termhooks.h"
#else /* not emacs */
Lisp_Object Qcoding_system_history;
Lisp_Object Qsafe_chars;
Lisp_Object Qvalid_codes;
+Lisp_Object Qascii_incompatible;
extern Lisp_Object Qinsert_file_contents, Qwrite_region;
Lisp_Object Qcall_process, Qcall_process_region;
Lisp_Object eol_mnemonic_undecided;
/* Format of end-of-line decided by system. This is CODING_EOL_LF on
- Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac. */
+ Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.
+ This has an effect only for external encoding (i.e. for output to
+ file and process), not for in-buffer or Lisp string encoding. */
int system_eol_type;
#ifdef emacs
/* Flag to make buffer-file-coding-system inherit from process-coding. */
int inherit_process_coding_system;
-/* Coding system to be used to encode text for terminal display. */
-struct coding_system terminal_coding;
-
/* Coding system to be used to encode text for terminal display when
terminal coding system is nil. */
struct coding_system safe_terminal_coding;
-/* Coding system of what is sent from terminal keyboard. */
-struct coding_system keyboard_coding;
-
/* Default coding system to be used to write a file. */
struct coding_system default_buffer_file_coding;
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
-
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep,
+ CODING_CATEGORY_MASK_EMACS_MULE);
if (composing)
{
if (c < 0xA0)
composing = 0;
else if (c == 0xA0)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
c &= 0x7F;
}
else
}
}
}
- label_end_of_loop:
- return CODING_CATEGORY_MASK_EMACS_MULE;
}
Lisp_Object safe_chars;
reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
- while (mask && src < src_end)
+ while (mask)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found);
retry:
switch (c)
{
if (inhibit_iso_escape_detection)
break;
single_shifting = 0;
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found);
if (c >= '(' && c <= '/')
{
/* Designation sequence for a charset of dimension 1. */
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep, mask & mask_found);
if (c1 < ' ' || c1 >= 0x80
|| (charset = iso_charset_table[0][c >= ','][c1]) < 0)
/* Invalid designation sequence. Just ignore. */
else if (c == '$')
{
/* Designation sequence for a charset of dimension 2. */
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found);
if (c >= '@' && c <= 'B')
/* Designation for JISX0208.1978, GB2312, or JISX0208. */
reg[0] = charset = iso_charset_table[1][0][c];
else if (c >= '(' && c <= '/')
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep,
+ mask & mask_found);
if (c1 < ' ' || c1 >= 0x80
|| (charset = iso_charset_table[1][c >= ','][c1]) < 0)
/* Invalid designation sequence. Just ignore. */
c = -1;
while (src < src_end)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep,
+ mask & mask_found);
if (c < 0xA0)
break;
i++;
break;
}
}
- label_end_of_loop:
return (mask & mask_found);
}
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_SJIS);
if (c < 0x80)
continue;
if (c == 0x80 || c == 0xA0 || c > 0xEF)
return 0;
if (c <= 0x9F || c >= 0xE0)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
if (c < 0x40 || c == 0x7F || c > 0xFC)
return 0;
}
}
- label_end_of_loop:
- return CODING_CATEGORY_MASK_SJIS;
}
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_BIG5);
if (c < 0x80)
continue;
if (c < 0xA1 || c > 0xFE)
return 0;
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
return 0;
}
- label_end_of_loop:
- return CODING_CATEGORY_MASK_BIG5;
}
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_UTF_8);
if (UTF_8_1_OCTET_P (c))
continue;
else if (UTF_8_2_OCTET_LEADING_P (c))
do
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
if (!UTF_8_EXTRA_OCTET_P (c))
return 0;
seq_maybe_bytes--;
}
while (seq_maybe_bytes > 0);
}
-
- label_end_of_loop:
- return CODING_CATEGORY_MASK_UTF_8;
}
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
struct coding_system dummy_coding;
struct coding_system *coding = &dummy_coding;
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep, 0);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep, 0);
if ((c1 == 0xFF) && (c2 == 0xFE))
return CODING_CATEGORY_MASK_UTF_16_LE;
else if ((c1 == 0xFE) && (c2 == 0xFF))
return CODING_CATEGORY_MASK_UTF_16_BE;
-
- label_end_of_loop:
return 0;
}
valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_CCL);
if (! valid[c])
return 0;
}
- label_end_of_loop:
- return CODING_CATEGORY_MASK_CCL;
}
\f
{
coding->eol_type = CODING_EOL_UNDECIDED;
coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
+ if (system_eol_type != CODING_EOL_LF)
+ coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
}
else if (XFASTINT (eol_type) == 1)
{
= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
}
else
- coding->eol_type = system_eol_type;
+ {
+ coding->common_flags = 0;
+ coding->eol_type = CODING_EOL_LF;
+ }
coding_type = XVECTOR (coding_spec)->contents[0];
/* Try short cut. */
if (!NILP (val))
coding->composing = COMPOSITION_NO;
+ /* If the coding system is ascii-incompatible, record it in
+ common_flags. */
+ val = Fplist_get (plist, Qascii_incompatible);
+ if (! NILP (val))
+ coding->common_flags |= CODING_ASCII_INCOMPATIBLE_MASK;
+
switch (XFASTINT (coding_type))
{
case 0:
coding->type = coding_type_no_conversion;
coding->category_idx = CODING_CATEGORY_IDX_BINARY;
coding->common_flags = 0;
- coding->eol_type = NILP (coding_system) ? system_eol_type : CODING_EOL_LF;
+ coding->eol_type = CODING_EOL_UNDECIDED;
coding->pre_write_conversion = coding->post_read_conversion = Qnil;
- return -1;
+ return NILP (coding_system) ? 0 : -1;
}
/* Free memory blocks allocated for storing composition information. */
coding->consumed = coding->consumed_char = 0;
coding->errors = 0;
coding->result = CODING_FINISH_NORMAL;
+ if (coding->eol_type == CODING_EOL_UNDECIDED)
+ coding->eol_type = CODING_EOL_LF;
switch (coding->type)
{
inhibit_modification_hooks = saved_inhibit_modification_hooks;
}
+ coding->heading_ascii = 0;
+
if (! encodep && CODING_REQUIRE_DETECTION (coding))
{
/* We must detect encoding of text and eol format. */
/* As we are already in the work buffer, we must generate a new
buffer for the work. */
Lisp_Object name;
-
+
name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
buffer = buffer_to_kill = Fget_buffer_create (name);
buf = XBUFFER (buffer);
saved_coding_symbol = coding->symbol;
coding->src_multibyte = STRING_MULTIBYTE (str);
coding->dst_multibyte = 1;
+ coding->heading_ascii = 0;
+
if (CODING_REQUIRE_DETECTION (coding))
{
/* See the comments in code_convert_region. */
/* Try to skip the heading and tailing ASCIIs. We can't skip them
if we must run CCL program or there are compositions to
encode. */
+ coding->heading_ascii = 0;
if (coding->type != coding_type_ccl
&& (! coding->cmp_data || coding->cmp_data->used == 0))
{
}
if (!NILP (Fcoding_system_p (coding_system)))
return coding_system;
- while (1)
- Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
+ xsignal1 (Qcoding_system_error, coding_system);
}
\f
Lisp_Object
the coding system `undecided' is specified. The list is ordered by
priority decided in the current language environment.
-If only ASCII characters are found, it returns a list of single element
+If only ASCII characters are found (except for ISO-2022 control
+characters such as ESC), it returns a list of single element
`undecided' or its subsidiary coding system according to a detected
end-of-line format.
`undecided' is specified. The list is ordered by priority decided in
the current language environment.
-If only ASCII characters are found, it returns a list of single element
+If only ASCII characters are found (except for ISO-2022 control
+characters such as ESC), it returns a list of single element
`undecided' or its subsidiary coding system according to a detected
end-of-line format.
}
DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
- doc: /* Encode a Japanese character CHAR to shift_jis encoding.
+ doc: /* Encode a Japanese character CH to shift_jis encoding.
Return the corresponding code in SJIS. */)
(ch)
Lisp_Object ch;
}
DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
- doc: /* Encode the Big5 character CHAR to BIG5 coding system.
+ doc: /* Encode the Big5 character CH to BIG5 coding system.
Return the corresponding character code in Big5. */)
(ch)
Lisp_Object ch;
}
\f
DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
- Sset_terminal_coding_system_internal, 1, 1, 0,
+ Sset_terminal_coding_system_internal, 1, 2, 0,
doc: /* Internal use only. */)
- (coding_system)
+ (coding_system, terminal)
Lisp_Object coding_system;
+ Lisp_Object terminal;
{
+ struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
CHECK_SYMBOL (coding_system);
- setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
+ setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
/* We had better not send unsafe characters to terminal. */
- terminal_coding.mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
+ terminal_coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
/* Character composition should be disabled. */
- terminal_coding.composing = COMPOSITION_DISABLED;
+ terminal_coding->composing = COMPOSITION_DISABLED;
/* Error notification should be suppressed. */
- terminal_coding.suppress_error = 1;
- terminal_coding.src_multibyte = 1;
- terminal_coding.dst_multibyte = 0;
+ terminal_coding->suppress_error = 1;
+ terminal_coding->src_multibyte = 1;
+ terminal_coding->dst_multibyte = 0;
return Qnil;
}
}
DEFUN ("terminal-coding-system", Fterminal_coding_system,
- Sterminal_coding_system, 0, 0, 0,
- doc: /* Return coding system specified for terminal output. */)
- ()
+ Sterminal_coding_system, 0, 1, 0,
+ doc: /* Return coding system specified for terminal output on the given terminal.
+TERMINAL may be a terminal id, a frame, or nil for the selected
+frame's terminal device. */)
+ (terminal)
+ Lisp_Object terminal;
{
- return terminal_coding.symbol;
+ return TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1))->symbol;
}
DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
- Sset_keyboard_coding_system_internal, 1, 1, 0,
+ Sset_keyboard_coding_system_internal, 1, 2, 0,
doc: /* Internal use only. */)
- (coding_system)
+ (coding_system, terminal)
Lisp_Object coding_system;
+ Lisp_Object terminal;
{
+ struct terminal *t = get_terminal (terminal, 1);
CHECK_SYMBOL (coding_system);
- setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
+
+ setup_coding_system (Fcheck_coding_system (coding_system),
+ TERMINAL_KEYBOARD_CODING (t));
/* Character composition should be disabled. */
- keyboard_coding.composing = COMPOSITION_DISABLED;
+ TERMINAL_KEYBOARD_CODING (t)->composing = COMPOSITION_DISABLED;
return Qnil;
}
DEFUN ("keyboard-coding-system", Fkeyboard_coding_system,
- Skeyboard_coding_system, 0, 0, 0,
- doc: /* Return coding system specified for decoding keyboard input. */)
- ()
+ Skeyboard_coding_system, 0, 1, 0,
+ doc: /* Return coding system for decoding keyboard input on TERMINAL.
+TERMINAL may be a terminal id, a frame, or nil for the selected
+frame's terminal device. */)
+ (terminal)
+ Lisp_Object terminal;
{
- return keyboard_coding.symbol;
+ return TERMINAL_KEYBOARD_CODING (get_terminal (terminal, 1))->symbol;
}
\f
whichever argument specifies the file name is TARGET.
TARGET has a meaning which depends on OPERATION:
- For file I/O, TARGET is a file name.
+ For file I/O, TARGET is a file name (except for the special case below).
For process I/O, TARGET is a process name.
For network I/O, TARGET is a service name or a port number
or a function symbol to call.
In the last case, we call the function with one argument,
which is a list of all the arguments given to this function.
+If the function can't decide a coding system, it can return
+`undecided' so that the normal code-detection is performed.
-usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
+If OPERATION is `insert-file-contents', the argument corresponding to
+TARGET may be a cons (FILENAME . BUFFER). In that case, FILENAME is a
+file name to look up, and BUFFER is a buffer that contains the file's
+contents (not yet decoded). If `file-coding-system-alist' specifies a
+function to call for FILENAME, that function should examine the
+contents of BUFFER instead of reading the file.
+
+usage: (find-operation-coding-system OPERATION ARGUMENTS...) */)
(nargs, args)
int nargs;
Lisp_Object *args;
target_idx = make_number (4);
target = args[XINT (target_idx) + 1];
if (!(STRINGP (target)
+ || (EQ (operation, Qinsert_file_contents) && CONSP (target)
+ && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
|| (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
error ("Invalid argument %d", XINT (target_idx) + 1);
+ if (CONSP (target))
+ target = XCAR (target);
chain = ((EQ (operation, Qinsert_file_contents)
|| EQ (operation, Qwrite_region))
return Fcons (val, val);
if (! NILP (Ffboundp (val)))
{
+ /* We use call1 rather than safe_call1
+ so as to get bug reports about functions called here
+ which don't handle the current interface. */
val = call1 (val, Flist (nargs, args));
if (CONSP (val))
return val;
Lisp_Object safe_chars, slot;
if (NILP (Fcheck_coding_system (coding_system)))
- Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
+ xsignal1 (Qcoding_system_error, coding_system);
+
safe_chars = coding_safe_chars (coding_system);
if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
error ("No valid safe-chars property for %s",
SDATA (SYMBOL_NAME (coding_system)));
+
if (EQ (safe_chars, Qt))
{
if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
- setup_coding_system (Qnil, &keyboard_coding);
- setup_coding_system (Qnil, &terminal_coding);
setup_coding_system (Qnil, &safe_terminal_coding);
setup_coding_system (Qnil, &default_buffer_file_coding);
Qvalid_codes = intern ("valid-codes");
staticpro (&Qvalid_codes);
+ Qascii_incompatible = intern ("ascii-incompatible");
+ staticpro (&Qascii_incompatible);
+
Qemacs_mule = intern ("emacs-mule");
staticpro (&Qemacs_mule);
If VAL is a cons of coding systems, the car part is used for decoding,
and the cdr part is used for encoding.
If VAL is a function symbol, the function must return a coding system
-or a cons of coding systems which are used as above. The function gets
-the arguments with which `find-operation-coding-system' was called.
+or a cons of coding systems which are used as above. The function is
+called with an argument that is a list of the arguments with which
+`find-operation-coding-system' was called. If the function can't decide
+a coding system, it can return `undecided' so that the normal
+code-detection is performed.
See also the function `find-operation-coding-system'
and the variable `auto-coding-alist'. */);