/* Coding system handler (conversion, detection, and etc).
- Copyright (C) 1995,97,1998,2002,2003 Electrotechnical Laboratory, JAPAN.
- Licensed to the Free Software Foundation.
- Copyright (C) 2001,2002,2003 Free Software Foundation, Inc.
+ Copyright (C) 2001, 2002, 2003, 2004, 2005,
+ 2006 Free Software Foundation, Inc.
+ Copyright (C) 1995, 1997, 1998, 2002, 2003, 2004, 2005
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H14PRO021
This file is part of GNU Emacs.
You should have received a copy of the GNU General Public License
along with GNU Emacs; see the file COPYING. If not, write to
-the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA. */
+the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
/*** TABLE OF CONTENTS ***
static void
decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
...
Lisp_Object Qvalid_codes;
extern Lisp_Object Qinsert_file_contents, Qwrite_region;
-Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
+Lisp_Object Qcall_process, Qcall_process_region;
Lisp_Object Qstart_process, Qopen_network_stream;
Lisp_Object Qtarget_idx;
c = -1; \
else \
{ \
- c -= 0xA0; \
+ c -= 0x80; \
*p++ = c; \
} \
} \
decode_composition_emacs_mule (coding, src, src_end,
destination, dst_end, dst_bytes)
struct coding_system *coding;
- unsigned char *src, *src_end, **destination, *dst_end;
+ const unsigned char *src, *src_end;
+ unsigned char **destination, *dst_end;
int dst_bytes;
{
unsigned char *dst = *destination;
int method, data_len, nchars;
- unsigned char *src_base = src++;
+ const unsigned char *src_base = src++;
/* Store components of composition. */
int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
int ncomponent;
component[ncomponent] = c;
}
}
- else
+ else if (c >= 0x80)
{
/* This may be an old Emacs 20 style format. See the comment at
the section 2 of this file. */
else
return 0;
}
+ else
+ return 0;
if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
{
static void
decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
+ const unsigned char *src = source;
+ const unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
/* SRC_BASE remembers the start position in source in each loop.
The loop will be exited when there's not enough source code, or
when there's not enough destination area to produce a
character. */
- unsigned char *src_base;
+ const unsigned char *src_base;
coding->produced_char = 0;
while ((src_base = src) < src_end)
{
- unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
+ unsigned char tmp[MAX_MULTIBYTE_LENGTH];
+ const unsigned char *p;
int bytes;
if (*src == '\r')
static void
encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
+ const unsigned char *src = source;
+ const unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
- unsigned char *src_base;
+ const unsigned char *src_base;
int c;
int char_offset;
int *data;
static void
decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
+ const unsigned char *src = source;
+ const unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
/* Charsets invoked to graphic plane 0 and 1 respectively. */
(within macro ONE_MORE_BYTE), or when there's not enough
destination area to produce a character (within macro
EMIT_CHAR). */
- unsigned char *src_base;
+ const unsigned char *src_base;
int c, charset;
Lisp_Object translation_table;
Lisp_Object safe_chars;
DECODE_COMPOSITION_END ('1');
src = src_base;
c = *src++;
+ if (! NILP (translation_table))
+ c = translate_char (translation_table, c, 0, 0, 0);
EMIT_CHAR (c);
}
encode_designation_at_bol (coding, translation_table, src, src_end, dst)
struct coding_system *coding;
Lisp_Object translation_table;
- unsigned char *src, *src_end, *dst;
+ const unsigned char *src, *src_end;
+ unsigned char *dst;
{
int charset, c, found = 0, reg;
/* Table of charsets to be designated to each graphic register. */
static void
encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
+ const unsigned char *src = source;
+ const unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
/* Since the maximum bytes produced by each loop is 20, we subtract 19
analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
there's not enough destination area to produce encoded codes
(within macro EMIT_BYTES). */
- unsigned char *src_base;
+ const unsigned char *src_base;
int c;
Lisp_Object translation_table;
Lisp_Object safe_chars;
decode_coding_sjis_big5 (coding, source, destination,
src_bytes, dst_bytes, sjis_p)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
int sjis_p;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
+ const unsigned char *src = source;
+ const unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
/* SRC_BASE remembers the start position in source in each loop.
(within macro ONE_MORE_BYTE), or when there's not enough
destination area to produce a character (within macro
EMIT_CHAR). */
- unsigned char *src_base;
+ const unsigned char *src_base;
Lisp_Object translation_table;
if (NILP (Venable_character_translation))
static void
decode_eol (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
- unsigned char *src = source;
+ const unsigned char *src = source;
unsigned char *dst = destination;
- unsigned char *src_end = src + src_bytes;
+ const unsigned char *src_end = src + src_bytes;
unsigned char *dst_end = dst + dst_bytes;
Lisp_Object translation_table;
/* SRC_BASE remembers the start position in source in each loop.
(within macro ONE_MORE_BYTE), or when there's not enough
destination area to produce a character (within macro
EMIT_CHAR). */
- unsigned char *src_base;
+ const unsigned char *src_base;
int c;
translation_table = Qnil;
{
coding->eol_type = CODING_EOL_UNDECIDED;
coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
+ if (system_eol_type != CODING_EOL_LF)
+ coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
}
else if (XFASTINT (eol_type) == 1)
{
coding->type = coding_type_no_conversion;
coding->category_idx = CODING_CATEGORY_IDX_BINARY;
coding->common_flags = 0;
- coding->eol_type = CODING_EOL_LF;
+ coding->eol_type = NILP (coding_system) ? system_eol_type : CODING_EOL_LF;
+ if (coding->eol_type != CODING_EOL_LF)
+ coding->common_flags
+ |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
coding->pre_write_conversion = coding->post_read_conversion = Qnil;
- return -1;
+ return NILP (coding_system) ? 0 : -1;
}
/* Free memory blocks allocated for storing composition information. */
coding->consumed = coding->consumed_char = 0;
coding->errors = 0;
coding->result = CODING_FINISH_NORMAL;
+ if (coding->eol_type == CODING_EOL_UNDECIDED)
+ coding->eol_type = system_eol_type;
switch (coding->type)
{
if (coding->type == coding_type_ccl
|| coding->eol_type == CODING_EOL_CRLF
|| coding->eol_type == CODING_EOL_CR
+ || (coding->eol_type == CODING_EOL_UNDECIDED
+ && system_eol_type != CODING_EOL_LF)
|| (coding->cmp_data && coding->cmp_data->used > 0))
{
/* We can't skip any data. */
} \
} while (0)
+/* ARG is a list (CODING BUFFER ...), where CODING is the value to
+ store in Vlast_coding_system_used and the remaining elements are
+ buffers to kill. */
static Lisp_Object
code_convert_region_unwind (arg)
Lisp_Object arg;
{
+ struct gcpro gcpro1;
+ GCPRO1 (arg);
+
inhibit_pre_post_conversion = 0;
- Vlast_coding_system_used = arg;
+ Vlast_coding_system_used = XCAR (arg);
+ for (arg = XCDR (arg); ! NILP (arg); arg = XCDR (arg))
+ Fkill_buffer (XCAR (arg));
+
+ UNGCPRO;
return Qnil;
}
Lisp_Object new;
record_unwind_protect (code_convert_region_unwind,
- Vlast_coding_system_used);
+ Fcons (Vlast_coding_system_used, Qnil));
/* We should not call any more pre-write/post-read-conversion
functions while this pre-write-conversion is running. */
inhibit_pre_post_conversion = 1;
REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
Here, we are sure that NEW >= ORIG. */
- float ratio;
if (coding->produced <= coding->consumed)
{
}
else
{
- ratio = (coding->produced - coding->consumed) / coding->consumed;
+ float ratio = coding->produced - coding->consumed;
+ ratio /= coding->consumed;
require = len_byte * ratio;
}
first = 0;
TEMP_SET_PT_BOTH (from, from_byte);
prev_Z = Z;
record_unwind_protect (code_convert_region_unwind,
- Vlast_coding_system_used);
+ Fcons (Vlast_coding_system_used, Qnil));
saved_coding_system = Vlast_coding_system_used;
Vlast_coding_system_used = coding->symbol;
/* We should not call any more pre-write/post-read-conversion
/* Set the current buffer to the working buffer prepared for
code-conversion. MULTIBYTE specifies the multibyteness of the
- buffer. */
+ buffer. Return the buffer we set if it must be killed after use.
+ Otherwise return Qnil. */
-static struct buffer *
+static Lisp_Object
set_conversion_work_buffer (multibyte)
int multibyte;
{
- Lisp_Object buffer;
+ Lisp_Object buffer, buffer_to_kill;
struct buffer *buf;
buffer = Fget_buffer_create (Vcode_conversion_workbuf_name);
buf = XBUFFER (buffer);
+ if (buf == current_buffer)
+ {
+ /* As we are already in the work buffer, we must generate a new
+ buffer for the work. */
+ Lisp_Object name;
+
+ name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
+ buffer = buffer_to_kill = Fget_buffer_create (name);
+ buf = XBUFFER (buffer);
+ }
+ else
+ buffer_to_kill = Qnil;
+
delete_all_overlays (buf);
buf->directory = current_buffer->directory;
buf->read_only = Qnil;
Fwiden ();
del_range_2 (BEG, BEG_BYTE, Z, Z_BYTE, 0);
buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
- return buf;
+ return buffer_to_kill;
}
Lisp_Object
int count = SPECPDL_INDEX ();
struct gcpro gcpro1, gcpro2;
int multibyte = STRING_MULTIBYTE (str);
- struct buffer *buf;
Lisp_Object old_deactivate_mark;
+ Lisp_Object buffer_to_kill;
+ Lisp_Object unwind_arg;
record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
- record_unwind_protect (code_convert_region_unwind,
- Vlast_coding_system_used);
/* It is not crucial to specbind this. */
old_deactivate_mark = Vdeactivate_mark;
GCPRO2 (str, old_deactivate_mark);
/* We must insert the contents of STR as is without
unibyte<->multibyte conversion. For that, we adjust the
multibyteness of the working buffer to that of STR. */
- set_conversion_work_buffer (multibyte);
+ buffer_to_kill = set_conversion_work_buffer (multibyte);
+ if (NILP (buffer_to_kill))
+ unwind_arg = Fcons (Vlast_coding_system_used, Qnil);
+ else
+ unwind_arg = list2 (Vlast_coding_system_used, buffer_to_kill);
+ record_unwind_protect (code_convert_region_unwind, unwind_arg);
insert_from_string (str, 0, 0,
SCHARS (str), SBYTES (str), 0);
UNGCPRO;
inhibit_pre_post_conversion = 1;
if (encodep)
- call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
+ {
+ struct buffer *prev = current_buffer;
+
+ call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
+ if (prev != current_buffer)
+ /* pre-write-conversion switched buffers; kill the new one on unwind too. */
+ Fsetcdr (unwind_arg, Fcons (Fcurrent_buffer (), XCDR (unwind_arg)));
+ }
else
{
Vlast_coding_system_used = coding->symbol;
{
struct gcpro gcpro1, gcpro2;
struct buffer *cur = current_buffer;
+ struct buffer *prev;
Lisp_Object old_deactivate_mark, old_last_coding_system_used;
Lisp_Object args[3];
+ Lisp_Object buffer_to_kill;
/* It is not crucial to specbind this. */
old_deactivate_mark = Vdeactivate_mark;
/* We must insert the contents of STR as is without
unibyte<->multibyte conversion. For that, we adjust the
multibyteness of the working buffer to that of STR. */
- set_conversion_work_buffer (coding->src_multibyte);
+ buffer_to_kill = set_conversion_work_buffer (coding->src_multibyte);
insert_1_both (*str, nchars, nbytes, 0, 0, 0);
UNGCPRO;
inhibit_pre_post_conversion = 1;
+ prev = current_buffer;
args[0] = coding->pre_write_conversion;
args[1] = make_number (BEG);
args[2] = make_number (Z);
bcopy (BEG_ADDR, *str, coding->produced);
coding->src_multibyte
= ! NILP (current_buffer->enable_multibyte_characters);
+ if (prev != current_buffer)
+ Fkill_buffer (Fcurrent_buffer ());
set_buffer_internal (cur);
+ if (! NILP (buffer_to_kill))
+ Fkill_buffer (buffer_to_kill);
}
produced += coding->produced;
produced_char += coding->produced_char;
if (result == CODING_FINISH_NORMAL
+ || result == CODING_FINISH_INTERRUPT
|| (result == CODING_FINISH_INSUFFICIENT_SRC
&& coding->consumed == 0))
break;
if (SYMBOLP (coding->pre_write_conversion)
&& !NILP (Ffboundp (coding->pre_write_conversion)))
- str = run_pre_post_conversion_on_str (str, coding, 1);
+ {
+ str = run_pre_post_conversion_on_str (str, coding, 1);
+ /* STR was newly created by the call above, so there is no need
+ to copy it again. */
+ nocopy = 1;
+ }
from = 0;
to = SCHARS (str);
/* Encoding routines determine the multibyteness of the source text
by coding->src_multibyte. */
- coding->src_multibyte = STRING_MULTIBYTE (str);
+ coding->src_multibyte = SCHARS (str) < SBYTES (str);
coding->dst_multibyte = 0;
if (! CODING_REQUIRE_ENCODING (coding))
- {
- coding->consumed = SBYTES (str);
- coding->consumed_char = SCHARS (str);
- if (STRING_MULTIBYTE (str))
- {
- str = Fstring_as_unibyte (str);
- nocopy = 1;
- }
- coding->produced = SBYTES (str);
- coding->produced_char = SCHARS (str);
- return (nocopy ? str : Fcopy_sequence (str));
- }
+ goto no_need_of_encoding;
if (coding->composing != COMPOSITION_DISABLED)
coding_save_composition (coding, from, to, str);
if (from == to_byte)
{
coding_free_composition_data (coding);
- return (nocopy ? str : Fcopy_sequence (str));
+ goto no_need_of_encoding;
}
shrinked_bytes = from + (SBYTES (str) - to_byte);
}
coding_free_composition_data (coding);
return newstr;
+
+ no_need_of_encoding:
+ coding->consumed = SBYTES (str);
+ coding->consumed_char = SCHARS (str);
+ if (STRING_MULTIBYTE (str))
+ {
+ if (nocopy)
+ /* We are sure that STR doesn't contain a multibyte
+ character. */
+ STRING_SET_UNIBYTE (str);
+ else
+ {
+ str = Fstring_as_unibyte (str);
+ nocopy = 1;
+ }
+ }
+ coding->produced = SBYTES (str);
+ coding->produced_char = SCHARS (str);
+ return (nocopy ? str : Fcopy_sequence (str));
}
\f
STRING_MULTIBYTE (string));
}
-/* Subroutine for Fsafe_coding_systems_region_internal.
+/* Subroutine for Ffind_coding_systems_region_internal.
Return a list of coding systems that safely encode the multibyte
text between P and PEND. SAFE_CODINGS, if non-nil, is an alist of
from = XFASTINT (start);
to = XFASTINT (end);
- if (NILP (coding_system))
+ if (NILP (coding_system) && system_eol_type == CODING_EOL_LF)
return make_number (to - from);
if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
CHECK_STRING (string);
CHECK_SYMBOL (coding_system);
- if (NILP (coding_system))
+ if (NILP (coding_system) && system_eol_type == CODING_EOL_LF)
return (NILP (nocopy) ? Fcopy_sequence (string) : string);
if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
CHECK_STRING (string);
CHECK_SYMBOL (coding_system);
- if (NILP (coding_system))
+ if (NILP (coding_system) && system_eol_type == CODING_EOL_LF)
return string;
if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
On detecting a coding system, Emacs tries code detection algorithms
associated with each coding-category one by one in this order. When
one algorithm agrees with a byte sequence of source text, the coding
-system bound to the corresponding coding-category is selected. */);
+system bound to the corresponding coding-category is selected.
+
+Don't modify this variable directly, but use `set-coding-priority'. */);
{
int i;