You should have received a copy of the GNU General Public License
along with GNU Emacs; see the file COPYING. If not, write to
-the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA. */
+the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
/*** TABLE OF CONTENTS ***
static void
decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
...
#include "ccl.h"
#include "coding.h"
#include "window.h"
+#include "intervals.h"
#else /* not emacs */
Lisp_Object Qvalid_codes;
extern Lisp_Object Qinsert_file_contents, Qwrite_region;
-Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
+Lisp_Object Qcall_process, Qcall_process_region;
Lisp_Object Qstart_process, Qopen_network_stream;
Lisp_Object Qtarget_idx;
+/* If a symbol has this property, evaluate the value to define the
+ symbol as a coding system. */
+Lisp_Object Qcoding_system_define_form;
+
Lisp_Object Vselect_safe_coding_system_function;
int coding_system_require_warning;
end-of-line format. */
Lisp_Object Qemacs_mule, Qraw_text;
+Lisp_Object Qutf_8;
+
/* Coding-systems are handed between Emacs Lisp programs and C internal
routines by the following three variables. */
/* Coding-system for reading files and receiving data from process. */
#define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p) \
do { \
int bytes; \
- \
+ \
c = SAFE_ONE_MORE_BYTE (); \
if (c < 0) \
break; \
break; \
*p++ = c; \
} \
- if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)) \
+ if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes) \
+ || (coding->flags /* We are recovering a file. */ \
+ && p0[0] == LEADING_CODE_8_BIT_CONTROL \
+ && ! CHAR_HEAD_P (p0[1]))) \
c = STRING_CHAR (p0, bytes); \
else \
c = -1; \
decode_composition_emacs_mule (coding, src, src_end,
destination, dst_end, dst_bytes)
struct coding_system *coding;
- unsigned char *src, *src_end, **destination, *dst_end;
+ const unsigned char *src, *src_end;
+ unsigned char **destination, *dst_end;
int dst_bytes;
{
unsigned char *dst = *destination;
int method, data_len, nchars;
- unsigned char *src_base = src++;
+ const unsigned char *src_base = src++;
/* Store components of composition. */
int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
int ncomponent;
else
{
int bytes;
- if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+ if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
+ || (coding->flags /* We are recovering a file. */
+ && src[0] == LEADING_CODE_8_BIT_CONTROL
+ && ! CHAR_HEAD_P (src[1])))
c = STRING_CHAR (src, bytes);
else
c = *src, bytes = 1;
static void
decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
+ const unsigned char *src = source;
+ const unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
/* SRC_BASE remembers the start position in source in each loop.
The loop will be exited when there's not enough source code, or
when there's not enough destination area to produce a
character. */
- unsigned char *src_base;
+ const unsigned char *src_base;
coding->produced_char = 0;
while ((src_base = src) < src_end)
{
- unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
+ unsigned char tmp[MAX_MULTIBYTE_LENGTH];
+ const unsigned char *p;
int bytes;
if (*src == '\r')
p = tmp;
src++;
}
- else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+ else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
+ || (coding->flags /* We are recovering a file. */
+ && src[0] == LEADING_CODE_8_BIT_CONTROL
+ && ! CHAR_HEAD_P (src[1])))
{
p = src;
src += bytes;
}
else
{
- bytes = CHAR_STRING (*src, tmp);
- p = tmp;
+ int i, c;
+
+ bytes = BYTES_BY_CHAR_HEAD (*src);
src++;
+ for (i = 1; i < bytes; i++)
+ {
+ ONE_MORE_BYTE (c);
+ if (CHAR_HEAD_P (c))
+ break;
+ }
+ if (i < bytes)
+ {
+ bytes = CHAR_STRING (*src_base, tmp);
+ p = tmp;
+ src = src_base + 1;
+ }
+ else
+ {
+ p = src_base;
+ }
}
if (dst + bytes >= (dst_bytes ? dst_end : src))
{
static void
encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
+ const unsigned char *src = source;
+ const unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
- unsigned char *src_base;
+ const unsigned char *src_base;
int c;
int char_offset;
int *data;
EMIT_ONE_BYTE ('\r');
}
else if (SINGLE_BYTE_CHAR_P (c))
- EMIT_ONE_BYTE (c);
+ {
+ if (coding->flags && ! ASCII_BYTE_P (c))
+ {
+ /* As we are auto saving, retain the multibyte form for
+ 8-bit chars. */
+ unsigned char buf[MAX_MULTIBYTE_LENGTH];
+ int bytes = CHAR_STRING (c, buf);
+
+ if (bytes == 1)
+ EMIT_ONE_BYTE (buf[0]);
+ else
+ EMIT_TWO_BYTES (buf[0], buf[1]);
+ }
+ else
+ EMIT_ONE_BYTE (c);
+ }
else
EMIT_BYTES (src_base, src);
coding->consumed_char++;
coding->cmp_data->next = cmp_data;
coding->cmp_data = cmp_data;
coding->cmp_data_start = 0;
+ coding->composing = COMPOSITION_NO;
}
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
static void
decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
+ const unsigned char *src = source;
+ const unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
/* Charsets invoked to graphic plane 0 and 1 respectively. */
(within macro ONE_MORE_BYTE), or when there's not enough
destination area to produce a character (within macro
EMIT_CHAR). */
- unsigned char *src_base;
+ const unsigned char *src_base;
int c, charset;
Lisp_Object translation_table;
Lisp_Object safe_chars;
while (1)
{
- int c1, c2;
+ int c1, c2 = 0;
src_base = src;
ONE_MORE_BYTE (c1);
}
continue;
+ case '%':
+ if (COMPOSING_P (coding))
+ DECODE_COMPOSITION_END ('1');
+ ONE_MORE_BYTE (c1);
+ if (c1 == '/')
+ {
+ /* CTEXT extended segment:
+ ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
+ We keep these bytes as is for the moment.
+ They may be decoded by post-read-conversion. */
+ int dim, M, L;
+ int size, required;
+ int produced_chars;
+
+ ONE_MORE_BYTE (dim);
+ ONE_MORE_BYTE (M);
+ ONE_MORE_BYTE (L);
+ size = ((M - 128) * 128) + (L - 128);
+ required = 8 + size * 2;
+ if (dst + required > (dst_bytes ? dst_end : src))
+ goto label_end_of_loop;
+ *dst++ = ISO_CODE_ESC;
+ *dst++ = '%';
+ *dst++ = '/';
+ *dst++ = dim;
+ produced_chars = 4;
+ dst += CHAR_STRING (M, dst), produced_chars++;
+ dst += CHAR_STRING (L, dst), produced_chars++;
+ while (size-- > 0)
+ {
+ ONE_MORE_BYTE (c1);
+ dst += CHAR_STRING (c1, dst), produced_chars++;
+ }
+ coding->produced_char += produced_chars;
+ }
+ else if (c1 == 'G')
+ {
+ unsigned char *d = dst;
+ int produced_chars;
+
+ /* XFree86 extension for embedding UTF-8 in CTEXT:
+ ESC % G --UTF-8-BYTES-- ESC % @
+ We keep these bytes as is for the moment.
+ They may be decoded by post-read-conversion. */
+ if (d + 6 > (dst_bytes ? dst_end : src))
+ goto label_end_of_loop;
+ *d++ = ISO_CODE_ESC;
+ *d++ = '%';
+ *d++ = 'G';
+ produced_chars = 3;
+ while (d + 1 < (dst_bytes ? dst_end : src))
+ {
+ ONE_MORE_BYTE (c1);
+ if (c1 == ISO_CODE_ESC
+ && src + 1 < src_end
+ && src[0] == '%'
+ && src[1] == '@')
+ {
+ src += 2;
+ break;
+ }
+ d += CHAR_STRING (c1, d), produced_chars++;
+ }
+ if (d + 3 > (dst_bytes ? dst_end : src))
+ goto label_end_of_loop;
+ *d++ = ISO_CODE_ESC;
+ *d++ = '%';
+ *d++ = '@';
+ dst = d;
+ coding->produced_char += produced_chars + 3;
+ }
+ else
+ goto label_invalid_code;
+ continue;
+
default:
if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
goto label_invalid_code;
DECODE_COMPOSITION_END ('1');
src = src_base;
c = *src++;
+ if (! NILP (translation_table))
+ c = translate_char (translation_table, c, 0, 0, 0);
EMIT_CHAR (c);
}
encode_designation_at_bol (coding, translation_table, src, src_end, dst)
struct coding_system *coding;
Lisp_Object translation_table;
- unsigned char *src, *src_end, *dst;
+ const unsigned char *src, *src_end;
+ unsigned char *dst;
{
int charset, c, found = 0, reg;
/* Table of charsets to be designated to each graphic register. */
static void
encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
+ const unsigned char *src = source;
+ const unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
/* Since the maximum bytes produced by each loop is 20, we subtract 19
analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
there's not enough destination area to produce encoded codes
(within macro EMIT_BYTES). */
- unsigned char *src_base;
+ const unsigned char *src_base;
int c;
Lisp_Object translation_table;
Lisp_Object safe_chars;
decode_coding_sjis_big5 (coding, source, destination,
src_bytes, dst_bytes, sjis_p)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
int sjis_p;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
+ const unsigned char *src = source;
+ const unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
/* SRC_BASE remembers the start position in source in each loop.
(within macro ONE_MORE_BYTE), or when there's not enough
destination area to produce a character (within macro
EMIT_CHAR). */
- unsigned char *src_base;
+ const unsigned char *src_base;
Lisp_Object translation_table;
if (NILP (Venable_character_translation))
coding->produced_char = 0;
while (1)
{
- int c, charset, c1, c2;
+ int c, charset, c1, c2 = 0;
src_base = src;
ONE_MORE_BYTE (c1);
static void
decode_eol (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
- unsigned char *source, *destination;
+ const unsigned char *source;
+ unsigned char *destination;
int src_bytes, dst_bytes;
{
- unsigned char *src = source;
+ const unsigned char *src = source;
unsigned char *dst = destination;
- unsigned char *src_end = src + src_bytes;
+ const unsigned char *src_end = src + src_bytes;
unsigned char *dst_end = dst + dst_bytes;
Lisp_Object translation_table;
/* SRC_BASE remembers the start position in source in each loop.
(within macro ONE_MORE_BYTE), or when there's not enough
destination area to produce a character (within macro
EMIT_CHAR). */
- unsigned char *src_base;
+ const unsigned char *src_base;
int c;
translation_table = Qnil;
o coding-category-utf-8
The category for a coding system which has the same code range
- as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
+ as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
symbol) `utf-8' by default.
o coding-category-utf-16-be
int magnification;
if (coding->type == coding_type_ccl)
- magnification = coding->spec.ccl.encoder.buf_magnification;
+ {
+ magnification = coding->spec.ccl.encoder.buf_magnification;
+ if (coding->eol_type == CODING_EOL_CRLF)
+ magnification *= 2;
+ }
else if (CODING_REQUIRE_ENCODING (coding))
magnification = 3;
else
unsigned char *data;
};
-/* Don't use alloca for allocating memory space larger than this, lest
- we overflow their stack. */
-#define MAX_ALLOCA 16*1024
-
/* Allocate LEN bytes of memory for BUF (struct conversion_buffer). */
#define allocate_conversion_buffer(buf, len) \
do { \
if (ccl->eol_type ==CODING_EOL_UNDECIDED)
ccl->eol_type = CODING_EOL_LF;
ccl->cr_consumed = coding->spec.ccl.cr_carryover;
+ ccl->eight_bit_control = coding->dst_multibyte;
}
+ else
+ ccl->eight_bit_control = 1;
ccl->multibyte = coding->src_multibyte;
if (coding->spec.ccl.eight_bit_carryover[0] != 0)
{
} \
} while (0)
+/* Unwind handler installed with record_unwind_protect around code
+ conversion. ARG is a cons (CODING . BUFFER): CODING is the value
+ to store back into Vlast_coding_system_used, and BUFFER, if
+ non-nil, is a buffer to kill. Also resets
+ inhibit_pre_post_conversion to 0. Always returns Qnil. */
static Lisp_Object
code_convert_region_unwind (arg)
Lisp_Object arg;
{
inhibit_pre_post_conversion = 0; /* Conversion functions may run again. */
- Vlast_coding_system_used = arg;
+ Vlast_coding_system_used = XCAR (arg);
+ if (! NILP (XCDR (arg)))
+ Fkill_buffer (XCDR (arg));
return Qnil;
}
enum composition_method method = (enum composition_method) data[3];
Lisp_Object components;
+ if (data[0] < 0 || i + data[0] > cmp_data->used)
+ /* Invalid composition data. */
+ break;
+
if (method == COMPOSITION_RELATIVE)
components = Qnil;
else
if (method == COMPOSITION_WITH_RULE_ALTCHARS
&& len % 2 == 0)
len --;
+ if (len < 1)
+ /* Invalid composition data. */
+ break;
for (j = 0; j < len; j++)
args[j] = make_number (data[4 + j]);
components = (method == COMPOSITION_WITH_ALTCHARS
- ? Fstring (len, args) : Fvector (len, args));
+ ? Fstring (len, args)
+ : Fvector (len, args));
}
compose_text (data[1], data[2], components, Qnil, obj);
}
Lisp_Object new;
record_unwind_protect (code_convert_region_unwind,
- Vlast_coding_system_used);
+ Fcons (Vlast_coding_system_used, Qnil));
/* We should not call any more pre-write/post-read-conversion
functions while this pre-write-conversion is running. */
inhibit_pre_post_conversion = 1;
coding_allocate_composition_data (coding, from);
}
- /* Try to skip the heading and tailing ASCIIs. */
- if (coding->type != coding_type_ccl)
+ /* Try to skip the heading and tailing ASCIIs. We can't skip them
+ if we must run CCL program or there are compositions to
+ encode. */
+ if (coding->type != coding_type_ccl
+ && (! coding->cmp_data || coding->cmp_data->used == 0))
{
int from_byte_orig = from_byte, to_byte_orig = to_byte;
if (!replace)
/* We must record and adjust for this new text now. */
adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
+ coding_free_composition_data (coding);
return 0;
}
REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
Here, we are sure that NEW >= ORIG. */
- float ratio;
if (coding->produced <= coding->consumed)
{
}
else
{
- ratio = (coding->produced - coding->consumed) / coding->consumed;
+ float ratio = coding->produced - coding->consumed;
+ ratio /= coding->consumed;
require = len_byte * ratio;
}
first = 0;
TEMP_SET_PT_BOTH (from, from_byte);
prev_Z = Z;
record_unwind_protect (code_convert_region_unwind,
- Vlast_coding_system_used);
+ Fcons (Vlast_coding_system_used, Qnil));
saved_coding_system = Vlast_coding_system_used;
Vlast_coding_system_used = coding->symbol;
/* We should not call any more pre-write/post-read-conversion
return 0;
}
+/* Name (or base name) of work buffer for code conversion. */
+static Lisp_Object Vcode_conversion_workbuf_name;
+
+/* Set the current buffer to the working buffer prepared for
+ code-conversion. MULTIBYTE specifies the multibyteness of the
+ buffer. Return the buffer we set if it must be killed after use.
+ Otherwise return Qnil.
+
+ The work buffer is named by Vcode_conversion_workbuf_name. If that
+ buffer is already the current buffer, a fresh buffer with a
+ generated name is used instead, and that fresh buffer is the one
+ returned for killing. On return the work buffer is current,
+ widened, and emptied. The caller is responsible for switching
+ back to its own buffer. */
+
+static Lisp_Object
+set_conversion_work_buffer (multibyte)
+ int multibyte;
+{
+ Lisp_Object buffer, buffer_to_kill;
+ struct buffer *buf;
+
+ buffer = Fget_buffer_create (Vcode_conversion_workbuf_name);
+ buf = XBUFFER (buffer);
+ if (buf == current_buffer)
+ {
+ /* As we are already in the work buffer, we must generate a new
+ buffer for the work. Only such a generated buffer is handed
+ back to the caller to be killed. */
+ Lisp_Object name;
+
+ name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
+ buffer = buffer_to_kill = Fget_buffer_create (name);
+ buf = XBUFFER (buffer);
+ }
+ else
+ buffer_to_kill = Qnil; /* The shared work buffer is kept for reuse. */
+
+ delete_all_overlays (buf); /* The asserts below check that none remain. */
+ buf->directory = current_buffer->directory; /* Taken from the caller's
+ buffer, which is still current here. */
+ buf->read_only = Qnil;
+ buf->filename = Qnil;
+ buf->undo_list = Qt;
+ eassert (buf->overlays_before == NULL);
+ eassert (buf->overlays_after == NULL);
+ set_buffer_internal (buf); /* From here on BEG, Z, etc. refer to BUF. */
+ if (BEG != BEGV || Z != ZV)
+ Fwiden (); /* Undo any narrowing so the deletion covers everything. */
+ del_range_2 (BEG, BEG_BYTE, Z, Z_BYTE, 0); /* Empty the whole buffer. */
+ buf->enable_multibyte_characters = multibyte ? Qt : Qnil; /* Set only
+ after the buffer has been emptied. */
+ return buffer_to_kill;
+}
+
Lisp_Object
run_pre_post_conversion_on_str (str, coding, encodep)
Lisp_Object str;
int count = SPECPDL_INDEX ();
struct gcpro gcpro1, gcpro2;
int multibyte = STRING_MULTIBYTE (str);
- Lisp_Object buffer;
- struct buffer *buf;
Lisp_Object old_deactivate_mark;
+ Lisp_Object buffer_to_kill;
record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
- record_unwind_protect (code_convert_region_unwind,
- Vlast_coding_system_used);
/* It is not crucial to specbind this. */
old_deactivate_mark = Vdeactivate_mark;
GCPRO2 (str, old_deactivate_mark);
- buffer = Fget_buffer_create (build_string (" *code-converting-work*"));
- buf = XBUFFER (buffer);
-
- buf->directory = current_buffer->directory;
- buf->read_only = Qnil;
- buf->filename = Qnil;
- buf->undo_list = Qt;
- buf->overlays_before = Qnil;
- buf->overlays_after = Qnil;
-
- set_buffer_internal (buf);
/* We must insert the contents of STR as is without
unibyte<->multibyte conversion. For that, we adjust the
multibyteness of the working buffer to that of STR. */
- Ferase_buffer ();
- buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
+ buffer_to_kill = set_conversion_work_buffer (multibyte);
+ record_unwind_protect (code_convert_region_unwind,
+ Fcons (Vlast_coding_system_used, buffer_to_kill));
insert_from_string (str, 0, 0,
SCHARS (str), SBYTES (str), 0);
return unbind_to (count, str);
}
+
+/* Run pre-write-conversion function of CODING on NCHARS/NBYTES
+ text in *STR. *SIZE is the allocated bytes for STR. As it
+ is intended that this function is called from encode_terminal_code,
+ the pre-write-conversion function is run by safe_call and thus
+ "Error during redisplay: ..." is logged when an error occurs.
+
+ Store the resulting text in *STR and set CODING->produced_char and
+ CODING->produced to the number of characters and bytes
+ respectively. If the size of *STR is too small, enlarge it by
+ xrealloc and update *STR and *SIZE.
+
+ CODING->src_multibyte is also updated to the multibyteness the
+ work buffer has after the conversion function has run.
+ Vdeactivate_mark and Vlast_coding_system_used are saved and
+ restored around the call, the buffer that was current on entry is
+ made current again, and a temporary work buffer, if one had to be
+ generated, is killed before returning.
+
+ Note the spelling of the function name (`conversin'); do not
+ correct it here without also updating every caller. */
+
+void
+run_pre_write_conversin_on_c_str (str, size, nchars, nbytes, coding)
+ unsigned char **str;
+ int *size, nchars, nbytes;
+ struct coding_system *coding;
+{
+ struct gcpro gcpro1, gcpro2;
+ struct buffer *cur = current_buffer; /* Restored just before returning. */
+ Lisp_Object old_deactivate_mark, old_last_coding_system_used;
+ Lisp_Object args[3];
+ Lisp_Object buffer_to_kill;
+
+ /* It is not crucial to specbind this. Both variables are simply
+ saved here and assigned back by hand after the call below. */
+ old_deactivate_mark = Vdeactivate_mark;
+ old_last_coding_system_used = Vlast_coding_system_used;
+ GCPRO2 (old_deactivate_mark, old_last_coding_system_used);
+
+ /* We must insert the contents of STR as is without
+ unibyte<->multibyte conversion. For that, we adjust the
+ multibyteness of the working buffer to that of STR, which is
+ given by CODING->src_multibyte. */
+ buffer_to_kill = set_conversion_work_buffer (coding->src_multibyte);
+ insert_1_both (*str, nchars, nbytes, 0, 0, 0);
+ UNGCPRO;
+ inhibit_pre_post_conversion = 1; /* Reset to 0 right after the call. */
+ args[0] = coding->pre_write_conversion;
+ args[1] = make_number (BEG); /* The function is called on the whole */
+ args[2] = make_number (Z); /* work buffer, from BEG to Z. */
+ safe_call (3, args);
+ inhibit_pre_post_conversion = 0;
+ Vdeactivate_mark = old_deactivate_mark;
+ Vlast_coding_system_used = old_last_coding_system_used;
+ coding->produced_char = Z - BEG; /* Sizes of the text now in the */
+ coding->produced = Z_BYTE - BEG_BYTE; /* current buffer. */
+ if (coding->produced > *size)
+ {
+ *size = coding->produced;
+ *str = xrealloc (*str, *size);
+ }
+ if (BEG < GPT && GPT < Z)
+ move_gap (BEG); /* Gap lies inside the text: move it to BEG so that
+ the bcopy below can copy the text in one piece. */
+ bcopy (BEG_ADDR, *str, coding->produced);
+ coding->src_multibyte
+ = ! NILP (current_buffer->enable_multibyte_characters);
+ set_buffer_internal (cur);
+ if (! NILP (buffer_to_kill))
+ Fkill_buffer (buffer_to_kill);
+}
+
+
Lisp_Object
decode_coding_string (str, coding, nocopy)
Lisp_Object str;
produced += coding->produced;
produced_char += coding->produced_char;
if (result == CODING_FINISH_NORMAL
+ || result == CODING_FINISH_INTERRUPT
|| (result == CODING_FINISH_INSUFFICIENT_SRC
&& coding->consumed == 0))
break;
shrinked_bytes - from);
free_conversion_buffer (&buf);
+ coding->consumed += shrinked_bytes;
+ coding->consumed_char += shrinked_bytes;
+ coding->produced += shrinked_bytes;
+ coding->produced_char += shrinked_bytes;
+
if (coding->cmp_data && coding->cmp_data->used)
coding_restore_composition (coding, newstr);
coding_free_composition_data (coding);
if (SYMBOLP (coding->pre_write_conversion)
&& !NILP (Ffboundp (coding->pre_write_conversion)))
- str = run_pre_post_conversion_on_str (str, coding, 1);
+ {
+ str = run_pre_post_conversion_on_str (str, coding, 1);
+ /* As STR is just newly generated, we don't have to copy it
+ anymore. */
+ nocopy = 1;
+ }
from = 0;
to = SCHARS (str);
/* Encoding routines determine the multibyteness of the source text
by coding->src_multibyte. */
- coding->src_multibyte = STRING_MULTIBYTE (str);
+ coding->src_multibyte = SCHARS (str) < SBYTES (str);
coding->dst_multibyte = 0;
if (! CODING_REQUIRE_ENCODING (coding))
- {
- coding->consumed = SBYTES (str);
- coding->consumed_char = SCHARS (str);
- if (STRING_MULTIBYTE (str))
- {
- str = Fstring_as_unibyte (str);
- nocopy = 1;
- }
- coding->produced = SBYTES (str);
- coding->produced_char = SCHARS (str);
- return (nocopy ? str : Fcopy_sequence (str));
- }
+ goto no_need_of_encoding;
if (coding->composing != COMPOSITION_DISABLED)
coding_save_composition (coding, from, to, str);
- /* Try to skip the heading and tailing ASCIIs. */
- if (coding->type != coding_type_ccl)
+ /* Try to skip the heading and tailing ASCIIs. We can't skip them
+ if we must run CCL program or there are compositions to
+ encode. */
+ if (coding->type != coding_type_ccl
+ && (! coding->cmp_data || coding->cmp_data->used == 0))
{
SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
1);
if (from == to_byte)
- return (nocopy ? str : Fcopy_sequence (str));
+ {
+ coding_free_composition_data (coding);
+ goto no_need_of_encoding;
+ }
shrinked_bytes = from + (SBYTES (str) - to_byte);
}
produced += coding->produced;
produced_char += coding->produced_char;
if (result == CODING_FINISH_NORMAL
+ || result == CODING_FINISH_INTERRUPT
|| (result == CODING_FINISH_INSUFFICIENT_SRC
&& coding->consumed == 0))
break;
coding_free_composition_data (coding);
return newstr;
+
+ no_need_of_encoding:
+ coding->consumed = SBYTES (str);
+ coding->consumed_char = SCHARS (str);
+ if (STRING_MULTIBYTE (str))
+ {
+ if (nocopy)
+ /* We are sure that STR doesn't contain a multibyte
+ character. */
+ STRING_SET_UNIBYTE (str);
+ else
+ {
+ str = Fstring_as_unibyte (str);
+ nocopy = 1;
+ }
+ }
+ coding->produced = SBYTES (str);
+ coding->produced_char = SCHARS (str);
+ return (nocopy ? str : Fcopy_sequence (str));
}
\f
return Qt;
if (!SYMBOLP (obj))
return Qnil;
+ if (! NILP (Fget (obj, Qcoding_system_define_form)))
+ return Qt;
/* Get coding-spec vector for OBJ. */
obj = Fget (obj, Qcoding_system);
return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
1, 1, 0,
doc: /* Check validity of CODING-SYSTEM.
If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
-It is valid if it is a symbol with a non-nil `coding-system' property.
-The value of property should be a vector of length 5. */)
+It is valid if it is nil or a symbol with a non-nil `coding-system' property.
+The value of this property should be a vector of length 5. */)
(coding_system)
Lisp_Object coding_system;
{
- CHECK_SYMBOL (coding_system);
+ Lisp_Object define_form;
+
+ define_form = Fget (coding_system, Qcoding_system_define_form);
+ if (! NILP (define_form))
+ {
+ Fput (coding_system, Qcoding_system_define_form, Qnil);
+ safe_eval (define_form);
+ }
if (!NILP (Fcoding_system_p (coding_system)))
return coding_system;
while (1)
STRING_MULTIBYTE (string));
}
-/* Subroutine for Fsafe_coding_systems_region_internal.
+/* Subroutine for Ffind_coding_systems_region_internal.
Return a list of coding systems that safely encode the multibyte
text between P and PEND. SAFE_CODINGS, if non-nil, is an alist of
possible coding systems. If it is nil, it means that we have not
yet found any coding systems.
- WORK_TABLE is a copy of the char-table Vchar_coding_system_table. An
- element of WORK_TABLE is set to t once the element is looked up.
+ WORK_TABLE is a char-table whose elements are set to t once they
+ have been looked up.
If a non-ASCII single byte char is found, set
*single_byte_char_found to 1. */
Lisp_Object val, ch;
Lisp_Object prev, tail;
+ if (NILP (safe_codings))
+ goto done_safe_codings;
while (p < pend)
{
c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
continue;
if (SINGLE_BYTE_CHAR_P (c))
*single_byte_char_found = 1;
- if (NILP (safe_codings))
- /* Already all coding systems are excluded. But, we can't
- terminate the loop here because non-ASCII single-byte char
- must be found. */
- continue;
/* Check the safe coding systems for C. */
ch = make_number (c);
val = Faref (work_table, ch);
accept_latin_extra));
}
}
-
+
if (! encodable
&& ((CHAR_TABLE_P (translation_table)
&& ! NILP (Faref (translation_table, ch)))
{
/* Exclude this coding system from SAFE_CODINGS. */
if (EQ (tail, safe_codings))
- safe_codings = XCDR (safe_codings);
+ {
+ safe_codings = XCDR (safe_codings);
+ if (NILP (safe_codings))
+ goto done_safe_codings;
+ }
else
XSETCDR (prev, XCDR (tail));
}
}
}
+
+ done_safe_codings:
+ /* If the above loop terminated before P reached PEND, it means
+ SAFE_CODINGS became nil. If we have not yet found a non-ASCII
+ single-byte char, scan the rest of the text for one now. */
+ if (! *single_byte_char_found)
+ while (p < pend)
+ {
+ c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
+ p += len;
+ if (! ASCII_BYTE_P (c)
+ && SINGLE_BYTE_CHAR_P (c))
+ {
+ *single_byte_char_found = 1;
+ break;
+ }
+ }
return safe_codings;
}
/* Character composition should be disabled. */
safe_terminal_coding.composing = COMPOSITION_DISABLED;
/* Error notification should be suppressed. */
- terminal_coding.suppress_error = 1;
+ safe_terminal_coding.suppress_error = 1;
safe_terminal_coding.src_multibyte = 1;
safe_terminal_coding.dst_multibyte = 0;
return Qnil;
void
syms_of_coding ()
{
+ staticpro (&Vcode_conversion_workbuf_name);
+ Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
+
Qtarget_idx = intern ("target-idx");
staticpro (&Qtarget_idx);
Qraw_text = intern ("raw-text");
staticpro (&Qraw_text);
+ Qutf_8 = intern ("utf-8");
+ staticpro (&Qutf_8);
+
+ Qcoding_system_define_form = intern ("coding-system-define-form");
+ staticpro (&Qcoding_system_define_form);
+
defsubr (&Scoding_system_p);
defsubr (&Sread_coding_system);
defsubr (&Sread_non_nil_coding_system);
On detecting a coding system, Emacs tries code detection algorithms
associated with each coding-category one by one in this order. When
one algorithm agrees with a byte sequence of source text, the coding
-system bound to the corresponding coding-category is selected. */);
+system bound to the corresponding coding-category is selected.
+
+Don't modify this variable directly, but use `set-coding-priority'. */);
{
int i;
#endif /* emacs */
+/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
+ (do not change this comment) */