X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/38b92c4260400393c5cfe1394b7c01488a6d5b4b..cda4c00bef6556b851f007b22818060a55b4bb43:/src/coding.c diff --git a/src/coding.c b/src/coding.c index 62c761c5bc..bd96b5cc06 100644 --- a/src/coding.c +++ b/src/coding.c @@ -1,7 +1,9 @@ /* Coding system handler (conversion, detection, and etc). - Copyright (C) 1995,97,1998,2002,2003 Electrotechnical Laboratory, JAPAN. - Licensed to the Free Software Foundation. - Copyright (C) 2001,2002,2003 Free Software Foundation, Inc. + Copyright (C) 2001, 2002, 2003, 2004, 2005, + 2006 Free Software Foundation, Inc. + Copyright (C) 1995, 1997, 1998, 2002, 2003, 2004, 2005 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H14PRO021 This file is part of GNU Emacs. @@ -17,8 +19,8 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU Emacs; see the file COPYING. If not, write to -the Free Software Foundation, Inc., 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ +the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ /*** TABLE OF CONTENTS *** @@ -147,7 +149,8 @@ detect_coding_emacs_mule (src, src_end, multibytep) static void decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { ... @@ -362,7 +365,7 @@ Lisp_Object Qsafe_chars; Lisp_Object Qvalid_codes; extern Lisp_Object Qinsert_file_contents, Qwrite_region; -Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; +Lisp_Object Qcall_process, Qcall_process_region; Lisp_Object Qstart_process, Qopen_network_stream; Lisp_Object Qtarget_idx; @@ -732,7 +735,7 @@ detect_coding_emacs_mule (src, src_end, multibytep) c = -1; \ else \ { \ - c -= 0xA0; \ + c -= 0x80; \ *p++ = c; \ } \ } \ @@ -800,12 +803,13 @@ static INLINE int decode_composition_emacs_mule (coding, src, src_end, destination, dst_end, dst_bytes) struct coding_system *coding; - unsigned char *src, *src_end, **destination, *dst_end; + const unsigned char *src, *src_end; + unsigned char **destination, *dst_end; int dst_bytes; { unsigned char *dst = *destination; int method, data_len, nchars; - unsigned char *src_base = src++; + const unsigned char *src_base = src++; /* Store components of composition. */ int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH]; int ncomponent; @@ -869,7 +873,7 @@ decode_composition_emacs_mule (coding, src, src_end, component[ncomponent] = c; } } - else + else if (c >= 0x80) { /* This may be an old Emacs 20 style format. See the comment at the section 2 of this file. */ @@ -921,6 +925,8 @@ decode_composition_emacs_mule (coding, src, src_end, else return 0; } + else + return 0; if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src)) { @@ -946,23 +952,25 @@ decode_composition_emacs_mule (coding, src, src_end, static void decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; + const unsigned char *src = source; + const unsigned char *src_end = source + src_bytes; unsigned char *dst = destination; unsigned char *dst_end = destination + dst_bytes; /* SRC_BASE remembers the start position in source in each loop. The loop will be exited when there's not enough source code, or when there's not enough destination area to produce a character. */ - unsigned char *src_base; + const unsigned char *src_base; coding->produced_char = 0; while ((src_base = src) < src_end) { - unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p; + unsigned char tmp[MAX_MULTIBYTE_LENGTH]; + const unsigned char *p; int bytes; if (*src == '\r') @@ -1024,9 +1032,26 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) } else { - bytes = CHAR_STRING (*src, tmp); - p = tmp; + int i, c; + + bytes = BYTES_BY_CHAR_HEAD (*src); src++; + for (i = 1; i < bytes; i++) + { + ONE_MORE_BYTE (c); + if (CHAR_HEAD_P (c)) + break; + } + if (i < bytes) + { + bytes = CHAR_STRING (*src_base, tmp); + p = tmp; + src = src_base + 1; + } + else + { + p = src_base; + } } if (dst + bytes >= (dst_bytes ? dst_end : src)) { @@ -1099,14 +1124,15 @@ static void encode_eol P_ ((struct coding_system *, const unsigned char *, static void encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; + const unsigned char *src = source; + const unsigned char *src_end = source + src_bytes; unsigned char *dst = destination; unsigned char *dst_end = destination + dst_bytes; - unsigned char *src_base; + const unsigned char *src_base; int c; int char_offset; int *data; @@ -1793,11 +1819,12 @@ coding_allocate_composition_data (coding, char_offset) static void decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; + const unsigned char *src = source; + const unsigned char *src_end = source + src_bytes; unsigned char *dst = destination; unsigned char *dst_end = destination + dst_bytes; /* Charsets invoked to graphic plane 0 and 1 respectively. */ @@ -1808,7 +1835,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) (within macro ONE_MORE_BYTE), or when there's not enough destination area to produce a character (within macro EMIT_CHAR). */ - unsigned char *src_base; + const unsigned char *src_base; int c, charset; Lisp_Object translation_table; Lisp_Object safe_chars; @@ -2081,7 +2108,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) int dim, M, L; int size, required; int produced_chars; - + ONE_MORE_BYTE (dim); ONE_MORE_BYTE (M); ONE_MORE_BYTE (L); @@ -2185,6 +2212,8 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) DECODE_COMPOSITION_END ('1'); src = src_base; c = *src++; + if (! NILP (translation_table)) + c = translate_char (translation_table, c, 0, 0, 0); EMIT_CHAR (c); } @@ -2579,7 +2608,8 @@ static unsigned char * encode_designation_at_bol (coding, translation_table, src, src_end, dst) struct coding_system *coding; Lisp_Object translation_table; - unsigned char *src, *src_end, *dst; + const unsigned char *src, *src_end; + unsigned char *dst; { int charset, c, found = 0, reg; /* Table of charsets to be designated to each graphic register. */ @@ -2620,11 +2650,12 @@ encode_designation_at_bol (coding, translation_table, src, src_end, dst) static void encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; + const unsigned char *src = source; + const unsigned char *src_end = source + src_bytes; unsigned char *dst = destination; unsigned char *dst_end = destination + dst_bytes; /* Since the maximum bytes produced by each loop is 20, we subtract 19 @@ -2636,7 +2667,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) analyze multi-byte codes (within macro ONE_MORE_CHAR), or when there's not enough destination area to produce encoded codes (within macro EMIT_BYTES). */ - unsigned char *src_base; + const unsigned char *src_base; int c; Lisp_Object translation_table; Lisp_Object safe_chars; @@ -3030,12 +3061,13 @@ static void decode_coding_sjis_big5 (coding, source, destination, src_bytes, dst_bytes, sjis_p) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; int sjis_p; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; + const unsigned char *src = source; + const unsigned char *src_end = source + src_bytes; unsigned char *dst = destination; unsigned char *dst_end = destination + dst_bytes; /* SRC_BASE remembers the start position in source in each loop. @@ -3043,7 +3075,7 @@ decode_coding_sjis_big5 (coding, source, destination, (within macro ONE_MORE_BYTE), or when there's not enough destination area to produce a character (within macro EMIT_CHAR). */ - unsigned char *src_base; + const unsigned char *src_base; Lisp_Object translation_table; if (NILP (Venable_character_translation)) @@ -3303,12 +3335,13 @@ detect_coding_ccl (src, src_end, multibytep) static void decode_eol (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { - unsigned char *src = source; + const unsigned char *src = source; unsigned char *dst = destination; - unsigned char *src_end = src + src_bytes; + const unsigned char *src_end = src + src_bytes; unsigned char *dst_end = dst + dst_bytes; Lisp_Object translation_table; /* SRC_BASE remembers the start position in source in each loop. @@ -3316,7 +3349,7 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes) (within macro ONE_MORE_BYTE), or when there's not enough destination area to produce a character (within macro EMIT_CHAR). */ - unsigned char *src_base; + const unsigned char *src_base; int c; translation_table = Qnil; @@ -3574,6 +3607,8 @@ setup_coding_system (coding_system, coding) { coding->eol_type = CODING_EOL_UNDECIDED; coding->common_flags = CODING_REQUIRE_DETECTION_MASK; + if (system_eol_type != CODING_EOL_LF) + coding->common_flags |= CODING_REQUIRE_ENCODING_MASK; } else if (XFASTINT (eol_type) == 1) { @@ -3889,9 +3924,12 @@ setup_coding_system (coding_system, coding) coding->type = coding_type_no_conversion; coding->category_idx = CODING_CATEGORY_IDX_BINARY; coding->common_flags = 0; - coding->eol_type = CODING_EOL_LF; + coding->eol_type = NILP (coding_system) ? system_eol_type : CODING_EOL_LF; + if (coding->eol_type != CODING_EOL_LF) + coding->common_flags + |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK; coding->pre_write_conversion = coding->post_read_conversion = Qnil; - return -1; + return NILP (coding_system) ? 0 : -1; } /* Free memory blocks allocated for storing composition information. */ @@ -4535,10 +4573,6 @@ struct conversion_buffer unsigned char *data; }; -/* Don't use alloca for allocating memory space larger than this, lest - we overflow their stack. */ -#define MAX_ALLOCA 16*1024 - /* Allocate LEN bytes of memory for BUF (struct conversion_buffer). */ #define allocate_conversion_buffer(buf, len) \ do { \ @@ -4969,6 +5003,8 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes) coding->consumed = coding->consumed_char = 0; coding->errors = 0; coding->result = CODING_FINISH_NORMAL; + if (coding->eol_type == CODING_EOL_UNDECIDED) + coding->eol_type = system_eol_type; switch (coding->type) { @@ -5225,6 +5261,8 @@ shrink_encoding_region (beg, end, coding, str) if (coding->type == coding_type_ccl || coding->eol_type == CODING_EOL_CRLF || coding->eol_type == CODING_EOL_CR + || (coding->eol_type == CODING_EOL_UNDECIDED + && system_eol_type != CODING_EOL_LF) || (coding->cmp_data && coding->cmp_data->used > 0)) { /* We can't skip any data. */ @@ -5328,12 +5366,22 @@ static int shrink_conversion_region_threshhold = 1024; } \ } while (0) +/* ARG is (CODING BUFFER ...) where CODING is what to be set in + Vlast_coding_system_used and the remaining elements are buffers to + kill. */ static Lisp_Object code_convert_region_unwind (arg) Lisp_Object arg; { + struct gcpro gcpro1; + GCPRO1 (arg); + inhibit_pre_post_conversion = 0; - Vlast_coding_system_used = arg; + Vlast_coding_system_used = XCAR (arg); + for (arg = XCDR (arg); ! NILP (arg); arg = XCDR (arg)) + Fkill_buffer (XCAR (arg)); + + UNGCPRO; return Qnil; } @@ -5586,7 +5634,7 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) Lisp_Object new; record_unwind_protect (code_convert_region_unwind, - Vlast_coding_system_used); + Fcons (Vlast_coding_system_used, Qnil)); /* We should not call any more pre-write/post-read-conversion functions while this pre-write-conversion is running. */ inhibit_pre_post_conversion = 1; @@ -5637,8 +5685,11 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) coding_allocate_composition_data (coding, from); } - /* Try to skip the heading and tailing ASCIIs. */ - if (coding->type != coding_type_ccl) + /* Try to skip the heading and tailing ASCIIs. We can't skip them + if we must run CCL program or there are compositions to + encode. */ + if (coding->type != coding_type_ccl + && (! coding->cmp_data || coding->cmp_data->used == 0)) { int from_byte_orig = from_byte, to_byte_orig = to_byte; @@ -5654,6 +5705,7 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) if (!replace) /* We must record and adjust for this new text now. */ adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len); + coding_free_composition_data (coding); return 0; } @@ -5864,7 +5916,6 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG) REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG Here, we are sure that NEW >= ORIG. */ - float ratio; if (coding->produced <= coding->consumed) { @@ -5874,7 +5925,8 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) } else { - ratio = (coding->produced - coding->consumed) / coding->consumed; + float ratio = coding->produced - coding->consumed; + ratio /= coding->consumed; require = len_byte * ratio; } first = 0; @@ -5950,7 +6002,7 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) TEMP_SET_PT_BOTH (from, from_byte); prev_Z = Z; record_unwind_protect (code_convert_region_unwind, - Vlast_coding_system_used); + Fcons (Vlast_coding_system_used, Qnil)); saved_coding_system = Vlast_coding_system_used; Vlast_coding_system_used = coding->symbol; /* We should not call any more pre-write/post-read-conversion @@ -5991,6 +6043,51 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) return 0; } +/* Name (or base name) of work buffer for code conversion. */ +static Lisp_Object Vcode_conversion_workbuf_name; + +/* Set the current buffer to the working buffer prepared for + code-conversion. MULTIBYTE specifies the multibyteness of the + buffer. Return the buffer we set if it must be killed after use. + Otherwise return Qnil. */ + +static Lisp_Object +set_conversion_work_buffer (multibyte) + int multibyte; +{ + Lisp_Object buffer, buffer_to_kill; + struct buffer *buf; + + buffer = Fget_buffer_create (Vcode_conversion_workbuf_name); + buf = XBUFFER (buffer); + if (buf == current_buffer) + { + /* As we are already in the work buffer, we must generate a new + buffer for the work. */ + Lisp_Object name; + + name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil); + buffer = buffer_to_kill = Fget_buffer_create (name); + buf = XBUFFER (buffer); + } + else + buffer_to_kill = Qnil; + + delete_all_overlays (buf); + buf->directory = current_buffer->directory; + buf->read_only = Qnil; + buf->filename = Qnil; + buf->undo_list = Qt; + eassert (buf->overlays_before == NULL); + eassert (buf->overlays_after == NULL); + set_buffer_internal (buf); + if (BEG != BEGV || Z != ZV) + Fwiden (); + del_range_2 (BEG, BEG_BYTE, Z, Z_BYTE, 0); + buf->enable_multibyte_characters = multibyte ? Qt : Qnil; + return buffer_to_kill; +} + Lisp_Object run_pre_post_conversion_on_str (str, coding, encodep) Lisp_Object str; @@ -6000,41 +6097,38 @@ run_pre_post_conversion_on_str (str, coding, encodep) int count = SPECPDL_INDEX (); struct gcpro gcpro1, gcpro2; int multibyte = STRING_MULTIBYTE (str); - Lisp_Object buffer; - struct buffer *buf; Lisp_Object old_deactivate_mark; + Lisp_Object buffer_to_kill; + Lisp_Object unwind_arg; record_unwind_protect (Fset_buffer, Fcurrent_buffer ()); - record_unwind_protect (code_convert_region_unwind, - Vlast_coding_system_used); /* It is not crucial to specbind this. */ old_deactivate_mark = Vdeactivate_mark; GCPRO2 (str, old_deactivate_mark); - buffer = Fget_buffer_create (build_string (" *code-converting-work*")); - buf = XBUFFER (buffer); - - delete_all_overlays (buf); - buf->directory = current_buffer->directory; - buf->read_only = Qnil; - buf->filename = Qnil; - buf->undo_list = Qt; - eassert (buf->overlays_before == NULL); - eassert (buf->overlays_after == NULL); - - set_buffer_internal (buf); /* We must insert the contents of STR as is without unibyte<->multibyte conversion. For that, we adjust the multibyteness of the working buffer to that of STR. */ - Ferase_buffer (); - buf->enable_multibyte_characters = multibyte ? Qt : Qnil; + buffer_to_kill = set_conversion_work_buffer (multibyte); + if (NILP (buffer_to_kill)) + unwind_arg = Fcons (Vlast_coding_system_used, Qnil); + else + unwind_arg = list2 (Vlast_coding_system_used, buffer_to_kill); + record_unwind_protect (code_convert_region_unwind, unwind_arg); insert_from_string (str, 0, 0, SCHARS (str), SBYTES (str), 0); UNGCPRO; inhibit_pre_post_conversion = 1; if (encodep) - call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z)); + { + struct buffer *prev = current_buffer; + + call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z)); + if (prev != current_buffer) + /* We must kill the current buffer too. */ + Fsetcdr (unwind_arg, Fcons (Fcurrent_buffer (), XCDR (unwind_arg))); + } else { Vlast_coding_system_used = coding->symbol; @@ -6048,6 +6142,71 @@ run_pre_post_conversion_on_str (str, coding, encodep) return unbind_to (count, str); } + +/* Run pre-write-conversion function of CODING on NCHARS/NBYTES + text in *STR. *SIZE is the allocated bytes for STR. As it + is intended that this function is called from encode_terminal_code, + the pre-write-conversion function is run by safe_call and thus + "Error during redisplay: ..." is logged when an error occurs. + + Store the resulting text in *STR and set CODING->produced_char and + CODING->produced to the number of characters and bytes + respectively. If the size of *STR is too small, enlarge it by + xrealloc and update *STR and *SIZE. */ + +void +run_pre_write_conversin_on_c_str (str, size, nchars, nbytes, coding) + unsigned char **str; + int *size, nchars, nbytes; + struct coding_system *coding; +{ + struct gcpro gcpro1, gcpro2; + struct buffer *cur = current_buffer; + struct buffer *prev; + Lisp_Object old_deactivate_mark, old_last_coding_system_used; + Lisp_Object args[3]; + Lisp_Object buffer_to_kill; + + /* It is not crucial to specbind this. */ + old_deactivate_mark = Vdeactivate_mark; + old_last_coding_system_used = Vlast_coding_system_used; + GCPRO2 (old_deactivate_mark, old_last_coding_system_used); + + /* We must insert the contents of STR as is without + unibyte<->multibyte conversion. For that, we adjust the + multibyteness of the working buffer to that of STR. */ + buffer_to_kill = set_conversion_work_buffer (coding->src_multibyte); + insert_1_both (*str, nchars, nbytes, 0, 0, 0); + UNGCPRO; + inhibit_pre_post_conversion = 1; + prev = current_buffer; + args[0] = coding->pre_write_conversion; + args[1] = make_number (BEG); + args[2] = make_number (Z); + safe_call (3, args); + inhibit_pre_post_conversion = 0; + Vdeactivate_mark = old_deactivate_mark; + Vlast_coding_system_used = old_last_coding_system_used; + coding->produced_char = Z - BEG; + coding->produced = Z_BYTE - BEG_BYTE; + if (coding->produced > *size) + { + *size = coding->produced; + *str = xrealloc (*str, *size); + } + if (BEG < GPT && GPT < Z) + move_gap (BEG); + bcopy (BEG_ADDR, *str, coding->produced); + coding->src_multibyte + = ! NILP (current_buffer->enable_multibyte_characters); + if (prev != current_buffer) + Fkill_buffer (Fcurrent_buffer ()); + set_buffer_internal (cur); + if (! NILP (buffer_to_kill)) + Fkill_buffer (buffer_to_kill); +} + + Lisp_Object decode_coding_string (str, coding, nocopy) Lisp_Object str; @@ -6156,6 +6315,7 @@ decode_coding_string (str, coding, nocopy) produced += coding->produced; produced_char += coding->produced_char; if (result == CODING_FINISH_NORMAL + || result == CODING_FINISH_INTERRUPT || (result == CODING_FINISH_INSUFFICIENT_SRC && coding->consumed == 0)) break; @@ -6225,6 +6385,11 @@ decode_coding_string (str, coding, nocopy) shrinked_bytes - from); free_conversion_buffer (&buf); + coding->consumed += shrinked_bytes; + coding->consumed_char += shrinked_bytes; + coding->produced += shrinked_bytes; + coding->produced_char += shrinked_bytes; + if (coding->cmp_data && coding->cmp_data->used) coding_restore_composition (coding, newstr); coding_free_composition_data (coding); @@ -6252,7 +6417,12 @@ encode_coding_string (str, coding, nocopy) if (SYMBOLP (coding->pre_write_conversion) && !NILP (Ffboundp (coding->pre_write_conversion))) - str = run_pre_post_conversion_on_str (str, coding, 1); + { + str = run_pre_post_conversion_on_str (str, coding, 1); + /* As STR is just newly generated, we don't have to copy it + anymore. */ + nocopy = 1; + } from = 0; to = SCHARS (str); @@ -6260,32 +6430,27 @@ encode_coding_string (str, coding, nocopy) /* Encoding routines determine the multibyteness of the source text by coding->src_multibyte. */ - coding->src_multibyte = STRING_MULTIBYTE (str); + coding->src_multibyte = SCHARS (str) < SBYTES (str); coding->dst_multibyte = 0; if (! CODING_REQUIRE_ENCODING (coding)) - { - coding->consumed = SBYTES (str); - coding->consumed_char = SCHARS (str); - if (STRING_MULTIBYTE (str)) - { - str = Fstring_as_unibyte (str); - nocopy = 1; - } - coding->produced = SBYTES (str); - coding->produced_char = SCHARS (str); - return (nocopy ? str : Fcopy_sequence (str)); - } + goto no_need_of_encoding; if (coding->composing != COMPOSITION_DISABLED) coding_save_composition (coding, from, to, str); - /* Try to skip the heading and tailing ASCIIs. */ - if (coding->type != coding_type_ccl) + /* Try to skip the heading and tailing ASCIIs. We can't skip them + if we must run CCL program or there are compositions to + encode. */ + if (coding->type != coding_type_ccl + && (! coding->cmp_data || coding->cmp_data->used == 0)) { SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str), 1); if (from == to_byte) - return (nocopy ? str : Fcopy_sequence (str)); + { + coding_free_composition_data (coding); + goto no_need_of_encoding; + } shrinked_bytes = from + (SBYTES (str) - to_byte); } @@ -6303,6 +6468,7 @@ encode_coding_string (str, coding, nocopy) produced += coding->produced; produced_char += coding->produced_char; if (result == CODING_FINISH_NORMAL + || result == CODING_FINISH_INTERRUPT || (result == CODING_FINISH_INSUFFICIENT_SRC && coding->consumed == 0)) break; @@ -6328,6 +6494,25 @@ encode_coding_string (str, coding, nocopy) coding_free_composition_data (coding); return newstr; + + no_need_of_encoding: + coding->consumed = SBYTES (str); + coding->consumed_char = SCHARS (str); + if (STRING_MULTIBYTE (str)) + { + if (nocopy) + /* We are sure that STR doesn't contain a multibyte + character. */ + STRING_SET_UNIBYTE (str); + else + { + str = Fstring_as_unibyte (str); + nocopy = 1; + } + } + coding->produced = SBYTES (str); + coding->produced_char = SCHARS (str); + return (nocopy ? str : Fcopy_sequence (str)); } @@ -6546,15 +6731,15 @@ highest priority. */) STRING_MULTIBYTE (string)); } -/* Subroutine for Fsafe_coding_systems_region_internal. +/* Subroutine for Ffind_coding_systems_region_internal. Return a list of coding systems that safely encode the multibyte text between P and PEND. SAFE_CODINGS, if non-nil, is an alist of possible coding systems. If it is nil, it means that we have not yet found any coding systems. - WORK_TABLE is a copy of the char-table Vchar_coding_system_table. An - element of WORK_TABLE is set to t once the element is looked up. + WORK_TABLE a char-table of which element is set to t once the + element is looked up. If a non-ASCII single byte char is found, set *single_byte_char_found to 1. */ @@ -6569,6 +6754,8 @@ find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found) Lisp_Object val, ch; Lisp_Object prev, tail; + if (NILP (safe_codings)) + goto done_safe_codings; while (p < pend) { c = STRING_CHAR_AND_LENGTH (p, pend - p, len); @@ -6578,11 +6765,6 @@ find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found) continue; if (SINGLE_BYTE_CHAR_P (c)) *single_byte_char_found = 1; - if (NILP (safe_codings)) - /* Already all coding systems are excluded. But, we can't - terminate the loop here because non-ASCII single-byte char - must be found. */ - continue; /* Check the safe coding systems for C. */ ch = make_number (c); val = Faref (work_table, ch); @@ -6643,7 +6825,7 @@ find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found) accept_latin_extra)); } } - + if (! encodable && ((CHAR_TABLE_P (translation_table) && ! NILP (Faref (translation_table, ch))) @@ -6660,12 +6842,33 @@ find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found) { /* Exclude this coding system from SAFE_CODINGS. */ if (EQ (tail, safe_codings)) - safe_codings = XCDR (safe_codings); + { + safe_codings = XCDR (safe_codings); + if (NILP (safe_codings)) + goto done_safe_codings; + } else XSETCDR (prev, XCDR (tail)); } } } + + done_safe_codings: + /* If the above loop was terminated before P reaches PEND, it means + SAFE_CODINGS was set to nil. If we have not yet found an + non-ASCII single-byte char, check it now. */ + if (! *single_byte_char_found) + while (p < pend) + { + c = STRING_CHAR_AND_LENGTH (p, pend - p, len); + p += len; + if (! ASCII_BYTE_P (c) + && SINGLE_BYTE_CHAR_P (c)) + { + *single_byte_char_found = 1; + break; + } + } return safe_codings; } @@ -6915,7 +7118,7 @@ code_convert_region1 (start, end, coding_system, encodep) from = XFASTINT (start); to = XFASTINT (end); - if (NILP (coding_system)) + if (NILP (coding_system) && system_eol_type == CODING_EOL_LF) return make_number (to - from); if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) @@ -6970,7 +7173,7 @@ code_convert_string1 (string, coding_system, nocopy, encodep) CHECK_STRING (string); CHECK_SYMBOL (coding_system); - if (NILP (coding_system)) + if (NILP (coding_system) && system_eol_type == CODING_EOL_LF) return (NILP (nocopy) ? Fcopy_sequence (string) : string); if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) @@ -7029,7 +7232,7 @@ code_convert_string_norecord (string, coding_system, encodep) CHECK_STRING (string); CHECK_SYMBOL (coding_system); - if (NILP (coding_system)) + if (NILP (coding_system) && system_eol_type == CODING_EOL_LF) return string; if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) @@ -7192,7 +7395,7 @@ DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_sys /* Character composition should be disabled. */ safe_terminal_coding.composing = COMPOSITION_DISABLED; /* Error notification should be suppressed. */ - terminal_coding.suppress_error = 1; + safe_terminal_coding.suppress_error = 1; safe_terminal_coding.src_multibyte = 1; safe_terminal_coding.dst_multibyte = 0; return Qnil; @@ -7496,6 +7699,9 @@ init_coding_once () void syms_of_coding () { + staticpro (&Vcode_conversion_workbuf_name); + Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*"); + Qtarget_idx = intern ("target-idx"); staticpro (&Qtarget_idx); @@ -7673,7 +7879,9 @@ updated by the functions `make-coding-system' and On detecting a coding system, Emacs tries code detection algorithms associated with each coding-category one by one in this order. When one algorithm agrees with a byte sequence of source text, the coding -system bound to the corresponding coding-category is selected. */); +system bound to the corresponding coding-category is selected. + +Don't modify this variable directly, but use `set-coding-priority'. */); { int i;