X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/7371fe0a09cf89c4bf6e40aec49410cc98d5dfe9..974aae61bbb8c05e0d0fc1a95b419fe596423fd8:/src/coding.c diff --git a/src/coding.c b/src/coding.c index 8c54f86e53..a4f03c70d9 100644 --- a/src/coding.c +++ b/src/coding.c @@ -1,7 +1,7 @@ /* Coding system handler (conversion, detection, and etc). - Copyright (C) 1995, 1997, 1998, 2002 Electrotechnical Laboratory, JAPAN. + Copyright (C) 1995,97,1998,2002,2003 Electrotechnical Laboratory, JAPAN. Licensed to the Free Software Foundation. - Copyright (C) 2001 Free Software Foundation, Inc. + Copyright (C) 2001,2002,2003 Free Software Foundation, Inc. This file is part of GNU Emacs. @@ -17,8 +17,8 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU Emacs; see the file COPYING. If not, write to -the Free Software Foundation, Inc., 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ +the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ /*** TABLE OF CONTENTS *** @@ -147,7 +147,8 @@ detect_coding_emacs_mule (src, src_end, multibytep) static void decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { ... @@ -345,6 +346,7 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) #include "ccl.h" #include "coding.h" #include "window.h" +#include "intervals.h" #else /* not emacs */ @@ -361,12 +363,18 @@ Lisp_Object Qsafe_chars; Lisp_Object Qvalid_codes; extern Lisp_Object Qinsert_file_contents, Qwrite_region; -Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; +Lisp_Object Qcall_process, Qcall_process_region; Lisp_Object Qstart_process, Qopen_network_stream; Lisp_Object Qtarget_idx; +/* If a symbol has this property, evaluate the value to define the + symbol as a coding system. */ +Lisp_Object Qcoding_system_define_form; + Lisp_Object Vselect_safe_coding_system_function; +int coding_system_require_warning; + /* Mnemonic string for each format of end-of-line. */ Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac; /* Mnemonic string to indicate format of end-of-line is not yet @@ -379,6 +387,16 @@ int system_eol_type; #ifdef emacs +/* Information about which coding system is safe for which chars. + The value has the form (GENERIC-LIST . NON-GENERIC-ALIST). + + GENERIC-LIST is a list of generic coding systems which can encode + any characters. + + NON-GENERIC-ALIST is an alist of non generic coding systems vs the + corresponding char table that contains safe chars. */ +Lisp_Object Vcoding_system_safe_chars; + Lisp_Object Vcoding_system_list, Vcoding_system_alist; Lisp_Object Qcoding_system_p, Qcoding_system_error; @@ -387,6 +405,8 @@ Lisp_Object Qcoding_system_p, Qcoding_system_error; end-of-line format. */ Lisp_Object Qemacs_mule, Qraw_text; +Lisp_Object Qutf_8; + /* Coding-systems are handed between Emacs Lisp programs and C internal routines by the following three variables. */ /* Coding-system for reading files and receiving data from process. */ @@ -485,26 +505,27 @@ Lisp_Object Vcharset_revision_alist; /* Default coding systems used for process I/O. */ Lisp_Object Vdefault_process_coding_system; +/* Char table for translating Quail and self-inserting input. */ +Lisp_Object Vtranslation_table_for_input; + /* Global flag to tell that we can't call post-read-conversion and pre-write-conversion functions. Usually the value is zero, but it is set to 1 temporarily while such functions are running. This is to avoid infinite recursive call. */ static int inhibit_pre_post_conversion; -/* Char-table containing safe coding systems of each character. */ -Lisp_Object Vchar_coding_system_table; Lisp_Object Qchar_coding_system; -/* Return `safe-chars' property of coding system CODING. Don't check - validity of CODING. */ +/* Return `safe-chars' property of CODING_SYSTEM (symbol). Don't check + its validity. */ Lisp_Object -coding_safe_chars (coding) - struct coding_system *coding; +coding_safe_chars (coding_system) + Lisp_Object coding_system; { Lisp_Object coding_spec, plist, safe_chars; - coding_spec = Fget (coding->symbol, Qcoding_system); + coding_spec = Fget (coding_system, Qcoding_system); plist = XVECTOR (coding_spec)->contents[3]; safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars); return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt); @@ -673,8 +694,16 @@ detect_coding_emacs_mule (src, src_end, multibytep) /* Record one COMPONENT (alternate character or composition rule). */ -#define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \ - (coding->cmp_data->data[coding->cmp_data->used++] = component) +#define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \ + do { \ + coding->cmp_data->data[coding->cmp_data->used++] = component; \ + if (coding->cmp_data->used - coding->cmp_data_start \ + == COMPOSITION_DATA_MAX_BUNCH_LENGTH) \ + { \ + CODING_ADD_COMPOSITION_END (coding, coding->produced_char); \ + coding->composing = COMPOSITION_NO; \ + } \ + } while (0) /* Get one byte from a data pointed by SRC and increment SRC. If SRC @@ -691,7 +720,7 @@ detect_coding_emacs_mule (src, src_end, multibytep) #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p) \ do { \ int bytes; \ - \ + \ c = SAFE_ONE_MORE_BYTE (); \ if (c < 0) \ break; \ @@ -722,7 +751,10 @@ detect_coding_emacs_mule (src, src_end, multibytep) break; \ *p++ = c; \ } \ - if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)) \ + if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes) \ + || (coding->flags /* We are recovering a file. */ \ + && p0[0] == LEADING_CODE_8_BIT_CONTROL \ + && ! CHAR_HEAD_P (p0[1]))) \ c = STRING_CHAR (p0, bytes); \ else \ c = -1; \ @@ -769,12 +801,13 @@ static INLINE int decode_composition_emacs_mule (coding, src, src_end, destination, dst_end, dst_bytes) struct coding_system *coding; - unsigned char *src, *src_end, **destination, *dst_end; + const unsigned char *src, *src_end; + unsigned char **destination, *dst_end; int dst_bytes; { unsigned char *dst = *destination; int method, data_len, nchars; - unsigned char *src_base = src++; + const unsigned char *src_base = src++; /* Store components of composition. */ int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH]; int ncomponent; @@ -826,7 +859,10 @@ decode_composition_emacs_mule (coding, src, src_end, else { int bytes; - if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)) + if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes) + || (coding->flags /* We are recovering a file. */ + && src[0] == LEADING_CODE_8_BIT_CONTROL + && ! CHAR_HEAD_P (src[1]))) c = STRING_CHAR (src, bytes); else c = *src, bytes = 1; @@ -912,23 +948,25 @@ decode_composition_emacs_mule (coding, src, src_end, static void decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; + const unsigned char *src = source; + const unsigned char *src_end = source + src_bytes; unsigned char *dst = destination; unsigned char *dst_end = destination + dst_bytes; /* SRC_BASE remembers the start position in source in each loop. The loop will be exited when there's not enough source code, or when there's not enough destination area to produce a character. */ - unsigned char *src_base; + const unsigned char *src_base; coding->produced_char = 0; while ((src_base = src) < src_end) { - unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p; + unsigned char tmp[MAX_MULTIBYTE_LENGTH]; + const unsigned char *p; int bytes; if (*src == '\r') @@ -942,11 +980,6 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) ONE_MORE_BYTE (c); if (c != '\n') { - if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) - { - coding->result = CODING_FINISH_INCONSISTENT_EOL; - goto label_end_of_loop; - } src--; c = '\r'; } @@ -968,7 +1001,7 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) coding->produced_char++; continue; } - else if (*src == 0x80) + else if (*src == 0x80 && coding->cmp_data) { /* Start of composition data. */ int consumed = decode_composition_emacs_mule (coding, src, src_end, @@ -985,16 +1018,36 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) p = tmp; src++; } - else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)) + else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes) + || (coding->flags /* We are recovering a file. */ + && src[0] == LEADING_CODE_8_BIT_CONTROL + && ! CHAR_HEAD_P (src[1]))) { p = src; src += bytes; } else { - bytes = CHAR_STRING (*src, tmp); - p = tmp; + int i, c; + + bytes = BYTES_BY_CHAR_HEAD (*src); src++; + for (i = 1; i < bytes; i++) + { + ONE_MORE_BYTE (c); + if (CHAR_HEAD_P (c)) + break; + } + if (i < bytes) + { + bytes = CHAR_STRING (*src_base, tmp); + p = tmp; + src = src_base + 1; + } + else + { + p = src_base; + } } if (dst + bytes >= (dst_bytes ? dst_end : src)) { @@ -1061,20 +1114,21 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) } while (0) -static void encode_eol P_ ((struct coding_system *, unsigned char *, +static void encode_eol P_ ((struct coding_system *, const unsigned char *, unsigned char *, int, int)); static void encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; + const unsigned char *src = source; + const unsigned char *src_end = source + src_bytes; unsigned char *dst = destination; unsigned char *dst_end = destination + dst_bytes; - unsigned char *src_base; + const unsigned char *src_base; int c; int char_offset; int *data; @@ -1116,7 +1170,22 @@ encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) EMIT_ONE_BYTE ('\r'); } else if (SINGLE_BYTE_CHAR_P (c)) - EMIT_ONE_BYTE (c); + { + if (coding->flags && ! ASCII_BYTE_P (c)) + { + /* As we are auto saving, retain the multibyte form for + 8-bit chars. */ + unsigned char buf[MAX_MULTIBYTE_LENGTH]; + int bytes = CHAR_STRING (c, buf); + + if (bytes == 1) + EMIT_ONE_BYTE (buf[0]); + else + EMIT_TWO_BYTES (buf[0], buf[1]); + } + else + EMIT_ONE_BYTE (c); + } else EMIT_BYTES (src_base, src); coding->consumed_char++; @@ -1310,7 +1379,7 @@ enum iso_code_class_type iso_code_class[256]; #define CHARSET_OK(idx, charset, c) \ (coding_system_table[idx] \ && (charset == CHARSET_ASCII \ - || (safe_chars = coding_safe_chars (coding_system_table[idx]), \ + || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \ CODING_SAFE_CHAR_P (safe_chars, c))) \ && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \ charset) \ @@ -1319,6 +1388,9 @@ enum iso_code_class_type iso_code_class[256]; #define SHIFT_OUT_OK(idx) \ (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0) +#define COMPOSITION_OK(idx) \ + (coding_system_table[idx]->composing != COMPOSITION_DISABLED) + /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". Check if a text is encoded in ISO2022. If it is, return an integer in which appropriate flag bits any of: @@ -1349,6 +1421,7 @@ detect_coding_iso2022 (src, src_end, multibytep) while (mask && src < src_end) { ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); + retry: switch (c) { case ISO_CODE_ESC: @@ -1395,7 +1468,30 @@ detect_coding_iso2022 (src, src_end, multibytep) else if (c >= '0' && c <= '4') { /* ESC for start/end composition. */ - mask_found |= CODING_CATEGORY_MASK_ISO; + if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7)) + mask_found |= CODING_CATEGORY_MASK_ISO_7; + else + mask &= ~CODING_CATEGORY_MASK_ISO_7; + if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT)) + mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT; + else + mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT; + if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1)) + mask_found |= CODING_CATEGORY_MASK_ISO_8_1; + else + mask &= ~CODING_CATEGORY_MASK_ISO_8_1; + if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2)) + mask_found |= CODING_CATEGORY_MASK_ISO_8_2; + else + mask &= ~CODING_CATEGORY_MASK_ISO_8_2; + if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)) + mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE; + else + mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE; + if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)) + mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE; + else + mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE; break; } else @@ -1523,6 +1619,8 @@ detect_coding_iso2022 (src, src_end, multibytep) && mask & CODING_CATEGORY_MASK_ISO_8_2) { int i = 1; + + c = -1; while (src < src_end) { ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); @@ -1535,6 +1633,9 @@ detect_coding_iso2022 (src, src_end, multibytep) mask &= ~CODING_CATEGORY_MASK_ISO_8_2; else mask_found |= CODING_CATEGORY_MASK_ISO_8_2; + if (c >= 0) + /* This means that we have read one extra byte. */ + goto retry; } } break; @@ -1610,6 +1711,7 @@ coding_allocate_composition_data (coding, char_offset) coding->cmp_data->next = cmp_data; coding->cmp_data = cmp_data; coding->cmp_data_start = 0; + coding->composing = COMPOSITION_NO; } /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4. @@ -1713,11 +1815,12 @@ coding_allocate_composition_data (coding, char_offset) static void decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; + const unsigned char *src = source; + const unsigned char *src_end = source + src_bytes; unsigned char *dst = destination; unsigned char *dst_end = destination + dst_bytes; /* Charsets invoked to graphic plane 0 and 1 respectively. */ @@ -1728,12 +1831,12 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) (within macro ONE_MORE_BYTE), or when there's not enough destination area to produce a character (within macro EMIT_CHAR). */ - unsigned char *src_base; + const unsigned char *src_base; int c, charset; Lisp_Object translation_table; Lisp_Object safe_chars; - safe_chars = coding_safe_chars (coding); + safe_chars = coding_safe_chars (coding->symbol); if (NILP (Venable_character_translation)) translation_table = Qnil; @@ -1748,7 +1851,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) while (1) { - int c1, c2; + int c1, c2 = 0; src_base = src; ONE_MORE_BYTE (c1); @@ -1824,11 +1927,6 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) ONE_MORE_BYTE (c1); if (c1 != ISO_CODE_LF) { - if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) - { - coding->result = CODING_FINISH_INCONSISTENT_EOL; - goto label_end_of_loop; - } src--; c1 = '\r'; } @@ -1993,6 +2091,81 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) } continue; + case '%': + if (COMPOSING_P (coding)) + DECODE_COMPOSITION_END ('1'); + ONE_MORE_BYTE (c1); + if (c1 == '/') + { + /* CTEXT extended segment: + ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES-- + We keep these bytes as is for the moment. + They may be decoded by post-read-conversion. */ + int dim, M, L; + int size, required; + int produced_chars; + + ONE_MORE_BYTE (dim); + ONE_MORE_BYTE (M); + ONE_MORE_BYTE (L); + size = ((M - 128) * 128) + (L - 128); + required = 8 + size * 2; + if (dst + required > (dst_bytes ? dst_end : src)) + goto label_end_of_loop; + *dst++ = ISO_CODE_ESC; + *dst++ = '%'; + *dst++ = '/'; + *dst++ = dim; + produced_chars = 4; + dst += CHAR_STRING (M, dst), produced_chars++; + dst += CHAR_STRING (L, dst), produced_chars++; + while (size-- > 0) + { + ONE_MORE_BYTE (c1); + dst += CHAR_STRING (c1, dst), produced_chars++; + } + coding->produced_char += produced_chars; + } + else if (c1 == 'G') + { + unsigned char *d = dst; + int produced_chars; + + /* XFree86 extension for embedding UTF-8 in CTEXT: + ESC % G --UTF-8-BYTES-- ESC % @ + We keep these bytes as is for the moment. + They may be decoded by post-read-conversion. */ + if (d + 6 > (dst_bytes ? dst_end : src)) + goto label_end_of_loop; + *d++ = ISO_CODE_ESC; + *d++ = '%'; + *d++ = 'G'; + produced_chars = 3; + while (d + 1 < (dst_bytes ? dst_end : src)) + { + ONE_MORE_BYTE (c1); + if (c1 == ISO_CODE_ESC + && src + 1 < src_end + && src[0] == '%' + && src[1] == '@') + { + src += 2; + break; + } + d += CHAR_STRING (c1, d), produced_chars++; + } + if (d + 3 > (dst_bytes ? dst_end : src)) + goto label_end_of_loop; + *d++ = ISO_CODE_ESC; + *d++ = '%'; + *d++ = '@'; + dst = d; + coding->produced_char += produced_chars + 3; + } + else + goto label_invalid_code; + continue; + default: if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION)) goto label_invalid_code; @@ -2035,6 +2208,8 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) DECODE_COMPOSITION_END ('1'); src = src_base; c = *src++; + if (! NILP (translation_table)) + c = translate_char (translation_table, c, 0, 0, 0); EMIT_CHAR (c); } @@ -2258,11 +2433,11 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) /* Instead of encoding character C, produce one or two `?'s. */ -#define ENCODE_UNSAFE_CHARACTER(c) \ - do { \ - ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \ - if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1) \ - ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \ +#define ENCODE_UNSAFE_CHARACTER(c) \ + do { \ + ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER); \ + if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1) \ + ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER); \ } while (0) @@ -2429,7 +2604,8 @@ static unsigned char * encode_designation_at_bol (coding, translation_table, src, src_end, dst) struct coding_system *coding; Lisp_Object translation_table; - unsigned char *src, *src_end, *dst; + const unsigned char *src, *src_end; + unsigned char *dst; { int charset, c, found = 0, reg; /* Table of charsets to be designated to each graphic register. */ @@ -2470,11 +2646,12 @@ encode_designation_at_bol (coding, translation_table, src, src_end, dst) static void encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; + const unsigned char *src = source; + const unsigned char *src_end = source + src_bytes; unsigned char *dst = destination; unsigned char *dst_end = destination + dst_bytes; /* Since the maximum bytes produced by each loop is 20, we subtract 19 @@ -2486,12 +2663,15 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) analyze multi-byte codes (within macro ONE_MORE_CHAR), or when there's not enough destination area to produce encoded codes (within macro EMIT_BYTES). */ - unsigned char *src_base; + const unsigned char *src_base; int c; Lisp_Object translation_table; Lisp_Object safe_chars; - safe_chars = coding_safe_chars (coding); + if (coding->flags & CODING_FLAG_ISO_SAFE) + coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR; + + safe_chars = coding_safe_chars (coding->symbol); if (NILP (Venable_character_translation)) translation_table = Qnil; @@ -2558,7 +2738,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) } else { - if (coding->flags & CODING_FLAG_ISO_SAFE + if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR && ! CODING_SAFE_CHAR_P (safe_chars, c)) ENCODE_UNSAFE_CHARACTER (c); else @@ -2627,7 +2807,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) *dst++ = c; coding->errors++; } - else if (coding->flags & CODING_FLAG_ISO_SAFE + else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR && ! CODING_SAFE_CHAR_P (safe_chars, c)) ENCODE_UNSAFE_CHARACTER (c); else @@ -2685,7 +2865,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) /* Macros to decode or encode a character of Big5 in BIG5. B1 and B2 are the 1st and 2nd position-codes of Big5 in BIG5 coding system. - C1 and C2 are the 1st and 2nd position-codes of of Emacs' internal + C1 and C2 are the 1st and 2nd position-codes of Emacs' internal format. CHARSET is `charset_big5_1' or `charset_big5_2'. */ /* Number of Big5 characters which have the same code in 1st byte. */ @@ -2854,7 +3034,7 @@ detect_coding_utf_16 (src, src_end, multibytep) int multibytep; { unsigned char c1, c2; - /* Dummy for TWO_MORE_BYTES. */ + /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE. */ struct coding_system dummy_coding; struct coding_system *coding = &dummy_coding; @@ -2877,12 +3057,13 @@ static void decode_coding_sjis_big5 (coding, source, destination, src_bytes, dst_bytes, sjis_p) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; int sjis_p; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; + const unsigned char *src = source; + const unsigned char *src_end = source + src_bytes; unsigned char *dst = destination; unsigned char *dst_end = destination + dst_bytes; /* SRC_BASE remembers the start position in source in each loop. @@ -2890,7 +3071,7 @@ decode_coding_sjis_big5 (coding, source, destination, (within macro ONE_MORE_BYTE), or when there's not enough destination area to produce a character (within macro EMIT_CHAR). */ - unsigned char *src_base; + const unsigned char *src_base; Lisp_Object translation_table; if (NILP (Venable_character_translation)) @@ -2905,7 +3086,7 @@ decode_coding_sjis_big5 (coding, source, destination, coding->produced_char = 0; while (1) { - int c, charset, c1, c2; + int c, charset, c1, c2 = 0; src_base = src; ONE_MORE_BYTE (c1); @@ -2922,12 +3103,6 @@ decode_coding_sjis_big5 (coding, source, destination, ONE_MORE_BYTE (c2); if (c2 == '\n') c1 = c2; - else if (coding->mode - & CODING_MODE_INHIBIT_INCONSISTENT_EOL) - { - coding->result = CODING_FINISH_INCONSISTENT_EOL; - goto label_end_of_loop; - } else /* To process C2 again, SRC is subtracted by 1. */ src--; @@ -3076,6 +3251,12 @@ encode_coding_sjis_big5 (coding, source, destination, EMIT_ONE_BYTE (c1 | 0x80); else if (charset == charset_latin_jisx0201) EMIT_ONE_BYTE (c1); + else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR) + { + EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER); + if (CHARSET_WIDTH (charset) > 1) + EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER); + } else /* There's no way other than producing the internal codes as is. */ @@ -3088,6 +3269,12 @@ encode_coding_sjis_big5 (coding, source, destination, ENCODE_BIG5 (charset, c1, c2, c1, c2); EMIT_TWO_BYTES (c1, c2); } + else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR) + { + EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER); + if (CHARSET_WIDTH (charset) > 1) + EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER); + } else /* There's no way other than producing the internal codes as is. */ @@ -3144,12 +3331,13 @@ detect_coding_ccl (src, src_end, multibytep) static void decode_eol (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { - unsigned char *src = source; + const unsigned char *src = source; unsigned char *dst = destination; - unsigned char *src_end = src + src_bytes; + const unsigned char *src_end = src + src_bytes; unsigned char *dst_end = dst + dst_bytes; Lisp_Object translation_table; /* SRC_BASE remembers the start position in source in each loop. @@ -3157,7 +3345,7 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes) (within macro ONE_MORE_BYTE), or when there's not enough destination area to produce a character (within macro EMIT_CHAR). */ - unsigned char *src_base; + const unsigned char *src_base; int c; translation_table = Qnil; @@ -3173,11 +3361,6 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes) ONE_MORE_BYTE (c); if (c != '\n') { - if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) - { - coding->result = CODING_FINISH_INCONSISTENT_EOL; - goto label_end_of_loop; - } src--; c = '\r'; } @@ -3236,12 +3419,13 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes) static void encode_eol (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { - unsigned char *src = source; + const unsigned char *src = source; unsigned char *dst = destination; - unsigned char *src_end = src + src_bytes; + const unsigned char *src_end = src + src_bytes; unsigned char *dst_end = dst + dst_bytes; Lisp_Object translation_table; /* SRC_BASE remembers the start position in source in each loop. @@ -3249,7 +3433,8 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes) analyze multi-byte codes (within macro ONE_MORE_CHAR), or when there's not enough destination area to produce encoded codes (within macro EMIT_BYTES). */ - unsigned char *src_base; + const unsigned char *src_base; + unsigned char *tmp; int c; int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY; @@ -3299,13 +3484,13 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes) } if (coding->eol_type == CODING_EOL_CR) { - for (src = destination; src < dst; src++) - if (*src == '\n') *src = '\r'; + for (tmp = destination; tmp < dst; tmp++) + if (*tmp == '\n') *tmp = '\r'; } else if (selective_display) { - for (src = destination; src < dst; src++) - if (*src == '\r') *src = '\n'; + for (tmp = destination; tmp < dst; tmp++) + if (*tmp == '\r') *tmp = '\n'; } } if (coding->src_multibyte) @@ -3498,7 +3683,6 @@ setup_coding_system (coding_system, coding) coding->type = coding_type_emacs_mule; coding->common_flags |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK; - coding->composing = COMPOSITION_NO; if (!NILP (coding->post_read_conversion)) coding->common_flags |= CODING_REQUIRE_DECODING_MASK; if (!NILP (coding->pre_write_conversion)) @@ -3874,7 +4058,7 @@ setup_raw_text_coding_system (coding) o coding-category-utf-8 The category for a coding system which has the same code range - as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp + as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp symbol) `utf-8' by default. o coding-category-utf-16-be @@ -4094,7 +4278,7 @@ detect_coding_mask (source, src_bytes, priorities, skip, multibytep) void detect_coding (coding, src, src_bytes) struct coding_system *coding; - unsigned char *src; + const unsigned char *src; int src_bytes; { unsigned int idx; @@ -4267,7 +4451,7 @@ detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p) void detect_eol (coding, src, src_bytes) struct coding_system *coding; - unsigned char *src; + const unsigned char *src; int src_bytes; { Lisp_Object val; @@ -4317,11 +4501,13 @@ detect_eol (coding, src, src_bytes) { int src_multibyte = coding->src_multibyte; int dst_multibyte = coding->dst_multibyte; + struct composition_data *cmp_data = coding->cmp_data; setup_coding_system (XVECTOR (val)->contents[eol_type], coding); coding->src_multibyte = src_multibyte; coding->dst_multibyte = dst_multibyte; coding->heading_ascii = skip; + coding->cmp_data = cmp_data; } } @@ -4357,7 +4543,11 @@ encoding_buffer_size (coding, src_bytes) int magnification; if (coding->type == coding_type_ccl) - magnification = coding->spec.ccl.encoder.buf_magnification; + { + magnification = coding->spec.ccl.encoder.buf_magnification; + if (coding->eol_type == CODING_EOL_CRLF) + magnification *= 2; + } else if (CODING_REQUIRE_ENCODING (coding)) magnification = 3; else @@ -4374,10 +4564,6 @@ struct conversion_buffer unsigned char *data; }; -/* Don't use alloca for allocating memory space larger than this, lest - we overflow their stack. */ -#define MAX_ALLOCA 16*1024 - /* Allocate LEN bytes of memory for BUF (struct conversion_buffer). */ #define allocate_conversion_buffer(buf, len) \ do { \ @@ -4442,7 +4628,10 @@ ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep) if (ccl->eol_type ==CODING_EOL_UNDECIDED) ccl->eol_type = CODING_EOL_LF; ccl->cr_consumed = coding->spec.ccl.cr_carryover; + ccl->eight_bit_control = coding->dst_multibyte; } + else + ccl->eight_bit_control = 1; ccl->multibyte = coding->src_multibyte; if (coding->spec.ccl.eight_bit_carryover[0] != 0) { @@ -4590,7 +4779,7 @@ decode_eol_post_ccl (coding, ptr, bytes) { /* If the last character is CR, we can't handle it here because LF will be in the not-yet-decoded source text. - Recorded that the CR is not yet processed. */ + Record that the CR is not yet processed. */ coding->spec.ccl.cr_carryover = 1; coding->produced--; coding->produced_char--; @@ -4683,9 +4872,12 @@ decode_eol_post_ccl (coding, ptr, bytes) int decode_coding (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { + int extra = 0; + if (coding->type == coding_type_undecided) detect_coding (coding, source, src_bytes); @@ -4728,18 +4920,24 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes) case coding_type_ccl: if (coding->spec.ccl.cr_carryover) { - /* Set the CR which is not processed by the previous call of - decode_eol_post_ccl in DESTINATION. */ + /* Put the CR which was not processed by the previous call + of decode_eol_post_ccl in DESTINATION. It will be + decoded together with the following LF by the call to + decode_eol_post_ccl below. */ *destination = '\r'; coding->produced++; coding->produced_char++; dst_bytes--; + extra = coding->spec.ccl.cr_carryover; } - ccl_coding_driver (coding, source, - destination + coding->spec.ccl.cr_carryover, + ccl_coding_driver (coding, source, destination + extra, src_bytes, dst_bytes, 0); if (coding->eol_type != CODING_EOL_LF) - decode_eol_post_ccl (coding, destination, coding->produced); + { + coding->produced += extra; + coding->produced_char += extra; + decode_eol_post_ccl (coding, destination, coding->produced); + } break; default: @@ -4754,7 +4952,7 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes) if (coding->mode & CODING_MODE_LAST_BLOCK && coding->result == CODING_FINISH_INSUFFICIENT_SRC) { - unsigned char *src = source + coding->consumed; + const unsigned char *src = source + coding->consumed; unsigned char *dst = destination + coding->produced; src_bytes -= coding->consumed; @@ -4788,7 +4986,8 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes) int encode_coding (coding, source, destination, src_bytes, dst_bytes) struct coding_system *coding; - unsigned char *source, *destination; + const unsigned char *source; + unsigned char *destination; int src_bytes, dst_bytes; { coding->produced = coding->produced_char = 0; @@ -4830,7 +5029,7 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes) if (coding->mode & CODING_MODE_LAST_BLOCK && coding->result == CODING_FINISH_INSUFFICIENT_SRC) { - unsigned char *src = source + coding->consumed; + const unsigned char *src = source + coding->consumed; unsigned char *dst = destination + coding->produced; if (coding->type == coding_type_iso2022) @@ -5154,11 +5353,17 @@ static int shrink_conversion_region_threshhold = 1024; } \ } while (0) +/* ARG is (CODING . BUFFER) where CODING is what to be set in + Vlast_coding_system_used and BUFFER if non-nil is a buffer to + kill. */ static Lisp_Object -code_convert_region_unwind (dummy) - Lisp_Object dummy; +code_convert_region_unwind (arg) + Lisp_Object arg; { inhibit_pre_post_conversion = 0; + Vlast_coding_system_used = XCAR (arg); + if (! NILP (XCDR (arg))) + Fkill_buffer (XCDR (arg)); return Qnil; } @@ -5214,7 +5419,7 @@ coding_save_composition (coding, from, to, obj) else if (VECTORP (val) || STRINGP (val)) { int len = (VECTORP (val) - ? XVECTOR (val)->size : XSTRING (val)->size); + ? XVECTOR (val)->size : SCHARS (val)); int i; for (i = 0; i < len; i++) { @@ -5269,6 +5474,10 @@ coding_restore_composition (coding, obj) enum composition_method method = (enum composition_method) data[3]; Lisp_Object components; + if (data[0] < 0 || i + data[0] > cmp_data->used) + /* Invalid composition data. */ + break; + if (method == COMPOSITION_RELATIVE) components = Qnil; else @@ -5276,10 +5485,17 @@ coding_restore_composition (coding, obj) int len = data[0] - 4, j; Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1]; + if (method == COMPOSITION_WITH_RULE_ALTCHARS + && len % 2 == 0) + len --; + if (len < 1) + /* Invalid composition data. */ + break; for (j = 0; j < len; j++) args[j] = make_number (data[4 + j]); components = (method == COMPOSITION_WITH_ALTCHARS - ? Fstring (len, args) : Fvector (len, args)); + ? Fstring (len, args) + : Fvector (len, args)); } compose_text (data[1], data[2], components, Qnil, obj); } @@ -5399,7 +5615,8 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) struct buffer *prev = current_buffer; Lisp_Object new; - record_unwind_protect (code_convert_region_unwind, Qnil); + record_unwind_protect (code_convert_region_unwind, + Fcons (Vlast_coding_system_used, Qnil)); /* We should not call any more pre-write/post-read-conversion functions while this pre-write-conversion is running. */ inhibit_pre_post_conversion = 1; @@ -5450,8 +5667,11 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) coding_allocate_composition_data (coding, from); } - /* Try to skip the heading and tailing ASCIIs. */ - if (coding->type != coding_type_ccl) + /* Try to skip the heading and tailing ASCIIs. We can't skip them + if we must run CCL program or there are compositions to + encode. */ + if (coding->type != coding_type_ccl + && (! coding->cmp_data || coding->cmp_data->used == 0)) { int from_byte_orig = from_byte, to_byte_orig = to_byte; @@ -5467,6 +5687,7 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) if (!replace) /* We must record and adjust for this new text now. */ adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len); + coding_free_composition_data (coding); return 0; } @@ -5677,9 +5898,19 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG) REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG Here, we are sure that NEW >= ORIG. */ - float ratio = coding->produced - coding->consumed; - ratio /= coding->consumed; - require = len_byte * ratio; + + if (coding->produced <= coding->consumed) + { + /* This happens because of CCL-based coding system with + eol-type CRLF. */ + require = 0; + } + else + { + float ratio = coding->produced - coding->consumed; + ratio /= coding->consumed; + require = len_byte * ratio; + } first = 0; } if ((src - dst) < (require + 2000)) @@ -5747,16 +5978,22 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) && ! encodep && ! NILP (coding->post_read_conversion)) { Lisp_Object val; + Lisp_Object saved_coding_system; if (from != PT) TEMP_SET_PT_BOTH (from, from_byte); prev_Z = Z; - record_unwind_protect (code_convert_region_unwind, Qnil); + record_unwind_protect (code_convert_region_unwind, + Fcons (Vlast_coding_system_used, Qnil)); + saved_coding_system = Vlast_coding_system_used; + Vlast_coding_system_used = coding->symbol; /* We should not call any more pre-write/post-read-conversion functions while this post-read-conversion is running. */ inhibit_pre_post_conversion = 1; val = call1 (coding->post_read_conversion, make_number (inserted)); inhibit_pre_post_conversion = 0; + coding->symbol = Vlast_coding_system_used; + Vlast_coding_system_used = saved_coding_system; /* Discard the unwind protect. */ specpdl_ptr--; CHECK_NUMBER (val); @@ -5788,55 +6025,155 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) return 0; } -Lisp_Object -run_pre_post_conversion_on_str (str, coding, encodep) - Lisp_Object str; - struct coding_system *coding; - int encodep; +/* Name (or base name) of work buffer for code conversion. */ +static Lisp_Object Vcode_conversion_workbuf_name; + +/* Set the current buffer to the working buffer prepared for + code-conversion. MULTIBYTE specifies the multibyteness of the + buffer. Return the buffer we set if it must be killed after use. + Otherwise return Qnil. */ + +static Lisp_Object +set_conversion_work_buffer (multibyte) + int multibyte; { - int count = specpdl_ptr - specpdl; - struct gcpro gcpro1; - int multibyte = STRING_MULTIBYTE (str); - Lisp_Object buffer; + Lisp_Object buffer, buffer_to_kill; struct buffer *buf; - record_unwind_protect (Fset_buffer, Fcurrent_buffer ()); - record_unwind_protect (code_convert_region_unwind, Qnil); - GCPRO1 (str); - - buffer = Fget_buffer_create (build_string (" *code-converting-work*")); + buffer = Fget_buffer_create (Vcode_conversion_workbuf_name); buf = XBUFFER (buffer); + if (buf == current_buffer) + { + /* As we are already in the work buffer, we must generate a new + buffer for the work. */ + Lisp_Object name; + + name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil); + buffer = buffer_to_kill = Fget_buffer_create (name); + buf = XBUFFER (buffer); + } + else + buffer_to_kill = Qnil; + delete_all_overlays (buf); buf->directory = current_buffer->directory; buf->read_only = Qnil; buf->filename = Qnil; buf->undo_list = Qt; - buf->overlays_before = Qnil; - buf->overlays_after = Qnil; - + eassert (buf->overlays_before == NULL); + eassert (buf->overlays_after == NULL); set_buffer_internal (buf); + if (BEG != BEGV || Z != ZV) + Fwiden (); + del_range_2 (BEG, BEG_BYTE, Z, Z_BYTE, 0); + buf->enable_multibyte_characters = multibyte ? Qt : Qnil; + return buffer_to_kill; +} + +Lisp_Object +run_pre_post_conversion_on_str (str, coding, encodep) + Lisp_Object str; + struct coding_system *coding; + int encodep; +{ + int count = SPECPDL_INDEX (); + struct gcpro gcpro1, gcpro2; + int multibyte = STRING_MULTIBYTE (str); + Lisp_Object old_deactivate_mark; + Lisp_Object buffer_to_kill; + + record_unwind_protect (Fset_buffer, Fcurrent_buffer ()); + /* It is not crucial to specbind this. */ + old_deactivate_mark = Vdeactivate_mark; + GCPRO2 (str, old_deactivate_mark); + /* We must insert the contents of STR as is without unibyte<->multibyte conversion. For that, we adjust the multibyteness of the working buffer to that of STR. */ - Ferase_buffer (); - buf->enable_multibyte_characters = multibyte ? Qt : Qnil; + buffer_to_kill = set_conversion_work_buffer (multibyte); + record_unwind_protect (code_convert_region_unwind, + Fcons (Vlast_coding_system_used, buffer_to_kill)); insert_from_string (str, 0, 0, - XSTRING (str)->size, STRING_BYTES (XSTRING (str)), 0); + SCHARS (str), SBYTES (str), 0); UNGCPRO; inhibit_pre_post_conversion = 1; if (encodep) call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z)); else { + Vlast_coding_system_used = coding->symbol; TEMP_SET_PT_BOTH (BEG, BEG_BYTE); call1 (coding->post_read_conversion, make_number (Z - BEG)); + coding->symbol = Vlast_coding_system_used; } inhibit_pre_post_conversion = 0; + Vdeactivate_mark = old_deactivate_mark; str = make_buffer_string (BEG, Z, 1); return unbind_to (count, str); } + +/* Run pre-write-conversion function of CODING on NCHARS/NBYTES + text in *STR. *SIZE is the allocated bytes for STR. As it + is intended that this function is called from encode_terminal_code, + the pre-write-conversion function is run by safe_call and thus + "Error during redisplay: ..." is logged when an error occurs. + + Store the resulting text in *STR and set CODING->produced_char and + CODING->produced to the number of characters and bytes + respectively. If the size of *STR is too small, enlarge it by + xrealloc and update *STR and *SIZE. */ + +void +run_pre_write_conversin_on_c_str (str, size, nchars, nbytes, coding) + unsigned char **str; + int *size, nchars, nbytes; + struct coding_system *coding; +{ + struct gcpro gcpro1, gcpro2; + struct buffer *cur = current_buffer; + Lisp_Object old_deactivate_mark, old_last_coding_system_used; + Lisp_Object args[3]; + Lisp_Object buffer_to_kill; + + /* It is not crucial to specbind this. */ + old_deactivate_mark = Vdeactivate_mark; + old_last_coding_system_used = Vlast_coding_system_used; + GCPRO2 (old_deactivate_mark, old_last_coding_system_used); + + /* We must insert the contents of STR as is without + unibyte<->multibyte conversion. For that, we adjust the + multibyteness of the working buffer to that of STR. */ + buffer_to_kill = set_conversion_work_buffer (coding->src_multibyte); + insert_1_both (*str, nchars, nbytes, 0, 0, 0); + UNGCPRO; + inhibit_pre_post_conversion = 1; + args[0] = coding->pre_write_conversion; + args[1] = make_number (BEG); + args[2] = make_number (Z); + safe_call (3, args); + inhibit_pre_post_conversion = 0; + Vdeactivate_mark = old_deactivate_mark; + Vlast_coding_system_used = old_last_coding_system_used; + coding->produced_char = Z - BEG; + coding->produced = Z_BYTE - BEG_BYTE; + if (coding->produced > *size) + { + *size = coding->produced; + *str = xrealloc (*str, *size); + } + if (BEG < GPT && GPT < Z) + move_gap (BEG); + bcopy (BEG_ADDR, *str, coding->produced); + coding->src_multibyte + = ! NILP (current_buffer->enable_multibyte_characters); + set_buffer_internal (cur); + if (! NILP (buffer_to_kill)) + Fkill_buffer (buffer_to_kill); +} + + Lisp_Object decode_coding_string (str, coding, nocopy) Lisp_Object str; @@ -5854,7 +6191,7 @@ decode_coding_string (str, coding, nocopy) int consumed, consumed_char, produced, produced_char; from = 0; - to_byte = STRING_BYTES (XSTRING (str)); + to_byte = SBYTES (str); saved_coding_symbol = coding->symbol; coding->src_multibyte = STRING_MULTIBYTE (str); @@ -5864,7 +6201,7 @@ decode_coding_string (str, coding, nocopy) /* See the comments in code_convert_region. */ if (coding->type == coding_type_undecided) { - detect_coding (coding, XSTRING (str)->data, to_byte); + detect_coding (coding, SDATA (str), to_byte); if (coding->type == coding_type_undecided) { coding->type = coding_type_emacs_mule; @@ -5879,7 +6216,7 @@ decode_coding_string (str, coding, nocopy) && coding->type != coding_type_ccl) { saved_coding_symbol = coding->symbol; - detect_eol (coding, XSTRING (str)->data, to_byte); + detect_eol (coding, SDATA (str), to_byte); if (coding->eol_type == CODING_EOL_UNDECIDED) coding->eol_type = CODING_EOL_LF; /* We had better recover the original eol format if we @@ -5898,7 +6235,7 @@ decode_coding_string (str, coding, nocopy) { /* Decoding routines expect the source text to be unibyte. */ str = Fstring_as_unibyte (str); - to_byte = STRING_BYTES (XSTRING (str)); + to_byte = SBYTES (str); nocopy = 1; coding->src_multibyte = 0; } @@ -5906,24 +6243,26 @@ decode_coding_string (str, coding, nocopy) /* Try to skip the heading and tailing ASCIIs. */ if (require_decoding && coding->type != coding_type_ccl) { - SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data, + SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str), 0); if (from == to_byte) require_decoding = 0; - shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte); + shrinked_bytes = from + (SBYTES (str) - to_byte); } - if (!require_decoding) + if (!require_decoding + && !(SYMBOLP (coding->post_read_conversion) + && !NILP (Ffboundp (coding->post_read_conversion)))) { - coding->consumed = STRING_BYTES (XSTRING (str)); - coding->consumed_char = XSTRING (str)->size; + coding->consumed = SBYTES (str); + coding->consumed_char = SCHARS (str); if (coding->dst_multibyte) { str = Fstring_as_multibyte (str); nocopy = 1; } - coding->produced = STRING_BYTES (XSTRING (str)); - coding->produced_char = XSTRING (str)->size; + coding->produced = SBYTES (str); + coding->produced_char = SCHARS (str); return (nocopy ? str : Fcopy_sequence (str)); } @@ -5935,7 +6274,7 @@ decode_coding_string (str, coding, nocopy) consumed = consumed_char = produced = produced_char = 0; while (1) { - result = decode_coding (coding, XSTRING (str)->data + from + consumed, + result = decode_coding (coding, SDATA (str) + from + consumed, buf.data + produced, to_byte - from - consumed, buf.size - produced); consumed += coding->consumed; @@ -5943,6 +6282,7 @@ decode_coding_string (str, coding, nocopy) produced += coding->produced; produced_char += coding->produced_char; if (result == CODING_FINISH_NORMAL + || result == CODING_FINISH_INTERRUPT || (result == CODING_FINISH_INSUFFICIENT_SRC && coding->consumed == 0)) break; @@ -6004,14 +6344,19 @@ decode_coding_string (str, coding, nocopy) else newstr = make_uninit_string (produced + shrinked_bytes); if (from > 0) - bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from); - bcopy (buf.data, XSTRING (newstr)->data + from, produced); + STRING_COPYIN (newstr, 0, SDATA (str), from); + STRING_COPYIN (newstr, from, buf.data, produced); if (shrinked_bytes > from) - bcopy (XSTRING (str)->data + to_byte, - XSTRING (newstr)->data + from + produced, - shrinked_bytes - from); + STRING_COPYIN (newstr, from + produced, + SDATA (str) + to_byte, + shrinked_bytes - from); free_conversion_buffer (&buf); + coding->consumed += shrinked_bytes; + coding->consumed_char += shrinked_bytes; + coding->produced += shrinked_bytes; + coding->produced_char += shrinked_bytes; + if (coding->cmp_data && coding->cmp_data->used) coding_restore_composition (coding, newstr); coding_free_composition_data (coding); @@ -6039,41 +6384,41 @@ encode_coding_string (str, coding, nocopy) if (SYMBOLP (coding->pre_write_conversion) && !NILP (Ffboundp (coding->pre_write_conversion))) - str = run_pre_post_conversion_on_str (str, coding, 1); + { + str = run_pre_post_conversion_on_str (str, coding, 1); + /* As STR is just newly generated, we don't have to copy it + anymore. */ + nocopy = 1; + } from = 0; - to = XSTRING (str)->size; - to_byte = STRING_BYTES (XSTRING (str)); + to = SCHARS (str); + to_byte = SBYTES (str); /* Encoding routines determine the multibyteness of the source text by coding->src_multibyte. */ - coding->src_multibyte = STRING_MULTIBYTE (str); + coding->src_multibyte = SCHARS (str) < SBYTES (str); coding->dst_multibyte = 0; if (! CODING_REQUIRE_ENCODING (coding)) - { - coding->consumed = STRING_BYTES (XSTRING (str)); - coding->consumed_char = XSTRING (str)->size; - if (STRING_MULTIBYTE (str)) - { - str = Fstring_as_unibyte (str); - nocopy = 1; - } - coding->produced = STRING_BYTES (XSTRING (str)); - coding->produced_char = XSTRING (str)->size; - return (nocopy ? str : Fcopy_sequence (str)); - } + goto no_need_of_encoding; if (coding->composing != COMPOSITION_DISABLED) coding_save_composition (coding, from, to, str); - /* Try to skip the heading and tailing ASCIIs. */ - if (coding->type != coding_type_ccl) + /* Try to skip the heading and tailing ASCIIs. We can't skip them + if we must run CCL program or there are compositions to + encode. */ + if (coding->type != coding_type_ccl + && (! coding->cmp_data || coding->cmp_data->used == 0)) { - SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data, + SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str), 1); if (from == to_byte) - return (nocopy ? str : Fcopy_sequence (str)); - shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte); + { + coding_free_composition_data (coding); + goto no_need_of_encoding; + } + shrinked_bytes = from + (SBYTES (str) - to_byte); } len = encoding_buffer_size (coding, to_byte - from); @@ -6082,7 +6427,7 @@ encode_coding_string (str, coding, nocopy) consumed = consumed_char = produced = produced_char = 0; while (1) { - result = encode_coding (coding, XSTRING (str)->data + from + consumed, + result = encode_coding (coding, SDATA (str) + from + consumed, buf.data + produced, to_byte - from - consumed, buf.size - produced); consumed += coding->consumed; @@ -6090,6 +6435,7 @@ encode_coding_string (str, coding, nocopy) produced += coding->produced; produced_char += coding->produced_char; if (result == CODING_FINISH_NORMAL + || result == CODING_FINISH_INTERRUPT || (result == CODING_FINISH_INSUFFICIENT_SRC && coding->consumed == 0)) break; @@ -6104,17 +6450,36 @@ encode_coding_string (str, coding, nocopy) newstr = make_uninit_string (produced + shrinked_bytes); if (from > 0) - bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from); - bcopy (buf.data, XSTRING (newstr)->data + from, produced); + STRING_COPYIN (newstr, 0, SDATA (str), from); + STRING_COPYIN (newstr, from, buf.data, produced); if (shrinked_bytes > from) - bcopy (XSTRING (str)->data + to_byte, - XSTRING (newstr)->data + from + produced, - shrinked_bytes - from); + STRING_COPYIN (newstr, from + produced, + SDATA (str) + to_byte, + shrinked_bytes - from); free_conversion_buffer (&buf); coding_free_composition_data (coding); return newstr; + + no_need_of_encoding: + coding->consumed = SBYTES (str); + coding->consumed_char = SCHARS (str); + if (STRING_MULTIBYTE (str)) + { + if (nocopy) + /* We are sure that STR doesn't contain a multibyte + character. */ + STRING_SET_UNIBYTE (str); + else + { + str = Fstring_as_unibyte (str); + nocopy = 1; + } + } + coding->produced = SBYTES (str); + coding->produced_char = SCHARS (str); + return (nocopy ? str : Fcopy_sequence (str)); } @@ -6132,6 +6497,8 @@ about coding-system objects. */) return Qt; if (!SYMBOLP (obj)) return Qnil; + if (! NILP (Fget (obj, Qcoding_system_define_form))) + return Qt; /* Get coding-spec vector for OBJ. */ obj = Fget (obj, Qcoding_system); return ((VECTORP (obj) && XVECTOR (obj)->size == 5) @@ -6150,7 +6517,7 @@ DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system, val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil, Qt, Qnil, Qcoding_system_history, Qnil, Qnil); } - while (XSTRING (val)->size == 0); + while (SCHARS (val) == 0); return (Fintern (val, Qnil)); } @@ -6162,23 +6529,30 @@ If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */ { Lisp_Object val; if (SYMBOLP (default_coding_system)) - XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name); + default_coding_system = SYMBOL_NAME (default_coding_system); val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil, Qt, Qnil, Qcoding_system_history, default_coding_system, Qnil); - return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil)); + return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil)); } DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system, 1, 1, 0, doc: /* Check validity of CODING-SYSTEM. If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. -It is valid if it is a symbol with a non-nil `coding-system' property. -The value of property should be a vector of length 5. */) +It is valid if it is nil or a symbol with a non-nil `coding-system' property. +The value of this property should be a vector of length 5. */) (coding_system) Lisp_Object coding_system; { - CHECK_SYMBOL (coding_system); + Lisp_Object define_form; + + define_form = Fget (coding_system, Qcoding_system_define_form); + if (! NILP (define_form)) + { + Fput (coding_system, Qcoding_system_define_form, Qnil); + safe_eval (define_form); + } if (!NILP (Fcoding_system_p (coding_system))) return coding_system; while (1) @@ -6187,7 +6561,7 @@ The value of property should be a vector of length 5. */) Lisp_Object detect_coding_system (src, src_bytes, highest, multibytep) - unsigned char *src; + const unsigned char *src; int src_bytes, highest; int multibytep; { @@ -6250,8 +6624,11 @@ detect_coding_system (src, src_bytes, highest, multibytep) DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region, 2, 3, 0, - doc: /* Detect coding system of the text in the region between START and END. -Return a list of possible coding systems ordered by priority. + doc: /* Detect how the byte sequence in the region is encoded. +Return a list of possible coding systems used on decoding a byte +sequence containing the bytes in the region between START and END when +the coding system `undecided' is specified. The list is ordered by +priority decided in the current language environment. If only ASCII characters are found, it returns a list of single element `undecided' or its subsidiary coding system according to a detected @@ -6280,7 +6657,7 @@ highest priority. */) the detecting source. Then code detectors can handle the tailing byte sequence more accurately. - Fix me: This is not an perfect solution. It is better that we + Fix me: This is not a perfect solution. It is better that we add one more argument, say LAST_BLOCK, to all detect_coding_XXX. */ if (to == Z || (to == GPT && GAP_SIZE > 0)) @@ -6294,8 +6671,11 @@ highest priority. */) DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string, 1, 2, 0, - doc: /* Detect coding system of the text in STRING. -Return a list of possible coding systems ordered by priority. + doc: /* Detect how the byte sequence in STRING is encoded. +Return a list of possible coding systems used on decoding a byte +sequence containing the bytes in STRING when the coding system +`undecided' is specified. The list is ordered by priority decided in +the current language environment. If only ASCII characters are found, it returns a list of single element `undecided' or its subsidiary coding system according to a detected @@ -6308,42 +6688,25 @@ highest priority. */) { CHECK_STRING (string); - return detect_coding_system (XSTRING (string)->data, + return detect_coding_system (SDATA (string), /* "+ 1" is to include the anchor byte `\0'. With this, code detectors can handle the tailing bytes more accurately. */ - STRING_BYTES (XSTRING (string)) + 1, + SBYTES (string) + 1, !NILP (highest), STRING_MULTIBYTE (string)); } -/* Return an intersection of lists L1 and L2. */ - -static Lisp_Object -intersection (l1, l2) - Lisp_Object l1, l2; -{ - Lisp_Object val; - - for (val = Qnil; CONSP (l1); l1 = XCDR (l1)) - { - if (!NILP (Fmemq (XCAR (l1), l2))) - val = Fcons (XCAR (l1), val); - } - return val; -} - - -/* Subroutine for Fsafe_coding_systems_region_internal. +/* Subroutine for Ffind_coding_systems_region_internal. Return a list of coding systems that safely encode the multibyte - text between P and PEND. SAFE_CODINGS, if non-nil, is a list of + text between P and PEND. SAFE_CODINGS, if non-nil, is an alist of possible coding systems. If it is nil, it means that we have not yet found any coding systems. - WORK_TABLE is a copy of the char-table Vchar_coding_system_table. An - element of WORK_TABLE is set to t once the element is looked up. + WORK_TABLE a char-table of which element is set to t once the + element is looked up. If a non-ASCII single byte char is found, set *single_byte_char_found to 1. */ @@ -6354,9 +6717,12 @@ find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found) Lisp_Object safe_codings, work_table; int *single_byte_char_found; { - int c, len, idx; - Lisp_Object val; + int c, len; + Lisp_Object val, ch; + Lisp_Object prev, tail; + if (NILP (safe_codings)) + goto done_safe_codings; while (p < pend) { c = STRING_CHAR_AND_LENGTH (p, pend - p, len); @@ -6366,31 +6732,113 @@ find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found) continue; if (SINGLE_BYTE_CHAR_P (c)) *single_byte_char_found = 1; - if (NILP (safe_codings)) - continue; /* Check the safe coding systems for C. */ - val = char_table_ref_and_index (work_table, c, &idx); + ch = make_number (c); + val = Faref (work_table, ch); if (EQ (val, Qt)) /* This element was already checked. Ignore it. */ continue; /* Remember that we checked this element. */ - CHAR_TABLE_SET (work_table, make_number (idx), Qt); + Faset (work_table, ch, Qt); - /* If there are some safe coding systems for C and we have - already found the other set of coding systems for the - different characters, get the intersection of them. */ - if (!EQ (safe_codings, Qt) && !NILP (val)) - val = intersection (safe_codings, val); - safe_codings = val; + for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail)) + { + Lisp_Object elt, translation_table, hash_table, accept_latin_extra; + int encodable; + + elt = XCAR (tail); + if (CONSP (XCDR (elt))) + { + /* This entry has this format now: + ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE + ACCEPT-LATIN-EXTRA ) */ + val = XCDR (elt); + encodable = ! NILP (Faref (XCAR (val), ch)); + if (! encodable) + { + val = XCDR (val); + translation_table = XCAR (val); + hash_table = XCAR (XCDR (val)); + accept_latin_extra = XCAR (XCDR (XCDR (val))); + } + } + else + { + /* This entry has this format now: ( CODING . SAFE-CHARS) */ + encodable = ! NILP (Faref (XCDR (elt), ch)); + if (! encodable) + { + /* Transform the format to: + ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE + ACCEPT-LATIN-EXTRA ) */ + val = Fget (XCAR (elt), Qcoding_system); + translation_table + = Fplist_get (AREF (val, 3), + Qtranslation_table_for_encode); + if (SYMBOLP (translation_table)) + translation_table = Fget (translation_table, + Qtranslation_table); + hash_table + = (CHAR_TABLE_P (translation_table) + ? XCHAR_TABLE (translation_table)->extras[1] + : Qnil); + accept_latin_extra + = ((EQ (AREF (val, 0), make_number (2)) + && VECTORP (AREF (val, 4))) + ? AREF (AREF (val, 4), 16) + : Qnil); + XSETCAR (tail, list5 (XCAR (elt), XCDR (elt), + translation_table, hash_table, + accept_latin_extra)); + } + } + + if (! encodable + && ((CHAR_TABLE_P (translation_table) + && ! NILP (Faref (translation_table, ch))) + || (HASH_TABLE_P (hash_table) + && ! NILP (Fgethash (ch, hash_table, Qnil))) + || (SINGLE_BYTE_CHAR_P (c) + && ! NILP (accept_latin_extra) + && VECTORP (Vlatin_extra_code_table) + && ! NILP (AREF (Vlatin_extra_code_table, c))))) + encodable = 1; + if (encodable) + prev = tail; + else + { + /* Exclude this coding system from SAFE_CODINGS. */ + if (EQ (tail, safe_codings)) + { + safe_codings = XCDR (safe_codings); + if (NILP (safe_codings)) + goto done_safe_codings; + } + else + XSETCDR (prev, XCDR (tail)); + } + } } + + done_safe_codings: + /* If the above loop was terminated before P reaches PEND, it means + SAFE_CODINGS was set to nil. If we have not yet found an + non-ASCII single-byte char, check it now. */ + if (! *single_byte_char_found) + while (p < pend) + { + c = STRING_CHAR_AND_LENGTH (p, pend - p, len); + p += len; + if (! ASCII_BYTE_P (c) + && SINGLE_BYTE_CHAR_P (c)) + { + *single_byte_char_found = 1; + break; + } + } return safe_codings; } - -/* Return a list of coding systems that safely encode the text between - START and END. If the text contains only ASCII or is unibyte, - return t. */ - DEFUN ("find-coding-systems-region-internal", Ffind_coding_systems_region_internal, Sfind_coding_systems_region_internal, 2, 2, 0, @@ -6401,15 +6849,15 @@ DEFUN ("find-coding-systems-region-internal", Lisp_Object work_table, safe_codings; int non_ascii_p = 0; int single_byte_char_found = 0; - unsigned char *p1, *p1end, *p2, *p2end, *p; + const unsigned char *p1, *p1end, *p2, *p2end, *p; if (STRINGP (start)) { if (!STRING_MULTIBYTE (start)) return Qt; - p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start)); + p1 = SDATA (start), p1end = p1 + SBYTES (start); p2 = p2end = p1end; - if (XSTRING (start)->size != STRING_BYTES (XSTRING (start))) + if (SCHARS (start) != SBYTES (start)) non_ascii_p = 1; } else @@ -6449,29 +6897,175 @@ DEFUN ("find-coding-systems-region-internal", } /* The text contains non-ASCII characters. */ - work_table = Fcopy_sequence (Vchar_coding_system_table); - safe_codings = find_safe_codings (p1, p1end, Qt, work_table, + + work_table = Fmake_char_table (Qchar_coding_system, Qnil); + safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars)); + + safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table, &single_byte_char_found); if (p2 < p2end) safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table, &single_byte_char_found); + if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars))) + safe_codings = Qt; + else + { + /* Turn safe_codings to a list of coding systems... */ + Lisp_Object val; - if (EQ (safe_codings, Qt)) - ; /* Nothing to be done. */ - else if (!single_byte_char_found) + if (single_byte_char_found) + /* ... and append these for eight-bit chars. */ + val = Fcons (Qraw_text, + Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil))); + else + /* ... and append generic coding systems. */ + val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars)); + + for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings)) + val = Fcons (XCAR (XCAR (safe_codings)), val); + safe_codings = val; + } + + return safe_codings; +} + + +/* Search from position POS for such characters that are unencodable + accoding to SAFE_CHARS, and return a list of their positions. P + points where in the memory the character at POS exists. Limit the + search at PEND or when Nth unencodable characters are found. + + If SAFE_CHARS is a char table, an element for an unencodable + character is nil. + + If SAFE_CHARS is nil, all non-ASCII characters are unencodable. + + Otherwise, SAFE_CHARS is t, and only eight-bit-contrl and + eight-bit-graphic characters are unencodable. */ + +static Lisp_Object +unencodable_char_position (safe_chars, pos, p, pend, n) + Lisp_Object safe_chars; + int pos; + unsigned char *p, *pend; + int n; +{ + Lisp_Object pos_list; + + pos_list = Qnil; + while (p < pend) { - /* Append generic coding systems. */ - Lisp_Object args[2]; - args[0] = safe_codings; - args[1] = Fchar_table_extra_slot (Vchar_coding_system_table, - make_number (0)); - safe_codings = Fappend (2, args); + int len; + int c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len); + + if (c >= 128 + && (CHAR_TABLE_P (safe_chars) + ? NILP (CHAR_TABLE_REF (safe_chars, c)) + : (NILP (safe_chars) || c < 256))) + { + pos_list = Fcons (make_number (pos), pos_list); + if (--n <= 0) + break; + } + pos++; + p += len; + } + return Fnreverse (pos_list); +} + + +DEFUN ("unencodable-char-position", Funencodable_char_position, + Sunencodable_char_position, 3, 5, 0, + doc: /* +Return position of first un-encodable character in a region. +START and END specfiy the region and CODING-SYSTEM specifies the +encoding to check. Return nil if CODING-SYSTEM does encode the region. + +If optional 4th argument COUNT is non-nil, it specifies at most how +many un-encodable characters to search. In this case, the value is a +list of positions. + +If optional 5th argument STRING is non-nil, it is a string to search +for un-encodable characters. In that case, START and END are indexes +to the string. */) + (start, end, coding_system, count, string) + Lisp_Object start, end, coding_system, count, string; +{ + int n; + Lisp_Object safe_chars; + struct coding_system coding; + Lisp_Object positions; + int from, to; + unsigned char *p, *pend; + + if (NILP (string)) + { + validate_region (&start, &end); + from = XINT (start); + to = XINT (end); + if (NILP (current_buffer->enable_multibyte_characters)) + return Qnil; + p = CHAR_POS_ADDR (from); + if (to == GPT) + pend = GPT_ADDR; + else + pend = CHAR_POS_ADDR (to); } else - safe_codings = Fcons (Qraw_text, - Fcons (Qemacs_mule, - Fcons (Qno_conversion, safe_codings))); - return safe_codings; + { + CHECK_STRING (string); + CHECK_NATNUM (start); + CHECK_NATNUM (end); + from = XINT (start); + to = XINT (end); + if (from > to + || to > SCHARS (string)) + args_out_of_range_3 (string, start, end); + if (! STRING_MULTIBYTE (string)) + return Qnil; + p = SDATA (string) + string_char_to_byte (string, from); + pend = SDATA (string) + string_char_to_byte (string, to); + } + + setup_coding_system (Fcheck_coding_system (coding_system), &coding); + + if (NILP (count)) + n = 1; + else + { + CHECK_NATNUM (count); + n = XINT (count); + } + + if (coding.type == coding_type_no_conversion + || coding.type == coding_type_raw_text) + return Qnil; + + if (coding.type == coding_type_undecided) + safe_chars = Qnil; + else + safe_chars = coding_safe_chars (coding_system); + + if (STRINGP (string) + || from >= GPT || to <= GPT) + positions = unencodable_char_position (safe_chars, from, p, pend, n); + else + { + Lisp_Object args[2]; + + args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n); + n -= XINT (Flength (args[0])); + if (n <= 0) + positions = args[0]; + else + { + args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR, + pend, n); + positions = Fappend (2, args); + } + } + + return (NILP (count) ? Fcar (positions) : positions); } @@ -6495,7 +7089,7 @@ code_convert_region1 (start, end, coding_system, encodep) return make_number (to - from); if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) - error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); + error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system))); coding.mode |= CODING_MODE_LAST_BLOCK; coding.src_multibyte = coding.dst_multibyte @@ -6550,7 +7144,7 @@ code_convert_string1 (string, coding_system, nocopy, encodep) return (NILP (nocopy) ? Fcopy_sequence (string) : string); if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) - error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); + error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system))); coding.mode |= CODING_MODE_LAST_BLOCK; string = (encodep @@ -6609,7 +7203,7 @@ code_convert_string_norecord (string, coding_system, encodep) return string; if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) - error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); + error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system))); coding.composing = COMPOSITION_DISABLED; coding.mode |= CODING_MODE_LAST_BLOCK; @@ -6737,8 +7331,7 @@ Return the corresponding character code in Big5. */) return val; } -DEFUN ("set-terminal-coding-system-internal", - Fset_terminal_coding_system_internal, +DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal, Sset_terminal_coding_system_internal, 1, 1, 0, doc: /* Internal use only. */) (coding_system) @@ -6747,7 +7340,7 @@ DEFUN ("set-terminal-coding-system-internal", CHECK_SYMBOL (coding_system); setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding); /* We had better not send unsafe characters to terminal. */ - terminal_coding.flags |= CODING_FLAG_ISO_SAFE; + terminal_coding.mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR; /* Character composition should be disabled. */ terminal_coding.composing = COMPOSITION_DISABLED; /* Error notification should be suppressed. */ @@ -6757,8 +7350,7 @@ DEFUN ("set-terminal-coding-system-internal", return Qnil; } -DEFUN ("set-safe-terminal-coding-system-internal", - Fset_safe_terminal_coding_system_internal, +DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal, Sset_safe_terminal_coding_system_internal, 1, 1, 0, doc: /* Internal use only. */) (coding_system) @@ -6770,22 +7362,21 @@ DEFUN ("set-safe-terminal-coding-system-internal", /* Character composition should be disabled. */ safe_terminal_coding.composing = COMPOSITION_DISABLED; /* Error notification should be suppressed. */ - terminal_coding.suppress_error = 1; + safe_terminal_coding.suppress_error = 1; safe_terminal_coding.src_multibyte = 1; safe_terminal_coding.dst_multibyte = 0; return Qnil; } -DEFUN ("terminal-coding-system", - Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0, +DEFUN ("terminal-coding-system", Fterminal_coding_system, + Sterminal_coding_system, 0, 0, 0, doc: /* Return coding system specified for terminal output. */) () { return terminal_coding.symbol; } -DEFUN ("set-keyboard-coding-system-internal", - Fset_keyboard_coding_system_internal, +DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal, Sset_keyboard_coding_system_internal, 1, 1, 0, doc: /* Internal use only. */) (coding_system) @@ -6798,8 +7389,8 @@ DEFUN ("set-keyboard-coding-system-internal", return Qnil; } -DEFUN ("keyboard-coding-system", - Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0, +DEFUN ("keyboard-coding-system", Fkeyboard_coding_system, + Skeyboard_coding_system, 0, 0, 0, doc: /* Return coding system specified for decoding keyboard input. */) () { @@ -6854,7 +7445,14 @@ usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */) error ("Invalid first argument"); if (nargs < 1 + XINT (target_idx)) error ("Too few arguments for operation: %s", - XSYMBOL (operation)->name->data); + SDATA (SYMBOL_NAME (operation))); + /* For write-region, if the 6th argument (i.e. VISIT, the 5th + argument to write-region) is string, it must be treated as a + target file name. */ + if (EQ (operation, Qwrite_region) + && nargs > 5 + && STRINGP (args[5])) + target_idx = make_number (4); target = args[XINT (target_idx) + 1]; if (!(STRINGP (target) || (EQ (operation, Qopen_network_stream) && INTEGERP (target)))) @@ -6964,6 +7562,40 @@ This function is internal use only. */) return Qnil; } +DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal, + Sdefine_coding_system_internal, 1, 1, 0, + doc: /* Register CODING-SYSTEM as a base coding system. +This function is internal use only. */) + (coding_system) + Lisp_Object coding_system; +{ + Lisp_Object safe_chars, slot; + + if (NILP (Fcheck_coding_system (coding_system))) + Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil)); + safe_chars = coding_safe_chars (coding_system); + if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars)) + error ("No valid safe-chars property for %s", + SDATA (SYMBOL_NAME (coding_system))); + if (EQ (safe_chars, Qt)) + { + if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars)))) + XSETCAR (Vcoding_system_safe_chars, + Fcons (coding_system, XCAR (Vcoding_system_safe_chars))); + } + else + { + slot = Fassq (coding_system, XCDR (Vcoding_system_safe_chars)); + if (NILP (slot)) + XSETCDR (Vcoding_system_safe_chars, + nconc2 (XCDR (Vcoding_system_safe_chars), + Fcons (Fcons (coding_system, safe_chars), Qnil))); + else + XSETCDR (slot, safe_chars); + } + return Qnil; +} + #endif /* emacs */ @@ -7034,6 +7666,9 @@ init_coding_once () void syms_of_coding () { + staticpro (&Vcode_conversion_workbuf_name); + Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*"); + Qtarget_idx = intern ("target-idx"); staticpro (&Qtarget_idx); @@ -7117,9 +7752,12 @@ syms_of_coding () } } + Vcoding_system_safe_chars = Fcons (Qnil, Qnil); + staticpro (&Vcoding_system_safe_chars); + Qtranslation_table = intern ("translation-table"); staticpro (&Qtranslation_table); - Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1)); + Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2)); Qtranslation_table_id = intern ("translation-table-id"); staticpro (&Qtranslation_table_id); @@ -7141,7 +7779,7 @@ syms_of_coding () But don't staticpro it here--that is done in alloc.c. */ Qchar_table_extra_slots = intern ("char-table-extra-slots"); Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0)); - Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (2)); + Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0)); Qvalid_codes = intern ("valid-codes"); staticpro (&Qvalid_codes); @@ -7152,6 +7790,12 @@ syms_of_coding () Qraw_text = intern ("raw-text"); staticpro (&Qraw_text); + Qutf_8 = intern ("utf-8"); + staticpro (&Qutf_8); + + Qcoding_system_define_form = intern ("coding-system-define-form"); + staticpro (&Qcoding_system_define_form); + defsubr (&Scoding_system_p); defsubr (&Sread_coding_system); defsubr (&Sread_non_nil_coding_system); @@ -7159,6 +7803,7 @@ syms_of_coding () defsubr (&Sdetect_coding_region); defsubr (&Sdetect_coding_string); defsubr (&Sfind_coding_systems_region_internal); + defsubr (&Sunencodable_char_position); defsubr (&Sdecode_coding_region); defsubr (&Sencode_coding_region); defsubr (&Sdecode_coding_string); @@ -7175,6 +7820,7 @@ syms_of_coding () defsubr (&Sfind_operation_coding_system); defsubr (&Supdate_coding_systems_internal); defsubr (&Sset_coding_priority_internal); + defsubr (&Sdefine_coding_system_internal); DEFVAR_LISP ("coding-system-list", &Vcoding_system_list, doc: /* List of coding systems. @@ -7200,7 +7846,9 @@ updated by the functions `make-coding-system' and On detecting a coding system, Emacs tries code detection algorithms associated with each coding-category one by one in this order. When one algorithm agrees with a byte sequence of source text, the coding -system bound to the corresponding coding-category is selected. */); +system bound to the corresponding coding-category is selected. + +Don't modify this variable directly, but use `set-coding-priority'. */); { int i; @@ -7235,7 +7883,9 @@ the value of `buffer-file-coding-system' is used. */); Vcoding_system_for_write = Qnil; DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used, - doc: /* Coding system used in the latest file or process I/O. */); + doc: /* Coding system used in the latest file or process I/O. +Also set by `encode-coding-region', `decode-coding-region', +`encode-coding-string' and `decode-coding-string'. */); Vlast_coding_system_used = Qnil; DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion, @@ -7369,11 +8019,14 @@ coding system used in each operation can't encode the text. The default value is `select-safe-coding-system' (which see). */); Vselect_safe_coding_system_function = Qnil; - DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table, - doc: /* Char-table containing safe coding systems of each characters. -Each element doesn't include such generic coding systems that can -encode any characters. They are in the first extra slot. */); - Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil); + DEFVAR_BOOL ("coding-system-require-warning", + &coding_system_require_warning, + doc: /* Internal use only. +If non-nil, on writing a file, `select-safe-coding-system-function' is +called even if `coding-system-for-write' is non-nil. The command +`universal-coding-system-argument' binds this variable to t temporarily. */); + coding_system_require_warning = 0; + DEFVAR_BOOL ("inhibit-iso-escape-detection", &inhibit_iso_escape_detection, @@ -7401,6 +8054,12 @@ The other way to read escape sequences in a file without decoding is to explicitly specify some coding system that doesn't use ISO2022's escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */); inhibit_iso_escape_detection = 0; + + DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input, + doc: /* Char table for translating self-inserting characters. +This is applied to the result of input methods, not their input. See also +`keyboard-translate-table'. */); + Vtranslation_table_for_input = Qnil; } char * @@ -7417,7 +8076,7 @@ emacs_strerror (error_number) Lisp_Object dec = code_convert_string_norecord (build_string (str), Vlocale_coding_system, 0); - str = (char *) XSTRING (dec)->data; + str = (char *) SDATA (dec); } return str; @@ -7425,3 +8084,5 @@ emacs_strerror (error_number) #endif /* emacs */ +/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d + (do not change this comment) */