X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/5fcd436bab49a3374be4571969abbcf02803bf71..a0ed9b27c3fab5e3c1f3c249a58a99fa2948f71f:/src/coding.c diff --git a/src/coding.c b/src/coding.c index c77aa338a1..91e8dd890b 100644 --- a/src/coding.c +++ b/src/coding.c @@ -1,8 +1,8 @@ /* Coding system handler (conversion, detection, etc). Copyright (C) 2001, 2002, 2003, 2004, 2005, - 2006, 2007 Free Software Foundation, Inc. + 2006, 2007, 2008 Free Software Foundation, Inc. Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, - 2005, 2006, 2007 + 2005, 2006, 2007, 2008 National Institute of Advanced Industrial Science and Technology (AIST) Registration Number H14PRO021 Copyright (C) 2003 @@ -11,10 +11,10 @@ This file is part of GNU Emacs. -GNU Emacs is free software; you can redistribute it and/or modify +GNU Emacs is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. GNU Emacs is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -22,9 +22,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with GNU Emacs; see the file COPYING. If not, write to -the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -Boston, MA 02110-1301, USA. */ +along with GNU Emacs. If not, see . */ /*** TABLE OF CONTENTS *** @@ -625,6 +623,7 @@ enum coding_category | CATEGORY_MASK_ISO_7_ELSE \ | CATEGORY_MASK_ISO_8_ELSE \ | CATEGORY_MASK_UTF_8 \ + | CATEGORY_MASK_UTF_16_AUTO \ | CATEGORY_MASK_UTF_16_BE \ | CATEGORY_MASK_UTF_16_LE \ | CATEGORY_MASK_UTF_16_BE_NOSIG \ @@ -657,7 +656,8 @@ enum coding_category | CATEGORY_MASK_ISO_ELSE) #define CATEGORY_MASK_UTF_16 \ - (CATEGORY_MASK_UTF_16_BE \ + (CATEGORY_MASK_UTF_16_AUTO \ + | CATEGORY_MASK_UTF_16_BE \ | CATEGORY_MASK_UTF_16_LE \ | CATEGORY_MASK_UTF_16_BE_NOSIG \ | CATEGORY_MASK_UTF_16_LE_NOSIG) @@ -898,7 +898,7 @@ static INLINE void produce_charset P_ ((struct coding_system *, int *, static void produce_annotation P_ ((struct coding_system *, EMACS_INT)); static int decode_coding P_ ((struct coding_system *)); static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT, - struct coding_system *, + struct coding_system *, int *, EMACS_INT *)); static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT, struct coding_system *, @@ -955,6 +955,11 @@ record_conversion_result (struct coding_system *coding, } while (0) +/* If there are at least BYTES length of room at dst, allocate memory + for coding->destination and update dst and dst_end. We don't have + to take care of coding->source which will be relocated. It is + handled by calling coding_set_source in encode_coding. */ + #define ASSURE_DESTINATION(bytes) \ do { \ if (dst + (bytes) >= dst_end) \ @@ -967,6 +972,66 @@ record_conversion_result (struct coding_system *coding, } while (0) +/* Store multibyte form of the character C in P, and advance P to the + end of the multibyte form. This is like CHAR_STRING_ADVANCE but it + never calls MAYBE_UNIFY_CHAR. */ + +#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p) \ + do { \ + if ((c) <= MAX_1_BYTE_CHAR) \ + *(p)++ = (c); \ + else if ((c) <= MAX_2_BYTE_CHAR) \ + *(p)++ = (0xC0 | ((c) >> 6)), \ + *(p)++ = (0x80 | ((c) & 0x3F)); \ + else if ((c) <= MAX_3_BYTE_CHAR) \ + *(p)++ = (0xE0 | ((c) >> 12)), \ + *(p)++ = (0x80 | (((c) >> 6) & 0x3F)), \ + *(p)++ = (0x80 | ((c) & 0x3F)); \ + else if ((c) <= MAX_4_BYTE_CHAR) \ + *(p)++ = (0xF0 | (c >> 18)), \ + *(p)++ = (0x80 | ((c >> 12) & 0x3F)), \ + *(p)++ = (0x80 | ((c >> 6) & 0x3F)), \ + *(p)++ = (0x80 | (c & 0x3F)); \ + else if ((c) <= MAX_5_BYTE_CHAR) \ + *(p)++ = 0xF8, \ + *(p)++ = (0x80 | ((c >> 18) & 0x0F)), \ + *(p)++ = (0x80 | ((c >> 12) & 0x3F)), \ + *(p)++ = (0x80 | ((c >> 6) & 0x3F)), \ + *(p)++ = (0x80 | (c & 0x3F)); \ + else \ + (p) += BYTE8_STRING ((c) - 0x3FFF80, p); \ + } while (0) + + +/* Return the character code of character whose multibyte form is at + P, and advance P to the end of the multibyte form. This is like + STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR. */ + +#define STRING_CHAR_ADVANCE_NO_UNIFY(p) \ + (!((p)[0] & 0x80) \ + ? *(p)++ \ + : ! ((p)[0] & 0x20) \ + ? ((p) += 2, \ + ((((p)[-2] & 0x1F) << 6) \ + | ((p)[-1] & 0x3F) \ + | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \ + : ! ((p)[0] & 0x10) \ + ? ((p) += 3, \ + ((((p)[-3] & 0x0F) << 12) \ + | (((p)[-2] & 0x3F) << 6) \ + | ((p)[-1] & 0x3F))) \ + : ! ((p)[0] & 0x08) \ + ? ((p) += 4, \ + ((((p)[-4] & 0xF) << 18) \ + | (((p)[-3] & 0x3F) << 12) \ + | (((p)[-2] & 0x3F) << 6) \ + | ((p)[-1] & 0x3F))) \ + : ((p) += 5, \ + ((((p)[-4] & 0x3F) << 18) \ + | (((p)[-3] & 0x3F) << 12) \ + | (((p)[-2] & 0x3F) << 6) \ + | ((p)[-1] & 0x3F)))) + static void coding_set_source (coding) @@ -999,7 +1064,7 @@ coding_set_destination (coding) { if (coding->src_pos < 0) { - coding->destination = BEG_ADDR + coding->dst_pos_byte - 1; + coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE; coding->dst_bytes = (GAP_END_ADDR - (coding->src_bytes - coding->consumed) - coding->destination); @@ -1009,7 +1074,7 @@ coding_set_destination (coding) /* We are sure that coding->dst_pos_byte is before the gap of the buffer. */ coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object)) - + coding->dst_pos_byte - 1); + + coding->dst_pos_byte - BEG_BYTE); coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object)) - coding->destination); } @@ -1032,20 +1097,23 @@ coding_alloc_by_realloc (coding, bytes) } static void -coding_alloc_by_making_gap (coding, offset, bytes) +coding_alloc_by_making_gap (coding, gap_head_used, bytes) struct coding_system *coding; - EMACS_INT offset, bytes; + EMACS_INT gap_head_used, bytes; { - if (BUFFERP (coding->dst_object) - && EQ (coding->src_object, coding->dst_object)) + if (EQ (coding->src_object, coding->dst_object)) { - EMACS_INT add = offset + (coding->src_bytes - coding->consumed); + /* The gap may contain the produced data at the head and not-yet + consumed data at the tail. To preserve those data, we at + first make the gap size to zero, then increase the gap + size. */ + EMACS_INT add = GAP_SIZE; - GPT += offset, GPT_BYTE += offset; - GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add; + GPT += gap_head_used, GPT_BYTE += gap_head_used; + GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add; make_gap (bytes); GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; - GPT -= offset, GPT_BYTE -= offset; + GPT -= gap_head_used, GPT_BYTE -= gap_head_used; } else { @@ -1068,7 +1136,11 @@ alloc_destination (coding, nbytes, dst) EMACS_INT offset = dst - coding->destination; if (BUFFERP (coding->dst_object)) - coding_alloc_by_making_gap (coding, offset, nbytes); + { + struct buffer *buf = XBUFFER (coding->dst_object); + + coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes); + } else coding_alloc_by_realloc (coding, nbytes); record_conversion_result (coding, CODING_RESULT_SUCCESS); @@ -1225,6 +1297,8 @@ decode_coding_utf_8 (coding) int consumed_chars = 0, consumed_chars_base; int multibytep = coding->src_multibyte; Lisp_Object attr, charset_list; + int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); + int byte_after_cr = -1; CODING_GET_INFO (coding, attr, charset_list); @@ -1238,13 +1312,18 @@ decode_coding_utf_8 (coding) if (charbuf >= charbuf_end) break; - ONE_MORE_BYTE (c1); + if (byte_after_cr >= 0) + c1 = byte_after_cr, byte_after_cr = -1; + else + ONE_MORE_BYTE (c1); if (c1 < 0) { c = - c1; } else if (UTF_8_1_OCTET_P(c1)) { + if (eol_crlf && c1 == '\r') + ONE_MORE_BYTE (byte_after_cr); c = c1; } else @@ -1353,7 +1432,7 @@ encode_coding_utf_8 (coding) } else { - CHAR_STRING_ADVANCE (c, pend); + CHAR_STRING_ADVANCE_NO_UNIFY (c, pend); for (p = str; p < pend; p++) EMIT_ONE_BYTE (*p); } @@ -1370,7 +1449,7 @@ encode_coding_utf_8 (coding) if (CHAR_BYTE8_P (c)) *dst++ = CHAR_TO_BYTE8 (c); else - dst += CHAR_STRING (c, dst); + CHAR_STRING_ADVANCE_NO_UNIFY (c, dst); produced_chars++; } } @@ -1434,11 +1513,44 @@ detect_coding_utf_16 (coding, detect_info) | CATEGORY_MASK_UTF_16_BE_NOSIG | CATEGORY_MASK_UTF_16_LE_NOSIG); } - else if (c1 >= 0 && c2 >= 0) + else { + /* We check the dispersion of Eth and Oth bytes where E is even and + O is odd. If both are high, we assume binary data.*/ + unsigned char e[256], o[256]; + unsigned e_num = 1, o_num = 1; + + memset (e, 0, 256); + memset (o, 0, 256); + e[c1] = 1; + o[c2] = 1; + detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE); + + while (1) + { + ONE_MORE_BYTE (c1); + ONE_MORE_BYTE (c2); + if (! e[c1]) + { + e[c1] = 1; + e_num++; + if (e_num >= 128) + break; + } + if (! o[c2]) + { + o[c1] = 1; + o_num++; + if (o_num >= 128) + break; + } + } + detect_info->rejected |= CATEGORY_MASK_UTF_16; + return 0; } + no_more_source: return 1; } @@ -1458,6 +1570,8 @@ decode_coding_utf_16 (coding) enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding); int surrogate = CODING_UTF_16_SURROGATE (coding); Lisp_Object attr, charset_list; + int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); + int byte_after_cr1 = -1, byte_after_cr2 = -1; CODING_GET_INFO (coding, attr, charset_list); @@ -1497,13 +1611,19 @@ decode_coding_utf_16 (coding) if (charbuf + 2 >= charbuf_end) break; - ONE_MORE_BYTE (c1); + if (byte_after_cr1 >= 0) + c1 = byte_after_cr1, byte_after_cr1 = -1; + else + ONE_MORE_BYTE (c1); if (c1 < 0) { *charbuf++ = -c1; continue; } - ONE_MORE_BYTE (c2); + if (byte_after_cr2 >= 0) + c2 = byte_after_cr2, byte_after_cr2 = -1; + else + ONE_MORE_BYTE (c2); if (c2 < 0) { *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1); @@ -1512,6 +1632,7 @@ decode_coding_utf_16 (coding) } c = (endian == utf_16_big_endian ? ((c1 << 8) | c2) : ((c2 << 8) | c1)); + if (surrogate) { if (! UTF_16_LOW_SURROGATE_P (c)) @@ -1540,7 +1661,14 @@ decode_coding_utf_16 (coding) if (UTF_16_HIGH_SURROGATE_P (c)) CODING_UTF_16_SURROGATE (coding) = surrogate = c; else - *charbuf++ = c; + { + if (eol_crlf && c == '\r') + { + ONE_MORE_BYTE (byte_after_cr1); + ONE_MORE_BYTE (byte_after_cr2); + } + *charbuf++ = c; + } } } @@ -1711,7 +1839,7 @@ emacs_mule_char (coding, src, nbytes, nchars, id) { if (c >= 0xA0) { - /* Old style component character of a compostion. */ + /* Old style component character of a composition. */ if (c == 0xA0) { ONE_MORE_BYTE (c); @@ -1831,7 +1959,7 @@ detect_coding_emacs_mule (coding, detect_info) /* Perhaps the start of composite character. We simple skip it because analyzing it is too heavy for detecting. But, at least, we check that the composite character - constitues of more than 4 bytes. */ + constitutes of more than 4 bytes. */ const unsigned char *src_base; repeat: @@ -1898,7 +2026,7 @@ detect_coding_emacs_mule (coding, detect_info) value 0. */ #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \ - if (1) \ + do \ { \ int c; \ int nbytes, nchars; \ @@ -1916,7 +2044,7 @@ detect_coding_emacs_mule (coding, detect_info) src += nbytes; \ consumed_chars += nchars; \ } \ - else + while (0) /* Decode a composition rule represented as a component of composition @@ -2072,6 +2200,8 @@ decode_coding_emacs_mule (coding) int char_offset = coding->produced_char; int last_offset = char_offset; int last_id = charset_ascii; + int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); + int byte_after_cr = -1; CODING_GET_INFO (coding, attrs, charset_list); @@ -2085,7 +2215,10 @@ decode_coding_emacs_mule (coding) if (charbuf >= charbuf_end) break; - ONE_MORE_BYTE (c); + if (byte_after_cr >= 0) + c = byte_after_cr, byte_after_cr = -1; + else + ONE_MORE_BYTE (c); if (c < 0) { *charbuf++ = -c; @@ -2093,6 +2226,8 @@ decode_coding_emacs_mule (coding) } else if (c < 0x80) { + if (eol_crlf && c == '\r') + ONE_MORE_BYTE (byte_after_cr); *charbuf++ = c; char_offset++; } @@ -2558,6 +2693,8 @@ detect_coding_iso_2022 (coding, detect_info) struct coding_system *this = &(coding_categories[i]); Lisp_Object attrs, val; + if (this->id < 0) + continue; attrs = CODING_ID_ATTRS (this->id); if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list)) @@ -2945,6 +3082,8 @@ decode_coding_iso_2022 (coding) int char_offset = coding->produced_char; int last_offset = char_offset; int last_id = charset_ascii; + int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); + int byte_after_cr = -1; CODING_GET_INFO (coding, attrs, charset_list); setup_iso_safe_charsets (attrs); @@ -2962,7 +3101,10 @@ decode_coding_iso_2022 (coding) if (charbuf >= charbuf_end) break; - ONE_MORE_BYTE (c1); + if (byte_after_cr >= 0) + c1 = byte_after_cr, byte_after_cr = -1; + else + ONE_MORE_BYTE (c1); if (c1 < 0) goto invalid_code; @@ -3021,6 +3163,8 @@ decode_coding_iso_2022 (coding) break; case ISO_control_0: + if (eol_crlf && c1 == '\r') + ONE_MORE_BYTE (byte_after_cr); MAYBE_FINISH_COMPOSITION (); charset = CHARSET_FROM_ID (charset_ascii); break; @@ -4091,6 +4235,8 @@ decode_coding_sjis (coding) int char_offset = coding->produced_char; int last_offset = char_offset; int last_id = charset_ascii; + int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); + int byte_after_cr = -1; CODING_GET_INFO (coding, attrs, charset_list); @@ -4111,11 +4257,18 @@ decode_coding_sjis (coding) if (charbuf >= charbuf_end) break; - ONE_MORE_BYTE (c); + if (byte_after_cr >= 0) + c = byte_after_cr, byte_after_cr = -1; + else + ONE_MORE_BYTE (c); if (c < 0) goto invalid_code; if (c < 0x80) - charset = charset_roman; + { + if (eol_crlf && c == '\r') + ONE_MORE_BYTE (byte_after_cr); + charset = charset_roman; + } else if (c == 0x80 || c == 0xA0) goto invalid_code; else if (c >= 0xA1 && c <= 0xDF) @@ -4193,6 +4346,8 @@ decode_coding_big5 (coding) int char_offset = coding->produced_char; int last_offset = char_offset; int last_id = charset_ascii; + int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); + int byte_after_cr = -1; CODING_GET_INFO (coding, attrs, charset_list); val = charset_list; @@ -4210,12 +4365,19 @@ decode_coding_big5 (coding) if (charbuf >= charbuf_end) break; - ONE_MORE_BYTE (c); + if (byte_after_cr >= 0) + c = byte_after_cr, byte_after_cr = -1; + else + ONE_MORE_BYTE (c); if (c < 0) goto invalid_code; if (c < 0x80) - charset = charset_roman; + { + if (eol_crlf && c == '\r') + ONE_MORE_BYTE (byte_after_cr); + charset = charset_roman; + } else { /* BIG5 -> Big5 */ @@ -4590,7 +4752,7 @@ encode_coding_ccl (coding) else { ASSURE_DESTINATION (ccl.produced); - for (i = 0; i < ccl.produced; i++) + for (i = 0; i < ccl.produced; i++) *dst++ = destination_charbuf[i] & 0xFF; produced_chars += ccl.produced; } @@ -4632,10 +4794,19 @@ static void decode_coding_raw_text (coding) struct coding_system *coding; { + int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); + coding->chars_at_source = 1; - coding->consumed_char = 0; - coding->consumed = 0; - record_conversion_result (coding, CODING_RESULT_SUCCESS); + coding->consumed_char = coding->src_chars; + coding->consumed = coding->src_bytes; + if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r') + { + coding->consumed_char--; + coding->consumed--; + record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC); + } + else + record_conversion_result (coding, CODING_RESULT_SUCCESS); } static int @@ -4702,7 +4873,6 @@ encode_coding_raw_text (coding) *dst++ = CHAR_TO_BYTE8 (c); else CHAR_STRING_ADVANCE (c, dst); - produced_chars++; } } else @@ -4710,8 +4880,8 @@ encode_coding_raw_text (coding) ASSURE_DESTINATION (charbuf_end - charbuf); while (charbuf < charbuf_end && dst < dst_end) *dst++ = *charbuf++; - produced_chars = dst - (coding->destination + coding->dst_bytes); } + produced_chars = charbuf - coding->charbuf; } record_conversion_result (coding, CODING_RESULT_SUCCESS); coding->produced_char += produced_chars; @@ -4770,7 +4940,7 @@ detect_coding_charset (coding, detect_info) if (src == src_end) goto too_short; ONE_MORE_BYTE (c); - if (c < charset->code_space[(dim - 1 - idx) * 2] + if (c < charset->code_space[(dim - 1 - idx) * 2] || c > charset->code_space[(dim - 1 - idx) * 2 + 1]) break; } @@ -4829,6 +4999,8 @@ decode_coding_charset (coding) int char_offset = coding->produced_char; int last_offset = char_offset; int last_id = charset_ascii; + int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); + int byte_after_cr = -1; CODING_GET_INFO (coding, attrs, charset_list); valids = AREF (attrs, coding_attr_charset_valids); @@ -4848,7 +5020,17 @@ decode_coding_charset (coding) if (charbuf >= charbuf_end) break; - ONE_MORE_BYTE (c); + if (byte_after_cr >= 0) + { + c = byte_after_cr; + byte_after_cr = -1; + } + else + { + ONE_MORE_BYTE (c); + if (eol_crlf && c == '\r') + ONE_MORE_BYTE (byte_after_cr); + } if (c < 0) goto invalid_code; code = c; @@ -5529,32 +5711,53 @@ detect_coding (coding) { int c, i; struct coding_detection_info detect_info; + int null_byte_found = 0, eight_bit_found = 0; detect_info.checked = detect_info.found = detect_info.rejected = 0; - for (i = 0, src = coding->source; src < src_end; i++, src++) + coding->head_ascii = -1; + for (src = coding->source; src < src_end; src++) { c = *src; if (c & 0x80) - break; - if (c < 0x20 - && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) - && ! inhibit_iso_escape_detection - && ! detect_info.checked) { - coding->head_ascii = src - (coding->source + coding->consumed); - if (detect_coding_iso_2022 (coding, &detect_info)) + eight_bit_found = 1; + if (coding->head_ascii < 0) + coding->head_ascii = src - coding->source; + if (null_byte_found) + break; + } + else if (c < 0x20) + { + if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) + && ! inhibit_iso_escape_detection + && ! detect_info.checked) { - /* We have scanned the whole data. */ - if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) - /* We didn't find an 8-bit code. */ - src = src_end; - break; + if (coding->head_ascii < 0) + coding->head_ascii = src - coding->source; + if (detect_coding_iso_2022 (coding, &detect_info)) + { + /* We have scanned the whole data. */ + if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) + /* We didn't find an 8-bit code. We may have + found a null-byte, but it's very rare that + a binary file confirm to ISO-2022. */ + src = src_end; + break; + } + } + else if (! c) + { + null_byte_found = 1; + if (eight_bit_found) + break; } } } - coding->head_ascii = src - (coding->source + coding->consumed); + if (coding->head_ascii < 0) + coding->head_ascii = src - coding->source; - if (coding->head_ascii < coding->src_bytes + if (null_byte_found || eight_bit_found + || coding->head_ascii < coding->src_bytes || detect_info.found) { enum coding_category category; @@ -5570,48 +5773,58 @@ detect_coding (coding) break; } else - for (i = 0; i < coding_category_raw_text; i++) - { - category = coding_priorities[i]; - this = coding_categories + category; - if (this->id < 0) - { - /* No coding system of this category is defined. */ - detect_info.rejected |= (1 << category); - } - else if (category >= coding_category_raw_text) - continue; - else if (detect_info.checked & (1 << category)) - { - if (detect_info.found & (1 << category)) - break; - } - else if ((*(this->detector)) (coding, &detect_info) - && detect_info.found & (1 << category)) - { - if (category == coding_category_utf_16_auto) - { - if (detect_info.found & CATEGORY_MASK_UTF_16_LE) - category = coding_category_utf_16_le; - else - category = coding_category_utf_16_be; - } - break; - } - } - - if (i < coding_category_raw_text) - setup_coding_system (CODING_ID_NAME (this->id), coding); - else if (detect_info.rejected == CATEGORY_MASK_ANY) - setup_coding_system (Qraw_text, coding); - else if (detect_info.rejected) - for (i = 0; i < coding_category_raw_text; i++) - if (! (detect_info.rejected & (1 << coding_priorities[i]))) + { + if (null_byte_found) { - this = coding_categories + coding_priorities[i]; - setup_coding_system (CODING_ID_NAME (this->id), coding); - break; + detect_info.checked |= ~CATEGORY_MASK_UTF_16; + detect_info.rejected |= ~CATEGORY_MASK_UTF_16; } + for (i = 0; i < coding_category_raw_text; i++) + { + category = coding_priorities[i]; + this = coding_categories + category; + if (this->id < 0) + { + /* No coding system of this category is defined. */ + detect_info.rejected |= (1 << category); + } + else if (category >= coding_category_raw_text) + continue; + else if (detect_info.checked & (1 << category)) + { + if (detect_info.found & (1 << category)) + break; + } + else if ((*(this->detector)) (coding, &detect_info) + && detect_info.found & (1 << category)) + { + if (category == coding_category_utf_16_auto) + { + if (detect_info.found & CATEGORY_MASK_UTF_16_LE) + category = coding_category_utf_16_le; + else + category = coding_category_utf_16_be; + } + break; + } + } + + if (i < coding_category_raw_text) + setup_coding_system (CODING_ID_NAME (this->id), coding); + else if (null_byte_found) + setup_coding_system (Qno_conversion, coding); + else if ((detect_info.rejected & CATEGORY_MASK_ANY) + == CATEGORY_MASK_ANY) + setup_coding_system (Qraw_text, coding); + else if (detect_info.rejected) + for (i = 0; i < coding_category_raw_text; i++) + if (! (detect_info.rejected & (1 << coding_priorities[i]))) + { + this = coding_categories + coding_priorities[i]; + setup_coding_system (CODING_ID_NAME (this->id), coding); + break; + } + } } } else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) @@ -5641,7 +5854,7 @@ decode_eol (coding) { Lisp_Object eol_type; unsigned char *p, *pbeg, *pend; - + eol_type = CODING_ID_EOL_TYPE (coding->id); if (EQ (eol_type, Qunix)) return; @@ -5717,7 +5930,10 @@ decode_eol (coding) pos_end--; } pos++; - pos_byte += BYTES_BY_CHAR_HEAD (*p); + if (coding->dst_multibyte) + pos_byte += BYTES_BY_CHAR_HEAD (*p); + else + pos_byte++; } } coding->produced -= n; @@ -5877,19 +6093,21 @@ produce_chars (coding, translation_table, last_block) { unsigned char *dst = coding->destination + coding->produced; unsigned char *dst_end = coding->destination + coding->dst_bytes; - int produced; - int produced_chars = 0; + EMACS_INT produced; + EMACS_INT produced_chars = 0; int carryover = 0; if (! coding->chars_at_source) { - /* Characters are in coding->charbuf. */ + /* Source characters are in coding->charbuf. */ int *buf = coding->charbuf; int *buf_end = buf + coding->charbuf_used; - if (BUFFERP (coding->src_object) - && EQ (coding->src_object, coding->dst_object)) - dst_end = ((unsigned char *) coding->source) + coding->consumed; + if (EQ (coding->src_object, coding->dst_object)) + { + coding_set_source (coding); + dst_end = ((unsigned char *) coding->source) + coding->consumed; + } while (buf < buf_end) { @@ -5916,7 +6134,13 @@ produce_chars (coding, translation_table, last_block) buf_end - buf + MAX_MULTIBYTE_LENGTH * to_nchars, dst); - dst_end = coding->destination + coding->dst_bytes; + if (EQ (coding->src_object, coding->dst_object)) + { + coding_set_source (coding); + dst_end = ((unsigned char *) coding->source) + coding->consumed; + } + else + dst_end = coding->destination + coding->dst_bytes; } for (i = 0; i < to_nchars; i++) @@ -5925,7 +6149,7 @@ produce_chars (coding, translation_table, last_block) c = XINT (AREF (trans, i)); if (coding->dst_multibyte || ! CHAR_BYTE8_P (c)) - CHAR_STRING_ADVANCE (c, dst); + CHAR_STRING_ADVANCE_NO_UNIFY (c, dst); else *dst++ = CHAR_TO_BYTE8 (c); } @@ -5942,18 +6166,18 @@ produce_chars (coding, translation_table, last_block) } else { + /* Source characters are at coding->source. */ const unsigned char *src = coding->source; - const unsigned char *src_end = src + coding->src_bytes; - Lisp_Object eol_type; - - eol_type = CODING_ID_EOL_TYPE (coding->id); + const unsigned char *src_end = src + coding->consumed; + if (EQ (coding->dst_object, coding->src_object)) + dst_end = (unsigned char *) src; if (coding->src_multibyte != coding->dst_multibyte) { if (coding->src_multibyte) { int multibytep = 1; - int consumed_chars; + EMACS_INT consumed_chars; while (1) { @@ -5961,37 +6185,23 @@ produce_chars (coding, translation_table, last_block) int c; ONE_MORE_BYTE (c); - if (c == '\r') + if (dst == dst_end) { - if (EQ (eol_type, Qdos)) + if (EQ (coding->src_object, coding->dst_object)) + dst_end = (unsigned char *) src; + if (dst == dst_end) { - if (src == src_end) - { - record_conversion_result - (coding, CODING_RESULT_INSUFFICIENT_SRC); - goto no_more_source; - } - if (*src == '\n') - c = *src++; + EMACS_INT offset = src - coding->source; + + dst = alloc_destination (coding, src_end - src + 1, + dst); + dst_end = coding->destination + coding->dst_bytes; + coding_set_source (coding); + src = coding->source + offset; + src_end = coding->source + coding->src_bytes; + if (EQ (coding->src_object, coding->dst_object)) + dst_end = (unsigned char *) src; } - else if (EQ (eol_type, Qmac)) - c = '\n'; - } - if (dst == dst_end) - { - coding->consumed = src - coding->source; - - if (EQ (coding->src_object, coding->dst_object)) - dst_end = (unsigned char *) src; - if (dst == dst_end) - { - dst = alloc_destination (coding, src_end - src + 1, - dst); - dst_end = coding->destination + coding->dst_bytes; - coding_set_source (coding); - src = coding->source + coding->consumed; - src_end = coding->source + coding->src_bytes; - } } *dst++ = c; produced_chars++; @@ -6005,31 +6215,26 @@ produce_chars (coding, translation_table, last_block) int multibytep = 1; int c = *src++; - if (c == '\r') - { - if (EQ (eol_type, Qdos)) - { - if (src < src_end - && *src == '\n') - c = *src++; - } - else if (EQ (eol_type, Qmac)) - c = '\n'; - } if (dst >= dst_end - 1) { - coding->consumed = src - coding->source; - if (EQ (coding->src_object, coding->dst_object)) dst_end = (unsigned char *) src; if (dst >= dst_end - 1) { - dst = alloc_destination (coding, src_end - src + 2, - dst); + EMACS_INT offset = src - coding->source; + EMACS_INT more_bytes; + + if (EQ (coding->src_object, coding->dst_object)) + more_bytes = ((src_end - src) / 2) + 2; + else + more_bytes = src_end - src + 2; + dst = alloc_destination (coding, more_bytes, dst); dst_end = coding->destination + coding->dst_bytes; coding_set_source (coding); - src = coding->source + coding->consumed; + src = coding->source + offset; src_end = coding->source + coding->src_bytes; + if (EQ (coding->src_object, coding->dst_object)) + dst_end = (unsigned char *) src; } } EMIT_ONE_BYTE (c); @@ -6039,7 +6244,7 @@ produce_chars (coding, translation_table, last_block) { if (!EQ (coding->src_object, coding->dst_object)) { - int require = coding->src_bytes - coding->dst_bytes; + EMACS_INT require = coding->src_bytes - coding->dst_bytes; if (require > 0) { @@ -6051,28 +6256,10 @@ produce_chars (coding, translation_table, last_block) src_end = coding->source + coding->src_bytes; } } - produced_chars = coding->src_chars; + produced_chars = coding->consumed_char; while (src < src_end) - { - int c = *src++; - - if (c == '\r') - { - if (EQ (eol_type, Qdos)) - { - if (src < src_end - && *src == '\n') - c = *src++; - produced_chars--; - } - else if (EQ (eol_type, Qmac)) - c = '\n'; - } - *dst++ = c; - } + *dst++ = *src++; } - coding->consumed = coding->src_bytes; - coding->consumed_char = coding->src_chars; } produced = dst - (coding->destination + coding->produced); @@ -6535,12 +6722,12 @@ consume_chars (coding, translation_table, max_lookup) if (coding->encoder == encode_coding_raw_text) c = *src++, pos++; else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0) - c = STRING_CHAR_ADVANCE (src), pos += bytes; + c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes; else c = BYTE8_TO_CHAR (*src), src++, pos++; } else - c = STRING_CHAR_ADVANCE (src), pos++; + c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++; if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)) c = '\n'; if (! EQ (eol_type, Qunix)) @@ -6690,7 +6877,7 @@ make_conversion_work_buffer (multibyte) } current = current_buffer; set_buffer_internal (XBUFFER (workbuf)); - Ferase_buffer (); + Ferase_buffer (); current_buffer->undo_list = Qt; current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil; set_buffer_internal (current); @@ -6849,10 +7036,11 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte, EMACS_INT chars = to - from; EMACS_INT bytes = to_byte - from_byte; Lisp_Object attrs; - Lisp_Object buffer; int saved_pt = -1, saved_pt_byte; + int need_marker_adjustment = 0; + Lisp_Object old_deactivate_mark; - buffer = Fcurrent_buffer (); + old_deactivate_mark = Vdeactivate_mark; if (NILP (dst_object)) { @@ -6877,8 +7065,17 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte, move_gap_both (from, from_byte); if (EQ (src_object, dst_object)) { + struct Lisp_Marker *tail; + + for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next) + { + tail->need_adjustment + = tail->charpos == (tail->insertion_type ? from : to); + need_marker_adjustment |= tail->need_adjustment; + } saved_pt = PT, saved_pt_byte = PT_BYTE; TEMP_SET_PT_BOTH (from, from_byte); + current_buffer->text->inhibit_shrinking = 1; del_range_both (from, from_byte, to, to_byte, 1); coding->src_pos = -chars; coding->src_pos_byte = -bytes; @@ -6898,10 +7095,10 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte, || (! NILP (CODING_ATTR_POST_READ (attrs)) && NILP (dst_object))) { - coding->dst_object = code_conversion_save (1, 1); + coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding); + coding->dst_object = code_conversion_save (1, coding->dst_multibyte); coding->dst_pos = BEG; coding->dst_pos_byte = BEG_BYTE; - coding->dst_multibyte = 1; } else if (BUFFERP (dst_object)) { @@ -6916,6 +7113,9 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte, { code_conversion_save (0, 0); coding->dst_object = Qnil; + /* Most callers presume this will return a multibyte result, and they + won't use `binary' or `raw-text' anyway, so let's not worry about + CODING_FOR_UNIBYTE. */ coding->dst_multibyte = 1; } @@ -6926,12 +7126,13 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte, if (! NILP (CODING_ATTR_POST_READ (attrs))) { - struct gcpro gcpro1, gcpro2; + struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5; EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE; Lisp_Object val; TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte); - GCPRO2 (coding->src_object, coding->dst_object); + GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object, + old_deactivate_mark); val = safe_call1 (CODING_ATTR_POST_READ (attrs), make_number (coding->produced_char)); UNGCPRO; @@ -6949,8 +7150,7 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte, set_buffer_internal (XBUFFER (coding->dst_object)); if (dst_bytes < coding->produced) { - destination - = (unsigned char *) xrealloc (destination, coding->produced); + destination = xrealloc (destination, coding->produced); if (! destination) { record_conversion_result (coding, @@ -6972,6 +7172,7 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte, As we have moved PT while replacing the original buffer contents, we must recover it now. */ set_buffer_internal (XBUFFER (src_object)); + current_buffer->text->inhibit_shrinking = 0; if (saved_pt < from) TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte); else if (saved_pt < from + chars) @@ -6982,8 +7183,32 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte, else TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes), saved_pt_byte + (coding->produced - bytes)); + + if (need_marker_adjustment) + { + struct Lisp_Marker *tail; + + for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next) + if (tail->need_adjustment) + { + tail->need_adjustment = 0; + if (tail->insertion_type) + { + tail->bytepos = from_byte; + tail->charpos = from; + } + else + { + tail->bytepos = from_byte + coding->produced; + tail->charpos + = (NILP (current_buffer->enable_multibyte_characters) + ? tail->bytepos : from + coding->produced_char); + } + } + } } + Vdeactivate_mark = old_deactivate_mark; unbind_to (count, coding->dst_object); } @@ -7000,11 +7225,12 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte, EMACS_INT chars = to - from; EMACS_INT bytes = to_byte - from_byte; Lisp_Object attrs; - Lisp_Object buffer; int saved_pt = -1, saved_pt_byte; + int need_marker_adjustment = 0; int kill_src_buffer = 0; + Lisp_Object old_deactivate_mark; - buffer = Fcurrent_buffer (); + old_deactivate_mark = Vdeactivate_mark; coding->src_object = src_object; coding->src_chars = chars; @@ -7013,6 +7239,18 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte, attrs = CODING_ID_ATTRS (coding->id); + if (EQ (src_object, dst_object)) + { + struct Lisp_Marker *tail; + + for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next) + { + tail->need_adjustment + = tail->charpos == (tail->insertion_type ? from : to); + need_marker_adjustment |= tail->need_adjustment; + } + } + if (! NILP (CODING_ATTR_PRE_WRITE (attrs))) { coding->src_object = code_conversion_save (1, coding->src_multibyte); @@ -7034,11 +7272,15 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte, { Lisp_Object args[3]; + struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5; + GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object, + old_deactivate_mark); args[0] = CODING_ATTR_PRE_WRITE (attrs); args[1] = make_number (BEG); args[2] = make_number (Z); safe_call (3, args); + UNGCPRO; } if (XBUFFER (coding->src_object) != current_buffer) kill_src_buffer = 1; @@ -7142,10 +7384,35 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte, else TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes), saved_pt_byte + (coding->produced - bytes)); + + if (need_marker_adjustment) + { + struct Lisp_Marker *tail; + + for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next) + if (tail->need_adjustment) + { + tail->need_adjustment = 0; + if (tail->insertion_type) + { + tail->bytepos = from_byte; + tail->charpos = from; + } + else + { + tail->bytepos = from_byte + coding->produced; + tail->charpos + = (NILP (current_buffer->enable_multibyte_characters) + ? tail->bytepos : from + coding->produced_char); + } + } + } } if (kill_src_buffer) Fkill_buffer (coding->src_object); + + Vdeactivate_mark = old_deactivate_mark; unbind_to (count, Qnil); } @@ -7166,14 +7433,14 @@ DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0, doc: /* Return t if OBJECT is nil or a coding-system. See the documentation of `define-coding-system' for information about coding-system objects. */) - (obj) - Lisp_Object obj; + (object) + Lisp_Object object; { - if (NILP (obj) - || CODING_SYSTEM_ID (obj) >= 0) + if (NILP (object) + || CODING_SYSTEM_ID (object) >= 0) return Qt; - if (! SYMBOLP (obj) - || NILP (Fget (obj, Qcoding_system_define_form))) + if (! SYMBOLP (object) + || NILP (Fget (object, Qcoding_system_define_form))) return Qnil; return Qt; } @@ -7258,7 +7525,8 @@ Lisp_Object detect_coding_system (src, src_chars, src_bytes, highest, multibytep, coding_system) const unsigned char *src; - int src_chars, src_bytes, highest; + EMACS_INT src_chars, src_bytes; + int highest; int multibytep; Lisp_Object coding_system; { @@ -7269,6 +7537,7 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep, int id; struct coding_detection_info detect_info; enum coding_category base_category; + int null_byte_found = 0, eight_bit_found = 0; if (NILP (coding_system)) coding_system = Qundecided; @@ -7294,33 +7563,54 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep, struct coding_system *this; int c, i; + coding.head_ascii = -1; /* Skip all ASCII bytes except for a few ISO2022 controls. */ - for (i = 0; src < src_end; i++, src++) + for (; src < src_end; src++) { c = *src; if (c & 0x80) - break; - if (c < 0x20 - && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) - && ! inhibit_iso_escape_detection) { - coding.head_ascii = src - coding.source; - if (detect_coding_iso_2022 (&coding, &detect_info)) + eight_bit_found = 1; + if (coding.head_ascii < 0) + coding.head_ascii = src - coding.source; + if (null_byte_found) + break; + } + if (c < 0x20) + { + if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) + && ! inhibit_iso_escape_detection + && ! detect_info.checked) { - /* We have scanned the whole data. */ - if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) - /* We didn't find an 8-bit code. */ - src = src_end; - break; + if (coding.head_ascii < 0) + coding.head_ascii = src - coding.source; + if (detect_coding_iso_2022 (&coding, &detect_info)) + { + /* We have scanned the whole data. */ + if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) + /* We didn't find an 8-bit code. We may have + found a null-byte, but it's very rare that + a binary file confirm to ISO-2022. */ + src = src_end; + break; + } + } + else if (! c) + { + null_byte_found = 1; + if (eight_bit_found) + break; } } } - coding.head_ascii = src - coding.source; + if (coding.head_ascii < 0) + coding.head_ascii = src - coding.source; - if (src < src_end + if (null_byte_found || eight_bit_found + || coding.head_ascii < coding.src_bytes || detect_info.found) { - if (src == src_end) + if (coding.head_ascii == coding.src_bytes) /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */ for (i = 0; i < coding_category_raw_text; i++) { @@ -7330,44 +7620,48 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep, break; } else - for (i = 0; i < coding_category_raw_text; i++) - { - category = coding_priorities[i]; - this = coding_categories + category; + { + if (null_byte_found) + { + detect_info.checked |= ~CATEGORY_MASK_UTF_16; + detect_info.rejected |= ~CATEGORY_MASK_UTF_16; + } + for (i = 0; i < coding_category_raw_text; i++) + { + category = coding_priorities[i]; + this = coding_categories + category; - if (this->id < 0) - { - /* No coding system of this category is defined. */ - detect_info.rejected |= (1 << category); - } - else if (category >= coding_category_raw_text) - continue; - else if (detect_info.checked & (1 << category)) - { - if (highest - && (detect_info.found & (1 << category))) - break; - } - else - { - if ((*(this->detector)) (&coding, &detect_info) - && highest - && (detect_info.found & (1 << category))) - { - if (category == coding_category_utf_16_auto) - { - if (detect_info.found & CATEGORY_MASK_UTF_16_LE) - category = coding_category_utf_16_le; - else - category = coding_category_utf_16_be; - } + if (this->id < 0) + { + /* No coding system of this category is defined. */ + detect_info.rejected |= (1 << category); + } + else if (category >= coding_category_raw_text) + continue; + else if (detect_info.checked & (1 << category)) + { + if (highest + && (detect_info.found & (1 << category))) break; - } - } - } + } + else if ((*(this->detector)) (&coding, &detect_info) + && highest + && (detect_info.found & (1 << category))) + { + if (category == coding_category_utf_16_auto) + { + if (detect_info.found & CATEGORY_MASK_UTF_16_LE) + category = coding_category_utf_16_le; + else + category = coding_category_utf_16_be; + } + break; + } + } + } } - if (detect_info.rejected == CATEGORY_MASK_ANY) + if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY) { detect_info.found = CATEGORY_MASK_RAW_TEXT; id = coding_categories[coding_category_raw_text].id; @@ -7456,8 +7750,13 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep, if (VECTORP (eol_type)) { if (detect_info.found & ~CATEGORY_MASK_UTF_16) - normal_eol = detect_eol (coding.source, src_bytes, - coding_category_raw_text); + { + if (null_byte_found) + normal_eol = EOL_SEEN_LF; + else + normal_eol = detect_eol (coding.source, src_bytes, + coding_category_raw_text); + } if (detect_info.found & (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_BE_NOSIG)) utf_16_be_eol = detect_eol (coding.source, src_bytes, @@ -7521,9 +7820,9 @@ DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region, Return a list of possible coding systems ordered by priority. If only ASCII characters are found (except for such ISO-2022 control -characters ISO-2022 as ESC), it returns a list of single element -`undecided' or its subsidiary coding system according to a detected -end-of-line format. +characters as ESC), it returns a list of single element `undecided' +or its subsidiary coding system according to a detected end-of-line +format. If optional argument HIGHEST is non-nil, return the coding system of highest priority. */) @@ -7558,9 +7857,9 @@ DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string, Return a list of possible coding systems ordered by priority. If only ASCII characters are found (except for such ISO-2022 control -characters ISO-2022 as ESC), it returns a list of single element -`undecided' or its subsidiary coding system according to a detected -end-of-line format. +characters as ESC), it returns a list of single element `undecided' +or its subsidiary coding system according to a detected end-of-line +format. If optional argument HIGHEST is non-nil, return the coding system of highest priority. */) @@ -7728,7 +8027,7 @@ DEFUN ("unencodable-char-position", Funencodable_char_position, Sunencodable_char_position, 3, 5, 0, doc: /* Return position of first un-encodable character in a region. -START and END specfiy the region and CODING-SYSTEM specifies the +START and END specify the region and CODING-SYSTEM specifies the encoding to check. Return nil if CODING-SYSTEM does encode the region. If optional 4th argument COUNT is non-nil, it specifies at most how @@ -7841,7 +8140,7 @@ START and END are buffer positions specifying the region. CODING-SYSTEM-LIST is a list of coding systems to check. The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where -CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the +CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the whole region, POS0, POS1, ... are buffer positions where non-encodable characters are found. @@ -8009,7 +8308,7 @@ When called from a program, takes four arguments: START and END are buffer positions. Optional 4th arguments DESTINATION specifies where the decoded text goes. -If nil, the region between START and END is replace by the decoded text. +If nil, the region between START and END is replaced by the decoded text. If buffer, the decoded text is inserted in the buffer. If t, the decoded text is returned. @@ -8026,8 +8325,9 @@ It returns the length of the decoded text. */) DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region, 3, 4, "r\nzCoding system: ", doc: /* Encode the current region by specified coding system. -When called from a program, takes three arguments: -START, END, and CODING-SYSTEM. START and END are buffer positions. +When called from a program, takes four arguments: + START, END, CODING-SYSTEM and DESTINATION. +START and END are buffer positions. Optional 4th arguments DESTINATION specifies where the encoded text goes. If nil, the region between START and END is replace by the encoded text. @@ -8110,13 +8410,13 @@ DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, Optional third arg NOCOPY non-nil means it is OK to return STRING itself if the decoding operation is trivial. -Optional fourth arg BUFFER non-nil meant that the decoded text is +Optional fourth arg BUFFER non-nil means that the decoded text is inserted in BUFFER instead of returned as a string. In this case, the return value is BUFFER. This function sets `last-coding-system-used' to the precise coding system used (which may be different from CODING-SYSTEM if CODING-SYSTEM is -not fully specified. */) +not fully specified.) */) (string, coding_system, nocopy, buffer) Lisp_Object string, coding_system, nocopy, buffer; { @@ -8131,7 +8431,7 @@ DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string, Optional third arg NOCOPY non-nil means it is OK to return STRING itself if the encoding operation is trivial. -Optional fourth arg BUFFER non-nil meant that the encoded text is +Optional fourth arg BUFFER non-nil means that the encoded text is inserted in BUFFER instead of returned as a string. In this case, the return value is BUFFER. @@ -8391,9 +8691,9 @@ whichever argument specifies the file name is TARGET. TARGET has a meaning which depends on OPERATION: For file I/O, TARGET is a file name (except for the special case below). For process I/O, TARGET is a process name. - For network I/O, TARGET is a service name or a port number + For network I/O, TARGET is a service name or a port number. -This function looks up what specified for TARGET in, +This function looks up what is specified for TARGET in `file-coding-system-alist', `process-coding-system-alist', or `network-coding-system-alist' depending on OPERATION. They may specify a coding system, a cons of coding systems, @@ -8423,7 +8723,7 @@ usage: (find-operation-coding-system OPERATION ARGUMENTS...) */) operation = args[0]; if (!SYMBOLP (operation) || !INTEGERP (target_idx = Fget (operation, Qtarget_idx))) - error ("Invalid first arguement"); + error ("Invalid first argument"); if (nargs < 1 + XINT (target_idx)) error ("Too few arguments for operation: %s", SDATA (SYMBOL_NAME (operation))); @@ -8485,10 +8785,10 @@ usage: (find-operation-coding-system OPERATION ARGUMENTS...) */) DEFUN ("set-coding-system-priority", Fset_coding_system_priority, Sset_coding_system_priority, 0, MANY, 0, doc: /* Assign higher priority to the coding systems given as arguments. -If multiple coding systems belongs to the same category, +If multiple coding systems belong to the same category, all but the first one are ignored. -usage: (set-coding-system-priority ...) */) +usage: (set-coding-system-priority &rest coding-systems) */) (nargs, args) int nargs; Lisp_Object *args; @@ -9042,7 +9342,7 @@ usage: (define-coding-system-internal ...) */) = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category), CODING_ATTR_PLIST (attrs))); CODING_ATTR_PLIST (attrs) - = Fcons (QCascii_compatible_p, + = Fcons (QCascii_compatible_p, Fcons (CODING_ATTR_ASCII_COMPAT (attrs), CODING_ATTR_PLIST (attrs))); @@ -9173,7 +9473,7 @@ DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, CHECK_SYMBOL (alias); CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec); aliases = AREF (spec, 1); - /* ALISES should be a list of length more than zero, and the first + /* ALIASES should be a list of length more than zero, and the first element is a base coding system. Append ALIAS at the tail of the list. */ while (!NILP (XCDR (aliases))) @@ -9251,7 +9551,7 @@ DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases, DEFUN ("coding-system-eol-type", Fcoding_system_eol_type, Scoding_system_eol_type, 1, 1, 0, doc: /* Return eol-type of CODING-SYSTEM. -An eol-type is integer 0, 1, 2, or a vector of coding systems. +An eol-type is an integer 0, 1, 2, or a vector of coding systems. Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF, and CR respectively. @@ -9742,7 +10042,9 @@ Function to call to select safe coding system for encoding a text. If set, this function is called to force a user to select a proper coding system which can encode the text in the case that a default -coding system used in each operation can't encode the text. +coding system used in each operation can't encode the text. The +function should take care that the buffer is not modified while +the coding system is being selected. The default value is `select-safe-coding-system' (which see). */); Vselect_safe_coding_system_function = Qnil;