+/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
+   Scan the bytes from SRC up to SRC_END and decide whether they may
+   be text in Emacs' internal format.  Return 0 as soon as an ESC, SI
+   or SO control code or a malformed multibyte sequence is seen;
+   otherwise return CODING_CATEGORY_MASK_EMACS_MULE.  */
+
+static int
+detect_coding_emacs_mule (src, src_end, multibytep)
+     unsigned char *src, *src_end;
+     int multibytep;
+{
+  unsigned char c;
+  /* Nonzero while the bytes being scanned follow an old-style 0x80
+     composition leading code.  */
+  int composing = 0;
+  /* Dummy coding system for the benefit of ONE_MORE_BYTE.  */
+  struct coding_system dummy_coding;
+  struct coding_system *coding = &dummy_coding;
+
+  for (;;)
+    {
+      ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+
+      if (composing)
+        {
+          /* Undo the transformation applied to composition components
+             so that the checks below see the plain byte.  */
+          if (c == 0xA0)
+            {
+              ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+              c &= 0x7F;
+            }
+          else if (c > 0xA0)
+            c -= 0x20;
+          else
+            composing = 0;
+        }
+
+      if (c < 0x20)
+        {
+          if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
+            return 0;
+        }
+      else if (c == 0x80)
+        {
+          /* Old leading code for a composite character.  */
+          composing = 1;
+        }
+      else if (c > 0x80 && c < 0xA0)
+        {
+          /* C was fetched from SRC[-1]; validate the whole sequence
+             that starts there and skip past it.  */
+          unsigned char *char_start = src - 1;
+          int len;
+
+          if (!UNIBYTE_STR_AS_MULTIBYTE_P (char_start, src_end - char_start,
+                                           len))
+            return 0;
+          src = char_start + len;
+        }
+    }
+  /* No visible statement jumps here; presumably the byte-fetching
+     macro does so once the source is exhausted.  */
+ label_end_of_loop:
+  return CODING_CATEGORY_MASK_EMACS_MULE;
+}
+
+
+/* Record the starting position START and METHOD of one composition.
+   A four-slot header is reserved at the current end of
+   CODING->cmp_data->data, and its index is remembered in
+   CODING->cmp_data_start:
+     slot 0: -1 for now (CODING_ADD_COMPOSITION_END stores the length),
+     slot 1: START plus cmp_data->char_offset,
+     slot 2: not set here (CODING_ADD_COMPOSITION_END stores the end),
+     slot 3: METHOD.
+   Every macro parameter is parenthesized so that an arbitrary
+   expression can be passed without precedence surprises.  */
+
+#define CODING_ADD_COMPOSITION_START(coding, start, method) \
+  do { \
+    struct composition_data *cmp_data = (coding)->cmp_data; \
+    int *data = cmp_data->data + cmp_data->used; \
+    (coding)->cmp_data_start = cmp_data->used; \
+    data[0] = -1; \
+    data[1] = cmp_data->char_offset + (start); \
+    data[3] = (int) (method); \
+    cmp_data->used += 4; \
+  } while (0)
+
+/* Record the ending position END of the current composition, i.e.
+   the one whose header starts at CODING->cmp_data_start.  Slot 0 of
+   that header receives the number of slots used since the header
+   began, and slot 2 receives END plus cmp_data->char_offset.
+   Every macro parameter is parenthesized so that an arbitrary
+   expression can be passed without precedence surprises.  */
+
+#define CODING_ADD_COMPOSITION_END(coding, end) \
+  do { \
+    struct composition_data *cmp_data = (coding)->cmp_data; \
+    int *data = cmp_data->data + (coding)->cmp_data_start; \
+    data[0] = cmp_data->used - (coding)->cmp_data_start; \
+    data[2] = cmp_data->char_offset + (end); \
+  } while (0)
+
+/* Record one COMPONENT (alternate character or composition rule) by
+   appending it to CODING->cmp_data->data and bumping the `used'
+   count.  The value of the expression is the stored component.
+   CODING is evaluated twice, so it must be free of side effects.  */
+
+#define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \
+  ((coding)->cmp_data->data[(coding)->cmp_data->used++] = (component))
+
+
+/* Fetch the next source byte.  While SRC is below SRC_END, yield the
+   byte SRC points at and advance SRC by one; otherwise yield -1 and
+   leave SRC untouched.  Operates on the variables `src' and
+   `src_end' of the scope where it is expanded.  */
+
+#define SAFE_ONE_MORE_BYTE() (src < src_end ? *src++ : -1)
+
+
+/* Decode a character represented as a component of composition
+   sequence of Emacs 20 style at SRC.  Set C to that character, store
+   its multibyte form sequence at P, and set P to the end of that
+   sequence.  If no valid character is found, set C to -1 and leave P
+   unchanged, so that no partial sequence is left in the output.
+
+   Two component forms are accepted:
+     - the byte 0xA0 followed by a byte in 0xA0..0xFF, which is an
+       ASCII character with its 0x80 bit set (this matches
+       detect_coding_emacs_mule, which clears that bit);
+     - a multibyte sequence whose leading code has 0x20 added.
+
+   Reads bytes through SAFE_ONE_MORE_BYTE, so it uses the `src' and
+   `src_end' variables of the scope where it is expanded.  */
+
+#define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p) \
+  do { \
+    int bytes; \
+ \
+    c = SAFE_ONE_MORE_BYTE (); \
+    if (c < 0) \
+      break; \
+    if (CHAR_HEAD_P (c)) \
+      c = -1; \
+    else if (c == 0xA0) \
+      { \
+        c = SAFE_ONE_MORE_BYTE (); \
+        if (c < 0xA0) \
+          c = -1; \
+        else \
+          { \
+            /* Strip the 0x80 marker bit to recover the ASCII code; \
+               subtracting 0xA0 would turn 0xA0 into NUL instead of \
+               a space.  */ \
+            c -= 0x80; \
+            *p++ = c; \
+          } \
+      } \
+    else if (BASE_LEADING_CODE_P (c - 0x20)) \
+      { \
+        unsigned char *p0 = p; \
+ \
+        c -= 0x20; \
+        *p++ = c; \
+        bytes = BYTES_BY_CHAR_HEAD (c); \
+        while (--bytes) \
+          { \
+            c = SAFE_ONE_MORE_BYTE (); \
+            if (c < 0) \
+              break; \
+            *p++ = c; \
+          } \
+        if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)) \
+          c = STRING_CHAR (p0, bytes); \
+        else \
+          { \
+            /* Discard the bytes copied for the malformed component; \
+               otherwise the caller would emit them as text.  */ \
+            p = p0; \
+            c = -1; \
+          } \
+      } \
+    else \
+      c = -1; \
+  } while (0)
+
+
+/* Decode a composition rule represented as a component of composition
+   sequence of Emacs 20 style at SRC.  One byte is read; after
+   subtracting 0xA0 it must lie in 0..80.  It is then split into GREF
+   (quotient by 9) and NREF (remainder by 9), and C is set to
+   COMPOSITION_ENCODE_RULE (GREF, NREF).  If no valid rule is found
+   (this includes an exhausted source), set C to -1.
+
+   Assigns the variables `gref' and `nref' of the scope where it is
+   expanded.  */
+
+#define DECODE_EMACS_MULE_COMPOSITION_RULE(c) \
+  do { \
+    c = SAFE_ONE_MORE_BYTE () - 0xA0; \
+    if (c >= 0 && c < 81) \
+      { \
+        gref = c / 9; \
+        nref = c % 9; \
+        c = COMPOSITION_ENCODE_RULE (gref, nref); \
+      } \
+    else \
+      c = -1; \
+  } while (0)
+
+
+/* Decode composition sequence encoded by `emacs-mule' at the source
+   pointed by SRC.  SRC_END is the end of source.  Store information
+   of the composition in CODING->cmp_data.
+
+   For backward compatibility, decode also a composition sequence of
+   Emacs 20 style.  In that case, the composition sequence contains
+   characters that should be extracted into a buffer or string.  Store
+   those characters at *DESTINATION in multibyte form.
+
+   If we encounter an invalid byte sequence, return 0.
+   If we encounter an insufficient source or destination, or
+   insufficient space in CODING->cmp_data, return -1.
+   Otherwise, return consumed bytes in the source.  */
+
+static INLINE int
+decode_composition_emacs_mule (coding, src, src_end,
+                               destination, dst_end, dst_bytes)
+     struct coding_system *coding;
+     unsigned char *src, *src_end, **destination, *dst_end;
+     int dst_bytes;
+{
+  unsigned char *dst = *destination;
+  int method, data_len, nchars;
+  /* SRC_BASE points at the leading 0x80; SRC moves past it.  */
+  unsigned char *src_base = src++;
+  /* Store components of composition.  */
+  int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
+  int ncomponent;
+  /* Store multibyte form of characters to be composed.  This is for
+     Emacs 20 style composition sequence.  */
+  unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH];
+  unsigned char *bufp = buf;
+  int c, i, gref, nref;
+
+  if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
+      >= COMPOSITION_DATA_SIZE)
+    {
+      coding->result = CODING_FINISH_INSUFFICIENT_CMP;
+      return -1;
+    }
+
+  ONE_MORE_BYTE (c);
+  if (c - 0xF0 >= COMPOSITION_RELATIVE
+      && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
+    {
+      /* Current format: 0x80 METHOD LENGTH NCHARS COMPONENT...  */
+      int with_rule;
+
+      method = c - 0xF0;
+      with_rule = (method == COMPOSITION_WITH_RULE
+                   || method == COMPOSITION_WITH_RULE_ALTCHARS);
+      ONE_MORE_BYTE (c);
+      data_len = c - 0xA0;
+      if (data_len < 4
+          || src_base + data_len > src_end)
+        return 0;
+      ONE_MORE_BYTE (c);
+      nchars = c - 0xA0;
+      /* A composition must cover at least one character.  Test the
+         decoded count, not the raw byte.  */
+      if (nchars < 1)
+        return 0;
+      for (ncomponent = 0; src < src_base + data_len; ncomponent++)
+        {
+          /* DATA_LEN comes from the input and may describe more
+             components than COMPONENT can hold; such a sequence
+             cannot be valid, so reject it instead of overrunning
+             the array.  */
+          if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
+            return 0;
+          if (ncomponent % 2 && with_rule)
+            {
+              /* A rule is two bytes, each offset by 32.  */
+              ONE_MORE_BYTE (gref);
+              gref -= 32;
+              ONE_MORE_BYTE (nref);
+              nref -= 32;
+              c = COMPOSITION_ENCODE_RULE (gref, nref);
+            }
+          else
+            {
+              int bytes;
+              if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+                c = STRING_CHAR (src, bytes);
+              else
+                c = *src, bytes = 1;
+              src += bytes;
+            }
+          component[ncomponent] = c;
+        }
+    }
+  else
+    {
+      /* This may be an old Emacs 20 style format.  See the comment at
+         the section 2 of this file.  */
+      while (src < src_end && !CHAR_HEAD_P (*src)) src++;
+      if (src == src_end
+          && !(coding->mode & CODING_MODE_LAST_BLOCK))
+        goto label_end_of_loop;
+
+      /* Restrict the source to the composition sequence just
+         delimited and rescan it from the byte after 0x80.  */
+      src_end = src;
+      src = src_base + 1;
+      if (c < 0xC0)
+        {
+          method = COMPOSITION_RELATIVE;
+          for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;)
+            {
+              DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
+              if (c < 0)
+                break;
+              component[ncomponent++] = c;
+            }
+          if (ncomponent < 2)
+            return 0;
+          nchars = ncomponent;
+        }
+      else if (c == 0xFF)
+        {
+          method = COMPOSITION_WITH_RULE;
+          /* Skip the 0xFF marker.  */
+          src++;
+          DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
+          if (c < 0)
+            return 0;
+          component[0] = c;
+          for (ncomponent = 1;
+               ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;)
+            {
+              DECODE_EMACS_MULE_COMPOSITION_RULE (c);
+              if (c < 0)
+                break;
+              component[ncomponent++] = c;
+              DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
+              if (c < 0)
+                break;
+              component[ncomponent++] = c;
+            }
+          if (ncomponent < 3)
+            return 0;
+          nchars = (ncomponent + 1) / 2;
+        }
+      else
+        return 0;
+    }
+
+  /* Commit only if the extracted characters (if any) fit in the
+     destination.  */
+  if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
+    {
+      CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
+      for (i = 0; i < ncomponent; i++)
+        CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
+      CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
+      if (buf < bufp)
+        {
+          unsigned char *p = buf;
+          EMIT_BYTES (p, bufp);
+          *destination += bufp - buf;
+          coding->produced_char += nchars;
+        }
+      return (src - src_base);
+    }
+ label_end_of_loop:
+  return -1;
+}
+
+/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
+   Decode SRC_BYTES bytes at SOURCE into DESTINATION, handling the
+   end-of-line convention in CODING->eol_type and composition
+   sequences introduced by the byte 0x80.  On return CODING->consumed,
+   CODING->consumed_char, CODING->produced and CODING->produced_char
+   describe how far the conversion went.  */
+
+static void
+decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
+     struct coding_system *coding;
+     unsigned char *source, *destination;
+     int src_bytes, dst_bytes;
+{
+  unsigned char *src = source;
+  unsigned char *src_end = source + src_bytes;
+  unsigned char *dst = destination;
+  unsigned char *dst_end = destination + dst_bytes;
+  /* Where the unit handled by the current iteration starts.  When
+     the loop is left early (source or destination exhausted), this
+     is the position reported as consumed.  */
+  unsigned char *src_base;
+
+  coding->produced_char = 0;
+  for (src_base = src; src_base < src_end; src_base = src)
+    {
+      unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
+      int bytes;
+      /* Nonzero when SRC starts a well-formed multibyte sequence
+         that can be copied through unchanged.  */
+      int is_valid_char = 0;
+
+      if (*src == '\r')
+        {
+          int ch = *src++;
+
+          if (coding->eol_type == CODING_EOL_CR)
+            ch = '\n';
+          else if (coding->eol_type == CODING_EOL_CRLF)
+            {
+              ONE_MORE_BYTE (ch);
+              if (ch != '\n')
+                {
+                  if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+                    {
+                      coding->result = CODING_FINISH_INCONSISTENT_EOL;
+                      goto label_end_of_loop;
+                    }
+                  /* A lone CR: put back the byte just read and emit
+                     the CR itself.  */
+                  src--;
+                  ch = '\r';
+                }
+            }
+          *dst++ = ch;
+          coding->produced_char++;
+          continue;
+        }
+
+      if (*src == '\n')
+        {
+          if ((coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+              && (coding->eol_type == CODING_EOL_CR
+                  || coding->eol_type == CODING_EOL_CRLF))
+            {
+              coding->result = CODING_FINISH_INCONSISTENT_EOL;
+              goto label_end_of_loop;
+            }
+          *dst++ = *src++;
+          coding->produced_char++;
+          continue;
+        }
+
+      if (*src == 0x80)
+        {
+          /* Start of composition data.  */
+          int consumed = decode_composition_emacs_mule (coding, src, src_end,
+                                                        &dst, dst_end,
+                                                        dst_bytes);
+          if (consumed < 0)
+            goto label_end_of_loop;
+          if (consumed > 0)
+            {
+              src += consumed;
+              continue;
+            }
+          /* CONSUMED is 0: not a valid composition sequence, so the
+             0x80 byte is treated as a raw byte below.  */
+        }
+      else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+        is_valid_char = 1;
+
+      if (is_valid_char)
+        {
+          p = src;
+          src += bytes;
+        }
+      else
+        {
+          /* Convert the single raw byte to its multibyte form.  */
+          bytes = CHAR_STRING (*src, tmp);
+          p = tmp;
+          src++;
+        }
+
+      if (dst + bytes >= (dst_bytes ? dst_end : src))
+        {
+          coding->result = CODING_FINISH_INSUFFICIENT_DST;
+          break;
+        }
+      while (bytes--)
+        *dst++ = *p++;
+      coding->produced_char++;
+    }
+ label_end_of_loop:
+  coding->consumed = coding->consumed_char = src_base - source;
+  coding->produced = dst - destination;
+}
+
+
+/* Encode composition data stored at DATA into a special byte sequence
+   starting by 0x80 and append it at `dst'.  The sequence is
+     0x80 METHOD COMPONENTS-BYTES COMPOSED-CHARS COMPONENT...
+   where the three header bytes are offset by 0xF0, 0xA0 and 0xA0
+   respectively, and a rule component is written as two bytes, each
+   offset by 0x20.  Update CODING->cmp_data_start and maybe
+   CODING->cmp_data for the next call.
+
+   If the destination is too small, set CODING->result to
+   CODING_FINISH_INSUFFICIENT_DST and jump to `label_end_of_loop'.
+   Uses `dst', `dst_end', `dst_bytes' and `src' of the scope where it
+   is expanded.  */
+
+#define ENCODE_COMPOSITION_EMACS_MULE(coding, data) \
+  do { \
+    unsigned char seq[1024]; \
+    unsigned char *seq_end = seq + 4; \
+    unsigned char *out; \
+    int record_len = data[0]; \
+    int idx; \
+ \
+    seq[0] = 0x80; \
+    seq[1] = 0xF0 + data[3]; /* METHOD */ \
+    seq[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */ \
+    if (data[3] != COMPOSITION_WITH_RULE \
+        && data[3] != COMPOSITION_WITH_RULE_ALTCHARS) \
+      { \
+        /* Characters only.  */ \
+        for (idx = 4; idx < record_len; idx++) \
+          seq_end += CHAR_STRING (data[idx], seq_end); \
+      } \
+    else \
+      { \
+        /* One character, then (rule, character) pairs.  */ \
+        seq_end += CHAR_STRING (data[4], seq_end); \
+        idx = 5; \
+        while (idx < record_len) \
+          { \
+            int gref, nref; \
+            COMPOSITION_DECODE_RULE (data[idx], gref, nref); \
+            *seq_end++ = 0x20 + gref; \
+            *seq_end++ = 0x20 + nref; \
+            seq_end += CHAR_STRING (data[idx + 1], seq_end); \
+            idx += 2; \
+          } \
+      } \
+    seq[2] = 0xA0 + (seq_end - seq); /* COMPONENTS-BYTES */ \
+ \
+    if (dst + (seq_end - seq) + 4 > (dst_bytes ? dst_end : src)) \
+      { \
+        coding->result = CODING_FINISH_INSUFFICIENT_DST; \
+        goto label_end_of_loop; \
+      } \
+    for (out = seq; out < seq_end; out++) \
+      *dst++ = *out; \
+    coding->cmp_data_start += data[0]; \
+    if (coding->cmp_data_start == coding->cmp_data->used \
+        && coding->cmp_data->next) \
+      { \
+        /* This chunk is used up; continue with the next one.  */ \
+        coding->cmp_data = coding->cmp_data->next; \
+        coding->cmp_data_start = 0; \
+      } \
+  } while (0)
+
+
+/* Forward declaration of encode_eol, a function local to this file
+   (it is declared static).  */
+static void encode_eol P_ ((struct coding_system *, unsigned char *,
+ unsigned char *, int, int));
+
+static void
+encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
+ struct coding_system *coding;
+ unsigned char *source, *destination;
+ int src_bytes, dst_bytes;
+{
+ unsigned char *src = source;
+ unsigned char *src_end = source + src_bytes;
+ unsigned char *dst = destination;
+ unsigned char *dst_end = destination + dst_bytes;
+ unsigned char *src_base;
+ int c;
+ int char_offset;
+ int *data;
+
+ Lisp_Object translation_table;