X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/4d247a1f5734d0af7f7ec7cb770e8838ac55bddf..d4881c6acbb41cfd507b533efdd2cdaaf5eac204:/src/ccl.c diff --git a/src/ccl.c b/src/ccl.c index 2b27a2c488..d3879abae6 100644 --- a/src/ccl.c +++ b/src/ccl.c @@ -1,5 +1,6 @@ /* CCL (Code Conversion Language) interpreter. Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. + Copyright (C) 2001 Free Software Foundation, Inc. Licensed to the Free Software Foundation. This file is part of GNU Emacs. @@ -420,7 +421,7 @@ Lisp_Object Vccl_program_table; IC += 2; */ -#define CCL_Extention 0x1F /* Extended CCL code +#define CCL_Extension 0x1F /* Extended CCL code 1:ExtendedCOMMNDRrrRRRrrrXXXXX 2:ARGUEMENT 3:... @@ -614,7 +615,8 @@ static int stack_idx_of_map_multiple; } while (0) #define CCL_CALL_FOR_MAP_INSTRUCTION(symbol, ret_ic) \ - do { \ +if (1) \ + { \ struct ccl_program called_ccl; \ if (stack_idx >= 256 \ || (setup_ccl_program (&called_ccl, (symbol)) != 0)) \ @@ -632,7 +634,8 @@ static int stack_idx_of_map_multiple; ccl_prog = called_ccl.prog; \ ic = CCL_HEADER_MAIN; \ goto ccl_repeat; \ - } while (0) + } \ +else #define CCL_MapSingle 0x12 /* Map by single code conversion map 1:ExtendedCOMMNDXXXRRRrrrXXXXX @@ -672,29 +675,35 @@ static int stack_idx_of_map_multiple; r[7] = LOWER_BYTE (SJIS (Y, Z) */ /* Terminate CCL program successfully. */ -#define CCL_SUCCESS \ - do { \ +#define CCL_SUCCESS \ +if (1) \ + { \ ccl->status = CCL_STAT_SUCCESS; \ - goto ccl_finish; \ - } while (0) + goto ccl_finish; \ + } \ +else /* Suspend CCL program because of reading from empty input buffer or writing to full output buffer. When this program is resumed, the same I/O command is executed. */ #define CCL_SUSPEND(stat) \ - do { \ +if (1) \ + { \ ic--; \ ccl->status = stat; \ goto ccl_finish; \ - } while (0) + } \ +else /* Terminate CCL program because of invalid command. Should not occur in the normal case. */ #define CCL_INVALID_CMD \ - do { \ +if (1) \ + { \ ccl->status = CCL_STAT_INVALID_CMD; \ goto ccl_error_handler; \ - } while (0) + } \ +else /* Encode one character CH to multibyte form and write to the current output buffer. If CH is less than 256, CH is written as is. */ @@ -713,8 +722,29 @@ static int stack_idx_of_map_multiple; multibyte form later. */ \ extra_bytes++; \ } \ - else \ + else if (CHAR_VALID_P (ch, 0)) \ dst += CHAR_STRING (ch, dst); \ + else \ + CCL_INVALID_CMD; \ + } \ + else \ + CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \ + } while (0) + +/* Encode one character CH to multibyte form and write to the current + output buffer. The output bytes always forms a valid multibyte + sequence. */ +#define CCL_WRITE_MULTIBYTE_CHAR(ch) \ + do { \ + int bytes = CHAR_BYTES (ch); \ + if (!dst) \ + CCL_INVALID_CMD; \ + else if (dst + bytes + extra_bytes < (dst_bytes ? dst_end : src)) \ + { \ + if (CHAR_VALID_P ((ch), 0)) \ + dst += CHAR_STRING ((ch), dst); \ + else \ + CCL_INVALID_CMD; \ } \ else \ CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \ @@ -734,15 +764,15 @@ static int stack_idx_of_map_multiple; CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \ } while (0) -/* Read one byte from the current input buffer into Rth register. */ -#define CCL_READ_CHAR(r) \ +/* Read one byte from the current input buffer into REGth register. */ +#define CCL_READ_CHAR(REG) \ do { \ if (!src) \ CCL_INVALID_CMD; \ else if (src < src_end) \ { \ - r = *src++; \ - if (r == '\n' \ + REG = *src++; \ + if (REG == '\n' \ && ccl->eol_type != CODING_EOL_LF) \ { \ /* We are encoding. */ \ @@ -753,16 +783,16 @@ static int stack_idx_of_map_multiple; else \ { \ ccl->cr_consumed = 1; \ - r = '\r'; \ + REG = '\r'; \ src--; \ } \ } \ else \ - r = '\r'; \ + REG = '\r'; \ } \ - if (r == LEADING_CODE_8_BIT_CONTROL \ + if (REG == LEADING_CODE_8_BIT_CONTROL \ && ccl->multibyte) \ - r = *src++ - 0x20; \ + REG = *src++ - 0x20; \ } \ else if (ccl->last_block) \ { \ @@ -831,15 +861,15 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed) { register int *reg = ccl->reg; register int ic = ccl->ic; - register int code, field1, field2; + register int code = 0, field1, field2; register Lisp_Object *ccl_prog = ccl->prog; unsigned char *src = source, *src_end = src + src_bytes; unsigned char *dst = destination, *dst_end = dst + dst_bytes; int jump_address; - int i, j, op; + int i = 0, j, op; int stack_idx = ccl->stack_idx; /* Instruction counter of the current CCL code. */ - int this_ic; + int this_ic = 0; /* CCL_WRITE_CHAR will produce 8-bit code of range 0x80..0x9F. But, each of them will be converted to multibyte form of 2-byte sequence. For that conversion, we remember how many more bytes @@ -849,7 +879,7 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed) if (ic >= ccl->eof_ic) ic = CCL_HEADER_MAIN; - if (ccl->buf_magnification ==0) /* We can't produce any bytes. */ + if (ccl->buf_magnification == 0) /* We can't produce any bytes. */ dst = NULL; /* Set mapping stack pointer. */ @@ -1206,103 +1236,122 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed) ic = jump_address; break; - case CCL_Extention: + case CCL_Extension: switch (EXCMD) { case CCL_ReadMultibyteChar2: if (!src) CCL_INVALID_CMD; - do { - if (src >= src_end) - { - src++; - goto ccl_read_multibyte_character_suspend; - } + if (src >= src_end) + { + src++; + goto ccl_read_multibyte_character_suspend; + } - i = *src++; - if (i == '\n' && ccl->eol_type != CODING_EOL_LF) - { - /* We are encoding. */ - if (ccl->eol_type == CODING_EOL_CRLF) - { - if (ccl->cr_consumed) - ccl->cr_consumed = 0; - else - { - ccl->cr_consumed = 1; - i = '\r'; - src--; - } - } - else - i = '\r'; - reg[rrr] = i; - reg[RRR] = CHARSET_ASCII; - } - else if (i < 0x80) - { - /* ASCII */ - reg[rrr] = i; - reg[RRR] = CHARSET_ASCII; - } - else if (i <= MAX_CHARSET_OFFICIAL_DIMENSION1) - { - if (src >= src_end) - goto ccl_read_multibyte_character_suspend; - reg[RRR] = i; - reg[rrr] = (*src++ & 0x7F); - } - else if (i <= MAX_CHARSET_OFFICIAL_DIMENSION2) - { - if ((src + 1) >= src_end) - goto ccl_read_multibyte_character_suspend; - reg[RRR] = i; - i = (*src++ & 0x7F); - reg[rrr] = ((i << 7) | (*src & 0x7F)); - src++; - } - else if ((i == LEADING_CODE_PRIVATE_11) - || (i == LEADING_CODE_PRIVATE_12)) - { - if ((src + 1) >= src_end) - goto ccl_read_multibyte_character_suspend; - reg[RRR] = *src++; - reg[rrr] = (*src++ & 0x7F); - } - else if ((i == LEADING_CODE_PRIVATE_21) - || (i == LEADING_CODE_PRIVATE_22)) - { - if ((src + 2) >= src_end) - goto ccl_read_multibyte_character_suspend; - reg[RRR] = *src++; - i = (*src++ & 0x7F); - reg[rrr] = ((i << 7) | (*src & 0x7F)); - src++; - } - else if (i == LEADING_CODE_8_BIT_CONTROL) - { - if (src >= src_end) - goto ccl_read_multibyte_character_suspend; - reg[RRR] = CHARSET_8_BIT_CONTROL; - reg[rrr] = (*src++ - 0x20); - } - else if (i >= 0xA0) - { - reg[RRR] = CHARSET_8_BIT_GRAPHIC; - reg[rrr] = i; - } - else - { - /* INVALID CODE. Return a single byte character. */ - reg[RRR] = CHARSET_ASCII; - reg[rrr] = i; - } - break; - } while (1); + if (!ccl->multibyte) + { + int bytes; + if (!UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)) + { + reg[RRR] = CHARSET_8_BIT_CONTROL; + reg[rrr] = *src++; + break; + } + } + i = *src++; + if (i == '\n' && ccl->eol_type != CODING_EOL_LF) + { + /* We are encoding. */ + if (ccl->eol_type == CODING_EOL_CRLF) + { + if (ccl->cr_consumed) + ccl->cr_consumed = 0; + else + { + ccl->cr_consumed = 1; + i = '\r'; + src--; + } + } + else + i = '\r'; + reg[rrr] = i; + reg[RRR] = CHARSET_ASCII; + } + else if (i < 0x80) + { + /* ASCII */ + reg[rrr] = i; + reg[RRR] = CHARSET_ASCII; + } + else if (i <= MAX_CHARSET_OFFICIAL_DIMENSION2) + { + int dimension = BYTES_BY_CHAR_HEAD (i) - 1; + + if (dimension == 0) + { + /* `i' is a leading code for an undefined charset. */ + reg[RRR] = CHARSET_8_BIT_GRAPHIC; + reg[rrr] = i; + } + else if (src + dimension > src_end) + goto ccl_read_multibyte_character_suspend; + else + { + reg[RRR] = i; + i = (*src++ & 0x7F); + if (dimension == 1) + reg[rrr] = i; + else + reg[rrr] = ((i << 7) | (*src++ & 0x7F)); + } + } + else if ((i == LEADING_CODE_PRIVATE_11) + || (i == LEADING_CODE_PRIVATE_12)) + { + if ((src + 1) >= src_end) + goto ccl_read_multibyte_character_suspend; + reg[RRR] = *src++; + reg[rrr] = (*src++ & 0x7F); + } + else if ((i == LEADING_CODE_PRIVATE_21) + || (i == LEADING_CODE_PRIVATE_22)) + { + if ((src + 2) >= src_end) + goto ccl_read_multibyte_character_suspend; + reg[RRR] = *src++; + i = (*src++ & 0x7F); + reg[rrr] = ((i << 7) | (*src & 0x7F)); + src++; + } + else if (i == LEADING_CODE_8_BIT_CONTROL) + { + if (src >= src_end) + goto ccl_read_multibyte_character_suspend; + reg[RRR] = CHARSET_8_BIT_CONTROL; + reg[rrr] = (*src++ - 0x20); + } + else if (i >= 0xA0) + { + reg[RRR] = CHARSET_8_BIT_GRAPHIC; + reg[rrr] = i; + } + else + { + /* INVALID CODE. Return a single byte character. */ + reg[RRR] = CHARSET_ASCII; + reg[rrr] = i; + } break; ccl_read_multibyte_character_suspend: + if (src <= src_end && !ccl->multibyte && ccl->last_block) + { + reg[RRR] = CHARSET_8_BIT_CONTROL; + reg[rrr] = i; + break; + } src--; if (ccl->last_block) { @@ -1327,7 +1376,7 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed) else i = ((i - 0xE0) << 14) | reg[rrr]; - CCL_WRITE_CHAR (i); + CCL_WRITE_MULTIBYTE_CHAR (i); break; @@ -1715,7 +1764,9 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed) } ccl_error_handler: - if (destination) + /* The suppress_error member is set when e.g. a CCL-based coding + system is used for terminal output. */ + if (!ccl->suppress_error && destination) { /* We can insert an error message only if DESTINATION is specified and we still have a room to store the message @@ -1774,13 +1825,33 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed) bcopy (msg, dst, msglen); dst += msglen; } + + if (ccl->status == CCL_STAT_INVALID_CMD) + { +#if 0 /* If the remaining bytes contain 0x80..0x9F, copying them + results in an invalid multibyte sequence. */ + + /* Copy the remaining source data. */ + int i = src_end - src; + if (dst_bytes && (dst_end - dst) < i) + i = dst_end - dst; + bcopy (src, dst, i); + src += i; + dst += i; +#else + /* Signal that we've consumed everything. */ + src = src_end; +#endif + } } ccl_finish: ccl->ic = ic; ccl->stack_idx = stack_idx; ccl->prog = ccl_prog; - if (consumed) *consumed = src - source; + ccl->eight_bit_control = (extra_bytes > 0); + if (consumed) + *consumed = src - source; return (dst ? dst - destination : 0); } @@ -1932,6 +2003,7 @@ setup_ccl_program (ccl, ccl_prog) ccl->status = 0; ccl->stack_idx = 0; ccl->eol_type = CODING_EOL_LF; + ccl->suppress_error = 0; return 0; } @@ -1963,17 +2035,18 @@ DEFUN ("ccl-execute", Fccl_execute, Sccl_execute, 2, 2, 0, "Execute CCL-PROGRAM with registers initialized by REGISTERS.\n\ \n\ CCL-PROGRAM is a CCL program name (symbol)\n\ -or a compiled code generated by `ccl-compile' (for backward compatibility,\n\ -in this case, the overhead of the execution is bigger than the former case).\n\ +or compiled code generated by `ccl-compile' (for backward compatibility.\n\ +In the latter case, the execution overhead is bigger than in the former).\n\ No I/O commands should appear in CCL-PROGRAM.\n\ \n\ REGISTERS is a vector of [R0 R1 ... R7] where RN is an initial value\n\ - of Nth register.\n\ +for the Nth register.\n\ \n\ As side effect, each element of REGISTERS holds the value of\n\ - corresponding register after the execution.\n\ +the corresponding register after the execution.\n\ \n\ -See the documentation of `define-ccl-program' for the detail of CCL program.") +See the documentation of `define-ccl-program' for a definition of CCL\n\ +programs.") (ccl_prog, reg) Lisp_Object ccl_prog, reg; { @@ -2169,11 +2242,11 @@ Return index number of the registered CCL program.") /* Register code conversion map. A code conversion map consists of numbers, Qt, Qnil, and Qlambda. - The first element is start code point. - The rest elements are mapped numbers. + The first element is the start code point. + The other elements are mapped numbers. Symbol t means to map to an original number before mapping. Symbol nil means that the corresponding element is empty. - Symbol lambda menas to terminate mapping here. + Symbol lambda means to terminate mapping here. */ DEFUN ("register-code-conversion-map", Fregister_code_conversion_map,