X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/341dd15a7bd9d0b4adff846e94289b3e1877eed1..ce75f06e9110ceda73c75997a017f7c09dd9dd33:/src/coding.h?ds=sidebyside diff --git a/src/coding.h b/src/coding.h index dae16a9ea8..ca8878f1cb 100644 --- a/src/coding.h +++ b/src/coding.h @@ -1,8 +1,8 @@ /* Header for coding system handler. Copyright (C) 2001, 2002, 2003, 2004, 2005, - 2006, 2007, 2008 Free Software Foundation, Inc. + 2006, 2007, 2008, 2009 Free Software Foundation, Inc. Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, - 2005, 2006, 2007, 2008 + 2005, 2006, 2007, 2008, 2009 National Institute of Advanced Industrial Science and Technology (AIST) Registration Number H14PRO021 Copyright (C) 2003 @@ -11,10 +11,10 @@ This file is part of GNU Emacs. -GNU Emacs is free software; you can redistribute it and/or modify +GNU Emacs is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. GNU Emacs is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -22,9 +22,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with GNU Emacs; see the file COPYING. If not, write to -the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -Boston, MA 02110-1301, USA. */ +along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. 
*/ #ifndef EMACS_CODING_H #define EMACS_CODING_H @@ -58,6 +56,12 @@ enum define_coding_iso2022_arg_index coding_arg_iso2022_max }; +enum define_coding_utf8_arg_index + { + coding_arg_utf8_bom = coding_arg_max, + coding_arg_utf8_max + }; + enum define_coding_utf16_arg_index { coding_arg_utf16_bom = coding_arg_max, @@ -150,7 +154,7 @@ enum coding_attr_index coding_attr_iso_request, coding_attr_iso_flags, - coding_attr_utf_16_bom, + coding_attr_utf_bom, coding_attr_utf_16_endian, coding_attr_emacs_mule_full, @@ -302,6 +306,37 @@ enum coding_result_code ASCII characters (usually '?') for unsupported characters. */ #define CODING_MODE_SAFE_ENCODING 0x20 + /* For handling composition sequence. */ +#include "composite.h" + +enum composition_state + { + COMPOSING_NO, + COMPOSING_CHAR, + COMPOSING_RULE, + COMPOSING_COMPONENT_CHAR, + COMPOSING_COMPONENT_RULE + }; + +/* Structure for the current composition status. */ +struct composition_status +{ + enum composition_state state; + enum composition_method method; + int old_form; /* 0:pre-21 form, 1:post-21 form */ + int length; /* number of elements produced in charbuf */ + int nchars; /* number of characters composed */ + int ncomps; /* number of composition components */ + /* Maximum carryover is for the case of COMPOSITION_WITH_RULE_ALTCHARS. + See the comment in coding.c. */ + int carryover[4 /* annotation header */ + + MAX_COMPOSITION_COMPONENTS * 3 - 2 /* ALTs and RULEs */ + + 2 /* intermediate -1 -1 */ + + MAX_COMPOSITION_COMPONENTS /* CHARs */ + ]; +}; + + /* Structure of the field `spec.iso_2022' in the structure `coding_system'. */ struct iso_2022_spec @@ -323,15 +358,30 @@ struct iso_2022_spec /* Set to 1 temporarily only when processing at beginning of line. */ int bol; + + /* If positive, we are now scanning CTEXT extended segment. */ + int ctext_extended_segment_len; + + /* If nonzero, we are now scanning embedded UTF-8 sequence. */ + int embedded_utf_8; + + /* The current composition. 
*/ + struct composition_status cmp_status; +}; + +struct emacs_mule_spec +{ + int full_support; + struct composition_status cmp_status; }; struct ccl_spec; -enum utf_16_bom_type +enum utf_bom_type { - utf_16_detect_bom, - utf_16_without_bom, - utf_16_with_bom + utf_detect_bom, + utf_without_bom, + utf_with_bom }; enum utf_16_endian_type @@ -342,7 +392,7 @@ enum utf_16_endian_type struct utf_16_spec { - enum utf_16_bom_type bom; + enum utf_bom_type bom; enum utf_16_endian_type endian; int surrogate; }; @@ -382,11 +432,12 @@ struct coding_system struct iso_2022_spec iso_2022; struct ccl_spec *ccl; /* Defined in ccl.h. */ struct utf_16_spec utf_16; - int emacs_mule_full_support; + enum utf_bom_type utf_8_bom; + struct emacs_mule_spec emacs_mule; } spec; int max_charset_id; - char *safe_charsets; + unsigned char *safe_charsets; /* The following two members specify how binary 8-bit code 128..255 are represented in source and destination text respectively. 1 @@ -579,7 +630,7 @@ struct coding_system if (j1 & 1) \ { \ s1 = (j1 <= 0x25 ? 0xF0 + (j1 - 0x21) / 2 \ - : j1 <= 0x27 ? 0xF3 + (j1 - 0x2D) / 2 \ + : j1 <= 0x2F ? 0xF3 + (j1 - 0x2D) / 2 \ : 0xF5 + (j1 - 0x6F) / 2); \ s2 = j2 + ((j2 >= 0x60) ? 
0x20 : 0x1F); \ } \ @@ -645,6 +696,7 @@ extern int decoding_buffer_size P_ ((struct coding_system *, int)); extern int encoding_buffer_size P_ ((struct coding_system *, int)); extern void setup_coding_system P_ ((Lisp_Object, struct coding_system *)); extern Lisp_Object coding_charset_list P_ ((struct coding_system *)); +extern Lisp_Object coding_system_charset_list P_ ((Lisp_Object)); extern void detect_coding P_ ((struct coding_system *)); extern Lisp_Object code_convert_region P_ ((Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, @@ -682,13 +734,14 @@ extern void encode_coding_object P_ ((struct coding_system *, #define decode_coding_string(coding, string, nocopy) \ - decode_coding_object (coding, string, 0, 0, XSTRING (string)->size, \ - STRING_BYTES (XSTRING (string)), Qt) + decode_coding_object (coding, string, 0, 0, SCHARS (string), \ + SBYTES (string), Qt) #define encode_coding_string(coding, string, nocopy) \ - (encode_coding_object (coding, string, 0, 0, XSTRING (string)->size, \ - STRING_BYTES (XSTRING (string)), Qt), \ - (coding)->dst_object) + (STRING_MULTIBYTE(string) ? \ + (encode_coding_object (coding, string, 0, 0, SCHARS (string), \ + SBYTES (string), Qt), \ + (coding)->dst_object) : (string)) #define decode_coding_c_string(coding, src, bytes, dst_object) \