This function is provided for backward compatibility.
Now we have the variable `charset-list'."
charset-list)
+(make-obsolete 'charset-list "Use variable `charset-list'" "23.1")
-(defsubst generic-char-p (char)
- "Return t if and only if CHAR is a generic character.
-See also the documentation of `make-char'."
- (and (>= char 0400)
- (let ((l (split-char char)))
- (and (or (= (nth 1 l) 0) (eq (nth 2 l) 0))
- (not (eq (car l) 'composition))))))
-
-(defun decode-char (ccs code-point &optional restriction)
- "Return character specified by coded character set CCS and CODE-POINT in it.
-Return nil if such a character is not supported.
-Currently the only supported coded character set is `ucs' (ISO/IEC
-10646: Universal Multi-Octet Coded Character Set), and the result is
-translated through the translation-table named
-`utf-translation-table-for-decode', or through the
-translation-hash-table named `utf-subst-table-for-decode'
-\(if `utf-translate-cjk-mode' is non-nil).
-
-Optional argument RESTRICTION specifies a way to map the pair of CCS
-and CODE-POINT to a character. Currently not supported and just ignored."
- (cond
- ((eq ccs 'ucs)
- (or (and utf-translate-cjk-mode
- (utf-lookup-subst-table-for-decode code-point))
- (let ((c (cond
- ((< code-point 160)
- code-point)
- ((< code-point 256)
- (make-char 'latin-iso8859-1 code-point))
- ((< code-point #x2500)
- (setq code-point (- code-point #x0100))
- (make-char 'mule-unicode-0100-24ff
- (+ (/ code-point 96) 32) (+ (% code-point 96) 32)))
- ((< code-point #x3400)
- (setq code-point (- code-point #x2500))
- (make-char 'mule-unicode-2500-33ff
- (+ (/ code-point 96) 32) (+ (% code-point 96) 32)))
- ((and (>= code-point #xe000) (< code-point #x10000))
- (setq code-point (- code-point #xe000))
- (make-char 'mule-unicode-e000-ffff
- (+ (/ code-point 96) 32)
- (+ (% code-point 96) 32))))))
- (when c
- (or (aref (get 'utf-translation-table-for-decode
- 'translation-table) c)
- c)))))))
-
-(defun encode-char (char ccs &optional restriction)
- "Return code-point in coded character set CCS that corresponds to CHAR.
-Return nil if CHAR is not included in CCS.
-Currently the only supported coded character set is `ucs' (ISO/IEC
-10646: Universal Multi-Octet Coded Character Set), and CHAR is first
-translated through the translation-table named
-`utf-translation-table-for-encode', or through the
-translation-hash-table named `utf-subst-table-for-encode' \(if
-`utf-translate-cjk-mode' is non-nil).
-
-CHAR should be in one of these charsets:
- ascii, latin-iso8859-1, mule-unicode-0100-24ff, mule-unicode-2500-33ff,
- mule-unicode-e000-ffff, eight-bit-control
-Otherwise, return nil.
-
-Optional argument RESTRICTION specifies a way to map CHAR to a
-code-point in CCS. Currently not supported and just ignored."
- (let* ((split (split-char char))
- (charset (car split))
- trans)
- (cond ((eq ccs 'ucs)
- (or (and utf-translate-cjk-mode
- (utf-lookup-subst-table-for-encode char))
- (let ((table (get 'utf-translation-table-for-encode
- 'translation-table)))
- (setq trans (aref table char))
- (if trans
- (setq split (split-char trans)
- charset (car split)))
- (cond ((eq charset 'ascii)
- (or trans char))
- ((eq charset 'latin-iso8859-1)
- (+ (nth 1 split) 128))
- ((eq charset 'mule-unicode-0100-24ff)
- (+ #x0100 (+ (* (- (nth 1 split) 32) 96)
- (- (nth 2 split) 32))))
- ((eq charset 'mule-unicode-2500-33ff)
- (+ #x2500 (+ (* (- (nth 1 split) 32) 96)
- (- (nth 2 split) 32))))
- ((eq charset 'mule-unicode-e000-ffff)
- (+ #xe000 (+ (* (- (nth 1 split) 32) 96)
- (- (nth 2 split) 32))))
- ((eq charset 'eight-bit-control)
- char))))))))
+\f
+;;; CHARACTER
+(defalias 'char-valid-p 'characterp)
+(make-obsolete 'char-valid-p 'characterp "23.1")
+(defun generic-char-p (char)
+ "Always return nil. This is provided for backward compatibility."
+ nil)
+(make-obsolete 'generic-char-p "Generic characters no longer exist" "23.1")
+
+(defun make-char-internal (charset-id &optional code1 code2)
+ (let ((charset (aref emacs-mule-charset-table charset-id)))
+ (or charset
+ (error "Invalid Emacs-mule charset ID: %d" charset-id))
+ (make-char charset code1 code2)))
\f
+ ;; Save the ASCII case table in case we need it later. Some locales
+ ;; (such as Turkish) modify the case behavior of ASCII characters,
+ ;; which can interfere with networking code that uses ASCII strings.
+
+ (defvar ascii-case-table
+ ;; Code copied from copy-case-table to avoid requiring case-table.el
+ (let ((tbl (copy-sequence (standard-case-table)))
+ (up (char-table-extra-slot (standard-case-table) 0)))
+ (if up (set-char-table-extra-slot tbl 0 (copy-sequence up)))
+ (set-char-table-extra-slot tbl 1 nil)
+ (set-char-table-extra-slot tbl 2 nil)
+ tbl)
+ "Case table for the ASCII character set.")
+ \f
;; Coding system stuff
-;; Coding system is a symbol that has the property `coding-system'.
-;;
-;; The value of the property `coding-system' is a vector of the
-;; following format:
-;; [TYPE MNEMONIC DOC-STRING PLIST FLAGS]
-;; We call this vector as coding-spec. See comments in src/coding.c
-;; for more detail.
-
-(defconst coding-spec-type-idx 0)
-(defconst coding-spec-mnemonic-idx 1)
-(defconst coding-spec-doc-string-idx 2)
-(defconst coding-spec-plist-idx 3)
-(defconst coding-spec-flags-idx 4)
-
-;; PLIST is a property list of a coding system. To share PLIST among
-;; alias coding systems, a coding system has PLIST in coding-spec
-;; instead of having it in normal property list of Lisp symbol.
-;; Here's a list of coding system properties currently being used.
-;;
-;; o coding-category
-;;
-;; The value is a coding category the coding system belongs to. The
-;; function `make-coding-system' sets this value automatically
-;; unless its argument PROPERTIES specifies this property.
-;;
-;; o alias-coding-systems
-;;
-;; The value is a list of coding systems of the same alias group. The
-;; first element is the coding system made at first, which we call as
-;; `base coding system'. The function `make-coding-system' sets this
-;; value automatically and `define-coding-system-alias' updates it.
-;;
-;; See the documentation of make-coding-system for the meanings of the
-;; following properties.
-;;
-;; o post-read-conversion
-;; o pre-write-conversion
-;; o translation-table-for-decode
-;; o translation-table-for-encode
-;; o safe-chars
-;; o safe-charsets
-;; o mime-charset
-;; o valid-codes (meaningful only for a coding system based on CCL)
-
-
-(defsubst coding-system-spec (coding-system)
- "Return coding-spec of CODING-SYSTEM."
- (get (check-coding-system coding-system) 'coding-system))
+;; Coding system is a symbol that has been defined by the function
+;; `define-coding-system'.
-(defun coding-system-type (coding-system)
- "Return the coding type of CODING-SYSTEM.
-A coding type is an integer value indicating the encoding method
-of CODING-SYSTEM. See the function `make-coding-system' for more detail."
- (aref (coding-system-spec coding-system) coding-spec-type-idx))
+(defconst coding-system-iso-2022-flags
+ '(long-form
+ ascii-at-eol
+ ascii-at-cntl
+ 7-bit
+ locking-shift
+ single-shift
+ designation
+ revision
+ direction
+ init-at-bol
+ designate-at-bol
+ safe
+ latin-extra
+ composition
+ euc-tw-shift
+ use-roman
+ use-oldjis)
+ "List of symbols that control ISO-2022 encoder/decoder.
-(defun coding-system-mnemonic (coding-system)
- "Return the mnemonic character of CODING-SYSTEM.
-The mnemonic character of a coding system is used in mode line
-to indicate the coding system. If the arg is nil, return ?-."
- (let ((spec (coding-system-spec coding-system)))
- (if spec (aref spec coding-spec-mnemonic-idx) ?-)))
+The value of the `:flags' attribute in the argument of the function
+`define-coding-system' must be one of them.
+
+If `long-form' is specified, use a long designation sequence on
+encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312',
+and `japanese-jisx0208'. The long designation sequence doesn't
+conform to ISO 2022, but is used by such coding systems as
+`compound-text'.
+
+If `ascii-at-eol' is specified, designate ASCII to g0 at end of line
+on encoding.
+
+If `ascii-at-cntl' is specified, designate ASCII to g0 before control
+codes and SPC on encoding.
+
+If `7-bit' is specified, use 7-bit code only on encoding.
+
+If `locking-shift' is specified, decode locking-shift code correctly
+on decoding, and use locking-shift to invoke a graphic element on
+encoding.
+
+If `single-shift' is specified, decode single-shift code correctly on
+decoding, and use single-shift to invoke a graphic element on encoding.
+
+If `designation' is specified, decode designation code correctly on
+decoding, and use designation to designate a charset to a graphic
+element on encoding.
+
+If `revision' is specified, produce an escape sequence to specify
+revision number of a charset on encoding. Such an escape sequence is
+always correctly decoded on decoding.
+
+If `direction' is specified, decode ISO6429's code for specifying
+direction correctly, and produce the code on encoding.
+
+If `init-at-bol' is specified, on encoding, it is assumed that
+invocation and designation statuses are reset at each beginning of
+line even if `ascii-at-eol' is not specified; thus no codes for
+resetting them are produced.
+
+If `safe' is specified, on encoding, characters not supported by a
+coding are replaced with `?'.
+
+If `latin-extra' is specified, the code-detection routine assumes that a
+code specified in `latin-extra-code-table' (which see) is valid.
+
+If `composition' is specified, an escape sequence to specify
+composition sequence is correctly decoded on decoding, and is produced
+on encoding.
+
+If `euc-tw-shift' is specified, the EUC-TW specific shifting code is
+correctly decoded on decoding, and is produced on encoding.
+
+If `use-roman' is specified, JIS0201-1976-Roman is designated instead
+of ASCII.
+
+If `use-oldjis' is specified, JIS0208-1976 is designated instead of
+JIS0208-1983.")
+
+(defun define-coding-system (name docstring &rest props)
+ "Define NAME (a symbol) as a coding system with DOCSTRING and attributes.
+The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
+may be any symbol.
+
+The following attributes have special meanings. Those labeled as
+\"(required)\", should not be omitted.
+
+`:mnemonic' (required)
+
+VALUE is a character to display on mode line for the coding system.
+
+`:coding-type' (required)
+
+VALUE must be one of `charset', `utf-8', `utf-16', `iso-2022',
+`emacs-mule', `shift-jis', `ccl', `raw-text', `undecided'.
+
+`:eol-type'
+
+VALUE is the EOL (end-of-line) format of the coding system. It must be
+one of `unix', `dos', `mac'. The symbol `unix' means Unix-like EOL
+\(i.e. single LF), `dos' means DOS-like EOL \(i.e. sequence of CR LF),
+and `mac' means MAC-like EOL \(i.e. single CR). If omitted, on
+decoding by the coding system, Emacs automatically detects the EOL
+format of the source text.
+
+`:charset-list'
+
+VALUE must be a list of charsets supported by the coding system. On
+encoding by the coding system, if a character belongs to multiple
+charsets in the list, a charset that comes earlier in the list is
+selected. If `:coding-type' is `iso-2022', VALUE may be `iso-2022',
+which indicates that the coding system supports all ISO-2022 based
+charsets. If `:coding-type' is `emacs-mule', VALUE may be
+`emacs-mule', which indicates that the coding system supports all
+charsets that have the `:emacs-mule-id' property.
+
+`:ascii-compatible-p'
+
+If VALUE is non-nil, the coding system decodes all 7-bit bytes into
+the corresponding ASCII characters, and encodes all ASCII characters
+back to the corresponding 7-bit bytes. VALUE defaults to nil.
+
+`:decode-translation-table'
+
+VALUE must be a translation table to use on decoding.
+
+`:encode-translation-table'
+
+VALUE must be a translation table to use on encoding.
+
+`:post-read-conversion'
+
+VALUE must be a function to call after some text is inserted and
+decoded by the coding system itself and before any functions in
+`after-insert-functions' are called. The arguments to this function
+are the same as those of a function in `after-insert-file-functions',
+i.e. LENGTH of the text to be decoded with point at the head of it,
+and the function should leave point unchanged.
+
+`:pre-write-conversion'
+
+VALUE must be a function to call after all functions in
+`write-region-annotate-functions' and `buffer-file-format' are called,
+and before the text is encoded by the coding system itself. The
+arguments to this function are the same as those of a function in
+`write-region-annotate-functions'.
+
+`:default-char'
+
+VALUE must be a character. On encoding, a character not supported by
+the coding system is replaced with VALUE.
+
+`:for-unibyte'
+
+VALUE non-nil means that visiting a file with the coding system
+results in a unibyte buffer.
+
+`:eol-type'
+
+VALUE must be `unix', `dos', `mac'. The symbol `unix' means Unix-like
+EOL (LF), `dos' means DOS-like EOL (CRLF), and `mac' means MAC-like
+EOL (CR). If omitted, on decoding, the coding system detects EOL
+format automatically, and on encoding, uses Unix-like EOL.
+
+`:mime-charset'
+
+VALUE must be a symbol whose name is that of a MIME charset converted
+to lower case.
+
+`:mime-text-unsuitable'
+
+VALUE non-nil means the `:mime-charset' property names a charset which
+is unsuitable for the top-level media type \"text\".
+
+`:flags'
+
+VALUE must be a list of symbols that control the ISO-2022 converter.
+Each must be a member of the list `coding-system-iso-2022-flags'
+\(which see). This attribute has a meaning only when `:coding-type'
+is `iso-2022'.
+
+`:designation'
+
+VALUE must be a vector [G0-USAGE G1-USAGE G2-USAGE G3-USAGE].
+GN-USAGE specifies the usage of graphic register GN as follows.
+
+If it is nil, no charset can be designated to GN.
+
+If it is a charset, the charset is initially designated to GN, and
+never used by the other charsets.
+
+If it is a list, the elements must be charsets, nil, 94, or 96. GN
+can be used by all the listed charsets. If the list contains 94, any
+iso-2022 charset whose code-space ranges are 94 long can be designated
+to GN. If the list contains 96, any charsets whose whose ranges are
+96 long can be designated to GN. If the first element is a charset,
+that charset is initially designated to GN.
+
+This attribute has a meaning only when `:coding-type' is `iso-2022'.
+
+`:bom'
+
+This attributes specifies whether the coding system uses a `byte order
+mark'. VALUE must nil, t, or cons of coding systems whose
+`:coding-type' is `utf-16'.
+
+If the value is nil, on decoding, don't treat the first two-byte as
+BOM, and on encoding, don't produce BOM bytes.
+
+If the value is t, on decoding, skip the first two-byte as BOM, and on
+encoding, produce BOM bytes accoding to the value of `:endian'.
+
+If the value is cons, on decoding, check the first two-byte. If theyq
+are 0xFE 0xFF, use the car part coding system of the value. If they
+are 0xFF 0xFE, use the car part coding system of the value.
+Otherwise, treat them as bytes for a normal character. On encoding,
+produce BOM bytes accoding to the value of `:endian'.
+
+This attribute has a meaning only when `:coding-type' is `utf-16'.
+
+`:endian'
+
+VALUE must be `big' or `little' specifying big-endian and
+little-endian respectively. The default value is `big'.
+
+This attribute has a meaning only when `:coding-type' is `utf-16'.
+
+`:ccl-decoder'
+
+VALUE is a symbol representing the registered CCL program used for
+decoding. This attribute has a meaning only when `:coding-type' is
+`ccl'.
+
+`:ccl-encoder'
+
+VALUE is a symbol representing the registered CCL program used for
+encoding. This attribute has a meaning only when `:coding-type' is
+`ccl'."
+ (let* ((common-attrs (mapcar 'list
+ '(:mnemonic
+ :coding-type
+ :charset-list
+ :ascii-compatible-p
+ :decode-translation-table
+ :encode-translation-table
+ :post-read-conversion
+ :pre-write-conversion
+ :default-char
+ :for-unibyte
+ :plist
+ :eol-type)))
+ (coding-type (plist-get props :coding-type))
+ (spec-attrs (mapcar 'list
+ (cond ((eq coding-type 'iso-2022)
+ '(:initial
+ :reg-usage
+ :request
+ :flags))
+ ((eq coding-type 'utf-16)
+ '(:bom
+ :endian))
+ ((eq coding-type 'ccl)
+ '(:ccl-decoder
+ :ccl-encoder
+ :valids))))))
+
+ (dolist (slot common-attrs)
+ (setcdr slot (plist-get props (car slot))))
+
+ (dolist (slot spec-attrs)
+ (setcdr slot (plist-get props (car slot))))
+
+ (if (eq coding-type 'iso-2022)
+ (let ((designation (plist-get props :designation))
+ (flags (plist-get props :flags))
+ (initial (make-vector 4 nil))
+ (reg-usage (cons 4 4))
+ request elt)
+ (dotimes (i 4)
+ (setq elt (aref designation i))
+ (cond ((charsetp elt)
+ (aset initial i elt)
+ (setq request (cons (cons elt i) request)))
+ ((consp elt)
+ (aset initial i (car elt))
+ (if (charsetp (car elt))
+ (setq request (cons (cons (car elt) i) request)))
+ (dolist (e (cdr elt))
+ (cond ((charsetp e)
+ (setq request (cons (cons e i) request)))
+ ((eq e 94)
+ (setcar reg-usage i))
+ ((eq e 96)
+ (setcdr reg-usage i))
+ ((eq e t)
+ (setcar reg-usage i)
+ (setcdr reg-usage i)))))))
+ (setcdr (assq :initial spec-attrs) initial)
+ (setcdr (assq :reg-usage spec-attrs) reg-usage)
+ (setcdr (assq :request spec-attrs) request)
+
+ ;; Change :flags value from a list to a bit-mask.
+ (let ((bits 0)
+ (i 0))
+ (dolist (elt coding-system-iso-2022-flags)
+ (if (memq elt flags)
+ (setq bits (logior bits (lsh 1 i))))
+ (setq i (1+ i)))
+ (setcdr (assq :flags spec-attrs) bits))))
+
+ ;; Add :name and :docstring properties to PROPS.
+ (setq props
+ (cons :name (cons name (cons :docstring (cons (purecopy docstring)
+ props)))))
+ (setcdr (assq :plist common-attrs) props)
+ (apply 'define-coding-system-internal
+ name (mapcar 'cdr (append common-attrs spec-attrs)))))
(defun coding-system-doc-string (coding-system)
"Return the documentation string for CODING-SYSTEM."