;; `utf-fragment-on-decoding' which may specify decoding Greek and
;; Cyrillic into 8859 charsets.
-;; Unification also puts a `translation-table-for-input' property on
-;; relevant coding coding systems and arranges for the
-;; `translation-table-for-input' variable to be set either globally or
-;; locally. This is used by Quail input methods to translate input
+;; Unification also arranges for `translation-table-for-input' to be
+;; set either globally or locally. This is used to translate input
;; characters appropriately for the buffer's coding system (if
;; possible). Unification on decoding sets it globally to translate
;; to Unicode. Unification on encoding uses hooks to set it up
;; to inconsistent behaviour between CCL-based coding systems which
;; use explicit translation tables and the rest.)
-;; Command `ucs-insert' is convenient for inserting a given Unicode.
+;; Command `ucs-insert' is convenient for inserting a given unicode.
;; (See also the `ucs' input method.)
-;; A replacement CCL program is provided which allows characters in
-;; the `ucs-mule-to-mule-unicode' table to be displayed with an
-;; iso-10646-encoded font. E.g. to use a `Unicode' font for Cyrillic:
-;;
-;; (set-fontset-font "fontset-startup"
-;; (cons (make-char 'cyrillic-iso8859-5 160)
-;; (make-char 'cyrillic-iso8859-5 255))
-;; '(nil . "ISO10646-1"))
-
;;; Code:
;;; Define tables, to be populated later.
"Used as `translation-table-for-encode' for iso-8859-15.
Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.")
-(defvar translation-table-for-input (make-translation-table))
+(setq translation-table-for-input (make-translation-table))
+;; It will normally be set locally, before the major mode is invoked.
+(put 'translation-table-for-input 'permanent-local t)
+
+(define-translation-table 'ucs-translation-table-for-decode)
;;; Set up the tables.
;; table `utf-translation-table-for-decode' does nothing.
;; Convert the lists to the basic char tables.
- (dolist (n (list 15 14 9 8 7 5 4 3 2 1))
+ (dolist (n (list 15 14 9 8 7 6 5 4 3 2 1))
(let ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n)))))
(dolist (pair alist)
(let ((mule (car pair))
;; Derive tables that can be used as per-coding-system
;; `translation-table-for-encode's.
+ ;; N.B., there's no 8859-6 coding system.
(dolist (n (list 15 14 9 8 7 5 4 3 2 1))
(let* ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n))))
(encode-translator (set (intern (format "ucs-8859-%d-encode-table"
;; Translate Quail input globally.
(setq-default translation-table-for-input ucs-mule-to-mule-unicode)
- ;; In case these are set up, but we should use the global
+ ;; In case this is set up, but we should use the global
;; translation-table.
- (remove-hook 'quail-activate-hook 'ucs-quail-activate)
(remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup))
(when for-encode
;; Make mule-utf-* encode all characters in ucs-mule-to-mule-unicode.
- (let ((coding-list '(mule-utf-8 mule-utf-16-be mule-utf-16-le)))
+ (let ((coding-list '(mule-utf-8 mule-utf-16be mule-utf-16le
+ mule-utf-16be-with-signature
+ mule-utf-16le-with-signature)))
(define-translation-table 'utf-translation-table-for-encode
ucs-mule-to-mule-unicode)
(dolist (coding coding-list)
(set-char-table-parent (coding-system-get coding 'safe-chars)
- ucs-mule-to-mule-unicode)
- (register-char-codings coding ucs-mule-to-mule-unicode)))
+ ucs-mule-to-mule-unicode)))
;; Adjust the 8859 coding systems to fragment the unified characters
;; on encoding.
;; used after they've been registered, but we might as well
;; record them. Setting the parent here is a convenience.
(set-char-table-parent safe table)
- ;; Update the table of what encodes to what.
- (register-char-codings coding-system table)
- (coding-system-put coding-system 'translation-table-for-encode table)
- (coding-system-put coding-system 'translation-table-for-input table)))
- ;; Arrange local translation-tables for Quail input.
- (add-hook 'quail-activate-hook 'ucs-quail-activate)
+ (coding-system-put coding-system 'translation-table-for-encode table)))
(add-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)))
(defun ucs-fragment-8859 (for-encode for-decode)
(set-char-table-parent standard-translation-table-for-decode nil)
;; For CCL coding systems other than mule-utf-* (e.g. cyrillic-koi8).
(define-translation-table 'ucs-translation-table-for-decode)
- ;; For Quail input.
(setq-default translation-table-for-input nil))
(when for-encode
- ;; Make mule-utf-* disabled for all characters in
- ;; ucs-mule-to-mule-unicode but what originally supported and what
- ;; translated bt utf-translation-table-for-decode when
+ ;; Disable mule-utf-* encoding for all characters in
+ ;; ucs-mule-to-mule-unicode except what was originally supported
+ ;; and what is translated by utf-translation-table-for-decode when
;; `utf-fragment-on-decoding' is non-nil.
- (let ((coding-list '(mule-utf-8 mule-utf-16-be mule-utf-16-le))
+ (let ((coding-list '(mule-utf-8 mule-utf-16be mule-utf-16le
+ mule-utf-16be-with-signature
+ mule-utf-16le-with-signature))
(safe (coding-system-get 'mule-utf-8 'safe-chars)))
(dolist (coding coding-list)
(set-char-table-parent (coding-system-get coding 'safe-chars) nil))
- ;; Here we assume that all mule-utf-* have the same character
- ;; repertory, thus we can use SAFE for all of them.
- (map-char-table
- (lambda (key val)
- (if (and (>= key 128) val
- (not (aref safe key)))
- (aset char-coding-system-table key
- (delq 'mule-utf-8
- (delq 'mule-utf-16-le
- (delq 'mule-utf-16-be
- (aref char-coding-system-table key)))))))
- ucs-mule-to-mule-unicode)
-
(if (not utf-fragment-on-decoding)
(define-translation-table 'utf-translation-table-for-encode)
(define-translation-table 'utf-translation-table-for-encode
- utf-defragmentation-table)
- (dolist (coding coding-list)
- (register-char-codings coding utf-defragmentation-table))))
+ utf-defragmentation-table)))
- ;; For each charset, remove the entries in
- ;; `char-coding-system-table' added to its safe-chars table (as
- ;; its parent).
+ ;; For each charset, remove the parent of `safe-chars' property of
+ ;; the corresponding coding system.
(dolist (n '(1 2 3 4 5 7 8 9 14 15))
(let* ((coding-system
(coding-system-base (intern (format "iso-8859-%d" n))))
- (table (symbol-value
- (intern (format "ucs-8859-%d-encode-table" n))))
(safe (coding-system-get coding-system 'safe-chars)))
- (when (char-table-parent safe)
- (map-char-table
- (lambda (key val)
- (if (and (>= key 128) val)
- (let ((codings (aref char-coding-system-table key)))
- (aset char-coding-system-table key
- (delq coding-system codings)))))
- (char-table-parent safe))
- (set-char-table-parent safe nil))
- (coding-system-put coding-system 'translation-table-for-encode nil)
- (coding-system-put coding-system 'translation-table-for-input nil)))
- (optimize-char-table char-coding-system-table)
- (remove-hook 'quail-activate-hook 'ucs-quail-activate)
+ (if (char-table-parent safe)
+ (set-char-table-parent safe nil))
+ (coding-system-put coding-system 'translation-table-for-encode nil)))
(remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)))
(defun ucs-insert (arg)
"Insert the Emacs character representation of the given Unicode.
Interactively, prompts for a hex string giving the code."
(interactive "sUnicode (hex): ")
- (let ((c (decode-char 'ucs (if (integerp arg)
- arg
- (string-to-number arg 16)))))
+ (or (integerp arg)
+ (setq arg (string-to-number arg 16)))
+ (let ((c (decode-char 'ucs arg)))
(if c
(insert c)
- (error "Character can't be decoded to UCS"))))
+ (if (or (< arg 0) (> arg #x10FFFF))
+ (error "Not a Unicode character code: 0x%X" arg)
+ (error "Character U+%04X is not yet supported" arg)))))
;;; Dealing with non-8859 character sets.
(?\e(1x\e(B . ?\e$,1Dx\e(B)
(?\e(1y\e(B . ?\e$,1Dy\e(B)
(?\e(1|\e(B . ?\e$,1D|\e(B)
- (?\e(1}\e(B . ?\e$,1D}\e(B))))
+ (?\e(1}\e(B . ?\e$,1D}\e(B)))
+
+ (other
+ '(
+ ;; latin-jisx0201 is mostly decoded to ascii, with these
+ ;; exceptions, so we don't bother with tables for the whole
+ ;; thing.
+ (?\e(J\\e(B . ?\e,A%\e(B)
+ (?\e(J~\e(B . ?\e$,1s>\e(B))))
(let ((table (make-char-table 'safe-chars))
safe-charsets)
(dolist (cs '(vietnamese-viscii lao chinese-sisheng ipa
katakana-jisx0201 thai-tis620 tibetan-iso-8bit
- indian-is13194 ethiopic))
+ indian-is13194 ethiopic other))
;; These tables could be used as translation-table-for-encode by
;; the relevant coding systems.
(let ((encode-translator
(optimize-char-table encode-translator))
(if (charsetp cs)
(push cs safe-charsets)
- (setq safe-charsets
- (append (delq 'ascii (coding-system-get cs 'safe-charsets))
- safe-charsets)))
+ (if (coding-system-p cs)
+ (setq safe-charsets
+ (append (delq 'ascii (coding-system-get cs 'safe-charsets))
+ safe-charsets))))
(cond ((eq cs 'vietnamese-viscii)
(coding-system-put 'vietnamese-viscii
'translation-table-for-input
'translation-table-for-input
encode-translator))
((memq cs '(lao thai-tis620 tibetan-iso-8bit))
- (coding-system-put cs 'translation-table-for-input cs)))))
+ (coding-system-put cs 'translation-table-for-input
+ encode-translator)))))
(dolist (c safe-charsets)
(aset table (make-char c) t))))
can cope with separate Latin-1 and Latin-9 representations of e-acute.
Also sets hooks that arrange `translation-table-for-input' to be set
-up locally when Quail input methods are activated. This will often
-allow input generated by Quail input methods to conform with what the
-buffer's file coding system can encode. Thus you could use a Latin-2
-input method to search for e-acute in a Latin-1 buffer.
+up locally. This will often allow input generated by Quail input
+methods to conform with what the buffer's file coding system can
+encode. Thus you could use a Latin-2 input method to search for
+e-acute in a Latin-1 buffer.
See also command `unify-8859-on-decoding-mode'."
:group 'mule
built-in ISO 8859 charsets are unified by mapping them into the
`iso-latin-1' and `mule-unicode-0100-24ff' charsets.
-Also sets `translation-table-for-input' globally, so that Quail input
-methods produce unified characters.
+Also sets `translation-table-for-input' globally, so that keyboard input
+produces unified characters.
See also command `unify-8859-on-encoding-mode' and the user option
`utf-fragment-on-decoding'."
;; unify-8859-on-encoding-mode and unify-8859-on-decoding-mode.
(ucs-unify-8859 t nil)
-;; Arrange to set up the translation-table for Quail. This probably
-;; isn't foolproof.
-(defun ucs-quail-activate ()
- "Set up an appropriate `translation-table-for-input' for current buffer.
-Intended to be added to `quail-activate-hook'."
- (let ((cs (and buffer-file-coding-system
- (coding-system-base buffer-file-coding-system))))
- (if (eq cs 'undecided)
- (setq cs (and default-buffer-file-coding-system
- (coding-system-base default-buffer-file-coding-system))))
- (if (and cs (coding-system-get cs 'translation-table-for-input))
- (set (make-variable-buffer-local 'translation-table-for-input)
- (coding-system-get cs 'translation-table-for-input)))))
+;; Arrange to set up the translation-table for keyboard input. This
+;; is called from get-buffer-create, set-buffer-file-coding-system,
+;; normal-mode and minibuffer-setup-hook.
+(defun ucs-set-table-for-input (&optional buffer)
+ "Set up an appropriate `translation-table-for-input' for BUFFER.
+BUFFER defaults to the current buffer."
+ (when (and unify-8859-on-encoding-mode
+ (char-table-p translation-table-for-input))
+ (let ((cs (and buffer-file-coding-system
+ (coding-system-base buffer-file-coding-system)))
+ table)
+ (if (or (null cs)
+ (eq cs 'undecided))
+ (setq cs
+ (and default-buffer-file-coding-system
+ (coding-system-base default-buffer-file-coding-system))))
+ (when cs
+ (setq table (coding-system-get cs 'translation-table-for-encode))
+ (if (and table (symbolp table))
+ (setq table (get table 'translation-table)))
+ (unless (char-table-p table)
+ (setq table (coding-system-get cs 'translation-table-for-input)))
+ (when (char-table-p table)
+ (if buffer
+ (with-current-buffer buffer
+ (set (make-variable-buffer-local 'translation-table-for-input)
+ table))
+ (set (make-variable-buffer-local 'translation-table-for-input)
+ table)))))))
;; The minibuffer needs to acquire a `buffer-file-coding-system' for
;; the above to work in it.
(with-current-buffer (let ((win (minibuffer-selected-window)))
(if (window-live-p win) (window-buffer win)
(cadr (buffer-list))))
- buffer-file-coding-system)))
+ buffer-file-coding-system))
+ (ucs-set-table-for-input))
(provide 'ucs-tables)
+;;; arch-tag: b497e22b-7fe1-486a-9352-e2d7f7d76a76
;;; ucs-tables.el ends here