(mac-text-encoding-ascii): New constant.

[gnu-emacs] / lisp / international / ucs-tables.el
diff --git a/lisp/international/ucs-tables.el b/lisp/international/ucs-tables.el

index c206909c32d6e112ba8ecf2df07018491a6047b7..a8ca220466a612214dfd12c3046fdd71d2a6d02c 100644 (file)
--- a/lisp/international/ucs-tables.el
+++ b/lisp/international/ucs-tables.el
@@ -1,24 +1,29 @@
  ;;; ucs-tables.el --- translation to, from and via Unicode  -*- coding: iso-2022-7bit -*-
  
-;; Copyright (C) 2001  Free Software Foundation, Inc.
+;; Copyright (C) 2001, 2002, 2003, 2005  Free Software Foundation, Inc.
+;; Copyright (C) 2002, 2003
+;;   National Institute of Advanced Industrial Science and Technology (AIST)
+;;   Registration Number H14PRO021
  
  ;; Author: Dave Love <fx@gnu.org>
  ;; Keywords: i18n
  
-;; This file is free software; you can redistribute it and/or modify
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software; you can redistribute it and/or modify
  ;; it under the terms of the GNU General Public License as published by
  ;; the Free Software Foundation; either version 2, or (at your option)
  ;; any later version.
  
-;; This file is distributed in the hope that it will be useful,
+;; GNU Emacs is distributed in the hope that it will be useful,
  ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  ;; GNU General Public License for more details.
  
  ;; You should have received a copy of the GNU General Public License
-;; along with GNU Emacs; see the file COPYING.  If not, write to
-;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.
+;; along with GNU Emacs; see the file COPYING.  If not, write to the
+;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
  
  ;;; Commentary:
  
@@ -28,7 +33,7 @@
  
  ;; These tables are used to construct other mappings between the Mule
  ;; iso8859 charsets and the emacs-unicode charsets and a table that
-;; unifies iso8859 characters using a single charset as far as
+;; unifies iso-8859 characters using a single charset as far as
  ;; possible.  These tables are used by latin1-disp.el to display some
  ;; Unicode characters without a Unicode font and by utf-8.el to unify
  ;; Latin-N as far as possible on encoding.
@@ -42,12 +47,33 @@
  ;; not idempotent.
  
  ;; Global minor modes are provided to unify on encoding and decoding.
-
-;; The translation table `ucs-mule-to-mule-unicode' is populated.
-;; This is used by the `mule-utf-8' coding system to encode extra
-;; characters.
-
-;; Command `ucs-insert' is convenient for inserting a given Unicode.
+;; These could be extended to non-iso-8859 charsets.  However 8859 is
+;; all that users normally care about unifying although, for instance,
+;; Greek occurs in as many as nine Emacs charsets.
+
+;; The translation-table `utf-translation-table-for-encode' is
+;; populated, which could be used for more general unification on
+;; decoding.  This is used by the `mule-utf-8' coding system to encode
+;; extra characters, and also by the coding systems set up by
+;; code-pages.el.  The decoding tables here take account of
+;; `utf-fragment-on-decoding' which may specify decoding Greek and
+;; Cyrillic into 8859 charsets.
+
+;; Unification also arranges for `translation-table-for-input' to be
+;; set either globally or locally.  This is used to translate input
+;; characters appropriately for the buffer's coding system (if
+;; possible).  Unification on decoding sets it globally to translate
+;; to Unicode.  Unification on encoding uses hooks to set it up
+;; locally to buffers.  Thus in the latter case, typing `"a' into a
+;; Latin-1 buffer using the `latin-2-prefix' method translates the
+;; generated latin-iso8859-2 `\e,Bd\e(B' into latin-iso8859-1 `\e,Ad\e(B'.
+
+;; NB, this code depends on the default value of
+;; `enable-character-translation'.  (Making it nil would anyway lead
+;; to inconsistent behaviour between CCL-based coding systems which
+;; use explicit translation tables and the rest.)
+
+;; Command `ucs-insert' is convenient for inserting a given Unicode character.
  ;; (See also the `ucs' input method.)
  
  ;;; Code:
@@ -55,13 +81,13 @@
  ;;; Define tables, to be populated later.
  
  (defvar ucs-mule-8859-to-ucs-table (make-translation-table)
-  "Translation table from Emacs ISO-8859 characters to Unicode.
+  "Char table from Emacs ISO-8859 characters to Unicode.
  This maps Emacs characters from the non-Latin-1
  ...-iso8859-... charsets to their Unicode code points.  This is a
  many-to-one mapping.")
  
  (defvar ucs-mule-8859-to-mule-unicode (make-translation-table)
-  "Translation table from Emacs ISO-8859 characters to Mule Unicode.
+  "Char table from Emacs ISO-8859 characters to Mule Unicode.
  This maps Emacs characters from the non-Latin-1
  ...-iso8859-... charsets to characters from the
  mule-unicode-... charsets.  This is a many-to-one mapping.  The
@@ -123,10 +149,12 @@ Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.")
    "Used as `translation-table-for-encode' for iso-8859-15.
  Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.")
  
-;; Probably defined by utf-8.el.
-(defvar ucs-mule-to-mule-unicode (make-translation-table))
-(unless (get 'ucs-mule-to-mule-unicode 'translation-table)
-  (define-translation-table 'ucs-mule-to-mule-unicode ucs-mule-to-mule-unicode))
+(setq translation-table-for-input (make-translation-table))
+;; It will normally be set locally, before the major mode is invoked.
+(put 'translation-table-for-input 'permanent-local t)
+
+(define-translation-table 'ucs-translation-table-for-decode)
+
  ;;; Set up the tables.
  
  ;; Most of these tables were derived from ones in Mule-UCS.
@@ -702,25 +730,28 @@ Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.")
          (?\\e,H<\e(B . ?\x00BC) ;; VULGAR FRACTION ONE QUARTER
          (?\\e,H=\e(B . ?\x00BD) ;; VULGAR FRACTION ONE HALF
          (?\\e,H>\e(B . ?\x00BE) ;; VULGAR FRACTION THREE QUARTERS
-        (?\\e,H@\e(B . ?\x05B0) ;; HEBREW POINT SHEVA
-        (?\\e,HA\e(B . ?\x05B1) ;; HEBREW POINT HATAF SEGOL
-        (?\\e,HB\e(B . ?\x05B2) ;; HEBREW POINT HATAF PATAH
-        (?\\e,HC\e(B . ?\x05B3) ;; HEBREW POINT HATAF QAMATS
-        (?\\e,HD\e(B . ?\x05B4) ;; HEBREW POINT HIRIQ
-        (?\\e,HE\e(B . ?\x05B5) ;; HEBREW POINT TSERE
-        (?\\e,HF\e(B . ?\x05B6) ;; HEBREW POINT SEGOL
-        (?\\e,HG\e(B . ?\x05B7) ;; HEBREW POINT PATAH
-        (?\\e,HH\e(B . ?\x05B8) ;; HEBREW POINT QAMATS
-        (?\\e,HI\e(B . ?\x05B9) ;; HEBREW POINT HOLAM
-        (?\\e,HK\e(B . ?\x05BB) ;; HEBREW POINT QUBUTS
-        (?\\e,HL\e(B . ?\x05BC) ;; HEBREW POINT DAGESH
-        (?\\e,HM\e(B . ?\x05BD) ;; HEBREW POINT METEG
-        (?\\e,HN\e(B . ?\x05BE) ;; HEBREW POINT MAQAF
-        (?\\e,HO\e(B . ?\x05BF) ;; HEBREW POINT RAFE
-        (?\\e,HP\e(B . ?\x05C0) ;; HEBREW PUNCTUATION PASEQ
-        (?\\e,HQ\e(B . ?\x05C1) ;; HEBREW POINT SHIN DOT
-        (?\\e,HR\e(B . ?\x05C2) ;; HEBREW POINT SIN DOT
-        (?\\e,HS\e(B . ?\x05C3) ;; HEBREW PUNCTUATION SOF PASUQ
+        ;; These are commented out since the current 8859-8 standard
+        ;; does not yet define these codepoints (although there are
+        ;; drafts which do).
+;       (?\\e,H@\e(B . ?\x05B0) ;; HEBREW POINT SHEVA
+;       (?\\e,HA\e(B . ?\x05B1) ;; HEBREW POINT HATAF SEGOL
+;       (?\\e,HB\e(B . ?\x05B2) ;; HEBREW POINT HATAF PATAH
+;       (?\\e,HC\e(B . ?\x05B3) ;; HEBREW POINT HATAF QAMATS
+;       (?\\e,HD\e(B . ?\x05B4) ;; HEBREW POINT HIRIQ
+;       (?\\e,HE\e(B . ?\x05B5) ;; HEBREW POINT TSERE
+;       (?\\e,HF\e(B . ?\x05B6) ;; HEBREW POINT SEGOL
+;       (?\\e,HG\e(B . ?\x05B7) ;; HEBREW POINT PATAH
+;       (?\\e,HH\e(B . ?\x05B8) ;; HEBREW POINT QAMATS
+;       (?\\e,HI\e(B . ?\x05B9) ;; HEBREW POINT HOLAM
+;       (?\\e,HK\e(B . ?\x05BB) ;; HEBREW POINT QUBUTS
+;       (?\\e,HL\e(B . ?\x05BC) ;; HEBREW POINT DAGESH
+;       (?\\e,HM\e(B . ?\x05BD) ;; HEBREW POINT METEG
+;       (?\\e,HN\e(B . ?\x05BE) ;; HEBREW POINT MAQAF
+;       (?\\e,HO\e(B . ?\x05BF) ;; HEBREW POINT RAFE
+;       (?\\e,HP\e(B . ?\x05C0) ;; HEBREW PUNCTUATION PASEQ
+;       (?\\e,HQ\e(B . ?\x05C1) ;; HEBREW POINT SHIN DOT
+;       (?\\e,HR\e(B . ?\x05C2) ;; HEBREW POINT SIN DOT
+;       (?\\e,HS\e(B . ?\x05C3) ;; HEBREW PUNCTUATION SOF PASUQ
          (?\\e,H[\e(B . ?\x202D) ;; LEFT-TO-RIGHT OVERRIDE
          (?\\e,H\\e(B . ?\x202E) ;; RIGHT-TO-LEFT OVERRIDE
          (?\\e,H]\e(B . ?\x202C) ;; POP DIRECTIONAL FORMATTING
@@ -1062,14 +1093,14 @@ Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.")
            (push (cons (make-char 'latin-iso8859-1 (- i 128)) i)
                  l)
            (setq i (1+ i)))
-        (nreverse l)))
-      
-;;       (case-table (standard-case-table))
-;;       (syntax-table (standard-syntax-table))
-      )
+        (nreverse l))))
+
+  ;; Note: Here, using decode-char is safe because
+  ;; utf-fragment-on-decoding is by default nil, thus the translation
+  ;; table `utf-translation-table-for-decode' does nothing.
  
    ;; Convert the lists to the basic char tables.
-  (dolist (n (list 15 14 9 8 7 5 4 3 2 1))
+  (dolist (n (list 15 14 9 8 7 6 5 4 3 2 1))
      (let ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n)))))
        (dolist (pair alist)
         (let ((mule (car pair))
@@ -1079,71 +1110,43 @@ Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.")
           ;;      (aset ucs-ucs-to-mule-8859-table uc mule)
           ;;      (aset ucs-mule-unicode-to-mule-8859 mu mule)
           (aset ucs-mule-8859-to-mule-unicode mule mu)
-         (aset ucs-mule-to-mule-unicode mule mu)))
-;; I think this is actually done OK in characters.el.
-;; Probably things like accents shouldn't have word syntax, but the
-;; Latin-N syntax tables currently aren't consistent for such
-;; characters anyhow.
-;;      ;; Make the mule-unicode characters inherit syntax and case info
-;;      ;; if they don't already have it.
-;;      (dolist (pair alist)
-;;     (let ((mule (car pair))
-;;           (uc (cdr pair))
-;;           (mu (decode-char 'ucs (cdr pair))))
-;;       (let ((syntax (aref syntax-table mule)))
-;;         (if (eq mule (downcase mule))
-;;             (if (eq mule (upcase mule)) ; non-letter or uncased letter
-;;                 (progn
-;;                   (if (= 4 (car syntax)) ; left delim
-;;                       (progn
-;;                         (aset syntax-table
-;;                               mu
-;;                               (cons 4 (aref ucs-mule-8859-to-mule-unicode
-;;                                             (cdr syntax))))
-;;                         (aset syntax-table
-;;                               (aref ucs-mule-8859-to-mule-unicode
-;;                                     (cdr syntax))
-;;                               (cons 5 mu)))
-;;                     (aset syntax-table mu syntax))
-;;                   (aset case-table mu mu)))
-;;           ;; Upper case letter
-;;           (let ((lower (aref ucs-mule-8859-to-mule-unicode
-;;                              (aref case-table mule))))
-;;             (aset case-table mu lower)
-;;             (aset case-table lower lower)
-;;             (modify-syntax-entry lower "w   " syntax-table)
-;;             (modify-syntax-entry mu "w   " syntax-table))))))
-      ))
+         (aset ucs-mule-to-mule-unicode mule mu)))))
+
    ;; Derive tables that can be used as per-coding-system
    ;; `translation-table-for-encode's.
+  ;; N.B., there's no 8859-6 coding system.
    (dolist (n (list 15 14 9 8 7 5 4 3 2 1))
      (let* ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n))))
            (encode-translator (set (intern (format "ucs-8859-%d-encode-table"
                                                    n))
                                    (make-translation-table)))
+          (coding-system
+           (coding-system-base (intern (format "iso-8859-%d" n))))
+          (dependency (coding-system-get coding-system 'dependency))
            elt)
-      ;; Start with the mule-unicode component.
-      (dolist (pair alist)
-       (let ((mule (car pair))
-             (mu (decode-char 'ucs (cdr pair))))
-         (aset encode-translator mu mule)))
+      ;; Start with the mule-unicode component (except for latin-iso8859-1).
+      (if (/= n 1)
+         (dolist (pair alist)
+           (let ((mule (car pair))
+                 (mu (decode-char 'ucs (cdr pair))))
+             (aset encode-translator mu mule))))
        ;; Find characters from other 8859 sets which map to the same
        ;; unicode as some character in this set.
        (map-char-table (lambda (k v)
                         (if (and (setq elt (rassq v alist))
                                  (not (assq k alist)))
                             (aset encode-translator k (car elt))))
-                     ucs-mule-8859-to-ucs-table))))
+                     ucs-mule-8859-to-ucs-table)
+      (optimize-char-table encode-translator)
  
-;; Register for use in CCL.
-(define-translation-table 'ucs-mule-8859-to-mule-unicode
-  ucs-mule-8859-to-mule-unicode)
+      (or (memq 'unify-8859-on-encoding-mode dependency)
+         (setq dependency (cons 'unify-8859-on-encoding-mode dependency)))
+      (or (memq 'unify-8859-on-decoding-mode dependency)
+         (setq dependency (cons 'unify-8859-on-decoding-mode dependency)))
+      (coding-system-put coding-system 'dependency dependency))))
  
-;; Fixme: Make this reversible, which means frobbing
-;; `char-coding-system-table' directly to remove what we added -- see
-;; codepages.el.  Also make it a user option.
-(defun ucs-unify-8859 (&optional encode-only)
-  "Set up translation tables for unifying characters from ISO 8859.
+(defun ucs-unify-8859 (for-encode for-decode)
+  "Set up translation-tables for unifying characters from ISO 8859.
  
  On decoding, non-ASCII characters are mapped into the `iso-latin-1'
  and `mule-unicode-0100-24ff' charsets.  On encoding, these are mapped
@@ -1151,159 +1154,110 @@ back appropriate for the coding system.
  
  With prefix arg, do unification on encoding only, i.e. don't unify
  everything on input operations."
-  (interactive "P")
-  (unless encode-only
+  (when for-decode
      ;; Unify 8859 on decoding.  (Non-CCL coding systems only.)
+    (if utf-fragment-on-decoding
+       (progn (map-char-table
+               (lambda (k v)
+                 (if v (aset ucs-mule-8859-to-mule-unicode v nil)))
+               utf-fragmentation-table)
+              (optimize-char-table ucs-mule-8859-to-mule-unicode))
+      ;; Reset in case it was changed.
+      (map-char-table
+       (lambda (k v)
+        (if v (aset ucs-mule-8859-to-mule-unicode v k)))
+       utf-fragmentation-table))
+
+    ;; For non-CCL coding systems (e.g. iso-latin-2).
      (set-char-table-parent standard-translation-table-for-decode
-                          ucs-mule-8859-to-mule-unicode))
-  ;; Adjust the 8859 coding systems to fragment the unified characters
-  ;; on encoding.
-  (dolist (n '(1 2 3 4 5 7 8 9 14 15))
-    (let* ((coding-system
-           (coding-system-base (intern (format "iso-8859-%d" n))))
-          (table (symbol-value
-                  (intern (format "ucs-8859-%d-encode-table" n))))
-          (safe (coding-system-get coding-system 'safe-chars)))
-      ;; Actually, the coding system's safe-chars are not normally
-      ;; used after they've been registered, but we might as well
-      ;; record them.  Setting the parent here is a convenience.
-      (set-char-table-parent safe table)
-      ;; Update the table of what encodes to what.
-      (register-char-codings coding-system table)
-      (coding-system-put coding-system 'translation-table-for-encode table)))
-
-;;; The following works for the bundled coding systems, but it's
-;;; better to use the Unicode-based ones and make it irrelevant.
-
-;;;   ;; Update the Cyrillic special cases.
-;;;   ;; `translation-table-for-encode' doesn't work for CCL coding
-;;;   ;; systems, and `standard-translation-table-for-decode' isn't
-;;;   ;; applied.
-;;;   (let ((table (get 'cyrillic-koi8-r-encode-table 'translation-table)))
-;;;     (map-char-table
-;;;      (lambda (k v)
-;;;        (aset table
-;;;         (or (aref ucs-8859-5-encode-table k)
-;;;             k)
-;;;         v))
-;;;      table)
-;;;     (register-char-codings 'cyrillic-koi8 table))
-;;;   (let ((table (get 'cyrillic-koi8-r-nonascii-translation-table
-;;;                'translation-table)))
-;;;     (map-char-table
-;;;      (lambda (k v)
-;;;        (if v (aset table k (or (aref ucs-mule-8859-to-mule-unicode v)
-;;;                           v))))
-;;;      table))
-;;;   ;; Redefine this, since the orginal only translated 8859-5.
-;;;   (define-ccl-program ccl-encode-koi8
-;;;     `(1
-;;;       ((loop
-;;;    (read-multibyte-character r0 r1)
-;;;    (translate-character cyrillic-koi8-r-encode-table r0 r1)
-;;;    (write-repeat r1))))
-;;;     "CCL program to encode KOI8.")
-;;;   (let ((table (get 'cyrillic-alternativnyj-encode-table 'translation-table)))
-;;;     (map-char-table
-;;;      (lambda (k v)
-;;;        (aset table
-;;;         (or (aref ucs-8859-5-encode-table k)
-;;;             k)
-;;;         v))
-;;;      table)
-;;;     (register-char-codings 'cyrillic-alternativnyj table))
-;;;   (let ((table (get 'cyrillic-alternativnyj-nonascii-translation-table
-;;;                'translation-table)))
-;;;     (map-char-table
-;;;      (lambda (k v)
-;;;        (if v (aset table
-;;;               k
-;;;               (or (aref ucs-mule-8859-to-mule-unicode v)
-;;;                   v))))
-;;;      table))
-  )
-
-(defun ucs-fragment-8859 (&optional encode-only)
+                          ucs-mule-8859-to-mule-unicode)
+    ;; For CCL coding systems other than mule-utf-*
+    (define-translation-table 'ucs-translation-table-for-decode
+      ucs-mule-8859-to-mule-unicode)
+
+    ;; Translate Quail input globally.
+    (setq-default translation-table-for-input ucs-mule-to-mule-unicode)
+    ;; In case this is set up, but we should use the global
+    ;; translation-table.
+    (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup))
+
+  (when for-encode
+    ;; Make mule-utf-* encode all characters in ucs-mule-to-mule-unicode.
+    (let ((coding-list '(mule-utf-8 mule-utf-16be mule-utf-16le
+                                   mule-utf-16be-with-signature
+                                   mule-utf-16le-with-signature)))
+      (define-translation-table 'utf-translation-table-for-encode
+       ucs-mule-to-mule-unicode)
+      (dolist (coding coding-list)
+       (set-char-table-parent (coding-system-get coding 'safe-chars)
+                              ucs-mule-to-mule-unicode)))
+
+    ;; Adjust the 8859 coding systems to fragment the unified characters
+    ;; on encoding.
+    (dolist (n '(1 2 3 4 5 7 8 9 14 15))
+      (let* ((coding-system
+             (coding-system-base (intern (format "iso-8859-%d" n))))
+            (table (symbol-value
+                    (intern (format "ucs-8859-%d-encode-table" n))))
+            (safe (coding-system-get coding-system 'safe-chars)))
+       ;; Actually, the coding system's safe-chars are not normally
+       ;; used after they've been registered, but we might as well
+       ;; record them.  Setting the parent here is a convenience.
+       (set-char-table-parent safe table)
+       (coding-system-put coding-system 'translation-table-for-encode table)))
+    (add-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)))
+
+(defun ucs-fragment-8859 (for-encode for-decode)
    "Undo the unification done by `ucs-unify-8859'.
  With prefix arg, undo unification on encoding only, i.e. don't undo
  unification on input operations."
-  (interactive "P")
-  ;; Maybe fix decoding.
-  (unless encode-only
-    ;; Unify 8859 on decoding.  (Non-CCL coding systems only.)
-    (set-char-table-parent standard-translation-table-for-decode nil))
-  ;; Fix encoding.  For each charset, remove the entries in
-  ;; `char-coding-system-table' added to its safe-chars table (as its
-  ;; parent).
-  (dolist (n '(1 2 3 4 5 7 8 9 14 15))
-    (let* ((coding-system
-           (coding-system-base (intern (format "iso-8859-%d" n))))
-          (table (symbol-value
-                  (intern (format "ucs-8859-%d-encode-table" n))))
-          (safe (coding-system-get coding-system 'safe-chars)))
-      (map-char-table
-       (lambda (key val)
-        (if (and (>= key 128) val)
-            (let ((codings (aref char-coding-system-table key)))
-              (aset char-coding-system-table key
-                    (delq coding-system codings)))))
-       (char-table-parent safe))
-      (set-char-table-parent safe nil)
-      (coding-system-put coding-system 'translation-table-for-encode nil))))
-
-;;;###autoload
-(define-minor-mode unify-8859-on-encoding-mode
-  "Set up translation tables for unifying ISO 8859 characters on encoding.
-
-The ISO 8859 characters sets overlap, e.g. 8859-1 (Latin-1) and
-8859-15 (Latin-9) differ only in a few characters.  Emacs normally
-distinguishes equivalent characters from those ISO-8859 character sets
-which are built in to Emacs.  This behaviour is essentially inherited
-from the European-originated international standards.  Treating them
-equivalently, by translating to and from a single representation is
-called `unification'.  (The `utf-8' coding system treats the
-characters of European scripts in a unified manner.)
-
-In this mode, on encoding -- i.e. output operations -- non-ASCII
-characters from the built-in ISO 8859 and `mule-unicode-0100-24ff'
-charsets are handled automatically by the coding system used if it can
-represent them.  Thus, say, an e-acute from the Latin-1 charset (the
-unified representation) in a buffer saved as Latin-9 will be encoded
-directly to a byte value 233.  By default, in contrast, you would be
-promtped for a general coding system to use for saving the file, which
-can cope with separate Latin-1 and Latin-9 representations of e-acute.
-
-See also command `unify-8859-on-decoding-mode'."
-  :group 'mule
-  :global t
-  :init-value nil
-  (if unify-8859-on-encoding-mode
-      (set-char-table-parent standard-translation-table-for-decode
-                        ucs-mule-8859-to-mule-unicode)
-    (set-char-table-parent standard-translation-table-for-decode nil)))
-
-;;;###autoload
-(define-minor-mode unify-8859-on-decoding-mode
-  "Set up translation tables for unifying ISO 8859 characters on decoding.
-On decoding, i.e. input operations, non-ASCII characters from the
-built-in ISO 8859 charsets are unified by mapping them into the
-`iso-latin-1' and `mule-unicode-0100-24ff' charsets.
-
-See also command `unify-8859-on-encoding-mode'."
-  :group 'mule
-  :global t
-  :init-value nil
-  (if unify-8859-on-decoding-mode
-      (ucs-unify-8859 t)
-    (ucs-fragment-8859 t)))
+  (when for-decode
+    ;; Don't unify 8859 on decoding.
+    ;; For non-CCL coding systems (e.g. iso-latin-2).
+    (set-char-table-parent standard-translation-table-for-decode nil)
+    ;; For CCL coding systems other than mule-utf-* (e.g. cyrillic-koi8).
+    (define-translation-table 'ucs-translation-table-for-decode)
+    (setq-default translation-table-for-input nil))
+
+  (when for-encode
+    ;; Disable mule-utf-* encoding for all characters in
+    ;; ucs-mule-to-mule-unicode except what was originally supported
+    ;; and what is translated by utf-translation-table-for-decode when
+    ;; `utf-fragment-on-decoding' is non-nil.
+    (let ((coding-list '(mule-utf-8 mule-utf-16be mule-utf-16le
+                                   mule-utf-16be-with-signature
+                                   mule-utf-16le-with-signature))
+         (safe (coding-system-get 'mule-utf-8 'safe-chars)))
+      (dolist (coding coding-list)
+       (set-char-table-parent (coding-system-get coding 'safe-chars) nil))
+      (if (not utf-fragment-on-decoding)
+         (define-translation-table 'utf-translation-table-for-encode)
+       (define-translation-table 'utf-translation-table-for-encode
+         utf-defragmentation-table)))
+
+    ;; For each charset, remove the parent of `safe-chars' property of
+    ;; the corresponding coding system.
+    (dolist (n '(1 2 3 4 5 7 8 9 14 15))
+      (let* ((coding-system
+             (coding-system-base (intern (format "iso-8859-%d" n))))
+            (safe (coding-system-get coding-system 'safe-chars)))
+       (if (char-table-parent safe)
+           (set-char-table-parent safe nil))
+       (coding-system-put coding-system 'translation-table-for-encode nil)))
+    (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)))
  
  (defun ucs-insert (arg)
    "Insert the Emacs character representation of the given Unicode.
  Interactively, prompts for a hex string giving the code."
    (interactive "sUnicode (hex): ")
-  (insert (decode-char 'ucs (if (integerp arg)
-                               arg
-                             (string-to-number arg 16)))))
+  (or (integerp arg)
+      (setq arg (string-to-number arg 16)))
+  (let ((c (decode-char 'ucs arg)))
+    (if c
+       (insert c)
+      (if (or (< arg 0) (> arg #x10FFFF))
+         (error "Not a Unicode character code: 0x%X" arg)
+       (error "Character U+%04X is not yet supported" arg)))))
  
  ;;; Dealing with non-8859 character sets.
  
@@ -2434,12 +2388,20 @@ Interactively, prompts for a hex string giving the code."
          (?\e(1x\e(B . ?\e$,1Dx\e(B)
          (?\e(1y\e(B . ?\e$,1Dy\e(B)
          (?\e(1|\e(B . ?\e$,1D|\e(B)
-        (?\e(1}\e(B . ?\e$,1D}\e(B))))
+        (?\e(1}\e(B . ?\e$,1D}\e(B)))
+
+      (other
+       '(
+        ;; latin-jisx0201 is mostly decoded to ascii, with these
+        ;; exceptions, so we don't bother with tables for the whole
+        ;; thing.
+        (?\e(J\\e(B . ?\e,A%\e(B)
+        (?\e(J~\e(B . ?\e$,1s>\e(B))))
    (let ((table (make-char-table 'safe-chars))
         safe-charsets)
      (dolist (cs '(vietnamese-viscii lao chinese-sisheng ipa
                   katakana-jisx0201 thai-tis620 tibetan-iso-8bit
-                 indian-is13194 ethiopic))
+                 indian-is13194 ethiopic other))
        ;; These tables could be used as translation-table-for-encode by
        ;; the relevant coding systems.
        (let ((encode-translator
@@ -2450,18 +2412,135 @@ Interactively, prompts for a hex string giving the code."
           (aset ucs-mule-to-mule-unicode (car pair) (cdr pair))
           (if encode-translator
               (aset encode-translator (cdr pair) (car pair))))
+       (if encode-translator
+           (optimize-char-table encode-translator))
         (if (charsetp cs)
             (push cs safe-charsets)
-         (setq safe-charsets
-               (append (delq 'ascii (coding-system-get cs 'safe-charsets))
-                       safe-charsets)))))
+         (if (coding-system-p cs)
+             (setq safe-charsets
+                   (append (delq 'ascii (coding-system-get cs 'safe-charsets))
+                           safe-charsets))))
+       (cond ((eq cs 'vietnamese-viscii)
+              (coding-system-put 'vietnamese-viscii
+                                 'translation-table-for-input
+                                 encode-translator)
+              (coding-system-put 'vietnamese-viqr
+                                 'translation-table-for-input
+                                 encode-translator))
+             ((memq cs '(lao thai-tis620 tibetan-iso-8bit))
+              (coding-system-put cs 'translation-table-for-input
+                                 encode-translator)))))
      (dolist (c safe-charsets)
-      (aset table (make-char c) t))
-    (coding-system-put 'mule-utf-8 'safe-charsets
-                      (append (coding-system-get 'mule-utf-8 'safe-charsets)
-                              safe-charsets))
-    (register-char-codings 'mule-utf-8 table)))
+      (aset table (make-char c) t))))
+
+(define-minor-mode unify-8859-on-encoding-mode
+  "Set up translation-tables for unifying ISO 8859 characters on encoding.
+
+The ISO 8859 character sets overlap, e.g. 8859-1 (Latin-1) and
+8859-15 (Latin-9) differ only in a few characters.  Emacs normally
+distinguishes equivalent characters from those ISO-8859 character sets
+which are built in to Emacs.  This behavior is essentially inherited
+from the European-originated international standards.  Treating them
+equivalently, by translating to and from a single representation is
+called `unification'.  (The `utf-8' coding system treats the
+characters of European scripts in a unified manner.)
+
+In this mode, on encoding -- i.e. output operations -- non-ASCII
+characters from the built-in ISO 8859 and `mule-unicode-0100-24ff'
+charsets are handled automatically by the coding system used if it can
+represent them.  Thus, say, an e-acute from the Latin-1 charset (the
+unified representation) in a buffer saved as Latin-9 will be encoded
+directly to a byte value 233.  By default, in contrast, you would be
+prompted for a general coding system to use for saving the file, which
+can cope with separate Latin-1 and Latin-9 representations of e-acute.
+
+Also sets hooks that arrange `translation-table-for-input' to be set
+up locally.  This will often allow input generated by Quail input
+methods to conform with what the buffer's file coding system can
+encode.  Thus you could use a Latin-2 input method to search for
+e-acute in a Latin-1 buffer.
+
+See also command `unify-8859-on-decoding-mode'."
+  :group 'mule
+  :global t
+  :init-value t
+  (if unify-8859-on-encoding-mode
+      (ucs-unify-8859 t nil)
+    (ucs-fragment-8859 t nil)))
+
+(custom-add-version 'unify-8859-on-encoding-mode "21.3")
+
+(define-minor-mode unify-8859-on-decoding-mode
+  "Set up translation-tables for unifying ISO 8859 characters on decoding.
+On decoding, i.e. input operations, non-ASCII characters from the
+built-in ISO 8859 charsets are unified by mapping them into the
+`iso-latin-1' and `mule-unicode-0100-24ff' charsets.
+
+Also sets `translation-table-for-input' globally, so that keyboard input
+produces unified characters.
+
+See also command `unify-8859-on-encoding-mode' and the user option
+`utf-fragment-on-decoding'."
+  :group 'mule
+  :global t
+  :init-value nil
+  (if unify-8859-on-decoding-mode
+      (ucs-unify-8859 nil t)
+    (ucs-fragment-8859 nil t)))
+
+(custom-add-version 'unify-8859-on-decoding-mode "21.3")
+
+;; Synchronize the status with the initial value of
+;; unify-8859-on-encoding-mode and unify-8859-on-decoding-mode.
+(ucs-unify-8859 t nil)
+
+;; Arrange to set up the translation-table for keyboard input.  This
+;; is called from get-buffer-create, set-buffer-file-coding-system,
+;; normal-mode and minibuffer-setup-hook.
+(defun ucs-set-table-for-input (&optional buffer)
+  "Set up an appropriate `translation-table-for-input' for BUFFER.
+BUFFER defaults to the current buffer.  This function is
+automatically called directly at the end of `get-buffer-create'."
+  (when (and unify-8859-on-encoding-mode
+             (not unify-8859-on-decoding-mode)
+            (char-table-p translation-table-for-input))
+    (let ((cs (and buffer-file-coding-system
+                  (coding-system-base buffer-file-coding-system)))
+         table)
+      (if (or (null cs)
+             (eq cs 'undecided))
+         (setq cs
+               (and default-buffer-file-coding-system
+                    (coding-system-base default-buffer-file-coding-system))))
+      (when cs
+       (setq table (coding-system-get cs 'translation-table-for-encode))
+       (if (and table (symbolp table))
+           (setq table (get table 'translation-table)))
+       (unless (char-table-p table)
+         (setq table (coding-system-get cs 'translation-table-for-input))
+         (if (and table (symbolp table))
+             (setq table (get table 'translation-table))))
+       (when (char-table-p table)
+         (if buffer
+             (with-current-buffer buffer
+               (set (make-local-variable 'translation-table-for-input)
+                    table))
+           (set (make-local-variable 'translation-table-for-input)
+                table)))))))
+
+;; The minibuffer needs to acquire a `buffer-file-coding-system' for
+;; the above to work in it.
+(defun ucs-minibuffer-setup ()
+  "Set up an appropriate `buffer-file-coding-system' for current buffer.
+Intended to be added to `minibuffer-setup-hook'."
+  (set (make-local-variable 'buffer-file-coding-system)
+       (with-current-buffer (let ((win (minibuffer-selected-window)))
+                             (if (window-live-p win) (window-buffer win)
+                               (cadr (buffer-list))))
+        buffer-file-coding-system))
+  (ucs-set-table-for-input))
  
  (provide 'ucs-tables)
  
+;; arch-tag: b497e22b-7fe1-486a-9352-e2d7f7d76a76
  ;;; ucs-tables.el ends here