(buffer-file-coding-system-explicit):

[gnu-emacs] / lisp / international / mule.el
diff --git a/lisp/international/mule.el b/lisp/international/mule.el

index f325c60b80e9fd76a27a531b1bb5a0706edf564f..fe16d07fe17411e45961d0e245fd116127c608ff 100644 (file)
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -64,7 +64,7 @@ Return t if file exists."
             (message "Loading %s (source)..." file)
           (message "Loading %s..." file)))
        (when purify-flag
-       (setq preloaded-file-list (cons file preloaded-file-list)))
+       (push file preloaded-file-list))
        (unwind-protect
           (let ((load-file-name fullname)
                 (set-auto-coding-for-load t)
@@ -316,8 +316,7 @@ Optional argument RESTRICTION specifies a way to map the pair of CCS
  and CODE-POINT to a character.  Currently not supported and just ignored."
    (cond
     ((eq ccs 'ucs)
-    (or (gethash code-point
-                (get 'utf-subst-table-for-decode 'translation-hash-table))
+    (or (utf-lookup-subst-table-for-decode code-point)
         (let ((c (cond
                   ((< code-point 160)
                    code-point)
@@ -361,8 +360,7 @@ code-point in CCS.  Currently not supported and just ignored."
          (charset (car split))
          trans)
      (cond ((eq ccs 'ucs)
-          (or (gethash char (get 'utf-subst-table-for-encode
-                                 'translation-hash-table))
+          (or (utf-lookup-subst-table-for-encode char)
                (let ((table (get 'utf-translation-table-for-encode
                                  'translation-table)))
                  (setq trans (aref table char))
@@ -370,7 +368,7 @@ code-point in CCS.  Currently not supported and just ignored."
                      (setq split (split-char trans)
                            charset (car split)))
                  (cond ((eq charset 'ascii)
-                       char)
+                       (or trans char))
                        ((eq charset 'latin-iso8859-1)
                         (+ (nth 1 split) 128))
                        ((eq charset 'mule-unicode-0100-24ff)
@@ -516,11 +514,10 @@ coding system whose eol-type is N."
  (defun coding-system-eol-type-mnemonic (coding-system)
    "Return the string indicating end-of-line format of CODING-SYSTEM."
    (let* ((eol-type (coding-system-eol-type coding-system))
-        (val (cond ((vectorp eol-type) eol-mnemonic-undecided)
-                   ((eq eol-type 0) eol-mnemonic-unix)
+        (val (cond ((eq eol-type 0) eol-mnemonic-unix)
                     ((eq eol-type 1) eol-mnemonic-dos)
                     ((eq eol-type 2) eol-mnemonic-mac)
-                   (t "-"))))
+                   (t eol-mnemonic-undecided))))
      (if (stringp val)
         val
        (char-to-string val))))
@@ -538,6 +535,18 @@ coding system whose eol-type is N."
                  (and (not (> (downcase c1) (downcase c2)))
                       (< c1 c2)))))))
  
+(defun coding-system-equal (coding-system-1 coding-system-2)
+  "Return t if and only if CODING-SYSTEM-1 and CODING-SYSTEM-2 are identical.
+Two coding systems are identical if two symbols are equal
+or one is an alias of the other."
+  (or (eq coding-system-1 coding-system-2)
+      (and (equal (coding-system-spec coding-system-1)
+                 (coding-system-spec coding-system-2))
+          (let ((eol-type-1 (coding-system-eol-type coding-system-1))
+                (eol-type-2 (coding-system-eol-type coding-system-2)))
+            (or (eq eol-type-1 eol-type-2)
+                (and (vectorp eol-type-1) (vectorp eol-type-2)))))))
+
  (defun add-to-coding-system-list (coding-system)
    "Add CODING-SYSTEM to `coding-system-list' while keeping it sorted."
    (if (or (null coding-system-list)
@@ -593,60 +602,14 @@ character code range.  Thus FUNC should iterate over [START, END]."
                  (make-char charset (+ i start) start)
                  (make-char charset (+ i start) (+ start chars -1)))))))
  
-(defun register-char-codings (coding-system safe-chars)
-  "Add entries for CODING-SYSTEM to `char-coding-system-table'.
-If SAFE-CHARS is a char-table, its non-nil entries specify characters
-which CODING-SYSTEM encodes safely.  If SAFE-CHARS is t, register
-CODING-SYSTEM as a general one which can encode all characters."
-  (let ((general (char-table-extra-slot char-coding-system-table 0))
-       ;; Charsets which have some members in the table, but not all
-       ;; of them (i.e. not just a generic character):
-       (partials (char-table-extra-slot char-coding-system-table 1)))
-    (if (eq safe-chars t)
-       (or (memq coding-system general)
-           (set-char-table-extra-slot char-coding-system-table 0
-                                      (cons coding-system general)))
-      (map-char-table
-       (lambda (key val)
-        (if (and (>= key 128) val)
-            (let ((codings (aref char-coding-system-table key))
-                  (charset (char-charset key)))
-              (unless (memq coding-system codings)
-                (if (and (generic-char-p key)
-                         (memq charset partials))
-                    ;; The generic char would clobber individual
-                    ;; entries already in the table.  First save the
-                    ;; separate existing entries for all chars of the
-                    ;; charset (with the generic entry added, if
-                    ;; necessary).
-                    (let (entry existing)
-                      (map-charset-chars
-                       (lambda (start end)
-                         (while (<= start end)
-                           (setq entry (aref char-coding-system-table start))
-                           (when entry
-                             (push (cons
-                                    start
-                                    (if (memq coding-system entry)
-                                        entry
-                                      (cons coding-system entry)))
-                                   existing))
-                           (setq start (1+ start))))
-                       charset)
-                      ;; Update the generic entry.
-                      (aset char-coding-system-table key
-                            (cons coding-system codings))
-                      ;; Override with the saved entries.
-                      (dolist (elt existing)
-                        (aset char-coding-system-table (car elt) (cdr elt))))
-                  (aset char-coding-system-table key
-                        (cons coding-system codings))
-                  (unless (or (memq charset partials)
-                              (generic-char-p key))
-                    (push charset partials)))))))
-       safe-chars)
-      (set-char-table-extra-slot char-coding-system-table 1 partials))))
+(defalias 'register-char-codings 'ignore "")
+(make-obsolete 'register-char-codings
+               "it exists just for backward compatibility, and does nothing."
+              "21.3")
  
+(defconst char-coding-system-table nil
+  "This is an obsolete variable.
+It exists just for backward compatibility, and the value is always nil.")
  
  (defun make-subsidiary-coding-system (coding-system)
    "Make subsidiary coding systems (eol-type variants) of CODING-SYSTEM."
@@ -860,6 +823,11 @@ following properties are recognized:
    The value is a symbol whose name is the `MIME-charset' parameter of
    the coding system.
  
+  o mime-text-unsuitable
+
+  A non-nil value means the `mime-charset' property names a charset
+  which is unsuitable for the top-level media type \"text\".
+
    o valid-codes (meaningful only for a coding system based on CCL)
  
    The value is a list to indicate valid byte ranges of the encoded
@@ -869,8 +837,8 @@ following properties are recognized:
  
    o composition (meaningful only when TYPE is 0 or 2)
  
-  If the value is non-nil, the coding system preserves information of
-  composition.
+  If the value is non-nil, the coding system preserves composition
+  information.
  
  These properties are set in PLIST, a property list.  This function
  also sets properties `coding-category' and `alias-coding-systems'
@@ -1066,7 +1034,6 @@ a value of `safe-charsets' in PLIST."
                 (if (and (symbolp val)
                          (get val 'translation-table))
                     (setq safe-chars (get val 'translation-table)))
-               (register-char-codings coding-system safe-chars)
                 (setq val safe-chars)))
           (plist-put plist prop val)))
        ;; The property `coding-category' may have been set differently
@@ -1100,6 +1067,8 @@ a value of `safe-charsets' in PLIST."
                (error "Invalid EOL-TYPE spec:%S" eol-type))))
    (put coding-system 'eol-type eol-type)
  
+  (define-coding-system-internal coding-system)
+
    ;; At last, register CODING-SYSTEM in `coding-system-list' and
    ;; `coding-system-alist'.
    (add-to-coding-system-list coding-system)
@@ -1158,7 +1127,15 @@ Return the resulting coding system."
                      first eol)))
      first))
  
-(defun set-buffer-file-coding-system (coding-system &optional force)
+(defun autoload-coding-system (symbol form)
+  "Define SYMBOL as a coding-system that is defined on demand.
+
+FROM is a form to evaluate to define the coding-system."
+  (put symbol 'coding-system-define-form form)
+  (setq coding-system-alist (cons (list (symbol-name symbol))
+                                 coding-system-alist)))
+
+(defun set-buffer-file-coding-system (coding-system &optional force nomodify)
    "Set the file coding-system of the current buffer to CODING-SYSTEM.
  This means that when you save the buffer, it will be converted
  according to CODING-SYSTEM.  For a list of possible values of CODING-SYSTEM,
@@ -1168,19 +1145,25 @@ If CODING-SYSTEM leaves the text conversion unspecified, or if it
  leaves the end-of-line conversion unspecified, FORCE controls what to
  do.  If FORCE is nil, get the unspecified aspect (or aspects) from the
  buffer's previous `buffer-file-coding-system' value (if it is
-specified there).  Otherwise, levae it unspecified.
+specified there).  Otherwise, leave it unspecified.
  
  This marks the buffer modified so that the succeeding \\[save-buffer]
  surely saves the buffer with CODING-SYSTEM.  From a program, if you
-don't want to mark the buffer modified, just set the variable
-`buffer-file-coding-system' directly."
+don't want to mark the buffer modified, specify t for NOMODIFY.
+If you know exactly what coding system you want to use,
+just set the variable `buffer-file-coding-system' directly."
    (interactive "zCoding system for saving file (default, nil): \nP")
    (check-coding-system coding-system)
    (if (and coding-system buffer-file-coding-system (null force))
        (setq coding-system
             (merge-coding-systems coding-system buffer-file-coding-system)))
    (setq buffer-file-coding-system coding-system)
-  (set-buffer-modified-p t)
+  ;; This is in case of an explicit call.  Normally, `normal-mode' and
+  ;; `set-buffer-major-mode-hook' take care of setting the table.
+  (if (fboundp 'ucs-set-table-for-input) ; don't lose when building
+      (ucs-set-table-for-input))
+  (unless nomodify
+    (set-buffer-modified-p t))
    (force-mode-line-update))
  
  (defun revert-buffer-with-coding-system (coding-system &optional force)
@@ -1201,6 +1184,14 @@ usual for visiting a file."
    (let ((coding-system-for-read coding-system))
      (revert-buffer)))
  
+(defun set-file-name-coding-system (coding-system)
+  "Set coding system for decoding and encoding file names to CODING-SYSTEM.
+It actually just set the variable `file-name-coding-system' (which
+see) to CODING-SYSTEM."
+  (interactive "zCoding system for file names (default, nil): ")
+  (check-coding-system coding-system)
+  (setq file-name-coding-system coding-system))
+
  (defvar default-terminal-coding-system nil
    "Default value for the terminal coding system.
  This is normally set according to the selected language environment.
@@ -1265,8 +1256,10 @@ If you set this on a terminal which can't distinguish Meta keys from
  8-bit characters, you will have to use ESC to type Meta characters.
  See Info node `Specify Coding' and Info node `Single-Byte Character Support'.
  
+On non-windowing terminals, this is set from the locale by default.
+
  Setting this variable directly does not take effect;
-use either M-x customize or \\[set-keyboard-coding-system]."
+use either \\[customize] or \\[set-keyboard-coding-system]."
    :type '(coding-system :tag "Coding system")
    :link '(info-link "(emacs)Specify Coding")
    :link '(info-link "(emacs)Single-Byte Character Support")
@@ -1275,7 +1268,7 @@ use either M-x customize or \\[set-keyboard-coding-system]."
          (if (or value (boundp 'encoded-kbd-mode))
              (set-keyboard-coding-system value)
            (set-default 'keyboard-coding-system nil))) ; must initialize
-  :version "21.1"
+  :version "21.4"
    :group 'keyboard
    :group 'mule)
  
@@ -1346,196 +1339,199 @@ ARG is a list of coding categories ordered by priority."
  
  ;;; X selections
  
-(defvar non-standard-icccm-encodings-alist
-  '(("ISO8859-15" . latin-iso8859-15)
-    ("ISO8859-14" . latin-iso8859-14)
-    ("KOI8-R" . koi8-r)
-    ("BIG5-0" . big5))
-  "Alist of font charset names defined by XLFD.
-The cdr of each element is the corresponding Emacs charset or coding system.")
+(defvar ctext-non-standard-encodings-alist
+  '(("big5-0" big5 2 (chinese-big5-1 chinese-big5-2))
+    ("ISO8859-14" iso-8859-14 1 latin-iso8859-14)
+    ("ISO8859-15" iso-8859-15 1 latin-iso8859-15))
+  "Alist of non-standard encoding names vs the corresponding usages in CTEXT.
+
+It controls how extended segments of a compound text are handled
+by the coding system `compound-text-with-extensions'.
+
+Each element has the form (ENCODING-NAME CODING-SYSTEM N-OCTET CHARSET).
+
+ENCODING-NAME is an encoding name of an \"extended segments\".
+
+CODING-SYSTEM is the coding-system to encode (or decode) the
+characters into (or from) the extended segment.
+
+N-OCTET is the number of octets (bytes) that encodes a character
+in the segment.  It can be 0 (meaning the number of octets per
+character is variable), 1, 2, 3, or 4.
+
+CHARSET is a charater set containing characters that are encoded
+in the segment.  It can be a list of character sets.  It can also
+be a char-table, in which case characters that have non-nil value
+in the char-table are the target.
+
+On decoding CTEXT, all encoding names listed here are recognized.
+
+On encoding CTEXT, encoding names in the variable
+`ctext-non-standard-encodings' (which see) and in the information
+listed for the current language environment under the key
+`ctext-non-standard-encodings' are used.")
+
+(defvar ctext-non-standard-encodings
+  '("big5-0")
+  "List of non-standard encoding names used in extended segments of CTEXT.
+Each element must be one of the names listed in the variable
+`ctext-non-standard-encodings-alist' (which see).")
+
+(defvar ctext-non-standard-encodings-regexp
+  (string-to-multibyte
+   (concat
+    ;; For non-standard encodings.
+    "\\(\e%/[0-4][\200-\377][\200-\377]\\([^\002]+\\)\002\\)"
+    "\\|"
+    ;; For UTF-8 encoding.
+    "\\(\e%G[^\e]*\e%@\\)")))
  
  ;; Functions to support "Non-Standard Character Set Encodings" defined
-;; by the COMPOUND-TEXT spec.
-;; We support that by converting the leading sequence of the
-;; ``extended segment'' to the corresponding ISO-2022 sequences (if
-;; the leading sequence names an Emacs charset), or decode the segment
-;; (if it names a coding system).  Encoding does the reverse.
-;; This function also supports "The UTF-8 encoding" described in the
-;; section 7 of the documentation fo COMPOUND-TEXT distributed with
-;; XFree86.
+;; by the COMPOUND-TEXT spec.  They also support "The UTF-8 encoding"
+;; described in the section 7 of the documentation of COMPOUND-TEXT
+;; distributed with XFree86.
  
  (defun ctext-post-read-conversion (len)
    "Decode LEN characters encoded as Compound Text with Extended Segments."
-  (buffer-disable-undo)        ; minimize consing due to insertions and deletions
-  (narrow-to-region (point) (+ (point) len))
    (save-match-data
-    (let ((pt (point-marker))
-         (oldpt (point-marker))
-         (newpt (make-marker))
-         (modified-p (buffer-modified-p))
-         (case-fold-search nil)
-         last-coding-system-used
-         encoding textlen chset)
-      (while (re-search-forward
-             "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002\\|\e%G[^\e]+\e%@"
-             nil 'move)
-       (set-marker newpt (point))
-       (set-marker pt (match-beginning 0))
-       (if (= (preceding-char) ?@)
-           ;; We found embedded utf-8 sequence.
-           (progn
-             (delete-char -3)          ; delete ESC % @ at the tail
-             (goto-char pt)
-             (delete-char 3)           ; delete ESC % G at the head
-             (if (> pt oldpt)
-                 (decode-coding-region oldpt pt 'ctext-no-compositions))
-             (decode-coding-region pt newpt 'mule-utf-8)
-             (goto-char newpt)
-             (set-marker oldpt newpt))
-         (setq encoding (match-string 3))
-         (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128)
-                             (- (aref (match-string 2) 1) 128))
-                          (1+ (length encoding))))
-         (setq
-          chset (cdr (assoc-ignore-case encoding
-                                        non-standard-icccm-encodings-alist)))
-         (cond ((null chset)
-                ;; This charset is not supported--leave this extended
-                ;; segment unaltered and skip over it.
-                (goto-char (+ (point) textlen)))
-               ((charsetp chset)
-                ;; If it's a charset, replace the leading escape sequence
-                ;; with a standard ISO-2022 sequence.  We will decode all
-                ;; such segments later, in one go, when we exit the loop
-                ;; or find an extended segment that names a coding
-                ;; system, not a charset.
-                (replace-match
-                 (concat "\\1"
-                         (if (= 0 (charset-iso-graphic-plane chset))
-                             ;; GL charsets
-                             (if (= 1 (charset-dimension chset)) "(" "$(")
-                           ;; GR charsets
-                           (if (= 96 (charset-chars chset))
-                               "-"
-                             (if (= 1 (charset-dimension chset)) ")" "$)")))
-                         (string (charset-iso-final-char chset)))
-                 t)
-                (goto-char (+ (point) textlen)))
-               ((coding-system-p chset)
-                ;; If it's a coding system, we need to decode the segment
-                ;; right away.  But first, decode what we've skipped
-                ;; across until now.
-                (when (> pt oldpt)
-                  (decode-coding-region oldpt pt 'ctext-no-compositions))
-                (delete-region pt newpt)
-                (set-marker newpt (+ newpt textlen))
-                (decode-coding-region pt newpt chset)
-                (goto-char newpt)
-                (set-marker oldpt newpt)))))
-      ;; Decode what's left.
-      (when (> (point) oldpt)
-       (decode-coding-region oldpt (point) 'ctext-no-compositions))
-      ;; This buffer started as unibyte, because the string we get from
-      ;; the X selection is a unibyte string.  We must now make it
-      ;; multibyte, so that the decoded text is inserted as multibyte
-      ;; into its buffer.
-      (set-buffer-multibyte t)
-      (set-buffer-modified-p modified-p)
-      (- (point-max) (point-min)))))
-
-;; If you add charsets here, be sure to modify the regexp used by
-;; ctext-pre-write-conversion to look up non-standard charsets.
-(defvar non-standard-designations-alist
-  '(("$(0" . (big5 "big5-0" 2))
-    ("$(1" . (big5 "big5-0" 2))
-    ;; The following are actually standard; generating extended
-    ;; segments for them is wrong and screws e.g. Latin-9 users.
-    ;; 8859-{10,13,16} aren't Emacs charsets anyhow.  -- fx
-;;     ("-V"  . (t "iso8859-10" 1))
-;;     ("-Y"  . (t "iso8859-13" 1))
-;;     ("-_"  . (t "iso8859-14" 1))
-;;     ("-b"  . (t "iso8859-15" 1))
-;;     ("-f"  . (t "iso8859-16" 1))
-    )
-  "Alist of ctext control sequences that introduce character sets which
-are not in the list of approved encodings, and the corresponding
-coding system, identifier string, and number of octets per encoded
-character.
-
-Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)).  CTLSEQ
-is the control sequence (sans the leading ESC) that introduces the character
-set in the text encoded by compound-text.  ENCODING is a coding system
-symbol; if it is t, it means that the ctext coding system already encodes
-the text correctly, and only the leading control sequence needs to be altered.
-If ENCODING is a coding system, we need to re-encode the text with that
-coding system.  CHARSET is the name of the charset we need to put into
-the leading control sequence.  NOCTETS is the number of octets (bytes) that
-encode each character in this charset.  NOCTETS can be 0 (meaning the number
-of octets per character is variable), 1, 2, 3, or 4.")
+    (save-restriction
+      (let ((case-fold-search nil)
+           (in-workbuf (string= (buffer-name) " *code-converting-work*"))
+           last-coding-system-used
+           pos bytes)
+       (or in-workbuf
+           (narrow-to-region (point) (+ (point) len)))
+       (if in-workbuf
+           (set-buffer-multibyte t))
+       (while (re-search-forward ctext-non-standard-encodings-regexp
+                                 nil 'move)
+         (setq pos (match-beginning 0))
+         (if (match-beginning 1)
+             ;; ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
+             (let* ((M (char-after (+ pos 4)))
+                    (L (char-after (+ pos 5)))
+                    (encoding (match-string 2))
+                    (encoding-info (assoc-string
+                                    encoding
+                                    ctext-non-standard-encodings-alist t))
+                    (coding (if encoding-info
+                                (nth 1 encoding-info)
+                              (setq encoding (intern (downcase encoding)))
+                              (and (coding-system-p encoding)
+                                   encoding))))
+               (setq bytes (- (+ (* (- M 128) 128) (- L 128))
+                              (- (point) (+ pos 6))))
+               (when coding
+                 (delete-region pos (point))
+                 (forward-char bytes)
+                 (decode-coding-region (- (point) bytes) (point) coding)))
+           ;; ESC % G --UTF-8-BYTES-- ESC % @
+           (delete-char -3)
+           (delete-region pos (+ pos 3))
+           (decode-coding-region pos (point) 'utf-8))))
+      (goto-char (point-min))
+      (- (point-max) (point)))))
+
+;; Return a char table of extended segment usage for each character.
+;; Each value of the char table is nil, one of the elements of
+;; `ctext-non-standard-encodings-alist', or the symbol `utf-8'.
+
+(defun ctext-non-standard-encodings-table ()
+  (let ((table (make-char-table 'translation-table)))
+    (aset table (make-char 'mule-unicode-0100-24ff) 'utf-8)
+    (aset table (make-char 'mule-unicode-2500-33ff) 'utf-8)
+    (aset table (make-char 'mule-unicode-e000-ffff) 'utf-8)
+    (dolist (encoding (reverse
+                      (append
+                       (get-language-info current-language-environment
+                                          'ctext-non-standard-encodings)
+                       ctext-non-standard-encodings)))
+      (let* ((slot (assoc encoding ctext-non-standard-encodings-alist))
+            (charset (nth 3 slot)))
+       (if charset
+           (cond ((charsetp charset)
+                  (aset table (make-char charset) slot))
+                 ((listp charset)
+                  (dolist (elt charset)
+                    (aset table (make-char elt) slot)))
+                 ((char-table-p charset)
+                  (map-char-table #'(lambda (k v)
+                                  (if (and v (> k 128)) (aset table k slot)))
+                                  charset))))))
+    table))
  
  (defun ctext-pre-write-conversion (from to)
    "Encode characters between FROM and TO as Compound Text w/Extended Segments.
  
  If FROM is a string, or if the current buffer is not the one set up for us
-by run_pre_post_conversion_on_str, generate a new temp buffer, insert the
+by encode-coding-string, generate a new temp buffer, insert the
  text, and convert it in the temporary buffer.  Otherwise, convert in-place."
-  (cond ((and (string= (buffer-name) " *code-converting-work*")
-             (not (stringp from)))
-        ; Minimize consing due to subsequent insertions and deletions.
-        (buffer-disable-undo)
-        (narrow-to-region from to))
-       (t
-        (let ((buf (current-buffer)))
-          (set-buffer (generate-new-buffer " *temp"))
-          (buffer-disable-undo)
-          (if (stringp from)
-              (insert from)
-            (insert-buffer-substring buf from to))
-          (setq from (point-min) to (point-max)))))
-  (encode-coding-region from to 'ctext-no-compositions)
-  ;; Replace ISO-2022 charset designations with extended segments, for
-  ;; those charsets that are not part of the official X registry.
    (save-match-data
-    (goto-char (point-min))
-    (let ((newpt (make-marker))
-         (case-fold-search nil)
-         pt desig encode-info encoding chset noctets textlen)
-      (set-buffer-multibyte nil)
-      ;; The regexp below finds the leading sequences for big5.
-      (while (re-search-forward "\e\\(\$([01]\\)" nil 'move)
-       (setq desig (match-string 1)
-             pt (point-marker)
-             encode-info (cdr (assoc desig non-standard-designations-alist))
-             encoding (car encode-info)
-             chset (cadr encode-info)
-             noctets (car (cddr encode-info)))
-       (skip-chars-forward "^\e")
-       (set-marker newpt (point))
-       (cond
-        ((eq encoding t)  ; only the leading sequence needs to be changed
-         (setq textlen (+ (- newpt pt) (length chset) 1))
-         ;; Generate the ICCCM control sequence for an extended segment.
-         (replace-match (format "\e%%/%d%c%c%s\ 2"
-                                noctets
-                                (+ (/ textlen 128) 128)
-                                (+ (% textlen 128) 128)
-                                chset)
-                        t t))
-        ((coding-system-p encoding) ; need to recode the entire segment...
-         (set-marker pt (match-beginning 0))
-         (decode-coding-region pt newpt 'ctext-no-compositions)
-         (set-buffer-multibyte t)
-         (encode-coding-region pt newpt encoding)
-         (set-buffer-multibyte nil)
-         (setq textlen (+ (- newpt pt) (length chset) 1))
-         (goto-char pt)
-         (insert (format "\e%%/%d%c%c%s\ 2"
-                         noctets
-                         (+ (/ textlen 128) 128)
-                         (+ (% textlen 128) 128)
-                         chset))))
-       (goto-char newpt))))
-  (set-buffer-multibyte t)
+    ;; Setup a working buffer if necessary.
+    (cond ((stringp from)
+          (let ((buf (current-buffer)))
+            (set-buffer (generate-new-buffer " *temp"))
+            (set-buffer-multibyte (multibyte-string-p from))
+            (insert from)))
+         ((not (string= (buffer-name) " *code-converting-work*"))
+          (let ((buf (current-buffer))
+                (multibyte enable-multibyte-characters))
+            (set-buffer (generate-new-buffer " *temp"))
+            (set-buffer-multibyte multibyte)
+            (insert-buffer-substring buf from to))))
+
+    ;; Now we can encode the whole buffer.
+    (let ((encoding-table (ctext-non-standard-encodings-table))
+         last-coding-system-used
+         last-pos last-encoding-info
+         encoding-info end-pos)
+      (goto-char (setq last-pos (point-min)))
+      (setq end-pos (point-marker))
+      (while (re-search-forward "[^\000-\177]+" nil t)
+       ;; Found a sequence of non-ASCII characters.
+       (setq last-pos (match-beginning 0)
+             last-encoding-info (aref encoding-table (char-after last-pos)))
+       (set-marker end-pos (match-end 0))
+       (goto-char (1+ last-pos))
+       (catch 'tag
+         (while t
+           (setq encoding-info
+                 (if (< (point) end-pos)
+                     (aref encoding-table (following-char))))
+           (unless (eq last-encoding-info encoding-info)
+             (cond ((consp last-encoding-info)
+                    ;; Encode the previous range using an extended
+                    ;; segment.
+                    (let ((encoding-name (car last-encoding-info))
+                          (coding-system (nth 1 last-encoding-info))
+                          (noctets (nth 2 last-encoding-info))
+                          len)
+                      (encode-coding-region last-pos (point) coding-system)
+                      (setq len (+ (length encoding-name) 1
+                                   (- (point) last-pos)))
+                      (save-excursion
+                        (goto-char last-pos)
+                        (insert (string-to-multibyte
+                                 (format "\e%%/%d%c%c%s\ 2"
+                                         noctets
+                                         (+ (/ len 128) 128)
+                                         (+ (% len 128) 128)
+                                         encoding-name))))))
+                   ((eq last-encoding-info 'utf-8)
+                    ;; Encode the previous range using UTF-8 encoding
+                    ;; extention.
+                    (encode-coding-region last-pos (point) 'mule-utf-8)
+                    (save-excursion
+                      (goto-char last-pos)
+                      (insert "\e%G"))
+                    (insert "\e%@")))
+             (setq last-pos (point)
+                   last-encoding-info encoding-info))
+           (if (< (point) end-pos)
+               (forward-char 1)
+             (throw 'tag nil)))))
+      (set-marker end-pos nil)
+      (goto-char (point-min))))
    ;; Must return nil, as build_annotations_2 expects that.
    nil)
  
@@ -1576,14 +1572,18 @@ and the contents of `file-coding-system-alist'."
                                    sgml-html-meta-auto-coding-function)
    "A list of functions which attempt to determine a coding system.
  
-Each function in this list should be written to operate on the current
-buffer, but should not modify it in any way.  It should take one
-argument SIZE, past which it should not search.  If a function
-succeeds in determining a coding system, it should return that coding
-system.  Otherwise, it should return nil.
+Each function in this list should be written to operate on the
+current buffer, but should not modify it in any way.  The buffer
+will contain undecoded text of parts of the file.  Each function
+should take one argument, SIZE, which says how many
+characters (starting from point) it should look at.
  
-Any `coding:' tags present have a higher priority than the
-functions in this list."
+If one of these functions succeeds in determining a coding
+system, it should return that coding system.  Otherwise, it
+should return nil.
+
+If a file has a `coding:' tag, that takes precedence over these
+functions, so they won't be called at all."
    :group 'files
    :group 'mule
    :type '(repeat function))
@@ -1595,7 +1595,7 @@ This is used for loading and byte-compiling Emacs Lisp files.")
  (defun auto-coding-alist-lookup (filename)
    "Return the coding system specified by `auto-coding-alist' for FILENAME."
    (let ((alist auto-coding-alist)
-       (case-fold-search (memq system-type '(vax-vms windows-nt ms-dos)))
+       (case-fold-search (memq system-type '(vax-vms windows-nt ms-dos cygwin)))
         coding-system)
      (while (and alist (not coding-system))
        (if (string-match (car (car alist)) filename)
@@ -1672,31 +1672,36 @@ function by default."
                   (setq coding-system nil)))))
  
         ;; If no coding: tag in the head, check the tail.
+       ;; Here we must pay attention to the case that the end-of-line
+       ;; is just "\r" and we can't use "^" nor "$" in regexp.
         (when (and tail-found (not coding-system))
           (goto-char tail-start)
-         (search-forward "\n\^L" nil t)
+         (re-search-forward "[\r\n]\^L" nil t)
           (if (re-search-forward
-              "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t)
-         ;; The prefix is what comes before "local variables:" in its
-          ;; line.  The suffix is what comes after "local variables:"
+              "[\r\n]\\([^[\r\n]*\\)[ \t]*Local Variables:[ \t]*\\([^\r\n]*\\)[\r\n]"
+              tail-end t)
+             ;; The prefix is what comes before "local variables:" in its
+             ;; line.  The suffix is what comes after "local variables:"
               ;; in its line.
               (let* ((prefix (regexp-quote (match-string 1)))
                      (suffix (regexp-quote (match-string 2)))
                      (re-coding
                       (concat
-                      "^" prefix
+                      "[\r\n]" prefix
                        ;; N.B. without the \n below, the regexp can
                        ;; eat newlines.
-                      "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*"
-                      suffix "$"))
+                      "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
+                      suffix "[\r\n]"))
                      (re-unibyte
                       (concat
-                      "^" prefix
-                      "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*"
-                      suffix "$"))
+                      "[\r\n]" prefix
+                      "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
+                      suffix "[\r\n]"))
                      (re-end
-                     (concat "^" prefix "[ \t]*End *:[ \t]*" suffix "$"))
-                    (pos (point)))
+                     (concat "[\r\n]" prefix "[ \t]*End *:[ \t]*" suffix
+                             "[\r\n]?"))
+                    (pos (1- (point))))
+               (forward-char -1)       ; skip back \r or \n.
                 (re-search-forward re-end tail-end 'move)
                 (setq tail-end (point))
                 (goto-char pos)
@@ -1722,14 +1727,41 @@ function by default."
  
  (setq set-auto-coding-function 'set-auto-coding)
  
-(defun after-insert-file-set-buffer-file-coding-system (inserted)
-  "Set `buffer-file-coding-system' of current buffer after text is inserted."
+;; This variable is set in these two cases:
+;;   (1) A file is read by a coding system specified explicitly.
+;;       after-insert-file-set-coding sets this value to
+;;       coding-system-for-read.
+;;   (2) A buffer is saved.
+;;       After writing, basic-save-buffer-1 sets this value to
+;;       last-coding-system-used.
+;; This variable is used for decoding in revert-buffer.
+(defvar buffer-file-coding-system-explicit nil
+  "The file coding system explicitly specified for the current buffer.
+Internal use only.")
+(make-variable-buffer-local 'buffer-file-coding-system-explicit)
+(put 'buffer-file-coding-system-explicit 'permanent-local t)
+
+(defun after-insert-file-set-coding (inserted &optional visit)
+  "Set `buffer-file-coding-system' of current buffer after text is inserted.
+INSERTED is the number of characters that were inserted, as figured
+in the situation before this function.  Return the number of characters
+inserted, as figured in the situation after.  The two numbers can be
+different if the buffer has become unibyte.
+The optional second arg VISIT non-nil means that we are visiting a file."
+  (if (and visit
+          coding-system-for-read
+          (not (eq coding-system-for-read 'auto-save-coding)))
+      (setq buffer-file-coding-system-explicit coding-system-for-read))
    (if last-coding-system-used
        (let ((coding-system
              (find-new-buffer-file-coding-system last-coding-system-used))
             (modified-p (buffer-modified-p)))
         (when coding-system
-         (set-buffer-file-coding-system coding-system t)
+         ;; Tell set-buffer-file-coding-system not to mark the file
+         ;; as modified; we just read it, and it's supposed to be unmodified.
+         ;; Marking it modified would try to lock it, which would
+         ;; check the modtime, and we don't want to do that again now.
+         (set-buffer-file-coding-system coding-system t t)
           (if (and enable-multibyte-characters
                    (or (eq coding-system 'no-conversion)
                        (eq (coding-system-type coding-system) 5))
@@ -1739,15 +1771,14 @@ function by default."
                    (= (buffer-size) inserted))
               ;; For coding systems no-conversion and raw-text...,
               ;; edit the buffer as unibyte.
-             (let ((pos-byte (position-bytes (+ (point) inserted))))
+             (let ((pos-marker (copy-marker (+ (point) inserted)))
+                   ;; Prevent locking.
+                   (buffer-file-name nil))
                 (set-buffer-multibyte nil)
-               (setq inserted (- pos-byte (position-bytes (point))))))
+               (setq inserted (- pos-marker (point)))))
           (set-buffer-modified-p modified-p))))
    inserted)
  
-(add-hook 'after-insert-file-functions
-         'after-insert-file-set-buffer-file-coding-system)
-
  ;; The coding-spec and eol-type of coding-system returned is decided
  ;; independently in the following order.
  ;;     1. That of buffer-file-coding-system locally bound.
@@ -1863,6 +1894,74 @@ or a function symbol which, when called, returns such a cons cell."
                    (cons (cons regexp coding-system)
                          network-coding-system-alist)))))))
  
+(defun decode-coding-inserted-region (from to filename
+                                          &optional visit beg end replace)
+  "Decode the region between FROM and TO as if it is read from file FILENAME.
+The idea is that the text between FROM and TO was just inserted somehow.
+Optional arguments VISIT, BEG, END, and REPLACE are the same as those
+of the function `insert-file-contents'.
+Part of the job of this function is setting `buffer-undo-list' appropriately."
+  (save-excursion
+    (save-restriction
+      (let ((coding coding-system-for-read)
+           undo-list-saved)
+       (if visit
+           ;; Temporarily turn off undo recording, if we're decoding the
+           ;; text of a visited file.
+           (setq buffer-undo-list t)
+         ;; Otherwise, if we can recognize the undo elt for the insertion,
+         ;; remove it and get ready to replace it later.
+         ;; In the mean time, turn off undo recording.
+         (let ((last (car-safe buffer-undo-list))) 
+           (if (and (consp last) (eql (car last) from) (eql (cdr last) to))
+               (setq undo-list-saved (cdr buffer-undo-list)
+                     buffer-undo-list t))))
+       (narrow-to-region from to)
+       (goto-char (point-min))
+       (or coding
+           (setq coding (funcall set-auto-coding-function
+                                 filename (- (point-max) (point-min)))))
+       (or coding
+           (setq coding (car (find-operation-coding-system
+                              'insert-file-contents
+                              filename visit beg end replace))))
+       (if (coding-system-p coding)
+           (or enable-multibyte-characters
+               (setq coding
+                     (coding-system-change-text-conversion coding 'raw-text)))
+         (setq coding nil))
+       (if coding
+           (decode-coding-region (point-min) (point-max) coding)
+         (setq last-coding-system-used coding))
+       ;; If we're decoding the text of a visited file,
+       ;; the undo list should start out empty.
+       (if visit
+           (setq buffer-undo-list nil)
+         ;; If we decided to replace the undo entry for the insertion,
+         ;; do so now.
+         (if undo-list-saved
+             (setq buffer-undo-list
+                   (cons (cons from (point-max)) undo-list-saved))))))))
+
+(defun recode-region (start end new-coding coding)
+  "Re-decode the region (previously decoded by CODING) by NEW-CODING."
+  (interactive
+   (list (region-beginning) (region-end)
+        (read-coding-system "Text was really in: ")
+        (let ((coding (or buffer-file-coding-system last-coding-system-used)))
+          (read-coding-system
+           (concat "But was interpreted as"
+                   (if coding (format " (default %S): " coding) ": "))
+           coding))))
+  (or (and new-coding coding)
+      (error "Coding system not specified"))
+  ;; Check it before we encode the region.
+  (check-coding-system new-coding)
+  (save-restriction
+    (narrow-to-region start end)
+    (encode-coding-region (point-min) (point-max) coding)
+    (decode-coding-region (point-min) (point-max) new-coding)))
+
  (defun make-translation-table (&rest args)
    "Make a translation table from arguments.
  A translation table is a char table intended for character
@@ -1985,14 +2084,52 @@ the table in `translation-table-vector'."
      (put symbol 'translation-table-id id)
      id))
  
+(defun translate-region (start end table)
+  "From START to END, translate characters according to TABLE.
+TABLE is a string or a char-table.
+If TABLE is a string, the Nth character in it is the mapping
+for the character with code N.
+If TABLE is a char-table, the element for character N is the mapping
+for the character with code N.
+It returns the number of characters changed."
+  (interactive
+   (list (region-beginning)
+        (region-end)
+        (let (table l)
+          (dotimes (i (length translation-table-vector))
+            (if (consp (aref translation-table-vector i))
+                (push (list (symbol-name
+                             (car (aref translation-table-vector i)))) l)))
+          (if (not l)
+              (error "No translation table defined"))
+          (while (not table)
+            (setq table (completing-read "Translation table: " l nil t)))
+          (intern table))))
+  (if (symbolp table)
+      (let ((val (get table 'translation-table)))
+       (or (char-table-p val)
+           (error "Invalid translation table name: %s" table))
+       (setq table val)))
+  (translate-region-internal start end table))
+
  (put 'with-category-table 'lisp-indent-function 1)
  
-(defmacro with-category-table (category-table &rest body)
-  `(let ((current-category-table (category-table)))
-     (set-category-table ,category-table)
-     (unwind-protect
-        (progn ,@body)
-       (set-category-table current-category-table))))
+(defmacro with-category-table (table &rest body)
+  "Evaluate BODY with category table of current buffer set to TABLE.
+The category table of the current buffer is saved, BODY is evaluated,
+then the saved table is restored, even in case of an abnormal exit.
+Value is what BODY returns."
+  (let ((old-table (make-symbol "old-table"))
+       (old-buffer (make-symbol "old-buffer")))
+    `(let ((,old-table (category-table))
+          (,old-buffer (current-buffer)))
+       (unwind-protect
+          (progn
+            (set-category-table ,table)
+            ,@body)
+        (save-current-buffer
+          (set-buffer ,old-buffer)
+          (set-category-table ,old-table))))))
  
  (defun define-translation-hash-table (symbol table)
    "Define SYMBOL as the name of the hash translation TABLE for use in CCL.
@@ -2035,7 +2172,8 @@ Analogous to `define-translation-table', but updates
  (defun sgml-xml-auto-coding-function (size)
    "Determine whether the buffer is XML, and if so, its encoding.
  This function is intended to be added to `auto-coding-functions'."
-  (when (re-search-forward "\\`[[:space:]\n]*<\\?xml" nil t)
+  (setq size (+ (point) size))
+  (when (re-search-forward "\\`[[:space:]\n]*<\\?xml" size t)
      (let ((end (save-excursion
                  ;; This is a hack.
                  (re-search-forward "\"\\s-*\\?>" size t))))
@@ -2052,12 +2190,12 @@ This function is intended to be added to `auto-coding-functions'."
  (defun sgml-html-meta-auto-coding-function (size)
    "If the buffer has an HTML meta tag, use it to determine encoding.
  This function is intended to be added to `auto-coding-functions'."
-  (setq size (min size
+  (setq size (min (+ (point) size)
                   ;; Only search forward 10 lines
                   (save-excursion
                     (forward-line 10)
                     (point))))
-  (when (and (search-forward "<html>" size t)
+  (when (and (search-forward "<html" size t)
              (re-search-forward "<meta\\s-+http-equiv=\"content-type\"\\s-+content=\"text/\\sw+;\\s-*charset=\\(.+?\\)\"" size t))
        (let* ((match (match-string 1))
              (sym (intern (downcase match))))
@@ -2069,4 +2207,5 @@ This function is intended to be added to `auto-coding-functions'."
  ;;;
  (provide 'mule)
  
+;;; arch-tag: 9aebaa6e-0e8a-40a9-b857-cb5d04a39e7c
  ;;; mule.el ends here