Merge from gnus--rel--5.10

[gnu-emacs] / lisp / gnus / mm-util.el
diff --git a/lisp/gnus/mm-util.el b/lisp/gnus/mm-util.el

index a57389f59c49d8fa13e7732f6abd706b56b1663e..e75f2ef6d5f5e573a0c38b6895aa0ac4da15391e 100644 (file)
--- a/lisp/gnus/mm-util.el
+++ b/lisp/gnus/mm-util.el
@@ -1,6 +1,7 @@
  ;;; mm-util.el --- Utility functions for Mule and low level things
-;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
-;;   Free Software Foundation, Inc.
+
+;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+;;   2005, 2006 Free Software Foundation, Inc.
  
  ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
  ;;     MORIOKA Tomohiko <morioka@jaist.ac.jp>
@@ -98,16 +99,6 @@
            (lambda (ch) (mm-string-as-multibyte (char-to-string ch)))
            string "")))
       (multibyte-string-p . ignore)
-     ;; It is not a MIME function, but some MIME functions use it.
-     (make-temp-file . (lambda (prefix &optional dir-flag)
-                        (let ((file (expand-file-name
-                                     (make-temp-name prefix)
-                                     (if (fboundp 'temp-directory)
-                                         (temp-directory)
-                                       temporary-file-directory))))
-                          (if dir-flag
-                              (make-directory file))
-                          file)))
       (insert-byte . insert-char)
       (multibyte-char-to-unibyte . identity))))
  
@@ -183,34 +174,114 @@ system object in XEmacs."
      (if (fboundp 'coding-system-p)
         (when (coding-system-p cs)
           cs)
-      ;; Is this branch ever actually useful?
+      ;; no-MULE XEmacs:
        (car (memq cs (mm-get-coding-system-list))))))
  
+(defun mm-codepage-setup (number &optional alias)
+  "Create a coding system cpNUMBER.
+The coding system is created using `codepage-setup'.  If ALIAS is
+non-nil, an alias is created and added to
+`mm-charset-synonym-alist'.  If ALIAS is a string, it's used as
+the alias.  Else windows-NUMBER is used."
+  (interactive
+   (let ((completion-ignore-case t)
+        (candidates (cp-supported-codepages)))
+     (list (completing-read "Setup DOS Codepage: (default 437) " candidates
+                           nil t nil nil "437"))))
+  (when alias
+    (setq alias (if (stringp alias)
+                   (intern alias)
+                 (intern (format "windows-%s" number)))))
+  (let* ((cp (intern (format "cp%s" number))))
+    (unless (mm-coding-system-p cp)
+      (codepage-setup number))
+    (when (and alias
+              ;; Don't add alias if setup of cp failed.
+              (mm-coding-system-p cp))
+      (add-to-list 'mm-charset-synonym-alist (cons alias cp)))))
+
  (defvar mm-charset-synonym-alist
    `(
      ;; Not in XEmacs, but it's not a proper MIME charset anyhow.
      ,@(unless (mm-coding-system-p 'x-ctext)
-       '((x-ctext . ctext)))
+       '((x-ctext . ctext)))
      ;; ISO-8859-15 is very similar to ISO-8859-1.  But it's _different_!
      ,@(unless (mm-coding-system-p 'iso-8859-15)
-       '((iso-8859-15 . iso-8859-1)))
+       '((iso-8859-15 . iso-8859-1)))
      ;; BIG-5HKSCS is similar to, but different than, BIG-5.
      ,@(unless (mm-coding-system-p 'big5-hkscs)
         '((big5-hkscs . big5)))
      ;; Windows-1252 is actually a superset of Latin-1.  See also
      ;; `gnus-article-dumbquotes-map'.
      ,@(unless (mm-coding-system-p 'windows-1252)
-       (if (mm-coding-system-p 'cp1252)
-          '((windows-1252 . cp1252))
-        '((windows-1252 . iso-8859-1))))
+       (if (mm-coding-system-p 'cp1252)
+           '((windows-1252 . cp1252))
+         '((windows-1252 . iso-8859-1))))
      ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft
      ;; Outlook users in Czech republic. Use this to allow reading of their
      ;; e-mails. cp1250 should be defined by M-x codepage-setup.
      ,@(if (and (not (mm-coding-system-p 'windows-1250))
                (mm-coding-system-p 'cp1250))
           '((windows-1250 . cp1250)))
+    ;; A Microsoft misunderstanding.
+    ,@(if (and (not (mm-coding-system-p 'unicode))
+              (mm-coding-system-p 'utf-16-le))
+         '((unicode . utf-16-le)))
+    ;; A Microsoft misunderstanding.
+    ,@(unless (mm-coding-system-p 'ks_c_5601-1987)
+       (if (mm-coding-system-p 'cp949)
+           '((ks_c_5601-1987 . cp949))
+         '((ks_c_5601-1987 . euc-kr))))
+    ;; Windows-31J is Windows Codepage 932.
+    ,@(if (and (not (mm-coding-system-p 'windows-31j))
+              (mm-coding-system-p 'cp932))
+         '((windows-31j . cp932)))
      )
-  "A mapping from invalid charset names to the real charset names.")
+  "A mapping from unknown or invalid charset names to the real charset names.")
+
+(defcustom mm-charset-override-alist
+  `((iso-8859-1 . windows-1252))
+  "A mapping from undesired charset names to their replacement.
+
+You may add pairs like (iso-8859-1 . windows-1252) here,
+i.e. treat iso-8859-1 as windows-1252.  windows-1252 is a
+superset of iso-8859-1."
+  :type '(list (set :inline t
+                   (const (iso-8859-1 . windows-1252))
+                   (const (undecided  . windows-1252)))
+              (repeat :inline t
+                      :tag "Other options"
+                      (cons (symbol :tag "From charset")
+                            (symbol :tag "To charset"))))
+  :version "22.1" ;; Gnus 5.10.9
+  :group 'mime)
+
+(defcustom mm-charset-eval-alist
+  (if (featurep 'xemacs)
+      nil ;; I don't know what would be useful for XEmacs.
+    '(;; Emacs 21 offers 1250 1251 1253 1257.  Emacs 22 provides autoloads for
+      ;; 1250-1258 (i.e. `mm-codepage-setup' does nothing).
+      (windows-1250 . (mm-codepage-setup 1250 t))
+      (windows-1251 . (mm-codepage-setup 1251 t))
+      (windows-1253 . (mm-codepage-setup 1253 t))
+      (windows-1257 . (mm-codepage-setup 1257 t))))
+  "An alist of (CHARSET . FORM) pairs.
+If an article is encoded in an unknown CHARSET, FORM is
+evaluated.  This allows to load additional libraries providing
+charsets on demand.  If supported by your Emacs version, you
+could use `autoload-coding-system' here."
+  :version "22.1" ;; Gnus 5.10.9
+  :type '(list (set :inline t
+                   (const (windows-1250 . (mm-codepage-setup 1250 t)))
+                   (const (windows-1251 . (mm-codepage-setup 1251 t)))
+                   (const (windows-1253 . (mm-codepage-setup 1253 t)))
+                   (const (windows-1257 . (mm-codepage-setup 1257 t)))
+                   (const (cp850 . (mm-codepage-setup 850 nil))))
+              (repeat :inline t
+                      :tag "Other options"
+                      (cons (symbol :tag "charset")
+                            (symbol :tag "form"))))
+  :group 'mime)
  
  (defvar mm-binary-coding-system
    (cond
@@ -297,14 +368,17 @@ system object in XEmacs."
      (iso-2022-jp-3 latin-jisx0201 japanese-jisx0208-1978 japanese-jisx0208
                    japanese-jisx0213-1 japanese-jisx0213-2)
      (shift_jis latin-jisx0201 katakana-jisx0201 japanese-jisx0208)
-    ,(if (or (not (fboundp 'charsetp)) ;; non-Mule case
-            (charsetp 'unicode-a)
-            (not (mm-coding-system-p 'mule-utf-8)))
-        '(utf-8 unicode-a unicode-b unicode-c unicode-d unicode-e)
-       ;; If we have utf-8 we're in Mule 5+.
-       (append '(utf-8)
-              (delete 'ascii
-                      (coding-system-get 'mule-utf-8 'safe-charsets)))))
+    ,(cond ((fboundp 'unicode-precedence-list)
+           (cons 'utf-8 (delq 'ascii (mapcar 'charset-name
+                                             (unicode-precedence-list)))))
+          ((or (not (fboundp 'charsetp)) ;; non-Mule case
+               (charsetp 'unicode-a)
+               (not (mm-coding-system-p 'mule-utf-8)))
+           '(utf-8 unicode-a unicode-b unicode-c unicode-d unicode-e))
+          (t ;; If we have utf-8 we're in Mule 5+.
+           (append '(utf-8)
+                   (delete 'ascii
+                           (coding-system-get 'mule-utf-8 'safe-charsets))))))
    "Alist of MIME-charset/MULE-charsets.")
  
  (defun mm-enrich-utf-8-by-mule-ucs ()
@@ -312,10 +386,6 @@ system object in XEmacs."
  This function will run when the `un-define' module is loaded under
  XEmacs, and fill the `utf-8' entry in `mm-mime-mule-charset-alist'
  with Mule charsets.  It is completely useless for Emacs."
-  (unless (cdr (delete '(mm-enrich-utf-8-by-mule-ucs)
-                      (assoc "un-define" after-load-alist)))
-    (setq after-load-alist
-         (delete '("un-define") after-load-alist)))
    (when (boundp 'unicode-basic-translation-charset-order-list)
      (condition-case nil
         (let ((val (delq
@@ -340,7 +410,7 @@ with Mule charsets.  It is completely useless for Emacs."
           cs mime mule alist)
        (while css
         (setq cs (pop css)
-             mime (or (coding-system-get cs :mime-charset) ; Emacs 22
+             mime (or (coding-system-get cs :mime-charset) ; Emacs 23 (unicode)
                        (coding-system-get cs 'mime-charset)))
         (when (and mime
                    (not (eq t (setq mule
@@ -435,11 +505,17 @@ mail with multiple parts is preferred to sending a Unicode one.")
         (pop alist))
        out)))
  
-(defun mm-charset-to-coding-system (charset &optional lbt)
+(defun mm-charset-to-coding-system (charset &optional lbt
+                                           allow-override)
    "Return coding-system corresponding to CHARSET.
  CHARSET is a symbol naming a MIME charset.
  If optional argument LBT (`unix', `dos' or `mac') is specified, it is
-used as the line break code type of the coding system."
+used as the line break code type of the coding system.
+
+If ALLOW-OVERRIDE is given, use `mm-charset-override-alist' to
+map undesired charset names to their replacement.  This should
+only be used for decoding, not for encoding."
+  ;; OVERRIDE is used (only) in `mm-decode-body' and `mm-decode-string'.
    (when (stringp charset)
      (setq charset (intern (downcase charset))))
    (when lbt
@@ -451,6 +527,11 @@ used as the line break code type of the coding system."
     ((or (null (mm-get-coding-system-list))
         (not (fboundp 'coding-system-get)))
      charset)
+   ;; Check override list quite early.  Should only used for decoding, not for
+   ;; encoding!
+   ((and allow-override
+        (let ((cs (cdr (assq charset mm-charset-override-alist))))
+          (and cs (mm-coding-system-p cs) cs))))
     ;; ascii
     ((eq charset 'us-ascii)
      'ascii)
@@ -463,9 +544,27 @@ used as the line break code type of the coding system."
  ;;;     (eq charset (coding-system-get charset 'mime-charset))
          )
      charset)
+   ;; Eval expressions from `mm-charset-eval-alist'
+   ((let* ((el (assq charset mm-charset-eval-alist))
+          (cs (car el))
+          (form (cdr el)))
+      (and cs
+          form
+          (prog2
+              ;; Avoid errors...
+              (condition-case nil (eval form) (error nil))
+              ;; (message "Failed to eval `%s'" form))
+              (mm-coding-system-p cs)
+            (message "Added charset `%s' via `mm-charset-eval-alist'" cs))
+          cs)))
     ;; Translate invalid charsets.
     ((let ((cs (cdr (assq charset mm-charset-synonym-alist))))
-      (and cs (mm-coding-system-p cs) cs)))
+      (and cs
+          (mm-coding-system-p cs)
+          ;; (message
+          ;;  "Using synonym `%s' from `mm-charset-synonym-alist' for `%s'"
+          ;;  cs charset)
+          cs)))
     ;; Last resort: search the coding system list for entries which
     ;; have the right mime-charset in case the canonical name isn't
     ;; defined (though it should be).
@@ -477,6 +576,11 @@ used as the line break code type of the coding system."
                  (eq charset (or (coding-system-get c :mime-charset)
                                  (coding-system-get c 'mime-charset))))
             (setq cs c)))
+      (unless cs
+       ;; Warn the user about unknown charset:
+       (if (fboundp 'gnus-message)
+           (gnus-message 7 "Unknown charset: %s" charset)
+         (message "Unknown charset: %s" charset)))
        cs))))
  
  (defsubst mm-replace-chars-in-string (string from to)
@@ -567,14 +671,21 @@ If the charset is `composition', return the actual one."
      ;; This is for XEmacs.
      (mm-mule-charset-to-mime-charset charset)))
  
-(defun mm-delete-duplicates (list)
-  "Simple substitute for CL `delete-duplicates', testing with `equal'."
-  (let (result head)
-    (while list
-      (setq head (car list))
-      (setq list (delete head list))
-      (setq result (cons head result)))
-    (nreverse result)))
+(if (fboundp 'delete-dups)
+    (defalias 'mm-delete-duplicates 'delete-dups)
+  (defun mm-delete-duplicates (list)
+    "Destructively remove `equal' duplicates from LIST.
+Store the result in LIST and return it.  LIST must be a proper list.
+Of several `equal' occurrences of an element in LIST, the first
+one is kept.
+
+This is a compatibility function for Emacsen without `delete-dups'."
+    ;; Code from `subr.el' in Emacs 22:
+    (let ((tail list))
+      (while tail
+       (setcdr tail (delete (car tail) (cdr tail)))
+       (setq tail (cdr tail))))
+    list))
  
  ;; Fixme:  This is used in places when it should be testing the
  ;; default multibyteness.  See mm-default-multibyte-p.
@@ -766,6 +877,17 @@ charset, and a longer list means no appropriate charset."
      (if (and (memq 'iso-2022-jp-2 charsets)
              (memq 'iso-2022-jp-2 hack-charsets))
         (setq charsets (delq 'iso-2022-jp charsets)))
+    ;; Attempt to reduce the number of charsets if utf-8 is available.
+    (if (and (featurep 'xemacs)
+            (> (length charsets) 1)
+            (mm-coding-system-p 'utf-8))
+       (let ((mm-coding-system-priorities
+              (cons 'utf-8 mm-coding-system-priorities)))
+         (setq charsets
+               (mm-delete-duplicates
+                (mapcar 'mm-mime-charset
+                        (delq 'ascii
+                              (mm-find-charset-region b e)))))))
      charsets))
  
  (defmacro mm-with-unibyte-buffer (&rest forms)
@@ -787,11 +909,18 @@ Use multibyte mode for this."
  (defmacro mm-with-unibyte-current-buffer (&rest forms)
    "Evaluate FORMS with current buffer temporarily made unibyte.
  Also bind `default-enable-multibyte-characters' to nil.
-Equivalent to `progn' in XEmacs"
+Equivalent to `progn' in XEmacs
+
+NOTE: Use this macro with caution in multibyte buffers (it is not
+worth using this macro in unibyte buffers of course).  Use of
+`(set-buffer-multibyte t)', which is run finally, is generally
+harmful since it is likely to modify existing data in the buffer.
+For instance, it converts \"\\300\\255\" into \"\\255\" in
+Emacs 23 (unicode)."
    (let ((multibyte (make-symbol "multibyte"))
         (buffer (make-symbol "buffer")))
      `(if mm-emacs-mule
-        (let ((,multibyte enable-multibyte-characters)
+        (let ((,multibyte enable-multibyte-characters)
                (,buffer (current-buffer)))
            (unwind-protect
                (let (default-enable-multibyte-characters)
@@ -952,6 +1081,71 @@ If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'."
            inhibit-file-name-handlers)))
      (write-region start end filename append visit lockname)))
  
+;; It is not a MIME function, but some MIME functions use it.
+(if (and (fboundp 'make-temp-file)
+        (ignore-errors
+          (let ((def (symbol-function 'make-temp-file)))
+            (and (byte-code-function-p def)
+                 (setq def (if (fboundp 'compiled-function-arglist)
+                               ;; XEmacs
+                               (eval (list 'compiled-function-arglist def))
+                             (aref def 0)))
+                 (>= (length def) 4)
+                 (eq (nth 3 def) 'suffix)))))
+    (defalias 'mm-make-temp-file 'make-temp-file)
+  ;; Stolen (and modified for Emacs 20 and XEmacs) from Emacs 22.
+  (defun mm-make-temp-file (prefix &optional dir-flag suffix)
+    "Create a temporary file.
+The returned file name (created by appending some random characters at the end
+of PREFIX, and expanding against `temporary-file-directory' if necessary),
+is guaranteed to point to a newly created empty file.
+You can then use `write-region' to write new data into the file.
+
+If DIR-FLAG is non-nil, create a new empty directory instead of a file.
+
+If SUFFIX is non-nil, add that at the end of the file name."
+    (let ((umask (default-file-modes))
+         file)
+      (unwind-protect
+         (progn
+           ;; Create temp files with strict access rights.  It's easy to
+           ;; loosen them later, whereas it's impossible to close the
+           ;; time-window of loose permissions otherwise.
+           (set-default-file-modes 448)
+           (while (condition-case err
+                      (progn
+                        (setq file
+                              (make-temp-name
+                               (expand-file-name
+                                prefix
+                                (if (fboundp 'temp-directory)
+                                    ;; XEmacs
+                                    (temp-directory)
+                                  temporary-file-directory))))
+                        (if suffix
+                            (setq file (concat file suffix)))
+                        (if dir-flag
+                            (make-directory file)
+                          ;; NOTE: This is unsafe if Emacs 20
+                          ;; users and XEmacs users don't use
+                          ;; a secure temp directory.
+                          (gmm-write-region "" nil file nil 'silent
+                                            nil 'excl))
+                        nil)
+                    (file-already-exists t)
+                    ;; The Emacs 20 and XEmacs versions of
+                    ;; `make-directory' issue `file-error'.
+                    (file-error (or (and (or (featurep 'xemacs)
+                                             (= emacs-major-version 20))
+                                         (file-exists-p file))
+                                    (signal (car err) (cdr err)))))
+             ;; the file was somehow created by someone else between
+             ;; `make-temp-name' and `write-region', let's try again.
+             nil)
+           file)
+       ;; Reset the umask.
+       (set-default-file-modes umask)))))
+
  (defun mm-image-load-path (&optional package)
    (let (dir result)
      (dolist (path load-path (nreverse result))
@@ -983,7 +1177,8 @@ If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'."
      (defun mm-detect-mime-charset-region (start end)
        "Detect MIME charset of the text in the region between START and END."
        (let ((cs (mm-detect-coding-region start end)))
-       (coding-system-get cs 'mime-charset)))
+       (or (coding-system-get cs :mime-charset)
+           (coding-system-get cs 'mime-charset))))
    (defun mm-detect-mime-charset-region (start end)
      "Detect MIME charset of the text in the region between START and END."
      (let ((cs (mm-detect-coding-region start end)))