]> code.delx.au - gnu-emacs/blob - lisp/gnus/mm-util.el
Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-650
[gnu-emacs] / lisp / gnus / mm-util.el
1 ;;; mm-util.el --- Utility functions for Mule and low level things
2 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004
3 ;; Free Software Foundation, Inc.
4
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
6 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
7 ;; This file is part of GNU Emacs.
8
9 ;; GNU Emacs is free software; you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; any later version.
13
14 ;; GNU Emacs is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
18
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs; see the file COPYING. If not, write to the
21 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22 ;; Boston, MA 02111-1307, USA.
23
24 ;;; Commentary:
25
26 ;;; Code:
27
28 (eval-when-compile (require 'cl))
29 (require 'mail-prsvr)
30
;; Compatibility layer: for every (NAME . FALLBACK) pair in the table
;; below, define `mm-NAME' as an alias for NAME when this Emacs
;; provides NAME, and as an alias for FALLBACK otherwise.
(eval-and-compile
  (mapcar
   (lambda (entry)
     (let ((native (car entry))
           (fallback (cdr entry)))
       (defalias (intern (format "mm-%s" native))
         (if (fboundp native) native fallback))))
   '((decode-coding-string . (lambda (s a) s))
     (encode-coding-string . (lambda (s a) s))
     (encode-coding-region . ignore)
     (coding-system-list . ignore)
     (decode-coding-region . ignore)
     (char-int . identity)
     (coding-system-equal . equal)
     (annotationp . ignore)
     (set-buffer-file-coding-system . ignore)
     (make-char
      . (lambda (charset int)
          (int-to-char int)))
     (read-charset
      . (lambda (prompt)
          "Return a charset."
          (intern
           (completing-read
            prompt
            (mapcar (lambda (e) (list (symbol-name (car e))))
                    mm-mime-mule-charset-alist)
            nil t))))
     (subst-char-in-string
      . (lambda (from to string) ;; stolen (and renamed) from nnheader.el
          "Replace characters in STRING from FROM to TO."
          (let ((string (substring string 0)) ;Work on a copy.
                (len (length string))
                (idx 0))
            ;; Overwrite each FROM with TO in the copy.
            (while (< idx len)
              (when (= (aref string idx) from)
                (aset string idx to))
              (setq idx (1+ idx)))
            string)))
     (string-as-unibyte . identity)
     (string-make-unibyte . identity)
     (string-as-multibyte . identity)
     (multibyte-string-p . ignore)
     ;; It is not a MIME function, but some MIME functions use it.
     (make-temp-file . (lambda (prefix &optional dir-flag)
                         (let ((file (expand-file-name
                                      (make-temp-name prefix)
                                      (if (fboundp 'temp-directory)
                                          (temp-directory)
                                        temporary-file-directory))))
                           (if dir-flag
                               (make-directory file))
                           file)))
     (insert-byte . insert-char)
     (multibyte-char-to-unibyte . identity))))
87
;; `mm-char-or-char-int-p' aliases the first available of
;; `char-or-char-int-p' and `char-valid-p', falling back to `identity'.
(eval-and-compile
  (defalias 'mm-char-or-char-int-p
    (if (fboundp 'char-or-char-int-p)
        'char-or-char-int-p
      (if (fboundp 'char-valid-p)
          'char-valid-p
        'identity))))
94
95 ;; Fixme: This seems always to be used to read a MIME charset, so it
96 ;; should be re-named and fixed (in Emacs) to offer completion only on
97 ;; proper charset names (base coding systems which have a
98 ;; mime-charset defined). XEmacs doesn't believe in mime-charset;
99 ;; test with
100 ;; `(or (coding-system-get 'iso-8859-1 'mime-charset)
101 ;; (coding-system-get 'iso-8859-1 :mime-charset))'
102 ;; Actually, there should be an `mm-coding-system-mime-charset'.
(eval-and-compile
  (defalias 'mm-read-coding-system
    (if (not (fboundp 'read-coding-system))
        ;; No native reader: offer completion over the names listed in
        ;; `mm-mime-mule-charset-alist' instead.
        (lambda (prompt &optional default-coding-system)
          "Prompt the user for a coding system."
          (completing-read
           prompt (mapcar (lambda (entry) (list (symbol-name (car entry))))
                          mm-mime-mule-charset-alist)))
      (if (and (featurep 'xemacs)
               (<= (string-to-number emacs-version) 21.1))
          ;; On XEmacs up to 21.1 pass PROMPT only (presumably the
          ;; native reader takes no default there -- not verified here).
          (lambda (prompt &optional default-coding-system)
            (read-coding-system prompt))
        'read-coding-system))))
117
(defvar mm-coding-system-list nil)
(defun mm-get-coding-system-list ()
  "Return the coding system list, computing and caching it on first use.
The cache lives in the variable `mm-coding-system-list'; it is filled by
calling the function `mm-coding-system-list' while the cache is nil."
  (unless mm-coding-system-list
    (setq mm-coding-system-list (mm-coding-system-list)))
  mm-coding-system-list)
123
(defun mm-coding-system-p (cs)
  "Return non-nil if CS is a symbol naming a coding system.
In XEmacs, also return non-nil if CS is a coding system object."
  (cond
   ;; Preferred test when available.
   ((fboundp 'find-coding-system)
    (find-coding-system cs))
   ((fboundp 'coding-system-p)
    (coding-system-p cs))
   ;; Is this branch ever actually useful?
   (t
    (memq cs (mm-get-coding-system-list)))))
133
(defvar mm-charset-synonym-alist
  (append
   ;; Not in XEmacs, but it's not a proper MIME charset anyhow.
   (unless (mm-coding-system-p 'x-ctext)
     '((x-ctext . ctext)))
   ;; ISO-8859-15 is very similar to ISO-8859-1.  But it's _different_!
   (unless (mm-coding-system-p 'iso-8859-15)
     '((iso-8859-15 . iso-8859-1)))
   ;; BIG-5HKSCS is similar to, but different than, BIG-5.
   (unless (mm-coding-system-p 'big5-hkscs)
     '((big5-hkscs . big5)))
   ;; Windows-1252 is actually a superset of Latin-1.  See also
   ;; `gnus-article-dumbquotes-map'.
   (unless (mm-coding-system-p 'windows-1252)
     (if (mm-coding-system-p 'cp1252)
         '((windows-1252 . cp1252))
       '((windows-1252 . iso-8859-1))))
   ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft
   ;; Outlook users in Czech republic.  Use this to allow reading of
   ;; their e-mails.  cp1250 should be defined by M-x codepage-setup.
   (when (and (not (mm-coding-system-p 'windows-1250))
              (mm-coding-system-p 'cp1250))
     '((windows-1250 . cp1250))))
  "A mapping from invalid charset names to the real charset names.
Each entry is only present when the name in its car is not itself a
known coding system.")
159
(defvar mm-binary-coding-system
  (if (mm-coding-system-p 'binary)
      'binary
    ;; Second choice; nil when neither coding system is known.
    (and (mm-coding-system-p 'no-conversion)
         'no-conversion))
  "100% binary coding system.")
166
(defvar mm-text-coding-system
  ;; `raw-text-dos' on the DOS/Windows system types, `raw-text'
  ;; elsewhere; `mm-binary-coding-system' when that one is unknown.
  (let ((candidate (if (memq system-type '(windows-nt ms-dos ms-windows))
                       'raw-text-dos
                     'raw-text)))
    (if (mm-coding-system-p candidate)
        candidate
      mm-binary-coding-system))
  "Text-safe coding system (For removing ^M).")

(defvar mm-text-coding-system-for-write nil
  "Text coding system for write.")
176
(defvar mm-auto-save-coding-system
  (let ((dos-like (memq system-type '(windows-nt ms-dos ms-windows))))
    (cond
     ((mm-coding-system-p 'utf-8-emacs) ; Mule 7
      (cond ((not dos-like) 'utf-8-emacs)
            ((mm-coding-system-p 'utf-8-emacs-dos) 'utf-8-emacs-dos)
            (t mm-binary-coding-system)))
     ((mm-coding-system-p 'emacs-mule)
      (cond ((not dos-like) 'emacs-mule)
            ((mm-coding-system-p 'emacs-mule-dos) 'emacs-mule-dos)
            (t mm-binary-coding-system)))
     ((mm-coding-system-p 'escape-quoted) 'escape-quoted)
     (t mm-binary-coding-system)))
  "Coding system of auto save file.")

(defvar mm-universal-coding-system mm-auto-save-coding-system
  "The universal coding system.")
195
196 ;; Fixme: some of the cars here aren't valid MIME charsets. That
197 ;; should only matter with XEmacs, though.
(defvar mm-mime-mule-charset-alist
  `((us-ascii ascii)
    (iso-8859-1 latin-iso8859-1)
    (iso-8859-2 latin-iso8859-2)
    (iso-8859-3 latin-iso8859-3)
    (iso-8859-4 latin-iso8859-4)
    (iso-8859-5 cyrillic-iso8859-5)
    ;; Non-mule (X)Emacs uses the last mule-charset for 8bit characters.
    ;; The fake mule-charset, gnus-koi8-r, tells Gnus that the default
    ;; charset is koi8-r, not iso-8859-5.
    (koi8-r cyrillic-iso8859-5 gnus-koi8-r)
    (iso-8859-6 arabic-iso8859-6)
    (iso-8859-7 greek-iso8859-7)
    (iso-8859-8 hebrew-iso8859-8)
    (iso-8859-9 latin-iso8859-9)
    (iso-8859-14 latin-iso8859-14)
    (iso-8859-15 latin-iso8859-15)
    (viscii vietnamese-viscii-lower)
    (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978)
    (euc-kr korean-ksc5601)
    (gb2312 chinese-gb2312)
    (big5 chinese-big5-1 chinese-big5-2)
    (tibetan tibetan)
    (thai-tis620 thai-tis620)
    (iso-2022-7bit ethiopic arabic-1-column arabic-2-column)
    (iso-2022-jp-2 latin-iso8859-1 greek-iso8859-7
                   latin-jisx0201 japanese-jisx0208-1978
                   chinese-gb2312 japanese-jisx0208
                   korean-ksc5601 japanese-jisx0212
                   katakana-jisx0201)
    (iso-2022-int-1 latin-iso8859-1 greek-iso8859-7
                    latin-jisx0201 japanese-jisx0208-1978
                    chinese-gb2312 japanese-jisx0208
                    korean-ksc5601 japanese-jisx0212
                    chinese-cns11643-1 chinese-cns11643-2)
    ;; NOTE(review): this second entry reuses the key `iso-2022-int-1';
    ;; `assq' lookups only ever see the entry above.
    (iso-2022-int-1 latin-iso8859-1 latin-iso8859-2
                    cyrillic-iso8859-5 greek-iso8859-7
                    latin-jisx0201 japanese-jisx0208-1978
                    chinese-gb2312 japanese-jisx0208
                    korean-ksc5601 japanese-jisx0212
                    chinese-cns11643-1 chinese-cns11643-2
                    chinese-cns11643-3 chinese-cns11643-4
                    chinese-cns11643-5 chinese-cns11643-6
                    chinese-cns11643-7)
    ,(if (and (fboundp 'charsetp)
              (not (charsetp 'unicode-a))
              (mm-coding-system-p 'mule-utf-8))
         ;; If we have utf-8 we're in Mule 5+.
         (cons 'utf-8
               (delete 'ascii
                       (coding-system-get 'mule-utf-8 'safe-charsets)))
       ;; Non-Mule case, or the `unicode-a' charset exists, or there
       ;; is no `mule-utf-8' coding system.
       '(utf-8 unicode-a unicode-b unicode-c unicode-d unicode-e)))
  "Alist of MIME-charset/MULE-charsets.
Each element has the form (MIME-CHARSET MULE-CHARSET...).")
251
252 ;; Correct by construction, but should be unnecessary:
253 ;; XEmacs hates it.
;; When not on XEmacs and both `coding-system-list' and
;; `sort-coding-systems' exist, replace the static alist with one
;; derived from the base coding systems that declare a MIME charset.
(when (and (not (featurep 'xemacs))
           (fboundp 'coding-system-list)
           (fboundp 'sort-coding-systems))
  (let (rebuilt)
    (dolist (cs (sort-coding-systems (coding-system-list 'base-only)))
      (let ((mime (or (coding-system-get cs :mime-charset) ; Emacs 22
                      (coding-system-get cs 'mime-charset)))
            (safe (coding-system-get cs 'safe-charsets)))
        ;; Skip coding systems without a MIME charset and those whose
        ;; `safe-charsets' property is t.
        (when (and mime (not (eq t safe)))
          (push (cons mime (delq 'ascii safe)) rebuilt))))
    (setq mm-mime-mule-charset-alist (nreverse rebuilt))))
270
(defvar mm-hack-charsets
  '(iso-8859-15 iso-2022-jp-2)
  "A list of special charsets.
Valid elements include:
`iso-8859-15'    convert ISO-8859-1, -9 to ISO-8859-15 if ISO-8859-15 exists.
`iso-2022-jp-2'  convert ISO-2022-jp to ISO-2022-jp-2 if ISO-2022-jp-2 exists.")

(defvar mm-iso-8859-15-compatible
  '((iso-8859-1
     "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE")
    (iso-8859-9
     "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE\xD0\xDD\xDE\xF0\xFD\xFE"))
  "ISO-8859-15 exchangeable coding systems and inconvertible characters.
Each element is (CODING-SYSTEM STRING), where STRING holds the
characters of CODING-SYSTEM that are treated as inconvertible.")
282
(defvar mm-iso-8859-x-to-15-table
  (when (and (fboundp 'coding-system-p)
             (mm-coding-system-p 'iso-8859-15))
    (mapcar
     (lambda (entry)
       (let ((coding (car entry)))
         (if (not (mm-coding-system-p coding))
             ;; Placeholder for a coding system this Emacs lacks.
             '(gnus-charset 0)
           ;; Decode one sample byte in both coding systems; the
           ;; difference of the two characters is the stored offset.
           (let* ((sample (string-to-char
                           (decode-coding-string "\341" coding)))
                  (sample-15 (string-to-char
                              (decode-coding-string "\341" 'iso-8859-15)))
                  (inconvertible (string-to-list
                                  (decode-coding-string (car (cdr entry))
                                                        coding))))
             (cons (char-charset sample)
                   (cons (- sample-15 sample) inconvertible))))))
     mm-iso-8859-15-compatible))
  "A table of the difference character between ISO-8859-X and ISO-8859-15.
Each element is (CHARSET OFFSET . INCONVERTIBLE-CHARS).")
300
(defcustom mm-coding-system-priorities
  (when (and (boundp 'current-language-environment)
             (string= (symbol-value 'current-language-environment)
                      "Japanese"))
    ;; Japanese users prefer iso-2022-jp to euc-japan or
    ;; shift_jis, however iso-8859-1 should be used when
    ;; there are only ASCII text and Latin-1 characters.
    '(iso-8859-1 iso-2022-jp iso-2022-jp-2 shift_jis utf-8))
  "Preferred coding systems for encoding outgoing messages.

More than one suitable coding system may be found for some text.
By default, the coding system with the highest priority is used
to encode outgoing messages (see `sort-coding-systems').  If this
variable is set, it overrides the default priority."
  :version "21.2"
  :type '(repeat (symbol :tag "Coding system"))
  :group 'mime)
318
319 ;; ??
(defvar mm-use-find-coding-systems-region
  ;; Default to using the function whenever it exists.
  (fboundp 'find-coding-systems-region)
  "Use `find-coding-systems-region' to find proper coding systems.

Setting it to nil is useful on Emacsen supporting Unicode if sending
mail with multiple parts is preferred to sending a Unicode one.")
326
327 ;;; Internal variables:
328
329 ;;; Functions:
330
(defun mm-mule-charset-to-mime-charset (charset)
  "Return the MIME charset corresponding to the given Mule CHARSET.
Return nil when no MIME charset is found."
  (if (and (fboundp 'find-coding-systems-for-charsets)
           (fboundp 'sort-coding-systems))
      ;; Take the MIME charset of the first coding system, in
      ;; `sort-coding-systems' order, that declares one.
      (let ((candidates (sort-coding-systems
                         (copy-sequence
                          (find-coding-systems-for-charsets (list charset)))))
            mime)
        (while (and candidates (not mime))
          (let ((cs (pop candidates)))
            (when cs
              (setq mime (or (coding-system-get cs :mime-charset)
                             (coding-system-get cs 'mime-charset))))))
        mime)
    ;; Otherwise return the key of the first entry of
    ;; `mm-mime-mule-charset-alist' that lists CHARSET.
    (catch 'found
      (dolist (entry mm-mime-mule-charset-alist)
        (when (memq charset (cdr entry))
          (throw 'found (car entry))))
      nil)))
352
(defun mm-charset-to-coding-system (charset &optional lbt)
  "Return coding-system corresponding to CHARSET.
CHARSET is a symbol naming a MIME charset; a string is downcased and
interned first.  Return nil if CHARSET is nil or no coding system is
found for it.
If optional argument LBT (`unix', `dos' or `mac') is specified, it is
used as the line break code type of the coding system."
  (when (stringp charset)
    (setq charset (intern (downcase charset))))
  ;; Only decorate a real charset: formatting a nil CHARSET with LBT
  ;; would intern a bogus symbol such as `nil-dos', which would slip
  ;; past the null check below.
  (when (and charset lbt)
    (setq charset (intern (format "%s-%s" charset lbt))))
  (cond
   ((null charset)
    charset)
   ;; Running in a non-MULE environment.
   ((or (null (mm-get-coding-system-list))
        (not (fboundp 'coding-system-get)))
    charset)
   ;; ascii
   ((eq charset 'us-ascii)
    'ascii)
   ;; Check to see whether we can handle this charset.  (This depends
   ;; on there being some coding system matching each `mime-charset'
   ;; property defined, as there should be.)
   ((and (mm-coding-system-p charset)
;;; Doing this would potentially weed out incorrect charsets.
;;;      charset
;;;      (eq charset (coding-system-get charset 'mime-charset))
         )
    charset)
   ;; Translate invalid charsets.
   ((let ((cs (cdr (assq charset mm-charset-synonym-alist))))
      (and cs (mm-coding-system-p cs) cs)))
   ;; Last resort: search the coding system list for entries which
   ;; have the right mime-charset in case the canonical name isn't
   ;; defined (though it should be).
   ((let (cs)
      ;; mm-get-coding-system-list returns a list of cs without lbt.
      ;; Do we need -lbt?
      (dolist (c (mm-get-coding-system-list))
        (if (and (null cs)
                 (eq charset (or (coding-system-get c :mime-charset)
                                 (coding-system-get c 'mime-charset))))
            (setq cs c)))
      cs))))
396
(defsubst mm-replace-chars-in-string (string from to)
  "Return the result of `mm-subst-char-in-string' on FROM, TO and STRING.
This is the same operation with STRING as the first argument."
  (mm-subst-char-in-string from to string))
399
(eval-and-compile
  (defvar mm-emacs-mule
    (and (not (featurep 'xemacs))
         (boundp 'default-enable-multibyte-characters)
         default-enable-multibyte-characters
         (fboundp 'set-buffer-multibyte))
    "True in Emacs with Mule.")

  ;; Real definitions when `mm-emacs-mule' is non-nil; otherwise both
  ;; functions are aliases for `ignore'.
  (if mm-emacs-mule
      (progn
        (defun mm-enable-multibyte ()
          "Set the multibyte flag of the current buffer.
Only do this if the default value of `enable-multibyte-characters' is
non-nil.  This is a no-op in XEmacs."
          (set-buffer-multibyte 'to))
        (defun mm-disable-multibyte ()
          "Unset the multibyte flag of the current buffer.
This is a no-op in XEmacs."
          (set-buffer-multibyte nil)))
    (defalias 'mm-enable-multibyte 'ignore)
    (defalias 'mm-disable-multibyte 'ignore)))
421
(defun mm-preferred-coding-system (charset)
  "Return the `preferred-coding-system' property of CHARSET.
If that is nil, return the `prefered-coding-system' property instead."
  (let ((preferred (get-charset-property charset 'preferred-coding-system)))
    (if preferred
        preferred
      ;; A typo in some Emacs versions.
      (get-charset-property charset 'prefered-coding-system))))
426
427 ;; Mule charsets shouldn't be used.
(defsubst mm-guess-charset ()
  "Guess Mule charset from the language environment.
The result is cached in `mail-parse-mule-charset'."
  (unless mail-parse-mule-charset
    ;; First guess: the last charset listed for the current language
    ;; environment.
    (let ((guess (and (boundp 'current-language-environment)
                      (car (last
                            (assq 'charset
                                  (assoc current-language-environment
                                         language-info-alist)))))))
      ;; No guess, or plain ascii: derive one from `mail-parse-charset'.
      (when (memq guess '(nil ascii))
        (setq guess (or (car (last (assq mail-parse-charset
                                         mm-mime-mule-charset-alist)))
                        ;; default
                        'latin-iso8859-1)))
      (setq mail-parse-mule-charset guess)))
  mail-parse-mule-charset)
447
(defun mm-charset-after (&optional pos)
  "Return charset of a character in current buffer at position POS.
If POS is nil, it defaults to the current point.
If POS is out of range, the value is nil.
If the charset is `composition', return the actual one."
  (let ((char (char-after pos)) charset)
    ;; `char-after' returns nil when there is no character at POS.
    ;; Return nil in that case, as documented, instead of signalling
    ;; an error from comparing nil with a number.
    (when char
      (if (< (mm-char-int char) 128)
          (setq charset 'ascii)
        ;; charset-after is fake in some Emacsen.
        (setq charset (and (fboundp 'char-charset) (char-charset char)))
        (if (eq charset 'composition)   ; Mule 4
            (let ((p (or pos (point))))
              (cadr (find-charset-region p (1+ p))))
          (if (and charset (not (memq charset '(ascii eight-bit-control
                                                      eight-bit-graphic))))
              charset
            ;; No charset, or one of the uninformative ones above:
            ;; guess from the environment.
            (mm-guess-charset)))))))
465
(defun mm-mime-charset (charset)
  "Return the MIME charset corresponding to the given Mule CHARSET.
Signal an error if CHARSET is `unknown'."
  (when (eq charset 'unknown)
    (error "The message contains non-printable characters, please use attachment"))
  (if (not (and (fboundp 'coding-system-get)
                (fboundp 'get-charset-property)))
      ;; This is for XEmacs.
      (mm-mule-charset-to-mime-charset charset)
    ;; This exists in Emacs 20.
    (let ((preferred (mm-preferred-coding-system charset)))
      (or
       ;; MIME charset of the preferred coding system, if any.
       (and preferred
            (or (coding-system-get preferred :mime-charset)
                (coding-system-get preferred 'mime-charset)))
       (and (eq charset 'ascii)
            'us-ascii)
       ;; Otherwise the preferred coding system itself.
       preferred
       (mm-mule-charset-to-mime-charset charset)))))
484
(defun mm-delete-duplicates (list)
  "Simple substitute for CL `delete-duplicates', testing with `equal'.
Return the distinct elements of LIST in order of first occurrence.
LIST is processed with `delete', so it may be modified."
  (let (kept)
    (while list
      (let ((item (car list)))
        ;; Drop ITEM and every later element `equal' to it.
        (setq list (delete item list))
        (push item kept)))
    (nreverse kept)))
493
494 ;; Fixme: This is used in places when it should be testing the
495 ;; default multibyteness. See mm-default-multibyte-p.
(eval-and-compile
  (if (or (featurep 'xemacs)
          (not (boundp 'enable-multibyte-characters)))
      ;; XEmacs, or no per-buffer flag: report the `mule' feature.
      (defun mm-multibyte-p () (featurep 'mule))
    (defun mm-multibyte-p ()
      "Non-nil if multibyte is enabled in the current buffer."
      enable-multibyte-characters)))
503
(defun mm-default-multibyte-p ()
  "Return non-nil if the session is multibyte.
This affects whether coding conversion should be attempted generally."
  (and (featurep 'mule)
       ;; With Mule, use the default flag when that variable exists;
       ;; otherwise answer t.
       (or (not (boundp 'default-enable-multibyte-characters))
           default-enable-multibyte-characters)))
511
(defun mm-iso-8859-x-to-15-region (&optional b e)
  "Convert characters between B and E using `mm-iso-8859-x-to-15-table'.
If E is nil the accessible portion of the buffer is processed.
Return t if no character listed as inconvertible in the table was met,
nil otherwise; also return nil when `char-charset' is unavailable.
Point is left where the scan stopped."
  (when (fboundp 'char-charset)
    (let ((convertible t)
          ch rule)
      (save-restriction
        (when e
          (narrow-to-region b e))
        (goto-char (point-min))
        ;; Skip over characters in the range \\0-\\177.
        (skip-chars-forward "\0-\177")
        (while (not (eobp))
          (setq ch (char-after)
                rule (assq (char-charset ch) mm-iso-8859-x-to-15-table))
          (cond
           ;; Charset not covered by the table: leave the char alone.
           ((null rule)
            (forward-char))
           ;; Listed as inconvertible: remember that and move on.
           ((memq ch (cdr (cdr rule)))
            (setq convertible nil)
            (forward-char))
           ;; Replace the char by itself plus the table offset.
           (t
            (let ((replacement (+ ch (car (cdr rule)))))
              (delete-char 1)
              (insert-before-markers replacement))))
          (skip-chars-forward "\0-\177")))
      convertible)))
532
(defun mm-sort-coding-systems-predicate (a b)
  "Return non-nil if coding system A should sort before B.
A wins when it appears earlier than B in the list of base coding
systems derived from `mm-coding-system-priorities', or when only A
appears in it."
  (let (bases)
    (dolist (cs mm-coding-system-priorities)
      ;; Note: invalid entries are dropped silently
      (push (and (coding-system-p cs)
                 (coding-system-base cs))
            bases))
    (setq bases (nreverse bases))
    ;; A longer `memq' tail means an earlier position in the list.
    (> (length (memq a bases))
       (length (memq b bases)))))
542
(defun mm-find-mime-charset-region (b e &optional hack-charsets)
  "Return the MIME charsets needed to encode the region between B and E.
nil means ASCII, a single-element list represents an appropriate MIME
charset, and a longer list means no appropriate charset.
HACK-CHARSETS is a list of special charsets; `iso-8859-15' and
`iso-2022-jp-2' are the elements acted upon here."
  (let (charsets)
    ;; The return possibilities of this function are a mess...
    (when (and (mm-multibyte-p)
               mm-use-find-coding-systems-region)
      ;; Find the mime-charset of the most preferred coding
      ;; system that has one.
      (let ((systems (find-coding-systems-region b e)))
        (when mm-coding-system-priorities
          (setq systems
                (sort systems 'mm-sort-coding-systems-predicate)))
        (setq systems (delq 'compound-text systems))
        (unless (equal systems '(undecided))
          (while systems
            (let* ((head (pop systems))
                   (cs (or (coding-system-get head :mime-charset)
                           (coding-system-get head 'mime-charset)))
                   (name (and cs (symbol-name cs))))
              ;; The mime-charset (`x-ctext') of
              ;; `compound-text' is not in the IANA list.  We
              ;; shouldn't normally use anything here with a
              ;; mime-charset having an `x-' prefix.
              ;; Fixme:  Allow this to be overridden, since
              ;; there is existing use of x-ctext.
              ;; Also people apparently need the coding system
              ;; `iso-2022-jp-3' (which Mule-UCS defines with
              ;; mime-charset, though it's not valid).
              (when (and cs
                         (not (string-match "^[Xx]-" name))
                         ;; UTF-16 of any variety is invalid for
                         ;; text parts and, unfortunately, has
                         ;; mime-charset defined both in Mule-UCS
                         ;; and versions of Emacs.  (The name
                         ;; might be `mule-utf-16...'  or
                         ;; `utf-16...'.)
                         (not (string-match "utf-16" name)))
                ;; First acceptable charset wins; stop the loop.
                (setq charsets (list cs)
                      systems nil)))))))
    ;; Otherwise we're not multibyte, we're XEmacs, or a single
    ;; coding system won't cover it.
    (unless charsets
      (setq charsets
            (mm-delete-duplicates
             (mapcar 'mm-mime-charset
                     (delq 'ascii
                           (mm-find-charset-region b e))))))
    ;; If the region could be converted to ISO-8859-15, drop the
    ;; charsets that ISO-8859-15 stands in for.
    (when (and (> (length charsets) 1)
               (memq 'iso-8859-15 charsets)
               (memq 'iso-8859-15 hack-charsets)
               (save-excursion (mm-iso-8859-x-to-15-region b e)))
      (dolist (entry mm-iso-8859-15-compatible)
        (setq charsets (delq (car entry) charsets))))
    (when (and (memq 'iso-2022-jp-2 charsets)
               (memq 'iso-2022-jp-2 hack-charsets))
      (setq charsets (delq 'iso-2022-jp charsets)))
    charsets))
601
(defmacro mm-with-unibyte-buffer (&rest forms)
  "Create a temporary buffer, and evaluate FORMS there like `progn'.
Use unibyte mode for this.
`default-enable-multibyte-characters' is bound to nil around the
creation of the buffer and the evaluation of FORMS."
  `(let ((default-enable-multibyte-characters nil))
     (with-temp-buffer ,@forms)))
(put 'mm-with-unibyte-buffer 'lisp-indent-function 0)
(put 'mm-with-unibyte-buffer 'edebug-form-spec '(body))
609
(defmacro mm-with-multibyte-buffer (&rest forms)
  "Create a temporary buffer, and evaluate FORMS there like `progn'.
Use multibyte mode for this.
`default-enable-multibyte-characters' is bound to t around the
creation of the buffer and the evaluation of FORMS."
  (list 'let '((default-enable-multibyte-characters t))
        (cons 'with-temp-buffer forms)))
(put 'mm-with-multibyte-buffer 'lisp-indent-function 0)
(put 'mm-with-multibyte-buffer 'edebug-form-spec '(body))
617
(defmacro mm-with-unibyte-current-buffer (&rest forms)
  "Evaluate FORMS with current buffer temporarily made unibyte.
Also bind `default-enable-multibyte-characters' to nil.
Equivalent to `progn' in XEmacs"
  ;; Uninterned symbols keep the saved state from clashing with FORMS.
  (let ((saved-flag (make-symbol "multibyte"))
        (saved-buffer (make-symbol "buffer")))
    `(if (not mm-emacs-mule)
         (let ((default-enable-multibyte-characters nil))
           ,@forms)
       (let ((,saved-flag enable-multibyte-characters)
             (,saved-buffer (current-buffer)))
         (unwind-protect
             (let ((default-enable-multibyte-characters nil))
               (set-buffer-multibyte nil)
               ,@forms)
           ;; Go back to the original buffer and restore its flag,
           ;; even on a non-local exit from FORMS.
           (set-buffer ,saved-buffer)
           (set-buffer-multibyte ,saved-flag))))))
(put 'mm-with-unibyte-current-buffer 'lisp-indent-function 0)
(put 'mm-with-unibyte-current-buffer 'edebug-form-spec '(body))
637
(defmacro mm-with-unibyte (&rest forms)
  "Eval the FORMS with the default value of `enable-multibyte-characters' nil.
This binds `default-enable-multibyte-characters' to nil around FORMS."
  `(let ((default-enable-multibyte-characters nil))
     ,@forms))
(put 'mm-with-unibyte 'lisp-indent-function 0)
(put 'mm-with-unibyte 'edebug-form-spec '(body))
644
(defmacro mm-with-multibyte (&rest forms)
  "Eval the FORMS with the default value of `enable-multibyte-characters' t.
This binds `default-enable-multibyte-characters' to t around FORMS."
  (cons 'let
        (cons '((default-enable-multibyte-characters t))
              forms)))
(put 'mm-with-multibyte 'lisp-indent-function 0)
(put 'mm-with-multibyte 'edebug-form-spec '(body))
651
(defun mm-find-charset-region (b e)
  "Return a list of Emacs charsets in the region B to E."
  (if (and (mm-multibyte-p)
           (fboundp 'find-charset-region))
      (let ((found (find-charset-region b e)))
        ;; Remove composition since the base charsets have been included.
        ;; Remove eight-bit-*, treat them as ascii.
        (dolist (unwanted '(composition eight-bit-control eight-bit-graphic
                                        control-1))
          (setq found (delq unwanted found)))
        found)
    ;; We are in a unibyte buffer or XEmacs non-mule, so we futz around a bit.
    (save-excursion
      (save-restriction
        (narrow-to-region b e)
        (goto-char (point-min))
        (skip-chars-forward "\0-\177")
        (if (eobp)
            ;; Nothing outside the range \\0-\\177 in the region.
            '(ascii)
          ;; Guess the non-ASCII charset: first from the language
          ;; environment, then from `mail-parse-charset', and finally
          ;; fall back to `latin-iso8859-1'.
          (let ((charset
                 (and (boundp 'current-language-environment)
                      (car (last (assq 'charset
                                       (assoc current-language-environment
                                              language-info-alist)))))))
            (when (memq charset '(nil ascii))
              (setq charset
                    (car (last (assq mail-parse-charset
                                     mm-mime-mule-charset-alist)))))
            (list 'ascii (or charset 'latin-iso8859-1))))))))
685
;; Use `shell-quote-argument' when available; otherwise define a
;; substitute that puts a backslash before each special character.
(if (fboundp 'shell-quote-argument)
    (defalias 'mm-quote-arg 'shell-quote-argument)
  (defun mm-quote-arg (arg)
    "Return a version of ARG that is safe to evaluate in a shell."
    (let ((start 0)
          pieces hit)
      ;; *** bug: we don't handle newline characters properly
      (while (setq hit (string-match "[]*[;!'`\"$\\& \t{} |()<>]" arg start))
        ;; PIECES is built in reverse: the text before the match, a
        ;; backslash, then the matched character.
        (setq pieces (cons (list (aref arg hit))
                           (cons "\\"
                                 (cons (substring arg start hit) pieces)))
              start (1+ hit)))
      (if (zerop start)
          ;; No special character found: return ARG itself.
          arg
        (apply 'concat
               (nconc (nreverse pieces) (list (substring arg start))))))))
700
(defun mm-auto-mode-alist ()
  "Return an `auto-mode-alist' with only the .gz (etc) thingies.
These are the elements of `auto-mode-alist' whose cdr is a list, kept
in their original order."
  (let (kept)
    (dolist (entry auto-mode-alist)
      (when (listp (cdr entry))
        (push entry kept)))
    (nreverse kept)))
710
(defvar mm-inhibit-file-name-handlers
  '(jka-compr-handler
    image-file-handler)
  "A list of handlers doing (un)compression (etc) thingies.
The file functions in this library add these to
`inhibit-file-name-handlers' when called with a non-nil INHIBIT.")
714
(defun mm-insert-file-contents (filename &optional visit beg end replace
                                         inhibit)
  "Like `insert-file-contents', but only reads in the file.
A buffer may be modified in several ways after reading into the buffer due
to advanced Emacs features, such as file-name-handlers, format decoding,
`find-file-hooks', etc.
If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'.
  This function ensures that none of these modifications will take place."
  ;; Compute the INHIBIT-dependent values first, then bind everything
  ;; around the actual read.
  (let ((modes (unless inhibit (mm-auto-mode-alist)))
        (operation (if inhibit
                       'insert-file-contents
                     inhibit-file-name-operation))
        (handlers (if inhibit
                      (append mm-inhibit-file-name-handlers
                              inhibit-file-name-handlers)
                    inhibit-file-name-handlers)))
    (let ((format-alist nil)
          (auto-mode-alist modes)
          (default-major-mode 'fundamental-mode)
          (enable-local-variables nil)
          (after-insert-file-functions nil)
          (enable-local-eval nil)
          (find-file-hooks nil)
          (inhibit-file-name-operation operation)
          (inhibit-file-name-handlers handlers))
      (insert-file-contents filename visit beg end replace))))
739
(defun mm-append-to-file (start end filename &optional codesys inhibit)
  "Append the contents of the region to the end of file FILENAME.
When called from a function, expects three arguments,
START, END and FILENAME.  START and END are buffer positions
saying what text to write.
Optional fourth argument specifies the coding system to use when
encoding the file.
If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'."
  (let ((coding (or codesys
                    mm-text-coding-system-for-write
                    mm-text-coding-system))
        (operation (if inhibit
                       'append-to-file
                     inhibit-file-name-operation))
        (handlers (if inhibit
                      (append mm-inhibit-file-name-handlers
                              inhibit-file-name-handlers)
                    inhibit-file-name-handlers)))
    (let ((coding-system-for-write coding)
          (inhibit-file-name-operation operation)
          (inhibit-file-name-handlers handlers))
      ;; APPEND is t; `no-message' is passed as the VISIT argument.
      (write-region start end filename t 'no-message)
      (message "Appended to %s" filename))))
761
(defun mm-write-region (start end filename &optional append visit lockname
                              coding-system inhibit)
  "Like `write-region'.
The text is written with CODING-SYSTEM if non-nil, else with
`mm-text-coding-system-for-write', else with `mm-text-coding-system'.
If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'."
  (let ((coding (or coding-system
                    mm-text-coding-system-for-write
                    mm-text-coding-system))
        (operation (if inhibit
                       'write-region
                     inhibit-file-name-operation))
        (handlers (if inhibit
                      (append mm-inhibit-file-name-handlers
                              inhibit-file-name-handlers)
                    inhibit-file-name-handlers)))
    (let ((coding-system-for-write coding)
          (inhibit-file-name-operation operation)
          (inhibit-file-name-handlers handlers))
      (write-region start end filename append visit lockname))))
779
(defun mm-image-load-path (&optional package)
  "Return `load-path' with existing image directories spliced in.
For each non-nil element of `load-path', the directory \"etc/PACKAGE\"
next to it (PACKAGE defaults to \"gnus/\") is inserted just before that
element when it exists.  Every element of `load-path' is kept."
  (let (result)
    (dolist (path load-path)
      (when path
        (let ((dir (concat (file-name-directory
                            (directory-file-name path))
                           "etc/" (or package "gnus/"))))
          (when (file-directory-p dir)
            (push dir result))))
      (push path result))
    (nreverse result)))
790
791 ;; Fixme: This doesn't look useful where it's used.
(if (fboundp 'detect-coding-region)
    (defun mm-detect-coding-region (start end)
      "Like `detect-coding-region' except returning the best one.
Examine the text between START and END.  When `detect-coding-region'
returns a list, return its first element; otherwise return its value
unchanged."
      ;; Examine the region the caller asked for, not the text from
      ;; point to the end of the buffer.
      (let ((coding-systems
             (detect-coding-region start end)))
        (or (car-safe coding-systems)
            coding-systems)))
  (defun mm-detect-coding-region (start end)
    "Guess the coding of the text between START and END.
Return `ascii' if the region holds only characters in the range
\\0-\\177, otherwise the result of `mm-guess-charset'.  Point is
restored before returning."
    (let ((point (point)))
      (goto-char start)
      (skip-chars-forward "\0-\177" end)
      (prog1
          (if (eq (point) end) 'ascii (mm-guess-charset))
        (goto-char point)))))
806
(if (fboundp 'coding-system-get)
    (defun mm-detect-mime-charset-region (start end)
      "Detect MIME charset of the text in the region between START and END.
Return the MIME charset recorded for the coding system found by
`mm-detect-coding-region', or nil if it records none."
      (let ((cs (mm-detect-coding-region start end)))
        ;; Try the `:mime-charset' property first (Emacs 22) and then
        ;; `mime-charset', like the other lookups in this file.
        (or (coding-system-get cs :mime-charset)
            (coding-system-get cs 'mime-charset))))
  (defun mm-detect-mime-charset-region (start end)
    "Detect MIME charset of the text in the region between START and END.
Without `coding-system-get', return the result of
`mm-detect-coding-region' unchanged."
    (let ((cs (mm-detect-coding-region start end)))
      cs)))
816
817
818 (provide 'mm-util)
819
820 ;;; arch-tag: 94dc5388-825d-4fd1-bfa5-2100aa351238
821 ;;; mm-util.el ends here