code.delx.au - gnu-emacs/blob - lisp/mh-e/mh-junk.el
Follow MH-E Developers Guide conventions. Use `' quotes for Help
[gnu-emacs] / lisp / mh-e / mh-junk.el
1 ;;; mh-junk.el --- Interface to anti-spam measures
2
3 ;; Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4
5 ;; Author: Satyaki Das <satyaki@theforce.stanford.edu>,
6 ;; Bill Wohler <wohler@newt.com>
7 ;; Maintainer: Bill Wohler <wohler@newt.com>
8 ;; Keywords: mail, spam
9
10 ;; This file is part of GNU Emacs.
11
12 ;; GNU Emacs is free software; you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; any later version.
16
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
21
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the
24 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25 ;; Boston, MA 02110-1301, USA.
26
27 ;;; Commentary:
28
29 ;; Spam handling in MH-E.
30
31 ;;; Change Log:
32
33 ;;; Code:
34
35 (eval-when-compile (require 'mh-acros))
36 (mh-require-cl)
37 (require 'mh-e)
38
39 ;; Interactive functions callable from the folder buffer
40 ;;;###mh-autoload
(defun mh-junk-blacklist (range)
  "Blacklist RANGE as spam.

Train the spam program in use (see the option `mh-junk-program')
on the messages in RANGE, then dispose of each message as directed
by the option `mh-junk-disposition'.

See the documentation of `mh-interactive-range' for how RANGE is
read when this command is called interactively.

Details specific to each spam fighting program are found in:

    - `mh-spamassassin-blacklist'
    - `mh-bogofilter-blacklist'
    - `mh-spamprobe-blacklist'"
  (interactive (list (mh-interactive-range "Blacklist")))
  (let ((trainer (nth 1 (assoc mh-junk-choice mh-junk-function-alist))))
    (if (null trainer)
        (error "Customize `mh-junk-program' appropriately"))
    ;; Turn the disposition into a folder name, or nil to delete.
    (let ((folder
           (when mh-junk-disposition
             (if (equal mh-junk-disposition "")
                 "+"
               (let ((lead (aref mh-junk-disposition 0)))
                 (cond ((eq lead ?+)
                        mh-junk-disposition)
                       ;; A leading "@" names a subfolder of the
                       ;; current folder.
                       ((eq lead ?@)
                        (concat mh-current-folder "/"
                                (substring mh-junk-disposition 1)))
                       (t
                        (concat "+" mh-junk-disposition))))))))
      (mh-iterate-on-range msg range
        (message "Blacklisting message %d..." msg)
        (funcall (symbol-function trainer) msg)
        (message "Blacklisting message %d...done" msg)
        ;; Record the message in `mh-seen-list' once only.
        (unless (memq msg mh-seen-list)
          (setq mh-seen-list (cons msg mh-seen-list)))
        (cond (folder (mh-refile-a-msg nil (intern folder)))
              (t (mh-delete-a-msg nil))))
      (mh-next-msg))))
79
80 ;;;###mh-autoload
(defun mh-junk-whitelist (range)
  "Whitelist RANGE as ham.

Use this command on messages in RANGE that were incorrectly
classified as spam (see the option `mh-junk-program'). Each message
is reclassified as ham and then refiled into the \"+inbox\" folder
\(the folder named by `mh-inbox').

See the documentation of `mh-interactive-range' for how RANGE is
read when this command is called interactively."
  (interactive (list (mh-interactive-range "Whitelist")))
  (let ((trainer (nth 2 (assoc mh-junk-choice mh-junk-function-alist))))
    (if (null trainer)
        (error "Customize `mh-junk-program' appropriately"))
    (mh-iterate-on-range msg range
      (message "Whitelisting message %d..." msg)
      (funcall (symbol-function trainer) msg)
      (message "Whitelisting message %d...done" msg)
      (mh-refile-a-msg nil (intern mh-inbox)))
    (mh-next-msg)))
100
101 \f
102
103 ;; Spamassassin Interface
104
(defvar mh-spamassassin-executable (executable-find "spamassassin")
  "Full path of the \"spamassassin\" program found when this file was loaded.
This is nil if the program could not be found.")

(defvar mh-sa-learn-executable (executable-find "sa-learn")
  "Full path of the \"sa-learn\" program found when this file was loaded.
This is nil if the program could not be found.")
107
(defun mh-spamassassin-blacklist (msg)
  "Blacklist MSG with SpamAssassin.

SpamAssassin is one of the more popular spam filtering programs. Get
it from your local distribution or from http://spamassassin.org/.

To use SpamAssassin, add the following recipes to \".procmailrc\":

    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with SpamAssassin.
    :0fw
    | spamc

    # Anything with a spam level of 10 or more is junked immediately.
    :0:
    * ^X-Spam-Level: ..........
    /dev/null

    :0:
    * ^X-Spam-Status: Yes
    spam/.

If you don't use \"spamc\", use \"spamassassin -P -a\".

Note that one of the recipes above throws away messages with a score
greater than or equal to 10. Here's how you can determine a value that
works best for you.

First, run \"spamassassin -t\" on every mail message in your archive and
use Gnumeric to verify that the average plus the standard deviation of
good mail is under 5, the SpamAssassin default for \"spam\".

Using Gnumeric, sort the messages by score and view the messages with
the highest score. Determine the score which encompasses all of your
interesting messages and add a couple of points to be conservative.
Add that many dots to the \"X-Spam-Level:\" header field above to send
messages with that score down the drain.

In the example above, messages with a score of 5-9 are set aside in
the \"+spam\" folder for later review. The major weakness of rules-based
filters is a plethora of false positives so it is worthwhile to check.

If SpamAssassin classifies a message incorrectly, or is unsure, you
can use the MH-E commands \\[mh-junk-blacklist] and
\\[mh-junk-whitelist].

The \\[mh-junk-blacklist] command adds a \"blacklist_from\" entry to
\"~/.spamassassin/user_prefs\", deletes the message, and sends the
message to the Razor, so that others might not see this spam. If the
\"sa-learn\" command is available, the message is also recategorized as
spam.

The \\[mh-junk-whitelist] command adds a \"whitelist_from\" rule to the
\"~/.spamassassin/user_prefs\" file. If the \"sa-learn\" command is
available, the message is also recategorized as ham.

Over time, you'll observe that the same host or domain occurs
repeatedly in the \"blacklist_from\" entries, so you might think that
you could avoid future spam by blacklisting all mail from a particular
domain. The utility function `mh-spamassassin-identify-spammers' helps
you do precisely that. This function displays a frequency count of the
hosts and domains in the \"blacklist_from\" entries from the last blank
line in \"~/.spamassassin/user_prefs\" to the end of the file. This
information can be used so that you can replace multiple
\"blacklist_from\" entries with a single wildcard entry such as:

    blacklist_from *@*amazingoffersdirect2u.com

In versions of SpamAssassin (2.50 and on) that support a Bayesian
classifier, \\[mh-junk-blacklist] uses the \"sa-learn\" program to
recategorize the message as spam. Neither MH-E, nor SpamAssassin,
rebuilds the database after adding words, so you will need to run
\"sa-learn --rebuild\" periodically. This can be done by adding the
following to your crontab:

    0 * * * * sa-learn --rebuild > /dev/null 2>&1"
  (unless mh-spamassassin-executable
    (error "Unable to find the spamassassin executable"))
  ;; Capture the folder before switching buffers below.
  ;; NOTE(review): presumably `mh-current-folder' is buffer-local -- confirm.
  (let ((current-folder mh-current-folder)
        (msg-file (mh-msg-filename msg mh-current-folder))
        sender)
    (save-excursion
      ;; Report the message; MSG-FILE is the program's standard input and
      ;; its output is collected in `mh-log-buffer'.
      (message "Reporting message %d..." msg)
      (mh-truncate-log-buffer)
      (call-process mh-spamassassin-executable msg-file mh-log-buffer nil
                    ;;"--report" "--remove-from-whitelist"
                    "-r" "-R")          ; spamassassin V2.20
      (when mh-sa-learn-executable
        (message "Recategorizing this message as spam...")
        (call-process mh-sa-learn-executable msg-file mh-log-buffer nil
                      "--single" "--spam" "--local" "--no-rebuild"))
      (message "Blacklisting message %d..." msg)
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      ;; The scan output is parsed right below, so it has to be inserted
      ;; synchronously into the current (temporary) buffer.  Hence the
      ;; destination is t rather than `mh-junk-background', which would
      ;; discard the output and leave the buffer empty.
      (call-process (expand-file-name mh-scan-prog mh-progs)
                    nil t nil
                    (format "%s" msg) current-folder
                    "-format" "%<(mymbox{from})%|%(addr{from})%>")
      (goto-char (point-min))
      ;; The first non-empty output line is taken as the sender address.
      (if (search-forward-regexp "^\\(.+\\)$" nil t)
          (progn
            (setq sender (match-string 0))
            (mh-spamassassin-add-rule "blacklist_from" sender)
            (message "Blacklisting message %d...done" msg))
        (message "Blacklisting message %d...not done (from my address)"
                 msg)))))
214
(defun mh-spamassassin-whitelist (msg)
  "Whitelist MSG with SpamAssassin.

The \\[mh-junk-whitelist] command adds a \"whitelist_from\" rule to
the \"~/.spamassassin/user_prefs\" file. If the \"sa-learn\" command
is available, the message is also recategorized as ham.

See `mh-spamassassin-blacklist' for more information."
  (unless mh-spamassassin-executable
    (error "Unable to find the spamassassin executable"))
  (let ((msg-file (mh-msg-filename msg mh-current-folder))
        (show-buffer (get-buffer mh-show-buffer))
        from)
    (save-excursion
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      ;; Run the message through spamassassin to strip its markup and
      ;; collect the cleaned text in the temporary buffer.
      (message "Removing spamassassin markup from message...")
      (call-process mh-spamassassin-executable msg-file mh-temp-buffer nil
                    ;; "--remove-markup"
                    "-d")               ; spamassassin V2.20
      (if show-buffer
          (kill-buffer show-buffer))
      ;; Overwrite the message file with the cleaned text.
      (write-file msg-file)
      (when mh-sa-learn-executable
        (message "Recategorizing this message as ham...")
        ;; Each option must be its own argument; a single
        ;; "--local --no-rebuild" string reaches sa-learn as one
        ;; unrecognized option.
        (call-process mh-sa-learn-executable msg-file mh-temp-buffer nil
                      "--single" "--ham" "--local" "--no-rebuild"))
      (message "Whitelisting message %d..." msg)
      (setq from
            (car (mh-funcall-if-exists
                  ietf-drums-parse-address (mh-get-header-field "From:"))))
      (kill-buffer nil)
      ;; Only add a rule when a sender address could be extracted.
      (unless (or (null from) (equal from ""))
        (mh-spamassassin-add-rule "whitelist_from" from))
      (message "Whitelisting message %d...done" msg))))
250
(defun mh-spamassassin-add-rule (rule body)
  "Add a new rule to \"~/.spamassassin/user_prefs\".
The name of the rule is RULE and its body is BODY.

The rule is written as RULE, a tab, and BODY on a line of its own.
Nothing is added if such a line (compared case-insensitively) is
already present anywhere in the file. The file is saved after a
rule is added, and its buffer is killed afterward unless the file
was already being visited."
  (save-window-excursion
    (let* ((entry (format "%s\t%s" rule body))
           (case-fold-search t)
           (file (expand-file-name "~/.spamassassin/user_prefs"))
           (buffer-exists (find-buffer-visiting file)))
      (find-file file)
      (save-excursion
        ;; Look through the whole buffer, not just the text after point:
        ;; an already visited buffer can have point anywhere, and the
        ;; rule may even be the very first line.
        (goto-char (point-min))
        (unless (re-search-forward (concat "^" (regexp-quote entry) "$")
                                   nil t)
          (goto-char (point-max))
          (insert (if (bolp) "" "\n") entry "\n")
          (save-buffer)))
      (if (not buffer-exists)
          (kill-buffer nil)))))
267
(defun mh-spamassassin-identify-spammers ()
  "Identify spammers who are repeat offenders.

This function displays a frequency count of the hosts and domains
in the \"blacklist_from\" entries from the last blank line in
\"~/.spamassassin/user_prefs\" to the end of the file. If the file
contains no blank line, all of its entries are considered. Only
hosts and domains that occur more than twice are listed. This
information can be used so that you can replace multiple
\"blacklist_from\" entries with a single wildcard entry such as:

    blacklist_from *@*amazingoffersdirect2u.com"
  (interactive)
  (let* ((file (expand-file-name "~/.spamassassin/user_prefs"))
         (domains (make-hash-table :test 'equal)))
    (find-file file)
    ;; Only consider entries between last blank line and end of file.
    ;; Start one character before the end so that the empty line after
    ;; a trailing newline is not taken for the blank line.
    (goto-char (1- (point-max)))
    ;; Without a blank line, fall back to the start of the file
    ;; instead of signaling a search failure.
    (unless (search-backward-regexp "^$" nil t)
      (goto-char (point-min)))
    ;; Perform frequency count.
    (save-excursion
      (while (search-forward-regexp "^blacklist_from\\s-*\\(.*\\)@\\(.*\\)$"
                                    nil t)
        ;; Remove top-level-domain from hostname.
        (let ((parts (cdr (reverse (split-string (match-string 2) "\\.")))))
          ;; Add counts for each host and domain part.
          (while parts
            (puthash (car parts)
                     (1+ (gethash (car parts) domains 0))
                     domains)
            (setq parts (cdr parts))))))

    ;; Output
    (delete-other-windows)
    (pop-to-buffer (get-buffer-create "*MH-E Spammer Frequencies*"))
    (erase-buffer)
    (maphash (lambda (key value)
               (if (> value 2)
                   (insert (format "%s %s\n" key value))))
             domains)
    ;; Sort ascending by count, then flip to list the worst first.
    (sort-numeric-fields 2 (point-min) (point-max))
    (reverse-region (point-min) (point-max))
    (goto-char (point-min))))
310
311 \f
312
313 ;; Bogofilter Interface
314
(defvar mh-bogofilter-executable (executable-find "bogofilter")
  "Full path of the \"bogofilter\" program found when this file was loaded.
This is nil if the program could not be found.")
316
(defun mh-bogofilter-blacklist (msg)
  "Blacklist MSG with bogofilter.

Bogofilter is a Bayesian spam filtering program. It is available
from your local distribution or from
http://bogofilter.sourceforge.net/.

You teach bogofilter by running:

    bogofilter -n < good-message

on every good message, and

    bogofilter -s < spam-message

on every spam message. This is called a full training; the FAQ
distributed with bogofilter describes three other training methods.
Note that most Bayesian filters need 1000 to 5000 of each type of
message before they start doing a good job.

To use bogofilter, add the following recipes to \".procmailrc\":

    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with bogofilter.
    :0fw
    | bogofilter -3 -e -p

    :0:
    * ^X-Bogosity: Yes, tests=bogofilter
    spam/.

    :0:
    * ^X-Bogosity: Unsure, tests=bogofilter
    spam/unsure/.

When bogofilter classifies a message incorrectly, or is unsure, use
the MH-E commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist]
to update bogofilter's training.

The \"Bogofilter FAQ\" suggests that you run the following
occasionally to shrink the database:

    bogoutil -d wordlist.db | bogoutil -l wordlist.db.new
    mv wordlist.db wordlist.db.prv
    mv wordlist.db.new wordlist.db

The \"Bogofilter tuning HOWTO\" describes how you can fine-tune Bogofilter."
  (if (null mh-bogofilter-executable)
      (error "Unable to find the bogofilter executable"))
  ;; The message file is bogofilter's standard input; "-s" registers
  ;; it as spam.
  (call-process mh-bogofilter-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "-s"))
369
(defun mh-bogofilter-whitelist (msg)
  "Whitelist MSG with bogofilter.

More information is available in `mh-bogofilter-blacklist'."
  (if (null mh-bogofilter-executable)
      (error "Unable to find the bogofilter executable"))
  ;; The message file is bogofilter's standard input; "-n" registers
  ;; it as a good message.
  (call-process mh-bogofilter-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "-n"))
379
380 \f
381
382 ;; Spamprobe Interface
383
(defvar mh-spamprobe-executable (executable-find "spamprobe")
  "Full path of the \"spamprobe\" program found when this file was loaded.
This is nil if the program could not be found.")
385
(defun mh-spamprobe-blacklist (msg)
  "Blacklist MSG with SpamProbe.

SpamProbe is a Bayesian spam filtering program. It is available
from your local distribution or from
http://spamprobe.sourceforge.net.

To use SpamProbe, add the following recipes to \".procmailrc\":

    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with SpamProbe.
    :0
    SCORE=| spamprobe receive

    :0 wf
    | formail -I \"X-SpamProbe: $SCORE\"

    :0:
    *^X-SpamProbe: SPAM
    spam/.

When SpamProbe classifies a message incorrectly, use the MH-E
commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist] to update
SpamProbe's training."
  (if (null mh-spamprobe-executable)
      (error "Unable to find the spamprobe executable"))
  ;; The message file is spamprobe's standard input; the "spam"
  ;; subcommand registers it as spam.
  (call-process mh-spamprobe-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "spam"))
415
(defun mh-spamprobe-whitelist (msg)
  "Whitelist MSG with SpamProbe.

More information is available in `mh-spamprobe-blacklist'."
  (if (null mh-spamprobe-executable)
      (error "Unable to find the spamprobe executable"))
  ;; The message file is spamprobe's standard input; the "good"
  ;; subcommand registers it as a good message.
  (call-process mh-spamprobe-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "good"))
425
426 (provide 'mh-junk)
427
428 ;; Local Variables:
429 ;; indent-tabs-mode: nil
430 ;; sentence-end-double-space: nil
431 ;; End:
432
433 ;; arch-tag: 603335f1-77ff-4306-8828-5d3dad51abe1
434 ;;; mh-junk.el ends here