]> code.delx.au - gnu-emacs/blob - lisp/mh-e/mh-junk.el
Upgraded to MH-E version 7.4.4.
[gnu-emacs] / lisp / mh-e / mh-junk.el
1 ;;; mh-junk.el --- Interface to anti-spam measures
2
3 ;; Copyright (C) 2003 Free Software Foundation, Inc.
4
5 ;; Author: Satyaki Das <satyaki@theforce.stanford.edu>,
6 ;; Bill Wohler <wohler@newt.com>
7 ;; Maintainer: Bill Wohler <wohler@newt.com>
8 ;; Keywords: mail, spam
9
10 ;; This file is part of GNU Emacs.
11
12 ;; GNU Emacs is free software; you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; any later version.
16
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
21
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the
24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25 ;; Boston, MA 02111-1307, USA.
26
27 ;;; Commentary:
28
29 ;; Spam handling in MH-E.
30
31 ;;; Change Log:
32
33 ;;; Code:
34
35 (require 'mh-e)
36
37 ;; Interactive functions callable from the folder buffer
38 ;;;###mh-autoload
(defun mh-junk-blacklist (range)
  "Blacklist RANGE as spam.

Check the documentation of `mh-interactive-range' to see how RANGE is read in
interactive use.

Each message is first passed to the blacklisting function selected by the
value of `mh-junk-choice'.  After that, the message is refiled to
`mh-junk-mail-folder' if that variable is a string, or deleted if it is nil.

To change the spam program being used, customize `mh-junk-program'.  Directly
setting `mh-junk-choice' is not recommended.

The documentation for the following functions describes what setup is needed
for the different spam fighting programs:

  - `mh-bogofilter-blacklist'
  - `mh-spamprobe-blacklist'
  - `mh-spamassassin-blacklist'"
  (interactive (list (mh-interactive-range "Blacklist")))
  (let* ((junk-entry (assoc mh-junk-choice mh-junk-function-alist))
         ;; The second element of the alist entry is the blacklist function.
         (spam-handler (nth 1 junk-entry))
         junk-folder)
    (or spam-handler
        (error "Customize `mh-junk-program' appropriately"))
    ;; Turn `mh-junk-mail-folder' into a folder name: nil means "delete",
    ;; the empty string means "+", a leading "+" is taken verbatim, a leading
    ;; "@" is relative to the current folder, anything else gets a "+" prefix.
    (setq junk-folder
          (cond ((null mh-junk-mail-folder) nil)
                ((equal mh-junk-mail-folder "") "+")
                (t
                 (let ((prefix (aref mh-junk-mail-folder 0)))
                   (cond ((eq prefix ?+)
                          mh-junk-mail-folder)
                         ((eq prefix ?@)
                          (concat mh-current-folder "/"
                                  (substring mh-junk-mail-folder 1)))
                         (t
                          (concat "+" mh-junk-mail-folder)))))))
    (mh-iterate-on-range msg range
      (funcall (symbol-function spam-handler) msg)
      (cond (junk-folder
             (mh-refile-a-msg nil (intern junk-folder)))
            (t
             (mh-delete-a-msg nil))))
    (mh-next-msg)))
76
77 ;;;###mh-autoload
(defun mh-junk-whitelist (range)
  "Whitelist RANGE incorrectly classified as spam.

Check the documentation of `mh-interactive-range' to see how RANGE is read in
interactive use.

Each message is first passed to the whitelisting function selected by the
value of `mh-junk-choice'.  Then the message is refiled to `mh-inbox'.

To change the spam program being used, customize `mh-junk-program'.  Directly
setting `mh-junk-choice' is not recommended."
  (interactive (list (mh-interactive-range "Whitelist")))
  (let* ((junk-entry (assoc mh-junk-choice mh-junk-function-alist))
         ;; The third element of the alist entry is the whitelist function.
         (ham-handler (nth 2 junk-entry)))
    (or ham-handler
        (error "Customize `mh-junk-program' appropriately"))
    (mh-iterate-on-range msg range
      (funcall (symbol-function ham-handler) msg)
      (mh-refile-a-msg nil (intern mh-inbox)))
    (mh-next-msg)))
97
98 \f
99
100 ;; Bogofilter Interface
101
(defvar mh-bogofilter-executable (executable-find "bogofilter")
  "File name of the bogofilter program, or nil if it was not found.
The value is computed with `executable-find' when this file is loaded.")
103
(defun mh-bogofilter-blacklist (msg)
  "Classify MSG as spam.
Tell bogofilter that the message is spam.

Bogofilter is a Bayesian spam filtering program.  Get it from your local
distribution or from:
   http://bogofilter.sourceforge.net/

You first need to teach bogofilter.  This is done by running

   bogofilter -n < good-message

on every good message, and

   bogofilter -s < spam-message

on every spam message.  Most Bayesian filters need 1000 to 5000 of each to
start doing a good job.

To use bogofilter, add the following .procmailrc recipes which you can also
find in the bogofilter man page:

   # Bogofilter
   :0fw
   | bogofilter -u -e -p

   :0
   * ^X-Bogosity: Yes, tests=bogofilter
   $SPAM

Bogofilter continues to feed the messages it classifies back into its
database.  Occasionally it misses, and those messages need to be reclassified.
MH-E can do this for you.  Use \\[mh-junk-blacklist] to reclassify messages in
your +inbox as spam, and \\[mh-junk-whitelist] to reclassify messages in your
spambox as good messages."
  (or mh-bogofilter-executable
      (error "Couldn't find the bogofilter executable"))
  ;; The message file is bogofilter's standard input; a destination of 0
  ;; discards the output and does not wait for the process to finish.
  (call-process mh-bogofilter-executable
                (mh-msg-filename msg mh-current-folder)
                0 nil "-Ns"))
143
(defun mh-bogofilter-whitelist (msg)
  "Reinstate incorrectly filtered MSG.
Train bogofilter to think of the message as non-spam."
  (or mh-bogofilter-executable
      (error "Couldn't find the bogofilter executable"))
  ;; The message file is bogofilter's standard input; a destination of 0
  ;; discards the output and does not wait for the process to finish.
  (call-process mh-bogofilter-executable
                (mh-msg-filename msg mh-current-folder)
                0 nil "-Sn"))
151
152 \f
153
154 ;; Spamprobe Interface
155
(defvar mh-spamprobe-executable (executable-find "spamprobe")
  "File name of the spamprobe program, or nil if it was not found.
The value is computed with `executable-find' when this file is loaded.")
157
(defun mh-spamprobe-blacklist (msg)
  "Classify MSG as spam.
Tell spamprobe that the message is spam.

Spamprobe is a Bayesian spam filtering program.  More info about the program
can be found at:
   http://spamprobe.sourceforge.net

Here is a procmail recipe that stores incoming spam mail in the folder +spam
and good mail in /home/user/Mail/mdrop/mbox.  This recipe is provided as an
example in the spamprobe man page.

   PATH=/bin:/usr/bin:/usr/local/bin
   DEFAULT=/home/user/Mail/mdrop/mbox
   SPAM=/home/user/Mail/spam/.

   # Spamprobe filtering
   :0
   SCORE=| spamprobe receive
   :0 wf
   | formail -I \"X-SpamProbe: $SCORE\"
   :0 a:
   *^X-SpamProbe: SPAM
   $SPAM

Occasionally some good mail gets misclassified as spam.  You can use
\\[mh-junk-whitelist] to reclassify that as good mail."
  (or mh-spamprobe-executable
      (error "Couldn't find the spamprobe executable"))
  ;; The message file is spamprobe's standard input; a destination of 0
  ;; discards the output and does not wait for the process to finish.
  (call-process mh-spamprobe-executable
                (mh-msg-filename msg mh-current-folder)
                0 nil "spam"))
189
(defun mh-spamprobe-whitelist (msg)
  "Reinstate incorrectly filtered MSG.
Train spamprobe to think of the message as non-spam."
  (or mh-spamprobe-executable
      (error "Couldn't find the spamprobe executable"))
  ;; The message file is spamprobe's standard input; a destination of 0
  ;; discards the output and does not wait for the process to finish.
  (call-process mh-spamprobe-executable
                (mh-msg-filename msg mh-current-folder)
                0 nil "good"))
197
198 \f
199
200 ;; Spamassassin Interface
201
(defvar mh-spamassassin-executable (executable-find "spamassassin")
  "File name of the spamassassin program, or nil if it was not found.
The value is computed with `executable-find' when this file is loaded.")

(defvar mh-sa-learn-executable (executable-find "sa-learn")
  "File name of the sa-learn program, or nil if it was not found.
The value is computed with `executable-find' when this file is loaded.
When nil, the spamassassin functions in this file skip the sa-learn step.")
204
(defun mh-spamassassin-blacklist (msg)
  "Blacklist MSG.
This is done by sending the message to Razor and by appending the sender to
~/.spamassassin/user_prefs in a blacklist_from rule.  If sa-learn is available,
the message is also recategorized as spam.

Spamassassin is an excellent spam filter.  For more information, see:
  http://spamassassin.org/.

I ran \"spamassassin -t\" on every mail message in my archive and ran an
analysis in Gnumeric to find that the standard deviation of good mail
scored under 5 (coincidentally, the spamassassin default for \"spam\").

Furthermore, I observed that there weren't any messages with a score of 8
or more that were interesting, so I added a couple of points to be
conservative and send any message with a score of 10 or more down the
drain.  You might want to use a score of 12 or 13 to be really conservative.
I have found that this really decreases the amount of junk to review.

Messages with a score of 5-9 are set aside for later review.  The major
weakness of rules-based filters is a plethora of false positives; I catch one
or two legitimate messages in here a week, so it is worthwhile to check.

You might choose to do this analysis yourself to pick a good score for
deleting spam sight unseen, or you might pick a score out of a hat, or you
might choose to be very conservative and not delete any messages at all.

Based upon this discussion, here is what the associated ~/.procmailrc
entries look like.  These rules appear before my list filters so that spam
sent to mailing lists gets pruned too.

   #
   # Spam
   #
   :0fw
   | spamc

   # Anything with a spam level of 10 or more is junked immediately.
   :0:
   * ^X-Spam-Level: ..........
   /dev/null

   :0
   * ^X-Spam-Status: Yes
   $SPAM

If you don't use \"spamc\", use \"spamassassin -P -a\".

A handful of spam does find its way into +inbox.  In this case, use
\\[mh-junk-blacklist] to add a \"blacklist_from\" line to
~/.spamassassin/user_prefs, delete the message, and send the message to the
Razor, so that others might not see this spam.

Over time, you see some patterns in the blacklisted addresses and can
replace several lines with wildcards.  For example, it is clear that High
Speed Media is the biggest bunch of jerks on the Net.  Here are some of the
entries I have for them, and the list continues to grow.

   blacklist_from	*@*-hsm-*.com
   blacklist_from	*@*182*643*.com
   blacklist_from	*@*antarhsm*.com
   blacklist_from	*@*h*speed*
   blacklist_from	*@*hsm*182*.com
   blacklist_from	*@*hsm*643*.com
   blacklist_from	*@*hsmridi2983cslt227.com
   blacklist_from	*@*list*hsm*.com
   blacklist_from	*@h*s*media*
   blacklist_from	*@hsmdrct.com
   blacklist_from	*@hsmridi2983csltsite.com

The function `mh-spamassassin-identify-spammers' is provided that shows the
frequency counts of the host and domain names in your blacklist_from
entries.  This can be helpful when editing the blacklist_from entries.

In versions of spamassassin (2.50 and on) that support a Bayesian classifier,
\\[mh-junk-blacklist] uses the sa-learn program to recategorize the message as
spam.  Neither MH-E, nor spamassassin, rebuilds the database after adding
words, so you will need to run \"sa-learn --rebuild\" periodically.  This can
be done by adding the following to your crontab:

   0 * * * *	sa-learn --rebuild > /dev/null 2>&1"
  (or mh-spamassassin-executable
      (error "Couldn't find the spamassassin executable"))
  ;; Read the folder name and message file before `set-buffer' below makes
  ;; another buffer current.
  (let* ((folder mh-current-folder)
         (message-file (mh-msg-filename msg folder)))
    (save-excursion
      ;; Step 1: report the message; output goes to the log buffer.
      (message "Giving this message the Razor...")
      (mh-truncate-log-buffer)
      (call-process mh-spamassassin-executable message-file mh-log-buffer nil
                    "--report" "--remove-from-whitelist")
      ;; Step 2: optionally train the Bayesian classifier.
      (if mh-sa-learn-executable
          (progn
            (message "Recategorizing this message as spam...")
            (call-process mh-sa-learn-executable message-file mh-log-buffer nil
                          "--single" "--spam" "--local" "--no-rebuild")))
      ;; Step 3: extract the sender with scan and add a blacklist rule.
      (message "Blacklisting address...")
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      ;; The scan format prints nothing when the message is from the user
      ;; and the sender's address otherwise.
      (call-process (expand-file-name mh-scan-prog mh-progs) nil t nil
                    (format "%s" msg) folder
                    "-format" "%<(mymbox{from})%|%(addr{from})%>")
      (goto-char (point-min))
      (cond ((search-forward-regexp "^\\(.+\\)$" nil t)
             (mh-spamassassin-add-rule "blacklist_from" (match-string 0))
             (message "Blacklisting address...done"))
            (t
             (message "Blacklisting address...not done (from my address)"))))))
313
(defun mh-spamassassin-whitelist (msg)
  "Whitelist MSG.
Remove the spamassassin markup from the message file, then add a
whitelist_from rule for the sender to the ~/.spamassassin/user_prefs file.
If sa-learn is available, then the message is recategorized as ham.

Signal an error if the spamassassin executable was not found."
  (unless mh-spamassassin-executable
    (error "Couldn't find the spamassassin executable"))
  (let ((msg-file (mh-msg-filename msg mh-current-folder))
        (show-buffer (get-buffer mh-show-buffer))
        from)
    (save-excursion
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      (message "Removing spamassassin markup from message...")
      (call-process mh-spamassassin-executable msg-file mh-temp-buffer nil
                    "--remove-markup")
      ;; NOTE(review): presumably the show buffer is killed so that it does
      ;; not keep displaying the message with its markup -- confirm.
      (if show-buffer
          (kill-buffer show-buffer))
      ;; The cleaned-up text replaces the message file on disk.
      (write-file msg-file)
      (when mh-sa-learn-executable
        (message "Recategorizing this message as ham...")
        ;; Every option must be a separate argument; `call-process' does no
        ;; word splitting, so "--local --no-rebuild" would reach sa-learn as
        ;; a single unknown option.
        (call-process mh-sa-learn-executable msg-file mh-temp-buffer nil
                      "--single" "--ham" "--local" "--no-rebuild"))
      (message "Whitelisting address...")
      (setq from (car (ietf-drums-parse-address (mh-get-header-field "From:"))))
      (kill-buffer nil)
      ;; Skip the rule when no address could be parsed; otherwise a nil
      ;; address would be written to the file as the literal text "nil".
      (unless (or (null from) (equal from ""))
        (mh-spamassassin-add-rule "whitelist_from" from))
      (message "Whitelisting address...done"))))
342
(defun mh-spamassassin-add-rule (rule body)
  "Add a new rule to ~/.spamassassin/user_prefs.
The name of the rule is RULE and its body is BODY.

The rule is written as RULE, a tab, and BODY on a line of its own at the end
of the file, and the file is saved.  Nothing is changed if the file already
contains that line; the comparison ignores case.  If the file was not being
visited before the call, its buffer is killed afterwards."
  (save-window-excursion
    (let* ((line (format "%s\t%s\n" rule body))
           (case-fold-search t)
           (file (expand-file-name "~/.spamassassin/user_prefs"))
           (buffer-exists (find-buffer-visiting file)))
      (find-file file)
      ;; The buffer may already have been open with point anywhere and with
      ;; a restriction in effect, so look at the whole file from the top and
      ;; leave the user's point untouched.
      (save-excursion
        (save-restriction
          (widen)
          (goto-char (point-min))
          ;; Anchoring with "^" instead of a leading newline also finds a
          ;; rule that sits on the very first line of the file.
          (unless (search-forward-regexp (concat "^" (regexp-quote line))
                                         nil t)
            (goto-char (point-max))
            (insert (if (bolp) "" "\n") line)
            (save-buffer))))
      (if (not buffer-exists)
          (kill-buffer nil)))))
359
(defun mh-spamassassin-identify-spammers ()
  "Identify spammers who are repeat offenders.

For each blacklist_from entry from the last blank line of
~/.spamassassin/user_prefs to the end of the file, a list of host and domain
names along with their frequency counts is displayed.  If the file contains
no blank line, all of its entries are considered.  Only names that occur more
than twice are listed, most frequent first.  This information can be used to
replace multiple blacklist_from entries with a single wildcard entry such as:

    blacklist_from	*@*amazingoffersdirect2u.com"
  (interactive)
  (let* ((file (expand-file-name "~/.spamassassin/user_prefs"))
         (domains (make-hash-table :test 'equal)))
    (find-file file)
    ;; Only consider entries between last blank line and end of file.
    ;; Without any blank line the search would signal an error, so fall
    ;; back to scanning the file from its beginning.
    (goto-char (1- (point-max)))
    (unless (search-backward-regexp "^$" nil t)
      (goto-char (point-min)))
    ;; Perform frequency count.
    (save-excursion
      (while (search-forward-regexp "^blacklist_from\\s-*\\(.*\\)@\\(.*\\)$"
                                    nil t)
        ;; Split the part after the "@" on dots, drop the last component
        ;; (the top-level domain) and count each remaining component.
        (dolist (part (cdr (reverse (split-string (match-string 2) "\\."))))
          (puthash part (1+ (gethash part domains 0)) domains))))

    ;; Output
    (delete-other-windows)
    (pop-to-buffer (get-buffer-create "*MH-E Spammer Frequencies*"))
    (erase-buffer)
    (maphash (function
              (lambda (key value)
                (if (> value 2)
                    (insert (format "%s %s\n" key value)))))
             domains)
    ;; Sort ascending on the count column, then flip to get descending order.
    (sort-numeric-fields 2 (point-min) (point-max))
    (reverse-region (point-min) (point-max))
    (goto-char (point-min))))
402
403 (provide 'mh-junk)
404
405 ;;; Local Variables:
406 ;;; indent-tabs-mode: nil
407 ;;; sentence-end-double-space: nil
408 ;;; End:
409
410 ;;; arch-tag: 603335f1-77ff-4306-8828-5d3dad51abe1
411 ;;; mh-junk.el ends here