1 ;;; mh-junk.el --- Interface to anti-spam measures
3 ;; Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
5 ;; Author: Satyaki Das <satyaki@theforce.stanford.edu>,
6 ;; Bill Wohler <wohler@newt.com>
7 ;; Maintainer: Bill Wohler <wohler@newt.com>
8 ;; Keywords: mail, spam
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software; you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation; either version 2, or (at your option)
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the
24 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25 ;; Boston, MA 02110-1301, USA.
29 ;; Spam handling in MH-E.
35 (eval-when-compile (require 'mh-acros))
39 ;; Interactive functions callable from the folder buffer
;; NOTE(review): this form was rebuilt from a listing that had dropped
;; lines (the `?+' cond value, the `(if dest' wrapper and the trailing
;; `mh-next-msg' call); confirm against the upstream file.
(defun mh-junk-blacklist (range)
  "Blacklist RANGE as spam.

This command trains the spam program in use (see the option
`mh-junk-program') with the content of RANGE and then handles the
message(s) as specified by the option `mh-junk-disposition'.

Check the documentation of `mh-interactive-range' to see how RANGE is
read in interactive use.

For more information about using your particular spam fighting
program, see:

  - `mh-spamassassin-blacklist'
  - `mh-bogofilter-blacklist'
  - `mh-spamprobe-blacklist'"
  (interactive (list (mh-interactive-range "Blacklist")))
  (let ((blacklist-func (nth 1 (assoc mh-junk-choice mh-junk-function-alist))))
    (unless blacklist-func
      (error "Customize `mh-junk-program' appropriately"))
    ;; Normalize `mh-junk-disposition' into a folder name:
    ;;   nil    -> nil (delete the message instead of refiling)
    ;;   ""     -> "+"
    ;;   "+foo" -> used as is
    ;;   "@foo" -> subfolder foo of the current folder
    ;;   "foo"  -> "+foo"
    (let ((dest (cond ((null mh-junk-disposition) nil)
                      ((equal mh-junk-disposition "") "+")
                      ((eq (aref mh-junk-disposition 0) ?+)
                       mh-junk-disposition)
                      ((eq (aref mh-junk-disposition 0) ?@)
                       (concat mh-current-folder "/"
                               (substring mh-junk-disposition 1)))
                      (t (concat "+" mh-junk-disposition)))))
      (mh-iterate-on-range msg range
        (message "Blacklisting message %d..." msg)
        (funcall blacklist-func msg)
        (message "Blacklisting message %d...done" msg)
        ;; Record the message in `mh-seen-list' (once only).
        (unless (memq msg mh-seen-list)
          (setq mh-seen-list (cons msg mh-seen-list)))
        (if dest
            (mh-refile-a-msg nil (intern dest))
          (mh-delete-a-msg nil)))
      (mh-next-msg))))
;; NOTE(review): the closing `mh-next-msg' call was restored from a
;; listing with dropped lines; confirm against the upstream file.
(defun mh-junk-whitelist (range)
  "Whitelist RANGE as ham.

This command reclassifies the RANGE as ham if it were incorrectly
classified as spam (see the option `mh-junk-program'). It then
refiles the message into the \"+inbox\" folder.

Check the documentation of `mh-interactive-range' to see how
RANGE is read in interactive use."
  (interactive (list (mh-interactive-range "Whitelist")))
  (let ((whitelist-func (nth 2 (assoc mh-junk-choice mh-junk-function-alist))))
    (unless whitelist-func
      (error "Customize `mh-junk-program' appropriately"))
    (mh-iterate-on-range msg range
      (message "Whitelisting message %d..." msg)
      (funcall whitelist-func msg)
      (message "Whitelisting message %d...done" msg)
      ;; Refile into the folder named by `mh-inbox'.
      (mh-refile-a-msg nil (intern mh-inbox)))
    (mh-next-msg)))
103 ;; Spamassassin Interface
(defvar mh-spamassassin-executable (executable-find "spamassassin")
  "File name of the \"spamassassin\" program as located by `executable-find'.
This is nil if the program was not found when this file was loaded.")

(defvar mh-sa-learn-executable (executable-find "sa-learn")
  "File name of the \"sa-learn\" program as located by `executable-find'.
This is nil if the program was not found when this file was loaded.")
;; NOTE(review): the procmail recipe lines in the docstring and the
;; `sender' binding, `save-excursion', `erase-buffer' and `progn' forms
;; were restored from a listing with dropped lines; confirm against the
;; upstream file.
(defun mh-spamassassin-blacklist (msg)
  "Blacklist MSG with SpamAssassin.

SpamAssassin is one of the more popular spam filtering programs. Get
it from your local distribution or from http://spamassassin.org/.

To use SpamAssassin, add the following recipes to \".procmailrc\":

    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with SpamAssassin.
    :0fw
    | spamc

    # Anything with a spam level of 10 or more is junked immediately.
    :0:
    * ^X-Spam-Level: ..........
    /dev/null

    :0:
    * ^X-Spam-Status: Yes
    spam/.

If you don't use \"spamc\", use \"spamassassin -P -a\".

Note that one of the recipes above throws away messages with a score
greater than or equal to 10. Here's how you can determine a value that
works best for you.

First, run \"spamassassin -t\" on every mail message in your archive and
use Gnumeric to verify that the average plus the standard deviation of
good mail is under 5, the SpamAssassin default for \"spam\".

Using Gnumeric, sort the messages by score and view the messages with
the highest score. Determine the score which encompasses all of your
interesting messages and add a couple of points to be conservative.
Add that many dots to the \"X-Spam-Level:\" header field above to send
messages with that score down the drain.

In the example above, messages with a score of 5-9 are set aside in
the \"+spam\" folder for later review. The major weakness of rules-based
filters is a plethora of false positives so it is worthwhile to check.

If SpamAssassin classifies a message incorrectly, or is unsure, you
can use the MH-E commands \\[mh-junk-blacklist] and
\\[mh-junk-whitelist].

The \\[mh-junk-blacklist] command adds a \"blacklist_from\" entry to
\"~/.spamassassin/user_prefs\", deletes the message, and sends the
message to the Razor, so that others might not see this spam. If the
\"sa-learn\" command is available, the message is also recategorized as
spam.

The \\[mh-junk-whitelist] command adds a \"whitelist_from\" rule to the
\"~/.spamassassin/user_prefs\" file. If the \"sa-learn\" command is
available, the message is also recategorized as ham.

Over time, you'll observe that the same host or domain occurs
repeatedly in the \"blacklist_from\" entries, so you might think that
you could avoid future spam by blacklisting all mail from a particular
domain. The utility function `mh-spamassassin-identify-spammers' helps
you do precisely that. This function displays a frequency count of the
hosts and domains in the \"blacklist_from\" entries from the last blank
line in \"~/.spamassassin/user_prefs\" to the end of the file. This
information can be used so that you can replace multiple
\"blacklist_from\" entries with a single wildcard entry such as:

    blacklist_from *@*amazingoffersdirect2u.com

In versions of SpamAssassin (2.50 and on) that support a Bayesian
classifier, \\[mh-junk-blacklist] uses the \"sa-learn\" program to
recategorize the message as spam. Neither MH-E, nor SpamAssassin,
rebuilds the database after adding words, so you will need to run
\"sa-learn --rebuild\" periodically. This can be done by adding the
following to your crontab:

    0 * * * * sa-learn --rebuild > /dev/null 2>&1"
  (unless mh-spamassassin-executable
    (error "Unable to find the spamassassin executable"))
  ;; Capture the folder now: the buffer is switched below, so the value
  ;; of `mh-current-folder' is saved in a local first.
  (let ((current-folder mh-current-folder)
        (msg-file (mh-msg-filename msg mh-current-folder))
        (sender))
    (save-excursion
      (message "Reporting message %d..." msg)
      (mh-truncate-log-buffer)
      ;; The message file is fed to spamassassin on standard input.
      (call-process mh-spamassassin-executable msg-file mh-log-buffer nil
                    ;;"--report" "--remove-from-whitelist"
                    "-r" "-R")          ; spamassassin V2.20
      (when mh-sa-learn-executable
        (message "Recategorizing this message as spam...")
        (call-process mh-sa-learn-executable msg-file mh-log-buffer nil
                      "--single" "--spam" "--local" "--no-rebuild"))
      (message "Blacklisting message %d..." msg)
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      ;; Run scan with a format that prints the From address unless the
      ;; message is from the user (mymbox), in which case it prints
      ;; nothing.
      (call-process (expand-file-name mh-scan-prog mh-progs)
                    nil mh-junk-background nil
                    (format "%s" msg) current-folder
                    "-format" "%<(mymbox{from})%|%(addr{from})%>")
      (goto-char (point-min))
      (if (search-forward-regexp "^\\(.+\\)$" nil t)
          (progn
            (setq sender (match-string 0))
            (mh-spamassassin-add-rule "blacklist_from" sender)
            (message "Blacklisting message %d...done" msg))
        (message "Blacklisting message %d...not done (from my address)" msg)))))
;; NOTE(review): the `from' binding, `save-excursion', `erase-buffer',
;; `(if show-buffer', `(setq from' and `(kill-buffer nil)' lines were
;; restored from a listing with dropped lines; confirm against the
;; upstream file.
(defun mh-spamassassin-whitelist (msg)
  "Whitelist MSG with SpamAssassin.

The \\[mh-junk-whitelist] command adds a \"whitelist_from\" rule to
the \"~/.spamassassin/user_prefs\" file. If the \"sa-learn\" command
is available, the message is also recategorized as ham.

See `mh-spamassassin-blacklist' for more information."
  (unless mh-spamassassin-executable
    (error "Unable to find the spamassassin executable"))
  (let ((msg-file (mh-msg-filename msg mh-current-folder))
        (show-buffer (get-buffer mh-show-buffer))
        from)
    (save-excursion
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      (message "Removing spamassassin markup from message...")
      ;; The cleaned-up message is collected in the temporary buffer...
      (call-process mh-spamassassin-executable msg-file mh-temp-buffer nil
                    ;; "--remove-markup"
                    "-d")               ; spamassassin V2.20
      (if show-buffer
          (kill-buffer show-buffer))
      ;; ...and written back over the original message file.
      (write-file msg-file)
      (when mh-sa-learn-executable
        (message "Recategorizing this message as ham...")
        ;; Each option must be its own argument; "--local --no-rebuild"
        ;; as a single string reaches sa-learn as one unknown option.
        (call-process mh-sa-learn-executable msg-file mh-temp-buffer nil
                      "--single" "--ham" "--local" "--no-rebuild"))
      (message "Whitelisting message %d..." msg)
      (setq from
            (car (mh-funcall-if-exists
                  ietf-drums-parse-address (mh-get-header-field "From:"))))
      (kill-buffer nil)
      (unless (or (null from) (equal from ""))
        (mh-spamassassin-add-rule "whitelist_from" from))
      (message "Whitelisting message %d...done" msg))))
;; NOTE(review): the `case-fold-search' binding, `find-file', `progn'
;; and `save-buffer' lines were restored from a listing with dropped
;; lines; confirm against the upstream file.
(defun mh-spamassassin-add-rule (rule body)
  "Add a new rule to \"~/.spamassassin/user_prefs\".
The name of the rule is RULE and its body is BODY.

The rule is written as a line \"RULE<TAB>BODY\". If such a line is
already found in the file (ignoring case), nothing is added;
otherwise the line is appended and the file is saved. The buffer
visiting the file is killed afterwards unless it already existed."
  (save-window-excursion
    (let* ((line (format "%s\t%s\n" rule body))
           (case-fold-search t)
           (file (expand-file-name "~/.spamassassin/user_prefs"))
           (buffer-exists (find-buffer-visiting file)))
      (find-file file)
      ;; When the file was already being visited, point may be anywhere;
      ;; search from the top so that earlier duplicates are seen.
      (goto-char (point-min))
      (unless (search-forward (format "\n%s" line) nil t)
        (goto-char (point-max))
        ;; Make sure the new rule starts on its own line.
        (insert (if (bolp) "" "\n") line)
        (save-buffer))
      (if (not buffer-exists)
          (kill-buffer nil)))))
;; NOTE(review): the `(interactive)', `find-file', `save-excursion',
;; `(while host', `erase-buffer', `(if (> value 2)' and `domains)' lines
;; were restored from a listing with dropped lines; in particular the
;; display threshold of 2 should be confirmed against the upstream file.
(defun mh-spamassassin-identify-spammers ()
  "Identify spammers who are repeat offenders.

This function displays a frequency count of the hosts and domains
in the \"blacklist_from\" entries from the last blank line in
\"~/.spamassassin/user_prefs\" to the end of the file. This
information can be used so that you can replace multiple
\"blacklist_from\" entries with a single wildcard entry such as:

    blacklist_from *@*amazingoffersdirect2u.com"
  (interactive)
  (let* ((file (expand-file-name "~/.spamassassin/user_prefs"))
         (domains (make-hash-table :test 'equal)))
    (find-file file)
    ;; Only consider entries between last blank line and end of file.
    ;; If the file has no blank line, consider the whole file instead
    ;; of signaling a search error.
    (goto-char (1- (point-max)))
    (unless (search-backward-regexp "^$" nil t)
      (goto-char (point-min)))
    ;; Perform frequency count.
    (save-excursion
      (while (search-forward-regexp "^blacklist_from\\s-*\\(.*\\)@\\(.*\\)$"
                                    nil t)
        (let ((host (match-string 2))
              value)
          ;; Remove top-level-domain from hostname.
          (setq host (cdr (reverse (split-string host "\\."))))
          ;; Add counts for each host and domain part.
          (while host
            (setq value (gethash (car host) domains))
            (puthash (car host) (1+ (if (not value) 0 value)) domains)
            (setq host (cdr host))))))

    ;; Output
    (delete-other-windows)
    (pop-to-buffer (get-buffer-create "*MH-E Spammer Frequencies*"))
    (erase-buffer)
    ;; List only the parts that occur more than twice.
    (maphash (lambda (key value)
               (if (> value 2)
                   (insert (format "%s %s\n" key value))))
             domains)
    ;; Sort by count (second field), highest first.
    (sort-numeric-fields 2 (point-min) (point-max))
    (reverse-region (point-min) (point-max))
    (goto-char (point-min))))
313 ;; Bogofilter Interface
(defvar mh-bogofilter-executable (executable-find "bogofilter")
  "File name of the \"bogofilter\" program as located by `executable-find'.
This is nil if the program was not found when this file was loaded.")
;; NOTE(review): the procmail recipe lines in the docstring and the tail
;; of the `call-process' form (the \"-s\" argument, taken from the
;; docstring's own usage example) were restored from a listing with
;; dropped lines; confirm against the upstream file.
(defun mh-bogofilter-blacklist (msg)
  "Blacklist MSG with bogofilter.

Bogofilter is a Bayesian spam filtering program. Get it from your
local distribution or from http://bogofilter.sourceforge.net/.

Bogofilter is taught by running:

    bogofilter -n < good-message

on every good message, and

    bogofilter -s < spam-message

on every spam message. This is called a full training; three other
training methods are described in the FAQ that is distributed with
bogofilter. Note that most Bayesian filters need 1000 to 5000 of each
type of message to start doing a good job.

To use bogofilter, add the following recipes to \".procmailrc\":

    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with bogofilter.
    :0fw
    | bogofilter -3 -e -p

    :0:
    * ^X-Bogosity: Yes, tests=bogofilter
    spam/.

    :0:
    * ^X-Bogosity: Unsure, tests=bogofilter
    spam/unsure/.

If bogofilter classifies a message incorrectly, or is unsure, you can
use the MH-E commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist]
to update bogofilter's training.

The \"Bogofilter FAQ\" suggests that you run the following
occasionally to shrink the database:

    bogoutil -d wordlist.db | bogoutil -l wordlist.db.new
    mv wordlist.db wordlist.db.prv
    mv wordlist.db.new wordlist.db

The \"Bogofilter tuning HOWTO\" describes how you can fine-tune Bogofilter."
  (unless mh-bogofilter-executable
    (error "Unable to find the bogofilter executable"))
  (let ((msg-file (mh-msg-filename msg mh-current-folder)))
    ;; The message file is fed to bogofilter on standard input.
    (call-process mh-bogofilter-executable msg-file mh-junk-background
                  nil "-s")))
;; NOTE(review): the tail of the `call-process' form (the \"-n\"
;; argument, the good-message flag shown in the documentation of
;; `mh-bogofilter-blacklist') was restored from a listing with dropped
;; lines; confirm against the upstream file.
(defun mh-bogofilter-whitelist (msg)
  "Whitelist MSG with bogofilter.

See `mh-bogofilter-blacklist' for more information."
  (unless mh-bogofilter-executable
    (error "Unable to find the bogofilter executable"))
  (let ((msg-file (mh-msg-filename msg mh-current-folder)))
    ;; The message file is fed to bogofilter on standard input.
    (call-process mh-bogofilter-executable msg-file mh-junk-background
                  nil "-n")))
382 ;; Spamprobe Interface
(defvar mh-spamprobe-executable (executable-find "spamprobe")
  "File name of the \"spamprobe\" program as located by `executable-find'.
This is nil if the program was not found when this file was loaded.")
;; NOTE(review): the procmail recipe lines in the docstring and the tail
;; of the `call-process' form (the \"spam\" subcommand) were restored
;; from a listing with dropped lines; confirm against the upstream file.
(defun mh-spamprobe-blacklist (msg)
  "Blacklist MSG with SpamProbe.

SpamProbe is a Bayesian spam filtering program. Get it from your local
distribution or from http://spamprobe.sourceforge.net.

To use SpamProbe, add the following recipes to \".procmailrc\":

    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with SpamProbe.
    :0
    SCORE=| spamprobe receive

    :0 wf
    | formail -I \"X-SpamProbe: $SCORE\"

    :0:
    *^X-SpamProbe: SPAM
    spam/.

If SpamProbe classifies a message incorrectly, you can use the
MH-E commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist] to
update SpamProbe's training."
  (unless mh-spamprobe-executable
    (error "Unable to find the spamprobe executable"))
  (let ((msg-file (mh-msg-filename msg mh-current-folder)))
    ;; The message file is fed to spamprobe on standard input.
    (call-process mh-spamprobe-executable msg-file mh-junk-background
                  nil "spam")))
;; NOTE(review): the tail of the `call-process' form (the \"good\"
;; subcommand) was restored from a listing with dropped lines; confirm
;; against the upstream file.
(defun mh-spamprobe-whitelist (msg)
  "Whitelist MSG with SpamProbe.

See `mh-spamprobe-blacklist' for more information."
  (unless mh-spamprobe-executable
    (error "Unable to find the spamprobe executable"))
  (let ((msg-file (mh-msg-filename msg mh-current-folder)))
    ;; The message file is fed to spamprobe on standard input.
    (call-process mh-spamprobe-executable msg-file mh-junk-background
                  nil "good")))
429 ;; indent-tabs-mode: nil
430 ;; sentence-end-double-space: nil
433 ;; arch-tag: 603335f1-77ff-4306-8828-5d3dad51abe1
434 ;;; mh-junk.el ends here