]> code.delx.au - gnu-emacs/blob - lisp/mail/rmail-spam-filter.el
ac50a870ebc02d1b7018a4e757375b6c87f5b832
[gnu-emacs] / lisp / mail / rmail-spam-filter.el
1 ;;; rmail-spam-filter.el --- spam filter for Rmail, the Emacs mail reader
2
3 ;; Copyright (C) 2002-2011
4 ;; Free Software Foundation, Inc.
5 ;; Keywords: email, spam, filter, rmail
6 ;; Author: Eli Tziperman <eli AT deas.harvard.edu>
7 ;; Package: rmail
8
9 ;; This file is part of GNU Emacs.
10
11 ;; GNU Emacs is free software: you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation, either version 3 of the License, or
14 ;; (at your option) any later version.
15
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
23
24 ;;; Commentary:
25 ;;; -----------
26
27 ;;; Automatically recognize and delete junk email before it is
28 ;;; displayed in rmail/rmail-summary. Spam emails are defined by
29 ;;; specifying one or more of the sender, subject and contents.
30 ;;; URL: http://www.weizmann.ac.il/~eli/Downloads/rmail-spam-filter/
31
32 ;;; Usage:
33 ;;; ------
34
35 ;;; put in your .emacs:
36
37 ;;; (require 'rmail-spam-filter)
38
39 ;;; and use customize (in rmail-spam-filter group) to:
40
41 ;;; (*) turn on the variable rmail-use-spam-filter,
42
43 ;;; (*) specify in variable rsf-definitions-alist what sender,
44 ;;; subject and contents make an email be considered spam.
45
46 ;;; in addition, you may:
47
48 ;;; (*) Block future mail with the subject or sender of a message
49 ;;; while reading it in RMAIL: just click on the "Spam" item on the
50 ;;; menubar, and add the subject or sender to the list of spam
51 ;;; definitions using the mouse and the appropriate menu item. You
52 ;;; need to later also save the list of spam definitions using the
53 ;;; same menu item, or alternatively, see variable
54 ;;; `rsf-autosave-newly-added-definitions'.
55
56 ;;; (*) specify if blind-cc'ed mail (no "To:" header field) is to be
57 ;;; treated as spam (variable rsf-no-blind-cc; Thanks to Ethan
58 ;;; Brown <ethan@gso.saic.com> for this).
59
60 ;;; (*) specify if rmail-spam-filter should ignore case of spam
61 ;;; definitions (variable rsf-ignore-case; Thanks to
62 ;;; Ethan Brown <ethan@gso.saic.com> for the suggestion).
63
64 ;;; (*) Specify a "white-list" of trusted senders. If any
65 ;;; rsf-white-list string matches a substring of the "From"
66 ;;; header, the message is flagged as a valid, non-spam message (Ethan
67 ;;; Brown <ethan@gso.saic.com>).
68
69 ;;; (*) rmail-spam-filter is best used with a general purpose spam
70 ;;; filter such as the procmail-based http://www.spambouncer.org/.
71 ;;; Spambouncer is set to only mark messages as spam/blocked/bulk/OK
72 ;;; via special headers, and these headers may then be defined in
73 ;;; rmail-spam-filter such that the spam is rejected by
74 ;;; rmail-spam-filter itself.
75
76 (require 'rmail)
77 (require 'rmailsum)
78
(defgroup rmail-spam-filter nil
  "Spam filter for Rmail, the Emacs mail reader."
  :group 'rmail)

(defcustom rmail-use-spam-filter nil
  "Non-nil to activate the Rmail spam filter.
Set `rsf-definitions-alist' to define what you consider spam emails."
  :type 'boolean
  :group 'rmail-spam-filter)

(defcustom rsf-file "~/XRMAIL-SPAM"
  "Name of Rmail file for optionally saving some of the spam.
You can either just delete spam, or save it in this file for
later review.  Which action to take for each spam definition is
specified by the \"action\" element of the definition."
  ;; The value is still a plain string; the `file' type only gives
  ;; Customize users file-name completion.
  :type 'file
  :group 'rmail-spam-filter)

(defcustom rsf-no-blind-cc nil
  "Non-nil means mail with neither a To: nor a Cc: header is spam."
  :type 'boolean
  :group 'rmail-spam-filter)

(defcustom rsf-ignore-case nil
  "Non-nil means to ignore case in `rsf-definitions-alist'."
  :type 'boolean
  :group 'rmail-spam-filter)

(defcustom rsf-beep nil
  "Non-nil means to beep if spam is found."
  :type 'boolean
  :group 'rmail-spam-filter)

(defcustom rsf-sleep-after-message 2.0
  "Seconds to wait after displaying a message that spam was found."
  :type 'number
  :group 'rmail-spam-filter)

(defcustom rsf-min-region-to-spam-list 7
  "Minimum size of region that you can add to the spam list.
The aim is to avoid adding too short a region, which could result
in false positive identification of a valid message as spam."
  :type 'integer
  :group 'rmail-spam-filter)

(defcustom rsf-autosave-newly-added-definitions nil
  "Non-nil to auto-save new spam entries.
Any time you add an entry via the \"Spam\" menu, immediately saves
the custom file."
  :type 'boolean
  :group 'rmail-spam-filter)

(defcustom rsf-white-list nil
  "List of regexps to identify valid senders.
If any element matches the \"From\" header, the message is
flagged as a valid, non-spam message.  E.g., if your domain is
\"emacs.com\" then including \"emacs\\\\.com\" in this list would
flag all mail (purporting to be) from your colleagues as valid."
  ;; Elements are passed to `string-match', so declare them as regexps.
  :type '(repeat regexp)
  :group 'rmail-spam-filter)
139
(defcustom rsf-definitions-alist nil
  "A list of rules (definitions) matching spam messages.
Each rule is an alist, with elements of the form (FIELD . REGEXP).
The recognized FIELDS are: from, to, subject, content-type,
x-spam-status, and contents.

The \"contents\" element is matched against the entire text of the
message.  The \"to\" element is matched against the To and Cc
headers taken together.  Every other element is matched against
the message header of the same name.

An empty string for REGEXP has the same effect as leaving that
element out of the rule.

Each rule should contain one \"action\" element, saying what to do
if the rule is matched.  This has the form (action . CHOICE), where
CHOICE may be either `output-and-delete' (save to `rsf-file', then
delete), or `delete-spam' (just delete).

A rule matches only if all the specified elements match."
  :type '(repeat
          (list :format "%v"
                (cons :format "%v" :value (from . "")
                      (const :format "" from)
                      (string :tag "From" ""))
                (cons :format "%v" :value (to . "")
                      (const :format "" to)
                      (string :tag "To" ""))
                (cons :format "%v" :value (subject . "")
                      (const :format "" subject)
                      (string :tag "Subject" ""))
                (cons :format "%v" :value (content-type . "")
                      (const :format "" content-type)
                      (string :tag "Content-Type" ""))
                (cons :format "%v" :value (contents . "")
                      (const :format "" contents)
                      (string :tag "Contents" ""))
                (cons :format "%v" :value (x-spam-status . "")
                      (const :format "" x-spam-status)
                      (string :tag "X-Spam-Status" ""))
                (cons :format "%v" :value (action . output-and-delete)
                      (const :format "" action)
                      (choice :tag "Action selection"
                              (const :tag "Output and delete" output-and-delete)
                              (const :tag "Delete" delete-spam)))))
  :group 'rmail-spam-filter)
184
;; FIXME: no code reads this variable; a plain let-binding inside
;; `rmail-spam-filter' would serve equally well.
(defvar rsf-scanning-messages-now nil
  "Non-nil while `rmail-spam-filter' is scanning messages.")
188
189 ;; the advantage over the automatic filter definitions is the AND conjunction
190 ;; of in-one-definition-elements
(defun rsf-check-field (field-symbol message-data definition result)
  "Update RESULT according to whether one field of a message looks like spam.
FIELD-SYMBOL is a key of a `rsf-definitions-alist' rule, such as
`from' or `to'.  MESSAGE-DATA is the string value of that field in
the message being examined, or nil if the message has no such
field.  DEFINITION is the rule currently being tested.

RESULT is a cons (MAYBE-SPAM . IS-SPAM) and is modified in place:

- If MAYBE-SPAM is already nil, or DEFINITION has no entry (or an
  empty string) for FIELD-SYMBOL, RESULT is left alone.
- Otherwise, if MESSAGE-DATA is non-nil and the rule's regexp
  matches it, IS-SPAM becomes t.
- Otherwise MAYBE-SPAM becomes nil."
  (let ((pattern (cdr (assoc field-symbol definition))))
    (cond
     ;; An earlier field already ruled this definition out.
     ((not (car result)) nil)
     ;; The rule says nothing about this field.
     ((zerop (length pattern)) nil)
     ;; The field is present and matches: spam, unless a later field
     ;; of the same rule contradicts it.
     ((and message-data (string-match pattern message-data))
      (setcdr result t))
     ;; The field is missing from the message or does not match, so
     ;; this rule cannot apply.
     (t (setcar result nil)))))
217
(defun rmail-spam-filter (msg)
  "Return nil if message number MSG is spam based on `rsf-definitions-alist'.
If spam, optionally output message to a file `rsf-file' and delete
it from rmail file.  Called for each new message retrieved by
`rmail-get-new-mail'.

A message whose \"From\" header matches an element of
`rsf-white-list' is never treated as spam.  If `rsf-no-blind-cc'
is non-nil, a message with neither a \"To\" nor a \"Cc\" header is
treated as spam without consulting the rules.  Matching honors
`rsf-ignore-case'.

Return t if the message is not spam."
  (let ((return-value nil)
        ;; maybe-spam is in the car, this-is-a-spam-email in cdr.
        ;; Always a fresh cons, never a quoted constant, because
        ;; `rsf-check-field' modifies the cell in place.
        (maybe-spam (cons nil nil))
        message-sender message-to message-cc message-recipients
        message-subject message-content-type message-spam-status
        ;; Whole text of the message; extracted at most once, and only
        ;; if at least one rule is actually tried.
        message-contents
        ;; Rules not yet rejected.  When the loop below stops on a
        ;; match, the car is the rule that matched.
        (definitions rsf-definitions-alist)
        (exit-while-loop nil)
        ;; Do we want to ignore case in spam definitions.
        (case-fold-search rsf-ignore-case)
        ;; make sure bbdb does not create entries for messages while spam
        ;; filter is scanning the rmail file:
        (bbdb/mail_auto_create_p nil)
        ;; Other things may wish to know if we are running (nothing
        ;; uses this at present).
        (rsf-scanning-messages-now t))
    (save-excursion
      ;; Narrow buffer to header of message and fetch the header
      ;; fields that the rules can refer to.
      (save-restriction
        (goto-char (rmail-msgbeg msg))
        (narrow-to-region (point) (progn (search-forward "\n\n") (point)))
        (setq message-sender (mail-fetch-field "From")
              message-to (mail-fetch-field "To")
              message-cc (mail-fetch-field "Cc")
              ;; To and Cc joined; nil only if both are absent.
              message-recipients (or (and message-to message-cc
                                          (concat message-to ", " message-cc))
                                     message-to
                                     message-cc)
              message-subject (mail-fetch-field "Subject")
              message-content-type (mail-fetch-field "Content-Type")
              message-spam-status (mail-fetch-field "X-Spam-Status")))
      ;; Check for blind CC condition.  Set vars such that while
      ;; loop will be bypassed and spam condition will trigger.
      (and rsf-no-blind-cc
           (null message-recipients)
           (setq exit-while-loop t
                 maybe-spam (cons t t)))
      ;; Check white list, and likewise cause while loop bypass.
      ;; This runs after the blind-cc check, so the white list wins.
      (and message-sender
           (let ((white-list rsf-white-list)
                 (found nil))
             (while (and (not found) white-list)
               (if (string-match (car white-list) message-sender)
                   (setq found t)
                 (setq white-list (cdr white-list))))
             found)
           (setq exit-while-loop t
                 maybe-spam (cons nil nil)))
      ;; Try each rule of `rsf-definitions-alist' in turn until one
      ;; matches.
      (while (and (consp definitions) (not exit-while-loop))
        (let ((definition (car definitions)))
          ;; Start each rule with (t . nil): "not ruled out, nothing
          ;; matched yet".  `rsf-check-field' sets the cdr to t when a
          ;; specified field matches, and the car to nil when a
          ;; specified field is absent or does not match.
          (setq maybe-spam (cons t nil))
          (rsf-check-field 'from message-sender definition maybe-spam)
          ;; Next, if spam was not ruled out already, check recipients:
          (rsf-check-field 'to message-recipients definition maybe-spam)
          ;; Next, if spam was not ruled out already, check subject:
          (rsf-check-field 'subject message-subject definition maybe-spam)
          ;; Next, if spam was not ruled out already, check content-type:
          (rsf-check-field 'content-type message-content-type
                           definition maybe-spam)
          ;; Next, check contents.  The text does not change while we
          ;; scan, so copy it out of the buffer only once rather than
          ;; once per rule.
          (or message-contents
              (setq message-contents
                    (buffer-substring-no-properties
                     (rmail-msgbeg msg) (rmail-msgend msg))))
          (rsf-check-field 'contents message-contents definition maybe-spam)
          ;; Finally, check the X-Spam-Status header.  You will typically
          ;; look for the "Yes" string in this header field.
          (rsf-check-field 'x-spam-status message-spam-status
                           definition maybe-spam)
          (if (and (car maybe-spam) (cdr maybe-spam))
              ;; This rule matched: stop, leaving it as (car definitions).
              (setq exit-while-loop t)
            ;; Else, spam was not yet found, proceed to next rule.
            (setq definitions (cdr definitions)))))

      (if (and (car maybe-spam) (cdr maybe-spam))
          ;; Temporarily set rmail-current-message in order to output
          ;; and delete the spam msg if needed:
          (let ((rmail-current-message msg) ; FIXME does this do anything?
                ;; NOTE(review): when the loop was bypassed by the
                ;; blind-cc check, this is the action of the first rule
                ;; (nil if there are no rules), as in the original
                ;; code -- confirm that this is intended.
                (action (cdr (assq 'action (car-safe definitions))))
                (newfile (not (file-exists-p rsf-file))))
            ;; Check action item in rsf-definitions-alist and do it.
            (cond
             ((eq action 'output-and-delete)
              ;; Else the prompt to write a new file leaves the raw
              ;; mbox buffer visible.
              (and newfile
                   (rmail-show-message (rmail-first-unseen-message) t))
              (rmail-output rsf-file)
              ;; Swap back, else rmail-get-new-mail-1 gets confused.
              (when newfile
                (rmail-swap-buffers-maybe)
                (widen))
              ;; Don't delete if automatic deletion after output is on.
              (or rmail-delete-after-output (rmail-delete-message)))
             ((eq action 'delete-spam)
              (rmail-delete-message)))
            (setq return-value nil))
        (setq return-value t)))
    return-value))
354
(defun rmail-get-new-mail-filter-spam (nnew)
  "Check the NNEW most recent messages for spam.
This is called at the end of `rmail-get-new-mail-1' if there is new mail.

Each new message is passed to `rmail-spam-filter'.  If that
signals an error, scanning stops, `rmail-use-spam-filter' is set
to nil and the user is told.  Otherwise, if any spam was found,
the user is told and, if `rmail-expunge-confirmed' agrees, the
deleted messages are expunged.

Return a string describing the outcome; it is empty if no spam
was found."
  (let* ((nold (- rmail-total-messages nnew))
         (nspam 0)
         (nscan (1+ nold))
         ;; Save the original deleted state of all the messages.
         (rdv-old rmail-deleted-vector)
         errflag)
    ;; Set all messages undeleted so that the expunge only affects spam.
    (setq rmail-deleted-vector (make-string (1+ rmail-total-messages) ?\s))
    ;; Everything from here on runs under `unwind-protect', so that the
    ;; saved deleted state is restored even if the user quits (C-g)
    ;; while the messages are being scanned.
    (unwind-protect
        (progn
          (while (and (not errflag) (<= nscan rmail-total-messages))
            (condition-case nil
                (or (rmail-spam-filter nscan)
                    (setq nspam (1+ nspam)))
              ;; Remember which message broke the filter and stop.
              (error (setq errflag nscan)))
            (setq nscan (1+ nscan)))
          (if errflag
              (progn
                (setq rmail-use-spam-filter nil)
                (if rsf-beep (ding t))
                (message "Spam filter error for new message %d, disabled"
                         errflag)
                (sleep-for rsf-sleep-after-message))
            (when (> nspam 0)
              ;; Otherwise sleep or expunge prompt leaves raw mbox
              ;; buffer showing.
              (rmail-show-message (or (rmail-first-unseen-message) 1) t)
              (unwind-protect
                  (progn
                    (if rsf-beep (ding t))
                    (message
                     "Rmail spam-filter detected and deleted %d spam message%s"
                     nspam (if (= 1 nspam) "" "s"))
                    (sleep-for rsf-sleep-after-message)
                    (if (rmail-expunge-confirmed) (rmail-only-expunge t)))
                ;; Swap back, else get-new-mail-1 gets confused.
                (rmail-swap-buffers-maybe)
                (widen)))))
      ;; Restore the original deleted state.  Character N refers to
      ;; message N.
      (setq rmail-deleted-vector
            (concat (substring rdv-old 0 (1+ nold))
                    ;; This still works if we deleted all the new mail.
                    (substring rmail-deleted-vector (1+ nold)))))
    ;; Return a message based on the number of spam messages found.
    (cond
     (errflag ", error in spam filter")
     ((zerop nspam) "")
     ((= 1 nnew) ", and it appears to be spam")
     ((= nspam nnew) ", and all appear to be spam")
     (t (format ", and %d appear%s to be spam" nspam
                (if (= 1 nspam) "s" ""))))))
406
407 ;; define functions for interactively adding sender/subject of a
408 ;; specific message to the spam definitions while reading it, using
409 ;; the menubar:
(defun rsf-add-subject-to-spam-list ()
  "Add the \"Subject\" header to the spam list.
The header value is obtained with `rmail-get-header' and
regexp-quoted, then appended to `rsf-definitions-alist' as a new
rule with the action `output-and-delete'.  The custom file is
saved at once if `rsf-autosave-newly-added-definitions' is non-nil.

Signal an error if there is no \"Subject\" header."
  (interactive)
  (let* ((subject (rmail-get-header "Subject"))
         (message-subject (and subject (regexp-quote subject))))
    ;; Without this check a missing header would reach `regexp-quote'
    ;; as nil and fail with an obscure wrong-type-argument error.
    (unless message-subject
      (error "This message has no \"Subject\" header"))
    (add-to-list 'rsf-definitions-alist
                 ;; Empty elements behave like absent ones when
                 ;; filtering.  All fields are listed, in the order
                 ;; used by the Customize type of
                 ;; `rsf-definitions-alist', so that the new rule has
                 ;; the shape that type describes.
                 (list '(from . "")
                       '(to . "")
                       (cons 'subject message-subject)
                       '(content-type . "")
                       '(contents . "")
                       '(x-spam-status . "")
                       '(action . output-and-delete))
                 t)
    (customize-mark-to-save 'rsf-definitions-alist)
    (if rsf-autosave-newly-added-definitions
        (progn
          (custom-save-all)
          (message "Added subject `%s' to spam list, and saved it"
                   message-subject))
      (message "Added subject `%s' to spam list (remember to save it)"
               message-subject))))
435
(defun rsf-add-sender-to-spam-list ()
  "Add the \"From\" address to the spam list.
The header value is obtained with `rmail-get-header' and
regexp-quoted, then appended to `rsf-definitions-alist' as a new
rule with the action `output-and-delete'.  The custom file is
saved at once if `rsf-autosave-newly-added-definitions' is non-nil.

Signal an error if there is no \"From\" header."
  (interactive)
  (let* ((sender (rmail-get-header "From"))
         (message-sender (and sender (regexp-quote sender))))
    ;; Without this check a missing header would reach `regexp-quote'
    ;; as nil and fail with an obscure wrong-type-argument error.
    (unless message-sender
      (error "This message has no \"From\" header"))
    (add-to-list 'rsf-definitions-alist
                 ;; All fields are listed, in the order used by the
                 ;; Customize type of `rsf-definitions-alist', so that
                 ;; the new rule has the shape that type describes.
                 (list (cons 'from message-sender)
                       '(to . "")
                       '(subject . "")
                       '(content-type . "")
                       '(contents . "")
                       '(x-spam-status . "")
                       '(action . output-and-delete))
                 t)
    (customize-mark-to-save 'rsf-definitions-alist)
    (if rsf-autosave-newly-added-definitions
        (progn
          (custom-save-all)
          (message "Added sender `%s' to spam list, and saved it"
                   message-sender))
      (message "Added sender `%s' to spam list (remember to save it)"
               message-sender))))
456
(defun rsf-add-region-to-spam-list ()
  "Add the marked region in the Rmail buffer to the spam list.
Adds to spam definitions as a \"contents\" field.

The region text is regexp-quoted and appended to
`rsf-definitions-alist' as a new rule with the action
`output-and-delete'.  Nothing is added, and a message is shown
instead, if the region is inactive or empty, or shorter than
`rsf-min-region-to-spam-list' characters.  The custom file is
saved at once if `rsf-autosave-newly-added-definitions' is non-nil."
  (interactive)
  ;; The region is looked up in `rmail-buffer', which becomes the
  ;; current buffer from here on.
  (set-buffer rmail-buffer)
  (cond
   ;; Region inactive or of zero size.
   ((not (and mark-active (not (= (region-beginning) (region-end)))))
    (message "You must highlight some text in the Rmail buffer"))
   ;; Region too short to make a safe rule.
   ((< (- (region-end) (region-beginning)) rsf-min-region-to-spam-list)
    (message "Region is too small (minimum %d characters)"
             rsf-min-region-to-spam-list))
   ;; Region active and long enough: add it to the spam definitions.
   (t
    (let ((region-to-spam-list (regexp-quote
                                (buffer-substring-no-properties
                                 (region-beginning) (region-end)))))
      (add-to-list 'rsf-definitions-alist
                   ;; All fields are listed, in the order used by the
                   ;; Customize type of `rsf-definitions-alist', so
                   ;; that the new rule has the shape that type
                   ;; describes.
                   (list '(from . "")
                         '(to . "")
                         '(subject . "")
                         '(content-type . "")
                         (cons 'contents region-to-spam-list)
                         '(x-spam-status . "")
                         '(action . output-and-delete))
                   t)
      (customize-mark-to-save 'rsf-definitions-alist)
      (if rsf-autosave-newly-added-definitions
          (progn
            (custom-save-all)
            (message "Added highlighted text:\n%s\nto the spam list, and saved it"
                     region-to-spam-list))
        (message "Added highlighted text:\n%s\nto the spam list (remember to save it)"
                 region-to-spam-list))))))
489
(defun rsf-customize-spam-definitions ()
  "Open a Customize buffer for `rsf-definitions-alist'."
  (interactive)
  (customize-option 'rsf-definitions-alist))

(defun rsf-customize-group ()
  "Open a Customize buffer for the `rmail-spam-filter' group."
  (interactive)
  (customize-group 'rmail-spam-filter))

(defun rsf-custom-save-all ()
  "Call `custom-save-all'; this wrapper makes it available as a command."
  (interactive)
  (custom-save-all))
504
;; Install the "Spam" menu and the C-c S key bindings in both the
;; Rmail summary keymap and the Rmail keymap.
(dolist (map (list rmail-summary-mode-map rmail-mode-map))
  (easy-menu-define nil map nil
    '("Spam"
      ["Add subject to spam list" rsf-add-subject-to-spam-list]
      ["Add sender to spam list" rsf-add-sender-to-spam-list]
      ["Add region to spam list" rsf-add-region-to-spam-list]
      ["Save spam definitions" rsf-custom-save-all]
      "--"
      ["Customize spam definitions" rsf-customize-spam-definitions]
      ["Browse spam customizations" rsf-customize-group]))
  ;; Each entry is (KEY-SEQUENCE . COMMAND).
  (dolist (binding '(("\C-cSt" . rsf-add-subject-to-spam-list)
                     ("\C-cSr" . rsf-add-sender-to-spam-list)
                     ("\C-cSn" . rsf-add-region-to-spam-list)
                     ("\C-cSa" . rsf-custom-save-all)
                     ("\C-cSd" . rsf-customize-spam-definitions)
                     ("\C-cSg" . rsf-customize-group)))
    (define-key map (car binding) (cdr binding))))
523
(defun rsf-add-content-type-field ()
  "Maintain backward compatibility for `rmail-spam-filter'.
The most recent version of `rmail-spam-filter' checks the content-type
field of the incoming mail to see if it is spam.  The format of
`rsf-definitions-alist' has therefore changed.  This function
checks to see if the old format is used, and updates it if necessary.

The old format is recognized by the first rule having no
`content-type' element.  In that case every rule is rebuilt with
the elements from, to, subject, content-type, contents and
x-spam-status, in that order, followed by its action.  Existing
values are kept; a missing string element becomes the empty
string, which the filter treats the same as an absent element.
A rule without an action element stays without one.

The custom file is saved at once if
`rsf-autosave-newly-added-definitions' is non-nil."
  (interactive)
  (when (and rsf-definitions-alist
             (not (assoc 'content-type (car rsf-definitions-alist))))
    (setq rsf-definitions-alist
          (mapcar
           (lambda (rule)
             (let ((action (assq 'action rule)))
               (append
                ;; Keep whatever the rule already specifies, including
                ;; a content-type or x-spam-status a later rule may
                ;; have; only fill in what is missing.
                (mapcar (lambda (field)
                          (or (assq field rule) (cons field "")))
                        '(from to subject content-type contents
                               x-spam-status))
                ;; Do not invent an action for a rule that has none.
                (and action (list action)))))
           rsf-definitions-alist))
    (customize-mark-to-save 'rsf-definitions-alist)
    (if rsf-autosave-newly-added-definitions
        (progn
          (custom-save-all)
          (message "Spam definitions converted to new format, and saved"))
      (message "Spam definitions converted to new format (remember to save)"))))
555
556 (provide 'rmail-spam-filter)
557
558 ;;; rmail-spam-filter.el ends here