1 ;;; rmail-spam-filter.el --- spam filter for rmail, the emacs mail reader.
3 ;; Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4 ;; Keywords: email, spam, filter, rmail
5 ;; Author: Eli Tziperman <eli AT deas.harvard.edu>
7 ;; This file is part of GNU Emacs.
9 ;; GNU Emacs is free software; you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; GNU Emacs is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs; see the file COPYING. If not, write to the
21 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
22 ;; Boston, MA 02110-1301, USA.
27 ;;; Automatically recognize and delete junk email before it is
28 ;;; displayed in rmail/rmail-summary. Spam emails are defined by
29 ;;; specifying one or more of the sender, subject and contents.
30 ;;; URL: http://www.weizmann.ac.il/~eli/Downloads/rmail-spam-filter/
35 ;;; put in your .emacs:
37 ;;; (load "rmail-spam-filter.el")
39 ;;; and use customize (in rmail-spam-filter group) to:
41 ;;; (*) turn on the variable rmail-use-spam-filter,
43 ;;; (*) specify in variable rsf-definitions-alist what sender,
44 ;;; subject and contents make an email be considered spam.
46 ;;; in addition, you may:
48 ;;; (*) Block future mail with the subject or sender of a message
49 ;;; while reading it in RMAIL: just click on the "Spam" item on the
50 ;;; menubar, and add the subject or sender to the list of spam
51 ;;; definitions using the mouse and the appropriate menu item. You
52 ;;; need to later also save the list of spam definitions using the
53 ;;; same menu item, or alternatively, see variable
54 ;;; `rsf-autosave-newly-added-definitions'.
56 ;;; (*) specify if blind-cc'ed mail (no "To:" header field) is to be
57 ;;; treated as spam (variable rsf-no-blind-cc; Thanks to Ethan
58 ;;; Brown <ethan@gso.saic.com> for this).
60 ;;; (*) specify if rmail-spam-filter should ignore case of spam
61 ;;; definitions (variable rsf-ignore-case; Thanks to
62 ;;; Ethan Brown <ethan@gso.saic.com> for the suggestion).
64 ;;; (*) Specify a "white-list" of trusted senders. If any
65 ;;; rsf-white-list string matches a substring of the "From"
66 ;;; header, the message is flagged as a valid, non-spam message (Ethan
67 ;;; Brown <ethan@gso.saic.com>).
69 ;;; (*) rmail-spam-filter is best used with a general purpose spam
70 ;;; filter such as the procmail-based http://www.spambouncer.org/.
71 ;;; Spambouncer is set to only mark messages as spam/blocked/bulk/OK
72 ;;; via special headers, and these headers may then be defined in
73 ;;; rmail-spam-filter such that the spam is rejected by
74 ;;; rmail-spam-filter itself.
76 ;;; (*) rmail spam filter also works with bbdb to prevent spam senders
77 ;;; from entering into the .bbdb file. See variable
78 ;;; "rsf-auto-delete-spam-bbdb-entries". This is done
79 ;;; in two ways: (a) bbdb is made not to auto-create entries for
80 ;;; messages that are deleted by the rmail-spam-filter, (b) when a
81 ;;; message is deleted in rmail, the user is offered to delete the
;;; sender's bbdb entry as well _if_ it was created on the same day.
;; Load the rmail summary library unless it is already present.  The
;; check is done on the function itself rather than on the Emacs
;; version, so it behaves the same on every Emacs release.
(unless (fboundp 'rmail-make-summary-line)
  (load-library "rmailsum"))

;; Value-less `defvar's: they declare the variables as special without
;; binding them, so code below can refer to them before the libraries
;; that really define them are loaded.
(defvar bbdb/mail_auto_create_p)
(defvar rmail-summary-mode-map)
92 ;; For find-if and other cool common lisp functions we may want to use.
;; Customization group holding every user option of this library.
;; NOTE(review): the parent group is assumed to be `rmail' -- confirm.
(defgroup rmail-spam-filter nil
  "Spam filter for RMAIL, the mail reader for Emacs."
  :group 'rmail)
(defcustom rmail-use-spam-filter nil
  "*Non-nil to activate the rmail spam filter.
Specify `rsf-definitions-alist' to define what you consider spam
e-mails."
  :type 'boolean
  :group 'rmail-spam-filter)
(defcustom rsf-file "~/XRMAIL-SPAM"
  "*Name of rmail file for optionally saving some of the spam.
Spam may be either just deleted, or saved in a separate spam file to
be looked at at a later time.  Whether the spam is just deleted or
also saved in a separate spam file is specified for each definition of
spam, as one of the fields of `rsf-definitions-alist'."
  :type 'string
  :group 'rmail-spam-filter)
(defcustom rsf-no-blind-cc nil
  "*Non-nil to treat blind CC (no To: header) as spam."
  :type 'boolean
  :group 'rmail-spam-filter)
(defcustom rsf-ignore-case nil
  "*Non-nil to ignore case in `rsf-definitions-alist'."
  :type 'boolean
  :group 'rmail-spam-filter)
(defcustom rsf-beep nil
  "*Non-nil to beep if spam is found."
  :type 'boolean
  :group 'rmail-spam-filter)
(defcustom rsf-sleep-after-message 2.0
  "*Seconds to wait after display of message that spam was found."
  :type 'number
  :group 'rmail-spam-filter)
(defcustom rsf-min-region-to-spam-list 7
  "*Minimum size of a region that may be added to the spam list.
The user may highlight a region in an incoming message and use the
menubar to add this region to the spam definitions.  This variable
specifies the minimum size of region that may be added to the spam
list, to avoid accidentally adding a too short region which would
result in false positive identification of spam."
  :type 'integer
  :group 'rmail-spam-filter)
(defcustom rsf-auto-delete-spam-bbdb-entries nil
  "*Non-nil to make sure no entries are made in bbdb for spam emails.
This is done in two ways: (1) bbdb is made not to auto-create entries
for messages that are deleted by the `rmail-spam-filter', (2) when a
message is deleted in rmail, the user is offered to delete the
sender's bbdb entry as well if it was created on the same day.  Note
that Emacs needs to be restarted after setting this option for it to
take effect."
  :type 'boolean
  :group 'rmail-spam-filter)
(defcustom rsf-autosave-newly-added-definitions nil
  "*Non-nil to auto save new spam entries.
New entries entered via the spam menu bar item are then saved to
customization file immediately after being added via the menu bar, and
do not require explicitly saving the file after adding the new
entries."
  :type 'boolean
  :group 'rmail-spam-filter)
(defcustom rsf-white-list nil
  "*List of strings to identify valid senders.
If any rsf-white-list string matches a substring of the 'From'
header, the message is flagged as a valid, non-spam message.  Example:
If your domain is emacs.com then including 'emacs.com' in your
rsf-white-list would flag all mail from your colleagues as
valid."
  :type '(repeat string)
  :group 'rmail-spam-filter)
(defcustom rsf-definitions-alist nil
  "*Alist matching strings defining what messages are considered spam.
Each definition may contain specifications of one or more of the
elements {subject, sender, recipients or contents}, as well as a
definition of what to do with the spam (action item).  A spam e-mail
is defined as one that fits all of the specified elements of any one
of the spam definitions.  The strings that specify spam subject,
sender, etc, may be regexp.  For example, to specify that the subject
may be either 'this is spam' or 'another spam', use the regexp: 'this
is spam\\|another spam' (without the single quotes).  To specify that
if the contents contain both this and that the message is spam,
specify 'this\\&that' in the appropriate spam definition field."
  ;; Each definition is a list of (FIELD . STRING) pairs in a fixed
  ;; order, followed by an (action . SYMBOL) pair.
  :type '(repeat
          (list :format "%v"
                (cons :format "%v" :value (from . "")
                      (const :format "" from)
                      (string :tag "From" ""))
                (cons :format "%v" :value (to . "")
                      (const :format "" to)
                      (string :tag "To" ""))
                (cons :format "%v" :value (subject . "")
                      (const :format "" subject)
                      (string :tag "Subject" ""))
                (cons :format "%v" :value (content-type . "")
                      (const :format "" content-type)
                      (string :tag "Content-Type" ""))
                (cons :format "%v" :value (contents . "")
                      (const :format "" contents)
                      (string :tag "Contents" ""))
                (cons :format "%v" :value (action . output-and-delete)
                      (const :format "" action)
                      (choice :tag "Action selection"
                              (const :tag "output to spam folder and delete"
                                     output-and-delete)
                              (const :tag "delete spam" delete-spam)))))
  :group 'rmail-spam-filter)
(defvar rsf-scanning-messages-now nil
  "Non-nil while `rmail-spam-filter' is scanning messages.
Used for interaction with `rsf-bbdb-auto-delete-spam-entries'.")
;; The advantage over the automatic filter definitions is the AND
;; conjunction of the elements within one definition.
(defun check-field (field-symbol message-data definition result)
  "Test MESSAGE-DATA against the FIELD-SYMBOL element of DEFINITION.
DEFINITION is an alist such as one element of `rsf-definitions-alist';
the string stored under FIELD-SYMBOL is used as a regexp.
MESSAGE-DATA is the corresponding text of the message, or nil.

RESULT is a cons cell (MAYBE-SPAM . THIS-IS-A-SPAM-EMAIL) and is
changed destructively:
- if MAYBE-SPAM is already nil, or DEFINITION holds no non-empty
  string for FIELD-SYMBOL, RESULT is left untouched (this may still
  be spam due to another element);
- if the regexp matches MESSAGE-DATA, THIS-IS-A-SPAM-EMAIL is set to t;
- otherwise (no match, or MESSAGE-DATA is nil) MAYBE-SPAM is set to nil.

Return t when the field matched, nil in every other case."
  (let ((definition-field (cdr (assoc field-symbol definition))))
    ;; Only in this case can MAYBE-SPAM change from t to nil.
    (when (and (car result) (> (length definition-field) 0))
      (if (and message-data
               (string-match definition-field message-data))
          ;; If we do not get a contradiction from another field, this
          ;; is spam.
          (setcdr result t)
        ;; The message data contradicts the specification: no spam.
        (setcar result nil)))))
(defun rmail-spam-filter (msg)
  "Return nil if message number MSG is spam, t otherwise.
A message whose sender matches an entry of `rsf-white-list' is never
spam.  Otherwise it is spam if `rsf-no-blind-cc' is non-nil and the
message has neither a To nor a Cc header, or if it fits all specified
elements of one of the definitions in `rsf-definitions-alist'.

If MSG is spam, perform the action of the matching definition:
optionally output MSG to the file `rsf-file' and delete it from the
rmail file.  Called for each new message retrieved by
`rmail-get-new-mail'."
  (let ((bbdb-bound (boundp 'bbdb/mail_auto_create_p))
        (saved-bbdb nil)
        (is-spam nil)
        (matched-definition nil)
        message-sender
        message-recipients
        message-subject
        message-content-type)
    ;; Make sure bbdb does not create entries for messages while the
    ;; spam filter is scanning the rmail file.  The VALUE of the bbdb
    ;; variable is saved (not its name), and only if bbdb defined it.
    (when bbdb-bound
      (setq saved-bbdb bbdb/mail_auto_create_p
            bbdb/mail_auto_create_p nil))
    ;; Let `rsf-bbdb-auto-delete-spam-entries' know that the rmail spam
    ;; filter is running.
    (setq rsf-scanning-messages-now t)
    (unwind-protect
        (save-excursion
          (save-restriction
            ;; Narrow buffer to header of message and get the fields
            ;; to be used below.
            (save-restriction
              (goto-char (rmail-msgbeg msg))
              (narrow-to-region (point)
                                (progn (search-forward "\n\n") (point)))
              (let ((to (mail-fetch-field "To"))
                    (cc (mail-fetch-field "Cc")))
                (setq message-sender (mail-fetch-field "From")
                      ;; nil (not "") when there are no recipients at
                      ;; all, so that the blind-CC test can detect it.
                      message-recipients
                      (if (or to cc)
                          (concat to (if cc (concat ", " cc))))
                      message-subject (mail-fetch-field "Subject")
                      message-content-type
                      (mail-fetch-field "Content-Type"))))
            ;; Do we want to ignore case in spam definitions?
            (let ((case-fold-search rsf-ignore-case))
              (cond
               ;; White list: a trusted sender is never spam.
               ((and message-sender
                     (catch 'white-listed
                       (dolist (entry rsf-white-list)
                         (if (string-match entry message-sender)
                             (throw 'white-listed t)))))
                nil)
               ;; Blind CC: spam without looking at the definitions.
               ;; The action is taken from the first definition, if any.
               ((and rsf-no-blind-cc (null message-recipients))
                (setq is-spam t
                      matched-definition (car rsf-definitions-alist)))
               ;; Scan all elements of `rsf-definitions-alist'.
               (t
                (let ((definitions rsf-definitions-alist)
                      ;; The message text does not change while
                      ;; scanning, so extract it only once.
                      (contents (if rsf-definitions-alist
                                    (buffer-substring (rmail-msgbeg msg)
                                                      (rmail-msgend msg))))
                      verdict)
                  (while (and definitions (not is-spam))
                    ;; VERDICT is (MAYBE-SPAM . THIS-IS-A-SPAM-EMAIL);
                    ;; see `check-field'.
                    (setq verdict (cons t nil))
                    (check-field 'from message-sender
                                 (car definitions) verdict)
                    (check-field 'to (or message-recipients "")
                                 (car definitions) verdict)
                    (check-field 'subject message-subject
                                 (car definitions) verdict)
                    (check-field 'content-type message-content-type
                                 (car definitions) verdict)
                    (check-field 'contents contents
                                 (car definitions) verdict)
                    (if (and (car verdict) (cdr verdict))
                        ;; Found that this is spam, no need to look at
                        ;; the rest of the definitions.
                        (setq is-spam t
                              matched-definition (car definitions))
                      (setq definitions (cdr definitions))))))))
            (if (not is-spam)
                t
              (let ((action (cdr (assoc 'action matched-definition)))
                    (save-current-msg rmail-current-message))
                ;; Temporarily set `rmail-current-message' in order to
                ;; output and delete the spam msg if needed.
                (setq rmail-current-message msg)
                (cond
                 ((eq action 'output-and-delete)
                  (rmail-output-to-rmail-file rsf-file 1 t)
                  ;; Don't delete if automatic deletion after output
                  ;; is turned on.
                  (unless rmail-delete-after-output
                    (rmail-delete-message)))
                 ((eq action 'delete-spam)
                  (rmail-delete-message)))
                (setq rmail-current-message save-current-msg)
                ;; nil tells the caller that MSG is spam.
                nil))))
      ;; Always undo the temporary state, whether or not MSG was spam
      ;; and even on a non-local exit.
      (setq rsf-scanning-messages-now nil)
      (when bbdb-bound
        (setq bbdb/mail_auto_create_p saved-bbdb)))))
;; Define functions for interactively adding the sender/subject of a
;; specific message to the spam definitions while reading it.
(defun rsf-add-subject-to-spam-list ()
  "Add the \"Subject\" header of the rmail message to the spam list.
The subject is fetched from `rmail-buffer' and appended to
`rsf-definitions-alist' as a definition whose only non-empty field is
the subject and whose action is `output-and-delete'.  Because the
definitions are regexps, the subject is stored regexp-quoted so that
it only matches itself.  The definitions are saved right away if
`rsf-autosave-newly-added-definitions' is non-nil.
Signal an error if the message has no subject."
  (interactive)
  (let ((message-subject (with-current-buffer rmail-buffer
                           (mail-fetch-field "Subject"))))
    (unless (and message-subject (> (length message-subject) 0))
      (error "This message has no Subject header to add"))
    (add-to-list 'rsf-definitions-alist
                 (list '(from . "")
                       '(to . "")
                       (cons 'subject (regexp-quote message-subject))
                       '(content-type . "")
                       '(contents . "")
                       '(action . output-and-delete))
                 t)
    (customize-mark-to-save 'rsf-definitions-alist)
    (if rsf-autosave-newly-added-definitions
        (progn
          (custom-save-all)
          (message "%s" (concat "added subject \n <<< \n" message-subject
                                " \n >>> \n to list of spam definitions. \n"
                                "and saved the spam definitions to file.")))
      (message "%s" (concat "added subject \n <<< \n" message-subject
                            " \n >>> \n to list of spam definitions. \n"
                            "Don't forget to save the spam definitions to file using the spam\n menu")))))
(defun rsf-add-sender-to-spam-list ()
  "Add the \"From\" header of the rmail message to the spam list.
The sender is fetched from `rmail-buffer' and appended to
`rsf-definitions-alist' as a definition whose only non-empty field is
the sender and whose action is `output-and-delete'.  Because the
definitions are regexps, the sender is stored regexp-quoted so that
it only matches itself.  The definitions are saved right away if
`rsf-autosave-newly-added-definitions' is non-nil.
Signal an error if the message has no sender."
  (interactive)
  (let ((message-sender (with-current-buffer rmail-buffer
                          (mail-fetch-field "From"))))
    (unless (and message-sender (> (length message-sender) 0))
      (error "This message has no From header to add"))
    (add-to-list 'rsf-definitions-alist
                 (list (cons 'from (regexp-quote message-sender))
                       '(to . "")
                       '(subject . "")
                       '(content-type . "")
                       '(contents . "")
                       '(action . output-and-delete))
                 t)
    (customize-mark-to-save 'rsf-definitions-alist)
    (if rsf-autosave-newly-added-definitions
        (progn
          (custom-save-all)
          (message "%s" (concat "added sender \n <<< \n" message-sender
                                " \n >>> \n to list of spam definitions. \n"
                                "and saved the spam definitions to file.")))
      ;; The " \n" after "definitions." keeps the two sentences apart.
      (message "%s" (concat "added sender \n <<< \n " message-sender
                            " \n >>> \n to list of spam definitions. \n"
                            "Don't forget to save the spam definitions to file using the spam\n menu")))))
(defun rsf-add-region-to-spam-list ()
  "Add the region marked by the user in the rmail buffer to the spam list.
The text is appended to `rsf-definitions-alist' as a contents field
with action `output-and-delete'.  Because the definitions are regexps,
the text is stored regexp-quoted (and without text properties) so that
it only matches itself.  Nothing is added, and a message is shown
instead, if the region is inactive, empty, or shorter than
`rsf-min-region-to-spam-list'.  The definitions are saved right away
if `rsf-autosave-newly-added-definitions' is non-nil."
  (interactive)
  (set-buffer rmail-buffer)
  (cond
   ;; Region inactive or of zero size.
   ((not (and mark-active (/= (region-beginning) (region-end))))
    (message "you need to first highlight some text in the rmail buffer"))
   ;; Region too short to be a safe spam definition.
   ((< (- (region-end) (region-beginning)) rsf-min-region-to-spam-list)
    (message "%s"
             (concat "highlighted region is too small; min length set by variable \n"
                     "rsf-min-region-to-spam-list"
                     " is " (number-to-string rsf-min-region-to-spam-list))))
   ;; Region active and long enough: add to list of spam definitions.
   (t
    (let ((region-to-spam-list
           (buffer-substring-no-properties (region-beginning) (region-end))))
      (add-to-list 'rsf-definitions-alist
                   (list '(from . "")
                         '(to . "")
                         '(subject . "")
                         '(content-type . "")
                         (cons 'contents (regexp-quote region-to-spam-list))
                         '(action . output-and-delete))
                   t)
      (customize-mark-to-save 'rsf-definitions-alist)
      (if rsf-autosave-newly-added-definitions
          (progn
            (custom-save-all)
            (message "%s" (concat "added highlighted text \n <<< \n" region-to-spam-list
                                  " \n >>> \n to list of spam definitions. \n"
                                  "and saved the spam definitions to file.")))
        ;; The " \n" after "definitions." keeps the two sentences apart.
        (message "%s" (concat "added highlighted text \n <<< \n " region-to-spam-list
                              " \n >>> \n to list of spam definitions. \n"
                              "Don't forget to save the spam definitions to file using the\n spam menu")))))))
(defun rsf-customize-spam-definitions ()
  "Open a customization buffer for `rsf-definitions-alist'."
  ;; Interactive because it is bound to a key and a menu item.
  (interactive)
  (customize-variable 'rsf-definitions-alist))
(defun rsf-customize-group ()
  "Open a customization buffer for the `rmail-spam-filter' group."
  ;; Interactive because it is bound to a key and a menu item.
  (interactive)
  (customize-group 'rmail-spam-filter))
(defun rsf-custom-save-all ()
  "Save newly added spam definitions to the customization file.
NOTE(review): the body is assumed to be a plain call to
`custom-save-all', which saves every customization marked to be saved
-- confirm against the complete file."
  ;; Interactive because it is bound to a key and a menu item.
  (interactive)
  (custom-save-all))
;; Install the "Spam" menu and its keyboard shortcuts in both the
;; rmail-summary and the rmail keymaps.
(let ((maps (list rmail-summary-mode-map rmail-mode-map))
      ;; Each entry is (MENU-EVENT LABEL COMMAND KEYS), in the order in
      ;; which the items are defined.  An entry without COMMAND is a
      ;; separator line.
      (items
       (list
        (list 'customize-group
              "Browse customizations of rmail spam filter"
              'rsf-customize-group "\C-cSg")
        (list 'customize-spam-list
              "Customize list of spam definitions"
              'rsf-customize-spam-definitions "\C-cSd")
        (list 'lambda "----")
        (list 'my-custom-save-all
              "save newly added spam definitions to customization file"
              'rsf-custom-save-all "\C-cSa")
        (list 'add-region-to-spam-list
              "add region to spam list"
              'rsf-add-region-to-spam-list "\C-cSn")
        (list 'add-sender-to-spam-list
              "add sender to spam list"
              'rsf-add-sender-to-spam-list "\C-cSr")
        (list 'add-subject-to-spam-list
              "add subject to spam list"
              'rsf-add-subject-to-spam-list "\C-cSt"))))
  ;; Every map gets its own "Spam" submenu keymap.
  (dolist (map maps)
    (define-key map [menu-bar spam]
      (cons "Spam" (make-sparse-keymap "Spam"))))
  (dolist (item items)
    (let ((event (nth 0 item))
          (label (nth 1 item))
          (command (nth 2 item))
          (keys (nth 3 item)))
      ;; Menu entry: a fresh (LABEL . COMMAND) pair per map, or just
      ;; (LABEL) for the separator.
      (dolist (map maps)
        (define-key map (vector 'menu-bar 'spam event)
          (if command (cons label command) (list label))))
      ;; Keyboard shortcut, if the item has one.
      (when keys
        (dolist (map maps)
          (define-key map keys command))))))
(defun rsf-add-content-type-field ()
  "Maintain backward compatibility with previous versions of rmail-spam-filter.
The most recent version of rmail-spam-filter also checks the
content-type field of the incoming mail to see if it is spam.  The
format of `rsf-definitions-alist' has therefore changed.  This
function checks whether any definition still uses the old format
\(one without a content-type element), and if so rewrites those
definitions in the new format with an empty content-type; definitions
already in the new format are kept unchanged.  The converted list is
saved right away if `rsf-autosave-newly-added-definitions' is non-nil.
No user input is required."
  (interactive)
  (let ((old-format-found nil))
    (dolist (definition rsf-definitions-alist)
      (unless (assoc 'content-type definition)
        (setq old-format-found t)))
    (when old-format-found
      (setq rsf-definitions-alist
            (mapcar (lambda (definition)
                      (if (assoc 'content-type definition)
                          definition
                        ;; Rebuild the definition in the new field order.
                        (list (assoc 'from definition)
                              (assoc 'to definition)
                              (assoc 'subject definition)
                              (cons 'content-type "")
                              (assoc 'contents definition)
                              (assoc 'action definition))))
                    rsf-definitions-alist))
      (customize-mark-to-save 'rsf-definitions-alist)
      (if rsf-autosave-newly-added-definitions
          (progn
            (custom-save-all)
            (message (concat "converted spam definitions to new format\n"
                             "and saved the spam definitions to file.")))
        (message (concat "converted spam definitions to new format\n"
                         "Don't forget to save the spam definitions to file using the\n spam menu"))))))
;; Announce the `rmail-spam-filter' feature to `require'.
(provide 'rmail-spam-filter)
638 ;;; arch-tag: 03e1d45d-b72f-4dd7-8f04-e7fd78249746
;;; rmail-spam-filter.el ends here