]> code.delx.au - gnu-emacs/blob - lisp/gnus/deuglify.el
Convert consecutive FSF copyright years to ranges.
[gnu-emacs] / lisp / gnus / deuglify.el
1 ;;; deuglify.el --- deuglify broken Outlook (Express) articles
2
3 ;; Copyright (C) 2001-2011
4 ;; Free Software Foundation, Inc.
5
6 ;; Author: Raymond Scholz <rscholz@zonix.de>
7 ;; Thomas Steffen
8 ;; (unwrapping algorithm, based on an idea of Stefan Monnier)
9 ;; Keywords: mail, news
10
11 ;; This file is part of GNU Emacs.
12
13 ;; GNU Emacs is free software: you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation, either version 3 of the License, or
16 ;; (at your option) any later version.
17
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
22
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25
26 ;;; Commentary:
27
28 ;; This file enables Gnus to repair broken citations produced by
29 ;; common user agents like MS Outlook (Express). It may repair
30 ;; articles of other user agents too.
31 ;;
32 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
34 ;;
35 ;; Outlook sometimes wraps cited lines before sending a message as
36 ;; seen in this example:
37 ;;
38 ;; Example #1
39 ;; ----------
40 ;;
41 ;; John Doe wrote:
42 ;;
43 ;; > This sentence no verb. This sentence no verb. This sentence
44 ;; no
45 ;; > verb. This sentence no verb. This sentence no verb. This
46 ;; > sentence no verb.
47 ;;
48 ;; The function `gnus-article-outlook-unwrap-lines' tries to recognize those
49 ;; erroneously wrapped lines and will unwrap them. I.e. putting the
50 ;; wrapped parts ("no" in this example) back where they belong (at the
51 ;; end of the cited line above).
52 ;;
53 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
54 ;;
55 ;; Note that some people not only use broken user agents but also
56 ;; practice a bad citation style by omitting blank lines between the
57 ;; cited text and their own text.
58 ;;
59 ;; Example #2
60 ;; ----------
61 ;;
62 ;; John Doe wrote:
63 ;;
64 ;; > This sentence no verb. This sentence no verb. This sentence no
65 ;; You forgot in all your sentences.
66 ;; > verb. This sentence no verb. This sentence no verb. This
67 ;; > sentence no verb.
68 ;;
69 ;; Unwrapping "You forgot in all your sentences." would be invalid as
70 ;; this part wasn't intended to be cited text.
71 ;; `gnus-article-outlook-unwrap-lines' will only unwrap lines if the resulting
72 ;; citation line will be of a certain maximum length. You can control
73 ;; this by adjusting `gnus-outlook-deuglify-unwrap-max'. Also
74 ;; unwrapping will only be done if the line above the (possibly)
75 ;; wrapped line has a minimum length of `gnus-outlook-deuglify-unwrap-min'.
76 ;;
77 ;; Furthermore no unwrapping will be undertaken if the last character
78 ;; is one of the chars specified in
79 ;; `gnus-outlook-deuglify-unwrap-stop-chars'. Setting this to ".?!"
80 ;; inhibits unwrapping if the cited line ends with a full stop,
81 ;; question mark or exclamation mark. Note that this variable
82 ;; defaults to `nil', triggering a few false positives but generally
83 ;; giving you better results.
84 ;;
85 ;; Unwrapping works on every level of citation. Thus you will be able
86 ;; to repair broken citations of broken user agents citing broken
87 ;; citations of broken user agents citing broken citations...
88 ;;
89 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
90 ;;
91 ;; Citations are commonly introduced with an attribution line
92 ;; indicating who wrote the cited text. Outlook adds superfluous
93 ;; information that can be found in the header of the message to this
94 ;; line and often wraps it.
95 ;;
96 ;; If that weren't enough, lots of people write their own text above
97 ;; the cited text and cite the complete original article below.
98 ;;
99 ;; Example #3
100 ;; ----------
101 ;;
102 ;; Hey, John. There's no in all your sentences!
103 ;;
104 ;; John Doe <john.doe@some.domain> wrote in message
105 ;; news:a87usw8$dklsssa$2@some.news.server...
106 ;; > This sentence no verb. This sentence no verb. This sentence
107 ;; no
108 ;; > verb. This sentence no verb. This sentence no verb. This
109 ;; > sentence no verb.
110 ;; >
111 ;; > Bye, John
112 ;;
113 ;; Repairing the attribution line will be done by function
114 ;; `gnus-article-outlook-repair-attribution' which calls other functions that
115 ;; try to recognize and repair broken attribution lines. See variable
116 ;; `gnus-outlook-deuglify-attrib-cut-regexp' for stuff that should be
117 ;; cut off from the beginning of an attribution line and variable
118 ;; `gnus-outlook-deuglify-attrib-verb-regexp' for the verbs that are
119 ;; required to be found in an attribution line. These functions return
120 ;; the point where the repaired attribution line starts.
121 ;;
122 ;; Rearranging the article so that the cited text appears above the
123 ;; new text will be done by function
124 ;; `gnus-article-outlook-rearrange-citation'. This function calls
125 ;; `gnus-article-outlook-repair-attribution' to find and repair an attribution
126 ;; line.
127 ;;
128 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
129 ;;
130 ;; Well, and that's what the message will look like after applying
131 ;; deuglification:
132 ;;
133 ;; Example #3 (deuglified)
134 ;; -----------------------
135 ;;
136 ;; John Doe <john.doe@some.domain> wrote:
137 ;;
138 ;; > This sentence no verb. This sentence no verb. This sentence no
139 ;; > verb. This sentence no verb. This sentence no verb. This
140 ;; > sentence no verb.
141 ;; >
142 ;; > Bye, John
143 ;;
144 ;; Hey, John. There's no in all your sentences!
145 ;;
146 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
147 ;;
148 ;; Usage
149 ;; -----
150 ;;
151 ;; Press `W k' in the Summary Buffer.
152 ;;
153 ;; Non recommended usage :-)
154 ;; ---------------------
155 ;;
156 ;; To automatically invoke deuglification on every article you read,
157 ;; put something like that in your .gnus:
158 ;;
159 ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-unwrap-lines)
160 ;;
161 ;; or _one_ of the following lines:
162 ;;
163 ;; ;; repair broken attribution lines
164 ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-repair-attribution)
165 ;;
166 ;; ;; repair broken attribution lines and citations
167 ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-rearrange-citation)
168 ;;
169 ;; Note that there always may be some false positives, so I suggest
170 ;; using the manual invocation. After deuglification you may want to
171 ;; refill the whole article using `W w'.
172 ;;
173 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
174 ;;
175 ;; Limitations
176 ;; -----------
177 ;;
178 ;; As I said before there may (or will) be a few false positives on
179 ;; unwrapping cited lines with `gnus-article-outlook-unwrap-lines'.
180 ;;
181 ;; `gnus-article-outlook-repair-attribution' will only fix the first
182 ;; attribution line found in the article. Furthermore it is limited to
183 ;; certain kinds of attributions. And there may be horribly many
184 ;; false positives, vanishing lines and so on -- so don't trust your
185 ;; eyes. Again I recommend manual invocation.
186 ;;
187 ;; `gnus-article-outlook-rearrange-citation' carries all the limitations of
188 ;; `gnus-article-outlook-repair-attribution'.
189 ;;
190 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
191 ;;
192 ;; See ChangeLog for other changes.
193 ;;
194 ;; Revision 1.5 2002/01/27 14:39:17 rscholz
195 ;; * New variable `gnus-outlook-deuglify-no-wrap-chars' to inhibit
196 ;; unwrapping if one of these chars is first in the possibly wrapped line.
197 ;; * Improved rearranging of the article.
198 ;; * New function `gnus-outlook-repair-attribution-block' for repairing
199 ;; those big "Original Message (following some headers)" attributions.
200 ;;
201 ;; Revision 1.4 2002/01/03 14:05:00 rscholz
202 ;; Renamed `gnus-outlook-deuglify-article' to
203 ;; `gnus-article-outlook-deuglify-article'.
204 ;; Made it easier to deuglify the article while being in Gnus' Article
205 ;; Edit Mode. (suggested by Phil Nitschke)
206 ;;
207 ;;
208 ;; Revision 1.3 2002/01/02 23:35:54 rscholz
209 ;; Fix a bug that caused succeeding long attribution lines to be
210 ;; unwrapped. Minor doc fixes and regular expression tuning.
211 ;;
212 ;; Revision 1.2 2001/12/30 20:14:34 rscholz
213 ;; Clean up source.
214 ;;
215 ;; Revision 1.1 2001/12/30 20:13:32 rscholz
216 ;; Initial revision
217 ;;
218 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
219
220 ;;; Code:
221
222 (require 'gnus-art)
223 (require 'gnus-sum)
224
(defconst gnus-outlook-deuglify-version
  "1.5 Gnus version"
  "Version string of the Outlook deuglification code in this file.")
227
228 ;;; User Customizable Variables:
229
(defgroup gnus-outlook-deuglify nil
  "Repair articles mangled by broken user agents such as MS Outlook (Express)."
  :group 'gnus
  :version "22.1")
234
(defcustom gnus-outlook-deuglify-unwrap-min 45
  "Minimum length a cited line must have for the line below it to be unwrapped.
`gnus-article-outlook-unwrap-lines' only joins a possibly wrapped
line onto the cited line above it when that cited line is longer
than this many characters."
  :group 'gnus-outlook-deuglify
  :type 'integer
  :version "22.1")
240
(defcustom gnus-outlook-deuglify-unwrap-max 95
  "Upper bound on the length of a cited line after unwrapping.
`gnus-article-outlook-unwrap-lines' only joins two lines when the
combined length stays below this many characters."
  :group 'gnus-outlook-deuglify
  :type 'integer
  :version "22.1")
246
(defcustom gnus-outlook-deuglify-cite-marks ">|#%"
  "String of the characters that mark a line as cited.
The characters are spliced into regexp character classes by the
functions in this file."
  :group 'gnus-outlook-deuglify
  :type 'string
  :version "22.1")
252
(defcustom gnus-outlook-deuglify-unwrap-stop-chars nil ;; ".?!" or nil
  "Characters that, at the end of a cited line, inhibit unwrapping.
When one of these characters is the last one on the cited line
above the possibly wrapped line, the two lines are not joined.
A value of nil means that no character inhibits unwrapping."
  :version "22.1"
  :type '(radio (const :format "None " nil)
                (string :value ".?!"))
  :group 'gnus-outlook-deuglify)
259
(defcustom gnus-outlook-deuglify-no-wrap-chars "`"
  "Characters that, at the start of a line, inhibit unwrapping that line.
When one of these characters is the first one in the possibly
wrapped line, the line is not joined onto the cited line above it."
  :version "22.1"
  :type 'string
  :group 'gnus-outlook-deuglify)
265
(defcustom gnus-outlook-deuglify-attrib-cut-regexp
  "\\(On \\|Am \\)?\\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\),[^,]+, "
  "Regexp matching the start of an attribution line that should be cut off.
`gnus-outlook-repair-attribution-other' embeds this regexp in a
larger one and refers to the groups that follow it by number, so a
customized value should keep exactly two capturing groups, like
the default."
  :version "22.1"
  :type 'string
  :group 'gnus-outlook-deuglify)
272
(defcustom gnus-outlook-deuglify-attrib-verb-regexp
  "wrote\\|writes\\|says\\|schrieb\\|schreibt\\|meinte\\|skrev\\|a écrit\\|schreef\\|escribió"
  "Regexp matching the verb that an attribution line must contain.
The repair functions in this file wrap it in a group and search for
it case-sensitively."
  :group 'gnus-outlook-deuglify
  :type 'string
  :version "22.1")
279
(defcustom gnus-outlook-deuglify-attrib-end-regexp
  ": *\\|\\.\\.\\."
  "Regexp matching the text that ends an attribution line.
The default accepts a colon followed by optional spaces, or three
dots."
  :group 'gnus-outlook-deuglify
  :type 'string
  :version "22.1")
286
(defcustom gnus-outlook-display-hook nil
  "Hook run after a deuglified article has been prepared.
It is run after `gnus-article-prepare-hook'."
  :group 'gnus-outlook-deuglify
  :type 'hook
  :version "22.1")
293
294 ;; Functions
295
(defun gnus-outlook-display-article-buffer ()
  "Redisplay the article buffer, or the current buffer if there is none.
The buffer used is `gnus-article-buffer' when that is non-nil."
  (save-current-buffer
    (set-buffer (or gnus-article-buffer (current-buffer)))
    ;; Do by hand what `gnus-article-prepare-display' would do.
    ;; Calling that function on an article that has already been
    ;; prepared removes all MIME parts (possibly a bug there), so
    ;; its steps are replayed here instead.
    (gnus-article-highlight t)
    (gnus-treat-article nil)
    (gnus-run-hooks 'gnus-article-prepare-hook
                    'gnus-outlook-display-hook)))
307
308 ;;;###autoload
(defun gnus-article-outlook-unwrap-lines (&optional nodisplay)
  "Rejoin cited lines that look as if they were wrapped by the sender.
A line is appended to the cited line above it only when that cited
line is longer than `gnus-outlook-deuglify-unwrap-min' and the joined
text stays shorter than `gnus-outlook-deuglify-unwrap-max'.  If
NODISPLAY is non-nil, don't redisplay the article buffer."
  (interactive "P")
  (let* ((case-fold-search nil)
         (inhibit-read-only t)
         (marks gnus-outlook-deuglify-cite-marks)
         ;; Group 1: citation prefix that both lines must share.
         ;; Group 2: the cited line, ending in a character that is
         ;;          neither a space nor one of the stop chars.
         ;; Group 3: the wrapped remainder on the following line.
         (wrapped-re
          (concat
           "^\\([ \t" marks "]*\\)"
           "\\([" marks "].*[^\n " gnus-outlook-deuglify-unwrap-stop-chars
           "]\\)[ \t]?\n"
           "\\1\\([^\n " marks gnus-outlook-deuglify-no-wrap-chars
           "]+.*\\)$")))
    (gnus-with-article-buffer
      (article-goto-body)
      (while (re-search-forward wrapped-re nil t)
        (let* ((cited-len (- (match-end 2) (match-beginning 1)))
               (joined-len (+ cited-len
                              (- (match-end 3) (match-beginning 3)))))
          (when (and (> cited-len gnus-outlook-deuglify-unwrap-min)
                     (< joined-len gnus-outlook-deuglify-unwrap-max))
            (replace-match "\\1\\2 \\3")
            ;; Search again from the start of the joined line, so
            ;; that a further wrapped piece can be appended to it.
            (goto-char (match-beginning 0)))))))
  (unless nodisplay
    (gnus-outlook-display-article-buffer)))
336
(defun gnus-outlook-rearrange-article (attr-start)
  "Move the citation starting at ATTR-START above the text preceding it.
The region from ATTR-START up to the signature separator is swapped
with the text between the start of the article body and ATTR-START.
The end of the buffer is used instead when no signature is found or
when the signature lies before ATTR-START.  If the line after the
attribution does not start with \">\", the citation is first
indented with `message-indent-citation'.  Nothing is done when
the body already starts at ATTR-START."
  ;; FIXME: 1. (*) text/plain ( ) text/html
  (let ((inhibit-read-only t))
    (gnus-with-article-buffer
      (article-goto-body)
      ;; article does not start with attribution
      (unless (= (point) attr-start)
        (gnus-kill-all-overlays)
        (let ((cur (point))
              ;; before signature or end of buffer
              (to (if (gnus-article-search-signature)
                      (point)
                    (point-max))))
          ;; handle the case where the full quote is below the
          ;; signature
          (when (< to attr-start)
            (setq to (point-max)))
          (save-excursion
            ;; `save-restriction' instead of a trailing `widen': a
            ;; restriction the caller had in effect is restored, and
            ;; the buffer is not left narrowed if the indentation
            ;; code signals an error.
            (save-restriction
              (narrow-to-region attr-start to)
              (goto-char attr-start)
              (forward-line)
              (unless (looking-at ">")
                (message-indent-citation (point) (point-max) 'yank-only)
                (goto-char (point-max))
                (newline)
                ;; Indenting inserted text, so the citation now ends
                ;; further down.
                (setq to (point-max)))))
          (transpose-regions cur attr-start attr-start to))))))
367
368 ;; John Doe <john.doe@some.domain> wrote in message
369 ;; news:a87usw8$dklsssa$2@some.news.server...
370
(defun gnus-outlook-repair-attribution-outlook ()
  "Repair the first broken Outlook-style attribution in the article body.
Everything between the attribution verb and the end marker of the
attribution, possibly spanning two lines, is removed.  Return the
position where the repaired line starts, or nil if nothing matched."
  (let ((case-fold-search nil)
        (inhibit-read-only t)
        (marks gnus-outlook-deuglify-cite-marks))
    (gnus-with-article-buffer
      ;; Group 1: text before the verb.  Group 2: the verb.
      ;; Group 3: the superfluous tail, dropped by the replacement.
      ;; Group 4: the end marker.
      (let ((attrib-re
             (concat "^\\([^" marks "].+\\)"
                     "\\(" gnus-outlook-deuglify-attrib-verb-regexp "\\)"
                     "\\(.*\n?[^\n" marks "].*\\)?"
                     "\\(" gnus-outlook-deuglify-attrib-end-regexp "\\)$")))
        (article-goto-body)
        (when (re-search-forward attrib-re nil t)
          (gnus-kill-all-overlays)
          (replace-match "\\1\\2\\4")
          (match-beginning 0))))))
387
388
389 ;; ----- Original Message -----
390 ;; From: "John Doe" <john.doe@some.domain>
391 ;; To: "Doe Foundation" <info@doefnd.org>
392 ;; Sent: Monday, November 19, 2001 12:13 PM
393 ;; Subject: More Doenuts
394
(defun gnus-outlook-repair-attribution-block ()
  "Replace a big \"Original Message\" header block by a short attribution.
The block is a dashed separator line followed by at least two
\"Name: value\" lines.  It is replaced by the value of the first of
those lines followed by \" wrote:\".  Return the position where the
new attribution line starts, or nil if no such block was found."
  (let ((case-fold-search nil)
        (inhibit-read-only t)
        (cite-marks gnus-outlook-deuglify-cite-marks))
    (gnus-with-article-buffer
      (article-goto-body)
      (when (re-search-forward
             (concat
              ;; The "----- Original Message -----" separator line.
              "^[" cite-marks " \t]*--* ?[^-]+ [^-]+ ?--*\\s *\n"
              ;; First header line; group 1 is its value.
              "[^\n:]+:[ \t]*\\([^\n]+\\)\n"
              ;; One or more further header lines.
              "\\([^\n:]+:[ \t]*[^\n]+\n\\)+")
             nil t)
        (gnus-kill-all-overlays)
        ;; FIXEDCASE is t: without it `replace-match' upcases the
        ;; whole replacement, including the literal " wrote:", when
        ;; the matched block contains no lowercase letters.
        (replace-match "\\1 wrote:\n" t)
        (match-beginning 0)))))
410
411 ;; On Wed, 16 Jan 2002 23:23:30 +0100, John Doe <john.doe@some.domain> wrote:
412
(defun gnus-outlook-repair-attribution-other ()
  "Repair a broken attribution line written by agents other than Outlook.
An attribution that was wrapped onto two lines is joined into one,
and a leading part matching `gnus-outlook-deuglify-attrib-cut-regexp'
is dropped.  Return the position where the repaired line starts, or
nil if nothing matched."
  (let ((case-fold-search nil)
        (inhibit-read-only t)
        (marks gnus-outlook-deuglify-cite-marks))
    (gnus-with-article-buffer
      ;; Groups 1-3 belong to the optional cut-off prefix; this count
      ;; assumes the cut regexp itself has two groups, as the default
      ;; does.  Group 4 is the rest of the first line, group 5 the
      ;; start of the second line, group 6 the verb and group 7 the
      ;; end marker.
      (let ((attrib-re
             (concat "^\\(" gnus-outlook-deuglify-attrib-cut-regexp "\\)?"
                     "\\([^" marks "].+\\)\n\\([^\n" marks "].*\\)?"
                     "\\(" gnus-outlook-deuglify-attrib-verb-regexp "\\).*"
                     "\\(" gnus-outlook-deuglify-attrib-end-regexp "\\)$")))
        (article-goto-body)
        (when (re-search-forward attrib-re nil t)
          (gnus-kill-all-overlays)
          (replace-match "\\4 \\5\\6\\7")
          (match-beginning 0))))))
429
430 ;;;###autoload
(defun gnus-article-outlook-repair-attribution (&optional nodisplay)
  "Repair a broken attribution line.
The repair functions for other user agents, for \"Original Message\"
blocks and for Outlook lines are tried in that order, stopping at the
first one that returns non-nil.  That value is returned; it is nil
if none of them matched.  If NODISPLAY is non-nil, don't redisplay
the article buffer."
  (interactive "P")
  (prog1 (or (gnus-outlook-repair-attribution-other)
             (gnus-outlook-repair-attribution-block)
             (gnus-outlook-repair-attribution-outlook))
    (unless nodisplay
      (gnus-outlook-display-article-buffer))))
442
(defun gnus-article-outlook-rearrange-citation (&optional nodisplay)
  "Repair the attribution line and rearrange the article around it.
The article is rearranged with `gnus-outlook-rearrange-article' only
when `gnus-article-outlook-repair-attribution' found an attribution.
If NODISPLAY is non-nil, don't redisplay the article buffer."
  (interactive "P")
  (let ((where (gnus-article-outlook-repair-attribution 'nodisplay)))
    ;; Without a recognized attribution line there is nothing to move.
    (when where
      (gnus-outlook-rearrange-article where)))
  (if nodisplay
      nil
    (gnus-outlook-display-article-buffer)))
452
453 ;;;###autoload
(defun gnus-outlook-deuglify-article (&optional nodisplay)
  "Apply every deuglification step to a broken Outlook (Express) article.
Dumbquotes are treated first, then wrapped cited lines are unwrapped,
and finally the attribution is repaired and the citation rearranged.
If NODISPLAY is non-nil, don't redisplay the article buffer."
  (interactive "P")
  (gnus-article-treat-dumbquotes)
  ;; The individual steps must not redisplay; that happens once, at
  ;; the end.
  (dolist (step '(gnus-article-outlook-unwrap-lines
                  gnus-article-outlook-rearrange-citation))
    (funcall step 'nodisplay))
  (and (not nodisplay)
       (gnus-outlook-display-article-buffer)))
466
467 ;;;###autoload
(defun gnus-article-outlook-deuglify-article ()
  "Deuglify a broken Outlook (Express) article and redisplay it.
This is `gnus-outlook-deuglify-article' called so that it always
redisplays the article buffer."
  (interactive)
  (gnus-outlook-deuglify-article))
472
473 (provide 'deuglify)
474
475 ;; Local Variables:
476 ;; coding: iso-8859-1
477 ;; End:
478
479 ;;; deuglify.el ends here