1 ;;; rfc822.el --- hairy rfc822 parser for mail and news and suchlike
3 ;; Copyright (C) 1986, 1987, 1990, 2001, 2002, 2003, 2004,
4 ;; 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
6 ;; Author: Richard Mlynarik <mly@eddie.mit.edu>
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software; you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation; either version 3, or (at your option)
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the
24 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25 ;; Boston, MA 02110-1301, USA.
29 ;; Support functions for parsing RFC-822 headers, used by mail and news
;; Buffer position at which the address currently being parsed begins.
;; Declared here without a value: rfc822-addresses-1 let-binds it to point,
;; rfc822-addresses sets it before each address, and rfc822-bad-address
;; reads it free.
34 (defvar rfc822-address-start)
36 ;; uses rfc822-address-start free, throws to address
;; Give up on the address that starts at rfc822-address-start.
;; The buffer is narrowed so that it begins at rfc822-address-start; when a
;; comma or semicolon is found ahead, the narrowing ends just before it
;; (the alternative end position is on a line not visible in this listing).
;; Each string in the losers list (backslash, open paren, close paren,
;; newline) is searched for from the start of the narrowed text so that the
;; text becomes safe inside a parenthesised comment; what is done with each
;; hit is not visible here.  A prefix beginning with Unparsable address is
;; inserted at the front and a closing quote plus close paren at the end,
;; whitespace is nuked, and the text from rfc822-address-start to point is
;; thrown to the address catch tag.  This function therefore never returns
;; normally to its caller.
;; REASON is presumably the explanation placed after that prefix; the line
;; that would insert it is not visible -- TODO confirm.
37 (defun rfc822-bad-address (reason)
40 (narrow-to-region rfc822-address-start
41 (if (re-search-forward "[,;]" nil t)
42 (max (point-min) (1- (point)))
44 ;; make the error string be suitable for inclusion in (...)
45 (let ((losers '("\\" "(" ")" "\n")))
47 (goto-char (point-min))
48 (while (search-forward (car losers) nil t)
52 (setq losers (cdr losers))))
53 (goto-char (point-min)) (insert "(Unparsable address -- "
56 (goto-char (point-max)) (insert "\")"))
57 (rfc822-nuke-whitespace)
58 (throw 'address (buffer-substring rfc822-address-start (point))))
;; Delete whitespace and parenthesised comments starting at point.
;; Behavior visible in this listing:
;;  - when the next character is an open paren, the comment body is walked
;;    until a close paren is next.  Runs of characters other than parens
;;    and backslash are matched, a recursive call to this function appears
;;    inside the walk (its guarding condition is not visible), and the
;;    errors for an unbalanced comment and an orphaned backslash are raised
;;    through rfc822-bad-address under conditions partly outside this
;;    listing;
;;  - when the next character is a space, tab or newline, the whole run of
;;    such characters is deleted.
;; LEAVE-SPACE is optional.  Its use is on lines not visible here; the test
;; for a preceding space near the end presumably belongs to it -- TODO
;; confirm.  The return value is likewise not visible.
60 (defun rfc822-nuke-whitespace (&optional leave-space)
64 ((= (setq ch (following-char)) ?\()
67 (rfc822-bad-address "Unbalanced comment (...)")
68 (/= (setq ch (following-char)) ?\)))
69 (cond ((looking-at "[^()\\]+")
72 (rfc822-nuke-whitespace))
73 ((< (point) (1- (point-max)))
76 (rfc822-bad-address "orphaned backslash"))))
77 ;; delete remaining "()"
81 ((memq ch '(?\ ?\t ?\n))
82 (delete-region (point)
83 (progn (skip-chars-forward " \t\n") (point)))
90 (= (preceding-char) ?\ )
;; Test whether the text at point matches REGEX and, if so, step over it.
;; REGEX is either a string, which is tried with looking-at (point then
;; moves to the end of the match), or a character, which is compared with
;; the character after point (point then moves forward by one).
;; On the matching path the current match data is captured in tem and
;; rfc822-nuke-whitespace is called with LEAVE-SPACE.  The remainder of the
;; function (the return value, and presumably restoring the saved match
;; data) is not visible in this listing -- TODO confirm.
93 (defun rfc822-looking-at (regex &optional leave-space)
94 (if (cond ((stringp regex)
95 (if (looking-at regex)
96 (progn (goto-char (match-end 0))
100 (= (following-char) regex))
101 (progn (forward-char 1)
103 (let ((tem (match-data)))
104 (rfc822-nuke-whitespace leave-space)
;; Consume one word at point, where a word is an atom or a quoted string.
;; If the next character is a double quote, the text must match the
;; quoted-string regexp (ordinary characters, backslash-escaped characters
;; and backslash-newline are accepted inside the quotes); otherwise the
;; address is rejected as an unterminated quoted string.  Failing that, a
;; run of atom characters is consumed.  If neither form is present the
;; address is rejected as rubbish.  What the successful branches return is
;; on lines not visible in this listing.
108 (defun rfc822-snarf-word ()
109 ;; word is atom | quoted-string
110 (cond ((= (following-char) ?\")
112 (or (rfc822-looking-at "\"\\([^\"\\\n]\\|\\\\.\\|\\\\\n\\)*\"")
113 (rfc822-bad-address "Unterminated quoted string")))
114 ((rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\".]+")
118 (rfc822-bad-address "Rubbish in address"))))
;; Consume a period-separated sequence of words: while a period follows,
;; step over it and consume another word with rfc822-snarf-word.
;; The line between the defun header and the loop is not visible in this
;; listing; presumably it consumes the first word, in the same way that
;; rfc822-snarf-domain consumes a first sub-domain -- TODO confirm.
120 (defun rfc822-snarf-words ()
122 (while (rfc822-looking-at ?.)
123 (rfc822-snarf-word)))
;; Consume one sub-domain at point.
;; If the next character is an open square bracket, the text must match the
;; bracketed domain-literal regexp, otherwise the address is rejected as an
;; unterminated domain literal.  Failing that, a run of atom characters (a
;; domain-ref) is consumed.  If neither form is present the address is
;; rejected through rfc822-bad-address.
125 (defun rfc822-snarf-subdomain ()
126 ;; sub-domain is domain-ref | domain-literal
127 (cond ((= (following-char) ?\[)
129 (or (rfc822-looking-at "\\[\\([^][\\\n]\\|\\\\.\\|\\\\\n\\)*\\]")
130 (rfc822-bad-address "Unterminated domain literal [...]")))
131 ((rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\".]+")
132 ;; domain-ref = atom
135 (rfc822-bad-address "Rubbish in host/domain specification"))))
;; Consume a domain at point: one sub-domain followed by any number of
;; period-separated sub-domains.  rfc822-looking-at steps over each period
;; before the next sub-domain is consumed by rfc822-snarf-subdomain.
137 (defun rfc822-snarf-domain ()
138 (rfc822-snarf-subdomain)
139 (while (rfc822-looking-at ?.)
140 (rfc822-snarf-subdomain)))
;; Collect items by calling SNARFER repeatedly until TERMINATOR is seen.
;; NAME is interpolated into the error messages (end of addresses in the
;; middle of NAME, gubbish in the middle of NAME).
;; SEPARATOR and TERMINATOR are tested with rfc822-looking-at; consecutive
;; separators are skipped and have no effect.
;; SNARFER is called with no arguments.  When its result is a list, the
;; reversed result is prepended to the accumulated list; the handling of a
;; non-list result is on a line not visible in this listing.
;; The argument list continues past the visible line, and the loop header
;; and return value are likewise not visible -- TODO confirm against the
;; full file.
142 (defun rfc822-snarf-frob-list (name separator terminator snarfer
149 (format "End of addresses in middle of %s" name)))
150 ((rfc822-looking-at terminator)
152 ((rfc822-looking-at separator)
153 ;; multiple separators are allowed and do nothing.
154 (while (rfc822-looking-at separator))
160 (format "Gubbish in middle of %s" name))))
161 (setq tem (funcall snarfer)
164 (setq list (if (listp tem)
165 (nconc (reverse tem) list)
169 ;; return either an address (a string) or a list of addresses
;; Parse one rfc822 address starting at point.
;; rfc822-address-start is bound to point for the duration of the parse so
;; that rfc822-bad-address can find the beginning of the address.
;; ALLOW-GROUPS, when nil, makes a group name followed by a colon an error.
;; Outline of the visible logic:
;;  - Fast path: a bare local part, optionally followed by @ and a domain
;;    part, that is then followed by a comma (other accepted followers are
;;    on lines not visible here) is thrown straight to the address catch
;;    tag, after removing a space that rfc822-looking-at may have inserted.
;;  - Otherwise parsing restarts at rfc822-address-start and dispatches on
;;    what follows the words read so far: @ gives an addr-spec, a colon
;;    gives a group (parsed with rfc822-snarf-frob-list, recursing into this
;;    function), an open angle bracket gives a route-addr, and further word
;;    characters or periods are accumulated.
;;  - The closing cond returns the text of a single bare word, throws nil
;;    to the address catch tag, or reports a missing comma or a strange
;;    character through rfc822-bad-address; the conditions for the nil
;;    throw and for one of the error branches are not visible here.
;; The variables n, again, strip and end are bound or set on lines not
;; visible in this listing; n is presumably the number of words consumed so
;; far -- TODO confirm.
170 (defun rfc822-addresses-1 (&optional allow-groups)
171 ;; Looking for an rfc822 `address'
172 ;; Either a group (1*word ":" [#mailbox] ";")
173 ;; or a mailbox (addr-spec | 1*word route-addr)
174 ;; addr-spec is (local-part "@" domain)
175 ;; route-addr is ("<" [1#("@" domain) ":"] addr-spec ">")
176 ;; local-part is (word *("." word))
177 ;; word is (atom | quoted-string)
178 ;; quoted-string is ("\([^\"\\n]\|\\.\|\\\n\)")
179 ;; atom is [^\000-\037\177 ()<>@,;:\".[]]+
180 ;; domain is sub-domain *("." sub-domain)
181 ;; sub-domain is domain-ref | domain-literal
182 ;; domain-literal is "[" *(dtext | quoted-pair) "]"
183 ;; dtext is "[^][\\n"
184 ;; domain-ref is atom
185 (let ((rfc822-address-start (point))
188 ;; optimize common cases:
191 ;; followed by "\\'\\|,\\|([^()\\]*)\\'"
192 ;; other common cases are:
193 ;; foo bar <foo.bar@baz.zap>
194 ;; "foo bar" <foo.bar@baz.zap>
195 ;; those aren't hacked yet.
196 (if (and (rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\"]+\\(\\|@[^][\000-\037 ()<>@,;:\\\"]+\\)" t)
198 (rfc822-looking-at ?,))))
200 ;; rfc822-looking-at may have inserted a space
201 (or (bobp) (/= (preceding-char) ?\ ) (delete-char -1))
202 ;; relying on the fact that rfc822-looking-at <char>
203 ;; doesn't mung match-data
204 (throw 'address (buffer-substring rfc822-address-start (match-end 0)))))
;; Fast path did not apply: start over from the beginning of the address.
205 (goto-char rfc822-address-start)
;; addr-spec: exactly one word so far, followed by @ and a domain.
207 (cond ((and (= n 1) (rfc822-looking-at ?@))
209 (rfc822-snarf-domain)
211 (buffer-substring rfc822-address-start (point))))
;; group: the words read so far are the group name.
212 ((rfc822-looking-at ?:)
213 (cond ((not allow-groups)
214 (rfc822-bad-address "A group name may not appear here"))
216 (rfc822-bad-address "No name for :...; group")))
219 ;; return a list of addresses
220 (rfc822-snarf-frob-list ":...; group" ?\, ?\;
221 'rfc822-addresses-1 t)))
;; route-addr: start records point just after the open angle bracket.
222 ((rfc822-looking-at ?<)
223 (let ((start (point))
225 (cond ((rfc822-looking-at ?>)
228 ((and (not (eobp)) (= (following-char) ?\@))
229 ;; <@foo.bar,@baz:quux@abcd.efg>
230 (rfc822-snarf-frob-list "<...> address" ?\, ?\:
232 (if (rfc822-looking-at ?\@)
233 (rfc822-snarf-domain)
235 "Gubbish in route-addr")))))
237 (or (rfc822-looking-at ?@)
238 (rfc822-bad-address "Malformed <..@..> address"))
239 (rfc822-snarf-domain)
241 ((progn (rfc822-snarf-words) (rfc822-looking-at ?@))
242 ; allow <foo> (losing unix seems to do this)
243 (rfc822-snarf-domain)))
;; The closing angle bracket is required; the returned text either includes
;; or excludes the brackets depending on strip.
245 (if (rfc822-looking-at ?\>)
247 (buffer-substring (if strip start (1- start))
248 (if strip end (1+ end))))
249 (rfc822-bad-address "Unterminated <...> address")))))
250 ((looking-at "[^][\000-\037 ()<>@,;:\\.]")
251 ;; this allows "." to be part of the words preceding
252 ;; an addr-spec, since many broken mailers output
253 ;; "Hern K. Herklemeyer III
254 ;; <yank@megadeath.dod.gods-own-country>"
257 (or (= n 0) (bobp) (= (preceding-char) ?\ )
261 (setq again (or (rfc822-looking-at ?.)
262 (looking-at "[^][\000-\037 ()<>@,;:\\.]"))))))
264 (throw 'address nil))
265 ((= n 1) ; allow "foo" (losing unix seems to do this)
267 (buffer-substring rfc822-address-start (point))))
269 (rfc822-bad-address "Missing comma between addresses or badly-formatted address"))
270 ((or (eobp) (= (following-char) ?,))
271 (rfc822-bad-address "Missing comma or route-spec"))
273 (rfc822-bad-address "Strange character or missing comma")))))))
;; Parse HEADER-TEXT, the text of an address header, into addresses.
;; Fast path: when the string matched (its argument is on a line not
;; visible here, presumably HEADER-TEXT) is a single atom surrounded only by
;; spaces or tabs, a one-element list holding that atom is returned.
;; Otherwise a new buffer is generated for the parse, with
;; case-fold-search made buffer-local and set to nil.  Continuation lines
;; are unfolded: a newline followed by a space or tab, and not preceded by
;; an unpaired backslash, is replaced by a single space.
;; The parse loop then nukes leading whitespace, records point in
;; rfc822-address-start, and inside a catch for the address tag either
;; steps over a comma, rejects a character that cannot start an address, or
;; calls rfc822-addresses-1 with groups allowed.  A result held in tem is
;; either consed onto the accumulated list or, in the other visible branch,
;; destructively reversed and prepended to it; the tests selecting between
;; them are not visible.
;; The buffer is killed at the end when buf is non-nil.  The loop header,
;; the final return value and any unwind protection are on lines not
;; visible in this listing -- TODO confirm.
276 (defun rfc822-addresses (header-text)
277 (if (string-match "\\`[ \t]*\\([^][\000-\037 ()<>@,;:\\\".]+\\)[ \t]*\\'"
279 ;; Make very simple case moderately fast.
280 (list (substring header-text (match-beginning 1) (match-end 1)))
281 (let ((buf (generate-new-buffer " rfc822")))
285 (make-local-variable 'case-fold-search)
286 (setq case-fold-search nil) ;For speed(?)
288 ;; unfold continuation lines
289 (goto-char (point-min))
291 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
292 (replace-match "\\1 " t))
294 (goto-char (point-min))
297 rfc822-address-start); this is for rfc822-bad-address
298 (rfc822-nuke-whitespace)
300 (setq rfc822-address-start (point))
302 (catch 'address ; this is for rfc822-bad-address
303 (cond ((rfc822-looking-at ?\,)
305 ((looking-at "[][\000-\037@;:\\.>)]")
308 (format "Strange character \\%c found"
311 (rfc822-addresses-1 t)))))
314 (setq list (cons tem list)))
316 (setq list (nconc (nreverse tem) list)))))
318 (and buf (kill-buffer buf))))))
322 ;; arch-tag: 5d388a24-e173-40fb-9b8e-85269de44b37
323 ;;; rfc822.el ends here