1 ;;; rfc822.el --- hairy rfc822 parser for mail and news and suchlike
3 ;; Copyright (C) 1986-1987, 1990, 2001-2014 Free Software Foundation,
6 ;; Author: Richard Mlynarik <mly@eddie.mit.edu>
7 ;; Maintainer: emacs-devel@gnu.org
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
27 ;; Support functions for parsing RFC-822 headers, used by mail and news modes.
;; Buffer position at which the address currently being parsed begins.
;; Declared here without a value; rfc822-addresses-1 and rfc822-addresses
;; bind it to (point), and rfc822-bad-address reads it.
32 (defvar rfc822-address-start)
;; Turn the text of an address that failed to parse into a parenthesised
;; note beginning with (Unparsable address -- and deliver it with a throw
;; to the address catch tag.  Callers pass REASON as a short explanation
;; string; the lines that use it are not visible in this view.
34 ;; uses rfc822-address-start free, throws to address
35 (defun rfc822-bad-address (reason)
;; Confine editing to the bad address: from its start to just before the
;; next comma or semicolon (the no-match alternative is not visible here).
38 (narrow-to-region rfc822-address-start
39 (if (re-search-forward "[,;]" nil t)
40 (max (point-min) (1- (point)))
42 ;; make the error string be suitable for inclusion in (...)
43 (let ((losers '("\\" "(" ")" "\n")))
;; Visit every occurrence of each string in losers, one string at a time,
;; restarting from the beginning of the narrowed region for each string.
;; What is done at each occurrence is on lines not visible here.
45 (goto-char (point-min))
46 (while (search-forward (car losers) nil t)
50 (setq losers (cdr losers))))
;; Wrap the narrowed text: the opening of the note goes at the front,
;; a closing quote and paren at the end.
51 (goto-char (point-min)) (insert "(Unparsable address -- "
54 (goto-char (point-max)) (insert "\")"))
55 (rfc822-nuke-whitespace)
;; Non-local exit: the text from the address start to point becomes the
;; value of the enclosing catch on the address tag.
56 (throw 'address (buffer-substring rfc822-address-start (point))))
;; Delete whitespace and parenthesised comments found at point.
;; LEAVE-SPACE is optional; the lines that consult it are not visible in
;; this view.  NOTE(review): a comment in rfc822-addresses-1 says a space
;; may have been inserted on behalf of the caller -- confirm against the
;; full definition.
;; Malformed comments are reported through rfc822-bad-address, which
;; exits with a throw.
58 (defun rfc822-nuke-whitespace (&optional leave-space)
;; Case 1: the character after point opens a comment.
62 ((= (setq ch (following-char)) ?\()
65 (rfc822-bad-address "Unbalanced comment (...)")
;; Keep scanning while the next character is not the closing paren.
66 (/= (setq ch (following-char)) ?\)))
;; Skip a run of characters that are neither parens nor backslash.
67 (cond ((looking-at "[^()\\]+")
;; Recursive call; presumably handles a nested comment -- confirm.
70 (rfc822-nuke-whitespace))
71 ((< (point) (1- (point-max)))
74 (rfc822-bad-address "orphaned backslash"))))
75 ;; delete remaining "()"
;; Case 2: the character after point is a space, tab or newline;
;; delete the whole run of such characters.
79 ((memq ch '(?\ ?\t ?\n))
80 (delete-region (point)
81 (progn (skip-chars-forward " \t\n") (point)))
88 (= (preceding-char) ?\ )
;; Try to match REGEX at point and, on success, step over the match.
;; REGEX is either a regexp string, tested with looking-at, or a single
;; character, compared with the character after point.
;; LEAVE-SPACE is passed through unchanged to rfc822-nuke-whitespace.
;; The value returned is computed on lines not visible in this view;
;; callers in this file use it as a success flag.
91 (defun rfc822-looking-at (regex &optional leave-space)
92 (if (cond ((stringp regex)
93 (if (looking-at regex)
;; String case: move point to the end of the matched text.
94 (progn (goto-char (match-end 0))
;; Character case: compare with the next character and step over it.
98 (= (following-char) regex))
99 (progn (forward-char 1)
;; Capture the match data before removing trailing whitespace and
;; comments; presumably it is restored afterwards on a line not visible
;; here, since rfc822-addresses-1 reads match-end after such a call.
101 (let ((tem (match-data)))
102 (rfc822-nuke-whitespace leave-space)
;; Consume one word at point: a quoted string when point is at a double
;; quote, otherwise an atom.  Failures are reported through
;; rfc822-bad-address, which exits with a throw.
106 (defun rfc822-snarf-word ()
107 ;; word is atom | quoted-string
108 (cond ((= (following-char) ?\")
;; Quoted string: between the quotes, any character other than a double
;; quote, backslash or newline, or a backslash followed by any character
;; or by a newline.
110 (or (rfc822-looking-at "\"\\([^\"\\\n]\\|\\\\.\\|\\\\\n\\)*\"")
111 (rfc822-bad-address "Unterminated quoted string")))
;; Atom: one or more characters excluding brackets, control characters
;; 000-037, space, parens, angle brackets, at-sign, comma, semicolon,
;; colon, backslash, double quote and dot.
112 ((rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\".]+")
116 (rfc822-bad-address "Rubbish in address"))))
;; Consume a dot-separated sequence of words: each time a dot is matched
;; (and consumed) at point, read one more word with rfc822-snarf-word.
;; NOTE(review): the first word is presumably read on a line that is not
;; visible in this view -- confirm.
118 (defun rfc822-snarf-words ()
120 (while (rfc822-looking-at ?.)
121 (rfc822-snarf-word)))
;; Consume one sub-domain at point: a bracketed domain literal when point
;; is at an opening bracket, otherwise an atom.  Failures are reported
;; through rfc822-bad-address, which exits with a throw.
123 (defun rfc822-snarf-subdomain ()
124 ;; sub-domain is domain-ref | domain-literal
125 (cond ((= (following-char) ?\[)
;; Bracketed form: inside the brackets, any character other than a
;; bracket, backslash or newline, or a backslash followed by any
;; character or by a newline.
127 (or (rfc822-looking-at "\\[\\([^][\\\n]\\|\\\\.\\|\\\\\n\\)*\\]")
128 (rfc822-bad-address "Unterminated domain literal [...]")))
129 ((rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\".]+")
130 ;; domain-ref = atom
133 (rfc822-bad-address "Rubbish in host/domain specification"))))
;; domain is sub-domain *("." sub-domain)
;; Consume a complete domain at point: one sub-domain, then one more
;; sub-domain after every dot that follows.  Always returns nil; errors
;; surface through rfc822-snarf-subdomain.
(defun rfc822-snarf-domain ()
  (let ((more-labels t))
    (while more-labels
      (rfc822-snarf-subdomain)
      ;; A dot (consumed by the match) announces another sub-domain.
      (setq more-labels (rfc822-looking-at ?.)))))
;; Read a list of items, each parsed by calling SNARFER, with items
;; delimited by SEPARATOR and the list closed by TERMINATOR; both are
;; matched with rfc822-looking-at.  NAME is used in the error messages.
;; The parameter list continues on a line not visible in this view, and
;; so do the loop skeleton and the final return value.
140 (defun rfc822-snarf-frob-list (name separator terminator snarfer
147 (format "End of addresses in middle of %s" name)))
148 ((rfc822-looking-at terminator)
150 ((rfc822-looking-at separator)
151 ;; multiple separators are allowed and do nothing.
152 (while (rfc822-looking-at separator))
158 (format "Gubbish in middle of %s" name))))
159 (setq tem (funcall snarfer)
;; SNARFER may return a list; it is then reversed and spliced in front
;; of the accumulator.  NOTE(review): this suggests the accumulator is
;; kept in reverse order and reversed before returning -- confirm.
162 (setq list (if (listp tem)
163 (nconc (reverse tem) list)
;; Parse one address starting at point.  Binds rfc822-address-start to
;; the starting position.  Several paths finish with a throw to the
;; address catch tag instead of a normal return.  When ALLOW-GROUPS is
;; nil, a group (name followed by a colon) is reported as an error;
;; otherwise its members are read with rfc822-snarf-frob-list.
;; Many interior lines of this definition are not visible in this view.
167 ;; return either an address (a string) or a list of addresses
168 (defun rfc822-addresses-1 (&optional allow-groups)
169 ;; Looking for an rfc822 `address'
170 ;; Either a group (1*word ":" [#mailbox] ";")
171 ;; or a mailbox (addr-spec | 1*word route-addr)
172 ;; addr-spec is (local-part "@" domain)
173 ;; route-addr is ("<" [1#("@" domain) ":"] addr-spec ">")
174 ;; local-part is (word *("." word))
175 ;; word is (atom | quoted-string)
176 ;; quoted-string is ("\([^\"\\n]\|\\.\|\\\n\)")
177 ;; atom is [^\000-\037\177 ()<>@,;:\".[]]+
178 ;; domain is sub-domain *("." sub-domain)
179 ;; sub-domain is domain-ref | domain-literal
180 ;; domain-literal is "[" *(dtext | quoted-pair) "]"
181 ;; dtext is "[^][\\n]"
182 ;; domain-ref is atom
183 (let ((rfc822-address-start (point))
186 ;; optimize common cases:
189 ;; followed by "\\'\\|,\\|([^()\\]*)\\'"
190 ;; other common cases are:
191 ;; foo bar <foo.bar@baz.zap>
192 ;; "foo bar" <foo.bar@baz.zap>
193 ;; those aren't hacked yet.
;; Fast path: a run of ordinary characters, optionally followed by an
;; at-sign and another such run.  The final argument t is LEAVE-SPACE.
194 (if (and (rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\"]+\\(\\|@[^][\000-\037 ()<>@,;:\\\"]+\\)" t)
196 (rfc822-looking-at ?,))))
198 ;; rfc822-looking-at may have inserted a space
199 (or (bobp) (/= (preceding-char) ?\ ) (delete-char -1))
200 ;; relying on the fact that rfc822-looking-at <char>
201 ;; doesn't mung match-data
202 (throw 'address (buffer-substring rfc822-address-start (match-end 0)))))
;; Fast path did not apply: restart the general parse from the address
;; start.
203 (goto-char rfc822-address-start)
;; NOTE(review): n appears to count the words read so far; its binding
;; is on a line not visible here -- confirm.
205 (cond ((and (= n 1) (rfc822-looking-at ?@))
207 (rfc822-snarf-domain)
209 (buffer-substring rfc822-address-start (point))))
;; A colon at this point introduces a group.
210 ((rfc822-looking-at ?:)
211 (cond ((not allow-groups)
212 (rfc822-bad-address "A group name may not appear here"))
214 (rfc822-bad-address "No name for :...; group")))
217 ;; return a list of addresses
218 (rfc822-snarf-frob-list ":...; group" ?\, ?\;
219 'rfc822-addresses-1 t)))
;; An opening angle bracket introduces a route-addr.
220 ((rfc822-looking-at ?<)
221 (let ((start (point))
223 (cond ((rfc822-looking-at ?>)
226 ((and (not (eobp)) (= (following-char) ?\@))
227 ;; <@foo.bar,@baz:quux@abcd.efg>
228 (rfc822-snarf-frob-list "<...> address" ?\, ?\:
230 (if (rfc822-looking-at ?\@)
231 (rfc822-snarf-domain)
233 "Gubbish in route-addr")))))
235 (or (rfc822-looking-at ?@)
236 (rfc822-bad-address "Malformed <..@..> address"))
237 (rfc822-snarf-domain)
239 ((progn (rfc822-snarf-words) (rfc822-looking-at ?@))
240 ; allow <foo> (losing unix seems to do this)
241 (rfc822-snarf-domain)))
;; The route-addr must be closed by an angle bracket; otherwise it is
;; reported as unterminated.
243 (if (rfc822-looking-at ?\>)
245 (buffer-substring (if strip start (1- start))
246 (if strip end (1+ end))))
247 (rfc822-bad-address "Unterminated <...> address")))))
248 ((looking-at "[^][\000-\037 ()<>@,;:\\.]")
249 ;; this allows "." to be part of the words preceding
250 ;; an addr-spec, since many broken mailers output
251 ;; "Hern K. Herklemeyer III
252 ;; <yank@megadeath.dod.gods-own-country>"
255 (or (= n 0) (bobp) (= (preceding-char) ?\ )
259 (setq again (or (rfc822-looking-at ?.)
260 (looking-at "[^][\000-\037 ()<>@,;:\\.]"))))))
262 (throw 'address nil))
263 ((= n 1) ; allow "foo" (losing unix seems to do this)
265 (buffer-substring rfc822-address-start (point))))
267 (rfc822-bad-address "Missing comma between addresses or badly-formatted address"))
268 ((or (eobp) (= (following-char) ?,))
269 (rfc822-bad-address "Missing comma or route-spec"))
271 (rfc822-bad-address "Strange character or missing comma")))))))
;; Parse the string HEADER-TEXT and return a list of the addresses found
;; in it.  When the text is just one atom with optional surrounding
;; blanks, a one-element list is returned without creating a buffer.
;; Otherwise the work is done in a freshly generated buffer, which is
;; killed at the end.  Several interior lines of this definition are not
;; visible in this view.
274 (defun rfc822-addresses (header-text)
275 (if (string-match "\\`[ \t]*\\([^][\000-\037 ()<>@,;:\\\".]+\\)[ \t]*\\'"
277 ;; Make very simple case moderately fast.
278 (list (substring header-text (match-beginning 1) (match-end 1)))
279 (let ((buf (generate-new-buffer " rfc822")))
281 (with-current-buffer buf
282 (make-local-variable 'case-fold-search)
283 (setq case-fold-search nil) ;For speed(?)
285 ;; unfold continuation lines
286 (goto-char (point-min))
;; Match a newline followed by a space or tab, preceded by a
;; non-backslash character and an even number of backslashes; keep the
;; preceding text (group 1) and replace the line break plus that blank
;; with one space.
288 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]"
290 (replace-match "\\1 " t))
292 (goto-char (point-min))
293 ;; Give `rfc822-address-start' a non-nil initial value to
294 ;; prevent `rfc822-bad-address' from raising a
295 ;; `wrong-type-argument' error.
296 (let* ((rfc822-address-start (point))
300 ;; Note that `rfc822-nuke-whitespace' and
301 ;; `rfc822-looking-at' can throw.
302 (rfc822-nuke-whitespace)
;; Each address starts after any leading whitespace and comments.
304 (setq rfc822-address-start (point))
306 (cond ((rfc822-looking-at ?\,)
308 ((looking-at "[][\000-\037@;:\\.>)]")
310 (catch 'address ; For rfc822-bad-address
312 (format "Strange character \\%c found"
;; Groups are allowed at top level, hence the t argument.
315 (rfc822-addresses-1 t))))
;; One result is pushed onto list; a list of results is reversed and
;; spliced in, so list stays in reverse order.  The tests choosing
;; between the two are on lines not visible here.
318 (setq list (cons tem list)))
320 (setq list (nconc (nreverse tem) list)))))
;; Restore the original order.  err, when non-nil, is placed at the
;; front of the reversed list and therefore ends up last in the result.
322 (nreverse (append (if err (list err)) list))))
;; Dispose of the temporary buffer.  NOTE(review): presumably the
;; cleanup part of an unwind-protect whose opening is not visible here.
323 (and buf (kill-buffer buf))))))
327 ;;; rfc822.el ends here