1 ;;; rfc822.el --- hairy rfc822 parser for mail and news and suchlike
3 ;; Copyright (C) 1986, 87, 1990 Free Software Foundation, Inc.
5 ;; Author: Richard Mlynarik <mly@eddie.mit.edu>
9 ;; This file is part of GNU Emacs.
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
28 ;; Support functions for parsing RFC-822 headers, used by mail and news
33 ;; Uses `address-start' as a free variable; exits by throwing to the `address' catch tag.
;; Report the address currently being parsed as unparsable.
;; REASON is a string describing what went wrong.  The function does
;; not return normally: it finishes with (throw 'address ...), so it
;; must be called inside a matching catch, with `address-start' bound
;; by the caller.
;; NOTE(review): several lines of this definition are not visible in
;; this view; the comments below describe only what is shown.
34 (defun rfc822-bad-address (reason)
;; Narrow to the text running from address-start up to the character
;; just before point after the next comma or semicolon is found.
;; The alternative used when no delimiter is found is not visible here.
37 (narrow-to-region address-start
38 (if (re-search-forward "[,;]" nil t)
39 (max (point-min) (1- (point)))
41 ;; make the error string be suitable for inclusion in (...)
;; Each string in losers is searched for from the start of the narrowed
;; region; what is done at each hit is in lines not shown -- presumably
;; the character is escaped, TODO confirm.
42 (let ((losers '("\\" "(" ")" "\n")))
44 (goto-char (point-min))
45 (while (search-forward (car losers) nil t)
49 (setq losers (cdr losers))))
;; Wrap the narrowed text: a fixed prefix goes at its start and a closing
;; quote plus close paren at its end.  REASON is presumably inserted as
;; part of the prefix -- that line is not visible.
50 (goto-char (point-min)) (insert "(Unparsable address -- "
53 (goto-char (point-max)) (insert "\")"))
54 (rfc822-nuke-whitespace)
;; Hand the text between address-start and point to the enclosing catch.
55 (throw 'address (buffer-substring address-start (point))))
;; Remove whitespace and parenthesized comments found at point.
;; Dispatch is on the character after point: an open paren starts a
;; comment to be scanned, and a space, tab or newline starts a run that
;; is deleted.  Malformed comments are reported via rfc822-bad-address,
;; which throws to the address tag.
;; LEAVE-SPACE is optional; the code that consumes it is mostly outside
;; this view.  The visible comparison of preceding-char with a space
;; suggests it controls keeping one separating space -- TODO confirm.
;; NOTE(review): several lines of this definition are not visible here.
57 (defun rfc822-nuke-whitespace (&optional leave-space)
;; Comment case: the character after point is an open paren.
61 ((= (setq ch (following-char)) ?\()
;; Raised when the comment is never closed; the guarding test is on a
;; line not shown (presumably end of buffer).
64 (rfc822-bad-address "Unbalanced comment (...)")
;; Keep scanning the comment while the next character is not a close paren.
65 (/= (setq ch (following-char)) ?\)))
;; A run of characters other than parens and backslash.
66 (cond ((looking-at "[^()\\]+")
;; Recursive call; its guarding condition is not visible -- presumably a
;; nested open paren.
69 (rfc822-nuke-whitespace))
;; True when at least two characters remain before point-max.
70 ((< (point) (1- (point-max)))
73 (rfc822-bad-address "orphaned backslash"))))
74 ;; delete remaining "()"
;; Whitespace case: delete the whole run of spaces, tabs and newlines.
78 ((memq ch '(?\ ?\t ?\n))
79 (delete-region (point)
80 (progn (skip-chars-forward " \t\n") (point)))
;; Checks whether the character before point is already a space.
87 (= (preceding-char) ?\ )
;; Try to match REGEX at point and, on success, move past the match.
;; REGEX is either a string, matched with looking-at, or a non-string
;; value compared against following-char with = (so a character code).
;; LEAVE-SPACE is passed straight through to rfc822-nuke-whitespace.
;; NOTE(review): the lines that produce the return value are not
;; visible in this view; callers use the result as a boolean.
90 (defun rfc822-looking-at (regex &optional leave-space)
91 (if (cond ((stringp regex)
92 (if (looking-at regex)
;; String case: move point to the end of the match.
93 (progn (goto-char (match-end 0))
;; Non-string case: compare the next character with REGEX.
97 (= (following-char) regex))
98 (progn (forward-char 1)
;; The match data is captured in tem before whitespace is stripped;
;; presumably it is restored afterwards -- that line is not visible.
100 (let ((tem (match-data)))
101 (rfc822-nuke-whitespace leave-space)
;; Consume one rfc822 word at point, using rfc822-looking-at.
;; Failures are reported through rfc822-bad-address, which throws.
;; NOTE(review): some lines of this definition are not visible here.
105 (defun rfc822-snarf-word ()
106 ;; word is atom | quoted-string
;; Quoted-string case: the next character is a double quote.
107 (cond ((= (following-char) ?\")
;; Inside the quotes: any character other than a double quote, backslash
;; or newline; or a backslash followed by any character, newline included.
109 (or (rfc822-looking-at "\"\\([^\"\\\n]\\|\\\\.\\|\\\\\n\\)*\"")
110 (rfc822-bad-address "Unterminated quoted string")))
;; Atom case: one or more characters outside the excluded set of
;; brackets, control characters, space and the listed specials.
111 ((rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\".]+")
;; Fallback when neither form matched (the clause head is not visible).
115 (rfc822-bad-address "Rubbish in address"))))
;; Consume a sequence of words separated by dots.
;; NOTE(review): a line between the defun and the loop is not visible
;; in this view -- presumably it consumes the first word; verify.
117 (defun rfc822-snarf-words ()
;; After each dot consumed by rfc822-looking-at, consume another word.
119 (while (rfc822-looking-at ?.)
120 (rfc822-snarf-word)))
;; Consume one sub-domain at point, using rfc822-looking-at.
;; Failures are reported through rfc822-bad-address, which throws.
;; NOTE(review): some lines of this definition are not visible here.
122 (defun rfc822-snarf-subdomain ()
123 ;; sub-domain is domain-ref | domain-literal
;; Bracketed case: the next character is an open square bracket.
124 (cond ((= (following-char) ?\[)
;; Inside the brackets: any character other than a bracket, backslash or
;; newline; or a backslash followed by any character, newline included.
126 (or (rfc822-looking-at "\\[\\([^][\\\n]\\|\\\\.\\|\\\\\n\\)*\\]")
127 (rfc822-bad-address "Unterminated domain literal [...]")))
;; Unbracketed case: same character class as the atom pattern.
128 ((rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\".]+")
;; NOTE(review): the bracketed branch above is the domain-literal, so
;; the label on the next line probably means domain-ref -- confirm.
129 ;; domain-literal = atom
;; Fallback when neither form matched (the clause head is not visible).
132 (rfc822-bad-address "Rubbish in host/domain specification"))))
(defun rfc822-snarf-domain ()
  ;; Consume one sub-domain, then keep consuming a further sub-domain
  ;; after every "." that rfc822-looking-at accepts.  Returns nil.
  (let ((more t))
    (while more
      (rfc822-snarf-subdomain)
      (setq more (rfc822-looking-at ?.)))))
;; Parse a list of items delimited by SEPARATOR and ended by TERMINATOR.
;; NAME is a string used only in error messages.  SEPARATOR and
;; TERMINATOR are handed to rfc822-looking-at.  SNARFER is a function
;; called with funcall to parse one item.
;; NOTE(review): the rest of the argument list and much of the body are
;; not visible in this view, so the loop structure, the error calls that
;; use the formatted messages, and the return value are not documented.
139 (defun rfc822-snarf-frob-list (name separator terminator snarfer
146 (format "End of addresses in middle of %s" name)))
147 ((rfc822-looking-at terminator)
149 ((rfc822-looking-at separator)
150 ;; multiple separators are allowed and do nothing.
151 (while (rfc822-looking-at separator))
157 (format "Gubbish in middle of %s" name))))
;; Parse one item with the supplied function.
158 (setq tem (funcall snarfer)
;; A list result is spliced in reversed ahead of what is already
;; accumulated; the branch for a non-list result is not visible.
161 (setq list (if (listp tem)
162 (nconc (reverse tem) list)
166 ;; return either an address (a string) or a list of addresses
;; Parse one rfc822 address starting at point.
;; ALLOW-GROUPS, when nil, makes a group name an error.
;; Results are produced either as buffer substrings starting at
;; address-start or by throwing to the address tag; errors go through
;; rfc822-bad-address, which also throws.
;; NOTE(review): many lines of this definition are not visible in this
;; view.  In particular the bindings and updates of n, strip, end and
;; again are not shown, so their meaning is not documented here.
167 (defun rfc822-addresses-1 (&optional allow-groups)
168 ;; Looking for an rfc822 `address'
169 ;; Either a group (1*word ":" [#mailbox] ";")
170 ;; or a mailbox (addr-spec | 1*word route-addr)
171 ;; addr-spec is (local-part "@" domain)
172 ;; route-addr is ("<" [1#("@" domain) ":"] addr-spec ">")
173 ;; local-part is (word *("." word))
174 ;; word is (atom | quoted-string)
175 ;; quoted-string is ("\([^\"\\n]\|\\.\|\\\n\)")
176 ;; atom is [^\000-\037\177 ()<>@,;:\".[]]+
177 ;; domain is sub-domain *("." sub-domain)
178 ;; sub-domain is domain-ref | domain-literal
179 ;; domain-literal is "[" *(dtext | quoted-pair) "]"
180 ;; dtext is "[^][\\n"
181 ;; domain-ref is atom
;; Remember where this address begins; rfc822-bad-address reads this
;; binding as a free variable.
182 (let ((address-start (point))
185 ;; optimize common cases:
188 ;; followed by "\\'\\|,\\|([^()\\]*)\\'"
189 ;; other common cases are:
190 ;; foo bar <foo.bar@baz.zap>
191 ;; "foo bar" <foo.bar@baz.zap>
192 ;; those aren't hacked yet.
;; Fast path: a run of ordinary characters, optionally followed by an
;; at-sign and another such run, matched with leave-space set.
193 (if (and (rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\"]+\\(\\|@[^][\000-\037 ()<>@,;:\\\"]+\\)" t)
195 (rfc822-looking-at ?,))))
197 ;; rfc822-looking-at may have inserted a space
198 (or (bobp) (/= (preceding-char) ?\ ) (delete-char -1))
199 ;; relying on the fact that rfc822-looking-at <char>
200 ;; doesn't mung match-data
201 (throw 'address (buffer-substring address-start (match-end 0)))))
;; Fast path did not apply: restart from the beginning of the address.
202 (goto-char address-start)
;; Case: an at-sign follows and n is 1, so parse the domain and return
;; the text from address-start to point.
204 (cond ((and (= n 1) (rfc822-looking-at ?@))
206 (rfc822-snarf-domain)
208 (buffer-substring address-start (point))))
;; Case: a colon follows, which introduces a group.
209 ((rfc822-looking-at ?:)
210 (cond ((not allow-groups)
211 (rfc822-bad-address "A group name may not appear here"))
213 (rfc822-bad-address "No name for :...; group")))
216 ;; return a list of addresses
217 (rfc822-snarf-frob-list ":...; group" ?\, ?\;
218 'rfc822-addresses-1 t)))
;; Case: an open angle bracket follows, which introduces a route-addr.
219 ((rfc822-looking-at ?<)
220 (let ((start (point))
222 (cond ((rfc822-looking-at ?>)
;; A leading at-sign inside the brackets starts a route list.
225 ((and (not (eobp)) (= (following-char) ?\@))
226 ;; <@foo.bar,@baz:quux@abcd.efg>
227 (rfc822-snarf-frob-list "<...> address" ?\, ?\:
229 (if (rfc822-looking-at ?\@)
230 (rfc822-snarf-domain)
232 "Gubbish in route-addr")))))
234 (or (rfc822-looking-at ?@)
235 (rfc822-bad-address "Malformed <..@..> address"))
236 (rfc822-snarf-domain)
238 ((progn (rfc822-snarf-words) (rfc822-looking-at ?@))
239 ; allow <foo> (losing unix seems to do this)
240 (rfc822-snarf-domain)))
;; The closing angle bracket is required.  The returned substring keeps
;; or drops one character at each end depending on strip.
242 (if (rfc822-looking-at ?\>)
244 (buffer-substring (if strip start (1- start))
245 (if strip end (1+ end))))
246 (rfc822-bad-address "Unterminated <...> address")))))
;; Case: an ordinary character follows, so a word (or dotted words) is
;; parsed.
247 ((looking-at "[^][\000-\037 ()<>@,;:\\.]")
248 ;; this allows "." to be part of the words preceding
249 ;; an addr-spec, since many broken mailers output
250 ;; "Hern K. Herklemeyer III
251 ;; <yank@megadeath.dod.gods-own-country>"
254 (or (= n 0) (bobp) (= (preceding-char) ?\ )
;; Continue while a dot was consumed or another ordinary character
;; follows.
258 (setq again (or (rfc822-looking-at ?.)
259 (looking-at "[^][\000-\037 ()<>@,;:\\.]"))))))
;; Throws nil to the address tag; the condition selecting this clause
;; is not visible.
261 (throw 'address nil))
262 ((= n 1) ; allow "foo" (losing unix seems to do this)
264 (buffer-substring address-start (point))))
266 (rfc822-bad-address "Missing comma between addresses or badly-formatted address"))
267 ((or (eobp) (= (following-char) ?,))
268 (rfc822-bad-address "Missing comma or route-spec"))
270 (rfc822-bad-address "Strange character or missing comma")))))))
;; Parse HEADER-TEXT, a string, into a list of addresses.
;; When the whole text is a single atom surrounded only by optional
;; spaces or tabs, that atom is returned as a one-element list without
;; creating a buffer.  Otherwise a buffer named " rfc822" is generated,
;; the text is parsed there with rfc822-addresses-1, and the buffer is
;; killed at the end.
;; NOTE(review): several lines of this definition are not visible in
;; this view, including how the text gets into the buffer, the parsing
;; loop, and whether the cleanup is protected against non-local exits.
273 (defun rfc822-addresses (header-text)
274 (if (string-match "\\`[ \t]*\\([^][\000-\037 ()<>@,;:\\\".]+\\)[ \t]*\\'"
276 ;; Make very simple case moderately fast.
277 (list (substring header-text (match-beginning 1) (match-end 1)))
278 (let ((buf (generate-new-buffer " rfc822")))
;; Matching is made case-sensitive in the current buffer only.
282 (make-local-variable 'case-fold-search)
283 (setq case-fold-search nil) ;For speed(?)
285 ;; unfold continuation lines
286 (goto-char (point-min))
;; A newline followed by a space or tab is replaced by a single space,
;; provided it is preceded by a non-backslash character and then zero or
;; more pairs of backslashes, which are kept via group 1.
288 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
289 (replace-match "\\1 " t))
291 (goto-char (point-min))
292 (rfc822-nuke-whitespace)
295 address-start); this is for rfc822-bad-address
;; Record where the next address begins before parsing it.
297 (setq address-start (point))
299 (catch 'address ; this is for rfc822-bad-address
300 (cond ((rfc822-looking-at ?\,)
;; A character that cannot start an address.
302 ((looking-at "[][\000-\037@;:\\.>)]")
305 (format "Strange character \\%c found"
;; Groups are allowed at top level.
308 (rfc822-addresses-1 t)))))
;; Results are accumulated in list: one value is consed on, a list value
;; is spliced in after nreverse.  The selecting conditions are not
;; visible.
311 (setq list (cons tem list)))
313 (setq list (nconc (nreverse tem) list)))))
315 (and buf (kill-buffer buf))))))
319 ;;; rfc822.el ends here