1 ;; Hairy rfc822 parser for mail and news and suchlike
2 ;; Copyright (C) 1986-1990 Free Software Foundation, Inc.
3 ;; Author Richard Mlynarik.
5 ;; This file is part of GNU Emacs.
7 ;; GNU Emacs is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 1, or (at your option)
12 ;; GNU Emacs is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GNU Emacs; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 ;; uses address-start free, throws to address
;; Report the address beginning at `address-start' as unparsable.
;; REASON is a string describing the problem.  `address-start' is not
;; bound here; the parsing functions that call this bind or set it first.
;; This function does not return normally: it ends with
;; (throw 'address ...), handing the text from `address-start' to point
;; to the enclosing `catch'.
24 (defun rfc822-bad-address (reason)
;; Restrict editing to the offending address.  When a "," or ";" follows,
;; the region ends just before that delimiter (never before point-min).
;; NOTE(review): the end position used when no delimiter is found is not
;; visible in this view.
27 (narrow-to-region address-start
28 (if (re-search-forward "[,;]" nil t)
29 (max (point-min) (1- (point)))
31 ;; make the error string be suitable for inclusion in (...)
;; Each string in `losers' is searched for from the start of the
;; narrowed region.
;; NOTE(review): the action taken on each match is not visible in this
;; view; presumably the match is escaped or replaced -- confirm.
32 (let ((losers '("\\" "(" ")" "\n")))
34 (goto-char (point-min))
35 (while (search-forward (car losers) nil t)
39 (setq losers (cdr losers))))
;; Wrap the text as a parenthesized comment: an opening
;; "(Unparsable address -- " goes at the front and a closing quote plus
;; ")" at the end.
;; NOTE(review): the remaining arguments of the first `insert'
;; (presumably REASON) are not visible in this view.
40 (goto-char (point-min)) (insert "(Unparsable address -- "
43 (goto-char (point-max)) (insert "\")"))
44 (rfc822-nuke-whitespace)
;; Non-local exit: the value thrown is the buffer text from
;; `address-start' to point.
45 (throw 'address (buffer-substring address-start (point))))
;; Clean up the text at point, dispatching on the character after point:
;; a "(" starts a parenthesized comment, and a space, tab or newline
;; starts a run of whitespace that is deleted.
;; Malformed comments are reported through `rfc822-bad-address', which
;; exits with a throw to `address'.
;; NOTE(review): how LEAVE-SPACE is used is not visible in this view; the
;; test of (preceding-char) against a space near the end suggests it
;; controls whether a single space is kept -- confirm.
47 (defun rfc822-nuke-whitespace (&optional leave-space)
;; Comment case: the character after point is "(".
51 ((= (setq ch (following-char)) ?\()
54 (rfc822-bad-address "Unbalanced comment (...)")
;; Keep scanning until the character after point is ")".
55 (/= (setq ch (following-char)) ?\)))
;; A run of characters other than parentheses and backslash.
56 (cond ((looking-at "[^()\\]+")
;; Recursive call; presumably this handles a nested "(" -- the guarding
;; clause is not visible in this view.
59 (rfc822-nuke-whitespace))
;; True only when at least two characters remain after point.
60 ((< (point) (1- (point-max)))
63 (rfc822-bad-address "orphaned backslash"))))
64 ;; delete remaining "()"
;; Whitespace case: delete from point to the end of the run of
;; spaces, tabs and newlines.
68 ((memq ch '(?\ ?\t ?\n))
69 (delete-region (point)
70 (progn (skip-chars-forward " \t\n") (point)))
77 (= (preceding-char) ?\ )
;; Test for REGEX at point and consume it when present.
;; REGEX is either a string, matched as a regular expression with
;; `looking-at', or some other value that is compared with
;; (following-char) -- callers in this file pass character constants
;; such as ?@ and ?. in that case.
;; LEAVE-SPACE is handed on unchanged to `rfc822-nuke-whitespace'.
;; Callers in this file use the result as a predicate.
;; NOTE(review): the final return expression is not visible in this view.
80 (defun rfc822-looking-at (regex &optional leave-space)
81 (if (cond ((stringp regex)
82 (if (looking-at regex)
;; String case: move point to the end of the match.
83 (progn (goto-char (match-end 0))
;; Non-string case: compare REGEX with the character after point ...
87 (= (following-char) regex))
;; ... and step over that one character.
88 (progn (forward-char 1)
;; Save the match data around the whitespace cleanup and restore it
;; afterwards, so the caller still sees the match made above.
90 (let ((tem (match-data)))
91 (rfc822-nuke-whitespace leave-space)
92 (store-match-data tem)
;; Consume one rfc822 `word' at point.
;; Problems are reported through `rfc822-bad-address', which exits with a
;; throw to `address' instead of returning.
95 (defun rfc822-snarf-word ()
96 ;; word is atom | quoted-string
;; A leading double quote means a quoted-string must follow.
97 (cond ((= (following-char) ?\")
;; The body of the quoted string may contain any character other than
;; a double quote, backslash or newline, or a backslash followed by any
;; character (including a newline).
99 (or (rfc822-looking-at "\"\\([^\"\\\n]\\|\\\\.\\|\\\\\n\\)*\"")
100 (rfc822-bad-address "Unterminated quoted string")))
;; Otherwise try an atom: one or more characters outside the brackets,
;; control characters, 8-bit characters, space and the specials listed
;; in the regexp.
101 ((rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\".]+")
105 (rfc822-bad-address "Rubbish in address"))))
;; Consume a dotted sequence of words at point:  word *("." word)
;; (an rfc822 local-part).
;; The first word is mandatory and is consumed unconditionally; each "."
;; that follows must itself be followed by another word.
;; Malformed words are reported by `rfc822-snarf-word' through
;; `rfc822-bad-address', which exits with a throw to `address'.
(defun rfc822-snarf-words ()
  ;; Leading word.  Without this call only words after a "." would be
  ;; consumed, so "foo@bar" would never get past "foo".
  (rfc822-snarf-word)
  (while (rfc822-looking-at ?.)
    (rfc822-snarf-word)))
;; Consume one rfc822 `sub-domain' at point.
;; Problems are reported through `rfc822-bad-address', which exits with a
;; throw to `address' instead of returning.
112 (defun rfc822-snarf-subdomain ()
113 ;; sub-domain is domain-ref | domain-literal
;; A leading "[" means a domain-literal must follow.
114 (cond ((= (following-char) ?\[)
;; Inside the brackets: any character other than brackets, backslash or
;; newline, or a backslash followed by any character (including a
;; newline).
116 (or (rfc822-looking-at "\\[\\([^][\\\n]\\|\\\\.\\|\\\\\n\\)*\\]")
117 (rfc822-bad-address "Unterminated domain literal [...]")))
118 ((rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\".]+")
119 ;; domain-ref = atom
122 (rfc822-bad-address "Rubbish in host/domain specification"))))
;; Consume a domain at point:  sub-domain *("." sub-domain)
;; One sub-domain is always consumed; after that, every "." found at
;; point is consumed and must be followed by another sub-domain.
;; Malformed sub-domains are reported by `rfc822-snarf-subdomain'
;; through `rfc822-bad-address', which exits with a throw to `address'.
(defun rfc822-snarf-domain ()
  ;; The loop condition does all the work: take a sub-domain, then go
  ;; round again only if a "." follows it.
  (while (progn
           (rfc822-snarf-subdomain)
           (rfc822-looking-at ?.))))
;; Parse a list of items delimited by SEPARATOR and ended by TERMINATOR.
;; NAME is a string naming the construct; it is used only in the error
;; messages built below.  SEPARATOR and TERMINATOR are handed to
;; `rfc822-looking-at'.  SNARFER is a function called with `funcall' to
;; parse one item.
;; NOTE(review): the parameter list continues past the line shown here
;; (callers in this file pass at least one extra argument), and the
;; return expression is not visible in this view.
129 (defun rfc822-snarf-frob-list (name separator terminator snarfer
136 (format "End of addresses in middle of %s" name)))
137 ((rfc822-looking-at terminator)
139 ((rfc822-looking-at separator)
140 ;; multiple separators are allowed and do nothing.
141 (while (rfc822-looking-at separator))
147 (format "Gubbish in middle of %s" name))))
;; Parse one item; the result is accumulated into `list' below.
148 (setq tem (funcall snarfer)
;; When SNARFER returns a list, its elements are prepended to `list' in
;; reverse order; `reverse' copies, so the returned list is not modified.
151 (setq list (if (listp tem)
152 (nconc (reverse tem) list)
156 ;; return either an address (a string) or a list of addresses
;; ALLOW-GROUPS non-nil permits the group syntax (name ":" ... ";");
;; otherwise a ":" is reported through `rfc822-bad-address'.
;; Besides returning, this function can exit with (throw 'address ...):
;; the fast path below throws the address string, one branch throws nil,
;; and every `rfc822-bad-address' call throws as well.
157 (defun rfc822-addresses-1 (&optional allow-groups)
158 ;; Looking for an rfc822 `address'
159 ;; Either a group (1*word ":" [#mailbox] ";")
160 ;; or a mailbox (addr-spec | 1*word route-addr)
161 ;; addr-spec is (local-part "@" domain)
162 ;; route-addr is ("<" [1#("@" domain) ":"] addr-spec ">")
163 ;; local-part is (word *("." word))
164 ;; word is (atom | quoted-string)
165 ;; quoted-string is ("\([^\"\\n]\|\\.\|\\\n\)")
166 ;; atom is [^\000-\037\177 ()<>@,;:\".[]]+
167 ;; domain is sub-domain *("." sub-domain)
168 ;; sub-domain is domain-ref | domain-literal
169 ;; domain-literal is "[" *(dtext | quoted-pair) "]"
170 ;; dtext is "[^][\\n"
171 ;; domain-ref is atom
;; Bind `address-start' to the start of this address; it is the variable
;; `rfc822-bad-address' reads to find the text to report.
172 (let ((address-start (point))
175 ;; optimize common cases:
178 ;; followed by "\\'\\|,\\|([^()\\]*)\\'"
179 ;; other common cases are:
180 ;; foo bar <foo.bar@baz.zap>
181 ;; "foo bar" <foo.bar@baz.zap>
182 ;; those aren't hacked yet.
;; Fast path: a run of ordinary characters, optionally followed by "@"
;; and a second such run.  The non-nil second argument is LEAVE-SPACE.
183 (if (and (rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\"]+\\(\\|@[^][\000-\037\177-\377 ()<>@,;:\\\"]+\\)" t)
185 (rfc822-looking-at ?,))))
187 ;; rfc822-looking-at may have inserted a space
188 (or (bobp) (/= (preceding-char) ?\ ) (delete-char -1))
189 ;; relying on the fact that rfc822-looking-at <char>
190 ;; doesn't mung match-data
191 (throw 'address (buffer-substring address-start (match-end 0)))))
;; Fast path did not apply: go back to the start of the address.
192 (goto-char address-start)
;; NOTE(review): the binding of `n' is not visible in this view;
;; presumably it counts the words consumed so far -- confirm.
;; One word followed by "@": a plain addr-spec.
194 (cond ((and (= n 1) (rfc822-looking-at ?@))
196 (rfc822-snarf-domain)
198 (buffer-substring address-start (point))))
;; A ":" introduces a group.
199 ((rfc822-looking-at ?:)
200 (cond ((not allow-groups)
201 (rfc822-bad-address "A group name may not appear here"))
203 (rfc822-bad-address "No name for :...; group")))
206 ;; return a list of addresses
;; Group members are parsed by this same function, separated by ","
;; and terminated by ";".
207 (rfc822-snarf-frob-list ":...; group" ?\, ?\;
208 'rfc822-addresses-1 t)))
;; A "<" introduces a route-addr; `start' is point after the "<" has
;; been consumed.
209 ((rfc822-looking-at ?<)
210 (let ((start (point))
212 (cond ((rfc822-looking-at ?>)
215 ((and (not (eobp)) (= (following-char) ?\@))
216 ;; <@foo.bar,@baz:quux@abcd.efg>
;; Route: "@" domain items separated by "," and terminated by ":".
217 (rfc822-snarf-frob-list "<...> address" ?\, ?\:
219 (if (rfc822-looking-at ?\@)
220 (rfc822-snarf-domain)
222 "Gubbish in route-addr")))))
224 (or (rfc822-looking-at ?@)
225 (rfc822-bad-address "Malformed <..@..> address"))
226 (rfc822-snarf-domain)
228 ((progn (rfc822-snarf-words) (rfc822-looking-at ?@))
229 ; allow <foo> (losing unix seems to do this)
230 (rfc822-snarf-domain)))
;; The closing ">" is required.  The result runs from `start' to `end',
;; widened by one character on each side when `strip' is nil.
;; NOTE(review): how `strip' and `end' are set is not visible in this
;; view.
232 (if (rfc822-looking-at ?\>)
234 (buffer-substring (if strip start (1- start))
235 (if strip end (1+ end))))
236 (rfc822-bad-address "Unterminated <...> address")))))
237 ((looking-at "[^][\000-\037\177-\377 ()<>@,;:\\.]")
238 ;; this allows "." to be part of the words preceding
239 ;; an addr-spec, since many broken mailers output
240 ;; "Hern K. Herklemeyer III
241 ;; <yank@megadeath.dod.gods-own-country>"
244 (or (= n 0) (bobp) (= (preceding-char) ?\ )
;; Continue while a "." was just consumed or another word character
;; follows.
248 (setq again (or (rfc822-looking-at ?.)
249 (looking-at "[^][\000-\037\177-\377 ()<>@,;:\\.]"))))))
;; Non-local exit with no address.
;; NOTE(review): the condition guarding this throw is not visible in
;; this view.
251 (throw 'address nil))
252 ((= n 1) ; allow "foo" (losing unix seems to do this)
254 (buffer-substring address-start (point))))
256 (rfc822-bad-address "Missing comma between addresses or badly-formatted address"))
257 ((or (eobp) (= (following-char) ?,))
258 (rfc822-bad-address "Missing comma or route-spec"))
260 (rfc822-bad-address "Strange character or missing comma")))))))
;; Parse HEADER-TEXT, a string, for rfc822 addresses.
;; When HEADER-TEXT is just one atom surrounded by optional spaces or
;; tabs, the result is a one-element list holding that atom.  Otherwise
;; the work is done in a scratch buffer named " rfc822", which is killed
;; before returning.
;; NOTE(review): the final return expression of the general case is not
;; visible in this view; the visible code accumulates results in `list'.
263 (defun rfc822-addresses (header-text)
264 (if (string-match "\\`[ \t]*\\([^][\000-\037\177-\377 ()<>@,;:\\\".]+\\)[ \t]*\\'"
266 ;; Make very simple case moderately fast.
267 (list (substring header-text (match-beginning 1) (match-end 1)))
268 (let ((buf (generate-new-buffer " rfc822")))
272 (make-local-variable 'case-fold-search)
273 (setq case-fold-search nil) ;For speed(?)
275 ;; unfold continuation lines
276 (goto-char (point-min))
;; A newline followed by a space or tab is a continuation.  The regexp
;; requires a non-backslash character and then zero or more pairs of
;; backslashes before the newline; that prefix is kept and the newline
;; plus the one whitespace character become a single space.
278 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
279 (replace-match "\\1 " t))
281 (goto-char (point-min))
282 (rfc822-nuke-whitespace)
285 address-start); this is for rfc822-bad-address
;; Record where the next address begins, for error reporting.
287 (setq address-start (point))
;; Each address is parsed inside this `catch'; a throw to `address'
;; supplies the value in place of a normal return.
289 (catch 'address ; this is for rfc822-bad-address
290 (cond ((rfc822-looking-at ?\,)
;; A character that cannot start an address.
292 ((looking-at "[][\000-\037\177-\377@;:\\.>)]")
295 (format "Strange character \\%c found"
;; Groups are allowed at top level.
298 (rfc822-addresses-1 t)))))
;; `list' is built in reverse: a single result is consed on, and a list
;; result is reversed in place before being prepended.
301 (setq list (cons tem list)))
303 (setq list (nconc (nreverse tem) list)))))
;; Dispose of the scratch buffer.
305 (and buf (kill-buffer buf))))))