-;;; mail-extr.el --- extract full name and address from RFC 822 mail header.
+;;; mail-extr.el --- extract full name and address from RFC 822 mail header
-;; Copyright (C) 1992 Free Software Foundation, Inc.
+;; Copyright (C) 1991, 1992, 1993, 1994, 1997, 2001
+;; Free Software Foundation, Inc.
;; Author: Joe Wells <jbw@cs.bu.edu>
-;; Version: 1.0
-;; Adapted-By: ESR
+;; Maintainer: FSF
;; Keywords: mail
;; This file is part of GNU Emacs.
;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 1, or (at your option)
+;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
-;; along with GNU Emacs; see the file COPYING. If not, write to
-;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+;; along with GNU Emacs; see the file COPYING. If not, write to the
+;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
;;; Commentary:
-;; Here is `mail-extr', a package for extracting full names and canonical
-;; addresses from RFC 822 mail headers. It is intended to be hooked into
-;; other Emacs Lisp packages that deal with RFC 822 format messages, such as
-;; Gnews, GNUS, RMAIL, MH-E, BBDB, VM, Supercite, etc. Thus, this release is
-;; mainly for Emacs Lisp developers.
-
+;; The entry point of this code is
+;;
+;; mail-extract-address-components: (address &optional all)
+;;
+;; Given an RFC-822 ADDRESS, extract full name and canonical address.
+;; Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
+;; If no name can be extracted, FULL-NAME will be nil.
+;; ADDRESS may be a string or a buffer. If it is a buffer, the visible
+;; (narrowed) portion of the buffer will be interpreted as the address.
+;; (This feature exists so that the clever caller might be able to avoid
+;; consing a string.)
+;; If ADDRESS contains more than one RFC-822 address, only the first is
+;; returned.
+;;
+;; If ALL is non-nil, that means return info about all the addresses
+;; that are found in ADDRESS. The value is a list of elements of
+;; the form (FULL-NAME CANONICAL-ADDRESS), one per address.
+;;
+;; This code is more correct (and more heuristic) parser than the code in
+;; rfc822.el. And despite its size, it's fairly fast.
+;;
;; There are two main benefits:
-
+;;
;; 1. Higher probability of getting the correct full name for a human than
-;; any other package I know of. (On the other hand, it will cheerfully
+;; any other package we know of. (On the other hand, it will cheerfully
;; mangle non-human names/comments.)
;; 2. Address part is put in a canonical form.
-
-;; The interface is not yet carved in stone; please give me suggestions.
-
-;; I have an extensive test-case collection of funny addresses if you want to
+;;
+;; The interface is not yet carved in stone; please give us suggestions.
+;;
+;; We have an extensive test-case collection of funny addresses if you want to
;; work with the code. Developing this code requires frequent testing to
-;; make sure you're not breaking functionality. I'm not posting the
-;; test-cases because they take over 100K.
-
-;; If you find an address that mail-extr fails on, please send it to me along
-;; with what you think the correct results should be. I do not consider it a
-;; bug if mail-extr mangles a comment that does not correspond to a real
-;; human full name, although I would prefer that mail-extr would return the
-;; comment as-is.
-
+;; make sure you're not breaking functionality. The test cases aren't included
+;; because they are over 100K.
+;;
+;; If you find an address that mail-extr fails on, please send it to the
+;; maintainer along with what you think the correct results should be. We do
+;; not consider it a bug if mail-extr mangles a comment that does not
+;; correspond to a real human full name, although we would prefer that
+;; mail-extr would return the comment as-is.
+;;
;; Features:
-
+;;
;; * Full name handling:
-
+;;
;; * knows where full names can be found in an address.
;; * avoids using empty comments and quoted text.
;; * extracts full names from mailbox names.
;; * recognizes common formats for comments after a full name.
;; * puts a period and a space after each initial.
-;; * understands & referring to the mailbox name capitalized.
-;; * strips name prefixes like "Prof.", etc..
+;; * understands & referring to the mailbox name, capitalized.
+;; * strips name prefixes like "Prof.", etc.
;; * understands what characters can occur in names (not just letters).
;; * figures out middle initial from mailbox name.
;; * removes funny nicknames.
;; * keeps suffixes such as Jr., Sr., III, etc.
;; * reorders "Last, First" type names.
-
+;;
;; * Address handling:
-
+;;
;; * parses rfc822 quoted text, comments, and domain literals.
;; * parses rfc822 multi-line headers.
;; * does something reasonable with rfc822 GROUP addresses.
;; * converts rfc822 ROUTE addresses to %-style addresses.
;; * truncates %-style addresses at leftmost fully qualified domain name.
;; * handles local relative precedence of ! vs. % and @ (untested).
-
+;;
;; It does almost no string creation. It primarily uses the built-in
;; parsing routines with the appropriate syntax tables. This should
;; result in greater speed.
-
+;;
;; TODO:
-
+;;
;; * handle all test cases. (This will take forever.)
;; * software to pick the correct header to use (eg., "Senders-Name:").
;; * multiple addresses in the "From:" header (almost all of the necessary
;; * delete unused variables.
;; * arrange for testing with different relative precedences of ! vs. @
;; and %.
-;; * put variant-method back into mail-extract-address-components.
;; * insert documentation strings!
;; * handle X.400-gatewayed addresses according to RFC 1148.
-;;; Change Log:
-;;
+;;; Change Log:
+;;
+;; Thu Feb 17 17:57:33 1994 Jamie Zawinski (jwz@lucid.com)
+;;
+;; * merged with jbw's latest version
+;;
+;; Wed Feb 9 21:56:27 1994 Jamie Zawinski (jwz@lucid.com)
+;;
+;; * high-bit chars in comments weren't treated as word syntax
+;;
+;; Sat Feb 5 03:13:40 1994 Jamie Zawinski (jwz@lucid.com)
+;;
+;; * call replace-match with fixed-case arg
+;;
+;; Thu Dec 16 21:56:45 1993 Jamie Zawinski (jwz@lucid.com)
+;;
+;; * some more cleanup, doc, added provide
+;;
+;; Tue Mar 23 21:23:18 1993 Joe Wells (jbw at csd.bu.edu)
+;;
+;; * Made mail-full-name-prefixes a user-customizable variable.
+;; Allow passing the address as a buffer as well as a string.
+;; Allow [ and ] as name characters (Finnish character set).
+;;
+;; Mon Mar 22 21:20:56 1993 Joe Wells (jbw at bigbird.bu.edu)
+;;
+;; * Handle "null" addresses. Handle = used for spacing in mailbox
+;; name. Fix bug in handling of ROUTE-ADDR-type addresses that are
+;; missing their brackets. Handle uppercase "JR". Extract full
+;; names from X.400 addresses encoded in RFC-822. Fix bug in
+;; handling of multiple addresses where first has trailing comment.
+;; Handle more kinds of telephone extension lead-ins.
+;;
+;; Mon Mar 22 20:16:57 1993 Joe Wells (jbw at bigbird.bu.edu)
+;;
+;; * Handle HZ encoding for embedding GB encoded chinese characters.
+;;
+;; Mon Mar 22 00:46:12 1993 Joe Wells (jbw at bigbird.bu.edu)
+;;
+;; * Fixed too broad matching of ham radio call signs. Fixed bug in
+;; handling an unmatched ' in a name string. Enhanced recognition
+;; of when . in the mailbox name terminates the name portion.
+;; Narrowed conversion of . to space to only the necessary
+;; situation. Deal with VMS's stupid date stamps. Handle a unique
+;; way of introducing an alternate address. Fixed spacing bug I
+;; introduced in switching last name order. Fixed bug in handling
+;; address with ! and % but no @. Narrowed the cases in which
+;; certain trailing words are discarded.
+;;
+;; Sun Mar 21 21:41:06 1993 Joe Wells (jbw at bigbird.bu.edu)
+;;
+;; * Fixed bugs in handling GROUP addresses. Certain words in the
+;; middle of a name no longer terminate it. Handle LISTSERV list
+;; names. Ignore comment field containing mailbox name.
+;;
+;; Sun Mar 21 14:39:38 1993 Joe Wells (jbw at bigbird.bu.edu)
+;;
+;; * Moved variant-method code back into main function. Handle
+;; underscores as spaces in comments. Handle leading nickname. Add
+;; flag to ignore single-word names. Other changes.
+;;
+;; Mon Feb 1 22:23:31 1993 Joe Wells (jbw at bigbird.bu.edu)
+;;
+;; * Added in changes by Rod Whitby and Jamie Zawinski. This
+;; includes the flag mail-extr-guess-middle-initial and the fix for
+;; handling multiple addresses correctly. (Whitby just changed
+;; a > to a <.)
+;;
;; Mon Apr 6 23:59:09 1992 Joe Wells (jbw at bigbird.bu.edu)
-;;
+;;
;; * Cleaned up some more. Release version 1.0 to world.
-;;
+;;
;; Sun Apr 5 19:39:08 1992 Joe Wells (jbw at bigbird.bu.edu)
-;;
+;;
;; * Cleaned up full name extraction extensively.
-;;
+;;
;; Sun Feb 2 14:45:24 1992 Joe Wells (jbw at bigbird.bu.edu)
-;;
+;;
;; * Total rewrite. Integrated mail-canonicalize-address into
;; mail-extract-address-components. Now handles GROUP addresses more
;; or less correctly. Better handling of lots of different cases.
-;;
+;;
;; Fri Jun 14 19:39:50 1991
;; * Created.
;;; Code:
\f
-;; Variable definitions.
-(defvar mail-@-binds-tighter-than-! nil)
+(defgroup mail-extr nil
+ "Extract full name and address from RFC 822 mail header."
+ :prefix "mail-extr-"
+ :group 'mail)
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; User configuration variable definitions.
+;;
+
+(defcustom mail-extr-guess-middle-initial nil
+ "*Whether to try to guess middle initial from mail address.
+If true, then when we see an address like \"John Smith <jqs@host.com>\"
+we will assume that \"John Q. Smith\" is the fellow's name."
+ :type 'boolean
+ :group 'mail-extr)
+
+(defcustom mail-extr-ignore-single-names nil
+ "*Whether to ignore a name that is just a single word.
+If true, then when we see an address like \"Idiot <dumb@stupid.com>\"
+we will act as though we couldn't find a full name in the address."
+ :type 'boolean
+ :group 'mail-extr)
+
+;; Matches a leading title that is not part of the name (does not
+;; contribute to uniquely identifying the person).
+(defcustom mail-extr-full-name-prefixes
+ (purecopy
+ "\\(Prof\\|D[Rr]\\|Mrs?\\|Rev\\|Rabbi\\|SysOp\\|LCDR\\)\\.?[ \t\n]")
+ "*Matches prefixes to the full name that identify a person's position.
+These are stripped from the full name because they do not contribute to
+uniquely identifying the person."
+ :type 'regexp
+ :group 'mail-extr)
+
+(defcustom mail-extr-@-binds-tighter-than-! nil
+ "*Whether the local mail transport agent looks at ! before @."
+ :type 'boolean
+ :group 'mail-extr)
+
+(defcustom mail-extr-mangle-uucp nil
+ "*Whether to throw away information in UUCP addresses
+by translating things like \"foo!bar!baz@host\" into \"baz@bar.UUCP\"."
+ :type 'boolean
+ :group 'mail-extr)
;;----------------------------------------------------------------------
;; what orderings are meaningful?????
;; arbitrary address.
;;----------------------------------------------------------------------
-(defconst mail-space-char 32)
+\f
-(defconst mail-whitespace " \t\n")
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Constant definitions.
+;;
+
+;; Codes in
+;; Names in ISO 8859-1 Name
+;; ISO 10XXX ISO 8859-2 in
+;; ISO 6937 ISO 10646 RFC Swedish
+;; etc. Hex Oct 1345 TeX Split ASCII Description
+;; --------- ---------- ---- --- ----- ----- -------------------------------
+;; %a E4 344 a: \"a ae { latin small a + diaeresis d
+;; %o F6 366 o: \"o oe | latin small o + diaeresis v
+;; @a E5 345 aa \oa aa } latin small a + ring above e
+;; %u FC 374 u: \"u ue ~ latin small u + diaeresis |
+;; /e E9 351 e' \'e ` latin small e + acute i
+;; %A C4 304 A: \"A AE [ latin capital a + diaeresis D
+;; %O D6 326 O: \"O OE \ latin capital o + diaeresis V
+;; @A C5 305 AA \oA AA ] latin capital a + ring above E
+;; %U DC 334 U: \"U UE ^ latin capital u + diaeresis \
+;; /E C9 311 E' \'E @ latin capital e + acute I
+
+;; NOTE: @a and @A are not in ISO 8859-2 (the codes mentioned above invoke
+;; /l and /L). Some of this data was retrieved from
+;; listserv@jhuvm.hcf.jhu.edu.
-;; Any character that can occur in a name in an RFC822 address.
+;; Any character that can occur in a name, not counting characters that
+;; separate parts of a multipart name (hyphen and period).
;; Yes, there are weird people with digits in their names.
-(defconst mail-all-letters "A-Za-z---{|}'~0-9`.")
+;; You will also notice the consideration for the
+;; Swedish/Finnish/Norwegian character set.
+(defconst mail-extr-all-letters-but-separators
+ (purecopy "][[:alnum:]{|}'~`"))
-;; Any character that can occur in a name, not counting characters that
-;; separate parts of a multipart name.
-(defconst mail-all-letters-but-separators "A-Za-z{|}'~0-9`")
+;; Any character that can occur in a name in an RFC822 address including
+;; the separator (hyphen and possibly period) for multipart names.
+;; #### should . be in here?
+(defconst mail-extr-all-letters
+ (purecopy (concat mail-extr-all-letters-but-separators "---")))
-;; Any character that can start a name
-(defconst mail-first-letters "A-Za-z")
+;; Any character that can start a name.
+;; Keep this set as minimal as possible.
+(defconst mail-extr-first-letters (purecopy "[:alpha:]"))
;; Any character that can end a name.
-(defconst mail-last-letters "A-Za-z`'.")
-
-;; Matches an initial not followed by both a period and a space.
-(defconst mail-bad-initials-pattern
- (format "\\(\\([^%s]\\|\\`\\)[%s]\\)\\(\\.\\([^ ]\\)\\| \\|\\([^%s .]\\)\\|\\'\\)"
- mail-all-letters mail-first-letters mail-all-letters))
+;; Keep this set as minimal as possible.
+(defconst mail-extr-last-letters (purecopy "[:alpha:]`'."))
-(defconst mail-non-name-chars (concat "^" mail-all-letters "."))
+(defconst mail-extr-leading-garbage "\\W+")
-(defconst mail-non-begin-name-chars (concat "^" mail-first-letters))
+;; (defconst mail-extr-non-name-chars
+;; (purecopy (concat "^" mail-extr-all-letters ".")))
+;; (defconst mail-extr-non-begin-name-chars
+;; (purecopy (concat "^" mail-extr-first-letters)))
+;; (defconst mail-extr-non-end-name-chars
+;; (purecopy (concat "^" mail-extr-last-letters)))
-(defconst mail-non-end-name-chars (concat "^" mail-last-letters))
+;; Matches an initial not followed by both a period and a space.
+;; (defconst mail-extr-bad-initials-pattern
+;; (purecopy
+;; (format "\\(\\([^%s]\\|\\`\\)[%s]\\)\\(\\.\\([^ ]\\)\\| \\|\\([^%s .]\\)\\|\\'\\)"
+;; mail-extr-all-letters mail-extr-first-letters mail-extr-all-letters)))
;; Matches periods used instead of spaces. Must not match the period
;; following an initial.
-(defconst mail-bad-\.-pattern
- (format "\\([%s][%s]\\)\\.+\\([%s]\\)"
- mail-all-letters mail-last-letters mail-first-letters))
+(defconst mail-extr-bad-dot-pattern
+ (purecopy
+ (format "\\([%s][%s]\\)\\.+\\([%s]\\)"
+ mail-extr-all-letters
+ mail-extr-last-letters
+ mail-extr-first-letters)))
;; Matches an embedded or leading nickname that should be removed.
-(defconst mail-nickname-pattern
- (format "\\([ .]\\|\\`\\)[\"'`\[\(]\\([ .%s]+\\)[\]\"'\)] "
- mail-all-letters))
-
-;; Matches a leading title that is not part of the name (does not
-;; contribute to uniquely identifying the person).
-(defconst mail-full-name-prefixes
- '"\\` *\\(Prof\\|Dr\\|Mrs?\\|Rev\\|Rabbi\\|SysOp\\|LCDR\\)\\.? ")
+;; (defconst mail-extr-nickname-pattern
+;; (purecopy
+;; (format "\\([ .]\\|\\`\\)[\"'`\[\(]\\([ .%s]+\\)[\]\"'\)] "
+;; mail-extr-all-letters)))
;; Matches the occurrence of a generational name suffix, and the last
-;; character of the preceding name.
-(defconst mail-full-name-suffix-pattern
- (format
- "\\(,? ?\\([JjSs]r\\.?\\|V?I+V?\\)\\)\\([^%s]\\([^%s]\\|\\'\\)\\|\\'\\)"
- mail-all-letters mail-all-letters))
-
-(defconst mail-roman-numeral-pattern
- "V?I+V?\\b")
+;; character of the preceding name. This is important because we want to
+;; keep such suffixes: they help to uniquely identify the person.
+;; *** Perhaps this should be a user-customizable variable. However, the
+;; *** regular expression is fairly tricky to alter, so maybe not.
+(defconst mail-extr-full-name-suffix-pattern
+ (purecopy
+ (format
+ "\\(,? ?\\([JjSs][Rr]\\.?\\|V?I+V?\\)\\)\\([^%s]\\([^%s]\\|\\'\\)\\|\\'\\)"
+ mail-extr-all-letters mail-extr-all-letters)))
+
+(defconst mail-extr-roman-numeral-pattern (purecopy "V?I+V?\\b"))
;; Matches a trailing uppercase (with other characters possible) acronym.
;; Must not match a trailing uppercase last name or trailing initial
-(defconst mail-weird-acronym-pattern "\\([A-Z]+[-_/]\\|[A-Z][A-Z][A-Z]?\\b\\)")
-
+(defconst mail-extr-weird-acronym-pattern
+ (purecopy "\\([A-Z]+[-_/]\\|[A-Z][A-Z][A-Z]?\\b\\)"))
+
;; Matches a mixed-case or lowercase name (not an initial).
-(defconst mail-mixed-case-name-pattern
- (format
- "\\b\\([a-z][%s]*[%s]\\|[%s][%s]*[a-z][%s]*[%s]\\|[%s][%s]*[a-z]\\)"
- mail-all-letters mail-last-letters
- mail-first-letters mail-all-letters mail-all-letters mail-last-letters
- mail-first-letters mail-all-letters))
+;; #### Match Latin1 lower case letters here too?
+;; (defconst mail-extr-mixed-case-name-pattern
+;; (purecopy
+;; (format
+;; "\\b\\([a-z][%s]*[%s]\\|[%s][%s]*[a-z][%s]*[%s]\\|[%s][%s]*[a-z]\\)"
+;; mail-extr-all-letters mail-extr-last-letters
+;; mail-extr-first-letters mail-extr-all-letters mail-extr-all-letters
+;; mail-extr-last-letters mail-extr-first-letters mail-extr-all-letters)))
;; Matches a trailing alternative address.
-(defconst mail-alternative-address-pattern "[a-zA-Z.]+[!@][a-zA-Z.]")
+;; #### Match Latin1 letters here too?
+;; #### Match _ before @ here too?
+(defconst mail-extr-alternative-address-pattern
+ (purecopy "\\(aka *\\)?[a-zA-Z.]+[!@][a-zA-Z.]"))
;; Matches a variety of trailing comments not including comma-delimited
;; comments.
-(defconst mail-trailing-comment-start-pattern " [-{]\\|--\\|[+@#></\;]")
+(defconst mail-extr-trailing-comment-start-pattern
+ (purecopy " [-{]\\|--\\|[+@#></\;]"))
;; Matches a name (not an initial).
;; This doesn't force a word boundary at the end because sometimes a
;; comment is separated by a `-' with no preceding space.
-(defconst mail-name-pattern
- (format
- "\\b[%s][%s]*[%s]"
- mail-first-letters mail-all-letters mail-last-letters))
+(defconst mail-extr-name-pattern
+ (purecopy (format "\\b[%s][%s]*[%s]"
+ mail-extr-first-letters
+ mail-extr-all-letters
+ mail-extr-last-letters)))
-(defconst mail-initial-pattern
- (format "\\b[%s]\\([. ]\\|\\b\\)" mail-first-letters))
+(defconst mail-extr-initial-pattern
+ (purecopy (format "\\b[%s]\\([. ]\\|\\b\\)" mail-extr-first-letters)))
;; Matches a single name before a comma.
-(defconst mail-last-name-first-pattern
- (concat "\\`" mail-name-pattern ","))
+;; (defconst mail-extr-last-name-first-pattern
+;; (purecopy (concat "\\`" mail-extr-name-pattern ",")))
;; Matches telephone extensions.
-(defconst mail-telephone-extension-pattern
- "\\(\\([Ee]xt\\|[Tt]el\\|[Xx]\\).?\\)? *\\+?[0-9][- 0-9]+")
+(defconst mail-extr-telephone-extension-pattern
+ (purecopy
+ "\\(\\([Ee]xt\\|\\|[Tt]ph\\|[Tt]el\\|[Xx]\\).?\\)? *\\+?[0-9][- 0-9]+"))
;; Matches ham radio call signs.
-(defconst mail-ham-call-sign-pattern
- "\\b[A-Z]+[0-9][A-Z0-9]*")
+;; Help from: Mat Maessen N2NJZ <maessm@rpi.edu>, Mark Feit
+;; <mark@era.com>, Michael Covington <mcovingt@ai.uga.edu>.
+;; Examples: DX504 DX515 K5MRU K8DHK KA9WGN KA9WGN KD3FU KD6EUI KD6HBW
+;; KE9TV KF0NV N1API N3FU N3GZE N3IGS N4KCC N7IKQ N9HHU W4YHF W6ANK WA2SUH
+;; WB7VZI N2NJZ NR3G KJ4KK AB4UM AL7NI KH6OH WN3KBT N4TMI W1A N0NZO
+(defconst mail-extr-ham-call-sign-pattern
+ (purecopy "\\b\\(DX[0-9]+\\|[AKNW][A-Z]?[0-9][A-Z][A-Z]?[A-Z]?\\)"))
+
+;; Possible trailing suffixes: "\\(/\\(KT\\|A[AEG]\\|[R0-9]\\)\\)?"
+;; /KT == Temporary Technician (has CSC but not "real" license)
+;; /AA == Temporary Advanced
+;; /AE == Temporary Extra
+;; /AG == Temporary General
+;; /R == repeater
+;; /# == stations operating out of home district
+;; I don't include these in the regexp above because I can't imagine
+;; anyone putting them with their name in an e-mail address.
;; Matches normal single-part name
-(defconst mail-normal-name-pattern
- (format
- "\\b[%s][%s]+[%s]"
- mail-first-letters mail-all-letters-but-separators mail-last-letters))
+(defconst mail-extr-normal-name-pattern
+ (purecopy (format "\\b[%s][%s]+[%s]"
+ mail-extr-first-letters
+ mail-extr-all-letters-but-separators
+ mail-extr-last-letters)))
+
+;; Matches a single word name.
+;; (defconst mail-extr-one-name-pattern
+;; (purecopy (concat "\\`" mail-extr-normal-name-pattern "\\'")))
;; Matches normal two names with missing middle initial
-(defconst mail-two-name-pattern
- (concat "\\`\\(" mail-normal-name-pattern
- "\\|" mail-initial-pattern
- "\\) +\\(" mail-normal-name-pattern "\\)\\(,\\|\\'\\)"))
-
-(defvar address-syntax-table (make-syntax-table))
-(defvar address-comment-syntax-table (make-syntax-table))
-(defvar address-domain-literal-syntax-table (make-syntax-table))
-(defvar address-text-comment-syntax-table (make-syntax-table))
-(defvar address-text-syntax-table (make-syntax-table))
-(mapcar
- (function
- (lambda (pair)
- (let ((syntax-table (symbol-value (car pair))))
- (mapcar
- (function
- (lambda (item)
- (if (eq 2 (length item))
- (modify-syntax-entry (car item) (car (cdr item)) syntax-table)
- (let ((char (car item))
- (bound (car (cdr item)))
- (syntax (car (cdr (cdr item)))))
- (while (<= char bound)
- (modify-syntax-entry char syntax syntax-table)
- (setq char (1+ char)))))))
- (cdr pair)))))
- '((address-syntax-table
- (0 31 "w") ;control characters
- (32 " ") ;SPC
- (?! ?~ "w") ;printable characters
- (127 "w") ;DEL
- (128 255 "w") ;high-bit-on characters
+;; The first name is not allowed to have a hyphen because this can cause
+;; false matches where the "middle initial" is actually the first letter
+;; of the second part of the first name.
+(defconst mail-extr-two-name-pattern
+ (purecopy
+ (concat "\\`\\(" mail-extr-normal-name-pattern
+ "\\|" mail-extr-initial-pattern
+ "\\) +\\(" mail-extr-name-pattern "\\)\\(,\\|\\'\\)")))
+
+(defconst mail-extr-listserv-list-name-pattern
+ (purecopy "Multiple recipients of list \\([-A-Z]+\\)"))
+
+(defconst mail-extr-stupid-vms-date-stamp-pattern
+ (purecopy
+ "[0-9][0-9]-[JFMASOND][aepuco][nbrylgptvc]-[0-9][0-9][0-9][0-9] [0-9]+ *"))
+
+;;; HZ -- GB (PRC Chinese character encoding) in ASCII embedding protocol
+;;
+;; In ASCII mode, a byte is interpreted as an ASCII character, unless a '~' is
+;; encountered. The character '~' is an escape character. By convention, it
+;; must be immediately followed ONLY by '~', '{' or '\n' (<LF>), with the
+;; following special meaning.
+;;
+;; o The escape sequence '~~' is interpreted as a '~'.
+;; o The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
+;; o The escape sequence '~\n' is a line-continuation marker to be consumed
+;; with no output produced.
+;;
+;; In GB mode, characters are interpreted two bytes at a time as (pure) GB
+;; codes until the escape-from-GB code '~}' is read. This code switches the
+;; mode from GB back to ASCII. (Note that the escape-from-GB code '~}'
+;; ($7E7D) is outside the defined GB range.)
+(defconst mail-extr-hz-embedded-gb-encoded-chinese-pattern
+ (purecopy "~{\\([^~].\\|~[^\}]\\)+~}"))
+
+;; The leading optional lowercase letters are for a bastardized version of
+;; the encoding, as is the optional nature of the final slash.
+(defconst mail-extr-x400-encoded-address-pattern
+ (purecopy "[a-z]?[a-z]?\\(/[A-Za-z]+\\(\\.[A-Za-z]+\\)?=[^/]+\\)+/?\\'"))
+
+(defconst mail-extr-x400-encoded-address-field-pattern-format
+ (purecopy "/%s=\\([^/]+\\)\\(/\\|\\'\\)"))
+
+(defconst mail-extr-x400-encoded-address-surname-pattern
+ ;; S stands for Surname (family name).
+ (purecopy
+ (format mail-extr-x400-encoded-address-field-pattern-format "[Ss]")))
+
+(defconst mail-extr-x400-encoded-address-given-name-pattern
+ ;; G stands for Given name.
+ (purecopy
+ (format mail-extr-x400-encoded-address-field-pattern-format "[Gg]")))
+
+(defconst mail-extr-x400-encoded-address-full-name-pattern
+ ;; PN stands for Personal Name. When used it represents the combination
+ ;; of the G and S fields.
+ ;; "The one system I used having this field asked it with the prompt
+ ;; `Personal Name'. But they mapped it into G and S on outgoing real
+ ;; X.400 addresses. As they mapped G and S into PN on incoming..."
+ (purecopy
+ (format mail-extr-x400-encoded-address-field-pattern-format "[Pp][Nn]")))
+
+\f
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Syntax tables used for quick parsing.
+;;
+
+(defconst mail-extr-address-syntax-table (make-syntax-table))
+(defconst mail-extr-address-comment-syntax-table (make-syntax-table))
+(defconst mail-extr-address-domain-literal-syntax-table (make-syntax-table))
+(defconst mail-extr-address-text-comment-syntax-table (make-syntax-table))
+(defconst mail-extr-address-text-syntax-table (make-syntax-table))
+(mapc
+ (lambda (pair)
+ (let ((syntax-table (symbol-value (car pair))))
+ (dolist (item (cdr pair))
+ (if (eq 2 (length item))
+ ;; modifying syntax of a single character
+ (modify-syntax-entry (car item) (car (cdr item)) syntax-table)
+ ;; modifying syntax of a range of characters
+ (let ((char (nth 0 item))
+ (bound (nth 1 item))
+ (syntax (nth 2 item)))
+ (while (<= char bound)
+ (modify-syntax-entry char syntax syntax-table)
+ (setq char (1+ char))))))))
+ '((mail-extr-address-syntax-table
+ (?\000 ?\037 "w") ;control characters
+ (?\040 " ") ;SPC
+ (?! ?~ "w") ;printable characters
+ (?\177 "w") ;DEL
+ (?\200 ?\377 "w") ;high-bit-on characters
+ (?\240 " ") ;nobreakspace
(?\t " ")
(?\r " ")
(?\n " ")
(?\] ".")
;; % and ! aren't RFC822 characters, but it is convenient to pretend
(?% ".")
- (?! ".")
+ (?! ".") ;; this needs to be word-constituent when not in .UUCP mode
)
- (address-comment-syntax-table
- (0 255 "w")
+ (mail-extr-address-comment-syntax-table
+ (?\000 ?\377 "w")
+ (?\040 " ")
+ (?\240 " ")
+ (?\t " ")
+ (?\r " ")
+ (?\n " ")
(?\( "\(\)")
(?\) "\)\(")
(?\\ "\\"))
- (address-domain-literal-syntax-table
- (0 255 "w")
+ (mail-extr-address-domain-literal-syntax-table
+ (?\000 ?\377 "w")
+ (?\040 " ")
+ (?\240 " ")
+ (?\t " ")
+ (?\r " ")
+ (?\n " ")
(?\[ "\(\]") ;??????
(?\] "\)\[") ;??????
(?\\ "\\"))
- (address-text-comment-syntax-table
- (0 255 "w")
+ (mail-extr-address-text-comment-syntax-table
+ (?\000 ?\377 "w")
+ (?\040 " ")
+ (?\240 " ")
+ (?\t " ")
+ (?\r " ")
+ (?\n " ")
(?\( "\(\)")
(?\) "\)\(")
(?\[ "\(\]")
;; (?\' "\)\`")
;; (?\` "\(\'")
)
- (address-text-syntax-table
- (0 255 ".")
+ (mail-extr-address-text-syntax-table
+ (?\000 ?\177 ".")
+ (?\200 ?\377 "w")
+ (?\040 " ")
+ (?\t " ")
+ (?\r " ")
+ (?\n " ")
(?A ?Z "w")
(?a ?z "w")
(?- "w")
))
\f
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
;; Utility functions and macros.
+;;
-(defmacro undo-backslash-quoting (beg end)
- (`(save-excursion
- (save-restriction
- (narrow-to-region (, beg) (, end))
- (goto-char (point-min))
- ;; undo \ quoting
- (while (re-search-forward "\\\\\\(.\\)" nil t)
- (replace-match "\\1")
- ;; CHECK: does this leave point after the replacement?
- )))))
-
-(defmacro mail-nuke-char-at (pos)
- (` (save-excursion
- (goto-char (, pos))
- (delete-char 1)
- (insert mail-space-char))))
-
-(defmacro mail-nuke-elements-outside-range (list-symbol beg-symbol end-symbol
- &optional no-replace)
- (` (progn
- (setq temp (, list-symbol))
+(defsubst mail-extr-skip-whitespace-forward ()
+ ;; v19 fn skip-syntax-forward is more tasteful, but not byte-coded.
+ (skip-chars-forward " \t\n\r\240"))
+
+(defsubst mail-extr-skip-whitespace-backward ()
+ ;; v19 fn skip-syntax-backward is more tasteful, but not byte-coded.
+ (skip-chars-backward " \t\n\r\240"))
+
+
+(defsubst mail-extr-undo-backslash-quoting (beg end)
+ (save-excursion
+ (save-restriction
+ (narrow-to-region beg end)
+ (goto-char (point-min))
+ ;; undo \ quoting
+ (while (search-forward "\\" nil t)
+ (delete-char -1)
+ (or (eobp)
+ (forward-char 1))))))
+
+(defsubst mail-extr-nuke-char-at (pos)
+ (save-excursion
+ (goto-char pos)
+ (delete-char 1)
+ (insert ?\ )))
+
+(put 'mail-extr-nuke-outside-range
+ 'edebug-form-spec '(symbolp &optional form form atom))
+
+(defmacro mail-extr-nuke-outside-range (list-symbol
+ beg-symbol end-symbol
+ &optional no-replace)
+ "Delete all elements outside BEG..END in LIST.
+LIST-SYMBOL names a variable holding a list of buffer positions
+BEG-SYMBOL and END-SYMBOL name variables delimiting a range
+Each element of LIST-SYMBOL which lies outside of the range is
+ deleted from the list.
+Unless NO-REPLACE is true, at each of the positions in LIST-SYMBOL
+ which lie outside of the range, one character at that position is
+ replaced with a SPC."
+ (or (memq no-replace '(t nil))
+ (error "no-replace must be t or nil, evaluable at macroexpand-time"))
+ `(let ((temp ,list-symbol)
+ ch)
(while temp
- (cond ((or (> (car temp) (, end-symbol))
- (< (car temp) (, beg-symbol)))
- (, (or no-replace
- (` (mail-nuke-char-at (car temp)))))
- (setcar temp nil)))
+ (setq ch (car temp))
+ (when (or (> ch ,end-symbol)
+ (< ch ,beg-symbol))
+ ,@(if no-replace
+ nil
+ `((mail-extr-nuke-char-at ch)))
+ (setcar temp nil))
(setq temp (cdr temp)))
- (setq (, list-symbol) (delq nil (, list-symbol))))))
-
-(defun mail-demarkerize (marker)
- (and marker
- (if (markerp marker)
- (let ((temp (marker-position marker)))
- (set-marker marker nil)
- temp)
- marker)))
-
-(defun mail-markerize (pos)
- (and pos
- (if (markerp pos)
- pos
- (copy-marker pos))))
-
-(defmacro mail-last-element (list)
- "Return last element of LIST."
- (` (let ((list (, list)))
- (while (not (null (cdr list)))
- (setq list (cdr list)))
- (car list))))
-
-(defmacro safe-move-sexp (arg)
- "Safely skip over one balanced sexp, if there is one. Return t if success."
- (` (condition-case error
- (progn
- (goto-char (scan-sexps (point) (, arg)))
- t)
- (error
- (if (string-equal (nth 1 error) "Unbalanced parentheses")
- nil
- (while t
- (signal (car error) (cdr error))))))))
-
+ (setq ,list-symbol (delq nil ,list-symbol))))
+
+(defun mail-extr-demarkerize (marker)
+ ;; if arg is a marker, destroys the marker, then returns the old value.
+ ;; otherwise returns the arg.
+ (if (markerp marker)
+ (let ((temp (marker-position marker)))
+ (set-marker marker nil)
+ temp)
+ marker))
+
+(defun mail-extr-markerize (pos)
+ ;; coerces pos to a marker if non-nil.
+ (if (or (markerp pos) (null pos))
+ pos
+ (copy-marker pos)))
+
+(defsubst mail-extr-safe-move-sexp (arg)
+ ;; Safely skip over one balanced sexp, if there is one. Return t if success.
+ (condition-case error
+ (progn
+ (goto-char (or (scan-sexps (point) arg) (point)))
+ t)
+ (error
+ ;; #### kludge kludge kludge kludge kludge kludge kludge !!!
+ (if (string-equal (nth 1 error) "Unbalanced parentheses")
+ nil
+ (while t
+ (signal (car error) (cdr error)))))))
\f
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
;; The main function to grind addresses
-
-(defun mail-extract-address-components (address)
- "Given an rfc 822 ADDRESS, extract full name and canonical address.
-Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)."
- (let ((canonicalization-buffer (get-buffer-create "*canonical address*"))
- (extraction-buffer (get-buffer-create "*extract address components*"))
- (foo 'bar)
- char
- multiple-addresses
- <-pos >-pos @-pos :-pos ,-pos !-pos %-pos \;-pos
- group-:-pos group-\;-pos route-addr-:-pos
- record-pos-symbol
- first-real-pos last-real-pos
- phrase-beg phrase-end
- comment-beg comment-end
- quote-beg quote-end
- atom-beg atom-end
- mbox-beg mbox-end
- \.-ends-name
- temp
- name-suffix
- saved-point
- fi mi li
- saved-%-pos saved-!-pos saved-@-pos
- domain-pos \.-pos insert-point)
-
- (save-excursion
- (set-buffer extraction-buffer)
+;;
+
+(defvar disable-initial-guessing-flag) ; dynamic assignment
+(defvar cbeg) ; dynamic assignment
+(defvar cend) ; dynamic assignment
+
+;;;###autoload
+(defun mail-extract-address-components (address &optional all)
+ "Given an RFC-822 address ADDRESS, extract full name and canonical address.
+Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
+If no name can be extracted, FULL-NAME will be nil. Also see
+`mail-extr-ignore-single-names'.
+
+If the optional argument ALL is non-nil, then ADDRESS can contain zero
+or more recipients, separated by commas, and we return a list of
+the form ((FULL-NAME CANONICAL-ADDRESS) ...) with one element for
+each recipient. If ALL is nil, then if ADDRESS contains more than
+one recipients, all but the first is ignored.
+
+ADDRESS may be a string or a buffer. If it is a buffer, the visible
+\(narrowed) portion of the buffer will be interpreted as the address.
+\(This feature exists so that the clever caller might be able to avoid
+consing a string.)"
+ (let ((canonicalization-buffer (get-buffer-create " *canonical address*"))
+ (extraction-buffer (get-buffer-create " *extract address components*"))
+ value-list)
+
+ (with-current-buffer (get-buffer-create extraction-buffer)
+ (fundamental-mode)
(buffer-disable-undo extraction-buffer)
- (set-syntax-table address-syntax-table)
+ (set-syntax-table mail-extr-address-syntax-table)
(widen)
(erase-buffer)
(setq case-fold-search nil)
-
+
;; Insert extra space at beginning to allow later replacement with <
;; without having to move markers.
- (insert mail-space-char address)
-
- ;; stolen from rfc822.el
+ (insert ?\ )
+
+ ;; Insert the address itself.
+ (cond ((stringp address)
+ (insert address))
+ ((bufferp address)
+ (insert-buffer-substring address))
+ (t
+ (error "Invalid address: %s" address)))
+
+ (set-text-properties (point-min) (point-max) nil)
+
+ (with-current-buffer (get-buffer-create canonicalization-buffer)
+ (fundamental-mode)
+ (buffer-disable-undo canonicalization-buffer)
+ (setq case-fold-search nil))
+
+
;; Unfold multiple lines.
(goto-char (point-min))
(while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
(replace-match "\\1 " t))
-
- ;; first pass grabs useful information about address
- (goto-char (point-min))
- (while (progn
- (skip-chars-forward mail-whitespace)
- (not (eobp)))
- (setq char (char-after (point)))
- (or first-real-pos
- (if (not (eq char ?\())
- (setq first-real-pos (point))))
- (cond
- ;; comment
- ((eq char ?\()
- (set-syntax-table address-comment-syntax-table)
- ;; only record the first non-empty comment's position
- (if (and (not comment-beg)
- (save-excursion
- (forward-char 1)
- (skip-chars-forward mail-whitespace)
- (not (eq ?\) (char-after (point))))))
- (setq comment-beg (point)))
- ;; TODO: don't record if unbalanced
- (or (safe-move-sexp 1)
+
+ ;; Loop over addresses until we have as many as we want.
+ (while (and (or all (null value-list))
+ (progn (goto-char (point-min))
+ (skip-chars-forward " \t")
+ (not (eobp))))
+ (let (char
+ end-of-address
+ <-pos >-pos @-pos colon-pos comma-pos !-pos %-pos \;-pos
+ group-:-pos group-\;-pos route-addr-:-pos
+ record-pos-symbol
+ first-real-pos last-real-pos
+ phrase-beg phrase-end
+ cbeg cend ; dynamically set from -voodoo
+ quote-beg quote-end
+ atom-beg atom-end
+ mbox-beg mbox-end
+ \.-ends-name
+ temp
+ ;; name-suffix
+ fi mi li ; first, middle, last initial
+ saved-%-pos saved-!-pos saved-@-pos
+ domain-pos \.-pos insert-point
+ ;; mailbox-name-processed-flag
+ disable-initial-guessing-flag) ; dynamically set from -voodoo
+
+ (set-syntax-table mail-extr-address-syntax-table)
+ (goto-char (point-min))
+
+ ;; Insert extra space at beginning to allow later replacement with <
+ ;; without having to move markers.
+ (or (eq (following-char) ?\ )
+ (insert ?\ ))
+
+ ;; First pass grabs useful information about address.
+ (while (progn
+ (mail-extr-skip-whitespace-forward)
+ (not (eobp)))
+ (setq char (char-after (point)))
+ (or first-real-pos
+ (if (not (eq char ?\())
+ (setq first-real-pos (point))))
+ (cond
+ ;; comment
+ ((eq char ?\()
+ (set-syntax-table mail-extr-address-comment-syntax-table)
+ ;; only record the first non-empty comment's position
+ (if (and (not cbeg)
+ (save-excursion
+ (forward-char 1)
+ (mail-extr-skip-whitespace-forward)
+ (not (eq ?\) (char-after (point))))))
+ (setq cbeg (point)))
+ ;; TODO: don't record if unbalanced
+ (or (mail-extr-safe-move-sexp 1)
+ (forward-char 1))
+ (set-syntax-table mail-extr-address-syntax-table)
+ (if (and cbeg
+ (not cend))
+ (setq cend (point))))
+ ;; quoted text
+ ((eq char ?\")
+ ;; only record the first non-empty quote's position
+ (if (and (not quote-beg)
+ (save-excursion
+ (forward-char 1)
+ (mail-extr-skip-whitespace-forward)
+ (not (eq ?\" (char-after (point))))))
+ (setq quote-beg (point)))
+ ;; TODO: don't record if unbalanced
+ (or (mail-extr-safe-move-sexp 1)
+ (forward-char 1))
+ (if (and quote-beg
+ (not quote-end))
+ (setq quote-end (point))))
+ ;; domain literals
+ ((eq char ?\[)
+ (set-syntax-table mail-extr-address-domain-literal-syntax-table)
+ (or (mail-extr-safe-move-sexp 1)
+ (forward-char 1))
+ (set-syntax-table mail-extr-address-syntax-table))
+ ;; commas delimit addresses when outside < > pairs.
+ ((and (eq char ?,)
+ (or (and (null <-pos)
+ ;; Handle ROUTE-ADDR address that is missing its <.
+ (not (eq ?@ (char-after (1+ (point))))))
+ (and >-pos
+ ;; handle weird munged addresses
+ ;; BUG FIX: This test was reversed. Thanks to the
+ ;; brilliant Rod Whitby <rwhitby@research.canon.oz.au>
+ ;; for discovering this!
+ (< (car (last <-pos)) (car >-pos)))))
+ ;; The argument contains more than one address.
+ ;; Temporarily hide everything after this one.
+ (setq end-of-address (copy-marker (1+ (point)) t))
+ (narrow-to-region (point-min) (1+ (point)))
+ (delete-char 1)
+ (setq char ?\() ; HAVE I NO SHAME??
+ )
+ ;; record the position of various interesting chars, determine
+ ;; legality later.
+ ((setq record-pos-symbol
+ (cdr (assq char
+ '((?< . <-pos) (?> . >-pos) (?@ . @-pos)
+ (?: . colon-pos) (?, . comma-pos) (?! . !-pos)
+ (?% . %-pos) (?\; . \;-pos)))))
+ (set record-pos-symbol
+ (cons (point) (symbol-value record-pos-symbol)))
(forward-char 1))
- (set-syntax-table address-syntax-table)
- (if (and comment-beg
- (not comment-end))
- (setq comment-end (point))))
- ;; quoted text
- ((eq char ?\")
- ;; only record the first non-empty quote's position
- (if (and (not quote-beg)
- (save-excursion
- (forward-char 1)
- (skip-chars-forward mail-whitespace)
- (not (eq ?\" (char-after (point))))))
- (setq quote-beg (point)))
- ;; TODO: don't record if unbalanced
- (or (safe-move-sexp 1)
+ ((eq char ?.)
(forward-char 1))
- (if (and quote-beg
- (not quote-end))
- (setq quote-end (point))))
- ;; domain literals
- ((eq char ?\[)
- (set-syntax-table address-domain-literal-syntax-table)
- (or (safe-move-sexp 1)
+ ((memq char '(
+ ;; comment terminator illegal
+ ?\)
+ ;; domain literal terminator illegal
+ ?\]
+ ;; \ allowed only within quoted strings,
+ ;; domain literals, and comments
+ ?\\
+ ))
+ (mail-extr-nuke-char-at (point))
(forward-char 1))
- (set-syntax-table address-syntax-table))
- ;; commas delimit addresses when outside < > pairs.
- ((and (eq char ?,)
- (or (null <-pos)
- (and >-pos
- ;; handle weird munged addresses
- (> (mail-last-element <-pos) (car >-pos)))))
- (setq multiple-addresses t)
- (delete-char 1)
- (narrow-to-region (point-min) (point)))
- ;; record the position of various interesting chars, determine
- ;; legality later.
- ((setq record-pos-symbol
- (cdr (assq char
- '((?< . <-pos) (?> . >-pos) (?@ . @-pos)
- (?: . :-pos) (?, . ,-pos) (?! . !-pos)
- (?% . %-pos) (?\; . \;-pos)))))
- (set record-pos-symbol
- (cons (point) (symbol-value record-pos-symbol)))
- (forward-char 1))
- ((eq char ?.)
- (forward-char 1))
- ((memq char '(
- ;; comment terminator illegal
- ?\)
- ;; domain literal terminator illegal
- ?\]
- ;; \ allowed only within quoted strings,
- ;; domain literals, and comments
- ?\\
- ))
- (mail-nuke-char-at (point))
- (forward-char 1))
- (t
- (forward-word 1)))
- (or (eq char ?\()
- (setq last-real-pos (point))))
-
- ;; Use only the leftmost <, if any. Replace all others with spaces.
- (while (cdr <-pos)
- (mail-nuke-char-at (car <-pos))
- (setq <-pos (cdr <-pos)))
-
- ;; Use only the rightmost >, if any. Replace all others with spaces.
- (while (cdr >-pos)
- (mail-nuke-char-at (nth 1 >-pos))
- (setcdr >-pos (nthcdr 2 >-pos)))
-
- ;; If multiple @s and a :, but no < and >, insert around buffer.
- ;; This commonly happens on the UUCP "From " line. Ugh.
- (cond ((and (> (length @-pos) 1)
- :-pos ;TODO: check if between @s
- (not <-pos))
- (goto-char (point-min))
- (delete-char 1)
- (setq <-pos (list (point)))
- (insert ?<)))
-
- ;; If < but no >, insert > in rightmost possible position
- (cond ((and <-pos
- (null >-pos))
- (goto-char (point-max))
- (setq >-pos (list (point)))
- (insert ?>)))
-
- ;; If > but no <, replace > with space.
- (cond ((and >-pos
- (null <-pos))
- (mail-nuke-char-at (car >-pos))
- (setq >-pos nil)))
-
- ;; Turn >-pos and <-pos into non-lists
- (setq >-pos (car >-pos)
- <-pos (car <-pos))
-
- ;; Trim other punctuation lists of items outside < > pair to handle
- ;; stupid MTAs.
- (cond (<-pos ; don't need to check >-pos also
- ;; handle bozo software that violates RFC 822 by sticking
- ;; punctuation marks outside of a < > pair
- (mail-nuke-elements-outside-range @-pos <-pos >-pos t)
- ;; RFC 822 says nothing about these two outside < >, but
- ;; remove those positions from the lists to make things
- ;; easier.
- (mail-nuke-elements-outside-range !-pos <-pos >-pos t)
- (mail-nuke-elements-outside-range %-pos <-pos >-pos t)))
-
- ;; Check for : that indicates GROUP list and for : part of
- ;; ROUTE-ADDR spec.
- ;; Can't possibly be more than two :. Nuke any extra.
- (while :-pos
- (setq temp (car :-pos)
- :-pos (cdr :-pos))
- (cond ((and <-pos >-pos
- (> temp <-pos)
- (< temp >-pos))
- (if (or route-addr-:-pos
- (< (length @-pos) 2)
- (> temp (car @-pos))
- (< temp (nth 1 @-pos)))
- (mail-nuke-char-at temp)
- (setq route-addr-:-pos temp)))
- ((or (not <-pos)
- (and <-pos
- (< temp <-pos)))
- (setq group-:-pos temp))))
-
- ;; Nuke any ; that is in or to the left of a < > pair or to the left
- ;; of a GROUP starting :. Also, there may only be one ;.
- (while \;-pos
- (setq temp (car \;-pos)
- \;-pos (cdr \;-pos))
- (cond ((and <-pos >-pos
- (> temp <-pos)
- (< temp >-pos))
- (mail-nuke-char-at temp))
- ((and (or (not group-:-pos)
- (> temp group-:-pos))
- (not group-\;-pos))
- (setq group-\;-pos temp))))
-
- ;; Handle junk like ";@host.company.dom" that sendmail adds.
- ;; **** should I remember comment positions?
- (and group-\;-pos
- ;; this is fine for now
- (mail-nuke-elements-outside-range !-pos group-:-pos group-\;-pos t)
- (mail-nuke-elements-outside-range @-pos group-:-pos group-\;-pos t)
- (mail-nuke-elements-outside-range %-pos group-:-pos group-\;-pos t)
- (mail-nuke-elements-outside-range ,-pos group-:-pos group-\;-pos t)
- (and last-real-pos
- (> last-real-pos (1+ group-\;-pos))
- (setq last-real-pos (1+ group-\;-pos)))
- (and comment-end
- (> comment-end group-\;-pos)
- (setq comment-end nil
- comment-beg nil))
- (and quote-end
- (> quote-end group-\;-pos)
- (setq quote-end nil
- quote-beg nil))
- (narrow-to-region (point-min) group-\;-pos))
-
- ;; Any commas must be between < and : of ROUTE-ADDR. Nuke any
- ;; others.
- ;; Hell, go ahead an nuke all of the commas.
- ;; **** This will cause problems when we start handling commas in
- ;; the PHRASE part .... no it won't ... yes it will ... ?????
- (mail-nuke-elements-outside-range ,-pos 1 1)
-
- ;; can only have multiple @s inside < >. The fact that some MTAs
- ;; put de-bracketed ROUTE-ADDRs in the UUCP-style "From " line is
- ;; handled above.
-
- ;; Locate PHRASE part of ROUTE-ADDR.
- (cond (<-pos
- (goto-char <-pos)
- (skip-chars-backward mail-whitespace)
- (setq phrase-end (point))
- (goto-char (or ;;group-:-pos
- (point-min)))
- (skip-chars-forward mail-whitespace)
- (if (< (point) phrase-end)
- (setq phrase-beg (point))
- (setq phrase-end nil))))
-
- ;; handle ROUTE-ADDRS with real ROUTEs.
- ;; If there are multiple @s, then we assume ROUTE-ADDR syntax, and
- ;; any % or ! must be semantically meaningless.
- ;; TODO: do this processing into canonicalization buffer
- (cond (route-addr-:-pos
- (setq !-pos nil
- %-pos nil
- >-pos (copy-marker >-pos)
- route-addr-:-pos (copy-marker route-addr-:-pos))
- (goto-char >-pos)
- (insert-before-markers ?X)
- (goto-char (car @-pos))
- (while (setq @-pos (cdr @-pos))
- (delete-char 1)
- (setq %-pos (cons (point-marker) %-pos))
- (insert "%")
- (goto-char (1- >-pos))
- (save-excursion
- (insert-buffer-substring extraction-buffer
- (car @-pos) route-addr-:-pos)
- (delete-region (car @-pos) route-addr-:-pos))
- (or (cdr @-pos)
- (setq saved-@-pos (list (point)))))
- (setq @-pos saved-@-pos)
- (goto-char >-pos)
- (delete-char -1)
- (mail-nuke-char-at route-addr-:-pos)
- (mail-demarkerize route-addr-:-pos)
- (setq route-addr-:-pos nil
- >-pos (mail-demarkerize >-pos)
- %-pos (mapcar 'mail-demarkerize %-pos))))
-
- ;; de-listify @-pos
- (setq @-pos (car @-pos))
-
- ;; TODO: remove comments in the middle of an address
-
- (set-buffer canonicalization-buffer)
-
- (buffer-disable-undo canonicalization-buffer)
- (set-syntax-table address-syntax-table)
- (setq case-fold-search nil)
-
- (widen)
- (erase-buffer)
- (insert-buffer-substring extraction-buffer)
-
- (if <-pos
- (narrow-to-region (progn
- (goto-char (1+ <-pos))
- (skip-chars-forward mail-whitespace)
- (point))
- >-pos)
- ;; ****** Oh no! What if the address is completely empty!
- (narrow-to-region first-real-pos last-real-pos))
-
- (and @-pos %-pos
- (mail-nuke-elements-outside-range %-pos (point-min) @-pos))
- (and %-pos !-pos
- (mail-nuke-elements-outside-range !-pos (point-min) (car %-pos)))
- (and @-pos !-pos (not %-pos)
- (mail-nuke-elements-outside-range !-pos (point-min) @-pos))
-
- ;; Error condition:?? (and %-pos (not @-pos))
-
- (cond (!-pos
- ;; **** I don't understand this save-restriction and the
- ;; narrow-to-region inside it. Why did I do that?
- (save-restriction
- (cond ((and @-pos
- mail-@-binds-tighter-than-!)
- (goto-char @-pos)
- (setq %-pos (cons (point) %-pos)
- @-pos nil)
- (delete-char 1)
- (insert "%")
- (setq insert-point (point-max)))
- (mail-@-binds-tighter-than-!
- (setq insert-point (point-max)))
- (%-pos
- (setq insert-point (mail-last-element %-pos)
- saved-%-pos (mapcar 'mail-markerize %-pos)
- %-pos nil
- @-pos (mail-markerize @-pos)))
- (@-pos
- (setq insert-point @-pos)
- (setq @-pos (mail-markerize @-pos)))
- (t
- (setq insert-point (point-max))))
- (narrow-to-region (point-min) insert-point)
- (setq saved-!-pos (car !-pos))
- (while !-pos
+ (t
+ (forward-word 1)))
+ (or (eq char ?\()
+ ;; At the end of first address of a multiple address header.
+ (and (eq char ?,)
+ (eobp))
+ (setq last-real-pos (point))))
+
+ ;; Use only the leftmost <, if any. Replace all others with spaces.
+ (while (cdr <-pos)
+ (mail-extr-nuke-char-at (car <-pos))
+ (setq <-pos (cdr <-pos)))
+
+ ;; Use only the rightmost >, if any. Replace all others with spaces.
+ (while (cdr >-pos)
+ (mail-extr-nuke-char-at (nth 1 >-pos))
+ (setcdr >-pos (nthcdr 2 >-pos)))
+
+ ;; If multiple @s and a :, but no < and >, insert around buffer.
+ ;; Example: @foo.bar.dom,@xxx.yyy.zzz:mailbox@aaa.bbb.ccc
+ ;; This commonly happens on the UUCP "From " line. Ugh.
+ (when (and (> (length @-pos) 1)
+ (eq 1 (length colon-pos)) ;TODO: check if between last two @s
+ (not \;-pos)
+ (not <-pos))
+ (goto-char (point-min))
+ (delete-char 1)
+ (setq <-pos (list (point)))
+ (insert ?<))
+
+ ;; If < but no >, insert > in rightmost possible position
+ (when (and <-pos (null >-pos))
+ (goto-char (point-max))
+ (setq >-pos (list (point)))
+ (insert ?>))
+
+ ;; If > but no <, replace > with space.
+ (when (and >-pos (null <-pos))
+ (mail-extr-nuke-char-at (car >-pos))
+ (setq >-pos nil))
+
+ ;; Turn >-pos and <-pos into non-lists
+ (setq >-pos (car >-pos)
+ <-pos (car <-pos))
+
+ ;; Trim other punctuation lists of items outside < > pair to handle
+ ;; stupid MTAs.
+ (when <-pos ; don't need to check >-pos also
+ ;; handle bozo software that violates RFC 822 by sticking
+ ;; punctuation marks outside of a < > pair
+ (mail-extr-nuke-outside-range @-pos <-pos >-pos t)
+ ;; RFC 822 says nothing about these two outside < >, but
+ ;; remove those positions from the lists to make things
+ ;; easier.
+ (mail-extr-nuke-outside-range !-pos <-pos >-pos t)
+ (mail-extr-nuke-outside-range %-pos <-pos >-pos t))
+
+ ;; Check for : that indicates GROUP list and for : part of
+ ;; ROUTE-ADDR spec.
+ ;; Can't possibly be more than two :. Nuke any extra.
+ (while colon-pos
+ (setq temp (car colon-pos)
+ colon-pos (cdr colon-pos))
+ (cond ((and <-pos >-pos
+ (> temp <-pos)
+ (< temp >-pos))
+ (if (or route-addr-:-pos
+ (< (length @-pos) 2)
+ (> temp (car @-pos))
+ (< temp (nth 1 @-pos)))
+ (mail-extr-nuke-char-at temp)
+ (setq route-addr-:-pos temp)))
+ ((or (not <-pos)
+ (and <-pos
+ (< temp <-pos)))
+ (setq group-:-pos temp))))
+
+ ;; Nuke any ; that is in or to the left of a < > pair or to the left
+ ;; of a GROUP starting :. Also, there may only be one ;.
+ (while \;-pos
+ (setq temp (car \;-pos)
+ \;-pos (cdr \;-pos))
+ (cond ((and <-pos >-pos
+ (> temp <-pos)
+ (< temp >-pos))
+ (mail-extr-nuke-char-at temp))
+ ((and (or (not group-:-pos)
+ (> temp group-:-pos))
+ (not group-\;-pos))
+ (setq group-\;-pos temp))))
+
+ ;; Nuke unmatched GROUP syntax characters.
+ (when (and group-:-pos (not group-\;-pos))
+ ;; *** Do I really need to erase it?
+ (mail-extr-nuke-char-at group-:-pos)
+ (setq group-:-pos nil))
+ (when (and group-\;-pos (not group-:-pos))
+ ;; *** Do I really need to erase it?
+ (mail-extr-nuke-char-at group-\;-pos)
+ (setq group-\;-pos nil))
+
+ ;; Handle junk like ";@host.company.dom" that sendmail adds.
+ ;; **** should I remember comment positions?
+ (when group-\;-pos
+ ;; this is fine for now
+ (mail-extr-nuke-outside-range !-pos group-:-pos group-\;-pos t)
+ (mail-extr-nuke-outside-range @-pos group-:-pos group-\;-pos t)
+ (mail-extr-nuke-outside-range %-pos group-:-pos group-\;-pos t)
+ (mail-extr-nuke-outside-range comma-pos group-:-pos group-\;-pos t)
+ (and last-real-pos
+ (> last-real-pos (1+ group-\;-pos))
+ (setq last-real-pos (1+ group-\;-pos)))
+ ;; *** This may be wrong:
+ (and cend
+ (> cend group-\;-pos)
+ (setq cend nil
+ cbeg nil))
+ (and quote-end
+ (> quote-end group-\;-pos)
+ (setq quote-end nil
+ quote-beg nil))
+ ;; This was both wrong and unnecessary:
+ ;;(narrow-to-region (point-min) group-\;-pos)
+
+ ;; *** The entire handling of GROUP addresses seems rather lame.
+ ;; *** It deserves a complete rethink, except that these addresses
+ ;; *** are hardly ever seen.
+ )
+
+ ;; Any commas must be between < and : of ROUTE-ADDR. Nuke any
+ ;; others.
+ ;; Hell, go ahead and nuke all of the commas.
+ ;; **** This will cause problems when we start handling commas in
+ ;; the PHRASE part .... no it won't ... yes it will ... ?????
+ (mail-extr-nuke-outside-range comma-pos 1 1)
+
+ ;; can only have multiple @s inside < >. The fact that some MTAs
+ ;; put de-bracketed ROUTE-ADDRs in the UUCP-style "From " line is
+ ;; handled above.
+
+ ;; Locate PHRASE part of ROUTE-ADDR.
+ (when <-pos
+ (goto-char <-pos)
+ (mail-extr-skip-whitespace-backward)
+ (setq phrase-end (point))
+ (goto-char (or ;;group-:-pos
+ (point-min)))
+ (mail-extr-skip-whitespace-forward)
+ (if (< (point) phrase-end)
+ (setq phrase-beg (point))
+ (setq phrase-end nil)))
+
+ ;; handle ROUTE-ADDRS with real ROUTEs.
+ ;; If there are multiple @s, then we assume ROUTE-ADDR syntax, and
+ ;; any % or ! must be semantically meaningless.
+ ;; TODO: do this processing into canonicalization buffer
+ (when route-addr-:-pos
+ (setq !-pos nil
+ %-pos nil
+ >-pos (copy-marker >-pos)
+ route-addr-:-pos (copy-marker route-addr-:-pos))
+ (goto-char >-pos)
+ (insert-before-markers ?X)
+ (goto-char (car @-pos))
+ (while (setq @-pos (cdr @-pos))
+ (delete-char 1)
+ (setq %-pos (cons (point-marker) %-pos))
+ (insert "%")
+ (goto-char (1- >-pos))
+ (save-excursion
+ (insert-buffer-substring extraction-buffer
+ (car @-pos) route-addr-:-pos)
+ (delete-region (car @-pos) route-addr-:-pos))
+ (or (cdr @-pos)
+ (setq saved-@-pos (list (point)))))
+ (setq @-pos saved-@-pos)
+ (goto-char >-pos)
+ (delete-char -1)
+ (mail-extr-nuke-char-at route-addr-:-pos)
+ (mail-extr-demarkerize route-addr-:-pos)
+ (setq route-addr-:-pos nil
+ >-pos (mail-extr-demarkerize >-pos)
+ %-pos (mapcar 'mail-extr-demarkerize %-pos)))
+
+ ;; de-listify @-pos
+ (setq @-pos (car @-pos))
+
+ ;; TODO: remove comments in the middle of an address
+
+ (with-current-buffer canonicalization-buffer
+ (widen)
+ (erase-buffer)
+ (insert-buffer-substring extraction-buffer)
+
+ (if <-pos
+ (narrow-to-region (progn
+ (goto-char (1+ <-pos))
+ (mail-extr-skip-whitespace-forward)
+ (point))
+ >-pos)
+ (if (and first-real-pos last-real-pos)
+ (narrow-to-region first-real-pos last-real-pos)
+ ;; ****** Oh no! What if the address is completely empty!
+ ;; *** Is this correct?
+ (narrow-to-region (point-max) (point-max))))
+
+ (and @-pos %-pos
+ (mail-extr-nuke-outside-range %-pos (point-min) @-pos))
+ (and %-pos !-pos
+ (mail-extr-nuke-outside-range !-pos (point-min) (car %-pos)))
+ (and @-pos !-pos (not %-pos)
+ (mail-extr-nuke-outside-range !-pos (point-min) @-pos))
+
+ ;; Error condition:?? (and %-pos (not @-pos))
+
+ ;; WARNING: THIS CODE IS DUPLICATED BELOW.
+ (when (and %-pos (not @-pos))
+ (goto-char (car %-pos))
+ (delete-char 1)
+ (setq @-pos (point))
+ (insert "@")
+ (setq %-pos (cdr %-pos)))
+
+ (when (and mail-extr-mangle-uucp !-pos)
+ ;; **** I don't understand this save-restriction and the
+ ;; narrow-to-region inside it. Why did I do that?
+ (save-restriction
+ (cond ((and @-pos
+ mail-extr-@-binds-tighter-than-!)
+ (goto-char @-pos)
+ (setq %-pos (cons (point) %-pos)
+ @-pos nil)
+ (delete-char 1)
+ (insert "%")
+ (setq insert-point (point-max)))
+ (mail-extr-@-binds-tighter-than-!
+ (setq insert-point (point-max)))
+ (%-pos
+ (setq insert-point (car (last %-pos))
+ saved-%-pos (mapcar 'mail-extr-markerize %-pos)
+ %-pos nil
+ @-pos (mail-extr-markerize @-pos)))
+ (@-pos
+ (setq insert-point @-pos)
+ (setq @-pos (mail-extr-markerize @-pos)))
+ (t
+ (setq insert-point (point-max))))
+ (narrow-to-region (point-min) insert-point)
+ (setq saved-!-pos (car !-pos))
+ (while !-pos
+ (goto-char (point-max))
+ (cond ((and (not @-pos)
+ (not (cdr !-pos)))
+ (setq @-pos (point))
+ (insert-before-markers "@ "))
+ (t
+ (setq %-pos (cons (point) %-pos))
+ (insert-before-markers "% ")))
+ (backward-char 1)
+ (insert-buffer-substring
+ (current-buffer)
+ (if (nth 1 !-pos)
+ (1+ (nth 1 !-pos))
+ (point-min))
+ (car !-pos))
+ (delete-char 1)
+ (or (save-excursion
+ (mail-extr-safe-move-sexp -1)
+ (mail-extr-skip-whitespace-backward)
+ (eq ?. (preceding-char)))
+ (insert-before-markers
+ (if (save-excursion
+ (mail-extr-skip-whitespace-backward)
+ (eq ?. (preceding-char)))
+ ""
+ ".")
+ "uucp"))
+ (setq !-pos (cdr !-pos))))
+ (and saved-%-pos
+ (setq %-pos (append (mapcar 'mail-extr-demarkerize
+ saved-%-pos)
+ %-pos)))
+ (setq @-pos (mail-extr-demarkerize @-pos))
+ (narrow-to-region (1+ saved-!-pos) (point-max)))
+
+ ;; WARNING: THIS CODE IS DUPLICATED ABOVE.
+ (when (and %-pos (not @-pos))
+ (goto-char (car %-pos))
+ (delete-char 1)
+ (setq @-pos (point))
+ (insert "@")
+ (setq %-pos (cdr %-pos)))
+
+ (when (setq %-pos (nreverse %-pos)) ; implies @-pos valid
+ (setq temp %-pos)
+ (catch 'truncated
+ (while temp
+ (goto-char (or (nth 1 temp)
+ @-pos))
+ (mail-extr-skip-whitespace-backward)
+ (save-excursion
+ (mail-extr-safe-move-sexp -1)
+ (setq domain-pos (point))
+ (mail-extr-skip-whitespace-backward)
+ (setq \.-pos (eq ?. (preceding-char))))
+ (when (and \.-pos
+ ;; #### string consing
+ (let ((s (intern-soft
+ (buffer-substring domain-pos (point))
+ mail-extr-all-top-level-domains)))
+ (and s (get s 'domain-name))))
+ (narrow-to-region (point-min) (point))
+ (goto-char (car temp))
+ (delete-char 1)
+ (setq @-pos (point))
+ (setcdr temp nil)
+ (setq %-pos (delq @-pos %-pos))
+ (insert "@")
+ (throw 'truncated t))
+ (setq temp (cdr temp)))))
+ (setq mbox-beg (point-min)
+ mbox-end (if %-pos (car %-pos)
+ (or @-pos
+ (point-max))))
+
+ (when @-pos
+ ;; Make the domain-name part lowercase since it's case
+ ;; insensitive anyway.
+ (downcase-region (1+ @-pos) (point-max))))
+
+ ;; Done canonicalizing address.
+ ;; We are now back in extraction-buffer.
+
+ ;; Decide what part of the address to search to find the full name.
+ (cond (
+ ;; Example: "First M. Last" <fml@foo.bar.dom>
+ (and phrase-beg
+ (eq quote-beg phrase-beg)
+ (<= quote-end phrase-end))
+ (narrow-to-region (1+ quote-beg) (1- quote-end))
+ (mail-extr-undo-backslash-quoting (point-min) (point-max)))
+
+ ;; Example: First Last <fml@foo.bar.dom>
+ (phrase-beg
+ (narrow-to-region phrase-beg phrase-end))
+
+ ;; Example: fml@foo.bar.dom (First M. Last)
+ (cbeg
+ (narrow-to-region (1+ cbeg) (1- cend))
+ (mail-extr-undo-backslash-quoting (point-min) (point-max))
+
+ ;; Deal with spacing problems
+ (goto-char (point-min))
+;;; (cond ((not (search-forward " " nil t))
+;;; (goto-char (point-min))
+;;; (cond ((search-forward "_" nil t)
+;;; ;; Handle the *idiotic* use of underlines as spaces.
+;;; ;; Example: fml@foo.bar.dom (First_M._Last)
+;;; (goto-char (point-min))
+;;; (while (search-forward "_" nil t)
+;;; (replace-match " " t)))
+;;; ((search-forward "." nil t)
+;;; ;; Fix . used as space
+;;; ;; Example: danj1@cb.att.com (daniel.jacobson)
+;;; (goto-char (point-min))
+;;; (while (re-search-forward mail-extr-bad-dot-pattern nil t)
+;;; (replace-match "\\1 \\2" t))))))
+ )
+
+ ;; Otherwise we try to get the name from the mailbox portion
+ ;; of the address.
+ ;; Example: First_M_Last@foo.bar.dom
+ (t
+ ;; *** Work in canon buffer instead? No, can't. Hmm.
(goto-char (point-max))
- (cond ((and (not @-pos)
- (not (cdr !-pos)))
- (setq @-pos (point))
- (insert-before-markers "@ "))
- (t
- (setq %-pos (cons (point) %-pos))
- (insert-before-markers "% ")))
- (backward-char 1)
- (insert-buffer-substring
- (current-buffer)
- (if (nth 1 !-pos)
- (1+ (nth 1 !-pos))
- (point-min))
- (car !-pos))
- (delete-char 1)
- (or (save-excursion
- (safe-move-sexp -1)
- (skip-chars-backward mail-whitespace)
- (eq ?. (preceding-char)))
- (insert-before-markers
- (if (save-excursion
- (skip-chars-backward mail-whitespace)
- (eq ?. (preceding-char)))
- ""
- ".")
- "uucp"))
- (setq !-pos (cdr !-pos))))
- (and saved-%-pos
- (setq %-pos (append (mapcar 'mail-demarkerize saved-%-pos)
- %-pos)))
- (setq @-pos (mail-demarkerize @-pos))
- (narrow-to-region (1+ saved-!-pos) (point-max))))
- (cond ((and %-pos
- (not @-pos))
- (goto-char (car %-pos))
- (delete-char 1)
- (setq @-pos (point))
- (insert "@")
- (setq %-pos (cdr %-pos))))
- (setq %-pos (nreverse %-pos))
- ;; RFC 1034 doesn't approve of this, oh well:
- (downcase-region (or (car %-pos) @-pos (point-max)) (point-max))
- (cond (%-pos ; implies @-pos valid
- (setq temp %-pos)
- (catch 'truncated
- (while temp
- (goto-char (or (nth 1 temp)
- @-pos))
- (skip-chars-backward mail-whitespace)
- (save-excursion
- (safe-move-sexp -1)
- (setq domain-pos (point))
- (skip-chars-backward mail-whitespace)
- (setq \.-pos (eq ?. (preceding-char))))
- (cond ((and \.-pos
- (get
- (intern
- (buffer-substring domain-pos (point)))
- 'domain-name))
- (narrow-to-region (point-min) (point))
- (goto-char (car temp))
- (delete-char 1)
- (setq @-pos (point))
- (setcdr temp nil)
- (setq %-pos (delq @-pos %-pos))
- (insert "@")
- (throw 'truncated t)))
- (setq temp (cdr temp))))))
- (setq mbox-beg (point-min)
- mbox-end (if %-pos (car %-pos)
- (or @-pos
- (point-max))))
-
- ;; Done canonicalizing address.
-
- (set-buffer extraction-buffer)
-
- ;; Find the full name
-
- (cond ((and phrase-beg
- (eq quote-beg phrase-beg)
- (<= quote-end phrase-end))
- (narrow-to-region (1+ quote-beg) (1- quote-end))
- (undo-backslash-quoting (point-min) (point-max)))
- (phrase-beg
- (narrow-to-region phrase-beg phrase-end))
- (comment-beg
- (narrow-to-region (1+ comment-beg) (1- comment-end))
- (undo-backslash-quoting (point-min) (point-max)))
- (t
- ;; *** Work in canon buffer instead? No, can't. Hmm.
- (delete-region (point-min) (point-max))
- (insert-buffer-substring canonicalization-buffer
- mbox-beg mbox-end)
- (goto-char (point-min))
- (setq \.-ends-name (search-forward "_" nil t))
- (goto-char (point-min))
- (while (progn
- (skip-chars-forward mail-whitespace)
- (not (eobp)))
- (setq char (char-after (point)))
- (cond
- ((eq char ?\")
- (setq quote-beg (point))
- (or (safe-move-sexp 1)
- ;; TODO: handle this error condition!!!!!
- (forward-char 1))
- ;; take into account deletions
- (setq quote-end (- (point) 2))
- (save-excursion
- (backward-char 1)
- (delete-char 1)
- (goto-char quote-beg)
- (delete-char 1))
- (undo-backslash-quoting quote-beg quote-end)
- (or (eq mail-space-char (char-after (point)))
- (insert " "))
- (setq \.-ends-name t))
- ((eq char ?.)
- (if (eq (char-after (1+ (point))) ?_)
- (progn
- (forward-char 1)
+ (narrow-to-region (point) (point))
+ (insert-buffer-substring canonicalization-buffer
+ mbox-beg mbox-end)
+ (goto-char (point-min))
+
+ ;; Example: First_Last.XXX@foo.bar.dom
+ (setq \.-ends-name (re-search-forward "[_0-9]" nil t))
+
+ (goto-char (point-min))
+
+ (if (not mail-extr-mangle-uucp)
+ (modify-syntax-entry ?! "w" (syntax-table)))
+
+ (while (progn
+ (mail-extr-skip-whitespace-forward)
+ (not (eobp)))
+ (setq char (char-after (point)))
+ (cond
+ ((eq char ?\")
+ (setq quote-beg (point))
+ (or (mail-extr-safe-move-sexp 1)
+ ;; TODO: handle this error condition!!!!!
+ (forward-char 1))
+ ;; take into account deletions
+ (setq quote-end (- (point) 2))
+ (save-excursion
+ (backward-char 1)
(delete-char 1)
- (insert mail-space-char))
- (if \.-ends-name
- (narrow-to-region (point-min) (point))
+ (goto-char quote-beg)
+ (or (eobp)
+ (delete-char 1)))
+ (mail-extr-undo-backslash-quoting quote-beg quote-end)
+ (or (eq ?\ (char-after (point)))
+ (insert " "))
+ ;; (setq mailbox-name-processed-flag t)
+ (setq \.-ends-name t))
+ ((eq char ?.)
+ (if (memq (char-after (1+ (point))) '(?_ ?=))
+ (progn
+ (forward-char 1)
+ (delete-char 1)
+ (insert ?\ ))
+ (if \.-ends-name
+ (narrow-to-region (point-min) (point))
+ (delete-char 1)
+ (insert " ")))
+ ;; (setq mailbox-name-processed-flag t)
+ )
+ ((memq (char-syntax char) '(?. ?\\))
(delete-char 1)
- (insert " "))))
- ((memq (char-syntax char) '(?. ?\\))
- (delete-char 1)
- (insert " "))
- (t
- (setq atom-beg (point))
- (forward-word 1)
- (setq atom-end (point))
- (save-restriction
- (narrow-to-region atom-beg atom-end)
- (goto-char (point-min))
- (while (re-search-forward "\\([^_]+\\)_" nil t)
- (replace-match "\\1 "))
- (goto-char (point-max))))))))
-
- (set-syntax-table address-text-syntax-table)
-
- (setq xxx (variant-method (buffer-string)))
- (delete-region (point-min) (point-max))
- (insert xxx)
- (goto-char (point-min))
-
-;; ;; Compress whitespace
-;; (goto-char (point-min))
-;; (while (re-search-forward "[ \t\n]+" nil t)
-;; (replace-match " "))
-;;
-;; ;; Fix . used as space
-;; (goto-char (point-min))
-;; (while (re-search-forward mail-bad-\.-pattern nil t)
-;; (replace-match "\\1 \\2"))
-;;
-;; ;; Delete trailing parenthesized comment
-;; (goto-char (point-max))
-;; (skip-chars-backward mail-whitespace)
-;; (cond ((memq (char-after (1- (point))) '(?\) ?\} ?\]))
-;; (setq comment-end (point))
-;; (set-syntax-table address-text-comment-syntax-table)
-;; (or (safe-move-sexp -1)
-;; (backward-char 1))
-;; (set-syntax-table address-text-syntax-table)
-;; (setq comment-beg (point))
-;; (skip-chars-backward mail-whitespace)
-;; (if (bobp)
-;; (narrow-to-region (1+ comment-beg) (1- comment-end))
-;; (narrow-to-region (point-min) (point)))))
-;;
-;; ;; Find, save, and delete any name suffix
-;; ;; *** Broken!
-;; (goto-char (point-min))
-;; (cond ((re-search-forward mail-full-name-suffix-pattern nil t)
-;; (setq name-suffix (buffer-substring (match-beginning 3)
-;; (match-end 3)))
-;; (replace-match "\\1 \\4")))
-;;
-;; ;; Delete ALL CAPS words and after, if preceded by mixed-case or
-;; ;; lowercase words. Eg. XT-DEM.
-;; (goto-char (point-min))
-;; ;; ## This will lose on something like "SMITH MAX".
-;; ;; ## maybe it should be
-;; ;; ## " \\([A-Z]+[-_/][A-Z]+\\|[A-Z][A-Z][A-Z]\\)\\b.*[^A-Z \t]"
-;; ;; ## that is, three-letter-upper-case-word with non-upper-case
-;; ;; ## characters following it.
-;; (if (re-search-forward mail-mixed-case-name-pattern nil t)
-;; (if (re-search-forward mail-weird-acronym-pattern nil t)
-;; (narrow-to-region (point-min) (match-beginning 0))))
-;;
-;; ;; Delete trailing alternative address
-;; (goto-char (point-min))
-;; (if (re-search-forward mail-alternative-address-pattern nil t)
-;; (narrow-to-region (point-min) (match-beginning 0)))
-;;
-;; ;; Delete trailing comment
-;; (goto-char (point-min))
-;; (if (re-search-forward mail-trailing-comment-start-pattern nil t)
-;; (or (progn
-;; (goto-char (match-beginning 0))
-;; (skip-chars-backward mail-whitespace)
-;; (bobp))
-;; (narrow-to-region (point-min) (match-beginning 0))))
-;;
-;; ;; Delete trailing comma-separated comment
-;; (goto-char (point-min))
-;; ;; ## doesn't this break "Smith, John"? Yes.
-;; (re-search-forward mail-last-name-first-pattern nil t)
-;; (while (search-forward "," nil t)
-;; (or (save-excursion
-;; (backward-char 2)
-;; (looking-at mail-full-name-suffix-pattern))
-;; (narrow-to-region (point-min) (1- (point)))))
-;;
-;; ;; Delete telephone numbers and ham radio call signs
-;; (goto-char (point-min))
-;; (if (re-search-forward mail-telephone-extension-pattern nil t)
-;; (narrow-to-region (point-min) (match-beginning 0)))
-;; (goto-char (point-min))
-;; (if (re-search-forward mail-ham-call-sign-pattern nil t)
-;; (if (eq (match-beginning 0) (point-min))
-;; (narrow-to-region (match-end 0) (point-max))
-;; (narrow-to-region (point-min) (match-beginning 0))))
-;;
-;; ;; Delete trailing word followed immediately by .
-;; (goto-char (point-min))
-;; ;; ## what's this for? doesn't it mess up "Public, Harry Q."? No.
-;; (if (re-search-forward "\\b[A-Za-z][A-Za-z]+\\. *\\'" nil t)
-;; (narrow-to-region (point-min) (match-beginning 0)))
-;;
-;; ;; Handle & substitution
-;; ;; TODO: remember to disable middle initial guessing
-;; (goto-char (point-min))
-;; (cond ((re-search-forward "\\( \\|\\`\\)&\\( \\|\\'\\)" nil t)
-;; (goto-char (match-end 1))
-;; (delete-char 1)
-;; (capitalize-region
-;; (point)
-;; (progn
-;; (insert-buffer-substring canonicalization-buffer
-;; mbox-beg mbox-end)
-;; (point)))))
-;;
-;; ;; Delete nickname
-;; (goto-char (point-min))
-;; (if (re-search-forward mail-nickname-pattern nil t)
-;; (replace-match (if (eq (match-beginning 2) (1- (match-end 2)))
-;; " \\2 "
-;; " ")))
-;;
-;; ;; Fixup initials
-;; (while (progn
-;; (goto-char (point-min))
-;; (re-search-forward mail-bad-initials-pattern nil t))
-;; (replace-match
-;; (if (match-beginning 4)
-;; "\\1. \\4"
-;; (if (match-beginning 5)
-;; "\\1. \\5"
-;; "\\1. "))))
-;;
-;; ;; Delete title
-;; (goto-char (point-min))
-;; (if (re-search-forward mail-full-name-prefixes nil t)
-;; (narrow-to-region (point) (point-max)))
-;;
-;; ;; Delete trailing and preceding non-name characters
-;; (goto-char (point-min))
-;; (skip-chars-forward mail-non-begin-name-chars)
-;; (narrow-to-region (point) (point-max))
-;; (goto-char (point-max))
-;; (skip-chars-backward mail-non-end-name-chars)
-;; (narrow-to-region (point-min) (point))
-
- ;; If name is "First Last" and userid is "F?L", then assume
- ;; the middle initial is the second letter in the userid.
- ;; initially by Jamie Zawinski <jwz@lucid.com>
- (cond ((and (eq 3 (- mbox-end mbox-beg))
- (progn
- (goto-char (point-min))
- (looking-at mail-two-name-pattern)))
- (setq fi (char-after (match-beginning 0))
- li (char-after (match-beginning 3)))
- (save-excursion
- (set-buffer canonicalization-buffer)
- ;; char-equal is ignoring case here, so no need to upcase
- ;; or downcase.
- (let ((case-fold-search t))
- (and (char-equal fi (char-after mbox-beg))
- (char-equal li (char-after (1- mbox-end)))
- (setq mi (char-after (1+ mbox-beg))))))
- (cond ((and mi
- ;; TODO: use better table than syntax table
- (eq ?w (char-syntax mi)))
- (goto-char (match-beginning 3))
- (insert (upcase mi) ". ")))))
-
-;; ;; Restore suffix
-;; (cond (name-suffix
-;; (goto-char (point-max))
-;; (insert ", " name-suffix)
-;; (backward-word 1)
-;; (cond ((memq (following-char) '(?j ?J ?s ?S))
-;; (capitalize-word 1)
-;; (or (eq (following-char) ?.)
-;; (insert ?.)))
-;; (t
-;; (upcase-word 1)))))
-
- ;; Result
- (list (buffer-string)
- (progn
- (set-buffer canonicalization-buffer)
- (buffer-string)))
- )))
-
-;; TODO: put this back in the above function now that it's proven:
-(defun variant-method (string)
- (let ((variant-buffer (get-buffer-create "*variant method buffer*"))
- (word-count 0)
- mixed-case-flag lower-case-flag upper-case-flag
+ (insert " ")
+ ;; (setq mailbox-name-processed-flag t)
+ )
+ (t
+ (setq atom-beg (point))
+ (forward-word 1)
+ (setq atom-end (point))
+ (goto-char atom-beg)
+ (save-restriction
+ (narrow-to-region atom-beg atom-end)
+ (cond
+
+ ;; Handle X.400 addresses encoded in RFC-822.
+ ;; *** Shit! This has to handle the case where it is
+ ;; *** embedded in a quote too!
+ ;; *** Shit! The input is being broken up into atoms
+ ;; *** by periods!
+ ((looking-at mail-extr-x400-encoded-address-pattern)
+
+ ;; Copy the contents of the individual fields that
+ ;; might hold name data to the beginning.
+ (mapc
+ (lambda (field-pattern)
+ (when
+ (save-excursion
+ (re-search-forward field-pattern nil t))
+ (insert-buffer-substring (current-buffer)
+ (match-beginning 1)
+ (match-end 1))
+ (insert " ")))
+ (list mail-extr-x400-encoded-address-given-name-pattern
+ mail-extr-x400-encoded-address-surname-pattern
+ mail-extr-x400-encoded-address-full-name-pattern))
+
+ ;; Discard the rest, since it contains stuff like
+ ;; routing information, not part of a name.
+ (mail-extr-skip-whitespace-backward)
+ (delete-region (point) (point-max))
+
+ ;; Handle periods used for spacing.
+ (while (re-search-forward mail-extr-bad-dot-pattern nil t)
+ (replace-match "\\1 \\2" t))
+
+ ;; (setq mailbox-name-processed-flag t)
+ )
+
+ ;; Handle normal addresses.
+ (t
+ (goto-char (point-min))
+ ;; Handle _ and = used for spacing.
+ (while (re-search-forward "\\([^_=]+\\)[_=]" nil t)
+ (replace-match "\\1 " t)
+ ;; (setq mailbox-name-processed-flag t)
+ )
+ (goto-char (point-max))))))))
+
+ ;; undo the dirty deed
+ (if (not mail-extr-mangle-uucp)
+ (modify-syntax-entry ?! "." (syntax-table)))
+ ;;
+ ;; If we derived the name from the mailbox part of the address,
+ ;; and we only got one word out of it, don't treat that as a
+ ;; name. "foo@bar" --> (nil "foo@bar"), not ("foo" "foo@bar")
+ ;; (if (not mailbox-name-processed-flag)
+ ;; (delete-region (point-min) (point-max)))
+ ))
+
+ (set-syntax-table mail-extr-address-text-syntax-table)
+
+ (mail-extr-voodoo mbox-beg mbox-end canonicalization-buffer)
+ (goto-char (point-min))
+
+ ;; If name is "First Last" and userid is "F?L", then assume
+ ;; the middle initial is the second letter in the userid.
+ ;; Initial code by Jamie Zawinski <jwz@lucid.com>
+ ;; *** Make it work when there's a suffix as well.
+ (goto-char (point-min))
+ (when (and mail-extr-guess-middle-initial
+ (not disable-initial-guessing-flag)
+ (eq 3 (- mbox-end mbox-beg))
+ (progn
+ (goto-char (point-min))
+ (looking-at mail-extr-two-name-pattern)))
+ (setq fi (char-after (match-beginning 0))
+ li (char-after (match-beginning 3)))
+ (with-current-buffer canonicalization-buffer
+ ;; char-equal is ignoring case here, so no need to upcase
+ ;; or downcase.
+ (let ((case-fold-search t))
+ (and (char-equal fi (char-after mbox-beg))
+ (char-equal li (char-after (1- mbox-end)))
+ (setq mi (char-after (1+ mbox-beg))))))
+ (when (and mi
+ ;; TODO: use better table than syntax table
+ (eq ?w (char-syntax mi)))
+ (goto-char (match-beginning 3))
+ (insert (upcase mi) ". ")))
+
+ ;; Nuke name if it is the same as mailbox name.
+ (let ((buffer-length (- (point-max) (point-min)))
+ (i 0)
+ (names-match-flag t))
+ (when (and (> buffer-length 0)
+ (eq buffer-length (- mbox-end mbox-beg)))
+ (goto-char (point-max))
+ (insert-buffer-substring canonicalization-buffer
+ mbox-beg mbox-end)
+ (while (and names-match-flag
+ (< i buffer-length))
+ (or (eq (downcase (char-after (+ i (point-min))))
+ (downcase
+ (char-after (+ i buffer-length (point-min)))))
+ (setq names-match-flag nil))
+ (setq i (1+ i)))
+ (delete-region (+ (point-min) buffer-length) (point-max))
+ (if names-match-flag
+ (narrow-to-region (point) (point)))))
+
+ ;; Nuke name if it's just one word.
+ (goto-char (point-min))
+ (and mail-extr-ignore-single-names
+ (not (re-search-forward "[- ]" nil t))
+ (narrow-to-region (point) (point)))
+
+ ;; Record the result
+ (setq value-list
+ (cons (list (if (not (= (point-min) (point-max)))
+ (buffer-string))
+ (with-current-buffer canonicalization-buffer
+ (if (not (= (point-min) (point-max)))
+ (buffer-string))))
+ value-list))
+
+ ;; Unless one address is all we wanted,
+ ;; delete this one from extraction-buffer
+ ;; and get ready to extract the next address.
+ (when all
+ (if end-of-address
+ (narrow-to-region 1 end-of-address)
+ (widen))
+ (delete-region (point-min) (point-max))
+ (widen))
+ )))
+ (if all (nreverse value-list) (car value-list))
+ ))
+
+(defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer)
+ (let ((word-count 0)
+ (case-fold-search nil)
+ mixed-case-flag lower-case-flag ;;upper-case-flag
suffix-flag last-name-comma-flag
- comment-beg comment-end initial beg end
+ ;;cbeg cend
+ initial
+ begin-again-flag
+ drop-this-word-if-trailing-flag
+ drop-last-word-if-trailing-flag
+ word-found-flag
+ this-word-beg last-word-beg
+ name-beg name-end
+ name-done-flag
)
(save-excursion
- (set-buffer variant-buffer)
- (buffer-disable-undo variant-buffer)
- (set-syntax-table address-text-syntax-table)
- (widen)
- (erase-buffer)
- (setq case-fold-search nil)
-
- (insert string)
-
+ (set-syntax-table mail-extr-address-text-syntax-table)
+
+ ;; Get rid of comments.
+ (goto-char (point-min))
+ (while (not (eobp))
+ ;; Initialize for this iteration of the loop.
+ (skip-chars-forward "^({[\"'`")
+ (let ((cbeg (point)))
+ (set-syntax-table mail-extr-address-text-comment-syntax-table)
+ (if (memq (following-char) '(?\' ?\`))
+ (search-forward "'" nil 'move
+ (if (eq ?\' (following-char)) 2 1))
+ (or (mail-extr-safe-move-sexp 1)
+ (goto-char (point-max))))
+ (set-syntax-table mail-extr-address-text-syntax-table)
+ (when (eq (char-after cbeg) ?\()
+ ;; Delete the comment itself.
+ (delete-region cbeg (point))
+ ;; Canonicalize whitespace where the comment was.
+ (skip-chars-backward " \t")
+ (if (looking-at "\\([ \t]+$\\|[ \t]+,\\)")
+ (replace-match "")
+ (setq cbeg (point))
+ (skip-chars-forward " \t")
+ (if (bobp)
+ (delete-region (point) cbeg)
+ (just-one-space))))))
+
+ ;; This was moved above.
;; Fix . used as space
+ ;; But it belongs here because it occurs not only as
+ ;; rypens@reks.uia.ac.be (Piet.Rypens)
+ ;; but also as
+ ;; "Piet.Rypens" <rypens@reks.uia.ac.be>
+ ;;(goto-char (point-min))
+ ;;(while (re-search-forward mail-extr-bad-dot-pattern nil t)
+ ;; (replace-match "\\1 \\2" t))
+
+ (unless (search-forward " " nil t)
+ (goto-char (point-min))
+ (cond ((search-forward "_" nil t)
+ ;; Handle the *idiotic* use of underlines as spaces.
+ ;; Example: fml@foo.bar.dom (First_M._Last)
+ (goto-char (point-min))
+ (while (search-forward "_" nil t)
+ (replace-match " " t)))
+ ((search-forward "." nil t)
+ ;; Fix . used as space
+ ;; Example: danj1@cb.att.com (daniel.jacobson)
+ (goto-char (point-min))
+ (while (re-search-forward mail-extr-bad-dot-pattern nil t)
+ (replace-match "\\1 \\2" t)))))
+
+ ;; Loop over the words (and other junk) in the name.
(goto-char (point-min))
- (while (re-search-forward mail-bad-\.-pattern nil t)
- (replace-match "\\1 \\2"))
+ (while (not name-done-flag)
+
+ (when word-found-flag
+ ;; Last time through this loop we skipped over a word.
+ (setq last-word-beg this-word-beg)
+ (setq drop-last-word-if-trailing-flag
+ drop-this-word-if-trailing-flag)
+ (setq word-found-flag nil))
+
+ (when begin-again-flag
+ ;; Last time through the loop we found something that
+ ;; indicates we should pretend we are beginning again from
+ ;; the start.
+ (setq word-count 0)
+ (setq last-word-beg nil)
+ (setq drop-last-word-if-trailing-flag nil)
+ (setq mixed-case-flag nil)
+ (setq lower-case-flag nil)
+ ;; (setq upper-case-flag nil)
+ (setq begin-again-flag nil))
+
+ ;; Initialize for this iteration of the loop.
+ (mail-extr-skip-whitespace-forward)
+ (if (eq word-count 0) (narrow-to-region (point) (point-max)))
+ (setq this-word-beg (point))
+ (setq drop-this-word-if-trailing-flag nil)
+
+ ;; Decide what to do based on what we are looking at.
+ (cond
- ;; Skip any initial garbage.
- (goto-char (point-min))
- (skip-chars-forward mail-non-begin-name-chars)
- (skip-chars-backward "& \"")
- (narrow-to-region (point) (point-max))
-
- (catch 'stop
- (while t
- (skip-chars-forward mail-whitespace)
-
+ ;; Delete title
+ ((and (eq word-count 0)
+ (looking-at mail-extr-full-name-prefixes))
+ (goto-char (match-end 0))
+ (narrow-to-region (point) (point-max)))
+
+ ;; Stop after name suffix
+ ((and (>= word-count 2)
+ (looking-at mail-extr-full-name-suffix-pattern))
+ (mail-extr-skip-whitespace-backward)
+ (setq suffix-flag (point))
+ (if (eq ?, (following-char))
+ (forward-char 1)
+ (insert ?,))
+ ;; Enforce at least one space after comma
+ (or (eq ?\ (following-char))
+ (insert ?\ ))
+ (mail-extr-skip-whitespace-forward)
+ (cond ((memq (following-char) '(?j ?J ?s ?S))
+ (capitalize-word 1)
+ (if (eq (following-char) ?.)
+ (forward-char 1)
+ (insert ?.)))
+ (t
+ (upcase-word 1)))
+ (setq word-found-flag t)
+ (setq name-done-flag t))
+
+ ;; Handle SCA names
+ ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As"
+ (goto-char (match-beginning 1))
+ (narrow-to-region (point) (point-max))
+ (setq begin-again-flag t))
+
+ ;; Check for initial last name followed by comma
+ ((and (eq ?, (following-char))
+ (eq word-count 1))
+ (forward-char 1)
+ (setq last-name-comma-flag t)
+ (or (eq ?\ (following-char))
+ (insert ?\ )))
+
+ ;; Stop before trailing comma-separated comment
+ ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
+ ;; *** This case is redundant???
+ ;;((eq ?, (following-char))
+ ;; (setq name-done-flag t))
+
+ ;; Delete parenthesized/quoted comment/nickname
+ ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`))
+ (setq cbeg (point))
+ (set-syntax-table mail-extr-address-text-comment-syntax-table)
+ (cond ((memq (following-char) '(?\' ?\`))
+ (or (search-forward "'" nil t
+ (if (eq ?\' (following-char)) 2 1))
+ (delete-char 1)))
+ (t
+ (or (mail-extr-safe-move-sexp 1)
+ (goto-char (point-max)))))
+ (set-syntax-table mail-extr-address-text-syntax-table)
+ (setq cend (point))
(cond
-
- ;; Delete title
+ ;; Handle case of entire name being quoted
((and (eq word-count 0)
- (looking-at mail-full-name-prefixes))
- (goto-char (match-end 0))
- (narrow-to-region (point) (point-max)))
-
- ;; Stop after name suffix
- ((and (>= word-count 2)
- (looking-at mail-full-name-suffix-pattern))
- (skip-chars-backward mail-whitespace)
- (setq suffix-flag (point))
- (if (eq ?, (following-char))
- (forward-char 1)
- (insert ?,))
- ;; Enforce at least one space after comma
- (or (eq mail-space-char (following-char))
- (insert mail-space-char))
- (skip-chars-forward mail-whitespace)
- (cond ((memq (following-char) '(?j ?J ?s ?S))
- (capitalize-word 1)
- (if (eq (following-char) ?.)
- (forward-char 1)
- (insert ?.)))
- (t
- (upcase-word 1)))
- (setq word-count (1+ word-count))
- (throw 'stop t))
-
- ;; Handle SCA names
- ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As"
- (setq word-count 0)
- (goto-char (match-beginning 1))
- (narrow-to-region (point) (point-max)))
-
- ;; Various stopping points
- ((or
- ;; Stop before ALL CAPS acronyms, if preceded by mixed-case or
- ;; lowercase words. Eg. XT-DEM.
- (and (>= word-count 2)
- (or mixed-case-flag lower-case-flag)
- (looking-at mail-weird-acronym-pattern)
- (not (looking-at mail-roman-numeral-pattern)))
- ;; Stop before 4-or-more letter lowercase words preceded by
- ;; mixed case or uppercase words.
- (and (>= word-count 2)
- (or upper-case-flag mixed-case-flag)
- (looking-at "[a-z][a-z][a-z][a-z]+\\b"))
- ;; Stop before trailing alternative address
- (looking-at mail-alternative-address-pattern)
- ;; Stop before trailing comment not introduced by comma
- (looking-at mail-trailing-comment-start-pattern)
- ;; Stop before telephone numbers
- (looking-at mail-telephone-extension-pattern))
- (throw 'stop t))
-
- ;; Check for initial last name followed by comma
- ((and (eq ?, (following-char))
- (eq word-count 1))
- (forward-char 1)
- (setq last-name-comma-flag t)
- (or (eq mail-space-char (following-char))
- (insert mail-space-char)))
-
- ;; Stop before trailing comma-separated comment
- ((eq ?, (following-char))
- (throw 'stop t))
-
- ;; Delete parenthesized/quoted comment/nickname
- ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`))
- (setq comment-beg (point))
- (set-syntax-table address-text-comment-syntax-table)
- (cond ((memq (following-char) '(?\' ?\`))
- (if (eq ?\' (following-char))
- (forward-char 1))
- (or (search-forward "'" nil t)
- (delete-char 1)))
- (t
- (or (safe-move-sexp 1)
- (goto-char (point-max)))))
- (set-syntax-table address-text-syntax-table)
- (setq comment-end (point))
- (cond
- ;; Handle case of entire name being quoted
- ((and (eq word-count 0)
- (looking-at " *\\'")
- (>= (- comment-end comment-beg) 2))
- (narrow-to-region (1+ comment-beg) (1- comment-end))
- (goto-char (point-min)))
- (t
- ;; Handle case of quoted initial
- (if (and (or (= 3 (- comment-end comment-beg))
- (and (= 4 (- comment-end comment-beg))
- (eq ?. (char-after (+ 2 comment-beg)))))
- (not (looking-at " *\\'")))
- (setq initial (char-after (1+ comment-beg)))
- (setq initial nil))
- (delete-region comment-beg comment-end)
- (if initial
- (insert initial ". ")))))
-
- ;; Delete ham radio call signs
- ((looking-at mail-ham-call-sign-pattern)
- (delete-region (match-beginning 0) (match-end 0)))
-
- ;; Handle & substitution
- ;; TODO: remember to disable middle initial guessing
- ((and (or (bobp)
- (eq mail-space-char (preceding-char)))
- (looking-at "&\\( \\|\\'\\)"))
- (delete-char 1)
- (capitalize-region
- (point)
- (progn
- (insert-buffer-substring canonicalization-buffer
- mbox-beg mbox-end)
- (point))))
-
- ;; Fixup initials
- ((looking-at mail-initial-pattern)
- (or (eq (following-char) (upcase (following-char)))
+ (looking-at " *\\'")
+ (>= (- cend cbeg) 2))
+ (narrow-to-region (1+ cbeg) (1- cend))
+ (goto-char (point-min)))
+ (t
+ ;; Handle case of quoted initial
+ (if (and (or (= 3 (- cend cbeg))
+ (and (= 4 (- cend cbeg))
+ (eq ?. (char-after (+ 2 cbeg)))))
+ (not (looking-at " *\\'")))
+ (setq initial (char-after (1+ cbeg)))
+ (setq initial nil))
+ (delete-region cbeg cend)
+ (if initial
+ (insert initial ". ")))))
+
+ ;; Handle *Stupid* VMS date stamps
+ ((looking-at mail-extr-stupid-vms-date-stamp-pattern)
+ (replace-match "" t))
+
+ ;; Handle Chinese characters.
+ ((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern)
+ (goto-char (match-end 0))
+ (setq word-found-flag t))
+
+ ;; Skip initial garbage characters.
+ ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
+ ((and (eq word-count 0)
+ (looking-at mail-extr-leading-garbage))
+ (goto-char (match-end 0))
+ ;; *** Skip backward over these???
+ ;; (skip-chars-backward "& \"")
+ (narrow-to-region (point) (point-max)))
+
+ ;; Various stopping points
+ ((or
+
+ ;; Stop before ALL CAPS acronyms, if preceded by mixed-case
+ ;; words. Example: XT-DEM.
+ (and (>= word-count 2)
+ mixed-case-flag
+ (looking-at mail-extr-weird-acronym-pattern)
+ (not (looking-at mail-extr-roman-numeral-pattern)))
+
+ ;; Stop before trailing alternative address
+ (looking-at mail-extr-alternative-address-pattern)
+
+ ;; Stop before trailing comment not introduced by comma
+ ;; THIS CASE MUST BE AFTER AN EARLIER CASE.
+ (looking-at mail-extr-trailing-comment-start-pattern)
+
+ ;; Stop before telephone numbers
+ (and (>= word-count 1)
+ (looking-at mail-extr-telephone-extension-pattern)))
+ (setq name-done-flag t))
+
+ ;; Delete ham radio call signs
+ ((looking-at mail-extr-ham-call-sign-pattern)
+ (delete-region (match-beginning 0) (match-end 0)))
+
+ ;; Fixup initials
+ ((looking-at mail-extr-initial-pattern)
+ (or (eq (following-char) (upcase (following-char)))
+ (setq lower-case-flag t))
+ (forward-char 1)
+ (if (eq ?. (following-char))
+ (forward-char 1)
+ (insert ?.))
+ (or (eq ?\ (following-char))
+ (insert ?\ ))
+ (setq word-found-flag t))
+
+ ;; Handle BITNET LISTSERV list names.
+ ((and (eq word-count 0)
+ (looking-at mail-extr-listserv-list-name-pattern))
+ (narrow-to-region (match-beginning 1) (match-end 1))
+ (setq word-found-flag t)
+ (setq name-done-flag t))
+
+ ;; Handle & substitution, when & is last and is not first.
+ ((and (> word-count 0)
+ (eq ?\ (preceding-char))
+ (eq (following-char) ?&)
+ (eq (1+ (point)) (point-max)))
+ (delete-char 1)
+ (capitalize-region
+ (point)
+ (progn
+ (insert-buffer-substring canonicalization-buffer
+ mbox-beg mbox-end)
+ (point)))
+ (setq disable-initial-guessing-flag t)
+ (setq word-found-flag t))
+
+ ;; Handle & between names, as in "Bob & Susie".
+ ((and (> word-count 0) (eq (following-char) ?\&))
+ (setq name-beg (point))
+ (setq name-end (1+ name-beg))
+ (setq word-found-flag t)
+ (goto-char name-end))
+
+ ;; Regular name words
+ ((looking-at mail-extr-name-pattern)
+ (setq name-beg (point))
+ (setq name-end (match-end 0))
+
+ ;; Certain words will be dropped if they are at the end.
+ (and (>= word-count 2)
+ (not lower-case-flag)
+ (or
+ ;; A trailing 4-or-more letter lowercase words preceded by
+ ;; mixed case or uppercase words will be dropped.
+ (looking-at "[a-z][a-z][a-z][a-z]+[ \t]*\\'")
+ ;; Drop a trailing word which is terminated with a period.
+ (eq ?. (char-after (1- name-end))))
+ (setq drop-this-word-if-trailing-flag t))
+
+ ;; Set the flags that indicate whether we have seen a lowercase
+ ;; word, a mixed case word, and an uppercase word.
+ (if (re-search-forward "[a-z]" name-end t)
+ (if (progn
+ (goto-char name-beg)
+ (re-search-forward "[A-Z]" name-end t))
+ (setq mixed-case-flag t)
(setq lower-case-flag t))
- (forward-char 1)
- (if (eq ?. (following-char))
- (forward-char 1)
- (insert ?.))
- (or (eq mail-space-char (following-char))
- (insert mail-space-char))
- (setq word-count (1+ word-count)))
-
- ;; Regular name words
- ((looking-at mail-name-pattern)
- (setq beg (point))
- (setq end (match-end 0))
- (set (if (re-search-forward "[a-z]" end t)
- (if (progn
- (goto-char beg)
- (re-search-forward "[A-Z]" end t))
- 'mixed-case-flag
- 'lower-case-flag)
- 'upper-case-flag) t)
- (goto-char end)
- (setq word-count (1+ word-count)))
+;; (setq upper-case-flag t)
+ )
+
+ (goto-char name-end)
+ (setq word-found-flag t))
+
+ ;; Allow a number as a word, if it doesn't mean anything else.
+ ((looking-at "[0-9]+\\>")
+ (setq name-beg (point))
+ (setq name-end (match-end 0))
+ (goto-char name-end)
+ (setq word-found-flag t))
+
+ (t
+ (setq name-done-flag t)
+ ))
+
+ ;; Count any word that we skipped over.
+ (if word-found-flag
+ (setq word-count (1+ word-count))))
+
+ ;; If the last thing in the name is 2 or more periods, or one or more
+ ;; other sentence terminators (but not a single period) then keep them
+ ;; and the preceding word. This is for the benefit of whole sentences
+ ;; in the name field: it's better behavior than dropping the last word
+ ;; of the sentence...
+ (if (and (not suffix-flag)
+ (looking-at "\\(\\.+\\|[?!;:.][?!;:.]+\\|[?!;:][?!;:.]*\\)\\'"))
+ (goto-char (setq suffix-flag (point-max))))
+
+ ;; Drop everything after point and certain trailing words.
+ (narrow-to-region (point-min)
+ (or (and drop-last-word-if-trailing-flag
+ last-word-beg)
+ (point)))
+
+ ;; Xerox's mailers SUCK!!!!!!
+ ;; We simply refuse to believe that any last name is PARC or ADOC.
+ ;; If it looks like that is the last name, that there is no meaningful
+ ;; here at all. Actually I guess it would be best to map patterns
+ ;; like foo.hoser@xerox.com into foo@hoser.xerox.com, but I don't
+ ;; actually know that that is what's going on.
+ (unless suffix-flag
+ (goto-char (point-min))
+ (let ((case-fold-search t))
+ (if (looking-at "[-A-Za-z_]+[. ]\\(PARC\\|ADOC\\)\\'")
+ (erase-buffer))))
- (t
- (throw 'stop t)))))
-
- (narrow-to-region (point-min) (point))
-
- ;; Delete trailing word followed immediately by .
- (cond ((not suffix-flag)
- (goto-char (point-min))
- (if (re-search-forward "\\b[A-Za-z][A-Za-z]+\\. *\\'" nil t)
- (narrow-to-region (point-min) (match-beginning 0)))))
-
;; If last name first put it at end (but before suffix)
- (cond (last-name-comma-flag
- (goto-char (point-min))
- (search-forward ",")
- (setq end (1- (point)))
- (goto-char (or suffix-flag (point-max)))
- (or (eq mail-space-char (preceding-char))
- (insert mail-space-char))
- (insert-buffer-substring (current-buffer) (point-min) end)
- (narrow-to-region (1+ end) (point-max))))
-
- (goto-char (point-max))
- (skip-chars-backward mail-non-end-name-chars)
- (if (eq ?. (following-char))
- (forward-char 1))
- (narrow-to-region (point)
- (progn
- (goto-char (point-min))
- (skip-chars-forward mail-non-begin-name-chars)
- (point)))
-
+ (when last-name-comma-flag
+ (goto-char (point-min))
+ (search-forward ",")
+ (setq name-end (1- (point)))
+ (goto-char (or suffix-flag (point-max)))
+ (or (eq ?\ (preceding-char))
+ (insert ?\ ))
+ (insert-buffer-substring (current-buffer) (point-min) name-end)
+ (goto-char name-end)
+ (skip-chars-forward "\t ,")
+ (narrow-to-region (point) (point-max)))
+
+ ;; Delete leading and trailing junk characters.
+ ;; *** This is probably completely unneeded now.
+ ;;(goto-char (point-max))
+ ;;(skip-chars-backward mail-extr-non-end-name-chars)
+ ;;(if (eq ?. (following-char))
+ ;; (forward-char 1))
+ ;;(narrow-to-region (point)
+ ;; (progn
+ ;; (goto-char (point-min))
+ ;; (skip-chars-forward mail-extr-non-begin-name-chars)
+ ;; (point)))
+
;; Compress whitespace
(goto-char (point-min))
(while (re-search-forward "[ \t\n]+" nil t)
- (replace-match " "))
-
- (buffer-substring (point-min) (point-max))
-
+ (replace-match (if (eobp) "" " ") t))
)))
-;; The country names are just in there for show right now, and because
-;; Jamie thought it would be neat. They aren't used yet.
+\f
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Table of top-level domain names.
+;;
+;; This is used during address canonicalization; be careful of format changes.
;; Keep in mind that the country abbreviations follow ISO-3166. There is
;; a U.S. FIPS that specifies a different set of two-letter country
;; abbreviations.
-
-;; TODO: put this in its own obarray, instead of cluttering up the main
-;; symbol table with junk.
-
-(mapcar
- (function
- (lambda (x)
- (if (symbolp x)
- (put x 'domain-name t)
- (put (car x) 'domain-name (nth 1 x)))))
- '((ag "Antigua")
- (ar "Argentina") ; Argentine Republic
- arpa ; Advanced Projects Research Agency
- (at "Austria") ; The Republic of _
- (au "Australia")
- (bb "Barbados")
- (be "Belgium") ; The Kingdom of _
- (bg "Bulgaria")
- bitnet ; Because It's Time NET
- (bo "Bolivia") ; Republic of _
- (br "Brazil") ; The Federative Republic of _
- (bs "Bahamas")
- (bz "Belize")
- (ca "Canada")
- (ch "Switzerland") ; The Swiss Confederation
- (cl "Chile") ; The Republic of _
- (cn "China") ; The People's Republic of _
- (co "Columbia")
- com ; Commercial
- (cr "Costa Rica") ; The Republic of _
- (cs "Czechoslovakia")
- (de "Germany")
- (dk "Denmark")
- (dm "Dominica")
- (do "Dominican Republic") ; The _
- (ec "Ecuador") ; The Republic of _
- edu ; Educational
- (eg "Egypt") ; The Arab Republic of _
- (es "Spain") ; The Kingdom of _
- (fi "Finland") ; The Republic of _
- (fj "Fiji")
- (fr "France")
- gov ; Government (U.S.A.)
- (gr "Greece") ; The Hellenic Republic
- (hk "Hong Kong")
- (hu "Hungary") ; The Hungarian People's Republic (???)
- (ie "Ireland")
- (il "Israel") ; The State of _
- (in "India") ; The Republic of _
- int ; something British, don't know what
- (is "Iceland") ; The Republic of _
- (it "Italy") ; The Italian Republic
- (jm "Jamaica")
- (jp "Japan")
- (kn "St. Kitts and Nevis")
- (kr "South Korea")
- (lc "St. Lucia")
- (lk "Sri Lanka") ; The Democratic Socialist Republic of _
- mil ; Military (U.S.A.)
- (mx "Mexico") ; The United Mexican States
- (my "Malaysia") ; changed to Myanmar????
- (na "Namibia")
- nato ; North Atlantic Treaty Organization
- net ; Network
- (ni "Nicaragua") ; The Republic of _
- (nl "Netherlands") ; The Kingdom of the _
- (no "Norway") ; The Kingdom of _
- (nz "New Zealand")
- org ; Organization
- (pe "Peru")
- (pg "Papua New Guinea")
- (ph "Philippines") ; The Republic of the _
- (pl "Poland")
- (pr "Puerto Rico")
- (pt "Portugal") ; The Portugese Republic
- (py "Paraguay")
- (se "Sweden") ; The Kingdom of _
- (sg "Singapore") ; The Republic of _
- (sr "Suriname")
- (su "Soviet Union")
- (th "Thailand") ; The Kingdom of _
- (tn "Tunisia")
- (tr "Turkey") ; The Republic of _
- (tt "Trinidad and Tobago")
- (tw "Taiwan")
- (uk "United Kingdom") ; The _ of Great Britain
- unter-dom ; something German
- (us "U.S.A.") ; The United States of America
- uucp ; Unix to Unix CoPy
- (uy "Uruguay") ; The Eastern Republic of _
- (vc "St. Vincent and the Grenadines")
- (ve "Venezuela") ; The Republic of _
- (yu "Yugoslavia") ; The Socialist Federal Republic of _
- ;; Also said to be Zambia ...
- (za "South Africa") ; The Republic of _ (why not Zaire???)
- (zw "Zimbabwe") ; Republic of _
- ))
-;; fipnet
+;;
+;; Updated by the RIPE Network Coordination Centre.
+;;
+;; Source: ISO 3166 Maintenance Agency
+;; http://www.iso.org/iso/en/prods-services/iso3166ma/02iso-3166-code-lists/list-en1-semic.txt
+;; http://www.iana.org/domain-names.htm
+;; http://www.iana.org/cctld/cctld-whois.htm
+;; Latest change: Mon Jul 8 14:21:59 CEST 2002
+
+(defconst mail-extr-all-top-level-domains
+ (let ((ob (make-vector 739 0)))
+ (mapc
+ (lambda (x)
+ (put (intern (downcase (car x)) ob)
+ 'domain-name
+ (if (nth 2 x)
+ (format (nth 2 x) (nth 1 x))
+ (nth 1 x))))
+ '(
+ ;; ISO 3166 codes:
+ ("ad" "Andorra")
+ ("ae" "United Arab Emirates")
+ ("af" "Afghanistan")
+ ("ag" "Antigua and Barbuda")
+ ("ai" "Anguilla")
+ ("al" "Albania")
+ ("am" "Armenia")
+ ("an" "Netherlands Antilles")
+ ("ao" "Angola")
+ ("aq" "Antarctica") ; continent
+ ("ar" "Argentina" "Argentine Republic")
+ ("as" "American Samoa")
+ ("at" "Austria" "The Republic of %s")
+ ("au" "Australia")
+ ("aw" "Aruba")
+ ("az" "Azerbaijan")
+ ("ba" "Bosnia-Herzegovina")
+ ("bb" "Barbados")
+ ("bd" "Bangladesh")
+ ("be" "Belgium" "The Kingdom of %s")
+ ("bf" "Burkina Faso")
+ ("bg" "Bulgaria")
+ ("bh" "Bahrain")
+ ("bi" "Burundi")
+ ("bj" "Benin")
+ ("bm" "Bermuda")
+ ("bn" "Brunei Darussalam")
+ ("bo" "Bolivia" "Republic of %s")
+ ("br" "Brazil" "The Federative Republic of %s")
+ ("bs" "Bahamas")
+ ("bt" "Bhutan")
+ ("bv" "Bouvet Island")
+ ("bw" "Botswana")
+ ("by" "Belarus")
+ ("bz" "Belize")
+ ("ca" "Canada")
+ ("cc" "Cocos (Keeling) Islands")
+ ("cd" "Congo" "The Democratic Republic of the %s")
+ ("cf" "Central African Republic")
+ ("cg" "Congo")
+ ("ch" "Switzerland" "The Swiss Confederation")
+ ("ci" "Ivory Coast") ; Cote D'ivoire
+ ("ck" "Cook Islands")
+ ("cl" "Chile" "The Republic of %s")
+ ("cm" "Cameroon") ; In .fr domain
+ ("cn" "China" "The People's Republic of %s")
+ ("co" "Colombia")
+ ("cr" "Costa Rica" "The Republic of %s")
+ ("cu" "Cuba")
+ ("cv" "Cape Verde")
+ ("cx" "Christmas Island")
+ ("cy" "Cyprus")
+ ("cz" "Czech Republic")
+ ("de" "Germany")
+ ("dj" "Djibouti")
+ ("dk" "Denmark")
+ ("dm" "Dominica")
+ ("do" "Dominican Republic" "The %s")
+ ("dz" "Algeria")
+ ("ec" "Ecuador" "The Republic of %s")
+ ("ee" "Estonia")
+ ("eg" "Egypt" "The Arab Republic of %s")
+ ("eh" "Western Sahara")
+ ("er" "Eritrea")
+ ("es" "Spain" "The Kingdom of %s")
+ ("et" "Ethiopia")
+ ("fi" "Finland" "The Republic of %s")
+ ("fj" "Fiji")
+ ("fk" "Falkland Islands (Malvinas)")
+ ("fm" "Micronesia" "Federated States of %s")
+ ("fo" "Faroe Islands")
+ ("fr" "France")
+ ("ga" "Gabon")
+ ("gb" "United Kingdom")
+ ("gd" "Grenada")
+ ("ge" "Georgia")
+ ("gf" "French Guiana")
+ ("gh" "Ghana")
+ ("gi" "Gibraltar")
+ ("gl" "Greenland")
+ ("gm" "Gambia")
+ ("gn" "Guinea")
+ ("gp" "Guadeloupe (Fr.)")
+ ("gq" "Equatorial Guinea")
+ ("gr" "Greece" "The Hellenic Republic (%s)")
+ ("gs" "South Georgia and The South Sandwich Islands")
+ ("gt" "Guatemala")
+ ("gu" "Guam (U.S.)")
+ ("gw" "Guinea-Bissau")
+ ("gy" "Guyana")
+ ("hk" "Hong Kong")
+ ("hm" "Heard Island and Mcdonald Islands")
+ ("hn" "Honduras")
+ ("hr" "Croatia" "Croatia (Hrvatska)")
+ ("ht" "Haiti")
+ ("hu" "Hungary" "The Hungarian Republic")
+ ("id" "Indonesia")
+ ("ie" "Ireland")
+ ("il" "Israel" "The State of %s")
+ ("im" "Isle of Man" "The %s") ; NOT in ISO 3166-1 of 2001-02-26
+ ("in" "India" "The Republic of %s")
+ ("io" "British Indian Ocean Territory")
+ ("iq" "Iraq")
+ ("ir" "Iran" "Islamic Republic of %s")
+ ("is" "Iceland" "The Republic of %s")
+ ("it" "Italy" "The Italian Republic")
+ ("jm" "Jamaica")
+ ("jo" "Jordan")
+ ("jp" "Japan")
+ ("ke" "Kenya")
+ ("kg" "Kyrgyzstan")
+ ("kh" "Cambodia")
+ ("ki" "Kiribati")
+ ("km" "Comoros")
+ ("kn" "Saint Kitts and Nevis")
+ ("kp" "Korea (North)" "Democratic People's Republic of Korea")
+ ("kr" "Korea (South)" "Republic of Korea")
+ ("kw" "Kuwait")
+ ("ky" "Cayman Islands")
+ ("kz" "Kazakhstan")
+ ("la" "Lao People's Democratic Republic")
+ ("lb" "Lebanon")
+ ("lc" "Saint Lucia")
+ ("li" "Liechtenstein")
+ ("lk" "Sri Lanka" "The Democratic Socialist Republic of %s")
+ ("lr" "Liberia")
+ ("ls" "Lesotho")
+ ("lt" "Lithuania")
+ ("lu" "Luxembourg")
+ ("lv" "Latvia")
+ ("ly" "Libyan Arab Jamahiriya")
+ ("ma" "Morocco")
+ ("mc" "Monaco")
+ ("md" "Moldova" "The Republic of %s")
+ ("mg" "Madagascar")
+ ("mh" "Marshall Islands")
+ ("mk" "Macedonia" "The Former Yugoslav Republic of %s")
+ ("ml" "Mali")
+ ("mm" "Myanmar")
+ ("mn" "Mongolia")
+ ("mo" "Macao")
+ ("mp" "Northern Mariana Islands")
+ ("mq" "Martinique")
+ ("mr" "Mauritania")
+ ("ms" "Montserrat")
+ ("mt" "Malta")
+ ("mu" "Mauritius")
+ ("mv" "Maldives")
+ ("mw" "Malawi")
+ ("mx" "Mexico" "The United Mexican States")
+ ("my" "Malaysia")
+ ("mz" "Mozambique")
+ ("na" "Namibia")
+ ("nc" "New Caledonia (Fr.)")
+ ("ne" "Niger") ; In .fr domain
+ ("nf" "Norfolk Island")
+ ("ng" "Nigeria")
+ ("ni" "Nicaragua" "The Republic of %s")
+ ("nl" "Netherlands" "The Kingdom of the %s")
+ ("no" "Norway" "The Kingdom of %s")
+ ("np" "Nepal") ; Via .in domain
+ ("nr" "Nauru")
+ ("nu" "Niue")
+ ("nz" "New Zealand")
+ ("om" "Oman")
+ ("pa" "Panama")
+ ("pe" "Peru")
+ ("pf" "French Polynesia")
+ ("pg" "Papua New Guinea")
+ ("ph" "Philippines" "The Republic of the %s")
+ ("pk" "Pakistan")
+ ("pl" "Poland")
+ ("pm" "Saint Pierre and Miquelon")
+ ("pn" "Pitcairn")
+ ("pr" "Puerto Rico (U.S.)")
+ ("ps" "Palestinian Territory, Occupied")
+ ("pt" "Portugal" "The Portuguese Republic")
+ ("pw" "Palau")
+ ("py" "Paraguay")
+ ("qa" "Qatar")
+ ("re" "Reunion (Fr.)") ; In .fr domain
+ ("ro" "Romania")
+ ("ru" "Russia" "Russian Federation")
+ ("rw" "Rwanda")
+ ("sa" "Saudi Arabia")
+ ("sb" "Solomon Islands")
+ ("sc" "Seychelles")
+ ("sd" "Sudan")
+ ("se" "Sweden" "The Kingdom of %s")
+ ("sg" "Singapore" "The Republic of %s")
+ ("sh" "Saint Helena")
+ ("si" "Slovenia")
+ ("sj" "Svalbard and Jan Mayen") ; In .no domain
+ ("sk" "Slovakia" "The Slovak Republic")
+ ("sl" "Sierra Leone")
+ ("sm" "San Marino")
+ ("sn" "Senegal")
+ ("so" "Somalia")
+ ("sr" "Suriname")
+ ("st" "Sao Tome and Principe")
+ ("su" "U.S.S.R." "The Union of Soviet Socialist Republics")
+ ("sv" "El Salvador")
+ ("sy" "Syrian Arab Republic")
+ ("sz" "Swaziland")
+ ("tc" "Turks and Caicos Islands")
+ ("td" "Chad")
+ ("tf" "French Southern Territories")
+ ("tg" "Togo")
+ ("th" "Thailand" "The Kingdom of %s")
+ ("tj" "Tajikistan")
+ ("tk" "Tokelau")
+ ("tl" "East Timor")
+ ("tm" "Turkmenistan")
+ ("tn" "Tunisia")
+ ("to" "Tonga")
+ ("tp" "East Timor")
+ ("tr" "Turkey" "The Republic of %s")
+ ("tt" "Trinidad and Tobago")
+ ("tv" "Tuvalu")
+ ("tw" "Taiwan" "%s, Province of China")
+ ("tz" "Tanzania" "United Republic of %s")
+ ("ua" "Ukraine")
+ ("ug" "Uganda")
+ ("uk" "United Kingdom" "The %s of Great Britain and Northern Ireland")
+ ("um" "United States Minor Outlying Islands")
+ ("us" "United States" "The %s of America")
+ ("uy" "Uruguay" "The Eastern Republic of %s")
+ ("uz" "Uzbekistan")
+ ("va" "Holy See (Vatican City State)")
+ ("vc" "Saint Vincent and the Grenadines")
+ ("ve" "Venezuela" "The Republic of %s")
+ ("vg" "Virgin Islands, British")
+ ("vi" "Virgin Islands, U.S.")
+ ("vn" "Vietnam")
+ ("vu" "Vanuatu")
+ ("wf" "Wallis and Futuna")
+ ("ws" "Samoa")
+ ("ye" "Yemen")
+ ("yt" "Mayotte")
+ ("yu" "Yugoslavia" "Yugoslavia, AKA Serbia-Montenegro")
+ ("za" "South Africa" "The Republic of %s")
+ ("zm" "Zambia")
+ ("zw" "Zimbabwe" "Republic of %s")
+ ;; Generic Domains:
+ ("aero" t "Air Transport Industry")
+ ("biz" t "Businesses")
+ ("com" t "Commercial")
+ ("coop" t "Cooperative Associations")
+ ("info" t "Info")
+ ("museum" t "Museums")
+ ("name" t "Individuals")
+ ("net" t "Network")
+ ("org" t "Non-profit Organization")
+ ;;("pro" t "Credentialed professionals")
+ ;;("bitnet" t "Because It's Time NET")
+ ("gov" t "United States Government")
+ ("edu" t "Educational")
+ ("mil" t "United States Military")
+ ("int" t "International Treaties")
+ ;;("nato" t "North Atlantic Treaty Organization")
+ ("uucp" t "Unix to Unix CoPy")
+ ;; Infrastructure Domains:
+ ("arpa" t "Advanced Research Projects Agency (U.S. DoD)")
+ ))
+ ob))
+
+;;;###autoload
+(defun what-domain (domain)
+ "Convert mail domain DOMAIN to the country it corresponds to."
+ (interactive
+ (let ((completion-ignore-case t))
+ (list (completing-read "Domain: "
+ mail-extr-all-top-level-domains nil t))))
+ (or (setq domain (intern-soft (downcase domain)
+ mail-extr-all-top-level-domains))
+ (error "No such domain"))
+ (message "%s: %s" (upcase (symbol-name domain)) (get domain 'domain-name)))
\f
-;; Code for testing.
-
-(defun time-extract ()
- (let (times list)
- (setq times (cons (current-time-string) times)
- list problem-address-alist)
- (while list
- (mail-extract-address-components (car (car list)))
- (setq list (cdr list)))
- (setq times (cons (current-time-string) times))
- (nreverse times)))
-
-(defun test-extract (&optional starting-point)
- (interactive)
- (set-buffer (get-buffer-create "*Testing*"))
- (erase-buffer)
- (sit-for 0)
- (mapcar 'test-extract-internal
- (if starting-point
- (memq starting-point problem-address-alist)
- problem-address-alist)))
-
-(defvar failed-item)
-(defun test-extract-internal (item)
- (setq failed-item item)
- (let* ((address (car item))
- (correct-name (nth 1 item))
- (correct-canon (nth 2 item))
- (result (mail-extract-address-components address))
- (name (car result))
- (canon (nth 1 result))
- (name-correct (or (null correct-name)
- (string-equal (downcase correct-name)
- (downcase name))))
- (canon-correct (or (null correct-canon)
- (string-equal correct-canon canon))))
- (cond ((not (and name-correct canon-correct))
- (pop-to-buffer "*Testing*")
- (select-window (get-buffer-window (current-buffer)))
- (goto-char (point-max))
- (insert "Address: " address "\n")
- (if (not name-correct)
- (insert " Correct Name: [" correct-name
- "]\; Result: [" name "]\n"))
- (if (not canon-correct)
- (insert " Correct Canon: [" correct-canon
- "]\; Result: [" canon "]\n"))
- (insert "\n")
- (sit-for 0))))
- (setq failed-item nil))
-
-(defun test-continue-extract ()
- (interactive)
- (test-extract failed-item))
+;(let ((all nil))
+; (mapatoms #'(lambda (x)
+; (if (and (boundp x)
+; (string-match "^mail-extr-" (symbol-name x)))
+; (setq all (cons x all)))))
+; (setq all (sort all #'string-lessp))
+; (cons 'setq
+; (apply 'nconc (mapcar #'(lambda (x)
+; (list x (symbol-value x)))
+; all))))
\f
-;; Assorted junk.
-
-;; warsaw@nlm.nih.gov (A Bad Dude -- Barry Warsaw)
-
-;;'(from
-;; reply-to
-;; return-path
-;; x-uucp-from
-;; sender
-;; resent-from
-;; resent-sender
-;; resent-reply-to)
+(provide 'mail-extr)
;;; mail-extr.el ends here