-;;; mail-extr.el --- extract full name and address from RFC 822 mail header
+;;; mail-extr.el --- extract full name and address from RFC 822 mail header -*- coding: utf-8 -*-
-;; Copyright (C) 1991, 1992, 1993, 1994, 1997, 2001
-;; Free Software Foundation, Inc.
+;; Copyright (C) 1991, 1992, 1993, 1994, 1997, 2001, 2002, 2003, 2004,
+;; 2005 Free Software Foundation, Inc.
;; Author: Joe Wells <jbw@cs.bu.edu>
;; Maintainer: FSF
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.
+;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
;;; Commentary:
;; The entry point of this code is
;;
;; mail-extract-address-components: (address &optional all)
-;;
+;;
;; Given an RFC-822 ADDRESS, extract full name and canonical address.
;; Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
;; If no name can be extracted, FULL-NAME will be nil.
-;; ADDRESS may be a string or a buffer. If it is a buffer, the visible
+;; ADDRESS may be a string or a buffer. If it is a buffer, the visible
;; (narrowed) portion of the buffer will be interpreted as the address.
;; (This feature exists so that the clever caller might be able to avoid
;; consing a string.)
;; make sure you're not breaking functionality. The test cases aren't included
;; because they are over 100K.
;;
-;; If you find an address that mail-extr fails on, please send it to the
+;; If you find an address that mail-extr fails on, please send it to the
;; maintainer along with what you think the correct results should be. We do
;; not consider it a bug if mail-extr mangles a comment that does not
-;; correspond to a real human full name, although we would prefer that
+;; correspond to a real human full name, although we would prefer that
;; mail-extr would return the comment as-is.
;;
;; Features:
;; * insert documentation strings!
;; * handle X.400-gatewayed addresses according to RFC 1148.
-;;; Change Log:
-;;
+;;; Change Log:
+;;
;; Thu Feb 17 17:57:33 1994 Jamie Zawinski (jwz@lucid.com)
;;
;; * merged with jbw's latest version
;; * some more cleanup, doc, added provide
;;
;; Tue Mar 23 21:23:18 1993 Joe Wells (jbw at csd.bu.edu)
-;;
+;;
;; * Made mail-full-name-prefixes a user-customizable variable.
-;; Allow passing the address as a buffer as well as as a string.
+;; Allow passing the address as a buffer as well as a string.
;; Allow [ and ] as name characters (Finnish character set).
-;;
+;;
;; Mon Mar 22 21:20:56 1993 Joe Wells (jbw at bigbird.bu.edu)
-;;
+;;
;; * Handle "null" addresses. Handle = used for spacing in mailbox
;; name. Fix bug in handling of ROUTE-ADDR-type addresses that are
;; missing their brackets. Handle uppercase "JR". Extract full
;; names from X.400 addresses encoded in RFC-822. Fix bug in
;; handling of multiple addresses where first has trailing comment.
;; Handle more kinds of telephone extension lead-ins.
-;;
+;;
;; Mon Mar 22 20:16:57 1993 Joe Wells (jbw at bigbird.bu.edu)
-;;
+;;
;; * Handle HZ encoding for embedding GB encoded chinese characters.
-;;
+;;
;; Mon Mar 22 00:46:12 1993 Joe Wells (jbw at bigbird.bu.edu)
-;;
+;;
;; * Fixed too broad matching of ham radio call signs. Fixed bug in
;; handling an unmatched ' in a name string. Enhanced recognition
;; of when . in the mailbox name terminates the name portion.
;; introduced in switching last name order. Fixed bug in handling
;; address with ! and % but no @. Narrowed the cases in which
;; certain trailing words are discarded.
-;;
+;;
;; Sun Mar 21 21:41:06 1993 Joe Wells (jbw at bigbird.bu.edu)
-;;
+;;
;; * Fixed bugs in handling GROUP addresses. Certain words in the
;; middle of a name no longer terminate it. Handle LISTSERV list
;; names. Ignore comment field containing mailbox name.
-;;
+;;
;; Sun Mar 21 14:39:38 1993 Joe Wells (jbw at bigbird.bu.edu)
-;;
+;;
;; * Moved variant-method code back into main function. Handle
;; underscores as spaces in comments. Handle leading nickname. Add
;; flag to ignore single-word names. Other changes.
-;;
+;;
;; Mon Feb 1 22:23:31 1993 Joe Wells (jbw at bigbird.bu.edu)
-;;
+;;
;; * Added in changes by Rod Whitby and Jamie Zawinski. This
;; includes the flag mail-extr-guess-middle-initial and the fix for
;; handling multiple addresses correctly. (Whitby just changed
;; a > to a <.)
-;;
+;;
;; Mon Apr 6 23:59:09 1992 Joe Wells (jbw at bigbird.bu.edu)
-;;
+;;
;; * Cleaned up some more. Release version 1.0 to world.
-;;
+;;
;; Sun Apr 5 19:39:08 1992 Joe Wells (jbw at bigbird.bu.edu)
-;;
+;;
;; * Cleaned up full name extraction extensively.
-;;
+;;
;; Sun Feb 2 14:45:24 1992 Joe Wells (jbw at bigbird.bu.edu)
-;;
+;;
;; * Total rewrite. Integrated mail-canonicalize-address into
;; mail-extract-address-components. Now handles GROUP addresses more
;; or less correctly. Better handling of lots of different cases.
-;;
+;;
;; Fri Jun 14 19:39:50 1991
;; * Created.
:type 'boolean
:group 'mail-extr)
-(defcustom mail-extr-ignore-single-names t
+(defcustom mail-extr-ignore-single-names nil
"*Whether to ignore a name that is just a single word.
If true, then when we see an address like \"Idiot <dumb@stupid.com>\"
+we will act as though we couldn't find a full name in the address."
+ :type 'boolean
+ :version "22.1"
+ :group 'mail-extr)
+
+(defcustom mail-extr-ignore-realname-equals-mailbox-name t
+"*Whether to ignore a name that is equal to the mailbox name.
+If true, then when the address is like \"Single <single@address.com>\"
we will act as though we couldn't find a full name in the address."
:type 'boolean
:group 'mail-extr)
;; Constant definitions.
;;
-;; Codes in
-;; Names in ISO 8859-1 Name
-;; ISO 10XXX ISO 8859-2 in
-;; ISO 6937 ISO 10646 RFC Swedish
-;; etc. Hex Oct 1345 TeX Split ASCII Description
-;; --------- ---------- ---- --- ----- ----- -------------------------------
-;; %a E4 344 a: \"a ae { latin small a + diaeresis d
-;; %o F6 366 o: \"o oe | latin small o + diaeresis v
-;; @a E5 345 aa \oa aa } latin small a + ring above e
-;; %u FC 374 u: \"u ue ~ latin small u + diaeresis |
-;; /e E9 351 e' \'e ` latin small e + acute i
-;; %A C4 304 A: \"A AE [ latin capital a + diaeresis D
-;; %O D6 326 O: \"O OE \ latin capital o + diaeresis V
-;; @A C5 305 AA \oA AA ] latin capital a + ring above E
-;; %U DC 334 U: \"U UE ^ latin capital u + diaeresis \
-;; /E C9 311 E' \'E @ latin capital e + acute I
-
-;; NOTE: @a and @A are not in ISO 8859-2 (the codes mentioned above invoke
-;; /l and /L). Some of this data was retrieved from
-;; listserv@jhuvm.hcf.jhu.edu.
-
;; Any character that can occur in a name, not counting characters that
;; separate parts of a multipart name (hyphen and period).
;; Yes, there are weird people with digits in their names.
(defconst mail-extr-leading-garbage "\\W+")
-;; (defconst mail-extr-non-name-chars
+;; (defconst mail-extr-non-name-chars
;; (purecopy (concat "^" mail-extr-all-letters ".")))
;; (defconst mail-extr-non-begin-name-chars
;; (purecopy (concat "^" mail-extr-first-letters)))
;; (defconst mail-extr-non-end-name-chars
;; (purecopy (concat "^" mail-extr-last-letters)))
-;; Matches an initial not followed by both a period and a space.
+;; Matches an initial not followed by both a period and a space.
;; (defconst mail-extr-bad-initials-pattern
-;; (purecopy
+;; (purecopy
;; (format "\\(\\([^%s]\\|\\`\\)[%s]\\)\\(\\.\\([^ ]\\)\\| \\|\\([^%s .]\\)\\|\\'\\)"
;; mail-extr-all-letters mail-extr-first-letters mail-extr-all-letters)))
;; Must not match a trailing uppercase last name or trailing initial
(defconst mail-extr-weird-acronym-pattern
(purecopy "\\([A-Z]+[-_/]\\|[A-Z][A-Z][A-Z]?\\b\\)"))
-
+
;; Matches a mixed-case or lowercase name (not an initial).
;; #### Match Latin1 lower case letters here too?
;; (defconst mail-extr-mixed-case-name-pattern
;; Matches a trailing alternative address.
;; #### Match Latin1 letters here too?
-;; #### Match _ before @ here too?
+;; #### Match _ before @ here too?
(defconst mail-extr-alternative-address-pattern
(purecopy "\\(aka *\\)?[a-zA-Z.]+[!@][a-zA-Z.]"))
;; Matches a single word name.
;; (defconst mail-extr-one-name-pattern
;; (purecopy (concat "\\`" mail-extr-normal-name-pattern "\\'")))
-
+
;; Matches normal two names with missing middle initial
;; The first name is not allowed to have a hyphen because this can cause
;; false matches where the "middle initial" is actually the first letter
;; encountered. The character '~' is an escape character. By convention, it
;; must be immediately followed ONLY by '~', '{' or '\n' (<LF>), with the
;; following special meaning.
-;;
+;;
;; o The escape sequence '~~' is interpreted as a '~'.
;; o The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
;; o The escape sequence '~\n' is a line-continuation marker to be consumed
;; with no output produced.
-;;
+;;
;; In GB mode, characters are interpreted two bytes at a time as (pure) GB
;; codes until the escape-from-GB code '~}' is read. This code switches the
;; mode from GB back to ASCII. (Note that the escape-from-GB code '~}'
(?\040 " ") ;SPC
(?! ?~ "w") ;printable characters
(?\177 "w") ;DEL
- (?\200 ?\377 "w") ;high-bit-on characters
- (?\240 " ") ;nobreakspace
(?\t " ")
(?\r " ")
(?\n " ")
;; Utility functions and macros.
;;
+;; Fixme: There are Latin-1 nbsp below. If such characters should be
+;; included, this is the wrong thing to do -- it should use syntax (or
+;; regexp char classes).
+
(defsubst mail-extr-skip-whitespace-forward ()
;; v19 fn skip-syntax-forward is more tasteful, but not byte-coded.
- (skip-chars-forward " \t\n\r\240"))
+ (skip-chars-forward " \t\n\r "))
(defsubst mail-extr-skip-whitespace-backward ()
;; v19 fn skip-syntax-backward is more tasteful, but not byte-coded.
- (skip-chars-backward " \t\n\r\240"))
+ (skip-chars-backward " \t\n\r "))
(defsubst mail-extr-undo-backslash-quoting (beg end)
(defvar disable-initial-guessing-flag) ; dynamic assignment
(defvar cbeg) ; dynamic assignment
(defvar cend) ; dynamic assignment
+(defvar mail-extr-all-top-level-domains) ; Defined below.
;;;###autoload
(defun mail-extract-address-components (address &optional all)
"Given an RFC-822 address ADDRESS, extract full name and canonical address.
Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
-If no name can be extracted, FULL-NAME will be nil.
+If no name can be extracted, FULL-NAME will be nil. Also see
+`mail-extr-ignore-single-names' and `mail-extr-ignore-realname-equals-mailbox-name'.
If the optional argument ALL is non-nil, then ADDRESS can contain zero
or more recipients, separated by commas, and we return a list of
one recipients, all but the first is ignored.
ADDRESS may be a string or a buffer. If it is a buffer, the visible
- (narrowed) portion of the buffer will be interpreted as the address.
- (This feature exists so that the clever caller might be able to avoid
- consing a string.)"
+\(narrowed) portion of the buffer will be interpreted as the address.
+\(This feature exists so that the clever caller might be able to avoid
+consing a string.)"
(let ((canonicalization-buffer (get-buffer-create " *canonical address*"))
(extraction-buffer (get-buffer-create " *extract address components*"))
value-list)
(widen)
(erase-buffer)
(setq case-fold-search nil)
-
+
;; Insert extra space at beginning to allow later replacement with <
;; without having to move markers.
(insert ?\ )
(buffer-disable-undo canonicalization-buffer)
(setq case-fold-search nil))
-
+
;; Unfold multiple lines.
(goto-char (point-min))
(while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
(replace-match "\\1 " t))
-
+
;; Loop over addresses until we have as many as we want.
(while (and (or all (null value-list))
(progn (goto-char (point-min))
;; Any commas must be between < and : of ROUTE-ADDR. Nuke any
;; others.
- ;; Hell, go ahead an nuke all of the commas.
+ ;; Hell, go ahead and nuke all of the commas.
;; **** This will cause problems when we start handling commas in
;; the PHRASE part .... no it won't ... yes it will ... ?????
(mail-extr-nuke-outside-range comma-pos 1 1)
(setq names-match-flag nil))
(setq i (1+ i)))
(delete-region (+ (point-min) buffer-length) (point-max))
- (if names-match-flag
- (narrow-to-region (point) (point)))))
+ (and names-match-flag
+ mail-extr-ignore-realname-equals-mailbox-name
+ (narrow-to-region (point) (point)))))
;; Nuke name if it's just one word.
(goto-char (point-min))
(if all (nreverse value-list) (car value-list))
))
+(defcustom mail-extr-disable-voodoo "\\cj"
+ "*If it is a regexp, names matching it will never be modified.
+If it is neither nil nor a string, modifying of names will never take
+place. It affects how `mail-extract-address-components' works."
+ :type '(choice (regexp :size 0)
+ (const :tag "Always enabled" nil)
+ (const :tag "Always disabled" t))
+ :group 'mail-extr)
+
(defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer)
- (let ((word-count 0)
- (case-fold-search nil)
- mixed-case-flag lower-case-flag ;;upper-case-flag
- suffix-flag last-name-comma-flag
- ;;cbeg cend
- initial
- begin-again-flag
- drop-this-word-if-trailing-flag
- drop-last-word-if-trailing-flag
- word-found-flag
- this-word-beg last-word-beg
- name-beg name-end
- name-done-flag
- )
- (save-excursion
- (set-syntax-table mail-extr-address-text-syntax-table)
-
- ;; Get rid of comments.
- (goto-char (point-min))
- (while (not (eobp))
- ;; Initialize for this iteration of the loop.
- (skip-chars-forward "^({[\"'`")
- (let ((cbeg (point)))
- (set-syntax-table mail-extr-address-text-comment-syntax-table)
- (if (memq (following-char) '(?\' ?\`))
- (search-forward "'" nil 'move
- (if (eq ?\' (following-char)) 2 1))
- (or (mail-extr-safe-move-sexp 1)
- (goto-char (point-max))))
- (set-syntax-table mail-extr-address-text-syntax-table)
- (when (eq (char-after cbeg) ?\()
- ;; Delete the comment itself.
- (delete-region cbeg (point))
- ;; Canonicalize whitespace where the comment was.
- (skip-chars-backward " \t")
- (if (looking-at "\\([ \t]+$\\|[ \t]+,\\)")
- (replace-match "")
- (setq cbeg (point))
- (skip-chars-forward " \t")
- (if (bobp)
- (delete-region (point) cbeg)
- (just-one-space))))))
-
- ;; This was moved above.
- ;; Fix . used as space
- ;; But it belongs here because it occurs not only as
- ;; rypens@reks.uia.ac.be (Piet.Rypens)
- ;; but also as
- ;; "Piet.Rypens" <rypens@reks.uia.ac.be>
- ;;(goto-char (point-min))
- ;;(while (re-search-forward mail-extr-bad-dot-pattern nil t)
- ;; (replace-match "\\1 \\2" t))
-
- (unless (search-forward " " nil t)
+ (unless (and mail-extr-disable-voodoo
+ (or (not (stringp mail-extr-disable-voodoo))
+ (progn
+ (goto-char (point-min))
+ (re-search-forward mail-extr-disable-voodoo nil t))))
+ (let ((word-count 0)
+ (case-fold-search nil)
+ mixed-case-flag lower-case-flag ;;upper-case-flag
+ suffix-flag last-name-comma-flag
+ ;;cbeg cend
+ initial
+ begin-again-flag
+ drop-this-word-if-trailing-flag
+ drop-last-word-if-trailing-flag
+ word-found-flag
+ this-word-beg last-word-beg
+ name-beg name-end
+ name-done-flag
+ )
+ (save-excursion
+ (set-syntax-table mail-extr-address-text-syntax-table)
+
+ ;; Get rid of comments.
(goto-char (point-min))
- (cond ((search-forward "_" nil t)
- ;; Handle the *idiotic* use of underlines as spaces.
- ;; Example: fml@foo.bar.dom (First_M._Last)
- (goto-char (point-min))
- (while (search-forward "_" nil t)
- (replace-match " " t)))
- ((search-forward "." nil t)
- ;; Fix . used as space
- ;; Example: danj1@cb.att.com (daniel.jacobson)
- (goto-char (point-min))
- (while (re-search-forward mail-extr-bad-dot-pattern nil t)
- (replace-match "\\1 \\2" t)))))
-
- ;; Loop over the words (and other junk) in the name.
- (goto-char (point-min))
- (while (not name-done-flag)
-
- (when word-found-flag
- ;; Last time through this loop we skipped over a word.
- (setq last-word-beg this-word-beg)
- (setq drop-last-word-if-trailing-flag
- drop-this-word-if-trailing-flag)
- (setq word-found-flag nil))
-
- (when begin-again-flag
- ;; Last time through the loop we found something that
- ;; indicates we should pretend we are beginning again from
- ;; the start.
- (setq word-count 0)
- (setq last-word-beg nil)
- (setq drop-last-word-if-trailing-flag nil)
- (setq mixed-case-flag nil)
- (setq lower-case-flag nil)
- ;; (setq upper-case-flag nil)
- (setq begin-again-flag nil))
-
- ;; Initialize for this iteration of the loop.
- (mail-extr-skip-whitespace-forward)
- (if (eq word-count 0) (narrow-to-region (point) (point-max)))
- (setq this-word-beg (point))
- (setq drop-this-word-if-trailing-flag nil)
-
- ;; Decide what to do based on what we are looking at.
- (cond
-
- ;; Delete title
- ((and (eq word-count 0)
- (looking-at mail-extr-full-name-prefixes))
- (goto-char (match-end 0))
- (narrow-to-region (point) (point-max)))
-
- ;; Stop after name suffix
- ((and (>= word-count 2)
- (looking-at mail-extr-full-name-suffix-pattern))
- (mail-extr-skip-whitespace-backward)
- (setq suffix-flag (point))
- (if (eq ?, (following-char))
- (forward-char 1)
- (insert ?,))
- ;; Enforce at least one space after comma
- (or (eq ?\ (following-char))
- (insert ?\ ))
+ (while (not (eobp))
+ ;; Initialize for this iteration of the loop.
+ (skip-chars-forward "^({[\"'`")
+ (let ((cbeg (point)))
+ (set-syntax-table mail-extr-address-text-comment-syntax-table)
+ (if (memq (following-char) '(?\' ?\`))
+ (search-forward "'" nil 'move
+ (if (eq ?\' (following-char)) 2 1))
+ (or (mail-extr-safe-move-sexp 1)
+ (goto-char (point-max))))
+ (set-syntax-table mail-extr-address-text-syntax-table)
+ (when (eq (char-after cbeg) ?\()
+ ;; Delete the comment itself.
+ (delete-region cbeg (point))
+ ;; Canonicalize whitespace where the comment was.
+ (skip-chars-backward " \t")
+ (if (looking-at "\\([ \t]+$\\|[ \t]+,\\)")
+ (replace-match "")
+ (setq cbeg (point))
+ (skip-chars-forward " \t")
+ (if (bobp)
+ (delete-region (point) cbeg)
+ (just-one-space))))))
+
+ ;; This was moved above.
+ ;; Fix . used as space
+ ;; But it belongs here because it occurs not only as
+ ;; rypens@reks.uia.ac.be (Piet.Rypens)
+ ;; but also as
+ ;; "Piet.Rypens" <rypens@reks.uia.ac.be>
+ ;;(goto-char (point-min))
+ ;;(while (re-search-forward mail-extr-bad-dot-pattern nil t)
+ ;; (replace-match "\\1 \\2" t))
+
+ (unless (search-forward " " nil t)
+ (goto-char (point-min))
+ (cond ((search-forward "_" nil t)
+ ;; Handle the *idiotic* use of underlines as spaces.
+ ;; Example: fml@foo.bar.dom (First_M._Last)
+ (goto-char (point-min))
+ (while (search-forward "_" nil t)
+ (replace-match " " t)))
+ ((search-forward "." nil t)
+ ;; Fix . used as space
+ ;; Example: danj1@cb.att.com (daniel.jacobson)
+ (goto-char (point-min))
+ (while (re-search-forward mail-extr-bad-dot-pattern nil t)
+ (replace-match "\\1 \\2" t)))))
+
+ ;; Loop over the words (and other junk) in the name.
+ (goto-char (point-min))
+ (while (not name-done-flag)
+
+ (when word-found-flag
+ ;; Last time through this loop we skipped over a word.
+ (setq last-word-beg this-word-beg)
+ (setq drop-last-word-if-trailing-flag
+ drop-this-word-if-trailing-flag)
+ (setq word-found-flag nil))
+
+ (when begin-again-flag
+ ;; Last time through the loop we found something that
+ ;; indicates we should pretend we are beginning again from
+ ;; the start.
+ (setq word-count 0)
+ (setq last-word-beg nil)
+ (setq drop-last-word-if-trailing-flag nil)
+ (setq mixed-case-flag nil)
+ (setq lower-case-flag nil)
+ ;; (setq upper-case-flag nil)
+ (setq begin-again-flag nil))
+
+ ;; Initialize for this iteration of the loop.
(mail-extr-skip-whitespace-forward)
- (cond ((memq (following-char) '(?j ?J ?s ?S))
- (capitalize-word 1)
- (if (eq (following-char) ?.)
- (forward-char 1)
- (insert ?.)))
- (t
- (upcase-word 1)))
- (setq word-found-flag t)
- (setq name-done-flag t))
-
- ;; Handle SCA names
- ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As"
- (goto-char (match-beginning 1))
- (narrow-to-region (point) (point-max))
- (setq begin-again-flag t))
-
- ;; Check for initial last name followed by comma
- ((and (eq ?, (following-char))
- (eq word-count 1))
- (forward-char 1)
- (setq last-name-comma-flag t)
- (or (eq ?\ (following-char))
- (insert ?\ )))
-
- ;; Stop before trailing comma-separated comment
- ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
- ;; *** This case is redundant???
- ;;((eq ?, (following-char))
- ;; (setq name-done-flag t))
-
- ;; Delete parenthesized/quoted comment/nickname
- ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`))
- (setq cbeg (point))
- (set-syntax-table mail-extr-address-text-comment-syntax-table)
- (cond ((memq (following-char) '(?\' ?\`))
- (or (search-forward "'" nil t
- (if (eq ?\' (following-char)) 2 1))
- (delete-char 1)))
- (t
- (or (mail-extr-safe-move-sexp 1)
- (goto-char (point-max)))))
- (set-syntax-table mail-extr-address-text-syntax-table)
- (setq cend (point))
+ (if (eq word-count 0) (narrow-to-region (point) (point-max)))
+ (setq this-word-beg (point))
+ (setq drop-this-word-if-trailing-flag nil)
+
+ ;; Decide what to do based on what we are looking at.
(cond
- ;; Handle case of entire name being quoted
+
+ ;; Delete title
((and (eq word-count 0)
- (looking-at " *\\'")
- (>= (- cend cbeg) 2))
- (narrow-to-region (1+ cbeg) (1- cend))
- (goto-char (point-min)))
- (t
- ;; Handle case of quoted initial
- (if (and (or (= 3 (- cend cbeg))
- (and (= 4 (- cend cbeg))
- (eq ?. (char-after (+ 2 cbeg)))))
- (not (looking-at " *\\'")))
- (setq initial (char-after (1+ cbeg)))
- (setq initial nil))
- (delete-region cbeg cend)
- (if initial
- (insert initial ". ")))))
-
- ;; Handle *Stupid* VMS date stamps
- ((looking-at mail-extr-stupid-vms-date-stamp-pattern)
- (replace-match "" t))
-
- ;; Handle Chinese characters.
- ((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern)
- (goto-char (match-end 0))
- (setq word-found-flag t))
-
- ;; Skip initial garbage characters.
- ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
- ((and (eq word-count 0)
- (looking-at mail-extr-leading-garbage))
- (goto-char (match-end 0))
- ;; *** Skip backward over these???
- ;; (skip-chars-backward "& \"")
- (narrow-to-region (point) (point-max)))
-
- ;; Various stopping points
- ((or
-
- ;; Stop before ALL CAPS acronyms, if preceded by mixed-case
- ;; words. Example: XT-DEM.
- (and (>= word-count 2)
- mixed-case-flag
- (looking-at mail-extr-weird-acronym-pattern)
- (not (looking-at mail-extr-roman-numeral-pattern)))
-
- ;; Stop before trailing alternative address
- (looking-at mail-extr-alternative-address-pattern)
-
- ;; Stop before trailing comment not introduced by comma
- ;; THIS CASE MUST BE AFTER AN EARLIER CASE.
- (looking-at mail-extr-trailing-comment-start-pattern)
-
- ;; Stop before telephone numbers
- (and (>= word-count 1)
- (looking-at mail-extr-telephone-extension-pattern)))
- (setq name-done-flag t))
-
- ;; Delete ham radio call signs
- ((looking-at mail-extr-ham-call-sign-pattern)
- (delete-region (match-beginning 0) (match-end 0)))
-
- ;; Fixup initials
- ((looking-at mail-extr-initial-pattern)
- (or (eq (following-char) (upcase (following-char)))
- (setq lower-case-flag t))
- (forward-char 1)
- (if (eq ?. (following-char))
- (forward-char 1)
- (insert ?.))
- (or (eq ?\ (following-char))
- (insert ?\ ))
- (setq word-found-flag t))
-
- ;; Handle BITNET LISTSERV list names.
- ((and (eq word-count 0)
- (looking-at mail-extr-listserv-list-name-pattern))
- (narrow-to-region (match-beginning 1) (match-end 1))
- (setq word-found-flag t)
- (setq name-done-flag t))
-
- ;; Handle & substitution, when & is last and is not first.
- ((and (> word-count 0)
- (eq ?\ (preceding-char))
- (eq (following-char) ?&)
- (eq (1+ (point)) (point-max)))
- (delete-char 1)
- (capitalize-region
- (point)
- (progn
- (insert-buffer-substring canonicalization-buffer
- mbox-beg mbox-end)
- (point)))
- (setq disable-initial-guessing-flag t)
- (setq word-found-flag t))
-
- ;; Handle & between names, as in "Bob & Susie".
- ((and (> word-count 0) (eq (following-char) ?\&))
- (setq name-beg (point))
- (setq name-end (1+ name-beg))
- (setq word-found-flag t)
- (goto-char name-end))
-
- ;; Regular name words
- ((looking-at mail-extr-name-pattern)
- (setq name-beg (point))
- (setq name-end (match-end 0))
-
- ;; Certain words will be dropped if they are at the end.
- (and (>= word-count 2)
- (not lower-case-flag)
- (or
- ;; A trailing 4-or-more letter lowercase words preceded by
- ;; mixed case or uppercase words will be dropped.
- (looking-at "[a-z][a-z][a-z][a-z]+[ \t]*\\'")
- ;; Drop a trailing word which is terminated with a period.
- (eq ?. (char-after (1- name-end))))
- (setq drop-this-word-if-trailing-flag t))
-
- ;; Set the flags that indicate whether we have seen a lowercase
- ;; word, a mixed case word, and an uppercase word.
- (if (re-search-forward "[a-z]" name-end t)
- (if (progn
- (goto-char name-beg)
- (re-search-forward "[A-Z]" name-end t))
- (setq mixed-case-flag t)
+ (looking-at mail-extr-full-name-prefixes))
+ (goto-char (match-end 0))
+ (narrow-to-region (point) (point-max)))
+
+ ;; Stop after name suffix
+ ((and (>= word-count 2)
+ (looking-at mail-extr-full-name-suffix-pattern))
+ (mail-extr-skip-whitespace-backward)
+ (setq suffix-flag (point))
+ (if (eq ?, (following-char))
+ (forward-char 1)
+ (insert ?,))
+ ;; Enforce at least one space after comma
+ (or (eq ?\ (following-char))
+ (insert ?\ ))
+ (mail-extr-skip-whitespace-forward)
+ (cond ((memq (following-char) '(?j ?J ?s ?S))
+ (capitalize-word 1)
+ (if (eq (following-char) ?.)
+ (forward-char 1)
+ (insert ?.)))
+ (t
+ (upcase-word 1)))
+ (setq word-found-flag t)
+ (setq name-done-flag t))
+
+ ;; Handle SCA names
+ ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As"
+ (goto-char (match-beginning 1))
+ (narrow-to-region (point) (point-max))
+ (setq begin-again-flag t))
+
+ ;; Check for initial last name followed by comma
+ ((and (eq ?, (following-char))
+ (eq word-count 1))
+ (forward-char 1)
+ (setq last-name-comma-flag t)
+ (or (eq ?\ (following-char))
+ (insert ?\ )))
+
+ ;; Stop before trailing comma-separated comment
+ ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
+ ;; *** This case is redundant???
+ ;;((eq ?, (following-char))
+ ;; (setq name-done-flag t))
+
+ ;; Delete parenthesized/quoted comment/nickname
+ ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`))
+ (setq cbeg (point))
+ (set-syntax-table mail-extr-address-text-comment-syntax-table)
+ (cond ((memq (following-char) '(?\' ?\`))
+ (or (search-forward "'" nil t
+ (if (eq ?\' (following-char)) 2 1))
+ (delete-char 1)))
+ (t
+ (or (mail-extr-safe-move-sexp 1)
+ (goto-char (point-max)))))
+ (set-syntax-table mail-extr-address-text-syntax-table)
+ (setq cend (point))
+ (cond
+ ;; Handle case of entire name being quoted
+ ((and (eq word-count 0)
+ (looking-at " *\\'")
+ (>= (- cend cbeg) 2))
+ (narrow-to-region (1+ cbeg) (1- cend))
+ (goto-char (point-min)))
+ (t
+ ;; Handle case of quoted initial
+ (if (and (or (= 3 (- cend cbeg))
+ (and (= 4 (- cend cbeg))
+ (eq ?. (char-after (+ 2 cbeg)))))
+ (not (looking-at " *\\'")))
+ (setq initial (char-after (1+ cbeg)))
+ (setq initial nil))
+ (delete-region cbeg cend)
+ (if initial
+ (insert initial ". ")))))
+
+ ;; Handle *Stupid* VMS date stamps
+ ((looking-at mail-extr-stupid-vms-date-stamp-pattern)
+ (replace-match "" t))
+
+ ;; Handle Chinese characters.
+ ((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern)
+ (goto-char (match-end 0))
+ (setq word-found-flag t))
+
+ ;; Skip initial garbage characters.
+ ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
+ ((and (eq word-count 0)
+ (looking-at mail-extr-leading-garbage))
+ (goto-char (match-end 0))
+ ;; *** Skip backward over these???
+ ;; (skip-chars-backward "& \"")
+ (narrow-to-region (point) (point-max)))
+
+ ;; Various stopping points
+ ((or
+
+ ;; Stop before ALL CAPS acronyms, if preceded by mixed-case
+ ;; words. Example: XT-DEM.
+ (and (>= word-count 2)
+ mixed-case-flag
+ (looking-at mail-extr-weird-acronym-pattern)
+ (not (looking-at mail-extr-roman-numeral-pattern)))
+
+ ;; Stop before trailing alternative address
+ (looking-at mail-extr-alternative-address-pattern)
+
+ ;; Stop before trailing comment not introduced by comma
+ ;; THIS CASE MUST BE AFTER AN EARLIER CASE.
+ (looking-at mail-extr-trailing-comment-start-pattern)
+
+ ;; Stop before telephone numbers
+ (and (>= word-count 1)
+ (looking-at mail-extr-telephone-extension-pattern)))
+ (setq name-done-flag t))
+
+ ;; Delete ham radio call signs
+ ((looking-at mail-extr-ham-call-sign-pattern)
+ (delete-region (match-beginning 0) (match-end 0)))
+
+ ;; Fixup initials
+ ((looking-at mail-extr-initial-pattern)
+ (or (eq (following-char) (upcase (following-char)))
(setq lower-case-flag t))
-;; (setq upper-case-flag t)
- )
-
- (goto-char name-end)
- (setq word-found-flag t))
+ (forward-char 1)
+ (if (eq ?. (following-char))
+ (forward-char 1)
+ (insert ?.))
+ (or (eq ?\ (following-char))
+ (insert ?\ ))
+ (setq word-found-flag t))
+
+ ;; Handle BITNET LISTSERV list names.
+ ((and (eq word-count 0)
+ (looking-at mail-extr-listserv-list-name-pattern))
+ (narrow-to-region (match-beginning 1) (match-end 1))
+ (setq word-found-flag t)
+ (setq name-done-flag t))
+
+ ;; Handle & substitution, when & is last and is not first.
+ ((and (> word-count 0)
+ (eq ?\ (preceding-char))
+ (eq (following-char) ?&)
+ (eq (1+ (point)) (point-max)))
+ (delete-char 1)
+ (capitalize-region
+ (point)
+ (progn
+ (insert-buffer-substring canonicalization-buffer
+ mbox-beg mbox-end)
+ (point)))
+ (setq disable-initial-guessing-flag t)
+ (setq word-found-flag t))
+
+ ;; Handle & between names, as in "Bob & Susie".
+ ((and (> word-count 0) (eq (following-char) ?\&))
+ (setq name-beg (point))
+ (setq name-end (1+ name-beg))
+ (setq word-found-flag t)
+ (goto-char name-end))
+
+ ;; Regular name words
+ ((looking-at mail-extr-name-pattern)
+ (setq name-beg (point))
+ (setq name-end (match-end 0))
+
+ ;; Certain words will be dropped if they are at the end.
+ (and (>= word-count 2)
+ (not lower-case-flag)
+ (or
+ ;; Trailing 4-or-more letter lowercase words preceded by
+ ;; mixed case or uppercase words will be dropped.
+ (looking-at "[[:lower:]]\\{4,\\}[ \t]*\\'")
+ ;; Drop a trailing word which is terminated with a period.
+ (eq ?. (char-after (1- name-end))))
+ (setq drop-this-word-if-trailing-flag t))
+
+ ;; Set the flags that indicate whether we have seen a lowercase
+ ;; word, a mixed case word, and an uppercase word.
+ (if (re-search-forward "[[:lower:]]" name-end t)
+ (if (progn
+ (goto-char name-beg)
+ (re-search-forward "[[:upper:]]" name-end t))
+ (setq mixed-case-flag t)
+ (setq lower-case-flag t))
+ ;; (setq upper-case-flag t)
+ )
+
+ (goto-char name-end)
+ (setq word-found-flag t))
- ;; Allow a number as a word, if it doesn't mean anything else.
- ((looking-at "[0-9]+\\>")
- (setq name-beg (point))
- (setq name-end (match-end 0))
+ ;; Allow a number as a word, if it doesn't mean anything else.
+ ((looking-at "[0-9]+\\>")
+ (setq name-beg (point))
+ (setq name-end (match-end 0))
+ (goto-char name-end)
+ (setq word-found-flag t))
+
+ (t
+ (setq name-done-flag t)
+ ))
+
+ ;; Count any word that we skipped over.
+ (if word-found-flag
+ (setq word-count (1+ word-count))))
+
+ ;; If the last thing in the name is 2 or more periods, or one or more
+ ;; other sentence terminators (but not a single period) then keep them
+ ;; and the preceding word. This is for the benefit of whole sentences
+ ;; in the name field: it's better behavior than dropping the last word
+ ;; of the sentence...
+ (if (and (not suffix-flag)
+ (looking-at "\\(\\.+\\|[?!;:.][?!;:.]+\\|[?!;:][?!;:.]*\\)\\'"))
+ (goto-char (setq suffix-flag (point-max))))
+
+ ;; Drop everything after point and certain trailing words.
+ (narrow-to-region (point-min)
+ (or (and drop-last-word-if-trailing-flag
+ last-word-beg)
+ (point)))
+
+ ;; Xerox's mailers SUCK!!!!!!
+ ;; We simply refuse to believe that any last name is PARC or ADOC.
+ ;; If it looks like that is the last name, that there is no meaningful
+ ;; here at all. Actually I guess it would be best to map patterns
+ ;; like foo.hoser@xerox.com into foo@hoser.xerox.com, but I don't
+ ;; actually know that that is what's going on.
+ (unless suffix-flag
+ (goto-char (point-min))
+ (let ((case-fold-search t))
+ (if (looking-at "[-A-Za-z_]+[. ]\\(PARC\\|ADOC\\)\\'")
+ (erase-buffer))))
+
+ ;; If last name first put it at end (but before suffix)
+ (when last-name-comma-flag
+ (goto-char (point-min))
+ (search-forward ",")
+ (setq name-end (1- (point)))
+ (goto-char (or suffix-flag (point-max)))
+ (or (eq ?\ (preceding-char))
+ (insert ?\ ))
+ (insert-buffer-substring (current-buffer) (point-min) name-end)
(goto-char name-end)
- (setq word-found-flag t))
-
- (t
- (setq name-done-flag t)
- ))
-
- ;; Count any word that we skipped over.
- (if word-found-flag
- (setq word-count (1+ word-count))))
-
- ;; If the last thing in the name is 2 or more periods, or one or more
- ;; other sentence terminators (but not a single period) then keep them
- ;; and the preceding word. This is for the benefit of whole sentences
- ;; in the name field: it's better behavior than dropping the last word
- ;; of the sentence...
- (if (and (not suffix-flag)
- (looking-at "\\(\\.+\\|[?!;:.][?!;:.]+\\|[?!;:][?!;:.]*\\)\\'"))
- (goto-char (setq suffix-flag (point-max))))
-
- ;; Drop everything after point and certain trailing words.
- (narrow-to-region (point-min)
- (or (and drop-last-word-if-trailing-flag
- last-word-beg)
- (point)))
-
- ;; Xerox's mailers SUCK!!!!!!
- ;; We simply refuse to believe that any last name is PARC or ADOC.
- ;; If it looks like that is the last name, that there is no meaningful
- ;; here at all. Actually I guess it would be best to map patterns
- ;; like foo.hoser@xerox.com into foo@hoser.xerox.com, but I don't
- ;; actually know that that is what's going on.
- (unless suffix-flag
- (goto-char (point-min))
- (let ((case-fold-search t))
- (if (looking-at "[-A-Za-z_]+[. ]\\(PARC\\|ADOC\\)\\'")
- (erase-buffer))))
+ (skip-chars-forward "\t ,")
+ (narrow-to-region (point) (point-max)))
- ;; If last name first put it at end (but before suffix)
- (when last-name-comma-flag
+ ;; Delete leading and trailing junk characters.
+ ;; *** This is probably completely unneeded now.
+ ;;(goto-char (point-max))
+ ;;(skip-chars-backward mail-extr-non-end-name-chars)
+ ;;(if (eq ?. (following-char))
+ ;; (forward-char 1))
+ ;;(narrow-to-region (point)
+ ;; (progn
+ ;; (goto-char (point-min))
+ ;; (skip-chars-forward mail-extr-non-begin-name-chars)
+ ;; (point)))
+
+ ;; Compress whitespace
(goto-char (point-min))
- (search-forward ",")
- (setq name-end (1- (point)))
- (goto-char (or suffix-flag (point-max)))
- (or (eq ?\ (preceding-char))
- (insert ?\ ))
- (insert-buffer-substring (current-buffer) (point-min) name-end)
- (goto-char name-end)
- (skip-chars-forward "\t ,")
- (narrow-to-region (point) (point-max)))
-
- ;; Delete leading and trailing junk characters.
- ;; *** This is probably completely unneeded now.
- ;;(goto-char (point-max))
- ;;(skip-chars-backward mail-extr-non-end-name-chars)
- ;;(if (eq ?. (following-char))
- ;; (forward-char 1))
- ;;(narrow-to-region (point)
- ;; (progn
- ;; (goto-char (point-min))
- ;; (skip-chars-forward mail-extr-non-begin-name-chars)
- ;; (point)))
-
- ;; Compress whitespace
- (goto-char (point-min))
- (while (re-search-forward "[ \t\n]+" nil t)
- (replace-match (if (eobp) "" " ") t))
- )))
+ (while (re-search-forward "[ \t\n]+" nil t)
+ (replace-match (if (eobp) "" " ") t))
+ ))))
\f
;; Keep in mind that the country abbreviations follow ISO-3166. There is
;; a U.S. FIPS that specifies a different set of two-letter country
;; abbreviations.
+;;
+;; Updated by the RIPE Network Coordination Centre.
+;;
+;; Source: ISO 3166 Maintenance Agency
+;; http://www.iso.org/iso/en/prods-services/iso3166ma/02iso-3166-code-lists/list-en1-semic.txt
+;; http://www.iana.org/domain-names.htm
+;; http://www.iana.org/cctld/cctld-whois.htm
+;; Latest change: Mon Jul 8 14:21:59 CEST 2002
(defconst mail-extr-all-top-level-domains
(let ((ob (make-vector 739 0)))
("bz" "Belize")
("ca" "Canada")
("cc" "Cocos (Keeling) Islands")
- ("cd" "The Democratic Republic of The Congo")
+ ("cd" "Congo" "The Democratic Republic of the %s")
("cf" "Central African Republic")
("cg" "Congo")
("ch" "Switzerland" "The Swiss Confederation")
("gp" "Guadeloupe (Fr.)")
("gq" "Equatorial Guinea")
("gr" "Greece" "The Hellenic Republic (%s)")
- ("gs" "South Georgia And The South Sandwich Islands")
+ ("gs" "South Georgia and The South Sandwich Islands")
("gt" "Guatemala")
("gu" "Guam (U.S.)")
("gw" "Guinea-Bissau")
("gy" "Guyana")
("hk" "Hong Kong")
- ("hm" "Heard Island And Mcdonald Islands")
+ ("hm" "Heard Island and Mcdonald Islands")
("hn" "Honduras")
("hr" "Croatia" "Croatia (Hrvatska)")
("ht" "Haiti")
("kr" "Korea (South)" "Republic of Korea")
("kw" "Kuwait")
("ky" "Cayman Islands")
- ("kz" "Kazakstan")
+ ("kz" "Kazakhstan")
("la" "Lao People's Democratic Republic")
("lb" "Lebanon")
("lc" "Saint Lucia")
("ml" "Mali")
("mm" "Myanmar")
("mn" "Mongolia")
- ("mo" "Macau")
+ ("mo" "Macao")
("mp" "Northern Mariana Islands")
("mq" "Martinique")
("mr" "Mauritania")
("mv" "Maldives")
("mw" "Malawi")
("mx" "Mexico" "The United Mexican States")
- ("my" "Malaysia" "%s (changed to Myanmar?)") ;???
+ ("my" "Malaysia")
("mz" "Mozambique")
("na" "Namibia")
("nc" "New Caledonia (Fr.)")
("om" "Oman")
("pa" "Panama")
("pe" "Peru")
- ("pf" "Polynesia (Fr.)")
+ ("pf" "French Polynesia")
("pg" "Papua New Guinea")
("ph" "Philippines" "The Republic of the %s")
("pk" "Pakistan")
("sn" "Senegal")
("so" "Somalia")
("sr" "Suriname")
- ("st" "Sao Tome And Principe")
+ ("st" "Sao Tome and Principe")
("su" "U.S.S.R." "The Union of Soviet Socialist Republics")
("sv" "El Salvador")
("sy" "Syrian Arab Republic")
("sz" "Swaziland")
- ("tc" "Turks And Caicos Islands")
+ ("tc" "Turks and Caicos Islands")
("td" "Chad")
("tf" "French Southern Territories")
("tg" "Togo")
("th" "Thailand" "The Kingdom of %s")
("tj" "Tajikistan")
("tk" "Tokelau")
+ ("tl" "East Timor")
("tm" "Turkmenistan")
("tn" "Tunisia")
("to" "Tonga")
("uy" "Uruguay" "The Eastern Republic of %s")
("uz" "Uzbekistan")
("va" "Holy See (Vatican City State)")
- ("vc" "St. Vincent and the Grenadines")
+ ("vc" "Saint Vincent and the Grenadines")
("ve" "Venezuela" "The Republic of %s")
("vg" "Virgin Islands, British")
("vi" "Virgin Islands, U.S.")
("za" "South Africa" "The Republic of %s")
("zm" "Zambia")
("zw" "Zimbabwe" "Republic of %s")
- ;; Special top-level domains:
- ("arpa" t "Advanced Research Projects Agency (U.S. DoD)")
- ("bitnet" t "Because It's Time NET")
+ ;; Generic Domains:
+ ("aero" t "Air Transport Industry")
+ ("biz" t "Businesses")
("com" t "Commercial")
- ("edu" t "Educational")
- ("gov" t "Government (U.S.)")
- ("int" t "International (NATO)")
- ("mil" t "Military (U.S.)")
- ("nato" t "North Atlantic Treaty Organization")
+ ("coop" t "Cooperative Associations")
+ ("info" t "Info")
+ ("museum" t "Museums")
+ ("name" t "Individuals")
("net" t "Network")
("org" t "Non-profit Organization")
- ;;("unter-dom" t "? (Ger.)")
+ ;;("pro" t "Credentialed professionals")
+ ;;("bitnet" t "Because It's Time NET")
+ ("gov" t "United States Government")
+ ("edu" t "Educational")
+ ("mil" t "United States Military")
+ ("int" t "International Treaties")
+ ;;("nato" t "North Atlantic Treaty Organization")
("uucp" t "Unix to Unix CoPy")
- ;;("fipnet" nil "?")
+ ;; Infrastructure Domains:
+ ("arpa" t "Advanced Research Projects Agency (U.S. DoD)")
))
ob))
\f
;(let ((all nil))
; (mapatoms #'(lambda (x)
-; (if (and (boundp x)
+; (if (and (boundp x)
; (string-match "^mail-extr-" (symbol-name x)))
; (setq all (cons x all)))))
; (setq all (sort all #'string-lessp))
\f
(provide 'mail-extr)
+;;; arch-tag: 7785fade-1073-4ed6-b4f6-28db34a7982d
;;; mail-extr.el ends here