X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/51751aa26f9935609630f04e781a954b54ecc82e..7e09ef09a479731d01b1ca46e94ddadd73ac98e3:/lisp/gnus/html2text.el diff --git a/lisp/gnus/html2text.el b/lisp/gnus/html2text.el index 0635ab0afc..aef8063684 100644 --- a/lisp/gnus/html2text.el +++ b/lisp/gnus/html2text.el @@ -1,6 +1,6 @@ -;;; html2text.el --- a simple html to plain text converter +;;; html2text.el --- a simple html to plain text converter -*- coding: utf-8 -*- -;; Copyright (C) 2002-2011 Free Software Foundation, Inc. +;; Copyright (C) 2002-2015 Free Software Foundation, Inc. ;; Author: Joakim Hove @@ -65,14 +65,14 @@ ("–" . "-") ("‰" . "%%") ("±" . "+-") - ("£" . "£") + ("£" . "£") (""" . "\"") ("»" . ">>") ("&rdquo" . "\"") ("®" . "(R)") ("›" . ")") ("’" . "'") - ("§" . "§") + ("§" . "§") ("¹" . "^1") ("²" . "^2") ("³" . "^3") @@ -123,7 +123,7 @@ If this list contains the element \"font\".") This is an alist where each dotted pair consists of a tag, and then the name of a function to be called when this tag is found. The function is called with the arguments p1, p2, p3 and p4. These are -demontrated below: +demonstrated below: \" This is bold text \" ^ ^ ^ ^ @@ -179,72 +179,20 @@ formatting, and then moved afterward.") (defun html2text-get-attr (p1 p2) (goto-char p1) - (re-search-forward " +[^ ]" p2 t) - (let* ((attr-string (buffer-substring-no-properties (1- (point)) (1- p2))) - (tmp-list (split-string attr-string)) - (attr-list) - (counter 0) - (prev (car tmp-list)) - (this (nth 1 tmp-list)) - (next (nth 2 tmp-list)) - (index 1)) - - (cond - ;; size=3 - ((string-match "[^ ]=[^ ]" prev) - (let ((attr (nth 0 (split-string prev "="))) - (value (nth 1 (split-string prev "=")))) - (setq attr-list (cons (list attr value) attr-list)))) - ;; size= 3 - ((string-match "[^ ]=\\'" prev) - (setq attr-list (cons (list (substring prev 0 -1) this) attr-list)))) - - (while (< index (length tmp-list)) - (cond - ;; size=3 - ((string-match "[^ ]=[^ ]" this) - (let ((attr (nth 0 (split-string this "="))) - (value (nth 1 (split-string this "=")))) - (setq attr-list (cons (list attr value) attr-list)))) - ;; size =3 - ((string-match "\\`=[^ ]" this) - (setq attr-list (cons (list prev (substring this 1)) attr-list))) - ;; size= 3 - ((string-match "[^ ]=\\'" this) - (setq attr-list (cons (list (substring this 0 -1) next) attr-list))) - ;; size = 3 - ((string= "=" this) - (setq attr-list (cons (list prev next) attr-list)))) - (setq index (1+ index)) - (setq prev this) - (setq this next) - (setq next (nth (1+ index) tmp-list))) - ;; - ;; Tags with no accompanying "=" i.e. value=nil - ;; - (setq prev (car tmp-list)) - (setq this (nth 1 tmp-list)) - (setq next (nth 2 tmp-list)) - (setq index 1) - - (when (and (not (string-match "=" prev)) - (not (string= (substring this 0 1) "="))) - (setq attr-list (cons (list prev nil) attr-list))) - (while (< index (1- (length tmp-list))) - (when (and (not (string-match "=" this)) - (not (or (string= (substring next 0 1) "=") - (string= (substring prev -1) "=")))) - (setq attr-list (cons (list this nil) attr-list))) - (setq index (1+ index)) - (setq prev this) - (setq this next) - (setq next (nth (1+ index) tmp-list))) - - (when (and this - (not (string-match "=" this)) - (not (string= (substring prev -1) "="))) - (setq attr-list (cons (list this nil) attr-list))) - ;; return - value + (re-search-forward "\\s-+" p2 t) + (let (attr-list) + (while (re-search-forward "[-a-z0-9._]+" p2 t) + (setq attr-list + (cons + (list (match-string 0) + (when (looking-at "\\s-*=") + (goto-char (match-end 0)) + (skip-chars-forward "[:space:]") + (when (or (looking-at "\"[^\"]*\"\\|'[^']*'") + (looking-at "[-a-z0-9._:]+")) + (goto-char (match-end 0)) + (match-string 0)))) + attr-list))) attr-list)) ;; @@ -358,7 +306,8 @@ formatting, and then moved afterward.") (delete-region p1 p4) (when href (goto-char p1) - (insert (substring href 1 -1 )) + (insert (if (string-match "\\`['\"].*['\"]\\'" href) + (substring href 1 -1) href)) (put-text-property p1 (point) 'face 'bold)))) ;; @@ -409,7 +358,7 @@ fashion, quite close to pure guess-work. It does work in some cases though." (while (re-search-forward "^
$" nil t) (delete-region (match-beginning 0) (match-end 0))) ;; Removing lonely
on a single line, if they are left intact we - ;; dont have any paragraphs at all. + ;; don't have any paragraphs at all. (goto-char (point-min)) (while (not (eobp)) (let ((p1 (point)))