X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/f991a50a90ca5317073f01ccff37ff5048b1881e..90fb0b2d13d0f44ecb9606587681cb4d8a6f0225:/lisp/net/shr.el diff --git a/lisp/net/shr.el b/lisp/net/shr.el index e463c7edaf..68972020db 100644 --- a/lisp/net/shr.el +++ b/lisp/net/shr.el @@ -37,6 +37,7 @@ (require 'dom) (require 'seq) (require 'svg) +(require 'image) (defgroup shr nil "Simple HTML Renderer" @@ -274,22 +275,19 @@ DOM should be a parse tree as generated by (set-window-hscroll nil 0) (shr-descend dom) (shr-fill-lines start (point)) - (shr-remove-trailing-whitespace start (point)) + (shr--remove-blank-lines-at-the-end start (point)) (when shr-warning (message "%s" shr-warning)))) -(defun shr-remove-trailing-whitespace (start end) - (let ((width (window-width))) - (save-restriction +(defun shr--remove-blank-lines-at-the-end (start end) + (save-restriction + (save-excursion (narrow-to-region start end) - (goto-char start) - (while (not (eobp)) - (end-of-line) - (when (> (shr-previous-newline-padding-width (current-column)) width) - (dolist (overlay (overlays-at (point))) - (when (overlay-get overlay 'before-string) - (overlay-put overlay 'before-string nil)))) - (forward-line 1))))) + (goto-char end) + (when (and (re-search-backward "[^ \n]" nil t) + (not (eobp))) + (forward-line 1) + (delete-region (point) (point-max)))))) (defun shr-copy-url (&optional image-url) "Copy the URL under point to the kill ring. @@ -299,8 +297,10 @@ image under point instead. If called twice, then try to fetch the URL and see whether it redirects somewhere else." (interactive "P") - (let ((url (or (get-text-property (point) 'shr-url) - (get-text-property (point) 'image-url)))) + (let ((url (if image-url + (get-text-property (point) 'image-url) + (or (get-text-property (point) 'shr-url) + (get-text-property (point) 'image-url))))) (cond ((not url) (message "No URL under point")) @@ -557,6 +557,16 @@ size, and full-buffer size." (insert string) (shr-pixel-column)))) +(defsubst shr--translate-insertion-chars () + ;; Remove soft hyphens. + (goto-char (point-min)) + (while (search-forward "­" nil t) + (replace-match "" t t)) + ;; Translate non-breaking spaces into real spaces. + (goto-char (point-min)) + (while (search-forward " " nil t) + (replace-match " " t t))) + (defun shr-insert (text) (when (and (not (bolp)) (get-text-property (1- (point)) 'image-url)) @@ -567,14 +577,11 @@ size, and full-buffer size." (insert text) (save-restriction (narrow-to-region start (point)) - ;; Remove soft hyphens. - (goto-char (point-min)) - (while (search-forward "­" nil t) - (replace-match "" t t)) + (shr--translate-insertion-chars) (goto-char (point-max))))) (t (let ((font-start (point))) - (when (and (string-match "\\`[ \t\n\r ]" text) + (when (and (string-match "\\`[ \t\n\r]" text) (not (bolp)) (not (eq (char-after (1- (point))) ? ))) (insert " ")) @@ -584,14 +591,11 @@ size, and full-buffer size." (save-restriction (narrow-to-region start (point)) (goto-char start) - (when (looking-at "[ \t\n\r ]+") + (when (looking-at "[ \t\n\r]+") (replace-match "" t t)) - (while (re-search-forward "[ \t\n\r ]+" nil t) + (while (re-search-forward "[ \t\n\r]+" nil t) (replace-match " " t t)) - ;; Remove soft hyphens. - (goto-char (point-min)) - (while (search-forward "­" nil t) - (replace-match "" t t)) + (shr--translate-insertion-chars) (goto-char (point-max))) ;; We may have removed everything we inserted if if was just ;; spaces. @@ -805,8 +809,13 @@ size, and full-buffer size." (url-expand-file-name url (concat (car base) (cadr base)))))) (defun shr-ensure-newline () - (unless (zerop (current-column)) - (insert "\n"))) + (unless (bobp) + (let ((prefix (get-text-property (line-beginning-position) + 'shr-prefix-length))) + (unless (or (zerop (current-column)) + (and prefix + (= prefix (- (point) (line-beginning-position))))) + (insert "\n"))))) (defun shr-ensure-paragraph () (unless (bobp) @@ -834,6 +843,10 @@ size, and full-buffer size." (line-end-position)) (line-end-position))))) (delete-region (match-beginning 0) (match-end 0))) + ;; We have a single blank line. + ((and (eolp) (bolp)) + (insert "\n")) + ;; Insert new paragraph. (t (insert "\n\n")))))) @@ -937,7 +950,8 @@ If EXTERNAL, browse the URL using `shr-external-browser'." (let ((param (match-string 4 data)) (payload (url-unhex-string (match-string 5 data)))) (when (string-match "^.*\\(;[ \t]*base64\\)$" param) - (setq payload (base64-decode-string payload))) + (setq payload (ignore-errors + (base64-decode-string payload)))) payload))) ;; Behind display-graphic-p test. @@ -997,22 +1011,25 @@ element is the data blob and the second element is the content-type." (defun shr-rescale-image (data content-type width height) "Rescale DATA, if too big, to fit the current buffer. WIDTH and HEIGHT are the sizes given in the HTML data, if any." - (if (not (and (fboundp 'imagemagick-types) - (get-buffer-window (current-buffer)))) + (if (or (not (fboundp 'imagemagick-types)) + (not (get-buffer-window (current-buffer)))) (create-image data nil t :ascent 100) (let* ((edges (window-inside-pixel-edges (get-buffer-window (current-buffer)))) (max-width (truncate (* shr-max-image-proportion (- (nth 2 edges) (nth 0 edges))))) (max-height (truncate (* shr-max-image-proportion - (- (nth 3 edges) (nth 1 edges)))))) + (- (nth 3 edges) (nth 1 edges))))) + (scaling (image-compute-scaling-factor image-scaling-factor))) (when (or (and width (> width max-width)) (and height (> height max-height))) (setq width nil height nil)) - (if (and width height) + (if (and width height + (< (* width scaling) max-width) + (< (* height scaling) max-height)) (create-image data 'imagemagick t :ascent 100 @@ -1158,18 +1175,6 @@ ones, in case fg and bg are nil." t))) new-colors))) -(defun shr-previous-newline-padding-width (width) - (let ((overlays (overlays-at (point))) - (previous-width 0)) - (if (null overlays) - width - (dolist (overlay overlays) - (setq previous-width - (+ previous-width - (length (plist-get (overlay-properties overlay) - 'before-string))))) - (+ width previous-width)))) - ;;; Tag-specific rendering rules. (defun shr-tag-html (dom) @@ -1259,7 +1264,7 @@ ones, in case fg and bg are nil." (shr-ensure-paragraph)) (defun shr-tag-div (dom) - (shr-ensure-paragraph) + (shr-ensure-newline) (shr-generic dom) (shr-ensure-newline)) @@ -1446,13 +1451,14 @@ The preference is a float determined from `shr-prefer-media-type'." (defun shr-tag-img (dom &optional url) (when (or url (and dom - (> (length (dom-attr dom 'src)) 0))) + (or (> (length (dom-attr dom 'src)) 0) + (> (length (dom-attr dom 'srcset)) 0)))) (when (> (current-column) 0) (insert "\n")) (let ((alt (dom-attr dom 'alt)) (width (shr-string-number (dom-attr dom 'width))) (height (shr-string-number (dom-attr dom 'height))) - (url (shr-expand-url (or url (dom-attr dom 'src))))) + (url (shr-expand-url (or url (shr--preferred-image dom))))) (let ((start (point-marker))) (when (zerop (length alt)) (setq alt "*")) @@ -1512,6 +1518,43 @@ The preference is a float determined from `shr-prefer-media-type'." (shr-fill-text (or (dom-attr dom 'title) alt)))))))) +(defun shr--preferred-image (dom) + (let ((srcset (dom-attr dom 'srcset)) + (frame-width (frame-pixel-width)) + (width (string-to-number (or (dom-attr dom 'width) "100"))) + candidate) + (when (> (length srcset) 0) + ;; srcset consist of a series of URL/size specifications + ;; separated by the ", " string. + (setq srcset + (sort (mapcar + (lambda (elem) + (let ((spec (split-string elem " "))) + (cond + ((= (length spec) 1) + ;; Make sure it's well formed. + (list (car spec) 0)) + ((string-match "\\([0-9]+\\)x\\'" (cadr spec)) + ;; If we have an "x" form, then use the width + ;; spec to compute the real width. + (list (car spec) + (* width (string-to-number + (match-string 1 (cadr spec)))))) + (t + (list (car spec) + (string-to-number (cadr spec))))))) + (split-string srcset ", ")) + (lambda (e1 e2) + (> (cadr e1) (cadr e2))))) + ;; Choose the smallest picture that's bigger than the current + ;; frame. + (setq candidate (caar srcset)) + (while (and srcset + (> (cadr (car srcset)) frame-width)) + (setq candidate (caar srcset)) + (pop srcset))) + (or candidate (dom-attr dom 'src)))) + (defun shr-string-number (string) (if (null string) nil @@ -1592,6 +1635,10 @@ The preference is a float determined from `shr-prefer-media-type'." (shr-ensure-paragraph) (let ((shr-list-mode 'ul)) (shr-generic dom)) + ;; If we end on an empty
  • , then make sure we really end on a new + ;; paragraph. + (unless (bolp) + (insert "\n")) (shr-ensure-paragraph)) (defun shr-tag-ol (dom) @@ -1618,7 +1665,9 @@ The preference is a float determined from `shr-prefer-media-type'." (put-text-property start (1+ start) 'shr-continuation-indentation shr-indentation) (put-text-property start (1+ start) 'shr-prefix-length (length bullet)) - (shr-generic dom))))) + (shr-generic dom)))) + (unless (bolp) + (insert "\n"))) (defun shr-mark-fill (start) ;; We may not have inserted any text to fill. @@ -1685,19 +1734,19 @@ The preference is a float determined from `shr-prefer-media-type'." (let* ((direction (dom-attr dom 'dir)) (char (cond ((equal direction "ltr") - #x202d) ; LRO + ?\N{LEFT-TO-RIGHT OVERRIDE}) ((equal direction "rtl") - #x202e)))) ; RLO + ?\N{RIGHT-TO-LEFT OVERRIDE})))) (when char - (insert char)) + (insert ?\N{FIRST STRONG ISOLATE} char)) (shr-generic dom) (when char - (insert #x202c)))) ; PDF + (insert ?\N{POP DIRECTIONAL FORMATTING} ?\N{POP DIRECTIONAL ISOLATE})))) (defun shr-tag-bdi (dom) - (insert #x2068) ; FSI + (insert ?\N{FIRST STRONG ISOLATE}) (shr-generic dom) - (insert #x2069)) ; PDI + (insert ?\N{POP DIRECTIONAL ISOLATE})) ;;; Table rendering algorithm.