]> code.delx.au - gnu-emacs/blob - lisp/gnus/nnrss.el
Merge from emacs--devo--0
[gnu-emacs] / lisp / gnus / nnrss.el
1 ;;; nnrss.el --- interfacing with RSS
2
3 ;; Copyright (C) 2001, 2002, 2003, 2004, 2005,
4 ;; 2006, 2007 Free Software Foundation, Inc.
5
6 ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu>
7 ;; Keywords: RSS
8
9 ;; This file is part of GNU Emacs.
10
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published
13 ;; by the Free Software Foundation; either version 2, or (at your
14 ;; option) any later version.
15
16 ;; GNU Emacs is distributed in the hope that it will be useful, but
17 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
24 ;; Boston, MA 02110-1301, USA.
25
26 ;;; Commentary:
27
28 ;;; Code:
29
30 (eval-when-compile (require 'cl))
31
32 (require 'gnus)
33 (require 'nnoo)
34 (require 'nnmail)
35 (require 'message)
36 (require 'mm-util)
37 (require 'gnus-util)
38 (require 'time-date)
39 (require 'rfc2231)
40 (require 'mm-url)
41 (require 'rfc2047)
42 (require 'mml)
43 (eval-when-compile
44 (ignore-errors
45 (require 'xml)))
46 (eval '(require 'xml))
47
48 (nnoo-declare nnrss)
49
50 (defvoo nnrss-directory (nnheader-concat gnus-directory "rss/")
51 "Where nnrss will save its files.")
52
53 ;; (group max rss-url)
54 (defvoo nnrss-server-data nil)
55
56 ;; (num timestamp url subject author date extra)
57 (defvoo nnrss-group-data nil)
58 (defvoo nnrss-group-max 0)
59 (defvoo nnrss-group-min 1)
60 (defvoo nnrss-group nil)
61 (defvoo nnrss-group-hashtb nil)
62 (defvoo nnrss-status-string "")
63
64 (defconst nnrss-version "nnrss 1.0")
65
66 (defvar nnrss-group-alist '()
67 "List of RSS addresses.")
68
69 (defvar nnrss-use-local nil)
70
71 (defvar nnrss-description-field 'X-Gnus-Description
72 "Field name used for DESCRIPTION.
73 To use the description in headers, put this name into `nnmail-extra-headers'.")
74
75 (defvar nnrss-url-field 'X-Gnus-Url
76 "Field name used for URL.
77 To use the description in headers, put this name into `nnmail-extra-headers'.")
78
79 (defvar nnrss-content-function nil
80 "A function which is called in `nnrss-request-article'.
81 The arguments are (ENTRY GROUP ARTICLE).
82 ENTRY is the record of the current headline. GROUP is the group name.
83 ARTICLE is the article number of the current headline.")
84
85 (defvar nnrss-file-coding-system mm-universal-coding-system
86 "Coding system used when reading and writing files.")
87
88 (defvar nnrss-compatible-encoding-alist
89 (delq nil (mapcar (lambda (elem)
90 (if (and (mm-coding-system-p (car elem))
91 (mm-coding-system-p (cdr elem)))
92 elem))
93 mm-charset-override-alist))
94 "Alist of encodings and those supersets.
95 The cdr of each element is used to decode data if it is available when
96 the car is what the data specify as the encoding. Or, the car is used
97 for decoding when the cdr that the data specify is not available.")
98
99 (defvar nnrss-wash-html-in-text-plain-parts nil
100 "*Non-nil means render text in text/plain parts as HTML.
101 The function specified by the `mm-text-html-renderer' variable will be
102 used to render text. If it is nil, text will simply be folded.")
103
104 (nnoo-define-basics nnrss)
105
106 ;;; Interface functions
107
108 (defsubst nnrss-format-string (string)
109 (gnus-replace-in-string string " *\n *" " "))
110
111 (defun nnrss-decode-group-name (group)
112 (if (and group (mm-coding-system-p 'utf-8))
113 (setq group (mm-decode-coding-string group 'utf-8))
114 group))
115
116 (deffoo nnrss-retrieve-headers (articles &optional group server fetch-old)
117 (setq group (nnrss-decode-group-name group))
118 (nnrss-possibly-change-group group server)
119 (let (e)
120 (save-excursion
121 (set-buffer nntp-server-buffer)
122 (erase-buffer)
123 (dolist (article articles)
124 (if (setq e (assq article nnrss-group-data))
125 (insert (number-to-string (car e)) "\t" ;; number
126 ;; subject
127 (or (nth 3 e) "")
128 "\t"
129 ;; from
130 (or (nth 4 e) "(nobody)")
131 "\t"
132 ;; date
133 (or (nth 5 e) "")
134 "\t"
135 ;; id
136 (format "<%d@%s.nnrss>" (car e) group)
137 "\t"
138 ;; refs
139 "\t"
140 ;; chars
141 "-1" "\t"
142 ;; lines
143 "-1" "\t"
144 ;; Xref
145 "" "\t"
146 (if (and (nth 6 e)
147 (memq nnrss-description-field
148 nnmail-extra-headers))
149 (concat (symbol-name nnrss-description-field)
150 ": "
151 (nnrss-format-string (nth 6 e))
152 "\t")
153 "")
154 (if (and (nth 2 e)
155 (memq nnrss-url-field
156 nnmail-extra-headers))
157 (concat (symbol-name nnrss-url-field)
158 ": "
159 (nnrss-format-string (nth 2 e))
160 "\t")
161 "")
162 "\n")))))
163 'nov)
164
165 (deffoo nnrss-request-group (group &optional server dont-check)
166 (setq group (nnrss-decode-group-name group))
167 (nnheader-message 6 "nnrss: Requesting %s..." group)
168 (nnrss-possibly-change-group group server)
169 (prog1
170 (if dont-check
171 t
172 (nnrss-check-group group server)
173 (nnheader-report 'nnrss "Opened group %s" group)
174 (nnheader-insert
175 "211 %d %d %d %s\n" nnrss-group-max nnrss-group-min nnrss-group-max
176 (prin1-to-string group)
177 t))
178 (nnheader-message 6 "nnrss: Requesting %s...done" group)))
179
180 (deffoo nnrss-close-group (group &optional server)
181 t)
182
183 (eval-when-compile
184 (defvar mm-text-html-renderer)
185 (defvar mm-text-html-washer-alist))
186
187 (deffoo nnrss-request-article (article &optional group server buffer)
188 (setq group (nnrss-decode-group-name group))
189 (when (stringp article)
190 (setq article (if (string-match "\\`<\\([0-9]+\\)@" article)
191 (string-to-number (match-string 1 article))
192 0)))
193 (nnrss-possibly-change-group group server)
194 (let ((e (assq article nnrss-group-data))
195 (nntp-server-buffer (or buffer nntp-server-buffer))
196 post err)
197 (when e
198 (with-current-buffer nntp-server-buffer
199 (erase-buffer)
200 (if group
201 (insert "Newsgroups: " group "\n"))
202 (if (nth 3 e)
203 (insert "Subject: " (nth 3 e) "\n"))
204 (if (nth 4 e)
205 (insert "From: " (nth 4 e) "\n"))
206 (if (nth 5 e)
207 (insert "Date: " (nnrss-format-string (nth 5 e)) "\n"))
208 (let ((header (buffer-string))
209 (text (nth 6 e))
210 (link (nth 2 e))
211 (enclosure (nth 7 e))
212 (comments (nth 8 e))
213 ;; Enable encoding of Newsgroups header in XEmacs.
214 (default-enable-multibyte-characters t)
215 (rfc2047-header-encoding-alist
216 (if (mm-coding-system-p 'utf-8)
217 (cons '("Newsgroups" . utf-8)
218 rfc2047-header-encoding-alist)
219 rfc2047-header-encoding-alist))
220 rfc2047-encode-encoded-words body fn)
221 (when (or text link enclosure comments)
222 (insert "\n")
223 (insert "<#multipart type=alternative>\n"
224 "<#part type=\"text/plain\">\n")
225 (setq body (point))
226 (when text
227 (insert text)
228 (goto-char body)
229 (if (and nnrss-wash-html-in-text-plain-parts
230 (progn
231 (require 'mm-view)
232 (setq fn (or (cdr (assq mm-text-html-renderer
233 mm-text-html-washer-alist))
234 mm-text-html-renderer))))
235 (progn
236 (narrow-to-region body (point-max))
237 (if (functionp fn)
238 (funcall fn)
239 (apply (car fn) (cdr fn)))
240 (widen)
241 (goto-char body)
242 (re-search-forward "[^\t\n ]" nil t)
243 (beginning-of-line)
244 (delete-region body (point))
245 (goto-char (point-max))
246 (skip-chars-backward "\t\n ")
247 (end-of-line)
248 (delete-region (point) (point-max))
249 (insert "\n"))
250 (while (re-search-forward "\n+" nil t)
251 (replace-match " "))
252 (goto-char body)
253 ;; See `nnrss-check-group', which inserts "<br /><br />".
254 (when (search-forward "<br /><br />" nil t)
255 (if (eobp)
256 (replace-match "\n")
257 (replace-match "\n\n")))
258 (unless (eobp)
259 (let ((fill-column default-fill-column)
260 (window (get-buffer-window nntp-server-buffer)))
261 (when window
262 (setq fill-column
263 (max 1 (/ (* (window-width window) 7) 8))))
264 (fill-region (point) (point-max))
265 (goto-char (point-max))
266 ;; XEmacs version of `fill-region' inserts newline.
267 (unless (bolp)
268 (insert "\n")))))
269 (when (or link enclosure)
270 (insert "\n")))
271 (when link
272 (insert link "\n"))
273 (when enclosure
274 (insert (car enclosure) " "
275 (nth 2 enclosure) " "
276 (nth 3 enclosure) "\n"))
277 (when comments
278 (insert comments "\n"))
279 (setq body (buffer-substring body (point)))
280 (insert "<#/part>\n"
281 "<#part type=\"text/html\">\n"
282 "<html><head></head><body>\n")
283 (when text
284 (insert text "\n"))
285 (when link
286 (insert "<p><a href=\"" link "\">link</a></p>\n"))
287 (when enclosure
288 (insert "<p><a href=\"" (car enclosure) "\">"
289 (cadr enclosure) "</a> " (nth 2 enclosure)
290 " " (nth 3 enclosure) "</p>\n"))
291 (when comments
292 (insert "<p><a href=\"" comments "\">comments</a></p>\n"))
293 (insert "</body></html>\n"
294 "<#/part>\n"
295 "<#/multipart>\n"))
296 (condition-case nil
297 (mml-to-mime)
298 (error
299 (erase-buffer)
300 (insert header
301 "Content-Type: text/plain; charset=gnus-decoded\n"
302 "Content-Transfer-Encoding: 8bit\n\n"
303 body)
304 (nnheader-message
305 3 "Warning - there might be invalid characters"))))
306 (goto-char (point-min))
307 (search-forward "\n\n")
308 (forward-line -1)
309 (insert (format "Message-ID: <%d@%s.nnrss>\n"
310 (car e)
311 (let ((rfc2047-encoding-type 'mime)
312 rfc2047-encode-max-chars)
313 (rfc2047-encode-string
314 (gnus-replace-in-string group "[\t\n ]+" "_")))))
315 (when nnrss-content-function
316 (funcall nnrss-content-function e group article))))
317 (cond
318 (err
319 (nnheader-report 'nnrss err))
320 ((not e)
321 (nnheader-report 'nnrss "no such id: %d" article))
322 (t
323 (nnheader-report 'nnrss "article %s retrieved" (car e))
324 ;; we return the article number.
325 (cons nnrss-group (car e))))))
326
327 (deffoo nnrss-request-list (&optional server)
328 (nnrss-possibly-change-group nil server)
329 (nnrss-generate-active)
330 t)
331
332 (deffoo nnrss-open-server (server &optional defs connectionless)
333 (nnrss-read-server-data server)
334 (nnoo-change-server 'nnrss server defs)
335 t)
336
337 (deffoo nnrss-request-expire-articles
338 (articles group &optional server force)
339 (setq group (nnrss-decode-group-name group))
340 (nnrss-possibly-change-group group server)
341 (let (e days not-expirable changed)
342 (dolist (art articles)
343 (if (and (setq e (assq art nnrss-group-data))
344 (nnmail-expired-article-p
345 group
346 (if (listp (setq days (nth 1 e))) days
347 (days-to-time (- days (time-to-days '(0 0)))))
348 force))
349 (setq nnrss-group-data (delq e nnrss-group-data)
350 changed t)
351 (push art not-expirable)))
352 (if changed
353 (nnrss-save-group-data group server))
354 not-expirable))
355
356 (deffoo nnrss-request-delete-group (group &optional force server)
357 (setq group (nnrss-decode-group-name group))
358 (nnrss-possibly-change-group group server)
359 (let (elem)
360 ;; There may be two or more entries in `nnrss-group-alist' since
361 ;; this function didn't delete them formerly.
362 (while (setq elem (assoc group nnrss-group-alist))
363 (setq nnrss-group-alist (delq elem nnrss-group-alist))))
364 (setq nnrss-server-data
365 (delq (assoc group nnrss-server-data) nnrss-server-data))
366 (nnrss-save-server-data server)
367 (ignore-errors
368 (delete-file (nnrss-make-filename group server)))
369 t)
370
371 (deffoo nnrss-request-list-newsgroups (&optional server)
372 (nnrss-possibly-change-group nil server)
373 (save-excursion
374 (set-buffer nntp-server-buffer)
375 (erase-buffer)
376 (dolist (elem nnrss-group-alist)
377 (if (third elem)
378 (insert (car elem) "\t" (third elem) "\n"))))
379 t)
380
381 (nnoo-define-skeleton nnrss)
382
383 ;;; Internal functions
384 (eval-when-compile (defun xml-rpc-method-call (&rest args)))
385
386 (defun nnrss-get-encoding ()
387 "Return an encoding attribute specified in the current xml contents.
388 If `nnrss-compatible-encoding-alist' specifies the compatible encoding,
389 it is used instead. If the xml contents doesn't specify the encoding,
390 return `utf-8' which is the default encoding for xml if it is available,
391 otherwise return nil."
392 (goto-char (point-min))
393 (if (re-search-forward
394 "<\\?[^>]*encoding=\\(\"\\([^\">]+\\)\"\\|'\\([^'>]+\\)'\\)"
395 nil t)
396 (let ((encoding (intern (downcase (or (match-string 2)
397 (match-string 3))))))
398 (or
399 (mm-coding-system-p (cdr (assq encoding
400 nnrss-compatible-encoding-alist)))
401 (mm-coding-system-p encoding)
402 (mm-coding-system-p (car (rassq encoding
403 nnrss-compatible-encoding-alist)))))
404 (mm-coding-system-p 'utf-8)))
405
406 (defun nnrss-fetch (url &optional local)
407 "Fetch URL and put it in a the expected Lisp structure."
408 (mm-with-unibyte-buffer
409 ;;some CVS versions of url.el need this to close the connection quickly
410 (let (cs xmlform htmlform)
411 ;; bit o' work necessary for w3 pre-cvs and post-cvs
412 (if local
413 (let ((coding-system-for-read 'binary))
414 (insert-file-contents url))
415 ;; FIXME: shouldn't binding `coding-system-for-read' be moved
416 ;; to `mm-url-insert'?
417 (let ((coding-system-for-read 'binary))
418 (condition-case err
419 (mm-url-insert url)
420 (error (if (or debug-on-quit debug-on-error)
421 (signal (car err) (cdr err))
422 (message "nnrss: Failed to fetch %s" url))))))
423 (nnheader-remove-cr-followed-by-lf)
424 ;; Decode text according to the encoding attribute.
425 (when (setq cs (nnrss-get-encoding))
426 (insert (prog1
427 (mm-decode-coding-string (buffer-string) cs)
428 (erase-buffer)
429 (mm-enable-multibyte))))
430 (goto-char (point-min))
431
432 ;; Because xml-parse-region can't deal with anything that isn't
433 ;; xml and w3-parse-buffer can't deal with some xml, we have to
434 ;; parse with xml-parse-region first and, if that fails, parse
435 ;; with w3-parse-buffer. Yuck. Eventually, someone should find out
436 ;; why w3-parse-buffer fails to parse some well-formed xml and
437 ;; fix it.
438
439 (condition-case err1
440 (setq xmlform (xml-parse-region (point-min) (point-max)))
441 (error
442 (condition-case err2
443 (setq htmlform (caddar (w3-parse-buffer
444 (current-buffer))))
445 (error
446 (message "\
447 nnrss: %s: Not valid XML %s and w3-parse doesn't work %s"
448 url err1 err2)))))
449 (if htmlform
450 htmlform
451 xmlform))))
452
453 (defun nnrss-possibly-change-group (&optional group server)
454 (when (and server
455 (not (nnrss-server-opened server)))
456 (nnrss-open-server server))
457 (when (and group (not (equal group nnrss-group)))
458 (nnrss-read-group-data group server)
459 (setq nnrss-group group)))
460
461 (defvar nnrss-extra-categories '(nnrss-snarf-moreover-categories))
462
463 (defun nnrss-generate-active ()
464 (when (y-or-n-p "Fetch extra categories? ")
465 (dolist (func nnrss-extra-categories)
466 (funcall func)))
467 (save-excursion
468 (set-buffer nntp-server-buffer)
469 (erase-buffer)
470 (dolist (elem nnrss-group-alist)
471 (insert (prin1-to-string (car elem)) " 0 1 y\n"))
472 (dolist (elem nnrss-server-data)
473 (unless (assoc (car elem) nnrss-group-alist)
474 (insert (prin1-to-string (car elem)) " 0 1 y\n")))))
475
476 (eval-and-compile (autoload 'timezone-parse-date "timezone"))
477
478 (defun nnrss-normalize-date (date)
479 "Return a date string of DATE in the RFC822 style.
480 This function handles the ISO 8601 date format described in
481 <URL:http://www.w3.org/TR/NOTE-datetime>, and also the RFC822 style
482 which RSS 2.0 allows."
483 (let (case-fold-search vector year month day time zone cts)
484 (cond ((null date))
485 ;; RFC822
486 ((string-match " [0-9]+ " date)
487 (setq vector (timezone-parse-date date)
488 year (string-to-number (aref vector 0)))
489 (when (>= year 1969)
490 (setq month (string-to-number (aref vector 1))
491 day (string-to-number (aref vector 2)))
492 (unless (>= (length (setq time (aref vector 3))) 3)
493 (setq time "00:00:00"))
494 (when (and (setq zone (aref vector 4))
495 (not (string-match "\\`[A-Z+-]" zone)))
496 (setq zone nil))))
497 ;; ISO 8601
498 ((string-match
499 (eval-when-compile
500 (concat
501 ;; 1. year
502 "\\(199[0-9]\\|20[0-9][0-9]\\)"
503 "\\(-"
504 ;; 3. month
505 "\\([01][0-9]\\)"
506 "\\(-"
507 ;; 5. day
508 "\\([0-3][0-9]\\)"
509 "\\)?\\)?\\(T"
510 ;; 7. hh:mm
511 "\\([012][0-9]:[0-5][0-9]\\)"
512 "\\("
513 ;; 9. :ss
514 "\\(:[0-5][0-9]\\)"
515 "\\(\\.[0-9]+\\)?\\)?\\)?"
516 ;; 13+14,15,16. zone
517 "\\(\\(\\([+-][012][0-9]\\):\\([0-5][0-9]\\)\\)"
518 "\\|\\([+-][012][0-9][0-5][0-9]\\)"
519 "\\|\\(Z\\)\\)?"))
520 date)
521 (setq year (string-to-number (match-string 1 date))
522 month (string-to-number (or (match-string 3 date) "1"))
523 day (string-to-number (or (match-string 5 date) "1"))
524 time (if (match-beginning 9)
525 (substring date (match-beginning 7) (match-end 9))
526 (concat (or (match-string 7 date) "00:00") ":00"))
527 zone (cond ((match-beginning 13)
528 (concat (match-string 13 date)
529 (match-string 14 date)))
530 ((match-beginning 16) ;; Z
531 "+0000")
532 (t ;; nil if zone is not provided.
533 (match-string 15 date))))))
534 (if month
535 (progn
536 (setq cts (current-time-string (encode-time 0 0 0 day month year)))
537 (format "%s, %02d %s %04d %s%s"
538 (substring cts 0 3) day (substring cts 4 7) year time
539 (if zone
540 (concat " " zone)
541 "")))
542 (message-make-date))))
543
544 ;;; data functions
545
546 (defun nnrss-read-server-data (server)
547 (setq nnrss-server-data nil)
548 (let ((file (nnrss-make-filename "nnrss" server)))
549 (when (file-exists-p file)
550 ;; In Emacs 21.3 and earlier, `load' doesn't support non-ASCII
551 ;; file names. So, we use `insert-file-contents' instead.
552 (mm-with-multibyte-buffer
553 (let ((coding-system-for-read nnrss-file-coding-system)
554 (file-name-coding-system nnmail-pathname-coding-system))
555 (insert-file-contents file)
556 (eval-region (point-min) (point-max)))))))
557
558 (defun nnrss-save-server-data (server)
559 (gnus-make-directory nnrss-directory)
560 (let ((coding-system-for-write nnrss-file-coding-system)
561 (file-name-coding-system nnmail-pathname-coding-system))
562 (with-temp-file (nnrss-make-filename "nnrss" server)
563 (insert (format ";; -*- coding: %s; -*-\n"
564 nnrss-file-coding-system))
565 (gnus-prin1 `(setq nnrss-group-alist ',nnrss-group-alist))
566 (insert "\n")
567 (gnus-prin1 `(setq nnrss-server-data ',nnrss-server-data)))))
568
569 (defun nnrss-read-group-data (group server)
570 (setq nnrss-group-data nil)
571 (setq nnrss-group-hashtb (gnus-make-hashtable))
572 (let ((pair (assoc group nnrss-server-data)))
573 (setq nnrss-group-max (or (cadr pair) 0))
574 (setq nnrss-group-min (+ nnrss-group-max 1)))
575 (let ((file (nnrss-make-filename group server)))
576 (when (file-exists-p file)
577 ;; In Emacs 21.3 and earlier, `load' doesn't support non-ASCII
578 ;; file names. So, we use `insert-file-contents' instead.
579 (mm-with-multibyte-buffer
580 (let ((coding-system-for-read nnrss-file-coding-system)
581 (file-name-coding-system nnmail-pathname-coding-system))
582 (insert-file-contents file)
583 (eval-region (point-min) (point-max))))
584 (dolist (e nnrss-group-data)
585 (gnus-sethash (or (nth 2 e) (nth 6 e)) t nnrss-group-hashtb)
586 (when (and (car e) (> nnrss-group-min (car e)))
587 (setq nnrss-group-min (car e)))
588 (when (and (car e) (< nnrss-group-max (car e)))
589 (setq nnrss-group-max (car e)))))))
590
591 (defun nnrss-save-group-data (group server)
592 (gnus-make-directory nnrss-directory)
593 (let ((coding-system-for-write nnrss-file-coding-system)
594 (file-name-coding-system nnmail-pathname-coding-system))
595 (with-temp-file (nnrss-make-filename group server)
596 (insert (format ";; -*- coding: %s; -*-\n"
597 nnrss-file-coding-system))
598 (gnus-prin1 `(setq nnrss-group-data ',nnrss-group-data)))))
599
600 (defun nnrss-make-filename (name server)
601 (expand-file-name
602 (nnrss-translate-file-chars
603 (concat name
604 (and server
605 (not (equal server ""))
606 "-")
607 server
608 ".el"))
609 nnrss-directory))
610
611 (gnus-add-shutdown 'nnrss-close 'gnus)
612
613 (defun nnrss-close ()
614 "Clear internal nnrss variables."
615 (setq nnrss-group-data nil
616 nnrss-server-data nil
617 nnrss-group-hashtb nil
618 nnrss-group-alist nil))
619
620 ;;; URL interface
621
622 (defun nnrss-no-cache (url)
623 "")
624
625 (defun nnrss-insert-w3 (url)
626 (mm-with-unibyte-current-buffer
627 (condition-case err
628 (mm-url-insert url)
629 (error (if (or debug-on-quit debug-on-error)
630 (signal (car err) (cdr err))
631 (message "nnrss: Failed to fetch %s" url))))))
632
633 (defun nnrss-decode-entities-string (string)
634 (if string
635 (mm-with-multibyte-buffer
636 (insert string)
637 (mm-url-decode-entities-nbsp)
638 (buffer-string))))
639
640 (defalias 'nnrss-insert 'nnrss-insert-w3)
641
642 (defun nnrss-mime-encode-string (string)
643 (mm-with-multibyte-buffer
644 (insert string)
645 (mm-url-decode-entities-nbsp)
646 (goto-char (point-min))
647 (while (re-search-forward "[\t\n ]+" nil t)
648 (replace-match " "))
649 (goto-char (point-min))
650 (skip-chars-forward " ")
651 (delete-region (point-min) (point))
652 (goto-char (point-max))
653 (skip-chars-forward " ")
654 (delete-region (point) (point-max))
655 (let ((rfc2047-encoding-type 'mime)
656 rfc2047-encode-max-chars)
657 (rfc2047-encode-region (point-min) (point-max)))
658 (goto-char (point-min))
659 (while (search-forward "\n" nil t)
660 (delete-backward-char 1))
661 (buffer-string)))
662
663 ;;; Snarf functions
664
665 (defun nnrss-check-group (group server)
666 (let (file xml subject url extra changed author date feed-subject
667 enclosure comments rss-ns rdf-ns content-ns dc-ns)
668 (if (and nnrss-use-local
669 (file-exists-p (setq file (expand-file-name
670 (nnrss-translate-file-chars
671 (concat group ".xml"))
672 nnrss-directory))))
673 (setq xml (nnrss-fetch file t))
674 (setq url (or (nth 2 (assoc group nnrss-server-data))
675 (second (assoc group nnrss-group-alist))))
676 (unless url
677 (setq url
678 (cdr
679 (assoc 'href
680 (nnrss-discover-feed
681 (read-string
682 (format "URL to search for %s: " group) "http://")))))
683 (let ((pair (assoc group nnrss-server-data)))
684 (if pair
685 (setcdr (cdr pair) (list url))
686 (push (list group nnrss-group-max url) nnrss-server-data)))
687 (setq changed t))
688 (setq xml (nnrss-fetch url)))
689 ;; See
690 ;; http://feeds.archive.org/validator/docs/howto/declare_namespaces.html
691 ;; for more RSS namespaces.
692 (setq dc-ns (nnrss-get-namespace-prefix xml "http://purl.org/dc/elements/1.1/")
693 rdf-ns (nnrss-get-namespace-prefix xml "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
694 rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/")
695 content-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/modules/content/"))
696 (dolist (item (nreverse (nnrss-find-el (intern (concat rss-ns "item")) xml)))
697 (when (and (listp item)
698 (string= (concat rss-ns "item") (car item))
699 (if (setq url (nnrss-decode-entities-string
700 (nnrss-node-text rss-ns 'link (cddr item))))
701 (not (gnus-gethash url nnrss-group-hashtb))
702 (setq extra (or (nnrss-node-text content-ns 'encoded item)
703 (nnrss-node-text rss-ns 'description item)))
704 (not (gnus-gethash extra nnrss-group-hashtb))))
705 (setq subject (nnrss-node-text rss-ns 'title item))
706 (setq extra (or extra
707 (nnrss-node-text content-ns 'encoded item)
708 (nnrss-node-text rss-ns 'description item)))
709 (if (setq feed-subject (nnrss-node-text dc-ns 'subject item))
710 (setq extra (concat feed-subject "<br /><br />" extra)))
711 (setq author (or (nnrss-node-text rss-ns 'author item)
712 (nnrss-node-text dc-ns 'creator item)
713 (nnrss-node-text dc-ns 'contributor item)))
714 (setq date (nnrss-normalize-date
715 (or (nnrss-node-text dc-ns 'date item)
716 (nnrss-node-text rss-ns 'pubDate item))))
717 (setq comments (nnrss-node-text rss-ns 'comments item))
718 (when (setq enclosure (cadr (assq (intern (concat rss-ns "enclosure")) item)))
719 (let ((url (cdr (assq 'url enclosure)))
720 (len (cdr (assq 'length enclosure)))
721 (type (cdr (assq 'type enclosure)))
722 (name))
723 (setq len
724 (if (and len (integerp (setq len (string-to-number len))))
725 ;; actually already in `ls-lisp-format-file-size' but
726 ;; probably not worth to require it for one function
727 (do ((size (/ len 1.0) (/ size 1024.0))
728 (post-fixes (list "" "k" "M" "G" "T" "P" "E")
729 (cdr post-fixes)))
730 ((< size 1024)
731 (format "%.1f%s" size (car post-fixes))))
732 "0"))
733 (setq url (or url ""))
734 (setq name (if (string-match "/\\([^/]*\\)$" url)
735 (match-string 1 url)
736 "file"))
737 (setq type (or type ""))
738 (setq enclosure (list url name len type))))
739 (push
740 (list
741 (incf nnrss-group-max)
742 (current-time)
743 url
744 (and subject (nnrss-mime-encode-string subject))
745 (and author (nnrss-mime-encode-string author))
746 date
747 (and extra (nnrss-decode-entities-string extra))
748 enclosure
749 comments)
750 nnrss-group-data)
751 (gnus-sethash (or url extra) t nnrss-group-hashtb)
752 (setq changed t))
753 (setq extra nil))
754 (when changed
755 (nnrss-save-group-data group server)
756 (let ((pair (assoc group nnrss-server-data)))
757 (if pair
758 (setcar (cdr pair) nnrss-group-max)
759 (push (list group nnrss-group-max) nnrss-server-data)))
760 (nnrss-save-server-data server))))
761
762 (defun nnrss-opml-import (opml-file)
763 "OPML subscriptions import.
764 Read the file and attempt to subscribe to each Feed in the file."
765 (interactive "fImport file: ")
766 (mapc
767 (lambda (node)
768 (let ((xmlurl (cdr (assq 'xmlUrl (cadr node)))))
769 (when (and xmlurl
770 (not (string-match "\\`[\t ]*\\'" xmlurl))
771 (prog1
772 (y-or-n-p (format "Subscribe to %s " xmlurl))
773 (message "")))
774 (condition-case err
775 (progn
776 (gnus-group-make-rss-group xmlurl)
777 (forward-line 1))
778 (error
779 (message
780 "Failed to subscribe to %s (%s); type any key to continue: "
781 xmlurl
782 (error-message-string err))
783 (let ((echo-keystrokes 0))
784 (read-char)))))))
785 (nnrss-find-el 'outline
786 (mm-with-multibyte-buffer
787 (insert-file-contents opml-file)
788 (xml-parse-region (point-min) (point-max))))))
789
790 (defun nnrss-opml-export ()
791 "OPML subscription export.
792 Export subscriptions to a buffer in OPML Format."
793 (interactive)
794 (with-current-buffer (get-buffer-create "*OPML Export*")
795 (mm-set-buffer-file-coding-system 'utf-8)
796 (insert "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
797 "<!-- OPML generated by Emacs Gnus' nnrss.el -->\n"
798 "<opml version=\"1.1\">\n"
799 " <head>\n"
800 " <title>mySubscriptions</title>\n"
801 " <dateCreated>" (format-time-string "%a, %d %b %Y %T %z")
802 "</dateCreated>\n"
803 " <ownerEmail>" user-mail-address "</ownerEmail>\n"
804 " <ownerName>" (user-full-name) "</ownerName>\n"
805 " </head>\n"
806 " <body>\n")
807 (dolist (sub nnrss-group-alist)
808 (insert " <outline text=\"" (car sub)
809 "\" xmlUrl=\"" (cadr sub) "\"/>\n"))
810 (insert " </body>\n"
811 "</opml>\n"))
812 (pop-to-buffer "*OPML Export*")
813 (when (fboundp 'sgml-mode)
814 (sgml-mode)))
815
816 (defun nnrss-generate-download-script ()
817 "Generate a download script in the current buffer.
818 It is useful when `(setq nnrss-use-local t)'."
819 (interactive)
820 (insert "#!/bin/sh\n")
821 (insert "WGET=wget\n")
822 (insert "RSSDIR='" (expand-file-name nnrss-directory) "'\n")
823 (dolist (elem nnrss-server-data)
824 (let ((url (or (nth 2 elem)
825 (second (assoc (car elem) nnrss-group-alist)))))
826 (insert "$WGET -q -O \"$RSSDIR\"/'"
827 (nnrss-translate-file-chars (concat (car elem) ".xml"))
828 "' '" url "'\n"))))
829
830 (defun nnrss-translate-file-chars (name)
831 (let ((nnheader-file-name-translation-alist
832 (append nnheader-file-name-translation-alist '((?' . ?_)))))
833 (nnheader-translate-file-chars name)))
834
835 (defvar nnrss-moreover-url
836 "http://w.moreover.com/categories/category_list_rss.html"
837 "The url of moreover.com categories.")
838
839 (defun nnrss-snarf-moreover-categories ()
840 "Snarf RSS links from moreover.com."
841 (interactive)
842 (let (category name url changed)
843 (with-temp-buffer
844 (nnrss-insert nnrss-moreover-url)
845 (goto-char (point-min))
846 (while (re-search-forward
847 "<a name=\"\\([^\"]+\\)\">\\|<a href=\"\\(http://[^\"]*moreover\\.com[^\"]+page\\?c=\\([^\"&]+\\)&o=rss\\)" nil t)
848 (if (match-string 1)
849 (setq category (match-string 1))
850 (setq url (match-string 2)
851 name (mm-url-decode-entities-string
852 (rfc2231-decode-encoded-string
853 (match-string 3))))
854 (if category
855 (setq name (concat category "." name)))
856 (unless (assoc name nnrss-server-data)
857 (setq changed t)
858 (push (list name 0 url) nnrss-server-data)))))
859 (if changed
860 (nnrss-save-server-data ""))))
861
862 (defun nnrss-node-text (namespace local-name element)
863 (let* ((node (assq (intern (concat namespace (symbol-name local-name)))
864 element))
865 (text (if (and node (listp node))
866 (nnrss-node-just-text node)
867 node))
868 (cleaned-text (if text
869 (gnus-replace-in-string
870 (gnus-replace-in-string
871 text "^[\000-\037\177]+\\|^ +\\| +$" "")
872 "\r\n" "\n"))))
873 (if (string-equal "" cleaned-text)
874 nil
875 cleaned-text)))
876
877 (defun nnrss-node-just-text (node)
878 (if (and node (listp node))
879 (mapconcat 'nnrss-node-just-text (cddr node) " ")
880 node))
881
882 (defun nnrss-find-el (tag data &optional found-list)
883 "Find the all matching elements in the data.
884 Careful with this on large documents!"
885 (when (consp data)
886 (dolist (bit data)
887 (when (car-safe bit)
888 (when (equal tag (car bit))
889 ;; Old xml.el may return a list of string.
890 (when (and (consp (caddr bit))
891 (stringp (caaddr bit)))
892 (setcar (cddr bit) (caaddr bit)))
893 (setq found-list
894 (append found-list
895 (list bit))))
896 (if (and (consp (car-safe (caddr bit)))
897 (not (stringp (caddr bit))))
898 (setq found-list
899 (append found-list
900 (nnrss-find-el
901 tag (caddr bit))))
902 (setq found-list
903 (append found-list
904 (nnrss-find-el
905 tag (cddr bit))))))))
906 found-list)
907
908 (defun nnrss-rsslink-p (el)
909 "Test if the element we are handed is an RSS autodiscovery link."
910 (and (eq (car-safe el) 'link)
911 (string-equal (cdr (assoc 'rel (cadr el))) "alternate")
912 (or (string-equal (cdr (assoc 'type (cadr el)))
913 "application/rss+xml")
914 (string-equal (cdr (assoc 'type (cadr el))) "text/xml"))))
915
916 (defun nnrss-get-rsslinks (data)
917 "Extract the <link> elements that are links to RSS from the parsed data."
918 (delq nil (mapcar
919 (lambda (el)
920 (if (nnrss-rsslink-p el) el))
921 (nnrss-find-el 'link data))))
922
923 (defun nnrss-extract-hrefs (data)
924 "Recursively extract hrefs from a page's source.
925 DATA should be the output of `xml-parse-region' or
926 `w3-parse-buffer'."
927 (mapcar (lambda (ahref)
928 (cdr (assoc 'href (cadr ahref))))
929 (nnrss-find-el 'a data)))
930
931 (defmacro nnrss-match-macro (base-uri item onsite-list offsite-list)
932 `(cond ((or (string-match (concat "^" ,base-uri) ,item)
933 (not (string-match "://" ,item)))
934 (setq ,onsite-list (append ,onsite-list (list ,item))))
935 (t (setq ,offsite-list (append ,offsite-list (list ,item))))))
936
937 (defun nnrss-order-hrefs (base-uri hrefs)
938 "Given a list of hrefs, sort them using the following priorities:
939 1. links ending in .rss
940 2. links ending in .rdf
941 3. links ending in .xml
942 4. links containing the above
943 5. offsite links
944
945 BASE-URI is used to determine the location of the links and
946 whether they are `offsite' or `onsite'."
947 (let (rss-onsite-end rdf-onsite-end xml-onsite-end
948 rss-onsite-in rdf-onsite-in xml-onsite-in
949 rss-offsite-end rdf-offsite-end xml-offsite-end
950 rss-offsite-in rdf-offsite-in xml-offsite-in)
951 (dolist (href hrefs)
952 (cond ((null href))
953 ((string-match "\\.rss$" href)
954 (nnrss-match-macro
955 base-uri href rss-onsite-end rss-offsite-end))
956 ((string-match "\\.rdf$" href)
957 (nnrss-match-macro
958 base-uri href rdf-onsite-end rdf-offsite-end))
959 ((string-match "\\.xml$" href)
960 (nnrss-match-macro
961 base-uri href xml-onsite-end xml-offsite-end))
962 ((string-match "rss" href)
963 (nnrss-match-macro
964 base-uri href rss-onsite-in rss-offsite-in))
965 ((string-match "rdf" href)
966 (nnrss-match-macro
967 base-uri href rdf-onsite-in rdf-offsite-in))
968 ((string-match "xml" href)
969 (nnrss-match-macro
970 base-uri href xml-onsite-in xml-offsite-in))))
971 (append
972 rss-onsite-end rdf-onsite-end xml-onsite-end
973 rss-onsite-in rdf-onsite-in xml-onsite-in
974 rss-offsite-end rdf-offsite-end xml-offsite-end
975 rss-offsite-in rdf-offsite-in xml-offsite-in)))
976
977 (defun nnrss-discover-feed (url)
978 "Given a page, find an RSS feed using Mark Pilgrim's
979 `ultra-liberal rss locator' (http://diveintomark.org/2002/08/15.html)."
980
981 (let ((parsed-page (nnrss-fetch url)))
982
983 ;; 1. if this url is the rss, use it.
984 (if (nnrss-rss-p parsed-page)
985 (let ((rss-ns (nnrss-get-namespace-prefix parsed-page "http://purl.org/rss/1.0/")))
986 (nnrss-rss-title-description rss-ns parsed-page url))
987
988 ;; 2. look for the <link rel="alternate"
989 ;; type="application/rss+xml" and use that if it is there.
990 (let ((links (nnrss-get-rsslinks parsed-page)))
991 (if links
992 (let* ((xml (nnrss-fetch
993 (cdr (assoc 'href (cadar links)))))
994 (rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/")))
995 (nnrss-rss-title-description rss-ns xml (cdr (assoc 'href (cadar links)))))
996
997 ;; 3. look for links on the site in the following order:
998 ;; - onsite links ending in .rss, .rdf, or .xml
999 ;; - onsite links containing any of the above
1000 ;; - offsite links ending in .rss, .rdf, or .xml
1001 ;; - offsite links containing any of the above
1002 (let* ((base-uri (progn (string-match ".*://[^/]+/?" url)
1003 (match-string 0 url)))
1004 (hrefs (nnrss-order-hrefs
1005 base-uri (nnrss-extract-hrefs parsed-page)))
1006 (rss-link nil))
1007 (while (and (eq rss-link nil) (not (eq hrefs nil)))
1008 (let ((href-data (nnrss-fetch (car hrefs))))
1009 (if (nnrss-rss-p href-data)
1010 (let* ((rss-ns (nnrss-get-namespace-prefix href-data "http://purl.org/rss/1.0/")))
1011 (setq rss-link (nnrss-rss-title-description
1012 rss-ns href-data (car hrefs))))
1013 (setq hrefs (cdr hrefs)))))
1014 (if rss-link rss-link
1015
1016 ;; 4. check syndic8
1017 (nnrss-find-rss-via-syndic8 url))))))))
1018
1019 (defun nnrss-find-rss-via-syndic8 (url)
1020 "Query syndic8 for the rss feeds it has for URL."
1021 (if (not (locate-library "xml-rpc"))
1022 (progn
1023 (message "XML-RPC is not available... not checking Syndic8.")
1024 nil)
1025 (require 'xml-rpc)
1026 (let ((feedid (xml-rpc-method-call
1027 "http://www.syndic8.com/xmlrpc.php"
1028 'syndic8.FindSites
1029 url)))
1030 (when feedid
1031 (let* ((feedinfo (xml-rpc-method-call
1032 "http://www.syndic8.com/xmlrpc.php"
1033 'syndic8.GetFeedInfo
1034 feedid))
1035 (urllist
1036 (delq nil
1037 (mapcar
1038 (lambda (listinfo)
1039 (if (string-equal
1040 (cdr (assoc "status" listinfo))
1041 "Syndicated")
1042 (cons
1043 (cdr (assoc "sitename" listinfo))
1044 (list
1045 (cons 'title
1046 (cdr (assoc
1047 "sitename" listinfo)))
1048 (cons 'href
1049 (cdr (assoc
1050 "dataurl" listinfo)))))))
1051 feedinfo))))
1052 (if (not (> (length urllist) 1))
1053 (cdar urllist)
1054 (let ((completion-ignore-case t)
1055 (selection
1056 (mapcar (lambda (listinfo)
1057 (cons (cdr (assoc "sitename" listinfo))
1058 (string-to-number
1059 (cdr (assoc "feedid" listinfo)))))
1060 feedinfo)))
1061 (cdr (assoc
1062 (completing-read
1063 "Multiple feeds found. Select one: "
1064 selection nil t) urllist)))))))))
1065
1066 (defun nnrss-rss-p (data)
1067 "Test if DATA is an RSS feed.
1068 Simply ensures that the first element is rss or rdf."
1069 (or (eq (caar data) 'rss)
1070 (eq (caar data) 'rdf:RDF)))
1071
1072 (defun nnrss-rss-title-description (rss-namespace data url)
1073 "Return the title of an RSS feed."
1074 (if (nnrss-rss-p data)
1075 (let ((description (intern (concat rss-namespace "description")))
1076 (title (intern (concat rss-namespace "title")))
1077 (channel (nnrss-find-el (intern (concat rss-namespace "channel"))
1078 data)))
1079 (list
1080 (cons 'description (caddr (nth 0 (nnrss-find-el description channel))))
1081 (cons 'title (caddr (nth 0 (nnrss-find-el title channel))))
1082 (cons 'href url)))))
1083
1084 (defun nnrss-get-namespace-prefix (el uri)
1085 "Given EL (containing a parsed element) and URI (containing a string
1086 that gives the URI for which you want to retrieve the namespace
1087 prefix), return the prefix."
1088 (let* ((prefix (car (rassoc uri (cadar el))))
1089 (nslist (if prefix
1090 (split-string (symbol-name prefix) ":")))
1091 (ns (cond ((eq (length nslist) 1) ; no prefix given
1092 "")
1093 ((eq (length nslist) 2) ; extract prefix
1094 (cadr nslist)))))
1095 (if (and ns (not (string= ns "")))
1096 (concat ns ":")
1097 ns)))
1098
1099 (provide 'nnrss)
1100
1101
1102 ;;; nnrss.el ends here
1103
1104 ;;; arch-tag: 12910c07-0cdf-44fb-8d2c-416ded64c267