code.delx.au - monosys/blob - scripts/youtube.cgi
More fixes for youtube.cgi
[monosys] / scripts / youtube.cgi
1 #!/usr/bin/env python
2
3 import cgi
4 from lxml.html import document_fromstring
5 import os
6 import re
7 import shutil
8 import subprocess
9 import sys
10 import urllib
11
12
# Override the User-Agent string that urllib sends with every request made by
# this script, so requests identify themselves as a desktop Firefox browser.
urllib.URLopener.version = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) "
    "Gecko/20100101 Firefox/4.0.1"
)
14
# Table of (format code, file extension) pairs, listed in order of preference.
# get_video_url() walks this list from the top and takes the first format the
# video page offers, so earlier entries win over later ones.  The trailing
# comment on each row is the frame size of that format.
fmt_quality = [
    (38, ".mp4"),   # 4096x3072
    (37, ".mp4"),   # 1920x1080
    (22, ".mp4"),   # 1280x720
    (45, ".webm"),  # 1280x720
    (43, ".webm"),  # 640x360
    (35, ".flv"),   # 854x480
    (34, ".flv"),   # 640x360
    (18, ".mp4"),   # 480x360
    (5, ".flv"),    # 400x240
    (17, ".3gp"),   # 176x144
]
27
28
def print_form(url="", msg=""):
    """Write a complete CGI response containing the download form to stdout.

    The response is a Content-Type header followed by an XHTML page.

    Args:
        url: Text pre-filled into the URL input box.  It is escaped before
            being placed in the ``value`` attribute, so it may contain any
            characters (``&``, ``"``, ``<`` ...).
        msg: Markup inserted verbatim between the page heading and the form.
            It is NOT escaped; callers must pass well-formed, trusted XHTML.
    """
    # Imported locally so this function does not depend on a new file-level
    # import.
    from xml.sax.saxutils import escape as xml_escape

    # The page is served as application/xhtml+xml, so a raw "&" (present in
    # most YouTube URLs) would make the document malformed, and a raw '"' or
    # "<" would let the submitted URL inject markup into the page.
    safe_url = xml_escape(url, {'"': "&quot;"})

    # Plain str.replace is used instead of str.format because the template
    # contains literal braces (CSS rules and the bookmarklet's JavaScript).
    page = """
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>delx.net.au - YouTube Scraper</title>
<link rel="stylesheet" type="text/css" href="/style.css"/>
<style type="text/css">
input[type="text"] {
width: 100%;
}
.error {
color: red;
}
</style>
</head>
<body>
<h1>delx.net.au - YouTube Scraper</h1>
{0}
<form action="" method="get">
<p>This page will let you easily download YouTube videos to watch offline. It
will automatically grab the highest quality version.</p>
<div><input type="text" name="url" value="{1}"/></div>
<div><input type="submit" value="Download!"/></div>
</form>
<p>Tip! Use this bookmarklet: <a href="javascript:(function(){window.location='http://delx.net.au/utils/youtube.cgi?url='+escape(location);})()">YouTube Download</a>
to easily download videos. Right-click the link and add it to bookmarks,
then when you're looking at a YouTube page select that bookmark from your
browser's bookmarks menu to download the video straight away.</p>
</body>
</html>
""".replace("{0}", msg).replace("{1}", safe_url)

    # Header block (terminated by the blank line), then the page itself.
    sys.stdout.write("Content-Type: application/xhtml+xml\r\n\r\n\n")
    sys.stdout.write(page + "\n")
62
def parse_url(url):
    """Fetch *url* and return its content parsed as an lxml HTML document.

    Args:
        url: Address of the page to download.

    Returns:
        The document object produced by ``lxml.html.document_fromstring``.

    Raises:
        Whatever ``urllib.urlopen`` or the parser raise; nothing is caught
        here.
    """
    # NOTE(review): cgimain() passes a user-supplied value straight in here,
    # and Python 2's urllib.urlopen also opens local files when given a path
    # or file: URL.  Consider restricting the scheme to http/https.
    f = urllib.urlopen(url)
    try:
        return document_fromstring(f.read())
    finally:
        # Close the connection even when reading or parsing fails.
        f.close()
68
def get_video_url(doc):
    """Pick the best available video stream from a parsed video page.

    The first ``<embed>`` element's ``flashvars`` attribute is decoded as a
    query string; its ``fmt_url_map`` entry is a comma-separated list of
    ``<format code>|<url>`` pairs.  The formats are tried in the order given
    by ``fmt_quality`` and the first one present wins.

    Args:
        doc: Parsed HTML document supporting ``xpath()`` (as returned by
            ``parse_url``).

    Returns:
        A ``(video_url, filename)`` tuple.  ``filename`` is the page title
        reduced to a conservative set of ASCII characters plus the extension
        of the chosen format.  ``(None, None)`` is returned when none of the
        known formats is offered.

    Raises:
        IndexError: the page has no ``<embed>`` or no ``<title>`` element.
        KeyError: ``flashvars`` or its ``fmt_url_map`` entry is missing.
        ValueError: a format code in ``fmt_url_map`` is not an integer.
    """
    embed = doc.xpath("//embed")[0]
    flashvars = cgi.parse_qs(embed.attrib["flashvars"])

    fmt_url_map = {}
    for pair in flashvars["fmt_url_map"][0].split(","):
        if "|" not in pair:
            # Skip empty or malformed entries instead of failing to unpack.
            continue
        # Split only once: the URL part may itself contain "|".
        key, value = pair.split("|", 1)
        fmt_url_map[int(key)] = value

    for fmt, extension in fmt_quality:
        if fmt in fmt_url_map:
            video_url = fmt_url_map[fmt]
            break
    else:
        # Callers unpack exactly two values, so the "nothing found" result
        # must be a 2-tuple as well.
        return None, None

    title = doc.xpath("/html/head/title/text()")[0]
    title = re.sub(r"\s+", " ", title.strip())
    valid_chars = frozenset("-_.() abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
    filename = "".join(c for c in title.encode("ascii", "ignore") if c in valid_chars)
    if not filename:
        # A title with no usable characters would otherwise give a name
        # consisting of the extension only (e.g. ".mp4").
        filename = "video"
    filename += extension

    return video_url, filename
94
def cgimain():
    """Serve one CGI request.

    Without a ``url`` request parameter the download form is shown.  With
    one, the referenced page is fetched, the best video stream is located and
    its bytes are streamed to stdout as a file download.  If anything goes
    wrong before the download response has started, the form is shown again
    with an error message.
    """
    args = cgi.parse()
    try:
        url = args["url"][0]
    except (KeyError, IndexError):
        # No URL submitted yet: show the form with an example address.
        print_form(url="http://www.youtube.com/watch?v=FOOBAR")
        return

    # Phase 1: everything that can fail before any output has been written.
    # Errors here can still be reported with a normal HTML page.
    data = None
    try:
        doc = parse_url(url)
        video_url, filename = get_video_url(doc)
        if video_url is None:
            raise ValueError("no supported video format found")
        data = urllib.urlopen(video_url)
        content_length = data.info().getheader("Content-Length")
    except Exception:
        if data is not None:
            data.close()
        print_form(
            url=url,
            msg="<p class='error'>Sorry, there was an error. Check your URL?</p>"
        )
        return

    # Phase 2: stream the video.  Once headers are out, an error page can no
    # longer be sent (it would be appended to the video data), so failures
    # are only logged to stderr.
    try:
        # CGI requires a Content-Type whenever a body is returned.
        sys.stdout.write("Content-Type: application/octet-stream\r\n")
        sys.stdout.write("Content-Disposition: attachment; filename=\"%s\"\r\n" % filename)
        if content_length is not None:
            # Only forward the length when upstream supplied one; otherwise
            # the header would read "Content-Length: None".
            sys.stdout.write("Content-Length: %s\r\n" % content_length)
        sys.stdout.write("\r\n")
        shutil.copyfileobj(data, sys.stdout)
    except Exception:
        sys.stderr.write("youtube.cgi: download aborted: %s\n" % (sys.exc_info()[1],))
    finally:
        data.close()
119
def main():
    """Command-line entry point: download the video for the URL in argv[1].

    The video is saved in the current directory under the filename derived
    by ``get_video_url``.  Exits with status 1 when no URL argument is given
    or when the page offers no known video format.
    """
    try:
        url = sys.argv[1]
    except IndexError:
        sys.stderr.write("Usage: %s http://youtube.com/watch?v=FOOBAR\n" % sys.argv[0])
        sys.exit(1)

    doc = parse_url(url)
    video_url, filename = get_video_url(doc)
    if video_url is None:
        sys.stderr.write("No supported video format found at %s\n" % url)
        sys.exit(1)

    data = urllib.urlopen(video_url)
    try:
        # Binary mode: the payload is video data, and text mode would corrupt
        # it on platforms that translate line endings.
        outfile = open(filename, "wb")
        try:
            shutil.copyfileobj(data, outfile)
        finally:
            outfile.close()
    finally:
        data.close()
133
134
if __name__ == "__main__":
    # SCRIPT_NAME is a CGI meta-variable: when it is present the script is
    # being run by a web server, otherwise it is being run from a shell.
    # The "in" operator replaces the deprecated dict.has_key().
    if "SCRIPT_NAME" in os.environ:
        cgimain()
    else:
        main()
140