]> code.delx.au - monosys/blob - ripping/youtube.cgi
7d00908e7a6fa7f6cf806b23fb737b00a2b9a767
[monosys] / ripping / youtube.cgi
1 #!/usr/bin/env python
2
3 import cgi
4 from lxml.html import document_fromstring
5 import os
6 import re
7 import shutil
8 import subprocess
9 import sys
10 import urllib
11
12
class VideoUnavailable(Exception):
    """Signals that a video cannot be downloaded.

    The exception's first argument is a human-readable reason; the CGI
    front end shows it to the user.
    """
15
16 urllib.URLopener.version = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1"
17
# YouTube format ids ("itag" values) paired with the file extension used when
# saving that format.  The list is ordered from most to least preferred: the
# first entry that a video actually offers is the one that gets downloaded.
# The trailing comment on each entry is the frame size of that format.
fmt_quality = [
    (38, ".mp4"),   # 4096x3072
    (37, ".mp4"),   # 1920x1080
    (22, ".mp4"),   # 1280x720
    # WebM variants are deliberately left disabled:
    #   (45, ".webm"),  # 1280x720
    #   (43, ".webm"),  # 640x360
    (35, ".flv"),   # 854x480
    (34, ".flv"),   # 640x360
    (18, ".mp4"),   # 480x360
    (5, ".flv"),    # 400x240
    (17, ".3gp"),   # 176x144
]
30
31
32 def print_form(url="", msg=""):
33 script_url = "http://%s%s" % (os.environ["HTTP_HOST"], os.environ["REQUEST_URI"])
34 print "Content-Type: application/xhtml+xml\r\n\r\n"
35 print """
36 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
37 <html xmlns="http://www.w3.org/1999/xhtml">
38 <head>
39 <title>delx.net.au - YouTube Scraper</title>
40 <link rel="stylesheet" type="text/css" href="/style.css"/>
41 <style type="text/css">
42 input[type="text"] {
43 width: 100%;
44 }
45 .error {
46 color: red;
47 }
48 </style>
49 </head>
50 <body>
51 <h1>delx.net.au - YouTube Scraper</h1>
52 {0}
53 <form action="" method="get">
54 <p>This page will let you easily download YouTube videos to watch offline. It
55 will automatically grab the highest quality version.</p>
56 <div><input type="text" name="url" value="{1}"/></div>
57 <div><input type="submit" value="Download!"/></div>
58 </form>
59 <p>Tip! Use this bookmarklet: <a href="javascript:(function(){window.location='{2}?url='+escape(location);})()">YouTube Download</a>
60 to easily download videos. Right-click the link and add it to bookmarks,
61 then when you're looking at a YouTube page select that bookmark from your
62 browser's bookmarks menu to download the video straight away.</p>
63 </body>
64 </html>
65 """.replace("{0}", msg).replace("{1}", url).replace("{2}", script_url)
66
def parse_url(url):
    """Download *url* and return the page parsed as an lxml HTML document.

    Anything raised by urllib.urlopen, by reading the response, or by the
    parser is propagated to the caller.
    """
    response = urllib.urlopen(url)
    try:
        # Parse inside the try so the connection is released even when
        # reading or parsing fails.
        return document_fromstring(response.read())
    finally:
        response.close()
72
def get_video_url(doc):
    """Choose the best downloadable stream from a parsed YouTube watch page.

    doc -- lxml HTML document of the watch page.

    Returns a (video_url, filename) pair.  video_url is the stream address of
    the first format in fmt_quality that the page offers; filename is the
    page title reduced to a conservative ASCII character set, followed by the
    extension that goes with the chosen format.

    Raises VideoUnavailable when the page contains an "unavailable-message"
    text, or when none of the formats in fmt_quality is offered.  Raises
    IndexError or KeyError when the page lacks the expected <embed> element,
    flashvars entries or <title>.
    """
    unavailable = doc.xpath("//div[@id='unavailable-message']/text()")
    if unavailable:
        raise VideoUnavailable(unavailable[0].strip())

    # The list of streams is stored, query-string encoded, in the
    # "flashvars" attribute of the player's <embed> element.
    embed = doc.xpath("//embed")[0]
    flashvars = cgi.parse_qs(embed.attrib["flashvars"])

    # Map every offered format id (itag) to its stream URL.  Each
    # comma-separated entry is itself a query string.
    fmt_url_map = {}
    for url_desc in flashvars["url_encoded_fmt_stream_map"][0].split(","):
        url_desc_map = cgi.parse_qs(url_desc)
        fmt_url_map[int(url_desc_map["itag"][0])] = url_desc_map["url"][0]

    # fmt_quality is ordered best first, so the first hit wins.
    for fmt, extension in fmt_quality:
        if fmt in fmt_url_map:
            video_url = fmt_url_map[fmt]
            break
    else:
        # Callers unpack exactly two values, so report the failure with the
        # exception they already handle rather than a mismatched tuple.
        raise VideoUnavailable("No supported video format is available")

    title = doc.xpath("/html/head/title/text()")[0]
    title = re.sub(r"\s+", " ", title.strip())
    # Keep only characters that are safe in a file name and in a quoted
    # Content-Disposition header value.
    valid_chars = frozenset("-_.() abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
    filename = "".join(c for c in title.encode("ascii", "ignore") if c in valid_chars)
    filename += extension

    return video_url, filename
101
102 def cgimain():
103 args = cgi.parse()
104 try:
105 url = args["url"][0]
106 except:
107 print_form(url="http://www.youtube.com/watch?v=FOOBAR")
108 return
109
110 try:
111 doc = parse_url(url)
112 video_url, filename = get_video_url(doc)
113 data = urllib.urlopen(video_url)
114 httpinfo = data.info()
115 sys.stdout.write("Content-Disposition: attachment; filename=\"%s\"\r\n" % filename)
116 sys.stdout.write("Content-Length: %s\r\n" % httpinfo.getheader("Content-Length"))
117 sys.stdout.write("\r\n")
118 shutil.copyfileobj(data, sys.stdout)
119 data.close()
120 except VideoUnavailable, e:
121 print_form(
122 url=url,
123 msg="<p class='error'>Sorry, there was an error: %s</p>" % cgi.escape(e.message)
124 )
125 except Exception, e:
126 print_form(
127 url=url,
128 msg="<p class='error'>Sorry, there was an error. Check your URL?</p>"
129 )
130 return
131
def main():
    """Command-line entry point: download the video named by sys.argv[1].

    The video is written to a file whose name is derived from the page
    title.  Prints a usage line to stderr and exits with status 1 when no
    URL argument is given.
    """
    try:
        url = sys.argv[1]
    except IndexError:
        print >>sys.stderr, "Usage: %s http://youtube.com/watch?v=FOOBAR" % sys.argv[0]
        sys.exit(1)

    doc = parse_url(url)
    video_url, filename = get_video_url(doc)
    data = urllib.urlopen(video_url)
    try:
        # Video data is binary; text mode would corrupt it on platforms
        # that translate line endings.
        outfile = open(filename, "wb")
        try:
            shutil.copyfileobj(data, outfile)
        finally:
            outfile.close()
    finally:
        data.close()
145
146
if __name__ == "__main__":
    # SCRIPT_NAME in the environment means the script was started as a CGI
    # program; otherwise treat it as a command-line invocation.
    entry_point = cgimain if "SCRIPT_NAME" in os.environ else main
    entry_point()
152