]> code.delx.au - youtube-cgi/blob - youtube.cgi
added HD qualities
[youtube-cgi] / youtube.cgi
1 #!/usr/bin/env python
2
3 import cookielib
4 import cgi
5 import itertools
6 import json
7 from lxml import html
8 import os
9 import re
10 import resource
11 import shutil
12 import subprocess
13 import sys
14 import time
15 import urllib
16 import urllib2
17 import urlparse
18
19
# Address-space ceiling handed to resource.setrlimit at startup (128 MiB).
MAX_MEMORY_BYTES = 128 << 20

# Browser identity sent with every outgoing HTTP request.
USER_AGENT = (
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) "
    "Gecko/20100101 Firefox/15.0.1"
)

# Stream MIME types we are willing to download, mapped to file extensions.
MIMETYPES = dict([
    ("video/mp4", "mp4"),
    ("video/x-flv", "flv"),
    ("video/3gpp", "3gp"),
])

# Quality labels ranked from worst (1) to best (5); higher wins.
QUALITIES = dict(
    (label, rank)
    for rank, label in enumerate(
        ("small", "medium", "large", "hd720", "hd1080"), 1
    )
)
36
37
class VideoUnavailable(Exception):
    """Raised when a watch page yields no downloadable video.

    The single constructor argument is the human-readable reason.
    """
40
def print_form(url="", msg=""):
    """Write the download form to stdout as a complete CGI response.

    url -- text pre-filled into the URL input box; it is escaped here.
    msg -- HTML fragment shown above the form; inserted verbatim, so the
           caller is responsible for escaping any untrusted text in it.

    Reads HTTP_HOST and REQUEST_URI from the environment to build the
    address the bookmarklet points at (KeyError if either is missing).
    """
    from xml.sax.saxutils import escape

    def attr(text):
        # The page is served as application/xhtml+xml: a single raw '&',
        # '<' or '"' inside an attribute makes the whole document fail to
        # parse, and would also let the value break out of the attribute.
        return escape(text, {'"': "&quot;"})

    # REQUEST_URI includes the current query string; drop it so the
    # bookmarklet does not end up as "...?url=old?url=new".
    script_path = os.environ["REQUEST_URI"].split("?", 1)[0]
    script_url = "http://%s%s" % (os.environ["HTTP_HOST"], script_path)
    # script_url is placed inside a single-quoted JavaScript string.
    script_url = script_url.replace("\\", "%5C").replace("'", "%27")

    values = {"{0}": msg, "{1}": attr(url), "{2}": attr(script_url)}
    template = """
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>delx.net.au - YouTube Scraper</title>
<link rel="stylesheet" type="text/css" href="/style.css"/>
<style type="text/css">
input[type="text"] {
width: 100%;
}
.error {
color: red;
}
</style>
</head>
<body>
<h1>delx.net.au - YouTube Scraper</h1>
{0}
<form action="" method="get">
<p>This page will let you easily download YouTube videos to watch offline. It
will automatically grab the highest quality version.</p>
<div><input type="text" name="url" value="{1}"/></div>
<div><input type="submit" value="Download!"/></div>
</form>
<p>Tip! Use this bookmarklet: <a href="javascript:(function(){window.location='{2}?url='+escape(location);})()">YouTube Download</a>
to easily download videos. Right-click the link and add it to bookmarks,
then when you're looking at a YouTube page select that bookmark from your
browser's bookmarks menu to download the video straight away.</p>
</body>
</html>
"""
    sys.stdout.write("Content-Type: application/xhtml+xml\r\n\r\n")
    # Substitute all placeholders in one pass so that placeholder-looking
    # text inside msg or url is never substituted a second time.
    sys.stdout.write(
        re.sub(r"\{[012]\}", lambda match: values[match.group(0)], template)
    )
75
# Shared HTTP state used by urlopen(): the URL of the previous request
# (sent as the next Referer) and a cookie-aware opener.
referrer = ""
cookiejar = cookielib.CookieJar()
urlopener = urllib2.build_opener(
    urllib2.HTTPCookieProcessor(cookiejar)
)
79
def urlopen(url):
    """Open *url* through the shared cookie-aware opener.

    Sends the configured User-Agent and, when there was an earlier
    request, that request's URL as the Referer. The module-level
    ``referrer`` is updated to *url* before the request is made.
    Returns whatever ``urlopener.open`` returns.
    """
    global referrer
    request = urllib2.Request(url)
    previous, referrer = referrer, url
    if previous:
        request.add_header("Referer", previous)
    request.add_header("User-Agent", USER_AGENT)
    return urlopener.open(request)
88
def parse_url(url):
    """Fetch *url* and return the document produced by lxml's html.parse.

    The page is decoded as UTF-8 with parser recovery enabled. The HTTP
    response is closed even when parsing raises.
    """
    f = urlopen(url)
    try:
        parser = html.HTMLParser(encoding="utf-8", recover=True)
        return html.parse(f, parser)
    finally:
        # Previously the response leaked whenever html.parse raised.
        f.close()
94
def append_to_qs(url, params):
    """Return *url* with *params* merged into its query string.

    Existing parameters are parsed with parse_qs; entries in *params*
    replace existing keys of the same name. The rest of the URL is kept.
    """
    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
    merged = urlparse.parse_qs(query)
    merged.update(params)
    # doseq=True: parse_qs yields lists, emit one key=value per element.
    query = urllib.urlencode(merged, True)
    return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
102
def convert_from_old_itag(player_config):
    """Rewrite an old-style stream map in place so it has "url" entries.

    In the old layout each "itag" value carries the stream address after
    a "url=" marker. The text following the marker becomes the matching
    "url" entry and the map is re-encoded back into
    player_config["args"]["url_encoded_fmt_stream_map"]. Returns None.
    """
    args = player_config["args"]
    stream_map = urlparse.parse_qs(args["url_encoded_fmt_stream_map"])
    marker = "url="
    stream_map["url"] = [
        entry[entry.find(marker) + len(marker):]
        for entry in stream_map["itag"]
    ]
    args["url_encoded_fmt_stream_map"] = urllib.urlencode(stream_map, True)
110
def get_player_config(doc):
    """Locate and decode the player configuration JSON in a watch page.

    Every line of every <script> element is scanned:

    * a line containing "yt.playerConfig =" is decoded from just after
      its first "=" up to its last ";" and returned immediately;
    * a line containing "'PLAYER_CONFIG': " is decoded from just after
      its first ":", converted with convert_from_old_itag and remembered.

    Returns the remembered old-style configuration when no new-style one
    was found, or None when the page has neither.
    """
    fallback_config = None
    for script in doc.xpath("//script"):
        source = script.text
        if not source:
            continue
        for line in source.split("\n"):
            if "yt.playerConfig =" in line:
                start = line.find("=")
                end = line.rfind(";")
                if start >= 0 and end > 0:
                    return json.loads(line[start + 1:end])
            if "'PLAYER_CONFIG': " in line:
                colon = line.find(":")
                if colon >= 0:
                    fallback_config = json.loads(line[colon + 1:])
                    convert_from_old_itag(fallback_config)
    return fallback_config
128
def get_best_video(player_config):
    """Pick the highest-quality stream in a supported container format.

    Reads player_config["args"]["url_encoded_fmt_stream_map"], pairs up
    its "url", "type", "quality" and optional "sig" lists by position,
    and ranks the entries whose MIME type is in MIMETYPES using
    QUALITIES (unknown quality labels rank -1). When an entry has a
    signature it is appended to the URL as the "signature" parameter.

    Returns (url, extension), or (None, None) if nothing is usable.
    """
    stream_map = urlparse.parse_qs(
        player_config["args"]["url_encoded_fmt_stream_map"]
    )
    streams = itertools.izip_longest(
        stream_map["url"],
        stream_map["type"],
        stream_map["quality"],
        stream_map.get("sig", []),
    )
    best_url = None
    best_quality = None
    best_extension = None
    for video_url, mimetype, quality, signature in streams:
        # izip_longest pads the shorter lists with None; an entry that
        # lacks a URL, type or quality cannot be ranked or downloaded,
        # and calling .split() on None used to raise AttributeError.
        if video_url is None or mimetype is None or quality is None:
            continue
        # Drop MIME parameters such as '; codecs="..."'.
        extension = MIMETYPES.get(mimetype.split(";")[0])
        if extension is None:
            continue
        rank = QUALITIES.get(quality.split(",")[0], -1)
        if best_quality is None or rank > best_quality:
            if signature:
                video_url = append_to_qs(video_url, {"signature": signature})
            best_url = video_url
            best_quality = rank
            best_extension = extension

    return best_url, best_extension
154
def sanitize_filename(filename):
    """Turn a page title into a string that is safe as a file name.

    Surrounding whitespace is stripped, inner whitespace runs collapse
    to a single space, both kinds of slash become "-" and NUL bytes
    become spaces.
    """
    cleaned = re.sub(r"\s+", " ", filename.strip())
    for unsafe, replacement in (("\\", "-"), ("/", "-"), ("\0", " ")):
        cleaned = cleaned.replace(unsafe, replacement)
    return cleaned
162
def get_video_url(doc):
    """Work out the download URL and target file name for a watch page.

    Raises VideoUnavailable with the page's own message when it contains
    an "unavailable-message" div, or with a generic message when no
    player configuration can be found. Returns (None, None) when the
    configuration lists no usable stream; otherwise returns
    (video_url, filename), where filename is the sanitized page title
    plus the stream's extension.
    """
    notices = doc.xpath("//div[@id='unavailable-message']/text()")
    if notices:
        raise VideoUnavailable(notices[0].strip())

    config = get_player_config(doc)
    if not config:
        raise VideoUnavailable("Could not find video URL")

    best_url, extension = get_best_video(config)
    if not best_url:
        return None, None

    page_title = doc.xpath("/html/head/title/text()")[0]
    return best_url, sanitize_filename(page_title) + "." + extension
181
def write_video(filename, video_data):
    """Stream an opened video response to stdout as a CGI attachment.

    filename   -- suggested download name; sent UTF-8 percent-encoded in
                  the Content-Disposition header.
    video_data -- response object providing info(), read() and close().

    Content-Length is forwarded only when the upstream response has one,
    and video_data is closed even if the copy fails part-way.
    """
    try:
        encoded_filename = urllib.quote(filename.encode("utf-8"))
        sys.stdout.write(
            "Content-Disposition: attachment; filename*=UTF-8''%s\r\n"
            % encoded_filename
        )
        content_length = video_data.info().getheader("Content-Length")
        # A missing upstream header used to be echoed as the invalid
        # header "Content-Length: None".
        if content_length is not None:
            sys.stdout.write("Content-Length: %s\r\n" % content_length)
        sys.stdout.write("\r\n")
        shutil.copyfileobj(video_data, sys.stdout)
    finally:
        video_data.close()
190
def cgimain():
    """CGI entry point: stream the requested video or show the form.

    Without a "url" query parameter the empty form is shown with an
    example URL. Otherwise the page is fetched and the best stream is
    sent to the client. Any failure re-displays the form with an error
    message; VideoUnavailable contributes its own (HTML-escaped) reason.
    """
    args = cgi.parse()
    try:
        url = args["url"][0]
    except (KeyError, IndexError):
        # Only a missing parameter should fall back to the blank form;
        # the former bare "except:" also swallowed SystemExit and
        # KeyboardInterrupt.
        print_form(url="http://www.youtube.com/watch?v=FOOBAR")
        return

    try:
        doc = parse_url(url)
        video_url, filename = get_video_url(doc)
        video_data = urlopen(video_url)
        write_video(filename, video_data)
    except VideoUnavailable as e:
        # args[0] is the reason given at raise time; it replaces the
        # deprecated e.message attribute.
        reason = e.args[0] if e.args else ""
        print_form(
            url=url,
            msg="<p class='error'>Sorry, there was an error: %s</p>" % cgi.escape(reason)
        )
    except Exception:
        print_form(
            url=url,
            msg="<p class='error'>Sorry, there was an error. Check your URL?</p>"
        )
215
def copy_with_progress(total_size, infile, outfile):
    """Copy infile to outfile in 32 KiB chunks with a live status line.

    total_size -- expected number of bytes, used only for display.
    infile     -- object with read(size).
    outfile    -- object with write(data).

    The status line (copied / total and average rate) is redrawn on
    stdout at most every half second, drawn once more when the copy
    completes, and then terminated with a newline.
    """
    def pp_size(size):
        # Scale to the largest unit that keeps the number below 1024.
        # Anything larger than the last unit stays expressed in it; the
        # previous loop divided once too often for such values.
        suffixes = ["", "KiB", "MiB", "GiB"]
        for suffix in suffixes[:-1]:
            if size < 1024:
                return "%d %s" % (size, suffix)
            size /= 1024
        return "%d %s" % (size, suffixes[-1])

    def show_progress(copied, elapsed):
        # The clock may not have advanced yet; avoid dividing by zero.
        rate = copied / elapsed if elapsed > 0 else 0
        # ANSI "erase line" followed by carriage return: redraw in place.
        sys.stdout.write("\33[2K\r")
        sys.stdout.write("%s / %s (%s/sec)" % (
            pp_size(copied),
            pp_size(total_size),
            pp_size(rate),
        ))
        sys.stdout.flush()

    start_ts = time.time()
    last_ts = 0
    bytes_read = 0
    while True:
        now = time.time()
        if now - last_ts > 0.5:
            last_ts = now
            show_progress(bytes_read, now - start_ts)

        buf = infile.read(32768)
        if not buf:
            break
        outfile.write(buf)
        bytes_read += len(buf)

    # Show the final totals and leave the cursor on a fresh line.
    show_progress(bytes_read, time.time() - start_ts)
    sys.stdout.write("\n")
    sys.stdout.flush()
245
def main():
    """Command-line entry point: download the video at sys.argv[1].

    The file is written to the current directory under the sanitized
    page title. Prints a usage message to stderr and exits with status 1
    when no URL argument is given.
    """
    try:
        url = sys.argv[1]
    except IndexError:
        sys.stderr.write(
            "Usage: %s http://youtube.com/watch?v=FOOBAR\n" % sys.argv[0]
        )
        sys.exit(1)

    doc = parse_url(url)
    video_url, filename = get_video_url(doc)
    video_data = urlopen(video_url)
    try:
        # Without a Content-Length the total is simply displayed as 0
        # instead of crashing on int(None).
        content_length = video_data.info().getheader("Content-Length")
        total_size = int(content_length) if content_length else 0
        sys.stdout.write("Downloading %s\n" % filename.encode("utf-8"))
        # Video data is binary: text mode would corrupt it on platforms
        # that translate line endings.
        with open(filename, "wb") as outfile:
            copy_with_progress(total_size, video_data, outfile)
    finally:
        video_data.close()
261
262
if __name__ == "__main__":
    # Cap the process address space (soft and hard limit) before doing
    # any work.
    resource.setrlimit(resource.RLIMIT_AS, (MAX_MEMORY_BYTES, MAX_MEMORY_BYTES))
    # SCRIPT_NAME in the environment is taken to mean we were started as
    # a CGI script; otherwise behave as a command-line tool.
    if "SCRIPT_NAME" in os.environ:
        cgimain()
    else:
        try:
            main()
        except KeyboardInterrupt:
            print("\nExiting...")
            sys.exit(1)
273