]> code.delx.au - youtube-cgi/blob - youtube.cgi
better command line downloader
[youtube-cgi] / youtube.cgi
1 #!/usr/bin/env python
2
3 from __future__ import division
4
5 import cookielib
6 import cgi
7 import itertools
8 import json
9 from lxml import html
10 import os
11 import re
12 import resource
13 import shutil
14 import subprocess
15 import sys
16 import time
17 import urllib
18 import urllib2
19 import urlparse
20
21
# Address-space ceiling (128 MiB) that the script applies to itself with
# resource.setrlimit() before doing any work.
MAX_MEMORY_BYTES = 128 * 1024 ** 2

# Sent as the User-Agent header on every outgoing HTTP request.
USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1"

# Stream MIME types that may be downloaded, mapped to the file extension
# given to the saved video. Streams of any other type are skipped.
MIMETYPES = {
    "video/3gpp": "3gp",
    "video/x-flv": "flv",
    "video/mp4": "mp4",
}

# Quality labels ranked from worst (1) to best (5); a higher rank wins when
# choosing between streams.
QUALITIES = dict(
    (label, rank)
    for rank, label in enumerate(
        ["small", "medium", "large", "hd720", "hd1080"], 1
    )
)
38
39
class VideoUnavailable(Exception):
    """Raised when no downloadable video can be obtained from a watch page.

    The single argument is a human-readable reason that is shown to the user.
    """
42
def print_form(url="", msg=""):
    """Write the download form to stdout as a complete CGI response.

    url -- text pre-filled into the URL input box. It is treated as untrusted
        and HTML-escaped before being placed inside the attribute.
    msg -- trusted XHTML fragment inserted verbatim below the heading; the
        caller must escape anything it interpolates into it.

    Reads HTTP_HOST and REQUEST_URI from the environment to build the
    bookmarklet link and raises KeyError if either is missing.
    """
    # Imported locally so the function carries its own dependency.
    from xml.sax.saxutils import escape

    def attr_escape(text):
        # Escape &, < and > plus the double quote that delimits attributes.
        return escape(text, {'"': "&quot;"})

    # REQUEST_URI (as set by Apache and most servers) carries the query
    # string; drop it so the bookmarklet can append its own "?url=".
    script_path = os.environ["REQUEST_URI"].split("?", 1)[0]
    script_url = "http://%s%s" % (os.environ["HTTP_HOST"], script_path)
    # The address ends up inside a single-quoted JavaScript string, so
    # percent-encode the two characters that could terminate or escape it.
    script_url = script_url.replace("\\", "%5C").replace("'", "%27")

    values = {
        "{0}": msg,
        "{1}": attr_escape(url),
        "{2}": attr_escape(script_url),
    }
    template = """
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>delx.net.au - YouTube Scraper</title>
<link rel="stylesheet" type="text/css" href="/style.css"/>
<style type="text/css">
input[type="text"] {
width: 100%;
}
.error {
color: red;
}
</style>
</head>
<body>
<h1>delx.net.au - YouTube Scraper</h1>
{0}
<form action="" method="get">
<p>This page will let you easily download YouTube videos to watch offline. It
will automatically grab the highest quality version.</p>
<div><input type="text" name="url" value="{1}"/></div>
<div><input type="submit" value="Download!"/></div>
</form>
<p>Tip! Use this bookmarklet: <a href="javascript:(function(){window.location='{2}?url='+escape(location);})()">YouTube Download</a>
to easily download videos. Right-click the link and add it to bookmarks,
then when you're looking at a YouTube page select that bookmark from your
browser's bookmarks menu to download the video straight away.</p>
</body>
</html>
"""

    sys.stdout.write("Content-Type: application/xhtml+xml\r\n\r\n")
    # Substitute in a single pass so placeholder-like text inside one value
    # (for example "{1}" within msg) is never expanded a second time. The
    # template contains other braces, so str.format() cannot be used.
    sys.stdout.write(
        re.sub(r"\{[012]\}", lambda match: values[match.group(0)], template)
    )
77
# A single cookie-aware opener is shared by every request so that cookies set
# by one response are sent back with the following requests.
cookiejar = cookielib.CookieJar()
urlopener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
# URL of the most recent request; it becomes the Referer of the next one.
referrer = ""


def urlopen(url):
    """Open url through the shared opener and return the response object.

    Every request carries the fixed USER_AGENT. From the second request on,
    the previously requested URL is sent as the Referer header. The module
    level ``referrer`` is then updated to ``url``.
    """
    global referrer
    headers = {"User-Agent": USER_AGENT}
    if referrer:
        headers["Referer"] = referrer
    request = urllib2.Request(url, headers=headers)
    # Only remember the URL once the request object was built successfully.
    referrer = url
    return urlopener.open(request)
90
def parse_url(url):
    """Fetch url and return it parsed as an lxml HTML document tree.

    The page is decoded as UTF-8 and parsed in recovery mode, so malformed
    markup does not abort the parse. The HTTP response is always closed, even
    when parsing raises.
    """
    response = urlopen(url)
    try:
        return html.parse(response, html.HTMLParser(encoding="utf-8", recover=True))
    finally:
        response.close()
96
def append_to_qs(url, params):
    """Return url with params merged into its query string.

    params maps names to a value or a sequence of values. A name that is
    already present in the query string is replaced by the value given in
    params. The merged query is re-encoded with one ``name=value`` pair per
    value.
    """
    parts = urlparse.urlsplit(url)
    query = urlparse.parse_qs(parts.query)
    query.update(params)
    rebuilt = parts._replace(query=urllib.urlencode(query, True))
    return urlparse.urlunsplit(rebuilt)
104
def convert_from_old_itag(player_config):
    """Rewrite an old-style stream map so it has explicit ``url`` entries.

    In this layout each ``itag`` value has the stream address embedded after
    a ``url=`` marker. The text following the marker is copied into a new
    ``url`` list and the map is re-encoded. player_config is modified in
    place; nothing is returned.
    """
    args = player_config["args"]
    stream_map = urlparse.parse_qs(args["url_encoded_fmt_stream_map"])
    marker = "url="
    stream_map["url"] = [
        entry[entry.find(marker) + len(marker):]
        for entry in stream_map["itag"]
    ]
    args["url_encoded_fmt_stream_map"] = urllib.urlencode(stream_map, True)
112
def get_player_config(doc):
    """Extract the player configuration from the page's inline scripts.

    Every line of every non-empty <script> element is scanned for one of two
    markers:

    * ``yt.playerConfig =`` -- the JSON between the first "=" and the last
      ";" on the line is decoded and returned.
    * ``'PLAYER_CONFIG': `` -- the JSON after the first ":" is decoded,
      passed through convert_from_old_itag() and returned.

    Returns the decoded dict, or None when no script line matches.
    """
    script_texts = (script.text for script in doc.xpath("//script"))
    for text in script_texts:
        if not text:
            continue
        for line in text.split("\n"):
            if "yt.playerConfig =" in line:
                start = line.find("=")
                end = line.rfind(";")
                if start >= 0 and end > 0:
                    return json.loads(line[start + 1:end])
            if "'PLAYER_CONFIG': " in line:
                colon = line.find(":")
                if colon >= 0:
                    # NOTE(review): returns on the first PLAYER_CONFIG line
                    # that decodes -- confirm this matches the intended
                    # nesting of the original return statement.
                    config = json.loads(line[colon + 1:])
                    convert_from_old_itag(config)
                    return config
    return None
130
def get_best_video(player_config):
    """Choose the highest-quality stream in a supported container format.

    The stream map's parallel ``url``, ``type``, ``quality`` and optional
    ``sig`` lists are walked together. Streams whose MIME type is not in
    MIMETYPES are ignored; the rest are ranked with QUALITIES (unknown labels
    rank -1) and the first stream with the highest rank wins. When a stream
    has a signature it is appended to its URL as the ``signature`` parameter.

    Returns (url, extension), or (None, None) when nothing is usable.
    """
    streams = urlparse.parse_qs(player_config["args"]["url_encoded_fmt_stream_map"])
    candidates = itertools.izip_longest(
        streams["url"],
        streams["type"],
        streams["quality"],
        streams.get("sig", []),
    )

    best = (None, None)
    top_rank = None
    for stream_url, raw_type, raw_quality, sig in candidates:
        # Drop any parameters after the ";" in the MIME type before matching.
        extension = MIMETYPES.get(raw_type.split(";")[0])
        if extension is None:
            continue
        rank = QUALITIES.get(raw_quality.split(",")[0], -1)
        if top_rank is not None and rank <= top_rank:
            continue
        if sig:
            stream_url = append_to_qs(stream_url, {"signature": sig})
        top_rank = rank
        best = (stream_url, extension)

    return best
156
def sanitize_filename(filename):
    """Return filename made safe to use as a single path component.

    Leading and trailing whitespace is removed, every run of whitespace is
    collapsed to one space, both kinds of path separator become "-" and NUL
    bytes become spaces.
    """
    # Raw string: "\s" is not a valid string escape, so the non-raw form
    # only worked by accident and triggers warnings on newer interpreters.
    cleaned = re.sub(r"\s+", " ", filename.strip())
    for unsafe, substitute in (("\\", "-"), ("/", "-"), ("\0", " ")):
        cleaned = cleaned.replace(unsafe, substitute)
    return cleaned
164
def get_video_url(doc):
    """Work out the download URL and a local filename for a parsed watch page.

    Returns (video_url, filename). Both are None when the page has a player
    configuration but none of its streams is in a supported format. The
    filename is the sanitized page title plus the stream's extension; when
    the page has no usable title, "video" is used instead.

    Raises VideoUnavailable when the page carries an "unavailable" message
    or no player configuration can be found.
    """
    unavailable = doc.xpath("//div[@id='unavailable-message']/text()")
    if unavailable:
        raise VideoUnavailable(unavailable[0].strip())

    player_config = get_player_config(doc)
    if not player_config:
        raise VideoUnavailable("Could not find video URL")

    video_url, extension = get_best_video(player_config)
    if not video_url:
        return None, None

    # A page without a <title> (or with a blank one) must not crash the
    # download or produce a bare ".ext" filename.
    titles = doc.xpath("/html/head/title/text()")
    title = titles[0] if titles else ""
    filename = sanitize_filename(title) or "video"

    return video_url, filename + "." + extension
183
def write_video(filename, video_data):
    """Stream an opened video response to stdout as a CGI file download.

    filename -- name suggested to the browser; sent UTF-8 percent-encoded in
        the Content-Disposition header.
    video_data -- response object returned by urlopen(); it is closed before
        this function returns, whether or not the copy succeeds.
    """
    try:
        httpinfo = video_data.info()
        encoded_filename = urllib.quote(filename.encode("utf-8"))

        # A CGI response with a body has to declare its Content-Type; pass
        # the upstream type through, with a generic binary fallback.
        content_type = httpinfo.getheader("Content-Type") or "application/octet-stream"
        sys.stdout.write("Content-Type: %s\r\n" % content_type)
        sys.stdout.write("Content-Disposition: attachment; filename*=UTF-8''%s\r\n" % encoded_filename)

        # Only forward the length when upstream supplied one; otherwise the
        # literal text "None" would be emitted as the header value.
        content_length = httpinfo.getheader("Content-Length")
        if content_length is not None:
            sys.stdout.write("Content-Length: %s\r\n" % content_length)

        sys.stdout.write("\r\n")
        shutil.copyfileobj(video_data, sys.stdout)
    finally:
        video_data.close()
192
def cgimain():
    """CGI entry point: serve the form, or stream the requested video.

    Without a ``url`` query parameter the empty form is shown with an example
    address. Otherwise the page is fetched and the best stream is sent to the
    client as a download. Any failure re-displays the form with an error
    message instead of letting the exception escape.
    """
    args = cgi.parse()
    try:
        url = args["url"][0]
    except (KeyError, IndexError):
        print_form(url="http://www.youtube.com/watch?v=FOOBAR")
        return

    try:
        doc = parse_url(url)
        video_url, filename = get_video_url(doc)
        if not video_url:
            # get_video_url() signals "no supported stream" with (None, None).
            raise VideoUnavailable("Could not find a downloadable video format")
        video_data = urlopen(video_url)
        write_video(filename, video_data)
    except VideoUnavailable as e:
        # e.message is deprecated; the first argument carries the same text.
        reason = e.args[0] if e.args else ""
        print_form(
            url=url,
            msg="<p class='error'>Sorry, there was an error: %s</p>" % cgi.escape(reason)
        )
    except Exception:
        # Last-resort boundary: a CGI script should answer with a page
        # rather than die with a traceback.
        print_form(
            url=url,
            msg="<p class='error'>Sorry, there was an error. Check your URL?</p>"
        )
217
def copy_with_progress(total_size, infile, outfile):
    """Copy infile to outfile while drawing a progress line on stdout.

    total_size -- expected number of bytes; used only for display.
    infile -- object with read(n); copying stops at the first empty read.
    outfile -- object with write(data).

    Data is moved in 32 KiB chunks. The status line (bytes copied, total and
    average speed) is redrawn in place at most about twice per second and
    once more at the end, followed by a newline.
    """
    suffixes = ["", "KiB", "MiB", "GiB"]

    def pp_size(size):
        # Scale into the largest unit that keeps the number below 1024.
        # Values beyond the last unit stay in that unit instead of being
        # divided once more and mislabelled.
        size = float(size)
        for suffix in suffixes[:-1]:
            if size < 1024:
                return "%.2f %s" % (size, suffix)
            size /= 1024
        return "%.2f %s" % (size, suffixes[-1])

    def print_status(now):
        elapsed = now - start_ts
        # No measurable time may have passed at the first update; report a
        # zero rate rather than dividing by zero.
        rate = bytes_read / elapsed if elapsed > 0 else 0
        # ESC[2K clears the current line, \r returns to its first column.
        sys.stdout.write("\33[2K\r")
        sys.stdout.write("%s / %s (%s/sec)" % (
            pp_size(bytes_read),
            pp_size(total_size),
            pp_size(rate),
        ))
        sys.stdout.flush()

    start_ts = time.time()
    last_ts = 0
    bytes_read = 0
    while True:
        now = time.time()
        if now - last_ts > 0.5:
            last_ts = now
            print_status(now)

        buf = infile.read(32768)
        if not buf:
            break
        outfile.write(buf)
        bytes_read += len(buf)

    # Final totals, then a newline to move off the status line.
    print_status(time.time())
    sys.stdout.write("\n")
254
def main():
    """Command-line entry point: download the video at sys.argv[1].

    The video is saved in the current directory under a name derived from
    the page title. Exits with status 1 when no URL is given, when no
    downloadable stream is found, or when the target file already exists.
    """
    try:
        url = sys.argv[1]
    except IndexError:
        sys.stderr.write("Usage: %s http://youtube.com/watch?v=FOOBAR\n" % sys.argv[0])
        sys.exit(1)

    doc = parse_url(url)
    video_url, filename = get_video_url(doc)
    if not video_url:
        # get_video_url() signals "no supported stream" with (None, None).
        sys.stderr.write("Error! Could not find a downloadable video format\n")
        sys.exit(1)

    # Check before opening the connection so nothing is left open on exit.
    if os.path.isfile(filename):
        sys.stderr.write("Error! File exists: %s\n" % filename.encode("utf-8"))
        sys.exit(1)

    video_data = urlopen(video_url)
    try:
        # The length is only used for the progress display; fall back to 0
        # when the server does not announce one.
        content_length = video_data.info().getheader("Content-Length")
        total_size = int(content_length) if content_length else 0

        sys.stdout.write("Downloading %s\n" % filename.encode("utf-8"))
        # Binary mode: video data must not go through newline translation.
        with open(filename, "wb") as outfile:
            copy_with_progress(total_size, video_data, outfile)
    finally:
        video_data.close()
273
274
if __name__ == "__main__":
    # Cap the address space so a runaway parse cannot exhaust the host.
    resource.setrlimit(resource.RLIMIT_AS, (MAX_MEMORY_BYTES, MAX_MEMORY_BYTES))
    # SCRIPT_NAME in the environment means we were started as a CGI script;
    # otherwise behave as a command-line downloader.
    if "SCRIPT_NAME" in os.environ:
        cgimain()
    else:
        try:
            main()
        except KeyboardInterrupt:
            sys.stdout.write("\nExiting...\n")
            sys.exit(1)
285