]>
code.delx.au - youtube-cgi/blob - youtube.cgi
18 MOZILLA_RELEASE_URL
= "https://www.mozilla.org/en-US/firefox/releases/"
19 USER_AGENT_TEMPLATE
= "Mozilla/5.0 (X11; Linux x86_64; rv:83.0) Gecko/20100101 Firefox/%s"
36 class VideoUnavailable(Exception):
39 class NotYouTube(Exception):
42 def print_form(url
="", msg
=""):
43 script_url
= "https://%s%s" % (os
.environ
["HTTP_HOST"], os
.environ
["REQUEST_URI"])
44 sys
.stdout
.write("Content-Type: text/html\r\n\r\n")
49 <title>delx.net.au - YouTube Scraper</title>
50 <link rel="stylesheet" type="text/css" href="/style.css">
51 <style type="text/css">
61 <h1>delx.net.au - YouTube Scraper</h1>
63 <form action="" method="get">
64 <p>This page will let you easily download YouTube videos to watch offline. It
65 will automatically grab the highest quality version.</p>
66 <div><input type="text" name="url" value="{1}"/></div>
67 <div><input type="submit" value="Download!"/></div>
69 <p>Tip! Use this bookmarklet: <a href="javascript:(function(){window.location='{2}?url='+escape(location);})()">YouTube Download</a>
70 to easily download videos. Right-click the link and add it to bookmarks,
71 then when you're looking at a YouTube page select that bookmark from your
72 browser's bookmarks menu to download the video straight away.</p>
75 """.replace("{0}", msg
).replace("{1}", url
).replace("{2}", script_url
))
77 cookiejar
= http
.cookiejar
.CookieJar()
78 urlopener
= urllib
.request
.build_opener(urllib
.request
.HTTPCookieProcessor(cookiejar
))
82 def urlopen(url
, offset
=None):
85 page
= MozillaReleasesPageParser()
86 with urllib
.request
.urlopen(MOZILLA_RELEASE_URL
) as f
:
87 page
.feed(f
.read().decode("utf-8"))
89 user_agent
= USER_AGENT_TEMPLATE
% page
.latest_release
91 if url
.startswith("//"):
93 if not url
.startswith("http://") and not url
.startswith("https://"):
94 url
= "https://www.youtube.com" + url
97 req
= urllib
.request
.Request(url
)
101 req
.add_header("Referer", referrer
)
103 req
.add_header("User-Agent", user_agent
)
106 req
.add_header("Range", "bytes=%d-" % offset
)
108 res
= urlopener
.open(req
)
110 content_range
= res
.getheader("Content-Range")
112 tokens
= content_range
.split()
113 assert tokens
[0] == "bytes"
114 start
= int(tokens
[1].split("-")[0])
115 assert start
== offset
118 def validate_url(url
):
119 parsed_url
= urllib
.parse
.urlparse(url
)
120 scheme_ok
= parsed_url
.scheme
== "https"
121 host
= parsed_url
.netloc
.lstrip("www.").lstrip("m.")
122 host_ok
= host
in ["youtube.com", "youtu.be"]
124 if scheme_ok
and host_ok
:
129 def load_parse_url(url
, parser
):
131 parser
.feed(f
.read().decode("utf-8"))
135 def append_to_qs(url
, params
):
136 r
= list(urllib
.parse
.urlsplit(url
))
137 qs
= urllib
.parse
.parse_qs(r
[3])
139 r
[3] = urllib
.parse
.urlencode(qs
, True)
140 url
= urllib
.parse
.urlunsplit(r
)
143 def get_player_config(scripts
):
145 ("ytcfg.set({\"", 2, "});", 1),
146 ("ytInitialPlayerResponse = {\"", 2, "};", 1),
149 for script
in scripts
:
150 for line
in script
.split("\n"):
151 for s1
, off1
, s2
, off2
in config_strings
:
153 p1
= line
.find(s1
) + len(s1
) - off1
154 p2
= line
.find(s2
, p1
) + off2
155 if p1
>= 0 and p2
> 0:
156 player_config
.update(json
.loads(line
[p1
:p2
]))
def extract_js(script):
    """Return the YouTube player script with its fixed wrapper removed.

    The script must begin with the ``_yt_player`` prologue and end with the
    matching epilogue; both are stripped and the text in between is returned.

    Args:
        script: Full text of the player JavaScript.

    Returns:
        The text between the expected prefix and suffix.

    Raises:
        AssertionError: If ``script`` does not start with the expected prefix
            or does not end with the expected suffix.
    """
    PREFIX = "var _yt_player={};(function(g){var window=this;"
    SUFFIX = ";})(_yt_player);\n"

    # Explicit checks instead of ``assert`` statements: asserts are removed
    # when Python runs with -O, which would let an unexpected script be
    # sliced into garbage. AssertionError is raised deliberately so callers
    # observe the same exception type as before.
    if not script.startswith(PREFIX):
        raise AssertionError(
            "player script does not start with the expected prefix"
        )
    if not script.endswith(SUFFIX):
        raise AssertionError(
            "player script does not end with the expected suffix"
        )

    return script[len(PREFIX):-len(SUFFIX)]
167 def find_cipher_func(script
):
168 FUNC_NAME
= R
"([a-zA-Z0-9$]+)"
169 DECODE_URI_COMPONENT
= R
"(\(decodeURIComponent)?"
170 FUNC_PARAMS
= R
"(\([a-zA-Z,\.]+\.s\))"
171 TERMINATOR
= R
"[,;\)]"
172 PATTERN
= FUNC_NAME
+ DECODE_URI_COMPONENT
+ FUNC_PARAMS
+ TERMINATOR
174 match
= re
.search(PATTERN
, script
)
175 func_name
= match
.groups()[0]
178 def construct_url_from_cipher_result(cipher_result
):
179 for k
, v
in cipher_result
.items():
180 if isinstance(v
, str) and v
.startswith("https://"):
184 raise Exception("Could not find URL-like string in cipher result!")
186 for k
, v
in cipher_result
.items():
187 if isinstance(v
, dict):
189 for k2
, v2
in v
.items():
190 params
[k2
] = urllib
.parse
.unquote(v2
)
191 return append_to_qs(temp_url
, params
)
193 raise Exception("Could not find params-like structure in cipher result!")
195 def decode_cipher_url(js_url
, cipher
):
196 cipher
= urllib
.parse
.parse_qs(cipher
)
204 script
= f
.read().decode("utf-8")
207 cipher_func_name
= find_cipher_func(script
)
210 "cipher_func_name": cipher_func_name
,
211 "args": json
.dumps(args
),
212 "code": json
.dumps(extract_js(script
)),
214 p
= subprocess
.Popen(
218 stdin
=subprocess
.PIPE
,
219 stdout
=subprocess
.PIPE
221 js_decode_script
= ("""
222 const vm = require('vm');
224 const fakeGlobal = {};
225 fakeGlobal.window = fakeGlobal;
226 fakeGlobal.location = {
228 host: 'www.youtube.com',
229 hostname: 'www.youtube.com',
230 href: 'https://www.youtube.com',
231 origin: 'https://www.youtube.com',
235 fakeGlobal.history = {
236 pushState: function(){}
238 fakeGlobal.document = {
239 location: fakeGlobal.location
241 fakeGlobal.document = {};
242 fakeGlobal.navigator = {
245 fakeGlobal.XMLHttpRequest = class XMLHttpRequest {};
246 fakeGlobal.matchMedia = () => ({matches: () => {}, media: ''});
247 fakeGlobal.result = null;
248 fakeGlobal.g = function(){}; // this is _yt_player
249 fakeGlobal.TimeRanges = function(){};
251 const code_string = %(code)s + ';';
252 const exec_string = 'result = %(cipher_func_name)s(...%(args)s);';
253 vm.runInNewContext(code_string + exec_string, fakeGlobal);
255 console.log(JSON.stringify(fakeGlobal.result));
258 p
.stdin
.write(js_decode_script
.encode("utf-8"))
261 result
= json
.load(p
.stdout
)
263 raise Exception("js failed to execute: %d" % p
.returncode
)
265 result_url
= construct_url_from_cipher_result(result
)
268 def get_best_video(player_config
):
269 formats
= player_config
["streamingData"]["formats"]
273 best_extension
= None
274 for format_data
in formats
:
275 mimetype
= format_data
["mimeType"].split(";")[0]
276 quality
= format_data
["quality"]
278 if quality
not in QUALITIES
:
280 if mimetype
not in MIMETYPES
:
283 extension
= MIMETYPES
[mimetype
]
284 quality
= QUALITIES
.get(quality
, -1)
286 if best_quality
is not None and quality
< best_quality
:
289 if "signatureCipher" in format_data
:
290 js_url
= player_config
["PLAYER_JS_URL"]
291 video_url
= decode_cipher_url(js_url
, format_data
["signatureCipher"])
293 video_url
= format_data
["url"]
296 best_quality
= quality
297 best_extension
= extension
299 return best_url
, best_extension
301 def sanitize_filename(filename
):
303 re
.sub("\s+", " ", filename
.strip())
309 def get_video_url(page
):
310 player_config
= get_player_config(page
.scripts
)
311 if not player_config
:
312 raise VideoUnavailable(page
.unavailable_message
or "Could not find video URL")
314 video_url
, extension
= get_best_video(player_config
)
318 title
= player_config
["videoDetails"].get("title", None)
320 title
= "Unknown title"
322 filename
= sanitize_filename(title
) + "." + extension
324 return video_url
, filename
326 class YouTubeVideoPageParser(html
.parser
.HTMLParser
):
329 self
.unavailable_message
= None
332 def handle_starttag(self
, tag
, attrs
):
334 self
._handle
_unavailable
_message
(tag
, attrs
)
335 self
._handle
_script
(tag
, attrs
)
337 def handle_endtag(self
, tag
):
338 self
.handle_data
= self
._ignore
_data
340 def _ignore_data(self
, _
):
343 def _handle_unavailable_message(self
, tag
, attrs
):
344 if attrs
.get("id", None) == "unavailable-message":
345 self
.handle_data
= self
._handle
_unavailable
_message
_data
347 def _handle_unavailable_message_data(self
, data
):
348 self
.unavailable_message
= data
.strip()
350 def _handle_script(self
, tag
, attrs
):
352 self
.handle_data
= self
._handle
_script
_data
354 def _handle_script_data(self
, data
):
356 self
.scripts
.append(data
)
358 class MozillaReleasesPageParser(html
.parser
.HTMLParser
):
361 self
.latest_release
= "1.0"
363 def handle_starttag(self
, tag
, attrs
):
365 if attrs
.get("data-latest-firefox", None):
366 self
.latest_release
= attrs
.get("data-latest-firefox", None)
368 def write_video(filename
, video_data
):
369 quoted_filename
= urllib
.parse
.quote(filename
.encode("utf-8"))
370 sys
.stdout
.buffer.write(
371 b
"Content-Disposition: attachment; filename*=UTF-8''{0}\r\n"
372 .replace(b
"{0}", quoted_filename
.encode("utf-8"))
374 sys
.stdout
.buffer.write(
375 b
"Content-Length: {0}\r\n"
376 .replace(b
"{0}", video_data
.getheader("Content-Length").encode("utf-8"))
378 sys
.stdout
.buffer.write(b
"\r\n")
379 shutil
.copyfileobj(video_data
, sys
.stdout
.buffer)
387 print_form(url
="https://www.youtube.com/watch?v=FOOBAR")
391 page
= YouTubeVideoPageParser()
393 with
urlopen(url
) as f
:
394 page
.feed(f
.read().decode("utf-8"))
396 video_url
, filename
= get_video_url(page
)
397 video_data
= urlopen(video_url
)
398 except VideoUnavailable
as e
:
401 msg
="<p class='error'>Sorry, there was an error: %s</p>" % cgi
.escape(e
.args
[0])
406 msg
="<p class='error'>Sorry, that does not look like a YouTube page!</p>"
408 except Exception as e
:
411 msg
="<p class='error'>Sorry, there was an unknown error.</p>"
415 write_video(filename
, video_data
)
418 suffixes
= ["", "KiB", "MiB", "GiB"]
419 for i
, suffix
in enumerate(suffixes
):
423 return "%.2f %s" % (size
, suffix
)
425 def copy_with_progress(content_length
, infile
, outfile
):
429 rate
= last_bytes_read
/ (now
- last_ts
)
430 sys
.stdout
.write("\33[2K\r")
431 sys
.stdout
.write("%s / %s (%s/sec)" % (
433 pp_size(content_length
),
443 if now
- last_ts
> 0.5:
448 buf
= infile
.read(32768)
452 last_bytes_read
+= len(buf
)
453 bytes_read
+= len(buf
)
463 print("Usage: %s https://youtube.com/watch?v=FOOBAR" % sys
.argv
[0], file=sys
.stderr
)
466 page
= YouTubeVideoPageParser()
467 with
urlopen(url
) as f
:
468 page
.feed(f
.read().decode("utf-8"))
470 video_url
, filename
= get_video_url(page
)
471 print("Downloading", filename
)
473 outfile
= open(filename
, "ab")
474 offset
= outfile
.tell()
476 print("Resuming download from", pp_size(offset
))
481 video_data
= urlopen(video_url
, offset
)
482 except urllib
.error
.HTTPError
as e
:
484 print("File is complete!")
489 content_length
= int(video_data
.getheader("Content-Length"))
490 if total_size
is None:
491 total_size
= content_length
494 copy_with_progress(content_length
, video_data
, outfile
)
499 if outfile
.tell() != total_size
:
501 offset
= outfile
.tell()
502 if old_offset
== offset
:
504 print("Restarting download from", pp_size(offset
))
511 if __name__
== "__main__":
512 if "SCRIPT_NAME" in os
.environ
:
517 except KeyboardInterrupt:
518 print("\nExiting...")