code.delx.au - youtube-cgi/commitdiff
Google seems to care about user agents now...
author: James Bunton <jamesbunton@delx.net.au>
Sat, 5 Dec 2020 12:59:28 +0000 (23:59 +1100)
committer: James Bunton <jamesbunton@delx.net.au>
Sat, 5 Dec 2020 12:59:28 +0000 (23:59 +1100)
youtube.cgi

index 0aadef35e5885458ec97282c27bfb2465cd9a375..212ceae83234d81ccc0c63b7681f00d9fadbd92f 100755 (executable)
@@ -15,7 +15,8 @@ import urllib.parse
 import urllib.request
 
 
-USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:82.0) Gecko/20100101 Firefox/82.0"
+MOZILLA_RELEASE_URL = "https://www.mozilla.org/en-US/firefox/releases/"
+USER_AGENT_TEMPLATE = "Mozilla/5.0 (X11; Linux x86_64; rv:83.0) Gecko/20100101 Firefox/%s"
 
 MIMETYPES = {
     "video/mp4": "mp4",
@@ -76,8 +77,17 @@ def print_form(url="", msg=""):
 cookiejar = http.cookiejar.CookieJar()
 urlopener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookiejar))
 referrer = ""
+user_agent = None
 
 def urlopen(url, offset=None):
+    global user_agent
+    if not user_agent:
+        page = MozillaReleasesPageParser()
+        with urllib.request.urlopen(MOZILLA_RELEASE_URL) as f:
+            page.feed(f.read().decode("utf-8"))
+            page.close()
+        user_agent = USER_AGENT_TEMPLATE % page.latest_release
+
     if url.startswith("//"):
         url = "https:" + url
     if not url.startswith("http://") and not url.startswith("https://"):
@@ -90,7 +100,7 @@ def urlopen(url, offset=None):
     else:
         req.add_header("Referer", referrer)
 
-    req.add_header("User-Agent", USER_AGENT)
+    req.add_header("User-Agent", user_agent)
 
     if offset:
         req.add_header("Range", "bytes=%d-" % offset)
@@ -116,7 +126,7 @@ def validate_url(url):
     else:
         raise NotYouTube()
 
-def parse_url(url, parser):
+def load_parse_url(url, parser):
     f = urlopen(url)
     parser.feed(f.read().decode("utf-8"))
     parser.close()
@@ -229,6 +239,7 @@ def decode_cipher_url(js_url, cipher):
         fakeGlobal.matchMedia = () => ({matches: () => {}, media: ''});
         fakeGlobal.result_url = null;
         fakeGlobal.g = function(){}; // this is _yt_player
+        fakeGlobal.TimeRanges = function(){};
 
         const code_string = %(code)s + ';';
         const exec_string = 'result_url = %(url_func_name)s(%(cipher_func_name)s(...%(args)s));';
@@ -340,6 +351,16 @@ class YouTubeVideoPageParser(html.parser.HTMLParser):
         if data:
             self.scripts.append(data)
 
+class MozillaReleasesPageParser(html.parser.HTMLParser):
+    def __init__(self):
+        super().__init__()
+        self.latest_release = "1.0"
+
+    def handle_starttag(self, tag, attrs):
+        attrs = dict(attrs)
+        if attrs.get("data-latest-firefox", None):
+            self.latest_release = attrs.get("data-latest-firefox", None)
+
 def write_video(filename, video_data):
     quoted_filename = urllib.parse.quote(filename.encode("utf-8"))
     sys.stdout.buffer.write(
@@ -365,7 +386,9 @@ def cgimain():
     try:
         page = YouTubeVideoPageParser()
         validate_url(url)
-        parse_url(url, page)
+        with urlopen(url) as f:
+            page.feed(f.read().decode("utf-8"))
+            page.close()
         video_url, filename = get_video_url(page)
         video_data = urlopen(video_url)
     except VideoUnavailable as e:
@@ -437,7 +460,9 @@ def main():
         sys.exit(1)
 
     page = YouTubeVideoPageParser()
-    parse_url(url, page)
+    with urlopen(url) as f:
+        page.feed(f.read().decode("utf-8"))
+        page.close()
     video_url, filename = get_video_url(page)
     print("Downloading", filename)