code.delx.au - youtube-cgi/blobdiff - youtube.cgi
Fixed for latest changes
[youtube-cgi] / youtube.cgi
index 0aadef35e5885458ec97282c27bfb2465cd9a375..b0771af44fd95ffe910a361f608f6bf3493829a3 100755 (executable)
@@ -15,7 +15,8 @@ import urllib.parse
 import urllib.request
 
 
-USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:82.0) Gecko/20100101 Firefox/82.0"
+MOZILLA_RELEASE_URL = "https://www.mozilla.org/en-US/firefox/releases/"
+USER_AGENT_TEMPLATE = "Mozilla/5.0 (X11; Linux x86_64; rv:83.0) Gecko/20100101 Firefox/%s"
 
 MIMETYPES = {
     "video/mp4": "mp4",
@@ -76,8 +77,17 @@ def print_form(url="", msg=""):
 cookiejar = http.cookiejar.CookieJar()
 urlopener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookiejar))
 referrer = ""
+user_agent = None
 
 def urlopen(url, offset=None):
+    global user_agent
+    if not user_agent:
+        page = MozillaReleasesPageParser()
+        with urllib.request.urlopen(MOZILLA_RELEASE_URL) as f:
+            page.feed(f.read().decode("utf-8"))
+            page.close()
+        user_agent = USER_AGENT_TEMPLATE % page.latest_release
+
     if url.startswith("//"):
         url = "https:" + url
     if not url.startswith("http://") and not url.startswith("https://"):
@@ -90,7 +100,7 @@ def urlopen(url, offset=None):
     else:
         req.add_header("Referer", referrer)
 
-    req.add_header("User-Agent", USER_AGENT)
+    req.add_header("User-Agent", user_agent)
 
     if offset:
         req.add_header("Range", "bytes=%d-" % offset)
@@ -116,7 +126,7 @@ def validate_url(url):
     else:
         raise NotYouTube()
 
-def parse_url(url, parser):
+def load_parse_url(url, parser):
     f = urlopen(url)
     parser.feed(f.read().decode("utf-8"))
     parser.close()
@@ -132,8 +142,8 @@ def append_to_qs(url, params):
 
 def get_player_config(scripts):
     config_strings = [
-        ("ytplayer.config = {", 1, "};", 1),
         ("ytcfg.set({\"", 2, "});", 1),
+        ("ytInitialPlayerResponse = {\"", 2, "};", 1),
     ]
     player_config = {}
     for script in scripts:
@@ -165,13 +175,22 @@ def find_cipher_func(script):
     func_name = match.groups()[0]
     return func_name
 
-def find_url_func(script):
-    FUNC_NAME = R"([a-zA-Z0-9$]+)"
-    PATTERN = R"this\.url\s*=\s*" + FUNC_NAME + R"\s*\(\s*this\s*\)"
-
-    match = re.search(PATTERN, script)
-    func_name = match.groups()[0]
-    return func_name
+def construct_url_from_cipher_result(cipher_result):
+    """Combine the first https:// string value and the first dict value of cipher_result via append_to_qs."""
+    for candidate in cipher_result.values():
+        if isinstance(candidate, str) and candidate.startswith("https://"):
+            temp_url = candidate
+            break
+    else:
+        raise Exception("Could not find URL-like string in cipher result!")
+
+    for candidate in cipher_result.values():
+        if isinstance(candidate, dict):
+            # Each value is run through urllib.parse.unquote before being handed on.
+            params = {name: urllib.parse.unquote(raw) for name, raw in candidate.items()}
+            return append_to_qs(temp_url, params)
+
+    raise Exception("Could not find params-like structure in cipher result!")
 
 def decode_cipher_url(js_url, cipher):
     cipher = urllib.parse.parse_qs(cipher)
@@ -186,11 +205,9 @@ def decode_cipher_url(js_url, cipher):
     f.close()
 
     cipher_func_name = find_cipher_func(script)
-    url_func_name = find_url_func(script)
 
     params = {
         "cipher_func_name": cipher_func_name,
-        "url_func_name": url_func_name,
         "args": json.dumps(args),
         "code": json.dumps(extract_js(script)),
     }
@@ -227,29 +244,29 @@ def decode_cipher_url(js_url, cipher):
         };
         fakeGlobal.XMLHttpRequest = class XMLHttpRequest {};
         fakeGlobal.matchMedia = () => ({matches: () => {}, media: ''});
-        fakeGlobal.result_url = null;
+        fakeGlobal.result = null;
         fakeGlobal.g = function(){}; // this is _yt_player
+        fakeGlobal.TimeRanges = function(){};
 
         const code_string = %(code)s + ';';
-        const exec_string = 'result_url = %(url_func_name)s(%(cipher_func_name)s(...%(args)s));';
+        const exec_string = 'result = %(cipher_func_name)s(...%(args)s);';
         vm.runInNewContext(code_string + exec_string, fakeGlobal);
 
-        console.log(fakeGlobal.result_url);
+        console.log(JSON.stringify(fakeGlobal.result));
     """ % params)
 
     p.stdin.write(js_decode_script.encode("utf-8"))
     p.stdin.close()
 
-    result_url = p.stdout.read().decode("utf-8").strip()
+    result = json.load(p.stdout)
     if p.wait() != 0:
         raise Exception("js failed to execute: %d" % p.returncode)
 
+    result_url = construct_url_from_cipher_result(result)
     return result_url
 
 def get_best_video(player_config):
-    player_args = player_config["args"]
-    player_response = json.loads(player_args["player_response"])
-    formats = player_response["streamingData"]["formats"]
+    formats = player_config["streamingData"]["formats"]
 
     best_url = None
     best_quality = None
@@ -298,9 +315,7 @@ def get_video_url(page):
     if not video_url:
         return None, None
 
-    title = player_config["args"].get("title", None)
-    if not title:
-        title = json.loads(player_config["args"]["player_response"])["videoDetails"]["title"]
+    title = player_config["videoDetails"].get("title", None)
     if not title:
         title = "Unknown title"
 
@@ -340,6 +355,16 @@ class YouTubeVideoPageParser(html.parser.HTMLParser):
         if data:
             self.scripts.append(data)
 
+class MozillaReleasesPageParser(html.parser.HTMLParser):
+    """Keep the last non-empty data-latest-firefox attribute value seen on any start tag."""
+    def __init__(self):
+        super().__init__()
+        self.latest_release = "1.0"  # value reported when no tag supplies the attribute
+
+    def handle_starttag(self, tag, attrs):
+        version = dict(attrs).get("data-latest-firefox")
+        self.latest_release = version or self.latest_release
+
 def write_video(filename, video_data):
     quoted_filename = urllib.parse.quote(filename.encode("utf-8"))
     sys.stdout.buffer.write(
@@ -365,7 +390,9 @@ def cgimain():
     try:
         page = YouTubeVideoPageParser()
         validate_url(url)
-        parse_url(url, page)
+        with urlopen(url) as f:
+            page.feed(f.read().decode("utf-8"))
+            page.close()
         video_url, filename = get_video_url(page)
         video_data = urlopen(video_url)
     except VideoUnavailable as e:
@@ -437,7 +464,9 @@ def main():
         sys.exit(1)
 
     page = YouTubeVideoPageParser()
-    parse_url(url, page)
+    with urlopen(url) as f:
+        page.feed(f.read().decode("utf-8"))
+        page.close()
     video_url, filename = get_video_url(page)
     print("Downloading", filename)