code.delx.au - youtube-cgi/blobdiff - youtube.cgi
Don't try to download from non-YouTube sites
[youtube-cgi] / youtube.cgi
index c0b5e999a2bb6ce5b90d37dce5debf70622b3777..d1cb6667783a934428c49be0b01f8dcccfafd08c 100755 (executable)
@@ -36,8 +36,11 @@ QUALITIES = {
 class VideoUnavailable(Exception):
     pass
 
+class NotYouTube(Exception):
+    """Raised by validate_url() when a URL is not an https YouTube URL."""
+
 def print_form(url="", msg=""):
-    script_url = "http://%s%s" % (os.environ["HTTP_HOST"], os.environ["REQUEST_URI"])
+    script_url = "https://%s%s" % (os.environ["HTTP_HOST"], os.environ["REQUEST_URI"])
     sys.stdout.write("Content-Type: text/html\r\n\r\n")
     sys.stdout.write("""
 <!DOCTYPE html>
@@ -103,6 +106,16 @@ def urlopen(url, offset=None):
         assert start == offset
     return res
 
+def validate_url(url):
+    """Raise NotYouTube unless url is an https URL on a known YouTube host."""
+    parsed_url = urllib.parse.urlparse(url)
+    scheme_ok = parsed_url.scheme == "https"
+    # Compare exact names: lstrip("www.") strips the characters "w" and ".",
+    # not the prefix, so hosts like "w.youtube.com" slipped through.
+    hosts = ("youtube.com", "www.youtube.com", "youtu.be", "www.youtu.be")
+    if not (scheme_ok and parsed_url.netloc in hosts):
+        raise NotYouTube()
+
 def parse_url(url, parser):
     f = urlopen(url)
     parser.feed(f.read().decode("utf-8"))
@@ -138,7 +151,7 @@ def extract_js(script):
 
 def find_func_name(script):
     FUNC_NAME = R"([a-zA-Z0-9$]+)"
-    FUNC_PARAMS = R"(\([a-zA-Z]+\.s\))"
+    FUNC_PARAMS = R"(\([a-zA-Z,\.]+\.s\))"
     TERMINATOR = R"[,;\)]"
     PATTERN = FUNC_NAME + FUNC_PARAMS + TERMINATOR
 
@@ -166,9 +179,9 @@ def decode_signature(js_url, signature):
         stdout=subprocess.PIPE
     )
     js_decode_script = ("""
-        var vm = require('vm');
+        const vm = require('vm');
 
-        var sandbox = {
+        const sandbox = {
             location: {
                 hash: '',
                 href: '',
@@ -181,17 +194,33 @@ def decode_signature(js_url, signature):
             navigator: {
                 userAgent: ''
             },
+            XMLHttpRequest: class XMLHttpRequest {},
+            matchMedia: () => ({matches: () => {}, media: ''}),
             signature: %(signature)s,
             transformed_signature: null,
             g: function(){} // this is _yt_player
         };
         sandbox.window = sandbox;
 
-        var code_string = %(code)s + ';';
-        var exec_string = 'transformed_signature = %(func_name)s(signature);';
+        const code_string = %(code)s + ';';
+        const exec_string = 'transformed_signature = %(func_name)s("", "MARKER", signature);';
         vm.runInNewContext(code_string + exec_string, sandbox);
 
-        console.log(sandbox.transformed_signature);
+        // Depth-first search of obj for a property named 'MARKER'; returns its value.
+        function findSignature(obj) {
+            // typeof null is 'object' and Object.entries(null) throws, so reject it here.
+            if (obj === null || typeof obj !== 'object') {
+                return;
+            }
+            for (const [key, value] of Object.entries(obj)) {
+                if (key === 'MARKER') {
+                    return value;
+                }
+                const result = findSignature(value);
+                if (result) { return result; }
+            }
+        }
+        console.log(findSignature(sandbox.transformed_signature));
     """ % params)
 
     p.stdin.write(js_decode_script.encode("utf-8"))
@@ -327,11 +356,12 @@ def cgimain():
     try:
         url = args["url"][0]
     except:
-        print_form(url="http://www.youtube.com/watch?v=FOOBAR")
+        print_form(url="https://www.youtube.com/watch?v=FOOBAR")
         return
 
     try:
         page = YouTubeVideoPageParser()
+        validate_url(url)
         parse_url(url, page)
         video_url, filename = get_video_url(page)
         video_data = urlopen(video_url)
@@ -340,10 +370,15 @@ def cgimain():
             url=url,
             msg="<p class='error'>Sorry, there was an error: %s</p>" % cgi.escape(e.args[0])
         )
+    except NotYouTube:
+        print_form(
+            url=url,
+            msg="<p class='error'>Sorry, that does not look like a YouTube page!</p>"
+        )
     except Exception as e:
         print_form(
             url=url,
-            msg="<p class='error'>Sorry, there was an error. Check your URL?</p>"
+            msg="<p class='error'>Sorry, there was an unknown error.</p>"
         )
         return
 
@@ -395,7 +430,7 @@ def main():
     try:
         url = sys.argv[1]
     except:
-        print("Usage: %s http://youtube.com/watch?v=FOOBAR" % sys.argv[0], file=sys.stderr)
+        print("Usage: %s https://youtube.com/watch?v=FOOBAR" % sys.argv[0], file=sys.stderr)
         sys.exit(1)
 
     page = YouTubeVideoPageParser()