import urllib.request
-USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0"
+MOZILLA_RELEASE_URL = "https://www.mozilla.org/en-US/firefox/releases/"
+USER_AGENT_TEMPLATE = "Mozilla/5.0 (X11; Linux x86_64; rv:83.0) Gecko/20100101 Firefox/%s"
MIMETYPES = {
"video/mp4": "mp4",
cookiejar = http.cookiejar.CookieJar()
urlopener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookiejar))
referrer = ""
+user_agent = None
def urlopen(url, offset=None):
+ global user_agent
+ if not user_agent:
+ page = MozillaReleasesPageParser()
+ with urllib.request.urlopen(MOZILLA_RELEASE_URL) as f:
+ page.feed(f.read().decode("utf-8"))
+ page.close()
+ user_agent = USER_AGENT_TEMPLATE % page.latest_release
+
if url.startswith("//"):
url = "https:" + url
if not url.startswith("http://") and not url.startswith("https://"):
else:
req.add_header("Referer", referrer)
- req.add_header("User-Agent", USER_AGENT)
+ req.add_header("User-Agent", user_agent)
if offset:
req.add_header("Range", "bytes=%d-" % offset)
def validate_url(url):
    """Check that *url* is an HTTPS link to a YouTube host.

    The host may be ``youtube.com`` or ``youtu.be``, optionally preceded
    by a ``www.`` prefix and/or an ``m.`` (mobile) prefix.

    Returns:
        None when the URL is acceptable.

    Raises:
        NotYouTube: if the scheme is not ``https`` or the host is not one
            of the accepted YouTube hosts.
    """
    parsed_url = urllib.parse.urlparse(url)
    scheme_ok = parsed_url.scheme == "https"

    # str.lstrip() removes any leading run of the given *characters*, not a
    # prefix, so lstrip("www.") would also turn "wyoutube.com" or
    # "w.m.youtube.com" into "youtube.com".  Remove exact prefixes instead,
    # in the same order as before: "www." first, then "m.".
    host = parsed_url.netloc
    for prefix in ("www.", "m."):
        if host.startswith(prefix):
            host = host[len(prefix):]
    host_ok = host in ("youtube.com", "youtu.be")

    if not (scheme_ok and host_ok):
        raise NotYouTube()
def load_parse_url(url, parser):
    """Download *url* and feed its UTF-8 decoded body to *parser*.

    Args:
        url: address handed to ``urlopen``.
        parser: object exposing ``feed(str)`` and ``close()``.

    Returns:
        The *url* argument, unchanged.
    """
    # Use the response as a context manager so the connection is released
    # even when reading, decoding or parsing raises.
    with urlopen(url) as f:
        parser.feed(f.read().decode("utf-8"))
    parser.close()
    return url
def get_player_config(scripts):
    """Collect the player configuration embedded in inline scripts.

    Every line of every script is searched for a ``ytcfg.set({...});``
    call and a ``ytInitialPlayerResponse = {...};`` assignment.  Each
    object literal found is parsed as JSON and merged into a single dict;
    on duplicate keys the later occurrence wins.

    Args:
        scripts: iterable of script source strings.

    Returns:
        The merged configuration dict, empty if nothing matched.

    Raises:
        ValueError: propagated from ``json.loads`` when a located span is
            not valid JSON.
    """
    # Each entry: (start marker, number of trailing marker characters that
    # already belong to the JSON text, end marker, number of leading
    # end-marker characters that still belong to the JSON text).
    config_strings = [
        ("ytcfg.set({\"", 2, "});", 1),
        ("ytInitialPlayerResponse = {\"", 2, "};", 1),
    ]
    player_config = {}
    for script in scripts:
        for line in script.split("\n"):
            for s1, off1, s2, off2 in config_strings:
                start = line.find(s1)
                if start < 0:
                    continue
                p1 = start + len(s1) - off1
                # Test the raw find() result: checking "end + off2 > 0"
                # only detects a missing terminator while off2 == 1.
                end = line.find(s2, p1)
                if end < 0:
                    continue
                p2 = end + off2
                player_config.update(json.loads(line[p1:p2]))
    return player_config
def extract_js(script):
PREFIX = "var _yt_player={};(function(g){var window=this;"
return script[len(PREFIX):-len(SUFFIX)]
-def find_func_name(script):
+def find_cipher_func(script):
FUNC_NAME = R"([a-zA-Z0-9$]+)"
DECODE_URI_COMPONENT = R"(\(decodeURIComponent)?"
FUNC_PARAMS = R"(\([a-zA-Z,\.]+\.s\))"
func_name = match.groups()[0]
return func_name
-def decode_signature(js_url, signature):
def construct_url_from_cipher_result(cipher_result):
    """Assemble the final URL from the mapping produced by the cipher call.

    The first string value starting with ``https://`` is taken as the base
    URL.  The first dict value is taken as the query parameters; each of
    its values is percent-decoded before being appended to the base URL
    with ``append_to_qs``.

    Raises:
        Exception: if no URL-like string or no dict value is present.
    """
    base_url = None
    for value in cipher_result.values():
        if isinstance(value, str) and value.startswith("https://"):
            base_url = value
            break
    if base_url is None:
        raise Exception("Could not find URL-like string in cipher result!")

    for value in cipher_result.values():
        if not isinstance(value, dict):
            continue
        decoded = {
            name: urllib.parse.unquote(raw) for name, raw in value.items()
        }
        return append_to_qs(base_url, decoded)

    raise Exception("Could not find params-like structure in cipher result!")

+def decode_cipher_url(js_url, cipher):
+ cipher = urllib.parse.parse_qs(cipher)
+ args = [
+ cipher["url"][0],
+ cipher["sp"][0],
+ cipher["s"][0],
+ ]
+
f = urlopen(js_url)
script = f.read().decode("utf-8")
f.close()
- func_name = find_func_name(script)
+ cipher_func_name = find_cipher_func(script)
params = {
- "func_name": func_name,
- "signature": json.dumps(signature),
+ "cipher_func_name": cipher_func_name,
+ "args": json.dumps(args),
"code": json.dumps(extract_js(script)),
}
p = subprocess.Popen(
js_decode_script = ("""
const vm = require('vm');
- const sandbox = {
- location: {
- hash: '',
- href: '',
- protocol: 'http:'
- },
- history: {
- pushState: function(){}
- },
- document: {},
- navigator: {
- userAgent: ''
- },
- XMLHttpRequest: class XMLHttpRequest {},
- matchMedia: () => ({matches: () => {}, media: ''}),
- signature: %(signature)s,
- transformed_signature: null,
- g: function(){} // this is _yt_player
+ const fakeGlobal = {};
+ fakeGlobal.window = fakeGlobal;
+ fakeGlobal.location = {
+ hash: '',
+ host: 'www.youtube.com',
+ hostname: 'www.youtube.com',
+ href: 'https://www.youtube.com',
+ origin: 'https://www.youtube.com',
+ pathname: '/',
+ protocol: 'https:'
+ };
+ fakeGlobal.history = {
+ pushState: function(){}
+ };
+ fakeGlobal.document = {
+ location: fakeGlobal.location
+ };
+ fakeGlobal.document = {};
+ fakeGlobal.navigator = {
+ userAgent: ''
};
- sandbox.window = sandbox;
+ fakeGlobal.XMLHttpRequest = class XMLHttpRequest {};
+ fakeGlobal.matchMedia = () => ({matches: () => {}, media: ''});
+ fakeGlobal.result = null;
+ fakeGlobal.g = function(){}; // this is _yt_player
+ fakeGlobal.TimeRanges = function(){};
const code_string = %(code)s + ';';
- const exec_string = 'transformed_signature = %(func_name)s(signature);';
- vm.runInNewContext(code_string + exec_string, sandbox);
+ const exec_string = 'result = %(cipher_func_name)s(...%(args)s);';
+ vm.runInNewContext(code_string + exec_string, fakeGlobal);
- console.log(sandbox.transformed_signature);
+ console.log(JSON.stringify(fakeGlobal.result));
""" % params)
p.stdin.write(js_decode_script.encode("utf-8"))
p.stdin.close()
- transformed_signature = p.stdout.read().decode("utf-8").strip()
- transformed_signature = urllib.parse.unquote(transformed_signature)
+ result = json.load(p.stdout)
if p.wait() != 0:
raise Exception("js failed to execute: %d" % p.returncode)
- return transformed_signature
+ result_url = construct_url_from_cipher_result(result)
+ return result_url
def get_best_video(player_config):
- js_url = player_config["assets"]["js"]
-
- player_args = player_config["args"]
- player_response = json.loads(player_args["player_response"])
- formats = player_response["streamingData"]["formats"]
+ formats = player_config["streamingData"]["formats"]
best_url = None
best_quality = None
continue
if "signatureCipher" in format_data:
- cipher = urllib.parse.parse_qs(format_data["signatureCipher"])
- video_url = cipher["url"][0]
- if "sig" in cipher:
- signature = cipher["sig"][0]
- elif "s" in cipher:
- signature = decode_signature(js_url, cipher["s"][0])
- sp = cipher.get("sp", ["signature"])[0]
- video_url = append_to_qs(video_url, {sp: signature})
+ js_url = player_config["PLAYER_JS_URL"]
+ video_url = decode_cipher_url(js_url, format_data["signatureCipher"])
else:
video_url = format_data["url"]
if not video_url:
return None, None
- title = player_config["args"].get("title", None)
- if not title:
- title = json.loads(player_config["args"]["player_response"])["videoDetails"]["title"]
+ title = player_config["videoDetails"].get("title", None)
if not title:
title = "Unknown title"
if data:
self.scripts.append(data)
class MozillaReleasesPageParser(html.parser.HTMLParser):
    """HTML parser that records the ``data-latest-firefox`` attribute.

    After feeding a page, ``latest_release`` holds the value of the last
    non-empty ``data-latest-firefox`` attribute seen on any start tag, or
    ``"1.0"`` if there was none.
    """

    def __init__(self):
        super().__init__()
        # Fallback reported when no tag carries the attribute.
        self.latest_release = "1.0"

    def handle_starttag(self, tag, attrs):
        """Remember the release number if this tag advertises one."""
        latest = dict(attrs).get("data-latest-firefox")
        if latest:
            self.latest_release = latest

def write_video(filename, video_data):
quoted_filename = urllib.parse.quote(filename.encode("utf-8"))
sys.stdout.buffer.write(
try:
page = YouTubeVideoPageParser()
validate_url(url)
- parse_url(url, page)
+ with urlopen(url) as f:
+ page.feed(f.read().decode("utf-8"))
+ page.close()
video_url, filename = get_video_url(page)
video_data = urlopen(video_url)
except VideoUnavailable as e:
sys.exit(1)
page = YouTubeVideoPageParser()
- parse_url(url, page)
+ with urlopen(url) as f:
+ page.feed(f.read().decode("utf-8"))
+ page.close()
video_url, filename = get_video_url(page)
print("Downloading", filename)