import urllib.request
-USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0"
+MOZILLA_RELEASE_URL = "https://www.mozilla.org/en-US/firefox/releases/"
+USER_AGENT_TEMPLATE = "Mozilla/5.0 (X11; Linux x86_64; rv:83.0) Gecko/20100101 Firefox/%s"
MIMETYPES = {
"video/mp4": "mp4",
cookiejar = http.cookiejar.CookieJar()
urlopener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookiejar))
referrer = ""
+user_agent = None
def urlopen(url, offset=None):
+ global user_agent
+ if not user_agent:
+ page = MozillaReleasesPageParser()
+ with urllib.request.urlopen(MOZILLA_RELEASE_URL) as f:
+ page.feed(f.read().decode("utf-8"))
+ page.close()
+ user_agent = USER_AGENT_TEMPLATE % page.latest_release
+
if url.startswith("//"):
url = "https:" + url
if not url.startswith("http://") and not url.startswith("https://"):
else:
req.add_header("Referer", referrer)
- req.add_header("User-Agent", USER_AGENT)
+ req.add_header("User-Agent", user_agent)
if offset:
req.add_header("Range", "bytes=%d-" % offset)
def validate_url(url):
    """Check that *url* is an HTTPS link to an accepted YouTube host.

    The host is the URL's network location with one leading ``www.`` prefix
    and then one leading ``m.`` prefix removed; it must then be exactly
    ``youtube.com`` or ``youtu.be``.

    Returns:
        None when the URL is acceptable.

    Raises:
        NotYouTube: if the scheme is not ``https`` or the host is not one of
            the accepted hosts.
    """
    parsed_url = urllib.parse.urlparse(url)

    # Remove whole prefixes only.  str.lstrip() treats its argument as a
    # *set of characters*, so lstrip("www.") / lstrip("m.") would also reduce
    # look-alike hosts such as "wyoutube.com" or "mmyoutube.com" to
    # "youtube.com" and let them through.
    host = parsed_url.netloc
    for prefix in ("www.", "m."):
        if host.startswith(prefix):
            host = host[len(prefix):]

    scheme_ok = parsed_url.scheme == "https"
    host_ok = host in ("youtube.com", "youtu.be")
    if not (scheme_ok and host_ok):
        raise NotYouTube()
def load_parse_url(url, parser):
    """Fetch *url* and run the complete response body through *parser*.

    The body is read in full, decoded as UTF-8 and handed to
    ``parser.feed()``; the parser is then closed.

    Args:
        url: address passed to ``urlopen()``.
        parser: object providing ``feed(str)`` and ``close()``.

    Returns:
        The *url* argument, unchanged.
    """
    # Use the response as a context manager so it is closed even when
    # reading, decoding or parsing raises.
    with urlopen(url) as response:
        parser.feed(response.read().decode("utf-8"))
    parser.close()
    return url
def get_player_config(scripts):
    """Collect the player configuration embedded in the page's scripts.

    Every line of every script is searched for the JavaScript fragments
    ``ytcfg.set({"`` and ``ytInitialPlayerResponse = {"``.  When a fragment
    and its closing marker (``});`` or ``};``) both occur on the line, the
    JSON object starting at the fragment's opening brace is decoded and
    merged into the result; on duplicate keys the later match wins.

    Args:
        scripts: iterable of script source strings.

    Returns:
        A dict holding the merged objects; empty when nothing matched.

    Raises:
        json.JSONDecodeError: if the text at a matched fragment is not a
            valid JSON object.
    """
    # (start marker, closing marker).  Each start marker ends with `{"`,
    # which are the first two characters of the JSON object itself.
    config_strings = [
        ("ytcfg.set({\"", "});"),
        ("ytInitialPlayerResponse = {\"", "};"),
    ]
    decoder = json.JSONDecoder()
    player_config = {}
    for script in scripts:
        for line in script.split("\n"):
            for start_marker, end_marker in config_strings:
                marker_pos = line.find(start_marker)
                if marker_pos < 0:
                    continue
                # Step back over the `{"` that belongs to the object.
                obj_start = marker_pos + len(start_marker) - 2
                # Lines without the closing marker are skipped.
                if line.find(end_marker, obj_start) < 0:
                    continue
                # raw_decode() stops at the brace that really closes the
                # object, so a closing marker that merely appears inside a
                # JSON string value can no longer truncate the text.
                obj, _ = decoder.raw_decode(line[obj_start:])
                player_config.update(obj)
    return player_config
def extract_js(script):
PREFIX = "var _yt_player={};(function(g){var window=this;"
func_name = match.groups()[0]
return func_name
-def find_url_func(script):
- FUNC_NAME = R"([a-zA-Z0-9$]+)"
- PATTERN = R"this\.url\s*=\s*" + FUNC_NAME + R"\s*\(\s*this\s*\)"
-
- match = re.search(PATTERN, script)
- func_name = match.groups()[0]
- return func_name
def construct_url_from_cipher_result(cipher_result):
    """Build the final URL from the dict produced by the cipher function.

    The values of *cipher_result* are inspected by type rather than by key:
    the first string value starting with ``https://`` is taken as the base
    URL, and the first dict value is taken as the query parameters.  Each
    parameter value is percent-decoded before the parameters are added to
    the base URL with ``append_to_qs()``.

    Args:
        cipher_result: dict decoded from the JavaScript result.

    Returns:
        Whatever ``append_to_qs(base_url, params)`` returns.

    Raises:
        Exception: if no ``https://`` string value, or no dict value, is
            present in *cipher_result*.
    """
    values = list(cipher_result.values())

    base_url = next(
        (v for v in values if isinstance(v, str) and v.startswith("https://")),
        None,
    )
    if base_url is None:
        raise Exception("Could not find URL-like string in cipher result!")

    raw_params = next((v for v in values if isinstance(v, dict)), None)
    if raw_params is None:
        raise Exception("Could not find params-like structure in cipher result!")

    params = {
        name: urllib.parse.unquote(value)
        for name, value in raw_params.items()
    }
    return append_to_qs(base_url, params)
def decode_cipher_url(js_url, cipher):
cipher = urllib.parse.parse_qs(cipher)
f.close()
cipher_func_name = find_cipher_func(script)
- url_func_name = find_url_func(script)
params = {
"cipher_func_name": cipher_func_name,
- "url_func_name": url_func_name,
"args": json.dumps(args),
"code": json.dumps(extract_js(script)),
}
};
fakeGlobal.XMLHttpRequest = class XMLHttpRequest {};
fakeGlobal.matchMedia = () => ({matches: () => {}, media: ''});
- fakeGlobal.result_url = null;
+ fakeGlobal.result = null;
fakeGlobal.g = function(){}; // this is _yt_player
+ fakeGlobal.TimeRanges = function(){};
const code_string = %(code)s + ';';
- const exec_string = 'result_url = %(url_func_name)s(%(cipher_func_name)s(...%(args)s));';
+ const exec_string = 'result = %(cipher_func_name)s(...%(args)s);';
vm.runInNewContext(code_string + exec_string, fakeGlobal);
- console.log(fakeGlobal.result_url);
+ console.log(JSON.stringify(fakeGlobal.result));
""" % params)
p.stdin.write(js_decode_script.encode("utf-8"))
p.stdin.close()
- result_url = p.stdout.read().decode("utf-8").strip()
+ result = json.load(p.stdout)
if p.wait() != 0:
raise Exception("js failed to execute: %d" % p.returncode)
+ result_url = construct_url_from_cipher_result(result)
return result_url
def get_best_video(player_config):
- js_url = player_config["assets"]["js"]
-
- player_args = player_config["args"]
- player_response = json.loads(player_args["player_response"])
- formats = player_response["streamingData"]["formats"]
+ formats = player_config["streamingData"]["formats"]
best_url = None
best_quality = None
continue
if "signatureCipher" in format_data:
+ js_url = player_config["PLAYER_JS_URL"]
video_url = decode_cipher_url(js_url, format_data["signatureCipher"])
else:
video_url = format_data["url"]
if not video_url:
return None, None
- title = player_config["args"].get("title", None)
- if not title:
- title = json.loads(player_config["args"]["player_response"])["videoDetails"]["title"]
+ title = player_config["videoDetails"].get("title", None)
if not title:
title = "Unknown title"
if data:
self.scripts.append(data)
class MozillaReleasesPageParser(html.parser.HTMLParser):
    """HTML parser that records the ``data-latest-firefox`` attribute value.

    After a page has been fed, ``latest_release`` holds the value of the
    last start tag that carried a non-empty ``data-latest-firefox``
    attribute, or ``"1.0"`` when no such tag was seen.
    """

    def __init__(self):
        super().__init__()
        # Fallback used when the page carries no usable attribute.
        self.latest_release = "1.0"

    def handle_starttag(self, tag, attrs):
        """Remember a non-empty ``data-latest-firefox`` value from *attrs*."""
        # attrs is a list of (name, value) pairs; look the attribute up once.
        # A missing, empty or valueless attribute leaves the current value.
        latest = dict(attrs).get("data-latest-firefox")
        if latest:
            self.latest_release = latest
def write_video(filename, video_data):
quoted_filename = urllib.parse.quote(filename.encode("utf-8"))
sys.stdout.buffer.write(
try:
page = YouTubeVideoPageParser()
validate_url(url)
- parse_url(url, page)
+ with urlopen(url) as f:
+ page.feed(f.read().decode("utf-8"))
+ page.close()
video_url, filename = get_video_url(page)
video_data = urlopen(video_url)
except VideoUnavailable as e:
sys.exit(1)
page = YouTubeVideoPageParser()
- parse_url(url, page)
+ with urlopen(url) as f:
+ page.feed(f.read().decode("utf-8"))
+ page.close()
video_url, filename = get_video_url(page)
print("Downloading", filename)