code.delx.au - webdl/commitdiff
iview: Use HLS instead of RTMP
author: James Bunton <jamesbunton@delx.net.au>
Tue, 19 Aug 2014 15:45:01 +0000 (01:45 +1000)
committer: James Bunton <jamesbunton@delx.net.au>
Tue, 19 Aug 2014 15:45:01 +0000 (01:45 +1000)
ABC seems to have broken the legacy streaming format; this should be more future-proof.

common.py
iview.py

index f51ec5305f60e04eda6686e8cdcf480c188d9c28..94c0e2c57175550b6149d97363b59b526a050f14 100644 (file)
--- a/common.py
+++ b/common.py
@@ -173,14 +173,8 @@ def convert_flv_mp4(orig_filename):
     if orig_filename != flv_filename:
         os.rename(orig_filename, flv_filename)
     print "Converting %s to mp4" % flv_filename
-    cmd = [
-        "avconv",
-        "-i", flv_filename,
-        "-acodec", "copy",
-        "-vcodec", "copy",
-        mp4_filename,
-    ]
-    if not exec_subprocess(cmd):
+    if not avconv_remux(flv_filename, mp4_filename):
+        # failed, error has already been logged
         return
     try:
         flv_size = os.stat(flv_filename).st_size
@@ -192,6 +186,16 @@ def convert_flv_mp4(orig_filename):
     except Exception, e:
         print "Conversion failed", e
 
+def avconv_remux(infile, outfile):
+    cmd = [
+        "avconv",
+        "-i", infile,
+        "-acodec", "copy",
+        "-vcodec", "copy",
+        outfile,
+    ]
+    return exec_subprocess(cmd)
+
 def convert_filename(filename):
     if os.path.splitext(filename.lower())[1] in (".mp4", ".flv"):
         f = open(filename)
@@ -249,6 +253,99 @@ def download_urllib(filename, url, referrer=None):
     convert_filename(filename)
     return True
 
+def download_hls_get_stream(url, hack_url_func):
+    url = hack_url_func(url)
+
+    def parse_bandwidth(line):
+        params = line.split(":", 1)[1].split(",")
+        for kv in params:
+            k, v = kv.split("=", 1)
+            if k == "BANDWIDTH":
+                return int(v)
+        return 0
+
+    m3u8 = grab_text(url, 0)
+    best_bandwidth = None
+    best_url = None
+    for line in m3u8.split("\n"):
+        if line.startswith("#EXT-X-STREAM-INF:"):
+            bandwidth = parse_bandwidth(line)
+            if best_bandwidth is None or bandwidth > best_bandwidth:
+                best_bandwidth = bandwidth
+                best_url = None
+        elif not line.startswith("#"):
+            if best_url is None:
+                best_url = line.strip()
+
+    if not best_url:
+        raise Exception("Failed to find best stream for HLS: " + url)
+
+    return best_url
+
+def download_hls_segments(tmpdir, url, hack_url_func):
+    m3u8 = grab_text(url, 0)
+    result = []
+
+    local_m3u8_filename = tmpdir + "/index.m3u8"
+    local_m3u8 = open(local_m3u8_filename, "w")
+
+    i = 1
+    for line in m3u8.split("\n"):
+        if not line.strip():
+            continue
+        if line.startswith("#"):
+            local_m3u8.write(line + "\n")
+            continue
+
+        outfile = "%s/segment_%d.ts" % (tmpdir, i)
+        i += 1
+        local_m3u8.write(outfile + "\n")
+        download_hls_fetch_segment(line, outfile)
+        sys.stdout.write(".")
+        sys.stdout.flush()
+
+    sys.stdout.write("\n")
+
+    local_m3u8.close()
+    return local_m3u8_filename
+
+def download_hls_fetch_segment(segment, outfile):
+    try:
+        src = _urlopen(segment)
+        dst = open(outfile, "w")
+        shutil.copyfileobj(src, dst)
+    finally:
+        try:
+            src.close()
+        except:
+            pass
+        try:
+            dst.close()
+        except:
+            pass
+
+def download_hls(filename, m3u8_master_url, hack_url_func=None):
+    if hack_url_func is None:
+        hack_url_func = lambda url: url
+
+    tmpdir = tempfile.mkdtemp(prefix="webdl-hls")
+    filename = sanify_filename(filename)
+
+    print "Downloading: %s" % filename
+
+    try:
+        best_stream_url = download_hls_get_stream(m3u8_master_url, hack_url_func)
+        local_m3u8 = download_hls_segments(tmpdir, best_stream_url, hack_url_func)
+        avconv_remux(local_m3u8, filename)
+        return False
+    except KeyboardInterrupt:
+        print "\nCancelled", m3u8_master_url
+        return False
+    finally:
+        shutil.rmtree(tmpdir)
+
+    return True
+
 def natural_sort(l, key=None):
     ignore_list = ["a", "the"]
     def key_func(k):
index c51d73fb02cc1fd628b8d1d5d21cf7a2884ffca3..cfe5ec4a9ceab6e08eeac0a8e915e560155db4c6 100644 (file)
--- a/iview.py
+++ b/iview.py
-from common import grab_xml, grab_json, download_rtmp, Node
-import itertools
-
-BASE_URL = "http://www.abc.net.au/iview/"
-CONFIG_URL = BASE_URL + "xml/config.xml"
-HASH_URL = BASE_URL + "images/iview.jpg"
-NS = {
-    "auth": "http://www.abc.net.au/iView/Services/iViewHandshaker",
-}
-
-class IviewNode(Node):
-    def __init__(self, title, parent, params, vpath):
+from common import grab_json, grab_xml, Node, download_hls
+import urlparse
+
+API_URL = "http://iview.abc.net.au/api"
+AUTH_URL = "http://iview.abc.net.au/auth"
+
+def format_episode_title(series, ep):
+    if ep:
+        return series + " " + ep
+    else:
+        return series
+
+class IviewEpisodeNode(Node):
+    def __init__(self, title, parent, video_key):
         Node.__init__(self, title, parent)
-        self.params = params
-        self.vpath = vpath
-        self.filename = self.title + "." + vpath.rsplit(".", 1)[1]
+        self.video_key = video_key
+        self.filename = title + ".mp4"
         self.can_download = True
 
+    def find_hls_url(self, playlist):
+        for video in playlist:
+            if video["type"] == "program":
+                return video["hls-high"]
+        raise Exception("Missing hls-high program stream for " + self.video_key)
+
+    def get_auth_details(self):
+        auth_doc = grab_xml(AUTH_URL, 0)
+        NS = {
+            "auth": "http://www.abc.net.au/iView/Services/iViewHandshaker",
+        }
+        token = auth_doc.xpath("//auth:tokenhd/text()", namespaces=NS)[0]
+        token_url = auth_doc.xpath("//auth:server/text()", namespaces=NS)[0]
+        token_hostname = urlparse.urlparse(token_url).netloc
+        return token, token_hostname
+
+    def hack_url_auth_token(self, video_url, token, token_hostname):
+        parsed_url = urlparse.urlparse(video_url)
+        hacked_url = parsed_url._replace(netloc=token_hostname, query="hdnea=" + token)
+        video_url = urlparse.urlunparse(hacked_url)
+        return video_url
+
     def download(self):
-        auth_doc = grab_xml(self.params["auth"], 0)
-        server = self.params["server_streaming"]
-        token = auth_doc.xpath("//auth:token/text()", namespaces=NS)[0]
-        playpath = auth_doc.xpath("//auth:path/text()", namespaces=NS)[0]
-        if playpath == "playback/_definst_/":
-            playpath = "flash/" + playpath
-        vbase = server + "?auth=" + token
-        vpath, ext = self.vpath.rsplit(".", 1)
-        vpath = ext + ":" + playpath + vpath
-        return download_rtmp(self.filename, vbase, vpath, HASH_URL)
-
-class IviewSeriesNode(Node):
-    def __init__(self, title, parent, params, series_ids):
+        info = grab_json(API_URL + "/programs/" + self.video_key, 3600)
+        video_url = self.find_hls_url(info["playlist"])
+        token, token_hostname= self.get_auth_details()
+        hack_url = lambda url: self.hack_url_auth_token(url, token, token_hostname)
+        download_hls(self.filename, video_url, hack_url)
+
+
+class IviewIndexNode(Node):
+    def __init__(self, title, parent, url):
         Node.__init__(self, title, parent)
-        self.params = params
-        self.series_ids = series_ids
+        self.url = url
+        self.series_map = {}
+
+    def add_episode(self, ep_info):
+        video_key = ep_info["episodeHouseNumber"]
+        series_title = ep_info["seriesTitle"]
+        title = ep_info.get("title", None)
+        episode_title = format_episode_title(series_title, title)
+
+        series_node = self.series_map.get(series_title, None)
+        if not series_node:
+            series_node = Node(series_title, self)
+            self.series_map[series_title] = series_node
+
+        IviewEpisodeNode(episode_title, series_node, video_key)
 
     def fill_children(self):
-        for series_id in self.series_ids:
-            self.fill_children_for_id(series_id)
-
-    def fill_children_for_id(self, series_id):
-        series_doc = grab_json(self.params["api"] + "series=" + series_id, 3600)
-        for episode_list in series_doc:
-            if episode_list["a"] == series_id:
-                episode_list = episode_list["f"]
-                break
-        else:
-            return
-
-        for episode in episode_list:
-            vpath = episode["n"]
-            episode_title = episode["b"].strip()
-            if not episode_title.startswith(self.title):
-                episode_title = self.title + " " + episode_title
-            if episode_title.lower().endswith(" (final)"):
-                episode_title = episode_title[:-8]
-            IviewNode(episode_title, self, self.params, vpath)
-
-class SeriesInfo(object):
-    def __init__(self, title):
-        self.title = title
-        self.series_ids = set()
-        self.categories = set()
-
-    def add_series_id(self, series_id):
-        self.series_ids.add(series_id)
-
-    def add_categories(self, categories):
-        self.categories.update(categories)
+        info = grab_json(self.url, 3600)
+        for index_list in info["index"]:
+            for ep_info in index_list["episodes"]:
+                self.add_episode(ep_info)
 
-class IviewRootNode(Node):
-    def __init__(self, parent):
-        Node.__init__(self, "ABC iView", parent)
-        self.params = {}
-        self.series_info = {}
-        self.categories_map = {}
-
-    def load_params(self):
-        config_doc = grab_xml(CONFIG_URL, 24*3600)
-        for p in config_doc.xpath("/config/param"):
-            key = p.attrib["name"]
-            value = p.attrib["value"]
-            self.params[key] = value
-
-    def load_series(self):
-        series_list_doc = grab_json(self.params["api"] + "seriesIndex", 3600)
-        for series in series_list_doc:
-            title = series["b"].replace("&amp;", "&")
-            sid = series["a"]
-            categories = series["e"].split()
-            info = self.series_info.get(title, None)
-            if not info:
-                info = SeriesInfo(title)
-                self.series_info[title] = info
-            info.add_categories(categories)
-            info.add_series_id(sid)
+class IviewFlatNode(Node):
+    def __init__(self, title, parent, url):
+        Node.__init__(self, title, parent)
+        self.url = url
+
+    def add_episode(self, ep_info):
+        video_key = ep_info["episodeHouseNumber"]
+        series_title = ep_info["seriesTitle"]
+        title = ep_info.get("title", None)
+        episode_title = format_episode_title(series_title, title)
+
+        IviewEpisodeNode(episode_title, self, video_key)
 
+    def fill_children(self):
+        info = grab_json(self.url, 3600)
+        for ep_info in info:
+            self.add_episode(ep_info)
+
+
+class IviewRootNode(Node):
     def load_categories(self):
-        categories_doc = grab_xml(BASE_URL + self.params["categories"], 24*3600)
-        by_channel = Node("By Channel", self)
-        by_genre = Node("By Genre", self)
-        for category in categories_doc.xpath("//category"):
-            cid = category.attrib["id"]
-            category_name = category.xpath("name/text()")[0]
-            if "genre" in category.attrib:
-                parent = by_genre
-            elif cid in ["abc1", "abc2", "abc3", "abc4", "original"]:
-                parent = by_channel
-            elif cid in ["featured", "recent", "last-chance", "trailers"]:
-                parent = self
-            else:
-                continue
-            node = Node(category_name, parent)
-            self.categories_map[cid] = node
-
-    def link_series(self):
-        # Create a duplicate within each category for each series
-        for s in self.series_info.itervalues():
-            for cid in s.categories:
-                parent = self.categories_map.get(cid)
-                if parent:
-                    IviewSeriesNode(s.title, parent, self.params, s.series_ids)
+        by_category_node = Node("By Category", self)
+        def category(name, slug):
+            IviewIndexNode(name, by_category_node, API_URL + "/category/" + slug)
+
+        category("Arts & Culture", "arts")
+        category("Comedy", "comedy")
+        category("Documentary", "docs")
+        category("Drama", "drama")
+        category("Education", "education")
+        category("Lifestyle", "lifestyle")
+        category("News & Current Affairs", "news")
+        category("Panel & Discussion", "panel")
+        category("Sport", "sport")
+
+    def load_channels(self):
+        by_channel_node = Node("By Channel", self)
+        def channel(name, slug):
+            IviewIndexNode(name, by_channel_node, API_URL + "/channel/" + slug)
+
+        channel("ABC1", "abc1")
+        channel("ABC2", "abc2")
+        channel("ABC3", "abc3")
+        channel("ABC4Kids", "abc4kids")
+        channel("iView Exclusives", "iview")
+
+    def load_featured(self):
+        IviewFlatNode("Featured", self, API_URL + "/featured")
 
     def fill_children(self):
-        self.load_params()
-        self.load_series()
         self.load_categories()
-        self.link_series()
+        self.load_channels()
+        self.load_featured()
 
 
 def fill_nodes(root_node):
-    IviewRootNode(root_node)
+    IviewRootNode("ABC iView", root_node)