From 52b09ded0b5912819ae31dd870227eba61f776a4 Mon Sep 17 00:00:00 2001 From: James Bunton Date: Sun, 18 Dec 2011 23:01:08 +1100 Subject: [PATCH] iView and SBS webdl stuff gets its own project, now it just needs a good name! --- .hgignore | 4 ++ autograbber.py | 55 +++++++++++++++++++++++ autosocks.py | 89 ++++++++++++++++++++++++++++++++++++ common.py | 120 +++++++++++++++++++++++++++++++++++++++++++++++++ grabber.py | 59 ++++++++++++++++++++++++ iview.py | 63 ++++++++++++++++++++++++++ sbs.py | 62 +++++++++++++++++++++++++ 7 files changed, 452 insertions(+) create mode 100644 .hgignore create mode 100755 autograbber.py create mode 100644 autosocks.py create mode 100644 common.py create mode 100755 grabber.py create mode 100644 iview.py create mode 100644 sbs.py diff --git a/.hgignore b/.hgignore new file mode 100644 index 0000000..9803160 --- /dev/null +++ b/.hgignore @@ -0,0 +1,4 @@ +syntax: glob +*.py[oc] +.*.swp + diff --git a/autograbber.py b/autograbber.py new file mode 100755 index 0000000..1c6ef0f --- /dev/null +++ b/autograbber.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python +# vim:ts=4:sts=4:sw=4:noet + +from common import load_root_node +import fnmatch +import sys + +class DownloadList(object): + def __init__(self, filename): + self.f = open(filename, "a+") + self.seen_list = set() + for line in self.f: + self.seen_list.add(line.strip()) + + def has_seen(self, node): + return node.title in self.seen_list + + def mark_seen(self, node): + self.seen_list.add(node.title) + self.f.write(node.title + "\n") + + +def match(download_list, node, pattern): + if node.can_download: + if not download_list.has_seen(node): + if node.download(): + download_list.mark_seen(node) + else: + print >>sys.stderr, "Failed to download!", node.title + return + + p = pattern[0] + for child in node.children: + if fnmatch.fnmatch(child.title, p): + match(download_list, child, pattern[1:]) + + +def main(): + print "Loading episode data...", + sys.stdout.flush() + node = load_root_node() + print "done" + + download_list = DownloadList("downloaded_auto.txt") + + for search in sys.argv[1:]: + search = search.split("/") + match(download_list, node, search) + +if __name__ == "__main__": + try: + main() + except (KeyboardInterrupt, EOFError): + print "\nExiting..." + diff --git a/autosocks.py b/autosocks.py new file mode 100644 index 0000000..b71bee8 --- /dev/null +++ b/autosocks.py @@ -0,0 +1,89 @@ +# vim:ts=4:sts=4:sw=4:noet + +import subprocess + + +def detect_gnome(): + """ Gnome via python-gconf """ + from gconf import client_get_default + gconf_client = client_get_default() + mode = gconf_client.get_string("/system/proxy/mode") + if mode != "manual": + return None, None + host = gconf_client.get_string("/system/proxy/socks_host") + port = gconf_client.get_int("/system/proxy/socks_port") + return host, port + +def detect_osx(): + """ OS X 10.5 and up via PyObjC """ + from SystemConfiguration import SCDynamicStoreCopyProxies + osx_proxy = SCDynamicStoreCopyProxies(None) + if osx_proxy.get("SOCKSEnable"): + host = osx_proxy.get("SOCKSProxy") + port = int(osx_proxy.get("SOCKSPort")) + return host, port + return None, None + +def detect_kde(): + """ KDE via command line, why no python bindings for KDE proxy settings? """ + if os.environ.get("KDE_FULL_SESSION") != "true": + return None, None + p = subprocess.Popen( + [ + "kreadconfig", + "--file", + "kioslaverc", + "--group", + "Proxy Settings", + "--key", + "socksProxy", + ], + shell=True, + stdout=subprocess.PIPE, + ) + host, port = p.stdout.readline()[:-1].split(":") + p.close() + port = int(port) + return host, port + +def detect_env(): + """ fallback to environment variables """ + socks_environ = os.environ.get("SOCKS_SERVER") + if not socks_environ: + return None, None + host, port = socks_environ + port = int(port) + return host, port + + +def configure_socks(host, port): + """ hijack socket.socket using SocksiPy """ + try: + import socks, socket + except ImportError: + print >>sys.stderr, "Failed to use configured SOCKS proxy:", host, port + print >>sys.stderr, "Try installing SocksiPy: http://socksipy.sf.net" + return False + + socket.socket = socks.socksocket + socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, host, port) + return True + + +def try_autosocks(): + functions = [ + detect_gnome, + detect_osx, + detect_kde, + detect_env, + ] + for func in functions: + host, port = None, None + try: + host, port = func() + except Exception, e: + pass + if host is not None and port is not None: + return configure_socks(host, port) + return False + diff --git a/common.py b/common.py new file mode 100644 index 0000000..f286333 --- /dev/null +++ b/common.py @@ -0,0 +1,120 @@ +# vim:ts=4:sts=4:sw=4:noet + +from lxml import etree +import json +import md5 +import os +import shutil +import signal +import subprocess +import sys +import tempfile +import time +import urllib + + +import autosocks +autosocks.try_autosocks() + +CACHE_DIR = os.path.expanduser("~/.cache/webdl") + +class Node(object): + def __init__(self, title, parent=None): + self.title = title + if parent: + parent.children.append(self) + self.parent = parent + self.children = [] + self.can_download = False + + def download(self): + raise NotImplemented + + +def load_root_node(): + root_node = Node("Root") + + import iview + iview_node = Node("ABC iView", root_node) + iview.fill_nodes(iview_node) + + import sbs + sbs_node = Node("SBS", root_node) + sbs.fill_nodes(sbs_node) + + return root_node + + +def urlopen(url): + try: + os.mkdir(CACHE_DIR) + except OSError: + pass + + filename = md5.new(url).hexdigest() + filename = os.path.join(CACHE_DIR, filename) + if os.path.exists(filename): + if int(time.time()) - os.path.getmtime(filename) < 24*3600: + return open(filename) + + src = urllib.urlopen(url) + dst = open(filename, "w") + shutil.copyfileobj(src, dst) + src.close() + dst.close() + + return open(filename) + +def grab_xml(url): + f = urlopen(url) + doc = etree.parse(f) + f.close() + return doc + +def grab_json(url): + f = urlopen(url) + doc = json.load(f) + f.close() + return doc + +def download_rtmp(filename, vbase, vpath): + if vpath.endswith(".flv"): + vpath = vpath[:-4] + cmd = [ + "rtmpdump", + "-o", filename, + "-r", vbase, + "-y", vpath, + ] + try: + p = subprocess.Popen(cmd) + ret = p.wait() + if ret != 0: + print >>sys.stderr, "rtmpdump exited with error code:", ret + return False + except OSError, e: + print >>sys.stderr, "Failed to run rtmpdump!", e + return False + except KeyboardInterrupt: + print "Cancelled", cmd + try: + p.terminate() + p.wait() + except KeyboardInterrupt: + p.send_signal(signal.SIGKILL) + p.wait() + +def download_urllib(filename, url): + print "Downloading: %s -> %s" % (url, filename) + try: + src = urllib.urlopen(url) + dst = open(filename, "w") + shutil.copyfileobj(src, dst) + return True + except KeyboardInterrupt: + print "\nCancelled", url + finally: + src.close() + dst.close() + return False + diff --git a/grabber.py b/grabber.py new file mode 100755 index 0000000..b4b8310 --- /dev/null +++ b/grabber.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# vim:ts=4:sts=4:sw=4:noet + +from common import load_root_node +import sys + +def choose(options, allow_multi): + skeys = sorted(options.keys()) + for i, key in enumerate(skeys): + print " %d) %s" % (i+1, key) + print " 0) Back" + while True: + try: + values = map(int, raw_input("Choose> ").split()) + if len(values) == 0: + continue + if 0 in values: + return + values = [options[skeys[value-1]] for value in values] + if allow_multi: + return values + else: + if len(values) == 1: + return values[0] + except ValueError, IndexError: + print >>sys.stderr, "Invalid input, please try again" + pass + +def main(): + print "Loading episode data...", + sys.stdout.flush() + node = load_root_node() + print "done" + while True: + options = {} + will_download = True + for n in node.children: + options[n.title] = n + if not n.can_download: + will_download = False + result = choose(options, allow_multi=will_download) + if result is None: + if node.parent is not None: + node = node.parent + else: + break + elif will_download: + for n in result: + if not n.download(): + raw_input("Press return to continue...\n") + else: + node = result + +if __name__ == "__main__": + try: + main() + except (KeyboardInterrupt, EOFError): + print "\nExiting..." + diff --git a/iview.py b/iview.py new file mode 100644 index 0000000..93dbc12 --- /dev/null +++ b/iview.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python +# vim:ts=4:sts=4:sw=4:noet + +from common import grab_xml, grab_json, download_rtmp, Node + +BASE_URL = "http://www.abc.net.au/iview/" +CONFIG_URL = BASE_URL + "xml/config.xml" +HASH_URL = BASE_URL + "images/iview.jpg" +NS = { + "auth": "http://www.abc.net.au/iView/Services/iViewHandshaker", +} + +class IviewNode(Node): + def __init__(self, title, parent, vpath): + Node.__init__(self, title, parent) + self.vpath = vpath + self.can_download = True + + def download(self): + auth_doc = grab_xml(PARAMS["auth"]) + vbase = auth_doc.xpath("//auth:server/text()", namespaces=NS)[0] + token = auth_doc.xpath("//auth:token/text()", namespaces=NS)[0] + vbase += "?auth=" + token + vpath, ext = self.vpath.rsplit(".", 1) + vpath = ext + ":" + vpath + filename = self.title + "." + ext + download_rtmp(filename, vbase, vpath) + + +def fill_nodes(root_node): + config_doc = grab_xml(CONFIG_URL) + global PARAMS + PARAMS = dict((p.attrib["name"], p.attrib["value"]) for p in config_doc.xpath("/config/param")) + + categories_doc = grab_xml(BASE_URL + PARAMS["categories"]) + categories_map = {} + for category in categories_doc.xpath("//category[@genre='true']"): + cid = category.attrib["id"] + category_name = category.xpath("name/text()")[0] + category_node = Node(category_name, root_node) + categories_map[cid] = category_node + + # Create a duplicate of each series within each category that it appears + series_list_doc = grab_json(PARAMS["api"] + "seriesIndex") + for series in series_list_doc: + categories = series["e"].split() + sid = series["a"] + series_title = series["b"].replace("&", "&") + series_nodes = [] + for cid in categories: + category_node = categories_map.get(cid, None) + if category_node: + series_nodes.append(Node(series_title, category_node)) + series_doc = grab_json(PARAMS["api"] + "series=" + sid)[0] + for episode in series_doc["f"]: + vpath = episode["n"] + episode_title = episode["b"].strip() + if series_title != episode_title: + episode_title = series_title + " " + episode_title + for series_node in series_nodes: + IviewNode(episode_title, series_node, vpath) + + diff --git a/sbs.py b/sbs.py new file mode 100644 index 0000000..40da84e --- /dev/null +++ b/sbs.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python +# vim:ts=4:sts=4:sw=4:noet + +from common import grab_xml as _grab_xml, download_rtmp, download_urllib, Node + + +BASE_URL = "http://player.sbs.com.au" + +def grab_xml(path): + return _grab_xml(BASE_URL + path) + +class SbsNode(Node): + def __init__(self, title, parent, video_desc_url): + Node.__init__(self, title, parent) + self.video_desc_url = video_desc_url + self.can_download = True + + def download(self): + video = grab_xml(self.video_desc_url) + vbase = video.xpath("//meta/@base")[0] + bestrate = 0 + bestvpath = None + for vpath in video.xpath("//switch/video"): + rate = float(vpath.xpath("@system-bitrate")[0]) + if rate > bestrate: + bestrate = rate + bestvpath = vpath.xpath("@src")[0] + filename = self.title + "." + bestvpath.rsplit(".", 1)[1] + if vbase.startswith("rtmp://"): + return download_rtmp(filename, vbase, bestvpath) + else: + return download_urllib(filename, vbase + bestvpath) + + +def fill_nodes(root_node): + settings = grab_xml("/playerassets/programs/config/standalone_settings.xml") + menu_url = settings.xpath("/settings/setting[@name='menuURL']/@value")[0] + + root_menu = grab_xml(menu_url) + seen_category_titles = set() + for menu in root_menu.xpath("//menu"): + try: + category_title = menu.xpath("title/text()")[0] + playlist_url = menu.xpath("playlist/@xmlSrc")[0] + if category_title in seen_category_titles: + # append a number to the name + i = 2 + while True: + if (category_title+str(i)) not in seen_category_titles: + category_title += str(i) + break + i += 1 + seen_category_titles.add(category_title) + category_node = Node(category_title, root_node) + playlist = grab_xml(playlist_url) + for video_desc in playlist.xpath("//video"): + video_desc_url = video_desc.xpath("@src")[0] + video_title = video_desc.xpath("title/text()")[0].strip() + SbsNode(video_title, category_node, video_desc_url) + except IndexError: + continue + -- 2.39.2