From 28b806bd28a74527660fa5c11dacf9c4f8c526e3 Mon Sep 17 00:00:00 2001 From: James Bunton Date: Sat, 30 May 2015 13:19:55 +1000 Subject: [PATCH] python3 compatibility --- README.md | 18 ++++---- autograbber.py | 19 ++++---- autosocks.py | 8 ++-- brightcove.py | 6 +-- common.py | 111 ++++++++++++++++++++++++++-------------------- grabber.py | 20 ++++++--- iview.py | 8 ++-- python2_compat.py | 25 +++++++++++ sbs.py | 8 ++-- 9 files changed, 133 insertions(+), 90 deletions(-) create mode 100644 python2_compat.py diff --git a/README.md b/README.md index 321608e..2f34c99 100644 --- a/README.md +++ b/README.md @@ -5,32 +5,30 @@ WebDL is a set of Python scripts to grab video from online Free To Air Australia ## Requirements * [Livestreamer](http://docs.livestreamer.io/install.html) -* python3-pycrypto -- Livestreamer needs this for some videos -* [rtmpdump](https://rtmpdump.mplayerhq.hu) -* python 2.7 (webdl doesn't work with python3 yet) -* python2-lxml (for python2) +* python 2.7 or 3.2+ +* pycrypto -- Livestreamer needs this for some videos +* python-lxml * ffmpeg / libav-tools ## Instructions ### Arch Linux - pacman -S livestreamer python-crypto python2-lxml rtmpdump ffmpeg + pacman -S livestreamer python-crypto python-lxml ffmpeg ### Ubuntu - apt-get install livestreamer python3-crypto python-lxml rtmpdump libav-tools + apt-get install livestreamer python-crypto python-lxml libav-tools ### Mac OS X Warning, this is untested! 
- brew install python3 python rtmpdump ffmpeg - pip3 install livestreamer pycrypto - pip install lxml + brew install python3 ffmpeg + pip3 install livestreamer pycrypto lxml ### Then get WebDL itself hg clone https://bitbucket.org/delx/webdl cd webdl - ./grabber.py + python3 ./grabber.py ## Bug reports diff --git a/autograbber.py b/autograbber.py index 84b8469..1f87ceb 100755 --- a/autograbber.py +++ b/autograbber.py @@ -1,7 +1,8 @@ -#!/usr/bin/python2 +#!/usr/bin/env python from common import load_root_node import fnmatch +import logging import os import sys @@ -21,10 +22,10 @@ class DownloadList(object): try: self.f = open(filename, "r") for line in self.f: - self.seen_list.add(line.decode("utf-8").strip()) + self.seen_list.add(line.strip()) self.f.close() - except Exception, e: - print >>sys.stderr, "Could not open:", filename, e + except Exception as e: + logging.error("Could not open: %s -- %s", filename, e) self.f = open(filename, "a") def has_seen(self, node): @@ -32,7 +33,7 @@ class DownloadList(object): def mark_seen(self, node): self.seen_list.add(node.title) - self.f.write(node.title.encode("utf-8") + "\n") + self.f.write(node.title + "\n") self.f.flush() @@ -42,11 +43,11 @@ def match(download_list, node, pattern, count=0): if node.download(): download_list.mark_seen(node) else: - print >>sys.stderr, "Failed to download!", node.title + logging.error("Failed to download! 
%s", node.title)
         return
 
     if count >= len(pattern):
-        print "No match found for pattern:", "/".join(pattern)
+        logging.error("No match found for pattern: %s", "/".join(pattern))
         return
     p = pattern[count]
     for child in node.get_children():
@@ -68,10 +69,10 @@ if __name__ == "__main__":
         destdir = os.path.abspath(sys.argv[1])
         patternfile = os.path.abspath(sys.argv[2])
     except IndexError:
-        print >>sys.stderr, "Usage: %s destdir patternfile" % sys.argv[0]
+        print("Usage: %s destdir patternfile" % sys.argv[0])
         sys.exit(1)
     try:
         main(destdir, patternfile)
     except (KeyboardInterrupt, EOFError):
-        print "\nExiting..."
+        print("\nExiting...")
diff --git a/autosocks.py b/autosocks.py
index 329f073..68018e3 100644
--- a/autosocks.py
+++ b/autosocks.py
@@ -1,5 +1,5 @@
+import logging
 import subprocess
-import sys
 
 
 def detect_gnome():
@@ -60,8 +60,8 @@ def configure_socks(host, port):
     try:
         import socks, socket
     except ImportError:
-        print >>sys.stderr, "Failed to use configured SOCKS proxy:", host, port
-        print >>sys.stderr, "Try installing SocksiPy: http://socksipy.sf.net"
+        logging.error("Failed to use configured SOCKS proxy: %s:%s", host, port)
+        logging.error("Try installing SocksiPy: http://socksipy.sf.net")
         return False
 
     socket.socket = socks.socksocket
@@ -80,7 +80,7 @@ def try_autosocks():
         host, port = None, None
         try:
             host, port = func()
-        except Exception, e:
+        except Exception as e:
             pass
         if host is not None and port is not None:
             return configure_socks(host, port)
diff --git a/brightcove.py b/brightcove.py
index 7bb5737..082c92a 100644
--- a/brightcove.py
+++ b/brightcove.py
@@ -1,3 +1,4 @@
+import logging
 import re
 import sys
 
@@ -26,7 +27,7 @@ class BrightcoveVideoNode(Node):
         doc = grab_json(desc_url, 3600)
         video_url = doc["HLSURL"]
         if not video_url:
-            print("No HLS stream available for: " + self.title)
+            logging.error("No HLS stream available for: %s", self.title)
             return False
 
         filename = self.title + ".ts"
@@ -50,8 +51,6 @@ class BrightcoveRootNode(Node):
     def 
fill_children(self): page_number = 0 while page_number < 100: - sys.stdout.write(".") - sys.stdout.flush() url = self.get_all_videos_url(page_number) page_number += 1 @@ -62,7 +61,6 @@ class BrightcoveRootNode(Node): for video_desc in items: self.process_video(video_desc) - print def process_video(self, video_desc): if not video_desc["customFields"]: diff --git a/common.py b/common.py index 3370b36..daede56 100644 --- a/common.py +++ b/common.py @@ -1,21 +1,19 @@ -from lxml import etree, html -import cookielib +import python2_compat + +import hashlib +import http.cookiejar import json -try: - import hashlib -except ImportError: - import md5 as hashlib +import logging +import lxml.etree +import lxml.html import os import re import shutil import signal import subprocess -import sys -import tempfile import time -import urllib -import urllib2 -import urlparse +import urllib.parse +import urllib.request try: @@ -24,9 +22,20 @@ try: except ImportError: pass -CACHE_DIR = os.path.join(os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache")), "webdl") + +logging.basicConfig( + format = "%(levelname)s %(message)s", + level = logging.INFO if os.environ.get("DEBUG", None) is None else logging.DEBUG, +) + +CACHE_DIR = os.path.join( + os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache")), + "webdl" +) + USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:21.0) Gecko/20100101 Firefox/21.0" + class Node(object): def __init__(self, title, parent=None): self.title = title @@ -64,39 +73,42 @@ def load_root_node(): valid_chars = frozenset("-_.()!@#%^ abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") def sanify_filename(filename): - filename = filename.encode("ascii", "ignore") filename = "".join(c for c in filename if c in valid_chars) + assert len(filename) > 0 return filename -cookiejar = cookielib.CookieJar() -urlopener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar)) +cookiejar = http.cookiejar.CookieJar() +urlopener = 
urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookiejar)) def _urlopen(url, referrer=None): - req = urllib2.Request(url) + req = urllib.request.Request(url) req.add_header("User-Agent", USER_AGENT) if referrer: req.add_header("Referer", referrer) return urlopener.open(req) def urlopen(url, max_age): -### print url + logging.debug("urlopen(%r, %r)", url, max_age) + if not os.path.isdir(CACHE_DIR): os.makedirs(CACHE_DIR) if max_age <= 0: return _urlopen(url) - filename = hashlib.md5(url).hexdigest() + filename = hashlib.md5(url.encode("utf-8")).hexdigest() filename = os.path.join(CACHE_DIR, filename) if os.path.exists(filename): file_age = int(time.time()) - os.path.getmtime(filename) if file_age < max_age: - return open(filename) + logging.debug("loading from cache: %s", filename) + return open(filename, "rb") + logging.debug("downloading: %s -> %s", url, filename) src = _urlopen(url) dst = open(filename, "wb") try: shutil.copyfileobj(src, dst) - except Exception, e: + except Exception as e: try: os.unlink(filename) except OSError: @@ -105,7 +117,7 @@ def urlopen(url, max_age): src.close() dst.close() - return open(filename) + return open(filename, "rb") def grab_text(url, max_age): f = urlopen(url, max_age) @@ -115,45 +127,47 @@ def grab_text(url, max_age): def grab_html(url, max_age): f = urlopen(url, max_age) - doc = html.parse(f, html.HTMLParser(encoding="utf-8", recover=True)) + doc = lxml.html.parse(f, lxml.html.HTMLParser(encoding="utf-8", recover=True)) f.close() return doc def grab_xml(url, max_age): f = urlopen(url, max_age) - doc = etree.parse(f, etree.XMLParser(encoding="utf-8", recover=True)) + doc = lxml.etree.parse(f, lxml.etree.XMLParser(encoding="utf-8", recover=True)) f.close() return doc def grab_json(url, max_age, skip_assignment=False, skip_function=False): f = urlopen(url, max_age) + text = f.read().decode("utf-8") + if skip_assignment: - text = f.read() pos = text.find("=") - doc = json.loads(text[pos+1:]) + text = 
text[pos+1:] + elif skip_function: - text = f.read() pos = text.find("(") rpos = text.rfind(")") - doc = json.loads(text[pos+1:rpos]) - else: - doc = json.load(f) + text = text[pos+1:rpos] + + doc = json.loads(text) f.close() return doc def exec_subprocess(cmd): + logging.debug("Executing: %s", cmd) try: p = subprocess.Popen(cmd) ret = p.wait() if ret != 0: - print >>sys.stderr, cmd[0], "exited with error code:", ret + logging.error("%s exited with error code: %s", cmd[0], ret) return False else: return True - except OSError, e: - print >>sys.stderr, "Failed to run", cmd[0], e + except OSError as e: + logging.error("Failed to run: %s -- %s", cmd[0], e) except KeyboardInterrupt: - print "Cancelled", cmd + logging.info("Cancelled: %s", cmd) try: p.terminate() p.wait() @@ -194,7 +208,7 @@ def generate_remux_cmd(infile, outfile): raise Exception("You must install ffmpeg or libav-tools") def remux(infile, outfile): - print "Converting %s to mp4" % infile + logging.info("Converting %s to mp4", infile) cmd = generate_remux_cmd(infile, outfile) if not exec_subprocess(cmd): # failed, error has already been logged @@ -206,18 +220,18 @@ def remux(infile, outfile): os.unlink(infile) return True else: - print >>sys.stderr, "The size of", outfile, "is suspicious, did avconv fail?" + logging.error("The size of %s is suspicious, did the remux fail?", outfile) return False - except Exception, e: - print >>sys.stderr, "Conversion failed", e + except Exception as e: + logging.error("Conversion failed! 
%s", e) return False def convert_to_mp4(filename): - with open(filename) as f: + with open(filename, "rb") as f: fourcc = f.read(4) basename, ext = os.path.splitext(filename) - if ext == ".mp4" and fourcc == "FLV\x01": + if ext == ".mp4" and fourcc == b"FLV\x01": os.rename(filename, basename + ".flv") ext = ".flv" filename = basename + ext @@ -231,7 +245,7 @@ def convert_to_mp4(filename): def download_hds(filename, video_url, pvswf=None): filename = sanify_filename(filename) - print "Downloading: %s" % filename + logging.info("Downloading: %s", filename) video_url = video_url.replace("http://", "hds://") if pvswf: @@ -253,7 +267,8 @@ def download_hds(filename, video_url, pvswf=None): def download_hls(filename, video_url): filename = sanify_filename(filename) video_url = video_url.replace("http://", "hlsvariant://") - print "Downloading: %s" % filename + logging.info("Downloading: %s", filename) + cmd = [ "livestreamer", "-o", filename, @@ -275,7 +290,7 @@ def natural_sort(l, key=None): for c in re.split("([0-9]+)", k): c = c.strip() if c.isdigit(): - newk.append(int(c)) + newk.append(c.zfill(5)) else: for subc in c.split(): if subc not in ignore_list: @@ -285,14 +300,14 @@ def natural_sort(l, key=None): return sorted(l, key=key_func) def append_to_qs(url, params): - r = list(urlparse.urlsplit(url)) - qs = urlparse.parse_qs(r[3]) - for k, v in params.iteritems(): + r = list(urllib.parse.urlsplit(url)) + qs = urllib.parse.parse_qs(r[3]) + for k, v in params.items(): if v is not None: qs[k] = v - elif qs.has_key(k): + elif k in qs: del qs[k] - r[3] = urllib.urlencode(qs, True) - url = urlparse.urlunsplit(r) + r[3] = urllib.parse.urlencode(sorted(qs.items()), True) + url = urllib.parse.urlunsplit(r) return url diff --git a/grabber.py b/grabber.py index 61cfcec..72b39b2 100755 --- a/grabber.py +++ b/grabber.py @@ -1,17 +1,23 @@ -#!/usr/bin/python2 +#!/usr/bin/env python from common import load_root_node, natural_sort -import sys + +# Python2 compatibility +try: + 
raw_input +except NameError: + raw_input = input + def choose(options, allow_multi): reverse_map = {} for i, (key, value) in enumerate(options): - print "%3d) %s" % (i+1, key.encode('utf-8')) + print("%3d) %s" % (i+1, key)) reverse_map[i+1] = value - print " 0) Back" + print(" 0) Back") while True: try: - values = map(int, raw_input("Choose> ").split()) + values = list(map(int, raw_input("Choose> ").split())) if len(values) == 0: continue if 0 in values: @@ -23,7 +29,7 @@ def choose(options, allow_multi): if len(values) == 1: return values[0] except (ValueError, IndexError): - print >>sys.stderr, "Invalid input, please try again" + print("Invalid input, please try again") pass def main(): @@ -54,5 +60,5 @@ if __name__ == "__main__": try: main() except (KeyboardInterrupt, EOFError): - print "\nExiting..." + print("\nExiting...") diff --git a/iview.py b/iview.py index d0e0e58..03f730b 100644 --- a/iview.py +++ b/iview.py @@ -1,5 +1,5 @@ from common import grab_json, grab_xml, Node, download_hls -import urlparse +import urllib.parse API_URL = "http://iview.abc.net.au/api" AUTH_URL = "http://iview.abc.net.au/auth" @@ -30,13 +30,13 @@ class IviewEpisodeNode(Node): } token = auth_doc.xpath("//auth:tokenhd/text()", namespaces=NS)[0] token_url = auth_doc.xpath("//auth:server/text()", namespaces=NS)[0] - token_hostname = urlparse.urlparse(token_url).netloc + token_hostname = urllib.parse.urlparse(token_url).netloc return token, token_hostname def add_auth_token_to_url(self, video_url, token, token_hostname): - parsed_url = urlparse.urlparse(video_url) + parsed_url = urllib.parse.urlparse(video_url) hacked_url = parsed_url._replace(netloc=token_hostname, query="hdnea=" + token) - video_url = urlparse.urlunparse(hacked_url) + video_url = urllib.parse.urlunparse(hacked_url) return video_url def download(self): diff --git a/python2_compat.py b/python2_compat.py new file mode 100644 index 0000000..055cbbf --- /dev/null +++ b/python2_compat.py @@ -0,0 +1,25 @@ +import sys + +# 
Python 2.7 compatibility +try: + import cookielib + import urllib + import urllib2 + import urlparse + + class dummy_http_module: + cookiejar = cookielib + sys.modules['http'] = dummy_http_module + sys.modules['http.cookiejar'] = dummy_http_module.cookiejar + + class dummy_urllib_module: + parse = urlparse + request = urllib2 + sys.modules['urllib'] = dummy_urllib_module + sys.modules['urllib.parse'] = dummy_urllib_module.parse + sys.modules['urllib.request'] = dummy_urllib_module.request + dummy_urllib_module.parse.urlencode = urllib.urlencode + +except ImportError: + pass + diff --git a/sbs.py b/sbs.py index bf59941..d95a4a6 100644 --- a/sbs.py +++ b/sbs.py @@ -3,7 +3,7 @@ from common import grab_text, grab_html, grab_json, grab_xml, download_hds, Node, append_to_qs import collections -import urlparse +import urllib.parse BASE = "http://www.sbs.com.au" VIDEO_MENU = BASE + "/ondemand/js/video-menu" @@ -32,11 +32,11 @@ class SbsNode(Node): doc = grab_html(VIDEO_URL % self.video_id, 0) meta_video = doc.xpath("//meta[@property='og:video']")[0] swf_url = meta_video.attrib["content"] - swf_url_qs = urlparse.parse_qs(urlparse.urlparse(swf_url).query) + swf_url_qs = urllib.parse.parse_qs(urllib.parse.urlparse(swf_url).query) desc_url = swf_url_qs["v"][0] doc = grab_text(desc_url, 0) - doc_qs = urlparse.parse_qs(doc) + doc_qs = urllib.parse.parse_qs(doc) desc_url = doc_qs["releaseUrl"][0] doc = grab_xml(desc_url, 0) @@ -91,7 +91,7 @@ class SbsRootNode(Node): SbsNavNode(name, parent, url) def fill_category(self, parent, cat_data): - if not cat_data.has_key("children"): + if "children" not in cat_data: name = cat_data["name"] self.create_nav_node(name, parent, cat_data, "url") return -- 2.39.2