import cgi
import itertools
import json
-from lxml.html import document_fromstring, tostring
+from lxml import html
import os
import re
import resource
def print_form(url="", msg=""):
script_url = "http://%s%s" % (os.environ["HTTP_HOST"], os.environ["REQUEST_URI"])
- print "Content-Type: application/xhtml+xml\r\n\r\n"
- print """
+ sys.stdout.write("Content-Type: application/xhtml+xml\r\n\r\n")
+ sys.stdout.write("""
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
browser's bookmarks menu to download the video straight away.</p>
</body>
</html>
-""".replace("{0}", msg).replace("{1}", url).replace("{2}", script_url)
+""".replace("{0}", msg).replace("{1}", url).replace("{2}", script_url))
# Module-level cookie jar handed to the opener below.
cookiejar = cookielib.CookieJar()
# URL opener built with an HTTPCookieProcessor bound to `cookiejar`, so
# requests made through it use that jar for cookie handling.
urlopener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
def parse_url(url):
    """Fetch `url` and parse the response body as HTML.

    The response is opened with the file's `urlopen` helper and handed to
    lxml's `html.parse` with a recovering parser that decodes the input as
    UTF-8.

    Returns whatever `html.parse` produces for the document.
    Exceptions raised while opening or parsing propagate to the caller.
    """
    f = urlopen(url)
    try:
        # recover=True asks the parser to keep going on malformed markup.
        parser = html.HTMLParser(encoding="utf-8", recover=True)
        return html.parse(f, parser)
    finally:
        # Close the response even when parsing raises, so it is not leaked.
        f.close()
mimetype = mimetype.split(";")[0]
if mimetype not in MIMETYPES:
continue
- extension = "." + MIMETYPES[mimetype]
+ extension = MIMETYPES[mimetype]
quality = QUALITIES.get(quality.split(",")[0], -1)
if best_quality is None or quality > best_quality:
if signature:
return best_url, best_extension
def sanitize_filename(filename):
    """Return `filename` cleaned up for use as a single file name.

    NUL characters are turned into spaces, leading and trailing whitespace
    is removed, every run of whitespace collapses to one space, and both
    the forward slash and the backslash are replaced with "-" so the
    result cannot contain a path separator.
    """
    # Replace NUL *before* stripping/collapsing: doing it afterwards could
    # reintroduce a leading space or a run of several spaces.
    without_nul = filename.replace("\0", " ")
    # Raw string: "\s" in a normal literal is an invalid escape sequence.
    collapsed = re.sub(r"\s+", " ", without_nul.strip())
    return collapsed.replace("\\", "-").replace("/", "-")
+
def get_video_url(doc):
unavailable = doc.xpath("//div[@id='unavailable-message']/text()")
if unavailable:
return None, None
title = doc.xpath("/html/head/title/text()")[0]
- title = re.sub("\s+", " ", title.strip())
- valid_chars = frozenset("-_.() abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
- filename = "".join(c for c in title.encode("ascii", "ignore") if c in valid_chars)
- filename += extension
+ filename = sanitize_filename(title)
+ filename += "." + extension
return video_url, filename
def write_video(filename, video_data):
    """Write `video_data` to stdout as a downloadable attachment.

    Emits a Content-Disposition header carrying `filename` percent-encoded
    as UTF-8 (the `filename*=UTF-8''...` form), forwards the upstream
    Content-Length when there is one, ends the header section, and then
    copies the body to stdout.

    filename   -- text name offered to the client; encoded to UTF-8 here.
    video_data -- open response object providing info(), read() and
                  close(); it is always closed before this returns.
    """
    try:
        encoded_filename = urllib.quote(filename.encode("utf-8"))
        sys.stdout.write(
            "Content-Disposition: attachment; filename*=UTF-8''%s\r\n"
            % encoded_filename
        )
        content_length = video_data.info().getheader("Content-Length")
        # getheader() returns None for a missing header; skip the line then
        # instead of sending the literal text "Content-Length: None".
        if content_length is not None:
            sys.stdout.write("Content-Length: %s\r\n" % content_length)
        sys.stdout.write("\r\n")
        shutil.copyfileobj(video_data, sys.stdout)
    finally:
        # Release the upstream response even if writing or copying fails.
        video_data.close()
+
def cgimain():
args = cgi.parse()
try:
try:
doc = parse_url(url)
video_url, filename = get_video_url(doc)
- data = urlopen(video_url)
- httpinfo = data.info()
- sys.stdout.write("Content-Disposition: attachment; filename=\"%s\"\r\n" % filename)
- sys.stdout.write("Content-Length: %s\r\n" % httpinfo.getheader("Content-Length"))
- sys.stdout.write("\r\n")
- shutil.copyfileobj(data, sys.stdout)
- data.close()
+ video_data = urlopen(video_url)
+ write_video(filename, video_data)
except VideoUnavailable, e:
print_form(
url=url,