]>
code.delx.au - youtube-cgi/blob - youtube.cgi
7 from lxml
.html
import document_fromstring
, tostring
19 MAX_MEMORY_BYTES
= 128 * 1024*1024
20 USER_AGENT
= "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1"
35 class VideoUnavailable(Exception):
38 def print_form(url
="", msg
=""):
39 script_url
= "http://%s%s" % (os
.environ
["HTTP_HOST"], os
.environ
["REQUEST_URI"])
40 print "Content-Type: application/xhtml+xml\r\n\r\n"
42 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
43 <html xmlns="http://www.w3.org/1999/xhtml">
45 <title>delx.net.au - YouTube Scraper</title>
46 <link rel="stylesheet" type="text/css" href="/style.css"/>
47 <style type="text/css">
57 <h1>delx.net.au - YouTube Scraper</h1>
59 <form action="" method="get">
60 <p>This page will let you easily download YouTube videos to watch offline. It
61 will automatically grab the highest quality version.</p>
62 <div><input type="text" name="url" value="{1}"/></div>
63 <div><input type="submit" value="Download!"/></div>
65 <p>Tip! Use this bookmarklet: <a href="javascript:(function(){window.location='{2}?url='+escape(location);})()">YouTube Download</a>
66 to easily download videos. Right-click the link and add it to bookmarks,
67 then when you're looking at a YouTube page select that bookmark from your
68 browser's bookmarks menu to download the video straight away.</p>
71 """.replace("{0}", msg
).replace("{1}", url
).replace("{2}", script_url
)
73 cookiejar
= cookielib
.CookieJar()
74 urlopener
= urllib2
.build_opener(urllib2
.HTTPCookieProcessor(cookiejar
))
79 req
= urllib2
.Request(url
)
81 req
.add_header("Referer", referrer
)
83 req
.add_header("User-Agent", USER_AGENT
)
84 return urlopener
.open(req
)
88 doc
= document_fromstring(f
.read())
92 def append_to_qs(url
, params
):
93 r
= list(urlparse
.urlsplit(url
))
94 qs
= urlparse
.parse_qs(r
[3])
96 r
[3] = urllib
.urlencode(qs
, True)
97 url
= urlparse
.urlunsplit(r
)
100 def convert_from_old_itag(player_config
):
101 url_data
= urlparse
.parse_qs(player_config
["args"]["url_encoded_fmt_stream_map"])
103 for itag_url
in url_data
["itag"]:
104 pos
= itag_url
.find("url=")
105 url_data
["url"].append(itag_url
[pos
+4:])
106 player_config
["args"]["url_encoded_fmt_stream_map"] = urllib
.urlencode(url_data
, True)
108 def get_player_config(doc
):
110 for script
in doc
.xpath("//script"):
113 for line
in script
.text
.split("\n"):
114 if "yt.playerConfig =" in line
:
117 if p1
>= 0 and p2
> 0:
118 return json
.loads(line
[p1
+1:p2
])
119 if "'PLAYER_CONFIG': " in line
:
122 player_config
= json
.loads(line
[p1
+1:])
123 convert_from_old_itag(player_config
)
126 def get_best_video(player_config
):
127 url_data
= urlparse
.parse_qs(player_config
["args"]["url_encoded_fmt_stream_map"])
128 url_data
= itertools
.izip_longest(
132 url_data
.get("sig", []),
136 best_extension
= None
137 for video_url
, mimetype
, quality
, signature
in url_data
:
138 mimetype
= mimetype
.split(";")[0]
139 if mimetype
not in MIMETYPES
:
141 extension
= "." + MIMETYPES
[mimetype
]
142 quality
= QUALITIES
.get(quality
.split(",")[0], -1)
143 if best_quality
is None or quality
> best_quality
:
145 video_url
= append_to_qs(video_url
, {"signature": signature
})
147 best_quality
= quality
148 best_extension
= extension
150 return best_url
, best_extension
152 def get_video_url(doc
):
153 unavailable
= doc
.xpath("//div[@id='unavailable-message']/text()")
155 raise VideoUnavailable(unavailable
[0].strip())
157 player_config
= get_player_config(doc
)
158 if not player_config
:
159 raise VideoUnavailable("Could not find video URL")
161 video_url
, extension
= get_best_video(player_config
)
165 title
= doc
.xpath("/html/head/title/text()")[0]
166 title
= re
.sub("\s+", " ", title
.strip())
167 valid_chars
= frozenset("-_.() abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
168 filename
= "".join(c
for c
in title
.encode("ascii", "ignore") if c
in valid_chars
)
169 filename
+= extension
171 return video_url
, filename
178 print_form(url
="http://www.youtube.com/watch?v=FOOBAR")
183 video_url
, filename
= get_video_url(doc
)
184 data
= urlopen(video_url
)
185 httpinfo
= data
.info()
186 sys
.stdout
.write("Content-Disposition: attachment; filename=\"%s\"\r\n" % filename
)
187 sys
.stdout
.write("Content-Length: %s\r\n" % httpinfo
.getheader("Content-Length"))
188 sys
.stdout
.write("\r\n")
189 shutil
.copyfileobj(data
, sys
.stdout
)
191 except VideoUnavailable
, e
:
194 msg
="<p class='error'>Sorry, there was an error: %s</p>" % cgi
.escape(e
.message
)
199 msg
="<p class='error'>Sorry, there was an error. Check your URL?</p>"
207 print >>sys
.stderr
, "Usage: %s http://youtube.com/watch?v=FOOBAR" % sys
.argv
[0]
210 video_url
, filename
= get_video_url(doc
)
211 data
= urlopen(video_url
)
212 outfile
= open(filename
, "w")
213 shutil
.copyfileobj(data
, outfile
)
218 if __name__
== "__main__":
219 resource
.setrlimit(resource
.RLIMIT_AS
, (MAX_MEMORY_BYTES
, MAX_MEMORY_BYTES
))
220 if os
.environ
.has_key("SCRIPT_NAME"):