+++ /dev/null
-#!/usr/bin/env python2.4
-
-import GregDebug, base64, os, sys, urlparse
-
-from twisted.internet import reactor, protocol
-from twisted.web.client import HTTPClientFactory
-from twisted.web.http import HTTPClient
-from twisted.web.client import _parse as parseURL
-
-__all__ = ('downloadURL', )
-
def parseURL(url, defaultPort = None):
    """Split *url* into the pieces needed to open a connection.

    Based on twisted.web.client._parse.

    Parameters:
        url: absolute URL; its network location may carry
            ``user[:password]@`` and/or ``:port``.
        defaultPort: port reported when the URL names none, or names an
            empty one as in ``http://host:/``.  When None, 443 is used for
            the ``https`` scheme and 80 for every other scheme.

    Returns the 5-tuple ``(scheme, auth, host, port, path)``:
        scheme: the scheme exactly as urlparse reports it.
        auth: None when the network location has no ``@``; otherwise
            ``(user,)`` or ``(user, password)``.
        host: the host name; a bracketed IPv6 literal keeps its brackets.
        port: the port as an int.
        path: everything after the network location (path, parameters,
            query, fragment) joined back into one string; may be ''.

    Raises ValueError when an explicit port is not a decimal integer.
    """
    parsed = urlparse.urlparse(url)
    scheme = parsed[0]
    # Blank the scheme and netloc so urlunparse re-joins only the remainder.
    path = urlparse.urlunparse(('', '') + tuple(parsed[2:]))

    if defaultPort is None:
        if scheme == 'https':
            defaultPort = 443
        else:
            defaultPort = 80
    host, port = parsed[1], defaultPort

    auth = None
    if '@' in host:
        # Split on the LAST '@' so that an unescaped '@' inside the password
        # stays with the credentials instead of leaking into the host name.
        userInfo, host = host.rsplit('@', 1)
        # Yields (user,) when there is no ':' and (user, password) otherwise.
        auth = tuple(userInfo.split(':', 1))

    # A trailing ']' means the host is a bracketed IPv6 literal with no port;
    # its colons belong to the address and must not be split off.
    if ':' in host and not host.endswith(']'):
        host, portText = host.rsplit(':', 1)
        # "host:" (empty port) falls back to the default instead of crashing.
        if portText:
            port = int(portText)

    return scheme, auth, host, port, path
-
class HTTPProxyFactory(protocol.ClientFactory):
    """Client factory that sends another factory's traffic through a proxy.

    The wrapped factory still builds the real protocol; this class wraps
    that protocol in an HTTPProxyProtocol.  Attributes not found on this
    object are looked up on the wrapped factory.

    Parameters:
        realFactory: the factory whose connections are being proxied.
        proxyServer: ``(host, port)`` pair of the proxy.
        proxyMethod: request method the proxy protocol should use.
        proxyPassword: credentials handed to the proxy protocol, or None.
    """

    def __init__(self, realFactory, proxyServer, proxyMethod = 'GET', proxyPassword = None):
        self.realFactory = realFactory
        self.proxyHost, self.proxyPort = proxyServer
        self.proxyMethod = proxyMethod
        self.proxyPassword = proxyPassword

    def buildProtocol(self, addr):
        """Build the real protocol for *addr* and wrap it for proxying."""
        wrapped = self.realFactory.buildProtocol(addr)
        return HTTPProxyProtocol(self, wrapped)

    # ClientFactory already defines the three connection callbacks below, so
    # __getattr__ is never consulted for them.  Forward them explicitly;
    # otherwise the wrapped factory never hears about a failed or lost
    # connection to the proxy.

    def startedConnecting(self, connector):
        return self.realFactory.startedConnecting(connector)

    def clientConnectionFailed(self, connector, reason):
        return self.realFactory.clientConnectionFailed(connector, reason)

    def clientConnectionLost(self, connector, reason):
        return self.realFactory.clientConnectionLost(connector, reason)

    def __getattr__(self, key):
        """Delegate unknown attributes to the wrapped factory."""
        # If realFactory itself is missing (e.g. __init__ has not run yet),
        # looking it up below would re-enter this method without end.
        if key == 'realFactory':
            raise AttributeError(key)
        return getattr(self.realFactory, key)
-
class HTTPProxyProtocol(protocol.Protocol):
    """Protocol that relays a wrapped HTTP client through an HTTP proxy.

    On connect it writes its own request line (carrying the full URL) and,
    when credentials were supplied, a Proxy-Authorization header.  It then
    replays everything the wrapped protocol writes on connect except that
    protocol's first write.  Received data and connection loss are passed
    straight to the wrapped protocol.

    Parameters:
        factory: the owning factory; proxyPassword, proxyMethod and
            realFactory are read from it.
        proxied: the real protocol instance being wrapped.

    Raises NotImplementedError when factory.proxyMethod is not 'GET'.
    """

    def __init__(self, factory, proxied):
        self.factory = factory
        self.proxied = proxied
        self.proxyPassword = factory.proxyPassword
        if self.proxyPassword is not None:
            # Credentials may be (user,) when no password was given; pad to
            # (user, '') so the format below cannot fail on a 1-tuple.
            credentials = (tuple(self.proxyPassword) + ('',))[:2]
            self.proxyPassword = base64.standard_b64encode('%s:%s' % credentials)
        if factory.proxyMethod == 'GET':
            self.__connectionMade = self.__connectionMade_GET
        else:
            raise NotImplementedError

    def __send(self, value):
        self.transport.write(value)

    def __getTransportWrites(self, function, *args, **kwargs):
        """Call *function* once and return the list of chunks it wrote.

        transport.write is swapped for a recorder during the call and is
        restored even when *function* raises.
        """
        realWrite = self.transport.write
        captured = []
        self.transport.write = captured.append
        try:
            function(*args, **kwargs)
        finally:
            self.transport.write = realWrite
        return captured

    def __connectionMade_GET(self):
        # The proxy needs the absolute URL in the request line.
        self.factory.realFactory.path = self.factory.realFactory.url
        # Deliberately NOT calling self.proxied.makeConnection(): Twisted
        # documents that it invokes connectionMade(), which would write the
        # wrapped client's request straight to the wire ahead of the request
        # line and Proxy-Authorization header sent below.  connectionMade()
        # has already attached the transport.
        # NOTE(review): mirrors the flag makeConnection() would have set --
        # confirm against the Twisted version in use.
        self.proxied.connected = 1

        self.__send('GET %s HTTP/1.0\r\n' % self.factory.realFactory.url)
        if self.proxyPassword is not None:
            self.__send('Proxy-Authorization: Basic %s\r\n' % self.proxyPassword)

        # Remove the real http client's get request (its first write) and
        # forward the rest of what it sends on connect.
        for line in self.__getTransportWrites(self.proxied.connectionMade)[1:]:
            self.__send(line)

    def connectionMade(self):
        self.proxied.transport = self.transport
        self.__connectionMade()

    def dataReceived(self, data):
        self.proxied.dataReceived(data)

    def connectionLost(self, reason):
        self.proxied.connectionLost(reason)
-
# Proxy table keyed by URL scheme.  Each value is the triple
# ((host, port), credentials, factoryClass); parseProxies() fills it in.
proxies = {}

def downloadURL(url, method = 'GET', successBack = None, errorBack = None):
    """Start fetching *url* and return the factory driving the request.

    Parameters:
        url: the URL to fetch.
        method: HTTP method handed to HTTPClientFactory (and to the proxy
            factory when one is used).
        successBack: optional callback added to the factory's deferred.
        errorBack: optional errback added to the factory's deferred.

    Returns the factory passed to the reactor: the HTTPClientFactory
    itself, or the proxy factory wrapping it when the URL's scheme has an
    entry in ``proxies``.
    """
    factory = HTTPClientFactory(url, method = method)
    scheme, auth, host, port, path = parseURL(url)
    if successBack is not None:
        factory.deferred.addCallback(successBack)
    if errorBack is not None:
        factory.deferred.addErrback(errorBack)

    if scheme in proxies:
        # Connect to the proxy instead of the origin server and change the
        # factory to the proxy's one.
        (host, port), password, factory_type = proxies[scheme]
        factory = factory_type(realFactory = factory, proxyServer = (host, port), proxyMethod = method, proxyPassword = password)
        reactor.connectTCP(host, port, factory)
    elif scheme == 'https':
        # https needs a TLS connection; a plain TCP connection to port 443
        # cannot complete the request.  Imported here so that plain-http use
        # does not require SSL support to be installed.
        from twisted.internet import ssl
        reactor.connectSSL(host, port, factory, ssl.ClientContextFactory())
    else:
        reactor.connectTCP(host, port, factory)
    return factory
-
# Note: Does not currently honor the no_proxy variable
def parseProxies():
    """Record proxy servers named by ``*_proxy`` environment variables.

    Only ``http_proxy`` is acted on.  Its value is parsed with parseURL and
    stored in the module-level ``proxies`` table under the key 'http' as
    ``((host, port), auth, HTTPProxyFactory)``.  Empty values are ignored.

    A value without a scheme (the common ``host:port`` form) is treated as
    ``http://host:port``; without that, parsing yields an empty host.

    Raises ValueError (from parseURL) when the value carries a non-numeric
    port.
    """
    suffix = '_proxy'
    for name, value in os.environ.items():
        if not value or not name.endswith(suffix):
            continue
        proxy_type = name[:-len(suffix)]
        if proxy_type != 'http':
            continue
        if '://' not in value:
            value = 'http://' + value
        _, auth, host, port, _ = parseURL(value)
        proxies[proxy_type] = (host, port), auth, HTTPProxyFactory
-
def main(urls):
    """Download every URL in *urls*, print each outcome, then exit.

    A success prints a shortened repr of the response body; a failure
    prints the error object.  The reactor is stopped once every download
    has reported, so the script terminates instead of idling forever.
    With no URLs the reactor is not started at all.
    """
    def summerise(string, summerisedLen = 100):
        # Keep short strings whole; otherwise keep the head and tail and
        # join them with ' ... ' (5 characters, hence the subtraction).
        if len(string) <= summerisedLen:
            return string
        else:
            summerisedLen -= 5
            start = summerisedLen // 2
            return '%s ... %s' % (string[:start], string[-(summerisedLen - start):])

    # Single-argument print(...) produces the same output as the Python 2
    # print statement.
    def s(data):
        print('Success: "%r"' % summerise(data))

    def e(data):
        print(data)

    # One-element list so the nested function can update the count.
    remaining = [len(urls)]

    def finished(result):
        remaining[0] -= 1
        if remaining[0] <= 0:
            reactor.stop()
        return result

    if not urls:
        return

    for url in urls:
        # Added after s/e so it runs once the outcome has been printed.
        downloadURL(url, successBack = s, errorBack = e).deferred.addBoth(finished)
    reactor.run()
-
# Read proxy settings from the environment whenever this module is loaded,
# whether it is imported or run as a script.
parseProxies()

if __name__ == "__main__":
    # Run as a script: every command-line argument is a URL to download.
    main(sys.argv[1:])