Created
July 19, 2010 15:45
-
-
Save carletes/481575 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tempfile | |
import os | |
import sys | |
from twisted.internet import defer, protocol, reactor | |
from twisted.python import log | |
from twisted.web import client, http_headers | |
def download(urls, offset, length):
    """Download a byte range of an HTTP resource, trying candidate URLs in turn.

    ``urls`` is an iterable of URLs under which the fragment may be found.
    The first URL is tried first; if the request fails, or the server answers
    with anything other than 200 or 206, the next URL is tried, and so on.

    ``offset`` is the position of the first byte to fetch and ``length`` the
    number of bytes; they are sent as ``Range: bytes=offset-(offset+length-1)``.

    Returns a deferred whose callback is fired with the name of the file
    containing the downloaded fragment.  If no URL yields the fragment
    (including when ``urls`` is empty), the deferred's errback is fired with
    ``Exception("Resource unavailable")``.
    """
    # Materialise the candidates: the code below indexes and slices them,
    # which a generic iterable would not support.
    urls = list(urls)
    if not urls:
        return defer.fail(Exception("Resource unavailable"))

    agent = client.Agent(reactor)
    r = "bytes=%d-%d" % (offset, offset + length - 1)
    headers = http_headers.Headers({"Range": [r]})
    log.msg("Trying %s" % (urls[0],))
    d = agent.request("GET", urls[0], headers)

    def headers_ready(response, ret):
        # Response headers have arrived: accept only full (200) or partial
        # (206) content; anything else is raised so that ``error`` moves on
        # to the next candidate URL.
        msg = "%d %s" % (response.code, response.phrase)
        if response.code not in (200, 206):
            log.msg("Bad response: %s" % (msg,))
            raise Exception(msg)
        log.msg(msg)
        # FileWriter fires ``ret`` itself once the body has been received.
        response.deliverBody(FileWriter(ret))

    def error(fail, ret, urls, offset, length):
        if not urls:
            # Every candidate failed.  The result must be delivered through
            # ``ret`` -- merely returning an exception object from this
            # errback would leave ``ret`` unfired and the caller waiting
            # forever.
            ret.errback(Exception("Resource unavailable"))
            return
        # Retry with the remaining candidates and forward that outcome.
        d = download(urls, offset, length)
        d.chainDeferred(ret)

    ret = defer.Deferred()
    d.addCallback(headers_ready, ret)
    d.addErrback(error, ret, urls[1:], offset, length)
    return ret
class FileWriter(protocol.Protocol):
    """Protocol that writes every chunk it receives to a temporary file.

    ``finished`` is a deferred that is fired exactly once: with the name of
    the temporary file when the body ends with ``client.ResponseDone``, or
    with the error if writing fails or the connection is lost for any other
    reason.  On error the temporary file is removed.
    """

    def __init__(self, finished):
        self.fd, self.fname = tempfile.mkstemp()
        self.finished = finished
        # True once ``finished`` has been fired; guards against firing it a
        # second time when connectionLost follows a write error.
        self._done = False

    def dataReceived(self, data):
        if self._done:
            # A write already failed; ignore chunks that were still in flight.
            return
        try:
            log.msg("Got %d bytes" % (len(data),))
            # os.write may write fewer bytes than requested; keep going until
            # the whole chunk is on disk.
            while data:
                written = os.write(self.fd, data)
                data = data[written:]
        except Exception as exc:
            log.err()
            self.transport.stopProducing()
            self.close()
            self.unlink()
            self._done = True
            self.finished.errback(exc)

    def connectionLost(self, reason):
        self.close()
        if self._done:
            # The outcome was already reported from dataReceived.
            return
        self._done = True
        if isinstance(reason.value, client.ResponseDone):
            log.msg("Done")
            self.finished.callback(self.fname)
        else:
            log.msg("Connection error: %s" % (reason.getErrorMessage()))
            self.unlink()
            self.finished.errback(reason)

    def close(self):
        """Close the temporary file's descriptor; safe to call repeatedly."""
        # Forget the descriptor before closing so a second call can never
        # close an unrelated descriptor that happens to reuse the number.
        fd, self.fd = self.fd, None
        if fd is None:
            return
        try:
            os.close(fd)
        except OSError as exc:
            # Best effort: nothing useful can be done beyond noting it.
            log.msg("Could not close %s: %s" % (self.fname, exc))

    def unlink(self):
        """Remove the temporary file, ignoring failure to do so."""
        try:
            os.unlink(self.fname)
        except OSError as exc:
            # Best effort: a leftover temp file is not worth failing over.
            log.msg("Could not remove %s: %s" % (self.fname, exc))
def main():
    """Demo driver: fetch 42 bytes at offset 17 from a host, then stop.

    The host is taken from the first command-line argument and defaults to
    ``google.com``.  Two URLs that are expected to fail are tried before the
    root URL, to exercise the fallback logic of ``download``.  The reactor is
    stopped once the download has succeeded or failed.
    """
    log.startLogging(sys.stdout)
    if len(sys.argv) > 1:
        host = sys.argv[1]
    else:
        host = "google.com"

    d = download(["http://%s/nope" % (host,),
                  "http://%s/neither-here" % (host,),
                  "http://%s/" % (host,)],
                 offset=17, length=42)

    def _ok(fname):
        log.msg("URL downloaded to %s" % (fname,))

    def _stop(_):
        reactor.stop()

    d.addCallback(_ok)
    # Report a failed download instead of dropping it silently; log.err
    # consumes the failure, so _stop still runs on a clean result.
    d.addErrback(log.err, "Download failed")
    d.addBoth(_stop)
# Script entry point: queue main() to run as soon as the reactor is up, then
# start the reactor.  reactor.run() blocks until reactor.stop() is called.
if __name__ == "__main__":
    reactor.callWhenRunning(main)
    reactor.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment