Skip to content

Instantly share code, notes, and snippets.

@carletes
Created July 19, 2010 15:45
Show Gist options
  • Save carletes/481575 to your computer and use it in GitHub Desktop.
Save carletes/481575 to your computer and use it in GitHub Desktop.
import tempfile
import os
import sys
from twisted.internet import defer, protocol, reactor
from twisted.python import log
from twisted.web import client, http_headers
def download(urls, offset, length):
    """Download a fragment of an HTTP resource and save it to a file.

    ``urls`` is an iterable of candidate URLs under which the fragment may
    be found.  They are tried in order: the first one first, then the
    second, and so on, until one of them answers with status 200 or 206.

    ``offset`` and ``length`` select the fragment; they are sent to the
    server as the ``Range`` header ``bytes=<offset>-<offset + length - 1>``.

    Returns a deferred.  Its callback is fired with the name of the file
    containing the downloaded fragment.  Its errback is fired with a
    "Resource unavailable" exception when ``urls`` is empty or when every
    candidate URL has been rejected, or with the failure reported by
    ``FileWriter`` when the body transfer breaks after a URL was accepted.
    """
    # The docstring promises "an iterable", but the code below needs
    # indexing and slicing, so normalise to a list first.
    urls = list(urls)
    if not urls:
        # Report "nothing to try" through the deferred, the same way the
        # exhaustion of all candidates is reported, instead of raising
        # IndexError synchronously.
        return defer.fail(Exception("Resource unavailable"))

    url, remaining = urls[0], urls[1:]

    agent = client.Agent(reactor)
    r = "bytes=%d-%d" % (offset, offset + length - 1)
    headers = http_headers.Headers({"Range": [r]})
    log.msg("Trying %s" % (url,))
    d = agent.request("GET", url, headers)

    # ``ret`` is what the caller gets.  It is fired either by the
    # FileWriter (body fully received / transfer error), by ``error``
    # below once all candidates are exhausted, or by the deferred of the
    # recursive call for the remaining URLs.
    ret = defer.Deferred()

    def headers_ready(response):
        msg = "%d %s" % (response.code, response.phrase)
        # NOTE(review): a 200 presumably means the server ignored the Range
        # header and is sending the whole resource -- confirm callers are
        # fine with that.
        if response.code not in (200, 206):
            log.msg("Bad response: % s" % (msg,))
            # Raising here routes control to ``error``, i.e. to the next URL.
            raise Exception(msg)
        log.msg(msg)
        response.deliverBody(FileWriter(ret))

    def error(fail):
        if not remaining:
            # Fire the caller's deferred.  Merely *returning* an exception
            # from this errback would turn it into a plain result of ``d``
            # and leave ``ret`` pending forever.
            ret.errback(Exception("Resource unavailable"))
            return
        download(remaining, offset, length).chainDeferred(ret)

    d.addCallback(headers_ready)
    d.addErrback(error)
    return ret
class FileWriter(protocol.Protocol):
    """Body-delivery protocol that stores the received bytes in a temp file.

    ``finished`` is a deferred that is fired exactly once: with the name of
    the temporary file when the body has been received completely, or with
    a failure when writing to the file fails or the connection is lost for
    any reason other than ``client.ResponseDone``.  On failure the temporary
    file is removed.
    """

    def __init__(self, finished):
        self.fd, self.fname = tempfile.mkstemp()
        self.finished = finished
        # True once ``finished`` has been fired.  A write error fires it
        # from dataReceived() and connectionLost() still follows; firing a
        # deferred twice raises AlreadyCalledError.
        self._fired = False

    def dataReceived(self, data):
        """Append ``data`` to the temporary file; abort on write errors."""
        if self._fired:
            # A previous write already failed and the file is gone; ignore
            # whatever is still delivered before the transport stops.
            return
        try:
            log.msg("Got %d bytes" % (len(data),))
            os.write(self.fd, data)
        except Exception as exc:
            log.err()
            self.transport.stopProducing()
            self.close()
            self.unlink()
            self._fired = True
            self.finished.errback(exc)

    def connectionLost(self, reason):
        """Close the file and report the outcome through ``finished``."""
        self.close()
        if self._fired:
            # The failure was already reported from dataReceived().
            return
        self._fired = True
        if isinstance(reason.value, client.ResponseDone):
            log.msg("Done")
            self.finished.callback(self.fname)
        else:
            log.msg("Connection error: %s" % (reason.getErrorMessage()))
            self.unlink()
            self.finished.errback(reason)

    def close(self):
        """Close the file descriptor; safe to call more than once."""
        # Forget the descriptor before closing it: a second os.close() on
        # the same number could hit an unrelated descriptor that has reused
        # it in the meantime.
        fd, self.fd = self.fd, None
        if fd is None:
            return
        try:
            os.close(fd)
        except OSError:
            # Best effort: there is nothing useful to do if closing fails.
            pass

    def unlink(self):
        """Remove the temporary file, ignoring it if already gone."""
        try:
            os.unlink(self.fname)
        except OSError:
            # Best effort: the file may already have been removed.
            pass
def main():
    """Run a demo download and stop the reactor when it is over.

    The host is taken from the first command-line argument and defaults to
    "google.com".  Two URLs that are expected to fail are tried before the
    root URL, to exercise the fallback logic of ``download()``.
    """
    log.startLogging(sys.stdout)
    try:
        host = sys.argv[1]
    except IndexError:
        # No host given on the command line.
        host = "google.com"
    d = download(["http://%s/nope" % (host,),
                  "http://%s/neither-here" % (host,),
                  "http://%s/" % (host,)],
                 offset=17, length=42)

    def _ok(fname):
        log.msg("URL downloaded to %s" % (fname,))

    def _failed(fail):
        # Without this the failure would reach _stop() and be dropped
        # without any trace in the log.
        log.err(fail)

    def _stop(_):
        reactor.stop()

    d.addCallback(_ok)
    d.addErrback(_failed)
    d.addBoth(_stop)
if __name__ == "__main__":
    # Schedule main() for when the reactor has started, then start it.
    # main() arranges for reactor.stop() to be called once the download
    # has finished, which makes reactor.run() return.
    reactor.callWhenRunning(main)
    reactor.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment