Created
August 1, 2012 18:44
-
-
Save migurski/3229707 to your computer and use it in GitHub Desktop.
RemoteFileObject is a simple mapping from HTTP Range headers to a local file-like object
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from cStringIO import StringIO
from datetime import timedelta
from httplib import HTTPConnection
from os import SEEK_SET, SEEK_CUR, SEEK_END
from os.path import basename
from sys import stderr
from time import time
from urlparse import urlparse
class RemoteFileObject(object):
    """ Read-only, seekable file-like view of a remote HTTP resource.

        Implements enough of the file object protocol to be useful:
        http://docs.python.org/release/2.5.2/lib/bltin-file-objects.html

        Data is pulled from the remote URL on demand with HTTP Range
        requests, in blocks of /block_size/ bytes. Each block is fetched
        at most once and kept in memory in self.chunks, keyed by the
        block's starting byte offset.
    """

    def __init__(self, url, verbose=False, block_size=(16 * 1024)):
        """ Prepare to read from /url/.

            url: HTTP URL of the resource. Only the host, path and query
                parts are used; requests are made with HTTPConnection.
            verbose: when true, report size and download progress on stderr.
            block_size: number of bytes requested per Range fetch.

            Makes one HTTP request right away to learn the resource length.
        """
        self.verbose = verbose

        # scheme://host/path;parameters?query#fragment
        (scheme, host, path, parameters, query, fragment) = urlparse(url)

        self.host = host
        # An empty path is not a valid request target, so ask for the root.
        self.rest = (path or '/') + (('?' + query) if query else '')

        self.offset = 0
        self.chunks = {}
        self.block_size = block_size
        self.start_time = time()
        self.length = self.get_length()

    @staticmethod
    def _total_length(content_range, content_length):
        """ Work out the full resource size from response header values.

            A Content-Range value such as "bytes 0-0/1234" (or "bytes */0"
            on an unsatisfiable range) carries the total after the slash
            and takes priority. Without it, the response is assumed to
            describe the whole resource and Content-Length is used.

            Raises IOError when neither header yields a number.
        """
        if content_range and '/' in content_range:
            total = content_range.rsplit('/', 1)[1].strip()
            if total.isdigit():
                return int(total)
            # Partial response with unknown total ("*"): Content-Length
            # would only be the size of the range, so it cannot be used.
            raise IOError('Server did not report a total length: %r'
                          % (content_range,))

        if content_length is None:
            raise IOError('Server sent no Content-Length header')

        return int(content_length)

    def get_length(self):
        """ Ask the server for the total size of the resource, in bytes.

            Requests just the first byte so that a range-capable server
            answers with a Content-Range header holding the total size.
        """
        conn = HTTPConnection(self.host)

        try:
            conn.request('GET', self.rest, headers={'Range': 'bytes=0-0'})
            response = conn.getresponse()
            length = self._total_length(response.getheader('content-range'),
                                        response.getheader('content-length'))
        finally:
            # The body is never needed here; drop the connection.
            conn.close()

        if self.verbose:
            stderr.write('%d bytes in %s\n' % (length, basename(self.rest)))

        return length

    def get_range(self, start, end):
        """ Fetch bytes /start/ through /end/ (both inclusive) and return them.
        """
        headers = {'Range': 'bytes=%d-%d' % (start, end)}
        conn = HTTPConnection(self.host)

        try:
            conn.request('GET', self.rest, headers=headers)
            response = conn.getresponse()
            body = response.read()
            status = response.status
        finally:
            conn.close()

        if status == 200:
            # Range header was ignored and the whole resource came back;
            # cut out the window that was asked for.
            body = body[start:end + 1]

        return body

    def _load_chunk(self, chunk_offset):
        """ Return the block starting at /chunk_offset/, fetching it if needed.
        """
        if chunk_offset not in self.chunks:
            # The block ends one block_size after its own start (not after
            # the current read position), or at the end of the resource.
            last = min(self.length, chunk_offset + self.block_size) - 1
            self.chunks[chunk_offset] = self.get_range(chunk_offset, last)

            if self.verbose:
                self._report_progress()

        return self.chunks[chunk_offset]

    def _report_progress(self):
        """ Write the loaded fraction and a time estimate to stderr.
        """
        loaded = float(self.block_size) * len(self.chunks) / self.length
        expect = (time() - self.start_time) / loaded
        remain = max(0, int(expect * (1 - loaded)))

        stderr.write('%.1f%% of %s with %s to go\n'
                     % (min(100, 100 * loaded), basename(self.rest),
                        timedelta(seconds=remain)))

    def read(self, count=None):
        """ Read /count/ bytes from the resource at the current offset.

            With /count/ omitted, None or negative, read to the end.
            Never reads past the end of the resource: fewer bytes than
            requested (possibly none) are returned there, and the offset
            only advances by the number of bytes actually returned.
        """
        remaining = max(0, self.length - self.offset)

        if count is None or count < 0 or count > remaining:
            count = remaining

        pieces = []

        while count > 0:
            chunk_offset = self.block_size * (self.offset // self.block_size)
            chunk = self._load_chunk(chunk_offset)

            in_chunk_offset = self.offset - chunk_offset
            in_chunk_count = min(count, self.block_size - in_chunk_offset)
            piece = chunk[in_chunk_offset:in_chunk_offset + in_chunk_count]

            if not piece:
                # Server delivered less than expected; stop instead of
                # advancing over bytes that were never received.
                break

            pieces.append(piece)
            count -= len(piece)
            self.offset += len(piece)

        return b''.join(pieces)

    def seek(self, offset, whence=SEEK_SET):
        """ Seek to the specified offset.

            /whence/ behaves as with other file-like objects:
            http://docs.python.org/lib/bltin-file-objects.html

            Any other /whence/ value leaves the position untouched.
        """
        if whence == SEEK_SET:
            self.offset = offset
        elif whence == SEEK_CUR:
            self.offset += offset
        elif whence == SEEK_END:
            self.offset = self.length + offset

    def tell(self):
        """ Return the current read offset, in bytes from the start.
        """
        return self.offset
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment