Created
September 3, 2016 07:48
-
-
Save tremby/92ae3a4e913d261ea5180717c3f3aeb8 to your computer and use it in GitHub Desktop.
Seekable or non-seekable (depending on HTTP server capabilities) file object from URL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
class SeekableURL(object):
    """Read-only, file-like object backed by a URL.

    On first use a ``HEAD`` request with a one-byte ``Range`` header probes
    the server.  If the server answers ``206 Partial Content`` the object is
    seekable and every ``read`` issues its own ranged ``GET``.  Otherwise the
    object falls back to a single streamed ``GET`` that can only be read
    sequentially.
    """

    # Ask for the unencoded representation so that byte offsets, the
    # reported size and the returned data all refer to the same bytes.
    _BASE_HEADERS = {'accept-encoding': 'identity'}

    def __init__(self, url):
        """Remember ``url``; no network traffic happens until first use."""
        self._url = url
        self._pointer = 0
        self._filesize = None
        self._is_seekable = None
        self._got_stats = False
        self._stream = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Release the streamed connection, if one was opened."""
        if self._stream is not None:
            self._stream.close()
            self._stream = None

    def seek(self, position, whence=0):
        """Move the read pointer and return the new absolute offset.

        ``whence`` follows the usual file convention: 0 = from the start,
        1 = relative to the current offset, 2 = relative to the end (so
        ``position`` is normally zero or negative in that case).

        Raises:
            RuntimeError: the server does not support range requests, or
                ``whence == 2`` was requested but the size is unknown.
            ValueError: ``whence`` is unrecognized or the resulting offset
                would be negative.
        """
        self._get_stats()
        if not self.seekable():
            raise RuntimeError("This stream is not seekable")

        if whence == 0:
            target = position
        elif whence == 1:
            target = self._pointer + position
        elif whence == 2:
            if self._filesize is None:
                raise RuntimeError(
                    "Cannot seek from the end: the file size is unknown")
            # The end of the file is offset ``filesize``, not ``filesize - 1``.
            target = self._filesize + position
        else:
            raise ValueError("Unrecognized value {} for whence".format(whence))

        if target < 0:
            raise ValueError("Negative seek position {}".format(target))
        self._pointer = target
        return self._pointer

    @staticmethod
    def _total_from_content_range(value):
        """Return the total size from a ``Content-Range`` value, or None.

        ``bytes 0-0/1234`` yields ``1234``; an unknown total (``*``) or a
        missing or malformed header yields ``None``.
        """
        if not value:
            return None
        total = value.rpartition('/')[2].strip()
        return int(total) if total.isdigit() else None

    def _get_stats(self):
        """Probe the server once for range support and file size."""
        if self._got_stats:
            return
        headers = dict(self._BASE_HEADERS, range='bytes=0-0')
        # requests.head() does not follow redirects unless told to.
        response = requests.head(
            self._url, headers=headers, allow_redirects=True)
        # Fail before caching anything so a later call can retry the probe.
        response.raise_for_status()

        is_seekable = response.status_code == 206
        filesize = self._total_from_content_range(
            response.headers.get('content-range'))
        # On a 206 response Content-Length is the length of the one-byte
        # part, so it is only usable as the file size on a full response.
        if filesize is None and not is_seekable:
            length = response.headers.get('content-length')
            if length:
                filesize = int(length)

        self._is_seekable = is_seekable
        self._filesize = filesize
        self._got_stats = True

    def seekable(self):
        """Return True if the server honours range requests."""
        self._get_stats()
        return self._is_seekable

    def size(self):
        """Return the file size in bytes, or None if the server hid it."""
        self._get_stats()
        return self._filesize

    def read(self, size=-1):
        """Read up to ``size`` bytes from the current offset.

        A negative ``size`` or ``None`` reads to the end.  Returns ``b''``
        at end of file.  The read pointer advances by the number of bytes
        returned.
        """
        # Make sure we know about the server's capabilities
        self._get_stats()
        if size is None:
            size = -1

        if not self._is_seekable:
            # Range is not supported, so defer to a non-seekable stream
            if self._stream is None:
                response = requests.get(
                    self._url, headers=dict(self._BASE_HEADERS), stream=True)
                response.raise_for_status()
                self._stream = response.raw
            data = self._stream.read(None if size < 0 else size)
            self._pointer += len(data)
            return data

        if size == 0:
            return b''
        start = self._pointer
        if self._filesize is not None and start >= self._filesize:
            # At or past the end: nothing to fetch.
            return b''

        # Request this particular chunk from the HTTP server; an empty end
        # means "through the last byte".
        if size < 0:
            end = ''
        else:
            end = start + size - 1
            if self._filesize is not None:
                end = min(end, self._filesize - 1)
        headers = dict(
            self._BASE_HEADERS, range='bytes={}-{}'.format(start, end))
        response = requests.get(self._url, headers=headers)
        if response.status_code == 416:
            # Range not satisfiable: the offset is beyond the end of file.
            return b''
        response.raise_for_status()

        data = response.content
        if response.status_code != 206:
            # The server ignored the Range header and sent the whole body.
            data = data[start:] if size < 0 else data[start:start + size]
        self._pointer += len(data)
        return data

    def tell(self):
        """Return the current read offset in bytes."""
        return self._pointer
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
How do you implement a seekable stream in the server?