Created
August 5, 2023 12:47
-
-
Save NyaMisty/d515964c40ed941b3aa7f8019657c532 to your computer and use it in GitHub Desktop.
Python file-like object over HTTP, using ranged requests!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io | |
import requests | |
class HTTPFileObj:
    """Read-only, seekable file-like object that streams a URL over HTTP.

    Data is pulled lazily with ``requests`` in ``chunk_size`` pieces.  Every
    (re)connection sends a ``Range: bytes=<position>-`` header, so seeking
    outside the buffered data simply opens a new ranged request at the target
    offset instead of downloading everything in between.

    Args:
        url: Address of the resource to read.
        chunk_size: Number of bytes requested from the response stream per
            iteration step.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        OSError: If a non-zero offset is requested and the server does not
            reply with ``206 Partial Content`` (i.e. it ignored ``Range``).
    """

    def __init__(self, url, chunk_size=1024):
        self.url = url
        self.chunk_size = chunk_size
        # Logical position: offset of the next byte read() will return.
        self.start_byte = 0
        # Bytes already received from the network but not yet handed out;
        # they always start at offset ``start_byte``.
        self.buffer = b""
        # Total size of the resource; -1 until the first response is seen.
        self.max_size = -1
        # True once no more data can be returned from the current position.
        self.eof = False
        # Currently open streaming response (None before the first request).
        self.response = None
        self._download()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def _download(self):
        """Open a new ranged request starting at ``start_byte``.

        Callers must make sure ``buffer`` is empty, so that the new stream
        begins exactly at the logical position.  ``remaining_bytes`` is reset
        to the ``Content-Length`` of the new response, i.e. the number of
        bytes that have not been pulled from the stream yet.
        """
        # Release the previous connection instead of leaking it.
        previous = getattr(self, "response", None)
        if previous is not None:
            previous.close()
        self.response = self._get_response()
        self.remaining_bytes = int(self.response.headers.get("Content-Length", 0))
        if self.max_size < 0:
            # The first request starts at offset 0, so its length is the
            # size of the whole resource.
            self.max_size = self.remaining_bytes
        self.content = self.response.iter_content(chunk_size=self.chunk_size)

    def _get_response(self):
        """Issue the streaming GET request for ``bytes=<start_byte>-``."""
        headers = {"Range": f"bytes={self.start_byte}-"}
        response = requests.get(self.url, headers=headers, stream=True)
        try:
            # Never hand an HTTP error page to the caller as file content.
            response.raise_for_status()
            if self.start_byte > 0 and response.status_code != 206:
                # A plain 200 means the body starts at offset 0 again.
                raise OSError(
                    f"server ignored Range request for offset {self.start_byte}"
                )
        except Exception:
            response.close()
            raise
        return response

    def _next_chunk(self):
        """Return the next chunk of the stream, reconnecting once if it ended.

        Returns ``b""`` when even the fresh connection yields no data.
        """
        try:
            return next(self.content)
        except StopIteration:
            # The stream stopped although bytes are still expected: resume
            # from the current position with a new ranged request.
            self._download()
            return next(self.content, b"")

    def tell(self):
        """Return the current position in the resource."""
        return self.start_byte

    def seek(self, offset, whence=io.SEEK_SET):
        """Move the read position and return the new absolute position.

        Args:
            offset: Byte offset, interpreted according to ``whence``.
            whence: ``io.SEEK_SET`` (absolute), ``io.SEEK_CUR`` (relative to
                the current position) or ``io.SEEK_END`` (relative to the
                end of the resource).

        Raises:
            ValueError: If ``whence`` is unknown or the resulting position
                is negative.
        """
        if whence == io.SEEK_SET:
            target = offset
        elif whence == io.SEEK_CUR:
            target = self.start_byte + offset
        elif whence == io.SEEK_END:
            target = self.max_size + offset
        else:
            raise ValueError(f"invalid whence ({whence!r}, should be 0, 1 or 2)")
        if target < 0:
            raise ValueError(f"negative seek position {target!r}")

        delta = target - self.start_byte
        self.start_byte = target
        if target >= self.max_size:
            # At or past the end: nothing left to read from here.
            self.eof = True
            self.buffer = b""
            self.remaining_bytes = 0
        elif 0 <= delta <= len(self.buffer):
            # Forward seek covered by buffered data: just drop the skipped
            # bytes.  The stream itself is untouched, so ``remaining_bytes``
            # (bytes not yet pulled from it) does not change.
            self.buffer = self.buffer[delta:]
            self.eof = False
        else:
            # Backward seek or a jump beyond the buffer: reconnect there.
            self.eof = False
            self.buffer = b""
            self._download()
        return self.start_byte

    def read(self, size=-1):
        """Read up to ``size`` bytes from the current position.

        Args:
            size: Maximum number of bytes to return; a negative value or
                ``None`` reads until the end of the resource.

        Returns:
            The bytes read; ``b""`` at end of file.
        """
        if self.eof:
            return b""
        unlimited = size is None or size < 0

        pieces = []
        collected = 0
        if self.buffer:
            # Serve previously received bytes first.
            if unlimited:
                head, self.buffer = self.buffer, b""
            else:
                head, self.buffer = self.buffer[:size], self.buffer[size:]
            pieces.append(head)
            collected = len(head)
            self.start_byte += collected

        # The loop is only entered once the buffer is exhausted, so the
        # stream position equals ``start_byte`` whenever a reconnect happens.
        while (unlimited or collected < size) and self.remaining_bytes > 0:
            chunk = self._next_chunk()
            if not chunk:
                break
            self.remaining_bytes -= len(chunk)
            if not unlimited and len(chunk) > size - collected:
                wanted = size - collected
                self.buffer = chunk[wanted:]  # keep the surplus for later
                chunk = chunk[:wanted]
            pieces.append(chunk)
            collected += len(chunk)
            self.start_byte += len(chunk)

        # EOF only when nothing is buffered and nothing is left to stream.
        self.eof = not self.buffer and self.remaining_bytes <= 0
        return b"".join(pieces)

    def close(self):
        """Close the underlying HTTP response, if one is open."""
        response = getattr(self, "response", None)
        if response is not None:
            response.close()
# Example: read the first 100 bytes, jump close to the end of a 10 MiB test
# file served locally, and read whatever is left there.
url = "http://127.0.0.1:12343/chfs/shared/test10m"
file_obj = HTTPFileObj(url)
try:
    print(file_obj.read(100).hex())
    print(file_obj.seek(10485660))
    print(file_obj.read(100).hex())
finally:
    # Release the HTTP connection even if a read or seek above fails.
    file_obj.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment