-
-
Save mygithub23/51941abb97aa6ddce3b4f557eec1861f to your computer and use it in GitHub Desktop.
Sample code to build a tar chunk-by-chunk and stream it out all at once.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Building a tar file chunk-by-chunk.
#
# This is a quick bit of sample code for streaming data to a tar file,
# building it piece-by-piece. The tarfile is built on-the-fly and streamed
# back out. This is useful for web applications that need to dynamically
# build a tar file without swamping the server.
import tarfile | |
from io import BytesIO | |
class FileStream:
    """Write-only in-memory sink whose pending bytes can be drained on demand.

    ``tarfile`` writes into an instance through ``write``; the producer calls
    ``pop`` to take whatever has accumulated so far and hand it to a consumer
    (for example an HTTP response). ``offset`` counts every byte ever written,
    independent of how often the buffer has been drained.
    """

    def __init__(self):
        # Bytes written since the last pop().
        self.buffer = BytesIO()
        # Total number of bytes written over the lifetime of the stream.
        self.offset = 0

    def write(self, s):
        """Append the bytes-like *s* and return the number of bytes written."""
        self.buffer.write(s)
        written = len(s)
        self.offset += written
        return written

    def tell(self):
        """Return the total number of bytes written so far."""
        return self.offset

    def close(self):
        """Close the underlying buffer, discarding any bytes not yet popped."""
        self.buffer.close()

    def pop(self):
        """Return the bytes written since the previous pop and reset the buffer.

        Raises ``ValueError`` (from ``BytesIO.getvalue``) if the stream has
        already been closed.
        """
        s = self.buffer.getvalue()
        self.buffer.close()
        self.buffer = BytesIO()
        return s

    @staticmethod
    def _split_every(n, text):
        """Yield consecutive slices of *text*, each at most *n* items long.

        Raises ``ValueError`` if *n* is not positive (the check runs when the
        generator is first advanced).
        """
        if n <= 0:
            raise ValueError('chunk size must be a positive integer')
        # Step an index instead of re-slicing the remainder each time, which
        # would copy the whole tail on every iteration (quadratic work).
        for start in range(0, len(text), n):
            yield text[start:start + n]

    @staticmethod
    def _to_bytes(data):
        """Return *data* as ``bytes``: ``str`` is UTF-8 encoded, ``None`` is empty."""
        if data is None:
            return b''
        if isinstance(data, str):
            return data.encode('utf8')
        return bytes(data)

    @classmethod
    def yield_tar_gz(cls, file_data_iterable):
        """Generate a gzip-compressed tar archive as a sequence of ``bytes``.

        *file_data_iterable* yields ``(filename, data)`` pairs, where *data*
        is ``str`` (stored UTF-8 encoded) or a bytes-like object. Joining all
        yielded chunks gives a complete ``.tar.gz`` file. Empty chunks are
        never yielded; because the compressor buffers its output, several
        members may be written before any bytes become available.
        """
        stream = cls()
        tar = tarfile.open(mode='w|gz', fileobj=stream)
        try:
            for filename, data in file_data_iterable:
                payload = cls._to_bytes(data)

                tar_info = tarfile.TarInfo(filename)
                # The member size is stored in the header, so it has to be
                # final *before* the header is serialized; a header written
                # with size 0 makes readers treat the member as empty and
                # misparse the data blocks that follow it.
                tar_info.size = len(payload)

                # Emit the header ourselves: TarFile.addfile() without a
                # fileobj is rejected for non-empty regular files on
                # Python 3.13+.
                header = tar_info.tobuf(tar.format, tar.encoding, tar.errors)
                tar.fileobj.write(header)
                tar.offset += len(header)
                pending = stream.pop()
                if pending:
                    yield pending

                for chunk in cls._split_every(tarfile.BLOCKSIZE, payload):
                    tar.fileobj.write(chunk)
                    pending = stream.pop()
                    if pending:
                        yield pending

                # Member data is padded with NULs to a whole number of blocks.
                padding = -tar_info.size % tarfile.BLOCKSIZE
                if padding:
                    tar.fileobj.write(tarfile.NUL * padding)
                    pending = stream.pop()
                    if pending:
                        yield pending

                # Keep the archive offset accurate; close() uses it to pad
                # the archive out to a full record.
                tar.offset += tar_info.size + padding

            # Closing flushes the compressor and writes the end-of-archive
            # blocks and gzip trailer into the stream.
            tar.close()
            pending = stream.pop()
            if pending:
                yield pending
        finally:
            # Also reached when the consumer abandons the generator or the
            # input iterable raises; TarFile.close() is a no-op if already
            # closed.
            tar.close()
            stream.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment