Last active
April 27, 2017 21:46
-
-
Save vtermanis/739e34b8a4e4b323932996a288fa1001 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
# Copyright (c) 2017 Iotic Labs Ltd. All rights reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# https://github.com/Iotic-Labs/py-lz4framed/blob/master/LICENSE | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
# WARNING: This is by no means complete or fully tested. It is loosely based on
# the approach taken by https://github.com/python/cpython/blob/3.6/Lib/gzip.py
from __future__ import unicode_literals, print_function | |
from sys import argv | |
from io import BytesIO, BufferedReader, SEEK_END | |
import tarfile | |
from lz4framed import Lz4FramedNoDataError, Decompressor, Compressor | |
class LZ4FFile(object):
    """Read-only, file-like view of an lz4-frame compressed file.

    Decompressed bytes are served through an ``io.BufferedReader`` layered on
    top of ``_LZ4Decoder``. Instances work as context managers and can be
    iterated line by line (lines are ``bytes``, not text).
    """

    def __init__(self, filename):
        """Open *filename* in binary mode and set up the decoding reader."""
        self.__file = open(filename, 'rb')
        try:
            self.__reader = BufferedReader(_LZ4Decoder(self.__file))
        except BaseException:
            # Do not leak the file handle if the decoder cannot be set up.
            self.__file.close()
            self.__file = None
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Tolerate an explicit close() having happened inside the with-block.
        if self.__file is not None:
            self.close()

    def close(self):
        """Close the underlying file.

        Raises:
            ValueError: If the file has already been closed.
        """
        self._check_not_closed()
        self.__file.close()
        self.__file = None

    def _check_not_closed(self):
        """Raise ValueError if close() has already been called."""
        if self.__file is None:
            raise ValueError('File already closed')

    def read(self, size=-1):
        """Return up to *size* decompressed bytes (everything left if negative).

        Raises:
            ValueError: If the file has been closed.
        """
        self._check_not_closed()
        return self.__reader.read(size)

    def readline(self, size=-1):
        """Return the next line of decompressed data, as bytes.

        Args:
            size: Optional upper bound on the number of bytes returned; the
                default (-1) means no limit, matching the previous behaviour.

        Raises:
            ValueError: If the file has been closed.
        """
        self._check_not_closed()
        return self.__reader.readline(size)

    def __iter__(self):
        """Iterate over the decompressed lines (bytes).

        Raises:
            ValueError: If the file has been closed.
        """
        # Same closed-state guard as read()/readline(), so that iterating a
        # closed object fails consistently.
        self._check_not_closed()
        return iter(self.__reader)
class _LZ4Decoder(object):
    """Raw-stream adapter used by LZ4FFile.

    Provides only the members io.BufferedReader needs from its raw stream:
    ``closed``, ``readable()`` and ``readinto()`` (backed by ``read()``).
    """

    # BufferedReader (among others) looks this attribute up on its raw stream
    closed = False

    def __init__(self, fp):
        # surplus bytes from an earlier read which produced more than requested
        self.__remaining = b''
        self.__decompressor = Decompressor(fp)

    @staticmethod
    def readable():
        return True

    def readinto(self, buf):
        """Copy up to len(buf) decompressed bytes into *buf*; return the count."""
        wanted = len(buf)
        payload = self.read(wanted)
        return BytesIO(payload).readinto(buf)

    def read(self, size=-1):
        """Return decompressed bytes.

        A *size* of zero yields ``b''`` immediately, a negative *size* drains
        the decompressor completely, and a positive *size* returns at most that
        many bytes, keeping any surplus for the following call.
        """
        if size == 0:
            return b''

        # begin with whatever the previous call could not hand out
        pieces = [self.__remaining]
        self.__remaining = b''

        if size < 0:
            # drain everything that is left
            try:
                for piece in self.__decompressor:
                    pieces.append(piece)
            except Lz4FramedNoDataError:
                pass
            return b''.join(pieces)

        # bounded read: pull chunks until enough bytes have been gathered
        source = iter(self.__decompressor)
        gathered = len(pieces[0])
        while gathered < size:
            try:
                piece = next(source)
            except (StopIteration, Lz4FramedNoDataError):
                break
            pieces.append(piece)
            gathered += len(piece)

        result = b''.join(pieces)
        if len(result) > size:
            # got more than asked for - stash the tail for the next read
            self.__remaining = result[size:]
            result = result[:size]
        return result
class WriteableCompressor(Compressor):
    """Compressor exposing ``write`` so it can stand in for a writeable
    file-like object (for outputting lz4-framed data)."""

    def write(self, b):  # pylint: disable=invalid-name
        """Pass *b* to ``update`` and return whatever ``update`` returns."""
        outcome = self.update(b)
        return outcome
def tar_example():
    """Tar round-trip demo.

    Packs the path given as argv[2] into an lz4-framed tar stream written to
    argv[1], then reads that file back and extracts its contents to /tmp/.
    """
    # write phase: tar stream -> lz4 frame compressor -> output file
    with open(argv[1], 'wb') as out_file, \
            WriteableCompressor(fp=out_file) as compressor, \
            tarfile.open(mode='w|', fileobj=compressor) as tar:
        tar.add(argv[2])

    # read phase: decompress the archive on the fly and unpack it to /tmp
    with LZ4FFile(argv[1]) as in_file, \
            tarfile.open(mode='r|', fileobj=in_file) as tar:
        tar.extractall('/tmp/')
def example():
    """Plain file round-trip demo.

    Compresses the file named by argv[1] into argv[2], then reads argv[2] back
    through LZ4FFile and prints every line (as bytes, not text).
    """
    # compress the whole input file into the output file
    with open(argv[2], 'wb') as compressed_out:
        with open(argv[1], 'rb') as plain_in:
            with Compressor(compressed_out) as lz4_writer:
                payload = plain_in.read()
                try:
                    lz4_writer.update(payload)
                except Lz4FramedNoDataError:
                    # treated as end of input - nothing more to compress
                    pass

    # iterate over the compressed file line-by-line (binary, not text!)
    with LZ4FFile(argv[2]) as lz4_in:
        for raw_line in lz4_in:
            print(raw_line)
if __name__ == '__main__':
    # Runs the plain file round-trip demo by default; call tar_example() here
    # instead to exercise the tar-based variant.
    example()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Revision 2 - fixed remaining buffer usage: BytesIO initialised with some data has current position at start instead of end of stream.
Revision 3 - Added tar example & minor documentation updates