Skip to content

Instantly share code, notes, and snippets.

@vtermanis
Last active April 27, 2017 21:46
Show Gist options
  • Save vtermanis/739e34b8a4e4b323932996a288fa1001 to your computer and use it in GitHub Desktop.
Save vtermanis/739e34b8a4e4b323932996a288fa1001 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (c) 2017 Iotic Labs Ltd. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://github.com/Iotic-Labs/py-lz4framed/blob/master/LICENSE
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# WARNING: This is by no means complete / fully tested. Loosely based on
# approach taken by https://github.com/python/cpython/blob/3.6/Lib/gzip.py
from __future__ import unicode_literals, print_function
from sys import argv
from io import BytesIO, BufferedReader, SEEK_END
import tarfile
from lz4framed import Lz4FramedNoDataError, Decompressor, Compressor
class LZ4FFile(object):
    """Read-only, file-like view of an lz4-frame compressed file.

    The file is opened in binary mode and its decompressed content is served
    through an io.BufferedReader wrapped around _LZ4Decoder. Instances are
    context managers and can be iterated line by line (binary, not text).
    """

    def __init__(self, filename):
        """Open *filename* for reading and set up the buffered decoder.

        The file is closed again if building the reader chain fails, so a
        failed construction does not leak the file handle.
        """
        self.__file = open(filename, 'rb')
        try:
            self.__reader = BufferedReader(_LZ4Decoder(self.__file))
        except Exception:
            self.__file.close()
            self.__file = None
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # close() raises once the file is gone, so only call it when still open
        if self.__file is not None:
            self.close()

    def close(self):
        """Close the underlying file.

        Raises:
            ValueError: if the file has already been closed.
        """
        self._check_not_closed()
        self.__file.close()
        self.__file = None

    def _check_not_closed(self):
        """Raise ValueError if close() has already been called."""
        if self.__file is None:
            raise ValueError('File already closed')

    def read(self, size=-1):
        """Return up to *size* decompressed bytes (everything if negative)."""
        self._check_not_closed()
        return self.__reader.read(size)

    def readline(self, size=-1):
        """Return the next line of decompressed bytes.

        *size* is passed through to the buffered reader's readline(); the
        default of -1 keeps the previous argument-less behaviour.
        """
        self._check_not_closed()
        return self.__reader.readline(size)

    def __iter__(self):
        """Iterate over decompressed lines.

        Raises:
            ValueError: if the file has already been closed (same check as
                read() and readline()).
        """
        self._check_not_closed()
        return iter(self.__reader)
class _LZ4Decoder(object):
    """Support class for LZ4FFile - exposes just enough methods to satisfy io.BufferedReader.

    Wraps a Decompressor built on the given file object and serves the chunks
    it yields as a byte stream. Lz4FramedNoDataError raised while iterating
    the decompressor is treated as end of data.
    """

    # Property required by e.g. BufferedReader
    closed = False

    def __init__(self, fp):
        self.__decompressor = Decompressor(fp)
        # left over part from read which yielded too much data
        self.__remaining = b''

    @staticmethod
    def readable():
        """Report that this stream supports reading."""
        return True

    def readinto(self, buf):
        """Fill *buf* with up to len(buf) decompressed bytes.

        Returns:
            The number of bytes written into *buf* (0 once data is exhausted).
        """
        return BytesIO(self.read(len(buf))).readinto(buf)

    def read(self, size=-1):
        """Return decompressed bytes.

        Args:
            size: maximum number of bytes to return. A negative value or None
                returns everything that is left; 0 returns b'' without
                touching the decompressor.

        Returns:
            A bytes object of at most *size* bytes (when size is positive);
            b'' once no more data is available.
        """
        # io streams conventionally accept None as "no limit"; normalise it so
        # the comparisons below do not raise TypeError on Python 3
        if size is None:
            size = -1
        # empty read
        if size == 0:
            return b''

        # start with any data remaining from last read
        parts = [self.__remaining]
        self.__remaining = b''

        # read all remaining data
        if size < 0:
            try:
                for chunk in self.__decompressor:
                    parts.append(chunk)
            except Lz4FramedNoDataError:
                pass
            return b''.join(parts)

        # read at most size bytes: pull chunks until enough has been gathered
        gathered = len(parts[0])
        iterator = iter(self.__decompressor)
        try:
            while gathered < size:
                # sentinel instead of StopIteration: exhaustion is not an error
                chunk = next(iterator, None)
                if chunk is None:
                    break
                parts.append(chunk)
                gathered += len(chunk)
        except Lz4FramedNoDataError:
            pass

        data = b''.join(parts)
        # too much data? preserve for next read
        if len(data) > size:
            data, self.__remaining = data[:size], data[size:]
        return data
class WriteableCompressor(Compressor):
    """Compressor with a file-like write() method (for outputting lz4-framed data)"""

    def write(self, b):  # pylint: disable=invalid-name
        """Pass *b* to update() and hand back whatever update() returns."""
        outcome = self.update(b)
        return outcome
def tar_example():
    """Round-trip demo: tar argv[2] into the lz4-framed file argv[1], then unpack it.

    The archive is written as a tar stream through WriteableCompressor and
    read back through LZ4FFile; its content is extracted to /tmp/.
    """
    archive_path = argv[1]

    # produce compressed tar file
    with open(archive_path, 'wb') as out_file, \
            WriteableCompressor(fp=out_file) as compressor, \
            tarfile.open(mode='w|', fileobj=compressor) as tar:
        tar.add(argv[2])

    # decompress tar file to /tmp
    with LZ4FFile(archive_path) as in_file, \
            tarfile.open(mode='r|', fileobj=in_file) as tar:
        tar.extractall('/tmp/')
def example():
    """Compress the file argv[1] into argv[2], then print argv[2] back line by line."""
    # produce compressed output to file
    with open(argv[2], 'wb') as out_file:
        with open(argv[1], 'rb') as in_file:
            with Compressor(out_file) as compressor:
                payload = in_file.read()
                try:
                    compressor.update(payload)
                except Lz4FramedNoDataError:
                    # end of input
                    pass

    # read compressed input file line-by-line (binary, not text!)
    with LZ4FFile(argv[2]) as compressed:
        for raw_line in compressed:
            print(raw_line)
if __name__ == '__main__':
    # Default demo: compress argv[1] into argv[2] and print it back.
    example()
    # Alternative demo (tar round trip), disabled by default:
    # tar_example()
@vtermanis
Copy link
Author

vtermanis commented Apr 27, 2017

Revision 2 - fixed handling of the leftover buffer: a BytesIO initialised with data starts with its position at the beginning of the stream rather than the end, so the code now seeks to the end before appending further chunks.
Revision 3 - Added tar example & minor documentation updates

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment