Created
August 7, 2016 13:23
-
-
Save julian-savage/ed81f01c1f3e1bf1c92d3d105d14d459 to your computer and use it in GitHub Desktop.
InputStream which decompresses gzip/zlib compressed data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
import Compression | |
/// An `InputStream` that yields the decompressed form of gzip/deflate data.
///
/// Every overridden stream operation is forwarded to a private `Decompressor`
/// that owns its own `InputStream` over the compressed bytes; the superclass is
/// initialised with the same source only to satisfy the designated initialisers.
public class GzipInputStream : InputStream {
    private var decompressor : Decompressor

    /// Creates a stream that decompresses the given in-memory compressed data.
    override public init(data: Data) {
        decompressor = Decompressor(InputStream(data: data))
        super.init(data: data)
    }

    /// Creates a stream that decompresses the resource at `url`.
    /// Fails (returns `nil`) when no `InputStream` can be created for `url`.
    override public init?(url: URL) {
        guard let source = InputStream(url: url) else {
            return nil
        }
        decompressor = Decompressor(source)
        super.init(url: url)
    }

    /// Opens the underlying compressed stream.
    override public func open() {
        decompressor.open()
    }

    /// Closes the underlying compressed stream.
    override public func close() {
        decompressor.close()
    }

    /// Decompresses up to `len` bytes into `buffer`, which must have room for at
    /// least `len` bytes, and returns whatever the decompressor reports as the
    /// number of bytes produced (or its error value).
    override public func read(_ buffer: UnsafeMutablePointer<UInt8>, maxLength len: Int) -> Int {
        let produced = decompressor.read(buffer, maxLength: len)
        return produced
    }

    /// Always returns `false`: output is decompressed directly into the buffer
    /// supplied to `read`, so there is no internal buffer of decompressed bytes
    /// to expose, and handing out the compressed bytes would not be meaningful.
    override public func getBuffer(_ buffer: UnsafeMutablePointer<UnsafeMutablePointer<UInt8>?>, length len: UnsafeMutablePointer<Int>) -> Bool {
        return false
    }

    /// Mirrors the decompressor's `hasBytesAvailable`; `true` may also mean that
    /// it cannot be known without actually attempting a read.
    override public var hasBytesAvailable: Bool {
        return decompressor.hasBytesAvailable
    }
}
// Main functionality implemented in this separate class to avoid init() duplication in parent class | |
/// Pulls compressed bytes from an `InputStream` and inflates them with the
/// Compression framework's `COMPRESSION_ZLIB` decoder.
///
/// The decoder is handed the DEFLATE payload only, so a gzip (RFC 1952) member
/// header found at the start of the input is skipped before decoding. Input that
/// does not start with the gzip magic bytes is passed to the decoder unchanged.
private class Decompressor {
    /// Size of the staging buffer used for reads from the compressed stream.
    static let READ_BUFFER_SIZE = 8 * 1024
    // RFC 1952 - GZIP Constants
    static let GZIP_MINIMUM_HEADER_SIZE = 10
    static let GZIP_ID1 : UInt8 = 0x1f
    static let GZIP_ID2 : UInt8 = 0x8b
    static let GZIP_FHCRC_FLAG : UInt8 = 2
    static let GZIP_FEXTRA_FLAG : UInt8 = 4
    static let GZIP_FNAME_FLAG : UInt8 = 8
    static let GZIP_FCOMMENT_FLAG : UInt8 = 16

    var compressedInputStream : InputStream
    /// `true` until the first chunk of input has been inspected for a gzip header.
    var skipHeader : Bool
    var readBuffer : UnsafeMutablePointer<UInt8>
    var compressionStream : UnsafeMutablePointer<compression_stream>
    var compressionStatus : compression_status

    /// Allocates the staging buffer and decoder state for `compressedInputStream`.
    /// The stream is not opened here; call `open()` before reading.
    init(_ compressedInputStream : InputStream) {
        self.compressedInputStream = compressedInputStream
        self.skipHeader = true
        self.readBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: Decompressor.READ_BUFFER_SIZE)
        self.compressionStream = UnsafeMutablePointer<compression_stream>.allocate(capacity: 1)
        self.compressionStatus = compression_stream_init(self.compressionStream, COMPRESSION_STREAM_DECODE, COMPRESSION_ZLIB)
        // Start with an empty source so the first read() pulls from the input stream.
        self.compressionStream.pointee.src_ptr = UnsafePointer<UInt8>(self.readBuffer)
        self.compressionStream.pointee.src_size = 0
    }

    /// Opens the compressed input stream.
    func open() {
        compressedInputStream.open()
    }

    /// Closes the compressed input stream.
    func close() {
        compressedInputStream.close()
    }

    deinit {
        compression_stream_destroy(compressionStream)
        // Both allocations hold trivial types, so releasing the memory is all that
        // is needed; deinitializing alone (as before) never freed them.
        compressionStream.deallocate()
        readBuffer.deallocate()
    }

    /// Decompresses up to `len` bytes into `buffer`.
    ///
    /// - Parameters:
    ///   - buffer: Destination with room for at least `len` bytes.
    ///   - len: Maximum number of bytes to produce.
    /// - Returns: The number of bytes written to `buffer`; `0` once the end of the
    ///   compressed data (or of the input stream) has been reached; `-1` if the
    ///   decoder or the input stream reports an error, or if the gzip header is
    ///   malformed or not fully contained in the first chunk read.
    func read(_ buffer: UnsafeMutablePointer<UInt8>, maxLength len: Int) -> Int {
        var currentBufferPosition = buffer
        while (currentBufferPosition - buffer) < len && compressionStatus == COMPRESSION_STATUS_OK {
            // Start by running the decoder to drain any input it still holds.
            compressionStream.pointee.dst_ptr = currentBufferPosition
            compressionStream.pointee.dst_size = len - (currentBufferPosition - buffer)
            compressionStatus = compression_stream_process(compressionStream, 0)
            guard compressionStatus != COMPRESSION_STATUS_ERROR else {
                return -1
            }

            // compression_stream_process advances dst_ptr, which gives the output size.
            let decompressedLength = compressionStream.pointee.dst_ptr - currentBufferPosition
            currentBufferPosition = compressionStream.pointee.dst_ptr

            // Only refill when the decoder made no progress and still expects input.
            guard decompressedLength == 0, compressionStatus == COMPRESSION_STATUS_OK else {
                continue
            }

            let bytesRead = compressedInputStream.read(readBuffer, maxLength: Decompressor.READ_BUFFER_SIZE)
            if bytesRead < 0 {
                // Propagate the input error instead of feeding a negative size to the decoder.
                compressionStatus = COMPRESSION_STATUS_ERROR
                return -1
            }
            if bytesRead == 0 {
                // Input exhausted before the decoder saw the end of the DEFLATE data.
                // Stop here rather than spinning forever; report what was produced.
                compressionStatus = COMPRESSION_STATUS_END
                break
            }

            // gzip files carry a header the decoder does not understand; the trailer
            // needs no handling because the decoder stops at the end of the DEFLATE data.
            var bytesSkipped = 0
            if skipHeader {
                // Remember that the header check has been done.
                skipHeader = false
                guard let headerLength = gzipHeaderLength(availableBytes: bytesRead) else {
                    compressionStatus = COMPRESSION_STATUS_ERROR
                    return -1
                }
                bytesSkipped = headerLength
            }
            compressionStream.pointee.src_ptr = UnsafePointer<UInt8>(readBuffer) + bytesSkipped
            compressionStream.pointee.src_size = bytesRead - bytesSkipped
        }
        return compressionStatus == COMPRESSION_STATUS_ERROR ? -1 : (currentBufferPosition - buffer)
    }

    /// Measures the gzip member header at the start of `readBuffer`.
    ///
    /// Handles the fixed 10-byte header plus the optional FEXTRA, FNAME, FCOMMENT
    /// and FHCRC fields, in the order RFC 1952 defines them. Every access is
    /// bounds-checked against `availableBytes`.
    ///
    /// - Parameter availableBytes: Number of valid bytes currently in `readBuffer`.
    /// - Returns: The header length in bytes; `0` if the buffer does not start with
    ///   a gzip header; `nil` if a header is present but runs past `availableBytes`.
    private func gzipHeaderLength(availableBytes: Int) -> Int? {
        guard availableBytes >= Decompressor.GZIP_MINIMUM_HEADER_SIZE,
              readBuffer[0] == Decompressor.GZIP_ID1,
              readBuffer[1] == Decompressor.GZIP_ID2 else {
            return 0
        }
        let flags = readBuffer[3]
        var offset = Decompressor.GZIP_MINIMUM_HEADER_SIZE

        // FEXTRA: little-endian 16-bit length followed by that many bytes.
        if flags & Decompressor.GZIP_FEXTRA_FLAG != 0 {
            guard offset + 2 <= availableBytes else { return nil }
            let extraLength = Int(readBuffer[offset]) | (Int(readBuffer[offset + 1]) << 8)
            offset += 2 + extraLength
            guard offset <= availableBytes else { return nil }
        }

        // FNAME, then FCOMMENT: each is a zero-terminated string.
        for flag in [Decompressor.GZIP_FNAME_FLAG, Decompressor.GZIP_FCOMMENT_FLAG] where flags & flag != 0 {
            while offset < availableBytes && readBuffer[offset] != 0 {
                offset += 1
            }
            guard offset < availableBytes else { return nil }
            offset += 1 // step over the terminating zero byte
        }

        // FHCRC: 16-bit header checksum (skipped, not verified).
        if flags & Decompressor.GZIP_FHCRC_FLAG != 0 {
            offset += 2
            guard offset <= availableBytes else { return nil }
        }
        return offset
    }

    /// Always returns `false`; decompressed bytes are never buffered internally.
    func getBuffer(_ buffer: UnsafeMutablePointer<UnsafeMutablePointer<UInt8>?>, length len: UnsafeMutablePointer<Int>) -> Bool {
        return false
    }

    /// `true` while the decoder has reported neither end-of-data nor an error.
    /// This is `true` before the first read even if the input turns out to be
    /// empty, which `InputStream` permits when availability cannot be known
    /// without attempting a read.
    var hasBytesAvailable: Bool {
        return compressionStatus == COMPRESSION_STATUS_OK
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi. What is the license associated with this code? May I use it in a private/closed-source project?