Skip to content

Instantly share code, notes, and snippets.

@iansltx
Last active May 21, 2019 14:32
Show Gist options
  • Save iansltx/4820b02ab276c3306314daaa41573445 to your computer and use it in GitHub Desktop.
Save iansltx/4820b02ab276c3306314daaa41573445 to your computer and use it in GitHub Desktop.
<?php // works on single-file non-ZIP64 zip files compressed with deflate (AKA a common case), licensed MIT
/**
 * Lazily yields the parsed rows of a CSV file stored as the first entry of a ZIP archive.
 *
 * The archive is streamed: only the first local file header is parsed and the entry's
 * deflate data is inflated chunk by chunk, so the whole archive is never held in memory.
 *
 * Limitations: only the first entry is read; only the deflate method is supported; ZIP64
 * archives and entries whose sizes are stored solely in a trailing data descriptor are not
 * supported; rows are split on "\n", so quoted CSV fields containing line breaks are not
 * handled.
 *
 * @param string $url Any location fopen() can read (URL or local path).
 *
 * @return \Generator Yields one array per CSV line, as produced by str_getcsv().
 *
 * @throws \RuntimeException If the stream cannot be opened, ends early, is not a supported
 *                           ZIP entry, or contains corrupt deflate data.
 */
function getLinesFromZippedCSVfromURL(string $url) : \Generator
{
    $stream = fopen($url, 'rb');
    if ($stream === false) {
        throw new \RuntimeException("Unable to open {$url} for reading");
    }

    // fread() on network streams may return fewer bytes than requested, so keep reading
    // until the requested amount has arrived. A zero length returns '' without touching
    // the stream (fread() rejects a length of 0 on PHP 8).
    $readExactly = static function (int $length) use ($stream) : string {
        $data = '';
        while (($missing = $length - strlen($data)) > 0) {
            $piece = fread($stream, $missing);
            if ($piece === false || $piece === '') {
                throw new \RuntimeException('Unexpected end of stream while reading the ZIP local file header');
            }
            $data .= $piece;
        }

        return $data;
    };

    try {
        // Fixed 30-byte part of the local file header; every field is little-endian,
        // which is what the "V" (32-bit) and "v" (16-bit) unpack codes decode.
        $header = unpack(
            'Vsignature/vversion/vflags/vmethod/vtime/vdate/Vcrc/'
            . 'VcompressedSize/VuncompressedSize/vnameLength/vextraLength',
            $readExactly(30)
        );

        if ($header['signature'] !== 0x04034b50) {
            throw new \RuntimeException('Stream does not start with a ZIP local file header');
        }
        if ($header['method'] !== 8) {
            throw new \RuntimeException('Only deflate-compressed ZIP entries are supported');
        }

        $compressedSize = $header['compressedSize'];
        if ($compressedSize === 0 && ($header['flags'] & 0x08) !== 0) {
            // General-purpose flag bit 3: the real sizes follow the data instead.
            throw new \RuntimeException('ZIP entries that use a trailing data descriptor are not supported');
        }

        // Skip the variable-length filename and extra field; we need neither.
        $readExactly($header['nameLength'] + $header['extraLength']);

        // We're past the headers now; ZIP stores raw deflate data (no zlib/gzip wrapper).
        $inflate = inflate_init(ZLIB_ENCODING_RAW);
        if ($inflate === false) {
            throw new \RuntimeException('Unable to initialise the inflate context');
        }

        $pending = '';
        $remaining = $compressedSize;
        while ($remaining > 0) {
            $chunk = fread($stream, min(256, $remaining));
            if ($chunk === false || $chunk === '') {
                throw new \RuntimeException('Unexpected end of stream while reading compressed data');
            }
            // Count what was actually read, not what was asked for (short reads happen).
            $remaining -= strlen($chunk);

            $inflated = inflate_add($inflate, $chunk);
            if ($inflated === false) {
                throw new \RuntimeException('Corrupt deflate data in ZIP entry');
            }
            $pending .= $inflated;

            while (strpos($pending, "\n") !== false) {
                [$line, $pending] = explode("\n", $pending, 2);
                // rtrim() drops the "\r" left behind by CRLF line endings.
                // The delimiter/enclosure/escape arguments are the str_getcsv() defaults,
                // spelled out because omitting the escape is deprecated as of PHP 8.4.
                yield str_getcsv(rtrim($line, "\r"), ',', '"', '\\');
            }
        }

        // A final line without a trailing newline is still a row.
        if ($pending !== '') {
            yield str_getcsv(rtrim($pending, "\r"), ',', '"', '\\');
        }
    } finally {
        // Also runs when the consumer abandons the generator early.
        fclose($stream);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment