Last active
June 27, 2022 15:48
-
-
Save david0/c5bdfc68bc48565bb22d to your computer and use it in GitHub Desktop.
Proof of concept for deterministic/reproducible Zip file checksums
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Proof of concept to make Zip-file checksums more reliable.
Related to https://github.com/composer/composer/issues/2540
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Build a set of test archives from the same directory so their checksums can
# be compared: with and without zip "extra" fields (-X), before and after the
# access time of the files changes, and after the content of one file changes.

# -p: do not fail when the directory is left over from a previous run.
mkdir -p testdata

# /dev/urandom instead of /dev/random: the latter may block (or return short
# reads) while the kernel entropy pool is low, and the test data only needs to
# be arbitrary, not cryptographically strong.
dd if=/dev/urandom of=testdata/data1 count=100
dd if=/dev/urandom of=testdata/data2 count=100

# Start from scratch; -f keeps this quiet when no archive exists yet.
rm -f testdata*.zip

# Archives including the extra fields.
zip -r testdata1_extra.zip testdata/
sleep 1 && touch -a testdata/* # force change atime
zip -r testdata2_extra.zip testdata/ # same as testdata1_extra.zip, different atime

# Archives without the extra fields (-X).
zip -r -X testdata3_noextra.zip testdata/
sleep 1 && touch -a testdata/* # force change atime
zip -r -X testdata4_noextra.zip testdata/

# Change the content of one file: these archives must hash differently.
dd if=/dev/urandom of=testdata/data2 count=100
zip -r testdata5_corrupted_extra.zip testdata/
zip -r -X testdata5_corrupted_noextra.zip testdata/

# Plain checksums of the archive files, for comparison with the ZipHasher output.
shasum testdata*.zip
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Test script to mask out the "extra" part of the zip file header | |
/**
 * Computes a checksum of a Zip archive that ignores the "extra" fields of the
 * entry headers (and the offsets/sizes that shift when those fields change),
 * so that archives with identical entries hash identically.
 *
 * Only the three classic record types are understood (local file header,
 * central directory header, end of central directory). Any other record,
 * e.g. Zip64 records or data descriptors, raises a RuntimeException.
 */
class ZipHasher
{
    const LOCAL_FILE_HEADER_SIGNATURE = 0x04034b50;
    const CENTRAL_DIRECTORY_STRUCTURE_HEADER = 0x02014b50;
    const CENTRAL_DIRECTORY_STRUCTURE_END = 0x06054b50;

    /** General purpose flag bit 3: sizes are stored in a data descriptor after the file data. */
    const FLAG_DATA_DESCRIPTOR = 0x0008;

    // Record layouts (without the leading 4-byte signature) in unpack() syntax:
    // a pack code letter followed by the field name, fields separated by '/'.
    const LOCAL_FILE_HEADER_FORMAT = 'vversion/vgeneral/vcmethod/vlastmodtime/vlastmoddate/Vcrc/VcompressedSize/VuncompressedSize/vfilenameLen/vextraLen';
    const CENTRAL_DIRECTORY_STRUCTURE_FORMAT = 'vcversion/vrequiredVersion/vflag/vcmethod/vlastmodtime/vlastmoddate/Vcrc/VcompressedSize/VuncompressedSize/vfilenameLen/vextraLen/vcommentLen/vdisk/vinternalFileAttributes/VexternalFileAttributes/Voffset';
    // The record holds two 16-bit entry counters (this disk / total) and a
    // 16-bit comment length; 18 bytes in total after the signature.
    const END_OF_CENTRAL_DIRECTORY_STRUCTURE_FORMAT = 'vdiskNr/vdiskCdsStart/vdiskEntries/vtotalEntries/Vsize/Voffset/vcommentLen';

    /** Byte width of each pack code used in the record formats above. */
    private static $fieldSizes = array('v' => 2, 'V' => 4, 'n' => 2);

    /**
     * Hash a Zip file, ignoring the "extra" sections of the local and central
     * directory headers because we are interested in the content only.
     *
     * The archive is re-assembled in memory with every extra field removed and
     * with the fields that depend on the extra-field lengths (local header
     * offsets, central directory size and offset) set to zero; the SHA-1 of
     * that normalised byte string is returned.
     *
     * @param string $filename path of the Zip file
     * @return string the SHA-1 as 40 hexadecimal characters
     * @throws RuntimeException if the file cannot be opened, is truncated, or
     *                          contains a record type that is not supported
     */
    public function hash($filename)
    {
        $fp = fopen($filename, 'rb');
        if ($fp === false) {
            throw new RuntimeException('unable to open ' . $filename);
        }

        try {
            return sha1($this->normalize($fp));
        } finally {
            // Release the handle on success and on every error path.
            fclose($fp);
        }
    }

    /**
     * Read a local file header (the signature must already be consumed),
     * including the variable-length filename and extra field.
     *
     * @param resource $fp stream positioned right after the signature
     * @return array header fields plus 'filename' and 'extra'
     * @throws RuntimeException if the stream ends inside the header
     */
    public function readLocalFileHeader($fp)
    {
        $structure = $this->unpack($fp, self::LOCAL_FILE_HEADER_FORMAT);
        $structure['filename'] = $this->readBytes($fp, $structure['filenameLen']);
        $structure['extra'] = $this->readBytes($fp, $structure['extraLen']);

        return $structure;
    }

    /**
     * Walk all records of the archive and return the normalised byte string.
     *
     * @param resource $fp stream positioned at the start of the archive
     * @return string
     * @throws RuntimeException on truncated input or unsupported records
     */
    private function normalize($fp)
    {
        $contents = '';
        $sawEnd = false;

        // Stop on a clean end of file instead of looping on feof(): feof() may
        // still report false right after the last record has been consumed.
        while (($signature = $this->readSignature($fp)) !== null) {
            $contents .= pack('V', $signature);

            if ($signature == self::LOCAL_FILE_HEADER_SIGNATURE) {
                $contents .= $this->normalizeLocalFile($fp);
            } elseif ($signature == self::CENTRAL_DIRECTORY_STRUCTURE_HEADER) {
                $cds = $this->readCentralDirectoryStructure($fp);
                $cds['extraLen'] = 0;
                $cds['extra'] = '';
                $cds['offset'] = 0; // moves whenever a preceding extra field changes size
                $contents .= $this->packCentralDirectoryStructure($cds);
            } elseif ($signature == self::CENTRAL_DIRECTORY_STRUCTURE_END) {
                $cdsEnd = $this->readEndOfCentralDirectoryHeader($fp);
                $cdsEnd['size'] = 0;   // depends on the extra fields of the directory entries
                $cdsEnd['offset'] = 0; // depends on the extra fields of the local headers
                $contents .= $this->packEndOfCentralDirectoryHeader($cdsEnd);
                $sawEnd = true;
            } else {
                throw new RuntimeException('unexpected ' . sprintf('%08x', $signature));
            }
        }

        if (!$sawEnd) {
            throw new RuntimeException('format error: missing end of central directory');
        }

        return $contents;
    }

    /**
     * Read one local file entry and return it with the extra field removed,
     * followed by the (still compressed) file data.
     *
     * @param resource $fp stream positioned right after the signature
     * @return string
     * @throws RuntimeException if the entry is truncated or uses a data descriptor
     */
    private function normalizeLocalFile($fp)
    {
        $localFileHeader = $this->readLocalFileHeader($fp);
        if ($localFileHeader['general'] & self::FLAG_DATA_DESCRIPTOR) {
            // The data length is not in the header, so the data cannot be skipped.
            throw new RuntimeException('unsupported: entry uses a data descriptor');
        }
        $localFileHeader['extraLen'] = 0;
        $localFileHeader['extra'] = '';

        return $this->packLocalFileHeader($localFileHeader)
            . $this->readBytes($fp, $localFileHeader['compressedSize']);
    }

    /**
     * Read the next 4-byte record signature.
     *
     * @param resource $fp
     * @return int|null the signature, or null when the stream ended cleanly
     * @throws RuntimeException if the stream ends inside the signature
     */
    private function readSignature($fp)
    {
        $raw = fread($fp, 4);
        if ($raw === false || $raw === '') {
            return null;
        }
        if (strlen($raw) < 4) {
            $raw .= $this->readBytes($fp, 4 - strlen($raw));
        }
        $structure = unpack('Vsignature', $raw);

        return $structure['signature'];
    }

    /**
     * Serialise a local file header (without signature) back to bytes.
     *
     * @param array $header fields as returned by readLocalFileHeader()
     * @return string
     */
    private function packLocalFileHeader(array $header)
    {
        return $this->pack(self::LOCAL_FILE_HEADER_FORMAT, $header)
            . $header['filename']
            . $header['extra'];
    }

    /**
     * Read a central directory header (signature already consumed) with its
     * filename, extra field and comment.
     *
     * @param resource $fp
     * @return array
     * @throws RuntimeException if the stream ends inside the header
     */
    private function readCentralDirectoryStructure($fp)
    {
        $structure = $this->unpack($fp, self::CENTRAL_DIRECTORY_STRUCTURE_FORMAT);
        $structure['filename'] = $this->readBytes($fp, $structure['filenameLen']);
        $structure['extra'] = $this->readBytes($fp, $structure['extraLen']);
        $structure['comment'] = $this->readBytes($fp, $structure['commentLen']);

        return $structure;
    }

    /**
     * Serialise a central directory header (without signature) back to bytes.
     *
     * @param array $header fields as returned by readCentralDirectoryStructure()
     * @return string
     */
    private function packCentralDirectoryStructure(array $header)
    {
        return $this->pack(self::CENTRAL_DIRECTORY_STRUCTURE_FORMAT, $header)
            . $header['filename']
            . $header['extra']
            . $header['comment'];
    }

    /**
     * Read the end of central directory record (signature already consumed)
     * with its archive comment.
     *
     * @param resource $fp
     * @return array
     * @throws RuntimeException if the stream ends inside the record
     */
    private function readEndOfCentralDirectoryHeader($fp)
    {
        $structure = $this->unpack($fp, self::END_OF_CENTRAL_DIRECTORY_STRUCTURE_FORMAT);
        $structure['comment'] = $this->readBytes($fp, $structure['commentLen']);

        return $structure;
    }

    /**
     * Serialise an end of central directory record (without signature).
     *
     * @param array $header fields as returned by readEndOfCentralDirectoryHeader()
     * @return string
     */
    private function packEndOfCentralDirectoryHeader(array $header)
    {
        return $this->pack(self::END_OF_CENTRAL_DIRECTORY_STRUCTURE_FORMAT, $header)
            . $header['comment'];
    }

    /**
     * Read exactly $length bytes from the stream.
     *
     * @param resource $fp
     * @param int $length number of bytes wanted; zero or less yields ''
     * @return string
     * @throws RuntimeException if the stream ends before $length bytes were read
     */
    private function readBytes($fp, $length)
    {
        $data = '';
        // Loop because a single fread() may return fewer bytes than requested
        // on streams that are not plain files.
        while (strlen($data) < $length) {
            $chunk = fread($fp, $length - strlen($data));
            if ($chunk === false || $chunk === '') {
                throw new RuntimeException('format error: unexpected end of file');
            }
            $data .= $chunk;
        }

        return $data;
    }

    /**
     * Read as many bytes as $format describes and decode them.
     *
     * @param resource $fp
     * @param string $format unpack() format using the codes in $fieldSizes
     * @return array field name => value
     * @throws RuntimeException if the stream ends inside the structure
     */
    private function unpack($fp, $format)
    {
        $expectedLen = 0;
        foreach (explode('/', $format) as $entry) {
            $expectedLen += self::$fieldSizes[$entry[0]];
        }

        return unpack($format, $this->readBytes($fp, $expectedLen));
    }

    /**
     * Inverse of unpack(): encode the fields named in $format from $header.
     *
     * @param string $format unpack() format (code letter + field name per entry)
     * @param array $header field name => value; must contain every named field
     * @return string
     */
    private function pack($format, array $header)
    {
        $formatStr = '';
        $args = array();
        foreach (explode('/', $format) as $entry) {
            $formatStr .= $entry[0];           // pack code, e.g. 'v'
            $args[] = $header[substr($entry, 1)]; // value of the named field
        }
        array_unshift($args, $formatStr); // first parameter: format

        return call_user_func_array('pack', $args);
    }
}
// Print "<archive>: <hash>" for every .zip file in the current working directory.
$zipHasher = new ZipHasher();
$archives = glob('*.zip');
foreach ($archives as $archive) {
    echo sprintf("%s: %s\n", $archive, $zipHasher->hash($archive));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment