Last active
February 8, 2024 00:07
-
-
Save davepermen/e906fb4e7ae30461bb3f6f80b4789df5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Chainable <see cref="Stream"/> extensions that append raw bytes and
/// unsigned integers; integers are emitted least-significant byte first.
/// </summary>
static public class ByteHelpers
{
    /// <summary>Appends <paramref name="bytes"/> to <paramref name="stream"/> unchanged.</summary>
    /// <returns>The stream that was passed in, so further writes can be chained.</returns>
    static public Stream WriteBytes(this Stream stream, params byte[] bytes)
    {
        ReadOnlySpan<byte> payload = bytes;
        stream.Write(payload);
        return stream;
    }

    /// <summary>Same operation as <see cref="WriteBytes"/> under a name that lines up with Write16/32/64.</summary>
    /// <returns>The stream that was passed in, so further writes can be chained.</returns>
    static public Stream Write_8(this Stream stream, params byte[] bytes)
        => stream.WriteBytes(bytes);

    /// <summary>Appends each 16-bit value as two bytes, low byte first.</summary>
    /// <returns>The stream that was passed in, so further writes can be chained.</returns>
    static public Stream Write16(this Stream stream, params UInt16[] values)
    {
        for (int index = 0; index < values.Length; index++)
        {
            UInt16 word = values[index];
            for (int shift = 0; shift < 16; shift += 8)
            {
                stream.WriteByte((byte)(word >> shift));
            }
        }
        return stream;
    }

    /// <summary>Appends each 32-bit value as four bytes, low byte first.</summary>
    /// <returns>The stream that was passed in, so further writes can be chained.</returns>
    static public Stream Write32(this Stream stream, params UInt32[] values)
    {
        for (int index = 0; index < values.Length; index++)
        {
            UInt32 dword = values[index];
            for (int shift = 0; shift < 32; shift += 8)
            {
                stream.WriteByte((byte)(dword >> shift));
            }
        }
        return stream;
    }

    /// <summary>Appends each 64-bit value as eight bytes, low byte first.</summary>
    /// <returns>The stream that was passed in, so further writes can be chained.</returns>
    static public Stream Write64(this Stream stream, params UInt64[] values)
    {
        for (int index = 0; index < values.Length; index++)
        {
            UInt64 qword = values[index];
            for (int shift = 0; shift < 64; shift += 8)
            {
                stream.WriteByte((byte)(qword >> shift));
            }
        }
        return stream;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Text; | |
namespace UncompressedZipWriter; | |
/// <summary>
/// One file that is stored in the archive, together with the values that are
/// filled in while its local entry is written (offset and CRC).
/// </summary>
/// <param name="Name">Entry name; exposed as UTF-8 bytes through <see cref="NameAsBytes"/>.</param>
/// <param name="Stream">Source of the file's bytes.</param>
/// <param name="Size">Size of the file in bytes.</param>
/// <param name="LastModified">Timestamp that is packed into <see cref="TimeBits"/> and <see cref="DateBits"/>.</param>
record FileInZip(string Name, Stream Stream, long Size, DateTime LastModified)
{
    // The packed date keeps the year as a 7-bit offset from 1980 and the time has a
    // two-second resolution, so only 1980-01-01 00:00:00 .. 2107-12-31 23:59:58 fits.
    static readonly DateTime EarliestDosTime = new(1980, 1, 1, 0, 0, 0);
    static readonly DateTime LatestDosTime = new(2107, 12, 31, 23, 59, 58);

    /// <summary>Position of this file's local entry in the archive; 0 until it is assigned.</summary>
    public ulong Offset { get; set; } = 0;

    /// <summary>Packed time: seconds/2 in bits 0-4, minutes in bits 5-10, hours in bits 11-15.</summary>
    public ushort TimeBits
    {
        get
        {
            var stamp = DosTimestamp;
            return (ushort)((stamp.Second / 2) | stamp.Minute << 5 | stamp.Hour << 11);
        }
    }

    /// <summary>Packed date: day in bits 0-4, month in bits 5-8, (year - 1980) in bits 9-15.</summary>
    public ushort DateBits
    {
        get
        {
            var stamp = DosTimestamp;
            return (ushort)(stamp.Day | stamp.Month << 5 | (stamp.Year - 1980) << 9);
        }
    }

    /// <summary>CRC-32 of the file's content; 0 until it is assigned.</summary>
    public uint CrcBits { get; set; } = 0;

    /// <summary>The entry name encoded as UTF-8 (encoded again on every access).</summary>
    public byte[] NameAsBytes => Encoding.UTF8.GetBytes(Name);

    // LastModified clamped into the representable range. Without the clamp a year
    // before 1980 yields a negative year offset and a year after 2107 overflows the
    // 7 year bits; both would wrap into an unrelated date when cast to ushort.
    DateTime DosTimestamp =>
        LastModified < EarliestDosTime ? EarliestDosTime
        : LastModified > LatestDosTime ? LatestDosTime
        : LastModified;
}
/// <summary>
/// Name and size of a file, without a content stream.
/// </summary>
/// <param name="Name">Entry name; exposed as UTF-8 bytes through <see cref="NameAsBytes"/>.</param>
/// <param name="Size">Size of the file in bytes.</param>
record FileInZipSize(string Name, long Size)
{
    /// <summary>The entry name encoded as UTF-8 (encoded again on every access).</summary>
    public byte[] NameAsBytes
    {
        get
        {
            var utf8 = Encoding.UTF8;
            return utf8.GetBytes(Name);
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Humanizer; | |
using System.Diagnostics; | |
using UncompressedZipWriter; | |
// Manual test runs against hard-coded local paths; each TestZip call writes one archive.
Console.WriteLine("Let's Zip This!\n");

// Run 1: every file directly inside the OneDrive download folder.
var oneDriveFiles = Directory.GetFiles(@"C:\Users\spoda\OneDrive - Coop Genossenschaft\Downloads");
TestZip(
    filelist: oneDriveFiles,
    file: @"C:\Users\spoda\Downloads\Downloads.test.zip");

// Run 2: a single input file.
string[] big4Single = [@"C:\Users\spoda\Downloads\Files\Big4_0GB.bin"];
TestZip(
    filelist: big4Single,
    file: @"C:\Users\spoda\Downloads\Big4_0GB.test.zip");

// Run 3: two input files from the Big4 set.
string[] big4Pair =
[
    @"C:\Users\spoda\Downloads\Files\Big4_0GB.bin",
    @"C:\Users\spoda\Downloads\Files\Big4_1GB.bin"
];
TestZip(
    filelist: big4Pair,
    file: @"C:\Users\spoda\Downloads\Big4_0_1GB.test.zip");

// Run 4: the five files of the Big1 set (the same list is reused by run 5).
string[] big1Series =
[
    @"C:\Users\spoda\Downloads\Files\Big1_0GB.bin",
    @"C:\Users\spoda\Downloads\Files\Big1_1GB.bin",
    @"C:\Users\spoda\Downloads\Files\Big1_2GB.bin",
    @"C:\Users\spoda\Downloads\Files\Big1_3GB.bin",
    @"C:\Users\spoda\Downloads\Files\Big1_4GB.bin"
];
TestZip(
    filelist: big1Series,
    file: @"C:\Users\spoda\Downloads\Big1_0-4GB.test.zip");

// Run 5: the OneDrive folder (listed again at this point) followed by the Big1 set.
TestZip(
    filelist: Directory.GetFiles(@"C:\Users\spoda\OneDrive - Coop Genossenschaft\Downloads").Concat(big1Series),
    file: @"C:\Users\spoda\Downloads\All.test.zip");
// Zips the files in `filelist` into the archive `file`, then prints the archive name,
// the size predicted by Zip64.FilesToZipSize next to the size actually written
// (and their difference), and the elapsed time.
void TestZip(IEnumerable<string> filelist, string file)
{
    var timer = Stopwatch.StartNew();
    Console.WriteLine(Path.GetFileName(file));

    // Zip64 takes (name, path) pairs rather than bare paths: store every file under
    // its file name. Materialized once so a lazy input (Concat, ...) is enumerated
    // a single time instead of once per use.
    var entries = filelist
        .Select(path => (name: Path.GetFileName(path), path))
        .ToList();

    // File.Create truncates an existing archive, so no separate delete is needed.
    using (var stream = File.Create(file))
    {
        stream.FilesToZip(entries);
    }

    // Evaluate each size once; the archive is closed at this point so its length is final.
    long predicted = Zip64.FilesToZipSize(entries);
    long actual = new FileInfo(file).Length;
    Console.WriteLine($"Size: {predicted} - {actual} = {predicted - actual}");

    timer.Stop();
    Console.WriteLine($"Duration: {timer.Elapsed.Humanize(precision: 2)}");
    Console.WriteLine();
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT | |
/// https://rzymek.github.io/post/excel-zip64/ | |
namespace UncompressedZipWriter; | |
/// <summary>
/// Writes ZIP archives whose entries are stored uncompressed (method 0) and
/// described with ZIP64 fields, and predicts the size of such an archive.
/// </summary>
public static class Zip64
{
    const ushort bitflags = 0b0000_1000_0000_1000; // (bit 3 for Data Descriptor at End, bit 11 for UTF-8)

    // Value placed in a 32-bit field to say "the real value is in the ZIP64 data";
    // also the first offset that no longer fits into the 32-bit offset field.
    const uint Zip64Marker = 0xFFFFFFFF;

    // Fixed byte counts of the structures written below (see the per-method breakdowns).
    const int LocalEntryOverhead = 50;    // local header + data descriptor, without name and data
    const int CentralEntryOverhead = 66;  // central directory entry, without name
    const int Zip64OffsetFieldSize = 8;   // extra 64-bit offset in a central directory entry
    const int EndRecordsSize = 98;        // the three records written by WriteEndOfCentralDirectory

    /// <summary>
    /// Computes the size in bytes of the archive that <see cref="FilesToZip"/> writes
    /// for the same input, using only file names and on-disk lengths.
    /// </summary>
    /// <param name="filesToZipSize">Pairs of entry name and path of the file on disk.</param>
    /// <returns>The predicted archive size in bytes.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="filesToZipSize"/> is null.</exception>
    public static long FilesToZipSize(IEnumerable<(string name, string path)> filesToZipSize)
    {
        ArgumentNullException.ThrowIfNull(filesToZipSize);

        // Single pass: the input is enumerated once and every file is queried once.
        long total = EndRecordsSize;
        ulong offset = 0;
        foreach (var (name, path) in filesToZipSize)
        {
            var file = new FileInZipSize(Name: name, Size: new FileInfo(path).Length);
            long nameLength = file.NameAsBytes.Length;
            long localEntry = LocalEntryOverhead + nameLength + file.Size;

            total += localEntry + CentralEntryOverhead + nameLength;
            if (offset >= Zip64Marker)
            {
                // The entry starts beyond the 32-bit range, so its central directory
                // entry carries the additional 64-bit offset.
                total += Zip64OffsetFieldSize;
            }
            offset += (ulong)localEntry;
        }
        return total;
    }

    /// <summary>
    /// Writes an archive containing the given files to <paramref name="zip"/>:
    /// the local entries, then the central directory, then the end records.
    /// Offsets are counted from 0 at the first byte written by this call.
    /// </summary>
    /// <param name="zip">Destination stream.</param>
    /// <param name="filesToZip">Pairs of entry name and path of the file on disk.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="ArgumentException">An entry name exceeds 65535 UTF-8 bytes.</exception>
    public static void FilesToZip(this Stream zip, IEnumerable<(string name, string path)> filesToZip)
    {
        ArgumentNullException.ThrowIfNull(zip);
        ArgumentNullException.ThrowIfNull(filesToZip);

        var files = new List<FileInZip>();
        ulong position = 0;

        // [local file entries]
        foreach (var (name, path) in filesToZip)
        {
            var info = new FileInfo(path);
            // Only one input file is open at a time, and it is released even when
            // writing fails, instead of opening every file up front.
            using var input = File.OpenRead(path);
            var file = new FileInZip(Name: name, Stream: input, Size: info.Length, LastModified: info.LastWriteTime);
            position += zip.WriteFileEntry(file, position);
            files.Add(file);
        }

        // [central directory]
        var start = position;
        ulong length = 0;
        foreach (var file in files)
        {
            length += zip.WriteCentralDirectoryEntry(file);
        }

        zip.WriteEndOfCentralDirectory(count: (ulong)files.Count, offset: start, length: length);
    }

    /// <summary>
    /// Writes one local file header, the file's bytes and the trailing data
    /// descriptor; records <paramref name="offset"/> and the CRC on <paramref name="file"/>.
    /// </summary>
    /// <returns>The number of bytes this entry occupies in the archive.</returns>
    static ulong WriteFileEntry(this Stream zip, FileInZip file, ulong offset)
    {
        file.Offset = offset;

        byte[] name = file.NameAsBytes;
        if (name.Length > ushort.MaxValue)
        {
            // The length field is 16 bits wide; a silent cast would store a wrong length.
            throw new ArgumentException(
                $"The UTF-8 encoded entry name is {name.Length} bytes long; at most {ushort.MaxValue} bytes fit.",
                nameof(file));
        }

        // 4 + 2*5 + 4*3 + 2*2 + [name] + [data] + 4 + 8*2 = 50 + name length + file size
        zip
            .Write_8(0x50, 0x4b, 0x03, 0x04)                                    // header [local file header]
            .Write16(45, bitflags, 0, file.TimeBits, file.DateBits)             // version (45 = ZIP64) | general purpose bitflag | compression method (0 = store) | time | date
            .Write32(0, 0, 0)                                                   // CRC bits | compressed size | uncompressed size => 0 each, real values follow in the data descriptor
            .Write16((ushort)name.Length, 0)                                    // filename length | extrafield size
            .Write_8(name)                                                      // filename
            .WriteStreamAndComputeCrc(file.Stream, crc => file.CrcBits = crc)   // the actual data; stores the CRC on the record
            .Write32(file.CrcBits)                                              // data descriptor: CRC bits (read after the copy above has set it)
            .Write64((ulong)file.Size, (ulong)file.Size)                        // data descriptor: compressed size | uncompressed size, 8 bytes each
            ;

        return (ulong)(LocalEntryOverhead + name.Length + file.Size);
    }

    /// <summary>
    /// Writes the central directory entry for a file whose local entry has already
    /// been written (its offset and CRC are read from <paramref name="file"/>).
    /// </summary>
    /// <returns>The number of bytes written.</returns>
    static ulong WriteCentralDirectoryEntry(this Stream zip, FileInZip file)
    {
        byte[] name = file.NameAsBytes;
        // The 64-bit offset is only added when the 32-bit field cannot hold it.
        bool zip64offset = file.Offset >= Zip64Marker;

        // 4 + 2*6 + 4*3 + 2 + 2*4 + 4*2 + [name] + 2 + 2 + 8*2 = 66 + name length (+ 8 with a 64-bit offset)
        zip
            .Write_8(0x50, 0x4b, 0x01, 0x02)                             // header [central directory header]
            .Write16(45, 45, bitflags, 0, file.TimeBits, file.DateBits)  // version (ZIP64) | min version to extract (ZIP64) | general purpose bitflag | compression method (0 = store) | time | date
            .Write32(file.CrcBits, Zip64Marker, Zip64Marker)             // CRC bits | compressed size | uncompressed size => FFFFFFFF, real sizes are in the extrafield
            .Write16((ushort)name.Length)                                // filename length
            .Write16(zip64offset ? (ushort)28 : (ushort)20, 0, 0, 0)     // extrafield length | file comment length | disk number | internal file attributes
            .Write32(0, zip64offset ? Zip64Marker : (uint)file.Offset)   // external file attributes | offset of the local entry
            .Write_8(name)                                               // filename
            .Write_8(0x01, 0x00)                                         // extrafield header
            .Write16(zip64offset ? (ushort)24 : (ushort)16)              // size of the extrafield data that follows
            .Write64((ulong)file.Size, (ulong)file.Size)                 // uncompressed size | compressed size (identical for stored data)
            ;

        if (zip64offset)
        {
            zip.Write64(file.Offset);                                    // 64-bit offset of the local entry
        }

        return (ulong)(CentralEntryOverhead + name.Length + (zip64offset ? Zip64OffsetFieldSize : 0));
    }

    /// <summary>
    /// Writes the three closing records: the zip64 end of central directory record,
    /// its locator, and the classic end of central directory record.
    /// </summary>
    /// <param name="zip">Destination stream.</param>
    /// <param name="count">Number of entries in the central directory.</param>
    /// <param name="offset">Archive offset at which the central directory starts.</param>
    /// <param name="length">Size of the central directory in bytes.</param>
    /// <returns><paramref name="zip"/>, for chaining.</returns>
    static Stream WriteEndOfCentralDirectory(this Stream zip, ulong count, ulong offset, ulong length)
    {
        //   4 + 8 + 2*2 + 4*2 + 8*4   (zip64 end of central directory record, 56 bytes)
        // + 4 + 4 + 8 + 4             (zip64 end of central directory locator, 20 bytes)
        // + 4 + 2*4 + 4 + 4 + 2       (end of central directory record, 22 bytes)
        // = 98
        return zip
            .Write_8(0x50, 0x4b, 0x06, 0x06)          // header [zip64 end of central directory record]
            .Write64(44)                              // size of the rest of this record: 56 bytes minus the 12 bytes of header and this field
            .Write16(45, 45)                          // version (ZIP64) | min version to extract (ZIP64)
            .Write32(0, 0)                            // number of this disk | number of the disk with the start of the central directory
            .Write64(count, count, length, offset)    // entries on this disk | total entries | size of central directory | offset of start of central directory
            .Write_8(0x50, 0x4b, 0x06, 0x07)          // header [zip64 end of central directory locator]
            .Write32(0)                               // number of the disk with the start of the zip64 end of central directory
            .Write64(offset + length)                 // offset of the zip64 end of central directory record (directly after the central directory)
            .Write32(1)                               // total number of disks
            .Write_8(0x50, 0x4b, 0x05, 0x06)          // header [end of central directory record]
            .Write16(0, 0, 0xFFFF, 0xFFFF)            // disk number | starting disk | entries on this disk | total entries => FFFF, real values are in the zip64 record
            .Write32(Zip64Marker)                     // central directory size => FFFFFFFF
            .Write32(Zip64Marker)                     // central directory offset => FFFFFFFF
            .Write16(0)                               // comment length
            ;
    }

    /// <summary>
    /// Copies <paramref name="input"/> from its current position to its end into
    /// <paramref name="output"/> and reports the CRC-32 of the copied bytes.
    /// </summary>
    /// <param name="output">Stream that receives the bytes.</param>
    /// <param name="input">Stream that is read until a read returns no more bytes.</param>
    /// <param name="calculatedCrc">Invoked once, after the copy, with the CRC-32.</param>
    /// <returns><paramref name="output"/>, for chaining.</returns>
    static Stream WriteStreamAndComputeCrc(this Stream output, Stream input, Action<uint> calculatedCrc)
    {
        byte[] buff = new byte[1024 * 1024];

        // The first chunk seeds the checksum (an empty input yields the CRC of zero bytes) ...
        int len = input.Read(buff, 0, buff.Length);
        uint crc = Force.Crc32.Crc32Algorithm.Compute(buff, 0, len);
        output.Write(buff, 0, len);

        // ... and every further chunk is folded into it.
        while ((len = input.Read(buff, 0, buff.Length)) > 0)
        {
            crc = Force.Crc32.Crc32Algorithm.Append(crc, buff, 0, len);
            output.Write(buff, 0, len);
        }

        calculatedCrc(crc);
        return output;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment