Skip to content

Instantly share code, notes, and snippets.

@rvaiya
Last active November 30, 2021 06:54
Show Gist options
  • Save rvaiya/9b39813d74ce3d5e1412e6b813a29c3b to your computer and use it in GitHub Desktop.
Save rvaiya/9b39813d74ce3d5e1412e6b813a29c3b to your computer and use it in GitHub Desktop.
An overview and sample implementation of the zip format.
package main
import "os"
import "hash/crc32"
// Basically an annotated runnable hex dump :P. See zip-implementation.go for an
// overview of the format and a barebones implementation.
// main writes test.zip, a minimal zip archive containing a single uncompressed
// ("stored") file, with every byte of the container spelled out by hand. The
// archive is laid out as: local file header, file data, central directory
// record, end of central directory record. All multi-byte fields are
// little-endian.
//
// It panics if the output file cannot be created, written, or closed.
func main() {
	name := "test file.txt"
	data := []byte("Test file content\n")
	chksum := crc32.ChecksumIEEE(data)

	// Local file header: 30 fixed bytes followed by the file name.
	fh := []byte{
		0x50, 0x4b, 0x03, 0x04, // local file header signature ("PK\x03\x04")
		0x0a, 0x00, // version needed to extract
		0x00, 0x00, // flags
		0x00, 0x00, // compression method (0 = stored)
		0x00, 0x00, // last modification time
		0x00, 0x00, // last modification date
		byte(chksum), byte(chksum >> 8), byte(chksum >> 16), byte(chksum >> 24), // crc32 checksum
		byte(len(data)), byte(len(data) >> 8), byte(len(data) >> 16), byte(len(data) >> 24), // compressed file size
		byte(len(data)), byte(len(data) >> 8), byte(len(data) >> 16), byte(len(data) >> 24), // uncompressed file size
		byte(len(name) & 0xFF), byte(len(name) >> 8), // name length (the name is appended to the header)
		0x00, 0x00, // extra field length (no extra field is written)
	}
	fh = append(fh, []byte(name)...)

	// Central directory record: 46 fixed bytes followed by the file name.
	cdr := []byte{
		0x50, 0x4b, 0x01, 0x02, // central directory file header signature
		0x1e, 0x03, // version made by
		0x0a, 0x00, // version needed to extract
		0x00, 0x00, // flags
		0x00, 0x00, // compression method (0 = stored)
		0x00, 0x00, // last modification time
		0x00, 0x00, // last modification date
		byte(chksum), byte(chksum >> 8), byte(chksum >> 16), byte(chksum >> 24), // crc32 checksum (again)
		byte(len(data)), byte(len(data) >> 8), byte(len(data) >> 16), byte(len(data) >> 24), // compressed file size (again)
		byte(len(data)), byte(len(data) >> 8), byte(len(data) >> 16), byte(len(data) >> 24), // uncompressed file size (again)
		byte(len(name) & 0xFF), byte(len(name) >> 8), // name length
		0x00, 0x00, // extra field length
		0x00, 0x00, // comment length
		0x00, 0x00, // start disk for files which span multiple disks
		0x00, 0x00, // internal file attributes (e.g. binary/ascii)
		0x00, 0x00, 0x00, 0x00, // external (system dependent) file attributes
		0x00, 0x00, 0x00, 0x00, // offset of the local file header (0: it is the first thing in the file)
	}
	cdr = append(cdr, []byte(name)...)

	// The central directory is written right after the only file record.
	cdOffset := len(fh) + len(data)

	// End of central directory record.
	eocdr := []byte{
		0x50, 0x4b, 0x05, 0x06, // end of central directory signature
		0x00, 0x00, // number of this disk (multi-disk archives are from a bygone era)
		0x00, 0x00, // disk on which the central directory starts
		0x01, 0x00, // number of central directory entries on this disk
		0x01, 0x00, // total number of central directory entries
		byte(len(cdr)), byte(len(cdr) >> 8), byte(len(cdr) >> 16), byte(len(cdr) >> 24), // size of the central directory in bytes
		byte(cdOffset), byte(cdOffset >> 8), byte(cdOffset >> 16), byte(cdOffset >> 24), // offset of start of central directory
		0x00, 0x00, // comment length
	}

	output, err := os.Create("test.zip")
	if err != nil {
		panic(err)
	}
	// Emit the sections in archive order. A failed write would leave a
	// truncated, unreadable archive behind, so stop loudly instead of
	// carrying on.
	for _, section := range [][]byte{fh, data, cdr, eocdr} {
		if _, err := output.Write(section); err != nil {
			output.Close()
			panic(err)
		}
	}
	// Close can report errors from deferred writes, so it is checked too.
	if err := output.Close(); err != nil {
		panic(err)
	}
}
package main
import "os"
import "hash/crc32"
import "encoding/binary"
//Overview:
// A zip file consists of a collection of file entries ('records') each
// consisting of a file header containing metadata about the file (name, size,
// etc) followed by (optionally compressed) file data. Serving as an index for
// these records, the file also contains a 'central directory' (cd) comprised
// of central directory records. Finally the file contains an 'end of central
// directory' record which contains the offset of the central directory and
// serves as the starting point for unzip utilities.
// E.G
// ... (potentially arbitrary data)
// <file1 header> ] File record
// <file1 data> ]
// ...
// <file2 header>
// <file2 data>
// ...
// <central directory record 1> ] Contains the offset of <file1 header>
// <central directory record 2>
// ...
// <end of central directory record> ] Contains the offset of <central directory record 1>
// Since file record offsets are drawn from central directory entries, file
// records may be scattered throughout the file. Some programs use this to
// their advantage by appending data to themselves so they also function as
// valid zip files (e.g the self extracting archives of yore). Additionally,
// updating the zip file can be done relatively cheaply (without having to
// rewrite the entire file) by amending (or replacing) the central directory
// as necessary.
// The following code produces the simplest zip file possible. Files are
// written uncompressed in sequence followed by the central directory. It is
// intended solely for educational purposes. Go already ships with a first
// class zip library which supports compression along with all the bells and
// whistles.
//Refs:
// https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
// https://en.wikipedia.org/wiki/ZIP_(file_format)
// createCentralDirectoryRecord builds the central directory record for one
// uncompressed file: a 46-byte fixed part followed by the file name.
//
// name is the entry's file name, sz its size in bytes (stored in both size
// fields, since nothing is compressed), chksum the checksum of its contents
// and fileOffset the position of the entry's file header within the archive.
// Every field not set here is left zero. Only the low 16 bits of len(name)
// fit in the name length field.
func createCentralDirectoryRecord(name string, sz uint32, chksum uint32, fileOffset uint32) []byte {
	const fixedLen = 46

	le := binary.LittleEndian
	rec := make([]byte, fixedLen, fixedLen+len(name))

	le.PutUint32(rec[0:4], 0x02014b50) // central directory record signature
	le.PutUint32(rec[16:20], chksum)
	le.PutUint32(rec[20:24], sz) // compressed size (identical to size in the absence of compression)
	le.PutUint32(rec[24:28], sz) // uncompressed size
	le.PutUint16(rec[28:30], uint16(len(name)))
	le.PutUint32(rec[42:46], fileOffset)

	return append(rec, name...)
}
// createEndOfCentralDirectoryRecord builds the fixed 22-byte end of central
// directory record.
//
// dirOffset is the offset of the start of the first entry in the central
// directory, dirSize is the directory's total size in bytes, and
// numDirEntries is the number of records it holds (written to both entry
// count fields). The remaining fields are left zero.
func createEndOfCentralDirectoryRecord(dirOffset uint32, dirSize uint32, numDirEntries uint16) []byte {
	le := binary.LittleEndian
	rec := make([]byte, 22)

	le.PutUint32(rec[0:4], 0x06054b50) // end of central directory record signature
	// Bytes 4-7 stay zero.
	le.PutUint16(rec[8:10], numDirEntries)
	le.PutUint16(rec[10:12], numDirEntries)
	le.PutUint32(rec[12:16], dirSize)
	le.PutUint32(rec[16:20], dirOffset)
	// Bytes 20-21 stay zero.

	return rec
}
// createFileHeader builds the header that precedes a file's data in the
// archive: a 30-byte fixed part followed by the file name. chksum and the
// length of data are recorded in it (the length in both size fields, since
// nothing is compressed); every other field is left zero.
//
// Some unzip implementations seem to prefer obtaining metadata from the
// central directory while others seem to favour the file header, so we need
// both :(. The format falls just short of allowing on-the-fly file generation
// by virtue of requiring file size information up front in the file header
// even though it is also present in the central directory.
func createFileHeader(name string, data []byte, chksum uint32) []byte {
	const fixedLen = 30

	le := binary.LittleEndian
	size := uint32(len(data))
	hdr := make([]byte, fixedLen, fixedLen+len(name))

	le.PutUint32(hdr[0:4], 0x04034b50) // file header signature
	le.PutUint32(hdr[14:18], chksum)
	le.PutUint32(hdr[18:22], size)
	le.PutUint32(hdr[22:26], size)
	le.PutUint16(hdr[26:28], uint16(len(name)))

	return append(hdr, name...)
}
// main writes test0.zip, an archive holding two small uncompressed files. Each
// file record (header followed by data) is written in sequence, then the
// central directory records collected along the way, and finally the end of
// central directory record.
//
// It panics if the output file cannot be created, written, or closed.
func main() {
	names := []string{"test1.txt", "test2.txt"}
	data := [][]byte{[]byte("test1 content\n"), []byte("test2 content\n")}

	output, err := os.Create("test0.zip")
	if err != nil {
		panic(err)
	}

	// write appends b to the archive. A failed write would leave a truncated,
	// unreadable archive behind, so it stops loudly instead of carrying on.
	write := func(b []byte) {
		if _, err := output.Write(b); err != nil {
			output.Close()
			panic(err)
		}
	}

	cdes := make([][]byte, 0, len(names))
	offset := uint32(0) // position in the archive of the next record to be written

	// Generate and write each file record.
	for i, name := range names {
		content := data[i]
		chksum := crc32.ChecksumIEEE(content)
		fh := createFileHeader(name, content, chksum)

		// Generate the corresponding central directory entry; it records
		// where this file's header starts, so it must be built before offset
		// is advanced.
		cdes = append(cdes, createCentralDirectoryRecord(name, uint32(len(content)), chksum, offset))

		write(fh)
		write(content)
		offset += uint32(len(fh) + len(content))
	}

	// Output the accreted central directory entries. offset now points at
	// the start of the central directory.
	dirSize := uint32(0)
	for _, cde := range cdes {
		write(cde)
		dirSize += uint32(len(cde))
	}

	write(createEndOfCentralDirectoryRecord(offset, dirSize, uint16(len(cdes))))

	// Close can report errors from deferred writes, so it is checked too.
	if err := output.Close(); err != nil {
		panic(err)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment