Last active
November 30, 2021 06:54
-
-
Save rvaiya/9b39813d74ce3d5e1412e6b813a29c3b to your computer and use it in GitHub Desktop.
An overview and sample implementation of the zip format.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import "os" | |
import "hash/crc32" | |
// Basically an annotated runnable hex dump :P. See zip-implementation.go for an
// overview of the format and a barebones implementation.

// main writes test.zip, a minimal archive holding one uncompressed entry, by
// spelling out every byte of the three structures involved: the file header,
// the central directory record and the end of central directory record.
// Multi-byte fields are emitted least significant byte first. Any failure to
// create, write or close the output file results in a panic.
func main() {
	name := "test file.txt"
	data := []byte("Test file content\n")
	chksum := crc32.ChecksumIEEE(data)

	//File header
	fh := []byte{
		0x50, 0x4b, 0x03, 0x04, //header
		0x0a, 0x00, //version to extract
		0x00, 0x00, //flags
		0x00, 0x00, //compression method
		0x00, 0x00, //Files modification time
		0x00, 0x00, //Files last modification date
		byte(chksum), byte(chksum >> 8), byte(chksum >> 16), byte(chksum >> 24), //crc32 checksum
		byte(len(data)), byte(len(data) >> 8), byte(len(data) >> 16), byte(len(data) >> 24), //compressed file size
		byte(len(data)), byte(len(data) >> 8), byte(len(data) >> 16), byte(len(data) >> 24), //uncompressed file size
		byte(len(name) & 0xFF), byte(len(name) >> 8), //name length (appended to the header)
		0x00, 0x00, //extra field length (no extra fields are written)
	}
	fh = append(fh, []byte(name)...)

	//Central directory record
	cdr := []byte{
		0x50, 0x4b, 0x01, 0x02, //central directory file header.
		0x1e, 0x03, //version made by
		0x0a, 0x00, //version needed to extract
		0x00, 0x00, //flags
		0x00, 0x00, //compression method
		0x00, 0x00, //Files modification time
		0x00, 0x00, //Files last modification date
		byte(chksum), byte(chksum >> 8), byte(chksum >> 16), byte(chksum >> 24), //crc32 checksum (again)
		byte(len(data)), byte(len(data) >> 8), byte(len(data) >> 16), byte(len(data) >> 24), //compressed file size (again)
		byte(len(data)), byte(len(data) >> 8), byte(len(data) >> 16), byte(len(data) >> 24), //uncompressed file size
		byte(len(name) & 0xFF), byte(len(name) >> 8), //name len
		0x00, 0x00, //extra field length
		0x00, 0x00, //comment len
		0x00, 0x00, //start disk for files which span multiple disks :/
		0x00, 0x00, //internal file attributes (e.g binary/ascii)
		0x00, 0x00, 0x00, 0x00, //system dependent file system attributes
		0x00, 0x00, 0x00, 0x00, //file record offset (the file header is the first thing written, hence 0).
	}
	cdr = append(cdr, []byte(name)...)

	// The central directory is written right after the single file record.
	cdOffset := len(fh) + len(data)

	//End of central directory record
	eocdr := []byte{
		0x50, 0x4b, 0x05, 0x06, //header
		0x00, 0x00, //number of this disk (multi disk archives are from a bygone era)
		0x00, 0x00, //disk number of central directory
		0x01, 0x00, //number of files on the present disk
		0x01, 0x00, //total number of files
		byte(len(cdr)), byte(len(cdr) >> 8), byte(len(cdr) >> 16), byte(len(cdr) >> 24), //size of the central directory
		byte(cdOffset), byte(cdOffset >> 8), byte(cdOffset >> 16), byte(cdOffset >> 24), //offset of start of central directory
		0x00, 0x00, //comment length
	}

	output, err := os.Create("test.zip")
	if err != nil {
		panic(err)
	}

	// A failed or short write would leave a corrupt archive behind, so each
	// write is checked instead of being silently ignored.
	for _, chunk := range [][]byte{fh, data, cdr, eocdr} {
		if _, err := output.Write(chunk); err != nil {
			panic(err)
		}
	}

	// Close can report a deferred write error; surface it as well.
	if err := output.Close(); err != nil {
		panic(err)
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import "os" | |
import "hash/crc32" | |
import "encoding/binary" | |
//Overview: | |
// A zip file consists of a collection of file entries ('records') each | |
// consisting of a file header containing metadata about the file (name, size, | |
// etc) followed by (optionally compressed) file data. Serving as an index for | |
// these records, the file also contains a 'central directory' (cd) comprised | |
// of central directory records. Finally the file contains an 'end of central | |
// directory' record which contains the offset of the central directory and | |
// serves as the starting point for unzip utilities. | |
// E.G | |
// ... (potentially arbitrary data) | |
// <file1 header> ] File record | |
// <file1 data> ] | |
// ... | |
// <file2 header> | |
// <file2 data> | |
// ... | |
// <central directory record 1> ] Contains the offset of <file1 header> | |
// <central directory record 2> | |
// ... | |
// <end of central directory record> ] Contains the offset of <central directory record 1> | |
// Since file record offsets are drawn from central directory entries, file | |
// records may be scattered throughout the file. Some programs use this to | |
// their advantage by appending data to themselves so they also function as | |
// valid zip files (e.g the self extracting archives of yore). Additionally, | |
// updating the zip file can be done relatively cheaply (without having to | |
// rewrite the entire file) by amending (or replacing) the central directory | |
// as necessary. | |
// The following code produces the simplest zip file possible. Files are | |
// written uncompressed in sequence followed by the central directory. It is | |
// intended solely for educational purposes. Go already ships with a first | |
// class zip library which supports compression along with all the bells and | |
// whistles. | |
//Refs:
// https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
// https://en.wikipedia.org/wiki/ZIP_(file_format)

// createCentralDirectoryRecord returns the central directory record for a
// single uncompressed entry: a 46 byte little-endian fixed part followed by
// the bytes of name.
//
// sz is stored in both size fields (they are identical in the absence of
// compression), chksum is the entry's CRC-32 and fileOffset is the offset of
// the entry's file header. The name length is stored as a uint16, so a name
// longer than 65535 bytes has its recorded length truncated. All fields not
// mentioned here are left zero.
func createCentralDirectoryRecord(name string, sz uint32, chksum uint32, fileOffset uint32) []byte {
	const (
		fixedLen            = 46
		signatureOff        = 0
		crcOff              = 16
		compressedSizeOff   = 20
		uncompressedSizeOff = 24
		nameLenOff          = 28
		fileOffsetOff       = 42
	)

	le := binary.LittleEndian

	var fixed [fixedLen]byte
	le.PutUint32(fixed[signatureOff:], 0x02014b50) // Central directory record signature
	le.PutUint32(fixed[crcOff:], chksum)
	le.PutUint32(fixed[compressedSizeOff:], sz)
	le.PutUint32(fixed[uncompressedSizeOff:], sz)
	le.PutUint16(fixed[nameLenOff:], uint16(len(name)))
	le.PutUint32(fixed[fileOffsetOff:], fileOffset)

	// The variable-length name immediately follows the fixed part.
	return append(fixed[:], name...)
}
// createEndOfCentralDirectoryRecord returns the 22 byte little-endian end of
// central directory record.
//
// dirOffset is the offset of the first central directory entry, dirSize is
// the combined size in bytes of all central directory entries, and
// numDirEntries is written to both entry-count fields. The remaining fields
// (bytes 4-7 and 20-21) are left zero.
func createEndOfCentralDirectoryRecord(dirOffset uint32, dirSize uint32, numDirEntries uint16) []byte {
	const (
		recordLen      = 22
		signatureOff   = 0
		entriesDiskOff = 8
		entriesAllOff  = 10
		dirSizeOff     = 12
		dirOffsetOff   = 16
	)

	le := binary.LittleEndian
	rec := make([]byte, recordLen)

	le.PutUint32(rec[signatureOff:], 0x06054b50) // End of central directory record signature
	le.PutUint16(rec[entriesDiskOff:], numDirEntries)
	le.PutUint16(rec[entriesAllOff:], numDirEntries)
	le.PutUint32(rec[dirSizeOff:], dirSize)
	// Offset corresponding to the start of the first entry in the central
	// directory.
	le.PutUint32(rec[dirOffsetOff:], dirOffset)

	return rec
}
// createFileHeader returns the file header that precedes an entry's data: a
// 30 byte little-endian fixed part followed by the bytes of name. len(data)
// is stored in both size fields and chksum in the CRC-32 field; every other
// field is left zero.
//
// Some unzip implementations seem to prefer obtaining metadata from the
// central directory while others seem to favour the file header, so we need
// both :(. The format falls just short of allowing on the fly file generation
// by virtue of requiring file size information up front in the file header
// even though it is also present in the central directory.
func createFileHeader(name string, data []byte, chksum uint32) []byte {
	const (
		fixedLen     = 30
		signatureOff = 0
		crcOff       = 14
		sizeAOff     = 18
		sizeBOff     = 22
		nameLenOff   = 26
	)

	le := binary.LittleEndian
	size := uint32(len(data))

	var fixed [fixedLen]byte
	le.PutUint32(fixed[signatureOff:], 0x04034b50) // File header signature
	le.PutUint32(fixed[crcOff:], chksum)
	le.PutUint32(fixed[sizeAOff:], size)
	le.PutUint32(fixed[sizeBOff:], size)
	le.PutUint16(fixed[nameLenOff:], uint16(len(name)))

	// The variable-length name immediately follows the fixed part.
	return append(fixed[:], name...)
}
func main() { | |
names := []string{"test1.txt", "test2.txt"} | |
data := [][]byte{[]byte("test1 content\n"), []byte("test2 content\n")} | |
output, err := os.Create("test0.zip") | |
if err != nil { | |
panic(err) | |
} | |
cdes := [][]byte{} | |
offset := uint32(0) | |
// Generate and write each file record. | |
for i, _ := range names { | |
name := names[i] | |
data := data[i] | |
chksum := crc32.ChecksumIEEE(data) | |
fh := createFileHeader(name, data, chksum) | |
// Generate the corresponding central directory entry. | |
cde := createCentralDirectoryRecord(name, uint32(len(data)), chksum, offset) | |
cdes = append(cdes, cde) | |
output.Write(fh) | |
output.Write(data) | |
offset += uint32(len(fh) + len(data)) | |
} | |
// Output the accreted central directory entries. | |
sz := 0 | |
for _, cde := range cdes { | |
output.Write(cde) | |
sz += len(cde) | |
} | |
eocd := createEndOfCentralDirectoryRecord(uint32(offset), uint32(sz), uint16(len(cdes))) | |
output.Write(eocd) | |
output.Close() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment