Created
April 28, 2024 20:34
-
-
Save christoofar/b20e4b036229746910c2915a6e3d7c38 to your computer and use it in GitHub Desktop.
How to make a proprietary binary free-form format using sentinel bytes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package archive | |
import ( | |
"errors" | |
"strings" | |
"time" | |
) | |
// ArchiveRecord is the header record that precedes every archived file in the
// archive. The archive is a binary file holding a sequence of ArchiveRecords;
// each header is immediately followed by the raw bytes of the file it
// describes.
//
// After the file data, look for the 0x1C marker to find the next ArchiveRecord
// and the file data that belongs to it. FileSize tells a reader how many data
// bytes follow the header and therefore how far to skip to reach the next
// record.
// NOTE(review): file data is arbitrary bytes and may itself contain 0x1C, so
// skipping by FileSize looks more reliable than scanning for the marker.
//
// The byte offsets below are the ones ToBytes writes and FromBytes reads. The
// serialized header is 533 bytes (offsets 0-532); integers are stored
// little-endian by int64ToBytes.
type ArchiveRecord struct { // ** Byte offsets in the serialized header
	Marker               byte      // 0       - Start-of-record sentinel, always 0x1C
	FileName             [256]byte // 1-256   - The filename of the file being archived (space padded)
	Fnlf                 byte      // 257     - Linefeed 0x0A after the filename so it looks better in Notepad
	FileTime             int64     // 258-265 - The time the file was archived (Unix timestamp, seconds)
	FileSize             int64     // 266-273 - The size of the file being archived in bytes
	Fntimelf             byte      // 274     - Linefeed 0x0A after the file size so it looks better in Notepad
	PassValidation       byte      // 275     - 0x00 = File passed validation, 0x01 = File failed validation
	ValidationFailReason [256]byte // 276-531 - The reason the file failed validation (space padded)
	Fnvalidationlf       byte      // (none)  - Linefeed 0x0A after the validation reason; kept in memory only, ToBytes/FromBytes do not serialize it
	TrailingMarker       byte      // 532     - End-of-header sentinel, always 0x1F
	Data                 []byte    // 533+    - The data of the file being archived
}
/* | |
* | |
* Error types | |
* | |
*/ | |
// Sentinel errors reported while decoding an archive record.
var (
	// ErrInvalidArchiveRecord is returned by FromBytes when the input is
	// shorter than the 533-byte record header. This usually means the archive
	// is corrupt or is being parsed incorrectly: every record starts with 0x1C,
	// every record header ends with 0x1F at position 532, and the remaining
	// bytes are the file data up to the 0x1C marker of the next record.
	ErrInvalidArchiveRecord = errors.New("archive header must be 533 bytes long")

	// ErrInvalidArchiveSentinelValues is returned by FromBytes when the first
	// header byte is not 0x1C or the last header byte is not 0x1F, which is an
	// indication of data corruption in the archive file.
	ErrInvalidArchiveSentinelValues = errors.New(
		`invalid archive sentinel values expected 0x1C and 0x1F in the first and last byte of the record respectively`)

	// ErrInvalidSizeMismatch is returned by FromBytes when the number of data
	// bytes following the header differs from the FileSize stored in the
	// header. This indicates data corruption, or a record that was written
	// without its file size being placed in the header.
	ErrInvalidSizeMismatch = errors.New("invalid size mismatch comparing the size of the file bytes to the FileSize field in the record header")
)
/* | |
* | |
* | |
* Read/Write functions | |
* | |
* | |
*/ | |
// ReadFileName returns the FileName field of the ArchiveRecord as a string | |
func (r *ArchiveRecord) ReadFileName() string { | |
return strings.TrimSpace(string(r.FileName[:])) | |
} | |
// ReadFileTime returns the FileTime field of the ArchiveRecord as a time.Time | |
func (r *ArchiveRecord) ReadFileTime() time.Time { | |
return time.Unix(r.FileTime, 0) | |
} | |
// ReadFileSize returns the FileSize field of the ArchiveRecord as an int64 | |
func (r *ArchiveRecord) ReadFileSize() int64 { | |
return r.FileSize | |
} | |
// ReadPassValidation returns the PassValidation field of the ArchiveRecord as a bool | |
func (r *ArchiveRecord) ReadPassValidation() bool { | |
return r.PassValidation == 0 | |
} | |
// ReadValidationFailReason returns the ValidationFailReason field of the ArchiveRecord as a string | |
func (r *ArchiveRecord) ReadValidationFailReason() string { | |
return strings.TrimSpace(string(r.ValidationFailReason[:])) | |
} | |
// SetFileName sets the FileName field of the ArchiveRecord. The filename should be 256 characters or less. | |
// If the filename is longer than 256 characters, the string will be truncated. | |
func (r *ArchiveRecord) SetFileName(name string) { | |
if len(name) > 256 { | |
name = name[:256] | |
} | |
// if len is less than 256, the rest of the array will be spaces | |
if len(name) < 256 { | |
for i := len(name); i < 256; i++ { | |
name += " " | |
} | |
} | |
copy(r.FileName[:], name) | |
} | |
// SetFileTime sets the FileTime field of the ArchiveRecord. The time will be converted to UTC and stored | |
// as a Unix timestamp. | |
func (r *ArchiveRecord) SetFileTime(t time.Time) { | |
r.FileTime = t.UTC().Unix() | |
} | |
// SetFileSize sets the FileSize field of the ArchiveRecord. The size should be the size of the file being archived. | |
func (r *ArchiveRecord) SetFileSize(size int64) { | |
r.FileSize = size | |
} | |
// SetPassValidation sets the PassValidation field of the ArchiveRecord. If pass is true, the file passed validation. | |
func (r *ArchiveRecord) SetPassValidation(pass bool, reason string) { | |
if len(reason) > 256 { | |
reason = reason[:256] | |
} | |
// if len is less than 256, the rest of the array will be spaces | |
if len(reason) < 256 { | |
for i := len(reason); i < 256; i++ { | |
reason += " " | |
} | |
} | |
if pass { | |
r.PassValidation = 0 | |
copy(r.ValidationFailReason[:], reason) | |
} else { | |
r.PassValidation = 1 | |
} | |
} | |
/* | |
* | |
* | |
* Serialization functions | |
* | |
* | |
*/ | |
// ToBytes converts the ArchiveRecord to a byte slice | |
func (r *ArchiveRecord) ToBytes() ([]byte, error) { | |
data := make([]byte, 533) | |
data[0] = r.Marker | |
copy(data[1:257], r.FileName[:]) | |
data[257] = r.Fnlf | |
copy(data[258:266], int64ToBytes(r.FileTime)) | |
copy(data[266:274], int64ToBytes(r.FileSize)) | |
data[274] = r.Fntimelf | |
data[275] = r.PassValidation | |
copy(data[276:532], r.ValidationFailReason[:]) | |
data[532] = r.TrailingMarker | |
// Anything after byte 533 is the file data | |
data = append(data, r.Data...) | |
// Set the FileSize automatically to the length of data being stored | |
r.FileSize = int64(len(r.Data)) | |
return data, nil | |
} | |
// FromBytes converts a byte slice to an ArchiveRecord. The byte slice should contain a single ArchiveRecord. | |
// Which exactly 533 bytes long for the record header, followed by the file data. This function also checks | |
// the sentinel values at the beginning and end of the record to ensure the record is valid. | |
func (r *ArchiveRecord) FromBytes(data []byte) error { | |
// The smallest a record can be is 533 bytes | |
if len(data) < 533 { | |
return ErrInvalidArchiveRecord | |
} | |
r.Marker = data[0] | |
copy(r.FileName[:], data[1:257]) | |
r.Fnlf = data[257] | |
r.FileTime = bytesToInt64(data[258:266]) | |
r.FileSize = bytesToInt64(data[266:274]) | |
r.Fntimelf = data[274] | |
r.PassValidation = data[275] | |
copy(r.ValidationFailReason[:], data[276:532]) | |
r.TrailingMarker = data[532] | |
// Anything after the 533rd byte is the file data | |
r.Data = data[533:] | |
// Check the sentinel values | |
if r.Marker != 0x1C || r.TrailingMarker != 0x1F { | |
return ErrInvalidArchiveSentinelValues | |
} | |
// Check that the size of the file data matches the FileSize field | |
if int64(len(r.Data)) != r.FileSize { | |
return ErrInvalidSizeMismatch | |
} | |
return nil | |
} | |
// ArchiveRecordFromBytes creates an ArchiveRecord from a byte slice. The byte slice should contain a single ArchiveRecord. | |
func ArchiveRecordFromBytes(data []byte) (*ArchiveRecord, error) { | |
record := NewArchiveRecord() | |
err := record.FromBytes(data) | |
if err != nil { | |
return nil, err | |
} | |
return record, nil | |
} | |
/* | |
* | |
* | |
* Constructor functions | |
* | |
* | |
*/ | |
// NewArchiveRecord creates a new ArchiveRecord with default values | |
func NewArchiveRecord() *ArchiveRecord { | |
record := &ArchiveRecord{ | |
Marker: 0x1C, // Each record starts with 0x1C which is the ASCII File Separator | |
FileName: [256]byte{}, | |
FileTime: time.Now().UTC().Unix(), | |
Fnlf: 0x0A, // Linefeed after the filename | |
FileSize: 0, // Size of the data being archived, which follows this structure | |
Fntimelf: 0x0A, // Linefeed after the file time | |
PassValidation: 0, | |
ValidationFailReason: [256]byte{}, // Reason the file failed validation | |
Fnvalidationlf: 0x0A, // Linefeed after the validation reason | |
TrailingMarker: 0x1F, // Each record ends with 0x1F which is the ASCII Unit Separator | |
} | |
for i := 0; i < 256; i++ { | |
record.FileName[i] = 0x20 // Fill the filename with spaces | |
} | |
for i := 0; i < 256; i++ { | |
record.ValidationFailReason[i] = 0x20 // Fill the validation fail reason with spaces | |
} | |
return record | |
} | |
/* | |
* | |
* | |
* Helper functions | |
* | |
* | |
*/ | |
// int64ToBytes encodes i as 8 bytes in little-endian order (least significant
// byte first). It is used to store the int64 header fields of an
// ArchiveRecord, such as the Unix timestamp.
func int64ToBytes(i int64) []byte {
	out := make([]byte, 8)
	for k := range out {
		// Byte k carries bits 8k..8k+7 of the value.
		out[k] = byte(i >> (8 * uint(k)))
	}
	return out
}
// bytesToInt64 decodes the first 8 bytes of b as a little-endian int64 (least
// significant byte first), the inverse of int64ToBytes. It is used to read the
// int64 header fields of an ArchiveRecord. b must hold at least 8 bytes;
// a shorter slice causes an index-out-of-range panic.
func bytesToInt64(b []byte) int64 {
	var value int64
	for k := 0; k < 8; k++ {
		// Byte k contributes bits 8k..8k+7 of the result.
		value |= int64(b[k]) << (8 * uint(k))
	}
	return value
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment