Last active
February 20, 2024 12:16
-
-
Save giuseppe/c6aed51d38d6bb53c40a6559fa988bb4 to your computer and use it in GitHub Desktop.
PoC: create a sparse file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bytes" | |
"errors" | |
"io" | |
"os" | |
) | |
// state identifies which kind of byte run SparseWriter.Write is currently
// scanning. The zero value is stateData, so a SparseWriter whose state field
// was never set explicitly still starts in a valid state.
type state int

// zerosThreshold is the minimum length of a run of zero bytes, found inside a
// single Write call, that is turned into a hole instead of being written out.
const zerosThreshold = 1024

// The states live in their own const block so that iota starts at 0; sharing
// a block with zerosThreshold would silently shift the values by one.
const (
	// stateData means the writer is scanning ordinary data.
	stateData state = iota
	// stateZeros means the writer is inside a run of zero bytes.
	stateZeros
)
// SparseWriterUnderlyingFile is the set of file operations a SparseWriter
// needs from its destination: writing data, seeking (used to skip over runs
// of zeros) and closing.
type SparseWriterUnderlyingFile interface {
	// Write stores p at the current offset and advances the offset.
	Write(p []byte) (int, error)
	// Seek moves the current offset; whence takes the io.Seek* constants.
	Seek(off int64, whence int) (int64, error)
	// Close releases the file.
	Close() error
}
type SparseWriter struct { | |
state state | |
file SparseWriterUnderlyingFile | |
zeros int64 | |
lastIsZero bool | |
} | |
func NewSparseWriterToFile(file SparseWriterUnderlyingFile) *SparseWriter { | |
return &SparseWriter{file: file, state: stateData} | |
} | |
func NewSparseWriter(filePath string) (*SparseWriter, error) { | |
file, err := os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666) | |
if err != nil { | |
return nil, err | |
} | |
return &SparseWriter{file: file, state: stateData}, nil | |
} | |
func (sw *SparseWriter) createHole() error { | |
zeros := sw.zeros | |
if zeros == 0 { | |
return nil | |
} | |
sw.zeros = 0 | |
sw.lastIsZero = true | |
_, err := sw.file.Seek(zeros, io.SeekCurrent) | |
return err | |
} | |
// findFirstNotZero returns the index of the first byte in b that is not zero,
// or -1 when b is empty or consists only of zero bytes.
func findFirstNotZero(b []byte) int {
	idx := 0
	for idx < len(b) && b[idx] == 0 {
		idx++
	}
	if idx == len(b) {
		return -1
	}
	return idx
}
// Write writes data to the file, creating holes for long sequences of zeros. | |
func (sw *SparseWriter) Write(data []byte) (int, error) { | |
written, current := 0, 0 | |
totalLen := len(data) | |
for current < len(data) { | |
switch sw.state { | |
case stateData: | |
nextZero := bytes.IndexByte(data[current:], 0) | |
if nextZero < 0 { | |
_, err := sw.file.Write(data[written:]) | |
sw.lastIsZero = false | |
return totalLen, err | |
} else { | |
current += nextZero | |
sw.state = stateZeros | |
} | |
case stateZeros: | |
nextNonZero := findFirstNotZero(data[current:]) | |
if nextNonZero < 0 { | |
// finish with a zero, flush any data and keep track of the zeros | |
if written != current { | |
if _, err := sw.file.Write(data[written:current]); err != nil { | |
return -1, err | |
} | |
sw.lastIsZero = false | |
} | |
sw.zeros += int64(len(data) - current) | |
return totalLen, nil | |
} | |
// do not bother with too short sequences | |
if sw.zeros == 0 && nextNonZero < zerosThreshold { | |
sw.state = stateData | |
current += nextNonZero | |
continue | |
} | |
if written != current { | |
if _, err := sw.file.Write(data[written:current]); err != nil { | |
return -1, err | |
} | |
sw.lastIsZero = false | |
} | |
sw.zeros += int64(nextNonZero) | |
current += nextNonZero | |
if err := sw.createHole(); err != nil { | |
return -1, err | |
} | |
written = current | |
} | |
} | |
return totalLen, nil | |
} | |
// Close closes the SparseWriter's underlying file. | |
func (sw *SparseWriter) Close() error { | |
if sw.file == nil { | |
return errors.New("file is already closed") | |
} | |
if err := sw.createHole(); err != nil { | |
return err | |
} | |
if sw.lastIsZero { | |
if _, err := sw.file.Seek(-1, os.SEEK_CUR); err != nil { | |
return err | |
} | |
if _, err := sw.file.Write([]byte{0}); err != nil { | |
return err | |
} | |
} | |
err := sw.file.Close() | |
sw.file = nil | |
return err | |
} | |
func main() { | |
writer, err := NewSparseWriter("output.sparse") | |
if err != nil { | |
panic(err) | |
} | |
defer writer.Close() | |
buf := make([]byte, 1024*1024) | |
if _, err := io.CopyBuffer(writer, os.Stdin, buf); err != nil { | |
panic(err) | |
} | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bytes" | |
"errors" | |
"io" | |
"testing" | |
) | |
// MemorySparseFile is an in-memory implementation of the Seek/Write/Close
// operations used by SparseWriter, for use in tests. Seeking past the end and
// then writing fills the gap with zero bytes.
type MemorySparseFile struct {
	buffer bytes.Buffer // file contents, starting at offset 0
	pos    int64        // current offset; may be beyond the end of buffer
}

// Seek sets the current offset according to whence (io.SeekStart,
// io.SeekCurrent or io.SeekEnd) and returns the new offset. It fails for an
// unknown whence or when the resulting offset would be negative; the offset
// is left unchanged in that case.
func (m *MemorySparseFile) Seek(offset int64, whence int) (int64, error) {
	var newPos int64
	switch whence {
	case io.SeekStart:
		newPos = offset
	case io.SeekCurrent:
		newPos = m.pos + offset
	case io.SeekEnd:
		newPos = int64(m.buffer.Len()) + offset
	default:
		return 0, errors.New("unsupported seek whence")
	}
	if newPos < 0 {
		return 0, errors.New("negative position is not allowed")
	}
	m.pos = newPos
	return newPos, nil
}

// Write stores b at the current offset and advances the offset by the number
// of bytes written. A gap between the end of the contents and the offset is
// zero-filled first. Bytes that fall inside the existing contents overwrite
// them; the remainder is appended.
func (m *MemorySparseFile) Write(b []byte) (n int, err error) {
	if size := int64(m.buffer.Len()); size < m.pos {
		padding := make([]byte, m.pos-size)
		if _, err := m.buffer.Write(padding); err != nil {
			return 0, err
		}
	}

	// Bytes returns a slice aliasing the buffer's contents, so copying into
	// it overwrites in place. After the padding above m.pos <= Len, so the
	// slice expression is in range; it is empty when writing at the end.
	n = copy(m.buffer.Bytes()[m.pos:], b)

	appended, err := m.buffer.Write(b[n:])
	n += appended
	m.pos += int64(n)
	return n, err
}

// Close does nothing and always returns nil.
func (m *MemorySparseFile) Close() error {
	return nil
}
func testInputWithWriteLen(t *testing.T, input []byte, chunkSize int) { | |
m := &MemorySparseFile{} | |
sparseWriter := NewSparseWriterToFile(m) | |
for i := 0; i < len(input); i += chunkSize { | |
end := i + chunkSize | |
if end > len(input) { | |
end = len(input) | |
} | |
_, err := sparseWriter.Write(input[i:end]) | |
if err != nil { | |
t.Fatalf("Expected no error, got %v", err) | |
} | |
} | |
err := sparseWriter.Close() | |
if err != nil { | |
t.Fatalf("Expected no error, got %v", err) | |
} | |
if !bytes.Equal(input, m.buffer.Bytes()) { | |
t.Fatalf("Incorrect output") | |
} | |
} | |
func testInput(t *testing.T, inputBytes []byte) { | |
currentLen := 1 | |
for { | |
testInputWithWriteLen(t, inputBytes, currentLen) | |
currentLen <<= 1 | |
if currentLen > len(inputBytes) { | |
break | |
} | |
} | |
} | |
func TestSparseWriter(t *testing.T) { | |
testInput(t, []byte("hello")) | |
testInput(t, append(make([]byte, 100), []byte("hello")...)) | |
testInput(t, []byte("")) | |
// add "hello" at the beginning | |
largeInput := make([]byte, 1024*1024) | |
copy(largeInput, []byte("hello")) | |
testInput(t, largeInput) | |
// add "hello" at the end | |
largeInput = make([]byte, 1024*1024) | |
copy(largeInput[1024*1024-5:], []byte("hello")) | |
testInput(t, largeInput) | |
// add "hello" in the middle | |
largeInput = make([]byte, 1024*1024) | |
copy(largeInput[len(largeInput)/2:], []byte("hello")) | |
testInput(t, largeInput) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment