Created
March 11, 2014 03:15
-
-
Save asmedrano/9478830 to your computer and use it in GitHub Desktop.
Split a file into several files of a given byte size. Try not to leave orphaned rows.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strconv"
	"sync"
)
// EOF is a package-local sentinel error created with errors.New. It is its
// own distinct error value, not the standard library's io.EOF, so an equality
// check against the end-of-file error returned by a file read does not match it.
var EOF = errors.New("EOF") | |
// main splits an input file into several smaller files without cutting a row
// (line) in half.
//
// Usage: ./splitfile <file> <outdir> <bytesize>
//
// The input is cut at newline boundaries into chunks of at most <bytesize>
// bytes (a single row longer than <bytesize> is kept whole rather than split).
// Chunk number N is written to <outdir>/N.out. The newline that terminates a
// chunk is not written to the output file. Any failure is reported and the
// process exits with status 1.
func main() {
	if len(os.Args) < 4 {
		fmt.Println("USAGE: ./splitfile <file> <outdir> <bytesize>")
		os.Exit(1)
	}
	// A zero or negative size can never make progress, so reject it up front.
	byteSize, err := strconv.ParseInt(os.Args[3], 0, 64)
	if err != nil || byteSize <= 0 {
		fmt.Println("Invalid <bytesize>")
		os.Exit(1)
	}
	targDir, err := filepath.Abs(os.Args[2])
	if err != nil {
		fmt.Println("Invalid <outdir>")
		os.Exit(1)
	}
	if err := splitFile(os.Args[1], targDir, byteSize); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("Done.")
}

// splitFile reads the file at src, cuts it into line-aligned chunks of about
// size bytes and writes them into dir as 0.out, 1.out, ...
func splitFile(src, dir string, size int64) error {
	f, err := os.Open(src)
	if err != nil {
		return err
	}
	defer f.Close()

	chunks, err := splitLines(f, size)
	if err != nil {
		return fmt.Errorf("reading %s: %v", src, err)
	}
	return writeChunks(dir, chunks)
}

// splitLines consumes r until EOF and returns its content cut into chunks that
// each end on a row boundary. A chunk covers at most size bytes of input
// (including its terminating newline) unless a single row is longer than that,
// in which case the chunk is extended to the end of that row. The terminating
// newline is not part of the returned chunk. A final row without a trailing
// newline is kept.
func splitLines(r io.Reader, size int64) ([][]byte, error) {
	if size <= 0 {
		return nil, fmt.Errorf("chunk size must be positive, got %d", size)
	}

	var chunks [][]byte
	var pending []byte // bytes read but not yet emitted
	block := make([]byte, size)
	eof := false

	for {
		if !eof {
			// Top pending up to one full window. If it already holds a full
			// window without a newline (an overlong row), extend it by another
			// block instead.
			want := size - int64(len(pending))
			if want <= 0 {
				want = size
			}
			n, err := io.ReadFull(r, block[:want])
			pending = append(pending, block[:n]...)
			switch err {
			case nil:
			case io.EOF, io.ErrUnexpectedEOF:
				eof = true
			default:
				return nil, err
			}
		}

		cut := lineBreak(pending, size)
		if eof && (cut < 0 || int64(len(pending)) <= size) {
			// Whatever is left fits in one last chunk (or cannot be cut).
			if len(pending) > 0 {
				chunks = append(chunks, bytes.TrimSuffix(pending, []byte("\n")))
			}
			return chunks, nil
		}
		if cut < 0 {
			continue // row longer than the window: keep reading
		}

		// Copy the chunk out because pending's storage is reused below.
		chunks = append(chunks, append([]byte(nil), pending[:cut]...))
		pending = pending[:copy(pending, pending[cut+1:])]
	}
}

// lineBreak returns the index in pending of the newline at which the next
// chunk should end: the last newline inside the first size bytes, or, when
// that window has none, the first newline after it. It returns -1 when pending
// contains no newline.
func lineBreak(pending []byte, size int64) int {
	window := pending
	if int64(len(window)) > size {
		window = window[:size]
	}
	if i := bytes.LastIndexByte(window, '\n'); i >= 0 {
		return i
	}
	if i := bytes.IndexByte(pending[len(window):], '\n'); i >= 0 {
		return len(window) + i
	}
	return -1
}

// writeChunks writes chunks[i] to dir/i.out, using a bounded number of
// concurrent writers, and returns the first error encountered (by index).
func writeChunks(dir string, chunks [][]byte) error {
	// Cap the number of files open at once so a large input split into many
	// chunks does not exhaust file descriptors.
	const maxWriters = 8

	errs := make([]error, len(chunks))
	sem := make(chan struct{}, maxWriters)
	var wg sync.WaitGroup
	for i := range chunks {
		sem <- struct{}{}
		wg.Add(1)
		go func(i int) {
			// Deferred first so the WaitGroup is released on every path.
			defer wg.Done()
			defer func() { <-sem }()
			errs[i] = writeChunk(filepath.Join(dir, strconv.Itoa(i)+".out"), chunks[i])
		}(i)
	}
	wg.Wait()

	for _, err := range errs {
		if err != nil {
			return err
		}
	}
	return nil
}

// writeChunk creates (or truncates) the file at path and writes data to it.
func writeChunk(path string, data []byte) error {
	// O_TRUNC so a longer file left over from an earlier run cannot leak
	// stale bytes after the new content.
	file, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666)
	if err != nil {
		return err
	}
	if _, err := file.Write(data); err != nil {
		file.Close()
		return err
	}
	return file.Close()
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment