Skip to content

Instantly share code, notes, and snippets.

@carthegian
Last active September 4, 2024 09:14
Show Gist options
  • Save carthegian/b16dd7f6d731dba39af0eee682944e57 to your computer and use it in GitHub Desktop.
Save carthegian/b16dd7f6d731dba39af0eee682944e57 to your computer and use it in GitHub Desktop.
How to write a large amount of data to CSV sequentially or concurrently
package main
import (
"encoding/csv"
"fmt"
"log"
"math/rand"
"os"
"strconv"
"sync"
"time"
)
// main creates the CSV file, writes the header row, runs one of the write
// strategies, flushes and closes the file, and prints the elapsed time.
func main() {
	// Process start
	start := time.Now()

	// Create CSV file
	w, f := getCSVWriter()

	// Write CSV header
	writeCSVHeader(w)

	// Define data size
	records := 10000

	// ----------------------------------------
	// Uncomment each of functions below to run
	testSequential(records, w)
	// testConcurrencyWaitGroup(records, w)
	// testConcurrencyChannel(records, w)
	// ----------------------------------------

	// Flush buffer to file. Flush itself returns nothing, so any error from
	// a previous Write or from the flush must be fetched through Error.
	w.Flush()
	if err := w.Error(); err != nil {
		log.Fatalln(err)
	}

	// Close the file
	if err := f.Close(); err != nil {
		log.Fatalln(err)
	}

	// Process finish then print elapsed time
	elapsed := time.Since(start)
	fmt.Printf("Time taken: %s to finish", elapsed)
}
// testSequential builds `records` dummy rows one after another, sleeping a
// random 0-99ms before each row to simulate an API call, and then writes all
// rows to w in a single pass.
func testSequential(records int, w *csv.Writer) {
	allData := make([]data, records)
	for i := 0; i < records; i++ {
		// Wait a randomized duration to simulate an API call
		time.Sleep(getRandomSleepTime(100))

		// Assign dummy data
		allData[i] = getDummyData(i)

		// Use a constant format string so the name is never parsed as a format.
		fmt.Printf("[Write %s] ", allData[i].Name)
	}

	// Write CSV body
	writeCSVBody(allData, w)
	fmt.Println("Write CSV " + strconv.Itoa(records) + " records is finished")
}
// testConcurrencyWaitGroup builds `records` dummy rows concurrently, one
// goroutine per row, waits for all of them with a sync.WaitGroup, and then
// writes the collected rows to w in index order.
//
// Note: the goroutines hand their results over through a shared slice, which
// goes against the Go proverb "Do not communicate by sharing memory; instead,
// share memory by communicating." It is kept here as an example. Each
// goroutine writes only to its own index, and the slice is read only after
// wg.Wait returns.
func testConcurrencyWaitGroup(records int, w *csv.Writer) {
	allData := make([]data, records)

	// Define WaitGroup
	var wg sync.WaitGroup
	wg.Add(records)

	for i := 0; i < records; i++ {
		go func(i int) {
			// Mark goroutine as finished when data is assigned
			defer wg.Done()

			// Wait a randomized duration to simulate an API call
			time.Sleep(getRandomSleepTime(100))

			// Assign dummy data
			allData[i] = getDummyData(i)

			// Use a constant format string so the name is never parsed as a format.
			fmt.Printf("[Write %s] ", allData[i].Name)
		}(i)
	}

	// Make WaitGroup wait for all goroutines to finish
	wg.Wait()

	// Write CSV body
	writeCSVBody(allData, w)
	fmt.Println("Write CSV " + strconv.Itoa(records) + " records is finished")
}
// testConcurrencyChannel builds `records` dummy rows concurrently, one
// goroutine per row, and sends each row over a buffered channel to a single
// consumer goroutine that writes rows to w in arrival order. It returns once
// the consumer signals on done.
func testConcurrencyChannel(records int, w *csv.Writer) {
	// With nothing to produce, no row would ever be sent, so nothing would
	// ever arrive on done and the receive below would block forever.
	if records <= 0 {
		fmt.Println("Write CSV " + strconv.Itoa(records) + " records is finished")
		return
	}

	// Define buffered channel, sized to hold every record
	ch := make(chan data, records)
	done := make(chan bool)

	// Close the channel when this function returns, i.e. after done has been
	// received; closing ends the consumer's range loop.
	defer close(ch)

	for i := 0; i < records; i++ {
		go func(i int) {
			// Wait a randomized duration to simulate an API call
			time.Sleep(getRandomSleepTime(100))

			// Build the row once and reuse it for both the send and the log line
			row := getDummyData(i)

			// Send data to channel
			ch <- row
			fmt.Printf("[Write %s] ", row.Name)
		}(i)
	}

	// Write CSV body
	go writeCSVBodyWithChannel(ch, done, records, w)

	// Block until the consumer reports that it has finished
	<-done
	fmt.Println("Write CSV " + strconv.Itoa(records) + " records is finished")
}
// data holds the three string fields written as one CSV row, in column
// order: ID, Name, Description.
type data struct {
	ID, Name, Description string
}
// getRandomSleepTime returns a random whole number of milliseconds in
// [0, denom). It returns 0 when denom is not positive, because rand.Intn
// panics for such arguments.
//
// It uses the package-level math/rand generator, which is safe for concurrent
// use, instead of allocating and seeding a new source on every call.
func getRandomSleepTime(denom int) time.Duration {
	if denom <= 0 {
		return 0
	}
	return time.Duration(rand.Intn(denom)) * time.Millisecond
}
// getDummyData builds a placeholder row for the zero-based index num. The ID
// is the one-based number as a string, the Name is "Name" followed by that
// number, and the Description is always "Desc".
func getDummyData(num int) data {
	seq := strconv.Itoa(num + 1)

	var row data
	row.ID = seq
	row.Name = "Name" + seq
	row.Description = "Desc"
	return row
}
// getCSVWriter creates (or truncates) the output file at a fixed path and
// returns a CSV writer over it together with the file handle. The writer is
// configured to end lines with CRLF. The process exits via log.Fatalln if the
// file cannot be created.
func getCSVWriter() (*csv.Writer, *os.File) {
	file, createErr := os.Create("/home/ec2-user/concurrency.csv")
	if createErr != nil {
		log.Fatalln(createErr)
	}

	writer := csv.NewWriter(file)
	writer.UseCRLF = true

	return writer, file
}
// writeCSVHeader writes the column-name row (ID, Name, Description) to w.
// The process exits via log.Fatalln if the write fails.
func writeCSVHeader(w *csv.Writer) {
	header := []string{"ID", "Name", "Description"}
	if err := w.Write(header); err != nil {
		log.Fatalln(err)
	}
}
// writeCSVBody writes every element of allData to w as one CSV row, in slice
// order, with the columns ID, Name, Description. It does not flush w.
//
// On a write failure the process exits via log.Fatalln. The underlying error
// is included in that single log call, because log.Fatalln terminates the
// process and a second log statement after it would never run.
func writeCSVBody(allData []data, w *csv.Writer) {
	// Write data from slice to CSV
	for _, row := range allData {
		if err := w.Write([]string{row.ID, row.Name, row.Description}); err != nil {
			log.Fatalln("File writing failed:", err)
		}
	}
}
// writeCSVBodyWithChannel receives rows from ch and writes each one to w as a
// CSV row, in arrival order. After `records` rows have been written it sends
// true on done; it then keeps draining ch and returns once ch is closed. It
// does not flush w.
//
// If records is not positive there is nothing to wait for, so it signals done
// immediately and returns instead of leaving the waiter blocked forever.
//
// On a write failure the process exits via log.Fatalln. The underlying error
// is included in that single log call, because log.Fatalln terminates the
// process and a second log statement after it would never run.
func writeCSVBodyWithChannel(ch chan data, done chan bool, records int, w *csv.Writer) {
	if records <= 0 {
		done <- true
		return
	}

	// Write data from channel to CSV
	for row := range ch {
		if err := w.Write([]string{row.ID, row.Name, row.Description}); err != nil {
			log.Fatalln("File writing failed:", err)
		}

		records--

		// Check if all records are processed, if yes then notify channel
		if records == 0 {
			done <- true
		}
	}
}
@mushoffa
Copy link

Six Pack Senpai, baru sadar ini codingan ente.

@carthegian
Copy link
Author

@mushoffa
Wkwkwkw koq bisa aja lo nemu ini...

@mushoffa
Copy link

@carthegian Gw lagi nyari coding buat write CSV concurrent pake GO, nemu artikel web elu, terus gw liat .gist.nya. Jujur awal.ny kagak sadar, pas lagi scrolling, kok nama lu familiar... Gw liat alias elu jg gw inget banget, dan pas liat profile elu... ternyata itu elu :D

@mushoffa
Copy link

Anyway keren banget, software engineer skg ya di sana... gmn kabarnya?

@carthegian
Copy link
Author

Hoo, geblek random banget.. =p
Oke2 aja lah d sini. Yang dikerjain ga beda jauh, cuma beda bahasa aja.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment