Last active
August 29, 2015 14:26
-
-
Save anxiousmodernman/5b79048ff79c7e796da1 to your computer and use it in GitHub Desktop.
large SQL result to CSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ReportExporter Contains required methods for extracting csv data from SQL queries | |
type ReportExporter interface { | |
FromSqlRow(*sql.Rows) error | |
ToStrings() []string | |
} | |
// Report A type for holding metadata to instantiate the right csv schema as well | |
// as write the correct output filename | |
type Report struct { | |
SQL string | |
OutputFile string | |
Type string // example here could be "person" | |
} | |
// Creator is a factory function type for instances of ReportExporter types | |
type Creator func() ReportExporter | |
// SchemaTypes is a global map of Creator factory functions populated by init() | |
var schemaTypes = make(map[string]Creator) | |
func init() { | |
schemaTypes["person"] = newPerson | |
schemaTypes["foo"] = newFoo | |
schemaTypes["baz"] = newBaz | |
} | |
// Factory functions for report types | |
func newPerson() ReportExporter { | |
return new(Person) | |
} | |
func newFoo() ReportExporter { | |
return new(Foo) // other Foo implementations not shown | |
} | |
func newBaz() ReportExporter { | |
return new(Baz) // other Baz implementations not shown | |
} | |
// Person is a type to represent one kind of 3-column result set | |
type Person struct { | |
PersonID sql.NullInt64 | |
Name sql.NullString | |
JoinDate gorp.NullTime | |
} | |
// ToStrings returns a slice of strings for the csv writer. Implemented for every ReportExporter. | |
func (p *Person) ToStrings() []string { | |
var output []string | |
personID := NullInt64ToString(p.PersonID) | |
name := NullStringToString(p.Name) | |
joinDate := NullTimeToString(p.JoinDate) | |
return append(output, personID, name, joinDate) | |
} | |
// FromSqlRow wraps the call to Scan. Implemented for every ReportExporter. | |
func (p *Person) FromSqlRow(cursor *sql.Rows) error { | |
err := cursor.Scan(&p.PersonID, &p.Name, &p.JoinDate) | |
if err != nil { | |
return err | |
} | |
return nil | |
} | |
// NullInt64ToString helper to prevent writing the "zero" type. Used in ReportExporter ToStrings() | |
func NullInt64ToString(value sql.NullInt64) string { | |
if value.Valid { | |
v, err := value.Value() | |
if err != nil { | |
log.Fatal("Error extracting int64 from sql.NullInt64: ", value) | |
panic(err) | |
} | |
return strconv.FormatInt(v.(int64), 10) | |
} | |
return "" | |
} | |
// NullStringToString ... unsure if I need this ... Used in ReportExporter ToStrings() | |
func NullStringToString(value sql.NullString) string { | |
if value.Valid { | |
v, err := value.Value() | |
if err != nil { | |
log.Fatal("Error extracting string from sql.NullString: ", value) | |
} | |
return v.(string) | |
} | |
return "" | |
} | |
// NullTimeToString Formats time and writes empty string for NULLS. Used in ReportExporter ToStrings() | |
func NullTimeToString(value gorp.NullTime) string { | |
if value.Valid { | |
v, err := value.Value() | |
if err != nil { | |
log.Fatal("Error extracting Time from gorp.NullTime: ", value) | |
} | |
t := v.(time.Time) | |
return fmt.Sprintf("%d-%02d-%02d %02d:%02d:%02d", | |
t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second()) | |
} | |
return "" | |
} | |
// PerformQuery does the "work". Runs a batch analytics query embedded in a higher level "Report" struct | |
// and writes the results to pipe-separated csv | |
// Params: DB wraps a connection, Report wraps report metadata + sql query as a string | |
func PerformQuery(db *DB, r Report) error { | |
log.Println("Starting to create", r.OutputFile) | |
rows, err := db.Conn.Query(r.SQL) | |
if err != nil { | |
return err | |
} | |
defer rows.Close() | |
log.Println("Finished query for", r.OutputFile) | |
log.Println("Writing file:", r.OutputFile) | |
csvfile, err := os.Create(r.OutputFile) | |
if err != nil { | |
fmt.Println("Error:", err) | |
return err | |
} | |
defer csvfile.Close() | |
writer := csv.NewWriter(csvfile) | |
writer.Comma = '|' | |
i := 0 | |
fac := schemaTypes[r.Type] // a map of string -> factory functions that new up a struct (a Person in this case) | |
for rows.Next() { | |
i++ | |
re := fac() // new up some kind of report (e.g. Person) | |
re.FromSqlRow(rows) // does the rows.Scan(...) | |
writer.Write(re.ToStrings()) // gives writer the []string | |
if i%100 == 0 { | |
writer.Flush() | |
} | |
} | |
writer.Flush() | |
log.Println("Finished writing", r.OutputFile) | |
return nil | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello, gophers! Here's my scenario: I am writing arbitrary SQL (think views, reports) to CSV, and I need to handle the possibility of a lot of nulls. These are not really "model" objects per se, more like views. Just arbitrary result sets. The result sets are quite large (over GB) in size. Right now it is performing fairly well, but I am the only person doing Go at my company, so I need to use the Gopher Signal.
General requirements:
FromSqlRow
andToStrings
methods for all of themIs there a better way to handle NULLS when I really just need to transform them into empty string? For many-column result sets, my
ToStrings
methods are kind of a bummer to write (many calls to theNullBlahToString
helpers)Is there some feature of a library e.g.
gorp
that I am missing that can help me here?