Last active
August 9, 2023 17:55
-
-
Save zacharysyoung/da51b614acc7f8e97b249d1f47302900 to your computer and use it in GitHub Desktop.
Generate CSVs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Write input.csv: a header row followed by N_ROWS rows of (RowNum, random ID)."""
import csv
import random

# Used to characterize answer for https://stackoverflow.com/questions/75578992

N_ROWS = 20_000_000  # number of data rows; also the inclusive upper bound of the random IDs
PROGRESS_EVERY = 100_000  # print a progress line once per this many rows

# newline="" lets the csv module control line endings itself.
with open("input.csv", "w", newline="") as f:
    w = csv.writer(f)
    w.writerow(["RowNum", "ID"])
    for i in range(N_ROWS):
        if i % PROGRESS_EVERY == 0:
            print(f"to row {i+1}")
        # RowNum is 1-based; ID is drawn uniformly from 0..N_ROWS inclusive.
        w.writerow([i + 1, random.randint(0, N_ROWS)])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"encoding/csv" | |
"fmt" | |
"os" | |
"strconv" | |
"strings" | |
// random field data | |
"math/rand" | |
"time" | |
) | |
// rowsArg and colsArg hold the ROWS and COLS command-line arguments as parsed
// by init. main converts them to ints.
var rowsArg, colsArg float64

// init parses the ROWS and COLS command-line arguments into rowsArg and
// colsArg.
//
// With -h it prints the usage line and exits with status 0. If either argument
// is missing or cannot be parsed, it reports the problem on stderr and exits
// with status 1, so main never runs with unset arguments.
func init() {
	const usage = "genCSV [ROWS] [COLS]"

	if len(os.Args) > 1 && os.Args[1] == "-h" {
		fmt.Println(usage)
		os.Exit(0)
	}
	// Both arguments are required: os.Args[1] and os.Args[2] are read below.
	if len(os.Args) < 3 {
		fmt.Fprintln(os.Stderr, usage)
		os.Exit(1)
	}

	// Parsing as float so the following syntaxes can be used: 1_000_000, 10e6
	var err error
	if rowsArg, err = strconv.ParseFloat(os.Args[1], 64); err != nil {
		fmt.Fprintf(os.Stderr, "error: could not read ROWS %q: %v\n%s\n", os.Args[1], err, usage)
		os.Exit(1)
	}
	if colsArg, err = strconv.ParseFloat(os.Args[2], 64); err != nil {
		fmt.Fprintf(os.Stderr, "error: could not read COLS %q: %v\n%s\n", os.Args[2], err, usage)
		os.Exit(1)
	}
}
func main() { | |
rand.Seed(time.Now().UnixNano()) | |
rows := int(rowsArg) | |
cols := int(colsArg) + 1 | |
lens := [4]int{5, 6, 7, 8} // possible lengths of a cell | |
row := make([]string, cols) // row with cols-number of cells | |
fOut, _ := os.Create(fmt.Sprintf("gen_%dx%d.csv", rows, cols)) | |
defer fOut.Close() | |
w := csv.NewWriter(fOut) | |
// Create and write header | |
row[0] = "ID" | |
for i := 1; i < cols; i++ { | |
row[i] = fmt.Sprintf("Col%d", i) | |
} | |
w.Write(row) | |
var len int | |
var char string | |
for i := 0; i < rows; i++ { | |
row[0] = fmt.Sprintf("%d", i) | |
for j := 1; j < cols; j++ { | |
char = string(rune('a' + rand.Intn(26))) // pick a lower-case letter at random | |
len = lens[rand.Intn(4)] // pick length from possible `lens` | |
row[j] = strings.Repeat(char, len) | |
} | |
w.Write(row) | |
} | |
w.Flush() | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"encoding/csv" | |
"fmt" | |
"math/rand" | |
"os" | |
"strconv" | |
"strings" | |
) | |
// Command-line help text.
const (
	// sUsage is the one-line synopsis.
	sUsage = "usage: gen [-h] COL_TYPES NROWS"

	// sHelp is the longer description of the COL_TYPES and NROWS arguments.
	sHelp = `generate a CSV with NROWS number of rows following the layout of COL_TYPES, a
comma-separated list of types (e.g., 'float64,int,bool,string'). Each field of
a row will be filled with random values of that column's type.`
)
// Type identifies the kind of value generated for a column.
type Type int

// Column types, named after the COL_TYPES strings they are parsed from.
const (
	tF64    Type = iota // "float64"
	tInt                // "int"
	tBool               // "bool"
	tString             // "string"
)
func usage() { | |
fmt.Fprintln(os.Stderr, sUsage) | |
os.Exit(1) | |
} | |
func help() { | |
fmt.Fprintln(os.Stderr, sUsage) | |
fmt.Fprintln(os.Stderr) | |
fmt.Fprintln(os.Stderr, sHelp) | |
fmt.Fprintln(os.Stderr) | |
os.Exit(1) | |
} | |
func main() { | |
args := os.Args | |
if len(args) < 2 { | |
usage() | |
} | |
if args[1] == "-h" { | |
help() | |
} | |
if len(args) < 3 { | |
usage() | |
} | |
types, err := readHeader(args[1]) | |
if err != nil { | |
fmt.Fprintf(os.Stderr, "error: could not read header %q: %v\n", args[1], err) | |
usage() | |
} | |
nrows, err := strconv.Atoi(args[2]) | |
if err != nil { | |
fmt.Fprintf(os.Stderr, "error: could not read number of rows %q: %v\n", args[2], err) | |
usage() | |
} | |
w := csv.NewWriter(os.Stdout) | |
// header | |
row := make([]string, len(types)) | |
for i := 0; i < len(types); i++ { | |
row[i] = fmt.Sprintf("Col%d", i) | |
} | |
w.Write(row) | |
for i := 0; i < nrows; i++ { | |
for j, t := range types { | |
row[j] = genValue(t) | |
} | |
w.Write(row) | |
} | |
w.Flush() | |
} | |
// readHeader reads the header string and parses the Type of each column. | |
func readHeader(s string) ([]Type, error) { | |
types := make([]Type, 0) | |
r := csv.NewReader(strings.NewReader(s)) | |
colTypes, err := r.Read() | |
if err != nil { | |
return types, err | |
} | |
for _, x := range colTypes { | |
switch x { | |
case "float64": | |
types = append(types, tF64) | |
case "int": | |
types = append(types, tInt) | |
case "bool": | |
types = append(types, tBool) | |
case "string": | |
types = append(types, tString) | |
default: | |
return types, fmt.Errorf("%v does not match valid types: float64, int, bool, string", x) | |
} | |
} | |
return types, nil | |
} | |
func genValue(t Type) string { | |
var s string | |
switch t { | |
case tF64: | |
s = strconv.FormatFloat(rand.Float64()*100, 'g', 6, 64)[:5] | |
case tInt: | |
s = strconv.Itoa(rand.Int())[:5] | |
case tBool: | |
s = "false" | |
if rand.Intn(2) == 1 { | |
s = "true" | |
} | |
case tString: | |
for i := 0; i < 5; i++ { | |
s += string(byte(rand.Intn(26) + 65)) | |
} | |
} | |
return s | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"encoding/csv" | |
"flag" | |
"fmt" | |
"math" | |
"os" | |
"strconv" | |
) | |
// desc is the program description shown in the usage message. It includes a
// sample of the field naming for a 10-row, 5-column invocation.
const desc = `Print a ROWSxCOLS CSV of uniformly named and padded fields, e.g., ./prog 10 5:
r01c1,r01c2,...,...,r01c5
r02c1,r02c2,...,...,r02c5
.....,.....,...,...,.....
r10c1,r10c2,...,...,r10c5
`
var (
	// Args: the ROWS and COLS values given on the command line.
	rows int
	cols int

	// Whole-file padding values: the zero-padded width used for row numbers
	// and column numbers respectively, so every field has the same length.
	padRows int
	padCols int
)
func init() { | |
flag.Usage = func() { | |
usage := fmt.Sprintf("Usage of %s: ROWS COLS [Flags]\n", os.Args[0]) | |
fmt.Fprintf(flag.CommandLine.Output(), "%s\n%s\nFlags:\n\n", usage, desc) | |
flag.PrintDefaults() | |
os.Exit(1) | |
} | |
flag.Parse() | |
if len(flag.Args()) != 2 { | |
flag.Usage() | |
} | |
parseArg := func(arg, name string) int { | |
num, err := strconv.ParseFloat(arg, 32) | |
if err != nil || num != math.Trunc(num) { | |
fmt.Fprintf( | |
flag.CommandLine.Output(), | |
"expected %s to be a whole number; got %q\n\n", name, arg) | |
flag.Usage() | |
} | |
return int(num) | |
} | |
rows = parseArg(flag.Args()[0], "ROWS") | |
cols = parseArg(flag.Args()[1], "COLS") | |
// Get "width" of decimal number of rows and columns, for padding later | |
padRows = int(math.Ceil(math.Log10(float64(rows + 1)))) | |
padCols = int(math.Ceil(math.Log10(float64(cols + 1)))) | |
} | |
func main() { | |
row := make([]string, cols) | |
w := csv.NewWriter(os.Stdout) | |
for j := 0; j < cols; j++ { | |
row[j] = fmt.Sprintf("Col%0*d", padCols, j+1) | |
} | |
w.Write(row) | |
for i := 0; i < rows; i++ { | |
for j := 0; j < cols; j++ { | |
row[j] = fmt.Sprintf("r%0*dc%0*d", padRows, i+1, padCols, j+1) | |
} | |
w.Write(row) | |
} | |
w.Flush() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment