Last active
March 28, 2024 21:40
-
-
Save annibal/91d5939fcdd26b64caf0a50e2318788f to your computer and use it in GitHub Desktop.
Function to generate controlled random CSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Generates a table of controlled random data, either as an array of rows
 * or as CSV text.
 *
 * @param {object} [params]
 * @param {object[]} [params.colsDef] - One definition per column. The data
 *   source is chosen in this order of precedence:
 *   1. `fn(rowIndex)`: custom generator function.
 *   2. `values`: non-empty list to pick from; picked at random, or cycled in
 *      order when `continous` (or the correctly spelled `continuous`) is set.
 *   3. `type: "number"`: random number in `range` ([min, max]) rounded to
 *      `decimals` places; without a valid `range` the default generator is used.
 *   4. `type: "date"`: formatted with `formatDate(date)` when provided,
 *      otherwise as "YYYY-MM-DD HH:mm:ss" (UTC).
 *      - continuous: `dateStart + rowIndex * interval + jitter`, where
 *        `interval` is in ms (default one day) and `range` is the jitter in
 *        ms (default -2h..+2h).
 *      - otherwise: random instant between the two dates in `range`
 *        (default: the current day).
 *   5. `type: "string"`: random lowercase words; `rangeWords` (default
 *      [1, 10]) bounds the word count, `rangeCharacters` (default [3, 20])
 *      bounds the word length.
 *   6. Anything else: random integer between 0 and 100.
 *   Every column also accepts `title` (default `column-<zero padded index>`)
 *   and `nullChance` (probability of a null cell, default 0.05).
 * @param {object} [params.rowsDef] - `rowAmount` for a fixed number of rows,
 *   or `range` ([min, max]) for a random number of rows. Defaults to 5 rows.
 * @param {boolean} [params.withHeaders] - Prepend a row with the column titles.
 * @param {boolean} [params.asString] - Return CSV text instead of an array of rows.
 * @returns {Array<Array<*>>|string} The rows, or the CSV text when `asString` is set.
 * @throws {RangeError} When the resolved row amount is negative or not finite.
 */
function generateCsv(params) {
  const { colsDef, rowsDef, withHeaders, asString } = params || {};

  const DEFAULT_NULL_CHANCE = 0.05;
  const DEFAULT_ROW_AMOUNT = 5;
  const HOUR_MS = 60 * 60 * 1000;
  const DAY_MS = 24 * HOUR_MS;

  // Coercing on purpose: numeric strings and Date objects count as valid numbers.
  const isNumberValid = (val) => val != null && !Number.isNaN(Number(val));
  const isRangeValid = (range) =>
    range != null && isNumberValid(range[0]) && isNumberValid(range[1]);

  // Random number between min and max, rounded to `decimals` places.
  const randomRange = (min, max, decimals = 0) => {
    const scale = 10 ** decimals;
    return Math.round((Math.random() * (max - min) + min) * scale) / scale;
  };

  // Uniform random integer in [min, max]. Rounding (as randomRange does) would
  // give both ends half the probability of the other values.
  const randomInt = (min, max) =>
    Math.floor(Math.random() * (max - min + 1)) + min;

  // Epoch milliseconds for a date-like value; NaN when missing or unparsable.
  // The null check matters because `new Date(null)` is the epoch, not invalid.
  const toTimestamp = (value) =>
    value == null ? NaN : new Date(value).getTime();

  const defaultGenerator = () => randomRange(0, 100);

  // Picks from a fixed list of values, cycling through it when continuous.
  const makeValuesGenerator = (values, isContinuous) => {
    const len = values.length;
    if (isContinuous) {
      return (idx) => values[idx % len];
    }
    return () => values[randomInt(0, len - 1)];
  };

  // Random numbers inside `range`; null when no valid range was given.
  const makeNumberGenerator = (def) => {
    if (!isRangeValid(def.range)) return null;
    const min = Number(def.range[0]);
    const max = Number(def.range[1]);
    const decimals = isNumberValid(def.decimals) ? Number(def.decimals) : 0;
    return () => randomRange(min, max, decimals);
  };

  // Formatted dates, either evenly spaced with jitter or random within bounds.
  const makeDateGenerator = (def, isContinuous) => {
    const formatDate =
      typeof def.formatDate === "function"
        ? def.formatDate
        : (dt) => dt.toISOString().replace("T", " ").slice(0, 19);

    // Defaults combine the current UTC calendar date with local wall-clock
    // times; built from numeric parts to avoid parsing non-ISO date strings.
    const now = new Date();
    const year = now.getUTCFullYear();
    const month = now.getUTCMonth();

    if (isContinuous) {
      const interval = isNumberValid(def.interval)
        ? Number(def.interval)
        : DAY_MS;
      const [jitterMin, jitterMax] = isRangeValid(def.range)
        ? [Number(def.range[0]), Number(def.range[1])]
        : [-2 * HOUR_MS, 2 * HOUR_MS];
      const customStart = toTimestamp(def.dateStart);
      const start = Number.isNaN(customStart)
        ? new Date(year, month, 1, 12, 0, 0).getTime()
        : customStart;
      return (idx) => {
        const offset = idx * interval + randomRange(jitterMin, jitterMax);
        return formatDate(new Date(start + offset));
      };
    }

    const day = now.getUTCDate();
    const bounds = Array.isArray(def.range)
      ? def.range.slice(0, 2).map(toTimestamp)
      : [];
    const hasBounds =
      bounds.length === 2 && bounds.every((ts) => !Number.isNaN(ts));
    const [from, to] = hasBounds
      ? bounds
      : [
          new Date(year, month, day, 0, 0, 0).getTime(),
          new Date(year, month, day, 23, 59, 59).getTime(),
        ];
    return () => formatDate(new Date(randomRange(from, to)));
  };

  // Random "sentences" made of lowercase a-z words separated by spaces.
  const makeStringGenerator = (def) => {
    const [minWords, maxWords] = (
      isRangeValid(def.rangeWords) ? def.rangeWords : [1, 10]
    ).map(Number);
    const [minChars, maxChars] = (
      isRangeValid(def.rangeCharacters) ? def.rangeCharacters : [3, 20]
    ).map(Number);
    // Base-36 digits 10..35 are "a".."z"; 36 would render as the string "10".
    const randChar = () => randomInt(10, 35).toString(36);
    const randWord = () =>
      Array.from({ length: randomRange(minChars, maxChars) }, randChar).join(
        ""
      );
    return () =>
      Array.from({ length: randomRange(minWords, maxWords) }, randWord).join(
        " "
      );
  };

  // Resolves the cell generator for one column definition.
  const pickGenerator = (def) => {
    if (typeof def.fn === "function") return def.fn;
    // `continous` is the historical spelling; `continuous` is accepted as well.
    const isContinuous = Boolean(def.continous || def.continuous);
    if (def.values?.length > 0) {
      return makeValuesGenerator(def.values, isContinuous);
    }
    switch (def.type) {
      case "number":
        return makeNumberGenerator(def) ?? defaultGenerator;
      case "date":
        return makeDateGenerator(def, isContinuous);
      case "string":
        return makeStringGenerator(def);
      default:
        return defaultGenerator;
    }
  };

  const arrColsDef = colsDef || [];
  // Pad generated titles to the width of the largest column index (at least 1).
  const indexDigits = String(Math.max(arrColsDef.length - 1, 0)).length;

  const colGenerators = arrColsDef.map((rawDefinition, idx) => {
    const def = rawDefinition ?? {};
    const title =
      def.title || `column-${String(idx).padStart(indexDigits, "0")}`;
    const nullChance = isNumberValid(def.nullChance)
      ? Number(def.nullChance)
      : DEFAULT_NULL_CHANCE;
    const produce = pickGenerator(def);
    return {
      title,
      gen: (rowIdx) => (Math.random() < nullChance ? null : produce(rowIdx)),
    };
  });

  let rowAmount = DEFAULT_ROW_AMOUNT;
  if (isNumberValid(rowsDef?.rowAmount)) {
    rowAmount = Number(rowsDef.rowAmount);
  } else if (isRangeValid(rowsDef?.range)) {
    rowAmount = randomRange(Number(rowsDef.range[0]), Number(rowsDef.range[1]));
  }
  if (!Number.isFinite(rowAmount) || rowAmount < 0) {
    throw new RangeError(
      `Row amount must be a non-negative finite number, got ${rowAmount}`
    );
  }
  // A fractional amount is truncated instead of failing array creation.
  rowAmount = Math.floor(rowAmount);

  const rowTitle = colGenerators.map((col) => col.title);
  const rows = Array.from({ length: rowAmount }, (_, idx) =>
    colGenerators.map((col) => col.gen(idx))
  );
  const resultRows = withHeaders ? [rowTitle, ...rows] : rows;

  if (!asString) return resultRows;

  // Null cells become empty fields; fields containing a comma, a double quote
  // or a line break are quoted, with inner quotes doubled.
  const toCsvField = (value) => {
    if (value == null) return "";
    const text = String(value);
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };
  return resultRows.map((row) => row.map(toCsvField).join(",")).join("\n");
}
// Demo: generates a sample table of 20 to 40 rows (with a header row) and
// prints it to the console as a table of records keyed by column title.
(() => {
  const MINUTE_MS = 60 * 1000;
  const HOUR_MS = 60 * MINUTE_MS;

  const sampleColumns = [
    { title: "ID", fn: (i) => i },
    // NOTE(review): this flag is spelled `continuous` here while the date
    // columns below use `continous` - confirm generateCsv honors this key.
    { title: "truple", values: ["one", "two", "third"], continuous: true },
    { title: "category", values: ["A", "B", "C", "D"] },
    { title: "description", type: "string" },
    {
      title: "name",
      type: "string",
      rangeWords: [1, 1],
      rangeCharacters: [8, 16],
    },
    { title: "exportedAt", type: "date" },
    { title: "createdAt", type: "date", range: ["2023-06-01", "2023-06-30"] },
    {
      title: "dateBucket",
      type: "date",
      range: [0, 0],
      continous: true,
      dateStart: "2023-08-01T00:00:00Z",
      interval: 8 * HOUR_MS,
    },
    {
      title: "sale_time",
      type: "date",
      range: [-30 * MINUTE_MS, 30 * MINUTE_MS],
      continous: true,
      dateStart: "2023-08-05T00:00:00Z",
      interval: HOUR_MS,
      // Seconds are cut off and replaced by a fixed ":00".
      formatDate: (dt) => `${dt.toJSON().replace("T", " ").slice(0, 16)}:00`,
    },
    { title: "sale_value", type: "number", range: [50, 300], decimals: 2 },
    { title: "progress" },
    { type: "number" },
  ];

  const table = generateCsv({
    rowsDef: { range: [20, 40] },
    withHeaders: true,
    colsDef: sampleColumns,
  });

  // The first row holds the column titles; the rest are data rows.
  const [headerRow, ...dataRows] = table;
  const records = dataRows.map((cells) =>
    Object.fromEntries(headerRow.map((title, i) => [title, cells[i]]))
  );

  console.table(records, headerRow);
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment