Last active
July 20, 2023 16:40
-
-
Save Crisfole/d6a6c696aed057eab25225ddefd47f63 to your computer and use it in GitHub Desktop.
JSON Sample Directory to zod types
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!npx ts-node | |
import { readdir, readFile, writeFile } from "node:fs/promises";
import { basename, join, resolve, relative } from "node:path";
import { cwd } from "node:process";
import { fileURLToPath } from "node:url";
/** One JSON sample file after it has been read and parsed. */
interface JsonFile {
  /** Location of the file the content came from. */
  path: string;
  /** Result of `JSON.parse` on the file's text; its shape is not known ahead of time. */
  content: unknown;
}
/**
 * Reads and parses every `*.json` entry directly inside `dirname`.
 *
 * Directories whose name happens to end in `.json` are skipped, and files are
 * processed in sorted name order so repeated runs see the samples in the same order.
 *
 * @param dirname Directory to scan (not recursive).
 * @returns One entry per JSON file, carrying its resolved path and parsed content.
 * @throws SyntaxError naming the offending file when its content is not valid JSON.
 */
async function loadJson(dirname: string): Promise<JsonFile[]> {
  const entries = await readdir(dirname, {
    encoding: "utf-8",
    withFileTypes: true,
  });
  // `!isDirectory()` rather than `isFile()` so symlinked sample files are still picked up.
  const jsonNames = entries
    .filter((entry) => !entry.isDirectory() && entry.name.endsWith(".json"))
    .map((entry) => entry.name)
    .sort();
  return Promise.all(
    jsonNames.map(async (name) => {
      const fullPath = resolve(dirname, name);
      const text = await readFile(fullPath, { encoding: "utf-8" });
      try {
        return { path: fullPath, content: JSON.parse(text) as unknown };
      } catch (error) {
        // Re-throw with the file path; the bare JSON.parse message does not say which file failed.
        const reason = error instanceof Error ? error.message : String(error);
        throw new SyntaxError(`Failed to parse ${fullPath}: ${reason}`);
      }
    }),
  );
}
/** A pending request to infer one schema from the sample data. */
interface TypeDefRequest {
  /** Key segments leading to the requested values; a `"*"` segment precedes keys that hold arrays or records. */
  path: [...string[], string];
  /** `path` joined with `"."`; used to de-duplicate requests and to look up schema names. */
  pathKey: string;
  /** Present (and `true`) only when the resulting schema should be exported from the generated file. */
  export?: true;
  /** Produces the objects to inspect for this request, given one parsed sample. */
  extract(x: any): object[];
}
/**
 * Every classification a sampled value can receive: the results of `typeof`,
 * plus three refinements of values that `typeof` reports as "object".
 */
type ValueType =
  // Results the `typeof` operator can produce.
  | "string" | "number" | "bigint" | "boolean" | "symbol" | "undefined" | "object" | "function"
  // Refinements of "object": the null value, arrays, and string-keyed records.
  | "null" | "array" | "record";
/** The outcome of inferring a schema for a single request. */
interface BuiltSchema {
  /** Copy of the originating request's path segments. */
  path: string[];
  /** The originating request's `pathKey`. */
  pathKey: string;
  /** True when a null or undefined entry was seen among the extracted items. */
  isNullable: boolean;
  /** For each key seen on any item, the set of value types observed under that key. */
  schema: Record<string, Set<ValueType>>;
  /** `true` when the schema should be exported from the generated file, otherwise `undefined`. */
  shouldExport: true | undefined;
}
/**
 * Keeps at most `limit` randomly chosen values and returns them in their original order.
 * Inputs that already fit within the limit are returned untouched, so small sample
 * sets are processed deterministically.
 */
function sampleInOrder<T>(values: T[], limit: number): T[] {
  if (values.length <= limit) {
    return values;
  }
  return values
    .map((value, index) => ({ value, index, rand: Math.random() }))
    .sort((a, b) => a.rand - b.rand)
    .slice(0, limit)
    .sort((a, b) => a.index - b.index)
    .map((entry) => entry.value);
}

/**
 * Infers, for one request, which value types occur under each key of the extracted objects.
 *
 * Nested arrays, records and plain objects are not described inline. Instead a follow-up
 * request is appended to `unfulfilledRequests` (once per path key, tracked in `paths`) so
 * the caller can build their schemas later. Those follow-up extractors ignore their
 * argument and read from the items sampled here.
 *
 * @param examples Parsed samples; `request.extract` is called once per sample.
 * @param request Describes where the objects live and how to pull them out of a sample.
 * @param paths Path keys already requested; mutated when a new request is queued.
 * @param unfulfilledRequests Work queue; mutated when a new request is queued.
 * @returns The observed value types per key, plus the request's path and export flag.
 */
function buildSchema(
  examples: unknown[],
  request: TypeDefRequest,
  paths: Set<string>,
  unfulfilledRequests: TypeDefRequest[],
): BuiltSchema {
  let isNullable = false;
  // Each extract call contributes at most 1000 items. Sampling is random only when that
  // cap is exceeded, and it preserves item order, so key order stays stable between runs.
  const items = examples.flatMap((example) => {
    const extracted = request.extract(example);
    const present = extracted.filter((x) => x != null);
    if (present.length !== extracted.length) {
      isNullable = true;
    }
    return sampleInOrder(present, 1000);
  });
  const itemKeys = [...new Set(items.flatMap((item) => Object.keys(item)))];
  console.log(`Building ${request.pathKey} Schema using ${items.length} examples. Found ${itemKeys.length} keys`);

  // Queues a follow-up request unless its path was already requested; reports whether it was new.
  const enqueue = (path: [...string[], string], extract: TypeDefRequest["extract"]): boolean => {
    const pathKey = path.join(".");
    if (paths.has(pathKey)) {
      return false;
    }
    paths.add(pathKey);
    unfulfilledRequests.push({ path, pathKey, extract });
    return true;
  };

  // Classifies one value found under `key`, queueing nested schemas as a side effect.
  const classify = (key: string, val: unknown): ValueType => {
    const type = typeof val;
    if (type !== "object") {
      // Non-finite numbers (e.g. a literal too large for a double) are reported as bigint.
      return type === "number" && !Number.isFinite(val) ? "bigint" : type;
    }
    if (val === null) {
      return "null";
    }
    if (Array.isArray(val)) {
      enqueue([...request.path, "*", key], () => items.flatMap((self: any) => self[key] ?? []));
      return "array";
    }
    if (isRecord(val as object)) {
      const isNew = enqueue([...request.path, "*", key], () =>
        items.flatMap((self: any) => Object.values(self[key] ?? {})),
      );
      if (isNew) {
        // The record detection is heuristic, so make the decision visible instead of aborting.
        console.log(`Treating ${[...request.path, "*", key].join(".")} as a record (z.record)`);
      }
      return "record";
    }
    enqueue([...request.path, key], () => items.map((self: any) => self[key]));
    return "object";
  };

  const schema = Object.fromEntries(
    itemKeys.map((key) => [
      key,
      new Set(items.map((item) => classify(key, (item as Record<string, unknown>)[key]))),
    ]),
  );
  return {
    path: [...request.path],
    pathKey: request.pathKey,
    isNullable,
    schema,
    shouldExport: request.export,
  };
}
/**
 * Heuristically decides whether `obj` is a string-keyed record (dictionary of
 * similarly shaped objects) rather than an object literal with fixed fields.
 *
 * Rules, in order:
 * - fewer than 5 keys: object literal;
 * - more than 75 keys: record;
 * - non-nullish values that are not all of type "object": object literal
 *   (records of primitives are easy to fix by hand in the generated file);
 * - 7 or more distinct key combinations among the values: object literal;
 * - otherwise: record.
 *
 * Nullish values are ignored throughout, so a record with some `null` entries
 * no longer throws when the value shapes are compared.
 *
 * @param obj Object to inspect.
 * @returns `true` when `obj` should be modelled as a record.
 */
function isRecord<T extends object>(obj: T): boolean {
  const keyCount = Object.keys(obj).length;
  if (keyCount < 5) return false;
  if (keyCount > 75) return true;

  const presentValues = Object.values(obj).filter((v) => v != null);
  const foundTypes = new Set(presentValues.map((v) => typeof v));
  // Exactly one observed type, and it must be "object"; anything else is an object literal.
  if (foundTypes.size !== 1 || !foundTypes.has("object")) {
    return false;
  }

  const shapes = new Set<string>();
  for (const value of presentValues) {
    shapes.add(Object.keys(value).sort().join(","));
    // Many differing key combinations suggest distinct types rather than one record value type.
    if (shapes.size > 6) {
      return false;
    }
  }
  // The value shapes generally agree: treat as a record.
  return true;
}
/**
 * Builds the root schema for `name` plus every nested schema discovered along the way.
 *
 * Works through a queue of requests: building one schema may append requests for
 * nested arrays, records and objects, which are processed in turn until none remain.
 *
 * @param jsons Parsed sample documents, all assumed to describe the same root type.
 * @param name Name of the root schema; also the first path segment of every nested schema.
 * @returns All schemas, root first, each with a unique name assigned.
 */
function buildSchemas(jsons: unknown[], name: string): NamedSchema[] {
  // Wrap the name in an array: `new Set(name)` would register each character separately.
  const paths = new Set<string>([name]);
  const pending: TypeDefRequest[] = [
    {
      path: [name],
      pathKey: name,
      export: true,
      extract: (x: any) => [x],
    },
  ];
  const schemata: BuiltSchema[] = [];
  for (let next = pending.shift(); next !== undefined; next = pending.shift()) {
    schemata.push(buildSchema(jsons, next, paths, pending));
  }
  return nameSchemas(schemata);
}
/** A built schema together with the identifier chosen for it in the generated file. */
interface NamedSchema extends BuiltSchema {
  /** Identifier for the schema; the generated constant is named `${name}Schema`. */
  name: string;
  /** Same meaning as on the base interface; re-declared here. */
  pathKey: string;
}
/**
 * Turns a path segment into a fragment usable in a TypeScript identifier.
 *
 * Every non-word character is removed and the first remaining character is upper-cased.
 * A fragment that would start with a digit is prefixed with `_`, because an identifier
 * cannot begin with a digit. A segment with no word characters (such as `"*"`) yields `""`.
 *
 * @param input Raw path segment, typically a JSON key.
 * @returns The identifier fragment, possibly empty.
 */
function schemaNameFromKey(input: string): string {
  const cleaned = input.replace(/\W/g, "");
  if (cleaned === "") {
    return cleaned;
  }
  if (/^\d/.test(cleaned)) {
    return `_${cleaned}`;
  }
  return cleaned.charAt(0).toUpperCase() + cleaned.slice(1);
}
/**
 * Picks a name for `built` that is not yet in `usedNames` and records it there.
 *
 * Starts from the last path segment and keeps prepending earlier segments
 * until the candidate is unused.
 *
 * @throws Error when even the full path does not produce an unused name.
 */
function buildSchemaName(built: BuiltSchema, usedNames: Set<string>): string {
  let candidate = "";
  for (let i = built.path.length - 1; i >= 0; i--) {
    candidate = schemaNameFromKey(built.path[i]) + candidate;
    if (usedNames.has(candidate)) {
      continue;
    }
    usedNames.add(candidate);
    return candidate;
  }
  throw new Error("Could not come up with a unique schema name for path: " + built.path.join("."));
}
/**
 * Assigns a unique name to every schema, in order.
 * The schema objects are extended in place and the same objects are returned.
 */
function nameSchemas(schemas: BuiltSchema[]): NamedSchema[] {
  const taken = new Set<string>();
  const named: NamedSchema[] = [];
  for (const schema of schemas) {
    const name = buildSchemaName(schema, taken);
    named.push(Object.assign(schema, { name }));
  }
  return named;
}
/** Source text of a zod expression for one observed value type. */
type ZodDef = {
  /** The zod expression, e.g. `z.string()`. */
  repr: string;
  /** False for the null/undefined placeholders, true for every other type. */
  real: boolean;
};
/**
 * Maps one observed value type to the zod expression that validates it.
 *
 * Primitive types map to fixed expressions. "object", "record" and "array" refer to
 * the named schema registered for the nested path: `[...schema.path, key]` for
 * objects, and `[...schema.path, "*", key]` for records and arrays.
 *
 * @param value The observed value type.
 * @param key Key under which the value was observed.
 * @param schema Schema that owns `key`; its path locates nested schemas.
 * @param pathKeyToName Lookup from path key to schema name.
 * @throws Error for "symbol" and "function", which cannot occur in JSON.
 */
function valueTypeToZodDef(
  value: ValueType,
  key: string,
  schema: NamedSchema,
  pathKeyToName: Record<string, string>,
): ZodDef {
  if (value === "symbol" || value === "function") {
    throw new Error("Received content that is impossible in JSON");
  }

  const fixedReprs = new Map<string, string>([
    ["undefined", "z.undefined()"],
    ["null", "z.null()"],
    ["string", "z.string()"],
    ["number", "z.number()"],
    ["bigint", "z.bigint()"],
    ["boolean", "z.boolean()"],
  ]);
  const fixed = fixedReprs.get(value);
  if (fixed !== undefined) {
    // Only the null/undefined placeholders count as "not real".
    return { repr: fixed, real: value !== "undefined" && value !== "null" };
  }

  // Everything left refers to another generated schema by name.
  const segments = value === "object" ? [...schema.path, key] : [...schema.path, "*", key];
  const reference = `${pathKeyToName[segments.join(".")]}Schema`;
  if (value === "record") {
    return { repr: `z.record(z.string(), ${reference})`, real: true };
  }
  if (value === "array") {
    return { repr: `z.array(${reference})`, real: true };
  }
  return { repr: reference, real: true };
}
/**
 * Produces the zod expression (with trailing comma) for one key of a schema.
 *
 * - No real type observed (only null and/or missing): emits the placeholder with a
 *   TODO comment so the gap is visible in the generated file.
 * - One real type: emits it directly; several: emits a `z.union([...])`.
 * - When null was observed, `.nullable()` is appended; when the key was missing from
 *   some items, `.optional()` is appended. Without these the generated strict schema
 *   would reject the very samples it was derived from.
 *
 * @param valueTypes All value types observed under `key`.
 * @param key Key being described.
 * @param schema Schema that owns `key`.
 * @param pathKeyToName Lookup from path key to schema name.
 * @returns Source text for the property value, ending in a comma.
 */
function zodType(
  valueTypes: Set<ValueType>,
  key: string,
  schema: NamedSchema,
  pathKeyToName: Record<string, string>,
): string {
  const isNullable = valueTypes.has("null");
  const isOptional = valueTypes.has("undefined");
  const allTypes = [...valueTypes].map((vt) => valueTypeToZodDef(vt, key, schema, pathKeyToName));
  const realTypes = allTypes.filter((def) => def.real);

  if (realTypes.length === 0) {
    if (isNullable && isOptional) {
      return "z.null().optional(), // TODO: Find a query that gives data for this";
    }
    return `${allTypes[0].repr}, // TODO: Find a query that gives data for this`;
  }

  let repr =
    realTypes.length === 1 ? realTypes[0].repr : `z.union([${realTypes.map((r) => r.repr).join(", ")}])`;
  if (isNullable) {
    repr += ".nullable()";
  }
  if (isOptional) {
    repr += ".optional()";
  }
  return `${repr},`;
}
/**
 * Renders a schema as a `z.strictObject({...})` expression, one property per line.
 *
 * Property names are emitted through `JSON.stringify`, so quotes, backslashes and
 * control characters in a key are all escaped, not just the first double quote.
 *
 * @param schema Schema to render.
 * @param pathKeyToName Lookup from path key to schema name, for nested references.
 * @returns Source text of the zod object expression.
 */
function zodSchemaDef(schema: NamedSchema, pathKeyToName: Record<string, string>): string {
  const properties = Object.entries(schema.schema).map(
    ([key, val]) => `\n ${JSON.stringify(key)}: ${zodType(val, key, schema, pathKeyToName)}`,
  );
  return `z.strictObject({${properties.join("")}\n})`;
}
/**
 * Renders the declaration of one schema constant.
 *
 * Every declaration is terminated with a semicolon. Schemas flagged for export also
 * get an `export` modifier and an exported `z.infer` type alias named after the schema.
 *
 * @param schema Schema to declare.
 * @param pathKeyToName Lookup from path key to schema name, for nested references.
 * @returns Source text of the declaration(s).
 */
function typescriptZodSchemaDef(schema: NamedSchema, pathKeyToName: Record<string, string>): string {
  const schemaConst = `${schema.name}Schema`;
  const declaration = `const ${schemaConst} = ${zodSchemaDef(schema, pathKeyToName)};`;
  if (!schema.shouldExport) {
    return declaration;
  }
  return `export ${declaration}\n\nexport type ${schema.name} = z.infer<typeof ${schemaConst}>;`;
}
/**
 * Assembles the generated module: the zod import first, then every schema
 * declaration in reverse input order, separated by blank lines.
 */
function generateFileContent(schemas: NamedSchema[]): string {
  const nameEntries = schemas.map((schema) => [schema.pathKey, schema.name]);
  const pathKeyToName = Object.fromEntries(nameEntries);
  const declarations = schemas.map((schema) => typescriptZodSchemaDef(schema, pathKeyToName));
  // Reversing puts later (nested) schemas ahead of the earlier ones that reference them.
  declarations.reverse();
  return ['import { z } from "zod";', ...declarations].join("\n\n");
}
/**
 * CLI entry point: for every sample directory given on the command line, infers zod
 * schemas from its JSON files and writes `<directory name>.ts` next to the directory.
 *
 * Each argument is resolved to an absolute path first, so `.` or `..` yield the real
 * directory name instead of a schema called `.`. Directories without any JSON file
 * are skipped with a warning rather than producing an empty schema file.
 *
 * Exits the process with status 1 when no directory is given.
 */
async function main(): Promise<void> {
  const sampleDirs = process.argv.slice(2);
  if (sampleDirs.length === 0) {
    console.error("Usage: ./makeZod.ts sample_dir [...other_sample_dirs]");
    process.exit(1);
  }
  for (const dirname of sampleDirs) {
    const sampleDir = resolve(dirname);
    const schemaName = basename(sampleDir);
    const outfile = join(sampleDir, "..", `${schemaName}.ts`);
    console.log(`Generating zod schema for json files found in ${dirname}. Saving to ${outfile}...\n`);
    const jsons = await loadJson(sampleDir);
    if (jsons.length === 0) {
      console.warn(`No .json files found in ${dirname}; skipping`);
      continue;
    }
    const schemas = buildSchemas(
      jsons.map((j) => j.content),
      schemaName,
    );
    const outfileContent = generateFileContent(schemas);
    await writeFile(outfile, outfileContent, "utf-8");
    console.log(`${dirname} complete`);
  }
}
// Run the CLI only when this module is the script named on the command line.
const invokedScript = process.argv?.[1];
const thisModule = fileURLToPath(import.meta.url);
if (invokedScript === thisModule) {
  await main();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment