Skip to content

Instantly share code, notes, and snippets.

@Crisfole
Last active July 20, 2023 16:40
Show Gist options
  • Save Crisfole/d6a6c696aed057eab25225ddefd47f63 to your computer and use it in GitHub Desktop.
Save Crisfole/d6a6c696aed057eab25225ddefd47f63 to your computer and use it in GitHub Desktop.
JSON Sample Directory to zod types
#!npx ts-node
import { readdir, readFile, writeFile } from "node:fs/promises";
import { basename, join, resolve, relative } from "node:path";
import { cwd } from "node:process";
import { fileURLToPath } from "node:url";
/** A JSON sample file that has been read and parsed, as produced by `loadJson`. */
interface JsonFile {
// Absolute path the file was read from (`loadJson` builds it with `resolve`).
path: string;
// Whatever `JSON.parse` returned for the file's text; its shape is not validated.
content: unknown;
}
/**
 * Reads and parses every `*.json` entry directly inside `dirname` (no recursion).
 *
 * Directories whose name happens to end in `.json` are skipped, because handing
 * them to `readFile` would reject with EISDIR and fail the whole load.
 *
 * @param dirname Directory to scan.
 * @returns One entry per JSON file, holding its absolute path and parsed content.
 * @throws Rejects if the directory or a file cannot be read, or if a file is not valid JSON.
 */
async function loadJson(dirname: string): Promise<JsonFile[]> {
  const entries = await readdir(dirname, {
    encoding: "utf-8",
    withFileTypes: true,
  });
  // `!isDirectory()` rather than `isFile()` so symlinked files keep working.
  const jsonEntries = entries.filter((entry) => entry.name.endsWith(".json") && !entry.isDirectory());
  // The files are independent of each other, so read them in parallel.
  return Promise.all(
    jsonEntries.map(async (entry): Promise<JsonFile> => {
      const fullPath = resolve(dirname, entry.name);
      const text = await readFile(fullPath, { encoding: "utf-8" });
      return { path: fullPath, content: JSON.parse(text) as unknown };
    }),
  );
}
/** A pending request to build the schema for one location in the sample data. */
interface TypeDefRequest {
// Path segments identifying the location; the tuple type requires at least one segment.
path: [...string[], string];
// `path` joined with "."; used as the key for de-duplicating requests.
pathKey: string;
// Present (true) when the generated schema should be exported.
export?: true;
// Returns the objects found at this location for the given input.
extract(x: any): object[];
}
/**
 * Kinds of value recorded for a key in the sample data: the possible results of
 * `typeof`, plus "null", "array" and "record", which `buildSchema` reports in
 * place of "object" for those cases. `buildSchema` also reports "bigint" for
 * numbers that are not finite.
 */
type ValueType =
| "string"
| "number"
| "bigint"
| "boolean"
| "symbol"
| "undefined"
| "object"
| "function"
| "null"
| "array"
| "record";
/** Result of `buildSchema` for one requested location. */
interface BuiltSchema {
// Copy of the request's path segments.
path: string[];
// The request's `pathKey`.
pathKey: string;
// True when at least one extracted item was null or undefined.
isNullable: boolean;
// For every key seen on the sampled items, the set of value kinds observed for it.
schema: Record<string, Set<ValueType>>;
// Carried over from the request's `export` flag.
shouldExport: true | undefined;
}
/**
 * Infers which kinds of value appear under each key of the objects found at one
 * location of the sample data.
 *
 * Nested arrays, records (dictionary-like objects, as judged by `isRecord`) and
 * plain objects are not expanded here. Instead a follow-up request is appended
 * to `unfulfilledRequests` (at most once per path key) so the caller can build
 * their schemas afterwards. Array and record members are addressed with a "*"
 * path segment in front of the key.
 *
 * @param examples Sample documents; each one is passed to `request.extract`.
 * @param request The location to describe.
 * @param paths Path keys that already have a request queued; added to in place.
 * @param unfulfilledRequests Work queue; follow-up requests are pushed onto it.
 * @returns The observed value kinds per key, and whether any extracted item was nullish.
 */
function buildSchema(
  examples: unknown[],
  request: TypeDefRequest,
  paths: Set<string>,
  unfulfilledRequests: TypeDefRequest[],
): BuiltSchema {
  let isNullable = false;
  // Each example provides no more than 1000 randomly selected items.
  // If this frequently misses keys, raise the limit.
  const items = examples.flatMap((example) =>
    request
      .extract(example)
      .filter((candidate) => {
        if (candidate == null) {
          isNullable = true;
          return false;
        }
        return true;
      })
      // Shuffle by sorting on a random key, then keep the first 1000.
      .map((value) => ({ value, rand: Math.random() }))
      .sort((a, b) => a.rand - b.rand)
      .map((x) => x.value)
      .slice(0, 1000),
  );
  const itemKeys = [...new Set(items.flatMap((item) => Object.keys(item)))];
  console.log(`Building ${request.pathKey} Schema using ${items.length} examples. Found ${itemKeys.length} keys`);

  // Queue a follow-up request for a nested location; returns false if one was already queued.
  const enqueue = (path: [...string[], string], extract: TypeDefRequest["extract"]): boolean => {
    const pathKey = path.join(".");
    if (paths.has(pathKey)) {
      return false;
    }
    paths.add(pathKey);
    unfulfilledRequests.push({ path, pathKey, extract });
    return true;
  };

  // Classify one value and queue a follow-up request for nested structures.
  // The follow-up extractors ignore their argument and read from the items sampled above.
  const classify = (key: string, val: unknown): ValueType => {
    if (val === null) {
      return "null";
    }
    if (typeof val === "number") {
      // Non-finite numbers are reported as "bigint".
      return Number.isFinite(val) ? "number" : "bigint";
    }
    if (typeof val !== "object") {
      return typeof val;
    }
    if (Array.isArray(val)) {
      enqueue([...request.path, "*", key], (_useSampledItems) => items.flatMap((self: any) => self[key] ?? []));
      return "array";
    }
    if (isRecord(val)) {
      const path: [...string[], string] = [...request.path, "*", key];
      const queued = enqueue(path, (_useSampledItems) =>
        items.flatMap((self: any) => Object.values(self[key] ?? {})),
      );
      if (queued) {
        // Record detection is heuristic, so tell the user where it was applied.
        console.log(`Treating ${path.join(".")} as a record (dictionary) type`);
      }
      return "record";
    }
    enqueue([...request.path, key], (_useSampledItems) => items.map((self: any) => self[key]));
    return "object";
  };

  const schema = Object.fromEntries(
    itemKeys.map((key): [string, Set<ValueType>] => [
      key,
      // A key missing from an item yields `undefined`, recorded as "undefined".
      new Set(items.map((item: any) => classify(key, item[key]))),
    ]),
  );
  return {
    path: [...request.path],
    pathKey: request.pathKey,
    isNullable,
    schema,
    shouldExport: request.export,
  };
}
/**
 * Heuristically decides whether an object is a record (a dictionary keyed by
 * arbitrary strings whose values share a shape) rather than an object literal
 * with a fixed set of fields.
 *
 * Rules, in order:
 *  - fewer than 5 keys: object literal;
 *  - more than 75 keys: record;
 *  - non-nullish values that are not all of `typeof` "object": object literal;
 *  - more than 6 distinct key sets among the values: object literal;
 *  - otherwise: record.
 *
 * Null and undefined values are ignored throughout, so a dictionary with some
 * null entries is still classified instead of throwing.
 *
 * @param obj The object to classify.
 * @returns True when `obj` looks like a record.
 */
function isRecord<T extends object>(obj: T): boolean {
  const minKeysForRecord = 5;
  const alwaysRecordAbove = 75;
  const maxDistinctShapes = 6;

  const values: unknown[] = Object.values(obj);
  // If there are fewer than 5 keys go with an object literal.
  if (values.length < minKeysForRecord) return false;
  // If there are more than 75 keys this should be treated as a record type.
  if (values.length > alwaysRecordAbove) return true;

  const present = values.filter((value) => value != null);
  const foundTypes = new Set(present.map((value) => typeof value));
  // Mixed value types, or only primitive values, mean an object literal. This will be
  // wrong for some APIs but is an easy manual fix: `z.record(z.string(), z.string())`.
  if (foundTypes.size !== 1 || !foundTypes.has("object")) {
    return false;
  }

  const shapes = new Set<string>();
  for (const value of present) {
    shapes.add(Object.keys(value as object).sort().join(","));
    // 7 or more differing key combinations most likely means distinct types,
    // so use an object literal. This threshold will probably need tweaking.
    if (shapes.size > maxDistinctShapes) {
      return false;
    }
  }
  // If the keys generally match well assume they're records.
  return true;
}
/**
 * Builds schemas for the root of the sample documents and for every nested
 * location discovered along the way.
 *
 * Requests are processed first-in, first-out; `buildSchema` appends follow-up
 * requests to the same queue, so the loop ends once no new locations turn up.
 *
 * @param jsons Parsed sample documents.
 * @param name Name of the root schema; also the root path segment.
 * @returns The built schemas, each with a unique name, in the order they were processed.
 */
function buildSchemas(jsons: unknown[], name: string): NamedSchema[] {
  // Wrap the name in an array: `new Set(name)` would add its individual characters.
  const paths = new Set<string>([name]);
  const unfulfilled: TypeDefRequest[] = [
    {
      path: [name],
      pathKey: name,
      export: true,
      // The root location is the document itself.
      extract: (x: any) => [x],
    },
  ];
  const schemata: BuiltSchema[] = [];
  for (let next = unfulfilled.shift(); next !== undefined; next = unfulfilled.shift()) {
    schemata.push(buildSchema(jsons, next, paths, unfulfilled));
  }
  return nameSchemas(schemata);
}
/** A built schema that has been assigned a name (see `nameSchemas`). */
interface NamedSchema extends BuiltSchema {
// Name assigned by `buildSchemaName`; generated identifiers are `${name}Schema` and `${name}`.
name: string;
// Same declaration as the one inherited from `BuiltSchema`.
pathKey: string;
}
/**
 * Turns a path segment into a name fragment: every character other than ASCII
 * letters, digits and underscore is dropped, then the first remaining character
 * is upper-cased. A segment with no such characters (e.g. "*") yields "".
 */
function schemaNameFromKey(input: string): string {
  const wordCharsOnly = input.replace(/[^A-Za-z0-9_]/g, "");
  const head = wordCharsOnly.charAt(0);
  const tail = wordCharsOnly.slice(1);
  return head.toUpperCase() + tail;
}
/**
 * Picks an unused name for a schema by walking its path from the last segment
 * towards the root, prepending each converted segment until the candidate is
 * not yet in `usedNames`. The chosen name is added to `usedNames`.
 *
 * @throws Error when even the full path does not produce an unused name.
 */
function buildSchemaName(built: BuiltSchema, usedNames: Set<string>): string {
  // Work on a copy so the schema's own path is left untouched.
  const remaining = [...built.path];
  let candidate = "";
  for (let segment = remaining.pop(); segment !== undefined; segment = remaining.pop()) {
    candidate = schemaNameFromKey(segment) + candidate;
    if (usedNames.has(candidate)) {
      continue;
    }
    usedNames.add(candidate);
    return candidate;
  }
  throw new Error("Could not come up with a unique schema name for path: " + built.path.join("."));
}
/**
 * Assigns a unique name to every schema, in order. The `name` property is
 * added to the given schema objects themselves, and those same objects are returned.
 */
function nameSchemas(schemas: BuiltSchema[]): NamedSchema[] {
  const taken = new Set<string>();
  const named: NamedSchema[] = [];
  for (const schema of schemas) {
    const name = buildSchemaName(schema, taken);
    named.push(Object.assign(schema, { name }));
  }
  return named;
}
// A zod expression (`repr`) for one observed value kind. `real` is false for the
// null/undefined kinds, which `zodType` only emits when no other kind was observed.
type ZodDef = { repr: string; real: boolean };
/**
 * Maps one observed value kind to the zod expression used for it.
 *
 * Nested kinds refer to another generated schema by name: "object" looks up
 * the path `[...schema.path, key]`, while "record" and "array" look up
 * `[...schema.path, "*", key]`.
 *
 * @param value The observed kind.
 * @param key The property the kind was observed on.
 * @param schema The schema that owns the property.
 * @param pathKeyToName Schema names by dotted path key.
 * @throws Error for "symbol" and "function", which cannot occur in JSON.
 */
function valueTypeToZodDef(
  value: ValueType,
  key: string,
  schema: NamedSchema,
  pathKeyToName: Record<string, string>,
): ZodDef {
  const concrete = (repr: string): ZodDef => ({ repr, real: true });
  const placeholder = (repr: string): ZodDef => ({ repr, real: false });
  // Identifier of the generated schema at the given segments below this schema's path.
  const schemaRef = (...segments: string[]): string =>
    `${pathKeyToName[[...schema.path, ...segments].join(".")]}Schema`;

  switch (value) {
    case "undefined":
      return placeholder("z.undefined()");
    case "null":
      return placeholder("z.null()");
    case "string":
      return concrete("z.string()");
    case "number":
      return concrete("z.number()");
    case "bigint":
      return concrete("z.bigint()");
    case "boolean":
      return concrete("z.boolean()");
    case "object":
      return concrete(schemaRef(key));
    case "record":
      return concrete(`z.record(z.string(), ${schemaRef("*", key)})`);
    case "array":
      return concrete(`z.array(${schemaRef("*", key)})`);
    case "symbol":
    case "function":
      throw new Error("Received content that is impossible in JSON");
  }
}
/**
 * Renders the zod expression for one property, followed by a trailing comma,
 * from the set of value kinds observed for it.
 *
 * - Only null/undefined observed: a placeholder with a TODO comment.
 * - One other kind: that kind's expression; several: a `z.union([...])` of them.
 * - When null and/or undefined were observed next to other kinds, `.nullable()`
 *   and/or `.optional()` are appended, so the generated schema accepts the
 *   samples it was derived from.
 *
 * @param valueTypes Kinds observed for the property; "undefined" means the key was missing somewhere.
 * @param key The property name.
 * @param schema The schema that owns the property.
 * @param pathKeyToName Schema names by dotted path key.
 * @returns The expression text ending in "," (plus a TODO comment for placeholders).
 * @throws Error when `valueTypes` is empty.
 */
function zodType(
  valueTypes: Set<ValueType>,
  key: string,
  schema: NamedSchema,
  pathKeyToName: Record<string, string>,
): string {
  const isNullable = valueTypes.has("null");
  const isOptional = valueTypes.has("undefined");
  const allTypes = [...valueTypes].map((vt) => valueTypeToZodDef(vt, key, schema, pathKeyToName));
  const [firstReal, ...otherReal] = allTypes.filter((def) => def.real).map((def) => def.repr);

  if (firstReal === undefined) {
    // No sample carried actual data for this key.
    if (isNullable && isOptional) {
      return "z.null().optional(), // TODO: Find a query that gives data for this";
    }
    const [placeholder] = allTypes;
    if (placeholder === undefined) {
      throw new Error(`No value types were observed for key "${key}" of ${schema.pathKey}`);
    }
    return `${placeholder.repr}, // TODO: Find a query that gives data for this`;
  }

  const base = otherReal.length === 0 ? firstReal : `z.union([${[firstReal, ...otherReal].join(", ")}])`;
  const modifiers = `${isNullable ? ".nullable()" : ""}${isOptional ? ".optional()" : ""}`;
  return `${base}${modifiers},`;
}
/**
 * Renders the `z.strictObject({...})` expression for a schema, one property per line.
 *
 * Property names are written with `JSON.stringify`, so quotes, backslashes
 * and control characters in a key are all escaped and the output stays valid
 * TypeScript.
 *
 * @param schema The schema to render.
 * @param pathKeyToName Schema names by dotted path key, used for nested references.
 */
function zodSchemaDef(schema: NamedSchema, pathKeyToName: Record<string, string>): string {
  const fields = Object.entries(schema.schema).map(
    ([key, kinds]) => `\n ${JSON.stringify(key)}: ${zodType(kinds, key, schema, pathKeyToName)}`,
  );
  return `z.strictObject({${fields.join("")}\n})`;
}
/**
 * Renders the TypeScript declaration for one schema: `const <Name>Schema = ...`.
 * Schemas flagged for export are additionally exported together with an
 * inferred `<Name>` type alias.
 */
function typescriptZodSchemaDef(schema: NamedSchema, pathKeyToName: Record<string, string>) {
  const { name, shouldExport } = schema;
  const declaration = `const ${name}Schema = ${zodSchemaDef(schema, pathKeyToName)}`;
  if (shouldExport) {
    return `export ${declaration};\n\nexport type ${name} = z.infer<typeof ${name}Schema>;`;
  }
  return declaration;
}
/**
 * Produces the full text of the generated module: the zod import first, then
 * the schema declarations in reverse input order, separated by blank lines.
 */
function generateFileContent(schemas: NamedSchema[]): string {
  const nameByPathKey = Object.fromEntries(schemas.map(({ pathKey, name }) => [pathKey, name]));
  const declarations = schemas.map((schema) => typescriptZodSchemaDef(schema, nameByPathKey));
  // Emit in reverse so the last-built schemas come first in the file.
  declarations.reverse();
  return ['import { z } from "zod";', ...declarations].join("\n\n");
}
/**
 * Command-line entry point. For every sample directory given as an argument,
 * builds schemas from its JSON files and writes `<directory name>.ts` next to
 * the directory. Prints usage and exits with status 1 when no directory is given.
 */
async function main() {
  const sampleDirs = process.argv.slice(2);
  if (sampleDirs.length === 0) {
    console.error("Usage: ./makeZod.ts sample_dir [...other_sample_dirs]");
    process.exit(1);
  }
  // Directories are handled one at a time, in argument order.
  for (const dirname of sampleDirs) {
    const schemaName = basename(dirname);
    const outfile = join(dirname, "..", `${schemaName}.ts`);
    console.log(`Generating zod schema for json files found in ${dirname}. Saving to ${outfile}...\n`);
    const samples = await loadJson(dirname);
    const contents = samples.map(({ content }) => content);
    const outfileContent = generateFileContent(buildSchemas(contents, schemaName));
    await writeFile(outfile, outfileContent, "utf-8");
    console.log(`${dirname} complete`);
  }
}
// Run `main` only when this file is the script Node was started with
// (argv[1]), not when it is imported from another module.
const invokedScript = process.argv?.[1];
if (invokedScript === fileURLToPath(import.meta.url)) {
  await main();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment