Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save archisgore/d1dda9965db63342546501689fa3f309 to your computer and use it in GitHub Desktop.
Save archisgore/d1dda9965db63342546501689fa3f309 to your computer and use it in GitHub Desktop.
Convert a JSON Schema into an AWS Glue struct schema (for automating JSON parsing in Glue using the AWS CDK)
import request from 'sync-request';
import * as glue from '@aws-cdk/aws-glue';
/**
 * Fetches the Polytect reference JSON schema with a synchronous HTTP GET and
 * converts it into a Glue type.
 *
 * The parsed document is used both as the root node to convert and, through
 * its `definitions` property, as the lookup table for `$ref` resolution.
 *
 * @returns The Glue type produced by `recursiveGlueColumns` for the root schema.
 */
export function polytectJsonSchemaToGlue(): glue.Type {
  console.log("Getting Polytect reference schema...")
  const response = request(
    'GET',
    'https://raw.githubusercontent.com/polyverse/polytect/master/reference/schema.json',
  );
  // The body is converted to text before parsing.
  const rootSchema = JSON.parse(response.getBody().toString());
  const { definitions } = rootSchema;
  return recursiveGlueColumns(rootSchema, definitions);
}
/**
 * Recursively converts a JSON-schema node into the equivalent AWS Glue type.
 *
 * Node shapes are checked in this order:
 *  - `type`: `object` becomes a struct with one column per entry in
 *    `properties`; `string`, `boolean` and `integer` map to the matching Glue
 *    primitive. A union with `null` (for example `["string", "null"]` or
 *    `["null", "integer"]`) is treated as its single non-null member, in any
 *    order. Any other `type` is logged and mapped to STRING as a best-effort
 *    fallback.
 *  - `$ref`: the third `/`-separated segment of the reference is looked up in
 *    `definitions` and converted.
 *  - `allOf`: only the first entry is converted; a second entry is accepted
 *    only when it is the `null` type.
 *  - `anyOf`: when the first entry is a `$ref` it is converted directly;
 *    otherwise every variant contributes one column to a struct (named after
 *    its first `required` property or its first `enum` value), and `null`
 *    variants are skipped.
 *  - `enum`: mapped to STRING.
 *
 * @param schema - JSON-schema node to convert.
 * @param definitions - Map of named schemas used to resolve `$ref` entries.
 * @returns The Glue type describing `schema`.
 * @throws Error when the node is missing or not an object, when a `$ref`
 *   cannot be resolved, when `allOf`/`anyOf` has an unsupported shape, or when
 *   the node matches none of the supported shapes.
 */
function recursiveGlueColumns(schema: any, definitions: any): glue.Type {
  // Fail with a readable message instead of a TypeError on property access.
  if (schema === null || typeof schema !== 'object') {
    throw new Error("Cannot convert a missing or non-object schema node: " + JSON.stringify(schema));
  }

  if (typeof schema.type !== 'undefined') {
    // Collapse nullable unions so the position of "null" does not matter.
    switch (singleNonNullType(schema.type)) {
      case 'object': {
        const columns: glue.Column[] = [];
        // An object without `properties` yields an empty struct.
        for (const [propertyName, property] of Object.entries(schema.properties || {})) {
          columns.push({
            name: propertyName,
            //comment: property.description,
            type: recursiveGlueColumns(property, definitions),
          });
        }
        return glue.Schema.struct(columns);
      }
      case 'string':
        return glue.Schema.STRING;
      case 'boolean':
        return glue.Schema.BOOLEAN;
      case 'integer':
        return glue.Schema.INTEGER;
      default:
        // Best-effort fallback: unsupported types are reported and read as strings.
        console.log("Reached the typed end!! ", JSON.stringify(schema));
        return glue.Schema.STRING;
    }
  }

  if (typeof schema["$ref"] !== 'undefined') {
    const ref = schema["$ref"];
    // References are expected to look like "#/definitions/<name>".
    const refName = typeof ref === 'string' ? ref.split("/")[2] : undefined;
    const refSchema =
      definitions == null || typeof refName === 'undefined' ? undefined : definitions[refName];
    if (typeof refSchema === 'undefined') {
      throw new Error("Unable to resolve schema reference: " + JSON.stringify(ref));
    }
    return recursiveGlueColumns(refSchema, definitions);
  }

  if (typeof schema.allOf !== 'undefined') {
    // Only "one real schema, optionally followed by null" is supported.
    if (schema.allOf.length > 1 && !isNullType(schema.allOf[1].type)) {
      console.log("Unable to handle multiple schema allOfs: ", schema);
      throw new Error("Unable to handle multiple schema allOfs: " + JSON.stringify(schema));
    }
    if (schema.allOf.length < 1) {
      console.log("Unable to handle zero schema allOfs: ", schema);
      throw new Error("Unable to handle zero schema allOfs: " + JSON.stringify(schema));
    }
    return recursiveGlueColumns(schema.allOf[0], definitions);
  }

  if (typeof schema.anyOf !== 'undefined') {
    if (schema.anyOf.length < 1) {
      console.log("Unable to handle zero schema anyOfs: ", schema);
      throw new Error("Unable to handle zero schema anyOfs: " + JSON.stringify(schema));
    }
    // A leading $ref is converted on its own; the remaining entries are ignored.
    if (typeof schema.anyOf[0]["$ref"] !== 'undefined') {
      return recursiveGlueColumns(schema.anyOf[0], definitions);
    }

    // Otherwise flatten the variants into one struct, one column per variant.
    const columns: glue.Column[] = [];
    for (const variant of schema.anyOf) {
      if (typeof variant.required !== 'undefined') {
        // The variant's first required property names the column.
        const propertyName = variant.required[0];
        const property = variant.properties == null ? undefined : variant.properties[propertyName];
        columns.push({
          name: propertyName,
          //comment: variant.description,
          type: recursiveGlueColumns(property, definitions),
        });
      } else if (typeof variant.enum !== 'undefined') {
        // The variant's first enum value names a string column.
        columns.push({
          name: variant.enum[0],
          //comment: variant.description,
          type: glue.Schema.STRING,
        });
      } else if (isNullType(variant.type)) {
        // A null variant contributes no column.
      } else {
        console.log("Reached the anyOf end!! ", JSON.stringify(schema));
        throw new Error("Unsupported anyOf variant: " + JSON.stringify(variant));
      }
    }
    return glue.Schema.struct(columns);
  }

  if (typeof schema.enum !== 'undefined') {
    return glue.Schema.STRING;
  }

  console.log("Reached the end VERY !! ", JSON.stringify(schema));
  throw new Error("Unsupported schema node: " + JSON.stringify(schema));
}

/**
 * Reduces a JSON-schema `type` value to its single non-null member when it is
 * an array with exactly one such member; any other value is returned unchanged.
 */
function singleNonNullType(type: unknown): unknown {
  if (!Array.isArray(type)) {
    return type;
  }
  const nonNullMembers = type.filter((member) => member !== 'null');
  return nonNullMembers.length === 1 ? nonNullMembers[0] : type;
}

/** Reports whether a JSON-schema `type` value is `"null"` or the one-element array `["null"]`. */
function isNullType(type: unknown): boolean {
  return type === 'null' || (Array.isArray(type) && type.length === 1 && type[0] === 'null');
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment