Last active
August 9, 2024 03:42
-
-
Save jonschlinkert/b5cc36ccb10a422c7dca0c0be4272ac0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Closes every block that is still open on `stack` so that a truncated
 * document serializes to balanced JSON text.
 *
 * Blocks are closed innermost-first. For each open block:
 * - `string`: a closing quote is appended.
 * - `property` (object): a dangling colon receives an empty-string value, a
 *   trailing comma is dropped, a key without a colon receives `:""`, and an
 *   incomplete scalar value is completed before `}` is appended.
 * - `array`: a trailing comma is dropped or an incomplete scalar is completed
 *   before `]` is appended.
 *
 * Both arguments are mutated: nodes are appended to / removed from the open
 * blocks, and `stack` is popped until only its first entry remains.
 *
 * @param ast - Node handed back to the caller once the repair is done.
 * @param stack - Stack of currently open blocks; index 0 is never closed.
 * @returns The `ast` argument, unchanged by reference.
 */
export const fixJson = (ast: any, stack: any[]) => {
  // Completes a scalar that was cut off mid-token (e.g. `1.`, `1e-`, `-`, `tr`).
  const completeScalar = (node: any) => {
    if (node?.type !== 'text') {
      return;
    }

    const text = node.value;

    if (/\d\.$/.test(text) || /\d[eE][+-]?$/.test(text) || /^-$/.test(text)) {
      // A number that stops after `.`, after the exponent marker/sign, or after
      // a lone minus sign is not valid JSON; one more digit makes it valid.
      node.value += '0';
    } else if (/tr?u?e?$/.test(text)) {
      node.value = 'true';
    } else if (/fa?l?s?e?$/.test(text)) {
      node.value = 'false';
    } else if (/nu?l?l?$/.test(text)) {
      node.value = 'null';
    }
  };

  // Index of the last comma in `nodes`, or 0 when there is none.
  const lastEntryStart = (nodes: any[]) => {
    for (let i = nodes.length - 1; i >= 0; i--) {
      if (nodes[i]?.type === 'comma') {
        return i;
      }
    }
    return 0;
  };

  while (stack.length > 1) {
    const block = stack[stack.length - 1];
    const nodes: any[] = block.nodes;
    // Captured before any repair below, so it may refer to a node that is popped.
    const last = nodes[nodes.length - 1];

    switch (block.name) {
      case 'string':
        nodes.push({ type: 'string_close', value: '"' });
        break;

      case 'property': {
        if (last?.type === 'colon') {
          nodes.push({ type: 'text', value: '""' });
        } else if (last?.type === 'comma') {
          nodes.pop();
        }

        // Look only at the final `key: value` entry (everything after the last comma).
        const entry = nodes.slice(lastEntryStart(nodes));
        const hasColon = entry.some(node => node.type === 'colon');

        if (entry.length > 1 && !hasColon) {
          // A key was started but never given a value.
          nodes.push({ type: 'colon', value: ':' });
          nodes.push({ type: 'text', value: '""' });
        } else {
          completeScalar(last);
        }

        nodes.push({ type: 'property_close', value: '}' });
        break;
      }

      case 'array':
        if (last?.type === 'comma') {
          nodes.pop();
        } else {
          completeScalar(last);
        }
        nodes.push({ type: 'array_close', value: ']' });
        break;

      default:
        // Unknown block kinds are left untouched and simply unwound.
        break;
    }

    stack.pop();
  }

  return ast;
};
/**
 * Tokenizes JSON-like text into a small AST of nested "block" nodes
 * (`string`, `array`, `property`) whose leaves carry the original characters,
 * so the tree can be re-serialized by concatenating leaf values.
 *
 * The parser is deliberately lenient: input may be truncated or contain bare
 * words. When an object is closed with `}`, a bare (unquoted) trailing word is
 * wrapped in quotes and a missing `:` / value is filled in with `""`. Valid
 * JSON literals in value position (numbers, `true`, `false`, `null`) are kept
 * as-is.
 *
 * @param input - Source text; leading/trailing whitespace is trimmed.
 * @param options - Optional settings object. `whitespace: false` keeps
 *   whitespace outside strings as text instead of skipping it; `fix: true`
 *   passes the result through `fixJson` to close blocks still open at the end.
 * @returns The first top-level node, or `undefined` for empty input.
 */
// eslint-disable-next-line complexity
export const parseJson = (input: string, options?: unknown) => {
  // `options` is untyped at the boundary; narrow it once instead of poking at `unknown`.
  const opts = (typeof options === 'object' && options !== null ? options : {}) as {
    whitespace?: unknown;
    fix?: unknown;
  };
  const skipWhitespace = opts.whitespace !== false;

  // A complete JSON scalar that must not be turned into a string.
  const jsonLiteral = /^(?:true|false|null|-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?)$/;

  const source = input.trim();
  const root = { type: 'root', nodes: [] as any[] };
  const stack: any[] = [root];
  let block: any = root;
  let prev: any;

  // Appends a node to the current block; adjacent text nodes are merged and
  // block nodes become the new current block.
  const push = (node: any) => {
    if (node.type === 'text' && prev?.type === 'text') {
      prev.value += node.value || '';
      return;
    }

    block.nodes.push(node);

    if (node.type === 'block') {
      stack.push(node);
      block = node;
    }

    prev = node;
  };

  // Opens a nested block and records its opening delimiter.
  const open = (name: string, value: string) => {
    push({ type: 'block', name, nodes: [] });
    push({ type: `${name}_open`, value });
  };

  // Records the closing delimiter and makes the parent block current again.
  const close = (value: string) => {
    block.nodes.push({ type: `${block.name}_close`, value });
    stack.pop();
    block = stack[stack.length - 1];
    prev = block.nodes[block.nodes.length - 1];
  };

  for (let i = 0; i < source.length; i++) {
    const value = source[i];

    // Escapes are kept verbatim together with the escaped character; a
    // trailing lone backslash is doubled.
    if (value === '\\') {
      push({ type: 'text', value: value + (source[++i] || '\\') });
      continue;
    }

    // Inside a string only the closing quote is structural; brackets, braces,
    // commas, colons and whitespace are plain text.
    if (block.name === 'string') {
      if (value === '"') {
        close(value);
      } else {
        push({ type: 'text', value });
      }
      continue;
    }

    if (skipWhitespace && (value === '\r' || value === '\n' || value === ' ' || value === '\t')) {
      continue;
    }

    if (value === ':' && block.name === 'property') {
      push({ type: 'colon', value });
      continue;
    }

    if (value === ',') {
      const isSeparator = block.name === 'array' || block.name === 'property';
      push({ type: isSeparator ? 'comma' : 'text', value });
      continue;
    }

    if (value === '"') {
      open('string', value);
      continue;
    }

    if (value === '[') {
      open('array', value);
      continue;
    }

    if (value === ']' && block.name === 'array') {
      close(value);
      continue;
    }

    if (value === '{') {
      open('property', value);
      continue;
    }

    if (value === '}' && block.name === 'property') {
      const tail = block.nodes[block.nodes.length - 1];

      if (tail?.type === 'text') {
        const before = block.nodes[block.nodes.length - 2];
        const isLiteralValue = before?.type === 'colon' && jsonLiteral.test(tail.value);

        // Quote bare words (keys, or values that are not valid JSON scalars).
        if (!isLiteralValue) {
          block.nodes.pop();
          block.nodes.push({
            type: 'block',
            name: 'string',
            nodes: [
              { type: 'string_open', value: '"' },
              tail,
              { type: 'string_close', value: '"' }
            ]
          });
        }
      }

      if (block.nodes.length === 2) {
        // `{` + key only: supply the colon and an empty value.
        block.nodes.push({ type: 'colon', value: ':' });
        block.nodes.push({ type: 'text', value: '""' });
      } else if (block.nodes.length === 3) {
        // `{` + key + `:`: supply an empty value.
        block.nodes.push({ type: 'text', value: '""' });
      }

      close(value);
      continue;
    }

    push({ type: 'text', value });
  }

  if (opts.fix === true) {
    return fixJson(root.nodes[0], stack);
  }

  return root.nodes[0];
};
/**
 * Turns an AST node back into text.
 *
 * - A missing node, or one without a truthy `type`, yields `'{}'`.
 * - A `block` node yields the concatenation of its serialized children.
 * - Any other node yields its `value` as-is.
 *
 * @param node - Node to serialize.
 * @returns The serialized text for `node`.
 */
export const stringifyJson = (node: any) => {
  const kind = node?.type;

  if (!kind) {
    return '{}';
  }

  return kind === 'block'
    ? node.nodes.map((child: any) => stringifyJson(child)).join('')
    : node.value;
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import assert from 'assert/strict'; | |
import { parseJson, stringifyJson } from '.'; | |
// Fixture 1: a package.json-shaped manifest, serialized compactly.
const packageManifest = {
  name: '@fake-org-name/abc-xyz',
  version: '0.0.1',
  main: 'dist/index.js',
  module: 'dist/index.mjs',
  files: ['dist'],
  bin: { ai: 'dist/bin/cli.js' },
  scripts: {
    eslint: 'npx eslint --ext .ts .',
    test: "ts-mocha -r esbuild-register 'test/**/*.ts'",
    tsup: 'npx tsup'
  },
  dependencies: {
    '@anthropic-ai/sdk': '^0.24.3',
    '@fake-org-name/abc-xyz-001': 'workspace:*',
    '@paralleldrive/cuid2': '^2.2.2',
    '@xenova/transformers': '^2.17.2',
    ajv: '^8.16.0',
    'ajv-formats': '^3.0.1',
    'ansi-colors': '^4.1.3',
    archy: '^1.0.0',
    boxen: '^7.1.1',
    clipboardy: '^4.0.0',
    diff: '^5.2.0',
    dotenv: '^16.4.5',
    'emit-keypress': '^0.0.3',
    enquirer: '^2.4.1',
    glob: '^10.4.2',
    'gpt-3-encoder': '^1.1.4',
    'groq-sdk': '^0.5.0',
    'is-glob': '^4.0.3',
    'kind-of': '^6.0.3',
    'log-update': '^6.0.0',
    mathjs: '^13.0.1',
    minimist: '^1.2.8',
    openai: '^4.52.1',
    ora: '^8.0.1',
    picomatch: '^4.0.2',
    'python-ast': '^0.1.0',
    yaml: '^2.4.5'
  },
  devDependencies: {
    '@ai-sdk/openai': '^0.0.33',
    '@types/node': '^20.14.9',
    ai: '^3.2.10',
    'esbuild-register': '^3.5.0',
    eslint: '^8.57.0',
    'gulp-format-md': '^2.0.0',
    'ts-mocha': '^10.0.0',
    'ts-node': '^10.9.2',
    'tsconfig-paths': '^4.2.0',
    tsup: '^8.1.0',
    typescript: '^5.5.2'
  }
};
const pkg = JSON.stringify(packageManifest);

// Fixture 2: a tsconfig-shaped document; `exclude` mixes strings, booleans,
// null and numbers written in several literal notations.
const tsconfigDocument = {
  compilerOptions: {
    baseUrl: '.',
    whatever: null,
    allowJs: true,
    allowSyntheticDefaultImports: true,
    checkJs: false,
    esModuleInterop: true,
    isolatedModules: true,
    moduleResolution: 'NodeNext',
    module: 'NodeNext',
    noEmit: true,
    forceConsistentCasingInFileNames: true,
    lib: ['ES2022'],
    resolveJsonModule: true,
    strict: true,
    target: 'ES2022',
    types: ['node'],
    paths: { '~/*': ['src/*'] }
  },
  'ts-node': { transpileOnly: true },
  include: ['**/*.ts'],
  exclude: [
    'node_modules',
    'build',
    'public',
    'dist',
    'tmp',
    'temp',
    false,
    null,
    true,
    0.1,
    0x1,
    0b1,
    0o1,
    1e1
  ]
};
const tsconfig = JSON.stringify(tsconfigDocument);

// Fixture 3: a small record, pretty-printed with two-space indentation and
// containing string values with embedded quotes and colons.
const personRecord = {
  name: 'Jon',
  age: 30.5,
  foo: true,
  bar: false,
  address: {
    street: '123 Main St',
    city: 'Springfield',
    state: 'IL'
  },
  phone: '555-555-5555',
  keywords: ['This is "Great!"', 'bar: "baz"', 'qux']
};
const json = JSON.stringify(personRecord, null, 2);
// Run-wide counters reported once after the suite finishes.
const start = Date.now();
let count = 0;
let length = 0;

/**
 * Truncation suite: for every fixture, each prefix of the serialized text is
 * parsed with `fix: true`, re-serialized, and handed to `JSON.parse`. A case
 * passes when none of those steps throws.
 */
describe('parseJson', () => {
  after(() => {
    console.log(`${Date.now() - start}ms`);
    console.log(`Parsed ${count.toLocaleString()} JSON strings`);
    console.log(`Total length: ${length.toLocaleString()} chars`);
  });

  // Parses, repairs and round-trips one (possibly truncated) input.
  const unit = (input: string) => {
    assert.doesNotThrow(() => {
      const ast = parseJson(input, { fix: true });
      JSON.parse(stringifyJson(ast));
      length += input.length;
      count++;
    }, Error);
  };

  // Labelled so every generated test title is unique across fixtures.
  const fixtures: Array<[string, string]> = [
    ['pkg', pkg],
    ['tsconfig', tsconfig],
    ['json', json]
  ];

  for (const [label, jsonString] of fixtures) {
    for (let i = 0; i < jsonString.length; i++) {
      it(`should parse ${label} JSON string at index ${i}`, () => {
        unit(jsonString.slice(0, i + 1));
      });
    }
  }
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment