|
#!/usr/bin/env node |
|
const fs = require('fs') |
|
const path = require('path') |
|
const frontMatter = require('front-matter') // Assuming `front-matter` library is installed |
|
|
|
/**
 * Recursively collects every markdown file (.md or .mdx) below a directory.
 *
 * Extensions are matched case-insensitively, so `README.MD` is picked up too.
 * Symbolic links are followed; a link whose target no longer exists is skipped
 * instead of aborting the whole scan.
 *
 * @param {string} dir - Directory to search.
 * @param {string[]} [fileList=[]] - Accumulator shared by the recursive calls.
 * @returns {string[]} The accumulator, holding the path (joined onto `dir`) of
 *   every markdown file found.
 * @throws {Error} If `dir` or one of its sub-directories cannot be read.
 * @example
 * const files = readMarkdownFiles('./content')
 * console.log(files) // ['content/file1.md', 'content/folder/file2.mdx']
 */
function readMarkdownFiles(dir, fileList = []) {
  // Dirent objects already carry the entry type, so regular entries need no extra stat call.
  const entries = fs.readdirSync(dir, {withFileTypes: true})

  for (const entry of entries) {
    const entryPath = path.join(dir, entry.name)
    let isDirectory = entry.isDirectory()

    if (entry.isSymbolicLink()) {
      // A Dirent describes the link itself; stat the target so links keep being followed.
      try {
        isDirectory = fs.statSync(entryPath).isDirectory()
      } catch (err) {
        if (err.code === 'ENOENT') continue // dangling link: nothing to read
        throw err
      }
    }

    if (isDirectory) {
      readMarkdownFiles(entryPath, fileList)
    } else if (/\.mdx?$/i.test(entry.name)) {
      fileList.push(entryPath)
    }
  }

  return fileList
}
|
|
|
/**
 * Reads a markdown file and extracts its front matter.
 *
 * @param {string} file - Path of the markdown file to read.
 * @param {string} baseDir - Directory the returned path is made relative to.
 * @returns {{file: string, attributes: object}} The path of `file` relative to
 *   `baseDir` and the parsed front matter attributes. `attributes` is always a
 *   plain key/value object; anything else is replaced by `{}`.
 * @throws {Error} If the file cannot be read. Errors thrown by the
 *   `front-matter` parser are not caught here.
 * @example
 * const result = parseFrontMatter('content/file.md', 'content')
 * console.log(result) // { file: 'file.md', attributes: { title: 'Example' } }
 */
function parseFrontMatter(file, baseDir) {
  const content = fs.readFileSync(file, 'utf8')
  const {attributes} = frontMatter(content)

  // NOTE(review): front matter is free-form YAML, so the parser presumably can
  // hand back a list or a scalar. Callers enumerate the keys of `attributes`,
  // which only makes sense for a key/value mapping.
  const isRecord =
    attributes !== null && typeof attributes === 'object' && !Array.isArray(attributes)

  return {
    file: path.relative(baseDir, file),
    attributes: isRecord ? attributes : {},
  }
}
|
|
|
/**
 * Records the type found at every JSON path of one file's data.
 *
 * `schemaCollection` maps each JSON path (`a.b`, `list[0].c`, ...) to an object
 * keyed by type name (`string`, `number`, `boolean`, `object`, `array`, `null`,
 * `undefined`, ...) whose values are Sets of the files in which the path had
 * that type. `null` and `undefined` are kept distinct, and a path that a file
 * does not contain at all is registered as `undefined` for that file.
 *
 * Missing paths are resolved in both directions so the outcome does not depend
 * on the order in which files are processed:
 *  - paths already known but absent from this file are marked `undefined` for
 *    this file (checked once per call, after the whole structure was walked);
 *  - paths first introduced by this file are marked `undefined` for every file
 *    processed earlier.
 * To do the latter, the set of processed files is kept on `schemaCollection`
 * under a non-enumerable symbol key, so it never shows up in `Object.keys`,
 * `Object.entries` or `JSON.stringify`.
 *
 * @param {object} schemaCollection - The schema collection object (mutated in place).
 * @param {object} data - JSON object to analyze.
 * @param {string} file - Relative file path containing the data.
 * @param {string} [parentPath=''] - Parent JSON path (prefix for every recorded path).
 */
function collectTypes(schemaCollection, data, file, parentPath = '') {
  // Own-property checks keep keys such as `constructor` or `__proto__` from
  // resolving to members inherited from Object.prototype.
  const hasOwn = (target, key) => Object.prototype.hasOwnProperty.call(target, key);

  const registryKey = Symbol.for('collectTypes.processedFiles');
  if (!hasOwn(schemaCollection, registryKey)) {
    Object.defineProperty(schemaCollection, registryKey, { value: new Set() });
  }
  const processedFiles = schemaCollection[registryKey];

  // Every path seen in this file, across all nesting levels.
  const pathsInFile = new Set();

  // Adds `sourceFile` to the Set stored for `type` under an existing path.
  const addFile = (jsonPath, type, sourceFile) => {
    const filesByType = schemaCollection[jsonPath];
    if (!hasOwn(filesByType, type)) {
      filesByType[type] = new Set();
    }
    filesByType[type].add(sourceFile);
  };

  // Registers that the current file has `type` at `jsonPath`.
  const observe = (jsonPath, type) => {
    if (!hasOwn(schemaCollection, jsonPath)) {
      // defineProperty creates a real own property even for the name `__proto__`.
      Object.defineProperty(schemaCollection, jsonPath, {
        value: {},
        enumerable: true,
        writable: true,
        configurable: true,
      });
      // Files handled before this path existed were missing it.
      for (const earlierFile of processedFiles) {
        if (earlierFile !== file) {
          addFile(jsonPath, 'undefined', earlierFile);
        }
      }
    }
    addFile(jsonPath, type, file);
    pathsInFile.add(jsonPath);
  };

  const typeOf = (value) => {
    if (value === null) return 'null';
    if (Array.isArray(value)) return 'array';
    return typeof value; // 'undefined' for an explicitly undefined value
  };

  // Walks nested objects and the object items of arrays.
  const walk = (node, prefix) => {
    for (const [key, value] of Object.entries(node)) {
      const currentPath = prefix ? `${prefix}.${key}` : key;
      const valueType = typeOf(value);
      observe(currentPath, valueType);

      if (valueType === 'object') {
        walk(value, currentPath);
      } else if (valueType === 'array') {
        value.forEach((item, idx) => {
          if (typeof item === 'object' && item !== null) {
            walk(item, `${currentPath}[${idx}]`);
          }
        });
      }
    }
  };

  walk(data, parentPath);

  // Run once, after the full walk: doing this per nesting level would flag the
  // file's own parent and sibling paths as missing.
  for (const knownPath of Object.keys(schemaCollection)) {
    if (!pathsInFile.has(knownPath)) {
      addFile(knownPath, 'undefined', file);
    }
  }

  processedFiles.add(file);
}
|
|
|
|
|
/**
 * Finds every JSON path that was observed with more than one type.
 *
 * @param {object} schemaCollection - Map of JSON path to `{ typeName: Set<file> }`.
 * @returns {object} One entry per conflicting path:
 *   `{ conflictTypes: string[], filesByType: { typeName: string[] } }`.
 *   Paths with a single type are left out.
 * @example
 * const conflicts = assessConflicts(schemaCollection)
 * console.log(conflicts) // { "path.to.field": { conflictTypes: ['string', 'number'], filesByType: { 'string': ['file1.md'], 'number': ['file2.md'] }}}
 */
function assessConflicts(schemaCollection) {
  const conflictEntries = Object.entries(schemaCollection)
    .filter(([, filesByType]) => Object.keys(filesByType).length > 1)
    .map(([jsonPath, filesByType]) => [
      jsonPath,
      {
        conflictTypes: Object.keys(filesByType),
        // Sets do not survive JSON.stringify, so expose plain arrays.
        filesByType: Object.fromEntries(
          Object.entries(filesByType).map(([type, files]) => [type, Array.from(files)])
        ),
      },
    ])

  // fromEntries defines own properties, so a path named `__proto__` is kept too.
  return Object.fromEntries(conflictEntries)
}
|
|
|
/**
 * Finds every JSON path that was observed with exactly one type.
 *
 * @param {object} schemaCollection - Map of JSON path to `{ typeName: Set<file> }`.
 * @returns {object} One `{ type }` entry per consistent path; paths with
 *   several types are left out.
 * @example
 * const consistent = assessSimilarities(schemaCollection)
 * console.log(consistent) // { "path.to.field": { type: 'string' }}
 */
function assessSimilarities(schemaCollection) {
  const consistentEntries = Object.entries(schemaCollection)
    .map(([jsonPath, filesByType]) => [jsonPath, Object.keys(filesByType)])
    .filter(([, types]) => types.length === 1)
    .map(([jsonPath, [type]]) => [jsonPath, {type}])

  // fromEntries defines own properties, so a path named `__proto__` is kept too.
  return Object.fromEntries(consistentEntries)
}
|
|
|
/**
 * Parses the front matter of every file and assesses the combined schema for
 * both consistent and conflicting types.
 *
 * Files whose front matter is missing or empty are not analyzed; they are
 * reported in `omittedFiles` using the path they were passed in with.
 *
 * @param {string[]} files - Paths of the markdown files to analyze.
 * @param {string} [baseDir=directory] - Directory that the file paths recorded
 *   in the results are made relative to. Defaults to the module-level
 *   `directory` (the directory given on the command line).
 * @returns {{omittedFiles: string[], results: {consistent: object, conflicts: object}}}
 *   The omitted files plus the consistent and conflicting schema elements.
 * @example
 * const {results, omittedFiles} = assessSchema(['content/a.md'], 'content')
 * console.log(results) // { consistent: {...}, conflicts: {...} }
 */
function assessSchema(files, baseDir = directory) {
  const schemaCollection = {}
  const omittedFiles = []

  for (const file of files) {
    const {file: relativeFilePath, attributes} = parseFrontMatter(file, baseDir)

    if (!attributes || Object.keys(attributes).length === 0) {
      omittedFiles.push(file)
      continue
    }

    collectTypes(schemaCollection, attributes, relativeFilePath)
  }

  const conflicts = assessConflicts(schemaCollection)
  const consistent = assessSimilarities(schemaCollection)

  return {omittedFiles, results: {consistent, conflicts}}
}
|
|
|
/**
 * Main function to run the schema analysis on a directory of markdown files
 * and print the report to stdout.
 *
 * `-h` / `--help` prints the usage text. Anything that is not an existing
 * directory prints the usage text as well and sets a non-zero exit code, so
 * shell scripts and CI can detect the failure.
 *
 * @param {string} directory - Path to the directory containing markdown files,
 *   or `-h` / `--help`.
 * @example
 * main('./content')
 */
function main(directory) {
  if (directory === '--help' || directory === '-h') {
    displayHelp()
    return
  }

  if (!isValidDirectory(directory)) {
    displayHelp(directory)
    // exitCode (rather than process.exit) lets pending output flush first.
    process.exitCode = 1
    return
  }

  const files = readMarkdownFiles(directory)
  const {results, omittedFiles} = assessSchema(files)

  console.log(`Scan Directory: ${directory}`)
  console.log(`Total files analyzed: ${files.length}`)
  console.log(`Total omitted files: ${omittedFiles.length}`)

  console.log('Schema Assessment Results:')
  console.log('Consistent Types:', JSON.stringify(results.consistent, null, 2))
  console.log('Conflicting Types:', JSON.stringify(results.conflicts, null, 2))

  if (omittedFiles.length) {
    console.log('Omitted Files:', omittedFiles)
  }
}
|
|
|
/**
 * Displays help information for using the script.
 *
 * @param {string} [directory] - Optional directory path that was specified.
 *   If provided, it indicates an invalid directory, which is reported on its
 *   own line before the usage text.
 */
function displayHelp(directory) {
  if (directory) {
    console.log(`Invalid directory specified: ${directory}`)
  }

  // The template literal is printed verbatim, so its lines start at column 0.
  console.log(`Usage: node script.js <directory>

Options:
  -h, --help    Show help information

Description:
  This script scans markdown files in a specified directory, analyzes their front matter,
  and outputs a summary of consistent and conflicting schema data.
`)
}
|
|
|
/**
 * Checks if the provided directory is valid.
 *
 * @param {string} dir - Directory path to validate.
 * @returns {boolean} True if `dir` exists and is a directory; false otherwise,
 *   including when `dir` is missing, not a string, or cannot be inspected.
 */
function isValidDirectory(dir) {
  // A single stat avoids the window between an existence check and the stat
  // in which the entry could disappear and make the call throw.
  try {
    return fs.statSync(dir).isDirectory()
  } catch (err) {
    return false
  }
}
|
|
|
// Start the script: the first CLI argument is the directory to scan (or -h/--help).
// `directory` is kept at module scope because assessSchema reads it as the
// base directory for the relative file paths it reports.
const directory = process.argv[2]

main(directory)