@psenger
Last active October 30, 2024 03:30
[Markdown Frontmatter Schema Analyzer] #MarkDown #MDX #FrontMatter #JSONSchema #helper #JavaScript

Markdown Frontmatter Schema Analyzer

Purpose

The Markdown Frontmatter Schema Analyzer is a Node.js script that recursively scans Markdown (.md and .mdx) files in a specified directory, extracts their frontmatter, and analyzes the data types of the attributes within it. The script reports which attributes have a consistent type across all analyzed files and which have conflicting types, making it easy to spot discrepancies in your metadata. Ultimately, this helps you build a JSON Schema for your frontmatter.
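As a sketch of that last step: the analyzer's "consistent types" report can be mapped onto a JSON Schema `properties` object almost mechanically. The report shape below matches what the script prints; the mapping itself is an illustration, not part of the script.

```javascript
// Illustrative: turn a "consistent types" report (the shape the script
// prints) into a minimal JSON Schema. The attribute names are made up.
const consistent = {
  title: { type: 'string' },
  draft: { type: 'boolean' },
  tags: { type: 'array' }
}

const jsonSchema = {
  type: 'object',
  properties: Object.fromEntries(
    // Each consistent path becomes a property with its observed type
    Object.entries(consistent).map(([key, { type }]) => [key, { type }])
  )
}

console.log(JSON.stringify(jsonSchema, null, 2))
```

Conflicting attributes need a human decision first (e.g. pick one type and fix the offending files), which is exactly what the conflict report is for.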

How It Works

  1. Read Markdown Files: The script recursively traverses the specified directory and collects all Markdown files.
  2. Parse Front Matter: For each Markdown file, it reads the content and extracts the front matter using the front-matter library.
  3. Collect Attributes: The attributes are collected and stored in a schema collection object, tracking the types and files containing each attribute.
  4. Assess Schema: The script assesses both consistent types and conflicts within the schema collection, reporting results to the console.
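Steps 3 and 4 boil down to a small bookkeeping structure: for every attribute path, record which type each file used, then flag any path that accumulated more than one type. A minimal sketch (the file names and attributes are made up; the real script also handles nesting and missing keys):

```javascript
// Classify a value the same way the script's collectTypes() does
function classify(value) {
  if (value === null) return 'null'
  if (value === undefined) return 'undefined'
  return Array.isArray(value) ? 'array' : typeof value
}

// path -> type -> Set of files that used that type
const schemaCollection = {}
const parsedFiles = [
  { file: 'a.md', attributes: { title: 'Hello', draft: false } },
  { file: 'b.md', attributes: { title: 42, draft: true } } // title conflicts
]

for (const { file, attributes } of parsedFiles) {
  for (const [key, value] of Object.entries(attributes)) {
    const type = classify(value)
    if (!schemaCollection[key]) schemaCollection[key] = {}
    if (!schemaCollection[key][type]) schemaCollection[key][type] = new Set()
    schemaCollection[key][type].add(file)
  }
}

// A path with more than one recorded type is a conflict
console.log(Object.keys(schemaCollection.title)) // [ 'string', 'number' ]
console.log(Object.keys(schemaCollection.draft)) // [ 'boolean' ]
```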

Usage

To run the script, use the following command in your terminal:

node deduce-frontmatter-schema.js <directory>

Or, on Unix or macOS, make the script executable and run it directly:

chmod +x deduce-frontmatter-schema.js
./deduce-frontmatter-schema.js <directory>

Parameters

  • <directory>: The path to the directory containing the Markdown files you want to analyze.

Options

  • -h, --help: Displays help information.

Required Node Modules

Before running the script, make sure to install the necessary Node modules. You can do this by executing the following command:

npm install front-matter

Example

node deduce-frontmatter-schema.js ./content

This command will analyze all Markdown files in the ./content directory.

Output

The script will output the following information:

  • Scan Directory: The directory that was scanned.
  • Total files analyzed: The number of Markdown files processed.
  • Total omitted files: The number of files that were skipped (those without front matter).
  • Schema Assessment Results: Details on consistent and conflicting types found in the front matter attributes.
  • Omitted Files: A list of files that were omitted from analysis due to lack of front matter.
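To illustrate how to read a conflict entry: a common real-world case is a `date` attribute that is quoted in some files (YAML parses it as a string) and unquoted in others (YAML parses it as a Date, i.e. an object). The entry below is made up, but its shape matches what the script prints.

```javascript
// Illustrative shape of one "Conflicting Types" entry. The field name
// and file names are hypothetical.
const conflictEntry = {
  date: {
    conflictTypes: ['string', 'object'],
    filesByType: {
      string: ['posts/a.md'], // date: "2024-10-30" (quoted -> string)
      object: ['posts/b.md']  // date: 2024-10-30 (unquoted -> Date object)
    }
  }
}

// To resolve: pick one representation and fix the files listed under the other
console.log(conflictEntry.date.conflictTypes.join(' vs ')) // string vs object
```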

License

This project is licensed under the MIT License.

#!/usr/bin/env node
const fs = require('fs')
const path = require('path')
const frontMatter = require('front-matter') // Assuming `front-matter` library is installed
/**
 * Recursively reads all markdown files (.md or .mdx) in a directory.
 * @param {string} dir - Directory to search.
 * @param {string[]} [fileList=[]] - List of file paths (recursive accumulation).
 * @returns {string[]} - List of file paths for markdown files.
 * @example
 * const files = readMarkdownFiles('./content')
 * console.log(files) // ['content/file1.md', 'content/folder/file2.mdx']
 */
function readMarkdownFiles(dir, fileList = []) {
  const files = fs.readdirSync(dir)
  files.forEach(file => {
    const filePath = path.join(dir, file)
    if (fs.statSync(filePath).isDirectory()) {
      readMarkdownFiles(filePath, fileList)
    } else if (filePath.endsWith('.md') || filePath.endsWith('.mdx')) {
      fileList.push(filePath)
    }
  })
  return fileList
}
/**
 * Parses front matter from a markdown file.
 * @param {string} file - The file path of the markdown file.
 * @param {string} baseDir - The base directory for calculating the relative path.
 * @returns {{file: string, attributes: object}} - Relative file path and parsed front matter attributes.
 * @example
 * const result = parseFrontMatter('content/file.md', 'content')
 * console.log(result) // { file: 'file.md', attributes: { title: 'Example' } }
 */
function parseFrontMatter(file, baseDir) {
  const content = fs.readFileSync(file, 'utf8')
  const {attributes} = frontMatter(content)
  const relativeFilePath = path.relative(baseDir, file) // Get relative path
  return {file: relativeFilePath, attributes} // Include relative filename
}
/**
 * Recursively collects JSON paths and types, treating `null` and `undefined` distinctly.
 * @param {object} schemaCollection - The schema collection object.
 * @param {object} data - JSON object to analyze.
 * @param {string} file - Relative file path containing the data.
 * @param {string} [parentPath=''] - Parent JSON path (for nested structures).
 * @param {Set<string>} [keysProcessed=new Set()] - Paths seen so far for this file,
 * shared across recursive calls so the missing-key check sees nested paths too.
 */
function collectTypes(schemaCollection, data, file, parentPath = '', keysProcessed = new Set()) {
  // Iterate over the current data attributes
  for (const [key, value] of Object.entries(data)) {
    const currentPath = parentPath ? `${parentPath}.${key}` : key;
    // Determine the type for each value
    const valueType =
      value === null ? 'null' :
      value === undefined ? 'undefined' :
      Array.isArray(value) ? 'array' : typeof value;
    // Initialize collection for each path if it doesn't exist
    if (!schemaCollection[currentPath]) {
      schemaCollection[currentPath] = {};
    }
    // Record the type (an explicit `undefined` is tracked like any other type)
    if (!schemaCollection[currentPath][valueType]) {
      schemaCollection[currentPath][valueType] = new Set();
    }
    schemaCollection[currentPath][valueType].add(file);
    // Mark this key as processed
    keysProcessed.add(currentPath);
    // Recursive call for nested objects or arrays of objects
    if (valueType === 'object') {
      collectTypes(schemaCollection, value, file, currentPath, keysProcessed);
    } else if (valueType === 'array') {
      value.forEach((item, idx) => {
        if (typeof item === 'object' && item !== null) {
          collectTypes(schemaCollection, item, file, `${currentPath}[${idx}]`, keysProcessed);
        }
      });
    }
  }
  // After the whole file is processed, register it as `undefined` for any known
  // path it lacks. This must run only at the top level: running it inside a
  // recursive call would wrongly flag paths outside the nested object.
  if (parentPath === '') {
    for (const existingPath of Object.keys(schemaCollection)) {
      if (!keysProcessed.has(existingPath)) {
        if (!schemaCollection[existingPath]['undefined']) {
          schemaCollection[existingPath]['undefined'] = new Set();
        }
        schemaCollection[existingPath]['undefined'].add(file);
      }
    }
  }
}
/**
 * Analyzes schema for conflicting types across files.
 * @param {object} schemaCollection - The schema collection object.
 * @returns {object} - Object with conflicts for each JSON path.
 * @example
 * const conflicts = assessConflicts(schemaCollection)
 * console.log(conflicts) // { "path.to.field": { conflictTypes: ['string', 'number'], filesByType: { 'string': ['file1.md'], 'number': ['file2.md'] }}}
 */
function assessConflicts(schemaCollection) {
  const conflicts = {}
  Object.entries(schemaCollection).forEach(([path, typesByFile]) => {
    const uniqueTypes = Object.keys(typesByFile)
    if (uniqueTypes.length > 1) { // Detects conflict if more than one type exists
      conflicts[path] = {
        conflictTypes: uniqueTypes,
        filesByType: Object.fromEntries(Object.entries(typesByFile).map(
          ([type, files]) => [type, Array.from(files)]
        ))
      }
    }
  })
  return conflicts
}
/**
 * Identifies consistent data types across all files.
 * @param {object} schemaCollection - The schema collection object.
 * @returns {object} - Object with consistent types for each JSON path.
 * @example
 * const consistent = assessSimilarities(schemaCollection)
 * console.log(consistent) // { "path.to.field": { type: 'string' }}
 */
function assessSimilarities(schemaCollection) {
  const consistent = {}
  Object.entries(schemaCollection).forEach(([path, typesByFile]) => {
    const uniqueTypes = Object.keys(typesByFile)
    if (uniqueTypes.length === 1) { // Consistent if only one type exists
      const consistentType = uniqueTypes[0]
      consistent[path] = {type: consistentType} // Only return the type
    }
  })
  return consistent
}
/**
 * Assesses both similarities and conflicts across a set of markdown files.
 * @param {string[]} files - Paths of the markdown files to analyze.
 * @returns {{omittedFiles: string[], results: {consistent: object, conflicts: object}}}
 * - Files skipped for lacking front matter, plus consistent and conflicting schema elements.
 * @example
 * const { results, omittedFiles } = assessSchema(files)
 * console.log(results) // { consistent: {...}, conflicts: {...} }
 */
function assessSchema(files) {
  const schemaCollection = {}
  const omittedFiles = []
  files.forEach(file => {
    // `directory` is the module-level constant read from process.argv below
    const {file: relativeFilePath, attributes} = parseFrontMatter(file, directory)
    if (!attributes || Object.keys(attributes).length === 0) {
      omittedFiles.push(file)
      return
    }
    collectTypes(schemaCollection, attributes, relativeFilePath)
  })
  const conflicts = assessConflicts(schemaCollection)
  const consistent = assessSimilarities(schemaCollection)
  return { omittedFiles, results: { consistent, conflicts } }
}
/**
 * Main function to run the schema analysis on a directory of markdown files.
 * @param {string} directory - Path to the directory containing markdown files.
 * @example
 * main('./content')
 */
function main(directory) {
  if (directory === '--help' || directory === '-h') {
    displayHelp()
    return
  }
  if (!isValidDirectory(directory)) {
    displayHelp(directory)
    return
  }
  const files = readMarkdownFiles(directory)
  const {results, omittedFiles} = assessSchema(files)
  console.log(`Scan Directory: ${directory}`)
  console.log(`Total files analyzed: ${files.length}`)
  console.log(`Total omitted files: ${omittedFiles.length}`)
  console.log('Schema Assessment Results:')
  console.log('Consistent Types:', JSON.stringify(results.consistent, null, 2))
  console.log('Conflicting Types:', JSON.stringify(results.conflicts, null, 2))
  if (omittedFiles.length) {
    console.log('Omitted Files:', omittedFiles)
  }
}
/**
 * Displays help information for using the script.
 * @param {string} [directory] - Optional directory path that was specified.
 * If provided, it indicates an invalid directory, which is displayed in the logs.
 */
function displayHelp(directory) {
  if (directory) {
    console.log(`Invalid directory specified: ${directory}`)
  }
  console.log(`Usage: node deduce-frontmatter-schema.js <directory>
Options:
  -h, --help    Show help information
Description:
  This script scans markdown files in a specified directory, analyzes their front matter,
  and outputs a summary of consistent and conflicting schema data.
`)
}
/**
 * Checks if the provided directory is valid.
 * @param {string} dir - Directory path to validate.
 * @returns {boolean} - True if the path exists and is a directory; otherwise, false.
 */
function isValidDirectory(dir) {
  return fs.existsSync(dir) && fs.statSync(dir).isDirectory()
}
// Start the script
const directory = process.argv[2]
main(directory)
{
  "name": "deduce-frontmatter-schema",
  "version": "1.0.0",
  "main": "deduce-frontmatter-schema.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "",
  "license": "ISC",
  "description": "",
  "dependencies": {
    "front-matter": "^4.0.2"
  }
}