Last active
March 5, 2016 08:42
-
-
Save dzimchuk/c802b1e26c9e27ec8ab5 to your computer and use it in GitHub Desktop.
Node script to convert XML/HTML MiniBlog posts to JSON/Markdown
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var process = require('process'); | |
var toMarkdown = require('to-markdown'); | |
var xml2js = require('xml2js'); | |
var fs = require('fs') | |
var path = require('path'); | |
// Directories taken from the command line; assigned by initialize().
var inputDir = "";
var outputDir = "";

// Validate the command-line arguments before touching any post files.
try {
    initialize();
} catch (err) {
    console.error(err);
    console.log('Usage:\nnode convert.js <input directory> <output directory>');
    // Without valid directories there is nothing to convert. Stop here so the
    // script does not fall through to fs.readdir("") and crash with an
    // unrelated ENOENT stack trace after the usage text.
    process.exit(1);
}

// Convert every entry found in the input directory.
fs.readdir(inputDir, function(err, files) {
    if (err) throw err;
    files.forEach(function(filename) {
        processFile(filename);
    });
});
/**
 * Reads the input and output directories from the command line and stores
 * them in the module-level `inputDir` / `outputDir` variables.
 *
 * Expects exactly two user arguments, i.e. process.argv has four entries:
 * the node binary, the script path, the input directory, the output directory.
 *
 * @throws {Error} "Invalid arguments" when the argument count is wrong;
 *                 also propagates whatever getDirectory() throws.
 */
function initialize() {
    if (process.argv.length !== 4) {
        // Throw a real Error (not a bare string) so a stack trace is available.
        throw new Error("Invalid arguments");
    }
    inputDir = getDirectory(process.argv[2]);
    outputDir = getDirectory(process.argv[3]);
}
/**
 * Verifies that a path refers to an existing directory.
 *
 * @param {string} directory - path to check
 * @returns {string} the same path, unchanged
 * @throws {Error} "Provided argument is not a directory" when the path exists
 *                 but is not a directory; fs.statSync's own error propagates
 *                 when the path cannot be stat'ed at all.
 */
function getDirectory(directory) {
    var stats = fs.statSync(directory);
    if (!stats.isDirectory()) {
        // Throw a real Error (not a bare string) so a stack trace is available.
        throw new Error("Provided argument is not a directory");
    }
    return directory;
}
/**
 * Converts one post file: reads it from the input directory as UTF-8, parses
 * it as XML, converts the parsed document, and saves the result as JSON.
 * Read and parse errors are rethrown from their callbacks.
 *
 * @param {string} filename - name of the file inside the input directory
 */
function processFile(filename) {
    console.log('Processing ' + filename);

    var sourcePath = getInputFilePath(filename);
    fs.readFile(sourcePath, 'utf8', function onRead(readError, xml) {
        if (readError) {
            throw readError;
        }

        // explicitArray: false keeps single children as plain values;
        // mergeAttrs: true folds attributes into the parent object.
        var parserOptions = {explicitArray: false, mergeAttrs: true};
        var xmlParser = new xml2js.Parser(parserOptions);

        xmlParser.parseString(xml, function onParsed(parseError, doc) {
            if (parseError) {
                throw parseError;
            }
            var post = convert(doc);
            save(filename, JSON.stringify(post));
        });
    });
}
/**
 * Extra converters passed to toMarkdown. Both turn a <pre> element into a
 * fenced code block built from the element's text content; the first matches
 * <pre> whose first child is not <code>, the second matches <pre> whose first
 * child is <code>.
 */
var customConverters = [
    {
        // <pre> whose first child is anything other than <code>
        filter: function (node) {
            if (node.nodeName !== 'PRE') {
                return false;
            }
            var first = node.firstChild;
            return first && first.nodeName !== 'CODE';
        },
        replacement: function (content, node) {
            var opening = '\n\n```\n';
            var closing = '\n```\n\n';
            return opening + node.textContent + closing;
        }
    },
    {
        // <pre> whose first child is <code>
        filter: function (node) {
            if (node.nodeName !== 'PRE') {
                return false;
            }
            var first = node.firstChild;
            return first && first.nodeName === 'CODE';
        },
        replacement: function (content, node) {
            var opening = '\n\n```\n';
            var closing = '\n```\n\n';
            return opening + node.textContent + closing;
        }
    }
];
/**
 * Reshapes a parsed post document into the object that gets serialized.
 *
 * The post object is modified in place:
 *   - `categories` becomes an array of the `category` children,
 *   - `comments` becomes an array of the `comment` children,
 *   - `content` is run through toMarkdown with the custom converters.
 *
 * @param {Object} doc - parsed document; the post is read from `doc.post`
 * @returns {Object} the (mutated) `doc.post`
 * @throws rethrows any error raised during conversion after logging the post
 */
function convert(doc) {
    try {
        var post = doc.post;
        post.categories = toChildList(post.categories, 'category');
        post.comments = toChildList(post.comments, 'comment');
        post.content = toMarkdown(post.content, { converters: customConverters });
        return post;
    } catch (err) {
        // Guard the lookup so a null/undefined doc does not raise a second
        // error here and hide the original one.
        console.log(doc && doc.post);
        throw err;
    }
}

/**
 * Normalizes a container element to an array of its children.
 *
 * A missing or empty container yields []; so does a container that has no
 * child under `childName` (otherwise the result would be [undefined], which
 * serializes as [null]). A single child is wrapped; an array is returned as is.
 *
 * @param {*} container - parsed container value (object, "" or undefined)
 * @param {string} childName - property holding the child or children
 * @returns {Array} the children as an array
 */
function toChildList(container, childName) {
    if (container === undefined || container === null || container === '') {
        return [];
    }
    var children = container[childName];
    if (children === undefined || children === null) {
        return [];
    }
    return Array.isArray(children) ? children : [children];
}
/**
 * Tells whether a value is an array.
 *
 * Delegates to Array.isArray so the result is always a boolean, values
 * without Object.prototype methods do not throw, and array-like objects
 * (typed arrays, `arguments`) are not mistaken for arrays.
 *
 * @param {*} testObject - value to test
 * @returns {boolean} true only for real arrays
 */
function isArray(testObject) {
    return Array.isArray(testObject);
}
/**
 * Writes converted content to the output file derived from `filename`,
 * encoded as UTF-8. A write error is rethrown from the callback.
 *
 * @param {string} filename - name of the source file being converted
 * @param {string} content - text to write
 */
function save(filename, content) {
    var targetPath = getOutputFilePath(filename);
    fs.writeFile(targetPath, content, 'utf8', function onWritten(writeError) {
        if (writeError) {
            throw writeError;
        }
    });
}
/**
 * Builds the path of a file inside the input directory.
 *
 * @param {string} filename - file name relative to the input directory
 * @returns {string} `inputDir` joined with `filename`
 */
function getInputFilePath(filename) {
    var location = path.join(inputDir, filename);
    return location;
}
/**
 * Builds the output path for a source file: the file's base name without its
 * extension, plus '.json', placed inside the output directory.
 *
 * @param {string} filename - name of the source file
 * @returns {string} path of the corresponding .json file under `outputDir`
 */
function getOutputFilePath(filename) {
    var stem = path.parse(filename).name;
    var jsonName = stem + '.json';
    return path.join(outputDir, jsonName);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment