Skip to content

Instantly share code, notes, and snippets.

@dzimchuk
Last active March 5, 2016 08:42
Show Gist options
  • Save dzimchuk/c802b1e26c9e27ec8ab5 to your computer and use it in GitHub Desktop.
Save dzimchuk/c802b1e26c9e27ec8ab5 to your computer and use it in GitHub Desktop.
Node script that converts MiniBlog posts from XML/HTML to JSON/Markdown
var process = require('process');
var toMarkdown = require('to-markdown');
var xml2js = require('xml2js');
var fs = require('fs')
var path = require('path');
// Directories the script reads posts from and writes converted files to.
// Both are assigned by initialize() from the command-line arguments.
var inputDir = "";
var outputDir = "";

// Validate the command line before touching the file system.
var argumentsValid = false;
try {
    initialize();
    argumentsValid = true;
} catch (err) {
    console.error(err);
    console.log('Usage:\nnode convert.js <input directory> <output directory>');
    // Signal failure to the caller without cutting off pending console output.
    process.exitCode = 1;
}

// Only walk the input directory when the arguments were accepted; otherwise
// inputDir is still empty and fs.readdir would fail with an unrelated error.
if (argumentsValid) {
    fs.readdir(inputDir, function (err, files) {
        if (err) throw err;
        files.forEach(function (filename) {
            processFile(filename);
        });
    });
}
/**
 * Reads the input and output directories from the command line and stores
 * them in the file-level `inputDir` and `outputDir` variables.
 *
 * @throws {Error} When the script was not given exactly two arguments, or
 *     when getDirectory() rejects one of them.
 */
function initialize() {
    // process.argv is expected to hold four entries; the directories are
    // taken from positions 2 and 3.
    if (process.argv.length !== 4) {
        throw new Error("Invalid arguments");
    }
    inputDir = getDirectory(process.argv[2]);
    outputDir = getDirectory(process.argv[3]);
}
/**
 * Checks that a path refers to a directory.
 *
 * @param {string} directory - Path to check.
 * @returns {string} The same path, unchanged, when it is a directory.
 * @throws {Error} When the path is not a directory; errors raised by
 *     fs.statSync (for example for a path that does not exist) propagate as is.
 */
function getDirectory(directory) {
    var isDirectory = fs.statSync(directory).isDirectory();
    if (!isDirectory) {
        throw new Error("Provided argument is not a directory");
    }
    return directory;
}
/**
 * Converts one post file: reads it from the input directory, parses it as XML,
 * runs the parsed document through convert() and hands the resulting JSON
 * string to save().
 *
 * Read and parse errors are rethrown from the asynchronous callbacks.
 *
 * @param {string} filename - Name of the file inside the input directory.
 */
function processFile(filename) {
    console.log('Processing ' + filename);

    var sourcePath = getInputFilePath(filename);
    var parserOptions = {explicitArray: false, mergeAttrs: true};

    // Final step: serialize the converted post and write it out.
    function onParsed(parseError, doc) {
        if (parseError) throw parseError;
        var serialized = JSON.stringify(convert(doc));
        save(filename, serialized);
    }

    // Middle step: parse the file contents once they have been read.
    function onRead(readError, content) {
        if (readError) throw readError;
        var xmlParser = new xml2js.Parser(parserOptions);
        xmlParser.parseString(content, onParsed);
    }

    fs.readFile(sourcePath, 'utf8', onRead);
}
// Extra converters passed to toMarkdown() by convert(). Both target PRE
// elements that have a first child and emit the element's text content as a
// fenced code block; the first handles a first child that is not CODE, the
// second a first child that is CODE.
var customConverters = (function () {
    // Wraps the node's text content in a fenced code block; the pre-rendered
    // `content` argument is ignored.
    function fenceTextContent(content, node) {
        return '\n\n```\n' + node.textContent + '\n```\n\n';
    }

    // Builds a converter for PRE elements whose first child is (or is not) CODE.
    function preConverter(firstChildIsCode) {
        return {
            filter: function (node) {
                return node.nodeName === 'PRE' &&
                    node.firstChild &&
                    (node.firstChild.nodeName === 'CODE') === firstChildIsCode;
            },
            replacement: fenceTextContent
        };
    }

    return [preConverter(false), preConverter(true)];
})();
/**
 * Turns a parsed post document into the object that gets serialized to JSON.
 *
 * `doc.post` is modified in place and returned: `categories` and `comments`
 * become plain arrays (empty when the element is missing or has no children)
 * and `content` is replaced by the output of toMarkdown().
 *
 * @param {Object} doc - Parsed document with a `post` property.
 * @returns {Object} The updated `doc.post`.
 * @throws Rethrows any error raised during conversion after logging the post.
 */
function convert(doc) {
    try {
        var post = doc.post;
        post.categories = toChildArray(post.categories, 'category');
        post.comments = toChildArray(post.comments, 'comment');
        post.content = toMarkdown(post.content, { converters: customConverters });
        return post;
    } catch (err) {
        // Guard the diagnostic so a missing document cannot raise a second
        // error here and hide the original one.
        console.log(doc && doc.post);
        throw err;
    }
}

/**
 * Returns the children stored under `childName` of a container as an array.
 *
 * NOTE(review): the parser is configured with explicitArray: false in
 * processFile, so a single child presumably arrives as a bare value and an
 * empty element as "" - confirm against the xml2js documentation.
 *
 * @param {*} container - Parsed container element; may be missing or "".
 * @param {string} childName - Property holding the child or children.
 * @returns {Array} The children, or an empty array when there are none.
 */
function toChildArray(container, childName) {
    if (container == null || typeof container !== 'object') {
        return [];
    }
    var children = container[childName];
    if (children == null) {
        return [];
    }
    return Array.isArray(children) ? children : [children];
}
/**
 * Tells whether a value is an array.
 *
 * Delegates to Array.isArray, so array-like objects (String objects,
 * `arguments`) are not reported as arrays and the result is always a boolean.
 *
 * @param {*} testObject - Value to inspect.
 * @returns {boolean} True only for real arrays.
 */
function isArray(testObject) {
    return Array.isArray(testObject);
}
/**
 * Writes converted content to the output file derived from `filename`.
 * A write failure is rethrown from the asynchronous callback.
 *
 * @param {string} filename - Name of the source file being converted.
 * @param {string} content - Text to write, encoded as UTF-8.
 */
function save(filename, content) {
    var destination = getOutputFilePath(filename);

    function onWritten(writeError) {
        if (writeError) throw writeError;
    }

    fs.writeFile(destination, content, 'utf8', onWritten);
}
/**
 * Builds the path of a file inside the input directory.
 *
 * @param {string} filename - File name relative to the input directory.
 * @returns {string} `inputDir` joined with `filename`.
 */
function getInputFilePath(filename) {
    var sourcePath = path.join(inputDir, filename);
    return sourcePath;
}
/**
 * Builds the path of the JSON file written for a given source file: the
 * source name without its extension, plus `.json`, inside the output directory.
 *
 * @param {string} filename - Name of the source file.
 * @returns {string} Path of the matching `.json` file under `outputDir`.
 */
function getOutputFilePath(filename) {
    var baseName = path.parse(filename).name;
    var jsonName = baseName + '.json';
    return path.join(outputDir, jsonName);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment