Skip to content

Instantly share code, notes, and snippets.

@szkrd
Created June 12, 2016 17:33
Show Gist options
  • Save szkrd/02ca22c93b1622bbeb081fff418a84b1 to your computer and use it in GitHub Desktop.
Save szkrd/02ca22c93b1622bbeb081fff418a84b1 to your computer and use it in GitHub Desktop.
Create md files from blogger posts
// Imports posts from a Blogger export file (blogger-XX-XX-XXXX.xml) found in the current directory.
// Dependencies and the versions used:
/*
"dedent": "0.6.0",
"diacritics": "1.2.3",
"html-entities": "1.2.0",
"lodash": "4.0.1",
"marked": "0.3.5",
"sanitize-filename": "1.5.3",
"shelljs": "0.5.3",
"xml2json": "0.9.0"
*/
require('shelljs/global')
var _ = require('lodash')
var dedent = require('dedent')
var parser = require('xml2json')
var sanitize = require('sanitize-filename')
var removeDiacritics = require('diacritics').remove
var HtmlEntities = require('html-entities').XmlEntities
var entities = new HtmlEntities()
// Locate the export to import: of all *.xml files matched in the working
// directory, take the one whose name sorts last.
var xmlFiles = ls('*.xml')
xmlFiles.sort()
var xml = _.last(xmlFiles)
if (!xml) {
  console.log('No blogger xml file to import from.')
  process.exit(1)
}

// Read the file and have xml2json turn it into a plain object tree
// instead of a JSON string.
xml = cat(xml)
var blog = parser.toJson(xml, {object: true})

// Keep only the feed entries whose id contains ".post-".
var entries = _.get(blog, ['feed', 'entry'])
var posts = _.filter(entries, function (entry) {
  return entry.id.indexOf('.post-') !== -1
})
// Reduce every raw blogger entry to the few fields the markdown writer needs:
//   date  - first 10 characters of `published` (presumably YYYY-MM-DD; verify
//           against an actual export)
//   tags  - category terms that do not contain a "/" (presumably this drops
//           blogger's schema URLs and keeps the user's labels)
//   title - text of <title>, '' when it has no text
//   body  - text of <content>, '' when it has no text
posts = posts.map(post => {
  // xml2json hands back a bare object instead of an array when an element
  // occurs only once, and nothing at all when it is absent. Normalise to an
  // array so an entry with a single <category> (or none) cannot crash the run.
  var categories = [].concat(post.category || [])
  var tags = categories
    .map(category => category.term)
    .filter(term => term && term.indexOf('/') === -1)

  return {
    date: post.published.slice(0, 10),
    tags: tags,
    // default to '' so later string operations never see undefined
    title: _.get(post, 'title.$t', ''),
    body: _.get(post, 'content.$t', '')
  }
})
// Write one markdown file per post into the current directory.
// Each file is named "<date without dashes> <title>.md" and contains the title
// as a level-1 heading, the tag list as a level-2 heading, then the post body.
function writeMarkdownFile (post) {
  // heading text: strip a leading "digits, optional @, optional |" prefix,
  // turn slashes into dashes, then decode entities
  var heading = post.title
    .replace(/^\d*@?\s?\|?\s?/, '')
    .replace(/\//g, '-')
  heading = entities.decode(heading)

  // file name: run through sanitize-filename, then diacritics removal, then
  // collapse every whitespace run into a single space
  var baseName = post.date.replace(/-/g, '') + ' ' + heading
  baseName = removeDiacritics(sanitize(baseName)).replace(/\s+/g, ' ')

  // tag line such as "[a], [b]"; note that sort() reorders post.tags in place
  var tagLine = post.tags.sort().map(tag => '[' + tag + ']').join(', ')

  var content = entities.decode(post.body)
  var markdown = dedent('\n# ' + heading + '\n## ' + tagLine + '\n' + content + '\n')
  markdown.to('./' + baseName + '.md')
}

posts.forEach(writeMarkdownFile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment