Created
February 14, 2024 00:55
-
-
Save chesterbr/6368adb7530f6d582046a5d93a4d4a49 to your computer and use it in GitHub Desktop.
A script that imports Disqus posts into a format usable by Staticman
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// import_disqus.mjs | |
// ================= | |
// | |
// Converts Disqus XML export to a format compatible with Staticman (https://staticman.net/) | |
// | |
// Requires Node.js and the following packages: | |
// | |
// npm install xml2js crypto-js strip-indent | |
// | |
// (c) 2024 Carlos Duarte Do Nascimento (https://chester.me) | |
// Released under the MIT License (https://opensource.org/licenses/MIT) | |
import fs from 'fs'; | |
import { Parser } from 'xml2js'; | |
import path from 'path'; | |
import stripIndent from 'strip-indent'; | |
import CryptoJS from 'crypto-js'; | |
// Shared xml2js parser that turns the Disqus XML export into a plain JS object.
const parser = new Parser();

// Reads `data.xml` (the Disqus export) from the current directory and writes
// one Staticman-style YAML file per live comment under `comments/<post slug>/`.
fs.readFile('data.xml', (readErr, data) => {
  if (readErr) {
    // Fail loudly instead of handing `undefined` to the XML parser.
    console.error(`Could not read data.xml: ${readErr.message}`);
    process.exitCode = 1;
    return;
  }

  parser.parseString(data, (parseErr, result) => {
    if (parseErr || !result || !result.disqus) {
      const reason = parseErr ? parseErr.message : 'missing <disqus> root element';
      console.error(`Could not parse data.xml: ${reason}`);
      process.exitCode = 1;
      return;
    }

    // Yes, Disqus calls a post a "thread" and a comment a "post" 🤦;
    // let's first untangle this mess. An export without threads or
    // comments simply has nothing to iterate over.
    const posts = result.disqus.thread || [];
    const comments = result.disqus.post || [];

    // All we need from posts is the URL and slug, so let's build a
    // dictionary to quickly retrieve them from the post ID.
    const postInfo = new Map();
    for (const post of posts) {
      const url = post.link[0];
      // Slug = last path segment, without a trailing slash or ".html" suffix.
      const slug = url.replace(/\/$/, '').split('/').slice(-1)[0].replace(/\.html$/, '');
      postInfo.set(post.$['dsq:id'], { slug, url });
    }

    // xml2js wraps every child element in an array; flags arrive as strings.
    const isTrue = (field) => Array.isArray(field) && field[0] === 'true';

    // Now we can create one file for each comment.
    for (const comment of comments) {
      if (isTrue(comment.isDeleted) || isTrue(comment.isSpam)) {
        continue;
      }

      const commentId = comment.$['dsq:id'];
      const threadId = comment.thread[0].$['dsq:id'];
      const post = postInfo.get(threadId);
      if (!post) {
        // Comment points at a thread that is not in the export; skip it
        // rather than crashing on `post.url`.
        console.warn(`Skipping comment ${commentId}: unknown thread ${threadId}`);
        continue;
      }

      const unixTimestamp = new Date(comment.createdAt[0]).getTime() / 1000;
      if (Number.isNaN(unixTimestamp)) {
        console.warn(`Skipping comment ${commentId}: unparseable createdAt`);
        continue;
      }

      const author = comment.author[0];
      const username = author.username ? author.username[0] : '';
      const authorName = author.name ? author.name[0] : '';
      const message = comment.message ? comment.message[0] : '';
      const parentId = comment.parent ? comment.parent[0].$['dsq:id'] : '';
      // The blog owner keeps a fixed hash; everyone else gets the MD5 of a
      // synthetic "<username>@chester.me" address.
      const emailHash = username === 'chesterbr'
        ? '3a49ee98333d753103cf708e40d36984'
        : CryptoJS.MD5(username + '@chester.me').toString();

      // Built line by line (rather than from an indented template that is
      // dedented afterwards) so a multi-line message whose lines start at
      // column 0 cannot change how the surrounding keys are indented.
      const yml = [
        `_id: ${commentId}`,
        `_parent: ${post.url}`,
        `replying_to_uid: '${parentId}'`,
        `message: ${singleQuote(message)}`,
        `name: ${singleQuote(authorName)}`,
        `email: '${emailHash}'`,
        `hp: ''`,
        `date: ${unixTimestamp}`,
        '',
      ].join('\n')
        .replace(/[\x00-\x09\x0B\x0C\x0E-\x1F\x7F-\x9F]/g, ''); // 🔪 invalid Unicode characters

      // Use comment ID as milliseconds in filename for uniqueness
      const dirname = 'comments/' + post.slug;
      const filename = `entry${unixTimestamp * 1000 + Number(commentId) % 1000}.yml`;
      // `recursive: true` makes this a no-op when the directory already exists.
      fs.mkdirSync(dirname, { recursive: true });
      fs.writeFileSync(path.join(dirname, filename), yml);
    }
  });
});
/**
 * Wraps a value in single quotes, doubling any embedded single quote
 * (e.g. `it's` becomes `'it''s'`).
 *
 * @param {*} str - Text to quote. `null`/`undefined` are treated as an empty
 *   string and other non-string values are converted with `String()`, so a
 *   missing field produces `''` instead of a TypeError.
 * @returns {string} The quoted text.
 */
function singleQuote(str) {
  const text = str == null ? '' : String(str);
  return `'${text.replace(/'/g, "''")}'`;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment