Skip to content

Instantly share code, notes, and snippets.

@yurenju
Created July 16, 2023 09:44
Show Gist options
  • Save yurenju/5c7ff1d9bd090ec6fecf9575d9d05181 to your computer and use it in GitHub Desktop.
Save yurenju/5c7ff1d9bd090ec6fecf9575d9d05181 to your computer and use it in GitHub Desktop.
blogger-to-hugo.js
const { XMLParser } = require("fast-xml-parser");
const TurndownService = require("turndown");
const moment = require("moment");
const shell = require("shelljs");
const fetch = require("node-fetch");
const yaml = require("js-yaml");
const fs = require("fs");
const path = require("path");
// Root folder that receives one sub-folder (containing index.md) per post.
const OUT_DIR = "out";

// HTML -> Markdown converter used for every post body.
const tds = new TurndownService({ codeBlockStyle: "fenced", fence: "```" });

// Render every <pre> element as a fenced code block.
// The fence is padded with blank lines on both sides: Turndown joins adjacent
// outputs using only the newlines each replacement supplies, so without the
// padding a fence can end up glued to neighbouring inline text (e.g.
// "text```" or "```text"), which breaks the code block.
tds.addRule("wppreblock", {
  filter: ["pre"],
  replacement: function (content) {
    return "\n\n```\n" + content + "\n```\n\n";
  },
});

// XML parser for the blog export.
const parser = new XMLParser({
  // Keep attributes: main() reads them through the "@_" prefix
  // (e.g. "@_scheme", "@_term").
  ignoreAttributes: false,
  // Keep element text as strings. With the default (true) a numeric-looking
  // title or body such as "2020" is returned as a number, which loses
  // formatting and breaks code that expects strings.
  parseTagValue: false,
});
/**
 * Turns a post title into a string that is safe to use as a file or folder
 * name.
 *
 * Characters that are reserved in file names or act as path separators
 * (< > : " / \ | ? *), plus the dot, are replaced with visually similar
 * Unicode characters. ASCII control characters (0x00-0x1F) are removed.
 *
 * @param {string} title - Raw title; non-string values are converted with String().
 * @returns {string} The sanitized name.
 */
function normalizeFilename(title) {
  // The fullwidth replacements are written as escapes so the mapping cannot be
  // silently folded back to plain ASCII by tools that normalize Unicode.
  const specialCharsMap = {
    "<": "〈",
    ">": "〉",
    ":": "\uFF1A", // fullwidth colon
    '"': "〃",
    "/": "\uFF0F", // fullwidth solidus
    "\\": "\uFF3C", // fullwidth reverse solidus
    "|": "\uFF5C", // fullwidth vertical line
    "?": "\uFF1F", // fullwidth question mark
    "*": "\uFF0A", // fullwidth asterisk
    ".": "\uFF0E", // fullwidth full stop
  };
  return String(title).replace(
    /[<>:"\/\\|?.\*\x00-\x1F]/g,
    // Control characters have no entry in the map; drop them instead of
    // letting the callback return undefined (which inserts "undefined").
    (match) => specialCharsMap[match] || ""
  );
}
/**
 * Returns the file extension (including the leading dot, or "" when there is
 * none) taken from the path part of an image URL, ignoring any query string
 * or fragment.
 */
function imageExtension(imageUrl) {
  let pathname;
  try {
    pathname = new URL(imageUrl).pathname;
  } catch (e) {
    // Not an absolute URL: strip the query string / fragment by hand.
    pathname = imageUrl.split(/[?#]/)[0];
  }
  return path.extname(pathname);
}

/**
 * Downloads the images referenced by `<img ... src="...">` tags in a post and
 * rewrites the HTML to point at the local copies.
 *
 * Images are stored as `<articleFolder>/images/<n><ext>`, where `n` is the
 * zero-based position of the <img> tag in the original HTML. Downloading is
 * best effort: a failed image is logged with console.error and its URL is
 * left unchanged in the returned HTML.
 *
 * @param {string} articleFolder - Folder of the article being generated.
 * @param {string} htmlContent - HTML body of the post.
 * @returns {Promise<string>} The HTML with downloaded image URLs replaced by
 *   relative `images/...` paths.
 */
async function transformImages(articleFolder, htmlContent) {
  const imagesFolder = `${articleFolder}/images`;
  shell.mkdir("-p", imagesFolder);

  const pattern = /<img.*?src="(.*?)"/g;
  // URLs already saved; a repeated URL has been rewritten everywhere by then.
  const savedUrls = new Set();
  let modifiedHtml = htmlContent;
  let matched;
  for (let i = 0; (matched = pattern.exec(htmlContent)) !== null; i++) {
    const imageUrl = matched[1];
    if (savedUrls.has(imageUrl)) {
      continue;
    }
    try {
      // Attribute values are HTML-escaped, so "&amp;" stands for "&".
      const downloadUrl = imageUrl.replace(/&amp;/g, "&");
      const response = await fetch(downloadUrl);
      if (!response.ok) {
        // Do not store an HTTP error page as if it were the image.
        throw new Error(
          `failed to download ${downloadUrl}: HTTP ${response.status}`
        );
      }
      const buffer = await response.buffer();
      const filename = `${i}${imageExtension(downloadUrl)}`;
      fs.writeFileSync(`${imagesFolder}/${filename}`, buffer);
      // Replace every quoted occurrence (the <img src> and, for example, an
      // enclosing <a href> with the same URL). Matching the surrounding quotes
      // avoids touching longer URLs that merely start with this one, and
      // split/join avoids "$" being interpreted in the replacement text.
      modifiedHtml = modifiedHtml
        .split(`"${imageUrl}"`)
        .join(`"images/${filename}"`);
      savedUrls.add(imageUrl);
    } catch (e) {
      console.error(e.message);
    }
  }

  // Nothing was saved: do not leave an empty images folder behind.
  if (savedUrls.size === 0) {
    shell.rm("-rf", imagesFolder);
  }
  return modifiedHtml;
}
/**
 * Wraps a parsed XML value in an array. The parser yields a single object
 * (not a one-element array) when an element occurs once, and nothing when it
 * is absent.
 */
function asArray(value) {
  if (value === undefined || value === null) {
    return [];
  }
  return Array.isArray(value) ? value : [value];
}

/**
 * Returns the text of a parsed XML element as a string ("" when missing).
 * Elements that carry attributes are parsed into objects whose text is stored
 * under "#text"; the text itself may have been parsed into a number.
 */
function textOf(node) {
  if (node === undefined || node === null) {
    return "";
  }
  const value = typeof node === "object" ? node["#text"] : node;
  return value === undefined || value === null ? "" : String(value);
}

/**
 * Converts the blog export `blog-backup.xml` (read from the current working
 * directory) into one folder per published post under OUT_DIR. Each folder
 * holds an `index.md` (YAML front matter followed by the Markdown body) and,
 * when images could be downloaded, an `images` sub-folder.
 *
 * OUT_DIR is deleted and recreated on every run.
 *
 * @returns {Promise<void>}
 * @throws {Error} If the backup cannot be read or has no <feed> root.
 */
async function main() {
  shell.rm("-rf", OUT_DIR);
  shell.mkdir("-p", OUT_DIR);

  // readFileSync throws a descriptive error when the backup file is missing.
  const result = parser.parse(fs.readFileSync("blog-backup.xml", "utf8"));
  if (!result.feed) {
    throw new Error("blog-backup.xml does not contain a <feed> element");
  }

  // Keep entries whose id contains ".post-", skipping comments (entries that
  // carry "thr:in-reply-to") and drafts.
  const posts = asArray(result.feed.entry).filter((entry) => {
    const isPost = textOf(entry.id).includes(".post-");
    const isComment = Object.prototype.hasOwnProperty.call(
      entry,
      "thr:in-reply-to"
    );
    const control = entry["app:control"];
    const isDraft = Boolean(control) && control["app:draft"] === "yes";
    return isPost && !isComment && !isDraft;
  });
  console.log(`found ${posts.length} posts`);

  for (let i = 0; i < posts.length; i++) {
    const post = posts[i];
    const date = moment(post.published).format("YYYY-MM-DD");
    // Untitled posts fall back to their publication date.
    const title = textOf(post.title) || date;
    const articleFolderName = `${date}_${normalizeFilename(title)}`;
    const articleFolder = `${OUT_DIR}/${articleFolderName}`;
    console.log(i, articleFolderName);
    shell.mkdir("-p", articleFolder);

    // An empty post body becomes "" so the Markdown conversion does not throw.
    const modifiedContent = await transformImages(
      articleFolder,
      textOf(post.content)
    );

    // Only categories in the Blogger label scheme are used as tags.
    const tags = asArray(post.category)
      .filter(
        (category) =>
          category["@_scheme"] === "http://www.blogger.com/atom/ns#"
      )
      .map((category) => category["@_term"]);

    const markdown = tds.turndown(modifiedContent);
    const categories = ["tech"];
    const header = yaml.dump({ title, date, tags, categories });
    const content = `---\n${header}---\n${markdown}`;
    fs.writeFileSync(`${articleFolder}/index.md`, content);
  }
}
// Run the conversion. Report a failure and exit with a non-zero status instead
// of leaving the promise rejection unhandled.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment