Skip to content

Instantly share code, notes, and snippets.

@robdmoore
Last active March 31, 2018 09:27
Show Gist options
  • Save robdmoore/476075a7b81afb7420fc1b0da63bbda9 to your computer and use it in GitHub Desktop.
Save robdmoore/476075a7b81afb7420fc1b0da63bbda9 to your computer and use it in GitHub Desktop.
Wordpress -> Jekyll converter

Following https://mdavies.net/2014/11/27/migrating-wpjekyll/#migrate-from-wordpress you can convert your WordPress posts across to Jekyll format, but the result contains superfluous metadata and WordPress shortcodes (such as [code] and [caption]) are not handled correctly. This program helps with that.

Fair warning: it's deliberately dirty / hacky code. I ran it with the posts in a Git repo and inspected the changes file by file, tweaking as I went.

using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Web;
// <package id = "HtmlAgilityPack" version="1.5.5" targetFramework="net461" />
// <package id = "ReverseMarkdown" version="1.5.0" targetFramework="net461" />
namespace JekyllConverter
{
    /// <summary>
    /// One-shot console tool that post-processes posts exported from WordPress
    /// into Jekyll format: HTML posts are converted to Markdown (with the
    /// [code] and [caption] shortcodes handled explicitly) and unwanted keys
    /// are stripped from the YAML front matter.
    /// </summary>
    class Program
    {
        // Front matter: from the first line starting with "---" to the next such line.
        private static readonly Regex PreambleRegex = new Regex(@"(?s)^---.*?^---", RegexOptions.Multiline);

        // HTML line-break / paragraph tags that are stripped from inside code blocks.
        private static readonly Regex CodeHtmlRegex = new Regex("<br\\s*/>|<\\/?p>");

        /// <summary>
        /// Rewrites every file in the target directory in place. Files ending in
        /// ".html" are converted to Markdown, written next to the original with a
        /// ".md" extension, and the original is deleted.
        /// </summary>
        /// <param name="args">
        /// Optional: the first element is the directory to process. When omitted,
        /// a hard-coded development path is used.
        /// </param>
        static void Main(string[] args)
        {
            var converter = new ReverseMarkdown.Converter();
            var dir = args.Length > 0 ? args[0] : @"C:\dev\temp\jekylltest\test";

            foreach (var file in Directory.GetFiles(dir))
            {
                var fileToWrite = file;
                var fileContent = File.ReadAllText(file);

                // Cut out only the matched front-matter span. The previous
                // fileContent.Replace(preamble, "") threw ArgumentException for
                // files without front matter, because the search string was empty.
                var preambleMatch = PreambleRegex.Match(fileContent);
                var preamble = preambleMatch.Value;
                var content = preambleMatch.Success
                    ? fileContent.Remove(preambleMatch.Index, preambleMatch.Length)
                    : fileContent;

                // Ordinal comparison: a file extension is not linguistic text.
                if (file.EndsWith(".html", System.StringComparison.Ordinal))
                {
                    content = ConvertHtmlToMarkdown(converter, content);
                    fileToWrite = Path.ChangeExtension(file, ".md");
                }

                File.WriteAllText(fileToWrite, CleanPreamble(preamble) + content);

                if (fileToWrite != file)
                {
                    File.Delete(file);
                }
            }
        }

        /// <summary>
        /// Converts the HTML body of a post to Markdown. [code] shortcodes are
        /// swapped for a placeholder before conversion and re-inserted afterwards
        /// as fenced code blocks; [caption] shortcodes are rewritten to plain HTML
        /// before conversion.
        /// </summary>
        /// <param name="converter">The ReverseMarkdown converter to use.</param>
        /// <param name="content">Post body with the front matter already removed.</param>
        /// <returns>The Markdown text.</returns>
        private static string ConvertHtmlToMarkdown(ReverseMarkdown.Converter converter, string content)
        {
            // Group 3 = language attribute value (may be empty), group 4 = code body.
            var codeBlocks = Regex.Matches(content, @"(?s)\[code(\s+lang(uage)?=""(.+?)"")?\](.*?)\[\/code\]", RegexOptions.Multiline).OfType<Match>().ToList();

            // Hide each code block behind a "|||" placeholder so the HTML-to-Markdown
            // conversion does not touch the code itself.
            codeBlocks.ForEach(match => content = content.Replace(match.Value, "<p>|||</p>"));

            // Two caption shapes: one with a caption="..." attribute (groups 2, 3)
            // and one whose body is an anchor followed by the caption text (groups 5, 6).
            // Groups of the alternative that did not match expand to empty strings.
            content = Regex.Replace(content, @"(\[caption.+?caption=""(.+?)"".*?](.+?)\[\/caption\])|(\[caption.*?]\s*(<a.+?</a>)(.+?)\[\/caption\])", "$5$3<br /><em>$2$6</em>");

            var markdown = converter.Convert(content);

            var index = 0;
            return Regex.Replace(markdown, "\\|\\|\\|", m =>
            {
                // More "|||" markers than extracted code blocks means the post
                // itself contained a literal "|||"; leave the surplus untouched
                // instead of indexing past the end of the list.
                if (index >= codeBlocks.Count)
                {
                    return m.Value;
                }

                var match = codeBlocks[index++];
                return $"```{match.Groups[3].Value}{HttpUtility.HtmlDecode(CodeHtmlRegex.Replace(match.Groups[4].Value, ""))}```";
            });
        }

        /// <summary>
        /// Removes the parent_id, published, password, status and meta entries
        /// from the front matter and collapses the nested author entry to a
        /// single "author: display name" line.
        /// </summary>
        /// <param name="preamble">The front-matter text; may be empty.</param>
        /// <returns>The cleaned front-matter text.</returns>
        private static string CleanPreamble(string preamble)
        {
            preamble = Regex.Replace(preamble, @"^parent_id:.+?$\s+", "", RegexOptions.Multiline);
            preamble = Regex.Replace(preamble, @"^published:.+?$\s+", "", RegexOptions.Multiline);
            preamble = Regex.Replace(preamble, @"^password:.+?$\s+", "", RegexOptions.Multiline);
            preamble = Regex.Replace(preamble, @"^status:.+?$\s+", "", RegexOptions.Multiline);

            // Multi-line entries: consume up to (not including) the next line
            // that starts with a non-whitespace character, i.e. the next top-level key.
            preamble = Regex.Replace(preamble, @"^meta:[\s\S]+?(?=(^\S))+", "", RegexOptions.Multiline);
            preamble = Regex.Replace(preamble, @"^author:[\s\S]+?display_name:\s+(.+?)$[\s\S]+?(?=(^\S))+", "author: $1\n", RegexOptions.Multiline);
            return preamble;
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment