Created
October 17, 2018 22:04
-
-
Save brainwipe/f806c1e89e0f003ce24578a1dcdf1dd6 to your computer and use it in GitHub Desktop.
Converts a Blogger export XML file to Markdown
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Requires the NuGet package Html2Markdown: https://www.nuget.org/packages/Html2Markdown/
using System; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Linq; | |
using System.Text; | |
using System.Xml; | |
using Html2Markdown; | |
namespace BloggerToMd
{
    /// <summary>
    /// Converts a Blogger export file (Atom XML) into one Markdown file per blog post.
    /// </summary>
    public static class BloggerToMarkdown
    {
        /// <summary>Maximum number of title characters kept in a generated file name.</summary>
        private const int MaxTitleLengthInFileName = 10;

        /// <summary>
        /// Reads the Blogger export at <paramref name="bloggerFileName"/> and writes one
        /// <c>.md</c> file per selected entry into the directory that contains the export.
        /// </summary>
        /// <param name="bloggerFileName">Path to the Blogger export XML file.</param>
        /// <exception cref="ArgumentNullException"><paramref name="bloggerFileName"/> is null.</exception>
        /// <exception cref="InvalidDataException">An entry has no <c>atom:published</c> element.</exception>
        /// <exception cref="FormatException">An entry's published date cannot be parsed.</exception>
        public static void Convert(string bloggerFileName)
        {
            if (bloggerFileName == null)
            {
                throw new ArgumentNullException(nameof(bloggerFileName));
            }

            var document = Load(bloggerFileName);
            var manager = BloggerNameSpaceManager(document);

            // Direct atom:entry children of the root whose atom:id contains the text "post".
            var blogPosts = document.DocumentElement.SelectNodes("atom:entry[contains(atom:id, 'post')]", manager);

            // GetDirectoryName may return null; an empty folder makes Path.Combine yield
            // just the file name, i.e. a path relative to the current directory.
            var folder = Path.GetDirectoryName(bloggerFileName) ?? string.Empty;

            foreach (XmlNode blogPost in blogPosts)
            {
                var post = ToBlogPost(blogPost, manager);
                SaveToFile(post, folder);
            }
        }

        /// <summary>Loads the export file into an <see cref="XmlDocument"/>.</summary>
        private static XmlDocument Load(string bloggerFileName)
        {
            var doc = new XmlDocument();
            doc.Load(bloggerFileName);
            return doc;
        }

        /// <summary>
        /// Builds a namespace manager with the prefixes used by the XPath queries in this class
        /// (<c>atom</c>) plus the other namespaces registered for the export format.
        /// </summary>
        private static XmlNamespaceManager BloggerNameSpaceManager(XmlDocument bloggerDocument)
        {
            var manager = new XmlNamespaceManager(bloggerDocument.NameTable);
            manager.AddNamespace("openSearch", "http://a9.com/-/spec/opensearchrss/1.0/");
            manager.AddNamespace("gd", "http://schemas.google.com/g/2005");
            manager.AddNamespace("thr", "http://purl.org/syndication/thread/1.0");
            manager.AddNamespace("georss", "http://www.georss.org/georss");
            manager.AddNamespace("atom", "http://www.w3.org/2005/Atom");
            return manager;
        }

        /// <summary>
        /// Extracts title, published date, tags and content from a single entry node and
        /// converts the content from HTML to Markdown.
        /// </summary>
        /// <remarks>
        /// A missing title or content element is treated as empty text; a missing published
        /// date is an error because the output file name is derived from it.
        /// </remarks>
        private static BlogPost ToBlogPost(XmlNode blogPost, XmlNamespaceManager manager)
        {
            var converter = new Converter();

            var title = InnerTextOrEmpty(blogPost, "atom:title", manager);

            var publishedNode = blogPost.SelectSingleNode("atom:published", manager);
            if (publishedNode == null)
            {
                throw new InvalidDataException($"Blog entry '{title}' has no atom:published element.");
            }

            // Parse with the invariant culture so the result does not depend on the machine's locale.
            var date = DateTimeOffset.Parse(
                publishedNode.InnerText,
                System.Globalization.CultureInfo.InvariantCulture);

            var tags = blogPost
                .SelectNodes("atom:category[@scheme='http://www.blogger.com/atom/ns#']/@term", manager)
                .Cast<XmlNode>()
                .Select(tag => tag.Value)
                .ToArray();

            var content = InnerTextOrEmpty(blogPost, "atom:content", manager);
            var markdown = converter.Convert(content);

            return new BlogPost(title, date, tags, markdown);
        }

        /// <summary>
        /// Returns the inner text of the first node matching <paramref name="xpath"/>,
        /// or an empty string when no such node exists.
        /// </summary>
        private static string InnerTextOrEmpty(XmlNode parent, string xpath, XmlNamespaceManager manager)
        {
            var node = parent.SelectSingleNode(xpath, manager);
            return node == null ? string.Empty : node.InnerText;
        }

        /// <summary>
        /// Writes <paramref name="post"/> to <c>{yyyy-MM-dd}-{title}.md</c> inside
        /// <paramref name="folder"/>, preceded by a small Title/Date header.
        /// </summary>
        /// <remarks>
        /// NOTE: posts published on the same day whose sanitized titles share the same leading
        /// characters map to the same file name, and <see cref="File.WriteAllText(string, string)"/>
        /// overwrites an existing file. The post's tags are not written to the output.
        /// </remarks>
        private static void SaveToFile(BlogPost post, string folder)
        {
            // Invariant culture keeps the '/' separators and Gregorian digits stable across locales.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            var fileDate = post.Date.ToString("yyyy-MM-dd", invariant);
            var headerDate = post.Date.ToString("dd/MM/yy", invariant);

            var fileName = $"{fileDate}-{SafeFileName(post.Title)}.md";

            var sb = new StringBuilder();
            sb.Append($@"Title: {post.Title}
Date: {headerDate}
---
");
            sb.Append(post.Markdown);

            var fullPath = Path.Combine(folder, fileName);
            File.WriteAllText(fullPath, sb.ToString());
        }

        /// <summary>
        /// Replaces every character that is invalid in a file name with '-' and keeps at most
        /// <see cref="MaxTitleLengthInFileName"/> characters. Titles shorter than the limit
        /// (including empty or null ones) are returned without throwing.
        /// </summary>
        private static string SafeFileName(string fileName)
        {
            var sanitized = Path.GetInvalidFileNameChars()
                .Aggregate(fileName ?? string.Empty, (current, c) => current.Replace(c, '-'));

            if (sanitized.Length <= MaxTitleLengthInFileName)
            {
                return sanitized;
            }

            // Do not cut a surrogate pair in half: drop the dangling high surrogate instead.
            var length = MaxTitleLengthInFileName;
            if (char.IsHighSurrogate(sanitized[length - 1]))
            {
                length--;
            }

            return sanitized.Substring(0, length);
        }

        /// <summary>Immutable holder for the data extracted from one Blogger entry.</summary>
        private sealed class BlogPost
        {
            public BlogPost(string title, DateTimeOffset date, string[] tags, string markdown)
            {
                Title = title;
                Date = date;
                Tags = tags;
                Markdown = markdown;
            }

            /// <summary>Post title as it appears in the export.</summary>
            public string Title { get; }

            /// <summary>Published timestamp of the post.</summary>
            public DateTimeOffset Date { get; }

            /// <summary>Values of the entry's category <c>term</c> attributes.</summary>
            public string[] Tags { get; }

            /// <summary>Post content converted to Markdown.</summary>
            public string Markdown { get; }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment