Skip to content

Instantly share code, notes, and snippets.

@CallumVass
Created December 1, 2023 08:08
Show Gist options
  • Save CallumVass/574cbc578b360cd81d38657635bb32c8 to your computer and use it in GitHub Desktop.
Save CallumVass/574cbc578b360cd81d38657635bb32c8 to your computer and use it in GitHub Desktop.
Wordpress To Markdown
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using HtmlAgilityPack;
// Base URL of the WordPress site to export. The REST path is appended directly
// to this value, so do not end it with a slash.
const string baseUrl = "https://YOUR_DOMAIN_HERE";
// Upper bound on the number of REST API pages requested. My site only had 14
// pages of posts; adjust accordingly.
const int maxPageTraversal = 14;
var root = Directory.GetCurrentDirectory();
var converter = new ReverseMarkdown.Converter();

// Get posts from WordPress
var wordpressPosts = await GetWordpressPostsAsync();

// All files go to ./posts/<slug>.md. The directory does not depend on the post,
// so it is created once here instead of on every loop iteration.
var postDir = Path.Combine(root, "posts");
Directory.CreateDirectory(postDir);

// Create one markdown file per published post
foreach (var post in wordpressPosts.Where(e => e.Status == "publish"))
{
    // Invariant culture keeps the front-matter date in Gregorian yyyy-MM-dd form
    // regardless of the machine's regional settings.
    var postDate = post.Date.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);

    // Emit the title as a double-quoted YAML scalar: an unquoted title containing
    // ": " or " #" would otherwise yield invalid or truncated front matter.
    var yamlTitle = StripHtml(post.Title.Rendered)
        .Replace("\\", "\\\\")
        .Replace("\"", "\\\"");

    var content = new StringBuilder();
    content.AppendLine("---");
    content.AppendLine($"title: \"{yamlTitle}\"");
    content.AppendLine("pubDate: " + postDate);
    content.AppendLine("---");
    content.AppendLine();
    content.AppendLine(converter.Convert(post.Content.Rendered));

    var postPath = Path.Combine(postDir, $"{post.Slug}.md");
    await File.WriteAllTextAsync(postPath, content.ToString());
}
return;
// Reduces an HTML fragment to plain text.
// Text nodes are collected in document order, HTML entities are decoded,
// each piece is trimmed, and the non-empty pieces are joined with single spaces.
// A bracketed ellipsis ("[…]", WordPress's truncation marker) becomes "...".
string StripHtml(string content)
{
    var doc = new HtmlDocument();
    doc.LoadHtml(content);

    var fragments = doc.DocumentNode.DescendantsAndSelf()
        .Where(n => n.NodeType == HtmlNodeType.Text)
        // InnerText keeps entities such as &#8217; or &amp; encoded; decode them so
        // the result is real text rather than markup residue.
        .Select(n => HtmlEntity.DeEntitize(n.InnerText).Trim())
        // Whitespace-only nodes between tags would otherwise add doubled spaces.
        .Where(text => text.Length > 0);

    // Replace after decoding so both the literal "[…]" and "[&hellip;]" are covered.
    return string.Join(" ", fragments).Replace("[…]", "...");
}
// Downloads posts from the WordPress REST API ({baseUrl}/wp-json/wp/v2/posts),
// requesting pages 1..maxPageTraversal in ascending order.
// Returns every post deserialized from the pages that were fetched successfully.
// Paging stops early once the server reports that there are no more pages.
async Task<IReadOnlyCollection<WordpressPost>> GetWordpressPostsAsync()
{
    // One client for the whole crawl, released when the method completes.
    using var client = new HttpClient();
    var posts = new List<WordpressPost>();

    for (var page = 1; page <= maxPageTraversal; page++)
    {
        using var response =
            await client.GetAsync(
                $"{baseUrl}/wp-json/wp/v2/posts?_fields=slug,date,status,title,content,excerpt,standard,id&order=asc&page={page}");

        // WordPress answers HTTP 400 (rest_post_invalid_page_number) when the page
        // number is past the end of the collection, so later pages cannot succeed.
        if (response.StatusCode == System.Net.HttpStatusCode.BadRequest)
        {
            break;
        }

        if (!response.IsSuccessStatusCode)
        {
            // Stay best-effort (skip this page, try the next) but make the gap visible.
            Console.Error.WriteLine($"Skipping page {page}: HTTP {(int)response.StatusCode}");
            continue;
        }

        var json = await response.Content.ReadAsStringAsync();
        var pagePosts = JsonSerializer.Deserialize<List<WordpressPost>>(json);

        // A JSON null or an empty array means there is nothing further to read.
        if (pagePosts is null || pagePosts.Count == 0)
        {
            break;
        }

        posts.AddRange(pagePosts);
    }

    return posts;
}
/// <summary>
/// JSON object with a single <c>rendered</c> string, used by
/// <see cref="WordpressPost"/> for the post title and content.
/// </summary>
public class RenderedText
{
    /// <summary>
    /// Value of the <c>rendered</c> JSON field. Defaults to an empty string so that
    /// a payload without the field does not leave this non-nullable property null.
    /// </summary>
    [JsonPropertyName("rendered")]
    public string Rendered { get; set; } = string.Empty;
}
/// <summary>
/// Subset of a WordPress REST API post object that the exporter reads.
/// String and object properties carry non-null defaults so that a response
/// missing one of the fields does not cause a null dereference in consumers.
/// </summary>
public class WordpressPost
{
    /// <summary>Value of the <c>date</c> JSON field.</summary>
    [JsonPropertyName("date")]
    public DateTime Date { get; set; }

    /// <summary>Value of the <c>slug</c> JSON field; used as the output file name.</summary>
    [JsonPropertyName("slug")]
    public string Slug { get; set; } = string.Empty;

    /// <summary>Value of the <c>status</c> JSON field; the exporter keeps only <c>"publish"</c>.</summary>
    [JsonPropertyName("status")]
    public string Status { get; set; } = string.Empty;

    /// <summary>Value of the <c>title</c> JSON object (HTML in its <c>rendered</c> field).</summary>
    [JsonPropertyName("title")]
    public RenderedText Title { get; set; } = new();

    /// <summary>Value of the <c>content</c> JSON object (HTML in its <c>rendered</c> field).</summary>
    [JsonPropertyName("content")]
    public RenderedText Content { get; set; } = new();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment