Created
December 1, 2023 08:08
-
-
Save CallumVass/574cbc578b360cd81d38657635bb32c8 to your computer and use it in GitHub Desktop.
WordPress to Markdown
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Globalization;
using System.Net;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using HtmlAgilityPack;
// Root URL of the WordPress site to export. The REST path is appended with a leading '/',
// so do not end this value with a slash.
const string baseUrl = "https://YOUR_DOMAIN_HERE";

// Upper bound on the number of REST API pages that will be requested.
// I know my site only had 14 pages, adjust accordingly
const int maxPageTraversal = 14;

var root = Directory.GetCurrentDirectory();
var converter = new ReverseMarkdown.Converter();

// Get posts from WordPress
var wordpressPosts = await GetWordpressPostsAsync();

// All files go into "<current directory>/posts"; the folder is the same for every post,
// so create it once instead of once per iteration.
var postDir = Path.Combine(root, "posts");
Directory.CreateDirectory(postDir);

// Create markdown files (one "<slug>.md" per published post)
foreach (var post in wordpressPosts.Where(e => e.Status == "publish"))
{
    // Format with the invariant culture: the default overload uses the current culture,
    // whose calendar may not be Gregorian and would then write a different year.
    var postDate = post.Date.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);
    var postTitle = post.Title.Rendered;
    var postSlug = post.Slug;
    var postContent = post.Content.Rendered;
    var postPath = Path.Combine(postDir, $"{postSlug}.md");

    // Write the title as a double-quoted YAML scalar. A plain scalar breaks the front
    // matter as soon as the title contains ": ", " #", or starts with a quote/bracket.
    // Inside double quotes only '\' and '"' need escaping.
    var yamlTitle = StripHtml(postTitle)
        .Replace("\\", "\\\\")
        .Replace("\"", "\\\"");

    var content = new StringBuilder();
    content.AppendLine("---");
    content.AppendLine($"title: \"{yamlTitle}\"");
    content.AppendLine($"pubDate: {postDate}");
    content.AppendLine("---");
    content.AppendLine();
    // Body: the rendered HTML converted to Markdown by ReverseMarkdown.
    content.AppendLine(converter.Convert(postContent));

    await File.WriteAllTextAsync(postPath, content.ToString());
}

// End of the top-level program; only local function and type declarations follow.
return;
// Reduces an HTML fragment to plain text.
//   content: HTML markup (here: the rendered post title).
//   returns: the fragment's text nodes, entity-decoded, trimmed and joined with single
//            spaces; "[…]" markers are replaced by "...". Null or empty input yields "".
string StripHtml(string content)
{
    // Rendered text can be null at runtime when the JSON field is missing or null;
    // return an empty title instead of throwing.
    if (string.IsNullOrEmpty(content))
    {
        return string.Empty;
    }

    var doc = new HtmlDocument();
    doc.LoadHtml(content);

    var textParts = doc.DocumentNode.DescendantsAndSelf()
        .Where(n => n.NodeType == HtmlNodeType.Text)
        // InnerText leaves entities such as &amp; or &#8217; encoded; decode them so the
        // plain-text result contains the actual characters. Decode before trimming so a
        // leading/trailing &nbsp; is trimmed as whitespace too.
        .Select(n => WebUtility.HtmlDecode(n.InnerText).Trim())
        // Whitespace-only nodes between tags would otherwise show up as doubled spaces.
        .Where(text => text.Length > 0);

    // Replace the "[…]" marker after decoding so the entity form "[&hellip;]" is
    // covered as well as the literal character.
    return string.Join(" ", textParts).Replace("[…]", "...");
}
// Downloads posts from the site's WordPress REST API, oldest first.
//   Requests pages 1..maxPageTraversal of {baseUrl}/wp-json/wp/v2/posts and collects the
//   deserialized posts from every successful response.
//   returns: all posts that could be fetched; failed pages are reported on stderr and
//            skipped, and paging stops at the first HTTP 400 response.
async Task<IReadOnlyCollection<WordpressPost>> GetWordpressPostsAsync()
{
    // One client for all requests, disposed when the download is finished.
    using var client = new HttpClient();
    var posts = new List<WordpressPost>();

    for (var page = 1; page <= maxPageTraversal; page++)
    {
        var requestUrl =
            $"{baseUrl}/wp-json/wp/v2/posts?_fields=slug,date,status,title,content,excerpt,standard,id&order=asc&page={page}";

        // Scoped to the loop body, so each response is released before the next request.
        using var response = await client.GetAsync(requestUrl);

        if (response.StatusCode == HttpStatusCode.BadRequest)
        {
            // WordPress rejects a page number beyond the last page with HTTP 400
            // (rest_post_invalid_page_number), so higher page numbers cannot succeed.
            break;
        }

        if (!response.IsSuccessStatusCode)
        {
            // Stay best-effort, but make the missing page visible instead of silently
            // dropping its posts.
            Console.Error.WriteLine(
                $"Skipping page {page}: HTTP {(int)response.StatusCode} {response.ReasonPhrase}");
            continue;
        }

        var json = await response.Content.ReadAsStringAsync();
        var newPosts = JsonSerializer.Deserialize<List<WordpressPost>>(json);

        // Deserialize returns null for a literal JSON "null" payload.
        if (newPosts is not null)
        {
            posts.AddRange(newPosts);
        }
    }

    return posts;
}
/// <summary>
/// JSON object with a single <c>rendered</c> string, used for the post's
/// <c>title</c> and <c>content</c> fields.
/// </summary>
public class RenderedText
{
    /// <summary>
    /// Value of the <c>rendered</c> JSON property. Defaults to an empty string so a
    /// response without that property does not leave a non-nullable string as null.
    /// </summary>
    [JsonPropertyName("rendered")] public string Rendered { get; set; } = string.Empty;
}
/// <summary>
/// The subset of a WordPress REST API post object that the exporter reads.
/// Reference-type properties carry defaults so a response that omits a field does not
/// leave them null.
/// </summary>
public class WordpressPost
{
    /// <summary>Value of the <c>date</c> JSON property.</summary>
    [JsonPropertyName("date")] public DateTime Date { get; set; }

    /// <summary>Value of the <c>slug</c> JSON property; used as the output file name.</summary>
    [JsonPropertyName("slug")] public string Slug { get; set; } = string.Empty;

    /// <summary>Value of the <c>status</c> JSON property; only <c>"publish"</c> posts are exported.</summary>
    [JsonPropertyName("status")] public string Status { get; set; } = string.Empty;

    /// <summary>Value of the <c>title</c> JSON property (an object with a <c>rendered</c> string).</summary>
    [JsonPropertyName("title")] public RenderedText Title { get; set; } = new();

    /// <summary>Value of the <c>content</c> JSON property (an object with a <c>rendered</c> string).</summary>
    [JsonPropertyName("content")] public RenderedText Content { get; set; } = new();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment