Created
December 18, 2022 17:07
-
-
Save alexandruc/5f30ac3be02ae8464a4060b47f9edb40 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Small script that converts Hugo Markdown posts containing HTML into pure Markdown
import requests | |
import os | |
import dateparser | |
# Remote HTML-to-Markdown conversion endpoint that post bodies are sent to.
url = "https://tools.atatus.com/tools/html-to-markdown"
# Placeholder: set this to the directory that holds the Hugo blog posts.
working_dir = "location of the hugo blog posts"
# Keep regular files only: os.listdir() also returns subdirectories, which
# cannot be opened and rewritten as a post.
posts = [
    entry
    for entry in os.listdir(working_dir)
    if os.path.isfile(os.path.join(working_dir, entry))
]
def get_post_date(post):
    """Parse the date held in the leading dash-separated fields of *post*.

    The first three ``-``-separated pieces of the name are joined back
    together with ``-`` and handed to ``dateparser.parse``; its result is
    returned unchanged.

    Args:
        post: Name of a post file (presumably ``YYYY-MM-DD-title...`` --
            confirm against the actual file naming scheme).

    Returns:
        Whatever ``dateparser.parse`` produces for the extracted prefix.
    """
    date_prefix = "-".join(post.split("-")[:3])
    return dateparser.parse(date_prefix)
# Delimiter that opens and closes the front-matter header of each post.
header_limit = "+++"

# Backslash escapes found in the converter output, paired with the plain
# character each one is turned back into (applied in this order).
_escaped_to_plain = (
    ("\\*", "*"),
    ("\\_", "_"),
    ("\\[", "["),
    ("\\]", "]"),
    ("\\{", "{"),  # was mapped to "}" before, which mangled opening braces
    ("\\}", "}"),
)

# some filter that i needed at some point, can be removed
# The lower bound is parsed once here rather than once per post.
earliest_date = dateparser.parse("2015-09-27")
posts_to_transform = []
for p in posts:
    post_date = get_post_date(p)  # parsed once per post
    if post_date is None:
        # The name has no parseable date prefix, so it cannot be compared.
        continue
    if post_date.year <= 2017 and post_date >= earliest_date:
        posts_to_transform.append(p)
posts_to_transform.sort()

for fname in posts_to_transform:
    file_path = os.path.join(working_dir, fname)
    # Explicit encoding so the result does not depend on the platform locale.
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Locate the opening and closing header delimiters.
    start_header = content.find(header_limit)
    end_header = -1
    if start_header != -1:
        end_header = content.find(header_limit, start_header + len(header_limit))
    if end_header == -1:
        # Without a complete header the slices below would split the file at
        # the wrong place and the rewrite would corrupt it.
        print(f"skipped (no {header_limit} header found): {fname}")
        continue

    header = content[:end_header + len(header_limit)]
    post_content = content[end_header + len(header_limit):]

    res = requests.post(url, {"html": post_content.strip()}, timeout=30)
    # Fail before touching the file so an HTTP error body never replaces
    # the post content.
    res.raise_for_status()

    transformed_content = res.text
    for escaped, plain in _escaped_to_plain:
        transformed_content = transformed_content.replace(escaped, plain)

    transformed_post = header + "\n\n" + transformed_content
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(transformed_post)
    print(f"transformed: {fname}")
print("Done")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment