-
-
Save ntantri/e95b941b0eb407371b529f8212e93cd4 to your computer and use it in GitHub Desktop.
Batch convert HTML to Markdown
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Converts HTML from https://exportmyposts.jazzychad.net/ exports to Markdown | |
POSTS_DIR=/Users/richard/Desktop/d6y/posts | |
for file in $POSTS_DIR/*.html | |
do | |
echo $file | |
# The filename without the path: | |
basefile=`basename $file` | |
# Filenames have the form: yyyy-mm-dd-hh:mm:ss-slug.html | |
# Remove the hh:mm:ss- part | |
shortfile=${basefile/[0-9][0-9]:[0-9][0-9]:[0-9][0-9]-} | |
# Remove the .html part | |
withoutext=${shortfile%.html} | |
# The slug is the part after the date: | |
slug=${withoutext:11} | |
# The publication date in yyyy-mm-dd format | |
pubdatedash=${withoutext:0:10} | |
# The publication date in yyyy/mm/dd format | |
pubdate=${pubdatedash//-//} | |
# The output filename e.g., yyyy-mm-dd-slug.md | |
mdfile=${withoutext}.md | |
# MAGIC! | |
pandoc --reference-links -s -f html -t markdown $file > $mdfile.tmp | |
# The first line is a comment followed by the title | |
mdtitle=`head -1 $mdfile.tmp` | |
title=${mdtitle:1} | |
# Write meta data to the start of the file | |
echo "title: $title" > $mdfile | |
echo "date: $pubdate" >> $mdfile | |
echo "alias: /$slug" >> $mdfile | |
# Append the rest of the markdown without the title (as I don't need it) | |
tail +7 $mdfile.tmp >> $mdfile | |
rm $mdfile.tmp | |
done | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment