Last active
August 31, 2022 12:42
-
-
Save pvgenuchten/3cf227a5bb409e5d91f459b55738566d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Goal of the script is to export an existing site to Hugo markdown:
# 1. use httrack or some other tool to download the full website
# 2. run this script to convert the local copy to markdown
# 3. verify that the header and footer are properly removed (by setting the split point)
# 4. copy the markdown files into the Hugo content section
import os | |
from markdownify import markdownify as md | |
# Markers that delimit the real page body. Everything before the first one
# (site header, navigation) and after the second one (footer) is discarded.
# Adjust them to match the markup of the exported site.
CONTENT_START = '<!-- /#content-header -->'
CONTENT_END = '<!-- /#content -->'


def _extract_main(page, start_marker, end_marker):
    """Return the part of *page* between *start_marker* and *end_marker*.

    A missing end marker keeps everything after the start marker.

    Raises:
        ValueError: if *start_marker* does not occur in *page*.
    """
    _, found, rest = page.partition(start_marker)
    if not found:
        raise ValueError('start marker {!r} not found'.format(start_marker))
    return rest.partition(end_marker)[0]


def _extract_title(main, fallback, converter):
    """Return the first ``<h1>`` of *main* converted by *converter*.

    *fallback* is returned when there is no complete ``<h1>...</h1>`` pair or
    when the heading converts to an empty string.
    """
    _, opened, rest = main.partition('<h1>')
    heading, closed, _ = rest.partition('</h1>')
    if not (opened and closed):
        return fallback
    # The title goes on a single front matter line, so collapse any
    # whitespace or newlines the converter produced.
    title = ' '.join(converter(heading).split())
    return title or fallback


def _front_matter(title):
    """Return the Hugo YAML front matter block for a page titled *title*."""
    # Inside a YAML single-quoted scalar a quote is escaped by doubling it;
    # without this a title such as "It's here" yields invalid front matter.
    safe_title = title.replace("'", "''")
    return (
        "---\ntitle: '{0}'\ndate: 2022-06-01\nicon: ''\ndraft: false\n---\n\n\n"
        .format(safe_title)
    )


def convert_site(top='.', start_marker=CONTENT_START, end_marker=CONTENT_END,
                 converter=None):
    """Convert every ``.html`` file below *top* into a Hugo markdown file.

    For each HTML page the text between *start_marker* and *end_marker* is
    converted to markdown and written next to the source as ``<stem>.md``,
    preceded by front matter whose title is the page's first ``<h1>`` (or the
    file stem when there is none). The directory tree is printed while
    walking. A page that cannot be processed is reported and skipped; the
    remaining pages are still converted.

    Args:
        top: directory to walk.
        start_marker: text that ends the site header.
        end_marker: text that starts the site footer.
        converter: callable turning an HTML string into markdown; defaults to
            the module-level ``md`` (markdownify).
    """
    if converter is None:
        converter = md  # markdownify, imported at the top of the file

    for root, _dirs, files in os.walk(top):
        # Depth relative to the starting directory drives the tree indent.
        rel = os.path.relpath(root, top)
        level = 0 if rel == os.curdir else rel.count(os.sep) + 1
        print('{}{}/'.format(' ' * 4 * level, os.path.basename(root)))
        subindent = ' ' * 4 * (level + 1)

        # Sorted so the printed tree and processing order are deterministic.
        for name in sorted(files):
            # splitext copes with names that have no dot or several dots.
            stem, ext = os.path.splitext(name)
            if ext.lower() != '.html':
                continue
            print('{}{}'.format(subindent, name))

            try:
                # Read-only access is enough; the source page is never modified.
                with open(os.path.join(root, name), 'r', encoding='utf-8') as src:
                    page = src.read()

                # strip header / footer
                try:
                    main = _extract_main(page, start_marker, end_marker)
                except ValueError as ex:
                    print('fail split: ' + str(ex))
                    # Skip only this page, not the rest of the directory.
                    continue

                title = _extract_title(main, stem, converter)
                # markdownify https://github.com/matthewwithanm/python-markdownify
                body = converter(main)

                # save as <stem>.md next to the source file
                with open(os.path.join(root, stem + '.md'), 'w',
                          encoding='utf-8') as dst:
                    dst.write(_front_matter(title))
                    dst.write(body)
            except Exception as ex:
                # Best-effort batch job: report the failure and keep going.
                print('Error: ' + str(ex))


if __name__ == '__main__':
    convert_site()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment