-
-
Save aolle/6e595650391deef79ffb1c9bb38fb6e9 to your computer and use it in GitHub Desktop.
Convert Nimbus Notes HTML to Markdown for Joplin
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -------------------------------------------------------------------------
# Nimbus Note HTML export to Markdown converter
# Extracts all zip files containing 'note.html' and converts them to Markdown
#
# Setup:
# 1) Install Python 3 for your OS.
# 2) Install pandoc: https://github.com/jgm/pandoc/releases/tag/2.11.4
#    On Windows, the .msi will automatically add pandoc to your $PATH;
#    otherwise add it to your $PATH yourself.
# 3) Save this script in the directory where your HTML exports were
#    exported. Open a terminal / command prompt and cd to the directory
#    where you saved convert.py.
# 4) Issue the command "python convert.py"
#    (add the word "debug" afterward for extra output: python convert.py debug)
# 5) To use for Joplin import, use File -> Import -> Markdown (Directory)
#    and select the 'converted' directory that is created by this script.
#
# Happy note-taking. -dogboydog
# -------------------------------------------------------------------------
import os | |
import pathlib | |
import re | |
import shutil | |
import subprocess | |
import sys | |
from os.path import abspath | |
from zipfile import ZipFile | |
# Running totals of converted / failed notes, updated by write_note().
notes_written = 0
notes_failed = 0

sep = os.path.sep

# Colour output stays on unless 'no-color' is given on the command line or
# a NO_COLOR variable is present in the environment.
color = 'no-color' not in sys.argv[1:] and 'NO_COLOR' not in os.environ

# Debug output is switched on by a DEBUG environment variable whose value is
# anything other than "0", or by the word 'debug' on the command line.
debug_on = os.environ.get('DEBUG', "0") != "0" or 'debug' in sys.argv[1:]

clean = True  # set to False to keep html files from conversion
class _c:
    """ANSI escape sequences used to decorate terminal output.

    Every attribute is the empty string when the module-level ``color``
    flag is false, so the names can be interpolated unconditionally.
    """

    # text attributes
    ENDC = '\033[0m' if color else ''
    BOLD = '\033[1m' if color else ''
    UNDERLINE = '\033[4m' if color else ''

    # foreground colours
    HEADER = '\033[95m' if color else ''
    BLUE = '\u001b[34m' if color else ''
    CYAN = '\033[96m' if color else ''
    GREEN = '\033[92m' if color else ''
    YELLOW = '\u001b[33m' if color else ''
    RED = '\033[91m' if color else ''
# File-name suffixes the script searches for.
html_extension, zip_extension = ".html", ".zip"
def log_debug(message):
    """Print *message* in blue, but only when debug output is enabled."""
    if not debug_on:
        return
    print(f"{_c.BLUE}{message}{_c.ENDC}")
def remove_empty_dir(empty_dir):
    """Try to delete *empty_dir* with ``os.removedirs``.

    Any ``OSError`` (for example because the directory still has content
    or does not exist) is ignored; a debug line is logged on success.
    """
    try:
        os.removedirs(empty_dir)
        log_debug(f"Deleted empty directory '{empty_dir}'")
    except OSError:
        # Best effort only: a directory that cannot be removed is left alone.
        return
# recursively delete empty directories
def remove_empty_dirs(path):
    """Walk *path* and attempt to remove every directory found below it."""
    # Bottom-up traversal, so the most deeply nested directories are
    # attempted before their parents.
    for parent, child_dirs, _files in os.walk(path, topdown=False):
        for child in child_dirs:
            target = os.path.realpath(os.path.join(parent, child))
            remove_empty_dir(target)
def clean_up():
    """Delete leftover web assets under 'converted' and prune empty directories.

    Does nothing when the module-level ``clean`` flag is false. Files whose
    names end in one of the font / stylesheet / icon suffixes below are
    unlinked, then ``remove_empty_dirs`` is run over the 'converted' tree.
    """
    if not clean:
        return
    # A tuple lets str.endswith test all suffixes at once, so each file is
    # checked a single time and unlinked at most once.
    clean_extensions = (".woff2", ".css", ".woff",
                        ".ttf", "icomoon.svg", "icomoon.eot")
    for directory, _subdirlist, filelist in os.walk('converted'):
        for f in filelist:
            if f.endswith(clean_extensions):
                os.unlink(os.path.join(directory, f))
    remove_empty_dirs("converted")
def write_note(html_file, markdown_destination):
    """Convert one HTML file to Markdown with pandoc and save the result.

    Args:
        html_file: path of the HTML note to convert.
        markdown_destination: path of the Markdown file to write (UTF-8).

    Increments the module-level ``notes_written`` counter on success and
    ``notes_failed`` when pandoc cannot be started or exits with a non-zero
    status; in the failure case no Markdown file is written.
    """
    global notes_written, notes_failed
    print(f"Writing markdown to {markdown_destination}")
    try:
        # The command is an argument list, so it must run WITHOUT a shell:
        # with shell=True on POSIX only "pandoc" reaches the shell, the file
        # name and options are dropped, and pandoc waits on stdin forever.
        # stdin is closed explicitly so pandoc can never block on input.
        pandoc_run = subprocess.run(
            ["pandoc", html_file,
             "--from", "html", "--to", "markdown_strict-raw_html"],
            capture_output=True,
            stdin=subprocess.DEVNULL)
    except FileNotFoundError as error:
        # Without a shell, a missing executable raises instead of producing
        # a non-zero exit status; count it as a failed note.
        print(error)
        print(f"Failed to convert {html_file}")
        notes_failed += 1
        return
    if pandoc_run.returncode != 0:
        # stderr may not be valid UTF-8 on every platform; never crash on it.
        print(pandoc_run.stderr.decode(errors="replace"))
        print(f"Failed to convert {html_file}")
        notes_failed += 1
        return
    markdown_content = pandoc_run.stdout.decode()
    log_debug(markdown_content)
    with open(markdown_destination, "w", encoding="utf-8") as markdown_fp:
        markdown_fp.write(markdown_content)
    notes_written += 1
# Step 1: find every zip archive below the current directory, extract it
# under 'converted/<relative directory>' and rename each HTML file it
# contains after the archive (e.g. 'My note.zip' -> 'My note.html').
print(
    f"Searching for zip files containing HTML to convert...")
for directory, subdirlist, filelist in os.walk('.'):
    if directory == '.' and 'converted' in subdirlist:
        # Do not descend into the output of a previous run: anything
        # extracted there would otherwise be treated as a new zipped note.
        subdirlist.remove('converted')
    for f in filelist:
        if not f.endswith(zip_extension):
            continue
        print(f"Found zipped note: {f}")
        # 'archive' rather than 'zip', which would shadow the builtin.
        with ZipFile(f"{directory}{sep}{f}", 'r') as archive:
            converted_dir = f"converted{sep}{directory}"
            pathlib.Path(converted_dir).resolve().mkdir(
                parents=True, exist_ok=True)
            archive.extractall(converted_dir)
            # Archive path with its '.zip' suffix swapped for '.html'.
            note_new_filename = (
                archive.filename[:-len(zip_extension)] + ".html")
            for file_in_zip in archive.infolist():
                if file_in_zip.is_dir():
                    continue
                # Case-insensitive match on the member's extension.
                if not file_in_zip.filename.lower().endswith(html_extension):
                    continue
                old_path = pathlib.Path(
                    f"{converted_dir}{sep}{file_in_zip.filename}").resolve()
                new_path = pathlib.Path(
                    f"converted{sep}{note_new_filename}")
                log_debug(f"Renaming {old_path} to {new_path}")
                shutil.move(old_path, new_path)
# Step 2: convert every HTML file found under 'converted' to Markdown next
# to it, then tidy up and report the totals.
print(
    f"Will try to convert all HTML notes in the current directory to Markdown")
# Kept as a module-level name for backward compatibility.
converted_dir = "converted"
for directory, subdirlist, filelist in os.walk(converted_dir):
    for f in filelist:
        if not f.endswith(html_extension):
            continue
        # Strip only the trailing extension; str.replace would also remove
        # any '.html' occurring in the middle of the note's name.
        note_name = f[:-len(html_extension)].strip()
        html_note = f"{directory}{sep}{f}"
        print(f"Found HTML note: {html_note}")
        parent_dir_pathlib = pathlib.Path(directory).resolve()
        log_debug(f"mkdir {parent_dir_pathlib}")
        parent_dir_pathlib.mkdir(parents=True, exist_ok=True)
        markdown_destination = f"{parent_dir_pathlib}{sep}{note_name}.md"
        write_note(html_note, markdown_destination)
        if clean:
            os.unlink(html_note)
clean_up()
print(f"\n{_c.GREEN}Wrote {notes_written} notes.{_c.ENDC}")
if notes_failed > 0:
    print(f"{_c.RED}{notes_failed} notes failed to convert :( {_c.ENDC}")
Same here. But the mere fact that it also renames the HTML files before it tries to convert them made the files so much easier to work with afterwards, so thanks a lot! :)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
After I run the script and it finishes searching for ZIP archives, it just hangs and does nothing when it tries to convert HTML to Markdown. I don't understand what to do, alas... Debug mode does not display any errors or warnings.
Stopping on this stage: