Last active
April 6, 2024 08:04
-
-
Save dogboydog/a9db5044513a349ca17e6e96d502340e to your computer and use it in GitHub Desktop.
Convert Nimbus Notes HTML to Markdown for Joplin
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -------------------------------------------------------------------------
# Nimbus note HTML export to markdown converter
# Extract all zip files containing 'note.html' and convert to markdown
#
# Setup:
# 1) install python 3 for your OS
# 2) install pandoc https://github.com/jgm/pandoc/releases/tag/2.11.4
#    on Windows, the .msi will automatically add pandoc to your $PATH
#    otherwise add it to your $PATH.
# 3) save this script in the directory where your HTML exports were
#    exported. Open a terminal / command prompt and cd to the directory
#    where you saved convert.py.
# 4) Issue the command "python convert.py"
#    (add the word "debug" afterward for extra output: python convert.py debug)
# 5) To use for Joplin import, Use File -> Import -> Markdown (Directory)
#    and select the 'converted' directory that is created by this script
#
# Happy note-taking. -dogboydog
# -------------------------------------------------------------------------
import os | |
import pathlib | |
import re | |
import shutil | |
import subprocess | |
import sys | |
from os.path import abspath | |
from zipfile import ZipFile | |
# Running totals, reported once the whole conversion has finished.
notes_written = 0
notes_failed = 0

sep = os.path.sep

# Colour stays on unless 'no-color' is passed on the command line or the
# NO_COLOR environment variable is present.
color = 'no-color' not in sys.argv[1:] and 'NO_COLOR' not in os.environ

# Debug output is enabled by DEBUG=<anything but "0"> in the environment or by
# passing the word 'debug' on the command line.
debug_on = os.environ.get('DEBUG', "0") != "0" or 'debug' in sys.argv[1:]

# Set to False to keep html files from conversion.
clean = True
def _ansi(code):
    """Return *code* when colour output is enabled, otherwise an empty string."""
    return code if color else ''


class _c:
    """Terminal escape sequences; every attribute is '' when colour is off."""
    HEADER = _ansi('\033[95m')
    BLUE = _ansi('\u001b[34m')
    CYAN = _ansi('\033[96m')
    GREEN = _ansi('\033[92m')
    YELLOW = _ansi('\u001b[33m')
    RED = _ansi('\033[91m')
    ENDC = _ansi('\033[0m')
    BOLD = _ansi('\033[1m')
    UNDERLINE = _ansi('\033[4m')
# Suffix of the archives that are searched for exported notes.
zip_extension = ".zip"
# Suffix of the note files that are handed to pandoc.
html_extension = ".html"
def log_debug(message):
    """Print *message* in blue when debug output is enabled; otherwise do nothing."""
    if not debug_on:
        return
    print(f"{_c.BLUE}{message}{_c.ENDC}")
def remove_empty_dir(empty_dir):
    """Best-effort removal of *empty_dir* via ``os.removedirs``.

    Any ``OSError`` (for example because the directory still has content)
    is ignored, so calling this on a non-empty directory is a no-op.
    """
    try:
        os.removedirs(empty_dir)
        log_debug(f"Deleted empty directory '{empty_dir}'")
    except OSError:
        # Nothing was (or could be) removed; that is fine for a clean-up pass.
        return
def remove_empty_dirs(path):
    """Recursively delete empty directories found below *path*."""
    # Bottom-up walk: the deepest nested directories are attempted first.
    for parent, child_dirs, _files in os.walk(path, topdown=False):
        for child in child_dirs:
            candidate = os.path.join(parent, child)
            remove_empty_dir(os.path.realpath(candidate))
def clean_up():
    """Delete leftover web assets under 'converted' and prune empty directories.

    Does nothing when the module-level ``clean`` flag is false. Files whose
    name ends with one of the listed suffixes (fonts, stylesheets, icon
    files) are unlinked, then ``remove_empty_dirs`` is run on 'converted'.
    """
    if not clean:
        return
    # str.endswith accepts a tuple, so each file is tested once and can never
    # be unlinked twice.
    clean_extensions = (".woff2", ".css", ".woff",
                        ".ttf", "icomoon.svg", "icomoon.eot")
    for directory, _subdirlist, filelist in os.walk('converted'):
        for f in filelist:
            if f.endswith(clean_extensions):
                os.unlink(f"{directory}{sep}{f}")
    remove_empty_dirs("converted")
def write_note(html_file, markdown_destination):
    """Convert one HTML note to Markdown using pandoc.

    Runs ``pandoc`` on *html_file* and writes its standard output to
    *markdown_destination* as UTF-8. On success the module-level
    ``notes_written`` counter is incremented; when pandoc exits with a
    non-zero status its stderr is printed and ``notes_failed`` is
    incremented instead, and no file is written.

    A missing ``pandoc`` executable is not handled here and surfaces as the
    exception raised by ``subprocess.run``.
    """
    global notes_written, notes_failed
    print(f"Writing markdown to {markdown_destination}")
    # check=False: a non-zero exit has to reach the failure branch below.
    # With check=True subprocess.run raises CalledProcessError first, which
    # made that branch unreachable and aborted the whole run on one bad note.
    pandoc_run = subprocess.run(
        ["pandoc", html_file,
         "--from", "html", "--to", "markdown_strict-raw_html"],
        capture_output=True,
        check=False)
    if pandoc_run.returncode != 0:
        # Diagnostics only, so never let an odd byte hide the real error.
        print(pandoc_run.stderr.decode(errors="replace"))
        print(f"Failed to convert {html_file}")
        notes_failed += 1
        return
    markdown_content = pandoc_run.stdout.decode()
    log_debug(markdown_content)
    with open(markdown_destination, "w", encoding="utf-8") as markdown_fp:
        markdown_fp.write(markdown_content)
    notes_written += 1
# Stage 1: unpack every zip archive below the current directory into
# 'converted/<relative directory>' and rename the HTML note inside after the
# archive it came from.
print("Searching for zip files containing HTML to convert...")
for directory, subdirlist, filelist in os.walk('.'):
    if directory == '.' and 'converted' in subdirlist:
        # Pruning in place keeps os.walk out of our own output directory, so
        # a re-run does not re-scan previously extracted content.
        subdirlist.remove('converted')
    for f in filelist:
        if not f.endswith(zip_extension):
            continue
        print(f"Found zipped note: {f}")
        # Named 'archive' rather than 'zip' so the builtin is not shadowed.
        with ZipFile(f"{directory}{sep}{f}", 'r') as archive:
            converted_dir = f"converted{sep}{directory}"
            pathlib.Path(converted_dir).resolve().mkdir(
                parents=True, exist_ok=True)
            archive.extractall(converted_dir)
            # Archive path with its extension swapped for ".html".
            note_new_filename = (
                archive.filename[:-len(zip_extension)] + ".html")
            for file_in_zip in archive.infolist():
                if file_in_zip.is_dir():
                    continue
                # Case-insensitive match on the member's extension.
                if not file_in_zip.filename.lower().endswith(html_extension):
                    continue
                old_path = pathlib.Path(
                    f"{converted_dir}{sep}{file_in_zip.filename}").resolve()
                new_path = pathlib.Path(
                    f"converted{sep}{note_new_filename}")
                log_debug(f"Renaming {old_path} to {new_path}")
                shutil.move(old_path, new_path)
# Stage 2: convert every HTML file below 'converted' to Markdown next to it,
# then tidy up and print a summary.
print("Will try to convert all HTML notes in the current directory to Markdown")
# Assigned before the loop so the module-level name exists even when
# 'converted' contains no files at all.
converted_dir = "converted"
for directory, subdirlist, filelist in os.walk(converted_dir):
    for f in filelist:
        if not f.endswith(html_extension):
            continue
        # Strip only the trailing extension; str.replace would also remove
        # any ".html" that occurs earlier in the file name.
        note_name = f[:-len(html_extension)].strip()
        html_note = f"{directory}{sep}{f}"
        print(f"Found HTML note: {html_note}")
        parent_dir_pathlib = pathlib.Path(directory).resolve()
        log_debug(f"mkdir {parent_dir_pathlib}")
        parent_dir_pathlib.mkdir(parents=True, exist_ok=True)
        markdown_destination = f"{parent_dir_pathlib}{sep}{note_name}.md"
        write_note(html_note, markdown_destination)
        if clean:
            # The HTML copy inside 'converted' is no longer needed.
            os.unlink(html_note)
clean_up()
print(f"\n{_c.GREEN}Wrote {notes_written} notes.{_c.ENDC}")
if notes_failed > 0:
    print(f"{_c.RED}{notes_failed} notes failed to convert :( {_c.ENDC}")
So, why can't Pandoc itself attach a file inside the Markdown?
Please think about how you can attach images.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Sorry about that. I'm not sure if there's an easy way to get it to support images.