Skip to content

Instantly share code, notes, and snippets.

@dogboydog
Last active April 6, 2024 08:04
Show Gist options
  • Save dogboydog/a9db5044513a349ca17e6e96d502340e to your computer and use it in GitHub Desktop.
Save dogboydog/a9db5044513a349ca17e6e96d502340e to your computer and use it in GitHub Desktop.
Convert Nimbus Notes HTML to Markdown for Joplin
# -------------------------------------------------------------------------
# Nimbus note HTML export to markdown converter
# Extract all zip files containing 'note.html' and convert to markdown
#
# Setup:
# 1) install python 3 for your OS
# 2) install pandoc https://github.com/jgm/pandoc/releases/tag/2.11.4
# on Windows, the .msi will automatically add pandoc to your $PATH
# otherwise add it to your $PATH.
# 3) save this script in the directory where your HTML exports were
# exported. Open a terminal / command prompt and cd to the directory
# where you saved convert.py.
# 4) Issue the command "python convert.py"
# (add the word "debug" afterward for extra output: python convert.py debug)
# 5) To use for Joplin import, Use File -> Import -> Markdown (Directory)
# and select the 'converted' directory that is created by this script
#
# Happy note-taking. -dogboydog
# -------------------------------------------------------------------------
import os
import pathlib
import re
import shutil
import subprocess
import sys
from os.path import abspath
from zipfile import ZipFile
# Running totals; write_note() updates them and the final summary prints them.
notes_written = 0
notes_failed = 0

# Platform-specific path separator used when paths are assembled by hand.
sep = os.path.sep

# Colored output is on unless 'no-color' is passed on the command line or the
# NO_COLOR environment variable is present.
color = 'NO_COLOR' not in os.environ and 'no-color' not in sys.argv[1:]

# Debug output is on when 'debug' is passed on the command line, or when the
# DEBUG environment variable is set to anything other than "0".
debug_on = 'debug' in sys.argv[1:] or os.environ.get('DEBUG', "0") != "0"

clean = True  # set to False to keep html files from conversion
class _c:
    """ANSI escape sequences used to style terminal output.

    When the module-level ``color`` flag is false every attribute is the
    empty string, so the names can be interpolated into messages
    unconditionally.
    """
    if color:
        HEADER = '\033[95m'
        BLUE = '\u001b[34m'
        CYAN = '\033[96m'
        GREEN = '\033[92m'
        YELLOW = '\u001b[33m'
        RED = '\033[91m'
        ENDC = '\033[0m'
        BOLD = '\033[1m'
        UNDERLINE = '\033[4m'
    else:
        HEADER = BLUE = CYAN = GREEN = YELLOW = RED = ''
        ENDC = BOLD = UNDERLINE = ''
# File-name suffixes used to recognise exported archives and the HTML notes.
html_extension, zip_extension = ".html", ".zip"
def log_debug(message):
    """Print ``message`` in blue when debug output is enabled; else do nothing."""
    if not debug_on:
        return
    print(f"{_c.BLUE}{message}{_c.ENDC}")
def remove_empty_dir(empty_dir):
    """Best-effort removal of the directory ``empty_dir``.

    Relies on ``os.removedirs``, which removes the leaf directory and then
    keeps pruning parent directories for as long as they are empty. Any
    ``OSError`` (directory not empty, missing, not permitted, ...) is
    swallowed on purpose: a directory that cannot be removed is simply left
    in place.
    """
    try:
        os.removedirs(empty_dir)
        log_debug(f"Deleted empty directory '{empty_dir}'")
    except OSError:
        # Not empty or not removable: nothing to do.
        return None
def remove_empty_dirs(path):
    """Recursively delete empty directories found below ``path``."""
    # topdown=False makes os.walk report the deepest directories first, so a
    # directory whose only content was empty sub-directories is visited after
    # those have already been removed.
    nested_dirs = (
        os.path.realpath(os.path.join(parent, child))
        for parent, children, _files in os.walk(path, topdown=False)
        for child in children
    )
    for candidate in nested_dirs:
        remove_empty_dir(candidate)
def clean_up():
    """Delete leftover export assets under 'converted' and prune empty dirs.

    Does nothing when the module-level ``clean`` flag is false. Otherwise
    every file in the 'converted' tree whose name ends with one of the
    known asset suffixes (web fonts, stylesheets, icon files) is unlinked,
    and afterwards empty directories are removed via remove_empty_dirs().
    """
    if not clean:
        return
    # str.endswith accepts a tuple: one call checks every suffix, and a file
    # can never be unlinked twice even if more than one suffix matches.
    clean_suffixes = (".woff2", ".css", ".woff",
                      ".ttf", "icomoon.svg", "icomoon.eot")
    for directory, _subdirlist, filelist in os.walk('converted'):
        for f in filelist:
            if f.endswith(clean_suffixes):
                os.unlink(f"{directory}{sep}{f}")
    remove_empty_dirs("converted")
def write_note(html_file, markdown_destination):
    """Convert one HTML note to Markdown with pandoc and write it to disk.

    Args:
        html_file: path of the HTML note handed to pandoc.
        markdown_destination: path of the Markdown file to create (an
            existing file is overwritten).

    Side effects:
        Increments the module-level ``notes_written`` counter on success and
        ``notes_failed`` when pandoc exits with a non-zero status. In the
        failure case pandoc's stderr is printed and no file is written.

    Raises:
        OSError: propagated from ``subprocess.run`` if the ``pandoc``
            executable cannot be started (e.g. it is not on PATH), or from
            ``open`` if the destination cannot be written.
    """
    global notes_written, notes_failed
    print(f"Writing markdown to {markdown_destination}")
    pandoc_run = subprocess.run(
        ["pandoc", html_file,
         "--from", "html", "--to", "markdown_strict-raw_html"],
        capture_output=True,
        # check must be False: with check=True a non-zero exit raises
        # CalledProcessError, the failure branch below is never reached and a
        # single bad note aborts the whole run.
        check=False)
    if pandoc_run.returncode != 0:
        # stderr is diagnostics only; never let an odd byte hide the error.
        print(pandoc_run.stderr.decode(errors="replace"))
        print(f"Failed to convert {html_file}")
        notes_failed += 1
        return
    markdown_content = pandoc_run.stdout.decode()
    log_debug(markdown_content)
    with open(markdown_destination, "w", encoding="utf-8") as markdown_fp:
        markdown_fp.write(markdown_content)
    notes_written += 1
# Step 1: find every zip archive below the current directory, unpack it into
# the 'converted' tree (mirroring the archive's relative directory), and give
# the HTML note inside it the archive's own name.
print(
    f"Searching for zip files containing HTML to convert...")
for directory, subdirlist, filelist in os.walk('.'):
    # Do not descend into our own output tree: on a re-run it may contain
    # archives that were extracted earlier (e.g. zip attachments of a note).
    # Removing the entry in place tells os.walk to skip that directory.
    if directory == '.' and 'converted' in subdirlist:
        subdirlist.remove('converted')
    for f in filelist:
        if not f.endswith(zip_extension):
            continue
        print(f"Found zipped note: {f}")
        # Named 'archive' rather than 'zip' so the builtin is not shadowed.
        with ZipFile(f"{directory}{sep}{f}", 'r') as archive:
            converted_dir = f"converted{sep}{directory}"
            pathlib.Path(converted_dir).resolve().mkdir(
                parents=True, exist_ok=True)
            archive.extractall(converted_dir)
            for file_in_zip in archive.infolist():
                if file_in_zip.is_dir():
                    continue
                # Case-insensitive match on the file-name suffix.
                if not file_in_zip.filename.lower().endswith(html_extension):
                    continue
                # archive.filename is the path the ZipFile was opened with;
                # swap its ".zip" suffix for ".html".
                note_new_filename = (
                    archive.filename[:-len(zip_extension)] + ".html")
                old_path = pathlib.Path(
                    f"{converted_dir}{sep}{file_in_zip.filename}").resolve()
                new_path = pathlib.Path(
                    f"converted{sep}{note_new_filename}")
                log_debug(f"Renaming {old_path} to {new_path}")
                shutil.move(old_path, new_path)
# Step 2: convert every HTML note found in the 'converted' tree to Markdown,
# clean up leftovers, and print a summary.
print(
    f"Will try to convert all HTML notes in the current directory to Markdown")
# Kept as a module-level name because other code in this file may read it.
converted_dir = "converted"
for directory, subdirlist, filelist in os.walk(converted_dir):
    for f in filelist:
        if not f.endswith(html_extension):
            continue
        # Strip only the trailing extension; str.replace would also remove
        # any ".html" that happens to occur inside the note's name.
        note_name = f[:-len(html_extension)].strip()
        html_note = f"{directory}{sep}{f}"
        print(f"Found HTML note: {html_note}")
        parent_dir_pathlib = pathlib.Path(directory).resolve()
        log_debug(f"mkdir {parent_dir_pathlib}")
        parent_dir_pathlib.mkdir(parents=True, exist_ok=True)
        markdown_destination = f"{parent_dir_pathlib}{sep}{note_name}.md"
        failed_before = notes_failed
        write_note(html_note, markdown_destination)
        # Only discard the HTML source once its Markdown was produced, so a
        # note that failed to convert can still be inspected afterwards.
        if clean and notes_failed == failed_before:
            os.unlink(html_note)
clean_up()
print(f"\n{_c.GREEN}Wrote {notes_written} notes.{_c.ENDC}")
if notes_failed > 0:
    print(f"{_c.RED}{notes_failed} notes failed to convert :( {_c.ENDC}")
@dogboydog
Copy link
Author

Honestly, I don't remember. It depends on whether the pandoc command will embed images, I think. Let me know if you try it and how it turns out.

@sks2012
Copy link

sks2012 commented Mar 18, 2024

Me, without the images, it's all

@dogboydog
Copy link
Author

Sorry about that. I'm not sure if there's an easy way to get it to support images

@sks2012
Copy link

sks2012 commented Apr 6, 2024

So, why can't Pandoc itself attach a file inside the Markdown?
Please think about how you can attach images.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment