Last active
May 10, 2023 12:39
-
-
Save vszakats/5a3bd939721d1dde6142d9ea3b2d1b5f to your computer and use it in GitHub Desktop.
Apple Notes.app JSON to Markdown/HTML converter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3

# Copyright 2022-present Viktor Szakats. The MIT License.
# SPDX-License-Identifier: MIT

# Convert 'apple_cloud_notes_parser'-made JSON into Markdown or HTML.
# The goal is to export the content losslessly and with the ability to
# continue editing it after importing or opening it in Markdown
# editors as-is.

# Requires:
# - Ruby
# - https://github.com/threeplanetssoftware/apple_cloud_notes_parser/releases/tag/v0.11
#   or later
# - $ bundle install
# - Compatible Notes.app backup
# Optional:
# - macOS for 'created' output file timestamps

# Joplin import requirements:
# - --md-linebreaks
# - --frontmatter (optional)
# - as of Joplin v2.8.8, its import functionality does not support
#   importing linked/embedded images as proper attachments (both with
#   or without --embed)

# TODO:
# - Copy linked objects under the output directory and reference them
#   via relative links.
# - Escape double-quotes in values ending up in HTML attributes.
# - Solve continued lines differently:
#   Iterate through the full line in a sub-loop.
#   This allows changing text attributes only when they actually change,
#   so 4 bold fragments result in a single **/** pair instead of one
#   for each fragment.
# - Replace '-notes-font-hints' CSS style once we discover its purpose.
# - Possibly fold long itemlist lines into shorter ones.
# - A newline would be nice after the last continuous monospace fragment.
import argparse
import base64
import copy
import datetime
import json
import mimetypes
import os
import re
import subprocess
import sys
import time
import urllib.parse
# Item styles ("styleType" values of a fragment's "paragraphStyle")
STYLE_TYPE_DEFAULT = -1
STYLE_TYPE_TITLE = 0
STYLE_TYPE_HEADING = 1
STYLE_TYPE_SUBHEADING = 2
STYLE_TYPE_MONOSPACED = 4
STYLE_TYPE_LIST_DOT = 100
STYLE_TYPE_LIST_DASH = 101
STYLE_TYPE_LIST_NUM = 102
STYLE_TYPE_CHECKBOX = 103

# Font weights ("fontWeight" values of a fragment)
FONT_TYPE_DEFAULT = 0
FONT_TYPE_BOLD = 1
FONT_TYPE_ITALIC = 2
FONT_TYPE_BOLD_ITALIC = 3

# Text alignments ("alignment" values of a fragment's "paragraphStyle")
TEXT_ALIGNMENT_LEFT = 0
TEXT_ALIGNMENT_CENTER = 1
TEXT_ALIGNMENT_RIGHT = 2
TEXT_ALIGNMENT_JUSTIFY = 3

# Selects the CSS Color Level 4 "rgb(r g b / a)" notation for translucent
# colors; when False the legacy "rgba(r, g, b, a)" form is used instead.
css4 = True
def line_is_codefence(line):
    """Return True if *line*, ignoring leading whitespace, starts with a
    Markdown code fence (three backticks)."""
    return line.lstrip().startswith("```")
def fn_to_url(fn, embed=False):
    """Return a URL referencing the local file *fn*.

    With *embed* set, and when a MIME type can be guessed from the file
    name, the file is read and returned as a base64 "data:" URL. In every
    other case a "file://" URL is returned.

    The path is percent-encoded with urllib.parse.quote() (keeping "/"),
    so not only spaces but also characters such as "#", "?", "%" and
    parentheses are escaped. Left as-is, those would either change the
    meaning of the URL or terminate a Markdown "(...)" link target early.

    Raises OSError if *embed* is set and the file cannot be read.
    """
    if embed:
        (mime, _) = mimetypes.guess_type(fn)
        if mime is not None:
            with open(fn, "rb") as f:
                payload = base64.b64encode(f.read()).decode("ascii")
            return "data:" + mime + ";base64," + payload
    return "file://" + urllib.parse.quote(fn)
def htmlattr(value):
    """Return *value* with every double-quote replaced by "%22", so that
    it cannot terminate a double-quoted HTML attribute early."""
    return value.replace('"', "%22")
def _run_signature(run):
    """Return a canonical JSON key for the attributes of *run*, ignoring
    its "length"."""
    return json.dumps({**run, "length": 0}, sort_keys=True)


def _merge_attribute_runs(runs, strip_color=False):
    """Merge consecutive runs that differ only in "length".

    The first run of each group is kept (and its "length" grown in place).
    With *strip_color* the "color" attribute is removed from the kept runs.
    """
    merged = []
    idx = 0
    while idx < len(runs):
        run = runs[idx]
        signature = _run_signature(run)
        while idx + 1 < len(runs) and _run_signature(runs[idx + 1]) == signature:
            run["length"] += runs[idx + 1]["length"]
            idx += 1
        if strip_color and "color" in run:
            del run["color"]
        merged.append(run)
        idx += 1
    return merged


def _assign_run_texts(runs, text):
    """Store each run's slice of *text* under "text" and drop "length".

    Run lengths are consumed as UTF-16 code units: a code point above
    U+FFFF uses up two units while being a single character of the Python
    string. (The previous implementation sliced by code points and then
    corrected the position afterwards, which made a run containing such a
    character spill into - and duplicate - the start of the next run.)

    Returns the number of characters of *text* consumed.
    """
    pos = 0
    for run in runs:
        units = run.pop("length")
        end = pos
        while units > 0 and end < len(text):
            units -= 2 if ord(text[end]) > 0xFFFF else 1
            end += 1
        run["text"] = text[pos:end]
        pos = end
    return pos


def _split_runs_at_newlines(runs):
    """Split runs so that a newline can only be the last character of a run.

    Runs carrying a "link" are left whole, to not tear up links whose text
    has a spilled-in newline; those are dealt with while rendering.
    """
    result = []
    for run in runs:
        if "\n" not in run["text"] or "link" in run:
            result.append(run)
            continue
        pieces = run["text"].split("\n")
        last = len(pieces) - 1
        for idx, piece in enumerate(pieces):
            if idx < last:
                piece += "\n"
            if piece:
                part = run.copy()
                part["text"] = piece
                result.append(part)
    return result


def _css_color(color):
    """Return the CSS notation of a color dict with "red", "green", "blue"
    and "alpha" entries (the channels are scaled by 255)."""
    red = int(color["red"] * 255)
    green = int(color["green"] * 255)
    blue = int(color["blue"] * 255)
    if color["alpha"] != 1:
        template = "rgb({} {} {} / {})" if css4 else "rgba({}, {}, {}, {})"
        return template.format(red, green, blue, color["alpha"])
    return "#{:02x}{:02x}{:02x}".format(red, green, blue)


def note_export(
    note,
    out_prefix="",
    overwrite=True,
    format="md",
    embedimages=True,
    frontmatter=True,
    md_linebreaks=False,
    fn_template="{account}-{folder}-{note_id:06d}-{title}",
    debug=0,
    autotidy_cr=True,
    autotidy_hr=True,
):
    """Export a single note to a Markdown or HTML file.

    note: one entry of the input JSON's "notes" object. Entries without
        a "note_proto" key are skipped silently.
    out_prefix: string prepended to the generated output file name.
    overwrite: when False, an already existing output file is kept.
    format: "md" or "html".
    embedimages: embed images/thumbnails as "data:" URLs instead of
        linking them via "file://" URLs.
    frontmatter: start the output with a front matter block holding the
        title and the update/creation timestamps (in UTC).
    md_linebreaks: for Markdown, use trailing double-spaces as forced
        line breaks instead of "<br>".
    fn_template: str.format() template of the output file name, fed with
        "account", "folder", "note_id" and "title".
    debug: 0 is silent, >= 1 prints diagnostics, >= 2 also writes the
        intermediate stages to files, >= 3 also marks every fragment in
        the output.
    autotidy_cr: delete CR characters from the text.
    autotidy_hr: turn lines consisting only of "*", "-" or "="
        characters into a horizontal rule.

    Raises ValueError for an unknown *format*. Returns None.
    """
    # Skip folder index
    if "note_proto" not in note:
        return

    outfn = fn_template.format(
        account=note["account"],
        folder=note["folder"],
        note_id=note["note_id"],
        title=note["title"],
    )
    outfn = out_prefix + re.sub(r"[\/\\\?\*|:•]", "_", outfn)

    # HTML list open/close tags by list style; only filled in for HTML.
    FMT_LIST_TAGS = {}
    FMT_LIST_TAGS_DEFAULT = ("", "")

    if format == "md":
        FMT_EXT = ".md"
        FMT_H1_O = "# "
        FMT_H1_C = ""
        FMT_H2_O = "## "
        FMT_H2_C = ""
        FMT_H3_O = "### "
        FMT_H3_C = ""
        FMT_CODE_O = "`"
        FMT_CODE_C = FMT_CODE_O
        FMT_CODEBLOCK_O = "```"
        FMT_CODEBLOCK_C = FMT_CODEBLOCK_O
        FMT_LINE = "---"
        FMT_LINK_PURE = "<{0}>"
        FMT_LINK_NAMED = "[{1}]({0})"
        # {0}: image/thumbnail URL, {1}: link target, {2}: alt text.
        # Same placeholder roles as the HTML templates below.
        FMT_LINK_IMAGE = "[![{2}]({0})]({1})"
        # {0}: image URL, {1}: alt text
        FMT_IMAGE = "![{1}]({0})"
        FMT_BOLD_O = "**"
        FMT_BOLD_C = FMT_BOLD_O
        FMT_ITALIC_O = "_"
        FMT_ITALIC_C = FMT_ITALIC_O
        FMT_TABLE_O = ""
        FMT_TABLE_C = ""
        FMT_TABLE_ROW_O = "| "
        FMT_TABLE_ROW_C = ""
        FMT_TABLE_CELL_O = ""
        FMT_TABLE_CELL_C = " | "
        FMT_TABLE_HEADER_O = "---"
        FMT_TABLE_HEADER_C = " | "
        FMT_ITEM_DOT = "* "
        FMT_ITEM_DASH = "- "
        FMT_ITEM_LIST = "1. "
        FMT_ITEM_CHKE = "* [x] "
        FMT_ITEM_CHKD = "* [ ] "
        if md_linebreaks:
            # Some Markdown imports will need this to avoid
            # (mis-)interpreting content as HTML.
            # Native Markdown hard line break: two trailing spaces.
            FMT_LINEBREAK = "  "
        else:
            FMT_LINEBREAK = "<br>"
    elif format == "html":
        FMT_EXT = ".html"
        FMT_H1_O = "<h1>"
        FMT_H1_C = "</h1>"
        FMT_H2_O = "<h2>"
        FMT_H2_C = "</h2>"
        FMT_H3_O = "<h3>"
        FMT_H3_C = "</h3>"
        FMT_CODE_O = "<code>"
        FMT_CODE_C = "</code>"
        FMT_CODEBLOCK_O = FMT_CODE_O
        FMT_CODEBLOCK_C = FMT_CODE_C
        FMT_LINE = "<hr>"
        FMT_LINK_PURE = '<a href="{0}">{0}</a>'
        FMT_LINK_NAMED = '<a href="{0}">{1}</a>'
        FMT_LINK_IMAGE = '<a href="{1}"><img style="max-width: 100%; max-height: 100%;" src="{0}" alt="{2}"></a>'
        FMT_IMAGE = (
            '<img style="max-width: 100%; max-height: 100%;" src="{0}" alt="{1}">'
        )
        FMT_BOLD_O = "<b>"
        FMT_BOLD_C = "</b>"
        FMT_ITALIC_O = "<i>"
        FMT_ITALIC_C = "</i>"
        FMT_TABLE_O = "<table>"
        FMT_TABLE_C = "</table>"
        FMT_TABLE_ROW_O = "<tr>"
        FMT_TABLE_ROW_C = "</tr>"
        FMT_TABLE_CELL_O = "<td>"
        FMT_TABLE_CELL_C = "</td>"
        FMT_TABLE_HEADER_O = "<th>"
        FMT_TABLE_HEADER_C = "</th>"
        FMT_ITEM_DOT = "<li>"
        FMT_ITEM_DASH = "<li>"
        FMT_ITEM_LIST = "<li>"
        FMT_ITEM_CHKE = '<li><input type="checkbox" checked>'
        FMT_ITEM_CHKD = '<li><input type="checkbox">'
        FMT_LINEBREAK = "<br>"
        FMT_LIST_DASH_O = '<ul style="list-style-type: ' + "'– '" + ';">'
        FMT_LIST_DASH_C = "</ul>"
        FMT_LIST_DOT_O = "<ul>"
        FMT_LIST_DOT_C = "</ul>"
        FMT_LIST_NUM_O = "<ol>"
        FMT_LIST_NUM_C = "</ol>"
        FMT_LIST_TAGS = {
            STYLE_TYPE_LIST_NUM: (FMT_LIST_NUM_O, FMT_LIST_NUM_C),
            STYLE_TYPE_LIST_DASH: (FMT_LIST_DASH_O, FMT_LIST_DASH_C),
        }
        FMT_LIST_TAGS_DEFAULT = (FMT_LIST_DOT_O, FMT_LIST_DOT_C)
    else:
        raise ValueError("unsupported export format: {!r}".format(format))

    # Item prefix by list style; checkboxes are resolved separately.
    item_prefix = {
        STYLE_TYPE_LIST_DOT: FMT_ITEM_DOT,
        STYLE_TYPE_LIST_DASH: FMT_ITEM_DASH,
        STYLE_TYPE_LIST_NUM: FMT_ITEM_LIST,
    }
    # Open/close markup by heading style.
    heading_markup = {
        STYLE_TYPE_TITLE: (FMT_H1_O, FMT_H1_C),
        STYLE_TYPE_HEADING: (FMT_H2_O, FMT_H2_C),
        STYLE_TYPE_SUBHEADING: (FMT_H3_O, FMT_H3_C),
    }
    # Opening markup by non-left alignment; closed with "</p>".
    alignment_markup = {
        TEXT_ALIGNMENT_CENTER: '<p style="text-align: center">',
        TEXT_ALIGNMENT_JUSTIFY: '<p style="text-align: justify">',
        TEXT_ALIGNMENT_RIGHT: '<p style="text-align: right">',
    }

    outfnext = outfn + FMT_EXT

    if not overwrite and os.path.isfile(outfnext):
        return

    if debug >= 1:
        print(note["note_id"])

    proto = copy.deepcopy(note["note_proto"])  # Avoid modifying the original object
    eos = note["embedded_objects"]
    proto["embedded_objects"] = eos
    doc = proto["document"]["note"]

    stripcolor = False

    # Raw/unformatted text. UTF-8.
    #
    # \u2028: manual line breaks
    # \n: entry separators
    text = doc["noteText"]

    if debug >= 2:
        with open(outfn + "-1-ori.txt", "w", encoding="utf-8") as f:
            f.write(text)
        with open(outfn + "-1-ori.json", "w", encoding="utf-8") as f:
            json.dump(proto, f, indent=2, ensure_ascii=False, sort_keys=True)

    if debug >= 1:
        print("! text length chars:", len(text))
        print("! text length bytes:", len(bytes(text, "utf-8")))
        print("! length ori:", len(doc["attributeRun"]))

    # Merge consecutive fragments having the same attributes.
    # Can result in a 10x reduction of fragment numbers, while
    # also giving searchable/meaningful text fields.
    doc["attributeRun"] = _merge_attribute_runs(
        doc["attributeRun"], strip_color=stripcolor
    )

    if debug >= 1:
        print("! length merged:", len(doc["attributeRun"]))

    # Fill text for each fragment (and delete the lengths)
    consumed = _assign_run_texts(doc["attributeRun"], text)

    if debug >= 1:
        print("! slice length total:", consumed)

    if debug >= 2:
        with open(outfn + "-2-merged.json", "w", encoding="utf-8") as f:
            json.dump(proto, f, indent=2, ensure_ascii=False, sort_keys=True)

    # Split texts at newlines into separate sections.
    # This helps processing it accurately, because Apple uses newlines
    # as item terminators.
    doc["attributeRun"] = _split_runs_at_newlines(doc["attributeRun"])

    if debug >= 1:
        print("! length resplit:", len(doc["attributeRun"]))

    if debug >= 2:
        with open(outfn + "-3-merged-split-at-eol.json", "w", encoding="utf-8") as f:
            json.dump(proto, f, indent=2, ensure_ascii=False, sort_keys=True)

    # One level of list indentation.
    # NOTE(review): the whitespace of this literal was collapsed in the
    # copy this was restored from; four spaces nest correctly under all
    # of the Markdown item prefixes used here - confirm against upstream.
    INDENT_SPACES = "    "

    tsfmt = "%Y-%m-%d %H:%M:%S %z"  # YYYY-MM-DD hh:mm:ss +1030"
    create = datetime.datetime.strptime(note["creation_time"], tsfmt)
    update = datetime.datetime.strptime(note["modify_time"], tsfmt)
    createutc = create.astimezone(datetime.timezone.utc)
    updateutc = update.astimezone(datetime.timezone.utc)

    if debug >= 1:
        print("!", create, update)
        print("!", createutc, updateutc)

    out = ""

    if frontmatter:
        out += "---\n"
        out += "title: " + note["title"] + "\n"
        tsfmt = "%Y-%m-%d %H:%M:%SZ"  # YYYY-MM-DD hh:mm:ssZ
        out += "updated: " + updateutc.strftime(tsfmt) + "\n"
        out += "created: " + createutc.strftime(tsfmt) + "\n"
        out += "---\n"

    # Emit forced line breaks for empty lines only and, in HTML, wrap
    # lines in <div>, like the Notes.app built-in HTML export does.
    # With this off, every line end outside lists gets a forced break.
    html_with_div = True

    continuing = False  # inside a line started by a previous fragment
    cont_f1 = ""  # prefix markup of the line being continued
    cont_i1 = ""  # indentation of the line being continued
    in_list = 0  # list style of the previous line, 0 if none
    in_codefence = False
    prev_i1 = ""  # indentation of the previous line
    in_list_close = []  # stack of closing markup of the open HTML lists
    indent = ""

    for c, run in enumerate(doc["attributeRun"], start=1):
        if debug >= 3:
            out += ">" + str(c) + "<"

        mystr = run["text"]

        # Text ending with a newline is significant. It closes list items.
        # If there is none, it means the entry continues in the next
        # "attributeRun" item. Notes.app uses separate elements when the
        # font style is different or if a special element (e.g. a link)
        # is present in the line.
        eol = mystr.endswith("\n")
        if eol:
            mystr = mystr[:-1]

        # layout
        n1 = ""  # leading newline, if any
        i1 = ""  # indentation
        f1 = ""  # layout prefix markup (bullet, dash, header)
        c1 = ""  # line-ending markup (e.g. FMT_LINEBREAK or FMT_H1_C), if any
        new_in_list = 0
        ao = ""  # alignment markup
        ac = ""

        codefence_start = False
        if line_is_codefence(mystr) and not in_codefence:
            if format == "html":
                mystr = FMT_CODEBLOCK_O
            n1 = "\n"
            in_codefence = True
            codefence_start = True

        # Strip CR
        mystr_no_cr = mystr.replace("\r", "")
        if autotidy_cr:
            # Strip CR, remains of CRLF EOLs
            mystr = mystr_no_cr

        # Code fence lines and everything between them are passed through
        # without any layout, escaping or markup processing.
        if not in_codefence:
            paragraph_style = run.get("paragraphStyle", {})
            style_type = paragraph_style.get("styleType", STYLE_TYPE_DEFAULT)
            indent_amount = paragraph_style.get("indentAmount", 0)
            alignment = paragraph_style.get("alignment", TEXT_ALIGNMENT_LEFT)

            if alignment in alignment_markup:
                ao = alignment_markup[alignment]
                ac = "</p>"

            if style_type in (
                STYLE_TYPE_LIST_DOT,
                STYLE_TYPE_LIST_DASH,
                STYLE_TYPE_LIST_NUM,
                STYLE_TYPE_CHECKBOX,
            ):
                if not continuing:
                    # Make sure to have an empty line before starting a list
                    # (otherwise it may not render as a list)
                    if in_list == 0 and not out.endswith("\n\n") and len(out) > 0:
                        n1 = "\n"
                    if style_type == STYLE_TYPE_CHECKBOX:
                        if paragraph_style["checklist"]["done"] == 1:
                            f1 = FMT_ITEM_CHKE
                        else:
                            f1 = FMT_ITEM_CHKD
                    else:
                        f1 = item_prefix[style_type]
                    i1 = indent_amount * INDENT_SPACES
                    new_in_list = style_type
            elif style_type in heading_markup:
                (f1, c1) = heading_markup[style_type]
            elif style_type == STYLE_TYPE_MONOSPACED:
                n1 = "\n"
                f1 = FMT_CODE_O
                c1 = FMT_CODE_C
            else:
                # Forced newlines for lines not part of lists.
                # Needed for Markdown to avoid joining these lines together.
                # NOTE(review): the nesting of this branch was lost in the
                # copy this was restored from; everything is kept under
                # the "not in a list" guard - confirm against upstream.
                if in_list == 0:
                    if eol:
                        if html_with_div:
                            if mystr_no_cr == "":
                                c1 = FMT_LINEBREAK
                        else:
                            c1 = FMT_LINEBREAK
                    # Not strictly necessary, but makes output more like the
                    # Notes app built-in HTML export
                    if format == "html" and html_with_div:
                        if eol:
                            c1 += "</div>"
                        if not continuing:
                            f1 = "<div>"

            # Indentation aligning follow-up lines with the item's text
            if continuing:
                indent = cont_i1 + len(cont_f1) * " "
            else:
                indent = i1 + len(f1) * " "

            # Process payload

            # Escape user content that may interfere with HTML
            if format == "html":
                # NOTE(review): the last pair is a reconstruction (it keeps
                # runs of spaces from collapsing) - confirm against upstream.
                mystr = (
                    mystr.replace("&", "&amp;")
                    .replace("<", "&lt;")
                    .replace(">", "&gt;")
                    .replace("  ", "&nbsp; ")
                )

            # Upconvert common "manual separators" (vertical line ASCII art)
            # to markup. Not strictly necessary and we recommend to disable
            # this if it interferes with content.
            if (
                autotidy_hr
                and not continuing
                and in_list == 0
                and mystr_no_cr != ""
                and (
                    mystr_no_cr.replace("*", "") == ""
                    or mystr_no_cr.replace("-", "") == ""
                    or mystr_no_cr.replace("=", "") == ""
                )
            ):
                mystr = FMT_LINE
                if c1 == FMT_LINEBREAK:
                    c1 = ""
                forced_break = "\n" + FMT_LINEBREAK + "\n"
                # Strip forced linefeed before it.
                if out.endswith(forced_break):
                    out = out[: -len(forced_break)]
                    n1 = "\n\n"
                # Make sure to include one linefeed before it.
                # Otherwise Markdown renderers interpret it as
                # a section header marker.
                elif not out.endswith("\n\n") and len(out) > 0:
                    n1 = "\n"

            # Escape user content that may interfere with Markdown markup
            if format == "md":
                # Also escape ']'?
                mystr = (
                    mystr.replace("[", "\\[")
                    .replace("__", "\\_\\_")
                    .replace("**", "\\*\\*")
                )

            if "link" in run:
                prep = ""
                if mystr != "" and mystr[0] in ("\u2028", "\n"):
                    # Cleanup newlines sometimes sneaking into the beginning
                    # of the link text
                    prep = FMT_LINEBREAK + "\n" + indent
                    mystr = mystr[1:]
                if run["link"] == mystr:
                    mystr = FMT_LINK_PURE.format(mystr)
                elif run["link"] == "http://" + mystr:  # Bump auto-links to HTTPS
                    mystr = FMT_LINK_NAMED.format("https://" + mystr, mystr)
                else:
                    mystr = FMT_LINK_NAMED.format(run["link"], mystr)
                mystr = prep + mystr
            elif "attachmentInfo" in run:
                # Doubles as the alt text/link name, once a filename is known
                label = run["attachmentInfo"]["attachmentIdentifier"]
                myeo = None
                for oi in eos:
                    if oi["uuid"] == label:
                        myeo = oi
                        break
                mystr = ""
                if myeo is not None:
                    att_type = myeo["type"]
                    if debug >= 1:
                        print("! attachment:", myeo["primary_key"], att_type)
                    if att_type == "public.url":
                        if myeo["url"] is not None:
                            mystr += FMT_LINK_PURE.format(myeo["url"])
                        else:
                            mystr += FMT_LINK_PURE.format("url://lost")
                    elif att_type == "com.apple.notes.inlinetextattachment.hashtag":
                        if myeo["alt_text"] is not None:
                            mystr += myeo["alt_text"]
                        else:
                            # Seen in "Recently Deleted" in iCloud, which was also
                            # not appearing anymore on the UI (aka "fake deleted"),
                            # but still there and also occupying storage.
                            mystr += "#{lost_tag}"
                    elif att_type == "com.apple.notes.table":
                        # | Name       | Size   | Color          |
                        # | ---        | ---    | ---            |
                        # | lime       | small  | green          |
                        # | orange     | medium | orange         |
                        # | grapefruit | large  | yellow or pink |
                        tbl = FMT_TABLE_O
                        headerdone = False
                        for row in myeo["table"]:
                            rr = ""
                            sep = ""
                            for col in row:
                                if format == "html":
                                    if not headerdone:
                                        rr += (
                                            FMT_TABLE_HEADER_O
                                            + col
                                            + FMT_TABLE_HEADER_C
                                        )
                                    else:
                                        rr += FMT_TABLE_CELL_O + col + FMT_TABLE_CELL_C
                                elif format == "md":
                                    rr += FMT_TABLE_CELL_O + col + FMT_TABLE_CELL_C
                                    sep += FMT_TABLE_HEADER_O + FMT_TABLE_HEADER_C
                            tbl += (
                                "\n"
                                + indent
                                + FMT_TABLE_ROW_O
                                + rr.rstrip()
                                + FMT_TABLE_ROW_C
                            )
                            if not headerdone:
                                if format == "md":
                                    tbl += (
                                        "\n"
                                        + indent
                                        + FMT_TABLE_ROW_O
                                        + sep.rstrip()
                                        + FMT_TABLE_ROW_C
                                    )
                                headerdone = True
                        mystr += tbl + "\n" + FMT_TABLE_C + "\n"
                    elif att_type == "com.apple.notes.gallery":
                        mystr += "(gallery of {})".format(len(myeo["child_objects"]))
                        for mycho in myeo["child_objects"]:
                            if "filename" in mycho:
                                label = mycho["filename"]
                            if label is None:
                                label = "unnamed"
                            if "backup_location" in mycho:
                                filepath = mycho["backup_location"]
                            else:
                                filepath = mycho["filepath"]
                            mystr += (
                                "\n"
                                + indent
                                + FMT_IMAGE.format(
                                    fn_to_url(filepath, embedimages), label
                                )
                            )
                    else:
                        if "filename" in myeo:
                            label = myeo["filename"]
                        if label is None:
                            label = "unnamed"
                        if "backup_location" in myeo:
                            filepath = myeo["backup_location"]
                        else:
                            filepath = myeo["filepath"]
                        if (
                            myeo["conforms_to"] == "image"
                            or run["attachmentInfo"]["typeUti"] == "com.apple.drawing"
                        ):
                            mystr += FMT_IMAGE.format(
                                fn_to_url(filepath, embedimages), label
                            )
                        elif "thumbnails" in myeo and myeo["thumbnails"]:
                            th = myeo["thumbnails"][-1]  # pick the largest
                            if "backup_location" in th:
                                thumbnail = th["backup_location"]
                            else:
                                thumbnail = th["filepath"]
                            mystr += FMT_LINK_IMAGE.format(
                                fn_to_url(thumbnail, embedimages),
                                fn_to_url(filepath),
                                label,
                            )
                        else:
                            mystr += FMT_LINK_NAMED.format(fn_to_url(filepath), label)
                        # Also show the content of these text-based types
                        if att_type in (
                            "public.vcard",
                            "public.comma-separated-values-text",
                        ):
                            mystr += "\n\n" + FMT_CODEBLOCK_O + "\n"
                            with open(filepath, "r") as f:
                                mystr += f.read()
                            mystr += "\n" + FMT_CODEBLOCK_C + "\n"
                else:
                    mystr += "{" + label + " attachment not found" + "}"
            else:
                # normal text

                # This avoids leaving an indent-spaces-only line below the
                # item, while also preserving the extra closing newline
                # as it appears in Notes.
                if mystr.endswith("\u2028"):
                    mystr = mystr[:-1] + FMT_LINEBREAK
                    if eol:
                        mystr += FMT_LINEBREAK
                    else:
                        mystr += "\n" + indent
                mystr = mystr.replace(
                    "\u2028", FMT_LINEBREAK + "\n" + indent
                )  # \u2028 = LINE SEPARATOR

        # font style
        wo = ""  # basic styles
        wc = wo
        uo = ""  # underline
        uc = uo
        so = ""  # strikethrough
        sc = so
        yo = ""  # superscript/subscript
        yc = yo
        xo = ""  # span style
        xc = xo

        if mystr != "" and not in_codefence:
            st = []  # span styles

            font_weight = run.get("fontWeight")
            if font_weight == FONT_TYPE_BOLD:
                wo = FMT_BOLD_O
                wc = FMT_BOLD_C
            elif font_weight == FONT_TYPE_ITALIC:
                wo = FMT_ITALIC_O
                wc = FMT_ITALIC_C
            elif font_weight == FONT_TYPE_BOLD_ITALIC:
                wo = FMT_BOLD_O + FMT_ITALIC_O
                wc = FMT_ITALIC_C + FMT_BOLD_C
            if "underlined" in run and run["underlined"] == 1:
                uo = "<u>"
                uc = "</u>"
            if "strikethrough" in run and run["strikethrough"] == 1:
                so = "<s>"
                sc = "</s>"
            if "superscript" in run:
                if run["superscript"] < 0:  # subscript
                    yo = "<sub>"
                    yc = "</sub>"
                elif run["superscript"] > 0:  # superscript
                    yo = "<sup>"
                    yc = "</sup>"
            if "font" in run:
                font = run["font"]
                if "pointSize" in font:
                    # Notes.app built-in export uses "px" (with the pointSize value)
                    st.append(
                        "font-size: " + str(font["pointSize"]) + "px"
                    )  # not "pt"!
                if "fontName" in font:
                    st.append("font-family: " + font["fontName"])
                if "fontHints" in font:
                    # Purpose undiscovered. Values seen: 1
                    st.append("-notes-font-hints: " + str(font["fontHints"]))
            # Omit color for links to avoid noise. Most Markdown renderers
            # color links by default, overriding custom colors we would set
            # here.
            if "color" in run and "link" not in run:
                st.append("color: " + _css_color(run["color"]))

            if len(st) > 0:
                xo = '<span style="{}">'.format("; ".join(st) + ";")
                xc = "</span>"

        # Omit forced-newline-only lines after a section separator
        if (
            c1 == FMT_LINEBREAK
            and mystr_no_cr == ""
            and out.endswith("\n" + FMT_LINE + "\n")
            and not in_codefence
        ):
            c1 = ""

        # Construct output line

        # Ensure that we move any space prefixes or suffixes outside the
        # markup. Markdown renderers ignore whitespace-separated markups.
        o1 = ""
        if mystr != "" and xc == "" and uc == "" and sc == "" and wc != "":
            stripped = mystr.rstrip()
            if stripped != mystr:
                c1 = mystr[len(stripped) :] + c1
                mystr = stripped
            stripped = mystr.lstrip()
            if stripped != mystr:
                o1 = mystr[: len(mystr) - len(stripped)]
                mystr = stripped

        # Quick hack, might not fit/cover all situations
        if ao != "":
            c1 = c1.replace(FMT_LINEBREAK, "")

        if not codefence_start and line_is_codefence(mystr) and in_codefence:
            if format == "html":
                # The fence closes here, so this needs the closing tag
                mystr = FMT_CODEBLOCK_C
            in_codefence = False

        if not continuing:
            if format == "html" and (in_list != new_in_list or prev_i1 != i1):
                list_markup = ""
                if new_in_list == 0:
                    # Left the list(s): close every one still open. This
                    # line is not a list item, so nothing is opened.
                    while in_list_close:
                        closer = in_list_close.pop()
                        list_markup += closer["i1"] + closer["c"] + "\n"
                else:
                    (lo, lc) = FMT_LIST_TAGS.get(new_in_list, FMT_LIST_TAGS_DEFAULT)
                    i1n = len(i1) // len(INDENT_SPACES)
                    prev_i1n = len(prev_i1) // len(INDENT_SPACES)
                    if i1n > prev_i1n:
                        # Deeper: open one list per added level
                        for _ in range(i1n - prev_i1n):
                            list_markup += i1 + lo + "\n"
                            in_list_close.append({"c": lc, "i1": i1})
                    elif i1n < prev_i1n:
                        # Shallower: close one list per removed level
                        for _ in range(prev_i1n - i1n):
                            if not in_list_close:
                                break
                            closer = in_list_close.pop()
                            list_markup += closer["i1"] + closer["c"] + "\n"
                        # Never leave an item without an enclosing list
                        if not in_list_close:
                            list_markup += i1 + lo + "\n"
                            in_list_close.append({"c": lc, "i1": i1})
                    else:
                        # Same level, different list style: swap the list
                        if in_list_close:
                            closer = in_list_close.pop()
                            list_markup += closer["i1"] + closer["c"] + "\n"
                        list_markup += i1 + lo + "\n"
                        in_list_close.append({"c": lc, "i1": i1})
                if list_markup != "":
                    if not out.endswith("\n"):
                        out += "\n"
                    out += list_markup
            in_list = new_in_list
            prev_i1 = i1

        out += (
            n1
            + i1
            + f1
            + o1
            + wo
            + so
            + uo
            + yo
            + ao
            + xo
            + mystr
            + xc
            + ac
            + yc
            + uc
            + sc
            + wc
            + c1
        )

        if eol:
            out += "\n"
            continuing = False
        else:
            if not continuing:
                cont_f1 = f1
                cont_i1 = i1
            continuing = True

    # Close the HTML lists still open at the end of the note
    if format == "html" and in_list_close:
        if not out.endswith("\n"):
            out += "\n"
        while in_list_close:
            closer = in_list_close.pop()
            out += closer["i1"] + closer["c"] + "\n"

    if not out.endswith("\n"):
        out += "\n"

    print("Writing '" + outfnext + "'")

    with open(outfnext, "w", encoding="utf-8") as f:
        f.write(out)

    # Requires macOS + Apple Developer Tools
    if os.path.isfile("/usr/bin/SetFile"):
        tsfmt = "%m/%d/%Y %H:%M:%S"  # "MM/DD/YYYY [hh:mm:[:ss] [AM | PM]]"
        # Run without a shell, so the file name needs no quoting.
        # The empty TZ makes the tool take the timestamps as UTC.
        subprocess.run(
            [
                "/usr/bin/SetFile",
                "-d",
                createutc.strftime(tsfmt),
                "-m",
                updateutc.strftime(tsfmt),
                outfnext,
            ],
            env=dict(os.environ, TZ=""),
            check=False,
        )
    else:
        # timestamp() honors the UTC offset parsed from the note, unlike
        # mktime(timetuple()), which reads the fields as local time.
        mtime = update.timestamp()
        os.utime(outfnext, times=(mtime, mtime))
def build_arg_parser():
    """Return the command-line argument parser of this script."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""Convert JSON exports to Markdown or HTML.
First create an export with:
https://github.com/threeplanetssoftware/apple_cloud_notes_parser
e869efe6fac0927eb9a7c5327c67415765b3a6ec
(2022-12-09 20:08:50 -0500) or later.""",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="overwrite existing output",
    )
    # NOTE(review): with default=True this option is always on, so
    # Markdown is exported even when only --html is passed. Kept as-is
    # to not change which files an existing invocation produces.
    parser.add_argument(
        "--md", dest="to_md", action="store_true", default=True, help="export in Markdown"
    )
    parser.add_argument(
        "--html", dest="to_html", action="store_true", help="export in HTML"
    )
    parser.add_argument(
        "--json",
        dest="json_input",
        action="store",
        default="",
        help="input JSON (or export directory root)",
    )
    parser.add_argument(
        "--output-prefix",
        dest="out_prefix",
        action="store",
        default="exported-Note-",
        help="output prefix",
    )
    parser.add_argument(
        "--embed", dest="embedimages", action="store_true", help="embed images"
    )
    parser.add_argument(
        "--frontmatter",
        dest="frontmatter",
        action="store_true",
        help="add front matter (title and dates)",
    )
    parser.add_argument(
        "--md-linebreaks",
        dest="md_linebreaks",
        action="store_true",
        help="use native Markdown linebreaks (double-space) instead of <br>",
    )
    parser.add_argument(
        "--no-tidy-hr",
        dest="notidy_hr",
        action="store_true",
        help="do not convert common manual line separators (full line of '*', '-' or '=' characters) to markup",
    )
    parser.add_argument(
        "--no-tidy-cr",
        dest="notidy_cr",
        action="store_true",
        help="do not delete <CR> characters",
    )
    parser.add_argument(
        "--debug",
        dest="debug",
        action="store",
        type=int,
        default=0,
        help="debug level 0-3",
    )
    parser.add_argument(
        dest="id_list", metavar="note IDs, export all if none specified", nargs="*"
    )
    return parser


def main(argv=None):
    """Export the notes selected on the command line.

    argv: list of arguments to parse; None means sys.argv[1:].

    Returns the process exit status: 0 on success, 1 if the input is
    missing, unreadable or not valid JSON, or if no format is selected.
    A requested note ID missing from the input is only warned about.
    """
    args = build_arg_parser().parse_args(argv)

    # point to an apple_cloud_notes_parser output JSON:
    if not args.json_input:
        print(
            "! Error: You must set the input with --json (e.g. 'output/json/all_notes_1.json')"
        )
        return 1

    json_input = args.json_input
    if os.path.isdir(json_input):
        json_input = os.path.join(json_input, "json/all_notes_1.json")

    formats = []
    if args.to_md:
        formats.append("md")
    if args.to_html:
        formats.append("html")

    if not formats:
        print("! Error: You must set at least one export format via --md and/or --html")
        return 1

    try:
        with open(json_input, "r", encoding="utf-8") as f:
            notes = json.load(f)
    except OSError:
        print("! Error: Could not open input JSON:", json_input)
        return 1
    except ValueError:
        # json.JSONDecodeError (and a failed UTF-8 decode) derive from this
        print("! Error: Could not parse input JSON:", json_input)
        return 1

    # Export everything if no note ID was given
    note_ids = args.id_list or list(notes["notes"])

    for note_id in note_ids:
        if note_id not in notes["notes"]:
            print("! Warning: Note not found:", note_id)
            continue
        note = notes["notes"][note_id]
        for fmt in formats:
            note_export(
                note,
                out_prefix=args.out_prefix,
                overwrite=args.overwrite,
                format=fmt,
                embedimages=args.embedimages,
                frontmatter=args.frontmatter,
                md_linebreaks=args.md_linebreaks,
                debug=args.debug,
                autotidy_cr=not args.notidy_cr,
                autotidy_hr=not args.notidy_hr,
            )
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment