Skip to content

Instantly share code, notes, and snippets.

@danilomo
Created September 13, 2024 19:58
Show Gist options
  • Save danilomo/abfb274623193923a661f5544df0492f to your computer and use it in GitHub Desktop.
Save danilomo/abfb274623193923a661f5544df0492f to your computer and use it in GitHub Desktop.
Logseq to Org roam v1
# Usage:
# - pip install marko (in a venv, or system-wide if you prefer)
# - python logseq_to_org.py <logseq's pages directory> <output folder>
import datetime as dt
import re
import sys
import uuid
from contextlib import contextmanager
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, Generator, cast
from marko import Markdown, block, inline
from marko.helpers import MarkoExtension
from marko.renderer import Renderer
class OrgRenderer(Renderer):
    """Render a Marko document tree as Org-mode text.

    Adapted from Marko's MarkdownRenderer
    (https://github.com/frostming/marko/blob/master/marko/md_renderer.py).

    Block output is indented through two pieces of state: ``_prefix`` is
    prepended to the first line of the next block, and ``_second_prefix`` to
    continuation lines. Every block renderer resets ``_prefix`` to
    ``_second_prefix`` once it has emitted its first line.
    """

    # Maps a note title to the ID used in ``[[id:...]]`` links. It is shared
    # class state: the caller assigns it before any document is rendered.
    titles_to_uuid = {}

    # Non-greedy so that several ``[[wiki links]]`` on one line are matched
    # individually instead of as one span from the first ``[[`` to the last ``]]``.
    _WIKI_LINK = re.compile(r"\[\[(.*?)\]\]")

    @classmethod
    def transform_links(cls, line):
        """Rewrite ``[[Title]]`` links in *line* to ``[[id:UUID][Title]]``.

        Titles that are not present in ``titles_to_uuid`` are left unchanged.
        """

        def replace_match(match):
            title = match.group(1)
            note_id = cls.titles_to_uuid.get(title)
            if not note_id:
                return f"[[{title}]]"
            return f"[[id:{note_id}][{title}]]"

        return cls._WIKI_LINK.sub(replace_match, line)

    def __init__(self) -> None:
        super().__init__()
        self._prefix = ""
        self._second_prefix = ""

    def __enter__(self) -> "OrgRenderer":
        # Start every rendering session with clean indentation state.
        self._prefix = ""
        self._second_prefix = ""
        return super().__enter__()

    @contextmanager
    def container(
        self, prefix: str, second_prefix: str = ""
    ) -> Generator[None, None, None]:
        """Temporarily extend the first-line and continuation prefixes.

        The previous prefixes are restored on exit, including when rendering
        the nested content raises.
        """
        old_prefix = self._prefix
        old_second_prefix = self._second_prefix
        self._prefix += prefix
        self._second_prefix += second_prefix
        try:
            yield
        finally:
            self._prefix = old_prefix
            self._second_prefix = old_second_prefix

    def render_paragraph(self, element: block.Paragraph) -> str:
        """Render a paragraph and resolve its wiki links."""
        children = self.render_children(element)
        line = self._prefix + children + "\n"
        self._prefix = self._second_prefix
        return OrgRenderer.transform_links(line)

    def render_list(self, element: block.List) -> str:
        """Render an ordered or bullet list, one container per item."""
        result = []
        if element.ordered:
            for num, child in enumerate(element.children, element.start):
                with self.container(f"{num}. ", " " * (len(str(num)) + 2)):
                    result.append(self.render(child))
        else:
            # Continuation lines are indented by the width of "<bullet> ",
            # mirroring what the ordered branch does for "<num>. ".
            continuation = " " * (len(element.bullet) + 1)
            for child in element.children:
                with self.container(f"{element.bullet} ", continuation):
                    result.append(self.render(child))
        self._prefix = self._second_prefix
        return "".join(result)

    def render_list_item(self, element: block.ListItem) -> str:
        return self.render_children(element)

    def render_quote(self, element: block.Quote) -> str:
        # NOTE(review): this still emits Markdown-style "> " quoting rather
        # than an Org quote block.
        with self.container("> ", "> "):
            result = self.render_children(element).rstrip("\n")
        self._prefix = self._second_prefix
        return result + "\n"

    def render_fenced_code(self, element: block.FencedCode) -> str:
        """Render a fenced block as ``#+BEGIN_SRC <lang>`` or ``#+BEGIN_EXAMPLE``."""
        lang = f" {element.lang}" if element.lang else ""
        block_type = "SRC" if lang else "EXAMPLE"
        lines = [f"#+BEGIN_{block_type}{lang}"]
        lines.extend(
            self._second_prefix + line
            for line in self.render_children(element).splitlines()
        )
        lines.append(f"#+END_{block_type}")
        self._prefix = self._second_prefix
        return "\n".join(lines) + "\n"

    def render_code_block(self, element: block.CodeBlock) -> str:
        """Render an indented code block with four extra spaces of indent."""
        indent = " " * 4
        # Fall back to a single empty line so empty content cannot raise.
        first, *rest = self.render_children(element).splitlines() or [""]
        lines = [self._prefix + indent + first] + [
            self._second_prefix + indent + line for line in rest
        ]
        self._prefix = self._second_prefix
        return "\n".join(lines) + "\n"

    def render_html_block(self, element: block.HTMLBlock) -> str:
        result = self._prefix + element.body + "\n"  # type: ignore[attr-defined]
        self._prefix = self._second_prefix
        return result

    def render_thematic_break(self, element: block.ThematicBreak) -> str:
        # Org's horizontal rule is a line of at least five dashes; "* * *"
        # would be parsed as a level-1 heading.
        result = self._prefix + "-----\n"
        self._prefix = self._second_prefix
        return result

    def render_heading(self, element: block.Heading) -> str:
        """Render a heading as ``level`` asterisks followed by its text."""
        result = "*" * element.level + " " + self.render_children(element) + "\n"
        self._prefix = self._second_prefix
        return result

    def render_setext_heading(self, element: block.SetextHeading) -> str:
        return self.render_heading(cast("block.Heading", element))

    def render_blank_line(self, element: block.BlankLine) -> str:
        result = self._prefix + "\n"
        self._prefix = self._second_prefix
        return result

    def render_link_ref_def(self, element: block.LinkRefDef) -> str:
        link_text = element.dest
        if element.title:
            link_text += f" {element.title}"
        return f"[{element.label}]: {link_text}\n"

    def render_emphasis(self, element: inline.Emphasis) -> str:
        # Markdown *emphasis* is italic; the Org marker for italic is "/".
        return f"/{self.render_children(element)}/"

    def render_strong_emphasis(self, element: inline.StrongEmphasis) -> str:
        # Markdown **strong** is bold; the Org marker for bold is a single "*".
        return f"*{self.render_children(element)}*"

    def render_inline_html(self, element: inline.InlineHTML) -> str:
        return cast(str, element.children)

    def render_link(self, element: inline.Link) -> str:
        """Render a link as ``[[target][text]]``.

        When the destination/title pair matches a link reference definition
        of the document, the reference label is used as the target.
        """
        link_text = self.render_children(element)
        link_title = (
            '"{}"'.format(element.title.replace('"', '\\"')) if element.title else None
        )
        assert self.root_node
        label = next(
            (
                k
                for k, v in self.root_node.link_ref_defs.items()
                if v == (element.dest, link_title)
            ),
            None,
        )
        if label is not None:
            if label == link_text:
                return f"[{label}]"
            return f"[[{label}][{link_text}]]"
        title = f" {link_title}" if link_title is not None else ""
        return f"[[{element.dest}{title}][{link_text}]]"

    def render_auto_link(self, element: inline.AutoLink) -> str:
        return f"<{element.dest}>"

    def render_image(self, element: inline.Image) -> str:
        # NOTE(review): images are still emitted in Markdown syntax.
        template = "![{}]({}{})"
        title = (
            ' "{}"'.format(element.title.replace('"', '\\"')) if element.title else ""
        )
        return template.format(self.render_children(element), element.dest, title)

    def render_literal(self, element: inline.Literal) -> str:
        return f"\\{element.children}"

    def render_raw_text(self, element: inline.RawText) -> str:
        from marko.ext.pangu import PANGU_RE

        return re.sub(PANGU_RE, " ", element.children)

    def render_line_break(self, element: inline.LineBreak) -> str:
        return "\n" if element.soft else "\\\n"

    def render_code_span(self, element: inline.CodeSpan) -> str:
        # NOTE(review): code spans are still emitted with Markdown backticks.
        text = element.children
        # Parenthesised so empty text short-circuits instead of indexing text[-1].
        if text and (text[0] == "`" or text[-1] == "`"):
            return f"`` {text} ``"
        return f"`{element.children}`"
def sanitize(filename: str):
    """Return *filename* with every space and hyphen turned into an underscore."""
    underscore_map = str.maketrans({" ": "_", "-": "_"})
    return filename.translate(underscore_map)
@dataclass
class Note:
    """A source page on disk together with the ID assigned to it."""

    # Path of the source file to convert.
    file: Path
    # Random UUID generated per instance unless one is passed in.
    uuid: str = field(default_factory=lambda: str(uuid.uuid4()))

    @property
    def title(self) -> str:
        """Note title: the file name without its extension."""
        return self.file.stem

    @property
    def org_roam_filename(self) -> str:
        """Output file name of the form ``<YYYYmmddHHMMSS>_<sanitized title>.org``.

        The timestamp is derived from the file's ``st_ctime`` (via ``lstat``,
        so symlinks are not followed) interpreted as seconds since the epoch.
        """
        stat = self.file.lstat()
        # NOTE: on Unix st_ctime is the last metadata change, not the creation time.
        ctime = int(stat.st_ctime)
        date = dt.datetime(1970, 1, 1) + dt.timedelta(seconds=ctime)
        date_as_str = date.strftime("%Y%m%d%H%M%S")
        return f"{date_as_str}_{sanitize(self.title)}.org"

    def read_contents(self) -> str:
        """Return the whole file as text.

        The file is decoded explicitly as UTF-8 so the result does not depend
        on the platform's default encoding.
        """
        return self.file.read_text(encoding="utf-8")
class Notes:
    """A collection of notes and the Markdown-to-Org converter applied to them."""

    def __init__(self, notes_list: list[Note]):
        """Index *notes_list* by title and build the Marko converter."""
        self.notes_list = notes_list
        # Title -> ID lookup, used to resolve links between notes.
        self.titles_to_uuid = {note.title: note.uuid for note in notes_list}
        ext = MarkoExtension(renderer_mixins=[OrgRenderer])
        self.markdown = Markdown(extensions=[ext])

    def convert_files(self, output_dir: str):
        """Convert every note and write it into *output_dir*.

        The directory is created when it does not exist. Each note is
        converted before its output file is opened, so a conversion error
        does not leave an empty file behind. Files are written as UTF-8.
        """
        target_dir = Path(output_dir)
        target_dir.mkdir(parents=True, exist_ok=True)
        for note in self.notes_list:
            output_org = self.convert_to_org(note)
            output_file = target_dir / note.org_roam_filename
            output_file.write_text(output_org, encoding="utf-8")

    def convert_to_org(self, note: Note):
        """Return the Org text for *note*: property drawer, title, then body."""
        document_body = self.markdown(note.read_contents())
        return (
            ":PROPERTIES:\n"
            f":ID: {note.uuid}\n"
            ":END:\n"
            f"#+title: {note.title}\n"
            f"{document_body}"
        )
def main():
    """Convert every file of the input directory into the output directory.

    Expects exactly two command-line arguments: the input directory and the
    output folder. Exits with a usage message otherwise.
    """
    if len(sys.argv) != 3:
        sys.exit(f"usage: {sys.argv[0]} <logseq's pages directory> <output folder>")
    input_dir, output_dir = sys.argv[1:]
    # glob("*") also yields sub-directories, which cannot be read as notes.
    notes_list = [Note(path) for path in Path(input_dir).glob("*") if path.is_file()]
    notes = Notes(notes_list)
    # The renderer resolves links through this class-level mapping.
    OrgRenderer.titles_to_uuid = notes.titles_to_uuid
    notes.convert_files(output_dir)


if __name__ == "__main__":
    main()
@brianwisti
Copy link

I'm working on my own version of this task, and this gist has many great ideas I would like to borrow. I didn't even know all the cool stuff Marko can do. What is the license for the code?

(If effectively public domain, I'm fond of 0BSD for making that explicit.)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment