Created
September 13, 2024 19:58
-
-
Save danilomo/abfb274623193923a661f5544df0492f to your computer and use it in GitHub Desktop.
Logseq to Org roam v1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Usage:
# - pip install marko (in a venv, or system-wide if you prefer)
# - python logseq_to_org.py <logseq's pages directory> <output folder>
import datetime as dt | |
import re | |
import sys | |
import uuid | |
from contextlib import contextmanager | |
from dataclasses import dataclass, field | |
from pathlib import Path | |
from typing import TYPE_CHECKING, Generator, cast | |
from marko import Markdown, block, inline | |
from marko.helpers import MarkoExtension | |
from marko.renderer import Renderer | |
class OrgRenderer(Renderer):
    """Render a Marko document tree as Org text for org-roam.

    Adapted from Marko's MarkdownRenderer
    (https://github.com/frostming/marko/blob/master/marko/md_renderer.py)

    Block-level output is indented with two running prefixes: ``_prefix`` is
    prepended to the first line the next block emits, and ``_second_prefix``
    is the value ``_prefix`` is reset to once a block has been rendered.

    NOTE(review): several inline renderers (emphasis, code spans, images,
    auto links) still emit Markdown-style markup -- confirm whether Org
    markup was intended there.
    """

    # Page title -> note ID; consulted by transform_links(). Expected to be
    # assigned by the caller before any document is rendered.
    titles_to_uuid = {}

    # Compiled once. The group is non-greedy so that two links on the same
    # line are matched separately instead of as one span from the first
    # "[[" to the last "]]".
    _WIKI_LINK_RE = re.compile(r"\[\[(.*?)\]\]", re.MULTILINE)

    @classmethod
    def transform_links(cls, line):
        """Rewrite ``[[Title]]`` links in *line* as ``[[id:<uuid>][Title]]``.

        Titles that are not keys of ``titles_to_uuid`` are left unchanged.
        """

        def replace_match(match):
            title = match.group(1)
            note_id = cls.titles_to_uuid.get(title)
            if not note_id:
                return f"[[{title}]]"
            return f"[[id:{note_id}][{title}]]"

        return cls._WIKI_LINK_RE.sub(replace_match, line)

    def __init__(self) -> None:
        super().__init__()
        self._prefix = ""
        self._second_prefix = ""

    def __enter__(self) -> "OrgRenderer":
        # Start every rendering session with empty prefixes.
        self._prefix = ""
        self._second_prefix = ""
        return super().__enter__()

    @contextmanager
    def container(
        self, prefix: str, second_prefix: str = ""
    ) -> Generator[None, None, None]:
        """Temporarily extend both prefixes while a nested block is rendered.

        The previous prefixes are restored on exit, including when the body
        raises, so a failed render cannot leak indentation into later output.
        """
        old_prefix = self._prefix
        old_second_prefix = self._second_prefix
        self._prefix += prefix
        self._second_prefix += second_prefix
        try:
            yield
        finally:
            self._prefix = old_prefix
            self._second_prefix = old_second_prefix

    def render_paragraph(self, element: block.Paragraph) -> str:
        """Render a paragraph line and convert its ``[[Title]]`` links."""
        children = self.render_children(element)
        line = self._prefix + children + "\n"
        self._prefix = self._second_prefix
        return OrgRenderer.transform_links(line)

    def render_list(self, element: block.List) -> str:
        """Render each list item inside a container carrying its marker."""
        result = []
        if element.ordered:
            for num, child in enumerate(element.children, element.start):
                # Continuation lines are padded to the width of "<num>. ".
                with self.container(f"{num}. ", " " * (len(str(num)) + 2)):
                    result.append(self.render(child))
        else:
            for child in element.children:
                with self.container(f"{element.bullet} ", " "):
                    result.append(self.render(child))
        self._prefix = self._second_prefix
        return "".join(result)

    def render_list_item(self, element: block.ListItem) -> str:
        return self.render_children(element)

    def render_quote(self, element: block.Quote) -> str:
        """Render a quote with ``> `` prepended via the container prefixes."""
        with self.container("> ", "> "):
            result = self.render_children(element).rstrip("\n")
        self._prefix = self._second_prefix
        return result + "\n"

    def render_fenced_code(self, element: block.FencedCode) -> str:
        """Render a fenced block as ``#+BEGIN_SRC <lang>`` or ``#+BEGIN_EXAMPLE``."""
        lang = f" {element.lang}" if element.lang else ""
        # Without a language there is nothing to put after SRC, so EXAMPLE is used.
        block_type = "SRC" if lang else "EXAMPLE"
        lines = [f"#+BEGIN_{block_type}{lang}"]
        lines.extend(
            self._second_prefix + line
            for line in self.render_children(element).splitlines()
        )
        lines.append(f"#+END_{block_type}")
        self._prefix = self._second_prefix
        return "\n".join(lines) + "\n"

    def render_code_block(self, element: block.CodeBlock) -> str:
        """Render an indented code block, each line indented by four spaces."""
        indent = " " * 4
        rendered = []
        # Only the first line gets the pending prefix; the rest get the
        # continuation prefix. Iterating (rather than indexing line 0) also
        # keeps an empty block from raising IndexError.
        for index, line in enumerate(self.render_children(element).splitlines()):
            lead = self._prefix if index == 0 else self._second_prefix
            rendered.append(lead + indent + line)
        self._prefix = self._second_prefix
        return "\n".join(rendered) + "\n"

    def render_html_block(self, element: block.HTMLBlock) -> str:
        result = self._prefix + element.body + "\n"  # type: ignore[attr-defined]
        self._prefix = self._second_prefix
        return result

    def render_thematic_break(self, element: block.ThematicBreak) -> str:
        result = self._prefix + "* * *\n"
        self._prefix = self._second_prefix
        return result

    def render_heading(self, element: block.Heading) -> str:
        """Render a heading as ``element.level`` asterisks followed by its text."""
        result = "*" * element.level + " " + self.render_children(element) + "\n"
        self._prefix = self._second_prefix
        return result

    def render_setext_heading(self, element: block.SetextHeading) -> str:
        return self.render_heading(cast("block.Heading", element))

    def render_blank_line(self, element: block.BlankLine) -> str:
        result = self._prefix + "\n"
        self._prefix = self._second_prefix
        return result

    def render_link_ref_def(self, element: block.LinkRefDef) -> str:
        link_text = element.dest
        if element.title:
            link_text += f" {element.title}"
        return f"[{element.label}]: {link_text}\n"

    def render_emphasis(self, element: inline.Emphasis) -> str:
        return f"*{self.render_children(element)}*"

    def render_strong_emphasis(self, element: inline.StrongEmphasis) -> str:
        return f"**{self.render_children(element)}**"

    def render_inline_html(self, element: inline.InlineHTML) -> str:
        return cast(str, element.children)

    def render_link(self, element: inline.Link) -> str:
        """Render a link as ``[[target][text]]``.

        When the document's link reference definitions contain an entry equal
        to ``(dest, quoted title)``, that entry's label is used as the target.
        """
        link_text = self.render_children(element)
        link_title = (
            '"{}"'.format(element.title.replace('"', '\\"')) if element.title else None
        )
        assert self.root_node
        label = next(
            (
                k
                for k, v in self.root_node.link_ref_defs.items()
                if v == (element.dest, link_title)
            ),
            None,
        )
        if label is not None:
            if label == link_text:
                return f"[{label}]"
            return f"[[{label}][{link_text}]]"
        title = f" {link_title}" if link_title is not None else ""
        return f"[[{element.dest}{title}][{link_text}]]"

    def render_auto_link(self, element: inline.AutoLink) -> str:
        return f"<{element.dest}>"

    def render_image(self, element: inline.Image) -> str:
        template = "![{}]({}{})"
        title = (
            ' "{}"'.format(element.title.replace('"', '\\"')) if element.title else ""
        )
        return template.format(self.render_children(element), element.dest, title)

    def render_literal(self, element: inline.Literal) -> str:
        return f"\\{element.children}"

    def render_raw_text(self, element: inline.RawText) -> str:
        """Return the raw text with every ``PANGU_RE`` match replaced by a space."""
        from marko.ext.pangu import PANGU_RE

        return re.sub(PANGU_RE, " ", element.children)

    def render_line_break(self, element: inline.LineBreak) -> str:
        # Soft breaks become a plain newline; hard breaks keep a trailing backslash.
        return "\n" if element.soft else "\\\n"

    def render_code_span(self, element: inline.CodeSpan) -> str:
        """Render inline code, using double backticks when the text touches one."""
        text = element.children
        # Parenthesised so that empty text short-circuits before indexing;
        # the unparenthesised form evaluated text[-1] on "" and raised IndexError.
        if text and (text[0] == "`" or text[-1] == "`"):
            return f"`` {text} ``"
        return f"`{text}`"
def sanitize(filename: str):
    """Return *filename* with every space and hyphen replaced by an underscore."""
    underscore_map = str.maketrans({" ": "_", "-": "_"})
    return filename.translate(underscore_map)
@dataclass
class Note:
    """A source page on disk together with the ID assigned to its Org note."""

    # Path of the source page.
    file: Path
    # Random UUID string generated per instance. Inside the lambda, ``uuid``
    # resolves to the imported module, not to this field.
    uuid: str = field(default_factory=lambda: str(uuid.uuid4()))

    @property
    def title(self):
        """The note title: the file name without its extension."""
        return self.file.stem

    @property
    def org_roam_filename(self):
        """Output file name: ``<YYYYmmddHHMMSS>_<sanitized title>.org``.

        The timestamp is built from the file's ``st_ctime`` (whole seconds)
        added to 1970-01-01, without any time-zone conversion.
        """
        ctime = int(self.file.lstat().st_ctime)
        date = dt.datetime(1970, 1, 1) + dt.timedelta(seconds=ctime)
        date_as_str = date.strftime("%Y%m%d%H%M%S")
        return f"{date_as_str}_{sanitize(self.title)}.org"

    def read_contents(self):
        """Return the full text of the source file.

        The file is decoded explicitly as UTF-8 rather than with the platform
        default encoding, so non-ASCII notes read the same on every OS
        (assumes pages are stored as UTF-8 -- TODO confirm).
        """
        return self.file.read_text(encoding="utf-8")
class Notes:
    """A collection of notes plus the Markdown converter used to render them."""

    def __init__(self, notes_list: list[Note]):
        """Index *notes_list* by title and build the Marko converter."""
        self.notes_list = notes_list
        self.titles_to_uuid = {note.title: note.uuid for note in notes_list}
        ext = MarkoExtension(renderer_mixins=[OrgRenderer])
        self.markdown = Markdown(extensions=[ext])

    def convert_files(self, output_dir: str):
        """Convert every note and write it into *output_dir*.

        The directory is created if it does not exist. Each note is converted
        before its output file is opened, so a failing conversion does not
        leave an empty file behind, and output is always written as UTF-8.
        """
        target_dir = Path(output_dir)
        target_dir.mkdir(parents=True, exist_ok=True)
        for note in self.notes_list:
            output_org = self.convert_to_org(note)
            output_file = target_dir / note.org_roam_filename
            output_file.write_text(output_org, encoding="utf-8")

    def convert_to_org(self, note: Note):
        """Return the Org text for *note*: a property drawer with its ID,
        a ``#+title`` line, then the converted body."""
        document_body = self.markdown(note.read_contents())
        return (
            ":PROPERTIES:\n"
            f":ID: {note.uuid}\n"
            ":END:\n"
            f"#+title: {note.title}\n"
            f"{document_body}"
        )
def main():
    """Convert every file in the input directory into an Org file.

    Expects exactly two command-line arguments: the input pages directory and
    the output folder. Exits with a usage message otherwise.
    """
    args = sys.argv[1:]
    if len(args) != 2:
        # A clear message instead of an unpacking ValueError traceback.
        sys.exit(
            "Usage: python logseq_to_org.py "
            "<logseq's pages directory> <output folder>"
        )
    input_dir, output_dir = args

    # glob("*") also yields sub-directories, which cannot be read as notes.
    notes_list = [
        Note(file) for file in Path(input_dir).glob("*") if file.is_file()
    ]
    notes = Notes(notes_list)
    # The renderer resolves [[Title]] links through this class-level mapping.
    OrgRenderer.titles_to_uuid = notes.titles_to_uuid
    notes.convert_files(output_dir)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment