Created
May 13, 2024 03:48
-
-
Save j2kun/00804c6e893ee1fdbaee196e100d650d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pathlib | |
import pprint | |
import re | |
import subprocess | |
import fire | |
import marko | |
from mastodon import Mastodon | |
# File names in the shortform directory that are excluded from publishing.
FILES_TO_IGNORE = {"_index.md"}

BLOG_URL_BASE = "https://www.jeremykun.com"

# A simple text file with two urls per line: the blog post url followed by
# the url of the first toot in the corresponding Mastodon thread.
DATABASE_FILE = "scripts/published_toots.txt"

# Non-greedy match of inline math delimited by single dollar signs; group 1
# is the text between the dollars.
INLINE_MATH_DOLLAR_REGEX = re.compile(r"\$(.*?)\$")
def getGitRoot():
    """Return the stdout of ``git rev-parse --show-toplevel`` as a string.

    The captured output has trailing whitespace removed and is decoded as
    UTF-8. The command's exit status is not inspected.
    """
    git_process = subprocess.Popen(
        ["git", "rev-parse", "--show-toplevel"],
        stdout=subprocess.PIPE,
    )
    # communicate() waits for the process and returns (stdout, stderr);
    # stderr is not piped, so only the first element carries data.
    raw_stdout, _ = git_process.communicate()
    return raw_stdout.rstrip().decode("utf-8")
def canonical_url(filename: str) -> str:
    """Return the public blog URL of a shortform post.

    Args:
        filename: Name of the post's markdown file, e.g. ``"my-post.md"``.

    Returns:
        ``BLOG_URL_BASE`` + ``/shortform/<name>/``, where ``<name>`` is
        ``filename`` with one trailing ``.md`` removed (if present).
    """
    # Note: using the url metadata inside the markdown file itself is not
    # supported, we just assume no special url is set.
    #
    # removesuffix() drops exactly one trailing ".md". The previous
    # rstrip(".md") treated its argument as a character set and also ate
    # trailing '.', 'm' and 'd' characters of the name itself, e.g.
    # "random.md" -> "rando".
    # NOTE(review): database entries that were recorded under such a mangled
    # url will no longer match and should be corrected before the next run.
    stripped_filename = filename.removesuffix(".md")
    return f"{BLOG_URL_BASE}/shortform/{stripped_filename}/"
def convert_paragraph(node): | |
if node.get_type() != "Paragraph": | |
raise ValueError(f"Invalid input node of type {node.get_type()}") | |
toot_str = "" | |
for child in node.children: | |
match child.get_type(): | |
case "LineBreak": | |
toot_str += " " | |
case "RawText": | |
# in this case, child.children is a single string, despite the | |
# name "children". | |
text = child.children | |
# Convert to mathstodon-compatible inline mathmode | |
text = INLINE_MATH_DOLLAR_REGEX.sub(r"\\(\1\\)", text) | |
toot_str += text | |
case "Link": | |
if child.dest.startswith("http"): | |
url = child.dest | |
elif child.dest.startswith("/"): | |
url = f"{BLOG_URL_BASE}{child.dest}" | |
else: | |
raise ValueError(f"Unsupported link destination f{child.dest}") | |
assert len(child.children) == 1 | |
link_text = child.children[0].children | |
toot_str += f"{link_text} ({url})" | |
case "CodeSpan": | |
toot_str += f"`{child.children}`" | |
case _: | |
raise ValueError(f"Unsupported paragraph node type: {child.get_type()}") | |
return toot_str | |
def convert_code_block(node, post_permalink):
    """Return the placeholder toot that replaces a fenced code block.

    Code blocks make for bad toots, so the code itself is left out and the
    reader is pointed at ``post_permalink`` instead.

    Raises:
        ValueError: If ``node.get_type()`` is not ``"FencedCode"``.
    """
    node_type = node.get_type()
    if node_type == "FencedCode":
        return f"(Code omitted for brevity. See: {post_permalink})"
    raise ValueError(f"Invalid input node of type {node_type}")
def convert_post_to_thread(content, post_permalink):
    """Turn the markdown source of a post into a list of toot strings.

    Each top-level paragraph and each fenced code block of ``content``
    contributes one toot, in document order. ``post_permalink`` is passed on
    to the code-block placeholder.

    Raises:
        ValueError: If the document contains a top-level node type other
            than those handled below.
    """
    document = marko.Markdown().parse(content)

    thread = []
    inside_frontmatter = False
    for child in document.children:
        child_type = child.get_type()

        # hugo frontmatter is demarcated by ------ lines, which marko parses
        # as ThematicBreak nodes; each one toggles whether we are skipping.
        if child_type == "ThematicBreak":
            inside_frontmatter = not inside_frontmatter
            continue
        if inside_frontmatter:
            continue

        if child_type in ("LineBreak", "BlankLine"):
            continue
        if child_type == "Paragraph":
            thread.append(convert_paragraph(child))
        elif child_type == "FencedCode":
            thread.append(convert_code_block(child, post_permalink))
        else:
            raise ValueError(
                f"Unsupported doc node type {child.get_type()}: {child}"
            )
    return thread
def load_database(path):
    """Read the published-toots database into a dict.

    Args:
        path: Location of the text database. Each non-blank line holds two
            whitespace-separated urls: the blog url, then the mastodon url.

    Returns:
        A dict mapping blog url to mastodon url; empty when ``path`` does
        not exist.

    Raises:
        ValueError: If a non-blank line does not hold exactly two fields.
    """
    if not os.path.exists(path):
        return {}

    mapping = {}
    with open(path, "r") as infile:
        for line_number, line in enumerate(infile, start=1):
            fields = line.split()
            if not fields:
                # Blank lines (e.g. left behind by a manual edit) carry no
                # entry; skip them instead of failing the tuple unpacking.
                continue
            if len(fields) != 2:
                raise ValueError(
                    f"{path}:{line_number}: expected 2 urls per line, "
                    f"found {len(fields)}"
                )
            blog_url, mastodon_url = fields
            mapping[blog_url] = mastodon_url
    return mapping
def dump_database(mapping, path):
    """Overwrite the file at ``path`` with the contents of ``mapping``.

    One line is written per entry: the key, a single space, the value.
    """
    with open(path, "w") as outfile:
        outfile.writelines(
            f"{source_url} {toot_url}\n"
            for source_url, toot_url in mapping.items()
        )
def _post_thread(mastodon_client, toots, filename):
    """Post ``toots`` as one reply chain and return the posted status dicts."""
    # TODO: delete thread if later toots fail to post
    print(f"Publishing toot thread for {filename}")
    toots_for_post = []
    for i, toot in enumerate(toots):
        # Every toot after the first replies to the one posted just before.
        reply_id = toots_for_post[-1]["id"] if toots_for_post else None
        status_dict = mastodon_client.status_post(toot, in_reply_to_id=reply_id)
        print(
            f"Successfully posted toot {i} of the thread: "
            f"{status_dict['id']} -> {status_dict['url']}"
        )
        toots_for_post.append(status_dict)
    return toots_for_post


def publish_to_mastodon(mastodon_client=None):
    """Idempotently publish all shortform posts to mastodon.

    Every file in ``<git root>/content/shortform`` (except those listed in
    ``FILES_TO_IGNORE``) whose blog url is not yet in the database file is
    converted to a thread and posted. The database is rewritten when the
    function exits, including when it exits with an exception, so threads
    that were fully posted before a failure are still recorded.

    Args:
        mastodon_client: Client used for posting. When ``None``, a
            ``Mastodon`` client for https://mathstodon.xyz is created whose
            access token is the ``MASTODON_TOKEN`` environment variable, or
            the default credential file path when that is unset.

    Raises:
        RuntimeError: If the detected git root has no ``.git`` directory.
        ValueError: If the shortform directory does not exist, or a post
            contains markdown that cannot be converted.
    """
    if mastodon_client is None:
        # File generated by scripts/login_with_mastodon.py or else set in
        # environment for headless usage in GH actions.
        mastodon_client = Mastodon(
            api_base_url="https://mathstodon.xyz",
            access_token=os.getenv(
                "MASTODON_TOKEN", "scripts/jeremykun_tootbot_usercred.secret"
            ),
        )

    git_root = pathlib.Path(getGitRoot())
    if not os.path.isdir(git_root / ".git"):
        raise RuntimeError(f"Could not find git root, looked at {git_root}")
    print(f"Found {git_root=}")

    shortform_path = git_root / "content" / "shortform"
    if not os.path.isdir(shortform_path):
        raise ValueError(f"Could not find shortform_path at {shortform_path}")

    # Sorted so posts are processed in the same order on every run; directory
    # entries are already unique, so no set is needed.
    posts_to_try = sorted(
        x for x in os.listdir(shortform_path) if x not in FILES_TO_IGNORE
    )
    print(f"{posts_to_try=}")

    # dict mapping Blog URL to first post url in published Mastodon thread.
    database_path = git_root / DATABASE_FILE
    published_toots = load_database(database_path)
    print("Existing toots:")
    pprint.pp(published_toots)

    try:
        for filename in posts_to_try:
            print(f"Processing {filename}")
            blog_post_permalink = canonical_url(filename)
            if blog_post_permalink in published_toots:
                print(
                    f"{filename} has existing toot thread at "
                    f"{published_toots[blog_post_permalink]}, skipping."
                )
                continue

            # Read as UTF-8 explicitly rather than relying on the platform
            # default encoding.
            with open(shortform_path / filename, "r", encoding="utf-8") as infile:
                toots = convert_post_to_thread(infile.read(), blog_post_permalink)

            if not toots:
                # Nothing to post (e.g. only frontmatter); indexing toots[0]
                # below would otherwise raise IndexError and abort the run.
                print(f"{filename} produced no toots, skipping.")
                continue

            # Add a backlink to the end of the first toot in the thread
            toots[0] += f"\n\nArchived at: {blog_post_permalink}"

            # a debug print of the toots about to be posted
            print(f"Printing toot thread for {filename}:\n----------------------")
            for i, toot in enumerate(toots):
                print(f"\n{i}.\t{toot}")
            print("\n----------------------\n")

            toots_for_post = _post_thread(mastodon_client, toots, filename)

            # All toots posted successfully
            published_toots[blog_post_permalink] = toots_for_post[0]["url"]
    finally:
        print("Writing successful toot URLs to disk")
        dump_database(published_toots, database_path)
# Script entry point: hand publish_to_mastodon to fire.Fire when this file is
# executed directly rather than imported.
if __name__ == "__main__":
    fire.Fire(publish_to_mastodon)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment