Skip to content

Instantly share code, notes, and snippets.

@manmal
Last active January 26, 2025 18:07
Show Gist options
  • Save manmal/3a2ea85ace8993962dd523ce6928c91a to your computer and use it in GitHub Desktop.
Save manmal/3a2ea85ace8993962dd523ce6928c91a to your computer and use it in GitHub Desktop.
Invert Pinboard HTML Backup for import into Linkwarden
from bs4 import BeautifulSoup
# Prerequisite (install BeautifulSoup before running this script):
# $ pip install beautifulsoup4
# For more info: https://github.com/linkwarden/linkwarden/issues/555
# Pinboard HTML export that reverse_bookmarks() reads; the result is written
# to a file with the same name prefixed by "reversed_".
INPUT_FILE = "pinboard_export.html"
def reverse_bookmarks(input_file=None):
    """Reverse the order of the bookmarks in a Pinboard HTML export.

    The export is parsed with BeautifulSoup and the direct children of its
    first ``<dl>`` element are grouped into bookmarks: each ``<dt>`` together
    with every node that follows it up to the next ``<dt>``.  The groups are
    then put back in reverse order.  Anything that precedes the first
    ``<dt>`` (typically the ``<p>`` that follows ``<DL>``) stays at the top.
    Whitespace-only text nodes between the children are dropped.

    The result is written next to the input as ``reversed_<file name>``;
    the input file itself is left untouched.

    Args:
        input_file: Path of the export to read.  Defaults to the module-level
            ``INPUT_FILE`` when omitted.

    Raises:
        ValueError: If the document contains no ``<dl>`` element.
        OSError: If the input cannot be read or the output cannot be written.
    """
    # Local import keeps this block self-contained.
    from pathlib import Path

    source_path = Path(INPUT_FILE if input_file is None else input_file)

    with open(source_path, "r", encoding="utf-8") as f:
        content = f.read()

    soup = BeautifulSoup(content, "html.parser")

    main_dl = soup.find("dl")
    if main_dl is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f"No <dl> element found in {source_path}")

    header = []        # nodes that appear before the first <dt>
    bookmarks = []     # list of (dt, [nodes following that dt])
    current_dt = None
    current_content = []

    for child in main_dl.children:
        if isinstance(child, str) and not child.strip():
            continue  # Skip whitespace-only text nodes

        if getattr(child, "name", None) == "dt":
            # A new <dt> closes the previous group, if there was one.
            if current_dt is not None:
                bookmarks.append((current_dt, current_content))
            current_dt = child
            current_content = []
        elif current_dt is None:
            # Leading content must not be reversed with the bookmarks.
            header.append(child)
        else:
            # Everything (tags or text) after a <dt> belongs to that <dt>.
            current_content.append(child)

    # Don't forget the last group.
    if current_dt is not None:
        bookmarks.append((current_dt, current_content))

    # Detach all children; the references collected above stay valid.
    main_dl.clear()

    for node in header:
        main_dl.append(node)

    for dt, content_list in reversed(bookmarks):
        main_dl.append(dt)
        for node in content_list:
            main_dl.append(node)

    # Prefix only the file name so inputs given with a directory component
    # produce a valid path in the same directory.
    output_path = source_path.with_name("reversed_" + source_path.name)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(str(soup))
# Script entry point: perform the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    reverse_bookmarks()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment