Last active
January 26, 2025 18:07
-
-
Save manmal/3a2ea85ace8993962dd523ce6928c91a to your computer and use it in GitHub Desktop.
Invert Pinboard HTML Backup for import into Linkwarden
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
# Reverses the order of the bookmarks in a Pinboard HTML export so the file
# can be imported into Linkwarden.
#
# Setup:
#   $ pip install beautifulsoup4
#
# For more info: https://github.com/linkwarden/linkwarden/issues/555

# Path of the Pinboard HTML export to read. The result is written next to it
# with a "reversed_" prefix on the file name.
INPUT_FILE = "pinboard_export.html"
def reverse_bookmarks(input_file=None, output_file=None):
    """Write a copy of a bookmark HTML export with its entries reversed.

    The first ``<dl>`` element of the document is located and its direct
    children are split into groups: each group starts at a ``<dt>`` and
    contains every node that follows it up to the next ``<dt>``. The groups
    are then re-attached in reverse order. Nodes that appear before the
    first ``<dt>`` (for example a leading ``<p>``) stay at the top, in their
    original order. Whitespace-only text nodes are dropped.

    Args:
        input_file: Path of the HTML file to read. Defaults to the
            module-level ``INPUT_FILE``.
        output_file: Path of the file to write. Defaults to the input file
            name prefixed with ``"reversed_"``, in the same directory as the
            input.

    Returns:
        The path the reversed document was written to.

    Raises:
        ValueError: If the document contains no ``<dl>`` element.
        OSError: If the input cannot be read or the output cannot be written.
    """
    import os  # local import so the block does not depend on file-level imports

    if input_file is None:
        input_file = INPUT_FILE
    if output_file is None:
        # Prefix only the file name; prefixing the whole string would corrupt
        # any input path that has a directory component.
        directory, base_name = os.path.split(input_file)
        output_file = os.path.join(directory, "reversed_" + base_name)

    # Read the input file
    with open(input_file, "r", encoding="utf-8") as f:
        content = f.read()

    # Parse the HTML using BeautifulSoup
    soup = BeautifulSoup(content, "html.parser")

    # Find the main DL element
    main_dl = soup.find("dl")
    if main_dl is None:
        raise ValueError("No <dl> element found in {!r}".format(input_file))

    leading_nodes = []  # everything that precedes the first DT
    bookmarks = []      # one list per bookmark: [dt, following nodes...]
    current_group = None

    # Iterate over a snapshot because the DL is cleared and rebuilt below.
    for child in list(main_dl.children):
        if isinstance(child, str) and not child.strip():
            continue  # Skip empty text nodes
        # getattr: do not rely on text nodes exposing a ``name`` attribute.
        if getattr(child, "name", None) == "dt":
            # A DT starts a new group
            current_group = [child]
            bookmarks.append(current_group)
        elif current_group is not None:
            # Content (tags or text) that belongs to the current DT
            current_group.append(child)
        else:
            leading_nodes.append(child)

    # Rebuild the DL: leading nodes first, then the groups in reverse order
    main_dl.clear()
    for node in leading_nodes:
        main_dl.append(node)
    for group in reversed(bookmarks):
        for node in group:
            main_dl.append(node)

    # Write the modified HTML to a new file
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(str(soup))

    return output_file
# Run the conversion only when executed as a script, so importing this module
# has no side effects.
if __name__ == "__main__":
    reverse_bookmarks()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment