Last active
August 30, 2024 11:58
-
-
Save iandanforth/f72e750db0c022d76a93eb922f5e0f3c to your computer and use it in GitHub Desktop.
Expand Twitter URLs from a twitter archive manifest
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script replaces every https://t.co/ short link in the manifest with its expanded URL.
import json | |
import re | |
import urlexpander | |
# Pattern for Twitter's t.co short links. Compiled once at module level
# because it is applied to the text of every bookmark.
SHORT_LINK_RE = re.compile(r'https://t.co/\w+')


def expand_short_links(text, expand):
    """Return *text* with every t.co short link replaced by its expansion.

    All substitutions happen in a single left-to-right regex pass, so each
    match is replaced exactly where it was found. Chained ``str.replace``
    calls would instead corrupt the text when one short link is a prefix of
    another (``https://t.co/abc`` vs ``https://t.co/abcd``) or when an
    already-expanded URL happens to contain another short link.

    Args:
        text: The string to scan for short links.
        expand: Callable taking a short link and returning the replacement
            string. It is called at most once per distinct link in *text*.

    Returns:
        The text with all short links substituted. Text without short links
        is returned unchanged.
    """
    resolved = {}  # short link -> expansion, so repeated links cost one call

    def _substitute(match):
        link = match.group(0)
        if link not in resolved:
            resolved[link] = expand(link)
        return resolved[link]

    return SHORT_LINK_RE.sub(_substitute, text)


def main(source='bookmark-manifest.json',
         destination='bookmark-manifest-expanded.json'):
    """Expand the short links in a bookmark manifest and save the result.

    Reads the JSON manifest at *source*, rewrites the ``'text'`` value of
    each bookmark with its t.co links expanded via ``urlexpander.expand``,
    and writes the updated manifest to *destination*. The input file is not
    modified.

    NOTE(review): assumes the manifest is a JSON array of objects that each
    have a string ``'text'`` key; a bookmark without it raises ``KeyError``.
    Confirm against the archive format.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's
    # locale encoding when reading tweet text.
    with open(source, 'r', encoding='utf-8') as fh:
        bookmarks = json.load(fh)

    for mark in bookmarks:
        text = mark['text']
        links = SHORT_LINK_RE.findall(text)
        if not links:
            continue
        # Progress output: the short links found in this bookmark.
        print(links)
        mark['text'] = expand_short_links(text, urlexpander.expand)

    with open(destination, 'w', encoding='utf-8') as fh:
        json.dump(bookmarks, fh, indent=4)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Authored mostly by Codex; all bugs should be reported to OpenAI.