Last active
November 19, 2022 16:55
-
-
Save LongHairedHacker/360ea2005c5a53bb126d4097f9e801e7 to your computer and use it in GitHub Desktop.
Expand all the t.co short links in a Twitter backup
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os | |
from os import path | |
import sys | |
import re | |
import requests | |
import shutil | |
# Maps each short URL to the target it resolved to, so a link that occurs
# several times (possibly in different files) is only requested once per run.
url_cache = dict()


def resolve_url(url, timeout=10):
    """Return the address that the short link *url* redirects to.

    One request is sent with redirect-following disabled and the target is
    read from the ``Location`` header of the response. Results are stored in
    the module-level ``url_cache``; a cached URL is returned without any
    network access.

    Args:
        url: The short link to resolve.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The redirect target, or *url* unchanged when the response is not a
        redirect, carries no ``Location`` header, or the request fails.
    """
    if url in url_cache:
        return url_cache[url]

    try:
        r = requests.get(url, allow_redirects=False, timeout=timeout)
    except (requests.RequestException, UnicodeDecodeError) as err:
        # Best effort: one unreachable or malformed link must not abort the
        # whole run. The failure is deliberately not cached, so a later
        # occurrence of the same link gets another attempt.
        print("Could not resolve %s: %s" % (url, err), file=sys.stderr)
        return url

    if r.status_code in (301, 302, 303, 307, 308):
        # Fall back to the original link if the redirect has no target.
        url_cache[url] = r.headers.get('Location', url)
    else:
        url_cache[url] = url
    return url_cache[url]
def replace_links_in_file(filepath, *, resolver=None):
    """Rewrite *filepath* in place with every t.co short link expanded.

    The file is read line by line; each ``http(s)://t.co/<id>`` occurrence is
    passed to *resolver* and replaced by its return value. Output goes to
    ``<filepath>_tmp`` first, and that file is moved over the original only
    after the whole input has been processed.

    Args:
        filepath: Path of the text file to rewrite.
        resolver: Callable taking a short URL and returning its replacement.
            Defaults to the module's ``resolve_url``.
    """
    if resolver is None:
        resolver = resolve_url

    # Unanchored pattern used with sub(): every link on a line is handled,
    # not only the last one. "+" keeps a bare "https://t.co/" from matching.
    url_regex = re.compile(r"https?://t\.co/[a-zA-Z0-9]+")

    def expand(match):
        url = match.group(0)
        new_url = resolver(url)
        print("%s -> %s" % (url, new_url))
        return new_url

    print("Processing %s" % (filepath))
    tmp_path = filepath + "_tmp"
    # NOTE(review): assumes the backup files are UTF-8 encoded; the explicit
    # encoding keeps the result independent of the platform default.
    with open(filepath, "r", encoding="utf-8") as infile:
        with open(tmp_path, "w", encoding="utf-8") as outfile:
            for line in infile:
                # A function as replacement means backslashes in the resolved
                # URL are inserted literally instead of being interpreted.
                outfile.write(url_regex.sub(expand, line))
    shutil.move(tmp_path, filepath)
    print("Done %s" % (filepath))
def main():
    """Expand the short links in the data files of a backup directory.

    Expects exactly one command-line argument, the backup directory. With any
    other argument count a usage line is printed and the process exits with
    status 1. Otherwise each of the listed files below that directory is
    passed to ``replace_links_in_file`` in order.
    """
    args = sys.argv
    if len(args) != 2:
        print("%s <backup directory>" % (args[0]))
        sys.exit(1)

    backup_dir = args[1]
    data_files = (
        "data/direct-messages-group.js",
        "data/direct-messages.js",
        "data/profile.js",
        "data/like.js",
        "data/tweets.js",
    )
    for relative_name in data_files:
        replace_links_in_file(path.join(backup_dir, relative_name))


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment