Created
November 6, 2022 17:46
-
-
Save codebykat/9de33cd28794160cd546049d0ebdfead to your computer and use it in GitHub Desktop.
convert a twitter tweet export (tweets or likes) to a csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# convert a twitter tweet export (e.g. tweets.js) or like export (e.g. likes.js) to a csv
# usage: python convert-tweet-archive.py filename.js
import sys
import json
import csv
import os
from datetime import datetime

# n.b. these need to match the attributes in the json
TWEET_KEY = "tweet"
TWEET_HEADERS = ["id", "created_at", "full_text"]
LIKE_KEY = "like"
LIKE_HEADERS = ["tweetId", "fullText", "expandedUrl"]

# first-line prefix that identifies a likes export
LIKE_PREFIX = "window.YTD.like"

# timestamps look like: Fri Oct 28 14:58:42 +0000 2022
CREATED_AT_FORMAT = "%a %b %d %H:%M:%S %z %Y"


def parse_export(firstline, body):
    """Turn the text of an export file into CSV headers and rows.

    Args:
        firstline: the first line of the export file (the JavaScript
            assignment); only used to tell a likes export from a tweets one.
        body: everything in the file after the first line.

    Returns:
        A ``(headers, rows)`` tuple. ``headers`` is the list of column names
        and ``rows`` is a list of lists with one value per header.

    Raises:
        json.JSONDecodeError: if ``"[" + body`` is not valid JSON.
        KeyError: if an item lacks the expected "tweet"/"like" wrapper key.
        ValueError: if a ``created_at`` value does not match the expected
            timestamp format.
    """
    # likes exports are in a slightly different format
    if firstline.startswith(LIKE_PREFIX):
        key, headers = LIKE_KEY, LIKE_HEADERS
    else:
        key, headers = TWEET_KEY, TWEET_HEADERS

    # the first line was dropped, so re-add the opening bracket to get valid json
    items = json.loads("[" + body)

    rows = []
    for item in items:
        record = item[key]
        row = []
        for header in headers:
            # a record missing one field becomes an empty cell instead of
            # aborting the whole conversion with a KeyError
            value = record.get(header, "")
            if header == "created_at" and value:
                value = datetime.strptime(value, CREATED_AT_FORMAT)
            row.append(value)
        rows.append(row)
    return list(headers), rows


def convert(path):
    """Convert the export file at ``path`` to a CSV written next to it.

    The output file has the same name as the input with its extension
    replaced by ``.csv``.

    Args:
        path: path to a tweets or likes ``.js`` export file.

    Returns:
        The path of the CSV file that was written.
    """
    with open(path, encoding="utf-8") as f:
        firstline = f.readline()
        body = f.read()

    headers, rows = parse_export(firstline, body)

    print("writing %d tweets to csv..." % len(rows))
    outpath = os.path.splitext(path)[0] + ".csv"
    # newline="" is required by the csv module so that embedded newlines in
    # tweet text are quoted correctly and no blank rows appear on Windows;
    # utf-8 keeps emoji and other non-ASCII text from failing on platforms
    # whose default encoding cannot represent them.
    with open(outpath, "w", encoding="utf-8", newline="") as outfile:
        csvwriter = csv.writer(outfile)
        csvwriter.writerow(headers)
        csvwriter.writerows(rows)
    return outpath


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 2:
        print("usage: python %s filename.js" % os.path.basename(argv[0] if argv else "convert-tweet-archive.py"),
              file=sys.stderr)
        return 2
    convert(argv[1])
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment