-
-
Save intentionally-left-nil/536d32b9388675d7c98b019d524983a5 to your computer and use it in GitHub Desktop.
Delete (very) old tweets obtained from a twitter archive
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/python3
# Largely copied from http://www.mathewinkson.com/2015/03/delete-old-tweets-selectively-using-python-and-tweepy
# However, Mathew's script cannot delete tweets older than about a year (these tweets are not available from the Twitter API).
# This script complements it on first use: it uses your Twitter archive to find the ids of the old tweets to delete.
# How to use it:
# - download and extract your Twitter archive (tweet.js will contain all your tweets with dates and ids)
# - put this script in the extracted directory
# - fill in the secrets needed to access Twitter's API on your behalf and, if desired, modify days_to_keep
# - delete the few junk characters at the beginning of tweet.js, up to the first '[' (they crashed my JSON parser)
# - review the script! It has not been thoroughly tested and may have some unexpected behaviors...
# - run this script
# - forget this script; you can now use Mathew's script for your future deletions
#
# License: Unlicense http://unlicense.org/
import tweepy | |
import json | |
from datetime import datetime, timedelta, timezone | |
import os | |
from os import path | |
# Twitter API credentials -- fill these in before running the script.
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''

# Tweets created more than this many days before "now" are deleted.
days_to_keep = 365

# Layout of the `created_at` field this script expects in the archive files.
ARCHIVE_DATE_FORMAT = "%Y-%m-%d %H:%M:%S %z"


def load_archive_tweets(data_dir):
    """Return the tweets stored in every file found directly in *data_dir*.

    The first line of each file is discarded and the remainder is parsed as
    a JSON list. Files are read in sorted name order so that repeated runs
    process the tweets in the same order.

    Raises:
        OSError: if the directory or one of its files cannot be read.
        json.JSONDecodeError: if a file's remainder is not valid JSON.
    """
    tweets = []
    for filename in sorted(os.listdir(data_dir)):
        with open(path.join(data_dir, filename), 'r', encoding='UTF-8') as fp:
            js_file = fp.read()
        # Skip everything up to and including the first newline; when the
        # file has no newline, find() returns -1 and the whole text is kept.
        tweets.extend(json.loads(js_file[js_file.find('\n') + 1:]))
    return tweets


def is_expired(tweet, cutoff_date):
    """Return True when *tweet* was created strictly before *cutoff_date*.

    *cutoff_date* must be timezone-aware, because the parsed `created_at`
    value carries a UTC offset (``%z``).
    """
    created = datetime.strptime(tweet['created_at'], ARCHIVE_DATE_FORMAT)
    return created < cutoff_date


def delete_expired_tweets(api, tweets, cutoff_date):
    """Delete every tweet older than *cutoff_date* and return the count.

    *api* only needs a ``destroy_status(id_str)`` method. A failed deletion
    is reported and skipped so that one bad tweet does not stop the run.
    """
    deleted = 0
    for tweet in tweets:
        if not is_expired(tweet, cutoff_date):
            continue
        try:
            api.destroy_status(tweet['id_str'])
        except Exception as exc:
            # `Exception` rather than a bare `except:` so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit still abort the run.
            print("failed to delete " + tweet['id_str'])
            print(exc)
        else:
            print(tweet['created_at'] + " " + tweet['id_str'])
            deleted += 1
    return deleted


def main():
    """Authenticate, load the archive from ./data/js/tweets and delete old tweets."""
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_to_keep)
    print(cutoff_date)

    data_dir = path.join(os.getcwd(), 'data', 'js', 'tweets')
    delete_expired_tweets(api, load_archive_tweets(data_dir), cutoff_date)


if __name__ == "__main__":
    main()
This works for me:
https://gist.github.com/bluebossa63/e1dbdad9c0bc4fd625e072bfe304cf42
#!/bin/python3
# Largely copied from http://www.mathewinkson.com/2015/03/delete-old-tweets-selectively-using-python-and-tweepy
# However, Mathew's script cannot delete tweets older than something like a year (these tweets are not available from the twitter API)
# This script is a complement on first use, to delete old tweets. It uses your twitter archive to find tweets' ids to delete
# How to use it :
# - download and extract your twitter archive (tweet.js will contain all your tweets with dates and ids)
# - put this script in the extracted directory
# - complete the secrets to access twitter's API on your behalf and, possibly, modify days_to_keep
# - delete the few junk characters at the beginning of tweet.js, until the first '[' (it crashed my json parser)
# - review the script !!!! It has not been thoroughly tested, it may have some unexpected behaviors...
# - run this script
# - forget this script, you can now use Mathew's script for your future deletions
#
# License : Unlicense http://unlicense.org/
import tweepy
import json
from datetime import datetime, timedelta, timezone
from dateutil.parser import parse
import os
from os import path
# Twitter API credentials -- fill these in before running the script.
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''

# Tweets created more than this many days before "now" are deleted.
days_to_keep = 7


def strip_js_prefix(js_text):
    """Return *js_text* starting at its first '['.

    The usage notes for this script say tweet.js begins with a few non-JSON
    characters before the first '[' that break the JSON parser; dropping
    them here removes the need to edit the file by hand. Text without any
    '[' is returned unchanged so the JSON parser reports the real problem.
    """
    start = js_text.find('[')
    return js_text if start < 0 else js_text[start:]


def load_tweets(data_dir):
    """Parse ``tweet.js`` inside *data_dir* and return its list of entries.

    Raises:
        OSError: if the file cannot be read.
        json.JSONDecodeError: if the content is not valid JSON once the
            leading non-JSON characters have been skipped.
    """
    with open(path.join(data_dir, "tweet.js"), 'r', encoding='UTF-8') as fp:
        js_file = fp.read()
    return json.loads(strip_js_prefix(js_file))


def delete_expired_tweets(api, tweets, cutoff_date):
    """Delete every entry whose tweet was created before *cutoff_date*.

    Each element of *tweets* is expected to wrap the tweet under the
    ``'tweet'`` key. *cutoff_date* must be timezone-aware when the parsed
    ``created_at`` values are. API failures are reported and skipped.
    """
    for tweet in tweets:
        t = tweet['tweet']
        if parse(t['created_at']) >= cutoff_date:
            continue
        try:
            api.destroy_status(t['id_str'])
        except tweepy.TweepError as e:
            print("failed to delete " + t['id_str'])
            print(e.api_code)
            print(e.reason)
        else:
            print(t['created_at'] + " " + t['id_str'])


def main():
    """Authenticate, load ./data/tweet.js and delete the old tweets."""
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_to_keep)
    print(cutoff_date)

    data_dir = path.join(os.getcwd(), 'data')
    delete_expired_tweets(api, load_tweets(data_dir), cutoff_date)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi,
Thanks for the awesome work.
I stumbled upon many small bugs (among other things related to datetime and json parsing), so I forked the script to a new gist that works.
If anyone is interested : https://gist.github.com/marrakchino/d6bd3438cd87ab419888edbc4dc1f0f5.
Best