Created
January 9, 2021 12:31
-
-
Save thendrix/c41eb72bf4a1d1887c9c8bf76bf381f1 to your computer and use it in GitHub Desktop.
TwitterPurge can remove all your tweets by using the archive function to access tweets not accessible via timeline API
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# TwitterPurge is a simple Python script that performs operations on Twitter archives.
# I wrote this in a single day while reading the API docs for the first time.
#
# Any tips you can spare will go to fund alt social media and tools
# BTC: bc1qe0el876trjjuuu3zr729n3w3zp4t2k92smklkz
#
# Set up Python
# ==========================================================
# Download and install python3 from https://www.python.org/downloads/
# Create a python3 sandbox with the latest tweepy:
#   python3 -m venv sandbox
#   source sandbox/bin/activate
#   pip install tweepy
#
# Create Twitter API auth
# ==========================================================
# https://realpython.com/twitter-bot-python-tweepy/#creating-twitter-api-authentication-credentials
#
# Enable permission to read+write (post tweets) and to write DMs.
# Then reset your "Access token & secret" so the new permissions take effect.
#
# Get an archive of your tweets via the Twitter web interface
# ==========================================================
# 1. Request an archive of your tweets
# 2. Wait until it has been generated, then download it
# 3. Extract the archive and place this script in the same directory
# 4. Generate config/auth.json (run this script with --generate-auth) and add your keys to the file
# 5. Purge tweets, likes, DMs, etc. using this script
import json, html | |
import os, sys | |
try:
    import tweepy
except ImportError:
    # tweepy is the only third-party dependency of this script; nothing below
    # can talk to Twitter without it, so stop right away with an error status.
    tweepy = None
    print('Missing tweepy see installation comment')
    sys.exit(-1)
# Ugly hack to expose global 'database' to ops for this script
db = {}
# Set to True by RateLimitExceeded(); ImportJSON stops iterating once it is set
g_rate_limit_exceeded = False
# Path of the JSON file that holds the API keys (see GenerateAuthTemplate)
g_config_filename = 'config/auth.json'
# Account name read from the auth file; filled in by LoadTweepy()
g_username = None
def RateLimitExceeded():
    """Flag that a rate limit was hit so archive processing can stop."""
    global g_rate_limit_exceeded
    g_rate_limit_exceeded = True
def GenerateAuthTemplate():
    """Create a blank auth file at ``g_config_filename`` if none exists yet.

    An existing file is left untouched, so keys that were already filled in
    are never overwritten.

    Returns:
        -1 when the containing directory could not be created, else None.
    """
    if os.path.exists(g_config_filename):
        return None

    # Derive the directory from the configured path so the two cannot drift
    # apart ('config' for the default 'config/auth.json').
    configPath = os.path.dirname(g_config_filename)
    if configPath:
        try:
            os.makedirs(configPath, exist_ok=True)
        except OSError as err:
            # Report the actual failure, not the exception class
            print(str(err))
            return -1

    text = '{\n\t"key" : "",\n\t"secret" : "",\n\t"access_token" : "",\n\t"access_token_secret" : "",\n\t"username" : "@jack"\n}\n'
    # Same encoding LoadTweepy uses when reading the file back
    with open(g_config_filename, 'w', encoding='UTF-8') as fd:
        fd.write(text)
    return None
def LoadTweepy():
    """Build a tweepy API object from the keys stored in the auth file.

    Reads ``g_config_filename`` and, on success, stores the configured
    account name in the global ``g_username``.

    Returns:
        The ``tweepy.API`` object, or None when a key is missing from the
        file or constructing the API object fails.

    Exits the process with status 1 when the auth file cannot be read or is
    not valid JSON.
    """
    global g_username
    try:
        with open(g_config_filename, 'r', encoding='UTF-8') as fd:
            secret = json.load(fd)
    except (OSError, ValueError):
        # ValueError covers both malformed JSON and undecodable bytes
        print(f'Failed to load "{g_config_filename}" use --generate-auth')
        sys.exit(1)
    try:
        auth = tweepy.OAuthHandler(secret["key"], secret["secret"])
        auth.set_access_token(secret["access_token"], secret["access_token_secret"])
        api = tweepy.API(auth, wait_on_rate_limit=True)
        g_username = secret["username"]
    except Exception:
        # Deliberately broad: a missing key or any tweepy failure means
        # "no API"; Ctrl-C and SystemExit still propagate.
        print(f'Failed to load tweepy api. Did you add keys to "{g_config_filename}"?')
        api = None
    return api
def LogError(_msg):
    """Report an error message to the user by printing it."""
    print(_msg)
def DebugStatus(_status):
    """Pretty-print the ``_json`` payload of a status object for debugging.

    Args:
        _status: object exposing the payload as its ``_json`` attribute.
    """
    print(json.dumps(_status._json, indent=4, sort_keys=True))
def ReadTextFile(_filename, _log=True):
    """Return the UTF-8 text of a file, or None when it cannot be read.

    Args:
        _filename: path of the file to read.
        _log: when True, a failure is reported through LogError.
    """
    try:
        # 'with' closes the handle even if read() itself fails
        with open(_filename, 'r', encoding='UTF-8') as fd:
            return fd.read()
    except (OSError, UnicodeDecodeError):
        if _log:
            LogError('Could not read file "' + _filename + '"')
        return None
## Import a JSON file to perform operations on filtered JSON objects
def ImportJSON(_filename, _filter, _op):
    """Call ``_op`` on the ``_filter`` entry of every record in an archive file.

    The file is expected to hold ``<prefix> = <JSON array>``; a file without
    such a prefix is parsed as plain JSON. Processing stops early once
    ``g_rate_limit_exceeded`` has been set.

    Args:
        _filename: path of the archive file to read.
        _filter: key looked up on each record to select what ``_op`` receives.
        _op: callable invoked once per record with the selected value.
    """
    text = ReadTextFile(_filename)
    if not text:
        return

    # Strip off everything up to the first '=' to aid the parser. find()
    # returns -1 when there is no '=', which makes idx 0 (keep whole text).
    idx = text.find('=', 1) + 1

    # Replace HTML escape sequences before parsing.
    # NOTE(review): this would break the JSON if an entity ever decoded to a
    # double quote or backslash - confirm the archive never contains those.
    text = html.unescape(text[idx:])

    records = json.loads(text)

    # Processing
    for record in records:
        _op(record[_filter])
        if g_rate_limit_exceeded:
            print('Rate limit exceeded, so processing will stop')
            return
# Operations to perform on parsed JSON objects
def OpListFavoriteIDs(_json):
    """Print the 'tweetId' of one archive 'like' record."""
    print(_json['tweetId'])
def OpDestroyFavorite(_json):
    """Remove the like described by one archive 'like' record.

    Args:
        _json: record whose 'tweetId' identifies the liked tweet.
    """
    api = LoadTweepy()
    if not api:
        LogError('API failed to load or auth')
        return

    tweet_id = _json['tweetId']
    try:
        print(f'Destroy like: {tweet_id}')
        api.destroy_favorite(tweet_id)
    except Exception as err:
        # Best effort: report why and carry on; Ctrl-C still aborts the run
        print(f'Failed to destroy {tweet_id}: {err}')
def OpDestroyTweet(_json):
    """Delete the tweet described by one archive 'tweet' record.

    Args:
        _json: record whose 'id' identifies the tweet.
    """
    api = LoadTweepy()
    if not api:
        LogError('API failed to load or auth')
        return

    tweet_id = _json['id']
    try:
        print(f'Destroy tweet: {tweet_id}')
        api.destroy_status(tweet_id)
    except Exception as err:
        # Best effort: report why and carry on; Ctrl-C still aborts the run
        print(f'Failed to destroy {tweet_id}: {err}')
def OpListDirectMessageIDs(_json):
    """Print the ID of every message in one archived DM conversation.

    Args:
        _json: conversation record; each entry of its 'messages' list is
            read as ``entry['messageCreate']['id']``.
    """
    for message in _json['messages']:
        print(message['messageCreate']['id'])
def OpDestroyDirectMessageIDs(_json):
    """Delete every message of one archived DM conversation.

    IDs that were deleted are appended to ``db["removed-direct-messages"]``
    and IDs already in that list are skipped, so an interrupted purge can be
    resumed. A rate-limit error flags RateLimitExceeded() and stops; any
    other failure is reported and the next message is tried.

    Args:
        _json: conversation record; each entry of its 'messages' list is
            read as ``entry['messageCreate']['id']``.
    """
    api = LoadTweepy()
    if not api:
        LogError('API failed to load or auth')
        return

    ids = [message['messageCreate']['id'] for message in _json['messages']]

    # A fresh 'database' has no list yet; create it instead of raising KeyError
    removed = db.get("removed-direct-messages") or []
    db["removed-direct-messages"] = removed
    # Set copy so the "already removed?" check does not rescan the list
    already_removed = set(removed)

    for message_id in ids:
        if message_id in already_removed:
            continue
        try:
            print(f'Destroy direct message: {message_id}')
            api.destroy_direct_message(message_id)
        except tweepy.RateLimitError:
            print(f'Failed to destroy {message_id} due to rate limiting')
            RateLimitExceeded()
            return
        except Exception as err:
            print(f'Failed to destroy {message_id}: {err}')
            continue
        removed.append(message_id)
        already_removed.add(message_id)
        # @todo - Write out db to disk as rate limiting will be slower anyway
# Higher level functions
def DestroyAllDirectMessages():
    """Delete all archived DMs, keeping progress in 'db.json'.

    Loads the saved 'database' (if any) into the global ``db``, runs
    OpDestroyDirectMessageIDs over "data/direct-messages.js", and writes the
    updated state back even when processing is interrupted.
    """
    global db
    # Keep the archive path and the state file in separate names: reusing one
    # variable made ImportJSON read 'db.json' instead of the archive.
    archiveFilename = "data/direct-messages.js"
    typeFilter = "dmConversation"
    dbFilename = 'db.json'

    # Read previous 'database' state to help with DM rate limits/restarts
    try:
        with open(dbFilename, 'r', encoding='UTF-8') as fd:
            db = json.load(fd)
    except (OSError, ValueError):
        print(f'Failed to load "{dbFilename}"')

    # A first run has no saved list yet; the op indexes this key directly
    if not db.get("removed-direct-messages"):
        db["removed-direct-messages"] = []

    try:
        ImportJSON(archiveFilename, typeFilter, OpDestroyDirectMessageIDs)
    finally:
        # Write new 'database' state
        try:
            with open(dbFilename, 'w', encoding='UTF-8') as fd:
                json.dump(db, fd)
        except (OSError, TypeError, ValueError):
            print(f'Failed to save "{dbFilename}"')
def DestroyAllLikes():
    """Run OpDestroyFavorite on every 'like' record of "data/like.js"."""
    filename = "data/like.js"
    typeFilter = "like"
    ImportJSON(filename, typeFilter, OpDestroyFavorite)
def DumpLikeIds():
    """Print the tweet ID of every 'like' record of "data/like.js"."""
    filename = "data/like.js"
    typeFilter = "like"
    # Pass 'print' instead of the op to dump the whole record
    ImportJSON(filename, typeFilter, OpListFavoriteIDs)
def DumpDirectMessageIds():
    """Print the ID of every message found in "data/direct-messages.js"."""
    filename = "data/direct-messages.js"
    typeFilter = "dmConversation"
    # Pass 'print' instead of the op to dump the whole conversation
    ImportJSON(filename, typeFilter, OpListDirectMessageIDs)
def DestroyAllTweets():
    """Run OpDestroyTweet on every 'tweet' record of "data/tweet.js"."""
    filename = "data/tweet.js"
    typeFilter = "tweet"
    ImportJSON(filename, typeFilter, OpDestroyTweet)
    # @todo unretweet(id)
def TweetActionTest(_api, _status, _args):
    """Sample action: print tweets that have fewer than 10 favorites.

    Args:
        _api: API object; unused by this action.
        _status: status exposing ``favorite_count``, ``id`` and ``text``.
        _args: extra arguments; unused by this action.
    """
    # @todo Filter... boolean test chain or just callbacks?
    #       Candidates: date range, date, contains string, reply target
    # @todo Object that has common checks like substr and date range;
    #       if object is None then consider it as 'ALL'
    # @todo Callback for action, default 'delete'
    if _status.favorite_count < 10:
        print(f'{_status.id} : {_status.text}')
def TweetActionDeleteAll(_api, _status, _args):
    """Action that deletes the given tweet unconditionally.

    Args:
        _api: API object providing ``destroy_status``.
        _status: status whose ``id`` is deleted.
        _args: extra arguments; unused by this action.
    """
    print(f'Destroy tweet: {_status.id}')
    _api.destroy_status(_status.id)
# Apply filtered action to tweets
def TimelineTweetAction(_user, _operation, _count=500, _args=None):
    """Fetch a user's timeline and call ``_operation`` on each status.

    Args:
        _user: user passed to ``api.get_user``.
        _operation: callable invoked as ``_operation(api, status, _args)``.
        _count: passed as ``count=`` to ``api.user_timeline``.
            NOTE(review): the endpoint may return fewer tweets per request
            than asked for - confirm whether paging is needed.
        _args: forwarded unchanged to ``_operation``.
    """
    api = LoadTweepy()
    if not api:
        LogError('API failed to load or auth')
        return

    try:
        user_id = api.get_user(_user).id
        print(f'User {_user} = {user_id}')
    except Exception as err:
        print(f'No user found: {err}')
        return

    try:
        statusList = api.user_timeline(user_id, count=_count)
    except Exception as err:
        print(f'timeline query failed: {err}')
        return

    print(f'Found {len(statusList)} tweets via API')
    for status in statusList:
        try:
            _operation(api, status, _args)
        except Exception as err:
            # Keep going with the next tweet; Ctrl-C still aborts the run
            print(f'Failed to destroy {status.id}: {err}')
# Uses API only instead of archives (limited reach)
def TimelineDestroyTweets(_user=None, _count=500):
    """Delete the tweets returned by one timeline query for a user.

    Args:
        _user: user passed to ``api.get_user``. None (the default) means the
            username from the auth file, which LoadTweepy() stores in
            ``g_username``.
        _count: passed as ``count=`` to ``api.user_timeline``.
    """
    api = LoadTweepy()
    if not api:
        LogError('API failed to load or auth')
        return

    # Resolve the default only now: g_username is still None when this
    # function is defined and is filled in by the LoadTweepy() call above.
    if _user is None:
        _user = g_username

    try:
        user_id = api.get_user(_user).id
        print(f'User {_user} = {user_id}')
    except Exception as err:
        print(f'No user found: {err}')
        return

    try:
        statusList = api.user_timeline(user_id, count=_count)
    except Exception as err:
        print(f'timeline query failed: {err}')
        return

    print(f'Found {len(statusList)} tweets via API')
    for status in statusList:
        try:
            print(f'Destroy tweet: {status.id}')
            api.destroy_status(status.id)
        except Exception as err:
            # Keep going with the next tweet; Ctrl-C still aborts the run
            print(f'Failed to destroy {status.id}: {err}')
def TimelineDestroyFavorites(_user=None, _count=500):
    """Remove a user's likes page by page until none can be removed.

    Args:
        _user: user passed to ``api.favorites``. None (the default) means the
            username from the auth file, which LoadTweepy() stores in
            ``g_username``.
        _count: accepted for symmetry with TimelineDestroyTweets; unused.
    """
    api = LoadTweepy()
    if not api:
        LogError('API failed to load or auth')
        return

    # Resolve the default only now: g_username is still None when this
    # function is defined and is filled in by the LoadTweepy() call above.
    if _user is None:
        _user = g_username

    # Can only purge one page at a time, so keep asking for the next page
    while True:
        try:
            favorites = api.favorites(_user)
        except Exception as e:
            print('favorites query failed')
            print(e)
            return

        print(f'Found {len(favorites)} favorites via API')
        destroyed = 0
        for favorite in favorites:
            try:
                print(f'Destroy favorite: {favorite.id}')
                api.destroy_favorite(favorite.id)
                destroyed += 1
            except Exception as err:
                print(f'Failed to destroy {favorite.id}: {err}')

        # Stop when the page was empty, and also when nothing on it could be
        # removed - otherwise the same page would be fetched forever.
        if destroyed == 0:
            return
def Post(_text):
    """Post ``_text`` as a status update; does nothing if the API fails to load."""
    api = LoadTweepy()
    if api:
        api.update_status(_text)
def TestArgs():
    """Print a fixed line; handy for checking that command dispatch works."""
    print('Yes, this is the third room.')
# Main entry
if __name__ == "__main__":
    # (flag, handler, help text)
    cmds = [
        # Archive based commands
        ('--archived-tweets', DestroyAllTweets, 'Delete archived tweets from Twitter'),
        ('--archived-likes', DestroyAllLikes, 'Delete archived likes from Twitter'),
        ('--archived-dms', DestroyAllDirectMessages, 'Delete archived DMs from Twitter'),
        # Timeline based commands
        ('--timeline-tweets', TimelineDestroyTweets, 'Delete latest tweets via API'),
        ('--timeline-likes', TimelineDestroyFavorites, 'Delete latest likes via API'),
        ('--generate-auth', GenerateAuthTemplate, 'Create a default auth.json to fill out'),
    ]

    args = sys.argv[1:]
    handler = None
    # args is a list, so the help flags must be compared with its first item
    if args and args[0] not in ('--help', '-h'):
        handler = next((cmd[1] for cmd in cmds if cmd[0] == args[0]), None)
        if handler is None:
            print(f'Unknown command: {args[0]}')

    if handler is None:
        print('Delete tweets, likes, and DMs from Twitter')
        for cmd in cmds:
            print(f'\t{cmd[0]} \t{cmd[2]}')
    else:
        handler()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment