thendrix · January 9, 2021 12:31
diff --git a/TwitterPurge.py b/TwitterPurge.py
 #!/usr/bin/env python3
 # TwitterPurge is a simplified python script to perform operations on twitter archives.
 # I wrote this in a single day while reading an API doc for the first time.
 #
 # Any tips you can spare will go to fund alt social media and tools
 # BTC: bc1qe0el876trjjuuu3zr729n3w3zp4t2k92smklkz
 #
 # Setup python
 # ==========================================================
 # Download and install python3 from https://www.python.org/downloads/
 # Create a python3 sandbox with latest tweepy
 # python3 -m venv sandbox
 # source sandbox/bin/activate
 # pip install tweepy
 #
 # Create Twitter API auth
 # ==========================================================
 # https://realpython.com/twitter-bot-python-tweepy/#creating-twitter-api-authentication-credentials
 #
 # Enable permission to read+write (post tweets) and write DMs
 # Now reset your "Access token & secret" to update permissions
 #
 # Get an archive of your tweets via Twitter web interface
 # ==========================================================
 # 1. Request archive of your tweets
 # 2. Wait until archived and download
 # 3. Extract archive, and place this script into the same directory
 # 4. Generate config/auth.json and add keys to the file
 # 5. Purge tweets, likes, DMs, etc using this script

 import json, html
 import os, sys


 # tweepy is a third-party dependency. Stop right away with a hint when it is
 # missing rather than failing later on the first API call. Only ImportError is
 # caught so a broken installation still shows its real traceback.
 try:
 	import tweepy
 except ImportError:
 	print('Missing tweepy see installation comment')
 	sys.exit(-1)


 # Ugly hack to expose global 'database' to ops for this script
 db = {}
 # Set to True by RateLimitExceeded(); ImportJSON stops iterating once it is set
 g_rate_limit_exceeded = False
 # Credentials file written by GenerateAuthTemplate() and read by LoadTweepy()
 g_config_filename = 'config/auth.json'
 # Filled in by LoadTweepy() from the "username" entry of the credentials file
 g_username = None

 def RateLimitExceeded():
 	"""Raise the module-wide flag that tells ImportJSON to stop processing."""
 	global g_rate_limit_exceeded
 	g_rate_limit_exceeded = True

 def GenerateAuthTemplate():
 	"""Write an empty credentials template to g_config_filename.

 	An existing file is never overwritten. The directory part of
 	g_config_filename is created when it does not exist yet.

 	Returns:
 		-1 when the config directory cannot be created, otherwise None.
 	"""
 	if os.path.exists(g_config_filename):
 		return None

 	configPath = os.path.dirname(g_config_filename)
 	if configPath:
 		try:
 			# exist_ok tolerates an existing directory but still fails when
 			# the path is occupied by a regular file
 			os.makedirs(configPath, exist_ok=True)
 		except OSError as err:
 			# Report the actual failure, not the exception class
 			print(str(err))
 			return -1

 	text = '{\n\t"key" : "",\n\t"secret" : "",\n\t"access_token" : "",\n\t"access_token_secret" : "",\n\t"username" : "@jack"\n}\n'
 	with open(g_config_filename, 'w', encoding='UTF-8') as fd:
 		fd.write(text)
 	return None

 def LoadTweepy():
 	"""Build an authenticated tweepy API client from g_config_filename.

 	Side effect: stores the configured "username" in the global g_username.

 	Returns:
 		The tweepy.API instance, or None when the credentials in the file
 		could not be turned into a client.

 	Exits the process with status 1 when the config file cannot be read or
 	is not valid JSON.
 	"""
 	global g_username

 	try:
 		with open(g_config_filename, 'r', encoding='UTF-8') as fd:
 			secret = json.load(fd)
 	except (OSError, ValueError):
 		# ValueError covers malformed JSON as well as undecodable bytes
 		print(f'Failed to load "{g_config_filename}" use --generate-auth')
 		sys.exit(1)

 	try:
 		auth = tweepy.OAuthHandler(secret["key"], secret["secret"])
 		auth.set_access_token(secret["access_token"], secret["access_token_secret"])
 		api = tweepy.API(auth, wait_on_rate_limit=True)
 		g_username = secret["username"]
 	except Exception as err:
 		# Exception rather than a bare except so Ctrl-C still interrupts
 		print(f'Failed to load tweepy api. Did you add keys to "{g_config_filename}"?')
 		print(err)
 		api = None

 	return api


 def LogError(_msg):
 	"""Report an error message; currently this just prints it to stdout."""
 	print(_msg)


 def DebugStatus(_status):
 	"""Pretty-print the raw `_json` payload of a status object.

 	The payload is rendered with a 4-space indent and alphabetically
 	sorted keys.
 	"""
 	payload = _status._json
 	rendered = json.dumps(payload, sort_keys=True, indent=4)
 	print(rendered)


 def ReadTextFile(_filename, _log=True):
 	"""Return the UTF-8 decoded contents of _filename, or None on failure.

 	Args:
 		_filename: Path of the file to read.
 		_log: When true, a failure is reported through LogError.

 	Returns:
 		The file contents as a string, or None when the file could not be
 		opened, read or decoded.
 	"""
 	try:
 		# The context manager closes the handle even when read() fails
 		with open(_filename, 'r', encoding='UTF-8') as fd:
 			return fd.read()
 	except (OSError, ValueError):
 		# ValueError covers UnicodeDecodeError for files that are not UTF-8
 		if _log:
 			LogError(f'Could not read file "{_filename}"')
 	return None

 ## Import a JSON file to perform operations on filtered JSON objects
 def ImportJSON(_filename, _filter, _op):
 	"""Run _op on the _filter entry of every record stored in _filename.

 	Everything up to and including the first '=' (searched from the second
 	character on) is discarded; the remainder is HTML-unescaped and parsed
 	as JSON. Processing stops early once g_rate_limit_exceeded is set.

 	Args:
 		_filename: Path of the archive file to read.
 		_filter: Key selecting the payload inside each record.
 		_op: Callable invoked with each selected payload.

 	Returns:
 		None. Unreadable or unparsable files are reported via LogError.
 	"""
 	text = ReadTextFile(_filename)
 	if not text:
 		return

 	# Strip off the assignment prefix so only the JSON payload remains
 	idx = text.find('=', 1)
 	if idx < 0:
 		LogError(f'No "=" found in "{_filename}", nothing to parse')
 		return

 	# NOTE(review): unescaping happens before parsing, as in the original.
 	# An entity such as &quot; inside a string value would break the JSON
 	# here - confirm whether unescaping is needed at all.
 	text = html.unescape(text[idx + 1:])

 	try:
 		records = json.loads(text)
 	except ValueError as err:
 		LogError(f'Could not parse "{_filename}": {err}')
 		return

 	for record in records:
 		# Skip entries of another shape instead of aborting the whole run
 		if not isinstance(record, dict) or _filter not in record:
 			continue
 		_op(record[_filter])

 		if g_rate_limit_exceeded:
 			print('Rate limit exceeded, so processing will stop')
 			return


 # Operations to perform on parsed JSON objects

 def OpListFavoriteIDs(_json):
 	"""Print the 'tweetId' value of one like record."""
 	tweetId = _json['tweetId']
 	print(tweetId)

 def OpDestroyFavorite(_json):
 	"""Remove the like for the tweet named by _json['tweetId'] via the API.

 	An API failure is reported and swallowed so that one bad id does not
 	abort a bulk run. Nothing is attempted when the API client cannot be
 	loaded.
 	"""
 	api = LoadTweepy()
 	if not api:
 		LogError('API failed to load or auth')
 		return
 	tweetId = _json['tweetId']
 	print(f'Destroy like: {tweetId}')
 	try:
 		api.destroy_favorite(tweetId)
 	except Exception as err:
 		# Exception, not a bare except, so Ctrl-C / SystemExit still stop the run
 		print(f'Failed to destroy {tweetId}: {err}')

 def OpDestroyTweet(_json):
 	"""Delete the tweet named by _json['id'] via the API.

 	An API failure is reported and swallowed so that one bad id does not
 	abort a bulk run. Nothing is attempted when the API client cannot be
 	loaded.
 	"""
 	api = LoadTweepy()
 	if not api:
 		LogError('API failed to load or auth')
 		return
 	tweetId = _json['id']
 	print(f'Destroy tweet: {tweetId}')
 	try:
 		api.destroy_status(tweetId)
 	except Exception as err:
 		# Exception, not a bare except, so Ctrl-C / SystemExit still stop the run
 		print(f'Failed to destroy {tweetId}: {err}')

 def OpListDirectMessageIDs(_json):
 	"""Print the messageCreate id of every entry in _json['messages'], one per line."""
 	messageIds = (entry['messageCreate']['id'] for entry in _json['messages'])
 	for messageId in messageIds:
 		print(messageId)

 def OpDestroyDirectMessageIDs(_json):
 	"""Delete the messages of one conversation record that are not yet removed.

 	Ids are taken from the messageCreate entry of every item in
 	_json['messages']. Each successfully deleted id is appended to
 	db["removed-direct-messages"] so a later run can skip it. Processing of
 	this record stops at the first failed deletion; a rate-limit failure
 	additionally calls RateLimitExceeded().
 	"""
 	api = LoadTweepy()
 	if not api:
 		LogError('API failed to load or auth')
 		return

 	ids = [message['messageCreate']['id'] for message in _json['messages']]

 	# A fresh db has no entry yet; indexing it directly raised KeyError
 	removed = db.get("removed-direct-messages") or []
 	db["removed-direct-messages"] = removed
 	# Set for fast membership tests; the list is what gets saved as JSON
 	alreadyRemoved = set(removed)

 	for messageId in ids:
 		if messageId in alreadyRemoved:
 			continue

 		print(f'Destroy direct message: {messageId}')
 		try:
 			api.destroy_direct_message(messageId)
 		except tweepy.RateLimitError:
 			print(f'Failed to destroy {messageId} due to rate limiting')
 			RateLimitExceeded()
 			return
 		except Exception as err:
 			# Exception, not a bare except, so Ctrl-C still interrupts
 			print(f'Failed to destroy {messageId}: {err}')
 			return

 		removed.append(messageId)
 		alreadyRemoved.add(messageId)
 		# @todo - Write out db to disk as rate limiting will be slower anyway


 # Higher level functions

 def DestroyAllDirectMessages():
 	"""Delete every DM listed in data/direct-messages.js.

 	Progress is kept in db.json: it is loaded into the global db before the
 	run and written back afterwards, also when the run is interrupted, so a
 	restart can skip messages that are already gone.
 	"""
 	global db

 	# The archive and the progress file need separate names; reusing one
 	# variable made the import read db.json instead of the archive
 	archiveFilename = "data/direct-messages.js"
 	typeFilter = "dmConversation"
 	dbFilename = 'db.json'

 	# Read previous 'database' state to help with DM rate limits/restarts
 	try:
 		with open(dbFilename, 'r', encoding='UTF-8') as fd:
 			loaded = json.load(fd)
 		if isinstance(loaded, dict):
 			db = loaded
 		else:
 			print(f'Failed to load "{dbFilename}"')
 	except (OSError, ValueError):
 		print(f'Failed to load "{dbFilename}"')

 	try:
 		ImportJSON(archiveFilename, typeFilter, OpDestroyDirectMessageIDs)
 	finally:
 		# Write new 'database' state
 		try:
 			with open(dbFilename, 'w', encoding='UTF-8') as fd:
 				json.dump(db, fd)
 		except (OSError, TypeError, ValueError):
 			print(f'Failed to save "{dbFilename}"')

 def DestroyAllLikes():
 	"""Run OpDestroyFavorite on every "like" record of data/like.js."""
 	ImportJSON("data/like.js", "like", OpDestroyFavorite)

 def DumpLikeIds():
 	"""Run OpListFavoriteIDs on every "like" record of data/like.js."""
 	# Pass print instead of OpListFavoriteIDs to dump the whole record
 	ImportJSON("data/like.js", "like", OpListFavoriteIDs)

 def DumpDirectMessageIds():
 	"""Run OpListDirectMessageIDs on every "dmConversation" record of data/direct-messages.js."""
 	# Pass print instead of OpListDirectMessageIDs to dump the whole record
 	ImportJSON("data/direct-messages.js", "dmConversation", OpListDirectMessageIDs)

 def DestroyAllTweets():
 	"""Run OpDestroyTweet on every "tweet" record of data/tweet.js."""
 	ImportJSON("data/tweet.js", "tweet", OpDestroyTweet)


 # unretweet(id)

 def TweetActionTest(_api, _status, _args):
 	"""Dry-run action: print "<id> : <text>" for statuses with fewer than 10 favorites.

 	Nothing is deleted. _api and _args are accepted to match the signature of
 	the other timeline actions but are not used.
 	"""
 	# @todo Filter... boolean test chain or just callbacks?
 	#   Candidate filters: date range, date, contains string
 	# @todo Object that has common checks like substr and date range;
 	#   if object is None then consider it as 'ALL'
 	# @todo Callback for action, default 'delete'
 	#
 	# Other predicates that were tried in place of the favorite count:
 	#   retweet_count > 0, a specific status id, in_reply_to_screen_name equal
 	#   to a given name, a substring of text, created_at.year equal to a year
 	if not _status.favorite_count < 10:
 		return

 	print(f'{_status.id} : {_status.text}')


 def TweetActionDeleteAll(_api, _status, _args):
 	"""Timeline action that announces and deletes _status unconditionally; _args is ignored."""
 	statusId = _status.id
 	print(f'Destroy tweet: {statusId}')
 	_api.destroy_status(statusId)


 # Apply filtered action to tweets
 def TimelineTweetAction(_user, _operation, _count=500, _args=None):
 	"""Fetch a user's timeline through the API and run _operation on each status.

 	Args:
 		_user: User passed to api.get_user() to resolve the account id.
 		_operation: Callable invoked as _operation(api, status, _args).
 		_count: Value passed as count= to api.user_timeline().
 		_args: Opaque extra argument forwarded to _operation.

 	A failure of _operation for one status is reported and does not stop the
 	remaining statuses from being processed.
 	"""
 	api = LoadTweepy()
 	if not api:
 		LogError('API failed to load or auth')
 		return

 	try:
 		userId = api.get_user(_user).id
 	except Exception as err:
 		# Exception, not a bare except, so Ctrl-C still interrupts
 		print(f'No user found: {err}')
 		return
 	print(f'User {_user} = {userId}')

 	try:
 		statusList = api.user_timeline(userId, count=_count)
 	except Exception as err:
 		print(f'timeline query failed: {err}')
 		return

 	print(f'Found {len(statusList)} tweets via API')
 	for status in statusList:
 		try:
 			_operation(api, status, _args)
 		except Exception as err:
 			# The operation is arbitrary, so do not claim a deletion failed
 			print(f'Operation failed for {status.id}: {err}')

 # Uses API only instead of archives (limited reach)
 def TimelineDestroyTweets(_user=None, _count=500):
 	"""Delete the tweets returned by the user's timeline query.

 	Args:
 		_user: User passed to api.get_user(); None means the "username" from
 			the credentials file.
 		_count: Value passed as count= to api.user_timeline().

 	A failed deletion is reported and the remaining tweets are still tried.
 	"""
 	api = LoadTweepy()
 	if not api:
 		LogError('API failed to load or auth')
 		return

 	# g_username is only known after LoadTweepy() has read the config; using
 	# it as the parameter default captured None when the function was defined
 	if _user is None:
 		_user = g_username

 	try:
 		userId = api.get_user(_user).id
 	except Exception as err:
 		# Exception, not a bare except, so Ctrl-C still interrupts
 		print(f'No user found: {err}')
 		return
 	print(f'User {_user} = {userId}')

 	try:
 		statusList = api.user_timeline(userId, count=_count)
 	except Exception as err:
 		print(f'timeline query failed: {err}')
 		return

 	print(f'Found {len(statusList)} tweets via API')
 	for status in statusList:
 		print(f'Destroy tweet: {status.id}')
 		try:
 			api.destroy_status(status.id)
 		except Exception as err:
 			print(f'Failed to destroy {status.id}: {err}')


 def TimelineDestroyFavorites(_user=None, _count=500):
 	"""Remove the user's likes page by page until the API returns none.

 	Args:
 		_user: User passed to api.favorites(); None means the "username" from
 			the credentials file.
 		_count: Currently unused; kept so existing callers keep working.

 	The loop also ends when a whole page could not be destroyed, because the
 	next query would return the same page again.
 	"""
 	api = LoadTweepy()
 	if not api:
 		LogError('API failed to load or auth')
 		return

 	# g_username is only known after LoadTweepy() has read the config; using
 	# it as the parameter default captured None when the function was defined
 	if _user is None:
 		_user = g_username

 	# Can only purge 19 at a time ('1 page')
 	while True:
 		try:
 			favorites = api.favorites(_user)
 		except Exception as e:
 			print('favorites query failed')
 			print(e)
 			return

 		print(f'Found {len(favorites)} favorites via API')
 		# Don't stop until no more to process
 		if not favorites:
 			return

 		destroyed = 0
 		for favorite in favorites:
 			print(f'Destroy favorite: {favorite.id}')
 			try:
 				api.destroy_favorite(favorite.id)
 			except Exception as err:
 				# Exception, not a bare except, so Ctrl-C still interrupts
 				print(f'Failed to destroy {favorite.id}: {err}')
 			else:
 				destroyed += 1

 		if destroyed == 0:
 			print('No favorite could be destroyed, so processing will stop')
 			return

 def Post(_text):
 	"""Publish _text as a status update; do nothing when no API client is available."""
 	api = LoadTweepy()
 	if not api:
 		return
 	api.update_status(_text)


 def TestArgs():
 	"""Print a fixed line; a placeholder for checking command dispatch."""
 	message = 'Yes, this is the third room.'
 	print(message)

 # Main entry
 if __name__ == "__main__":
 	# (command line flag, handler, help text)
 	cmds = [
 		# Archive based commands
 		('--archived-tweets', DestroyAllTweets, 'Delete archived tweets from Twitter'),
 		('--archived-likes', DestroyAllLikes, 'Delete archived likes from Twitter'),
 		('--archived-dms', DestroyAllDirectMessages, 'Delete archived DMs from Twitter'),
 		# Timeline based commands
 		('--timeline-tweets', TimelineDestroyTweets, 'Delete latest tweets via API'),
 		('--timeline-likes', TimelineDestroyFavorites, 'Delete latest likes via API'),
 		('--generate-auth', GenerateAuthTemplate, 'Create a default auth.json to fill out')
 		]

 	args = sys.argv[1:]
 	# args is a list, so test its first element; comparing the list itself to
 	# a string was always False and --help printed nothing
 	if not args or args[0] in ('--help', '-h'):
 		print('Delete tweets, likes, and DMs from Twitter')
 		for flag, _handler, description in cmds:
 			print(f'\t{flag}  \t{description}')
 	else:
 		# Only the first argument selects a command
 		for flag, handler, _description in cmds:
 			if flag == args[0]:
 				handler()
 				break
 		else:
 			print(f'Unknown command "{args[0]}", use --help to list commands')
	#!/usr/bin/env python3
	# TwitterPurge is a simplified python script to perform operations on twitter archives.
	# I wrote this in a single day while reading an API doc for the first time.
	#
	# Any tips you can spare will go to fund alt social media and tools
	# BTC: bc1qe0el876trjjuuu3zr729n3w3zp4t2k92smklkz
	#
	# Setup python
	# ==========================================================
	# Download and install python3 from https://www.python.org/downloads/
	# Create a python3 sandbox with latest tweepy
	# python3 -m venv sandbox
	# source sandbox/bin/activate
	# pip install tweepy
	#
	# Create Twitter API auth
	# ==========================================================
	# https://realpython.com/twitter-bot-python-tweepy/#creating-twitter-api-authentication-credentials
	#
	# Enable permission to read+write (post tweets) and write DMs
	# Now reset your "Access token & secret" to update permissions
	#
	# Get an archive of your tweets via Twitter web interface
	# ==========================================================
	# 1. Request archive of your tweets
	# 2. Wait until archived and download
	# 3. Extract archive, and place this script into the same directory
	# 4. Generate config/auth.json and add keys to the file
	# 5. Purge tweets, likes, DMs, etc using this script

	import json, html
	import os, sys


	# tweepy is a third-party dependency. Stop right away with a hint when it is
	# missing rather than failing later on the first API call. Only ImportError is
	# caught so a broken installation still shows its real traceback.
	try:
		import tweepy
	except ImportError:
		print('Missing tweepy see installation comment')
		sys.exit(-1)


	# Ugly hack to expose global 'database' to ops for this script
	db = {}
	# Set to True by RateLimitExceeded(); ImportJSON stops iterating once it is set
	g_rate_limit_exceeded = False
	# Credentials file written by GenerateAuthTemplate() and read by LoadTweepy()
	g_config_filename = 'config/auth.json'
	# Filled in by LoadTweepy() from the "username" entry of the credentials file
	g_username = None

	def RateLimitExceeded():
		"""Raise the module-wide flag that tells ImportJSON to stop processing."""
		# Body re-indented: the flattened original could not compile
		global g_rate_limit_exceeded
		g_rate_limit_exceeded = True

	def GenerateAuthTemplate():
		"""Write an empty credentials template to g_config_filename.

		An existing file is never overwritten. The directory part of
		g_config_filename is created when it does not exist yet.

		Returns:
			-1 when the config directory cannot be created, otherwise None.
		"""
		if os.path.exists(g_config_filename):
			return None

		configPath = os.path.dirname(g_config_filename)
		if configPath:
			try:
				# exist_ok tolerates an existing directory but still fails when
				# the path is occupied by a regular file
				os.makedirs(configPath, exist_ok=True)
			except OSError as err:
				# Report the actual failure, not the exception class
				print(str(err))
				return -1

		text = '{\n\t"key" : "",\n\t"secret" : "",\n\t"access_token" : "",\n\t"access_token_secret" : "",\n\t"username" : "@jack"\n}\n'
		with open(g_config_filename, 'w', encoding='UTF-8') as fd:
			fd.write(text)
		return None

	def LoadTweepy():
		"""Build an authenticated tweepy API client from g_config_filename.

		Side effect: stores the configured "username" in the global g_username.

		Returns:
			The tweepy.API instance, or None when the credentials in the file
			could not be turned into a client.

		Exits the process with status 1 when the config file cannot be read or
		is not valid JSON.
		"""
		global g_username

		try:
			with open(g_config_filename, 'r', encoding='UTF-8') as fd:
				secret = json.load(fd)
		except (OSError, ValueError):
			# ValueError covers malformed JSON as well as undecodable bytes
			print(f'Failed to load "{g_config_filename}" use --generate-auth')
			sys.exit(1)

		try:
			auth = tweepy.OAuthHandler(secret["key"], secret["secret"])
			auth.set_access_token(secret["access_token"], secret["access_token_secret"])
			api = tweepy.API(auth, wait_on_rate_limit=True)
			g_username = secret["username"]
		except Exception as err:
			# Exception rather than a bare except so Ctrl-C still interrupts
			print(f'Failed to load tweepy api. Did you add keys to "{g_config_filename}"?')
			print(err)
			api = None

		return api


	def LogError(_msg):
		"""Report an error message; currently this just prints it to stdout."""
		# Body re-indented: the flattened original could not compile
		print(_msg)


	def DebugStatus(_status):
		"""Pretty-print the raw `_json` payload of a status object.

		The payload is rendered with a 4-space indent and alphabetically
		sorted keys.
		"""
		# Body re-indented: the flattened original could not compile
		print(json.dumps(_status._json, indent=4, sort_keys=True))


	def ReadTextFile(_filename, _log=True):
		"""Return the UTF-8 decoded contents of _filename, or None on failure.

		Args:
			_filename: Path of the file to read.
			_log: When true, a failure is reported through LogError.

		Returns:
			The file contents as a string, or None when the file could not be
			opened, read or decoded.
		"""
		try:
			# The context manager closes the handle even when read() fails
			with open(_filename, 'r', encoding='UTF-8') as fd:
				return fd.read()
		except (OSError, ValueError):
			# ValueError covers UnicodeDecodeError for files that are not UTF-8
			if _log:
				LogError(f'Could not read file "{_filename}"')
		return None

	## Import a JSON file to perform operations on filtered JSON objects
	def ImportJSON(_filename, _filter, _op):
		"""Run _op on the _filter entry of every record stored in _filename.

		Everything up to and including the first '=' (searched from the second
		character on) is discarded; the remainder is HTML-unescaped and parsed
		as JSON. Processing stops early once g_rate_limit_exceeded is set.

		Args:
			_filename: Path of the archive file to read.
			_filter: Key selecting the payload inside each record.
			_op: Callable invoked with each selected payload.

		Returns:
			None. Unreadable or unparsable files are reported via LogError.
		"""
		text = ReadTextFile(_filename)
		if not text:
			return

		# Strip off the assignment prefix so only the JSON payload remains
		idx = text.find('=', 1)
		if idx < 0:
			LogError(f'No "=" found in "{_filename}", nothing to parse')
			return

		# NOTE(review): unescaping happens before parsing, as in the original.
		# An entity such as &quot; inside a string value would break the JSON
		# here - confirm whether unescaping is needed at all.
		text = html.unescape(text[idx + 1:])

		try:
			records = json.loads(text)
		except ValueError as err:
			LogError(f'Could not parse "{_filename}": {err}')
			return

		for record in records:
			# Skip entries of another shape instead of aborting the whole run
			if not isinstance(record, dict) or _filter not in record:
				continue
			_op(record[_filter])

			if g_rate_limit_exceeded:
				print('Rate limit exceeded, so processing will stop')
				return


	# Operations to perform on parsed JSON objects

	def OpListFavoriteIDs(_json):
		"""Print the 'tweetId' value of one like record."""
		# Body re-indented: the flattened original could not compile
		print(_json['tweetId'])

	def OpDestroyFavorite(_json):
		"""Remove the like for the tweet named by _json['tweetId'] via the API.

		An API failure is reported and swallowed so that one bad id does not
		abort a bulk run. Nothing is attempted when the API client cannot be
		loaded.
		"""
		api = LoadTweepy()
		if not api:
			LogError('API failed to load or auth')
			return
		tweetId = _json['tweetId']
		print(f'Destroy like: {tweetId}')
		try:
			api.destroy_favorite(tweetId)
		except Exception as err:
			# Exception, not a bare except, so Ctrl-C / SystemExit still stop the run
			print(f'Failed to destroy {tweetId}: {err}')

	def OpDestroyTweet(_json):
		"""Delete the tweet named by _json['id'] via the API.

		An API failure is reported and swallowed so that one bad id does not
		abort a bulk run. Nothing is attempted when the API client cannot be
		loaded.
		"""
		api = LoadTweepy()
		if not api:
			LogError('API failed to load or auth')
			return
		tweetId = _json['id']
		print(f'Destroy tweet: {tweetId}')
		try:
			api.destroy_status(tweetId)
		except Exception as err:
			# Exception, not a bare except, so Ctrl-C / SystemExit still stop the run
			print(f'Failed to destroy {tweetId}: {err}')

	def OpListDirectMessageIDs(_json):
		"""Print the messageCreate id of every entry in _json['messages'], one per line."""
		# Body re-indented: the flattened original could not compile
		for message in _json['messages']:
			print(message['messageCreate']['id'])

	def OpDestroyDirectMessageIDs(_json):
		"""Delete the messages of one conversation record that are not yet removed.

		Ids are taken from the messageCreate entry of every item in
		_json['messages']. Each successfully deleted id is appended to
		db["removed-direct-messages"] so a later run can skip it. Processing of
		this record stops at the first failed deletion; a rate-limit failure
		additionally calls RateLimitExceeded().
		"""
		api = LoadTweepy()
		if not api:
			LogError('API failed to load or auth')
			return

		ids = [message['messageCreate']['id'] for message in _json['messages']]

		# A fresh db has no entry yet; indexing it directly raised KeyError
		removed = db.get("removed-direct-messages") or []
		db["removed-direct-messages"] = removed
		# Set for fast membership tests; the list is what gets saved as JSON
		alreadyRemoved = set(removed)

		for messageId in ids:
			if messageId in alreadyRemoved:
				continue

			print(f'Destroy direct message: {messageId}')
			try:
				api.destroy_direct_message(messageId)
			except tweepy.RateLimitError:
				print(f'Failed to destroy {messageId} due to rate limiting')
				RateLimitExceeded()
				return
			except Exception as err:
				# Exception, not a bare except, so Ctrl-C still interrupts
				print(f'Failed to destroy {messageId}: {err}')
				return

			removed.append(messageId)
			alreadyRemoved.add(messageId)
			# @todo - Write out db to disk as rate limiting will be slower anyway


	# Higher level functions

	def DestroyAllDirectMessages():
		"""Delete every DM listed in data/direct-messages.js.

		Progress is kept in db.json: it is loaded into the global db before the
		run and written back afterwards, also when the run is interrupted, so a
		restart can skip messages that are already gone.
		"""
		global db

		# The archive and the progress file need separate names; reusing one
		# variable made the import read db.json instead of the archive
		archiveFilename = "data/direct-messages.js"
		typeFilter = "dmConversation"
		dbFilename = 'db.json'

		# Read previous 'database' state to help with DM rate limits/restarts
		try:
			with open(dbFilename, 'r', encoding='UTF-8') as fd:
				loaded = json.load(fd)
			if isinstance(loaded, dict):
				db = loaded
			else:
				print(f'Failed to load "{dbFilename}"')
		except (OSError, ValueError):
			print(f'Failed to load "{dbFilename}"')

		try:
			ImportJSON(archiveFilename, typeFilter, OpDestroyDirectMessageIDs)
		finally:
			# Write new 'database' state
			try:
				with open(dbFilename, 'w', encoding='UTF-8') as fd:
					json.dump(db, fd)
			except (OSError, TypeError, ValueError):
				print(f'Failed to save "{dbFilename}"')

	def DestroyAllLikes():
		"""Run OpDestroyFavorite on every "like" record of data/like.js."""
		# Body re-indented: the flattened original could not compile
		filename = "data/like.js"
		typeFilter = "like"
		ImportJSON(filename, typeFilter, OpDestroyFavorite)

	def DumpLikeIds():
		"""Run OpListFavoriteIDs on every "like" record of data/like.js."""
		# Body re-indented: the flattened original could not compile
		filename = "data/like.js"
		typeFilter = "like"
		# Pass print instead of OpListFavoriteIDs to dump the whole record
		ImportJSON(filename, typeFilter, OpListFavoriteIDs)

	def DumpDirectMessageIds():
		"""Run OpListDirectMessageIDs on every "dmConversation" record of data/direct-messages.js."""
		# Body re-indented: the flattened original could not compile
		filename = "data/direct-messages.js"
		typeFilter = "dmConversation"
		# Pass print instead of OpListDirectMessageIDs to dump the whole record
		ImportJSON(filename, typeFilter, OpListDirectMessageIDs)

	def DestroyAllTweets():
		"""Run OpDestroyTweet on every "tweet" record of data/tweet.js."""
		# Body re-indented: the flattened original could not compile
		filename = "data/tweet.js"
		typeFilter = "tweet"
		ImportJSON(filename, typeFilter, OpDestroyTweet)


	# unretweet(id)

	def TweetActionTest(_api, _status, _args):
		"""Dry-run action: print "<id> : <text>" for statuses with fewer than 10 favorites.

		Nothing is deleted. _api and _args are accepted to match the signature of
		the other timeline actions but are not used.
		"""
		# @todo Filter... boolean test chain or just callbacks?
		#   Candidate filters: date range, date, contains string
		# @todo Object that has common checks like substr and date range;
		#   if object is None then consider it as 'ALL'
		# @todo Callback for action, default 'delete'
		#
		# Other predicates that were tried in place of the favorite count:
		#   retweet_count > 0, a specific status id, in_reply_to_screen_name equal
		#   to a given name, a substring of text, created_at.year equal to a year

		# The print is the body of the if; the flattened original lost that nesting
		if _status.favorite_count < 10:
			print(f'{_status.id} : {_status.text}')


	def TweetActionDeleteAll(_api, _status, _args):
		"""Timeline action that announces and deletes _status unconditionally; _args is ignored."""
		# Body re-indented: the flattened original could not compile
		print(f'Destroy tweet: {_status.id}')
		_api.destroy_status(_status.id)


	# Apply filtered action to tweets
	def TimelineTweetAction(_user, _operation, _count=500, _args=None):
		"""Fetch a user's timeline through the API and run _operation on each status.

		Args:
			_user: User passed to api.get_user() to resolve the account id.
			_operation: Callable invoked as _operation(api, status, _args).
			_count: Value passed as count= to api.user_timeline().
			_args: Opaque extra argument forwarded to _operation.

		A failure of _operation for one status is reported and does not stop the
		remaining statuses from being processed.
		"""
		api = LoadTweepy()
		if not api:
			LogError('API failed to load or auth')
			return

		try:
			userId = api.get_user(_user).id
		except Exception as err:
			# Exception, not a bare except, so Ctrl-C still interrupts
			print(f'No user found: {err}')
			return
		print(f'User {_user} = {userId}')

		try:
			statusList = api.user_timeline(userId, count=_count)
		except Exception as err:
			print(f'timeline query failed: {err}')
			return

		print(f'Found {len(statusList)} tweets via API')
		for status in statusList:
			try:
				_operation(api, status, _args)
			except Exception as err:
				# The operation is arbitrary, so do not claim a deletion failed
				print(f'Operation failed for {status.id}: {err}')

	# Uses API only instead of archives (limited reach)
	def TimelineDestroyTweets(_user=None, _count=500):
		"""Delete the tweets returned by the user's timeline query.

		Args:
			_user: User passed to api.get_user(); None means the "username" from
				the credentials file.
			_count: Value passed as count= to api.user_timeline().

		A failed deletion is reported and the remaining tweets are still tried.
		"""
		api = LoadTweepy()
		if not api:
			LogError('API failed to load or auth')
			return

		# g_username is only known after LoadTweepy() has read the config; using
		# it as the parameter default captured None when the function was defined
		if _user is None:
			_user = g_username

		try:
			userId = api.get_user(_user).id
		except Exception as err:
			# Exception, not a bare except, so Ctrl-C still interrupts
			print(f'No user found: {err}')
			return
		print(f'User {_user} = {userId}')

		try:
			statusList = api.user_timeline(userId, count=_count)
		except Exception as err:
			print(f'timeline query failed: {err}')
			return

		print(f'Found {len(statusList)} tweets via API')
		for status in statusList:
			print(f'Destroy tweet: {status.id}')
			try:
				api.destroy_status(status.id)
			except Exception as err:
				print(f'Failed to destroy {status.id}: {err}')


	def TimelineDestroyFavorites(_user=None, _count=500):
		"""Remove the user's likes page by page until the API returns none.

		Args:
			_user: User passed to api.favorites(); None means the "username" from
				the credentials file.
			_count: Currently unused; kept so existing callers keep working.

		The loop also ends when a whole page could not be destroyed, because the
		next query would return the same page again.
		"""
		api = LoadTweepy()
		if not api:
			LogError('API failed to load or auth')
			return

		# g_username is only known after LoadTweepy() has read the config; using
		# it as the parameter default captured None when the function was defined
		if _user is None:
			_user = g_username

		# Can only purge 19 at a time ('1 page')
		while True:
			try:
				favorites = api.favorites(_user)
			except Exception as e:
				print('favorites query failed')
				print(e)
				return

			print(f'Found {len(favorites)} favorites via API')
			# Don't stop until no more to process
			if not favorites:
				return

			destroyed = 0
			for favorite in favorites:
				print(f'Destroy favorite: {favorite.id}')
				try:
					api.destroy_favorite(favorite.id)
				except Exception as err:
					# Exception, not a bare except, so Ctrl-C still interrupts
					print(f'Failed to destroy {favorite.id}: {err}')
				else:
					destroyed += 1

			if destroyed == 0:
				print('No favorite could be destroyed, so processing will stop')
				return

	def Post(_text):
		"""Publish _text as a status update; do nothing when no API client is available."""
		# Body re-indented: the flattened original could not compile
		api = LoadTweepy()
		if api:
			api.update_status(_text)


	def TestArgs():
		"""Print a fixed line; a placeholder for checking command dispatch."""
		# Body re-indented: the flattened original could not compile
		print('Yes, this is the third room.')

	# Main entry
	if __name__ == "__main__":
		# (command line flag, handler, help text)
		cmds = [
			# Archive based commands
			('--archived-tweets', DestroyAllTweets, 'Delete archived tweets from Twitter'),
			('--archived-likes', DestroyAllLikes, 'Delete archived likes from Twitter'),
			('--archived-dms', DestroyAllDirectMessages, 'Delete archived DMs from Twitter'),
			# Timeline based commands
			('--timeline-tweets', TimelineDestroyTweets, 'Delete latest tweets via API'),
			('--timeline-likes', TimelineDestroyFavorites, 'Delete latest likes via API'),
			('--generate-auth', GenerateAuthTemplate, 'Create a default auth.json to fill out')
			]

		args = sys.argv[1:]
		# args is a list, so test its first element; comparing the list itself to
		# a string was always False and --help printed nothing
		if not args or args[0] in ('--help', '-h'):
			print('Delete tweets, likes, and DMs from Twitter')
			for flag, _handler, description in cmds:
				print(f'\t{flag} \t{description}')
		else:
			# Only the first argument selects a command
			for flag, handler, _description in cmds:
				if flag == args[0]:
					handler()
					break
			else:
				print(f'Unknown command "{args[0]}", use --help to list commands')