Skip to content

Instantly share code, notes, and snippets.

@adacable
Created June 7, 2017 08:45
Show Gist options
  • Select an option

  • Save adacable/2af34b7678e523f9cba28bf68a4d19b8 to your computer and use it in GitHub Desktop.

Select an option

Save adacable/2af34b7678e523f9cba28bf68a4d19b8 to your computer and use it in GitHub Desktop.
########### GUIDANCE
# Tweet deleter that deletes all of the tweets in a Twitter archive, from the first ones you tweeted to the latest. (Twitter often leaves a few of your most recent tweets out of the archive; use an API-based deleter for those.)
# no fancy options(like checking for highly favd tweets/etc)
# common errors: Twitter won't let you un-retweet tweets from accounts you don't have access to, so expect a few "Sorry, you are not authorized to see this status" errors as you go through and again in the summary at the bottom
# tweets that aren't deleted are shown twice: once as an error message as you progress through, and once at the bottom in a summary of all errors.
# takes about 1h/10k tweets
# hacky and ugly don't judge me
############ USAGE GUIDE
# download your twitter archive and unzip it.
# set the archive_folder variable in the "edit this" section to the path you unzipped it to
# get twitter credentials from https://apps.twitter.com
# run and check the first few tweets are going- you should be able to see your total number of tweets dropping on your profile page in real time.
# when it's finished, check that no unexpected errors have left behind tweets you wanted deleted (there shouldn't be any, judging by a 50k-tweet test run)
# If it gets canceled, make a note of how many tweets were deleted, subtract 1, and set that to the offset, then run it again- it'll start from there
############ EDIT THIS
# Number of tweets at the start of the archive list to leave alone
# (useful when restarting after a sleep/network drop/etc.)
offset = 0
# Absolute path to the folder where you unzipped the archive Twitter sent you
# (must be terminated with a /)
archive_folder= ""
# Twitter API credentials (see https://apps.twitter.com)
consumer_key = ""
consumer_secret = ""
access_token = ""
access_secret = ""
#everything below here is hacky bullshit
import glob
import io
import json
import os

import tweepy
def main():
    """Run the whole job: authenticate, read the archive, delete, report.

    The first ``offset`` tweet ids from the archive are skipped, which lets a
    previously interrupted run pick up where it stopped.
    """
    auth()
    archiveFiles = getFileNames(archive_folder)
    allTweetIds = getTweetIds(archiveFiles)
    # Everything before ``offset`` is left untouched.
    deleteTweets(allTweetIds[offset:])
    announceErrors()
def progressInit(toCount):
    """Reset the module-level progress state before a deletion run.

    Sets ``noOfTweets`` to the number of items in *toCount*, starts the
    processed counter ``noOfTweetsDeleted`` at ``offset`` and empties the
    ``errors`` list.
    """
    global noOfTweets, noOfTweetsDeleted, errors
    noOfTweets, noOfTweetsDeleted, errors = len(toCount), offset, []
def announceErrors():
    """Print the id and the error for every entry in the ``errors`` list.

    Each entry is a dict with the keys ``'id'`` and ``'error'``; the id is
    printed on one line and the error on the next.
    """
    for failure in errors:
        # Single-argument print(...) behaves the same on Python 2 and 3,
        # unlike the bare print statement, which is a syntax error on 3.
        print(failure['id'])
        print(failure['error'])
def auth():
    """Build the tweepy client and store it in the module-level ``api``.

    Uses the consumer key/secret and access token/secret configured at the
    top of the file.
    """
    global api
    handler = tweepy.OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_secret)
    api = tweepy.API(handler)
def deleteTweet(tweetID):
    """Delete one tweet through the API and print the updated progress line.

    Any exception raised by the API call propagates to the caller, in which
    case no progress line is printed.
    """
    api.destroy_status(tweetID)
    progressLine = progress(tweetID)
    print(progressLine)
def deleteFailed(errorIn, tweetID):
    """Report a failed deletion and record it for the final summary.

    Prints the tweet id and the error, appends ``{'id', 'error'}`` to the
    module-level ``errors`` list and advances ``noOfTweetsDeleted``.

    errorIn -- the exception (or message) describing the failure
    tweetID -- id of the tweet that could not be deleted
    """
    global noOfTweetsDeleted
    # Single-argument print(...) works on both Python 2 and 3; the explicit
    # one-element tuple keeps the format safe whatever type the id has.
    print("failed to delete %s" % (tweetID,))
    print(errorIn)
    errors.append({'id': tweetID, 'error': errorIn})
    # Failures still count as processed so the progress counter (and the
    # offset used to resume a run) stays aligned with the archive position.
    noOfTweetsDeleted = noOfTweetsDeleted + 1
def progress(idprint):
    """Count one more processed tweet and return its progress line.

    The line has the form ``<processed>/<total> <id>``, where the total is
    ``noOfTweets + offset``.
    """
    global noOfTweetsDeleted
    noOfTweetsDeleted += 1
    grandTotal = noOfTweets + offset
    return "%s/%s %s" % (noOfTweetsDeleted, grandTotal, idprint)
def deleteTweets(toDelete):
    """Try to delete every tweet id in *toDelete*, in order.

    Progress state is reset first. A failure on one tweet is handed to
    ``deleteFailed`` and does not stop the remaining deletions.
    """
    progressInit(toDelete)
    for tweetId in toDelete:
        try:
            deleteTweet(tweetId)
        except Exception as failure:
            # Best effort: note the failure and carry on with the next id.
            deleteFailed(failure, tweetId)
def getFileNames(dataFolder):
    """Return the paths of all files under ``<dataFolder>/data/js/tweets``.

    The result is sorted by path. ``glob`` itself returns matches in
    arbitrary order, which would make the offset-based resume unreliable
    because the same offset could refer to different tweets on each run.
    (Presumably the archive names its files by year and month, so sorting
    also yields oldest-first order -- verify against your archive.)

    dataFolder -- folder the archive was unzipped to, with or without a
                  trailing path separator
    """
    # os.path.join tolerates a missing trailing separator; plain string
    # concatenation would silently match nothing in that case.
    pattern = os.path.join(dataFolder, "data", "js", "tweets", "*")
    return sorted(glob.glob(pattern))
def getTweetIds(tweetFileNames):
    """Collect the ``"id"`` of every tweet in the given archive files.

    Each file is expected to contain some prefix (such as a JavaScript
    assignment) followed by a JSON array of tweet objects. Ids are returned
    in file order, then in the order they appear within each file.

    tweetFileNames -- iterable of paths to the archive's tweet files

    Raises ValueError if a file contains no JSON array or invalid JSON.
    """
    tweetIDs = []
    for tweetFileName in tweetFileNames:
        # io.open gives the same UTF-8 decoding on Python 2 and 3 instead of
        # depending on the locale; the with-block closes the file promptly.
        with io.open(tweetFileName, encoding="utf-8") as tweetFile:
            fileText = tweetFile.read()
        # Skip whatever precedes the JSON array rather than assuming the
        # prefix is exactly 33 characters long.
        arrayStart = fileText.find("[")
        if arrayStart == -1:
            raise ValueError("no JSON array found in %s" % (tweetFileName,))
        tweetIDs.extend(
            tweetJson["id"] for tweetJson in json.loads(fileText[arrayStart:])
        )
    return tweetIDs
# Script entry point: the full deletion run starts as soon as this file is
# executed (or imported), using the settings from the "EDIT THIS" section.
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment