Skip to content

Instantly share code, notes, and snippets.

@robmiller
Created July 15, 2021 15:59
Show Gist options
  • Save robmiller/0b523deac3bf83663074ff6fc6f4e3d1 to your computer and use it in GitHub Desktop.
Save robmiller/0b523deac3bf83663074ff6fc6f4e3d1 to your computer and use it in GitHub Desktop.
Script for deleting tweets from a Twitter data export
#!/usr/bin/env ruby
gem "twitter", "~> 7.0"
gem "http", "~> 4.4"
require "twitter"
require "http"
require "json"
require "pathname"
require "time"
require "yaml"
require "yaml/store"
# --- User configuration: edit the values in this section before running ---
#
# Twitter handle of the account being cleaned up.
# NOTE(review): not referenced anywhere in the visible part of this script.
TWITTER_USER = ""
MAX_AGE_IN_DAYS = 365 # tweets younger than this are always kept; older ones are deleted unless a rule below saves them
# Download your Twitter archive and point this to the
# tweet.js file contained in the archive.
DATA_FILE = "path/to/tweet.js"
# When true, every tweet not yet recorded as deleted is written to ARCHIVE_DIR
# before the keep/delete decision is made (so kept tweets are archived too).
ARCHIVE = false
# Directory that receives one "<tweet id>.txt" file per archived tweet.
ARCHIVE_DIR = Pathname("path/to/old_tweets")
# API credentials; get these from dev.twitter.com.
CONSUMER_KEY = ""
CONSUMER_SECRET = ""
OAUTH_TOKEN = ""
OAUTH_TOKEN_SECRET = ""
# Tweet ids that must never be deleted. Use Integers: ids from the archive are
# converted with to_i before being compared against this list.
IDS_TO_SAVE_FOREVER = []
# Tweets whose text contains any of these strings are never deleted.
# Matching is a plain, case-sensitive substring test.
KEYWORDS_TO_SAVE_FOREVER = []
# Don't delete tweets with this many likes or more.
LIKE_THRESHOLD = 5
# Don't delete tweets with this many RTs or more.
RT_THRESHOLD = 5
### you shouldn't have to change anything below this line ###
# MAX_AGE_IN_DAYS expressed in seconds, for comparison with Time differences.
MAX_AGE_IN_SECONDS = MAX_AGE_IN_DAYS*24*60*60
# Reference "now" captured once at start-up. Despite the name this is a Time
# object; subtracting a tweet's Time from it yields the age in seconds.
NOW_IN_SECONDS = Time.now
# NOTE(review): not referenced anywhere in the visible part of this script.
TWEETS_PER_REQUEST = 200
### METHODS ###
# Minimal stand-in for a tweet object, built only from fields found in the
# archive export. It exposes the handful of readers the rest of the script
# relies on (id, text, created_at, favorite_count, retweet_count), so the
# keep-or-delete decision can be made without fetching anything from the API.
# Fields are positional, in the order listed here.
OfflineTweet = Struct.new(*%i[id text created_at favorite_count retweet_count])
# Persistent record (a YAML file in the working directory) of tweet ids that
# are known to be gone from Twitter, so later runs can skip them without
# spending an API call.
$already_deleted = YAML::Store.new("deleted_tweets.yaml")

# Deletes +tweet+ from Twitter and records its id in $already_deleted.
#
# tweet  - any object responding to #id (e.g. an OfflineTweet).
# client - an object responding to #destroy_status(id), normally a
#          Twitter::REST::Client.
#
# Outcomes:
# * deleted successfully, or Twitter reports the tweet no longer exists:
#   the id is stored in $already_deleted.
# * Twitter::Error::TooManyRequests: re-raised untouched so the caller can
#   wait for the rate-limit window and retry.
# * any other StandardError: the error is printed and the process exits with
#   a non-zero status.
def delete_from_twitter(tweet, client)
  client.destroy_status(tweet.id)
rescue Twitter::Error::NotFound
  $stderr.puts "Tweet already deleted"
  $already_deleted.transaction { $already_deleted[tweet.id] = 1 }
rescue Twitter::Error::TooManyRequests
  # Must be listed before the generic handler below: otherwise a rate limit
  # would be treated as fatal and the caller's sleep-and-retry never runs.
  raise
rescue StandardError => e
  puts e.inspect
  puts "Error deleting #{tweet.id}; exiting"
  exit 1
else
  # Remember successful deletions too, so a re-run does not ask Twitter to
  # delete the same tweet again just to learn that it is gone.
  $already_deleted.transaction { $already_deleted[tweet.id] = 1 }
  puts "Deleted #{tweet.id}"
end
# Follows HTTP redirects starting at +url+ for at most +max_hops+ requests and
# returns the last URL reached, always as a String.
#
# Following stops at the first response with status 200 or without a
# Location header. If a request raises, the message is written to stderr and
# the URL reached so far is returned.
def resolve_short_url(url, max_hops = 5)
  current = url.to_s
  max_hops.times do
    response = HTTP.timeout(connect: 5, read: 10).get(current)
    location = response["Location"]
    break if response.code == 200 || location.nil?

    current = location.to_s
  rescue StandardError => e
    $stderr.puts e.message
    break
  end
  current
end

# Writes a YAML snapshot of +tweet+ to "<ARCHIVE_DIR>/<tweet id>.txt".
#
# The snapshot holds the tweet text ("tweet"), its timestamp ("date") and the
# resolved targets of every t.co link found in the text ("urls"). URLs are
# stored as plain strings so the file contains no Ruby-specific object tags.
# ARCHIVE_DIR is created if it does not exist yet.
#
# tweet - an object responding to #id, #text and #created_at.
#
# Returns the number of bytes written.
def archive_tweet(tweet)
  text = tweet.text
  urls = text
    .scan(%r{https?://t\.co/\w+})
    .map { |short_url| resolve_short_url(short_url) }

  payload = YAML.dump({ "tweet" => text, "date" => tweet.created_at, "urls" => urls })

  # Pathname() also accepts a Pathname, so a String ARCHIVE_DIR works as well.
  dir = Pathname(ARCHIVE_DIR)
  dir.mkpath
  File.write(dir + "#{tweet.id}.txt", payload)
end
# Reports whether the text of +tweet+ contains at least one of the strings
# listed in KEYWORDS_TO_SAVE_FOREVER (case-sensitive substring match).
#
# tweet - an object responding to #text.
#
# Returns true on the first keyword found, false if none match.
def keyword_in(tweet)
  KEYWORDS_TO_SAVE_FOREVER.each do |protected_word|
    return true if tweet.text.include?(protected_word)
  end
  false
end
### WE BEGIN ###
# REST client used for every delete call, authenticated with the credentials
# set in the configuration section at the top of the file.
client = Twitter::REST::Client.new(
  consumer_key: CONSUMER_KEY,
  consumer_secret: CONSUMER_SECRET,
  access_token: OAUTH_TOKEN,
  access_token_secret: OAUTH_TOKEN_SECRET
)
puts
puts "What's that sound...?"
puts

# Load every tweet from the archive into an OfflineTweet.
#
# tweet.js in a Twitter export is a JavaScript assignment (for example
# `window.YTD.tweet.part0 = [ ... ]`) rather than bare JSON, so everything
# before the first "[" is dropped before parsing. A file that already starts
# with "[" is parsed unchanged.
archive_json = File.read(DATA_FILE).sub(/\A[^\[]*/, "")

tweets = JSON.parse(archive_json).map do |entry|
  # Entries are expected to wrap the tweet in a "tweet" key; an entry without
  # that key is treated as the tweet hash itself.
  t = entry.fetch("tweet", entry)
  OfflineTweet.new(
    t["id_str"].to_i,
    t["full_text"],
    Time.parse(t["created_at"]),
    t["favorite_count"].to_i,
    t["retweet_count"].to_i
  )
end

puts
puts "Found #{tweets.length} tweets in archive data"
puts
# Walk every tweet from the archive and either keep it (printing the reason)
# or delete it from Twitter.
tweets.each do |tweet|
  # Skip tweets already recorded as gone. The transaction is read-only, so
  # this lookup never rewrites the store file.
  next if $already_deleted.transaction(true) { $already_deleted[tweet.id] }

  # Archiving happens before the keep/delete decision, so tweets that end up
  # being kept are archived as well.
  archive_tweet(tweet) if ARCHIVE

  tweet_age = NOW_IN_SECONDS - tweet.created_at
  tweet_age_in_days = (tweet_age / (24 * 60 * 60)).round

  # The first matching rule wins; only tweets matching none are deleted.
  if tweet_age < MAX_AGE_IN_SECONDS
    puts "Ignored a tweet #{tweet_age_in_days} days old"
  elsif IDS_TO_SAVE_FOREVER.include?(tweet.id)
    puts "Ignored a tweet that is to be saved forever"
  elsif keyword_in(tweet)
    puts "Ignored a tweet with a favored keyword"
  elsif tweet.favorite_count >= LIKE_THRESHOLD
    puts "Ignored a tweet with >= #{LIKE_THRESHOLD} likes"
  elsif tweet.retweet_count >= RT_THRESHOLD
    puts "Ignored a tweet with >= #{RT_THRESHOLD} RTs"
  else
    puts "Deleting a tweet #{tweet_age_in_days} days old"
    delete_from_twitter(tweet, client)
  end

  puts " #{tweet.text}"
  puts ""
rescue Twitter::Error::TooManyRequests => e
  # reset_in is nil when the response carried no rate-limit reset time; fall
  # back to a one-minute pause. The extra second avoids retrying on the very
  # edge of the window.
  wait = (e.rate_limit.reset_in || 60) + 1
  $stderr.puts "Hit the rate limit; pausing for #{wait} seconds"
  sleep wait
  retry
rescue StandardError => e
  puts e.inspect
  # Non-zero status so callers (shell scripts, cron) can tell the run failed.
  exit 1
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment