Created
June 12, 2019 09:04
-
-
Save nathanKramer/6b88c88be507ce514ee48696a9d517c0 to your computer and use it in GitHub Desktop.
Shitty code to sort tweets by reply ratio
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'httparty'
require 'nokogiri'
require 'twitter'
# REST client shared by the rest of this script. The four strings below are
# placeholders: replace them with the credentials of your own Twitter app.
client = Twitter::REST::Client.new(
  consumer_key: "your_consumer_key",
  consumer_secret: "your_consumer_secret",
  access_token: "your_access_token",
  access_token_secret: "your_access_token_secret"
)
# Gathers every page of a max_id-paginated listing into one flat array.
#
# The block is called with the current max_id (nil on the first call unless
# one is supplied) and must return one page of results. Paging stops at the
# first empty page; otherwise the next call receives the id of the page's
# last element minus one.
#
# @param collection [Array] results to start from (not modified)
# @param max_id [Integer, nil] max_id handed to the first block call
# @yieldparam max_id [Integer, nil] upper id bound for the page to fetch
# @yieldreturn [Array] one page of objects responding to #id
# @return [Array] the starting collection followed by every page, flattened
def collect_with_max_id(collection = [], max_id = nil, &block)
  # Work on a copy so the caller's array is left untouched.
  gathered = collection.dup
  cursor = max_id
  page = yield(cursor)

  until page.empty?
    gathered.concat(page)
    cursor = page.last.id - 1
    page = yield(cursor)
  end

  gathered.flatten
end
# Walks a user's timeline page by page via collect_with_max_id and returns
# everything that was collected.
#
# Each request asks user_timeline for count: 200 with include_rts: false, and
# adds :max_id only once a previous page has supplied one.
#
# @param user the user identifier, passed straight through to user_timeline
# @return [Array] whatever collect_with_max_id accumulates from the pages
def client.get_all_tweets(user)
  collect_with_max_id do |max_id|
    paging = max_id.nil? ? {} : { max_id: max_id }
    user_timeline(user, { count: 200, include_rts: false }.merge(paging))
  end
end
# Scrapes a tweet's reply count from its public web page.
#
# Downloads tweet.url, takes the first `span.ProfileTweet-actionCount` element
# and converts its `data-tweet-stat-count` attribute to an Integer.
# This is best effort: any StandardError (failed request, missing element or
# attribute, ...) is reported on stdout and counted as zero replies.
#
# @param tweet an object whose #url identifies the tweet's web page
# @return [Integer] the scraped reply count, or 0 when scraping fails
def get_reply_count(tweet)
  html = Nokogiri::HTML(HTTParty.get(tweet.url.to_s))
  counter = html.at_css('span.ProfileTweet-actionCount')
  counter.attributes['data-tweet-stat-count'].value.to_i
rescue StandardError => e
  puts "Something went wrong, probably no replies #{e}"
  0
end
# Builds a plain-text report of a user's tweets ordered by "reply ratio",
# highest ratio first.
#
# A tweet's ratio is its scraped reply count divided by the sum of its retweet
# and favorite counts; a sum of zero is treated as 1 so the division is always
# defined. Tweets flagged as replies, or whose text starts with '@', are left
# out. Progress messages are printed to stdout while the report is built.
#
# @param handle the user identifier, passed straight through to get_all_tweets
# @return [String] one entry per tweet (URL, stats line, tweet text), with a
#   blank line between entries; empty when there are no tweets to report
def client.ratio_report_for_user(handle)
  puts "Retrieving as many tweets as Twitter's API will allow. . ."
  all = get_all_tweets(handle).reject do |tweet|
    # Some tweets go missing, which breaks :reply?
    # This is unfortunate, because tweets starting with @ aren't always replies.
    # But this will have to do.
    tweet.reply? || tweet.text.start_with?('@')
  end
  puts "Retrieved #{all.count} tweets. . ."

  puts 'Scraping reply counts from the interwebs (this takes a wee while). . .'
  # Spread the scraping over at most 20 threads. The slice size is clamped to
  # 1 because each_slice(0) raises ArgumentError for small timelines.
  chunk_size = [(all.count / 20.0).ceil, 1].max
  workers = all.each_slice(chunk_size).map do |chunk|
    # Each worker returns its own [id, count] pairs, so no Hash is written to
    # from several threads at once.
    Thread.new { chunk.map { |tweet| [tweet.id, get_reply_count(tweet)] } }
  end
  # Thread#value waits for the thread and re-raises anything it raised.
  reply_counts = workers.flat_map(&:value).to_h

  puts 'Done, now crunching some numbers...'
  ratio_by_tweet_id = all.each_with_object({}) do |tweet, ratios|
    popularity = tweet.retweet_count + tweet.favorite_count
    popularity = 1 if popularity.zero?
    ratios[tweet.id] = reply_counts.fetch(tweet.id, 0) / popularity.to_f
  end

  # Negate the ratio so the highest ratios sort first.
  sorted = all.sort_by { |tweet| -ratio_by_tweet_id[tweet.id] }
  sorted.map do |tweet|
    reply_count = reply_counts.fetch(tweet.id, 0)
    ratio = ratio_by_tweet_id[tweet.id]
    "#{tweet.url}\nRatio: #{ratio}, Replies: #{reply_count}, RTs: #{tweet.retweet_count}, Favs: #{tweet.favorite_count}\n#{tweet.text}\n"
  end.join("\n")
end
# Generates the ratio report for a handle and saves it as "<handle>.txt",
# a path relative to the current working directory. Prints a message before
# the report is generated and another after the file has been written.
#
# @param client an object responding to #ratio_report_for_user(handle)
# @param handle [String] handle to report on; also used as the file's base name
# @return [nil]
def save_report_for(client, handle)
  puts "Beginning report for #{handle}"
  output_path = "#{handle}.txt"
  File.write(output_path, client.ratio_report_for_user(handle))
  puts "Saved #{handle}.txt, all done!"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment