Created
December 17, 2016 02:53
-
-
Save paulohrpinheiro/2648fb56f55b22f0b9c3ccd54fc6c608 to your computer and use it in GitHub Desktop.
Quais são as palavras que você mais usa no Twitter?
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'twitter' | |
require 'words_counted' | |
# Pages backwards through a max_id-cursored collection and gathers every item.
#
# The block is called with the current cursor (nil on the first call unless a
# starting max_id is given) and must return one page of items, each responding
# to #id. Paging stops at the first empty page. The next cursor is the last
# item's id minus one.
#
# @param collection [Array] items gathered so far (a new array is built; the
#   argument itself is not mutated)
# @param max_id [Integer, nil] cursor handed to the first block call
# @yieldparam max_id [Integer, nil] cursor for the page to fetch
# @yieldreturn [Array] one page of items; empty when there is nothing left
# @return [Array] all gathered items, flattened
def collect_with_max_id(collection = [], max_id = nil, &block)
  # Iterative on purpose: the recursive form added one stack frame per page.
  # &block stays in the signature for compatibility; the block is reached
  # through yield.
  loop do
    response = yield(max_id)
    collection += response
    return collection.flatten if response.empty?

    max_id = response.last.id - 1
  end
end
# Twitter REST client configured with credentials read from the environment.
# ENV.fetch raises KeyError naming the missing variable, so an incomplete
# environment fails here instead of handing nil credentials to the client.
client = Twitter::REST::Client.new do |cfg|
  cfg.consumer_key        = ENV.fetch('TWITTER_CONSUMER_KEY')
  cfg.consumer_secret     = ENV.fetch('TWITTER_CONSUMER_SECRET')
  cfg.access_token        = ENV.fetch('TWITTER_ACCESS_TOKEN')
  cfg.access_token_secret = ENV.fetch('TWITTER_ACCESS_TOKEN_SECRET')
end
# Fetches a user's timeline page by page via collect_with_max_id, asking
# user_timeline for up to 200 tweets per request with retweets included.
#
# Defined as a singleton method on the client so user_timeline resolves
# against the client itself.
#
# @param user user identifier passed straight through to user_timeline
# @return [Array] whatever collect_with_max_id accumulates across all pages
def client.get_all_tweets(user)
  collect_with_max_id do |max_id|
    options = { count: 200, include_rts: true }
    # The first request carries no cursor; later ones continue below the
    # last id already seen.
    options[:max_id] = max_id if max_id
    user_timeline(user, options)
  end
end
# Join the tweet texts with spaces before tokenising. Array#to_s would hand
# the tokeniser the array's inspect form, in which a newline inside a tweet
# shows up as the two characters "\n" and the "n" ends up glued to the word
# that follows it.
tweet_text = client.get_all_tweets(ARGV[0]).map(&:text).join(' ')

tokeniser = WordsCounted::Tokeniser.new(tweet_text)

# Exclude tokens shorter than four characters and the URL scheme words.
counter = WordsCounted::Counter.new(
  tokeniser.tokenise(exclude: [->(t) { t.length < 4 }, 'http https'])
)

# Print the first 20 entries of the frequency table as "word: count".
counter.token_frequency.take(20).each { |w, c| puts "#{w}: #{c}" }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment