Skip to content

Instantly share code, notes, and snippets.

@tondol
Last active December 18, 2015 18:32
Show Gist options
  • Save tondol/3a8a5ee1f3f2256e6d20 to your computer and use it in GitHub Desktop.
Save tondol/3a8a5ee1f3f2256e6d20 to your computer and use it in GitHub Desktop.
特定アカウントのツイートIDを全件取得する
require 'date'
require 'json'
require 'pp'

require 'nokogiri'
require 'watir-webdriver'
# Screen name of the account whose tweets are listed; taken from the first
# command-line argument.
SCREEN_NAME = ARGV.shift
# Fail fast instead of launching a browser and searching for an empty name.
abort "usage: #{$PROGRAM_NAME} SCREEN_NAME" if SCREEN_NAME.nil? || SCREEN_NAME.empty?

# The month-by-month walk at the bottom of the script stops once it has gone
# past this date.
FROM_OLDEST = Date.new(2011, 1, 1)

# Credentials typed into the login form below (placeholders; edit before use).
MY_EMAIL = "foo"
MY_PASSWORD = "barbaz"

# Log in through a real Chrome window driven by Watir.
@browser = Watir::Browser.new :chrome
@browser.goto("https://twitter.com/")
# Clicks the first button on the page -- presumably the one that reveals the
# login form; confirm against the current page markup.
@browser.button.click
sleep 2
@browser.text_field(:id, "signin-email").value = MY_EMAIL
@browser.text_field(:id, "signin-password").value = MY_PASSWORD
@browser.form(:class, "LoginForm").submit

# The session cookie is reused by fetch_tweet_ids_json to authenticate its
# curl requests. Without it (e.g. the login failed) nothing below can work, so
# stop with a readable message rather than a NoMethodError on nil.
session_cookie = @browser.cookies["_twitter_sess"]
unless session_cookie
  @browser.close
  abort "login failed: the _twitter_sess cookie was not set"
end
TWITTER_SESS = session_cookie[:value]
# Loads the first page of search results for SCREEN_NAME's tweets in the
# browser and extracts the tweet IDs from the rendered HTML.
#
# @param from [#to_s] lower bound interpolated into the `since:` search operator
# @param to [#to_s] upper bound interpolated into the `until:` search operator
# @return [Array(Array<String>, String)] the unique tweet IDs found on the
#   page, and the value of the page's `data-max-position` attribute (nil when
#   the attribute is absent); the caller passes it to fetch_tweet_ids_json as
#   the paging cursor
def fetch_tweet_ids_html(from, to)
  @browser.goto("https://twitter.com/search?f=tweets&vertical=default&q=from%3A#{SCREEN_NAME}%20since%3A#{from}%20until%3A#{to}&src=typd")

  # Take a single snapshot of the page so the IDs and the cursor are read from
  # the same HTML (and the page source is only fetched once).
  page_source = @browser.html
  document = Nokogiri::HTML.parse(page_source)
  tweet_ids = document.xpath("//*[contains(@class, 'original-tweet')][@data-item-id]").map do |node|
    node.attr("data-item-id")
  end

  max_position = page_source[/data-max-position="(.*?)"/, 1]
  [tweet_ids.uniq, max_position]
end
# Fetches one further page of search results from Twitter's JSON timeline
# endpoint and extracts the tweet IDs from the HTML fragment it contains.
#
# The request is made with the external `curl` command and authenticated with
# the `_twitter_sess` cookie value held in TWITTER_SESS.
#
# @param from [#to_s] lower bound interpolated into the `since:` search operator
# @param to [#to_s] upper bound interpolated into the `until:` search operator
# @param max_position [#to_s] paging cursor obtained from the previous page
# @return [Array] two elements: the unique tweet IDs found in the response's
#   `items_html`, and the response's `min_position` value (the cursor for the
#   next call)
# @raise [JSON::ParserError] if curl's output is not valid JSON
def fetch_tweet_ids_json(from, to, max_position)
  url = "https://twitter.com/i/search/timeline?f=tweets&vertical=default&q=from%3A#{SCREEN_NAME}%20since%3A#{from}%20until%3A#{to}&src=typd&interval=30000&max_position=#{max_position}&reset_error_state=false"
  command = [
    "curl", "-s", url,
    "-H", "user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36",
    "-H", "cookie: _twitter_sess=#{TWITTER_SESS}; ua=\"f5,m2,m5,rweb,msw\""
  ]
  # Array form of IO.popen: curl is executed directly, without a shell, so
  # quotes or metacharacters in the screen name, cursor or cookie cannot break
  # out of the command line.
  body = IO.popen(command, &:read)

  json = JSON.parse(body)
  # Some responses wrap the payload in an "inner" object; unwrap it if present.
  json = json["inner"] if json.key?("inner")

  fragment = Nokogiri::HTML.parse(json["items_html"])
  tweet_ids = fragment.xpath("//*[contains(@class, 'original-tweet')][@data-item-id]").map do |node|
    node.attr("data-item-id")
  end

  [tweet_ids.uniq, json["min_position"]]
end
# Returns the first day of the calendar month after the one +date+ falls in.
#
# @param date [Date] any day of the reference month
# @return [Date] the 1st of the following month (January of the next year when
#   +date+ is in December)
def next_month(date)
  first_of_month = Date.new(date.year, date.month, 1)
  first_of_month >> 1
end
# Returns the first day of the calendar month before the one +date+ falls in.
#
# @param date [Date] any day of the reference month
# @return [Date] the 1st of the preceding month (December of the previous year
#   when +date+ is in January)
def prev_month(date)
  first_of_month = Date.new(date.year, date.month, 1)
  first_of_month << 1
end
# Main loop: walk backwards one calendar month at a time, starting with the
# current month, and print every tweet ID found in each [from, to) window.
# For each window the first page comes from the rendered search page and the
# following pages from the JSON endpoint, until a page yields no IDs.
count = 0
# Read the clock once so `from` and `to` cannot straddle a month boundary.
today = Date.today
from = Date.new(today.year, today.month, 1)
to = next_month(today)

# Prints one "id=..." line per tweet ID.
print_ids = ->(ids) { ids.each { |tweet_id| puts "id=#{tweet_id}" } }

begin
  loop do
    puts "since=#{from}, until=#{to}"
    tweet_ids, max_position = fetch_tweet_ids_html(from, to)
    count += tweet_ids.size
    # puts "count=#{count}"
    print_ids.call(tweet_ids)
    sleep 2

    loop do
      # puts "max_position=#{max_position}"
      tweet_ids, max_position = fetch_tweet_ids_json(from, to, max_position)
      count += tweet_ids.size
      # puts "count=#{count}"
      print_ids.call(tweet_ids)
      break if tweet_ids.empty?

      # Pause between requests.
      sleep 2
    end

    from = prev_month(from)
    to = prev_month(to)
    break if from < FROM_OLDEST
  end
ensure
  # Close the browser even when a fetch raises, so no Chrome window is left
  # running.
  @browser.close
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment