Last active
August 29, 2015 14:05
-
-
Save smison/5902646e896cc6536969 to your computer and use it in GitHub Desktop.
#tegaki_dt が含まれるツイートから画像を取得するスクリプト
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/local/bin/ruby | |
#coding: utf-8 | |
### requires ################################################## | |
require 'twitter' | |
require 'uri' | |
require 'open-uri' | |
require 'open_uri_redirections' | |
require 'nokogiri' | |
############################################################### | |
### parameters ################################################ | |
# Screen name of the user whose timeline is fetched.
screen_name = 'smison'

# Number of tweets requested per call (upper limit: 200).
count = 200

# Number of timeline requests; count * max_page is the total number of
# tweets that get searched.
max_page = 7

# Path of the file that stores the authentication credentials.
key_file = './.key'
############################################################### | |
### main ###################################################### | |
# Accumulates the text of every fetched tweet.
tweets = []

# Load the credentials from key_file, one value per line, in this order:
# consumer key, consumer secret, OAuth token, OAuth token secret.
# Each line is stripped so that stray whitespace (e.g. the "\r" left by
# CRLF line endings) does not end up inside a credential.
keys = File.read(key_file).split("\n").map(&:strip)

# Fail fast on an incomplete key file instead of handing nil/empty
# credentials to the client and failing later on every request.
if keys.size < 4 || keys.first(4).any?(&:empty?)
  raise ArgumentError,
        "#{key_file} must contain 4 non-empty lines: " \
        'consumer key, consumer secret, oauth token, oauth token secret'
end

# REST client configured with the four credentials read above.
client = Twitter::REST::Client.new do |config|
  config.consumer_key       = keys[0]
  config.consumer_secret    = keys[1]
  config.oauth_token        = keys[2]
  config.oauth_token_secret = keys[3]
end
# Fetch the user's timeline page by page and collect the tweet texts.
#
# Each page is attempted at most max_fetch_attempts times, sleeping
# fetch_retry_wait seconds between attempts. When a page still fails after
# the last attempt the error is re-raised, so a persistent failure stops the
# script instead of retrying forever.
max_fetch_attempts = 5
fetch_retry_wait   = 5

(1..max_page).each do |page|
  attempts = 0
  begin
    attempts += 1
    timeline = client.user_timeline({ :screen_name => screen_name, :count => count, :page => page })
    # Append only after the whole page was fetched and converted, so a failed
    # attempt never leaves partial results in tweets.
    tweets.concat(timeline.map { |tweet| tweet['text'].to_s })
  rescue StandardError
    raise if attempts >= max_fetch_attempts

    # Fetching failed: wait a little and try the same page again.
    p 'tweetの取得に失敗しました、retryします'
    sleep fetch_retry_wait
    retry
  end
end
# Keep only the tweets that contain #tegaki_dt, then collect the URLs found
# in them (these are later opened as image pages).
# Both http and https links are extracted; restricting the scheme list to
# 'http' alone silently dropped every https link.
tweets_tegaki = tweets.select { |tweet| tweet.include?('#tegaki_dt') }
urls = tweets_tegaki.flat_map { |tweet| URI.extract(tweet, %w[http https]) }
# For every collected URL: open the page, locate the first <img> inside an
# 'a.media-thumbnail' link, download it and save it as "./<index>.png".
#
# Each URL is attempted at most max_download_attempts times, sleeping
# download_retry_wait seconds between attempts; after that the URL is
# skipped so one bad link cannot stall the whole run. Pages that contain no
# matching image are skipped immediately, because retrying cannot help.
#
# NOTE(review): this relies on open-uri's Kernel#open handling URLs, which
# Ruby 3.0 removed; use URI.open instead when targeting Ruby >= 2.5.
max_download_attempts = 3
download_retry_wait   = 5

urls.each_with_index do |url, index|
  attempts = 0
  begin
    attempts += 1

    # Fetch the page; the block form closes the remote handle.
    html = open(url, :allow_redirections => :safe) { |page| page.read }

    # Locate the image element and read the image URL from its src attribute.
    img = Nokogiri::HTML.parse(html)
                  .search('a.media-thumbnail')
                  .search('img')
                  .first
    img_url = img && img['src']
    if img_url.nil?
      p '画像が見つかりませんでした、skipします'
      next
    end

    # Download first, write afterwards: a failed download must not leave an
    # empty or truncated file behind.
    p img_url
    data = open(img_url) { |remote| remote.read }
    File.open("./#{index}.png", 'wb') { |file| file.write(data) }

    # Pause between downloads.
    sleep 2
  rescue StandardError
    if attempts < max_download_attempts
      # Fetching or saving failed: wait a little and try the same URL again.
      p '画像の取得/保存に失敗しました、retryします'
      sleep download_retry_wait
      retry
    end

    # Out of attempts: give up on this URL and move on to the next one.
    p '画像の取得/保存に失敗しました、skipします'
  end
end
############################################################### |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment