Created
April 12, 2012 17:30
-
-
Save tondol/2369385 to your computer and use it in GitHub Desktop.
Markov Twitter Bot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
require_relative 'twitterbot' | |
# Twitter API credentials. The strings below are placeholders and must be
# replaced with real values before running.
Twitter.configure do |config|
  config.consumer_key = 'consumer key'
  config.consumer_secret = 'consumer secret'
  config.oauth_token = 'oauth token'
  config.oauth_token_secret = 'oauth token secret'
end

# Run one bot cycle (learn, reply, tweet), retrying the whole cycle when it
# fails. At most `max_attempts` cycles are attempted; the loop stops at the
# first cycle that completes without raising.
#
# Command-line flags:
#   --no-reply  skip replying to mentions
#   --no-tweet  skip posting a new tweet
max_attempts = 10
max_attempts.times do
  begin
    bot = TwitterBot::Crawler.new('BOTのscreen_name', '取得元のscreen_name')
    bot.study
    bot.reply_to_mentions unless ARGV.include?("--no-reply")
    bot.tweet unless ARGV.include?("--no-tweet")
    break
  rescue StandardError => e
    # Only StandardError is retried: rescuing Exception would also swallow
    # Interrupt (Ctrl-C) and SystemExit, making the script hard to stop.
    puts e
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
require 'igo-ruby' | |
require 'net/http' | |
require 'twitter' | |
require 'uri' | |
# Text-cleanup helpers added to String for processing tweet bodies and
# scraped HTML fragments.
class String
  # Tells whether the text opens with an @screen_name.
  #
  # The pattern is anchored with \A (start of string) rather than ^ (start of
  # any line), so a multi-line text whose second line starts with "@" is not
  # treated as a mention.
  #
  # @return [MatchData, nil] truthy when the text starts with "@name"
  def is_mention?
    match(/\A@\w+\s*/)
  end

  # Returns a copy of the text with leading @screen_names, RT/QT tails and
  # http/https URIs removed, and with whitespace runs collapsed to one space.
  #
  # @return [String] the cleaned text, stripped of surrounding whitespace
  def remove_uri
    str = gsub(/^\.?(\s*@\w+)+/, '')           # drop leading @IDs (optionally after ".")
    str = str.gsub(/(RT|QT)\s*@?\w+.*$/, '')   # drop everything from RT/QT onwards
    str = str.gsub(/https?:\/\/\S+/, '')       # drop URIs, both http and https
    str.gsub(/\s+/, ' ').strip
  end

  # Converts a small HTML fragment to plain text: <a ...>text</a> becomes its
  # inner text and <br/> / <br /> becomes a newline.
  #
  # @return [String] the converted text
  def stringify
    str = gsub(/<a\s.*?>(.*?)<\/a>/, '\1')     # replace a elements with their content
    str.gsub(/<br\s?\/>/, "\n")                # replace br elements with newlines
  end
end
module TwitterBot | |
BEGIN_DELIMITER = '__BEGIN__' | |
END_DELIMITER = '__END__' | |
IGO_DIC_DIRECTORY = './ipadic' # 辞書ファイルがあるディレクトリを指定 | |
# Learns word chains from a source account's timeline and posts generated
# tweets and replies through the Twitter client.
class Crawler
  # Number of generation attempts before a tweet or reply is abandoned.
  MAX_BUILD_ATTEMPTS = 11
  # Longest status text (in characters) that is accepted for posting.
  MAX_STATUS_LENGTH = 140
  # Mentions older than this many seconds are not answered (24 hours).
  REPLY_WINDOW_SECONDS = 3600 * 24

  # @param bot_screen_name [String] screen name of the bot account
  # @param src_screen_name [String] screen name of the account to learn from
  def initialize(bot_screen_name, src_screen_name)
    @bot_screen_name = bot_screen_name
    @src_screen_name = src_screen_name
    @replied_users = []
    @markov = Markov.new          # model for ordinary tweets
    @markov_mention = Markov.new  # model for tweets that start with @name
    @splitter = Splitter.new
  end

  # Performs a plain HTTP request.
  #
  # @param method [String] 'get' sends a GET; anything else sends a POST
  # @param uri_str [String] target URI
  # @param query [Hash] parameters, sent as the query string (GET) or as the
  #   form-encoded body (POST)
  # @return [Net::HTTPResponse]
  def http_query(method, uri_str, query)
    uri = URI.parse(uri_str)
    # URI.encode is obsolete and was removed in Ruby 3.0; encode_www_form
    # builds the whole "k=v&k=v" string with proper escaping.
    query_string = URI.encode_www_form(query)
    Net::HTTP.start(uri.host, uri.port) do |http|
      if method == 'get'
        path = query_string.empty? ? uri.path : "#{uri.path}?#{query_string}"
        http.get(path)
      else
        http.post(uri.path, query_string)
      end
    end
  end

  # Scrapes the tweet texts from the source user's favstar.fm "recent" page.
  #
  # @return [Array<String>] plain-text tweets
  def get_favorited_tweets
    scrape_tweets("http://favstar.fm/users/#{@src_screen_name}/recent")
  end

  # Scrapes the tweet texts from the source user's favstar.fm top page.
  #
  # @return [Array<String>] plain-text tweets
  def get_best_tweets
    scrape_tweets("http://favstar.fm/users/#{@src_screen_name}")
  end

  # Generates a tweet from the ordinary-tweet model.
  #
  # @return [String] generated text of at most MAX_STATUS_LENGTH characters
  # @raise [StandardError] when no acceptable text is produced within
  #   MAX_BUILD_ATTEMPTS attempts
  def build_tweet
    generate_status(@markov)
  end

  # Generates a reply addressed to screen_name from the mention model.
  #
  # @param screen_name [String] user the reply is addressed to
  # @return [String] "@screen_name text" of at most MAX_STATUS_LENGTH characters
  # @raise [StandardError] when no acceptable text is produced within
  #   MAX_BUILD_ATTEMPTS attempts
  def build_reply(screen_name)
    generate_status(@markov_mention, "@#{screen_name} ")
  end

  # Fetches up to 200 statuses of the source account and feeds each one,
  # cleaned and split into words, to the mention model (when the raw text
  # starts with @name) or to the ordinary-tweet model.
  def study
    Twitter.user_timeline(@src_screen_name, {
      "count" => 200,
    }).each do |status|
      formatted = status.text.remove_uri
      words = @splitter.split(formatted)
      model = status.text.is_mention? ? @markov_mention : @markov
      model.study(words)
      puts "study: #{formatted}"
    end
  end

  # Replies once to each user who mentioned the bot within the last 24 hours
  # and who does not already appear as a reply target in the bot's timeline.
  def reply_to_mentions
    # Collect the users the bot has already replied to.
    Twitter.user_timeline(@bot_screen_name).each do |status|
      screen_name = status.in_reply_to_screen_name
      @replied_users << screen_name if screen_name
    end

    Twitter.mentions.each do |status|
      screen_name = status.user.screen_name
      next if status.created_at < Time.now - REPLY_WINDOW_SECONDS # too old
      next if @replied_users.include?(screen_name)                # already replied
      next if screen_name == @bot_screen_name                     # the bot itself
      result = build_reply(screen_name)
      Twitter.update(result, {
        "in_reply_to_status_id" => status.id,
      })
      @replied_users << screen_name # remember so the user is replied to only once
      puts "reply: #{result}"
    end
  end

  # Posts one status. The source is picked at random: a generated tweet with
  # probability 0.8, otherwise (0.1 each) a tweet scraped from the favstar.fm
  # top page or from its "recent" page.
  def tweet
    random_value = rand
    if random_value < 0.8
      post_status('markov', build_tweet)
    elsif random_value < 0.9
      post_status('best', pick_scraped(get_best_tweets))
    else
      post_status('recent', pick_scraped(get_favorited_tweets))
    end
  end

  private

  # Downloads a favstar.fm page and extracts the text of every
  # <p class='fs-tweet-text'> element as plain text.
  def scrape_tweets(url)
    response = http_query('get', url, {})
    matches = response.body.scan(/<p class='fs-tweet-text'>(.*?)<\/p>/m)
    matches.flatten.map { |match| match.stringify }
  end

  # Asks the given model for text until a non-empty candidate that fits in
  # MAX_STATUS_LENGTH (prefix included) is produced.
  def generate_status(markov, prefix = '')
    MAX_BUILD_ATTEMPTS.times do
      body = markov.build.join('')
      next if body.empty? # nothing generated; never post a blank status
      result = prefix + body
      return result if result.size <= MAX_STATUS_LENGTH
    end
    raise StandardError, 'retry limit is exceeded'
  end

  # Picks a random scraped tweet and cleans it; fails with a clear message
  # when the scrape produced nothing.
  def pick_scraped(tweets)
    picked = tweets.sample
    raise StandardError, 'no tweets could be scraped from favstar' unless picked
    picked.remove_uri
  end

  # Posts the text and logs it under the given mode label.
  def post_status(label, result)
    Twitter.update(result)
    puts "tweet(#{label}): #{result}"
  end
end
# Splits text into words with the Igo tagger and frames the result with the
# begin/end delimiters.
class Splitter
  # Loads the tagger from the dictionary directory named by IGO_DIC_DIRECTORY.
  def initialize
    @tagger = Igo::Tagger.new(IGO_DIC_DIRECTORY)
  end

  # @param str [String] text to split
  # @return [Array<String>] BEGIN_DELIMITER, the words returned by the
  #   tagger's wakati, then END_DELIMITER
  def split(str)
    [BEGIN_DELIMITER, *@tagger.wakati(str), END_DELIMITER]
  end
end
# Word-chain text generator. It learns every run of three consecutive words
# and generates text by repeatedly picking a word that was seen after the
# previous two.
class Markov
  def initialize
    @followers = {}      # word => words seen directly after it
    @continuations = {}  # [word1, word2] => words seen directly after that pair
  end

  # Learns every run of three consecutive words. Sequences shorter than three
  # words contribute nothing.
  #
  # @param words [Array<String>] word sequence, expected to be framed by
  #   BEGIN_DELIMITER and END_DELIMITER
  def study(words)
    words.each_cons(3) do |first, second, third|
      # Duplicates are kept on purpose: a word seen more often is
      # proportionally more likely to be sampled.
      (@followers[first] ||= []) << second
      (@continuations[[first, second]] ||= []) << third
    end
  end

  # @param key [String] a word
  # @return [String, nil] a random word learned after key, or nil if none
  def search1(key)
    (@followers[key] || []).sample
  end

  # @param key1 [String] first word of a pair
  # @param key2 [String] second word of a pair
  # @return [String, nil] a random word learned after the pair, or nil if none
  def search2(key1, key2)
    (@continuations[[key1, key2]] || []).sample
  end

  # Generates a word sequence starting from BEGIN_DELIMITER and stopping at
  # END_DELIMITER. Generation also stops when no next word is known, so an
  # untrained model yields an empty array instead of looping forever.
  #
  # @return [Array<String>] generated words, without the delimiters
  def build
    result = []
    key1 = BEGIN_DELIMITER
    key2 = search1(key1)
    while key2 && key2 != END_DELIMITER
      result << key2
      key1, key2 = key2, search2(key1, key2)
    end
    result
  end
end
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
favstar.fmの仕様変更に追従
24時間以内のリプライにのみ返信するように修正