Script for pulling out all the Twitter haters blogged at http://gamerfury.tumblr.com (all sorts o' trigger warnings on that one).
#!/usr/bin/env ruby
#
# haters.rb
#
# Finds all the Twitter users posted to http://gamerfury.tumblr.com and prints
# them, newline-delimited, to STDOUT. Require this file in your own script to
# access ScumbagHaterFinder directly.
#
# Tested with Ruby 2.0.0.
#
# Example usage:
#   $ ruby haters.rb --verbose
#   hater1                    https://twitter.com/hater1
#   ...
#
# Example mass-block ('gem install t' for the lovely 't' CLI Twitter client)
#   $ ruby haters.rb | xargs t block
#
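# The classes below can also be used from another script, as mentioned above.
# A minimal sketch, assuming this file is saved as haters.rb next to yours:
#
#   require_relative 'haters'
#
#   finder = ScumbagHaterFinder.new('http://gamerfury.tumblr.com')
#   haters = finder.get_all { |desc| desc[/@(\S+)/, 1] }  # first @mention, or nil
#   puts ScumbagHaterPresenter.new(haters).as_text
#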
require 'open-uri'
require 'json'
require 'csv'
require 'optparse'
require 'rss'
require 'rexml/document'
# ScumbagHaterFinder parses post descriptions for haters.
#
# Examples:
#   # Just get post descriptions, no parsing.
#   descriptions = ScumbagHaterFinder.new('http://gamerfury.tumblr.com').get_all
#
#   # Provide a parser that pulls out the first '@username' in each description.
#   haters = ScumbagHaterFinder.new('http://gamerfury.tumblr.com').get_all { |desc|
#     desc.scan(/@(\S+)/).flatten.first
#   }
#
class ScumbagHaterFinder < Struct.new(:base_url)
  # Parser should return a hater name string or nil.
  def get_all &parser
    page_num = 0
    haters = []

    # By default, return whole description
    parser ||= lambda { |desc|
      desc
    }

    catch :no_more_pages do
      loop do
        page_num += 1
        url = base_url + (page_num == 1 ? '/rss' : "/page/#{page_num}/rss")
        STDERR.puts "Fetching page #{page_num}..."
        page_haters = get_haters_from_rss(url, parser)
        throw :no_more_pages if page_haters.empty?
        haters.concat page_haters
      end
    end

    haters
  end

  ############################################################################
  private
  ############################################################################

  def get_haters_from_rss url, parser
    rss = open(url) rescue StringIO.new
    feed = RSS::Parser.parse(rss) rescue nil
    # Treat unfetchable or unparseable pages as empty so get_all stops cleanly.
    return [] if feed.nil?
    page_haters = feed.items.map { |item|
      parser.call(item.description)
    }
    page_haters.compact.reject(&:empty?)
  end
end
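
# For reference, ScumbagHaterFinder walks Tumblr's paged RSS feeds until a page
# yields no haters. With the gamerfury base URL, the requests look like:
#
#   http://gamerfury.tumblr.com/rss
#   http://gamerfury.tumblr.com/page/2/rss
#   http://gamerfury.tumblr.com/page/3/rss
#   ...
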
# ScumbagHaterPresenter formats hater arrays into string representations.
#
# Example:
#   haters = ScumbagHaterFinder.new('http://gamerfury.tumblr.com').get_all
#   presenter = ScumbagHaterPresenter.new(haters)
#
#   presenter.as_text
#   # => "hater1\nhater2"
#
#   presenter.as_text_verbose
#   # => "hater1                    https://twitter.com/hater1"
#
#   presenter.as_csv
#   # => "hater1,https://twitter.com/hater1\nhater2,https://twitter.com/hater2\n"
#
#   presenter.as_json
#   # => "{\"haters\":[[\"hater1\",\"https://twitter.com/hater1\"]]}"
#
class ScumbagHaterPresenter < Struct.new(:haters)
  def as_text
    haters.join("\n")
  end

  def as_text_verbose
    verbose_haters.map { |fields|
      sprintf('%-25s %s', *fields)
    }.join("\n")
  end

  def as_csv
    CSV.generate do |csv|
      verbose_haters.each do |hater|
        csv << hater
      end
    end
  end

  def as_json
    { haters: verbose_haters }.to_json
  end

  ############################################################################
  private
  ############################################################################

  def verbose_haters
    haters.map { |hater| [hater, "https://twitter.com/#{hater}"] }
  end
end

if __FILE__ == $0
  options = {format: :as_text}

  # Blargh, command-line arg parsing...
  ARGV.options do |opts|
    opts.banner = "Usage: #{File.basename($PROGRAM_NAME)} [--text|--json|--csv] [--output <FILE>] [--verbose]"
    opts.separator ''
    # -h just raises; the rescue below prints the usage banner and exits.
    opts.on( '-h', '--help', 'Show this help' ) { raise 'help' }
    opts.on( '-v', '--verbose', 'Verbose output' ) { options[:verbose] = true }
    opts.on( '-o', '--output <FILE>', 'Write output to a file' ) do |file|
      options[:output] = file
    end
    opts.on( '-t', '--text', 'Print hater list as newline-delimited text' ) do
      options[:format] = :as_text
    end
    opts.on( '-j', '--json', 'Print hater list as json' ) do
      options[:format] = :as_json
    end
    opts.on( '-c', '--csv', 'Print hater list as CSV' ) do
      options[:format] = :as_csv
    end

    begin
      opts.parse!
      if options[:format] == :as_text && options[:verbose]
        options[:format] = :as_text_verbose
      end
    rescue
      puts opts
      exit
    end
  end

  BASE_URL = 'http://gamerfury.tumblr.com'

  hater_finder = ScumbagHaterFinder.new(BASE_URL)
  hater_parser = ->(desc) {
    # Quick & dirty XML sanitization.
    desc = desc.to_s.sub('&', '&amp;')
    desc = desc.sub(/<[^>]*</, '<')

    # GamerFury usernames are inside <em> tags in post descriptions.
    # Wrap description in <root> tags so REXML doesn't throw a fit.
    doc = REXML::Document.new("<root>#{desc}</root>")
    ems = REXML::XPath.match(doc, '//em')
    hater = ems.last
    (hater && hater.text.to_s.sub('&amp;', '&')) || nil
  }
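
  # A quick sanity check of the parser above, using a made-up description
  # (the exact markup is an assumption based on the <em> convention noted above):
  #
  #   hater_parser.call('Re: that game <em>someuser</em> said something vile')
  #   # => "someuser"
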
  haters = hater_finder.get_all(&hater_parser)
  output = ScumbagHaterPresenter.new(haters).send(options[:format])

  if options[:output]
    File.open(options[:output], 'w') do |f|
      f.write output + "\n"
    end
  else
    puts output
  end
end