Last active
December 28, 2015 06:29
-
-
Save rabbitt/7457688 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby
# dependencies:
# gem install sinatra
# gem install mechanize
#
# to run:
# from command line type: ruby substitutions.rb &
# and in your browser, go to: localhost:4567/http://some.random.site.com/
#
require 'uri' | |
require 'sinatra' | |
require 'mechanize' | |
# Debug mode is switched on by passing the literal argument "debug" on the
# command line. ARGV.delete removes that argument from ARGV and returns it
# (or nil when absent); the double negation turns that into true/false.
$debug = !!ARGV.delete('debug')

# Writes +message+ to standard error, but only when debug mode is enabled.
#
# @param message [Object] text (or any object accepted by IO#puts) to log
# @return [nil]
def debug(message)
  return unless $debug

  $stderr.puts(message)
end
# Phrase => replacement table applied to the text of every proxied page.
# Keys are matched case-insensitively by the route handler; the replacement is
# re-cased there to follow the capitalization of the text that was matched.
# Frozen so the shared table cannot be mutated while requests are served.
SUBSTITUTIONS = {
  'witnesses'                        => 'these dudes I know',
  'allegedly'                        => 'kinda probably',
  'new study'                        => 'tumblr post',
  'rebuild'                          => 'avenge',
  'space'                            => 'spaaace',
  'google glass'                     => 'virtual boy',
  'smartphone'                       => 'pokedex',
  'electric'                         => 'atomic',
  'senator'                          => 'elf-lord',
  'car'                              => 'cat',
  'election'                         => 'eating contest',
  'congressional leaders'            => 'river spirits',
  'homeland security'                => 'homestar runner',
  # was 'could not be reach for comment', which never matches the real phrase
  'could not be reached for comment' => 'is guilty and everyone knows it'
}.freeze
# Single Mechanize client, captured by the route block below and used for every
# upstream fetch. It announces itself with a custom User-Agent string.
# The block parameter is named differently from the local being assigned so it
# no longer shadows it.
agent = Mechanize.new do |client|
  client.user_agent = '1288-Bot/1.0 (+https://gist.github.com/rabbitt/7457688)'
end
# Proxy route: everything after the leading "/" is treated as the URL of a page
# to fetch. The page is fetched with the shared Mechanize agent, a <base>, a
# highlight style and a jQuery hover script are injected, every phrase from
# SUBSTITUTIONS found in the visible text is swapped for its replacement, and
# the modified document is returned as HTML.
#
# Responds with 400 when the requested URL cannot be parsed.
get '/*' do
  # The "//" after the scheme may arrive collapsed (e.g. "http:/host"), so it is
  # rebuilt here. The match is case-insensitive so an uppercase first letter of
  # the host is not swallowed by the separator character class.
  requested = params[:splat].first.sub(%r{\A(https?):[^a-z0-9-]*}i, '\1://')
  # Only a real scheme prefix counts; "httpbin.org/get" merely contains "http".
  requested = "http://#{requested}" unless requested =~ %r{\Ahttps?://}i

  begin
    url = URI.parse(requested)
  rescue URI::InvalidURIError
    halt 400, 'Invalid URL'
  end

  root = agent.get(url.to_s).root

  head = root.at_xpath('//head')
  if head
    # Point relative links at the origin site instead of this proxy.
    base = Nokogiri::XML::Node.new('base', head.document)
    base['href'] = url.dup.tap { |u| u.query = nil; u.fragment = nil; u.path = '/' }.to_s
    head.add_child(base)

    styles = Nokogiri::XML::Node.new('style', head.document)
    styles.content = ".xkcd-replacement { background-color: pink; }\n"
    head.add_child(styles)

    jquery = Nokogiri::XML::Node.new('script', head.document)
    jquery['src'] = 'http://code.jquery.com/jquery-2.0.3.min.js'
    head.add_child(jquery)
  end

  body = root.at_xpath('//body')
  if body
    # Hovering a replacement reveals the original text; leaving it swaps back.
    js = Nokogiri::XML::Node.new('script', body.document)
    js.content = [
      "$('.xkcd-replacement').hover(function() { $(this).siblings('.xkcd-original').show(); $(this).hide(); });",
      "$('.xkcd-original').hover(function() {}, function() { $(this).siblings('.xkcd-replacement').show(); $(this).hide(); });"
    ].join("\n") + "\n"
    body.add_child(js)
  end

  # Text anywhere beneath these elements is never rewritten.
  skipped_tags = %w(head style script iframe object noscript)
  not_skipped  = "not(#{skipped_tags.map { |tag| "ancestor::#{tag}" }.join(' or ')})"

  # Serialized once, up front, and used only to discover which case variants of
  # each phrase occur on the page.
  page = root.to_html

  SUBSTITUTIONS.each do |search, replace|
    # find all uniq matches (ie: Google Glass, GOOGLE GLASS, google glass, etc)
    page.scan(/#{Regexp.escape(search)}/i).uniq.each do |match|
      replacement =
        case match
        # title-style capitalized match: capitalize every replacement word
        when /\A[A-Z][a-z]+/ then replace.split(/\s+/).map(&:capitalize).join(' ')
        # fully uppercase match: fully uppercase the replacement
        when /\A[A-Z]+\b/ then replace.upcase
        # otherwise use the replacement as is
        else replace
        end

      debug "  --> Searching for text nodes containing #{match.inspect}"

      splitter = /(#{Regexp.escape(match)})/
      # Work on individual text nodes rather than whole elements so that child
      # elements (links, emphasis, ...) next to the matched text are preserved.
      root.xpath(%Q{//text()[contains(., "#{match}")][#{not_skipped}]}).each do |text_node|
        parent = text_node.parent
        doc    = text_node.document
        debug "  --> Found text in #{parent.name} with match #{match} - path: #{text_node.path}"

        pieces = text_node.content.split(splitter).reject(&:empty?).flat_map do |part|
          if part == match
            shown = Nokogiri::XML::Node.new('span', doc)
            shown['class'] = 'xkcd-replacement'
            shown.content  = replacement

            hidden = Nokogiri::XML::Node.new('span', doc)
            hidden['class'] = 'xkcd-original'
            hidden['style'] = 'display:none;'
            hidden.content  = match

            [shown, hidden]
          else
            [Nokogiri::XML::Text.new(part, doc)]
          end
        end

        text_node.replace(Nokogiri::XML::NodeSet.new(doc, pieces))
        debug "  --> Found #{match.inspect} and replaced with #{replacement.inspect}\n  --> Modified Element (#{parent.name}): #{parent.to_html.inspect}"
      end
    end
  end

  root.to_html
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Dependencies:
Run from command line:
ruby substitutions.rb [debug]
Then open your browser to:
http://localhost:4567/<url>
Examples:
http://localhost:4567/http://www.foxnews.com/politics/2013/11/11/white-texas-republican-wins-election-with-campaign-that-implied-black/
http://localhost:4567/http://www.foxnews.com/politics/2013/11/13/senator-says-us-ending-buys-russian-helicopters/