Created
March 27, 2014 14:28
-
-
Save rabbitt/9808748 to your computer and use it in GitHub Desktop.
substitutions a la xkcd: https://xkcd.com/1288/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby
# dependencies:
#   gem install sinatra
#   gem install mechanize
#
# to run:
#   from the command line type: ruby substitutions.rb &
#   (add the word "debug" to the command line for verbose logging on stderr)
#   and in your browser, go to: localhost:4567/http://some.random.site.com/
#
require 'uri' | |
require 'sinatra' | |
require 'mechanize' | |
# Verbose-logging switch: true when the literal word "debug" was passed on the
# command line. The word is removed from ARGV as a side effect.
$debug = !!ARGV.delete('debug')

# Writes a diagnostic line to $stderr, but only when $debug is truthy.
#
# The message may be given as an argument or, for messages that are expensive
# to build, as a block that is evaluated only when debugging is enabled.
#
# @param message [#to_s, nil] text to log; ignored when a block is given
# @yieldreturn [#to_s] lazily-built message
# @return [nil]
def debug(message = nil)
  return unless $debug

  $stderr.puts(block_given? ? yield : message)
end
# Phrase substitutions from https://xkcd.com/1288/.
# Keys are the lower-case phrases to look for; values are their replacements.
# Frozen because the table is only ever read.
SUBSTITUTIONS = {
  'witnesses'                        => 'these dudes I know',
  'allegedly'                        => 'kinda probably',
  'new study'                        => 'tumblr post',
  'rebuild'                          => 'avenge',
  'space'                            => 'spaaace',
  'google glass'                     => 'virtual boy',
  'smartphone'                       => 'pokedex',
  'electric'                         => 'atomic',
  'senator'                          => 'elf-lord',
  'car'                              => 'cat',
  'election'                         => 'eating contest',
  'congressional leaders'            => 'river spirits',
  'homeland security'                => 'homestar runner',
  # was misspelled "could not be reach for comment", which never matched
  'could not be reached for comment' => 'is guilty and everyone knows it'
}.freeze
# Mechanize client used by the route below to fetch the requested pages.
# It announces itself with a custom User-Agent string.
# The block parameter is deliberately not called `agent`, so it does not
# shadow the local variable being assigned.
agent = Mechanize.new do |mech|
  mech.user_agent = '1288-Bot/1.0 (+https://gist.github.com/rabbitt/7457688)'
end
# Inline stylesheet that highlights substituted phrases.
XKCD_STYLE = ".xkcd-replacement { background-color: pink; }\n".freeze

# Hover behaviour: entering a replacement reveals the original phrase that
# directly follows it; leaving the original restores the replacement.
# next()/prev() are used so that only the adjacent span of the pair toggles.
XKCD_HOVER_SCRIPT = [
  "$('.xkcd-replacement').hover(function() { $(this).next('.xkcd-original').show(); $(this).hide(); });",
  "$('.xkcd-original').hover(function() {}, function() { $(this).prev('.xkcd-replacement').show(); $(this).hide(); });"
].join("\n").freeze

# XPath condition selecting text that must never be rewritten: anything inside
# <head>, non-displayed/embedded content, and spans produced by an earlier
# substitution pass.
XKCD_TEXT_GUARD = (
  %w[head style script iframe object noscript].map { |name| "ancestor::#{name}" } +
  ['ancestor::*[@class="xkcd-replacement" or @class="xkcd-original"]']
).join(' or ').freeze

# Builds the upstream URI from the splat part of the request path.
#
# @param raw [String] e.g. "http:/example.com/page" or "example.com"
# @return [URI::Generic]
# @raise [URI::InvalidURIError] when the result cannot be parsed
def xkcd_target_uri(raw)
  # "http://" may arrive as "http:/" (or similar); restore the double slash.
  candidate = raw.sub(%r{\A(https?):[^a-z0-9-]*}i, '\1://')
  # Anything that does not start with an http(s) scheme is treated as a bare
  # host name, so other schemes are never handed to the HTTP client as-is.
  candidate = "http://#{candidate}" unless candidate =~ %r{\Ahttps?://}i
  URI.parse(candidate)
end

# Rewrites root-relative values ("/x", "//host/x") of +attribute+ on the given
# elements into absolute URLs on the upstream site. Values that cannot be
# parsed as URIs are left untouched.
def xkcd_absolutize!(root, base_uri, element_names, attribute)
  root.css(element_names.join(', ')).each do |node|
    value = node[attribute]
    next unless value && value.start_with?('/')

    begin
      absolute = base_uri.merge(URI(value)).to_s
    rescue URI::Error
      next
    end
    node[attribute] = absolute
    debug "Converting #{value} -> #{absolute}"
  end
end

# Computes the href for the injected <base> element: the upstream URL without
# its query string and, when the last path segment looks like a file name,
# without that segment (keeping the trailing slash so that relative links
# resolve inside the same directory).
def xkcd_base_href(uri)
  base = uri.dup
  base.query = nil
  path = base.path.to_s
  # File.basename copes with "" and "/", where split('/').last would be nil.
  if !path.end_with?('/') && File.basename(path).include?('.')
    dir = File.dirname(path)
    base.path = dir.end_with?('/') ? dir : "#{dir}/"
  end
  base.to_s
end

# Adds the <base>, highlight stylesheet and jQuery to <head>, and the hover
# script to the end of <body>. Missing <head>/<body> elements are skipped.
def xkcd_inject_assets(root, base_href)
  doc = root.document

  head = root.at_xpath('//head')
  if head
    base = Nokogiri::XML::Node.new('base', doc)
    base['href'] = base_href
    head.add_child(base)

    styles = Nokogiri::XML::Node.new('style', doc)
    styles.content = XKCD_STYLE
    head.add_child(styles)

    jquery = Nokogiri::XML::Node.new('script', doc)
    jquery['src'] = 'http://code.jquery.com/jquery-2.0.3.min.js'
    head.add_child(jquery)
  end

  body = root.at_xpath('//body')
  return unless body

  js = Nokogiri::XML::Node.new('script', doc)
  js.content = XKCD_HOVER_SCRIPT
  body.add_child(js)
end

# Adapts the replacement phrase to the capitalisation of the matched text.
def xkcd_cased_replacement(match, replace)
  case match
  # title-style match ("Google glass"): capitalize every replacement word
  when /\A[A-Z][a-z]+/ then replace.split(/\s+/).map(&:capitalize).join(' ')
  # fully upper-case match: upper-case the replacement
  when /\A[A-Z]+\b/ then replace.upcase
  # otherwise use the replacement as is
  else replace
  end
end

# Builds a <span> with the given class, text and optional inline style.
def xkcd_span(doc, css_class, text, style = nil)
  Nokogiri::XML::Node.new('span', doc).tap do |span|
    span['class'] = css_class
    span['style'] = style if style
    span.content = text
  end
end

# Replaces every SUBSTITUTIONS phrase in the visible text of the document with
# a highlighted replacement span followed by a hidden span holding the original.
#
# Works on individual text nodes, so sibling and child elements are preserved.
# Phrases only match at word boundaries (an optional plural "s" may follow),
# so "car" matches "car"/"cars" but not "card".
def xkcd_substitute!(root)
  doc = root.document
  page = root.to_html

  SUBSTITUTIONS.each do |search, replace|
    # find all uniq matches (ie: Google Glass, GOOGLE GLASS, google glass, etc)
    page.scan(/\b#{Regexp.escape(search)}(?=s?\b)/i).uniq.each do |match|
      replacement = xkcd_cased_replacement(match, replace)
      splitter = /\b(#{Regexp.escape(match)})(?=s?\b)/
      debug " --> Searching for elements containing #{match.inspect}"

      # match is a case variant of a SUBSTITUTIONS key; none of the keys
      # contain a double quote, so it is safe inside the XPath string literal.
      query = %Q{//text()[contains(., "#{match}")][not(#{XKCD_TEXT_GUARD})]}
      root.xpath(query).each do |text_node|
        # With one capture group, split yields text/match/text/match/...
        parts = text_node.content.split(splitter)
        next if parts.size < 2 # phrase only occurs inside a longer word

        holder = text_node.parent
        debug " --> Found Element #{holder.name} with match #{match} - path: #{holder.path}"

        nodes = []
        parts.each_with_index do |part, index|
          if index.odd?
            nodes << xkcd_span(doc, 'xkcd-replacement', replacement)
            nodes << xkcd_span(doc, 'xkcd-original', match, 'display:none;')
          elsif !part.empty?
            nodes << Nokogiri::XML::Text.new(part, doc)
          end
        end
        text_node.replace(Nokogiri::XML::NodeSet.new(doc, nodes))

        # Serialising the element is only worth it when it will be logged.
        if $debug
          debug " --> Found #{match.inspect} and replaced with #{replacement.inspect}\n --> Modified Element (#{holder.name}): #{holder.to_html.inspect}"
        end
      end
    end
  end
end

# GET /<url> — fetches <url>, points its links at the upstream site, applies
# the xkcd substitutions and returns the rewritten HTML.
#
# Responds with 400 for a missing or unparsable URL and 415 when the fetched
# resource is not an HTML page.
get '/*' do
  raw = params[:splat].first.to_s
  halt 400, 'Usage: /http://some.random.site.com/' if raw.empty?

  begin
    url = xkcd_target_uri(raw)
  rescue URI::InvalidURIError
    halt 400, 'Invalid URL'
  end

  page = agent.get(url.to_s)
  # Only HTML pages expose a parsed document.
  halt 415, 'Only HTML pages can be rewritten' unless page.respond_to?(:root)
  root = page.root

  xkcd_absolutize!(root, url, %w[a link script], 'href')
  xkcd_absolutize!(root, url, %w[img script], 'src')
  xkcd_inject_assets(root, xkcd_base_href(url))
  xkcd_substitute!(root)

  root.to_html
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment