Skip to content

Instantly share code, notes, and snippets.

@epitron
Created December 13, 2013 19:28
Show Gist options
  • Save epitron/73db08093fc8248be3f1 to your computer and use it in GitHub Desktop.
Save epitron/73db08093fc8248be3f1 to your computer and use it in GitHub Desktop.
Turn a Rails controller into a web-proxy (web.archive.org-style)
require 'pp'
#
# Turns a Rails controller into a web proxy (web.archive.org-style): the
# remote URL is embedded in the request path after "/proxy/", the page is
# fetched on the user's behalf, and every link in an HTML response is rewritten
# so that it points back through "/proxy/" again.
#
# NOTE(review): the target URL is taken verbatim from the request path and is
# fetched with the current user's cookie file. Nothing in this class restricts
# which hosts can be reached -- confirm that access to this controller is
# limited elsewhere.
#
class ProxyController < ApplicationController
  # Tag name => attribute holding a URL that has to be routed through the proxy.
  URL_ATTRIBUTES = {
    "a"      => "href",
    "img"    => "src",
    "form"   => "action",
    "script" => "src",
    "link"   => "href"
  }.freeze

  # Links with these schemes are not fetchable resources; prefixing them with
  # "/proxy/" would only break them, so they are left untouched.
  UNPROXYABLE_LINK = /\A\s*(?:javascript|mailto|data|tel):/i

  # Example Rack environment for a proxied request (cookie values redacted):
  #   {"GATEWAY_INTERFACE"=>"CGI/1.1",
  #    "PATH_INFO"=>"/proxy/http:/google.com",
  #    "QUERY_STRING"=>"",
  #    "REMOTE_ADDR"=>"127.0.0.1",
  #    "REMOTE_HOST"=>"127.0.0.1",
  #    "REQUEST_METHOD"=>"GET",
  #    "REQUEST_URI"=>"http://localhost:3000/proxy/http://google.com/",
  #    "SCRIPT_NAME"=>"",
  #    "SERVER_NAME"=>"localhost",
  #    "SERVER_PORT"=>"3000",
  #    "SERVER_PROTOCOL"=>"HTTP/1.1",
  #    "SERVER_SOFTWARE"=>"WEBrick/1.3.1 (Ruby/2.0.0/2013-06-27)",
  #    "HTTP_USER_AGENT"=>
  #      "Opera/9.80 (X11; Linux x86_64) Presto/2.12.388 Version/12.16",
  #    "HTTP_HOST"=>"localhost:3000",
  #    "HTTP_ACCEPT"=>
  #      "text/html, application/xml;q=0.9, application/xhtml+xml, image/png, image/webp, image/jpeg, image/gif, image/x-xbitmap, */*;q=0.1",
  #    "HTTP_ACCEPT_LANGUAGE"=>"en-US,en;q=0.9",
  #    "HTTP_ACCEPT_ENCODING"=>"gzip, deflate",
  #    "HTTP_COOKIE"=>"remember_user_token=<redacted>; _hibar_session=<redacted>",
  #    "HTTP_CACHE_CONTROL"=>"no-cache",
  #    "HTTP_CONNECTION"=>"Keep-Alive",
  #    "HTTP_VERSION"=>"HTTP/1.1",
  #    "REQUEST_PATH"=>"/proxy/http://google.com/",
  #    "ORIGINAL_FULLPATH"=>"/proxy/http://google.com/",
  #    "ORIGINAL_SCRIPT_NAME"=>"",
  #    "ROUTES_25203060_SCRIPT_NAME"=>""}
  #
  # Note that PATH_INFO has the "//" of the target URL collapsed to "/", while
  # ORIGINAL_FULLPATH keeps it intact; that is why the target URL is read from
  # request.original_fullpath.
  #
  # Other request accessors that may be useful here: referer, base_url,
  # original_fullpath, host, port, fullpath, url, host_with_port, original_url.

  # Fetches the URL embedded in the request path and relays the response.
  #
  # Redirects are not followed server-side (follow_redirects: false); they are
  # handed back to the browser as a redirect to the proxied form of the target,
  # so the browser's address bar keeps tracking the real location.
  #
  # @raise [RuntimeError] if the request path does not start with "/proxy/".
  def index
    http = ::HTTPClient.new(
      cookie_file:      current_user.cookie_file,
      verbose:          true,
      follow_redirects: false,
      raise_exceptions: false,
      logger:           Rails.logger
    )

    logger.info "HTTP Referer: #{request.referer}"

    url = requested_url

    logger.info "* Downloading: #{url}"
    page = http.get(url)
    logger.info "* Info: #{page.link_info}"

    # Handle redirects first: there is no point parsing and rewriting a body
    # that is about to be thrown away.
    if page.redirect
      logger.info "redirecting_to: #{page.redirect}"
      redirect_to proxify_url(url, page.redirect)
      return
    end

    # NOTE(review): `render text:` is the Rails 4-era API; newer Rails versions
    # need `plain:`/`body:` instead -- confirm against the app's Rails version.
    render text: proxied_body(page, url), content_type: page.mime_type, status: page.code
  end

  protected

  # Extracts the remote URL from the request path.
  #
  # request.original_fullpath already includes the query string, so the capture
  # is the complete target URL; appending request.query_string again would
  # duplicate it ("...?a=1?a=1").
  #
  # @return [String] everything after the leading "/proxy/".
  # @raise [RuntimeError] if the path does not have the "/proxy/<url>" shape.
  def requested_url
    match = %r{\A/proxy/(.+)}.match(request.original_fullpath)
    raise "original_fullpath is broken: #{request.original_fullpath}" unless match

    match[1]
  end

  # Returns the body to send to the browser: HTML pages get their URLs
  # rewritten and are re-serialized, everything else is passed through as-is.
  #
  # @param page the response object returned by the HTTP client.
  # @param url [String] the URL the page was fetched from.
  def proxied_body(page, url)
    # to_s guards against a response that carries no MIME type at all.
    return page.body unless page.mime_type.to_s.start_with?("text/html")

    rewrite_urls!(page, url)
    page.parser.to_html
  end

  # Percent-encodes literal spaces, which URI.parse rejects.
  #
  # @param url [String]
  # @return [String] a copy of +url+ with every " " replaced by "%20".
  def fix_url(url)
    url.gsub(" ", "%20")
  end

  # Resolves +link+ against +current_url+ and prefixes the result with
  # "/proxy/" so that following it goes through this controller again.
  #
  # @param current_url [String, URI] URL of the page that contains the link.
  # @param link [String] absolute or relative URL found on that page.
  # @return [String] a site-local "/proxy/<absolute url>" path.
  # @raise [URI::InvalidURIError] if either URL cannot be parsed.
  def proxify_url(current_url, link)
    current_url = URI.parse(fix_url(current_url)) unless current_url.is_a?(URI)
    "/proxy/#{current_url + fix_url(link)}"
  end

  #
  # Rewrite all urls on the page so that they're absolute urls (relative to
  # +current_url+), and prefixed by "/proxy".
  #
  # Mutates the parsed document in place.
  #
  # @param page the parsed HTML page (must respond to #search).
  # @param current_url [String] URL the page was fetched from.
  #
  def rewrite_urls!(page, current_url)
    current_url = URI.parse(fix_url(current_url))

    URL_ATTRIBUTES.each do |tagname, attrname|
      page.search(tagname).each do |node|
        tag_url = node[attrname]
        next if tag_url.nil? || tag_url =~ UNPROXYABLE_LINK

        begin
          node[attrname] = proxify_url(current_url, tag_url)
        rescue URI::InvalidURIError => e
          logger.error e.inspect
          # The URL is embedded in a single-quoted JS string, so it has to be
          # escaped or a stray quote would break (or inject into) the script.
          node[attrname] = "javascript:alert('Unable to parse original url: #{escape_js_string(fix_url(tag_url))}');"
        end
      end
    end

    rewrite_meta_refresh!(page, current_url)
  end

  # Routes <meta http-equiv="refresh" content="0; url=..."> targets through the
  # proxy. Every <meta> element is inspected (not just the first one), and both
  # the http-equiv value and the "url=" key are matched case-insensitively.
  #
  # @param page the parsed HTML page (must respond to #search).
  # @param current_url [URI] URL the page was fetched from.
  def rewrite_meta_refresh!(page, current_url)
    page.search("meta").each do |meta|
      next unless meta["http-equiv"].to_s.downcase == "refresh"

      content = meta["content"]
      next if content.nil?

      begin
        # The lookbehind keeps "url=" out of the match, so the block receives
        # only the target URL.
        meta["content"] = content.gsub(/(?<=\burl=)(.+)(?=\s|$)/i) do |target|
          proxify_url(current_url, target)
        end
      rescue URI::InvalidURIError => e
        # Leave an unparsable refresh target untouched rather than failing the
        # whole page.
        logger.error e.inspect
      end
    end
  end

  # Makes +text+ safe to embed inside a single-quoted JavaScript string
  # literal: backslashes and single quotes are backslash-escaped and line
  # breaks are collapsed to a space.
  #
  # @param text [String]
  # @return [String]
  def escape_js_string(text)
    text.gsub(/[\\']/) { |ch| "\\#{ch}" }.gsub(/[\r\n]+/, " ")
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment