Created
September 7, 2010 13:19
-
-
Save lastk/568312 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'open-uri' | |
require 'nokogiri' | |
require 'net/http' | |
# Script tested on Ruby 1.9.2-head (Linux).
#
# Requires the Nokogiri gem (`gem install nokogiri`).
# Downloads a web page, collects the images, scripts and stylesheets it
# references, and totals the size of those resources per serving host.
#
# Constructing an instance performs the whole run: it fetches the page,
# requests every referenced resource, prints one status line per resource
# followed by the collected totals, and finally calls generate_output_file.
class RubyGet
  # Page analysed when the constructor receives nil or a blank URL.
  DEFAULT_URL = 'http://www.microsoft.com'.freeze

  # User-Agent header sent when the page itself is fetched.
  USER_AGENT = 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.99 Safari/533.4'.freeze

  # Upper bound on the number of resources downloaded at the same time.
  MAX_CONCURRENT_REQUESTS = 8

  # Maps an HTML element name to the size counter it contributes to.
  SIZE_KEYS = { 'img' => :images, 'script' => :scripts, 'link' => :css }.freeze

  # Array of hashes, one per host that answered with a 200:
  # { host: String, images: Integer, scripts: Integer, css: Integer }.
  attr_reader :hostsInfo

  # @param url [String, nil] page to analyse; DEFAULT_URL is used when blank
  # @raise [ArgumentError] when the URL is not an http or https URL
  def initialize(url)
    requested = url.to_s.strip
    @url = requested.empty? ? DEFAULT_URL : requested
    page_uri = URI.parse(@url)
    raise ArgumentError, "not an http(s) URL: #{@url}" unless page_uri.is_a?(URI::HTTP)

    puts "#{yellowText('initialized script with')} #{blueText(@url)}"
    @userAgent = USER_AGENT
    # URI#open comes from open-uri; the block form closes the download when
    # parsing is finished. base_uri is the URL after any redirects, which is
    # what relative resource references have to be resolved against.
    page_uri.open('User-Agent' => @userAgent) do |page|
      @base_uri = page.base_uri
      @doc = Nokogiri::HTML(page)
    end
    @hostsInfo = []
    @resourceURLs = { 'img' => [], 'script' => [], 'link' => [] }
    @resourceURLs.each do |element, urls|
      find_all_elements(element, urls)
      size_of_element(element, urls)
    end
    puts yellowText('Results found:')
    @hostsInfo.each { |info| puts info.inspect }
    puts 'Generating output file'
    generate_output_file
  end

  # Appends to +array+ the URL of every +element+ found in the parsed page.
  #
  # For 'link' elements only stylesheets are considered and the URL is read
  # from href; every other element contributes its src. Missing or blank
  # attributes are ignored.
  #
  # @param element [String] tag name / CSS selector, e.g. 'img'
  # @param array [Array<String>] receives the URLs
  # @return [Array<String>] the same +array+
  def find_all_elements(element, array)
    attribute = element == 'link' ? 'href' : 'src'
    @doc.css(element).each do |node|
      next if element == 'link' && !stylesheet_link?(node)

      target = node[attribute].to_s.strip
      array << target unless target.empty?
    end
    array
  end

  # Requests every URL in +array+ and records the size of each response
  # under +element+.
  #
  # Downloads run concurrently in batches of MAX_CONCURRENT_REQUESTS. The
  # worker threads only fetch; the totals are updated from the calling thread
  # so @hostsInfo is never modified by two threads at once.
  #
  # @param element [String] tag name the URLs were collected for
  # @param array [Array<String>] absolute or relative resource URLs
  def size_of_element(element, array)
    array.each_slice(MAX_CONCURRENT_REQUESTS) do |batch|
      workers = batch.map do |resource|
        Thread.new(resource) { |target| fetch_resource(target) }
      end
      workers.each do |worker|
        host, response = worker.value
        store_content_length(element, response, host) if response
      end
    end
  end

  # Prints a status line for +response+ and, when its code is "200", adds its
  # size to the entry for +host+ in @hostsInfo (creating the entry if needed).
  # Any other code, redirects included, is only reported.
  #
  # @param element [String] 'img', 'script' or 'link'
  # @param response [#code, #message, #[], #body] HTTP response
  # @param host [String] host that served the response
  def store_content_length(element, response, host)
    unless response.code == '200'
      puts "[!!] Element #{element} Code =#{redText(response.code)}#{response.message} for #{host}"
      return
    end

    puts "[*] Element #{element} Code =#{greenText(response.code)}#{response.message} for #{host}"
    host_info = @hostsInfo.find { |info| info[:host] == host }
    unless host_info
      host_info = { host: host, images: 0, scripts: 0, css: 0 }
      @hostsInfo << host_info
    end
    determine_content_type_and_store(host_info, response, element)
  end

  # Writes the chart page: copies the template line by line and, after each
  # marker line ("//0" .. "//3"), inserts the JavaScript generated from
  # @hostsInfo.
  #
  # @param template_path [String] template to read
  # @param output_path [String] file to (over)write
  # @raise [Errno::ENOENT] when the template does not exist
  def generate_output_file(template_path = 'chartTemplate.html', output_path = 'chart.html')
    snippets = chart_snippets
    rendered = []
    File.readlines(template_path).each do |raw|
      line = raw.chomp
      rendered << line
      rendered.concat(snippets.fetch(line, []))
    end
    File.open(output_path, 'w') do |file|
      rendered.each { |line| file.puts line }
    end
  end

  # Adds the size of +response+ to the counter of +hostInfo+ that matches
  # +element+; elements other than 'img', 'script' and 'link' change nothing.
  #
  # @return [Hash] the same +hostInfo+ hash
  def determine_content_type_and_store(hostInfo, response, element)
    counter = SIZE_KEYS[element]
    hostInfo[counter] += content_length_of(response) if counter
    hostInfo
  end

  # The helpers below wrap +text+ (padded with one space on each side) in an
  # ANSI colour escape sequence followed by a reset.
  def greenText(text)
    colorize(32, text)
  end

  def redText(text)
    colorize(31, text)
  end

  def yellowText(text)
    colorize(33, text)
  end

  def blueText(text)
    colorize(34, text)
  end

  private

  # Shared implementation of the colour helpers.
  def colorize(code, text)
    "\e[#{code}m #{text} \e[0m"
  end

  # True when a <link> node points at a stylesheet, judged by its type or
  # rel attribute (type is optional in HTML5, so rel has to be checked too).
  def stylesheet_link?(node)
    node['type'] == 'text/css' ||
      node['rel'].to_s.downcase.split.include?('stylesheet')
  end

  # Downloads one resource with a GET request.
  #
  # The reference is resolved against the page URL, so relative paths,
  # protocol-relative URLs, explicit ports and https are all handled.
  #
  # @return [Array(String, Net::HTTPResponse), nil] host and response, or nil
  #   when the reference is not an HTTP URL or the request failed
  def fetch_resource(resource)
    uri = URI.join((@base_uri || @url).to_s, resource)
    unless uri.is_a?(URI::HTTP)
      puts "[!!] Skipping #{redText(resource.to_s[0, 80])} (not an HTTP resource)"
      return nil
    end

    response = Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
      http.get(uri.request_uri)
    end
    [uri.host, response]
  rescue StandardError => e
    # One broken reference must not abort the whole run.
    puts "[!!] Could not fetch #{redText(resource.to_s[0, 80])}: #{e.message}"
    nil
  end

  # Size of a response in bytes: the Content-Length header when present,
  # otherwise the length of the body that was actually received.
  def content_length_of(response)
    declared = response['content-length']
    return declared.to_i if declared

    response.body.to_s.bytesize
  end

  # JavaScript lines to insert after each template marker, keyed by marker.
  def chart_snippets
    images = @hostsInfo.map { |info| info[:images] }
    scripts = @hostsInfo.map { |info| info[:scripts] }
    css = @hostsInfo.map { |info| info[:css] }
    hosts = @hostsInfo.map { |info| "'#{info[:host]}'" }
    # Largest per-host total, so the axis is tall enough even if the three
    # series are stacked.
    peak = @hostsInfo.map { |info| info[:images] + info[:scripts] + info[:css] }.max || 0
    {
      '//0' => ["images = [#{images.join(',')}]",
                "scripts = [#{scripts.join(',')}]",
                "css = [#{css.join(',')}]"],
      '//1' => ["var tickers = [#{hosts.join(', ')}];"],
      '//2' => ["plot = $.jqplot('chart1', [images, scripts, css], {"],
      '//3' => ["yaxis:{ticks:[#{axis_ticks(peak).join(', ')}]}"]
    }
  end

  # Five evenly spaced tick values from 0 up to at least +peak+.
  def axis_ticks(peak)
    step = [(peak / 4.0).ceil, 1].max
    (0..4).map { |index| index * step }
  end
end
# Run the analysis only when this file is executed directly, so that loading
# it from another script does not start fetching pages. The page URL is taken
# from the first command-line argument.
RubyGet.new(ARGV[0]) if __FILE__ == $PROGRAM_NAME
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment