Created
November 8, 2020 06:34
-
-
Save eladmeidar/2b3834a0b6d7ae9e1a60557bcb3fa3f2 to your computer and use it in GitHub Desktop.
Simple Ruby script to fetch possible logos from a domain.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Usage: ruby fetch_logos.rb <FULL_DOMAIN>   (e.g. ruby fetch_logos.rb http://github.com)
require 'json'
require 'uri'

require 'byebug'
require 'css_parser'
require 'httparty'
require 'nokogiri'
# Core extension: adds a small coercion helper to Array.
class Array
  # Coerces +object+ into an Array.
  #
  # An Array argument is handed back as-is (same object, no copy). Any other
  # value -- nil included -- is placed in a one-element array, which is then
  # flattened.
  def self.wrap(object)
    return object if object.is_a?(Array)

    [object].flatten
  end
end
# Usage: ruby fetch_logos.rb http://github.com
#
# Prints a JSON array of candidate logo URLs gathered from three places:
#   1. <img> tags whose src mentions "logo",
#   2. <img> tags nested in elements that look like a header or logo container,
#   3. url(...) references mentioning "logo" in the linked stylesheets.

# Substrings that mark a link as a candidate image (matched case-insensitively).
IMAGE_EXTENSIONS = %w[jpg jpeg png svg].freeze

# XPath queries for <img> sources living inside header/logo-like containers.
CONTAINER_XPATHS = [
  "//*[contains(normalize-space(@class), 'logo')]//img/@src",
  "//*[contains(normalize-space(@id), 'header')]//img/@src",
  "//*[contains(normalize-space(@id), 'logo')]//img/@src",
  "//*[contains(normalize-space(@class), 'header')]//img/@src",
  '//header//img/@src'
].freeze

# Captures the target of one CSS url(...) token, quoted or not. The capture is
# lazy and excludes quotes and ')' so it can never run into a following token.
CSS_URL_PATTERN = /url\(\s*['"]?([^'")]+?)['"]?\s*\)/i

# Resolves +link+ against +base+ and returns the absolute URL as a String.
# Handles absolute, root-relative, path-relative and protocol-relative links.
# Returns nil when either argument cannot be parsed as a URI.
def absolute_url(base, link)
  URI.join(base, link.strip).to_s
rescue URI::Error
  nil
end

# True when +link+ mentions one of the known image extensions.
def image_link?(link)
  lowered = link.downcase
  IMAGE_EXTENSIONS.any? { |ext| lowered.include?(ext) }
end

# Returns every url(...) target in +declarations+ that mentions "logo".
def css_logo_refs(declarations)
  declarations.scan(CSS_URL_PATTERN).flatten.select { |ref| ref.downcase.include?('logo') }
end

domain = ARGV[0]
abort('Usage: ruby fetch_logos.rb <FULL_DOMAIN> (e.g. http://github.com)') if domain.nil? || domain.strip.empty?

document = Nokogiri::HTML(HTTParty.get(domain).body)

# First try to find IMG src attributes that have the word 'logo' in them,
# usually indicating it's a logo.
logo_named_sources = document.xpath('//img/@src').map(&:value).select { |src| src.downcase.include?('logo') }

# Then collect IMG src from elements that have 'header' or 'logo' in their
# CSS id or class, plus anything inside a <header> element.
container_sources = CONTAINER_XPATHS.flat_map { |query| document.xpath(query).map(&:value) }

results = (logo_named_sources + container_sources)
          .select { |src| image_link?(src) }
          .map { |src| absolute_url(domain, src) }
          .compact

# Extract all url(*) from CSS and check for images with 'logo' in them.
stylesheet_hrefs = document.xpath('//link[@type="text/css"]/@href').map(&:value)
stylesheet_hrefs.each do |href|
  css_url = absolute_url(domain, href)
  next unless css_url

  begin
    parser = CssParser::Parser.new
    parser.load_string!(HTTParty.get(css_url).body.to_s)
    parser.each_selector do |_selector, declarations, _specificity|
      css_logo_refs(declarations).each do |ref|
        # CSS url() references are relative to the stylesheet, not the page.
        resolved = absolute_url(css_url, ref)
        results << resolved if resolved
      end
    end
  rescue StandardError => e
    # Best-effort scrape: one broken stylesheet must not discard the
    # candidates already found, so report it and carry on.
    warn "Skipping stylesheet #{css_url}: #{e.message}"
  end
end

puts JSON.dump(results.uniq)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment