funzoneq · July 22, 2016 14:13
diff --git a/fetch.rb b/fetch.rb
 require 'optparse'
 require 'httparty'
 require 'pp'

 # Command-line options: :domain is required, :verbose is optional.
 options = { domain: nil }

 # Keep a reference to the parser itself. Chaining `.parse!` onto the
 # `OptionParser.new` block would store parse!'s return value (the leftover
 # non-option arguments) in `optparse`, and the usage text printed further
 # down would never show up.
 optparse = OptionParser.new do |opts|
   opts.banner = "Usage: fetch.rb [options]"

   opts.on("-dDOMAIN", "--domain=DOMAIN", "Domain to download") do |n|
     options[:domain] = n
   end

   opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
     options[:verbose] = v
   end

   opts.on("-h", "--help", "Prints this help") do
     puts opts
     exit
   end
 end
 optparse.parse!

 # Without a domain there is nothing to work on: explain, show usage, stop.
 if options[:domain].nil?
   puts "You need to specify a domain to graze content from. Eg. fetch.rb -d leaseweb.com"
   puts optparse
   exit
 end

 # Builds the wget command line used to mirror the HTML pages of +domain+.
 #
 # domain - host name to crawl; interpolated into the command unescaped.
 #
 # Returns the command as a single-line String. Nothing is executed here.
 def wget(domain)
   arguments = [
     "wget --recursive",
     "-A html",
     "--no-clobber",
     "--html-extension",
     "--convert-links",
     "--restrict-file-names=windows",
     "--domains #{domain}",
     "--no-parent",
     "http://#{domain}"
   ]

   # Arguments are separated by a run of six spaces; a shell treats any run
   # of blanks as one separator.
   arguments.join(" " * 6)
 end

 # Builds the shell pipeline that lists the unique words found in the
 # *.html files below ./<domain>.
 #
 # domain - directory name (relative to the working directory) to search;
 #          interpolated into the command unescaped.
 #
 # Returns the pipeline as a String; the caller is responsible for running it.
 def find_html_files(domain)
   # -h stops grep from prefixing every match with its file name when xargs
   # hands it more than one file. Without it the output is "path:word" and
   # sort -u cannot de-duplicate words across files.
   "find ./#{domain} -name '*.html' -type f -print0 | xargs -0 grep -h -o -E '\\w+' | sort -u"
 end

 # Cache the IANA TLD list next to the script so it is only downloaded when
 # the file is missing.
 # TODO: check how old the file is
 unless File.exist?('tlds-alpha-by-domain.txt')
   response = HTTParty.get('https://data.iana.org/TLD/tlds-alpha-by-domain.txt')
   File.write('tlds-alpha-by-domain.txt', response.parsed_response)
 end

 # One lower-cased TLD per entry; comment lines ("# ...") and blank lines are
 # skipped (a blank entry would match every word below).
 tlds = File.read('tlds-alpha-by-domain.txt')
            .split("\n")
            .reject { |line| line.empty? || line.start_with?('#') }
            .map(&:downcase)

 # The crawl itself is disabled (the exec line is commented out), so the
 # pages under ./<domain> presumably come from an earlier run - confirm.
 command = wget(options[:domain])
 #puts exec(command)

 # Run the word-extraction pipeline and normalise its output.
 command = find_html_files(options[:domain])
 words = `#{command}`.split("\n").map(&:downcase).uniq

 puts words.length

 tlds.each do |tld|
   words.each do |word|
     # Only words that end in the TLD and have something in front of it.
     next unless word.end_with?(tld) && tld != word

     # Cut the TLD off the end only: "meme" with TLD "me" must become
     # "me.me", not ".me".
     domain = "#{word[0...-tld.length]}.#{tld}"
     begin
       response = HTTParty.get("http://nslookup.io/free/#{domain}")
       puts "#{domain} = #{response.parsed_response['Free']}"
     rescue StandardError
       # Best effort: report the failed lookup and carry on with the next word.
       puts "failed for #{domain}"
     end
   end
 end
	require 'optparse'
	require 'httparty'
	require 'pp'

	# Command-line options: :domain is required, :verbose is optional.
	options = { domain: nil }

	# Keep a reference to the parser itself. Chaining `.parse!` onto the
	# `OptionParser.new` block would store parse!'s return value (the leftover
	# non-option arguments) in `optparse`, and the usage text printed further
	# down would never show up.
	optparse = OptionParser.new do |opts|
	  opts.banner = "Usage: fetch.rb [options]"

	  opts.on("-dDOMAIN", "--domain=DOMAIN", "Domain to download") do |n|
	    options[:domain] = n
	  end

	  opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
	    options[:verbose] = v
	  end

	  opts.on("-h", "--help", "Prints this help") do
	    puts opts
	    exit
	  end
	end
	optparse.parse!

	# Without a domain there is nothing to work on: explain, show usage, stop.
	if options[:domain].nil?
	  puts "You need to specify a domain to graze content from. Eg. fetch.rb -d leaseweb.com"
	  puts optparse
	  exit
	end

	# Builds the wget command line used to mirror the HTML pages of +domain+.
	#
	# domain - host name to crawl; interpolated into the command unescaped.
	#
	# Returns the command as a single-line String. Nothing is executed here.
	def wget(domain)
	  arguments = [
	    "wget --recursive",
	    "-A html",
	    "--no-clobber",
	    "--html-extension",
	    "--convert-links",
	    "--restrict-file-names=windows",
	    "--domains #{domain}",
	    "--no-parent",
	    "http://#{domain}"
	  ]

	  # Arguments are separated by a space followed by a tab; a shell treats
	  # any run of blanks as one separator.
	  arguments.join(" \t")
	end

	# Builds the shell pipeline that lists the unique words found in the
	# *.html files below ./<domain>.
	#
	# domain - directory name (relative to the working directory) to search;
	#          interpolated into the command unescaped.
	#
	# Returns the pipeline as a String; the caller is responsible for running it.
	def find_html_files(domain)
	  # -h stops grep from prefixing every match with its file name when xargs
	  # hands it more than one file. Without it the output is "path:word" and
	  # sort -u cannot de-duplicate words across files.
	  "find ./#{domain} -name '*.html' -type f -print0 | xargs -0 grep -h -o -E '\\w+' | sort -u"
	end

	# Cache the IANA TLD list next to the script so it is only downloaded when
	# the file is missing.
	# TODO: check how old the file is
	unless File.exist?('tlds-alpha-by-domain.txt')
	  response = HTTParty.get('https://data.iana.org/TLD/tlds-alpha-by-domain.txt')
	  File.write('tlds-alpha-by-domain.txt', response.parsed_response)
	end

	# One lower-cased TLD per entry; comment lines ("# ...") and blank lines are
	# skipped (a blank entry would match every word below).
	tlds = File.read('tlds-alpha-by-domain.txt')
	           .split("\n")
	           .reject { |line| line.empty? || line.start_with?('#') }
	           .map(&:downcase)

	# The crawl itself is disabled (the exec line is commented out), so the
	# pages under ./<domain> presumably come from an earlier run - confirm.
	command = wget(options[:domain])
	#puts exec(command)

	# Run the word-extraction pipeline and normalise its output.
	command = find_html_files(options[:domain])
	words = `#{command}`.split("\n").map(&:downcase).uniq

	puts words.length

	tlds.each do |tld|
	  words.each do |word|
	    # Only words that end in the TLD and have something in front of it.
	    next unless word.end_with?(tld) && tld != word

	    # Cut the TLD off the end only: "meme" with TLD "me" must become
	    # "me.me", not ".me".
	    domain = "#{word[0...-tld.length]}.#{tld}"
	    begin
	      response = HTTParty.get("http://nslookup.io/free/#{domain}")
	      puts "#{domain} = #{response.parsed_response['Free']}"
	    rescue StandardError
	      # Best effort: report the failed lookup and carry on with the next word.
	      puts "failed for #{domain}"
	    end
	  end
	end
No results found