Created
May 31, 2018 01:23
-
-
Save cdesch/92c5b03235704dce2d4978d13cbc3b9d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems'
require 'mechanize'
require 'uri'
require 'csv'
# This script looks up each vendor from a seed file and creates a new seed file
# that adds the vendor's domain, using the first Google search result returned.
# Returns the lower-cased host of +url+ with a single leading "www." removed.
#
# Input without a usable host on the first parse is re-parsed with an
# "http://" prefix, so bare hosts such as "www.example.com" are accepted.
#
# @param url [String] absolute URL or bare host (surrounding whitespace is ignored)
# @return [String] host name without the "www." prefix
# @raise [URI::InvalidURIError] when the input is malformed or contains no host
def get_host_without_www(url)
  text = url.to_s.strip
  uri = URI.parse(text)
  # A bare "example.com" parses as a path, and "example.com:8080" parses with
  # "example.com" as the scheme; neither yields a host, so retry as HTTP.
  uri = URI.parse("http://#{text}") if uri.host.nil?

  host = uri.host
  raise URI::InvalidURIError, "no host found in #{url.inspect}" if host.nil? || host.empty?

  host.downcase.sub(/\Awww\./, '')
end
# Searches Google for +name+ and returns the host of the first result link.
#
# Loads the Google home page with Mechanize, submits +name+ through the form
# named "f", then scans the links of the returned page for the first href of
# the form "...url?q=<target>&..." and reduces <target> to its host via
# get_host_without_www.
#
# @param name [String] company name to search for
# @return [String, nil] host of the first result, or nil when the result page
#   contains no such link
# @raise [StandardError] whatever Mechanize or get_host_without_www raise
def get_domain_for_company(name)
  agent = Mechanize.new
  # agent.set_proxy '78.186.178.153', 8080
  search_form = agent.get('http://www.google.com/').form('f')
  search_form.q = name
  results = agent.submit(search_form, search_form.buttons.first)

  results.links.each do |link|
    # Capture the value of the q parameter; hrefs without one are skipped
    # instead of handing nil to the host parser.
    target = link.href.to_s[/url\?q=([^&]+)/, 1]
    return get_host_without_www(target) if target
  end

  # Without this, the method would return the links collection itself when
  # nothing matched.
  nil
end
#puts get_domain_for_company('Reflect Media') | |
# puts "Seeding #{__method__.to_s}" | |
# | |
# Seed generation: for every row of db/operators_seed.csv, look up the
# operator's domain and append an "id,name,domain" row to
# new_operators_seed.csv.
output_path = 'new_operators_seed.csv'

# The output is opened in append mode, so the header row is only written when
# the file does not exist yet or is empty; re-runs no longer duplicate it.
write_header = File.size?(output_path).nil?

CSV.open(output_path, 'ab') do |csv|
  csv << %w[id name domain] if write_header

  CSV.foreach('db/operators_seed.csv', headers: true) do |row|
    domain =
      begin
        get_domain_for_company(row['name'])
      rescue StandardError => e
        # Best effort: a failed lookup must not abort the whole run, but the
        # reason is reported so it can be investigated.
        puts "could not get #{row['name']} domain (#{e.class}: #{e.message})"
        ''
      end

    # The lookup can yield a non-String when no result link matched; write an
    # empty domain in that case rather than a stringified object.
    domain = '' unless domain.is_a?(String)

    csv << [row['id'], row['name'], domain]
  end
end
# Appends +hashes+ as rows to "<filename>.csv".
#
# The keys of the first hash define the column order. A header row with those
# keys is written only when the file is missing or empty, because the file is
# opened in append mode and may already contain earlier rows.
#
# @param hashes [Array<Hash>] rows to write; nil or empty writes nothing
# @param filename [String] output path without the ".csv" extension
# @return [void]
def write_hashes_to_file_panel(hashes, filename)
  # Nothing to write; also avoids calling #keys on nil for an empty list.
  return if hashes.nil? || hashes.empty?

  path = "#{filename}.csv"
  columns = hashes.first.keys
  needs_header = File.size?(path).nil?

  CSV.open(path, 'ab') do |csv|
    csv << columns if needs_header
    # values_at keeps every row aligned with the header even if a hash lists
    # its keys in a different order.
    hashes.each { |hash| csv << hash.values_at(*columns) }
  end
  nil
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment