Skip to content

Instantly share code, notes, and snippets.

@ahoward
Created September 20, 2016 22:56
Show Gist options
  • Select an option

  • Save ahoward/7f00432fc00baa48ae51acb7929647c8 to your computer and use it in GitHub Desktop.

Select an option

Save ahoward/7f00432fc00baa48ae51acb7929647c8 to your computer and use it in GitHub Desktop.
# encoding: utf-8
#
# Remove previously stored Korean-locale data before importing: first the
# domains that point at those companies, then the companies themselves.
companies = Company.where(:locale => 'ko')
korean_company_ids = companies.map(&:id)
domains = Domain.where(:company_id.in => korean_company_ids)
domains.destroy_all
companies.destroy_all
#
# Read data/ko.csv (relative to the Rails root) into memory and parse it with
# the standard CSV library; `csv` holds the parsed rows.
require 'csv'

ko_csv = "#{ Rails.root }/data/ko.csv"
buf = File.read(ko_csv)
csv = CSV.parse(buf)
#
# Column names applied positionally to every CSV data row. The names that
# start with an underscore, as well as re/oil/hydro, are not read by the
# import code in this file.
headers = %w[
  title
  url
  cei
  _a
  energy_transparency
  _b
  renewable_energy_commitment
  _c
  renewable_energy_champion
  re
  oil
  hydro
  amount_gas
  amount_coal
  amount_nuclear
  _d
  _e
  grade
  locale
  twitter_handle_en
  service_host_en
  twitter_handle_ko
  service_host_ko
  competitor_1
  competitor_2
  competitor_3
  domains
]
#
# Turn the parsed CSV into row objects. The first CSV row is kept in `header`
# and is not used for key names; every later row is paired positionally with
# `headers` and wrapped via Map.for.
header, *records = csv
rows = records.map { |cells| Map.for(Hash[headers.zip(cells)]) }
#
# Build (without saving) one Company per parsed row, along with an :en
# translation and its domain records. The competitor values from each row are
# stored in `competitors`, keyed by the new company's id, for use once the
# companies exist.
companies = []
competitors = {}

string_fields  = %w[ title url cei energy_transparency grade locale domains ]
integer_fields = %w[ amount_gas amount_coal amount_nuclear ]
boolean_fields = %w[ renewable_energy_commitment renewable_energy_champion ]

rows.each do |row|
  company = Company.new
  companies << company

  # Copy the plain columns, coercing each group to its target type.
  string_fields.each  { |field| company[field] = Coerce.string(row[field]) }
  integer_fields.each { |field| company[field] = row[field].to_i }
  boolean_fields.each { |field| company[field] = Coerce.boolean(row[field]) }

  # The *_ko columns go on the company itself, the *_en columns on an :en
  # translation.
  company[:twitter_handle] = row["twitter_handle_ko"]
  company[:service_host] = row["service_host_ko"]

  english = company.translations.build(:locale => :en)
  english.twitter_handle = row["twitter_handle_en"]
  english.service_host = row["service_host_en"]

  # Each listed domain is registered twice: with every "*." removed and
  # exactly as written. Duplicates are collapsed before building.
  names = []
  Coerce.list_of_strings(company[:domains]).each do |pattern|
    names.push(pattern.gsub('*.', ''), pattern)
  end
  names.uniq.each { |name| company.domains.build(:name => name) }

  company.raw_score ||= 0

  company.url = company.url.downcase unless company.url.blank?

  # A company without a twitter handle gets its url in that field instead.
  company.twitter_handle = company.url if company.twitter_handle.blank?

  competitors[company.id] = Coerce.list_of_strings(
    row['competitor_1'],
    row['competitor_2'],
    row['competitor_3']
  )
end
#
# For every built company: print a separator line and a dump of the company,
# its translations and its domains, then save the company followed by each of
# its domains (using save!).
separator = '=' * 79

companies.each do |company|
  puts(separator)
  p(company.title)
  p(company.as_document)
  p(company.translations.map { |translation| translation.as_document })
  p(company.domains.map { |domain| domain.as_document })

  company.save!
  company.domains.each(&:save!)
end
#
# Second pass over the collected competitor values: look up a Domain for each
# one and record the owning company ids on the company. This runs after the
# save loop, presumably so the lookups can see the newly saved domains.
competitors.each do |company_id, urls|
  company = Company.find(company_id)

  competitor_ids = []
  urls.each do |url|
    match = Domain.best_match_query_for(url)
    # Values with no matching domain are skipped.
    competitor_ids << match.company_id if match
  end

  company.set(:competitor_ids, competitor_ids)
end
# Reference note only: this array literal is evaluated and discarded (it is
# never assigned or used). It lists the row keys that the import code above
# actually reads: the string, integer and boolean company fields, the
# ko/en twitter_handle and service_host columns, and the three competitor
# columns.
%w[
title
url
cei
energy_transparency
grade
locale
domains
amount_gas
amount_coal
amount_nuclear
renewable_energy_commitment
renewable_energy_champion
twitter_handle_en
service_host_en
twitter_handle_ko
service_host_ko
competitor_1
competitor_2
competitor_3
]
# Reference note only: this heredoc string is evaluated and discarded (it is
# never assigned or used). It holds two sample dumps: the first appears to be
# an existing company document, the second appears to be one parsed CSV row.
# NOTE(review): the second sample uses the key "lang" where the `headers`
# list in this file uses "locale" -- confirm which one the data really has.
<<-__
{"id"=>"560ac3570fa604968800016f",
"_id"=>"560ac3570fa604968800016f",
"name"=>"equinix_com",
"title"=>"Equinix.com",
"slug"=>"equinix-com",
"description"=>nil,
"url"=>"http://equinix.com",
"energy_transparency"=>true,
"renewable_energy_commitment"=>true,
"renewable_energy_champion"=>false,
"cei"=>15,
"amount_gas"=>29,
"amount_coal"=>29,
"amount_nuclear"=>29,
"raw_score"=>nil,
"grade"=>"C",
"twitter_handle"=>"equinix",
"service_host"=>"Equinix.com",
"competitor_ids"=>
["560ac3570fa6049688000169",
"560ac3530fa6049688000043",
"560ac3520fa6049688000010"],
"locale"=>"en",
"created_at"=>Tue, 29 Sep 2015 16:59:03 UTC +00:00,
"updated_at"=>Mon, 20 Jul 2015 15:43:51 UTC +00:00}
{"title"=>"nuclearwebsite",
"url"=>"http://greenpeacekorea.org/nonuke",
"cei"=>"100",
"_a"=>"25",
"energy_transparency"=>"True",
"_b"=>"25",
"renewable_energy_commitment"=>"TRUE",
"_c"=>"25",
"renewable_energy_champion"=>"TRUE",
"re"=>"100",
"oil"=>nil,
"hydro"=>nil,
"amount_gas"=>nil,
"amount_coal"=>nil,
"amount_nuclear"=>nil,
"_d"=>nil,
"_e"=>nil,
"grade"=>"A",
"lang"=>"ko",
"twitter_handle_en"=>"@greenpeacekorea",
"service_host_en"=>"Google",
"twitter_handle_ko"=>"@greenpeacekorea",
"service_host_ko"=>"구글",
"competitor_1"=>nil,
"competitor_2"=>nil,
"competitor_3"=>nil,
"domains"=>"*.greenpeacekorea.org"}
__
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment