-
-
Save ahoward/7f00432fc00baa48ae51acb7929647c8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # encoding: utf-8 | |
| # | |
# Step 1: load data/ko.csv into `rows`, then remove the existing 'ko' data.
#
# The CSV is read and parsed BEFORE anything is destroyed, so a missing or
# malformed file raises while the current 'ko' companies/domains are still
# intact (File.read and CSV.parse both raise on failure).
require 'csv'

ko_csv = "#{ Rails.root }/data/ko.csv"
buf = File.read(ko_csv)

# :skip_blanks drops completely empty lines. Without it a blank line parses to
# [] and would later become a Company whose attributes are all nil, and a
# leading blank line would be mistaken for the header row.
csv = CSV.parse(buf, :skip_blanks => true)

# Column names are assigned by position; the header line stored in the CSV
# file itself is discarded below rather than used as keys.
headers = %w[
  title url cei _a energy_transparency _b renewable_energy_commitment _c
  renewable_energy_champion re oil hydro amount_gas amount_coal amount_nuclear
  _d _e grade locale twitter_handle_en service_host_en twitter_handle_ko
  service_host_ko competitor_1 competitor_2 competitor_3 domains
]

# Skip the first (header) row; every remaining row becomes a Map keyed by
# `headers`. zip pads short rows with nil and ignores surplus trailing cells.
rows = csv.drop(1).map do |row|
  Map.for(Hash[headers.zip(row)])
end

# Only now remove the previously imported 'ko' companies, domains first.
companies = Company.where(:locale => 'ko')
domains = Domain.where(:company_id.in => companies.map(&:id))
domains.destroy_all
companies.destroy_all
| # | |
# Step 2: build (but do not save yet) one Company per parsed CSV row.
#
# companies   - the unsaved Company objects, in CSV order.
# competitors - company.id => list of competitor strings taken from the
#               competitor_1..3 columns; consumed after the companies are saved.
companies = []
competitors = {}

rows.each do |row|
  company = Company.new
  companies.push(company)

  # Columns copied through Coerce.string.
  # NOTE(review): cei and energy_transparency are written as strings here,
  # while the sample stored record at the bottom of this script shows an
  # integer and a boolean; presumably the model typecasts on write - confirm.
  %w[ title url cei energy_transparency grade locale domains ].each do |key|
    company[key] = Coerce.string(row[key])
  end

  # Integer columns; nil.to_i is 0, so empty cells are stored as 0.
  %w[ amount_gas amount_coal amount_nuclear ].each do |key|
    company[key] = row[key].to_i
  end

  %w[ renewable_energy_commitment renewable_energy_champion ].each do |key|
    company[key] = Coerce.boolean(row[key])
  end

  # The *_ko columns populate the company itself ...
  company[:twitter_handle] = row["twitter_handle_ko"]
  company[:service_host] = row["service_host_ko"]

  # ... and the *_en columns go into an :en translation.
  translation = company.translations.build(:locale => :en)
  translation.twitter_handle = row["twitter_handle_en"]
  translation.service_host = row["service_host_en"]

  # Each listed domain is registered twice: once with every '*.' removed and
  # once verbatim. uniq collapses the pair when there was nothing to remove.
  names = Coerce.list_of_strings(company[:domains]).flat_map do |domain|
    [domain.gsub('*.', ''), domain]
  end
  names.uniq.each { |name| company.domains.build(:name => name) }

  company.raw_score ||= 0

  company.url = company.url.downcase unless company.url.blank?

  # NOTE(review): a blank twitter handle falls back to the company URL -
  # confirm this is intended rather than leaving the handle empty.
  company.twitter_handle = company.url if company.twitter_handle.blank?

  competitors[company.id] =
    Coerce.list_of_strings(
      row['competitor_1'],
      row['competitor_2'],
      row['competitor_3']
    )
end
| # | |
# Step 3: print each built company for inspection, then persist it together
# with its domains. save! raises on failure, which aborts the run.
companies.each do |record|
  puts('=' * 79)
  p record.title
  p record.as_document
  p record.translations.map(&:as_document)
  p record.domains.map(&:as_document)

  record.save!
  record.domains.each(&:save!)
end
| # | |
# Step 4: now that every company is saved, turn each company's competitor
# strings into company ids and store them on the company.
competitors.each do |company_id, urls|
  owner = Company.find(company_id)

  # Entries with no matching domain are skipped.
  # NOTE(review): assumes Domain.best_match_query_for returns a single domain
  # (or nil/false), not a query object - confirm against the Domain model.
  competitor_ids = urls.each_with_object([]) do |url, ids|
    match = Domain.best_match_query_for(url)
    ids.push(match.company_id) if match
  end

  owner.set(:competitor_ids, competitor_ids)
end
# Reference notes only: nothing from here to the end of the script is assigned
# to a variable or passed to a method.
#
# The word list below is a bare %w literal naming CSV columns, one per line.
| %w[ | |
| title | |
| url | |
| cei | |
| energy_transparency | |
| grade | |
| locale | |
| domains | |
| amount_gas | |
| amount_coal | |
| amount_nuclear | |
| renewable_energy_commitment | |
| renewable_energy_champion | |
| twitter_handle_en | |
| service_host_en | |
| twitter_handle_ko | |
| service_host_ko | |
| competitor_1 | |
| competitor_2 | |
| competitor_3 | |
| ] | |
# The heredoc below is likewise an unassigned string literal. It holds two
# inspected hashes: the first appears to be a stored company document (it has
# _id, created_at and updated_at), the second appears to be one parsed CSV row.
# NOTE(review): the second sample has a "lang" key where the headers list near
# the top of this script has "locale" - presumably captured from an earlier
# revision of the script; confirm before relying on it.
| <<-__ | |
| {"id"=>"560ac3570fa604968800016f", | |
| "_id"=>"560ac3570fa604968800016f", | |
| "name"=>"equinix_com", | |
| "title"=>"Equinix.com", | |
| "slug"=>"equinix-com", | |
| "description"=>nil, | |
| "url"=>"http://equinix.com", | |
| "energy_transparency"=>true, | |
| "renewable_energy_commitment"=>true, | |
| "renewable_energy_champion"=>false, | |
| "cei"=>15, | |
| "amount_gas"=>29, | |
| "amount_coal"=>29, | |
| "amount_nuclear"=>29, | |
| "raw_score"=>nil, | |
| "grade"=>"C", | |
| "twitter_handle"=>"equinix", | |
| "service_host"=>"Equinix.com", | |
| "competitor_ids"=> | |
| ["560ac3570fa6049688000169", | |
| "560ac3530fa6049688000043", | |
| "560ac3520fa6049688000010"], | |
| "locale"=>"en", | |
| "created_at"=>Tue, 29 Sep 2015 16:59:03 UTC +00:00, | |
| "updated_at"=>Mon, 20 Jul 2015 15:43:51 UTC +00:00} | |
| {"title"=>"nuclearwebsite", | |
| "url"=>"http://greenpeacekorea.org/nonuke", | |
| "cei"=>"100", | |
| "_a"=>"25", | |
| "energy_transparency"=>"True", | |
| "_b"=>"25", | |
| "renewable_energy_commitment"=>"TRUE", | |
| "_c"=>"25", | |
| "renewable_energy_champion"=>"TRUE", | |
| "re"=>"100", | |
| "oil"=>nil, | |
| "hydro"=>nil, | |
| "amount_gas"=>nil, | |
| "amount_coal"=>nil, | |
| "amount_nuclear"=>nil, | |
| "_d"=>nil, | |
| "_e"=>nil, | |
| "grade"=>"A", | |
| "lang"=>"ko", | |
| "twitter_handle_en"=>"@greenpeacekorea", | |
| "service_host_en"=>"Google", | |
| "twitter_handle_ko"=>"@greenpeacekorea", | |
| "service_host_ko"=>"구글", | |
| "competitor_1"=>nil, | |
| "competitor_2"=>nil, | |
| "competitor_3"=>nil, | |
| "domains"=>"*.greenpeacekorea.org"} | |
| __ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment