Last active
December 16, 2015 06:19
-
-
Save D3MZ/5390631 to your computer and use it in GitHub Desktop.
Maxmind
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# MaxMind's dump uses an awkward character encoding that Ruby's CSV parser does not handle well, so lines are split by hand.
# This script won't generate the cleanest data, because it assumes one fixed encoding instead of detecting the real one, but the result is okay to insert into Mongo.
require 'pp'
require 'mongo'
require 'iso_country_codes'
require 'parallel'
class Maxmind | |
include Mongo | |
def initialize csv_path | |
@coll = MongoClient.new('localhost', 27017, pool_timeout: 300)['google']['cities'] | |
@path = csv_path | |
end | |
def parsed | |
file = File.open(@path, "r:ISO-8859-1") | |
headers = file.first.chomp.downcase.split(',') | |
cities = [] | |
file.each_line {|l| cities << Hash[headers.zip l.chomp.encode('UTF-8', :invalid => :replace).split(',')]} | |
cities | |
end | |
def normalized city | |
hash = { | |
"_id" => [city["latitude"],city["longitude"]].join(","), | |
"uri" => "http://www.maxmind.com/en/worldcities", | |
"date" => Time.now, | |
"country_code" => city["country"], | |
"city" => city["city"], | |
"accent_city" => city["accentcity"], | |
"region" => city["region"], | |
"geometry" => {"location" => {"latitude" => city["latitude"], | |
"longitude" => city["longitude"]} }, | |
"population" => city["population"].to_i | |
} | |
hash["country"] = IsoCountryCodes.find(city["country"]).name rescue nil | |
hash | |
end | |
def in_db? hash | |
raise("no _id") if hash["_id"].nil? | |
[email protected]_one("_id"=>hash["_id"],fields:{_id:1}).nil? | |
end | |
def insert_uniq hash | |
@coll.insert hash | |
rescue Mongo::OperationFailure | |
end | |
def dump_into_database | |
parsed.collect { |city| insert_uniq normalized city } | |
end | |
def parallel_dump_into_database | |
Parallel.each(parsed) { |city| insert_uniq normalized city } | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment