Skip to content

Instantly share code, notes, and snippets.

@D3MZ
Last active December 16, 2015 06:19
Show Gist options
  • Save D3MZ/5390631 to your computer and use it in GitHub Desktop.
Save D3MZ/5390631 to your computer and use it in GitHub Desktop.
Maxmind
# MaxMind's CSV export uses an awkward, non-UTF-8 encoding that Ruby's CSV parser does not handle well.
# This script won't generate the cleanest data because it does not treat the source encoding rigorously, but the result is fine to insert into Mongo.
require 'pp'
require 'mongo'
require 'iso_country_codes'
require 'parallel'
# Imports a MaxMind world-cities CSV file into MongoDB.
#
# Rows are read from the CSV at the path given to the constructor, reshaped
# into documents by #normalized and inserted into the `cities` collection of
# the `google` database on localhost:27017.
#
# Driver names are written as Mongo::... rather than mixing the whole Mongo
# namespace into this class.
class Maxmind
  # Connects to the local MongoDB server and remembers the CSV location.
  #
  # @param csv_path [String] path of the CSV file to import
  def initialize(csv_path)
    @coll = Mongo::MongoClient.new('localhost', 27017, pool_timeout: 300)['google']['cities']
    @path = csv_path
  end

  # Reads the CSV into an array of hashes keyed by the lower-cased header names.
  #
  # The file is read as ISO-8859-1 and every data line is transcoded to UTF-8.
  # Fields are split on bare commas (no quote handling); trailing empty fields
  # are dropped by String#split and therefore show up as nil values.
  #
  # @return [Array<Hash{String => String, nil}>] one hash per data line,
  #   or an empty array when the file has no header line
  def parsed
    # Block form guarantees the file handle is closed, even on an early return.
    File.open(@path, 'r:ISO-8859-1') do |file|
      header_line = file.gets
      return [] if header_line.nil?

      headers = header_line.chomp.downcase.split(',')
      file.each_line.map do |line|
        values = line.chomp.encode('UTF-8', invalid: :replace).split(',')
        Hash[headers.zip(values)]
      end
    end
  end

  # Builds the document stored in MongoDB for one parsed CSV row.
  #
  # The "_id" is "latitude,longitude", so two rows with identical coordinates
  # map to the same document id.
  #
  # @param city [Hash] a row as returned by #parsed
  # @return [Hash] the document; "country" is nil when the name lookup fails
  def normalized(city)
    latitude = city['latitude']
    longitude = city['longitude']

    {
      '_id' => [latitude, longitude].join(','),
      'uri' => 'http://www.maxmind.com/en/worldcities',
      'date' => Time.now,
      'country_code' => city['country'],
      'city' => city['city'],
      'accent_city' => city['accentcity'],
      'region' => city['region'],
      'geometry' => {
        'location' => { 'latitude' => latitude, 'longitude' => longitude }
      },
      'population' => city['population'].to_i,
      'country' => country_name(city['country'])
    }
  end

  # Tells whether a document with the same "_id" is already stored.
  #
  # @param hash [Hash] a document as returned by #normalized
  # @return [Boolean]
  # @raise [RuntimeError] when the document has no "_id"
  def in_db?(hash)
    id = hash['_id']
    raise 'no _id' if id.nil?

    # Selector and options must be separate arguments; merged into a single
    # hash, `fields` would become part of the query and never match.
    !@coll.find_one({ '_id' => id }, fields: { _id: 1 }).nil?
  end

  # Inserts a document, deliberately ignoring Mongo::OperationFailure
  # (presumably raised for duplicate "_id" values - confirm against the driver).
  #
  # @param hash [Hash] a document as returned by #normalized
  # @return [Object, nil] whatever the collection's insert returns, or nil
  #   when the insert failed
  def insert_uniq(hash)
    @coll.insert(hash)
  rescue Mongo::OperationFailure
    nil
  end

  # Parses the CSV and inserts every row sequentially.
  #
  # @return [Array] the result of #insert_uniq for each row
  def dump_into_database
    parsed.map { |city| insert_uniq(normalized(city)) }
  end

  # Same work as #dump_into_database, but the rows are handed to Parallel.each.
  # NOTE(review): the collection created in #initialize is shared by all
  # workers - confirm this is safe for the Parallel mode in use.
  def parallel_dump_into_database
    Parallel.each(parsed) { |city| insert_uniq(normalized(city)) }
  end

  private

  # Best-effort lookup of a country's name from its code; any failure
  # (unknown code, nil code, ...) yields nil instead of aborting the import.
  def country_name(code)
    IsoCountryCodes.find(code).name
  rescue StandardError
    nil
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment