|
#!/usr/bin/env ruby |
|
require 'open3' |
|
require 'fileutils' |
|
require 'tempfile' |
|
|
|
CITIES_POPULATION = 500 |
|
LANGS = %w(ru en uk zh tr) |
|
IN_LANGS_QUERY_STR = LANGS.map{|w| "'#{w}'"}.join(',') |
|
|
|
def run_command(command) |
|
puts("+: " + command) |
|
|
|
Open3.popen2e(command) do |stdin, stdout_stderr, wait_thread| |
|
Thread.new do |
|
stdout_stderr.each { |l| puts l } |
|
end |
|
wait_thread.value |
|
end |
|
end |
|
|
|
def create_temp_file_and_run_sql(sql) |
|
file = Tempfile.new('tempsql') |
|
begin |
|
file.write(sql) |
|
file.flush |
|
run_command("psql --username=postgres -d geonames -f #{file.path}") |
|
ensure |
|
file.close |
|
file.unlink |
|
end |
|
end |
|
|
|
def download(directory) |
|
FileUtils.mkdir_p(directory) |
|
|
|
Dir.chdir(directory) do |
|
%W( |
|
http://download.geonames.org/export/dump/admin1CodesASCII.txt |
|
http://download.geonames.org/export/dump/admin2Codes.txt |
|
http://download.geonames.org/export/dump/allCountries.zip |
|
http://download.geonames.org/export/dump/alternateNames.zip |
|
http://download.geonames.org/export/dump/countryInfo.txt |
|
http://download.geonames.org/export/dump/iso-languagecodes.txt |
|
http://download.geonames.org/export/dump/featureCodes_en.txt |
|
http://download.geonames.org/export/dump/cities#{CITIES_POPULATION}.zip |
|
).each do |url| |
|
filename = url.split('/').last |
|
unzip = filename.split('.').last == 'zip' |
|
|
|
next if File.exists?(filename.sub('.zip', '')) || File.exists?(filename.sub('.zip', '.txt')) |
|
|
|
run_command("curl -s -o #{filename} #{url}") |
|
|
|
if unzip |
|
run_command("unzip #{filename}") |
|
FileUtils.rm(filename) |
|
end |
|
end |
|
end |
|
end |
|
|
|
def patch(directory) |
|
Dir.chdir(directory) do |
|
countryinfo = File.open('countryInfo.txt').read |
|
new_lines = [] |
|
countryinfo.lines.each do |line| |
|
next if line.start_with?('#') |
|
|
|
new_lines << line |
|
end |
|
File.open('countryInfo_patched.txt', 'w+').write(new_lines.join) |
|
|
|
featurecodes = File.open('featureCodes_en.txt').read |
|
new_lines = [] |
|
featurecodes.lines.each do |line| |
|
next if line.start_with?('null') |
|
|
|
fcode, label, description = line.split("\t") |
|
code, fclass = fcode.split('.') |
|
|
|
new_lines << [code, fclass, fcode, label, description].join("\t") |
|
end |
|
File.open('featureCodes_en_patched.txt', 'w+').write(new_lines.join) |
|
|
|
languagecodes = File.open('iso-languagecodes.txt').read |
|
first_line = languagecodes.lines.first |
|
new_lines = [] |
|
languagecodes.lines.each do |line| |
|
next if line == first_line |
|
|
|
new_lines << line |
|
end |
|
File.open('iso-languagecodes_patched.txt', 'w+').write(new_lines.join) |
|
|
|
admin1codes = File.open('admin1CodesASCII.txt').read |
|
new_lines = [] |
|
admin1codes.lines.each do |line| |
|
code, name, alt_name_english, geonameid = line.split("\t") |
|
countrycode, admin1_code = code.split('.') |
|
|
|
new_lines << [code, countrycode, admin1_code, name, alt_name_english, geonameid].join("\t") |
|
end |
|
File.open('admin1CodesASCII_patched.txt', 'w+').write(new_lines.join) |
|
|
|
admin2codes = File.open('admin2Codes.txt').read |
|
new_lines = [] |
|
admin2codes.lines.each do |line| |
|
code, name, alt_name_english, geonameid = line.split("\t") |
|
countrycode, admin1_code, admin2_code = code.split('.') |
|
|
|
new_lines << [code, countrycode, admin1_code, admin2_code, name, alt_name_english, geonameid].join("\t") |
|
end |
|
File.open('admin2Codes_patched.txt', 'w+').write(new_lines.join) |
|
end |
|
end |
|
|
|
def setup_database |
|
run_command("createdb --username=postgres geonames") |
|
run_command("createlang --username=postgres plpgsql geonames") |
|
|
|
%w( |
|
/usr/share/postgresql/9.6/contrib/postgis-2.5/postgis.sql |
|
/usr/share/postgresql/9.6/contrib/postgis-2.5/spatial_ref_sys.sql |
|
/usr/share/postgresql/9.6/contrib/postgis-2.5/postgis_comments.sql |
|
).each do |postgis_file| |
|
raise "Postgis file don't exist: #{postgis_file}! :(" unless File.exist?(postgis_file) |
|
run_command("psql --username=postgres -d geonames -f #{postgis_file}") |
|
end |
|
end |
|
|
|
def setup_tables |
|
setup_queries = <<-SQL |
|
CREATE EXTENSION "postgis"; |
|
|
|
CREATE TABLE geoname ( |
|
geonameid int, |
|
name varchar(200), |
|
asciiname varchar(200), |
|
alternatenames varchar(10000), |
|
latitude float, |
|
longitude float, |
|
fclass char(1), |
|
fcode varchar(10), |
|
country varchar(2), |
|
cc2 text, |
|
admin1 varchar(20), |
|
admin2 varchar(80), |
|
admin3 varchar(20), |
|
admin4 varchar(20), |
|
population bigint, |
|
elevation int, |
|
gtopo30 int, |
|
timezone varchar(40), |
|
moddate date |
|
); |
|
ALTER TABLE ONLY geoname ADD CONSTRAINT pk_geonameid PRIMARY KEY (geonameid); |
|
CREATE INDEX index_geoname_on_name ON geoname USING btree (name); |
|
|
|
CREATE TABLE alternatename ( |
|
alternatenameid int, |
|
geonameid int, |
|
isoLanguage varchar(7), |
|
alternatename varchar(200), |
|
ispreferredname boolean, |
|
isshortname boolean, |
|
iscolloquial boolean, |
|
ishistoric boolean |
|
); |
|
ALTER TABLE ONLY alternatename ADD CONSTRAINT pk_alternatenameid PRIMARY KEY (alternatenameid); |
|
|
|
CREATE TABLE admin1codes ( |
|
code varchar(11), |
|
countrycode char(3), |
|
admin1_code varchar(10), |
|
name varchar(200), |
|
alt_name_english varchar(200), |
|
geonameid int |
|
); |
|
ALTER TABLE ONLY admin1codes ADD CONSTRAINT pk_admin1id PRIMARY KEY (geonameid); |
|
|
|
CREATE TABLE admin2codes ( |
|
code varchar(50), |
|
countrycode char(2), |
|
admin1_code varchar(200), |
|
admin2_code varchar(200), |
|
name varchar(200), |
|
alt_name_english varchar(200), |
|
geonameid int |
|
); |
|
ALTER TABLE ONLY admin2codes ADD CONSTRAINT pk_admin2id PRIMARY KEY (geonameid); |
|
|
|
CREATE TABLE countryinfo ( |
|
iso_alpha2 char(2), |
|
iso_alpha3 char(3), |
|
iso_numeric integer, |
|
fips_code varchar(3), |
|
name varchar(200), |
|
capital varchar(200), |
|
areainsqkm double precision, |
|
population integer, |
|
continent varchar(2), |
|
tld varchar(10), |
|
currencycode varchar(3), |
|
currencyname varchar(20), |
|
phone varchar(20), |
|
postalcode varchar(100), |
|
postalcoderegex varchar(200), |
|
languages varchar(200), |
|
geonameid int, |
|
neighbors varchar(50), |
|
equivfipscode varchar(3) |
|
); |
|
ALTER TABLE ONLY countryinfo ADD CONSTRAINT pk_iso_alpha2 PRIMARY KEY (iso_alpha2); |
|
|
|
CREATE TABLE featurecodes ( |
|
code varchar(1), |
|
class varchar(10), |
|
fcode varchar (10), |
|
label varchar(100), |
|
description varchar(1000) |
|
); |
|
ALTER TABLE ONLY featurecodes ADD CONSTRAINT pk_fcode PRIMARY KEY (fcode); |
|
|
|
CREATE TABLE languagecodes ( |
|
iso_639_3 varchar(10), |
|
iso_639_2 varchar(10), |
|
iso_639_1 varchar(2), |
|
name varchar(1000) |
|
); |
|
ALTER TABLE ONLY languagecodes ADD CONSTRAINT pk_languageid PRIMARY KEY (iso_639_3); |
|
|
|
ALTER TABLE ONLY countryinfo ADD CONSTRAINT fk_geonameid FOREIGN KEY (geonameid) REFERENCES geoname(geonameid); |
|
ALTER TABLE ONLY alternatename ADD CONSTRAINT fk_geonameid FOREIGN KEY (geonameid) REFERENCES geoname(geonameid); |
|
|
|
CREATE TABLE cities ( |
|
geonameid int, |
|
name varchar(200), |
|
asciiname varchar(200), |
|
alternatenames varchar(10000), |
|
latitude float, |
|
longitude float, |
|
fclass char(1), |
|
fcode varchar(10), |
|
country varchar(2), |
|
cc2 text, |
|
admin1 varchar(20), |
|
admin2 varchar(80), |
|
admin3 varchar(20), |
|
admin4 varchar(20), |
|
population bigint, |
|
elevation int, |
|
gtopo30 int, |
|
timezone varchar(40), |
|
moddate date |
|
); |
|
ALTER TABLE ONLY cities ADD CONSTRAINT pk_cities_geonameid PRIMARY KEY (geonameid); |
|
CREATE INDEX index_cities_on_name ON cities USING btree (name); |
|
SQL |
|
create_temp_file_and_run_sql(setup_queries) |
|
end |
|
|
|
def populate(directory) |
|
directory = File.join(Dir.pwd, directory) |
|
|
|
queries = [] |
|
queries << "copy geoname (geonameid,name,asciiname,alternatenames,latitude,longitude,fclass,fcode,country,cc2,admin1,admin2,admin3,admin4,population,elevation,gtopo30,timezone,moddate) from '#{directory}/allCountries.txt' null as '';" |
|
queries << "copy cities (geonameid,name,asciiname,alternatenames,latitude,longitude,fclass,fcode,country,cc2,admin1,admin2,admin3,admin4,population,elevation,gtopo30,timezone,moddate) from '#{directory}/cities#{CITIES_POPULATION}.txt' null as '';" |
|
queries << "copy alternatename (alternatenameid,geonameid,isolanguage,alternatename,ispreferredname,isshortname,iscolloquial,ishistoric) from '#{directory}/alternateNames.txt' null as '';" |
|
queries << "copy countryinfo (iso_alpha2,iso_alpha3,iso_numeric,fips_code,name,capital,areainsqkm,population,continent,tld,currencycode,currencyname,phone,postalcode,postalcoderegex,languages,geonameid,neighbors,equivfipscode) from '#{directory}/countryInfo_patched.txt' null as '';" |
|
queries << "copy featurecodes (code, class, fcode, label, description) from '#{directory}/featureCodes_en_patched.txt' null as '';" |
|
queries << "copy languagecodes (iso_639_3, iso_639_2, iso_639_1, name) from '#{directory}/iso-languagecodes_patched.txt' null as '';" |
|
queries << "copy admin1codes (code, countrycode, admin1_code, name, alt_name_english, geonameid) from '#{directory}/admin1CodesASCII_patched.txt' null as '';" |
|
queries << "copy admin2codes (code, countrycode, admin1_code, admin2_code, name, alt_name_english, geonameid) from '#{directory}/admin2Codes_patched.txt' null as '';" |
|
|
|
queries.each do |populate_sql| |
|
create_temp_file_and_run_sql(populate_sql) |
|
end |
|
end |
|
|
|
def create_geometry |
|
geometry = "SELECT AddGeometryColumn ('public','geoname','geometry',4326,'POINT',2); |
|
UPDATE geoname SET geometry = ST_PointFromText('POINT(' || longitude || ' ' || latitude || ')', 4326); |
|
CREATE INDEX idx_geoname_geometry ON public.geoname USING gist(geometry);" |
|
|
|
create_temp_file_and_run_sql(geometry) |
|
end |
|
|
|
def drop_db |
|
puts 'drop db' |
|
run_command("dropdb --if-exists --username=postgres geonames") |
|
end |
|
|
|
def create_material_view |
|
query = <<-SQL |
|
DROP MATERIALIZED VIEW IF EXISTS l_cities; |
|
CREATE MATERIALIZED VIEW l_cities AS |
|
select |
|
geonameid as id, |
|
name as base_name, |
|
asciiname as ascii_name, |
|
country as country_code, |
|
ARRAY [alternatenames] as alternate_names, |
|
admin1 as state_code, |
|
admin2 as county_code, |
|
latitude, |
|
longitude, |
|
timezone |
|
from cities; |
|
|
|
DROP MATERIALIZED VIEW IF EXISTS l_city_translations; |
|
CREATE MATERIALIZED VIEW l_city_translations AS |
|
select |
|
geonameid as l_city_id, |
|
isolanguage as locale, |
|
alternatename as name |
|
from alternatename |
|
where EXISTS( |
|
SELECT |
|
FROM cities |
|
WHERE cities.geonameid = alternatename.geonameid |
|
) and isolanguage in (#{IN_LANGS_QUERY_STR}); |
|
|
|
DROP MATERIALIZED VIEW IF EXISTS l_states; |
|
CREATE MATERIALIZED VIEW l_states AS |
|
select |
|
geonameid as id, |
|
countrycode as country_code, |
|
admin1_code as state_code, |
|
name as base_name, |
|
alt_name_english as base_alt_name |
|
from admin1codes; |
|
|
|
DROP MATERIALIZED VIEW IF EXISTS l_states_translations; |
|
CREATE MATERIALIZED VIEW l_state_translations AS |
|
select |
|
geonameid as l_state_id, |
|
isolanguage as locale, |
|
alternatename as name |
|
from alternatename |
|
where EXISTS( |
|
SELECT |
|
FROM admin1codes |
|
WHERE admin1codes.geonameid = alternatename.geonameid |
|
) and isolanguage in (#{IN_LANGS_QUERY_STR}); |
|
|
|
|
|
DROP MATERIALIZED VIEW IF EXISTS l_counties; |
|
CREATE MATERIALIZED VIEW l_counties AS |
|
select |
|
geonameid as id, |
|
countrycode as country_code, |
|
admin1_code as state_code, |
|
admin2_code as county_code, |
|
name as base_name, |
|
alt_name_english as base_alt_name |
|
from admin2codes; |
|
|
|
DROP MATERIALIZED VIEW IF EXISTS l_county_translations; |
|
CREATE MATERIALIZED VIEW l_county_translations AS |
|
select |
|
geonameid as l_county_id, |
|
isolanguage as locale, |
|
alternatename as name |
|
from alternatename |
|
where EXISTS( |
|
SELECT |
|
FROM admin2codes |
|
WHERE admin2codes.geonameid = alternatename.geonameid |
|
) and isolanguage in (#{IN_LANGS_QUERY_STR}); |
|
SQL |
|
create_temp_file_and_run_sql query |
|
end |
|
|
|
download('tmp_geonames') |
|
patch('tmp_geonames') |
|
drop_db if ENV['DROP_DB'] == 'true' |
|
setup_database |
|
setup_tables |
|
populate('tmp_geonames') |
|
create_geometry |
|
|
|
create_material_view |