Skip to content

Instantly share code, notes, and snippets.

@doromones
Forked from kaspergrubbe/geonames_postgres.rb
Last active June 3, 2019 16:57
Show Gist options
  • Select an option

  • Save doromones/ef202727e654f58783513d672838479e to your computer and use it in GitHub Desktop.

Select an option

Save doromones/ef202727e654f58783513d672838479e to your computer and use it in GitHub Desktop.
Import files from Geonames.com into a PostgreSQL database that runs Postgis

Assumes the following:

Can authenticate to database by using the username postgres without a password.

Does the following:

Setup:

Initialize database and run it:

pg_ctl init -D vendor/postgresql
postgres -D vendor/postgresql

Setup user:

psql -p 5432 -h localhost -d postgres -c "CREATE USER postgres SUPERUSER;"

Run the script:

bundle ruby geonames_postgres.rb

Observe that the thing is working:

Make a query:

SELECT * FROM geoname ORDER BY geometry <-> ST_GeogFromText('POINT(12.026589 55.952455)') LIMIT 1

It should tell you something like:

2618269	Kregme	Kregme	Kregme,Kregome	55.94439	12.03401	P	PPL	DK		17	260			0		25	Europe/Copenhagen	2012-01-17	0101000020E610000053793BC269112840B05582C5E1F84B40
select
    geonameid as id,
    name as base_name,
    asciiname as ascii_name,
    country as country_code,
    ARRAY [alternatenames] as alternate_names,
    latitude,
    longitude,
    timezone
from cities;

select geonameid as city_id, isolanguage as locale, alternatename as name
from alternatename
where EXISTS(
        SELECT
        FROM cities
        WHERE cities.geonameid = alternatename.geonameid
    ) and isolanguage in ('ru', 'en', 'uk', 'zh', 'tr');
#!/usr/bin/env ruby
require 'open3'
require 'fileutils'
require 'tempfile'
CITIES_POPULATION = 500
LANGS = %w(ru en uk zh tr)
IN_LANGS_QUERY_STR = LANGS.map{|w| "'#{w}'"}.join(',')
def run_command(command)
puts("+: " + command)
Open3.popen2e(command) do |stdin, stdout_stderr, wait_thread|
Thread.new do
stdout_stderr.each { |l| puts l }
end
wait_thread.value
end
end
def create_temp_file_and_run_sql(sql)
file = Tempfile.new('tempsql')
begin
file.write(sql)
file.flush
run_command("psql --username=postgres -d geonames -f #{file.path}")
ensure
file.close
file.unlink
end
end
def download(directory)
FileUtils.mkdir_p(directory)
Dir.chdir(directory) do
%W(
http://download.geonames.org/export/dump/admin1CodesASCII.txt
http://download.geonames.org/export/dump/admin2Codes.txt
http://download.geonames.org/export/dump/allCountries.zip
http://download.geonames.org/export/dump/alternateNames.zip
http://download.geonames.org/export/dump/countryInfo.txt
http://download.geonames.org/export/dump/iso-languagecodes.txt
http://download.geonames.org/export/dump/featureCodes_en.txt
http://download.geonames.org/export/dump/cities#{CITIES_POPULATION}.zip
).each do |url|
filename = url.split('/').last
unzip = filename.split('.').last == 'zip'
next if File.exists?(filename.sub('.zip', '')) || File.exists?(filename.sub('.zip', '.txt'))
run_command("curl -s -o #{filename} #{url}")
if unzip
run_command("unzip #{filename}")
FileUtils.rm(filename)
end
end
end
end
def patch(directory)
Dir.chdir(directory) do
countryinfo = File.open('countryInfo.txt').read
new_lines = []
countryinfo.lines.each do |line|
next if line.start_with?('#')
new_lines << line
end
File.open('countryInfo_patched.txt', 'w+').write(new_lines.join)
featurecodes = File.open('featureCodes_en.txt').read
new_lines = []
featurecodes.lines.each do |line|
next if line.start_with?('null')
fcode, label, description = line.split("\t")
code, fclass = fcode.split('.')
new_lines << [code, fclass, fcode, label, description].join("\t")
end
File.open('featureCodes_en_patched.txt', 'w+').write(new_lines.join)
languagecodes = File.open('iso-languagecodes.txt').read
first_line = languagecodes.lines.first
new_lines = []
languagecodes.lines.each do |line|
next if line == first_line
new_lines << line
end
File.open('iso-languagecodes_patched.txt', 'w+').write(new_lines.join)
admin1codes = File.open('admin1CodesASCII.txt').read
new_lines = []
admin1codes.lines.each do |line|
code, name, alt_name_english, geonameid = line.split("\t")
countrycode, admin1_code = code.split('.')
new_lines << [code, countrycode, admin1_code, name, alt_name_english, geonameid].join("\t")
end
File.open('admin1CodesASCII_patched.txt', 'w+').write(new_lines.join)
admin2codes = File.open('admin2Codes.txt').read
new_lines = []
admin2codes.lines.each do |line|
code, name, alt_name_english, geonameid = line.split("\t")
countrycode, admin1_code, admin2_code = code.split('.')
new_lines << [code, countrycode, admin1_code, admin2_code, name, alt_name_english, geonameid].join("\t")
end
File.open('admin2Codes_patched.txt', 'w+').write(new_lines.join)
end
end
def setup_database
run_command("createdb --username=postgres geonames")
run_command("createlang --username=postgres plpgsql geonames")
%w(
/usr/share/postgresql/9.6/contrib/postgis-2.5/postgis.sql
/usr/share/postgresql/9.6/contrib/postgis-2.5/spatial_ref_sys.sql
/usr/share/postgresql/9.6/contrib/postgis-2.5/postgis_comments.sql
).each do |postgis_file|
raise "Postgis file don't exist: #{postgis_file}! :(" unless File.exist?(postgis_file)
run_command("psql --username=postgres -d geonames -f #{postgis_file}")
end
end
def setup_tables
setup_queries = <<-SQL
CREATE EXTENSION "postgis";
CREATE TABLE geoname (
geonameid int,
name varchar(200),
asciiname varchar(200),
alternatenames varchar(10000),
latitude float,
longitude float,
fclass char(1),
fcode varchar(10),
country varchar(2),
cc2 text,
admin1 varchar(20),
admin2 varchar(80),
admin3 varchar(20),
admin4 varchar(20),
population bigint,
elevation int,
gtopo30 int,
timezone varchar(40),
moddate date
);
ALTER TABLE ONLY geoname ADD CONSTRAINT pk_geonameid PRIMARY KEY (geonameid);
CREATE INDEX index_geoname_on_name ON geoname USING btree (name);
CREATE TABLE alternatename (
alternatenameid int,
geonameid int,
isoLanguage varchar(7),
alternatename varchar(200),
ispreferredname boolean,
isshortname boolean,
iscolloquial boolean,
ishistoric boolean
);
ALTER TABLE ONLY alternatename ADD CONSTRAINT pk_alternatenameid PRIMARY KEY (alternatenameid);
CREATE TABLE admin1codes (
code varchar(11),
countrycode char(3),
admin1_code varchar(10),
name varchar(200),
alt_name_english varchar(200),
geonameid int
);
ALTER TABLE ONLY admin1codes ADD CONSTRAINT pk_admin1id PRIMARY KEY (geonameid);
CREATE TABLE admin2codes (
code varchar(50),
countrycode char(2),
admin1_code varchar(200),
admin2_code varchar(200),
name varchar(200),
alt_name_english varchar(200),
geonameid int
);
ALTER TABLE ONLY admin2codes ADD CONSTRAINT pk_admin2id PRIMARY KEY (geonameid);
CREATE TABLE countryinfo (
iso_alpha2 char(2),
iso_alpha3 char(3),
iso_numeric integer,
fips_code varchar(3),
name varchar(200),
capital varchar(200),
areainsqkm double precision,
population integer,
continent varchar(2),
tld varchar(10),
currencycode varchar(3),
currencyname varchar(20),
phone varchar(20),
postalcode varchar(100),
postalcoderegex varchar(200),
languages varchar(200),
geonameid int,
neighbors varchar(50),
equivfipscode varchar(3)
);
ALTER TABLE ONLY countryinfo ADD CONSTRAINT pk_iso_alpha2 PRIMARY KEY (iso_alpha2);
CREATE TABLE featurecodes (
code varchar(1),
class varchar(10),
fcode varchar (10),
label varchar(100),
description varchar(1000)
);
ALTER TABLE ONLY featurecodes ADD CONSTRAINT pk_fcode PRIMARY KEY (fcode);
CREATE TABLE languagecodes (
iso_639_3 varchar(10),
iso_639_2 varchar(10),
iso_639_1 varchar(2),
name varchar(1000)
);
ALTER TABLE ONLY languagecodes ADD CONSTRAINT pk_languageid PRIMARY KEY (iso_639_3);
ALTER TABLE ONLY countryinfo ADD CONSTRAINT fk_geonameid FOREIGN KEY (geonameid) REFERENCES geoname(geonameid);
ALTER TABLE ONLY alternatename ADD CONSTRAINT fk_geonameid FOREIGN KEY (geonameid) REFERENCES geoname(geonameid);
CREATE TABLE cities (
geonameid int,
name varchar(200),
asciiname varchar(200),
alternatenames varchar(10000),
latitude float,
longitude float,
fclass char(1),
fcode varchar(10),
country varchar(2),
cc2 text,
admin1 varchar(20),
admin2 varchar(80),
admin3 varchar(20),
admin4 varchar(20),
population bigint,
elevation int,
gtopo30 int,
timezone varchar(40),
moddate date
);
ALTER TABLE ONLY cities ADD CONSTRAINT pk_cities_geonameid PRIMARY KEY (geonameid);
CREATE INDEX index_cities_on_name ON cities USING btree (name);
SQL
create_temp_file_and_run_sql(setup_queries)
end
def populate(directory)
directory = File.join(Dir.pwd, directory)
queries = []
queries << "copy geoname (geonameid,name,asciiname,alternatenames,latitude,longitude,fclass,fcode,country,cc2,admin1,admin2,admin3,admin4,population,elevation,gtopo30,timezone,moddate) from '#{directory}/allCountries.txt' null as '';"
queries << "copy cities (geonameid,name,asciiname,alternatenames,latitude,longitude,fclass,fcode,country,cc2,admin1,admin2,admin3,admin4,population,elevation,gtopo30,timezone,moddate) from '#{directory}/cities#{CITIES_POPULATION}.txt' null as '';"
queries << "copy alternatename (alternatenameid,geonameid,isolanguage,alternatename,ispreferredname,isshortname,iscolloquial,ishistoric) from '#{directory}/alternateNames.txt' null as '';"
queries << "copy countryinfo (iso_alpha2,iso_alpha3,iso_numeric,fips_code,name,capital,areainsqkm,population,continent,tld,currencycode,currencyname,phone,postalcode,postalcoderegex,languages,geonameid,neighbors,equivfipscode) from '#{directory}/countryInfo_patched.txt' null as '';"
queries << "copy featurecodes (code, class, fcode, label, description) from '#{directory}/featureCodes_en_patched.txt' null as '';"
queries << "copy languagecodes (iso_639_3, iso_639_2, iso_639_1, name) from '#{directory}/iso-languagecodes_patched.txt' null as '';"
queries << "copy admin1codes (code, countrycode, admin1_code, name, alt_name_english, geonameid) from '#{directory}/admin1CodesASCII_patched.txt' null as '';"
queries << "copy admin2codes (code, countrycode, admin1_code, admin2_code, name, alt_name_english, geonameid) from '#{directory}/admin2Codes_patched.txt' null as '';"
queries.each do |populate_sql|
create_temp_file_and_run_sql(populate_sql)
end
end
def create_geometry
geometry = "SELECT AddGeometryColumn ('public','geoname','geometry',4326,'POINT',2);
UPDATE geoname SET geometry = ST_PointFromText('POINT(' || longitude || ' ' || latitude || ')', 4326);
CREATE INDEX idx_geoname_geometry ON public.geoname USING gist(geometry);"
create_temp_file_and_run_sql(geometry)
end
def drop_db
puts 'drop db'
run_command("dropdb --if-exists --username=postgres geonames")
end
def create_material_view
query = <<-SQL
DROP MATERIALIZED VIEW IF EXISTS l_cities;
CREATE MATERIALIZED VIEW l_cities AS
select
geonameid as id,
name as base_name,
asciiname as ascii_name,
country as country_code,
ARRAY [alternatenames] as alternate_names,
admin1 as state_code,
admin2 as county_code,
latitude,
longitude,
timezone
from cities;
DROP MATERIALIZED VIEW IF EXISTS l_city_translations;
CREATE MATERIALIZED VIEW l_city_translations AS
select
geonameid as l_city_id,
isolanguage as locale,
alternatename as name
from alternatename
where EXISTS(
SELECT
FROM cities
WHERE cities.geonameid = alternatename.geonameid
) and isolanguage in (#{IN_LANGS_QUERY_STR});
DROP MATERIALIZED VIEW IF EXISTS l_states;
CREATE MATERIALIZED VIEW l_states AS
select
geonameid as id,
countrycode as country_code,
admin1_code as state_code,
name as base_name,
alt_name_english as base_alt_name
from admin1codes;
DROP MATERIALIZED VIEW IF EXISTS l_states_translations;
CREATE MATERIALIZED VIEW l_state_translations AS
select
geonameid as l_state_id,
isolanguage as locale,
alternatename as name
from alternatename
where EXISTS(
SELECT
FROM admin1codes
WHERE admin1codes.geonameid = alternatename.geonameid
) and isolanguage in (#{IN_LANGS_QUERY_STR});
DROP MATERIALIZED VIEW IF EXISTS l_counties;
CREATE MATERIALIZED VIEW l_counties AS
select
geonameid as id,
countrycode as country_code,
admin1_code as state_code,
admin2_code as county_code,
name as base_name,
alt_name_english as base_alt_name
from admin2codes;
DROP MATERIALIZED VIEW IF EXISTS l_county_translations;
CREATE MATERIALIZED VIEW l_county_translations AS
select
geonameid as l_county_id,
isolanguage as locale,
alternatename as name
from alternatename
where EXISTS(
SELECT
FROM admin2codes
WHERE admin2codes.geonameid = alternatename.geonameid
) and isolanguage in (#{IN_LANGS_QUERY_STR});
SQL
create_temp_file_and_run_sql query
end
download('tmp_geonames')
patch('tmp_geonames')
drop_db if ENV['DROP_DB'] == 'true'
setup_database
setup_tables
populate('tmp_geonames')
create_geometry
create_material_view
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment