-
-
Save EspadaV8/1357237 to your computer and use it in GitHub Desktop.
| #!/bin/bash | |
| #=============================================================================== | |
| # | |
| # FILE: getgeo.sh | |
| # | |
| # USAGE: ./getgeo.sh | |
| # | |
| # DESCRIPTION: run the script so that the geodata will be downloaded and inserted into your | |
| # database | |
| # | |
| # OPTIONS: --- | |
| # REQUIREMENTS: --- | |
| # BUGS: --- | |
| # NOTES: --- | |
| # AUTHOR: Andreas (aka Harpagophyt ) | |
| # COMPANY: <a href="http://forum.geonames.org/gforum/posts/list/926.page" target="_blank" rel="nofollow">http://forum.geonames.org/gforum/posts/list/926.page</a> | |
| # VERSION: 1.3 | |
| # CREATED: 07/06/2008 | |
| # REVISION: 1.1 2008-06-07 replace COPY continentCodes through INSERT statements. | |
| # 1.2 2008-11-25 Adjusted by Bastiaan Wakkie in order to not unnessisarily | |
| # download. | |
| # 1.3 2011-08-07 Updated script with tree changes. Removes 2 obsolete records from "countryinfo" dump image, | |
| # updated timeZones table with raw_offset and updated postalcode to varchar(20). | |
| #=============================================================================== | |
| #!/bin/bash | |
| WORKPATH="${HOME}/tmp/GIS/gisnames/geodata" | |
| TMPPATH="tmp" | |
| PCPATH="pc" | |
| PREFIX="_" | |
| DBHOST="127.0.0.1" | |
| DBPORT="5432" | |
| DBUSER="postgres" | |
| FILES="allCountries.zip alternateNames.zip userTags.zip admin1CodesASCII.txt admin2Codes.txt countryInfo.txt featureCodes_en.txt iso-languagecodes.txt timeZones.txt" | |
| psql -U $DBUSER -h $DBHOST -p $DBPORT -c "CREATE DATABASE geonames WITH TEMPLATE = template0 ENCODING = 'UTF8';" | |
| psql -U $DBUSER -h $DBHOST -p $DBPORT geonames <<EOT | |
| DROP TABLE geoname CASCADE; | |
| CREATE TABLE geoname ( | |
| geonameid INT, | |
| name VARCHAR(200), | |
| asciiname VARCHAR(200), | |
| alternatenames VARCHAR(6000), | |
| latitude FLOAT, | |
| longitude FLOAT, | |
| fclass CHAR(1), | |
| fcode VARCHAR(10), | |
| country VARCHAR(2), | |
| cc2 VARCHAR(60), | |
| admin1 VARCHAR(20), | |
| admin2 VARCHAR(80), | |
| admin3 VARCHAR(20), | |
| admin4 VARCHAR(20), | |
| population BIGINT, | |
| elevation INT, | |
| gtopo30 INT, | |
| timezone VARCHAR(40), | |
| moddate DATE | |
| ); | |
| DROP TABLE alternatename; | |
| CREATE TABLE alternatename ( | |
| alternatenameId INT, | |
| geonameid INT, | |
| isoLanguage VARCHAR(7), | |
| alternateName VARCHAR(300), | |
| isPreferredName BOOLEAN, | |
| isShortName BOOLEAN | |
| ); | |
| DROP TABLE countryinfo; | |
| CREATE TABLE "countryinfo" ( | |
| iso_alpha2 CHAR(2), | |
| iso_alpha3 CHAR(3), | |
| iso_numeric INTEGER, | |
| fips_code CHARACTER VARYING(3), | |
| country CHARACTER VARYING(200), | |
| capital CHARACTER VARYING(200), | |
| areainsqkm DOUBLE PRECISION, | |
| population INTEGER, | |
| continent CHAR(2), | |
| tld CHAR(10), | |
| currency_code CHAR(3), | |
| currency_name CHAR(15), | |
| phone CHARACTER VARYING(20), | |
| postal CHARACTER VARYING(60), | |
| postalRegex CHARACTER VARYING(200), | |
| languages CHARACTER VARYING(200), | |
| geonameId INT, | |
| neighbours CHARACTER VARYING(50), | |
| equivalent_fips_code CHARACTER VARYING(3) | |
| ); | |
| DROP TABLE iso_languagecodes; | |
| CREATE TABLE iso_languagecodes( | |
| iso_639_3 CHAR(4), | |
| iso_639_2 VARCHAR(50), | |
| iso_639_1 VARCHAR(50), | |
| language_name VARCHAR(200) | |
| ); | |
| DROP TABLE admin1CodesAscii; | |
| CREATE TABLE admin1CodesAscii ( | |
| code CHAR(20), | |
| name TEXT, | |
| nameAscii TEXT, | |
| geonameid INT | |
| ); | |
| DROP TABLE admin2CodesAscii; | |
| CREATE TABLE admin2CodesAscii ( | |
| code CHAR(80), | |
| name TEXT, | |
| nameAscii TEXT, | |
| geonameid INT | |
| ); | |
| DROP TABLE featureCodes; | |
| CREATE TABLE featureCodes ( | |
| code CHAR(7), | |
| name VARCHAR(200), | |
| description TEXT | |
| ); | |
| DROP TABLE timeZones; | |
| CREATE TABLE timeZones ( | |
| timeZoneId VARCHAR(200), | |
| GMT_offset NUMERIC(3,1), | |
| DST_offset NUMERIC(3,1), | |
| raw_offset NUMERIC(3,1) | |
| ); | |
| DROP TABLE continentCodes; | |
| CREATE TABLE continentCodes ( | |
| code CHAR(2), | |
| name VARCHAR(20), | |
| geonameid INT | |
| ); | |
| DROP TABLE postalcodes; | |
| CREATE TABLE postalcodes ( | |
| countrycode CHAR(2), | |
| postalcode VARCHAR(20), | |
| placename VARCHAR(180), | |
| admin1name VARCHAR(100), | |
| admin1code VARCHAR(20), | |
| admin2name VARCHAR(100), | |
| admin2code VARCHAR(20), | |
| admin3name VARCHAR(100), | |
| admin3code VARCHAR(20), | |
| latitude FLOAT, | |
| longitude FLOAT, | |
| accuracy SMALLINT | |
| ); | |
| ALTER TABLE ONLY alternatename | |
| ADD CONSTRAINT pk_alternatenameid PRIMARY KEY (alternatenameid); | |
| ALTER TABLE ONLY geoname | |
| ADD CONSTRAINT pk_geonameid PRIMARY KEY (geonameid); | |
| ALTER TABLE ONLY countryinfo | |
| ADD CONSTRAINT pk_iso_alpha2 PRIMARY KEY (iso_alpha2); | |
| ALTER TABLE ONLY countryinfo | |
| ADD CONSTRAINT fk_geonameid FOREIGN KEY (geonameid) REFERENCES geoname(geonameid); | |
| ALTER TABLE ONLY alternatename | |
| ADD CONSTRAINT fk_geonameid FOREIGN KEY (geonameid) REFERENCES geoname(geonameid); | |
| EOT | |
| # check if needed directories do already exsist | |
| if [ -d "$WORKPATH" ]; then | |
| echo "$WORKPATH exists..." | |
| sleep 0 | |
| else | |
| echo "$WORKPATH and subdirectories will be created..." | |
| mkdir -p $WORKPATH/{$TMPPATH,$PCPATH} | |
| echo "created $WORKPATH" | |
| fi | |
| echo | |
| echo ",---- STARTING (downloading, unpacking and preparing)" | |
| cd $WORKPATH/$TMPPATH | |
| for i in $FILES | |
| do | |
| wget -N -q "http://download.geonames.org/export/dump/$i" # get newer files | |
| if [ $i -nt $PREFIX$i ] || [ ! -e $PREFIX$i ] ; then | |
| cp -p $i $PREFIX$i | |
| unzip -u -q $i | |
| case "$i" in | |
| iso-languagecodes.txt) | |
| tail -n +2 iso-languagecodes.txt > iso-languagecodes.txt.tmp; | |
| ;; | |
| countryInfo.txt) | |
| grep -v '^#' countryInfo.txt | head -n -2 > countryInfo.txt.tmp; | |
| ;; | |
| timeZones.txt) | |
| tail -n +2 timeZones.txt > timeZones.txt.tmp; | |
| ;; | |
| esac | |
| echo "| $1 has been downloaded"; | |
| else | |
| echo "| $i is already the latest version" | |
| fi | |
| done | |
| # download the postalcodes. You must know yourself the url | |
| cd $WORKPATH/$PCPATH | |
| wget -q -N "http://download.geonames.org/export/zip/allCountries.zip" | |
| if [ $WORKPATH/$PCPATH/allCountries.zip -nt $WORKPATH/$PCPATH/allCountries$PREFIX.zip ] || [ ! -e $WORKPATH/$PCPATH/allCountries.zip ]; then | |
| echo "Attempt to unzip $WORKPATH/$PCPATH/allCountries.zip file..." | |
| unzip -u -q $WORKPATH/$PCPATH/allCountries.zip | |
| cp -p $WORKPATH/$PCPATH/allCountries.zip $WORKPATH/$PCPATH/allCountries$PREFIX.zip | |
| echo "| ....zip has been downloaded" | |
| else | |
| echo "| ....zip is already the latest version" | |
| fi | |
| echo "+---- FILL DATABASE ( this takes 2 days on my machine )" | |
| psql -e -U $DBUSER -h $DBHOST -p $DBPORT geonames <<EOT | |
| copy geoname (geonameid,name,asciiname,alternatenames,latitude,longitude,fclass,fcode,country,cc2,admin1,admin2,admin3,admin4,population,elevation,gtopo30,timezone,moddate) from '${WORKPATH}/${TMPPATH}/allCountries.txt' null as ''; | |
| copy postalcodes (countrycode,postalcode,placename,admin1name,admin1code,admin2name,admin2code,admin3name,admin3code,latitude,longitude,accuracy) from '${WORKPATH}/${PCPATH}/allCountries.txt' null as ''; | |
| copy timeZones (timeZoneId,GMT_offset,DST_offset,raw_offset) from '${WORKPATH}/${TMPPATH}/timeZones.txt.tmp' null as ''; | |
| copy featureCodes (code,name,description) from '${WORKPATH}/${TMPPATH}/featureCodes_en.txt' null as ''; | |
| copy admin1CodesAscii (code,name,nameAscii,geonameid) from '${WORKPATH}/${TMPPATH}/admin1CodesASCII.txt' null as ''; | |
| copy admin2CodesAscii (code,name,nameAscii,geonameid) from '${WORKPATH}/${TMPPATH}/admin2Codes.txt' null as ''; | |
| copy iso_languagecodes (iso_639_3,iso_639_2,iso_639_1,language_name) from '${WORKPATH}/${TMPPATH}/iso-languagecodes.txt.tmp' null as ''; | |
| copy countryInfo (iso_alpha2,iso_alpha3,iso_numeric,fips_code,country,capital,areainsqkm,population,continent,tld,currency_code,currency_name,phone,postal,postalRegex,languages,geonameid,neighbours,equivalent_fips_code) from '${WORKPATH}/${TMPPATH}/countryInfo.txt.tmp' null as ''; | |
| copy alternatename (alternatenameid,geonameid,isoLanguage,alternateName,isPreferredName,isShortName) from '${WORKPATH}/${TMPPATH}/alternateNames.txt' null as ''; | |
| INSERT INTO continentCodes VALUES ('AF', 'Africa', 6255146); | |
| INSERT INTO continentCodes VALUES ('AS', 'Asia', 6255147); | |
| INSERT INTO continentCodes VALUES ('EU', 'Europe', 6255148); | |
| INSERT INTO continentCodes VALUES ('NA', 'North America', 6255149); | |
| INSERT INTO continentCodes VALUES ('OC', 'Oceania', 6255150); | |
| INSERT INTO continentCodes VALUES ('SA', 'South America', 6255151); | |
| INSERT INTO continentCodes VALUES ('AN', 'Antarctica', 6255152); | |
| CREATE INDEX index_countryinfo_geonameid ON countryinfo USING hash (geonameid); | |
| CREATE INDEX index_alternatename_geonameid ON alternatename USING hash (geonameid); | |
| EOT | |
| echo "'----- DONE ( have fun... )" |
in postgres documentation
If you desire to store long strings with no specific upper limit,
use text or character varying without a length specifier, rather
than making up an arbitrary length limit.)
I would just take off the (n) if you are in a recent version of postgres
In current file versions
grep -v '^#' countryInfo.txt | head -n -2 > countryInfo.txt.tmp;
should change to
grep -v '^#' countryInfo.txt > countryInfo.txt.tmp;
Hi!
I have the following issue:
SQL error:
ERROR: extra data after last expected column
CONTEXT: COPY postalcodes, line 1: "2986043 Pic de Font Blanca Pic de Font Blanca Pic de Font Blanca,Pic du Port 42.64991 1.53335 T PK A..."
In statement:
copy postalcodes (countrycode,postalcode,placename,admin1name,admin1code,admin2name,
admin2code,admin3name,admin3code,latitude,longitude,accuracy) from './allCountries.txt' null as '';
Same problem for table alternatename ... Extra data after last expected column ...
Simple Solution:
`
CREATE TABLE alternatename (
alternatenameId int,
geonameid int,
isoLanguage varchar(7),
alternateName varchar(300),
isPreferredName boolean,
isShortName boolean,
useless1 boolean,
useless2 boolean,
);
copy alternatename (alternatenameid,geonameid,isoLanguage,alternateName,isPreferredName,isShortName,isColloquial,isHistoric,useless1,useless2) from '${WORKPATH}/${TMPPATH}/alternateNames.txt' null as '';
ALTER TABLE "public"."alternatename"
DROP COLUMN "useless1",
DROP COLUMN "useless2";
`
Maybe not the best solution, but for me... it works
Hi!
I have the following issue:SQL error: ERROR: extra data after last expected column CONTEXT: COPY postalcodes, line 1: "2986043 Pic de Font Blanca Pic de Font Blanca Pic de Font Blanca,Pic du Port 42.64991 1.53335 T PK A..." In statement: copy postalcodes (countrycode,postalcode,placename,admin1name,admin1code,admin2name, admin2code,admin3name,admin3code,latitude,longitude,accuracy) from './allCountries.txt' null as '';
It is old post, but i noticed the allcountries.txt is not for postalcodes table?
My version with few changes:
- Added support for schema + database
- Removed portal codes
https://gist.github.com/WhoAteDaCake/37823722bdf27fc03527f5b54c0ca6f0
Hi,
I used your script to import geonames database.
You should change geoname.aternatenames TYPE with TEXT because import creates an error : "ERROR: value too long for type character varying(6000)"
I changed it and I had a correct import.
Nevertheless, thank you for your script it's very cool one.