Last active
March 1, 2023 06:15
-
-
Save caged/6943733 to your computer and use it in GitHub Desktop.
Shell script to import data into Postgres from https://www.nhgis.org. If you use this, you'll need to edit line 14 to include the census data you're importing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
# usage ./create-psql-database DATABASE | |
# | |
DB_NAME=$1 | |
FOLDER_NAME="nhgis0004" | |
# Drop and recreate database | |
dropdb --interactive "${DB_NAME}" | |
createdb --encoding UTF8 $DB_NAME | |
# Create the POSTGIS tables and data table | |
psql -d $DB_NAME -c 'CREATE EXTENSION postgis' | |
psql -d $DB_NAME -c "CREATE TABLE ${DB_NAME}_data (GISJOIN varchar(15),YEAR varchar(9), REGION varchar(30),REGIONA integer,DIVISION varchar(30),DIVISIONA integer,STATE varchar(30),STATEA integer,COUNTY text,COUNTYA integer,COUSUB varchar(30),COUSUBA integer,PLACE varchar(30),PLACEA integer,TRACTA integer,BLKGRPA integer,CONCITA integer,RES_ONLYA integer,AIANHH integer,AIANHHA integer,TRUSTA integer,AITSCEA integer,ANRCA integer,CBSA integer,CBSAA integer,CSA integer,CSAA integer,METDIV integer,METDIVA integer,NECTA integer,NECTAA integer,CNECTA integer,CNECTAA integer,NECTADIV integer,NECTADIVA integer,UA integer,UAA integer,CDCURRA integer,SLDUA integer,SLDLA integer,ZCTA5A integer,SUBMCDA integer,SDELM integer,SDELMA integer,SDSEC integer,SDSECA integer,SDUNI integer,SDUNIA integer,PUMA5A integer,NAME text,BTTRA integer,BTBGA integer,MNUE001 integer,MNUE002 integer,MNUE003 integer,MNUE004 integer,MNUE005 integer,MNUE006 integer,MNUE007 integer,MNUE008 integer,MNUE009 integer,MNUE010 integer,MN3E001 integer,MN3E002 integer,MN3E003 integer,MNUM001 integer,MNUM002 integer,MNUM003 integer,MNUM004 integer,MNUM005 integer,MNUM006 integer,MNUM007 integer,MNUM008 integer,MNUM009 integer,MNUM010 integer,MN3M001 integer,MN3M002 integer,MN3M003 integer)" | |
tar -xzm --totals -C /tmp -f data/us-blockgroup-race.zip | |
tar -xzm --totals -C /tmp -f data/us-blockgroup-shape.zip | |
# Create some temporary directories for reprojecting shape files | |
mkdir /tmp/census-shapefiles | |
mkdir /tmp/reprojected-census-shapefiles | |
# Extract individual state shapefiles to the same directory | |
for file in /tmp/nhgis0004_shape/*.zip; do \ | |
tar -xzm -C /tmp/census-shapefiles -f $file | |
done | |
# Iterate through all the shapefiles and append them to the table | |
for file in /tmp/census-shapefiles/*.shp; do \ | |
file_name=$(basename $file) | |
ogr2ogr -f 'ESRI Shapefile' -t_srs EPSG:4326 "/tmp/reprojected-census-shapefiles/${file_name}" $file | |
#shp2pgsql -a -s 4326 -i -D $file us_census_blockgroups | psql $DB_NAME | |
done | |
# We need to ensure content is UTF-8 | |
iconv -c -f UTF-8 -t UTF-8 < "/tmp/nhgis0004_csv/nhgis0004_ds184_20115_2011_blck_grp.csv" > "/tmp/nhgis0004_csv/nhgis0004_ds184_20115_2011_blck_grp-utf8.csv" | |
# Import UTF-8 CSV race data | |
psql -d $DB_NAME -c "COPY ${DB_NAME}_data FROM '/tmp/nhgis0004_csv/nhgis0004_ds184_20115_2011_blck_grp-utf8.csv' WITH CSV HEADER" | |
# Create initial table schema and then delete the data so we can iterate through | |
# all the files later to append them to the table | |
first_file=$(find /tmp/reprojected-census-shapefiles -name "*.shp" | head -n 1) | |
shp2pgsql -s 4326 -i -D $first_file "${DB_NAME}_blockgroups" | psql $DB_NAME | |
psql -c "delete from ${DB_NAME}_blockgroups" $DB_NAME | |
# Iterate through all the shapefiles and append them to the table | |
for file in /tmp/reprojected-census-shapefiles/*.shp; do \ | |
file_name=$(basename $file) | |
echo $file_name | |
shp2pgsql -a -s 4326 -i -D $file us_census_blockgroups | psql $DB_NAME | |
done | |
# Create the index | |
psql -c "create index ${DB_NAME}_blockgroups_gix on ${DB_NAME}_blockgroups using gist (geom)" $DB_NAME | |
rm -rf /tmp/census-shapefiles | |
rm -rf /tmp/reprojected-census-shapefiles |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment