Created
September 6, 2015 08:46
-
-
Save cjwinchester/319ef17b7f26a9bba513 to your computer and use it in GitHub Desktop.
Download, unzip and parse Census TIGER files for U.S. places, return a pipe-delimited text file with lat/lng.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Download, unzip and parse the Census TIGER 2015 PLACE files, then write
# us_places.txt: a pipe-delimited list of places with their FIPS codes,
# lat/lng and state name.
#
# Requires: wget, unzip, file, and csvkit (in2csv, csvcut, csvjoin, csvsort,
# csvformat). All files are read from and written to the current directory.

# Stop on the first failed step: the final cleanup deletes the downloaded
# inputs and places.txt, so it must never run after a broken download or join.
set -euo pipefail
# An unmatched glob expands to nothing instead of the literal pattern, so the
# loops below simply do nothing when there are no matching files.
shopt -s nullglob

readonly FIPS_URL='http://www2.census.gov/geo/docs/reference/state.txt'
readonly TIGER_URL='ftp://ftp2.census.gov/geo/tiger/TIGER2015/PLACE/'

# Print a message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Fetch the state FIPS reference file and mirror the PLACE zip archives.
fetch_sources() {
  wget -O fips_ref.txt "$FIPS_URL" || die "could not download $FIPS_URL"
  wget --mirror --continue --no-directories "$TIGER_URL" \
    || die "could not mirror $TIGER_URL"
}

# Unpack every downloaded archive into the current directory.
extract_archives() {
  local archive
  for archive in ./*.zip; do
    # -o overwrites without prompting, so a re-run does not block on stdin.
    unzip -o "$archive" || die "could not unzip $archive"
  done
}

# Print the record count that `file -b` reports for the DBF file in $1,
# or 0 when the description contains no "<n> records" field.
dbf_record_count() {
  local description
  description=$(file -b "$1")
  if [[ "$description" =~ ([0-9]+)\ records ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  else
    printf '0\n'
  fi
}

# Convert every DBF file to CSV rows in places.txt and report the totals.
build_places() {
  local dbf rows count
  local total=0

  # Truncate rather than append, so a re-run does not stack a second header
  # and duplicate rows on top of an old places.txt.
  printf '%s\n' 'state_fips,place_fips,place,lat,lng' > places.txt

  for dbf in ./*.dbf; do
    # Convert into a variable first so that a failed conversion never leaves
    # partial rows behind in places.txt.
    if rows=$(in2csv -f dbf -v "$dbf" | tail -n +2 | csvcut -c 1,2,5,15,16); then
      if [[ -n "$rows" ]]; then
        printf '%s\n' "$rows" >> places.txt
      fi
    else
      printf 'warning: could not convert %s; its rows are missing from places.txt\n' \
        "$dbf" >&2
    fi

    count=$(dbf_record_count "$dbf")
    # 10# forces base 10 so a count is never read as octal.
    total=$(( total + 10#$count ))
  done

  # compare record count: places.txt carries one header line, so a complete
  # conversion shows one more line than there are database records.
  echo "Database records: $total"
  wc -l places.txt
}

# Join the places on the FIPS reference file to get state names and write
# the sorted, pipe-delimited result.
write_us_places() {
  csvcut -c 1,3 -d "|" fips_ref.txt > states.txt
  csvjoin -c "state_fips,STATE" places.txt states.txt \
    | csvcut -c 1,2,3,4,5,7 \
    | csvsort -c 1 \
    | csvformat -D "|" > us_places.txt
}

# Remove the downloaded reference files and the unpacked shapefile parts.
cleanup() {
  # -f: a file that is already gone is not an error.
  rm -f -- .listing fips_ref.txt places.txt
  rm -f -- ./*.dbf ./*.shp ./*.xml ./*.cpg ./*.prj ./*.shx
}

main() {
  fetch_sources
  extract_archives
  build_places
  # The DBF files are ASCII encoded, and a few had non-ASCII characters that
  # borked on in2csv conversion. Those files have to be converted to UTF-8 and
  # added to places.txt by hand; this script does not stop to wait for that,
  # it only prints a warning for each file it could not convert.
  write_us_places
  cleanup
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment