Created
March 28, 2018 19:51
-
-
Save a8dx/7e9d5af24101fc66aafa739577713b59 to your computer and use it in GitHub Desktop.
Create a better ZIP5-County lookup table
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Filename: ZIP5_County_Lookup.do | |
Author: Anthony D'Agostino (ald at satanford dot edu) | |
Date Created: 02/09/2018 | |
Last Edited: 03/27/2018 | |
Purpose: Generate a merged zip5-county crosswalk that leverages the strengths of both the US Census and HUD ZCTA crosswalks. | |
*/ | |
clear | |
clear all | |
clear matrix | |
set more off | |
set maxvar 25000, permanently | |
loc basePath "<your path here>" | |
// read in county codes, downloaded from: https://www.census.gov/geo/reference/codes/cou.html | |
import delimited using "`basePath'/national_county.txt", delim(",") clear stringcols(_all) varnames(nonames) | |
ren v1 state | |
ren v2 state_fips | |
ren v3 county_fips | |
ren v4 county_name | |
ren v5 fipsclasscode | |
gen county = state_fips + county_fips | |
tempfile counties | |
save `counties' | |
*import excel using "`basePath'/ZIP_COUNTY_032011.xlsx", firstrow clear | |
import excel using "`basePath'/ZIP_COUNTY_122017.xlsx", firstrow clear | |
// 39455 unique entries before any transformations | |
merge m:1 county using `counties', update replace | |
tab _merge | |
drop if _merge == 2 | |
drop _merge | |
gsort zip -tot_ratio | |
bys zip: gen totOrder = _n | |
gsort zip -res_ratio | |
bys zip: gen resOrder = _n | |
tw (hist res_ratio if resOrder == 1), xtitle("Residential Address Percent") graphregion(color(white) lwidth(large)) | |
keep if resOrder == 1 | |
// some basic cleaning | |
replace county_name = "Oglala Lakota, SD" if county == "46102" | |
replace state = "SD" if county == "46102" | |
replace state_fips = "46" if county == "46102" | |
replace county_fips = "102" if county == "46102" | |
// see http://www.nws.noaa.gov/om/notification/scn17-57kusilvak_ak.htm | |
replace county_name = "Kusilvak Census Area" if county == "02158" | |
replace state = "AK" if county == "02158" | |
replace state_fips = "02" if county == "02158" | |
replace county_fips = "158" if county == "02158" | |
drop resOrder totOrder | |
ren zip zip5 | |
destring, replace | |
tempfile zip5_county | |
save `zip5_county' | |
import delimited using "`basePath'/zcta_county_rel_10.txt", delim(",") clear stringcols(_all) // varnames(nonames) | |
** 33120 unique ZCTA5 | |
ren state state_fips_rel | |
ren county county_fips_rel | |
gen county = state_fips_rel + county_fips_rel | |
merge m:1 county using `counties', update replace | |
tab _merge | |
drop if _merge == 2 | |
drop _merge | |
destring, replace | |
ren state state_rel | |
ren county county_rel | |
ren county_name county_name_rel | |
gen popabove50 = cond(zpoppct >= 50, 1, 0) | |
// apportionment: keep county matches with largest pop ZCTA5 share | |
gsort zcta5 -poppt | |
bys zcta5: gen popOrder = _n | |
keep if popOrder == 1 | |
ren zpop pop_zip5 | |
ren zcta5 zip5 | |
keep zip5 pop_zip5 zpoppct county_rel state_rel county_name_rel state_fips_rel county_fips_rel | |
merge 1:1 zip5 using `zip5_county', update replace | |
tab _merge | |
drop _merge | |
// prioritize Census values, replace with HUD where missing | |
foreach x in "county" "state_fips" "county_fips" { | |
gen `x'_final = `x'_rel | |
replace `x'_final = `x' if `x'_final == . | |
drop `x' | |
ren `x'_final `x' | |
} | |
foreach x in "county_name" "state" { | |
gen `x'_final = `x'_rel | |
replace `x'_final = `x' if `x'_final == "" | |
drop `x' | |
ren `x'_final `x' | |
} | |
keep zip5 county county_fips county_name state state_fips | |
save "`basePath'/ZIP5_County_Crosswalk", replace |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment