Created
August 12, 2021 15:25
-
-
Save akanik/f74218fc854d1e525fa34263d1c42d7a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Directory containing the 2010 ".pl" data files (the "tx" prefix presumably
# stands for Texas -- confirm). Paths are relative to the working directory.
data_dir10 = '../data/2010/tx2010.pl/'

# Input files for the 2010 load, keyed by role:
#   cols      - Excel workbook of field definitions, read one sheet at a time
#   geoheader - geographic header file, read as fixed-width text
#   seq01/02  - data segment files, read as comma-separated text
FILES10 = {
    'cols': '../data/2010/2010_PLSummaryFile_FieldNames.xlsx',
    'geoheader': data_dir10 + 'txgeo2010.pl',
    'seq01': data_dir10 + 'tx000012010.pl',
    'seq02': data_dir10 + 'tx000022010.pl',
}
def return_dtypes_dict(df):
    """Map each field name to the dtype listed for it in a definition table.

    Args:
        df: Table exposing a 'Name' column and a 'dtype' column (for example
            one sheet of the field-definitions workbook). Anything that
            supports ``df['Name']`` / ``df['dtype']`` and yields iterables
            works.

    Returns:
        dict of ``{Name: dtype}``, in row order. If a name appears more than
        once, the dtype from the last occurrence is kept.
    """
    return dict(zip(df['Name'], df['dtype']))
####################################
## READ IN THE GEOHEADER
####################################
# NOTE(review): this section uses `pd`; pandas is assumed to be imported as
# `pd` earlier in the script/notebook -- confirm.

# The 'geo' sheet of the field-definitions workbook supplies, per field,
# its name ('Name'), its dtype ('dtype') and its character width ('Size').
geo_defs10 = pd.read_excel(FILES10['cols'], sheet_name='geo')
geo_cols10 = geo_defs10['Name'].tolist()
geo_dtypes10 = return_dtypes_dict(geo_defs10)
geo_widths10 = geo_defs10['Size'].astype(int).tolist()

# The geoheader is a fixed-width file with no header row, so the column
# names, widths and dtypes all come from the definitions sheet.
gh_df10 = pd.read_fwf(
    FILES10['geoheader'],
    encoding='latin1',
    header=None,
    names=geo_cols10,
    widths=geo_widths10,
    dtype=geo_dtypes10,
)

# creating these slices so we can join on the human-readable geo name
gh_df10_place_hr = gh_df10[['LOGRECNO', 'NAME', 'STATE', 'PLACE']]
gh_df10_cnty_hr = gh_df10[['LOGRECNO', 'NAME', 'STATE', 'COUNTY']]
####################################
## READ IN P1
####################################
# Field names and dtypes come from the 'p1' sheet of the definitions workbook.
p1_defs10 = pd.read_excel(FILES10['cols'], sheet_name='p1')
p1_cols10 = p1_defs10['Name'].tolist()
p1_dtypes10 = return_dtypes_dict(p1_defs10)

# The segment file is comma-separated with no header row, so the column
# names are supplied explicitly.
p1_df10 = pd.read_csv(
    FILES10['seq01'],
    header=None,
    names=p1_cols10,
    dtype=p1_dtypes10,
)
####################################
## READ IN P2
####################################
# Field names and dtypes come from the 'p2' sheet of the definitions workbook.
p2_defs10 = pd.read_excel(FILES10['cols'], sheet_name='p2')
p2_cols10 = p2_defs10['Name'].tolist()
p2_dtypes10 = return_dtypes_dict(p2_defs10)

# The segment file is comma-separated with no header row, so the column
# names are supplied explicitly.
p2_df10 = pd.read_csv(
    FILES10['seq02'],
    header=None,
    names=p2_cols10,
    dtype=p2_dtypes10,
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment