Created
July 2, 2020 16:36
-
-
Save jpcoblentz/710ce986c92a4309ff8abcb275de4daa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import geopandas as gp | |
import re | |
from itertools import chain | |
medsl = pd.read_csv('/Users/jacobcoblentz/Downloads/2016-precinct-president.csv', encoding = 'LATIN1') | |
medsl = medsl[medsl.state_postal == 'NY'] | |
medsl = dict(tuple(medsl[['county_fips', 'precinct']].drop_duplicates().groupby('county_fips'))) | |
nyg = gp.read_file('/Users/jacobcoblentz/unfolded/voting/2016/projects/NY/NY_State_Full.geojson') | |
nyd = dict(tuple(nyg.groupby('county'))) | |
# 001 --albany | |
medsl[36001].precinct | |
tempED = medsl[36001].precinct.str.findall('ED([0-9]*)').str[-1] | |
tempW = medsl[36001].precinct.str.findall('W([0-9]*)').str[-1].fillna(0).astype(str) | |
# town name | |
tempT = medsl[36001].precinct.apply(lambda x: ''.join(x.split(' ')[1])[0:3]) | |
# merge to precinct Name | |
tempW | |
precName = tempT + tempW.str.rjust(3, '0') + tempED.str.rjust(3, '0') | |
medsl[36001]['MatchPrec'] = precName | |
nyd['001']['MatchPrec'] = nyd['001']['precinct'] | |
## 003 | |
medsl[36003]['MatchPrec'] = medsl[36003]['precinct'] | |
nyd['003']['MatchPrec'] = nyd['003']['precinct'] | |
## 005 -- ETC New_York_City | |
medsl[36005]['MatchPrec'] = medsl[36005].precinct.map(lambda x: ''.join(x.split('/')[::-1])) | |
nyd['005']['MatchPrec'] = nyd['005']['precinct'] | |
medsl[36047]['MatchPrec'] = medsl[36047].precinct.map(lambda x: ''.join(x.split('/')[::-1])) | |
nyd['047']['MatchPrec'] = nyd['047']['precinct'] | |
medsl[36061]['MatchPrec'] = medsl[36061].precinct.map(lambda x: ''.join(x.split('/')[::-1])) | |
nyd['061']['MatchPrec'] = nyd['061']['precinct'] | |
medsl[36081]['MatchPrec'] = medsl[36081].precinct.map(lambda x: ''.join(x.split('/')[::-1])) | |
nyd['081']['MatchPrec'] = nyd['081']['precinct'] | |
medsl[36085]['MatchPrec'] = medsl[36085].precinct.map(lambda x: ''.join(x.split('/')[::-1])) | |
nyd['085']['MatchPrec'] = nyd['085']['precinct'] | |
## 007 - Broome | |
medsl[36007]['MatchPrec'] = medsl[36007]['precinct'].str.replace('of Binghamton', '', regex = True).replace('Town of ', '', regex = True).replace('\s{2,}', ' ', regex = True) | |
nyd['007']['MatchPrec'] = nyd['007']['precinct'] | |
## 009 -- Cattaraugus | |
medsl[36009]['MatchPrec'] = medsl[36009]['precinct'] | |
nyd['009']['MatchPrec'] = nyd['009']['precinct'] | |
## 011 -- Cayuga | |
medsl[36011]['MatchPrec'] = [prec[0] for prec in medsl[36011]['precinct'].str.rsplit(' ', 1)] | |
medsl[36011]['MatchPrec'] = medsl[36011]['MatchPrec'].str.upper() | |
nyd['011']['MatchPrec'] = nyd['011']['precinct'] | |
## 019 -- Clinton | |
medsl[36019].precinct | |
# this is kind of hard, so using a loop | |
p = 'City of Plattsburgh W1P3' | |
tempP = medsl[36019].precinct | |
pList = [] | |
for p in tempP: | |
if 'City of' in p: | |
print(p) | |
ts = 'Ward' + ' ' + re.findall('W([0-9]*)', p)[-1] + '-' + re.findall('D|P([0-9]*)', p)[-1] | |
pList.append(ts) | |
else: | |
ts = ' '.join(p.split(' ')[:-1]) + ' ' + str(int(p.rsplit(' ', 1)[-1])) | |
pList.append(ts) | |
medsl[36019]['MatchPrec'] = pList | |
nyd['019']['MatchPrec'] = nyd['019']['precinct'] | |
## 023 -- cortland | |
medsl[36023]['precinct'] | |
nyd['023']['precinct'] | |
### # TODO: GET BACK TO THIS Oneida | |
## 027 -- Dutchess -- not possible | |
## 029 Erie | |
medsl[36029]['precinct'] | |
nyd['029']['MatchPrec'] = nyd['029']['precinct'].str.upper() | |
medsl[36029]['MatchPrec'] = medsl[36029]['precinct'].str.upper() | |
# 031 -- essex | |
nyd['031']['MatchPrec'] = nyd['031']['precinct'].str.upper() | |
medsl[36031]['MatchPrec'] = medsl[36031]['precinct'].str.upper() | |
# 043 Herkimer | |
# for loop to handle this | |
pList = [] | |
for p in medsl[36043].precinct.str.split(' '): | |
if 'City' in p: | |
pList.append(' '.join(p[2:4]) + ' ' + p[5] + '-' + p[6]) | |
else: | |
pList.append(' '.join(p[2:-2])) | |
medsl[36043]['MatchPrec'] = pList | |
nyd['043']['MatchPrec'] = nyd['043']['precinct'] | |
# 055 -- Monroe | |
medsl[36055]['MatchPrec'] = medsl[36055]['precinct'] | |
nyd['055']['MatchPrec'] = nyd['055']['precinct'] | |
# wtf nassau | |
# 36063 wtf niagra | |
# 36065 Onieda | |
medsl[36065]['MatchPrec'] = medsl[36065]['precinct'].str.upper() | |
nyd['065']['MatchPrec'] = nyd['065']['precinct'].str.upper() | |
# 3607 Fabius | |
medsl[36067]['precinct'] | |
nyd['067']['precinct'].unique() | |
medsl[36067]['MatchPrec'] = medsl[36067]['precinct'].str.upper() | |
nyd['067']['MatchPrec'] = nyd['067']['precinct'].str.upper() | |
# 36069 -- Ontario | |
medsl[36069]['precinct'] | |
medsl[36069]['precinct'] = medsl[36069]['precinct'].str.replace("Town of","").str.replace("City of","").str.replace("Ward","").str.replace(" ", " ") | |
nyd['069']['precinct'] = nyd['069']['precinct'].str.replace('-', " ") | |
medsl[36069]['MatchPrec'] = medsl[36069]['precinct'].str.upper() | |
nyd['069']['MatchPrec'] = nyd['069']['precinct'].str.upper() | |
# 36083 Rennselaer | |
medsl[36083]['precinct'] = medsl[36083]['precinct'].str.upper() | |
nyd['083']['precinct'] = nyd['083']['precinct'].str.replace("TH", "").str.replace("ST", "").str.replace(" - ", " ").str.replace("RD", "").str.replace("ND", "") | |
medsl[36083]['MatchPrec'] = medsl[36083]['precinct'].str.upper() | |
nyd['083']['MatchPrec'] = nyd['083']['precinct'].str.upper() | |
# 36087 | |
medsl[36087]['MatchPrec'] = medsl[36087]['precinct'].str.upper() | |
nyd['087']['MatchPrec'] = nyd['087']['precinct'].str.upper() | |
# 36089 | |
medsl[36089]['MatchPrec'] = medsl[36089]['precinct'].str.upper() | |
nyd['089']['MatchPrec'] = nyd['089']['precinct'].str.upper() | |
# 36101 | |
medsl[36101]['MatchPrec'] = medsl[36101]['precinct'].str.upper().str.replace(" ", "") | |
nyd['101']['MatchPrec'] = nyd['101']['precinct'].str.upper() | |
# 36107 | |
medsl[36107]['precinct'] = medsl[36107]['precinct'].str.upper().str.replace("TOWN OF", "").str.replace("\sLD.*$", "").str.replace(" ", "") | |
nyd['101']['precinct'].str.upper() | |
medsl[36101]['MatchPrec'] = medsl[36101]['precinct'].str.upper() | |
nyd['101']['MatchPrec'] = nyd['101']['precinct'].str.upper() | |
# 36109 | |
# 36111 | |
# 36113 | |
medsl[36113]['MatchPrec'] = medsl[36113]['precinct'].str.upper() | |
nyd['113']['MatchPrec'] = nyd['113']['precinct'].str.upper() | |
# 36115 | |
medsl[36115]['MatchPrec'] = medsl[36115]['precinct'].str.upper() | |
nyd['115']['MatchPrec'] = nyd['115']['precinct'].str.upper() | |
#### join in a loop | |
nyd_temp = {k: v for k,v in nyd.items() if 'MatchPrec' in v.columns} | |
meds_temp = {k: v for k,v in medsl.items() if 'MatchPrec' in v.columns} | |
for cty in nyd_temp.keys(): | |
cint = int(str(36) + str(cty)) | |
nyd_temp[cty].merge(meds_temp[cint],how = "inner", on = "MatchPrec", suffixes = ('gp_', 'medsl_')) | |
results = pd.read_csv('/Users/jacobcoblentz/Downloads/2016-precinct-president.csv', encoding = 'LATIN1') | |
results = results[results.state_postal == 'NY'] | |
results = results[results.party.isin(['republican', 'democratic'])] | |
results['County_Prec'] = results['county_fips'].astype(str) + '||' + results['precinct'] | |
results = results[['county_fips', 'County_Prec', 'precinct', 'party', 'votes']] | |
results = results.dropna() | |
results = results.groupby(['County_Prec', 'county_fips', 'precinct', 'party'], as_index = False)['votes'].agg('sum') | |
results = results.pivot(index='County_Prec', columns='party', values='votes') | |
results = results.reset_index() | |
new = results["County_Prec"].str.split("\|\|", n = 1, expand = True) | |
results["county_fips"]= new[0] | |
# making separate last name column from new data frame | |
results["precinct"]= new[1] | |
medslt = pd.concat(meds_temp).reset_index() | |
nyr = pd.concat(nyd_temp).reset_index().merge(medslt, on = 'MatchPrec', suffixes = ('_gp', '_medsl')) | |
results.county_fips = results.county_fips.astype(float) | |
nyr = nyr.merge(results, left_on = ['level_0_medsl', 'precinct_medsl'], right_on = ['county_fips', 'precinct']) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment