Skip to content

Instantly share code, notes, and snippets.

@jpcoblentz
Created July 2, 2020 16:36
Show Gist options
  • Save jpcoblentz/710ce986c92a4309ff8abcb275de4daa to your computer and use it in GitHub Desktop.
Save jpcoblentz/710ce986c92a4309ff8abcb275de4daa to your computer and use it in GitHub Desktop.
import pandas as pd
import geopandas as gp
import re
from itertools import chain
medsl = pd.read_csv('/Users/jacobcoblentz/Downloads/2016-precinct-president.csv', encoding = 'LATIN1')
medsl = medsl[medsl.state_postal == 'NY']
medsl = dict(tuple(medsl[['county_fips', 'precinct']].drop_duplicates().groupby('county_fips')))
nyg = gp.read_file('/Users/jacobcoblentz/unfolded/voting/2016/projects/NY/NY_State_Full.geojson')
nyd = dict(tuple(nyg.groupby('county')))
# 001 --albany
medsl[36001].precinct
tempED = medsl[36001].precinct.str.findall('ED([0-9]*)').str[-1]
tempW = medsl[36001].precinct.str.findall('W([0-9]*)').str[-1].fillna(0).astype(str)
# town name
tempT = medsl[36001].precinct.apply(lambda x: ''.join(x.split(' ')[1])[0:3])
# merge to precinct Name
tempW
precName = tempT + tempW.str.rjust(3, '0') + tempED.str.rjust(3, '0')
medsl[36001]['MatchPrec'] = precName
nyd['001']['MatchPrec'] = nyd['001']['precinct']
## 003
medsl[36003]['MatchPrec'] = medsl[36003]['precinct']
nyd['003']['MatchPrec'] = nyd['003']['precinct']
## 005 -- ETC New_York_City
medsl[36005]['MatchPrec'] = medsl[36005].precinct.map(lambda x: ''.join(x.split('/')[::-1]))
nyd['005']['MatchPrec'] = nyd['005']['precinct']
medsl[36047]['MatchPrec'] = medsl[36047].precinct.map(lambda x: ''.join(x.split('/')[::-1]))
nyd['047']['MatchPrec'] = nyd['047']['precinct']
medsl[36061]['MatchPrec'] = medsl[36061].precinct.map(lambda x: ''.join(x.split('/')[::-1]))
nyd['061']['MatchPrec'] = nyd['061']['precinct']
medsl[36081]['MatchPrec'] = medsl[36081].precinct.map(lambda x: ''.join(x.split('/')[::-1]))
nyd['081']['MatchPrec'] = nyd['081']['precinct']
medsl[36085]['MatchPrec'] = medsl[36085].precinct.map(lambda x: ''.join(x.split('/')[::-1]))
nyd['085']['MatchPrec'] = nyd['085']['precinct']
## 007 - Broome
medsl[36007]['MatchPrec'] = medsl[36007]['precinct'].str.replace('of Binghamton', '', regex = True).replace('Town of ', '', regex = True).replace('\s{2,}', ' ', regex = True)
nyd['007']['MatchPrec'] = nyd['007']['precinct']
## 009 -- Cattaraugus
medsl[36009]['MatchPrec'] = medsl[36009]['precinct']
nyd['009']['MatchPrec'] = nyd['009']['precinct']
## 011 -- Cayuga
medsl[36011]['MatchPrec'] = [prec[0] for prec in medsl[36011]['precinct'].str.rsplit(' ', 1)]
medsl[36011]['MatchPrec'] = medsl[36011]['MatchPrec'].str.upper()
nyd['011']['MatchPrec'] = nyd['011']['precinct']
## 019 -- Clinton
medsl[36019].precinct
# this is kind of hard, so using a loop
p = 'City of Plattsburgh W1P3'
tempP = medsl[36019].precinct
pList = []
for p in tempP:
if 'City of' in p:
print(p)
ts = 'Ward' + ' ' + re.findall('W([0-9]*)', p)[-1] + '-' + re.findall('D|P([0-9]*)', p)[-1]
pList.append(ts)
else:
ts = ' '.join(p.split(' ')[:-1]) + ' ' + str(int(p.rsplit(' ', 1)[-1]))
pList.append(ts)
medsl[36019]['MatchPrec'] = pList
nyd['019']['MatchPrec'] = nyd['019']['precinct']
## 023 -- cortland
medsl[36023]['precinct']
nyd['023']['precinct']
### # TODO: GET BACK TO THIS Oneida
## 027 -- Dutchess -- not possible
## 029 Erie
medsl[36029]['precinct']
nyd['029']['MatchPrec'] = nyd['029']['precinct'].str.upper()
medsl[36029]['MatchPrec'] = medsl[36029]['precinct'].str.upper()
# 031 -- essex
nyd['031']['MatchPrec'] = nyd['031']['precinct'].str.upper()
medsl[36031]['MatchPrec'] = medsl[36031]['precinct'].str.upper()
# 043 Herkimer
# for loop to handle this
pList = []
for p in medsl[36043].precinct.str.split(' '):
if 'City' in p:
pList.append(' '.join(p[2:4]) + ' ' + p[5] + '-' + p[6])
else:
pList.append(' '.join(p[2:-2]))
medsl[36043]['MatchPrec'] = pList
nyd['043']['MatchPrec'] = nyd['043']['precinct']
# 055 -- Monroe
medsl[36055]['MatchPrec'] = medsl[36055]['precinct']
nyd['055']['MatchPrec'] = nyd['055']['precinct']
# wtf nassau
# 36063 wtf niagra
# 36065 Onieda
medsl[36065]['MatchPrec'] = medsl[36065]['precinct'].str.upper()
nyd['065']['MatchPrec'] = nyd['065']['precinct'].str.upper()
# 3607 Fabius
medsl[36067]['precinct']
nyd['067']['precinct'].unique()
medsl[36067]['MatchPrec'] = medsl[36067]['precinct'].str.upper()
nyd['067']['MatchPrec'] = nyd['067']['precinct'].str.upper()
# 36069 -- Ontario
medsl[36069]['precinct']
medsl[36069]['precinct'] = medsl[36069]['precinct'].str.replace("Town of","").str.replace("City of","").str.replace("Ward","").str.replace(" ", " ")
nyd['069']['precinct'] = nyd['069']['precinct'].str.replace('-', " ")
medsl[36069]['MatchPrec'] = medsl[36069]['precinct'].str.upper()
nyd['069']['MatchPrec'] = nyd['069']['precinct'].str.upper()
# 36083 Rennselaer
medsl[36083]['precinct'] = medsl[36083]['precinct'].str.upper()
nyd['083']['precinct'] = nyd['083']['precinct'].str.replace("TH", "").str.replace("ST", "").str.replace(" - ", " ").str.replace("RD", "").str.replace("ND", "")
medsl[36083]['MatchPrec'] = medsl[36083]['precinct'].str.upper()
nyd['083']['MatchPrec'] = nyd['083']['precinct'].str.upper()
# 36087
medsl[36087]['MatchPrec'] = medsl[36087]['precinct'].str.upper()
nyd['087']['MatchPrec'] = nyd['087']['precinct'].str.upper()
# 36089
medsl[36089]['MatchPrec'] = medsl[36089]['precinct'].str.upper()
nyd['089']['MatchPrec'] = nyd['089']['precinct'].str.upper()
# 36101
medsl[36101]['MatchPrec'] = medsl[36101]['precinct'].str.upper().str.replace(" ", "")
nyd['101']['MatchPrec'] = nyd['101']['precinct'].str.upper()
# 36107
medsl[36107]['precinct'] = medsl[36107]['precinct'].str.upper().str.replace("TOWN OF", "").str.replace("\sLD.*$", "").str.replace(" ", "")
nyd['101']['precinct'].str.upper()
medsl[36101]['MatchPrec'] = medsl[36101]['precinct'].str.upper()
nyd['101']['MatchPrec'] = nyd['101']['precinct'].str.upper()
# 36109
# 36111
# 36113
medsl[36113]['MatchPrec'] = medsl[36113]['precinct'].str.upper()
nyd['113']['MatchPrec'] = nyd['113']['precinct'].str.upper()
# 36115
medsl[36115]['MatchPrec'] = medsl[36115]['precinct'].str.upper()
nyd['115']['MatchPrec'] = nyd['115']['precinct'].str.upper()
#### join in a loop
nyd_temp = {k: v for k,v in nyd.items() if 'MatchPrec' in v.columns}
meds_temp = {k: v for k,v in medsl.items() if 'MatchPrec' in v.columns}
for cty in nyd_temp.keys():
cint = int(str(36) + str(cty))
nyd_temp[cty].merge(meds_temp[cint],how = "inner", on = "MatchPrec", suffixes = ('gp_', 'medsl_'))
results = pd.read_csv('/Users/jacobcoblentz/Downloads/2016-precinct-president.csv', encoding = 'LATIN1')
results = results[results.state_postal == 'NY']
results = results[results.party.isin(['republican', 'democratic'])]
results['County_Prec'] = results['county_fips'].astype(str) + '||' + results['precinct']
results = results[['county_fips', 'County_Prec', 'precinct', 'party', 'votes']]
results = results.dropna()
results = results.groupby(['County_Prec', 'county_fips', 'precinct', 'party'], as_index = False)['votes'].agg('sum')
results = results.pivot(index='County_Prec', columns='party', values='votes')
results = results.reset_index()
new = results["County_Prec"].str.split("\|\|", n = 1, expand = True)
results["county_fips"]= new[0]
# making separate last name column from new data frame
results["precinct"]= new[1]
medslt = pd.concat(meds_temp).reset_index()
nyr = pd.concat(nyd_temp).reset_index().merge(medslt, on = 'MatchPrec', suffixes = ('_gp', '_medsl'))
results.county_fips = results.county_fips.astype(float)
nyr = nyr.merge(results, left_on = ['level_0_medsl', 'precinct_medsl'], right_on = ['county_fips', 'precinct'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment