Skip to content

Instantly share code, notes, and snippets.

@jtemporal
Created June 9, 2017 09:45
Show Gist options
  • Save jtemporal/6039bfe59fb9519c5af11773c893a362 to your computer and use it in GitHub Desktop.
Save jtemporal/6039bfe59fb9519c5af11773c893a362 to your computer and use it in GitHub Desktop.
ipython history
import pandas as pd
import numpy as np
data = pd.read_csv('../serenata-data/2017-05-25-reimbursements.xz', dtype={'applicant_id': np.str,
'cnpj_cpf': np.str,
'congressperson_id': np.str,
'subquota_number': np.str},
low_memory=False)
data = data[data['subquota_description'] == 'Taxi, toll and parking']
data2014 = data[data['year'] == 2014]
data2014 = data2014[data2014['month'] >= 6]
dataall = data[data['year'] >= 2015]
data = data2014.append(dataall)
d = data.groupby('cnpj_cpf')['total_net_value'].agg({'sum': np.sum})
dat500 = d.sort_values('sum',ascending=False).head(500)
dat500.reset_index().head()
dat500 = dat500.reset_index()
dataset = pd.merge(dat500, companies, how='left', left_on='cnpj_cpf', right_on='cnpj')
companies = pd.read_csv('../serenata-data/2017-05-21-companies-no-geolocation.xz', low_memory=False)
companies['cnpj'] = companies['cnpj'].str.replace(r'\D', '')
dataset = pd.merge(dat500, companies, how='left', left_on='cnpj_cpf', right_on='cnpj')
dataset.shape
dataset.head()
dataset[['cnpj_cpf', 'name', 'trade_name', 'sum']].head()
writer = pd.ExcelWriter('output.xlsx') #requires openpyxl
dataset.to_excel(writer,'Sheet1')
writer.save()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment