Created
January 22, 2020 03:20
-
-
Save granttremblay/7fd4fc40a9e06ad668a1e0c8d38ccd2c to your computer and use it in GitHub Desktop.
Make Elise's Excel Spreadsheets
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
''' | |
A script for my cutie by her boy | |
''' | |
import numpy as np | |
import pandas as pd | |
import xlrd | |
savepath = '/Users/grant/Desktop/' | |
# Point to the Excel file here: | |
excel_file = 'data/Study_forG.xlsx' | |
# Read in the individual Excel sheets as Pandas dataframes | |
# We'll do this separately to be explicit, even though it's verbose | |
all_patients_sheet = pd.read_excel(excel_file, sheet_name='Demographic') | |
all_a1c_sheet = pd.read_excel(excel_file, sheet_name='A1C') | |
hispanic_patients_sheet = pd.read_excel( | |
excel_file, sheet_name='Hispanic Patients') | |
outpatient_visit_sheet = pd.read_excel( | |
excel_file, sheet_name='Outpatient visit') | |
providers_sheet = pd.read_excel( | |
excel_file, sheet_name='Endocrine Treating providers') | |
nutrition_sheet = pd.read_excel(excel_file, sheet_name='BCH nutrition visit') | |
ed_visits_sheet = pd.read_excel(excel_file, sheet_name='ED Visits') | |
inpatient_visit_sheet = pd.read_excel(excel_file, sheet_name='Inpatient visit') | |
hispanic_patient_mrns = hispanic_patients_sheet['MRN'].to_frame() | |
# Make average A1C for every MRN in the A1C sheet, then mask based on membership in Hispanic MRN list | |
all_average_a1cs = all_a1c_sheet.groupby('MRN').mean().reset_index() | |
hispanic_average_a1c = all_average_a1cs[all_average_a1cs['MRN'].isin( | |
hispanic_patient_mrns['MRN'])] | |
non_hispanic_average_a1c = all_average_a1cs[np.logical_not( | |
all_average_a1cs['MRN'].isin(hispanic_patient_mrns['MRN']))] | |
# Number of Outpatient visits | |
outpatient_visits = outpatient_visit_sheet.groupby( | |
'MRN').size().reset_index(name='Number of Outpatient Visits by MRN') | |
hispanic_outpatient_visits = outpatient_visits[outpatient_visits['MRN'].isin(hispanic_patient_mrns['MRN'])] | |
non_hispanic_outpatient_visits = outpatient_visits[np.logical_not( | |
outpatient_visits['MRN'].isin(hispanic_patient_mrns['MRN']))] | |
# Types of Providers | |
providers_all = providers_sheet.groupby( | |
['MDNE', 'NAME_SUFFIX']).size().reset_index(name='Number of Visits') | |
md_visits = providers_all[providers_all.values == "MD"] | |
dne_visits = providers_all[providers_all.values == "DNE"] | |
sw_visits = providers_all[providers_all.values == "SW"] | |
hispanic_md_visits = md_visits[md_visits['MDNE'].isin( | |
hispanic_patient_mrns['MRN'])] | |
hispanic_dne_visits = dne_visits[dne_visits['MDNE'].isin( | |
hispanic_patient_mrns['MRN'])] | |
hispanic_sw_visits = sw_visits[sw_visits['MDNE'].isin( | |
hispanic_patient_mrns['MRN'])] | |
non_hispanic_md_visits = md_visits[np.logical_not( | |
md_visits['MDNE'].isin(hispanic_patient_mrns['MRN']))] | |
non_hispanic_dne_visits = dne_visits[np.logical_not( | |
dne_visits['MDNE'].isin(hispanic_patient_mrns['MRN']))] | |
non_hispanic_sw_visits = sw_visits[np.logical_not( | |
sw_visits['MDNE'].isin(hispanic_patient_mrns['MRN']))] | |
# Nutrition visits | |
nutrition_visits = nutrition_sheet.groupby('MRN').size().reset_index( | |
name='Number of Nutrition Visits by MRN') | |
hispanic_nutrition_visits = nutrition_visits[nutrition_visits['MRN'].isin( | |
hispanic_patient_mrns['MRN'])] | |
non_hispanic_nutrition_visits = nutrition_visits[np.logical_not( | |
nutrition_visits['MRN'].isin(hispanic_patient_mrns['MRN']))] | |
# ED visits | |
ed_visits = ed_visits_sheet.groupby('MRN').size().reset_index( | |
name='Number of ED Visits by MRN') | |
hispanic_ed_visits = ed_visits[ed_visits['MRN'].isin( | |
hispanic_patient_mrns['MRN'])] | |
non_hispanic_ed_visits = ed_visits[np.logical_not( | |
ed_visits['MRN'].isin(hispanic_patient_mrns['MRN']))] | |
# Inpatient visits | |
inpatient_visits = inpatient_visit_sheet.groupby('MRN').size( | |
).reset_index(name='Number of Inpatient Visits by MRN') | |
hispanic_inpatient_visits = inpatient_visits[inpatient_visits['MRN'].isin( | |
hispanic_patient_mrns['MRN'])] | |
non_hispanic_inpatient_visits = inpatient_visits[np.logical_not( | |
inpatient_visits['MRN'].isin(hispanic_patient_mrns['MRN']))] | |
sheets_to_save = {'Hispanic Average A1C': hispanic_average_a1c.rename(columns={"MRN": "Hispanic MRN", "RESULT_VAL": "Hispanic Mean A1C"}), | |
'Non-Hispanic Average A1C': non_hispanic_average_a1c.rename(columns={"MRN": "Non-Hispanic MRN", "RESULT_VAL": "Non-Hispanic Mean A1C"}), | |
'Hispanic Outpatient Visits': hispanic_outpatient_visits.rename(columns={"MRN": "Hispanic MRN", "Number of Outpatient Visits by MRN": "Number of Outpatient Visits"}), | |
'Non-Hispanic Outpatient Visits': non_hispanic_outpatient_visits.rename(columns={"MRN": "Non-Hispanic MRN", "Number of Outpatient Visits by MRN": "Number of Outpatient Visits"}), | |
'Hispanic MD Visits': hispanic_md_visits.rename(columns={"MDNE": "Hispanic MRN", "NAME_SUFFIX": "Provider Type"}), | |
'Non-Hispanic MD Visits': non_hispanic_md_visits.rename(columns={"MDNE": "Non-Hispanic MRN", "NAME_SUFFIX": "Provider Type"}), | |
'Hispanic DNE Visits': hispanic_dne_visits.rename(columns={"MDNE": "Hispanic MRN", "NAME_SUFFIX": "Provider Type"}), | |
'Non-Hispanic DNE Visits': non_hispanic_dne_visits.rename(columns={"MDNE": "Non-Hispanic MRN", "NAME_SUFFIX": "Provider Type"}), | |
'Hispanic SW Visits': hispanic_sw_visits.rename(columns={"MDNE": "Hispanic MRN", "NAME_SUFFIX": "Provider Type"}), | |
'Non-Hispanic SW Visits': non_hispanic_sw_visits.rename(columns={"MDNE": "Non-Hispanic MRN", "NAME_SUFFIX": "Provider Type"}), | |
'Hispanic Nutrition Visits': hispanic_nutrition_visits.rename(columns={"MRN": "Hispanic MRN", "Number of Nutrition Visits by MRN": "Number of Nutrition Visits"}), | |
'Non-Hispanic Nutrition Visits': non_hispanic_nutrition_visits.rename(columns={"MRN": "Non-Hispanic MRN", "Number of Nutrition Visits by MRN": "Number of Nutrition Visits"}), | |
'Hispanic ED Visits': hispanic_ed_visits.rename(columns={"MRN": "Hispanic MRN", "Number of ED Visits by MRN": "Number of ED Visits"}), | |
'Non-Hispanic_Outpatient': non_hispanic_ed_visits.rename(columns={"MRN": "Non-Hispanic MRN", "Number of ED Visits by MRN": "Number of ED Visits"}), | |
'Hispanic Inpatient Visits': hispanic_inpatient_visits.rename(columns={"MRN": "Hispanic MRN", "Number of Inpatient Visits by MRN": "Number of Inpatient Visits"}), | |
'Non-Hispanic Inpatient Visits': non_hispanic_inpatient_visits.rename(columns={"MRN": "Non-Hispanic MRN", "Number of Inpatient Visits by MRN": "Number of Inpatient Visits"})} | |
with pd.ExcelWriter(savepath + 'data_for_E.xlsx') as writer: # doctest: +SKIP | |
for item in sheets_to_save: | |
sheets_to_save[item].to_excel(writer, sheet_name=item, index=False) | |
print("Saved {}".format(item)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment