Created
April 17, 2019 14:22
-
-
Save kennethbruskiewicz/2cc81e970c2452cd0a3c75e6304e7ac7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Python File Pre-Processing | |
import pandas as pd | |
df = pd.read_csv("35509-0001-Data.tsv", sep='\t',nrows=100) | |
drugFilter0 = [] | |
# BINARY | |
drugEVER = ["CIGEVER", "SNFEVER", "CHEWEVER", "PIPEVER", "ALCEVER", "MJEVER", "COCEVER", "CRKEVER", "HEREVER", "INHEVER", "ANLEVER", "STMEVER", "SEDEVER", "BLNTEVER", "TRNEVER"] | |
drugFilterEVER = drugFilter0 + drugEVER | |
df_filteredEVER = df.filter(items=drugFilterEVER) | |
df_maskedEVER = df_filteredEVER.mask(df_filteredEVER > 80, 0) | |
df_maskedEVER_stacked = df_maskedEVER | |
df_maskedEVER_stacked['INDEX'] = range(0, len(df_maskedEVER_stacked)) | |
df_maskedEVER_stacked = df_maskedEVER_stacked.set_index(df_maskedEVER_stacked.columns[-1]).stack() | |
#df_maskedEVER_stacked.to_csv("df_maskedEVER_stacked.tsv", sep="\t") | |
df_maskedEVER.to_csv("everyDrugTheyTakeEVER.tsv", sep="\t") | |
df_maskedEVER.to_json(path_or_buf="everyDrugTheyTakeEVER.json", orient="table") | |
# BIN (6) | |
drug30EST = [ | |
"CG30EST", | |
"SN30EST", | |
"CH30EST", | |
"CI30EST", | |
"AL30EST", | |
"MR30EST", | |
"CC30EST", | |
"CR30EST", | |
"HR30EST", | |
"HL30EST" | |
] | |
drugFilter30EST = drugFilter0 + drug30EST | |
df_filtered30EST = df.filter(items=drugFilter30EST) | |
df_masked30EST = df_filtered30EST.mask(df_filtered30EST > 80, 0) | |
df_masked30EST_stacked = df_masked30EST | |
df_masked30EST_stacked['INDEX'] = range(0, len(df_masked30EST_stacked)) | |
df_masked30EST_stacked = df_masked30EST_stacked.set_index(df_masked30EST_stacked.columns[-1]).stack() | |
#df_masked30EST_stacked.to_csv("df_masked30EST_stacked.tsv", sep="\t") | |
df_masked30EST.to_csv("everyDrugTheyTake30EST.tsv", sep="\t") | |
df_masked30EST.to_json(path_or_buf="everyDrugTheyTake30EST.json", orient="index") | |
# QUANT (30) | |
drug30USE = [ | |
"CIG30USE", | |
"SNF30USE", | |
"CHW30USE", | |
"CGR30USE", | |
"HER30USE", | |
"HAL30USE", | |
"FUSE30USE" | |
] | |
drugFilter30USE = drugFilter0 + drug30USE | |
df_filtered30USE = df.filter(items=drugFilter30USE) | |
df_masked30USE = df_filtered30USE.mask(df_filtered30USE > 80, 0) | |
#df_stacked30USE = df_masked30USE.set_index(df.columns[0]).stack() | |
#df_masked30EST['INDEX'] = range(1, len(df) + 1) | |
df_masked30USE.to_csv("everyDrugTheyTake30USE.tsv", sep="\t") | |
df_masked30USE.to_json(path_or_buf="everyDrugTheyTake30USE.json", orient="index") | |
# QUANT (62) | |
drugAGE = [ | |
"IIALCAGE", | |
"IRALCAGE", | |
"IIANLAGE", | |
"IRANLAGE", | |
"BLNTAGE", | |
"CIGAGE", | |
"CRKAGE", | |
"IRCRKAGE", | |
"IICRKAGE", | |
"COCAGE", | |
"IRCOCAGE", | |
"IICOCAGE", | |
"HERAGE", | |
"IRHERAGE", | |
"IIHERAGE", | |
"INHAGE", | |
"IRINHAGE", | |
"IIINHAGE", | |
"MJAGE", | |
"IIMJAGE", | |
"IRMJAGE", | |
"SEDAGE", | |
"IISEDAGE", | |
"IRSEDAGE", | |
"STMAGE", | |
"IISTMAGE", | |
"IRSTMAGE", | |
"TRNAGE", | |
"IITRNAGE", | |
"IRTRNAGE" | |
] | |
drugFilterAGE = drugFilter0 + drugAGE | |
df_filteredAGE = df.filter(items=drugFilterAGE) | |
df_maskedAGE = df_filteredAGE.mask(df_filteredAGE > 80, 0) | |
#df_stackedAGE = df_filteredAGE.set_index(df.columns[0]).stack() | |
df_maskedAGE.to_csv("everyDrugTheyTakeAGE.tsv", sep="\t") | |
df_maskedAGE.to_json(path_or_buf="everyDrugTheyTakeAGE.json", orient="index") | |
# QUANT/NOM (12) | |
drugMONTHS = [ | |
"ALCMFU", | |
"COCMFU", | |
"BLNTMFU", | |
"CIGMFU", | |
"CRKMFU", | |
"HERMFU", | |
"INHMFU", | |
"MJMFU", | |
"SEDMFU", | |
"STMMFU", | |
"TRNMFU" | |
] | |
drugFilterMONTHS = drugFilter0 + drugMONTHS | |
df_filteredMONTHS = df.filter(items=drugFilterMONTHS) | |
df_maskedMONTHS = df_filteredMONTHS.mask(df_filteredMONTHS > 80, 0) | |
#df_stackedMONTHS = df_maskedMONTHS.set_index(df.columns[0]).stack() | |
#print(df_maskedMONTHS) | |
df_maskedMONTHS.to_csv("everyDrugTheyTakeMONTHS.tsv", sep="\t") | |
df_maskedMONTHS.to_json(path_or_buf="everyDrugTheyTakeMONTHS.json", orient="index") | |
# BIN (9) | |
drugLAST = [ | |
"ALCREC", | |
"COCREC", | |
"BLNTREC", | |
"CIGREC", | |
"CRKREC", | |
"HERREC", | |
"INHREC", | |
"MJREC", | |
"SEDREC", | |
"STMREC", | |
"TRNREC" | |
] | |
drugFilterLAST = drugFilter0 + drugLAST | |
df_filteredLAST = df.filter(items=drugFilterLAST) | |
df_maskedLAST = df_filteredLAST.mask(df_filteredLAST > 80, 0) | |
#df_stackedLAST = df_maskedLAST.set_index(df.columns[0]).stack() | |
df_maskedLAST.to_csv("everyDrugTheyTakeLAST.tsv", sep="\t") | |
df_maskedLAST.to_json(path_or_buf="everyDrugTheyTakeLAST.json", orient="index") | |
# QUANT (7) | |
drugWEEKRATE = [ | |
"ALDAYPWK", | |
"CCDAYPWK", | |
"CRDAYPWK", | |
"HRDAYPWK", | |
"HLDAYPWK", | |
"INDAYPWK", | |
"PRDAYPWK", | |
"OXDAYPWK", | |
"TRDAYPWK", | |
"STDAYPWK", | |
"MTDAYPWK", | |
"SVDAYPWK", | |
"MRDAYPWK" | |
] | |
drugFilterWEEKRATE = drugFilter0 + drugWEEKRATE | |
df_filteredWEEKRATE = df.filter(items=drugFilterWEEKRATE) | |
df_maskedWEEKRATE = df_filteredWEEKRATE.mask(df_filteredWEEKRATE > 80, 0) | |
#df_stackedWEEKRATE = df_maskedWEEKRATE.set_index(df.columns[0]).stack() | |
df_maskedWEEKRATE.to_csv("everyDrugTheyTakeWEEKRATE.tsv", sep="\t") | |
df_maskedWEEKRATE.to_json(path_or_buf="everyDrugTheyTakeWEEKRATE.json", orient="index") | |
drugFilter00 = drugFilter0 + drug30USE + drug30USE + drugAGE + drugMONTHS + drugLAST + drugWEEKRATE + drugEVER | |
df_filtered00 = df.filter(items=drugFilter00) | |
df_masked00 = df_filtered00.mask(df_filtered00 > 80, 0) | |
#df_stacked00 = df_masked00.set_index(df.columns[0]).stack() | |
df_masked00.to_csv("everyDrugTheyTake00.tsv", sep="\t") | |
#df_masked00.to_json(path_or_buf="everyDrugTheyTake00.json", orient="index") | |
""" | |
FREQUENCY DATA HERE WOW | |
""" | |
# QUANT (365) | |
drugTOTALDAYS = [ | |
"ALCYRTOT", | |
"MJYRTOT", | |
"COCYRTOT", | |
"CRKYRTOT", | |
"HERYRTOT", | |
"HALYRTOT", | |
"INHYRTOT", | |
"ANLYRTOT", | |
"OXYYRTOT", | |
"TRNYRTOT", | |
"STMYRTOT", | |
"MTHYRTOT", | |
"SEDYRTOT" | |
] | |
drugFilterTOTALDAYS = drugFilter0 + drugTOTALDAYS | |
df_filteredTOTALDAYS = df.filter(items=drugFilterTOTALDAYS) | |
df_maskedTOTALDAYS = df_filteredTOTALDAYS.mask(df_filteredTOTALDAYS > 980, 0) | |
df_maskedTOTALDAYS_stacked = df_maskedTOTALDAYS | |
#df_maskedTOTALDAYS_stacked['INDEX'] = range(0, len(df_masked30EST_stacked)) | |
df_maskedTOTALDAYS_stacked = df_maskedTOTALDAYS_stacked.set_index(df_maskedTOTALDAYS_stacked.columns[-1]).stack() | |
df_maskedTOTALDAYS.to_csv("df_maskedTOTALDAYS_stacked.tsv", sep="\t") | |
df_maskedTOTALDAYS.to_csv("everyDrugTheyTakeTOTALDAYS.tsv", sep="\t") | |
df_maskedTOTALDAYS.to_json(path_or_buf="everyDrugTheyTakeTOTALDAYS.json", orient="index") | |
df_masked000 = df_maskedTOTALDAYS | |
# QUANT (2013) | |
drugYEAR = [ | |
"BLNTYFU", | |
"CIGYFU", | |
"CRKYFU", | |
"COCYFU", | |
"HERYFU", | |
"INHYFU", | |
"MJYFU", | |
"SEDYFU", | |
"STMYFU", | |
"TRNYFU", | |
"ALCYFU" | |
] | |
drugFilterYEAR = drugFilter0 + drugYEAR | |
df_filteredYEAR = df.filter(items=drugFilterYEAR) | |
df_maskedYEAR = df_filteredYEAR.mask(df_filteredYEAR > 9800, 0) | |
#df_stackedYEAR = df_maskedYEAR.set_index(df.columns[0]).stack() | |
df_maskedYEAR.to_csv("everyDrugTheyTakeYEAR.tsv", sep="\t") | |
df_maskedYEAR.to_json(path_or_buf="everyDrugTheyTakeYEAR.json", orient="index") | |
df_masked0000 = df_maskedYEAR | |
df_combined = pd.concat([df_masked0000, df_masked000, df_masked00], axis=1, join='outer') | |
#df_combined_no0 = df_combined.set_index(df.columns[0]).stack() | |
df_combined.to_csv("everyDrugTheyTakeQuant_final.tsv", sep="\t") | |
#df_combined.to_json(path_or_buf="everyDrugTheyTakeQuant_final.json", orient="index") | |
# print(df_combined_no0) | |
#.to_csv("everyDrugTheyTake_final.tsv", sep="\t") | |
# Tableau Derived Statistic | |
""" | |
CORR({INCLUDE [ID] : SUM([Taken])}, {INCLUDE [ID (everyDrugTheyTake stacked.tsv1)] : SUM([Taken (everyDrugTheyTake stacked.tsv1)])}) | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment