Skip to content

Instantly share code, notes, and snippets.

@kennethbruskiewicz
Created April 17, 2019 14:22
Show Gist options
  • Save kennethbruskiewicz/2cc81e970c2452cd0a3c75e6304e7ac7 to your computer and use it in GitHub Desktop.
Save kennethbruskiewicz/2cc81e970c2452cd0a3c75e6304e7ac7 to your computer and use it in GitHub Desktop.
# Python File Pre-Processing
import pandas as pd
df = pd.read_csv("35509-0001-Data.tsv", sep='\t',nrows=100)
drugFilter0 = []
# BINARY
drugEVER = ["CIGEVER", "SNFEVER", "CHEWEVER", "PIPEVER", "ALCEVER", "MJEVER", "COCEVER", "CRKEVER", "HEREVER", "INHEVER", "ANLEVER", "STMEVER", "SEDEVER", "BLNTEVER", "TRNEVER"]
drugFilterEVER = drugFilter0 + drugEVER
df_filteredEVER = df.filter(items=drugFilterEVER)
df_maskedEVER = df_filteredEVER.mask(df_filteredEVER > 80, 0)
df_maskedEVER_stacked = df_maskedEVER
df_maskedEVER_stacked['INDEX'] = range(0, len(df_maskedEVER_stacked))
df_maskedEVER_stacked = df_maskedEVER_stacked.set_index(df_maskedEVER_stacked.columns[-1]).stack()
#df_maskedEVER_stacked.to_csv("df_maskedEVER_stacked.tsv", sep="\t")
df_maskedEVER.to_csv("everyDrugTheyTakeEVER.tsv", sep="\t")
df_maskedEVER.to_json(path_or_buf="everyDrugTheyTakeEVER.json", orient="table")
# BIN (6)
drug30EST = [
"CG30EST",
"SN30EST",
"CH30EST",
"CI30EST",
"AL30EST",
"MR30EST",
"CC30EST",
"CR30EST",
"HR30EST",
"HL30EST"
]
drugFilter30EST = drugFilter0 + drug30EST
df_filtered30EST = df.filter(items=drugFilter30EST)
df_masked30EST = df_filtered30EST.mask(df_filtered30EST > 80, 0)
df_masked30EST_stacked = df_masked30EST
df_masked30EST_stacked['INDEX'] = range(0, len(df_masked30EST_stacked))
df_masked30EST_stacked = df_masked30EST_stacked.set_index(df_masked30EST_stacked.columns[-1]).stack()
#df_masked30EST_stacked.to_csv("df_masked30EST_stacked.tsv", sep="\t")
df_masked30EST.to_csv("everyDrugTheyTake30EST.tsv", sep="\t")
df_masked30EST.to_json(path_or_buf="everyDrugTheyTake30EST.json", orient="index")
# QUANT (30)
drug30USE = [
"CIG30USE",
"SNF30USE",
"CHW30USE",
"CGR30USE",
"HER30USE",
"HAL30USE",
"FUSE30USE"
]
drugFilter30USE = drugFilter0 + drug30USE
df_filtered30USE = df.filter(items=drugFilter30USE)
df_masked30USE = df_filtered30USE.mask(df_filtered30USE > 80, 0)
#df_stacked30USE = df_masked30USE.set_index(df.columns[0]).stack()
#df_masked30EST['INDEX'] = range(1, len(df) + 1)
df_masked30USE.to_csv("everyDrugTheyTake30USE.tsv", sep="\t")
df_masked30USE.to_json(path_or_buf="everyDrugTheyTake30USE.json", orient="index")
# QUANT (62)
drugAGE = [
"IIALCAGE",
"IRALCAGE",
"IIANLAGE",
"IRANLAGE",
"BLNTAGE",
"CIGAGE",
"CRKAGE",
"IRCRKAGE",
"IICRKAGE",
"COCAGE",
"IRCOCAGE",
"IICOCAGE",
"HERAGE",
"IRHERAGE",
"IIHERAGE",
"INHAGE",
"IRINHAGE",
"IIINHAGE",
"MJAGE",
"IIMJAGE",
"IRMJAGE",
"SEDAGE",
"IISEDAGE",
"IRSEDAGE",
"STMAGE",
"IISTMAGE",
"IRSTMAGE",
"TRNAGE",
"IITRNAGE",
"IRTRNAGE"
]
drugFilterAGE = drugFilter0 + drugAGE
df_filteredAGE = df.filter(items=drugFilterAGE)
df_maskedAGE = df_filteredAGE.mask(df_filteredAGE > 80, 0)
#df_stackedAGE = df_filteredAGE.set_index(df.columns[0]).stack()
df_maskedAGE.to_csv("everyDrugTheyTakeAGE.tsv", sep="\t")
df_maskedAGE.to_json(path_or_buf="everyDrugTheyTakeAGE.json", orient="index")
# QUANT/NOM (12)
drugMONTHS = [
"ALCMFU",
"COCMFU",
"BLNTMFU",
"CIGMFU",
"CRKMFU",
"HERMFU",
"INHMFU",
"MJMFU",
"SEDMFU",
"STMMFU",
"TRNMFU"
]
drugFilterMONTHS = drugFilter0 + drugMONTHS
df_filteredMONTHS = df.filter(items=drugFilterMONTHS)
df_maskedMONTHS = df_filteredMONTHS.mask(df_filteredMONTHS > 80, 0)
#df_stackedMONTHS = df_maskedMONTHS.set_index(df.columns[0]).stack()
#print(df_maskedMONTHS)
df_maskedMONTHS.to_csv("everyDrugTheyTakeMONTHS.tsv", sep="\t")
df_maskedMONTHS.to_json(path_or_buf="everyDrugTheyTakeMONTHS.json", orient="index")
# BIN (9)
drugLAST = [
"ALCREC",
"COCREC",
"BLNTREC",
"CIGREC",
"CRKREC",
"HERREC",
"INHREC",
"MJREC",
"SEDREC",
"STMREC",
"TRNREC"
]
drugFilterLAST = drugFilter0 + drugLAST
df_filteredLAST = df.filter(items=drugFilterLAST)
df_maskedLAST = df_filteredLAST.mask(df_filteredLAST > 80, 0)
#df_stackedLAST = df_maskedLAST.set_index(df.columns[0]).stack()
df_maskedLAST.to_csv("everyDrugTheyTakeLAST.tsv", sep="\t")
df_maskedLAST.to_json(path_or_buf="everyDrugTheyTakeLAST.json", orient="index")
# QUANT (7)
drugWEEKRATE = [
"ALDAYPWK",
"CCDAYPWK",
"CRDAYPWK",
"HRDAYPWK",
"HLDAYPWK",
"INDAYPWK",
"PRDAYPWK",
"OXDAYPWK",
"TRDAYPWK",
"STDAYPWK",
"MTDAYPWK",
"SVDAYPWK",
"MRDAYPWK"
]
drugFilterWEEKRATE = drugFilter0 + drugWEEKRATE
df_filteredWEEKRATE = df.filter(items=drugFilterWEEKRATE)
df_maskedWEEKRATE = df_filteredWEEKRATE.mask(df_filteredWEEKRATE > 80, 0)
#df_stackedWEEKRATE = df_maskedWEEKRATE.set_index(df.columns[0]).stack()
df_maskedWEEKRATE.to_csv("everyDrugTheyTakeWEEKRATE.tsv", sep="\t")
df_maskedWEEKRATE.to_json(path_or_buf="everyDrugTheyTakeWEEKRATE.json", orient="index")
drugFilter00 = drugFilter0 + drug30USE + drug30USE + drugAGE + drugMONTHS + drugLAST + drugWEEKRATE + drugEVER
df_filtered00 = df.filter(items=drugFilter00)
df_masked00 = df_filtered00.mask(df_filtered00 > 80, 0)
#df_stacked00 = df_masked00.set_index(df.columns[0]).stack()
df_masked00.to_csv("everyDrugTheyTake00.tsv", sep="\t")
#df_masked00.to_json(path_or_buf="everyDrugTheyTake00.json", orient="index")
"""
FREQUENCY DATA HERE WOW
"""
# QUANT (365)
drugTOTALDAYS = [
"ALCYRTOT",
"MJYRTOT",
"COCYRTOT",
"CRKYRTOT",
"HERYRTOT",
"HALYRTOT",
"INHYRTOT",
"ANLYRTOT",
"OXYYRTOT",
"TRNYRTOT",
"STMYRTOT",
"MTHYRTOT",
"SEDYRTOT"
]
drugFilterTOTALDAYS = drugFilter0 + drugTOTALDAYS
df_filteredTOTALDAYS = df.filter(items=drugFilterTOTALDAYS)
df_maskedTOTALDAYS = df_filteredTOTALDAYS.mask(df_filteredTOTALDAYS > 980, 0)
df_maskedTOTALDAYS_stacked = df_maskedTOTALDAYS
#df_maskedTOTALDAYS_stacked['INDEX'] = range(0, len(df_masked30EST_stacked))
df_maskedTOTALDAYS_stacked = df_maskedTOTALDAYS_stacked.set_index(df_maskedTOTALDAYS_stacked.columns[-1]).stack()
df_maskedTOTALDAYS.to_csv("df_maskedTOTALDAYS_stacked.tsv", sep="\t")
df_maskedTOTALDAYS.to_csv("everyDrugTheyTakeTOTALDAYS.tsv", sep="\t")
df_maskedTOTALDAYS.to_json(path_or_buf="everyDrugTheyTakeTOTALDAYS.json", orient="index")
df_masked000 = df_maskedTOTALDAYS
# QUANT (2013)
drugYEAR = [
"BLNTYFU",
"CIGYFU",
"CRKYFU",
"COCYFU",
"HERYFU",
"INHYFU",
"MJYFU",
"SEDYFU",
"STMYFU",
"TRNYFU",
"ALCYFU"
]
drugFilterYEAR = drugFilter0 + drugYEAR
df_filteredYEAR = df.filter(items=drugFilterYEAR)
df_maskedYEAR = df_filteredYEAR.mask(df_filteredYEAR > 9800, 0)
#df_stackedYEAR = df_maskedYEAR.set_index(df.columns[0]).stack()
df_maskedYEAR.to_csv("everyDrugTheyTakeYEAR.tsv", sep="\t")
df_maskedYEAR.to_json(path_or_buf="everyDrugTheyTakeYEAR.json", orient="index")
df_masked0000 = df_maskedYEAR
df_combined = pd.concat([df_masked0000, df_masked000, df_masked00], axis=1, join='outer')
#df_combined_no0 = df_combined.set_index(df.columns[0]).stack()
df_combined.to_csv("everyDrugTheyTakeQuant_final.tsv", sep="\t")
#df_combined.to_json(path_or_buf="everyDrugTheyTakeQuant_final.json", orient="index")
# print(df_combined_no0)
#.to_csv("everyDrugTheyTake_final.tsv", sep="\t")
# Tableau Derived Statistic
"""
CORR({INCLUDE [ID] : SUM([Taken])}, {INCLUDE [ID (everyDrugTheyTake stacked.tsv1)] : SUM([Taken (everyDrugTheyTake stacked.tsv1)])})
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment