Skip to content

Instantly share code, notes, and snippets.

# Convert the raw date strings into datetime columns. The claim dates are
# written to new Claim_Start / Claim_End columns; DOB and DOD are converted
# in place.
for _parsed_col, _raw_col in (
    ('Claim_Start', 'ClaimStartDt'),
    ('Claim_End', 'ClaimEndDt'),
    ('DOB', 'DOB'),
    ('DOD', 'DOD'),
):
    patient_data[_parsed_col] = pd.to_datetime(patient_data[_raw_col], format='%Y-%m-%d')

# Length of each claim in days; the +1 counts both the start and the end day.
_claim_span = patient_data['Claim_End'] - patient_data['Claim_Start']
patient_data['Claim_Days'] = _claim_span.dt.days + 1
# Frequency tables: for each identifier column, a dict mapping every distinct
# value to the number of rows of patient_data that carry it.
att_physician_count = patient_data['AttendingPhysician'].value_counts().to_dict()
oper_physician_count = patient_data['OperatingPhysician'].value_counts().to_dict()
ben_count = patient_data['BeneID'].value_counts().to_dict()
prov_count = patient_data['Provider'].value_counts().to_dict()

# Attach the frequencies as per-row features by looking each row's identifier
# up in the matching table. (prov_count is only built here, not mapped.)
for _feature_col, _id_col, _freq_table in (
    ('attend_physician_count', 'AttendingPhysician', att_physician_count),
    ('operate_physician_count', 'OperatingPhysician', oper_physician_count),
    ('BeneID_count', 'BeneID', ben_count),
):
    patient_data[_feature_col] = patient_data[_id_col].map(_freq_table)
# Tag every row with the table it comes from: rows of train_d_inpatient get
# whether_admitted = 1, rows of train_d_outpatient get 0.
for _claims_frame, _admitted_flag in ((train_d_inpatient, 1), (train_d_outpatient, 0)):
    _claims_frame['whether_admitted'] = _admitted_flag
# Pool the six ClmProcedureCode_* columns into one long column so that a code
# is counted regardless of which slot of the claim it was recorded in, and
# drop the empty slots.
procedure_columns = ['ClmProcedureCode_{}'.format(slot) for slot in range(1, 7)]
patient_df = (
    pd.concat([patient_data[column] for column in procedure_columns], axis=0)
    .dropna()
    .to_frame('Procedure_data')
)

# Bar chart of the 30 most frequent procedure codes. The bar labels come from
# the index of the value_counts() result, so no x= argument is passed.
plt.figure(figsize=(10, 7))
patient_df['Procedure_data'].value_counts().head(30).plot(kind='bar', color='purple')
plt.title('Procedure Codes vs Count')
plt.xlabel('Procedure Codes')
plt.ylabel('Count')
plt.show()
# Pool the ten ClmDiagnosisCode_* columns into one long column so that a code
# is counted regardless of which slot of the claim it was recorded in, and
# drop the empty slots.
diagnosis_columns = ['ClmDiagnosisCode_{}'.format(slot) for slot in range(1, 11)]
patient_df = (
    pd.concat([patient_data[column] for column in diagnosis_columns], axis=0)
    .dropna()
    .to_frame('Diagnosis_data')
)

# Bar chart of the 30 most frequent diagnosis codes. The bar labels come from
# the index of the value_counts() result, so no x= argument is passed.
plt.figure(figsize=(10, 7))
patient_df['Diagnosis_data'].value_counts().head(30).plot(kind='bar', color='blue')
plt.title('Diagnosis Codes vs Count')
plt.xlabel('Diagnosis Codes')
plt.ylabel('Count')
plt.show()
# Occurrences of each distinct reimbursed amount (s1: outpatient, s2:
# inpatient) and the total of those occurrences (s_s1 / s_s2).
s1 = outpatient_data['InscClaimAmtReimbursed'].value_counts()
s2 = inpatient_data['InscClaimAmtReimbursed'].value_counts()
s_s1 = sum(s1.tolist())
s_s2 = sum(s2.tolist())

# Bucket the reimbursed amounts into ranges, with separate edges for the
# outpatient and the inpatient table.
# NOTE(review): with pd.cut's defaults the intervals are right-closed and the
# lowest edge is excluded, so amounts of exactly 0 and amounts above the last
# edge end up without a group (NaN) - confirm that this is intended.
_outpatient_amount_edges = [0, 50, 100, 200, 400, 600, 800, 1000, 1500, 2000]
_inpatient_amount_edges = [0, 2000, 4000, 6000, 8000, 10000, 12000, 14000, 16000]
outpatient_data["amountgrp"] = pd.cut(
    outpatient_data['InscClaimAmtReimbursed'], bins=_outpatient_amount_edges
)
inpatient_data["amountgrp"] = pd.cut(
    inpatient_data['InscClaimAmtReimbursed'], bins=_inpatient_amount_edges
)
plt.style.use('fivethirtyeight')

# Number of outpatient claims per (amount group, PotentialFraud) pair, with
# the PotentialFraud values spread across the columns.
counts = (
    outpatient_data
    .groupby(['amountgrp', 'PotentialFraud'])['InscClaimAmtReimbursed']
    .count()
    .unstack()
)

# Density of the reimbursed amount, one 5x5 figure per claim table.
# NOTE(review): newer seaborn releases deprecate `shade=` in favour of
# `fill=` - check against the installed seaborn version.
for _claims_frame, _plot_title in (
    (outpatient_data, 'InscClaimAmtReimbursed in Outpatient'),
    (inpatient_data, 'InscClaimAmtReimbursed in inpatient'),
):
    rcParams['figure.figsize'] = 5, 5
    sns.kdeplot(
        _claims_frame['InscClaimAmtReimbursed'],
        shade=True,
        color='green',
        legend=False,
    )
    plt.title(_plot_title)
    plt.xlabel('InscClaimAmtReimbursed')
    plt.show()
# Bucket patient ages into ten-year bands between 30 and 100, using the same
# edges for the outpatient and the inpatient table.
_age_band_edges = [30, 40, 50, 60, 70, 80, 90, 100]
outpatient_data["age_group"] = pd.cut(outpatient_data.Age, _age_band_edges)
inpatient_data["age_group"] = pd.cut(inpatient_data.Age, _age_band_edges)

# Frequency-encode the fraud label: each row receives the number of outpatient
# rows that share its PotentialFraud value.
fraud_count = outpatient_data['PotentialFraud'].value_counts().to_dict()
# The lookup table must be fraud_count, which is keyed by the PotentialFraud
# values; a table keyed by ages (age_count) would match no label at all.
outpatient_data['fraud_Count'] = outpatient_data['PotentialFraud'].map(fraud_count)
# Occurrences of each Gender value (s1: inpatient, s2: outpatient) and the
# total of the inpatient occurrences (s_s1).
# NOTE(review): s_s2 is not recomputed after s2 is rebound here, so it keeps
# whatever value it had before - confirm whether that is intended.
s1 = inpatient_data['Gender'].value_counts()
s2 = outpatient_data['Gender'].value_counts()
s_s1 = sum(s1.tolist())

# Frequency-encode Age: each inpatient row receives the number of inpatient
# rows that share its Age value.
_inpatient_age = inpatient_data['Age']
age_count = _inpatient_age.value_counts().to_dict()
inpatient_data['Age_Count'] = _inpatient_age.map(age_count)
# Age distribution of the inpatient claims, split by the PotentialFraud label.
sns.violinplot(x='PotentialFraud', y='Age', data=inpatient_data, width=0.5)
plt.xlabel('potential fraud')
plt.ylabel('Age_of_patients')
plt.title('Age_of_patients vs frauds')
# Finish this figure before starting the next plot; otherwise the second
# violin plot would be drawn onto the same axes as the first one.
plt.show()

# Gender values of the inpatient claims, split by the PotentialFraud label,
# drawn on a figure of their own.
plt.figure()
sns.violinplot(x='PotentialFraud', y='Gender', data=inpatient_data, width=0.5)
plt.xlabel('potential fraud')