Skip to content

Instantly share code, notes, and snippets.

BeneID 0.000000
ClaimID 0.000000
ClaimStartDt 0.000000
ClaimEndDt 0.000000
Provider 0.000000
InscClaimAmtReimbursed 0.000000
AttendingPhysician 0.270149
OperatingPhysician 79.497538
OtherPhysician 64.218548
# Bar chart of how often each PotentialFraud label occurs in patient_data,
# with every bar annotated by its share of all rows.
colors_list = ['#5bc0de', '#d9534f']
plt.title("Potential Fraud distribution")
fraud_label_counts = patient_data['PotentialFraud'].value_counts()
ax = fraud_label_counts.plot(
    kind='bar',
    figsize=(5, 5),
    width=0.8,
    color=colors_list,
)
# Denominator is the full row count of the column.
total = len(patient_data['PotentialFraud'])
for p in ax.patches:
    bar_top = p.get_y() + p.get_height()
    # Shift the label 0.5 data units left of the bar's right edge.
    label_x = p.get_x() + p.get_width() - 0.5
    ax.annotate(f'{100 * p.get_height() / total:.1f}%', (label_x, bar_top))
plt.xlabel('Potential_Fraud')
# For each claims frame, attach to every row the number of claims in that same
# frame that share its AttendingPhysician. Outpatients are processed first, so
# `physician_count` ends up holding the inpatient frequencies.
for claims in (outpatients_, inpatients_):
    attending = claims['AttendingPhysician']
    physician_count = attending.value_counts().to_dict()
    claims['physician_count'] = attending.map(physician_count)
# Horizontal count plot of outpatient claims restricted to the 20 most frequent
# attending physicians, with bars split by the PotentialFraud label.
ax = sns.countplot(y='AttendingPhysician',data=outpatients_,hue='PotentialFraud',order = outpatients_['AttendingPhysician'].value_counts().head(20).index)
# d1: claims per attending physician; s_s1: the sum of those per-physician counts.
d1 = outpatients_['AttendingPhysician'].value_counts().to_dict()
s_s1 = sum(list(d1.values()))
# NOTE(review): the body of the loop below is missing from this excerpt. It
# presumably annotates each bar with its share of s_s1 -- restore it from the
# original notebook before running this cell.
for p in ax.patches:
# Point plots of id_Count against physician_count, split by PotentialFraud,
# drawn first for the outpatient frame and then for the inpatient frame.
# NOTE(review): both plots land on the single figure created here; confirm the
# overlay is intended rather than one figure per frame.
plt.figure(figsize=(15, 9))
for claims in (outpatients_, inpatients_):
    sns.pointplot(
        x=claims['physician_count'],
        y=claims['id_Count'],
        hue=claims['PotentialFraud'],
    )
    plt.suptitle('Physician_attended vs Beneficiaries_count\n')
plt.show()
# Parse the inpatient date columns (expected as YYYY-MM-DD). The first four are
# written to new *_Date columns; DOB and DOD are converted in place.
date_columns = {
    'Admission_Date': 'AdmissionDt',
    'Discharge_Date': 'DischargeDt',
    'ClaimStart_Date': 'ClaimStartDt',
    'ClaimEnd_Date': 'ClaimEndDt',
    'DOB': 'DOB',
    'DOD': 'DOD',
}
for parsed_col, raw_col in date_columns.items():
    inpatient_data[parsed_col] = pd.to_datetime(inpatient_data[raw_col], format='%Y-%m-%d')

# Age at claim start, in years rounded to the nearest whole number
# (days between DOB and claim start, plus one, divided by 365.25).
days_to_claim = (inpatient_data['ClaimStart_Date'] - inpatient_data['DOB']).dt.days + 1
inpatient_data['Age'] = (days_to_claim / 365.25).round()

# Number of inpatient claims that share each row's Age value.
age_count = inpatient_data['Age'].value_counts().to_dict()
inpatient_data['Age_Count'] = inpatient_data['Age'].map(age_count)
# Violin plots over the inpatient frame, one distribution per PotentialFraud label.
violin_options = dict(x='PotentialFraud', data=inpatient_data, width=0.5)

# Age distribution per fraud label.
sns.violinplot(y='Age', **violin_options)
plt.title('Age_of_patients vs frauds')
plt.ylabel('Age_of_patients')
plt.xlabel('potential fraud')

# Gender distribution per fraud label.
# NOTE(review): no new figure is opened in between, so this draws onto the same
# axes as the Age plot -- confirm against the original notebook.
sns.violinplot(y='Gender', **violin_options)
plt.xlabel('potential fraud')
# Bucket patient ages into decade-wide, right-closed bins (30, 40] ... (90, 100].
# Ages outside that range (including exactly 30) get NaN from pd.cut.
age_bins = [30, 40, 50, 60, 70, 80, 90, 100]
outpatient_data["age_group"] = pd.cut(outpatient_data.Age, age_bins)
inpatient_data["age_group"] = pd.cut(inpatient_data.Age, age_bins)

# Attach to every outpatient row the number of outpatient rows that carry the
# same PotentialFraud label.
fraud_count = outpatient_data['PotentialFraud'].value_counts().to_dict()
# Fix: the lookup previously went through `age_count` (age frequencies), which
# has no entries for the fraud labels and so filled the column with NaN.
outpatient_data['fraud_Count'] = outpatient_data['PotentialFraud'].map(fraud_count)
# Frequency of each Gender value among inpatient (s1) and outpatient (s2)
# claims; s_s1 is the total number of inpatient rows counted in s1.
s1 = inpatient_data['Gender'].value_counts()
s2 = outpatient_data['Gender'].value_counts()
s_s1 = int(s1.sum())
# Kernel density estimate of InscClaimAmtReimbursed, shown as one 5x5 figure
# for inpatient claims and then one for outpatient claims.
# NOTE(review): newer seaborn releases deprecate `shade` in favour of `fill`;
# check the installed version before upgrading.
kde_panels = (
    (inpatient_data, 'InscClaimAmtReimbursed in inpatient'),
    (outpatient_data, 'InscClaimAmtReimbursed in Outpatient'),
)
for claims, panel_title in kde_panels:
    rcParams['figure.figsize'] = 5, 5
    sns.kdeplot(
        claims['InscClaimAmtReimbursed'],
        shade=True,
        color='green',
        legend=False,
    )
    plt.title(panel_title)
    plt.xlabel('InscClaimAmtReimbursed')
    plt.show()
# Frequency of each distinct reimbursed amount, with the total rows counted,
# for outpatient (s1, s_s1) and inpatient (s2, s_s2) claims.
s1 = outpatient_data['InscClaimAmtReimbursed'].value_counts()
s_s1 = int(s1.sum())
s2 = inpatient_data['InscClaimAmtReimbursed'].value_counts()
s_s2 = int(s2.sum())

# Bucket reimbursed amounts into right-closed bins: outpatient claims use
# uneven edges up to 2000, inpatient claims use steps of 2000 up to 16000.
outpatient_amount_bins = [0, 50, 100, 200, 400, 600, 800, 1000, 1500, 2000]
inpatient_amount_bins = list(range(0, 16001, 2000))
outpatient_data["amountgrp"] = pd.cut(
    outpatient_data['InscClaimAmtReimbursed'], outpatient_amount_bins
)
inpatient_data["amountgrp"] = pd.cut(
    inpatient_data['InscClaimAmtReimbursed'], inpatient_amount_bins
)

plt.style.use('fivethirtyeight')

# Outpatient claim counts per amount bin (rows) and PotentialFraud label (columns).
grouped_amounts = outpatient_data.groupby(['amountgrp', 'PotentialFraud'])
counts = grouped_amounts['InscClaimAmtReimbursed'].count().unstack()