Skip to content

Instantly share code, notes, and snippets.

@PranjalDureja0002
PranjalDureja0002 / eda
Created March 1, 2021 12:03
Health_Fraud
# Bar chart of how the PotentialFraud values are distributed in patient_data,
# with every bar annotated by its percentage share.
colors_list = ['#5bc0de', '#d9534f']
fraud_counts = patient_data['PotentialFraud'].value_counts()
ax = fraud_counts.plot(kind='bar', figsize=(5, 5), width=0.8, color=colors_list)
ax.set_title("Potential Fraud distribution")
ax.set_xlabel('Potential_Fraud')

# value_counts() leaves out missing labels, so the denominator is the sum of
# the plotted counts; len() of the column would also count NaN rows and the
# percentages would then not add up to 100.
total = fraud_counts.sum()
for p in ax.patches:
    percentage = '{:.1f}%'.format(100 * p.get_height() / total)
    # Anchor the label at the top centre of the bar so its placement does not
    # depend on the chosen bar width.
    x = p.get_x() + p.get_width() / 2
    y = p.get_y() + p.get_height()
    ax.annotate(percentage, (x, y), ha='center', va='bottom')
How do we know whether the services billed were actually performed? Is the listed patient the actual patient, and has their eligibility been verified? Were diagnoses or procedures reported incorrectly… or is it something else?
# Columns that exist in both claim tables, in outpatient column order; the
# inpatient/outpatient outer merge is keyed on all of them.
shared_claim_cols = [col for col in train_d_outpatient.columns if col in train_d_inpatient.columns]
patient_data_io = pd.merge(
    train_d_inpatient,
    train_d_outpatient,
    left_on=shared_claim_cols,
    right_on=shared_claim_cols,
    how='outer',
)

# Attach the beneficiary table (inner join on BeneID), then the label table
# (outer join on Provider).
claims_with_beneficiary = pd.merge(patient_data_io, train_d_beneficiary, how='inner', on='BeneID')
patient_data = claims_with_beneficiary.merge(label_d_data, how='outer', on='Provider')
# Percentage of missing (null) entries in each column of patient_data.
per_miss_values = patient_data.isnull().sum() * 100 / len(patient_data)
BeneID 0.000000
ClaimID 0.000000
ClaimStartDt 0.000000
ClaimEndDt 0.000000
Provider 0.000000
InscClaimAmtReimbursed 0.000000
AttendingPhysician 0.270149
OperatingPhysician 79.497538
OtherPhysician 64.218548
AdmissionDt 92.749337
# Plot the distribution of PotentialFraud values in patient_data as a bar
# chart and label each bar with its percentage of the counted rows.
colors_list = ['#5bc0de', '#d9534f']
fraud_counts = patient_data['PotentialFraud'].value_counts()
ax = fraud_counts.plot(kind='bar', figsize=(5, 5), width=0.8, color=colors_list)
ax.set_title("Potential Fraud distribution")
ax.set_xlabel('Potential_Fraud')

# Use the sum of the plotted counts as the denominator: value_counts() drops
# missing labels, whereas len() of the column would include them and skew
# every percentage downwards.
total = fraud_counts.sum()
for p in ax.patches:
    percentage = '{:.1f}%'.format(100 * p.get_height() / total)
    # Centre the label horizontally on the bar and sit it on the bar's top.
    x = p.get_x() + p.get_width() / 2
    y = p.get_y() + p.get_height()
    ax.annotate(percentage, (x, y), ha='center', va='bottom')
# For each claims frame (outpatient first, then inpatient), add a
# 'physician_count' column holding how often the row's AttendingPhysician
# value occurs within that same frame.
for claims in (outpatients_, inpatients_):
    physician_count = claims['AttendingPhysician'].value_counts().to_dict()
    claims['physician_count'] = claims['AttendingPhysician'].map(physician_count)
# Horizontal count plot of outpatient rows for the 20 most frequent
# AttendingPhysician values, with bars split by PotentialFraud.
ax = sns.countplot(y='AttendingPhysician',data=outpatients_,hue='PotentialFraud',order = outpatients_['AttendingPhysician'].value_counts().head(20).index)
# Occurrence count per AttendingPhysician as a plain dict, and the sum of
# all those counts.
d1 = outpatients_['AttendingPhysician'].value_counts().to_dict()
s_s1 = sum(list(d1.values()))
# NOTE(review): the body of this loop is not visible in this view; presumably
# it annotates each bar using s_s1 — confirm against the full notebook.
for p in ax.patches:
# Point plots of id_Count against physician_count, split by PotentialFraud,
# for the outpatient and the inpatient frames.
# Each frame gets its own axes: pointplot places x values on a categorical
# axis, so drawing both on a single axes would overlay them on mismatched
# category positions and with identical hue colours.
fig, (ax_out, ax_in) = plt.subplots(1, 2, figsize=(15, 9))

sns.pointplot(x=outpatients_.physician_count, y=outpatients_.id_Count,
              hue=outpatients_.PotentialFraud, ax=ax_out)
ax_out.set_title('Outpatient')

sns.pointplot(x=inpatients_.physician_count, y=inpatients_.id_Count,
              hue=inpatients_.PotentialFraud, ax=ax_in)
ax_in.set_title('Inpatient')

# One figure-level title is enough; setting it a second time changes nothing.
fig.suptitle('Physician_attended vs Beneficiaries_count\n')
plt.show()
# Parse the '%Y-%m-%d' date strings of inpatient_data into datetime columns.
# Keys are the columns written, values the columns read; DOB and DOD are
# converted in place.
date_format = '%Y-%m-%d'
date_columns = {
    'Admission_Date': 'AdmissionDt',
    'Discharge_Date': 'DischargeDt',
    'ClaimStart_Date': 'ClaimStartDt',
    'ClaimEnd_Date': 'ClaimEndDt',
    'DOB': 'DOB',
    'DOD': 'DOD',
}
for parsed_col, raw_col in date_columns.items():
    inpatient_data[parsed_col] = pd.to_datetime(inpatient_data[raw_col], format=date_format)

# Age at claim start: days from DOB to ClaimStart_Date (plus one), expressed
# in 365.25-day years and rounded to the nearest whole number.
days_to_claim_start = (inpatient_data['ClaimStart_Date'] - inpatient_data['DOB']).dt.days + 1
inpatient_data['Age'] = round(days_to_claim_start / 365.25)