wmlba · February 22, 2019 09:29
diff --git a/undersampling.py b/undersampling.py
 # Random undersampling: keep every fraud row and an equally sized random
 # sample of the non-fraud rows, then plot the resulting class counts.

 # Shuffle the full dataset; the fixed seed keeps the row order reproducible.
 shuffled_df = credit_df.sample(frac=1, random_state=4)

 # Put all the fraud class (Class == 1) in a separate dataset.
 fraud_df = shuffled_df.loc[shuffled_df['Class'] == 1]

 # Randomly select as many non-fraud (majority class) rows as there are fraud
 # rows. The count is derived from fraud_df instead of being hard-coded, so the
 # two classes stay balanced even if the input data changes.
 non_fraud_df = shuffled_df.loc[shuffled_df['Class'] == 0].sample(n=len(fraud_df), random_state=42)

 # Concatenate both dataframes again (fraud rows first, then non-fraud rows).
 normalized_df = pd.concat([fraud_df, non_fraud_df])

 # Plot the class counts after the undersampling.
 plt.figure(figsize=(8, 8))
 # The column is passed as the `x` keyword: recent seaborn releases accept only
 # `data` positionally, so a positional 'Class' would collide with `data=`.
 sns.countplot(x='Class', data=normalized_df)
 plt.title('Balanced Classes')
 plt.show()
	# Random undersampling: keep every fraud row and an equally sized random
	# sample of the non-fraud rows, then plot the resulting class counts.

	# Shuffle the full dataset; the fixed seed keeps the row order reproducible.
	shuffled_df = credit_df.sample(frac=1, random_state=4)

	# Put all the fraud class (Class == 1) in a separate dataset.
	fraud_df = shuffled_df.loc[shuffled_df['Class'] == 1]

	# Randomly select as many non-fraud (majority class) rows as there are fraud
	# rows. The count is derived from fraud_df instead of being hard-coded, so the
	# two classes stay balanced even if the input data changes.
	non_fraud_df = shuffled_df.loc[shuffled_df['Class'] == 0].sample(n=len(fraud_df), random_state=42)

	# Concatenate both dataframes again (fraud rows first, then non-fraud rows).
	normalized_df = pd.concat([fraud_df, non_fraud_df])

	# Plot the class counts after the undersampling.
	plt.figure(figsize=(8, 8))
	# The column is passed as the `x` keyword: recent seaborn releases accept only
	# `data` positionally, so a positional 'Class' would collide with `data=`.
	sns.countplot(x='Class', data=normalized_df)
	plt.title('Balanced Classes')
	plt.show()