ksdkamesh99 · June 21, 2020 05:40
diff --git a/preprocessing.py b/preprocessing.py
 # Preprocessing script: load the wine-quality CSV, turn the 'quality' column
 # into a binary label, plot two diagnostics, and build a train/test split.

 # Load the CSV file (the path is resolved against the working directory).

 data = pd.read_csv('winequality_red.csv')

 # Count plot of the raw 'quality' values, drawn before they are binarised.

 sns.countplot(x='quality', data=data)

 # Keep the original 'quality' column before it is overwritten below.

 quality = data['quality']

 # Binarise quality: scores <= 6.5 become 0 and scores > 6.5 become 1.
 # The outer bin edges are open-ended rather than the fixed (2, 8] range, so
 # a score outside that range still receives a label instead of becoming NaN
 # (NaN would make the int64 cast below fail). In-range scores are labelled
 # exactly as before.

 data['quality'] = pd.cut(
     data['quality'],
     bins=(float('-inf'), 6.5, float('inf')),
     labels=[0, 1],
 )

 # pd.cut returns a categorical column; convert the labels to int64.

 data['quality'] = data['quality'].astype('int64')

 # Correlation heat map of all columns (including the binarised label),
 # with each cell annotated to two decimal places.

 plt.figure(figsize=(60, 30))
 sns.heatmap(data.corr(), annot=True, fmt='.2f')
 plt.show()

 # Separate the data into features and labels: every column except the last
 # is a feature, the last column is the label.
 # NOTE(review): assumes 'quality' is the last column of the CSV - confirm.

 x = data.iloc[:, :-1]
 y = data.iloc[:, -1]

 # Split into training and testing sets with 75% of the rows for training;
 # random_state is fixed so the split is reproducible.

 x_train, x_test, y_train, y_test = train_test_split(
     x, y, train_size=0.75, random_state=42
 )
	# Preprocessing script: load the wine-quality CSV, turn the 'quality' column
	# into a binary label, plot two diagnostics, and build a train/test split.

	# Load the CSV file (the path is resolved against the working directory).

	data = pd.read_csv('winequality_red.csv')

	# Count plot of the raw 'quality' values, drawn before they are binarised.

	sns.countplot(x='quality', data=data)

	# Keep the original 'quality' column before it is overwritten below.

	quality = data['quality']

	# Binarise quality: scores <= 6.5 become 0 and scores > 6.5 become 1.
	# The outer bin edges are open-ended rather than the fixed (2, 8] range, so
	# a score outside that range still receives a label instead of becoming NaN
	# (NaN would make the int64 cast below fail). In-range scores are labelled
	# exactly as before.

	data['quality'] = pd.cut(
		data['quality'],
		bins=(float('-inf'), 6.5, float('inf')),
		labels=[0, 1],
	)

	# pd.cut returns a categorical column; convert the labels to int64.

	data['quality'] = data['quality'].astype('int64')

	# Correlation heat map of all columns (including the binarised label),
	# with each cell annotated to two decimal places.

	plt.figure(figsize=(60, 30))
	sns.heatmap(data.corr(), annot=True, fmt='.2f')
	plt.show()

	# Separate the data into features and labels: every column except the last
	# is a feature, the last column is the label.
	# NOTE(review): assumes 'quality' is the last column of the CSV - confirm.

	x = data.iloc[:, :-1]
	y = data.iloc[:, -1]

	# Split into training and testing sets with 75% of the rows for training;
	# random_state is fixed so the split is reproducible.

	x_train, x_test, y_train, y_test = train_test_split(
		x, y, train_size=0.75, random_state=42
	)
No results found