Skip to content

Instantly share code, notes, and snippets.

@skeltonmod
Created January 7, 2021 11:21
Show Gist options
  • Save skeltonmod/f747c609686affa4f73f44e11c1c9c84 to your computer and use it in GitHub Desktop.
Save skeltonmod/f747c609686affa4f73f44e11c1c9c84 to your computer and use it in GitHub Desktop.
# Let's first import the necessary packages
# For our data analysis and wrangling
import pandas as pd
import numpy as np
# for our GUI
from tkinter import *
# For our data visualization
import seaborn as sns
import matplotlib.pyplot as plt
# lastly, for our machine learning
# always hoist your valuables
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV # for tuning parameter
from sklearn import svm # for Support Vector Machine
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn import metrics
# Values collected from the form; filled in when the button is pressed.
ty = []

# Main application window.
root = Tk()
root.title("Breast Cancer Predictor Expert Systems")

# Eight identical input boxes, created in the same order as their captions.
fst, snd, trd, fth, ffth, sxth, svth, eth = (
    Entry(root, width=50, borderwidth=5) for _ in range(8)
)

# Heading displayed above the form.
mse = Label(root, text="Please enter the following data needed: ")
mse.grid(row=0, column=1)

# One caption per input box.
fstl, sndl, trdl, fthl, ffthl, sxthl, svthl, ethl = (
    Label(root, text=caption)
    for caption in (
        "Radius_Mean",
        "Perimeter_Mean",
        "Area_Mean",
        "Radius_Se",
        "Perimeter_Se",
        "Radius_Worst",
        "Perimeter_Worst",
        "Area_Worst",
    )
)

# Layout: input boxes fill column 1 on rows 1-8, captions sit beside them in
# column 0 on the same rows.
for row, box in enumerate((fst, snd, trd, fth, ffth, sxth, svth, eth), start=1):
    box.grid(row=row, column=1)
for row, caption_label in enumerate(
    (fstl, sndl, trdl, fthl, ffthl, sxthl, svthl, ethl), start=1
):
    caption_label.grid(row=row, column=0)
# Now, we create the model.
# Load the data set and take a look at it. The path is relative, so data.csv
# is looked up in the current working directory.
data = pd.read_csv(r"data.csv")
print(data.head())
print(data.tail())

# Check for null values. A bare expression displays nothing when run as a
# script, so the per-column null counts are printed explicitly.
print(data.isnull().sum())

# Drop the "Unnamed: 32" column. errors="ignore" keeps the script working
# with a CSV that does not contain that column.
data.drop("Unnamed: 32", axis=1, inplace=True, errors="ignore")

# Group the feature columns by position (mean / standard error / worst).
# NOTE(review): these three lists are not used in the code visible here.
features_mean = list(data.columns[2:12])
features_se = list(data.columns[12:22])
features_worst = list(data.columns[22:32])

# Columns removed before fitting the baseline model: the id column, the
# target (diagnosis) and the features the original author judged correlated.
drplist1 = [
    'id', 'diagnosis', 'perimeter_mean', 'radius_mean', 'compactness_mean',
    'concave points_mean', 'radius_se', 'perimeter_se', 'radius_worst',
    'perimeter_worst', 'compactness_worst', 'concave points_worst',
    'compactness_se', 'concave points_se', 'texture_worst', 'area_worst',
]
x1 = data.drop(drplist1, axis=1)

# Now we do the modelling.
y = data.diagnosis
# Split the data: 70 % train, 30 % test.
x_train, x_test, y_train, y_test = train_test_split(x1, y, test_size=0.3, random_state=42)

# Random forest with the library's default number of trees (no n_estimators
# is passed; scikit-learn's default is 100 since version 0.22, not 10).
rfne = RandomForestClassifier(random_state=43)
rfne = rfne.fit(x_train, y_train)

# Predict once and reuse the result for both metrics.
y_pred = rfne.predict(x_test)
ac = accuracy_score(y_test, y_pred)
print('Accuracy is: ', ac)
cm = confusion_matrix(y_test, y_pred)

# Draw the confusion matrix as an annotated heatmap.
# NOTE(review): the figure is built but never shown or saved in the code
# visible here.
hm = sns.heatmap(cm, annot=True, fmt="d")
# Fix the y-axis limits explicitly; presumably a workaround for heatmap rows
# being clipped in some matplotlib versions - confirm before removing.
hm.set_ylim(2.0, 0)
def parseData(array):
    """Train a random forest on the data set and classify the entered values.

    The module-level ``data`` frame is split 70/30 into train and test
    parts, a 100-tree random forest is fitted on the eight ``pred_var``
    columns, and its accuracy on the test part is printed.  The values in
    *array* are then classified as one sample and the predicted label is
    printed.

    Args:
        array: Sequence of numbers, one per entry of ``pred_var`` and in the
            same order.  ``None`` or a sequence of any other length is
            reported and no prediction is made.

    Returns:
        None.  All results are printed.
    """
    # We will use these variables for our prediction.
    pred_var = ['radius_mean', 'perimeter_mean', 'area_mean', 'radius_se',
                'perimeter_se', 'radius_worst', 'perimeter_worst', 'area_worst']

    # The split is not seeded, so train/test membership (and therefore the
    # reported accuracy) varies from call to call.
    train, test = train_test_split(data, test_size=0.3)
    print(train.shape)
    print(test.shape)

    train_X = train[pred_var]   # training inputs
    train_y = train.diagnosis   # training target
    # https://medium.com/@hjhuney/implementing-a-random-forest-classification-model-in-python-583891c99652
    test_X = test[pred_var]     # test inputs
    test_y = test.diagnosis     # test target

    rfcmodel = RandomForestClassifier(n_estimators=100)
    rfcmodel.fit(train_X, train_y)
    prediction = rfcmodel.predict(test_X)

    # accuracy_score expects (y_true, y_pred).
    print(f'Accuracy Check: {metrics.accuracy_score(test_y, prediction)}')

    # Classify the user's sample.  predict() needs 2-D input, so the values
    # become a one-row frame carrying the same column names used for fitting.
    values = [] if array is None else list(array)
    if len(values) != len(pred_var):
        print(f'Prediction skipped: expected {len(pred_var)} values, got {len(values)}')
        return
    sample = pd.DataFrame([values], columns=pred_var)
    print(f'Predicted diagnosis: {rfcmodel.predict(sample)[0]}')
def enterCredata():
    """Show the values currently held in ``ty`` to the right of the form.

    The label is created on the first call and only its text is updated on
    later calls, so repeated clicks do not pile up new Label widgets in the
    same grid cell.
    """
    text = "You have entered " + str(ty)
    label = getattr(enterCredata, "_label", None)
    if label is None:
        label = Label(root, text=text)
        label.grid(row=1, column=3)
        # Remember the widget on the function object for the next call.
        enterCredata._label = label
    else:
        label.config(text=text)
def myClick():
    """Button callback: read the form, then display and classify the values.

    Every Entry widget that is a direct child of ``root`` is read in
    creation order and blank boxes are skipped.  If any box holds text that
    is not a number, the click is abandoned and ``ty`` is left unchanged.
    Otherwise ``ty`` is replaced in place with the new values, so earlier
    clicks no longer accumulate, and the values are passed on to
    ``enterCredata`` and ``parseData``.
    """
    values = []
    for widget in root.winfo_children():
        if widget.winfo_class() != 'Entry':
            continue
        raw = widget.get().strip()
        if not raw:
            # Blank (or whitespace-only) boxes are skipped.
            continue
        print(raw)
        try:
            values.append(float(raw))
        except ValueError:
            print(f'Ignoring click: {raw!r} is not a number')
            return

    # Replace the contents in place so every reference to ``ty`` sees them.
    ty[:] = values
    enterCredata()
    parseData(ty)
# Submit button placed under the last input row; pressing it runs myClick.
button1 = Button(master=root, command=myClick, text='Enter Credentials')
button1.grid(column=1, row=9)
# Start the Tk event loop.
root.mainloop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment