Created
February 9, 2017 06:47
-
-
Save girisagar46/e6582727ac3705d76c1f78f52ac58f5c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
# # *CSIT Board exam percentage score prediction based on data of 120 students: Midterm, FinalTerm, Attendance, InternalMarking, AssignmentScores*
# In[ ]:
# some basic imports
# print() as a function so the script behaves the same on Python 2 and 3
from __future__ import print_function

# pandas for csv data reading
import pandas as pd
# We'll use tree model (DecisionTree)
from sklearn import tree
# To print the accuracy score
from sklearn.metrics import accuracy_score
# In[ ]:
# Load the training and testing splits from CSV files located in the
# current working directory.
train_df = pd.read_csv("trainingData.csv")
test_df = pd.read_csv("testingData.csv")
# Preview of the first rows; a notebook displays this as the cell output,
# while in a plain script run the returned frame is simply discarded.
train_df.head()
# In[ ]:
# The class column holds numeric scores, but the classifier needs categorical
# labels, so each score is bucketed into a named division.
def normalize(num):
    """Return the division label for a numeric score.

    The thresholds are compared against the raw value, so the score is
    presumably a fraction in [0, 1] rather than a 0-100 percentage
    (TODO confirm against the CSV data).

    Args:
        num: Numeric score to classify.

    Returns:
        "distinction" for num >= 0.80, "first div" for 0.60 <= num < 0.80,
        "second div" for 0.40 <= num < 0.60, and "fail" otherwise. A NaN
        score fails every comparison and therefore also yields "fail".
    """
    # Thresholds are checked from highest to lowest, so each branch only
    # needs its lower bound: the upper bound is implied by the earlier
    # branches not having matched.
    if num >= 0.80:
        return "distinction"
    if num >= 0.60:
        return "first div"
    if num >= 0.40:
        return "second div"
    return "fail"
# In[ ]:
# Split each frame into features and target: X_* takes every column except
# the last (2D), y_* takes only the last column, i.e. the class label (1D).
X_train, y_train = train_df.iloc[:, :-1], train_df.iloc[:, -1]
X_test, y_test = test_df.iloc[:, :-1], test_df.iloc[:, -1]
# Apply the normalize function to our class label series so the numeric
# scores become categorical division labels.
y_train = y_train.apply(normalize)
y_test = y_test.apply(normalize)
# Sanity check: feature and label row counts should match within each split.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
# In[ ]:
# Define our tree model with the library's default settings.
model = tree.DecisionTreeClassifier()
# Fit the model on the training features and their division labels.
model.fit(X_train, y_train)
# Predict a division label for every row of the test features.
predictions = model.predict(X_test)
# Print the accuracy score of the predictions against the true test labels.
print(accuracy_score(y_test, predictions))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment