Created
August 23, 2022 12:14
-
-
Save andrea-dagostino/d0608b8273bc2cdcc418ab43c8e5d97a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd

# Number of shuffled rows used for training; every remaining row goes to the test set.
N_TRAIN = 1000
# Fixed seed so the shuffle (and therefore the train/test split) is reproducible.
RANDOM_STATE = 42

# download dataset -> https://www.kaggle.com/datasets/piyushgoyal443/red-wine-dataset
df = pd.read_csv('wineQualityReds.csv')

# The dataset's target variable ranges from 3 to 8; remap it to the
# consecutive labels 0 to 5.
quality_mapping = {
    3: 0,
    4: 1,
    5: 2,
    6: 3,
    7: 4,
    8: 5,
}
df['quality'] = df['quality'].map(quality_mapping)

# Series.map yields NaN for any value missing from the mapping; fail here with
# a clear message instead of later inside model fitting or scoring.
if df['quality'].isna().any():
    raise ValueError("'quality' contains values not covered by quality_mapping")

# Shuffle the rows (frac=1 samples every row) and reset the index.
df = df.sample(frac=1, random_state=RANDOM_STATE).reset_index(drop=True)

# Split the dataset in two portions, training and test sets.
# The dataset has 1599 examples, so this gives 1000 training and 599 test rows.
# Slicing at a single split point keeps the two sets disjoint and exhaustive
# even if the file has a different number of rows.
df_train = df.iloc[:N_TRAIN]
df_test = df.iloc[N_TRAIN:]
# Train a decision tree on the feature columns listed below and report its
# accuracy on both the training and the test split.
from sklearn import metrics, tree

cols = [
    'fixed.acidity',
    'volatile.acidity',
    'citric.acid',
    'residual.sugar',
    'chlorides',
    'free.sulfur.dioxide',
    'total.sulfur.dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
]

# Separate the feature matrix and the target for each split.
X_train, y_train = df_train[cols], df_train['quality']
X_test, y_test = df_test[cols], df_test['quality']

# Fit the model; the tree depth is capped at 3.
clf = tree.DecisionTreeClassifier(max_depth=3)
clf.fit(X_train, y_train)

# Predict on the data the model was fitted on and on the held-out rows.
train_predictions = clf.predict(X_train)
test_predictions = clf.predict(X_test)

# Accuracy = fraction of rows whose predicted label matches the true label.
train_accuracy = metrics.accuracy_score(y_train, train_predictions)
test_accuracy = metrics.accuracy_score(y_test, test_predictions)

print(f"Train accuracy: {round(train_accuracy, 3)}")
print(f"Test accuracy: {round(test_accuracy, 3)}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment