Created
March 6, 2016 20:18
-
-
Save ledovsky/242266d70e5304f2f68f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Нормализация признаков\n", | |
"\n", | |
"## Задание курса Machine learning - неделя 2 - линейные методы" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"%matplotlib inline\n", | |
"\n", | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import matplotlib.pyplot as plt\n", | |
"\n", | |
"from sklearn.linear_model import Perceptron\n", | |
"from sklearn.preprocessing import StandardScaler\n", | |
"from sklearn.metrics import accuracy_score" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Чтение данных" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Both CSV files are read with header=None; column 0 is taken as the target\n", | |
"# and every remaining column goes into the feature matrix.\n", | |
"train = pd.read_csv('perceptron-train.csv', header=None)\n", | |
"test = pd.read_csv('perceptron-test.csv', header=None)\n", | |
"\n", | |
"Y_train, X_train = train.iloc[:, 0].values, train.iloc[:, 1:].values\n", | |
"Y_test, X_test = test.iloc[:, 0].values, test.iloc[:, 1:].values" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Обучение перцептрона без нормализации" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Точность без нормализации - 0.360\n" | |
] | |
} | |
], | |
"source": [ | |
"# Baseline: fit the perceptron on the raw (unscaled) features and score it on the test set.\n", | |
"clf = Perceptron(random_state=241)\n", | |
"clf.fit(X_train, Y_train)\n", | |
"score1 = clf.score(X_test, Y_test)\n", | |
"# A parenthesised single-argument print prints the same text on Python 2\n", | |
"# and, unlike the print statement, is also valid on Python 3.\n", | |
"print('Точность без нормализации - {:.3f}'.format(score1))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Нормализация и обучение нового перцептрона" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Fit the scaler on the training features only, then apply that same\n", | |
"# fitted transform to both the training and the test features.\n", | |
"scaler = StandardScaler().fit(X_train)\n", | |
"X_train_s = scaler.transform(X_train)\n", | |
"X_test_s = scaler.transform(X_test)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Точность после нормализации - 0.925\n", | |
"Разность в результатах - 0.565\n" | |
] | |
} | |
], | |
"source": [ | |
"# Train a fresh perceptron (same seed as the baseline) on the scaled features,\n", | |
"# so this cell does not depend on the state of a previously fitted estimator.\n", | |
"clf = Perceptron(random_state=241)\n", | |
"clf.fit(X_train_s, Y_train)\n", | |
"score2 = clf.score(X_test_s, Y_test)\n", | |
"# Parenthesised print: same output on Python 2, also valid on Python 3.\n", | |
"print('Точность после нормализации - {:.3f}'.format(score2))\n", | |
"\n", | |
"# Gain from scaling; uses score1 computed by the unscaled run.\n", | |
"diff = score2 - score1\n", | |
"print('Разность в результатах - {:.3f}'.format(diff))\n", | |
"\n", | |
"# Write the difference, formatted to three decimals, to ans.txt.\n", | |
"with open('ans.txt', 'w') as f:\n", | |
"    f.write('{:.3f}'.format(diff))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Повторение эксперимента со столбцом ответов, приведённым к типу Boolean (это повышает точность работы)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Rebuild the targets from column 0 as booleans. This rebinds Y_train / Y_test,\n", | |
"# so any cell run after this one sees the boolean labels.\n", | |
"# NOTE(review): casting to bool maps every non-zero value to True -- confirm the\n", | |
"# raw labels actually contain 0, otherwise distinct classes are merged.\n", | |
"Y_train = train.iloc[:, 0].astype(np.bool_).values\n", | |
"Y_test = test.iloc[:, 0].astype(np.bool_).values" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Точность без нормализации - 0.675\n" | |
] | |
} | |
], | |
"source": [ | |
"# Baseline again, this time with the boolean targets: unscaled features.\n", | |
"# Rebinds clf and score1 from the earlier run.\n", | |
"clf = Perceptron(random_state=241)\n", | |
"clf.fit(X_train, Y_train)\n", | |
"score1 = clf.score(X_test, Y_test)\n", | |
"# Parenthesised print: same output on Python 2, also valid on Python 3.\n", | |
"print('Точность без нормализации - {:.3f}'.format(score1))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Точность после нормализации - 0.970\n", | |
"Разность в результатах - 0.295\n" | |
] | |
} | |
], | |
"source": [ | |
"# Train a fresh perceptron (same seed) on the scaled features with boolean targets,\n", | |
"# so this cell does not depend on the state of a previously fitted estimator.\n", | |
"clf = Perceptron(random_state=241)\n", | |
"clf.fit(X_train_s, Y_train)\n", | |
"score2 = clf.score(X_test_s, Y_test)\n", | |
"# Parenthesised print: same output on Python 2, also valid on Python 3.\n", | |
"print('Точность после нормализации - {:.3f}'.format(score2))\n", | |
"\n", | |
"# Gain from scaling; uses score1 from the unscaled boolean-target run.\n", | |
"diff = score2 - score1\n", | |
"print('Разность в результатах - {:.3f}'.format(diff))\n", | |
"\n", | |
"# NOTE(review): opening ans.txt with mode 'w' replaces the value written by the\n", | |
"# earlier non-boolean run -- confirm which difference is the intended answer.\n", | |
"with open('ans.txt', 'w') as f:\n", | |
"    f.write('{:.3f}'.format(diff))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.11" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment