Created
July 12, 2018 07:29
-
-
Save artyomLisovskij/ced27c34b514b1017f360f760d3782c8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Задача:\n", | |
"1. Дана матрица 3x3 (a1 b1 c1, a2 b2 c2, a3 b3 c3); найти детерминант матрицы по входным параметрам a1 b1 c1 a2 b2 c2 a3 b3 c3" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Для начала создадим обучающую выборку и сохраним её в файл формата csv." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Процедура для работы с файлом будет принимать множество данных, названия полей и имя файла." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import csv\n", | |
" \n", | |
"def writeCSV(fileName, data, fieldNames):\n", | |
"    \"\"\"Write an iterable of row dicts to a CSV file, preceded by a header row.\n", | |
"\n", | |
"    fileName   -- path of the file to create or overwrite\n", | |
"    data       -- iterable of dicts keyed by the names in fieldNames\n", | |
"    fieldNames -- column names, in the order they are written\n", | |
"    \"\"\"\n", | |
"    # newline='' leaves line-ending handling to the csv module.\n", | |
"    with open(fileName, 'w', newline='') as csvFile:\n", | |
"        csvWriter = csv.DictWriter(csvFile, fieldnames=fieldNames)\n", | |
"        csvWriter.writeheader()\n", | |
"        csvWriter.writerows(data)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Функция для чтения данных." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def readCSV(fileName):\n", | |
"    \"\"\"Print every data row of a CSV file as a mapping keyed by the header row.\n", | |
"\n", | |
"    fileName -- path of the CSV file to read\n", | |
"    \"\"\"\n", | |
"    # newline='' is required for files handed to the csv module; without it,\n", | |
"    # line breaks embedded in quoted fields are altered by newline translation.\n", | |
"    with open(fileName, newline='') as file:\n", | |
"        for row in csv.DictReader(file):\n", | |
"            print(row)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Необходимо создать функцию, которая будет генерировать матрицу и ответ к ней. \n", | |
"Матрица будет задаваться массивом 3x3." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import random\n", | |
"import numpy as np\n", | |
"from numpy.linalg import det as det\n", | |
"from numpy import array\n", | |
"\n", | |
"random.seed()\n", | |
"\n", | |
"def genMatrixWithDet():\n", | |
"    \"\"\"Generate a random 3x3 integer matrix together with its determinant.\n", | |
"\n", | |
"    Two bounds are drawn from [-1000, 1000] (swapped if out of order) and every\n", | |
"    element is then drawn from the closed range between them.\n", | |
"\n", | |
"    Returns a tuple (matrix, ans): a 3x3 numpy array and its determinant as int.\n", | |
"    \"\"\"\n", | |
"    minel = random.randint(-1000, 1000)\n", | |
"    maxel = random.randint(-1000, 1000)\n", | |
"    if minel > maxel:\n", | |
"        minel, maxel = maxel, minel\n", | |
"    matrix = array([[random.randint(minel, maxel) for _ in range(3)] for _ in range(3)])\n", | |
"    # det() works in floating point, so an integer determinant can come back as\n", | |
"    # e.g. 41.99999999; round before converting instead of truncating with int().\n", | |
"    ans = int(round(det(matrix)))\n", | |
"\n", | |
"    return matrix, ans" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(array([[-123, -59, -20],\n", | |
" [ 12, -125, -15],\n", | |
" [ -66, -116, -41]]), -310952)" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Smoke-check the generator: the cell displays one (matrix, determinant) pair.\n", | |
"genMatrixWithDet()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Заполним файл данными" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"titles = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'a3', 'b3', 'c3', 'det']\n", | |
"data = []\n", | |
"\n", | |
"# Build 10000 samples: the nine matrix elements in row-major order, then the determinant.\n", | |
"for i in range(10000):\n", | |
"    m, ans = genMatrixWithDet()\n", | |
"    data.append(dict(zip(titles, np.append(m.flatten(), ans))))\n", | |
"\n", | |
"writeCSV('train.csv', data, titles)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Загрузим данные" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>a1</th>\n", | |
" <th>b1</th>\n", | |
" <th>c1</th>\n", | |
" <th>a2</th>\n", | |
" <th>b2</th>\n", | |
" <th>c2</th>\n", | |
" <th>a3</th>\n", | |
" <th>b3</th>\n", | |
" <th>c3</th>\n", | |
" <th>det</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>-622</td>\n", | |
" <td>-665</td>\n", | |
" <td>-281</td>\n", | |
" <td>-151</td>\n", | |
" <td>-666</td>\n", | |
" <td>-239</td>\n", | |
" <td>-943</td>\n", | |
" <td>-205</td>\n", | |
" <td>-308</td>\n", | |
" <td>-48282287</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>496</td>\n", | |
" <td>202</td>\n", | |
" <td>404</td>\n", | |
" <td>161</td>\n", | |
" <td>426</td>\n", | |
" <td>384</td>\n", | |
" <td>676</td>\n", | |
" <td>492</td>\n", | |
" <td>598</td>\n", | |
" <td>-18706123</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>473</td>\n", | |
" <td>562</td>\n", | |
" <td>541</td>\n", | |
" <td>453</td>\n", | |
" <td>483</td>\n", | |
" <td>597</td>\n", | |
" <td>397</td>\n", | |
" <td>306</td>\n", | |
" <td>604</td>\n", | |
" <td>2264810</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>-769</td>\n", | |
" <td>-831</td>\n", | |
" <td>-798</td>\n", | |
" <td>-802</td>\n", | |
" <td>-741</td>\n", | |
" <td>-771</td>\n", | |
" <td>-818</td>\n", | |
" <td>-739</td>\n", | |
" <td>-700</td>\n", | |
" <td>-7556876</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>318</td>\n", | |
" <td>411</td>\n", | |
" <td>45</td>\n", | |
" <td>-37</td>\n", | |
" <td>428</td>\n", | |
" <td>14</td>\n", | |
" <td>-760</td>\n", | |
" <td>-491</td>\n", | |
" <td>-782</td>\n", | |
" <td>-105057195</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" a1 b1 c1 a2 b2 c2 a3 b3 c3 det\n", | |
"0 -622 -665 -281 -151 -666 -239 -943 -205 -308 -48282287\n", | |
"1 496 202 404 161 426 384 676 492 598 -18706123\n", | |
"2 473 562 541 453 483 597 397 306 604 2264810\n", | |
"3 -769 -831 -798 -802 -741 -771 -818 -739 -700 -7556876\n", | |
"4 318 411 45 -37 428 14 -760 -491 -782 -105057195" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from pandas import read_csv\n", | |
"\n", | |
"# Pass the separator by keyword: pandas 2.0+ accepts only the path positionally.\n", | |
"dataset = read_csv('train.csv', sep=',')\n", | |
"dataset.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Далее следует нормировать данные" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>a1</th>\n", | |
" <th>b1</th>\n", | |
" <th>c1</th>\n", | |
" <th>a2</th>\n", | |
" <th>b2</th>\n", | |
" <th>c2</th>\n", | |
" <th>a3</th>\n", | |
" <th>b3</th>\n", | |
" <th>c3</th>\n", | |
" <th>det</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>-0.624937</td>\n", | |
" <td>-0.668514</td>\n", | |
" <td>-0.281485</td>\n", | |
" <td>-0.146747</td>\n", | |
" <td>-0.669014</td>\n", | |
" <td>-0.242348</td>\n", | |
" <td>-0.949546</td>\n", | |
" <td>-0.205258</td>\n", | |
" <td>-0.312563</td>\n", | |
" <td>-0.222728</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.499246</td>\n", | |
" <td>0.205038</td>\n", | |
" <td>0.405921</td>\n", | |
" <td>0.167927</td>\n", | |
" <td>0.429577</td>\n", | |
" <td>0.382840</td>\n", | |
" <td>0.684157</td>\n", | |
" <td>0.499494</td>\n", | |
" <td>0.597990</td>\n", | |
" <td>-0.199345</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0.476119</td>\n", | |
" <td>0.567758</td>\n", | |
" <td>0.543402</td>\n", | |
" <td>0.462431</td>\n", | |
" <td>0.486922</td>\n", | |
" <td>0.596588</td>\n", | |
" <td>0.402624</td>\n", | |
" <td>0.311426</td>\n", | |
" <td>0.604020</td>\n", | |
" <td>-0.182765</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>-0.772750</td>\n", | |
" <td>-0.835768</td>\n", | |
" <td>-0.800301</td>\n", | |
" <td>-0.803328</td>\n", | |
" <td>-0.744467</td>\n", | |
" <td>-0.776217</td>\n", | |
" <td>-0.823411</td>\n", | |
" <td>-0.745197</td>\n", | |
" <td>-0.706533</td>\n", | |
" <td>-0.190530</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0.320261</td>\n", | |
" <td>0.415617</td>\n", | |
" <td>0.045660</td>\n", | |
" <td>-0.031770</td>\n", | |
" <td>0.431590</td>\n", | |
" <td>0.011540</td>\n", | |
" <td>-0.764884</td>\n", | |
" <td>-0.494439</td>\n", | |
" <td>-0.788945</td>\n", | |
" <td>-0.267615</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" a1 b1 c1 a2 b2 c2 a3 \\\n", | |
"0 -0.624937 -0.668514 -0.281485 -0.146747 -0.669014 -0.242348 -0.949546 \n", | |
"1 0.499246 0.205038 0.405921 0.167927 0.429577 0.382840 0.684157 \n", | |
"2 0.476119 0.567758 0.543402 0.462431 0.486922 0.596588 0.402624 \n", | |
"3 -0.772750 -0.835768 -0.800301 -0.803328 -0.744467 -0.776217 -0.823411 \n", | |
"4 0.320261 0.415617 0.045660 -0.031770 0.431590 0.011540 -0.764884 \n", | |
"\n", | |
" b3 c3 det \n", | |
"0 -0.205258 -0.312563 -0.222728 \n", | |
"1 0.499494 0.597990 -0.199345 \n", | |
"2 0.311426 0.604020 -0.182765 \n", | |
"3 -0.745197 -0.706533 -0.190530 \n", | |
"4 -0.494439 -0.788945 -0.267615 " | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from sklearn import preprocessing\n", | |
"from pandas import DataFrame\n", | |
"\n", | |
"# Scale every column (the nine matrix elements and 'det') into the range [-1, 1].\n", | |
"# NOTE(review): the scaler is fitted on the full dataset before the train/test\n", | |
"# split, so held-out rows influence the scaling -- confirm this is intended.\n", | |
"x = dataset.values.astype(float)\n", | |
"min_max_scaler = preprocessing.MinMaxScaler(feature_range = (-1, 1))\n", | |
"x_scaled = min_max_scaler.fit_transform(x)\n", | |
"# Rebinds `dataset` to the scaled frame; the name no longer refers to the raw values.\n", | |
"dataset = DataFrame(x_scaled, columns=titles)\n", | |
"dataset.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Составим тренировочный и тестовый наборы" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from sklearn.model_selection import train_test_split\n", | |
"\n", | |
"# Target is the 'det' column; features are the remaining nine columns.\n", | |
"trg = dataset['det']\n", | |
"trn = dataset.drop('det', axis=1)\n", | |
"\n", | |
"# Hold out 30% of the rows for testing. No random_state is passed, so the split\n", | |
"# is not reproducible unless numpy's global RNG has been seeded.\n", | |
"Xtrn, Xtest, Ytrn, Ytest = train_test_split(trn, trg, test_size=0.3)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Будем использовать нейронную сеть, так как зависимость нелинейная" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"MLPRegressor(activation='tanh', alpha=0.001, batch_size='auto', beta_1=0.9,\n", | |
" beta_2=0.999, early_stopping=False, epsilon=1e-08,\n", | |
" hidden_layer_sizes=(50, 9, 9), learning_rate='constant',\n", | |
" learning_rate_init=0.001, max_iter=10000, momentum=0.9,\n", | |
" nesterovs_momentum=True, power_t=0.5, random_state=None,\n", | |
" shuffle=True, solver='lbfgs', tol=0.0001, validation_fraction=0.1,\n", | |
" verbose=False, warm_start=False)" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from sklearn.neural_network import MLPRegressor\n", | |
"\n", | |
"# Three hidden layers of 50, 9 and 9 units with tanh activation, trained with L-BFGS.\n", | |
"model = MLPRegressor(hidden_layer_sizes=(50, 9, 9), max_iter=10000, activation='tanh', solver='lbfgs', alpha=0.001)\n", | |
"model.fit(Xtrn, Ytrn)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Проверим точность на тестовых данных" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.966348868203\n", | |
"0.00819768036411\n" | |
] | |
} | |
], | |
"source": [ | |
"from sklearn.metrics import r2_score\n", | |
"from sklearn.metrics import mean_absolute_error\n", | |
"\n", | |
"# Predict on the held-out split once and reuse the result for both metrics.\n", | |
"Ypred = model.predict(Xtest)\n", | |
"print(r2_score(Ytest, Ypred))\n", | |
"print(mean_absolute_error(Ytest, Ypred))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# hidden_layer_sizes=(100, 100, 100), max_iter=10000, activation='tanh', solver='lbfgs', alpha=0.001 R2=0.966 trn=7000 test=3000\n", | |
"#hidden_layer_sizes=(100, 100, 100), max_iter=10000, activation='tanh', solver='lbfgs', alpha=0.01 R2=0.971 trn=7000 test=3000\n", | |
"#hidden_layer_sizes=(9), max_iter=10000, activation='tanh', solver='lbfgs', alpha=0.01 R2=0.628 trn=7000 test=3000\n", | |
"#hidden_layer_sizes=(81), max_iter=10000, activation='tanh', solver='lbfgs', alpha=0.01 R2=0.903 trn=7000 test=3000\n", | |
"#hidden_layer_sizes=(100), max_iter=10000, activation='tanh', solver='lbfgs', alpha=0.01 R2=0.909 trn=7000 test=3000\n", | |
"#hidden_layer_sizes=(9, 9), max_iter=10000, activation='tanh', solver='lbfgs', alpha=0.01 R2=0.80 trn=7000 test=3000\n", | |
"#hidden_layer_sizes=(9, 9, 9), max_iter=10000, activation='tanh', solver='lbfgs', alpha=0.01 R2=0.634 trn=7000 test=3000\n", | |
"#hidden_layer_sizes=(50, 9), max_iter=10000, activation='tanh', solver='lbfgs', alpha=0.01 R2=0.939 trn=7000 test=3000\n", | |
"#hidden_layer_sizes=(50, 40), max_iter=10000, activation='tanh', solver='lbfgs', alpha=0.01 R2=0.947 trn=7000 test=3000\n", | |
"#hidden_layer_sizes=(81, 9), max_iter=10000, activation='tanh', solver='lbfgs', alpha=0.01 R2=0.944 trn=7000 test=3000\n", | |
"#hidden_layer_sizes=(81, 30), max_iter=10000, activation='tanh', solver='lbfgs', alpha=0.01 R2=0.948 trn=7000 test=3000\n", | |
"#hidden_layer_sizes=(81, 81), max_iter=10000, activation='tanh', solver='lbfgs', alpha=0.01 R2=0.947 trn=7000 test=3000\n", | |
"#hidden_layer_sizes=(50, 9, 9), max_iter=10000, activation='tanh', solver='lbfgs', alpha=0.01 R2=0.968 trn=7000 test=3000" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Проверим результат на своем примере" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[[695 751 309]\n", | |
" [598 902 865]\n", | |
" [685 893 585]]\n", | |
"-13766184\n", | |
"\n", | |
"expected = -0.195439473202\n", | |
"ans = [-0.19358827] \n", | |
"\n", | |
"\n", | |
"after inverse_transform\n", | |
"\n", | |
"expected = -13766184\n", | |
"ans = -11424692\n" | |
] | |
} | |
], | |
"source": [ | |
"m, a = genMatrixWithDet()\n", | |
"print(m)\n", | |
"print(a)\n", | |
"# The scaler was fitted on ten columns, so the true determinant is appended to\n", | |
"# the nine matrix elements before transforming.\n", | |
"tmp = np.append(m.flatten(), a)\n", | |
"\n", | |
"x_y = min_max_scaler.transform(tmp.reshape(1, -1).astype(float))\n", | |
"# Drop column 9 (the scaled determinant) to obtain the model input.\n", | |
"x = np.delete(x_y, 9).reshape(1, -1)\n", | |
"# Predict once and reuse the value for both printouts.\n", | |
"pred = model.predict(x)\n", | |
"\n", | |
"print('\\nexpected = ', x_y[0][9])\n", | |
"print('ans = ', pred, '\\n')\n", | |
"\n", | |
"print('\\nafter inverse_transform\\n')\n", | |
"\n", | |
"print('expected = ', a)\n", | |
"# Re-attach the prediction as the tenth column so the scaler can map it back.\n", | |
"print('ans = ', int(min_max_scaler.inverse_transform(np.append(x, pred).reshape(1, -1))[0][9]))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment