Created
June 4, 2020 08:15
-
-
Save yongbin/f46262f49f95bfd84eb1d9c6dc8a49f1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# 패키지 불러오기" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import matplotlib.pyplot as plt\n", | |
"import seaborn as sns\n", | |
"import datetime\n", | |
"from xgboost import XGBClassifier\n", | |
"from sklearn.metrics import accuracy_score" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# 데이터 불러오기" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x_train = pd.read_csv('X_train.csv')\n", | |
"y_train = pd.read_csv('Y_train.csv')\n", | |
"x_test = pd.read_csv('X_test.csv')\n", | |
"y_test = pd.read_csv('Y_test.csv')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 평가 데이터 제작" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"Y_train = y_train['gender']\n", | |
"Y_test = y_test['gender']" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 데이터 합치기" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>custid</th>\n", | |
" <th>date_time</th>\n", | |
" <th>store</th>\n", | |
" <th>product</th>\n", | |
" <th>brand</th>\n", | |
" <th>corner</th>\n", | |
" <th>pc</th>\n", | |
" <th>part</th>\n", | |
" <th>imported</th>\n", | |
" <th>amount</th>\n", | |
" <th>discount</th>\n", | |
" <th>installment</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>2000-06-25 12:12</td>\n", | |
" <td>무역점</td>\n", | |
" <td>2116050008000</td>\n", | |
" <td>에스티로더</td>\n", | |
" <td>수입종합화장품</td>\n", | |
" <td>화장품</td>\n", | |
" <td>명품잡화</td>\n", | |
" <td>1</td>\n", | |
" <td>90000</td>\n", | |
" <td>9000</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>2000-06-25 12:42</td>\n", | |
" <td>무역점</td>\n", | |
" <td>4125440008000</td>\n", | |
" <td>시슬리</td>\n", | |
" <td>수입종합화장품</td>\n", | |
" <td>화장품</td>\n", | |
" <td>명품잡화</td>\n", | |
" <td>1</td>\n", | |
" <td>39000</td>\n", | |
" <td>3900</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" <td>2000-08-26 18:10</td>\n", | |
" <td>본점</td>\n", | |
" <td>2116052008000</td>\n", | |
" <td>크리니크</td>\n", | |
" <td>수입종합화장품</td>\n", | |
" <td>화장품</td>\n", | |
" <td>잡화파트</td>\n", | |
" <td>1</td>\n", | |
" <td>175000</td>\n", | |
" <td>17500</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0</td>\n", | |
" <td>2000-08-26 18:30</td>\n", | |
" <td>본점</td>\n", | |
" <td>4106430119900</td>\n", | |
" <td>듀퐁</td>\n", | |
" <td>수입의류</td>\n", | |
" <td>명품토탈</td>\n", | |
" <td>잡화파트</td>\n", | |
" <td>1</td>\n", | |
" <td>455000</td>\n", | |
" <td>45500</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0</td>\n", | |
" <td>2000-09-03 18:02</td>\n", | |
" <td>무역점</td>\n", | |
" <td>2139141008000</td>\n", | |
" <td>랑콤</td>\n", | |
" <td>수입종합화장품</td>\n", | |
" <td>화장품</td>\n", | |
" <td>명품잡화</td>\n", | |
" <td>0</td>\n", | |
" <td>100000</td>\n", | |
" <td>10000</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" custid date_time store product brand corner pc part \\\n", | |
"0 0 2000-06-25 12:12 무역점 2116050008000 에스티로더 수입종합화장품 화장품 명품잡화 \n", | |
"1 0 2000-06-25 12:42 무역점 4125440008000 시슬리 수입종합화장품 화장품 명품잡화 \n", | |
"2 0 2000-08-26 18:10 본점 2116052008000 크리니크 수입종합화장품 화장품 잡화파트 \n", | |
"3 0 2000-08-26 18:30 본점 4106430119900 듀퐁 수입의류 명품토탈 잡화파트 \n", | |
"4 0 2000-09-03 18:02 무역점 2139141008000 랑콤 수입종합화장품 화장품 명품잡화 \n", | |
"\n", | |
" imported amount discount installment \n", | |
"0 1 90000 9000 3 \n", | |
"1 1 39000 3900 1 \n", | |
"2 1 175000 17500 3 \n", | |
"3 1 455000 45500 3 \n", | |
"4 0 100000 10000 3 " | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train_test = pd.concat([x_train, x_test], ignore_index = True)\n", | |
"train_test.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 데이터 변환" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Derive a calendar-date string and an integer hour from the raw timestamp\n", | |
"# text, then drop the original column.\n", | |
"train_test = train_test.assign(\n", | |
"    date=train_test['date_time'].str.slice(0, 10),\n", | |
"    time=train_test['date_time'].str.slice(11, 13).astype('int'),\n", | |
").drop(columns='date_time')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>custid</th>\n", | |
" <th>store</th>\n", | |
" <th>product</th>\n", | |
" <th>brand</th>\n", | |
" <th>corner</th>\n", | |
" <th>pc</th>\n", | |
" <th>part</th>\n", | |
" <th>imported</th>\n", | |
" <th>amount</th>\n", | |
" <th>discount</th>\n", | |
" <th>installment</th>\n", | |
" <th>date</th>\n", | |
" <th>time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>무역점</td>\n", | |
" <td>2116050008000</td>\n", | |
" <td>에스티로더</td>\n", | |
" <td>수입종합화장품</td>\n", | |
" <td>화장품</td>\n", | |
" <td>명품잡화</td>\n", | |
" <td>1</td>\n", | |
" <td>90000</td>\n", | |
" <td>9000</td>\n", | |
" <td>3</td>\n", | |
" <td>2000-06-25</td>\n", | |
" <td>12</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>무역점</td>\n", | |
" <td>4125440008000</td>\n", | |
" <td>시슬리</td>\n", | |
" <td>수입종합화장품</td>\n", | |
" <td>화장품</td>\n", | |
" <td>명품잡화</td>\n", | |
" <td>1</td>\n", | |
" <td>39000</td>\n", | |
" <td>3900</td>\n", | |
" <td>1</td>\n", | |
" <td>2000-06-25</td>\n", | |
" <td>12</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" <td>본점</td>\n", | |
" <td>2116052008000</td>\n", | |
" <td>크리니크</td>\n", | |
" <td>수입종합화장품</td>\n", | |
" <td>화장품</td>\n", | |
" <td>잡화파트</td>\n", | |
" <td>1</td>\n", | |
" <td>175000</td>\n", | |
" <td>17500</td>\n", | |
" <td>3</td>\n", | |
" <td>2000-08-26</td>\n", | |
" <td>18</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0</td>\n", | |
" <td>본점</td>\n", | |
" <td>4106430119900</td>\n", | |
" <td>듀퐁</td>\n", | |
" <td>수입의류</td>\n", | |
" <td>명품토탈</td>\n", | |
" <td>잡화파트</td>\n", | |
" <td>1</td>\n", | |
" <td>455000</td>\n", | |
" <td>45500</td>\n", | |
" <td>3</td>\n", | |
" <td>2000-08-26</td>\n", | |
" <td>18</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0</td>\n", | |
" <td>무역점</td>\n", | |
" <td>2139141008000</td>\n", | |
" <td>랑콤</td>\n", | |
" <td>수입종합화장품</td>\n", | |
" <td>화장품</td>\n", | |
" <td>명품잡화</td>\n", | |
" <td>0</td>\n", | |
" <td>100000</td>\n", | |
" <td>10000</td>\n", | |
" <td>3</td>\n", | |
" <td>2000-09-03</td>\n", | |
" <td>18</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" custid store product brand corner pc part imported amount \\\n", | |
"0 0 무역점 2116050008000 에스티로더 수입종합화장품 화장품 명품잡화 1 90000 \n", | |
"1 0 무역점 4125440008000 시슬리 수입종합화장품 화장품 명품잡화 1 39000 \n", | |
"2 0 본점 2116052008000 크리니크 수입종합화장품 화장품 잡화파트 1 175000 \n", | |
"3 0 본점 4106430119900 듀퐁 수입의류 명품토탈 잡화파트 1 455000 \n", | |
"4 0 무역점 2139141008000 랑콤 수입종합화장품 화장품 명품잡화 0 100000 \n", | |
"\n", | |
" discount installment date time \n", | |
"0 9000 3 2000-06-25 12 \n", | |
"1 3900 1 2000-06-25 12 \n", | |
"2 17500 3 2000-08-26 18 \n", | |
"3 45500 3 2000-08-26 18 \n", | |
"4 10000 3 2000-09-03 18 " | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train_test.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def ndiscount(x):\n", | |
"    \"\"\"Flag whether a transaction row carries a discount.\n", | |
"\n", | |
"    Args:\n", | |
"        x: A single value taken from the `discount` column.\n", | |
"\n", | |
"    Returns:\n", | |
"        1 when `x` is not equal to 0 (negative discounts, as seen on\n", | |
"        refund rows, count as well), otherwise 0.\n", | |
"    \"\"\"\n", | |
"    return 1 if x != 0 else 0" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"train_test['ndiscount'] = train_test['discount'].apply(ndiscount)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>custid</th>\n", | |
" <th>store</th>\n", | |
" <th>product</th>\n", | |
" <th>brand</th>\n", | |
" <th>corner</th>\n", | |
" <th>pc</th>\n", | |
" <th>part</th>\n", | |
" <th>imported</th>\n", | |
" <th>amount</th>\n", | |
" <th>discount</th>\n", | |
" <th>installment</th>\n", | |
" <th>date</th>\n", | |
" <th>time</th>\n", | |
" <th>ndiscount</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>무역점</td>\n", | |
" <td>2116050008000</td>\n", | |
" <td>에스티로더</td>\n", | |
" <td>수입종합화장품</td>\n", | |
" <td>화장품</td>\n", | |
" <td>명품잡화</td>\n", | |
" <td>1</td>\n", | |
" <td>90000</td>\n", | |
" <td>9000</td>\n", | |
" <td>3</td>\n", | |
" <td>2000-06-25</td>\n", | |
" <td>12</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>무역점</td>\n", | |
" <td>4125440008000</td>\n", | |
" <td>시슬리</td>\n", | |
" <td>수입종합화장품</td>\n", | |
" <td>화장품</td>\n", | |
" <td>명품잡화</td>\n", | |
" <td>1</td>\n", | |
" <td>39000</td>\n", | |
" <td>3900</td>\n", | |
" <td>1</td>\n", | |
" <td>2000-06-25</td>\n", | |
" <td>12</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" <td>본점</td>\n", | |
" <td>2116052008000</td>\n", | |
" <td>크리니크</td>\n", | |
" <td>수입종합화장품</td>\n", | |
" <td>화장품</td>\n", | |
" <td>잡화파트</td>\n", | |
" <td>1</td>\n", | |
" <td>175000</td>\n", | |
" <td>17500</td>\n", | |
" <td>3</td>\n", | |
" <td>2000-08-26</td>\n", | |
" <td>18</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0</td>\n", | |
" <td>본점</td>\n", | |
" <td>4106430119900</td>\n", | |
" <td>듀퐁</td>\n", | |
" <td>수입의류</td>\n", | |
" <td>명품토탈</td>\n", | |
" <td>잡화파트</td>\n", | |
" <td>1</td>\n", | |
" <td>455000</td>\n", | |
" <td>45500</td>\n", | |
" <td>3</td>\n", | |
" <td>2000-08-26</td>\n", | |
" <td>18</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0</td>\n", | |
" <td>무역점</td>\n", | |
" <td>2139141008000</td>\n", | |
" <td>랑콤</td>\n", | |
" <td>수입종합화장품</td>\n", | |
" <td>화장품</td>\n", | |
" <td>명품잡화</td>\n", | |
" <td>0</td>\n", | |
" <td>100000</td>\n", | |
" <td>10000</td>\n", | |
" <td>3</td>\n", | |
" <td>2000-09-03</td>\n", | |
" <td>18</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" custid store product brand corner pc part imported amount \\\n", | |
"0 0 무역점 2116050008000 에스티로더 수입종합화장품 화장품 명품잡화 1 90000 \n", | |
"1 0 무역점 4125440008000 시슬리 수입종합화장품 화장품 명품잡화 1 39000 \n", | |
"2 0 본점 2116052008000 크리니크 수입종합화장품 화장품 잡화파트 1 175000 \n", | |
"3 0 본점 4106430119900 듀퐁 수입의류 명품토탈 잡화파트 1 455000 \n", | |
"4 0 무역점 2139141008000 랑콤 수입종합화장품 화장품 명품잡화 0 100000 \n", | |
"\n", | |
" discount installment date time ndiscount \n", | |
"0 9000 3 2000-06-25 12 1 \n", | |
"1 3900 1 2000-06-25 12 1 \n", | |
"2 17500 3 2000-08-26 18 1 \n", | |
"3 45500 3 2000-08-26 18 1 \n", | |
"4 10000 3 2000-09-03 18 1 " | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train_test.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 데이터 분할" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x = []\n", | |
"y = []" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Split row labels by the sign of `amount` with a boolean mask instead of a\n", | |
"# Python-level loop over every row: negative amounts go to `x` (refunds),\n", | |
"# all remaining rows go to `y` (purchases).\n", | |
"# Assigning instead of appending keeps this cell idempotent when re-run.\n", | |
"is_refund = (train_test['amount'] < 0).values\n", | |
"x = train_test.index[is_refund].tolist()\n", | |
"y = train_test.index[~is_refund].tolist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"train_test_purchase = train_test.drop(train_test.index[x])\n", | |
"train_test_refund = train_test.drop(train_test.index[y])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>custid</th>\n", | |
" <th>store</th>\n", | |
" <th>product</th>\n", | |
" <th>brand</th>\n", | |
" <th>corner</th>\n", | |
" <th>pc</th>\n", | |
" <th>part</th>\n", | |
" <th>imported</th>\n", | |
" <th>amount</th>\n", | |
" <th>discount</th>\n", | |
" <th>installment</th>\n", | |
" <th>date</th>\n", | |
" <th>time</th>\n", | |
" <th>ndiscount</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>무역점</td>\n", | |
" <td>2116050008000</td>\n", | |
" <td>에스티로더</td>\n", | |
" <td>수입종합화장품</td>\n", | |
" <td>화장품</td>\n", | |
" <td>명품잡화</td>\n", | |
" <td>1</td>\n", | |
" <td>90000</td>\n", | |
" <td>9000</td>\n", | |
" <td>3</td>\n", | |
" <td>2000-06-25</td>\n", | |
" <td>12</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>무역점</td>\n", | |
" <td>4125440008000</td>\n", | |
" <td>시슬리</td>\n", | |
" <td>수입종합화장품</td>\n", | |
" <td>화장품</td>\n", | |
" <td>명품잡화</td>\n", | |
" <td>1</td>\n", | |
" <td>39000</td>\n", | |
" <td>3900</td>\n", | |
" <td>1</td>\n", | |
" <td>2000-06-25</td>\n", | |
" <td>12</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" <td>본점</td>\n", | |
" <td>2116052008000</td>\n", | |
" <td>크리니크</td>\n", | |
" <td>수입종합화장품</td>\n", | |
" <td>화장품</td>\n", | |
" <td>잡화파트</td>\n", | |
" <td>1</td>\n", | |
" <td>175000</td>\n", | |
" <td>17500</td>\n", | |
" <td>3</td>\n", | |
" <td>2000-08-26</td>\n", | |
" <td>18</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0</td>\n", | |
" <td>본점</td>\n", | |
" <td>4106430119900</td>\n", | |
" <td>듀퐁</td>\n", | |
" <td>수입의류</td>\n", | |
" <td>명품토탈</td>\n", | |
" <td>잡화파트</td>\n", | |
" <td>1</td>\n", | |
" <td>455000</td>\n", | |
" <td>45500</td>\n", | |
" <td>3</td>\n", | |
" <td>2000-08-26</td>\n", | |
" <td>18</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0</td>\n", | |
" <td>무역점</td>\n", | |
" <td>2139141008000</td>\n", | |
" <td>랑콤</td>\n", | |
" <td>수입종합화장품</td>\n", | |
" <td>화장품</td>\n", | |
" <td>명품잡화</td>\n", | |
" <td>0</td>\n", | |
" <td>100000</td>\n", | |
" <td>10000</td>\n", | |
" <td>3</td>\n", | |
" <td>2000-09-03</td>\n", | |
" <td>18</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" custid store product brand corner pc part imported amount \\\n", | |
"0 0 무역점 2116050008000 에스티로더 수입종합화장품 화장품 명품잡화 1 90000 \n", | |
"1 0 무역점 4125440008000 시슬리 수입종합화장품 화장품 명품잡화 1 39000 \n", | |
"2 0 본점 2116052008000 크리니크 수입종합화장품 화장품 잡화파트 1 175000 \n", | |
"3 0 본점 4106430119900 듀퐁 수입의류 명품토탈 잡화파트 1 455000 \n", | |
"4 0 무역점 2139141008000 랑콤 수입종합화장품 화장품 명품잡화 0 100000 \n", | |
"\n", | |
" discount installment date time ndiscount \n", | |
"0 9000 3 2000-06-25 12 1 \n", | |
"1 3900 1 2000-06-25 12 1 \n", | |
"2 17500 3 2000-08-26 18 1 \n", | |
"3 45500 3 2000-08-26 18 1 \n", | |
"4 10000 3 2000-09-03 18 1 " | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train_test_purchase.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>custid</th>\n", | |
" <th>store</th>\n", | |
" <th>product</th>\n", | |
" <th>brand</th>\n", | |
" <th>corner</th>\n", | |
" <th>pc</th>\n", | |
" <th>part</th>\n", | |
" <th>imported</th>\n", | |
" <th>amount</th>\n", | |
" <th>discount</th>\n", | |
" <th>installment</th>\n", | |
" <th>date</th>\n", | |
" <th>time</th>\n", | |
" <th>ndiscount</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>1</td>\n", | |
" <td>본점</td>\n", | |
" <td>4234190015074</td>\n", | |
" <td>바바라</td>\n", | |
" <td>란제리</td>\n", | |
" <td>내의란제리</td>\n", | |
" <td>케주얼,구두,아동</td>\n", | |
" <td>1</td>\n", | |
" <td>-35000</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2000-06-17</td>\n", | |
" <td>12</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>21</th>\n", | |
" <td>1</td>\n", | |
" <td>본점</td>\n", | |
" <td>4229811011200</td>\n", | |
" <td>시슬리</td>\n", | |
" <td>영트랜드</td>\n", | |
" <td>영트렌디</td>\n", | |
" <td>케주얼,구두,아동</td>\n", | |
" <td>0</td>\n", | |
" <td>-73000</td>\n", | |
" <td>-3650</td>\n", | |
" <td>3</td>\n", | |
" <td>2000-06-30</td>\n", | |
" <td>11</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>39</th>\n", | |
" <td>2</td>\n", | |
" <td>무역점</td>\n", | |
" <td>4301000017000</td>\n", | |
" <td>노티카</td>\n", | |
" <td>트래디셔널</td>\n", | |
" <td>트래디셔널</td>\n", | |
" <td>골프/유니캐쥬얼</td>\n", | |
" <td>0</td>\n", | |
" <td>-434500</td>\n", | |
" <td>-43450</td>\n", | |
" <td>1</td>\n", | |
" <td>2000-08-27</td>\n", | |
" <td>19</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>46</th>\n", | |
" <td>2</td>\n", | |
" <td>무역점</td>\n", | |
" <td>4502161930200</td>\n", | |
" <td>삼성</td>\n", | |
" <td>가전특정</td>\n", | |
" <td>가전</td>\n", | |
" <td>가정용품</td>\n", | |
" <td>0</td>\n", | |
" <td>-1416000</td>\n", | |
" <td>0</td>\n", | |
" <td>6</td>\n", | |
" <td>2001-01-03</td>\n", | |
" <td>10</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>60</th>\n", | |
" <td>3</td>\n", | |
" <td>천호점</td>\n", | |
" <td>4405620111000</td>\n", | |
" <td>지오다노</td>\n", | |
" <td>영캐쥬얼</td>\n", | |
" <td>영트랜디</td>\n", | |
" <td>영라이브</td>\n", | |
" <td>0</td>\n", | |
" <td>-74600</td>\n", | |
" <td>-3730</td>\n", | |
" <td>3</td>\n", | |
" <td>2000-08-27</td>\n", | |
" <td>11</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" custid store product brand corner pc part imported \\\n", | |
"18 1 본점 4234190015074 바바라 란제리 내의란제리 케주얼,구두,아동 1 \n", | |
"21 1 본점 4229811011200 시슬리 영트랜드 영트렌디 케주얼,구두,아동 0 \n", | |
"39 2 무역점 4301000017000 노티카 트래디셔널 트래디셔널 골프/유니캐쥬얼 0 \n", | |
"46 2 무역점 4502161930200 삼성 가전특정 가전 가정용품 0 \n", | |
"60 3 천호점 4405620111000 지오다노 영캐쥬얼 영트랜디 영라이브 0 \n", | |
"\n", | |
" amount discount installment date time ndiscount \n", | |
"18 -35000 0 1 2000-06-17 12 0 \n", | |
"21 -73000 -3650 3 2000-06-30 11 1 \n", | |
"39 -434500 -43450 1 2000-08-27 19 1 \n", | |
"46 -1416000 0 6 2001-01-03 10 0 \n", | |
"60 -74600 -3730 3 2000-08-27 11 1 " | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train_test_refund.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# features 제작" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"features = []" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 1.총구매액" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>custid</th>\n", | |
" <th>총구매액</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>1742000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>2880100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>5601350</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>3</td>\n", | |
" <td>2996100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>1045000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" custid 총구매액\n", | |
"0 0 1742000\n", | |
"1 1 2880100\n", | |
"2 2 5601350\n", | |
"3 3 2996100\n", | |
"4 4 1045000" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Total purchase amount per customer, as a two-column frame keyed by custid.\n", | |
"f = (\n", | |
"    train_test_purchase.groupby('custid')['amount']\n", | |
"    .sum()\n", | |
"    .rename('총구매액')\n", | |
"    .reset_index()\n", | |
")\n", | |
"f.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"features.append(f)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 2.구매건수" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>custid</th>\n", | |
" <th>구매건수</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>24</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>3</td>\n", | |
" <td>28</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" custid 구매건수\n", | |
"0 0 11\n", | |
"1 1 24\n", | |
"2 2 9\n", | |
"3 3 28\n", | |
"4 4 4" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Number of purchase rows per customer, as a two-column frame keyed by custid.\n", | |
"f = (\n", | |
"    train_test_purchase.groupby('custid')['amount']\n", | |
"    .count()\n", | |
"    .rename('구매건수')\n", | |
"    .reset_index()\n", | |
")\n", | |
"f.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"features.append(f)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 3.환불건수" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>custid</th>\n", | |
" <th>환불건수</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>8</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" custid 환불건수\n", | |
"0 1 2\n", | |
"1 2 2\n", | |
"2 3 2\n", | |
"3 6 2\n", | |
"4 8 2" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Number of refund rows per customer; customers without any refund row do\n", | |
"# not appear in this frame.\n", | |
"f = (\n", | |
"    train_test_refund.groupby('custid')['amount']\n", | |
"    .count()\n", | |
"    .astype('int')\n", | |
"    .rename('환불건수')\n", | |
"    .reset_index()\n", | |
")\n", | |
"f.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"features.append(f)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 4.평균 구매가격" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>custid</th>\n", | |
" <th>평균구매가격</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>158363.636364</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>120004.166667</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>622372.222222</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>3</td>\n", | |
" <td>107003.571429</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>261250.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" custid 평균구매가격\n", | |
"0 0 158363.636364\n", | |
"1 1 120004.166667\n", | |
"2 2 622372.222222\n", | |
"3 3 107003.571429\n", | |
"4 4 261250.000000" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Mean amount per purchase row for each customer, keyed by custid.\n", | |
"f = (\n", | |
"    train_test_purchase.groupby('custid')['amount']\n", | |
"    .mean()\n", | |
"    .rename('평균구매가격')\n", | |
"    .reset_index()\n", | |
")\n", | |
"f.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"features.append(f)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 5.평균 할부개월" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>custid</th>\n", | |
" <th>평균할부개월</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>2.818182</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>2.500000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>3.444444</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>3</td>\n", | |
" <td>2.571429</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>4.500000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" custid 평균할부개월\n", | |
"0 0 2.818182\n", | |
"1 1 2.500000\n", | |
"2 2 3.444444\n", | |
"3 3 2.571429\n", | |
"4 4 4.500000" | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Mean installment value across each customer's purchase rows, keyed by custid.\n", | |
"f = (\n", | |
"    train_test_purchase.groupby('custid')['installment']\n", | |
"    .mean()\n", | |
"    .rename('평균할부개월')\n", | |
"    .reset_index()\n", | |
")\n", | |
"f.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"features.append(f)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 6.브랜드 다양성" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1900" | |
] | |
}, | |
"execution_count": 26, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"n_brand = train_test_purchase['brand'].nunique()\n", | |
"n_brand" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>custid</th>\n", | |
" <th>브랜드다양성</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>0.003684</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>0.010000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>0.003684</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>3</td>\n", | |
" <td>0.011053</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>0.002105</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" custid 브랜드다양성\n", | |
"0 0 0.003684\n", | |
"1 1 0.010000\n", | |
"2 2 0.003684\n", | |
"3 3 0.011053\n", | |
"4 4 0.002105" | |
] | |
}, | |
"execution_count": 27, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"def brand(x):\n", | |
" return x.nunique() / n_brand\n", | |
"\n", | |
"f = train_test_purchase.groupby('custid')['brand'].agg([('브랜드다양성', brand)])\n", | |
"\n", | |
"f = f.reset_index()\n", | |
"f.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"features.append(f)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 7.내점 일수" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>custid</th>\n", | |
" <th>내점일수</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>3</td>\n", | |
" <td>12</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" custid 내점일수\n", | |
"0 0 7\n", | |
"1 1 16\n", | |
"2 2 7\n", | |
"3 3 12\n", | |
"4 4 2" | |
] | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"f = train_test_purchase.groupby('custid')['date'].agg([('내점일수','nunique')])\n", | |
"\n", | |
"f = f.reset_index()\n", | |
"f.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"features.append(f)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 8.평균 할인금액" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>custid</th>\n", | |
" <th>평균할인금액</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>15836.363636</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>2511.666667</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>33171.111111</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>3</td>\n", | |
" <td>4515.714286</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>5450.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" custid 평균할인금액\n", | |
"0 0 15836.363636\n", | |
"1 1 2511.666667\n", | |
"2 2 33171.111111\n", | |
"3 3 4515.714286\n", | |
"4 4 5450.000000" | |
] | |
}, | |
"execution_count": 31, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"f = train_test_purchase.groupby('custid')['discount'].agg([('평균할인금액','mean')])\n", | |
"\n", | |
"f = f.reset_index()\n", | |
"f.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"features.append(f)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 9.파트별 구매비율" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>part</th>\n", | |
" <th>custid</th>\n", | |
" <th>가정용품</th>\n", | |
" <th>가정용품파트</th>\n", | |
" <th>골프/유니캐쥬얼</th>\n", | |
" <th>공산품</th>\n", | |
" <th>공산품파트</th>\n", | |
" <th>남성의류</th>\n", | |
" <th>남성정장스포츠</th>\n", | |
" <th>로얄부띠끄</th>\n", | |
" <th>로얄부틱</th>\n", | |
" <th>...</th>\n", | |
" <th>여성캐쥬얼</th>\n", | |
" <th>영라이브</th>\n", | |
" <th>영어덜트캐쥬얼</th>\n", | |
" <th>영캐릭터</th>\n", | |
" <th>영플라자</th>\n", | |
" <th>인터넷백화점</th>\n", | |
" <th>잡화</th>\n", | |
" <th>잡화파트</th>\n", | |
" <th>케주얼,구두,아동</th>\n", | |
" <th>패션잡화</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.090909</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.090909</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.00</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.545455</td>\n", | |
" <td>0.00</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>0.041667</td>\n", | |
" <td>0.041667</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.041667</td>\n", | |
" <td>0.25</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.208333</td>\n", | |
" <td>0.25</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>0.333333</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.222222</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.111111</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.00</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.111111</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.111111</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.00</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>3</td>\n", | |
" <td>0.071429</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.107143</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.107143</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.00</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.214286</td>\n", | |
" <td>0.071429</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.00</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.00</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.00</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 32 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"part custid 가정용품 가정용품파트 골프/유니캐쥬얼 공산품 공산품파트 남성의류 \\\n", | |
"0 0 0.000000 0.000000 0.090909 0.000000 0.0 0.090909 \n", | |
"1 1 0.041667 0.041667 0.000000 0.000000 0.0 0.000000 \n", | |
"2 2 0.333333 0.000000 0.222222 0.000000 0.0 0.111111 \n", | |
"3 3 0.071429 0.000000 0.000000 0.107143 0.0 0.107143 \n", | |
"4 4 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 \n", | |
"\n", | |
"part 남성정장스포츠 로얄부띠끄 로얄부틱 ... 여성캐쥬얼 영라이브 영어덜트캐쥬얼 영캐릭터 영플라자 \\\n", | |
"0 0.000000 0.00 0.0 ... 0.0 0.000000 0.000000 0.0 0.0 \n", | |
"1 0.041667 0.25 0.0 ... 0.0 0.000000 0.000000 0.0 0.0 \n", | |
"2 0.000000 0.00 0.0 ... 0.0 0.000000 0.111111 0.0 0.0 \n", | |
"3 0.000000 0.00 0.0 ... 0.0 0.214286 0.071429 0.0 0.0 \n", | |
"4 0.000000 0.00 0.0 ... 0.0 0.000000 0.000000 0.0 0.0 \n", | |
"\n", | |
"part 인터넷백화점 잡화 잡화파트 케주얼,구두,아동 패션잡화 \n", | |
"0 0.0 0.000000 0.545455 0.00 0.0 \n", | |
"1 0.0 0.000000 0.208333 0.25 0.0 \n", | |
"2 0.0 0.111111 0.000000 0.00 0.0 \n", | |
"3 0.0 0.000000 0.000000 0.00 0.0 \n", | |
"4 0.0 0.000000 0.000000 0.00 0.0 \n", | |
"\n", | |
"[5 rows x 32 columns]" | |
] | |
}, | |
"execution_count": 33, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"f = pd.pivot_table(train_test_purchase, index='custid', columns='part', values='amount', aggfunc='count', fill_value=0)\n", | |
"\n", | |
"f = f.div(f.sum(axis = 1), axis = 0).fillna(0)\n", | |
"\n", | |
"f = f.reset_index()\n", | |
"f.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"features.append(f)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 10.오전/오후 구매비율" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def time(x):\n", | |
" if x >= 12:\n", | |
" return 1\n", | |
" else :\n", | |
" return 0\n", | |
" \n", | |
"train_test_purchase['time'] = train_test_purchase['time'].apply(time)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>custid</th>\n", | |
" <th>오전</th>\n", | |
" <th>오후</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>0.125000</td>\n", | |
" <td>0.875000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>0.222222</td>\n", | |
" <td>0.777778</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>3</td>\n", | |
" <td>0.071429</td>\n", | |
" <td>0.928571</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" custid 오전 오후\n", | |
"0 0 0.000000 1.000000\n", | |
"1 1 0.125000 0.875000\n", | |
"2 2 0.222222 0.777778\n", | |
"3 3 0.071429 0.928571\n", | |
"4 4 0.000000 1.000000" | |
] | |
}, | |
"execution_count": 36, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"f = pd.pivot_table(train_test_purchase, index='custid', columns='time', values='amount', aggfunc='count', fill_value=0)\n", | |
"\n", | |
"f.columns = ['오전', '오후']\n", | |
"\n", | |
"f = f.div(f.sum(axis = 1), axis = 0).fillna(0)\n", | |
"\n", | |
"f = f.reset_index()\n", | |
"f.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"features.append(f)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 11.할인건수" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>custid</th>\n", | |
" <th>할인건수</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>3</td>\n", | |
" <td>18</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" custid 할인건수\n", | |
"0 0 11\n", | |
"1 1 9\n", | |
"2 2 7\n", | |
"3 3 18\n", | |
"4 4 2" | |
] | |
}, | |
"execution_count": 38, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"f = train_test_purchase.groupby('custid')['ndiscount'].agg([('할인건수', 'sum')])\n", | |
"\n", | |
"f = f.reset_index()\n", | |
"f.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"features.append(f)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 12.할부건수" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def isins(x):\n", | |
" if x > 0:\n", | |
" return 1\n", | |
" else:\n", | |
" return 0\n", | |
" \n", | |
"train_test_purchase['ins'] = train_test_purchase['installment'].apply(isins)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>custid</th>\n", | |
" <th>할부건수</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>24</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>3</td>\n", | |
" <td>28</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" custid 할부건수\n", | |
"0 0 11\n", | |
"1 1 24\n", | |
"2 2 9\n", | |
"3 3 28\n", | |
"4 4 4" | |
] | |
}, | |
"execution_count": 41, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"f = train_test_purchase.groupby('custid')['ins'].agg([('할부건수', 'sum')])\n", | |
"\n", | |
"f = f.reset_index()\n", | |
"f.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"features.append(f)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 13.계절별 구매비율" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"train_test_purchase['year'] = train_test_purchase['date'].str[:4].astype('int')\n", | |
"train_test_purchase['month'] = train_test_purchase['date'].str[5:7].astype('int')\n", | |
"train_test_purchase['day'] = train_test_purchase['date'].str[8:10].astype('int')\n", | |
"\n", | |
"def ismonth(x):\n", | |
" if x >= 3 and x <= 4:\n", | |
" return '봄'\n", | |
" elif x >= 5 and x <= 8:\n", | |
" return '여름'\n", | |
" elif x >= 9 and x <= 10:\n", | |
" return '가을'\n", | |
" else:\n", | |
" return '겨울'\n", | |
" \n", | |
"train_test_purchase['season'] = train_test_purchase['month'].apply(ismonth)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>season</th>\n", | |
" <th>custid</th>\n", | |
" <th>가을</th>\n", | |
" <th>겨울</th>\n", | |
" <th>봄</th>\n", | |
" <th>여름</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>0.272727</td>\n", | |
" <td>0.272727</td>\n", | |
" <td>0.090909</td>\n", | |
" <td>0.363636</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>0.083333</td>\n", | |
" <td>0.208333</td>\n", | |
" <td>0.208333</td>\n", | |
" <td>0.500000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>0.444444</td>\n", | |
" <td>0.333333</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.222222</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>3</td>\n", | |
" <td>0.107143</td>\n", | |
" <td>0.214286</td>\n", | |
" <td>0.214286</td>\n", | |
" <td>0.464286</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"season custid 가을 겨울 봄 여름\n", | |
"0 0 0.272727 0.272727 0.090909 0.363636\n", | |
"1 1 0.083333 0.208333 0.208333 0.500000\n", | |
"2 2 0.444444 0.333333 0.000000 0.222222\n", | |
"3 3 0.107143 0.214286 0.214286 0.464286\n", | |
"4 4 0.000000 0.000000 0.000000 1.000000" | |
] | |
}, | |
"execution_count": 44, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"f = pd.pivot_table(train_test_purchase, index='custid', columns='season', values='amount', aggfunc='count', fill_value=0)\n", | |
"\n", | |
"f = f.div(f.sum(axis=1), axis=0).fillna(0)\n", | |
"\n", | |
"f = f.reset_index()\n", | |
"f.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"features.append(f)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 14.주말/평일 구매비율" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [], | |
"source": [ | |
"train_test_purchase['date'] = pd.to_datetime(train_test_purchase['date'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def week(x):\n", | |
" return x.dayofweek\n", | |
"\n", | |
"train_test_purchase['week'] = train_test_purchase['date'].apply(week)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def weekend(x):\n", | |
" if x >= 5:\n", | |
" return 1\n", | |
" else:\n", | |
" return 0\n", | |
" \n", | |
"train_test_purchase['week'] = train_test_purchase['week'].apply(weekend)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 50, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>custid</th>\n", | |
" <th>주말</th>\n", | |
" <th>평일</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>0.363636</td>\n", | |
" <td>0.636364</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>0.500000</td>\n", | |
" <td>0.500000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>0.666667</td>\n", | |
" <td>0.333333</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>3</td>\n", | |
" <td>0.642857</td>\n", | |
" <td>0.357143</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>0.750000</td>\n", | |
" <td>0.250000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" custid 주말 평일\n", | |
"0 0 0.363636 0.636364\n", | |
"1 1 0.500000 0.500000\n", | |
"2 2 0.666667 0.333333\n", | |
"3 3 0.642857 0.357143\n", | |
"4 4 0.750000 0.250000" | |
] | |
}, | |
"execution_count": 50, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"f = pd.pivot_table(train_test_purchase, index = 'custid', columns = 'week', values = 'amount', aggfunc = 'count', fill_value=0)\n", | |
"\n", | |
"f.columns = ['주말','평일']\n", | |
"\n", | |
"f = f.div(f.sum(axis=1), axis=0).fillna(0)\n", | |
"\n", | |
"f = f.reset_index()\n", | |
"f.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 51, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"features.append(f)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# features 합치기" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>custid</th>\n", | |
" <th>총구매액</th>\n", | |
" <th>구매건수</th>\n", | |
" <th>환불건수</th>\n", | |
" <th>평균구매가격</th>\n", | |
" <th>평균할부개월</th>\n", | |
" <th>브랜드다양성</th>\n", | |
" <th>내점일수</th>\n", | |
" <th>평균할인금액</th>\n", | |
" <th>가정용품</th>\n", | |
" <th>...</th>\n", | |
" <th>오전</th>\n", | |
" <th>오후</th>\n", | |
" <th>할인건수</th>\n", | |
" <th>할부건수</th>\n", | |
" <th>가을</th>\n", | |
" <th>겨울</th>\n", | |
" <th>봄</th>\n", | |
" <th>여름</th>\n", | |
" <th>주말</th>\n", | |
" <th>평일</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>1742000</td>\n", | |
" <td>11</td>\n", | |
" <td>0.0</td>\n", | |
" <td>158363.636364</td>\n", | |
" <td>2.818182</td>\n", | |
" <td>0.003684</td>\n", | |
" <td>7</td>\n", | |
" <td>15836.363636</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>11</td>\n", | |
" <td>11</td>\n", | |
" <td>0.272727</td>\n", | |
" <td>0.272727</td>\n", | |
" <td>0.090909</td>\n", | |
" <td>0.363636</td>\n", | |
" <td>0.363636</td>\n", | |
" <td>0.636364</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>2880100</td>\n", | |
" <td>24</td>\n", | |
" <td>2.0</td>\n", | |
" <td>120004.166667</td>\n", | |
" <td>2.500000</td>\n", | |
" <td>0.010000</td>\n", | |
" <td>16</td>\n", | |
" <td>2511.666667</td>\n", | |
" <td>0.041667</td>\n", | |
" <td>...</td>\n", | |
" <td>0.125000</td>\n", | |
" <td>0.875000</td>\n", | |
" <td>9</td>\n", | |
" <td>24</td>\n", | |
" <td>0.083333</td>\n", | |
" <td>0.208333</td>\n", | |
" <td>0.208333</td>\n", | |
" <td>0.500000</td>\n", | |
" <td>0.500000</td>\n", | |
" <td>0.500000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>5601350</td>\n", | |
" <td>9</td>\n", | |
" <td>2.0</td>\n", | |
" <td>622372.222222</td>\n", | |
" <td>3.444444</td>\n", | |
" <td>0.003684</td>\n", | |
" <td>7</td>\n", | |
" <td>33171.111111</td>\n", | |
" <td>0.333333</td>\n", | |
" <td>...</td>\n", | |
" <td>0.222222</td>\n", | |
" <td>0.777778</td>\n", | |
" <td>7</td>\n", | |
" <td>9</td>\n", | |
" <td>0.444444</td>\n", | |
" <td>0.333333</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.222222</td>\n", | |
" <td>0.666667</td>\n", | |
" <td>0.333333</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>3</td>\n", | |
" <td>2996100</td>\n", | |
" <td>28</td>\n", | |
" <td>2.0</td>\n", | |
" <td>107003.571429</td>\n", | |
" <td>2.571429</td>\n", | |
" <td>0.011053</td>\n", | |
" <td>12</td>\n", | |
" <td>4515.714286</td>\n", | |
" <td>0.071429</td>\n", | |
" <td>...</td>\n", | |
" <td>0.071429</td>\n", | |
" <td>0.928571</td>\n", | |
" <td>18</td>\n", | |
" <td>28</td>\n", | |
" <td>0.107143</td>\n", | |
" <td>0.214286</td>\n", | |
" <td>0.214286</td>\n", | |
" <td>0.464286</td>\n", | |
" <td>0.642857</td>\n", | |
" <td>0.357143</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>1045000</td>\n", | |
" <td>4</td>\n", | |
" <td>0.0</td>\n", | |
" <td>261250.000000</td>\n", | |
" <td>4.500000</td>\n", | |
" <td>0.002105</td>\n", | |
" <td>2</td>\n", | |
" <td>5450.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>2</td>\n", | |
" <td>4</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.750000</td>\n", | |
" <td>0.250000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 50 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" custid 총구매액 구매건수 환불건수 평균구매가격 평균할부개월 브랜드다양성 내점일수 \\\n", | |
"0 0 1742000 11 0.0 158363.636364 2.818182 0.003684 7 \n", | |
"1 1 2880100 24 2.0 120004.166667 2.500000 0.010000 16 \n", | |
"2 2 5601350 9 2.0 622372.222222 3.444444 0.003684 7 \n", | |
"3 3 2996100 28 2.0 107003.571429 2.571429 0.011053 12 \n", | |
"4 4 1045000 4 0.0 261250.000000 4.500000 0.002105 2 \n", | |
"\n", | |
" 평균할인금액 가정용품 ... 오전 오후 할인건수 할부건수 가을 \\\n", | |
"0 15836.363636 0.000000 ... 0.000000 1.000000 11 11 0.272727 \n", | |
"1 2511.666667 0.041667 ... 0.125000 0.875000 9 24 0.083333 \n", | |
"2 33171.111111 0.333333 ... 0.222222 0.777778 7 9 0.444444 \n", | |
"3 4515.714286 0.071429 ... 0.071429 0.928571 18 28 0.107143 \n", | |
"4 5450.000000 0.000000 ... 0.000000 1.000000 2 4 0.000000 \n", | |
"\n", | |
" 겨울 봄 여름 주말 평일 \n", | |
"0 0.272727 0.090909 0.363636 0.363636 0.636364 \n", | |
"1 0.208333 0.208333 0.500000 0.500000 0.500000 \n", | |
"2 0.333333 0.000000 0.222222 0.666667 0.333333 \n", | |
"3 0.214286 0.214286 0.464286 0.642857 0.357143 \n", | |
"4 0.000000 0.000000 1.000000 0.750000 0.250000 \n", | |
"\n", | |
"[5 rows x 50 columns]" | |
] | |
}, | |
"execution_count": 52, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tr = pd.DataFrame({'custid' : train_test['custid'].unique()})\n", | |
"\n", | |
"for f in features:\n", | |
" tr = pd.merge(tr, f, how='left').fillna(0)\n", | |
"\n", | |
"tr.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 21.할인비율" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"tr['할인비율'] = tr['할인건수'] / tr['구매건수'].round(2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 22.평균할인율" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"tr['평균할인율'] = tr['평균할인금액'] / tr['평균구매가격'].round(2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 23.환불비율" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 55, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"tr['환불비율'] = tr['환불건수'] / tr['구매건수'].round(2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 24.할부비율" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 56, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"tr['할부비율'] = tr['할부건수'] / tr['구매건수']" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 25.하루평균구매금액" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 57, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"tr['하루구매금액'] = tr[\"총구매액\"] / tr['내점일수']" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# 잘못된 피쳐 삭제" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 58, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"del tr['총구매액']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 59, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"del tr[\"평균구매가격\"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 60, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"del tr['평균할인금액']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 61, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"del tr['할부건수']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 62, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"del tr['할인건수']" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# features 나누기" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 63, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"X_train = tr[tr['custid'] <= x_train['custid'].unique().max()]\n", | |
"del X_train['custid']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 64, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"X_test = tr[tr['custid'] >= x_test['custid'].unique().min()]\n", | |
"del X_test['custid']" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# 모델 제작" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 65, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[10:41:28] WARNING: C:\\Users\\Administrator\\workspace\\xgboost-win64_release_1.1.0\\src\\learner.cc:480: \n", | |
"Parameters: { n_extimators } might not be used.\n", | |
"\n", | |
" This may not be accurate due to some parameters are only used in language bindings but\n", | |
" passed down to XGBoost core. Or some parameters are not used but slip through this\n", | |
" verification. Please open an issue if you find above cases.\n", | |
"\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"xgb = XGBClassifier(n_extimators=500, learning_rate = 0.1, max_depth = 4)\n", | |
"xgb.fit(X_train, Y_train)\n", | |
"xgb_pred = xgb.predict(X_test)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# 평가" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 66, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"71.52\n", | |
"0.7151787946986746\n" | |
] | |
} | |
], | |
"source": [ | |
"print(accuracy_score(Y_test, xgb_pred).round(4) * 100)\n", | |
"print(accuracy_score(Y_test, xgb_pred))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 67, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>custid</th>\n", | |
" <th>구매건수</th>\n", | |
" <th>환불건수</th>\n", | |
" <th>평균할부개월</th>\n", | |
" <th>브랜드다양성</th>\n", | |
" <th>내점일수</th>\n", | |
" <th>가정용품</th>\n", | |
" <th>가정용품파트</th>\n", | |
" <th>골프/유니캐쥬얼</th>\n", | |
" <th>공산품</th>\n", | |
" <th>...</th>\n", | |
" <th>겨울</th>\n", | |
" <th>봄</th>\n", | |
" <th>여름</th>\n", | |
" <th>주말</th>\n", | |
" <th>평일</th>\n", | |
" <th>할인비율</th>\n", | |
" <th>평균할인율</th>\n", | |
" <th>환불비율</th>\n", | |
" <th>할부비율</th>\n", | |
" <th>하루구매금액</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>11</td>\n", | |
" <td>0.0</td>\n", | |
" <td>2.818182</td>\n", | |
" <td>0.003684</td>\n", | |
" <td>7</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.090909</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.272727</td>\n", | |
" <td>0.090909</td>\n", | |
" <td>0.363636</td>\n", | |
" <td>0.363636</td>\n", | |
" <td>0.636364</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.100000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.0</td>\n", | |
" <td>248857.142857</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>24</td>\n", | |
" <td>2.0</td>\n", | |
" <td>2.500000</td>\n", | |
" <td>0.010000</td>\n", | |
" <td>16</td>\n", | |
" <td>0.041667</td>\n", | |
" <td>0.041667</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.208333</td>\n", | |
" <td>0.208333</td>\n", | |
" <td>0.500000</td>\n", | |
" <td>0.500000</td>\n", | |
" <td>0.500000</td>\n", | |
" <td>0.375000</td>\n", | |
" <td>0.020930</td>\n", | |
" <td>0.083333</td>\n", | |
" <td>1.0</td>\n", | |
" <td>180006.250000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>9</td>\n", | |
" <td>2.0</td>\n", | |
" <td>3.444444</td>\n", | |
" <td>0.003684</td>\n", | |
" <td>7</td>\n", | |
" <td>0.333333</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.222222</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.333333</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.222222</td>\n", | |
" <td>0.666667</td>\n", | |
" <td>0.333333</td>\n", | |
" <td>0.777778</td>\n", | |
" <td>0.053298</td>\n", | |
" <td>0.222222</td>\n", | |
" <td>1.0</td>\n", | |
" <td>800192.857143</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>3</td>\n", | |
" <td>28</td>\n", | |
" <td>2.0</td>\n", | |
" <td>2.571429</td>\n", | |
" <td>0.011053</td>\n", | |
" <td>12</td>\n", | |
" <td>0.071429</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.107143</td>\n", | |
" <td>...</td>\n", | |
" <td>0.214286</td>\n", | |
" <td>0.214286</td>\n", | |
" <td>0.464286</td>\n", | |
" <td>0.642857</td>\n", | |
" <td>0.357143</td>\n", | |
" <td>0.642857</td>\n", | |
" <td>0.042202</td>\n", | |
" <td>0.071429</td>\n", | |
" <td>1.0</td>\n", | |
" <td>249675.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>4</td>\n", | |
" <td>0.0</td>\n", | |
" <td>4.500000</td>\n", | |
" <td>0.002105</td>\n", | |
" <td>2</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.750000</td>\n", | |
" <td>0.250000</td>\n", | |
" <td>0.500000</td>\n", | |
" <td>0.020861</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.0</td>\n", | |
" <td>522500.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 50 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" custid 구매건수 환불건수 평균할부개월 브랜드다양성 내점일수 가정용품 가정용품파트 골프/유니캐쥬얼 \\\n", | |
"0 0 11 0.0 2.818182 0.003684 7 0.000000 0.000000 0.090909 \n", | |
"1 1 24 2.0 2.500000 0.010000 16 0.041667 0.041667 0.000000 \n", | |
"2 2 9 2.0 3.444444 0.003684 7 0.333333 0.000000 0.222222 \n", | |
"3 3 28 2.0 2.571429 0.011053 12 0.071429 0.000000 0.000000 \n", | |
"4 4 4 0.0 4.500000 0.002105 2 0.000000 0.000000 0.000000 \n", | |
"\n", | |
" 공산품 ... 겨울 봄 여름 주말 평일 할인비율 \\\n", | |
"0 0.000000 ... 0.272727 0.090909 0.363636 0.363636 0.636364 1.000000 \n", | |
"1 0.000000 ... 0.208333 0.208333 0.500000 0.500000 0.500000 0.375000 \n", | |
"2 0.000000 ... 0.333333 0.000000 0.222222 0.666667 0.333333 0.777778 \n", | |
"3 0.107143 ... 0.214286 0.214286 0.464286 0.642857 0.357143 0.642857 \n", | |
"4 0.000000 ... 0.000000 0.000000 1.000000 0.750000 0.250000 0.500000 \n", | |
"\n", | |
" 평균할인율 환불비율 할부비율 하루구매금액 \n", | |
"0 0.100000 0.000000 1.0 248857.142857 \n", | |
"1 0.020930 0.083333 1.0 180006.250000 \n", | |
"2 0.053298 0.222222 1.0 800192.857143 \n", | |
"3 0.042202 0.071429 1.0 249675.000000 \n", | |
"4 0.020861 0.000000 1.0 522500.000000 \n", | |
"\n", | |
"[5 rows x 50 columns]" | |
] | |
}, | |
"execution_count": 67, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tr.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment