Created
April 21, 2019 17:55
-
-
Save AlisonDavey/faa1d278cf89db4fb0c66d137a3cba92 to your computer and use it in GitHub Desktop.
Zara Data Challenge - top 10 with no ML
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Forecast fashion sales over three weeks in Zara's online store in one country https://www.zaratalent.com/data.html\n\nA walkthrough on how to achieve a top 10 position just by looking at the data." | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "%matplotlib inline", | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import pandas as pd\nimport numpy as np", | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "path = './data/'", | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "# Bet 1: Blocks for week 11" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_blocks = pd.read_csv(path+'product_blocks.csv')\ndf_products = pd.read_csv(path+'products.csv')\ndf_stock_and_sales = pd.read_csv(path+'stock_and_sales_day_0_day_63.csv')", | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Rank blocks by revenue per item to identify the best performing blocks of products" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_blocks.info()", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 15238 entries, 0 to 15237\nData columns (total 2 columns):\nproduct_id 15238 non-null int64\nblock_id 15238 non-null int64\ndtypes: int64(2)\nmemory usage: 238.2 KB\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_blocks.head()", | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 6, | |
"data": { | |
"text/plain": " product_id block_id\n0 612967398 0\n1 296892108 0\n2 139541214 0\n3 963923934 0\n4 938230141 0", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>product_id</th>\n <th>block_id</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>612967398</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>296892108</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>139541214</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>963923934</td>\n <td>0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>938230141</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Number of products in each block\ndf_n_items_in_block = df_blocks.groupby('block_id').count()\ndf_n_items_in_block.rename(columns = {'product_id': 'num_items'}, inplace=True)\ndf_n_items_in_block.head()", | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 7, | |
"data": { | |
"text/plain": " num_items\nblock_id \n0 7\n1 7\n2 7\n3 6\n4 10", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>7</td>\n </tr>\n <tr>\n <th>1</th>\n <td>7</td>\n </tr>\n <tr>\n <th>2</th>\n <td>7</td>\n </tr>\n <tr>\n <th>3</th>\n <td>6</td>\n </tr>\n <tr>\n <th>4</th>\n <td>10</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_stock_and_sales.head()", | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 8, | |
"data": { | |
"text/plain": " date_number product_id color_id size_id sales stock\n0 0 310130 78467031 7026791 1 93\n1 0 310130 78467031 8015276 2 62\n2 0 310130 78467031 452052101 1 18\n3 0 310130 78467031 541898038 2 42\n4 0 310130 78467031 558685710 3 113", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>date_number</th>\n <th>product_id</th>\n <th>color_id</th>\n <th>size_id</th>\n <th>sales</th>\n <th>stock</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0</td>\n <td>310130</td>\n <td>78467031</td>\n <td>7026791</td>\n <td>1</td>\n <td>93</td>\n </tr>\n <tr>\n <th>1</th>\n <td>0</td>\n <td>310130</td>\n <td>78467031</td>\n <td>8015276</td>\n <td>2</td>\n <td>62</td>\n </tr>\n <tr>\n <th>2</th>\n <td>0</td>\n <td>310130</td>\n <td>78467031</td>\n <td>452052101</td>\n <td>1</td>\n <td>18</td>\n </tr>\n <tr>\n <th>3</th>\n <td>0</td>\n <td>310130</td>\n <td>78467031</td>\n <td>541898038</td>\n <td>2</td>\n <td>42</td>\n </tr>\n <tr>\n <th>4</th>\n <td>0</td>\n <td>310130</td>\n <td>78467031</td>\n <td>558685710</td>\n <td>3</td>\n <td>113</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "#add week\ndf_stock_and_sales['week']=(((df_stock_and_sales['date_number']-4)/7)+1).round(0).astype(int)\n# add blocks\ndf_stock_and_sales = df_stock_and_sales.merge(df_blocks, how='left', on=['product_id'])\n# add price\ndf_stock_and_sales = df_stock_and_sales.merge(df_products, how='left', on='product_id')\n# add revenue\ndf_stock_and_sales['revenue']=df_stock_and_sales['sales']*df_stock_and_sales['price']\ndf_sales_week_by_product=df_stock_and_sales.groupby(['week','product_id','family_id',\n 'subfamily_id', 'price','block_id']).agg({'sales':'sum', 'revenue':'sum'}).reset_index()\ndf_sales_week_by_block = df_sales_week_by_product.groupby(['week','block_id']).agg({'revenue':'sum'}).reset_index()\ndf_week_rev = df_sales_week_by_block.merge(df_n_items_in_block, on='block_id', how='left')\ndf_week_rev['revenue_per_item']=df_week_rev.revenue/df_week_rev.num_items\n\n#2130 rows\ndf_ranked_blocks_by_week = pd.DataFrame() \nfor w in list(range(10)):\n df_ranked_blocks_by_week[w]=list(df_week_rev[df_week_rev['week']==w].sort_values('revenue_per_item', ascending=0).iloc[:,1])[:2130]\n \n#df of rank for each block for each week\ndf_block_rank=pd.DataFrame(columns=list(range(10)))\nfor block in df_n_items_in_block.index:\n a = np.where(df_ranked_blocks_by_week==block)\n for i in range(len(a[1])): df_block_rank.loc[block,a[1][i]]=a[0][i]\n# give rank 2500 if dropped out of rankings\ndf_block_rank = df_block_rank.fillna(2500.)\ndf_block_rank.sort_values(9).head(10)", | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 9, | |
"data": { | |
"text/plain": " 0 1 2 3 4 5 6 7 8 \\\n96 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2359 2500.0 3.0 4.0 9.0 6.0 6.0 4.0 0.0 1.0 \n2128 4.0 0.0 1.0 7.0 1.0 0.0 0.0 1.0 0.0 \n474 1.0 1.0 0.0 6.0 2.0 3.0 1.0 2.0 2.0 \n2037 14.0 9.0 3.0 4.0 4.0 2.0 2.0 3.0 3.0 \n2306 5.0 2.0 5.0 13.0 9.0 5.0 3.0 4.0 6.0 \n2469 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2335 1450.0 1777.0 1904.0 151.0 228.0 465.0 1180.0 1548.0 1764.0 \n1315 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n1016 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n\n 9 \n96 0.0 \n2359 1.0 \n2128 2.0 \n474 3.0 \n2037 4.0 \n2306 5.0 \n2469 6.0 \n2335 7.0 \n1315 8.0 \n1016 9.0 ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n <th>2</th>\n <th>3</th>\n <th>4</th>\n <th>5</th>\n <th>6</th>\n <th>7</th>\n <th>8</th>\n <th>9</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>0.0</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>2500.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>9.0</td>\n <td>6.0</td>\n <td>6.0</td>\n <td>4.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>4.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>7.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>2.0</td>\n </tr>\n <tr>\n <th>474</th>\n <td>1.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>6.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>1.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>3.0</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>14.0</td>\n <td>9.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>4.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>3.0</td>\n <td>4.0</td>\n </tr>\n <tr>\n <th>2306</th>\n <td>5.0</td>\n <td>2.0</td>\n <td>5.0</td>\n <td>13.0</td>\n <td>9.0</td>\n <td>5.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>6.0</td>\n <td>5.0</td>\n </tr>\n <tr>\n <th>2469</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>6.0</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>1450.0</td>\n <td>1777.0</td>\n <td>1904.0</td>\n <td>151.0</td>\n <td>228.0</td>\n <td>465.0</td>\n <td>1180.0</td>\n <td>1548.0</td>\n <td>1764.0</td>\n <td>7.0</td>\n </tr>\n <tr>\n <th>1315</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>8.0</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>9.0</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Some of the best performing blocks are simply the ones that performed well in earlier weeks, others have come from nowhere, presumably due to the introduction of new products. Let's investigate.\n\nHow many blocks do we need to get to 50 items?" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Depending on the week, 12-16 blocks are all that are needed\nblks = df_block_rank.sort_values(9).index[:20]\nnp.cumsum(df_n_items_in_block.loc[blks])", | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 10, | |
"data": { | |
"text/plain": " num_items\n96 1\n2359 2\n2128 8\n474 9\n2037 13\n2306 16\n2469 17\n2335 19\n1315 20\n1016 21\n2443 27\n358 34\n799 36\n1278 37\n2503 41\n812 49\n390 52\n893 54\n393 56\n388 64", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>1</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>2</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>8</td>\n </tr>\n <tr>\n <th>474</th>\n <td>9</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>13</td>\n </tr>\n <tr>\n <th>2306</th>\n <td>16</td>\n </tr>\n <tr>\n <th>2469</th>\n <td>17</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>19</td>\n </tr>\n <tr>\n <th>1315</th>\n <td>20</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>21</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>27</td>\n </tr>\n <tr>\n <th>358</th>\n <td>34</td>\n </tr>\n <tr>\n <th>799</th>\n <td>36</td>\n </tr>\n <tr>\n <th>1278</th>\n <td>37</td>\n </tr>\n <tr>\n <th>2503</th>\n <td>41</td>\n </tr>\n <tr>\n <th>812</th>\n <td>49</td>\n </tr>\n <tr>\n <th>390</th>\n <td>52</td>\n </tr>\n <tr>\n <th>893</th>\n <td>54</td>\n </tr>\n <tr>\n <th>393</th>\n <td>56</td>\n </tr>\n <tr>\n <th>388</th>\n <td>64</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "So let's look at how the top 16 blocks perform over these 9 weeks" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "blks = df_block_rank.sort_values(9).index[:16]\ndf_block_rank.loc[blks]", | |
"execution_count": 11, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 11, | |
"data": { | |
"text/plain": " 0 1 2 3 4 5 6 7 8 \\\n96 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2359 2500.0 3.0 4.0 9.0 6.0 6.0 4.0 0.0 1.0 \n2128 4.0 0.0 1.0 7.0 1.0 0.0 0.0 1.0 0.0 \n474 1.0 1.0 0.0 6.0 2.0 3.0 1.0 2.0 2.0 \n2037 14.0 9.0 3.0 4.0 4.0 2.0 2.0 3.0 3.0 \n2306 5.0 2.0 5.0 13.0 9.0 5.0 3.0 4.0 6.0 \n2469 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2335 1450.0 1777.0 1904.0 151.0 228.0 465.0 1180.0 1548.0 1764.0 \n1315 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n1016 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2443 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 1753.0 1616.0 \n358 315.0 499.0 582.0 455.0 902.0 941.0 1144.0 1417.0 20.0 \n799 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 8.0 \n1278 2500.0 2500.0 2500.0 45.0 58.0 115.0 69.0 100.0 15.0 \n2503 13.0 4.0 6.0 14.0 10.0 13.0 7.0 8.0 10.0 \n812 1722.0 1560.0 1832.0 1918.0 2003.0 2500.0 40.0 6.0 9.0 \n\n 9 \n96 0.0 \n2359 1.0 \n2128 2.0 \n474 3.0 \n2037 4.0 \n2306 5.0 \n2469 6.0 \n2335 7.0 \n1315 8.0 \n1016 9.0 \n2443 10.0 \n358 11.0 \n799 12.0 \n1278 13.0 \n2503 14.0 \n812 15.0 ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n <th>2</th>\n <th>3</th>\n <th>4</th>\n <th>5</th>\n <th>6</th>\n <th>7</th>\n <th>8</th>\n <th>9</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>0.0</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>2500.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>9.0</td>\n <td>6.0</td>\n <td>6.0</td>\n <td>4.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>4.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>7.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>2.0</td>\n </tr>\n <tr>\n <th>474</th>\n <td>1.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>6.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>1.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>3.0</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>14.0</td>\n <td>9.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>4.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>3.0</td>\n <td>4.0</td>\n </tr>\n <tr>\n <th>2306</th>\n <td>5.0</td>\n <td>2.0</td>\n <td>5.0</td>\n <td>13.0</td>\n <td>9.0</td>\n <td>5.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>6.0</td>\n <td>5.0</td>\n </tr>\n <tr>\n <th>2469</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>6.0</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>1450.0</td>\n <td>1777.0</td>\n <td>1904.0</td>\n <td>151.0</td>\n <td>228.0</td>\n <td>465.0</td>\n <td>1180.0</td>\n <td>1548.0</td>\n <td>1764.0</td>\n <td>7.0</td>\n </tr>\n <tr>\n <th>1315</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>8.0</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>9.0</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>1753.0</td>\n <td>1616.0</td>\n <td>10.0</td>\n </tr>\n <tr>\n <th>358</th>\n <td>315.0</td>\n <td>499.0</td>\n <td>582.0</td>\n <td>455.0</td>\n <td>902.0</td>\n <td>941.0</td>\n <td>1144.0</td>\n <td>1417.0</td>\n <td>20.0</td>\n <td>11.0</td>\n </tr>\n <tr>\n <th>799</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>8.0</td>\n <td>12.0</td>\n </tr>\n <tr>\n <th>1278</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>45.0</td>\n <td>58.0</td>\n <td>115.0</td>\n <td>69.0</td>\n <td>100.0</td>\n <td>15.0</td>\n <td>13.0</td>\n </tr>\n <tr>\n <th>2503</th>\n <td>13.0</td>\n <td>4.0</td>\n <td>6.0</td>\n <td>14.0</td>\n <td>10.0</td>\n <td>13.0</td>\n <td>7.0</td>\n <td>8.0</td>\n <td>10.0</td>\n <td>14.0</td>\n </tr>\n <tr>\n <th>812</th>\n <td>1722.0</td>\n <td>1560.0</td>\n <td>1832.0</td>\n <td>1918.0</td>\n <td>2003.0</td>\n <td>2500.0</td>\n <td>40.0</td>\n <td>6.0</td>\n <td>9.0</td>\n <td>15.0</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "In week 9, 10 blocks were already in the top 20 the week before, 6 came from nowhere. 7 blocks top 20 2 weeks before.\n- 8: 12 top20, +3 top100, 1 from nowhere. 9 top20 two weeks before.\n- 7: 11 top20, +1 top100, 4 from nowhere. 9 top20 two weeks before.\n- 6: 13 top20, +2 top100, 1 from nowhere. 10 top20 two weeks before.\n\nSo for week 11, let's keep 9 blocks from week 9. Which ones? Let's look at persistence in the top20." | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "blks = df_block_rank.sort_values(9).index[:16]\n(df_block_rank.loc[blks]<20)*1", | |
"execution_count": 12, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 12, | |
"data": { | |
"text/plain": " 0 1 2 3 4 5 6 7 8 9\n96 0 0 0 0 0 0 0 0 0 1\n2359 0 1 1 1 1 1 1 1 1 1\n2128 1 1 1 1 1 1 1 1 1 1\n474 1 1 1 1 1 1 1 1 1 1\n2037 1 1 1 1 1 1 1 1 1 1\n2306 1 1 1 1 1 1 1 1 1 1\n2469 0 0 0 0 0 0 0 0 0 1\n2335 0 0 0 0 0 0 0 0 0 1\n1315 0 0 0 0 0 0 0 0 0 1\n1016 0 0 0 0 0 0 0 0 0 1\n2443 0 0 0 0 0 0 0 0 0 1\n358 0 0 0 0 0 0 0 0 0 1\n799 0 0 0 0 0 0 0 0 1 1\n1278 0 0 0 0 0 0 0 0 1 1\n2503 1 1 1 1 1 1 1 1 1 1\n812 0 0 0 0 0 0 0 1 1 1", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n <th>2</th>\n <th>3</th>\n <th>4</th>\n <th>5</th>\n <th>6</th>\n <th>7</th>\n <th>8</th>\n <th>9</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>474</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2306</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2469</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1315</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>358</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>799</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1278</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2503</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>812</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Blocks [474, 2037, 2128, 2306, 2359, 2503] are persistent. Others stay top20 for 1, 3, or 5 weeks" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "blks = [474, 2037, 2128, 2306, 2359, 2503]\nnp.cumsum(df_n_items_in_block.loc[blks])", | |
"execution_count": 13, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 13, | |
"data": { | |
"text/plain": " num_items\nblock_id \n474 1\n2037 5\n2128 11\n2306 14\n2359 15\n2503 19", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>474</th>\n <td>1</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>5</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>11</td>\n </tr>\n <tr>\n <th>2306</th>\n <td>14</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>15</td>\n </tr>\n <tr>\n <th>2503</th>\n <td>19</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "We now have 6 blocks/19 items for the week 11 bet from the persistent blocks. Let's look now at the blocks that have recently arrived into the week 9 top20. They could stick around for 3 weeks." | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "blks = [96, 358, 799, 812, 1016, 1278, 1315, 2335, 2443, 2469]\ndf_n_items_in_block.loc[blks]", | |
"execution_count": 14, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 14, | |
"data": { | |
"text/plain": " num_items\nblock_id \n96 1\n358 7\n799 2\n812 8\n1016 1\n1278 1\n1315 1\n2335 2\n2443 6\n2469 1", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>1</td>\n </tr>\n <tr>\n <th>358</th>\n <td>7</td>\n </tr>\n <tr>\n <th>799</th>\n <td>2</td>\n </tr>\n <tr>\n <th>812</th>\n <td>8</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>1</td>\n </tr>\n <tr>\n <th>1278</th>\n <td>1</td>\n </tr>\n <tr>\n <th>1315</th>\n <td>1</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>2</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>6</td>\n </tr>\n <tr>\n <th>2469</th>\n <td>1</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "The single and two item blocks are a good bet since they can easily be swapped out if necessary. Let's keep them [96, 799, 1016, 1278, 1315, 2335, 2469]. Now for week 11 we have 28 items. Let's find 22 more." | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "blks = [96, 474, 799, 1016, 1278, 1315, 2037, 2128, 2306, 2335, 2359, 2469, 2503]\nnp.cumsum(df_n_items_in_block.loc[blks])", | |
"execution_count": 15, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 15, | |
"data": { | |
"text/plain": " num_items\nblock_id \n96 1\n474 2\n799 4\n1016 5\n1278 6\n1315 7\n2037 11\n2128 17\n2306 20\n2335 22\n2359 23\n2469 24\n2503 28", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>1</td>\n </tr>\n <tr>\n <th>474</th>\n <td>2</td>\n </tr>\n <tr>\n <th>799</th>\n <td>4</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>5</td>\n </tr>\n <tr>\n <th>1278</th>\n <td>6</td>\n </tr>\n <tr>\n <th>1315</th>\n <td>7</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>11</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>17</td>\n </tr>\n <tr>\n <th>2306</th>\n <td>20</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>22</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>23</td>\n </tr>\n <tr>\n <th>2469</th>\n <td>24</td>\n </tr>\n <tr>\n <th>2503</th>\n <td>28</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "dropped_blks = set(df_week_rev[df_week_rev['week']==9].sort_values('revenue_per_item', ascending=0)['block_id'].head(20))-set(blks)\ndropped_blks", | |
"execution_count": 16, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 16, | |
"data": { | |
"text/plain": "{358, 388, 390, 393, 812, 893, 2443}" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_n_items_in_block.loc[dropped_blks]", | |
"execution_count": 17, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 17, | |
"data": { | |
"text/plain": " num_items\nblock_id \n388 8\n390 3\n358 7\n393 2\n2443 6\n812 8\n893 2", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>388</th>\n <td>8</td>\n </tr>\n <tr>\n <th>390</th>\n <td>3</td>\n </tr>\n <tr>\n <th>358</th>\n <td>7</td>\n </tr>\n <tr>\n <th>393</th>\n <td>2</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>6</td>\n </tr>\n <tr>\n <th>812</th>\n <td>8</td>\n </tr>\n <tr>\n <th>893</th>\n <td>2</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_week_rev[df_week_rev['week']==9].sort_values('revenue_per_item', ascending=0)[['block_id', 'revenue_per_item']].head(20)", | |
"execution_count": 18, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 18, | |
"data": { | |
"text/plain": " block_id revenue_per_item\n21313 96 12170.550000\n23373 2359 8505.800000\n23156 2128 8125.225000\n21651 474 7096.450000\n23073 2037 5783.037500\n23324 2306 4411.500000\n23474 2469 4203.900000\n23351 2335 4049.875000\n22413 1315 4013.300000\n22140 1016 3859.900000\n23449 2443 3805.708333\n21547 358 3450.864286\n21942 799 3357.200000\n22380 1278 3347.550000\n23505 2503 3327.462500\n21955 812 3269.962500\n21579 390 3128.343333\n22030 893 3114.800000\n21582 393 3010.200000\n21577 388 2971.187500", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>block_id</th>\n <th>revenue_per_item</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>21313</th>\n <td>96</td>\n <td>12170.550000</td>\n </tr>\n <tr>\n <th>23373</th>\n <td>2359</td>\n <td>8505.800000</td>\n </tr>\n <tr>\n <th>23156</th>\n <td>2128</td>\n <td>8125.225000</td>\n </tr>\n <tr>\n <th>21651</th>\n <td>474</td>\n <td>7096.450000</td>\n </tr>\n <tr>\n <th>23073</th>\n <td>2037</td>\n <td>5783.037500</td>\n </tr>\n <tr>\n <th>23324</th>\n <td>2306</td>\n <td>4411.500000</td>\n </tr>\n <tr>\n <th>23474</th>\n <td>2469</td>\n <td>4203.900000</td>\n </tr>\n <tr>\n <th>23351</th>\n <td>2335</td>\n <td>4049.875000</td>\n </tr>\n <tr>\n <th>22413</th>\n <td>1315</td>\n <td>4013.300000</td>\n </tr>\n <tr>\n <th>22140</th>\n <td>1016</td>\n <td>3859.900000</td>\n </tr>\n <tr>\n <th>23449</th>\n <td>2443</td>\n <td>3805.708333</td>\n </tr>\n <tr>\n <th>21547</th>\n <td>358</td>\n <td>3450.864286</td>\n </tr>\n <tr>\n <th>21942</th>\n <td>799</td>\n <td>3357.200000</td>\n </tr>\n <tr>\n <th>22380</th>\n <td>1278</td>\n <td>3347.550000</td>\n </tr>\n <tr>\n <th>23505</th>\n <td>2503</td>\n <td>3327.462500</td>\n </tr>\n <tr>\n <th>21955</th>\n <td>812</td>\n <td>3269.962500</td>\n </tr>\n <tr>\n <th>21579</th>\n <td>390</td>\n <td>3128.343333</td>\n </tr>\n <tr>\n <th>22030</th>\n <td>893</td>\n <td>3114.800000</td>\n </tr>\n <tr>\n <th>21582</th>\n <td>393</td>\n <td>3010.200000</td>\n </tr>\n <tr>\n <th>21577</th>\n <td>388</td>\n <td>2971.187500</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "We could include [2443] and [358] even though they contain 13 items because the revenue was so high. Meaning we just need 9 more items. Let's investigate stock movements to find them." | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "bet1_blks = [96, 358, 474, 799, 1016, 1278, 1315, 2037, 2128, 2306, 2335, 2359, 2443, 2469, 2503]\nnp.cumsum(df_n_items_in_block.loc[bet1_blks])", | |
"execution_count": 19, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 19, | |
"data": { | |
"text/plain": " num_items\nblock_id \n96 1\n358 8\n474 9\n799 11\n1016 12\n1278 13\n1315 14\n2037 18\n2128 24\n2306 27\n2335 29\n2359 30\n2443 36\n2469 37\n2503 41", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>1</td>\n </tr>\n <tr>\n <th>358</th>\n <td>8</td>\n </tr>\n <tr>\n <th>474</th>\n <td>9</td>\n </tr>\n <tr>\n <th>799</th>\n <td>11</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>12</td>\n </tr>\n <tr>\n <th>1278</th>\n <td>13</td>\n </tr>\n <tr>\n <th>1315</th>\n <td>14</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>18</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>24</td>\n </tr>\n <tr>\n <th>2306</th>\n <td>27</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>29</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>30</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>36</td>\n </tr>\n <tr>\n <th>2469</th>\n <td>37</td>\n </tr>\n <tr>\n <th>2503</th>\n <td>41</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_stock_71_replen_72_77 = pd.read_csv(path+'stock_day_71_stock_replenishment_days_72-77.csv')", | |
"execution_count": 20, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_stock_71_replen_72_77.head()", | |
"execution_count": 21, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 21, | |
"data": { | |
"text/plain": " product_id color_id size_id stock_at_day_71 \\\n0 151926 909958217 54666687 73 \n1 151926 909958217 111455099 16 \n2 151926 909958217 242364222 111 \n3 151926 909958217 513017386 47 \n4 151926 909958217 820445075 71 \n\n stock_replenishment_days_72 stock_replenishment_days_73 \\\n0 0 0 \n1 0 0 \n2 0 1 \n3 0 0 \n4 0 1 \n\n stock_replenishment_days_74 stock_replenishment_days_75 \\\n0 2 0 \n1 0 0 \n2 -1 1 \n3 0 1 \n4 -2 2 \n\n stock_replenishment_days_76 stock_replenishment_days_77 \n0 0 1 \n1 0 0 \n2 0 0 \n3 1 1 \n4 0 0 ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>product_id</th>\n <th>color_id</th>\n <th>size_id</th>\n <th>stock_at_day_71</th>\n <th>stock_replenishment_days_72</th>\n <th>stock_replenishment_days_73</th>\n <th>stock_replenishment_days_74</th>\n <th>stock_replenishment_days_75</th>\n <th>stock_replenishment_days_76</th>\n <th>stock_replenishment_days_77</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>151926</td>\n <td>909958217</td>\n <td>54666687</td>\n <td>73</td>\n <td>0</td>\n <td>0</td>\n <td>2</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1</th>\n <td>151926</td>\n <td>909958217</td>\n <td>111455099</td>\n <td>16</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>151926</td>\n <td>909958217</td>\n <td>242364222</td>\n <td>111</td>\n <td>0</td>\n <td>1</td>\n <td>-1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>151926</td>\n <td>909958217</td>\n <td>513017386</td>\n <td>47</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>4</th>\n <td>151926</td>\n <td>909958217</td>\n <td>820445075</td>\n <td>71</td>\n <td>0</td>\n <td>1</td>\n <td>-2</td>\n <td>2</td>\n <td>0</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_stock_71_77 = df_stock_71_replen_72_77.groupby(['product_id']).sum().drop(['color_id','size_id'], axis=1)\n# add blocks\ndf_stock_71_77 = df_stock_71_77.merge(df_blocks, how='left', on=['product_id'])\n# add price\ndf_stock_71_77 = df_stock_71_77.merge(df_products, how='left', on='product_id')\n# add more columns\ndf_stock_71_77['replen_sum']=df_stock_71_77.stock_replenishment_days_72+df_stock_71_77.stock_replenishment_days_73+df_stock_71_77.stock_replenishment_days_74+df_stock_71_77.stock_replenishment_days_75+df_stock_71_77.stock_replenishment_days_76+df_stock_71_77.stock_replenishment_days_77\ndf_stock_71_77['stock_bf+replen'] = df_stock_71_77.stock_at_day_71 + df_stock_71_77.replen_sum\ndf_stock_71_77['maybe_revenue'] = df_stock_71_77.price * df_stock_71_77.replen_sum", | |
"execution_count": 22, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_stock_71_77_by_block=df_stock_71_77.groupby(['block_id']).agg({'replen_sum':'sum', \n 'stock_bf+replen':'sum', 'maybe_revenue':'sum'}).reset_index()\ndf_stock_71_77_rev = df_stock_71_77_by_block.merge(df_n_items_in_block, on='block_id', how='left')\ndf_stock_71_77_rev['revenue_per_item']=df_stock_71_77_rev.maybe_revenue/df_stock_71_77_rev.num_items", | |
"execution_count": 23, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_stock_71_77_rev.sort_values('revenue_per_item',ascending=0).head(30)", | |
"execution_count": 24, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 24, | |
"data": { | |
"text/plain": " block_id replen_sum stock_bf+replen maybe_revenue num_items \\\n405 439 611 950 36629.45 1 \n1296 1412 3533 5462 105743.35 5 \n1710 1850 392 1250 19580.40 1 \n91 96 578 3150 14999.10 1 \n1113 1207 520 1172 33945.05 3 \n2258 2443 1352 1954 65842.40 6 \n749 819 1097 1747 10915.15 1 \n2148 2321 812 831 48679.40 5 \n1427 1546 2883 6227 57431.85 6 \n2284 2470 476 476 19016.20 2 \n2315 2503 948 2048 35588.60 4 \n1694 1833 336 336 8719.20 1 \n1033 1121 1300 3816 86021.00 10 \n742 812 2849 7344 64996.55 8 \n2085 2254 731 2321 28369.45 4 \n464 505 1654 3043 69176.30 10 \n328 358 1226 2646 47313.02 7 \n1964 2128 865 4877 40082.75 6 \n1356 1473 1804 5078 39599.80 6 \n2307 2493 251 252 6513.45 1 \n2182 2359 217 2639 6499.15 1 \n1459 1580 3998 8622 51559.10 8 \n207 222 314 406 6264.30 1 \n2539 2749 822 1462 37278.90 6 \n1855 2010 917 2467 36474.15 6 \n1432 1551 969 2910 28748.83 5 \n660 726 2800 7191 56702.00 10 \n363 393 437 1416 11340.15 2 \n1846 2001 850 863 22057.50 4 \n941 1024 506 1055 16235.70 3 \n\n revenue_per_item \n405 36629.450000 \n1296 21148.670000 \n1710 19580.400000 \n91 14999.100000 \n1113 11315.016667 \n2258 10973.733333 \n749 10915.150000 \n2148 9735.880000 \n1427 9571.975000 \n2284 9508.100000 \n2315 8897.150000 \n1694 8719.200000 \n1033 8602.100000 \n742 8124.568750 \n2085 7092.362500 \n464 6917.630000 \n328 6759.002857 \n1964 6680.458333 \n1356 6599.966667 \n2307 6513.450000 \n2182 6499.150000 \n1459 6444.887500 \n207 6264.300000 \n2539 6213.150000 \n1855 6079.025000 \n1432 5749.766000 \n660 5670.200000 \n363 5670.075000 \n1846 5514.375000 \n941 5411.900000 ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>block_id</th>\n <th>replen_sum</th>\n <th>stock_bf+replen</th>\n <th>maybe_revenue</th>\n <th>num_items</th>\n <th>revenue_per_item</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>405</th>\n <td>439</td>\n <td>611</td>\n <td>950</td>\n <td>36629.45</td>\n <td>1</td>\n <td>36629.450000</td>\n </tr>\n <tr>\n <th>1296</th>\n <td>1412</td>\n <td>3533</td>\n <td>5462</td>\n <td>105743.35</td>\n <td>5</td>\n <td>21148.670000</td>\n </tr>\n <tr>\n <th>1710</th>\n <td>1850</td>\n <td>392</td>\n <td>1250</td>\n <td>19580.40</td>\n <td>1</td>\n <td>19580.400000</td>\n </tr>\n <tr>\n <th>91</th>\n <td>96</td>\n <td>578</td>\n <td>3150</td>\n <td>14999.10</td>\n <td>1</td>\n <td>14999.100000</td>\n </tr>\n <tr>\n <th>1113</th>\n <td>1207</td>\n <td>520</td>\n <td>1172</td>\n <td>33945.05</td>\n <td>3</td>\n <td>11315.016667</td>\n </tr>\n <tr>\n <th>2258</th>\n <td>2443</td>\n <td>1352</td>\n <td>1954</td>\n <td>65842.40</td>\n <td>6</td>\n <td>10973.733333</td>\n </tr>\n <tr>\n <th>749</th>\n <td>819</td>\n <td>1097</td>\n <td>1747</td>\n <td>10915.15</td>\n <td>1</td>\n <td>10915.150000</td>\n </tr>\n <tr>\n <th>2148</th>\n <td>2321</td>\n <td>812</td>\n <td>831</td>\n <td>48679.40</td>\n <td>5</td>\n <td>9735.880000</td>\n </tr>\n <tr>\n <th>1427</th>\n <td>1546</td>\n <td>2883</td>\n <td>6227</td>\n <td>57431.85</td>\n <td>6</td>\n <td>9571.975000</td>\n </tr>\n <tr>\n <th>2284</th>\n <td>2470</td>\n <td>476</td>\n <td>476</td>\n <td>19016.20</td>\n <td>2</td>\n <td>9508.100000</td>\n </tr>\n <tr>\n <th>2315</th>\n <td>2503</td>\n <td>948</td>\n <td>2048</td>\n <td>35588.60</td>\n <td>4</td>\n <td>8897.150000</td>\n </tr>\n <tr>\n <th>1694</th>\n <td>1833</td>\n <td>336</td>\n <td>336</td>\n <td>8719.20</td>\n <td>1</td>\n <td>8719.200000</td>\n </tr>\n <tr>\n <th>1033</th>\n <td>1121</td>\n <td>1300</td>\n <td>3816</td>\n <td>86021.00</td>\n <td>10</td>\n <td>8602.100000</td>\n </tr>\n <tr>\n <th>742</th>\n <td>812</td>\n <td>2849</td>\n <td>7344</td>\n <td>64996.55</td>\n <td>8</td>\n <td>8124.568750</td>\n </tr>\n <tr>\n <th>2085</th>\n <td>2254</td>\n <td>731</td>\n <td>2321</td>\n <td>28369.45</td>\n <td>4</td>\n <td>7092.362500</td>\n </tr>\n <tr>\n <th>464</th>\n <td>505</td>\n <td>1654</td>\n <td>3043</td>\n <td>69176.30</td>\n <td>10</td>\n <td>6917.630000</td>\n </tr>\n <tr>\n <th>328</th>\n <td>358</td>\n <td>1226</td>\n <td>2646</td>\n <td>47313.02</td>\n <td>7</td>\n <td>6759.002857</td>\n </tr>\n <tr>\n <th>1964</th>\n <td>2128</td>\n <td>865</td>\n <td>4877</td>\n <td>40082.75</td>\n <td>6</td>\n <td>6680.458333</td>\n </tr>\n <tr>\n <th>1356</th>\n <td>1473</td>\n <td>1804</td>\n <td>5078</td>\n <td>39599.80</td>\n <td>6</td>\n <td>6599.966667</td>\n </tr>\n <tr>\n <th>2307</th>\n <td>2493</td>\n <td>251</td>\n <td>252</td>\n <td>6513.45</td>\n <td>1</td>\n <td>6513.450000</td>\n </tr>\n <tr>\n <th>2182</th>\n <td>2359</td>\n <td>217</td>\n <td>2639</td>\n <td>6499.15</td>\n <td>1</td>\n <td>6499.150000</td>\n </tr>\n <tr>\n <th>1459</th>\n <td>1580</td>\n <td>3998</td>\n <td>8622</td>\n <td>51559.10</td>\n <td>8</td>\n <td>6444.887500</td>\n </tr>\n <tr>\n <th>207</th>\n <td>222</td>\n <td>314</td>\n <td>406</td>\n <td>6264.30</td>\n <td>1</td>\n <td>6264.300000</td>\n </tr>\n <tr>\n <th>2539</th>\n <td>2749</td>\n <td>822</td>\n <td>1462</td>\n <td>37278.90</td>\n <td>6</td>\n <td>6213.150000</td>\n </tr>\n <tr>\n <th>1855</th>\n <td>2010</td>\n <td>917</td>\n <td>2467</td>\n <td>36474.15</td>\n <td>6</td>\n <td>6079.025000</td>\n </tr>\n <tr>\n <th>1432</th>\n <td>1551</td>\n <td>969</td>\n <td>2910</td>\n <td>28748.83</td>\n <td>5</td>\n <td>5749.766000</td>\n </tr>\n <tr>\n <th>660</th>\n <td>726</td>\n <td>2800</td>\n <td>7191</td>\n <td>56702.00</td>\n <td>10</td>\n <td>5670.200000</td>\n </tr>\n <tr>\n <th>363</th>\n <td>393</td>\n <td>437</td>\n <td>1416</td>\n <td>11340.15</td>\n <td>2</td>\n <td>5670.075000</td>\n </tr>\n <tr>\n <th>1846</th>\n <td>2001</td>\n <td>850</td>\n <td>863</td>\n <td>22057.50</td>\n <td>4</td>\n <td>5514.375000</td>\n </tr>\n <tr>\n <th>941</th>\n <td>1024</td>\n <td>506</td>\n <td>1055</td>\n <td>16235.70</td>\n <td>3</td>\n <td>5411.900000</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "high_stock_replen_rev_blks = df_stock_71_77_rev.sort_values('revenue_per_item',ascending=0).block_id.head(20)", | |
"execution_count": 25, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Some of these blocks are already in our selected blocks" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "set.intersection(*[set(high_stock_replen_rev_blks), set(bet1_blks)])", | |
"execution_count": 26, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 26, | |
"data": { | |
"text/plain": "{96, 358, 2128, 2443, 2503}" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Which of these high_stock_replen_rev_blks contain previously unseen items?" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "len(df_stock_71_77)", | |
"execution_count": 27, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 27, | |
"data": { | |
"text/plain": "8892" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_stock_and_sales = df_stock_and_sales.groupby(['product_id']).sum().drop(['color_id','size_id'], axis=1)\ndf_stock_and_sales.head()", | |
"execution_count": 28, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 28, | |
"data": { | |
"text/plain": " date_number sales stock week block_id family_id \\\nproduct_id \n151926 1466 41 1343 216 47256 9505584888 \n213413 14091 505 41153 2191 703696 235930205385 \n310130 13926 465 38597 2178 768070 302427319085 \n455200 3339 18 1081 504 88200 62883139536 \n571044 13572 366 14684 2082 379908 287646181024 \n\n subfamily_id price revenue \nproduct_id \n151926 8052757464 622.80 1063.95 \n213413 4914486899 8518.65 10074.75 \n310130 237381383840 5762.75 6021.75 \n455200 30883995828 1886.85 539.10 \n571044 259826371420 5518.70 5837.70 ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>date_number</th>\n <th>sales</th>\n <th>stock</th>\n <th>week</th>\n <th>block_id</th>\n <th>family_id</th>\n <th>subfamily_id</th>\n <th>price</th>\n <th>revenue</th>\n </tr>\n <tr>\n <th>product_id</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>151926</th>\n <td>1466</td>\n <td>41</td>\n <td>1343</td>\n <td>216</td>\n <td>47256</td>\n <td>9505584888</td>\n <td>8052757464</td>\n <td>622.80</td>\n <td>1063.95</td>\n </tr>\n <tr>\n <th>213413</th>\n <td>14091</td>\n <td>505</td>\n <td>41153</td>\n <td>2191</td>\n <td>703696</td>\n <td>235930205385</td>\n <td>4914486899</td>\n <td>8518.65</td>\n <td>10074.75</td>\n </tr>\n <tr>\n <th>310130</th>\n <td>13926</td>\n <td>465</td>\n <td>38597</td>\n <td>2178</td>\n <td>768070</td>\n <td>302427319085</td>\n <td>237381383840</td>\n <td>5762.75</td>\n <td>6021.75</td>\n </tr>\n <tr>\n <th>455200</th>\n <td>3339</td>\n <td>18</td>\n <td>1081</td>\n <td>504</td>\n <td>88200</td>\n <td>62883139536</td>\n <td>30883995828</td>\n <td>1886.85</td>\n <td>539.10</td>\n </tr>\n <tr>\n <th>571044</th>\n <td>13572</td>\n <td>366</td>\n <td>14684</td>\n <td>2082</td>\n <td>379908</td>\n <td>287646181024</td>\n <td>259826371420</td>\n <td>5518.70</td>\n <td>5837.70</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "len(df_stock_and_sales)", | |
"execution_count": 29, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 29, | |
"data": { | |
"text/plain": "8722" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Products in df_stock_71_77 but not in df_stock_and_sales" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_stock_and_sales.head(1)", | |
"execution_count": 30, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 30, | |
"data": { | |
"text/plain": " date_number sales stock week block_id family_id \\\nproduct_id \n151926 1466 41 1343 216 47256 9505584888 \n\n subfamily_id price revenue \nproduct_id \n151926 8052757464 622.8 1063.95 ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>date_number</th>\n <th>sales</th>\n <th>stock</th>\n <th>week</th>\n <th>block_id</th>\n <th>family_id</th>\n <th>subfamily_id</th>\n <th>price</th>\n <th>revenue</th>\n </tr>\n <tr>\n <th>product_id</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>151926</th>\n <td>1466</td>\n <td>41</td>\n <td>1343</td>\n <td>216</td>\n <td>47256</td>\n <td>9505584888</td>\n <td>8052757464</td>\n <td>622.8</td>\n <td>1063.95</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "new_prods = set(df_stock_71_77.product_id)-set(df_stock_and_sales.index)\nlen(list(new_prods))", | |
"execution_count": 31, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 31, | |
"data": { | |
"text/plain": "620" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Blocks containing new products not seen in first 63 days\nblks=[]\nfor prod in new_prods:\n blks.append(df_blocks[df_blocks['product_id']==prod].block_id.values[0])\nblks = sorted(set(blks))\nlen(blks)", | |
"execution_count": 32, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 32, | |
"data": { | |
"text/plain": "549" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "list(set.intersection(*[set(high_stock_replen_rev_blks), set(blks)]))", | |
"execution_count": 33, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 33, | |
"data": { | |
"text/plain": "[1121, 1412, 2470, 1833, 812, 505, 2254, 819, 439, 1207, 1850]" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "interesting=list(set.intersection(*[set(high_stock_replen_rev_blks), set(blks)]))\ndf_n_items_in_block.loc[interesting].sort_values('num_items')", | |
"execution_count": 34, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 34, | |
"data": { | |
"text/plain": " num_items\nblock_id \n1833 1\n819 1\n439 1\n1850 1\n2470 2\n1207 3\n2254 4\n1412 5\n812 8\n1121 10\n505 10", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>1833</th>\n <td>1</td>\n </tr>\n <tr>\n <th>819</th>\n <td>1</td>\n </tr>\n <tr>\n <th>439</th>\n <td>1</td>\n </tr>\n <tr>\n <th>1850</th>\n <td>1</td>\n </tr>\n <tr>\n <th>2470</th>\n <td>2</td>\n </tr>\n <tr>\n <th>1207</th>\n <td>3</td>\n </tr>\n <tr>\n <th>2254</th>\n <td>4</td>\n </tr>\n <tr>\n <th>1412</th>\n <td>5</td>\n </tr>\n <tr>\n <th>812</th>\n <td>8</td>\n </tr>\n <tr>\n <th>1121</th>\n <td>10</td>\n </tr>\n <tr>\n <th>505</th>\n <td>10</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "For our final 9 items take blocks [439, 819, 1207, 1833, 1850, 2470], these are blocks containing small numbers of items so can be swapped out if necessary." | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "bet1_blks=sorted(list(set.union(*[set([439, 819, 1207, 1833, 1850, 2470]), set(bet1_blks)])))\nnp.cumsum(df_n_items_in_block.loc[bet1_blks])", | |
"execution_count": 35, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 35, | |
"data": { | |
"text/plain": " num_items\nblock_id \n96 1\n358 8\n439 9\n474 10\n799 12\n819 13\n1016 14\n1207 17\n1278 18\n1315 19\n1833 20\n1850 21\n2037 25\n2128 31\n2306 34\n2335 36\n2359 37\n2443 43\n2469 44\n2470 46\n2503 50", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>1</td>\n </tr>\n <tr>\n <th>358</th>\n <td>8</td>\n </tr>\n <tr>\n <th>439</th>\n <td>9</td>\n </tr>\n <tr>\n <th>474</th>\n <td>10</td>\n </tr>\n <tr>\n <th>799</th>\n <td>12</td>\n </tr>\n <tr>\n <th>819</th>\n <td>13</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>14</td>\n </tr>\n <tr>\n <th>1207</th>\n <td>17</td>\n </tr>\n <tr>\n <th>1278</th>\n <td>18</td>\n </tr>\n <tr>\n <th>1315</th>\n <td>19</td>\n </tr>\n <tr>\n <th>1833</th>\n <td>20</td>\n </tr>\n <tr>\n <th>1850</th>\n <td>21</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>25</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>31</td>\n </tr>\n <tr>\n <th>2306</th>\n <td>34</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>36</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>37</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>43</td>\n </tr>\n <tr>\n <th>2469</th>\n <td>44</td>\n </tr>\n <tr>\n <th>2470</th>\n <td>46</td>\n </tr>\n <tr>\n <th>2503</th>\n <td>50</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "So how do these blocks perform? Load the dataset for sales in the first 11 weeks to see." | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_stock_and_sales = pd.read_csv(path+'stock_and_sales_day_0_day_77.csv')", | |
"execution_count": 36, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# add week\ndf_stock_and_sales['week']=(((df_stock_and_sales['date_number']-4)/7)+1).round(0).astype(int)\n# add blocks\ndf_stock_and_sales = df_stock_and_sales.merge(df_blocks, how='left', on=['product_id'])\n# add price\ndf_stock_and_sales = df_stock_and_sales.merge(df_products, how='left', on='product_id')\n# add revenue\ndf_stock_and_sales['revenue']=df_stock_and_sales['sales']*df_stock_and_sales['price']\ndf_sales_week_by_product=df_stock_and_sales.groupby(['week','product_id','family_id',\n 'subfamily_id', 'price','block_id']).agg({'sales':'sum', 'revenue':'sum'}).reset_index()\ndf_sales_week_by_block = df_sales_week_by_product.groupby(['week','block_id']).agg({'revenue':'sum'}).reset_index()\ndf_week_rev = df_sales_week_by_block.merge(df_n_items_in_block, on='block_id', how='left')\ndf_week_rev['revenue_per_item']=df_week_rev.revenue/df_week_rev.num_items", | |
"execution_count": 37, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "total = 0\ndf_tmp = df_week_rev[df_week_rev.week==11]\nfor blk in bet1_blks:\n total+=df_tmp[df_tmp.block_id==blk].revenue.values\nprint(total)\nBet1_revenue = total\n\n#Position 3rd place", | |
"execution_count": 38, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "[242829.65]\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "# Bet 2: Blocks for week 12" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "#2130 rows\ndf_ranked_blocks_by_week = pd.DataFrame() \nfor w in list(range(12)):\n df_ranked_blocks_by_week[w]=list(df_week_rev[df_week_rev['week']==w].sort_values('revenue_per_item', ascending=0).iloc[:,1])[:2130]\n \n \n#df of rank for each block for each week\ndf_block_rank=pd.DataFrame(columns=list(range(12)))\nfor block in df_n_items_in_block.index:\n a = np.where(df_ranked_blocks_by_week==block)\n for i in range(len(a[1])): df_block_rank.loc[block,a[1][i]]=a[0][i]\n# give rank 2500 if dropped out of rankings\ndf_block_rank = df_block_rank.fillna(2500.)\ndf_block_rank.sort_values(11).head(10)", | |
"execution_count": 39, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 39, | |
"data": { | |
"text/plain": " 0 1 2 3 4 5 6 7 8 \\\n1850 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n96 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2359 2500.0 3.0 4.0 9.0 6.0 6.0 4.0 0.0 1.0 \n1412 834.0 1156.0 896.0 1091.0 1080.0 1089.0 1165.0 877.0 1419.0 \n2443 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 1753.0 1616.0 \n439 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2128 4.0 0.0 1.0 7.0 1.0 0.0 0.0 1.0 0.0 \n2335 1450.0 1777.0 1904.0 151.0 228.0 465.0 1180.0 1548.0 1764.0 \n861 2500.0 922.0 186.0 372.0 17.0 21.0 35.0 17.0 68.0 \n2037 14.0 9.0 3.0 4.0 4.0 2.0 2.0 3.0 3.0 \n\n 9 10 11 \n1850 2500.0 1.0 0.0 \n96 0.0 0.0 1.0 \n2359 1.0 2.0 2.0 \n1412 1385.0 19.0 3.0 \n2443 10.0 8.0 4.0 \n439 2500.0 4.0 5.0 \n2128 2.0 3.0 6.0 \n2335 7.0 6.0 7.0 \n861 58.0 128.0 8.0 \n2037 4.0 7.0 9.0 ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n <th>2</th>\n <th>3</th>\n <th>4</th>\n <th>5</th>\n <th>6</th>\n <th>7</th>\n <th>8</th>\n <th>9</th>\n <th>10</th>\n <th>11</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>1850</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>1.0</td>\n <td>0.0</td>\n </tr>\n <tr>\n <th>96</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>2500.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>9.0</td>\n <td>6.0</td>\n <td>6.0</td>\n <td>4.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>1.0</td>\n <td>2.0</td>\n <td>2.0</td>\n </tr>\n <tr>\n <th>1412</th>\n <td>834.0</td>\n <td>1156.0</td>\n <td>896.0</td>\n <td>1091.0</td>\n <td>1080.0</td>\n <td>1089.0</td>\n <td>1165.0</td>\n <td>877.0</td>\n <td>1419.0</td>\n <td>1385.0</td>\n <td>19.0</td>\n <td>3.0</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>1753.0</td>\n <td>1616.0</td>\n <td>10.0</td>\n <td>8.0</td>\n <td>4.0</td>\n </tr>\n <tr>\n <th>439</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>4.0</td>\n <td>5.0</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>4.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>7.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>6.0</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>1450.0</td>\n <td>1777.0</td>\n <td>1904.0</td>\n <td>151.0</td>\n <td>228.0</td>\n <td>465.0</td>\n <td>1180.0</td>\n <td>1548.0</td>\n <td>1764.0</td>\n <td>7.0</td>\n <td>6.0</td>\n <td>7.0</td>\n </tr>\n <tr>\n <th>861</th>\n <td>2500.0</td>\n <td>922.0</td>\n <td>186.0</td>\n <td>372.0</td>\n <td>17.0</td>\n <td>21.0</td>\n <td>35.0</td>\n <td>17.0</td>\n <td>68.0</td>\n <td>58.0</td>\n <td>128.0</td>\n <td>8.0</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>14.0</td>\n <td>9.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>4.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>7.0</td>\n <td>9.0</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Now let's repeat the exercise for the 2nd bet bearing in mind that a maximum of 20 items can be changed from bet 1.\n\nLook at how the selected blocks performed. 12 are in the top20. Keep them all." | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_block_rank.loc[bet1_blks].sort_values(11)", | |
"execution_count": 40, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 40, | |
"data": { | |
"text/plain": " 0 1 2 3 4 5 6 7 8 \\\n1850 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n96 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2359 2500.0 3.0 4.0 9.0 6.0 6.0 4.0 0.0 1.0 \n2443 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 1753.0 1616.0 \n439 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2128 4.0 0.0 1.0 7.0 1.0 0.0 0.0 1.0 0.0 \n2335 1450.0 1777.0 1904.0 151.0 228.0 465.0 1180.0 1548.0 1764.0 \n2037 14.0 9.0 3.0 4.0 4.0 2.0 2.0 3.0 3.0 \n474 1.0 1.0 0.0 6.0 2.0 3.0 1.0 2.0 2.0 \n1016 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2306 5.0 2.0 5.0 13.0 9.0 5.0 3.0 4.0 6.0 \n358 315.0 499.0 582.0 455.0 902.0 941.0 1144.0 1417.0 20.0 \n2503 13.0 4.0 6.0 14.0 10.0 13.0 7.0 8.0 10.0 \n1833 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n1315 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n819 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n799 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 8.0 \n1207 1062.0 835.0 694.0 884.0 532.0 347.0 454.0 492.0 451.0 \n2470 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n1278 2500.0 2500.0 2500.0 45.0 58.0 115.0 69.0 100.0 15.0 \n2469 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n\n 9 10 11 \n1850 2500.0 1.0 0.0 \n96 0.0 0.0 1.0 \n2359 1.0 2.0 2.0 \n2443 10.0 8.0 4.0 \n439 2500.0 4.0 5.0 \n2128 2.0 3.0 6.0 \n2335 7.0 6.0 7.0 \n2037 4.0 7.0 9.0 \n474 3.0 5.0 10.0 \n1016 9.0 11.0 11.0 \n2306 5.0 10.0 12.0 \n358 11.0 16.0 16.0 \n2503 14.0 25.0 24.0 \n1833 2500.0 2500.0 62.0 \n1315 8.0 60.0 67.0 \n819 2500.0 375.0 71.0 \n799 12.0 38.0 78.0 \n1207 260.0 200.0 177.0 \n2470 2500.0 2500.0 298.0 \n1278 13.0 118.0 419.0 \n2469 6.0 23.0 754.0 ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n <th>2</th>\n <th>3</th>\n <th>4</th>\n <th>5</th>\n <th>6</th>\n <th>7</th>\n <th>8</th>\n <th>9</th>\n <th>10</th>\n <th>11</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>1850</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>1.0</td>\n <td>0.0</td>\n </tr>\n <tr>\n <th>96</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>2500.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>9.0</td>\n <td>6.0</td>\n <td>6.0</td>\n <td>4.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>1.0</td>\n <td>2.0</td>\n <td>2.0</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>1753.0</td>\n <td>1616.0</td>\n <td>10.0</td>\n <td>8.0</td>\n <td>4.0</td>\n </tr>\n <tr>\n <th>439</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>4.0</td>\n <td>5.0</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>4.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>7.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>6.0</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>1450.0</td>\n <td>1777.0</td>\n <td>1904.0</td>\n <td>151.0</td>\n <td>228.0</td>\n <td>465.0</td>\n <td>1180.0</td>\n <td>1548.0</td>\n <td>1764.0</td>\n <td>7.0</td>\n <td>6.0</td>\n <td>7.0</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>14.0</td>\n <td>9.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>4.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>7.0</td>\n <td>9.0</td>\n </tr>\n <tr>\n <th>474</th>\n <td>1.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>6.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>1.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>5.0</td>\n <td>10.0</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>9.0</td>\n <td>11.0</td>\n <td>11.0</td>\n </tr>\n <tr>\n <th>2306</th>\n <td>5.0</td>\n <td>2.0</td>\n <td>5.0</td>\n <td>13.0</td>\n <td>9.0</td>\n <td>5.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>6.0</td>\n <td>5.0</td>\n <td>10.0</td>\n <td>12.0</td>\n </tr>\n <tr>\n <th>358</th>\n <td>315.0</td>\n <td>499.0</td>\n <td>582.0</td>\n <td>455.0</td>\n <td>902.0</td>\n <td>941.0</td>\n <td>1144.0</td>\n <td>1417.0</td>\n <td>20.0</td>\n <td>11.0</td>\n <td>16.0</td>\n <td>16.0</td>\n </tr>\n <tr>\n <th>2503</th>\n <td>13.0</td>\n <td>4.0</td>\n <td>6.0</td>\n <td>14.0</td>\n <td>10.0</td>\n <td>13.0</td>\n <td>7.0</td>\n <td>8.0</td>\n <td>10.0</td>\n <td>14.0</td>\n <td>25.0</td>\n <td>24.0</td>\n </tr>\n <tr>\n <th>1833</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>62.0</td>\n </tr>\n <tr>\n <th>1315</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>8.0</td>\n <td>60.0</td>\n <td>67.0</td>\n </tr>\n <tr>\n <th>819</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>375.0</td>\n <td>71.0</td>\n </tr>\n <tr>\n <th>799</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>8.0</td>\n <td>12.0</td>\n <td>38.0</td>\n <td>78.0</td>\n </tr>\n <tr>\n <th>1207</th>\n <td>1062.0</td>\n <td>835.0</td>\n <td>694.0</td>\n <td>884.0</td>\n <td>532.0</td>\n <td>347.0</td>\n <td>454.0</td>\n <td>492.0</td>\n <td>451.0</td>\n <td>260.0</td>\n <td>200.0</td>\n <td>177.0</td>\n </tr>\n <tr>\n <th>2470</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>298.0</td>\n </tr>\n <tr>\n <th>1278</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>45.0</td>\n <td>58.0</td>\n <td>115.0</td>\n <td>69.0</td>\n <td>100.0</td>\n <td>15.0</td>\n <td>13.0</td>\n <td>118.0</td>\n <td>419.0</td>\n </tr>\n <tr>\n <th>2469</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>6.0</td>\n <td>23.0</td>\n <td>754.0</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "(df_block_rank.loc[df_block_rank.loc[bet1_blks].sort_values(11).index]<20)*1", | |
"execution_count": 41, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 41, | |
"data": { | |
"text/plain": " 0 1 2 3 4 5 6 7 8 9 10 11\n1850 0 0 0 0 0 0 0 0 0 0 1 1\n96 0 0 0 0 0 0 0 0 0 1 1 1\n2359 0 1 1 1 1 1 1 1 1 1 1 1\n2443 0 0 0 0 0 0 0 0 0 1 1 1\n439 0 0 0 0 0 0 0 0 0 0 1 1\n2128 1 1 1 1 1 1 1 1 1 1 1 1\n2335 0 0 0 0 0 0 0 0 0 1 1 1\n2037 1 1 1 1 1 1 1 1 1 1 1 1\n474 1 1 1 1 1 1 1 1 1 1 1 1\n1016 0 0 0 0 0 0 0 0 0 1 1 1\n2306 1 1 1 1 1 1 1 1 1 1 1 1\n358 0 0 0 0 0 0 0 0 0 1 1 1\n2503 1 1 1 1 1 1 1 1 1 1 0 0\n1833 0 0 0 0 0 0 0 0 0 0 0 0\n1315 0 0 0 0 0 0 0 0 0 1 0 0\n819 0 0 0 0 0 0 0 0 0 0 0 0\n799 0 0 0 0 0 0 0 0 1 1 0 0\n1207 0 0 0 0 0 0 0 0 0 0 0 0\n2470 0 0 0 0 0 0 0 0 0 0 0 0\n1278 0 0 0 0 0 0 0 0 1 1 0 0\n2469 0 0 0 0 0 0 0 0 0 1 0 0", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n <th>2</th>\n <th>3</th>\n <th>4</th>\n <th>5</th>\n <th>6</th>\n <th>7</th>\n <th>8</th>\n <th>9</th>\n <th>10</th>\n <th>11</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>1850</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>96</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>439</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>474</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2306</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>358</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2503</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1833</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1315</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>819</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>799</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1207</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2470</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1278</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2469</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "bet2_blks = sorted(df_block_rank.loc[bet1_blks].sort_values(11).index[:12])\nnp.cumsum(df_n_items_in_block.loc[bet2_blks])", | |
"execution_count": 42, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 42, | |
"data": { | |
"text/plain": " num_items\nblock_id \n96 1\n358 8\n439 9\n474 10\n1016 11\n1850 12\n2037 16\n2128 22\n2306 25\n2335 27\n2359 28\n2443 34", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>1</td>\n </tr>\n <tr>\n <th>358</th>\n <td>8</td>\n </tr>\n <tr>\n <th>439</th>\n <td>9</td>\n </tr>\n <tr>\n <th>474</th>\n <td>10</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>11</td>\n </tr>\n <tr>\n <th>1850</th>\n <td>12</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>16</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>22</td>\n </tr>\n <tr>\n <th>2306</th>\n <td>25</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>27</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>28</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>34</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Now we need to swap out the 16 items in these 9 blocks." | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "set(bet1_blks)-set(bet2_blks)", | |
"execution_count": 43, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 43, | |
"data": { | |
"text/plain": "{799, 819, 1207, 1278, 1315, 1833, 2469, 2470, 2503}" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "And find some new well-performing blocks." | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_block_rank.sort_values(11).head(20)", | |
"execution_count": 44, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 44, | |
"data": { | |
"text/plain": " 0 1 2 3 4 5 6 7 8 \\\n1850 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n96 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2359 2500.0 3.0 4.0 9.0 6.0 6.0 4.0 0.0 1.0 \n1412 834.0 1156.0 896.0 1091.0 1080.0 1089.0 1165.0 877.0 1419.0 \n2443 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 1753.0 1616.0 \n439 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2128 4.0 0.0 1.0 7.0 1.0 0.0 0.0 1.0 0.0 \n2335 1450.0 1777.0 1904.0 151.0 228.0 465.0 1180.0 1548.0 1764.0 \n861 2500.0 922.0 186.0 372.0 17.0 21.0 35.0 17.0 68.0 \n2037 14.0 9.0 3.0 4.0 4.0 2.0 2.0 3.0 3.0 \n474 1.0 1.0 0.0 6.0 2.0 3.0 1.0 2.0 2.0 \n1016 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2306 5.0 2.0 5.0 13.0 9.0 5.0 3.0 4.0 6.0 \n8 2500.0 2500.0 2500.0 24.0 14.0 92.0 248.0 315.0 300.0 \n2321 2040.0 2128.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2254 21.0 17.0 44.0 107.0 94.0 140.0 117.0 139.0 125.0 \n358 315.0 499.0 582.0 455.0 902.0 941.0 1144.0 1417.0 20.0 \n1546 262.0 698.0 1328.0 1684.0 1661.0 56.0 192.0 28.0 39.0 \n481 24.0 28.0 14.0 52.0 12.0 16.0 13.0 33.0 17.0 \n2493 52.0 310.0 218.0 500.0 761.0 1355.0 1361.0 1870.0 1844.0 \n\n 9 10 11 \n1850 2500.0 1.0 0.0 \n96 0.0 0.0 1.0 \n2359 1.0 2.0 2.0 \n1412 1385.0 19.0 3.0 \n2443 10.0 8.0 4.0 \n439 2500.0 4.0 5.0 \n2128 2.0 3.0 6.0 \n2335 7.0 6.0 7.0 \n861 58.0 128.0 8.0 \n2037 4.0 7.0 9.0 \n474 3.0 5.0 10.0 \n1016 9.0 11.0 11.0 \n2306 5.0 10.0 12.0 \n8 2500.0 12.0 13.0 \n2321 125.0 14.0 14.0 \n2254 144.0 21.0 15.0 \n358 11.0 16.0 16.0 \n1546 27.0 13.0 17.0 \n481 26.0 29.0 18.0 \n2493 2500.0 2500.0 19.0 ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n <th>2</th>\n <th>3</th>\n <th>4</th>\n <th>5</th>\n <th>6</th>\n <th>7</th>\n <th>8</th>\n <th>9</th>\n <th>10</th>\n <th>11</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>1850</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>1.0</td>\n <td>0.0</td>\n </tr>\n <tr>\n <th>96</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>2500.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>9.0</td>\n <td>6.0</td>\n <td>6.0</td>\n <td>4.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>1.0</td>\n <td>2.0</td>\n <td>2.0</td>\n </tr>\n <tr>\n <th>1412</th>\n <td>834.0</td>\n <td>1156.0</td>\n <td>896.0</td>\n <td>1091.0</td>\n <td>1080.0</td>\n <td>1089.0</td>\n <td>1165.0</td>\n <td>877.0</td>\n <td>1419.0</td>\n <td>1385.0</td>\n <td>19.0</td>\n <td>3.0</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>1753.0</td>\n <td>1616.0</td>\n <td>10.0</td>\n <td>8.0</td>\n <td>4.0</td>\n </tr>\n <tr>\n <th>439</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>4.0</td>\n <td>5.0</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>4.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>7.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>6.0</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>1450.0</td>\n <td>1777.0</td>\n <td>1904.0</td>\n <td>151.0</td>\n <td>228.0</td>\n <td>465.0</td>\n <td>1180.0</td>\n <td>1548.0</td>\n <td>1764.0</td>\n <td>7.0</td>\n <td>6.0</td>\n <td>7.0</td>\n </tr>\n <tr>\n <th>861</th>\n <td>2500.0</td>\n <td>922.0</td>\n <td>186.0</td>\n <td>372.0</td>\n <td>17.0</td>\n <td>21.0</td>\n <td>35.0</td>\n <td>17.0</td>\n <td>68.0</td>\n <td>58.0</td>\n <td>128.0</td>\n <td>8.0</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>14.0</td>\n <td>9.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>4.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>7.0</td>\n <td>9.0</td>\n </tr>\n <tr>\n <th>474</th>\n <td>1.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>6.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>1.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>5.0</td>\n <td>10.0</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>9.0</td>\n <td>11.0</td>\n <td>11.0</td>\n </tr>\n <tr>\n <th>2306</th>\n <td>5.0</td>\n <td>2.0</td>\n <td>5.0</td>\n <td>13.0</td>\n <td>9.0</td>\n <td>5.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>6.0</td>\n <td>5.0</td>\n <td>10.0</td>\n <td>12.0</td>\n </tr>\n <tr>\n <th>8</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>24.0</td>\n <td>14.0</td>\n <td>92.0</td>\n <td>248.0</td>\n <td>315.0</td>\n <td>300.0</td>\n <td>2500.0</td>\n <td>12.0</td>\n <td>13.0</td>\n </tr>\n <tr>\n <th>2321</th>\n <td>2040.0</td>\n <td>2128.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>125.0</td>\n <td>14.0</td>\n <td>14.0</td>\n </tr>\n <tr>\n <th>2254</th>\n <td>21.0</td>\n <td>17.0</td>\n <td>44.0</td>\n <td>107.0</td>\n <td>94.0</td>\n <td>140.0</td>\n <td>117.0</td>\n <td>139.0</td>\n <td>125.0</td>\n <td>144.0</td>\n <td>21.0</td>\n <td>15.0</td>\n </tr>\n <tr>\n <th>358</th>\n <td>315.0</td>\n <td>499.0</td>\n <td>582.0</td>\n <td>455.0</td>\n <td>902.0</td>\n <td>941.0</td>\n <td>1144.0</td>\n <td>1417.0</td>\n <td>20.0</td>\n <td>11.0</td>\n <td>16.0</td>\n <td>16.0</td>\n </tr>\n <tr>\n <th>1546</th>\n <td>262.0</td>\n <td>698.0</td>\n <td>1328.0</td>\n <td>1684.0</td>\n <td>1661.0</td>\n <td>56.0</td>\n <td>192.0</td>\n <td>28.0</td>\n <td>39.0</td>\n <td>27.0</td>\n <td>13.0</td>\n <td>17.0</td>\n </tr>\n <tr>\n <th>481</th>\n <td>24.0</td>\n <td>28.0</td>\n <td>14.0</td>\n <td>52.0</td>\n <td>12.0</td>\n <td>16.0</td>\n <td>13.0</td>\n <td>33.0</td>\n <td>17.0</td>\n <td>26.0</td>\n <td>29.0</td>\n <td>18.0</td>\n </tr>\n <tr>\n <th>2493</th>\n <td>52.0</td>\n <td>310.0</td>\n <td>218.0</td>\n <td>500.0</td>\n <td>761.0</td>\n <td>1355.0</td>\n <td>1361.0</td>\n <td>1870.0</td>\n <td>1844.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>19.0</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "((df_block_rank.sort_values(11)<20)*1).head(20)", | |
"execution_count": 45, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 45, | |
"data": { | |
"text/plain": " 0 1 2 3 4 5 6 7 8 9 10 11\n1850 0 0 0 0 0 0 0 0 0 0 1 1\n96 0 0 0 0 0 0 0 0 0 1 1 1\n2359 0 1 1 1 1 1 1 1 1 1 1 1\n1412 0 0 0 0 0 0 0 0 0 0 1 1\n2443 0 0 0 0 0 0 0 0 0 1 1 1\n439 0 0 0 0 0 0 0 0 0 0 1 1\n2128 1 1 1 1 1 1 1 1 1 1 1 1\n2335 0 0 0 0 0 0 0 0 0 1 1 1\n861 0 0 0 0 1 0 0 1 0 0 0 1\n2037 1 1 1 1 1 1 1 1 1 1 1 1\n474 1 1 1 1 1 1 1 1 1 1 1 1\n1016 0 0 0 0 0 0 0 0 0 1 1 1\n2306 1 1 1 1 1 1 1 1 1 1 1 1\n8 0 0 0 0 1 0 0 0 0 0 1 1\n2321 0 0 0 0 0 0 0 0 0 0 1 1\n2254 0 1 0 0 0 0 0 0 0 0 0 1\n358 0 0 0 0 0 0 0 0 0 1 1 1\n1546 0 0 0 0 0 0 0 0 0 0 1 1\n481 0 0 1 0 1 1 1 0 1 0 0 1\n2493 0 0 0 0 0 0 0 0 0 0 0 1", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n <th>2</th>\n <th>3</th>\n <th>4</th>\n <th>5</th>\n <th>6</th>\n <th>7</th>\n <th>8</th>\n <th>9</th>\n <th>10</th>\n <th>11</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>1850</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>96</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1412</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>439</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>861</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>474</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2306</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>8</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2321</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2254</th>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>358</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1546</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>481</th>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2493</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "good_blks=df_block_rank.sort_values(11).index[:20]\nnew_blks=list(sorted(set(good_blks)-set(bet2_blks)))\ndf_block_rank.loc[new_blks].sort_values(11)", | |
"execution_count": 46, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 46, | |
"data": { | |
"text/plain": " 0 1 2 3 4 5 6 7 8 \\\n1412 834.0 1156.0 896.0 1091.0 1080.0 1089.0 1165.0 877.0 1419.0 \n861 2500.0 922.0 186.0 372.0 17.0 21.0 35.0 17.0 68.0 \n8 2500.0 2500.0 2500.0 24.0 14.0 92.0 248.0 315.0 300.0 \n2321 2040.0 2128.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2254 21.0 17.0 44.0 107.0 94.0 140.0 117.0 139.0 125.0 \n1546 262.0 698.0 1328.0 1684.0 1661.0 56.0 192.0 28.0 39.0 \n481 24.0 28.0 14.0 52.0 12.0 16.0 13.0 33.0 17.0 \n2493 52.0 310.0 218.0 500.0 761.0 1355.0 1361.0 1870.0 1844.0 \n\n 9 10 11 \n1412 1385.0 19.0 3.0 \n861 58.0 128.0 8.0 \n8 2500.0 12.0 13.0 \n2321 125.0 14.0 14.0 \n2254 144.0 21.0 15.0 \n1546 27.0 13.0 17.0 \n481 26.0 29.0 18.0 \n2493 2500.0 2500.0 19.0 ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n <th>2</th>\n <th>3</th>\n <th>4</th>\n <th>5</th>\n <th>6</th>\n <th>7</th>\n <th>8</th>\n <th>9</th>\n <th>10</th>\n <th>11</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>1412</th>\n <td>834.0</td>\n <td>1156.0</td>\n <td>896.0</td>\n <td>1091.0</td>\n <td>1080.0</td>\n <td>1089.0</td>\n <td>1165.0</td>\n <td>877.0</td>\n <td>1419.0</td>\n <td>1385.0</td>\n <td>19.0</td>\n <td>3.0</td>\n </tr>\n <tr>\n <th>861</th>\n <td>2500.0</td>\n <td>922.0</td>\n <td>186.0</td>\n <td>372.0</td>\n <td>17.0</td>\n <td>21.0</td>\n <td>35.0</td>\n <td>17.0</td>\n <td>68.0</td>\n <td>58.0</td>\n <td>128.0</td>\n <td>8.0</td>\n </tr>\n <tr>\n <th>8</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>24.0</td>\n <td>14.0</td>\n <td>92.0</td>\n <td>248.0</td>\n <td>315.0</td>\n <td>300.0</td>\n <td>2500.0</td>\n <td>12.0</td>\n <td>13.0</td>\n </tr>\n <tr>\n <th>2321</th>\n <td>2040.0</td>\n <td>2128.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>125.0</td>\n <td>14.0</td>\n <td>14.0</td>\n </tr>\n <tr>\n <th>2254</th>\n <td>21.0</td>\n <td>17.0</td>\n <td>44.0</td>\n <td>107.0</td>\n <td>94.0</td>\n <td>140.0</td>\n <td>117.0</td>\n <td>139.0</td>\n <td>125.0</td>\n <td>144.0</td>\n <td>21.0</td>\n <td>15.0</td>\n </tr>\n <tr>\n <th>1546</th>\n <td>262.0</td>\n <td>698.0</td>\n <td>1328.0</td>\n <td>1684.0</td>\n <td>1661.0</td>\n <td>56.0</td>\n <td>192.0</td>\n <td>28.0</td>\n <td>39.0</td>\n <td>27.0</td>\n <td>13.0</td>\n <td>17.0</td>\n </tr>\n <tr>\n <th>481</th>\n <td>24.0</td>\n <td>28.0</td>\n <td>14.0</td>\n <td>52.0</td>\n <td>12.0</td>\n <td>16.0</td>\n <td>13.0</td>\n <td>33.0</td>\n <td>17.0</td>\n <td>26.0</td>\n <td>29.0</td>\n <td>18.0</td>\n </tr>\n <tr>\n <th>2493</th>\n <td>52.0</td>\n <td>310.0</td>\n <td>218.0</td>\n <td>500.0</td>\n <td>761.0</td>\n <td>1355.0</td>\n <td>1361.0</td>\n <td>1870.0</td>\n <td>1844.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>19.0</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "new_blks = df_block_rank.loc[new_blks].sort_values(11).index", | |
"execution_count": 47, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_n_items_in_block.loc[new_blks]", | |
"execution_count": 48, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 48, | |
"data": { | |
"text/plain": " num_items\n1412 5\n861 4\n8 1\n2321 5\n2254 4\n1546 6\n481 1\n2493 1", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>1412</th>\n <td>5</td>\n </tr>\n <tr>\n <th>861</th>\n <td>4</td>\n </tr>\n <tr>\n <th>8</th>\n <td>1</td>\n </tr>\n <tr>\n <th>2321</th>\n <td>5</td>\n </tr>\n <tr>\n <th>2254</th>\n <td>4</td>\n </tr>\n <tr>\n <th>1546</th>\n <td>6</td>\n </tr>\n <tr>\n <th>481</th>\n <td>1</td>\n </tr>\n <tr>\n <th>2493</th>\n <td>1</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Select [1412, 861, 8, 2321, 481]" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "bet2_blks=sorted(list(set.union(*[set([1412, 861, 8, 2321, 481]), set(bet2_blks)])))\nnp.cumsum(df_n_items_in_block.loc[bet2_blks])", | |
"execution_count": 49, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 49, | |
"data": { | |
"text/plain": " num_items\nblock_id \n8 1\n96 2\n358 9\n439 10\n474 11\n481 12\n861 16\n1016 17\n1412 22\n1850 23\n2037 27\n2128 33\n2306 36\n2321 41\n2335 43\n2359 44\n2443 50", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>8</th>\n <td>1</td>\n </tr>\n <tr>\n <th>96</th>\n <td>2</td>\n </tr>\n <tr>\n <th>358</th>\n <td>9</td>\n </tr>\n <tr>\n <th>439</th>\n <td>10</td>\n </tr>\n <tr>\n <th>474</th>\n <td>11</td>\n </tr>\n <tr>\n <th>481</th>\n <td>12</td>\n </tr>\n <tr>\n <th>861</th>\n <td>16</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>17</td>\n </tr>\n <tr>\n <th>1412</th>\n <td>22</td>\n </tr>\n <tr>\n <th>1850</th>\n <td>23</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>27</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>33</td>\n </tr>\n <tr>\n <th>2306</th>\n <td>36</td>\n </tr>\n <tr>\n <th>2321</th>\n <td>41</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>43</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>44</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>50</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "So how do these 2nd week bets perform? Load the dataset for sales in the first 12 weeks to see." | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_stock_and_sales = pd.read_csv(path+'stock_and_sales_day_0_day_84.csv')", | |
"execution_count": 50, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# add week\ndf_stock_and_sales['week']=(((df_stock_and_sales['date_number']-4)/7)+1).round(0).astype(int)\n# add blocks\ndf_stock_and_sales = df_stock_and_sales.merge(df_blocks, how='left', on=['product_id'])\n# add price\ndf_stock_and_sales = df_stock_and_sales.merge(df_products, how='left', on='product_id')\n# add revenue\ndf_stock_and_sales['revenue']=df_stock_and_sales['sales']*df_stock_and_sales['price']\ndf_sales_week_by_product=df_stock_and_sales.groupby(['week','product_id','family_id',\n 'subfamily_id', 'price','block_id']).agg({'sales':'sum', 'revenue':'sum'}).reset_index()\ndf_sales_week_by_block = df_sales_week_by_product.groupby(['week','block_id']).agg({'revenue':'sum'}).reset_index()\ndf_week_rev = df_sales_week_by_block.merge(df_n_items_in_block, on='block_id', how='left')\ndf_week_rev['revenue_per_item']=df_week_rev.revenue/df_week_rev.num_items", | |
"execution_count": 51, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "total = 0\ndf_tmp = df_week_rev[df_week_rev.week==12]\nfor blk in bet2_blks:\n total+=df_tmp[df_tmp.block_id==blk].revenue.values\nprint(total)\nBet2_revenue = total", | |
"execution_count": 52, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "[208174.53]\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "print (Bet1_revenue+Bet2_revenue)\n# 6th place", | |
"execution_count": 53, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "[451004.18]\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "# Bet 3: Blocks for week 13" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "#2130 rows\ndf_ranked_blocks_by_week = pd.DataFrame() \nfor w in list(range(13)):\n df_ranked_blocks_by_week[w]=list(df_week_rev[df_week_rev['week']==w].sort_values('revenue_per_item', ascending=0).iloc[:,1])[:2130]", | |
"execution_count": 54, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "#df of rank for each block for each week\ndf_block_rank=pd.DataFrame(columns=list(range(13)))\nfor block in df_n_items_in_block.index:\n a = np.where(df_ranked_blocks_by_week==block)\n for i in range(len(a[1])): df_block_rank.loc[block,a[1][i]]=a[0][i]\n# give rank 2500 if dropped out of rankings\ndf_block_rank = df_block_rank.fillna(2500.)\ndf_block_rank.sort_values(12).head(20)", | |
"execution_count": 55, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 55, | |
"data": { | |
"text/plain": " 0 1 2 3 4 5 6 7 8 \\\n96 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n1850 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2609 2500.0 2500.0 2500.0 2500.0 56.0 48.0 325.0 321.0 149.0 \n2359 2500.0 3.0 4.0 9.0 6.0 6.0 4.0 0.0 1.0 \n587 2500.0 2500.0 2500.0 2500.0 34.0 14.0 22.0 102.0 110.0 \n831 85.0 124.0 84.0 200.0 170.0 203.0 236.0 259.0 328.0 \n2335 1450.0 1777.0 1904.0 151.0 228.0 465.0 1180.0 1548.0 1764.0 \n2037 14.0 9.0 3.0 4.0 4.0 2.0 2.0 3.0 3.0 \n1412 834.0 1156.0 896.0 1091.0 1080.0 1089.0 1165.0 877.0 1419.0 \n1750 1449.0 862.0 71.0 421.0 629.0 905.0 709.0 666.0 286.0 \n2128 4.0 0.0 1.0 7.0 1.0 0.0 0.0 1.0 0.0 \n487 68.0 84.0 118.0 165.0 125.0 8.0 15.0 27.0 25.0 \n1969 688.0 611.0 706.0 1122.0 1164.0 1514.0 884.0 1281.0 1206.0 \n2208 223.0 610.0 67.0 110.0 206.0 286.0 429.0 523.0 257.0 \n474 1.0 1.0 0.0 6.0 2.0 3.0 1.0 2.0 2.0 \n1153 1300.0 1365.0 295.0 246.0 396.0 334.0 462.0 590.0 597.0 \n1005 276.0 106.0 93.0 222.0 289.0 250.0 315.0 240.0 235.0 \n1124 1788.0 2085.0 432.0 587.0 250.0 258.0 312.0 461.0 426.0 \n1580 127.0 89.0 77.0 121.0 96.0 30.0 28.0 45.0 23.0 \n1474 1375.0 1408.0 1598.0 525.0 305.0 617.0 604.0 655.0 822.0 \n\n 9 10 11 12 \n96 0.0 0.0 1.0 0.0 \n1850 2500.0 1.0 0.0 1.0 \n2609 367.0 283.0 442.0 2.0 \n2359 1.0 2.0 2.0 3.0 \n587 100.0 112.0 53.0 4.0 \n831 435.0 475.0 402.0 5.0 \n2335 7.0 6.0 7.0 6.0 \n2037 4.0 7.0 9.0 7.0 \n1412 1385.0 19.0 3.0 8.0 \n1750 457.0 551.0 502.0 9.0 \n2128 2.0 3.0 6.0 10.0 \n487 29.0 43.0 39.0 11.0 \n1969 456.0 578.0 885.0 12.0 \n2208 306.0 525.0 368.0 13.0 \n474 3.0 5.0 10.0 14.0 \n1153 571.0 714.0 647.0 15.0 \n1005 279.0 303.0 388.0 16.0 \n1124 640.0 292.0 178.0 17.0 \n1580 46.0 35.0 25.0 18.0 \n1474 752.0 855.0 149.0 19.0 ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n <th>2</th>\n <th>3</th>\n <th>4</th>\n <th>5</th>\n <th>6</th>\n <th>7</th>\n <th>8</th>\n <th>9</th>\n <th>10</th>\n <th>11</th>\n <th>12</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n </tr>\n <tr>\n <th>1850</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>2609</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>56.0</td>\n <td>48.0</td>\n <td>325.0</td>\n <td>321.0</td>\n <td>149.0</td>\n <td>367.0</td>\n <td>283.0</td>\n <td>442.0</td>\n <td>2.0</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>2500.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>9.0</td>\n <td>6.0</td>\n <td>6.0</td>\n <td>4.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>1.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>3.0</td>\n </tr>\n <tr>\n <th>587</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>34.0</td>\n <td>14.0</td>\n <td>22.0</td>\n <td>102.0</td>\n <td>110.0</td>\n <td>100.0</td>\n <td>112.0</td>\n <td>53.0</td>\n <td>4.0</td>\n </tr>\n <tr>\n <th>831</th>\n <td>85.0</td>\n <td>124.0</td>\n <td>84.0</td>\n <td>200.0</td>\n <td>170.0</td>\n <td>203.0</td>\n <td>236.0</td>\n <td>259.0</td>\n <td>328.0</td>\n <td>435.0</td>\n <td>475.0</td>\n <td>402.0</td>\n <td>5.0</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>1450.0</td>\n <td>1777.0</td>\n <td>1904.0</td>\n <td>151.0</td>\n <td>228.0</td>\n <td>465.0</td>\n <td>1180.0</td>\n <td>1548.0</td>\n <td>1764.0</td>\n <td>7.0</td>\n <td>6.0</td>\n <td>7.0</td>\n <td>6.0</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>14.0</td>\n <td>9.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>4.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>7.0</td>\n <td>9.0</td>\n <td>7.0</td>\n </tr>\n <tr>\n <th>1412</th>\n <td>834.0</td>\n <td>1156.0</td>\n <td>896.0</td>\n <td>1091.0</td>\n <td>1080.0</td>\n <td>1089.0</td>\n <td>1165.0</td>\n <td>877.0</td>\n <td>1419.0</td>\n <td>1385.0</td>\n <td>19.0</td>\n <td>3.0</td>\n <td>8.0</td>\n </tr>\n <tr>\n <th>1750</th>\n <td>1449.0</td>\n <td>862.0</td>\n <td>71.0</td>\n <td>421.0</td>\n <td>629.0</td>\n <td>905.0</td>\n <td>709.0</td>\n <td>666.0</td>\n <td>286.0</td>\n <td>457.0</td>\n <td>551.0</td>\n <td>502.0</td>\n <td>9.0</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>4.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>7.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>6.0</td>\n <td>10.0</td>\n </tr>\n <tr>\n <th>487</th>\n <td>68.0</td>\n <td>84.0</td>\n <td>118.0</td>\n <td>165.0</td>\n <td>125.0</td>\n <td>8.0</td>\n <td>15.0</td>\n <td>27.0</td>\n <td>25.0</td>\n <td>29.0</td>\n <td>43.0</td>\n <td>39.0</td>\n <td>11.0</td>\n </tr>\n <tr>\n <th>1969</th>\n <td>688.0</td>\n <td>611.0</td>\n <td>706.0</td>\n <td>1122.0</td>\n <td>1164.0</td>\n <td>1514.0</td>\n <td>884.0</td>\n <td>1281.0</td>\n <td>1206.0</td>\n <td>456.0</td>\n <td>578.0</td>\n <td>885.0</td>\n <td>12.0</td>\n </tr>\n <tr>\n <th>2208</th>\n <td>223.0</td>\n <td>610.0</td>\n <td>67.0</td>\n <td>110.0</td>\n <td>206.0</td>\n <td>286.0</td>\n <td>429.0</td>\n <td>523.0</td>\n <td>257.0</td>\n <td>306.0</td>\n <td>525.0</td>\n <td>368.0</td>\n <td>13.0</td>\n </tr>\n <tr>\n <th>474</th>\n <td>1.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>6.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>1.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>5.0</td>\n <td>10.0</td>\n <td>14.0</td>\n </tr>\n <tr>\n <th>1153</th>\n <td>1300.0</td>\n <td>1365.0</td>\n <td>295.0</td>\n <td>246.0</td>\n <td>396.0</td>\n <td>334.0</td>\n <td>462.0</td>\n <td>590.0</td>\n <td>597.0</td>\n <td>571.0</td>\n <td>714.0</td>\n <td>647.0</td>\n <td>15.0</td>\n </tr>\n <tr>\n <th>1005</th>\n <td>276.0</td>\n <td>106.0</td>\n <td>93.0</td>\n <td>222.0</td>\n <td>289.0</td>\n <td>250.0</td>\n <td>315.0</td>\n <td>240.0</td>\n <td>235.0</td>\n <td>279.0</td>\n <td>303.0</td>\n <td>388.0</td>\n <td>16.0</td>\n </tr>\n <tr>\n <th>1124</th>\n <td>1788.0</td>\n <td>2085.0</td>\n <td>432.0</td>\n <td>587.0</td>\n <td>250.0</td>\n <td>258.0</td>\n <td>312.0</td>\n <td>461.0</td>\n <td>426.0</td>\n <td>640.0</td>\n <td>292.0</td>\n <td>178.0</td>\n <td>17.0</td>\n </tr>\n <tr>\n <th>1580</th>\n <td>127.0</td>\n <td>89.0</td>\n <td>77.0</td>\n <td>121.0</td>\n <td>96.0</td>\n <td>30.0</td>\n <td>28.0</td>\n <td>45.0</td>\n <td>23.0</td>\n <td>46.0</td>\n <td>35.0</td>\n <td>25.0</td>\n <td>18.0</td>\n </tr>\n <tr>\n <th>1474</th>\n <td>1375.0</td>\n <td>1408.0</td>\n <td>1598.0</td>\n <td>525.0</td>\n <td>305.0</td>\n <td>617.0</td>\n <td>604.0</td>\n <td>655.0</td>\n <td>822.0</td>\n <td>752.0</td>\n <td>855.0</td>\n <td>149.0</td>\n <td>19.0</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_block_rank.loc[bet1_blks].sort_values(12)", | |
"execution_count": 56, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 56, | |
"data": { | |
"text/plain": " 0 1 2 3 4 5 6 7 8 \\\n96 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n1850 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2359 2500.0 3.0 4.0 9.0 6.0 6.0 4.0 0.0 1.0 \n2335 1450.0 1777.0 1904.0 151.0 228.0 465.0 1180.0 1548.0 1764.0 \n2037 14.0 9.0 3.0 4.0 4.0 2.0 2.0 3.0 3.0 \n2128 4.0 0.0 1.0 7.0 1.0 0.0 0.0 1.0 0.0 \n474 1.0 1.0 0.0 6.0 2.0 3.0 1.0 2.0 2.0 \n2306 5.0 2.0 5.0 13.0 9.0 5.0 3.0 4.0 6.0 \n1016 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n439 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n358 315.0 499.0 582.0 455.0 902.0 941.0 1144.0 1417.0 20.0 \n2443 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 1753.0 1616.0 \n1833 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2503 13.0 4.0 6.0 14.0 10.0 13.0 7.0 8.0 10.0 \n1315 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n819 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2470 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n1207 1062.0 835.0 694.0 884.0 532.0 347.0 454.0 492.0 451.0 \n799 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 8.0 \n2469 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n1278 2500.0 2500.0 2500.0 45.0 58.0 115.0 69.0 100.0 15.0 \n\n 9 10 11 12 \n96 0.0 0.0 1.0 0.0 \n1850 2500.0 1.0 0.0 1.0 \n2359 1.0 2.0 2.0 3.0 \n2335 7.0 6.0 7.0 6.0 \n2037 4.0 7.0 9.0 7.0 \n2128 2.0 3.0 6.0 10.0 \n474 3.0 5.0 10.0 14.0 \n2306 5.0 10.0 12.0 20.0 \n1016 9.0 11.0 11.0 22.0 \n439 2500.0 4.0 5.0 29.0 \n358 11.0 16.0 16.0 32.0 \n2443 10.0 8.0 4.0 33.0 \n1833 2500.0 2500.0 62.0 48.0 \n2503 14.0 25.0 24.0 94.0 \n1315 8.0 60.0 67.0 98.0 \n819 2500.0 375.0 71.0 119.0 \n2470 2500.0 2500.0 298.0 270.0 \n1207 260.0 200.0 177.0 371.0 \n799 12.0 38.0 78.0 425.0 \n2469 6.0 23.0 754.0 482.0 \n1278 13.0 118.0 419.0 1740.0 ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n <th>2</th>\n <th>3</th>\n <th>4</th>\n <th>5</th>\n <th>6</th>\n <th>7</th>\n <th>8</th>\n <th>9</th>\n <th>10</th>\n <th>11</th>\n <th>12</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n </tr>\n <tr>\n <th>1850</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>2500.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>9.0</td>\n <td>6.0</td>\n <td>6.0</td>\n <td>4.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>1.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>3.0</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>1450.0</td>\n <td>1777.0</td>\n <td>1904.0</td>\n <td>151.0</td>\n <td>228.0</td>\n <td>465.0</td>\n <td>1180.0</td>\n <td>1548.0</td>\n <td>1764.0</td>\n <td>7.0</td>\n <td>6.0</td>\n <td>7.0</td>\n <td>6.0</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>14.0</td>\n <td>9.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>4.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>7.0</td>\n <td>9.0</td>\n <td>7.0</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>4.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>7.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>6.0</td>\n <td>10.0</td>\n </tr>\n <tr>\n <th>474</th>\n <td>1.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>6.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>1.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>5.0</td>\n <td>10.0</td>\n <td>14.0</td>\n </tr>\n <tr>\n <th>2306</th>\n <td>5.0</td>\n <td>2.0</td>\n <td>5.0</td>\n <td>13.0</td>\n <td>9.0</td>\n <td>5.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>6.0</td>\n <td>5.0</td>\n <td>10.0</td>\n <td>12.0</td>\n <td>20.0</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>9.0</td>\n <td>11.0</td>\n <td>11.0</td>\n <td>22.0</td>\n </tr>\n <tr>\n <th>439</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>4.0</td>\n <td>5.0</td>\n <td>29.0</td>\n </tr>\n <tr>\n <th>358</th>\n <td>315.0</td>\n <td>499.0</td>\n <td>582.0</td>\n <td>455.0</td>\n <td>902.0</td>\n <td>941.0</td>\n <td>1144.0</td>\n <td>1417.0</td>\n <td>20.0</td>\n <td>11.0</td>\n <td>16.0</td>\n <td>16.0</td>\n <td>32.0</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>1753.0</td>\n <td>1616.0</td>\n <td>10.0</td>\n <td>8.0</td>\n <td>4.0</td>\n <td>33.0</td>\n </tr>\n <tr>\n <th>1833</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>62.0</td>\n <td>48.0</td>\n </tr>\n <tr>\n <th>2503</th>\n <td>13.0</td>\n <td>4.0</td>\n <td>6.0</td>\n <td>14.0</td>\n <td>10.0</td>\n <td>13.0</td>\n <td>7.0</td>\n <td>8.0</td>\n <td>10.0</td>\n <td>14.0</td>\n <td>25.0</td>\n <td>24.0</td>\n <td>94.0</td>\n </tr>\n <tr>\n <th>1315</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>8.0</td>\n <td>60.0</td>\n <td>67.0</td>\n <td>98.0</td>\n </tr>\n <tr>\n <th>819</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>375.0</td>\n <td>71.0</td>\n <td>119.0</td>\n </tr>\n <tr>\n <th>2470</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>298.0</td>\n <td>270.0</td>\n </tr>\n <tr>\n <th>1207</th>\n <td>1062.0</td>\n <td>835.0</td>\n <td>694.0</td>\n <td>884.0</td>\n <td>532.0</td>\n <td>347.0</td>\n <td>454.0</td>\n <td>492.0</td>\n <td>451.0</td>\n <td>260.0</td>\n <td>200.0</td>\n <td>177.0</td>\n <td>371.0</td>\n </tr>\n <tr>\n <th>799</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>8.0</td>\n <td>12.0</td>\n <td>38.0</td>\n <td>78.0</td>\n <td>425.0</td>\n </tr>\n <tr>\n <th>2469</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>6.0</td>\n <td>23.0</td>\n <td>754.0</td>\n <td>482.0</td>\n </tr>\n <tr>\n <th>1278</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>45.0</td>\n <td>58.0</td>\n <td>115.0</td>\n <td>69.0</td>\n <td>100.0</td>\n <td>15.0</td>\n <td>13.0</td>\n <td>118.0</td>\n <td>419.0</td>\n <td>1740.0</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "(df_block_rank.loc[df_block_rank.loc[bet1_blks].sort_values(12).index]<20)*1", | |
"execution_count": 57, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 57, | |
"data": { | |
"text/plain": " 0 1 2 3 4 5 6 7 8 9 10 11 12\n96 0 0 0 0 0 0 0 0 0 1 1 1 1\n1850 0 0 0 0 0 0 0 0 0 0 1 1 1\n2359 0 1 1 1 1 1 1 1 1 1 1 1 1\n2335 0 0 0 0 0 0 0 0 0 1 1 1 1\n2037 1 1 1 1 1 1 1 1 1 1 1 1 1\n2128 1 1 1 1 1 1 1 1 1 1 1 1 1\n474 1 1 1 1 1 1 1 1 1 1 1 1 1\n2306 1 1 1 1 1 1 1 1 1 1 1 1 0\n1016 0 0 0 0 0 0 0 0 0 1 1 1 0\n439 0 0 0 0 0 0 0 0 0 0 1 1 0\n358 0 0 0 0 0 0 0 0 0 1 1 1 0\n2443 0 0 0 0 0 0 0 0 0 1 1 1 0\n1833 0 0 0 0 0 0 0 0 0 0 0 0 0\n2503 1 1 1 1 1 1 1 1 1 1 0 0 0\n1315 0 0 0 0 0 0 0 0 0 1 0 0 0\n819 0 0 0 0 0 0 0 0 0 0 0 0 0\n2470 0 0 0 0 0 0 0 0 0 0 0 0 0\n1207 0 0 0 0 0 0 0 0 0 0 0 0 0\n799 0 0 0 0 0 0 0 0 1 1 0 0 0\n2469 0 0 0 0 0 0 0 0 0 1 0 0 0\n1278 0 0 0 0 0 0 0 0 1 1 0 0 0", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n <th>2</th>\n <th>3</th>\n <th>4</th>\n <th>5</th>\n <th>6</th>\n <th>7</th>\n <th>8</th>\n <th>9</th>\n <th>10</th>\n <th>11</th>\n <th>12</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1850</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>474</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2306</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>439</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>358</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1833</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2503</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1315</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>819</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2470</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1207</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>799</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2469</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1278</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Only 7 blocks are in the top 20. Keep them. These are only 16 items. We need to keep at least 14 more." | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "bet3_blks = sorted(df_block_rank.loc[bet1_blks].sort_values(12).index[:7])", | |
"execution_count": 58, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "np.cumsum(df_n_items_in_block.loc[bet3_blks])", | |
"execution_count": 59, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 59, | |
"data": { | |
"text/plain": " num_items\nblock_id \n96 1\n474 2\n1850 3\n2037 7\n2128 13\n2335 15\n2359 16", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>1</td>\n </tr>\n <tr>\n <th>474</th>\n <td>2</td>\n </tr>\n <tr>\n <th>1850</th>\n <td>3</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>7</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>13</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>15</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>16</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "np.cumsum(df_n_items_in_block.loc[[1016, 439, 358, 2443]])", | |
"execution_count": 60, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 60, | |
"data": { | |
"text/plain": " num_items\nblock_id \n1016 1\n439 2\n358 9\n2443 15", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>1016</th>\n <td>1</td>\n </tr>\n <tr>\n <th>439</th>\n <td>2</td>\n </tr>\n <tr>\n <th>358</th>\n <td>9</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>15</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "bet3_blks=sorted(list(set.union(*[set([1016, 439, 358, 2443]), set(bet3_blks)])))\nnp.cumsum(df_n_items_in_block.loc[bet3_blks])", | |
"execution_count": 61, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 61, | |
"data": { | |
"text/plain": " num_items\nblock_id \n96 1\n358 8\n439 9\n474 10\n1016 11\n1850 12\n2037 16\n2128 22\n2335 24\n2359 25\n2443 31", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>1</td>\n </tr>\n <tr>\n <th>358</th>\n <td>8</td>\n </tr>\n <tr>\n <th>439</th>\n <td>9</td>\n </tr>\n <tr>\n <th>474</th>\n <td>10</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>11</td>\n </tr>\n <tr>\n <th>1850</th>\n <td>12</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>16</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>22</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>24</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>25</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>31</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Need new blocks containing 19 more items" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "good_blks=df_block_rank.sort_values(12).index[:20]\nnew_blks=list(sorted(set(good_blks)-set(bet3_blks)))\ndf_block_rank.loc[new_blks].sort_values(12)", | |
"execution_count": 62, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 62, | |
"data": { | |
"text/plain": " 0 1 2 3 4 5 6 7 8 \\\n2609 2500.0 2500.0 2500.0 2500.0 56.0 48.0 325.0 321.0 149.0 \n587 2500.0 2500.0 2500.0 2500.0 34.0 14.0 22.0 102.0 110.0 \n831 85.0 124.0 84.0 200.0 170.0 203.0 236.0 259.0 328.0 \n1412 834.0 1156.0 896.0 1091.0 1080.0 1089.0 1165.0 877.0 1419.0 \n1750 1449.0 862.0 71.0 421.0 629.0 905.0 709.0 666.0 286.0 \n487 68.0 84.0 118.0 165.0 125.0 8.0 15.0 27.0 25.0 \n1969 688.0 611.0 706.0 1122.0 1164.0 1514.0 884.0 1281.0 1206.0 \n2208 223.0 610.0 67.0 110.0 206.0 286.0 429.0 523.0 257.0 \n1153 1300.0 1365.0 295.0 246.0 396.0 334.0 462.0 590.0 597.0 \n1005 276.0 106.0 93.0 222.0 289.0 250.0 315.0 240.0 235.0 \n1124 1788.0 2085.0 432.0 587.0 250.0 258.0 312.0 461.0 426.0 \n1580 127.0 89.0 77.0 121.0 96.0 30.0 28.0 45.0 23.0 \n1474 1375.0 1408.0 1598.0 525.0 305.0 617.0 604.0 655.0 822.0 \n\n 9 10 11 12 \n2609 367.0 283.0 442.0 2.0 \n587 100.0 112.0 53.0 4.0 \n831 435.0 475.0 402.0 5.0 \n1412 1385.0 19.0 3.0 8.0 \n1750 457.0 551.0 502.0 9.0 \n487 29.0 43.0 39.0 11.0 \n1969 456.0 578.0 885.0 12.0 \n2208 306.0 525.0 368.0 13.0 \n1153 571.0 714.0 647.0 15.0 \n1005 279.0 303.0 388.0 16.0 \n1124 640.0 292.0 178.0 17.0 \n1580 46.0 35.0 25.0 18.0 \n1474 752.0 855.0 149.0 19.0 ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n <th>2</th>\n <th>3</th>\n <th>4</th>\n <th>5</th>\n <th>6</th>\n <th>7</th>\n <th>8</th>\n <th>9</th>\n <th>10</th>\n <th>11</th>\n <th>12</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2609</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>56.0</td>\n <td>48.0</td>\n <td>325.0</td>\n <td>321.0</td>\n <td>149.0</td>\n <td>367.0</td>\n <td>283.0</td>\n <td>442.0</td>\n <td>2.0</td>\n </tr>\n <tr>\n <th>587</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>34.0</td>\n <td>14.0</td>\n <td>22.0</td>\n <td>102.0</td>\n <td>110.0</td>\n <td>100.0</td>\n <td>112.0</td>\n <td>53.0</td>\n <td>4.0</td>\n </tr>\n <tr>\n <th>831</th>\n <td>85.0</td>\n <td>124.0</td>\n <td>84.0</td>\n <td>200.0</td>\n <td>170.0</td>\n <td>203.0</td>\n <td>236.0</td>\n <td>259.0</td>\n <td>328.0</td>\n <td>435.0</td>\n <td>475.0</td>\n <td>402.0</td>\n <td>5.0</td>\n </tr>\n <tr>\n <th>1412</th>\n <td>834.0</td>\n <td>1156.0</td>\n <td>896.0</td>\n <td>1091.0</td>\n <td>1080.0</td>\n <td>1089.0</td>\n <td>1165.0</td>\n <td>877.0</td>\n <td>1419.0</td>\n <td>1385.0</td>\n <td>19.0</td>\n <td>3.0</td>\n <td>8.0</td>\n </tr>\n <tr>\n <th>1750</th>\n <td>1449.0</td>\n <td>862.0</td>\n <td>71.0</td>\n <td>421.0</td>\n <td>629.0</td>\n <td>905.0</td>\n <td>709.0</td>\n <td>666.0</td>\n <td>286.0</td>\n <td>457.0</td>\n <td>551.0</td>\n <td>502.0</td>\n <td>9.0</td>\n </tr>\n <tr>\n <th>487</th>\n <td>68.0</td>\n <td>84.0</td>\n <td>118.0</td>\n <td>165.0</td>\n <td>125.0</td>\n <td>8.0</td>\n <td>15.0</td>\n <td>27.0</td>\n <td>25.0</td>\n <td>29.0</td>\n <td>43.0</td>\n <td>39.0</td>\n <td>11.0</td>\n </tr>\n <tr>\n <th>1969</th>\n <td>688.0</td>\n <td>611.0</td>\n <td>706.0</td>\n <td>1122.0</td>\n <td>1164.0</td>\n <td>1514.0</td>\n <td>884.0</td>\n <td>1281.0</td>\n <td>1206.0</td>\n <td>456.0</td>\n <td>578.0</td>\n <td>885.0</td>\n <td>12.0</td>\n </tr>\n <tr>\n <th>2208</th>\n <td>223.0</td>\n <td>610.0</td>\n <td>67.0</td>\n <td>110.0</td>\n <td>206.0</td>\n <td>286.0</td>\n <td>429.0</td>\n <td>523.0</td>\n <td>257.0</td>\n <td>306.0</td>\n <td>525.0</td>\n <td>368.0</td>\n <td>13.0</td>\n </tr>\n <tr>\n <th>1153</th>\n <td>1300.0</td>\n <td>1365.0</td>\n <td>295.0</td>\n <td>246.0</td>\n <td>396.0</td>\n <td>334.0</td>\n <td>462.0</td>\n <td>590.0</td>\n <td>597.0</td>\n <td>571.0</td>\n <td>714.0</td>\n <td>647.0</td>\n <td>15.0</td>\n </tr>\n <tr>\n <th>1005</th>\n <td>276.0</td>\n <td>106.0</td>\n <td>93.0</td>\n <td>222.0</td>\n <td>289.0</td>\n <td>250.0</td>\n <td>315.0</td>\n <td>240.0</td>\n <td>235.0</td>\n <td>279.0</td>\n <td>303.0</td>\n <td>388.0</td>\n <td>16.0</td>\n </tr>\n <tr>\n <th>1124</th>\n <td>1788.0</td>\n <td>2085.0</td>\n <td>432.0</td>\n <td>587.0</td>\n <td>250.0</td>\n <td>258.0</td>\n <td>312.0</td>\n <td>461.0</td>\n <td>426.0</td>\n <td>640.0</td>\n <td>292.0</td>\n <td>178.0</td>\n <td>17.0</td>\n </tr>\n <tr>\n <th>1580</th>\n <td>127.0</td>\n <td>89.0</td>\n <td>77.0</td>\n <td>121.0</td>\n <td>96.0</td>\n <td>30.0</td>\n <td>28.0</td>\n <td>45.0</td>\n <td>23.0</td>\n <td>46.0</td>\n <td>35.0</td>\n <td>25.0</td>\n <td>18.0</td>\n </tr>\n <tr>\n <th>1474</th>\n <td>1375.0</td>\n <td>1408.0</td>\n <td>1598.0</td>\n <td>525.0</td>\n <td>305.0</td>\n <td>617.0</td>\n <td>604.0</td>\n <td>655.0</td>\n <td>822.0</td>\n <td>752.0</td>\n <td>855.0</td>\n <td>149.0</td>\n <td>19.0</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "new_blks = df_block_rank.loc[new_blks].sort_values(12).index\ndf_n_items_in_block.loc[new_blks]", | |
"execution_count": 63, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 63, | |
"data": { | |
"text/plain": " num_items\n2609 1\n587 2\n831 4\n1412 5\n1750 8\n487 8\n1969 4\n2208 5\n1153 6\n1005 4\n1124 9\n1580 8\n1474 7", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2609</th>\n <td>1</td>\n </tr>\n <tr>\n <th>587</th>\n <td>2</td>\n </tr>\n <tr>\n <th>831</th>\n <td>4</td>\n </tr>\n <tr>\n <th>1412</th>\n <td>5</td>\n </tr>\n <tr>\n <th>1750</th>\n <td>8</td>\n </tr>\n <tr>\n <th>487</th>\n <td>8</td>\n </tr>\n <tr>\n <th>1969</th>\n <td>4</td>\n </tr>\n <tr>\n <th>2208</th>\n <td>5</td>\n </tr>\n <tr>\n <th>1153</th>\n <td>6</td>\n </tr>\n <tr>\n <th>1005</th>\n <td>4</td>\n </tr>\n <tr>\n <th>1124</th>\n <td>9</td>\n </tr>\n <tr>\n <th>1580</th>\n <td>8</td>\n </tr>\n <tr>\n <th>1474</th>\n <td>7</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "np.cumsum(df_n_items_in_block.loc[[2609, 587, 831, 1412, 1750]])", | |
"execution_count": 64, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 64, | |
"data": { | |
"text/plain": " num_items\nblock_id \n2609 1\n587 3\n831 7\n1412 12\n1750 20", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2609</th>\n <td>1</td>\n </tr>\n <tr>\n <th>587</th>\n <td>3</td>\n </tr>\n <tr>\n <th>831</th>\n <td>7</td>\n </tr>\n <tr>\n <th>1412</th>\n <td>12</td>\n </tr>\n <tr>\n <th>1750</th>\n <td>20</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "#Drop 1 item from current bet3_blks\ndf_n_items_in_block.loc[bet3_blks]", | |
"execution_count": 65, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 65, | |
"data": { | |
"text/plain": " num_items\nblock_id \n96 1\n358 7\n439 1\n474 1\n1016 1\n1850 1\n2037 4\n2128 6\n2335 2\n2359 1\n2443 6", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>1</td>\n </tr>\n <tr>\n <th>358</th>\n <td>7</td>\n </tr>\n <tr>\n <th>439</th>\n <td>1</td>\n </tr>\n <tr>\n <th>474</th>\n <td>1</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>1</td>\n </tr>\n <tr>\n <th>1850</th>\n <td>1</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>4</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>6</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>2</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>1</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>6</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_block_rank.loc[[96, 439, 474, 1016, 1850, 2359]]", | |
"execution_count": 66, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 66, | |
"data": { | |
"text/plain": " 0 1 2 3 4 5 6 7 8 \\\n96 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n439 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n474 1.0 1.0 0.0 6.0 2.0 3.0 1.0 2.0 2.0 \n1016 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n1850 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 2500.0 \n2359 2500.0 3.0 4.0 9.0 6.0 6.0 4.0 0.0 1.0 \n\n 9 10 11 12 \n96 0.0 0.0 1.0 0.0 \n439 2500.0 4.0 5.0 29.0 \n474 3.0 5.0 10.0 14.0 \n1016 9.0 11.0 11.0 22.0 \n1850 2500.0 1.0 0.0 1.0 \n2359 1.0 2.0 2.0 3.0 ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n <th>2</th>\n <th>3</th>\n <th>4</th>\n <th>5</th>\n <th>6</th>\n <th>7</th>\n <th>8</th>\n <th>9</th>\n <th>10</th>\n <th>11</th>\n <th>12</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n </tr>\n <tr>\n <th>439</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>4.0</td>\n <td>5.0</td>\n <td>29.0</td>\n </tr>\n <tr>\n <th>474</th>\n <td>1.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>6.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>1.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>3.0</td>\n <td>5.0</td>\n <td>10.0</td>\n <td>14.0</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>9.0</td>\n <td>11.0</td>\n <td>11.0</td>\n <td>22.0</td>\n </tr>\n <tr>\n <th>1850</th>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>2500.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>2500.0</td>\n <td>3.0</td>\n <td>4.0</td>\n <td>9.0</td>\n <td>6.0</td>\n <td>6.0</td>\n <td>4.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>1.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>3.0</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "bet3_blks=sorted(list(set.union(*[set([2609, 587, 831, 1412, 1750]), set(bet3_blks)])))", | |
"execution_count": 67, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "bet3_blks.remove(439)", | |
"execution_count": 68, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "np.cumsum(df_n_items_in_block.loc[bet3_blks])", | |
"execution_count": 69, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 69, | |
"data": { | |
"text/plain": " num_items\nblock_id \n96 1\n358 8\n474 9\n587 11\n831 15\n1016 16\n1412 21\n1750 29\n1850 30\n2037 34\n2128 40\n2335 42\n2359 43\n2443 49\n2609 50", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>1</td>\n </tr>\n <tr>\n <th>358</th>\n <td>8</td>\n </tr>\n <tr>\n <th>474</th>\n <td>9</td>\n </tr>\n <tr>\n <th>587</th>\n <td>11</td>\n </tr>\n <tr>\n <th>831</th>\n <td>15</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>16</td>\n </tr>\n <tr>\n <th>1412</th>\n <td>21</td>\n </tr>\n <tr>\n <th>1750</th>\n <td>29</td>\n </tr>\n <tr>\n <th>1850</th>\n <td>30</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>34</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>40</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>42</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>43</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>49</td>\n </tr>\n <tr>\n <th>2609</th>\n <td>50</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "So how do these bet 3 blocks for week 13 perform? Load the complete dataset for sales to see." | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_stock_and_sales = pd.read_csv(path+'sales_stock.csv')", | |
"execution_count": 70, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# add week\ndf_stock_and_sales['week']=(((df_stock_and_sales['date_number']-4)/7)+1).round(0).astype(int)\n# add blocks\ndf_stock_and_sales = df_stock_and_sales.merge(df_blocks, how='left', on=['product_id'])\n# add price\ndf_stock_and_sales = df_stock_and_sales.merge(df_products, how='left', on='product_id')\n# add revenue\ndf_stock_and_sales['revenue']=df_stock_and_sales['sales']*df_stock_and_sales['price']\ndf_sales_week_by_product=df_stock_and_sales.groupby(['week','product_id','family_id',\n 'subfamily_id', 'price','block_id']).agg({'sales':'sum', 'revenue':'sum'}).reset_index()\ndf_sales_week_by_block = df_sales_week_by_product.groupby(['week','block_id']).agg({'revenue':'sum'}).reset_index()\ndf_week_rev = df_sales_week_by_block.merge(df_n_items_in_block, on='block_id', how='left')\ndf_week_rev['revenue_per_item']=df_week_rev.revenue/df_week_rev.num_items", | |
"execution_count": 71, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "total = 0\ndf_tmp = df_week_rev[df_week_rev.week==13]\nfor blk in bet3_blks:\n total+=df_tmp[df_tmp.block_id==blk].revenue.values\nprint(total)\nBet3_revenue = total", | |
"execution_count": 72, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "[170291.95]\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "print (Bet1_revenue+Bet2_revenue+Bet3_revenue)\n\n# 7th place", | |
"execution_count": 73, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "[621296.13]\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "621296.13/695860.61", | |
"execution_count": 74, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 74, | |
"data": { | |
"text/plain": "0.8928456663181438" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "This result is just 89% of the winning solution, so is far from optimal, and only 76% of the possible (see below)." | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "#2130 rows\ndf_ranked_blocks_by_week = pd.DataFrame() \nfor w in list(range(14)):\n df_ranked_blocks_by_week[w]=list(df_week_rev[df_week_rev['week']==w].sort_values('revenue_per_item', ascending=0).iloc[:,1])[:2130]", | |
"execution_count": 75, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "#df of rank for each block for each week\ndf_block_rank=pd.DataFrame(columns=list(range(14)))\nfor block in df_n_items_in_block.index:\n a = np.where(df_ranked_blocks_by_week==block)\n for i in range(len(a[1])): df_block_rank.loc[block,a[1][i]]=a[0][i]\n# give rank 2500 if dropped out of rankings\ndf_block_rank = df_block_rank.fillna(2500.)", | |
"execution_count": 76, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "wk_10=list(df_block_rank.sort_values(10).index[:16])\nnp.cumsum(df_n_items_in_block.loc[wk_10])", | |
"execution_count": 77, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 77, | |
"data": { | |
"text/plain": " num_items\nblock_id \n96 1\n1850 2\n2359 3\n2128 9\n439 10\n474 11\n2335 13\n2037 17\n2443 23\n393 25\n2306 28\n1016 29\n8 30\n1546 36\n2321 41\n1403 51", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>1</td>\n </tr>\n <tr>\n <th>1850</th>\n <td>2</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>3</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>9</td>\n </tr>\n <tr>\n <th>439</th>\n <td>10</td>\n </tr>\n <tr>\n <th>474</th>\n <td>11</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>13</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>17</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>23</td>\n </tr>\n <tr>\n <th>393</th>\n <td>25</td>\n </tr>\n <tr>\n <th>2306</th>\n <td>28</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>29</td>\n </tr>\n <tr>\n <th>8</th>\n <td>30</td>\n </tr>\n <tr>\n <th>1546</th>\n <td>36</td>\n </tr>\n <tr>\n <th>2321</th>\n <td>41</td>\n </tr>\n <tr>\n <th>1403</th>\n <td>51</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_n_items_in_block.loc[wk_10]", | |
"execution_count": 78, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 78, | |
"data": { | |
"text/plain": " num_items\nblock_id \n96 1\n1850 1\n2359 1\n2128 6\n439 1\n474 1\n2335 2\n2037 4\n2443 6\n393 2\n2306 3\n1016 1\n8 1\n1546 6\n2321 5\n1403 10", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>1</td>\n </tr>\n <tr>\n <th>1850</th>\n <td>1</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>1</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>6</td>\n </tr>\n <tr>\n <th>439</th>\n <td>1</td>\n </tr>\n <tr>\n <th>474</th>\n <td>1</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>2</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>4</td>\n </tr>\n <tr>\n <th>2443</th>\n <td>6</td>\n </tr>\n <tr>\n <th>393</th>\n <td>2</td>\n </tr>\n <tr>\n <th>2306</th>\n <td>3</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>1</td>\n </tr>\n <tr>\n <th>8</th>\n <td>1</td>\n </tr>\n <tr>\n <th>1546</th>\n <td>6</td>\n </tr>\n <tr>\n <th>2321</th>\n <td>5</td>\n </tr>\n <tr>\n <th>1403</th>\n <td>10</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "wk_10.remove(8)", | |
"execution_count": 79, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "total = 0\ndf_tmp = df_week_rev[df_week_rev.week==10]\nfor blk in wk_10:\n total+=df_tmp[df_tmp.block_id==blk].revenue.values\nprint(total)\nwk_10_revenue = total", | |
"execution_count": 80, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "[306218.37]\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "wk_12=list(df_block_rank.sort_values(12).index[:14])\nnp.cumsum(df_n_items_in_block.loc[wk_12])", | |
"execution_count": 81, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 81, | |
"data": { | |
"text/plain": " num_items\nblock_id \n96 1\n1850 2\n2609 3\n2359 4\n587 6\n831 10\n2335 12\n2037 16\n1412 21\n1750 29\n2128 35\n487 43\n1969 47\n2208 52", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>1</td>\n </tr>\n <tr>\n <th>1850</th>\n <td>2</td>\n </tr>\n <tr>\n <th>2609</th>\n <td>3</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>4</td>\n </tr>\n <tr>\n <th>587</th>\n <td>6</td>\n </tr>\n <tr>\n <th>831</th>\n <td>10</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>12</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>16</td>\n </tr>\n <tr>\n <th>1412</th>\n <td>21</td>\n </tr>\n <tr>\n <th>1750</th>\n <td>29</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>35</td>\n </tr>\n <tr>\n <th>487</th>\n <td>43</td>\n </tr>\n <tr>\n <th>1969</th>\n <td>47</td>\n </tr>\n <tr>\n <th>2208</th>\n <td>52</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df_n_items_in_block.loc[wk_12]", | |
"execution_count": 82, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 82, | |
"data": { | |
"text/plain": " num_items\nblock_id \n96 1\n1850 1\n2609 1\n2359 1\n587 2\n831 4\n2335 2\n2037 4\n1412 5\n1750 8\n2128 6\n487 8\n1969 4\n2208 5", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>1</td>\n </tr>\n <tr>\n <th>1850</th>\n <td>1</td>\n </tr>\n <tr>\n <th>2609</th>\n <td>1</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>1</td>\n </tr>\n <tr>\n <th>587</th>\n <td>2</td>\n </tr>\n <tr>\n <th>831</th>\n <td>4</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>2</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>4</td>\n </tr>\n <tr>\n <th>1412</th>\n <td>5</td>\n </tr>\n <tr>\n <th>1750</th>\n <td>8</td>\n </tr>\n <tr>\n <th>2128</th>\n <td>6</td>\n </tr>\n <tr>\n <th>487</th>\n <td>8</td>\n </tr>\n <tr>\n <th>1969</th>\n <td>4</td>\n </tr>\n <tr>\n <th>2208</th>\n <td>5</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "wk_12.remove(2335)", | |
"execution_count": 83, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "total = 0\ndf_tmp = df_week_rev[df_week_rev.week==12]\nfor blk in wk_12:\n total+=df_tmp[df_tmp.block_id==blk].revenue.values\nprint(total)\nwk_12_revenue = total", | |
"execution_count": 84, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "[278998.38]\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "wk_13=list(df_block_rank.sort_values(13).index[:17])\nnp.cumsum(df_n_items_in_block.loc[wk_13])", | |
"execution_count": 85, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 85, | |
"data": { | |
"text/plain": " num_items\nblock_id \n96 1\n2335 3\n2359 4\n2208 9\n603 10\n2609 11\n1850 12\n416 15\n1580 23\n2037 27\n1412 32\n943 35\n2254 39\n1996 42\n1546 48\n1016 49\n2713 50", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>num_items</th>\n </tr>\n <tr>\n <th>block_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>96</th>\n <td>1</td>\n </tr>\n <tr>\n <th>2335</th>\n <td>3</td>\n </tr>\n <tr>\n <th>2359</th>\n <td>4</td>\n </tr>\n <tr>\n <th>2208</th>\n <td>9</td>\n </tr>\n <tr>\n <th>603</th>\n <td>10</td>\n </tr>\n <tr>\n <th>2609</th>\n <td>11</td>\n </tr>\n <tr>\n <th>1850</th>\n <td>12</td>\n </tr>\n <tr>\n <th>416</th>\n <td>15</td>\n </tr>\n <tr>\n <th>1580</th>\n <td>23</td>\n </tr>\n <tr>\n <th>2037</th>\n <td>27</td>\n </tr>\n <tr>\n <th>1412</th>\n <td>32</td>\n </tr>\n <tr>\n <th>943</th>\n <td>35</td>\n </tr>\n <tr>\n <th>2254</th>\n <td>39</td>\n </tr>\n <tr>\n <th>1996</th>\n <td>42</td>\n </tr>\n <tr>\n <th>1546</th>\n <td>48</td>\n </tr>\n <tr>\n <th>1016</th>\n <td>49</td>\n </tr>\n <tr>\n <th>2713</th>\n <td>50</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "total = 0\ndf_tmp = df_week_rev[df_week_rev.week==13]\nfor blk in wk_13:\n total+=df_tmp[df_tmp.block_id==blk].revenue.values\nprint(total)\nwk_13_revenue = total", | |
"execution_count": 86, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "[216285.14]\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "print (wk_10_revenue+wk_12_revenue+wk_13_revenue)", | |
"execution_count": 87, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "[801501.89]\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "621296.13/801501.89", | |
"execution_count": 88, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 88, | |
"data": { | |
"text/plain": "0.7751648969910726" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"gist": { | |
"id": "c1cdf98c4f8840f18d507c8fe2b3e992", | |
"data": { | |
"description": "Zara Data Challenge - top 10 with no ML", | |
"public": true | |
} | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.6.7", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"_draft": { | |
"nbviewer_url": "https://gist.github.com/c1cdf98c4f8840f18d507c8fe2b3e992" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment