Created
September 24, 2024 16:55
-
-
Save calvinmccarter/07afe42774e3b793b8589e1884d8d323 to your computer and use it in GitHub Desktop.
m5 replication
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import sys\n", | |
"sys.path.append(\"../src\")\n", | |
"# autoreload\n", | |
"import lightgbm as lgb\n", | |
"\n", | |
"#from testbed.models.quantile_regression import QuantileRegression\n", | |
"from testbed.models.treeffuser import Treeffuser\n", | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"\n", | |
"from pathlib import Path\n", | |
"from tqdm import tqdm\n", | |
"import pickle as pkl\n", | |
"\n", | |
"\n", | |
"\n", | |
"#from testbed.models.ngboost import NGBoostGaussian, NGBoostMixtureGaussian, NGBoostPoisson\n", | |
"from testbed.models.base_model import BayesOptProbabilisticModel\n", | |
"\n", | |
"\n", | |
"from functools import partial\n", | |
"\n", | |
"from jaxtyping import Float, Array\n", | |
"from typing import List, Callable\n", | |
"\n", | |
"import seaborn as sns\n", | |
"import matplotlib.pyplot as plt\n", | |
"from testbed.metrics.log_likelihood import LogLikelihoodFromSamplesMetric\n", | |
"from testbed.metrics.crps import CRPS\n", | |
"\n", | |
"\n", | |
"path = \"../src/testbed/data/m5\"\n", | |
"\n", | |
"# load autoreload extension\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# These are config variables\n", | |
"\n", | |
"PROCESS_FROM_SCRATCH = True\n", | |
"USE_SUBSET = True\n", | |
"CONTEXT_LENGTH = 20\n", | |
"RUN_DEPRECATED = False" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"columns of sell_prices_df:\n", | |
"store_id\n", | |
"item_id\n", | |
"wm_yr_wk\n", | |
"sell_price\n", | |
"\n", | |
"columns of sales_train_validation_df:\n", | |
"id\n", | |
"item_id\n", | |
"dept_id\n", | |
"cat_id\n", | |
"store_id\n", | |
"state_id\n", | |
"\n", | |
"columns of calendar_df:\n", | |
"date\n", | |
"wm_yr_wk\n", | |
"weekday\n", | |
"wday\n", | |
"month\n", | |
"year\n", | |
"d\n", | |
"event_name_1\n", | |
"event_type_1\n", | |
"event_name_2\n", | |
"event_type_2\n", | |
"snap_CA\n", | |
"snap_TX\n", | |
"snap_WI\n", | |
"number of zeros in sales_train_validation_df: 39777094\n" | |
] | |
} | |
], | |
"source": [ | |
"# READ IN DATA\n", | |
"# Load the three raw M5 tables from the directory named by `path`.\n", | |
"data_dir = Path(path)\n", | |
"sell_prices_df = pd.read_csv(data_dir / \"sell_prices.csv\")\n", | |
"sales_train_validation_df = pd.read_csv(data_dir / \"sales_train_validation.csv\")\n", | |
"calendar_df = pd.read_csv(data_dir / \"calendar.csv\")\n", | |
"\n", | |
"# List each table's columns, one per line. Plain loops are used instead of\n", | |
"# list comprehensions because only the printing side effect is wanted.\n", | |
"print(\"\\ncolumns of sell_prices_df:\")\n", | |
"for col in sell_prices_df.columns:\n", | |
"    print(col)\n", | |
"\n", | |
"# omit the per-day sales columns (d_1, d_2, ...) so they do not flood the output\n", | |
"print(\"\\ncolumns of sales_train_validation_df:\")\n", | |
"for col in sales_train_validation_df.columns:\n", | |
"    if not col.startswith(\"d_\"):\n", | |
"        print(col)\n", | |
"\n", | |
"print(\"\\ncolumns of calendar_df:\")\n", | |
"for col in calendar_df.columns:\n", | |
"    if not col.startswith(\"d_\"):\n", | |
"        print(col)\n", | |
"\n", | |
"# print number of zeros (counted over every column of the table)\n", | |
"print(\"number of zeros in sales_train_validation_df: \", (sales_train_validation_df == 0).sum().sum())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"number of zeros in sales_train_validation_df: 39777094 out of 58327370 entries\n", | |
"percentage of zeros in sales_train_validation_df: 68.20%\n" | |
] | |
} | |
], | |
"source": [ | |
"# Restrict the zero count to the per-day sales columns (names starting with \"d_\").\n", | |
"items_sold_cols = sales_train_validation_df.columns[sales_train_validation_df.columns.str.startswith(\"d_\")]\n", | |
"daily_sales = sales_train_validation_df[items_sold_cols]\n", | |
"\n", | |
"num_zeros = daily_sales.eq(0).sum().sum()\n", | |
"total_entries = daily_sales.size  # rows * columns of the sales block\n", | |
"\n", | |
"print(f\"number of zeros in sales_train_validation_df: {num_zeros} out of {total_entries} entries\")\n", | |
"print(f\"percentage of zeros in sales_train_validation_df: {num_zeros / total_entries * 100:.2f}%\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Parse the date strings, then expose day / month / year as explicit columns\n", | |
"# for ease of processing (\"day\" is new; \"month\" and \"year\" are overwritten).\n", | |
"calendar_df[\"date\"] = pd.to_datetime(calendar_df[\"date\"])\n", | |
"for date_part in (\"day\", \"month\", \"year\"):\n", | |
"    calendar_df[date_part] = getattr(calendar_df[\"date\"].dt, date_part)\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Brief snapshots of the dataset" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>date</th>\n", | |
" <th>wm_yr_wk</th>\n", | |
" <th>weekday</th>\n", | |
" <th>wday</th>\n", | |
" <th>month</th>\n", | |
" <th>year</th>\n", | |
" <th>d</th>\n", | |
" <th>event_name_1</th>\n", | |
" <th>event_type_1</th>\n", | |
" <th>event_name_2</th>\n", | |
" <th>event_type_2</th>\n", | |
" <th>snap_CA</th>\n", | |
" <th>snap_TX</th>\n", | |
" <th>snap_WI</th>\n", | |
" <th>day</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2011-01-29</td>\n", | |
" <td>11101</td>\n", | |
" <td>Saturday</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>2011</td>\n", | |
" <td>d_1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>29</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2011-01-30</td>\n", | |
" <td>11101</td>\n", | |
" <td>Sunday</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>2011</td>\n", | |
" <td>d_2</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2011-01-31</td>\n", | |
" <td>11101</td>\n", | |
" <td>Monday</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>2011</td>\n", | |
" <td>d_3</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>31</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2011-02-01</td>\n", | |
" <td>11101</td>\n", | |
" <td>Tuesday</td>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" <td>2011</td>\n", | |
" <td>d_4</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>2011-02-02</td>\n", | |
" <td>11101</td>\n", | |
" <td>Wednesday</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2011</td>\n", | |
" <td>d_5</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" date wm_yr_wk weekday wday month year d event_name_1 \\\n", | |
"0 2011-01-29 11101 Saturday 1 1 2011 d_1 NaN \n", | |
"1 2011-01-30 11101 Sunday 2 1 2011 d_2 NaN \n", | |
"2 2011-01-31 11101 Monday 3 1 2011 d_3 NaN \n", | |
"3 2011-02-01 11101 Tuesday 4 2 2011 d_4 NaN \n", | |
"4 2011-02-02 11101 Wednesday 5 2 2011 d_5 NaN \n", | |
"\n", | |
" event_type_1 event_name_2 event_type_2 snap_CA snap_TX snap_WI day \n", | |
"0 NaN NaN NaN 0 0 0 29 \n", | |
"1 NaN NaN NaN 0 0 0 30 \n", | |
"2 NaN NaN NaN 0 0 0 31 \n", | |
"3 NaN NaN NaN 1 1 0 1 \n", | |
"4 NaN NaN NaN 1 0 1 2 " | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"calendar_df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>item_id</th>\n", | |
" <th>dept_id</th>\n", | |
" <th>cat_id</th>\n", | |
" <th>store_id</th>\n", | |
" <th>state_id</th>\n", | |
" <th>d_1</th>\n", | |
" <th>d_2</th>\n", | |
" <th>d_3</th>\n", | |
" <th>d_4</th>\n", | |
" <th>...</th>\n", | |
" <th>d_1904</th>\n", | |
" <th>d_1905</th>\n", | |
" <th>d_1906</th>\n", | |
" <th>d_1907</th>\n", | |
" <th>d_1908</th>\n", | |
" <th>d_1909</th>\n", | |
" <th>d_1910</th>\n", | |
" <th>d_1911</th>\n", | |
" <th>d_1912</th>\n", | |
" <th>d_1913</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>HOBBIES_1_001_CA_1_validation</td>\n", | |
" <td>HOBBIES_1_001</td>\n", | |
" <td>HOBBIES_1</td>\n", | |
" <td>HOBBIES</td>\n", | |
" <td>CA_1</td>\n", | |
" <td>CA</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>HOBBIES_1_002_CA_1_validation</td>\n", | |
" <td>HOBBIES_1_002</td>\n", | |
" <td>HOBBIES_1</td>\n", | |
" <td>HOBBIES</td>\n", | |
" <td>CA_1</td>\n", | |
" <td>CA</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>HOBBIES_1_003_CA_1_validation</td>\n", | |
" <td>HOBBIES_1_003</td>\n", | |
" <td>HOBBIES_1</td>\n", | |
" <td>HOBBIES</td>\n", | |
" <td>CA_1</td>\n", | |
" <td>CA</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>HOBBIES_1_004_CA_1_validation</td>\n", | |
" <td>HOBBIES_1_004</td>\n", | |
" <td>HOBBIES_1</td>\n", | |
" <td>HOBBIES</td>\n", | |
" <td>CA_1</td>\n", | |
" <td>CA</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>5</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>7</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>HOBBIES_1_005_CA_1_validation</td>\n", | |
" <td>HOBBIES_1_005</td>\n", | |
" <td>HOBBIES_1</td>\n", | |
" <td>HOBBIES</td>\n", | |
" <td>CA_1</td>\n", | |
" <td>CA</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 1919 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id item_id dept_id cat_id store_id \\\n", | |
"0 HOBBIES_1_001_CA_1_validation HOBBIES_1_001 HOBBIES_1 HOBBIES CA_1 \n", | |
"1 HOBBIES_1_002_CA_1_validation HOBBIES_1_002 HOBBIES_1 HOBBIES CA_1 \n", | |
"2 HOBBIES_1_003_CA_1_validation HOBBIES_1_003 HOBBIES_1 HOBBIES CA_1 \n", | |
"3 HOBBIES_1_004_CA_1_validation HOBBIES_1_004 HOBBIES_1 HOBBIES CA_1 \n", | |
"4 HOBBIES_1_005_CA_1_validation HOBBIES_1_005 HOBBIES_1 HOBBIES CA_1 \n", | |
"\n", | |
" state_id d_1 d_2 d_3 d_4 ... d_1904 d_1905 d_1906 d_1907 d_1908 \\\n", | |
"0 CA 0 0 0 0 ... 1 3 0 1 1 \n", | |
"1 CA 0 0 0 0 ... 0 0 0 0 0 \n", | |
"2 CA 0 0 0 0 ... 2 1 2 1 1 \n", | |
"3 CA 0 0 0 0 ... 1 0 5 4 1 \n", | |
"4 CA 0 0 0 0 ... 2 1 1 0 1 \n", | |
"\n", | |
" d_1909 d_1910 d_1911 d_1912 d_1913 \n", | |
"0 1 3 0 1 1 \n", | |
"1 1 0 0 0 0 \n", | |
"2 1 0 1 1 1 \n", | |
"3 0 1 3 7 2 \n", | |
"4 1 2 2 2 4 \n", | |
"\n", | |
"[5 rows x 1919 columns]" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sales_train_validation_df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>store_id</th>\n", | |
" <th>item_id</th>\n", | |
" <th>wm_yr_wk</th>\n", | |
" <th>sell_price</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>CA_1</td>\n", | |
" <td>HOBBIES_1_001</td>\n", | |
" <td>11325</td>\n", | |
" <td>9.58</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>CA_1</td>\n", | |
" <td>HOBBIES_1_001</td>\n", | |
" <td>11326</td>\n", | |
" <td>9.58</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>CA_1</td>\n", | |
" <td>HOBBIES_1_001</td>\n", | |
" <td>11327</td>\n", | |
" <td>8.26</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>CA_1</td>\n", | |
" <td>HOBBIES_1_001</td>\n", | |
" <td>11328</td>\n", | |
" <td>8.26</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>CA_1</td>\n", | |
" <td>HOBBIES_1_001</td>\n", | |
" <td>11329</td>\n", | |
" <td>8.26</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" store_id item_id wm_yr_wk sell_price\n", | |
"0 CA_1 HOBBIES_1_001 11325 9.58\n", | |
"1 CA_1 HOBBIES_1_001 11326 9.58\n", | |
"2 CA_1 HOBBIES_1_001 11327 8.26\n", | |
"3 CA_1 HOBBIES_1_001 11328 8.26\n", | |
"4 CA_1 HOBBIES_1_001 11329 8.26" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sell_prices_df.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Process the data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"TOTAL_ITEMS = 5000\n", | |
"\n", | |
"if USE_SUBSET:\n", | |
"    # select a random (seeded, hence reproducible) subset of the series ids\n", | |
"    np.random.seed(0)\n", | |
"    unique_ids = sales_train_validation_df[\"id\"].unique()\n", | |
"    # never ask for more ids than exist; sampling without replacement would raise\n", | |
"    ids = np.random.choice(unique_ids, min(TOTAL_ITEMS, len(unique_ids)), replace=False)\n", | |
"    sales_train_validation_df_sub = sales_train_validation_df[sales_train_validation_df[\"id\"].isin(ids)]\n", | |
"    item_ids = sales_train_validation_df_sub[\"item_id\"].unique()\n", | |
"    # prices are filtered by item_id only, so every store's price of a kept item remains\n", | |
"    sell_prices_df_sub = sell_prices_df[sell_prices_df[\"item_id\"].isin(item_ids)]\n", | |
"else:\n", | |
"    # keep the *_sub names defined so later cells also run on the full data\n", | |
"    sales_train_validation_df_sub = sales_train_validation_df\n", | |
"    sell_prices_df_sub = sell_prices_df\n", | |
"\n", | |
"# the calendar is shared by all items and is never subsampled\n", | |
"calendar_df_sub = calendar_df\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"columns_sales_train_validation.head)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The strategy for processing the data is the following. 1) We build X and y, where y is the next day's sales for a given product. 2) X is made up of the previous `CONTEXT_LENGTH` days of sales for that product, plus the day of the week, month, and event names of the day being predicted, along with the product's store_id, item_id, and sell price." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def proc_train_test(\n", | |
"    sales_train_validation_df: pd.DataFrame,\n", | |
"    calendar_df: pd.DataFrame,\n", | |
"    sell_prices_df: pd.DataFrame,\n", | |
"    context_length: int,\n", | |
"    test_percentage: float,\n", | |
"    percentage_omittied: float = 0,\n", | |
"):\n", | |
"    \"\"\"\n", | |
"    Build next-day sales samples (X, y) from the M5 tables, split by time.\n", | |
"\n", | |
"    Every sample is a sliding window over one row of sales_train_validation_df:\n", | |
"    the features are the sales of context_length consecutive days\n", | |
"    (d_j ... d_{j+context_length-1}) followed by wday, month, store_id,\n", | |
"    event_name_1, event_name_2, sell_price and item_id, where the calendar\n", | |
"    fields are those of the target day d_{j+context_length}. The label is the\n", | |
"    row's sales on the target day.\n", | |
"\n", | |
"    The samples are returned in two groupings:\n", | |
"    - undifferentiated: flat lists over all rows (X_train, y_train, X_test, y_test)\n", | |
"    - differentiated: one list per row (X_train_prod, y_train_prod, X_test_prod, y_test_prod)\n", | |
"      where X_train_prod[i] holds every training sample of the i-th row, with the\n", | |
"      same grouping for y_train_prod and the two test lists\n", | |
"\n", | |
"    This assumes from the dataframes that\n", | |
"    - sales_train_validation_df:\n", | |
"        - has columns with the format d_1, d_2, ...\n", | |
"        - has columns item_id and store_id\n", | |
"    - calendar_df:\n", | |
"        - d, wday, month, event_name_1, event_name_2\n", | |
"    - sell_prices_df:\n", | |
"        - item_id, store_id, sell_price\n", | |
"\n", | |
"    Args:\n", | |
"    - context_length: number of past days of sales used as features\n", | |
"    - test_percentage: fraction (0-1) of the days, taken from the end of the\n", | |
"      series, whose windows form the test set\n", | |
"    - percentage_omittied: fraction (0-1) of the candidate training windows to\n", | |
"      drop at random, drawn independently for every row. Test windows are never\n", | |
"      dropped. (The misspelled name is kept so existing callers keep working.)\n", | |
"\n", | |
"    Returns:\n", | |
"    - undifferentiated: Tuple of X_train, y_train, X_test, y_test\n", | |
"    - differentiated: Tuple of X_train_prod, y_train_prod, X_test_prod, y_test_prod\n", | |
"    \"\"\"\n", | |
"    # Re-seed NumPy's global RNG so the sampled training windows are reproducible.\n", | |
"    np.random.seed(0)\n", | |
"\n", | |
"    X_train, y_train = [], []\n", | |
"    X_test, y_test = [], []\n", | |
"\n", | |
"    # Per-row groupings: one inner list per row of sales_train_validation_df.\n", | |
"    X_train_prod, y_train_prod = [], []\n", | |
"    X_test_prod, y_test_prod = [], []\n", | |
"\n", | |
"    # The largest N among the d_N columns is the number of available days.\n", | |
"    # startswith (rather than a substring test) keeps unrelated columns that\n", | |
"    # merely contain \"d_\" from being parsed as days.\n", | |
"    total_days = max(\n", | |
"        int(col.split(\"_\")[1])\n", | |
"        for col in sales_train_validation_df.columns\n", | |
"        if col.startswith(\"d_\")\n", | |
"    )\n", | |
"    train_days = int(total_days * (1 - test_percentage))\n", | |
"    print(\"train days\", train_days)\n", | |
"    print(\"test days\", total_days - train_days)\n", | |
"    print(\"total days\", total_days)\n", | |
"\n", | |
"    # Precompute the required data\n", | |
"    calendar_df_dict = calendar_df.set_index(\"d\").to_dict(orient=\"index\")\n", | |
"    # NOTE(review): only the first price per (item_id, store_id) is kept, so the\n", | |
"    # sell_price feature does not vary over time - confirm this is intended.\n", | |
"    sell_prices_dict = sell_prices_df.groupby([\"item_id\", \"store_id\"])[\"sell_price\"].first().to_dict()\n", | |
"\n", | |
"    # The candidate window starts are the same for every row, so build them once.\n", | |
"    train_starts = np.arange(1, train_days - context_length)\n", | |
"    test_starts = list(range(train_days, total_days - context_length))\n", | |
"\n", | |
"    # Clamp the sample size to the number of candidates: without the clamp the\n", | |
"    # default percentage_omittied=0 asks for one more window than exists and\n", | |
"    # np.random.choice(..., replace=False) raises ValueError.\n", | |
"    valid_size = int((train_days - context_length) * (1 - percentage_omittied))\n", | |
"    valid_size = max(0, min(valid_size, len(train_starts)))\n", | |
"\n", | |
"    # Iterating through tqdm closes the progress bar when the loop finishes.\n", | |
"    for _, row in tqdm(sales_train_validation_df.iterrows(), total=len(sales_train_validation_df)):\n", | |
"        item_id = row[\"item_id\"]\n", | |
"        store_id = row[\"store_id\"]\n", | |
"\n", | |
"        row_X_train, row_y_train = [], []\n", | |
"        row_X_test, row_y_test = [], []\n", | |
"\n", | |
"        # Random subset of training windows for this row, then every test window.\n", | |
"        sampled_starts = np.random.choice(train_starts, valid_size, replace=False)\n", | |
"\n", | |
"        for j in list(sampled_starts) + test_starts:\n", | |
"            # Sales values for the context_length days starting at d_j\n", | |
"            x = [row[f\"d_{j+k}\"] for k in range(context_length)]\n", | |
"\n", | |
"            # Additional features, taken from the day being predicted\n", | |
"            current_day = f\"d_{j+context_length}\"\n", | |
"            calendar_data = calendar_df_dict[current_day]\n", | |
"            x.extend([\n", | |
"                calendar_data[\"wday\"],\n", | |
"                calendar_data[\"month\"],\n", | |
"                store_id,\n", | |
"                calendar_data[\"event_name_1\"],\n", | |
"                calendar_data[\"event_name_2\"],\n", | |
"                sell_prices_dict[(item_id, store_id)],\n", | |
"                item_id\n", | |
"            ])\n", | |
"            y = row[current_day]\n", | |
"\n", | |
"            if j < train_days:\n", | |
"                X_train.append(x)\n", | |
"                y_train.append(y)\n", | |
"                row_X_train.append(x)\n", | |
"                row_y_train.append(y)\n", | |
"            else:\n", | |
"                X_test.append(x)\n", | |
"                y_test.append(y)\n", | |
"                # Test windows belong in the per-row *test* lists; they were\n", | |
"                # previously appended to the per-row training lists by mistake.\n", | |
"                row_X_test.append(x)\n", | |
"                row_y_test.append(y)\n", | |
"\n", | |
"        X_train_prod.append(row_X_train)\n", | |
"        y_train_prod.append(row_y_train)\n", | |
"        X_test_prod.append(row_X_test)\n", | |
"        y_test_prod.append(row_y_test)\n", | |
"\n", | |
"    undifferentiated = (X_train, y_train, X_test, y_test)\n", | |
"    differentiated = (X_train_prod, y_train_prod, X_test_prod, y_test_prod)\n", | |
"    return undifferentiated, differentiated" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"train days 1874\n", | |
"test days 39\n", | |
"total days 1913\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:14<00:00, 355.28it/s]\n" | |
] | |
} | |
], | |
"source": [ | |
"# NOTE(review): nothing below is defined when PROCESS_FROM_SCRATCH is False;\n", | |
"# no cached-load path is visible in this part of the notebook - confirm.\n", | |
"if PROCESS_FROM_SCRATCH:\n", | |
"    undifferentiated, differentiated = proc_train_test(\n", | |
"        sales_train_validation_df=sales_train_validation_df_sub,\n", | |
"        calendar_df=calendar_df,\n", | |
"        sell_prices_df=sell_prices_df_sub,\n", | |
"        context_length=CONTEXT_LENGTH,\n", | |
"        test_percentage=0.02,  # the last ~2% of days form the test split\n", | |
"        percentage_omittied=0.99,  # keep only ~1% of the training windows per series\n", | |
"    )\n", | |
"    (X_train, y_train, X_test, y_test) = undifferentiated\n", | |
"    (X_train_prod, y_train_prod, X_test_prod, y_test_prod) = differentiated\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(90000, 90000, 95000, 95000)" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(X_train), len(y_train), len(X_test), len(y_test)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Feature layout of each sample: CONTEXT_LENGTH lagged sales columns first,\n", | |
"# then the extra features in the order proc_train_test appends them.\n", | |
"lag_cols = [f\"day_{lag}\" for lag in range(1, CONTEXT_LENGTH + 1)]\n", | |
"extra_cols = [\"wday\", \"month\", \"store_id\", \"event_name_1\", \"event_name_2\", \"sell_price\", \"item_id\"]\n", | |
"COL_NAMES = lag_cols + extra_cols\n", | |
"\n", | |
"# Columns to treat as categorical, and their positions within COL_NAMES.\n", | |
"CAT_COLS = [\"store_id\", \"event_name_1\", \"event_name_2\", \"item_id\", \"wday\", \"month\"]\n", | |
"col_position = {name: pos for pos, name in enumerate(COL_NAMES)}\n", | |
"CAT_COLS_IDX = [col_position[name] for name in CAT_COLS]\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Wrap the flat sample lists in DataFrames (labels become single-column frames).\n", | |
"X_train_df, X_test_df = (pd.DataFrame(samples) for samples in (X_train, X_test))\n", | |
"y_train_df, y_test_df = (pd.DataFrame(labels) for labels in (y_train, y_test))\n", | |
"\n", | |
"# Name the feature columns; both splits share the same layout.\n", | |
"for features_df in (X_train_df, X_test_df):\n", | |
"    features_df.columns = COL_NAMES" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>day_1</th>\n", | |
" <th>day_2</th>\n", | |
" <th>day_3</th>\n", | |
" <th>day_4</th>\n", | |
" <th>day_5</th>\n", | |
" <th>day_6</th>\n", | |
" <th>day_7</th>\n", | |
" <th>day_8</th>\n", | |
" <th>day_9</th>\n", | |
" <th>day_10</th>\n", | |
" <th>...</th>\n", | |
" <th>day_18</th>\n", | |
" <th>day_19</th>\n", | |
" <th>day_20</th>\n", | |
" <th>wday</th>\n", | |
" <th>month</th>\n", | |
" <th>store_id</th>\n", | |
" <th>event_name_1</th>\n", | |
" <th>event_name_2</th>\n", | |
" <th>sell_price</th>\n", | |
" <th>item_id</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>6</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>CA_1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2.98</td>\n", | |
" <td>HOBBIES_1_005</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>10</td>\n", | |
" <td>CA_1</td>\n", | |
" <td>EidAlAdha</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2.98</td>\n", | |
" <td>HOBBIES_1_005</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>CA_1</td>\n", | |
" <td>SuperBowl</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2.98</td>\n", | |
" <td>HOBBIES_1_005</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>...</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>4</td>\n", | |
" <td>4</td>\n", | |
" <td>CA_1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2.98</td>\n", | |
" <td>HOBBIES_1_005</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>3</td>\n", | |
" <td>CA_1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2.98</td>\n", | |
" <td>HOBBIES_1_005</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>89995</th>\n", | |
" <td>5</td>\n", | |
" <td>5</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>1</td>\n", | |
" <td>9</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>5</td>\n", | |
" <td>WI_3</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1.00</td>\n", | |
" <td>FOODS_3_827</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>89996</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>WI_3</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1.00</td>\n", | |
" <td>FOODS_3_827</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>89997</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>WI_3</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1.00</td>\n", | |
" <td>FOODS_3_827</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>89998</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>8</td>\n", | |
" <td>WI_3</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1.00</td>\n", | |
" <td>FOODS_3_827</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>89999</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>11</td>\n", | |
" <td>WI_3</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1.00</td>\n", | |
" <td>FOODS_3_827</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>90000 rows × 27 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" day_1 day_2 day_3 day_4 day_5 day_6 day_7 day_8 day_9 day_10 \\\n", | |
"0 2 0 0 4 1 0 6 0 0 0 \n", | |
"1 3 3 1 1 1 0 0 3 0 0 \n", | |
"2 0 0 1 2 1 1 1 1 1 1 \n", | |
"3 0 2 0 1 3 2 1 1 2 2 \n", | |
"4 0 0 0 0 0 0 0 0 0 0 \n", | |
"... ... ... ... ... ... ... ... ... ... ... \n", | |
"89995 5 5 0 2 3 0 1 4 2 0 \n", | |
"89996 0 0 0 0 0 0 0 0 0 0 \n", | |
"89997 0 0 0 0 0 0 0 0 0 0 \n", | |
"89998 0 0 0 0 0 0 0 0 0 0 \n", | |
"89999 0 0 0 0 0 0 0 0 0 0 \n", | |
"\n", | |
" ... day_18 day_19 day_20 wday month store_id event_name_1 \\\n", | |
"0 ... 1 2 0 2 2 CA_1 NaN \n", | |
"1 ... 3 0 1 4 10 CA_1 EidAlAdha \n", | |
"2 ... 0 4 1 2 2 CA_1 SuperBowl \n", | |
"3 ... 2 0 2 4 4 CA_1 NaN \n", | |
"4 ... 0 0 0 4 3 CA_1 NaN \n", | |
"... ... ... ... ... ... ... ... ... \n", | |
"89995 ... 1 9 0 2 5 WI_3 NaN \n", | |
"89996 ... 0 0 0 1 3 WI_3 NaN \n", | |
"89997 ... 0 0 0 4 1 WI_3 NaN \n", | |
"89998 ... 0 0 0 2 8 WI_3 NaN \n", | |
"89999 ... 0 0 0 2 11 WI_3 NaN \n", | |
"\n", | |
" event_name_2 sell_price item_id \n", | |
"0 NaN 2.98 HOBBIES_1_005 \n", | |
"1 NaN 2.98 HOBBIES_1_005 \n", | |
"2 NaN 2.98 HOBBIES_1_005 \n", | |
"3 NaN 2.98 HOBBIES_1_005 \n", | |
"4 NaN 2.98 HOBBIES_1_005 \n", | |
"... ... ... ... \n", | |
"89995 NaN 1.00 FOODS_3_827 \n", | |
"89996 NaN 1.00 FOODS_3_827 \n", | |
"89997 NaN 1.00 FOODS_3_827 \n", | |
"89998 NaN 1.00 FOODS_3_827 \n", | |
"89999 NaN 1.00 FOODS_3_827 \n", | |
"\n", | |
"[90000 rows x 27 columns]" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"\n", | |
"# Show the training features with their string-valued categorical columns; pandas elides the middle rows and columns in the rendered output.\n", | |
"X_train_df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>day_1</th>\n", | |
" <th>day_2</th>\n", | |
" <th>day_3</th>\n", | |
" <th>day_4</th>\n", | |
" <th>day_5</th>\n", | |
" <th>day_6</th>\n", | |
" <th>day_7</th>\n", | |
" <th>day_8</th>\n", | |
" <th>day_9</th>\n", | |
" <th>day_10</th>\n", | |
" <th>...</th>\n", | |
" <th>day_18</th>\n", | |
" <th>day_19</th>\n", | |
" <th>day_20</th>\n", | |
" <th>wday</th>\n", | |
" <th>month</th>\n", | |
" <th>store_id</th>\n", | |
" <th>event_name_1</th>\n", | |
" <th>event_name_2</th>\n", | |
" <th>sell_price</th>\n", | |
" <th>item_id</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>6</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>30</td>\n", | |
" <td>4</td>\n", | |
" <td>2.98</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>9</td>\n", | |
" <td>0</td>\n", | |
" <td>6</td>\n", | |
" <td>4</td>\n", | |
" <td>2.98</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>26</td>\n", | |
" <td>4</td>\n", | |
" <td>2.98</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>...</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>30</td>\n", | |
" <td>4</td>\n", | |
" <td>2.98</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>30</td>\n", | |
" <td>4</td>\n", | |
" <td>2.98</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 27 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" day_1 day_2 day_3 day_4 day_5 day_6 day_7 day_8 day_9 day_10 ... \\\n", | |
"0 2 0 0 4 1 0 6 0 0 0 ... \n", | |
"1 3 3 1 1 1 0 0 3 0 0 ... \n", | |
"2 0 0 1 2 1 1 1 1 1 1 ... \n", | |
"3 0 2 0 1 3 2 1 1 2 2 ... \n", | |
"4 0 0 0 0 0 0 0 0 0 0 ... \n", | |
"\n", | |
" day_18 day_19 day_20 wday month store_id event_name_1 event_name_2 \\\n", | |
"0 1 2 0 1 1 0 30 4 \n", | |
"1 3 0 1 3 9 0 6 4 \n", | |
"2 0 4 1 1 1 0 26 4 \n", | |
"3 2 0 2 3 3 0 30 4 \n", | |
"4 0 0 0 3 2 0 30 4 \n", | |
"\n", | |
" sell_price item_id \n", | |
"0 2.98 0 \n", | |
"1 2.98 0 \n", | |
"2 2.98 0 \n", | |
"3 2.98 0 \n", | |
"4 2.98 0 \n", | |
"\n", | |
"[5 rows x 27 columns]" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Encode the categorical columns as numbers\n", | |
"from sklearn.preprocessing import LabelEncoder\n", | |
"# Keep only the second \"_\"-separated token of item_id (e.g. \"1\" from \"HOBBIES_1_005\").\n", | |
"# NOTE(review): X_train_df / X_test_df are modified in place, so re-running this cell\n", | |
"# presumably fails once item_id has been integer-encoded below -- confirm CAT_COLS includes item_id.\n", | |
"X_train_df[\"item_id\"] = X_train_df[\"item_id\"].apply(lambda x: x.split(\"_\")[1])\n", | |
"X_test_df[\"item_id\"] = X_test_df[\"item_id\"].apply(lambda x: x.split(\"_\")[1])\n", | |
"\n", | |
"\n", | |
"# One encoder per categorical column: fitted on the training frame, then reused for the\n", | |
"# test frame and stored in label_encoders so the per-product frames below get the same codes.\n", | |
"label_encoders = {}\n", | |
"for col in CAT_COLS:\n", | |
" le = LabelEncoder()\n", | |
" X_train_df[col] = le.fit_transform(X_train_df[col])\n", | |
" X_test_df[col] = le.transform(X_test_df[col])\n", | |
" label_encoders[col] = le\n", | |
"\n", | |
"\n", | |
"# Apply the same item_id shortening and the already-fitted encoders to every entry of\n", | |
"# X_train_prod / X_test_prod (the two lists are indexed in parallel).\n", | |
"X_train_prod_processed = []\n", | |
"X_test_prod_processed = []\n", | |
"for i in range(len(X_train_prod)):\n", | |
" X_train_prod_processed.append(pd.DataFrame(X_train_prod[i], columns=COL_NAMES))\n", | |
" X_test_prod_processed.append(pd.DataFrame(X_test_prod[i], columns=COL_NAMES))\n", | |
" X_train_prod_processed[-1][\"item_id\"] = X_train_prod_processed[-1][\"item_id\"].apply(lambda x: x.split(\"_\")[1])\n", | |
" X_test_prod_processed[-1][\"item_id\"] = X_test_prod_processed[-1][\"item_id\"].apply(lambda x: x.split(\"_\")[1])\n", | |
" for col in CAT_COLS:\n", | |
" X_train_prod_processed[-1][col] = label_encoders[col].transform(X_train_prod_processed[-1][col])\n", | |
" X_test_prod_processed[-1][col] = label_encoders[col].transform(X_test_prod_processed[-1][col])\n", | |
"\n", | |
"X_train_df.head()\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# PPC" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### \"Standard PPCs\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"\n", | |
"def max_ppc(y_true: Float[Array, \"batch y_dim\"], y_samples: Float[Array, \"samples batch y_dim\"], number=0, name=\"\") -> tuple:\n", | |
"    \"\"\"\n", | |
"    PPC statistic: maximum of the samples versus maximum of the observations.\n", | |
"\n", | |
"    :param y_true: observed targets; reduced to a single global maximum\n", | |
"    :param y_samples: model samples; reduced with ``np.max`` over axis 1\n", | |
"    :param number: unused; accepted so every PPC shares one call signature\n", | |
"    :param name: unused; accepted so every PPC shares one call signature\n", | |
"    :return: tuple ``(sample_maxima, true_maximum, \"max_ppc\")``; both arrays are flattened to 1-D\n", | |
"    \"\"\"\n", | |
"    # Distinct local name so the function itself is not shadowed inside its own body.\n", | |
"    sample_max = np.max(y_samples, axis=1)\n", | |
"    true_max = np.max(y_true)\n", | |
"\n", | |
"    return sample_max.flatten(), true_max.flatten(), \"max_ppc\"\n", | |
"\n", | |
"def quantile_ppc(y_true: Float[Array, \"batch y_dim\"], y_samples: Float[Array, \"samples batch y_dim\"], quantile=0.5, number=0, name=\"\") -> tuple:\n", | |
"    \"\"\"\n", | |
"    PPC statistic: a quantile of the samples versus the same quantile of the observations.\n", | |
"\n", | |
"    :param y_true: observed targets; reduced to one global quantile\n", | |
"    :param y_samples: model samples; ``np.quantile`` is taken over axis 1\n", | |
"    :param quantile: quantile level passed to ``np.quantile`` (0.5 is the median)\n", | |
"    :param number: unused; accepted so every PPC shares one call signature\n", | |
"    :param name: unused; accepted so every PPC shares one call signature\n", | |
"    :return: tuple ``(sample_quantiles, true_quantile, label)`` where both arrays are\n", | |
"        flattened to 1-D and the label is ``f\"quantile_ppc_{quantile}\"``\n", | |
"    \"\"\"\n", | |
"    sample_q = np.quantile(y_samples, quantile, axis=1)\n", | |
"    true_q = np.quantile(y_true, quantile)\n", | |
"    return sample_q.flatten(), true_q.flatten(), f\"quantile_ppc_{quantile}\"\n", | |
"\n", | |
"def zeros(y_true: Float[Array, \"batch y_dim\"], y_samples: Float[Array, \"samples batch y_dim\"], number=0, name=\"\") -> tuple:\n", | |
"    \"\"\"\n", | |
"    PPC statistic: number of near-zero values in the samples versus in the observations.\n", | |
"\n", | |
"    A value counts as zero when it is strictly below 0.1, so negative values count too.\n", | |
"\n", | |
"    :param y_true: observed targets; counted over all elements\n", | |
"    :param y_samples: model samples; counted over axis 1\n", | |
"    :param number: unused; accepted so every PPC shares one call signature\n", | |
"    :param name: unused; accepted so every PPC shares one call signature\n", | |
"    :return: tuple ``(sample_counts, true_count, \"zeros\")``; both arrays are flattened to 1-D\n", | |
"    \"\"\"\n", | |
"    # asarray lets plain Python sequences through; ``list < float`` would raise TypeError.\n", | |
"    sample_zeros = np.sum(np.asarray(y_samples) < 0.1, axis=1)\n", | |
"    true_zeros = np.sum(np.asarray(y_true) < 0.1)\n", | |
"\n", | |
"    return sample_zeros.flatten(), true_zeros.flatten(), \"zeros\"\n", | |
"\n", | |
"def percentage_zeros(y_true: Float[Array, \"batch y_dim\"], y_samples: Float[Array, \"samples batch y_dim\"], number=0, name=\"\") -> tuple:\n", | |
"    \"\"\"\n", | |
"    PPC statistic: share of near-zero values in the samples versus in the observations.\n", | |
"\n", | |
"    Despite the name, the result is a fraction in [0, 1], not a percentage. A value\n", | |
"    counts as zero when it is strictly below 0.1.\n", | |
"\n", | |
"    :param y_true: observed targets; the fraction is taken over all elements\n", | |
"    :param y_samples: model samples; the fraction is taken over axis 1\n", | |
"    :param number: unused; accepted so every PPC shares one call signature\n", | |
"    :param name: unused; accepted so every PPC shares one call signature\n", | |
"    :return: tuple ``(sample_fractions, true_fraction, \"percentage_zeros\")``; both arrays are flattened to 1-D\n", | |
"    \"\"\"\n", | |
"    # asarray lets plain Python sequences through; ``list < float`` would raise TypeError.\n", | |
"    sample_fraction = np.mean(np.asarray(y_samples) < 0.1, axis=1)\n", | |
"    true_fraction = np.mean(np.asarray(y_true) < 0.1)\n", | |
"\n", | |
"    return sample_fraction.flatten(), true_fraction.flatten(), \"percentage_zeros\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def plot_ppcs(y_true: Float[Array, \"batch y_dim\"], y_samples: Float[Array, \"samples batch y_dim\"], ppcs: List[Callable],\n", | |
"              number=0, name=\"\") -> None:\n", | |
"    \"\"\"\n", | |
"    Invoke every PPC callable on the same observations and samples.\n", | |
"\n", | |
"    Each callable is called as ``ppc(y_true, y_samples, number=number, name=name)``.\n", | |
"    The return values are discarded and nothing is drawn by this function itself.\n", | |
"\n", | |
"    :param y_true: observed targets handed to each callable\n", | |
"    :param y_samples: model samples handed to each callable\n", | |
"    :param ppcs: callables to run, in order\n", | |
"    :param number: forwarded to each callable\n", | |
"    :param name: forwarded to each callable\n", | |
"    \"\"\"\n", | |
"    for check in ppcs:\n", | |
"        check(y_true, y_samples, number=number, name=name)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### \"Complex PPCs\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"\n", | |
"def plot_model_comparisons(data, y_true, figsize=(12, 8), model_names=None):\n", | |
"    \"\"\"\n", | |
"    Plots model predictions against true values for each day.\n", | |
"\n", | |
"    Draws one box per (day, model) summarising that model's samples and overlays the\n", | |
"    true values as red markers, then shows the figure.\n", | |
"\n", | |
"    :param data: numpy array of shape [models, samples, days] containing model predictions\n", | |
"    :param y_true: array of shape [days] containing the true values\n", | |
"    :param figsize: tuple indicating the size of the figure\n", | |
"    :param model_names: optional sequence of legend labels, one per model;\n", | |
"        defaults to \"Model 0\", \"Model 1\", ...\n", | |
"    :raises ValueError: if fewer names than models are supplied\n", | |
"    \"\"\"\n", | |
"    sns.set(style=\"whitegrid\")\n", | |
"    data = np.asarray(data)\n", | |
"    models, samples, days = data.shape\n", | |
"\n", | |
"    if model_names is None:\n", | |
"        model_names = [f\"Model {i}\" for i in range(models)]\n", | |
"    if len(model_names) < models:\n", | |
"        raise ValueError(\n", | |
"            f\"model_names has {len(model_names)} entries but data contains {models} models\"\n", | |
"        )\n", | |
"\n", | |
"    # Long-format frame with one row per (model, day, sample), built from whole arrays\n", | |
"    # instead of appending one dict per element. Row order is model -> day -> sample.\n", | |
"    rows_per_model = days * samples\n", | |
"    labels = np.array(list(model_names)[:models], dtype=object)\n", | |
"    plot_data = pd.DataFrame({\n", | |
"        \"Day\": np.tile(np.repeat(np.arange(days), samples), models),\n", | |
"        \"Value\": data.transpose(0, 2, 1).reshape(-1),\n", | |
"        \"Model\": np.repeat(labels, rows_per_model),\n", | |
"    })\n", | |
"\n", | |
"    # Create the figure only after validation so a bad call leaves no empty figure behind.\n", | |
"    _, ax = plt.subplots(figsize=figsize)\n", | |
"\n", | |
"    # Use seaborn to plot the boxplots\n", | |
"    sns.boxplot(x=\"Day\", y=\"Value\", hue=\"Model\", data=plot_data, ax=ax, width=0.6)\n", | |
"\n", | |
"    # Plot true values on the same axes; positions 0..days-1 line up with the boxes.\n", | |
"    ax.plot(y_true, 'o', color='red', label='True Values')\n", | |
"\n", | |
"    # Setting labels and title\n", | |
"    ax.set_xticks(np.arange(days))\n", | |
"    ax.set_xticklabels([f\"Day {i+1}\" for i in range(days)])\n", | |
"    ax.set_xlabel('Days')\n", | |
"    ax.set_ylabel('Values')\n", | |
"    ax.set_title('Model Predictions vs. True Values')\n", | |
"    ax.legend()\n", | |
"\n", | |
"    # Show the plot\n", | |
"    plt.show()\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Model Evaluation" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def save_results_to_pkl(results: dict, dir_name, name):\n", | |
"    \"\"\"\n", | |
"    Pickle ``results`` to ``<dir_name>/<name>.pkl``, creating the directory if needed.\n", | |
"\n", | |
"    :param results: object to serialise\n", | |
"    :param dir_name: target directory (str or Path); missing parents are created\n", | |
"    :param name: file stem; ``.pkl`` is appended\n", | |
"    \"\"\"\n", | |
"    out_dir = Path(dir_name)\n", | |
"    # exist_ok=True replaces the separate exists() check, which could race with another\n", | |
"    # process creating the directory between the check and the mkdir.\n", | |
"    out_dir.mkdir(parents=True, exist_ok=True)\n", | |
"\n", | |
"    path = out_dir / f\"{name}.pkl\"\n", | |
"    with open(path, \"wb\") as f:\n", | |
"        pkl.dump(results, f)\n", | |
"\n", | |
"\n", | |
"def load_results_from_pkl(dir_name, name):\n", | |
"    \"\"\"\n", | |
"    Read back the object pickled at ``<dir_name>/<name>.pkl``.\n", | |
"\n", | |
"    :param dir_name: directory containing the pickle (str or Path)\n", | |
"    :param name: file stem; ``.pkl`` is appended\n", | |
"    :return: the unpickled object\n", | |
"    \"\"\"\n", | |
"    # Unpickling can execute arbitrary code, so only load files this notebook produced.\n", | |
"    pickle_file = Path(dir_name) / f\"{name}.pkl\"\n", | |
"    with pickle_file.open(\"rb\") as handle:\n", | |
"        return pkl.load(handle)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Simple helper function to train a model and plot ppcs\n", | |
"\n", | |
"def get_ppcs(y_samples, X_test, y_test, ppcs, number=0, name=\"\") -> dict:\n", | |
"    \"\"\"\n", | |
"    Evaluate each PPC on the samples and collect the results keyed by PPC label.\n", | |
"\n", | |
"    :param y_samples: model samples; converted with ``np.array`` before use\n", | |
"    :param X_test: unused; kept so existing call sites keep working\n", | |
"    :param y_test: true targets, passed to each PPC as its first argument\n", | |
"    :param ppcs: callables invoked as ``ppc(y_test, y_samples, number=..., name=...)``\n", | |
"        that return ``(samples, true, label)``\n", | |
"    :param number: forwarded unchanged to every PPC\n", | |
"    :param name: forwarded unchanged to every PPC\n", | |
"    :return: ``{label: {\"samples\": samples, \"true\": true}}``; if two PPCs return the\n", | |
"        same label, the later one overwrites the earlier one\n", | |
"    \"\"\"\n", | |
"    # Samples are used as-is: no clipping at zero and no rounding is applied.\n", | |
"    y_samples = np.array(y_samples)\n", | |
"\n", | |
"    ppc_results = {}\n", | |
"    for ppc in ppcs:\n", | |
"        # Unpack the label into its own variable. Reusing ``name`` here would overwrite\n", | |
"        # the argument, so later PPCs would receive the previous PPC's label instead.\n", | |
"        samples, true, ppc_label = ppc(y_test, y_samples, number=number, name=name)\n", | |
"        ppc_results[ppc_label] = {\"samples\": samples, \"true\": true}\n", | |
"\n", | |
"    return ppc_results\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" day_1 day_2 day_3 day_4 day_5 day_6 day_7 day_8 day_9 day_10 ... \\\n", | |
"0 2 0 0 4 1 0 6 0 0 0 ... \n", | |
"1 3 3 1 1 1 0 0 3 0 0 ... \n", | |
"2 0 0 1 2 1 1 1 1 1 1 ... \n", | |
"3 0 2 0 1 3 2 1 1 2 2 ... \n", | |
"4 0 0 0 0 0 0 0 0 0 0 ... \n", | |
"\n", | |
" day_18 day_19 day_20 wday month store_id event_name_1 event_name_2 \\\n", | |
"0 1 2 0 1 1 0 30 4 \n", | |
"1 3 0 1 3 9 0 6 4 \n", | |
"2 0 4 1 1 1 0 26 4 \n", | |
"3 2 0 2 3 3 0 30 4 \n", | |
"4 0 0 0 3 2 0 30 4 \n", | |
"\n", | |
" sell_price item_id \n", | |
"0 2.98 0 \n", | |
"1 2.98 0 \n", | |
"2 2.98 0 \n", | |
"3 2.98 0 \n", | |
"4 2.98 0 \n", | |
"\n", | |
"[5 rows x 27 columns]\n" | |
] | |
} | |
], | |
"source": [ | |
"# Preview the first rows of the training features after label encoding.\n", | |
"print(X_train_df.head())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"EVAL_VALUES = 10_000 #len(X_test_df)\n", | |
"np.random.seed(0)\n", | |
"\n", | |
"# Draw a fixed random subset of test rows for evaluation. The size is capped at the number\n", | |
"# of available rows because choice(..., replace=False) raises ValueError when asked for more.\n", | |
"eval_idx = np.random.choice(len(X_test_df), min(EVAL_VALUES, len(X_test_df)), replace=False)\n", | |
"\n", | |
"X_train_np = X_train_df.values\n", | |
"X_test_np = X_test_df.values[eval_idx]\n", | |
"\n", | |
"# Only the training targets get Gaussian jitter (std 0.01); the test targets stay exact.\n", | |
"# NOTE(review): presumably this smooths the integer-valued targets for the model -- confirm.\n", | |
"y_train_np = y_train_df.values + np.random.normal(0, 0.01, y_train_df.shape)\n", | |
"y_test_np = y_test_df.values[eval_idx]\n", | |
"\n", | |
"# change to float to prevent errors\n", | |
"y_train_np = y_train_np.astype(np.float32)\n", | |
"y_test_np = y_test_np.astype(np.float32)\n", | |
"\n", | |
"# Bundle the arrays with the column metadata and write them to dataset.pkl in the\n", | |
"# current working directory.\n", | |
"dataset = {\n", | |
"    \"X_train\": X_train_np,\n", | |
"    \"X_test\": X_test_np,\n", | |
"    \"y_train\": y_train_np,\n", | |
"    \"y_test\": y_test_np,\n", | |
"    \"col_names\": COL_NAMES,\n", | |
"    \"cat_cols\": CAT_COLS,\n", | |
"}\n", | |
"\n", | |
"with open(\"dataset.pkl\", \"wb\") as f:\n", | |
"    pkl.dump(dataset, f)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 1 started. Evaluating function at random point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 1 ended. Evaluation done at random point.\n", | |
"Time taken: 60.5465\n", | |
"Function value obtained: 0.5163\n", | |
"Current minimum: 0.5163\n", | |
"Iteration No: 2 started. Evaluating function at random point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 2 ended. Evaluation done at random point.\n", | |
"Time taken: 208.9847\n", | |
"Function value obtained: 0.5094\n", | |
"Current minimum: 0.5094\n", | |
"Iteration No: 3 started. Evaluating function at random point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 3 ended. Evaluation done at random point.\n", | |
"Time taken: 32.5392\n", | |
"Function value obtained: 0.5211\n", | |
"Current minimum: 0.5094\n", | |
"Iteration No: 4 started. Evaluating function at random point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 4 ended. Evaluation done at random point.\n", | |
"Time taken: 97.7318\n", | |
"Function value obtained: 0.5569\n", | |
"Current minimum: 0.5094\n", | |
"Iteration No: 5 started. Evaluating function at random point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 5 ended. Evaluation done at random point.\n", | |
"Time taken: 106.7900\n", | |
"Function value obtained: 0.5512\n", | |
"Current minimum: 0.5094\n", | |
"Iteration No: 6 started. Searching for the next optimal point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 6 ended. Search finished for the next optimal point.\n", | |
"Time taken: 497.2484\n", | |
"Function value obtained: 0.5337\n", | |
"Current minimum: 0.5094\n", | |
"Iteration No: 7 started. Searching for the next optimal point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 7 ended. Search finished for the next optimal point.\n", | |
"Time taken: 469.7641\n", | |
"Function value obtained: 0.5615\n", | |
"Current minimum: 0.5094\n", | |
"Iteration No: 8 started. Searching for the next optimal point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 8 ended. Search finished for the next optimal point.\n", | |
"Time taken: 657.2299\n", | |
"Function value obtained: 0.5154\n", | |
"Current minimum: 0.5094\n", | |
"Iteration No: 9 started. Searching for the next optimal point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 9 ended. Search finished for the next optimal point.\n", | |
"Time taken: 570.9859\n", | |
"Function value obtained: 0.5293\n", | |
"Current minimum: 0.5094\n", | |
"Iteration No: 10 started. Searching for the next optimal point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 10 ended. Search finished for the next optimal point.\n", | |
"Time taken: 560.4699\n", | |
"Function value obtained: 0.5511\n", | |
"Current minimum: 0.5094\n", | |
"Iteration No: 11 started. Searching for the next optimal point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 11 ended. Search finished for the next optimal point.\n", | |
"Time taken: 582.5229\n", | |
"Function value obtained: 0.5504\n", | |
"Current minimum: 0.5094\n", | |
"Iteration No: 12 started. Searching for the next optimal point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 12 ended. Search finished for the next optimal point.\n", | |
"Time taken: 628.3455\n", | |
"Function value obtained: 0.5275\n", | |
"Current minimum: 0.5094\n", | |
"Iteration No: 13 started. Searching for the next optimal point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 13 ended. Search finished for the next optimal point.\n", | |
"Time taken: 1070.3474\n", | |
"Function value obtained: 0.5044\n", | |
"Current minimum: 0.5044\n", | |
"Iteration No: 14 started. Searching for the next optimal point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 14 ended. Search finished for the next optimal point.\n", | |
"Time taken: 677.8123\n", | |
"Function value obtained: 0.5075\n", | |
"Current minimum: 0.5044\n", | |
"Iteration No: 15 started. Searching for the next optimal point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 15 ended. Search finished for the next optimal point.\n", | |
"Time taken: 492.3243\n", | |
"Function value obtained: 0.5117\n", | |
"Current minimum: 0.5044\n", | |
"Iteration No: 16 started. Searching for the next optimal point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 16 ended. Search finished for the next optimal point.\n", | |
"Time taken: 667.1462\n", | |
"Function value obtained: 0.5150\n", | |
"Current minimum: 0.5044\n", | |
"Iteration No: 17 started. Searching for the next optimal point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 17 ended. Search finished for the next optimal point.\n", | |
"Time taken: 730.5144\n", | |
"Function value obtained: 0.5215\n", | |
"Current minimum: 0.5044\n", | |
"Iteration No: 18 started. Searching for the next optimal point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 18 ended. Search finished for the next optimal point.\n", | |
"Time taken: 638.5987\n", | |
"Function value obtained: 0.5257\n", | |
"Current minimum: 0.5044\n", | |
"Iteration No: 19 started. Searching for the next optimal point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 19 ended. Search finished for the next optimal point.\n", | |
"Time taken: 539.1419\n", | |
"Function value obtained: 0.5108\n", | |
"Current minimum: 0.5044\n", | |
"Iteration No: 20 started. Searching for the next optimal point.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration No: 20 ended. Search finished for the next optimal point.\n", | |
"Time taken: 567.4410\n", | |
"Function value obtained: 0.5126\n", | |
"Current minimum: 0.5044\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
} | |
], | |
"source": [ | |
"MODEL_CLASSES = [Treeffuser]\n", | |
"NAMES = [\"Treeffuser\"]\n", | |
"\n", | |
"#MODEL_CLASSES = MODEL_CLASSES[-1:]\n", | |
"#NAMES = NAMES[-1:]\n", | |
"\n", | |
"NUM_SAMPLES = 100\n", | |
"HYPERS = [\n", | |
" {\"subsample\": 0.20, \"subsample_freq\": 1, \"verbose\": 0, \"num_leaves\":129, \"learning_rate\":0.5,\n", | |
" \"sde_manual_hyperparams\": {\"hyperparam_max\": 10}},\n", | |
" {},\n", | |
" {}\n", | |
"]\n", | |
"\n", | |
"results = []\n", | |
"for i in range(len(MODEL_CLASSES)):\n", | |
" model_cls = MODEL_CLASSES[i]\n", | |
" model = BayesOptProbabilisticModel(model_cls, n_iter_bayes_opt=20, frac_validation=0.01)\n", | |
" #model = model_cls(**HYPERS[i])\n", | |
"\n", | |
"\n", | |
" if False and model_cls == NGBoostPoisson:\n", | |
" # shuffle the data\n", | |
" np.random.seed(0)\n", | |
" idx = np.random.permutation(len(X_train_np))\n", | |
" X_train_np_ngb = X_train_np[idx]\n", | |
" y_train_np_ngb = y_train_np[idx].astype(np.int32)\n", | |
" model.fit(X_train_np_ngb, y_train_np_ngb)\n", | |
"\n", | |
" elif False and model_cls == NGBoostGaussian:\n", | |
" y_train_np_ngb = y_train_np + np.random.normal(0, 3, y_train_np.shape)\n", | |
" # rescale\n", | |
" #y_train_np_ngb = (y_train_np_ngb - np.mean(y_train_np_ngb)) / np.std(y_train_np_ngb)\n", | |
" model.fit(X_train_np, y_train_np_ngb)\n", | |
"\n", | |
" else:\n", | |
" model.fit(X_train_np, y_train_np)\n", | |
"\n", | |
" results.append({\n", | |
" \"model\": model,\n", | |
" \"model_name\": NAMES[i]\n", | |
" })\n", | |
"\n", | |
" save_results_to_pkl(results, \"m5\", \"results.pkl\")\n", | |
"\n", | |
"\n", | |
"results = load_results_from_pkl(\"m5\", \"results.pkl\")\n", | |
"\n", | |
"\n", | |
"# Save the results\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Model: Treeffuser\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
} | |
], | |
"source": [ | |
"\n", | |
"for i, result in enumerate(results):\n", | |
" model = result[\"model\"]\n", | |
" model_name = result[\"model_name\"]\n", | |
" print(f\"Model: {model_name}\")\n", | |
" y_samples = model.sample(X_test_np, NUM_SAMPLES)\n", | |
" results[i][\"y_samples\"] = y_samples" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"results_no_model = []\n", | |
"for result in results:\n", | |
" results_no_model.append({k: v for k, v in result.items() if k != \"model\"})\n", | |
"\n", | |
"# Don't uncomment or will overwrite the results\n", | |
"#save_results_to_pkl(results, \"m5\", \"results_final.pkl\")\n", | |
"#save_results_to_pkl(results_no_model, \"m5\", \"results_no_model_final.pkl\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"try:\n", | |
" results_no_model = load_results_from_pkl(\"m5\", \"results_final.pkl\")\n", | |
"except:\n", | |
" results_no_model = results" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Now we can compute the posterior predictive checks (PPCs) for the models fitted above" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0\n" | |
] | |
} | |
], | |
"source": [ | |
"ppcs = [max_ppc, zeros, percentage_zeros] + [partial(quantile_ppc, quantile=q) for q in [0.1, 0.5, 0.9, 0.99, 0.999]]\n", | |
"\n", | |
"for i, model_cls in enumerate(MODEL_CLASSES):\n", | |
" print(i)\n", | |
" ppc_results = get_ppcs(\n", | |
" y_samples=results_no_model[i][\"y_samples\"],\n", | |
" X_test=X_test_np,\n", | |
" y_test=y_test_np,\n", | |
" ppcs=ppcs,\n", | |
" number=i,\n", | |
" name=model_cls.__name__\n", | |
" )\n", | |
" results[i][\"ppc_results\"] = ppc_results\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Plot the PPCs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Titles for plots\n", | |
"ppc_tiles = {\n", | |
" \"max_ppc\": \"$\\max$\",\n", | |
" \"zeros\": r\"$\\text{zeros}$\",\n", | |
" \"quantile_ppc_0.99\": \"$q_{0.99}$\",\n", | |
" }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def set_plot_style():\n", | |
" \"\"\"\n", | |
" Sets a common plotting style for all of the figures that will be\n", | |
" used in the final paper.\n", | |
" \"\"\"\n", | |
"\n", | |
" # no grid but white with pretty ticks\n", | |
"\n", | |
"\n", | |
" # use latex font by default\n", | |
" plt.rc(\"text\", usetex=False)\n", | |
" plt.rc(\"font\", family=\"serif\")\n", | |
" sns.set_style(\"white\")\n", | |
"\n", | |
" # make it ready for a presentation\n", | |
" sns.set_context(\"talk\")\n", | |
"set_plot_style()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"samples [105. 108. 108. 122. 132. 172. 84. 94. 88. 182. 86. 95. 94. 168.\n", | |
" 115. 96. 112. 101. 111. 109. 100. 85. 92. 106. 92. 164. 94. 136.\n", | |
" 138. 173. 99. 97. 86. 112. 94. 108. 154. 93. 90. 120. 88. 105.\n", | |
" 82. 101. 137. 90. 117. 168. 88. 109. 92. 144. 99. 144. 94. 96.\n", | |
" 109. 97. 77. 94. 145. 106. 132. 153. 117. 167. 92. 98. 100. 91.\n", | |
" 98. 88. 120. 147. 92. 135. 131. 98. 97. 110. 118. 168. 101. 183.\n", | |
" 124. 92. 91. 163. 162. 94. 92. 107. 116. 154. 97. 140. 107. 130.\n", | |
" 118. 137.]\n", | |
"title max_ppc\n" | |
] | |
}, | |
{ | |
"data": { | |
"image/png": "", | |
"text/plain": [ | |
"<Figure size 640x480 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"samples [5647 5653 5697 5691 5594 5655 5665 5672 5601 5693 5679 5631 5663 5693\n", | |
" 5706 5631 5714 5695 5645 5590 5666 5672 5649 5628 5677 5646 5708 5707\n", | |
" 5658 5636 5666 5666 5699 5701 5631 5680 5709 5683 5630 5665 5738 5601\n", | |
" 5609 5644 5652 5706 5573 5603 5675 5634 5578 5671 5671 5681 5662 5737\n", | |
" 5669 5690 5704 5627 5643 5694 5683 5665 5603 5660 5639 5721 5645 5646\n", | |
" 5625 5648 5614 5630 5686 5627 5594 5621 5677 5680 5605 5593 5697 5707\n", | |
" 5705 5641 5747 5701 5630 5719 5657 5610 5671 5704 5636 5730 5662 5667\n", | |
" 5672 5760]\n", | |
"title zeros\n" | |
] | |
}, | |
{ | |
"data": { | |
"image/png": "", | |
"text/plain": [ | |
"<Figure size 640x480 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"samples [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", | |
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", | |
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", | |
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n", | |
" 1. 1. 1. 1.]\n" | |
] | |
}, | |
{ | |
"data": { | |
"image/png": "", | |
"text/plain": [ | |
"<Figure size 640x480 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"samples [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0.]\n" | |
] | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmEAAAHFCAYAAAC6kC4uAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAABcIUlEQVR4nO3de3yO9ePH8de9g82cGsbYchqbnGrCSDJaig6YnPJFSpIo+ioV1S/l9JW+5VxRkb6YQzIqwpCIOc55s5wP2zA2drDD/ftj3XfWht27N9fG+/l4eDzs+nyuz/W5drnnvc/1uT6XyWw2mxERERGR28rB6A6IiIiI3I0UwkREREQMoBAmIiIiYgCFMBEREREDKISJiIiIGEAhTERERMQACmEiIiIiBlAIExERETGAQpiIiIiIARTCRMRmiYmJzJkzh+eff56HHnqI+vXr4+/vT8eOHRk7dixRUVFGd7HApKenc/To0Wzbtm7dip+fH35+fqSnp1u3v/322/j5+TF8+PDb3c1cLVu2jO7du+Pv74+/vz/BwcF8//33ZGZmFkj7CxYswM/Pj0WLFhVIeyJ3G4UwEbHJunXrePTRRxk7dixbtmzBwcGBunXr4u7uTlRUFHPmzOHpp5/m448/Ji0tzeju2mXTpk089dRTLFu2zOiu2Oyjjz5ixIgR7N69G09PT7y8vNi/fz+jR4/mpZdesvvaRERE8J///KeAeityd3IyugMiUnzMnj2biRMnYjabCQoKYsiQIdStW9dafvHiRebNm8eXX37Jd999x759+/j6669xc3MzsNf598UXX+QYBQNo1KgRP/30EwBOTkXvx+jSpUuZN28eZcqUYebMmTRp0gSAffv2MXDgQDZt2sS0adMYOnRovtrfunUrQ4YM4erVqwXYa5G7j0bCRCRPduzYwaRJkzCbzQwePJhp06ZlC2AA5cuX57XXXuObb76hVKlS7Nq1i/HjxxvU48JTsmRJfHx88PHxMborOWRkZDBjxgwAhg8fbg1gAA0aNGDixIkAzJkzh8TERJvaTk1NZcqUKfTr14/Lly8XXKdF7lIKYSJyS5mZmbz77rtkZGTQpk0bhgwZctP6TZs25d///jcACxcuZNu2bbejmwJs376dEydO4OzsTMeOHXOUt2jRgurVq5OUlMTatWvz3O7x48d5/PHHmTp1KgBDhw7Fy8urwPotcjdSCBORW9q0aRPHjh0DYNCgQXnap2fPnlSpUgWAefPmZStr27btTSd0Wya4v/322znKYmNj+e9//8uzzz5Ls2bNqF+/Ps2aNaNHjx58/fXXpKSkZKt/6tQp/Pz8aNmyJWazmUWLFtG1a1frZPXu3buzZMkSzGazdZ+lS5fi5+dnDY8zZ87M1p8bTcy/mYyMDH744Qf69OlDs2bNaNCgAW3btuW9996zfm8Lwu7duwGoV68eJUuWzLVO48aNAWwKx+fOnePs2bM88MADhISE8Morr9jdV5G7XdGbzCAiRc769esBqFChAo0aNcrTPg4ODjz66KPMmzePTZs2kZmZiYODfb/37d69m5deeomEhARcXFyoVq0aTk5OnDp1il27drFr1y7Wrl3L3LlzcXR0zLav2WxmxIgR/Pjjj5QtW5aaNWty8uRJdu/eze7duzl69Kj1qcYKFSrQuHFjIiMjuXLlClWqVKFKlSrUqFEjX/2+evUqgwcPZvPmzQBUrlwZb29vjh07RkhICMuXL2fixIm0a9fOru8PZI1YAdx77703rGMZwbIl/Hl6evLll1/SunVru/onIn/TSJiI3NKhQ4cAcswBu5X77rsPyAohp06dsqsPGRkZvPnmmyQkJBAUFMRvv/3GihUrWLZsGVu2bLHe/ty+fTu//fZbjv0vXLjAihUrGDlyJH/88QdLly5l06ZNPPPMMwB88803XLx4EYDWrVszf/586tWrB0DHjh2ZP38+AwcOzFff33vvPTZv3kydOnVYtGgRGz
duZOnSpWzZsoWBAweSkpLC8OHDiYyMzFf717OcQ/ny5W9Y55577gEgPj4+z+1Wr15dAUykgCmEicgtXbhwAYBy5crZtF/FihWtf7eEg/w6dOgQly5dokSJEnz88cfZ+uLs7MyAAQOsoz83CjPPPfccffr0sY6Subi48O6772IymUhPTyciIsKuPt6o3ytXrqRkyZLMnj0720iii4sLw4YNo3379qSmpjJ9+nS7j5ecnGxt+0ZcXV0Bcty6FZHbSyFMRAqNyWSy/t3eW5H169cnPDyc8PBw3N3dc5Rfu3bNGswsQeSf2rRpk2Obu7u7ddQoISHBrj7m5tdffwWgWbNmVK5cOdc6lgn0GzduJCMjw67j/fM27M1cf31E5PbTnDARuaXy5ctz7NgxLl26ZNN+19/uutntMVu4urry559/cuDAAU6cOMHJkyc5cuQIhw8fJjU1FeCGK8LfKARZRobsDUC5sbw9YN++ffTs2TPXOpZ+X716lZiYGKpWrZrv41nWZLO0mRvLCJjlvEXEGAphInJLdevWZefOnda5YXl14MABAEqXLo2np6fd/dizZw//93//Z23Xwt3dndatW3PgwIGbzj1zdna+afvXPyFZUCxrcV24cMF6W/dmEhIS7AphllHCmwVmSziuUKFCvo8jIvZTCBORW2rdujX/+9//uHjxIjt37rQucXAzmZmZbNiwAYDmzZvnurL8jUJPUlJSjm3R0dH06dOHlJQUateuTZcuXahbty4+Pj7WEa4ePXrY/QBAQbMsE/HCCy8wYsSIQj+eZQHZ06dP37COpSy/T3uKSMFQCBORW2rVqhU1atTg2LFjTJs2jdmzZ+eoM2DAAOrXr0+PHj2oXLkyoaGh1iUQevXqla2uZd7StWvXcj1ebGxsjm1z5swhJSWFWrVqsXjx4lzXwIqJibH11ApdzZo1AW76UvP4+Hj+/PNP61IY9szVskz8P3DgANeuXaNEiRI56uzatQsAf3//fB9HROynifkickuOjo6MHTsWR0dHNm3axOeff56t/Pz584SHhzN9+nSefPJJpk2bxocffghAYGAgDz30ULb6lltmf/75Z45jnTt3jn379uXYbhm98fHxyTWA/f7775w5cwYouLldBTFx3fIwwJYtW4iOjs61zqRJk3juuefo3bu33bdE/f398fT0JDk5mR9//DFH+ebNmzl+/DilS5fmscces+tYImIfhTARyZMHH3yQN998E5PJxPTp03n11Vc5fPgwkLUUxR9//MHAgQNJTExk8uTJXL16lZo1a/LJJ5/k2hbADz/8wI4dO6zbjx07xqBBg0hLS8uxj2VE6ffff2f79u3W7enp6axYsYJhw4ZZtxXU0guWSe43u7V3K02aNOHhhx8mPT2dl156iZ07d1rLrl27xvTp061vDnjppZfsforUZDLx6quvAjBu3Dg2bdpkLdu/f7/1lmjv3r0pU6ZMtn1jY2OJjo7mxIkTdvVBRPJGtyNFJM/69etH9erVefvtt1mzZg1r1qyhUqVKVK5cmatXr1pXa7c4deoUs2bNYsCAAZQqVcq6vW/fvixfvpzz58/z3HPPUbt2bSBrZKxs2bI8//zzfPvtt9naeuGFF1ixYgXx8fH06tWLGjVqUKpUKU6dOsXly5dxc3PD39+fXbt2ce7cuQI533r16hEWFkZoaCiHDx+mSZMmfPDBBza388knn/Dyyy+zZ88eevbsibe3N+XKlePkyZPWZTGef/55evToUSD97tq1K9u2bSM0NJQXX3yRGjVqUKJECaKiojCbzTzyyCMMHjw4x36ffvopP/zwA15eXqxbt65A+iIiN6aRMBGxSdu2bVm7di3vvPMOAQEBpKWlcfDgQRISEqhbty59+/Zl0aJFfPbZZ9xzzz3MnDmTNm3asGbNGmsbnp6eLFmyhB49elClShWOHz9OYmIizz77LMuXL7eGsutVrVqV5cuX07NnT2rUqMHZs2c5evQoFStWpHfv3i
xfvpyhQ4cCWe92zG1yv61eeuklunbtyj333MOxY8esI3+2cnd35/vvv2f06NE0a9aMxMREDh8+jJOTE61bt2b69Om88847dvfXwmQyMXHiRMaNG4e/vz+xsbEcO3aMOnXq8OabbzJ9+vRcH5QQkdvLZC6MZ7JFRIArV67w/fffs3DhQmbPnm29pSgiIgphInIbmM1mrc4uIvIPuh0pIoVOAUxEJCeFMBEREREDKISJiIiIGEAhTERERMQACmEiIiIiBtBCMYWoTZs2XLx4ERcXF7y9vY3ujoiIiOTBqVOnSE1NpXz58oSFhRXacRTCCtHFixdJSUkhJSWFy5cvG90dERERscHFixcLtX2FsELk4uJCSkoKrq6u+Pj4GN0dESko6emwZ8/fX99/P2gFepE7RnR0NCkpKbi4uBTqcfRToxB5e3tz+fJlfHx8WLp0qdHdEZGCcukSuLv//fWePXDPPUb1RkQKWHBwMPv37y/0qUSamC8iIiJiAIUwEREREQMohImIiIgYQCFMRERExAAKYSIiIiIGUAgTERERMYCWqBARyQOz2YzZbM76IjMTrl8/KDMz64+IFBkmkwmTyWR0N26qWIawY8eO0alTJ7p27crIkSNt2jcmJobp06ezefNmzp07R8WKFWnbti2vvvoq5cuXL6Qei0hxlJyczOXLl0lMTCQ9Pf3vgowM+Oqrv78+cwZiYm5/B0XkplxcXHB3d6dcuXI4OBS9m39Fr0e3cP78eQYNGkRycrLN+544cYIuXbqwYMECXF1dadOmDY6OjsybN49OnTpx9uzZQuixiBRHCQkJHDt2jPj4+OwBDMDBAe677+8/RfCHu4hAamoq586dIyYm5u+R7CKkWI2EHTx4kNdff53jx4/na/8RI0YQFxfHkCFDGDx4MAAZGRmMHj2aBQsW8P777/PV9b/dishdKTk5mdOnTwNQunRp3N3dcXV1/fs36fR0iIj4e4fatfXaIpEiJiMjg4SEBGJjY7l06RKlSpWibNmyRncrm2LxU+Py5ct8+eWXzJ07l2vXruHt7c2pU6dsaiM8PJydO3dSq1YtBg0aZN3u6OjIqFGj2LhxIxs3buTIkSPUrl27oE9BRIqRy5cvA1kBzNvbO+e8kn+OfDk4aDRMpIhxcHCgQoUKpKenc/HiRRITE4tcCCsWPzXmzp3LrFmzKF++PDNmzKBTp042txEWFgZAUFBQjvvCzs7OPProowCsW7fO7v6KSPGWmJgIgLu7e5Gf2CsiN1emTBkArl69anBPcioWIczT05MRI0awatUq2rZtm682IiMjAfDz88u13DL6dfjw4fx1UkTuCGaz2ToHzNXV1eDeiIi9XP56kjkjI6PIzQsrFrcju3btancbMX89uVS5cuVcyz08PACIi4uz+1giUnxd/0O6KD5NJSK2uX4022w2F6nR7bvmJ4zlacob/WZr2Z6UlHTb+iQiIiJ3r7smhDk6OuapXqYWXBQREZHboFjcjiwIpUqVArLWDMlNSkpKtnoi+TVgbjip6QrzxZWjCZ6t7UjVe1w5dv4KplxuSTpkZlL9uq+PX0wiU7cu71gmTNSoqP8bpODdNSGsUqVK7N+/n9jY2FzLLdsrVap0O7sld6DU9EyFsGLMyQRmc1agyjSDKZd5vKZ/TO41m81kFq35vlKAHHL7RyBSAO6aX90sT0UeOXIk13LL9hs9PSkiInkTvuV3Jn70Hi90fYbOQS3p1PYhXuoZzGfjR3No394b7jdi8AA6PPwgmzeG3cbeFl9Lly7Fz88v29qXubl27RoBAQH4+fmxePHiPLX98ccf4+fnx9tvv21zv6ZMmYKfnx9jxoyxed+7zV0TwgIDAwH49ddfczyimpaWxtq1a7PVExER21yKv8g7rw/kgzdfI2zVTzi7lODBZi1o1LgJ6elprF7xI28MfJ7PJ3zEtRtMDZGCV6JECZ555hkAfvzxx1vWv3btGqGhoQB069atUPt2t7vjbkempa
Vx4sQJAKpVq4azszMA/v7+NGrUiIiICD777DOGDh2KyWQiIyODMWPGcPbsWdq0aYOvr6+R3RcRKZYSEy4z/JUXOHPqJHXrN+TVf7+Nj2/dbHV2hv/BlP+MYVXoMk4dP8b4yV/gqNc93Rbdu3dn7ty5hIeHc+bMGapWrXrDuuvWrePSpUv4+vrSuHHj29jLu88dNxIWExNDhw4d6NChg3VtMIvx48fj7u7OzJkz6dChA6+99hpPPPEE8+fPx9vbm9GjRxvUaxGR4u3z8R9x5tRJHniwGf+Z9lWOAAbQuGlzPvtqLp5VvNgfsZvvv/nSgJ7enWrXrk3jxo0xm80sX778pnWXLFkCaBTsdrjjQtjN+Pj4sGTJEoKDg0lMTCQsLAyTyUSfPn0ICQnRpHwRkXyIjjrM5o1hODo6MuzdD3Bycr5h3XL3uPPKGyMA+GHBPJKuXslZyWwmdMlCXv7Xs3Rs24K+wR2Y/ukELp7PuZh20tUrzPliGoOf70mXdo/Q5bFWDO73HN9//WXubQNbf9/Ie28MpnuHtnRs24L+PToxe9pnJFy+lKPu888+RYdHmnHy5El69epFgwYNePjhh5k6dSp+fn40a9aMa9eu5XqcgQMH4ufnl+MWYFhYGC+++CIBAQE0bNiQdu3aMWHCBOLj43Nt5+zZs7z//vu0adOGRo0a8cwzz1iDki26d+8O3PyWZExMDL///juurq507NjRun3Dhg0MHjyYRx55hAYNGuDv78+TTz7JxIkTre9avZlbzV/r2LEjfn5+bN26NUeZrd+v4qRYjgMPGTKEIUOG5Frm7e1901cPeXl5MW7cuMLqmojIXef39Vlzah94sBkelT1vWb9J84eo4FGJC3GxbApbS7unOmYrn/vVDE4c+5PavnUJeKgVhw/sY8XSEH7fsJaJ02ZT1fteAK6lpvLW4AH8GXWYChU9uL9xEzIzMzmwdw/ff/0Ff2xaz3+/nJMtFH45eRLLQv6Hk5MTderWo3xFDyIP7mfJ/O/4LWwN4z6fQRWve7N32Gymf//+JCcnExgYyP79+3n88cf5/fff2blzJxs3biQoKCjbLhcvXmTTpk2ULl2adu3aWbePHTuWOXPm4OzsTIMGDahUqRJ79+7l66+/ZtWqVXz77bdUq1bNWj8qKornn3+e8+fPU6NGDdq0acOxY8d49913qVOnTt4u0F+eeOIJxowZw59//klERASNGjXKUWfZsmVkZGTwzDPPWF92/cknn/DVV1/h5ORE48aN8ff3Jy4ujt27d3PkyBF+++03lixZYp3+U5Bs/X4VN8UyhImIFAkpKZCWhsPlS5gd8rYgdFFgdi6B2c2twNo7tH8fAL716uepvslkouEDjVn/6y9EHT6YI4SdOPYnr7/9Ho8/1QnImuv72fjRhK36if+O+5CJ02YB8PuGtfwZdZiGDzzImM+mWcNWYkICbw56kejIw/y+IYzWj2aFoHWrfmJZyP/wrOLF+xM+pUatrHcGZ2Rk8M2MySxdMI/xH7zLZ1/NzfZqG8si3j/99BOlS5cmMzMTBwcHgoOD2blzJ6GhoTlC2M8//0xaWhodO3akZMmSQNYI1Jw5c/D29mbGjBnWOcgZGRl88sknfP311wwbNozFixdjMpkwm82MHDmS8+fP88ILL/Dmm29aX6X1/fff2zyFxtXVlWeeeYZ58+bx448/5hrCli5dCvw9anbo0CFmzZpF2bJlWbBgAT4+Pta60dHRdOvWjcOHD7N582Zat25tU39uxdbvV3GkECYikh+TJkFICGRmUtx+Dzc7OHC5/0AufDyhQNqLv3gegHvcK+R5H/fyWXUvxOVcu7H5w62tAQzA2dmZ194ayY6tm9m/ZxdHj0RRs3Ydzv/1rt+KlStnG+0qU7YsQ958l9MnT1D7urlpId99A8Dgt961BjDIeqPKC4NeZ8e2LUQdOsDu7dvwbxqQrU/PPvsspUuXBv5+p2j79u0ZM2YMYWFhXLlyxVoOWO
ddde7c2brtyy+z5sCNHj0620Ngjo6OvPnmm2zatIl9+/axZcsWHnroIfbu3cuePXuoXr06w4cPz/Yu0169evHbb78RFmbbch7dunVj3rx5rFy5krfffjvb6NX27ds5duwYvr6++Pv7A3Dp0iUef/xx/P39swUwyJri07x5c9asWcPp06dt6kde2Pr9Ko7uqjlhIiIFZtEiKKavOTNlZlJu9hcF1p5lpMjZOe+/1zvd5KnItk88mWObi4srjZs2ByBi13YAGvk/CEDYqp94799D+PnHpcScOwtA/fv9afdUR7zuzYrIFy+c58SxP3F0dKThAw/maN/BwYEmzVsCsGdneI7yevXq5dhWunRpgoKCSE1N5ddff7VuP378OLt376ZatWo0adIEgLi4OI4cOYKTkxNNmzbN9fitWrUC4I8//gBgy5YtALRq1SrXV+89/vjjObbdip+fHw888ADx8fFs3LgxW5llnpllFAygefPmfP755zz//PPWbRkZGZw4cYJffvmFU6dOAVmjlQUpP9+v4kgjYSIi+dG1q3UkrLgxOzpy+cWXC6w9j0qVOXnsKJcvXcrzPhcvZI2eVfDI+UBUlapeuR/nr/lmF/4aAfOr14BXho3g6+mfsWPrZnZs3QyA173VadEqkPYdg6ni5Q1AXMw5ICtAdGzT/KZ9s9S93j333JNr3eDgYEJDQwkNDbWOellGwTp16mStd/ZsVjhMT0+nYcOGNz3+mTNnAKxP+Ht65j7P7t577811+6107dqV3bt38+OPP/Loo48CkJSUxC+//GK9ZXm9a9eusXLlSlatWkV0dDRnzpwhPT0dwHob8J/rb9orP9+v4kghTEQkP/79b3j1VUhL44RXLTLu4jlhdfzqsXPbHxzcF5HnfQ7+tXK+T52cazM6l3DJfae//qO/fm2xp7t0I/Cxx9ny2wZ2bN3Mvt07OX3yOIv/N4dli/7HqDETafZQK8x/vVeqdOkyNH3o4ZufT92co14ON3g3aPPmzalSpQp//PEHcXFxeHh4EBoaislkyhbCLKOFZcuWveXcqQYNGvzjtHMPOLmNjuXFk08+ybhx4wgLCyMhIYGyZcvy888/k5SURHBwsHVCPsCFCxfo3bs30dHRuLi40KBBA1q0aIGPjw/+/v7W+WX2yvzHLzP2fL+KE4UwEZH8cnUFV1cyy91DZjEKYQXt0fZPsvC7r9mxdTMx585S2bPKTevv3rGNM6dO4OLiQqu2j+UovxAXS7UaNXNsP3c2a8Sj0j+ewCxTthztnnyGdk9mjeD8GRXJvK9n8sdvG5g97TOaPdQK94oVAXBxdeXN9z/O13nmxsHBgY4dOzJz5kxWrVpFw4YNOX78OAEBAXh5/T2i5+HhAWRNjv/kk0/y1LZlBOxG863+uRZmXpUsWZKnn36a+fPn8/PPP9O9e3d++OEHIOfaYJ9++inR0dG0aNGCzz//nHLlymUrT0hIyNMxLSE2IyMj1/J/LnORn+9XcaQ5YSIiYhfvajUIav8UGRkZTPr4fdJusG4WZK2sP3XiWACe6tKdUqXL5Kiz/Y/fc2xLunqFnduy5kg1apw1z2re7C/oE9yBsNU/Z6tbq44vL782HIDYv24tVvasQuUqVblwPo7IQwdy7duED97ltRf/xca1q291ytkEBwcDWa/FW7lyZbZtFl5eXnh5eREbG8vevbm/P/ONN94gODiYn376CYCWLbPmqK1fvz7XtcjWrVtnUz+vZ5n39fPPPxMTE8P27duzTci32LlzJwDPP/98jgB25coVdu3aBdz6dqTbXyOvcXE513o7ffp0jkCZn+9XcaQQJiIidhs47C3urVGTfbt3MmLIAP6MisxRZ++uHbzxcj/OnDqJX70G9B2Q+8KdoUsWEr5lk/XrlJRkPvn4fa5euUKLVoHWyfaeVatyPjaG/33zlXWOmcXaVVlhyO++v5fNCO7RG4CJH47K0b/QJQvZsHYVx6KjqFv/5nOQ/ql69eo0bt
yY7du389NPP+Hm5pZtbTCLF198EYDhw4dz6NChbGWWJxYjIyN54IEHAKhfvz4BAQGcO3eO999/P1sQW7FixS1Xvr+Z++67jwYNGhAeHs78+fMxm83ZJuRbuLu7A7B27dpsQevixYu8/vrrXPprHmDqLd4FWrdu1lOq+/fvz7Yga2JiIqNGjcp1H1u/X8WRbkeKiIjd3NxK8d8vvmXSxx+w5bf1DO7Xk+o1ffCqVp3MzAyORR/h3Jms22qPdXiGV4a9dcOV9evcV5//e2sodRs0onyFihzYu5v4CxeoXtOH1976+z/sNu06sGHNanZs3cyL3TtSr+H9uJUqzYljf3Ly2FHcSpXipdf+ba3/VHBXog7tZ83PK3i9/7/w8a1LxUqVOXE0mlMnjuPg4MAbIz+k0i1up+bGsmZYXFwcwcHB1pGf6z333HPs3buXH374gS5dulCvXj08PT2Jiori6NGjODg4MH78+GzvdRw3bhzPP/88P/zwA1u2bOH+++/n7NmzRERE0LhxY+tIVX50796d9957j1mzZlGyZMlsK+RbvPDCC+zcuZOQkBC2b99OnTp1uHTpErt27eLatWvUqVOHqKgozp8/n8sR/latWjWeeOIJfvnlF/r160dAQAAuLi5s374dV1dXWrVqxW+//Wb396u40UiYiIgUCLdSpXlv3CQ+/nQqgY89QUpKMts2/8beXTsoWdKNp7t0Z/LseQx79wNc/1rANDeD3hhBn5cGEX/+PFt/34hLCVe69e7HpC++odxfIzOQNTF91NiJ9B3wKl73VufA3j1s/X0jaanX6NCpC9PmLMSnjp+1vslk4o2RH/LuRxNo1LgJZ06dJHzLJtLT02kd9DiffTWXwMeeyNe5t2/f3roo6/Vrg13PZDIxfvx4Pv/8cwICAjh+/Djr168nPT2dp556isWLF/PUU09l28fLy4uQkBBeeOEFnJ2drZPpR4wYwRtvvJGvvlo8+eSTlCpVirS0NNq3b0+ZMjlvDQcFBTFnzhxatGjB5cuXWbduHX/++SetWrVizpw5TJw4EYA1a9bkmFz/TxMnTmTo0KFUq1aN8PBw9u7dS7t27Vi6dGmuQSo/36/ixmQu6OdKxSo4OJj9+/dTv3596yrEcufr+/VWUtOL37IFksXJBD39nPByL0npStUw5fJUnGNmBjVORlm/PnZvnWL1dKTYxsEENSuWvnVFKZIyMzOtrzP08/O74ZOu17td/39rJExERETEAAphIiIiIgZQCBMRERExgEKYiIiIiAEUwkREREQMoBAmIiIiYgCFMBEREREDKISJiIiIGEAhTERERMQACmEiIiIiBlAIExERETGAQpiIiIiIARTCRERERAygECYiIiJiAIUwEREREQMohImIiEgOK1asYMqUKZw8edLortyxnIzugIiIiBQtkZGRjBgxgvT0dCIiIvjqq6+M7tIdSSNhIiIiks2HH36Iu7s7Q4YMYePGjaxevdroLt2RNBImIiIiVj/88APbt2/niy++oHXr1uzYsYOxY8fy8MMP4+bmZnT37igaCRMRyQ8HB+sfExS7PyK5SUhIYOLEiXTt2pXAwEBMJhPjxo3j6tWrTJ061eju3XE0EiYiYiMTQOPG1q+rG9aT/Psz7gpmozshRU7ZsmXZvHlztm2enp6Eh4cb1KM7m0KYiIjYZd7sL/jfN1/atM9z/QbwrxdfLqQe2e7C+Ti++PwTInaGk5SURLl73Pl05jc4ODry5eRP2LtzO1evXqV8+fIsXLiQKlWqGN1luQMohImI2GHA9PVcdiiBuRjc43N1dmTuCwEF3m7N2nVo0659tm0pycls+W09QI4yyz5FycTRo4jYuZ3yFSrSvOUjZGRkUMGjEu8OfYWIndvx8PCgbdu2ZGRkULlyZaO7K3cIhTARETukpmWQ6phZLEJYYWnZui0tW7fNti3m7BlrCHvz/Y9vf6dsdHBfBAAfTZqaLSBats+aNYu6desa0je5cymEiY
jIXS/t2jUAPCp75rq9atWqt71Pt1NwcDD79+9n7ty5XLhwgUWLFnHgwAGuXr1KlSpV6Nq1K/3798fBQc/zFSSFMBERMYRlLtkbI/+PY9FHWLViGZkZmdz/YFPeH/+ptd7+iN38sGAeB/bu4cqVRNzLV6BJ85Z0792PSp65z83K6z4jBg9g7+4d1q+7tQ8EoJJnFWLPnbVub9q0KQDjxo3j9OnTTJ06lT59+jBy5Mgcx27SpAmJiYmsXbsWb29v6/YtW7bw7bffEhkZSVxcHPfccw8PPPAAffr0oVmzZjna2bFjB9988w07d+4kISEBDw8PWrVqxcCBA3OEwilTpjB16lTGjx9PZGQkixYtIiMjg+bNmzNjxoybXQbS09M5cuQIAAsWLOCnn36iTJky3HvvvZw+fZoTJ04wadIk4uLicj1fyT9FWhERMVTId9+wfPEC6jV6gBq16+BV7e/nTZeF/I+3Xu3Plt/W41HZk4CWj+Dq6srPPy5hyAu9OHxgX472bNnHv2lAtjlrrdo+Rpt27XmswzPZtrdv356nn36aatWq5escV6xYQb9+/di4cSNVqlShbdu2VK5cmV9//ZU+ffrw008/Zav/7bff0qtXL9asWWOt7+rqysKFC+ncuTMRERG5HueLL77gu+++48EHH8TPz4+aNWvesm9//vknqampAKxatYp///vf/P777/zwww9s3ryZ5557DoDvv/+ec+fO5ev8JXcaCRMREUOdOnGcjyZN4cGAhwDIzMwEYO/unXw15VNKlS7DB+M/pf79/tZ9loX8jy8nT2LMqLf4av5SXFxc87VPj74vAhC2+mcAhrw5ktJlylj3sWwfPXo0ZcuWBbJGtGw1efJkzGYzs2bN4uGHH7ZuX7hwIe+//z5TpkyhQ4cOAISHhzN+/HjKli3L9OnTadKkibX+nDlzGDt2LEOGDGHVqlW4urpmO87Ro0eZNWsWrVq1yva9vJmDBw9a/z5y5Eh69epl/drJyYm3336bVatWceHCBbZt28Yzzzxj8/lL7jQSJiIihrq3Rk1rAAOs844Wf/8tZrOZF14Zki1MAXTq9hxNmrfkfGyMNSjld5/bISYmBiDHSFrXrl159913GTZsGGZz1sptX331FWazmeHDh2cLYAB9+/blkUce4dy5c4SGhuY4jo+PjzWAAXmaw3Xo0CEA6tevbx31up6Liwt+fn4AXLhw4ZbtSd4phImIiKF86vjl2JaRkUHErqy5Wg80yX1ZjWYPZY0o7dkRnu99bpeAgKz+9OzZkwkTJrBlyxauXbuGg4MDffv2pV27dphMJjIyMti2bRsALVq0yLWtwMBAAP74448cZffdd5/NfbOEsJ49e2Iy5f6Yr2XErVSpUja3Lzem25EiImKosuXK5diWmHCZ1JQUAF7odvPbX3Ex5/K9z+3y0Ucf8dprr7F7926+/vprvv76a0qWLEnz5s156qmn6NChAw4ODly6dInk5GQAgoKCbtrmmTNncmxzd3e3uW+WENa8efMb1jl58iSQtXq+FByFMBERMZTJlPOmjGUuk6OjI4882u6m+1eqXCXf+xSGjIyMHNsqV67MwoUL2bVrF2FhYWzZsoX9+/cTFhZGWFgYISEhzJ4923oOTk5OtG+fc5Hb63l5eeXYdqORrBuJiYnh4sWLODo65tqepc6RI0dwdnbmwQcftKl9uTmFMBERKXLKliuHk7MzmRkZvD7iPUq4uBTKPvlhCTq5ha20tDSSkpJuuK+/vz/+/llz1a5cucKvv/7KRx99xNatW/n111957LHHcHZ2JiMjgzFjxuBSSOdgYRkFM5vNZGRk5DqHbOHChZjNZtq2bavbkQVMc8JERKTIcXJypl6DRmRmZrJt82+51pk97TMGP9+THxfNz/c++WEJInFxcTnKdu3alWPbmTNn6NSpU46nCkuXLk3nzp1p166dtZ6zszP+/v5kZmYSFhaW6/EnTJhAx44dmTt3br7PwcLyZGRmZm
a2pyQtjh49ytdff42joyNDhgyx+3iSnUKYiIgUScE9ewMw/dMJROzcnq3s9w3r+HHRfP48EkmduvXs2sdWltcX/fbbb0RHR1u3x8TEMGbMmBz1q1atSmJiIocPH+bbb7/NVhYTE2Nd8qJRo0YAvPhi1rIZo0ePZuvWrdnqr169mu+++45Dhw7RoEGDfJ+DhSV4OTs7M3bsWC5evGgtCw8P58UXXyQ5OZk33niDOnWK1vs+7wS6HSkiIkVSs4da0fP5/sz/dhZvv/YyPr5+VK7ixbkzp/kz6jAA/V55jXoN77drH1sFBARw//33s2fPHjp37kyLFi2yRt+2bcPb25tGjRrlWEx17NixvPjii4wbN46FCxdSu3ZtkpKS2LFjB8nJyXTs2NG6an5gYCCDBg1i+vTp9OnTh3r16uHt7c3JkyetoWn48OE0btw43+dgYbkd+eabbzJu3DgCAwOpWbMmiYmJnD59GshaFqN///52H0tyUggTEbGDi7MjLg4OxeIF3q7OjkZ3wWa9+79CwwceZPniBRzcF8Hxo3/iXr4CAS0foXP3XjRq3KRA9rGFg4MDs2fPZsaMGfzyyy/8/vvvVKxYke7duzNkyBCGDh2aY5+AgAC+//57Zs+ezc6dO1m3bh1ubm40aNCAZ599lo4dO2ar//rrr9OsWTPmzp3L7t27iYqKwsPDgzZt2tCvXz/rkhf2uHr1KidOnKBkyZL07t2be++9l5kzZxIZGYmzszMPP/ywdV0yKRwms2V1OClwlhei1q9fn6VLlxrdHblN+n69ldT0W69SLUWTkwl6+jnh5V6S0pWqYcplorJTZgbVK+dcVqE4+TPuCvrhnzcOJqhZsbTR3Shwu3btokePHjRs2JDFixcb3Z1Ck5mZyeHDWaOgfn5+eVrA9nb9/605YSIiInchy61Ny2r4cvvpdqSIiI3MADt3Wr8+7uVDhkPxutWnUTCxzAdTCDOOQpiISH5c92JkMwo1UvxoJMx4uh0pIiJyl8nMzCQqKgpQCDNSsRkJO3r0KNOmTWPHjh1cuHABT09P2rdvz4ABA2xewXfbtm3MmjWLPXv2cPXqVSpUqMBDDz3EwIEDqV69eiGdgYiISNHg4ODA7t27je7GXa9YjIRFREQQHBxMaGgoHh4eBAYGkpSUxMyZM+nRoweJiYl5bmvRokX06dOHDRs24O3tTWBgIE5OTixdupROnTrlutqxiIiISEEr8iEsLS2NoUOHkpSUxPjx4wkJCWHy5MmsWbOGtm3bEhkZyaRJk/LU1sWLFxkzZgwODg5MmTKFJUuWMHXqVFavXk2fPn1ISkpi1KhRhXxGIiIiIsUghK1cuZLTp0/TsmVLOnfubN3u6urK2LFjcXNzY/HixSQkJNyyre3bt5OcnMwDDzxgfVcXgKOjI2+88QaOjo4cOXIk22sbRERERApDkQ9hlheYXh+aLNzd3QkICCAtLY1Nmzbdsi3LAm1xcXFkZGRkK7t8+TIZGRk4OztTuvSdtyifiIiIFC1FPoRFRkYCN356w/JCUctquDfTpEkTSpUqxYkTJ3jrrbc4duwYKSkpREREMHjwYAB69+5NiRIlCqj3IiIiIrkr8k9HxsTEAFC5cuVcyz08PACIjY29ZVv33HMPU6ZMYfjw4axYsYIVK1ZYy1xdXfnwww/p0aNHAfRaRERE5OaK/EhYcnIykBWScmPZnpSUlKf2/Pz8eOqppzCZTNSvX59HH32Ue++9l5SUFObMmcO+ffsKpuMiUixl/LXqqtkMZi3BKlLsXf+KbJPJZGBPciryI2GOjo5kZt76Zch5eQ/5qVOn6N27NwkJCXzzzTe0aNHCuu+cOXMYN24c/fr1Y8WKFTcceRORO5sZSEqH9EwzmWnXcHApaXSXRMQOqampQFaeKGohrMiPhFkWYrV8E/8pJSUFADc3t1u29d///pczZ87w+uuvWwMYZCXj559/nqeffpqEhATmzJlTAD0XkeLqZGImydcySE
tKzNMveCJSdFnWErV1YffbociPhFWqVIlLly4RFxdHlSpVcpRb5oJVqlTplm1t3boVgEceeSTX8sDAQEJDQ3VLUuQuF33ZjF/5dFyvXgHA2a00Ds4umMj6LdpszuT68XmzORNzZtH6DVsKjtlEnu7ISNGSkZFBQkKCddmpMmXKGNyjnIp8CPPz8yMyMpKoqCgaNWqUo/zIkSPWerdy+fJlAJyccj9tR0dHIGuBWBG5e11IMbPxVDqPeEOZtARKXr2Kk6MJ650MM1z/PHZq7GlQBrujHb7gaHQXxA733HOPQlh+WEanVq9eTZcuXbKVxcfHs3XrVlxcXLLdXryR2rVrc+DAAdatW0efPn1ylFvWGqtXr17BdF5Eiq3jiWZ+OpqOT7kM7i3jgNt1Py0dMjOpcTLS+vXZe33JdCjyszskn0wmEzUrFr1bWXJrLi4uuLu7U65cuSI3HwyKQQgLCgrCy8uL9evXs2DBAusSEikpKYwcOZKkpCR69+5N+fLlrfukpaVx4sQJAKpVq4azszMAzz33HKNGjeLzzz/nvvvuo2nTptZ9Fi1axJIlS3B2dua55567jWcoIkXVhRQzF1LMbIvJxAQ4/vUzvGRyIl+89ZK13qf/WU5yyaL3W7YUjBJODsxqeeu7LVK0mEymIhm8rlfkQ5irqysTJkygf//+fPDBB4SEhODt7c2uXbuIjY2lQYMGDBs2LNs+MTExdOjQAYC1a9fi7e0NQNeuXdm7dy8LFy7kX//6Fw0bNsTT05MjR45w9OhRnJ2dGTNmDD4+Prf9PEWkaDMD6X/N0c/IBIfrHhbKyPy7TO48jua/37giUpCKfAgDaNq0KYsWLWLq1Kls27aNI0eO4O3tTbdu3ejXr59NTzyMHj2aRx55hPnz57Nv3z4OHjyIu7s7Tz31FP379+e+++4rxDMRERERyVIsQhiAr68vkydPzlNdb2/vm77GKCgoiKCgoILqmoiIiIjNNL4qIiIiYgCFMBEREREDKISJiIiIGEAhTERERMQACmEiIiIiBlAIExERETGAQpiIiIiIARTCRERERAygECYiIiJiAIUwEREREQMohImIiIgYQCFMRERExAAKYSIiIiIGUAgTERERMYBCmIiIiIgBFMJEREREDKAQJiIiImIAhTARERERAyiEiYiIiBhAIUxERETEAAphIiIiIgZQCBMRERExgEKYiIiIiAEUwkREREQMoBAmIiIiYgCFMBEREREDKISJiIiIGEAhTERERMQACmEiIiIiBlAIExERETGAQpiIiIiIARTCRERERAygECYiIiJiAIUwEREREQMohImIiIgYQCFMRERExAAKYSIiIiIGUAgTERERMYBCmIiIiIgBFMJEREREDKAQJiIiImIAhTARERERAyiEiYiIiBhAIUxERETEAAphIiIiIgZQCBMRERExgEKYiIiIiAEUwkREREQMoBAmIiIiYgC7Q1hcXFxB9ENERETkrmJ3CGvTpg0DBw5k9erVpKWlFUSfRERERO54TvY2kJGRwfr169mwYQPlypXj6aefJjg4mPvuu68g+iciIiJyR7J7JCwsLIyhQ4dSvXp1Ll26xHfffUdwcDCdO3fmu+++Iz4+viD6KSIiInJHsTuEeXp6MnDgQH755RcWLlxI9+7dKVu2LAcPHmTs2LE88sgjvPbaa6xfv57MzMyC6LOIiIhIsWf37cjr3X///dx///2MHDmSdevW8eOPP7J582ZWr17Nr7/+SoUKFejUqROdO3fGx8fHpraPHj3KtGnT2LFjBxcuXMDT05P27dszYMAASpUqZVNbV69e5ZtvvuGXX37h5MmTODg4UK9ePfr27Uu7du1saktEREQkPwpliYoSJUrwxBNPMGPGDP744w/efvttSpYsyYULF5g9ezZPPfUUvXr1Ys2aNXlqLyIiguDgYEJDQ/Hw8CAwMJCkpCRmzpxJjx49SExMzHPfYmNj6dq1K1OmTCE+Pp6HH34YPz8/tm
/fzpAhQ/juu+/ye9oiIiIieVagI2HXO3XqFCtWrGD16tUcPHgQs9kMQN26dTl//jw7duxg586dPPzww3z++ee4ubnl2k5aWhpDhw4lKSmJ8ePH07lzZwBSUlIYNmwY69atY9KkSfzf//1fnvo1atQooqOjad++PRMmTMDFxQWATZs2MXDgQMaPH89jjz2Gp6en/d8EERERkRso0JGwK1euEBISQq9evXjsscf4/PPPOXDgAGXLluVf//oXy5YtY9myZWzYsIHJkydToUIFNm3axJgxY27Y5sqVKzl9+jQtW7a0BjAAV1dXxo4di5ubG4sXLyYhIeGW/YuIiGDDhg1Ur16d//znP9YABvDwww/TuXNnKlWqxJ49e+z7RoiIiIjcQoEsUbFhwwZ+/PFH1q9fz7Vr1zCbzTg4ONCiRQu6dOlCUFAQJUqUsO7j6OhIu3btcHNzo3///qxZs+aGQSwsLAwg17la7u7uBAQEEBYWxqZNm+jQocNN+/rzzz8D0Ldv32z9sfjoo4/yfN4iIiIi9rA7hD388MNcunTJervx3nvvpXPnzgQHB9/yll6tWrUASE9Pv2GdyMhIAPz8/HItr1OnDmFhYRw+fPiWIWzfvn0APPDAAyQlJbFq1Sr27t1LRkYGDRs25Omnn842OiYiIiJSWOwOYfHx8bi6utKuXTu6dOlCQEBAnvdNTU2lW7duNGjQ4IZ1YmJiAKhcuXKu5R4eHkDWhPtbOXbsGAAXLlxgyJAhnD592lq2YMECZs6cyRdffGHzk5siIiIitrI7hH344Yc8+eSTlC5d2uZ9a9asyejRo29aJzk5GciaA5Yby/akpKRbHu/KlSsA/Pvf/8bb25sJEyZw3333cerUKT755BN+++03XnrpJZYvX56v8xERERHJqwJ5gffq1avzVHfGjBm8+eabNrXv6OiYp3qW26E3k5qaCmQFt7lz59K0aVNKly5N3bp1mTlzJr6+vpw+fZrFixfb1EcRERERW9kdwqZOncqSJUvyVHf16tV5XhvMwrIQqyVA/VNKSgrADZe4uF7JkiUBCA4OpkyZMtnKnJyc6NGjBwBbtmyxqY8iIiIitrLpduTp06dzDSjnz5+/6eiR2WzmzJkzREVF5SksXa9SpUpcunSJuLg4qlSpkqPcMhesUqVKt2yrQoUKXLlyBW9v71zLLdsvXrxoUx9FREREbGVTCKtQoQJTpkzJNgneZDJx4sQJ3nvvvVvubzabadGihU0d9PPzIzIykqioKBo1apSj/MiRI9Z6eWnr+PHj1sn+/xQXFwdknaeIiIhIYbLpdqSrqyvDhw+nSpUq1j9msxlnZ+ds2/75x8vLizp16tCpUyc++OADmzoYGBgIkOu8s/j4eLZu3YqLi0uewp2lrZUrV+a6LMbGjRsBaNasmU19FBEREbGVzU9HPv300zz99NPWr+vWrUvDhg35/vvvC7RjFkFBQXh5ebF+/XoWLFhgnbeVkpLCyJEjSUpKonfv3pQvX966T1paGidOnACgWrVqODs7A9ChQwemT5/On3/+yUcffcR7772Hk1PWt2DRokWsWrWKe+65h06dOhXKuYiIiIhY2L1ExeDBg3Odq1VQXF1dmTBhAv379+eDDz4gJCQEb29vdu3aRWxsLA0aNGDYsGHZ9omJibEu3Lp27VrrXK+SJUvy+eef079/fxYsWEBYWBiNGjXi+PHjREZGWo91faATERERKQx2Px05ePBgunTpUhB9uaGmTZuyaNEiHn/8cc6cOcP69espU6YMgwcPZs6cOdYnKPOiQYMGhIaG0rt3b0qUKMH69euJj4/nqaeeIiQkxHrLUkRERKQw2TQSZnkysnHjxtbX++RnOQdbJ+cD+Pr6Mnny5DzV9fb25vDhwzcs9/DwYNSoUYwaNcrmfoiIiIgUBJtCWL9+/XBwcGDlypXUrFnTus1kMuW5DZPJxIEDB2zrpYiIiMgdxuY5YZmZmTm25WW1+vzUFREREblT2R
TCDh06lKdtIiIiInJzdk/MFxERERHb2b1Exc2kpKSwefNmMjMzadKkCffcc09hHk5ERESk2CiQEBYTE8OMGTOoWrUqAwYMACA6Opp+/fpZXwVUsmRJPv74Y+v6XSIiIiJ3M7tD2MWLF+nWrRuxsbHZ1th6//33iY2NxWQyUapUKa5cucJbb72Fn58fPj4+9h5WREREpFize07YnDlziImJoVq1anTv3h2A48ePs2PHDhwdHZk/fz7bt29nwIABpKen8+2339p7SBEREZFiz+4QtnHjRpycnJg9e7Z1JGz9+vVA1qKuDzzwAABDhgyhbNmy/PHHH/YeUkRERKTYszuEnTx5kho1aljfzwiwefNmTCYTDz30kHWbs7Mz3t7exMbG2ntIERERkWLP7hCWkpJCiRIlrF+np6cTHh4OQLNmzbLVTU5Otml1fREREZE7ld0hrFKlSpw+fZq0tDQAwsPDSUpKolSpUtZbkZD1BOXJkyepUqWKvYcUERERKfbsDmEBAQEkJCTwySefcOjQIT777DNMJhOtW7fG0dERgAsXLvDmm2+SkZGRr5d3i4iIiNxp7A5hL730Eq6ursydO5fOnTuzZ88eHB0deemllwDYvn07rVu3Jjw8nDJlyvDCCy/Y3WkRERGR4s7uEFarVi2+/vprGjZsSIkSJfD19WXGjBnUrVsXyLpdmZ6eTp06dZg/f362CfwiIiIid6sCWTHf39+fkJCQXMu8vb1ZtmyZNZSJiIiIyG14gbeDg4MCmIiIiMg/FOgLvJOTk0lMTCQjIwOz2XzDelWrVi3Iw4qIiIgUOwUSwjZs2MBnn33GoUOHblnXZDJx4MCBgjisiIiISLFldwjbvn07gwYNIjMz86ajXxZ5qSMiIiJyp7M7hM2aNYuMjAz8/PwYPHgwtWrVwtXVtSD6JiIiInLHsjuE7dq1CxcXF2bPnk3FihULok8iIiIidzy7n45MTk7Gx8dHAUxERETEBnaHsKpVq3LhwoWC6IuIiIjIXcPuEPbEE08QGxvLli1bCqI/IiIiIncFu0PYyy+/TO3atXnrrbdYs2YN165dK4h+iYiIiNzR7J6YP2rUKDw9PYmKimLIkCE4OjpSrlw5nJ2dc61vMpkICwuz97AiIiIixZrdIWzlypXWv5vNZtLT0286R8xkMtl7SBEREZFiz+4QNm7cuILoh4iIiMhdxe4Q1rlz54Loh4iIiMhdxe6J+SIiIiJiuwILYdeuXWPBggW8/PLLPPbYYzRv3hyAixcv8s4773DkyJGCOpSIiIhIsWf37UiAo0eP8sorr3D8+HHrC7otE/DPnDnDDz/8wE8//cSkSZMICgoqiEOKiIiIFGt2j4QlJiby4osvcuzYMapUqUK/fv2oVq2atbxMmTLUqlWL1NRUhg4dSmRkpL2HFBERESn27A5h3377LWfOnCEwMJCff/6ZESNGZHuPZPXq1QkNDSUoKIj09HS++eYbew8pIiIiUuzZHcJWr16Nk5MTY8aMwcXFJdc6jo6OjB49mhIlSrB161Z7DykiIiJS7Nkdwk6dOkWdOnWoUKHCTeuVL1+emjVrEhcXZ+8hRURERIo9u0OYyWQiJSUlT3UzMzMpUaKEvYcUERERKfbsDmHVq1fn5MmTtxzhOnv2LNHR0VSvXt3eQ4qIiIgUe3aHMMuE+9GjR1uXp/ina9euMXLkSMxmM23btrX3kCIiIiLFnt3rhPXt25clS5awZs0aunbtSocOHawv8N6wYQNRUVEsWrSI48eP4+HhQe/eve3utIiIiEhxZ3cIK126NF999RWvvPIK+/btY//+/daygQMHAmA2m/Hw8GDGjBmUK1fO3kOKiIiIFHsFsmK+j48PP/74IyEhIaxZs4aoqCiuXLlCyZIlqVmzJm3atKFXr16ULVu2IA4nIiIiUuwVSAgDKFmyJH379qVv374F1aSIiIjIHcuuEHbt2jW2b9/O1q1bOXv2LJcuXcJkMlG2bFl8fHx48M
EHadKkifU9kiIiIiKSJV8hLC0tjblz5/LVV19x+fJl63az2ZwjcFWqVIkBAwbQs2dPHBzsfhhTRERE5I5gcwi7cuUKgwYNIjw83LokRenSpfHy8qJUqVKkpaWRmJjIqVOnSE9PJyYmho8//ph169YxZcoU3NzcCvwkRERERIobm0PY66+/zrZt23B0dKR79+50794dPz+/HPWuXbtGREQEixYtIjQ0lM2bN/PWW28xderUAum4iIiISHFmUwgLCwvj999/p3Tp0sycOZMmTZrcsG6JEiVo0qQJTZo0oUuXLgwcOJC1a9fyxx9/0Lx5c7s7LiIiIlKc2TRJa/ny5ZhMJt55552bBrB/atasGf/+978xm82Ehoba3EkRERGRO41NIezAgQOUKFGCZ555xuYDde7cGUdHRyIiImzeV0REROROY1MIi4uL495778XZ2dnmA7m5ueHt7c3Zs2dt3ldERETkTmNTCEtNTaV06dL5Pli5cuVITk7O9/4iIiIidwqbQlhGRgaOjo75PpiTkxOZmZn52vfo0aMMHz6cNm3a0KhRI9q1a8d///tfrl69mu/+WEyYMAE/Pz+mTJlid1siIiIieVEsVk+NiIggODiY0NBQPDw8CAwMJCkpiZkzZ9KjRw8SExPz3fbvv//ON998U4C9FREREbm1Ih/C0tLSGDp0KElJSYwfP56QkBAmT57MmjVraNu2LZGRkUyaNClfbV+8eJERI0ZYF50VERERuV1sXqw1MTGR8PDwfB0sPyNWK1eu5PTp07Rs2ZLOnTtbt7u6ujJ27Fjatm3L4sWLeeONNyhbtqxNbb/77rvEx8fTuHFjdu7caXPfRERERPLL5hAWFRVFnz59CqMvuQoLCwOgXbt2Ocrc3d0JCAggLCyMTZs20aFDhzy3+/333xMWFsbrr79OfHy8QpiIiIjcVjbfjjSbzXb9sVVkZCRArq9GAqhTpw4Ahw8fznObUVFRTJgwgcaNG/Pyyy/b3CcRERERe9k0ErZ27drC6scNxcTEAFC5cuVcyz08PACIjY3NU3upqam88cYbODs7M3HiRLue9hQRERHJL5tCmJeXV2H144Ys64q5urrmWm7ZnpSUlKf2/vOf/xAZGcmECRPw9vYumE6KiIiI2KjIPx2Z15GqvNzqXL9+PfPmzaNDhw506tTJzp6JiIiI5F+RD2GlSpUCsm4j5iYlJQXIei3SzcTFxfHOO+9QpUoVPvzww4LtpIiIiIiNbH468narVKkSly5dIi4ujipVquQot8wFq1Sp0k3bmTFjBhcvXuS+++5j9OjR2cr2798PwOrVqzl+/Dg+Pj688sorBXQGIiIiIjkV+RDm5+dHZGQkUVFRNGrUKEf5kSNHrPVuxjJn7ODBgxw8eDDXOpGRkURGRtKsWTOFMBERESlURf52ZGBgIJA1SvVP8fHxbN26FRcXF1q0aHHTdsaPH8/hw4dz/WNZ92zw4MEcPnyY7777rsDPQ0REROR6RT6EBQUF4eXlxfr161mwYIF1e0pKCiNHjiQpKYlu3bpRvnx5a1laWhrR0dFER0eTlpZmRLdFREREbqrI3450dXVlwoQJ9O/fnw8++ICQkBC8vb3ZtWsXsbGxNGjQgGHDhmXbJyYmxrp6/tq1a7UUhYiIiBQ5RX4kDKBp06YsWrSIxx9/nDNnzrB+/XrKlCnD4MGDmTNnjvUJShEREZHiosiPhFn4+voyefLkPNX19va26TVGI0eOZOTIkfntmoiIiIjNisVImIiIiMidRiFMRERExAAKYSIiIiIGUAgTERERMYBCmIiIiIgBFMJEREREDKAQJiIiImIAhTARERERAyiEiYiIiBhAIUxERETEAAphIiIiIgZQCBMRERExgEKYiIiIiAEUwkREREQMoBAmIiIiYgCFMBEREREDKISJiIiIGEAhTERERMQACmEiIiIiBlAIExERETGAQpiIiIiIARTCRERERAygECYiIiJiAIUwEREREQMohI
mIiIgYQCFMRERExAAKYSIiIiIGUAgTERERMYBCmIiIiIgBFMJEREREDKAQJiIiImIAhTARERERAyiEiYiIiBhAIUxERETEAAphIiIiIgZQCBMRERExgEKYiIiIiAEUwkREREQMoBAmIiIiYgCFMBEREREDKISJiIiIGEAhTERERMQACmEiIiIiBlAIExERETGAQpiIiIiIARTCRERERAygECYiIiJiAIUwEREREQMohImIiIgYQCFMRERExAAKYSIiIiIGUAgTERERMYBCmIiIiIgBFMJEREREDOBkdAfy6ujRo0ybNo0dO3Zw4cIFPD09ad++PQMGDKBUqVI2tbV+/XrmzZvHvn37uHLlCuXKlePBBx+kf//+NGrUqJDOQERERORvxWIkLCIiguDgYEJDQ/Hw8CAwMJCkpCRmzpxJjx49SExMzHNbn376KS+//DKbNm3Cy8uL1q1bU7ZsWVatWkXPnj1ZtmxZ4Z2IiIiIyF+K/EhYWloaQ4cOJSkpifHjx9O5c2cAUlJSGDZsGOvWrWPSpEn83//93y3b2r59O1988QVubm589dVXNGnSxFq2YMECPvjgA95//32aN2+Op6dnYZ2SiIiISNEfCVu5ciWnT5+mZcuW1gAG4OrqytixY3Fzc2Px4sUkJCTcsq3FixcD0L9//2wBDKBHjx60bt2a1NRUVq1aVbAnISIiIvIPRT6EhYWFAdCuXbscZe7u7gQEBJCWlsamTZtu2Zarqyu+vr4EBATkWl6rVi0AYmNj7eixiIiIyK0V+RAWGRkJgJ+fX67lderUAeDw4cO3bOv//u//CA0NzTEKZrFnzx4AqlSpkp+uioiIiORZkQ9hMTExAFSuXDnXcg8PD8D+0at169axc+dOnJ2dCQoKsqstERERkVsp8iEsOTkZyLqVmBvL9qSkpHwf4/Dhw7zzzjtA1nwxTcoXERGRwlbkQ5ijo2Oe6pnN5ny1HxERQd++fbl06RJt2rRhyJAh+WpHRERExBZFPoRZFmJNTU3NtTwlJQUANzc3m9v+5Zdf6NOnD/Hx8bRr147JkyfnOfSJiIiI2KPIh7BKlSoBEBcXl2u5ZS6YpV5eTZs2jaFDh5KcnMy//vUvPv/8c0qUKGFfZ0VERETyqMiHMMtTkVFRUbmWHzlyJFu9W8nMzOTtt99m8uTJODg4MHLkSN577z0cHIr8t0JERETuIEU+eQQGBgKwevXqHGXx8fFs3boVFxcXWrRokaf2Ro0axQ8//EDJkiWZNm0affr0KcjuioiIiORJkQ9hQUFBeHl5sX79ehYsWGDdnpKSwsiRI0lKSqJbt26UL1/eWpaWlkZ0dDTR0dGkpaVZty9btowlS5bg6OjIjBkzaNOmzW09FxERERGLIv/uSFdXVyZMmED//v354IMPCAkJwdvbm127dhEbG0uDBg0YNmxYtn1iYmLo0KEDAGvXrsXb25uMjAw+++wzACpWrMiSJUtYsmRJrsds1aoVHTt2LNTzEhERkbtbkQ9hAE2bNmXRokVMnTqVbdu2ceTIEby9venWrRv9+vWzPkF5M4cPH+bs2bNAVkgLDQ29YV13d3eFMBERESlUxSKEAfj6+jJ58uQ81fX29s7xGqN69erl6dVGIiIiIrdDkZ8TJiIiInInUggTERERMYBCmIiIiIgBFMJEREREDKAQJiIiImIAhTARERERAyiEiYiIiBhAIUxERETEAAphIiIiIgZQCBMRERExgEKYiIiIiAEUwkREREQMoBAmIiIiYgCFMBEREREDKISJiIiIGEAhTERERMQACmEiIiIiBlAIExERETGAQpiIiIiIARTCRERERAygECYiIiJiAIUwEREREQMohImIiIgYQCFMRERExAAKYSIiIiIGUAgTERERMYBCmIiIiIgBFMJEREREDKAQJiIiImIAhTARERERAyiEiYiIiBhAIUxERETEAAphIiIiIgZQCBMRERExgEKYiIiIiAEUwkREREQMoBAmIi
IiYgCFMBEREREDKISJiIiIGEAhTERERMQACmEiIiIiBlAIExERETGAQpiIiIiIARTCRERERAygECYiIiJiAIUwEREREQMohImIiIgYQCFMRERExAAKYSIiIiIGUAgTERERMYBCmIiIiIgBFMJEREREDKAQJiIiImIAJ6M7kFdHjx5l2rRp7NixgwsXLuDp6Un79u0ZMGAApUqVsqmtmJgYpk+fzubNmzl37hwVK1akbdu2vPrqq5QvX76QzkBERETkb8ViJCwiIoLg4GBCQ0Px8PAgMDCQpKQkZs6cSY8ePUhMTMxzWydOnKBLly4sWLAAV1dX2rRpg6OjI/PmzaNTp06cPXu2EM9EREREJEuRD2FpaWkMHTqUpKQkxo8fT0hICJMnT2bNmjW0bduWyMhIJk2alOf2RowYQVxcHEOGDCE0NJTJkyezatUqevToQUxMDO+//34hno2IiIhIliIfwlauXMnp06dp2bIlnTt3tm53dXVl7NixuLm5sXjxYhISEm7ZVnh4ODt37qRWrVoMGjTIut3R0ZFRo0ZRtWpVNm7cyJEjRwrlXEREREQsinwICwsLA6Bdu3Y5ytzd3QkICCAtLY1Nmzblua2goCAcHLKfurOzM48++igA69ats7fbIiIiIjdV5ENYZGQkAH5+frmW16lTB4DDhw/b3Vbt2rXz3JaIiIiIPYr805ExMTEAVK5cOddyDw8PAGJjYwusrbi4OJv7KWLh4lTkf7cRO/3zGrs4OZCh637H0mdaCkuRD2HJyclA1hyw3Fi2JyUl3da28uLUqVMAREdHExwcXCBtiojxEtPTCa5W7e8NqyeDU5H/cSr5lAgELzO6F3I7RUdHA3//P15YivxPDUdHRzIzM29Zz2w256mtvMjL8fIiNTUVgJSUFPbv318gbYpIEXH9L3OawiByR7L8P15YinwIK1WqFJcuXbrhNyIlJQUANze3PLUFN/6mWtqydfHXGylfvjwXL17ExcUFb2/vAmlTRERECtepU6dITU0t9AXci3wIq1SpEpcuXSIuLo4qVarkKLfMBatUqVKe2tq/f/8N54/Z0lZeWJ7GFBEREfmnIj/b0PIkY1RUVK7lljW9bvTEY25t3WgdMFvaEhEREbFHkQ9hgYGBAKxevTpHWXx8PFu3bsXFxYUWLVrkua1ff/01xxyytLQ01q5dm62eiIiISGEp8iEsKCgILy8v1q9fz4IFC6zbU1JSGDlyJElJSXTr1i3bfdu0tDSio6OJjo4mLS3Nut3f359GjRoRGRnJZ599Zg1iGRkZjBkzhrNnz9KmTRt8fX1v3wmKiIjIXclkzstjhQYLDw+nf//+pKSkUL9+fby9vdm1axexsbE0aNCAuXPnZptMf+rUKevq92vXrs02KT46OppevXoRHx9PrVq1qFOnDgcPHuTEiRN4e3szf/78ApsTJiIiInIjRX4kDKBp06YsWrSIxx9/nDNnzrB+/XrKlCnD4MGDmTNnjk1PM/r4+LBkyRKCg4NJTEwkLCwMk8lEnz59CAkJUQATERGR26JYjISJiIiI3GmKxUiYiIiIyJ1GIUxERETEAAphIiIiIgZQCBMRERExgEKYiIiIiAEUwkREREQMoBAmIiIiYgCFMBEREREDOBndgTuJ2WzmxRdfJCIigu3bt9u8f3p6OkuWLGHhwoUcO3YMJycnGjZsyEsvvUTz5s0Locdiq5SUFObOnUtoaCgnT56kZMmSNG3alFdeeYX77rvPprb+9a9/ER4efsPyt956ixdffNHeLkseHT16lGnTprFjxw4uXLiAp6cn7du3Z8CAATa9lQMgJiaG6dOns3nzZs6dO0fFihVp27Ytr776arb33IpxCup6nzx5kqCgoJvW2bJli657EXLs2DE6depE165dGTlypE37FvRnWyvmF6Dx48fzzTffUKZMGZtDWGZmJsOHD2flypWUK1eOZs2acenSJXbs2IHZbOajjz6ia9euhdRzyYuUlBT69+
9PeHg4lSpVwt/fn7NnzxIREYGzszMzZsygVatWeWrLbDbz4IMPkpaWxuOPP55rnaeffprWrVsX5CnIDURERNC3b1+SkpK4//778fT0ZOfOncTFxeHr68v//vc/ypQpk6e2Tpw4wXPPPWfdt2bNmhw4cICTJ09SuXJlFi5cSJUqVQr5jORmCvJ6//LLL7z++uvUrl37hr+IffjhhzYHeSkc58+fp0+fPkRHR9OnTx+bQlihfLbNYrerV6+a33rrLbOvr6/Z19fX/OCDD9rcRkhIiNnX19fcuXNn86VLl6zbN2/ebG7YsKG5YcOG5tOnTxdkt8VGn376qdnX19fcv39/c3JysnX7smXLzH5+fuYWLVqYExMT89RWdHS02dfX19ytW7fC6q7k0bVr18xt2rQx+/r6mpcuXWrdnpycbB44cKDZ19fX/MEHH+S5vR49eph9fX3NU6ZMsW5LT083v//++9Z/P2Kcgr7en3zyidnX19c8f/78QuitFKQDBw6YH3vsMev/1R9//LFN+xfGZ1tzwuxgNpv5+eef6dixI8uWLePee+/Nd1tffPEFAKNGjaJcuXLW7S1atKBv376kpqYyb948u/ss+XP16lW+++47HB0dGT16NK6urtayjh070qFDBy5cuMCPP/6Yp/YOHDgAQIMGDQqlv5J3K1eu5PTp07Rs2ZLOnTtbt7u6ujJ27Fjc3NxYvHgxCQkJt2wrPDycnTt3UqtWLQYNGmTd7ujoyKhRo6hatSobN27kyJEjhXIucmsFeb1Bn+Xi4PLly0ycOJFu3bpx/PhxvL29bW6jsD7bCmF2OH36NEOHDuXMmTO88MIL1iBlqyNHjnDy5Ek8PDxo3LhxjvInnngCgLCwMLv6K/m3fft2rl69SsOGDXMdbrb1Gu3fvx/QD+6iwHLN2rVrl6PM3d2dgIAA0tLS2LRpU57bCgoKwsEh+49XZ2dnHn30UQDWrVtnb7clnwryekPWZ9nZ2RlfX98C7acUnLlz5zJr1izKly/PjBkz6NSpk81tFNZnWyHMDs7OzgQHB7NixQpGjBiBi4tLvtqJjIwEwM/PL9fy2rVrYzKZOH78OKmpqfnur+Tf4cOHgZtfo+vr3YolhCUlJTFw4EAefvhh7r//foKDg/nf//5HZmZmAfRa8uJWn786deoAebu2efks57UtKRwFeb3PnDlDfHw8NWrUYOHChQQHB+Pv709AQACvvvoqe/fuLbiOS755enoyYsQIVq1aRdu2bfPVRmF9thXC7FC5cmXGjRtHzZo17WonJiYGgEqVKuVa7uLiQtmyZcnIyODChQt2HUvyJzY2FrjxNbJsP3/+/C3bMpvN1lsYo0eP5ujRo/j7++Pj48OhQ4f48MMPGTJkCBkZGQXUe7kZy+evcuXKuZZ7eHgAf/8bKIi24uLibO6nFIyCvN6WX6aioqIYN24cpUqVonnz5ri5ubFmzRp69uzJypUrC6jnkl9du3blhRdeyDaNxFaF9dnWEhV/6d27N9u2bctT3fDwcMqWLVtgx05KSgKgZMmSN6xjGWWz1BX72Hq9b3WNLNcnMzOT5OTkm17LkydPkpiYiKOjIx9//DHBwcHWsj179jB48GDWrFnDrFmzePnll/N6SpJPycnJADf8AW3ZnpfPXkG2JYWjIK+RJYTVqlWLGTNmUKNGDSDr58CXX37Jf//7X9555x0aNWpk15xhMV5hfbYVwv7i7u5+w4T7T/+8H2wvR0fHPNfVbaqCYev1LshrVK1aNbZs2UJCQoL1h7bF/fffz3vvvceQIUOYO3cuAwYMwGQy5fnYYjtHR8c8fa7MeVjNJ6//TvQ5Nk5BXu/BgwfTpUsXSpUqlW2NKAcHBwYOHMju3bsJCwtjwYIFvPnmm3b1W4xVWJ9thbC/TJ482bBjW9aPSUlJuWEdy1wwNze329KnO52t1/tW18hyfRwcHG46CmZRvnz5Gy7sFxgYiKOjI+fPn+fs2bNUrVrVpr6KbUqVKs
WlS5duON/Scs3z8tmz/Du5VVtaM8o4BXm9nZycbjrC9eijjxIWFqa5YXeAwvpsa05YEWAZkbnRveSUlBQuX76Mg4OD9b6z3F63ukaW+QIVKlSwe6S0RIkS1oBmGQKXwmOZz3eja3ur+YC5tXWj+US2tCWFoyCv961YnqTW57j4K6zPtkJYEWB52uJG64tYtlevXj3fT2CKffJ6jW705Mz11qxZw/Dhw/n2229zLU9NTeXSpUs4ODjk+Zap5J/lmkVFReVabsu1Lch/J1I4CvJ6T5gwgSFDhtzwibizZ88C6A0Jd4DC+mwrhBUB1atXp2bNmpw5cybXYetffvkFgDZt2tzurslfHnzwQUqXLs3u3buto17Xs+UaJSYmEhoayty5c0lLS8tRvmLFCtLS0mjcuDGlS5e2v/NyU4GBgQCsXr06R1l8fDxbt27FxcWFFi1a5LmtX3/9NcecorS0NNauXZutntx+BXm99+3bx+rVq/npp59yLV++fDkAjzzySP47LEVCYX22FcJuszNnzhAdHc3Fixezbe/Tpw+QtWL+9ctQbNmyhblz51KiRAmef/7529lVuY6Liws9evQgLS2Nd955h6tXr1rLli9fzi+//EKFChV49tlns+2X2/V+7LHHqFixIqdPn2bcuHHZgtj27dsZN24cJpOJwYMHF/6JCUFBQXh5ebF+/XoWLFhg3Z6SksLIkSNJSkqiW7du2ebwpaWlER0dTXR0dLbr5+/vT6NGjYiMjOSzzz6z/rDOyMhgzJgxnD17ljZt2mhhTwMV5PV+7rnnAPj666/ZsmWLdXtGRgb/+c9/2LZtGzVq1OCZZ565DWcmBeF2f7b1Au8CdOrUKR599NGbvsDbsjTC4MGDGTJkiHV7ZmYmgwYNIiwsjNKlSxMQEEBiYiLbt2/HbDYzceJEnn766dt1KpKL5ORkevfuzd69e6lQoQJNmjTh3Llz7NmzBxcXF7766isCAgKy7XOj671161YGDhxIUlISVatWpV69ely8eJFdu3YB8Pbbbyt030bh4eH079+flJQU6tevj7e3N7t27SI2NpYGDRowd+7cbBNuLZ91gLVr12Z7DUp0dDS9evUiPj6eWrVqUadOHQ4ePMiJEyfw9vZm/vz5mhNmsIK83h999BHz5s3DZDJx//33U7lyZfbt28fp06fx8PBgzpw5+Pj43PZzlBubMmUKU6dOzfUF3rf7s62RsCLCwcGBKVOm8Pbbb1O1alU2bdpEdHQ0LVu2ZN68eQpgRUDJkiWZO3cugwYNokyZMoSFhXHu3Dkef/xxQkJCcgSwmwkICGDZsmV06dKFzMxMNmzYwNGjR2nTpg3z5s1TALvNmjZtyqJFi3j88cc5c+YM69evp0yZMgwePJg5c+bY9MSTj48PS5YsITg4mMTERMLCwjCZTPTp04eQkBAFsCKgIK/3e++9x+TJk2nWrBnR0dGsW7cOR0dH+vXrx/LlyxXA7iCF8dnWSJiIiIiIATQSJiIiImIAhTARERERAyiEiYiIiBhAIUxERETEAAphIiIiIgZQCBMRERExgEKYiIiIiAEUwkREREQMoBAmIiIiYgCFMBEREREDKISJiIiIGEAhTERERMQACmEiIiIiBvh/yNZ1bZL+C04AAAAASUVORK5CYII=", | |
"text/plain": [ | |
"<Figure size 640x480 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"samples [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0.]\n" | |
] | |
}, | |
{ | |
"data": { | |
"image/png": "", | |
"text/plain": [ | |
"<Figure size 640x480 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"samples [3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 4. 3. 3. 3. 3. 4. 3. 4. 3. 4. 3. 4. 3. 3.\n", | |
" 3. 3. 3. 3. 3. 3. 3. 3. 3. 4. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3.\n", | |
" 3. 3. 3. 3. 3. 4. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 4. 4. 3. 3. 4. 3.\n", | |
" 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 4. 3.\n", | |
" 3. 4. 3. 3.]\n" | |
] | |
}, | |
{ | |
"data": { | |
"image/png": "", | |
"text/plain": [ | |
"<Figure size 640x480 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"samples [13. 14. 15. 14. 15. 15. 14. 14. 13. 15. 15. 15. 13. 14. 14. 14. 15. 15.\n", | |
" 14. 14. 15. 15. 15. 14. 15. 16. 14. 15. 14. 15. 14. 14. 15. 14. 14. 15.\n", | |
" 14. 14. 14. 14. 14. 14. 14. 14. 15. 15. 15. 15. 15. 15. 14. 13. 14. 14.\n", | |
" 14. 15. 15. 14. 14. 15. 16. 14. 16. 14. 15. 14. 14. 14. 15. 15. 14. 14.\n", | |
" 15. 14. 14. 15. 14. 14. 14. 14. 14. 14. 15. 15. 15. 14. 14. 15. 14. 14.\n", | |
" 15. 15. 15. 14. 15. 14. 14. 14. 15. 15.]\n", | |
"title quantile_ppc_0.99\n" | |
] | |
}, | |
{ | |
"data": { | |
"image/png": "", | |
"text/plain": [ | |
"<Figure size 640x480 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"samples [33. 36. 40. 35. 37. 42. 35. 32. 38. 39. 40. 40. 37. 37. 43. 35. 40. 36.\n", | |
" 35. 42. 41. 46. 41. 39. 40. 40. 36. 40. 41. 36. 43. 37. 44. 33. 42. 29.\n", | |
" 34. 39. 45. 34. 36. 36. 39. 41. 35. 38. 40. 47. 41. 34. 31. 32. 40. 33.\n", | |
" 39. 35. 41. 40. 44. 33. 35. 37. 34. 34. 41. 39. 31. 37. 38. 38. 37. 40.\n", | |
" 33. 40. 38. 37. 35. 38. 33. 40. 40. 44. 37. 41. 38. 38. 40. 43. 40. 35.\n", | |
" 37. 40. 36. 38. 35. 39. 41. 37. 37. 40.]\n" | |
] | |
}, | |
{ | |
"data": { | |
"image/png": "", | |
"text/plain": [ | |
"<Figure size 640x480 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"def proc_title(title):\n", | |
" if title in ppc_tiles:\n", | |
" print(\"title\", title)\n", | |
" return ppc_tiles[title]\n", | |
"\n", | |
" x = title.replace(\"_\", \" \").capitalize()\n", | |
" x = x.replace(\"ppc\", \"\")\n", | |
" return x\n", | |
"\n", | |
"ppc_number = len(ppcs)\n", | |
"ppc_names = results[0][\"ppc_results\"].keys()\n", | |
"\n", | |
"for ppc_name in ppc_names:\n", | |
" fig, ax = plt.subplots()\n", | |
" for i, res in enumerate(results):\n", | |
" model_name = res[\"model_name\"]\n", | |
" samples = res[\"ppc_results\"][ppc_name][\"samples\"]\n", | |
" # make int\n", | |
" samples = np.maximum(samples, 0)\n", | |
" samples = np.round(samples)\n", | |
" true = res[\"ppc_results\"][ppc_name][\"true\"]\n", | |
" n_unique_samples = len(np.unique(samples))\n", | |
" discrete = n_unique_samples < 20\n", | |
"\n", | |
" print(\"samples\", samples)\n", | |
"\n", | |
" # plot a histogram of the samples but with integers (use nice binning)\n", | |
" if ppc_name == \"max_ppc\":\n", | |
" binwidth = 70\n", | |
" else:\n", | |
" binwidth = None\n", | |
" sns.histplot(samples, ax=ax, label=f\"{model_name}\" + r\" $\\hat{p}$\", discrete=discrete, stat=\"density\", binwidth=binwidth)\n", | |
" if i == 0:\n", | |
" ax.axvline(true, color=\"red\", label=\"Observed Value\")\n", | |
" ax.set_title(proc_title(f\"{ppc_name}\"))\n", | |
" ax.legend()\n", | |
"\n", | |
" if n_unique_samples < 2:\n", | |
" min_val = np.min(samples) - 1\n", | |
" max_val = np.max(samples) + 1\n", | |
"\n", | |
" ax.set_xlim(min_val, max_val)\n", | |
"\n", | |
"\n", | |
"\n", | |
" # save the figure\n", | |
" fig.savefig(f\"m5/{ppc_name}.png\", dpi=100)\n", | |
" # save as pdf\n", | |
" fig.savefig(f\"m5/{ppc_name}.pdf\")\n", | |
"\n", | |
" #max_x = true * 5\n", | |
" #max_samples = np.max(samples)\n", | |
" #if max_samples > max_x:\n", | |
" # ax.set_xlim(0, max_x)\n", | |
"\n", | |
"\n", | |
" plt.show()\n", | |
"\n", | |
"\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<matplotlib.legend.Legend at 0x7faceab84160>" | |
] | |
}, | |
"execution_count": 44, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"image/png": "", | |
"text/plain": [ | |
"<Figure size 640x480 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"for i, result in enumerate(results):\n", | |
" samples = result[\"y_samples\"][0]\n", | |
" samples = np.round(samples) + (i+1)/4\n", | |
" sns.histplot(samples.flatten(), stat=\"density\", label=result[\"model_name\"])\n", | |
"\n", | |
"sns.histplot(y_test_np.flatten(), stat=\"density\", color=\"red\", label=\"true\")\n", | |
"plt.xlim(0, 10)\n", | |
"plt.legend()\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n", | |
"/Users/calvinm/miniconda3/envs/maskingtrees/lib/python3.9/site-packages/treeffuser/_base_tabular_diffusion.py:110: CastFloat32Warning: Input array is not float32; it has been recast to float32.\n", | |
" X = _check_array(X)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"mean shape (10,)\n", | |
"lower shape (10,)\n", | |
"upper shape (10,)\n", | |
"y_true shape (10,)\n", | |
"samples shape (1, 100, 10)\n" | |
] | |
}, | |
{ | |
"data": { | |
"image/png": "", | |
"text/plain": [ | |
"<Figure size 1200x800 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"mean shape (10,)\n", | |
"lower shape (10,)\n", | |
"upper shape (10,)\n", | |
"y_true shape (10,)\n", | |
"samples shape (1, 100, 10)\n" | |
] | |
}, | |
{ | |
"data": { | |
"image/png": "", | |
"text/plain": [ | |
"<Figure size 1200x800 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"mean shape (10,)\n", | |
"lower shape (10,)\n", | |
"upper shape (10,)\n", | |
"y_true shape (10,)\n", | |
"samples shape (1, 100, 10)\n" | |
] | |
}, | |
{ | |
"data": { | |
"image/png": "", | |
"text/plain": [ | |
"<Figure size 1200x800 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"np.random.seed(0)\n", | |
"\n", | |
"NUM_PRODS_TO_PLOT = 3\n", | |
"DAYS_TO_PLOT = 10\n", | |
"QUATILE = 0.9\n", | |
"NUM_SAMPLES = 100\n", | |
"\n", | |
"prods_to_plot = np.random.choice(range(len(X_train_prod_processed)), NUM_PRODS_TO_PLOT)\n", | |
"\n", | |
"means_to_plot = []\n", | |
"lower_q_s = []\n", | |
"upper_q_s = []\n", | |
"\n", | |
"prod_dict = {}\n", | |
"\n", | |
"for res in results:\n", | |
" model = res[\"model\"]\n", | |
"\n", | |
" for i in prods_to_plot:\n", | |
" if i not in prod_dict:\n", | |
" prod_dict[i] = {\n", | |
" \"means_to_plot\": [],\n", | |
" \"lower_q_s\": [],\n", | |
" \"upper_q_s\": [],\n", | |
" \"samples\": []\n", | |
" }\n", | |
"\n", | |
" X_prod_proc_i = X_train_prod_processed[i][-DAYS_TO_PLOT:]\n", | |
" y_prod_proc_i = y_train_prod[i][-DAYS_TO_PLOT:]\n", | |
"\n", | |
" samples = model.sample(X_prod_proc_i.values, NUM_SAMPLES)\n", | |
" samples = samples.astype(int)\n", | |
" samples = np.maximum(samples, 0)\n", | |
"\n", | |
" means = np.mean(samples, axis=0)\n", | |
" lower_q = np.quantile(samples, 1-QUATILE, axis=0)\n", | |
" upper_q = np.quantile(samples, QUATILE, axis=0)\n", | |
"\n", | |
" prod_dict[i][\"means_to_plot\"].append(means)\n", | |
" prod_dict[i][\"lower_q_s\"].append(lower_q)\n", | |
" prod_dict[i][\"upper_q_s\"].append(upper_q)\n", | |
" prod_dict[i][\"samples\"].append(samples)\n", | |
"\n", | |
"for i in prod_dict:\n", | |
" means_to_plot = np.array(prod_dict[i][\"means_to_plot\"]).squeeze()\n", | |
" lower_q_s = np.array(prod_dict[i][\"lower_q_s\"]).squeeze()\n", | |
" upper_q_s = np.array(prod_dict[i][\"upper_q_s\"]).squeeze()\n", | |
" samples = np.array(prod_dict[i][\"samples\"]).squeeze(-1)\n", | |
" y_true = np.array(y_train_prod[i][-DAYS_TO_PLOT:])\n", | |
"\n", | |
" model_names = [res[\"model\"].__class__.__name__ for res in results]\n", | |
"\n", | |
" print(\"mean shape\", means_to_plot.shape)\n", | |
" print(\"lower shape\", lower_q_s.shape)\n", | |
" print(\"upper shape\", upper_q_s.shape)\n", | |
" print(\"y_true shape\", y_true.shape)\n", | |
" print(\"samples shape\", samples.shape)\n", | |
"\n", | |
" #plot_predictions(y_true, means_to_plot, upper_q_s, lower_q_s, [res[\"model\"].__class__.__name__ for res in results])\n", | |
" plot_model_comparisons(samples, y_true, model_names=model_names)\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Log-likelihood" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 50, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Model Treeffuser has a NLL of {'nll_samples': 1.2198445752273326}\n", | |
"Model Treeffuser has a CRPS of {'crps_100': 0.6448642942915247}\n" | |
] | |
} | |
], | |
"source": [ | |
"for res in results:\n", | |
" model = res[\"model\"]\n", | |
" name = res[\"model_name\"]\n", | |
" nll = LogLikelihoodFromSamplesMetric(n_samples=100).compute(model=model, X_test=X_test_np, y_test=y_test_np, samples=res[\"y_samples\"])\n", | |
" crps = CRPS().compute(model=model, X_test=X_test_np, y_test=y_test_np, samples=res[\"y_samples\"])\n", | |
" print(f\"Model {name} has a NLL of {nll}\")\n", | |
" print(f\"Model {name} has a CRPS of {crps}\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Plot calibration plot" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 51, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def calibration_plot(y_samples: Float[np.ndarray, \"n_samples batch y_dim\"], y_test: Float[np.ndarray, \"batch y_dim\"]) -> None:\n", | |
" \"\"\"\n", | |
" We will plot the calibration plot for the model. Essentially, we will plot the\n", | |
" \"\"\"\n", | |
" assert y_test.shape[1] == 1, \"Only works for univariate outputs\"\n", | |
" n_samples = y_samples.shape[0]\n", | |
" y_samples = np.maximum(y_samples, 0.0)\n", | |
" y_samples = np.round(y_samples).astype(int)\n", | |
" y_test = y_test.astype(int)\n", | |
"\n", | |
" # Filter out the zeros\n", | |
" #non_zero_idx = y_test > 0\n", | |
" #y_test = y_test[non_zero_idx]\n", | |
" #y_samples = y_samples[:, non_zero_idx]\n", | |
"\n", | |
" y_test_expanded = y_test[np.newaxis, :].repeat(n_samples, axis=0)\n", | |
" prob_of_event = np.mean(y_samples <= y_test_expanded, axis=0)\n", | |
" prob_of_event_sorted = np.sort(prob_of_event.flatten())\n", | |
" return np.linspace(0, 1, len(prob_of_event)), prob_of_event_sorted\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "", | |
"text/plain": [ | |
"<Figure size 640x480 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"<Axes: ylabel='Count'>" | |
] | |
}, | |
"execution_count": 52, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"image/png": "", | |
"text/plain": [ | |
"<Figure size 640x480 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"y_ex = None\n", | |
"for res in results:\n", | |
" samples = res[\"y_samples\"]\n", | |
" model = res[\"model\"]\n", | |
" x,y = calibration_plot(samples, y_test_np)\n", | |
" plt.plot(x, y, label=res[\"model_name\"])\n", | |
" y_ex = y\n", | |
"\n", | |
"\n", | |
"plt.plot([0, 1], [0, 1], linestyle=\"--\", color=\"black\")\n", | |
"\n", | |
"plt.xlabel(\"Predicted Probability\")\n", | |
"plt.ylabel(\"True Probability\")\n", | |
"plt.legend()\n", | |
"\n", | |
"plt.show()\n", | |
"\n", | |
"# plot the distribution of y\n", | |
"\n", | |
"sns.histplot(y_ex, bins=20)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Quantile prediction plot for the model\n", | |
"def make_quantile_plot(results):\n", | |
" quantiles = np.linspace(0.9, 0.999, 10)\n", | |
"\n", | |
" means = []\n", | |
" stds = []\n", | |
"\n", | |
" for res in results:\n", | |
" samples = res[\"y_samples\"] # shape [n_samples, batch, y_dim]\n", | |
" samples = np.maximum(samples, 0)\n", | |
" samples = np.round(samples)\n", | |
"\n", | |
" m, s = [], []\n", | |
"\n", | |
" for q in quantiles:\n", | |
" quantile_samples = np.quantile(samples, q, axis=1)\n", | |
" mean = np.mean(quantile_samples, axis=0)\n", | |
" std = np.std(quantile_samples, axis=0)\n", | |
" m.append(mean)\n", | |
" s.append(std)\n", | |
"\n", | |
" m = np.array(m).squeeze()\n", | |
" s = np.array(s).squeeze()\n", | |
" plt.plot(quantiles, m, label=res[\"model_name\"])\n", | |
" plt.fill_between(quantiles, m - s, m + s, alpha=0.3)\n", | |
"\n", | |
" means.append(m)\n", | |
" stds.append(s)\n", | |
"\n", | |
"\n", | |
" true_quantiles = []\n", | |
" for q in quantiles:\n", | |
" true_quantiles.append(np.quantile(y_test_np, q))\n", | |
"\n", | |
" plt.plot(quantiles, true_quantiles, label=\"True\")\n", | |
"\n", | |
" # set log scale on y axis\n", | |
"\n", | |
"\n", | |
" # set log scale on x axis\n", | |
" plt.xscale(\"log\")\n", | |
" plt.legend()\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "", | |
"text/plain": [ | |
"<Figure size 640x480 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"make_quantile_plot(results)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.19" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment