Created
January 22, 2023 03:18
-
-
Save reachsumit/9b31afe74c560c9f804081af3e1b4a1d to your computer and use it in GitHub Desktop.
N-BEATS end-to-end demo in PyTorch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"import os\nimport pandas as pd\nimport numpy as np\nfrom tqdm import trange, tqdm\n\nfrom io import BytesIO\nfrom urllib.request import urlopen\nfrom zipfile import ZipFile\n\nfrom pandas import read_csv\nfrom scipy import stats\n\nwindow_size = 192\nstride_size = 24\ntarget_window_size = 24\nhistory_size = 150","metadata":{"execution":{"iopub.status.busy":"2023-01-21T07:39:45.546954Z","iopub.execute_input":"2023-01-21T07:39:45.547247Z","iopub.status.idle":"2023-01-21T07:39:46.307778Z","shell.execute_reply.started":"2023-01-21T07:39:45.547187Z","shell.execute_reply":"2023-01-21T07:39:46.306636Z"},"trusted":true},"execution_count":1,"outputs":[]},{"cell_type":"code","source":"train_start = '2011-01-01 00:00:00'\ntrain_end = '2014-08-31 23:00:00'\ntest_start = '2014-08-25 00:00:00' #need additional 7 days as given info\ntest_end = '2014-09-07 23:00:00'\n\nname = 'LD2011_2014.txt'\nsave_name = 'elect'\nsave_path = os.path.join('data', save_name)\n\nif not os.path.exists(save_path):\n os.makedirs(save_path)\ncsv_path = os.path.join(save_path, name)\nif not os.path.exists(csv_path):\n zipurl = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00321/LD2011_2014.txt.zip'\n with urlopen(zipurl) as zipresp:\n with ZipFile(BytesIO(zipresp.read())) as zfile:\n zfile.extractall(save_path)\n\ndata_frame = pd.read_csv(csv_path, sep=\";\", index_col=0, parse_dates=True, decimal=',')\ndata_frame = data_frame.resample('1H',label = 'left',closed = 'right').sum()[train_start:test_end]\ndata_frame.fillna(0, inplace=True) # (32304, 370)","metadata":{"execution":{"iopub.status.busy":"2023-01-21T07:39:46.313914Z","iopub.execute_input":"2023-01-21T07:39:46.316890Z","iopub.status.idle":"2023-01-21T07:40:03.327828Z","shell.execute_reply.started":"2023-01-21T07:39:46.316848Z","shell.execute_reply":"2023-01-21T07:40:03.326797Z"},"trusted":true},"execution_count":2,"outputs":[]},{"cell_type":"code","source":"# train_data = data_frame[train_start:train_end].values\n# test_data = data_frame[test_start:test_end].values\n\ntrain_data = data_frame[train_start:train_end]\ntest_data = data_frame[test_start:test_end]\nfrom sklearn.preprocessing import MinMaxScaler\nscaler = MinMaxScaler()\nscaler.fit(train_data)\ntrain_target_df = pd.DataFrame(scaler.transform(train_data), index=train_data.index, columns=train_data.columns)\ntest_target_df = pd.DataFrame(scaler.transform(test_data), index=test_data.index, columns=test_data.columns)\ntrain_data = train_target_df.values\ntest_data = test_target_df.values","metadata":{"execution":{"iopub.status.busy":"2023-01-21T07:40:03.330304Z","iopub.execute_input":"2023-01-21T07:40:03.331078Z","iopub.status.idle":"2023-01-21T07:40:03.490229Z","shell.execute_reply.started":"2023-01-21T07:40:03.331037Z","shell.execute_reply":"2023-01-21T07:40:03.489235Z"},"trusted":true},"execution_count":3,"outputs":[]},{"cell_type":"code","source":"class TimeseriesSampler:\n def __init__(self,\n timeseries: np.ndarray,\n insample_size: int=window_size,\n outsample_size: int=target_window_size,\n window_sampling_limit: int=history_size * target_window_size,\n batch_size: int = 8):\n self.timeseries = [ts for ts in timeseries]\n self.window_sampling_limit = window_sampling_limit\n self.batch_size = batch_size\n self.insample_size = insample_size\n self.outsample_size = outsample_size\n\n def __iter__(self):\n while True:\n insample = np.zeros((self.batch_size, self.insample_size))\n insample_mask = np.zeros((self.batch_size, self.insample_size))\n outsample = np.zeros((self.batch_size, self.outsample_size))\n outsample_mask = np.zeros((self.batch_size, self.outsample_size))\n sampled_ts_indices = np.random.randint(len(self.timeseries), size=self.batch_size)\n for i, sampled_index in enumerate(sampled_ts_indices):\n sampled_timeseries = self.timeseries[sampled_index]\n cut_point = np.random.randint(low=max(1, len(sampled_timeseries) - self.window_sampling_limit),\n high=len(sampled_timeseries),\n size=1)[0]\n\n insample_window = sampled_timeseries[max(0, cut_point - self.insample_size):cut_point]\n insample[i, -len(insample_window):] = insample_window\n insample_mask[i, -len(insample_window):] = 1.0\n outsample_window = sampled_timeseries[\n cut_point:min(len(sampled_timeseries), cut_point + self.outsample_size)]\n outsample[i, :len(outsample_window)] = outsample_window\n outsample_mask[i, :len(outsample_window)] = 1.0\n yield insample, insample_mask, outsample, outsample_mask\n \n def last_insample_window(self):\n insample = np.zeros((len(self.timeseries), self.insample_size))\n insample_mask = np.zeros((len(self.timeseries), self.insample_size))\n for i, ts in enumerate(self.timeseries):\n ts_last_window = ts[-self.insample_size:]\n insample[i, -len(ts):] = ts_last_window\n insample_mask[i, -len(ts):] = 1.0\n return insample, insample_mask","metadata":{"execution":{"iopub.status.busy":"2023-01-21T07:40:03.492967Z","iopub.execute_input":"2023-01-21T07:40:03.493421Z","iopub.status.idle":"2023-01-21T07:40:03.713101Z","shell.execute_reply.started":"2023-01-21T07:40:03.493384Z","shell.execute_reply":"2023-01-21T07:40:03.712116Z"},"trusted":true},"execution_count":4,"outputs":[]},{"cell_type":"code","source":"train_loader = TimeseriesSampler(timeseries=train_data.T)","metadata":{"execution":{"iopub.status.busy":"2023-01-21T07:40:03.716170Z","iopub.execute_input":"2023-01-21T07:40:03.716674Z","iopub.status.idle":"2023-01-21T07:40:03.725795Z","shell.execute_reply.started":"2023-01-21T07:40:03.716630Z","shell.execute_reply":"2023-01-21T07:40:03.724771Z"},"trusted":true},"execution_count":5,"outputs":[]},{"cell_type":"code","source":"import numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nclass GenericBasis(nn.Module):\n def __init__(self, backcast_size, forecast_size):\n super().__init__()\n self.backcast_size, self.forecast_size = backcast_size, forecast_size\n\n def forward(self, theta):\n return theta[:, :self.backcast_size], theta[:, -self.forecast_size:]\n\nclass NBeatsBlock(nn.Module):\n def __init__(self,\n input_size,\n theta_size: int,\n basis_function: nn.Module,\n layers: int,\n layer_size: int):\n super().__init__()\n self.layers = nn.ModuleList([nn.Linear(in_features=input_size, out_features=layer_size)] +\n [nn.Linear(in_features=layer_size, out_features=layer_size)\n for _ in range(layers - 1)])\n self.basis_parameters = nn.Linear(in_features=layer_size, out_features=theta_size)\n self.basis_function = basis_function\n\n def forward(self, x: torch.Tensor):\n block_input = x\n for layer in self.layers:\n block_input = torch.relu(layer(block_input))\n basis_parameters = self.basis_parameters(block_input)\n return self.basis_function(basis_parameters)\n\n\nclass NBeats(nn.Module):\n def __init__(self, blocks: nn.ModuleList):\n super().__init__()\n self.blocks = blocks\n\n def forward(self, x: torch.Tensor, input_mask: torch.Tensor) -> torch.Tensor:\n residuals = x.flip(dims=(1,))\n input_mask = input_mask.flip(dims=(1,))\n forecast = x[:, -1:]\n for i, block in enumerate(self.blocks):\n backcast, block_forecast = block(residuals)\n residuals = (residuals - backcast) * input_mask\n forecast = forecast + block_forecast\n return forecast\n\ndef to_tensor(array: np.ndarray):\n return torch.tensor(array, dtype=torch.float32)","metadata":{"execution":{"iopub.status.busy":"2023-01-21T07:40:03.727795Z","iopub.execute_input":"2023-01-21T07:40:03.728183Z","iopub.status.idle":"2023-01-21T07:40:05.532157Z","shell.execute_reply.started":"2023-01-21T07:40:03.728149Z","shell.execute_reply":"2023-01-21T07:40:05.531205Z"},"trusted":true},"execution_count":6,"outputs":[]},{"cell_type":"code","source":"import torch.optim as optim\nfrom tqdm import trange, tqdm\n\ndef train(model, device=torch.device('cuda'), iterations=1000, num_epochs = 1, learning_rate = 1e-3):\n optimizer = optim.Adam(model.parameters(), lr=learning_rate)\n loss_summary = []\n loss_fn = F.mse_loss\n training_set=iter(train_loader)\n \n for epoch in range(num_epochs):\n model.train()\n \n pbar = trange(iterations)\n for iteration in pbar:\n x, x_mask, y, y_mask = map(to_tensor, next(training_set))\n optimizer.zero_grad()\n\n loss = torch.zeros(1, device=device, dtype=torch.float32)\n out = model(x.to(device), x_mask.to(device))\n loss = loss_fn(out.float(), y.squeeze().to(device).float())\n\n pbar.set_description(f\"Loss:{loss.item()}\")\n loss.backward()\n optimizer.step()\n\n loss_summary.append(loss.cpu().detach())\n \n return loss_summary, optimizer\n\ndef evaluate(model, optimizer, device=torch.device('cuda')):\n forecasts = []\n test_windows = test_data.T.shape[1]//target_window_size\n\n with torch.no_grad():\n model.eval()\n for i in trange(test_windows):\n window_input_set = np.concatenate([train_data.T, test_data.T[:, :i * target_window_size]],\n axis=1)\n input_set = TimeseriesSampler(timeseries=window_input_set)\n x, x_mask = map(to_tensor, input_set.last_insample_window())\n window_forecast = model(x.to(device), x_mask.to(device)).cpu().detach().numpy()\n forecasts = window_forecast if len(forecasts) == 0 else np.concatenate([forecasts, window_forecast],\n axis=1)\n \n return np.sqrt(np.mean((forecasts-test_data.T)**2))","metadata":{"execution":{"iopub.status.busy":"2023-01-21T07:45:36.339905Z","iopub.execute_input":"2023-01-21T07:45:36.340268Z","iopub.status.idle":"2023-01-21T07:45:36.353471Z","shell.execute_reply.started":"2023-01-21T07:45:36.340239Z","shell.execute_reply":"2023-01-21T07:45:36.352452Z"},"trusted":true},"execution_count":22,"outputs":[]},{"cell_type":"code","source":"model = NBeats(nn.ModuleList([NBeatsBlock(input_size=window_size,\n theta_size=window_size + target_window_size,\n basis_function=GenericBasis(backcast_size=window_size,\n forecast_size=target_window_size),\n layers=4,\n layer_size=512)\n for _ in range(30)])).cuda()# stacks","metadata":{"execution":{"iopub.status.busy":"2023-01-21T07:40:05.549422Z","iopub.execute_input":"2023-01-21T07:40:05.549803Z","iopub.status.idle":"2023-01-21T07:40:08.628729Z","shell.execute_reply.started":"2023-01-21T07:40:05.549763Z","shell.execute_reply":"2023-01-21T07:40:08.627650Z"},"trusted":true},"execution_count":8,"outputs":[]},{"cell_type":"code","source":"loss, optimizer = train(model, num_epochs=3)","metadata":{"execution":{"iopub.status.busy":"2023-01-21T07:40:08.630110Z","iopub.execute_input":"2023-01-21T07:40:08.630484Z","iopub.status.idle":"2023-01-21T07:42:41.750205Z","shell.execute_reply.started":"2023-01-21T07:40:08.630450Z","shell.execute_reply":"2023-01-21T07:42:41.749229Z"},"trusted":true},"execution_count":9,"outputs":[{"name":"stderr","text":"Loss:0.008311772719025612: 100%|██████████| 1000/1000 [00:50<00:00, 19.85it/s]\nLoss:0.005937961395829916: 100%|██████████| 1000/1000 [00:52<00:00, 18.96it/s]\nLoss:0.006405656225979328: 100%|██████████| 1000/1000 [00:49<00:00, 20.01it/s]\n","output_type":"stream"}]},{"cell_type":"code","source":"evaluate(model, optimizer)","metadata":{"execution":{"iopub.status.busy":"2023-01-21T07:42:41.753361Z","iopub.execute_input":"2023-01-21T07:42:41.754341Z","iopub.status.idle":"2023-01-21T07:42:42.298935Z","shell.execute_reply.started":"2023-01-21T07:42:41.754301Z","shell.execute_reply":"2023-01-21T07:42:42.297783Z"},"trusted":true},"execution_count":10,"outputs":[{"name":"stderr","text":"100%|██████████| 14/14 [00:00<00:00, 26.55it/s]\n","output_type":"stream"},{"execution_count":10,"output_type":"execute_result","data":{"text/plain":"0.07997294110010518"},"metadata":{}}]},{"cell_type":"code","source":"evaluate(model, optimizer)","metadata":{"execution":{"iopub.status.busy":"2023-01-21T07:45:45.547009Z","iopub.execute_input":"2023-01-21T07:45:45.547390Z","iopub.status.idle":"2023-01-21T07:45:46.125262Z","shell.execute_reply.started":"2023-01-21T07:45:45.547357Z","shell.execute_reply":"2023-01-21T07:45:46.124309Z"},"trusted":true},"execution_count":23,"outputs":[{"name":"stderr","text":"100%|██████████| 14/14 [00:00<00:00, 24.78it/s]\n","output_type":"stream"},{"execution_count":23,"output_type":"execute_result","data":{"text/plain":"0.07997294110010518"},"metadata":{}}]},{"cell_type":"code","source":"test_rmse","metadata":{"execution":{"iopub.status.busy":"2023-01-21T07:43:14.732789Z","iopub.execute_input":"2023-01-21T07:43:14.733141Z","iopub.status.idle":"2023-01-21T07:43:14.739562Z","shell.execute_reply.started":"2023-01-21T07:43:14.733112Z","shell.execute_reply":"2023-01-21T07:43:14.738314Z"},"trusted":true},"execution_count":13,"outputs":[{"execution_count":13,"output_type":"execute_result","data":{"text/plain":"0.07997294110010518"},"metadata":{}}]}]} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment