Last active
April 14, 2024 04:01
-
-
Save taruma/50460ebfaab5a30c41e7f1a1ac0853e2 to your computer and use it in GitHub Desktop.
taruma_hk53_tensor.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "taruma_hk53_tensor.ipynb", | |
"provenance": [], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/taruma/50460ebfaab5a30c41e7f1a1ac0853e2/taruma_hk53_tensor.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "mKmUJwglptVI" | |
}, | |
"source": [ | |
"Berdasarkan isu [#53](): **request: buat tabel/tensor untuk pemodelan deep learning LSTM**\n", | |
"\n", | |
"Deskripsi permasalahan:\n", | |
"- Input data dalam bentuk DataFrame harus diubah ke tensor 3D sebagai input pemodelan RNN (LSTM)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "ST52BbNivuGi" | |
}, | |
"source": [ | |
"# DATASET" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Y9TYpf_evke8" | |
}, | |
"source": [ | |
# Location of the input CSV: the folder it is expected in, its file name, and
# the full path built from the two.
# NOTE(review): '/content/' looks like the Colab working directory - confirm.
DRIVE_DROP_PATH = '/content/'
FILE_PATH = 'dataset_hidrologi_pamarayan_1998_2008.csv'
DATASET = f"{DRIVE_DROP_PATH}{FILE_PATH}"
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "6M89iWvQvruQ" | |
}, | |
"source": [ | |
"# LOAD DATASET" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "qEEei04SvrFk" | |
}, | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np" | |
], | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "dwsP8P1sv7SU", | |
"outputId": "cd954888-f896-4034-9b9e-7e1ba625ad6c", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 313 | |
} | |
}, | |
"source": [ | |
# The first 2 months have no data in the discharge ("debit") column, so only
# the rows from 1998-03-01 onward are kept.
dataset = pd.read_csv(DATASET, index_col=0, parse_dates=True).loc['19980301':]
dataset.head()
], | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
" hujan_bojong_manik hujan_gunung_tunggal hujan_pasir_ona \\\n", | |
"1998-03-01 0.0 0.0 3.0 \n", | |
"1998-03-02 0.0 4.0 36.0 \n", | |
"1998-03-03 4.5 0.0 0.0 \n", | |
"1998-03-04 0.0 0.0 46.0 \n", | |
"1998-03-05 32.0 0.0 0.0 \n", | |
"\n", | |
" hujan_sampang_peundeuy hujan_cimarga hujan_bd_pamarayan \\\n", | |
"1998-03-01 7.0 0.0 12.0 \n", | |
"1998-03-02 9.0 26.0 0.0 \n", | |
"1998-03-03 11.0 10.0 2.0 \n", | |
"1998-03-04 5.0 24.0 6.0 \n", | |
"1998-03-05 22.0 8.0 14.0 \n", | |
"\n", | |
" hujan_ciminyak_cilaki hujan_gardu_tanjak debit_bd_pamarayan \n", | |
"1998-03-01 0.0 0.0 90.12 \n", | |
"1998-03-02 5.0 32.0 97.90 \n", | |
"1998-03-03 3.0 21.0 88.90 \n", | |
"1998-03-04 11.0 13.0 90.30 \n", | |
"1998-03-05 0.0 21.0 210.06 " | |
], | |
"text/html": [ | |
"\n", | |
" <div id=\"df-f0c26b57-2c84-4657-a8b6-f5ec19c66bdf\" class=\"colab-df-container\">\n", | |
" <div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>hujan_bojong_manik</th>\n", | |
" <th>hujan_gunung_tunggal</th>\n", | |
" <th>hujan_pasir_ona</th>\n", | |
" <th>hujan_sampang_peundeuy</th>\n", | |
" <th>hujan_cimarga</th>\n", | |
" <th>hujan_bd_pamarayan</th>\n", | |
" <th>hujan_ciminyak_cilaki</th>\n", | |
" <th>hujan_gardu_tanjak</th>\n", | |
" <th>debit_bd_pamarayan</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1998-03-01</th>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>7.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>12.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>90.12</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1998-03-02</th>\n", | |
" <td>0.0</td>\n", | |
" <td>4.0</td>\n", | |
" <td>36.0</td>\n", | |
" <td>9.0</td>\n", | |
" <td>26.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>5.0</td>\n", | |
" <td>32.0</td>\n", | |
" <td>97.90</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1998-03-03</th>\n", | |
" <td>4.5</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>11.0</td>\n", | |
" <td>10.0</td>\n", | |
" <td>2.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>21.0</td>\n", | |
" <td>88.90</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1998-03-04</th>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>46.0</td>\n", | |
" <td>5.0</td>\n", | |
" <td>24.0</td>\n", | |
" <td>6.0</td>\n", | |
" <td>11.0</td>\n", | |
" <td>13.0</td>\n", | |
" <td>90.30</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1998-03-05</th>\n", | |
" <td>32.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>22.0</td>\n", | |
" <td>8.0</td>\n", | |
" <td>14.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>21.0</td>\n", | |
" <td>210.06</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>\n", | |
" <div class=\"colab-df-buttons\">\n", | |
"\n", | |
" <div class=\"colab-df-container\">\n", | |
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f0c26b57-2c84-4657-a8b6-f5ec19c66bdf')\"\n", | |
" title=\"Convert this dataframe to an interactive table.\"\n", | |
" style=\"display:none;\">\n", | |
"\n", | |
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", | |
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", | |
" </svg>\n", | |
" </button>\n", | |
"\n", | |
" <style>\n", | |
" .colab-df-container {\n", | |
" display:flex;\n", | |
" gap: 12px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert {\n", | |
" background-color: #E8F0FE;\n", | |
" border: none;\n", | |
" border-radius: 50%;\n", | |
" cursor: pointer;\n", | |
" display: none;\n", | |
" fill: #1967D2;\n", | |
" height: 32px;\n", | |
" padding: 0 0 0 0;\n", | |
" width: 32px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert:hover {\n", | |
" background-color: #E2EBFA;\n", | |
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
" fill: #174EA6;\n", | |
" }\n", | |
"\n", | |
" .colab-df-buttons div {\n", | |
" margin-bottom: 4px;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert {\n", | |
" background-color: #3B4455;\n", | |
" fill: #D2E3FC;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert:hover {\n", | |
" background-color: #434B5C;\n", | |
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
" fill: #FFFFFF;\n", | |
" }\n", | |
" </style>\n", | |
"\n", | |
" <script>\n", | |
" const buttonEl =\n", | |
" document.querySelector('#df-f0c26b57-2c84-4657-a8b6-f5ec19c66bdf button.colab-df-convert');\n", | |
" buttonEl.style.display =\n", | |
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
"\n", | |
" async function convertToInteractive(key) {\n", | |
" const element = document.querySelector('#df-f0c26b57-2c84-4657-a8b6-f5ec19c66bdf');\n", | |
" const dataTable =\n", | |
" await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
" [key], {});\n", | |
" if (!dataTable) return;\n", | |
"\n", | |
" const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
" + ' to learn more about interactive tables.';\n", | |
" element.innerHTML = '';\n", | |
" dataTable['output_type'] = 'display_data';\n", | |
" await google.colab.output.renderOutput(dataTable, element);\n", | |
" const docLink = document.createElement('div');\n", | |
" docLink.innerHTML = docLinkHtml;\n", | |
" element.appendChild(docLink);\n", | |
" }\n", | |
" </script>\n", | |
" </div>\n", | |
"\n", | |
"\n", | |
"<div id=\"df-e4fd14ee-049e-4914-94d0-9e7eec4b3cf0\">\n", | |
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-e4fd14ee-049e-4914-94d0-9e7eec4b3cf0')\"\n", | |
" title=\"Suggest charts\"\n", | |
" style=\"display:none;\">\n", | |
"\n", | |
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
" width=\"24px\">\n", | |
" <g>\n", | |
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", | |
" </g>\n", | |
"</svg>\n", | |
" </button>\n", | |
"\n", | |
"<style>\n", | |
" .colab-df-quickchart {\n", | |
" --bg-color: #E8F0FE;\n", | |
" --fill-color: #1967D2;\n", | |
" --hover-bg-color: #E2EBFA;\n", | |
" --hover-fill-color: #174EA6;\n", | |
" --disabled-fill-color: #AAA;\n", | |
" --disabled-bg-color: #DDD;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-quickchart {\n", | |
" --bg-color: #3B4455;\n", | |
" --fill-color: #D2E3FC;\n", | |
" --hover-bg-color: #434B5C;\n", | |
" --hover-fill-color: #FFFFFF;\n", | |
" --disabled-bg-color: #3B4455;\n", | |
" --disabled-fill-color: #666;\n", | |
" }\n", | |
"\n", | |
" .colab-df-quickchart {\n", | |
" background-color: var(--bg-color);\n", | |
" border: none;\n", | |
" border-radius: 50%;\n", | |
" cursor: pointer;\n", | |
" display: none;\n", | |
" fill: var(--fill-color);\n", | |
" height: 32px;\n", | |
" padding: 0;\n", | |
" width: 32px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-quickchart:hover {\n", | |
" background-color: var(--hover-bg-color);\n", | |
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
" fill: var(--button-hover-fill-color);\n", | |
" }\n", | |
"\n", | |
" .colab-df-quickchart-complete:disabled,\n", | |
" .colab-df-quickchart-complete:disabled:hover {\n", | |
" background-color: var(--disabled-bg-color);\n", | |
" fill: var(--disabled-fill-color);\n", | |
" box-shadow: none;\n", | |
" }\n", | |
"\n", | |
" .colab-df-spinner {\n", | |
" border: 2px solid var(--fill-color);\n", | |
" border-color: transparent;\n", | |
" border-bottom-color: var(--fill-color);\n", | |
" animation:\n", | |
" spin 1s steps(1) infinite;\n", | |
" }\n", | |
"\n", | |
" @keyframes spin {\n", | |
" 0% {\n", | |
" border-color: transparent;\n", | |
" border-bottom-color: var(--fill-color);\n", | |
" border-left-color: var(--fill-color);\n", | |
" }\n", | |
" 20% {\n", | |
" border-color: transparent;\n", | |
" border-left-color: var(--fill-color);\n", | |
" border-top-color: var(--fill-color);\n", | |
" }\n", | |
" 30% {\n", | |
" border-color: transparent;\n", | |
" border-left-color: var(--fill-color);\n", | |
" border-top-color: var(--fill-color);\n", | |
" border-right-color: var(--fill-color);\n", | |
" }\n", | |
" 40% {\n", | |
" border-color: transparent;\n", | |
" border-right-color: var(--fill-color);\n", | |
" border-top-color: var(--fill-color);\n", | |
" }\n", | |
" 60% {\n", | |
" border-color: transparent;\n", | |
" border-right-color: var(--fill-color);\n", | |
" }\n", | |
" 80% {\n", | |
" border-color: transparent;\n", | |
" border-right-color: var(--fill-color);\n", | |
" border-bottom-color: var(--fill-color);\n", | |
" }\n", | |
" 90% {\n", | |
" border-color: transparent;\n", | |
" border-bottom-color: var(--fill-color);\n", | |
" }\n", | |
" }\n", | |
"</style>\n", | |
"\n", | |
" <script>\n", | |
" async function quickchart(key) {\n", | |
" const quickchartButtonEl =\n", | |
" document.querySelector('#' + key + ' button');\n", | |
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", | |
" quickchartButtonEl.classList.add('colab-df-spinner');\n", | |
" try {\n", | |
" const charts = await google.colab.kernel.invokeFunction(\n", | |
" 'suggestCharts', [key], {});\n", | |
" } catch (error) {\n", | |
" console.error('Error during call to suggestCharts:', error);\n", | |
" }\n", | |
" quickchartButtonEl.classList.remove('colab-df-spinner');\n", | |
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", | |
" }\n", | |
" (() => {\n", | |
" let quickchartButtonEl =\n", | |
" document.querySelector('#df-e4fd14ee-049e-4914-94d0-9e7eec4b3cf0 button');\n", | |
" quickchartButtonEl.style.display =\n", | |
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
" })();\n", | |
" </script>\n", | |
"</div>\n", | |
" </div>\n", | |
" </div>\n" | |
], | |
"application/vnd.google.colaboratory.intrinsic+json": { | |
"type": "dataframe", | |
"variable_name": "dataset", | |
"summary": "{\n \"name\": \"dataset\",\n \"rows\": 3959,\n \"fields\": [\n {\n \"column\": \"hujan_bojong_manik\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 12.044965367071466,\n \"min\": 0.0,\n \"max\": 180.0,\n \"num_unique_values\": 134,\n \"samples\": [\n 60.6,\n 48.0,\n 5.9\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hujan_gunung_tunggal\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 12.304554433026713,\n \"min\": 0.0,\n \"max\": 99.5,\n \"num_unique_values\": 135,\n \"samples\": [\n 10.5,\n 37.0,\n 85.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hujan_pasir_ona\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 13.616252378084882,\n \"min\": 0.0,\n \"max\": 135.0,\n \"num_unique_values\": 138,\n \"samples\": [\n 76.0,\n 45.0,\n 35.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hujan_sampang_peundeuy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14.510072972258792,\n \"min\": 0.0,\n \"max\": 140.0,\n \"num_unique_values\": 96,\n \"samples\": [\n 105.0,\n 64.0,\n 60.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hujan_cimarga\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 13.728183115731136,\n \"min\": 0.0,\n \"max\": 133.0,\n \"num_unique_values\": 107,\n \"samples\": [\n 94.0,\n 13.5,\n 8.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hujan_bd_pamarayan\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 13.329691046075206,\n \"min\": 0.0,\n \"max\": 163.0,\n \"num_unique_values\": 91,\n \"samples\": [\n 34.0,\n 4.0,\n 16.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hujan_ciminyak_cilaki\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17.4724310475462,\n \"min\": 0.0,\n \"max\": 275.0,\n \"num_unique_values\": 109,\n \"samples\": [\n 122.0,\n 
55.0,\n 16.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hujan_gardu_tanjak\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14.768919075302353,\n \"min\": 0.0,\n \"max\": 148.0,\n \"num_unique_values\": 89,\n \"samples\": [\n 31.0,\n 69.0,\n 23.5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"debit_bd_pamarayan\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 105.07702293539884,\n \"min\": 0.0,\n \"max\": 2561.58,\n \"num_unique_values\": 2326,\n \"samples\": [\n 85.68,\n 20.06,\n 82.39\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" | |
} | |
}, | |
"metadata": {}, | |
"execution_count": 3 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "bDeGhjW2wBMq", | |
"outputId": "c75ebbc7-acdc-4c1c-a40d-f4f526fc91f2", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
} | |
}, | |
"source": [ | |
"dataset.info()" | |
], | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"DatetimeIndex: 3959 entries, 1998-03-01 to 2008-12-31\n", | |
"Data columns (total 9 columns):\n", | |
" # Column Non-Null Count Dtype \n", | |
"--- ------ -------------- ----- \n", | |
" 0 hujan_bojong_manik 3959 non-null float64\n", | |
" 1 hujan_gunung_tunggal 3959 non-null float64\n", | |
" 2 hujan_pasir_ona 3959 non-null float64\n", | |
" 3 hujan_sampang_peundeuy 3959 non-null float64\n", | |
" 4 hujan_cimarga 3959 non-null float64\n", | |
" 5 hujan_bd_pamarayan 3959 non-null float64\n", | |
" 6 hujan_ciminyak_cilaki 3959 non-null float64\n", | |
" 7 hujan_gardu_tanjak 3959 non-null float64\n", | |
" 8 debit_bd_pamarayan 3959 non-null float64\n", | |
"dtypes: float64(9)\n", | |
"memory usage: 309.3 KB\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "uv79w2klw0lB" | |
}, | |
"source": [ | |
"# FUNGSI" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "JVuM5AIFwopz" | |
}, | |
"source": [ | |
"import numpy as np\n", | |
"\n", | |
"\n", | |
"def _columns_index(dataframe, columns):\n", | |
" \"\"\"\n", | |
" Get the index of columns in a dataframe.\n", | |
"\n", | |
" Args:\n", | |
" dataframe (pandas.DataFrame): The dataframe to search for column indices.\n", | |
" columns (list): A list of column names.\n", | |
"\n", | |
" Returns:\n", | |
" list: A list of column indices corresponding to the input column names.\n", | |
" \"\"\"\n", | |
" column_names = dataframe.columns\n", | |
" column_indices = []\n", | |
"\n", | |
" for column in columns:\n", | |
" column_indices.append(column_names.get_loc(column))\n", | |
" return column_indices\n", | |
"\n", | |
"\n", | |
"def _get_y(array, timesteps, columns_index):\n", | |
" \"\"\"\n", | |
" Get the target variable(s) from the input array.\n", | |
"\n", | |
" Parameters:\n", | |
" array (ndarray): The input array.\n", | |
" timesteps (int): The number of timesteps to skip from the beginning of the array.\n", | |
" columns_index (list): The indices of the columns to extract.\n", | |
"\n", | |
" Returns:\n", | |
" ndarray: The target variable(s) extracted from the input array.\n", | |
" \"\"\"\n", | |
" y = []\n", | |
" for col in columns_index:\n", | |
" y.append(array[timesteps:, col])\n", | |
"\n", | |
" if len(columns_index) == 1:\n", | |
" return y[0]\n", | |
" return np.stack(y, axis=1)\n", | |
"\n", | |
"\n", | |
"def _get_x_tensor(array, timesteps, columns_index):\n", | |
" \"\"\"\n", | |
" Generate a tensor of input features for a given array.\n", | |
"\n", | |
" Parameters:\n", | |
" array (numpy.ndarray): The input array.\n", | |
" timesteps (int): The number of timesteps to consider for each feature.\n", | |
" columns_index (list): The indices of the columns to include in the tensor.\n", | |
"\n", | |
" Returns:\n", | |
" numpy.ndarray: The tensor of input features.\n", | |
"\n", | |
" \"\"\"\n", | |
" X = [] # pylint: disable=invalid-name\n", | |
" rows, _ = array.shape\n", | |
"\n", | |
" for col in columns_index:\n", | |
" array_each_column = []\n", | |
" for row in range(timesteps, rows):\n", | |
" array_each_column.append(array[row - timesteps : row, col])\n", | |
" X.append(array_each_column)\n", | |
"\n", | |
" return np.stack(X, axis=2)\n", | |
"\n", | |
"\n", | |
def tensor_array(
    dataframe,
    timesteps,
    X_columns=None,  # pylint: disable=invalid-name
    y_out=False,
    y_columns=None,
):
    """
    Convert a pandas DataFrame into a 3D tensor of sliding windows.

    Sample ``i`` of the input tensor holds rows ``i`` .. ``i + timesteps - 1``
    of the selected feature columns; the matching label (when requested) is
    taken from row ``i + timesteps``.

    Args:
        dataframe (pandas.DataFrame): The input DataFrame containing the data.
        timesteps (int): The number of consecutive rows in each sample.
        X_columns (list, optional): The list of column names to be used as
            input features. If None, every column of the dataframe is used
            (including any column that is also selected as a label).
            Defaults to None.
        y_out (bool, optional): Whether to also return the output labels.
            Any truthy value enables it. Defaults to False.
        y_columns (list, optional): The list of column names to be used as
            output labels. Only used when y_out is truthy. If None, the last
            column of the dataframe is used. Defaults to None.

    Returns:
        numpy.ndarray: The input tensor, with shape
            ``(rows - timesteps, timesteps, number of feature columns)``.

        If y_out is truthy, a tuple ``(X, y)`` is returned instead, where
        ``y`` is a numpy.ndarray of labels: 1D when there is a single label
        column, otherwise one column per label.
    """
    _, n_cols = dataframe.shape
    array = dataframe.values

    # pylint: disable=invalid-name
    # X array: default to every column, otherwise resolve the given names.
    if X_columns is None:
        X_index = range(n_cols)
    else:
        X_index = _columns_index(dataframe, X_columns)

    X = _get_x_tensor(array, timesteps=timesteps, columns_index=X_index)

    # Truthiness (not ``is True``) so that flags such as 1 or numpy.True_
    # are honoured instead of silently dropping the labels.
    if not y_out:
        return X

    # y array: default to the last column, otherwise resolve the given names.
    if y_columns is None:
        y_index = [n_cols - 1]
    else:
        y_index = _columns_index(dataframe, y_columns)

    y = _get_y(array, timesteps=timesteps, columns_index=y_index)
    return X, y
], | |
"execution_count": 7, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "mVeBugfmrUnZ" | |
}, | |
"source": [ | |
"# DATASET\n", | |
"\n", | |
"Dataset memiliki $3959$ baris dengan $8$ variabel independen, dan $1$ variabel dependen.\n", | |
"\n", | |
"$8$ variabel bebas:\n", | |
"- `hujan_bojong_manik`,\n", | |
"- `hujan_gunung_tunggal`,\n", | |
"- `hujan_pasir_ona`,\n", | |
"- `hujan_sampang_peundeuy`,\n", | |
"- `hujan_cimarga`,\n", | |
"- `hujan_bd_pamarayan`,\n", | |
"- `hujan_ciminyak_cilaki`,\n", | |
"- `hujan_gardu_tanjak`,\n", | |
"\n", | |
"$1$ variabel terikat:\n", | |
"- `debit_bd_pamarayan`\n", | |
"\n", | |
"Dengan menggunakan timesteps sebesar $5$ hari, maka dimensi tensor input 3d sebesar $X = (3954, 5, 9)$ dengan output $y = (3954,)$\n", | |
"\n", | |
"$$y^{t} = f(X_i^{t_s}, y^{t_s})$$" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "7RwShuq4mWWI" | |
}, | |
"source": [ | |
# Number of consecutive rows (timesteps) in each input sample.
TIMESTEPS = 5

X, y = tensor_array(
    dataset,
    timesteps=TIMESTEPS,
    X_columns=None,
    y_out=True,
    y_columns=['debit_bd_pamarayan'],
)

# Sanity checks on the result: one sample per row after the first TIMESTEPS
# rows, every dataframe column used as a feature, one label per sample.
assert X.shape == (len(dataset) - TIMESTEPS, TIMESTEPS, dataset.shape[1])
assert y.shape == (X.shape[0],)
], | |
"execution_count": 8, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "j2tTMJjTuKl_", | |
"outputId": "7b5d2029-37d9-4fe5-83c4-c4cd1b15a048", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
} | |
}, | |
"source": [ | |
"X.shape" | |
], | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(3954, 5, 9)" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 9 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "6mjlJf7guL5Q", | |
"outputId": "169b98da-bea7-4a96-b2e3-b3096078c167", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
} | |
}, | |
"source": [ | |
"y.shape" | |
], | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(3954,)" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 10 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "xovTkgrLuOBU", | |
"outputId": "b9af1264-a692-4e75-f747-fa790d78a601", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
} | |
}, | |
"source": [ | |
"print(X)" | |
], | |
"execution_count": 11, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"[[[ 0. 0. 3. ... 0. 0. 90.12]\n", | |
" [ 0. 4. 36. ... 5. 32. 97.9 ]\n", | |
" [ 4.5 0. 0. ... 3. 21. 88.9 ]\n", | |
" [ 0. 0. 46. ... 11. 13. 90.3 ]\n", | |
" [ 32. 0. 0. ... 0. 21. 210.06]]\n", | |
"\n", | |
" [[ 0. 4. 36. ... 5. 32. 97.9 ]\n", | |
" [ 4.5 0. 0. ... 3. 21. 88.9 ]\n", | |
" [ 0. 0. 46. ... 11. 13. 90.3 ]\n", | |
" [ 32. 0. 0. ... 0. 21. 210.06]\n", | |
" [ 12. 0. 7. ... 16. 0. 82.9 ]]\n", | |
"\n", | |
" [[ 4.5 0. 0. ... 3. 21. 88.9 ]\n", | |
" [ 0. 0. 46. ... 11. 13. 90.3 ]\n", | |
" [ 32. 0. 0. ... 0. 21. 210.06]\n", | |
" [ 12. 0. 7. ... 16. 0. 82.9 ]\n", | |
" [ 14. 0. 11. ... 25. 22. 274.42]]\n", | |
"\n", | |
" ...\n", | |
"\n", | |
" [[ 0. 13. 0. ... 0. 3. 21.27]\n", | |
" [ 17. 27. 0. ... 0. 2. 83.27]\n", | |
" [ 14. 23.5 4. ... 1. 35. 209.27]\n", | |
" [ 12. 15.7 7. ... 0. 14. 134.83]\n", | |
" [ 10. 19. 0. ... 0. 0. 81.88]]\n", | |
"\n", | |
" [[ 17. 27. 0. ... 0. 2. 83.27]\n", | |
" [ 14. 23.5 4. ... 1. 35. 209.27]\n", | |
" [ 12. 15.7 7. ... 0. 14. 134.83]\n", | |
" [ 10. 19. 0. ... 0. 0. 81.88]\n", | |
" [ 7. 21.7 11. ... 0. 12. 20.14]]\n", | |
"\n", | |
" [[ 14. 23.5 4. ... 1. 35. 209.27]\n", | |
" [ 12. 15.7 7. ... 0. 14. 134.83]\n", | |
" [ 10. 19. 0. ... 0. 0. 81.88]\n", | |
" [ 7. 21.7 11. ... 0. 12. 20.14]\n", | |
" [ 6.05 17.5 21. ... 0. 10. 208.54]]]\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Cd4d6j3JuSE8", | |
"outputId": "68c4f4ac-f22d-4429-9f77-e01ca8d878f2", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
} | |
}, | |
"source": [ | |
"print(y)" | |
], | |
"execution_count": 12, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"[ 82.9 274.42 216.36 ... 20.14 208.54 208.14]\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "MNFcE3TiqJ8s" | |
}, | |
"source": [ | |
"# Changelog\n", | |
"\n", | |
"```\n", | |
"- 20240414 - 1.1.0 / 0.5.0 - Refactor/Documentation\n", | |
"- 20190926 - 1.0.0 - Initial\n", | |
"```\n", | |
"\n", | |
"#### Copyright © 2019-2024 [Taruma Sakti Megariansyah](https://taruma.github.io)\n", | |
"\n", | |
"Source code in this notebook is licensed under an [MIT License](https://choosealicense.com/licenses/mit/). Data in this notebook is licensed under a [Creative Commons Attribution 4.0 International](https://creativecommons.org/licenses/by/4.0/)." | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment