This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Data wrangling | |
| import pandas as pd | |
| # JSON functionalities | |
| import json | |
| # Reading a json in the .gz format | |
| def read_json(file_path:str) -> pd.DataFrame: | |
| """ | |
| Reads the provided special JSON file format |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Data wrangling | |
| import pandas as pd | |
| # Accuracy metrics | |
| from sklearn.metrics import precision_score, recall_score | |
| # Reading data for classification | |
| d = pd.read_csv("data/random_forest/telecom_churn.csv") | |
| # Setting the features used |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def best_split(self) -> tuple: | |
| """ | |
| Given the X features and Y targets calculates the best split | |
| for a decision tree | |
| """ | |
| # Creating a dataset for splitting | |
| df = self.X.copy() | |
| df['Y'] = self.Y | |
| # Getting the GINI impurity for the base input |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # self - an instance of the RandomForestClassifier class | |
| # self.X - pandas dataframe containing feature information | |
| # self.Y - a list of binary response values | |
| # self.X_obs_fraction - a float in range [0, 1] | |
| def bootstrap_sample(self): | |
| """ | |
| Function that creates a bootstrapped sample with the class instance parameters | |
| """ | |
| # Sampling the number of rows with repetition |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Data wrangling | |
| import pandas as pd | |
| # Array math | |
| import numpy as np | |
| # Quick value count calculator | |
| from collections import Counter | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Data wrangling | |
| import pandas as pd | |
| # Array math | |
| import numpy as np | |
| # Quick value count calculator | |
| from collections import Counter | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Defining the grid of parameters | |
| n_estimators = [30, 100, 200, 300, 600, 900] | |
| max_depth = [4, 6, 8, 12, 14, 16] | |
| # Number of rows to test on | |
| nrows = 30000 | |
| # Creating a dictionary | |
| gpu_dict = { | |
| 'objective': ['reg:squarederror'], |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Defining the dictionaries | |
| cpu_dict = { | |
| 'objective': 'reg:squarederror' | |
| } | |
| gpu_dict = { | |
| 'objective': 'reg:squarederror', | |
| 'tree_method': 'gpu_hist' | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| class NNMultistepModel(): | |
| def __init__( | |
| self, | |
| X, | |
| Y, | |
| n_outputs, | |
| n_lag, | |
| n_ft, | |
| n_layer, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def create_X_Y(ts: np.array, lag=1, n_ahead=1, target_index=0) -> tuple: | |
| """ | |
| A method to create X and Y matrices from a time series array for the training of | |
| deep learning models | |
| """ | |
| # Extracting the number of features that are passed from the array | |
| n_features = ts.shape[1] | |
| # Creating placeholder lists | |
| X, Y = [], [] |