Skip to content

Instantly share code, notes, and snippets.

@Eligijus112
Eligijus112 / read_json.py
Created February 27, 2022 09:06
Read JSON data about Amazon reviews
# Data wrangling
import pandas as pd
# JSON functionalities
import json
# Reading a json in the .gz format
def read_json(file_path:str) -> pd.DataFrame:
"""
Reads the provided special JSON file format
@Eligijus112
Eligijus112 / rf_growth.py
Created June 15, 2021 06:20
A snippet on how to use the custom RandomForest class
# Data wrangling
import pandas as pd
# Accuracy metrics
from sklearn.metrics import precision_score, recall_score
# Reading data for classification
d = pd.read_csv("data/random_forest/telecom_churn.csv")
# Setting the features used
@Eligijus112
Eligijus112 / best_split_rf.py
Created June 14, 2021 19:29
The function of getting the best split for a random forest decision tree
def best_split(self) -> tuple:
"""
Given the X features and Y targets calculates the best split
for a decision tree
"""
# Creating a dataset for splitting
df = self.X.copy()
df['Y'] = self.Y
# Getting the GINI impurity for the base input
@Eligijus112
Eligijus112 / bootsrap_df.py
Created June 14, 2021 16:16
Bootstrapping of a pandas dataframe
# self - an instance of the RandomForestClassifier class
# self.X - pandas dataframe containing feature information
# self.Y - a list holding the binary response variable
# self.X_obs_fraction - a float in range [0, 1]
def bootstrap_sample(self):
"""
Function that creates a bootstrapped sample with the class instance parameters
"""
# Sampling the number of rows with repetition
@Eligijus112
Eligijus112 / RegressionTree.py
Created April 25, 2021 15:00
Regression tree implementation
# Data wrangling
import pandas as pd
# Array math
import numpy as np
# Quick value count calculator
from collections import Counter
@Eligijus112
Eligijus112 / Node.py
Created April 12, 2021 06:14
Node for a decision tree
# Data wrangling
import pandas as pd
# Array math
import numpy as np
# Quick value count calculator
from collections import Counter
@Eligijus112
Eligijus112 / hp-speed-xgoobst.py
Created March 22, 2021 18:21
Gist for evaluating XGBoost speeds for different hyperparameters
# Defining the grid of parameters
n_estimators = [30, 100, 200, 300, 600, 900]
max_depth = [4, 6, 8, 12, 14, 16]
# Number of rows to test on
nrows = 30000
# Creating a dictionary
gpu_dict = {
'objective': ['reg:squarederror'],
@Eligijus112
Eligijus112 / nrow-time-xgboost.py
Created March 21, 2021 16:28
Speed comparison between CPU and GPU XGBoost
# Parameter dictionary for the CPU run: only the objective is set
cpu_dict = dict(objective='reg:squarederror')
# Parameter dictionary for the GPU run: the same objective as the CPU
# dictionary, plus the GPU histogram tree method
gpu_dict = {**cpu_dict, 'tree_method': 'gpu_hist'}
@Eligijus112
Eligijus112 / NNMultistepModel.py
Created December 4, 2020 08:44
Class to create a model object used for sequence modeling
class NNMultistepModel():
def __init__(
self,
X,
Y,
n_outputs,
n_lag,
n_ft,
n_layer,
@Eligijus112
Eligijus112 / createXY.py
Last active February 3, 2022 12:37
A function to create X and Y training matrices for sequence modeling
def create_X_Y(ts: np.array, lag=1, n_ahead=1, target_index=0) -> tuple:
"""
A method to create the X and Y matrices from a time series array for training
deep learning models
"""
# Extracting the number of features that are passed from the array
n_features = ts.shape[1]
# Creating placeholder lists
X, Y = [], []