Skip to content

Instantly share code, notes, and snippets.

@Eligijus112
Eligijus112 / sklearn_ft_importance.py
Created June 2, 2022 04:14
sklearn feature importance pipeline
# Fetch the California housing dataset as pandas objects
_cali_data = fetch_california_housing(as_frame=True)
# Features: remove the geo coordinate columns together with the population
# column in a single call. In real-life modeling, population could be used
# as a weight; for educational and inference purposes it is simply dropped.
X = _cali_data.data.drop(columns=['Latitude', 'Longitude', 'Population'])
# Target
y = _cali_data.target
@Eligijus112
Eligijus112 / feature_importance_calculation.py
Created June 2, 2022 04:01
Calculation of feature importance
# Calculating the importance of each node
importance = {}
for node in node_pairs:
importance[node] = round(node_importance(node, *node_pairs[node], n_entries_weight, i_sq), 3)
print(f"Node importance: {importance}")
# Going from node importance to feature importance
feature_importance = {}
for node in node_pairs:
@Eligijus112
Eligijus112 / node_importance.py
Created June 2, 2022 03:53
Calculates the node importance in a decision tree
def node_importance(
node_main: str,
node_left: str,
node_right: str,
n_entries_weight: dict,
i_sq: dict
) -> float:
"""
Calculates the importance of node_main
@Eligijus112
Eligijus112 / tree_data.py
Last active June 2, 2022 03:47
Data for feature importance calculation
# Entries in the nodes
n_entries = {
"node 1": 15480,
"node 2": 12163,
"node 3": 3317,
"node 4": 5869,
"node 5": 6294,
"node 6": 2317,
"node 7": 1000,
"node 8": 2454,
@Eligijus112
Eligijus112 / regression_tree_fit.py
Created May 31, 2022 05:29
Fitting a regression tree
# Train-test splitting
from sklearn.model_selection import train_test_split
# Importing the sklearn implementation
from sklearn.tree import DecisionTreeRegressor
# Precision metrics
from sklearn.metrics import mean_absolute_error
# Splitting the data into training and testing sets
@Eligijus112
Eligijus112 / california.py
Created May 31, 2022 04:44
California dataset loading
# Dataset loading
from sklearn.datasets import fetch_california_housing
# Loading the data
_cali_data = fetch_california_housing(as_frame=True)
# Features and target
X, y = _cali_data.data, _cali_data.target
# Dropping the geo coordinate features
@Eligijus112
Eligijus112 / GradientBoostingRegression.py
Created March 26, 2022 09:19
Gradient boosting for regression implementation
# The base class with the weak learner
from regression.tree import Tree
# Data wrangling
import pandas as pd
# Python infinity
from math import inf
@Eligijus112
Eligijus112 / RegressionTree.py
Created March 25, 2022 18:55
A regression tree growing implementation
# Data wrangling
import pandas as pd
# Infinity constant
from math import inf
class Tree():
"""
Class to fit a regression tree to the given data
@Eligijus112
Eligijus112 / QuickSort.py
Created March 20, 2022 09:47
Quick sort implementation
# Base class
from algorithms.BaseClass import BaseClass
class QuickSort(BaseClass):
def __init__(self, arr):
# Inheriting from base class
super().__init__(arr)
# Indication whether the current element is partitioned or not
@Eligijus112
Eligijus112 / MergeSort.py
Last active March 20, 2022 09:45
Merge sort in Python
# Base class
from algorithms.BaseClass import BaseClass
class MergeSort(BaseClass):
def __init__(self, arr):
# Inheriting from base class
super().__init__(arr)
# Initiating the recursive splitting of the arrays