Created
September 4, 2025 16:27
-
-
Save Ogaday/5ec1145fe80aebcf1c938d19f4381e82 to your computer and use it in GitHub Desktop.
Example models
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from typing import Literal, Optional | |
| import numpy as np | |
| from scipy.spatial.distance import cdist | |
| from sklearn.base import BaseEstimator, RegressorMixin | |
| from sklearn.datasets import make_regression | |
| from sklearn.model_selection import train_test_split | |
def make_train_test_data(
    n_samples: int = 100, bias: float = 0.0, noise: float = 0.0, seed: Optional[int] = None
) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Create a single-feature regression dataset and split it into train and test parts.

    Parameters
    ----------
    n_samples
        Total number of rows generated, before splitting into train and test.
    bias
        The offset passed to the dataset generator.
    noise
        The random error passed to the dataset generator.
    seed
        Seed for the random state shared by generation and splitting. Pass a
        fixed value for reproducible output.

    Returns
    -------
    X_train, X_test, y_train, y_test: tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
    """
    # One RandomState instance drives both steps, so the split is drawn from
    # whatever generator state is left after the data has been produced.
    rng = np.random.RandomState(seed=seed)

    generator_options = {
        "n_samples": n_samples,
        "n_features": 1,
        "n_informative": 1,
        "bias": bias,
        "noise": noise,
        "random_state": rng,
    }
    features, targets = make_regression(**generator_options)

    features_train, features_test, targets_train, targets_test = train_test_split(
        features, targets, random_state=rng
    )
    return features_train, features_test, targets_train, targets_test
class LinearModel(RegressorMixin, BaseEstimator):
    """Simple 1D linear regression model with fixed intercept and gradient.

    The coefficients are supplied by the caller rather than learned from data.

    Attributes
    ----------
    intercept
        The intercept, or offset of the model
    gradient
        The gradient, or slope of the model
    is_fitted_
        Set to ``True`` by :meth:`fit`.
    """

    def __init__(self, intercept: float = 0, gradient: float = 0):
        """Create a new LinearModel instance.

        Parameters
        ----------
        intercept
            The intercept, or offset of the model
        gradient
            The gradient, or slope of the model
        """
        self.intercept = intercept
        self.gradient = gradient

    def fit(self, X, y) -> "LinearModel":
        """Fit the linear model.

        This is a no-op, as intercept and gradient are supplied on initialisation.

        Parameters
        ----------
        X
            1D training features, with shape (n_samples, 1).
        y
            1D training targets, with shape (n_samples,).

        Returns
        -------
        LinearModel
            Linear model instance.
        """
        self.is_fitted_ = True
        return self

    def predict(self, X) -> np.ndarray:
        """Produce predictions.

        Parameters
        ----------
        X
            1D features, with shape (n_samples, 1). Any array-like is accepted.

        Returns
        -------
        np.ndarray
            Predicted targets, flattened to one dimension.
        """
        # Convert before doing arithmetic: multiplying a plain Python list by a
        # float raises TypeError, and multiplying it by an int repeats the list
        # instead of scaling its values.
        features = np.asarray(X)
        return (features * self.gradient + self.intercept).ravel()
class NeighboursModel(RegressorMixin, BaseEstimator):
    """Simple k-nearest neighbours model.

    Predictions are the mean target of the ``k_neighbours`` training rows
    closest to each query row, using the default metric of
    ``scipy.spatial.distance.cdist``.

    Attributes
    ----------
    k_neighbours
        The number of neighbours.
    is_fitted_
        Set to ``True`` by :meth:`fit`.
    is_fitted
        Same flag under its original name, kept for backward compatibility.
    """

    def __init__(self, k_neighbours: int = 1):
        """Create a new NeighboursModel instance.

        Parameters
        ----------
        k_neighbours
            The number of neighbours.
        """
        self.k_neighbours = k_neighbours

    def fit(self, X, y) -> "NeighboursModel":
        """Fit the neighbours model by memorising the training data.

        Parameters
        ----------
        X
            1D training features, with shape (n_samples, 1).
        y
            1D training targets, with shape (n_samples,).

        Returns
        -------
        NeighboursModel
            k-nn model instance.
        """
        # Store as arrays: predict() indexes the targets with an integer index
        # array, which plain Python lists do not support.
        self._X = np.asarray(X)
        self._y = np.asarray(y)
        # Trailing-underscore name matches LinearModel and the scikit-learn
        # convention for attributes set during fit.
        self.is_fitted_ = True
        # Original attribute name, retained so existing readers keep working.
        self.is_fitted = True
        return self

    def predict(self, X) -> np.ndarray:
        """Produce predictions.

        Parameters
        ----------
        X
            1D features, with shape (n_samples, 1).

        Returns
        -------
        np.ndarray
            Predicted targets.
        """
        # Pairwise distances: one row per query point, one column per
        # training point.
        distances = cdist(X, self._X)
        # Column indices of the k closest training points for every query row.
        nearest = distances.argsort(axis=1)[:, : self.k_neighbours]
        # Average the targets of those neighbours.
        return self._y[nearest].mean(axis=1)
def train_model(
    model_type: Literal["linear_regression", "nearest_neighbour"],
    lr_intercept: float = 0.0,
    lr_gradient: float = 0.0,
    k_neighbours: int = 1,
    n_samples: int = 100,
    bias: float = 10,
    noise: float = 10,
    seed: int = 42,
) -> float:
    """Train a model with the supplied parameters.

    Generates a dataset with the bias, noise and seed parameters, and fits and scores the model.

    Parameters
    ----------
    model_type
        Which model to build: ``"linear_regression"`` or ``"nearest_neighbour"``.
    lr_intercept
        Intercept of the linear model. Only used for ``"linear_regression"``.
    lr_gradient
        Gradient of the linear model. Only used for ``"linear_regression"``.
    k_neighbours
        Number of neighbours. Only used for ``"nearest_neighbour"``.
    n_samples
        The number of total rows in the combined dataset (train & test).
    bias
        The offset used to generate the dataset.
    noise
        The random error used to generate the dataset.
    seed
        The random seed used to generate the dataset.

    Returns
    -------
    score: float
        The R2 score of the model on the test set.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the recognised values.
    """
    # Build the model first so an unknown model_type fails before any data is
    # generated.
    if model_type == "linear_regression":
        model = LinearModel(intercept=lr_intercept, gradient=lr_gradient)
    elif model_type == "nearest_neighbour":
        model = NeighboursModel(k_neighbours=k_neighbours)
    else:
        raise ValueError(f"Unrecognised model_type: '{model_type}'")

    X_train, X_test, y_train, y_test = make_train_test_data(
        n_samples=n_samples, bias=bias, noise=noise, seed=seed
    )
    return model.fit(X_train, y_train).score(X_test, y_test)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment