Ogaday · September 4, 2025 16:27
diff --git a/fit.ipynb b/fit.ipynb
diff --git a/models.py b/models.py
 from typing import Literal, Optional

 import numpy as np
 from scipy.spatial.distance import cdist
 from sklearn.base import BaseEstimator, RegressorMixin
 from sklearn.datasets import make_regression
 from sklearn.model_selection import train_test_split


 def make_train_test_data(
    n_samples: int = 100, bias: float = 0.0, noise: float = 0.0, seed: Optional[int] = None
 ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Build a synthetic single-feature regression dataset and split it.

    One ``RandomState`` created from ``seed`` is handed to both the data
    generator and the splitter, so the two steps share a single random stream.

    Parameters
    ----------
    n_samples
        Total number of rows generated, before the train/test split.
    bias
        Offset forwarded to ``make_regression``.
    noise
        Random error forwarded to ``make_regression``.
    seed
        Seed for the shared random state. Used to control reproducibility.

    Returns
    -------
    X_train, X_test, y_train, y_test: tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
    """
    rng = np.random.RandomState(seed=seed)
    features, targets = make_regression(
        random_state=rng,
        noise=noise,
        bias=bias,
        n_informative=1,
        n_features=1,
        n_samples=n_samples,
    )
    # train_test_split yields [X_train, X_test, y_train, y_test] for two inputs.
    return tuple(train_test_split(features, targets, random_state=rng))


 class LinearModel(RegressorMixin, BaseEstimator):
    """Simple 1D linear regression model with fixed intercept and gradient.

    The parameters are supplied by the caller rather than learned from data;
    predictions are ``X * gradient + intercept``.

    Attributes
    ----------
    intercept
        The intercept, or offset of the model.
    gradient
        The gradient, or slope of the model.
    is_fitted_
        Set to ``True`` by ``fit``.
    """

    def __init__(self, intercept: float = 0, gradient: float = 0):
        """Create a new LinearModel instance.

        Parameters
        ----------
        intercept
            The intercept, or offset of the model.
        gradient
            The gradient, or slope of the model.
        """
        self.intercept = intercept
        self.gradient = gradient

    def fit(self, X, y) -> "LinearModel":
        """Fit the linear model.

        This is a no-op apart from setting ``is_fitted_``, as intercept and
        gradient are supplied on initialisation. ``X`` and ``y`` are not read.

        Parameters
        ----------
        X
            1D training features, with shape (n_samples, 1).
        y
            1D training targets, with shape (n_samples,).

        Returns
        -------
        LinearModel
            Linear model instance.
        """
        self.is_fitted_ = True
        return self

    def predict(self, X) -> np.ndarray:
        """Produce predictions.

        Parameters
        ----------
        X
            1D features, with shape (n_samples, 1). Any array-like is accepted.

        Returns
        -------
        np.ndarray
            Predicted targets, flattened to one dimension.
        """
        # Convert before the arithmetic: with a plain list, ``X * gradient``
        # would repeat the list (int gradient) or raise (float gradient)
        # instead of scaling each value.
        features = np.asarray(X)
        return np.ravel(features * self.gradient + self.intercept)


 class NeighboursModel(RegressorMixin, BaseEstimator):
    """Simple k-nearest neighbours model.

    Predictions are the mean training target of the ``k_neighbours`` training
    rows closest to each query row, using ``cdist``'s default metric.

    Attributes
    ----------
    k_neighbours
        The number of neighbours.
    is_fitted_
        Set to ``True`` by ``fit``.
    """

    def __init__(self, k_neighbours: int = 1):
        """Create a new NeighboursModel instance.

        Parameters
        ----------
        k_neighbours
            The number of neighbours.
        """
        self.k_neighbours = k_neighbours

    def fit(self, X, y) -> "NeighboursModel":
        """Fit the neighbours model by storing the training data.

        Parameters
        ----------
        X
            1D training features, with shape (n_samples, 1).
        y
            1D training targets, with shape (n_samples,).

        Returns
        -------
        NeighboursModel
            k-nn model instance.

        Raises
        ------
        ValueError
            If ``k_neighbours`` is smaller than 1.
        """
        # A non-positive k would silently average an empty or truncated slice.
        if self.k_neighbours < 1:
            raise ValueError(f"k_neighbours must be at least 1, got {self.k_neighbours}")
        # Store arrays so that list or Series inputs support the fancy
        # indexing done in ``predict``.
        self._X = np.asarray(X)
        self._y = np.asarray(y)
        self.is_fitted_ = True
        # Kept for callers that read the original attribute name.
        self.is_fitted = True
        return self

    def predict(self, X) -> np.ndarray:
        """Produce predictions.

        Parameters
        ----------
        X
            1D features, with shape (n_samples, 1).

        Returns
        -------
        np.ndarray
            Predicted targets, one per row of ``X``.
        """
        distances = cdist(X, self._X)
        # Column indices of the closest training rows for each query row; if
        # k_neighbours exceeds the number of training rows, all rows are used.
        nearest = distances.argsort(axis=1)[:, : self.k_neighbours]
        return self._y[nearest].mean(axis=1)


 def train_model(
    model_type: Literal["linear_regression", "nearest_neighbour"],
    lr_intercept: float = 0.0,
    lr_gradient: float = 0.0,
    k_neighbours: int = 1,
    n_samples: int = 100,
    bias: float = 10,
    noise: float = 10,
    seed: Optional[int] = 42,
 ) -> float:
    """Train a model with the supplied parameters.

    Generates a dataset with the bias, noise and seed parameters, and fits and scores the model.

    Parameters
    ----------
    model_type
        Which model to build: ``"linear_regression"`` or ``"nearest_neighbour"``.
    lr_intercept
        Intercept passed to ``LinearModel``; only used for ``"linear_regression"``.
    lr_gradient
        Gradient passed to ``LinearModel``; only used for ``"linear_regression"``.
    k_neighbours
        Number of neighbours passed to ``NeighboursModel``; only used for
        ``"nearest_neighbour"``.
    n_samples
        Total number of rows in the generated dataset (train & test).
    bias
        The offset used to generate the dataset.
    noise
        The random error used to generate the dataset.
    seed
        The random seed used to generate and split the dataset.

    Returns
    -------
    score: float
        The R2 score of the model on the test set.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the recognised names.
    """
    # Resolve the model first so an unknown model_type fails before any data
    # is generated.
    if model_type == "linear_regression":
        model = LinearModel(intercept=lr_intercept, gradient=lr_gradient)
    elif model_type == "nearest_neighbour":
        model = NeighboursModel(k_neighbours=k_neighbours)
    else:
        raise ValueError(f"Unrecognised model_type: '{model_type}'")

    X_train, X_test, y_train, y_test = make_train_test_data(
        n_samples=n_samples, bias=bias, noise=noise, seed=seed
    )
    return model.fit(X_train, y_train).score(X_test, y_test)
	from typing import Literal, Optional

	import numpy as np
	from scipy.spatial.distance import cdist
	from sklearn.base import BaseEstimator, RegressorMixin
	from sklearn.datasets import make_regression
	from sklearn.model_selection import train_test_split


	def make_train_test_data(
	    n_samples: int = 100, bias: float = 0.0, noise: float = 0.0, seed: Optional[int] = None
	) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
	    """Build a synthetic single-feature regression dataset and split it.

	    One ``RandomState`` created from ``seed`` is handed to both the data
	    generator and the splitter, so the two steps share a single random stream.

	    Parameters
	    ----------
	    n_samples
	        Total number of rows generated, before the train/test split.
	    bias
	        Offset forwarded to ``make_regression``.
	    noise
	        Random error forwarded to ``make_regression``.
	    seed
	        Seed for the shared random state. Used to control reproducibility.

	    Returns
	    -------
	    X_train, X_test, y_train, y_test: tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
	    """
	    rng = np.random.RandomState(seed=seed)
	    features, targets = make_regression(
	        random_state=rng,
	        noise=noise,
	        bias=bias,
	        n_informative=1,
	        n_features=1,
	        n_samples=n_samples,
	    )
	    # train_test_split yields [X_train, X_test, y_train, y_test] for two inputs.
	    return tuple(train_test_split(features, targets, random_state=rng))


	class LinearModel(RegressorMixin, BaseEstimator):
	    """Simple 1D linear regression model with fixed intercept and gradient.

	    The parameters are supplied by the caller rather than learned from data;
	    predictions are ``X * gradient + intercept``.

	    Attributes
	    ----------
	    intercept
	        The intercept, or offset of the model.
	    gradient
	        The gradient, or slope of the model.
	    is_fitted_
	        Set to ``True`` by ``fit``.
	    """

	    def __init__(self, intercept: float = 0, gradient: float = 0):
	        """Create a new LinearModel instance.

	        Parameters
	        ----------
	        intercept
	            The intercept, or offset of the model.
	        gradient
	            The gradient, or slope of the model.
	        """
	        self.intercept = intercept
	        self.gradient = gradient

	    def fit(self, X, y) -> "LinearModel":
	        """Fit the linear model.

	        This is a no-op apart from setting ``is_fitted_``, as intercept and
	        gradient are supplied on initialisation. ``X`` and ``y`` are not read.

	        Parameters
	        ----------
	        X
	            1D training features, with shape (n_samples, 1).
	        y
	            1D training targets, with shape (n_samples,).

	        Returns
	        -------
	        LinearModel
	            Linear model instance.
	        """
	        self.is_fitted_ = True
	        return self

	    def predict(self, X) -> np.ndarray:
	        """Produce predictions.

	        Parameters
	        ----------
	        X
	            1D features, with shape (n_samples, 1). Any array-like is accepted.

	        Returns
	        -------
	        np.ndarray
	            Predicted targets, flattened to one dimension.
	        """
	        # Convert before the arithmetic: with a plain list, ``X * gradient``
	        # would repeat the list (int gradient) or raise (float gradient)
	        # instead of scaling each value.
	        features = np.asarray(X)
	        return np.ravel(features * self.gradient + self.intercept)


	class NeighboursModel(RegressorMixin, BaseEstimator):
	    """Simple k-nearest neighbours model.

	    Predictions are the mean training target of the ``k_neighbours`` training
	    rows closest to each query row, using ``cdist``'s default metric.

	    Attributes
	    ----------
	    k_neighbours
	        The number of neighbours.
	    is_fitted_
	        Set to ``True`` by ``fit``.
	    """

	    def __init__(self, k_neighbours: int = 1):
	        """Create a new NeighboursModel instance.

	        Parameters
	        ----------
	        k_neighbours
	            The number of neighbours.
	        """
	        self.k_neighbours = k_neighbours

	    def fit(self, X, y) -> "NeighboursModel":
	        """Fit the neighbours model by storing the training data.

	        Parameters
	        ----------
	        X
	            1D training features, with shape (n_samples, 1).
	        y
	            1D training targets, with shape (n_samples,).

	        Returns
	        -------
	        NeighboursModel
	            k-nn model instance.

	        Raises
	        ------
	        ValueError
	            If ``k_neighbours`` is smaller than 1.
	        """
	        # A non-positive k would silently average an empty or truncated slice.
	        if self.k_neighbours < 1:
	            raise ValueError(f"k_neighbours must be at least 1, got {self.k_neighbours}")
	        # Store arrays so that list or Series inputs support the fancy
	        # indexing done in ``predict``.
	        self._X = np.asarray(X)
	        self._y = np.asarray(y)
	        self.is_fitted_ = True
	        # Kept for callers that read the original attribute name.
	        self.is_fitted = True
	        return self

	    def predict(self, X) -> np.ndarray:
	        """Produce predictions.

	        Parameters
	        ----------
	        X
	            1D features, with shape (n_samples, 1).

	        Returns
	        -------
	        np.ndarray
	            Predicted targets, one per row of ``X``.
	        """
	        distances = cdist(X, self._X)
	        # Column indices of the closest training rows for each query row; if
	        # k_neighbours exceeds the number of training rows, all rows are used.
	        nearest = distances.argsort(axis=1)[:, : self.k_neighbours]
	        return self._y[nearest].mean(axis=1)


	def train_model(
	    model_type: Literal["linear_regression", "nearest_neighbour"],
	    lr_intercept: float = 0.0,
	    lr_gradient: float = 0.0,
	    k_neighbours: int = 1,
	    n_samples: int = 100,
	    bias: float = 10,
	    noise: float = 10,
	    seed: Optional[int] = 42,
	) -> float:
	    """Train a model with the supplied parameters.

	    Generates a dataset with the bias, noise and seed parameters, and fits and scores the model.

	    Parameters
	    ----------
	    model_type
	        Which model to build: ``"linear_regression"`` or ``"nearest_neighbour"``.
	    lr_intercept
	        Intercept passed to ``LinearModel``; only used for ``"linear_regression"``.
	    lr_gradient
	        Gradient passed to ``LinearModel``; only used for ``"linear_regression"``.
	    k_neighbours
	        Number of neighbours passed to ``NeighboursModel``; only used for
	        ``"nearest_neighbour"``.
	    n_samples
	        Total number of rows in the generated dataset (train & test).
	    bias
	        The offset used to generate the dataset.
	    noise
	        The random error used to generate the dataset.
	    seed
	        The random seed used to generate and split the dataset.

	    Returns
	    -------
	    score: float
	        The R2 score of the model on the test set.

	    Raises
	    ------
	    ValueError
	        If ``model_type`` is not one of the recognised names.
	    """
	    # Resolve the model first so an unknown model_type fails before any data
	    # is generated.
	    if model_type == "linear_regression":
	        model = LinearModel(intercept=lr_intercept, gradient=lr_gradient)
	    elif model_type == "nearest_neighbour":
	        model = NeighboursModel(k_neighbours=k_neighbours)
	    else:
	        raise ValueError(f"Unrecognised model_type: '{model_type}'")

	    X_train, X_test, y_train, y_test = make_train_test_data(
	        n_samples=n_samples, bias=bias, noise=noise, seed=seed
	    )
	    return model.fit(X_train, y_train).score(X_test, y_test)