Source code for DLL.MachineLearning.SupervisedLearning.GaussianProcesses._GaussianProcessRegressor

import torch
from math import floor

from ..Kernels import _Base
from ....DeepLearning.Optimisers import ADAM
from ....DeepLearning.Optimisers._BaseOptimiser import BaseOptimiser
from ....Exceptions import NotFittedError
from ....Data.Metrics import _round_dictionary


class GaussianProcessRegressor:
    """
    Implements the Gaussian process regression model.

    Args:
        covariance_function (:ref:`kernel_section_label`): The kernel function expressing how similar different samples are.
        noise (int | float, optional): The artificially added noise to the model. Is added as variance to each sample. Must be non-negative. Defaults to 0.
        epsilon (float, optional): Implemented similarly to noise. Makes sure the covariance matrix is positive definite and hence invertible. Must be positive. Defaults to 1e-5. If one gets a RuntimeError for a matrix not being invertible, one should increase this parameter.
        device (torch.device, optional): The device of all matrices. Defaults to torch.device("cpu").

    Attributes:
        n_features (int): The number of features. Available after fitting.
    """
    def __init__(self, covariance_function, noise=0, epsilon=1e-5, device=torch.device("cpu")):
        if not isinstance(covariance_function, _Base):
            raise TypeError("covariance_function must be from DLL.MachineLearning.SupervisedLearning.Kernels.")
        if not isinstance(noise, int | float) or noise < 0:
            raise ValueError("noise must be non-negative.")
        if not isinstance(epsilon, int | float) or epsilon <= 0:
            raise ValueError("epsilon must be positive.")
        if not isinstance(device, torch.device):
            raise TypeError("device must be an instance of torch.device.")

        self.covariance_function = covariance_function
        self.noise = noise
        self.epsilon = epsilon
        self.device = device

    def _get_covariance_matrix(self, X1, X2):
        return self.covariance_function(X1, X2).to(X1.dtype).to(self.device)
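
    # Construction sketch, kept as a comment since this is library source. The
    # RBF kernel name below is an assumption used for illustration; any kernel
    # deriving from _Base in DLL.MachineLearning.SupervisedLearning.Kernels works:
    #
    #     from DLL.MachineLearning.SupervisedLearning.Kernels import RBF  # assumed kernel name
    #     model = GaussianProcessRegressor(RBF(), noise=0.1, epsilon=1e-5)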
    def fit(self, X, y):
        """
        Fits the GaussianProcessRegressor model to the input data.

        Args:
            X (torch.Tensor of shape (n_samples, n_features)): The input data, where each row is a sample and each column is a feature.
            y (torch.Tensor of shape (n_samples,)): The target values corresponding to each sample. Should be normalized to zero mean and unit variance.

        Returns:
            None

        Raises:
            TypeError: If the input matrix or the target matrix is not a PyTorch tensor.
            ValueError: If the input matrix or the target matrix is not the correct shape.
        """
        if not isinstance(X, torch.Tensor) or not isinstance(y, torch.Tensor):
            raise TypeError("The input matrix and the target matrix must be PyTorch tensors.")
        if X.ndim != 2:
            raise ValueError("The input matrix must be a 2 dimensional tensor.")
        if y.ndim != 1 or len(y) != len(X):
            raise ValueError("The targets must be 1 dimensional with the same number of samples as the input data.")
        if len(y) < 2:
            raise ValueError("There must be at least 2 samples.")
        # variance = torch.var(y)
        # mean = torch.mean(y)
        # if not torch.allclose(variance, torch.ones_like(variance)):
        #     raise ValueError("y must have unit variance. Use DLL.Data.Preprocessing.StandardScaler to scale the data.")
        # if not torch.allclose(mean, torch.zeros_like(mean)):
        #     raise ValueError("y must have zero mean. Use DLL.Data.Preprocessing.StandardScaler to scale the data.")

        self.n_features = X.shape[1]
        self.X = X
        self.Y = y.unsqueeze(dim=1)
        self.prior_covariance_matrix = self._get_covariance_matrix(X, X) + (self.noise + self.epsilon) * torch.eye(len(X), device=self.device)
        self.inverse_prior_covariance_matrix = torch.linalg.inv(self.prior_covariance_matrix)
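
    # What fit() precomputes, written out for one concrete kernel (an
    # illustrative sketch in plain torch, not part of the class). With an RBF
    # kernel k(x, x') = exp(-||x - x'||^2 / (2 * l^2)), the stored prior
    # covariance matrix and its inverse are:
    #
    #     sq_dists = torch.cdist(X, X) ** 2
    #     K = torch.exp(-sq_dists / (2 * l ** 2)) + (noise + epsilon) * torch.eye(len(X))
    #     K_inv = torch.linalg.inv(K)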
    def predict(self, X):
        """
        Applies the fitted GaussianProcessRegressor model to the input data, predicting the target values.

        Args:
            X (torch.Tensor of shape (n_samples, n_features)): The input data to be regressed.

        Returns:
            mean, covariance (tuple[torch.Tensor of shape (n_samples, 1), torch.Tensor of shape (n_samples, n_samples)]): A tuple containing the posterior mean and the posterior covariance. As the prediction, one should use the mean.

        Raises:
            NotFittedError: If the GaussianProcessRegressor model has not been fitted before predicting.
            TypeError: If the input matrix is not a PyTorch tensor.
            ValueError: If the input matrix is not the correct shape.
        """
        if not hasattr(self, "inverse_prior_covariance_matrix"):
            raise NotFittedError("GaussianProcessRegressor.fit() must be called before predicting.")
        if not isinstance(X, torch.Tensor):
            raise TypeError("The input matrix must be a PyTorch tensor.")
        if X.ndim != 2 or X.shape[1] != self.n_features:
            raise ValueError("The input matrix must be a 2 dimensional tensor with the same number of features as the fitted tensor.")

        k_1 = self._get_covariance_matrix(self.X, X)
        k_2 = self._get_covariance_matrix(X, X) + (self.noise + self.epsilon) * torch.eye(len(X), device=self.device)
        mean = k_1.T @ self.inverse_prior_covariance_matrix @ self.Y
        posterior_covariance = k_2 - k_1.T @ self.inverse_prior_covariance_matrix @ k_1
        return mean, posterior_covariance
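
    # predict() implements the standard Gaussian process conditioning identities,
    # with K = K(X_train, X_train) + (noise + epsilon) * I computed in fit():
    #
    #     mean = K(X_train, X*)^T K^{-1} y
    #     cov  = K(X*, X*) - K(X_train, X*)^T K^{-1} K(X_train, X*)
    #
    # A rough 95 % predictive interval for sample i is
    # mean[i] +- 1.96 * sqrt(cov[i, i]).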
    def log_marginal_likelihood(self):
        """
        Computes the log marginal likelihood of the current model. This value is used to optimize hyperparameters.

        Returns:
            log marginal likelihood (float): The log marginal likelihood of the current model.

        Raises:
            NotFittedError: If the GaussianProcessRegressor model has not been fitted before calculating the log marginal likelihood.
        """
        if not hasattr(self, "inverse_prior_covariance_matrix"):
            raise NotFittedError("GaussianProcessRegressor.fit() must be called before calculating the log marginal likelihood.")

        L = torch.linalg.cholesky(self.prior_covariance_matrix)
        alpha = torch.cholesky_solve(self.Y, L)
        lml = -0.5 * self.Y.T @ alpha - torch.sum(torch.log(torch.diagonal(L))) - 0.5 * len(self.Y) * torch.log(torch.tensor(2 * torch.pi, device=self.device))
        return lml.item()
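
    # Cholesky identities behind log_marginal_likelihood(), where K = L L^T
    # with L lower triangular:
    #
    #     alpha = K^{-1} y                     (computed via torch.cholesky_solve(y, L))
    #     log det K = 2 * sum(log(diag(L)))    (so -sum(log(diag(L))) equals -0.5 * log det K)
    #
    # giving lml = -0.5 * y^T K^{-1} y - 0.5 * log det K - 0.5 * n * log(2 * pi).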
    def train_kernel(self, epochs=10, optimiser=None, callback_frequency=1, verbose=False):
        """
        Trains the current covariance function parameters by maximizing the log marginal likelihood.

        Args:
            epochs (int, optional): The number of optimisation rounds. Must be a positive integer. Defaults to 10.
            optimiser (:ref:`optimisers_section_label` | None, optional): The optimiser used for training the model. If None, the Adam optimiser is used.
            callback_frequency (int, optional): The number of iterations between printing info from training. Must be a positive integer. Defaults to 1, which means that info is printed every iteration, assuming verbose=True.
            verbose (bool, optional): If True, prints the log marginal likelihood of the model during training. Defaults to False.

        Returns:
            history (dict[str, torch.Tensor], where the tensor is floor(epochs / callback_frequency) long): A dictionary tracking the evolution of the log marginal likelihood at intervals defined by callback_frequency. The tensor can be accessed with history["log marginal likelihood"].

        Raises:
            NotFittedError: If the GaussianProcessRegressor model has not been fitted before training the kernel.
            TypeError: If the parameters are of the wrong type.
            ValueError: If epochs is not a positive integer.
        """
        if not hasattr(self, "inverse_prior_covariance_matrix"):
            raise NotFittedError("GaussianProcessRegressor.fit() must be called before training the kernel.")
        if not isinstance(epochs, int) or epochs <= 0:
            raise ValueError("epochs must be a positive integer.")
        if not isinstance(optimiser, BaseOptimiser) and optimiser is not None:
            raise TypeError("optimiser must be from DLL.DeepLearning.Optimisers")
        if not isinstance(callback_frequency, int) or callback_frequency <= 0:
            raise ValueError("callback_frequency must be a positive integer.")
        if not isinstance(verbose, bool):
            raise TypeError("verbose must be a boolean.")

        optimiser = ADAM() if optimiser is None else optimiser
        optimiser.initialise_parameters(list(self.covariance_function.parameters().values()))
        history = {"log marginal likelihood": torch.zeros(floor(epochs / callback_frequency))}

        for epoch in range(epochs):
            # form the derivative of the log marginal likelihood with respect to a kernel parameter
            def derivative(parameter_derivative):
                derivative = 0.5 * self.Y.T @ self.inverse_prior_covariance_matrix @ parameter_derivative @ self.inverse_prior_covariance_matrix @ self.Y
                if parameter_derivative.ndim == 2:
                    derivative = derivative.squeeze(1)
                    derivative -= 0.5 * torch.trace(self.inverse_prior_covariance_matrix @ parameter_derivative)
                else:
                    derivative = derivative.squeeze((1, 2))
                    derivative -= 0.5 * (self.inverse_prior_covariance_matrix @ parameter_derivative).diagonal(dim1=1, dim2=2).sum(dim=-1)
                # minus sign, since the goal is to maximise the log marginal likelihood
                return -derivative

            # calculate the derivatives
            self.covariance_function.update(derivative, self.X)
            # update the parameters
            optimiser.update_parameters()
            self.prior_covariance_matrix = self._get_covariance_matrix(self.X, self.X) + (self.noise + self.epsilon) * torch.eye(len(self.X), device=self.device)
            self.inverse_prior_covariance_matrix = torch.linalg.inv(self.prior_covariance_matrix)

            if epoch % callback_frequency == 0:
                lml = self.log_marginal_likelihood()
                history["log marginal likelihood"][int(epoch / callback_frequency)] = lml
                if verbose:
                    params = {}
                    for key, param in self.covariance_function.parameters().items():
                        if len(param) == 1:
                            params[key] = round(param.item(), 3)
                            continue
                        for i, param_ in enumerate(param):
                            params[key + "_" + str(i + 1)] = round(param_.item(), 3)
                    print(f"Epoch: {epoch + 1} - Log marginal likelihood: {lml} - Parameters: {params}")

        return history
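
# Self-contained sanity check of the gradient used in train_kernel(), written in
# plain torch so it needs no DLL kernel class (an illustrative sketch, not part
# of the library). It verifies the analytic identity
#     d lml / d theta = 0.5 * y^T K^{-1} (dK/dtheta) K^{-1} y - 0.5 * tr(K^{-1} dK/dtheta)
# against autograd for an RBF kernel parameterised by theta = log(l).
if __name__ == "__main__":
    torch.manual_seed(0)
    X = torch.randn(10, 1, dtype=torch.float64)
    y = torch.sin(X).squeeze(1)
    noise = 1e-2
    sq_dists = torch.cdist(X, X) ** 2  # constant with respect to the kernel parameter

    def lml(log_l):
        # log marginal likelihood of an RBF kernel with length scale l = exp(log_l)
        K = torch.exp(-sq_dists / (2 * torch.exp(2 * log_l))) + noise * torch.eye(len(X), dtype=torch.float64)
        L = torch.linalg.cholesky(K)
        alpha = torch.cholesky_solve(y.unsqueeze(1), L)
        return (-0.5 * y @ alpha.squeeze(1) - torch.log(torch.diagonal(L)).sum()
                - 0.5 * len(y) * torch.log(torch.tensor(2 * torch.pi, dtype=torch.float64)))

    log_l = torch.zeros((), dtype=torch.float64, requires_grad=True)
    autograd_grad, = torch.autograd.grad(lml(log_l), log_l)

    with torch.no_grad():
        l = torch.exp(log_l)
        K_rbf = torch.exp(-sq_dists / (2 * l ** 2))
        K_inv = torch.linalg.inv(K_rbf + noise * torch.eye(len(X), dtype=torch.float64))
        dK = K_rbf * sq_dists / l ** 2  # dK/d(log l) via the chain rule
        analytic_grad = 0.5 * y @ K_inv @ dK @ K_inv @ y - 0.5 * torch.trace(K_inv @ dK)

    print(autograd_grad.item(), analytic_grad.item())  # the two values should agree closely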
# def is_positive_definite(matrix):
#     print("-----------------")
#     if not torch.allclose(matrix, matrix.T):
#         print(f"Matrix not symmetric: {torch.linalg.norm(matrix.T - matrix)}")
#     eigenvalues = torch.linalg.eigvalsh(matrix)
#     print(f"Minimum eigenvalue: {torch.min(eigenvalues).item()}")