Source code for DLL.MachineLearning.SupervisedLearning.NaiveBayes._MultinomialNaiveBayes

import torch

from ....Exceptions import NotFittedError


[docs] class MultinomialNaiveBayes: """ The MultinomialNaiveBayes classifier model. Applies the Bayes theorem to classify samples with positive integer features. Attributes: n_features (int): The number of features. Available after fitting. n_classes (int): The number of classes. Available after fitting. """
[docs] def fit(self, X, y, alpha=1): """ Fits the MultinomialNaiveBayes model to the input data by calculating the prior probabilities and likelihoods. Args: X (torch.Tensor of shape (n_samples, n_features)): The input data, where each row is a sample and each column is a feature. Must contain only non-negative integers. y (torch.Tensor of shape (n_samples,)): The labels corresponding to each sample. Every element must be in [0, ..., n_classes - 1]. alpha (float | int): Laplacian smoothing parameter. Must be non-negative. For no smoothing, alpha is set to zero. Defaults to 1. Returns: None Raises: TypeError: If the input matrix or the label matrix is not a PyTorch tensor. ValueError: If the input matrix or the label matrix is not the correct shape. """ if not isinstance(X, torch.Tensor) or not isinstance(y, torch.Tensor): raise TypeError("The input matrix and the label matrix must be PyTorch tensors.") if X.ndim != 2: raise ValueError("The input matrix must be a 2 dimensional tensor.") if y.ndim != 1 or y.shape[0] != X.shape[0]: raise ValueError("The labels must be 1 dimensional with the same number of samples as the input data") vals = torch.unique(y).numpy() if set(vals) != {*range(len(vals))}: raise ValueError("y must only contain the values in [0, ..., n_classes - 1].") if (X.dtype == torch.int or X.dtype == torch.int8 or X.dtype == torch.int16 or X.dtype == torch.int32 or X.dtype == torch.int64) and torch.all(X >= 0): raise ValueError("The features must be non-negative integers. If your features are not non-negative integers, consider other NaiveBayes models.") if not isinstance(alpha, int | float) or alpha < 0: raise ValueError("alpha must be a non-negative real number.") self.n_classes = len(vals) self.n_features = X.shape[1] self.priors = torch.zeros((self.n_classes,), dtype=torch.float32) self.likelihoods = torch.zeros((self.n_classes, X.shape[1]), dtype=torch.float32) for i in range(self.n_classes): X_cls = X[y == i] self.priors[i] = len(X_cls) / len(y) self.likelihoods[i] = (X_cls.sum(dim=0) + alpha) / (X_cls.sum() + alpha * X.shape[1]) # laplace smoothing
[docs] def predict(self, X): """ Applies the fitted MultinomialNaiveBayes model to the input data, predicting the labels. Args: X (torch.Tensor of shape (n_samples, n_features)): The input data to be classified. Returns: labels (torch.Tensor of shape (n_samples,)): The predicted target values corresponding to each sample. Raises: NotFittedError: If the MultinomialNaiveBayes model has not been fitted before predicting. TypeError: If the input matrix is not a PyTorch tensor. ValueError: If the input matrix is not the correct shape. """ if not hasattr(self, "priors"): raise NotFittedError("MultinomialNaiveBayes.fit() must be called before predicting.") if not isinstance(X, torch.Tensor): raise TypeError("The input matrix must be a PyTorch tensor.") if X.ndim != 2 or X.shape[1] != self.n_features: raise ValueError("The input matrix must be a 2 dimensional tensor with the same number of features as the fitted tensor.") if (X.dtype == torch.int or X.dtype == torch.int8 or X.dtype == torch.int16 or X.dtype == torch.int32 or X.dtype == torch.int64) and torch.all(X >= 0): raise ValueError("The features must be non-negative integers. If your features are not non-negative integers, consider other NaiveBayes models.") posteriors = torch.zeros((self.n_classes, len(X)), dtype=torch.float32) for i in range(self.n_classes): prior = torch.log(self.priors[i]) posterior = (X * torch.log(self.likelihoods[i])).sum(dim=1) + prior posteriors[i] = posterior return torch.argmax(posteriors, dim=0)
[docs] def predict_proba(self, X): """ Applies the fitted MultinomialNaiveBayes model to the input data, predicting the labels. Args: X (torch.Tensor of shape (n_samples, n_features)): The input data to be classified. Returns: labels (torch.Tensor of shape (n_samples,)): The predicted target values corresponding to each sample. Raises: NotFittedError: If the MultinomialNaiveBayes model has not been fitted before predicting. TypeError: If the input matrix is not a PyTorch tensor. ValueError: If the input matrix is not the correct shape. """ if not hasattr(self, "priors"): raise NotFittedError("MultinomialNaiveBayes.fit() must be called before predicting.") if not isinstance(X, torch.Tensor): raise TypeError("The input matrix must be a PyTorch tensor.") if X.ndim != 2 or X.shape[1] != self.n_features: raise ValueError("The input matrix must be a 2 dimensional tensor with the same number of features as the fitted tensor.") if (X.dtype == torch.int or X.dtype == torch.int8 or X.dtype == torch.int16 or X.dtype == torch.int32 or X.dtype == torch.int64) and torch.all(X >= 0): raise ValueError("The features must be non-negative integers. If your features are not non-negative integers, consider other NaiveBayes models.") posteriors = torch.zeros((self.n_classes, len(X)), dtype=torch.float32) for i in range(self.n_classes): prior = torch.log(self.priors[i]) posterior = (X * torch.log(self.likelihoods[i])).sum(dim=1) + prior posteriors[i] = posterior prob_normalizers = torch.logsumexp(posteriors, dim=0) log_probs = posteriors - prob_normalizers probs = torch.exp(log_probs).T if self.n_classes == 2: # binary classification probs = probs[:, 1] return probs