# Source code for DLL.MachineLearning.UnsupervisedLearning.DimensionalityReduction._PCA
import torch
from ....Exceptions import NotFittedError
[docs]
class PCA:
    """
    Principal Component Analysis (PCA) class for dimensionality reduction.

    Args:
        n_components (int): Number of principal components to keep. The number must be a positive integer. If it exceeds the number of features of the fitted data, only as many components as there are features are kept.
        epsilon (float, optional): Small constant added to the standard deviation before dividing by it, so that constant features do not cause a division by zero. Defaults to 1e-10.

    Attributes:
        components (torch.Tensor): Principal components extracted from the data, one component per row, ordered by decreasing explained variance. Available after fitting.
        explained_variance (torch.Tensor): Variance explained by the selected components. Available after fitting.

    Raises:
        ValueError: If `n_components` is not a positive integer.
    """
    def __init__(self, n_components=2, epsilon=1e-10):
        if not isinstance(n_components, int) or n_components < 1:
            raise ValueError("n_components must be a positive integer.")

        self.n_components = n_components
        self.epsilon = epsilon

    def fit(self, X, normalize=True):
        """
        Fits the PCA model to the input data by calculating the principal components.
        The input data is always centered and if `normalize=True`, also normalized so that the standard deviation is 1 along each axis.

        Args:
            X (torch.Tensor of shape (n_samples, n_features)): The input data, where each row is a sample and each column is a feature.
            normalize (bool, optional): Whether to normalize the data before computing the PCA. Defaults to True.

        Returns:
            None

        Raises:
            TypeError: If the input matrix is not a PyTorch tensor or if the `normalize` parameter is not boolean.
            ValueError: If the input matrix is not the correct shape.
        """
        if not isinstance(X, torch.Tensor):
            raise TypeError("The input matrix must be a PyTorch tensor.")
        if not isinstance(normalize, bool):
            raise TypeError("The normalize parameter must be a boolean.")
        # `< 2` (rather than `== 1`) also rejects an empty tensor, for which
        # neither the mean nor the unbiased covariance is defined.
        if X.ndim != 2 or X.shape[0] < 2:
            raise ValueError("The input matrix must be a 2 dimensional tensor with atleast 2 samples.")

        mean = X.mean(dim=0)
        X = X - mean
        standard_deviation = None
        if normalize:
            standard_deviation = X.std(dim=0, unbiased=True)
            X = X / (standard_deviation + self.epsilon)

        # torch.cov squeezes its result, so a single feature yields a 0-dim
        # tensor; restore the (n_features, n_features) matrix shape.
        n_features = X.shape[1]
        covariance = torch.cov(X.T).reshape(n_features, n_features)

        # The covariance matrix is symmetric, so the symmetric solver is used:
        # it returns real eigenvalues and orthonormal eigenvectors (as columns).
        eig_vals, eig_vecs = torch.linalg.eigh(covariance)
        order = torch.argsort(eig_vals, descending=True)

        # Commit the fitted state only after every computation has succeeded,
        # so a failed fit never leaves the model half-initialised.
        self.normalize = normalize
        self.mean = mean
        if normalize:
            self.standard_deviation = standard_deviation
        self.components = eig_vecs.T[order][:self.n_components]
        self.explained_variance = eig_vals[order][:self.n_components]

    def transform(self, X):
        """
        Applies the fitted PCA model to the input data, transforming it into the reduced feature space.
        The data is centered (and normalized, if the model was fitted with `normalize=True`) using the statistics computed during fitting.

        Args:
            X (torch.Tensor of shape (n_samples, n_features)): The input data to be transformed.

        Returns:
            X_new (torch.Tensor of shape (n_samples, n_components)): The data transformed into the principal component space.

        Raises:
            NotFittedError: If the PCA model has not been fitted before transforming.
            TypeError: If the input matrix is not a PyTorch tensor.
            ValueError: If the input matrix is not the correct shape.
        """
        # `components` is the last piece of state a successful fit produces.
        if not hasattr(self, "components"):
            raise NotFittedError("PCA.fit() must be called before transforming.")
        if not isinstance(X, torch.Tensor):
            raise TypeError("The input matrix must be a PyTorch tensor.")
        if X.ndim != 2 or X.shape[1] != len(self.mean):
            raise ValueError("The input matrix must be a 2 dimensional tensor with the same number of features as the fitted tensor.")

        X = X - self.mean
        if self.normalize:
            X = X / (self.standard_deviation + self.epsilon)
        # Project onto the principal axes (rows of `components`).
        return X @ self.components.T

    def fit_transform(self, X, normalize=True):
        """
        First finds the principal components of X and then transforms X to fitted space.

        Args:
            X (torch.Tensor of shape (n_samples, n_features)): The input data to be transformed.
            normalize (bool, optional): Whether to normalize the data before computing the PCA. Defaults to True.

        Returns:
            X_new (torch.Tensor of shape (n_samples, n_components)): The data transformed into the principal component space.

        Raises:
            TypeError: If the input matrix is not a PyTorch tensor or if the `normalize` parameter is not boolean.
            ValueError: If the input matrix is not the correct shape.
        """
        self.fit(X, normalize=normalize)
        return self.transform(X)