Source code for DLL.DeepLearning.Layers._Bidirectional

import torch
from copy import deepcopy

from ._BaseLayer import BaseLayer
from . import RNN, LSTM
from ...Exceptions import NotCompiledError


class Bidirectional(BaseLayer):
    """
    The bidirectional wrapper for LSTM or RNN layers.

    Args:
        layer (DLL.DeepLearning.Layers.RNN or LSTM object): The input is passed to this layer in forward and reverse.
            The results of each direction are concatenated together along the feature axis.
    """
    def __init__(self, layer, **kwargs):
        if not isinstance(layer, (RNN, LSTM)):
            raise TypeError("layer must be an instance of DLL.DeepLearning.Layers.RNN or LSTM")

        # Change the layer's output shape to have at least 1-dimensional features.
        if len(layer.output_shape) == 0 or (len(layer.output_shape) == 1 and not layer.return_last):
            layer.output_shape = (*layer.output_shape, 1)
        output_shape = (*layer.output_shape[:-1], 2 * layer.output_shape[-1])
        super().__init__(output_shape, **kwargs)
        self.name = "Bidirectional"
        self.forward_layer = layer
        self.backward_layer = deepcopy(layer)

    def initialise_layer(self, input_shape, data_type, device):
        """
        :meta private:
        """
        if not isinstance(input_shape, tuple | list) or len(input_shape) != 2:
            raise ValueError("input_shape must be a tuple of length 2.")
        if not isinstance(data_type, torch.dtype):
            raise TypeError("data_type must be an instance of torch.dtype")
        if not isinstance(device, torch.device):
            raise TypeError('device must be one of torch.device("cpu") or torch.device("cuda")')

        self.forward_layer.initialise_layer(input_shape, data_type, device)
        self.backward_layer.initialise_layer(input_shape, data_type, device)
        super().initialise_layer(input_shape, data_type, device)
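    # Shape bookkeeping sketch (hypothetical sizes, not part of the library):
    # wrapping an LSTM whose output_shape is (10, 16), i.e. (sequence_length,
    # hidden_size), gives the Bidirectional layer output_shape == (10, 32),
    # since the forward and reverse results are concatenated along the last
    # (feature) axis. With return_last == True and output_shape == (16,), the
    # wrapped shape becomes (32,).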
    def forward(self, input, training=False, **kwargs):
        """
        Computes the forward values of the RNN or LSTM layer for both the normal input and the reversed input
        and concatenates the results along the feature axis.

        Args:
            input (torch.Tensor of shape (batch_size, sequence_length, input_size)): The input to the layer.
                Must be a torch.Tensor of the specified shape given by layer.input_shape.
            training (bool, optional): The boolean flag deciding if the model is in training mode. Defaults to False.
        Returns:
            torch.Tensor of shape (n_samples, 2 * RNN.output_shape[-1]) or (n_samples, sequence_length, 2 * RNN.output_shape[-1]):
                The output tensor after the transformations with the specified shape.
        """
        if not isinstance(input, torch.Tensor):
            raise TypeError("input must be a torch.Tensor.")
        if input.shape[1:] != self.input_shape:
            raise ValueError(f"Input shape {input.shape[1:]} does not match the expected shape {self.input_shape}.")
        if not isinstance(training, bool):
            raise TypeError("training must be a boolean.")

        forward_val = self.forward_layer.forward(input, training=training)
        # The backward layer sees the sequence in reversed time order.
        backward_val = self.backward_layer.forward(input.flip(1), training=training)
        if not self.backward_layer.return_last:
            # Flip the reversed outputs back so that time steps align with forward_val.
            backward_val = backward_val.flip(1)
        self.output = torch.cat((forward_val, backward_val), dim=-1)
        return self.output
    def backward(self, dCdy, **kwargs):
        """
        Calculates the gradient of the loss function with respect to the input of the layer.
        Also calculates the gradients of the loss function with respect to the model parameters.

        Args:
            dCdy (torch.Tensor of the same shape as returned from the forward method): The gradient given by the next layer.
        Returns:
            torch.Tensor of shape (n_samples, sequence_length, input_size): The new gradient after backpropagation through the layer.
        """
        if not isinstance(dCdy, torch.Tensor):
            raise TypeError("dCdy must be a torch.Tensor.")
        if dCdy.shape[1:] != self.output.shape[1:]:
            raise ValueError(f"dCdy shape {dCdy.shape[1:]} does not match the shape of the output {self.output.shape[1:]}.")

        # Split the incoming gradient between the two directions along the feature axis.
        forward_grad = dCdy[..., :self.output_shape[-1] // 2]
        backward_grad = dCdy[..., self.output_shape[-1] // 2:]
        dCdx_forward = self.forward_layer.backward(forward_grad, **kwargs)
        if not self.backward_layer.return_last:
            # forward() flipped the reversed outputs back into the original time
            # order, so the corresponding gradient must be flipped again before
            # backpropagating through the reverse-direction layer. With
            # return_last the output has no time axis and needs no flip.
            backward_grad = backward_grad.flip(1)
        dCdx_backward = self.backward_layer.backward(backward_grad, **kwargs)
        # The result is the gradient with respect to the reversed input; flip it
        # back to the original time order before summing.
        dCdx = dCdx_forward + dCdx_backward.flip(1)
        return dCdx
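    # Backpropagation note: flip(1) (time reversal) is its own inverse, so the
    # chain rule for the reverse direction is "apply to the output gradient
    # exactly the flips the forward pass applied to the output, then flip the
    # resulting input gradient back". This is why dCdx_backward is always
    # returned to the original time order before being summed with dCdx_forward.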
    def get_parameters(self):
        """
        :meta private:
        """
        return (*self.forward_layer.get_parameters(), *self.backward_layer.get_parameters(), *super().get_parameters())
    def get_nparams(self):
        return self.forward_layer.get_nparams() + self.backward_layer.get_nparams()
    def summary(self, offset=""):
        if not hasattr(self, "input_shape"):
            raise NotCompiledError("layer must be initialized correctly before calling layer.summary().")

        super_summary = offset + f"{self.name} - (Input, Output): ({self.input_shape}, {self.output_shape})"
        sublayer_offset = offset + " "
        forward_summary = "\n" + offset + self.forward_layer.summary(sublayer_offset)
        backward_summary = "\n" + offset + self.backward_layer.summary(sublayer_offset)
        return super_summary + forward_summary + backward_summary
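A minimal usage sketch follows. The LSTM constructor arguments shown are assumptions (its real signature is defined elsewhere in DLL.DeepLearning.Layers); only the shape arithmetic and the initialise_layer/forward calls are taken from the code above.

import torch
from DLL.DeepLearning.Layers import LSTM, Bidirectional

# Hypothetical setup: an LSTM producing 16 features per time step, wrapped so
# that the sequence is also processed in reverse. The LSTM arguments here are
# assumed, not the library's documented signature.
layer = Bidirectional(LSTM((10, 16), return_last=False))
layer.initialise_layer((10, 8), torch.float32, torch.device("cpu"))  # (sequence_length, input_size)

x = torch.randn(4, 10, 8)   # (batch_size, sequence_length, input_size)
y = layer.forward(x)
print(y.shape)              # expected (4, 10, 32): the 16 features are doubled to 32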