import torch
from copy import deepcopy
from ._BaseLayer import BaseLayer
from . import RNN, LSTM
from ...Exceptions import NotCompiledError
class Bidirectional(BaseLayer):
"""
The bidirectional wrapper for LSTM or RNN layers.
Args:
layer (DLL.DeepLearning.Layers.RNN or LSTM object): The layer that the input is passed through in both the original and the reversed time order. The results of the two passes are concatenated along the feature axis.
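Example (a minimal sketch; the LSTM constructor call below is a hypothetical illustration, not a documented signature):
    lstm = LSTM(10)              # hypothetical: an LSTM producing 10 output features
    layer = Bidirectional(lstm)  # the bidirectional output has 2 * 10 = 20 features along the last axis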
"""
def __init__(self, layer, **kwargs):
if not isinstance(layer, RNN) and not isinstance(layer, LSTM):
raise TypeError("layer must be an instance of DLL.DeepLearning.Layers.RNN or LSTM")
# Change the layer's output shape to have at least 1-dimensional features.
if len(layer.output_shape) == 0 or (len(layer.output_shape) == 1 and not layer.return_last):
layer.output_shape = (*layer.output_shape, 1)
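# The forward and reverse outputs are concatenated, so the feature dimension of the wrapper is doubled.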
output_shape = (*layer.output_shape[:-1], 2 * layer.output_shape[-1])
super().__init__(output_shape, **kwargs)
self.name = "Bidirectional"
self.forward_layer = layer
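# The reverse-direction layer is an independent deep copy of the wrapped layer, so the two directions learn separate parameters.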
self.backward_layer = deepcopy(layer)
def initialise_layer(self, input_shape, data_type, device):
"""
:meta private:
"""
if not isinstance(input_shape, tuple | list) or len(input_shape) != 2:
raise ValueError("input_shape must be a tuple of length 2.")
if not isinstance(data_type, torch.dtype):
raise TypeError("data_type must be an instance of torch.dtype")
if not isinstance(device, torch.device):
raise TypeError('device must be one of torch.device("cpu") or torch.device("cuda")')
self.forward_layer.initialise_layer(input_shape, data_type, device)
self.backward_layer.initialise_layer(input_shape, data_type, device)
super().initialise_layer(input_shape, data_type, device)
def forward(self, input, training=False, **kwargs):
"""
Computes the forward values of the wrapped RNN or LSTM layer for both the original and the time-reversed input and concatenates the results along the feature axis.
Args:
input (torch.Tensor of shape (batch_size, sequence_length, input_size)): The input to the layer. Must be a torch.Tensor of the shape specified by layer.input_shape.
training (bool, optional): The boolean flag deciding if the model is in training mode. Defaults to False.
Returns:
torch.Tensor of shape (batch_size, 2 * layer.output_shape[-1]) or (batch_size, sequence_length, 2 * layer.output_shape[-1]): The output tensor of the specified shape after the transformation.
"""
if not isinstance(input, torch.Tensor):
raise TypeError("input must be a torch.Tensor.")
if input.shape[1:] != self.input_shape:
raise ValueError(f"Input shape {input.shape[1:]} does not match the expected shape {self.input_shape}.")
if not isinstance(training, bool):
raise TypeError("training must be a boolean.")
forward_val = self.forward_layer.forward(input, training=training)
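# The reverse-direction layer sees the time-reversed sequence. If it returns the full sequence, its output is flipped back so the time steps align with the forward direction before concatenation.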
backward_val = self.backward_layer.forward(input.flip(1), training=training)
if not self.backward_layer.return_last:
backward_val = backward_val.flip(1)
self.output = torch.cat((forward_val, backward_val), dim=-1)
return self.output
def backward(self, dCdy, **kwargs):
"""
Calculates the gradient of the loss function with respect to the input of the layer. Also calculates the gradients of the loss function with respect to the model parameters.
Args:
dCdy (torch.Tensor of the same shape as returned from the forward method): The gradient given by the next layer.
Returns:
torch.Tensor of shape (batch_size, sequence_length, input_size): The new gradient after backpropagation through the layer.
"""
if not isinstance(dCdy, torch.Tensor):
raise TypeError("dCdy must be a torch.Tensor.")
if dCdy.shape[1:] != self.output.shape[1:]:
raise ValueError(f"dCdy shape {dCdy.shape[1:]} does not match the shape of the layer output {self.output.shape[1:]}.")
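# The output stores the forward features first and the reverse features second, so the incoming gradient is split in half along the feature axis.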
forward_grad = dCdy[..., :self.output_shape[-1] // 2]
backward_grad = dCdy[..., self.output_shape[-1] // 2:]
dCdx_forward = self.forward_layer.backward(forward_grad, **kwargs)
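# The reverse-direction layer consumed a time-reversed input, so its gradient is flipped to match; the resulting input gradient is flipped back to the original time order (when a full sequence was produced) before the two directions are summed.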
dCdx_backward = self.backward_layer.backward(backward_grad.flip(1), **kwargs)
if not self.backward_layer.return_last:
dCdx_backward = dCdx_backward.flip(1)
dCdx = dCdx_backward + dCdx_forward
return dCdx
def get_parameters(self):
"""
:meta private:
"""
return (*self.forward_layer.get_parameters(), *self.backward_layer.get_parameters(), *super().get_parameters())
def get_nparams(self):
return self.forward_layer.get_nparams() + self.backward_layer.get_nparams()
def summary(self, offset=""):
if not hasattr(self, "input_shape"):
raise NotCompiledError("layer must be initialized correctly before calling layer.summary().")
super_summary = offset + f"{self.name} - (Input, Output): ({self.input_shape}, {self.output_shape})"
sublayer_offset = offset + " "
forward_summary = "\n" + offset + self.forward_layer.summary(sublayer_offset)
backward_summary = "\n" + offset + self.backward_layer.summary(sublayer_offset)
return super_summary + forward_summary + backward_summary