Source code for DLL.DeepLearning.Initialisers._Kaiming_He

import torch
from math import sqrt

from ._Initialiser import Initialiser


class Kaiming_Uniform(Initialiser):
    """
    The Kaiming He uniform initialiser. Intended for ReLU and other activations that are nonlinear close to the origin.

    Args:
        mode (str, optional): Selects which dimension scales the sampling range. With "input_dim" the variance is kept constant in forward propagation, with "output_dim" it is kept constant in back propagation. Defaults to "input_dim".

    Raises:
        ValueError: If mode is neither "input_dim" nor "output_dim".
    """

    def __init__(self, mode="input_dim"):
        accepted_modes = ("input_dim", "output_dim")
        if mode not in accepted_modes:
            raise ValueError('mode must be one of "input_dim" or "output_dim".')
        self.mode = mode

    def initialise(self, shape, data_type=torch.float32, device=torch.device("cpu")):
        """
        Samples a tensor of the wanted shape from :math:`U(-a, a)`, where :math:`a = \\sqrt{\\frac{6}{d}}` and :math:`d` is the input or output dimension, depending on the chosen mode.

        Args:
            shape (torch.Size): The shape of the wanted tensor.
            data_type (torch.dtype, optional): The data type used in the returned tensor. Defaults to torch.float32.
            device (torch.device, optional): The device on which the tensor is created. Defaults to torch.device("cpu").

        Returns:
            torch.Tensor: A tensor of the given shape, data type and device.
        """
        # _get_dims is not defined in this class; presumably inherited from Initialiser.
        input_dim, output_dim = self._get_dims(shape)
        if self.mode == "input_dim":
            fan = input_dim
        else:
            fan = output_dim
        bound = sqrt(6 / fan)
        # torch.rand draws from [0, 1); scaling by 2 * bound and shifting by -bound maps it to [-bound, bound).
        unit_samples = torch.rand(size=shape, dtype=data_type, device=device)
        return 2 * bound * unit_samples - bound
class Kaiming_Normal(Initialiser):
    """
    The Kaiming He normal initialiser. Kaiming He initialiser should be used for the ReLU or any other activation, which is nonlinear close to origin.

    Args:
        mode (str, optional): Determines if the variance is constant in the forward or backward propagation. If "input_dim", variance is constant in forward propagation, while if "output_dim", the variance is constant in back propagation. Defaults to "input_dim".

    Raises:
        ValueError: If mode is neither "input_dim" nor "output_dim".
    """

    def __init__(self, mode="input_dim"):
        if mode not in ("input_dim", "output_dim"):
            raise ValueError('mode must be one of "input_dim" or "output_dim".')
        self.mode = mode

    def initialise(self, shape, data_type=torch.float32, device=torch.device("cpu")):
        """
        Initialises a tensor of the wanted shape with values in :math:`N(0, \\sigma^2)`, where :math:`\\sigma = \\sqrt{\\frac{2}{d}}` and :math:`d` is the input dimension if mode is "input_dim" and the output dimension otherwise.

        Args:
            shape (torch.Size): The shape of the wanted tensor.
            data_type (torch.dtype, optional): The data type used in the returned tensor. Defaults to torch.float32.
            device (torch.device, optional): The device of the tensor. Determines if the computation is made using the gpu or the cpu. Defaults to torch.device("cpu").

        Returns:
            torch.Tensor: A tensor of the given shape, data type and device.
        """
        # _get_dims is not defined in this class; presumably inherited from Initialiser.
        input_dim, output_dim = self._get_dims(shape)
        # Only one of the two dimensions enters the standard deviation; they are never summed.
        fan = input_dim if self.mode == "input_dim" else output_dim
        std = sqrt(2 / fan)
        return torch.normal(mean=0, std=std, size=shape, dtype=data_type, device=device)