Source code for DLL.DeepLearning.Initialisers._Kaiming_He
import torch
from math import sqrt
from ._Initialiser import Initialiser
[docs]
class Kaiming_Uniform(Initialiser):
    """
    Kaiming He initialiser that samples the weights from a symmetric uniform distribution. It is intended for the ReLU and other activations, which are nonlinear close to the origin.

    Args:
        mode (str, optional): Selects which dimension scales the distribution. With "input_dim" the variance is kept constant in forward propagation, with "output_dim" it is kept constant in back propagation. Defaults to "input_dim".

    Raises:
        ValueError: If mode is neither "input_dim" nor "output_dim".
    """
    def __init__(self, mode="input_dim"):
        allowed_modes = ("input_dim", "output_dim")
        if mode not in allowed_modes:
            raise ValueError('mode must be one of "input_dim" or "output_dim".')
        self.mode = mode

    def initialise(self, shape, data_type=torch.float32, device=torch.device("cpu")):
        """
        Samples a tensor of the wanted shape from :math:`U(-a, a)`, where :math:`a = \\sqrt{\\frac{6}{d}}` and :math:`d` is the input or the output dimension depending on the chosen mode.

        Args:
            shape (torch.Size): The shape of the wanted tensor.
            data_type (torch.dtype, optional): The data type used in the returned tensor. Defaults to torch.float32.
            device (torch.device, optional): The device of the tensor. Determines if the computation is made using the gpu or the cpu. Defaults to torch.device("cpu").

        Returns:
            torch.Tensor: A tensor of the given shape, data type and device with values in :math:`[-a, a)`.
        """
        # The dimensions are whatever the base class helper reports for this shape.
        fan_in, fan_out = self._get_dims(shape)
        if self.mode == "input_dim":
            fan = fan_in
        else:
            fan = fan_out
        bound = sqrt(6 / fan)

        # torch.rand samples from [0, 1); stretch and shift the samples to [-bound, bound).
        unit_samples = torch.rand(size=shape, dtype=data_type, device=device)
        return 2 * bound * unit_samples - bound
[docs]
class Kaiming_Normal(Initialiser):
    """
    The Kaiming He normal initialiser. Kaiming He initialiser should be used for the ReLU or any other activation, which is nonlinear close to origin.

    Args:
        mode (str, optional): Determines if the variance is constant in the forward or backward propagation. If "input_dim", variance is constant in forward propagation, while if "output_dim", the variance is constant in back propagation. Defaults to "input_dim".

    Raises:
        ValueError: If mode is neither "input_dim" nor "output_dim".
    """
    def __init__(self, mode="input_dim"):
        if mode not in ("input_dim", "output_dim"):
            raise ValueError('mode must be one of "input_dim" or "output_dim".')
        self.mode = mode

    def initialise(self, shape, data_type=torch.float32, device=torch.device("cpu")):
        """
        Initialises a tensor of the wanted shape with values in :math:`N(0, \\sigma^2)`, where :math:`\\sigma = \\sqrt{\\frac{2}{d}}` and :math:`d` is the input dimension if mode is "input_dim" and the output dimension if mode is "output_dim".

        Args:
            shape (torch.Size): The shape of the wanted tensor.
            data_type (torch.dtype, optional): The data type used in the returned tensor. Defaults to torch.float32.
            device (torch.device, optional): The device of the tensor. Determines if the computation is made using the gpu or the cpu. Defaults to torch.device("cpu").

        Returns:
            torch.Tensor: A tensor of the given shape, data type and device sampled from :math:`N(0, \\sigma^2)`.
        """
        input_dim, output_dim = self._get_dims(shape)
        # Only the dimension selected by the mode scales the distribution; the two are never summed.
        dim = input_dim if self.mode == "input_dim" else output_dim
        std = sqrt(2 / dim)
        return torch.normal(mean=0, std=std, size=shape, dtype=data_type, device=device)