Source code for pyml.neural_network.layer.transformation.convolutional

"""Convolutional layer for 1D, 2D and 3D
"""

from pyml.neural_network.layer.transformation import _Transformation, _TrainableTransformation
import numpy as np
import math

class Convolutional(_Transformation, _TrainableTransformation):
    """Convolutional layer for a neural network.

    This class implements a convolutional layer with options for various
    parameters like kernel shape, padding, and stride. It is used for
    performing convolution or cross-correlation operations.

    Parameters
    ----------
    in_channels : int
        The number of input channels or depth.
    out_channels : int
        The number of output channels.
    kernel_shape : int or tuple[int,int]
        The shape of the convolutional kernel. If it's an integer, it's
        treated as a square kernel. If it's a tuple, it should specify the
        height and width of the kernel.
    conv_operation : str, optional
        The type of convolution operation, either 'convolution' or
        'cross-correlation'. By default 'cross-correlation'.
        Not implemented yet: the value is only recorded in ``flip_kernel``.
    padding : int or tuple[int,int] or str, optional
        Padding to apply to the input. It can be an integer, a pair of
        integers for height and width padding, or one of the strings
        'valid' (no padding), 'full' (kernel size - 1 on every side) or
        'same' ((kernel size - 1) // 2 on every side, which keeps the
        spatial size for odd kernel sizes at stride 1). By default 0.
    stride : int or tuple[int,int], optional
        The stride to use during convolution. It can be an integer or a
        tuple of two integers specifying the vertical and horizontal
        strides. By default 1.
    weight_regularizer_l1 : float, optional
        L1 regularization strength for kernel weights, by default 0.
    weight_regularizer_l2 : float, optional
        L2 regularization strength for kernel weights, by default 0.
    bias_regularizer_l1 : float, optional
        L1 regularization strength for biases, by default 0.
    bias_regularizer_l2 : float, optional
        L2 regularization strength for biases, by default 0.

    Raises
    ------
    ValueError
        If ``kernel_shape`` is a sequence whose length is not 2, or if
        ``padding`` is neither an int, a pair, nor one of the supported
        strings.

    Attributes
    ----------
    kernel_shape : tuple[int, int, int, int]
        The shape of the convolutional kernel in the format
        (output channels, input channels, height, width).
    flip_kernel : bool
        False if ``conv_operation`` is 'convolution', True otherwise.
    stride : tuple[int, int]
        Vertical and horizontal stride.
    padding : tuple[int, int]
        Vertical and horizontal zero padding applied on each side.
    weights : numpy.ndarray
        Kernels of shape ``kernel_shape``.
    biases : numpy.ndarray
        Biases of shape (output channels, 1, 1).
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_shape: int | tuple[int, int],
        conv_operation: str = 'cross-correlation',
        padding: int | tuple[int, int] | str = 0,
        stride: int | tuple[int, int] = 1,
        weight_regularizer_l1: float = 0,
        weight_regularizer_l2: float = 0,
        bias_regularizer_l1: float = 0,
        bias_regularizer_l2: float = 0,
    ) -> None:
        super().__init__()

        # Set channels
        # TODO : Remove variables later and access them only via kernel shape
        self.in_channels = in_channels
        self.out_channels = out_channels

        # Set kernel size
        # TODO : Remove kernel shape later and access it only through kernel.shape
        if isinstance(kernel_shape, int):
            kernel_height = kernel_width = kernel_shape
        elif len(kernel_shape) == 2:
            kernel_height, kernel_width = kernel_shape
        else:
            # Fail here instead of leaving ``kernel_shape`` unset and
            # crashing later with an unrelated AttributeError.
            raise ValueError(
                'kernel_shape must be an int or a pair (height, width), '
                f'got {kernel_shape!r}'
            )
        self.kernel_shape = (self.out_channels, self.in_channels, kernel_height, kernel_width)
        self.num_kernels = self.out_channels

        # Set convolution operation type
        self.flip_kernel = conv_operation != 'convolution'

        # Set striding
        if isinstance(stride, int):
            self.stride = (stride, stride)
        else:
            self.stride = stride

        # Set padding. The string modes are derived from the local kernel
        # size; pad widths must stay integers because numpy.pad rejects
        # floats, hence the floor division for 'same'.
        if isinstance(padding, int):
            self.padding = (padding, padding)
        elif isinstance(padding, (tuple, list)):
            self.padding = tuple(padding)
        elif padding == 'valid':
            self.padding = (0, 0)
        elif padding == 'full':
            self.padding = (kernel_height - 1, kernel_width - 1)
        elif padding == 'same':
            self.padding = ((kernel_height - 1) // 2, (kernel_width - 1) // 2)
        else:
            raise ValueError(
                "padding must be an int, a pair of ints, or one of "
                f"'valid', 'full', 'same'; got {padding!r}"
            )

        # Init weights (kernels) and biases; weights are drawn before the
        # biases so results under a fixed NumPy seed keep their order.
        sd = 1.0 / math.sqrt(self.kernel_shape[0])
        self.weights = np.random.uniform(-sd, sd, self.kernel_shape)
        self.biases = np.random.uniform(-sd, sd, (self.out_channels, 1, 1))

        # Set regularization
        # TODO : set regularization terms in parent class
        self.weight_regularizer_l1 = weight_regularizer_l1
        self.weight_regularizer_l2 = weight_regularizer_l2
        self.bias_regularizer_l1 = bias_regularizer_l1
        self.bias_regularizer_l2 = bias_regularizer_l2
[docs] @staticmethod def apply_striding(X:np.ndarray, stride:int|tuple[int, int]=1) -> np.ndarray: """Performs striding on a matrix. Parameters ---------- X : numpy.ndarray Input matrix of the dimension (Number of Batches, Channels/Depth, Height, Width) stride : int or tuple[int, int], optional Specifies the iteration step size of rows and columns to keep. If set to 1, all columns and rows will be kept. Stride can either be a single int or a tuple, where the first parameter sets the stride for rows (height), and the second parameter sets the stride for columns (width). By default 1. Returns ------- numpy.ndarray Input matrix with applied stride. """ if isinstance(stride, int): vertical_stride = stride horizontal_stride = stride else: vertical_stride, horizontal_stride = stride if vertical_stride == 1 and horizontal_stride == 1: return X return X[..., ::vertical_stride, ::horizontal_stride]
[docs] def apply_padding(self, X:np.ndarray) -> np.ndarray: """Apply padding to a given input matrix. Parameters ---------- X : numpy.ndarray Input matrix of the shape (Number of Batches, Channels/Depth, Height, Width). Returns ------- numpy.ndarray Input matrix with applied padding. """ X_pad = np.pad( X, ((0, 0), (0, 0), (self.padding[0], self.padding[0]), (self.padding[1], self.padding[1])), 'constant', constant_values=0 ) return X_pad
[docs] def apply_kernel(self, X_slice:np.ndarray, kernel:np.ndarray, bias:np.ndarray) -> int: """Apply the convolution operation to a slice of the input using a given kernel and bias. Parameters ---------- X_slice : numpy.ndarray A slice of the input matrix that matches the shape of the convolutional kernel. kernel : numpy.ndarray The convolutional kernel. bias : numpy.ndarray The bias associated with the kernel. Returns ------- int The result of applying the convolution operation to the input slice using the given kernel and bias. """ s = np.multiply(X_slice, kernel) + bias s = np.sum(s) return s
[docs] def forward(self, inputs:np.ndarray) -> None: """Perform the forward pass through the convolutional layer. Parameters ---------- inputs : numpy.ndarray Input data of the shape (Number of Batches, Channels/Depth, Height, Width). """ self.inputs = inputs (in_num_batches, in_channels, in_height, in_width) = inputs.shape assert in_channels == self.in_channels, 'Number of channels of input does not match number of specified number of channels' vertical_padding, horizontal_padding = self.padding vertical_stride, horizontal_stride = self.stride _, _, kernel_height, kernel_width = self.kernel_shape out_height = int((in_height - self.kernel_shape[2] + 2 * vertical_padding) / vertical_stride) + 1 out_width = int((in_width - self.kernel_shape[3] + 2 * horizontal_padding) / horizontal_stride) + 1 output = np.zeros((in_num_batches, self.out_channels, out_height, out_width)) X_pad = self.apply_padding(inputs) # Iterate through batches for i in range(in_num_batches): # Iterate through output channels for c in range(self.out_channels): # Iterate through output height for h in range(out_height): # Iterate through output witdh for w in range(out_width): vertical_start = h * vertical_stride vertical_end = vertical_start + kernel_height horizontal_start = w * horizontal_stride horizontal_end = horizontal_start + kernel_width X_slice = X_pad[i, :, vertical_start:vertical_end, horizontal_start:horizontal_end] output[i, c, h, w] = self.apply_kernel(X_slice, self.weights[c, ...], self.biases[c, ...]) self.output = output
[docs] def backward(self, dvalues:np.ndarray) -> None: """Perform the backward pass through the convolutional layer. Parameters ---------- dvalues : numpy.ndarray The gradients of the loss with respect to the layer's output. """ (in_num_batches, in_channels, in_height, in_width) = self.inputs.shape (out_num_batches, out_channels, out_height, out_width) = dvalues.shape assert in_num_batches == out_num_batches, 'Batch sizes of inputs and gradients are not equal' vertical_padding, horizontal_padding = self.padding vertical_stride, horizontal_stride = self.stride _, _, kernel_height, kernel_width = self.kernel_shape # Init gradients dweights = np.zeros(self.kernel_shape) dbiases = np.zeros(self.biases.shape) dinputs = np.zeros(self.inputs.shape) # Add padding X_pad = self.apply_padding(self.inputs) dinputs_pad = self.apply_padding(dinputs) # Iterate through batches for i in range(out_num_batches): i_X_pad = X_pad[i] i_dinputs_pad = dinputs_pad[i] # Iterate through gradient channels for c in range(out_channels): # Iterate through gradient height for h in range(out_height): # Iterate through gradient witdh for w in range(out_width): vertical_start = h * vertical_stride vertical_end = vertical_start + kernel_height horizontal_start = w * horizontal_stride horizontal_end = horizontal_start + kernel_width x_slice = i_X_pad[:, vertical_start:vertical_end, horizontal_start:horizontal_end] i_dinputs_pad[:, vertical_start:vertical_end, horizontal_start:horizontal_end] += self.weights[c, ...] * dvalues[i, c, h, w] dweights[c, ...] += x_slice * dvalues[i, c, h, w] dbiases[c, ...] 
+= dvalues[i, c, h, w] # TODO : Optimize if vertical_padding == 0 and horizontal_padding == 0: dinputs[i, :, :, :] = i_dinputs_pad elif vertical_padding == 0 and horizontal_padding != 0: dinputs[i, :, :, :] = i_dinputs_pad[:, :, horizontal_padding:-horizontal_padding] elif vertical_padding != 0 and horizontal_padding == 0: dinputs[i, :, :, :] = i_dinputs_pad[:, vertical_padding:-vertical_padding, :] else: dinputs[i, :, :, :] = i_dinputs_pad[:, vertical_padding:-vertical_padding, horizontal_padding:-horizontal_padding] self.dweights = dweights self.dbiases = dbiases self.dinputs = dinputs
[docs] def get_parameters(self) -> tuple[np.ndarray, np.ndarray]: """Return parameters Returns ------- tuple[numpy.ndarray, numpy.ndarray] Returns on first index the weights (kernel) and on second index the biases """ return self.weights, self.biases
[docs] def set_parameters(self, weights:np.ndarray, biases:np.ndarray) -> None: """Sets the parameters for this layer Parameters ---------- weights : numpy.ndarray Weights (kernel) of this layer. biases : numpy.ndarray Biases of this layer. """ self.weights = weights self.biases = biases