"""Convolutional layer for 1D, 2D and 3D
"""
from pyml.neural_network.layer.transformation import _Transformation, _TrainableTransformation
import numpy as np
import math
class Convolutional(_Transformation, _TrainableTransformation):
    """Convolutional layer for a neural network.

    Implements a 2D convolution layer over inputs of shape
    (batch, channels, height, width), with configurable kernel shape,
    padding and stride. Currently the operation performed is always
    cross-correlation; kernel flipping for true convolution is not
    implemented yet.

    Parameters
    ----------
    in_channels : int
        The number of input channels or depth.
    out_channels : int
        The number of output channels (i.e. the number of kernels).
    kernel_shape : int or tuple[int, int]
        The shape of the convolutional kernel.
        If it's an integer, it's treated as a square kernel.
        If it's a tuple, it should specify the height and width of the kernel.
    conv_operation : str, optional
        The type of convolution operation, either 'convolution' or
        'cross-correlation'. By default 'cross-correlation'.
        Kernel flipping for 'convolution' is not implemented yet.
    padding : int or tuple[int, int] or str, optional
        Padding to apply to the input.
        It can be an integer, a tuple of two integers for height and width
        padding, or one of the strings 'valid', 'full', or 'same'.
        By default 0.
        NOTE: 'same' only reproduces the input size for stride 1 and
        odd-sized kernels.
    stride : int or tuple[int, int], optional
        The stride to use during convolution.
        It can be an integer or a tuple of two integers specifying the
        vertical and horizontal strides. By default 1.
    weight_regularizer_l1 : float, optional
        L1 regularization strength for kernel weights, by default 0.
    weight_regularizer_l2 : float, optional
        L2 regularization strength for kernel weights, by default 0.
    bias_regularizer_l1 : float, optional
        L1 regularization strength for biases, by default 0.
    bias_regularizer_l2 : float, optional
        L2 regularization strength for biases, by default 0.

    Attributes
    ----------
    kernel_shape : tuple[int, int, int, int]
        The shape of the convolutional kernel in the format
        (output channels, input channels, height, width).
    flip_kernel : bool
        True if using cross-correlation, False if using convolution.

    Raises
    ------
    ValueError
        If ``kernel_shape`` is a tuple whose length is not 2, or if
        ``padding`` is a string other than 'valid', 'full' or 'same'.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_shape: int | tuple[int, int],
        conv_operation: str = 'cross-correlation',
        padding: int | tuple[int, int] | str = 0,
        stride: int | tuple[int, int] = 1,
        weight_regularizer_l1: float = 0,
        weight_regularizer_l2: float = 0,
        bias_regularizer_l1: float = 0,
        bias_regularizer_l2: float = 0,
    ) -> None:
        super().__init__()
        # TODO : Remove variables later and access them only via kernel shape
        self.in_channels = in_channels
        self.out_channels = out_channels
        # Normalise kernel_shape to (out_channels, in_channels, height, width).
        # BUGFIX: a tuple of any length other than 2 used to be silently
        # ignored, leaving self.kernel_shape unset; now raises ValueError.
        if isinstance(kernel_shape, int):
            self.kernel_shape = (out_channels, in_channels, kernel_shape, kernel_shape)
        elif len(kernel_shape) == 2:
            self.kernel_shape = (out_channels, in_channels, kernel_shape[0], kernel_shape[1])
        else:
            raise ValueError('kernel_shape must be an int or a (height, width) tuple')
        self.num_kernels = self.out_channels
        # Set convolution operation type (the flip itself is not implemented).
        self.flip_kernel = conv_operation != 'convolution'
        # Set striding as a (vertical, horizontal) pair
        if isinstance(stride, int):
            self.stride = (stride, stride)
        else:
            self.stride = stride
        # Set padding as a (vertical, horizontal) pair
        kernel_height, kernel_width = self.kernel_shape[2], self.kernel_shape[3]
        if isinstance(padding, int):
            self.padding = (padding, padding)
        elif isinstance(padding, tuple):
            self.padding = padding
        elif padding == 'valid':
            self.padding = (0, 0)
        elif padding == 'full':
            # BUGFIX: previously read the never-defined attributes
            # self.kernel_height / self.kernel_width (AttributeError).
            self.padding = (kernel_height - 1, kernel_width - 1)
        elif padding == 'same':
            # BUGFIX: integer division; true division '/' produced float pad
            # widths, which numpy.pad rejects.
            self.padding = ((kernel_height - 1) // 2, (kernel_width - 1) // 2)
        else:
            raise ValueError(f"Unknown padding mode: {padding!r}")
        # Init weights (kernels) and biases from a uniform distribution.
        # NOTE(review): scaling by 1/sqrt(kernel_shape[0]) (= out_channels) is
        # kept from the original; a fan-in based scheme would use
        # in_channels * kernel_height * kernel_width instead — confirm intent.
        sd = 1.0 / math.sqrt(self.kernel_shape[0])
        self.weights = np.random.uniform(-sd, sd, self.kernel_shape)
        self.biases = np.random.uniform(-sd, sd, (self.out_channels, 1, 1))
        # Set regularization
        # TODO : set regularization terms in parent class
        self.weight_regularizer_l1 = weight_regularizer_l1
        self.weight_regularizer_l2 = weight_regularizer_l2
        self.bias_regularizer_l1 = bias_regularizer_l1
        self.bias_regularizer_l2 = bias_regularizer_l2

    @staticmethod
    def apply_striding(X: np.ndarray, stride: int | tuple[int, int] = 1) -> np.ndarray:
        """Performs striding on a matrix.

        Parameters
        ----------
        X : numpy.ndarray
            Input matrix of the dimension
            (Number of Batches, Channels/Depth, Height, Width)
        stride : int or tuple[int, int], optional
            Specifies the iteration step size of rows and columns to keep.
            If set to 1, all columns and rows will be kept.
            Stride can either be a single int or a tuple, where the first
            parameter sets the stride for rows (height), and the second
            parameter sets the stride for columns (width).
            By default 1.

        Returns
        -------
        numpy.ndarray
            Input matrix with applied stride (a view, not a copy, when a
            stride is applied).
        """
        if isinstance(stride, int):
            vertical_stride = horizontal_stride = stride
        else:
            vertical_stride, horizontal_stride = stride
        if vertical_stride == 1 and horizontal_stride == 1:
            return X
        # Strided basic slicing keeps every stride-th row/column.
        return X[..., ::vertical_stride, ::horizontal_stride]

    def apply_padding(self, X: np.ndarray) -> np.ndarray:
        """Apply zero padding to a given input matrix.

        Pads only the last two (spatial) axes by ``self.padding``.

        Parameters
        ----------
        X : numpy.ndarray
            Input matrix of the shape
            (Number of Batches, Channels/Depth, Height, Width).

        Returns
        -------
        numpy.ndarray
            Input matrix with applied padding.
        """
        X_pad = np.pad(
            X,
            ((0, 0), (0, 0), (self.padding[0], self.padding[0]), (self.padding[1], self.padding[1])),
            'constant',
            constant_values=0
        )
        return X_pad

    def apply_kernel(self, X_slice: np.ndarray, kernel: np.ndarray, bias: np.ndarray) -> float:
        """Apply the convolution operation to a slice of the input using a given kernel and bias.

        Parameters
        ----------
        X_slice : numpy.ndarray
            A slice of the input matrix that matches the shape of the
            convolutional kernel.
        kernel : numpy.ndarray
            The convolutional kernel.
        bias : numpy.ndarray
            The bias associated with the kernel (a single element).

        Returns
        -------
        float
            ``sum(X_slice * kernel) + bias``.
        """
        # BUGFIX: add the bias once per output element. The old code
        # broadcast the bias onto every element of the product before
        # summing, scaling its contribution by the slice size — inconsistent
        # with backward(), where dbiases accumulates each gradient exactly
        # once per output element.
        return np.sum(np.multiply(X_slice, kernel)) + np.sum(bias)

    def forward(self, inputs: np.ndarray) -> None:
        """Perform the forward pass through the convolutional layer.

        Stores the result in ``self.output`` with shape
        (batch, out_channels, out_height, out_width).

        Parameters
        ----------
        inputs : numpy.ndarray
            Input data of the shape
            (Number of Batches, Channels/Depth, Height, Width).
        """
        self.inputs = inputs
        (in_num_batches, in_channels, in_height, in_width) = inputs.shape
        assert in_channels == self.in_channels, 'Number of channels of input does not match number of specified number of channels'
        vertical_padding, horizontal_padding = self.padding
        vertical_stride, horizontal_stride = self.stride
        _, _, kernel_height, kernel_width = self.kernel_shape
        # Standard output-size formula: (in - kernel + 2*pad) / stride + 1.
        out_height = int((in_height - kernel_height + 2 * vertical_padding) / vertical_stride) + 1
        out_width = int((in_width - kernel_width + 2 * horizontal_padding) / horizontal_stride) + 1
        output = np.zeros((in_num_batches, self.out_channels, out_height, out_width))
        X_pad = self.apply_padding(inputs)
        # Iterate through batches
        for i in range(in_num_batches):
            # Iterate through output channels
            for c in range(self.out_channels):
                # Iterate through output height
                for h in range(out_height):
                    # Iterate through output width
                    for w in range(out_width):
                        vertical_start = h * vertical_stride
                        vertical_end = vertical_start + kernel_height
                        horizontal_start = w * horizontal_stride
                        horizontal_end = horizontal_start + kernel_width
                        X_slice = X_pad[i, :, vertical_start:vertical_end, horizontal_start:horizontal_end]
                        output[i, c, h, w] = self.apply_kernel(X_slice, self.weights[c, ...], self.biases[c, ...])
        self.output = output

    def backward(self, dvalues: np.ndarray) -> None:
        """Perform the backward pass through the convolutional layer.

        Sets ``self.dweights``, ``self.dbiases`` and ``self.dinputs``.

        Parameters
        ----------
        dvalues : numpy.ndarray
            The gradients of the loss with respect to the layer's output,
            of shape (batch, out_channels, out_height, out_width).
        """
        (in_num_batches, in_channels, in_height, in_width) = self.inputs.shape
        (out_num_batches, out_channels, out_height, out_width) = dvalues.shape
        assert in_num_batches == out_num_batches, 'Batch sizes of inputs and gradients are not equal'
        vertical_padding, horizontal_padding = self.padding
        vertical_stride, horizontal_stride = self.stride
        _, _, kernel_height, kernel_width = self.kernel_shape
        # Init gradients
        dweights = np.zeros(self.kernel_shape)
        dbiases = np.zeros(self.biases.shape)
        dinputs = np.zeros(self.inputs.shape)
        # Work on padded arrays so kernel windows never leave the bounds.
        X_pad = self.apply_padding(self.inputs)
        dinputs_pad = self.apply_padding(dinputs)
        # Iterate through batches
        for i in range(out_num_batches):
            # Per-sample views; writing into i_dinputs_pad mutates
            # dinputs_pad in place.
            i_X_pad = X_pad[i]
            i_dinputs_pad = dinputs_pad[i]
            # Iterate through gradient channels
            for c in range(out_channels):
                # Iterate through gradient height
                for h in range(out_height):
                    # Iterate through gradient width
                    for w in range(out_width):
                        vertical_start = h * vertical_stride
                        vertical_end = vertical_start + kernel_height
                        horizontal_start = w * horizontal_stride
                        horizontal_end = horizontal_start + kernel_width
                        x_slice = i_X_pad[:, vertical_start:vertical_end, horizontal_start:horizontal_end]
                        i_dinputs_pad[:, vertical_start:vertical_end, horizontal_start:horizontal_end] += self.weights[c, ...] * dvalues[i, c, h, w]
                        dweights[c, ...] += x_slice * dvalues[i, c, h, w]
                        dbiases[c, ...] += dvalues[i, c, h, w]
            # Strip padding again; slicing with -0 would keep nothing, hence
            # the explicit zero-padding branches.
            # TODO : Optimize
            if vertical_padding == 0 and horizontal_padding == 0:
                dinputs[i, :, :, :] = i_dinputs_pad
            elif vertical_padding == 0 and horizontal_padding != 0:
                dinputs[i, :, :, :] = i_dinputs_pad[:, :, horizontal_padding:-horizontal_padding]
            elif vertical_padding != 0 and horizontal_padding == 0:
                dinputs[i, :, :, :] = i_dinputs_pad[:, vertical_padding:-vertical_padding, :]
            else:
                dinputs[i, :, :, :] = i_dinputs_pad[:, vertical_padding:-vertical_padding, horizontal_padding:-horizontal_padding]
        self.dweights = dweights
        self.dbiases = dbiases
        self.dinputs = dinputs

    def get_parameters(self) -> tuple[np.ndarray, np.ndarray]:
        """Return parameters.

        Returns
        -------
        tuple[numpy.ndarray, numpy.ndarray]
            Returns on first index the weights (kernel) and on second index
            the biases.
        """
        return self.weights, self.biases

    def set_parameters(self, weights: np.ndarray, biases: np.ndarray) -> None:
        """Sets the parameters for this layer.

        Parameters
        ----------
        weights : numpy.ndarray
            Weights (kernel) of this layer.
        biases : numpy.ndarray
            Biases of this layer.
        """
        self.weights = weights
        self.biases = biases