# Source code for pyml.neural_network.optimizer.sgd
"""Stochastic Gradient Descent (SGD) Optimizer
"""
from pyml.neural_network.layer.transformation import _Transformation
from pyml.neural_network.optimizer import _Optimizer
from pyml.exceptions import OutsideSpecifiedRange
import numpy as np
class SGD(_Optimizer):
    """Stochastic Gradient Descent (SGD) Optimizer.

    This optimizer performs stochastic gradient descent with optional momentum
    and learning rate decay.

    Parameters
    ----------
    learning_rate : float, optional
        The initial learning rate, by default 1
    decay : float, optional
        The learning rate decay factor, by default 0
    momentum : float, optional
        The momentum factor for gradient updates, by default 0

    Raises
    ------
    OutsideSpecifiedRange
        If momentum value is outside the range [0, 1].
    """

    def __init__(
        self,
        learning_rate: float = 1,
        decay: float = 0,
        momentum: float = 0
    ) -> None:
        super().__init__(learning_rate, decay)
        # Momentum is a convex-combination coefficient; values outside
        # [0, 1] are rejected up front.
        if momentum < 0 or momentum > 1:
            raise OutsideSpecifiedRange(momentum, 'Momentum', 0, 1)
        self.momentum = momentum

    def update_parameters(self, layer: _Transformation) -> None:
        """Update the weights and biases of the given layer using SGD.

        This method updates the weights and biases of the specified layer
        using stochastic gradient descent with optional momentum.

        Parameters
        ----------
        layer : _Transformation
            The layer to update. Must expose ``weights``, ``biases`` and
            their gradients ``dweights``, ``dbiases`` (set by backprop).

        Note
        ----
        When ``momentum`` is non-zero, per-layer momentum buffers are
        created lazily on the first update and carried across calls:
        ``v = momentum * v - lr * grad``. With zero momentum, plain SGD
        updates are applied.
        """
        # BUG FIX: the original branched on hasattr(layer,
        # 'weight_momentums'), so the momentum path ran only on the very
        # first call (when the freshly-zeroed buffers contributed
        # nothing) and vanilla SGD ran ever after — momentum never had
        # any effect. Branch on the configured momentum factor instead.
        if self.momentum:
            # Lazily create the momentum buffers the first time this
            # layer is updated.
            if not hasattr(layer, 'weight_momentums'):
                layer.weight_momentums = np.zeros_like(layer.weights)
                layer.bias_momentums = np.zeros_like(layer.biases)
            # Classical momentum: v = momentum * v - lr * grad.
            weight_updates = (
                self.momentum * layer.weight_momentums
                - self.current_learning_rate * layer.dweights
            )
            layer.weight_momentums = weight_updates
            bias_updates = (
                self.momentum * layer.bias_momentums
                - self.current_learning_rate * layer.dbiases
            )
            layer.bias_momentums = bias_updates
        else:
            # Vanilla SGD: step directly against the gradient.
            weight_updates = -self.current_learning_rate * layer.dweights
            bias_updates = -self.current_learning_rate * layer.dbiases
        # Apply the computed updates in place.
        layer.weights += weight_updates
        layer.biases += bias_updates