Source code for pyml.neural_network.optimizer.sgd

"""Stochastic Gradient Descent (SGD) Optimizer
"""

from pyml.neural_network.layer.transformation import _Transformation
from pyml.neural_network.optimizer import _Optimizer
from pyml.exceptions import OutsideSpecifiedRange

import numpy as np

class SGD(_Optimizer):
    """Stochastic Gradient Descent (SGD) Optimizer.

    This optimizer performs stochastic gradient descent with optional
    momentum and learning rate decay.

    Parameters
    ----------
    learning_rate : float, optional
        The initial learning rate, by default 1.
    decay : float, optional
        The learning rate decay factor, by default 0.
    momentum : float, optional
        The momentum factor for gradient updates, by default 0.

    Raises
    ------
    OutsideSpecifiedRange
        If the momentum value is outside the range [0, 1].
    """

    def __init__(
        self,
        learning_rate: float = 1,
        decay: float = 0,
        momentum: float = 0
    ) -> None:
        super().__init__(learning_rate, decay)

        # Momentum must be a valid fraction of the previous update.
        if momentum < 0 or momentum > 1:
            raise OutsideSpecifiedRange(momentum, 'Momentum', 0, 1)

        self.momentum = momentum
    def update_parameters(self, layer: _Transformation) -> None:
        """Update the weights and biases of the given layer using SGD.

        This method updates the weights and biases of the specified layer
        using stochastic gradient descent with optional momentum.

        Parameters
        ----------
        layer : _Transformation
            The layer to update.

        Note
        ----
        When momentum is used and the layer does not yet have momentum
        arrays for its weights and biases, this method initializes them
        with zeros before performing the momentum updates. When momentum
        is not used, plain (vanilla) SGD updates are performed.
        """
        if self.momentum:
            # If the layer has no momentum arrays for weight and bias
            # updates yet, initialize them with zeros.
            if not hasattr(layer, 'weight_momentums'):
                layer.weight_momentums = np.zeros_like(layer.weights)
                layer.bias_momentums = np.zeros_like(layer.biases)

            # Build the weight updates from the previous momentums and
            # the current gradients, then store them for the next step.
            weight_updates = \
                self.momentum * layer.weight_momentums - \
                self.current_learning_rate * layer.dweights
            layer.weight_momentums = weight_updates

            # Build the bias updates in the same way.
            bias_updates = \
                self.momentum * layer.bias_momentums - \
                self.current_learning_rate * layer.dbiases
            layer.bias_momentums = bias_updates

        # Vanilla SGD updates (if momentum isn't used).
        else:
            weight_updates = -self.current_learning_rate * layer.dweights
            bias_updates = -self.current_learning_rate * layer.dbiases

        # Apply the computed updates to the layer's parameters.
        layer.weights += weight_updates
        layer.biases += bias_updates
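
For reference, a minimal construction sketch. It assumes the module path shown in this page's title (pyml.neural_network.optimizer.sgd) is importable as written; everything else below comes from the source above.

from pyml.exceptions import OutsideSpecifiedRange
from pyml.neural_network.optimizer.sgd import SGD

# A typical configuration: decaying learning rate with heavy momentum.
optimizer = SGD(learning_rate=0.1, decay=1e-3, momentum=0.9)

# A momentum value outside [0, 1] is rejected by the constructor.
try:
    SGD(momentum=1.5)
except OutsideSpecifiedRange:
    print('momentum must lie in [0, 1]')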
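
The momentum branch of update_parameters implements the classic update v_new = momentum * v_old - learning_rate * gradient, after which the parameter is shifted by v_new. Below is a self-contained NumPy sketch of one such step; the DummyLayer stand-in is hypothetical and only mirrors the attribute names (weights, dweights) that update_parameters reads.

import numpy as np

class DummyLayer:
    """Hypothetical stand-in exposing the attributes SGD touches."""
    def __init__(self):
        self.weights = np.array([[0.5, -0.5]])
        self.dweights = np.array([[0.2, -0.1]])

layer = DummyLayer()
learning_rate, momentum = 0.1, 0.9
weight_momentums = np.zeros_like(layer.weights)  # zero-initialized, as above

# One momentum step, matching the arithmetic in update_parameters:
weight_momentums = momentum * weight_momentums \
    - learning_rate * layer.dweights
layer.weights += weight_momentums
print(layer.weights)  # approximately [[0.48, -0.49]]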