Source code for pyml.neural_network.layer.activation.softmax
"""Softmax activation function used for multiclass classification problems as
final compoment of the network.
"""
from pyml.neural_network.layer.activation import _Activation
import numpy as np
class Softmax(_Activation):
    r"""Softmax activation function.

    The softmax activation function is defined as
    :math:`\sigma (\mathbf{z})_{j} = \frac{e^{z_{j}}}{\sum_{k=1}^{K} e^{z_{k}}}`.
    """

    def __init__(self) -> None:
        super().__init__()
    def forward(self, inputs: np.ndarray) -> None:
        r"""Computes a forward pass.

        The softmax activation function is defined as
        :math:`\sigma (\mathbf{z})_{j} = \frac{e^{z_{j}}}{\sum_{k=1}^{K} e^{z_{k}}}`.

        Parameters
        ----------
        inputs : numpy.ndarray
            Input values from the previous neural layer.
        """
        self.inputs = inputs
        # Compute unnormalized probabilities; subtracting the row-wise maximum
        # prevents overflow in the exponential without changing the result
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        # Add noise to data
        # exp_values = exp_values + 0.000001 * np.random.randn(*exp_values.shape)
        # Normalize probabilities for each data point (axis=1)
        probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        self.output = probabilities
    def backward(self, dvalues: np.ndarray) -> None:
        r"""Computes the backward step.

        The derivative of the softmax function is calculated as follows:
        :math:`\frac{\partial S(z_{i})}{\partial z_{j}} = \begin{cases} S(z_{i}) \cdot (1 - S(z_{i})) & \text{if } i = j \\ -S(z_{i}) \cdot S(z_{j}) & \text{if } i \neq j \end{cases}`.

        Parameters
        ----------
        dvalues : numpy.ndarray
            Gradient passed back from the following layer (the backward pass
            traverses the layers in reversed order).
        """
        # Create uninitialized array
        self.dinputs = np.empty_like(dvalues)
        # Enumerate outputs and gradients
        for index, (single_output, single_dvalues) in \
                enumerate(zip(self.output, dvalues)):
            # Flatten output array into a column vector
            single_output = single_output.reshape(-1, 1)
            # Calculate Jacobian matrix of the output
            jacobian_matrix = np.diagflat(single_output) - \
                np.dot(single_output, single_output.T)
            # Calculate sample-wise gradient
            # and add it to the array of sample gradients
            self.dinputs[index] = np.dot(jacobian_matrix, single_dvalues)
    def predictions(self, outputs: np.ndarray) -> np.ndarray:
        """Converts outputs to predictions.

        The softmax activation function calculates a confidence score for each
        class. Since the softmax function supports not only binary tasks but
        also multiclass problems, the user receives the index of the class with
        the highest confidence score.

        Parameters
        ----------
        outputs : numpy.ndarray
            Output computed by the softmax activation function.

        Returns
        -------
        numpy.ndarray
            Array containing, for each sample, the index of the class with the
            highest confidence.
        """
        return np.argmax(outputs, axis=1)
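

# A minimal usage sketch of the Softmax layer defined above: it only exercises
# the public forward/backward/predictions interface. The example logits and the
# all-ones upstream gradient are illustrative placeholders, not values taken
# from the library.
if __name__ == "__main__":
    softmax = Softmax()

    # Two samples with three raw scores (logits) each
    logits = np.array([[2.0, 1.0, 0.1],
                       [0.5, 2.5, 0.0]])

    # Forward pass: each row of `softmax.output` sums to (approximately) 1
    softmax.forward(logits)
    print(softmax.output.sum(axis=1))

    # Predicted class index per sample (argmax over the class axis)
    print(softmax.predictions(softmax.output))  # -> [0 1]

    # Backward pass with a placeholder upstream gradient of ones;
    # `softmax.dinputs` then holds the sample-wise Jacobian-vector products
    softmax.backward(np.ones_like(softmax.output))
    print(softmax.dinputs.shape)  # -> (2, 3)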