Source code for pyml.neural_network.nn

"""Neural network

This module provides a simple implementation of a neural network."""

from __future__ import annotations

# Import of parent classes
from pyml.neural_network.layer import _Layer
from pyml.neural_network.loss import _Loss
from pyml.neural_network.optimizer import _Optimizer
from pyml.neural_network.layer.activation import _Activation
from pyml.neural_network.layer.transformation import _Transformation, _TrainableTransformation, Input, Dropout
from pyml.utils.accuracy import _Accuracy

# Import layers, losses, optimizers
from pyml.neural_network.layer.activation import Softmax
from pyml.neural_network.loss import CategoricalCrossentropy, Softmax_CategoricalCrossentropy

# Exceptions
from pyml.exceptions import HyperparametersNotSpecified

import pickle
import copy
import os
import numpy as np

class InconsistentLayerSizes(Exception):
    """Exception raised when the neuron counts of two consecutive layers do not match.

    Parameters
    ----------
    output_size_previous_layer : int
        Output neurons of the previous layer
    input_size_current_layer : int
        Input size of the current layer

    Examples
    --------
    >>> from pyml.nn import NN
    >>> model = NN()
    >>> model.add_layer(Dense(4, 16))
    >>> model.add_layer(ReLU())
    >>> model.add_layer(Dense(20, 10))
    InconsistentLayerSizes: The output size of the previous layer: 16 and the input size of the current layer: 20 do not match.
    """

    def __init__(self, output_size_previous_layer: int, input_size_current_layer: int) -> None:
        self.message = f'The output size of the previous layer: {output_size_previous_layer} ' + \
                       f'and the input size of the current layer: {input_size_current_layer} do not match.'
        super().__init__(self.message)

class NN():
    """Neural network class for regression & classification

    This class provides functionality for building, training, and evaluating
    neural networks.

    Attributes
    ----------
    layers : list[_Layer]
        A list of layers in the neural network.
    trainable_layers : list[_TrainableTransformation]
        A list of trainable layers in the neural network.
    loss : _Loss
        The loss function used for training the network.
    optimizer : _Optimizer
        The optimizer used for updating network parameters during training.
    accuracy : _Accuracy
        The accuracy metric used for evaluating the network's performance.
    finished_build : bool
        Indicates whether the neural network has been built.

    Examples
    --------
    >>> from pyml.nn import NN
    >>> model = NN()
    >>> model.add_layer(Dense(4, 512))
    >>> model.add_layer(ReLU())
    >>> model.add_layer(Dropout(0.2))
    >>> model.add_layer(Dense(512, NUM_CLASSES))
    >>> model.add_layer(Softmax())
    >>> model.set_loss(CategoricalCrossentropy())
    >>> model.set_optimizer(Adam(learning_rate=0.005, decay=5e-5))
    >>> model.set_accuracy(MultiClassAccuracy())
    >>> model.build()
    """

    def __init__(self):
        self.layers = []
        self.trainable_layers = []

        # Hyperparameters
        self.loss = None
        self.optimizer = None
        self.accuracy = None

        self.finished_build = False
        self.softmax_classifier_output = None

    def _get_last_connected_layer(self) -> _Transformation:
        # Dropout layers preserve their input shape, so they are skipped when
        # looking for the last layer with a defined output size
        for layer in reversed(self.layers):
            if isinstance(layer, _Transformation) and not isinstance(layer, Dropout):
                return layer

    def add_layer(self, layer: _Layer) -> None:
        """Adds a layer to the model

        Iteratively adds layers to the model. These layers can be e.g.
        conventional dense layers or activation functions. Be aware: the
        order in which the layers are appended matters.

        Parameters
        ----------
        layer : _Layer
            The layer to be added to the network.

        Raises
        ------
        InconsistentLayerSizes
            Raised when adding a layer whose input size does not match the
            output size of the previous layer

        Examples
        --------
        >>> from pyml.nn import NN
        >>> model = NN()
        >>> model.add_layer(Dense(4, 16))
        >>> model.add_layer(ReLU())
        >>> model.add_layer(Dense(16, 32))
        >>> model.add_layer(ReLU())
        >>> ...
        """
        # TODO: Update the size check below to handle convolution and reshape
        # layers; until then, layers are appended without validation.
        self.layers.append(layer)
        return

        if not self.layers or isinstance(layer, _Activation) or isinstance(layer, Dropout):
            self.layers.append(layer)
            return

        last_layer = self._get_last_connected_layer()
        if last_layer.output_size != layer.input_size:
            raise InconsistentLayerSizes(last_layer.output_size, layer.input_size)

        self.layers.append(layer)

    def set_loss(self, loss: _Loss) -> None:
        """Set the loss function for the neural network.

        Parameters
        ----------
        loss : _Loss
            The loss function to be used for training.
        """
        self.loss = loss

    def set_optimizer(self, optimizer: _Optimizer) -> None:
        """Set the optimizer for the neural network.

        Parameters
        ----------
        optimizer : _Optimizer
            The optimizer to be used for updating parameters.
        """
        self.optimizer = optimizer

    def set_accuracy(self, accuracy: _Accuracy) -> None:
        """Set the accuracy metric for the neural network.

        Parameters
        ----------
        accuracy : _Accuracy
            The accuracy metric to be used for evaluation.
        """
        self.accuracy = accuracy

    def check_hyperparameters(self) -> None:
        """Check if essential hyperparameters are specified.

        Raises
        ------
        HyperparametersNotSpecified
            If any of the essential hyperparameters is not specified.
        """
        if self.loss is None:
            raise HyperparametersNotSpecified('loss')
        elif self.optimizer is None:
            raise HyperparametersNotSpecified('optimizer')
        elif self.accuracy is None:
            raise HyperparametersNotSpecified('accuracy')

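    # Usage note (sketch): `build()` calls `check_hyperparameters()` first, so
    # building a model without a loss raises HyperparametersNotSpecified('loss').
    # The exact exception message is defined in pyml.exceptions.
    #
    # >>> model = NN()
    # >>> model.add_layer(Dense(4, 16))
    # >>> model.build()  # raises HyperparametersNotSpecified
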
    def build(self) -> None:
        """Build the neural network architecture.

        This method sets up the connections between layers and prepares
        the network for training.
        """
        # Check that all hyperparameters are initialized
        self.check_hyperparameters()

        # Set input layer
        self.input_layer = Input()

        # Count the number of layers
        layer_count = len(self.layers)

        # Set the order of the layers, specify their direct neighbors
        # and retrieve all trainable layers
        for i in range(layer_count):
            if i == 0:
                self.layers[i].set_adjacent_layers(
                    previous_layer=self.input_layer,
                    next_layer=self.layers[i+1]
                )
            elif i < layer_count - 1:
                self.layers[i].set_adjacent_layers(
                    previous_layer=self.layers[i-1],
                    next_layer=self.layers[i+1]
                )
            else:
                self.layers[i].set_adjacent_layers(
                    previous_layer=self.layers[i-1],
                    next_layer=self.loss
                )
                # Final layer of the network is the last layer, not the loss
                self.output_layer = self.layers[i]

            # Retrieve the trainable layers
            if isinstance(self.layers[i], _TrainableTransformation):
                self.trainable_layers.append(self.layers[i])

        # Pass the trainable layers to the loss instance
        self.loss.set_trainable_layers(self.trainable_layers)

        # Create a combined activation and loss function object with a faster
        # gradient calculation if using Softmax output activation together
        # with the Categorical Cross-Entropy loss function
        if isinstance(self.layers[-1], Softmax) and \
                isinstance(self.loss, CategoricalCrossentropy):
            self.softmax_classifier_output = Softmax_CategoricalCrossentropy()

        self.finished_build = True

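    # After `build()`, the layers form a doubly linked chain that the forward
    # and backward passes traverse via `previous_layer` / `next_layer`:
    #
    #   Input -> layers[0] -> layers[1] -> ... -> layers[-1] -> loss
    #
    # (Illustration only; the actual wiring is done by each layer's
    # `set_adjacent_layers`.)
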
    @staticmethod
    def print_summary(
        context: str,
        accuracy: float,
        loss: float,
        data_loss: float = None,
        regularization_loss: float = None,
        learning_rate: float = None,
    ) -> None:
        """Print a summary of training or evaluation results.

        Parameters
        ----------
        context : str
            Context for the summary (e.g., 'training', 'validation').
        accuracy : float
            The accuracy achieved during training or evaluation.
        loss : float
            The total loss achieved during training or evaluation.
        data_loss : float, optional
            The data loss component of the total loss, by default None.
        regularization_loss : float, optional
            The regularization loss component of the total loss, by default None.
        learning_rate : float, optional
            The learning rate used during training, by default None.
        """
        # Display '- - -' for values that are not specified
        data_loss = '- - -' if data_loss is None else f'{data_loss:.3f}'
        regularization_loss = '- - -' if regularization_loss is None else f'{regularization_loss:.3f}'
        learning_rate = '- - -' if learning_rate is None else f'{learning_rate:.3f}'

        print(
            f'{context}, ' +
            f'acc: {accuracy:.3f}, ' +
            f'loss: {loss:.3f} (' +
            f'data_loss: {data_loss}, ' +
            f'reg_loss: {regularization_loss}), ' +
            f'lr: {learning_rate}'
        )

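    # For example, NN.print_summary('epoch', 0.913, 0.287, 0.250, 0.037, 0.005)
    # prints:
    #
    #   epoch, acc: 0.913, loss: 0.287 (data_loss: 0.250, reg_loss: 0.037), lr: 0.005
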
    def train(
        self,
        X: np.ndarray,
        y: np.ndarray,
        *,
        epochs: int = 1,
        batch_size: int = None,
        validation_data: np.ndarray = None,
        verbose: int = 0,
        print_summary_every: int = 1,
        save_file_path: str = None
    ) -> None:
        """Train the neural network.

        Examples
        --------
        >>> X_train, y_train, X_test, y_test = ...  # Load training data
        >>> network.train(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32)

        Parameters
        ----------
        X : numpy.ndarray
            The input training data.
        y : numpy.ndarray
            The target training data.
        epochs : int, optional
            The number of training epochs, by default 1.
        batch_size : int, optional
            The batch size for training, by default None.
        validation_data : numpy.ndarray, optional
            Validation data for evaluation during training. Should include
            data (X) and their labels (y), by default None.
        verbose : int, optional
            Verbosity level (0: no prints, 1: epoch summary, 2: detailed
            prints), by default 0.
        print_summary_every : int, optional
            Print a summary every `print_summary_every` steps, by default 1.
        save_file_path : str, optional
            Path to save the model parameters after training, by default None.
            TODO: NOT IMPLEMENTED YET
        """
        # VERBOSE DESCRIPTION------------------------|
        # VERBOSE == 0: no prints at all             |
        # VERBOSE == 1: print only epoch summary     |
        # VERBOSE == 2: print also after major steps |
        # -------------------------------------------|

        # Check if the model has already been built; if not, build it now
        if not self.finished_build:
            self.build()
            if verbose > 1: print('training: model had to be built')

        # Check that all hyperparameters are initialized
        self.check_hyperparameters()
        if verbose > 1: print('training: all hyperparameters have been initialized')

        # Initialize accuracy instance
        self.accuracy.init(y)
        if verbose > 1: print('training: accuracy instance has been initialized')

        # Compute the step size
        # The training step size describes how many batches will be forwarded
        # during each epoch
        training_step_size = 1
        if batch_size is not None:
            # training step size = ⌊ count of training data / batch size ⌋
            training_step_size = len(X) // batch_size

            # Add a single training step if some training data would be left over
            if training_step_size * batch_size < len(X):
                training_step_size += 1
        if verbose > 1: print(f'training: training step size has been computed -- training step size: {training_step_size}, batch size: {batch_size}')

        # Start training
        if verbose > 1: print('training: start training')
        for epoch in range(epochs):

            # Print current epoch
            if verbose > 0: print(f'training: Epoch: {epoch}/{epochs}')

            # Reset loss and accuracy
            self.loss.reset()
            self.accuracy.reset()

            for training_step in range(training_step_size):

                # Print current training step
                if verbose > 1: print(f'training: Training step: {training_step}/{training_step_size}')

                # Compute the batch
                if batch_size is None:
                    X_batch = X
                    y_batch = y
                else:
                    lower_limit = training_step * batch_size
                    upper_limit = (training_step + 1) * batch_size
                    X_batch = X[lower_limit:upper_limit]
                    y_batch = y[lower_limit:upper_limit]

                # Compute the forward pass
                if verbose > 1: print('training: Compute forward pass')
                output = self.forward(X_batch, training=True)

                # Compute the loss
                if verbose > 1: print('training: Compute loss')
                data_loss, regularization_loss = self.loss.calculate(
                    output,
                    y_batch,
                    include_regularization=True
                )
                # Combine data loss and regularization loss
                loss = data_loss + regularization_loss

                # Compute the backward pass
                if verbose > 1: print('training: Compute backward')
                self.backward(output, y_batch)

                # Update parameter weights
                if verbose > 1: print('training: Update parameters')
                self.optimizer.pre_update_parameters()
                for layer in self.trainable_layers:
                    self.optimizer.update_parameters(layer)
                self.optimizer.post_update_parameters()

                # Calculate accuracy
                predictions = self.output_layer.predictions(output)
                accuracy = self.accuracy.calculate(predictions, y_batch)

                # Print step summary
                if verbose > 1 and (not training_step % print_summary_every or training_step == training_step_size - 1):
                    NN.print_summary(
                        'training',
                        accuracy,
                        loss,
                        data_loss,
                        regularization_loss,
                        self.optimizer.current_learning_rate
                    )

            # Print epoch summary
            epoch_data_loss, epoch_regularization_loss = \
                self.loss.calculate_accumulated(include_regularization=True)
            epoch_loss = epoch_data_loss + epoch_regularization_loss
            epoch_accuracy = self.accuracy.calculate_accumulated()

            if verbose > 0:
                NN.print_summary(
                    'epoch',
                    epoch_accuracy,
                    epoch_loss,
                    epoch_data_loss,
                    epoch_regularization_loss,
                    self.optimizer.current_learning_rate
                )

            # Evaluate the model on validation data
            if validation_data is not None:
                self.evaluate(*validation_data, batch_size=batch_size, verbose=verbose)

        if verbose > 1: print('training: Training finished')

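    # Batching arithmetic (worked example): with len(X) == 1050 and
    # batch_size == 100, the floor division gives 10 full batches; since
    # 10 * 100 < 1050, one extra step is added, so training_step_size == 11
    # and the final batch holds the remaining 50 samples (NumPy slicing past
    # the end of the array simply truncates).
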
    def _forward(self, layer: _Layer, X: np.ndarray, training: bool = False) -> None:
        """Perform a forward pass through a single layer.

        If the layer is a dropout layer, the training context (True or False)
        is also passed.

        Parameters
        ----------
        layer : _Layer
            The layer to perform the forward pass on.
        X : numpy.ndarray
            The input data.
        training : bool, optional
            Indicates if the network is in training mode, by default False.
        """
        if isinstance(layer, Dropout):
            layer.forward(X, training)
        else:
            layer.forward(X)

    def forward(self, X: np.ndarray, training: bool = False) -> np.ndarray:
        """Perform a forward pass through the entire neural network.

        Parameters
        ----------
        X : numpy.ndarray
            The input data.
        training : bool, optional
            Indicates if the network is in training mode, by default False.

        Returns
        -------
        numpy.ndarray
            The output of the network.
        """
        # Forward data through the input layer
        self._forward(self.input_layer, X, training)

        # Iterate through the remaining layers
        for layer in self.layers:
            self._forward(layer, layer.previous_layer.output, training)

        # Return the output of the last layer
        return layer.output

    def backward(self, output: np.ndarray, y: np.ndarray) -> None:
        """Perform a backward pass through the neural network.

        Parameters
        ----------
        output : numpy.ndarray
            The output of the network.
        y : numpy.ndarray
            The target data.
        """
        # Conduct the backward pass via the combined softmax classifier if available
        if self.softmax_classifier_output is not None:
            self.softmax_classifier_output.backward(output, y)

            # Skip the backward step for the last layer since activation and
            # loss are combined
            self.layers[-1].dinputs = self.softmax_classifier_output.dinputs

            for layer in reversed(self.layers[:-1]):
                layer.backward(layer.next_layer.dinputs)

            # Finish
            return

        # Compute the backward pass as usual
        self.loss.backward(output, y)

        for layer in reversed(self.layers):
            layer.backward(layer.next_layer.dinputs)

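    # Why the combined shortcut exists: for a softmax output followed by
    # categorical cross-entropy, the gradient with respect to the pre-softmax
    # inputs simplifies to (softmax_output - y_true), which is cheaper and more
    # numerically stable than chaining the two backward passes. This is a
    # standard result; Softmax_CategoricalCrossentropy is assumed to implement
    # it, typically averaged over the batch.
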
    def evaluate(self, X_val: np.ndarray, y_val: np.ndarray, *, batch_size: int = None, verbose: int = 0) -> None:
        """Evaluate the neural network on validation data.

        Parameters
        ----------
        X_val : numpy.ndarray
            The input validation data.
        y_val : numpy.ndarray
            The target validation data.
        batch_size : int, optional
            The batch size for evaluation, by default None.
        verbose : int, optional
            Verbosity level (0: no prints, 1: summary prints), by default 0.
        """
        # Default step size
        validation_step_size = 1

        # Calculate the number of batches
        if batch_size is not None:
            # validation step size = ⌊ count of validation data / batch size ⌋
            validation_step_size = len(X_val) // batch_size

            # Add a single validation step if some validation data would be left over
            if validation_step_size * batch_size < len(X_val):
                validation_step_size += 1

        # Reset loss and accuracy
        self.loss.reset()
        self.accuracy.reset()

        # Iterate over the steps
        for validation_step in range(validation_step_size):

            # Compute the batch
            if batch_size is None:
                X_batch = X_val
                y_batch = y_val
            else:
                lower_limit = validation_step * batch_size
                upper_limit = (validation_step + 1) * batch_size
                X_batch = X_val[lower_limit:upper_limit]
                y_batch = y_val[lower_limit:upper_limit]

            # Perform the forward pass
            output = self.forward(X_batch, training=False)

            # Calculate the loss
            self.loss.calculate(output, y_batch)

            # Get predictions and calculate the accuracy
            predictions = self.output_layer.predictions(output)
            self.accuracy.calculate(predictions, y_batch)

        # Retrieve validation loss and accuracy
        validation_loss = self.loss.calculate_accumulated()
        validation_accuracy = self.accuracy.calculate_accumulated()

        # Print a summary
        if verbose > 0:
            NN.print_summary(
                'validation',
                validation_accuracy,
                validation_loss
            )

    def predict(self, X: np.ndarray, *, batch_size: int = None) -> np.ndarray:
        """Generate predictions using the trained neural network.

        Parameters
        ----------
        X : numpy.ndarray
            The input data for which predictions are to be generated.
        batch_size : int, optional
            The batch size for prediction, by default None.

        Returns
        -------
        numpy.ndarray
            The predictions generated by the network.
        """
        # Default value if the batch size is not set
        prediction_step_size = 1

        # Calculate the number of batches
        if batch_size is not None:
            # prediction step size = ⌊ count of prediction data / batch size ⌋
            prediction_step_size = len(X) // batch_size

            # Add a single prediction step if some data would be left over
            if prediction_step_size * batch_size < len(X):
                prediction_step_size += 1

        # Model output
        output = []

        for prediction_step in range(prediction_step_size):

            # Get the batch
            if batch_size is None:
                batch_X = X
            else:
                lower_limit = prediction_step * batch_size
                upper_limit = (prediction_step + 1) * batch_size
                batch_X = X[lower_limit:upper_limit]

            # Perform the forward pass
            batch_output = self.forward(batch_X, training=False)

            # Append the batch prediction to the list of predictions
            output.append(batch_output)

        # Stack the batch outputs into a single array
        output = np.vstack(output)

        return output

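    # Usage sketch (hypothetical shapes): predicting 1000 samples with
    # batch_size=256 runs ceil(1000 / 256) == 4 forward passes and stacks the
    # batch outputs back into one (1000, NUM_CLASSES) array via np.vstack.
    # For a softmax classifier, class ids can then be read off with argmax:
    #
    # >>> probabilities = model.predict(X_new, batch_size=256)
    # >>> class_ids = probabilities.argmax(axis=1)
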
    def get_model_parameters(self) -> list[np.ndarray]:
        """Retrieve the parameters of all trainable layers in the model.

        Returns
        -------
        list[numpy.ndarray]
            A list of numpy arrays containing the parameters of each
            trainable layer.
        """
        parameters = []

        for layer in self.trainable_layers:
            parameters.append(layer.get_parameters())

        return parameters

    def set_model_parameters(self, parameters: list[tuple[np.ndarray]]) -> None:
        """Set the parameters of the trainable layers in the model.

        Parameters
        ----------
        parameters : list[tuple[numpy.ndarray]]
            A list storing the parameters for each trainable layer within a tuple.
        """
        for parameter, layer in zip(parameters, self.trainable_layers):
            layer.set_parameters(*parameter)

    def save_model_parameters(self, path: str) -> None:
        """Save the model parameters to a file.

        Parameters
        ----------
        path : str
            The path to the file where the parameters will be saved.
        """
        with open(path, 'wb') as f:
            pickle.dump(self.get_model_parameters(), f)

    def load_model_parameters(self, path: str) -> None:
        """Load and set the model parameters from a file.

        Parameters
        ----------
        path : str
            The path to the file from which the parameters will be loaded.
        """
        with open(path, 'rb') as f:
            self.set_model_parameters(pickle.load(f))

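    # Round trip (sketch; 'params.pkl' and `fresh_model` are illustrative
    # names): parameters saved from one model can be loaded into another with
    # the same architecture, since only the weights and biases are pickled,
    # not the layer objects themselves:
    #
    # >>> model.save_model_parameters('params.pkl')
    # >>> fresh_model.load_model_parameters('params.pkl')  # same layer layout assumed
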
    def save_model(self, path: str) -> None:
        """Save the entire trained model to a file.

        Parameters
        ----------
        path : str
            The path to the file where the model will be saved.

        Examples
        --------
        >>> network.save_model('trained_model.model')
        """
        # Make a copy of the current model
        model_copy = copy.deepcopy(self)

        # Reset loss and accuracy
        model_copy.loss.reset()
        model_copy.accuracy.reset()

        # Remove data-related attributes from the model
        model_copy.input_layer.__dict__.pop('output', None)
        model_copy.loss.__dict__.pop('dinputs', None)

        for layer in model_copy.layers:
            for property in ['inputs', 'output', 'dinputs', 'dweights', 'dbiases']:
                layer.__dict__.pop(property, None)

        # Save the model
        with open(path, 'wb') as f:
            pickle.dump(model_copy, f)

    @staticmethod
    def load(path: str) -> NN:
        """Load a trained model from a file.

        Parameters
        ----------
        path : str
            The path to the file from which the model will be loaded.

        Returns
        -------
        NN
            The loaded trained model.

        Examples
        --------
        >>> loaded_model = NN.load('trained_model.model')
        """
        with open(path, 'rb') as f:
            model = pickle.load(f)

        return model
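
# Save/load round trip (sketch): the saved copy has its transient tensors
# (inputs, outputs, gradients) stripped by `save_model`, so the loaded model
# is ready for `evaluate` or `predict` without retraining:
#
# >>> network.save_model('trained_model.model')
# >>> restored = NN.load('trained_model.model')
# >>> predictions = restored.predict(X_test, batch_size=128)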