Source code for pytagi.nn.sequential

from typing import List, Optional, Tuple

import cutagi
import numpy as np

from pytagi.nn.base_layer import BaseLayer
from pytagi.nn.data_struct import BaseDeltaStates, BaseHiddenStates


class Sequential:
    """A sequential container for layers.

    Layers are added to the container in the order they are passed in the
    constructor. This class acts as a Python wrapper for the C++/CUDA backend
    `cutagi.Sequential`: every method and property forwards to the backend
    object stored in ``self._cpp_backend``.

    Example:
        >>> import pytagi.nn as nn
        >>> model = nn.Sequential(
        ...     nn.Linear(10, 20),
        ...     nn.ReLU(),
        ...     nn.Linear(20, 5)
        ... )
        >>> mu_in = np.random.randn(1, 10)
        >>> var_in = np.abs(np.random.randn(1, 10))
        >>> mu_out, var_out = model(mu_in, var_in)
    """

    # NOTE: project-declared types are written as string annotations so they
    # are resolved lazily by tooling instead of being evaluated when the class
    # body executes.

    def __init__(self, *layers: "BaseLayer"):
        """Initializes the Sequential model with a sequence of layers.

        :param layers: A variable number of layer instances (e.g., Linear,
            ReLU) that will be executed in sequence.
        :type layers: BaseLayer
        """
        # The backend only understands backend layer objects, so unwrap each
        # Python layer before handing the list over.
        backend_layers = [layer._cpp_backend for layer in layers]
        self._cpp_backend = cutagi.Sequential(backend_layers)

    def __call__(
        self, mu_x: np.ndarray, var_x: Optional[np.ndarray] = None
    ) -> Tuple[np.ndarray, np.ndarray]:
        """An alias for the forward pass.

        :param mu_x: The mean of the input data.
        :type mu_x: np.ndarray
        :param var_x: The variance of the input data. Defaults to None.
        :type var_x: np.ndarray, optional
        :return: A tuple containing the mean and variance of the output.
        :rtype: Tuple[np.ndarray, np.ndarray]
        """
        return self.forward(mu_x, var_x)

    # ------------------------------------------------------------------
    # Properties mirrored from the backend
    # ------------------------------------------------------------------
    @property
    def layers(self) -> "List[BaseLayer]":
        """The list of layers in the model."""
        return self._cpp_backend.layers

    @property
    def output_z_buffer(self) -> "BaseHiddenStates":
        """The output hidden states buffer from the forward pass."""
        return self._cpp_backend.output_z_buffer

    @output_z_buffer.setter
    def output_z_buffer(self, value: "BaseHiddenStates"):
        """Sets the output hidden states buffer.

        :param value: The new output hidden states buffer.
        :type value: BaseHiddenStates
        """
        self._cpp_backend.output_z_buffer = value

    @property
    def input_delta_z_buffer(self) -> "BaseDeltaStates":
        """The input delta states buffer used in the backward pass."""
        return self._cpp_backend.input_delta_z_buffer

    @input_delta_z_buffer.setter
    def input_delta_z_buffer(self, value: "BaseDeltaStates"):
        """Sets the input delta states buffer.

        :param value: The new input delta states buffer.
        :type value: BaseDeltaStates
        """
        self._cpp_backend.input_delta_z_buffer = value

    @property
    def output_delta_z_buffer(self) -> "BaseDeltaStates":
        """The output delta states buffer from the backward pass."""
        return self._cpp_backend.output_delta_z_buffer

    @output_delta_z_buffer.setter
    def output_delta_z_buffer(self, value: "BaseDeltaStates"):
        """Sets the output delta states buffer.

        :param value: The new output delta states buffer.
        :type value: BaseDeltaStates
        """
        self._cpp_backend.output_delta_z_buffer = value

    @property
    def z_buffer_size(self) -> int:
        """The size of the hidden state (`z`) buffer."""
        return self._cpp_backend.z_buffer_size

    @z_buffer_size.setter
    def z_buffer_size(self, value: int):
        """Sets the size of the hidden state (`z`) buffer.

        :param value: The new buffer size.
        :type value: int
        """
        self._cpp_backend.z_buffer_size = value

    @property
    def z_buffer_block_size(self) -> int:
        """The block size of the hidden state (`z`) buffer."""
        return self._cpp_backend.z_buffer_block_size

    @z_buffer_block_size.setter
    def z_buffer_block_size(self, value: int):
        """Sets the block size of the hidden state (`z`) buffer.

        :param value: The new buffer block size.
        :type value: int
        """
        self._cpp_backend.z_buffer_block_size = value

    @property
    def device(self) -> str:
        """The computational device ('cpu' or 'cuda') the model is on."""
        return self._cpp_backend.device

    @device.setter
    def device(self, value: str):
        """Sets the computational device.

        :param value: The device to set, e.g., 'cpu' or 'cuda:0'.
        :type value: str
        """
        self._cpp_backend.device = value

    @property
    def input_state_update(self) -> bool:
        """Flag indicating if the input state should be updated."""
        return self._cpp_backend.input_state_update

    @input_state_update.setter
    def input_state_update(self, value: bool):
        """Sets the flag for updating the input state.

        :param value: The new boolean value.
        :type value: bool
        """
        self._cpp_backend.input_state_update = value

    @property
    def num_samples(self) -> int:
        """The number of samples used for Monte Carlo estimation.

        This is used for debugging purposes.
        """
        return self._cpp_backend.num_samples

    @num_samples.setter
    def num_samples(self, value: int):
        """Sets the number of samples for Monte Carlo estimation.

        This is used for debugging purposes.

        :param value: The number of samples.
        :type value: int
        """
        self._cpp_backend.num_samples = value

    # ------------------------------------------------------------------
    # Device and execution configuration
    # ------------------------------------------------------------------
    def to_device(self, device: str):
        """Moves the model and its parameters to a specified device.

        :param device: The target device, e.g., 'cpu' or 'cuda:0'.
        :type device: str
        """
        self._cpp_backend.to_device(device)

    def params_to_device(self):
        """Moves the model parameters to the currently configured CUDA device."""
        self._cpp_backend.params_to_device()

    def params_to_host(self):
        """Moves the model parameters from the CUDA device to the host (CPU)."""
        self._cpp_backend.params_to_host()

    def set_threads(self, num_threads: int):
        """Sets the number of CPU threads to use for computation.

        :param num_threads: The number of threads.
        :type num_threads: int
        """
        self._cpp_backend.set_threads(num_threads)

    def train(self):
        """Sets the model to training mode."""
        self._cpp_backend.train()

    def eval(self):
        """Sets the model to evaluation mode."""
        self._cpp_backend.eval()

    # ------------------------------------------------------------------
    # Inference passes
    # ------------------------------------------------------------------
    def forward(
        self, mu_x: np.ndarray, var_x: Optional[np.ndarray] = None
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Performs a forward pass through the network.

        :param mu_x: The mean of the input data.
        :type mu_x: np.ndarray
        :param var_x: The variance of the input data. Defaults to None.
        :type var_x: np.ndarray, optional
        :return: A tuple containing the mean and variance of the output.
        :rtype: Tuple[np.ndarray, np.ndarray]
        """
        # The backend call returns nothing useful here; results are read back
        # through get_outputs().
        self._cpp_backend.forward(mu_x, var_x)
        return self.get_outputs()

    def backward(self):
        """Performs a backward pass to update the network parameters."""
        self._cpp_backend.backward()

    def smoother(self) -> Tuple[np.ndarray, np.ndarray]:
        """Performs a smoother pass (e.g., Rauch-Tung-Striebel smoother).

        This is used with the SLSTM to refine estimates by running backwards
        through time.

        :return: A tuple containing the mean and variance of the smoothed
            output.
        :rtype: Tuple[np.ndarray, np.ndarray]
        """
        self._cpp_backend.smoother()
        return self.get_outputs_smoother()

    def step(self):
        """Performs a single step of inference to update the parameters."""
        self._cpp_backend.step()

    def reset_lstm_states(self):
        """Resets the hidden and cell states of all LSTM layers in the model."""
        self._cpp_backend.reset_lstm_states()

    # ------------------------------------------------------------------
    # Raw buffer access
    # ------------------------------------------------------------------
    def output_to_host(self) -> List[float]:
        """Copies the raw output data from the device to the host.

        :return: A list of floating-point values representing the flattened
            output.
        :rtype: List[float]
        """
        return self._cpp_backend.output_to_host()

    def delta_z_to_host(self) -> List[float]:
        """Copies the raw delta Z (error signal) data from the device to the host.

        :return: A list of floating-point values representing the flattened
            delta Z.
        :rtype: List[float]
        """
        return self._cpp_backend.delta_z_to_host()

    def set_delta_z(self, delta_mu: np.ndarray, delta_var: np.ndarray):
        """Sets the delta Z (error signal) on the device for the backward pass.

        :param delta_mu: The mean of the error signal.
        :type delta_mu: np.ndarray
        :param delta_var: The variance of the error signal.
        :type delta_var: np.ndarray
        """
        self._cpp_backend.set_delta_z(delta_mu, delta_var)

    # ------------------------------------------------------------------
    # Introspection
    # ------------------------------------------------------------------
    def get_layer_stack_info(self) -> str:
        """Gets a string representation of the layer stack architecture.

        :return: A descriptive string of the model's layers.
        :rtype: str
        """
        return self._cpp_backend.get_layer_stack_info()

    def preinit_layer(self):
        """Pre-initializes the layers in the model."""
        self._cpp_backend.preinit_layer()

    def get_neg_var_w_counter(self) -> dict:
        """Counts the number of negative variance weights in each layer.

        :return: A dictionary where keys are layer names and values are the
            counts of negative variances.
        :rtype: dict
        """
        return self._cpp_backend.get_neg_var_w_counter()

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------
    def save(self, filename: str):
        """Saves the model's state to a binary file.

        :param filename: The path to the file where the model will be saved.
        :type filename: str
        """
        self._cpp_backend.save(filename)

    def load(self, filename: str):
        """Loads the model's state from a binary file.

        :param filename: The path to the file from which to load the model.
        :type filename: str
        """
        self._cpp_backend.load(filename)

    def save_csv(self, filename: str):
        """Saves the model parameters to a CSV file.

        :param filename: The base path for the CSV file(s).
        :type filename: str
        """
        self._cpp_backend.save_csv(filename)

    def load_csv(self, filename: str):
        """Loads the model parameters from a CSV file.

        :param filename: The base path of the CSV file(s).
        :type filename: str
        """
        self._cpp_backend.load_csv(filename)

    # ------------------------------------------------------------------
    # Parameters
    # ------------------------------------------------------------------
    def parameters(
        self,
    ) -> List[Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]]:
        """Gets all model parameters.

        :return: A list where each element is a tuple containing the
            parameters for a layer: (mu_w, var_w, mu_b, var_b).
        :rtype: List[Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]]
        """
        return self._cpp_backend.parameters()

    def load_state_dict(self, state_dict: dict):
        """Loads the model's parameters from a state dictionary.

        :param state_dict: A dictionary containing the model's state.
        :type state_dict: dict
        """
        self._cpp_backend.load_state_dict(state_dict)

    def state_dict(self) -> dict:
        """Gets the model's parameters as a state dictionary.

        :return: A dictionary where each key is the layer name and the value
            is a tuple of parameters: (mu_w, var_w, mu_b, var_b).
        :rtype: dict
        """
        return self._cpp_backend.state_dict()

    def params_from(self, other: "Sequential"):
        """Copies parameters from another Sequential model.

        :param other: The source model from which to copy parameters. A
            wrapper instance is unwrapped to its backend object; any object
            without a ``_cpp_backend`` attribute is forwarded unchanged.
        :type other: Sequential
        """
        # Everywhere else in this class the backend is handed backend objects
        # (see __init__, which unwraps each layer). Passing the Python wrapper
        # itself is presumably rejected by the binding, so unwrap it here.
        # Falling back to `other` keeps callers that already pass a backend
        # object working.
        source_backend = getattr(other, "_cpp_backend", other)
        self._cpp_backend.params_from(source_backend)

    # ------------------------------------------------------------------
    # State retrieval
    # ------------------------------------------------------------------
    def get_outputs(self) -> Tuple[np.ndarray, np.ndarray]:
        """Gets the outputs from the last forward pass.

        :return: A tuple containing the mean and variance of the output.
        :rtype: Tuple[np.ndarray, np.ndarray]
        """
        return self._cpp_backend.get_outputs()

    def get_outputs_smoother(self) -> Tuple[np.ndarray, np.ndarray]:
        """Gets the outputs from the last smoother pass.

        :return: A tuple containing the mean and variance of the smoothed
            output.
        :rtype: Tuple[np.ndarray, np.ndarray]
        """
        return self._cpp_backend.get_outputs_smoother()

    def get_input_states(self) -> Tuple[np.ndarray, np.ndarray]:
        """Gets the input states of the model.

        :return: A tuple containing the mean and variance of the input states.
        :rtype: Tuple[np.ndarray, np.ndarray]
        """
        return self._cpp_backend.get_input_states()

    def get_norm_mean_var(self) -> dict:
        """Gets the mean and variance from normalization layers.

        :return: A dictionary where each key is a normalization layer name and
            the value is a tuple of four arrays:
            (mu_batch, var_batch, mu_ema_batch, var_ema_batch).
        :rtype: dict
        """
        return self._cpp_backend.get_norm_mean_var()

    def get_lstm_states(self, time_step: int = -1) -> dict:
        """Get the LSTM states for all LSTM layers as a dictionary.

        :param time_step: The time step at which to retrieve the smoothed
            SLSTM states. If not provided or -1, retrieves the unsmoothed
            current LSTM states.
        :type time_step: int, optional
        :return: A dictionary mapping layer indices to a 4-tuple of numpy
            arrays: (mu_h_prior, var_h_prior, mu_c_prior, var_c_prior).
        :rtype: dict
        """
        return self._cpp_backend.get_lstm_states(time_step)

    def set_lstm_states(self, states: dict) -> None:
        """Sets the states for all LSTM layers.

        :param states: A dictionary mapping layer indices to a 4-tuple of
            numpy arrays: (mu_h_prior, var_h_prior, mu_c_prior, var_c_prior).
        :type states: dict
        """
        self._cpp_backend.set_lstm_states(states)