Source code for pytagi.nn.lstm

import cutagi

from pytagi.nn.base_layer import BaseLayer



[docs]
class LSTM(BaseLayer):
    """
    A **Long Short-Term Memory (LSTM)** layer for RNNs. It inherits from BaseLayer.
    """

    def __init__(
        self,
        input_size: int,
        output_size: int,
        seq_len: int,
        bias: bool = True,
        gain_weight: float = 1.0,
        gain_bias: float = 1.0,
        init_method: str = "He",
    ):
        """
        Initializes the LSTM layer.

        Args:
            input_size: The number of features in the input tensor at each time
                        step.
            output_size: The size of the hidden state (:math:`h_t`), which is the
                         number of features in the output tensor at each time
                         step.
            seq_len: The maximum length of the input sequence. This is often
                     required for efficient memory allocation in C++/CUDA
                     backends like cuTAGI.
            bias: If True, the internal gates and cell state updates will include
                  an additive bias vector. Defaults to True.
            gain_weight: Scaling factor applied to the initialized weights
                         (:math:`W`). Defaults to 1.0.
            gain_bias: Scaling factor applied to the initialized biases
                       (:math:`b`). Defaults to 1.0.
            init_method: The method used for initializing the weights and
                         biases (e.g., "He", "Xavier"). Defaults to "He".
        """
        super().__init__()

        self.input_size = input_size
        self.output_size = output_size
        self.seq_len = seq_len
        self.bias = bias
        self.gain_weight = gain_weight
        self.gain_bias = gain_bias
        self.init_method = init_method

        self._cpp_backend = cutagi.LSTM(
            input_size,
            output_size,
            seq_len,
            bias,
            gain_weight,
            gain_bias,
            init_method,
        )


[docs]
    def get_layer_info(self) -> str:
        """
        Retrieves a descriptive string containing information about the layer's
        configuration (e.g., input/output size, sequence length) from the
        C++ backend.
        """
        return self._cpp_backend.get_layer_info()



[docs]
    def get_layer_name(self) -> str:
        """
        Retrieves the name of the layer (e.g., 'LSTM') from the C++ backend.
        """
        return self._cpp_backend.get_layer_name()



[docs]
    def init_weight_bias(self):
        """
        Initializes the various weight matrices and bias vectors used by the
        LSTM's gates (input, forget, output) and cell state updates, using
        the specified method and gain factors. This task is delegated to the
        C++ backend.
        """
        self._cpp_backend.init_weight_bias()