Source code for pykal.algorithm_library.controllers.lqr

import numpy as np
from numpy.typing import NDArray
from typing import Tuple
from scipy.linalg import solve_discrete_are



[docs]
class LQR:
    """
    Linear Quadratic Regulator (LQR) for optimal state feedback control.

    Based on Kalman (1960), "Contributions to the theory of optimal control",
    Boletin de la Sociedad Matematica Mexicana, Vol. 5, No. 2, pp. 102-119.

    The LQR controller computes the optimal state feedback gain that minimizes
    a quadratic cost function for linear discrete-time systems.
    """


[docs]
    @staticmethod
    def compute_gain(
        A: NDArray,
        B: NDArray,
        Q: NDArray,
        R: NDArray
    ) -> Tuple[NDArray, NDArray]:
        """
        Compute the optimal LQR feedback gain by solving the Discrete-Time
        Algebraic Riccati Equation (DARE).

        Parameters
        ----------
        A : NDArray
            State transition matrix, shape (n, n)
        B : NDArray
            Control input matrix, shape (n, m)
        Q : NDArray
            State cost matrix (positive semi-definite), shape (n, n)
            Penalizes deviation from reference state
        R : NDArray
            Control cost matrix (positive definite), shape (m, m)
            Penalizes control effort

        Returns
        -------
        K : NDArray
            Optimal feedback gain matrix, shape (m, n)
        P : NDArray
            Solution to the DARE, shape (n, n)

        Notes
        -----
        The LQR controller minimizes the infinite-horizon quadratic cost:

        .. math::

            J = \\sum_{k=0}^{\\infty} \\left( x_k^T Q x_k + u_k^T R u_k \\right)

        subject to the discrete-time linear dynamics:

        .. math::

            x_{k+1} = A x_k + B u_k

        The optimal control law is:

        .. math::

            u_k = -K (x_k - x_{\\text{ref}})

        where the gain :math:`K` is computed by solving the DARE:

        .. math::

            A^T P A - P - A^T P B (B^T P B + R)^{-1} B^T P A + Q = 0

        and then:

        .. math::

            K = (B^T P B + R)^{-1} B^T P A

        **Ridge Regularization:**
        To ensure numerical stability, a small ridge term :math:`\\epsilon I`
        is added to :math:`R` if it is near-singular.

        Examples
        --------
        >>> import numpy as np
        >>> # Simple double integrator: x[k+1] = [[1, 0.1], [0, 1]] x[k] + [[0], [0.1]] u[k]
        >>> A = np.array([[1.0, 0.1], [0.0, 1.0]])
        >>> B = np.array([[0.0], [0.1]])
        >>> Q = np.eye(2)  # Equal weight on position and velocity
        >>> R = np.array([[1.0]])  # Control effort penalty
        >>> K, P = LQR.compute_gain(A, B, Q, R)
        >>> K.shape
        (1, 2)
        >>> P.shape
        (2, 2)
        >>> # Verify closed-loop stability (all eigenvalues inside unit circle)
        >>> np.all(np.abs(np.linalg.eigvals(A - B @ K)) < 1.0)
        True
        """
        n = A.shape[0]
        m = B.shape[1]

        # Add ridge regularization to R for numerical stability
        ridge = 1e-9
        R_reg = R + ridge * np.eye(m)

        # Solve Discrete-Time Algebraic Riccati Equation (DARE)
        try:
            P = solve_discrete_are(A, B, Q, R_reg)
        except np.linalg.LinAlgError as e:
            raise ValueError(
                f"Failed to solve DARE. The system may be uncontrollable. "
                f"Original error: {e}"
            )

        # Compute optimal feedback gain
        # K = (B^T P B + R)^{-1} B^T P A
        BT_P_B = B.T @ P @ B
        try:
            K = np.linalg.solve(BT_P_B + R_reg, B.T @ P @ A)
        except np.linalg.LinAlgError:
            # Fallback to pseudo-inverse if singular
            K = np.linalg.pinv(BT_P_B + R_reg) @ B.T @ P @ A

        return K, P



[docs]
    @staticmethod
    def f(
        lqr_state: Tuple[NDArray, NDArray],
        xhat_k: NDArray,
        xref_k: NDArray
    ) -> Tuple[NDArray, NDArray]:
        """
        Perform one state update step for the LQR controller.

        For time-invariant LQR, the controller state (optimal gain K and
        DARE solution P) remains constant across time steps.

        Parameters
        ----------
        lqr_state : Tuple[NDArray, NDArray]
            A tuple ``(K, P)`` containing:
                - ``K`` : the optimal feedback gain matrix, shape (m, n)
                - ``P`` : the DARE solution matrix, shape (n, n)
        xhat_k : NDArray
            The current state estimate at time k, shape (n,) or (n, 1)
        xref_k : NDArray
            The reference state at time k, shape (n,) or (n, 1)

        Returns
        -------
        (K, P) : Tuple[NDArray, NDArray]
            The unchanged controller state (time-invariant for infinite-horizon LQR)

        Notes
        -----
        For **time-invariant** infinite-horizon LQR, the optimal gain :math:`K`
        is constant and computed once via ``compute_gain()``. This function
        simply passes through the state unchanged.

        For **time-varying** or finite-horizon LQR (not implemented here), this
        function would update :math:`K_k` and :math:`P_k` at each time step using
        backward Riccati recursion.

        The parameters ``xhat_k`` and ``xref_k`` are included in the signature
        for compatibility with the DynamicalSystem framework but are not used
        in the state update for time-invariant LQR.

        Examples
        --------
        >>> import numpy as np
        >>> A = np.array([[1.0, 0.1], [0.0, 1.0]])
        >>> B = np.array([[0.0], [0.1]])
        >>> Q = np.eye(2)
        >>> R = np.array([[1.0]])
        >>> K, P = LQR.compute_gain(A, B, Q, R)
        >>> lqr_state = (K, P)
        >>> xhat_k = np.array([1.0, 0.5])
        >>> xref_k = np.array([0.0, 0.0])
        >>> K_new, P_new = LQR.standard_f(lqr_state, xhat_k, xref_k)
        >>> np.allclose(K_new, K)  # State unchanged for time-invariant LQR
        True
        >>> np.allclose(P_new, P)
        True
        """
        K, P = lqr_state
        # For time-invariant LQR, state doesn't change
        return (K, P)



[docs]
    @staticmethod
    def h(
        lqr_state: Tuple[NDArray, NDArray],
        xhat_k: NDArray,
        xref_k: NDArray
    ) -> NDArray:
        """
        Compute the optimal control input from the current controller state.

        Parameters
        ----------
        lqr_state : Tuple[NDArray, NDArray]
            A tuple ``(K, P)`` containing:
                - ``K`` : the optimal feedback gain matrix, shape (m, n)
                - ``P`` : the DARE solution matrix (unused in output computation)
        xhat_k : NDArray
            The current state estimate at time k, shape (n,) or (n, 1)
        xref_k : NDArray
            The reference state at time k, shape (n,) or (n, 1)

        Returns
        -------
        uk : NDArray
            The control input at time k, shape (m,) or (m, 1)

        Notes
        -----
        This function computes the optimal LQR control law:

        .. math::

            u_k = -K (x_k - x_{\\text{ref},k})

        where:
            - :math:`K` is the optimal feedback gain matrix
            - :math:`x_k` is the current state estimate
            - :math:`x_{\\text{ref},k}` is the reference (setpoint) state

        **Negative feedback:** The negative sign ensures that the control input
        opposes the error :math:`(x_k - x_{\\text{ref},k})`, driving the state
        toward the reference.

        **Reference tracking:** When :math:`x_{\\text{ref},k} = 0`, this reduces
        to the standard regulator form :math:`u_k = -K x_k`. For non-zero
        references, the controller tracks :math:`x_{\\text{ref},k}`.

        Examples
        --------
        >>> import numpy as np
        >>> A = np.array([[1.0, 0.1], [0.0, 1.0]])
        >>> B = np.array([[0.0], [0.1]])
        >>> Q = np.eye(2)
        >>> R = np.array([[1.0]])
        >>> K, P = LQR.compute_gain(A, B, Q, R)
        >>> lqr_state = (K, P)
        >>> xhat_k = np.array([1.0, 0.5])  # Current state
        >>> xref_k = np.array([0.0, 0.0])  # Regulate to origin
        >>> uk = LQR.standard_h(lqr_state, xhat_k, xref_k)
        >>> uk.shape
        (1,)
        >>> # Control should oppose the error (negative of K @ error)
        >>> error = xhat_k - xref_k
        >>> np.allclose(uk, -K @ error)
        True
        """
        K, _ = lqr_state

        # Ensure inputs are column vectors for matrix multiplication
        xhat_k = np.atleast_1d(xhat_k).flatten()
        xref_k = np.atleast_1d(xref_k).flatten()

        # Compute tracking error
        error = xhat_k - xref_k

        # Optimal control law: u = -K * (x - x_ref)
        uk = -K @ error

        # Return as 1D array
        return uk.flatten()




# Module-level aliases for convenience
# Allows usage like: from pykal.algorithm_library.controllers import lqr; lqr.f(...)
compute_gain = LQR.compute_gain
f = LQR.f
h = LQR.h