diff --git a/doc/rcfs.pdf b/doc/rcfs.pdf
index 12418fcd96622bf4a862310768196a1adc0648a9..c922fa01a21d8bdc2bf649653d4d017d6084269a 100644
Binary files a/doc/rcfs.pdf and b/doc/rcfs.pdf differ
diff --git a/doc/rcfs.tex b/doc/rcfs.tex
index b0b082e34f526f18af69e28b0336fe2913b57593..8547bd7699cbf20beb8e2eb3eb23d5f59fa0898e 100644
--- a/doc/rcfs.tex
+++ b/doc/rcfs.tex
@@ -1751,9 +1751,9 @@ By substituting $\bm{x}_{{t+1}}=\bm{A}_t\bm{x}_t+\bm{B}_t\bm{u}_t$ into \eqref{e
 An optimal control command $\bm{\hat{u}}_t$ can be computed by differentiating \eqref{eq:ct1} with respect to $\bm{u}_t$ and equating to zero, providing a feedback law
 \begin{equation}
-    \bm{\hat{u}}_t = -\bm{K}_t \, \bm{x}_t,
+    \bm{\hat{u}}_t = \bm{K}_t \, \bm{x}_t,
     \quad\text{with}\quad
-    \bm{K}_t = \bm{Q}_{\bm{u}\bm{u},t}^{-1} \, \bm{Q}_{\bm{u}\bm{x},t}.
+    \bm{K}_t = -\bm{Q}_{\bm{u}\bm{u},t}^{-1} \, \bm{Q}_{\bm{u}\bm{x},t}.
     \label{eq:uLQRrecursive}
 \end{equation}
@@ -1767,7 +1767,7 @@ By introducing \eqref{eq:uLQRrecursive} back into \eqref{eq:ct1}, the resulting
 We observe that \eqref{eq:V} has the same quadratic form as \eqref{eq:vhatT}, so that \eqref{eq:qtLQR} can be solved recursively. We thus obtain a backward recursion procedure in which $\bm{V}_t$ is evaluated recursively from $t=T-1$ to $t=1$, starting from $\bm{V}_T=\bm{Q}_T$, which corresponds to a Riccati equation.
 
-After all feedback gain matrices $\bm{K}_t$ have been computed by backward recursion, a forward recursion can be used to compute the evolution of the state, starting from $\bm{x}_1$, by using the linear system $\bm{x}_{t+1}=\bm{A}_t\bm{x}_t+\bm{B}_t\bm{u}_t$ and the control policy $\bm{u}_t = -\bm{K}_t \bm{x}_t$, see Algorithm \ref{alg:LQRrecursive} for the summary of the overall procedure.
+After all feedback gain matrices $\bm{K}_t$ have been computed by backward recursion, a forward recursion can be used to compute the evolution of the state, starting from $\bm{x}_1$, by using the linear system $\bm{x}_{t+1}=\bm{A}_t\bm{x}_t+\bm{B}_t\bm{u}_t$ and the control policy $\bm{u}_t = \bm{K}_t \bm{x}_t$; see Algorithm \ref{alg:LQRrecursive} for a summary of the overall procedure.
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -1816,8 +1816,8 @@ where the augmented form in \eqref{eq:cBatchAugm} has the same form as the stand
 For a tracking problem, we can see that the resulting optimal control policy takes the form
 \begin{equation}
-    \bm{\hat{u}}_t = -\bm{\tilde{K}}_t \; \bm{\tilde{x}}_t
-    = \bm{K}_t \, (\bm{\mu}_t - \bm{x}_t) + \bm{u}^\text{ff}_t,
+    \bm{\hat{u}}_t = \bm{\tilde{K}}_t \; \bm{\tilde{x}}_t
+    = \bm{K}_t \, (\bm{x}_t - \bm{\mu}_t) + \bm{u}^\text{ff}_t,
     \label{eq:uLQTrecursive}
 \end{equation}
-characterized by a feedback gain matrix $\bm{K}_t$ extracted from $\bm{\tilde{K}}_t = \big[\bm{K}_t, \bm{k}_t\big]$, and a feedforward term $\bm{u}^\text{ff}_t = -\bm{k}_t - \bm{K}_t \bm{\mu}_t$ depending on $\bm{\mu}_t$.
+characterized by a feedback gain matrix $\bm{K}_t$ extracted from $\bm{\tilde{K}}_t = \big[\bm{K}_t, \bm{k}_t\big]$, and a feedforward term $\bm{u}^\text{ff}_t = \bm{k}_t + \bm{K}_t \bm{\mu}_t$ depending on $\bm{\mu}_t$.
@@ -1849,24 +1849,24 @@ whose solution is
-corresponding to open loop control commands. We can see in \eqref{eq:LQRsolution} that the open loop control commands $\bm{\hat{u}}$ (in a vector form) is linear with respect to $\bm{x}_1$.
+corresponding to open loop control commands. We can see in \eqref{eq:LQRsolution} that the open loop control commands $\bm{\hat{u}}$ (in vector form) are linear with respect to $\bm{x}_1$.
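To make the convention adopted by this patch concrete (the minus sign absorbed into the gain, so that the policy reads u_t = K_t x_t), here is a minimal Python sketch of the backward Riccati pass and the closed-loop forward pass. It assumes a double-integrator system with quadratic regulation costs; all names are illustrative and do not come from the repository scripts:

import numpy as np

T, dt = 50, 1e-1
A = np.array([[1, dt], [0, 1]])      # Double integrator: position and velocity
B = np.array([[0], [dt]])
Q, R = np.eye(2), np.eye(1) * 1e-2   # State and control cost weights

# Backward pass: V_T = Q_T, then Riccati recursion for t = T-1, ..., 1
V = Q.copy()
K = [None] * (T - 1)
for t in range(T - 2, -1, -1):
    Quu = B.T @ V @ B + R
    Qux = B.T @ V @ A
    K[t] = -np.linalg.solve(Quu, Qux)   # K_t = -Quu^{-1} Qux (sign inside the gain)
    V = Q + A.T @ V @ A - Qux.T @ np.linalg.solve(Quu, Qux)

# Forward pass: closed-loop rollout with u_t = K_t x_t (no extra minus sign)
x = np.array([1.0, 0.0])
for t in range(T - 1):
    x = A @ x + B @ (K[t] @ x)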
-Thus, by introducing a matrix $\bm{F}$ to describe $\bm{u}=-\bm{F}\bm{x}_1$, we can alternatively define the optimization problem as
+Thus, by introducing a matrix $\bm{F}$ to describe $\bm{u}=\bm{F}\bm{x}_1$, we can alternatively define the optimization problem as
 \begin{equation*}
     \min_{\bm{F}} \;\; \bm{x}^{\!\trsp} \bm{Q} \bm{x} +
-    (-\bm{F}\bm{x}_1)^{\!\trsp} \,\bm{R}\, (-\bm{F}\bm{x}_1),
+    (\bm{F}\bm{x}_1)^{\!\trsp} \,\bm{R}\, \bm{F}\bm{x}_1,
     \quad\text{s.t.}\quad
-    \bm{x}=(\bm{S}_{\bm{x}}-\bm{S}_{\bm{u}}\bm{F})\bm{x}_1,
+    \bm{x}=(\bm{S}_{\bm{x}}+\bm{S}_{\bm{u}}\bm{F})\bm{x}_1,
 \end{equation*}
 whose least squares solution is
 \begin{equation}
-    \bm{F} = {\big(\bm{S}_{\bm{u}}^\trsp \bm{Q} \bm{S}_{\bm{u}} + \bm{R}\big)}^{-1}
+    \bm{F} = -{\big(\bm{S}_{\bm{u}}^\trsp \bm{Q} \bm{S}_{\bm{u}} + \bm{R}\big)}^{-1}
     \bm{S}_{\bm{u}}^\trsp \bm{Q} \bm{S}_{\bm{x}}.
     \label{eq:LQR_LS_F}
 \end{equation}
 
 By decomposing $\bm{F}$ as block matrices $\bm{F}_t$ with $t\in\{1,\ldots,T-1\}$, $\bm{F}$ can be used to iteratively reconstruct regulation gains $\bm{K}_t$, by starting from $\bm{K}_1=\bm{F}_1$, $\bm{P}_1=\bm{I}$, and by computing recursively
 \begin{equation}
-    \bm{P}_t = \bm{P}_{t-1} {(\bm{A}_{t-1} - \bm{B}_{t-1} \bm{K}_{t-1})}^{-1}, \quad
+    \bm{P}_t = \bm{P}_{t-1} {(\bm{A}_{t-1} + \bm{B}_{t-1} \bm{K}_{t-1})}^{-1}, \quad
     \bm{K}_t = \bm{F}_t \; \bm{P}_t,
     \label{eq:K_from_F}
 \end{equation}
@@ -2176,6 +2176,7 @@ and Hessian matrices
 %with gradients $\{\frac{\partial c}{\partial\bm{x}_t}, \frac{\partial c}{\partial\bm{u}_t}\}$, and Hessian matrices $\{\frac{\partial^2 c}{\partial\bm{x}_t^2}, \frac{\partial^2 c}{\partial\bm{x}_t\bm{u}_t}, \frac{\partial^2c}{\partial\bm{u}_t^2}\}$.
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Batch formulation of iLQR}\label{sec:iLQRbatch}
 \begin{flushright}
 \filename{iLQR\_manipulator.*}
 \end{flushright}
@@ -2187,7 +2188,7 @@ A solution in batch form can be computed by minimizing over $\bm{u}\!=\!{\begin{
 At a trajectory level, we denote $\bm{x}\!=\!{\begin{bmatrix}\bm{x}_1^\trsp, \bm{x}_2^\trsp, \ldots, \bm{x}_T^\trsp \end{bmatrix}}^\trsp$ %\!\in\!\mathbb{R}^{DCT}
 the evolution of the state and $\bm{u}\!=\!{\begin{bmatrix}\bm{u}_1^\trsp, \bm{u}_2^\trsp, \ldots, \bm{u}_{T-1}^\trsp \end{bmatrix}}^\trsp$ %\!\in\!\mathbb{R}^{d(T-1)}
 the evolution of the control commands.
-The evolution of the state in \eqref{eq:DS} becomes $\Delta\bm{x} = \bm{S}_{\bm{u}} \Delta\bm{u}$, see Appendix \ref{app:Su} for details.\footnote{Note that $\bm{S}_{\bm{x}} \Delta\bm{x}_1\!=\!\bm{0}$ because $\Delta\bm{x}_1\!=\!\bm{0}$ (as we want our motion to start from $\bm{x}_1$).}
+The evolution of the state in \eqref{eq:DS} becomes $\Delta\bm{x} = \bm{S}_{\bm{u}} \Delta\bm{u} + \bm{S}_{\bm{x}} \Delta\bm{x}_1$, see Appendix \ref{app:Su} for details.
 
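Since Appendix app:Su itself is not part of this patch, the following minimal Python sketch illustrates one way the transfer matrices can be built for a time-invariant system, so that x = Sx x1 + Su u (and likewise Delta x = Su Delta u + Sx Delta x1); the function name is an illustrative assumption:

import numpy as np

def transfer_matrices(A, B, T):
    """Return Sx, Su with x = Sx @ x1 + Su @ u for constant A, B."""
    n, m = B.shape
    Sx = np.zeros((n * T, n))
    Su = np.zeros((n * T, m * (T - 1)))
    Sx[:n] = np.eye(n)                          # First block row: x_1 = I x_1
    for t in range(1, T):
        Sx[t*n:(t+1)*n] = A @ Sx[(t-1)*n:t*n]   # Block row t holds A^t
        Su[t*n:(t+1)*n] = A @ Su[(t-1)*n:t*n]   # Propagate earlier controls
        Su[t*n:(t+1)*n, (t-1)*m:t*m] = B        # Control u_t enters through B
    return Sx, Su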
 The minimization problem can then be rewritten in batch form as
 \begin{equation}
@@ -2214,30 +2215,48 @@ \end{equation}
 By inserting the constraint into the cost, we obtain the optimization problem
-\begin{equation}
-    \min_{\Delta\bm{u}} \quad
-    \Delta\bm{u}^\trsp \bm{S}_{\bm{u}}^\trsp \bm{g}_{\bm{x}} + \Delta\bm{u}^\trsp \bm{g}_{\bm{u}} +
-    \frac{1}{2} \Delta\bm{u}^\trsp \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{x}\bm{x}} \bm{S}_{\bm{u}} \Delta\bm{u} +
-    \Delta\bm{u}^\trsp \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{u}\bm{x}}^\trsp \Delta\bm{u} +
-    \frac{1}{2} \Delta\bm{u}^\trsp \bm{H}_{\bm{u}\bm{u}} \Delta\bm{u},
+\begin{multline}
+    \min_{\Delta\bm{u}} \quad
+    \Delta\bm{u}^\trsp \big( \bm{S}_{\bm{u}}^\trsp \bm{g}_{\bm{x}} + \bm{g}_{\bm{u}} \big)
+    + \frac{1}{2} \Delta\bm{u}^\trsp
+    \big( \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{xx}}\bm{S}_{\bm{u}} + \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{ux}}^\trsp + \bm{H}_{\bm{ux}}\bm{S}_{\bm{u}} + \bm{H}_{\bm{uu}} \big) \Delta\bm{u}
+    + \Delta\bm{u}^\trsp \big( \bm{H}_{\bm{ux}} \bm{S}_{\bm{x}} + \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{xx}}\bm{S}_{\bm{x}} \big) \Delta\bm{x}_1
+    \\
+    + \frac{1}{2} \Delta\bm{x}_1^\trsp \bm{S}_{\bm{x}}^\trsp \bm{H}_{\bm{xx}}\bm{S}_{\bm{x}} \Delta\bm{x}_1
+    + \Delta\bm{x}_1^\trsp \bm{S}_{\bm{x}}^\trsp \bm{g}_{\bm{x}},
     \label{eq:minu}
-\end{equation}
+\end{multline}
 which can be solved analytically by differentiating with respect to $\Delta\bm{u}$ and equating to zero, namely,
 \begin{equation}
-    \bm{S}_{\bm{u}}^\trsp \bm{g}_{\bm{x}} + \bm{g}_{\bm{u}} +\\
-    \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{x}\bm{x}} \bm{S}_{\bm{u}} \Delta\bm{u} +
-    2 \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{u}\bm{x}}^\trsp \Delta\bm{u} +
-    \bm{H}_{\bm{u}\bm{u}} \Delta\bm{u} = 0,
+    \bm{S}_{\bm{u}}^\trsp \bm{g}_{\bm{x}} + \bm{g}_{\bm{u}}
+    + \big( \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{xx}}\bm{S}_{\bm{u}} + \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{ux}}^\trsp + \bm{H}_{\bm{ux}}\bm{S}_{\bm{u}} + \bm{H}_{\bm{uu}} \big) \Delta\bm{u}
+    + \big( \bm{H}_{\bm{ux}} \bm{S}_{\bm{x}} + \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{xx}}\bm{S}_{\bm{x}} \big) \Delta\bm{x}_1
+    = 0,
 \end{equation}
 providing the least squares solution
+\begin{align}
+    \Delta\bm{\hat{u}} &=
+    - {\Big( \underbrace{\bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{x}\bm{x}} \bm{S}_{\bm{u}} + \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{u}\bm{x}}^\trsp + \bm{H}_{\bm{u}\bm{x}}\bm{S}_{\bm{u}} + \bm{H}_{\bm{u}\bm{u}}}_{\bm{Q}_{\bm{u}\bm{u}}} \Big)}^{-1}
+    \Big(
+    \big( \underbrace{\bm{H}_{\bm{ux}} \bm{S}_{\bm{x}} + \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{xx}}\bm{S}_{\bm{x}}}_{\bm{Q}_{\bm{u}\bm{x}}} \big) \Delta\bm{x}_1
+    + \underbrace{\bm{S}_{\bm{u}}^\trsp \bm{g}_{\bm{x}} + \bm{g}_{\bm{u}}}_{\bm{q}_{\bm{u}}}
+    \Big)
+    \nonumber\\
+    &= - \; \bm{Q}_{\bm{u}\bm{u}}^{-1} \; (\bm{Q}_{\bm{u}\bm{x}}\Delta\bm{x}_1 + \bm{q}_{\bm{u}}).
+    \label{eq:du_general0}
+\end{align}
+
+In this batch formulation, we seek a motion starting from $\bm{x}_1$, meaning that $\Delta\bm{x}_1\!=\!\bm{0}$, which further simplifies the above equation to
 \begin{equation}
-    \Delta\bm{\hat{u}} =
-    {\left( \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{x}\bm{x}} \bm{S}_{\bm{u}} + 2 \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{u}\bm{x}}^\trsp + \bm{H}_{\bm{u}\bm{u}} \right)}^{-1}
-    \left(-\bm{S}_{\bm{u}}^\trsp \bm{g}_{\bm{x}} - \bm{g}_{\bm{u}}\right),
+    \Delta\bm{\hat{u}} = - \; \bm{Q}_{\bm{u}\bm{u}}^{-1} \; \bm{q}_{\bm{u}},
     \label{eq:du_general}
 \end{equation}
 which can be used to update the estimate of the control commands at each iteration step of the iLQR algorithm.
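As a reading aid, a minimal Python sketch of this batch update (the case Delta x1 = 0 of eq:du_general), assuming the transfer matrix, gradients, and Hessians are already assembled in batch form; the function name is an illustrative assumption:

import numpy as np

def batch_ilqr_step(Su, gx, gu, Hxx, Hux, Huu):
    """One Gauss-Newton step: solve Quu @ du = -qu (with Delta x1 = 0)."""
    Quu = Su.T @ Hxx @ Su + Su.T @ Hux.T + Hux @ Su + Huu   # Q_uu of eq:du_general0
    qu = Su.T @ gx + gu                                     # q_u of eq:du_general0
    return -np.linalg.solve(Quu, qu)                        # Delta u_hat of eq:du_general

The returned correction would then be applied as u <- u + alpha * du, with alpha a backtracking line-search step size (see the subsection on step sizes below).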
+
 \begin{figure}
     \centering
     \includegraphics[width=.9\columnwidth]{images/iLQR-eqs-overview01.png}
@@ -2434,61 +2453,40 @@ The complete iLQR procedure is described in Algorithm \ref{alg:iLQRrecursive} (i
 %\label{eq:qt}
 %\end{equation}
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Least squares formulation of recursive iLQR}\label{sec:iLQRLS}
 \begin{flushright}
 \filename{iLQR\_manipulator\_recursive\_LS.*}
 \end{flushright}
-First, note that \eqref{eq:duhat} can be rewritten as
-\begin{equation}
-    \Delta\bm{\hat{u}}_t = \bm{\tilde{K}}_t \, \begin{bmatrix} \Delta\bm{x}_t \\ 1 \end{bmatrix},
-    \;\text{with}\quad
-    \bm{\tilde{K}}_t = - \bm{Q}_{\bm{u}\bm{u},t}^{-1} \Big[ \bm{Q}_{\bm{u}\bm{x},t}, \; \bm{q}_{\bm{u},t} \Big].
-\label{eq:duhat2}
-\end{equation}
+First, we rewrite the batch solution of \eqref{eq:du_general0} as
+\begin{align}
+    \Delta\bm{\hat{u}} &= -\; \bm{Q}_{\bm{u}\bm{u}}^{-1} \; (\bm{Q}_{\bm{u}\bm{x}}\Delta\bm{x}_1 + \bm{q}_{\bm{u}})
+    \nonumber\\
+    &= \bm{F} \; \Delta\bm{\tilde{x}}_1,
+    \quad\text{with}\quad
+    \bm{F} = - \; \bm{Q}_{\bm{u}\bm{u}}^{-1} \big[\; \bm{Q}_{\bm{u}\bm{x}}, \; \bm{q}_{\bm{u}} \big]
+    \quad\text{and}\quad
+    \Delta\bm{\tilde{x}}_1 = \begin{bmatrix} \Delta\bm{x}_1 \\ 1 \end{bmatrix}.
+\end{align}
 
-Also, \eqref{eq:minu} can be rewritten as
+As in Section \ref{sec:LQRLS}, we then decompose $\bm{F}$ into block matrices $\bm{F}_t$ with $t\in\{1,\ldots,T-1\}$. $\bm{F}$ can then be used to iteratively reconstruct regulation gains $\bm{\tilde{K}}_t$, by starting from $\bm{\tilde{K}}_1=\bm{F}_1$, $\bm{P}_1=\bm{I}$, and by computing with forward recursion (using the augmented dynamics matrices $\bm{\tilde{A}}_t$ and $\bm{\tilde{B}}_t$)
 \begin{equation*}
-    \min_{\Delta\bm{u}} \quad \underbrace{\frac{1}{2}
-    \begin{bmatrix} \Delta\bm{u} \\ 1 \end{bmatrix}^\trsp
-    \begin{bmatrix} \bm{C} & \bm{c} \\ \bm{c}^\trsp & 0 \end{bmatrix}
-    \begin{bmatrix} \Delta\bm{u} \\ 1 \end{bmatrix}}_{\frac{1}{2} \Delta\bm{u}^\trsp \bm{C} \Delta\bm{u} + \Delta\bm{u}^\trsp \bm{c}},
-    \quad\text{where}\quad
-    \left\{
-    \begin{aligned}
-    \bm{c} &= \bm{S}_{\bm{u}}^\trsp \bm{g}_{\bm{x}} + \bm{g}_{\bm{u}}, \\
-    \bm{C} &= \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{x}\bm{x}} \bm{S}_{\bm{u}}
-    + 2 \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{u}\bm{x}}^\trsp
-    + \bm{H}_{\bm{u}\bm{u}}.
-    \end{aligned}
-    \right.
+    \bm{P}_t = \bm{P}_{t-1} {(\bm{\tilde{A}}_{t-1} + \bm{\tilde{B}}_{t-1} \bm{\tilde{K}}_{t-1})}^{-1}, \quad
+    \bm{\tilde{K}}_t = \bm{F}_t \; \bm{P}_t,
 \end{equation*}
-
-Similarly to Section \ref{sec:LQRLS}, we set
-\begin{equation}
-    \Delta\bm{u} = -\bm{F} \begin{bmatrix} \Delta\bm{x}_1 \\ 1 \end{bmatrix}
-    = -\bm{F} \begin{bmatrix} \bm{0} \\ 1 \end{bmatrix}
-    = -\bm{F} \Delta\bm{\tilde{x}}_1,
-\end{equation}
-and redefine the optimization problem as
-\begin{equation}
-    \min_{\bm{F}} \quad \frac{1}{2} \Delta\bm{\tilde{x}}_1^\trsp \bm{F}^\trsp \bm{C} \bm{F} \Delta\bm{\tilde{x}}_1
-    - \Delta\bm{\tilde{x}}_1^\trsp \bm{F}^\trsp \bm{c}.
-\end{equation}
-
-By differentiating w.r.t.\ $\bm{F}$ and equating to zero, we get
-\begin{equation}
-    \bm{F} \Delta\bm{\tilde{x}}_1 = \bm{C}^{-1} \bm{c}.
-\end{equation}
-
-Similarly to Section \ref{sec:LQRLS}, we decompose $\bm{F}$ as block matrices $\bm{F}_t$ with $t\in\{1,\ldots,T-1\}$. $\bm{F}$ can then be used to iteratively reconstruct regulation gains $\bm{K}_t$, by starting from $\bm{K}_1=\bm{F}_1$, $\bm{P}_1=\bm{I}$, and by computing with forward recursion
-\begin{equation}
-    \bm{P}_t = \bm{P}_{t-1} {(\bm{A}_{t-1} - \bm{B}_{t-1} \bm{K}_{t-1})}^{-1}, \quad
-    \bm{K}_t = \bm{F}_t \; \bm{P}_t,
-\end{equation}
 from $t=2$ to $t=T-1$.
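A minimal Python sketch of this forward reconstruction, mirroring the forward pass of iLQR_manipulator_recursive_LS.* shown further below; Aa and Ba stand for the augmented dynamics matrices, and all names are illustrative assumptions rather than repository code:

import numpy as np

def gains_from_F(F, Aa, Ba, T, m):
    """Rebuild augmented gains Ktilde_t from the block rows F_t of F."""
    n1 = Aa.shape[0]                 # Augmented state dimension
    K = np.zeros((T - 1, m, n1))
    K[0] = F[:m]
    P = np.eye(n1)
    for t in range(1, T - 1):
        P = P @ np.linalg.inv(Aa + Ba @ K[t-1])  # P_t = P_{t-1} (Aa + Ba Ktilde_{t-1})^{-1}
        K[t] = F[t*m:(t+1)*m] @ P                # Ktilde_t = F_t P_t
    return K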
+These gains provide the controller
+\begin{align}
+    \Delta\bm{\hat{u}}_t &= \bm{\tilde{K}}_t \, \begin{bmatrix} \Delta\bm{x}_t \\ 1 \end{bmatrix}
+    \nonumber\\
+    &= \bm{k}_t + \bm{K}_t \, \Delta\bm{x}_t,
+    \label{eq:duhat2}
+\end{align}
+matching the formulation in \eqref{eq:duhat}, where $\bm{k}_t$ is a feedforward command and $\bm{K}_t$ is a feedback gain matrix.
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Updates by considering step sizes}
diff --git a/matlab/LQT_recursive.m b/matlab/LQT_recursive.m
index c396f8230e10be81e32855872c58ca68dd28de1f..f8466840f63be0a44d5cee52f4dac4778884f37e 100644
--- a/matlab/LQT_recursive.m
+++ b/matlab/LQT_recursive.m
@@ -78,8 +78,8 @@ for n=1:2
         if t==25 && n==2
             x = x + xn; %Simulated noise on the state
         end
-        Ka = (B' * P(:,:,t) * B + Ra) \ B' * P(:,:,t) * A; %Feedback gain
-        u = -Ka * x; %Feedback control on augmented state (resulting in feedforward part and feedback on state)
+        Ka = -(B' * P(:,:,t) * B + Ra) \ B' * P(:,:,t) * A; %Feedback gain
+        u = Ka * x; %Feedback control on augmented state (resulting in feedforward part and feedback on state)
-%         u_ff = -Ka(:,end); %Feedforward part on state
-%         u = u_ff - Ka(:,1:end-1) * x(1:end-1); %Feedforward and feedback part on state
+%         u_ff = Ka(:,end); %Feedforward part on state
+%         u = u_ff + Ka(:,1:end-1) * x(1:end-1); %Feedforward and feedback part on state
diff --git a/matlab/LQT_recursive_LS.m b/matlab/LQT_recursive_LS.m
index 7a0c4379fe2b1b341da7f0c1960c3fb940dc217f..d1d94e4260c7828cc9425ecf800a3646ea24e31e 100644
--- a/matlab/LQT_recursive_LS.m
+++ b/matlab/LQT_recursive_LS.m
@@ -81,13 +81,13 @@ Ra = speye((param.nbData-1)*param.nbVarU) * param.r; %Control cost matrix
 %xn = [randn(param.nbVarU, 1) * 5E0; randn(param.nbVar-param.nbVarU, 1) * 0; 0]; %Simulated noise on state
 xn = [-1; -.1; zeros(param.nbVarX-param.nbVarU, 1)]; %Simulated noise on state
 
-F = (Su' * Qa * Su + Ra) \ Su' * Qa * Sx;
+F = -(Su' * Qa * Su + Ra) \ Su' * Qa * Sx;
 Ka(:,:,1) = F(1:param.nbVarU,:);
 P = eye(param.nbVarX);
 for t=2:param.nbData-1
     id = (t-1)*param.nbVarU + [1:param.nbVarU];
-    P = P / (A - B * Ka(:,:,t-1));
+    P = P / (A + B * Ka(:,:,t-1));
     Ka(:,:,t) = F(id,:) * P; %Feedback gain on augmented state
 end
 %Reproduction with feedback controller on augmented state
@@ -97,9 +97,9 @@ for n=1:2
         if t==25 && n==2
             x = x + xn; %Simulated noise on the state
         end
-        u = -Ka(:,:,t) * x; %Feedback control on augmented state (resulting in feedforward part and feedback on state)
-%         u_ff = -Ka(:,end,t); %Feedforward part on state
-%         u = u_ff - Ka(:,1:end-1,t) * x(1:end-1); %Feedforward and feedback part on state
+        u = Ka(:,:,t) * x; %Feedback control on augmented state (resulting in feedforward part and feedback on state)
+%         u_ff = Ka(:,end,t); %Feedforward part on state
+%         u = u_ff + Ka(:,1:end-1,t) * x(1:end-1); %Feedforward and feedback part on state
         x = A * x + B * u; %Update of state vector
         r(n).x(:,t) = x; %Log data
diff --git a/matlab/LQT_recursive_LS_multiAgents.m b/matlab/LQT_recursive_LS_multiAgents.m
index b0d53f684d4b5da277c8c86e674144852bf6719b..baec924f8febf8df3dd25aec95ab7794cd1c0a54 100644
--- a/matlab/LQT_recursive_LS_multiAgents.m
+++ b/matlab/LQT_recursive_LS_multiAgents.m
@@ -87,20 +87,20 @@ xn = [-1; 1; 0; 0; zeros(param.nbVarX-param.nbVarU, 1)]; %Simulated noise on sta
 tn = round(param.nbData / 4);
 
-F = (Su' * Qa * Su + Ra) \ Su' * Qa * Sx;
+F = -(Su' * Qa * Su + Ra) \ Su' * Qa * Sx;
 Ka(:,:,1) = F(1:param.nbVarU,:);
 P = eye(param.nbVarX);
 for t=2:param.nbData-1
     id = (t-1)*param.nbVarU + [1:param.nbVarU];
-    P = P / (A - B * Ka(:,:,t-1));
+    P = P / (A + B * Ka(:,:,t-1));
     Ka(:,:,t) = F(id,:) * P; %Feedback gain on augmented state
 end
 %Reproduction with feedback controller on augmented state
 for n=1:2
     x = [-1; 0; 1; 0; zeros(param.nbVar-param.nbVarU,1); 1]; %Augmented state space
     for t=1:param.nbData-1
-        u = -Ka(:,:,t) * x; %Feedback control on augmented state (resulting in feedback and feedforward terms on state)
+        u = Ka(:,:,t) * x; %Feedback control on augmented state (resulting in feedback and feedforward terms on state)
         x = A * x + B * u; %Update of state vector
         if t==tn && n==2
             x = x + xn; %Simulated noise on the state
diff --git a/matlab/iLQR_manipulator_recursive_LS.m b/matlab/iLQR_manipulator_recursive_LS.m
index e8a6910b11a29eade09228b091b84afb30dd94b1..d33acfe3525619db45feab0cb82ae456b7678807 100644
--- a/matlab/iLQR_manipulator_recursive_LS.m
+++ b/matlab/iLQR_manipulator_recursive_LS.m
@@ -68,14 +68,14 @@ for n=1:param.nbMaxIter
     Quu = Su' * J' * Q * J * Su + R;
     Qux = Su' * J' * Q * J * Sx;
     qu = Su' * J' * Q * f(:) + R * uref(:);
-    F = Quu \ [Qux, qu];
+    F = -Quu \ [Qux, qu];
 
     %Forward pass
     Ka(:,:,1) = F(1:param.nbVarU,:);
     P = eye(param.nbVarX+1);
     for t=2:param.nbData-1
         id = (t-1)*param.nbVarU + [1:param.nbVarU];
-        P = P / (Aa - Ba * Ka(:,:,t-1));
+        P = P / (Aa + Ba * Ka(:,:,t-1));
         Ka(:,:,t) = F(id,:) * P; %Feedback gain on augmented state
     end
@@ -85,8 +85,8 @@ for n=1:param.nbMaxIter
     while 1
         xtmp(:,1) = x0;
         for t=1:param.nbData-1
-            k(:,t) = -Ka(:,end,t);
-            K(:,:,t) = -Ka(:,1:end-1,t);
+            k(:,t) = Ka(:,end,t);
+            K(:,:,t) = Ka(:,1:end-1,t);
             du(:,t) = k(:,t) + K(:,:,t) * (xtmp(:,t) - xref(:,t));
             utmp(:,t) = uref(:,t) + du(:,t) * alpha;
             xtmp(:,t+1) = A * xtmp(:,t) + B * utmp(:,t); %System evolution
diff --git a/python/LQT_recursive.py b/python/LQT_recursive.py
index bb10eedb80bb1ed8e009e77b0410f7a6bc633a70..1b6f585df05aa09f07331696c292a4a40f8b9760 100644
--- a/python/LQT_recursive.py
+++ b/python/LQT_recursive.py
@@ -11,6 +11,7 @@ License: GPL-3.0-only
 
 import numpy as np
 import matplotlib.pyplot as plt
+import math
 
 # General parameters
 # ===============================
@@ -33,12 +34,12 @@ R = np.eye(param.nbVarPos) * param.rfactor
 # Dynamical System settings (discrete version)
 A1d = np.zeros((param.nbDeriv, param.nbDeriv))
 for i in range(param.nbDeriv):
-    A1d += np.diag(np.ones((param.nbDeriv - i,)), i) * param.dt ** i / np.math.factorial(i) # Discrete 1D
+    A1d += np.diag(np.ones((param.nbDeriv - i,)), i) * param.dt ** i / math.factorial(i) # Discrete 1D
 
 B1d = np.zeros((param.nbDeriv, 1))
 for i in range(0, param.nbDeriv):
-    B1d[param.nbDeriv - i - 1, :] = param.dt ** (i + 1) * 1 / np.math.factorial(i + 1) # Discrete 1D
+    B1d[param.nbDeriv - i - 1, :] = param.dt ** (i + 1) * 1 / math.factorial(i + 1) # Discrete 1D
 
 A0 = np.kron(A1d, np.eye(param.nbVarPos)) # Discrete nD
 B0 = np.kron(B1d, np.eye(param.nbVarPos)) # Discrete nD
@@ -113,8 +114,8 @@ for n in range(2):
     x = np.hstack([ np.zeros(param.nbVar) , 1 ])
     for t in range(param.nbData):
         Z_bar = B.T @ P[:, :, t] @ B + R
-        K = np.linalg.inv(Z_bar.T @ Z_bar) @ Z_bar.T @ B.T @ P[:, :, t] @ A # Feedback gain
-        u = -K @ x # Acceleration command with FB on augmented state (resulting in feedback and feedforward terms)
+        K = -np.linalg.inv(Z_bar.T @ Z_bar) @ Z_bar.T @ B.T @ P[:, :, t] @ A # Feedback gain
+        u = K @ x # Acceleration command with FB on augmented state (resulting in feedback and feedforward terms)
         x = A @ x + B @ u # Update of state vector
 
         if t==25 and n==1:
diff --git a/python/LQT_recursive_LS.py b/python/LQT_recursive_LS.py
index 9e007503268fd38187a3e8a92f8d9d773cf597fc..442fa671c73d0383bf63709bd429610712a44748 100644
--- a/python/LQT_recursive_LS.py
+++ b/python/LQT_recursive_LS.py
@@ -74,13 +74,13 @@ for i in range(1,param.nbData):
 
-# Build recursive least squares solution for the feedback gains, using u_k = - F_k @ x0 = - K_k @ x_k
+# Build recursive least squares solution for the feedback gains, using u_k = F_k @ x0 = K_k @ x_k
 # =====================================
-F = np.linalg.inv(Su.T @ Q @ Su + R) @ Su.T @ Q @ Sx
+F = -np.linalg.inv(Su.T @ Q @ Su + R) @ Su.T @ Q @ Sx
 K = np.zeros((param.nbData-1, param.nbVarPos, param.nbVar))
 P = np.eye(param.nbVar)
 K[0] = F[:param.nbVarPos]
 for k in range(1, param.nbData-1):
     k_slice = slice(k*param.nbVarPos, (k+1)*param.nbVarPos)
-    P = P @ np.linalg.inv(A - B @ K[k-1])
+    P = P @ np.linalg.inv(A + B @ K[k-1])
     K[k] = F[k_slice] @ P
@@ -96,7 +96,7 @@ X_batch = []
 X_batch.append(x[:-1,:].copy())
 
 for k in range(param.nbData-1):
-    u = - K[k] @ x
+    u = K[k] @ x
     x = A @ x + B @ (u + np.random.normal(0, 1e+1, (param.nbVarPos, num_repro)))
     X_batch.append(x[:-1,:].copy())
diff --git a/python/LQT_recursive_LS_multiAgents.py b/python/LQT_recursive_LS_multiAgents.py
index 24903d71943cfc8031c7545b5a457ff5f300337a..a9c862deeba2dec311918dc9284d67fb2e68ab33 100644
--- a/python/LQT_recursive_LS_multiAgents.py
+++ b/python/LQT_recursive_LS_multiAgents.py
@@ -99,13 +99,13 @@ for i in range(1,param.nbData):
 
-# Build recursive least squares solution for the feedback gains, using u_k = - F_k @ x0 = - K_k @ x_k
+# Build recursive least squares solution for the feedback gains, using u_k = F_k @ x0 = K_k @ x_k
 # =====================================
-F = np.linalg.inv(Su.T @ Q @ Su + R) @ Su.T @ Q @ Sx
+F = -np.linalg.inv(Su.T @ Q @ Su + R) @ Su.T @ Q @ Sx
 K = np.zeros((param.nbData-1, param.nbVarU, param.nbVar))
 P = np.eye(param.nbVar)
 K[0] = F[:param.nbVarU]
 for k in range(1, param.nbData-1):
     k_slice = slice(k*param.nbVarU, (k+1)*param.nbVarU)
-    P = P @ np.linalg.inv(A - B @ K[k-1])
+    P = P @ np.linalg.inv(A + B @ K[k-1])
     K[k] = F[k_slice] @ P
@@ -122,7 +122,7 @@ X_batch = []
 X_batch.append(x[:-1,:].copy())
 
 for k in range(param.nbData-1):
-    u = - K[k] @ x
+    u = K[k] @ x
     x = A @ x + B @ (u + np.random.normal(0, 5e+0, (param.nbVarU, num_repro)))
     X_batch.append(x[:-1,:].copy())
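As a closing sanity check of the convention used throughout this patch (gains carrying the minus sign, controllers applied as u_t = K_t x_t), a small self-contained Python example on an arbitrary random system; it verifies numerically that the gains reconstructed from F reproduce the batch commands u = F x1 along the closed-loop rollout. All names are illustrative, not taken from the repository scripts:

import numpy as np

np.random.seed(0)
n, m, T = 3, 2, 6
A = np.eye(n) + 0.1 * np.random.randn(n, n)
B = np.random.randn(n, m)

# Transfer matrices x = Sx x1 + Su u, then F = -(Su' Q Su + R)^{-1} Su' Q Sx
Sx = np.vstack([np.linalg.matrix_power(A, t) for t in range(T)])
Su = np.zeros((n * T, m * (T - 1)))
for t in range(1, T):
    Su[t*n:(t+1)*n] = A @ Su[(t-1)*n:t*n]
    Su[t*n:(t+1)*n, (t-1)*m:t*m] = B
Q, R = np.eye(n * T), np.eye(m * (T - 1)) * 1e-2
F = -np.linalg.solve(Su.T @ Q @ Su + R, Su.T @ Q @ Sx)

# Reconstruct time-varying gains and roll the closed loop forward with u_t = K_t x_t
K, P = [F[:m]], np.eye(n)
for t in range(1, T - 1):
    P = P @ np.linalg.inv(A + B @ K[t-1])
    K.append(F[t*m:(t+1)*m] @ P)
x1 = np.random.randn(n)
x, u_log = x1.copy(), []
for t in range(T - 1):
    u_log.append(K[t] @ x)
    x = A @ x + B @ u_log[-1]
print(np.abs(np.hstack(u_log) - F @ x1).max())  # ~0 up to numerical error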