diff --git a/doc/rcfs.pdf b/doc/rcfs.pdf index e5821b12433ebdd56668995dadd79e7fbab208ad..c7239615d9077cbd89a3f057e68e7aabaedc94c2 100644 Binary files a/doc/rcfs.pdf and b/doc/rcfs.pdf differ diff --git a/doc/rcfs.tex b/doc/rcfs.tex index 7f9735c7cc9be7d89192a06a59377ca9405baf48..5f0196725bc48205d4df8b341d9d3d2d20fed50e 100644 --- a/doc/rcfs.tex +++ b/doc/rcfs.tex @@ -1,5 +1,5 @@ -\documentclass[10pt,a4paper,twocolumn]{article} -\usepackage{graphicx,amsmath,amssymb,bm,xcolor,soul,nicefrac,listings,algorithm2e,dsfont} +\documentclass[10pt,a4paper]{article} %twocolumn +\usepackage{graphicx,amsmath,amssymb,bm,xcolor,soul,nicefrac,listings,algorithm2e,dsfont,caption,subcaption,wrapfig,sidecap} %\usepackage[hidelinks]{hyperref} %pseudocode @@ -32,22 +32,22 @@ \makeatother \lstdefinestyle{mystyle}{ - backgroundcolor=\color{backcolour}, + backgroundcolor=\color{backcolour}, commentstyle=\color{codegreen}, keywordstyle=\color{magenta}, numberstyle=\tiny\color{codegray}, stringstyle=\color{codepurple}, basicstyle=\ttfamily\scriptsize, - breakatwhitespace=false, - breaklines=true, - captionpos=b, - keepspaces=true, - numbersep=5pt, - numbers=left, - numbersep=5pt, - showspaces=false, + breakatwhitespace=false, + breaklines=true, + captionpos=b, + keepspaces=true, + numbersep=5pt, + numbers=left, + numbersep=5pt, + showspaces=false, showstringspaces=false, - showtabs=false, + showtabs=false, tabsize=2 } \lstset{style=mystyle} @@ -81,6 +81,15 @@ % pdfpagemode=FullScreen, %} +%\usepackage{hyperref} +%\hypersetup{ +% colorlinks, +% citecolor=black, +% filecolor=black, +% linkcolor=black, +% urlcolor=black +%} + \title{\huge A practical guide to learning and control problems in robotics\\solved with second-order optimization} \author{Sylvain Calinon, Idiap Research Institute} \date{} @@ -89,7 +98,7 @@ \begin{document} \maketitle - +\tableofcontents %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Introduction} @@ -115,16 +124,15 @@ A univariate trajectory $\bm{x}^\tp{1D}\in\mathbb{R}^T$ of $T$ datapoints can be \end{equation} A multivariate trajectory $\bm{x}\in\mathbb{R}^{DT}$ of $T$ datapoints of dimension $D$ can similarly be computed as -\begin{align} - \bm{x} &= \sum_{k=1}^{K} \bm{\Psi}_{k} \; w_k = \bm{\Psi} \; \bm{w}, - \label{eq:Psi}\\ - \text{with}\; \bm{\Psi} &= \bm{\phi} \otimes \bm{C} = \left[\begin{matrix} +\begin{equation} + \bm{x} = \sum_{k=1}^{K} \bm{\Psi}_{k} \; w_k = \bm{\Psi} \; \bm{w}, + \quad\text{with}\quad \bm{\Psi} = \bm{\phi} \otimes \bm{C} = \left[\begin{matrix} \bm{C}\phi_{1,1} & \bm{C}\phi_{2,1} & \cdots & \bm{C}\phi_{K,1} \\ \bm{C}\phi_{1,2} & \bm{C}\phi_{2,2} & \cdots & \bm{C}\phi_{K,2} \\ \vdots & \vdots & \ddots & \vdots \\ \bm{C}\phi_{1,T} & \bm{C}\phi_{2,T} & \cdots & \bm{C}\phi_{K,T} - \end{matrix}\right],\nonumber -\end{align} + \end{matrix}\right], \label{eq:Psi} +\end{equation} where $\otimes$ the Kronecker product operator and $\bm{C}$ is a coordination matrix that can for example be set to identity. Such encoding aims at encoding the movement as a weighted superposition of simpler movements, whose compression aims at working in a subspace of reduced dimensionality, while denoising the signal and capturing the essential aspects of a movement. This can be illustrated as basic building blocks that can be differently assembled to form more elaborated movements, often referred to as \emph{movement primitives} (MP). 
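As a minimal illustration of \eqref{eq:Psi}, the following Python sketch (in the spirit of the code examples of the next sections; the values of $T$, $K$, $D$ and the basis function bandwidth are only illustrative, and Gaussian radial basis functions are used here as one of the possible choices discussed below) constructs $\bm{\Psi}$ with an identity coordination matrix $\bm{C}$, estimates the weights of a demonstrated trajectory by least squares, and reconstructs the trajectory from these weights:
\begin{lstlisting}[language=Python]
import numpy as np

T, K, D = 200, 5, 2 #Number of timesteps, basis functions and dimensions

#Gaussian radial basis functions (one column per basis function)
t = np.linspace(0, 1, T).reshape(-1, 1) #Time steps rescaled to [0,1]
mu_s = np.linspace(0, 1, K).reshape(1, -1) #Centers of the basis functions
phi = np.exp(-1e2 * (t - mu_s)**2) #T x K matrix of basis functions

C = np.eye(D) #Coordination matrix (identity here)
Psi = np.kron(phi, C) #DT x DK matrix

#Encoding (compression) of a demonstrated trajectory of DT elements,
#followed by decoding as a weighted superposition of basis functions
x_demo = np.random.randn(D * T) #Placeholder for a demonstrated trajectory
w = np.linalg.pinv(Psi) @ x_demo #Weights estimated by least squares
x = Psi @ w #Reconstructed trajectory
\end{lstlisting}
The least squares estimate of $\bm{w}$ projects the demonstration onto the subspace spanned by the basis functions, which corresponds to the compression and denoising step mentioned above.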
@@ -137,10 +145,17 @@ In \eqref{eq:Psi}, $\bm{\phi}$ can be any set of basis functions, including some \begin{figure} \centering -\includegraphics[width=\columnwidth]{images/MP_basisFcts01.png}\\[8mm] -\includegraphics[width=\columnwidth]{images/MP_basisFcts_mat01.png} +\begin{subfigure}[c]{.45\textwidth} + \centering + \includegraphics[width=\textwidth]{images/MP_basisFcts01.png} +\end{subfigure} +\hfill +\begin{subfigure}[c]{.45\textwidth} + \centering + \includegraphics[width=\textwidth]{images/MP_basisFcts_mat01.png} +\end{subfigure} \caption{\footnotesize -Examples of basis functions. \emph{top:} Representation in timeline form for $K=5$ and $T=300$. \emph{Bottom:} Representation in matrix form for $K=5$ and $T=20$, with a grayscale colormap where white pixels are $0$ and black pixels are $1$. +Examples of basis functions. \emph{Left:} Representation in timeline form for $K=5$ and $T=300$. \emph{Right:} Representation in matrix form for $K=5$ and $T=20$, with a grayscale colormap where white pixels are $0$ and black pixels are $1$. } \label{fig:MP} \end{figure} @@ -157,10 +172,8 @@ where $\bm{I}_K$ is an identity matrix of size $K$ and $\bm{1}_{\frac{T}{K}}$ is Gaussian radial basis functions can be computed in matrix form as \begin{equation} -\begin{aligned} - \bm{\phi} &= \exp(-\lambda \; \bm{E} \odot \bm{E}),\\ - \text{with}\quad \bm{E} &= \bm{t} \; \bm{1}_K^\trsp - \bm{1}_T \; {\bm{\mu}^\tp{s}}^\trsp, -\end{aligned} + \bm{\phi} = \exp(-\lambda \; \bm{E} \odot \bm{E}), + \quad\text{with}\quad \bm{E} = \bm{t} \; \bm{1}_K^\trsp - \bm{1}_T \; {\bm{\mu}^\tp{s}}^\trsp, \end{equation} where $\lambda$ is bandwidth parameter, $\odot$ is the elementwise (Hadamard) product operator, $\bm{t}\in\mathbb{R}^T$ is a vector with entries linearly spaced between $0$ to $1$, $\bm{\mu}^\tp{s}\in\mathbb{R}^K$ is a vector containing the RBF centers linearly spaced on the $[0,1]$ range, and the $\exp(\cdot)$ function is applied to each element of the matrix, see Fig.~\ref{fig:MP}. @@ -224,17 +237,30 @@ Indeed, we first note that $\exp(ai)$ is composed of a real part and an imaginar where we used the properties $\cos(0)=1$ and $\cos(-a)=\cos(a)$ of the cosine function. Since we do not need direct correspondences between the Fourier transform and discrete cosine transform as in the above, we can omit the scaling factors for $w_k$ and directly write the decomposition as in \eqref{eq:dct}. +\newpage + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Newton's method for minimization}\label{sec:Newton} -\begin{figure} +\begin{wrapfigure}{r}{.36\textwidth} +%\vspace{-20pt} \centering -\includegraphics[width=.7\columnwidth]{images/NewtonOptim01.jpg} +\includegraphics[width=.34\textwidth]{images/NewtonOptim01.jpg} \caption{\footnotesize Newton's method for minimization, starting from an initial estimate $x_1$ and converging to a local minimum (red point) after 5 iterations. } \label{fig:Newton} -\end{figure} +%\vspace{-20pt} +\end{wrapfigure} + +%\begin{figure} +%\centering +%\includegraphics[width=.7\columnwidth]{images/NewtonOptim01.jpg} +%\caption{\footnotesize +%Newton's method for minimization, starting from an initial estimate $x_1$ and converging to a local minimum (red point) after 5 iterations. +%} +%\label{fig:Newton} +%\end{figure} Newton's method attempts to solve $\min_x f(x)$ or $\max_x f(x)$ from an initial guess $x_1$ by using a sequence of second-order Taylor approximations of $f$ around the iterates, see Fig.~\ref{fig:Newton}. 
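Before detailing the derivation, a minimal numerical sketch of the resulting update rule $x_{k+1} = x_k - \nicefrac{f'(x_k)}{f''(x_k)}$ (derived below) is given here; the test function and the number of iterations are only illustrative:
\begin{lstlisting}[language=Python]
import numpy as np

f = lambda x: np.exp(x) - 2 * x #Function to minimize (minimum at x=log(2))
df = lambda x: np.exp(x) - 2 #First derivative f'(x)
ddf = lambda x: np.exp(x) #Second derivative f''(x)

x = 2.0 #Initial estimate x_1
for k in range(5):
    x = x - df(x) / ddf(x) #Newton update (minimum of the local quadratic model)
    print('Iteration', k+1, ': x =', x, ', f(x) =', f(x))
\end{lstlisting}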
The second-order Taylor expansion of $f$ around $x_k$ is \begin{equation*} @@ -285,6 +311,7 @@ The update rule then becomes The Gauss--Newton algorithm is the workhorse of many robotics problems, including inverse kinematics and optimal control, as we will see in the next sections. +\newpage %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Forward kinematics (FK) for a planar robot manipulator}\label{sec:FK} @@ -292,14 +319,25 @@ The Gauss--Newton algorithm is the workhorse of many robotics problems, includin \filename{IK.*} \end{flushright} -\begin{figure} +%\begin{figure} +%\centering +%\includegraphics[width=.6\columnwidth]{images/f_ee01.png} +%\caption{\footnotesize +%Forward kinematics for a planar robot with two links. +%} +%\label{fig:FK} +%\end{figure} + +\begin{wrapfigure}{r}{.36\textwidth} +%\vspace{-20pt} \centering -\includegraphics[width=.6\columnwidth]{images/f_ee01.png} +\includegraphics[width=.34\textwidth]{images/f_ee01.png} \caption{\footnotesize -Forward kinematics for a planar robot with two links. +Forward kinematics for a planar robot with two links. } \label{fig:FK} -\end{figure} +%\vspace{-20pt} +\end{wrapfigure} The \emph{forward kinematics} (FK) function of a planar robot manipulator is defined as \begin{align*} @@ -327,23 +365,39 @@ The position and orientation of all articulations can similarly be computed with \end{bmatrix} \!\!, \label{eq:FKall} \end{align} -with -\begin{align*} - \tilde{f}_{1,1} &= \ell_1 \!\cos(x_1),\\ - \tilde{f}_{2,1} &= \ell_1 \sin(x_1),\\ - \tilde{f}_{3,1} &= x_1,\\[2mm] - \tilde{f}_{1,2} &= \ell_1 \!\cos(x_1) \!+\! \ell_2 \!\cos(x_1\!+\!x_2),\\ - \tilde{f}_{2,2} &= \ell_1 \sin(x_1) \!+\! \ell_2 \!\sin(x_1\!+\!x_2),\\ - \tilde{f}_{3,2} &= x_1 + x_2,\\[2mm] - \tilde{f}_{1,3} &= \ell_1 \!\cos(x_1) \!+\! \ell_2 \!\cos(x_1\!+\!x_2) \!+\! \ell_3 \!\cos(x_1\!+\!x_2\!+\!x_3),\\ - \tilde{f}_{2,3} &= \ell_1 \sin(x_1) \!+\! \ell_2 \!\sin(x_1\!+\!x_2) \!+\! \ell_3 \!\sin(x_1\!+\!x_2\!+\!x_3),\\ - \tilde{f}_{3,3} &= x_1 + x_2 + x_3,\\ - \vdots & -\end{align*} +\begin{equation*} +\text{with}\quad +\begin{alignat*}{3} + & \tilde{f}_{1,1} = \ell_1 \!\cos(x_1), && + \tilde{f}_{1,2} = \ell_1 \!\cos(x_1) \!+\! \ell_2 \!\cos(x_1\!+\!x_2), && + \tilde{f}_{1,3} = \ell_1 \!\cos(x_1) \!+\! \ell_2 \!\cos(x_1\!+\!x_2) \!+\! \ell_3 \!\cos(x_1\!+\!x_2\!+\!x_3),\\ + & \tilde{f}_{2,1} = \ell_1 \sin(x_1), && + \tilde{f}_{2,2} = \ell_1 \sin(x_1) \!+\! \ell_2 \!\sin(x_1\!+\!x_2), && + \tilde{f}_{2,3} = \ell_1 \sin(x_1) \!+\! \ell_2 \!\sin(x_1\!+\!x_2) \!+\! \ell_3 \!\sin(x_1\!+\!x_2\!+\!x_3), \quad\ldots\\ + & \tilde{f}_{3,1} = x_1, && + \tilde{f}_{3,2} = x_1 + x_2, && + \tilde{f}_{3,3} = x_1 + x_2 + x_3. +\end{alignat*} +\end{equation*} + +%\begin{align*} +% \tilde{f}_{1,1} &= \ell_1 \!\cos(x_1),\\ +% \tilde{f}_{2,1} &= \ell_1 \sin(x_1),\\ +% \tilde{f}_{3,1} &= x_1,\\[2mm] +% +% \tilde{f}_{1,2} &= \ell_1 \!\cos(x_1) \!+\! \ell_2 \!\cos(x_1\!+\!x_2),\\ +% \tilde{f}_{2,2} &= \ell_1 \sin(x_1) \!+\! \ell_2 \!\sin(x_1\!+\!x_2),\\ +% \tilde{f}_{3,2} &= x_1 + x_2,\\[2mm] +% +% \tilde{f}_{1,3} &= \ell_1 \!\cos(x_1) \!+\! \ell_2 \!\cos(x_1\!+\!x_2) \!+\! \ell_3 \!\cos(x_1\!+\!x_2\!+\!x_3),\\ +% \tilde{f}_{2,3} &= \ell_1 \sin(x_1) \!+\! \ell_2 \!\sin(x_1\!+\!x_2) \!+\! 
\ell_3 \!\sin(x_1\!+\!x_2\!+\!x_3),\\ +% \tilde{f}_{3,3} &= x_1 + x_2 + x_3,\\ +% \vdots & +%\end{align*} In Python, this can be coded for the end-effector position part as -\begin{lstlisting}[language=Python] -D = 3 #State space dimension (joint angles) +\begin{lstlisting}[language=Python,linewidth=\textwidth] +D = 3 #State space dimension (joint angles) x = np.ones(D) * np.pi / D #Robot pose l = np.array([2, 2, 1]) #Links lengths L = np.tril(np.ones([D,D])) #Transformation matrix @@ -370,24 +424,42 @@ The Jacobian corresponding to the end-effector forward kinematics function can b \end{bmatrix} \!\!, \end{align*} with -\begin{align*} - J_{1,1} &= -\ell_1 \!\sin(x_1) \!-\! \ell_2 \!\sin(x_1\!+\!x_2) \!-\! \ell_3 \!\sin(x_1\!+\!x_2\!+\!x_3) \!-\! \ldots,\\ - J_{2,1} &= \ell_1 \!\cos(x_1) \!+\! \ell_2 \!\cos(x_1\!+\!x_2) \!+\! \ell_3 \!\cos(x_1\!+\!x_2\!+\!x_3) \!+\! \ldots,\\ - J_{3,1} &= 1,\\[2mm] - J_{1,2} &= -\ell_2 \!\sin(x_1\!+\!x_2) \!-\! \ell_3 \!\sin(x_1\!+\!x_2\!+\!x_3) \!-\! \ldots,\\ - J_{2,2} &= \ell_2 \!\cos(x_1\!+\!x_2) \!+\! \ell_3 \!\cos(x_1\!+\!x_2\!+\!x_3) \!+\! \ldots,\\ - J_{3,2} &= 1,\\[2mm] - J_{1,3} &= -\ell_3 \!\sin(x_1\!+\!x_2\!+\!x_3) \!-\! \ldots,\\ - J_{2,3} &= \ell_3 \!\cos(x_1\!+\!x_2\!+\!x_3) \!+\! \ldots,\\ - J_{3,3} &= 1,\\ - \vdots & -\end{align*} -%J = [-sin(T * x)' * T * diag(model.l); ... -% cos(T * x)' * T * diag(model.l); ... -% ones(1, size(x,1))]; %x1,x2,o +\begin{equation*} +\scriptsize +\begin{alignat*}{3} + & J_{1,1} = -\ell_1 \!\sin(x_1) \!-\! \ell_2 \!\sin(x_1\!+\!x_2) \!-\! \ell_3 \!\sin(x_1\!+\!x_2\!+\!x_3) \!-\! \ldots, + && J_{1,2} = -\ell_2 \!\sin(x_1\!+\!x_2) \!-\! \ell_3 \!\sin(x_1\!+\!x_2\!+\!x_3) \!-\! \ldots, + && J_{1,3} = -\ell_3 \!\sin(x_1\!+\!x_2\!+\!x_3) \!-\! \ldots, + \\ + & J_{2,1} = \ell_1 \!\cos(x_1) \!+\! \ell_2 \!\cos(x_1\!+\!x_2) \!+\! \ell_3 \!\cos(x_1\!+\!x_2\!+\!x_3) \!+\! \ldots, + && J_{2,2} = \ell_2 \!\cos(x_1\!+\!x_2) \!+\! \ell_3 \!\cos(x_1\!+\!x_2\!+\!x_3) \!+\! \ldots, + && J_{2,3} = \ell_3 \!\cos(x_1\!+\!x_2\!+\!x_3) \!+\! \ldots, \quad\ldots + \\ + & J_{3,1} = 1, + && J_{3,2} = 1, + && J_{3,3} = 1, +\end{alignat*} +\end{equation*} + +%with +%\begin{align*} +% J_{1,1} &= -\ell_1 \!\sin(x_1) \!-\! \ell_2 \!\sin(x_1\!+\!x_2) \!-\! \ell_3 \!\sin(x_1\!+\!x_2\!+\!x_3) \!-\! \ldots,\\ +% J_{2,1} &= \ell_1 \!\cos(x_1) \!+\! \ell_2 \!\cos(x_1\!+\!x_2) \!+\! \ell_3 \!\cos(x_1\!+\!x_2\!+\!x_3) \!+\! \ldots,\\ +% J_{3,1} &= 1,\\[2mm] +% J_{1,2} &= -\ell_2 \!\sin(x_1\!+\!x_2) \!-\! \ell_3 \!\sin(x_1\!+\!x_2\!+\!x_3) \!-\! \ldots,\\ +% J_{2,2} &= \ell_2 \!\cos(x_1\!+\!x_2) \!+\! \ell_3 \!\cos(x_1\!+\!x_2\!+\!x_3) \!+\! \ldots,\\ +% J_{3,2} &= 1,\\[2mm] +% J_{1,3} &= -\ell_3 \!\sin(x_1\!+\!x_2\!+\!x_3) \!-\! \ldots,\\ +% J_{2,3} &= \ell_3 \!\cos(x_1\!+\!x_2\!+\!x_3) \!+\! \ldots,\\ +% J_{3,3} &= 1,\\ +% \vdots & +%\end{align*} +%%J = [-sin(T * x)' * T * diag(model.l); ... +%% cos(T * x)' * T * diag(model.l); ... 
+%% ones(1, size(x,1))]; %x1,x2,o In Python, this can be coded for the end-effector position part as -\begin{lstlisting}[language=Python] +\begin{lstlisting}[language=Python,linewidth=\textwidth] J = np.array([-np.sin(L @ x).T @ np.diag(l) @ L, np.cos(L @ x).T @ np.diag(l) @ L]) #Jacobian (for end-effector) \end{lstlisting} @@ -428,12 +500,10 @@ The dynamic equation of a planar robot with an arbitrary number of links can be \end{bmatrix} = \begin{bmatrix} \sum_{k=1}^j l_k c_k \\ \sum_{k=1}^j l_k s_k \end{bmatrix}, - \label{eq:position} -\end{equation} -where -\begin{align} \nonumber + \quad\text{where}\quad c_k = \cos(\sum_{i=1}^{k} x_i), \ \ \ s_k = \sin(\sum_{i=1}^{k} x_i), -\end{align} + \label{eq:position} +\end{equation} whose corresponding velocity can be calculated as \begin{equation*} @@ -443,11 +513,11 @@ whose corresponding velocity can be calculated as \end{equation*} By knowing the velocity, the j-\emph{th} point kinetic energy can be expressed as -\begin{align*} - T_j &= \frac{1}{2} m_j \big(\dot{f}_{j,1}^2+\dot{f}_{j,2}^2\big) \\ - &= \frac{1}{2} m_j \Bigg(\!\!\Big(\!-\sum_{k=1}^j l_k \Big(\sum_{i=1}^{k} \dot{x}_i\Big) s_k \!\Big)^{\!2} \!+\! +\begin{equation*} + T_j = \frac{1}{2} m_j \big(\dot{f}_{j,1}^2+\dot{f}_{j,2}^2\big) + = \frac{1}{2} m_j \Bigg(\!\!\Big(\!-\sum_{k=1}^j l_k \Big(\sum_{i=1}^{k} \dot{x}_i\Big) s_k \!\Big)^{\!2} \!+\! \Big(\sum_{k=1}^j l_k \Big(\sum_{i=1}^{k} \dot{x}_i\Big) c_k \!\Big)^{\!2}\!\Bigg), -\end{align*} +\end{equation*} and its potential energy as \begin{equation*} U_j = m_j \, g \, f_{j,2} = m_j g \Big(\sum_{k=1}^j l_k s_k\Big). @@ -459,7 +529,7 @@ The kinetic and potential energies are more dependent on absolute angles $\sum_{ \end{equation*} so the kinetic energy can be redefined as \begin{equation*}\label{eq:kinetic} -T_j = \frac{1}{2} m_j \Big(\Big(-\sum_{k=1}^j l_k \dot{q}_k S_k\Big)^2+\Big(\sum_{k=1}^j l_k \dot{q}_{k} C_k\Big)^2\Big). + T_j = \frac{1}{2} m_j \Big(\Big(-\sum_{k=1}^j l_k \dot{q}_k S_k\Big)^2+\Big(\sum_{k=1}^j l_k \dot{q}_{k} C_k\Big)^2\Big). \end{equation*} The robot's total kinetic and potential energies are the sum of their corresponding values in each point mass. So, the Lagrangian of the whole system can be written as @@ -467,19 +537,15 @@ The robot's total kinetic and potential energies are the sum of their correspond L = \sum_{j=1}^N T_j - U_j, \end{equation*} where $N$ is the number of links. Using the Euler-Lagrange equation, we can write -\begin{equation*} - u_z = \frac{d}{dt} \frac{\partial L}{\partial \dot{q}_z} - \frac{\partial L}{\partial q_z}, -\end{equation*} -\begin{equation*} - u_z = \frac{d}{dt} \frac{\partial (\sum_{j=z}^N T_j)}{\partial \dot{q}_z} - \frac{\partial \sum_{j=z}^N (T_j-U_j)}{\partial q_z}, -\end{equation*} \begin{equation} - u_z = \sum_{j=z}^N \Big(\frac{d}{dt} \frac{\partial T_j}{\partial \dot{q}_z} - \frac{\partial T_j}{\partial q_z} + \frac{\partial U_j}{\partial q_z}\Big), + u_z = \frac{d}{dt} \frac{\partial L}{\partial \dot{q}_z} - \frac{\partial L}{\partial q_z} + \quad= \frac{d}{dt} \frac{\partial (\sum_{j=z}^N T_j)}{\partial \dot{q}_z} - \frac{\partial \sum_{j=z}^N (T_j-U_j)}{\partial q_z} + \quad= \sum_{j=z}^N \Big(\frac{d}{dt} \frac{\partial T_j}{\partial \dot{q}_z} - \frac{\partial T_j}{\partial q_z} + \frac{\partial U_j}{\partial q_z}\Big), \label{eq:Lag_with_generalized_forces} \end{equation} where $u_z$ is the generalized force related to $q_z$. 
This force can be found from the corresponding generalized work $w_z$, which can be described by
\begin{equation}\label{eq:generalized_work}
-w_z = \begin{cases} (\tau_{z}-\tau_{z+1})\dot{q}_z & z<N \\ \tau_{N}\dot{q}_N & z=N\end{cases},
+	w_z = \begin{cases} (\tau_{z}-\tau_{z+1})\dot{q}_z & z<N \\ \tau_{N}\dot{q}_N & z=N\end{cases},
\end{equation}
where $\tau_z$ is the torque applied at the z-\emph{th} joint. The subtraction in the first line of \eqref{eq:generalized_work} is needed because, when $\tau_{z+1}$ is applied on link $l_{z+1}$, its reaction is applied on link $l_z$, except for the last joint, where there is no reaction from succeeding links. Please note that if we had used relative angles in our formulation, we would not need to consider this reaction, as it would be cancelled out at link $z+1$ (since $\dot{q}_{z+1} = \dot{q}_z + \dot{x}_{z+1}$). That said, generalized forces can be defined as
\begin{equation*}
@@ -487,15 +553,17 @@ where $\tau_z$ is the torque applied at z-\emph{th} joint. The fact that we need
\end{equation*}
By substituting the derived equations into \eqref{eq:Lag_with_generalized_forces}, the dynamic equation of a general planar robot with an arbitrary number of links can be expressed as (for more details, please refer to Appendix \ref{sec:FD_derivation})
-\begin{multline}\label{eq:arranged_dyn_eq}
-	\sum_{j=z}^{N}m_j \Big(\sum_{k=1}^{j}l_zl_k c_{z-k}\ddot{q}_k\Big) = \\
+\begin{equation}
+	\sum_{j=z}^{N}m_j \Big(\sum_{k=1}^{j}l_zl_k c_{z-k}\ddot{q}_k\Big) =
	u_z - \sum_{j=z}^{N}m_j \Big(\sum_{k=1}^{j} l_zl_k s_{z-k}\dot{q}_k^2\Big) - \sum_{j=z}^{N}m_j g l_z c_z,
-\end{multline}
-where
-\begin{align*}
+	\quad\text{where}\quad
+	\left\{
+	\begin{aligned}
	c_{h-k} &= c_h c_k - s_h s_k,\\
	s_{h-k} &= s_h c_k - c_h s_k.
-\end{align*}
+	\end{aligned}\right.
+ \label{eq:arranged_dyn_eq} +\end{equation} In \eqref{eq:arranged_dyn_eq}, the coefficient of $\ddot{q}_k$ can be written as \begin{equation*} @@ -507,40 +575,35 @@ where $\mathds{1}(\cdot)$ is a function that returns 0 if it receives a negative \end{equation*} By concatenation of the results for all z's, the dynamic equation of the whole system can be expressed as -\begin{equation}\label{eq:matix_form_dynamic_abs_angle_net_forces} -\bm{M} \ddot{\bm{q}} = \bm{u} + \bm{g} + \bm{C}\text{diag}(\bm{\dot{q}})\bm{\dot{q}}, +\begin{equation} + \bm{M} \ddot{\bm{q}} = \bm{u} + \bm{g} + \bm{C}\text{diag}(\bm{\dot{q}})\bm{\dot{q}}, + \label{eq:matix_form_dynamic_abs_angle_net_forces} \end{equation} where \begin{equation*} -\bm{q}= \begin{bmatrix} -q_1 \\ q_2 \\ \vdots \\ q_{N-1} \\ q_N -\end{bmatrix}, \ \ \ \bm{u}= \begin{bmatrix} -u_1 \\ u_2 \\ \vdots \\ u_{N-1} \\ u_N -\end{bmatrix}, -\end{equation*} -\begin{equation*} -\bm{M}(z,k)= l_z l_k C_{z-k} \sum_{j=z}^N m_j \mathds{1}(j-k),, -\end{equation*} -\begin{equation*} -\bm{C}(z,k)= -l_zl_k S_{z-k} \sum_{j=z}^N m_j \mathds{1}(j-k), + \bm{q}= \begin{bmatrix} + q_1 \\ q_2 \\ \vdots \\ q_{N-1} \\ q_N + \end{bmatrix}, \ \ \ \bm{u}= \begin{bmatrix} + u_1 \\ u_2 \\ \vdots \\ u_{N-1} \\ u_N + \end{bmatrix},\quad + \bm{M}(z,k)= l_z l_k C_{z-k} \sum_{j=z}^N m_j \mathds{1}(j-k),\quad + \bm{C}(z,k)= -l_zl_k S_{z-k} \sum_{j=z}^N m_j \mathds{1}(j-k), \end{equation*} \begin{equation*} \bm{g}= \begin{bmatrix} -- \sum_{j=1}^N m_j l_1 g \cos(q_1) \\ - \sum_{j=2}^N m_j l_2 g \cos(q_2) \\ \vdots \\ - \sum_{j={N-1}}^N m_jl_{N-1} g \cos(q_{N-1}) \\ - m_Nl_N g \cos(q_N) + - \sum_{j=1}^N m_j l_1 g \cos(q_1) \\ - \sum_{j=2}^N m_j l_2 g \cos(q_2) \\ \vdots \\ - \sum_{j={N-1}}^N m_jl_{N-1} g \cos(q_{N-1}) \\ - m_Nl_N g \cos(q_N) \end{bmatrix}. \end{equation*} To use this equation with relative angles and torque commands, one can replace the absolute angles and general forces by \begin{equation*} \bm{q} = \bm{L} \bm{x}, \ \ \ \bm{u} = \bm{L}^{-\trsp} \bm{\tau}, -\end{equation*} -where -\begin{equation*} -\bm{x}= \begin{bmatrix} -x_1 \\ x_2 \\ \vdots \\ x_{N-1} \\ x_N -\end{bmatrix}, \ \ \ \bm{\tau}= \begin{bmatrix} -\tau_1 \\ \tau_2 \\ \vdots \\ \tau_{N-1} \\ \tau_N -\end{bmatrix}. + \quad\text{where}\quad + \bm{x}= \begin{bmatrix} + x_1 \\ x_2 \\ \vdots \\ x_{N-1} \\ x_N + \end{bmatrix}, \ \ \ \bm{\tau}= \begin{bmatrix} + \tau_1 \\ \tau_2 \\ \vdots \\ \tau_{N-1} \\ \tau_N + \end{bmatrix}. 
\end{equation*} By substituting these variables into \eqref{eq:matix_form_dynamic_abs_angle_net_forces}, we would have @@ -565,8 +628,7 @@ Linear quadratic tracking (LQT) is a simple form of optimal control that trades c &= {\big(\bm{\mu}_T\!-\!\bm{x}_T\big)}^\trsp \bm{Q}_T - \big(\bm{\mu}_T\!-\!\bm{x}_T\big) \nonumber\\ - &\hspace{18mm} + + \big(\bm{\mu}_T\!-\!\bm{x}_T\big) + \sum_{t=1}^{T-1} \Big({\big(\bm{\mu}_t\!-\!\bm{x}_t\big)}^\trsp \bm{Q}_t \big(\bm{\mu}_t\!-\!\bm{x}_t\big) @@ -698,15 +760,15 @@ Figure \ref{fig:LQT_smooth} shows an example of LQT for a task that consist of p \end{flushright} If we assume that the control commands profile $\bm{u}$ is composed of \emph{control primitives} (CP) with $\bm{u}=\bm{\Psi}\bm{w}$, the objective becomes -\begin{multline*} +\begin{equation*} \min_{\bm{w}} {(\bm{x}-\bm{\mu})}^{\!\trsp} \bm{Q} (\bm{x}-\bm{\mu}) \;+\; - \bm{w}^{\!\trsp} \bm{\Psi}^{\!\trsp} \bm{R} \bm{\Psi} \bm{w},\\ - \;\text{s.t.}\quad + \bm{w}^{\!\trsp} \bm{\Psi}^{\!\trsp} \bm{R} \bm{\Psi} \bm{w}, + \quad\text{s.t.}\quad \bm{x}=\bm{S}_{\bm{x}}\bm{x}_1+\bm{S}_{\bm{u}} \bm{\Psi} \bm{w}, -\end{multline*} +\end{equation*} with a solution given by \begin{equation*} \bm{\hat{w}} = {\big(\bm{\Psi}^{\!\trsp} \bm{S}_{\bm{u}}^\trsp \bm{Q} \bm{S}_{\bm{u}} \bm{\Psi} + \bm{\Psi}^{\!\trsp} \bm{R} \bm{\Psi} \big)}^{-1} @@ -750,13 +812,12 @@ Set $\bm{V}_T=\bm{Q}_T$ \\ The LQR problem is formulated as the minimization of the cost \begin{equation} -\begin{aligned} - c(\bm{x}_1,\bm{u}) &= c_T(\bm{x}_T) + \sum_{t=1}^{T-1} c_t(\bm{x}_t,\bm{u}_t) \\ - &= \bm{x}_T^\trsp \bm{Q}_T \bm{x}_T + + c(\bm{x}_1,\bm{u}) = c_T(\bm{x}_T) + \sum_{t=1}^{T-1} c_t(\bm{x}_t,\bm{u}_t) + \;= \bm{x}_T^\trsp \bm{Q}_T \bm{x}_T + \sum_{t=1}^{T-1} \Big(\bm{x}_t^\trsp \bm{Q}_t \bm{x}_t + - \bm{u}_t^\trsp \bm{R}_t \, \bm{u}_t \Big), \\ - & \hspace{5mm}\text{s.t.}\quad \bm{x}_{t+1} = \bm{A}_t \bm{x}_{t} + \bm{B}_t \bm{u}_{t}, -\end{aligned} + \bm{u}_t^\trsp \bm{R}_t \, \bm{u}_t \Big), + \quad\text{s.t.}\quad + \bm{x}_{t+1} = \bm{A}_t \bm{x}_{t} + \bm{B}_t \bm{u}_{t}, \label{eq:cLQR} \end{equation} where $c$ without index refers to the total cost (cumulative cost). @@ -806,40 +867,36 @@ With \eqref{eq:qtLQR}, the dynamic programming recursion takes the form \end{equation} By substituting $\bm{x}_{{t+1}}=\bm{A}_t\bm{x}_t+\bm{B}_t\bm{u}_t$ into \eqref{eq:ct0}, $\hat{v}_t$ can be rewritten as -\begin{equation*} +\begin{equation} \hat{v}_t = \min_{\bm{u}_t} \begin{bmatrix} \bm{x}_t \\ \bm{u}_t \end{bmatrix}^\trsp \begin{bmatrix} \bm{Q}_{\bm{x}\bm{x},t} & \bm{Q}_{\bm{u}\bm{x},t}^\trsp \\ \bm{Q}_{\bm{u}\bm{x},t} & \bm{Q}_{\bm{u}\bm{u},t} \end{bmatrix} \begin{bmatrix} \bm{x}_t \\ \bm{u}_t \end{bmatrix}, -\end{equation*} -\begin{equation} -\begin{aligned} - \text{where}\quad - \bm{Q}_{\bm{x}\bm{x},t} &= \bm{A}_t^\trsp \bm{V}_{t+1} \bm{A}_t + \bm{Q}_t, \\ - \bm{Q}_{\bm{u}\bm{u},t} &= \bm{B}_t^\trsp \bm{V}_{t+1} \bm{B}_t + \bm{R}_t, \\ - \bm{Q}_{\bm{u}\bm{x},t} &= \bm{B}_t^\trsp \bm{V}_{t+1} \bm{A}_t. -\end{aligned} -\label{eq:ct1} + \quad\text{where}\quad + \left\{ + \begin{aligned} + \bm{Q}_{\bm{x}\bm{x},t} &= \bm{A}_t^\trsp \bm{V}_{t+1} \bm{A}_t + \bm{Q}_t, \\ + \bm{Q}_{\bm{u}\bm{u},t} &= \bm{B}_t^\trsp \bm{V}_{t+1} \bm{B}_t + \bm{R}_t, \\ + \bm{Q}_{\bm{u}\bm{x},t} &= \bm{B}_t^\trsp \bm{V}_{t+1} \bm{A}_t. + \end{aligned} + \right. 
+ \label{eq:ct1} \end{equation} An optimal control command $\bm{\hat{u}}_t$ can be computed by differentiating \eqref{eq:ct1} with respect to $\bm{u}_t$ and equating to zero, providing a feedback law \begin{equation} -\begin{aligned} - \bm{\hat{u}}_t &= -\bm{K}_t \, \bm{x}_t, \\ - \text{with}\quad - \bm{K}_t &= \bm{Q}_{\bm{u}\bm{u},t}^{-1} \, \bm{Q}_{\bm{u}\bm{x},t}. -\end{aligned} -\label{eq:uLQRrecursive} + \bm{\hat{u}}_t = -\bm{K}_t \, \bm{x}_t, + \quad\text{with}\quad + \bm{K}_t = \bm{Q}_{\bm{u}\bm{u},t}^{-1} \, \bm{Q}_{\bm{u}\bm{x},t}. + \label{eq:uLQRrecursive} \end{equation} By introducing \eqref{eq:uLQRrecursive} back into \eqref{eq:ct1}, the resulting value function $\hat{v}_t$ has the quadratic form \begin{equation} -\begin{aligned} - \hat{v}_t &= \bm{x}_t^\trsp \bm{V}_t \bm{x}_t, \\ - \text{with}\quad - \bm{V}_t &= \bm{Q}_{\bm{x}\bm{x},t} - \bm{Q}_{\bm{u}\bm{x},t}^\trsp \, \bm{Q}_{\bm{u}\bm{u},t}^{-1} \, \bm{Q}_{\bm{u}\bm{x},t}. -\end{aligned} -\label{eq:V} + \hat{v}_t = \bm{x}_t^\trsp \bm{V}_t \bm{x}_t, + \quad\text{with}\quad + \bm{V}_t = \bm{Q}_{\bm{x}\bm{x},t} - \bm{Q}_{\bm{u}\bm{x},t}^\trsp \, \bm{Q}_{\bm{u}\bm{u},t}^{-1} \, \bm{Q}_{\bm{u}\bm{x},t}. + \label{eq:V} \end{equation} We observe that \eqref{eq:V} has the same quadratic form as \eqref{eq:vhatT}, so that \eqref{eq:qtLQR} can be solved recursively. We thus obtain a backward recursion procedure in which $\bm{V}_t$ is evaluated recursively from $t=T-1$ to $t=1$, starting from $\bm{V}_T=\bm{Q}_T$, which corresponds to a Riccati equation. @@ -865,18 +922,17 @@ In order to extend the above development to linear quadratic tracking (LQT), the \label{eq:ABaugm} \end{equation} and an augmented tracking weight -\begin{align*} - \bm{\tilde{Q}}_t &= \begin{bmatrix} \bm{Q}_t^{-1}\!+\!\bm{\mu}_t\bm{\mu}_t^\trsp & \bm{\mu}_t \\ \bm{\mu}_t^\trsp & 1 \end{bmatrix}^{-1} \\ - &= \begin{bmatrix} \bm{I} & \bm{0} \\ -\bm{\mu}_t^\trsp & 1 \end{bmatrix} +\begin{equation*} + \bm{\tilde{Q}}_t = \begin{bmatrix} \bm{Q}_t^{-1}\!+\!\bm{\mu}_t\bm{\mu}_t^\trsp & \bm{\mu}_t \\ \bm{\mu}_t^\trsp & 1 \end{bmatrix}^{-1} + = \begin{bmatrix} \bm{I} & \bm{0} \\ -\bm{\mu}_t^\trsp & 1 \end{bmatrix} \begin{bmatrix} \bm{Q}_t & \bm{0} \\ \bm{0} & 1 \end{bmatrix} \begin{bmatrix} \bm{I} & -\bm{\mu}_t \\ \bm{0} & 1 \end{bmatrix}, -\end{align*} +\end{equation*} which is used to define the cost \begin{align} c &= {\big(\bm{\mu}_T-\bm{x}_T\big)}^\trsp \bm{Q}_T - \big(\bm{\mu}_T-\bm{x}_T\big) \nonumber\\ - &\hspace{18mm} + + \big(\bm{\mu}_T-\bm{x}_T\big) + \sum_{t=1}^{T-1} \Big({\big(\bm{\mu}_t-\bm{x}_t\big)}^\trsp \bm{Q}_t \big(\bm{\mu}_t-\bm{x}_t\big) @@ -888,17 +944,15 @@ which is used to define the cost \sum_{t=1}^{T-1} \Big(\bm{\tilde{x}}_t^\trsp \bm{\tilde{Q}}_t \bm{\tilde{x}}_t \;+\; \bm{u}_t^\trsp \bm{R}_t \bm{u}_t \Big), -\label{eq:cBatchAugm} + \label{eq:cBatchAugm} \end{align} where the augmented form in \eqref{eq:cBatchAugm} has the same form as the standard LQR cost in \eqref{eq:cLQR}, allowing the tracking problem to be solved in the same way by using this augmented state representation. Additional verification details can be found in Appendix \ref{app:LQTLQR}. 
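As an illustration, the following sketch assembles these augmented matrices at a given time step (the function name and array shapes are illustrative; the dynamics augmentation follows the standard construction for the state augmented with a constant $1$, see \eqref{eq:ABaugm}, and the weight uses the factorization given above):
\begin{lstlisting}[language=Python]
import numpy as np

def augment_lqt(A, B, Q, mu):
    """Augmented system and weight matrices to solve LQT as a standard LQR problem.
    A: n x n, B: n x m, Q: n x n tracking weight, mu: target of dimension n."""
    n = A.shape[0]
    #Augmented dynamics for the state [x; 1] (the constant 1 is simply propagated)
    A_aug = np.block([[A, np.zeros((n, 1))], [np.zeros((1, n)), np.ones((1, 1))]])
    B_aug = np.vstack([B, np.zeros((1, B.shape[1]))])
    #Augmented tracking weight, using the factorization given in the main text
    M = np.block([[np.eye(n), -mu.reshape(-1, 1)], [np.zeros((1, n)), np.ones((1, 1))]])
    Q_aug = M.T @ np.block([[Q, np.zeros((n, 1))], [np.zeros((1, n)), np.ones((1, 1))]]) @ M
    return A_aug, B_aug, Q_aug
\end{lstlisting}
The matrices $\bm{\tilde{A}}_t$, $\bm{\tilde{B}}_t$ and $\bm{\tilde{Q}}_t$ returned by this function can then be used in the same Riccati recursion as in the standard LQR case, with $\bm{x}_t$ replaced by $\bm{\tilde{x}}_t$.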
For a tracking problem, we can see that the resulting optimal control policy takes the form \begin{equation} -\begin{aligned} - \bm{\hat{u}}_t &= -\bm{\tilde{K}}_t \; \bm{\tilde{x}}_t \\ - &= \bm{K}_t \, (\bm{\mu}_t - \bm{x}_t) + \bm{u}^\text{ff}_t, -\end{aligned} -\label{eq:uLQTrecursive} + \bm{\hat{u}}_t = -\bm{\tilde{K}}_t \; \bm{\tilde{x}}_t + = \bm{K}_t \, (\bm{\mu}_t - \bm{x}_t) + \bm{u}^\text{ff}_t, + \label{eq:uLQTrecursive} \end{equation} characterized by a feedback gain matrix $\bm{K}_t$ extracted from $\bm{\tilde{K}}_t = \big[\bm{K}_t, \bm{k}_t\big]$, and a feedforward term $\bm{u}^\text{ff}_t = -\bm{k}_t - \bm{K}_t \bm{\mu}_t$ depending on $\bm{\mu}_t$. %K = Ka(:,1:model0.nbVar); @@ -929,13 +983,11 @@ corresponding to open loop control commands. By introducing a matrix $\bm{F}$ to describe $\bm{u}=-\bm{F}\bm{x}_1$, we can alternatively define the optimization problem as \begin{equation*} -\begin{aligned} - \min_{\bm{F}} \;\;& \bm{x}^{\!\trsp} \bm{Q} \bm{x} + \min_{\bm{F}} \;\; \bm{x}^{\!\trsp} \bm{Q} \bm{x} + - (-\bm{F}\bm{x}_1)^{\!\trsp} \,\bm{R}\, (-\bm{F}\bm{x}_1), \\ - \text{s.t.} \;\;& + (-\bm{F}\bm{x}_1)^{\!\trsp} \,\bm{R}\, (-\bm{F}\bm{x}_1), + \quad\text{s.t.}\quad \bm{x}=(\bm{S}_{\bm{x}}-\bm{S}_{\bm{u}}\bm{F})\bm{x}_1, -\end{aligned} \end{equation*} whose least squares solution is \begin{equation} @@ -946,10 +998,8 @@ whose least squares solution is We decompose $\bm{F}$ as block matrices $\bm{F}_t$ with $t\in\{1,\ldots,T-1\}$. $\bm{F}$ can then be used to iteratively reconstruct regulation gains $\bm{K}_t$, by starting from $\bm{K}_1=\bm{F}_1$, $\bm{P}_1=\bm{I}$, and by computing recursively \begin{equation} -\begin{aligned} - \bm{P}_t &= \bm{P}_{t-1} {(\bm{A}_{t-1} - \bm{B}_{t-1} \bm{K}_{t-1})}^{-1}, \\ - \bm{K}_t &= \bm{F}_t \; \bm{P}_t, -\end{aligned} + \bm{P}_t = \bm{P}_{t-1} {(\bm{A}_{t-1} - \bm{B}_{t-1} \bm{K}_{t-1})}^{-1}, \quad + \bm{K}_t = \bm{F}_t \; \bm{P}_t, \label{eq:K_from_F} \end{equation} which can then be used in a feedback controller as in \eqref{eq:uLQRrecursive}. @@ -967,7 +1017,7 @@ This least squares formulation of LQT (LQT-LS) yields the same controller as wit \begin{figure} \centering -\includegraphics[width=.8\columnwidth]{images/LQT_recursive_LS_multiAgents01.png} +\includegraphics[width=.6\textwidth]{images/LQT_recursive_LS_multiAgents01.png} \caption{\footnotesize Least squares formulation of recursive LQR to control multiple agents (as point mass systems), where the task of each agent is to reach a desired target at the end of the motion, and to meet the other agent in the middle of the motion (e.g., for a handover task, see main text for the alternative option of nonzero offdiagonal elements at different time steps). We then test the adaptation capability of the agents by simulating a perturbation at 1/4 of the movement. The original and adapted movements are depicted in dashed and solid lines, respectively. The initial positions are represented with black points, the targets are in red, the optimized meeting points are in blue, and the perturbation is in green. 
} @@ -1004,8 +1054,8 @@ Figure \ref{fig:LQT_recursive_LS_multiAgents} presents an example with the contr \begin{figure} \centering -\includegraphics[width=.4\columnwidth]{images/LQT_CP_DMP01.png}\hspace{6mm} -\includegraphics[width=.5\columnwidth]{images/LQT_CP_DMP02.png} +\includegraphics[width=.35\textwidth]{images/LQT_CP_DMP01.png}\hspace{6mm} +\includegraphics[width=.4\textwidth]{images/LQT_CP_DMP02.png} \caption{\footnotesize Linear quadratic tracking (LQT) with control primitives applied to a trajectory tracking task, with a formulation similar to dynamical movement primitives (DMP). \emph{Left:} The observed ``S'' shape (in blue) is reproduced by starting from a different initial position (black point), with a perturbation simulated at 1/4 of the movement (green points) to show the capability of the approach to recover from perturbations. The trajectory in dashed line shows the result without perturbation and trajectory in solid line shows the result with perturbation. @@ -1019,12 +1069,12 @@ The dynamical movement primitives (DMP) \cite{Ijspeert13} approach proposes to r Linear quadratic tracking (LQT) with control primitives can be used in a similar fashion as in DMP, by requesting a target to be reached at the end of the movement and by requesting the observed acceleration profile to be tracked, while encoding the control commands as radial basis functions. The controller can be estimated either as the open-loop control commands \eqref{eq:uLQT}, or as the closed-loop controller \eqref{eq:uLQTrecursive}. In the latter case, the matrix $\bm{F}$ in \eqref{eq:LQR_LS_F} is estimated by using control primitives and an augmented state space formulation, namely -\begin{align*} - \bm{\hat{W}} &= {\big(\bm{\Psi}^{\!\trsp} \bm{S}_{\bm{u}}^\trsp \bm{\tilde{Q}} \bm{S}_{\bm{u}} \bm{\Psi} + - \bm{\Psi}^{\!\trsp} \bm{R} \bm{\Psi}\big)}^{-1} \bm{\Psi}^{\!\trsp} \bm{S}_{\bm{u}}^\trsp \bm{\tilde{Q}} \bm{S}_{\bm{x}},\\ - \bm{F} &= \bm{\Psi}\bm{\hat{W}}, +\begin{equation*} + \bm{\hat{W}} = {\big(\bm{\Psi}^{\!\trsp} \bm{S}_{\bm{u}}^\trsp \bm{\tilde{Q}} \bm{S}_{\bm{u}} \bm{\Psi} + + \bm{\Psi}^{\!\trsp} \bm{R} \bm{\Psi}\big)}^{-1} \bm{\Psi}^{\!\trsp} \bm{S}_{\bm{u}}^\trsp \bm{\tilde{Q}} \bm{S}_{\bm{x}}, \quad + \bm{F} = \bm{\Psi}\bm{\hat{W}}, %\label{eq:LQR_LS_CP_F} -\end{align*} +\end{equation*} which is used to compute feedback gains $\bm{\tilde{K}}_t$ on the augmented state with \eqref{eq:K_from_F}. The resulting controller $\bm{\hat{u}}_t = -\bm{\tilde{K}}_t \bm{\tilde{x}}_t$ tracks the acceleration profile while smoothly reaching the desired goal at the end of the movement, with a smooth transition between the two. The main difference with DMP is that the smooth transition between the two behaviors is directly optimized by the system, and the parameters of the feedback controller are automatically optimized (in DMP, stiffness and damping ratio). @@ -1035,6 +1085,7 @@ In addition, the LQT formulation allows the resulting controller to be formalize Moreover, the above problem formulation can be extended to iterative LQR, providing an opportunity to consider obstacle avoidance and constraints within the DMP formulation, as well as to describe costs in task space with a DMP acting in joint angle space. 
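To make the above concrete, the following self-contained sketch solves a batch LQT problem with control primitives for a planar point mass modeled as a double integrator; the viapoints, weights, bandwidth and horizon are only illustrative, and the transfer matrices $\bm{S}_{\bm{x}}$ and $\bm{S}_{\bm{u}}$ are built as described in Appendix \ref{app:Su}:
\begin{lstlisting}[language=Python]
import numpy as np

T, dt, K = 50, 1e-2, 8 #Number of timesteps, time step size, number of basis functions
D = 2 #Workspace dimension
n, m = 2 * D, D #State dimension (position+velocity) and control dimension

#Double integrator in discrete time
A = np.block([[np.eye(D), dt * np.eye(D)], [np.zeros((D, D)), np.eye(D)]])
B = np.block([[0.5 * dt**2 * np.eye(D)], [dt * np.eye(D)]])

#Transfer matrices such that x = Sx @ x1 + Su @ u
Sx = np.vstack([np.linalg.matrix_power(A, t) for t in range(T)])
Su = np.zeros((n * T, m * (T - 1)))
for t in range(1, T):
    for s in range(t):
        Su[t*n:(t+1)*n, s*m:(s+1)*m] = np.linalg.matrix_power(A, t-1-s) @ B

#Control primitives u = Psi @ w, with Gaussian radial basis functions
ts = np.linspace(0, 1, T - 1).reshape(-1, 1)
mu_s = np.linspace(0, 1, K).reshape(1, -1)
phi = np.exp(-1e2 * (ts - mu_s)**2)
Psi = np.kron(phi, np.eye(m))

#Tracking cost: pass through a viapoint and reach a target (positions only)
Q = np.zeros((n * T, n * T))
mu = np.zeros(n * T)
for t, target in [(T // 2, np.array([2., 1.])), (T - 1, np.array([3., 2.]))]:
    Q[t*n:t*n+D, t*n:t*n+D] = np.eye(D) * 1e3
    mu[t*n:t*n+D] = target
R = np.eye(m * (T - 1)) * 1e-6 #Control weight matrix

x1 = np.zeros(n) #Initial state
G = Su @ Psi
w_hat = np.linalg.solve(G.T @ Q @ G + Psi.T @ R @ Psi, G.T @ Q @ (mu - Sx @ x1))
u_hat = Psi @ w_hat #Open loop control commands
x = (Sx @ x1 + Su @ u_hat).reshape(T, n) #Resulting trajectory (one row per timestep)
\end{lstlisting}
The resulting open-loop commands are smooth by construction, as they are confined to the span of the radial basis functions; the feedback variant discussed above is obtained by instead estimating $\bm{\hat{W}}$ and $\bm{F}=\bm{\Psi}\bm{\hat{W}}$ in the augmented state space.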
+\newpage %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{iLQR optimization} @@ -1061,16 +1112,15 @@ and Jacobian matrices \end{equation*} The cost function $c(\bm{x}_t,\bm{u}_t)$ for time step $t$ can similarly be approximated by a second order Taylor expansion around the point $(\bm{\hat{x}}_t, \bm{\hat{u}}_t)$, namely -\begin{multline} - c(\bm{x}_t,\bm{u}_t) \approx c(\bm{\hat{x}}_t,\bm{\hat{u}}_t) + \Delta\bm{x}_t^\trsp \frac{\partial c}{\partial\bm{x}_t} + +\begin{equation} +\begin{aligned} + c(\bm{x}_t,\bm{u}_t) &\approx c(\bm{\hat{x}}_t,\bm{\hat{u}}_t) + \Delta\bm{x}_t^\trsp \frac{\partial c}{\partial\bm{x}_t} + \Delta\bm{u}_t^\trsp \frac{\partial c}{\partial\bm{u}_t} + - \\ \frac{1}{2}\Delta\bm{x}_t^\trsp \frac{\partial^2c}{\partial\bm{x}_t^2} \Delta\bm{x}_t + \Delta\bm{x}_t^\trsp \frac{\partial^2c}{\partial\bm{x}_t\bm{u}_t} \Delta\bm{u}_t + \frac{1}{2}\Delta\bm{u}_t^\trsp \frac{\partial^2c}{\partial\bm{u}_t^2} \Delta\bm{u}_t, - \\[2mm] - \iff c(\bm{x}_t,\bm{u}_t) \approx c(\bm{\hat{x}}_t,\bm{\hat{u}}_t) + \hspace{40mm} \\ + \iff c(\bm{x}_t,\bm{u}_t) &\approx c(\bm{\hat{x}}_t,\bm{\hat{u}}_t) + \frac{1}{2} \begin{bmatrix} 1 \\ \Delta\bm{x}_t \\ \Delta\bm{u}_t @@ -1083,8 +1133,9 @@ The cost function $c(\bm{x}_t,\bm{u}_t)$ for time step $t$ can similarly be appr \begin{bmatrix} 1 \\ \Delta\bm{x}_t \\ \Delta\bm{u}_t \end{bmatrix}, - \label{eq:dc} -\end{multline} +\end{aligned} +\label{eq:dc} +\end{equation} with gradients \begin{equation*} \bm{g}_{\bm{x},t}=\frac{\partial c}{\partial\bm{x}_t}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}, \quad @@ -1093,9 +1144,7 @@ with gradients and Hessian matrices \begin{equation*} \bm{H}_{\bm{x}\bm{x},t}=\frac{\partial^2c}{\partial\bm{x}_t^2}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}, \quad - \bm{H}_{\bm{u}\bm{u},t}=\frac{\partial^2c}{\partial\bm{u}_t^2}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}, -\end{equation*} -\begin{equation*} + \bm{H}_{\bm{u}\bm{u},t}=\frac{\partial^2c}{\partial\bm{u}_t^2}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}, \quad \bm{H}_{\bm{u}\bm{x},t}=\frac{\partial^2c}{\partial\bm{u}_t\partial\bm{x}_t}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}. \end{equation*} @@ -1116,8 +1165,9 @@ the evolution of the control commands. The evolution of the state in \eqref{eq:DS} becomes $\Delta\bm{x} = \bm{S}_{\bm{u}} \Delta\bm{u}$, see Appendix \ref{app:Su} for details.\footnote{Note that $\bm{S}_{\bm{x}} \Delta\bm{x}_1\!=\!\bm{0}$ because $\Delta\bm{x}_1\!=\!\bm{0}$ (as we want our motion to start from $\bm{x}_1$).} The minimization problem can then be rewritten in batch form as -\begin{multline} - \min_{\Delta\bm{u}} \Delta c(\Delta\bm{x},\Delta\bm{u}), \quad\text{s.t.}\quad \Delta\bm{x} = \bm{S}_{\bm{u}} \Delta\bm{u}, \quad\text{where}\\ +\begin{equation} + \min_{\Delta\bm{u}} \Delta c(\Delta\bm{x},\Delta\bm{u}), \quad\text{s.t.}\quad \Delta\bm{x} = \bm{S}_{\bm{u}} \Delta\bm{u}, + \quad\text{where}\quad \Delta c(\Delta\bm{x},\Delta\bm{u}) = \frac{1}{2} \begin{bmatrix} @@ -1136,17 +1186,17 @@ The minimization problem can then be rewritten in batch form as %\Delta\bm{x}^\trsp \bm{H}_{\bm{x}\bm{u}} \Delta\bm{u} + %\frac{1}{2} \Delta\bm{u}^\trsp \bm{H}_{\bm{u}\bm{u}} \Delta\bm{u}. 
\label{eq:dc_batch} -\end{multline} +\end{equation} By inserting the constraint into the cost, we obtain the optimization problem -\begin{multline} +\begin{equation} \min_{\Delta\bm{u}} \quad \Delta\bm{u}^\trsp \bm{S}_{\bm{u}}^\trsp \bm{g}_{\bm{x}} + \Delta\bm{u}^\trsp \bm{g}_{\bm{u}} + - \frac{1}{2} \Delta\bm{u}^\trsp \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{x}\bm{x}} \bm{S}_{\bm{u}} \Delta\bm{u} +\\ + \frac{1}{2} \Delta\bm{u}^\trsp \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{x}\bm{x}} \bm{S}_{\bm{u}} \Delta\bm{u} + \Delta\bm{u}^\trsp \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{u}\bm{x}}^\trsp \Delta\bm{u} + \frac{1}{2} \Delta\bm{u}^\trsp \bm{H}_{\bm{u}\bm{u}} \Delta\bm{u}, \label{eq:minu} -\end{multline} +\end{equation} which can be solved analytically by differentiating with respect to $\Delta\bm{u}$ and equating to zero, namely, \begin{equation} \bm{S}_{\bm{u}}^\trsp \bm{g}_{\bm{x}} + \bm{g}_{\bm{u}} +\\ @@ -1199,7 +1249,7 @@ This dynamic programming recursion takes the form \label{eq:vhatTiLQR} \end{equation} and we then have with \eqref{eq:qtiLQR} that -\begin{multline} +\begin{equation} \hat{v}_{t} = \min_{\Delta\bm{u}_{t}} \begin{bmatrix} 1 \\ \Delta\bm{x}_{t} \\ \Delta\bm{u}_{t} @@ -1212,7 +1262,7 @@ and we then have with \eqref{eq:qtiLQR} that \begin{bmatrix} 1 \\ \Delta\bm{x}_{t} \\ \Delta\bm{u}_{t} \end{bmatrix} - + \\ + + \begin{bmatrix} 1 \\ \Delta\bm{x}_{t+1} \end{bmatrix}^{\!\trsp} @@ -1224,10 +1274,10 @@ and we then have with \eqref{eq:qtiLQR} that 1 \\ \Delta\bm{x}_{t+1} \end{bmatrix}. \label{eq:ct0iLQR} -\end{multline} +\end{equation} By substituting $\Delta\bm{x}_{t+1} = \bm{A}_{t} \Delta\bm{x}_{t} + \bm{B}_{t} \Delta\bm{u}_{t}$ into \eqref{eq:ct0iLQR}, $\hat{v}_{t}$ can be rewritten as -\begin{equation*} +\begin{equation} \hat{v}_{t} = \min_{\Delta\bm{u}_{t}} \begin{bmatrix} 1 \\ \Delta\bm{x}_t \\ \Delta\bm{u}_t @@ -1240,24 +1290,23 @@ By substituting $\Delta\bm{x}_{t+1} = \bm{A}_{t} \Delta\bm{x}_{t} + \bm{B}_{t} \ \begin{bmatrix} 1 \\ \Delta\bm{x}_t \\ \Delta\bm{u}_t \end{bmatrix}, -\end{equation*} -\begin{equation} -\begin{aligned} - \text{where} \hspace{5mm} \bm{q}_{\bm{x},t} &= \bm{g}_{\bm{x},t} + \bm{A}_t^\trsp \bm{v}_{\bm{x},t+1}, \\ - \bm{q}_{\bm{u},t} &= \bm{g}_{\bm{u},t} + \bm{B}_t^\trsp \, \bm{v}_{\bm{x},t+1}, \\ - \bm{Q}_{\bm{x}\bm{x},t} &\approx \bm{H}_{\bm{x}\bm{x},t} + \bm{A}_t^\trsp \, \bm{V}_{\bm{x}\bm{x},t+1} \, \bm{A}_t, \\ - \bm{Q}_{\bm{u}\bm{u},t} &\approx \bm{H}_{\bm{u}\bm{u},t} + \bm{B}_t^\trsp \, \bm{V}_{\bm{x}\bm{x},t+1} \, \bm{B}_t, \\ - \bm{Q}_{\bm{u}\bm{x},t} &\approx \bm{H}_{\bm{u}\bm{x},t} + \bm{B}_t^\trsp \, \bm{V}_{\bm{x}\bm{x},t+1} \, \bm{A}_t, -\end{aligned} -\label{eq:qt} %{eq:ct1iLQR} + \quad\text{where}\quad + \left\{ + \begin{aligned} + \bm{q}_{\bm{x},t} &= \bm{g}_{\bm{x},t} + \bm{A}_t^\trsp \bm{v}_{\bm{x},t+1}, \\ + \bm{q}_{\bm{u},t} &= \bm{g}_{\bm{u},t} + \bm{B}_t^\trsp \, \bm{v}_{\bm{x},t+1}, \\ + \bm{Q}_{\bm{x}\bm{x},t} &\approx \bm{H}_{\bm{x}\bm{x},t} + \bm{A}_t^\trsp \, \bm{V}_{\bm{x}\bm{x},t+1} \, \bm{A}_t, \\ + \bm{Q}_{\bm{u}\bm{u},t} &\approx \bm{H}_{\bm{u}\bm{u},t} + \bm{B}_t^\trsp \, \bm{V}_{\bm{x}\bm{x},t+1} \, \bm{B}_t, \\ + \bm{Q}_{\bm{u}\bm{x},t} &\approx \bm{H}_{\bm{u}\bm{x},t} + \bm{B}_t^\trsp \, \bm{V}_{\bm{x}\bm{x},t+1} \, \bm{A}_t, + \end{aligned} + \right. 
+ \label{eq:qt} %{eq:ct1iLQR} \end{equation} with gradients \begin{equation*} \bm{g}_{\bm{x},t} = \frac{\partial c}{\partial\bm{x}_t}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}, \quad \bm{g}_{\bm{u},t} = \frac{\partial c}{\partial\bm{u}_t}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}, \quad - \bm{v}_{\bm{x},t} = \frac{\partial \hat{v}}{\partial\bm{x}_t}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}, -\end{equation*} -\begin{equation*} + \bm{v}_{\bm{x},t} = \frac{\partial \hat{v}}{\partial\bm{x}_t}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}, \quad \bm{q}_{\bm{x},t} = \frac{\partial q}{\partial\bm{x}_t}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}, \quad \bm{q}_{\bm{u},t} = \frac{\partial q}{\partial\bm{u}_t}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}, \end{equation*} @@ -1269,17 +1318,13 @@ Jacobian matrices and Hessian matrices \begin{equation*} \bm{H}_{\bm{x}\bm{x},t} \!=\! \frac{\partial^2 c}{\partial\bm{x}_t^2}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}, \quad - \bm{H}_{\bm{u}\bm{u},t} \!=\! \frac{\partial^2 c}{\partial\bm{u}_t^2}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}, -\end{equation*} -\begin{equation*} + \bm{H}_{\bm{u}\bm{u},t} \!=\! \frac{\partial^2 c}{\partial\bm{u}_t^2}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}, \quad \bm{H}_{\bm{u}\bm{x},t} \!=\! \frac{\partial^2 c}{\partial\bm{u}_t\partial\bm{x}_t}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}, \quad \bm{V}_{\bm{x}\bm{x},t} \!=\! \frac{\partial^2 \hat{v}}{\partial\bm{x}_t^2}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}, \end{equation*} \begin{equation*} \bm{Q}_{\bm{x}\bm{x},t} = \frac{\partial^2 q}{\partial\bm{x}_t^2}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}, \quad - \bm{Q}_{\bm{u}\bm{u},t} = \frac{\partial^2 q}{\partial\bm{u}_t^2}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}, -\end{equation*} -\begin{equation*} + \bm{Q}_{\bm{u}\bm{u},t} = \frac{\partial^2 q}{\partial\bm{u}_t^2}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}, \quad \bm{Q}_{\bm{u}\bm{x},t} = \frac{\partial^2 q}{\partial\bm{u}_t\partial\bm{x}_t}\Big|_{\bm{\hat{x}}_t,\bm{\hat{u}}_t}. \end{equation*} %Bold lower case letter refer to gradient vectors and bold upper case letter refer to Hessian matrices. @@ -1287,13 +1332,15 @@ and Hessian matrices Minimizing \eqref{eq:qt} w.r.t.~$\Delta\bm{u}_t$ can be achieved by differentiating the equation and equating to zero, yielding the controller \begin{equation} -\begin{aligned} \Delta\bm{\hat{u}}_t = \bm{k}_t + \bm{K}_t \, \Delta\bm{x}_t, \;\text{with}\quad + \left\{ + \begin{aligned} \bm{k}_t &= - \bm{Q}_{\bm{u}\bm{u},t}^{-1} \, \bm{q}_{\bm{u},t}, \\ \bm{K}_t &= - \bm{Q}_{\bm{u}\bm{u},t}^{-1} \, \bm{Q}_{\bm{u}\bm{x},t}, -\end{aligned} -\label{eq:duhat} + \end{aligned} + \right. + \label{eq:duhat} \end{equation} where $\bm{k}_t$ is a feedforward command and $\bm{K}_t$ is a feedback gain matrix. @@ -1359,30 +1406,26 @@ First, note that \eqref{eq:duhat} can be rewritten as \begin{equation} \Delta\bm{\hat{u}}_t = \bm{\tilde{K}}_t \, \begin{bmatrix} \Delta\bm{x}_t \\ 1 \end{bmatrix}, \;\text{with}\quad - \bm{\tilde{K}}_t &= - \bm{Q}_{\bm{u}\bm{u},t}^{-1} \Big[ \bm{Q}_{\bm{u}\bm{x},t}, \; \bm{q}_{\bm{u},t} \Big]. + \bm{\tilde{K}}_t = - \bm{Q}_{\bm{u}\bm{u},t}^{-1} \Big[ \bm{Q}_{\bm{u}\bm{x},t}, \; \bm{q}_{\bm{u},t} \Big]. 
\label{eq:duhat2} \end{equation} Also, \eqref{eq:minu} can be rewritten as \begin{equation*} - \min_{\Delta\bm{u}} \quad \frac{1}{2} + \min_{\Delta\bm{u}} \quad \underbrace{\frac{1}{2} \begin{bmatrix} \Delta\bm{u} \\ 1 \end{bmatrix}^\trsp \begin{bmatrix} \bm{C} & \bm{c} \\ \bm{c}^\trsp & 0 \end{bmatrix} - \begin{bmatrix} \Delta\bm{u} \\ 1 \end{bmatrix}, -\end{equation*} -or -\begin{equation*} - \min_{\Delta\bm{u}} \quad \frac{1}{2} \Delta\bm{u}^\trsp \bm{C} \Delta\bm{u} + \Delta\bm{u}^\trsp \bm{c}, -\end{equation*} -\begin{equation} -\begin{aligned} - \text{where}\quad + \begin{bmatrix} \Delta\bm{u} \\ 1 \end{bmatrix}}_{\frac{1}{2} \Delta\bm{u}^\trsp \bm{C} \Delta\bm{u} + \Delta\bm{u}^\trsp \bm{c}}, + \quad\text{where}\quad + \left\{ + \begin{aligned} \bm{c} &= \bm{S}_{\bm{u}}^\trsp \bm{g}_{\bm{x}} + \bm{g}_{\bm{u}}, \\ \bm{C} &= \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{x}\bm{x}} \bm{S}_{\bm{u}} + 2 \bm{S}_{\bm{u}}^\trsp \bm{H}_{\bm{u}\bm{x}}^\trsp - + \bm{H}_{\bm{u}\bm{u}}, -\end{aligned} -\end{equation} + + \bm{H}_{\bm{u}\bm{u}}. + \end{aligned} + \right. +\end{equation*} Similarly to Section \ref{sec:LQRLS}, we set \begin{equation} @@ -1403,10 +1446,8 @@ By differentiating w.r.t.\ $\bm{F}$ and equating to zero, we get Similarly to Section \ref{sec:LQRLS}, we decompose $\bm{F}$ as block matrices $\bm{F}_t$ with $t\in\{1,\ldots,T-1\}$. $\bm{F}$ can then be used to iteratively reconstruct regulation gains $\bm{K}_t$, by starting from $\bm{K}_1=\bm{F}_1$, $\bm{P}_1=\bm{I}$, and by computing with forward recursion \begin{equation} -\begin{aligned} - \bm{P}_t &= \bm{P}_{t-1} {(\bm{A}_{t-1} - \bm{B}_{t-1} \bm{K}_{t-1})}^{-1}, \\ - \bm{K}_t &= \bm{F}_t \; \bm{P}_t, -\end{aligned} + \bm{P}_t = \bm{P}_{t-1} {(\bm{A}_{t-1} - \bm{B}_{t-1} \bm{K}_{t-1})}^{-1}, \quad + \bm{K}_t = \bm{F}_t \; \bm{P}_t, \end{equation} from $t=2$ to $t=T-1$. @@ -1504,10 +1545,11 @@ We consider a cost defined by where $\bm{Q}_t$ and $\bm{R}_t$ are weight matrices trading off task and control costs. Such cost is quadratic on $\bm{f}(\bm{x}_t)$ but non-quadratic on $\bm{x}_t$. For the batch formulation of iLQR, the cost in \eqref{eq:dc} then becomes -\begin{multline} - \Delta c(\Delta\bm{x}_t,\Delta\bm{u}_t) \approx 2 \Delta\bm{x}_t^\trsp \bm{J}(\bm{x}_t)^\trsp \bm{Q}_t \bm{f}(\bm{x}_t) + 2 \Delta\bm{u}_t^\trsp \bm{R}_t \, \bm{u} +\\ +\begin{equation} + \Delta c(\Delta\bm{x}_t,\Delta\bm{u}_t) \approx + 2 \Delta\bm{x}_t^\trsp \bm{J}(\bm{x}_t)^\trsp \bm{Q}_t \bm{f}(\bm{x}_t) + 2 \Delta\bm{u}_t^\trsp \bm{R}_t \, \bm{u} + \Delta\bm{x}_t^\trsp \bm{J}(\bm{x}_t)^\trsp \bm{Q}_t \bm{J}(\bm{x}_t) \Delta\bm{x}_t + \Delta\bm{u}_t^\trsp \bm{R}_t \Delta\bm{u}_t, -\end{multline} +\end{equation} which used gradients \begin{equation} \bm{g}_{\bm{x},t} = 2 \bm{J}(\bm{x}_t)^\trsp \bm{Q}_t \bm{f}(\bm{x}_t) @@ -1528,17 +1570,17 @@ with $\bm{J}(\bm{x}_t)=\frac{\partial\bm{f}(\bm{x}_t)}{\partial\bm{x}_t}$ a Jaco At a trajectory level, the evolution of the tracking and control weights is represented by $\bm{Q}\!=\!\mathrm{blockdiag}(\bm{Q}_1,\bm{Q}_2,\ldots,\bm{Q}_T)$ and $\bm{R}\!=\!\mathrm{blockdiag}(\bm{R}_{1},\bm{R}_{2},\ldots,\bm{R}_{T-1})$, respectively. %\in\mathbb{R}^{DCT\times DCT} With a slight abuse of notation, we define $\bm{f}(\bm{x})$ as a vector concatenating the vectors $\bm{f}(\bm{x}_t)$, and $\bm{J}(\bm{x})$ as a block-diagonal concatenation of the Jacobian matrices $\bm{J}(\bm{x}_t)$. 
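As an illustration of this stacking, the following sketch builds $\bm{f}(\bm{x})$ and the block-diagonal $\bm{J}(\bm{x})$ over a horizon, reusing the planar-robot forward kinematics and Jacobian of Section \ref{sec:FK} and taking, as an example, a reaching residual defined as the end-effector position minus a target $\bm{\mu}_t$ (as in the manipulator examples below); these stacked quantities can then be plugged into the Gauss--Newton update given next:
\begin{lstlisting}[language=Python]
import numpy as np
from scipy.linalg import block_diag

l = np.array([2., 2., 1.]) #Links lengths (same values as in the FK example)
L = np.tril(np.ones([3, 3])) #Transformation matrix

def fkin(x):
    """End-effector position of the planar robot (position part only)."""
    return np.array([l @ np.cos(L @ x), l @ np.sin(L @ x)])

def jacobian(x):
    """End-effector position Jacobian of the planar robot."""
    return np.array([-np.sin(L @ x).T @ np.diag(l) @ L,
                      np.cos(L @ x).T @ np.diag(l) @ L])

def stack_residuals(x_traj, mu_traj):
    """Stacked residual f(x) and block-diagonal Jacobian J(x) over the horizon."""
    f = np.concatenate([fkin(x_t) - mu_t for x_t, mu_t in zip(x_traj, mu_traj)])
    J = block_diag(*[jacobian(x_t) for x_t in x_traj])
    return f, J
\end{lstlisting}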
The minimization problem \eqref{eq:minu} then becomes -\begin{multline} - \min_{\Delta\bm{u}} \quad 2 \Delta\bm{x}^{\!\trsp} \bm{J}(\bm{x})^\trsp \bm{Q} \bm{f}(\bm{x}) + 2 \Delta\bm{u}^{\!\trsp} \bm{R} \, \bm{u} \; + \\ +\begin{equation} + \min_{\Delta\bm{u}} \quad 2 \Delta\bm{x}^{\!\trsp} \bm{J}(\bm{x})^\trsp \bm{Q} \bm{f}(\bm{x}) + 2 \Delta\bm{u}^{\!\trsp} \bm{R} \, \bm{u} \; + \Delta\bm{x}^{\!\trsp} \bm{J}(\bm{x})^\trsp \bm{Q} \bm{J}(\bm{x}) \Delta\bm{x} + \Delta\bm{u}^{\!\trsp} \bm{R} \Delta\bm{u}, \quad \text{s.t.} \quad \Delta\bm{x} = \bm{S}_{\bm{u}} \Delta\bm{u}, -\end{multline} +\end{equation} whose least squares solution is given by -\begin{multline} - \Delta\bm{\hat{u}} \!=\! {\big(\bm{S}_{\bm{u}}^\trsp \bm{J}(\bm{x})^\trsp \bm{Q} \bm{J}(\bm{x}) \bm{S}_{\bm{u}} \!+\! \bm{R}\big)}^{\!\!-1} \\ - \!\big(- \bm{S}_{\bm{u}}^\trsp \bm{J}(\bm{x})^\trsp \bm{Q} \bm{f}(\bm{x}) - \bm{R} \, \bm{u} \big), +\begin{equation} + \Delta\bm{\hat{u}} \!=\! {\Big(\bm{S}_{\bm{u}}^\trsp \bm{J}(\bm{x})^\trsp \bm{Q} \bm{J}(\bm{x}) \bm{S}_{\bm{u}} \!+\! \bm{R}\Big)}^{\!\!-1} + \Big(- \bm{S}_{\bm{u}}^\trsp \bm{J}(\bm{x})^\trsp \bm{Q} \bm{f}(\bm{x}) - \bm{R} \, \bm{u} \Big), \label{eq:du} -\end{multline} +\end{equation} which can be used to update the control commands estimates at each iteration step of the iLQR algorithm. In the next sections, we show examples of functions $\bm{f}(\bm{x})$ that can rely on this formulation. @@ -1564,7 +1606,7 @@ In the next sections, we show examples of functions $\bm{f}(\bm{x})$ that can re \begin{figure} \centering -\includegraphics[width=\columnwidth]{images/transformations01.jpg} +\includegraphics[width=.7\columnwidth]{images/transformations01.jpg} \caption{\footnotesize Typical transformations involved in a manipulation task involving a robot, a vision system, a visual marker on the object, and a desired grasping location on the object. } @@ -1618,14 +1660,25 @@ The approach can similarly be extended to target objects/landmarks with position %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsubsection{Bounded joint space} -\begin{figure} +%\begin{figure} +%\centering +%\includegraphics[width=.7\columnwidth]{images/fcut01.png} +%\caption{\footnotesize +%ReLU-like functions used in optimization costs. The derivatives of these functions are simple, providing Jacobians whose entries are either $0$ or $\pm 1$. +%} +%\label{fig:fcut} +%\end{figure} + +\begin{wrapfigure}{r}{.36\textwidth} +\vspace{-20pt} \centering -\includegraphics[width=.7\columnwidth]{images/fcut01.png} +\includegraphics[width=.34\textwidth]{images/fcut01.png} \caption{\footnotesize -ReLU-like functions used in optimization costs. The derivatives of these functions are simple, providing Jacobians whose entries are either $0$ or $\pm 1$. +ReLU-like functions used in optimization costs. The derivatives of these functions are simple, providing Jacobians whose entries are either $0$ or $\pm 1$. } \label{fig:fcut} -\end{figure} +\vspace{-60pt} +\end{wrapfigure} The iLQR solution in \eqref{eq:du} can be used to keep the state within a boundary (e.g., joint angle limits). We denote $\bm{f}(\bm{x})=\bm{f}^\tp{cut}(\bm{x})$ as the vertical concatenation of $\bm{f}^\tp{cut}(\bm{x}_t)$ and $\bm{J}(\bm{x})=\bm{J}^\tp{cut}(\bm{x})$ as a diagonal concatenation of diagonal Jacobian matrices $\bm{J}^\tp{cut}(\bm{x}_t)$. 
Each element $i$ of $\bm{f}^\tp{cut}(\bm{x}_t)$ and $\bm{J}^\tp{cut}(\bm{x}_t)$ is defined as \begin{align*} @@ -1668,7 +1721,7 @@ Based on the above definitions, $\bm{f}(\bm{x})$ and $\bm{J}(\bm{x})$ are in thi \begin{figure} \centering -\includegraphics[width=\columnwidth]{images/iLQR_objectBoundaries01.jpg} +\includegraphics[width=.7\textwidth]{images/iLQR_objectBoundaries01.jpg} \caption{\footnotesize Example of a viapoints task in which a planar robot with 3 joints needs to sequentially reach 2 objects, with object boundaries defining the allowed reaching points on the objects surfaces. \emph{Left:} Reaching task with two viapoints at $t=25$ and $t=50$. \emph{Right:} Corresponding values of the cost function for the end-effector space at $t=25$ and $t=50$. } @@ -1700,14 +1753,34 @@ see also Fig.~\ref{fig:iLQR_manipulator}. \filename{iLQR\_manipulator\_CoM.*} \end{flushright} -\begin{figure} +%\begin{figure} +%\centering +%\includegraphics[width=.3\textwidth]{images/iLQR_CoM01.png} +%\caption{\footnotesize +%Reaching task with a humanoid (side view) by keeping the center of mass in an area defined by the feet location. The starting and ending poses are depicted in different grayscale levels. The corresponding center of mass is depicted by a darker semi-filled disc. The area allowed for the center of mass is depicted as a transparent red rectangle. The task consists of reaching the green object with the end-effector at the end of the movement, while always keeping the center of mass within the desired bounding box during the movement. +%} +%\label{fig:iLQR_CoM} +%\end{figure} + +%\begin{wrapfigure}{r}{.46\textwidth} +%\vspace{-20pt} +%\centering +%\includegraphics[width=.44\textwidth]{images/iLQR_CoM01.png} +%\caption{\footnotesize +%Reaching task with a humanoid (side view) by keeping the center of mass in an area defined by the feet location. The starting and ending poses are depicted in different grayscale levels. The corresponding center of mass is depicted by a darker semi-filled disc. The area allowed for the center of mass is depicted as a transparent red rectangle. The task consists of reaching the green object with the end-effector at the end of the movement, while always keeping the center of mass within the desired bounding box during the movement. +%} +%\label{fig:iLQR_CoM} +%\vspace{-20pt} +%\end{wrapfigure} + +\begin{SCfigure}[50] \centering -\includegraphics[width=.4\columnwidth]{images/iLQR_CoM01.png} +\includegraphics[width=.3\textwidth]{images/iLQR_CoM01.png} \caption{\footnotesize Reaching task with a humanoid (side view) by keeping the center of mass in an area defined by the feet location. The starting and ending poses are depicted in different grayscale levels. The corresponding center of mass is depicted by a darker semi-filled disc. The area allowed for the center of mass is depicted as a transparent red rectangle. The task consists of reaching the green object with the end-effector at the end of the movement, while always keeping the center of mass within the desired bounding box during the movement. } \label{fig:iLQR_CoM} -\end{figure} +\end{SCfigure} If we assume an equal mass for each link concentrated at the joint (i.e., assuming that the motors and gripper are heavier than the link structures), the forward kinematics function to determine the center of a mass of an articulated chain can simply be computed with \begin{equation*} @@ -1745,6 +1818,17 @@ Reaching tasks with a bimanual robot (frontal view). 
\emph{Left:} with a target \label{fig:iLQR_bimanual} \end{figure} +%\begin{wrapfigure}{r}{.36\textwidth} +%\vspace{-20pt} +%\centering +%\includegraphics[width=.34\textwidth]{images/fcut01.png} +%\caption{\footnotesize +%Reaching tasks with a bimanual robot (frontal view). \emph{Left:} with a target for each hand. \emph{Right:} with a target for the hand on the left, while keeping the center of mass at the same location during the whole movement. +%} +%\label{fig:iLQR_bimanual} +%\vspace{-20pt} +%\end{wrapfigure} + We consider a 5-link planar bimanual robot with a torso link shared by the two arms, see Fig.~\ref{fig:iLQR_bimanual}. We define the forward kinematics function as \begin{equation*} \bm{f} = \begin{bmatrix} @@ -1756,16 +1840,16 @@ We consider a 5-link planar bimanual robot with a torso link shared by the two a \end{equation*} %fee = [fkin(x(1:3,:), param.l(1:3)); fkin(x([1,4:5],:), param.l([1,4:5]))] where the first two and last two dimensions of $\bm{f}$ correspond to the position of the left and right end-effectors, respectively. The corresponding Jacobian matrix $\bm{J}\in\mathbb{R}^{4\times 5}$ has entries -\begin{align*} - \bm{J}_\ty{[1,2],[1,2,3]} &= \begin{bmatrix} +\begin{equation*} + \bm{J}_\ty{[1,2],[1,2,3]} = \begin{bmatrix} -\sin(\bm{L}\bm{x}_\ty{[1,2,3]})^\trsp \diag(\bm{\ell}_\ty{[1,2,3]}) \bm{L} \\ \cos(\bm{L}\bm{x}_\ty{[1,2,3]})^\trsp \diag(\bm{\ell}_\ty{[1,2,3]}) \bm{L} - \end{bmatrix},\\ - \bm{J}_\ty{[3,4],[1,4,5]} &= \begin{bmatrix} + \end{bmatrix},\quad + \bm{J}_\ty{[3,4],[1,4,5]} = \begin{bmatrix} -\sin(\bm{L}\bm{x}_\ty{[1,4,5]})^\trsp \diag(\bm{\ell}_\ty{[1,4,5]}) \bm{L} \\ \cos(\bm{L}\bm{x}_\ty{[1,4,5]})^\trsp \diag(\bm{\ell}_\ty{[1,4,5]}) \bm{L} \end{bmatrix},\\ -\end{align*} +\end{equation*} where the remaining entries are zeros. %J = [-sin(L * x([1,2,3],t))' * diag(param.l([1,2,3])) * L; ... % cos(L * x([1,2,3],t))' * diag(param.l([1,2,3])) * L]; @@ -1789,17 +1873,17 @@ with the corresponding Jacobian matrix $\bm{J}^\tp{CoM}\in\mathbb{R}^{2\times 5} \bm{J}^\tp{CoM-L}_{[1,2],1} + \bm{J}^\tp{CoM-R}_{[1,2],1}, & \bm{J}^\tp{CoM-L}_{[1,2],[2,3]}, & \bm{J}^\tp{CoM-R}_{[1,2],[2,3]} \end{bmatrix}, \end{equation*} -\begin{align*} +\begin{equation*} \text{with}\quad - \bm{J}^\tp{CoM-L} &= \frac{1}{6}\begin{bmatrix} + \bm{J}^\tp{CoM-L} = \frac{1}{6}\begin{bmatrix} -\sin(\bm{L}\bm{x}_\ty{[1,2,3]})^\trsp \bm{L} \; \diag(\bm{\ell}_\ty{[1,2,3]}^\trsp \bm{L}) \\ \cos(\bm{L}\bm{x}_\ty{[1,2,3]})^\trsp \bm{L} \; \diag(\bm{\ell}_\ty{[1,2,3]}^\trsp \bm{L}) - \end{bmatrix},\\ - \bm{J}^\tp{CoM-R} &= \frac{1}{6}\begin{bmatrix} + \end{bmatrix},\quad + \bm{J}^\tp{CoM-R} = \frac{1}{6}\begin{bmatrix} -\sin(\bm{L}\bm{x}_\ty{[1,4,5]})^\trsp \bm{L} \; \diag(\bm{\ell}_\ty{[1,4,5]}^\trsp \bm{L}) \\ \cos(\bm{L}\bm{x}_\ty{[1,4,5]})^\trsp \bm{L} \; \diag(\bm{\ell}_\ty{[1,4,5]}^\trsp \bm{L}) \end{bmatrix}. -\end{align*} +\end{equation*} %J0l = [-sin(L * x(1:3,t))' * L * diag(param.l(1:3)' * L); ... % cos(L * x(1:3,t))' * L * diag(param.l(1:3)' * L)] / 6; %J0r = [-sin(L * x([1,4:5],t))' * L * diag(param.l([1,4:5])' * L); ... @@ -1813,14 +1897,25 @@ with the corresponding Jacobian matrix $\bm{J}^\tp{CoM}\in\mathbb{R}^{2\times 5} \filename{iLQR\_obstacle.*} \end{flushright} -\begin{figure} +%\begin{figure} +%\centering +%\includegraphics[width=.5\columnwidth]{images/iLQR_ellipsoidObstacle01.png} +%\caption{\footnotesize +%Reaching task with obstacle avoidance. 
+%} +%\label{fig:iLQR_obstacle} +%\end{figure} + +\begin{wrapfigure}{r}{.32\textwidth} +%\vspace{-20pt} \centering -\includegraphics[width=.5\columnwidth]{images/iLQR_ellipsoidObstacle01.png} +\includegraphics[width=.30\textwidth]{images/iLQR_ellipsoidObstacle01.png} \caption{\footnotesize Reaching task with obstacle avoidance. } \label{fig:iLQR_obstacle} -\end{figure} +\vspace{-80pt} +\end{wrapfigure} By taking as example a point-mass system, iLQR can be used to solve an ellipsoidal obstacle avoidance problem with the cost (typically used with other costs, see Fig.~\ref{fig:iLQR_obstacle} for an example). We first define a ReLU-like function and its gradient as (see also Fig.~\ref{fig:fcut}-\emph{right}) \begin{align*} @@ -1851,14 +1946,14 @@ A similar principle can be applied to robot manipulators by composing forward ki \filename{iLQR\_bimanual\_manipulability.*} \end{flushright} -\begin{figure} +\begin{SCfigure}[50] \centering -\includegraphics[width=.6\columnwidth]{images/iLQR_manipulability01.png} +\includegraphics[width=.4\columnwidth]{images/iLQR_manipulability01.png} \caption{\footnotesize Example of iLQR optimization with a cost on manipulability, where the goal is to reach, at the end of the motion, a desired manipulability defined at the center of mass (CoM) of a bimanual planar robot. The initial pose of the bimanual robot is displayed in light gray and the final pose of the robot is displayed in dark gray. The desired manipulability is represented by the red ellipse. The achieved manipulability at the end of the motion is represented by the gray ellipse. Both manipulability ellipses are displayed at the CoM reached at the end of the motion, displayed as a semi-filled dark gray disc (the CoM at the beginning of the motion is displayed in lighter gray color). We can see that the posture adopted by the robot to approach the desired manipulability is to extend both arms, which is the pose allowing the robot to swiftly move its center of mass in case of unexpected perturbations. } \label{fig:iLQR_manipulability} -\end{figure} +\end{SCfigure} Skills transfer can exploit manipulability ellipsoids in the form of geometric descriptors representing the skills to be transferred to the robot. As these ellipsoids lie on symmetric positive definite (SPD) manifolds, Riemannian geometry can be used to learn and reproduce these descriptors in a probabilistic manner \cite{Calinon20RAM}. @@ -1872,7 +1967,7 @@ In \cite{Jaquier21IJRR}, we showed that a geometric cost on manipulability can a \begin{align} c &= \|\bm{A}\|^2_\text{F}, \quad\text{with}\quad \bm{A} = \log\!\big( \bm{S}^\frac{1}{2} \bm{M}(\bm{x}) \bm{S}^\frac{1}{2} \big), \nonumber\\ \iff\; - c&= \text{trace}(\bm{A}\bm{A}^\trsp)=\sum_i\text{trace}(\bm{A}_i\bm{A}_i^\trsp) = \sum_i\bm{A}_i^\trsp\bm{A}_i \nonumber\\ + c &= \text{trace}(\bm{A}\bm{A}^\trsp)=\sum_i\text{trace}(\bm{A}_i\bm{A}_i^\trsp) = \sum_i\bm{A}_i^\trsp\bm{A}_i \nonumber\\ &= {\text{vec}(\bm{A})}^\trsp \text{vec}(\bm{A}), \label{eq:cManipulability} \end{align} @@ -1896,7 +1991,7 @@ The approach can also be extended to other forms of symmetric positive definite \begin{figure} \centering -\includegraphics[width=\columnwidth]{images/iLQR_humanoid_CP01.png} +\includegraphics[width=.6\columnwidth]{images/iLQR_humanoid_CP01.png} \caption{\footnotesize Reaching task with control primitives. } @@ -1905,12 +2000,12 @@ Reaching task with control primitives. The use of control primitives can be extended to iLQR, by considering $\Delta\bm{u}=\bm{\Psi}\Delta\bm{w}$. 
For problems with quadratic cost on $\bm{f}(\bm{x}_t)$ (see previous Section), the update of the weights vector is then given by -\begin{multline} +\begin{equation} \Delta\bm{\hat{w}} \!=\! {\Big( \bm{\Psi}^{\!\trsp}\bm{S}_{\bm{u}}^\trsp \bm{J}(\bm{x})^\trsp \bm{Q} \bm{J}(\bm{x}) \bm{S}_{\bm{u}} \bm{\Psi} - \!+\! \bm{\Psi}^{\!\trsp} \bm{R}\, \bm{\Psi} \Big)}^{\!\!-1} \\ - \!\Big(- \bm{\Psi}^{\!\trsp} \bm{S}_{\bm{u}}^\trsp \bm{J}(\bm{x})^\trsp \bm{Q} \bm{f}(\bm{x}) - \bm{R} \, \bm{u} \Big). + + \bm{\Psi}^{\!\trsp} \bm{R}\, \bm{\Psi} \Big)}^{\!\!-1} + \Big(- \bm{\Psi}^{\!\trsp} \bm{S}_{\bm{u}}^\trsp \bm{J}(\bm{x})^\trsp \bm{Q} \bm{f}(\bm{x}) - \bm{R} \, \bm{u} \Big). \label{eq:dw} -\end{multline} +\end{equation} For a 5-link kinematic chain, by setting the coordination matrix in \eqref{eq:Psi2} as \begin{equation*} @@ -1925,14 +2020,25 @@ Note also that the iLQR updates in \eqref{eq:dw} can usually be computed faster %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{iLQR for spacetime optimization} -\begin{figure} +%\begin{figure} +%\centering +%\includegraphics[width=\columnwidth]{images/iLQR_spacetime01.png} +%\caption{\footnotesize +%Spacetime optimization with iLQR for a viapoints task, where the estimated durations between consecutive viapoints are shown in blue. +%} +%\label{fig:iLQR_spacetime} +%\end{figure} + +\begin{wrapfigure}{r}{.52\textwidth} +%\vspace{-20pt} \centering -\includegraphics[width=\columnwidth]{images/iLQR_spacetime01.png} +\includegraphics[width=.50\textwidth]{images/iLQR_spacetime01.png} \caption{\footnotesize Spacetime optimization with iLQR for a viapoints task, where the estimated durations between consecutive viapoints are shown in blue. } \label{fig:iLQR_spacetime} -\end{figure} +\vspace{-60pt} +\end{wrapfigure} We define a phase variable $s_t$ starting from $s_1=0$ at the beginning of the movement. With an augmented state $\bm{x}_t={[\mathbf{x}_t^\trsp, s_t]}^\trsp$ and control command $\bm{u}_t={[{\bm{u}^{\mathbf{x}}_t}^\trsp, u^s_t]}^\trsp$, we define the evolution of the system $\bm{x}_{t+1}=\bm{d}(\bm{x}_t,\bm{u}_t)$ as @@ -1962,15 +2068,15 @@ Figure \ref{fig:iLQR_spacetime} shows a viapoints task example in which both pat \filename{iLQR\_manipulator\_object\_affordance.*} \end{flushright} -\begin{figure} +\begin{SCfigure}[50] \centering -\includegraphics[width=.5\columnwidth]{images/iLQR_objAffordances01.png}\hspace{5mm} -\includegraphics[width=.4\columnwidth]{images/iLQR_objAffordances02.png} +\includegraphics[width=.3\textwidth]{images/iLQR_objAffordances01.png}\hspace{5mm} +\includegraphics[width=.2\textwidth]{images/iLQR_objAffordances02.png}\vspace{15mm} \caption{\footnotesize Manipulation with object affordances cost. The depicted \emph{pick-and-place} task requires the offset at the picking location to be the same as the offset at the placing location, which can be achieved by setting a precision matrix with non-zero offdiagonal entries, see main text for details. In this example, an additional cost is set for choosing the picking and placing points within the object boundaries, which was also used for the task depicted in Fig.~\ref{fig:iLQR_manipulator}. We can see with the resulting motions that when picking the object, the robot anticipates what will be done later with this object, which can be viewed as a basic form of object affordance. 
In the two situations depicted in the figure, the robot efficiently chooses a grasping point close to one of the corners of the object, different in the two situations, so that the robot can bring the object to the desired location with less effort and without reaching joint angle limits. } \label{fig:iLQR_manipulator_objAffordances} -\end{figure} +\end{SCfigure} Spatial and/or temporal constraints can be considered by setting $\left[\begin{smallmatrix}\bm{I}&-\bm{I}\\-\bm{I}&\bm{I}\end{smallmatrix}\right]$ in the corresponding entries of the precision matrix $\bm{Q}$. With this formulation, we can constrain two positions to be the same without having to predetermine the position at which the two should meet. Indeed, we can see that a cost $c\!=\!\left[\begin{smallmatrix}x_i\\x_j\end{smallmatrix}\right]^\trsp\left[\begin{smallmatrix}1&-1\\-1&1\end{smallmatrix}\right]\left[\begin{smallmatrix}x_i\\x_j\end{smallmatrix}\right]\!=\!(x_i-x_j)^2$ is minimized when $x_i\!=\!x_j$. @@ -1985,7 +2091,7 @@ Such cost can for example be used to define costs on object affordances, see Fig \begin{figure} \centering -\includegraphics[width=\columnwidth]{images/car-bicopter01.jpg} +\includegraphics[width=.6\columnwidth]{images/car-bicopter01.jpg} \caption{\footnotesize iLQR for car and bicopter control/planning problems. } @@ -1995,15 +2101,15 @@ iLQR for car and bicopter control/planning problems. \noindent\textbf{Velocity Control Input} \newline % A car motion with position $(x_1,x_2)$, car orientation $\theta$, and front wheels orientation $\phi$ (see Fig.~\ref{fig:iLQR_car_bicopter}-\emph{left}) is characterized by equations -\begin{align*} - \dot{x}_1 &= u_1 \cos(\theta),\\ - \dot{x}_2 &= u_1 \sin(\theta),\\ - \dot{\theta} &= \frac{u_1}{\ell} \tan(\phi),\\ - \dot{\phi} &= u_2, -\end{align*} +\begin{equation*} + \dot{x}_1 = u_1 \cos(\theta),\quad + \dot{x}_2 = u_1 \sin(\theta),\quad + \dot{\theta} = \frac{u_1}{\ell} \tan(\phi),\quad + \dot{\phi} = u_2, +\end{equation*} where $\ell$ is the distance between the front and back axles, $u_1$ is the back wheels velocity command and $u_2$ is the front wheels steering velocity command. Its first order Taylor expansion around $(\hat{\theta}, \hat{\phi}, \hat{u}_1)$ is -\begin{multline} +\begin{equation} \setlength\arraycolsep{2pt} \footnotesize %\scriptsize %\tiny \begin{bmatrix} \Delta\dot{x}_1 \\ \Delta\dot{x}_2 \\ \Delta\dot{\theta} \\ \Delta\dot{\phi} \end{bmatrix} = @@ -2013,7 +2119,7 @@ Its first order Taylor expansion around $(\hat{\theta}, \hat{\phi}, \hat{u}_1)$ 0 & 0 & 0 & 0 \end{bmatrix} \begin{bmatrix} \Delta x_1 \\ \Delta x_2 \\ \Delta\theta \\ \Delta\phi \end{bmatrix} - + \\ + + \setlength\arraycolsep{2pt} \footnotesize %\scriptsize %\tiny \begin{bmatrix} \cos(\hat{\theta}) & 0 \\ \sin(\hat{\theta}) & 0 \\ \frac{\tan(\hat{\phi})}{\ell} & \frac{\hat{u}_1}{\ell\cos^2(\hat{\phi})} \\ 0 & 1 \end{bmatrix} \begin{bmatrix} \Delta u_1 \\ \Delta u_2 \end{bmatrix}, -\end{multline} +\end{equation} which can then be converted to a discrete form with \eqref{eq:AdAc}. 
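When deriving such Taylor expansions by hand, it can be convenient to cross-check them numerically. The short sketch below does this for the velocity-controlled car model above: it evaluates the continuous-time Jacobians by finite differences and then applies a simple Euler discretization (one possible reading of \eqref{eq:AdAc}). The function names, step sizes, and the values chosen for $\ell$ and $\Delta t$ are only illustrative and are not part of the main text.
\begin{lstlisting}[language=Python]
import numpy as np

def f_car(x, u, l=0.5):
    # Continuous-time dynamics of the velocity-controlled car,
    # with state x=(x1,x2,theta,phi) and control u=(u1,u2)
    x1, x2, theta, phi = x
    u1, u2 = u
    return np.array([u1 * np.cos(theta),
                     u1 * np.sin(theta),
                     u1 * np.tan(phi) / l,
                     u2])

def finite_diff_jacobians(f, x, u, eps=1e-6):
    # Numerical Jacobians A=df/dx and B=df/du around (x,u)
    fx = f(x, u)
    A = np.zeros((len(fx), len(x)))
    B = np.zeros((len(fx), len(u)))
    for i in range(len(x)):
        dx = np.zeros(len(x)); dx[i] = eps
        A[:, i] = (f(x + dx, u) - fx) / eps
    for i in range(len(u)):
        du = np.zeros(len(u)); du[i] = eps
        B[:, i] = (f(x, u + du) - fx) / eps
    return A, B

# Linearization around a nominal state/control, then Euler discretization
x0 = np.array([0.0, 0.0, 0.3, 0.1])
u0 = np.array([1.0, 0.0])
A, B = finite_diff_jacobians(f_car, x0, u0)
dt = 1e-2
Ad, Bd = np.eye(4) + A * dt, B * dt
\end{lstlisting}
The same check can be reused for the other systems in this section by swapping the dynamics function.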
\newline \noindent\textbf{Acceleration Control Input}\newline % A car motion with position $(x_1,x_2)$, car orientation $\theta$, front wheels orientation $\phi$, and speed $v$ (see Fig.~\ref{fig:iLQR_car_bicopter}-\emph{left}) is characterized by equations -\begin{align*} - \dot{x}_1 &= v \cos(\theta),\\ - \dot{x}_2 &= v \sin(\theta),\\ - \dot{\theta} &= \frac{v}{\ell} \tan(\phi),\\ - \dot{v} &= u_1,\\ - \dot{\phi} &= u_2, -\end{align*} +\begin{equation*} + \dot{x}_1 = v \cos(\theta),\quad + \dot{x}_2 = v \sin(\theta),\quad + \dot{\theta} = \frac{v}{\ell} \tan(\phi),\quad + \dot{v} = u_1,\quad + \dot{\phi} = u_2, +\end{equation*} where $\ell$ is the distance between the front and back axles, $u_1$ is the back wheels acceleration command and $u_2$ is the front wheels steering velocity command. Its first order Taylor expansion around $(\hat{\theta}, \hat{\phi}, \hat{v})$ is -\begin{multline} +\begin{equation} \setlength\arraycolsep{2pt} \footnotesize %\scriptsize %\tiny \begin{bmatrix} \Delta\dot{x}_1 \\ \Delta\dot{x}_2 \\ \Delta\dot{\theta} \\ \Delta\dot{v} \\ \Delta\dot{\phi} \end{bmatrix} = @@ -2049,7 +2155,7 @@ Its first order Taylor expansion around $(\hat{\theta}, \hat{\phi}, \hat{v})$ is 0 & 0 & 0 & 0 & 0 \end{bmatrix} \begin{bmatrix} \Delta x_1 \\ \Delta x_2 \\ \Delta\theta \\ \Delta v \\ \Delta\phi \end{bmatrix} - + \\ + + \setlength\arraycolsep{2pt} \footnotesize %\scriptsize %\tiny \begin{bmatrix} 0 & 0 \\ 0 & 0 \\ 0 & 0 \\ 1 & 0 \\ 0 & 1 \end{bmatrix} \begin{bmatrix} \Delta u_1 \\ \Delta u_2 \end{bmatrix}, -\end{multline} +\end{equation} which can then be converted to a discrete form with \eqref{eq:AdAc}. @@ -2070,13 +2176,13 @@ which can then be converted to a discrete form with \eqref{eq:AdAc}. \end{flushright} A planar bicopter of mass $m$ and inertia $I$ actuated by two propellers at distance $\ell$ (see Fig.~\ref{fig:iLQR_car_bicopter}-\emph{right}) is characterized by equations -\begin{align*} - \ddot{x}_1 &= -\frac{1}{m} (u_1 + u_2) \sin(\theta),\\ - \ddot{x}_2 &= \frac{1}{m} (u_1 + u_2) \cos(\theta) - g,\\ - \ddot{\theta} &= \frac{\ell}{I} (u_1 - u_2), -\end{align*} +\begin{equation*} + \ddot{x}_1 = -\frac{1}{m} (u_1 + u_2) \sin(\theta),\quad + \ddot{x}_2 = \frac{1}{m} (u_1 + u_2) \cos(\theta) - g,\quad + \ddot{\theta} = \frac{\ell}{I} (u_1 - u_2), +\end{equation*} with acceleration $g\!=\!9.81$ due to gravity. Its first order Taylor expansion around $(\hat{\theta},\hat{u}_1,\hat{u}_2)$ is -\begin{multline} +\begin{equation} \setlength\arraycolsep{2pt} \footnotesize %\scriptsize %\tiny \begin{bmatrix} \Delta\dot{x}_1 \\ \Delta\dot{x}_2 \\ \Delta\dot{\theta} \\ \Delta\ddot{x}_1 \\ \Delta\ddot{x}_2 \\ \Delta\ddot{\theta} \end{bmatrix} = @@ -2088,7 +2194,7 @@ with acceleration $g\!=\!9.81$ due to gravity. Its first order Taylor expansion 0 & 0 & 0 & 0 & 0 & 0 \end{bmatrix} \begin{bmatrix} \Delta x_1 \\ \Delta x_2 \\ \Delta\theta \\ \Delta\dot{x}_1 \\ \Delta\dot{x}_2 \\ \Delta\dot{\theta} \end{bmatrix} - + \\ + + \setlength\arraycolsep{2pt} \footnotesize %\scriptsize %\tiny \begin{bmatrix} 0 & 0 \\ 0 & 0 \\ 0 & 0 \\ -\frac{1}{m} \sin(\hat{\theta}) & -\frac{1}{m} \sin(\hat{\theta}) \\ \frac{1}{m} \cos(\hat{\theta}) & \frac{1}{m} \cos(\hat{\theta}) \\ \frac{\ell}{I} & -\frac{\ell}{I} \end{bmatrix} \begin{bmatrix} \Delta u_1 \\ \Delta u_2 \end{bmatrix}, -\end{multline} +\end{equation} which can then be converted to a discrete form with \eqref{eq:AdAc}. 
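The bicopter Jacobians can also be assembled directly in code; the sketch below does so and applies the same Euler discretization as in the previous sketch. The helper name and the values of $m$, $I$, $\ell$ and $\Delta t$ are placeholders for illustration only.
\begin{lstlisting}[language=Python]
import numpy as np

def bicopter_linearization(theta, u1, u2, m=1.0, I=0.1, l=0.2):
    # Continuous-time Jacobians of the planar bicopter with state
    # (x1, x2, theta, dx1, dx2, dtheta) and propeller commands (u1, u2)
    A = np.zeros((6, 6))
    A[:3, 3:] = np.eye(3)                     # velocities enter the position rows
    A[3, 2] = -(u1 + u2) * np.cos(theta) / m  # d(ddx1)/d(theta)
    A[4, 2] = -(u1 + u2) * np.sin(theta) / m  # d(ddx2)/d(theta)
    B = np.array([
        [0.0, 0.0], [0.0, 0.0], [0.0, 0.0],
        [-np.sin(theta) / m, -np.sin(theta) / m],
        [ np.cos(theta) / m,  np.cos(theta) / m],
        [ l / I, -l / I]])
    return A, B

# Linearization around hovering (theta=0, u1=u2=m*g/2), then Euler discretization
m, g, dt = 1.0, 9.81, 1e-2
A, B = bicopter_linearization(theta=0.0, u1=m*g/2, u2=m*g/2, m=m)
Ad, Bd = np.eye(6) + A * dt, B * dt
\end{lstlisting}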
@@ -2169,64 +2275,62 @@ which can then be converted to a discrete form with \eqref{eq:AdAc}. \end{flushright} The nonlinear dynamic equation of a planar robot presented in Section \ref{sec:FD} can be expressed as -\begin{equation}\label{eq:dis_dynamic} -\begin{bmatrix} \bm{q}_{t+1} \\ \dot{\bm{q}}_{t+1} -\end{bmatrix}= \begin{bmatrix} \bm{q}_{t} + \dot{\bm{q}}_{t} dt \\ \dot{\bm{q}}_{t} + \bm{M}^{-1}\Big(\bm{u} + \bm{g} + \bm{C}\text{diag}(\bm{\dot{q}}_t)\bm{\dot{q}_t}\Big) dt -\end{bmatrix}. +\begin{equation} + \begin{bmatrix} \bm{q}_{t+1} \\ \dot{\bm{q}}_{t+1} + \end{bmatrix}= \begin{bmatrix} \bm{q}_{t} + \dot{\bm{q}}_{t} dt \\ \dot{\bm{q}}_{t} + \bm{M}^{-1}\Big(\bm{u} + \bm{g} + \bm{C}\text{diag}(\bm{\dot{q}}_t)\bm{\dot{q}_t}\Big) dt + \end{bmatrix}. + \label{eq:dis_dynamic} \end{equation} For simplicity we start with \eqref{eq:matix_form_dynamic_abs_angle_net_forces}, and at the end, we will describe how the result can be converted to the other choice of variables. The iLQR method requires \eqref{eq:dis_dynamic} to be linearized around the current states and inputs. The corresponding $\bm{A}_t$ and $\bm{B}_t$ can be found according to \eqref{eq:DS} as -\begin{align} -\bm{A}_t =& \begin{bmatrix} -\bm{I} & \bm{I} dt \\ \bm{A}_{21}dt & \bm{I}+2 \bm{M}^{-1} \bm{C} \text{diag}(\dot{\bm{ -q}}_t) dt -\end{bmatrix}, \nonumber \\ - \bm{B}_t =& \begin{bmatrix} -\bm{0} \\ \bm{M}^{-1} dt -\end{bmatrix}.\nonumber -\end{align} +\begin{equation*} + \bm{A}_t = \begin{bmatrix} + \bm{I} & \bm{I} dt \\ \bm{A}_{21}dt & \bm{I}+2 \bm{M}^{-1} \bm{C} \text{diag}(\dot{\bm{ + q}}_t) dt + \end{bmatrix}, \quad + \bm{B}_t = \begin{bmatrix} + \bm{0} \\ \bm{M}^{-1} dt + \end{bmatrix}. +\end{equation*} $\bm{A}_{21} = \frac{\partial \ddot{\bm{q}}_t}{\partial \bm{q}_t}$ can be found by taking the derivative of \eqref{eq:matix_form_dynamic_abs_angle_net_forces} w.r.t. $\bm{q}_t$. The $p$-\emph{th} column of $\bm{A}_{21}$ is the partial derivative of $\ddot{\bm{q}}_t$ w.r.t. the $p$-\emph{th} joint angle $q_p^t$ at time $t$, which can be formulated as -\begin{multline*} - \frac{\partial \ddot{\bm{q}}_t}{\partial q_p^t} = \frac{\partial \bm{M}^{-1}}{\partial q_p^t} \Big(\bm{u} + \bm{g} + \bm{C}\text{diag}(\bm{\dot{q}}_t)\bm{\dot{q}}_t\Big) + \\ +\begin{equation*} + \frac{\partial \ddot{\bm{q}}_t}{\partial q_p^t} = \frac{\partial \bm{M}^{-1}}{\partial q_p^t} + \Big(\bm{u} + \bm{g} + \bm{C}\text{diag}(\bm{\dot{q}}_t)\bm{\dot{q}}_t\Big) + \bm{M}^{-1} \Big(\frac{\partial \bm{g}}{\partial q_p^t} + \frac{\partial \bm{C}}{\partial q_p^t} \text{diag}(\dot{\bm{q}}_t)\dot{\bm{q}}_t\Big). 
-\end{multline*} +\end{equation*} Having this done for all \emph{p}'s at time $t$, we can write -\begin{multline*} - \frac{\partial \ddot{\bm{q}}_t}{\partial \bm{q}_t} = \Big(\bm{M}^{-1}\Big)'\Big(\bm{u} + \bm{g} + \bm{C}\text{diag}(\bm{\dot{q}}_t)\bm{\dot{q}}_t\Big) + \\ +\begin{equation*} + \frac{\partial \ddot{\bm{q}}_t}{\partial \bm{q}_t} = \Big(\bm{M}^{-1}\Big)'\Big(\bm{u} + \bm{g} + \bm{C}\text{diag}(\bm{\dot{q}}_t)\bm{\dot{q}}_t\Big) + \bm{M}^{-1} \Big(\bm{g}' + \bm{C}' \text{diag}(\dot{\bm{q}}_t)\dot{\bm{q}}_t\Big), -\end{multline*} +\end{equation*} where \begin{equation*} -\bm{g}(i,j)=\begin{cases} + \bm{g}(i,j)=\begin{cases} \sum_{j=i}^N m_jl_i g s_i & \text{if } i = j \\ 0 & \text{otherwise} \end{cases}, \end{equation*} \begin{equation*} -\bm{C}' =\begin{bmatrix} -\frac{\partial \bm{C}}{\partial q_1^t} & \frac{\partial \bm{C}}{\partial q_2^t} & \cdots & \frac{\partial \bm{C}}{\partial q_N^t} -\end{bmatrix}, -\end{equation*} -\begin{equation*} -\bm{C}'(h,k,p)=\begin{cases} + \bm{C}' =\begin{bmatrix} + \frac{\partial \bm{C}}{\partial q_1^t} & \frac{\partial \bm{C}}{\partial q_2^t} & \cdots & \frac{\partial \bm{C}}{\partial q_N^t} + \end{bmatrix},\quad + \bm{C}'(h,k,p)=\begin{cases} -l_hl_k c_{h-k} \sum_{j=h}^N m_j \mathds{1}(j-k) & \text{if} \ p = h \neq k \\ l_hl_k c_{h-k} \sum_{j=h}^N m_j \mathds{1}(j-k) & \text{if} \ p = k \neq h\\ 0 & \text{otherwise} \end{cases}, \end{equation*} \begin{equation*} -(\bm{M}^{-1})' =\begin{bmatrix} -\frac{\partial \bm{M}^{-1}}{\partial q_1^t} & \frac{\partial \bm{M}^{-1}}{\partial q_2^t} & \cdots & \frac{\partial \bm{M}^{-1}}{\partial q_N^t} -\end{bmatrix}, -\end{equation*} -\begin{equation*} -\Big(\bm{M}^{-1}\Big)'= -\bm{M}^{-1}\bm{M}'\bm{M}^{-1}, + (\bm{M}^{-1})' =\begin{bmatrix} + \frac{\partial \bm{M}^{-1}}{\partial q_1^t} & \frac{\partial \bm{M}^{-1}}{\partial q_2^t} & \cdots & \frac{\partial \bm{M}^{-1}}{\partial q_N^t} + \end{bmatrix},\quad + \Big(\bm{M}^{-1}\Big)'= -\bm{M}^{-1}\bm{M}'\bm{M}^{-1}, \end{equation*} \begin{equation*} -\bm{M}'(h,k,p)= \begin{cases} + \bm{M}'(h,k,p)= \begin{cases} -l_hl_k s_{h-k} \sum_{j=k}^N m_j \mathds{1}(j-k) & \text{if} \ p = h \neq k\\ l_hl_k s_{h-k} \sum_{j=h}^N m_j \mathds{1}(j-k) & \text{if} \ p = k \neq h \\ 0 & \text{otherwise} @@ -2237,38 +2341,36 @@ According to Section \ref{sec:iLQRbatch}, we can concatenate the results for all \begin{equation}\label{eq:transfer_matrix_abs_ang_gen_force} \begin{bmatrix} \Delta \bm{q}_1 \\ \Delta \bm{\dot{q}}_1 \\ \vdots \\ \Delta \bm{q}_T \\ \Delta \bm{\dot{q}}_T -\end{bmatrix} = \bm{S}_u^q \begin{bmatrix} -\Delta \bm{u}_1 \\ \Delta \bm{u}_2 \\ \vdots \\ \Delta \bm{u}_{T-1} -\end{bmatrix}, + \end{bmatrix} = \bm{S}_u^q \begin{bmatrix} + \Delta \bm{u}_1 \\ \Delta \bm{u}_2 \\ \vdots \\ \Delta \bm{u}_{T-1} + \end{bmatrix}, \end{equation} where $T$ is the total time horizon. 
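For reference, once the per-step linearizations $\bm{A}_t$ and $\bm{B}_t$ are available, the sparse transfer matrix in \eqref{eq:transfer_matrix_abs_ang_gen_force} can be assembled with a generic routine following the batch construction recalled in \eqref{eq:SxSu}. The sketch below is a minimal illustration of that construction; the helper function and the random matrices at the end only serve to exercise it and are not part of the robot model.
\begin{lstlisting}[language=Python]
import numpy as np

def batch_transfer_matrices(A_list, B_list):
    # Build Sx and Su such that the stacked perturbation satisfies
    # dx = Sx dx_1 + Su du, given x_{t+1} = A_t x_t + B_t u_t for t=1..T-1
    d, m = A_list[0].shape[0], B_list[0].shape[1]
    T = len(A_list) + 1
    Sx = np.zeros((d * T, d))
    Su = np.zeros((d * T, m * (T - 1)))
    Sx[:d, :] = np.eye(d)
    for t in range(1, T):
        # products of A matrices for the state part
        Sx[d*t:d*(t+1), :] = A_list[t-1] @ Sx[d*(t-1):d*t, :]
        # propagate the previous control columns and append B_t
        Su[d*t:d*(t+1), :m*(t-1)] = A_list[t-1] @ Su[d*(t-1):d*t, :m*(t-1)]
        Su[d*t:d*(t+1), m*(t-1):m*t] = B_list[t-1]
    return Sx, Su

# Small test with arbitrary per-step linearizations (T=4, 2D state, 1D control)
rng = np.random.default_rng(0)
A_list = [rng.standard_normal((2, 2)) for _ in range(3)]
B_list = [rng.standard_normal((2, 1)) for _ in range(3)]
Sx, Su = batch_transfer_matrices(A_list, B_list)
\end{lstlisting}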
These variables can be converted to relative angles and joint torque commands by \begin{equation*} \begin{bmatrix} \Delta \bm{q}_1 \\ \Delta \bm{\dot{q}}_1 \\ \vdots \\ \Delta \bm{q}_T \\ \Delta \bm{\dot{q}}_T -\end{bmatrix} = \underset{\bm{T}_x}{\underbrace{\begin{bmatrix} -\bm{T} & \bm{0} & \hdots & \bm{0} \\ \bm{0} & \bm{T} & \hdots & \bm{0} \\ \vdots & \vdots & \ddots & \vdots \\ \bm{0} & \bm{0} &\hdots & \bm{T} -\end{bmatrix}}} \begin{bmatrix} - \Delta \bm{x}_1 \\ \Delta \bm{\dot{x}}_1 \\ \vdots \\ \Delta \bm{x}_T \\ \Delta \bm{\dot{x}}_T -\end{bmatrix}, -\end{equation*} -\begin{equation*} -\begin{bmatrix} -\Delta \bm{u}_1 \\ \Delta \bm{u}_2 \\ \vdots \\ \Delta \bm{u}_{T-1} -\end{bmatrix} = \underset{\bm{T}_u}{\underbrace{\begin{bmatrix} -\bm{T}^{-\trsp} & \bm{0} & \hdots & \bm{0} \\ \bm{0} & \bm{T}^{-\trsp} & \hdots & \bm{0} \\ \vdots & \vdots & \ddots & \vdots \\ \bm{0} & \bm{0} &\hdots & \bm{T}^{-\trsp} -\end{bmatrix}}} \begin{bmatrix} -\Delta \bm{\tau}_1 \\ \Delta \bm{\tau}_2 \\ \vdots \\ \Delta \bm{\tau}_{T-1} -\end{bmatrix}, + \end{bmatrix} = \underset{\bm{T}_x}{\underbrace{\begin{bmatrix} + \bm{T} & \bm{0} & \hdots & \bm{0} \\ \bm{0} & \bm{T} & \hdots & \bm{0} \\ \vdots & \vdots & \ddots & \vdots \\ \bm{0} & \bm{0} &\hdots & \bm{T} + \end{bmatrix}}} \begin{bmatrix} + \Delta \bm{x}_1 \\ \Delta \bm{\dot{x}}_1 \\ \vdots \\ \Delta \bm{x}_T \\ \Delta \bm{\dot{x}}_T + \end{bmatrix},\quad + \begin{bmatrix} + \Delta \bm{u}_1 \\ \Delta \bm{u}_2 \\ \vdots \\ \Delta \bm{u}_{T-1} + \end{bmatrix} = \underset{\bm{T}_u}{\underbrace{\begin{bmatrix} + \bm{T}^{-\trsp} & \bm{0} & \hdots & \bm{0} \\ \bm{0} & \bm{T}^{-\trsp} & \hdots & \bm{0} \\ \vdots & \vdots & \ddots & \vdots \\ \bm{0} & \bm{0} &\hdots & \bm{T}^{-\trsp} + \end{bmatrix}}} \begin{bmatrix} + \Delta \bm{\tau}_1 \\ \Delta \bm{\tau}_2 \\ \vdots \\ \Delta \bm{\tau}_{T-1} + \end{bmatrix}, \end{equation*} so \eqref{eq:transfer_matrix_abs_ang_gen_force} can be rewritten as \begin{equation*} \begin{bmatrix} \Delta \bm{x}_1 \\ \Delta \bm{\dot{x}}_1 \\ \vdots \\ \Delta \bm{x}_T \\ \Delta \bm{\dot{x}}_T -\end{bmatrix} = \bm{S}_{\tau}^x \begin{bmatrix} -\Delta \bm{\tau}_1 \\ \Delta \bm{\tau}_2 \\ \vdots \\ \Delta \bm{\tau}_{T-1} -\end{bmatrix} = \bm{T}_x^{-1} \bm{S}_u^q \bm{T}_u \begin{bmatrix} -\Delta \bm{\tau}_1 \\ \Delta \bm{\tau}_2 \\ \vdots \\ \Delta \bm{\tau}_{T-1} -\end{bmatrix}. + \end{bmatrix} = \bm{S}_{\tau}^x \begin{bmatrix} + \Delta \bm{\tau}_1 \\ \Delta \bm{\tau}_2 \\ \vdots \\ \Delta \bm{\tau}_{T-1} + \end{bmatrix} = \bm{T}_x^{-1} \bm{S}_u^q \bm{T}_u \begin{bmatrix} + \Delta \bm{\tau}_1 \\ \Delta \bm{\tau}_2 \\ \vdots \\ \Delta \bm{\tau}_{T-1} + \end{bmatrix}. 
\end{equation*} @@ -2300,8 +2402,8 @@ By writing \bm{x}_{2} &= \bm{A}_1 \bm{x}_1 + \bm{B}_1 \bm{u}_1 ,\\ \bm{x}_{3} &= \bm{A}_2 \bm{x}_2 + \bm{B}_2 \bm{u}_2 = \bm{A}_2 (\bm{A}_1 \bm{x}_1 + \bm{B}_1 \bm{u}_1) + \bm{B}_2 \bm{u}_2 ,\\[-2mm] &\vdots\\[-2mm] - \bm{x}_{T} &= \left(\prod_{t=1}^{T-1} \bm{A}_{T-t}\right) \bm{x}_1 \;+ \left(\prod_{t=1}^{T-2} \bm{A}_{T-t}\right) \bm{B}_1 \bm{u}_1 \;+ \\ - & \hspace{14mm} \left(\prod_{t=1}^{T-3} \bm{A}_{T-t}\right) \bm{B}_2 \bm{u}_2 \;+\; \cdots \;+\; \bm{B}_{T-1} \bm{u}_{T-1}, + \bm{x}_{T} &= \left(\prod_{t=1}^{T-1} \bm{A}_{T-t}\right) \bm{x}_1 \;+ \left(\prod_{t=1}^{T-2} \bm{A}_{T-t}\right) \bm{B}_1 \bm{u}_1 + + \left(\prod_{t=1}^{T-3} \bm{A}_{T-t}\right) \bm{B}_2 \bm{u}_2 \;+\; \cdots \;+\; \bm{B}_{T-1} \bm{u}_{T-1}, \end{align*} in a matrix form, we get an expression of the form $\bm{x}=\bm{S}_{\bm{x}}\bm{x}_1+\bm{S}_{\bm{u}}\bm{u}$, with \begin{equation} @@ -2324,10 +2426,7 @@ in a matrix form, we get an expression of the form $\bm{x}=\bm{S}_{\bm{x}}\bm{x} \prod_{t=1}^{T-1} \bm{A}_{T-t} \end{bmatrix}}_{\bm{S}_{\bm{x}}} \bm{x}_1 - \quad + \hspace{36mm} - \label{eq:SxSu} -\end{equation} -\begin{equation*} + + \footnotesize %\scriptsize %\tiny \underbrace{ \begin{bmatrix} @@ -2344,7 +2443,8 @@ in a matrix form, we get an expression of the form $\bm{x}=\bm{S}_{\bm{x}}\bm{x} \vdots\\ \bm{u}_{T\!-\!1} \end{bmatrix}}_{\bm{u}}, -\end{equation*} + \label{eq:SxSu} +\end{equation} where $\bm{S}_{\bm{x}}\!\in\!\mathbb{R}^{dT\times d}$, $\bm{x}_1\!\in\!\mathbb{R}^d$, $\bm{S}_{\bm{u}}\!\in\!\mathbb{R}^{dT\times d(T-1)}$ and $\bm{u}\!\in\!\mathbb{R}^{d(T-1)}$. @@ -2361,49 +2461,55 @@ A single integrator is simply defined as $\bm{x}_{t+1} = \bm{x}_t + \bm{u}_t \De \section{Derivation of motion equation for a planar robot}\label{sec:FD_derivation} We derive each element in \eqref{eq:Lag_with_generalized_forces} individually for $j \geq z$, then combine them altogether to derive the dynamic equation of the system. In this regard, for the first element in \eqref{eq:Lag_with_generalized_forces}, we can write -\begin{align*} - \frac{\partial T_j}{\partial \dot{q}_z} &= m_j \Big(\frac{\partial \dot{f}_{j,1}}{\partial \dot{q}_z} \dot{f}_{j,1} - + \frac{\partial \dot{f}_{j,2}}{\partial \dot{q}_z} \dot{f}_{j,2}\Big) = \\ - &= m_j \Big(\Big(- l_z s_z\Big) \dot{f}_{j,1} +\Big(l_z c_z\Big) \dot{f}_{j,2}\Big), \nonumber -\end{align*} +\begin{equation*} + \frac{\partial T_j}{\partial \dot{q}_z} = m_j \Big(\frac{\partial \dot{f}_{j,1}}{\partial \dot{q}_z} \dot{f}_{j,1} + + \frac{\partial \dot{f}_{j,2}}{\partial \dot{q}_z} \dot{f}_{j,2}\Big) = + m_j \Big(\Big(- l_z s_z\Big) \dot{f}_{j,1} +\Big(l_z c_z\Big) \dot{f}_{j,2}\Big), \nonumber +\end{equation*} whose time derivative can be expressed as -\begin{align*} - \frac{d}{dt} \frac{\partial T_j}{\partial \dot{q}_z} &= - m_j \Big(\Big(- l_z \dot{q}_z c_z\Big) \dot{f}_{j,1} - \Big(l_z s_z\Big) \ddot{f}_{j,1} \\ - &\hspace{6mm} - \Big(l_z \dot{q}_z s_z\Big) \dot{f}_{j,2} + \Big(l_z c_z\Big) \ddot{f}_{j,2}\Big) \\ -\end{align*} +\begin{equation*} + \frac{d}{dt} \frac{\partial T_j}{\partial \dot{q}_z} = + m_j \Big(\Big(- l_z \dot{q}_z c_z\Big) \dot{f}_{j,1} - \Big(l_z s_z\Big) \ddot{f}_{j,1} + - \Big(l_z \dot{q}_z s_z\Big) \dot{f}_{j,2} + \Big(l_z c_z\Big) \ddot{f}_{j,2}\Big), +\end{equation*} where -\begin{align*} - \ddot{f}_{j,1} &= -\sum_{k=1}^j l_k \ddot{q}_k s_k -\sum_{k=1}^j l_k \dot{q}_k^2 c_k \\ - \ddot{f}_{j,2} &= \sum_{k=1}^j l_k \ddot{q}_k c_k -\sum_{k=1}^j l_k \dot{q}_k^2 s_k. 
-\end{align*} +\begin{equation*} + \ddot{f}_{j,1} = -\sum_{k=1}^j l_k \ddot{q}_k s_k -\sum_{k=1}^j l_k \dot{q}_k^2 c_k,\quad + \ddot{f}_{j,2} = \sum_{k=1}^j l_k \ddot{q}_k c_k -\sum_{k=1}^j l_k \dot{q}_k^2 s_k. +\end{equation*} For the second term in \eqref{eq:Lag_with_generalized_forces}, we can write -\begin{align*} - \frac{\partial T_j}{\partial q_z} &= m_j \Big(\frac{\partial \dot{f}_{j,1}}{\partial q_z} \dot{f}_{j,1} + - \frac{\partial \dot{f}_{j,2}}{\partial q_z} \dot{f}_{j,2}\Big) \label{eq:lagrangian_second_term}\\ - &= m_j \Bigg(\Big(-l_z \dot{q}_z c_z \Big) \dot{f}_{j,1} \nonumber - \Big(l_z \dot{q}_z s_z\Big) \dot{f}_{j,2}\Bigg), -\end{align*} +\begin{equation} + \frac{\partial T_j}{\partial q_z} = m_j \Big(\frac{\partial \dot{f}_{j,1}}{\partial q_z} \dot{f}_{j,1} + + \frac{\partial \dot{f}_{j,2}}{\partial q_z} \dot{f}_{j,2}\Big) + = m_j \Bigg(\Big(-l_z \dot{q}_z c_z \Big) \dot{f}_{j,1} - \Big(l_z \dot{q}_z s_z\Big) \dot{f}_{j,2}\Bigg), + \label{eq:lagrangian_second_term} +\end{equation} and finally, the potential energy term can be calculated as \begin{equation*} \frac{\partial U_j}{\partial q_z} = m_j g l_z c_z. \end{equation*} The $z$-\emph{th} generalized force can be found by substituting the derived terms into \eqref{eq:Lag_with_generalized_forces} as -\begin{multline*} - u_z = \sum_{j=z}^N \Big(m_j \Big(\Big(- l_z \dot{q}_z c_z\Big) \dot{r}_{j,1} - \Big(l_z s_k\Big) \ddot{r}_{j,1} - \Big(l_z \dot{q}_z s_z\Big) \dot{r}_{j,2} \\ + \Big(l_z c_z\Big) \ddot{r}_{j,2}\Big) - m_j \Big(\Big(-l_z \dot{q}_z c_z\Big)\dot{r}_{j,1} - \Big(l_z \dot{q}_z s_z\Big) \dot{r}_{j,2}\Big) \\ + m_j g l_z c_z\Big)\\ = \sum_{j=z}^N \Big(m_j \Big(\Big(-l_z s_z\Big) \ddot{r}_{j,1} + \Big(l_z c_z\Big) \ddot{r}_{j,2}\Big) +m_j g l_z c_z\Big)\\ -= \sum_{j=z}^N \Big(m_j \Big(\Big(-l_z s_z\Big) \Big(-\sum_{k=1}^j l_k \ddot{q}_k s_k -\sum_{k=1}^j l_k \dot{q}_k^2 c_k\Big) + \\ \Big(l_z c_z\Big) \Big(\sum_{k=1}^j l_k \ddot{q}_k c_k- \sum_{k=1}^j l_k \dot{q}_k^2 s_k\Big)\Big) +m_j g l_z c_z \Big) \\= \sum_{j=z}^{N}m_j \Big( \sum_{k=1}^{j} l_z l_k c_{z-k}\ddot{q}_k + \sum_{k=1}^{j} l_z l_k s_{z-k}\dot{q}_k^2\Big) + \sum_{j=z}^{N}m_j g l_z c_z, -\end{multline*} +\begin{align*} + u_z &= \sum_{j=z}^N \Big(m_j \Big(\Big(- l_z \dot{q}_z c_z\Big) \dot{r}_{j,1} - \Big(l_z s_z\Big) \ddot{r}_{j,1} - \Big(l_z \dot{q}_z s_z\Big) \dot{r}_{j,2} + + \Big(l_z c_z\Big) \ddot{r}_{j,2}\Big) - m_j \Big(\Big(-l_z \dot{q}_z c_z\Big)\dot{r}_{j,1} - \Big(l_z \dot{q}_z s_z\Big) \dot{r}_{j,2}\Big) + + m_j g l_z c_z\Big)\\ + &= \sum_{j=z}^N \Big(m_j \Big(\Big(-l_z s_z\Big) \ddot{r}_{j,1} + \Big(l_z c_z\Big) \ddot{r}_{j,2}\Big) +m_j g l_z c_z\Big)\\ + &= \sum_{j=z}^N \Big(m_j \Big(\Big(-l_z s_z\Big) \Big(-\sum_{k=1}^j l_k \ddot{q}_k s_k -\sum_{k=1}^j l_k \dot{q}_k^2 c_k\Big) + + \Big(l_z c_z\Big) \Big(\sum_{k=1}^j l_k \ddot{q}_k c_k- \sum_{k=1}^j l_k \dot{q}_k^2 s_k\Big)\Big) +m_j g l_z c_z \Big) \\ + &= \sum_{j=z}^{N}m_j \Big( \sum_{k=1}^{j} l_z l_k c_{z-k}\ddot{q}_k + \sum_{k=1}^{j} l_z l_k s_{z-k}\dot{q}_k^2\Big) + \sum_{j=z}^{N}m_j g l_z c_z, +\end{align*} where \begin{equation*} c_{h-k} = c_hc_k+s_hs_k, \ \ \ s_{h-k} = s_hc_k-c_hs_k. \end{equation*} By rearranging the order of elements, we can write -\begin{multline*} - \sum_{j=z}^{N}m_j \Big(\sum_{k=1}^{j}l_zl_k c_{z-k}\ddot{q}_k\Big) = \\ +\begin{equation*} + \sum_{j=z}^{N}m_j \Big(\sum_{k=1}^{j}l_zl_k c_{z-k}\ddot{q}_k\Big) = u_z - \sum_{j=z}^{N}m_j \Big(\sum_{k=1}^{j} l_zl_k s_{z-k}\dot{q}_k^2\Big) - \sum_{j=z}^{N}m_j g l_z c_z. 
-\end{multline*} +\end{equation*} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -2418,7 +2524,7 @@ The evolution of the system is described by the linear relation $\bm{\dot{x}}_t= For $t\!\leq\!\frac{T}{4}$ (left hand holding the ball), we have \begin{equation} \setlength\arraycolsep{2.5pt} - \scriptsize %\tiny + %\scriptsize %\tiny \underbrace{ \left[\begin{array}{c} \mathbf{\dot{x}}_{1,t}\\ @@ -2492,7 +2598,7 @@ For $t\!\leq\!\frac{T}{4}$ (left hand holding the ball), we have At $t\!=\!\frac{T}{2}$ (right hand hitting the ball), we have \begin{equation} \setlength\arraycolsep{2pt} - \scriptsize %\tiny + %\scriptsize %\tiny \underbrace{ \left[\begin{array}{c} \mathbf{\dot{x}}_{1,t}\\ @@ -2566,7 +2672,7 @@ At $t\!=\!\frac{T}{2}$ (right hand hitting the ball), we have For $\frac{T}{4}\!<\!t\!<\!\frac{T}{2}$ and $t\!>\!\frac{T}{2}$ (free motion of the ball), we have \begin{equation} \setlength\arraycolsep{2pt} - \scriptsize %\tiny + %\scriptsize %\tiny \underbrace{ \left[\begin{array}{c} \mathbf{\dot{x}}_{1,t}\\ @@ -2652,19 +2758,19 @@ which can then be used to define sparse transfer matrices $\bm{S}_{\bm{x}}$ and \section{Equivalence between LQT and LQR with augmented state space}\label{app:LQTLQR} The equivalence between the original LQT problem and the corresponding LQR problem with an augmented state space can be shown by using the block matrices composition rule -\begin{align*} +\begin{equation*} %\setlength\arraycolsep{2.5pt} - \bm{M} &= + \bm{M} = \footnotesize %\scriptsize %\tiny \begin{bmatrix} \bm{M}_{11} & \bm{M}_{12} \\ \bm{M}_{21} & \bm{M}_{22} \end{bmatrix} - \quad\iff\\ - \bm{M}^{-1} &= + \quad\iff\quad + \bm{M}^{-1} = \footnotesize %\scriptsize %\tiny \begin{bmatrix} \bm{I} & \bm{0} \\ -\bm{M}_{22}^{-1} \bm{M}_{21} & \bm{I} \end{bmatrix} \begin{bmatrix} \bm{S}^{-1} & \bm{0} \\ \bm{0} & \bm{M}_{22}^{-1} \end{bmatrix} \begin{bmatrix} \bm{I} & -\bm{M}_{12} \bm{M}_{22}^{-1} \\ \bm{0} & \bm{I} \end{bmatrix}, -\end{align*} +\end{equation*} where $\bm{M}_{11}\!\in\!\mathbb{R}^{d\times d}$, $\bm{M}_{12}\!\in\!\mathbb{R}^{d\times D}$, $\bm{M}_{21}\!\in\!\mathbb{R}^{D\times d}$, $\bm{M}_{22}\!\in\!\mathbb{R}^{D\times D}$, and $\bm{S} = \bm{M}_{11} - \bm{M}_{12} \bm{M}_{22}^{-1} \bm{M}_{21}$ the Schur complement of the matrix $\bm{M}$.
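This composition rule is easy to verify numerically, which can be a useful sanity check when implementing the augmented formulation. The sketch below builds a random block matrix and compares the composed inverse with a direct inversion; the dimensions and the conditioning trick applied to $\bm{M}_{22}$ are arbitrary choices made only for this test.
\begin{lstlisting}[language=Python]
import numpy as np

rng = np.random.default_rng(1)
d, D = 3, 4
M11 = rng.standard_normal((d, d))
M12 = rng.standard_normal((d, D))
M21 = rng.standard_normal((D, d))
M22 = rng.standard_normal((D, D)) + D * np.eye(D)  # keep M22 invertible
M = np.block([[M11, M12], [M21, M22]])

# Inverse composed from the Schur complement S = M11 - M12 M22^{-1} M21
M22_inv = np.linalg.inv(M22)
S = M11 - M12 @ M22_inv @ M21
L = np.block([[np.eye(d), np.zeros((d, D))],
              [-M22_inv @ M21, np.eye(D)]])
C = np.block([[np.linalg.inv(S), np.zeros((d, D))],
              [np.zeros((D, d)), M22_inv]])
R = np.block([[np.eye(d), -M12 @ M22_inv],
              [np.zeros((D, d)), np.eye(D)]])

print(np.allclose(L @ C @ R, np.linalg.inv(M)))  # expected: True
\end{lstlisting}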