diff --git a/README.md b/README.md index aaa7ccc2728ee1f138d797c7c507f688ca7e3b60..3fcc5c3175190b2fe08a0892bfa97b35ad3d9efc 100644 --- a/README.md +++ b/README.md @@ -13,15 +13,17 @@ RCFS is a collection of source codes to study robotics through simple 2D example | IK_num | Inverse kinematics with numerical computation for a planar manipulator | ✅ | ✅ | | | | FD | Forward Dynamics computed in matrix form for a planar manipulator | ✅ | ✅ | | | | FD_recursive | Forward Dynamics with recursive computation for a planar manipulator | ✅ | ✅ | | | +| LQR_infHor | Infinite Horizon Linear quadratic regulator (LQR) applied to a point mass system | ✅ | ✅ | | | | LQT | Linear quadratic tracking (LQT) applied to a viapoint task (batch formulation) | ✅ | ✅ | | | | LQT_tennisServe | LQT in a ballistic task mimicking a bimanual tennis serve problem (batch formulation) | ✅ | | | | | LQT_recursive | LQT applied to a viapoint task with a recursive formulation based on augmented state space to find a controller) | ✅ | ✅ | | | +| LQT_nullspace | Batch LQT with nullspace formulation | ✅ | ✅ | | | | LQT_recursive_LS | LQT applied to a viapoint task with a recursive formulation based on least squares and an augmented state space to find a controller | ✅ | ✅ | | | | LQT_recursive_LS_multiAgents | LQT applied to a multi-agent system with recursive formulation based on least squares and augmented state, by using a precision matrix with nonzero offdiagonal elements to find a controller in which the two agents coordinate their movements to find an optimal meeting point | ✅ | ✅ | | | | LQT_CP | LQT with control primitives applied to a viapoint task (batch formulation) | ✅ | ✅ | | | | LQT_CP_DMP | LQT with control primitives applied to trajectory tracking with a formulation similar to dynamical movement primitives (DMP), by using the least squares formulation of recursive LQR on an augmented state space | ✅ | ✅ | | | | iLQR_obstacle | Iterative linear quadratic regulator (iLQR) applied to a viapoint task with obstacles avoidance (batch formulation) | ✅ | ✅ | | | -| iLQR_obstacle_GPIS | iLQR with obstacles represented as Gaussian process implicit surfaces (GPIS) | ✅ | | | | +| iLQR_obstacle_GPIS | iLQR with obstacles represented as Gaussian process implicit surfaces (GPIS) | ✅ | | ✅ | | | iLQR_manipulator | iLQR applied to a planar manipulator for a viapoints task (batch formulation) | ✅ | ✅ | ✅ | | | iLQR_manipulator_recursive | iLQR applied to a planar manipulator for a viapoints task (recursive formulation to find a controller) | ✅ | ✅ | | | | iLQR_manipulator_CoM | iLQR applied to a planar manipulator for a tracking problem involving the center of mass (CoM) and the end-effector (batch formulation) | ✅ | ✅ | | | @@ -43,7 +45,6 @@ Additional reading material can be be found as [video lectures](https://tube.swi | Filename | Main responsible | |----------|------------------| -| Example similar to demo_OC_LQT_nullspace_minimal01.m | Hakan | | Example similar to demo_OC_LQT_Lagrangian01.m (inclusion of constraints) | ??? | diff --git a/doc/rcfs.pdf b/doc/rcfs.pdf index 09e980a1ea28dc6445bfc3d459d09351fd5f62a6..e5821b12433ebdd56668995dadd79e7fbab208ad 100644 Binary files a/doc/rcfs.pdf and b/doc/rcfs.pdf differ diff --git a/doc/rcfs.tex b/doc/rcfs.tex index 3298338a459d93caf084a9235e479acec600077a..7f9735c7cc9be7d89192a06a59377ca9405baf48 100644 --- a/doc/rcfs.tex +++ b/doc/rcfs.tex @@ -1360,7 +1360,7 @@ First, note that \eqref{eq:duhat} can be rewritten as \Delta\bm{\hat{u}}_t = \bm{\tilde{K}}_t \, \begin{bmatrix} \Delta\bm{x}_t \\ 1 \end{bmatrix}, \;\text{with}\quad \bm{\tilde{K}}_t &= - \bm{Q}_{\bm{u}\bm{u},t}^{-1} \Big[ \bm{Q}_{\bm{u}\bm{x},t}, \; \bm{q}_{\bm{u},t} \Big]. -\label{eq:duhat} +\label{eq:duhat2} \end{equation} Also, \eqref{eq:minu} can be rewritten as diff --git a/matlab/LQR_infHor.m b/matlab/LQR_infHor.m new file mode 100644 index 0000000000000000000000000000000000000000..baf67c599835da34c8e7d1c4acf85bcd5e5238f1 --- /dev/null +++ b/matlab/LQR_infHor.m @@ -0,0 +1,168 @@ +%% Point-mass LQR with infinite horizon +%% +%% Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ +%% Written by Sylvain Calinon <https://calinon.ch> +%% +%% This file is part of RCFS. +%% +%% RCFS is free software: you can redistribute it and/or modify +%% it under the terms of the GNU General Public License version 3 as +%% published by the Free Software Foundation. +%% +%% RCFS is distributed in the hope that it will be useful, +%% but WITHOUT ANY WARRANTY; without even the implied warranty of +%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +%% GNU General Public License for more details. +%% +%% You should have received a copy of the GNU General Public License +%% along with RCFS. If not, see <http://www.gnu.org/licenses/>. + + +function LQR_infHor + +%% Parameters +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +param.nbData = 200; %Number of datapoints +param.nbRepros = 4; %Number of reproductions + +param.nbVarPos = 2; %Dimension of position data (here: x1,x2) +param.nbDeriv = 2; %Number of static & dynamic features (D=2 for [x,dx]) +param.nbVar = param.nbVarPos * param.nbDeriv; %Dimension of state vector in the tangent space +param.dt = 1E-2; %Time step duration +param.rfactor = 4E-2; %Control cost in LQR + +%Control cost matrix +R = eye(param.nbVarPos) * param.rfactor; + +%Target and desired covariance +param.Mu = [randn(param.nbVarPos,1); zeros(param.nbVarPos*(param.nbDeriv-1),1)]; + +[Ar,~] = qr(randn(param.nbVarPos)); +xCov = Ar * diag(rand(param.nbVarPos,1)) * Ar' * 1E-1 + +%% Discrete dynamical System settings +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +A1d = zeros(param.nbDeriv); +for i=0:param.nbDeriv-1 + A1d = A1d + diag(ones(param.nbDeriv-i,1),i) * param.dt^i * 1/factorial(i); %Discrete 1D +end +B1d = zeros(param.nbDeriv,1); +for i=1:param.nbDeriv + B1d(param.nbDeriv-i+1) = param.dt^i * 1/factorial(i); %Discrete 1D +end +A = kron(A1d, eye(param.nbVarPos)); %Discrete nD +B = kron(B1d, eye(param.nbVarPos)); %Discrete nD + + +%% discrete LQR with infinite horizon +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +Q = blkdiag(inv(xCov), zeros(param.nbVarPos*(param.nbDeriv-1))); %Precision matrix +P = solveAlgebraicRiccati_eig_discrete(A, B*(R\B'), (Q+Q')/2); +L = (B' * P * B + R) \ B' * P * A; %Feedback gain (discrete version) + +for n=1:param.nbRepros + x = [ones(param.nbVarPos,1)+randn(param.nbVarPos,1)*5E-1; zeros(param.nbVarPos*(param.nbDeriv-1),1)]; + for t=1:param.nbData + r(n).Data(:,t) = x; + u = L * (param.Mu - x); %Compute acceleration (with only feedback terms) + x = A * x + B * u; + end +end + + +%% Plots +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +figure('position',[10,10,650,650]); hold on; axis off; grid off; +plotGMM(param.Mu(1:2), xCov(1:2,1:2), [.8 0 0], .3); +for n=1:param.nbRepros + plot(r(n).Data(1,:), r(n).Data(2,:), '-','linewidth',1,'color',[0 0 0]); +end +%plot(param.Mu(1,1), param.Mu(2,1), 'r.','markersize',80); +axis equal; +%print('-dpng','graphs/demo_MPC_infHor01.png'); + + +%% Timeline plot +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +labList = {'x_1','x_2','dx_1','dx_2','ddx_1','ddx_2'}; +figure('position',[720 10 600 650],'color',[1 1 1]); +for j=1:param.nbVar +subplot(param.nbVar+1,1,j); hold on; +for n=1:param.nbRepros + plot(r(n).Data(j,:), '-','linewidth',1,'color',[0 0 0]); +end +if j<7 + ylabel(labList{j},'fontsize',14,'interpreter','latex'); +end +end +%Speed profile +if param.nbDeriv>1 +subplot(param.nbVar+1,1,param.nbVar+1); hold on; +for n=1:param.nbRepros + sp = sqrt(r(n).Data(3,:).^2 + r(n).Data(4,:).^2); + plot(sp, '-','linewidth',1,'color',[0 0 0]); +end +ylabel('|dx|','fontsize',14); +xlabel('t','fontsize',14); +end +%print('-dpng','graphs/demo_LQR_infHor01.png'); + +pause; +close all; +end + +%%%%%%%%%%%%%%%%%%%%%% +% Solve Algebraic Ricatty discrete equation +function X = solveAlgebraicRiccati_eig_discrete(A, G, Q) + n = size(A,1); + + %Symplectic matrix (see https://en.wikipedia.org/wiki/Algebraic_Riccati_equation) + %Z = [A+B*(R\B')/A'*Q, -B*(R\B')/A'; -A'\Q, A'^-1]; + Z = [A+G/A'*Q, -G/A'; -A'\Q, inv(A')]; + + + %Since Z is symplectic, if it does not have any eigenvalues on the unit circle, + %then exactly half of its eigenvalues are inside the unit circle. + [V,D] = eig(Z); + U = []; + for j=1:2*n + if norm(D(j,j)) < 1 %inside unit circle + U = [U V(:,j)]; + end + end + + X = real(U(n+1:end,:) / U(1:n,:)); +end + +%%%%%%%%%%%%%%%%%%%%%% +% Plot a 2D Mixture of Gaussians +function [h, X] = plotGMM(Mu, Sigma, color, valAlpha) + nbStates = size(Mu,2); + nbDrawingSeg = 100; + darkcolor = color * .7; %max(color-0.5,0); + t = linspace(-pi, pi, nbDrawingSeg); + if nargin<4 + valAlpha = 1; + end + + h = []; + X = zeros(2,nbDrawingSeg,nbStates); + for i=1:nbStates + [V,D] = eig(Sigma(:,:,i)); + R = real(V*D.^.5); + X(:,:,i) = R * [cos(t); sin(t)] + repmat(Mu(:,i), 1, nbDrawingSeg); + if nargin>3 %Plot with alpha transparency + h = [h patch(X(1,:,i), X(2,:,i), color, 'lineWidth', 1, 'EdgeColor', color, 'facealpha', valAlpha,'edgealpha', valAlpha)]; + h = [h plot(Mu(1,:), Mu(2,:), '.', 'markersize', 10, 'color', darkcolor)]; + else %Plot without transparency + h = [h patch(X(1,:,i), X(2,:,i), color, 'lineWidth', 1, 'EdgeColor', darkcolor)]; + h = [h plot(Mu(1,:), Mu(2,:), '.', 'markersize', 10, 'color', darkcolor)]; + end + end +end + + + diff --git a/matlab/LQT_nullspace.m b/matlab/LQT_nullspace.m new file mode 100644 index 0000000000000000000000000000000000000000..7e6631f123349a80f3a5b766143426853914187e --- /dev/null +++ b/matlab/LQT_nullspace.m @@ -0,0 +1,109 @@ +%% Batch LQT with nullspace formulation +%% +%% Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ +%% Written by Sylvain Calinon <https://calinon.ch> +%% +%% This file is part of RCFS. +%% +%% RCFS is free software: you can redistribute it and/or modify +%% it under the terms of the GNU General Public License version 3 as +%% published by the Free Software Foundation. +%% +%% RCFS is distributed in the hope that it will be useful, +%% but WITHOUT ANY WARRANTY; without even the implied warranty of +%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +%% GNU General Public License for more details. +%% +%% You should have received a copy of the GNU General Public License +%% along with RCFS. If not, see <http://www.gnu.org/licenses/>. + +function LQT_nullspace + +%% Parameters +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +param.nbData = 50; %Number of datapoints +param.nbRepros = 60; %Number of stochastic reproductions +param.nbPoints = 1; %Number of keypoints +param.nbVar = 2; %Dimension of state vector +param.dt = 1E-1; %Time step duration +param.rfactor = 1E-4; %param.dt^nbDeriv; %Control cost in LQR + + +R = speye((param.nbData-1)*param.nbVar) * param.rfactor; +x0 = zeros(param.nbVar,1); %Initial point + + +%% Dynamical System settings +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +Su = [zeros(param.nbVar, param.nbVar*(param.nbData-1)); kron(tril(ones(param.nbData-1)), eye(param.nbVar)*param.dt)]; +Sx = kron(ones(param.nbData,1), eye(param.nbVar)); + + +%% Task setting +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +tl = linspace(1,param.nbData,param.nbPoints+1); +tl = round(tl(2:end)); %[param.nbData/2, param.nbData]; +%Mu = rand(param.nbVarPos,param.nbPoints) - 0.5; +Mu = [20; 10]; + +Sigma = repmat(eye(param.nbVar)*1E-3, [1,1,param.nbPoints]); +MuQ = zeros(param.nbVar*param.nbData,1); +Q = zeros(param.nbVar*param.nbData); +for t=1:length(tl) + id(:,t) = [1:param.nbVar] + (tl(t)-1) * param.nbVar; + Q(id(:,t), id(:,t)) = inv(Sigma(:,:,t)); + MuQ(id(:,t)) = Mu(:,t); +end + + +%% Batch LQR reproduction +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%Precomputation of basis functions to generate control commands u (here, with Bernstein basis functions) +nbRBF = 10; +H = buildPhiBernstein(param.nbData-1,nbRBF); + +%Reproduction with nullspace planning +[V,D] = eig(Q); +U = V * D.^.5; +J = U' * Su; %Jacobian + +%Left pseudoinverse solution +pinvJ = (J' * J + R) \ J'; %Left pseudoinverse +N = speye((param.nbData-1)*param.nbVar) - pinvJ * J; %Nullspace projection matrix +u1 = pinvJ * U' * (MuQ - Sx * x0); %Principal task (least squares solution) +x = reshape(Sx*x0+Su*u1, param.nbVar, param.nbData); %Reshape data for plotting + +%General solutions +for n=1:param.nbRepros + w = randn(param.nbVar,nbRBF) * 1E1; %Random weights + u2 = w * H'; %Reconstruction of control signals by a weighted superposition of basis functions + u = u1 + N * u2(:); + r(n).x = reshape(Sx*x0+Su*u, param.nbVar, param.nbData); %Reshape data for plotting +end + + +%% Plot 2D +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +figure('position',[10 10 800 800],'color',[1 1 1],'name','x1-x2 plot'); hold on; axis off; +for n=1:param.nbRepros + plot(r(n).x(1,:), r(n).x(2,:), '-','linewidth',1,'color',.9-[.7 .7 .7].*rand(1)); +end +plot(x(1,:), x(2,:), '-','linewidth',2,'color',[.8 0 0]); +plot(x(1,1), x(2,1), 'o','linewidth',2,'markersize',8,'color',[.8 0 0]); +plot(MuQ(id(1,:)), MuQ(id(2,:)), '.','markersize',30,'color',[.8 0 0]); +axis equal; +%print('-dpng','graphs/LQT_nullspace02.png'); + +pause(10); +close all; +end + +%Building Bernstein basis functions +function phi = buildPhiBernstein(nbData, nbFct) + t = linspace(0, 1, nbData); + phi = zeros(nbData, nbFct); + for i=1:nbFct + phi(:,i) = factorial(nbFct-1) ./ (factorial(i-1) .* factorial(nbFct-i)) .* (1-t).^(nbFct-i) .* t.^(i-1); + end +end + diff --git a/matlab/MP.m b/matlab/MP.m index 9482effbcd7e8a58479bd5cefe69581ffc52dd15..22e0e419222092f7dac5ae4e8870d273ee8d7368 100644 --- a/matlab/MP.m +++ b/matlab/MP.m @@ -77,7 +77,7 @@ clrmap = lines(param.nbFct); subplot(1,3,1); hold on; axis off; l(1) = plot(x(1:2:end,:), x(2:2:end,:), 'linewidth',2,'color',[.2 .2 .2]); l(2) = plot(x_hat(1:2:end,:), x_hat(2:2:end,:), 'linewidth',2,'color',[.9 .0 .0]); -legend(l,{'Demonstration','Reproduction'},'fontsize',20); +legend('Demonstration','Reproduction'); axis tight; axis equal; %Plot basis functions (display only the real part for Fourier basis functions) diff --git a/matlab/iLQR_manipulator_obstacle.m b/matlab/iLQR_manipulator_obstacle.m index cd7f986ea825a51531e0ffc5489326046f3273b7..803316391d56810959e69510bc9859c29b01b50f 100644 --- a/matlab/iLQR_manipulator_obstacle.m +++ b/matlab/iLQR_manipulator_obstacle.m @@ -2,7 +2,8 @@ %% (viapoints task with position+orientation including bounding boxes on f(x)) %% %% Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ -%% Written by Sylvain Calinon <https://calinon.ch> +%% Written by Teguh Lembono <teguh.lembono@idiap.ch>, +%% Sylvain Calinon <https://calinon.ch> %% %% This file is part of RCFS. %% @@ -24,7 +25,7 @@ function iLQR_manipulator_obstacle %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% param.dt = 1E-2; %Time step size param.nbData = 50; %Number of datapoints -param.nbIter = 50; %Number of iterations for iLQR +param.nbIter = 30; %Number of iterations for iLQR param.nbPoints = 1; %Number of viapoints param.nbObstacles = 2; %Number of obstacles param.nbVarX = 3; %State space dimension (q1,q2,q3) @@ -35,6 +36,7 @@ param.sz2 = [.5, .8]; %Size of obstacles param.q = 1E2; %Tracking weight term param.q2 = 1E0; %Obstacle avoidance weight term param.r = 1E-3; %Control weight term +param.useBatch = true; %use batch computation for collision cost param.Mu = [[3; -1; 0]]; %Viapoints (x1,x2,o) @@ -64,11 +66,16 @@ Su0 = [zeros(param.nbVarX, param.nbVarX*(param.nbData-1)); tril(kron(ones(param. Sx0 = kron(ones(param.nbData,1), eye(param.nbVarX)); Su = Su0(idx,:); +tic; for n=1:param.nbIter x = reshape(Su0 * u + Sx0 * x0, param.nbVarX, param.nbData); %System evolution [f, J] = f_reach(x(:,tl), param); %Tracking objective - [f2, J2, id2] = f_avoid(x, param); %Avoidance objective + if param.useBatch + [f2, J2, id2] = f_avoid2(x, param); %Avoidance objective + else + [f2, J2, id2] = f_avoid(x, param); %Avoidance objective + end Su2 = Su0(id2,:); du = (Su' * J' * J * Su * param.q + Su2' * J2' * J2 * Su2 * param.q2 + R) \ ... @@ -81,7 +88,11 @@ for n=1:param.nbIter utmp = u + du * alpha; xtmp = reshape(Su0 * utmp + Sx0 * x0, param.nbVarX, param.nbData); ftmp = f_reach(xtmp(:,tl), param); %Tracking objective - ftmp2 = f_avoid(xtmp, param); %Avoidance objective + if param.useBatch + ftmp2 = f_avoid2(xtmp, param); %Avoidance objective + else + ftmp2 = f_avoid(xtmp, param); %Avoidance objective + end cost = norm(ftmp(:))^2 * param.q + norm(ftmp2(:))^2 * param.q2 + norm(utmp)^2 * param.r; if cost < cost0 || alpha < 1E-3 break; @@ -89,13 +100,14 @@ for n=1:param.nbIter alpha = alpha * 0.5; end u = u + du * alpha; - + disp(cost) if norm(du * alpha) < 1E-2 break; %Stop iLQR when solution is reached end end disp(['iLQR converged in ' num2str(n) ' iterations.']); - +toc +%disp(tEnd-tStart) %Log data r.x = x; r.f = []; @@ -192,6 +204,7 @@ function [f, J, id] = f_avoid(x, param) f = [f; ftmp]; Jrob = [jkin(x(1:j,t), param.l(1:j)), zeros(param.nbVarF, param.nbVarU-j)]; Jtmp = -e' * param.U2(:,:,i)' * Jrob(1:2,:); %quadratic form + J = blkdiag(J, Jtmp); id = [id, (t-1) * param.nbVarU + [1:param.nbVarU]']; end @@ -199,3 +212,46 @@ function [f, J, id] = f_avoid(x, param) end end end + +%%%%%%%%%%%%%%%%%%%%%% +%Forward kinematics for all robot articulations in batch +function f = fkin0batch(x, L) + T = tril(ones(size(x,1))); + T2 = T * diag(L); + f = [(T2 * cos(T * x))(:), ... + (T2 * sin(T * x))(:)]; +end + +%%%%%%%%%%%%%%%%%%%%%% +%Cost and gradient for an obstacle avoidance task with batch computation +function [f, J, id] = f_avoid2(x, param) + f=[]; J=[]; id=[]; + for i=1:param.nbObstacles + xee_batch = fkin0batch(x, param.l); + n= size(xee_batch,1); + e = (xee_batch - repmat(param.Mu2(1:2,i)', n , 1)) * param.U2(:,:,i); + ftmp = ones(n,1) - sum(e.^2, 2); %quadratic form + + %get the indices where there is collision (ftmp > 0) + idx_active = linspace(1,n,n)(ftmp>0); + + for idx=1:size(idx_active,2) + j = idx_active(idx); + t = floor((j-1)/param.nbVarX)+1; + k = mod(j-1,param.nbVarX)+1; + + %compute Jacobian + Jrob = [jkin(x(1:k,t), param.l(1:k)), zeros(param.nbVarF, param.nbVarX-k)]; + Jtmp = -e(j,:) * param.U2(:,:,i)' * Jrob(1:2,:); %quadratic form + Jj = zeros(1,param.nbVarX*param.nbData); + Jj(:,param.nbVarX*(t-1)+1:param.nbVarX*(t)) = Jtmp; + J = [J; Jj]; + id = [id, (t-1) * param.nbVarX + [1:param.nbVarX]']; + end + + f = [f; ftmp(idx_active(:))]; #keep only active value + end + + id = unique(id); #remove duplicate + J = J(:,id); #keep only active value +end \ No newline at end of file diff --git a/python/LQR_infHor.py b/python/LQR_infHor.py new file mode 100644 index 0000000000000000000000000000000000000000..e0e53fb6e38fe689bef6dd0b2fddebb53d44f563 --- /dev/null +++ b/python/LQR_infHor.py @@ -0,0 +1,131 @@ +''' + Point-mass LQR with infinite horizon + + Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ + Written by Jeremy Maceiras <jeremy.maceiras@idiap.ch>, + Sylvain Calinon <https://calinon.ch> + + This file is part of RCFS. + + RCFS is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License version 3 as + published by the Free Software Foundation. + + RCFS is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with RCFS. If not, see <http://www.gnu.org/licenses/>. +''' + +import numpy as np +import copy +from scipy.linalg import solve_discrete_are as solve_algebraic_riccati_discrete +from scipy.stats import multivariate_normal +from math import factorial +import matplotlib.pyplot as plt + +# Plot a 2D Gaussians +def plot_gaussian(mu, sigma): + w, h = 100, 100 + + std = [np.sqrt(sigma[0, 0]), np.sqrt(sigma[1, 1])] + x = np.linspace(mu[0] - 3 * std[0], mu[0] + 3 * std[0], w) + y = np.linspace(mu[1] - 3 * std[1], mu[1] + 3 * std[1], h) + + x, y = np.meshgrid(x, y) + + x_ = x.flatten() + y_ = y.flatten() + xy = np.vstack((x_, y_)).T + + normal_rv = multivariate_normal(mu, sigma) + z = normal_rv.pdf(xy) + z = z.reshape(w, h, order='F') + + plt.contourf(x, y, z.T,levels=1,colors=["white","red"],alpha=.5) + +# Parameters +# =============================== + +param = lambda: None # Lazy way to define an empty class in python +param.nbData = 200 # Number of datapoints +param.nbRepros = 4 #Number of reproductions +param.nbVarPos = 2 #Dimension of position data (here: x1,x2) +param.nbDeriv = 2 #Number of static & dynamic features (D=2 for [x,dx]) +param.nbVar = param.nbVarPos * param.nbDeriv #Dimension of state vector in the tangent space +param.dt = 1e-2 #Time step duration +param.rfactor = 4e-2 #Control cost in LQR + +# Control cost matrix +R = np.identity(param.nbVarPos) * param.rfactor + +# Target and desired covariance +param.Mu = np.hstack(( np.random.uniform(size=param.nbVarPos) , np.zeros(param.nbVarPos) )) +Ar,_ = np.linalg.qr(np.random.uniform(size=(param.nbVarPos,param.nbVarPos))) +xCov = Ar @ np.diag(np.random.uniform(size=param.nbVarPos)) @ Ar.T * 1e-1 + +# Discrete dynamical System settings +# =================================== + +A1d = np.zeros((param.nbDeriv,param.nbDeriv)) +B1d = np.zeros((param.nbDeriv,1)) + +for i in range(param.nbDeriv): + A1d += np.diag( np.ones(param.nbDeriv-i) ,i ) * param.dt**i * 1/factorial(i) + B1d[param.nbDeriv-i-1] = param.dt**(i+1) * 1/factorial(i+1) + +A = np.kron(A1d,np.identity(param.nbVarPos)) +B = np.kron(B1d,np.identity(param.nbVarPos)) + +# Discrete LQR with infinite horizon +# =================================== + +Q = np.zeros((param.nbVar,param.nbVar)) +Q[:param.nbVarPos,:param.nbVarPos] = np.linalg.inv(xCov) # Precision matrix +P = solve_algebraic_riccati_discrete(A,B,Q,R) +L = np.linalg.inv(B.T @ P @ B + R) @ B.T @ P @ A # Feedback gain (discrete version) + +reproducitons = [] + +for i in range(param.nbRepros): + xt = np.zeros(param.nbVar) + xt[:param.nbVarPos] = 1+np.random.uniform(param.nbVarPos)*2 + xs = [copy.deepcopy(xt)] + for t in range(param.nbData): + u = L @ (param.Mu - xt) + xt = A @ xt + B @ u + xs += [copy.deepcopy(xt)] + reproducitons += [ np.asarray(xs) ] + +# Plots +# ====== + +plt.figure() +plt.title("Position") + +for r in reproducitons: + plt.plot(r[:,0],r[:,1],c="black") +plot_gaussian(param.Mu[:param.nbVarPos],xCov) +plt.axis("off") +plt.gca().set_aspect('equal', adjustable='box') + +fig,axs = plt.subplots(5) + +for r in reproducitons: + axs[0].plot(r[:,0],c="black",alpha=.4,linestyle="dashed") + axs[1].plot(r[:,1],c="black",alpha=.4,linestyle="dashed") + axs[2].plot(r[:,2],c="black",alpha=.4,linestyle="dashed") + axs[3].plot(r[:,3],c="black",alpha=.4,linestyle="dashed") + axs[4].plot(np.linalg.norm(r[:,2:4],axis=1),c="black",alpha=.4,linestyle="dashed") + +axs[0].set_ylabel("$x_1$") +axs[1].set_ylabel("$x_2$") +axs[2].set_ylabel("$\dot{x}_1$") +axs[3].set_ylabel("$\dot{x}_2$") +axs[4].set_ylabel("$| \dot{x} |$") +plt.xlabel("T") + +plt.show() diff --git a/python/LQT_nullspace.py b/python/LQT_nullspace.py index 633a05751e5c00497c0ab8c00426a78512e9b095..2486aed080104dbc5a802634e9931d8c3e5b20e3 100644 --- a/python/LQT_nullspace.py +++ b/python/LQT_nullspace.py @@ -1,8 +1,8 @@ ''' - Linear Quadratic tracker applied on a via point example + Batch LQT with nullspace formulation Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ - Written by Jeremy Maceiras <jeremy.maceiras@idiap.ch>, + Written by Hakan Girgin <hakan.girgin@idiap.ch>, Sylvain Calinon <https://calinon.ch> This file is part of RCFS. @@ -25,52 +25,62 @@ from math import factorial import matplotlib.pyplot as plt from scipy.linalg import block_diag +## Precomputation of basis functions to generate structured stochastic u through Bezier curves +# Building Bernstein basis functions +def build_phi_bernstein(nb_data, nb_fct): + t = np.linspace(0,1,nb_data) + phi = np.zeros((nb_data,nb_fct)) + for i in range(nb_fct): + phi[:,i] = factorial(nb_fct-1) / (factorial(i) * factorial(nb_fct-1-i)) * (1-t)**(nb_fct-1-i) * t**i + return phi + # Parameters # =============================== -dt = 1e-1 # Time step length -nbPoints = 1 # Number of targets -nbDeriv = 1 # Order of the dynamical system -nbVarPos = 2 # Number of position variable -nbData = 50 # Number of datapoints -rfactor = 1e-4 # Control weight term -nbRepros = 60 # Number of stochastic reproductions -nb_var = nbVarPos * nbDeriv # Dimension of state vector +param = lambda: None # Lazy way to define an empty class in python +param.dt = 1e-1 # Time step length +param.nbPoints = 1 # Number of targets +param.nbDeriv = 1 # Order of the dynamical system +param.nbVarPos = 2 # Number of position variable +param.nbData = 50 # Number of datapoints +param.rfactor = 1e-4 # Control weight term +param.nbRepros = 60 # Number of stochastic reproductions +param.nbVar = param.nbVarPos * param.nbDeriv # Dimension of state vector # Dynamical System settings (discrete) # ===================================== -A1d = np.zeros((nbDeriv,nbDeriv)) -B1d = np.zeros((nbDeriv,1)) +A1d = np.zeros((param.nbDeriv,param.nbDeriv)) +B1d = np.zeros((param.nbDeriv,1)) -for i in range(nbDeriv): - A1d += np.diag( np.ones(nbDeriv-i) ,i ) * dt**i * 1/factorial(i) - B1d[nbDeriv-i-1] = dt**(i+1) * 1/factorial(i+1) +for i in range(param.nbDeriv): + A1d += np.diag( np.ones(param.nbDeriv-i) ,i ) * param.dt**i * 1/factorial(i) + B1d[param.nbDeriv-i-1] = param.dt**(i+1) * 1/factorial(i+1) -A = np.kron(A1d,np.identity(nbVarPos)) -B = np.kron(B1d,np.identity(nbVarPos)) +A = np.kron(A1d,np.identity(param.nbVarPos)) +B = np.kron(B1d,np.identity(param.nbVarPos)) # Build Sx and Su transfer matrices -Su = np.zeros((nb_var*nbData,nbVarPos * (nbData-1))) -Sx = np.kron(np.ones((nbData,1)),np.eye(nb_var,nb_var)) +Su = np.zeros((param.nbVar*param.nbData,param.nbVarPos * (param.nbData-1))) +Sx = np.kron(np.ones((param.nbData,1)),np.eye(param.nbVar,param.nbVar)) M = B -for i in range(1, nbData): - Sx[i*nb_var:nbData*nb_var,:] = np.dot(Sx[i*nb_var:nbData*nb_var,:], A) - Su[nb_var*i:nb_var*i+M.shape[0],0:M.shape[1]] = M +for i in range(1, param.nbData): + Sx[i*param.nbVar:param.nbData*param.nbVar,:] = np.dot(Sx[i*param.nbVar:param.nbData*param.nbVar,:], A) + Su[param.nbVar*i:param.nbVar*i+M.shape[0],0:M.shape[1]] = M M = np.hstack((np.dot(A,M),B)) # [0,nb_state_var-1] # Cost function settings # ===================================== -R = np.identity( (nbData-1) * nbVarPos ) * rfactor # Control cost matrix +R = np.identity( (param.nbData-1) * param.nbVarPos ) * param.rfactor # Control cost matrix -t_list = np.stack([nbData-1]) # viapoint time list +t_list = np.stack([param.nbData-1]) # viapoint time list mu_list = np.stack([np.array([20,10.])]) # viapoint list -Q_list = np.stack([np.eye(nb_var)*1e3]) # viapoint precision list +Q_list = np.stack([np.eye(param.nbVar)*1e3]) # viapoint precision list -mus = np.zeros((nbData, nb_var)) -Qs = np.zeros((nbData, nb_var, nb_var)) +mus = np.zeros((param.nbData, param.nbVar)) +Qs = np.zeros((param.nbData, param.nbVar, param.nbVar)) for i,t in enumerate(t_list): mus[t] = mu_list[i] Qs[t] = Q_list[i] @@ -83,33 +93,25 @@ Q = block_diag(*Qs) # Batch LQR Reproduction # ===================================== -## Precomputation of basis functions to generate structured stochastic u through Bezier curves -# Building Bernstein basis functions -def build_phi_bernstein(nb_data, nb_fct): - t = np.linspace(0,1,nb_data) - phi = np.zeros((nb_data,nb_fct)) - for i in range(nb_fct): - phi[:,i] = factorial(nb_fct-1) / (factorial(i) * factorial(nb_fct-1-i)) * (1-t)**(nb_fct-1-i) * t**i - return phi nbRBF = 10 -H = build_phi_bernstein(nbData-1, nbRBF) +H = build_phi_bernstein(param.nbData-1, nbRBF) J = Q @ Su N = np.eye(J.shape[-1]) - np.linalg.pinv(J) @ J # nullspace operator of LQT # Principal Task -x0 = np.zeros(nb_var) +x0 = np.zeros(param.nbVar) u1 = np.linalg.solve(Su.T @ Q @ Su + R, Su.T @ Q @ (muQ - Sx @ x0)) -x1 = (Sx @ x0 + Su @ u1).reshape((-1,nb_var)) +x1 = (Sx @ x0 + Su @ u1).reshape((-1,param.nbVar)) # Secondary Task -repr_x = np.zeros((nbRepros, nbData, nb_var)) -for n in range(nbRepros): - w = np.random.randn(nbRBF, nbVarPos) * 1E1 # Random weights +repr_x = np.zeros((param.nbRepros, param.nbData, param.nbVar)) +for n in range(param.nbRepros): + w = np.random.randn(nbRBF, param.nbVarPos) * 1E1 # Random weights u2 = H @ w # Reconstruction of control signals by a weighted superposition of basis functions u = u1 + N @ u2.flatten() - repr_x[n] = np.reshape(Sx @ x0 + Su @ u, (-1, nb_var)) # Reshape data for plotting + repr_x[n] = np.reshape(Sx @ x0 + Su @ u, (-1, param.nbVar)) # Reshape data for plotting # Plotting # ========= @@ -120,7 +122,7 @@ for mu in mu_list: plt.scatter(mu[0], mu[1],c='red',s=100) plt.plot(x1[:,0] , x1[:,1], c='black') -for i in range(nbRepros): +for i in range(param.nbRepros): plt.plot(repr_x[i, :, 0], repr_x[i, :, 1], c='blue', alpha=0.1, zorder=0) plt.axis("off") @@ -130,24 +132,24 @@ fig,axs = plt.subplots(2,1) for i,t in enumerate(t_list): axs[0].scatter(t, mu_list[i][0],c='red') -for i in range(nbRepros): +for i in range(param.nbRepros): axs[0].plot(repr_x[i, :, 0], c='blue', alpha=0.1, zorder=0) axs[0].plot(x1[:,0], c='black') axs[0].set_ylabel("$x_1$") -axs[0].set_xticks([0,nbData]) +axs[0].set_xticks([0,param.nbData]) axs[0].set_xticklabels(["0","T"]) for i,t in enumerate(t_list): axs[1].scatter(t, mu_list[i][1],c='red') -for i in range(nbRepros): +for i in range(param.nbRepros): axs[1].plot(repr_x[i, :, 1], c='blue', alpha=0.1, zorder=0) axs[1].plot(x1[:,1], c='black') axs[1].set_ylabel("$x_2$") axs[1].set_xlabel("$t$") -axs[1].set_xticks([0,nbData]) +axs[1].set_xticks([0,param.nbData]) axs[1].set_xticklabels(["0","T"]) plt.show() diff --git a/python/iLQR_manipulator_obstacle.py b/python/iLQR_manipulator_obstacle.py index 17eafe4518087ef48f9da4cb6c01af4d294bf347..ff77cbb4d52faede7812138a5f85c2a53f178288 100644 --- a/python/iLQR_manipulator_obstacle.py +++ b/python/iLQR_manipulator_obstacle.py @@ -3,6 +3,7 @@ Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ Written by Jeremy Maceiras <jeremy.maceiras@idiap.ch>, + Teguh Lembono <teguh.lembono@idiap.ch>, Sylvain Calinon <https://calinon.ch> This file is part of RCFS. @@ -24,6 +25,8 @@ import numpy as np import numpy.matlib import matplotlib.pyplot as plt import matplotlib.patches as patches +import time + # Helper functions # =============================== @@ -37,15 +40,15 @@ def logmap(f,f0): # Forward kinematics for E-E def fkin(x,ls): - x = x.T - A = np.tril(np.ones([len(ls),len(ls)])) - f = np.vstack((ls @ np.cos(A @ x), - ls @ np.sin(A @ x), - np.mod(np.sum(x,0)+np.pi, 2*np.pi) - np.pi)) #x1,x2,o (orientation as single Euler angle for planar robot) - return f.T + x = x.T + A = np.tril(np.ones([len(ls),len(ls)])) + f = np.vstack((ls @ np.cos(A @ x), + ls @ np.sin(A @ x), + np.mod(np.sum(x,0)+np.pi, 2*np.pi) - np.pi)) #x1,x2,o (orientation as single Euler angle for planar robot) + return f.T # Forward Kinematics for all joints -def fkin0(param,x): +def fkin0(x, param): T = np.tril(np.ones([param.nbVarX,param.nbVarX])) T2 = np.tril(np.matlib.repmat(param.linkLengths,len(x),1)) f = np.vstack(( @@ -58,8 +61,18 @@ def fkin0(param,x): )) return f +# Forward Kinematics for all joints in batch +def fkin0batch(x, param): + T = np.tril(np.ones([param.nbVarX,param.nbVarX])) + T2 = np.tril(np.matlib.repmat(param.linkLengths,x.shape[1],1)) + f = np.vstack(( + (T2 @ np.cos(T@x.T)).flatten(order='F'), + (T2 @ np.sin(T@x.T)).flatten(order='F') + )).T + return f + # Residual and Jacobian for an obstacle avoidance task -def f_avoid(param,x): +def f_avoid(x, param): f=[] idx=[] J=np.zeros((0,0)) @@ -74,7 +87,7 @@ def f_avoid(param,x): if ftmp > 0: f += [ftmp] - J_rbt = np.hstack(( jkin(x[t,:(j+1)] , param.linkLengths[:(j+1)]), np.zeros(( param.nbVarF , param.nbVarU-(j+1))) )) + J_rbt = np.hstack(( jkin(x[t,:(j+1)] , param.linkLengths[:(j+1)]), np.zeros(( param.nbVarF , param.nbVarX-(j+1))) )) J_tmp = (-e.T @ param.U_obs[i].T @ J_rbt[:2]).reshape((-1,1)) #J_tmp = 1*(J_rbt[:2].T @ param.U_obs[i] @ e).reshape((-1,1)) @@ -83,12 +96,45 @@ def f_avoid(param,x): J2[-J_tmp.shape[0]:,-J_tmp.shape[1]:] = J_tmp J = J2 # Numpy does not provid a blockdiag function... - idx.append( (t-1)*param.nbVarU + np.array(range(param.nbVarU)) ) + idx.append( t*param.nbVarX + np.array(range(param.nbVarX)) ) f = np.array(f) idx = np.array(idx) return f,J.T,idx +def f_avoid2(x, param): + f, J, idx = [], [], [] + + for i in range(param.nbObstacles): + f_x = fkin0batch(x, param) + e = (f_x- param.Obs[i][:2])@param.U_obs[i] + ftmp = 1 - np.linalg.norm(e, axis=1)**2 + + #check for value larger than zeros + idx_active = np.arange(len(ftmp))[ftmp>0] + + #compute Jacobian for each idx_active + for j in idx_active: + t = j//param.nbVarX # which time step + k = j%param.nbVarX # which link + + J_rbt = np.hstack((jkin(x[t,:(k+1)] , param.linkLengths[:(k+1)]), np.zeros(( param.nbVarF , param.nbVarX-(k+1))) )) + J_tmp = (-e[j].T @ param.U_obs[i].T @ J_rbt[:2]) + J_j = np.zeros(param.nbVarX*param.nbData) + J_j[param.nbVarX*t: param.nbVarX*(t+1)] = J_tmp + J += [J_j] + + #Get indices + idx += np.arange(param.nbVarX*t, param.nbVarX*(t+1)).tolist() + f += [ftmp[idx_active]] + + idx = np.array(list(set(idx))) + f = np.concatenate(f) + if len(idx) > 0: + J = np.vstack(J) + J = J[:,idx] + return f, J,idx + # Jacobian with analytical computation (for single time step) def jkin(x,ls): T = np.tril( np.ones((len(ls),len(ls))) ) @@ -100,7 +146,7 @@ def jkin(x,ls): return J # Residual and Jacobian for a via-point reaching task -def f_reach(param,x): +def f_reach(x, param): f = logmap(fkin(x,param.linkLengths),param.mu) J = np.zeros(( len(x) * param.nbVarF , len(x) * param.nbVarX )) @@ -125,6 +171,7 @@ param.sizeObstacle = [.5,.8] # Size of objects param.r = 1e-3 # Control weight term param.Q_track = 1e2 param.Q_avoid = 1e0 +param.useBatch = False #Batch computation for collision avoidance cost # Task parameters # =============================== @@ -181,13 +228,16 @@ Su = Su0[idx.flatten()] # We remove the lines that are out of interest # Solving iLQR # =============================== - +tic=time.time() for i in range( param.nbIter ): x = Su0 @ u + Sx0 @ x0 x = x.reshape( ( param.nbData , param.nbVarX) ) - f, J = f_reach(param,x[tl]) # Tracking objective - f2, J2, id2 = f_avoid(param,x)# Avoidance objective + f, J = f_reach(x[tl], param) # Tracking objective + if param.useBatch: + f2, J2, id2 = f_avoid2(x, param)# Avoidance objective + else: + f2, J2, id2 = f_avoid(x, param)# Avoidance objective if len(id2) > 0: # Numpy does not allow zero sized array as Indices Su2 = Su0[id2.flatten()] @@ -204,8 +254,12 @@ for i in range( param.nbIter ): utmp = u + du * alpha xtmp = Su0 @ utmp + Sx0 @ x0 xtmp = xtmp.reshape( ( param.nbData , param.nbVarX) ) - ftmp, _ = f_reach(param,xtmp[tl]) - f2tmp,_,_ = f_avoid(param,xtmp) + ftmp, _ = f_reach(xtmp[tl], param) + if param.useBatch: + f2tmp,_,_ = f_avoid2(xtmp, param) + else: + f2tmp,_,_ = f_avoid(xtmp, param) + cost = np.linalg.norm(ftmp.flatten())**2 * param.Q_track + np.linalg.norm(f2tmp.flatten())**2 * param.Q_avoid + np.linalg.norm(utmp) * param.r if cost < cost0 or alpha < 1e-3: @@ -216,8 +270,9 @@ for i in range( param.nbIter ): alpha /=2 if np.linalg.norm(alpha * du) < 1e-2: # Early stop condition - break - + break +toc=time.time() +print('Solving in {} seconds'.format(toc-tic)) # Ploting # =============================== @@ -232,8 +287,8 @@ for i in range(param.nbVarX-1): fi = fkin( x[:,:i+1] , param.linkLengths[:i+1] ) plt.plot(fi[:,0],fi[:,1],c="black",alpha=.5,linestyle='dashed') -f00 = fkin0(param,x[0]) -fT0 = fkin0(param,x[-1]) +f00 = fkin0(x[0], param) +fT0 = fkin0(x[-1],param) plt.plot( f00[:,0] , f00[:,1],c='black',linewidth=5,alpha=.2) plt.plot( fT0[:,0] , fT0[:,1],c='black',linewidth=5,alpha=.6)