Commit d8cc3060 authored by Sylvain Calinon's avatar Sylvain Calinon

New online examples for LQR

parent 33684ab4
......@@ -112,6 +112,7 @@ All the examples are located in the main folder, and the functions are located i
| demo_batchLQR01 | Controller retrieval through a batch solution of linear quadratic optimal control (unconstrained linear MPC), by relying on a Gaussian mixture model (GMM) encoding of position and velocity data (see also demo_iterativeLQR01) |
| demo_batchLQR02 | Same as demo_batchLQR01 but with only position data |
| demo_batchLQR_augmSigma01 | Batch LQR with augmented covariance to transform a tracking problem to a regulation problem |
| demo_batchLQR_online01 | Batch solution of linear quadratic optimal control (unconstrained linear MPC) computed in an online manner, by relying on a GMM encoding of position and velocity data |
| demo_DMP01 | Dynamic movement primitive (DMP) encoding with radial basis functions |
| demo_DMP02 | Generalization of dynamic movement primitive (DMP) with polynomial fitting using radial basis functions |
| demo_DMP_GMR01 | Emulation of a standard dynamic movement primitive (DMP) by using a GMM with diagonal covariance matrix, and retrieval computed through Gaussian mixture regression (GMR) |
......@@ -142,6 +143,7 @@ All the examples are located in the main folder, and the functions are located i
| demo_iterativeLQR01 | Iterative solution of linear quadratic tracking problem (finite horizon, unconstrained linear MPC), by relying on a GMM encoding of position and velocity data (see also demo_batchLQR01) |
| demo_iterativeLQR02 | Same as demo_iterativeLQR01 with only position data |
| demo_iterativeLQR_augmSigma01 | Iterative LQR with augmented covariance to transform the tracking problem to a regulation problem |
| demo_iterativeLQR_augmSigma_online01 | Same as demo_iterativeLQR_augmSigma01 but recomputed in an online manner |
| demo_LQR_infHor01 | Continuous infinite horizon linear quadratic tracking, by relying on a GMM encoding of position and velocity data |
| demo_LQR_infHor02 | Discrete infinite horizon linear quadratic tracking, by relying on a GMM encoding of position and velocity data |
| demo_MFA01 | Mixture of factor analyzers (MFA) |
......
function demo_batchLQR_online01
% Batch solution of linear quadratic optimal control (unconstrained linear MPC)
% computed in an online manner, by relying on a GMM encoding of position and velocity data.
%
% Writing code takes time. Polishing it and making it available to others takes longer!
% If some parts of the code were useful for your research of for a better understanding
% of the algorithms, please reward the authors by citing the related publications,
% and consider making your own research available in this way.
%
% @article{Calinon16JIST,
% author="Calinon, S.",
% title="A Tutorial on Task-Parameterized Movement Learning and Retrieval",
% journal="Intelligent Service Robotics",
% publisher="Springer Berlin Heidelberg",
% doi="10.1007/s11370-015-0187-9",
% year="2016",
% volume="9",
% number="1",
% pages="1--29"
% }
%
% Copyright (c) 2015 Idiap Research Institute, http://idiap.ch/
% Written by Sylvain Calinon, http://calinon.ch/
%
% This file is part of PbDlib, http://www.idiap.ch/software/pbdlib/
%
% PbDlib is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License version 3 as
% published by the Free Software Foundation.
%
% PbDlib is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with PbDlib. If not, see <http://www.gnu.org/licenses/>.
addpath('./m_fcts/');
%% Parameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
nbSamples = 5; %Number of demonstrations
nbRepros = 1; %Number of reproductions in new situations
nbData = 100; %Number of datapoints
nbD = 20; %Time window for LQR computation
model.nbStates = 4; %Number of Gaussians in the GMM
model.nbVarPos = 2; %Dimension of position data (here: x1,x2)
model.nbDeriv = 2; %Number of static and dynamic features (nbDeriv=2 for [x,dx] and u=ddx)
model.nbVar = model.nbVarPos * model.nbDeriv; %Dimension of state vector
model.dt = 0.01; %Time step duration
model.rfactor = 1E-5; %Control cost in LQR
%Control cost matrix
R = eye(model.nbVarPos) * model.rfactor;
R = kron(eye(nbD-1),R);
%% Dynamical System settings (discrete version)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Integration with higher order Taylor series expansion
A1d = zeros(model.nbDeriv);
for i=0:model.nbDeriv-1
A1d = A1d + diag(ones(model.nbDeriv-i,1),i) * model.dt^i * 1/factorial(i); %Discrete 1D
end
B1d = zeros(model.nbDeriv,1);
for i=1:model.nbDeriv
B1d(model.nbDeriv-i+1) = model.dt^i * 1/factorial(i); %Discrete 1D
end
A = kron(A1d, eye(model.nbVarPos)); %Discrete nD
B = kron(B1d, eye(model.nbVarPos)); %Discrete nD
%Build Sx and Su matrices for batch LQR, see Eq. (35)
Su = zeros(model.nbVar*nbD, model.nbVarPos*(nbD-1));
Sx = kron(ones(nbD,1),eye(model.nbVar));
M = B;
for n=2:nbD
id1 = (n-1)*model.nbVar+1:nbD*model.nbVar;
Sx(id1,:) = Sx(id1,:) * A;
id1 = (n-1)*model.nbVar+1:n*model.nbVar;
id2 = 1:(n-1)*model.nbVarPos;
Su(id1,id2) = M;
M = [A*M(:,1:model.nbVarPos), M]; %Also M = [A^(n-1)*B, M] or M = [Sx(id1,:)*B, M]
end
%% Load handwriting data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
load('data/2Dletters/A.mat');
Data=[];
for n=1:nbSamples
s(n).Data=[];
for m=1:model.nbDeriv
if m==1
dTmp = spline(1:size(demos{n}.pos,2), demos{n}.pos, linspace(1,size(demos{n}.pos,2),nbData)); %Resampling
else
dTmp = gradient(dTmp) / model.dt; %Compute derivatives
end
s(n).Data = [s(n).Data; dTmp];
end
Data = [Data s(n).Data];
end
%% Learning
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%model = init_GMM_kmeans(Data,model);
model = init_GMM_kbins(Data,model,nbSamples);
[model, H] = EM_GMM(Data, model);
%% Batch LQR recomputed in an online manner
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
for n=1:nbRepros
X = Data(:,1);
for t=1:nbData
%Set list of states according to first demonstration (alternatively, an HSMM can be used)
id = [t:min(t+nbD-1,nbData), repmat(nbData,1,t-nbData+nbD-1)];
[~,q] = max(H(:,id),[],1); %works also for nbStates=1
%Create single Gaussian N(MuQ,SigmaQ) based on optimal state sequence q, see Eq. (27)
MuQ = reshape(model.Mu(:,q), model.nbVar*nbD, 1);
SigmaQ = (kron(ones(nbD,1), eye(model.nbVar)) * reshape(model.Sigma(:,:,q), model.nbVar, model.nbVar*nbD)) .* kron(eye(nbD), ones(model.nbVar));
%Set matrices to compute the damped weighted least squares estimate, see Eq. (37)
SuInvSigmaQ = Su' / SigmaQ;
Rq = SuInvSigmaQ * Su + R;
rq = SuInvSigmaQ * (MuQ-Sx*X);
u = Rq \ rq;
%Update X with first control command
X = A * X + B * u(1:model.nbVarPos);
r(n).Data(:,t) = X;
end
end
%% Plot 2D
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
figure('position',[10 10 700 650],'color',[1 1 1]); hold on; axis off;
plotGMM(model.Mu(1:2,:), model.Sigma(1:2,1:2,:), [0.5 0.5 0.5]);
plot(Data(1,:), Data(2,:), 'k.');
for n=1:nbRepros
plot(r(n).Data(1,:), r(n).Data(2,:), '-','linewidth',2,'color',[.8 0 0]);
end
axis equal;
%% Timeline plot
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
labList = {'$x_1$','$x_2$','$\dot{x}_1$','$\dot{x}_2$','$\ddot{x}_1$','$\ddot{x}_2$'};
figure('position',[720 10 600 650],'color',[1 1 1]);
for j=1:model.nbVar
subplot(model.nbVar,1,j); hold on;
for n=1:nbSamples
plot(Data(j,(n-1)*nbData+1:n*nbData), '-','linewidth',.5,'color',[0 0 0]);
end
for n=1:nbRepros
plot(r(n).Data(j,:), '-','linewidth',1,'color',[.8 0 0]);
end
if j<7
ylabel(labList{j},'fontsize',14,'interpreter','latex');
end
end
xlabel('$t$','fontsize',14,'interpreter','latex');
%print('-dpng','graphs/demo_batchLQR_online01.png');
%pause;
%close all;
function demo_iterativeLQR_augmSigma_online01
% Iterative LQR with augmented covariance to transform the tracking problem to a
% regulation problem, recomputed in an online manner.
%
% Writing code takes time. Polishing it and making it available to others takes longer!
% If some parts of the code were useful for your research of for a better understanding
% of the algorithms, please reward the authors by citing the related publications,
% and consider making your own research available in this way.
%
% @article{Calinon16JIST,
% author="Calinon, S.",
% title="A Tutorial on Task-Parameterized Movement Learning and Retrieval",
% journal="Intelligent Service Robotics",
% publisher="Springer Berlin Heidelberg",
% doi="10.1007/s11370-015-0187-9",
% year="2016",
% volume="9",
% number="1",
% pages="1--29"
% }
%
% Copyright (c) 2016 Idiap Research Institute, http://idiap.ch/
% Written by Sylvain Calinon (http://calinon.ch/) and Danilo Bruno (danilo.bruno@iit.it)
%
% This file is part of PbDlib, http://www.idiap.ch/software/pbdlib/
%
% PbDlib is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License version 3 as
% published by the Free Software Foundation.
%
% PbDlib is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with PbDlib. If not, see <http://www.gnu.org/licenses/>.
addpath('./m_fcts/');
%% Parameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
nbSamples = 5; %Number of demonstrations
nbRepros = 1; %Number of reproductions
nbData = 100; %Number of datapoints
nbD = 20; %Time window for LQR computation
model.nbStates = 5; %Number of Gaussians in the GMM
model.nbVarPos = 2; %Dimension of position data (here: x1,x2)
model.nbDeriv = 2; %Number of static and dynamic features (nbDeriv=2 for [x,dx] and u=ddx)
model.nbVar = model.nbVarPos * model.nbDeriv; %Dimension of state vector
model.dt = 0.01; %Time step duration
model.rfactor = 1E-6; %Control cost in LQR
%Control cost matrix
R = eye(model.nbVarPos) * model.rfactor;
%% Dynamical System settings (discrete version)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Integration with higher order Taylor series expansion
A1d = zeros(model.nbDeriv);
for i=0:model.nbDeriv-1
A1d = A1d + diag(ones(model.nbDeriv-i,1),i) * model.dt^i * 1/factorial(i); %Discrete 1D
end
B1d = zeros(model.nbDeriv,1);
for i=1:model.nbDeriv
B1d(model.nbDeriv-i+1) = model.dt^i * 1/factorial(i); %Discrete 1D
end
A0 = kron(A1d, eye(model.nbVarPos)); %Discrete nD
B0 = kron(B1d, eye(model.nbVarPos)); %Discrete nD
A = [A0, zeros(model.nbVar,1); zeros(1,model.nbVar), 1]; %Augmented A
B = [B0; zeros(1,model.nbVarPos)]; %Augmented B
%% Load handwriting data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
load('data/2Dletters/G.mat');
Data=[];
for n=1:nbSamples
s(n).Data=[];
for m=1:model.nbDeriv
if m==1
dTmp = spline(1:size(demos{n}.pos,2), demos{n}.pos, linspace(1,size(demos{n}.pos,2),nbData)); %Resampling
else
dTmp = gradient(dTmp) / model.dt; %Compute derivatives
end
s(n).Data = [s(n).Data; dTmp];
end
Data = [Data s(n).Data];
end
%% Learning
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
model = init_GMM_kbins(Data,model,nbSamples);
[model, H] = EM_GMM(Data, model);
%Transform model to the corresponding version with augmented covariance
model0 = model;
model.nbVar = model0.nbVar+1;
model.Mu = zeros(model.nbVar, model.nbStates);
model.Sigma = zeros(model.nbVar, model.nbVar, model.nbStates);
for i=1:model.nbStates
model.Sigma(:,:,i) = [model0.Sigma(:,:,i)+model0.Mu(:,i)*model0.Mu(:,i)', model0.Mu(:,i); model0.Mu(:,i)', 1];
end
%% Iterative LQR with augmented state space, recomputed in an online manner
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
for n=1:nbRepros
X = [Data(:,1); 1];
for t=1:nbData
r(n).Data(:,t) = X; %Log data
%Set list of states according to first demonstration (alternatively, an HSMM can be used)
id = [t:min(t+nbD-1,nbData), repmat(nbData,1,t-nbData+nbD-1)];
[~,q] = max(H(:,id),[],1); %works also for nbStates=1
%Riccati equation
P = zeros(model.nbVar,model.nbVar,nbD);
P(:,:,end) = inv(model.Sigma(:,:,q(end)));
for s=nbD-1:-1:1
Q = inv(model.Sigma(:,:,q(s)));
P(:,:,s) = Q - A' * (P(:,:,s+1) * B / (B' * P(:,:,s+1) * B + R) * B' * P(:,:,s+1) - P(:,:,s+1)) * A;
end
K = (B' * P(:,:,1) * B + R) \ B' * P(:,:,1) * A; %FB gain
u = -K * X; %Acceleration command with FB terms on augmented state (resulting in FB and FF terms)
X = A * X + B * u; %Update of state vector
end
end
%% Plot
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
figure('position',[10 10 1300 500],'color',[1 1 1]);
%Plot position
subplot(1,2,1); hold on;
plotGMM(model0.Mu(1:2,:), model0.Sigma(1:2,1:2,:), [0.5 0.5 0.5], .3);
for n=1:nbSamples
plot(Data(1,(n-1)*nbData+1:n*nbData), Data(2,(n-1)*nbData+1:n*nbData), '-','color',[.7 .7 .7]);
end
for n=1:nbRepros
h(1) = plot(r(n).Data(1,:), r(n).Data(2,:), '-','linewidth',2,'color',[.8 0 0]);
end
axis equal;
xlabel('x_1'); ylabel('x_2');
%Plot velocity
subplot(1,2,2); hold on;
plotGMM(model0.Mu(3:4,:), model0.Sigma(3:4,3:4,:), [0.5 0.5 0.5], .3);
for n=1:nbSamples
plot(Data(3,(n-1)*nbData+1:n*nbData), Data(4,(n-1)*nbData+1:n*nbData), '-','color',[.7 .7 .7]);
end
for n=1:nbRepros
plot(r(n).Data(3,:), r(n).Data(4,:), '-','linewidth',2,'color',[.8 0 0]);
plot(r(n).Data(3,1), r(n).Data(4,1), '.','markersize',18,'color',[.6 0 0]);
end
plot(0,0,'k+');
axis equal;
xlabel('dx_1'); ylabel('dx_2');
%print('-dpng','graphs/demo_iterativeLQR_augmSigma_online01.png');
%pause;
%close all;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment