demo_Riemannian_S2_batchLQR02.m 9.71 KB
Newer Older
Sylvain CALINON's avatar
Sylvain CALINON committed
1
2
3
4
5
function demo_Riemannian_S2_batchLQR02
% LQT on a sphere by relying on Riemannian manifold (recomputed in an online manner),
% by using position and velocity data (-> acceleration commands)
%
% If this code is useful for your research, please cite the related publication:
Sylvain CALINON's avatar
Sylvain CALINON committed
6
% @article{Calinon20RAM,
Sylvain CALINON's avatar
Sylvain CALINON committed
7
% 	author="Calinon, S.",
Sylvain CALINON's avatar
Sylvain CALINON committed
8
% 	title="Gaussians on {R}iemannian Manifolds for Robot Learning and Adaptive Control",
Sylvain CALINON's avatar
Sylvain CALINON committed
9
10
11
% 	journal="{IEEE} Robotics and Automation Magazine ({RAM})",
% 	year="2020",
% 	pages=""
Sylvain CALINON's avatar
Sylvain CALINON committed
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
% }
% 
% Copyright (c) 2019 Idiap Research Institute, http://idiap.ch/
% Written by Sylvain Calinon, http://calinon.ch/
% 
% This file is part of PbDlib, http://www.idiap.ch/software/pbdlib/
% 
% PbDlib is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License version 3 as
% published by the Free Software Foundation.
% 
% PbDlib is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
% 
% You should have received a copy of the GNU General Public License
% along with PbDlib. If not, see <http://www.gnu.org/licenses/>.

addpath('./m_fcts/');


%% Parameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
nbSamples = 5; %Number of demonstrations
nbRepros = 5; %Number of reproductions
nbIter = 20; %Number of iteration for the Gauss Newton algorithm
nbIterEM = 20; %Number of iteration for the EM algorithm
nbData = 100; %Number of datapoints
nbD = 40; %Time window for LQR computation
nbDrawingSeg = 20; %Number of segments used to draw ellipsoids

model.nbStates = 6; %Number of states in the GMM
model.nbVarPos = 2; %Dimension of data in tangent space (here: v1,v2)
model.nbDeriv = 2; %Number of static & dynamic features (D=2 for [x,dx])
model.nbVar = model.nbVarPos * model.nbDeriv; %Dimension of state vector in the tangent space
model.nbVarMan = 2*model.nbVarPos+1; %Dimension of the manifold (here: x1,x2,x3,v1,v2)
model.dt = 1E-5; %Time step duration
model.params_diagRegFact = 1E-4; %Regularization of covariance
model.rfactor = 1E-15;	%Control cost in LQR 
e0 = [0; -1; 0]; %Point on the sphere to project handwriting data

%Control cost matrix
R = eye(model.nbVarPos) * model.rfactor;
R = kron(eye(nbD-1),R);


%% Discrete dynamical System settings (in tangent space)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
A1d = zeros(model.nbDeriv);
for i=0:model.nbDeriv-1
	A1d = A1d + diag(ones(model.nbDeriv-i,1),i) * model.dt^i * 1/factorial(i); %Discrete 1D
end
B1d = zeros(model.nbDeriv,1); 
for i=1:model.nbDeriv
	B1d(model.nbDeriv-i+1) = model.dt^i * 1/factorial(i); %Discrete 1D
end
A = kron(A1d, eye(model.nbVarPos)); %Discrete nD
B = kron(B1d, eye(model.nbVarPos)); %Discrete nD

%Build Sx and Su matrices for batch LQR
Su = zeros(model.nbVar*nbD, model.nbVarPos*(nbD-1));
Sx = kron(ones(nbD,1),eye(model.nbVar));
M = B;
for n=2:nbD
	id1 = (n-1)*model.nbVar+1:nbD*model.nbVar;
	Sx(id1,:) = Sx(id1,:) * A;
	id1 = (n-1)*model.nbVar+1:n*model.nbVar; 
	id2 = 1:(n-1)*model.nbVarPos;
	Su(id1,id2) = M;
	M = [A*M(:,1:model.nbVarPos), M]; %Also M = [A^(n-1)*B, M] or M = [Sx(id1,:)*B, M]
end


%% Generate data on a sphere from handwriting data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
demos = [];
load('data/2Dletters/S.mat');
u = [];
for n=1:nbSamples
 	s(n).Data = []; %Resampling
	for m=1:model.nbDeriv
		if m==1
			dTmp = spline(1:size(demos{n}.pos,2), demos{n}.pos, linspace(1,size(demos{n}.pos,2),nbData)) * .9E-1; %Resampling
		else
			dTmp = gradient(dTmp) / model.dt; %Compute derivatives
		end
		s(n).Data = [s(n).Data; dTmp];
	end
	%s(n).Data = spline(1:size(demos{n}.pos,2), demos{n}.pos, linspace(1,size(demos{n}.pos,2),nbData)) * 1.3E-1;
	u = [u, s(n).Data]; 
end
x0 = [expmap(u(1:model.nbVarPos,:), e0); u(model.nbVarPos+1:end,:)]; %x is on the manifold and dx is in the tangent space of e0


%% GMM parameters estimation (encoding of [x;dx] with x on sphere and dx in Euclidean space)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
model = init_GMM_kbins(u, model, nbSamples);
model.MuMan = [expmap(model.Mu(1:model.nbVarPos,:), e0); model.Mu(model.nbVarPos+1:end,:)]; %Center on the manifold
model.Mu = zeros(model.nbVarPos,model.nbStates); %Center in the tangent plane at point MuMan of the manifold
u = [];
for nb=1:nbIterEM
	%E-step
	L = zeros(model.nbStates,size(x0,2));
	for i=1:model.nbStates
		L(i,:) = model.Priors(i) * gaussPDF(logmap(x0(1:3,:), model.MuMan(1:3,i)), model.Mu(1:model.nbVarPos,i), model.Sigma(1:model.nbVarPos,1:model.nbVarPos,i));
	end
	GAMMA = L ./ repmat(sum(L,1)+realmin, model.nbStates, 1);
	H = GAMMA ./ repmat(sum(GAMMA,2),1,nbData*nbSamples);
	%M-step
	for i=1:model.nbStates
		%Update Priors
		model.Priors(i) = sum(GAMMA(i,:)) / (nbData*nbSamples);
		%Update MuMan
		for n=1:nbIter
			xTar = model.MuMan(1:3,i); %Position target
			uTmp = logmap(x0(1:3,:), xTar);
			
% 			dTmp = gradient(uTmp) / model.dt;
			dTmp = x0(4:5,:); %dx in the e0 tangent space
			
			%Transportation of dTmp from e0 to xTar
			Ac = transp(e0, xTar);
			dTmp = Ac * dTmp;
			
			u(:,:,i) = [uTmp; dTmp];
			model.MuMan(:,i) = [expmap(uTmp*H(i,:)', xTar); dTmp*H(i,:)'];

% 			%Debug
% 			dTmp = x0(4:5,2);
% 			%Transportation of dTmp from e0 to xTar
% 			Ac = transp(e0, xTar);
% 			dTmp = Ac * dTmp;
% 			model.MuMan(:,i) = [expmap(uTmp*H(i,:)', xTar); dTmp];

		end
		%Update Sigma
		model.Sigma(:,:,i) = u(:,:,i) * diag(H(i,:)) * u(:,:,i)' + eye(size(u,1)) * model.params_diagRegFact;
% 		model.Sigma(:,:,i) = blkdiag(eye(model.nbVarPos)*1E-1, eye(model.nbVarPos)*1E-5);
	end
end

%Precomputation of inverses
for i=1:model.nbStates
	model.invSigma(:,:,i) = inv(model.Sigma(:,:,i));
end


%% Batch LQR recomputed in an online manner (computation centered on x)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
for n=1:nbRepros
	x = x0(1:3,1) + randn(3,1)*3E-1;
	x = x / norm(x);
	x_old = x;
	U = zeros(model.nbVar,1);
	for t=1:nbData
		r(n).x(:,t) = x; %Log data
		U(1:model.nbVarPos) = zeros(model.nbVarPos,1); %Set tangent space at x
		
		%Transportation of velocity vectors from x_old to x
		Ac = transp(x_old, x);
		U(model.nbVarPos+1:end) = Ac * U(model.nbVarPos+1:end); 

		%Set list of states for the next nbD time steps according to first demonstration (alternatively, an HSMM can be used)
		id = [t:min(t+nbD-1,nbData), repmat(nbData,1,t-nbData+nbD-1)];
		[~,q] = max(H(:,id),[],1); %works also for nbStates=1
		
		%Create single Gaussian N(MuQ,Q^-1) based on optimal state sequence q
		MuQ = zeros(model.nbVar*nbD,1);
		Q = zeros(model.nbVar*nbD);
		for s=1:nbD
			id = (s-1)*model.nbVar+1:s*model.nbVar;
			%Transportation of Sigma and duCov from model.MuMan to x
			Ac = transp(model.MuMan(1:3,q(s)), x);
			Q(id,id) = blkdiag(Ac,Ac) * model.invSigma(:,:,q(s)) * blkdiag(Ac,Ac)';
% 			Q(id,id) = blkdiag(Ac,eye(model.nbVarPos)) * model.invSigma(:,:,q(s)) * blkdiag(Ac,eye(model.nbVarPos))';
			
			%Transportation of du from model.MuMan to x
			dxTmp = Ac * model.MuMan(4:5,q(s));
			MuQ(id) = [logmap(model.MuMan(1:3,q(s)), x); dxTmp];
% 			MuQ(id) = [logmap(model.MuMan(1:3,q(s)), x); model.MuMan(4:5,q(s))]; 
		end
		
		%Compute acceleration commands
		SuInvSigmaQ = Su' * Q;
		Rq = SuInvSigmaQ * Su + R;
		rq = SuInvSigmaQ * (MuQ-Sx*U);
		ddu = Rq \ rq;
		
		U = A * U + B * ddu(1:model.nbVarPos); %Update U with first control command
		x_old = x; %Keep x for next iteration
		x = expmap(U(1:model.nbVarPos), x); %Update x
	end
end


%% Plots
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
clrmap = lines(model.nbStates);

%Display of covariance contours on the sphere
tl = linspace(-pi, pi, nbDrawingSeg);
Gdisp = zeros(3, nbDrawingSeg, model.nbStates);
Gvel = zeros(3, model.nbStates);
for i=1:model.nbStates
	[V,D] = eig(model.Sigma(1:model.nbVarPos,1:model.nbVarPos,i));
	Gdisp(:,:,i) = expmap(V*D.^.5*[cos(tl); sin(tl)], model.MuMan(1:3,i));
	Gvel(:,i) = expmap(model.MuMan(4:5,i)*model.dt*5, model.MuMan(1:3,i));
end

%Plots
figure('position',[10,10,1650,1250]); hold on; axis off; grid off; rotate3d on; 
colormap([.8 .8 .8]);
[X,Y,Z] = sphere(20);
mesh(X,Y,Z);
plot3(x0(1,:), x0(2,:), x0(3,:), '.','markersize',10,'color',[.5 .5 .5]);
for i=1:model.nbStates
	plot3(model.MuMan(1,i), model.MuMan(2,i), model.MuMan(3,i), '.','markersize',24,'color',clrmap(i,:));
	plot3(Gdisp(1,:,i), Gdisp(2,:,i), Gdisp(3,:,i), '-','linewidth',2,'color',clrmap(i,:));
	plot3(Gvel(1,i), Gvel(2,i), Gvel(3,i), 'o','markersize',6,'linewidth',2,'color',clrmap(i,:));
end
for n=1:nbRepros
	h(1) = plot3(r(n).x(1,:), r(n).x(2,:), r(n).x(3,:), '-','linewidth',2,'color',[0 0 0]);
	plot3(r(n).x(1,1), r(n).x(2,1), r(n).x(3,1), '.','markersize',24,'color',[0 0 0]);
end
view(-20,15); axis equal; axis vis3d;  

% print('-dpng','graphs/demo_Riemannian_S2_batchLQR01.png');
pause;
close all;
end


%% Functions
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function x = expmap(u, mu)
	x = rotM(mu)' * expfct(u);
end

function u = logmap(x, mu)
	if norm(mu-[0;0;-1])<1e-6
		R = [1 0 0; 0 -1 0; 0 0 -1];
	else
		R = rotM(mu);
	end
	u = logfct(R*x);
end

function Exp = expfct(u)
	normv = sqrt(u(1,:).^2+u(2,:).^2);
	Exp = real([u(1,:).*sin(normv)./normv; u(2,:).*sin(normv)./normv; cos(normv)]);
	Exp(:,normv < 1e-16) = repmat([0;0;1],1,sum(normv < 1e-16));	
end

function Log = logfct(x)
	scale = acos(x(3,:)) ./ sqrt(1-x(3,:).^2);
	scale(isnan(scale)) = 1;
	Log = [x(1,:).*scale; x(2,:).*scale];	
end

function Ac = transp(g,h)
	E = [eye(2); zeros(1,2)];
	vm = rotM(g)' * [logmap(h,g); 0];
	mn = norm(vm);
	uv = vm / (mn+realmin);
	Rpar = eye(3) - sin(mn)*(g*uv') - (1-cos(mn))*(uv*uv');	
	Ac = E' * rotM(h) * Rpar * rotM(g)' * E; %Transportation operator from g to h 
end