diff --git a/doc/images/gradientDescent_vs_NewtonMethod01.jpg b/doc/images/gradientDescent_vs_NewtonMethod01.jpg index 8b8bd582068516c0db73405c2852cab9a00a3a20..06f13fffaf327beff511ab47338d7292d7b6a680 100644 Binary files a/doc/images/gradientDescent_vs_NewtonMethod01.jpg and b/doc/images/gradientDescent_vs_NewtonMethod01.jpg differ diff --git a/matlab/ergodic_control_SMC_2D.m b/matlab/ergodic_control_SMC_2D.m index 5653622a8871ecf7df5544a4c8b2692e11d436b7..3800c7385aa7db6e52041efcac08d7517924d5a2 100644 --- a/matlab/ergodic_control_SMC_2D.m +++ b/matlab/ergodic_control_SMC_2D.m @@ -37,7 +37,7 @@ Priors = ones(1,nbGaussian) / nbGaussian; %Mixing coefficients %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% rg = 0:nbFct-1; [KX(1,:,:), KX(2,:,:)] = ndgrid(rg, rg); -Lambda = (KX(1,:).^2 + KX(2,:).^2 + 1)'.^-sp; %Weighting vector (Eq.(15)) +Lambda = (KX(1,:).^2 + KX(2,:).^2 + 1)'.^-sp; %Weighting vector %Explicit computation of w_hat by exploiting the Fourier transform properties of Gaussians (optimized version by exploiting symmetries), %by enumerating symmetry operations for 2D signal ([-1,-1],[-1,1],[1,-1] and [1,1]), and removing redundant ones -> keeping ([-1,-1],[-1,1]) @@ -49,8 +49,8 @@ w_hat = zeros(nbFct^nbVar, 1); for j=1:nbGaussian for n=1:size(op,2) MuTmp = diag(op(:,n)) * Mu(:,j); %Eq.(20) - SigmaTmp = diag(op(:,n)) * Sigma(:,:,j) * diag(op(:,n))'; %Eq.(20) - w_hat = w_hat + Priors(j) * cos(kk' * MuTmp) .* exp(diag(-.5 * kk' * SigmaTmp * kk)); %Eq.(21) + SigmaTmp = diag(op(:,n)) * Sigma(:,:,j) * diag(op(:,n))'; + w_hat = w_hat + Priors(j) * cos(kk' * MuTmp) .* exp(diag(-.5 * kk' * SigmaTmp * kk)); end end w_hat = w_hat / L^nbVar / size(op,2); diff --git a/matlab/iLQR_manipulator_basic.m b/matlab/iLQR_manipulator_basic.m index 4b915386d9e53fc27882ae1ed313e5572bc08f04..c551850f8b322a361f83d0fe2a0a45ab5bcbb081 100644 --- a/matlab/iLQR_manipulator_basic.m +++ b/matlab/iLQR_manipulator_basic.m @@ -18,11 +18,11 @@ param.nbVarX = 3; %State space dimension (x1,x2,x3) param.nbVarU = 3; %Control space dimension (dx1,dx2,dx3) param.nbVarF = 2; %Objective function dimension (f1,f2) param.l = [2; 2; 1]; %Robot links lengths -param.r = 1E-6; %Control weight term +param.q = 1E0; %Tracking weight +param.r = 1E-6; %Control weight param.Mu = [[2; 1], [3; 2]]; %Viapoints R = speye((param.nbData-1)*param.nbVarU) * param.r; %Control weight matrix (at trajectory level) -Q = speye(param.nbVarF * param.nbPoints) * 1E0; %Precision matrix %Time occurrence of viapoints tl = linspace(1, param.nbData, param.nbPoints+1); @@ -43,16 +43,16 @@ Su = Su0(idx,:); for n=1:param.nbMaxIter x = reshape(Su0 * u + Sx0 * x0, param.nbVarX, param.nbData); %System evolution [f, J] = f_reach(x(:,tl), param); %Residuals and Jacobians - du = (Su' * J' * Q * J * Su + R) \ (-Su' * J' * Q * f(:) - u * param.r); %Gauss-Newton update + du = (Su' * J' * J * Su * param.q + R) \ (-Su' * J' * f(:) * param.q - u * param.r); %Gauss-Newton update %Estimate step size with backtracking line search method alpha = 1; - cost0 = f(:)' * Q * f(:) + norm(u)^2 * param.r; %Cost + cost0 = norm(f(:))^2 * param.q + norm(u)^2 * param.r; %Cost while 1 utmp = u + du * alpha; xtmp = reshape(Su0 * utmp + Sx0 * x0, param.nbVarX, param.nbData); %System evolution ftmp = f_reach(xtmp(:,tl), param); %Residuals - cost = ftmp(:)' * Q * ftmp(:) + norm(utmp)^2 * param.r; %Cost + cost = norm(ftmp(:))^2 * param.q + norm(utmp)^2 * param.r; %Cost if cost < cost0 || alpha < 1E-3 break; end @@ -60,7 +60,7 @@ for n=1:param.nbMaxIter end u = u + du * alpha; %Update - if norm(du * alpha) < 1E-2 + if norm(du * alpha) < 1E-3 break; %Stop iLQR when solution is reached end end diff --git a/matlab/iLQR_manipulator_initStateOptim.m b/matlab/iLQR_manipulator_initStateOptim.m index 313b2f7b65fc5ab5b08e4c6988429d5afe137307..1d14d0f923a503945befa6e2685e66d118a304d9 100644 --- a/matlab/iLQR_manipulator_initStateOptim.m +++ b/matlab/iLQR_manipulator_initStateOptim.m @@ -15,54 +15,54 @@ param.nbData = 30; %Number of datapoints in a trajectory param.nbPoints = 2; %Number of viapoints param.dt = 1E0; %Time step size param.nbIter = 50; %Number of iterations for iLQR -param.nbDOFs = 5; %Number of articulated links -param.nbVarX = param.nbDOFs * 2; %State space dimension (q01,q02,q1,q2,q3, x1,x2,l1,l2,l3), with 5 rotary and 5 prismatic joints -param.nbVarU = param.nbDOFs * 2; %Control space dimension (dq01,dq02,dq1,dq2,dq3 dx1,dx2,dl1,dl2,dl3) +param.nbDOFs = 5; %Number of revolute joints +param.nbVarX = param.nbDOFs * 2; %State space dimension (q01,q02,q1,q2,q3, x1,x2,l1,l2,l3), with 5 revolute and 5 prismatic joints +param.nbVarU = 3; %Control space dimension (we only control dq1,dq2,dq3) param.nbVarF = 3; %Task space dimension (f1,f2,f3, with f3 as orientation) -param.Mu = [[2; 1; -pi/6], [3; 2; -pi/3]]; %Viapoints -param.Mu(1:2,:) = param.Mu(1:2,:) + 5; - -Rx0 = diag([ones(1,2)*1E0, ones(1,param.nbDOFs-2)*1E0, ... - zeros(1,2), ones(1,param.nbDOFs-2)*1E0]); %Weights to modify pose (q01,q02,q1,q2,q3, x1,x2,l1,l2,l3) -> here, only x1,x2 can change -R = kron(speye(param.nbData-1), diag([ones(1,2)*1E0, ones(1,param.nbDOFs-2)*1E-6, ... - ones(1,2)*1E0, ones(1,param.nbDOFs-2)*1E0])); %Weights for (dq01,dq02,dq1,dq2,dq3 dx1,dx2,dl1,dl2,dl3) -> here, only dq1,dq2,dq3 can change -Ra = blkdiag(Rx0, R); %Weights for augmented control commands +param.Mu = [[7; 6; -pi/6], [8; 7; -pi/3]]; %Viapoints +Rx0 = zeros(2); %we only search for initial x1,x2 in the state vector (q01,q02,q1,q2,q3, x1,x2,l1,l2,l3) +R = speye(3*(param.nbData-1)) * 1E-6; %We only control dq1,dq2,dq3 in the velocity vector (dq01,dq02,dq1,dq2,dq3 dx1,dx2,dl1,dl2,dl3) +Ra = blkdiag(Rx0, R); %Weights for list of decision variables (control commands augmented with initial state) Q = kron(speye(param.nbPoints), diag([1E0, 1E0, 0])); %Precision matrix (by removing orientation constraint for the viapoint tracking task) %Time occurrence of viapoints tl = linspace(1, param.nbData, param.nbPoints+1); tl = round(tl(2:end)); -idx = (tl - 1) * param.nbVarX + [1:param.nbVarX]'; +idx = (tl - 1) * param.nbVarX + [1:param.nbVarX]'; %indices listing viapoints to reach %% Iterative LQR (iLQR) %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% x0_hat = [0; pi/2; pi/4; -pi/2; -pi/4; .1; .1; 3; 2; 1]; %Initial robot pose estimate (q01,q02,q1,q2,q3, x1,x2,l1,l2,l3) -ua = [x0_hat; zeros(param.nbVarU*(param.nbData-1), 1)]; %Initial augmented control commands +ua = [x0_hat(6:7); zeros(param.nbVarU*(param.nbData-1), 1)]; %Initialization of augmented control commands with x1,x2 as initial state and dq1,dq2,dq3 as list of control commands +ua_full = [x0_hat; zeros(param.nbVarX*(param.nbData-1), 1)]; + +idu = [12:10:size(ua_full,1)] + [1:3]'; +idu = [6,7,idu(:)']; %indices listing decision variables to update %Transfer matrices (for linear system as single integrator) Sx0 = kron(ones(param.nbData,1), eye(param.nbVarX)); Su0 = [zeros(param.nbVarX, param.nbVarX*(param.nbData-1)); kron(tril(ones(param.nbData-1)), eye(param.nbVarX)*param.dt)]; Sa0 = [Sx0, Su0]; -Sa = [Sx0(idx,:), Su0(idx,:)]; +Sa = Sa0(idx,idu); for n=1:param.nbIter - x = reshape(Sa0 * ua, param.nbVarX, param.nbData); %System evolution + ua_full(idu) = ua; + x = reshape(Sa0 * ua_full, param.nbVarX, param.nbData); %System evolution [f, J] = f_reach(x(:,tl), param); - fua = ua - [x0_hat; zeros(param.nbVarU*(param.nbData-1),1)]; - dua = (Sa' * J' * Q * J * Sa + Ra) \ (-Sa' * J' * Q * f(:) - Ra * fua); %Gradient for initial pose and control commands + dua = (Sa' * J' * Q * J * Sa + Ra) \ (-Sa' * J' * Q * f(:) - Ra * ua); %Gradient for initial pose and control commands %Estimate step size with line search method alpha = 1; - cost0 = f(:)' * Q * f(:) + fua' * Ra * fua; + cost0 = f(:)' * Q * f(:) + ua' * Ra * ua; while 1 uatmp = ua + dua * alpha; - xtmp = reshape(Sa0 * uatmp, param.nbVarX, param.nbData); + ua_full(idu) = uatmp; + xtmp = reshape(Sa0 * ua_full, param.nbVarX, param.nbData); ftmp = f_reach(xtmp(:,tl), param); - fuatmp = uatmp - [x0_hat; zeros(param.nbVarU*(param.nbData-1),1)]; - cost = ftmp(:)' * Q * ftmp(:) + fuatmp' * Ra * fuatmp; + cost = ftmp(:)' * Q * ftmp(:) + uatmp' * Ra * uatmp; if cost < cost0 || alpha < 1E-4 break; end