From 41e2e20c7fd57e7b1c1ddc835fde32f099b04564 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Maceiras?= <jeremy.maceiras@idiap.ch> Date: Tue, 28 May 2024 17:50:18 +0200 Subject: [PATCH] feat: Implemented ergodic_control_SMC_DDP_1D --- python/ergodic_control_SMC_DDP_1D.py | 182 +++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 python/ergodic_control_SMC_DDP_1D.py diff --git a/python/ergodic_control_SMC_DDP_1D.py b/python/ergodic_control_SMC_DDP_1D.py new file mode 100644 index 0000000..48c7278 --- /dev/null +++ b/python/ergodic_control_SMC_DDP_1D.py @@ -0,0 +1,182 @@ +''' +Trajectory optimization for ergodic control problem + +Copyright (c) 2023 Idiap Research Institute <https://www.idiap.ch/> +Written by Jérémy Maceiras <jeremy.maceiras@idiap.ch> and +Sylvain Calinon <https://calinon.ch> + +This file is part of RCFS <https://robotics-codes-from-scratch.github.io/> +License: MIT +''' + +import numpy.matlib +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.patches as patches + +# Helper functions +# =============================== + +# Residuals w and Jacobians J in spectral domain +def f_ergodic(x, param): + phi = np.cos(x @ param.kk.T) / param.L + dphi = - np.sin(x @ param.kk.T) * np.matlib.repmat(param.kk.T,param.nbData,1) / param.L + w = (np.sum(phi,axis=0) / param.nbData).reshape((param.nbFct,1)) + J = dphi.T / param.nbData + return w, J + +## Parameters +# =============================== + +param = lambda: None # Lazy way to define an empty class in python +param.nbData = 100 # Number of datapoints +param.nbVarX = 1 # State space dimension +param.nbFct = 8 # Number of Fourier basis functsions +param.nbStates = 2 # Number of Gaussians to represent the spatial distribution +param.nbIter = 500 # Maximum number of iterations for iLQR +param.dt = 1e-2 # Time step length +param.r = 1e-12 # Control weight term + +param.xlim = [0,1] # Domain limit +param.L = (param.xlim[1] - param.xlim[0]) * 2 # Size of [-param.xlim(2),param.xlim(2)] +param.om = 2 * np.pi / param.L # Omega +param.range = np.arange(param.nbFct) +param.kk = param.om * param.range.reshape((param.nbFct,1)) +param.Lambda = 1/(param.range**2 + 1) # Weighting factor + +# Desired spatial distribution represented as a mixture of Gaussians +param.Mu = np.array([0.7, 0.5]) +param.Sigma = np.array([0.003,0.01]) + +logs = lambda: None # Object to store logs +logs.x = [] +logs.w = [] +logs.g = [] +logs.e = [] + +Priors = np.ones(param.nbStates) / param.nbStates # Mixing coefficients + +# Transfer matrices (for linear system as single integrator) +Su = np.vstack([ + np.zeros([param.nbVarX, param.nbVarX*(param.nbData-1)]), + np.tril(np.kron(np.ones([param.nbData-1, param.nbData-1]), np.eye(param.nbVarX) * param.dt)) +]) +Sx = np.kron(np.ones(param.nbData), np.eye(param.nbVarX)).T + +Q = np.diag(param.Lambda) # Precision matrix +R = np.eye((param.nbData-1) * param.nbVarX) * param.r # Control weight matrix (at trajectory level) + + +# Compute Fourier series coefficients w_hat of desired spatial distribution +# ========================================================================= +# Explicit description of w_hat by exploiting the Fourier transform properties of Gaussians (optimized version by exploiting symmetries) + +w_hat = np.zeros((param.nbFct,1)) +for i in range(param.nbStates): + w_hat = w_hat + Priors[i] * np.cos( param.kk * param.Mu[i] ) * np.exp(-.5*param.kk**2*param.Sigma[i]) +w_hat = w_hat / param.L + +# Fourier basis functions (only used for display as a discretized map) + +nbRes = 200 +xm = np.linspace(param.xlim[0],param.xlim[1],nbRes).reshape((1,nbRes)) # Spatial range +phim = np.cos(param.kk @ xm) * 2 # Fourier basis functions +phim[1:] = phim[1:]*2 + +# Desired spatial distribution +g = w_hat.T @ phim + +# iLQR +# =============================== + +u = np.zeros(param.nbVarX * (param.nbData-1)).reshape((-1,1)) # Initial control command +u = (u + np.random.normal(size=(len(u),1))).reshape((-1,1)) + +x0 = np.array([[0.6]]) # Initial position + +for i in range(param.nbIter): + x = Su @ u + Sx @ x0 # System evolution + w, J = f_ergodic(x, param) # Fourier series coefficients and Jacobian + f = w - w_hat # Residual + + du = np.linalg.inv(Su.T @ J.T @ Q @ J @ Su + R) @ (-Su.T @ J.T @ Q @ f - u * param.r) # Gauss-Newton update + + cost0 = f.T @ Q @ f + np.linalg.norm(u)**2 * param.r # Cost + + # Log data + logs.x += [x] # Save trajectory in state space + logs.w += [w] # Save Fourier coefficients along trajectory + logs.g += [w.T @ phim] # Save reconstructed spatial distribution (for visualization) + logs.e += [cost0.squeeze()] # Save reconstruction error + + # Estimate step size with backtracking line search method + alpha = 1 + while True: + utmp = u + du * alpha + xtmp = Sx @ x0 + Su @ utmp + wtmp, _ = f_ergodic(xtmp,param) + ftmp = wtmp - w_hat + cost = ftmp.T @ Q @ ftmp + np.linalg.norm(utmp)**2 * param.r + if cost < cost0 or alpha < 1e-3: + print("Iteration {}, cost: {}".format(i,cost.squeeze())) + break + alpha /= 2 + + u = u + du * alpha + + if np.linalg.norm(du * alpha) < 1E-2: + break # Stop iLQR iterations when solution is reached + +# Plots +# =============================== + +fig,axs = plt.subplots(3,2) + +# Plot distribution +plt.subplot(3,2,1) +plt.plot(xm.squeeze(),g.squeeze(),label="Desired",color=(1,.6,.6),linewidth=6) +plt.plot(xm.squeeze(),logs.g[0].squeeze(),label="Initial",color=(.7,.7,.7),linewidth=2) +plt.plot(xm.squeeze(),logs.g[-1].squeeze(),label="Final",color=(0,0,0),linewidth=2) +plt.legend() +plt.xlabel("x",fontsize=16) +plt.ylabel("g(x)",fontsize=16) +plt.yticks([]) +plt.xticks([]) + +# Plot signal +plt.subplot(3,1,2) +plt.xlabel("x",fontsize=16) +plt.yticks([1,param.nbData],["t-T","t"],fontsize=16) +plt.xticks([]) +for i,x in enumerate(logs.x[::10]): + c = [0.9 * (1-(i*10)/len(logs.x))]*3 + plt.plot(x,range(1,param.nbData+1),color=c) +plt.plot(logs.x[-1],range(1,param.nbData+1),color=[0,0,0]) + +# Plot Fourier coefficients +plt.subplot(3,2,2) +plt.xlabel("k",fontsize=16) +plt.ylabel("$W_k$",fontsize=16) +plt.xticks([0,param.nbFct-1]) +plt.yticks([]) +for i in range(param.nbFct): + plt.plot( [param.range[i],param.range[i]], [0,w_hat[i,0]], color = [1,.6,.6], linewidth=6) + plt.plot( [param.range[i],param.range[i]], [0,logs.w[-1][i,0]], color = [0,0,0], linewidth=2) + plt.scatter([param.range[i]],[logs.w[-1][i,0]],color=[0,0,0],zorder=10) +plt.plot(param.range,np.zeros(len(param.range)),color=[0,0,0]) + +# Plot Lambbda_k +plt.subplot(3,2,5) +plt.xlabel("k",fontsize=16) +plt.ylabel("$\Lambda_k$",fontsize=16) +plt.xticks([0,param.nbFct-1]) +plt.yticks([0,1]) +plt.plot(param.range,param.Lambda,color=[0,0,0],marker='o') + +# Plot Epsilon +plt.subplot(3,2,6) +plt.xlabel("n",fontsize=16) +plt.ylabel("$\epsilon$",fontsize=16) +plt.plot(logs.e,color=[0,0,0]) + +plt.show() \ No newline at end of file -- GitLab