diff --git a/python/LQR_probabilistic.py b/python/LQR_probabilistic.py index 2d06ed095f193c65c0df724f0aa2ba2f2fa84988..59d98be220bd3d84ba35f1da13ba4926458622be 100644 --- a/python/LQR_probabilistic.py +++ b/python/LQR_probabilistic.py @@ -1,8 +1,6 @@ import numpy as np from math import factorial import matplotlib.pyplot as plt -from scipy.linalg import solve_continuous_are -from scipy.stats import multivariate_normal # Parameters # =============================== @@ -13,16 +11,23 @@ param.nbVarPos = 2 # Number of position variable param.nbVar = param.nbVarPos * param.nbDeriv # Dimension of state vector param.nbData = 100 # Number of datapoints param.rfactor = 1e-7 # Control weight term +param.nbVia = 4 R = np.eye((param.nbData-1) * param.nbVarPos) * param.rfactor # Control cost matrix Q = np.zeros((param.nbVar * param.nbData, param.nbVar * param.nbData)) # Task precision for augmented state xd = np.zeros([param.nbVar, param.nbData]) -target = np.random.uniform(size=param.nbVarPos) -xd[:, param.nbData-1] = np.concatenate((target, np.zeros(param.nbVarPos))) -xd = xd.T.flatten() +targets = [] +for k in range(param.nbVia): + idx = int((k+1)*param.nbData/param.nbVia-1) + target = np.random.uniform(size=param.nbVarPos) + xd[:, idx] = np.concatenate((target, np.zeros(param.nbVarPos))) + targets.append(target) + idx2 = idx * param.nbVar + Q[idx2:idx2+param.nbVar,idx2:idx2+param.nbVar] = 200.0 * np.diag([1,1,0,0]) -Q[param.nbVar*(param.nbData-1):param.nbVar*param.nbData,param.nbVar*(param.nbData-1):param.nbVar*param.nbData] = 10.0 * np.eye(param.nbVar) +xd = xd.T.flatten() +targets = np.array(targets) A1d = np.zeros((param.nbDeriv,param.nbDeriv)) B1d = np.zeros((param.nbDeriv,1)) @@ -48,19 +53,36 @@ for i in range(1,param.nbData): x0 = np.random.uniform(size=param.nbVar) mean = Sx @ x0 + Su @ np.linalg.inv(Su.T @ Q @ Su + R) @ Su.T @ Q @ (xd - Sx @ x0) -cov = Su @ (Su.T @ Q @ Su + R) @ Su.T +cov = Su @ np.linalg.inv(Su.T @ Q @ Su + R) @ Su.T + 1e-7*np.eye(param.nbVar * param.nbData) 
+viaidx = param.nbData-1 +idx1 = slice(viaidx * param.nbVar, viaidx * param.nbVar + param.nbVar) +idx2 = slice(0, viaidx * param.nbVar) -trajectories = multivariate_normal.rvs(mean=mean, cov=cov, size=100) +mu1 = mean[idx1] +mu2 = mean[idx2] +sigma11 = cov[idx1, idx1] +sigma22 = cov[idx2, idx2] +sigma12 = cov[idx1, idx2] +sigma21 = cov[idx2, idx1] + +xc = np.random.uniform(size=param.nbVar) +cmean = mu2 + sigma21 @ np.linalg.inv(sigma11) @ (xc - mu1) +ccov = sigma22 - sigma21 @ np.linalg.inv(sigma11) @ sigma12 plt.figure() for k in range(100): - trajectory = trajectories[k, :] - trajectory = trajectory.reshape((param.nbData, param.nbVar)) - plt.plot(trajectory[:, 0], trajectory[:, 1], alpha=0.5) + t = mean + np.linalg.cholesky(cov) @ np.random.randn(param.nbVar * param.nbData) + t = t.reshape((param.nbData, param.nbVar)) + plt.plot(t[:, 0], t[:, 1], alpha=0.1, color='red') + + ct = cmean + np.linalg.cholesky(ccov) @ np.random.randn(param.nbVar * (param.nbData-1)) + ct = ct.reshape(((param.nbData-1), param.nbVar)) + plt.plot(ct[:, 0], ct[:, 1], alpha=0.1, color='green') -plt.scatter(x0[0], x0[1], color='green', label='Initial State') -plt.scatter(target[0], target[1], color='red', label='Target State') +plt.scatter(x0[0], x0[1], color='red', label='Initial State') +plt.scatter(xc[0], xc[1], color='blue', label='Conditional State') +plt.scatter(targets[:,0], targets[:,1], color='green', label='Viapoints') plt.legend() plt.grid() plt.show()