diff --git a/python/iLQR_manipulator_initStateOptim.py b/python/iLQR_manipulator_initStateOptim.py index b55f910fabc82f174d9628275baf88be37dfb949..cbe659109b1f20a9600c2f6e675c43052570aca7 100644 --- a/python/iLQR_manipulator_initStateOptim.py +++ b/python/iLQR_manipulator_initStateOptim.py @@ -1,5 +1,5 @@ ''' -iLQR applied to a planar manipulator, where both an optimal controller +iLQR applied to a planar manipulator, where both an optimal controller and an optimal robot base location are estimated Copyright (c) 2023 Idiap Research Institute <https://www.idiap.ch> @@ -59,7 +59,7 @@ def f_reach(x, param): for t in range(1, x.shape[1]): # TODO: temp_J = Jkin_num(x[:, t][:, np.newaxis], param) - J = np.block([[J, np.zeros((J.shape[0], temp_J.shape[1]))], + J = np.block([[J, np.zeros((J.shape[0], temp_J.shape[1]))], [np.zeros((temp_J.shape[0], J.shape[1])), temp_J]]) return f, J @@ -71,22 +71,22 @@ param.nbData = 30 # Number of datapoints param.nbPoints = 2 # Number of viapoints param.dt = 1E0 # Time step length param.nbIter = 50 # Maximum number of iterations for iLQR -param.nbDOFs = 5 # Number of articulated links -param.nbVarX = param.nbDOFs * 2; # State space dimension (q01,q02,q1,q2,q3, x1,x2,l1,l2,l3), with 5 rotary and 5 prismatic joints -param.nbVarU = param.nbDOFs * 2; # Control space dimension (dq01,dq02,dq1,dq2,dq3 dx1,dx2,dl1,dl2,dl3) +param.nbDOFs = 5 # Number of revolute joints +param.nbVarX = param.nbDOFs * 2; # State space dimension (q01,q02,q1,q2,q3, x1,x2,l1,l2,l3), with 5 revolute and 5 prismatic joints +param.nbVarU = 3; # Control space dimension (we only control dq1, dq2, dq3) param.nbVarF = 3; # Task space dimension (f1,f2,f3, with f3 as orientation) -param.Mu = np.array([[2, 1, -np.pi/6], [3, 2, -np.pi/3]]).T # Viapoints -param.Mu[:2, :] += 5 +param.Mu = np.array([[7, 6, -np.pi/6], [8, 7, -np.pi/3]]).T # Viapoints -# Weights to modify pose (q01,q02,q1,q2,q3, x1,x2,l1,l2,l3) -> here, only x1,x2 can change -Rx0 = np.diag([1E0]*2 + [1E0]*(param.nbDOFs - 2) + [0.0]*2 + [1E0]*(param.nbDOFs - 2)) -# Weights for (dq01,dq02,dq1,dq2,dq3 dx1,dx2,dl1,dl2,dl3) -> here, only dq1,dq2,dq3 can change -R = np.kron(np.eye(param.nbData - 1), - np.diag(2*[1E0] + (param.nbDOFs-2)*[1E-6] + 2*[1E0] + (param.nbDOFs-2)*[1E0])) -# Weights for augmented control commands -Ra = np.block([[Rx0, np.zeros((Rx0.shape[0], R.shape[1]))], - [np.zeros((R.shape[0], Rx0.shape[1])), R]]) +# We only search for initial x1,x2 in the state vector (q01,q02,q1,q2,q3,x1,x2,l1,l2,l3) +Rx0 = np.zeros((2, 2)) +# We only control dq1,dq2,dq3 in the velocity vector (dq01,dq02,dq1,dq2,dq3,dx1,dx2,dl1,dl2,dl3) +R = np.eye(3 * (param.nbData-1)) * 1E-6 +# Weights for list of decision variables (control commands augmented with initial state) +Ra = np.block([ + [Rx0, np.zeros((Rx0.shape[0], R.shape[1]))], + [np.zeros((R.shape[0], Rx0.shape[1])), R] +]) # Precision matrix (by removing orientation constraint for the viapoint tracking task) Q = np.kron(np.eye(param.nbPoints), np.diag([1E0, 1E0, 0])) @@ -94,46 +94,55 @@ Q = np.kron(np.eye(param.nbPoints), np.diag([1E0, 1E0, 0])) # Time occurrence of viapoints tl = np.linspace(0, param.nbData, param.nbPoints+1) tl = np.rint(tl[1:]).astype(np.int64) - 1 -idx = np.array([i + np.arange(0,param.nbVarX,1) for i in (tl*param.nbVarX)]).flatten() +idx = np.array([i + np.arange(0,param.nbVarX,1) for i in (tl*param.nbVarX)]).flatten() # Indices listing viapoints to reach # Main program # =============================== # iLQR # =============================== -x0_hat = np.array([0, np.pi/2, np.pi/4, -np.pi/2, -np.pi/4, +x0_hat = np.array([0, np.pi/2, np.pi/4, -np.pi/2, -np.pi/4, 0.1, 0.1, 3, 2, 1]) # Initial state -ua = np.concatenate((x0_hat, np.zeros(param.nbVarU * (param.nbData-1)))) # Initial control command +# Initialization of augmented control commands with x1,x2 as initial state and dq1,dq2,dq3 as list of control commands +ua = np.concatenate((x0_hat[5:7], np.zeros(param.nbVarU * (param.nbData-1)))) +ua_full = np.concatenate([x0_hat, np.zeros(param.nbVarX * (param.nbData-1))]) + +idu = np.arange(12, ua_full.shape[0] + 1, 10) +idu = np.add.outer(idu, np.array([0, 1, 2])).flatten() +idu = np.concatenate([[5,6], idu]) # indices listing decision variables to update # Transfer matrices (for linear system as single integrator) Su0 = np.vstack([ - np.zeros([param.nbVarX, param.nbVarX*(param.nbData-1)]), - np.kron(np.tril(np.ones(param.nbData-1)), np.eye(param.nbVarX) * param.dt)]) + np.zeros([param.nbVarX, param.nbVarX*(param.nbData-1)]), + np.kron(np.tril(np.ones(param.nbData-1)), np.eye(param.nbVarX) * param.dt) +]) Sx0 = np.kron(np.ones(param.nbData), np.eye(param.nbVarX)).T Sa0 = np.hstack((Sx0, Su0)) -Sa = np.hstack((Sx0[idx,:], Su0[idx,:])) # We remove the lines that are out of interest +# We remove the lines that are out of interest +Sa = Sa0[idx] +Sa = Sa[:, idu] for i in range(param.nbIter): - x = Sa0 @ ua # System evolution + ua_full[idu] = ua + x = Sa0 @ ua_full # System evolution x = x.reshape([param.nbVarX, param.nbData], order='F') f, J = f_reach(x[:,tl], param) # Residuals and Jacobians - fua = ua - np.concatenate((x0_hat, np.zeros(param.nbVarU * (param.nbData-1)))) # Control command evolution dua = np.linalg.lstsq( Sa.T @ J.T @ Q @ J @ Sa + Ra, - -Sa.T @ J.T @ Q @ f.flatten('F') - Ra @ fua, + -Sa.T @ J.T @ Q @ f.flatten('F') - Ra @ ua, #fua, rcond=-1 )[0] # Gauss-Newton update # Estimate step size with backtracking line search method alpha = 1 - cost0 = f.flatten('F').T @ Q @ f.flatten('F') + fua.T @ Ra @ fua # Cost + cost0 = f.flatten('F').T @ Q @ f.flatten('F') + ua.T @ Ra @ ua # Cost while True: uatmp = ua + dua * alpha - xtmp = Sa0 @ uatmp # System evolution + ua_full[idu] = uatmp + xtmp = Sa0 @ ua_full # System evolution xtmp = xtmp.reshape([param.nbVarX, param.nbData], order='F') - ftmp,_ = f_reach(xtmp[:,tl], param) # Residuals - fuatmp = uatmp - np.concatenate((x0_hat, np.zeros(param.nbVarU * (param.nbData-1)))) # Control command evolution - cost = ftmp.flatten('F').T @ Q @ ftmp.flatten('F') + fuatmp.T @ Ra @ fuatmp # Cost + ftmp,_ = f_reach(xtmp[:,tl], param) # Residuals + cost = ftmp.flatten('F').T @ Q @ ftmp.flatten('F') + uatmp.T @ Ra @ uatmp # Cost if cost < cost0 or alpha < 1e-4: print("Iteration {}, cost: {}".format(i,cost)) break