fix: python example with initial state simplified

ce4e831c · Guillaume CLIVAZ · 64113679 · ce4e831c
Commit ce4e831c authored 3 months ago by Guillaume CLIVAZ
--- a/python/iLQR_manipulator_initStateOptim.py
+++ b/python/iLQR_manipulator_initStateOptim.py
 '''
-iLQR applied to a planar manipulator, where both an optimal controller 
+iLQR applied to a planar manipulator, where both an optimal controller
 and an optimal robot base location are estimated

 Copyright (c) 2023 Idiap Research Institute <https://www.idiap.ch>
@@ -59,7 +59,7 @@ def f_reach(x, param):
 	for t in range(1, x.shape[1]):
 		# TODO:
 		temp_J = Jkin_num(x[:, t][:, np.newaxis], param)
-		J = np.block([[J, np.zeros((J.shape[0], temp_J.shape[1]))], 
+		J = np.block([[J, np.zeros((J.shape[0], temp_J.shape[1]))],
 				[np.zeros((temp_J.shape[0], J.shape[1])), temp_J]])
 	return f, J

@@ -71,22 +71,22 @@ param.nbData = 30 # Number of datapoints
 param.nbPoints = 2 # Number of viapoints
 param.dt = 1E0 # Time step length
 param.nbIter = 50 # Maximum number of iterations for iLQR
-param.nbDOFs = 5 # Number of articulated links
-param.nbVarX = param.nbDOFs * 2; # State space dimension (q01,q02,q1,q2,q3, x1,x2,l1,l2,l3), with 5 rotary and 5 prismatic joints
-param.nbVarU = param.nbDOFs * 2; # Control space dimension (dq01,dq02,dq1,dq2,dq3  dx1,dx2,dl1,dl2,dl3)
+param.nbDOFs = 5 # Number of revolute joints
+param.nbVarX = param.nbDOFs * 2; # State space dimension (q01,q02,q1,q2,q3, x1,x2,l1,l2,l3), with 5 revolute and 5 prismatic joints
+param.nbVarU = 3; # Control space dimension (we only control dq1, dq2, dq3)
 param.nbVarF = 3; # Task space dimension (f1,f2,f3, with f3 as orientation)

-param.Mu = np.array([[2, 1, -np.pi/6], [3, 2, -np.pi/3]]).T # Viapoints
-param.Mu[:2, :] += 5
+param.Mu = np.array([[7, 6, -np.pi/6], [8, 7, -np.pi/3]]).T # Viapoints

-# Weights to modify pose (q01,q02,q1,q2,q3, x1,x2,l1,l2,l3) -> here, only x1,x2 can change
-Rx0 = np.diag([1E0]*2 + [1E0]*(param.nbDOFs - 2) + [0.0]*2 + [1E0]*(param.nbDOFs - 2)) 
-# Weights for (dq01,dq02,dq1,dq2,dq3  dx1,dx2,dl1,dl2,dl3) -> here, only dq1,dq2,dq3 can change
-R = np.kron(np.eye(param.nbData - 1),
-		 np.diag(2*[1E0] + (param.nbDOFs-2)*[1E-6] + 2*[1E0] + (param.nbDOFs-2)*[1E0]))
-# Weights for augmented control commands
-Ra = np.block([[Rx0, np.zeros((Rx0.shape[0], R.shape[1]))],
-				[np.zeros((R.shape[0], Rx0.shape[1])), R]])
+# We only search for initial x1,x2 in the state vector (q01,q02,q1,q2,q3,x1,x2,l1,l2,l3)
+Rx0 = np.zeros((2, 2))
+# We only control dq1,dq2,dq3 in the velocity vector (dq01,dq02,dq1,dq2,dq3,dx1,dx2,dl1,dl2,dl3)
+R = np.eye(3 * (param.nbData-1)) * 1E-6
+# Weights for list of decision variables (control commands augmented with initial state)
+Ra = np.block([
+	[Rx0, np.zeros((Rx0.shape[0], R.shape[1]))],
+	[np.zeros((R.shape[0], Rx0.shape[1])), R]
+])

 # Precision matrix (by removing orientation constraint for the viapoint tracking task)
 Q = np.kron(np.eye(param.nbPoints), np.diag([1E0, 1E0, 0]))
@@ -94,46 +94,55 @@ Q = np.kron(np.eye(param.nbPoints), np.diag([1E0, 1E0, 0]))
 # Time occurrence of viapoints
 tl = np.linspace(0, param.nbData, param.nbPoints+1)
 tl = np.rint(tl[1:]).astype(np.int64) - 1
-idx = np.array([i + np.arange(0,param.nbVarX,1) for i in (tl*param.nbVarX)]).flatten() 
+idx = np.array([i + np.arange(0,param.nbVarX,1) for i in (tl*param.nbVarX)]).flatten() # Indices listing viapoints to reach

 # Main program
 # ===============================

 # iLQR
 # ===============================
-x0_hat = np.array([0, np.pi/2, np.pi/4, -np.pi/2, -np.pi/4, 
+x0_hat = np.array([0, np.pi/2, np.pi/4, -np.pi/2, -np.pi/4,
 					0.1, 0.1, 3, 2, 1]) # Initial state
-ua = np.concatenate((x0_hat, np.zeros(param.nbVarU * (param.nbData-1)))) # Initial control command
+# Initialization of augmented control commands with x1,x2 as initial state and dq1,dq2,dq3 as list of control commands
+ua = np.concatenate((x0_hat[5:7], np.zeros(param.nbVarU * (param.nbData-1))))
+ua_full = np.concatenate([x0_hat, np.zeros(param.nbVarX * (param.nbData-1))])
+
+idu = np.arange(12, ua_full.shape[0] + 1, 10)
+idu = np.add.outer(idu, np.array([0, 1, 2])).flatten()
+idu = np.concatenate([[5,6], idu]) # indices listing decision variables to update

 # Transfer matrices (for linear system as single integrator)
 Su0 = np.vstack([
-	np.zeros([param.nbVarX, param.nbVarX*(param.nbData-1)]), 
-	np.kron(np.tril(np.ones(param.nbData-1)), np.eye(param.nbVarX) * param.dt)]) 
+	np.zeros([param.nbVarX, param.nbVarX*(param.nbData-1)]),
+	np.kron(np.tril(np.ones(param.nbData-1)), np.eye(param.nbVarX) * param.dt)
+])
 Sx0 = np.kron(np.ones(param.nbData), np.eye(param.nbVarX)).T
 Sa0 = np.hstack((Sx0, Su0))
-Sa = np.hstack((Sx0[idx,:], Su0[idx,:])) # We remove the lines that are out of interest
+# We remove the lines that are out of interest
+Sa = Sa0[idx]
+Sa = Sa[:, idu]

 for i in range(param.nbIter):
-	x = Sa0 @ ua # System evolution
+	ua_full[idu] = ua
+	x = Sa0 @ ua_full # System evolution
 	x = x.reshape([param.nbVarX, param.nbData], order='F')
 	f, J = f_reach(x[:,tl], param) # Residuals and Jacobians
-	fua = ua - np.concatenate((x0_hat, np.zeros(param.nbVarU * (param.nbData-1)))) # Control command evolution
 	dua = np.linalg.lstsq(
 		Sa.T @ J.T @ Q @ J @ Sa + Ra,
-		-Sa.T @ J.T @ Q @ f.flatten('F') - Ra @ fua,
+		-Sa.T @ J.T @ Q @ f.flatten('F') - Ra @ ua, #fua,
 		rcond=-1
 	)[0] # Gauss-Newton update

 	# Estimate step size with backtracking line search method
 	alpha = 1
-	cost0 = f.flatten('F').T @ Q @ f.flatten('F') + fua.T @ Ra @ fua # Cost
+	cost0 = f.flatten('F').T @ Q @ f.flatten('F') + ua.T @ Ra @ ua # Cost
 	while True:
 		uatmp = ua + dua * alpha
-		xtmp = Sa0 @ uatmp # System evolution
+		ua_full[idu] = uatmp
+		xtmp = Sa0 @ ua_full # System evolution
 		xtmp = xtmp.reshape([param.nbVarX, param.nbData], order='F')
-		ftmp,_ = f_reach(xtmp[:,tl], param) # Residuals 
-		fuatmp = uatmp - np.concatenate((x0_hat, np.zeros(param.nbVarU * (param.nbData-1)))) # Control command evolution
-		cost = ftmp.flatten('F').T @ Q @ ftmp.flatten('F') + fuatmp.T @ Ra @ fuatmp # Cost
+		ftmp,_ = f_reach(xtmp[:,tl], param) # Residuals
+		cost = ftmp.flatten('F').T @ Q @ ftmp.flatten('F') + uatmp.T @ Ra @ uatmp # Cost
 		if cost < cost0 or alpha < 1e-4:
 			print("Iteration {}, cost: {}".format(i,cost))
 			break