Skip to content
Snippets Groups Projects
Commit ce4e831c authored by Guillaume CLIVAZ's avatar Guillaume CLIVAZ
Browse files

fix: python example with initial state simplified

parent 64113679
No related branches found
No related tags found
No related merge requests found
'''
iLQR applied to a planar manipulator, where both an optimal controller
iLQR applied to a planar manipulator, where both an optimal controller
and an optimal robot base location are estimated
Copyright (c) 2023 Idiap Research Institute <https://www.idiap.ch>
......@@ -59,7 +59,7 @@ def f_reach(x, param):
for t in range(1, x.shape[1]):
# TODO:
temp_J = Jkin_num(x[:, t][:, np.newaxis], param)
J = np.block([[J, np.zeros((J.shape[0], temp_J.shape[1]))],
J = np.block([[J, np.zeros((J.shape[0], temp_J.shape[1]))],
[np.zeros((temp_J.shape[0], J.shape[1])), temp_J]])
return f, J
......@@ -71,22 +71,22 @@ param.nbData = 30 # Number of datapoints
param.nbPoints = 2 # Number of viapoints
param.dt = 1E0 # Time step length
param.nbIter = 50 # Maximum number of iterations for iLQR
param.nbDOFs = 5 # Number of articulated links
param.nbVarX = param.nbDOFs * 2; # State space dimension (q01,q02,q1,q2,q3, x1,x2,l1,l2,l3), with 5 rotary and 5 prismatic joints
param.nbVarU = param.nbDOFs * 2; # Control space dimension (dq01,dq02,dq1,dq2,dq3 dx1,dx2,dl1,dl2,dl3)
param.nbDOFs = 5 # Number of revolute joints
param.nbVarX = param.nbDOFs * 2; # State space dimension (q01,q02,q1,q2,q3, x1,x2,l1,l2,l3), with 5 revolute and 5 prismatic joints
param.nbVarU = 3; # Control space dimension (we only control dq1, dq2, dq3)
param.nbVarF = 3; # Task space dimension (f1,f2,f3, with f3 as orientation)
param.Mu = np.array([[2, 1, -np.pi/6], [3, 2, -np.pi/3]]).T # Viapoints
param.Mu[:2, :] += 5
param.Mu = np.array([[7, 6, -np.pi/6], [8, 7, -np.pi/3]]).T # Viapoints
# Weights to modify pose (q01,q02,q1,q2,q3, x1,x2,l1,l2,l3) -> here, only x1,x2 can change
Rx0 = np.diag([1E0]*2 + [1E0]*(param.nbDOFs - 2) + [0.0]*2 + [1E0]*(param.nbDOFs - 2))
# Weights for (dq01,dq02,dq1,dq2,dq3 dx1,dx2,dl1,dl2,dl3) -> here, only dq1,dq2,dq3 can change
R = np.kron(np.eye(param.nbData - 1),
np.diag(2*[1E0] + (param.nbDOFs-2)*[1E-6] + 2*[1E0] + (param.nbDOFs-2)*[1E0]))
# Weights for augmented control commands
Ra = np.block([[Rx0, np.zeros((Rx0.shape[0], R.shape[1]))],
[np.zeros((R.shape[0], Rx0.shape[1])), R]])
# We only search for initial x1,x2 in the state vector (q01,q02,q1,q2,q3,x1,x2,l1,l2,l3)
Rx0 = np.zeros((2, 2))
# We only control dq1,dq2,dq3 in the velocity vector (dq01,dq02,dq1,dq2,dq3,dx1,dx2,dl1,dl2,dl3)
R = np.eye(3 * (param.nbData-1)) * 1E-6
# Weights for list of decision variables (control commands augmented with initial state)
Ra = np.block([
[Rx0, np.zeros((Rx0.shape[0], R.shape[1]))],
[np.zeros((R.shape[0], Rx0.shape[1])), R]
])
# Precision matrix (by removing orientation constraint for the viapoint tracking task)
Q = np.kron(np.eye(param.nbPoints), np.diag([1E0, 1E0, 0]))
......@@ -94,46 +94,55 @@ Q = np.kron(np.eye(param.nbPoints), np.diag([1E0, 1E0, 0]))
# Time occurrence of viapoints
tl = np.linspace(0, param.nbData, param.nbPoints+1)
tl = np.rint(tl[1:]).astype(np.int64) - 1
idx = np.array([i + np.arange(0,param.nbVarX,1) for i in (tl*param.nbVarX)]).flatten()
idx = np.array([i + np.arange(0,param.nbVarX,1) for i in (tl*param.nbVarX)]).flatten() # Indices listing viapoints to reach
# Main program
# ===============================
# iLQR
# ===============================
x0_hat = np.array([0, np.pi/2, np.pi/4, -np.pi/2, -np.pi/4,
x0_hat = np.array([0, np.pi/2, np.pi/4, -np.pi/2, -np.pi/4,
0.1, 0.1, 3, 2, 1]) # Initial state
ua = np.concatenate((x0_hat, np.zeros(param.nbVarU * (param.nbData-1)))) # Initial control command
# Initialization of augmented control commands with x1,x2 as initial state and dq1,dq2,dq3 as list of control commands
ua = np.concatenate((x0_hat[5:7], np.zeros(param.nbVarU * (param.nbData-1))))
ua_full = np.concatenate([x0_hat, np.zeros(param.nbVarX * (param.nbData-1))])
idu = np.arange(12, ua_full.shape[0] + 1, 10)
idu = np.add.outer(idu, np.array([0, 1, 2])).flatten()
idu = np.concatenate([[5,6], idu]) # indices listing decision variables to update
# Transfer matrices (for linear system as single integrator)
Su0 = np.vstack([
np.zeros([param.nbVarX, param.nbVarX*(param.nbData-1)]),
np.kron(np.tril(np.ones(param.nbData-1)), np.eye(param.nbVarX) * param.dt)])
np.zeros([param.nbVarX, param.nbVarX*(param.nbData-1)]),
np.kron(np.tril(np.ones(param.nbData-1)), np.eye(param.nbVarX) * param.dt)
])
Sx0 = np.kron(np.ones(param.nbData), np.eye(param.nbVarX)).T
Sa0 = np.hstack((Sx0, Su0))
Sa = np.hstack((Sx0[idx,:], Su0[idx,:])) # We remove the lines that are out of interest
# We remove the lines that are out of interest
Sa = Sa0[idx]
Sa = Sa[:, idu]
for i in range(param.nbIter):
x = Sa0 @ ua # System evolution
ua_full[idu] = ua
x = Sa0 @ ua_full # System evolution
x = x.reshape([param.nbVarX, param.nbData], order='F')
f, J = f_reach(x[:,tl], param) # Residuals and Jacobians
fua = ua - np.concatenate((x0_hat, np.zeros(param.nbVarU * (param.nbData-1)))) # Control command evolution
dua = np.linalg.lstsq(
Sa.T @ J.T @ Q @ J @ Sa + Ra,
-Sa.T @ J.T @ Q @ f.flatten('F') - Ra @ fua,
-Sa.T @ J.T @ Q @ f.flatten('F') - Ra @ ua, #fua,
rcond=-1
)[0] # Gauss-Newton update
# Estimate step size with backtracking line search method
alpha = 1
cost0 = f.flatten('F').T @ Q @ f.flatten('F') + fua.T @ Ra @ fua # Cost
cost0 = f.flatten('F').T @ Q @ f.flatten('F') + ua.T @ Ra @ ua # Cost
while True:
uatmp = ua + dua * alpha
xtmp = Sa0 @ uatmp # System evolution
ua_full[idu] = uatmp
xtmp = Sa0 @ ua_full # System evolution
xtmp = xtmp.reshape([param.nbVarX, param.nbData], order='F')
ftmp,_ = f_reach(xtmp[:,tl], param) # Residuals
fuatmp = uatmp - np.concatenate((x0_hat, np.zeros(param.nbVarU * (param.nbData-1)))) # Control command evolution
cost = ftmp.flatten('F').T @ Q @ ftmp.flatten('F') + fuatmp.T @ Ra @ fuatmp # Cost
ftmp,_ = f_reach(xtmp[:,tl], param) # Residuals
cost = ftmp.flatten('F').T @ Q @ ftmp.flatten('F') + uatmp.T @ Ra @ uatmp # Cost
if cost < cost0 or alpha < 1e-4:
print("Iteration {}, cost: {}".format(i,cost))
break
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment