Implement dynamics optimizer for RMP
Signed-off-by: An Thai Le <an.thai.le97@gmail.com>
anindex committed Aug 12, 2021
1 parent a7045d9 commit 17fda1d
Showing 4 changed files with 182 additions and 73 deletions.
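In short, the commit moves policy computation from a single global GMM to per-frame local RMPs: train now fits potential offsets phi0 and damping gains d0 per frame via the new optimize_dynamics, and compute_global_policy pulls the state back into each task frame, evaluates the local policy, and sums the transformed forces. A minimal usage sketch (the constructor arguments and the demos/x/dx/frames objects are assumptions, not taken from this diff):

```python
from tprmp.models.tp_rmp import TPRMP

# Hypothetical driver; TPRMP's constructor kwargs are not shown in this diff,
# and demos, x, dx, frames stand for objects defined elsewhere in the repo.
tprmp = TPRMP(task_name='pick_box', num_comp=5)
tprmp.train(demos, alpha=0.5)  # fits the TP-HSMM, then phi0/d0 via optimize_dynamics
ddx = tprmp.compute_global_policy(x, dx, frames)  # summed frame-local RMP forces
```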
17 changes: 15 additions & 2 deletions tprmp/models/tp_gmm.py
@@ -68,6 +68,16 @@ def reset_covariance(self, idxs_1, idxs_2):
self._mvns[k][f_key].cov[np.ix_(idxs_1, idxs_2)] = 0.0
self._mvns[k][f_key].cov[np.ix_(idxs_2, idxs_1)] = 0.0

def get_local_gmm(self, frame, tag=None):
if frame not in self.frame_names:
raise ValueError(f'[TPGMM]: Frame {frame} not in model!')
comps = range(self.num_comp) if ((self._tag_to_comp_map is None) or
(tag not in self._tag_to_comp_map)) else self.tag_to_comp_map[tag]
local_gmm = []
for k in comps:
local_gmm.append(self.mvns[k][frame])
return local_gmm

def generate_global_gmm(self, frames, tag=None):
comps = range(self.num_comp) if ((self._tag_to_comp_map is None) or
(tag not in self._tag_to_comp_map)) else self.tag_to_comp_map[tag]
@@ -79,11 +89,10 @@ def generate_global_gmm(self, frames, tag=None):
def combine_gaussians(self, k, frames):
if not set(self.frame_names).issubset(set(frames)):
raise IndexError(f'[TPGMM]: Given frames must contain all of {self.frame_names}')
man = self.mvns[k][self.frame_names[0]].manifold
g_list = []
for f_key in self.frame_names:
g_list.append(self.mvns[k][f_key].transform(frames[f_key].A, frames[f_key].b)) # Transform Gaussians to global frame
return man.gaussian_product(g_list)
return self.manifold.gaussian_product(g_list)

def rename_component(self, comp, name):
self.component_names[comp] = name
@@ -120,6 +129,10 @@ def load(task_name, model_name):
def mvns(self):
return self._mvns

@property
def manifold(self):
return self._mvns[0][self._frame_names[0]].manifold

@property
def pi(self):
return self._pi
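The new manifold property gives combine_gaussians direct access to the shared manifold of all component Gaussians, which fuses the frame-transformed components via gaussian_product. On a Euclidean manifold that product reduces to precision-weighted fusion; a self-contained numpy sketch of this special case (illustrative, not the repository's implementation):

```python
import numpy as np

def euclidean_gaussian_product(means, covs):
    # Product of Gaussians N(m_k, S_k): S = (sum_k S_k^-1)^-1 and
    # m = S @ sum_k S_k^-1 m_k -- the Euclidean case of gaussian_product.
    precs = [np.linalg.inv(S) for S in covs]
    cov = np.linalg.inv(sum(precs))
    mean = cov @ sum(P @ m for P, m in zip(precs, means))
    return mean, cov
```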
74 changes: 24 additions & 50 deletions tprmp/models/tp_rmp.py
@@ -1,11 +1,16 @@
import os
import logging
import numpy as np
import pickle
import time

from tprmp.models.tp_hsmm import TPHSMM
from tprmp.models.rmp import compute_policy
from tprmp.optimizer.dynamics import optimize_dynamics


_path_file = os.path.dirname(os.path.realpath(__file__))
DATA_PATH = os.path.join(_path_file, '..', '..', 'data', 'tasks')


class TPRMP(object):
@@ -21,47 +26,27 @@ def __init__(self, **kwargs):

def save(self, name=None):
self.model.save(name)
file = os.path.join(DATA_PATH, self.model.name, 'models', name if name is not None else ('dynamics_' + str(time.time()) + '.p'))
os.makedirs(os.path.dirname(file), exist_ok=True)
with open(file, 'wb') as f:
pickle.dump(self.parameters(), f)

def retrieve(self, frames, **kwargs):
"""
Retrieve global RMP.
"""

def compute_canonical_rmp(self, x, t, global_mvns, **kwargs):
manifold = global_mvns[0].manifold
K = kwargs.get('K', 10 * np.eye(manifold.dim_T))
hard = kwargs.get('hard', True)
weights = self.compute_force_weights(x, t, global_mvns, **kwargs)
if hard:
return K @ manifold.log_map(x, base=global_mvns[np.argmax(weights)].mean)
else:
phi = np.zeros((self.model.num_comp, manifold.dim_T))
for k in range(self.model.num_comp):
phi[k] = K @ manifold.log_map(x, base=global_mvns[k].mean)
return weights @ phi

def compute_policy(self, x, x_dot, global_mvns):
phi = self.compute_potentials(x, global_mvns)
weights = TPRMP.compute_obsrv_prob(x, global_mvns)
Phi = weights @ phi
num_comp = len(global_mvns)
manifold = global_mvns[0].manifold
Fs = np.zeros((num_comp, manifold.dim_T))
for k in range(num_comp):
Fs[k] = weights[k] * (phi[k] - Phi) * global_mvns[k].cov_inv @ manifold.log_map(x, base=global_mvns[k].mean)
Fs[k] += -weights[k] * global_mvns[k].cov_inv @ manifold.log_map(x, base=global_mvns[k].mean)
Fs[k] += -weights[k] * self._d0[k] * x_dot
return Fs.sum(axis=0)

def compute_potentials(self, x, global_mvns):
num_comp = len(global_mvns)
phi = np.zeros(num_comp)
manifold = global_mvns[0].manifold
for k in range(num_comp):
comp = global_mvns[k]
v = manifold.log_map(x, base=comp.mean)
phi[k] = self._phi0[k] + v.T @ comp.cov_inv @ v
return phi
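For reference, the force law implemented by the removed methods above, which the imported compute_policy from tprmp.models.rmp presumably preserves frame-locally, is:

$$
F(x,\dot{x}) = \sum_k w_k \Big[ (\phi_k - \Phi)\,\Sigma_k^{-1} v_k - \Sigma_k^{-1} v_k - d_{0,k}\,\dot{x} \Big],
\qquad v_k = \operatorname{Log}_{\mu_k}(x),
$$

$$
\phi_k = \phi_{0,k} + v_k^\top \Sigma_k^{-1} v_k,
\qquad \Phi = \sum_k w_k \phi_k,
\qquad w_k = \frac{\mathcal{N}_k(x)}{\sum_j \mathcal{N}_j(x)}.
$$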
def compute_global_policy(self, x, dx, frames):  # TODO: add Riemannian metric
if not set(self.model.frame_names).issubset(set(frames)):
raise IndexError(f'[TPRMP]: Given frames must contain all of {self.model.frame_names}')
policies = np.zeros((len(frames), self.model.manifold.dim_T))
for i, f_key in enumerate(self.model.frame_names):
# compute local policy
lx = frames[f_key].pullback(x)
ldx = frames[f_key].pullback_tangent(dx)
local_policy = compute_policy(self._phi0[f_key], self._d0[f_key], lx, ldx, self.model.get_local_gmm(f_key))
policies[i] = frames[f_key].transform_tangent(local_policy)
return policies.sum(axis=0) # TODO: may need weighted sum based on frames
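The pullback/transform_tangent pattern is easiest to see for a Euclidean frame given by a rotation A and origin b, as combine_gaussians' use of frames[f_key].A and frames[f_key].b suggests. A toy stand-in with the same method names (an illustration, not the repository's frame class):

```python
import numpy as np

class EuclideanFrame:
    """Toy frame: x_global = A @ x_local + b."""
    def __init__(self, A, b):
        self.A, self.b = np.asarray(A), np.asarray(b)

    def pullback(self, x):           # global point -> frame-local point
        return self.A.T @ (np.asarray(x) - self.b)

    def pullback_tangent(self, dx):  # global velocity -> frame-local velocity
        return self.A.T @ np.asarray(dx)

    def transform_tangent(self, f):  # frame-local force -> global force
        return self.A @ np.asarray(f)
```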

def train(self, demos, **kwargs):
"""
@@ -71,22 +56,11 @@ def train(self, demos, **kwargs):
----------
:param demos: list of Demonstration objects
"""
alpha = kwargs.get('alpha', 0.5)
# train TP-HSMM/TP-GMM
self.model.train(demos, **kwargs)

@staticmethod
def compute_obsrv_prob(obsrv, global_mvns, normalized=True):
"""
Parameters
----------
:param obsrv: observation point
:param global_mvns: list of Gaussian components in the global frame
"""
num_comp = len(global_mvns)
prob = np.zeros(num_comp)
for k in range(num_comp):
prob[k] = global_mvns[k].pdf(obsrv)
if normalized:
prob /= prob.sum()
return prob
# train dynamics
self._phi0, self._d0 = optimize_dynamics(self.model, demos, alpha)

@staticmethod
def load(task_name, model_name='sample.p'):
65 changes: 44 additions & 21 deletions tprmp/networks/rmp_net.py
@@ -5,7 +5,6 @@


class LowTri:

def __init__(self, m):

# Calculate lower triangular matrix indices using numpy
@@ -77,7 +76,6 @@ def forward(self, x):


class LagrangianLayer(nn.Module):

def __init__(self, input_size, output_size, activation="ReLu"):
super(LagrangianLayer, self).__init__()

@@ -105,18 +103,21 @@ def __init__(self, input_size, output_size, activation="ReLu"):
self.g_prime = LinearDer()

else:
raise ValueError("Activation Type must be in ['Linear', 'ReLu', 'SoftPlus', 'Cos'] but is {0}".format(self.activation))
raise ValueError(
"Activation Type must be in ['Linear', 'ReLu', 'SoftPlus', 'Cos'] but is {0}"
.format(self.activation))

def forward(self, q, der_prev):
# Apply Affine Transformation:
a = F.linear(q, self.weight, self.bias)
out = self.g(a)
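# Chain rule: der = diag(g'(a)) @ W @ der_prev, so each layer propagates
# the Jacobian of its activations with respect to the network input q.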
der = torch.matmul(self.g_prime(a).view(-1, self.output_size, 1) * self.weight, der_prev)
der = torch.matmul(
self.g_prime(a).view(-1, self.output_size, 1) * self.weight,
der_prev)
return out, der


class DeepRMPNetwork(nn.Module):

def __init__(self, dim_M, **kwargs):
super(DeepRMPNetwork, self).__init__()

@@ -194,22 +195,26 @@ def init_output(layer):
torch.nn.init.sparse_(layer.weight, p_non_zero, output_std)

else:
raise ValueError("Weight Initialization Type must be in ['xavier_normal', 'orthogonal', 'sparse'] but is {0}".format(self._w_init))
raise ValueError(
"Weight Initialization Type must be in ['xavier_normal', 'orthogonal', 'sparse'] but is {0}"
.format(self._w_init))

# Compute In- / Output Sizes:
self.dim_M = dim_M
self.m = int((self.dim_M ** 2 + self.dim_M) / 2)
self.m = int((self.dim_M**2 + self.dim_M) / 2)

# Compute non-zero elements of L:
l_output_size = int((self.dim_M ** 2 + self.dim_M) / 2)
l_output_size = int((self.dim_M**2 + self.dim_M) / 2)
l_lower_size = l_output_size - self.dim_M

# Calculate the indices of the diagonal elements of L:
idx_diag = np.arange(self.dim_M) + 1
idx_diag = idx_diag * (idx_diag + 1) / 2 - 1

# Calculate the indices of the off-diagonal elements of L:
idx_tril = np.extract([x not in idx_diag for x in np.arange(l_output_size)], np.arange(l_output_size))
idx_tril = np.extract(
[x not in idx_diag for x in np.arange(l_output_size)],
np.arange(l_output_size))
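# e.g. for dim_M = 3: l_output_size = 6, idx_diag = [0, 2, 5] and
# idx_tril = [1, 3, 4] (row-major order of the lower triangle).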

# Indexing for concatenation of l_o and l_d
cat_idx = np.hstack((idx_diag, idx_tril))
@@ -225,23 +230,32 @@
non_linearity = kwargs.get("activation", "ReLu")

# Create Input Layer:
self.layers.append(LagrangianLayer(self.dim_M, self.n_width, activation=non_linearity))
self.layers.append(
LagrangianLayer(self.dim_M, self.n_width,
activation=non_linearity))
init_hidden(self.layers[-1])

# Create Hidden Layer:
for _ in range(1, self.n_hidden):
self.layers.append(LagrangianLayer(self.n_width, self.n_width, activation=non_linearity))
self.layers.append(
LagrangianLayer(self.n_width,
self.n_width,
activation=non_linearity))
init_hidden(self.layers[-1])

# Create output Layer:
self.net_g = LagrangianLayer(self.n_width, 1, activation="Linear")
init_output(self.net_g)

self.net_lo = LagrangianLayer(self.n_width, l_lower_size, activation="Linear")
self.net_lo = LagrangianLayer(self.n_width,
l_lower_size,
activation="Linear")
init_hidden(self.net_lo)

# The diagonal must be non-negative. Therefore, the non-linearity is set to ReLu.
self.net_ld = LagrangianLayer(self.n_width, self.dim_M, activation="ReLu")
self.net_ld = LagrangianLayer(self.n_width,
self.dim_M,
activation="ReLu")
init_hidden(self.net_ld)
torch.nn.init.constant_(self.net_ld.bias, self._b0_diag)

@@ -279,27 +293,35 @@ def _dyn_model(self, q, qd):
# Compute M:
L = self.low_tri(l)
LT = L.transpose(dim0=1, dim1=2)
M = torch.matmul(L, LT) + self._epsilon * torch.eye(self.dim_M).type_as(L)
M = torch.matmul(L,
LT) + self._epsilon * torch.eye(self.dim_M).type_as(L)

# Calculate dH/dt
Ldt = self.low_tri(torch.matmul(der_l, qd_3d).view(-1, self.m))
Hdt = torch.matmul(L, Ldt.transpose(dim0=1, dim1=2)) + torch.matmul(Ldt, LT)
Hdt = torch.matmul(L, Ldt.transpose(dim0=1, dim1=2)) + torch.matmul(
Ldt, LT)

# Calculate dH/dq:
Ldq = self.low_tri(der_l.transpose(2, 1).reshape(-1, self.m)).reshape(-1, self.dim_M, self.dim_M, self.dim_M)
Hdq = torch.matmul(Ldq, LT.view(-1, 1, self.dim_M, self.dim_M)) + torch.matmul(L.view(-1, 1, self.dim_M, self.dim_M), Ldq.transpose(2, 3))
Ldq = self.low_tri(der_l.transpose(2, 1).reshape(-1, self.m)).reshape(
-1, self.dim_M, self.dim_M, self.dim_M)
Hdq = torch.matmul(Ldq, LT.view(
-1, 1, self.dim_M, self.dim_M)) + torch.matmul(
L.view(-1, 1, self.dim_M, self.dim_M), Ldq.transpose(2, 3))

# Compute the Coriolis & Centrifugal forces:
Hdt_qd = torch.matmul(Hdt, qd_3d).view(-1, self.dim_M)
quad_dq = torch.matmul(qd_4d.transpose(dim0=2, dim1=3), torch.matmul(Hdq, qd_4d)).view(-1, self.dim_M)
quad_dq = torch.matmul(qd_4d.transpose(dim0=2, dim1=3),
torch.matmul(Hdq, qd_4d)).view(-1, self.dim_M)
c = Hdt_qd - 1. / 2. * quad_dq

# Compute kinetic energy T
H_qd = torch.matmul(M, qd_3d).view(-1, self.dim_M)
T = 1. / 2. * torch.matmul(qd_4d.transpose(dim0=2, dim1=3), H_qd.view(-1, 1, self.dim_M, 1)).view(-1)
T = 1. / 2. * torch.matmul(qd_4d.transpose(dim0=2, dim1=3),
H_qd.view(-1, 1, self.dim_M, 1)).view(-1)

# Compute dV/dt
dVdt = torch.matmul(qd_4d.transpose(dim0=2, dim1=3), g.view(-1, 1, self.dim_M, 1)).view(-1)
dVdt = torch.matmul(qd_4d.transpose(dim0=2, dim1=3),
g.view(-1, 1, self.dim_M, 1)).view(-1)
return M, c, g, T, V, dVdt

def rmp(self, q, qd, beta=0.5): # beta is damping coefficient
@@ -308,7 +330,8 @@ def rmp(self, q, qd, beta=0.5):  # beta is damping coefficient
# Compute Acceleration, e.g., forward model:
invH = torch.inverse(M)
# damping_term = beta * qd.view(-1, self.dim_M, 1)
qdd_pred = (torch.matmul(invH, (- c - g).view(-1, self.dim_M, 1))).view(-1, self.dim_M)
qdd_pred = (torch.matmul(invH, (-c - g).view(-1, self.dim_M,
1))).view(-1, self.dim_M)
return qdd_pred.squeeze()

def energy(self, q, qd):
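_dyn_model returns the terms of the manipulator equation M(q)q̈ + c(q, q̇) + g(q) = τ, with M = LLᵀ + εI positive definite by construction, and rmp solves it for q̈ with τ = 0. A smoke-test sketch (the n_width/n_hidden keyword names are assumptions inferred from attributes used above):

```python
import torch
from tprmp.networks.rmp_net import DeepRMPNetwork

dim_M = 3
net = DeepRMPNetwork(dim_M, n_width=64, n_hidden=2, activation='ReLu')
q, qd = torch.randn(8, dim_M), torch.randn(8, dim_M)   # batched states
M, c, g, T, V, dVdt = net._dyn_model(q, qd)            # dynamics terms
qdd = net.rmp(q, qd)                                   # accelerations M^-1 (-c - g)
```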
[Diff for the fourth changed file did not load.]
