Implement dynamics optimizer for RMP
Signed-off-by: An Thai Le <an.thai.le97@gmail.com>
anindex committed Aug 12, 2021
1 parent a7045d9 commit 17fda1d
Showing 4 changed files with 182 additions and 73 deletions.
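In short, the commit moves policy computation from a single global GMM to per-frame local RMPs: train now fits potential offsets phi0 and damping gains d0 per frame via the new optimize_dynamics, and compute_global_policy pulls the state back into each task frame, evaluates the local policy, and sums the transformed forces. A minimal usage sketch (the constructor arguments and the demos/x/dx/frames objects are assumptions, not taken from this diff):

```python
from tprmp.models.tp_rmp import TPRMP

# Hypothetical driver; TPRMP's constructor kwargs are not shown in this diff,
# and demos, x, dx, frames stand for objects defined elsewhere in the repo.
tprmp = TPRMP(task_name='pick_box', num_comp=5)
tprmp.train(demos, alpha=0.5)  # fits the TP-HSMM, then phi0/d0 via optimize_dynamics
ddx = tprmp.compute_global_policy(x, dx, frames)  # summed frame-local RMP forces
```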
17 changes: 15 additions & 2 deletions tprmp/models/tp_gmm.py
@@ -68,6 +68,16 @@ def reset_covariance(self, idxs_1, idxs_2):
self._mvns[k][f_key].cov[np.ix_(idxs_1, idxs_2)] = 0.0
self._mvns[k][f_key].cov[np.ix_(idxs_2, idxs_1)] = 0.0

def get_local_gmm(self, frame, tag=None):
if frame not in self.frame_names:
raise ValueError(f'[TPGMM]: Frame {frame} not in model!')
comps = range(self.num_comp) if ((self._tag_to_comp_map is None) or
(tag not in self._tag_to_comp_map)) else self.tag_to_comp_map[tag]
local_gmm = []
for k in comps:
local_gmm.append(self.mvns[k][frame])
return local_gmm

def generate_global_gmm(self, frames, tag=None):
comps = range(self.num_comp) if ((self._tag_to_comp_map is None) or
(tag not in self._tag_to_comp_map)) else self.tag_to_comp_map[tag]
@@ -79,11 +89,10 @@ def generate_global_gmm(self, frames, tag=None):
def combine_gaussians(self, k, frames):
if not set(self.frame_names).issubset(set(frames)):
raise IndexError(f'[TPGMM]: Given frames must contain all of {self.frame_names}')
man = self.mvns[k][self.frame_names[0]].manifold
g_list = []
for f_key in self.frame_names:
g_list.append(self.mvns[k][f_key].transform(frames[f_key].A, frames[f_key].b)) # Transform Gaussians to global frame
return man.gaussian_product(g_list)
return self.manifold.gaussian_product(g_list)

def rename_component(self, comp, name):
self.component_names[comp] = name
@@ -120,6 +129,10 @@ def load(task_name, model_name):
def mvns(self):
return self._mvns

@property
def manifold(self):
return self._mvns[0][self._frame_names[0]].manifold

@property
def pi(self):
return self._pi
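The new manifold property gives combine_gaussians direct access to the shared manifold of all component Gaussians, which fuses the frame-transformed components via gaussian_product. On a Euclidean manifold that product reduces to precision-weighted fusion; a self-contained numpy sketch of this special case (illustrative, not the repository's implementation):

```python
import numpy as np

def euclidean_gaussian_product(means, covs):
    # Product of Gaussians N(m_k, S_k): S = (sum_k S_k^-1)^-1 and
    # m = S @ sum_k S_k^-1 m_k -- the Euclidean case of gaussian_product.
    precs = [np.linalg.inv(S) for S in covs]
    cov = np.linalg.inv(sum(precs))
    mean = cov @ sum(P @ m for P, m in zip(precs, means))
    return mean, cov
```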
74 changes: 24 additions & 50 deletions tprmp/models/tp_rmp.py
@@ -1,11 +1,16 @@
import os
import logging
import numpy as np
import pickle
import time

from tprmp.models.tp_hsmm import TPHSMM
from tprmp.models.rmp import compute_policy
from tprmp.optimizer.dynamics import optimize_dynamics


_path_file = os.path.dirname(os.path.realpath(__file__))
DATA_PATH = os.path.join(_path_file, '..', '..', 'data', 'tasks')


class TPRMP(object):
@@ -21,47 +26,27 @@ def __init__(self, **kwargs):

def save(self, name=None):
self.model.save(name)
file = os.path.join(DATA_PATH, self.model.name, 'models', name if name is not None else ('dynamics_' + str(time.time()) + '.p'))
os.makedirs(os.path.dirname(file), exist_ok=True)
with open(file, 'wb') as f:
pickle.dump(self.parameters(), f)

def retrieve(self, frames, **kwargs):
"""
Retrieve global RMP.
"""

def compute_canonical_rmp(self, x, t, global_mvns, **kwargs):
manifold = global_mvns[0].manifold
K = kwargs.get('K', 10 * np.eye(manifold.dim_T))
hard = kwargs.get('hard', True)
weights = self.compute_force_weights(x, t, global_mvns, **kwargs)
if hard:
return K @ manifold.log_map(x, base=global_mvns[np.argmax(weights)].mean)
else:
phi = np.zeros((self.model.num_comp, manifold.dim_T))
for k in range(self.model.num_comp):
phi[k] = K @ manifold.log_map(x, base=global_mvns[k].mean)
return weights @ phi

def compute_policy(self, x, x_dot, global_mvns):
phi = self.compute_potentials(x, global_mvns)
weights = TPRMP.compute_obsrv_prob(x, global_mvns)
Phi = weights @ phi
num_comp = len(global_mvns)
manifold = global_mvns[0].manifold
Fs = np.zeros((num_comp, manifold.dim_T))
for k in range(num_comp):
Fs[k] = weights[k] * (phi[k] - Phi) * global_mvns[k].cov_inv @ manifold.log_map(x, base=global_mvns[k].mean)
Fs[k] += -weights[k] * global_mvns[k].cov_inv @ manifold.log_map(x, base=global_mvns[k].mean)
Fs[k] += -weights[k] * self._d0[k] * x_dot
return Fs.sum(axis=0)

def compute_potentials(self, x, global_mvns):
num_comp = len(global_mvns)
phi = np.zeros(num_comp)
manifold = global_mvns[0].manifold
for k in range(num_comp):
comp = global_mvns[k]
v = manifold.log_map(x, base=comp.mean)
phi[k] = self._phi0[k] + v.T @ comp.cov_inv @ v
return phi
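For reference, the force law implemented by the removed methods above, which the imported compute_policy from tprmp.models.rmp presumably preserves frame-locally, is:

$$
F(x,\dot{x}) = \sum_k w_k \Big[ (\phi_k - \Phi)\,\Sigma_k^{-1} v_k - \Sigma_k^{-1} v_k - d_{0,k}\,\dot{x} \Big],
\qquad v_k = \operatorname{Log}_{\mu_k}(x),
$$

$$
\phi_k = \phi_{0,k} + v_k^\top \Sigma_k^{-1} v_k,
\qquad \Phi = \sum_k w_k \phi_k,
\qquad w_k = \frac{\mathcal{N}_k(x)}{\sum_j \mathcal{N}_j(x)}.
$$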
def compute_global_policy(self, x, dx, frames):  # TODO: add Riemannian metric
if not set(self.model.frame_names).issubset(set(frames)):
raise IndexError(f'[TPRMP]: Given frames must contain all of {self.model.frame_names}')
policies = np.zeros((len(frames), self.model.manifold.dim_T))
for i, f_key in enumerate(self.model.frame_names):
# compute local policy
lx = frames[f_key].pullback(x)
ldx = frames[f_key].pullback_tangent(dx)
local_policy = compute_policy(self._phi0[f_key], self._d0[f_key], lx, ldx, self.model.get_local_gmm(f_key))
policies[i] = frames[f_key].transform_tangent(local_policy)
return policies.sum(axis=0) # TODO: may need weighted sum based on frames
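The pullback/transform_tangent pattern is easiest to see for a Euclidean frame given by a rotation A and origin b, as combine_gaussians' use of frames[f_key].A and frames[f_key].b suggests. A toy stand-in with the same method names (an illustration, not the repository's frame class):

```python
import numpy as np

class EuclideanFrame:
    """Toy frame: x_global = A @ x_local + b."""
    def __init__(self, A, b):
        self.A, self.b = np.asarray(A), np.asarray(b)

    def pullback(self, x):           # global point -> frame-local point
        return self.A.T @ (np.asarray(x) - self.b)

    def pullback_tangent(self, dx):  # global velocity -> frame-local velocity
        return self.A.T @ np.asarray(dx)

    def transform_tangent(self, f):  # frame-local force -> global force
        return self.A @ np.asarray(f)
```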

def train(self, demos, **kwargs):
"""
@@ -71,22 +56,11 @@ def train(self, demos, **kwargs):
----------
:param demos: list of Demonstration objects
"""
alpha = kwargs.get('alpha', 0.5)
# train TP-HSMM/TP-GMM
self.model.train(demos, **kwargs)

@staticmethod
def compute_obsrv_prob(obsrv, global_mvns, normalized=True):
"""
Parameters
----------
:param obsrv: observation point
:param global_mvns: list of Gaussian components in the global frame
"""
num_comp = len(global_mvns)
prob = np.zeros(num_comp)
for k in range(num_comp):
prob[k] = global_mvns[k].pdf(obsrv)
if normalized:
prob /= prob.sum()
return prob
# train dynamics
self._phi0, self._d0 = optimize_dynamics(self.model, demos, alpha)

@staticmethod
def load(task_name, model_name='sample.p'):
65 changes: 44 additions & 21 deletions tprmp/networks/rmp_net.py
@@ -5,7 +5,6 @@


class LowTri:

def __init__(self, m):

# Calculate lower triangular matrix indices using numpy
@@ -77,7 +76,6 @@ def forward(self, x):


class LagrangianLayer(nn.Module):

def __init__(self, input_size, output_size, activation="ReLu"):
super(LagrangianLayer, self).__init__()

@@ -105,18 +103,21 @@ def __init__(self, input_size, output_size, activation="ReLu"):
self.g_prime = LinearDer()

else:
raise ValueError("Activation Type must be in ['Linear', 'ReLu', 'SoftPlus', 'Cos'] but is {0}".format(self.activation))
raise ValueError(
"Activation Type must be in ['Linear', 'ReLu', 'SoftPlus', 'Cos'] but is {0}"
.format(self.activation))

def forward(self, q, der_prev):
# Apply Affine Transformation:
a = F.linear(q, self.weight, self.bias)
out = self.g(a)
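# Chain rule: der = diag(g'(a)) @ W @ der_prev, so each layer propagates
# the Jacobian of its activations with respect to the network input q.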
der = torch.matmul(self.g_prime(a).view(-1, self.output_size, 1) * self.weight, der_prev)
der = torch.matmul(
self.g_prime(a).view(-1, self.output_size, 1) * self.weight,
der_prev)
return out, der


class DeepRMPNetwork(nn.Module):

def __init__(self, dim_M, **kwargs):
super(DeepRMPNetwork, self).__init__()

@@ -194,22 +195,26 @@ def init_output(layer):
torch.nn.init.sparse_(layer.weight, p_non_zero, output_std)

else:
raise ValueError("Weight Initialization Type must be in ['xavier_normal', 'orthogonal', 'sparse'] but is {0}".format(self._w_init))
raise ValueError(
"Weight Initialization Type must be in ['xavier_normal', 'orthogonal', 'sparse'] but is {0}"
.format(self._w_init))

# Compute In- / Output Sizes:
self.dim_M = dim_M
self.m = int((self.dim_M ** 2 + self.dim_M) / 2)
self.m = int((self.dim_M**2 + self.dim_M) / 2)

# Compute non-zero elements of L:
l_output_size = int((self.dim_M ** 2 + self.dim_M) / 2)
l_output_size = int((self.dim_M**2 + self.dim_M) / 2)
l_lower_size = l_output_size - self.dim_M

# Calculate the indices of the diagonal elements of L:
idx_diag = np.arange(self.dim_M) + 1
idx_diag = idx_diag * (idx_diag + 1) / 2 - 1

# Calculate the indices of the off-diagonal elements of L:
idx_tril = np.extract([x not in idx_diag for x in np.arange(l_output_size)], np.arange(l_output_size))
idx_tril = np.extract(
[x not in idx_diag for x in np.arange(l_output_size)],
np.arange(l_output_size))
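# e.g. for dim_M = 3: l_output_size = 6, idx_diag = [0, 2, 5] and
# idx_tril = [1, 3, 4] (row-major order of the lower triangle).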

# Indexing for concatenation of l_o and l_d
cat_idx = np.hstack((idx_diag, idx_tril))
@@ -225,23 +230,32 @@
non_linearity = kwargs.get("activation", "ReLu")

# Create Input Layer:
self.layers.append(LagrangianLayer(self.dim_M, self.n_width, activation=non_linearity))
self.layers.append(
LagrangianLayer(self.dim_M, self.n_width,
activation=non_linearity))
init_hidden(self.layers[-1])

# Create Hidden Layer:
for _ in range(1, self.n_hidden):
self.layers.append(LagrangianLayer(self.n_width, self.n_width, activation=non_linearity))
self.layers.append(
LagrangianLayer(self.n_width,
self.n_width,
activation=non_linearity))
init_hidden(self.layers[-1])

# Create output Layer:
self.net_g = LagrangianLayer(self.n_width, 1, activation="Linear")
init_output(self.net_g)

self.net_lo = LagrangianLayer(self.n_width, l_lower_size, activation="Linear")
self.net_lo = LagrangianLayer(self.n_width,
l_lower_size,
activation="Linear")
init_hidden(self.net_lo)

# The diagonal must be non-negative. Therefore, the non-linearity is set to ReLu.
self.net_ld = LagrangianLayer(self.n_width, self.dim_M, activation="ReLu")
self.net_ld = LagrangianLayer(self.n_width,
self.dim_M,
activation="ReLu")
init_hidden(self.net_ld)
torch.nn.init.constant_(self.net_ld.bias, self._b0_diag)

@@ -279,27 +293,35 @@ def _dyn_model(self, q, qd):
# Compute M:
L = self.low_tri(l)
LT = L.transpose(dim0=1, dim1=2)
M = torch.matmul(L, LT) + self._epsilon * torch.eye(self.dim_M).type_as(L)
M = torch.matmul(L,
LT) + self._epsilon * torch.eye(self.dim_M).type_as(L)

# Calculate dH/dt
Ldt = self.low_tri(torch.matmul(der_l, qd_3d).view(-1, self.m))
Hdt = torch.matmul(L, Ldt.transpose(dim0=1, dim1=2)) + torch.matmul(Ldt, LT)
Hdt = torch.matmul(L, Ldt.transpose(dim0=1, dim1=2)) + torch.matmul(
Ldt, LT)

# Calculate dH/dq:
Ldq = self.low_tri(der_l.transpose(2, 1).reshape(-1, self.m)).reshape(-1, self.dim_M, self.dim_M, self.dim_M)
Hdq = torch.matmul(Ldq, LT.view(-1, 1, self.dim_M, self.dim_M)) + torch.matmul(L.view(-1, 1, self.dim_M, self.dim_M), Ldq.transpose(2, 3))
Ldq = self.low_tri(der_l.transpose(2, 1).reshape(-1, self.m)).reshape(
-1, self.dim_M, self.dim_M, self.dim_M)
Hdq = torch.matmul(Ldq, LT.view(
-1, 1, self.dim_M, self.dim_M)) + torch.matmul(
L.view(-1, 1, self.dim_M, self.dim_M), Ldq.transpose(2, 3))

# Compute the Coriolis & Centrifugal forces:
Hdt_qd = torch.matmul(Hdt, qd_3d).view(-1, self.dim_M)
quad_dq = torch.matmul(qd_4d.transpose(dim0=2, dim1=3), torch.matmul(Hdq, qd_4d)).view(-1, self.dim_M)
quad_dq = torch.matmul(qd_4d.transpose(dim0=2, dim1=3),
torch.matmul(Hdq, qd_4d)).view(-1, self.dim_M)
c = Hdt_qd - 1. / 2. * quad_dq

# Compute kinetic energy T
H_qd = torch.matmul(M, qd_3d).view(-1, self.dim_M)
T = 1. / 2. * torch.matmul(qd_4d.transpose(dim0=2, dim1=3), H_qd.view(-1, 1, self.dim_M, 1)).view(-1)
T = 1. / 2. * torch.matmul(qd_4d.transpose(dim0=2, dim1=3),
H_qd.view(-1, 1, self.dim_M, 1)).view(-1)

# Compute dV/dt
dVdt = torch.matmul(qd_4d.transpose(dim0=2, dim1=3), g.view(-1, 1, self.dim_M, 1)).view(-1)
dVdt = torch.matmul(qd_4d.transpose(dim0=2, dim1=3),
g.view(-1, 1, self.dim_M, 1)).view(-1)
return M, c, g, T, V, dVdt

def rmp(self, q, qd, beta=0.5): # beta is damping coefficient
@@ -308,7 +330,8 @@ def rmp(self, q, qd, beta=0.5):  # beta is damping coefficient
# Compute Acceleration, e.g., forward model:
invH = torch.inverse(M)
# damping_term = beta * qd.view(-1, self.dim_M, 1)
qdd_pred = (torch.matmul(invH, (- c - g).view(-1, self.dim_M, 1))).view(-1, self.dim_M)
qdd_pred = (torch.matmul(invH, (-c - g).view(-1, self.dim_M,
1))).view(-1, self.dim_M)
return qdd_pred.squeeze()

def energy(self, q, qd):
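_dyn_model returns the terms of the manipulator equation M(q)q̈ + c(q, q̇) + g(q) = τ, with M = LLᵀ + εI positive definite by construction, and rmp solves it for q̈ with τ = 0. A smoke-test sketch (the n_width/n_hidden keyword names are assumptions inferred from attributes used above):

```python
import torch
from tprmp.networks.rmp_net import DeepRMPNetwork

dim_M = 3
net = DeepRMPNetwork(dim_M, n_width=64, n_hidden=2, activation='ReLu')
q, qd = torch.randn(8, dim_M), torch.randn(8, dim_M)   # batched states
M, c, g, T, V, dVdt = net._dyn_model(q, qd)            # dynamics terms
qdd = net.rmp(q, qd)                                   # accelerations M^-1 (-c - g)
```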
[Diff for the fourth changed file did not load.]
