Early test of TPRMP, still need tuning

Signed-off-by: An Thai Le <an.thai.le97@gmail.com>
ASUDCT · Aug 19, 2021 · 204ff3f · 204ff3f
1 parent 8183d19
commit 204ff3f
Show file tree

Hide file tree

Showing 8 changed files with 213 additions and 46 deletions.
diff --git a/scripts/test_tprmp.py b/scripts/test_tprmp.py
@@ -0,0 +1,70 @@
+import sys
+import argparse
+import numpy as np
+import time
+import pybullet as p
+from os.path import join, dirname, abspath
+import logging
+logging.basicConfig()
+logging.getLogger().setLevel(logging.INFO)
+
+ROOT_DIR = join(dirname(abspath(__file__)), '..')
+sys.path.append(ROOT_DIR)
+from tprmp.utils.loading import load_demos  # noqa
+from tprmp.visualization.demonstration import plot_demo  # noqa
+from tprmp.models.tp_rmp import TPRMP
+from tprmp.demonstrations.base import Demonstration
+from tprmp.demonstrations.frame import Frame
+from tprmp.demonstrations.quaternion import q_to_euler, q_convert_wxyz, q_from_euler, q_convert_xyzw  # noqa
+from tprmp.envs.gym import Environment # noqa
+from tprmp.envs.tasks import PalletizingBoxes # noqa
+
+parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+                                 description='Example run: python test_tprmp.py test.p')
+parser.add_argument('task', help='The task folder', type=str, default='pick')
+parser.add_argument('data', help='The data file', type=str, default='data.p')
+args = parser.parse_args()
+
+DATA_DIR = join(ROOT_DIR, 'data', 'tasks', args.task, 'demos')
+data_file = join(DATA_DIR, args.data)
+# parameters
+T = 300
+dt = 0.001
+NUM_COMP = 30
+alpha, beta = 1e-2, 0.
+min_d = 0.
+energy = 0.
+sigma = 0.5
+var_scale = 2.
+# load data
+demos = load_demos(data_file, tag='pick_side')
+demos = [demos[-1]]
+manifold = demos[0].manifold
+# plot_demo(demos, only_global=False, plot_quat=False, new_fig=True, new_ax=True, show=True)
+# train tprmp
+model = TPRMP(num_comp=NUM_COMP, name=args.task, sigma=sigma)
+model.train(demos, alpha=alpha, beta=beta, min_d=min_d, energy=energy, var_scale=var_scale)
+# model.model.plot_model(demos)
+# test tprmp
+env = Environment(task=PalletizingBoxes(), disp=True)
+ee_pose = np.array(env.home_pose)
+ee_pose[3:] = q_convert_wxyz(ee_pose[3:])
+A, b = Demonstration.construct_linear_map(manifold, ee_pose)
+ee_frame = Frame(A, b, manifold=manifold)
+box_id = env.task.goals[0][0][0]
+position = np.array([0.5, -0.25, env.task.box_size[0] / 2])
+rotation = q_convert_xyzw(q_from_euler(np.array([np.pi/2, 0., 0.])))
+p.resetBasePositionAndOrientation(box_id, position, rotation)
+target = p.getBasePositionAndOrientation(box_id)
+obj_pose = np.append(position, q_convert_wxyz(rotation))
+A, b = Demonstration.construct_linear_map(manifold, obj_pose)
+obj_frame = Frame(A, b, manifold=manifold)
+frames = {'ee_frame': ee_frame, 'obj_frame': obj_frame}
+model.generate_global_gmm(frames)
+for t in np.linspace(0, T, int(T / dt) + 1):
+    x = np.append(env.ee_pose[:3], q_convert_wxyz(env.ee_pose[3:]))
+    dx = env.ee_vel
+    ddx = model.retrieve(x, dx, frames)
+    env.step(ddx)
+    # input()
+    time.sleep(dt)
diff --git a/tprmp/demonstrations/manifold.py b/tprmp/demonstrations/manifold.py
@@ -210,6 +210,36 @@ def gaussian_product(self, g_list, accuracy=1e-5, max_iter=100):
             Manifold.logger.warn(f'Iterative likelihood maximization for mean terminated after reaching {max_iter} iterations')
         return ManifoldGaussian(self, mu, np.linalg.inv(sigma_inv))
 
+    def get_origin(self):
+        spaces = self.name.split(" x ")
+        origin = []
+        for man_name in spaces:
+            if man_name == "S^3":
+                origin.extend([1., 0., 0., 0.])
+            elif man_name[:2] == "R^":
+                n = int(man_name[2:])
+                origin.extend([0.] * n)
+            else:
+                Manifold.logger.error(f'Invalid manifold naming {man_name}.')
+        return np.array(origin)
+
+    def get_pos_quat_indices(self, tangent=False):
+        spaces = self.name.split(" x ")
+        pos_idx, quat_idx = [], []
+        i = 0
+        for man_name in spaces:
+            if man_name == "S^3":
+                r = 3 if tangent else 4
+                quat_idx.extend(range(i, i + r))
+                i += r
+            elif man_name[:2] == "R^":
+                n = int(man_name[2:])
+                pos_idx.extend(range(i, i + n))
+                i += n
+            else:
+                Manifold.logger.error(f'Invalid manifold naming {man_name}.')
+        return np.array(pos_idx), np.array(quat_idx)      
+
     @staticmethod
     def get_quaternion_manifold():
         return Manifold(4, 3, q_log_map, q_exp_map, q_parallel_transport, "S^3")

diff --git a/tprmp/demonstrations/probability.py b/tprmp/demonstrations/probability.py
@@ -60,6 +60,10 @@ def kl_divergence_mvn(self, g):
         term3 = g.cov_inv.dot(v).dot(v)
         return (term1 + term2 + term3) / 2
 
+    def recompute_inv(self):
+        self._cov_inv = np.linalg.inv(self.cov)
+        self._nf = 1 / (np.sqrt((2 * np.pi)**self.manifold.dim_T * np.linalg.det(self.cov)))
+
     @property
     def manifold(self):
         return self._manifold
@@ -75,8 +79,7 @@ def nf(self):
     @cov.setter
     def cov(self, value):
         self._cov = value
-        self._cov_inv = np.linalg.inv(value)
-        self._nf = 1 / (np.sqrt((2 * np.pi)**self.manifold.dim_T * np.linalg.det(value)))
+        self.recompute_inv()
 
     @property
     def cov_inv(self):

diff --git a/tprmp/models/rmp.py b/tprmp/models/rmp.py
@@ -1,15 +1,15 @@
 import numpy as np
-
+import cvxpy as cp
 
 def compute_riemannian_metric(x, mvns):
     weights = compute_obsrv_prob(x, mvns)
     Ms = np.array([comp.cov_inv for comp in mvns])
     return Ms.T @ weights 
 
 
-def compute_policy(phi0, d0, x, dx, mvns):
+def compute_policy(phi0, d0, x, dx, mvns, use_cp=False):
     weights = compute_obsrv_prob(x, mvns)
-    return compute_potential_term(weights, phi0, x, mvns) + compute_dissipation_term(weights, d0, dx, mvns)
+    return compute_potential_term(weights, phi0, x, mvns) + compute_dissipation_term(weights, d0, dx, mvns, use_cp=use_cp)
 
 
 def compute_potential_term(weights, phi0, x, mvns):
@@ -24,11 +24,18 @@ def compute_potential_term(weights, phi0, x, mvns):
     return Ps
 
 
-def compute_dissipation_term(weights, d0, dx, mvns):
+def compute_dissipation_term(weights, d0, dx, mvns, use_cp=False):
     manifold = mvns[0].manifold
     Ds = np.zeros(manifold.dim_T) if len(dx.shape) == 1 else np.zeros((manifold.dim_T, dx.shape[1]))
     for k in range(len(mvns)):
-        Ds += -weights[k] * d0[k] * dx
+        if use_cp:
+            Ds += -weights[k] * cp.multiply(d0[k], dx)
+        else:
+            if len(dx.shape) > 1:
+                d = np.expand_dims(d0[k], axis=-1)
+            else:
+                d = d0[k]
+            Ds += -weights[k] * d * dx
     return Ds
 
 
@@ -44,15 +51,17 @@ def compute_potentials(phi0, x, mvns):
     return phi
 
 
-def compute_obsrv_prob(x, mvns, normalized=True):
+def compute_obsrv_prob(x, mvns, normalized=True, eps=1e-30):
     num_comp = len(mvns)
     prob = np.zeros(num_comp) if len(x.shape) == 1 else np.zeros((num_comp, x.shape[1]))
     for k in range(num_comp):
         prob[k] = mvns[k].pdf(x)
-    if (prob.sum() == 0.):
-        print(x)
     if normalized:
-        prob /= prob.sum()
+        s = prob.sum()
+        if s < eps:  # collapse into uniform distribution
+            prob = np.ones(num_comp) / num_comp
+        else:
+            prob /= prob.sum()
     return prob
 
 
@@ -69,7 +78,7 @@ def compute_obsrv_prob(x, mvns, normalized=True):
     T = 300
     dt = 0.05
     phi0 = [5., 1., 0.]
-    d0 = .2 * np.ones(3)
+    d0 = .2 * np.ones((3, 2))
     # test semantics
     x, dx = np.zeros((2, T)), np.zeros((2, T))
     print(compute_policy(phi0, d0, x, dx, mvns).shape)

diff --git a/tprmp/models/tp_gmm.py b/tprmp/models/tp_gmm.py
@@ -67,6 +67,12 @@ def reset_covariance(self, idxs_1, idxs_2):
             for f_key in self.frame_names:
                 self._mvns[k][f_key].cov[np.ix_(idxs_1, idxs_2)] = 0.0
                 self._mvns[k][f_key].cov[np.ix_(idxs_2, idxs_1)] = 0.0
+                self._mvns[k][f_key].recompute_inv()
+
+    def scale_covariance(self, scale):
+        for k in range(self.num_comp):
+            for f_key in self.frame_names:
+                self._mvns[k][f_key].cov = self._mvns[k][f_key].cov * scale**2
 
     def get_local_gmm(self, frame, tag=None):
         if frame not in self.frame_names:

diff --git a/tprmp/models/tp_rmp.py b/tprmp/models/tp_rmp.py
@@ -1,13 +1,15 @@
 import os
+from os.path import join, exists
 import logging
 import numpy as np
 import pickle
 import time
 
 from tprmp.models.tp_hsmm import TPHSMM
-from tprmp.models.rmp import compute_policy
+from tprmp.models.rmp import compute_policy, compute_riemannian_metric
+from tprmp.models.coriolis import compute_coriolis_force
 from tprmp.optimizer.dynamics import optimize_dynamics
-from tprmp.optimizer.riemannian import optimize_riemannian_metric
+from tprmp.utils.loading import load
 
 
 _path_file = os.path.dirname(os.path.realpath(__file__))
@@ -21,34 +23,63 @@ class TPRMP(object):
     logger = logging.getLogger(__name__)
 
     def __init__(self, **kwargs):
+        self._sigma = kwargs.pop('sigma', 1.)
         self._model = TPHSMM(**kwargs)
+        self._global_mvns = None
         self._phi0 = None
         self._d0 = None
         self._R_net = None
 
     def save(self, name=None):
         self.model.save(name)
-        file = os.path.join(DATA_PATH, self.model.name, 'models', name if name is not None else ('dynamics_' + str(time.time()) + '.p'))
+        file = join(DATA_PATH, self.model.name, 'models', name if name is not None else ('dynamics_' + str(time.time()) + '.p'))
         os.makedirs(file, exist_ok=True)
         with open(file, 'wb') as f:
-            pickle.dump(self.parameters(), f)
+            pickle.dump({'phi0': self._phi0, 'd0': self._d0}, f)
+
+    def generate_global_gmm(self, frames):
+        self._global_mvns = self.model.generate_global_gmm(frames)
 
-    def retrieve(self, frames, **kwargs):
+    def retrieve(self, x, dx, frames, compute_global_mvns=False):
         """
         Retrieve global RMP.
         """
+        if compute_global_mvns or self._global_mvns is None:
+            self.generate_global_gmm(frames)
+        f = self.compute_global_policy(x, dx, frames) - compute_coriolis_force(x, dx, self._global_mvns)
+        M = compute_riemannian_metric(x, self._global_mvns)
+        return np.linalg.inv(M) @ f
 
-    def compute_global_policy(self, x, dx, frames):  # TODO: add Riemmanian metric
+    def compute_global_policy(self, x, dx, frames):
         if not set(self.model.frame_names).issubset(set(frames)):
             raise IndexError(f'[TPRMP]: Frames must be subset of {self.model.frame_names}')
         policies = np.zeros((len(frames), self.model.manifold.dim_T))
+        weights = self.compute_frame_weights(x, frames)
         for i, f_key in enumerate(self.model.frame_names):
             # compute local policy
             lx = frames[f_key].pullback(x)
             ldx = frames[f_key].pullback_tangent(dx)
-            local_policy = compute_policy(self._phi0[f_key], self._d0[f_key], lx, ldx, self.model.get_local_gmm(f_key))
-            policies[i] = frames[f_key].transform_tangent(local_policy)
-        return policies.sum(axis=0)  # TODO: may need weighted sum based on frames
+            local_policy = compute_policy(self._phi0[f_key], 150 * self._d0[f_key], lx, ldx, self.model.get_local_gmm(f_key))
+            policies[i] = weights[f_key] * frames[f_key].transform_tangent(local_policy)
+        return policies.sum(0)
+
+    def compute_frame_weights(self, x, frames, normalized=True, eps=1e-30):
+        origin = self.model.manifold.get_origin()
+        frame_origins = {k: v.transform(origin) for k, v in frames.items()}
+        weights = {}
+        s = 0.
+        for f, o in frame_origins.items():
+            v = self.model.manifold.log_map(x, base=o)
+            w = np.exp(-v.T @ v / (2 * self._sigma ** 2))
+            weights[f] = w
+            s += w
+        if normalized:
+            for f in weights:
+                if s < eps:  # collapse to equal distribution
+                    weights[f] = 1. / len(weights)
+                else:
+                    weights[f] /= s
+        return weights
 
     def train(self, demos, **kwargs):
         """
@@ -58,13 +89,22 @@ def train(self, demos, **kwargs):
         ----------
         :param demos: list of Demonstration objects
         """
-        alpha = kwargs.get('alpha', 0.5)
+        alpha = kwargs.get('alpha', 1e-5)
+        beta = kwargs.get('beta', 1e-5)
+        min_d = kwargs.get('min_d', 20.)
+        energy = kwargs.get('energy', 0.)
+        var_scale = kwargs.get('var_scale', 1.)
         # train TP-HSMM/TP-GMM
         self.model.train(demos, **kwargs)
+        if 'S' in self.model.manifold.name:  # decouple orientation and position
+            pos_idx, quat_idx = self.model.manifold.get_pos_quat_indices(tangent=True)
+            self.model.reset_covariance(pos_idx, quat_idx)
+        if var_scale > 1.:
+            self.model.scale_covariance(var_scale)
         # train dynamics
-        self._phi0, self._d0 = optimize_dynamics(self.model, demos, alpha)
-        # train local Riemannian metrics
-        self._R_net = optimize_riemannian_metric(self, demos, **kwargs)
+        self._phi0, self._d0 = optimize_dynamics(self.model, demos, alpha, beta, min_d, energy)
+        # train local Riemannian metrics TODO: RiemannianNetwork is still under consideration
+        # self._R_net = optimize_riemannian_metric(self, demos, **kwargs)
 
     @staticmethod
     def load(task_name, model_name='sample.p'):
@@ -74,7 +114,12 @@ def load(task_name, model_name='sample.p'):
         :param model_name: name of model in data/models
         """
         tprmp = TPRMP()
-        tprmp._model = TPHSMM.load(task_name, model_name)
+        tprmp._model = TPHSMM.load(task_name, 'stats_' + model_name)
+        file = join(DATA_PATH, task_name, 'models', 'dynamics_' + model_name)
+        if not exists(file):
+            raise ValueError(f'[TPHSMM]: File {file} does not exist!')
+        dynamics = load(file)
+        tprmp._phi0, tprmp._d0 = dynamics['phi0'], dynamics['d0']
         return tprmp
 
     @property

diff --git a/tprmp/optimizer/dynamics.py b/tprmp/optimizer/dynamics.py
@@ -6,21 +6,22 @@
 logger = logging.getLogger(__name__)
 
 
-def optimize_dynamics(tp_hsmm, demos, alpha=0.01, beta=0.5):
+def optimize_dynamics(tp_hsmm, demos, alpha=1e-5, beta=1e-5, min_d=20., energy=10.):
     frames = demos[0].frame_names
+    gap = energy / tp_hsmm.num_comp
     phi0_frames = {}
     d0_frames = {}
     for frame in frames:
         phi0 = cp.Variable(tp_hsmm.num_comp)
-        d0 = cp.Variable(tp_hsmm.num_comp)
+        d0 = cp.Variable((tp_hsmm.num_comp, tp_hsmm.manifold.dim_T))
         loss = 0.
         for demo in demos:
             trajs = demo.traj_in_frames[frame]
             x, dx, ddx = trajs['traj'], trajs['d_traj'], trajs['dd_traj']
             for t in range(x.shape[1]):
-                loss += cp.sum_squares(ddx[:, t] - compute_policy(phi0, d0, x[:, t], dx[:, t], tp_hsmm.get_local_gmm(frame))) / x.shape[1]
+                loss += cp.sum_squares(ddx[:, t] - compute_policy(phi0, d0, x[:, t], dx[:, t], tp_hsmm.get_local_gmm(frame), use_cp=True)) / x.shape[1]
         objective = cp.Minimize(loss / len(demos) + alpha * cp.pnorm(phi0, p=2)**2 + beta * cp.pnorm(d0, p=2)**2)  # L2 regularization
-        problem = cp.Problem(objective, field_constraints(phi0, d0))
+        problem = cp.Problem(objective, field_constraints(phi0, d0, gap, min_d))
         problem.solve()
         logger.info(f'Opimizing dynamics for frame {frame}...')
         logger.info(f'Status: {problem.status}')
@@ -31,11 +32,11 @@ def optimize_dynamics(tp_hsmm, demos, alpha=0.01, beta=0.5):
     return phi0_frames, d0_frames
 
 
-def field_constraints(phi0, d0, eps=1e-2):
+def field_constraints(phi0, d0, gap=0., min_d=1.):
     constraints = []
     for k in range(phi0.size - 1):
-        constraints.append(phi0[k] >= phi0[k + 1])
-    constraints.extend([phi0 >= 0, d0 >= eps])
+        constraints.append(phi0[k] >= phi0[k + 1] + gap)
+    constraints.extend([phi0 >= 0, d0 >= min_d])
     return constraints