Skip to content

Commit

Permalink
Early test of TPRMP, still need tuning
Browse files Browse the repository at this point in the history
Signed-off-by: An Thai Le <an.thai.le97@gmail.com>
  • Loading branch information
anindex committed Aug 19, 2021
1 parent 8183d19 commit 204ff3f
Show file tree
Hide file tree
Showing 8 changed files with 213 additions and 46 deletions.
70 changes: 70 additions & 0 deletions scripts/test_tprmp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import sys
import argparse
import numpy as np
import time
import pybullet as p
from os.path import join, dirname, abspath
import logging
logging.basicConfig()
logging.getLogger().setLevel(logging.INFO)

ROOT_DIR = join(dirname(abspath(__file__)), '..')
sys.path.append(ROOT_DIR)
from tprmp.utils.loading import load_demos # noqa
from tprmp.visualization.demonstration import plot_demo # noqa
from tprmp.models.tp_rmp import TPRMP
from tprmp.demonstrations.base import Demonstration
from tprmp.demonstrations.frame import Frame
from tprmp.demonstrations.quaternion import q_to_euler, q_convert_wxyz, q_from_euler, q_convert_xyzw # noqa
from tprmp.envs.gym import Environment # noqa
from tprmp.envs.tasks import PalletizingBoxes # noqa

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description='Example run: python test_tprmp.py test.p')
parser.add_argument('task', help='The task folder', type=str, default='pick')
parser.add_argument('data', help='The data file', type=str, default='data.p')
args = parser.parse_args()

DATA_DIR = join(ROOT_DIR, 'data', 'tasks', args.task, 'demos')
data_file = join(DATA_DIR, args.data)
# parameters
T = 300
dt = 0.001
NUM_COMP = 30
alpha, beta = 1e-2, 0.
min_d = 0.
energy = 0.
sigma = 0.5
var_scale = 2.
# load data
demos = load_demos(data_file, tag='pick_side')
demos = [demos[-1]]
manifold = demos[0].manifold
# plot_demo(demos, only_global=False, plot_quat=False, new_fig=True, new_ax=True, show=True)
# train tprmp
model = TPRMP(num_comp=NUM_COMP, name=args.task, sigma=sigma)
model.train(demos, alpha=alpha, beta=beta, min_d=min_d, energy=energy, var_scale=var_scale)
# model.model.plot_model(demos)
# test tprmp
env = Environment(task=PalletizingBoxes(), disp=True)
ee_pose = np.array(env.home_pose)
ee_pose[3:] = q_convert_wxyz(ee_pose[3:])
A, b = Demonstration.construct_linear_map(manifold, ee_pose)
ee_frame = Frame(A, b, manifold=manifold)
box_id = env.task.goals[0][0][0]
position = np.array([0.5, -0.25, env.task.box_size[0] / 2])
rotation = q_convert_xyzw(q_from_euler(np.array([np.pi/2, 0., 0.])))
p.resetBasePositionAndOrientation(box_id, position, rotation)
target = p.getBasePositionAndOrientation(box_id)
obj_pose = np.append(position, q_convert_wxyz(rotation))
A, b = Demonstration.construct_linear_map(manifold, obj_pose)
obj_frame = Frame(A, b, manifold=manifold)
frames = {'ee_frame': ee_frame, 'obj_frame': obj_frame}
model.generate_global_gmm(frames)
for t in np.linspace(0, T, int(T / dt) + 1):
x = np.append(env.ee_pose[:3], q_convert_wxyz(env.ee_pose[3:]))
dx = env.ee_vel
ddx = model.retrieve(x, dx, frames)
env.step(ddx)
# input()
time.sleep(dt)
30 changes: 30 additions & 0 deletions tprmp/demonstrations/manifold.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,36 @@ def gaussian_product(self, g_list, accuracy=1e-5, max_iter=100):
Manifold.logger.warn(f'Iterative likelihood maximization for mean terminated after reaching {max_iter} iterations')
return ManifoldGaussian(self, mu, np.linalg.inv(sigma_inv))

def get_origin(self):
spaces = self.name.split(" x ")
origin = []
for man_name in spaces:
if man_name == "S^3":
origin.extend([1., 0., 0., 0.])
elif man_name[:2] == "R^":
n = int(man_name[2:])
origin.extend([0.] * n)
else:
Manifold.logger.error(f'Invalid manifold naming {man_name}.')
return np.array(origin)

def get_pos_quat_indices(self, tangent=False):
spaces = self.name.split(" x ")
pos_idx, quat_idx = [], []
i = 0
for man_name in spaces:
if man_name == "S^3":
r = 3 if tangent else 4
quat_idx.extend(range(i, i + r))
i += r
elif man_name[:2] == "R^":
n = int(man_name[2:])
pos_idx.extend(range(i, i + n))
i += n
else:
Manifold.logger.error(f'Invalid manifold naming {man_name}.')
return np.array(pos_idx), np.array(quat_idx)

@staticmethod
def get_quaternion_manifold():
return Manifold(4, 3, q_log_map, q_exp_map, q_parallel_transport, "S^3")
Expand Down
7 changes: 5 additions & 2 deletions tprmp/demonstrations/probability.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ def kl_divergence_mvn(self, g):
term3 = g.cov_inv.dot(v).dot(v)
return (term1 + term2 + term3) / 2

def recompute_inv(self):
self._cov_inv = np.linalg.inv(self.cov)
self._nf = 1 / (np.sqrt((2 * np.pi)**self.manifold.dim_T * np.linalg.det(self.cov)))

@property
def manifold(self):
return self._manifold
Expand All @@ -75,8 +79,7 @@ def nf(self):
@cov.setter
def cov(self, value):
self._cov = value
self._cov_inv = np.linalg.inv(value)
self._nf = 1 / (np.sqrt((2 * np.pi)**self.manifold.dim_T * np.linalg.det(value)))
self.recompute_inv()

@property
def cov_inv(self):
Expand Down
29 changes: 19 additions & 10 deletions tprmp/models/rmp.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import numpy as np

import cvxpy as cp

def compute_riemannian_metric(x, mvns):
weights = compute_obsrv_prob(x, mvns)
Ms = np.array([comp.cov_inv for comp in mvns])
return Ms.T @ weights


def compute_policy(phi0, d0, x, dx, mvns):
def compute_policy(phi0, d0, x, dx, mvns, use_cp=False):
weights = compute_obsrv_prob(x, mvns)
return compute_potential_term(weights, phi0, x, mvns) + compute_dissipation_term(weights, d0, dx, mvns)
return compute_potential_term(weights, phi0, x, mvns) + compute_dissipation_term(weights, d0, dx, mvns, use_cp=use_cp)


def compute_potential_term(weights, phi0, x, mvns):
Expand All @@ -24,11 +24,18 @@ def compute_potential_term(weights, phi0, x, mvns):
return Ps


def compute_dissipation_term(weights, d0, dx, mvns):
def compute_dissipation_term(weights, d0, dx, mvns, use_cp=False):
manifold = mvns[0].manifold
Ds = np.zeros(manifold.dim_T) if len(dx.shape) == 1 else np.zeros((manifold.dim_T, dx.shape[1]))
for k in range(len(mvns)):
Ds += -weights[k] * d0[k] * dx
if use_cp:
Ds += -weights[k] * cp.multiply(d0[k], dx)
else:
if len(dx.shape) > 1:
d = np.expand_dims(d0[k], axis=-1)
else:
d = d0[k]
Ds += -weights[k] * d * dx
return Ds


Expand All @@ -44,15 +51,17 @@ def compute_potentials(phi0, x, mvns):
return phi


def compute_obsrv_prob(x, mvns, normalized=True):
def compute_obsrv_prob(x, mvns, normalized=True, eps=1e-30):
num_comp = len(mvns)
prob = np.zeros(num_comp) if len(x.shape) == 1 else np.zeros((num_comp, x.shape[1]))
for k in range(num_comp):
prob[k] = mvns[k].pdf(x)
if (prob.sum() == 0.):
print(x)
if normalized:
prob /= prob.sum()
s = prob.sum()
if s < eps: # collapse into uniform distribution
prob = np.ones(num_comp) / num_comp
else:
prob /= prob.sum()
return prob


Expand All @@ -69,7 +78,7 @@ def compute_obsrv_prob(x, mvns, normalized=True):
T = 300
dt = 0.05
phi0 = [5., 1., 0.]
d0 = .2 * np.ones(3)
d0 = .2 * np.ones((3, 2))
# test semantics
x, dx = np.zeros((2, T)), np.zeros((2, T))
print(compute_policy(phi0, d0, x, dx, mvns).shape)
Expand Down
6 changes: 6 additions & 0 deletions tprmp/models/tp_gmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ def reset_covariance(self, idxs_1, idxs_2):
for f_key in self.frame_names:
self._mvns[k][f_key].cov[np.ix_(idxs_1, idxs_2)] = 0.0
self._mvns[k][f_key].cov[np.ix_(idxs_2, idxs_1)] = 0.0
self._mvns[k][f_key].recompute_inv()

def scale_covariance(self, scale):
for k in range(self.num_comp):
for f_key in self.frame_names:
self._mvns[k][f_key].cov = self._mvns[k][f_key].cov * scale**2

def get_local_gmm(self, frame, tag=None):
if frame not in self.frame_names:
Expand Down
73 changes: 59 additions & 14 deletions tprmp/models/tp_rmp.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import os
from os.path import join, exists
import logging
import numpy as np
import pickle
import time

from tprmp.models.tp_hsmm import TPHSMM
from tprmp.models.rmp import compute_policy
from tprmp.models.rmp import compute_policy, compute_riemannian_metric
from tprmp.models.coriolis import compute_coriolis_force
from tprmp.optimizer.dynamics import optimize_dynamics
from tprmp.optimizer.riemannian import optimize_riemannian_metric
from tprmp.utils.loading import load


_path_file = os.path.dirname(os.path.realpath(__file__))
Expand All @@ -21,34 +23,63 @@ class TPRMP(object):
logger = logging.getLogger(__name__)

def __init__(self, **kwargs):
self._sigma = kwargs.pop('sigma', 1.)
self._model = TPHSMM(**kwargs)
self._global_mvns = None
self._phi0 = None
self._d0 = None
self._R_net = None

def save(self, name=None):
self.model.save(name)
file = os.path.join(DATA_PATH, self.model.name, 'models', name if name is not None else ('dynamics_' + str(time.time()) + '.p'))
file = join(DATA_PATH, self.model.name, 'models', name if name is not None else ('dynamics_' + str(time.time()) + '.p'))
os.makedirs(file, exist_ok=True)
with open(file, 'wb') as f:
pickle.dump(self.parameters(), f)
pickle.dump({'phi0': self._phi0, 'd0': self._d0}, f)

def generate_global_gmm(self, frames):
self._global_mvns = self.model.generate_global_gmm(frames)

def retrieve(self, frames, **kwargs):
def retrieve(self, x, dx, frames, compute_global_mvns=False):
"""
Retrieve global RMP.
"""
if compute_global_mvns or self._global_mvns is None:
self.generate_global_gmm(frames)
f = self.compute_global_policy(x, dx, frames) - compute_coriolis_force(x, dx, self._global_mvns)
M = compute_riemannian_metric(x, self._global_mvns)
return np.linalg.inv(M) @ f

def compute_global_policy(self, x, dx, frames): # TODO: add Riemmanian metric
def compute_global_policy(self, x, dx, frames):
if not set(self.model.frame_names).issubset(set(frames)):
raise IndexError(f'[TPRMP]: Frames must be subset of {self.model.frame_names}')
policies = np.zeros((len(frames), self.model.manifold.dim_T))
weights = self.compute_frame_weights(x, frames)
for i, f_key in enumerate(self.model.frame_names):
# compute local policy
lx = frames[f_key].pullback(x)
ldx = frames[f_key].pullback_tangent(dx)
local_policy = compute_policy(self._phi0[f_key], self._d0[f_key], lx, ldx, self.model.get_local_gmm(f_key))
policies[i] = frames[f_key].transform_tangent(local_policy)
return policies.sum(axis=0) # TODO: may need weighted sum based on frames
local_policy = compute_policy(self._phi0[f_key], 150 * self._d0[f_key], lx, ldx, self.model.get_local_gmm(f_key))
policies[i] = weights[f_key] * frames[f_key].transform_tangent(local_policy)
return policies.sum(0)

def compute_frame_weights(self, x, frames, normalized=True, eps=1e-30):
origin = self.model.manifold.get_origin()
frame_origins = {k: v.transform(origin) for k, v in frames.items()}
weights = {}
s = 0.
for f, o in frame_origins.items():
v = self.model.manifold.log_map(x, base=o)
w = np.exp(-v.T @ v / (2 * self._sigma ** 2))
weights[f] = w
s += w
if normalized:
for f in weights:
if s < eps: # collapse to equal distribution
weights[f] = 1. / len(weights)
else:
weights[f] /= s
return weights

def train(self, demos, **kwargs):
"""
Expand All @@ -58,13 +89,22 @@ def train(self, demos, **kwargs):
----------
:param demos: list of Demonstration objects
"""
alpha = kwargs.get('alpha', 0.5)
alpha = kwargs.get('alpha', 1e-5)
beta = kwargs.get('beta', 1e-5)
min_d = kwargs.get('min_d', 20.)
energy = kwargs.get('energy', 0.)
var_scale = kwargs.get('var_scale', 1.)
# train TP-HSMM/TP-GMM
self.model.train(demos, **kwargs)
if 'S' in self.model.manifold.name: # decouple orientation and position
pos_idx, quat_idx = self.model.manifold.get_pos_quat_indices(tangent=True)
self.model.reset_covariance(pos_idx, quat_idx)
if var_scale > 1.:
self.model.scale_covariance(var_scale)
# train dynamics
self._phi0, self._d0 = optimize_dynamics(self.model, demos, alpha)
# train local Riemannian metrics
self._R_net = optimize_riemannian_metric(self, demos, **kwargs)
self._phi0, self._d0 = optimize_dynamics(self.model, demos, alpha, beta, min_d, energy)
# train local Riemannian metrics TODO: RiemannianNetwork is still under consideration
# self._R_net = optimize_riemannian_metric(self, demos, **kwargs)

@staticmethod
def load(task_name, model_name='sample.p'):
Expand All @@ -74,7 +114,12 @@ def load(task_name, model_name='sample.p'):
:param model_name: name of model in data/models
"""
tprmp = TPRMP()
tprmp._model = TPHSMM.load(task_name, model_name)
tprmp._model = TPHSMM.load(task_name, 'stats_' + model_name)
file = join(DATA_PATH, task_name, 'models', 'dynamics_' + model_name)
if not exists(file):
raise ValueError(f'[TPHSMM]: File {file} does not exist!')
dynamics = load(file)
tprmp._phi0, tprmp._d0 = dynamics['phi0'], dynamics['d0']
return tprmp

@property
Expand Down
15 changes: 8 additions & 7 deletions tprmp/optimizer/dynamics.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,22 @@
logger = logging.getLogger(__name__)


def optimize_dynamics(tp_hsmm, demos, alpha=0.01, beta=0.5):
def optimize_dynamics(tp_hsmm, demos, alpha=1e-5, beta=1e-5, min_d=20., energy=10.):
frames = demos[0].frame_names
gap = energy / tp_hsmm.num_comp
phi0_frames = {}
d0_frames = {}
for frame in frames:
phi0 = cp.Variable(tp_hsmm.num_comp)
d0 = cp.Variable(tp_hsmm.num_comp)
d0 = cp.Variable((tp_hsmm.num_comp, tp_hsmm.manifold.dim_T))
loss = 0.
for demo in demos:
trajs = demo.traj_in_frames[frame]
x, dx, ddx = trajs['traj'], trajs['d_traj'], trajs['dd_traj']
for t in range(x.shape[1]):
loss += cp.sum_squares(ddx[:, t] - compute_policy(phi0, d0, x[:, t], dx[:, t], tp_hsmm.get_local_gmm(frame))) / x.shape[1]
loss += cp.sum_squares(ddx[:, t] - compute_policy(phi0, d0, x[:, t], dx[:, t], tp_hsmm.get_local_gmm(frame), use_cp=True)) / x.shape[1]
objective = cp.Minimize(loss / len(demos) + alpha * cp.pnorm(phi0, p=2)**2 + beta * cp.pnorm(d0, p=2)**2) # L2 regularization
problem = cp.Problem(objective, field_constraints(phi0, d0))
problem = cp.Problem(objective, field_constraints(phi0, d0, gap, min_d))
problem.solve()
logger.info(f'Opimizing dynamics for frame {frame}...')
logger.info(f'Status: {problem.status}')
Expand All @@ -31,11 +32,11 @@ def optimize_dynamics(tp_hsmm, demos, alpha=0.01, beta=0.5):
return phi0_frames, d0_frames


def field_constraints(phi0, d0, eps=1e-2):
def field_constraints(phi0, d0, gap=0., min_d=1.):
constraints = []
for k in range(phi0.size - 1):
constraints.append(phi0[k] >= phi0[k + 1])
constraints.extend([phi0 >= 0, d0 >= eps])
constraints.append(phi0[k] >= phi0[k + 1] + gap)
constraints.extend([phi0 >= 0, d0 >= min_d])
return constraints


Expand Down
Loading

0 comments on commit 204ff3f

Please sign in to comment.