Commit 894e37c
Update
Former-commit-id: 50625cf
zuoxingdong committed Sep 14, 2018
1 parent ba5a3a1 commit 894e37c
Showing 29 changed files with 84 additions and 78 deletions.
2 changes: 2 additions & 0 deletions docs/requirements.txt
@@ -9,3 +9,5 @@ matplotlib
scikit-image
imageio
pyglet
cloudpickle
pyyaml
2 changes: 2 additions & 0 deletions lagom/agents/a2c_agent.py
@@ -144,6 +144,8 @@ def save(self, filename):

def load(self, filename):
self.policy.network.load(filename)


"""
# Generalized Advantage Estimation (GAE)
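For reference, the GAE estimator named in this docstring accumulates (gamma * lam)-discounted TD residuals; a minimal NumPy sketch of the standard recursion (illustrative only, not taken from this commit)::

    import numpy as np

    gamma, lam = 0.99, 0.95
    r = np.array([1.0, 0.5, 2.0])          # rewards r_t for one trajectory
    V = np.array([0.8, 0.7, 0.9, 0.0])     # value estimates V(s_t), bootstrap value appended

    delta = r + gamma * V[1:] - V[:-1]     # TD residuals delta_t
    A = np.zeros_like(r)
    gae = 0.0
    for t in reversed(range(len(r))):      # accumulate backwards: A_t = delta_t + gamma*lam*A_{t+1}
        gae = delta[t] + gamma * lam * gae
        A[t] = gae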
1 change: 0 additions & 1 deletion lagom/core/networks/base_mdn.py
@@ -298,7 +298,6 @@ def MDN_loss(self, log_pi, mu, std, target):
# Calculate the joint log-probabilities from [N, K, D] to [N, K]
joint_log_probs = torch.sum(log_pi + log_gaussian_probs, dim=-1, keepdim=False)


# Calculate the loss via log-sum-exp trick, from [N, K] to [N]
# It reduces over the K (mixing coefficient) dimension, producing a tensor of shape [N]
loss = -torch.logsumexp(joint_log_probs, dim=-1, keepdim=False)
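The log-sum-exp step above is what keeps the mixture negative log-likelihood numerically stable; a minimal sketch (illustrative only, not taken from this commit)::

    import torch

    N, K = 4, 3                              # batch size, number of mixture components
    joint_log_probs = torch.randn(N, K)      # stands in for log(pi_k) + joint Gaussian log-prob

    # Naive log(sum(exp(...))) can overflow/underflow for large-magnitude log-probs
    naive = -torch.log(torch.sum(torch.exp(joint_log_probs), dim=-1))

    # Stable version: reduces over the K dimension, producing a tensor of shape [N]
    loss = -torch.logsumexp(joint_log_probs, dim=-1, keepdim=False)

    assert torch.allclose(naive, loss)       # they agree whenever the naive form does not overflow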
23 changes: 12 additions & 11 deletions lagom/core/plotter/curve_plot.py
@@ -33,20 +33,21 @@ class CurvePlot(BasePlot):
Example::
x1 = [1, 4, 5, 7, 9, 13, 20]
y1 = [0.1, 0.25, 0.22, 0.53, 0.37, 0.5, 0.55]
x2 = [2, 4, 6, 7, 9, 11, 15]
y2 = [0.03, 0.12, 0.4, 0.2, 0.18, 0.32, 0.39]
>>> import matplotlib.pyplot as plt
>>> x1 = [1, 4, 5, 7, 9, 13, 20]
>>> y1 = [0.1, 0.25, 0.22, 0.53, 0.37, 0.5, 0.55]
>>> x2 = [2, 4, 6, 7, 9, 11, 15]
>>> y2 = [0.03, 0.12, 0.4, 0.2, 0.18, 0.32, 0.39]
interp = InterpCurve()
new_x, (new_y1, new_y2) = interp([x1, x2], [y1, y2], num_point=100)
>>> interp = InterpCurve()
>>> new_x, (new_y1, new_y2) = interp([x1, x2], [y1, y2], num_point=100)
plot = CurvePlot()
plot.add('curve1', [y1, y2], xvalues=[x1, x2])
ax = plot()
>>> plot = CurvePlot()
>>> plot.add('curve1', [y1, y2], xvalues=[x1, x2])
>>> ax = plot()
ax.plot(x1, y1, 'red')
ax.plot(x2, y2, 'green')
>>> ax.plot(x1, y1, 'red')
>>> ax.plot(x2, y2, 'green')
>>> ax.figure
2 changes: 1 addition & 1 deletion lagom/core/plotter/image_viewer.py
@@ -3,7 +3,7 @@
import pyglet

try:
from pyglet.gl import *
import pyglet.gl as gl
except Exception:
msg1 = '1. make sure OpenGL is installed by running `sudo apt install python-opengl`. \n'
msg2 = '2. if you are on a server, then create a fake screen with xvfb-run and make sure nvidia driver '
5 changes: 0 additions & 5 deletions lagom/core/policies/categorical_policy.py
@@ -20,11 +20,6 @@ class CategoricalPolicy(BasePolicy):
action spaces. Note that the network must have an attribute ``.last_feature_dim`` of type
``int`` for the policy to create the final output layer (fully-connected), and this is
recommended to be done in the method :meth:`~BaseNetwork.make_params` of the network class.
Example::
>>> policy = CategoricalPolicy(config=config, network=network, env_spec=env_spec)
>>> policy(observation)
"""
def __init__(self, config, network, env_spec, **kwargs):
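A minimal sketch of the ``.last_feature_dim`` contract described in the docstring above, assuming a BaseNetwork-style subclass (the class name and sizes here are hypothetical)::

    import torch
    import torch.nn as nn

    class MLPFeatureNetwork(nn.Module):
        """Stand-in for a BaseNetwork subclass; only the contract matters."""
        def __init__(self):
            super().__init__()
            self.make_params(config=None)

        def make_params(self, config):
            self.fc = nn.Linear(4, 64)
            # CategoricalPolicy reads this int to size its final fully-connected layer
            self.last_feature_dim = 64

        def forward(self, x):
            return torch.tanh(self.fc(x))

    net = MLPFeatureNetwork()
    assert net.last_feature_dim == 64    # what the policy relies on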
11 changes: 0 additions & 11 deletions lagom/core/policies/gaussian_policy.py
@@ -52,17 +52,6 @@ class GaussianPolicy(BasePolicy):
* :attr:`init_std` controls the initial values for independently learnable standard deviation.
Note that this is only valid when :attr:`std_state_dependent`=False.
Example::
>>> policy = GaussianPolicy(config=config,
network=network,
env_spec=env_spec,
min_std=1e-06,
std_style='exp',
constant_std=None,
std_state_dependent=True,
init_std=None)
>>> policy(observation)
"""
def __init__(self,
config,
10 changes: 6 additions & 4 deletions lagom/core/transform/interp_curve.py
@@ -29,10 +29,12 @@ class InterpCurve(BaseTransform):
Example::
x1 = [1, 4, 5, 7, 9, 13, 20]
y1 = [0.1, 0.25, 0.22, 0.53, 0.37, 0.5, 0.55]
x2 = [2, 4, 6, 7, 9, 11, 15]
y2 = [0.03, 0.12, 0.4, 0.2, 0.18, 0.32, 0.39]
>>> import matplotlib.pyplot as plt
>>> x1 = [1, 4, 5, 7, 9, 13, 20]
>>> y1 = [0.1, 0.25, 0.22, 0.53, 0.37, 0.5, 0.55]
>>> x2 = [2, 4, 6, 7, 9, 11, 15]
>>> y2 = [0.03, 0.12, 0.4, 0.2, 0.18, 0.32, 0.39]
>>> plt.plot(x1, y1)
>>> plt.plot(x2, y2, 'red')
10 changes: 6 additions & 4 deletions lagom/core/transform/smooth_filter.py
@@ -17,11 +17,13 @@ class SmoothFilter(BaseTransform):
Example::
x = np.linspace(0, 4*2*np.pi, num=100)
y = x*(np.sin(x) + np.random.random(100)*4)
>>> import matplotlib.pyplot as plt
>>> x = np.linspace(0, 4*2*np.pi, num=100)
>>> y = x*(np.sin(x) + np.random.random(100)*4)
smooth = SmoothFilter()
y2 = smooth(y, window_length=31, polyorder=10)
>>> smooth = SmoothFilter()
>>> y2 = smooth(y, window_length=31, polyorder=10)
>>> plt.plot(x, y)
>>> plt.plot(x, y2, 'red')
10 changes: 7 additions & 3 deletions lagom/envs/goal_env.py
@@ -48,9 +48,13 @@ def compute_reward(self, achieved_goal, desired_goal, info):
The following should always hold true::
>>> observation, reward, done, info = env.step()
>>> assert reward == env.compute_reward(observation['achieved_goal'], observation['desired_goal'], info)
import gym
env = gym.make('FetchPush-v1')
env.reset()
observation, reward, done, info = env.step(env.action_space.sample())
assert reward == env.compute_reward(observation['achieved_goal'], observation['desired_goal'], info)
Args:
achieved_goal (object): the goal that is currently achieved.
desired_goal (object): the desired goal that the agent should achieve
3 changes: 3 additions & 0 deletions lagom/envs/make_env.py
@@ -43,6 +43,7 @@ def make_gym_env(env_id, seed, monitor=False, monitor_dir=None):

return env


def make_envs(make_env, env_id, num_env, init_seed, **kwargs):
r"""Create a list of argument-free make_env() functions based on the given settings.
@@ -79,6 +80,7 @@ def make_envs(make_env, env_id, num_env, init_seed, **kwargs):

return list_make_env


def make_vec_env(vec_env_class, make_env, env_id, num_env, init_seed, **kwargs):
r"""Create a vectorized environment (i.e. :class:`VecEnv`).
@@ -97,6 +99,7 @@ def make_vec_env(vec_env_class, make_env, env_id, num_env, init_seed, **kwargs):
Example::
>>> from lagom.envs.vec_env import SerialVecEnv
>>> make_vec_env(vec_env_class=SerialVecEnv, make_env=make_gym_env, env_id='CartPole-v1', num_env=5, init_seed=1)
<SerialVecEnv: CartPole-v1, n: 5>
4 changes: 2 additions & 2 deletions lagom/envs/spaces/dict.py
@@ -13,15 +13,15 @@ class Dict(Space):
* Simple example::
>>> from lagom.envs.spaces import Discrete, Box
>>> space = Dict({'position': Discrete(2), 'velocity': Box(low=-1.0, high=1.0, shape=(1, 2), dtype=np.float32)})
>>> space.sample()
OrderedDict([('position', 0),
('velocity', array([[0.8046695 , 0.78866726]], dtype=float32))])
* Nested example::
>>> sensor_space = Dict({'position': Box(-100, 100, shape=(3,), dtype=np.float32),
'velocity': Box(-1, 1, shape=(3,), dtype=np.float32)})
>>> sensor_space = Dict({'position': Box(-100, 100, shape=(3,), dtype=np.float32), 'velocity': Box(-1, 1, shape=(3,), dtype=np.float32)})
>>> space = Dict({'sensors': sensor_space, 'score': Discrete(100)})
>>> space.sample()
OrderedDict([('score', 47),
1 change: 1 addition & 0 deletions lagom/envs/spaces/product.py
@@ -8,6 +8,7 @@ class Product(Space):
Example::
>>> from lagom.envs.spaces import Discrete, Box
>>> Product((Discrete(5), Box(-1.0, 1.0, shape=(2, 3), dtype=np.float32)))
"""
3 changes: 2 additions & 1 deletion lagom/envs/vec_env/parallel_vec_env.py
@@ -79,7 +79,8 @@ class ParallelVecEnv(VecEnv):
should use :class:`SerialVecEnv` instead.
Example::
>>> from lagom.envs import make_envs, make_gym_env
>>> list_make_env = make_envs(make_env=make_gym_env, env_id='CartPole-v1', num_env=3, init_seed=0)
>>> env = ParallelVecEnv(list_make_env=list_make_env)
>>> env
1 change: 1 addition & 0 deletions lagom/envs/vec_env/serial_vec_env.py
@@ -18,6 +18,7 @@ class SerialVecEnv(VecEnv):
Example::
>>> from lagom.envs import make_envs, make_gym_env
>>> list_make_env = make_envs(make_env=make_gym_env, env_id='CartPole-v1', num_env=3, init_seed=0)
>>> env = SerialVecEnv(list_make_env=list_make_env)
>>> env
1 change: 1 addition & 0 deletions lagom/envs/vec_env/vec_env.py
@@ -4,6 +4,7 @@
from abc import abstractmethod

from lagom.core.plotter import GridImage
from lagom.core.plotter import ImageViewer


class VecEnv(ABC):
20 changes: 6 additions & 14 deletions lagom/envs/vec_env/vec_standardize.py
@@ -38,19 +38,11 @@ class VecStandardize(VecEnvWrapper):
Example::
list_make_env = make_envs(make_env=make_gym_env,
env_id='Pendulum-v0',
num_env=2,
init_seed=1)
venv = SerialVecEnv(list_make_env=list_make_env)
venv = VecStandardize(venv=venv,
use_obs=True,
use_reward=True,
clip_obs=10.0,
clip_reward=10.0,
gamma=0.99,
eps=1e-8)
>>> from lagom.envs import make_envs, make_gym_env
>>> from lagom.envs.vec_env import SerialVecEnv
>>> list_make_env = make_envs(make_env=make_gym_env, env_id='Pendulum-v0', num_env=2, init_seed=1)
>>> venv = SerialVecEnv(list_make_env=list_make_env)
>>> venv = VecStandardize(venv=venv, use_obs=True, use_reward=True, clip_obs=10.0, clip_reward=10.0, gamma=0.99, eps=1e-8)
>>> venv
<VecStandardize: Pendulum-v0, n: 2>
@@ -142,7 +134,7 @@ def process_reward(self, rewards):
# Update with calculated discounted returns
self.reward_runningavg(self.all_returns)
# Standardize the reward
mean = self.reward_runningavg.mu
# mean = self.reward_runningavg.mu # not useful
std = self.reward_runningavg.sigma
# Note that we do not subtract the mean, but only divide by the std
if not np.allclose(std, 0.0): # only non-zero std
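A minimal sketch of the reward scaling in ``process_reward`` (illustrative only, with a fixed stand-in for the running std)::

    import numpy as np

    gamma, eps, clip_reward = 0.99, 1e-8, 10.0

    returns = np.zeros(2)               # running discounted returns, one per sub-environment
    running_std = 2.0                   # stand-in for reward_runningavg.sigma

    rewards = np.array([1.0, -0.5])
    returns = rewards + gamma * returns            # update the discounted-return estimate
    if not np.allclose(running_std, 0.0):          # only scale when the std is non-zero
        rewards = rewards / (running_std + eps)    # divide by std; the mean is NOT subtracted
    rewards = np.clip(rewards, -clip_reward, clip_reward)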
2 changes: 2 additions & 0 deletions lagom/envs/wrappers/flatten_dict.py
@@ -11,6 +11,8 @@ class FlattenDictWrapper(ObservationWrapper):
Example::
>>> import gym
>>> from lagom.envs.wrappers import GymWrapper
>>> env = gym.make('FetchPush-v1')
>>> env = GymWrapper(env)
>>> env.observation_space
3 changes: 2 additions & 1 deletion lagom/envs/wrappers/frame_stack.py
@@ -25,6 +25,7 @@ class FrameStack(ObservationWrapper):
Example::
>>> from lagom.envs import make_gym_env
>>> env = make_gym_env(env_id='CartPole-v1', seed=1)
>>> env = FrameStack(env, num_stack=4)
>>> env
@@ -79,7 +80,7 @@ def reset(self):

def process_observation(self, observation):
# Shift the oldest observation to the front
self.stack_buffer = np.roll(self.stack_buffer, shift=1, axis=-1)
# Replace the front as new observation
self.stack_buffer[..., 0] = observation

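What the ``np.roll`` line above does to the stack buffer, on a toy array (illustrative only, not taken from this commit)::

    import numpy as np

    num_stack = 4
    # last axis holds the frames: slot 0 is the newest frame, slot -1 the oldest
    buf = np.stack([np.full((2, 2), t) for t in range(num_stack)], axis=-1)

    buf = np.roll(buf, shift=1, axis=-1)   # slide every frame one slot toward the back
    buf[..., 0] = 99                       # overwrite the front slot with the new observation
    print(buf[0, 0])                       # -> [99  0  1  2]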
2 changes: 1 addition & 1 deletion lagom/experiment/configurator.py
@@ -42,7 +42,7 @@ class Configurator(object):
>>> configurator.grid('network.lr', [1e-2, 5e-3])
>>> configurator.grid('env.id', ['CartPole-v1', 'Ant-v2'])
>>> list_config = configurator.make_configs()
>>> Configurator.to_dataframe(list_config)
>>> config_dataframe = Configurator.to_dataframe(list_config)
ID log.dir network.lr env.id
0 0 some path 0.010 CartPole-v1
1 1 some path 0.010 Ant-v2
3 changes: 2 additions & 1 deletion lagom/experiment/run_experiment.py
@@ -26,7 +26,8 @@ def ask_yes_or_no(msg):
return False
else:
print("Please answer 'yes' or 'no':")



def run_experiment(worker_class, master_class, max_num_worker=None, daemonic_worker=None):
r"""A convenient function to launch a parallelized experiment (Master-Worker).
4 changes: 2 additions & 2 deletions lagom/logger.py
@@ -54,8 +54,8 @@ class Logger(logging.Logger):
* With indentation::
>>> logger.dump(keys=None, index=None, indent=1)
    Iteration: [1, 2, 3]
    Training Loss: [0.12, 0.11, 0.09]
* With specified keys::
1 change: 0 additions & 1 deletion lagom/runner/base_runner.py
@@ -34,4 +34,3 @@ def __init__(self, agent, env, gamma):
def __call__(self, N, T):
r"""Run the agent in the environment and collect all necessary interaction data as a batch. """
raise NotImplementedError

9 changes: 6 additions & 3 deletions lagom/runner/segment.py
@@ -62,6 +62,8 @@ class Segment(BaseHistory):
Example::
>>> import torch
>>> from lagom.runner import Transition
>>> transition1 = Transition(s=10, a=-1, r=1, s_next=20, done=False)
>>> transition1.add_info('V_s', torch.tensor(100.))
@@ -151,9 +153,9 @@ def transitions(self):

#########
# Use itertools.chain.from_iterable, on average 50% faster than the following method
#transitions = []
#for trajectory in self.trajectories:
# transitions.extend(trajectory.transitions)
# transitions = []
# for trajectory in self.trajectories:
# transitions.extend(trajectory.transitions)
########

transitions = list(chain.from_iterable([trajectory.transitions for trajectory in self.trajectories]))
@@ -184,6 +186,7 @@ def all_returns(self):
out = list(chain.from_iterable([trajectory.all_returns for trajectory in self.trajectories]))

return out

@property
def all_discounted_returns(self):
# Use itertools.chain.from_iterable, for the same reason as documented in `transitions(self)`
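A minimal sketch of the flattening that ``transitions`` and ``all_discounted_returns`` share (the 50% speedup figure is the source comment's claim, not re-measured here)::

    from itertools import chain

    trajectories = [[1, 2, 3], [4, 5], [6]]    # stand-ins for per-trajectory transition lists

    # Loop-based flattening (the slower variant kept as a comment in the source)
    flat_loop = []
    for trajectory in trajectories:
        flat_loop.extend(trajectory)

    # chain.from_iterable concatenates the inner lists in one pass
    flat_chain = list(chain.from_iterable(trajectories))

    assert flat_loop == flat_chain == [1, 2, 3, 4, 5, 6]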
8 changes: 4 additions & 4 deletions lagom/runner/segment_runner.py
@@ -32,10 +32,10 @@ class SegmentRunner(BaseRunner):
Example::
>>> list_make_env = make_envs(make_env=make_gym_env,
env_id='CartPole-v1',
num_env=2,
init_seed=0)
>>> from lagom.agents import RandomAgent
>>> from lagom.envs import make_envs, make_gym_env, EnvSpec
>>> from lagom.envs.vec_env import SerialVecEnv
>>> list_make_env = make_envs(make_env=make_gym_env, env_id='CartPole-v1', num_env=2, init_seed=0)
>>> env = SerialVecEnv(list_make_env=list_make_env)
>>> env_spec = EnvSpec(env)
3 changes: 2 additions & 1 deletion lagom/runner/trajectory.py
@@ -18,6 +18,7 @@ class Trajectory(BaseHistory):
Example::
>>> from lagom.runner import Transition
>>> transition1 = Transition(s=1, a=0.1, r=0.5, s_next=2, done=False)
>>> transition1.add_info(name='V_s', value=10.0)
@@ -65,7 +66,7 @@ def add_transition(self, transition):
# Sanity check for trajectory
# Not allowed to add more transition if it already contains done=True
if len(self.transitions) > 0: # non-empty
assert self.transitions[-1].done == False, 'not allowed to add transition, because already contains done=True'
assert not self.transitions[-1].done, 'not allowed to add transition, because already contains done=True'
super().add_transition(transition)

@property
8 changes: 4 additions & 4 deletions lagom/runner/trajectory_runner.py
@@ -28,10 +28,10 @@ class TrajectoryRunner(BaseRunner):
Example::
>>> list_make_env = make_envs(make_env=make_gym_env,
env_id='CartPole-v1',
num_env=1,
init_seed=0)
>>> from lagom.agents import RandomAgent
>>> from lagom.envs import make_envs, make_gym_env, EnvSpec
>>> from lagom.envs.vec_env import SerialVecEnv
>>> list_make_env = make_envs(make_env=make_gym_env, env_id='CartPole-v1', num_env=1, init_seed=0)
>>> env = SerialVecEnv(list_make_env=list_make_env)
>>> env_spec = EnvSpec(env)