Commit 894e37c
Update
Former-commit-id: 50625cf
zuoxingdong committed Sep 14, 2018
1 parent ba5a3a1 commit 894e37c
Showing 29 changed files with 84 additions and 78 deletions.
2 changes: 2 additions & 0 deletions docs/requirements.txt
@@ -9,3 +9,5 @@ matplotlib
scikit-image
imageio
pyglet
cloudpickle
pyyaml
2 changes: 2 additions & 0 deletions lagom/agents/a2c_agent.py
@@ -144,6 +144,8 @@ def save(self, filename):

def load(self, filename):
self.policy.network.load(filename)


"""
# Generalized Advantage Estimation (GAE)
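For reference, the GAE estimator named in this docstring accumulates (gamma * lam)-discounted TD residuals; a minimal NumPy sketch of the standard recursion (illustrative only, not taken from this commit)::

    import numpy as np

    gamma, lam = 0.99, 0.95
    r = np.array([1.0, 0.5, 2.0])          # rewards r_t for one trajectory
    V = np.array([0.8, 0.7, 0.9, 0.0])     # value estimates V(s_t), bootstrap value appended

    delta = r + gamma * V[1:] - V[:-1]     # TD residuals delta_t
    A = np.zeros_like(r)
    gae = 0.0
    for t in reversed(range(len(r))):      # accumulate backwards: A_t = delta_t + gamma*lam*A_{t+1}
        gae = delta[t] + gamma * lam * gae
        A[t] = gae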
1 change: 0 additions & 1 deletion lagom/core/networks/base_mdn.py
@@ -298,7 +298,6 @@ def MDN_loss(self, log_pi, mu, std, target):
# Calculate the joint log-probabilities from [N, K, D] to [N, K]
joint_log_probs = torch.sum(log_pi + log_gaussian_probs, dim=-1, keepdim=False)


# Calculate the loss via log-sum-exp trick, from [N, K] to [N]
# It reduces over the K (mixing coefficient) dimension, producing a tensor of shape [N]
loss = -torch.logsumexp(joint_log_probs, dim=-1, keepdim=False)
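The log-sum-exp step above is what keeps the mixture negative log-likelihood numerically stable; a minimal sketch (illustrative only, not taken from this commit)::

    import torch

    N, K = 4, 3                              # batch size, number of mixture components
    joint_log_probs = torch.randn(N, K)      # stands in for log(pi_k) + joint Gaussian log-prob

    # Naive log(sum(exp(...))) can overflow/underflow for large-magnitude log-probs
    naive = -torch.log(torch.sum(torch.exp(joint_log_probs), dim=-1))

    # Stable version: reduces over the K dimension, producing a tensor of shape [N]
    loss = -torch.logsumexp(joint_log_probs, dim=-1, keepdim=False)

    assert torch.allclose(naive, loss)       # they agree whenever the naive form does not overflow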
23 changes: 12 additions & 11 deletions lagom/core/plotter/curve_plot.py
@@ -33,20 +33,21 @@ class CurvePlot(BasePlot):
Example::
x1 = [1, 4, 5, 7, 9, 13, 20]
y1 = [0.1, 0.25, 0.22, 0.53, 0.37, 0.5, 0.55]
x2 = [2, 4, 6, 7, 9, 11, 15]
y2 = [0.03, 0.12, 0.4, 0.2, 0.18, 0.32, 0.39]
>>> import matplotlib.pyplot as plt
>>> x1 = [1, 4, 5, 7, 9, 13, 20]
>>> y1 = [0.1, 0.25, 0.22, 0.53, 0.37, 0.5, 0.55]
>>> x2 = [2, 4, 6, 7, 9, 11, 15]
>>> y2 = [0.03, 0.12, 0.4, 0.2, 0.18, 0.32, 0.39]
interp = InterpCurve()
new_x, (new_y1, new_y2) = interp([x1, x2], [y1, y2], num_point=100)
>>> interp = InterpCurve()
>>> new_x, (new_y1, new_y2) = interp([x1, x2], [y1, y2], num_point=100)
plot = CurvePlot()
plot.add('curve1', [y1, y2], xvalues=[x1, x2])
ax = plot()
>>> plot = CurvePlot()
>>> plot.add('curve1', [y1, y2], xvalues=[x1, x2])
>>> ax = plot()
ax.plot(x1, y1, 'red')
ax.plot(x2, y2, 'green')
>>> ax.plot(x1, y1, 'red')
>>> ax.plot(x2, y2, 'green')
>>> ax.figure
2 changes: 1 addition & 1 deletion lagom/core/plotter/image_viewer.py
@@ -3,7 +3,7 @@
import pyglet

try:
from pyglet.gl import *
import pyglet.gl as gl
except Exception:
msg1 = '1. make sure OpenGL is installed by running `sudo apt install python-opengl`. \n'
msg2 = '2. if you are on a server, then create a fake screen with xvfb-run and make sure nvidia driver '
5 changes: 0 additions & 5 deletions lagom/core/policies/categorical_policy.py
@@ -20,11 +20,6 @@ class CategoricalPolicy(BasePolicy):
action spaces. Note that the network must have an attribute ``.last_feature_dim`` of type
``int`` for the policy to create the final output layer (fully-connected), and this is
recommended to be done in the method :meth:`~BaseNetwork.make_params` of the network class.
Example::
>>> policy = CategoricalPolicy(config=config, network=network, env_spec=env_spec)
>>> policy(observation)
"""
def __init__(self, config, network, env_spec, **kwargs):
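A minimal sketch of the ``.last_feature_dim`` contract described in the docstring above, assuming a BaseNetwork-style subclass (the class name and sizes here are hypothetical)::

    import torch
    import torch.nn as nn

    class MLPFeatureNetwork(nn.Module):
        """Stand-in for a BaseNetwork subclass; only the contract matters."""
        def __init__(self):
            super().__init__()
            self.make_params(config=None)

        def make_params(self, config):
            self.fc = nn.Linear(4, 64)
            # CategoricalPolicy reads this int to size its final fully-connected layer
            self.last_feature_dim = 64

        def forward(self, x):
            return torch.tanh(self.fc(x))

    net = MLPFeatureNetwork()
    assert net.last_feature_dim == 64    # what the policy relies on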
11 changes: 0 additions & 11 deletions lagom/core/policies/gaussian_policy.py
@@ -52,17 +52,6 @@ class GaussianPolicy(BasePolicy):
* :attr:`init_std` controls the initial values for independently learnable standard deviation.
Note that this is only valid when :attr:`std_state_dependent`=False.
Example::
>>> policy = GaussianPolicy(config=config,
network=network,
env_spec=env_spec,
min_std=1e-06,
std_style='exp',
constant_std=None,
std_state_dependent=True,
init_std=None)
>>> policy(observation)
"""
def __init__(self,
config,
10 changes: 6 additions & 4 deletions lagom/core/transform/interp_curve.py
@@ -29,10 +29,12 @@ class InterpCurve(BaseTransform):
Example::
x1 = [1, 4, 5, 7, 9, 13, 20]
y1 = [0.1, 0.25, 0.22, 0.53, 0.37, 0.5, 0.55]
x2 = [2, 4, 6, 7, 9, 11, 15]
y2 = [0.03, 0.12, 0.4, 0.2, 0.18, 0.32, 0.39]
>>> import matplotlib.pyplot as plt
>>> x1 = [1, 4, 5, 7, 9, 13, 20]
>>> y1 = [0.1, 0.25, 0.22, 0.53, 0.37, 0.5, 0.55]
>>> x2 = [2, 4, 6, 7, 9, 11, 15]
>>> y2 = [0.03, 0.12, 0.4, 0.2, 0.18, 0.32, 0.39]
>>> plt.plot(x1, y1)
>>> plt.plot(x2, y2, 'red')
10 changes: 6 additions & 4 deletions lagom/core/transform/smooth_filter.py
@@ -17,11 +17,13 @@ class SmoothFilter(BaseTransform):
Example::
x = np.linspace(0, 4*2*np.pi, num=100)
y = x*(np.sin(x) + np.random.random(100)*4)
>>> import matplotlib.pyplot as plt
>>> x = np.linspace(0, 4*2*np.pi, num=100)
>>> y = x*(np.sin(x) + np.random.random(100)*4)
smooth = SmoothFilter()
y2 = smooth(y, window_length=31, polyorder=10)
>>> smooth = SmoothFilter()
>>> y2 = smooth(y, window_length=31, polyorder=10)
>>> plt.plot(x, y)
>>> plt.plot(x, y2, 'red')
10 changes: 7 additions & 3 deletions lagom/envs/goal_env.py
@@ -48,9 +48,13 @@ def compute_reward(self, achieved_goal, desired_goal, info):
The following should always hold true::
>>> observation, reward, done, info = env.step()
>>> assert reward == env.compute_reward(observation['achieved_goal'], observation['desired_goal'], info)
import gym
env = gym.make('FetchPush-v1')
env.reset()
observation, reward, done, info = env.step(env.action_space.sample())
assert reward == env.compute_reward(observation['achieved_goal'], observation['desired_goal'], info)
Args:
achieved_goal (object): the goal that is currently achieved.
desired_goal (object): the desired goal that the agent should achieve
3 changes: 3 additions & 0 deletions lagom/envs/make_env.py
@@ -43,6 +43,7 @@ def make_gym_env(env_id, seed, monitor=False, monitor_dir=None):

return env


def make_envs(make_env, env_id, num_env, init_seed, **kwargs):
r"""Create a list of argument-free make_env() functions based on the given settings.
@@ -79,6 +80,7 @@ def make_envs(make_env, env_id, num_env, init_seed, **kwargs):

return list_make_env


def make_vec_env(vec_env_class, make_env, env_id, num_env, init_seed, **kwargs):
r"""Create a vectorized environment (i.e. :class:`VecEnv`).
@@ -97,6 +99,7 @@ def make_vec_env(vec_env_class, make_env, env_id, num_env, init_seed, **kwargs):
Example::
>>> from lagom.envs.vec_env import SerialVecEnv
>>> make_vec_env(vec_env_class=SerialVecEnv, make_env=make_gym_env, env_id='CartPole-v1', num_env=5, init_seed=1)
<SerialVecEnv: CartPole-v1, n: 5>
4 changes: 2 additions & 2 deletions lagom/envs/spaces/dict.py
@@ -13,15 +13,15 @@ class Dict(Space):
* Simple example::
>>> from lagom.envs.spaces import Discrete, Box
>>> space = Dict({'position': Discrete(2), 'velocity': Box(low=-1.0, high=1.0, shape=(1, 2), dtype=np.float32)})
>>> space.sample()
OrderedDict([('position', 0),
('velocity', array([[0.8046695 , 0.78866726]], dtype=float32))])
* Nested example::
>>> sensor_space = Dict({'position': Box(-100, 100, shape=(3,), dtype=np.float32),
'velocity': Box(-1, 1, shape=(3,), dtype=np.float32)})
>>> sensor_space = Dict({'position': Box(-100, 100, shape=(3,), dtype=np.float32), 'velocity': Box(-1, 1, shape=(3,), dtype=np.float32)})
>>> space = Dict({'sensors': sensor_space, 'score': Discrete(100)})
>>> space.sample()
OrderedDict([('score', 47),
1 change: 1 addition & 0 deletions lagom/envs/spaces/product.py
@@ -8,6 +8,7 @@ class Product(Space):
Example::
>>> from lagom.envs.spaces import Discrete, Box
>>> Product((Discrete(5), Box(-1.0, 1.0, shape=(2, 3), dtype=np.float32)))
"""
3 changes: 2 additions & 1 deletion lagom/envs/vec_env/parallel_vec_env.py
@@ -79,7 +79,8 @@ class ParallelVecEnv(VecEnv):
should use :class:`SerialVecEnv` instead.
Example::
>>> from lagom.envs import make_envs, make_gym_env
>>> list_make_env = make_envs(make_env=make_gym_env, env_id='CartPole-v1', num_env=3, init_seed=0)
>>> env = ParallelVecEnv(list_make_env=list_make_env)
>>> env
1 change: 1 addition & 0 deletions lagom/envs/vec_env/serial_vec_env.py
@@ -18,6 +18,7 @@ class SerialVecEnv(VecEnv):
Example::
>>> from lagom.envs import make_envs, make_gym_env
>>> list_make_env = make_envs(make_env=make_gym_env, env_id='CartPole-v1', num_env=3, init_seed=0)
>>> env = SerialVecEnv(list_make_env=list_make_env)
>>> env
1 change: 1 addition & 0 deletions lagom/envs/vec_env/vec_env.py
@@ -4,6 +4,7 @@
from abc import abstractmethod

from lagom.core.plotter import GridImage
from lagom.core.plotter import ImageViewer


class VecEnv(ABC):
20 changes: 6 additions & 14 deletions lagom/envs/vec_env/vec_standardize.py
@@ -38,19 +38,11 @@ class VecStandardize(VecEnvWrapper):
Example::
list_make_env = make_envs(make_env=make_gym_env,
env_id='Pendulum-v0',
num_env=2,
init_seed=1)
venv = SerialVecEnv(list_make_env=list_make_env)
venv = VecStandardize(venv=venv,
use_obs=True,
use_reward=True,
clip_obs=10.0,
clip_reward=10.0,
gamma=0.99,
eps=1e-8)
>>> from lagom.envs import make_envs, make_gym_env
>>> from lagom.envs.vec_env import SerialVecEnv
>>> list_make_env = make_envs(make_env=make_gym_env, env_id='Pendulum-v0', num_env=2, init_seed=1)
>>> venv = SerialVecEnv(list_make_env=list_make_env)
>>> venv = VecStandardize(venv=venv, use_obs=True, use_reward=True, clip_obs=10.0, clip_reward=10.0, gamma=0.99, eps=1e-8)
>>> venv
<VecStandardize: Pendulum-v0, n: 2>
@@ -142,7 +134,7 @@ def process_reward(self, rewards):
# Update with calculated discounted returns
self.reward_runningavg(self.all_returns)
# Standardize the reward
mean = self.reward_runningavg.mu
# mean = self.reward_runningavg.mu # not useful
std = self.reward_runningavg.sigma
# Note that we do not subtract the mean, but only divide by the std
if not np.allclose(std, 0.0): # only non-zero std
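A minimal sketch of the reward scaling in ``process_reward`` (illustrative only, with a fixed stand-in for the running std)::

    import numpy as np

    gamma, eps, clip_reward = 0.99, 1e-8, 10.0

    returns = np.zeros(2)               # running discounted returns, one per sub-environment
    running_std = 2.0                   # stand-in for reward_runningavg.sigma

    rewards = np.array([1.0, -0.5])
    returns = rewards + gamma * returns            # update the discounted-return estimate
    if not np.allclose(running_std, 0.0):          # only scale when the std is non-zero
        rewards = rewards / (running_std + eps)    # divide by std; the mean is NOT subtracted
    rewards = np.clip(rewards, -clip_reward, clip_reward)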
2 changes: 2 additions & 0 deletions lagom/envs/wrappers/flatten_dict.py
@@ -11,6 +11,8 @@ class FlattenDictWrapper(ObservationWrapper):
Example::
>>> import gym
>>> from lagom.envs.wrappers import GymWrapper
>>> env = gym.make('FetchPush-v1')
>>> env = GymWrapper(env)
>>> env.observation_space
3 changes: 2 additions & 1 deletion lagom/envs/wrappers/frame_stack.py
@@ -25,6 +25,7 @@ class FrameStack(ObservationWrapper):
Example::
>>> from lagom.envs import make_gym_env
>>> env = make_gym_env(env_id='CartPole-v1', seed=1)
>>> env = FrameStack(env, num_stack=4)
>>> env
@@ -79,7 +80,7 @@ def reset(self):

def process_observation(self, observation):
# Shift the oldest observation to the front
self.stack_buffer = np.roll(self.stack_buffer, shift=1, axis=-1)
# Replace the front as new observation
self.stack_buffer[..., 0] = observation

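What the ``np.roll`` line above does to the stack buffer, on a toy array (illustrative only, not taken from this commit)::

    import numpy as np

    num_stack = 4
    # last axis holds the frames: slot 0 is the newest frame, slot -1 the oldest
    buf = np.stack([np.full((2, 2), t) for t in range(num_stack)], axis=-1)

    buf = np.roll(buf, shift=1, axis=-1)   # slide every frame one slot toward the back
    buf[..., 0] = 99                       # overwrite the front slot with the new observation
    print(buf[0, 0])                       # -> [99  0  1  2]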
2 changes: 1 addition & 1 deletion lagom/experiment/configurator.py
@@ -42,7 +42,7 @@ class Configurator(object):
>>> configurator.grid('network.lr', [1e-2, 5e-3])
>>> configurator.grid('env.id', ['CartPole-v1', 'Ant-v2'])
>>> list_config = configurator.make_configs()
>>> Configurator.to_dataframe(list_config)
>>> config_dataframe = Configurator.to_dataframe(list_config)
ID log.dir network.lr env.id
0 0 some path 0.010 CartPole-v1
1 1 some path 0.010 Ant-v2
3 changes: 2 additions & 1 deletion lagom/experiment/run_experiment.py
@@ -26,7 +26,8 @@ def ask_yes_or_no(msg):
return False
else:
print("Please answer 'yes' or 'no':")



def run_experiment(worker_class, master_class, max_num_worker=None, daemonic_worker=None):
r"""A convenient function to launch a parallelized experiment (Master-Worker).
4 changes: 2 additions & 2 deletions lagom/logger.py
@@ -54,8 +54,8 @@ class Logger(logging.Logger):
* With indentation::
>>> logger.dump(keys=None, index=None, indent=1)
    Iteration: [1, 2, 3]
    Training Loss: [0.12, 0.11, 0.09]
* With specified keys::
1 change: 0 additions & 1 deletion lagom/runner/base_runner.py
@@ -34,4 +34,3 @@ def __init__(self, agent, env, gamma):
def __call__(self, N, T):
r"""Run the agent in the environment and collect all necessary interaction data as a batch. """
raise NotImplementedError

9 changes: 6 additions & 3 deletions lagom/runner/segment.py
@@ -62,6 +62,8 @@ class Segment(BaseHistory):
Example::
>>> import torch
>>> from lagom.runner import Transition
>>> transition1 = Transition(s=10, a=-1, r=1, s_next=20, done=False)
>>> transition1.add_info('V_s', torch.tensor(100.))
@@ -151,9 +153,9 @@ def transitions(self):

#########
# Use itertools.chain.from_iterable, on average 50% faster than the following method
#transitions = []
#for trajectory in self.trajectories:
# transitions.extend(trajectory.transitions)
# transitions = []
# for trajectory in self.trajectories:
# transitions.extend(trajectory.transitions)
########

transitions = list(chain.from_iterable([trajectory.transitions for trajectory in self.trajectories]))
@@ -184,6 +186,7 @@ def all_returns(self):
out = list(chain.from_iterable([trajectory.all_returns for trajectory in self.trajectories]))

return out

@property
def all_discounted_returns(self):
# Use itertools.chain.from_iterable, for the same reason as documented in `transitions(self)`
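A minimal sketch of the flattening that ``transitions`` and ``all_discounted_returns`` share (the 50% speedup figure is the source comment's claim, not re-measured here)::

    from itertools import chain

    trajectories = [[1, 2, 3], [4, 5], [6]]    # stand-ins for per-trajectory transition lists

    # Loop-based flattening (the slower variant kept as a comment in the source)
    flat_loop = []
    for trajectory in trajectories:
        flat_loop.extend(trajectory)

    # chain.from_iterable concatenates the inner lists in one pass
    flat_chain = list(chain.from_iterable(trajectories))

    assert flat_loop == flat_chain == [1, 2, 3, 4, 5, 6]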
8 changes: 4 additions & 4 deletions lagom/runner/segment_runner.py
@@ -32,10 +32,10 @@ class SegmentRunner(BaseRunner):
Example::
>>> list_make_env = make_envs(make_env=make_gym_env,
env_id='CartPole-v1',
num_env=2,
init_seed=0)
>>> from lagom.agents import RandomAgent
>>> from lagom.envs import make_envs, make_gym_env, EnvSpec
>>> from lagom.envs.vec_env import SerialVecEnv
>>> list_make_env = make_envs(make_env=make_gym_env, env_id='CartPole-v1', num_env=2, init_seed=0)
>>> env = SerialVecEnv(list_make_env=list_make_env)
>>> env_spec = EnvSpec(env)
3 changes: 2 additions & 1 deletion lagom/runner/trajectory.py
@@ -18,6 +18,7 @@ class Trajectory(BaseHistory):
Example::
>>> from lagom.runner import Transition
>>> transition1 = Transition(s=1, a=0.1, r=0.5, s_next=2, done=False)
>>> transition1.add_info(name='V_s', value=10.0)
@@ -65,7 +66,7 @@ def add_transition(self, transition):
# Sanity check for trajectory
# Not allowed to add more transition if it already contains done=True
if len(self.transitions) > 0: # non-empty
assert self.transitions[-1].done == False, 'not allowed to add transition, because already contains done=True'
assert not self.transitions[-1].done, 'not allowed to add transition, because already contains done=True'
super().add_transition(transition)

@property
8 changes: 4 additions & 4 deletions lagom/runner/trajectory_runner.py
@@ -28,10 +28,10 @@ class TrajectoryRunner(BaseRunner):
Example::
>>> list_make_env = make_envs(make_env=make_gym_env,
env_id='CartPole-v1',
num_env=1,
init_seed=0)
>>> from lagom.agents import RandomAgent
>>> from lagom.envs import make_envs, make_gym_env, EnvSpec
>>> from lagom.envs.vec_env import SerialVecEnv
>>> list_make_env = make_envs(make_env=make_gym_env, env_id='CartPole-v1', num_env=1, init_seed=0)
>>> env = SerialVecEnv(list_make_env=list_make_env)
>>> env_spec = EnvSpec(env)