[rllib] Basic port of baselines/deepq to rllib #709

Merged · 27 commits · Jul 7, 2017
Changes from 1 commit
fetch metrics
ericl committed Jun 27, 2017
commit d631bdd7fbc91b87fa4a97c499ffe24c8461b96e
44 changes: 32 additions & 12 deletions python/ray/rllib/a3c/a3c.py
@@ -16,7 +16,7 @@

 DEFAULT_CONFIG = {
     "num_workers": 4,
-    "num_rollouts_per_iteration": 100,
+    "num_batches_per_iteration": 100,
 }

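The renamed key makes the unit explicit: train() budgets gradient batches, not whole rollouts. A minimal usage sketch follows — the A3C class name, its (env_name, config) constructor, and the import path are assumptions inferred from this diff, not a documented API of this Ray version:

# Hypothetical usage sketch; names inferred from this diff.
from ray.rllib.a3c import A3C, DEFAULT_CONFIG  # import path is an assumption

config = dict(DEFAULT_CONFIG)
config["num_batches_per_iteration"] = 50  # formerly "num_rollouts_per_iteration"
config["num_workers"] = 8

agent = A3C("PongDeterministic-v4", config)
result = agent.train()  # processes 50 gradient batches, then reports metrics
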
@@ -53,6 +53,19 @@ def pull_batch_from_queue(self):
                 break
         return rollout

+    def get_completed_rollout_metrics(self):
+        """Returns metrics on previously completed rollouts.
+
+        Calling this clears the queue of completed rollout metrics.
+        """
+        completed = []
+        while True:
+            try:
+                completed.append(self.runner.metrics_queue.get_nowait())
+            except queue.Empty:
+                break
+        return completed
+
     def start(self):
         summary_writer = tf.summary.FileWriter(
             os.path.join(self.logdir, "agent_%d" % self.id))
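
The method added above drains a thread-safe queue without blocking. A self-contained sketch of the same stdlib pattern, runnable on its own:

import queue

def drain(q):
    # queue.Queue.get_nowait() raises queue.Empty once nothing is left;
    # catching it is what ends the loop and leaves the queue cleared.
    items = []
    while True:
        try:
            items.append(q.get_nowait())
        except queue.Empty:
            break
    return items

q = queue.Queue()
for i in range(3):
    q.put(i)
assert drain(q) == [0, 1, 2]
assert drain(q) == []  # a second call sees nothing new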
@@ -65,8 +78,7 @@ def compute_gradient(self, params):
         batch = process_rollout(rollout, gamma=0.99, lambda_=1.0)
         gradient = self.policy.get_gradients(batch)
         info = {"id": self.id,
-                "size": len(batch.a),
-                "total_reward": batch.total_reward}
+                "size": len(batch.a)}
         return gradient, info

@@ -82,25 +94,33 @@ def __init__(self, env_name, config):
         self.iteration = 0

     def train(self):
-        episode_rewards = []
-        episode_lengths = []
         gradient_list = [
             agent.compute_gradient.remote(self.parameters)
             for agent in self.agents]
-        max_rollouts = self.config["num_rollouts_per_iteration"]
-        rollouts_so_far = len(gradient_list)
+        max_batches = self.config["num_batches_per_iteration"]
+        batches_so_far = len(gradient_list)
         while gradient_list:
             done_id, gradient_list = ray.wait(gradient_list)
             gradient, info = ray.get(done_id)[0]
-            episode_rewards.append(info["total_reward"])
-            episode_lengths.append(info["size"])
             self.policy.model_update(gradient)
             self.parameters = self.policy.get_weights()
-            if rollouts_so_far < max_rollouts:
-                rollouts_so_far += 1
+            if batches_so_far < max_batches:
+                batches_so_far += 1
                 gradient_list.extend(
                     [self.agents[info["id"]].compute_gradient.remote(self.parameters)])
+        res = self.fetch_metrics_from_workers()
+        self.iteration += 1
+        return res
+
+    def fetch_metrics_from_workers(self):
+        episode_rewards = []
+        episode_lengths = []
+        metric_lists = [
+            a.get_completed_rollout_metrics.remote() for a in self.agents]
+        for metrics in metric_lists:
+            for episode in ray.get(metrics):
+                episode_lengths.append(episode.episode_length)
+                episode_rewards.append(episode.episode_reward)
         res = TrainingResult(
             self.iteration, np.mean(episode_rewards), np.mean(episode_lengths))
-        self.iteration += 1
         return res
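
The rewritten train() is a bounded-pipeline pattern: it seeds one gradient task per worker, and each time ray.wait hands back a finished task it applies the gradient, refreshes the weights, and resubmits to the same worker until the batch budget is spent. A toy, self-contained version of that control flow (the remote function is a stand-in, not code from this PR):

import ray

ray.init()

@ray.remote
def compute(x):
    return x * x  # stand-in for a worker's gradient computation

max_batches = 10
in_flight = [compute.remote(i) for i in range(4)]  # seed one task per "worker"
submitted = len(in_flight)
results = []
while in_flight:
    # Blocks until exactly one task finishes; the rest stay in flight.
    [done_id], in_flight = ray.wait(in_flight)
    results.append(ray.get(done_id))
    if submitted < max_batches:
        in_flight.append(compute.remote(submitted))  # refill the pipeline
        submitted += 1

assert len(results) == max_batches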
16 changes: 13 additions & 3 deletions python/ray/rllib/a3c/runner.py
@@ -30,11 +30,14 @@ def process_rollout(rollout, gamma, lambda_=1.0):

     features = rollout.features[0]
     return Batch(batch_si, batch_a, batch_adv, batch_r, rollout.terminal,
-                 features, np.sum(rewards))
+                 features)


 Batch = namedtuple(
-    "Batch", ["si", "a", "adv", "r", "terminal", "features", "total_reward"])
+    "Batch", ["si", "a", "adv", "r", "terminal", "features"])

+CompletedRollout = namedtuple(
+    "CompletedRollout", ["episode_length", "episode_reward"])
+

 class PartialRollout(object):
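
Pulling the reward out of Batch and into a separate CompletedRollout record decouples reporting from training: a gradient batch can end mid-episode, so the per-batch total_reward removed above was not a clean episode metric, whereas CompletedRollout is emitted exactly once per finished episode. A small illustration of how the driver aggregates these records, mirroring fetch_metrics_from_workers in a3c.py (the numbers are made up):

from collections import namedtuple

import numpy as np

# Same definition as above.
CompletedRollout = namedtuple(
    "CompletedRollout", ["episode_length", "episode_reward"])

episodes = [
    CompletedRollout(episode_length=812, episode_reward=-21.0),
    CompletedRollout(episode_length=934, episode_reward=-19.0),
]
mean_reward = np.mean([e.episode_reward for e in episodes])  # -20.0
mean_length = np.mean([e.episode_length for e in episodes])  # 873.0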
@@ -76,6 +79,7 @@ class RunnerThread(threading.Thread):
     def __init__(self, env, policy, num_local_steps, visualise=False):
         threading.Thread.__init__(self)
         self.queue = queue.Queue(5)
+        self.metrics_queue = queue.Queue()
         self.num_local_steps = num_local_steps
         self.env = env
         self.last_features = None
@@ -106,7 +110,11 @@ def _run(self):
             # The timeout variable exists because apparently, if one worker dies, the
             # other workers won't die with it, unless the timeout is set to some
             # large number. This is an empirical observation.
-            self.queue.put(next(rollout_provider), timeout=600.0)
+            item = next(rollout_provider)
+            if isinstance(item, CompletedRollout):
+                self.metrics_queue.put(item)
+            else:
+                self.queue.put(item, timeout=600.0)


 def env_runner(env, policy, num_local_steps, summary_writer, render):
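
The runner thread now multiplexes two kinds of items through a single generator and routes them by type: training batches go to the bounded queue (capacity 5, so a slow learner backpressures the environment loop), while episode metrics go to an unbounded queue that is only drained on demand by get_completed_rollout_metrics. A stripped-down sketch of that routing, with a stub generator standing in for env_runner:

import queue
from collections import namedtuple

CompletedRollout = namedtuple(
    "CompletedRollout", ["episode_length", "episode_reward"])

def rollout_provider():
    # Stub for env_runner: interleaves batches with episode metrics.
    yield "batch-1"
    yield CompletedRollout(episode_length=5, episode_reward=1.0)
    yield "batch-2"

data_queue = queue.Queue(5)    # bounded: put() blocks when full
metrics_queue = queue.Queue()  # unbounded: never blocks the runner

for item in rollout_provider():
    if isinstance(item, CompletedRollout):
        metrics_queue.put(item)
    else:
        data_queue.put(item, timeout=600.0)

assert metrics_queue.qsize() == 1 and data_queue.qsize() == 2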
@@ -155,6 +163,8 @@ def env_runner(env, policy, num_local_steps, summary_writer, render):

             if terminal:
                 terminal_end = True
+                yield CompletedRollout(length, rewards)
+
                 if length >= timestep_limit or not env.metadata.get("semantics"
                                                                     ".autoreset"):
                     last_state = env.reset()