Skip to content

Commit

Permalink
Backport coursera/week2 modifications to spring19 (#204)
Browse files Browse the repository at this point in the history
  • Loading branch information
yhn112 authored May 28, 2019
1 parent 640198a commit 06d3db0
Showing 1 changed file with 2 additions and 13 deletions.
15 changes: 2 additions & 13 deletions week02_value_based/mdp.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# most of this code was politely stolen from https://github.com/berkeleydeeprlcourse/homework/
# all creadit goes to https://github.com/abhishekunique (if i got the author right)
# all credit goes to https://github.com/abhishekunique (if i got the author right)
import sys
import random
import numpy as np
Expand All @@ -12,17 +12,6 @@
has_graphviz = False


def weighted_choice(v, p):
    """Pick one element of ``v`` at random, proportionally to the weights ``p``.

    The weights do not have to sum to one; they are normalised implicitly by
    drawing a threshold uniformly from ``[0, sum(p)]`` and walking the
    cumulative sum until it reaches that threshold.

    :param v: sequence of candidate values.
    :param p: sequence of non-negative weights, paired with ``v`` by position
        (pairs are formed with ``zip``, so extra items on either side are
        ignored).
    :returns: the selected element of ``v``.
    :raises AssertionError: if no element can be selected, i.e. the input is
        empty or no weight is strictly positive.
    """
    total = sum(p)
    r = random.uniform(0, total)
    upto = 0
    for c, w in zip(v, p):
        upto += w
        # Skip zero-weight entries explicitly: without this check a threshold
        # of exactly 0.0 would select an item that has zero probability.
        if w > 0 and upto >= r:
            return c
    # Raise explicitly instead of `assert False`: assert statements are
    # stripped under `python -O`, which would make this silently return None.
    raise AssertionError("Shouldn't get here")


class MDP:
def __init__(self, transition_probs, rewards, initial_state=None):
"""
Expand Down Expand Up @@ -113,7 +102,7 @@ def step(self, action):
""" take action, return next_state, reward, is_done, empty_info """
possible_states, probs = zip(
*self.get_next_states(self._current_state, action).items())
next_state = weighted_choice(possible_states, p=probs)
next_state = possible_states[np.random.choice(np.arange(len(possible_states)), p=probs)]
reward = self.get_reward(self._current_state, action, next_state)
is_done = self.is_terminal(next_state)
self._current_state = next_state
Expand Down

0 comments on commit 06d3db0

Please sign in to comment.