Backport coursera/week2 modifications to spring19 (#204)

yandexdataschool · May 28, 2019 · 06d3db0
1 parent 640198a
commit 06d3db0
Showing 1 changed file with 2 additions and 13 deletions.
diff --git a/week02_value_based/mdp.py b/week02_value_based/mdp.py
@@ -1,5 +1,5 @@
 # most of this code was politely stolen from https://github.com/berkeleydeeprlcourse/homework/
-# all creadit goes to https://github.com/abhishekunique (if i got the author right)
+# all credit goes to https://github.com/abhishekunique (if i got the author right)
 import sys
 import random
 import numpy as np
@@ -12,17 +12,6 @@
     has_graphviz = False
 
 
-def weighted_choice(v, p):
-    total = sum(p)
-    r = random.uniform(0, total)
-    upto = 0
-    for c, w in zip(v, p):
-        if upto + w >= r:
-            return c
-        upto += w
-    assert False, "Shouldn't get here"
-
-
 class MDP:
     def __init__(self, transition_probs, rewards, initial_state=None):
         """
@@ -113,7 +102,7 @@ def step(self, action):
         """ take action, return next_state, reward, is_done, empty_info """
         possible_states, probs = zip(
             *self.get_next_states(self._current_state, action).items())
-        next_state = weighted_choice(possible_states, p=probs)
+        next_state = possible_states[np.random.choice(np.arange(len(possible_states)), p=probs)]
         reward = self.get_reward(self._current_state, action, next_state)
         is_done = self.is_terminal(next_state)
         self._current_state = next_state