Commit

add gitignore, fix py3 and deps
justheuristic committed Jan 23, 2017
1 parent f0b53dd commit 38fb43d
Showing 3 changed files with 110 additions and 25 deletions.
76 changes: 76 additions & 0 deletions .gitignore
@@ -0,0 +1,76 @@
# node and NPM
npm-debug.log
node_modules

# swap files
*~
*.swp



env.sh
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

# C extensions
*.so

# Distribution / packaging
.Python
env/
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg/

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml

# Translations
*.mo

# Mr Developer
.mr.developer.cfg
.project
.pydevproject
.idea
.ipynb_checkpoints

# Rope
.ropeproject

# Django stuff:
*.log
*.pot

# Sphinx documentation
docs/_build/
docs/tmp*

# OS X garbage
.DS_Store

# Debian things
debian/reproducible-experiment-platform
debian/files
*.substvars
*.debhelper.log
8 changes: 4 additions & 4 deletions Dockerfile
@@ -13,8 +13,8 @@ RUN ln -s /usr/bin/swig3.0 /usr/bin/swig

USER main

-RUN pip install --upgrade sklearn
-RUN mkdir ~/gym2 && cd ~/gym2 && git clone https://github.com/openai/gym.git && cd gym && pip install -e .[box2d]
+RUN pip install --upgrade sklearn tqdm
+RUN pip install --upgrade gym[all]

-RUN /home/main/anaconda/envs/python3/bin/pip install --upgrade sklearn
-RUN mkdir ~/gym3 && cd ~/gym3 && git clone https://github.com/openai/gym.git && cd gym && /home/main/anaconda/envs/python3/bin/pip install -e .[box2d]
+RUN /home/main/anaconda/envs/python3/bin/pip install --upgrade sklearn tqdm
+RUN /home/main/anaconda/envs/python3/bin/pip install --upgrade gym[all]
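
The Dockerfile now installs gym from PyPI with the [all] extras instead of building it from a cloned checkout. A minimal smoke test for the resulting image, assuming the FrozenLake-v0 environment id used in the notebook below:

    import gym

    # FrozenLake-v0 is the 4x4 map the notebook targets; the numbers printed
    # below follow from its spec: 16 tiles (states) and 4 moves (actions).
    env = gym.make("FrozenLake-v0")
    env.reset()
    print("states:", env.observation_space.n)   # 16
    print("actions:", env.action_space.n)       # 4
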
51 changes: 30 additions & 21 deletions week0/frozenlake.ipynb
@@ -73,11 +73,11 @@
},
"outputs": [],
"source": [
"print \"initial observation code:\",env.reset()\n",
"print 'printing observation:'\n",
"print(\"initial observation code:\",env.reset())\n",
"print('printing observation:')\n",
"env.render()\n",
"print \"observations:\",env.observation_space, 'n=',env.observation_space.n\n",
"print \"actions:\",env.action_space, 'n=',env.action_space.n"
"print(\"observations:\",env.observation_space, 'n=',env.observation_space.n)\n",
"print(\"actions:\",env.action_space, 'n=',env.action_space.n)"
]
},
{
@@ -88,12 +88,12 @@
},
"outputs": [],
"source": [
"print \"taking action 2 (right)\"\n",
"print(\"taking action 2 (right)\")\n",
"new_obs, reward, is_done, _ = env.step(2)\n",
"print \"new observation code:\",new_obs\n",
"print \"reward:\", reward\n",
"print \"is game over?:\",is_done\n",
"print \"printing new state:\"\n",
"print(\"new observation code:\",new_obs)\n",
"print(\"reward:\", reward)\n",
"print(\"is game over?:\",is_done)\n",
"print(\"printing new state:\")\n",
"env.render()"
]
},
@@ -159,6 +159,8 @@
"outputs": [],
"source": [
"import numpy as np\n",
"n_states = env.observation_space.n\n",
"n_actions = env.action_space.n\n",
"def get_random_policy():\n",
" \"\"\"\n",
" Build a numpy array representing agent policy.\n",
@@ -183,9 +185,9 @@
"assert np.min(policies) == 0, 'minimal action id should be 0'\n",
"assert np.max(policies) == n_actions-1, 'maximal action id should match n_actions-1'\n",
"action_probas = np.unique(policies,return_counts=True)[-1] /10**4. /n_states\n",
"print \"Action frequencies over 10^4 samples:\",action_probas\n",
"print (\"Action frequencies over 10^4 samples:\",action_probas)\n",
"assert np.allclose(action_probas,[1./n_actions]*n_actions,atol=0.05), \"The policies aren't uniformly random (maybe it's just an extremely bad luck)\"\n",
"print \"Seems fine!\""
"print (\"Seems fine!\")"
]
},
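
The body of get_random_policy sits in the elided region between these hunks and is left as an exercise. A sketch consistent with the asserts above — arrays of length n_states with uniform actions in [0, n_actions-1] — might look like this (one possible solution, not the committed one):

    def get_random_policy():
        # one integer action per state, drawn uniformly at random
        return np.random.randint(0, n_actions, size=n_states)
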
{
@@ -209,6 +211,7 @@
" Interact with an environment, return sum of all rewards.\n",
" If game doesn't end on t_max (e.g. agent walks into a wall), \n",
" force end the game and return whatever reward you got so far.\n",
" Tip: see signature of env.step(...) method above.\n",
" \"\"\"\n",
" s = env.reset()\n",
" total_reward = 0\n",
@@ -225,11 +228,11 @@
},
"outputs": [],
"source": [
"print \"generating 10^3 sessions...\"\n",
"print (\"generating 10^3 sessions...\")\n",
"rewards = [sample_reward(env,get_random_policy()) for _ in range(10**3)]\n",
"assert all([type(r) in (int,float) for r in rewards]), 'sample_reward must return a single number'\n",
"assert all([0 <= r <= 1 for r in rewards]), 'total rewards should be between 0 and 1 for frozenlake (if solving taxi, delete this line)'\n",
"print \"Looks good!\""
"print (\"Looks good!\")"
]
},
{
@@ -243,7 +246,7 @@
"def evaluate(policy,n_times=100):\n",
" \"\"\"Run several evaluations and average the score the policy gets.\"\"\"\n",
" rewards = <your code>\n",
" return np.mean(rewards)\n",
" return float(np.mean(rewards))\n",
" "
]
},
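
The <your code> placeholder leaves the sampling to the reader; one plausible fill-in reuses sample_reward from the cell above:

    def evaluate(policy, n_times=100):
        # run several evaluations and average the score the policy gets
        rewards = [sample_reward(env, policy) for _ in range(n_times)]
        return float(np.mean(rewards))
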
@@ -256,9 +259,10 @@
"outputs": [],
"source": [
"def print_policy(policy):\n",
" \"\"\"a function that displays a policy in a human-readable way\"\"\"\n",
" \"\"\"a function that displays a policy in a human-readable way.\"\"\"\n",
" lake = \"SFFFFHFHFFFHHFFG\"\n",
" assert env.spec.id == \"FrozenLake-v0\",\"this function only works with frozenlake 4x4\"\n",
"\n",
" \n",
" # where to move from each tile\n",
" arrows = ['<v>^'[a] for a in policy]\n",
@@ -269,7 +273,7 @@
" for i in range(0,16,4):\n",
" print ' '.join(signs[i:i+4])\n",
"\n",
"print \"random policy:\"\n",
"print(\"random policy:\")\n",
"print_policy(get_random_policy())"
]
},
@@ -298,7 +302,7 @@
" if score > best_score:\n",
" best_score = score\n",
" best_policy = policy\n",
" print \"New best score:\",score\n",
" print (\"New best score:\",score)\n",
" print \"Best policy:\"\n",
" print_policy(best_policy)"
]
@@ -341,8 +345,10 @@
"def mutation(policy,p=0.1):\n",
" \"\"\"\n",
" for each state, with probability p replace action with random action\n",
" Tip: mutation can be written as crossover with random policy\n",
" \"\"\"\n",
" return crossover(policy,get_random_policy(),p=p)\n",
" <your code>\n",
" return <your code>\n",
" "
]
},
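
This hunk replaces the former one-liner — mutation as crossover with a random policy, per the tip — with exercise placeholders. Since crossover itself is elided from the diff, here is a hedged sketch of both functions that would also satisfy the independence assert added below (crossover's signature and default p are assumptions):

    def crossover(policy1, policy2, p=0.5):
        # for each state independently, take policy2's action with probability p
        mask = np.random.random(len(policy1)) < p
        return np.where(mask, policy2, policy1)

    def mutation(policy, p=0.1):
        # the tip above: mutation is crossover with a freshly sampled random policy
        return crossover(policy, get_random_policy(), p=p)
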
@@ -361,7 +367,10 @@
"assert all([len(p) == n_states for p in policies]), 'policy length should always be 16'\n",
"assert np.min(policies) == 0, 'minimal action id should be 0'\n",
"assert np.max(policies) == n_actions-1, 'maximal action id should be n_actions-1'\n",
"print \"Seems fine!\""
"\n",
"assert any([np.mean(crossover(np.zeros(n_states),np.ones(n_states))) not in (0,1)\n",
" for _ in range(100)]),\"Make sure your crossover changes each action independently\"\n",
"print(\"Seems fine!\")"
]
},
{
Expand Down Expand Up @@ -415,7 +424,7 @@
"source": [
"#main loop\n",
"for epoch in range(n_epochs):\n",
" print \"Epoch %s:\"%epoch\n",
" print (\"Epoch %s:\"%epoch)\n",
" \n",
" crossovered = <crossover random guys from pool, n_crossovers total>\n",
" mutated = <add several new policies at random, n_mutations total>\n",
Expand All @@ -432,7 +441,7 @@
" pool_scores = [pool_scores[i] for i in selected_indices]\n",
"\n",
" #print the best policy so far (last in ascending score order)\n",
" print \"best score:\",pool_scores[-1]\n",
" print (\"best score:\",pool_scores[-1])\n",
" print_policy(pool[-1])"
]
},
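
The main loop keeps its placeholders too. One way they might be filled, reusing the pool, n_crossovers and n_mutations names from the notebook (the uniform sampling scheme is an assumption):

    import random

    crossovered = [crossover(random.choice(pool), random.choice(pool)) for _ in range(n_crossovers)]
    mutated = [mutation(random.choice(pool)) for _ in range(n_mutations)]
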