Commit

add gitignore, fix py3 and deps
justheuristic committed Jan 23, 2017
1 parent f0b53dd commit 38fb43d
Showing 3 changed files with 110 additions and 25 deletions.
76 changes: 76 additions & 0 deletions .gitignore
@@ -0,0 +1,76 @@
# node and NPM
npm-debug.log
node_modules

# swap files
*~
*.swp



env.sh
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

# C extensions
*.so

# Distribution / packaging
.Python
env/
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg/

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml

# Translations
*.mo

# Mr Developer
.mr.developer.cfg
.project
.pydevproject
.idea
.ipynb_checkpoints

# Rope
.ropeproject

# Django stuff:
*.log
*.pot

# Sphinx documentation
docs/_build/
docs/tmp*

# OS X garbage
.DS_Store

# Debian things
debian/reproducible-experiment-platform
debian/files
*.substvars
*.debhelper.log
8 changes: 4 additions & 4 deletions Dockerfile
@@ -13,8 +13,8 @@ RUN ln -s /usr/bin/swig3.0 /usr/bin/swig

USER main

-RUN pip install --upgrade sklearn
-RUN mkdir ~/gym2 && cd ~/gym2 && git clone https://github.com/openai/gym.git && cd gym && pip install -e .[box2d]
+RUN pip install --upgrade sklearn tqdm
+RUN pip install --upgrade gym[all]

-RUN /home/main/anaconda/envs/python3/bin/pip install --upgrade sklearn
-RUN mkdir ~/gym3 && cd ~/gym3 && git clone https://github.com/openai/gym.git && cd gym && /home/main/anaconda/envs/python3/bin/pip install -e .[box2d]
+RUN /home/main/anaconda/envs/python3/bin/pip install --upgrade sklearn tqdm
+RUN /home/main/anaconda/envs/python3/bin/pip install --upgrade gym[all]
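
The Dockerfile now installs gym from PyPI with the [all] extras instead of building it from a cloned checkout. A minimal smoke test for the resulting image, assuming the FrozenLake-v0 environment id used in the notebook below:

    import gym

    # FrozenLake-v0 is the 4x4 map the notebook targets; the numbers printed
    # below follow from its spec: 16 tiles (states) and 4 moves (actions).
    env = gym.make("FrozenLake-v0")
    env.reset()
    print("states:", env.observation_space.n)   # 16
    print("actions:", env.action_space.n)       # 4
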
51 changes: 30 additions & 21 deletions week0/frozenlake.ipynb
@@ -73,11 +73,11 @@
},
"outputs": [],
"source": [
"print \"initial observation code:\",env.reset()\n",
"print 'printing observation:'\n",
"print(\"initial observation code:\",env.reset())\n",
"print('printing observation:')\n",
"env.render()\n",
"print \"observations:\",env.observation_space, 'n=',env.observation_space.n\n",
"print \"actions:\",env.action_space, 'n=',env.action_space.n"
"print(\"observations:\",env.observation_space, 'n=',env.observation_space.n)\n",
"print(\"actions:\",env.action_space, 'n=',env.action_space.n)"
]
},
{
@@ -88,12 +88,12 @@
},
"outputs": [],
"source": [
"print \"taking action 2 (right)\"\n",
"print(\"taking action 2 (right)\")\n",
"new_obs, reward, is_done, _ = env.step(2)\n",
"print \"new observation code:\",new_obs\n",
"print \"reward:\", reward\n",
"print \"is game over?:\",is_done\n",
"print \"printing new state:\"\n",
"print(\"new observation code:\",new_obs)\n",
"print(\"reward:\", reward)\n",
"print(\"is game over?:\",is_done)\n",
"print(\"printing new state:\")\n",
"env.render()"
]
},
@@ -159,6 +159,8 @@
"outputs": [],
"source": [
"import numpy as np\n",
"n_states = env.observation_space.n\n",
"n_actions = env.action_space.n\n",
"def get_random_policy():\n",
" \"\"\"\n",
" Build a numpy array representing agent policy.\n",
@@ -183,9 +185,9 @@
"assert np.min(policies) == 0, 'minimal action id should be 0'\n",
"assert np.max(policies) == n_actions-1, 'maximal action id should match n_actions-1'\n",
"action_probas = np.unique(policies,return_counts=True)[-1] /10**4. /n_states\n",
"print \"Action frequencies over 10^4 samples:\",action_probas\n",
"print (\"Action frequencies over 10^4 samples:\",action_probas)\n",
"assert np.allclose(action_probas,[1./n_actions]*n_actions,atol=0.05), \"The policies aren't uniformly random (maybe it's just an extremely bad luck)\"\n",
"print \"Seems fine!\""
"print (\"Seems fine!\")"
]
},
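
The body of get_random_policy sits in the elided region between these hunks and is left as an exercise. A sketch consistent with the asserts above — arrays of length n_states with uniform actions in [0, n_actions-1] — might look like this (one possible solution, not the committed one):

    def get_random_policy():
        # one integer action per state, drawn uniformly at random
        return np.random.randint(0, n_actions, size=n_states)
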
{
@@ -209,6 +211,7 @@
" Interact with an environment, return sum of all rewards.\n",
" If game doesn't end on t_max (e.g. agent walks into a wall), \n",
" force end the game and return whatever reward you got so far.\n",
" Tip: see signature of env.step(...) method above.\n",
" \"\"\"\n",
" s = env.reset()\n",
" total_reward = 0\n",
@@ -225,11 +228,11 @@
},
"outputs": [],
"source": [
"print \"generating 10^3 sessions...\"\n",
"print (\"generating 10^3 sessions...\")\n",
"rewards = [sample_reward(env,get_random_policy()) for _ in range(10**3)]\n",
"assert all([type(r) in (int,float) for r in rewards]), 'sample_reward must return a single number'\n",
"assert all([0 <= r <= 1 for r in rewards]), 'total rewards should be between 0 and 1 for frozenlake (if solving taxi, delete this line)'\n",
"print \"Looks good!\""
"print (\"Looks good!\")"
]
},
{
@@ -243,7 +246,7 @@
"def evaluate(policy,n_times=100):\n",
" \"\"\"Run several evaluations and average the score the policy gets.\"\"\"\n",
" rewards = <your code>\n",
" return np.mean(rewards)\n",
" return float(np.mean(rewards))\n",
" "
]
},
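
The <your code> placeholder leaves the sampling to the reader; one plausible fill-in reuses sample_reward from the cell above:

    def evaluate(policy, n_times=100):
        # run several evaluations and average the score the policy gets
        rewards = [sample_reward(env, policy) for _ in range(n_times)]
        return float(np.mean(rewards))
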
@@ -256,9 +259,10 @@
"outputs": [],
"source": [
"def print_policy(policy):\n",
" \"\"\"a function that displays a policy in a human-readable way\"\"\"\n",
" \"\"\"a function that displays a policy in a human-readable way.\"\"\"\n",
" lake = \"SFFFFHFHFFFHHFFG\"\n",
" assert env.spec.id == \"FrozenLake-v0\",\"this function only works with frozenlake 4x4\"\n",
"\n",
" \n",
" # where to move from each tile\n",
" arrows = ['<v>^'[a] for a in policy]\n",
@@ -269,7 +273,7 @@
" for i in range(0,16,4):\n",
" print ' '.join(signs[i:i+4])\n",
"\n",
"print \"random policy:\"\n",
"print(\"random policy:\")\n",
"print_policy(get_random_policy())"
]
},
@@ -298,7 +302,7 @@
" if score > best_score:\n",
" best_score = score\n",
" best_policy = policy\n",
" print \"New best score:\",score\n",
" print (\"New best score:\",score)\n",
" print \"Best policy:\"\n",
" print_policy(best_policy)"
]
@@ -341,8 +345,10 @@
"def mutation(policy,p=0.1):\n",
" \"\"\"\n",
" for each state, with probability p replace action with random action\n",
" Tip: mutation can be written as crossover with random policy\n",
" \"\"\"\n",
" return crossover(policy,get_random_policy(),p=p)\n",
" <your code>\n",
" return <your code>\n",
" "
]
},
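
This hunk replaces the former one-liner — mutation as crossover with a random policy, per the tip — with exercise placeholders. Since crossover itself is elided from the diff, here is a hedged sketch of both functions that would also satisfy the independence assert added below (crossover's signature and default p are assumptions):

    def crossover(policy1, policy2, p=0.5):
        # for each state independently, take policy2's action with probability p
        mask = np.random.random(len(policy1)) < p
        return np.where(mask, policy2, policy1)

    def mutation(policy, p=0.1):
        # the tip above: mutation is crossover with a freshly sampled random policy
        return crossover(policy, get_random_policy(), p=p)
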
@@ -361,7 +367,10 @@
"assert all([len(p) == n_states for p in policies]), 'policy length should always be 16'\n",
"assert np.min(policies) == 0, 'minimal action id should be 0'\n",
"assert np.max(policies) == n_actions-1, 'maximal action id should be n_actions-1'\n",
"print \"Seems fine!\""
"\n",
"assert any([np.mean(crossover(np.zeros(n_states),np.ones(n_states))) not in (0,1)\n",
" for _ in range(100)]),\"Make sure your crossover changes each action independently\"\n",
"print(\"Seems fine!\")"
]
},
{
Expand Down Expand Up @@ -415,7 +424,7 @@
"source": [
"#main loop\n",
"for epoch in range(n_epochs):\n",
" print \"Epoch %s:\"%epoch\n",
" print (\"Epoch %s:\"%epoch)\n",
" \n",
" crossovered = <crossover random guys from pool, n_crossovers total>\n",
" mutated = <add several new policies at random, n_mutations total>\n",
Expand All @@ -432,7 +441,7 @@
" pool_scores = [pool_scores[i] for i in selected_indices]\n",
"\n",
" #print the best policy so far (last in ascending score order)\n",
" print \"best score:\",pool_scores[-1]\n",
" print (\"best score:\",pool_scores[-1])\n",
" print_policy(pool[-1])"
]
},
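
The main loop keeps its placeholders too. One way they might be filled, reusing the pool, n_crossovers and n_mutations names from the notebook (the uniform sampling scheme is an assumption):

    import random

    crossovered = [crossover(random.choice(pool), random.choice(pool)) for _ in range(n_crossovers)]
    mutated = [mutation(random.choice(pool)) for _ in range(n_mutations)]
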