forked from robocin/rSoccer
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
adds environments using robosim and grsim
- Loading branch information
Showing
192 changed files
with
3,670 additions
and
58 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,60 +1,8 @@ | ||
# RoboCup Very Small Size League Gym Environment | ||
This Environment is used in [RobôCIn's](https://github.com/robocin/deepvss) project. Check for some Reinforcement Learning Techniques applied for the environment. | ||
|
||
## Project based on: | ||
* [VSS-SDK/VSS-Simulator & VSS-Viewer](https://github.com/VSS-SDK/VSS-Simulator) (x64 binaries provided here) | ||
* [OpenAI Gym Environments](https://github.com/openai/gym) | ||
|
||
# Requirements | ||
|
||
- FIRASim | ||
- Clone the repo | ||
- https://github.com/robocin/FIRASim/releases/tag/deep_train | ||
- Follow its install guide | ||
- https://github.com/robocin/FIRASim/blob/master/INSTALL.md | ||
- Once built, replace the binary file in gym_vss/binaries_envs/fira_sim/bin with the one you built | ||
|
||
- VSS SDK | ||
- Add the following line to your ~/.bashrc or ~/.bash_profile | ||
- source /home/$USER/path/to/envs/gym_vss/binaries_envs/vss_sdk/exportlibs | ||
|
||
FIRASim links against many more (and heavier) libraries than the SDK, which is why it cannot be set up the same way as in the SDK guide. | ||
|
||
# Install | ||
First change the 5th line of gym_vss/gym_real_soccer/comm/Makefile to your python include path. | ||
If you are using anaconda, you need to change only the python version in the path. | ||
```bash | ||
$ sudo apt-get install swig freeglut3-dev -y | ||
$ cd gym_vss/gym_real_soccer/comm | ||
$ make | ||
$ cd ../../../ | ||
$ pip install -e . | ||
``` | ||
|
||
OBS: If you will use only the simulated environment, do: | ||
- comment line 3 in gym_vss/__init__.py | ||
- pip install -e . | ||
|
||
# Example of agent | ||
```python | ||
import gym | ||
import gym_vss | ||
|
||
from gym_vss import SingleAgentSoccerEnvWrapper | ||
|
||
|
||
env = gym.make('vss_soccer_cont-v0') | ||
# env = SingleAgentSoccerEnvWrapper(env, simulator='sdk') | ||
# If you want FIRASim | ||
env = SingleAgentSoccerEnvWrapper(env, simulator='fira') | ||
env.reset() | ||
for i in range(1): | ||
done = False | ||
state = env.reset() | ||
while not done: | ||
action = env.action_space.sample() | ||
next_state, reward, done, _ = env.step(action) | ||
env.close() | ||
``` | ||
# Robocin Environments for Reinforcement Learning | ||
|
||
- deepvss | ||
- environment for our team using FIRASim | ||
|
||
- envs | ||
- environment for vss using RoboSim | ||
- environment for SSL using GRSim |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# RoboCup Very Small Size League Gym Environment | ||
This Environment is used in [RobôCIn's](https://github.com/robocin/deepvss) project. Check for some Reinforcement Learning Techniques applied for the environment. | ||
|
||
## Project based on: | ||
* [VSS-SDK/VSS-Simulator & VSS-Viewer](https://github.com/VSS-SDK/VSS-Simulator) (x64 binaries provided here) | ||
* [OpenAI Gym Environments](https://github.com/openai/gym) | ||
|
||
# Requirements | ||
|
||
- FIRASim | ||
- Clone the repo | ||
- https://github.com/robocin/FIRASim/releases/tag/deep_train | ||
- Follow its install guide | ||
- https://github.com/robocin/FIRASim/blob/master/INSTALL.md | ||
- Once built, replace the binary file in gym_vss/binaries_envs/fira_sim/bin with the one you built | ||
|
||
- VSS SDK | ||
- Add the following line to your ~/.bashrc or ~/.bash_profile | ||
- source /home/$USER/path/to/envs/gym_vss/binaries_envs/vss_sdk/exportlibs | ||
|
||
FIRASim links against many more (and heavier) libraries than the SDK, which is why it cannot be set up the same way as in the SDK guide. | ||
|
||
# Install | ||
First change the 5th line of gym_vss/gym_real_soccer/comm/Makefile to your python include path. | ||
If you are using anaconda, you need to change only the python version in the path. | ||
```bash | ||
$ sudo apt-get install swig freeglut3-dev -y | ||
$ cd gym_vss/gym_real_soccer/comm | ||
$ make | ||
$ cd ../../../ | ||
$ pip install -e . | ||
``` | ||
|
||
OBS: If you will use only the simulated environment, do: | ||
- comment line 3 in gym_vss/__init__.py | ||
- pip install -e . | ||
|
||
# Example of agent | ||
```python | ||
import gym | ||
import gym_vss | ||
|
||
from gym_vss import SingleAgentSoccerEnvWrapper | ||
|
||
|
||
env = gym.make('vss_soccer_cont-v0') | ||
# env = SingleAgentSoccerEnvWrapper(env, simulator='sdk') | ||
# If you want FIRASim | ||
env = SingleAgentSoccerEnvWrapper(env, simulator='fira') | ||
env.reset() | ||
for i in range(1): | ||
done = False | ||
state = env.reset() | ||
while not done: | ||
action = env.action_space.sample() | ||
next_state, reward, done, _ = env.step(action) | ||
env.close() | ||
``` | ||
|
||
|
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes
File renamed without changes.
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# RoboCup SSL OpenAI Gym environments | ||
|
||
# Requirements | ||
## Compile protobuf files | ||
```bash | ||
$ sudo apt-get install libprotobuf-dev protobuf-compiler -y | ||
$ cd rc_gym/grsim_ssl/Communication/pb/proto | ||
$ protoc --python_out=../ *.proto | ||
``` | ||
## Change the imports in the compiled protobuf files from relative to absolute | ||
On file **rc_gym/grsim_ssl/Communication/pb/messages_robocup_ssl_wrapper_pb2.py**: | ||
|
||
|
||
``` python | ||
'before:' | ||
15 - import messages_robocup_ssl_detection_pb2 as messages__robocup__ssl__detection__pb2 | ||
16 - import messages_robocup_ssl_geometry_pb2 as messages__robocup__ssl__geometry__pb2 | ||
|
||
'after:' | ||
15 + import rc_gym.grsim_ssl.Communication.pb.messages_robocup_ssl_detection_pb2 as messages__robocup__ssl__detection__pb2 | ||
16 + import rc_gym.grsim_ssl.Communication.pb.messages_robocup_ssl_geometry_pb2 as messages__robocup__ssl__geometry__pb2 | ||
``` | ||
|
||
On file **rc_gym/grsim_ssl/Communication/pb/grSim_Packet_pb2.py**: | ||
|
||
``` python | ||
'before:' | ||
15 - import grSim_Commands_pb2 as grSim__Commands__pb2 | ||
16 - import grSim_Replacement_pb2 as grSim__Replacement__pb2 | ||
'after:' | ||
15 + import rc_gym.grsim_ssl.Communication.pb.grSim_Commands_pb2 as grSim__Commands__pb2 | ||
16 + import rc_gym.grsim_ssl.Communication.pb.grSim_Replacement_pb2 as grSim__Replacement__pb2 | ||
``` | ||
## Install environments | ||
|
||
```bash | ||
$ pip install -e . | ||
``` | ||
# Available Envs | ||
- **grSimSSLPenalty-v0** | ||
- **grSimSSLShootGoalie-v0** | ||
|
||
# Example code | ||
```python | ||
import gym | ||
import rc_gym | ||
|
||
# Using penalty env | ||
env = gym.make('grSimSSLPenalty-v0') | ||
|
||
env.reset() | ||
# Run for 1 episode and print reward at the end | ||
for i in range(1): | ||
done = False | ||
while not done: | ||
action = env.action_space.sample() | ||
next_state, reward, done, _ = env.step(action) | ||
print(reward) | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
import random | ||
import numpy as np | ||
|
||
class ReplayBuffer:
    """Fixed-capacity ring buffer of transitions with uniform random sampling."""

    def __init__(self, capacity):
        """Create an empty buffer that keeps at most `capacity` transitions."""
        self.capacity = capacity
        self.buffer = []
        self.position = 0

    def push(self, state, action, reward, next_state, done):
        """Store one transition at the write cursor and advance the cursor.

        While the buffer is still growing a placeholder slot is appended
        first; once it has reached `capacity` the slot under the cursor is
        overwritten instead.
        """
        has_room = len(self.buffer) < self.capacity
        if has_room:
            # Grow by one placeholder so the indexed write below is valid.
            self.buffer.append(None)
        transition = (state, action, reward, next_state, done)
        self.buffer[self.position] = transition
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        """Draw `batch_size` distinct stored transitions at random.

        :return: tuple (state, action, reward, next_state, done) where each
            entry is the `np.stack` of that field over the sampled batch
        """
        transitions = random.sample(self.buffer, batch_size)
        # Transpose the list of 5-tuples into 5 per-field columns.
        columns = zip(*transitions)
        state, action, reward, next_state, done = (
            np.stack(column) for column in columns
        )
        return state, action, reward, next_state, done

    def __len__(self):
        """Return the number of transitions currently stored."""
        return len(self.buffer)
|
||
class AverageBuffer:
    """Ring buffer that keeps the latest `capacity` values and their mean."""

    def __init__(self, capacity=100):
        """Create an empty buffer that keeps at most `capacity` values."""
        self.capacity = capacity
        self.buffer = []
        self.index = 0

    def push(self, goal):
        """Store `goal` at the write cursor and advance the cursor.

        While the buffer is still growing a placeholder slot is appended
        first; once `capacity` is reached the slot under the cursor is
        overwritten.
        """
        if len(self.buffer) < self.capacity:
            self.buffer.append(None)

        self.buffer[self.index] = goal
        self.index = (self.index + 1) % self.capacity

    def average(self):
        """Return the mean of the stored values, or 0 when the buffer is empty."""
        return np.mean(self.buffer if len(self.buffer) > 0 else [0])

    def state_dict(self):
        """Serialize the buffer into a plain dict.

        :return: dict with keys 'index' (write cursor), 'bufferSize' (number
            of stored values) and one integer key per stored value
            (0 .. bufferSize - 1)
        """
        buffer_dict = {'index': self.index, 'bufferSize': len(self.buffer)}
        buffer_dict.update(enumerate(self.buffer))
        return buffer_dict

    def load_state_dict(self, buffer_dict):
        """Restore the buffer from a dict produced by `state_dict`.

        The stored values are replaced, not appended to, so loading into a
        buffer that already holds data does not duplicate entries or grow
        the buffer past what was saved.
        """
        size = buffer_dict['bufferSize']
        self.buffer = [buffer_dict[i] for i in range(size)]
        self.index = buffer_dict['index']

    def __len__(self):
        """Return the number of values currently stored."""
        return len(self.buffer)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import torch | ||
import torch.nn as nn | ||
import torch.nn.functional as F | ||
|
||
|
||
class ValueNetwork(nn.Module):
    """Three-layer MLP that maps a (state, action) pair to one value per row."""

    def __init__(self, num_inputs, num_actions, hidden_size, init_w=3e-3):
        """Build the layers.

        :param num_inputs: width of the state input
        :param num_actions: width of the action input
        :param hidden_size: width of both hidden layers
        :param init_w: the output layer's weight and bias are drawn
            uniformly from [-init_w, init_w]
        """
        super().__init__()

        self.linear1 = nn.Linear(num_inputs + num_actions, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, 1)

        # Re-initialize only the output layer (weight first, then bias).
        for param in (self.linear3.weight, self.linear3.bias):
            param.data.uniform_(-init_w, init_w)

    def forward(self, state, action):
        """Concatenate `state` and `action` along dim 1 and run the MLP."""
        joint = torch.cat((state, action), dim=1)
        hidden = F.relu(self.linear1(joint))
        hidden = F.relu(self.linear2(hidden))
        return self.linear3(hidden)
|
||
|
||
class PolicyNetwork(nn.Module):
    """Three-layer MLP that maps a state to a tanh-squashed action vector."""

    def __init__(self, num_inputs, num_actions, hidden_size, device, init_w=3e-3):
        """Build the layers.

        :param num_inputs: width of the state input
        :param num_actions: width of the action output
        :param hidden_size: width of both hidden layers
        :param device: device that `get_action` moves its input tensor to
        :param init_w: the output layer's weight and bias are drawn
            uniformly from [-init_w, init_w]
        """
        super().__init__()
        self.device = device

        self.linear1 = nn.Linear(num_inputs, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, num_actions)

        # Re-initialize only the output layer (weight first, then bias).
        for param in (self.linear3.weight, self.linear3.bias):
            param.data.uniform_(-init_w, init_w)

    def forward(self, state):
        """Run the MLP; the tanh on the last layer bounds outputs to [-1, 1]."""
        hidden = F.relu(self.linear1(state))
        hidden = F.relu(self.linear2(hidden))
        return torch.tanh(self.linear3(hidden))

    def get_action(self, state):
        """Compute the action for a single un-batched state.

        :param state: one state, convertible by `torch.FloatTensor`
        :return: (np.ndarray) the action for that state, detached and on CPU
        """
        # Add a leading batch dimension of 1 before the forward pass.
        batch = torch.FloatTensor(state).unsqueeze(0).to(self.device)
        result = self.forward(batch)
        return result.detach().cpu().numpy()[0]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
import gym | ||
|
||
import numpy as np | ||
|
||
class NormalizedWrapper(gym.Wrapper):
    """
    Expose a Box-space environment with actions and observations in [-1, 1].

    :param env: (gym.Env) Gym environment that will be wrapped
    """

    def __init__(self, env):
        # The parent constructor stores the wrapped env as self.env.
        super().__init__(env)

        wrapped_actions = self.env.action_space
        wrapped_observations = self.env.observation_space

        assert isinstance(wrapped_actions, gym.spaces.Box), \
            "This wrapper only works with continuous action space (spaces.Box)"
        assert isinstance(wrapped_observations, gym.spaces.Box), \
            "This wrapper only works with continuous observation space (spaces.Box)"

        # Advertise [-1, 1] spaces with the same shapes as the wrapped env.
        self.action_space = gym.spaces.Box(
            low=-1, high=1, shape=wrapped_actions.shape, dtype=np.float32)
        self.observation_space = gym.spaces.Box(
            low=-1, high=1, shape=wrapped_observations.shape, dtype=np.float32)

    def rescale_action(self, scaled_action):
        """
        Map an action from [-1, 1] to the wrapped env's [low, high]
        (the wrapped action space does not need to be symmetric)
        :param scaled_action: (np.ndarray)
        :return: (np.ndarray)
        """
        low = self.env.action_space.low
        high = self.env.action_space.high
        return low + (0.5 * (scaled_action + 1.0) * (high - low))

    def scale_observation(self, observation):
        """
        Map an observation from the wrapped env's [low, high] to [-1, 1]
        """
        low = self.env.observation_space.low
        high = self.env.observation_space.high
        return (2 * ((observation - low) / (high - low))) - 1

    def reset(self):
        """
        Reset the wrapped environment and return the scaled first observation
        """
        first_observation = self.env.reset()
        return self.scale_observation(first_observation)

    def step(self, action):
        """
        :param action: ([float] or int) Action taken by the agent, in [-1, 1]
        :return: (np.ndarray, float, bool, dict) scaled observation, reward, is the episode over?, additional informations
        """
        # The wrapped env expects actions in its own [low, high] interval.
        obs, reward, done, info = self.env.step(self.rescale_action(action))
        return self.scale_observation(obs), reward, done, info
|
||
|
||
class NormalizedActions(gym.ActionWrapper):
    """Action wrapper converting between [-1, 1] and the action space bounds.

    Both the underscore hooks (`_action` / `_reverse_action`) and the public
    ones (`action` / `reverse_action`) are provided.
    NOTE(review): which pair `gym.ActionWrapper` calls depends on the
    installed Gym release -- confirm against the pinned version.
    """

    def _action(self, action):
        """Map `action` from [-1, 1] to [low, high] and clip to those bounds."""
        low_bound = self.action_space.low
        upper_bound = self.action_space.high

        action = low_bound + (action + 1.0) * 0.5 * (upper_bound - low_bound)
        return np.clip(action, low_bound, upper_bound)

    def _reverse_action(self, action):
        """Map `action` from [low, high] back to [-1, 1] and clip to [-1, 1]."""
        low_bound = self.action_space.low
        upper_bound = self.action_space.high

        action = 2 * (action - low_bound) / (upper_bound - low_bound) - 1
        # The value is now in normalized units, so it must be clipped to the
        # normalized range rather than to the raw action bounds.
        return np.clip(action, -1.0, 1.0)

    def action(self, action):
        """Public-name hook; delegates to `_action`."""
        return self._action(action)

    def reverse_action(self, action):
        """Public-name hook; delegates to `_reverse_action`."""
        return self._reverse_action(action)
Oops, something went wrong.