Skip to content

Commit

Permalink
New game transformation: add noise to terminal utilities.
Browse files Browse the repository at this point in the history
  • Loading branch information
michalsustr authored and sustr-equi committed Mar 2, 2023
1 parent 4bd47a4 commit c968903
Show file tree
Hide file tree
Showing 6 changed files with 382 additions and 0 deletions.
8 changes: 8 additions & 0 deletions open_spiel/game_transforms/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
add_library (game_transforms OBJECT
add_noise.cc
add_noise.h
coop_to_1p.cc
coop_to_1p.h
efg_writer.cc
Expand Down Expand Up @@ -36,6 +38,12 @@ add_executable(misere_test
$<TARGET_OBJECTS:tests>)
add_test(misere_test misere_test)

# Test binary for the add_noise game transform, built from add_noise_test.cc
# and registered with CTest under the same name.
add_executable(add_noise_test
add_noise_test.cc
${OPEN_SPIEL_OBJECTS}
$<TARGET_OBJECTS:tests>)
add_test(add_noise_test add_noise_test)

add_executable(coop_to_1p_test
coop_to_1p_test.cc
${OPEN_SPIEL_OBJECTS}
Expand Down
127 changes: 127 additions & 0 deletions open_spiel/game_transforms/add_noise.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
// Copyright 2021 DeepMind Technologies Limited
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "open_spiel/game_transforms/add_noise.h"

#include "open_spiel/spiel.h"

namespace open_spiel {
namespace add_noise {
namespace {

// These parameters are the most-general case. The actual game may be simpler.
//
// This is the GameType passed to REGISTER_SPIEL_GAME below. Factory() does
// not return games with this type: it copies the wrapped game's GameType and
// overrides only short_name and long_name.
const GameType kGameType{
/*short_name=*/"add_noise",
/*long_name=*/"Add noise to terminal utilities.",
GameType::Dynamics::kSequential,
GameType::ChanceMode::kSampledStochastic,
GameType::Information::kImperfectInformation,
GameType::Utility::kGeneralSum,
// NOTE(review): declared as kRewards here, while Factory() only accepts
// wrapped games whose reward model is kTerminal.
GameType::RewardModel::kRewards,
/*max_num_players=*/100,
/*min_num_players=*/1,
/*provides_information_state_string=*/true,
/*provides_information_state_tensor=*/true,
/*provides_observation_string=*/true,
/*provides_observation_tensor=*/true,
// Transform parameters: the wrapped game, the noise half-width (noise is
// drawn from [-epsilon, epsilon]) and the seed of the random engine.
{{"game", GameParameter(GameParameter::Type::kGame, /*is_mandatory=*/true)},
{"epsilon", GameParameter(1.0, /*is_mandatory=*/true)},
{"seed", GameParameter(1, /*is_mandatory=*/true)}},
/*default_loadable=*/false,
/*provides_factored_observation_string=*/true,
};

std::shared_ptr<const Game> Factory(const GameParameters& params) {
auto game = LoadGame(params.at("game").game_value());
GameType game_type = game->GetType();
// Only terminal reward models are supported.
SPIEL_CHECK_EQ(game_type.reward_model, GameType::RewardModel::kTerminal);

game_type.short_name = kGameType.short_name;
game_type.long_name = absl::StrCat(
"Add noise to",
" game=", game_type.long_name,
" epsilon=", params.at("epsilon").double_value(),
" seed=", params.at("seed").int_value());
return std::make_shared<AddNoiseGame>(game, game_type, params);
}

// Registers kGameType together with Factory. Presumably this is what lets
// LoadGame resolve "add_noise(...)" game strings such as the one used in
// add_noise_test.cc -- confirm against the macro's definition.
REGISTER_SPIEL_GAME(kGameType, Factory);

} // namespace

// Wraps `game` and reads the noise configuration from the game parameters.
//
// "epsilon" is the half-width of the noise interval and "seed" initializes the
// random engine used to draw the noise.
//
// A negative (or NaN) epsilon is rejected up front: GetNoise() constructs
// std::uniform_real_distribution<double>(-epsilon_, epsilon_), whose lower
// bound must not exceed its upper bound, and MinUtility()/MaxUtility() would
// otherwise report bounds that are too tight.
AddNoiseGame::AddNoiseGame(
    std::shared_ptr<const Game> game, GameType game_type,
    GameParameters game_parameters)
    : WrappedGame(game, game_type, game_parameters),
      epsilon_(ParameterValue<double>("epsilon")),
      rng_(ParameterValue<int>("seed")) {
  SPIEL_CHECK_GE(epsilon_, 0.0);
}

// Returns the wrapped game's initial state, wrapped in an AddNoiseState that
// refers back to this game.
std::unique_ptr<State> AddNoiseGame::NewInitialState() const {
  std::unique_ptr<State> wrapped_initial = game_->NewInitialState();
  return std::make_unique<AddNoiseState>(shared_from_this(),
                                         std::move(wrapped_initial));
}

// Returns the noise value associated with `state`, keyed by its history
// string.
//
// The first query for a history draws a value uniformly from
// [-epsilon_, epsilon_] and stores it in noise_table_; later queries for the
// same history return the stored value. Because all draws come from the single
// engine rng_, the value a history receives depends on the order in which
// histories are first queried. The method mutates rng_ and noise_table_ and
// performs no locking.
double AddNoiseGame::GetNoise(const AddNoiseState& state) {
  const std::string history_key = state.HistoryString();

  const auto cached = noise_table_.find(history_key);
  if (cached == noise_table_.end()) {
    // First time this history is seen: sample and memoize.
    std::uniform_real_distribution<double> noise_dist(-epsilon_, epsilon_);
    const double sampled = noise_dist(rng_);
    noise_table_.emplace(history_key, sampled);
    return sampled;
  }

  return cached->second;
}

// Upper utility bound: the wrapped game's maximum raised by epsilon_, the
// largest noise GetNoise() can draw.
double AddNoiseGame::MaxUtility() const {
  const double wrapped_max = WrappedGame::MaxUtility();
  return wrapped_max + epsilon_;
}

// Lower utility bound: the wrapped game's minimum lowered by epsilon_, the
// largest magnitude of noise GetNoise() can draw.
double AddNoiseGame::MinUtility() const {
  const double wrapped_min = WrappedGame::MinUtility();
  return wrapped_min - epsilon_;
}

// Wraps `state` (a state of the underlying game) and associates it with
// `transformed_game`, the game handed on to WrappedState. Ownership of
// `state` is transferred to the WrappedState base.
AddNoiseState::AddNoiseState(std::shared_ptr<const Game> transformed_game,
                             std::unique_ptr<State> state)
    : WrappedState(transformed_game, std::move(state)) {
}

std::vector<double> AddNoiseState::Returns() const {
std::vector<double> returns = state_->Returns();
SPIEL_CHECK_EQ(returns.size(), 2);

if (state_->IsTerminal()) {
auto const_noise_game = down_cast<const AddNoiseGame*>(game_.get());
AddNoiseGame* noise_game = const_cast<AddNoiseGame*>(const_noise_game);
double noise = noise_game->GetNoise(*this);
returns[0] += noise;
returns[1] -= noise;
}

return returns;
}

// Rewards for the current step.
//
// At a terminal state this is the full (noisy) Returns(); at any other state
// it is a zero for every player. Calling this on a non-terminal chance node
// fails the SPIEL_CHECK_FALSE below.
std::vector<double> AddNoiseState::Rewards() const {
  if (!IsTerminal()) {
    SPIEL_CHECK_FALSE(IsChanceNode());
    return std::vector<double>(num_players_, 0.0);
  }
  return Returns();
}


} // namespace add_noise
} // namespace open_spiel
65 changes: 65 additions & 0 deletions open_spiel/game_transforms/add_noise.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Copyright 2021 DeepMind Technologies Limited
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef OPEN_SPIEL_GAME_TRANSFORMS_ADD_NOISE_H_
#define OPEN_SPIEL_GAME_TRANSFORMS_ADD_NOISE_H_

#include <memory>
#include <random>
#include <string>
#include <unordered_map>
#include <vector>

#include "open_spiel/game_transforms/game_wrapper.h"
#include "open_spiel/spiel.h"
#include "open_spiel/spiel_utils.h"

// Transforms game by adding noise to the original utilities.
//
// The noise is sampled from a uniform distribution over [-epsilon, epsilon],
// independently for each terminal history.
// The transformation can be seeded for reproducibility.

namespace open_spiel {
namespace add_noise {

// State of an AddNoiseGame. It wraps a state of the underlying game and
// overrides only the reporting of returns and rewards.
class AddNoiseState : public WrappedState {
 public:
  // `game` is the transformed game this state belongs to; `state` is the
  // wrapped game's state, whose ownership is taken over.
  AddNoiseState(std::shared_ptr<const Game> game,
                std::unique_ptr<State> state);
  AddNoiseState(const AddNoiseState& other) = default;

  // Copies this state via the copy constructor.
  std::unique_ptr<State> Clone() const override {
    return std::make_unique<AddNoiseState>(*this);
  }

  // Wrapped returns; at terminal states the history's noise is added to
  // player 0's return and subtracted from player 1's.
  std::vector<double> Returns() const override;

  // Equal to Returns() at terminal states, all zeros otherwise.
  std::vector<double> Rewards() const override;
};

// Game wrapper that perturbs the terminal returns of the wrapped game with
// memoized random noise.
class AddNoiseGame : public WrappedGame {
 public:
  // `game` is the game to wrap, `game_type` the type reported by the wrapper,
  // and `game_parameters` must provide the "epsilon" and "seed" parameters.
  AddNoiseGame(std::shared_ptr<const Game> game, GameType game_type,
               GameParameters game_parameters);

  std::unique_ptr<State> NewInitialState() const override;

  // Utility bounds of the wrapped game, widened by epsilon on each side.
  double MinUtility() const override;
  double MaxUtility() const override;

  // Noise for the history of `state`: drawn from [-epsilon, epsilon] on the
  // first query for that history and cached for subsequent ones. Non-const
  // because it advances the random engine and fills the cache.
  double GetNoise(const AddNoiseState& state);

 private:
  // Half-width of the noise interval.
  const double epsilon_;
  // Engine seeded from the "seed" parameter; source of all noise draws.
  std::mt19937 rng_;
  // Cache of drawn noise values, keyed by the state's history string.
  std::unordered_map<std::string, double> noise_table_;
};

} // namespace add_noise
} // namespace open_spiel

#endif // OPEN_SPIEL_GAME_TRANSFORMS_ADD_NOISE_H_
35 changes: 35 additions & 0 deletions open_spiel/game_transforms/add_noise_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright 2021 DeepMind Technologies Limited
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "open_spiel/game_transforms/add_noise.h"

#include "open_spiel/spiel.h"
#include "open_spiel/tests/basic_tests.h"

namespace open_spiel {
namespace add_noise {
namespace {

namespace testing = open_spiel::testing;

void BasicTests() {
testing::LoadGameTest("add_noise(epsilon=1.,seed=1,game=kuhn_poker())");
testing::RandomSimTest(*LoadGame("add_noise(epsilon=1.,seed=1,game=kuhn_poker())"), 100);
}

} // namespace
} // namespace add_noise
} // namespace open_spiel

// Test entry point; the command-line arguments are not used.
int main(int argc, char** argv) {
  open_spiel::add_noise::BasicTests();
  return 0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
game: add_noise(epsilon=1.,seed=1,game=kuhn_poker())

GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC
GameType.dynamics = Dynamics.SEQUENTIAL
GameType.information = Information.IMPERFECT_INFORMATION
GameType.long_name = "Add noise to game=Kuhn Poker epsilon=1 seed=1"
GameType.max_num_players = 10
GameType.min_num_players = 2
GameType.parameter_specification = ["players"]
GameType.provides_information_state_string = True
GameType.provides_information_state_tensor = True
GameType.provides_observation_string = True
GameType.provides_observation_tensor = True
GameType.provides_factored_observation_string = True
GameType.reward_model = RewardModel.TERMINAL
GameType.short_name = "add_noise"
GameType.utility = Utility.ZERO_SUM

NumDistinctActions() = 2
PolicyTensorShape() = [2]
MaxChanceOutcomes() = 3
GetParameters() = {epsilon=1.0,game=kuhn_poker(),seed=1}
NumPlayers() = 2
MinUtility() = -3.0
MaxUtility() = 3.0
UtilitySum() = 0.0
InformationStateTensorShape() = [11]
InformationStateTensorLayout() = TensorLayout.CHW
InformationStateTensorSize() = 11
ObservationTensorShape() = [7]
ObservationTensorLayout() = TensorLayout.CHW
ObservationTensorSize() = 7
MaxGameLength() = 3
ToString() = "add_noise(epsilon=1.0,game=kuhn_poker(),seed=1)"

# State 0
IsTerminal() = False
History() = []
HistoryString() = ""
IsChanceNode() = True
IsSimultaneousNode() = False
CurrentPlayer() = -1
InformationStateString(0) = ""
InformationStateString(1) = ""
InformationStateTensor(0): ◉◯◯◯◯◯◯◯◯◯◯
InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯
ObservationString(0) = ""
ObservationString(1) = ""
ObservationTensor(0): ◉◯◯◯◯◉◉
ObservationTensor(1): ◯◉◯◯◯◉◉
ChanceOutcomes() = [(0, 0.3333333333333333), (1, 0.3333333333333333), (2, 0.3333333333333333)]
LegalActions() = [0, 1, 2]
StringLegalActions() = ["Deal:0", "Deal:1", "Deal:2"]

# Apply action "Deal:2"
action: 2

# State 1
# 2
IsTerminal() = False
History() = [2]
HistoryString() = "2"
IsChanceNode() = True
IsSimultaneousNode() = False
CurrentPlayer() = -1
InformationStateString(0) = "2"
InformationStateString(1) = ""
InformationStateTensor(0): ◉◯◯◯◉◯◯◯◯◯◯
InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯
ObservationString(0) = "211"
ObservationString(1) = ""
ObservationTensor(0): ◉◯◯◯◉◉◉
ObservationTensor(1): ◯◉◯◯◯◉◉
ChanceOutcomes() = [(0, 0.5), (1, 0.5)]
LegalActions() = [0, 1]
StringLegalActions() = ["Deal:0", "Deal:1"]

# Apply action "Deal:1"
action: 1

# State 2
# 2 1
IsTerminal() = False
History() = [2, 1]
HistoryString() = "2, 1"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 0
InformationStateString(0) = "2"
InformationStateString(1) = "1"
InformationStateTensor(0): ◉◯◯◯◉◯◯◯◯◯◯
InformationStateTensor(1): ◯◉◯◉◯◯◯◯◯◯◯
ObservationString(0) = "211"
ObservationString(1) = "111"
ObservationTensor(0): ◉◯◯◯◉◉◉
ObservationTensor(1): ◯◉◯◉◯◉◉
Rewards() = [0, 0]
Returns() = [0, 0]
LegalActions() = [0, 1]
StringLegalActions() = ["Pass", "Bet"]

# Apply action "Bet"
action: 1

# State 3
# 2 1 b
IsTerminal() = False
History() = [2, 1, 1]
HistoryString() = "2, 1, 1"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 1
InformationStateString(0) = "2b"
InformationStateString(1) = "1b"
InformationStateTensor(0): ◉◯◯◯◉◯◉◯◯◯◯
InformationStateTensor(1): ◯◉◯◉◯◯◉◯◯◯◯
ObservationString(0) = "221"
ObservationString(1) = "121"
ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0]
ObservationTensor(1) = [0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0]
Rewards() = [0, 0]
Returns() = [0, 0]
LegalActions() = [0, 1]
StringLegalActions() = ["Pass", "Bet"]

# Apply action "Pass"
action: 0

# State 4
# 2 1 bp
IsTerminal() = True
History() = [2, 1, 1, 0]
HistoryString() = "2, 1, 1, 0"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = -4
InformationStateString(0) = "2bp"
InformationStateString(1) = "1bp"
InformationStateTensor(0): ◉◯◯◯◉◯◉◉◯◯◯
InformationStateTensor(1): ◯◉◯◉◯◯◉◉◯◯◯
ObservationString(0) = "221"
ObservationString(1) = "121"
ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0]
ObservationTensor(1) = [0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0]
Rewards() = [1.99436961646053, -1.99436961646053]
Returns() = [1.99436961646053, -1.99436961646053]
Loading

0 comments on commit c968903

Please sign in to comment.