New game transformation: add noise to terminal utilities.

sandguine · Mar 2, 2023 · c968903 · c968903
1 parent 4bd47a4
commit c968903
Show file tree

Hide file tree

Showing 6 changed files with 382 additions and 0 deletions.
diff --git a/open_spiel/game_transforms/CMakeLists.txt b/open_spiel/game_transforms/CMakeLists.txt
@@ -1,4 +1,6 @@
 add_library (game_transforms OBJECT
+  add_noise.cc
+  add_noise.h
   coop_to_1p.cc
   coop_to_1p.h
   efg_writer.cc
@@ -36,6 +38,12 @@ add_executable(misere_test
                $<TARGET_OBJECTS:tests>)
 add_test(misere_test misere_test)
 
+# Builds the add_noise_test executable from add_noise_test.cc and registers it
+# as a test of the same name.
+add_executable(add_noise_test
+               add_noise_test.cc
+               ${OPEN_SPIEL_OBJECTS}
+               $<TARGET_OBJECTS:tests>)
+add_test(add_noise_test add_noise_test)
+
 add_executable(coop_to_1p_test
                coop_to_1p_test.cc
                ${OPEN_SPIEL_OBJECTS}

diff --git a/open_spiel/game_transforms/add_noise.cc b/open_spiel/game_transforms/add_noise.cc
@@ -0,0 +1,127 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "open_spiel/game_transforms/add_noise.h"
+
+#include "open_spiel/spiel.h"
+
+namespace open_spiel {
+namespace add_noise {
+namespace {
+
+// GameType under which the transform is registered. These parameters are the
+// most-general case. The actual game may be simpler: Factory() starts from the
+// wrapped game's own GameType and overrides only short_name and long_name.
+const GameType kGameType{
+        /*short_name=*/"add_noise",
+        /*long_name=*/"Add noise to terminal utilities.",
+        GameType::Dynamics::kSequential,
+        GameType::ChanceMode::kSampledStochastic,
+        GameType::Information::kImperfectInformation,
+        GameType::Utility::kGeneralSum,
+        GameType::RewardModel::kRewards,
+        /*max_num_players=*/100,
+        /*min_num_players=*/1,
+        /*provides_information_state_string=*/true,
+        /*provides_information_state_tensor=*/true,
+        /*provides_observation_string=*/true,
+        /*provides_observation_tensor=*/true,
+        // Parameter specification: the game to wrap, the bound of the noise
+        // interval, and the seed of the noise generator.
+        {{"game", GameParameter(GameParameter::Type::kGame, /*is_mandatory=*/true)},
+         {"epsilon", GameParameter(1.0, /*is_mandatory=*/true)},
+         {"seed", GameParameter(1, /*is_mandatory=*/true)}},
+        /*default_loadable=*/false,
+        /*provides_factored_observation_string=*/true,
+};
+
+// Builds the noisy wrapper around the game named by the "game" parameter.
+//
+// The wrapper reuses the wrapped game's GameType and replaces only its short
+// and long names; the long name records the epsilon and seed in use. The
+// wrapped game must use the terminal reward model.
+std::shared_ptr<const Game> Factory(const GameParameters& params) {
+  auto wrapped_game = LoadGame(params.at("game").game_value());
+  GameType game_type = wrapped_game->GetType();
+  // Only terminal reward models are supported.
+  SPIEL_CHECK_EQ(game_type.reward_model, GameType::RewardModel::kTerminal);
+
+  const auto epsilon = params.at("epsilon").double_value();
+  const auto seed = params.at("seed").int_value();
+  game_type.long_name = absl::StrCat("Add noise to", " game=",
+                                     game_type.long_name, " epsilon=", epsilon,
+                                     " seed=", seed);
+  game_type.short_name = kGameType.short_name;
+  return std::make_shared<AddNoiseGame>(wrapped_game, game_type, params);
+}
+
+REGISTER_SPIEL_GAME(kGameType, Factory);
+
+}  // namespace
+
+// Wraps `game` and reads the "epsilon" and "seed" game parameters.
+//
+// epsilon is the bound of the noise interval and must be non-negative:
+// GetNoise() builds std::uniform_real_distribution(-epsilon, epsilon), whose
+// precondition (a <= b) a negative epsilon would violate, and
+// MinUtility()/MaxUtility() would no longer bracket the perturbed returns.
+// seed initialises the generator the noise is drawn from.
+AddNoiseGame::AddNoiseGame(
+        std::shared_ptr<const Game> game, GameType game_type,
+        GameParameters game_parameters)
+        : WrappedGame(game, game_type, game_parameters),
+          epsilon_(ParameterValue<double>("epsilon")),
+          rng_(ParameterValue<int>("seed")) {
+  // Fail at construction rather than hitting undefined behaviour the first
+  // time a terminal state asks for its noise.
+  SPIEL_CHECK_GE(epsilon_, 0.0);
+}
+
+// Starts a new episode: takes the wrapped game's initial state and wraps it in
+// an AddNoiseState that refers back to this game.
+std::unique_ptr<State> AddNoiseGame::NewInitialState() const {
+  std::unique_ptr<State> wrapped_initial = game_->NewInitialState();
+  return std::make_unique<AddNoiseState>(shared_from_this(),
+                                         std::move(wrapped_initial));
+}
+
+// Returns the noise associated with `state`, keyed by its history string.
+//
+// The value is drawn uniformly from the interval bounded by -epsilon_ and
+// epsilon_ the first time a history is requested and is reused afterwards.
+// Because samples come from one sequential generator, the value a history
+// receives depends on the order in which histories are first requested.
+double AddNoiseGame::GetNoise(const AddNoiseState& state) {
+  std::string history_key = state.HistoryString();
+  const auto cached = noise_table_.find(history_key);
+  if (cached != noise_table_.end()) {
+    return cached->second;
+  }
+
+  // First request for this history: sample once and remember the result.
+  std::uniform_real_distribution<double> noise_dist(-epsilon_, epsilon_);
+  const double sampled = noise_dist(rng_);
+  noise_table_.emplace(std::move(history_key), sampled);
+  return sampled;
+}
+
+// Upper utility bound: the wrapped game's maximum raised by epsilon_, the
+// bound of the noise interval.
+double AddNoiseGame::MaxUtility() const {
+  const double wrapped_max = WrappedGame::MaxUtility();
+  return wrapped_max + epsilon_;
+}
+
+// Lower utility bound: the wrapped game's minimum lowered by epsilon_, the
+// bound of the noise interval.
+double AddNoiseGame::MinUtility() const {
+  const double wrapped_min = WrappedGame::MinUtility();
+  return wrapped_min - epsilon_;
+}
+
+// Wraps `state`, a state of the underlying game, on behalf of
+// `transformed_game`; ownership of `state` moves into the WrappedState base.
+AddNoiseState::AddNoiseState(std::shared_ptr<const Game> transformed_game,
+                             std::unique_ptr<State> state)
+    : WrappedState(transformed_game, std::move(state)) {}
+
+// Returns the wrapped state's returns, perturbed at terminal states.
+//
+// At a terminal state player 0 gains the history's noise and player 1 loses
+// the same amount, so the sum of the two returns is unchanged. Non-terminal
+// states pass the wrapped returns through untouched. Only two-player games are
+// supported; this is checked on every call.
+std::vector<double> AddNoiseState::Returns() const {
+  std::vector<double> returns = state_->Returns();
+  SPIEL_CHECK_EQ(returns.size(), 2);
+  if (!state_->IsTerminal()) {
+    return returns;
+  }
+
+  // The noise table lives in the game object, which is reached here through a
+  // pointer-to-const; constness is cast away so that a first lookup can record
+  // its sample.
+  AddNoiseGame* noise_game = const_cast<AddNoiseGame*>(
+      down_cast<const AddNoiseGame*>(game_.get()));
+  const double noise = noise_game->GetNoise(*this);
+  returns[0] += noise;
+  returns[1] -= noise;
+  return returns;
+}
+
+// Per-step rewards: the full (noisy) returns once the state is terminal, and
+// zero for every player before that. Must not be called on a non-terminal
+// chance node.
+std::vector<double> AddNoiseState::Rewards() const {
+  if (!IsTerminal()) {
+    SPIEL_CHECK_FALSE(IsChanceNode());
+    return std::vector<double>(num_players_, 0.0);
+  }
+  return Returns();
+}
+
+
+}  // namespace add_noise
+}  // namespace open_spiel
diff --git a/open_spiel/game_transforms/add_noise.h b/open_spiel/game_transforms/add_noise.h
@@ -0,0 +1,65 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef OPEN_SPIEL_GAME_TRANSFORMS_ADD_NOISE_H_
+#define OPEN_SPIEL_GAME_TRANSFORMS_ADD_NOISE_H_
+
+#include <memory>
+#include <random>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "open_spiel/game_transforms/game_wrapper.h"
+#include "open_spiel/spiel.h"
+#include "open_spiel/spiel_utils.h"
+
+// Transforms a game by adding noise to its terminal utilities.
+//
+// The noise is sampled from the uniform distribution on [-epsilon, epsilon],
+// independently for each terminal history. It is added to player 0's return
+// and subtracted from player 1's, so only two-player games are supported.
+// The transformation can be seeded for reproducibility.
+
+namespace open_spiel {
+namespace add_noise {
+
+// State of the noisy game: a wrapped state of the underlying game whose
+// returns are perturbed once the state is terminal.
+class AddNoiseState : public WrappedState {
+ public:
+  // `game` is the transformed (noisy) game; `state` is a state of the wrapped
+  // game, of which this object takes ownership.
+  AddNoiseState(std::shared_ptr<const Game> game,
+                std::unique_ptr<State> state);
+  AddNoiseState(const AddNoiseState& other) = default;
+  // Returns a copy of this state produced by the copy constructor.
+  std::unique_ptr<State> Clone() const override {
+    return std::make_unique<AddNoiseState>(*this);
+  }
+  // Wrapped returns; at terminal states the noise is added to player 0 and
+  // subtracted from player 1.
+  std::vector<double> Returns() const override;
+  // Returns() at terminal states, zeros for all players otherwise.
+  std::vector<double> Rewards() const override;
+};
+
+// Game wrapper that owns the noise: a seeded generator plus a table of the
+// values already drawn, keyed by history string.
+class AddNoiseGame : public WrappedGame {
+ public:
+  AddNoiseGame(std::shared_ptr<const Game> game, GameType game_type,
+               GameParameters game_parameters);
+
+  std::unique_ptr<State> NewInitialState() const override;
+
+  // Utility bounds of the wrapped game, widened by epsilon on each side.
+  double MinUtility() const override;
+  double MaxUtility() const override;
+
+  // Noise for `state`'s history: drawn on first request, then reused.
+  // Non-const because a first request advances rng_ and grows noise_table_.
+  // NOTE(review): no locking is visible around that mutation - confirm this
+  // is never called concurrently on a shared game object.
+  double GetNoise(const AddNoiseState& state);
+
+ private:
+  const double epsilon_;  // Noise is drawn between -epsilon_ and epsilon_.
+  std::mt19937 rng_;      // Seeded from the "seed" game parameter.
+  std::unordered_map<std::string, double> noise_table_;  // History -> noise.
+};
+
+}  // namespace add_noise
+}  // namespace open_spiel
+
+#endif  // OPEN_SPIEL_GAME_TRANSFORMS_ADD_NOISE_H_
diff --git a/open_spiel/game_transforms/add_noise_test.cc b/open_spiel/game_transforms/add_noise_test.cc
@@ -0,0 +1,35 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "open_spiel/game_transforms/add_noise.h"
+
+#include "open_spiel/spiel.h"
+#include "open_spiel/tests/basic_tests.h"
+
+namespace open_spiel {
+namespace add_noise {
+namespace {
+
+namespace testing = open_spiel::testing;
+
+// Checks that the transform wrapped around Kuhn poker can be loaded, then runs
+// RandomSimTest on it with 100 as the simulation count argument.
+void BasicTests() {
+  const char* const game_string =
+      "add_noise(epsilon=1.,seed=1,game=kuhn_poker())";
+  testing::LoadGameTest(game_string);
+  testing::RandomSimTest(*LoadGame(game_string), 100);
+}
+
+}  // namespace
+}  // namespace add_noise
+}  // namespace open_spiel
+
+// Test entry point; the command-line arguments are not used.
+int main(int argc, char** argv) {
+  open_spiel::add_noise::BasicTests();
+  return 0;
+}
diff --git a/open_spiel/integration_tests/playthroughs/add_noise(epsilon=1.,seed=1,game=kuhn_poker()).txt b/open_spiel/integration_tests/playthroughs/add_noise(epsilon=1.,seed=1,game=kuhn_poker()).txt
@@ -0,0 +1,146 @@
+game: add_noise(epsilon=1.,seed=1,game=kuhn_poker())
+
+GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC
+GameType.dynamics = Dynamics.SEQUENTIAL
+GameType.information = Information.IMPERFECT_INFORMATION
+GameType.long_name = "Add noise to game=Kuhn Poker epsilon=1 seed=1"
+GameType.max_num_players = 10
+GameType.min_num_players = 2
+GameType.parameter_specification = ["players"]
+GameType.provides_information_state_string = True
+GameType.provides_information_state_tensor = True
+GameType.provides_observation_string = True
+GameType.provides_observation_tensor = True
+GameType.provides_factored_observation_string = True
+GameType.reward_model = RewardModel.TERMINAL
+GameType.short_name = "add_noise"
+GameType.utility = Utility.ZERO_SUM
+
+NumDistinctActions() = 2
+PolicyTensorShape() = [2]
+MaxChanceOutcomes() = 3
+GetParameters() = {epsilon=1.0,game=kuhn_poker(),seed=1}
+NumPlayers() = 2
+MinUtility() = -3.0
+MaxUtility() = 3.0
+UtilitySum() = 0.0
+InformationStateTensorShape() = [11]
+InformationStateTensorLayout() = TensorLayout.CHW
+InformationStateTensorSize() = 11
+ObservationTensorShape() = [7]
+ObservationTensorLayout() = TensorLayout.CHW
+ObservationTensorSize() = 7
+MaxGameLength() = 3
+ToString() = "add_noise(epsilon=1.0,game=kuhn_poker(),seed=1)"
+
+# State 0
+IsTerminal() = False
+History() = []
+HistoryString() = ""
+IsChanceNode() = True
+IsSimultaneousNode() = False
+CurrentPlayer() = -1
+InformationStateString(0) = ""
+InformationStateString(1) = ""
+InformationStateTensor(0): ◉◯◯◯◯◯◯◯◯◯◯
+InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯
+ObservationString(0) = ""
+ObservationString(1) = ""
+ObservationTensor(0): ◉◯◯◯◯◉◉
+ObservationTensor(1): ◯◉◯◯◯◉◉
+ChanceOutcomes() = [(0, 0.3333333333333333), (1, 0.3333333333333333), (2, 0.3333333333333333)]
+LegalActions() = [0, 1, 2]
+StringLegalActions() = ["Deal:0", "Deal:1", "Deal:2"]
+
+# Apply action "Deal:2"
+action: 2
+
+# State 1
+# 2
+IsTerminal() = False
+History() = [2]
+HistoryString() = "2"
+IsChanceNode() = True
+IsSimultaneousNode() = False
+CurrentPlayer() = -1
+InformationStateString(0) = "2"
+InformationStateString(1) = ""
+InformationStateTensor(0): ◉◯◯◯◉◯◯◯◯◯◯
+InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯
+ObservationString(0) = "211"
+ObservationString(1) = ""
+ObservationTensor(0): ◉◯◯◯◉◉◉
+ObservationTensor(1): ◯◉◯◯◯◉◉
+ChanceOutcomes() = [(0, 0.5), (1, 0.5)]
+LegalActions() = [0, 1]
+StringLegalActions() = ["Deal:0", "Deal:1"]
+
+# Apply action "Deal:1"
+action: 1
+
+# State 2
+# 2 1
+IsTerminal() = False
+History() = [2, 1]
+HistoryString() = "2, 1"
+IsChanceNode() = False
+IsSimultaneousNode() = False
+CurrentPlayer() = 0
+InformationStateString(0) = "2"
+InformationStateString(1) = "1"
+InformationStateTensor(0): ◉◯◯◯◉◯◯◯◯◯◯
+InformationStateTensor(1): ◯◉◯◉◯◯◯◯◯◯◯
+ObservationString(0) = "211"
+ObservationString(1) = "111"
+ObservationTensor(0): ◉◯◯◯◉◉◉
+ObservationTensor(1): ◯◉◯◉◯◉◉
+Rewards() = [0, 0]
+Returns() = [0, 0]
+LegalActions() = [0, 1]
+StringLegalActions() = ["Pass", "Bet"]
+
+# Apply action "Bet"
+action: 1
+
+# State 3
+# 2 1 b
+IsTerminal() = False
+History() = [2, 1, 1]
+HistoryString() = "2, 1, 1"
+IsChanceNode() = False
+IsSimultaneousNode() = False
+CurrentPlayer() = 1
+InformationStateString(0) = "2b"
+InformationStateString(1) = "1b"
+InformationStateTensor(0): ◉◯◯◯◉◯◉◯◯◯◯
+InformationStateTensor(1): ◯◉◯◉◯◯◉◯◯◯◯
+ObservationString(0) = "221"
+ObservationString(1) = "121"
+ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0]
+ObservationTensor(1) = [0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0]
+Rewards() = [0, 0]
+Returns() = [0, 0]
+LegalActions() = [0, 1]
+StringLegalActions() = ["Pass", "Bet"]
+
+# Apply action "Pass"
+action: 0
+
+# State 4
+# 2 1 bp
+IsTerminal() = True
+History() = [2, 1, 1, 0]
+HistoryString() = "2, 1, 1, 0"
+IsChanceNode() = False
+IsSimultaneousNode() = False
+CurrentPlayer() = -4
+InformationStateString(0) = "2bp"
+InformationStateString(1) = "1bp"
+InformationStateTensor(0): ◉◯◯◯◉◯◉◉◯◯◯
+InformationStateTensor(1): ◯◉◯◉◯◯◉◉◯◯◯
+ObservationString(0) = "221"
+ObservationString(1) = "121"
+ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0]
+ObservationTensor(1) = [0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0]
+Rewards() = [1.99436961646053, -1.99436961646053]
+Returns() = [1.99436961646053, -1.99436961646053]