Merge branch 'master' into master

sandguine · Apr 15, 2023 · f91ebf8 · f91ebf8
2 parents 3dbd78e + 77aca74
commit f91ebf8
Show file tree

Hide file tree

Showing 264 changed files with 10,781 additions and 788 deletions.
diff --git a/.gitignore b/.gitignore
@@ -34,6 +34,7 @@ open_spiel/libnop/libnop/
 open_spiel/games/bridge/double_dummy_solver/
 open_spiel/games/universal_poker/double_dummy_solver/
 open_spiel/games/hanabi/hanabi-learning-environment/
+/open_spiel/pybind11_abseil/
 pybind11/
 
 # Install artifacts

diff --git a/docs/algorithms.md b/docs/algorithms.md
@@ -16,6 +16,7 @@ Lemke-Howson (via <tt>nashpy</tt>)                 | Opt.         | [Wikipedia](
 ADIDAS                                             | Opt.         | [Gemp et al '22](https://arxiv.org/abs/2106.01285)                                                                                                                                                                                                                                                              | <font color="orange"><b>~</b></font>
 Sequence-form linear programming                   | Opt.         | [Koller, Megiddo, and von Stengel '94](http://theory.stanford.edu/~megiddo/pdf/stoc94.pdf), <br> [Shoham &amp; Leyton-Brown '09](http://masfoundations.org/)                                                                                                                                                    | ![](_static/green_circ10.png "green circle")
 Stackelberg equilibrium solver                     | Opt.         | [Conitzer &amp; Sandholm '06](https://users.cs.duke.edu/~conitzer/commitEC06.pdf)                                                                                                                                                                                                                               | <font color="orange"><b>~</b></font>
+MIP-Nash                                           | Opt.         | [Sandholm et al. '05](https://dl.acm.org/doi/10.5555/1619410.1619413)                                                                                                                                                                                                                                           | <font color="orange"><b>~</b></font>
 Magnetic Mirror Descent (MMD) with dilated entropy | Opt.         | [Sokota et al. '22](https://arxiv.org/abs/2206.05825)                                                                                                                                                                                                                                                           | <font color="orange"><b>~</b></font>
 Counterfactual Regret Minimization (CFR)           | Tabular      | [Zinkevich et al '08](https://poker.cs.ualberta.ca/publications/NIPS07-cfr.pdf), [Neller &amp; Lanctot '13](http://modelai.gettysburg.edu/2013/cfr/cfr.pdf)                                                                                                                                                     | ![](_static/green_circ10.png "green circle")
 CFR against a best responder (CFR-BR)              | Tabular      | [Johanson et al '12](https://poker.cs.ualberta.ca/publications/AAAI12-cfrbr.pdf)                                                                                                                                                                                                                                | ![](_static/green_circ10.png "green circle")

diff --git a/docs/developer_guide.md b/docs/developer_guide.md
@@ -72,9 +72,13 @@ ideal to first be aware of the general API (see `spiel.h`).
     `NewGameState` to reflect your new game’s logic. Most API functions should
     be clear from the game you copied from. If not, each API function that is
     overridden will be fully documented in superclasses in `spiel.h`.
-8.  Once done, rebuild and rerun the tests to ensure everything passes
+8.  Run your code through a linter so it conforms to Google's
+    [style guides](https://google.github.io/styleguide/). For C++ and Python
+    use [cpplint](https://pypi.org/project/cpplint/). There is also
+    [YAPF](https://github.com/google/yapf/) for Python as well.
+9.  Once done, rebuild and rerun the tests to ensure everything passes
     (including your new game’s test!).
-9.  Add a playthrough file to catch regressions:
+10. Add a playthrough file to catch regressions:
     *   Run `./open_spiel/scripts/generate_new_playthrough.sh new_game` to
         generate a random game, to be used by integration tests to prevent any
         regression. `open_spiel/integration_tests/playthrough_test.py` will

diff --git a/docs/games.md b/docs/games.md
@@ -16,6 +16,7 @@ Status                                       | Game
 <font color="orange"><b>~</b></font>         | [Bargaining](#bargaining)
 <font color="orange"><b>~</b></font>         | [Battleship](#battleship)
 <font color="orange"><b>~</b></font>         | [Blackjack](#blackjack)
+<font color="orange"><b>~</b></font>         | [Block Dominoes](#block-dominoes)
 ![](_static/green_circ10.png "green circle") | [Breakthrough](#breakthrough)
 ![](_static/green_circ10.png "green circle") | [Bridge](#bridge)
 ![](_static/green_circ10.png "green circle") | [(Uncontested) Bridge bidding](#uncontested-bridge-bidding)
@@ -28,6 +29,7 @@ Status                                       | Game
 ![](_static/green_circ10.png "green circle") | [Connect Four](#connect-four)
 <font color="orange"><b>~</b></font>         | [Cooperative Box-Pushing](#cooperative-box-pushing)
 ![](_static/green_circ10.png "green circle") | [Chess](#chess)
+<font color="orange"><b>~</b></font>         | [Crazy Eights](#crazy-eights)
 <font color="orange"><b>~</b></font>         | [Dark Hex](#dark-hex)
 <font color="orange"><b>~</b></font>         | [Deep Sea](#deep-sea)
 <font color="orange"><b>~</b></font>         | [Dou Dizhu](#dou-dizhu)
@@ -159,6 +161,17 @@ Status                                       | Game
 *   1 player.
 *   [Wikipedia](https://en.wikipedia.org/wiki/Blackjack)
 
+### Block Dominoes
+
+*   Most simple version of dominoes.
+*   Consists of 28 tiles, featuring all combinations of spot counts (also called
+    pips or dots) between zero and six.
+*   Traditional game.
+*   Non-deterministic.
+*   Imperfect information.
+*   2 players.
+*   [Wikipedia]([https://en.wikipedia.org/wiki/Blackjack]\(https://en.wikipedia.org/wiki/Dominoes#Blocking_game\))
+
 ### Breakthrough
 
 *   Simplified chess using only pawns.
@@ -292,6 +305,18 @@ Status                                       | Game
 *   2 players.
 *   [Wikipedia](https://en.wikipedia.org/wiki/Chess)
 
+### Crazy Eights
+
+*   A precursor of UNO (see [here](https://www.unorules.org/crazy-eights/)).
+*   Players try to match the rank or suit of the previous played card.
+*   Eights are viewed as wild cards.
+*   In an alternative version, special cards such as skip, reverse, draw-two are
+    permitted.
+*   Nondeterministic.
+*   Imperfect information.
+*   >=2 players.
+*   [Wikipedia](https://en.wikipedia.org/wiki/Crazy_Eights)
+
 ### Dark Hex
 
 *   Hex, except the opponent's tokens are hidden. (Imperfect-information
@@ -321,7 +346,7 @@ Status                                       | Game
 *   Non-deterministic.
 *   Imperfect information.
 *   Three players.
-*   [Wikipeda](https://en.wikipedia.org/wiki/Dou_dizhu)
+*   [Wikipedia](https://en.wikipedia.org/wiki/Dou_dizhu)
 
 ### Euchre
 
@@ -487,7 +512,6 @@ Status                                       | Game
 *   2 players.
 *   [Wikipedia](https://en.wikipedia.org/wiki/Liar%27s_dice)
 
-
 ### Liar's Poker
 
 *   Players bid and bluff on the state of all hands, given only the state of
@@ -499,7 +523,6 @@ Status                                       | Game
 *   2 or more players.
 *   [Wikipedia](https://en.wikipedia.org/wiki/Liar%27s_poker)
 
-
 ### Mensch Aergere Dich Nicht 
 
 *   Players roll dice to move their pegs toward their home row while throwing
@@ -510,7 +533,6 @@ Status                                       | Game
 *   2-4 players.
 *   [Wikipedia](https://en.wikipedia.org/wiki/Mensch_%C3%A4rgere_Dich_nicht)
 
-
 ### Mancala
 
 *   Players take turns sowing beans on the board and try to capture more beans

diff --git a/docs/install.md b/docs/install.md
@@ -118,7 +118,7 @@ In a nutshell:
     ```
 
     Additionally, if you intend to use one of the
-    [optional Python dependencies](open_spiel/scripts/python_extra_deps.sh), you
+    optional Python dependencies (see [open_spiel/scripts/install.sh](https://github.com/deepmind/open_spiel/blob/master/open_spiel/scripts/install.sh)), you
     must manually install and/or upgrade them, e.g.: `bash pip install --upgrade
     torch==x.xx.x jax==x.x.x` where `x.xx.x` should be the desired version
     numbers (which can be found at the link above).
@@ -280,7 +280,7 @@ python3 -m pip install --upgrade -r requirements.txt
 
 ##### Optional dependencies
 
-Additionally, if you intend to use one of the [optional Python dependencies](open_spiel/scripts/python_extra_deps.sh), you must manually install and/or upgrade them. The installation scripts will not install or upgrade these dependencies. e.g.:
+Additionally, if you intend to use one of the optional Python dependencies (see [open_spiel/scripts/install.sh](https://github.com/deepmind/open_spiel/blob/master/open_spiel/scripts/install.sh)), you must manually install and/or upgrade them. The installation scripts will not install or upgrade these dependencies. e.g.:
 
 ```bash
 python3 -m pip install --upgrade torch==x.xx.x jax==x.x.x

diff --git a/docs/windows.md b/docs/windows.md
@@ -16,7 +16,7 @@ any bugs or problems you encounter.
 
 This option will describe how to install and use OpenSpiel on Windows 10 via
 [Visual Studio Community Edition](https://visualstudio.microsoft.com/vs/community/).
-This process has been written for Windows 10 and tested on Windos 10 Home
+This process has been written for Windows 10 and tested on Windows 10 Home
 Version 20H2, build 19042.1415 (installed on Nov 26th, 2021).
 
 When installing Visual Studio, enable the C++ and Python development, and also
@@ -29,7 +29,7 @@ You will need to have the following dependencies installed:
 *   [git](https://gitforwindows.org/)
 *   [Python](https://www.python.org/downloads/windows/). Note: get the latest
     3.9 release as OpenSpiel has not been tested on 3.10 yet. Also, tick the box
-    during instalation to ensure Python executable is in your path.
+    during installation to ensure Python executable is in your path.
 *   Recommended: Windows Terminal / Powershell.
 
 The rest of the instructions will assume that OpenSpiel is cloned in
@@ -174,7 +174,7 @@ This process has been written for Windows 10, and tested on Windows 10 build
     directory and the `open_spiel` directory.
 
     When using a virtualenv, the following should be added to
-    `<virtualenv>/bin/activate`. For a system-wide install, ddd it in your
+    `<virtualenv>/bin/activate`. For a system-wide install, add it in your
     `.bashrc` or `.profile`.
 
     ```bash
@@ -186,7 +186,7 @@ This process has been written for Windows 10, and tested on Windows 10 build
 
 9.  Running the first example
 
-    In the `build` directory, running `examples/example` will prints out a list
+    In the `build` directory, running `examples/example` will print out a list
     of registered games and the usage. Now, let’s play game of Tic-Tac-Toe with
     uniform random players:
 

diff --git a/open_spiel/algorithms/alpha_zero_torch/README.md b/open_spiel/algorithms/alpha_zero_torch/README.md
@@ -7,6 +7,10 @@ To build and use this implementation, you must set the optional global variables
 `OPEN_SPIEL_BUILD_WITH_LIBTORCH` and `OPEN_SPIEL_BUILD_WITH_LIBNOP` to `ON` when
 installing dependencies and building OpenSpiel.
 
+**Note**: Note: there are currently known problems with the C++ PyTorch:
+inteferences with pybind11 versions. Until it is properly fixed, please see
+[the workaround described here](https://github.com/deepmind/open_spiel/issues/966#issuecomment-1322982393).
+
 Then, to get started, see `examples/alpha_zero_torch_example.cc`.
 
 Important note: this implementation was a user contribution (see

diff --git a/open_spiel/algorithms/expected_returns.cc b/open_spiel/algorithms/expected_returns.cc
@@ -20,6 +20,7 @@
 
 #include "open_spiel/simultaneous_move_game.h"
 #include "open_spiel/spiel.h"
+#include "open_spiel/spiel_utils.h"
 
 namespace open_spiel {
 namespace algorithms {
@@ -101,12 +102,17 @@ std::vector<double> ExpectedReturnsImpl(
       SpielFatalError("Error in ExpectedReturnsImpl; infostate not found.");
     }
     values = state.Rewards();
+    float total_prob = 0.0;
     for (const Action action : state.LegalActions()) {
       std::unique_ptr<State> child = state.Child(action);
+      // GetProb can return -1 for legal actions not in the policy. We treat
+      // these as having zero probability, but check that at least some actions
+      // have positive probability.
       double action_prob = GetProb(state_policy, action);
-      SPIEL_CHECK_GE(action_prob, 0.0);
       SPIEL_CHECK_LE(action_prob, 1.0);
       if (action_prob > prob_cut_threshold) {
+        SPIEL_CHECK_GE(action_prob, 0.0);
+        total_prob += action_prob;
         std::vector<double> child_values =
             ExpectedReturnsImpl(
                 *child, policy_func, depth_limit - 1, prob_cut_threshold);
@@ -115,6 +121,9 @@ std::vector<double> ExpectedReturnsImpl(
         }
       }
     }
+    // Check that there is a least some positive mass on at least one action.
+    // Consider using: SPIEL_CHECK_FLOAT_EQ(total_prob, 1.0);
+    SPIEL_CHECK_GT(total_prob, 0.0);
   }
   SPIEL_CHECK_EQ(values.size(), state.NumPlayers());
   return values;

diff --git a/open_spiel/algorithms/expected_returns.h b/open_spiel/algorithms/expected_returns.h
@@ -29,6 +29,9 @@ namespace algorithms {
 // prob_cut_threshold > 0 will cut the tree search if the reach probability
 // goes below this value resulting in an approximate return.
 //
+// Policies need not be complete; any missing legal actions will be assumed to
+// have zero probability.
+//
 // The second overloaded function acts the same way, except assumes that all of
 // the players' policies are encapsulated in one joint policy.
 //

diff --git a/open_spiel/algorithms/tabular_best_response_mdp.cc b/open_spiel/algorithms/tabular_best_response_mdp.cc
@@ -22,7 +22,6 @@
 
 #include "open_spiel/abseil-cpp/absl/algorithm/container.h"
 #include "open_spiel/abseil-cpp/absl/memory/memory.h"
-#include "open_spiel/abseil-cpp/absl/strings/str_cat.h"
 #include "open_spiel/algorithms/expected_returns.h"
 #include "open_spiel/policy.h"
 #include "open_spiel/simultaneous_move_game.h"
@@ -404,7 +403,7 @@ TabularBestResponseMDPInfo TabularBestResponseMDP::Exploitability() {
   TabularBestResponseMDPInfo br_info = ComputeBestResponses();
   br_info.nash_conv = absl::c_accumulate(br_info.br_values, 0.0);
   br_info.exploitability =
-      (br_info.nash_conv - game_.UtilitySum()) / num_players_;
+      (br_info.nash_conv - *game_.UtilitySum()) / num_players_;
   return br_info;
 }
 

diff --git a/open_spiel/algorithms/tabular_exploitability.cc b/open_spiel/algorithms/tabular_exploitability.cc
@@ -20,7 +20,6 @@
 
 #include "open_spiel/algorithms/best_response.h"
 #include "open_spiel/algorithms/expected_returns.h"
-#include "open_spiel/algorithms/history_tree.h"
 #include "open_spiel/policy.h"
 #include "open_spiel/spiel.h"
 #include "open_spiel/spiel_utils.h"
@@ -44,7 +43,7 @@ double Exploitability(const Game& game, const Policy& policy) {
     TabularBestResponse best_response(game, i, &policy);
     nash_conv += best_response.Value(*root);
   }
-  return (nash_conv - game.UtilitySum()) / game.NumPlayers();
+  return (nash_conv - *game.UtilitySum()) / game.NumPlayers();
 }
 
 double Exploitability(

diff --git a/open_spiel/colabs/deep_cfr_pytorch.ipynb b/open_spiel/colabs/deep_cfr_pytorch.ipynb
@@ -354,7 +354,8 @@
         "      return state.returns()[player]\n",
         "    elif state.is_chance_node():\n",
         "      # If this is a chance node, sample an action\n",
-        "      action = np.random.choice([i[0] for i in state.chance_outcomes()])\n",
+        "      chance_outcome, chance_proba = zip(*state.chance_outcomes())\n",
+        "      action = np.random.choice(chance_outcome, p=chance_proba)\n",
         "      return self._traverse_game_tree(state.child(action), player)\n",
         "    elif state.current_player() == player:\n",
         "      sampled_regret = collections.defaultdict(float)\n",

diff --git a/open_spiel/data/paper_data/pbe_rrps/README.md b/open_spiel/data/paper_data/pbe_rrps/README.md
@@ -0,0 +1,6 @@
+The `bot_table_file.txt` is a data set described in
+[Population-based Evaluation in Repeated RPS as a Benchmark for Multiagent RL](https://arxiv.org/abs/2303.03196)
+and parsed by `python/examples/roshambo_population_example.py`. 
+
+It contains a cross-table of the expected values for all possible match-ups
+between the 43 RRPS bots, using an average of 1000 games per cell.