From e62a0a9ec69fc712fb2f0320cfad860731b5bedf Mon Sep 17 00:00:00 2001 From: William Wong Date: Tue, 6 Dec 2022 01:51:52 -0500 Subject: [PATCH 01/13] Working initial liars poker file structure --- .../playthroughs/python_liars_poker.txt | 240 ++++++++++++++++++ open_spiel/python/CMakeLists.txt | 1 + open_spiel/python/games/__init__.py | 1 + open_spiel/python/games/liars_poker.py | 196 ++++++++++++++ open_spiel/python/games/liars_poker_test.py | 111 ++++++++ open_spiel/python/tests/pyspiel_test.py | 1 + 6 files changed, 550 insertions(+) create mode 100644 open_spiel/integration_tests/playthroughs/python_liars_poker.txt create mode 100644 open_spiel/python/games/liars_poker.py create mode 100644 open_spiel/python/games/liars_poker_test.py diff --git a/open_spiel/integration_tests/playthroughs/python_liars_poker.txt b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt new file mode 100644 index 0000000000..5a36ef7cc7 --- /dev/null +++ b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt @@ -0,0 +1,240 @@ +game: liars_poker + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Python Liars Poker" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "python_liars_poker" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 9 +PolicyTensorShape() = [9] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [3, 3, 3] +ObservationTensorLayout() = TensorLayout.CHW 
+ObservationTensorSize() = 27 +MaxGameLength() = 9 +ToString() = "liars_poker()" + +# State 0 +# ... +# ... +# ... +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = "...\n...\n..." +ObservationString(1) = "...\n...\n..." +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(1,0)", "x(1,1)", "x(1,2)", "x(2,0)", "x(2,1)", "x(2,2)"] + +# Apply action "x(2,2)" +action: 8 + +# State 1 +# ... +# ... +# ..x +IsTerminal() = False +History() = [8] +HistoryString() = "8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "8" +InformationStateString(1) = "8" +ObservationString(0) = "...\n...\n..x" +ObservationString(1) = "...\n...\n..x" +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◯ ◯◯◯ ◯◯◉ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◯ ◯◯◯ ◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7] +StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(1,0)", "o(1,1)", "o(1,2)", "o(2,0)", "o(2,1)"] + +# Apply action "o(1,0)" +action: 3 + +# State 2 +# ... +# o.. 
+# ..x +IsTerminal() = False +History() = [8, 3] +HistoryString() = "8, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "8, 3" +InformationStateString(1) = "8, 3" +ObservationString(0) = "...\no..\n..x" +ObservationString(1) = "...\no..\n..x" +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◉◉◯ ◯◯◯ ◯◯◉ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◉◉◯ ◯◯◯ ◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 4, 5, 6, 7] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(1,1)", "x(1,2)", "x(2,0)", "x(2,1)"] + +# Apply action "x(2,0)" +action: 6 + +# State 3 +# ... +# o.. +# x.x +IsTerminal() = False +History() = [8, 3, 6] +HistoryString() = "8, 3, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "8, 3, 6" +InformationStateString(1) = "8, 3, 6" +ObservationString(0) = "...\no..\nx.x" +ObservationString(1) = "...\no..\nx.x" +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◯ ◯◯◯ ◉◯◉ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◯ ◯◯◯ ◉◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 4, 5, 7] +StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(1,1)", "o(1,2)", "o(2,1)"] + +# Apply action "o(0,0)" +action: 0 + +# State 4 +# o.. +# o.. +# x.x +IsTerminal() = False +History() = [8, 3, 6, 0] +HistoryString() = "8, 3, 6, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "8, 3, 6, 0" +InformationStateString(1) = "8, 3, 6, 0" +ObservationString(0) = "o..\no..\nx.x" +ObservationString(1) = "o..\no..\nx.x" +ObservationTensor(0): +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◯ ◯◯◯ ◉◯◉ +ObservationTensor(1): +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◯ ◯◯◯ ◉◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 4, 5, 7] +StringLegalActions() = ["x(0,1)", "x(0,2)", "x(1,1)", "x(1,2)", "x(2,1)"] + +# Apply action "x(0,2)" +action: 2 + +# State 5 +# o.x +# o.. 
+# x.x +IsTerminal() = False +History() = [8, 3, 6, 0, 2] +HistoryString() = "8, 3, 6, 0, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "8, 3, 6, 0, 2" +InformationStateString(1) = "8, 3, 6, 0, 2" +ObservationString(0) = "o.x\no..\nx.x" +ObservationString(1) = "o.x\no..\nx.x" +ObservationTensor(0): +◯◉◯ ◉◯◯ ◯◯◉ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◯ ◯◯◯ ◉◯◉ +ObservationTensor(1): +◯◉◯ ◉◯◯ ◯◯◉ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◯ ◯◯◯ ◉◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 4, 5, 7] +StringLegalActions() = ["o(0,1)", "o(1,1)", "o(1,2)", "o(2,1)"] + +# Apply action "o(0,1)" +action: 1 + +# State 6 +# Apply action "x(1,2)" +action: 5 + +# State 7 +# oox +# o.x +# x.x +IsTerminal() = True +History() = [8, 3, 6, 0, 2, 1, 5] +HistoryString() = "8, 3, 6, 0, 2, 1, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "8, 3, 6, 0, 2, 1, 5" +InformationStateString(1) = "8, 3, 6, 0, 2, 1, 5" +ObservationString(0) = "oox\no.x\nx.x" +ObservationString(1) = "oox\no.x\nx.x" +ObservationTensor(0): +◯◯◯ ◉◉◯ ◯◯◉ +◯◉◯ ◉◯◯ ◯◯◉ +◯◉◯ ◯◯◯ ◉◯◉ +ObservationTensor(1): +◯◯◯ ◉◉◯ ◯◯◉ +◯◉◯ ◉◯◯ ◯◯◉ +◯◉◯ ◯◯◯ ◉◯◉ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/open_spiel/python/CMakeLists.txt b/open_spiel/python/CMakeLists.txt index 544ce4f734..720c923659 100644 --- a/open_spiel/python/CMakeLists.txt +++ b/open_spiel/python/CMakeLists.txt @@ -221,6 +221,7 @@ set(PYTHON_TESTS ${PYTHON_TESTS} games/data_test.py games/dynamic_routing_test.py games/dynamic_routing_utils_test.py + games/liars_poker_test.py games/tic_tac_toe_test.py mfg/algorithms/best_response_value_test.py mfg/algorithms/mirror_descent_test.py diff --git a/open_spiel/python/games/__init__.py b/open_spiel/python/games/__init__.py index c5c8bb7c97..959e315ea7 100644 --- a/open_spiel/python/games/__init__.py +++ b/open_spiel/python/games/__init__.py @@ -30,3 +30,4 @@ from open_spiel.python.games import iterated_prisoners_dilemma 
from open_spiel.python.games import kuhn_poker from open_spiel.python.games import tic_tac_toe +from open_spiel.python.games import liars_poker \ No newline at end of file diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py new file mode 100644 index 0000000000..3fd8765d11 --- /dev/null +++ b/open_spiel/python/games/liars_poker.py @@ -0,0 +1,196 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Tic tac toe (noughts and crosses), implemented in Python. + +This is a demonstration of implementing a deterministic perfect-information +game in Python. + +Python games are significantly slower than C++, but it may still be suitable +for prototyping or for small games. + +It is possible to run C++ algorithms on Python implemented games, This is likely +to have good performance if the algorithm simply extracts a game tree and then +works with that (e.g. CFR algorithms). It is likely to be poor if the algorithm +relies on processing and updating states as it goes, e.g. MCTS. 
+""" + +import numpy as np + +from open_spiel.python.observation import IIGObserverForPublicInfoGame +import pyspiel + +_NUM_PLAYERS = 2 +_NUM_ROWS = 3 +_NUM_COLS = 3 +_NUM_CELLS = _NUM_ROWS * _NUM_COLS +_GAME_TYPE = pyspiel.GameType( + short_name="python_liars_poker", + long_name="Python Liars Poker", + dynamics=pyspiel.GameType.Dynamics.SEQUENTIAL, + chance_mode=pyspiel.GameType.ChanceMode.DETERMINISTIC, + information=pyspiel.GameType.Information.PERFECT_INFORMATION, + utility=pyspiel.GameType.Utility.ZERO_SUM, + reward_model=pyspiel.GameType.RewardModel.TERMINAL, + max_num_players=_NUM_PLAYERS, + min_num_players=_NUM_PLAYERS, + provides_information_state_string=True, + provides_information_state_tensor=False, + provides_observation_string=True, + provides_observation_tensor=True, + parameter_specification={}) +_GAME_INFO = pyspiel.GameInfo( + num_distinct_actions=_NUM_CELLS, + max_chance_outcomes=0, + num_players=2, + min_utility=-1.0, + max_utility=1.0, + utility_sum=0.0, + max_game_length=_NUM_CELLS) + + +class LiarsPoker(pyspiel.Game): + """A Python version of the Tic-Tac-Toe game.""" + + def __init__(self, params=None): + super().__init__(_GAME_TYPE, _GAME_INFO, params or dict()) + + def new_initial_state(self): + """Returns a state corresponding to the start of a game.""" + return LiarsPokerState(self) + + def make_py_observer(self, iig_obs_type=None, params=None): + """Returns an object used for observing game state.""" + if ((iig_obs_type is None) or + (iig_obs_type.public_info and not iig_obs_type.perfect_recall)): + return BoardObserver(params) + else: + return IIGObserverForPublicInfoGame(iig_obs_type, params) + + +class LiarsPokerState(pyspiel.State): + """A python version of the Tic-Tac-Toe state.""" + + def __init__(self, game): + """Constructor; should only be called by Game.new_initial_state.""" + super().__init__(game) + self._cur_player = 0 + self._player0_score = 0.0 + self._is_terminal = False + self.board = np.full((_NUM_ROWS, _NUM_COLS), 
".") + + # OpenSpiel (PySpiel) API functions are below. This is the standard set that + # should be implemented by every perfect-information sequential-move game. + + def current_player(self): + """Returns id of the next player to move, or TERMINAL if game is over.""" + return pyspiel.PlayerId.TERMINAL if self._is_terminal else self._cur_player + + def _legal_actions(self, player): + """Returns a list of legal actions, sorted in ascending order.""" + return [a for a in range(_NUM_CELLS) if self.board[_coord(a)] == "."] + + def _apply_action(self, action): + """Applies the specified action to the state.""" + self.board[_coord(action)] = "x" if self._cur_player == 0 else "o" + if _line_exists(self.board): + self._is_terminal = True + self._player0_score = 1.0 if self._cur_player == 0 else -1.0 + elif all(self.board.ravel() != "."): + self._is_terminal = True + else: + self._cur_player = 1 - self._cur_player + + def _action_to_string(self, player, action): + """Action -> string.""" + row, col = _coord(action) + return "{}({},{})".format("x" if player == 0 else "o", row, col) + + def is_terminal(self): + """Returns True if the game is over.""" + return self._is_terminal + + def returns(self): + """Total reward for each player over the course of the game so far.""" + return [self._player0_score, -self._player0_score] + + def __str__(self): + """String for debug purposes. No particular semantics are required.""" + return _board_to_string(self.board) + + +class BoardObserver: + """Observer, conforming to the PyObserver interface (see observation.py).""" + + def __init__(self, params): + """Initializes an empty observation tensor.""" + if params: + raise ValueError(f"Observation parameters not supported; passed {params}") + # The observation should contain a 1-D tensor in `self.tensor` and a + # dictionary of views onto the tensor, which may be of any shape. + # Here the observation is indexed `(cell state, row, column)`. 
+ shape = (1 + _NUM_PLAYERS, _NUM_ROWS, _NUM_COLS) + self.tensor = np.zeros(np.prod(shape), np.float32) + self.dict = {"observation": np.reshape(self.tensor, shape)} + + def set_from(self, state, player): + """Updates `tensor` and `dict` to reflect `state` from PoV of `player`.""" + del player + # We update the observation via the shaped tensor since indexing is more + # convenient than with the 1-D tensor. Both are views onto the same memory. + obs = self.dict["observation"] + obs.fill(0) + for row in range(_NUM_ROWS): + for col in range(_NUM_COLS): + cell_state = ".ox".index(state.board[row, col]) + obs[cell_state, row, col] = 1 + + def string_from(self, state, player): + """Observation of `state` from the PoV of `player`, as a string.""" + del player + return _board_to_string(state.board) + + +# Helper functions for game details. + + +def _line_value(line): + """Checks a possible line, returning the winning symbol if any.""" + if all(line == "x") or all(line == "o"): + return line[0] + + +def _line_exists(board): + """Checks if a line exists, returns "x" or "o" if so, and None otherwise.""" + return (_line_value(board[0]) or _line_value(board[1]) or + _line_value(board[2]) or _line_value(board[:, 0]) or + _line_value(board[:, 1]) or _line_value(board[:, 2]) or + _line_value(board.diagonal()) or + _line_value(np.fliplr(board).diagonal())) + + +def _coord(move): + """Returns (row, col) from an action id.""" + return (move // _NUM_COLS, move % _NUM_COLS) + + +def _board_to_string(board): + """Returns a string representation of the board.""" + return "\n".join("".join(row) for row in board) + + +# Register the game with the OpenSpiel library + +pyspiel.register_game(_GAME_TYPE, LiarsPoker) diff --git a/open_spiel/python/games/liars_poker_test.py b/open_spiel/python/games/liars_poker_test.py new file mode 100644 index 0000000000..9905b29822 --- /dev/null +++ b/open_spiel/python/games/liars_poker_test.py @@ -0,0 +1,111 @@ +# Copyright 2019 DeepMind Technologies 
Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Tests for Python Tic-Tac-Toe.""" + +import difflib +import os +import pickle + +from absl.testing import absltest +import numpy as np +from open_spiel.python.algorithms.get_all_states import get_all_states +from open_spiel.python.games import liars_poker +from open_spiel.python.observation import make_observation +import pyspiel + +_DATA_DIR = "open_spiel/integration_tests/playthroughs/" + + +class TicTacToeTest(absltest.TestCase): + + def test_can_create_game_and_state(self): + """Checks we can create the game and a state.""" + game = liars_poker.LiarsPoker() + state = game.new_initial_state() + self.assertEqual(str(state), "...\n...\n...") + + def test_random_game(self): + """Tests basic API functions.""" + # This is here mostly to show the API by example. + # More serious simulation tests are done in python/tests/games_sim_test.py + # and in test_game_from_cc (below), both of which test the conformance to + # the API thoroughly. 
+ game = liars_poker.LiarsPoker() + state = game.new_initial_state() + while not state.is_terminal(): + print(state) + cur_player = state.current_player() + legal_actions = state.legal_actions() + action = np.random.choice(legal_actions) + print("Player {} chooses action {}".format(cur_player, action)) + state.apply_action(action) + print(state) + print("Returns: {}".format(state.returns())) + + def test_game_from_cc(self): + """Runs our standard game tests, checking API consistency.""" + game = pyspiel.load_game("python_liars_poker") + pyspiel.random_sim_test(game, num_sims=10, serialize=False, verbose=True) + + def test_observation_tensors_same(self): + """Checks observation tensor is the same from C++ and from Python.""" + game = pyspiel.load_game("python_liars_poker") + state = game.new_initial_state() + for a in [4, 5, 2, 3]: + state.apply_action(a) + py_obs = make_observation(game) + py_obs.set_from(state, state.current_player()) + cc_obs = state.observation_tensor() + np.testing.assert_array_equal(py_obs.tensor, cc_obs) + + def test_pickle(self): + """Checks pickling and unpickling of game and state.""" + game = pyspiel.load_game("python_liars_poker") + pickled_game = pickle.dumps(game) + unpickled_game = pickle.loads(pickled_game) + self.assertEqual(str(game), str(unpickled_game)) + state = game.new_initial_state() + for a in [4, 2, 3, 7]: + state.apply_action(a) + ser_str = pyspiel.serialize_game_and_state(game, state) + new_game, new_state = pyspiel.deserialize_game_and_state(ser_str) + self.assertEqual(str(game), str(new_game)) + self.assertEqual(str(state), str(new_state)) + pickled_state = pickle.dumps(state) + unpickled_state = pickle.loads(pickled_state) + self.assertEqual(str(state), str(unpickled_state)) + + def test_cloned_state_matches_original_state(self): + """Check we can clone states successfully.""" + game = liars_poker.LiarsPoker() + state = game.new_initial_state() + state.apply_action(1) + state.apply_action(2) + clone = state.clone() + + 
self.assertEqual(state.history(), clone.history()) + self.assertEqual(state.num_players(), clone.num_players()) + self.assertEqual(state.move_number(), clone.move_number()) + self.assertEqual(state.num_distinct_actions(), clone.num_distinct_actions()) + + self.assertEqual(state._cur_player, clone._cur_player) + self.assertEqual(state._player0_score, clone._player0_score) + self.assertEqual(state._is_terminal, clone._is_terminal) + np.testing.assert_array_equal(state.board, clone.board) + + +if __name__ == "__main__": + absltest.main() diff --git a/open_spiel/python/tests/pyspiel_test.py b/open_spiel/python/tests/pyspiel_test.py index 0018f24ca1..200cc79428 100644 --- a/open_spiel/python/tests/pyspiel_test.py +++ b/open_spiel/python/tests/pyspiel_test.py @@ -106,6 +106,7 @@ "python_mfg_predator_prey", "python_kuhn_poker", "python_tic_tac_toe", + "python_liars_poker", "quoridor", "repeated_game", "rbc", From c7848f8a0e82b99b1b2fa88b3142452e9e1799f0 Mon Sep 17 00:00:00 2001 From: William Wong Date: Tue, 6 Dec 2022 02:14:00 -0500 Subject: [PATCH 02/13] Liars poker game framework --- open_spiel/python/games/liars_poker.py | 186 ++++++++++--------------- 1 file changed, 72 insertions(+), 114 deletions(-) diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py index 3fd8765d11..dd62f4e3c1 100644 --- a/open_spiel/python/games/liars_poker.py +++ b/open_spiel/python/games/liars_poker.py @@ -13,56 +13,48 @@ # limitations under the License. # Lint as python3 -"""Tic tac toe (noughts and crosses), implemented in Python. +"""Liar's Poker implemented in Python.""" -This is a demonstration of implementing a deterministic perfect-information -game in Python. - -Python games are significantly slower than C++, but it may still be suitable -for prototyping or for small games. 
- -It is possible to run C++ algorithms on Python implemented games, This is likely -to have good performance if the algorithm simply extracts a game tree and then -works with that (e.g. CFR algorithms). It is likely to be poor if the algorithm -relies on processing and updating states as it goes, e.g. MCTS. -""" +import enum import numpy as np -from open_spiel.python.observation import IIGObserverForPublicInfoGame import pyspiel + +class Action(enum.IntEnum): + BID = 0 + CHALLENGE = 1 + _NUM_PLAYERS = 2 -_NUM_ROWS = 3 -_NUM_COLS = 3 -_NUM_CELLS = _NUM_ROWS * _NUM_COLS +_HAND_LENGTH = 3 +_NUM_DIGITS = 3 # Number of digits to include from the range 1, 2, ..., 9, 0 _GAME_TYPE = pyspiel.GameType( short_name="python_liars_poker", long_name="Python Liars Poker", dynamics=pyspiel.GameType.Dynamics.SEQUENTIAL, - chance_mode=pyspiel.GameType.ChanceMode.DETERMINISTIC, - information=pyspiel.GameType.Information.PERFECT_INFORMATION, + chance_mode=pyspiel.GameType.ChanceMode.EXPLICIT_STOCHASTIC, + information=pyspiel.GameType.Information.IMPERFECT_INFORMATION, utility=pyspiel.GameType.Utility.ZERO_SUM, reward_model=pyspiel.GameType.RewardModel.TERMINAL, max_num_players=_NUM_PLAYERS, min_num_players=_NUM_PLAYERS, provides_information_state_string=True, provides_information_state_tensor=False, - provides_observation_string=True, + provides_observation_string=False, provides_observation_tensor=True, - parameter_specification={}) + parameter_specification={ + "players": _NUM_PLAYERS, + "hand_length": _HAND_LENGTH, + "num_digits": _NUM_DIGITS + }) _GAME_INFO = pyspiel.GameInfo( - num_distinct_actions=_NUM_CELLS, - max_chance_outcomes=0, - num_players=2, - min_utility=-1.0, - max_utility=1.0, - utility_sum=0.0, - max_game_length=_NUM_CELLS) - + num_distinct_actions=len(Action), + max_chance_outcomes=_HAND_LENGTH * _NUM_DIGITS, + num_players=_NUM_PLAYERS) class LiarsPoker(pyspiel.Game): - """A Python version of the Tic-Tac-Toe game.""" + """A Python version of Liar's poker.""" def 
__init__(self, params=None): super().__init__(_GAME_TYPE, _GAME_INFO, params or dict()) @@ -73,123 +65,89 @@ def new_initial_state(self): def make_py_observer(self, iig_obs_type=None, params=None): """Returns an object used for observing game state.""" - if ((iig_obs_type is None) or - (iig_obs_type.public_info and not iig_obs_type.perfect_recall)): - return BoardObserver(params) - else: - return IIGObserverForPublicInfoGame(iig_obs_type, params) + return LiarsPokerObserver( + iig_obs_type or pyspiel.IIGObservationType(perfect_recall=False), + params) class LiarsPokerState(pyspiel.State): - """A python version of the Tic-Tac-Toe state.""" + """A python version of the Liars Poker state.""" def __init__(self, game): """Constructor; should only be called by Game.new_initial_state.""" super().__init__(game) - self._cur_player = 0 - self._player0_score = 0.0 - self._is_terminal = False - self.board = np.full((_NUM_ROWS, _NUM_COLS), ".") - - # OpenSpiel (PySpiel) API functions are below. This is the standard set that - # should be implemented by every perfect-information sequential-move game. 
def current_player(self): """Returns id of the next player to move, or TERMINAL if game is over.""" - return pyspiel.PlayerId.TERMINAL if self._is_terminal else self._cur_player + if self._game_over: + return pyspiel.PlayerId.TERMINAL + elif len(self.cards) < _NUM_PLAYERS: + return pyspiel.PlayerId.CHANCE + else: + return self._next_player def _legal_actions(self, player): """Returns a list of legal actions, sorted in ascending order.""" - return [a for a in range(_NUM_CELLS) if self.board[_coord(a)] == "."] + assert player >= 0 + return [Action.PASS, Action.BET] + + def chance_outcomes(self): + """Returns the possible chance outcomes and their probabilities.""" + assert self.is_chance_node() + outcomes = sorted(_DECK - set(self.cards)) + p = 1.0 / len(outcomes) + return [(o, p) for o in outcomes] def _apply_action(self, action): """Applies the specified action to the state.""" - self.board[_coord(action)] = "x" if self._cur_player == 0 else "o" - if _line_exists(self.board): - self._is_terminal = True - self._player0_score = 1.0 if self._cur_player == 0 else -1.0 - elif all(self.board.ravel() != "."): - self._is_terminal = True + if self.is_chance_node(): + self.cards.append(action) else: - self._cur_player = 1 - self._cur_player + self.bets.append(action) + if action == Action.BET: + self.pot[self._next_player] += 1 + self._next_player = 1 - self._next_player + if ((min(self.pot) == 2) or + (len(self.bets) == 2 and action == Action.PASS) or + (len(self.bets) == 3)): + self._game_over = True def _action_to_string(self, player, action): """Action -> string.""" - row, col = _coord(action) - return "{}({},{})".format("x" if player == 0 else "o", row, col) + if player == pyspiel.PlayerId.CHANCE: + return f"Deal:{action}" + elif action == Action.PASS: + return "Pass" + else: + return "Bet" def is_terminal(self): """Returns True if the game is over.""" - return self._is_terminal + return self._game_over def returns(self): """Total reward for each player over the course 
of the game so far.""" - return [self._player0_score, -self._player0_score] + pot = self.pot + winnings = float(min(pot)) + if not self._game_over: + return [0., 0.] + elif pot[0] > pot[1]: + return [winnings, -winnings] + elif pot[0] < pot[1]: + return [-winnings, winnings] + elif self.cards[0] > self.cards[1]: + return [winnings, -winnings] + else: + return [-winnings, winnings] def __str__(self): """String for debug purposes. No particular semantics are required.""" - return _board_to_string(self.board) + return "".join([str(c) for c in self.cards] + ["pb"[b] for b in self.bets]) -class BoardObserver: +class LiarsPokerObserver: """Observer, conforming to the PyObserver interface (see observation.py).""" - - def __init__(self, params): - """Initializes an empty observation tensor.""" - if params: - raise ValueError(f"Observation parameters not supported; passed {params}") - # The observation should contain a 1-D tensor in `self.tensor` and a - # dictionary of views onto the tensor, which may be of any shape. - # Here the observation is indexed `(cell state, row, column)`. - shape = (1 + _NUM_PLAYERS, _NUM_ROWS, _NUM_COLS) - self.tensor = np.zeros(np.prod(shape), np.float32) - self.dict = {"observation": np.reshape(self.tensor, shape)} - - def set_from(self, state, player): - """Updates `tensor` and `dict` to reflect `state` from PoV of `player`.""" - del player - # We update the observation via the shaped tensor since indexing is more - # convenient than with the 1-D tensor. Both are views onto the same memory. - obs = self.dict["observation"] - obs.fill(0) - for row in range(_NUM_ROWS): - for col in range(_NUM_COLS): - cell_state = ".ox".index(state.board[row, col]) - obs[cell_state, row, col] = 1 - - def string_from(self, state, player): - """Observation of `state` from the PoV of `player`, as a string.""" - del player - return _board_to_string(state.board) - - -# Helper functions for game details. 
- - -def _line_value(line): - """Checks a possible line, returning the winning symbol if any.""" - if all(line == "x") or all(line == "o"): - return line[0] - - -def _line_exists(board): - """Checks if a line exists, returns "x" or "o" if so, and None otherwise.""" - return (_line_value(board[0]) or _line_value(board[1]) or - _line_value(board[2]) or _line_value(board[:, 0]) or - _line_value(board[:, 1]) or _line_value(board[:, 2]) or - _line_value(board.diagonal()) or - _line_value(np.fliplr(board).diagonal())) - - -def _coord(move): - """Returns (row, col) from an action id.""" - return (move // _NUM_COLS, move % _NUM_COLS) - - -def _board_to_string(board): - """Returns a string representation of the board.""" - return "\n".join("".join(row) for row in board) - + raise NotImplementedError() # Register the game with the OpenSpiel library From 7bb105cb0499b716c9630bec864129e87f123b6a Mon Sep 17 00:00:00 2001 From: William Wong Date: Tue, 6 Dec 2022 15:52:09 -0500 Subject: [PATCH 03/13] Current player, legal actions, chance outcomes logic --- .../playthroughs/python_liars_poker.txt | 4 +- open_spiel/python/games/liars_poker.py | 46 +++++++++++++++---- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/open_spiel/integration_tests/playthroughs/python_liars_poker.txt b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt index 5a36ef7cc7..033397928d 100644 --- a/open_spiel/integration_tests/playthroughs/python_liars_poker.txt +++ b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt @@ -8,8 +8,8 @@ GameType.max_num_players = 2 GameType.min_num_players = 2 GameType.parameter_specification = [] GameType.provides_information_state_string = True -GameType.provides_information_state_tensor = False -GameType.provides_observation_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = False GameType.provides_observation_tensor = True GameType.provides_factored_observation_string = False 
GameType.reward_model = RewardModel.TERMINAL diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py index dd62f4e3c1..c3e85d84e8 100644 --- a/open_spiel/python/games/liars_poker.py +++ b/open_spiel/python/games/liars_poker.py @@ -29,6 +29,9 @@ class Action(enum.IntEnum): _NUM_PLAYERS = 2 _HAND_LENGTH = 3 _NUM_DIGITS = 3 # Number of digits to include from the range 1, 2, ..., 9, 0 +_FULL_DECK = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] +_DECK = [_FULL_DECK[i] for i in range(_NUM_DIGITS)] + _GAME_TYPE = pyspiel.GameType( short_name="python_liars_poker", long_name="Python Liars Poker", @@ -40,7 +43,7 @@ class Action(enum.IntEnum): max_num_players=_NUM_PLAYERS, min_num_players=_NUM_PLAYERS, provides_information_state_string=True, - provides_information_state_tensor=False, + provides_information_state_tensor=True, provides_observation_string=False, provides_observation_tensor=True, parameter_specification={ @@ -76,27 +79,52 @@ class LiarsPokerState(pyspiel.State): def __init__(self, game): """Constructor; should only be called by Game.new_initial_state.""" super().__init__(game) + self.hands = [] # List containing the hands for each player, represented as a list. + self._current_player = 0 + self._current_bid = -1 def current_player(self): - """Returns id of the next player to move, or TERMINAL if game is over.""" - if self._game_over: + """Returns id of the current player to act. + + The id is: + - TERMINAL if game is over. + - CHANCE if a player is drawing a number to fill out their hand. + - a number otherwise. 
+ """ + if self._is_terminal: return pyspiel.PlayerId.TERMINAL - elif len(self.cards) < _NUM_PLAYERS: + elif len(self.hands) < _NUM_PLAYERS or len(self.hands[_NUM_PLAYERS - 1]) < _HAND_LENGTH: return pyspiel.PlayerId.CHANCE else: - return self._next_player + return self._current_player + + def _is_call_possible(self): + raise NotImplementedError() + + def _is_challenge_possible(self): + raise NotImplementedError() def _legal_actions(self, player): """Returns a list of legal actions, sorted in ascending order.""" assert player >= 0 - return [Action.PASS, Action.BET] + actions = [] + # Any move higher than the current bid is allowed. (Bids start at 0) + for b in range(self._current_bid + 1, _HAND_LENGTH * _NUM_PLAYERS): + actions.append(b) + + if self._is_call_possible(): + actions.append(Action.BID) + # TODO: verify Action.BID is not the same as the nubmer 0. + if self._is_challenge_possible(): + actions.append(Action.CHALLENGE) + # TODO: add game logic for when all players challenge - automatically count + return actions def chance_outcomes(self): """Returns the possible chance outcomes and their probabilities.""" assert self.is_chance_node() - outcomes = sorted(_DECK - set(self.cards)) - p = 1.0 / len(outcomes) - return [(o, p) for o in outcomes] + probability = 1.0 / len(_DECK) + return [(digit, probability) for digit in _DECK] def _apply_action(self, action): """Applies the specified action to the state.""" From 762902d2cdefcdddad3e047478b56cc725c88c7b Mon Sep 17 00:00:00 2001 From: William Wong Date: Fri, 9 Dec 2022 20:42:33 -0500 Subject: [PATCH 04/13] Apply action, counts and reward logic --- open_spiel/python/games/liars_poker.py | 132 ++++++++++++++++++------- 1 file changed, 95 insertions(+), 37 deletions(-) diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py index c3e85d84e8..b798ef5d60 100644 --- a/open_spiel/python/games/liars_poker.py +++ b/open_spiel/python/games/liars_poker.py @@ -30,7 +30,6 @@ class 
Action(enum.IntEnum): _HAND_LENGTH = 3 _NUM_DIGITS = 3 # Number of digits to include from the range 1, 2, ..., 9, 0 _FULL_DECK = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] -_DECK = [_FULL_DECK[i] for i in range(_NUM_DIGITS)] _GAME_TYPE = pyspiel.GameType( short_name="python_liars_poker", @@ -47,9 +46,9 @@ class Action(enum.IntEnum): provides_observation_string=False, provides_observation_tensor=True, parameter_specification={ - "players": _NUM_PLAYERS, + "num_players": _NUM_PLAYERS, "hand_length": _HAND_LENGTH, - "num_digits": _NUM_DIGITS + "num_digits": _NUM_DIGITS, }) _GAME_INFO = pyspiel.GameInfo( num_distinct_actions=len(Action), @@ -61,6 +60,7 @@ class LiarsPoker(pyspiel.Game): def __init__(self, params=None): super().__init__(_GAME_TYPE, _GAME_INFO, params or dict()) + self.deck = [_FULL_DECK[i] for i in range(_NUM_DIGITS)] def new_initial_state(self): """Returns a state corresponding to the start of a game.""" @@ -79,9 +79,24 @@ class LiarsPokerState(pyspiel.State): def __init__(self, game): """Constructor; should only be called by Game.new_initial_state.""" super().__init__(game) - self.hands = [] # List containing the hands for each player, represented as a list. + # Game attributes + self._num_players = game.num_players + self._hand_length = game.hand_length + self._num_digits = game.num_digits + self._deck = game.deck + self.hands = [[] for _ in range(self._num_players)] + + # Action dynamics self._current_player = 0 + self._bid_originator = 0 self._current_bid = -1 + self._num_challenges = 0 + self._is_rebid = False + + # Game over dynamics + self._game_over = False + self._winner = -1 + self._loser = -1 def current_player(self): """Returns id of the current player to act. 
@@ -93,28 +108,25 @@ def current_player(self): """ if self._is_terminal: return pyspiel.PlayerId.TERMINAL - elif len(self.hands) < _NUM_PLAYERS or len(self.hands[_NUM_PLAYERS - 1]) < _HAND_LENGTH: + elif len(self.hands[self._num_players - 1]) < self._hand_length: return pyspiel.PlayerId.CHANCE else: return self._current_player - def _is_call_possible(self): - raise NotImplementedError() - def _is_challenge_possible(self): - raise NotImplementedError() + return self._current_bid != -1 + + def _is_rebid_possible(self): + return self._num_challenges == self._num_players - 1 def _legal_actions(self, player): """Returns a list of legal actions, sorted in ascending order.""" assert player >= 0 actions = [] # Any move higher than the current bid is allowed. (Bids start at 0) - for b in range(self._current_bid + 1, _HAND_LENGTH * _NUM_PLAYERS): + for b in range(self._current_bid + 1, self._num_digits * self._hand_length * self._num_players): actions.append(b) - if self._is_call_possible(): - actions.append(Action.BID) - # TODO: verify Action.BID is not the same as the nubmer 0. if self._is_challenge_possible(): actions.append(Action.CHALLENGE) # TODO: add game logic for when all players challenge - automatically count @@ -123,29 +135,78 @@ def _legal_actions(self, player): def chance_outcomes(self): """Returns the possible chance outcomes and their probabilities.""" assert self.is_chance_node() - probability = 1.0 / len(_DECK) - return [(digit, probability) for digit in _DECK] + probability = 1.0 / self._num_digits + return [(digit, probability) for digit in self._deck] + + def _decode_bid(self, bid): + """ + Turns a bid ID in the range 0 to NUM_DIGITS * HAND_LENGTH * NUM_PLAYERS to a count and number. + + For example, take 2 players each with 2 numbers from the deck of 1, 2, and 3. + - A bid of two 1's would correspond to a bid id 1. + - Explanation: 1 is the lowest number, and the only lower bid would be zero 1's. + - A bid of three 3's would correspond to a bid id 10. 
+ - Explanation: 1-4 1's take bid ids 0-3. 1-4 2's take bid ids 4-7. 1 and 2 3's take bid ids 8 and 9. + + Returns a tuple of (count, number). For example, (1, 2) represents one 2's. + """ + count = bid % (self._hand_length * self._num_players) + number = self._deck[bid // (self._hand_length * self._num_players)] + return (count, number) + + def _counts(self): + """ + Determines if the bid originator wins or loses. + """ + bid_count, bid_number = self._decode_bid(self._current_bid) + + # Count the number of bid_numbers from all players. + matches = 0 + for player_id in range(self._num_players): + for digit in self.hands[player_id]: + if digit == bid_number: + matches += 1 + + # If the number of matches are at least the bid_count bid, then the bidder wins. + # Otherwise everyone else wins. + if matches >= bid_count: + self._winner = self._bid_originator + else: + self._loser = self._bid_originator def _apply_action(self, action): """Applies the specified action to the state.""" if self.is_chance_node(): - self.cards.append(action) - else: - self.bets.append(action) - if action == Action.BET: - self.pot[self._next_player] += 1 - self._next_player = 1 - self._next_player - if ((min(self.pot) == 2) or - (len(self.bets) == 2 and action == Action.PASS) or - (len(self.bets) == 3)): + # If we are still populating hands, draw a number for the current player. + self.hands[self._current_player].append(action) + elif action == Action.CHALLENGE: + assert self._is_challenge_possible() + self._num_challenges += 1 + # If there is no ongoing rebid, check if all players challenge before counting. + # If there is an ongoing rebid, count once all the players except the bidder challenges. + if (not self._is_rebid and self._num_challenges == self._num_players) or ( + self._is_rebid and self._num_challenges == self._num_players - 1): + # TODO: counts self._game_over = True + else: + # Set the current bid and bid originator to the action and current player. 
+ self._current_bid = action + self._bid_originator = self._current_player + # If all players but the bid originator have chllenged but the originator bids again, we have a rebid. + if self._num_challenges == self._num_players - 1: + self._is_rebid = True + else: + # Otherwise, we have a regular bid. + self._is_rebid = False + self._num_challenges = 0 + self._current_player = (self._current_player + 1) % self._num_players def _action_to_string(self, player, action): """Action -> string.""" if player == pyspiel.PlayerId.CHANCE: return f"Deal:{action}" - elif action == Action.PASS: - return "Pass" + elif action == Action.CHALLENGE: + return "Challenge" else: return "Bet" @@ -155,20 +216,17 @@ def is_terminal(self): def returns(self): """Total reward for each player over the course of the game so far.""" - pot = self.pot - winnings = float(min(pot)) - if not self._game_over: - return [0., 0.] - elif pot[0] > pot[1]: - return [winnings, -winnings] - elif pot[0] < pot[1]: - return [-winnings, winnings] - elif self.cards[0] > self.cards[1]: - return [winnings, -winnings] + if self._winner != -1: + bidder_reward = self._num_players - 1 + others_reward = -1. else: - return [-winnings, winnings] + bidder_reward = - self._num_players - 1 + others_reward = 1. + return [others_reward if player_id != self._bid_originator else bidder_reward + for player_id in range(self._num_players)] def __str__(self): + # TODO """String for debug purposes. 
No particular semantics are required.""" return "".join([str(c) for c in self.cards] + ["pb"[b] for b in self.bets]) From da690fa307ff8898345779e70cc6e2052cba25bb Mon Sep 17 00:00:00 2001 From: William Wong Date: Sun, 18 Dec 2022 01:16:03 -0800 Subject: [PATCH 05/13] Liars poker observer --- open_spiel/python/games/liars_poker.py | 78 +++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 7 deletions(-) diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py index b798ef5d60..99cfaa127b 100644 --- a/open_spiel/python/games/liars_poker.py +++ b/open_spiel/python/games/liars_poker.py @@ -70,7 +70,9 @@ def make_py_observer(self, iig_obs_type=None, params=None): """Returns an object used for observing game state.""" return LiarsPokerObserver( iig_obs_type or pyspiel.IIGObservationType(perfect_recall=False), - params) + _NUM_PLAYERS, + _HAND_LENGTH, + _NUM_DIGITS) class LiarsPokerState(pyspiel.State): @@ -80,6 +82,7 @@ def __init__(self, game): """Constructor; should only be called by Game.new_initial_state.""" super().__init__(game) # Game attributes + # TODO: need to verify have access to these game attributes. self._num_players = game.num_players self._hand_length = game.hand_length self._num_digits = game.num_digits @@ -87,6 +90,7 @@ def __init__(self, game): self.hands = [[] for _ in range(self._num_players)] # Action dynamics + self.actions = [[] for _ in range(self._num_players)] self._current_player = 0 self._bid_originator = 0 self._current_bid = -1 @@ -179,25 +183,25 @@ def _apply_action(self, action): if self.is_chance_node(): # If we are still populating hands, draw a number for the current player. self.hands[self._current_player].append(action) + return elif action == Action.CHALLENGE: + self.actions[self._current_player].append(action) assert self._is_challenge_possible() self._num_challenges += 1 # If there is no ongoing rebid, check if all players challenge before counting. 
# If there is an ongoing rebid, count once all the players except the bidder challenges. if (not self._is_rebid and self._num_challenges == self._num_players) or ( self._is_rebid and self._num_challenges == self._num_players - 1): - # TODO: counts + self._counts() self._game_over = True else: + self.actions[self._current_player].append(action) # Set the current bid and bid originator to the action and current player. self._current_bid = action self._bid_originator = self._current_player # If all players but the bid originator have chllenged but the originator bids again, we have a rebid. if self._num_challenges == self._num_players - 1: self._is_rebid = True - else: - # Otherwise, we have a regular bid. - self._is_rebid = False self._num_challenges = 0 self._current_player = (self._current_player + 1) % self._num_players @@ -228,12 +232,72 @@ def returns(self): def __str__(self): # TODO """String for debug purposes. No particular semantics are required.""" - return "".join([str(c) for c in self.cards] + ["pb"[b] for b in self.bets]) + return "Hands: {}, Bidder: {}, Current Player: {}, Current Bid: {}, Rebid: {}".format( + self.hands, + self._bid_originator, + self.current_player(), + self._current_bid, + self._is_rebid) class LiarsPokerObserver: """Observer, conforming to the PyObserver interface (see observation.py).""" - raise NotImplementedError() + + def __init__(self, iig_obs_type, num_players, hand_length, num_digits): + """Initiliazes an empty observation tensor.""" + self.num_players = num_players + self.hand_length = hand_length + + # Determine which observation pieces we want to include. + # Pieces is a list of tuples containing observation pieces. + # Pieces are described by their (name, number of elements, and shape). + pieces = [("player", num_players, (num_players,))] # One-hot encoding for the player id. 
+ if iig_obs_type.private_info == pyspiel.PrivateInfoType.SINGLE_PLAYER: + # One-hot encoding for each digit in a player's hand + pieces.append(("private_hand", hand_length * num_digits, (hand_length, num_digits))) + if iig_obs_type.public_info: + if iig_obs_type.perfect_recall: + # One-hot encoding for a player's moves at every round. + total_possible_rounds = num_players * hand_length * num_digits + num_actions = 2 + pieces.append(("action_history", + total_possible_rounds * num_actions, + (total_possible_rounds, num_actions))) + + # Build the single flat tensor. + total_size = sum(size for name, size, shape in pieces) + self.tensor = np.zeros(total_size, np.float32) + + # Build the named & reshaped views of the bits of the flat tensor. + self.dict = {} + index = 0 + for name, size, shape in pieces: + self.dict[name] = self.tensor[index:index + size].reshape(shape) + index += size + + def set_from(self, state, player): + """Updates `tensor` and `dict` to reflect `state` from PoV of `player`.""" + self.tensor.fill(0) + if "player" in self.dict: + self.dict["player"][player] = 1 + if "private_hand" in self.dict and len(state.hands[player]) == self.hand_length: + for i in range(len(state.hands[player])): + self.dict["private_hand"][i][state.hands[player][i]] = 1 + if "action_history" in self.dict: + for round, action in enumerate(state.actions[player]): + self.dict["action_history"][round, action] = 1 + + def string_from(self, state, player): + """Observation of `state` from the PoV of `player`, as a string.""" + pieces = [] + if "player" in self.dict: + pieces.append(f"p{player}") + if "private_hand" in self.dict and len(state.hands[player]) == self.hand_length: + pieces.append(f"hand:{state.hands[player]}") + if "action_history" in self.dict and state.actions[player]: + # bc = bid, challenge. b is 0 or 1 and indexes into "bc" to stringify the action. 
+ pieces.append("".join("bc"[b] for b in state.actions[player])) + return " ".join(str(p) for p in pieces) # Register the game with the OpenSpiel library From 06de6fbe95834fb62986a928f931c4d98d978c39 Mon Sep 17 00:00:00 2001 From: William Wong Date: Mon, 19 Dec 2022 20:43:01 -0800 Subject: [PATCH 06/13] State changes, bug fixes --- open_spiel/python/games/liars_poker.py | 165 +++++++++++++++++-------- 1 file changed, 111 insertions(+), 54 deletions(-) diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py index 99cfaa127b..b81cd556d0 100644 --- a/open_spiel/python/games/liars_poker.py +++ b/open_spiel/python/games/liars_poker.py @@ -26,7 +26,8 @@ class Action(enum.IntEnum): BID = 0 CHALLENGE = 1 -_NUM_PLAYERS = 2 +_MAX_NUM_PLAYERS = 10 +_MIN_NUM_PLAYERS = 2 _HAND_LENGTH = 3 _NUM_DIGITS = 3 # Number of digits to include from the range 1, 2, ..., 9, 0 _FULL_DECK = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] @@ -39,28 +40,33 @@ class Action(enum.IntEnum): information=pyspiel.GameType.Information.IMPERFECT_INFORMATION, utility=pyspiel.GameType.Utility.ZERO_SUM, reward_model=pyspiel.GameType.RewardModel.TERMINAL, - max_num_players=_NUM_PLAYERS, - min_num_players=_NUM_PLAYERS, + max_num_players=_MAX_NUM_PLAYERS, + min_num_players=_MIN_NUM_PLAYERS, provides_information_state_string=True, provides_information_state_tensor=True, provides_observation_string=False, - provides_observation_tensor=True, - parameter_specification={ - "num_players": _NUM_PLAYERS, - "hand_length": _HAND_LENGTH, - "num_digits": _NUM_DIGITS, - }) + provides_observation_tensor=True) _GAME_INFO = pyspiel.GameInfo( num_distinct_actions=len(Action), max_chance_outcomes=_HAND_LENGTH * _NUM_DIGITS, - num_players=_NUM_PLAYERS) + num_players=_MIN_NUM_PLAYERS, + min_utility=-(_MIN_NUM_PLAYERS - 1), # Reward from being challenged and losing. + max_utility=_MIN_NUM_PLAYERS - 1, # Reward for being challenged and winning. 
+ utility_sum=0.0, + # Number of possible rounds: hand_length * num_digits * num_players + # Total moves per round: num_players for non-rebid, num_players-1 for rebid + # Max game length: number of possible rounds * total moves per round + max_game_length=_HAND_LENGTH * _NUM_DIGITS * _MIN_NUM_PLAYERS**2) class LiarsPoker(pyspiel.Game): """A Python version of Liar's poker.""" def __init__(self, params=None): super().__init__(_GAME_TYPE, _GAME_INFO, params or dict()) - self.deck = [_FULL_DECK[i] for i in range(_NUM_DIGITS)] + self.deck = [_FULL_DECK[i] for i in range(params.get("num_digits", default=_NUM_DIGITS))] + self.num_players = params.get("num_players", default=_MIN_NUM_PLAYERS) + self.hand_length = params.get("hand_length", default=_HAND_LENGTH) + self.num_digits = params.get("num_digits", default=_NUM_DIGITS) def new_initial_state(self): """Returns a state corresponding to the start of a game.""" @@ -70,9 +76,10 @@ def make_py_observer(self, iig_obs_type=None, params=None): """Returns an object used for observing game state.""" return LiarsPokerObserver( iig_obs_type or pyspiel.IIGObservationType(perfect_recall=False), - _NUM_PLAYERS, - _HAND_LENGTH, - _NUM_DIGITS) + self.num_players, + self.hand_length, + self.num_digits, + params) class LiarsPokerState(pyspiel.State): @@ -82,7 +89,6 @@ def __init__(self, game): """Constructor; should only be called by Game.new_initial_state.""" super().__init__(game) # Game attributes - # TODO: need to verify have access to these game attributes. 
self._num_players = game.num_players self._hand_length = game.hand_length self._num_digits = game.num_digits @@ -90,12 +96,14 @@ def __init__(self, game): self.hands = [[] for _ in range(self._num_players)] # Action dynamics - self.actions = [[] for _ in range(self._num_players)] + total_possible_bets = game.hand_length * game.num_digits * game.num_players + self.bid_history = np.zeros((total_possible_bets, game.num_players)) + self.challenge_history = np.zeros((total_possible_bets, game.num_players)) self._current_player = 0 self._bid_originator = 0 self._current_bid = -1 self._num_challenges = 0 - self._is_rebid = False + self.is_rebid = False # Game over dynamics self._game_over = False @@ -118,22 +126,26 @@ def current_player(self): return self._current_player def _is_challenge_possible(self): + """A challenge is possible once the first bid is made.""" return self._current_bid != -1 def _is_rebid_possible(self): - return self._num_challenges == self._num_players - 1 + """A rebid is only possible when all players have challenged the original bid.""" + return not self.is_rebid and self._num_challenges == self._num_players - 1 def _legal_actions(self, player): """Returns a list of legal actions, sorted in ascending order.""" assert player >= 0 actions = [] - # Any move higher than the current bid is allowed. (Bids start at 0) - for b in range(self._current_bid + 1, self._num_digits * self._hand_length * self._num_players): - actions.append(b) + + if player != self._bid_originator or self._is_rebid_possible(): + # Any move higher than the current bid is allowed. 
(Bids start at 0) + for b in range(self._current_bid + 1, self._hand_length * self._num_digits * self._num_players): + actions.append(b) if self._is_challenge_possible(): actions.append(Action.CHALLENGE) - # TODO: add game logic for when all players challenge - automatically count + return actions def chance_outcomes(self): @@ -144,7 +156,7 @@ def chance_outcomes(self): def _decode_bid(self, bid): """ - Turns a bid ID in the range 0 to NUM_DIGITS * HAND_LENGTH * NUM_PLAYERS to a count and number. + Turns a bid ID in the range 0 to HAND_LENGTH * NUM_DIGITS * NUM_PLAYERS to a count and number. For example, take 2 players each with 2 numbers from the deck of 1, 2, and 3. - A bid of two 1's would correspond to a bid id 1. @@ -178,8 +190,16 @@ def _counts(self): else: self._loser = self._bid_originator + def _update_bid_history(self, bid, player): + """Writes a player's bid into memory.""" + self.bid_history[bid][player] = 1 + + def _update_challenge_history(self, bid, player): + """Write a player's challenge for a bid into memory.""" + self.challenge_history[bid][player] = 1 + def _apply_action(self, action): - """Applies the specified action to the state.""" + """Applies an action and updates the state.""" if self.is_chance_node(): # If we are still populating hands, draw a number for the current player. self.hands[self._current_player].append(action) @@ -187,21 +207,27 @@ def _apply_action(self, action): elif action == Action.CHALLENGE: self.actions[self._current_player].append(action) assert self._is_challenge_possible() + self._update_challenge_history(self._current_bid, self._current_player) self._num_challenges += 1 # If there is no ongoing rebid, check if all players challenge before counting. # If there is an ongoing rebid, count once all the players except the bidder challenges. 
- if (not self._is_rebid and self._num_challenges == self._num_players) or ( - self._is_rebid and self._num_challenges == self._num_players - 1): + if (not self.is_rebid and self._num_challenges == self._num_players) or ( + self.is_rebid and self._num_challenges == self._num_players - 1): self._counts() self._game_over = True else: self.actions[self._current_player].append(action) - # Set the current bid and bid originator to the action and current player. + # Set the current bid to the action. self._current_bid = action + if self._current_player == self._bid_originator: + # If the bid originator is bidding again, we have a rebid. + self.is_rebid = True + else: + # Otherwise, we have a regular bid. + self.is_rebid = False + # Set the bid originator to the current player. self._bid_originator = self._current_player - # If all players but the bid originator have chllenged but the originator bids again, we have a rebid. - if self._num_challenges == self._num_players - 1: - self._is_rebid = True + self._update_bid_history(self._current_bid, self._current_player) self._num_challenges = 0 self._current_player = (self._current_player + 1) % self._num_players @@ -222,28 +248,42 @@ def returns(self): """Total reward for each player over the course of the game so far.""" if self._winner != -1: bidder_reward = self._num_players - 1 - others_reward = -1. + others_reward = -1.0 + elif self._loser != -1: + bidder_reward = -1 * (self._num_players - 1) + others_reward = 1.0 else: - bidder_reward = - self._num_players - 1 - others_reward = 1. + # Game is not over. + bidder_reward = 0.0 + others_reward = 0.0 return [others_reward if player_id != self._bid_originator else bidder_reward for player_id in range(self._num_players)] def __str__(self): - # TODO """String for debug purposes. 
No particular semantics are required.""" return "Hands: {}, Bidder: {}, Current Player: {}, Current Bid: {}, Rebid: {}".format( self.hands, self._bid_originator, self.current_player(), self._current_bid, - self._is_rebid) + self.is_rebid) class LiarsPokerObserver: - """Observer, conforming to the PyObserver interface (see observation.py).""" - - def __init__(self, iig_obs_type, num_players, hand_length, num_digits): + """Observer, conforming to the PyObserver interface (see observation.py). + + An observation will consist of the following: + - One hot encoding of the current player number: [0 0 0 1 0 0 0] + - A vector of length hand_length containing the digits in a player's hand. + - Two matrices each of size (hand_length * num_digits * num_players, num_players) + will store bids and challenges respectively. Each row in the matrix corresponds + to a particular bid (e.g. one 1, two 5s, or eight 3s). 0 will represent no + action. 1 will represent a player's bid or a player's challenge. + - One bit for whether we are rebidding: [1] rebid occuring, [0] otherwise + - One bit for whether we are counting: [1] COUNTS called, [0] otherwise + """ + + def __init__(self, iig_obs_type, num_players, hand_length, num_digits, params=None): """Initiliazes an empty observation tensor.""" self.num_players = num_players self.hand_length = hand_length @@ -253,16 +293,20 @@ def __init__(self, iig_obs_type, num_players, hand_length, num_digits): # Pieces are described by their (name, number of elements, and shape). pieces = [("player", num_players, (num_players,))] # One-hot encoding for the player id. 
if iig_obs_type.private_info == pyspiel.PrivateInfoType.SINGLE_PLAYER: - # One-hot encoding for each digit in a player's hand - pieces.append(("private_hand", hand_length * num_digits, (hand_length, num_digits))) + # Vector containing the digits in a player's hand + pieces.append(("private_hand", hand_length, (hand_length,))) if iig_obs_type.public_info: + pieces.append(("rebid_state", 1, (1,))) + pieces.append(("counts_state", 1, (1,))) if iig_obs_type.perfect_recall: - # One-hot encoding for a player's moves at every round. - total_possible_rounds = num_players * hand_length * num_digits - num_actions = 2 - pieces.append(("action_history", - total_possible_rounds * num_actions, - (total_possible_rounds, num_actions))) + # One-hot encodings for players' moves at every round. + total_possible_rounds = hand_length * num_digits * num_players + pieces.append(("bid_history", + total_possible_rounds * num_players, + (total_possible_rounds, num_players))) + pieces.append(("challenge_history", + total_possible_rounds * num_players, + (total_possible_rounds, num_players))) # Build the single flat tensor. 
total_size = sum(size for name, size, shape in pieces) @@ -281,11 +325,15 @@ def set_from(self, state, player): if "player" in self.dict: self.dict["player"][player] = 1 if "private_hand" in self.dict and len(state.hands[player]) == self.hand_length: - for i in range(len(state.hands[player])): - self.dict["private_hand"][i][state.hands[player][i]] = 1 - if "action_history" in self.dict: - for round, action in enumerate(state.actions[player]): - self.dict["action_history"][round, action] = 1 + self.dict["private_hand"] = self.hands[player] + if "rebid_state" in self.dict: + self.dict["rebid_state"] = state.is_rebid + if "counts_state" in self.dict: + self.dict["counts_state"] = state.is_terminal() + if "bid_history" in self.dict: + self.dict["bid_history"] = state.bid_history + if "challenge_history" in self.dict: + self.dict["challenge_history"] = state.challenge_history def string_from(self, state, player): """Observation of `state` from the PoV of `player`, as a string.""" @@ -294,9 +342,18 @@ def string_from(self, state, player): pieces.append(f"p{player}") if "private_hand" in self.dict and len(state.hands[player]) == self.hand_length: pieces.append(f"hand:{state.hands[player]}") - if "action_history" in self.dict and state.actions[player]: - # bc = bid, challenge. b is 0 or 1 and indexes into "bc" to stringify the action. 
- pieces.append("".join("bc"[b] for b in state.actions[player])) + if "rebid_state" in self.dict: + pieces.append(f"rebid:{state.is_rebid}") + if "counts_state" in self.dict: + pieces.append(f"rebid:{state.is_terminal()}") + if "bid_history" in self.dict: + for bid in range(len(state.bid_history)): + if np.any(state.bid_history[bid] == 1): + pieces.append("b:{}.".format(bid)) + if "challenge_history" in self.dict: + for bid in range(len(state.challenge_history)): + if np.any(state.challenge_history[bid] == 1): + pieces.append("c:{}.".format(bid)) return " ".join(str(p) for p in pieces) # Register the game with the OpenSpiel library From eeeeda367223626ed5d231c4207f382814793d7b Mon Sep 17 00:00:00 2001 From: William Wong Date: Mon, 19 Dec 2022 23:39:12 -0800 Subject: [PATCH 07/13] Unit tests and bug fixes --- open_spiel/python/games/liars_poker.py | 62 +++--- open_spiel/python/games/liars_poker_test.py | 212 ++++++++++++++++---- 2 files changed, 212 insertions(+), 62 deletions(-) diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py index b81cd556d0..bb973345d1 100644 --- a/open_spiel/python/games/liars_poker.py +++ b/open_spiel/python/games/liars_poker.py @@ -47,7 +47,8 @@ class Action(enum.IntEnum): provides_observation_string=False, provides_observation_tensor=True) _GAME_INFO = pyspiel.GameInfo( - num_distinct_actions=len(Action), + # Num actions = total number of cards * number of digits + action enum + num_distinct_actions=_HAND_LENGTH * _NUM_DIGITS * _MIN_NUM_PLAYERS + len(Action), max_chance_outcomes=_HAND_LENGTH * _NUM_DIGITS, num_players=_MIN_NUM_PLAYERS, min_utility=-(_MIN_NUM_PLAYERS - 1), # Reward from being challenged and losing. 
@@ -63,10 +64,10 @@ class LiarsPoker(pyspiel.Game): def __init__(self, params=None): super().__init__(_GAME_TYPE, _GAME_INFO, params or dict()) - self.deck = [_FULL_DECK[i] for i in range(params.get("num_digits", default=_NUM_DIGITS))] - self.num_players = params.get("num_players", default=_MIN_NUM_PLAYERS) - self.hand_length = params.get("hand_length", default=_HAND_LENGTH) - self.num_digits = params.get("num_digits", default=_NUM_DIGITS) + self.num_players = _MIN_NUM_PLAYERS + self.hand_length = _HAND_LENGTH + self.num_digits = _NUM_DIGITS + self.deck = [_FULL_DECK[i] for i in range(self.num_digits)] def new_initial_state(self): """Returns a state corresponding to the start of a game.""" @@ -100,15 +101,18 @@ def __init__(self, game): self.bid_history = np.zeros((total_possible_bets, game.num_players)) self.challenge_history = np.zeros((total_possible_bets, game.num_players)) self._current_player = 0 - self._bid_originator = 0 + self._bid_offset = len(Action) + self._max_bid = (self._hand_length * self._num_digits * self._num_players + + self._bid_offset - 1) + self._bid_originator = -1 self._current_bid = -1 self._num_challenges = 0 self.is_rebid = False # Game over dynamics self._game_over = False - self._winner = -1 - self._loser = -1 + self.winner = -1 + self.loser = -1 def current_player(self): """Returns id of the current player to act. @@ -118,7 +122,7 @@ def current_player(self): - CHANCE if a player is drawing a number to fill out their hand. - a number otherwise. """ - if self._is_terminal: + if self.is_terminal(): return pyspiel.PlayerId.TERMINAL elif len(self.hands[self._num_players - 1]) < self._hand_length: return pyspiel.PlayerId.CHANCE @@ -138,14 +142,15 @@ def _legal_actions(self, player): assert player >= 0 actions = [] - if player != self._bid_originator or self._is_rebid_possible(): - # Any move higher than the current bid is allowed. 
(Bids start at 0) - for b in range(self._current_bid + 1, self._hand_length * self._num_digits * self._num_players): - actions.append(b) - if self._is_challenge_possible(): actions.append(Action.CHALLENGE) + if player != self._bid_originator or self._is_rebid_possible(): + # Any move higher than the current bid is allowed. + # Bids start at 2 as 0 and 1 are for bid and challenge. + for b in range(max(self._bid_offset, self._current_bid + 1), self._max_bid + 1): + actions.append(b) + return actions def chance_outcomes(self): @@ -170,11 +175,16 @@ def _decode_bid(self, bid): number = self._deck[bid // (self._hand_length * self._num_players)] return (count, number) + def _end_game(self): + """Ends the game by calling a counts and setting respective attributes.""" + self._counts() + self._game_over = True + def _counts(self): """ Determines if the bid originator wins or loses. """ - bid_count, bid_number = self._decode_bid(self._current_bid) + bid_count, bid_number = self._decode_bid(self._current_bid - self._bid_offset) # Count the number of bid_numbers from all players. matches = 0 @@ -186,9 +196,9 @@ def _counts(self): # If the number of matches are at least the bid_count bid, then the bidder wins. # Otherwise everyone else wins. if matches >= bid_count: - self._winner = self._bid_originator + self.winner = self._bid_originator else: - self._loser = self._bid_originator + self.loser = self._bid_originator def _update_bid_history(self, bid, player): """Writes a player's bid into memory.""" @@ -203,20 +213,17 @@ def _apply_action(self, action): if self.is_chance_node(): # If we are still populating hands, draw a number for the current player. 
self.hands[self._current_player].append(action) - return elif action == Action.CHALLENGE: - self.actions[self._current_player].append(action) assert self._is_challenge_possible() - self._update_challenge_history(self._current_bid, self._current_player) + self._update_challenge_history( + self._current_bid - self._bid_offset, self._current_player) self._num_challenges += 1 # If there is no ongoing rebid, check if all players challenge before counting. # If there is an ongoing rebid, count once all the players except the bidder challenges. if (not self.is_rebid and self._num_challenges == self._num_players) or ( self.is_rebid and self._num_challenges == self._num_players - 1): - self._counts() - self._game_over = True + self._end_game() else: - self.actions[self._current_player].append(action) # Set the current bid to the action. self._current_bid = action if self._current_player == self._bid_originator: @@ -227,7 +234,7 @@ def _apply_action(self, action): self.is_rebid = False # Set the bid originator to the current player. 
self._bid_originator = self._current_player - self._update_bid_history(self._current_bid, self._current_player) + self._update_bid_history(self._current_bid - self._bid_offset, self._current_player) self._num_challenges = 0 self._current_player = (self._current_player + 1) % self._num_players @@ -246,10 +253,10 @@ def is_terminal(self): def returns(self): """Total reward for each player over the course of the game so far.""" - if self._winner != -1: + if self.winner != -1: bidder_reward = self._num_players - 1 others_reward = -1.0 - elif self._loser != -1: + elif self.loser != -1: bidder_reward = -1 * (self._num_players - 1) others_reward = 1.0 else: @@ -325,7 +332,7 @@ def set_from(self, state, player): if "player" in self.dict: self.dict["player"][player] = 1 if "private_hand" in self.dict and len(state.hands[player]) == self.hand_length: - self.dict["private_hand"] = self.hands[player] + self.dict["private_hand"] = state.hands[player] if "rebid_state" in self.dict: self.dict["rebid_state"] = state.is_rebid if "counts_state" in self.dict: @@ -356,6 +363,7 @@ def string_from(self, state, player): pieces.append("c:{}.".format(bid)) return " ".join(str(p) for p in pieces) + # Register the game with the OpenSpiel library pyspiel.register_game(_GAME_TYPE, LiarsPoker) diff --git a/open_spiel/python/games/liars_poker_test.py b/open_spiel/python/games/liars_poker_test.py index 9905b29822..e0f5eac535 100644 --- a/open_spiel/python/games/liars_poker_test.py +++ b/open_spiel/python/games/liars_poker_test.py @@ -13,7 +13,7 @@ # limitations under the License. # Lint as python3 -"""Tests for Python Tic-Tac-Toe.""" +"""Tests for Python Liar's Poker.""" import difflib import os @@ -26,51 +26,192 @@ from open_spiel.python.observation import make_observation import pyspiel +# TODO: remove? 
_DATA_DIR = "open_spiel/integration_tests/playthroughs/" -class TicTacToeTest(absltest.TestCase): +class LiarsPokerTest(absltest.TestCase): def test_can_create_game_and_state(self): """Checks we can create the game and a state.""" game = liars_poker.LiarsPoker() state = game.new_initial_state() - self.assertEqual(str(state), "...\n...\n...") - - def test_random_game(self): - """Tests basic API functions.""" - # This is here mostly to show the API by example. - # More serious simulation tests are done in python/tests/games_sim_test.py - # and in test_game_from_cc (below), both of which test the conformance to - # the API thoroughly. + # Ensure no moves have been made. + expected_hands = [[] for _ in range(game.num_players)] + expected_bidder = -1 + expected_current_player = pyspiel.PlayerId.CHANCE + expected_current_bid = -1 + expected_rebid = False + expected = "Hands: {}, Bidder: {}, Current Player: {}, Current Bid: {}, Rebid: {}".format( + expected_hands, + expected_bidder, + expected_current_player, + expected_current_bid, + expected_rebid + ) + self.assertEqual(str(state), expected) + + def test_draw_hands(self): + """Tests hand drawing functions.""" game = liars_poker.LiarsPoker() state = game.new_initial_state() - while not state.is_terminal(): - print(state) - cur_player = state.current_player() - legal_actions = state.legal_actions() - action = np.random.choice(legal_actions) - print("Player {} chooses action {}".format(cur_player, action)) + expected_hands = [[] for _ in range(game.num_players)] + for i in range(game.num_players * game.hand_length): + # Verify we have chance nodes until all player hands are filled. + self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE) + + # Draw a digit. + outcomes_with_probs = state.chance_outcomes() + action_list, prob_list = zip(*outcomes_with_probs) + action = np.random.choice(action_list, p=prob_list) + + # Verify players' hands are filled correctly. 
+ cur_player = i % game.num_players + expected_hands[cur_player].append(action) state.apply_action(action) - print(state) - print("Returns: {}".format(state.returns())) + self.assertEqual(state.hands, expected_hands) + # Assert after all hands are filled, we have non-chance nodes. + cur_player = state.current_player() + self.assertNotEqual(cur_player, pyspiel.PlayerId.CHANCE) + self.assertEqual(cur_player, 0) + + def _populate_game_hands(self, game, state): + """Populates players hands for testing.""" + for _ in range(game.num_players * game.hand_length): + outcomes_with_probs = state.chance_outcomes() + action_list, prob_list = zip(*outcomes_with_probs) + action = np.random.choice(action_list, p=prob_list) + state.apply_action(action) + + def test_basic_bid(self): + """Tests a single bid.""" + game = liars_poker.LiarsPoker() + state = game.new_initial_state() + total_possible_bets = game.hand_length * game.num_digits * game.num_players + expected_bid_history = np.zeros((total_possible_bets, game.num_players)) + + # Fill player hands. + self._populate_game_hands(game, state) + # After all hands are filled, have player 0 bid. + cur_player = state.current_player() + action = 2 + state.apply_action(action) + + # Verify bid history is updated correctly. + bid_offset = len(liars_poker.Action) + expected_bid_history[action - bid_offset][cur_player] = 1 + self.assertTrue((state.bid_history == expected_bid_history).all()) + + # Verify next set of legal bids is greater than the current bid. 
+ for next_action in state.legal_actions(): + if next_action == liars_poker.Action.CHALLENGE: + continue + self.assertGreater(next_action, action) + + def _verify_returns(self, game, state): + self.assertTrue(state.winner != -1 or state.loser != -1) + actual_returns = state.returns() + if state.winner != -1: + expected_returns = [-1.0 for _ in range(game.num_players)] + expected_returns[state.winner] = game.num_players - 1 + else: + expected_returns = [1.0 for _ in range(game.num_players)] + expected_returns[state.loser] = -1.0 * (game.num_players - 1) + self.assertEqual(actual_returns, expected_returns) + + def test_single_round(self): + """Runs a single round of bidding followed by a challenge.""" + game = liars_poker.LiarsPoker() + state = game.new_initial_state() + total_possible_bets = game.hand_length * game.num_digits * game.num_players + expected_challenge_history = np.zeros((total_possible_bets, game.num_players)) + + # Fill player hands. + self._populate_game_hands(game, state) + # Have player 0 bid. + action = 2 + state.apply_action(action) + # Verify challenge action is available to the next player. + challenge = liars_poker.Action.CHALLENGE + self.assertTrue(challenge in state.legal_actions()) + # Player 1 challenges. + cur_player = state.current_player() + state.apply_action(challenge) + bid_offset = len(liars_poker.Action) + expected_challenge_history[action - bid_offset][cur_player] = 1 + # Verify challenge history is updated correctly. + self.assertTrue((state.challenge_history == expected_challenge_history).all()) + # Original bidder challenges, thus agreeing to a count. + cur_player = state.current_player() + state.apply_action(challenge) + expected_challenge_history[action - bid_offset][cur_player] = 1 + # Verify challenge history is updated correctly. + self.assertTrue((state.challenge_history == expected_challenge_history).all()) + + # Verify game is over. + self.assertTrue(state.is_terminal()) + # Verify returns. 
+ self._verify_returns(game, state) + + def test_single_rebid(self): + """Runs a 2 player game where a rebid is enacted.""" + game = liars_poker.LiarsPoker() + state = game.new_initial_state() + + # Fill player hands. + self._populate_game_hands(game, state) + # Have player 0 bid. + state.apply_action(2) + # Player 1 challenges. + state.apply_action(liars_poker.Action.CHALLENGE) + # Original bidder rebids. + state.apply_action(3) + # Verify game is not over. + self.assertFalse(state.is_terminal()) + self.assertEqual(state.returns(), [0.0 for _ in range(game.num_players)]) + # Player 1 challenges again. + state.apply_action(liars_poker.Action.CHALLENGE) + + # Verify game is now over. + self.assertTrue(state.is_terminal()) + self._verify_returns(game, state) + + def test_rebid_then_new_bid(self): + """Runs a 2 player game where a rebid is enacted.""" + game = liars_poker.LiarsPoker() + state = game.new_initial_state() + + # Fill player hands. + self._populate_game_hands(game, state) + # Have player 0 bid. + state.apply_action(2) + # Player 1 challenges. + state.apply_action(liars_poker.Action.CHALLENGE) + # Original bidder rebids. + state.apply_action(3) + # Verify game is not over. + self.assertFalse(state.is_terminal()) + self.assertEqual(state.returns(), [0.0 for _ in range(game.num_players)]) + # Player 1 bids. + state.apply_action(4) + # Verify game is not over. + self.assertFalse(state.is_terminal()) + # Player 0 challenges. + state.apply_action(liars_poker.Action.CHALLENGE) + # Verify we're not rebidding and counts is only called once both players challenge. + self.assertFalse(state.is_terminal()) + # Player 1 challenges and ends the game with a counts. + state.apply_action(liars_poker.Action.CHALLENGE) + + # Verify game is now over. 
+ self.assertTrue(state.is_terminal()) + self._verify_returns(game, state) def test_game_from_cc(self): - """Runs our standard game tests, checking API consistency.""" + """Runs the standard game tests, checking API consistency.""" game = pyspiel.load_game("python_liars_poker") pyspiel.random_sim_test(game, num_sims=10, serialize=False, verbose=True) - def test_observation_tensors_same(self): - """Checks observation tensor is the same from C++ and from Python.""" - game = pyspiel.load_game("python_liars_poker") - state = game.new_initial_state() - for a in [4, 5, 2, 3]: - state.apply_action(a) - py_obs = make_observation(game) - py_obs.set_from(state, state.current_player()) - cc_obs = state.observation_tensor() - np.testing.assert_array_equal(py_obs.tensor, cc_obs) - def test_pickle(self): """Checks pickling and unpickling of game and state.""" game = pyspiel.load_game("python_liars_poker") @@ -78,7 +219,7 @@ def test_pickle(self): unpickled_game = pickle.loads(pickled_game) self.assertEqual(str(game), str(unpickled_game)) state = game.new_initial_state() - for a in [4, 2, 3, 7]: + for a in [2, 3, 4, 5]: state.apply_action(a) ser_str = pyspiel.serialize_game_and_state(game, state) new_game, new_state = pyspiel.deserialize_game_and_state(ser_str) @@ -101,10 +242,11 @@ def test_cloned_state_matches_original_state(self): self.assertEqual(state.move_number(), clone.move_number()) self.assertEqual(state.num_distinct_actions(), clone.num_distinct_actions()) - self.assertEqual(state._cur_player, clone._cur_player) - self.assertEqual(state._player0_score, clone._player0_score) - self.assertEqual(state._is_terminal, clone._is_terminal) - np.testing.assert_array_equal(state.board, clone.board) + self.assertEqual(state._current_player, clone._current_player) + self.assertEqual(state._current_bid, clone._current_bid) + self.assertEqual(state._game_over, clone._game_over) + np.testing.assert_array_equal(state.bid_history, clone.bid_history) + 
np.testing.assert_array_equal(state.challenge_history, clone.challenge_history) if __name__ == "__main__": From cf32057a23f788ee7ebe2f20ef28a7551dd1d4a5 Mon Sep 17 00:00:00 2001 From: William Wong Date: Tue, 20 Dec 2022 01:15:20 -0800 Subject: [PATCH 08/13] Change visibility of winners/losers --- open_spiel/python/games/liars_poker.py | 12 ++++++------ open_spiel/python/games/liars_poker_test.py | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py index bb973345d1..8d517c65f3 100644 --- a/open_spiel/python/games/liars_poker.py +++ b/open_spiel/python/games/liars_poker.py @@ -111,8 +111,8 @@ def __init__(self, game): # Game over dynamics self._game_over = False - self.winner = -1 - self.loser = -1 + self._winner = -1 + self._loser = -1 def current_player(self): """Returns id of the current player to act. @@ -196,9 +196,9 @@ def _counts(self): # If the number of matches are at least the bid_count bid, then the bidder wins. # Otherwise everyone else wins. 
if matches >= bid_count: - self.winner = self._bid_originator + self._winner = self._bid_originator else: - self.loser = self._bid_originator + self._loser = self._bid_originator def _update_bid_history(self, bid, player): """Writes a player's bid into memory.""" @@ -253,10 +253,10 @@ def is_terminal(self): def returns(self): """Total reward for each player over the course of the game so far.""" - if self.winner != -1: + if self._winner != -1: bidder_reward = self._num_players - 1 others_reward = -1.0 - elif self.loser != -1: + elif self._loser != -1: bidder_reward = -1 * (self._num_players - 1) others_reward = 1.0 else: diff --git a/open_spiel/python/games/liars_poker_test.py b/open_spiel/python/games/liars_poker_test.py index e0f5eac535..f0abbed1cd 100644 --- a/open_spiel/python/games/liars_poker_test.py +++ b/open_spiel/python/games/liars_poker_test.py @@ -109,14 +109,14 @@ def test_basic_bid(self): self.assertGreater(next_action, action) def _verify_returns(self, game, state): - self.assertTrue(state.winner != -1 or state.loser != -1) + self.assertTrue(state._winner != -1 or state._loser != -1) actual_returns = state.returns() - if state.winner != -1: + if state._winner != -1: expected_returns = [-1.0 for _ in range(game.num_players)] - expected_returns[state.winner] = game.num_players - 1 + expected_returns[state._winner] = game.num_players - 1 else: expected_returns = [1.0 for _ in range(game.num_players)] - expected_returns[state.loser] = -1.0 * (game.num_players - 1) + expected_returns[state._loser] = -1.0 * (game.num_players - 1) self.assertEqual(actual_returns, expected_returns) def test_single_round(self): From e975687549978eb6ce0fdc9e5a9692e08ef516d7 Mon Sep 17 00:00:00 2001 From: William Wong Date: Wed, 21 Dec 2022 02:59:58 -0800 Subject: [PATCH 09/13] Bug fixes and playthrough --- .../playthroughs/python_liars_poker.txt | 920 ++++++++++++++---- open_spiel/python/games/liars_poker.py | 42 +- open_spiel/python/games/liars_poker_test.py | 40 +- 3 files 
changed, 797 insertions(+), 205 deletions(-) diff --git a/open_spiel/integration_tests/playthroughs/python_liars_poker.txt b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt index 033397928d..ef0f271761 100644 --- a/open_spiel/integration_tests/playthroughs/python_liars_poker.txt +++ b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt @@ -1,14 +1,14 @@ -game: liars_poker +game: python_liars_poker -GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC GameType.dynamics = Dynamics.SEQUENTIAL -GameType.information = Information.PERFECT_INFORMATION +GameType.information = Information.IMPERFECT_INFORMATION GameType.long_name = "Python Liars Poker" -GameType.max_num_players = 2 +GameType.max_num_players = 10 GameType.min_num_players = 2 -GameType.parameter_specification = [] +GameType.parameter_specification = ["hand_length", "num_digits", "players"] GameType.provides_information_state_string = True -GameType.provides_information_state_tensor = True +GameType.provides_information_state_tensor = True GameType.provides_observation_string = False GameType.provides_observation_tensor = True GameType.provides_factored_observation_string = False @@ -16,225 +16,805 @@ GameType.reward_model = RewardModel.TERMINAL GameType.short_name = "python_liars_poker" GameType.utility = Utility.ZERO_SUM -NumDistinctActions() = 9 -PolicyTensorShape() = [9] -MaxChanceOutcomes() = 0 -GetParameters() = {} +NumDistinctActions() = 20 +PolicyTensorShape() = [20] +MaxChanceOutcomes() = 9 +GetParameters() = {hand_length=3,num_digits=3,players=2} NumPlayers() = 2 MinUtility() = -1.0 MaxUtility() = 1.0 UtilitySum() = 0.0 -ObservationTensorShape() = [3, 3, 3] +InformationStateTensorShape() = player: [2], private_hand: [3], rebid_state: [1], counts_state: [1], bid_history: [18, 2], challenge_history: [18, 2] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 79 +ObservationTensorShape() = 
player: [2], private_hand: [3], rebid_state: [1], counts_state: [1] ObservationTensorLayout() = TensorLayout.CHW -ObservationTensorSize() = 27 -MaxGameLength() = 9 -ToString() = "liars_poker()" +ObservationTensorSize() = 7 +MaxGameLength() = 36 +ToString() = "python_liars_poker(hand_length=3,num_digits=3,players=2)" # State 0 -# ... -# ... -# ... +# Hands: [[], []], Bidder: -1, Current Player: PlayerId.CHANCE, Current Bid: None of None, Rebid: False IsTerminal() = False History() = [] HistoryString() = "" -IsChanceNode() = False +IsChanceNode() = True IsSimultaneousNode() = False -CurrentPlayer() = 0 -InformationStateString(0) = "" -InformationStateString(1) = "" -ObservationString(0) = "...\n...\n..." -ObservationString(1) = "...\n...\n..." -ObservationTensor(0): -◉◉◉ ◯◯◯ ◯◯◯ -◉◉◉ ◯◯◯ ◯◯◯ -◉◉◉ ◯◯◯ ◯◯◯ -ObservationTensor(1): -◉◉◉ ◯◯◯ ◯◯◯ -◉◉◉ ◯◯◯ ◯◯◯ -◉◉◉ ◯◯◯ ◯◯◯ -Rewards() = [0, 0] -Returns() = [0, 0] -LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] -StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(1,0)", "x(1,1)", "x(1,2)", "x(2,0)", "x(2,1)", "x(2,2)"] +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "p0 rebid:[0] counts:[0]" +InformationStateString(1) = "p1 rebid:[0] counts:[0]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_hand: ◯◯◯ +InformationStateTensor(0).rebid_state: ◯ +InformationStateTensor(0).counts_state: ◯ +InformationStateTensor(0).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(0).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_hand: ◯◯◯ +InformationStateTensor(1).rebid_state: ◯ +InformationStateTensor(1).counts_state: ◯ +InformationStateTensor(1).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).challenge_history: ◯◯ + 
◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "p0 rebid:[0] counts:[0]" +ObservationString(1) = "p1 rebid:[0] counts:[0]" +PublicObservationString() = "p0 rebid:[0] counts:[0]" +PrivateObservationString(0) = "p0" +PrivateObservationString(1) = "p1" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand: ◯◯◯ +ObservationTensor(0).rebid_state: ◯ +ObservationTensor(0).counts_state: ◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand: ◯◯◯ +ObservationTensor(1).rebid_state: ◯ +ObservationTensor(1).counts_state: ◯ +ChanceOutcomes() = [(1, 0.3333333333333333), (2, 0.3333333333333333), (3, 0.3333333333333333)] +LegalActions() = [1, 2, 3] +StringLegalActions() = ["Deal:1", "Deal:2", "Deal:3"] -# Apply action "x(2,2)" -action: 8 +# Apply action "Deal:1" +action: 1 # State 1 -# ... -# ... -# ..x +# Hands: [[1], []], Bidder: -1, Current Player: PlayerId.CHANCE, Current Bid: None of None, Rebid: False IsTerminal() = False -History() = [8] -HistoryString() = "8" -IsChanceNode() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = True IsSimultaneousNode() = False -CurrentPlayer() = 1 -InformationStateString(0) = "8" -InformationStateString(1) = "8" -ObservationString(0) = "...\n...\n..x" -ObservationString(1) = "...\n...\n..x" -ObservationTensor(0): -◉◉◉ ◯◯◯ ◯◯◯ -◉◉◉ ◯◯◯ ◯◯◯ -◉◉◯ ◯◯◯ ◯◯◉ -ObservationTensor(1): -◉◉◉ ◯◯◯ ◯◯◯ -◉◉◉ ◯◯◯ ◯◯◯ -◉◉◯ ◯◯◯ ◯◯◉ -Rewards() = [0, 0] -Returns() = [0, 0] -LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7] -StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(1,0)", "o(1,1)", "o(1,2)", "o(2,0)", "o(2,1)"] +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "p0 rebid:[0] counts:[0]" +InformationStateString(1) = "p1 rebid:[0] counts:[0]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_hand: ◯◯◯ +InformationStateTensor(0).rebid_state: ◯ +InformationStateTensor(0).counts_state: ◯ 
+InformationStateTensor(0).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(0).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_hand: ◯◯◯ +InformationStateTensor(1).rebid_state: ◯ +InformationStateTensor(1).counts_state: ◯ +InformationStateTensor(1).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "p0 rebid:[0] counts:[0]" +ObservationString(1) = "p1 rebid:[0] counts:[0]" +PublicObservationString() = "p0 rebid:[0] counts:[0]" +PrivateObservationString(0) = "p0" +PrivateObservationString(1) = "p1" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand: ◯◯◯ +ObservationTensor(0).rebid_state: ◯ +ObservationTensor(0).counts_state: ◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand: ◯◯◯ +ObservationTensor(1).rebid_state: ◯ +ObservationTensor(1).counts_state: ◯ +ChanceOutcomes() = [(1, 0.3333333333333333), (2, 0.3333333333333333), (3, 0.3333333333333333)] +LegalActions() = [1, 2, 3] +StringLegalActions() = ["Deal:1", "Deal:2", "Deal:3"] -# Apply action "o(1,0)" +# Apply action "Deal:3" action: 3 # State 2 -# ... -# o.. 
-# ..x +# Apply action "Deal:1" +action: 1 + +# State 3 +# Apply action "Deal:3" +action: 3 + +# State 4 +# Apply action "Deal:2" +action: 2 + +# State 5 +# Apply action "Deal:3" +action: 3 + +# State 6 +# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: -1, Current Player: 0, Current Bid: None of None, Rebid: False IsTerminal() = False -History() = [8, 3] -HistoryString() = "8, 3" +History() = [1, 3, 1, 3, 2, 3] +HistoryString() = "1, 3, 1, 3, 2, 3" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = 0 -InformationStateString(0) = "8, 3" -InformationStateString(1) = "8, 3" -ObservationString(0) = "...\no..\n..x" -ObservationString(1) = "...\no..\n..x" -ObservationTensor(0): -◉◉◉ ◯◯◯ ◯◯◯ -◯◉◉ ◉◯◯ ◯◯◯ -◉◉◯ ◯◯◯ ◯◯◉ -ObservationTensor(1): -◉◉◉ ◯◯◯ ◯◯◯ -◯◉◉ ◉◯◯ ◯◯◯ -◉◉◯ ◯◯◯ ◯◯◉ +InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]" +InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_hand = [1, 1, 2] +InformationStateTensor(0).rebid_state: ◯ +InformationStateTensor(0).counts_state: ◯ +InformationStateTensor(0).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(0).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_hand = [3, 3, 3] +InformationStateTensor(1).rebid_state: ◯ +InformationStateTensor(1).counts_state: ◯ +InformationStateTensor(1).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]" +ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]" +PublicObservationString() = "p0 rebid:[0] counts:[0]" 
+PrivateObservationString(0) = "p0 hand:[1, 1, 2]" +PrivateObservationString(1) = "p1 hand:[3, 3, 3]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand = [1, 1, 2] +ObservationTensor(0).rebid_state: ◯ +ObservationTensor(0).counts_state: ◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand = [3, 3, 3] +ObservationTensor(1).rebid_state: ◯ +ObservationTensor(1).counts_state: ◯ Rewards() = [0, 0] Returns() = [0, 0] -LegalActions() = [0, 1, 2, 4, 5, 6, 7] -StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(1,1)", "x(1,2)", "x(2,0)", "x(2,1)"] +LegalActions() = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +StringLegalActions() = ["Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet"] -# Apply action "x(2,0)" -action: 6 +# Apply action "Bet" +action: 15 -# State 3 -# ... -# o.. -# x.x +# State 7 +# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: 0, Current Player: 1, Current Bid: 1 of 3, Rebid: False IsTerminal() = False -History() = [8, 3, 6] -HistoryString() = "8, 3, 6" +History() = [1, 3, 1, 3, 2, 3, 15] +HistoryString() = "1, 3, 1, 3, 2, 3, 15" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = 1 -InformationStateString(0) = "8, 3, 6" -InformationStateString(1) = "8, 3, 6" -ObservationString(0) = "...\no..\nx.x" -ObservationString(1) = "...\no..\nx.x" -ObservationTensor(0): -◉◉◉ ◯◯◯ ◯◯◯ -◯◉◉ ◉◯◯ ◯◯◯ -◯◉◯ ◯◯◯ ◉◯◉ -ObservationTensor(1): -◉◉◉ ◯◯◯ ◯◯◯ -◯◉◉ ◉◯◯ ◯◯◯ -◯◉◯ ◯◯◯ ◉◯◉ +InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0] b:13." +InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0] b:13." 
+InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_hand = [1, 1, 2] +InformationStateTensor(0).rebid_state: ◯ +InformationStateTensor(0).counts_state: ◯ +InformationStateTensor(0).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(0).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_hand = [3, 3, 3] +InformationStateTensor(1).rebid_state: ◯ +InformationStateTensor(1).counts_state: ◯ +InformationStateTensor(1).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]" +ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]" +PublicObservationString() = "p0 rebid:[0] counts:[0]" +PrivateObservationString(0) = "p0 hand:[1, 1, 2]" +PrivateObservationString(1) = "p1 hand:[3, 3, 3]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand = [1, 1, 2] +ObservationTensor(0).rebid_state: ◯ +ObservationTensor(0).counts_state: ◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand = [3, 3, 3] +ObservationTensor(1).rebid_state: ◯ +ObservationTensor(1).counts_state: ◯ Rewards() = [0, 0] Returns() = [0, 0] -LegalActions() = [0, 1, 2, 4, 5, 7] -StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(1,1)", "o(1,2)", "o(2,1)"] +LegalActions() = [1, 16, 17, 18, 19] +StringLegalActions() = ["Challenge", "Bet", "Bet", "Bet", "Bet"] -# Apply action "o(0,0)" -action: 0 +# Apply action "Bet" +action: 19 -# State 4 -# o.. -# o.. 
-# x.x +# State 8 +# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: 1, Current Player: 0, Current Bid: 5 of 3, Rebid: False IsTerminal() = False -History() = [8, 3, 6, 0] -HistoryString() = "8, 3, 6, 0" +History() = [1, 3, 1, 3, 2, 3, 15, 19] +HistoryString() = "1, 3, 1, 3, 2, 3, 15, 19" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = 0 -InformationStateString(0) = "8, 3, 6, 0" -InformationStateString(1) = "8, 3, 6, 0" -ObservationString(0) = "o..\no..\nx.x" -ObservationString(1) = "o..\no..\nx.x" -ObservationTensor(0): -◯◉◉ ◉◯◯ ◯◯◯ -◯◉◉ ◉◯◯ ◯◯◯ -◯◉◯ ◯◯◯ ◉◯◉ -ObservationTensor(1): -◯◉◉ ◉◯◯ ◯◯◯ -◯◉◉ ◉◯◯ ◯◯◯ -◯◉◯ ◯◯◯ ◉◯◉ +InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0] b:13. b:17." +InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0] b:13. b:17." +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_hand = [1, 1, 2] +InformationStateTensor(0).rebid_state: ◯ +InformationStateTensor(0).counts_state: ◯ +InformationStateTensor(0).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◉ +InformationStateTensor(0).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_hand = [3, 3, 3] +InformationStateTensor(1).rebid_state: ◯ +InformationStateTensor(1).counts_state: ◯ +InformationStateTensor(1).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◉ +InformationStateTensor(1).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]" +ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]" +PublicObservationString() = "p0 rebid:[0] counts:[0]" +PrivateObservationString(0) = "p0 hand:[1, 1, 2]" +PrivateObservationString(1) = "p1 hand:[3, 3, 3]" +ObservationTensor(0).player: ◉◯ 
+ObservationTensor(0).private_hand = [1, 1, 2] +ObservationTensor(0).rebid_state: ◯ +ObservationTensor(0).counts_state: ◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand = [3, 3, 3] +ObservationTensor(1).rebid_state: ◯ +ObservationTensor(1).counts_state: ◯ Rewards() = [0, 0] Returns() = [0, 0] -LegalActions() = [1, 2, 4, 5, 7] -StringLegalActions() = ["x(0,1)", "x(0,2)", "x(1,1)", "x(1,2)", "x(2,1)"] +LegalActions() = [1] +StringLegalActions() = ["Challenge"] -# Apply action "x(0,2)" -action: 2 +# Apply action "Challenge" +action: 1 -# State 5 -# o.x -# o.. -# x.x +# State 9 +# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: 1, Current Player: 1, Current Bid: 5 of 3, Rebid: False IsTerminal() = False -History() = [8, 3, 6, 0, 2] -HistoryString() = "8, 3, 6, 0, 2" +History() = [1, 3, 1, 3, 2, 3, 15, 19, 1] +HistoryString() = "1, 3, 1, 3, 2, 3, 15, 19, 1" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = 1 -InformationStateString(0) = "8, 3, 6, 0, 2" -InformationStateString(1) = "8, 3, 6, 0, 2" -ObservationString(0) = "o.x\no..\nx.x" -ObservationString(1) = "o.x\no..\nx.x" -ObservationTensor(0): -◯◉◯ ◉◯◯ ◯◯◉ -◯◉◉ ◉◯◯ ◯◯◯ -◯◉◯ ◯◯◯ ◉◯◉ -ObservationTensor(1): -◯◉◯ ◉◯◯ ◯◯◉ -◯◉◉ ◉◯◯ ◯◯◯ -◯◉◯ ◯◯◯ ◉◯◉ +InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0] b:13. b:17. c:17." +InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0] b:13. b:17. c:17." 
+InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_hand = [1, 1, 2] +InformationStateTensor(0).rebid_state: ◯ +InformationStateTensor(0).counts_state: ◯ +InformationStateTensor(0).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◉ +InformationStateTensor(0).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_hand = [3, 3, 3] +InformationStateTensor(1).rebid_state: ◯ +InformationStateTensor(1).counts_state: ◯ +InformationStateTensor(1).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◉ +InformationStateTensor(1).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ +ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]" +ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]" +PublicObservationString() = "p0 rebid:[0] counts:[0]" +PrivateObservationString(0) = "p0 hand:[1, 1, 2]" +PrivateObservationString(1) = "p1 hand:[3, 3, 3]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand = [1, 1, 2] +ObservationTensor(0).rebid_state: ◯ +ObservationTensor(0).counts_state: ◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand = [3, 3, 3] +ObservationTensor(1).rebid_state: ◯ +ObservationTensor(1).counts_state: ◯ Rewards() = [0, 0] Returns() = [0, 0] -LegalActions() = [1, 4, 5, 7] -StringLegalActions() = ["o(0,1)", "o(1,1)", "o(1,2)", "o(2,1)"] +LegalActions() = [1] +StringLegalActions() = ["Challenge"] -# Apply action "o(0,1)" +# Apply action "Challenge" action: 1 -# State 6 -# Apply action "x(1,2)" -action: 5 - -# State 7 -# oox -# o.x -# x.x +# State 10 +# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: 1, Current Player: PlayerId.TERMINAL, Current Bid: 5 of 3, Rebid: False IsTerminal() = True -History() = [8, 3, 6, 0, 2, 1, 5] 
-HistoryString() = "8, 3, 6, 0, 2, 1, 5" +History() = [1, 3, 1, 3, 2, 3, 15, 19, 1, 1] +HistoryString() = "1, 3, 1, 3, 2, 3, 15, 19, 1, 1" IsChanceNode() = False IsSimultaneousNode() = False -CurrentPlayer() = -4 -InformationStateString(0) = "8, 3, 6, 0, 2, 1, 5" -InformationStateString(1) = "8, 3, 6, 0, 2, 1, 5" -ObservationString(0) = "oox\no.x\nx.x" -ObservationString(1) = "oox\no.x\nx.x" -ObservationTensor(0): -◯◯◯ ◉◉◯ ◯◯◉ -◯◉◯ ◉◯◯ ◯◯◉ -◯◉◯ ◯◯◯ ◉◯◉ -ObservationTensor(1): -◯◯◯ ◉◉◯ ◯◯◉ -◯◉◯ ◉◯◯ ◯◯◉ -◯◉◯ ◯◯◯ ◉◯◉ +CurrentPlayer() = PlayerId.TERMINAL +InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[1] b:13. b:17. c:17." +InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[1] b:13. b:17. c:17." +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_hand = [1, 1, 2] +InformationStateTensor(0).rebid_state: ◯ +InformationStateTensor(0).counts_state: ◉ +InformationStateTensor(0).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◉ +InformationStateTensor(0).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◉ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_hand = [3, 3, 3] +InformationStateTensor(1).rebid_state: ◯ +InformationStateTensor(1).counts_state: ◉ +InformationStateTensor(1).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◉ +InformationStateTensor(1).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◉ +ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[1]" +ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[1]" +PublicObservationString() = "p0 rebid:[0] counts:[1]" +PrivateObservationString(0) = "p0 hand:[1, 1, 2]" +PrivateObservationString(1) = "p1 hand:[3, 3, 3]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand = [1, 1, 2] 
+ObservationTensor(0).rebid_state: ◯ +ObservationTensor(0).counts_state: ◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand = [3, 3, 3] +ObservationTensor(1).rebid_state: ◯ +ObservationTensor(1).counts_state: ◉ Rewards() = [1, -1] Returns() = [1, -1] diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py index 8d517c65f3..e425bced41 100644 --- a/open_spiel/python/games/liars_poker.py +++ b/open_spiel/python/games/liars_poker.py @@ -45,7 +45,12 @@ class Action(enum.IntEnum): provides_information_state_string=True, provides_information_state_tensor=True, provides_observation_string=False, - provides_observation_tensor=True) + provides_observation_tensor=True, + parameter_specification={ + "players": _MIN_NUM_PLAYERS, + "hand_length": _HAND_LENGTH, + "num_digits": _NUM_DIGITS + }) _GAME_INFO = pyspiel.GameInfo( # Num actions = total number of cards * number of digits + action enum num_distinct_actions=_HAND_LENGTH * _NUM_DIGITS * _MIN_NUM_PLAYERS + len(Action), @@ -64,9 +69,9 @@ class LiarsPoker(pyspiel.Game): def __init__(self, params=None): super().__init__(_GAME_TYPE, _GAME_INFO, params or dict()) - self.num_players = _MIN_NUM_PLAYERS - self.hand_length = _HAND_LENGTH - self.num_digits = _NUM_DIGITS + game_parameters = self.get_parameters() + self.hand_length = game_parameters.get("hand_length", _HAND_LENGTH) + self.num_digits = game_parameters.get("num_digits", _NUM_DIGITS) self.deck = [_FULL_DECK[i] for i in range(self.num_digits)] def new_initial_state(self): @@ -77,7 +82,7 @@ def make_py_observer(self, iig_obs_type=None, params=None): """Returns an object used for observing game state.""" return LiarsPokerObserver( iig_obs_type or pyspiel.IIGObservationType(perfect_recall=False), - self.num_players, + self.num_players(), self.hand_length, self.num_digits, params) @@ -90,16 +95,16 @@ def __init__(self, game): """Constructor; should only be called by Game.new_initial_state.""" super().__init__(game) # Game 
attributes - self._num_players = game.num_players + self._num_players = game.num_players() self._hand_length = game.hand_length self._num_digits = game.num_digits self._deck = game.deck self.hands = [[] for _ in range(self._num_players)] # Action dynamics - total_possible_bets = game.hand_length * game.num_digits * game.num_players - self.bid_history = np.zeros((total_possible_bets, game.num_players)) - self.challenge_history = np.zeros((total_possible_bets, game.num_players)) + total_possible_bets = game.hand_length * game.num_digits * self._num_players + self.bid_history = np.zeros((total_possible_bets, self._num_players)) + self.challenge_history = np.zeros((total_possible_bets, self._num_players)) self._current_player = 0 self._bid_offset = len(Action) self._max_bid = (self._hand_length * self._num_digits * self._num_players @@ -268,11 +273,16 @@ def returns(self): def __str__(self): """String for debug purposes. No particular semantics are required.""" - return "Hands: {}, Bidder: {}, Current Player: {}, Current Bid: {}, Rebid: {}".format( + if self._current_bid != -1: + count, number = self._decode_bid(self._current_bid - self._bid_offset) + else: + count, number = 'None', 'None' + return "Hands: {}, Bidder: {}, Current Player: {}, Current Bid: {} of {}, Rebid: {}".format( self.hands, self._bid_originator, self.current_player(), - self._current_bid, + count, + number, self.is_rebid) @@ -332,11 +342,11 @@ def set_from(self, state, player): if "player" in self.dict: self.dict["player"][player] = 1 if "private_hand" in self.dict and len(state.hands[player]) == self.hand_length: - self.dict["private_hand"] = state.hands[player] + self.dict["private_hand"] = np.asarray(state.hands[player]) if "rebid_state" in self.dict: - self.dict["rebid_state"] = state.is_rebid + self.dict["rebid_state"][0] = int(state.is_rebid) if "counts_state" in self.dict: - self.dict["counts_state"] = state.is_terminal() + self.dict["counts_state"][0] = int(state.is_terminal()) if 
"bid_history" in self.dict: self.dict["bid_history"] = state.bid_history if "challenge_history" in self.dict: @@ -350,9 +360,9 @@ def string_from(self, state, player): if "private_hand" in self.dict and len(state.hands[player]) == self.hand_length: pieces.append(f"hand:{state.hands[player]}") if "rebid_state" in self.dict: - pieces.append(f"rebid:{state.is_rebid}") + pieces.append(f"rebid:{[int(state.is_rebid)]}") if "counts_state" in self.dict: - pieces.append(f"rebid:{state.is_terminal()}") + pieces.append(f"counts:{[int(state.is_terminal())]}") if "bid_history" in self.dict: for bid in range(len(state.bid_history)): if np.any(state.bid_history[bid] == 1): diff --git a/open_spiel/python/games/liars_poker_test.py b/open_spiel/python/games/liars_poker_test.py index f0abbed1cd..a4ae7bc344 100644 --- a/open_spiel/python/games/liars_poker_test.py +++ b/open_spiel/python/games/liars_poker_test.py @@ -37,16 +37,18 @@ def test_can_create_game_and_state(self): game = liars_poker.LiarsPoker() state = game.new_initial_state() # Ensure no moves have been made. 
- expected_hands = [[] for _ in range(game.num_players)] + expected_hands = [[] for _ in range(game.num_players())] expected_bidder = -1 expected_current_player = pyspiel.PlayerId.CHANCE - expected_current_bid = -1 + expected_current_count = 'None' + expected_current_number = 'None' expected_rebid = False - expected = "Hands: {}, Bidder: {}, Current Player: {}, Current Bid: {}, Rebid: {}".format( + expected = "Hands: {}, Bidder: {}, Current Player: {}, Current Bid: {} of {}, Rebid: {}".format( expected_hands, expected_bidder, expected_current_player, - expected_current_bid, + expected_current_count, + expected_current_number, expected_rebid ) self.assertEqual(str(state), expected) @@ -55,8 +57,8 @@ def test_draw_hands(self): """Tests hand drawing functions.""" game = liars_poker.LiarsPoker() state = game.new_initial_state() - expected_hands = [[] for _ in range(game.num_players)] - for i in range(game.num_players * game.hand_length): + expected_hands = [[] for _ in range(game.num_players())] + for i in range(game.num_players() * game.hand_length): # Verify we have chance nodes until all player hands are filled. self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE) @@ -66,7 +68,7 @@ def test_draw_hands(self): action = np.random.choice(action_list, p=prob_list) # Verify players' hands are filled correctly. 
- cur_player = i % game.num_players + cur_player = i % game.num_players() expected_hands[cur_player].append(action) state.apply_action(action) self.assertEqual(state.hands, expected_hands) @@ -77,7 +79,7 @@ def test_draw_hands(self): def _populate_game_hands(self, game, state): """Populates players hands for testing.""" - for _ in range(game.num_players * game.hand_length): + for _ in range(game.num_players() * game.hand_length): outcomes_with_probs = state.chance_outcomes() action_list, prob_list = zip(*outcomes_with_probs) action = np.random.choice(action_list, p=prob_list) @@ -87,8 +89,8 @@ def test_basic_bid(self): """Tests a single bid.""" game = liars_poker.LiarsPoker() state = game.new_initial_state() - total_possible_bets = game.hand_length * game.num_digits * game.num_players - expected_bid_history = np.zeros((total_possible_bets, game.num_players)) + total_possible_bets = game.hand_length * game.num_digits * game.num_players() + expected_bid_history = np.zeros((total_possible_bets, game.num_players())) # Fill player hands. 
self._populate_game_hands(game, state) @@ -112,19 +114,19 @@ def _verify_returns(self, game, state): self.assertTrue(state._winner != -1 or state._loser != -1) actual_returns = state.returns() if state._winner != -1: - expected_returns = [-1.0 for _ in range(game.num_players)] - expected_returns[state._winner] = game.num_players - 1 + expected_returns = [-1.0 for _ in range(game.num_players())] + expected_returns[state._winner] = game.num_players() - 1 else: - expected_returns = [1.0 for _ in range(game.num_players)] - expected_returns[state._loser] = -1.0 * (game.num_players - 1) + expected_returns = [1.0 for _ in range(game.num_players())] + expected_returns[state._loser] = -1.0 * (game.num_players() - 1) self.assertEqual(actual_returns, expected_returns) def test_single_round(self): """Runs a single round of bidding followed by a challenge.""" game = liars_poker.LiarsPoker() state = game.new_initial_state() - total_possible_bets = game.hand_length * game.num_digits * game.num_players - expected_challenge_history = np.zeros((total_possible_bets, game.num_players)) + total_possible_bets = game.hand_length * game.num_digits * game.num_players() + expected_challenge_history = np.zeros((total_possible_bets, game.num_players())) # Fill player hands. self._populate_game_hands(game, state) @@ -168,7 +170,7 @@ def test_single_rebid(self): state.apply_action(3) # Verify game is not over. self.assertFalse(state.is_terminal()) - self.assertEqual(state.returns(), [0.0 for _ in range(game.num_players)]) + self.assertEqual(state.returns(), [0.0 for _ in range(game.num_players())]) # Player 1 challenges again. state.apply_action(liars_poker.Action.CHALLENGE) @@ -191,7 +193,7 @@ def test_rebid_then_new_bid(self): state.apply_action(3) # Verify game is not over. self.assertFalse(state.is_terminal()) - self.assertEqual(state.returns(), [0.0 for _ in range(game.num_players)]) + self.assertEqual(state.returns(), [0.0 for _ in range(game.num_players())]) # Player 1 bids. 
state.apply_action(4) # Verify game is not over. @@ -209,7 +211,7 @@ def test_rebid_then_new_bid(self): def test_game_from_cc(self): """Runs the standard game tests, checking API consistency.""" - game = pyspiel.load_game("python_liars_poker") + game = pyspiel.load_game("python_liars_poker", {"players": 2}) pyspiel.random_sim_test(game, num_sims=10, serialize=False, verbose=True) def test_pickle(self): From d75b5eac09a573e40cc84ddd3da68126b9103ac3 Mon Sep 17 00:00:00 2001 From: William Wong Date: Wed, 21 Dec 2022 03:06:00 -0800 Subject: [PATCH 10/13] Add Liar's Poker to game docs --- docs/games.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/games.md b/docs/games.md index 3a8027d02e..f44cc8dca1 100644 --- a/docs/games.md +++ b/docs/games.md @@ -45,6 +45,7 @@ Status | Game ![](_static/green_circ10.png "green circle") | [Leduc poker](#leduc-poker) ~ | [Lewis Signaling](#lewis-signaling) ![](_static/green_circ10.png "green circle") | [Liar's Dice](#liars-dice) +~ | [Liar's Poker](#liars-poker) ~ | [Mancala](#mancala) ~ | [Markov Soccer](#markov-soccer) ![](_static/green_circ10.png "green circle") | [Matching Pennies (Three-player)](#matching-pennies-three-player) @@ -474,6 +475,17 @@ Status | Game * 2 players. * [Wikipedia](https://en.wikipedia.org/wiki/Liar%27s_dice) +### Liar's Poker + +* Players bid and bluff on the state of all hands, given only the state of + their hand. +* Cards with bidding. +* Traditional game. +* Non-deterministic. +* Imperfect information +* 2 or more players. 
+* [Wikipedia](https://en.wikipedia.org/wiki/Liar%27s_poker) + ### Mancala * Players take turns sowing beans on the board and try to capture more beans From 8d10fa463bcd551cdc7f7297be1f659bc65a359b Mon Sep 17 00:00:00 2001 From: William Wong Date: Wed, 21 Dec 2022 03:39:34 -0800 Subject: [PATCH 11/13] Test cleanup --- open_spiel/python/games/liars_poker_test.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/open_spiel/python/games/liars_poker_test.py b/open_spiel/python/games/liars_poker_test.py index a4ae7bc344..fb064eb7c2 100644 --- a/open_spiel/python/games/liars_poker_test.py +++ b/open_spiel/python/games/liars_poker_test.py @@ -15,8 +15,6 @@ # Lint as python3 """Tests for Python Liar's Poker.""" -import difflib -import os import pickle from absl.testing import absltest @@ -26,9 +24,6 @@ from open_spiel.python.observation import make_observation import pyspiel -# TODO: remove? -_DATA_DIR = "open_spiel/integration_tests/playthroughs/" - class LiarsPokerTest(absltest.TestCase): From 1c24c5c458ba9ddf26e89a8d6a68958a24d9b892 Mon Sep 17 00:00:00 2001 From: William Wong Date: Wed, 18 Jan 2023 19:33:43 +1000 Subject: [PATCH 12/13] Address January 17th comments --- .../playthroughs/python_liars_poker.txt | 333 ++++++++++++------ open_spiel/python/games/liars_poker.py | 87 +++-- open_spiel/python/games/liars_poker_test.py | 73 ++-- 3 files changed, 322 insertions(+), 171 deletions(-) diff --git a/open_spiel/integration_tests/playthroughs/python_liars_poker.txt b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt index ef0f271761..1141887c4e 100644 --- a/open_spiel/integration_tests/playthroughs/python_liars_poker.txt +++ b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt @@ -16,8 +16,8 @@ GameType.reward_model = RewardModel.TERMINAL GameType.short_name = "python_liars_poker" GameType.utility = Utility.ZERO_SUM -NumDistinctActions() = 20 -PolicyTensorShape() = [20] +NumDistinctActions() = 19 +PolicyTensorShape() = [19] 
MaxChanceOutcomes() = 9 GetParameters() = {hand_length=3,num_digits=3,players=2} NumPlayers() = 2 @@ -138,9 +138,9 @@ ObservationTensor(1).rebid_state: ◯ ObservationTensor(1).counts_state: ◯ ChanceOutcomes() = [(1, 0.3333333333333333), (2, 0.3333333333333333), (3, 0.3333333333333333)] LegalActions() = [1, 2, 3] -StringLegalActions() = ["Deal:1", "Deal:2", "Deal:3"] +StringLegalActions() = ["Deal: 1", "Deal: 2", "Deal: 3"] -# Apply action "Deal:1" +# Apply action "Deal: 1" action: 1 # State 1 @@ -248,39 +248,39 @@ ObservationTensor(1).rebid_state: ◯ ObservationTensor(1).counts_state: ◯ ChanceOutcomes() = [(1, 0.3333333333333333), (2, 0.3333333333333333), (3, 0.3333333333333333)] LegalActions() = [1, 2, 3] -StringLegalActions() = ["Deal:1", "Deal:2", "Deal:3"] +StringLegalActions() = ["Deal: 1", "Deal: 2", "Deal: 3"] -# Apply action "Deal:3" -action: 3 +# Apply action "Deal: 2" +action: 2 # State 2 -# Apply action "Deal:1" -action: 1 +# Apply action "Deal: 3" +action: 3 # State 3 -# Apply action "Deal:3" -action: 3 +# Apply action "Deal: 2" +action: 2 # State 4 -# Apply action "Deal:2" -action: 2 +# Apply action "Deal: 3" +action: 3 # State 5 -# Apply action "Deal:3" -action: 3 +# Apply action "Deal: 2" +action: 2 # State 6 -# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: -1, Current Player: 0, Current Bid: None of None, Rebid: False +# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: -1, Current Player: 0, Current Bid: None of None, Rebid: False IsTerminal() = False -History() = [1, 3, 1, 3, 2, 3] -HistoryString() = "1, 3, 1, 3, 2, 3" +History() = [1, 2, 3, 2, 3, 2] +HistoryString() = "1, 2, 3, 2, 3, 2" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = 0 -InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]" -InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]" +InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]" +InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]" InformationStateTensor(0).player: 
◉◯ -InformationStateTensor(0).private_hand = [1, 1, 2] +InformationStateTensor(0).private_hand = [1, 3, 3] InformationStateTensor(0).rebid_state: ◯ InformationStateTensor(0).counts_state: ◯ InformationStateTensor(0).bid_history: ◯◯ @@ -320,7 +320,7 @@ InformationStateTensor(0).challenge_history: ◯◯ ◯◯ ◯◯ InformationStateTensor(1).player: ◯◉ -InformationStateTensor(1).private_hand = [3, 3, 3] +InformationStateTensor(1).private_hand = [2, 2, 2] InformationStateTensor(1).rebid_state: ◯ InformationStateTensor(1).counts_state: ◯ InformationStateTensor(1).bid_history: ◯◯ @@ -359,39 +359,39 @@ InformationStateTensor(1).challenge_history: ◯◯ ◯◯ ◯◯ ◯◯ -ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]" -ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]" +ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]" +ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]" PublicObservationString() = "p0 rebid:[0] counts:[0]" -PrivateObservationString(0) = "p0 hand:[1, 1, 2]" -PrivateObservationString(1) = "p1 hand:[3, 3, 3]" +PrivateObservationString(0) = "p0 hand:[1, 3, 3]" +PrivateObservationString(1) = "p1 hand:[2, 2, 2]" ObservationTensor(0).player: ◉◯ -ObservationTensor(0).private_hand = [1, 1, 2] +ObservationTensor(0).private_hand = [1, 3, 3] ObservationTensor(0).rebid_state: ◯ ObservationTensor(0).counts_state: ◯ ObservationTensor(1).player: ◯◉ -ObservationTensor(1).private_hand = [3, 3, 3] +ObservationTensor(1).private_hand = [2, 2, 2] ObservationTensor(1).rebid_state: ◯ ObservationTensor(1).counts_state: ◯ Rewards() = [0, 0] Returns() = [0, 0] -LegalActions() = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] -StringLegalActions() = ["Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet"] +LegalActions() = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] +StringLegalActions() = ["Bid: 1 of 1", "Bid: 2 of 1", "Bid: 3 of 1", "Bid: 4 
of 1", "Bid: 5 of 1", "Bid: 6 of 1", "Bid: 1 of 2", "Bid: 2 of 2", "Bid: 3 of 2", "Bid: 4 of 2", "Bid: 5 of 2", "Bid: 6 of 2", "Bid: 1 of 3", "Bid: 2 of 3", "Bid: 3 of 3", "Bid: 4 of 3", "Bid: 5 of 3", "Bid: 6 of 3"] -# Apply action "Bet" -action: 15 +# Apply action "Bid: 1 of 3" +action: 13 # State 7 -# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: 0, Current Player: 1, Current Bid: 1 of 3, Rebid: False +# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 0, Current Player: 1, Current Bid: 1 of 3, Rebid: False IsTerminal() = False -History() = [1, 3, 1, 3, 2, 3, 15] -HistoryString() = "1, 3, 1, 3, 2, 3, 15" +History() = [1, 2, 3, 2, 3, 2, 13] +HistoryString() = "1, 2, 3, 2, 3, 2, 13" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = 1 -InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0] b:13." -InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0] b:13." +InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0] b:12." +InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0] b:12." 
InformationStateTensor(0).player: ◉◯ -InformationStateTensor(0).private_hand = [1, 1, 2] +InformationStateTensor(0).private_hand = [1, 3, 3] InformationStateTensor(0).rebid_state: ◯ InformationStateTensor(0).counts_state: ◯ InformationStateTensor(0).bid_history: ◯◯ @@ -406,12 +406,12 @@ InformationStateTensor(0).bid_history: ◯◯ ◯◯ ◯◯ ◯◯ - ◯◯ ◉◯ ◯◯ ◯◯ ◯◯ ◯◯ + ◯◯ InformationStateTensor(0).challenge_history: ◯◯ ◯◯ ◯◯ @@ -431,7 +431,7 @@ InformationStateTensor(0).challenge_history: ◯◯ ◯◯ ◯◯ InformationStateTensor(1).player: ◯◉ -InformationStateTensor(1).private_hand = [3, 3, 3] +InformationStateTensor(1).private_hand = [2, 2, 2] InformationStateTensor(1).rebid_state: ◯ InformationStateTensor(1).counts_state: ◯ InformationStateTensor(1).bid_history: ◯◯ @@ -446,12 +446,12 @@ InformationStateTensor(1).bid_history: ◯◯ ◯◯ ◯◯ ◯◯ - ◯◯ ◉◯ ◯◯ ◯◯ ◯◯ ◯◯ + ◯◯ InformationStateTensor(1).challenge_history: ◯◯ ◯◯ ◯◯ @@ -470,39 +470,39 @@ InformationStateTensor(1).challenge_history: ◯◯ ◯◯ ◯◯ ◯◯ -ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]" -ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]" +ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]" +ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]" PublicObservationString() = "p0 rebid:[0] counts:[0]" -PrivateObservationString(0) = "p0 hand:[1, 1, 2]" -PrivateObservationString(1) = "p1 hand:[3, 3, 3]" +PrivateObservationString(0) = "p0 hand:[1, 3, 3]" +PrivateObservationString(1) = "p1 hand:[2, 2, 2]" ObservationTensor(0).player: ◉◯ -ObservationTensor(0).private_hand = [1, 1, 2] +ObservationTensor(0).private_hand = [1, 3, 3] ObservationTensor(0).rebid_state: ◯ ObservationTensor(0).counts_state: ◯ ObservationTensor(1).player: ◯◉ -ObservationTensor(1).private_hand = [3, 3, 3] +ObservationTensor(1).private_hand = [2, 2, 2] ObservationTensor(1).rebid_state: ◯ ObservationTensor(1).counts_state: ◯ Rewards() = [0, 0] Returns() = [0, 0] -LegalActions() = [1, 16, 17, 18, 19] 
-StringLegalActions() = ["Challenge", "Bet", "Bet", "Bet", "Bet"] +LegalActions() = [0, 15, 16, 17, 18] +StringLegalActions() = ["Challenge", "Bid: 3 of 3", "Bid: 4 of 3", "Bid: 5 of 3", "Bid: 6 of 3"] -# Apply action "Bet" -action: 19 +# Apply action "Bid: 4 of 3" +action: 16 # State 8 -# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: 1, Current Player: 0, Current Bid: 5 of 3, Rebid: False +# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 1, Current Player: 0, Current Bid: 4 of 3, Rebid: False IsTerminal() = False -History() = [1, 3, 1, 3, 2, 3, 15, 19] -HistoryString() = "1, 3, 1, 3, 2, 3, 15, 19" +History() = [1, 2, 3, 2, 3, 2, 13, 16] +HistoryString() = "1, 2, 3, 2, 3, 2, 13, 16" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = 0 -InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0] b:13. b:17." -InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0] b:13. b:17." +InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0] b:12. b:15." +InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0] b:12. b:15." 
InformationStateTensor(0).player: ◉◯ -InformationStateTensor(0).private_hand = [1, 1, 2] +InformationStateTensor(0).private_hand = [1, 3, 3] InformationStateTensor(0).rebid_state: ◯ InformationStateTensor(0).counts_state: ◯ InformationStateTensor(0).bid_history: ◯◯ @@ -517,12 +517,12 @@ InformationStateTensor(0).bid_history: ◯◯ ◯◯ ◯◯ ◯◯ - ◯◯ ◉◯ ◯◯ ◯◯ - ◯◯ ◯◉ + ◯◯ + ◯◯ InformationStateTensor(0).challenge_history: ◯◯ ◯◯ ◯◯ @@ -542,7 +542,7 @@ InformationStateTensor(0).challenge_history: ◯◯ ◯◯ ◯◯ InformationStateTensor(1).player: ◯◉ -InformationStateTensor(1).private_hand = [3, 3, 3] +InformationStateTensor(1).private_hand = [2, 2, 2] InformationStateTensor(1).rebid_state: ◯ InformationStateTensor(1).counts_state: ◯ InformationStateTensor(1).bid_history: ◯◯ @@ -557,12 +557,12 @@ InformationStateTensor(1).bid_history: ◯◯ ◯◯ ◯◯ ◯◯ - ◯◯ ◉◯ ◯◯ ◯◯ - ◯◯ ◯◉ + ◯◯ + ◯◯ InformationStateTensor(1).challenge_history: ◯◯ ◯◯ ◯◯ @@ -581,39 +581,39 @@ InformationStateTensor(1).challenge_history: ◯◯ ◯◯ ◯◯ ◯◯ -ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]" -ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]" +ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]" +ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]" PublicObservationString() = "p0 rebid:[0] counts:[0]" -PrivateObservationString(0) = "p0 hand:[1, 1, 2]" -PrivateObservationString(1) = "p1 hand:[3, 3, 3]" +PrivateObservationString(0) = "p0 hand:[1, 3, 3]" +PrivateObservationString(1) = "p1 hand:[2, 2, 2]" ObservationTensor(0).player: ◉◯ -ObservationTensor(0).private_hand = [1, 1, 2] +ObservationTensor(0).private_hand = [1, 3, 3] ObservationTensor(0).rebid_state: ◯ ObservationTensor(0).counts_state: ◯ ObservationTensor(1).player: ◯◉ -ObservationTensor(1).private_hand = [3, 3, 3] +ObservationTensor(1).private_hand = [2, 2, 2] ObservationTensor(1).rebid_state: ◯ ObservationTensor(1).counts_state: ◯ Rewards() = [0, 0] Returns() = [0, 0] -LegalActions() = [1] 
-StringLegalActions() = ["Challenge"] +LegalActions() = [0, 18] +StringLegalActions() = ["Challenge", "Bid: 6 of 3"] -# Apply action "Challenge" -action: 1 +# Apply action "Bid: 6 of 3" +action: 18 # State 9 -# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: 1, Current Player: 1, Current Bid: 5 of 3, Rebid: False +# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 0, Current Player: 1, Current Bid: 6 of 3, Rebid: False IsTerminal() = False -History() = [1, 3, 1, 3, 2, 3, 15, 19, 1] -HistoryString() = "1, 3, 1, 3, 2, 3, 15, 19, 1" +History() = [1, 2, 3, 2, 3, 2, 13, 16, 18] +HistoryString() = "1, 2, 3, 2, 3, 2, 13, 16, 18" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = 1 -InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0] b:13. b:17. c:17." -InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0] b:13. b:17. c:17." +InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0] b:12. b:15. b:17." +InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0] b:12. b:15. b:17." 
InformationStateTensor(0).player: ◉◯ -InformationStateTensor(0).private_hand = [1, 1, 2] +InformationStateTensor(0).private_hand = [1, 3, 3] InformationStateTensor(0).rebid_state: ◯ InformationStateTensor(0).counts_state: ◯ InformationStateTensor(0).bid_history: ◯◯ @@ -628,12 +628,12 @@ InformationStateTensor(0).bid_history: ◯◯ ◯◯ ◯◯ ◯◯ - ◯◯ ◉◯ ◯◯ ◯◯ - ◯◯ ◯◉ + ◯◯ + ◉◯ InformationStateTensor(0).challenge_history: ◯◯ ◯◯ ◯◯ @@ -651,9 +651,9 @@ InformationStateTensor(0).challenge_history: ◯◯ ◯◯ ◯◯ ◯◯ - ◉◯ + ◯◯ InformationStateTensor(1).player: ◯◉ -InformationStateTensor(1).private_hand = [3, 3, 3] +InformationStateTensor(1).private_hand = [2, 2, 2] InformationStateTensor(1).rebid_state: ◯ InformationStateTensor(1).counts_state: ◯ InformationStateTensor(1).bid_history: ◯◯ @@ -668,12 +668,123 @@ InformationStateTensor(1).bid_history: ◯◯ ◯◯ ◯◯ ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◉ ◯◯ ◉◯ +InformationStateTensor(1).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]" +ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]" +PublicObservationString() = "p0 rebid:[0] counts:[0]" +PrivateObservationString(0) = "p0 hand:[1, 3, 3]" +PrivateObservationString(1) = "p1 hand:[2, 2, 2]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand = [1, 3, 3] +ObservationTensor(0).rebid_state: ◯ +ObservationTensor(0).counts_state: ◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand = [2, 2, 2] +ObservationTensor(1).rebid_state: ◯ +ObservationTensor(1).counts_state: ◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0] +StringLegalActions() = ["Challenge"] + +# Apply action "Challenge" +action: 0 + +# State 10 +# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 0, Current Player: 0, Current Bid: 6 of 3, Rebid: False +IsTerminal() = False +History() = [1, 2, 3, 2, 3, 2, 13, 16, 18, 0] +HistoryString() = "1, 2, 3, 2, 3, 2, 13, 16, 18, 0" +IsChanceNode() 
= False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0] b:12. b:15. b:17. c:17." +InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0] b:12. b:15. b:17. c:17." +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_hand = [1, 3, 3] +InformationStateTensor(0).rebid_state: ◯ +InformationStateTensor(0).counts_state: ◯ +InformationStateTensor(0).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ ◯◯ ◯◯ ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ ◯◉ + ◯◯ + ◉◯ +InformationStateTensor(0).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◉ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_hand = [2, 2, 2] +InformationStateTensor(1).rebid_state: ◯ +InformationStateTensor(1).counts_state: ◯ +InformationStateTensor(1).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◉ + ◯◯ + ◉◯ InformationStateTensor(1).challenge_history: ◯◯ ◯◯ ◯◯ @@ -691,40 +802,40 @@ InformationStateTensor(1).challenge_history: ◯◯ ◯◯ ◯◯ ◯◯ - ◉◯ -ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]" -ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]" + ◯◉ +ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]" +ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]" PublicObservationString() = "p0 rebid:[0] counts:[0]" -PrivateObservationString(0) = "p0 hand:[1, 1, 2]" -PrivateObservationString(1) = "p1 hand:[3, 3, 3]" +PrivateObservationString(0) = "p0 hand:[1, 3, 3]" +PrivateObservationString(1) = "p1 hand:[2, 2, 2]" ObservationTensor(0).player: ◉◯ -ObservationTensor(0).private_hand = [1, 1, 2] +ObservationTensor(0).private_hand = [1, 3, 3] ObservationTensor(0).rebid_state: ◯ ObservationTensor(0).counts_state: ◯ ObservationTensor(1).player: ◯◉ -ObservationTensor(1).private_hand = [3, 3, 3] +ObservationTensor(1).private_hand = [2, 2, 2] ObservationTensor(1).rebid_state: 
◯ ObservationTensor(1).counts_state: ◯ Rewards() = [0, 0] Returns() = [0, 0] -LegalActions() = [1] +LegalActions() = [0] StringLegalActions() = ["Challenge"] # Apply action "Challenge" -action: 1 +action: 0 -# State 10 -# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: 1, Current Player: PlayerId.TERMINAL, Current Bid: 5 of 3, Rebid: False +# State 11 +# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 0, Current Player: PlayerId.TERMINAL, Current Bid: 6 of 3, Rebid: False IsTerminal() = True -History() = [1, 3, 1, 3, 2, 3, 15, 19, 1, 1] -HistoryString() = "1, 3, 1, 3, 2, 3, 15, 19, 1, 1" +History() = [1, 2, 3, 2, 3, 2, 13, 16, 18, 0, 0] +HistoryString() = "1, 2, 3, 2, 3, 2, 13, 16, 18, 0, 0" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = PlayerId.TERMINAL -InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[1] b:13. b:17. c:17." -InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[1] b:13. b:17. c:17." +InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[1] b:12. b:15. b:17. c:17." +InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[1] b:12. b:15. b:17. c:17." 
InformationStateTensor(0).player: ◉◯ -InformationStateTensor(0).private_hand = [1, 1, 2] +InformationStateTensor(0).private_hand = [1, 3, 3] InformationStateTensor(0).rebid_state: ◯ InformationStateTensor(0).counts_state: ◉ InformationStateTensor(0).bid_history: ◯◯ @@ -739,12 +850,12 @@ InformationStateTensor(0).bid_history: ◯◯ ◯◯ ◯◯ ◯◯ - ◯◯ ◉◯ ◯◯ ◯◯ - ◯◯ ◯◉ + ◯◯ + ◉◯ InformationStateTensor(0).challenge_history: ◯◯ ◯◯ ◯◯ @@ -764,7 +875,7 @@ InformationStateTensor(0).challenge_history: ◯◯ ◯◯ ◉◉ InformationStateTensor(1).player: ◯◉ -InformationStateTensor(1).private_hand = [3, 3, 3] +InformationStateTensor(1).private_hand = [2, 2, 2] InformationStateTensor(1).rebid_state: ◯ InformationStateTensor(1).counts_state: ◉ InformationStateTensor(1).bid_history: ◯◯ @@ -779,12 +890,12 @@ InformationStateTensor(1).bid_history: ◯◯ ◯◯ ◯◯ ◯◯ - ◯◯ ◉◯ ◯◯ ◯◯ - ◯◯ ◯◉ + ◯◯ + ◉◯ InformationStateTensor(1).challenge_history: ◯◯ ◯◯ ◯◯ @@ -803,18 +914,18 @@ InformationStateTensor(1).challenge_history: ◯◯ ◯◯ ◯◯ ◉◉ -ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[1]" -ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[1]" +ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[1]" +ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[1]" PublicObservationString() = "p0 rebid:[0] counts:[1]" -PrivateObservationString(0) = "p0 hand:[1, 1, 2]" -PrivateObservationString(1) = "p1 hand:[3, 3, 3]" +PrivateObservationString(0) = "p0 hand:[1, 3, 3]" +PrivateObservationString(1) = "p1 hand:[2, 2, 2]" ObservationTensor(0).player: ◉◯ -ObservationTensor(0).private_hand = [1, 1, 2] +ObservationTensor(0).private_hand = [1, 3, 3] ObservationTensor(0).rebid_state: ◯ ObservationTensor(0).counts_state: ◉ ObservationTensor(1).player: ◯◉ -ObservationTensor(1).private_hand = [3, 3, 3] +ObservationTensor(1).private_hand = [2, 2, 2] ObservationTensor(1).rebid_state: ◯ ObservationTensor(1).counts_state: ◉ -Rewards() = [1, -1] -Returns() = [1, -1] +Rewards() = [-1, 1] +Returns() = [-1, 
1] diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py index e425bced41..ae15edb31b 100644 --- a/open_spiel/python/games/liars_poker.py +++ b/open_spiel/python/games/liars_poker.py @@ -22,9 +22,8 @@ import pyspiel -class Action(enum.IntEnum): - BID = 0 - CHALLENGE = 1 +CHALLENGE_ACTION = 0 +BID_ACTION_OFFSET = 1 _MAX_NUM_PLAYERS = 10 _MIN_NUM_PLAYERS = 2 @@ -53,7 +52,7 @@ class Action(enum.IntEnum): }) _GAME_INFO = pyspiel.GameInfo( # Num actions = total number of cards * number of digits + action enum - num_distinct_actions=_HAND_LENGTH * _NUM_DIGITS * _MIN_NUM_PLAYERS + len(Action), + num_distinct_actions=_HAND_LENGTH * _NUM_DIGITS * _MIN_NUM_PLAYERS + BID_ACTION_OFFSET, max_chance_outcomes=_HAND_LENGTH * _NUM_DIGITS, num_players=_MIN_NUM_PLAYERS, min_utility=-(_MIN_NUM_PLAYERS - 1), # Reward from being challenged and losing. @@ -72,7 +71,7 @@ def __init__(self, params=None): game_parameters = self.get_parameters() self.hand_length = game_parameters.get("hand_length", _HAND_LENGTH) self.num_digits = game_parameters.get("num_digits", _NUM_DIGITS) - self.deck = [_FULL_DECK[i] for i in range(self.num_digits)] + self.deck = _FULL_DECK[:self.num_digits] def new_initial_state(self): """Returns a state corresponding to the start of a game.""" @@ -102,20 +101,18 @@ def __init__(self, game): self.hands = [[] for _ in range(self._num_players)] # Action dynamics - total_possible_bets = game.hand_length * game.num_digits * self._num_players - self.bid_history = np.zeros((total_possible_bets, self._num_players)) - self.challenge_history = np.zeros((total_possible_bets, self._num_players)) + self.total_possible_bids = game.hand_length * game.num_digits * self._num_players + self.bid_history = np.zeros((self.total_possible_bids, self._num_players)) + self.challenge_history = np.zeros((self.total_possible_bids, self._num_players)) + # self._current_player is only the valid current_player when cards have been dealt. Otherwise it's chance. 
self._current_player = 0 - self._bid_offset = len(Action) - self._max_bid = (self._hand_length * self._num_digits * self._num_players - + self._bid_offset - 1) + self._max_bid = self._hand_length * self._num_digits * self._num_players self._bid_originator = -1 - self._current_bid = -1 + self._current_action = -1 self._num_challenges = 0 self.is_rebid = False # Game over dynamics - self._game_over = False self._winner = -1 self._loser = -1 @@ -133,10 +130,18 @@ def current_player(self): return pyspiel.PlayerId.CHANCE else: return self._current_player + + def winner(self): + """Returns the id of the winner if the bid originator has won. -1 otherwise.""" + return self._winner + + def loser(self): + """Returns the id of the loser if the bid originator has lost. -1 otherwise.""" + return self._loser def _is_challenge_possible(self): """A challenge is possible once the first bid is made.""" - return self._current_bid != -1 + return self._current_action != -1 def _is_rebid_possible(self): """A rebid is only possible when all players have challenged the original bid.""" @@ -148,13 +153,13 @@ def _legal_actions(self, player): actions = [] if self._is_challenge_possible(): - actions.append(Action.CHALLENGE) + actions.append(CHALLENGE_ACTION) if player != self._bid_originator or self._is_rebid_possible(): # Any move higher than the current bid is allowed. - # Bids start at 2 as 0 and 1 are for bid and challenge. - for b in range(max(self._bid_offset, self._current_bid + 1), self._max_bid + 1): - actions.append(b) + # Bids start at BID_ACTION_OFFSET (1) as 0 represents the challenge action. + for bid in range(self._current_action + 1, self._max_bid): + actions.append(bid + BID_ACTION_OFFSET) return actions @@ -166,7 +171,8 @@ def chance_outcomes(self): def _decode_bid(self, bid): """ - Turns a bid ID in the range 0 to HAND_LENGTH * NUM_DIGITS * NUM_PLAYERS to a count and number. + Turns a bid ID in the range 0 to self._max_bid (non-inclusive) + to a count and number. 
For example, take 2 players each with 2 numbers from the deck of 1, 2, and 3. - A bid of two 1's would correspond to a bid id 1. @@ -176,20 +182,28 @@ def _decode_bid(self, bid): Returns a tuple of (count, number). For example, (1, 2) represents one 2's. """ - count = bid % (self._hand_length * self._num_players) + count = bid % (self._hand_length * self._num_players) + 1 number = self._deck[bid // (self._hand_length * self._num_players)] return (count, number) - def _end_game(self): - """Ends the game by calling a counts and setting respective attributes.""" - self._counts() - self._game_over = True + def encode_bid(self, count, number): + """ + Turns a count and number into a bid ID in the range 0 to self._max_bid (non-inclusive). + + For example, take 2 players each with 2 numbers from the deck of 1, 2, and 3. + - A count of 2 and number of 1 would be a bid of two 1's and a bid id 1. + - Explanation: 1 is the lowest number, and the only lower bid would be one 1, + corresponding to bid id 0. + + Returns a single bid ID. + """ + return ((number - 1) * self._hand_length * self._num_players) + count - 1 def _counts(self): """ Determines if the bid originator wins or loses. """ - bid_count, bid_number = self._decode_bid(self._current_bid - self._bid_offset) + bid_count, bid_number = self._decode_bid(self._current_action - BID_ACTION_OFFSET) # Count the number of bid_numbers from all players. matches = 0 @@ -218,19 +232,19 @@ def _apply_action(self, action): if self.is_chance_node(): # If we are still populating hands, draw a number for the current player.
self.hands[self._current_player].append(action) - elif action == Action.CHALLENGE: + elif action == CHALLENGE_ACTION: assert self._is_challenge_possible() self._update_challenge_history( - self._current_bid - self._bid_offset, self._current_player) + self._current_action - BID_ACTION_OFFSET, self._current_player) self._num_challenges += 1 # If there is no ongoing rebid, check if all players challenge before counting. # If there is an ongoing rebid, count once all the players except the bidder challenges. if (not self.is_rebid and self._num_challenges == self._num_players) or ( self.is_rebid and self._num_challenges == self._num_players - 1): - self._end_game() + self._counts() else: # Set the current bid to the action. - self._current_bid = action + self._current_action = action if self._current_player == self._bid_originator: # If the bid originator is bidding again, we have a rebid. self.is_rebid = True @@ -239,22 +253,23 @@ def _apply_action(self, action): self.is_rebid = False # Set the bid originator to the current player. 
self._bid_originator = self._current_player - self._update_bid_history(self._current_bid - self._bid_offset, self._current_player) + self._update_bid_history(self._current_action - BID_ACTION_OFFSET, self._current_player) self._num_challenges = 0 self._current_player = (self._current_player + 1) % self._num_players def _action_to_string(self, player, action): """Action -> string.""" if player == pyspiel.PlayerId.CHANCE: - return f"Deal:{action}" - elif action == Action.CHALLENGE: + return f"Deal: {action}" + elif action == CHALLENGE_ACTION: return "Challenge" else: - return "Bet" + count, number = self._decode_bid(action - BID_ACTION_OFFSET) + return f"Bid: {count} of {number}" def is_terminal(self): """Returns True if the game is over.""" - return self._game_over + return self._winner >= 0 or self._loser >= 0 def returns(self): """Total reward for each player over the course of the game so far.""" @@ -273,8 +288,8 @@ def returns(self): def __str__(self): """String for debug purposes. No particular semantics are required.""" - if self._current_bid != -1: - count, number = self._decode_bid(self._current_bid - self._bid_offset) + if self._current_action != -1: + count, number = self._decode_bid(self._current_action - BID_ACTION_OFFSET) else: count, number = 'None', 'None' return "Hands: {}, Bidder: {}, Current Player: {}, Current Bid: {} of {}, Rebid: {}".format( diff --git a/open_spiel/python/games/liars_poker_test.py b/open_spiel/python/games/liars_poker_test.py index fb064eb7c2..45a652ecf7 100644 --- a/open_spiel/python/games/liars_poker_test.py +++ b/open_spiel/python/games/liars_poker_test.py @@ -84,10 +84,9 @@ def test_basic_bid(self): """Tests a single bid.""" game = liars_poker.LiarsPoker() state = game.new_initial_state() - total_possible_bets = game.hand_length * game.num_digits * game.num_players() - expected_bid_history = np.zeros((total_possible_bets, game.num_players())) + expected_bid_history = np.zeros((state.total_possible_bids, state.num_players())) 
- # Fill player hands. + # Fill players hands. self._populate_game_hands(game, state) # After all hands are filled, have player 0 bid. cur_player = state.current_player() @@ -95,46 +94,45 @@ def test_basic_bid(self): state.apply_action(action) # Verify bid history is updated correctly. - bid_offset = len(liars_poker.Action) + bid_offset = liars_poker.BID_ACTION_OFFSET expected_bid_history[action - bid_offset][cur_player] = 1 self.assertTrue((state.bid_history == expected_bid_history).all()) # Verify next set of legal bids is greater than the current bid. for next_action in state.legal_actions(): - if next_action == liars_poker.Action.CHALLENGE: + if next_action == liars_poker.CHALLENGE_ACTION: continue self.assertGreater(next_action, action) def _verify_returns(self, game, state): - self.assertTrue(state._winner != -1 or state._loser != -1) + self.assertTrue(state.winner() != -1 or state.loser() != -1) actual_returns = state.returns() - if state._winner != -1: + if state.winner() != -1: expected_returns = [-1.0 for _ in range(game.num_players())] - expected_returns[state._winner] = game.num_players() - 1 + expected_returns[state.winner()] = game.num_players() - 1 else: expected_returns = [1.0 for _ in range(game.num_players())] - expected_returns[state._loser] = -1.0 * (game.num_players() - 1) + expected_returns[state.loser()] = -1.0 * (game.num_players() - 1) self.assertEqual(actual_returns, expected_returns) - def test_single_round(self): + def test_single_random_round(self): """Runs a single round of bidding followed by a challenge.""" game = liars_poker.LiarsPoker() state = game.new_initial_state() - total_possible_bets = game.hand_length * game.num_digits * game.num_players() - expected_challenge_history = np.zeros((total_possible_bets, game.num_players())) + expected_challenge_history = np.zeros((state.total_possible_bids, state.num_players())) - # Fill player hands. + # Fill players hands. self._populate_game_hands(game, state) # Have player 0 bid. 
action = 2 state.apply_action(action) # Verify challenge action is available to the next player. - challenge = liars_poker.Action.CHALLENGE + challenge = liars_poker.CHALLENGE_ACTION self.assertTrue(challenge in state.legal_actions()) # Player 1 challenges. cur_player = state.current_player() state.apply_action(challenge) - bid_offset = len(liars_poker.Action) + bid_offset = liars_poker.BID_ACTION_OFFSET expected_challenge_history[action - bid_offset][cur_player] = 1 # Verify challenge history is updated correctly. self.assertTrue((state.challenge_history == expected_challenge_history).all()) @@ -149,25 +147,53 @@ def test_single_round(self): self.assertTrue(state.is_terminal()) # Verify returns. self._verify_returns(game, state) + + def test_single_deterministic_round(self): + """Runs a single round where cards are dealt deterministically.""" + game = liars_poker.LiarsPoker() + state = game.new_initial_state() + + # Deal player 0 all "1" cards and player 1 all "2" cards. + for i in range(game.num_players() * game.hand_length): + if i % 2 == 0: + # Deal card to player 0 + state.apply_action(1) + else: + # Deal card to player 1 + state._apply_action(2) + + # Have player 0 bid that there are four 1's. + state.apply_action(state.encode_bid(4, 1) + liars_poker.BID_ACTION_OFFSET) + # Player 1 challenges. + state.apply_action(liars_poker.CHALLENGE_ACTION) + # Player 0 accepts the challenge. + state.apply_action(liars_poker.CHALLENGE_ACTION) + # Verify game ends with player 0 losing. + self.assertTrue(state.is_terminal()) + self.assertTrue(state.loser() == 0) + expected_returns = [1.0 for _ in range(game.num_players())] + expected_returns[state.loser()] = -1.0 * (game.num_players() - 1) + self.assertEqual(state.returns(), expected_returns) + def test_single_rebid(self): """Runs a 2 player game where a rebid is enacted.""" game = liars_poker.LiarsPoker() state = game.new_initial_state() - # Fill player hands. + # Fill players hands. 
self._populate_game_hands(game, state) # Have player 0 bid. state.apply_action(2) # Player 1 challenges. - state.apply_action(liars_poker.Action.CHALLENGE) + state.apply_action(liars_poker.CHALLENGE_ACTION) # Original bidder rebids. state.apply_action(3) # Verify game is not over. self.assertFalse(state.is_terminal()) self.assertEqual(state.returns(), [0.0 for _ in range(game.num_players())]) # Player 1 challenges again. - state.apply_action(liars_poker.Action.CHALLENGE) + state.apply_action(liars_poker.CHALLENGE_ACTION) # Verify game is now over. self.assertTrue(state.is_terminal()) @@ -178,12 +204,12 @@ def test_rebid_then_new_bid(self): game = liars_poker.LiarsPoker() state = game.new_initial_state() - # Fill player hands. + # Fill players hands. self._populate_game_hands(game, state) # Have player 0 bid. state.apply_action(2) # Player 1 challenges. - state.apply_action(liars_poker.Action.CHALLENGE) + state.apply_action(liars_poker.CHALLENGE_ACTION) # Original bidder rebids. state.apply_action(3) # Verify game is not over. @@ -194,11 +220,11 @@ def test_rebid_then_new_bid(self): # Verify game is not over. self.assertFalse(state.is_terminal()) # Player 0 challenges. - state.apply_action(liars_poker.Action.CHALLENGE) + state.apply_action(liars_poker.CHALLENGE_ACTION) # Verify we're not rebidding and counts is only called once both players challenge. self.assertFalse(state.is_terminal()) # Player 1 challenges and ends the game with a counts. - state.apply_action(liars_poker.Action.CHALLENGE) + state.apply_action(liars_poker.CHALLENGE_ACTION) # Verify game is now over. 
self.assertTrue(state.is_terminal()) @@ -240,8 +266,7 @@ def test_cloned_state_matches_original_state(self): self.assertEqual(state.num_distinct_actions(), clone.num_distinct_actions()) self.assertEqual(state._current_player, clone._current_player) - self.assertEqual(state._current_bid, clone._current_bid) - self.assertEqual(state._game_over, clone._game_over) + self.assertEqual(state._current_action, clone._current_action) np.testing.assert_array_equal(state.bid_history, clone.bid_history) np.testing.assert_array_equal(state.challenge_history, clone.challenge_history) From 546c701e6da87940e50c4fa088abcae8104d273f Mon Sep 17 00:00:00 2001 From: William Wong Date: Wed, 18 Jan 2023 19:37:47 +1000 Subject: [PATCH 13/13] Updated playthrough with latest pull --- .../playthroughs/python_liars_poker.txt | 299 ++++++------------ 1 file changed, 94 insertions(+), 205 deletions(-) diff --git a/open_spiel/integration_tests/playthroughs/python_liars_poker.txt b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt index 1141887c4e..082306060d 100644 --- a/open_spiel/integration_tests/playthroughs/python_liars_poker.txt +++ b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt @@ -140,14 +140,14 @@ ChanceOutcomes() = [(1, 0.3333333333333333), (2, 0.3333333333333333), (3, 0.3333 LegalActions() = [1, 2, 3] StringLegalActions() = ["Deal: 1", "Deal: 2", "Deal: 3"] -# Apply action "Deal: 1" -action: 1 +# Apply action "Deal: 2" +action: 2 # State 1 -# Hands: [[1], []], Bidder: -1, Current Player: PlayerId.CHANCE, Current Bid: None of None, Rebid: False +# Hands: [[2], []], Bidder: -1, Current Player: PlayerId.CHANCE, Current Bid: None of None, Rebid: False IsTerminal() = False -History() = [1] -HistoryString() = "1" +History() = [2] +HistoryString() = "2" IsChanceNode() = True IsSimultaneousNode() = False CurrentPlayer() = PlayerId.CHANCE @@ -250,37 +250,37 @@ ChanceOutcomes() = [(1, 0.3333333333333333), (2, 0.3333333333333333), (3, 0.3333 LegalActions() = [1, 
2, 3] StringLegalActions() = ["Deal: 1", "Deal: 2", "Deal: 3"] -# Apply action "Deal: 2" -action: 2 +# Apply action "Deal: 1" +action: 1 # State 2 -# Apply action "Deal: 3" -action: 3 - -# State 3 # Apply action "Deal: 2" action: 2 +# State 3 +# Apply action "Deal: 1" +action: 1 + # State 4 -# Apply action "Deal: 3" -action: 3 +# Apply action "Deal: 2" +action: 2 # State 5 # Apply action "Deal: 2" action: 2 # State 6 -# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: -1, Current Player: 0, Current Bid: None of None, Rebid: False +# Hands: [[2, 2, 2], [1, 1, 2]], Bidder: -1, Current Player: 0, Current Bid: None of None, Rebid: False IsTerminal() = False -History() = [1, 2, 3, 2, 3, 2] -HistoryString() = "1, 2, 3, 2, 3, 2" +History() = [2, 1, 2, 1, 2, 2] +HistoryString() = "2, 1, 2, 1, 2, 2" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = 0 -InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]" -InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]" +InformationStateString(0) = "p0 hand:[2, 2, 2] rebid:[0] counts:[0]" +InformationStateString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[0]" InformationStateTensor(0).player: ◉◯ -InformationStateTensor(0).private_hand = [1, 3, 3] +InformationStateTensor(0).private_hand = [2, 2, 2] InformationStateTensor(0).rebid_state: ◯ InformationStateTensor(0).counts_state: ◯ InformationStateTensor(0).bid_history: ◯◯ @@ -320,7 +320,7 @@ InformationStateTensor(0).challenge_history: ◯◯ ◯◯ ◯◯ InformationStateTensor(1).player: ◯◉ -InformationStateTensor(1).private_hand = [2, 2, 2] +InformationStateTensor(1).private_hand = [1, 1, 2] InformationStateTensor(1).rebid_state: ◯ InformationStateTensor(1).counts_state: ◯ InformationStateTensor(1).bid_history: ◯◯ @@ -359,17 +359,17 @@ InformationStateTensor(1).challenge_history: ◯◯ ◯◯ ◯◯ ◯◯ -ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]" -ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]" +ObservationString(0) = "p0 hand:[2, 2, 
2] rebid:[0] counts:[0]" +ObservationString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[0]" PublicObservationString() = "p0 rebid:[0] counts:[0]" -PrivateObservationString(0) = "p0 hand:[1, 3, 3]" -PrivateObservationString(1) = "p1 hand:[2, 2, 2]" +PrivateObservationString(0) = "p0 hand:[2, 2, 2]" +PrivateObservationString(1) = "p1 hand:[1, 1, 2]" ObservationTensor(0).player: ◉◯ -ObservationTensor(0).private_hand = [1, 3, 3] +ObservationTensor(0).private_hand = [2, 2, 2] ObservationTensor(0).rebid_state: ◯ ObservationTensor(0).counts_state: ◯ ObservationTensor(1).player: ◯◉ -ObservationTensor(1).private_hand = [2, 2, 2] +ObservationTensor(1).private_hand = [1, 1, 2] ObservationTensor(1).rebid_state: ◯ ObservationTensor(1).counts_state: ◯ Rewards() = [0, 0] @@ -381,17 +381,17 @@ StringLegalActions() = ["Bid: 1 of 1", "Bid: 2 of 1", "Bid: 3 of 1", "Bid: 4 of action: 13 # State 7 -# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 0, Current Player: 1, Current Bid: 1 of 3, Rebid: False +# Hands: [[2, 2, 2], [1, 1, 2]], Bidder: 0, Current Player: 1, Current Bid: 1 of 3, Rebid: False IsTerminal() = False -History() = [1, 2, 3, 2, 3, 2, 13] -HistoryString() = "1, 2, 3, 2, 3, 2, 13" +History() = [2, 1, 2, 1, 2, 2, 13] +HistoryString() = "2, 1, 2, 1, 2, 2, 13" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = 1 -InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0] b:12." -InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0] b:12." +InformationStateString(0) = "p0 hand:[2, 2, 2] rebid:[0] counts:[0] b:12." +InformationStateString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[0] b:12." 
InformationStateTensor(0).player: ◉◯ -InformationStateTensor(0).private_hand = [1, 3, 3] +InformationStateTensor(0).private_hand = [2, 2, 2] InformationStateTensor(0).rebid_state: ◯ InformationStateTensor(0).counts_state: ◯ InformationStateTensor(0).bid_history: ◯◯ @@ -431,7 +431,7 @@ InformationStateTensor(0).challenge_history: ◯◯ ◯◯ ◯◯ InformationStateTensor(1).player: ◯◉ -InformationStateTensor(1).private_hand = [2, 2, 2] +InformationStateTensor(1).private_hand = [1, 1, 2] InformationStateTensor(1).rebid_state: ◯ InformationStateTensor(1).counts_state: ◯ InformationStateTensor(1).bid_history: ◯◯ @@ -470,17 +470,17 @@ InformationStateTensor(1).challenge_history: ◯◯ ◯◯ ◯◯ ◯◯ -ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]" -ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]" +ObservationString(0) = "p0 hand:[2, 2, 2] rebid:[0] counts:[0]" +ObservationString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[0]" PublicObservationString() = "p0 rebid:[0] counts:[0]" -PrivateObservationString(0) = "p0 hand:[1, 3, 3]" -PrivateObservationString(1) = "p1 hand:[2, 2, 2]" +PrivateObservationString(0) = "p0 hand:[2, 2, 2]" +PrivateObservationString(1) = "p1 hand:[1, 1, 2]" ObservationTensor(0).player: ◉◯ -ObservationTensor(0).private_hand = [1, 3, 3] +ObservationTensor(0).private_hand = [2, 2, 2] ObservationTensor(0).rebid_state: ◯ ObservationTensor(0).counts_state: ◯ ObservationTensor(1).player: ◯◉ -ObservationTensor(1).private_hand = [2, 2, 2] +ObservationTensor(1).private_hand = [1, 1, 2] ObservationTensor(1).rebid_state: ◯ ObservationTensor(1).counts_state: ◯ Rewards() = [0, 0] @@ -488,21 +488,21 @@ Returns() = [0, 0] LegalActions() = [0, 15, 16, 17, 18] StringLegalActions() = ["Challenge", "Bid: 3 of 3", "Bid: 4 of 3", "Bid: 5 of 3", "Bid: 6 of 3"] -# Apply action "Bid: 4 of 3" -action: 16 +# Apply action "Bid: 5 of 3" +action: 17 # State 8 -# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 1, Current Player: 0, Current Bid: 4 of 3, Rebid: False +# Hands: 
[[2, 2, 2], [1, 1, 2]], Bidder: 1, Current Player: 0, Current Bid: 5 of 3, Rebid: False IsTerminal() = False -History() = [1, 2, 3, 2, 3, 2, 13, 16] -HistoryString() = "1, 2, 3, 2, 3, 2, 13, 16" +History() = [2, 1, 2, 1, 2, 2, 13, 17] +HistoryString() = "2, 1, 2, 1, 2, 2, 13, 17" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = 0 -InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0] b:12. b:15." -InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0] b:12. b:15." +InformationStateString(0) = "p0 hand:[2, 2, 2] rebid:[0] counts:[0] b:12. b:16." +InformationStateString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[0] b:12. b:16." InformationStateTensor(0).player: ◉◯ -InformationStateTensor(0).private_hand = [1, 3, 3] +InformationStateTensor(0).private_hand = [2, 2, 2] InformationStateTensor(0).rebid_state: ◯ InformationStateTensor(0).counts_state: ◯ InformationStateTensor(0).bid_history: ◯◯ @@ -520,120 +520,9 @@ InformationStateTensor(0).bid_history: ◯◯ ◉◯ ◯◯ ◯◯ - ◯◉ - ◯◯ - ◯◯ -InformationStateTensor(0).challenge_history: ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ -InformationStateTensor(1).player: ◯◉ -InformationStateTensor(1).private_hand = [2, 2, 2] -InformationStateTensor(1).rebid_state: ◯ -InformationStateTensor(1).counts_state: ◯ -InformationStateTensor(1).bid_history: ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◉◯ - ◯◯ ◯◯ ◯◉ ◯◯ - ◯◯ -InformationStateTensor(1).challenge_history: ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ -ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]" -ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]" -PublicObservationString() = "p0 rebid:[0] counts:[0]" -PrivateObservationString(0) = "p0 hand:[1, 3, 3]" -PrivateObservationString(1) = "p1 hand:[2, 2, 2]" -ObservationTensor(0).player: ◉◯ -ObservationTensor(0).private_hand = [1, 3, 3] 
-ObservationTensor(0).rebid_state: ◯ -ObservationTensor(0).counts_state: ◯ -ObservationTensor(1).player: ◯◉ -ObservationTensor(1).private_hand = [2, 2, 2] -ObservationTensor(1).rebid_state: ◯ -ObservationTensor(1).counts_state: ◯ -Rewards() = [0, 0] -Returns() = [0, 0] -LegalActions() = [0, 18] -StringLegalActions() = ["Challenge", "Bid: 6 of 3"] - -# Apply action "Bid: 6 of 3" -action: 18 - -# State 9 -# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 0, Current Player: 1, Current Bid: 6 of 3, Rebid: False -IsTerminal() = False -History() = [1, 2, 3, 2, 3, 2, 13, 16, 18] -HistoryString() = "1, 2, 3, 2, 3, 2, 13, 16, 18" -IsChanceNode() = False -IsSimultaneousNode() = False -CurrentPlayer() = 1 -InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0] b:12. b:15. b:17." -InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0] b:12. b:15. b:17." -InformationStateTensor(0).player: ◉◯ -InformationStateTensor(0).private_hand = [1, 3, 3] -InformationStateTensor(0).rebid_state: ◯ -InformationStateTensor(0).counts_state: ◯ -InformationStateTensor(0).bid_history: ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◯◯ - ◉◯ - ◯◯ - ◯◯ - ◯◉ - ◯◯ - ◉◯ InformationStateTensor(0).challenge_history: ◯◯ ◯◯ ◯◯ @@ -653,7 +542,7 @@ InformationStateTensor(0).challenge_history: ◯◯ ◯◯ ◯◯ InformationStateTensor(1).player: ◯◉ -InformationStateTensor(1).private_hand = [2, 2, 2] +InformationStateTensor(1).private_hand = [1, 1, 2] InformationStateTensor(1).rebid_state: ◯ InformationStateTensor(1).counts_state: ◯ InformationStateTensor(1).bid_history: ◯◯ @@ -671,9 +560,9 @@ InformationStateTensor(1).bid_history: ◯◯ ◉◯ ◯◯ ◯◯ + ◯◯ ◯◉ ◯◯ - ◉◯ InformationStateTensor(1).challenge_history: ◯◯ ◯◯ ◯◯ @@ -692,17 +581,17 @@ InformationStateTensor(1).challenge_history: ◯◯ ◯◯ ◯◯ ◯◯ -ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]" -ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]" +ObservationString(0) = "p0 hand:[2, 2, 2] rebid:[0] counts:[0]" 
+ObservationString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[0]" PublicObservationString() = "p0 rebid:[0] counts:[0]" -PrivateObservationString(0) = "p0 hand:[1, 3, 3]" -PrivateObservationString(1) = "p1 hand:[2, 2, 2]" +PrivateObservationString(0) = "p0 hand:[2, 2, 2]" +PrivateObservationString(1) = "p1 hand:[1, 1, 2]" ObservationTensor(0).player: ◉◯ -ObservationTensor(0).private_hand = [1, 3, 3] +ObservationTensor(0).private_hand = [2, 2, 2] ObservationTensor(0).rebid_state: ◯ ObservationTensor(0).counts_state: ◯ ObservationTensor(1).player: ◯◉ -ObservationTensor(1).private_hand = [2, 2, 2] +ObservationTensor(1).private_hand = [1, 1, 2] ObservationTensor(1).rebid_state: ◯ ObservationTensor(1).counts_state: ◯ Rewards() = [0, 0] @@ -713,18 +602,18 @@ StringLegalActions() = ["Challenge"] # Apply action "Challenge" action: 0 -# State 10 -# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 0, Current Player: 0, Current Bid: 6 of 3, Rebid: False +# State 9 +# Hands: [[2, 2, 2], [1, 1, 2]], Bidder: 1, Current Player: 1, Current Bid: 5 of 3, Rebid: False IsTerminal() = False -History() = [1, 2, 3, 2, 3, 2, 13, 16, 18, 0] -HistoryString() = "1, 2, 3, 2, 3, 2, 13, 16, 18, 0" +History() = [2, 1, 2, 1, 2, 2, 13, 17, 0] +HistoryString() = "2, 1, 2, 1, 2, 2, 13, 17, 0" IsChanceNode() = False IsSimultaneousNode() = False -CurrentPlayer() = 0 -InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0] b:12. b:15. b:17. c:17." -InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0] b:12. b:15. b:17. c:17." +CurrentPlayer() = 1 +InformationStateString(0) = "p0 hand:[2, 2, 2] rebid:[0] counts:[0] b:12. b:16. c:16." +InformationStateString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[0] b:12. b:16. c:16." 
InformationStateTensor(0).player: ◉◯ -InformationStateTensor(0).private_hand = [1, 3, 3] +InformationStateTensor(0).private_hand = [2, 2, 2] InformationStateTensor(0).rebid_state: ◯ InformationStateTensor(0).counts_state: ◯ InformationStateTensor(0).bid_history: ◯◯ @@ -742,9 +631,9 @@ InformationStateTensor(0).bid_history: ◯◯ ◉◯ ◯◯ ◯◯ + ◯◯ ◯◉ ◯◯ - ◉◯ InformationStateTensor(0).challenge_history: ◯◯ ◯◯ ◯◯ @@ -761,10 +650,10 @@ InformationStateTensor(0).challenge_history: ◯◯ ◯◯ ◯◯ ◯◯ + ◉◯ ◯◯ - ◯◉ InformationStateTensor(1).player: ◯◉ -InformationStateTensor(1).private_hand = [2, 2, 2] +InformationStateTensor(1).private_hand = [1, 1, 2] InformationStateTensor(1).rebid_state: ◯ InformationStateTensor(1).counts_state: ◯ InformationStateTensor(1).bid_history: ◯◯ @@ -782,9 +671,9 @@ InformationStateTensor(1).bid_history: ◯◯ ◉◯ ◯◯ ◯◯ + ◯◯ ◯◉ ◯◯ - ◉◯ InformationStateTensor(1).challenge_history: ◯◯ ◯◯ ◯◯ @@ -801,19 +690,19 @@ InformationStateTensor(1).challenge_history: ◯◯ ◯◯ ◯◯ ◯◯ + ◉◯ ◯◯ - ◯◉ -ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]" -ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]" +ObservationString(0) = "p0 hand:[2, 2, 2] rebid:[0] counts:[0]" +ObservationString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[0]" PublicObservationString() = "p0 rebid:[0] counts:[0]" -PrivateObservationString(0) = "p0 hand:[1, 3, 3]" -PrivateObservationString(1) = "p1 hand:[2, 2, 2]" +PrivateObservationString(0) = "p0 hand:[2, 2, 2]" +PrivateObservationString(1) = "p1 hand:[1, 1, 2]" ObservationTensor(0).player: ◉◯ -ObservationTensor(0).private_hand = [1, 3, 3] +ObservationTensor(0).private_hand = [2, 2, 2] ObservationTensor(0).rebid_state: ◯ ObservationTensor(0).counts_state: ◯ ObservationTensor(1).player: ◯◉ -ObservationTensor(1).private_hand = [2, 2, 2] +ObservationTensor(1).private_hand = [1, 1, 2] ObservationTensor(1).rebid_state: ◯ ObservationTensor(1).counts_state: ◯ Rewards() = [0, 0] @@ -824,18 +713,18 @@ StringLegalActions() = ["Challenge"] # Apply 
action "Challenge" action: 0 -# State 11 -# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 0, Current Player: PlayerId.TERMINAL, Current Bid: 6 of 3, Rebid: False +# State 10 +# Hands: [[2, 2, 2], [1, 1, 2]], Bidder: 1, Current Player: PlayerId.TERMINAL, Current Bid: 5 of 3, Rebid: False IsTerminal() = True -History() = [1, 2, 3, 2, 3, 2, 13, 16, 18, 0, 0] -HistoryString() = "1, 2, 3, 2, 3, 2, 13, 16, 18, 0, 0" +History() = [2, 1, 2, 1, 2, 2, 13, 17, 0, 0] +HistoryString() = "2, 1, 2, 1, 2, 2, 13, 17, 0, 0" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = PlayerId.TERMINAL -InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[1] b:12. b:15. b:17. c:17." -InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[1] b:12. b:15. b:17. c:17." +InformationStateString(0) = "p0 hand:[2, 2, 2] rebid:[0] counts:[1] b:12. b:16. c:16." +InformationStateString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[1] b:12. b:16. c:16." InformationStateTensor(0).player: ◉◯ -InformationStateTensor(0).private_hand = [1, 3, 3] +InformationStateTensor(0).private_hand = [2, 2, 2] InformationStateTensor(0).rebid_state: ◯ InformationStateTensor(0).counts_state: ◉ InformationStateTensor(0).bid_history: ◯◯ @@ -853,9 +742,9 @@ InformationStateTensor(0).bid_history: ◯◯ ◉◯ ◯◯ ◯◯ + ◯◯ ◯◉ ◯◯ - ◉◯ InformationStateTensor(0).challenge_history: ◯◯ ◯◯ ◯◯ @@ -872,10 +761,10 @@ InformationStateTensor(0).challenge_history: ◯◯ ◯◯ ◯◯ ◯◯ - ◯◯ ◉◉ + ◯◯ InformationStateTensor(1).player: ◯◉ -InformationStateTensor(1).private_hand = [2, 2, 2] +InformationStateTensor(1).private_hand = [1, 1, 2] InformationStateTensor(1).rebid_state: ◯ InformationStateTensor(1).counts_state: ◉ InformationStateTensor(1).bid_history: ◯◯ @@ -893,9 +782,9 @@ InformationStateTensor(1).bid_history: ◯◯ ◉◯ ◯◯ ◯◯ + ◯◯ ◯◉ ◯◯ - ◉◯ InformationStateTensor(1).challenge_history: ◯◯ ◯◯ ◯◯ @@ -912,20 +801,20 @@ InformationStateTensor(1).challenge_history: ◯◯ ◯◯ ◯◯ ◯◯ - ◯◯ ◉◉ -ObservationString(0) = "p0 hand:[1, 3, 3] 
rebid:[0] counts:[1]" -ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[1]" + ◯◯ +ObservationString(0) = "p0 hand:[2, 2, 2] rebid:[0] counts:[1]" +ObservationString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[1]" PublicObservationString() = "p0 rebid:[0] counts:[1]" -PrivateObservationString(0) = "p0 hand:[1, 3, 3]" -PrivateObservationString(1) = "p1 hand:[2, 2, 2]" +PrivateObservationString(0) = "p0 hand:[2, 2, 2]" +PrivateObservationString(1) = "p1 hand:[1, 1, 2]" ObservationTensor(0).player: ◉◯ -ObservationTensor(0).private_hand = [1, 3, 3] +ObservationTensor(0).private_hand = [2, 2, 2] ObservationTensor(0).rebid_state: ◯ ObservationTensor(0).counts_state: ◉ ObservationTensor(1).player: ◯◉ -ObservationTensor(1).private_hand = [2, 2, 2] +ObservationTensor(1).private_hand = [1, 1, 2] ObservationTensor(1).rebid_state: ◯ ObservationTensor(1).counts_state: ◉ -Rewards() = [-1, 1] -Returns() = [-1, 1] +Rewards() = [1, -1] +Returns() = [1, -1]