From e62a0a9ec69fc712fb2f0320cfad860731b5bedf Mon Sep 17 00:00:00 2001
From: William Wong <w07wong@yahoo.com>
Date: Tue, 6 Dec 2022 01:51:52 -0500
Subject: [PATCH 01/13] Working initial liars poker file structure

---
 .../playthroughs/python_liars_poker.txt       | 240 ++++++++++++++++++
 open_spiel/python/CMakeLists.txt              |   1 +
 open_spiel/python/games/__init__.py           |   1 +
 open_spiel/python/games/liars_poker.py        | 196 ++++++++++++++
 open_spiel/python/games/liars_poker_test.py   | 111 ++++++++
 open_spiel/python/tests/pyspiel_test.py       |   1 +
 6 files changed, 550 insertions(+)
 create mode 100644 open_spiel/integration_tests/playthroughs/python_liars_poker.txt
 create mode 100644 open_spiel/python/games/liars_poker.py
 create mode 100644 open_spiel/python/games/liars_poker_test.py

diff --git a/open_spiel/integration_tests/playthroughs/python_liars_poker.txt b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt
new file mode 100644
index 0000000000..5a36ef7cc7
--- /dev/null
+++ b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt
@@ -0,0 +1,240 @@
+game: liars_poker
+
+GameType.chance_mode = ChanceMode.DETERMINISTIC
+GameType.dynamics = Dynamics.SEQUENTIAL
+GameType.information = Information.PERFECT_INFORMATION
+GameType.long_name = "Python Liars Poker"
+GameType.max_num_players = 2
+GameType.min_num_players = 2
+GameType.parameter_specification = []
+GameType.provides_information_state_string = True
+GameType.provides_information_state_tensor = False
+GameType.provides_observation_string = True
+GameType.provides_observation_tensor = True
+GameType.provides_factored_observation_string = False
+GameType.reward_model = RewardModel.TERMINAL
+GameType.short_name = "python_liars_poker"
+GameType.utility = Utility.ZERO_SUM
+
+NumDistinctActions() = 9
+PolicyTensorShape() = [9]
+MaxChanceOutcomes() = 0
+GetParameters() = {}
+NumPlayers() = 2
+MinUtility() = -1.0
+MaxUtility() = 1.0
+UtilitySum() = 0.0
+ObservationTensorShape() = [3, 3, 3]
+ObservationTensorLayout() = TensorLayout.CHW
+ObservationTensorSize() = 27
+MaxGameLength() = 9
+ToString() = "liars_poker()"
+
+# State 0
+# ...
+# ...
+# ...
+IsTerminal() = False
+History() = []
+HistoryString() = ""
+IsChanceNode() = False
+IsSimultaneousNode() = False
+CurrentPlayer() = 0
+InformationStateString(0) = ""
+InformationStateString(1) = ""
+ObservationString(0) = "...\n...\n..."
+ObservationString(1) = "...\n...\n..."
+ObservationTensor(0):
+◉◉◉  ◯◯◯  ◯◯◯
+◉◉◉  ◯◯◯  ◯◯◯
+◉◉◉  ◯◯◯  ◯◯◯
+ObservationTensor(1):
+◉◉◉  ◯◯◯  ◯◯◯
+◉◉◉  ◯◯◯  ◯◯◯
+◉◉◉  ◯◯◯  ◯◯◯
+Rewards() = [0, 0]
+Returns() = [0, 0]
+LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8]
+StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(1,0)", "x(1,1)", "x(1,2)", "x(2,0)", "x(2,1)", "x(2,2)"]
+
+# Apply action "x(2,2)"
+action: 8
+
+# State 1
+# ...
+# ...
+# ..x
+IsTerminal() = False
+History() = [8]
+HistoryString() = "8"
+IsChanceNode() = False
+IsSimultaneousNode() = False
+CurrentPlayer() = 1
+InformationStateString(0) = "8"
+InformationStateString(1) = "8"
+ObservationString(0) = "...\n...\n..x"
+ObservationString(1) = "...\n...\n..x"
+ObservationTensor(0):
+◉◉◉  ◯◯◯  ◯◯◯
+◉◉◉  ◯◯◯  ◯◯◯
+◉◉◯  ◯◯◯  ◯◯◉
+ObservationTensor(1):
+◉◉◉  ◯◯◯  ◯◯◯
+◉◉◉  ◯◯◯  ◯◯◯
+◉◉◯  ◯◯◯  ◯◯◉
+Rewards() = [0, 0]
+Returns() = [0, 0]
+LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7]
+StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(1,0)", "o(1,1)", "o(1,2)", "o(2,0)", "o(2,1)"]
+
+# Apply action "o(1,0)"
+action: 3
+
+# State 2
+# ...
+# o..
+# ..x
+IsTerminal() = False
+History() = [8, 3]
+HistoryString() = "8, 3"
+IsChanceNode() = False
+IsSimultaneousNode() = False
+CurrentPlayer() = 0
+InformationStateString(0) = "8, 3"
+InformationStateString(1) = "8, 3"
+ObservationString(0) = "...\no..\n..x"
+ObservationString(1) = "...\no..\n..x"
+ObservationTensor(0):
+◉◉◉  ◯◯◯  ◯◯◯
+◯◉◉  ◉◯◯  ◯◯◯
+◉◉◯  ◯◯◯  ◯◯◉
+ObservationTensor(1):
+◉◉◉  ◯◯◯  ◯◯◯
+◯◉◉  ◉◯◯  ◯◯◯
+◉◉◯  ◯◯◯  ◯◯◉
+Rewards() = [0, 0]
+Returns() = [0, 0]
+LegalActions() = [0, 1, 2, 4, 5, 6, 7]
+StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(1,1)", "x(1,2)", "x(2,0)", "x(2,1)"]
+
+# Apply action "x(2,0)"
+action: 6
+
+# State 3
+# ...
+# o..
+# x.x
+IsTerminal() = False
+History() = [8, 3, 6]
+HistoryString() = "8, 3, 6"
+IsChanceNode() = False
+IsSimultaneousNode() = False
+CurrentPlayer() = 1
+InformationStateString(0) = "8, 3, 6"
+InformationStateString(1) = "8, 3, 6"
+ObservationString(0) = "...\no..\nx.x"
+ObservationString(1) = "...\no..\nx.x"
+ObservationTensor(0):
+◉◉◉  ◯◯◯  ◯◯◯
+◯◉◉  ◉◯◯  ◯◯◯
+◯◉◯  ◯◯◯  ◉◯◉
+ObservationTensor(1):
+◉◉◉  ◯◯◯  ◯◯◯
+◯◉◉  ◉◯◯  ◯◯◯
+◯◉◯  ◯◯◯  ◉◯◉
+Rewards() = [0, 0]
+Returns() = [0, 0]
+LegalActions() = [0, 1, 2, 4, 5, 7]
+StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(1,1)", "o(1,2)", "o(2,1)"]
+
+# Apply action "o(0,0)"
+action: 0
+
+# State 4
+# o..
+# o..
+# x.x
+IsTerminal() = False
+History() = [8, 3, 6, 0]
+HistoryString() = "8, 3, 6, 0"
+IsChanceNode() = False
+IsSimultaneousNode() = False
+CurrentPlayer() = 0
+InformationStateString(0) = "8, 3, 6, 0"
+InformationStateString(1) = "8, 3, 6, 0"
+ObservationString(0) = "o..\no..\nx.x"
+ObservationString(1) = "o..\no..\nx.x"
+ObservationTensor(0):
+◯◉◉  ◉◯◯  ◯◯◯
+◯◉◉  ◉◯◯  ◯◯◯
+◯◉◯  ◯◯◯  ◉◯◉
+ObservationTensor(1):
+◯◉◉  ◉◯◯  ◯◯◯
+◯◉◉  ◉◯◯  ◯◯◯
+◯◉◯  ◯◯◯  ◉◯◉
+Rewards() = [0, 0]
+Returns() = [0, 0]
+LegalActions() = [1, 2, 4, 5, 7]
+StringLegalActions() = ["x(0,1)", "x(0,2)", "x(1,1)", "x(1,2)", "x(2,1)"]
+
+# Apply action "x(0,2)"
+action: 2
+
+# State 5
+# o.x
+# o..
+# x.x
+IsTerminal() = False
+History() = [8, 3, 6, 0, 2]
+HistoryString() = "8, 3, 6, 0, 2"
+IsChanceNode() = False
+IsSimultaneousNode() = False
+CurrentPlayer() = 1
+InformationStateString(0) = "8, 3, 6, 0, 2"
+InformationStateString(1) = "8, 3, 6, 0, 2"
+ObservationString(0) = "o.x\no..\nx.x"
+ObservationString(1) = "o.x\no..\nx.x"
+ObservationTensor(0):
+◯◉◯  ◉◯◯  ◯◯◉
+◯◉◉  ◉◯◯  ◯◯◯
+◯◉◯  ◯◯◯  ◉◯◉
+ObservationTensor(1):
+◯◉◯  ◉◯◯  ◯◯◉
+◯◉◉  ◉◯◯  ◯◯◯
+◯◉◯  ◯◯◯  ◉◯◉
+Rewards() = [0, 0]
+Returns() = [0, 0]
+LegalActions() = [1, 4, 5, 7]
+StringLegalActions() = ["o(0,1)", "o(1,1)", "o(1,2)", "o(2,1)"]
+
+# Apply action "o(0,1)"
+action: 1
+
+# State 6
+# Apply action "x(1,2)"
+action: 5
+
+# State 7
+# oox
+# o.x
+# x.x
+IsTerminal() = True
+History() = [8, 3, 6, 0, 2, 1, 5]
+HistoryString() = "8, 3, 6, 0, 2, 1, 5"
+IsChanceNode() = False
+IsSimultaneousNode() = False
+CurrentPlayer() = -4
+InformationStateString(0) = "8, 3, 6, 0, 2, 1, 5"
+InformationStateString(1) = "8, 3, 6, 0, 2, 1, 5"
+ObservationString(0) = "oox\no.x\nx.x"
+ObservationString(1) = "oox\no.x\nx.x"
+ObservationTensor(0):
+◯◯◯  ◉◉◯  ◯◯◉
+◯◉◯  ◉◯◯  ◯◯◉
+◯◉◯  ◯◯◯  ◉◯◉
+ObservationTensor(1):
+◯◯◯  ◉◉◯  ◯◯◉
+◯◉◯  ◉◯◯  ◯◯◉
+◯◉◯  ◯◯◯  ◉◯◉
+Rewards() = [1, -1]
+Returns() = [1, -1]
diff --git a/open_spiel/python/CMakeLists.txt b/open_spiel/python/CMakeLists.txt
index 544ce4f734..720c923659 100644
--- a/open_spiel/python/CMakeLists.txt
+++ b/open_spiel/python/CMakeLists.txt
@@ -221,6 +221,7 @@ set(PYTHON_TESTS ${PYTHON_TESTS}
   games/data_test.py
   games/dynamic_routing_test.py
   games/dynamic_routing_utils_test.py
+  games/liars_poker_test.py
   games/tic_tac_toe_test.py
   mfg/algorithms/best_response_value_test.py
   mfg/algorithms/mirror_descent_test.py
diff --git a/open_spiel/python/games/__init__.py b/open_spiel/python/games/__init__.py
index c5c8bb7c97..959e315ea7 100644
--- a/open_spiel/python/games/__init__.py
+++ b/open_spiel/python/games/__init__.py
@@ -30,3 +30,4 @@
 from open_spiel.python.games import iterated_prisoners_dilemma
 from open_spiel.python.games import kuhn_poker
 from open_spiel.python.games import tic_tac_toe
+from open_spiel.python.games import liars_poker
\ No newline at end of file
diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py
new file mode 100644
index 0000000000..3fd8765d11
--- /dev/null
+++ b/open_spiel/python/games/liars_poker.py
@@ -0,0 +1,196 @@
+# Copyright 2019 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as python3
+"""Tic tac toe (noughts and crosses), implemented in Python.
+
+This is a demonstration of implementing a deterministic perfect-information
+game in Python.
+
+Python games are significantly slower than C++, but it may still be suitable
+for prototyping or for small games.
+
+It is possible to run C++ algorithms on Python implemented games, This is likely
+to have good performance if the algorithm simply extracts a game tree and then
+works with that (e.g. CFR algorithms). It is likely to be poor if the algorithm
+relies on processing and updating states as it goes, e.g. MCTS.
+"""
+
+import numpy as np
+
+from open_spiel.python.observation import IIGObserverForPublicInfoGame
+import pyspiel
+
+_NUM_PLAYERS = 2
+_NUM_ROWS = 3
+_NUM_COLS = 3
+_NUM_CELLS = _NUM_ROWS * _NUM_COLS
+_GAME_TYPE = pyspiel.GameType(
+    short_name="python_liars_poker",
+    long_name="Python Liars Poker",
+    dynamics=pyspiel.GameType.Dynamics.SEQUENTIAL,
+    chance_mode=pyspiel.GameType.ChanceMode.DETERMINISTIC,
+    information=pyspiel.GameType.Information.PERFECT_INFORMATION,
+    utility=pyspiel.GameType.Utility.ZERO_SUM,
+    reward_model=pyspiel.GameType.RewardModel.TERMINAL,
+    max_num_players=_NUM_PLAYERS,
+    min_num_players=_NUM_PLAYERS,
+    provides_information_state_string=True,
+    provides_information_state_tensor=False,
+    provides_observation_string=True,
+    provides_observation_tensor=True,
+    parameter_specification={})
+_GAME_INFO = pyspiel.GameInfo(
+    num_distinct_actions=_NUM_CELLS,
+    max_chance_outcomes=0,
+    num_players=2,
+    min_utility=-1.0,
+    max_utility=1.0,
+    utility_sum=0.0,
+    max_game_length=_NUM_CELLS)
+
+
+class LiarsPoker(pyspiel.Game):
+  """A Python version of the Tic-Tac-Toe game."""
+
+  def __init__(self, params=None):
+    super().__init__(_GAME_TYPE, _GAME_INFO, params or dict())
+
+  def new_initial_state(self):
+    """Returns a state corresponding to the start of a game."""
+    return LiarsPokerState(self)
+
+  def make_py_observer(self, iig_obs_type=None, params=None):
+    """Returns an object used for observing game state."""
+    if ((iig_obs_type is None) or
+        (iig_obs_type.public_info and not iig_obs_type.perfect_recall)):
+      return BoardObserver(params)
+    else:
+      return IIGObserverForPublicInfoGame(iig_obs_type, params)
+
+
+class LiarsPokerState(pyspiel.State):
+  """A python version of the Tic-Tac-Toe state."""
+
+  def __init__(self, game):
+    """Constructor; should only be called by Game.new_initial_state."""
+    super().__init__(game)
+    self._cur_player = 0
+    self._player0_score = 0.0
+    self._is_terminal = False
+    self.board = np.full((_NUM_ROWS, _NUM_COLS), ".")
+
+  # OpenSpiel (PySpiel) API functions are below. This is the standard set that
+  # should be implemented by every perfect-information sequential-move game.
+
+  def current_player(self):
+    """Returns id of the next player to move, or TERMINAL if game is over."""
+    return pyspiel.PlayerId.TERMINAL if self._is_terminal else self._cur_player
+
+  def _legal_actions(self, player):
+    """Returns a list of legal actions, sorted in ascending order."""
+    return [a for a in range(_NUM_CELLS) if self.board[_coord(a)] == "."]
+
+  def _apply_action(self, action):
+    """Applies the specified action to the state."""
+    self.board[_coord(action)] = "x" if self._cur_player == 0 else "o"
+    if _line_exists(self.board):
+      self._is_terminal = True
+      self._player0_score = 1.0 if self._cur_player == 0 else -1.0
+    elif all(self.board.ravel() != "."):
+      self._is_terminal = True
+    else:
+      self._cur_player = 1 - self._cur_player
+
+  def _action_to_string(self, player, action):
+    """Action -> string."""
+    row, col = _coord(action)
+    return "{}({},{})".format("x" if player == 0 else "o", row, col)
+
+  def is_terminal(self):
+    """Returns True if the game is over."""
+    return self._is_terminal
+
+  def returns(self):
+    """Total reward for each player over the course of the game so far."""
+    return [self._player0_score, -self._player0_score]
+
+  def __str__(self):
+    """String for debug purposes. No particular semantics are required."""
+    return _board_to_string(self.board)
+
+
+class BoardObserver:
+  """Observer, conforming to the PyObserver interface (see observation.py)."""
+
+  def __init__(self, params):
+    """Initializes an empty observation tensor."""
+    if params:
+      raise ValueError(f"Observation parameters not supported; passed {params}")
+    # The observation should contain a 1-D tensor in `self.tensor` and a
+    # dictionary of views onto the tensor, which may be of any shape.
+    # Here the observation is indexed `(cell state, row, column)`.
+    shape = (1 + _NUM_PLAYERS, _NUM_ROWS, _NUM_COLS)
+    self.tensor = np.zeros(np.prod(shape), np.float32)
+    self.dict = {"observation": np.reshape(self.tensor, shape)}
+
+  def set_from(self, state, player):
+    """Updates `tensor` and `dict` to reflect `state` from PoV of `player`."""
+    del player
+    # We update the observation via the shaped tensor since indexing is more
+    # convenient than with the 1-D tensor. Both are views onto the same memory.
+    obs = self.dict["observation"]
+    obs.fill(0)
+    for row in range(_NUM_ROWS):
+      for col in range(_NUM_COLS):
+        cell_state = ".ox".index(state.board[row, col])
+        obs[cell_state, row, col] = 1
+
+  def string_from(self, state, player):
+    """Observation of `state` from the PoV of `player`, as a string."""
+    del player
+    return _board_to_string(state.board)
+
+
+# Helper functions for game details.
+
+
+def _line_value(line):
+  """Checks a possible line, returning the winning symbol if any."""
+  if all(line == "x") or all(line == "o"):
+    return line[0]
+
+
+def _line_exists(board):
+  """Checks if a line exists, returns "x" or "o" if so, and None otherwise."""
+  return (_line_value(board[0]) or _line_value(board[1]) or
+          _line_value(board[2]) or _line_value(board[:, 0]) or
+          _line_value(board[:, 1]) or _line_value(board[:, 2]) or
+          _line_value(board.diagonal()) or
+          _line_value(np.fliplr(board).diagonal()))
+
+
+def _coord(move):
+  """Returns (row, col) from an action id."""
+  return (move // _NUM_COLS, move % _NUM_COLS)
+
+
+def _board_to_string(board):
+  """Returns a string representation of the board."""
+  return "\n".join("".join(row) for row in board)
+
+
+# Register the game with the OpenSpiel library
+
+pyspiel.register_game(_GAME_TYPE, LiarsPoker)
diff --git a/open_spiel/python/games/liars_poker_test.py b/open_spiel/python/games/liars_poker_test.py
new file mode 100644
index 0000000000..9905b29822
--- /dev/null
+++ b/open_spiel/python/games/liars_poker_test.py
@@ -0,0 +1,111 @@
+# Copyright 2019 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as python3
+"""Tests for Python Tic-Tac-Toe."""
+
+import difflib
+import os
+import pickle
+
+from absl.testing import absltest
+import numpy as np
+from open_spiel.python.algorithms.get_all_states import get_all_states
+from open_spiel.python.games import liars_poker
+from open_spiel.python.observation import make_observation
+import pyspiel
+
+_DATA_DIR = "open_spiel/integration_tests/playthroughs/"
+
+
+class TicTacToeTest(absltest.TestCase):
+
+  def test_can_create_game_and_state(self):
+    """Checks we can create the game and a state."""
+    game = liars_poker.LiarsPoker()
+    state = game.new_initial_state()
+    self.assertEqual(str(state), "...\n...\n...")
+
+  def test_random_game(self):
+    """Tests basic API functions."""
+    # This is here mostly to show the API by example.
+    # More serious simulation tests are done in python/tests/games_sim_test.py
+    # and in test_game_from_cc (below), both of which test the conformance to
+    # the API thoroughly.
+    game = liars_poker.LiarsPoker()
+    state = game.new_initial_state()
+    while not state.is_terminal():
+      print(state)
+      cur_player = state.current_player()
+      legal_actions = state.legal_actions()
+      action = np.random.choice(legal_actions)
+      print("Player {} chooses action {}".format(cur_player, action))
+      state.apply_action(action)
+    print(state)
+    print("Returns: {}".format(state.returns()))
+
+  def test_game_from_cc(self):
+    """Runs our standard game tests, checking API consistency."""
+    game = pyspiel.load_game("python_liars_poker")
+    pyspiel.random_sim_test(game, num_sims=10, serialize=False, verbose=True)
+
+  def test_observation_tensors_same(self):
+    """Checks observation tensor is the same from C++ and from Python."""
+    game = pyspiel.load_game("python_liars_poker")
+    state = game.new_initial_state()
+    for a in [4, 5, 2, 3]:
+      state.apply_action(a)
+    py_obs = make_observation(game)
+    py_obs.set_from(state, state.current_player())
+    cc_obs = state.observation_tensor()
+    np.testing.assert_array_equal(py_obs.tensor, cc_obs)
+
+  def test_pickle(self):
+    """Checks pickling and unpickling of game and state."""
+    game = pyspiel.load_game("python_liars_poker")
+    pickled_game = pickle.dumps(game)
+    unpickled_game = pickle.loads(pickled_game)
+    self.assertEqual(str(game), str(unpickled_game))
+    state = game.new_initial_state()
+    for a in [4, 2, 3, 7]:
+      state.apply_action(a)
+    ser_str = pyspiel.serialize_game_and_state(game, state)
+    new_game, new_state = pyspiel.deserialize_game_and_state(ser_str)
+    self.assertEqual(str(game), str(new_game))
+    self.assertEqual(str(state), str(new_state))
+    pickled_state = pickle.dumps(state)
+    unpickled_state = pickle.loads(pickled_state)
+    self.assertEqual(str(state), str(unpickled_state))
+
+  def test_cloned_state_matches_original_state(self):
+    """Check we can clone states successfully."""
+    game = liars_poker.LiarsPoker()
+    state = game.new_initial_state()
+    state.apply_action(1)
+    state.apply_action(2)
+    clone = state.clone()
+
+    self.assertEqual(state.history(), clone.history())
+    self.assertEqual(state.num_players(), clone.num_players())
+    self.assertEqual(state.move_number(), clone.move_number())
+    self.assertEqual(state.num_distinct_actions(), clone.num_distinct_actions())
+
+    self.assertEqual(state._cur_player, clone._cur_player)
+    self.assertEqual(state._player0_score, clone._player0_score)
+    self.assertEqual(state._is_terminal, clone._is_terminal)
+    np.testing.assert_array_equal(state.board, clone.board)
+
+
+if __name__ == "__main__":
+  absltest.main()
diff --git a/open_spiel/python/tests/pyspiel_test.py b/open_spiel/python/tests/pyspiel_test.py
index 0018f24ca1..200cc79428 100644
--- a/open_spiel/python/tests/pyspiel_test.py
+++ b/open_spiel/python/tests/pyspiel_test.py
@@ -106,6 +106,7 @@
     "python_mfg_predator_prey",
     "python_kuhn_poker",
     "python_tic_tac_toe",
+    "python_liars_poker",
     "quoridor",
     "repeated_game",
     "rbc",

From c7848f8a0e82b99b1b2fa88b3142452e9e1799f0 Mon Sep 17 00:00:00 2001
From: William Wong <w07wong@yahoo.com>
Date: Tue, 6 Dec 2022 02:14:00 -0500
Subject: [PATCH 02/13] Liars poker game framework

---
 open_spiel/python/games/liars_poker.py | 186 ++++++++++---------------
 1 file changed, 72 insertions(+), 114 deletions(-)

diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py
index 3fd8765d11..dd62f4e3c1 100644
--- a/open_spiel/python/games/liars_poker.py
+++ b/open_spiel/python/games/liars_poker.py
@@ -13,56 +13,48 @@
 # limitations under the License.
 
 # Lint as python3
-"""Tic tac toe (noughts and crosses), implemented in Python.
+"""Liar's Poker implemented in Python."""
 
-This is a demonstration of implementing a deterministic perfect-information
-game in Python.
-
-Python games are significantly slower than C++, but it may still be suitable
-for prototyping or for small games.
-
-It is possible to run C++ algorithms on Python implemented games, This is likely
-to have good performance if the algorithm simply extracts a game tree and then
-works with that (e.g. CFR algorithms). It is likely to be poor if the algorithm
-relies on processing and updating states as it goes, e.g. MCTS.
-"""
+import enum
 
 import numpy as np
 
-from open_spiel.python.observation import IIGObserverForPublicInfoGame
 import pyspiel
 
+
+class Action(enum.IntEnum):
+  BID = 0
+  CHALLENGE = 1
+
 _NUM_PLAYERS = 2
-_NUM_ROWS = 3
-_NUM_COLS = 3
-_NUM_CELLS = _NUM_ROWS * _NUM_COLS
+_HAND_LENGTH = 3
+_NUM_DIGITS = 3 # Number of digits to include from the range 1, 2, ..., 9, 0
 _GAME_TYPE = pyspiel.GameType(
     short_name="python_liars_poker",
     long_name="Python Liars Poker",
     dynamics=pyspiel.GameType.Dynamics.SEQUENTIAL,
-    chance_mode=pyspiel.GameType.ChanceMode.DETERMINISTIC,
-    information=pyspiel.GameType.Information.PERFECT_INFORMATION,
+    chance_mode=pyspiel.GameType.ChanceMode.EXPLICIT_STOCHASTIC,
+    information=pyspiel.GameType.Information.IMPERFECT_INFORMATION,
     utility=pyspiel.GameType.Utility.ZERO_SUM,
     reward_model=pyspiel.GameType.RewardModel.TERMINAL,
     max_num_players=_NUM_PLAYERS,
     min_num_players=_NUM_PLAYERS,
     provides_information_state_string=True,
     provides_information_state_tensor=False,
-    provides_observation_string=True,
+    provides_observation_string=False,
     provides_observation_tensor=True,
-    parameter_specification={})
+    parameter_specification={
+      "players": _NUM_PLAYERS,
+      "hand_length": _HAND_LENGTH,
+      "num_digits": _NUM_DIGITS
+    })
 _GAME_INFO = pyspiel.GameInfo(
-    num_distinct_actions=_NUM_CELLS,
-    max_chance_outcomes=0,
-    num_players=2,
-    min_utility=-1.0,
-    max_utility=1.0,
-    utility_sum=0.0,
-    max_game_length=_NUM_CELLS)
-
+    num_distinct_actions=len(Action),
+    max_chance_outcomes=_HAND_LENGTH * _NUM_DIGITS,
+    num_players=_NUM_PLAYERS)
 
 class LiarsPoker(pyspiel.Game):
-  """A Python version of the Tic-Tac-Toe game."""
+  """A Python version of Liar's poker."""
 
   def __init__(self, params=None):
     super().__init__(_GAME_TYPE, _GAME_INFO, params or dict())
@@ -73,123 +65,89 @@ def new_initial_state(self):
 
   def make_py_observer(self, iig_obs_type=None, params=None):
     """Returns an object used for observing game state."""
-    if ((iig_obs_type is None) or
-        (iig_obs_type.public_info and not iig_obs_type.perfect_recall)):
-      return BoardObserver(params)
-    else:
-      return IIGObserverForPublicInfoGame(iig_obs_type, params)
+    return LiarsPokerObserver(
+      iig_obs_type or pyspiel.IIGObservationType(perfect_recall=False),
+      params)
 
 
 class LiarsPokerState(pyspiel.State):
-  """A python version of the Tic-Tac-Toe state."""
+  """A python version of the Liars Poker state."""
 
   def __init__(self, game):
     """Constructor; should only be called by Game.new_initial_state."""
     super().__init__(game)
-    self._cur_player = 0
-    self._player0_score = 0.0
-    self._is_terminal = False
-    self.board = np.full((_NUM_ROWS, _NUM_COLS), ".")
-
-  # OpenSpiel (PySpiel) API functions are below. This is the standard set that
-  # should be implemented by every perfect-information sequential-move game.
 
   def current_player(self):
     """Returns id of the next player to move, or TERMINAL if game is over."""
-    return pyspiel.PlayerId.TERMINAL if self._is_terminal else self._cur_player
+    if self._game_over:
+      return pyspiel.PlayerId.TERMINAL
+    elif len(self.cards) < _NUM_PLAYERS:
+      return pyspiel.PlayerId.CHANCE
+    else:
+      return self._next_player
 
   def _legal_actions(self, player):
     """Returns a list of legal actions, sorted in ascending order."""
-    return [a for a in range(_NUM_CELLS) if self.board[_coord(a)] == "."]
+    assert player >= 0
+    return [Action.PASS, Action.BET]
+
+  def chance_outcomes(self):
+    """Returns the possible chance outcomes and their probabilities."""
+    assert self.is_chance_node()
+    outcomes = sorted(_DECK - set(self.cards))
+    p = 1.0 / len(outcomes)
+    return [(o, p) for o in outcomes]
 
   def _apply_action(self, action):
     """Applies the specified action to the state."""
-    self.board[_coord(action)] = "x" if self._cur_player == 0 else "o"
-    if _line_exists(self.board):
-      self._is_terminal = True
-      self._player0_score = 1.0 if self._cur_player == 0 else -1.0
-    elif all(self.board.ravel() != "."):
-      self._is_terminal = True
+    if self.is_chance_node():
+      self.cards.append(action)
     else:
-      self._cur_player = 1 - self._cur_player
+      self.bets.append(action)
+      if action == Action.BET:
+        self.pot[self._next_player] += 1
+      self._next_player = 1 - self._next_player
+      if ((min(self.pot) == 2) or
+          (len(self.bets) == 2 and action == Action.PASS) or
+          (len(self.bets) == 3)):
+        self._game_over = True
 
   def _action_to_string(self, player, action):
     """Action -> string."""
-    row, col = _coord(action)
-    return "{}({},{})".format("x" if player == 0 else "o", row, col)
+    if player == pyspiel.PlayerId.CHANCE:
+      return f"Deal:{action}"
+    elif action == Action.PASS:
+      return "Pass"
+    else:
+      return "Bet"
 
   def is_terminal(self):
     """Returns True if the game is over."""
-    return self._is_terminal
+    return self._game_over
 
   def returns(self):
     """Total reward for each player over the course of the game so far."""
-    return [self._player0_score, -self._player0_score]
+    pot = self.pot
+    winnings = float(min(pot))
+    if not self._game_over:
+      return [0., 0.]
+    elif pot[0] > pot[1]:
+      return [winnings, -winnings]
+    elif pot[0] < pot[1]:
+      return [-winnings, winnings]
+    elif self.cards[0] > self.cards[1]:
+      return [winnings, -winnings]
+    else:
+      return [-winnings, winnings]
 
   def __str__(self):
     """String for debug purposes. No particular semantics are required."""
-    return _board_to_string(self.board)
+    return "".join([str(c) for c in self.cards] + ["pb"[b] for b in self.bets])
 
 
-class BoardObserver:
+class LiarsPokerObserver:
   """Observer, conforming to the PyObserver interface (see observation.py)."""
-
-  def __init__(self, params):
-    """Initializes an empty observation tensor."""
-    if params:
-      raise ValueError(f"Observation parameters not supported; passed {params}")
-    # The observation should contain a 1-D tensor in `self.tensor` and a
-    # dictionary of views onto the tensor, which may be of any shape.
-    # Here the observation is indexed `(cell state, row, column)`.
-    shape = (1 + _NUM_PLAYERS, _NUM_ROWS, _NUM_COLS)
-    self.tensor = np.zeros(np.prod(shape), np.float32)
-    self.dict = {"observation": np.reshape(self.tensor, shape)}
-
-  def set_from(self, state, player):
-    """Updates `tensor` and `dict` to reflect `state` from PoV of `player`."""
-    del player
-    # We update the observation via the shaped tensor since indexing is more
-    # convenient than with the 1-D tensor. Both are views onto the same memory.
-    obs = self.dict["observation"]
-    obs.fill(0)
-    for row in range(_NUM_ROWS):
-      for col in range(_NUM_COLS):
-        cell_state = ".ox".index(state.board[row, col])
-        obs[cell_state, row, col] = 1
-
-  def string_from(self, state, player):
-    """Observation of `state` from the PoV of `player`, as a string."""
-    del player
-    return _board_to_string(state.board)
-
-
-# Helper functions for game details.
-
-
-def _line_value(line):
-  """Checks a possible line, returning the winning symbol if any."""
-  if all(line == "x") or all(line == "o"):
-    return line[0]
-
-
-def _line_exists(board):
-  """Checks if a line exists, returns "x" or "o" if so, and None otherwise."""
-  return (_line_value(board[0]) or _line_value(board[1]) or
-          _line_value(board[2]) or _line_value(board[:, 0]) or
-          _line_value(board[:, 1]) or _line_value(board[:, 2]) or
-          _line_value(board.diagonal()) or
-          _line_value(np.fliplr(board).diagonal()))
-
-
-def _coord(move):
-  """Returns (row, col) from an action id."""
-  return (move // _NUM_COLS, move % _NUM_COLS)
-
-
-def _board_to_string(board):
-  """Returns a string representation of the board."""
-  return "\n".join("".join(row) for row in board)
-
+  raise NotImplementedError()
 
 # Register the game with the OpenSpiel library
 

From 7bb105cb0499b716c9630bec864129e87f123b6a Mon Sep 17 00:00:00 2001
From: William Wong <w07wong@yahoo.com>
Date: Tue, 6 Dec 2022 15:52:09 -0500
Subject: [PATCH 03/13] Current player, legal actions, chance outcomes logic

---
 .../playthroughs/python_liars_poker.txt       |  4 +-
 open_spiel/python/games/liars_poker.py        | 46 +++++++++++++++----
 2 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/open_spiel/integration_tests/playthroughs/python_liars_poker.txt b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt
index 5a36ef7cc7..033397928d 100644
--- a/open_spiel/integration_tests/playthroughs/python_liars_poker.txt
+++ b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt
@@ -8,8 +8,8 @@ GameType.max_num_players = 2
 GameType.min_num_players = 2
 GameType.parameter_specification = []
 GameType.provides_information_state_string = True
-GameType.provides_information_state_tensor = False
-GameType.provides_observation_string = True
+GameType.provides_information_state_tensor = True 
+GameType.provides_observation_string = False
 GameType.provides_observation_tensor = True
 GameType.provides_factored_observation_string = False
 GameType.reward_model = RewardModel.TERMINAL
diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py
index dd62f4e3c1..c3e85d84e8 100644
--- a/open_spiel/python/games/liars_poker.py
+++ b/open_spiel/python/games/liars_poker.py
@@ -29,6 +29,9 @@ class Action(enum.IntEnum):
 _NUM_PLAYERS = 2
 _HAND_LENGTH = 3
 _NUM_DIGITS = 3 # Number of digits to include from the range 1, 2, ..., 9, 0
+_FULL_DECK = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
+_DECK = [_FULL_DECK[i] for i in range(_NUM_DIGITS)]
+
 _GAME_TYPE = pyspiel.GameType(
     short_name="python_liars_poker",
     long_name="Python Liars Poker",
@@ -40,7 +43,7 @@ class Action(enum.IntEnum):
     max_num_players=_NUM_PLAYERS,
     min_num_players=_NUM_PLAYERS,
     provides_information_state_string=True,
-    provides_information_state_tensor=False,
+    provides_information_state_tensor=True,
     provides_observation_string=False,
     provides_observation_tensor=True,
     parameter_specification={
@@ -76,27 +79,52 @@ class LiarsPokerState(pyspiel.State):
   def __init__(self, game):
     """Constructor; should only be called by Game.new_initial_state."""
     super().__init__(game)
+    self.hands = [] # List containing the hands for each player, represented as a list.
+    self._current_player = 0
+    self._current_bid = -1
 
   def current_player(self):
-    """Returns id of the next player to move, or TERMINAL if game is over."""
-    if self._game_over:
+    """Returns id of the current player to act.
+    
+    The id is:
+      - TERMINAL if game is over.
+      - CHANCE if a player is drawing a number to fill out their hand.
+      - a number otherwise.
+    """
+    if self._is_terminal:
       return pyspiel.PlayerId.TERMINAL
-    elif len(self.cards) < _NUM_PLAYERS:
+    elif len(self.hands) < _NUM_PLAYERS or len(self.hands[_NUM_PLAYERS - 1]) < _HAND_LENGTH:
       return pyspiel.PlayerId.CHANCE
     else:
-      return self._next_player
+      return self._current_player
+
+  def _is_call_possible(self):
+    raise NotImplementedError()
+
+  def _is_challenge_possible(self):
+    raise NotImplementedError()
 
   def _legal_actions(self, player):
     """Returns a list of legal actions, sorted in ascending order."""
     assert player >= 0
-    return [Action.PASS, Action.BET]
+    actions = []
+    # Any move higher than the current bid is allowed. (Bids start at 0)
+    for b in range(self._current_bid + 1, _HAND_LENGTH * _NUM_PLAYERS):
+      actions.append(b)
+    
+    if self._is_call_possible():
+      actions.append(Action.BID)
+    # TODO: verify Action.BID is not the same as the nubmer 0.
+    if self._is_challenge_possible():
+      actions.append(Action.CHALLENGE)
+    # TODO: add game logic for when all players challenge - automatically count
+    return actions
 
   def chance_outcomes(self):
     """Returns the possible chance outcomes and their probabilities."""
     assert self.is_chance_node()
-    outcomes = sorted(_DECK - set(self.cards))
-    p = 1.0 / len(outcomes)
-    return [(o, p) for o in outcomes]
+    probability = 1.0 / len(_DECK)
+    return [(digit, probability) for digit in _DECK]
 
   def _apply_action(self, action):
     """Applies the specified action to the state."""

From 762902d2cdefcdddad3e047478b56cc725c88c7b Mon Sep 17 00:00:00 2001
From: William Wong <w07wong@yahoo.com>
Date: Fri, 9 Dec 2022 20:42:33 -0500
Subject: [PATCH 04/13] Apply action, counts and reward logic

---
 open_spiel/python/games/liars_poker.py | 132 ++++++++++++++++++-------
 1 file changed, 95 insertions(+), 37 deletions(-)

diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py
index c3e85d84e8..b798ef5d60 100644
--- a/open_spiel/python/games/liars_poker.py
+++ b/open_spiel/python/games/liars_poker.py
@@ -30,7 +30,6 @@ class Action(enum.IntEnum):
 _HAND_LENGTH = 3
 _NUM_DIGITS = 3 # Number of digits to include from the range 1, 2, ..., 9, 0
 _FULL_DECK = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
-_DECK = [_FULL_DECK[i] for i in range(_NUM_DIGITS)]
 
 _GAME_TYPE = pyspiel.GameType(
     short_name="python_liars_poker",
@@ -47,9 +46,9 @@ class Action(enum.IntEnum):
     provides_observation_string=False,
     provides_observation_tensor=True,
     parameter_specification={
-      "players": _NUM_PLAYERS,
+      "num_players": _NUM_PLAYERS,
       "hand_length": _HAND_LENGTH,
-      "num_digits": _NUM_DIGITS
+      "num_digits": _NUM_DIGITS,
     })
 _GAME_INFO = pyspiel.GameInfo(
     num_distinct_actions=len(Action),
@@ -61,6 +60,7 @@ class LiarsPoker(pyspiel.Game):
 
   def __init__(self, params=None):
     super().__init__(_GAME_TYPE, _GAME_INFO, params or dict())
+    self.deck = [_FULL_DECK[i] for i in range(_NUM_DIGITS)]
 
   def new_initial_state(self):
     """Returns a state corresponding to the start of a game."""
@@ -79,9 +79,24 @@ class LiarsPokerState(pyspiel.State):
   def __init__(self, game):
     """Constructor; should only be called by Game.new_initial_state."""
     super().__init__(game)
-    self.hands = [] # List containing the hands for each player, represented as a list.
+    # Game attributes
+    self._num_players = game.num_players
+    self._hand_length = game.hand_length
+    self._num_digits = game.num_digits
+    self._deck = game.deck
+    self.hands = [[] for _ in range(self._num_players)]
+
+    # Action dynamics
     self._current_player = 0
+    self._bid_originator = 0
     self._current_bid = -1
+    self._num_challenges = 0
+    self._is_rebid = False
+
+    # Game over dynamics
+    self._game_over = False
+    self._winner = -1
+    self._loser = -1
 
   def current_player(self):
     """Returns id of the current player to act.
@@ -93,28 +108,25 @@ def current_player(self):
     """
     if self._is_terminal:
       return pyspiel.PlayerId.TERMINAL
-    elif len(self.hands) < _NUM_PLAYERS or len(self.hands[_NUM_PLAYERS - 1]) < _HAND_LENGTH:
+    elif len(self.hands[self._num_players - 1]) < self._hand_length:
       return pyspiel.PlayerId.CHANCE
     else:
       return self._current_player
 
-  def _is_call_possible(self):
-    raise NotImplementedError()
-
   def _is_challenge_possible(self):
-    raise NotImplementedError()
+    return self._current_bid != -1
+
+  def _is_rebid_possible(self):
+    return self._num_challenges == self._num_players - 1
 
   def _legal_actions(self, player):
     """Returns a list of legal actions, sorted in ascending order."""
     assert player >= 0
     actions = []
     # Any move higher than the current bid is allowed. (Bids start at 0)
-    for b in range(self._current_bid + 1, _HAND_LENGTH * _NUM_PLAYERS):
+    for b in range(self._current_bid + 1, self._num_digits * self._hand_length * self._num_players):
       actions.append(b)
     
-    if self._is_call_possible():
-      actions.append(Action.BID)
-    # TODO: verify Action.BID is not the same as the nubmer 0.
     if self._is_challenge_possible():
       actions.append(Action.CHALLENGE)
     # TODO: add game logic for when all players challenge - automatically count
@@ -123,29 +135,78 @@ def _legal_actions(self, player):
   def chance_outcomes(self):
     """Returns the possible chance outcomes and their probabilities."""
     assert self.is_chance_node()
-    probability = 1.0 / len(_DECK)
-    return [(digit, probability) for digit in _DECK]
+    probability = 1.0 / self._num_digits
+    return [(digit, probability) for digit in self._deck]
+
+  def _decode_bid(self, bid):
+    """
+    Turns a bid ID in the range 0 to NUM_DIGITS * HAND_LENGTH * NUM_PLAYERS to a count and number.
+
+    For example, take 2 players each with 2 numbers from the deck of 1, 2, and 3.
+      - A bid of two 1's would correspond to a bid id 1.
+        - Explanation: 1 is the lowest number, and the only lower bid would be zero 1's.
+      - A bid of three 3's would correspond to a bid id 10.
+        - Explanation: 1-4 1's take bid ids 0-3. 1-4 2's take bid ids 4-7. 1 and 2 3's take bid ids 8 and 9.
+
+    Returns a tuple of (count, number). For example, (1, 2) represents one 2's.
+    """
+    count = bid % (self._hand_length * self._num_players)
+    number = self._deck[bid // (self._hand_length * self._num_players)]
+    return (count, number)
+
+  def _counts(self):
+    """
+    Determines if the bid originator wins or loses.
+    """
+    bid_count, bid_number = self._decode_bid(self._current_bid)
+
+    # Count the number of bid_numbers from all players.
+    matches = 0
+    for player_id in range(self._num_players):
+      for digit in self.hands[player_id]:
+        if digit == bid_number:
+          matches += 1
+    
+    # If the number of matches are at least the bid_count bid, then the bidder wins.
+    # Otherwise everyone else wins.
+    if matches >= bid_count:
+      self._winner = self._bid_originator
+    else:
+      self._loser = self._bid_originator
 
   def _apply_action(self, action):
     """Applies the specified action to the state."""
     if self.is_chance_node():
-      self.cards.append(action)
-    else:
-      self.bets.append(action)
-      if action == Action.BET:
-        self.pot[self._next_player] += 1
-      self._next_player = 1 - self._next_player
-      if ((min(self.pot) == 2) or
-          (len(self.bets) == 2 and action == Action.PASS) or
-          (len(self.bets) == 3)):
+      # If we are still populating hands, draw a number for the current player.
+      self.hands[self._current_player].append(action)
+    elif action == Action.CHALLENGE:
+      assert self._is_challenge_possible()
+      self._num_challenges += 1
+      # If there is no ongoing rebid, check if all players challenge before counting.
+      # If there is an ongoing rebid, count once all the players except the bidder challenges.
+      if (not self._is_rebid and self._num_challenges == self._num_players) or (
+        self._is_rebid and self._num_challenges == self._num_players - 1):
+        # TODO: counts
         self._game_over = True
+    else:
+      # Set the current bid and bid originator to the action and current player.
+      self._current_bid = action
+      self._bid_originator = self._current_player
+      # If all players but the bid originator have chllenged but the originator bids again, we have a rebid.
+      if self._num_challenges == self._num_players - 1:
+        self._is_rebid = True
+      else:
+        # Otherwise, we have a regular bid.
+        self._is_rebid = False
+      self._num_challenges = 0
+    self._current_player = (self._current_player + 1) % self._num_players
 
   def _action_to_string(self, player, action):
     """Action -> string."""
     if player == pyspiel.PlayerId.CHANCE:
       return f"Deal:{action}"
-    elif action == Action.PASS:
-      return "Pass"
+    elif action == Action.CHALLENGE:
+      return "Challenge"
     else:
       return "Bet"
 
@@ -155,20 +216,17 @@ def is_terminal(self):
 
   def returns(self):
     """Total reward for each player over the course of the game so far."""
-    pot = self.pot
-    winnings = float(min(pot))
-    if not self._game_over:
-      return [0., 0.]
-    elif pot[0] > pot[1]:
-      return [winnings, -winnings]
-    elif pot[0] < pot[1]:
-      return [-winnings, winnings]
-    elif self.cards[0] > self.cards[1]:
-      return [winnings, -winnings]
+    if self._winner != -1:
+      bidder_reward = self._num_players - 1
+      others_reward = -1.
     else:
-      return [-winnings, winnings]
+      bidder_reward = - self._num_players - 1
+      others_reward = 1.
+    return [others_reward if player_id != self._bid_originator else bidder_reward
+      for player_id in range(self._num_players)]
 
   def __str__(self):
+    # TODO
     """String for debug purposes. No particular semantics are required."""
     return "".join([str(c) for c in self.cards] + ["pb"[b] for b in self.bets])
 

From da690fa307ff8898345779e70cc6e2052cba25bb Mon Sep 17 00:00:00 2001
From: William Wong <w07wong@yahoo.com>
Date: Sun, 18 Dec 2022 01:16:03 -0800
Subject: [PATCH 05/13] Liars poker observer

---
 open_spiel/python/games/liars_poker.py | 78 +++++++++++++++++++++++---
 1 file changed, 71 insertions(+), 7 deletions(-)

diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py
index b798ef5d60..99cfaa127b 100644
--- a/open_spiel/python/games/liars_poker.py
+++ b/open_spiel/python/games/liars_poker.py
@@ -70,7 +70,9 @@ def make_py_observer(self, iig_obs_type=None, params=None):
     """Returns an object used for observing game state."""
     return LiarsPokerObserver(
       iig_obs_type or pyspiel.IIGObservationType(perfect_recall=False),
-      params)
+      _NUM_PLAYERS,
+      _HAND_LENGTH,
+      _NUM_DIGITS)
 
 
 class LiarsPokerState(pyspiel.State):
@@ -80,6 +82,7 @@ def __init__(self, game):
     """Constructor; should only be called by Game.new_initial_state."""
     super().__init__(game)
     # Game attributes
+    # TODO: need to verify have access to these game attributes.
     self._num_players = game.num_players
     self._hand_length = game.hand_length
     self._num_digits = game.num_digits
@@ -87,6 +90,7 @@ def __init__(self, game):
     self.hands = [[] for _ in range(self._num_players)]
 
     # Action dynamics
+    self.actions = [[] for _ in range(self._num_players)]
     self._current_player = 0
     self._bid_originator = 0
     self._current_bid = -1
@@ -179,25 +183,25 @@ def _apply_action(self, action):
     if self.is_chance_node():
       # If we are still populating hands, draw a number for the current player.
       self.hands[self._current_player].append(action)
+      return
     elif action == Action.CHALLENGE:
+      self.actions[self._current_player].append(action)
       assert self._is_challenge_possible()
       self._num_challenges += 1
       # If there is no ongoing rebid, check if all players challenge before counting.
       # If there is an ongoing rebid, count once all the players except the bidder challenges.
       if (not self._is_rebid and self._num_challenges == self._num_players) or (
         self._is_rebid and self._num_challenges == self._num_players - 1):
-        # TODO: counts
+        self._counts()
         self._game_over = True
     else:
+      self.actions[self._current_player].append(action)
       # Set the current bid and bid originator to the action and current player.
       self._current_bid = action
       self._bid_originator = self._current_player
       # If all players but the bid originator have chllenged but the originator bids again, we have a rebid.
       if self._num_challenges == self._num_players - 1:
         self._is_rebid = True
-      else:
-        # Otherwise, we have a regular bid.
-        self._is_rebid = False
       self._num_challenges = 0
     self._current_player = (self._current_player + 1) % self._num_players
 
@@ -228,12 +232,72 @@ def returns(self):
   def __str__(self):
     # TODO
     """String for debug purposes. No particular semantics are required."""
-    return "".join([str(c) for c in self.cards] + ["pb"[b] for b in self.bets])
+    return "Hands: {}, Bidder: {}, Current Player: {}, Current Bid: {}, Rebid: {}".format(
+      self.hands,
+      self._bid_originator,
+      self.current_player(),
+      self._current_bid,
+      self._is_rebid)
 
 
 class LiarsPokerObserver:
   """Observer, conforming to the PyObserver interface (see observation.py)."""
-  raise NotImplementedError()
+
+  def __init__(self, iig_obs_type, num_players, hand_length, num_digits):
+    """Initiliazes an empty observation tensor."""
+    self.num_players = num_players
+    self.hand_length = hand_length
+
+    # Determine which observation pieces we want to include.
+    # Pieces is a list of tuples containing observation pieces.
+    # Pieces are described by their (name, number of elements, and shape).
+    pieces = [("player", num_players, (num_players,))] # One-hot encoding for the player id.
+    if iig_obs_type.private_info == pyspiel.PrivateInfoType.SINGLE_PLAYER:
+      # One-hot encoding for each digit in a player's hand
+      pieces.append(("private_hand", hand_length * num_digits, (hand_length, num_digits)))
+    if iig_obs_type.public_info:
+      if iig_obs_type.perfect_recall:
+        # One-hot encoding for a player's moves at every round.
+        total_possible_rounds = num_players * hand_length * num_digits
+        num_actions = 2
+        pieces.append(("action_history",
+                       total_possible_rounds * num_actions,
+                       (total_possible_rounds, num_actions)))
+
+    # Build the single flat tensor.
+    total_size = sum(size for name, size, shape in pieces)
+    self.tensor = np.zeros(total_size, np.float32)
+
+    # Build the named & reshaped views of the bits of the flat tensor.
+    self.dict = {}
+    index = 0
+    for name, size, shape in pieces:
+      self.dict[name] = self.tensor[index:index + size].reshape(shape)
+      index += size
+
+  def set_from(self, state, player):
+    """Updates `tensor` and `dict` to reflect `state` from PoV of `player`."""
+    self.tensor.fill(0)
+    if "player" in self.dict:
+      self.dict["player"][player] = 1
+    if "private_hand" in self.dict and len(state.hands[player]) == self.hand_length:
+      for i in range(len(state.hands[player])):
+        self.dict["private_hand"][i][state.hands[player][i]] = 1
+    if "action_history" in self.dict:
+      for round, action in enumerate(state.actions[player]):
+        self.dict["action_history"][round, action] = 1
+
+  def string_from(self, state, player):
+    """Observation of `state` from the PoV of `player`, as a string."""
+    pieces = []
+    if "player" in self.dict:
+      pieces.append(f"p{player}")
+    if "private_hand" in self.dict and len(state.hands[player]) == self.hand_length:
+      pieces.append(f"hand:{state.hands[player]}")
+    if "action_history" in self.dict and state.actions[player]:
+      # bc = bid, challenge. b is 0 or 1 and indexes into "bc" to stringify the action.
+      pieces.append("".join("bc"[b] for b in state.actions[player]))
+    return " ".join(str(p) for p in pieces)
 
 # Register the game with the OpenSpiel library
 

From 06de6fbe95834fb62986a928f931c4d98d978c39 Mon Sep 17 00:00:00 2001
From: William Wong <w07wong@yahoo.com>
Date: Mon, 19 Dec 2022 20:43:01 -0800
Subject: [PATCH 06/13] State changes, bug fixes

---
 open_spiel/python/games/liars_poker.py | 165 +++++++++++++++++--------
 1 file changed, 111 insertions(+), 54 deletions(-)

diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py
index 99cfaa127b..b81cd556d0 100644
--- a/open_spiel/python/games/liars_poker.py
+++ b/open_spiel/python/games/liars_poker.py
@@ -26,7 +26,8 @@ class Action(enum.IntEnum):
   BID = 0
   CHALLENGE = 1
 
-_NUM_PLAYERS = 2
+_MAX_NUM_PLAYERS = 10
+_MIN_NUM_PLAYERS = 2
 _HAND_LENGTH = 3
 _NUM_DIGITS = 3 # Number of digits to include from the range 1, 2, ..., 9, 0
 _FULL_DECK = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
@@ -39,28 +40,33 @@ class Action(enum.IntEnum):
     information=pyspiel.GameType.Information.IMPERFECT_INFORMATION,
     utility=pyspiel.GameType.Utility.ZERO_SUM,
     reward_model=pyspiel.GameType.RewardModel.TERMINAL,
-    max_num_players=_NUM_PLAYERS,
-    min_num_players=_NUM_PLAYERS,
+    max_num_players=_MAX_NUM_PLAYERS,
+    min_num_players=_MIN_NUM_PLAYERS,
     provides_information_state_string=True,
     provides_information_state_tensor=True,
     provides_observation_string=False,
-    provides_observation_tensor=True,
-    parameter_specification={
-      "num_players": _NUM_PLAYERS,
-      "hand_length": _HAND_LENGTH,
-      "num_digits": _NUM_DIGITS,
-    })
+    provides_observation_tensor=True)
 _GAME_INFO = pyspiel.GameInfo(
     num_distinct_actions=len(Action),
     max_chance_outcomes=_HAND_LENGTH * _NUM_DIGITS,
-    num_players=_NUM_PLAYERS)
+    num_players=_MIN_NUM_PLAYERS,
+    min_utility=-(_MIN_NUM_PLAYERS - 1), # Reward from being challenged and losing.
+    max_utility=_MIN_NUM_PLAYERS - 1, # Reward for being challenged and winning.
+    utility_sum=0.0,
+    # Number of possible rounds: hand_length * num_digits * num_players
+    # Total moves per round: num_players for non-rebid, num_players-1 for rebid
+    # Max game length: number of possible rounds * total moves per round
+    max_game_length=_HAND_LENGTH * _NUM_DIGITS * _MIN_NUM_PLAYERS**2)
 
 class LiarsPoker(pyspiel.Game):
   """A Python version of Liar's poker."""
 
   def __init__(self, params=None):
     super().__init__(_GAME_TYPE, _GAME_INFO, params or dict())
-    self.deck = [_FULL_DECK[i] for i in range(_NUM_DIGITS)]
+    self.deck = [_FULL_DECK[i] for i in range(params.get("num_digits", default=_NUM_DIGITS))]
+    self.num_players = params.get("num_players", default=_MIN_NUM_PLAYERS)
+    self.hand_length = params.get("hand_length", default=_HAND_LENGTH)
+    self.num_digits = params.get("num_digits", default=_NUM_DIGITS)
 
   def new_initial_state(self):
     """Returns a state corresponding to the start of a game."""
@@ -70,9 +76,10 @@ def make_py_observer(self, iig_obs_type=None, params=None):
     """Returns an object used for observing game state."""
     return LiarsPokerObserver(
       iig_obs_type or pyspiel.IIGObservationType(perfect_recall=False),
-      _NUM_PLAYERS,
-      _HAND_LENGTH,
-      _NUM_DIGITS)
+      self.num_players,
+      self.hand_length,
+      self.num_digits,
+      params)
 
 
 class LiarsPokerState(pyspiel.State):
@@ -82,7 +89,6 @@ def __init__(self, game):
     """Constructor; should only be called by Game.new_initial_state."""
     super().__init__(game)
     # Game attributes
-    # TODO: need to verify have access to these game attributes.
     self._num_players = game.num_players
     self._hand_length = game.hand_length
     self._num_digits = game.num_digits
@@ -90,12 +96,14 @@ def __init__(self, game):
     self.hands = [[] for _ in range(self._num_players)]
 
     # Action dynamics
-    self.actions = [[] for _ in range(self._num_players)]
+    total_possible_bets = game.hand_length * game.num_digits * game.num_players
+    self.bid_history = np.zeros((total_possible_bets, game.num_players))
+    self.challenge_history = np.zeros((total_possible_bets, game.num_players))
     self._current_player = 0
     self._bid_originator = 0
     self._current_bid = -1
     self._num_challenges = 0
-    self._is_rebid = False
+    self.is_rebid = False
 
     # Game over dynamics
     self._game_over = False
@@ -118,22 +126,26 @@ def current_player(self):
       return self._current_player
 
   def _is_challenge_possible(self):
+    """A challenge is possible once the first bid is made."""
     return self._current_bid != -1
 
   def _is_rebid_possible(self):
-    return self._num_challenges == self._num_players - 1
+    """A rebid is only possible when all players have challenged the original bid."""
+    return not self.is_rebid and self._num_challenges == self._num_players - 1
 
   def _legal_actions(self, player):
     """Returns a list of legal actions, sorted in ascending order."""
     assert player >= 0
     actions = []
-    # Any move higher than the current bid is allowed. (Bids start at 0)
-    for b in range(self._current_bid + 1, self._num_digits * self._hand_length * self._num_players):
-      actions.append(b)
+
+    if player != self._bid_originator or self._is_rebid_possible():
+      # Any move higher than the current bid is allowed. (Bids start at 0)
+      for b in range(self._current_bid + 1, self._hand_length * self._num_digits * self._num_players):
+        actions.append(b)
     
     if self._is_challenge_possible():
       actions.append(Action.CHALLENGE)
-    # TODO: add game logic for when all players challenge - automatically count
+
     return actions
 
   def chance_outcomes(self):
@@ -144,7 +156,7 @@ def chance_outcomes(self):
 
   def _decode_bid(self, bid):
     """
-    Turns a bid ID in the range 0 to NUM_DIGITS * HAND_LENGTH * NUM_PLAYERS to a count and number.
+    Turns a bid ID in the range 0 to HAND_LENGTH * NUM_DIGITS * NUM_PLAYERS to a count and number.
 
     For example, take 2 players each with 2 numbers from the deck of 1, 2, and 3.
       - A bid of two 1's would correspond to a bid id 1.
@@ -178,8 +190,16 @@ def _counts(self):
     else:
       self._loser = self._bid_originator
 
+  def _update_bid_history(self, bid, player):
+    """Writes a player's bid into memory."""
+    self.bid_history[bid][player] = 1
+
+  def _update_challenge_history(self, bid, player):
+    """Write a player's challenge for a bid into memory."""
+    self.challenge_history[bid][player] = 1
+
   def _apply_action(self, action):
-    """Applies the specified action to the state."""
+    """Applies an action and updates the state."""
     if self.is_chance_node():
       # If we are still populating hands, draw a number for the current player.
       self.hands[self._current_player].append(action)
@@ -187,21 +207,27 @@ def _apply_action(self, action):
     elif action == Action.CHALLENGE:
       self.actions[self._current_player].append(action)
       assert self._is_challenge_possible()
+      self._update_challenge_history(self._current_bid, self._current_player)
       self._num_challenges += 1
       # If there is no ongoing rebid, check if all players challenge before counting.
       # If there is an ongoing rebid, count once all the players except the bidder challenges.
-      if (not self._is_rebid and self._num_challenges == self._num_players) or (
-        self._is_rebid and self._num_challenges == self._num_players - 1):
+      if (not self.is_rebid and self._num_challenges == self._num_players) or (
+        self.is_rebid and self._num_challenges == self._num_players - 1):
         self._counts()
         self._game_over = True
     else:
       self.actions[self._current_player].append(action)
-      # Set the current bid and bid originator to the action and current player.
+      # Set the current bid to the action.
       self._current_bid = action
+      if self._current_player == self._bid_originator:
+        # If the bid originator is bidding again, we have a rebid.
+        self.is_rebid = True
+      else:
+         # Otherwise, we have a regular bid.
+         self.is_rebid = False
+      # Set the bid originator to the current player.
       self._bid_originator = self._current_player
-      # If all players but the bid originator have chllenged but the originator bids again, we have a rebid.
-      if self._num_challenges == self._num_players - 1:
-        self._is_rebid = True
+      self._update_bid_history(self._current_bid, self._current_player)
       self._num_challenges = 0
     self._current_player = (self._current_player + 1) % self._num_players
 
@@ -222,28 +248,42 @@ def returns(self):
     """Total reward for each player over the course of the game so far."""
     if self._winner != -1:
       bidder_reward = self._num_players - 1
-      others_reward = -1.
+      others_reward = -1.0
+    elif self._loser != -1:
+      bidder_reward = -1 * (self._num_players - 1)
+      others_reward = 1.0
     else:
-      bidder_reward = - self._num_players - 1
-      others_reward = 1.
+      # Game is not over.
+      bidder_reward = 0.0
+      others_reward = 0.0
     return [others_reward if player_id != self._bid_originator else bidder_reward
       for player_id in range(self._num_players)]
 
   def __str__(self):
-    # TODO
     """String for debug purposes. No particular semantics are required."""
     return "Hands: {}, Bidder: {}, Current Player: {}, Current Bid: {}, Rebid: {}".format(
       self.hands,
       self._bid_originator,
       self.current_player(),
       self._current_bid,
-      self._is_rebid)
+      self.is_rebid)
 
 
 class LiarsPokerObserver:
-  """Observer, conforming to the PyObserver interface (see observation.py)."""
-
-  def __init__(self, iig_obs_type, num_players, hand_length, num_digits):
+  """Observer, conforming to the PyObserver interface (see observation.py).
+  
+    An observation will consist of the following:
+      - One hot encoding of the current player number: [0 0 0 1 0 0 0]
+      - A vector of length hand_length containing the digits in a player's hand.
+      - Two matrices each of size (hand_length * num_digits * num_players, num_players)
+        will store bids and challenges respectively. Each row in the matrix corresponds
+        to a particular bid (e.g. one 1, two 5s, or eight 3s). 0 will represent no
+        action. 1 will represent a player's bid or a player's challenge.
+      - One bit for whether we are rebidding: [1] rebid occuring, [0] otherwise
+      - One bit for whether we are counting: [1] COUNTS called, [0] otherwise
+  """
+
+  def __init__(self, iig_obs_type, num_players, hand_length, num_digits, params=None):
     """Initiliazes an empty observation tensor."""
     self.num_players = num_players
     self.hand_length = hand_length
@@ -253,16 +293,20 @@ def __init__(self, iig_obs_type, num_players, hand_length, num_digits):
     # Pieces are described by their (name, number of elements, and shape).
     pieces = [("player", num_players, (num_players,))] # One-hot encoding for the player id.
     if iig_obs_type.private_info == pyspiel.PrivateInfoType.SINGLE_PLAYER:
-      # One-hot encoding for each digit in a player's hand
-      pieces.append(("private_hand", hand_length * num_digits, (hand_length, num_digits)))
+      # Vector containing the digits in a player's hand
+      pieces.append(("private_hand", hand_length, (hand_length,)))
     if iig_obs_type.public_info:
+      pieces.append(("rebid_state", 1, (1,)))
+      pieces.append(("counts_state", 1, (1,)))
       if iig_obs_type.perfect_recall:
-        # One-hot encoding for a player's moves at every round.
-        total_possible_rounds = num_players * hand_length * num_digits
-        num_actions = 2
-        pieces.append(("action_history",
-                       total_possible_rounds * num_actions,
-                       (total_possible_rounds, num_actions)))
+        # One-hot encodings for players' moves at every round.
+        total_possible_rounds = hand_length * num_digits * num_players
+        pieces.append(("bid_history",
+                       total_possible_rounds * num_players,
+                       (total_possible_rounds, num_players)))
+        pieces.append(("challenge_history",
+                       total_possible_rounds * num_players,
+                       (total_possible_rounds, num_players)))
 
     # Build the single flat tensor.
     total_size = sum(size for name, size, shape in pieces)
@@ -281,11 +325,15 @@ def set_from(self, state, player):
     if "player" in self.dict:
       self.dict["player"][player] = 1
     if "private_hand" in self.dict and len(state.hands[player]) == self.hand_length:
-      for i in range(len(state.hands[player])):
-        self.dict["private_hand"][i][state.hands[player][i]] = 1
-    if "action_history" in self.dict:
-      for round, action in enumerate(state.actions[player]):
-        self.dict["action_history"][round, action] = 1
+      self.dict["private_hand"] = self.hands[player]
+    if "rebid_state" in self.dict:
+      self.dict["rebid_state"] = state.is_rebid
+    if "counts_state" in self.dict:
+      self.dict["counts_state"] = state.is_terminal()
+    if "bid_history" in self.dict:
+      self.dict["bid_history"] = state.bid_history
+    if "challenge_history" in self.dict:
+      self.dict["challenge_history"] = state.challenge_history
 
   def string_from(self, state, player):
     """Observation of `state` from the PoV of `player`, as a string."""
@@ -294,9 +342,18 @@ def string_from(self, state, player):
       pieces.append(f"p{player}")
     if "private_hand" in self.dict and len(state.hands[player]) == self.hand_length:
       pieces.append(f"hand:{state.hands[player]}")
-    if "action_history" in self.dict and state.actions[player]:
-      # bc = bid, challenge. b is 0 or 1 and indexes into "bc" to stringify the action.
-      pieces.append("".join("bc"[b] for b in state.actions[player]))
+    if "rebid_state" in self.dict:
+      pieces.append(f"rebid:{state.is_rebid}")
+    if "counts_state" in self.dict:
+      pieces.append(f"rebid:{state.is_terminal()}")
+    if "bid_history" in self.dict:
+      for bid in range(len(state.bid_history)):
+        if np.any(state.bid_history[bid] == 1):
+          pieces.append("b:{}.".format(bid))
+    if "challenge_history" in self.dict:
+      for bid in range(len(state.challenge_history)):
+        if np.any(state.challenge_history[bid] == 1):
+          pieces.append("c:{}.".format(bid))
     return " ".join(str(p) for p in pieces)
 
 # Register the game with the OpenSpiel library

From eeeeda367223626ed5d231c4207f382814793d7b Mon Sep 17 00:00:00 2001
From: William Wong <w07wong@yahoo.com>
Date: Mon, 19 Dec 2022 23:39:12 -0800
Subject: [PATCH 07/13] Unit tests and bug fixes

---
 open_spiel/python/games/liars_poker.py      |  62 +++---
 open_spiel/python/games/liars_poker_test.py | 212 ++++++++++++++++----
 2 files changed, 212 insertions(+), 62 deletions(-)

diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py
index b81cd556d0..bb973345d1 100644
--- a/open_spiel/python/games/liars_poker.py
+++ b/open_spiel/python/games/liars_poker.py
@@ -47,7 +47,8 @@ class Action(enum.IntEnum):
     provides_observation_string=False,
     provides_observation_tensor=True)
 _GAME_INFO = pyspiel.GameInfo(
-    num_distinct_actions=len(Action),
+    # Num actions = total number of cards * number of digits + action enum
+    num_distinct_actions=_HAND_LENGTH * _NUM_DIGITS * _MIN_NUM_PLAYERS + len(Action),
     max_chance_outcomes=_HAND_LENGTH * _NUM_DIGITS,
     num_players=_MIN_NUM_PLAYERS,
     min_utility=-(_MIN_NUM_PLAYERS - 1), # Reward from being challenged and losing.
@@ -63,10 +64,10 @@ class LiarsPoker(pyspiel.Game):
 
   def __init__(self, params=None):
     super().__init__(_GAME_TYPE, _GAME_INFO, params or dict())
-    self.deck = [_FULL_DECK[i] for i in range(params.get("num_digits", default=_NUM_DIGITS))]
-    self.num_players = params.get("num_players", default=_MIN_NUM_PLAYERS)
-    self.hand_length = params.get("hand_length", default=_HAND_LENGTH)
-    self.num_digits = params.get("num_digits", default=_NUM_DIGITS)
+    self.num_players = _MIN_NUM_PLAYERS
+    self.hand_length = _HAND_LENGTH
+    self.num_digits = _NUM_DIGITS
+    self.deck = [_FULL_DECK[i] for i in range(self.num_digits)]
 
   def new_initial_state(self):
     """Returns a state corresponding to the start of a game."""
@@ -100,15 +101,18 @@ def __init__(self, game):
     self.bid_history = np.zeros((total_possible_bets, game.num_players))
     self.challenge_history = np.zeros((total_possible_bets, game.num_players))
     self._current_player = 0
-    self._bid_originator = 0
+    self._bid_offset = len(Action)
+    self._max_bid = (self._hand_length * self._num_digits * self._num_players
+                     + self._bid_offset - 1)
+    self._bid_originator = -1 
     self._current_bid = -1
     self._num_challenges = 0
     self.is_rebid = False
 
     # Game over dynamics
     self._game_over = False
-    self._winner = -1
-    self._loser = -1
+    self.winner = -1
+    self.loser = -1
 
   def current_player(self):
     """Returns id of the current player to act.
@@ -118,7 +122,7 @@ def current_player(self):
       - CHANCE if a player is drawing a number to fill out their hand.
       - a number otherwise.
     """
-    if self._is_terminal:
+    if self.is_terminal():
       return pyspiel.PlayerId.TERMINAL
     elif len(self.hands[self._num_players - 1]) < self._hand_length:
       return pyspiel.PlayerId.CHANCE
@@ -138,14 +142,15 @@ def _legal_actions(self, player):
     assert player >= 0
     actions = []
 
-    if player != self._bid_originator or self._is_rebid_possible():
-      # Any move higher than the current bid is allowed. (Bids start at 0)
-      for b in range(self._current_bid + 1, self._hand_length * self._num_digits * self._num_players):
-        actions.append(b)
-    
     if self._is_challenge_possible():
       actions.append(Action.CHALLENGE)
 
+    if player != self._bid_originator or self._is_rebid_possible():
+      # Any move higher than the current bid is allowed.
+      # Bids start at 2 as 0 and 1 are for bid and challenge.
+      for b in range(max(self._bid_offset, self._current_bid + 1), self._max_bid + 1):
+        actions.append(b)
+
     return actions
 
   def chance_outcomes(self):
@@ -170,11 +175,16 @@ def _decode_bid(self, bid):
     number = self._deck[bid // (self._hand_length * self._num_players)]
     return (count, number)
 
+  def _end_game(self):
+    """Ends the game by calling a counts and setting respective attributes."""
+    self._counts()
+    self._game_over = True
+
   def _counts(self):
     """
     Determines if the bid originator wins or loses.
     """
-    bid_count, bid_number = self._decode_bid(self._current_bid)
+    bid_count, bid_number = self._decode_bid(self._current_bid - self._bid_offset)
 
     # Count the number of bid_numbers from all players.
     matches = 0
@@ -186,9 +196,9 @@ def _counts(self):
     # If the number of matches are at least the bid_count bid, then the bidder wins.
     # Otherwise everyone else wins.
     if matches >= bid_count:
-      self._winner = self._bid_originator
+      self.winner = self._bid_originator
     else:
-      self._loser = self._bid_originator
+      self.loser = self._bid_originator
 
   def _update_bid_history(self, bid, player):
     """Writes a player's bid into memory."""
@@ -203,20 +213,17 @@ def _apply_action(self, action):
     if self.is_chance_node():
       # If we are still populating hands, draw a number for the current player.
       self.hands[self._current_player].append(action)
-      return
     elif action == Action.CHALLENGE:
-      self.actions[self._current_player].append(action)
       assert self._is_challenge_possible()
-      self._update_challenge_history(self._current_bid, self._current_player)
+      self._update_challenge_history(
+        self._current_bid - self._bid_offset, self._current_player)
       self._num_challenges += 1
       # If there is no ongoing rebid, check if all players challenge before counting.
       # If there is an ongoing rebid, count once all the players except the bidder challenges.
       if (not self.is_rebid and self._num_challenges == self._num_players) or (
         self.is_rebid and self._num_challenges == self._num_players - 1):
-        self._counts()
-        self._game_over = True
+        self._end_game()
     else:
-      self.actions[self._current_player].append(action)
       # Set the current bid to the action.
       self._current_bid = action
       if self._current_player == self._bid_originator:
@@ -227,7 +234,7 @@ def _apply_action(self, action):
          self.is_rebid = False
       # Set the bid originator to the current player.
       self._bid_originator = self._current_player
-      self._update_bid_history(self._current_bid, self._current_player)
+      self._update_bid_history(self._current_bid - self._bid_offset, self._current_player)
       self._num_challenges = 0
     self._current_player = (self._current_player + 1) % self._num_players
 
@@ -246,10 +253,10 @@ def is_terminal(self):
 
   def returns(self):
     """Total reward for each player over the course of the game so far."""
-    if self._winner != -1:
+    if self.winner != -1:
       bidder_reward = self._num_players - 1
       others_reward = -1.0
-    elif self._loser != -1:
+    elif self.loser != -1:
       bidder_reward = -1 * (self._num_players - 1)
       others_reward = 1.0
     else:
@@ -325,7 +332,7 @@ def set_from(self, state, player):
     if "player" in self.dict:
       self.dict["player"][player] = 1
     if "private_hand" in self.dict and len(state.hands[player]) == self.hand_length:
-      self.dict["private_hand"] = self.hands[player]
+      self.dict["private_hand"] = state.hands[player]
     if "rebid_state" in self.dict:
       self.dict["rebid_state"] = state.is_rebid
     if "counts_state" in self.dict:
@@ -356,6 +363,7 @@ def string_from(self, state, player):
           pieces.append("c:{}.".format(bid))
     return " ".join(str(p) for p in pieces)
 
+
 # Register the game with the OpenSpiel library
 
 pyspiel.register_game(_GAME_TYPE, LiarsPoker)
diff --git a/open_spiel/python/games/liars_poker_test.py b/open_spiel/python/games/liars_poker_test.py
index 9905b29822..e0f5eac535 100644
--- a/open_spiel/python/games/liars_poker_test.py
+++ b/open_spiel/python/games/liars_poker_test.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 # Lint as python3
-"""Tests for Python Tic-Tac-Toe."""
+"""Tests for Python Liar's Poker."""
 
 import difflib
 import os
@@ -26,51 +26,192 @@
 from open_spiel.python.observation import make_observation
 import pyspiel
 
+# TODO: remove?
 _DATA_DIR = "open_spiel/integration_tests/playthroughs/"
 
 
-class TicTacToeTest(absltest.TestCase):
+class LiarsPokerTest(absltest.TestCase):
 
   def test_can_create_game_and_state(self):
     """Checks we can create the game and a state."""
     game = liars_poker.LiarsPoker()
     state = game.new_initial_state()
-    self.assertEqual(str(state), "...\n...\n...")
-
-  def test_random_game(self):
-    """Tests basic API functions."""
-    # This is here mostly to show the API by example.
-    # More serious simulation tests are done in python/tests/games_sim_test.py
-    # and in test_game_from_cc (below), both of which test the conformance to
-    # the API thoroughly.
+    # Ensure no moves have been made.
+    expected_hands = [[] for _ in range(game.num_players)]
+    expected_bidder = -1
+    expected_current_player = pyspiel.PlayerId.CHANCE
+    expected_current_bid = -1
+    expected_rebid = False
+    expected = "Hands: {}, Bidder: {}, Current Player: {}, Current Bid: {}, Rebid: {}".format(
+      expected_hands,
+      expected_bidder,
+      expected_current_player,
+      expected_current_bid,
+      expected_rebid
+    )
+    self.assertEqual(str(state), expected)
+
+  def test_draw_hands(self):
+    """Tests hand drawing functions."""
     game = liars_poker.LiarsPoker()
     state = game.new_initial_state()
-    while not state.is_terminal():
-      print(state)
-      cur_player = state.current_player()
-      legal_actions = state.legal_actions()
-      action = np.random.choice(legal_actions)
-      print("Player {} chooses action {}".format(cur_player, action))
+    expected_hands = [[] for _ in range(game.num_players)]
+    for i in range(game.num_players * game.hand_length):
+      # Verify we have chance nodes until all player hands are filled.
+      self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE)
+
+      # Draw a digit.
+      outcomes_with_probs = state.chance_outcomes()
+      action_list, prob_list = zip(*outcomes_with_probs)
+      action = np.random.choice(action_list, p=prob_list)
+
+      # Verify players' hands are filled correctly.
+      cur_player = i % game.num_players
+      expected_hands[cur_player].append(action)
       state.apply_action(action)
-    print(state)
-    print("Returns: {}".format(state.returns()))
+      self.assertEqual(state.hands, expected_hands)
+    # Assert after all hands are filled, we have non-chance nodes.
+    cur_player = state.current_player()
+    self.assertNotEqual(cur_player, pyspiel.PlayerId.CHANCE)
+    self.assertEqual(cur_player, 0)
+
+  def _populate_game_hands(self, game, state):
+    """Populates players hands for testing."""
+    for _ in range(game.num_players * game.hand_length):
+      outcomes_with_probs = state.chance_outcomes()
+      action_list, prob_list = zip(*outcomes_with_probs)
+      action = np.random.choice(action_list, p=prob_list)
+      state.apply_action(action)
+
+  def test_basic_bid(self):
+    """Tests a single bid."""
+    game = liars_poker.LiarsPoker()
+    state = game.new_initial_state()
+    total_possible_bets = game.hand_length * game.num_digits * game.num_players
+    expected_bid_history = np.zeros((total_possible_bets, game.num_players))
+  
+    # Fill player hands.
+    self._populate_game_hands(game, state)
+    # After all hands are filled, have player 0 bid.
+    cur_player = state.current_player()
+    action = 2
+    state.apply_action(action)
+  
+    # Verify bid history is updated correctly.
+    bid_offset = len(liars_poker.Action)
+    expected_bid_history[action - bid_offset][cur_player] = 1
+    self.assertTrue((state.bid_history == expected_bid_history).all())
+  
+    # Verify next set of legal bids is greater than the current bid.
+    for next_action in state.legal_actions():
+      if next_action == liars_poker.Action.CHALLENGE:
+        continue
+      self.assertGreater(next_action, action)
+
+  def _verify_returns(self, game, state):
+    self.assertTrue(state.winner != -1 or state.loser != -1)
+    actual_returns = state.returns()
+    if state.winner != -1:
+      expected_returns = [-1.0 for _ in range(game.num_players)]
+      expected_returns[state.winner] = game.num_players - 1
+    else:
+      expected_returns = [1.0 for _ in range(game.num_players)]
+      expected_returns[state.loser] = -1.0 * (game.num_players - 1)
+    self.assertEqual(actual_returns, expected_returns)
+
+  def test_single_round(self):
+    """Runs a single round of bidding followed by a challenge."""
+    game = liars_poker.LiarsPoker()
+    state = game.new_initial_state()
+    total_possible_bets = game.hand_length * game.num_digits * game.num_players
+    expected_challenge_history = np.zeros((total_possible_bets, game.num_players))
+
+    # Fill player hands.
+    self._populate_game_hands(game, state)
+    # Have player 0 bid.
+    action = 2
+    state.apply_action(action)
+    # Verify challenge action is available to the next player.
+    challenge = liars_poker.Action.CHALLENGE
+    self.assertTrue(challenge in state.legal_actions())
+    # Player 1 challenges.
+    cur_player = state.current_player()
+    state.apply_action(challenge)
+    bid_offset = len(liars_poker.Action)
+    expected_challenge_history[action - bid_offset][cur_player] = 1
+    # Verify challenge history is updated correctly.
+    self.assertTrue((state.challenge_history == expected_challenge_history).all())
+    # Original bidder challenges, thus agreeing to a count.
+    cur_player = state.current_player()
+    state.apply_action(challenge)
+    expected_challenge_history[action - bid_offset][cur_player] = 1
+    # Verify challenge history is updated correctly.
+    self.assertTrue((state.challenge_history == expected_challenge_history).all())
+  
+    # Verify game is over.
+    self.assertTrue(state.is_terminal())
+    # Verify returns.
+    self._verify_returns(game, state)
+
+  def test_single_rebid(self):
+    """Runs a 2 player game where a rebid is enacted."""
+    game = liars_poker.LiarsPoker()
+    state = game.new_initial_state()
+
+    # Fill player hands.
+    self._populate_game_hands(game, state)
+    # Have player 0 bid.
+    state.apply_action(2)
+    # Player 1 challenges.
+    state.apply_action(liars_poker.Action.CHALLENGE)
+    # Original bidder rebids.
+    state.apply_action(3)
+    # Verify game is not over.
+    self.assertFalse(state.is_terminal())
+    self.assertEqual(state.returns(), [0.0 for _ in range(game.num_players)])
+    # Player 1 challenges again.
+    state.apply_action(liars_poker.Action.CHALLENGE)
+
+    # Verify game is now over.
+    self.assertTrue(state.is_terminal())
+    self._verify_returns(game, state)
+
+  def test_rebid_then_new_bid(self):
+    """Runs a 2 player game where a rebid is enacted."""
+    game = liars_poker.LiarsPoker()
+    state = game.new_initial_state()
+
+    # Fill player hands.
+    self._populate_game_hands(game, state)
+    # Have player 0 bid.
+    state.apply_action(2)
+    # Player 1 challenges.
+    state.apply_action(liars_poker.Action.CHALLENGE)
+    # Original bidder rebids.
+    state.apply_action(3)
+    # Verify game is not over.
+    self.assertFalse(state.is_terminal())
+    self.assertEqual(state.returns(), [0.0 for _ in range(game.num_players)])
+    # Player 1 bids.
+    state.apply_action(4)
+    # Verify game is not over.
+    self.assertFalse(state.is_terminal())
+    # Player 0 challenges.
+    state.apply_action(liars_poker.Action.CHALLENGE)
+    # Verify we're not rebidding and counts is only called once both players challenge.
+    self.assertFalse(state.is_terminal())
+    # Player 1 challenges and ends the game with a counts.
+    state.apply_action(liars_poker.Action.CHALLENGE)
+
+    # Verify game is now over.
+    self.assertTrue(state.is_terminal())
+    self._verify_returns(game, state)
 
   def test_game_from_cc(self):
-    """Runs our standard game tests, checking API consistency."""
+    """Runs the standard game tests, checking API consistency."""
     game = pyspiel.load_game("python_liars_poker")
     pyspiel.random_sim_test(game, num_sims=10, serialize=False, verbose=True)
 
-  def test_observation_tensors_same(self):
-    """Checks observation tensor is the same from C++ and from Python."""
-    game = pyspiel.load_game("python_liars_poker")
-    state = game.new_initial_state()
-    for a in [4, 5, 2, 3]:
-      state.apply_action(a)
-    py_obs = make_observation(game)
-    py_obs.set_from(state, state.current_player())
-    cc_obs = state.observation_tensor()
-    np.testing.assert_array_equal(py_obs.tensor, cc_obs)
-
   def test_pickle(self):
     """Checks pickling and unpickling of game and state."""
     game = pyspiel.load_game("python_liars_poker")
@@ -78,7 +219,7 @@ def test_pickle(self):
     unpickled_game = pickle.loads(pickled_game)
     self.assertEqual(str(game), str(unpickled_game))
     state = game.new_initial_state()
-    for a in [4, 2, 3, 7]:
+    for a in [2, 3, 4, 5]:
       state.apply_action(a)
     ser_str = pyspiel.serialize_game_and_state(game, state)
     new_game, new_state = pyspiel.deserialize_game_and_state(ser_str)
@@ -101,10 +242,11 @@ def test_cloned_state_matches_original_state(self):
     self.assertEqual(state.move_number(), clone.move_number())
     self.assertEqual(state.num_distinct_actions(), clone.num_distinct_actions())
 
-    self.assertEqual(state._cur_player, clone._cur_player)
-    self.assertEqual(state._player0_score, clone._player0_score)
-    self.assertEqual(state._is_terminal, clone._is_terminal)
-    np.testing.assert_array_equal(state.board, clone.board)
+    self.assertEqual(state._current_player, clone._current_player)
+    self.assertEqual(state._current_bid, clone._current_bid)
+    self.assertEqual(state._game_over, clone._game_over)
+    np.testing.assert_array_equal(state.bid_history, clone.bid_history)
+    np.testing.assert_array_equal(state.challenge_history, clone.challenge_history)
 
 
 if __name__ == "__main__":

From cf32057a23f788ee7ebe2f20ef28a7551dd1d4a5 Mon Sep 17 00:00:00 2001
From: William Wong <w07wong@yahoo.com>
Date: Tue, 20 Dec 2022 01:15:20 -0800
Subject: [PATCH 08/13] Change visibility of winners/losers

---
 open_spiel/python/games/liars_poker.py      | 12 ++++++------
 open_spiel/python/games/liars_poker_test.py |  8 ++++----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py
index bb973345d1..8d517c65f3 100644
--- a/open_spiel/python/games/liars_poker.py
+++ b/open_spiel/python/games/liars_poker.py
@@ -111,8 +111,8 @@ def __init__(self, game):
 
     # Game over dynamics
     self._game_over = False
-    self.winner = -1
-    self.loser = -1
+    self._winner = -1
+    self._loser = -1
 
   def current_player(self):
     """Returns id of the current player to act.
@@ -196,9 +196,9 @@ def _counts(self):
     # If the number of matches are at least the bid_count bid, then the bidder wins.
     # Otherwise everyone else wins.
     if matches >= bid_count:
-      self.winner = self._bid_originator
+      self._winner = self._bid_originator
     else:
-      self.loser = self._bid_originator
+      self._loser = self._bid_originator
 
   def _update_bid_history(self, bid, player):
     """Writes a player's bid into memory."""
@@ -253,10 +253,10 @@ def is_terminal(self):
 
   def returns(self):
     """Total reward for each player over the course of the game so far."""
-    if self.winner != -1:
+    if self._winner != -1:
       bidder_reward = self._num_players - 1
       others_reward = -1.0
-    elif self.loser != -1:
+    elif self._loser != -1:
       bidder_reward = -1 * (self._num_players - 1)
       others_reward = 1.0
     else:
diff --git a/open_spiel/python/games/liars_poker_test.py b/open_spiel/python/games/liars_poker_test.py
index e0f5eac535..f0abbed1cd 100644
--- a/open_spiel/python/games/liars_poker_test.py
+++ b/open_spiel/python/games/liars_poker_test.py
@@ -109,14 +109,14 @@ def test_basic_bid(self):
       self.assertGreater(next_action, action)
 
   def _verify_returns(self, game, state):
-    self.assertTrue(state.winner != -1 or state.loser != -1)
+    self.assertTrue(state._winner != -1 or state._loser != -1)
     actual_returns = state.returns()
-    if state.winner != -1:
+    if state._winner != -1:
       expected_returns = [-1.0 for _ in range(game.num_players)]
-      expected_returns[state.winner] = game.num_players - 1
+      expected_returns[state._winner] = game.num_players - 1
     else:
       expected_returns = [1.0 for _ in range(game.num_players)]
-      expected_returns[state.loser] = -1.0 * (game.num_players - 1)
+      expected_returns[state._loser] = -1.0 * (game.num_players - 1)
     self.assertEqual(actual_returns, expected_returns)
 
   def test_single_round(self):

From e975687549978eb6ce0fdc9e5a9692e08ef516d7 Mon Sep 17 00:00:00 2001
From: William Wong <w07wong@yahoo.com>
Date: Wed, 21 Dec 2022 02:59:58 -0800
Subject: [PATCH 09/13] Bug fixes and playthrough

---
 .../playthroughs/python_liars_poker.txt       | 920 ++++++++++++++----
 open_spiel/python/games/liars_poker.py        |  42 +-
 open_spiel/python/games/liars_poker_test.py   |  40 +-
 3 files changed, 797 insertions(+), 205 deletions(-)

diff --git a/open_spiel/integration_tests/playthroughs/python_liars_poker.txt b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt
index 033397928d..ef0f271761 100644
--- a/open_spiel/integration_tests/playthroughs/python_liars_poker.txt
+++ b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt
@@ -1,14 +1,14 @@
-game: liars_poker
+game: python_liars_poker
 
-GameType.chance_mode = ChanceMode.DETERMINISTIC
+GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC
 GameType.dynamics = Dynamics.SEQUENTIAL
-GameType.information = Information.PERFECT_INFORMATION
+GameType.information = Information.IMPERFECT_INFORMATION
 GameType.long_name = "Python Liars Poker"
-GameType.max_num_players = 2
+GameType.max_num_players = 10
 GameType.min_num_players = 2
-GameType.parameter_specification = []
+GameType.parameter_specification = ["hand_length", "num_digits", "players"]
 GameType.provides_information_state_string = True
-GameType.provides_information_state_tensor = True 
+GameType.provides_information_state_tensor = True
 GameType.provides_observation_string = False
 GameType.provides_observation_tensor = True
 GameType.provides_factored_observation_string = False
@@ -16,225 +16,805 @@ GameType.reward_model = RewardModel.TERMINAL
 GameType.short_name = "python_liars_poker"
 GameType.utility = Utility.ZERO_SUM
 
-NumDistinctActions() = 9
-PolicyTensorShape() = [9]
-MaxChanceOutcomes() = 0
-GetParameters() = {}
+NumDistinctActions() = 20
+PolicyTensorShape() = [20]
+MaxChanceOutcomes() = 9
+GetParameters() = {hand_length=3,num_digits=3,players=2}
 NumPlayers() = 2
 MinUtility() = -1.0
 MaxUtility() = 1.0
 UtilitySum() = 0.0
-ObservationTensorShape() = [3, 3, 3]
+InformationStateTensorShape() = player: [2], private_hand: [3], rebid_state: [1], counts_state: [1], bid_history: [18, 2], challenge_history: [18, 2]
+InformationStateTensorLayout() = TensorLayout.CHW
+InformationStateTensorSize() = 79
+ObservationTensorShape() = player: [2], private_hand: [3], rebid_state: [1], counts_state: [1]
 ObservationTensorLayout() = TensorLayout.CHW
-ObservationTensorSize() = 27
-MaxGameLength() = 9
-ToString() = "liars_poker()"
+ObservationTensorSize() = 7
+MaxGameLength() = 36
+ToString() = "python_liars_poker(hand_length=3,num_digits=3,players=2)"
 
 # State 0
-# ...
-# ...
-# ...
+# Hands: [[], []], Bidder: -1, Current Player: PlayerId.CHANCE, Current Bid: None of None, Rebid: False
 IsTerminal() = False
 History() = []
 HistoryString() = ""
-IsChanceNode() = False
+IsChanceNode() = True
 IsSimultaneousNode() = False
-CurrentPlayer() = 0
-InformationStateString(0) = ""
-InformationStateString(1) = ""
-ObservationString(0) = "...\n...\n..."
-ObservationString(1) = "...\n...\n..."
-ObservationTensor(0):
-◉◉◉  ◯◯◯  ◯◯◯
-◉◉◉  ◯◯◯  ◯◯◯
-◉◉◉  ◯◯◯  ◯◯◯
-ObservationTensor(1):
-◉◉◉  ◯◯◯  ◯◯◯
-◉◉◉  ◯◯◯  ◯◯◯
-◉◉◉  ◯◯◯  ◯◯◯
-Rewards() = [0, 0]
-Returns() = [0, 0]
-LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8]
-StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(1,0)", "x(1,1)", "x(1,2)", "x(2,0)", "x(2,1)", "x(2,2)"]
+CurrentPlayer() = PlayerId.CHANCE
+InformationStateString(0) = "p0 rebid:[0] counts:[0]"
+InformationStateString(1) = "p1 rebid:[0] counts:[0]"
+InformationStateTensor(0).player: ◉◯
+InformationStateTensor(0).private_hand: ◯◯◯
+InformationStateTensor(0).rebid_state: ◯
+InformationStateTensor(0).counts_state: ◯
+InformationStateTensor(0).bid_history: ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+InformationStateTensor(0).challenge_history: ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+InformationStateTensor(1).player: ◯◉
+InformationStateTensor(1).private_hand: ◯◯◯
+InformationStateTensor(1).rebid_state: ◯
+InformationStateTensor(1).counts_state: ◯
+InformationStateTensor(1).bid_history: ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+InformationStateTensor(1).challenge_history: ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+ObservationString(0) = "p0 rebid:[0] counts:[0]"
+ObservationString(1) = "p1 rebid:[0] counts:[0]"
+PublicObservationString() = "p0 rebid:[0] counts:[0]"
+PrivateObservationString(0) = "p0"
+PrivateObservationString(1) = "p1"
+ObservationTensor(0).player: ◉◯
+ObservationTensor(0).private_hand: ◯◯◯
+ObservationTensor(0).rebid_state: ◯
+ObservationTensor(0).counts_state: ◯
+ObservationTensor(1).player: ◯◉
+ObservationTensor(1).private_hand: ◯◯◯
+ObservationTensor(1).rebid_state: ◯
+ObservationTensor(1).counts_state: ◯
+ChanceOutcomes() = [(1, 0.3333333333333333), (2, 0.3333333333333333), (3, 0.3333333333333333)]
+LegalActions() = [1, 2, 3]
+StringLegalActions() = ["Deal:1", "Deal:2", "Deal:3"]
 
-# Apply action "x(2,2)"
-action: 8
+# Apply action "Deal:1"
+action: 1
 
 # State 1
-# ...
-# ...
-# ..x
+# Hands: [[1], []], Bidder: -1, Current Player: PlayerId.CHANCE, Current Bid: None of None, Rebid: False
 IsTerminal() = False
-History() = [8]
-HistoryString() = "8"
-IsChanceNode() = False
+History() = [1]
+HistoryString() = "1"
+IsChanceNode() = True
 IsSimultaneousNode() = False
-CurrentPlayer() = 1
-InformationStateString(0) = "8"
-InformationStateString(1) = "8"
-ObservationString(0) = "...\n...\n..x"
-ObservationString(1) = "...\n...\n..x"
-ObservationTensor(0):
-◉◉◉  ◯◯◯  ◯◯◯
-◉◉◉  ◯◯◯  ◯◯◯
-◉◉◯  ◯◯◯  ◯◯◉
-ObservationTensor(1):
-◉◉◉  ◯◯◯  ◯◯◯
-◉◉◉  ◯◯◯  ◯◯◯
-◉◉◯  ◯◯◯  ◯◯◉
-Rewards() = [0, 0]
-Returns() = [0, 0]
-LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7]
-StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(1,0)", "o(1,1)", "o(1,2)", "o(2,0)", "o(2,1)"]
+CurrentPlayer() = PlayerId.CHANCE
+InformationStateString(0) = "p0 rebid:[0] counts:[0]"
+InformationStateString(1) = "p1 rebid:[0] counts:[0]"
+InformationStateTensor(0).player: ◉◯
+InformationStateTensor(0).private_hand: ◯◯◯
+InformationStateTensor(0).rebid_state: ◯
+InformationStateTensor(0).counts_state: ◯
+InformationStateTensor(0).bid_history: ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+InformationStateTensor(0).challenge_history: ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+InformationStateTensor(1).player: ◯◉
+InformationStateTensor(1).private_hand: ◯◯◯
+InformationStateTensor(1).rebid_state: ◯
+InformationStateTensor(1).counts_state: ◯
+InformationStateTensor(1).bid_history: ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+InformationStateTensor(1).challenge_history: ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+ObservationString(0) = "p0 rebid:[0] counts:[0]"
+ObservationString(1) = "p1 rebid:[0] counts:[0]"
+PublicObservationString() = "p0 rebid:[0] counts:[0]"
+PrivateObservationString(0) = "p0"
+PrivateObservationString(1) = "p1"
+ObservationTensor(0).player: ◉◯
+ObservationTensor(0).private_hand: ◯◯◯
+ObservationTensor(0).rebid_state: ◯
+ObservationTensor(0).counts_state: ◯
+ObservationTensor(1).player: ◯◉
+ObservationTensor(1).private_hand: ◯◯◯
+ObservationTensor(1).rebid_state: ◯
+ObservationTensor(1).counts_state: ◯
+ChanceOutcomes() = [(1, 0.3333333333333333), (2, 0.3333333333333333), (3, 0.3333333333333333)]
+LegalActions() = [1, 2, 3]
+StringLegalActions() = ["Deal:1", "Deal:2", "Deal:3"]
 
-# Apply action "o(1,0)"
+# Apply action "Deal:3"
 action: 3
 
 # State 2
-# ...
-# o..
-# ..x
+# Apply action "Deal:1"
+action: 1
+
+# State 3
+# Apply action "Deal:3"
+action: 3
+
+# State 4
+# Apply action "Deal:2"
+action: 2
+
+# State 5
+# Apply action "Deal:3"
+action: 3
+
+# State 6
+# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: -1, Current Player: 0, Current Bid: None of None, Rebid: False
 IsTerminal() = False
-History() = [8, 3]
-HistoryString() = "8, 3"
+History() = [1, 3, 1, 3, 2, 3]
+HistoryString() = "1, 3, 1, 3, 2, 3"
 IsChanceNode() = False
 IsSimultaneousNode() = False
 CurrentPlayer() = 0
-InformationStateString(0) = "8, 3"
-InformationStateString(1) = "8, 3"
-ObservationString(0) = "...\no..\n..x"
-ObservationString(1) = "...\no..\n..x"
-ObservationTensor(0):
-◉◉◉  ◯◯◯  ◯◯◯
-◯◉◉  ◉◯◯  ◯◯◯
-◉◉◯  ◯◯◯  ◯◯◉
-ObservationTensor(1):
-◉◉◉  ◯◯◯  ◯◯◯
-◯◉◉  ◉◯◯  ◯◯◯
-◉◉◯  ◯◯◯  ◯◯◉
+InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]"
+InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]"
+InformationStateTensor(0).player: ◉◯
+InformationStateTensor(0).private_hand = [1, 1, 2]
+InformationStateTensor(0).rebid_state: ◯
+InformationStateTensor(0).counts_state: ◯
+InformationStateTensor(0).bid_history: ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+InformationStateTensor(0).challenge_history: ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+InformationStateTensor(1).player: ◯◉
+InformationStateTensor(1).private_hand = [3, 3, 3]
+InformationStateTensor(1).rebid_state: ◯
+InformationStateTensor(1).counts_state: ◯
+InformationStateTensor(1).bid_history: ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+InformationStateTensor(1).challenge_history: ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]"
+ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]"
+PublicObservationString() = "p0 rebid:[0] counts:[0]"
+PrivateObservationString(0) = "p0 hand:[1, 1, 2]"
+PrivateObservationString(1) = "p1 hand:[3, 3, 3]"
+ObservationTensor(0).player: ◉◯
+ObservationTensor(0).private_hand = [1, 1, 2]
+ObservationTensor(0).rebid_state: ◯
+ObservationTensor(0).counts_state: ◯
+ObservationTensor(1).player: ◯◉
+ObservationTensor(1).private_hand = [3, 3, 3]
+ObservationTensor(1).rebid_state: ◯
+ObservationTensor(1).counts_state: ◯
 Rewards() = [0, 0]
 Returns() = [0, 0]
-LegalActions() = [0, 1, 2, 4, 5, 6, 7]
-StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(1,1)", "x(1,2)", "x(2,0)", "x(2,1)"]
+LegalActions() = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+StringLegalActions() = ["Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet"]
 
-# Apply action "x(2,0)"
-action: 6
+# Apply action "Bet"
+action: 15
 
-# State 3
-# ...
-# o..
-# x.x
+# State 7
+# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: 0, Current Player: 1, Current Bid: 1 of 3, Rebid: False
 IsTerminal() = False
-History() = [8, 3, 6]
-HistoryString() = "8, 3, 6"
+History() = [1, 3, 1, 3, 2, 3, 15]
+HistoryString() = "1, 3, 1, 3, 2, 3, 15"
 IsChanceNode() = False
 IsSimultaneousNode() = False
 CurrentPlayer() = 1
-InformationStateString(0) = "8, 3, 6"
-InformationStateString(1) = "8, 3, 6"
-ObservationString(0) = "...\no..\nx.x"
-ObservationString(1) = "...\no..\nx.x"
-ObservationTensor(0):
-◉◉◉  ◯◯◯  ◯◯◯
-◯◉◉  ◉◯◯  ◯◯◯
-◯◉◯  ◯◯◯  ◉◯◉
-ObservationTensor(1):
-◉◉◉  ◯◯◯  ◯◯◯
-◯◉◉  ◉◯◯  ◯◯◯
-◯◉◯  ◯◯◯  ◉◯◉
+InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0] b:13."
+InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0] b:13."
+InformationStateTensor(0).player: ◉◯
+InformationStateTensor(0).private_hand = [1, 1, 2]
+InformationStateTensor(0).rebid_state: ◯
+InformationStateTensor(0).counts_state: ◯
+InformationStateTensor(0).bid_history: ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◉◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+InformationStateTensor(0).challenge_history: ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+InformationStateTensor(1).player: ◯◉
+InformationStateTensor(1).private_hand = [3, 3, 3]
+InformationStateTensor(1).rebid_state: ◯
+InformationStateTensor(1).counts_state: ◯
+InformationStateTensor(1).bid_history: ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◉◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+InformationStateTensor(1).challenge_history: ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]"
+ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]"
+PublicObservationString() = "p0 rebid:[0] counts:[0]"
+PrivateObservationString(0) = "p0 hand:[1, 1, 2]"
+PrivateObservationString(1) = "p1 hand:[3, 3, 3]"
+ObservationTensor(0).player: ◉◯
+ObservationTensor(0).private_hand = [1, 1, 2]
+ObservationTensor(0).rebid_state: ◯
+ObservationTensor(0).counts_state: ◯
+ObservationTensor(1).player: ◯◉
+ObservationTensor(1).private_hand = [3, 3, 3]
+ObservationTensor(1).rebid_state: ◯
+ObservationTensor(1).counts_state: ◯
 Rewards() = [0, 0]
 Returns() = [0, 0]
-LegalActions() = [0, 1, 2, 4, 5, 7]
-StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(1,1)", "o(1,2)", "o(2,1)"]
+LegalActions() = [1, 16, 17, 18, 19]
+StringLegalActions() = ["Challenge", "Bet", "Bet", "Bet", "Bet"]
 
-# Apply action "o(0,0)"
-action: 0
+# Apply action "Bet"
+action: 19
 
-# State 4
-# o..
-# o..
-# x.x
+# State 8
+# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: 1, Current Player: 0, Current Bid: 5 of 3, Rebid: False
 IsTerminal() = False
-History() = [8, 3, 6, 0]
-HistoryString() = "8, 3, 6, 0"
+History() = [1, 3, 1, 3, 2, 3, 15, 19]
+HistoryString() = "1, 3, 1, 3, 2, 3, 15, 19"
 IsChanceNode() = False
 IsSimultaneousNode() = False
 CurrentPlayer() = 0
-InformationStateString(0) = "8, 3, 6, 0"
-InformationStateString(1) = "8, 3, 6, 0"
-ObservationString(0) = "o..\no..\nx.x"
-ObservationString(1) = "o..\no..\nx.x"
-ObservationTensor(0):
-◯◉◉  ◉◯◯  ◯◯◯
-◯◉◉  ◉◯◯  ◯◯◯
-◯◉◯  ◯◯◯  ◉◯◉
-ObservationTensor(1):
-◯◉◉  ◉◯◯  ◯◯◯
-◯◉◉  ◉◯◯  ◯◯◯
-◯◉◯  ◯◯◯  ◉◯◉
+InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0] b:13. b:17."
+InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0] b:13. b:17."
+InformationStateTensor(0).player: ◉◯
+InformationStateTensor(0).private_hand = [1, 1, 2]
+InformationStateTensor(0).rebid_state: ◯
+InformationStateTensor(0).counts_state: ◯
+InformationStateTensor(0).bid_history: ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◉◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◉
+InformationStateTensor(0).challenge_history: ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+InformationStateTensor(1).player: ◯◉
+InformationStateTensor(1).private_hand = [3, 3, 3]
+InformationStateTensor(1).rebid_state: ◯
+InformationStateTensor(1).counts_state: ◯
+InformationStateTensor(1).bid_history: ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◉◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◉
+InformationStateTensor(1).challenge_history: ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]"
+ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]"
+PublicObservationString() = "p0 rebid:[0] counts:[0]"
+PrivateObservationString(0) = "p0 hand:[1, 1, 2]"
+PrivateObservationString(1) = "p1 hand:[3, 3, 3]"
+ObservationTensor(0).player: ◉◯
+ObservationTensor(0).private_hand = [1, 1, 2]
+ObservationTensor(0).rebid_state: ◯
+ObservationTensor(0).counts_state: ◯
+ObservationTensor(1).player: ◯◉
+ObservationTensor(1).private_hand = [3, 3, 3]
+ObservationTensor(1).rebid_state: ◯
+ObservationTensor(1).counts_state: ◯
 Rewards() = [0, 0]
 Returns() = [0, 0]
-LegalActions() = [1, 2, 4, 5, 7]
-StringLegalActions() = ["x(0,1)", "x(0,2)", "x(1,1)", "x(1,2)", "x(2,1)"]
+LegalActions() = [1]
+StringLegalActions() = ["Challenge"]
 
-# Apply action "x(0,2)"
-action: 2
+# Apply action "Challenge"
+action: 1
 
-# State 5
-# o.x
-# o..
-# x.x
+# State 9
+# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: 1, Current Player: 1, Current Bid: 5 of 3, Rebid: False
 IsTerminal() = False
-History() = [8, 3, 6, 0, 2]
-HistoryString() = "8, 3, 6, 0, 2"
+History() = [1, 3, 1, 3, 2, 3, 15, 19, 1]
+HistoryString() = "1, 3, 1, 3, 2, 3, 15, 19, 1"
 IsChanceNode() = False
 IsSimultaneousNode() = False
 CurrentPlayer() = 1
-InformationStateString(0) = "8, 3, 6, 0, 2"
-InformationStateString(1) = "8, 3, 6, 0, 2"
-ObservationString(0) = "o.x\no..\nx.x"
-ObservationString(1) = "o.x\no..\nx.x"
-ObservationTensor(0):
-◯◉◯  ◉◯◯  ◯◯◉
-◯◉◉  ◉◯◯  ◯◯◯
-◯◉◯  ◯◯◯  ◉◯◉
-ObservationTensor(1):
-◯◉◯  ◉◯◯  ◯◯◉
-◯◉◉  ◉◯◯  ◯◯◯
-◯◉◯  ◯◯◯  ◉◯◉
+InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0] b:13. b:17. c:17."
+InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0] b:13. b:17. c:17."
+InformationStateTensor(0).player: ◉◯
+InformationStateTensor(0).private_hand = [1, 1, 2]
+InformationStateTensor(0).rebid_state: ◯
+InformationStateTensor(0).counts_state: ◯
+InformationStateTensor(0).bid_history: ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◉◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◉
+InformationStateTensor(0).challenge_history: ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◉◯
+InformationStateTensor(1).player: ◯◉
+InformationStateTensor(1).private_hand = [3, 3, 3]
+InformationStateTensor(1).rebid_state: ◯
+InformationStateTensor(1).counts_state: ◯
+InformationStateTensor(1).bid_history: ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◉◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◉
+InformationStateTensor(1).challenge_history: ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◉◯
+ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]"
+ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]"
+PublicObservationString() = "p0 rebid:[0] counts:[0]"
+PrivateObservationString(0) = "p0 hand:[1, 1, 2]"
+PrivateObservationString(1) = "p1 hand:[3, 3, 3]"
+ObservationTensor(0).player: ◉◯
+ObservationTensor(0).private_hand = [1, 1, 2]
+ObservationTensor(0).rebid_state: ◯
+ObservationTensor(0).counts_state: ◯
+ObservationTensor(1).player: ◯◉
+ObservationTensor(1).private_hand = [3, 3, 3]
+ObservationTensor(1).rebid_state: ◯
+ObservationTensor(1).counts_state: ◯
 Rewards() = [0, 0]
 Returns() = [0, 0]
-LegalActions() = [1, 4, 5, 7]
-StringLegalActions() = ["o(0,1)", "o(1,1)", "o(1,2)", "o(2,1)"]
+LegalActions() = [1]
+StringLegalActions() = ["Challenge"]
 
-# Apply action "o(0,1)"
+# Apply action "Challenge"
 action: 1
 
-# State 6
-# Apply action "x(1,2)"
-action: 5
-
-# State 7
-# oox
-# o.x
-# x.x
+# State 10
+# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: 1, Current Player: PlayerId.TERMINAL, Current Bid: 5 of 3, Rebid: False
 IsTerminal() = True
-History() = [8, 3, 6, 0, 2, 1, 5]
-HistoryString() = "8, 3, 6, 0, 2, 1, 5"
+History() = [1, 3, 1, 3, 2, 3, 15, 19, 1, 1]
+HistoryString() = "1, 3, 1, 3, 2, 3, 15, 19, 1, 1"
 IsChanceNode() = False
 IsSimultaneousNode() = False
-CurrentPlayer() = -4
-InformationStateString(0) = "8, 3, 6, 0, 2, 1, 5"
-InformationStateString(1) = "8, 3, 6, 0, 2, 1, 5"
-ObservationString(0) = "oox\no.x\nx.x"
-ObservationString(1) = "oox\no.x\nx.x"
-ObservationTensor(0):
-◯◯◯  ◉◉◯  ◯◯◉
-◯◉◯  ◉◯◯  ◯◯◉
-◯◉◯  ◯◯◯  ◉◯◉
-ObservationTensor(1):
-◯◯◯  ◉◉◯  ◯◯◉
-◯◉◯  ◉◯◯  ◯◯◉
-◯◉◯  ◯◯◯  ◉◯◉
+CurrentPlayer() = PlayerId.TERMINAL
+InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[1] b:13. b:17. c:17."
+InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[1] b:13. b:17. c:17."
+InformationStateTensor(0).player: ◉◯
+InformationStateTensor(0).private_hand = [1, 1, 2]
+InformationStateTensor(0).rebid_state: ◯
+InformationStateTensor(0).counts_state: ◉
+InformationStateTensor(0).bid_history: ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◉◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◉
+InformationStateTensor(0).challenge_history: ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◉◉
+InformationStateTensor(1).player: ◯◉
+InformationStateTensor(1).private_hand = [3, 3, 3]
+InformationStateTensor(1).rebid_state: ◯
+InformationStateTensor(1).counts_state: ◉
+InformationStateTensor(1).bid_history: ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◉◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◉
+InformationStateTensor(1).challenge_history: ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◉◉
+ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[1]"
+ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[1]"
+PublicObservationString() = "p0 rebid:[0] counts:[1]"
+PrivateObservationString(0) = "p0 hand:[1, 1, 2]"
+PrivateObservationString(1) = "p1 hand:[3, 3, 3]"
+ObservationTensor(0).player: ◉◯
+ObservationTensor(0).private_hand = [1, 1, 2]
+ObservationTensor(0).rebid_state: ◯
+ObservationTensor(0).counts_state: ◉
+ObservationTensor(1).player: ◯◉
+ObservationTensor(1).private_hand = [3, 3, 3]
+ObservationTensor(1).rebid_state: ◯
+ObservationTensor(1).counts_state: ◉
 Rewards() = [1, -1]
 Returns() = [1, -1]
diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py
index 8d517c65f3..e425bced41 100644
--- a/open_spiel/python/games/liars_poker.py
+++ b/open_spiel/python/games/liars_poker.py
@@ -45,7 +45,12 @@ class Action(enum.IntEnum):
     provides_information_state_string=True,
     provides_information_state_tensor=True,
     provides_observation_string=False,
-    provides_observation_tensor=True)
+    provides_observation_tensor=True,
+    parameter_specification={
+      "players": _MIN_NUM_PLAYERS,
+      "hand_length": _HAND_LENGTH,
+      "num_digits": _NUM_DIGITS
+    })
 _GAME_INFO = pyspiel.GameInfo(
     # Num actions = total number of cards * number of digits + action enum
     num_distinct_actions=_HAND_LENGTH * _NUM_DIGITS * _MIN_NUM_PLAYERS + len(Action),
@@ -64,9 +69,9 @@ class LiarsPoker(pyspiel.Game):
 
   def __init__(self, params=None):
     super().__init__(_GAME_TYPE, _GAME_INFO, params or dict())
-    self.num_players = _MIN_NUM_PLAYERS
-    self.hand_length = _HAND_LENGTH
-    self.num_digits = _NUM_DIGITS
+    game_parameters = self.get_parameters()
+    self.hand_length = game_parameters.get("hand_length", _HAND_LENGTH)
+    self.num_digits = game_parameters.get("num_digits", _NUM_DIGITS)
     self.deck = [_FULL_DECK[i] for i in range(self.num_digits)]
 
   def new_initial_state(self):
@@ -77,7 +82,7 @@ def make_py_observer(self, iig_obs_type=None, params=None):
     """Returns an object used for observing game state."""
     return LiarsPokerObserver(
       iig_obs_type or pyspiel.IIGObservationType(perfect_recall=False),
-      self.num_players,
+      self.num_players(),
       self.hand_length,
       self.num_digits,
       params)
@@ -90,16 +95,16 @@ def __init__(self, game):
     """Constructor; should only be called by Game.new_initial_state."""
     super().__init__(game)
     # Game attributes
-    self._num_players = game.num_players
+    self._num_players = game.num_players()
     self._hand_length = game.hand_length
     self._num_digits = game.num_digits
     self._deck = game.deck
     self.hands = [[] for _ in range(self._num_players)]
 
     # Action dynamics
-    total_possible_bets = game.hand_length * game.num_digits * game.num_players
-    self.bid_history = np.zeros((total_possible_bets, game.num_players))
-    self.challenge_history = np.zeros((total_possible_bets, game.num_players))
+    total_possible_bets = game.hand_length * game.num_digits * self._num_players
+    self.bid_history = np.zeros((total_possible_bets, self._num_players))
+    self.challenge_history = np.zeros((total_possible_bets, self._num_players))
     self._current_player = 0
     self._bid_offset = len(Action)
     self._max_bid = (self._hand_length * self._num_digits * self._num_players
@@ -268,11 +273,16 @@ def returns(self):
 
   def __str__(self):
     """String for debug purposes. No particular semantics are required."""
-    return "Hands: {}, Bidder: {}, Current Player: {}, Current Bid: {}, Rebid: {}".format(
+    if self._current_bid != -1:
+      count, number = self._decode_bid(self._current_bid - self._bid_offset)
+    else:
+      count, number = 'None', 'None'
+    return "Hands: {}, Bidder: {}, Current Player: {}, Current Bid: {} of {}, Rebid: {}".format(
       self.hands,
       self._bid_originator,
       self.current_player(),
-      self._current_bid,
+      count,
+      number,
       self.is_rebid)
 
 
@@ -332,11 +342,11 @@ def set_from(self, state, player):
     if "player" in self.dict:
       self.dict["player"][player] = 1
     if "private_hand" in self.dict and len(state.hands[player]) == self.hand_length:
-      self.dict["private_hand"] = state.hands[player]
+      self.dict["private_hand"] = np.asarray(state.hands[player])
     if "rebid_state" in self.dict:
-      self.dict["rebid_state"] = state.is_rebid
+      self.dict["rebid_state"][0] = int(state.is_rebid)
     if "counts_state" in self.dict:
-      self.dict["counts_state"] = state.is_terminal()
+      self.dict["counts_state"][0] = int(state.is_terminal())
     if "bid_history" in self.dict:
       self.dict["bid_history"] = state.bid_history
     if "challenge_history" in self.dict:
@@ -350,9 +360,9 @@ def string_from(self, state, player):
     if "private_hand" in self.dict and len(state.hands[player]) == self.hand_length:
       pieces.append(f"hand:{state.hands[player]}")
     if "rebid_state" in self.dict:
-      pieces.append(f"rebid:{state.is_rebid}")
+      pieces.append(f"rebid:{[int(state.is_rebid)]}")
     if "counts_state" in self.dict:
-      pieces.append(f"rebid:{state.is_terminal()}")
+      pieces.append(f"counts:{[int(state.is_terminal())]}")
     if "bid_history" in self.dict:
       for bid in range(len(state.bid_history)):
         if np.any(state.bid_history[bid] == 1):
diff --git a/open_spiel/python/games/liars_poker_test.py b/open_spiel/python/games/liars_poker_test.py
index f0abbed1cd..a4ae7bc344 100644
--- a/open_spiel/python/games/liars_poker_test.py
+++ b/open_spiel/python/games/liars_poker_test.py
@@ -37,16 +37,18 @@ def test_can_create_game_and_state(self):
     game = liars_poker.LiarsPoker()
     state = game.new_initial_state()
     # Ensure no moves have been made.
-    expected_hands = [[] for _ in range(game.num_players)]
+    expected_hands = [[] for _ in range(game.num_players())]
     expected_bidder = -1
     expected_current_player = pyspiel.PlayerId.CHANCE
-    expected_current_bid = -1
+    expected_current_count = 'None'
+    expected_current_number = 'None'
     expected_rebid = False
-    expected = "Hands: {}, Bidder: {}, Current Player: {}, Current Bid: {}, Rebid: {}".format(
+    expected = "Hands: {}, Bidder: {}, Current Player: {}, Current Bid: {} of {}, Rebid: {}".format(
       expected_hands,
       expected_bidder,
       expected_current_player,
-      expected_current_bid,
+      expected_current_count,
+      expected_current_number,
       expected_rebid
     )
     self.assertEqual(str(state), expected)
@@ -55,8 +57,8 @@ def test_draw_hands(self):
     """Tests hand drawing functions."""
     game = liars_poker.LiarsPoker()
     state = game.new_initial_state()
-    expected_hands = [[] for _ in range(game.num_players)]
-    for i in range(game.num_players * game.hand_length):
+    expected_hands = [[] for _ in range(game.num_players())]
+    for i in range(game.num_players() * game.hand_length):
       # Verify we have chance nodes until all player hands are filled.
       self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE)
 
@@ -66,7 +68,7 @@ def test_draw_hands(self):
       action = np.random.choice(action_list, p=prob_list)
 
       # Verify players' hands are filled correctly.
-      cur_player = i % game.num_players
+      cur_player = i % game.num_players()
       expected_hands[cur_player].append(action)
       state.apply_action(action)
       self.assertEqual(state.hands, expected_hands)
@@ -77,7 +79,7 @@ def test_draw_hands(self):
 
   def _populate_game_hands(self, game, state):
     """Populates players hands for testing."""
-    for _ in range(game.num_players * game.hand_length):
+    for _ in range(game.num_players() * game.hand_length):
       outcomes_with_probs = state.chance_outcomes()
       action_list, prob_list = zip(*outcomes_with_probs)
       action = np.random.choice(action_list, p=prob_list)
@@ -87,8 +89,8 @@ def test_basic_bid(self):
     """Tests a single bid."""
     game = liars_poker.LiarsPoker()
     state = game.new_initial_state()
-    total_possible_bets = game.hand_length * game.num_digits * game.num_players
-    expected_bid_history = np.zeros((total_possible_bets, game.num_players))
+    total_possible_bets = game.hand_length * game.num_digits * game.num_players()
+    expected_bid_history = np.zeros((total_possible_bets, game.num_players()))
   
     # Fill player hands.
     self._populate_game_hands(game, state)
@@ -112,19 +114,19 @@ def _verify_returns(self, game, state):
     self.assertTrue(state._winner != -1 or state._loser != -1)
     actual_returns = state.returns()
     if state._winner != -1:
-      expected_returns = [-1.0 for _ in range(game.num_players)]
-      expected_returns[state._winner] = game.num_players - 1
+      expected_returns = [-1.0 for _ in range(game.num_players())]
+      expected_returns[state._winner] = game.num_players() - 1
     else:
-      expected_returns = [1.0 for _ in range(game.num_players)]
-      expected_returns[state._loser] = -1.0 * (game.num_players - 1)
+      expected_returns = [1.0 for _ in range(game.num_players())]
+      expected_returns[state._loser] = -1.0 * (game.num_players() - 1)
     self.assertEqual(actual_returns, expected_returns)
 
   def test_single_round(self):
     """Runs a single round of bidding followed by a challenge."""
     game = liars_poker.LiarsPoker()
     state = game.new_initial_state()
-    total_possible_bets = game.hand_length * game.num_digits * game.num_players
-    expected_challenge_history = np.zeros((total_possible_bets, game.num_players))
+    total_possible_bets = game.hand_length * game.num_digits * game.num_players()
+    expected_challenge_history = np.zeros((total_possible_bets, game.num_players()))
 
     # Fill player hands.
     self._populate_game_hands(game, state)
@@ -168,7 +170,7 @@ def test_single_rebid(self):
     state.apply_action(3)
     # Verify game is not over.
     self.assertFalse(state.is_terminal())
-    self.assertEqual(state.returns(), [0.0 for _ in range(game.num_players)])
+    self.assertEqual(state.returns(), [0.0 for _ in range(game.num_players())])
     # Player 1 challenges again.
     state.apply_action(liars_poker.Action.CHALLENGE)
 
@@ -191,7 +193,7 @@ def test_rebid_then_new_bid(self):
     state.apply_action(3)
     # Verify game is not over.
     self.assertFalse(state.is_terminal())
-    self.assertEqual(state.returns(), [0.0 for _ in range(game.num_players)])
+    self.assertEqual(state.returns(), [0.0 for _ in range(game.num_players())])
     # Player 1 bids.
     state.apply_action(4)
     # Verify game is not over.
@@ -209,7 +211,7 @@ def test_rebid_then_new_bid(self):
 
   def test_game_from_cc(self):
     """Runs the standard game tests, checking API consistency."""
-    game = pyspiel.load_game("python_liars_poker")
+    game = pyspiel.load_game("python_liars_poker", {"players": 2})
     pyspiel.random_sim_test(game, num_sims=10, serialize=False, verbose=True)
 
   def test_pickle(self):

From d75b5eac09a573e40cc84ddd3da68126b9103ac3 Mon Sep 17 00:00:00 2001
From: William Wong <w07wong@yahoo.com>
Date: Wed, 21 Dec 2022 03:06:00 -0800
Subject: [PATCH 10/13] Add Liar's Poker to game docs

---
 docs/games.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/docs/games.md b/docs/games.md
index 3a8027d02e..f44cc8dca1 100644
--- a/docs/games.md
+++ b/docs/games.md
@@ -45,6 +45,7 @@ Status                                       | Game
 ![](_static/green_circ10.png "green circle") | [Leduc poker](#leduc-poker)
 <font color="orange"><b>~</b></font>         | [Lewis Signaling](#lewis-signaling)
 ![](_static/green_circ10.png "green circle") | [Liar's Dice](#liars-dice)
+<font color="orange"><b>~</b></font>         | [Liar's Poker](#liars-poker)
 <font color="orange"><b>~</b></font>         | [Mancala](#mancala)
 <font color="orange"><b>~</b></font>         | [Markov Soccer](#markov-soccer)
 ![](_static/green_circ10.png "green circle") | [Matching Pennies (Three-player)](#matching-pennies-three-player)
@@ -474,6 +475,17 @@ Status                                       | Game
 *   2 players.
 *   [Wikipedia](https://en.wikipedia.org/wiki/Liar%27s_dice)
 
+### Liar's Poker
+
+*   Players bid and bluff on the state of all hands, given only the state of
+    their hand.
+*   Cards with bidding.
+*   Traditional game.
+*   Non-deterministic.
+*   Imperfect information.
+*   2 or more players.
+*   [Wikipedia](https://en.wikipedia.org/wiki/Liar%27s_poker)
+
 ### Mancala
 
 *   Players take turns sowing beans on the board and try to capture more beans

From 8d10fa463bcd551cdc7f7297be1f659bc65a359b Mon Sep 17 00:00:00 2001
From: William Wong <w07wong@yahoo.com>
Date: Wed, 21 Dec 2022 03:39:34 -0800
Subject: [PATCH 11/13] Test cleanup

---
 open_spiel/python/games/liars_poker_test.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/open_spiel/python/games/liars_poker_test.py b/open_spiel/python/games/liars_poker_test.py
index a4ae7bc344..fb064eb7c2 100644
--- a/open_spiel/python/games/liars_poker_test.py
+++ b/open_spiel/python/games/liars_poker_test.py
@@ -15,8 +15,6 @@
 # Lint as python3
 """Tests for Python Liar's Poker."""
 
-import difflib
-import os
 import pickle
 
 from absl.testing import absltest
@@ -26,9 +24,6 @@
 from open_spiel.python.observation import make_observation
 import pyspiel
 
-# TODO: remove?
-_DATA_DIR = "open_spiel/integration_tests/playthroughs/"
-
 
 class LiarsPokerTest(absltest.TestCase):
 

From 1c24c5c458ba9ddf26e89a8d6a68958a24d9b892 Mon Sep 17 00:00:00 2001
From: William Wong <w07wong@yahoo.com>
Date: Wed, 18 Jan 2023 19:33:43 +1000
Subject: [PATCH 12/13] Address January 17th comments

---
 .../playthroughs/python_liars_poker.txt       | 333 ++++++++++++------
 open_spiel/python/games/liars_poker.py        |  87 +++--
 open_spiel/python/games/liars_poker_test.py   |  73 ++--
 3 files changed, 322 insertions(+), 171 deletions(-)

diff --git a/open_spiel/integration_tests/playthroughs/python_liars_poker.txt b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt
index ef0f271761..1141887c4e 100644
--- a/open_spiel/integration_tests/playthroughs/python_liars_poker.txt
+++ b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt
@@ -16,8 +16,8 @@ GameType.reward_model = RewardModel.TERMINAL
 GameType.short_name = "python_liars_poker"
 GameType.utility = Utility.ZERO_SUM
 
-NumDistinctActions() = 20
-PolicyTensorShape() = [20]
+NumDistinctActions() = 19
+PolicyTensorShape() = [19]
 MaxChanceOutcomes() = 9
 GetParameters() = {hand_length=3,num_digits=3,players=2}
 NumPlayers() = 2
@@ -138,9 +138,9 @@ ObservationTensor(1).rebid_state: ◯
 ObservationTensor(1).counts_state: ◯
 ChanceOutcomes() = [(1, 0.3333333333333333), (2, 0.3333333333333333), (3, 0.3333333333333333)]
 LegalActions() = [1, 2, 3]
-StringLegalActions() = ["Deal:1", "Deal:2", "Deal:3"]
+StringLegalActions() = ["Deal: 1", "Deal: 2", "Deal: 3"]
 
-# Apply action "Deal:1"
+# Apply action "Deal: 1"
 action: 1
 
 # State 1
@@ -248,39 +248,39 @@ ObservationTensor(1).rebid_state: ◯
 ObservationTensor(1).counts_state: ◯
 ChanceOutcomes() = [(1, 0.3333333333333333), (2, 0.3333333333333333), (3, 0.3333333333333333)]
 LegalActions() = [1, 2, 3]
-StringLegalActions() = ["Deal:1", "Deal:2", "Deal:3"]
+StringLegalActions() = ["Deal: 1", "Deal: 2", "Deal: 3"]
 
-# Apply action "Deal:3"
-action: 3
+# Apply action "Deal: 2"
+action: 2
 
 # State 2
-# Apply action "Deal:1"
-action: 1
+# Apply action "Deal: 3"
+action: 3
 
 # State 3
-# Apply action "Deal:3"
-action: 3
+# Apply action "Deal: 2"
+action: 2
 
 # State 4
-# Apply action "Deal:2"
-action: 2
+# Apply action "Deal: 3"
+action: 3
 
 # State 5
-# Apply action "Deal:3"
-action: 3
+# Apply action "Deal: 2"
+action: 2
 
 # State 6
-# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: -1, Current Player: 0, Current Bid: None of None, Rebid: False
+# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: -1, Current Player: 0, Current Bid: None of None, Rebid: False
 IsTerminal() = False
-History() = [1, 3, 1, 3, 2, 3]
-HistoryString() = "1, 3, 1, 3, 2, 3"
+History() = [1, 2, 3, 2, 3, 2]
+HistoryString() = "1, 2, 3, 2, 3, 2"
 IsChanceNode() = False
 IsSimultaneousNode() = False
 CurrentPlayer() = 0
-InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]"
-InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]"
+InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]"
+InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]"
 InformationStateTensor(0).player: ◉◯
-InformationStateTensor(0).private_hand = [1, 1, 2]
+InformationStateTensor(0).private_hand = [1, 3, 3]
 InformationStateTensor(0).rebid_state: ◯
 InformationStateTensor(0).counts_state: ◯
 InformationStateTensor(0).bid_history: ◯◯
@@ -320,7 +320,7 @@ InformationStateTensor(0).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
 InformationStateTensor(1).player: ◯◉
-InformationStateTensor(1).private_hand = [3, 3, 3]
+InformationStateTensor(1).private_hand = [2, 2, 2]
 InformationStateTensor(1).rebid_state: ◯
 InformationStateTensor(1).counts_state: ◯
 InformationStateTensor(1).bid_history: ◯◯
@@ -359,39 +359,39 @@ InformationStateTensor(1).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
                                              ◯◯
-ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]"
-ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]"
+ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]"
+ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]"
 PublicObservationString() = "p0 rebid:[0] counts:[0]"
-PrivateObservationString(0) = "p0 hand:[1, 1, 2]"
-PrivateObservationString(1) = "p1 hand:[3, 3, 3]"
+PrivateObservationString(0) = "p0 hand:[1, 3, 3]"
+PrivateObservationString(1) = "p1 hand:[2, 2, 2]"
 ObservationTensor(0).player: ◉◯
-ObservationTensor(0).private_hand = [1, 1, 2]
+ObservationTensor(0).private_hand = [1, 3, 3]
 ObservationTensor(0).rebid_state: ◯
 ObservationTensor(0).counts_state: ◯
 ObservationTensor(1).player: ◯◉
-ObservationTensor(1).private_hand = [3, 3, 3]
+ObservationTensor(1).private_hand = [2, 2, 2]
 ObservationTensor(1).rebid_state: ◯
 ObservationTensor(1).counts_state: ◯
 Rewards() = [0, 0]
 Returns() = [0, 0]
-LegalActions() = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
-StringLegalActions() = ["Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet", "Bet"]
+LegalActions() = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
+StringLegalActions() = ["Bid: 1 of 1", "Bid: 2 of 1", "Bid: 3 of 1", "Bid: 4 of 1", "Bid: 5 of 1", "Bid: 6 of 1", "Bid: 1 of 2", "Bid: 2 of 2", "Bid: 3 of 2", "Bid: 4 of 2", "Bid: 5 of 2", "Bid: 6 of 2", "Bid: 1 of 3", "Bid: 2 of 3", "Bid: 3 of 3", "Bid: 4 of 3", "Bid: 5 of 3", "Bid: 6 of 3"]
 
-# Apply action "Bet"
-action: 15
+# Apply action "Bid: 1 of 3"
+action: 13
 
 # State 7
-# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: 0, Current Player: 1, Current Bid: 1 of 3, Rebid: False
+# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 0, Current Player: 1, Current Bid: 1 of 3, Rebid: False
 IsTerminal() = False
-History() = [1, 3, 1, 3, 2, 3, 15]
-HistoryString() = "1, 3, 1, 3, 2, 3, 15"
+History() = [1, 2, 3, 2, 3, 2, 13]
+HistoryString() = "1, 2, 3, 2, 3, 2, 13"
 IsChanceNode() = False
 IsSimultaneousNode() = False
 CurrentPlayer() = 1
-InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0] b:13."
-InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0] b:13."
+InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0] b:12."
+InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0] b:12."
 InformationStateTensor(0).player: ◉◯
-InformationStateTensor(0).private_hand = [1, 1, 2]
+InformationStateTensor(0).private_hand = [1, 3, 3]
 InformationStateTensor(0).rebid_state: ◯
 InformationStateTensor(0).counts_state: ◯
 InformationStateTensor(0).bid_history: ◯◯
@@ -406,12 +406,12 @@ InformationStateTensor(0).bid_history: ◯◯
                                        ◯◯
                                        ◯◯
                                        ◯◯
-                                       ◯◯
                                        ◉◯
                                        ◯◯
                                        ◯◯
                                        ◯◯
                                        ◯◯
+                                       ◯◯
 InformationStateTensor(0).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
@@ -431,7 +431,7 @@ InformationStateTensor(0).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
 InformationStateTensor(1).player: ◯◉
-InformationStateTensor(1).private_hand = [3, 3, 3]
+InformationStateTensor(1).private_hand = [2, 2, 2]
 InformationStateTensor(1).rebid_state: ◯
 InformationStateTensor(1).counts_state: ◯
 InformationStateTensor(1).bid_history: ◯◯
@@ -446,12 +446,12 @@ InformationStateTensor(1).bid_history: ◯◯
                                        ◯◯
                                        ◯◯
                                        ◯◯
-                                       ◯◯
                                        ◉◯
                                        ◯◯
                                        ◯◯
                                        ◯◯
                                        ◯◯
+                                       ◯◯
 InformationStateTensor(1).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
@@ -470,39 +470,39 @@ InformationStateTensor(1).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
                                              ◯◯
-ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]"
-ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]"
+ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]"
+ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]"
 PublicObservationString() = "p0 rebid:[0] counts:[0]"
-PrivateObservationString(0) = "p0 hand:[1, 1, 2]"
-PrivateObservationString(1) = "p1 hand:[3, 3, 3]"
+PrivateObservationString(0) = "p0 hand:[1, 3, 3]"
+PrivateObservationString(1) = "p1 hand:[2, 2, 2]"
 ObservationTensor(0).player: ◉◯
-ObservationTensor(0).private_hand = [1, 1, 2]
+ObservationTensor(0).private_hand = [1, 3, 3]
 ObservationTensor(0).rebid_state: ◯
 ObservationTensor(0).counts_state: ◯
 ObservationTensor(1).player: ◯◉
-ObservationTensor(1).private_hand = [3, 3, 3]
+ObservationTensor(1).private_hand = [2, 2, 2]
 ObservationTensor(1).rebid_state: ◯
 ObservationTensor(1).counts_state: ◯
 Rewards() = [0, 0]
 Returns() = [0, 0]
-LegalActions() = [1, 16, 17, 18, 19]
-StringLegalActions() = ["Challenge", "Bet", "Bet", "Bet", "Bet"]
+LegalActions() = [0, 15, 16, 17, 18]
+StringLegalActions() = ["Challenge", "Bid: 3 of 3", "Bid: 4 of 3", "Bid: 5 of 3", "Bid: 6 of 3"]
 
-# Apply action "Bet"
-action: 19
+# Apply action "Bid: 4 of 3"
+action: 16
 
 # State 8
-# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: 1, Current Player: 0, Current Bid: 5 of 3, Rebid: False
+# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 1, Current Player: 0, Current Bid: 4 of 3, Rebid: False
 IsTerminal() = False
-History() = [1, 3, 1, 3, 2, 3, 15, 19]
-HistoryString() = "1, 3, 1, 3, 2, 3, 15, 19"
+History() = [1, 2, 3, 2, 3, 2, 13, 16]
+HistoryString() = "1, 2, 3, 2, 3, 2, 13, 16"
 IsChanceNode() = False
 IsSimultaneousNode() = False
 CurrentPlayer() = 0
-InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0] b:13. b:17."
-InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0] b:13. b:17."
+InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0] b:12. b:15."
+InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0] b:12. b:15."
 InformationStateTensor(0).player: ◉◯
-InformationStateTensor(0).private_hand = [1, 1, 2]
+InformationStateTensor(0).private_hand = [1, 3, 3]
 InformationStateTensor(0).rebid_state: ◯
 InformationStateTensor(0).counts_state: ◯
 InformationStateTensor(0).bid_history: ◯◯
@@ -517,12 +517,12 @@ InformationStateTensor(0).bid_history: ◯◯
                                        ◯◯
                                        ◯◯
                                        ◯◯
-                                       ◯◯
                                        ◉◯
                                        ◯◯
                                        ◯◯
-                                       ◯◯
                                        ◯◉
+                                       ◯◯
+                                       ◯◯
 InformationStateTensor(0).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
@@ -542,7 +542,7 @@ InformationStateTensor(0).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
 InformationStateTensor(1).player: ◯◉
-InformationStateTensor(1).private_hand = [3, 3, 3]
+InformationStateTensor(1).private_hand = [2, 2, 2]
 InformationStateTensor(1).rebid_state: ◯
 InformationStateTensor(1).counts_state: ◯
 InformationStateTensor(1).bid_history: ◯◯
@@ -557,12 +557,12 @@ InformationStateTensor(1).bid_history: ◯◯
                                        ◯◯
                                        ◯◯
                                        ◯◯
-                                       ◯◯
                                        ◉◯
                                        ◯◯
                                        ◯◯
-                                       ◯◯
                                        ◯◉
+                                       ◯◯
+                                       ◯◯
 InformationStateTensor(1).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
@@ -581,39 +581,39 @@ InformationStateTensor(1).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
                                              ◯◯
-ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]"
-ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]"
+ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]"
+ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]"
 PublicObservationString() = "p0 rebid:[0] counts:[0]"
-PrivateObservationString(0) = "p0 hand:[1, 1, 2]"
-PrivateObservationString(1) = "p1 hand:[3, 3, 3]"
+PrivateObservationString(0) = "p0 hand:[1, 3, 3]"
+PrivateObservationString(1) = "p1 hand:[2, 2, 2]"
 ObservationTensor(0).player: ◉◯
-ObservationTensor(0).private_hand = [1, 1, 2]
+ObservationTensor(0).private_hand = [1, 3, 3]
 ObservationTensor(0).rebid_state: ◯
 ObservationTensor(0).counts_state: ◯
 ObservationTensor(1).player: ◯◉
-ObservationTensor(1).private_hand = [3, 3, 3]
+ObservationTensor(1).private_hand = [2, 2, 2]
 ObservationTensor(1).rebid_state: ◯
 ObservationTensor(1).counts_state: ◯
 Rewards() = [0, 0]
 Returns() = [0, 0]
-LegalActions() = [1]
-StringLegalActions() = ["Challenge"]
+LegalActions() = [0, 18]
+StringLegalActions() = ["Challenge", "Bid: 6 of 3"]
 
-# Apply action "Challenge"
-action: 1
+# Apply action "Bid: 6 of 3"
+action: 18
 
 # State 9
-# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: 1, Current Player: 1, Current Bid: 5 of 3, Rebid: False
+# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 0, Current Player: 1, Current Bid: 6 of 3, Rebid: False
 IsTerminal() = False
-History() = [1, 3, 1, 3, 2, 3, 15, 19, 1]
-HistoryString() = "1, 3, 1, 3, 2, 3, 15, 19, 1"
+History() = [1, 2, 3, 2, 3, 2, 13, 16, 18]
+HistoryString() = "1, 2, 3, 2, 3, 2, 13, 16, 18"
 IsChanceNode() = False
 IsSimultaneousNode() = False
 CurrentPlayer() = 1
-InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0] b:13. b:17. c:17."
-InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0] b:13. b:17. c:17."
+InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0] b:12. b:15. b:17."
+InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0] b:12. b:15. b:17."
 InformationStateTensor(0).player: ◉◯
-InformationStateTensor(0).private_hand = [1, 1, 2]
+InformationStateTensor(0).private_hand = [1, 3, 3]
 InformationStateTensor(0).rebid_state: ◯
 InformationStateTensor(0).counts_state: ◯
 InformationStateTensor(0).bid_history: ◯◯
@@ -628,12 +628,12 @@ InformationStateTensor(0).bid_history: ◯◯
                                        ◯◯
                                        ◯◯
                                        ◯◯
-                                       ◯◯
                                        ◉◯
                                        ◯◯
                                        ◯◯
-                                       ◯◯
                                        ◯◉
+                                       ◯◯
+                                       ◉◯
 InformationStateTensor(0).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
@@ -651,9 +651,9 @@ InformationStateTensor(0).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
                                              ◯◯
-                                             ◉◯
+                                             ◯◯
 InformationStateTensor(1).player: ◯◉
-InformationStateTensor(1).private_hand = [3, 3, 3]
+InformationStateTensor(1).private_hand = [2, 2, 2]
 InformationStateTensor(1).rebid_state: ◯
 InformationStateTensor(1).counts_state: ◯
 InformationStateTensor(1).bid_history: ◯◯
@@ -668,12 +668,123 @@ InformationStateTensor(1).bid_history: ◯◯
                                        ◯◯
                                        ◯◯
                                        ◯◯
+                                       ◉◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◉
                                        ◯◯
                                        ◉◯
+InformationStateTensor(1).challenge_history: ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]"
+ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]"
+PublicObservationString() = "p0 rebid:[0] counts:[0]"
+PrivateObservationString(0) = "p0 hand:[1, 3, 3]"
+PrivateObservationString(1) = "p1 hand:[2, 2, 2]"
+ObservationTensor(0).player: ◉◯
+ObservationTensor(0).private_hand = [1, 3, 3]
+ObservationTensor(0).rebid_state: ◯
+ObservationTensor(0).counts_state: ◯
+ObservationTensor(1).player: ◯◉
+ObservationTensor(1).private_hand = [2, 2, 2]
+ObservationTensor(1).rebid_state: ◯
+ObservationTensor(1).counts_state: ◯
+Rewards() = [0, 0]
+Returns() = [0, 0]
+LegalActions() = [0]
+StringLegalActions() = ["Challenge"]
+
+# Apply action "Challenge"
+action: 0
+
+# State 10
+# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 0, Current Player: 0, Current Bid: 6 of 3, Rebid: False
+IsTerminal() = False
+History() = [1, 2, 3, 2, 3, 2, 13, 16, 18, 0]
+HistoryString() = "1, 2, 3, 2, 3, 2, 13, 16, 18, 0"
+IsChanceNode() = False
+IsSimultaneousNode() = False
+CurrentPlayer() = 0
+InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0] b:12. b:15. b:17. c:17."
+InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0] b:12. b:15. b:17. c:17."
+InformationStateTensor(0).player: ◉◯
+InformationStateTensor(0).private_hand = [1, 3, 3]
+InformationStateTensor(0).rebid_state: ◯
+InformationStateTensor(0).counts_state: ◯
+InformationStateTensor(0).bid_history: ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
                                        ◯◯
                                        ◯◯
                                        ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◉◯
+                                       ◯◯
+                                       ◯◯
                                        ◯◉
+                                       ◯◯
+                                       ◉◯
+InformationStateTensor(0).challenge_history: ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◯
+                                             ◯◉
+InformationStateTensor(1).player: ◯◉
+InformationStateTensor(1).private_hand = [2, 2, 2]
+InformationStateTensor(1).rebid_state: ◯
+InformationStateTensor(1).counts_state: ◯
+InformationStateTensor(1).bid_history: ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◯
+                                       ◉◯
+                                       ◯◯
+                                       ◯◯
+                                       ◯◉
+                                       ◯◯
+                                       ◉◯
 InformationStateTensor(1).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
@@ -691,40 +802,40 @@ InformationStateTensor(1).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
                                              ◯◯
-                                             ◉◯
-ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[0]"
-ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[0]"
+                                             ◯◉
+ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]"
+ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]"
 PublicObservationString() = "p0 rebid:[0] counts:[0]"
-PrivateObservationString(0) = "p0 hand:[1, 1, 2]"
-PrivateObservationString(1) = "p1 hand:[3, 3, 3]"
+PrivateObservationString(0) = "p0 hand:[1, 3, 3]"
+PrivateObservationString(1) = "p1 hand:[2, 2, 2]"
 ObservationTensor(0).player: ◉◯
-ObservationTensor(0).private_hand = [1, 1, 2]
+ObservationTensor(0).private_hand = [1, 3, 3]
 ObservationTensor(0).rebid_state: ◯
 ObservationTensor(0).counts_state: ◯
 ObservationTensor(1).player: ◯◉
-ObservationTensor(1).private_hand = [3, 3, 3]
+ObservationTensor(1).private_hand = [2, 2, 2]
 ObservationTensor(1).rebid_state: ◯
 ObservationTensor(1).counts_state: ◯
 Rewards() = [0, 0]
 Returns() = [0, 0]
-LegalActions() = [1]
+LegalActions() = [0]
 StringLegalActions() = ["Challenge"]
 
 # Apply action "Challenge"
-action: 1
+action: 0
 
-# State 10
-# Hands: [[1, 1, 2], [3, 3, 3]], Bidder: 1, Current Player: PlayerId.TERMINAL, Current Bid: 5 of 3, Rebid: False
+# State 11
+# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 0, Current Player: PlayerId.TERMINAL, Current Bid: 6 of 3, Rebid: False
 IsTerminal() = True
-History() = [1, 3, 1, 3, 2, 3, 15, 19, 1, 1]
-HistoryString() = "1, 3, 1, 3, 2, 3, 15, 19, 1, 1"
+History() = [1, 2, 3, 2, 3, 2, 13, 16, 18, 0, 0]
+HistoryString() = "1, 2, 3, 2, 3, 2, 13, 16, 18, 0, 0"
 IsChanceNode() = False
 IsSimultaneousNode() = False
 CurrentPlayer() = PlayerId.TERMINAL
-InformationStateString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[1] b:13. b:17. c:17."
-InformationStateString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[1] b:13. b:17. c:17."
+InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[1] b:12. b:15. b:17. c:17."
+InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[1] b:12. b:15. b:17. c:17."
 InformationStateTensor(0).player: ◉◯
-InformationStateTensor(0).private_hand = [1, 1, 2]
+InformationStateTensor(0).private_hand = [1, 3, 3]
 InformationStateTensor(0).rebid_state: ◯
 InformationStateTensor(0).counts_state: ◉
 InformationStateTensor(0).bid_history: ◯◯
@@ -739,12 +850,12 @@ InformationStateTensor(0).bid_history: ◯◯
                                        ◯◯
                                        ◯◯
                                        ◯◯
-                                       ◯◯
                                        ◉◯
                                        ◯◯
                                        ◯◯
-                                       ◯◯
                                        ◯◉
+                                       ◯◯
+                                       ◉◯
 InformationStateTensor(0).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
@@ -764,7 +875,7 @@ InformationStateTensor(0).challenge_history: ◯◯
                                              ◯◯
                                              ◉◉
 InformationStateTensor(1).player: ◯◉
-InformationStateTensor(1).private_hand = [3, 3, 3]
+InformationStateTensor(1).private_hand = [2, 2, 2]
 InformationStateTensor(1).rebid_state: ◯
 InformationStateTensor(1).counts_state: ◉
 InformationStateTensor(1).bid_history: ◯◯
@@ -779,12 +890,12 @@ InformationStateTensor(1).bid_history: ◯◯
                                        ◯◯
                                        ◯◯
                                        ◯◯
-                                       ◯◯
                                        ◉◯
                                        ◯◯
                                        ◯◯
-                                       ◯◯
                                        ◯◉
+                                       ◯◯
+                                       ◉◯
 InformationStateTensor(1).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
@@ -803,18 +914,18 @@ InformationStateTensor(1).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
                                              ◉◉
-ObservationString(0) = "p0 hand:[1, 1, 2] rebid:[0] counts:[1]"
-ObservationString(1) = "p1 hand:[3, 3, 3] rebid:[0] counts:[1]"
+ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[1]"
+ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[1]"
 PublicObservationString() = "p0 rebid:[0] counts:[1]"
-PrivateObservationString(0) = "p0 hand:[1, 1, 2]"
-PrivateObservationString(1) = "p1 hand:[3, 3, 3]"
+PrivateObservationString(0) = "p0 hand:[1, 3, 3]"
+PrivateObservationString(1) = "p1 hand:[2, 2, 2]"
 ObservationTensor(0).player: ◉◯
-ObservationTensor(0).private_hand = [1, 1, 2]
+ObservationTensor(0).private_hand = [1, 3, 3]
 ObservationTensor(0).rebid_state: ◯
 ObservationTensor(0).counts_state: ◉
 ObservationTensor(1).player: ◯◉
-ObservationTensor(1).private_hand = [3, 3, 3]
+ObservationTensor(1).private_hand = [2, 2, 2]
 ObservationTensor(1).rebid_state: ◯
 ObservationTensor(1).counts_state: ◉
-Rewards() = [1, -1]
-Returns() = [1, -1]
+Rewards() = [-1, 1]
+Returns() = [-1, 1]
diff --git a/open_spiel/python/games/liars_poker.py b/open_spiel/python/games/liars_poker.py
index e425bced41..ae15edb31b 100644
--- a/open_spiel/python/games/liars_poker.py
+++ b/open_spiel/python/games/liars_poker.py
@@ -22,9 +22,8 @@
 import pyspiel
 
 
-class Action(enum.IntEnum):
-  BID = 0
-  CHALLENGE = 1
+CHALLENGE_ACTION = 0
+BID_ACTION_OFFSET = 1
 
 _MAX_NUM_PLAYERS = 10
 _MIN_NUM_PLAYERS = 2
@@ -53,7 +52,7 @@ class Action(enum.IntEnum):
     })
 _GAME_INFO = pyspiel.GameInfo(
     # Num actions = total number of cards * number of digits + action enum
-    num_distinct_actions=_HAND_LENGTH * _NUM_DIGITS * _MIN_NUM_PLAYERS + len(Action),
+    num_distinct_actions=_HAND_LENGTH * _NUM_DIGITS * _MIN_NUM_PLAYERS + BID_ACTION_OFFSET,
     max_chance_outcomes=_HAND_LENGTH * _NUM_DIGITS,
     num_players=_MIN_NUM_PLAYERS,
     min_utility=-(_MIN_NUM_PLAYERS - 1), # Reward from being challenged and losing.
@@ -72,7 +71,7 @@ def __init__(self, params=None):
     game_parameters = self.get_parameters()
     self.hand_length = game_parameters.get("hand_length", _HAND_LENGTH)
     self.num_digits = game_parameters.get("num_digits", _NUM_DIGITS)
-    self.deck = [_FULL_DECK[i] for i in range(self.num_digits)]
+    self.deck = _FULL_DECK[:self.num_digits] 
 
   def new_initial_state(self):
     """Returns a state corresponding to the start of a game."""
@@ -102,20 +101,18 @@ def __init__(self, game):
     self.hands = [[] for _ in range(self._num_players)]
 
     # Action dynamics
-    total_possible_bets = game.hand_length * game.num_digits * self._num_players
-    self.bid_history = np.zeros((total_possible_bets, self._num_players))
-    self.challenge_history = np.zeros((total_possible_bets, self._num_players))
+    self.total_possible_bids = game.hand_length * game.num_digits * self._num_players
+    self.bid_history = np.zeros((self.total_possible_bids, self._num_players))
+    self.challenge_history = np.zeros((self.total_possible_bids, self._num_players))
+    # self._current_player is only the valid current_player when cards have been dealt. Otherwise it's chance.
     self._current_player = 0
-    self._bid_offset = len(Action)
-    self._max_bid = (self._hand_length * self._num_digits * self._num_players
-                     + self._bid_offset - 1)
+    self._max_bid = self._hand_length * self._num_digits * self._num_players
     self._bid_originator = -1 
-    self._current_bid = -1
+    self._current_action = -1
     self._num_challenges = 0
     self.is_rebid = False
 
     # Game over dynamics
-    self._game_over = False
     self._winner = -1
     self._loser = -1
 
@@ -133,10 +130,18 @@ def current_player(self):
       return pyspiel.PlayerId.CHANCE
     else:
       return self._current_player
+    
+  def winner(self):
+    """Returns the id of the winner if the bid originator has won. -1 otherwise."""
+    return self._winner
+
+  def loser(self):
+    """Returns the id of the loser if the bid originator has lost. -1 otherwise."""
+    return self._loser
 
   def _is_challenge_possible(self):
     """A challenge is possible once the first bid is made."""
-    return self._current_bid != -1
+    return self._current_action != -1
 
   def _is_rebid_possible(self):
     """A rebid is only possible when all players have challenged the original bid."""
@@ -148,13 +153,13 @@ def _legal_actions(self, player):
     actions = []
 
     if self._is_challenge_possible():
-      actions.append(Action.CHALLENGE)
+      actions.append(CHALLENGE_ACTION)
 
     if player != self._bid_originator or self._is_rebid_possible():
       # Any move higher than the current bid is allowed.
-      # Bids start at 2 as 0 and 1 are for bid and challenge.
-      for b in range(max(self._bid_offset, self._current_bid + 1), self._max_bid + 1):
-        actions.append(b)
+      # Bids start at BID_ACTION_OFFSET (1) as 0 represents the challenge action.
+      for bid in range(self._current_action + 1, self._max_bid):
+        actions.append(bid + BID_ACTION_OFFSET)
 
     return actions
 
@@ -166,7 +171,8 @@ def chance_outcomes(self):
 
   def _decode_bid(self, bid):
     """
-    Turns a bid ID in the range 0 to HAND_LENGTH * NUM_DIGITS * NUM_PLAYERS to a count and number.
+    Turns a bid ID in the range 0 to self._max_bid (non-inclusive)
+    to a count and number.
 
     For example, take 2 players each with 2 numbers from the deck of 1, 2, and 3.
       - A bid of two 1's would correspond to a bid id 1.
@@ -176,20 +182,28 @@ def _decode_bid(self, bid):
 
     Returns a tuple of (count, number). For example, (1, 2) represents one 2's.
     """
-    count = bid % (self._hand_length * self._num_players)
+    count = bid % (self._hand_length * self._num_players) + 1
     number = self._deck[bid // (self._hand_length * self._num_players)]
     return (count, number)
 
-  def _end_game(self):
-    """Ends the game by calling a counts and setting respective attributes."""
-    self._counts()
-    self._game_over = True
+  def encode_bid(self, count, number):
+    """
+    Turns a count and number into a bid ID in the range 0 to self._max_bid (non-inclusive).
+
+    For example, take 2 players each with 2 numbers from the deck of 1, 2, and 3.
+      - A count of 2 and number of 1 would be a bid of two one's and a bid id 1.
+        - Explanation: 1 is the lowest number, and the only lower bid would be zero 1's
+          corresponding to bid id 0.
+    
+    Returns a single bid ID.
+    """
+    return ((number - 1) * self._hand_length * self._num_players) + count - 1
 
   def _counts(self):
     """
     Determines if the bid originator wins or loses.
     """
-    bid_count, bid_number = self._decode_bid(self._current_bid - self._bid_offset)
+    bid_count, bid_number = self._decode_bid(self._current_action - BID_ACTION_OFFSET)
 
     # Count the number of bid_numbers from all players.
     matches = 0
@@ -218,19 +232,19 @@ def _apply_action(self, action):
     if self.is_chance_node():
       # If we are still populating hands, draw a number for the current player.
       self.hands[self._current_player].append(action)
-    elif action == Action.CHALLENGE:
+    elif action == CHALLENGE_ACTION:
       assert self._is_challenge_possible()
       self._update_challenge_history(
-        self._current_bid - self._bid_offset, self._current_player)
+        self._current_action - BID_ACTION_OFFSET, self._current_player)
       self._num_challenges += 1
       # If there is no ongoing rebid, check if all players challenge before counting.
       # If there is an ongoing rebid, count once all the players except the bidder challenges.
       if (not self.is_rebid and self._num_challenges == self._num_players) or (
         self.is_rebid and self._num_challenges == self._num_players - 1):
-        self._end_game()
+        self._counts()
     else:
       # Set the current bid to the action.
-      self._current_bid = action
+      self._current_action = action
       if self._current_player == self._bid_originator:
         # If the bid originator is bidding again, we have a rebid.
         self.is_rebid = True
@@ -239,22 +253,23 @@ def _apply_action(self, action):
          self.is_rebid = False
       # Set the bid originator to the current player.
       self._bid_originator = self._current_player
-      self._update_bid_history(self._current_bid - self._bid_offset, self._current_player)
+      self._update_bid_history(self._current_action - BID_ACTION_OFFSET, self._current_player)
       self._num_challenges = 0
     self._current_player = (self._current_player + 1) % self._num_players
 
   def _action_to_string(self, player, action):
     """Action -> string."""
     if player == pyspiel.PlayerId.CHANCE:
-      return f"Deal:{action}"
-    elif action == Action.CHALLENGE:
+      return f"Deal: {action}"
+    elif action == CHALLENGE_ACTION:
       return "Challenge"
     else:
-      return "Bet"
+      count, number = self._decode_bid(action - BID_ACTION_OFFSET)
+      return f"Bid: {count} of {number}"
 
   def is_terminal(self):
     """Returns True if the game is over."""
-    return self._game_over
+    return self._winner >= 0 or self._loser >= 0
 
   def returns(self):
     """Total reward for each player over the course of the game so far."""
@@ -273,8 +288,8 @@ def returns(self):
 
   def __str__(self):
     """String for debug purposes. No particular semantics are required."""
-    if self._current_bid != -1:
-      count, number = self._decode_bid(self._current_bid - self._bid_offset)
+    if self._current_action != -1:
+      count, number = self._decode_bid(self._current_action - BID_ACTION_OFFSET)
     else:
       count, number = 'None', 'None'
     return "Hands: {}, Bidder: {}, Current Player: {}, Current Bid: {} of {}, Rebid: {}".format(
diff --git a/open_spiel/python/games/liars_poker_test.py b/open_spiel/python/games/liars_poker_test.py
index fb064eb7c2..45a652ecf7 100644
--- a/open_spiel/python/games/liars_poker_test.py
+++ b/open_spiel/python/games/liars_poker_test.py
@@ -84,10 +84,9 @@ def test_basic_bid(self):
     """Tests a single bid."""
     game = liars_poker.LiarsPoker()
     state = game.new_initial_state()
-    total_possible_bets = game.hand_length * game.num_digits * game.num_players()
-    expected_bid_history = np.zeros((total_possible_bets, game.num_players()))
+    expected_bid_history = np.zeros((state.total_possible_bids, state.num_players()))
   
-    # Fill player hands.
+    # Fill players hands.
     self._populate_game_hands(game, state)
     # After all hands are filled, have player 0 bid.
     cur_player = state.current_player()
@@ -95,46 +94,45 @@ def test_basic_bid(self):
     state.apply_action(action)
   
     # Verify bid history is updated correctly.
-    bid_offset = len(liars_poker.Action)
+    bid_offset = liars_poker.BID_ACTION_OFFSET
     expected_bid_history[action - bid_offset][cur_player] = 1
     self.assertTrue((state.bid_history == expected_bid_history).all())
   
     # Verify next set of legal bids is greater than the current bid.
     for next_action in state.legal_actions():
-      if next_action == liars_poker.Action.CHALLENGE:
+      if next_action == liars_poker.CHALLENGE_ACTION:
         continue
       self.assertGreater(next_action, action)
 
   def _verify_returns(self, game, state):
-    self.assertTrue(state._winner != -1 or state._loser != -1)
+    self.assertTrue(state.winner() != -1 or state.loser() != -1)
     actual_returns = state.returns()
-    if state._winner != -1:
+    if state.winner() != -1:
       expected_returns = [-1.0 for _ in range(game.num_players())]
-      expected_returns[state._winner] = game.num_players() - 1
+      expected_returns[state.winner()] = game.num_players() - 1
     else:
       expected_returns = [1.0 for _ in range(game.num_players())]
-      expected_returns[state._loser] = -1.0 * (game.num_players() - 1)
+      expected_returns[state.loser()] = -1.0 * (game.num_players() - 1)
     self.assertEqual(actual_returns, expected_returns)
 
-  def test_single_round(self):
+  def test_single_random_round(self):
     """Runs a single round of bidding followed by a challenge."""
     game = liars_poker.LiarsPoker()
     state = game.new_initial_state()
-    total_possible_bets = game.hand_length * game.num_digits * game.num_players()
-    expected_challenge_history = np.zeros((total_possible_bets, game.num_players()))
+    expected_challenge_history = np.zeros((state.total_possible_bids, state.num_players()))
 
-    # Fill player hands.
+    # Fill players hands.
     self._populate_game_hands(game, state)
     # Have player 0 bid.
     action = 2
     state.apply_action(action)
     # Verify challenge action is available to the next player.
-    challenge = liars_poker.Action.CHALLENGE
+    challenge = liars_poker.CHALLENGE_ACTION
     self.assertTrue(challenge in state.legal_actions())
     # Player 1 challenges.
     cur_player = state.current_player()
     state.apply_action(challenge)
-    bid_offset = len(liars_poker.Action)
+    bid_offset = liars_poker.BID_ACTION_OFFSET
     expected_challenge_history[action - bid_offset][cur_player] = 1
     # Verify challenge history is updated correctly.
     self.assertTrue((state.challenge_history == expected_challenge_history).all())
@@ -149,25 +147,53 @@ def test_single_round(self):
     self.assertTrue(state.is_terminal())
     # Verify returns.
     self._verify_returns(game, state)
+  
+  def test_single_deterministic_round(self):
+    """Runs a single round where cards are dealt deterministically."""
+    game = liars_poker.LiarsPoker()
+    state = game.new_initial_state()
+
+    # Deal player 0 all "1" cards and player 1 all "2" cards.
+    for i in range(game.num_players() * game.hand_length):
+      if i % 2 == 0:
+        # Deal card to player 0
+        state.apply_action(1)
+      else:
+        # Deal card to player 1
+        state._apply_action(2)
+
+    # Have player 0 bid that there are four 1's.
+    state.apply_action(state.encode_bid(4, 1) + liars_poker.BID_ACTION_OFFSET)
+    # Player 1 challenges.
+    state.apply_action(liars_poker.CHALLENGE_ACTION)
+    # Player 0 accepts the challenge.
+    state.apply_action(liars_poker.CHALLENGE_ACTION)
+    # Verify game ends with player 0 losing.
+    self.assertTrue(state.is_terminal())
+    self.assertTrue(state.loser() == 0)
+    expected_returns = [1.0 for _ in range(game.num_players())]
+    expected_returns[state.loser()] = -1.0 * (game.num_players() - 1)
+    self.assertEqual(state.returns(), expected_returns)
+
 
   def test_single_rebid(self):
     """Runs a 2 player game where a rebid is enacted."""
     game = liars_poker.LiarsPoker()
     state = game.new_initial_state()
 
-    # Fill player hands.
+    # Fill players hands.
     self._populate_game_hands(game, state)
     # Have player 0 bid.
     state.apply_action(2)
     # Player 1 challenges.
-    state.apply_action(liars_poker.Action.CHALLENGE)
+    state.apply_action(liars_poker.CHALLENGE_ACTION)
     # Original bidder rebids.
     state.apply_action(3)
     # Verify game is not over.
     self.assertFalse(state.is_terminal())
     self.assertEqual(state.returns(), [0.0 for _ in range(game.num_players())])
     # Player 1 challenges again.
-    state.apply_action(liars_poker.Action.CHALLENGE)
+    state.apply_action(liars_poker.CHALLENGE_ACTION)
 
     # Verify game is now over.
     self.assertTrue(state.is_terminal())
@@ -178,12 +204,12 @@ def test_rebid_then_new_bid(self):
     game = liars_poker.LiarsPoker()
     state = game.new_initial_state()
 
-    # Fill player hands.
+    # Fill players hands.
     self._populate_game_hands(game, state)
     # Have player 0 bid.
     state.apply_action(2)
     # Player 1 challenges.
-    state.apply_action(liars_poker.Action.CHALLENGE)
+    state.apply_action(liars_poker.CHALLENGE_ACTION)
     # Original bidder rebids.
     state.apply_action(3)
     # Verify game is not over.
@@ -194,11 +220,11 @@ def test_rebid_then_new_bid(self):
     # Verify game is not over.
     self.assertFalse(state.is_terminal())
     # Player 0 challenges.
-    state.apply_action(liars_poker.Action.CHALLENGE)
+    state.apply_action(liars_poker.CHALLENGE_ACTION)
     # Verify we're not rebidding and counts is only called once both players challenge.
     self.assertFalse(state.is_terminal())
     # Player 1 challenges and ends the game with a counts.
-    state.apply_action(liars_poker.Action.CHALLENGE)
+    state.apply_action(liars_poker.CHALLENGE_ACTION)
 
     # Verify game is now over.
     self.assertTrue(state.is_terminal())
@@ -240,8 +266,7 @@ def test_cloned_state_matches_original_state(self):
     self.assertEqual(state.num_distinct_actions(), clone.num_distinct_actions())
 
     self.assertEqual(state._current_player, clone._current_player)
-    self.assertEqual(state._current_bid, clone._current_bid)
-    self.assertEqual(state._game_over, clone._game_over)
+    self.assertEqual(state._current_action, clone._current_action)
     np.testing.assert_array_equal(state.bid_history, clone.bid_history)
     np.testing.assert_array_equal(state.challenge_history, clone.challenge_history)
 

From 546c701e6da87940e50c4fa088abcae8104d273f Mon Sep 17 00:00:00 2001
From: William Wong <w07wong@yahoo.com>
Date: Wed, 18 Jan 2023 19:37:47 +1000
Subject: [PATCH 13/13] Updated playthrough with latest pull

---
 .../playthroughs/python_liars_poker.txt       | 299 ++++++------------
 1 file changed, 94 insertions(+), 205 deletions(-)

diff --git a/open_spiel/integration_tests/playthroughs/python_liars_poker.txt b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt
index 1141887c4e..082306060d 100644
--- a/open_spiel/integration_tests/playthroughs/python_liars_poker.txt
+++ b/open_spiel/integration_tests/playthroughs/python_liars_poker.txt
@@ -140,14 +140,14 @@ ChanceOutcomes() = [(1, 0.3333333333333333), (2, 0.3333333333333333), (3, 0.3333
 LegalActions() = [1, 2, 3]
 StringLegalActions() = ["Deal: 1", "Deal: 2", "Deal: 3"]
 
-# Apply action "Deal: 1"
-action: 1
+# Apply action "Deal: 2"
+action: 2
 
 # State 1
-# Hands: [[1], []], Bidder: -1, Current Player: PlayerId.CHANCE, Current Bid: None of None, Rebid: False
+# Hands: [[2], []], Bidder: -1, Current Player: PlayerId.CHANCE, Current Bid: None of None, Rebid: False
 IsTerminal() = False
-History() = [1]
-HistoryString() = "1"
+History() = [2]
+HistoryString() = "2"
 IsChanceNode() = True
 IsSimultaneousNode() = False
 CurrentPlayer() = PlayerId.CHANCE
@@ -250,37 +250,37 @@ ChanceOutcomes() = [(1, 0.3333333333333333), (2, 0.3333333333333333), (3, 0.3333
 LegalActions() = [1, 2, 3]
 StringLegalActions() = ["Deal: 1", "Deal: 2", "Deal: 3"]
 
-# Apply action "Deal: 2"
-action: 2
+# Apply action "Deal: 1"
+action: 1
 
 # State 2
-# Apply action "Deal: 3"
-action: 3
-
-# State 3
 # Apply action "Deal: 2"
 action: 2
 
+# State 3
+# Apply action "Deal: 1"
+action: 1
+
 # State 4
-# Apply action "Deal: 3"
-action: 3
+# Apply action "Deal: 2"
+action: 2
 
 # State 5
 # Apply action "Deal: 2"
 action: 2
 
 # State 6
-# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: -1, Current Player: 0, Current Bid: None of None, Rebid: False
+# Hands: [[2, 2, 2], [1, 1, 2]], Bidder: -1, Current Player: 0, Current Bid: None of None, Rebid: False
 IsTerminal() = False
-History() = [1, 2, 3, 2, 3, 2]
-HistoryString() = "1, 2, 3, 2, 3, 2"
+History() = [2, 1, 2, 1, 2, 2]
+HistoryString() = "2, 1, 2, 1, 2, 2"
 IsChanceNode() = False
 IsSimultaneousNode() = False
 CurrentPlayer() = 0
-InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]"
-InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]"
+InformationStateString(0) = "p0 hand:[2, 2, 2] rebid:[0] counts:[0]"
+InformationStateString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[0]"
 InformationStateTensor(0).player: ◉◯
-InformationStateTensor(0).private_hand = [1, 3, 3]
+InformationStateTensor(0).private_hand = [2, 2, 2]
 InformationStateTensor(0).rebid_state: ◯
 InformationStateTensor(0).counts_state: ◯
 InformationStateTensor(0).bid_history: ◯◯
@@ -320,7 +320,7 @@ InformationStateTensor(0).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
 InformationStateTensor(1).player: ◯◉
-InformationStateTensor(1).private_hand = [2, 2, 2]
+InformationStateTensor(1).private_hand = [1, 1, 2]
 InformationStateTensor(1).rebid_state: ◯
 InformationStateTensor(1).counts_state: ◯
 InformationStateTensor(1).bid_history: ◯◯
@@ -359,17 +359,17 @@ InformationStateTensor(1).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
                                              ◯◯
-ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]"
-ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]"
+ObservationString(0) = "p0 hand:[2, 2, 2] rebid:[0] counts:[0]"
+ObservationString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[0]"
 PublicObservationString() = "p0 rebid:[0] counts:[0]"
-PrivateObservationString(0) = "p0 hand:[1, 3, 3]"
-PrivateObservationString(1) = "p1 hand:[2, 2, 2]"
+PrivateObservationString(0) = "p0 hand:[2, 2, 2]"
+PrivateObservationString(1) = "p1 hand:[1, 1, 2]"
 ObservationTensor(0).player: ◉◯
-ObservationTensor(0).private_hand = [1, 3, 3]
+ObservationTensor(0).private_hand = [2, 2, 2]
 ObservationTensor(0).rebid_state: ◯
 ObservationTensor(0).counts_state: ◯
 ObservationTensor(1).player: ◯◉
-ObservationTensor(1).private_hand = [2, 2, 2]
+ObservationTensor(1).private_hand = [1, 1, 2]
 ObservationTensor(1).rebid_state: ◯
 ObservationTensor(1).counts_state: ◯
 Rewards() = [0, 0]
@@ -381,17 +381,17 @@ StringLegalActions() = ["Bid: 1 of 1", "Bid: 2 of 1", "Bid: 3 of 1", "Bid: 4 of
 action: 13
 
 # State 7
-# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 0, Current Player: 1, Current Bid: 1 of 3, Rebid: False
+# Hands: [[2, 2, 2], [1, 1, 2]], Bidder: 0, Current Player: 1, Current Bid: 1 of 3, Rebid: False
 IsTerminal() = False
-History() = [1, 2, 3, 2, 3, 2, 13]
-HistoryString() = "1, 2, 3, 2, 3, 2, 13"
+History() = [2, 1, 2, 1, 2, 2, 13]
+HistoryString() = "2, 1, 2, 1, 2, 2, 13"
 IsChanceNode() = False
 IsSimultaneousNode() = False
 CurrentPlayer() = 1
-InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0] b:12."
-InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0] b:12."
+InformationStateString(0) = "p0 hand:[2, 2, 2] rebid:[0] counts:[0] b:12."
+InformationStateString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[0] b:12."
 InformationStateTensor(0).player: ◉◯
-InformationStateTensor(0).private_hand = [1, 3, 3]
+InformationStateTensor(0).private_hand = [2, 2, 2]
 InformationStateTensor(0).rebid_state: ◯
 InformationStateTensor(0).counts_state: ◯
 InformationStateTensor(0).bid_history: ◯◯
@@ -431,7 +431,7 @@ InformationStateTensor(0).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
 InformationStateTensor(1).player: ◯◉
-InformationStateTensor(1).private_hand = [2, 2, 2]
+InformationStateTensor(1).private_hand = [1, 1, 2]
 InformationStateTensor(1).rebid_state: ◯
 InformationStateTensor(1).counts_state: ◯
 InformationStateTensor(1).bid_history: ◯◯
@@ -470,17 +470,17 @@ InformationStateTensor(1).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
                                              ◯◯
-ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]"
-ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]"
+ObservationString(0) = "p0 hand:[2, 2, 2] rebid:[0] counts:[0]"
+ObservationString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[0]"
 PublicObservationString() = "p0 rebid:[0] counts:[0]"
-PrivateObservationString(0) = "p0 hand:[1, 3, 3]"
-PrivateObservationString(1) = "p1 hand:[2, 2, 2]"
+PrivateObservationString(0) = "p0 hand:[2, 2, 2]"
+PrivateObservationString(1) = "p1 hand:[1, 1, 2]"
 ObservationTensor(0).player: ◉◯
-ObservationTensor(0).private_hand = [1, 3, 3]
+ObservationTensor(0).private_hand = [2, 2, 2]
 ObservationTensor(0).rebid_state: ◯
 ObservationTensor(0).counts_state: ◯
 ObservationTensor(1).player: ◯◉
-ObservationTensor(1).private_hand = [2, 2, 2]
+ObservationTensor(1).private_hand = [1, 1, 2]
 ObservationTensor(1).rebid_state: ◯
 ObservationTensor(1).counts_state: ◯
 Rewards() = [0, 0]
@@ -488,21 +488,21 @@ Returns() = [0, 0]
 LegalActions() = [0, 15, 16, 17, 18]
 StringLegalActions() = ["Challenge", "Bid: 3 of 3", "Bid: 4 of 3", "Bid: 5 of 3", "Bid: 6 of 3"]
 
-# Apply action "Bid: 4 of 3"
-action: 16
+# Apply action "Bid: 5 of 3"
+action: 17
 
 # State 8
-# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 1, Current Player: 0, Current Bid: 4 of 3, Rebid: False
+# Hands: [[2, 2, 2], [1, 1, 2]], Bidder: 1, Current Player: 0, Current Bid: 5 of 3, Rebid: False
 IsTerminal() = False
-History() = [1, 2, 3, 2, 3, 2, 13, 16]
-HistoryString() = "1, 2, 3, 2, 3, 2, 13, 16"
+History() = [2, 1, 2, 1, 2, 2, 13, 17]
+HistoryString() = "2, 1, 2, 1, 2, 2, 13, 17"
 IsChanceNode() = False
 IsSimultaneousNode() = False
 CurrentPlayer() = 0
-InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0] b:12. b:15."
-InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0] b:12. b:15."
+InformationStateString(0) = "p0 hand:[2, 2, 2] rebid:[0] counts:[0] b:12. b:16."
+InformationStateString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[0] b:12. b:16."
 InformationStateTensor(0).player: ◉◯
-InformationStateTensor(0).private_hand = [1, 3, 3]
+InformationStateTensor(0).private_hand = [2, 2, 2]
 InformationStateTensor(0).rebid_state: ◯
 InformationStateTensor(0).counts_state: ◯
 InformationStateTensor(0).bid_history: ◯◯
@@ -520,120 +520,9 @@ InformationStateTensor(0).bid_history: ◯◯
                                        ◉◯
                                        ◯◯
                                        ◯◯
-                                       ◯◉
-                                       ◯◯
-                                       ◯◯
-InformationStateTensor(0).challenge_history: ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-InformationStateTensor(1).player: ◯◉
-InformationStateTensor(1).private_hand = [2, 2, 2]
-InformationStateTensor(1).rebid_state: ◯
-InformationStateTensor(1).counts_state: ◯
-InformationStateTensor(1).bid_history: ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◉◯
-                                       ◯◯
                                        ◯◯
                                        ◯◉
                                        ◯◯
-                                       ◯◯
-InformationStateTensor(1).challenge_history: ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-                                             ◯◯
-ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]"
-ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]"
-PublicObservationString() = "p0 rebid:[0] counts:[0]"
-PrivateObservationString(0) = "p0 hand:[1, 3, 3]"
-PrivateObservationString(1) = "p1 hand:[2, 2, 2]"
-ObservationTensor(0).player: ◉◯
-ObservationTensor(0).private_hand = [1, 3, 3]
-ObservationTensor(0).rebid_state: ◯
-ObservationTensor(0).counts_state: ◯
-ObservationTensor(1).player: ◯◉
-ObservationTensor(1).private_hand = [2, 2, 2]
-ObservationTensor(1).rebid_state: ◯
-ObservationTensor(1).counts_state: ◯
-Rewards() = [0, 0]
-Returns() = [0, 0]
-LegalActions() = [0, 18]
-StringLegalActions() = ["Challenge", "Bid: 6 of 3"]
-
-# Apply action "Bid: 6 of 3"
-action: 18
-
-# State 9
-# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 0, Current Player: 1, Current Bid: 6 of 3, Rebid: False
-IsTerminal() = False
-History() = [1, 2, 3, 2, 3, 2, 13, 16, 18]
-HistoryString() = "1, 2, 3, 2, 3, 2, 13, 16, 18"
-IsChanceNode() = False
-IsSimultaneousNode() = False
-CurrentPlayer() = 1
-InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0] b:12. b:15. b:17."
-InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0] b:12. b:15. b:17."
-InformationStateTensor(0).player: ◉◯
-InformationStateTensor(0).private_hand = [1, 3, 3]
-InformationStateTensor(0).rebid_state: ◯
-InformationStateTensor(0).counts_state: ◯
-InformationStateTensor(0).bid_history: ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◯◯
-                                       ◉◯
-                                       ◯◯
-                                       ◯◯
-                                       ◯◉
-                                       ◯◯
-                                       ◉◯
 InformationStateTensor(0).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
@@ -653,7 +542,7 @@ InformationStateTensor(0).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
 InformationStateTensor(1).player: ◯◉
-InformationStateTensor(1).private_hand = [2, 2, 2]
+InformationStateTensor(1).private_hand = [1, 1, 2]
 InformationStateTensor(1).rebid_state: ◯
 InformationStateTensor(1).counts_state: ◯
 InformationStateTensor(1).bid_history: ◯◯
@@ -671,9 +560,9 @@ InformationStateTensor(1).bid_history: ◯◯
                                        ◉◯
                                        ◯◯
                                        ◯◯
+                                       ◯◯
                                        ◯◉
                                        ◯◯
-                                       ◉◯
 InformationStateTensor(1).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
@@ -692,17 +581,17 @@ InformationStateTensor(1).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
                                              ◯◯
-ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]"
-ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]"
+ObservationString(0) = "p0 hand:[2, 2, 2] rebid:[0] counts:[0]"
+ObservationString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[0]"
 PublicObservationString() = "p0 rebid:[0] counts:[0]"
-PrivateObservationString(0) = "p0 hand:[1, 3, 3]"
-PrivateObservationString(1) = "p1 hand:[2, 2, 2]"
+PrivateObservationString(0) = "p0 hand:[2, 2, 2]"
+PrivateObservationString(1) = "p1 hand:[1, 1, 2]"
 ObservationTensor(0).player: ◉◯
-ObservationTensor(0).private_hand = [1, 3, 3]
+ObservationTensor(0).private_hand = [2, 2, 2]
 ObservationTensor(0).rebid_state: ◯
 ObservationTensor(0).counts_state: ◯
 ObservationTensor(1).player: ◯◉
-ObservationTensor(1).private_hand = [2, 2, 2]
+ObservationTensor(1).private_hand = [1, 1, 2]
 ObservationTensor(1).rebid_state: ◯
 ObservationTensor(1).counts_state: ◯
 Rewards() = [0, 0]
@@ -713,18 +602,18 @@ StringLegalActions() = ["Challenge"]
 # Apply action "Challenge"
 action: 0
 
-# State 10
-# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 0, Current Player: 0, Current Bid: 6 of 3, Rebid: False
+# State 9
+# Hands: [[2, 2, 2], [1, 1, 2]], Bidder: 1, Current Player: 1, Current Bid: 5 of 3, Rebid: False
 IsTerminal() = False
-History() = [1, 2, 3, 2, 3, 2, 13, 16, 18, 0]
-HistoryString() = "1, 2, 3, 2, 3, 2, 13, 16, 18, 0"
+History() = [2, 1, 2, 1, 2, 2, 13, 17, 0]
+HistoryString() = "2, 1, 2, 1, 2, 2, 13, 17, 0"
 IsChanceNode() = False
 IsSimultaneousNode() = False
-CurrentPlayer() = 0
-InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0] b:12. b:15. b:17. c:17."
-InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0] b:12. b:15. b:17. c:17."
+CurrentPlayer() = 1
+InformationStateString(0) = "p0 hand:[2, 2, 2] rebid:[0] counts:[0] b:12. b:16. c:16."
+InformationStateString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[0] b:12. b:16. c:16."
 InformationStateTensor(0).player: ◉◯
-InformationStateTensor(0).private_hand = [1, 3, 3]
+InformationStateTensor(0).private_hand = [2, 2, 2]
 InformationStateTensor(0).rebid_state: ◯
 InformationStateTensor(0).counts_state: ◯
 InformationStateTensor(0).bid_history: ◯◯
@@ -742,9 +631,9 @@ InformationStateTensor(0).bid_history: ◯◯
                                        ◉◯
                                        ◯◯
                                        ◯◯
+                                       ◯◯
                                        ◯◉
                                        ◯◯
-                                       ◉◯
 InformationStateTensor(0).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
@@ -761,10 +650,10 @@ InformationStateTensor(0).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
                                              ◯◯
+                                             ◉◯
                                              ◯◯
-                                             ◯◉
 InformationStateTensor(1).player: ◯◉
-InformationStateTensor(1).private_hand = [2, 2, 2]
+InformationStateTensor(1).private_hand = [1, 1, 2]
 InformationStateTensor(1).rebid_state: ◯
 InformationStateTensor(1).counts_state: ◯
 InformationStateTensor(1).bid_history: ◯◯
@@ -782,9 +671,9 @@ InformationStateTensor(1).bid_history: ◯◯
                                        ◉◯
                                        ◯◯
                                        ◯◯
+                                       ◯◯
                                        ◯◉
                                        ◯◯
-                                       ◉◯
 InformationStateTensor(1).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
@@ -801,19 +690,19 @@ InformationStateTensor(1).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
                                              ◯◯
+                                             ◉◯
                                              ◯◯
-                                             ◯◉
-ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[0]"
-ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[0]"
+ObservationString(0) = "p0 hand:[2, 2, 2] rebid:[0] counts:[0]"
+ObservationString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[0]"
 PublicObservationString() = "p0 rebid:[0] counts:[0]"
-PrivateObservationString(0) = "p0 hand:[1, 3, 3]"
-PrivateObservationString(1) = "p1 hand:[2, 2, 2]"
+PrivateObservationString(0) = "p0 hand:[2, 2, 2]"
+PrivateObservationString(1) = "p1 hand:[1, 1, 2]"
 ObservationTensor(0).player: ◉◯
-ObservationTensor(0).private_hand = [1, 3, 3]
+ObservationTensor(0).private_hand = [2, 2, 2]
 ObservationTensor(0).rebid_state: ◯
 ObservationTensor(0).counts_state: ◯
 ObservationTensor(1).player: ◯◉
-ObservationTensor(1).private_hand = [2, 2, 2]
+ObservationTensor(1).private_hand = [1, 1, 2]
 ObservationTensor(1).rebid_state: ◯
 ObservationTensor(1).counts_state: ◯
 Rewards() = [0, 0]
@@ -824,18 +713,18 @@ StringLegalActions() = ["Challenge"]
 # Apply action "Challenge"
 action: 0
 
-# State 11
-# Hands: [[1, 3, 3], [2, 2, 2]], Bidder: 0, Current Player: PlayerId.TERMINAL, Current Bid: 6 of 3, Rebid: False
+# State 10
+# Hands: [[2, 2, 2], [1, 1, 2]], Bidder: 1, Current Player: PlayerId.TERMINAL, Current Bid: 5 of 3, Rebid: False
 IsTerminal() = True
-History() = [1, 2, 3, 2, 3, 2, 13, 16, 18, 0, 0]
-HistoryString() = "1, 2, 3, 2, 3, 2, 13, 16, 18, 0, 0"
+History() = [2, 1, 2, 1, 2, 2, 13, 17, 0, 0]
+HistoryString() = "2, 1, 2, 1, 2, 2, 13, 17, 0, 0"
 IsChanceNode() = False
 IsSimultaneousNode() = False
 CurrentPlayer() = PlayerId.TERMINAL
-InformationStateString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[1] b:12. b:15. b:17. c:17."
-InformationStateString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[1] b:12. b:15. b:17. c:17."
+InformationStateString(0) = "p0 hand:[2, 2, 2] rebid:[0] counts:[1] b:12. b:16. c:16."
+InformationStateString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[1] b:12. b:16. c:16."
 InformationStateTensor(0).player: ◉◯
-InformationStateTensor(0).private_hand = [1, 3, 3]
+InformationStateTensor(0).private_hand = [2, 2, 2]
 InformationStateTensor(0).rebid_state: ◯
 InformationStateTensor(0).counts_state: ◉
 InformationStateTensor(0).bid_history: ◯◯
@@ -853,9 +742,9 @@ InformationStateTensor(0).bid_history: ◯◯
                                        ◉◯
                                        ◯◯
                                        ◯◯
+                                       ◯◯
                                        ◯◉
                                        ◯◯
-                                       ◉◯
 InformationStateTensor(0).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
@@ -872,10 +761,10 @@ InformationStateTensor(0).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
                                              ◯◯
-                                             ◯◯
                                              ◉◉
+                                             ◯◯
 InformationStateTensor(1).player: ◯◉
-InformationStateTensor(1).private_hand = [2, 2, 2]
+InformationStateTensor(1).private_hand = [1, 1, 2]
 InformationStateTensor(1).rebid_state: ◯
 InformationStateTensor(1).counts_state: ◉
 InformationStateTensor(1).bid_history: ◯◯
@@ -893,9 +782,9 @@ InformationStateTensor(1).bid_history: ◯◯
                                        ◉◯
                                        ◯◯
                                        ◯◯
+                                       ◯◯
                                        ◯◉
                                        ◯◯
-                                       ◉◯
 InformationStateTensor(1).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
@@ -912,20 +801,20 @@ InformationStateTensor(1).challenge_history: ◯◯
                                              ◯◯
                                              ◯◯
                                              ◯◯
-                                             ◯◯
                                              ◉◉
-ObservationString(0) = "p0 hand:[1, 3, 3] rebid:[0] counts:[1]"
-ObservationString(1) = "p1 hand:[2, 2, 2] rebid:[0] counts:[1]"
+                                             ◯◯
+ObservationString(0) = "p0 hand:[2, 2, 2] rebid:[0] counts:[1]"
+ObservationString(1) = "p1 hand:[1, 1, 2] rebid:[0] counts:[1]"
 PublicObservationString() = "p0 rebid:[0] counts:[1]"
-PrivateObservationString(0) = "p0 hand:[1, 3, 3]"
-PrivateObservationString(1) = "p1 hand:[2, 2, 2]"
+PrivateObservationString(0) = "p0 hand:[2, 2, 2]"
+PrivateObservationString(1) = "p1 hand:[1, 1, 2]"
 ObservationTensor(0).player: ◉◯
-ObservationTensor(0).private_hand = [1, 3, 3]
+ObservationTensor(0).private_hand = [2, 2, 2]
 ObservationTensor(0).rebid_state: ◯
 ObservationTensor(0).counts_state: ◉
 ObservationTensor(1).player: ◯◉
-ObservationTensor(1).private_hand = [2, 2, 2]
+ObservationTensor(1).private_hand = [1, 1, 2]
 ObservationTensor(1).rebid_state: ◯
 ObservationTensor(1).counts_state: ◉
-Rewards() = [-1, 1]
-Returns() = [-1, 1]
+Rewards() = [1, -1]
+Returns() = [1, -1]