Skip to content

Commit

Permalink
Merge pull request google-deepmind#239 from michalsustr:master
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 315790097
Change-Id: I9bf375a5e04dc0713be8f5c806c17e4349ecc808
  • Loading branch information
open_spiel@google.com authored and open_spiel@google.com committed Jun 11, 2020
2 parents 6c06a62 + 5bcd0bc commit c0064da
Show file tree
Hide file tree
Showing 2 changed files with 174 additions and 31 deletions.
178 changes: 148 additions & 30 deletions open_spiel/integration_tests/api_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
# Lint as: python3
"""Tests for open_spiel.integration_tests.api."""

import collections
import logging
import random
import time
import unittest

from absl.testing import absltest
Expand Down Expand Up @@ -89,6 +92,10 @@
"nf_auction": 2,
}

# Some tests run for a fixed time budget.
# This specified approximately how many seconds they should run.
TIMEABLE_TEST_RUNTIME = 10


class EnforceAPIOnFullTreeBase(parameterized.TestCase):
"""Test properties on the full game tree, instantiating the tree only once.
Expand Down Expand Up @@ -267,6 +274,7 @@ def _get_some_states(game, num_plays=10, include_terminals=True):
class PartialEnforceAPIConventionsTest(parameterized.TestCase):
"""This only partially test some properties."""

# pylint: disable=g-unreachable-test-method
def _assert_observations_raises_error_on_invalid_player(self, game, state):
game_type = game.get_type()
game_name = game_type.short_name
Expand Down Expand Up @@ -311,41 +319,151 @@ def _assert_observations_raises_error_on_invalid_player(self, game, state):
state.observation_string(num_players + 1)

@parameterized.parameters(_GAMES_TO_TEST)
def test_observations_raises_error_on_invalid_player(self, game_name):
print(f"Testing observations for {game_name}")
def test_observations_are_consistent_with_info_states(self, game_name):
print(f"Testing observation <-> info_state consistency for '{game_name}'")
game = pyspiel.load_game(game_name)
state = game.new_initial_state()

# Some games cannot be finished by always taking the first legal actions.
give_up_after = float("inf")
if game.get_type().short_name in ["backgammon", "laser_tag"]:
give_up_after = 100

while not state.is_terminal():
if len(state.history()) > give_up_after:
break

if not state.is_chance_node():
self._assert_observations_raises_error_on_invalid_player(game, state)
game_type = game.get_type()

if state.is_chance_node():
for action, prob in state.chance_outcomes():
if prob != 0:
state.apply_action(action)
break
elif state.is_simultaneous_node():
# Simultaneous node: sample actions for all players.
chosen_actions = [
state.legal_actions(pid)[0] for pid in range(game.num_players())
]
state.apply_actions(chosen_actions)
if not game_type.provides_information_state_string \
or not game_type.provides_observation_string:
print(f"Skipping test for '{game_name}', as it doesn't provide both "
"information_state_string and observation_string")
return

if game_type.dynamics == pyspiel.GameType.Dynamics.SIMULTANEOUS:
logging.warning(
"'%s' is not turn-based. Trying to reload game as turn-based.",
game_name)
game = pyspiel.load_game_as_turn_based(game_name)

# Idea of the test: make rollouts in the game, and collect both
# Action-Observation histories (AOH) and InformationState for different
# ground states. Check that there is a unique bijection between them.
#
# Of course, this test does not exclude the possibility the game might
# have a bug! But it is a fast way to discover a possible inconsistency
# in a new implementation.
aoh_is = dict() # aoh -> info_state
is_aoh = dict() # info_state -> aoh
aoh_histories = collections.defaultdict(set) # aoh -> states
is_histories = collections.defaultdict(set) # info_states -> states

# Some games have very long play-throughs.
give_up_after = 100 # actions

# Show a helpful error message for debugging the observations in a game.
def show_error(histories, player, dump_collections=True):
aohs = list()
info_states = list()
descriptions = list()
# Emulate the histories to collect relevant lists.
for history in histories:
state = game.new_initial_state()
aoh = [("obs", state.observation_string(player))]
for action in history:
state.apply_action(action)
if state.current_player() == player:
aoh.append(("action", action))
aoh.append(("obs", state.observation_string(player)))
aohs.append(aoh)
info_states.append(state.information_state_string(player))
descriptions.append(str(state))

histories_str = "\n".join([str(history) for history in histories])
descriptions_str = "\n".join(descriptions)
aohs_str = "\n".join([str(aoh) for aoh in aohs])
info_states_str = "\n".join([str(s) for s in info_states])

if dump_collections:
def format_dump(xs):
return "\n".join([f"{str(key)} -> {str(value)}"
for key, value in xs.items()])
# pylint: disable=g-backslash-continuation
extras = "Dumping colections:\n" \
f"aoh -> info_state:\n{format_dump(aoh_is)}\n\n" \
f"info_state -> aoh:\n{format_dump(is_aoh)}\n\n" \
f"aoh -> histories:\n{format_dump(aoh_histories)}\n\n" \
f"info_state -> histories:\n{format_dump(is_histories)}\n\n"
else:
# Decision node: sample action for the single current player
# pylint: disable=g-backslash-continuation
extras = "Rerun this test with dump_collections=True " \
"for extra information."

# pylint: disable=g-backslash-continuation
msg = \
f"\n\n" \
f"The action-observation histories (AOH) are not consistent with " \
f"information states for player {player}.\n\n" \
f"The conflicting set of states (histories) is:\n{histories_str}\n\n" \
f"Their domain-specific descriptions are:\n{descriptions_str}\n\n" \
f"The corresponding AOH are:\n{aohs_str}\n\n" \
f"The corresponding info states are:\n{info_states_str}\n\n" \
f"{extras}\n" \
f"What to do to fix this? Consult the documentation to " \
f"State::InformationStateString and State::ObservationString."
return msg

def collect_and_test_rollouts(player):
random.seed(0)
nonlocal aoh_is, is_aoh, aoh_histories, is_histories
state = game.new_initial_state()
aoh = [("obs", state.observation_string(player))]

# TODO(author13): we want to check terminals for consistency too, but info
# state string is not defined there and neither are observations by
# design.
while not state.is_terminal():
if len(state.history()) > give_up_after:
break

# Do not collect over chance nodes.
if not state.is_chance_node():
info_state = state.information_state_string()
aoh_histories[str(aoh)].add(tuple(state.history()))
is_histories[info_state].add(tuple(state.history()))

states = {tuple(state.history())}
states = states.union(aoh_histories[str(aoh)])
states = states.union(is_histories[info_state])
if str(aoh) in aoh_is:
states = states.union(is_histories[aoh_is[str(aoh)]])
self.assertEqual(aoh_is[str(aoh)], info_state,
show_error(states, player))
else:
aoh_is[str(aoh)] = info_state
if info_state in is_aoh:
states = states.union(aoh_histories[str(is_aoh[info_state])])
self.assertEqual(is_aoh[info_state], str(aoh),
show_error(states, player))
else:
is_aoh[info_state] = str(aoh)

# Make random actions.
action = random.choice(state.legal_actions(state.current_player()))
state.action_to_string(state.current_player(), action)
if state.current_player() == player:
aoh.append(("action", action))
state.apply_action(action)

self._assert_observations_raises_error_on_invalid_player(game, state)
aoh.append(("obs", state.observation_string(player)))

# Run (very roughly!) for this many seconds. This very much depends on the
# machine the test runs on, as some games take a long time to produce
# a single rollout.
time_limit = TIMEABLE_TEST_RUNTIME / game.num_players()
start = time.time()
is_time_out = lambda: time.time() - start > time_limit

rollouts = 0
for player in range(game.num_players()):
aoh_is.clear()
is_aoh.clear()
aoh_histories.clear()
is_histories.clear()
while not is_time_out():
collect_and_test_rollouts(player)
rollouts += 1

print(f"Test for {game_name} took {time.time()-start} seconds "
f"to make {rollouts} rollouts.")

@parameterized.parameters(_GAMES_TO_TEST)
def test_legal_actions_returns_empty_list_on_opponent(self, game_name):
Expand Down
27 changes: 26 additions & 1 deletion open_spiel/spiel.h
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,17 @@ class State {
// when the only part of the state that differs is not observable by that
// player (e.g. opponents' cards in Poker.)

// The identifiers must be unique across all players.
// This allows an algorithm to maintain a single table of identifiers
// instead of maintaining a table per player to avoid name collisions.
//
// A simple way to do so is for example, in a card game, if both players can
// hold the card Jack, the identifier can contain player identification as
// well, like P1Jack and P2Jack. However prefixing by player number is not
// a requirement. The only thing that is necessary is that it is unambiguous
// who is the observer.


// Games that do not have imperfect information do not need to implement
// these methods, but most algorithms intended for imperfect information
// games will work on perfect information games provided the InformationState
Expand All @@ -387,6 +398,16 @@ class State {
// a perfect-recall representation, but the sequence of moves played would
// be.

// If you implement both InformationState and Observation, the two must be
// consistent for all the players (even the non-acting player(s)).
// By consistency we mean that when you maintain an Action-Observation
// history (AOH) for different ground states, the (in)equality of two AOHs
// implies the (in)equality of two InformationStates.
// In other words, AOH is a factored representation of InformationState.
//
// For details, see Section 3.1 of https://arxiv.org/abs/1908.09453
// or Section 2.1 of https://arxiv.org/pdf/1906.11110.pdf

// There are currently no use-case for calling this function with
// `kChancePlayerId` or `kTerminalPlayerId`. Thus, games are expected to raise
// an error in those cases using (and it's tested in api_test.py):
Expand Down Expand Up @@ -430,14 +451,18 @@ class State {
// - It has at most the same information content as the information state
// - The complete history of observations and our actions over the
// course of the game is sufficient to reconstruct the information
// state.
// state for any players at any point in the game.
//
// For example, the cards revealed and bets made since our previous move in
// poker, or the current state of the board in chess.
// Note that neither of these are valid information states, since the same
// observation may arise from two different observation histories (i.e. they
// are not perfect recall).
//
// Observations should cover all observations: a combination of both public
// (common) and private observations. They are not factored into these
// individual constituent parts.
//
// Implementations should start with (and it's tested in api_test.py):
// SPIEL_CHECK_GE(player, 0);
// SPIEL_CHECK_LT(player, num_players_);
Expand Down

0 comments on commit c0064da

Please sign in to comment.