Skip to content

Commit

Permalink
dataset generation for matrix games.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 501626702
Change-Id: I971af0e8865dc3be8d6f0e61897856cb7a72ee01
  • Loading branch information
Elnaz Davoodi authored and lanctot committed Jan 16, 2023
1 parent c054367 commit 1c05977
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 0 deletions.
81 changes: 81 additions & 0 deletions open_spiel/python/examples/meta_cfr/matrix_games/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""Main file to train and evaluate meta-regret and regret matching agents."""

from absl import app
from absl import flags
import numpy as np

from open_spiel.python.examples.meta_cfr.matrix_games import evaluation
from open_spiel.python.examples.meta_cfr.matrix_games import matrix_dataset
from open_spiel.python.examples.meta_cfr.matrix_games import meta_selfplay_agent
from open_spiel.python.examples.meta_cfr.matrix_games import regret_matching_agent


# Command-line flags for the matrix-game self-play example. They are read
# through the global FLAGS object after absl has parsed the command line.
FLAGS = flags.FLAGS
# Number of copies of the base payoff matrix stacked into one batch.
flags.DEFINE_integer("batch_size", 1, "Batch size.")
# Used both as `steps_count` during evaluation and as `training_epochs` for the
# meta self-play agents in selfplay_main.
flags.DEFINE_integer("evaluation_steps", 1000, "Number of evaluation steps.")
# Forwarded to the dataset as `num_training_batches`.
flags.DEFINE_integer("num_batches", 1,
                     "Number of batches to train a meta optimizer.")
# Forwarded to the meta self-play agents as `repeats`.
flags.DEFINE_integer("repeats", 10,
                     "Number of training each batch in meta learning.")
# Seed handed to np.random.seed at the start of selfplay_main.
flags.DEFINE_integer("seed", 10, "random seed.")
# min_val / max_val are forwarded to the dataset as `minval` / `maxval`, the
# bounds used when drawing the random payoff perturbation.
flags.DEFINE_integer("min_val", 0,
                     "minimum value for randomizing a payoff matrix.")
flags.DEFINE_integer("max_val", 10,
                     "maximum value for randomizing a payoff matrix.")
# Forwarded to the regret matching agents. NOTE(review): the base game built in
# selfplay_main is a fixed 3x3 matrix, so values other than 3 presumably do not
# match it -- confirm against the agent implementation.
flags.DEFINE_integer("num_actions", 3, "Number of actions an agent can take.")
# When true, the dataset skips randomization and keeps serving one matrix.
flags.DEFINE_bool("single_problem", False,
                  "If the matrix dataset generates only a single matrix.")


def selfplay_main(argv):
    """Train two meta-regret and two regret-matching agents, then evaluate.

    The agents are built and trained one after another (meta agents first),
    all drawing from the same training generator. Each pair is then evaluated
    in self-play on a single evaluation payoff batch, regret matching first.

    Args:
        argv: Leftover command-line arguments supplied by absl; unused.
    """
    del argv
    np.random.seed(FLAGS.seed)

    # Rock-paper-scissors payoffs, repeated once per batch element.
    rps_payoffs = [[0, -1, 1], [1, 0, -1], [-1, 1, 0]]
    base_matrix = np.array([rps_payoffs] * FLAGS.batch_size)

    dataset = matrix_dataset.Dataset(
        base_matrix=base_matrix,
        num_training_batches=FLAGS.num_batches,
        minval=FLAGS.min_val,
        maxval=FLAGS.max_val)
    data_loader = dataset.get_training_batch()
    eval_payoff_batch = dataset.get_eval_batch()

    def _trained_meta_agent():
        # Build one meta self-play agent and train it immediately.
        agent = meta_selfplay_agent.MetaSelfplayAgent(
            repeats=FLAGS.repeats,
            training_epochs=FLAGS.evaluation_steps,
            data_loader=data_loader)
        agent.train()
        return agent

    def _trained_regret_matching_agent():
        # Build one regret matching agent and train it immediately.
        agent = regret_matching_agent.RegretMatchingAgent(
            num_actions=FLAGS.num_actions, data_loader=data_loader)
        agent.train()
        return agent

    # Construction/training order is kept sequential because every agent
    # consumes the same generator and the same global NumPy random state.
    mr_agent = _trained_meta_agent()
    mr_agent2 = _trained_meta_agent()
    rm_agent = _trained_regret_matching_agent()
    rm_agent2 = _trained_regret_matching_agent()

    matchups = (
        ("Regret matching", rm_agent, rm_agent2),
        ("Meta regret matching", mr_agent, mr_agent2),
    )
    for title, first_agent, second_agent in matchups:
        print(title)
        evaluation.evaluate_in_selfplay(
            agent_x=first_agent,
            agent_y=second_agent,
            payoff_batch=eval_payoff_batch,
            steps_count=FLAGS.evaluation_steps)


# Script entry point: absl parses the flags and then calls selfplay_main.
if __name__ == "__main__":
    app.run(selfplay_main)
41 changes: 41 additions & 0 deletions open_spiel/python/examples/meta_cfr/matrix_games/matrix_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""Dataset for structured payoff matrices."""

from absl import flags
import numpy as np

FLAGS = flags.FLAGS


class Dataset:
    """Source of batched payoff matrices derived from one base batch.

    A randomized batch is a copy of ``base_matrix`` in which, for each batch
    element, every strictly positive payoff is increased and every strictly
    negative payoff is decreased by one random integer drawn from
    ``[minval, maxval)``. Zero payoffs are left untouched.

    The class reads ``FLAGS.batch_size`` and ``FLAGS.single_problem`` when a
    batch is requested, so those flags must be defined and parsed by the
    program that uses it.
    """

    def __init__(self, base_matrix, num_training_batches, minval, maxval):
        """Store the base batch and the bounds of the random offset.

        Args:
            base_matrix: Array whose first axis indexes the batch; each entry
                along that axis is one payoff matrix.
            num_training_batches: Stored on the instance; not consulted by
                any method of this class.
            minval: Inclusive lower bound of the random offset.
            maxval: Exclusive upper bound of the random offset.
        """
        self._base_matrix = base_matrix
        self._num_training_batches = num_training_batches
        self._minval, self._maxval = minval, maxval
        # Starts as a plain copy of the base batch; in single-problem mode
        # this unmodified copy is what every request returns.
        self._new_matrix = np.copy(self._base_matrix)

    def _randomized_batch(self):
        """Return a fresh copy of the base batch with per-matrix offsets."""
        offsets = np.random.randint(
            low=self._minval, high=self._maxval, size=FLAGS.batch_size)
        batch = np.copy(self._base_matrix)
        # Iterating over the first axis yields views, so the in-place updates
        # below write straight into `batch`. Each matrix gets its own offset;
        # the previous code masked the whole batch on every iteration, which
        # applied every offset to every matrix.
        for game, offset in zip(batch, offsets):
            # Both masks are taken before mutating so that an entry moved
            # across zero by the first update is not shifted a second time.
            positive = game > 0
            negative = game < 0
            game[positive] += offset
            game[negative] -= offset
        return batch

    def get_training_batch(self):
        """Yield training batches forever.

        Yields:
            A newly randomized batch on every step, or the same stored batch
            each time when ``FLAGS.single_problem`` is set.
        """
        while True:
            if not FLAGS.single_problem:
                self._new_matrix = self._randomized_batch()
            yield self._new_matrix

    def get_eval_batch(self):
        """Return one evaluation batch.

        Returns:
            A newly randomized batch, or the stored batch unchanged when
            ``FLAGS.single_problem`` is set.
        """
        if not FLAGS.single_problem:
            self._new_matrix = self._randomized_batch()
        return self._new_matrix

0 comments on commit 1c05977

Please sign in to comment.