Convert quoridor movement action IDs to relative

The original implementation of Quoridor used absolute position numbering for pawn moves, so moving to b3 was always the same action ID, regardless of where the pawn was. This commit changes to using relative action IDs, so moving directly north is always the same action ID, regardless of what square that is moving to.
google-deepmind · May 21, 2024 · 63c35ea · 63c35ea
1 parent 7d3a355
commit 63c35ea
Show file tree

Hide file tree

Showing 4 changed files with 717 additions and 688 deletions.
diff --git a/open_spiel/games/quoridor/quoridor.cc b/open_spiel/games/quoridor/quoridor.cc
@@ -146,7 +146,9 @@ QuoridorState::QuoridorState(std::shared_ptr<const Game> game, int board_size,
     : State(game),
       board_size_(board_size),
       board_diameter_(board_size * 2 - 1),
-      ansi_color_output_(ansi_color_output) {
+      ansi_color_output_(ansi_color_output),
+      // See ActionToMove for explanation of the below
+      base_for_relative_(2, 2, board_diameter_) {
   board_.resize(board_diameter_ * board_diameter_, kPlayerNone);
   players_.resize(num_players_);
   // Account for order of turns (order of play is clockwise)
@@ -200,14 +202,32 @@ void QuoridorState::InitializePlayer(QuoridorPlayer p) {
   }
 }
 
+/*
+ * The original implementation mapped action IDs to absolute board positions.
+ * This meant that moving "north" had a different ID for every pawn position.
+ * Now action IDs are encoded in the same virtual space as absolute board 
+ * positions, but they indicate the pawn's relative move as if it were in
+ * square (1,1). So when we get those action IDs in, we need to convert them
+ * back into the absolute position into which we need to place the pawn.
+ */
 Move QuoridorState::ActionToMove(Action action_id) const {
-  return GetMove(action_id % board_diameter_, action_id / board_diameter_);
+  Move move = GetMove(action_id % board_diameter_, action_id / board_diameter_);
+  if (!move.IsWall()) {
+    Move target = player_loc_[current_player_] + (move - base_for_relative_);
+    if (GetPlayer(target) == kPlayerNone) {
+      return target;
+    } else {
+      // Jumping over a player is inferred - it has the same action ID as just stepping
+      return player_loc_[current_player_] + ((move - base_for_relative_) * 2);
+    }
+  }
+  return move;
 }
 
 std::vector<Action> QuoridorState::LegalActions() const {
   std::vector<Action> moves;
   if (IsTerminal()) return moves;
-  int max_moves = 5;  // Max pawn moves, including jumps.
+  int max_moves = num_players_ > 2 ? 6 : 5;  // Max legal pawn moves, including jumps.
   if (wall_count_[current_player_] > 0) {
     max_moves += 2 * (board_size_ - 1) * (board_size_ - 1);  // Max wall moves.
   }
@@ -261,7 +281,7 @@ void QuoridorState::AddActions(Move cur, Offset offset,
   Move forward = cur + offset * 2;
   if (GetPlayer(forward) == kPlayerNone) {
     // Normal single step in this direction.
-    moves->push_back(forward.xy);
+    moves->push_back((base_for_relative_ + offset * 2).xy);
     return;
   }
 
@@ -271,7 +291,8 @@ void QuoridorState::AddActions(Move cur, Offset offset,
     // In two-players: A normal jump is allowed. We know that spot is empty.
     // In >2 players, must check.
     if (GetPlayer(cur + offset * 4) == kPlayerNone) {
-      moves->push_back((cur + offset * 4).xy);
+      // The relative action ID for jumping directly over is the same as moving
+      moves->push_back((base_for_relative_ + offset * 2).xy);
       return;
     } else {
       return;
@@ -283,13 +304,13 @@ void QuoridorState::AddActions(Move cur, Offset offset,
   Offset left = offset.rotate_left();
   if (!IsWall(forward + left)) {
     if (GetPlayer(forward + left * 2) == kPlayerNone) {
-      moves->push_back((forward + left * 2).xy);
+      moves->push_back((base_for_relative_ + offset * 2 + left * 2).xy);
     }
   }
   Offset right = offset.rotate_right();
   if (!IsWall(forward + right)) {
     if (GetPlayer(forward + right * 2) == kPlayerNone) {
-      moves->push_back((forward + right * 2).xy);
+      moves->push_back((base_for_relative_ + offset * 2 + right * 2).xy);
     }
   }
 }
@@ -582,14 +603,14 @@ void QuoridorState::ObservationTensor(Player player,
 }
 
 void QuoridorState::DoApplyAction(Action action) {
+  Move move = ActionToMove(action);
   // If players is forced to pass it is valid to stay in place, on a field where
   // there is already a player
-  if (board_[action] != current_player_) {
-    SPIEL_CHECK_EQ(board_[action], kPlayerNone);
+  if (board_[move.xy] != current_player_) {
+    SPIEL_CHECK_EQ(board_[move.xy], kPlayerNone);
   }
   SPIEL_CHECK_EQ(outcome_, kPlayerNone);
 
-  Move move = ActionToMove(action);
   SPIEL_CHECK_TRUE(move.IsValid());
 
   if (move.IsWall()) {
@@ -636,7 +657,13 @@ QuoridorGame::QuoridorGame(const GameParameters& params)
       wall_count_(
           ParameterValue<int>("wall_count", board_size_ * board_size_ / 8)),
       ansi_color_output_(ParameterValue<bool>("ansi_color_output")),
-      num_players_(ParameterValue<int>("players")) {}
-
+      num_players_(ParameterValue<int>("players")) {
+  if (board_size_ < 3) {
+    // For relative moves, we need to be able to describe moves using a 3x3 grid
+    // and since we use the board to number the moves (see above), we need the
+    // playing board to be at least that big.
+    SpielFatalError("Board size must be at least 3x3.");
+  }
+}
 }  // namespace quoridor
 }  // namespace open_spiel
diff --git a/open_spiel/games/quoridor/quoridor.h b/open_spiel/games/quoridor/quoridor.h
@@ -85,6 +85,7 @@ struct Move {
 
   Move operator+(const Offset& o) const { return Move(x + o.x, y + o.y, size); }
   Move operator-(const Offset& o) const { return Move(x - o.x, y - o.y, size); }
+  Offset operator-(const Move& o) const { return Offset(x - o.x, y - o.y); }
 };
 
 // State of an in-play game.
@@ -155,6 +156,7 @@ class QuoridorState : public State {
   const int board_size_;
   const int board_diameter_;
   const bool ansi_color_output_;
+  const Move base_for_relative_;
 };
 
 // Game object.