-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmcts.py
140 lines (127 loc) · 5.86 KB
/
mcts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
from board_scorer import *
import random
from itertools import product
import copy
class MCTSMovePicker:
@staticmethod
def next_move(board, player):
raise NotImplementedError("interface method should not be invoked")
class MCTSRandomMove(MCTSMovePicker):
@staticmethod
def next_move(board, player):
actions = MCTSScorer.findNeighbor(board,neighborNearBy=1)
return random.choice(actions)
# raise NotImplementedError("Implement me")
class MCTSScorer(Scorer):
"""
Scorer that is based on the MCTS method
"""
@staticmethod
def evaluate(board, x, y, move,trail=200,explore=4):
"""
This function takes a board, evaluates the score (on behave of player 1)
and returns the evaluated score
:param board: The current board (including the recent move)
:param x: last recent move's position x
:param y: last recent move's position y
:param move: last recent move's player, move = 1 / 2
:param trail: number of simulations at each action
:param explore: number of opponent's actions to be explored
:return: a real value for the score of this recent move (the best winning rate among the following actions)
"""
new_board = copy.deepcopy(board)
new_board[x][y] = move # the player takes the current move
actions = MCTSScorer.findNeighbor(new_board,neighborNearBy=1)
score = {}
for action in actions:
a_board = copy.deepcopy(new_board)
a_board[action[0]][action[1]] = 3 - move
score[action] = MCTSScorer.heuristic(a_board,action[0],action[1],3 - move)
score = sorted(score.items(), key = lambda s:(s[1], s[0]))
winrate = [0 for i in range(explore)]
for i in range(explore):
t = 0
win = 0
action = score[i][0]
a_board = copy.deepcopy(new_board)
a_board[action[0]][action[1]] = 3 - move
while (t < trail):
t += 1
win += MCTSScorer.simulation(copy.deepcopy(a_board),move,player = move)
winrate[i] = win/t
print(win)
print(winrate)
return min(winrate) # 返回最小胜率(minimax思想)
@staticmethod
def simulation(board,move,player):
"""
To simulate a game randomly according to current board,
:return: the winner
"""
winner = MCTSScorer.judge(board)
if winner != 0:
# print(winner == move)
return (winner == move)
else:
action = MCTSRandomMove.next_move(board,player)
board[action[0]][action[1]] = player
return MCTSScorer.simulation(board,move,3 - player)
@staticmethod
def judge(board):
"""
To check if a final state is reached and the winner
"""
patterDict = PatternExtractionScorer.patternCount(board)
if patterDict['11111'][0] or patterDict['011110'][0]:
# for i in board:print(i)
return 1
elif patterDict['11111'][1] or patterDict['011110'][1]:
# for i in board:print(i)
return 2
return 0
@staticmethod
def heuristic(board,x,y,move):
"""
This function is used during the internal nodes pruning
This method is used for better pruning strategy
"""
return PatternExtractionScorer.evaluate(board,x,y,move)
@staticmethod
def findNeighbor(board, neighborNearBy=1):
"""
Find all open positions 1 cell away
TODO Enable more general neighbor discovering strategy
:param board:
:param neighborNearBy: how much distance
:return: a list of binary tuples.
"""
neighbors = []
stones = [(i, j) for i in range(len(board)) for j in range(len(board)) if board[i][j] != 0]
for stone in stones:
for i, j in product(range(stone[0] - 1, stone[0] + 2), range(stone[1] - 1, stone[1] + 2)):
if (i, j) in product(range(len(board)), range(len(board))) and (i, j) not in neighbors and (
i, j) not in stones:
neighbors.append((i, j))
return neighbors
if __name__ == "__main__":
board = [[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
print(MCTSScorer.evaluate(board,0,1,1))