forked from tensorfly-gpu/aichess
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgame.py
898 lines (810 loc) · 41 KB
/
game.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
"""棋盘游戏控制"""
import numpy as np
import copy
import time
from config import CONFIG
from collections import deque # 这个队列用来判断长将或长捉
import random
# Opening position as a list of 10 rows x 9 columns; red occupies the top rows
# and black the bottom rows. Deep-copy this template before mutating it.
state_list_init = [
    row.split() for row in (
        '红车 红马 红象 红士 红帅 红士 红象 红马 红车',
        '一一 一一 一一 一一 一一 一一 一一 一一 一一',
        '一一 红炮 一一 一一 一一 一一 一一 红炮 一一',
        '红兵 一一 红兵 一一 红兵 一一 红兵 一一 红兵',
        '一一 一一 一一 一一 一一 一一 一一 一一 一一',
        '一一 一一 一一 一一 一一 一一 一一 一一 一一',
        '黑兵 一一 黑兵 一一 黑兵 一一 黑兵 一一 黑兵',
        '一一 黑炮 一一 一一 一一 一一 一一 黑炮 一一',
        '一一 一一 一一 一一 一一 一一 一一 一一 一一',
        '黑车 黑马 黑象 黑士 黑帅 黑士 黑象 黑马 黑车',
    )
]
# Bounded history of board states (at most 4), pre-filled with independent
# copies of the opening position.
state_deque_init = deque(
    (copy.deepcopy(state_list_init) for _ in range(4)),
    maxlen=4,
)
# Piece name -> length-7 vector, plus the inverse lookup function.
# Each piece kind owns one slot (rook, horse, elephant, advisor, king, cannon,
# pawn, in that order); red pieces are +1, black pieces are -1, and the empty
# square is all zeros.
string2array = {
    color + kind: sign * np.eye(7, dtype=int)[plane]
    for sign, color in ((1, '红'), (-1, '黑'))
    for plane, kind in enumerate('车马象士帅炮兵')
}
string2array['一一'] = np.zeros(7, dtype=int)


def array2string(array):
    """Return the piece name whose encoding equals `array` element-wise.

    Raises IndexError when no entry of `string2array` matches.
    """
    for name, encoding in string2array.items():
        if (encoding == array).all():
            return name
    raise IndexError('list index out of range')
# Apply a move to a board and return the resulting board
def change_state(state_list, move):
    """Return a new board with `move` applied; `state_list` is not modified.

    state_list: board as a list of rows, each row a list of piece-name strings.
    move: string of four digits 'yxYX' (e.g. '0010'): the piece at row y,
        column x is moved to row Y, column X and the origin square is emptied.
    """
    y, x, toy, tox = int(move[0]), int(move[1]), int(move[2]), int(move[3])
    # The cells are immutable strings, so copying each row is sufficient and
    # avoids the cost of copy.deepcopy on this frequently called path.
    copy_list = [list(row) for row in state_list]
    copy_list[toy][tox] = copy_list[y][x]
    copy_list[y][x] = '一一'
    return copy_list
# Print the board, used for visualisation
def print_board(_state_array):
    """Print the 10 board rows, one list of 9 piece names per line.

    _state_array: indexed as [row][column] -> piece vector ([10, 9, 7], HWC).
    """
    for i in range(10):
        print([array2string(_state_array[i][j]) for j in range(9)])
# Convert the list-of-names board into its array form
def state_list2state_array(state_list):
    """Encode a 10x9 board of piece names as a float array of shape [10, 9, 7]."""
    encoded_rows = [
        [string2array[state_list[i][j]] for j in range(9)]
        for i in range(10)
    ]
    return np.array(encoded_rows, dtype=float)
# Enumerate the whole action space (2086 moves), i.e. the length of the move
# probability vector predicted by the network.
# First dict:  move_id -> move_action
# Second dict: move_action -> move_id
# Example: move_id 0 --> move_action '0010'
def get_all_legal_moves():
    """Build the id <-> action tables for every geometrically possible move.

    An action is the string 'yxYX' (source row, source column, target row,
    target column). Ids are assigned in enumeration order: for each source
    square, all same-column targets, then all same-row targets, then the eight
    horse jumps; after all squares come the advisor moves and finally the
    elephant moves.
    """
    horse_jumps = [(-2, -1), (-1, -2), (-2, 1), (1, -2), (2, -1), (-1, 2), (2, 1), (1, 2)]
    # Every advisor move
    advisor_labels = ['0314', '1403', '0514', '1405', '2314', '1423', '2514', '1425',
                      '9384', '8493', '9584', '8495', '7384', '8473', '7584', '8475']
    # Every elephant move
    bishop_labels = ['2002', '0220', '2042', '4220', '0224', '2402', '4224', '2442',
                     '2406', '0624', '2446', '4624', '0628', '2806', '4628', '2846',
                     '7052', '5270', '7092', '9270', '5274', '7452', '9274', '7492',
                     '7456', '5674', '7496', '9674', '5678', '7856', '9678', '7896']

    actions = []
    for src_y in range(10):
        for src_x in range(9):
            targets = [(dst_y, src_x) for dst_y in range(10)]
            targets += [(src_y, dst_x) for dst_x in range(9)]
            targets += [(src_y + dy, src_x + dx) for dy, dx in horse_jumps]
            for dst_y, dst_x in targets:
                if (dst_y, dst_x) == (src_y, src_x):
                    continue
                if dst_y not in range(10) or dst_x not in range(9):
                    continue
                actions.append(''.join(map(str, (src_y, src_x, dst_y, dst_x))))
    actions.extend(advisor_labels)
    actions.extend(bishop_labels)

    _move_id2move_action = dict(enumerate(actions))
    _move_action2move_id = {action: idx for idx, action in enumerate(actions)}
    return _move_id2move_action, _move_action2move_id


move_id2move_action, move_action2move_id = get_all_legal_moves()
# 走子翻转的函数,用来扩充我们的数据
def flip_map(string):
new_str = ''
for index in range(4):
if index == 0 or index == 2:
new_str += (str(string[index]))
else:
new_str += (str(8 - int(string[index])))
return new_str
# Bounds check
def check_bounds(toY, toX):
    """Return True when row toY is one of 0..9 and column toX is one of 0..8."""
    return toY in range(10) and toX in range(9)
# A piece may not move onto a square held by its own side
def check_obstruct(piece, current_player_color):
    """Tell whether the mover may land on a square currently holding `piece`.

    Returns True for an empty square ('一一') or a square holding an opposing
    piece, False for a square holding one of the mover's own pieces. When the
    square is occupied and `current_player_color` is neither '红' nor '黑' the
    result is None.
    """
    if piece == '一一':
        return True
    if current_player_color == '红':
        return '黑' in piece
    if current_player_color == '黑':
        return '红' in piece
    return None
# Legal-move generation for the current position.
# The state deque must hold at least 4 boards; current_player_color is the
# colour ('红' or '黑') of the side to move. The result is a list of move ids,
# e.g. [0, 1, 2, 1089, 2085].
_EMPTY = '一一'
# Left, right, up, down: the order in which sliding and king moves are tried.
_ORTHOGONAL_STEPS = ((0, -1), (0, 1), (-1, 0), (1, 0))


def _opponent_of(color):
    """Return the colour character of the side opposing `color`."""
    return '黑' if color == '红' else '红'


def _in_own_half(color, row):
    """True when `row` is on `color`'s side of the river (black: 5-9, red: 0-4)."""
    return row >= 5 if color == '黑' else row <= 4


def _in_own_palace(color, row, col):
    """True when (row, col) lies in `color`'s palace (columns 3-5; black rows 7+, red rows 0-2)."""
    row_ok = row >= 7 if color == '黑' else row <= 2
    return row_ok and 3 <= col <= 5


def _can_land(state_list, color, row, col):
    """True when (row, col) is on the board and not held by one of `color`'s pieces."""
    return check_bounds(row, col) and check_obstruct(state_list[row][col], current_player_color=color)


def _ray(y, x, dy, dx):
    """Yield the on-board squares reached by stepping (dy, dx) repeatedly from (y, x)."""
    row, col = y + dy, x + dx
    while check_bounds(row, col):
        yield row, col
        row, col = row + dy, col + dx


def _rook_targets(state_list, y, x, color):
    """Yield rook destinations: slide until blocked, capturing an enemy blocker."""
    enemy = _opponent_of(color)
    for dy, dx in _ORTHOGONAL_STEPS:
        for row, col in _ray(y, x, dy, dx):
            occupant = state_list[row][col]
            if occupant != _EMPTY:
                if enemy in occupant:
                    yield row, col
                break  # a rook cannot jump over any piece
            yield row, col


def _cannon_targets(state_list, y, x, color):
    """Yield cannon destinations: slide over empty squares, capture past exactly one screen."""
    enemy = _opponent_of(color)
    for dy, dx in _ORTHOGONAL_STEPS:
        screen_found = False
        for row, col in _ray(y, x, dy, dx):
            occupant = state_list[row][col]
            if not screen_found:
                if occupant != _EMPTY:
                    screen_found = True
                else:
                    yield row, col
            elif occupant != _EMPTY:
                # First piece behind the screen: capturable only if it is an enemy.
                if enemy in occupant:
                    yield row, col
                break


def _horse_targets(state_list, y, x, color):
    """Yield horse destinations whose adjacent "leg" square is empty."""
    for i in (-1, 1):
        for j in (-1, 1):
            # Each entry is (destination row, destination col, leg row, leg col).
            for row, col, leg_row, leg_col in ((y + 2 * i, x + j, y + i, x),
                                               (y + i, x + 2 * j, y, x + j)):
                if _can_land(state_list, color, row, col) and state_list[leg_row][leg_col] == _EMPTY:
                    yield row, col


def _elephant_targets(state_list, y, x, color):
    """Yield elephant destinations: two-step diagonals on the own half with an empty midpoint."""
    for i in (-2, 2):
        half = i // 2
        for row, col, eye_col in ((y + i, x + i, x + half), (y + i, x - i, x - half)):
            if (_can_land(state_list, color, row, col)
                    and _in_own_half(color, row)
                    and state_list[y + half][eye_col] == _EMPTY):
                yield row, col


def _advisor_targets(state_list, y, x, color):
    """Yield advisor destinations: one-step diagonals that stay inside the own palace."""
    for i in (-1, 1):
        for row, col in ((y + i, x + i), (y + i, x - i)):
            if _can_land(state_list, color, row, col) and _in_own_palace(color, row, col):
                yield row, col


def _king_targets(state_list, y, x, color):
    """Yield king destinations: one orthogonal step that stays inside the own palace."""
    for dy, dx in _ORTHOGONAL_STEPS:
        row, col = y + dy, x + dx
        if _can_land(state_list, color, row, col) and _in_own_palace(color, row, col):
            yield row, col


def _pawn_targets(state_list, y, x, color):
    """Yield pawn destinations: one step forward, plus sideways once past the river."""
    forward = -1 if color == '黑' else 1  # black advances towards row 0, red towards row 9
    candidates = [(y + forward, x)]
    if not _in_own_half(color, y):
        candidates.extend([(y, x + 1), (y, x - 1)])
    for row, col in candidates:
        if _can_land(state_list, color, row, col):
            yield row, col


# Second character of a piece name -> destination generator for that piece kind.
_TARGET_GENERATORS = {
    '车': _rook_targets,
    '马': _horse_targets,
    '象': _elephant_targets,
    '士': _advisor_targets,
    '帅': _king_targets,
    '炮': _cannon_targets,
    '兵': _pawn_targets,
}


def get_legal_moves(state_deque, current_player_color):
    """Return the ids of all moves available to `current_player_color`.

    state_deque: sequence of boards with at least 4 entries; state_deque[-1]
        is the current position.
    current_player_color: '红' or '黑', the side to move.

    Moves are generated per piece while scanning the board row by row. A
    candidate is dropped when the board it produces equals state_deque[-4];
    this is the repetition guard that stops, for example, a rook from shuttling
    back and forth to keep chasing the king. When both kings stand on the same
    column with no piece between them, the side to move may additionally
    capture the opposing king with its own king. Moves are not filtered for
    leaving one's own king attackable.

    Returns a list of move ids as defined by `move_action2move_id`.
    """
    state_list = state_deque[-1]
    old_state_list = state_deque[-4]
    moves = []  # accepted moves as 'yxYX' strings, in generation order

    def consider(y, x, to_y, to_x):
        # Keep the move unless it would recreate the board stored in state_deque[-4].
        m = str(y) + str(x) + str(to_y) + str(to_x)
        if change_state(state_list, m) != old_state_list:
            moves.append(m)

    red_king = None
    black_king = None
    # state_list is a list of rows: len(state_list) == 10, len(state_list[0]) == 9
    for y in range(10):
        for x in range(9):
            piece = state_list[y][x]
            if piece == _EMPTY:
                continue
            # King positions are recorded for both sides, whoever is to move.
            if piece == '红帅':
                red_king = (y, x)
            elif piece == '黑帅':
                black_king = (y, x)
            if piece[:1] != current_player_color:
                continue
            generate = _TARGET_GENERATORS.get(piece[1:])
            if generate is None:
                continue
            for to_y, to_x in generate(state_list, y, x, current_player_color):
                consider(y, x, to_y, to_x)

    # Kings facing each other on an open column: the mover's king may capture the other.
    if red_king is not None and black_king is not None and red_king[1] == black_king[1]:
        column = red_king[1]
        face_to_face = all(state_list[row][column] == _EMPTY
                           for row in range(red_king[0] + 1, black_king[0]))
        if face_to_face:
            if current_player_color == '黑':
                consider(black_king[0], black_king[1], red_king[0], red_king[1])
            else:
                consider(red_king[0], red_king[1], black_king[0], black_king[1])

    return [move_action2move_id[move] for move in moves]
# Board logic and per-game bookkeeping
class Board(object):
    """Mutable game state: board history, side to move, move counters and winner."""

    def __init__(self):
        # Delegate to init_board() so that every attribute read by the other
        # methods (last_move, kill_action, action_count, player mappings, ...)
        # exists as soon as the board is constructed.
        self.init_board()

    # Reset the board for a new game
    def init_board(self, start_player=1):
        """Reset all state for a new game.

        start_player: id (1 or 2) of the player who moves first. Red always
            moves first, so this player is mapped to red and the other to black.

        Raises ValueError when start_player is neither 1 nor 2.
        """
        if start_player not in (1, 2):
            raise ValueError('start_player should be either 1 (player1 first) '
                             'or 2 (player2 first)')
        self.start_player = start_player

        # Colour <-> player-id mappings; the first mover takes red.
        if start_player == 1:
            self.id2color = {1: '红', 2: '黑'}
            self.color2id = {'红': 1, '黑': 2}
            self.backhand_player = 2
        else:
            self.id2color = {2: '红', 1: '黑'}
            self.color2id = {'红': 2, '黑': 1}
            self.backhand_player = 1
        # Side to move: the first mover, i.e. red
        self.current_player_color = self.id2color[start_player]
        self.current_player_id = self.color2id['红']
        # Fresh copies of the opening position and of the state history
        self.state_list = copy.deepcopy(state_list_init)
        self.state_deque = copy.deepcopy(state_deque_init)
        # Id of the last move played; -1 until a move has been made
        self.last_move = -1
        # Number of consecutive moves played without a capture
        self.kill_action = 0
        self.game_start = False
        self.action_count = 0  # number of moves played so far
        self.winner = None

    @property
    # Ids of all moves available in the current position
    def availables(self):
        return get_legal_moves(self.state_deque, self.current_player_color)

    # Board state as network input, current_state_array: [9, 10, 9] CHW
    def current_state(self):
        """Return the current position encoded as a [9, 10, 9] array.

        Planes 0-6: piece positions of the latest board in the history, one
            plane per piece kind, +1 for red pieces and -1 for black pieces.
        Plane 7: the last move played, -1 on its origin square and +1 on its
            destination square, 0 elsewhere (all 0 before the first move).
        Plane 8: all 1 when an even number of moves has been played (the
            first mover is to move), otherwise all 0.
        """
        _current_state = np.zeros([9, 10, 9])
        _current_state[:7] = state_list2state_array(self.state_deque[-1]).transpose([2, 0, 1])  # [7, 10, 9]
        if self.game_start:
            # Unpack self.last_move into origin and destination squares
            move = move_id2move_action[self.last_move]
            start_position = int(move[0]), int(move[1])
            end_position = int(move[2]), int(move[3])
            _current_state[7][start_position[0]][start_position[1]] = -1
            _current_state[7][end_position[0]][end_position[1]] = 1
        # Mark which player is to move
        if self.action_count % 2 == 0:
            _current_state[8][:, :] = 1.0

        return _current_state

    # Apply a move to the board
    def do_move(self, move):
        """Play the move with id `move` and hand the turn to the other side.

        Updates the capture-free move counter, sets `winner` when a king is
        captured, and appends the new board to the state history.
        """
        self.game_start = True  # the game is under way
        self.action_count += 1  # one more move played
        move_action = move_id2move_action[move]
        start_y, start_x = int(move_action[0]), int(move_action[1])
        end_y, end_x = int(move_action[2]), int(move_action[3])
        state_list = copy.deepcopy(self.state_deque[-1])
        # Is this move a capture?
        if state_list[end_y][end_x] != '一一':
            # A capture resets the counter; capturing the opposing king wins the game
            self.kill_action = 0
            if self.current_player_color == '黑' and state_list[end_y][end_x] == '红帅':
                self.winner = self.color2id['黑']
            elif self.current_player_color == '红' and state_list[end_y][end_x] == '黑帅':
                self.winner = self.color2id['红']
        else:
            self.kill_action += 1
        # Move the piece
        state_list[end_y][end_x] = state_list[start_y][start_x]
        state_list[start_y][start_x] = '一一'
        self.current_player_color = '黑' if self.current_player_color == '红' else '红'  # switch side to move
        self.current_player_id = 1 if self.current_player_id == 2 else 2
        # Remember the move just played
        self.last_move = move
        self.state_deque.append(state_list)

    # Has the game produced a winner?
    def has_a_winner(self):
        """Return (True, winner_id) when the game is decided, else (False, -1).

        Besides a king capture, the game is decided once `kill_action` reaches
        CONFIG['kill_action'] moves without a capture; that case is scored as
        a loss for the first mover, i.e. a win for `backhand_player`.
        """
        if self.winner is not None:
            return True, self.winner
        elif self.kill_action >= CONFIG['kill_action']:  # draw is scored as a loss for the first mover
            # return False, -1
            return True, self.backhand_player
        return False, -1

    # Has the game ended?
    def game_end(self):
        """Return (ended, winner_id); winner_id is -1 for a tie or an unfinished game."""
        win, winner = self.has_a_winner()
        if win:
            return True, winner
        # NOTE(review): with has_a_winner() reporting the capture-free limit as
        # a win, this tie branch cannot currently be reached; it only becomes
        # live if has_a_winner() is switched back to `return False, -1`.
        elif self.kill_action >= CONFIG['kill_action']:  # draw, nobody wins
            return True, -1
        return False, -1

    def get_current_player_color(self):
        """Return the colour ('红' or '黑') of the side to move."""
        return self.current_player_color

    def get_current_player_id(self):
        """Return the id (1 or 2) of the player to move."""
        return self.current_player_id
# Game builds on Board: it runs a complete game from start to finish, collects
# the data produced during self-play, and prints the board.
class Game(object):
    """Drive games on a board object, between two players or in self-play."""

    def __init__(self, board):
        self.board = board

    # Visualisation
    def graphic(self, board, player1_color, player2_color):
        """Print both players' labels followed by the latest board of `board`."""
        print('player1 take: ', player1_color)
        print('player2 take: ', player2_color)
        print_board(state_list2state_array(board.state_deque[-1]))

    # Human vs. machine, human vs. human, etc.
    def start_play(self, player1, player2, start_player=1, is_shown=1):
        """Play one game between player1 and player2 and return the winner id.

        start_player: 1 if player1 moves first, 2 if player2 moves first.
        is_shown: when truthy, the board is printed before the game and after
            every move.

        Each player object must provide set_player_ind(id), get_action(board)
        and a `player` attribute. Returns the winner id reported by the board,
        or -1 for a tie.

        Raises ValueError when start_player is neither 1 nor 2.
        """
        if start_player not in (1, 2):
            raise ValueError('start_player should be either 1 (player1 first) '
                             'or 2 (player2 first)')
        self.board.init_board(start_player)  # reset the board
        player1.set_player_ind(1)
        player2.set_player_ind(2)
        players = {1: player1, 2: player2}
        if is_shown:
            self.graphic(self.board, player1.player, player2.player)

        while True:
            current_player = self.board.get_current_player_id()  # id of the player to move
            player_in_turn = players[current_player]  # agent acting for that player
            move = player_in_turn.get_action(self.board)  # agent picks a move
            self.board.do_move(move)  # apply it to the board
            if is_shown:
                self.graphic(self.board, player1.player, player2.player)
            end, winner = self.board.game_end()
            if end:
                if winner != -1:
                    print("Game end. Winner is", players[winner])
                else:
                    print("Game end. Tie")
                return winner

    # Self-play with Monte Carlo tree search; stores (state, MCTS move
    # probabilities, outcome) triples for training the network
    def start_self_play(self, player, is_shown=False, temp=1e-3):
        """Play one self-play game and return (winner, training data).

        player: agent providing get_action(board, temp=..., return_prob=1),
            which returns (move, move_probs), and reset_player().
        is_shown: when truthy, the result is printed at the end of the game.
        temp: temperature forwarded to player.get_action.

        Returns (winner, data), where data is a zip over (state, move_probs,
        z) triples and z is +1 for positions whose player to move ended up
        winning, -1 for the other positions, and 0 everywhere on a tie.
        """
        self.board.init_board()  # reset the board, start_player=1
        states, mcts_probs, current_players = [], [], []
        # Self-play loop
        _count = 0
        while True:
            _count += 1
            # Report how long the move search takes on every 20th move
            timed = _count % 20 == 0
            if timed:
                start_time = time.time()
            move, move_probs = player.get_action(self.board,
                                                 temp=temp,
                                                 return_prob=1)
            if timed:
                print('走一步要花: ', time.time() - start_time)
            # Record the self-play data before the move is applied
            states.append(self.board.current_state())
            mcts_probs.append(move_probs)
            current_players.append(self.board.current_player_id)
            # Play the move
            self.board.do_move(move)
            end, winner = self.board.game_end()
            if end:
                # Score each recorded state from the point of view of its player
                winner_z = np.zeros(len(current_players))
                if winner != -1:
                    winner_z[np.array(current_players) == winner] = 1.0
                    winner_z[np.array(current_players) != winner] = -1.0
                # Reset the MCTS root node
                player.reset_player()
                if is_shown:
                    if winner != -1:
                        print("Game end. Winner is:", winner)
                    else:
                        print('Game end. Tie')

                return winner, zip(states, mcts_probs, winner_z)
if __name__ == '__main__':
    # Manual smoke tests. Most of them are disabled, either as comments or as
    # bare string literals that are evaluated and discarded.
    # Test array2string
    # _array = np.array([0, 0, 0, 0, 0, 0, 0])
    # print(array2num(_array))
    """# 测试change_state
new_state = change_state(state_list_init, move='0010')
for row in range(10):
print(new_state[row])"""
    """# 测试print_board
_state_list = copy.deepcopy(state_list_init)
print_board(state_list2state_array(_state_list))"""
    """# 测试get_legal_moves
moves = get_legal_moves(state_deque_init, current_player_color='黑')
move_actions = []
for item in moves:
move_actions.append(move_id2move_action[item])
print(move_actions)"""
    # Test start_play with two players that pick random legal moves
    # class Human1:
    #     def get_action(self, board):
    #         # print('player1 is moving')
    #         # print(board.current_player_color)
    #         # move = move_action2move_id[input('enter a move')]
    #         move = random.choice(board.availables)
    #         return move
    #
    #     def set_player_ind(self, p):
    #         self.player = p
    #
    #
    # class Human2:
    #     def get_action(self, board):
    #         # print('player2 is moving')
    #         # print(board.current_player_color)
    #         # move = move_action2move_id[input('enter a move')]
    #         move = random.choice(board.availables)
    #         return move
    #
    #     def set_player_ind(self, p):
    #         self.player = p
    #
    # human1 = Human1()
    # human2 = Human2()
    # game = Game(board=Board())
    # for i in range(20):
    #     game.start_play(human1, human2, start_player=2, is_shown=0)
    # The only check still enabled: build a board and reset it.
    board = Board()
    board.init_board()