Skip to content

Commit

Permalink
added phyre agent
Browse files Browse the repository at this point in the history
Cheng-Xue committed Dec 18, 2021
1 parent d061898 commit f9da89e
Showing 51 changed files with 5,254 additions and 61 deletions.
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -7,3 +7,14 @@ buildgame/Linux/
tasks/generated_tasks/
sciencebirdsgames/Linux/
*.pyc
sciencebirdsagents/PhyreAgents/data/

sciencebirdsagents/__pycache__/

sciencebirdsagents/Client/__pycache__/

sciencebirdsagents/final_run/

sciencebirdsagents/HeuristicAgents/__pycache__/

sciencebirdsagents/LearningAgents/__pycache__/
157 changes: 119 additions & 38 deletions sciencebirdsagents/HeuristicAgents/CollectionAgentThread.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,30 @@
import csv
import cv2
import json
import numpy as np
import os
import threading
import time
from typing import List

import hickle
from HeuristicAgents.CollectionAgent import CollectionAgent
from SBEnvironment.SBEnvironmentWrapper import SBEnvironmentWrapper
import os
import csv
import cv2
import json
from StateReader.SymbolicStateDevReader import SymbolicStateDevReader


class AgentThread(threading.Thread):
def __init__(self, agent: CollectionAgent, env: SBEnvironmentWrapper, lock: threading.Lock, mode='train',
             simulation_speed=100, saving_path='PhyreStyleTrainingData'):
    """Create a worker thread that drives ``agent`` inside ``env``.

    Args:
        agent: agent that selects the levels and the actions.
        env: environment wrapper the agent plays in.
        lock: lock acquired while this thread writes its output files.
        mode: run-mode label stored on the thread.
        simulation_speed: simulation speed value stored on the thread.
        saving_path: root directory under which collected data is written.
    """
    self.result = None
    threading.Thread.__init__(self)
    self.agent = agent
    self.env = env
    self.mode = mode
    self.lock = lock
    self.simulation_speed = simulation_speed
    self.saving_path = saving_path
    # Both resources are read from paths relative to the working directory.
    self.model = np.loadtxt("Utils/model", delimiter=",")
    # Use a context manager so the file handle is closed deterministically.
    with open('Utils/target_class') as class_file:
        self.target_class = [line.replace("\n", "") for line in class_file]

def save_local(self, s0, s0_image, action, if_win, attempts, obj_movements, game_level_idx, template):
if not os.path.exists(self.saving_path):
@@ -31,16 +37,19 @@ def save_local(self, s0, s0_image, action, if_win, attempts, obj_movements, game
if not os.path.exists(game_level_save_path):
os.mkdir(game_level_save_path)

state_path = os.path.join(os.path.join(self.saving_path, template), "{}_{}_state.pt".format(template, game_level_idx))
state_path = os.path.join(os.path.join(self.saving_path, template),
"{}_{}_state.pt".format(template, game_level_idx))
if not os.path.exists(state_path):
with open(state_path, 'w') as f:
json.dump(s0, f)
image_path = os.path.join(os.path.join(self.saving_path, template), "{}_{}_image.jpg".format(template, game_level_idx))
image_path = os.path.join(os.path.join(self.saving_path, template),
"{}_{}_image.jpg".format(template, game_level_idx))
if not os.path.exists(image_path):
s0_image = cv2.cvtColor(s0_image, cv2.COLOR_RGB2BGR)
cv2.imwrite(image_path, s0_image)

obj_movements_path = os.path.join(game_level_save_path, "{}_{}_{}_{}".format(template, game_level_idx, str(action), if_win))
obj_movements_path = os.path.join(game_level_save_path,
"{}_{}_{}_{}".format(template, game_level_idx, str(action), if_win))
with open(obj_movements_path, 'w') as f:
json.dump(obj_movements, f)

@@ -71,32 +80,104 @@ def convert_bts_to_obj_movements(self, bt_gts):
return obj_dict

def run(self):
    """Play every level the agent selects and save the data of each shot.

    For each shot the ground-truth frames returned by the environment are
    converted with ``process_batch_gts`` and written, under the thread lock,
    to ``<saving_path>/<template>/<level>/<deg>_{image,label,boxes,masks}.hkl``.

    The agent's ``select_action`` must return an ``(action, deg)`` pair.
    The method returns once ``select_level`` yields a falsy level.
    """
    self.env.make(agent=self.agent, start_level=self.agent.level_list[0],
                  state_representation_type='symbolic')
    s, _, is_done, info = self.env.reset()
    while True:
        while not is_done:
            # The screenshot itself is not used here.
            # NOTE(review): call kept in case the request has a side effect
            # on the game client; confirm and drop if it does not.
            self.agent.ar.do_screenshot()
            action, deg = self.agent.select_action(s)
            s, _, is_done, info = self.env.step(action, batch_gt=True)
            did_win = info[0]
            total_score = info[1]
            batch_gts = info[2]
            self.agent.update_score(self.env.current_level, total_score, did_win)
            self.agent.update_episode_rewards(self.env.current_level, total_score)
            self.agent.update_winning(self.env.current_level, did_win)

            full_image, boxes, masks = self.process_batch_gts(batch_gts)
            save_path = f'{self.saving_path}/{self.agent.template}/{self.env.current_level}'
            os.makedirs(save_path, exist_ok=True)
            outputs = {
                'image': full_image,
                'label': int(did_win),
                'boxes': boxes,
                'masks': masks,
            }
            with self.lock:
                # One gzip-compressed hickle file per artefact, keyed by the shot angle.
                for suffix, payload in outputs.items():
                    hickle.dump(payload, f'{save_path}/{deg:.4f}_{suffix}.hkl', mode='w',
                                compression='gzip')

        self.env.current_level = self.agent.select_level()

        if not self.env.current_level:  # all the levels have been played
            return
        s, _, is_done, info = self.env.reload_current_level()

def process_batch_gts(self, batch_gts):
    """Turn a sequence of ground-truth frames into image, box and mask arrays.

    Args:
        batch_gts: sequence of ground-truth frames; each one is handed to
            ``SymbolicStateDevReader`` together with ``self.model`` and
            ``self.target_class``.

    Returns:
        A tuple ``(full_image, boxes, masks)``. With ``n = len(batch_gts)``
        and ``n_obj`` the number of distinct object ids seen in any frame:

        * ``full_image``: ``n x 120 x 160`` array holding the symbolic image
          of every frame (per the original note, each type of object
          occupies a number).
        * ``boxes``: ``n x n_obj x 6`` array of
          ``[index, x1, y1, x2, y2, flag]`` rows. Coordinates are rescaled
          from a 640x480 frame to 160x120 and clamped to the image. ``flag``
          is 1 when the object is present in the frame; otherwise the row is
          ``[index, -1, -1, -1, -1, 0]``.
        * ``masks``: ``n x n_obj x 21 x 21`` array with the box rasterised
          and resized to 21x21; all zeros for an object absent from a frame.
    """
    input_w, input_h = 160, 120
    im_width, im_height = 640, 480
    mask_size = 21
    scale_x = (input_w - 1) / (im_width - 1)
    scale_y = (input_h - 1) / (im_height - 1)

    n_frames = len(batch_gts)
    full_image = np.zeros((n_frames, input_h, input_w))
    full_objs = []
    for i, gt in enumerate(batch_gts):
        reader = SymbolicStateDevReader(gt, self.model, self.target_class)
        image, obj_ids = reader.get_symbolic_image_flat(input_h, input_w)
        full_image[i] = image
        full_objs.append(obj_ids)

    all_ids = set()
    for objs_t in full_objs:
        for obj_id in objs_t.keys():
            all_ids.add(obj_id)
    # Freeze one iteration order so an id maps to the same column in every frame.
    ordered_ids = list(all_ids)

    boxes = np.zeros((n_frames, len(ordered_ids), 6))
    masks = np.zeros((n_frames, len(ordered_ids), mask_size, mask_size))
    for t, objs in enumerate(full_objs):
        for id_ind, obj_id in enumerate(ordered_ids):
            if obj_id not in objs:
                # The mask row is already all zeros, which is what resizing
                # an empty mask would produce, so only the box needs writing.
                boxes[t, id_ind] = [id_ind, -1, -1, -1, -1, 0]
                continue

            obj = objs[obj_id]
            x1, y1 = obj.top_left
            x2, y2 = obj.bottom_right

            x1 = max(x1 * scale_x, 0)
            x2 = min(x2 * scale_x, input_w - 1)
            y1 = max(y1 * scale_y, 0)
            y2 = min(y2 * scale_y, input_h - 1)
            boxes[t, id_ind] = [id_ind, x1, y1, x2, y2, 1]

            # Builtin int() replaces np.int, which was removed in NumPy 1.24.
            col0, col1 = int(x1), int(np.ceil(x2))
            row0, row1 = int(y1), int(np.ceil(y2))
            mask_im = np.zeros((input_h, input_w))
            # Guard keeps a negative end index from being read as "from the end".
            if col1 > col0 and row1 > row0:
                mask_im[row0:row1, col0:col1] = 1

            masks[t, id_ind] = cv2.resize(mask_im, (mask_size, mask_size)) >= 0.5

    return full_image, boxes, masks


# Multithreaded agents manager
@@ -111,13 +192,13 @@ def __init__(self, agents: List[CollectionAgent], simulation_speed=100):
# Connects agents to the SB games and starts training
# at the moment agents will connect to each level 1 by 1
# i.e. agent 1 will correspond to level 1, agent 2 to level 2, etc
def connect_and_run_agents(self, mode='train'):
def connect_and_run_agents(self, saving_path='PhyreStyleTrainingData', mode='train'):
agents_threads = []
try:
for i in range(1, len(self.agents) + 1):
print('agent %s running' % str(i))
agent = AgentThread(self.agents[i - 1], self.agents[i - 1].env, self.lock, mode=mode,
simulation_speed=self.simulation_speed)
simulation_speed=self.simulation_speed, saving_path=saving_path)
agent.start()
agents_threads.append(agent)
time.sleep(2)
21 changes: 18 additions & 3 deletions sciencebirdsagents/HeuristicAgents/RandomAgent.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import random

import numpy as np
from SBAgent import SBAgent
from SBEnvironment.SBEnvironmentWrapper import SBEnvironmentWrapper

import torch

class RandomAgent(SBAgent):
def __init__(self, env: SBEnvironmentWrapper, level_selection_function, id: int = 28888, level_list: list = [], ):
def __init__(self, env: SBEnvironmentWrapper, level_selection_function, id: int = 28888, level_list: list = [],
degree_range=None, ):
SBAgent.__init__(self, level_list=level_list, env=env, id=id)
# initialise a record of the levels to the agent

@@ -15,6 +16,7 @@ def __init__(self, env: SBEnvironmentWrapper, level_selection_function, id: int
self.state_representation_type = 'symbolic'
self.episode_rewards = {}
self.did_win = {}
self.degree_range = degree_range

def select_level(self):
# you can choose to implement this by yourself, or just get it from the LevelSelectionSchema
@@ -23,6 +25,9 @@ def select_level(self):

def select_action(self, state, mode=None):
    """Pick a random action.

    Three random integers are always drawn first, from the ranges
    [-200, -10], [-200, 200] and [50, 80]. When ``self.degree_range`` is
    falsy that list is returned as is. Otherwise an angle is sampled
    uniformly between ``degree_range[0]`` and ``degree_range[1]`` and the
    method returns the pair ``(shot, deg)``, where ``shot`` comes from
    ``__degToShot``.

    ``state`` and ``mode`` are not used.
    """
    bounds = ((-200, -10), (-200, 200), (50, 80))
    random_shot = [random.randint(lower, upper) for lower, upper in bounds]
    if not self.degree_range:
        return random_shot
    low, high = self.degree_range[0], self.degree_range[1]
    deg = np.random.rand() * (high - low) + low
    return self.__degToShot(deg), deg

def update_episode_rewards(self, current_level, eps_reward):
@@ -36,3 +41,13 @@ def update_winning(self, current_level, did_win):
self.did_win[current_level] = [did_win]
else:
self.did_win[current_level].append(did_win)

def __degToShot(self, deg):
    """Convert an angle in degrees into a pair of offsets of length 200.

    The angle is shifted by 90 degrees, then its cosine and sine are scaled
    by 200 to give the x and y components.

    Returns:
        A tensor ``[ax, ay]`` when a single angle is given, otherwise an
        ``n x 2`` tensor with one row per angle.
    """
    angle = torch.deg2rad(torch.tensor(deg + 90))
    components = [200 * trig(angle).view(-1, 1) for trig in (torch.cos, torch.sin)]
    shots = torch.cat(components, 1)
    return shots[0] if shots.size(0) == 1 else shots
51 changes: 51 additions & 0 deletions sciencebirdsagents/PhyreAgents/RPIN/configs/rpcin_cross_plan.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
OUTPUT_DIR: ./outputs/phys
DATA_ROOT: 'data/planning/'
DATASET_ABS: 'PHYREO' # online version of PHYRE data loader
PHYRE_PROTOCAL: 'cross'
PHYRE_FOLD: 0
SOLVER:
BASE_LR: 2.0e-4 # single GPU LR
WEIGHT_DECAY: 3.0e-7
SCHEDULER: 'cosine'
MAX_ITERS: 6000000
VAL_INTERVAL: 600000
WARMUP_ITERS: 40000
BATCH_SIZE: 40 # single GPU Batch Size
INPUT:
PRELOAD_TO_MEMORY: False
IMAGE_CHANNEL: 3
RPIN:
BACKBONE: 'hourglass_bn'
ARCH: 'rpcin'
VE_FEAT_DIM: 256
IN_FEAT_DIM: 256
# prediction setting:
INPUT_SIZE: 1
PRED_SIZE_TRAIN: 10
PRED_SIZE_TEST: 10
# input setting:
INPUT_HEIGHT: 128
INPUT_WIDTH: 128
MAX_NUM_OBJS: 6
# data augmentation
HORIZONTAL_FLIP: True
VERTICAL_FLIP: False
# loss weight
POSITION_LOSS_WEIGHT: 1
# ----- RoIPooling Setting
ROI_POOL_SIZE: 5
ROI_POOL_SAMPLE_R: 2
# ----- mask prediction
MASK_LOSS_WEIGHT: 0.003
MASK_SIZE: 21
# ----- Architecture
N_EXTRA_ROI_F: 2
N_EXTRA_PRED_F: 0
N_EXTRA_SELFD_F: 0
N_EXTRA_RELD_F: 0
N_EXTRA_AFFECTOR_F: 0
N_EXTRA_AGGREGATOR_F: 0
EXTRA_F_KERNEL: 3
EXTRA_F_PADDING: 1
IMAGE_EXT: '.npy'
SEQ_CLS_LOSS_WEIGHT: 0.02
50 changes: 50 additions & 0 deletions sciencebirdsagents/PhyreAgents/RPIN/configs/rpcin_cross_pred.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
OUTPUT_DIR: ./outputs/phys
DATA_ROOT: 'data/PHYRE_1fps_p100n400/'
DATASET_ABS: 'PHYRE'
PHYRE_PROTOCAL: 'cross'
PHYRE_FOLD: 0
SOLVER:
BASE_LR: 2.0e-4 # single GPU LR
WEIGHT_DECAY: 3.0e-7
SCHEDULER: 'cosine'
MAX_ITERS: 6000000
VAL_INTERVAL: 6000000
WARMUP_ITERS: 40000
BATCH_SIZE: 40 # single GPU Batch Size
INPUT:
PRELOAD_TO_MEMORY: False
IMAGE_CHANNEL: 3
RPIN:
BACKBONE: 'hourglass_bn'
ARCH: 'rpcin'
VE_FEAT_DIM: 256
IN_FEAT_DIM: 256
# prediction setting:
INPUT_SIZE: 1
PRED_SIZE_TRAIN: 5
PRED_SIZE_TEST: 10
# input setting:
INPUT_HEIGHT: 128
INPUT_WIDTH: 128
MAX_NUM_OBJS: 6
# data augmentation
HORIZONTAL_FLIP: True
VERTICAL_FLIP: False
# loss weight
POSITION_LOSS_WEIGHT: 1
# ----- RoIPooling Setting
ROI_POOL_SIZE: 5
ROI_POOL_SAMPLE_R: 2
# ----- mask prediction
MASK_LOSS_WEIGHT: 0.003
MASK_SIZE: 21
# ----- Architecture
N_EXTRA_ROI_F: 2
N_EXTRA_PRED_F: 0
N_EXTRA_SELFD_F: 0
N_EXTRA_RELD_F: 0
N_EXTRA_AFFECTOR_F: 0
N_EXTRA_AGGREGATOR_F: 0
EXTRA_F_KERNEL: 3
EXTRA_F_PADDING: 1
IMAGE_EXT: '.npy'
Loading

0 comments on commit f9da89e

Please sign in to comment.