added phyre agent

phy-q · Dec 18, 2021 · f9da89e · f9da89e
1 parent d061898
commit f9da89e
Showing 51 changed files with 5,254 additions and 61 deletions.
diff --git a/.gitignore b/.gitignore
@@ -7,3 +7,14 @@ buildgame/Linux/
 tasks/generated_tasks/
 sciencebirdsgames/Linux/
 *.pyc
+sciencebirdsagents/PhyreAgents/data/
+
+sciencebirdsagents/__pycache__/
+
+sciencebirdsagents/Client/__pycache__/
+
+sciencebirdsagents/final_run/
+
+sciencebirdsagents/HeuristicAgents/__pycache__/
+
+sciencebirdsagents/LearningAgents/__pycache__/
diff --git a/sciencebirdsagents/HeuristicAgents/CollectionAgentThread.py b/sciencebirdsagents/HeuristicAgents/CollectionAgentThread.py
@@ -1,24 +1,30 @@
+import csv
+import cv2
+import json
+import numpy as np
+import os
 import threading
 import time
 from typing import List
-
+import hickle
 from HeuristicAgents.CollectionAgent import CollectionAgent
 from SBEnvironment.SBEnvironmentWrapper import SBEnvironmentWrapper
-import os
-import csv
-import cv2
-import json
+from StateReader.SymbolicStateDevReader import SymbolicStateDevReader
+
+
 class AgentThread(threading.Thread):
     def __init__(self, agent: CollectionAgent, env: SBEnvironmentWrapper, lock: threading.Lock, mode='train',
-                 simulation_speed=100):
+                 simulation_speed=100, saving_path='PhyreStyleTrainingData'):
+        """Set up a worker thread that drives one agent inside one environment.
+
+        :param agent: agent that selects the levels and the actions
+        :param env: environment wrapper the agent plays in
+        :param lock: lock shared with the other agent threads; held while results are written
+        :param mode: stored as ``self.mode``
+        :param simulation_speed: stored as ``self.simulation_speed``
+        :param saving_path: root directory under which the collected data is written
+
+        Reads ``Utils/model`` (comma-separated numbers) and ``Utils/target_class`` (one entry
+        per line); both paths are relative to the current working directory.
+        """
         self.result = None
         threading.Thread.__init__(self)
         self.agent = agent
         self.env = env
         self.mode = mode
         self.lock = lock
         self.simulation_speed = simulation_speed
-        self.saving_path = 'PhyreStyleTrainingData'
+        self.saving_path = saving_path
+        self.model = np.loadtxt("Utils/model", delimiter=",")
+        # Context manager so the file handle is released as soon as the lines are read.
+        with open('Utils/target_class') as class_file:
+            self.target_class = [line.replace("\n", "") for line in class_file]
 
     def save_local(self, s0, s0_image, action, if_win, attempts, obj_movements, game_level_idx, template):
         if not os.path.exists(self.saving_path):
@@ -31,16 +37,19 @@ def save_local(self, s0, s0_image, action, if_win, attempts, obj_movements, game
         if not os.path.exists(game_level_save_path):
             os.mkdir(game_level_save_path)
 
-        state_path = os.path.join(os.path.join(self.saving_path, template), "{}_{}_state.pt".format(template, game_level_idx))
+        state_path = os.path.join(os.path.join(self.saving_path, template),
+                                  "{}_{}_state.pt".format(template, game_level_idx))
         if not os.path.exists(state_path):
             with open(state_path, 'w') as f:
                 json.dump(s0, f)
-        image_path = os.path.join(os.path.join(self.saving_path, template), "{}_{}_image.jpg".format(template, game_level_idx))
+        image_path = os.path.join(os.path.join(self.saving_path, template),
+                                  "{}_{}_image.jpg".format(template, game_level_idx))
         if not os.path.exists(image_path):
             s0_image = cv2.cvtColor(s0_image, cv2.COLOR_RGB2BGR)
             cv2.imwrite(image_path, s0_image)
 
-        obj_movements_path = os.path.join(game_level_save_path, "{}_{}_{}_{}".format(template, game_level_idx, str(action), if_win))
+        obj_movements_path = os.path.join(game_level_save_path,
+                                          "{}_{}_{}_{}".format(template, game_level_idx, str(action), if_win))
         with open(obj_movements_path, 'w') as f:
             json.dump(obj_movements, f)
 
@@ -71,32 +80,104 @@ def convert_bts_to_obj_movements(self, bt_gts):
         return obj_dict
 
     def run(self):
-        if self.mode == 'train':
-            self.env.make(agent=self.agent, start_level=self.agent.level_list[0],
-                          state_representation_type='symbolic')
-            s, r, is_done, info = self.env.reset()
-            while True:
-                while not is_done:
-                    s0 = s
-                    s0_image = self.agent.ar.do_screenshot()
-                    save_prefix = self.agent.template + "_" + str(self.env.current_level)
-                    state_name = save_prefix + "_state"
-                    action = self.agent.select_action(s)
-                    s, _, is_done, info = self.env.step(action, batch_gt=True)
-                did_win = info[0]
-                batch_gts = info[2]
-                total_score = info[1]
-                self.agent.update_score(self.env.current_level, total_score, did_win)
-                self.agent.update_episode_rewards(self.env.current_level, total_score)
-                self.agent.update_winning(self.env.current_level, did_win)
-                attempts = self.agent.total_score_record[self.env.current_level]['attempts']
-                obj_movements = self.convert_bts_to_obj_movements(batch_gts)
-                with self.lock:
-                    self.save_local(s0, s0_image, action, did_win, attempts, obj_movements, self.env.current_level, self.agent.template)
-                self.env.current_level = self.agent.select_level()
-                if not self.env.current_level:  # that's when all the levels has been played.
-                    return
-                s, r, is_done, info = self.env.reload_current_level()
+        """Play the agent's levels one after another and save the ground truth of each episode.
+
+        After every finished level the batch ground truth is converted by
+        ``process_batch_gts`` and written as four gzip-compressed hickle files (image, label,
+        boxes, masks) under ``<saving_path>/<template>/<level>/``, each named after the
+        ``deg`` value returned with the last action. Returns once ``select_level`` yields a
+        falsy level.
+        """
+
+        self.env.make(agent=self.agent, start_level=self.agent.level_list[0],
+                      state_representation_type='symbolic')
+        s, r, is_done, info = self.env.reset()
+        while True:
+            while not is_done:
+                # NOTE(review): s0, s0_image, save_prefix and state_name are not read again in this method
+                s0 = s
+                s0_image = self.agent.ar.do_screenshot()
+                save_prefix = self.agent.template + "_" + str(self.env.current_level)
+                state_name = save_prefix + "_state"
+                # the agent must return an (action, deg) pair here; deg later names the output files
+                action, deg = self.agent.select_action(s)
+                s, _, is_done, info = self.env.step(action, batch_gt=True)
+            # info is read positionally: [0] win flag, [1] total score, [2] batch ground truth
+            did_win = info[0]
+            batch_gts = info[2]
+            total_score = info[1]
+            self.agent.update_score(self.env.current_level, total_score, did_win)
+            self.agent.update_episode_rewards(self.env.current_level, total_score)
+            self.agent.update_winning(self.env.current_level, did_win)
+            # NOTE(review): attempts is computed but not used below
+            attempts = self.agent.total_score_record[self.env.current_level]['attempts']
+
+            full_image, boxes, masks = self.process_batch_gts(batch_gts)
+            save_path = f'{self.saving_path}/{self.agent.template}/{self.env.current_level}'
+            os.makedirs(save_path, exist_ok=True)
+            with self.lock:
+                # write image, win label, boxes and masks while holding the lock shared by the agent threads
+                hickle.dump(full_image, f'{save_path}/{deg:.4f}_image.hkl', mode='w', compression='gzip')
+                hickle.dump(int(did_win), f'{save_path}/{deg:.4f}_label.hkl', mode='w',
+                            compression='gzip')
+                hickle.dump(boxes, f'{save_path}/{deg:.4f}_boxes.hkl', mode='w', compression='gzip')
+                hickle.dump(masks, f'{save_path}/{deg:.4f}_masks.hkl', mode='w', compression='gzip')
+
+            self.env.current_level = self.agent.select_level()
+
+            if not self.env.current_level:  # no level left: every level has been played
+                return
+            s, r, is_done, info = self.env.reload_current_level()
+
+    def process_batch_gts(self, batch_gts):
+        '''
+        Convert the per-frame ground truths of one episode into dense arrays.
+
+        batch_gts: sequence of n ground-truth frames, each passed to SymbolicStateDevReader
+
+        Returns (full_image, boxes, masks):
+        full_image: n x h x w array, with each type of object occupying a number
+        boxes: n x n_obj x 6 ([o_id, x1, y1, x2, y2, if_present]); coordinates are rescaled from
+               the 640 x 480 frame to the 160 x 120 input and clamped to it, and an object that
+               is missing from a frame gets [o_id, -1, -1, -1, -1, 0]
+        masks: n x n_obj x h_mask x w_mask, the filled box of each object resized to 21 x 21
+        '''
+        input_w = 160
+        input_h = 120
+        im_width = 640
+        im_height = 480
+        mask_size = 21
+
+        full_image = np.zeros((len(batch_gts), input_h, input_w))
+        full_objs = []
+        for i, gt in enumerate(batch_gts):
+            symbolic_state_reader = SymbolicStateDevReader(gt, self.model, self.target_class)
+            image, obj_ids = symbolic_state_reader.get_symbolic_image_flat(input_h, input_w)
+            full_image[i] = image
+            full_objs.append(obj_ids)
+
+        # Union of every object id seen in any frame; one box/mask column per id.
+        all_ids = set()
+        for objs_t in full_objs:
+            for obj in objs_t.keys():
+                all_ids.add(obj)
+
+        # Scale factors from frame pixels to input pixels (loop-invariant).
+        x_scale = (input_w - 1) / (im_width - 1)
+        y_scale = (input_h - 1) / (im_height - 1)
+
+        boxes = np.zeros((len(batch_gts), len(all_ids), 6))
+        masks = np.zeros((len(batch_gts), len(all_ids), mask_size, mask_size))
+        for t, objs in enumerate(full_objs):
+            for id_ind, obj_id in enumerate(all_ids):
+                if obj_id not in objs:
+                    # Object absent in this frame; its mask stays all-zero from np.zeros above.
+                    boxes[t, id_ind] = [id_ind, -1, -1, -1, -1, 0]
+                    continue
+
+                top_left_x, top_left_y = objs[obj_id].top_left
+                bottom_right_x, bottom_right_y = objs[obj_id].bottom_right
+
+                top_left_x = max(top_left_x * x_scale, 0)
+                bottom_right_x = min(bottom_right_x * x_scale, input_w - 1)
+                top_left_y = max(top_left_y * y_scale, 0)
+                bottom_right_y = min(bottom_right_y * y_scale, input_h - 1)
+
+                boxes[t, id_ind] = [id_ind, top_left_x, top_left_y, bottom_right_x, bottom_right_y, 1]
+
+                # Builtin int: the np.int alias was removed in NumPy 1.24.
+                x_start, x_stop = int(top_left_x), int(np.ceil(bottom_right_x))
+                y_start, y_stop = int(top_left_y), int(np.ceil(bottom_right_y))
+                mask_im = np.zeros((input_h, input_w))
+                # Guard keeps a negative stop (box left of / above the frame) from being read
+                # as a from-the-end slice; such boxes fill nothing.
+                if x_stop > x_start and y_stop > y_start:
+                    mask_im[y_start:y_stop, x_start:x_stop] = 1
+
+                masks[t, id_ind] = cv2.resize(mask_im, (mask_size, mask_size)) >= 0.5
+
+        return full_image, boxes, masks
 
 
 # Multithread .agents manager
@@ -111,13 +192,13 @@ def __init__(self, agents: List[CollectionAgent], simulation_speed=100):
     # Connects agents to the SB games and starts training
     # at the moment agents will connect to each level 1 by 1
     # i.e. agent 1 will correspond to level 1, agent 2 to level 2, etc
-    def connect_and_run_agents(self, mode='train'):
+    def connect_and_run_agents(self, saving_path='PhyreStyleTrainingData', mode='train'):
         agents_threads = []
         try:
             for i in range(1, len(self.agents) + 1):
                 print('agent %s running' % str(i))
                 agent = AgentThread(self.agents[i - 1], self.agents[i - 1].env, self.lock, mode=mode,
-                                    simulation_speed=self.simulation_speed)
+                                    simulation_speed=self.simulation_speed, saving_path=saving_path)
                 agent.start()
                 agents_threads.append(agent)
                 time.sleep(2)

diff --git a/sciencebirdsagents/HeuristicAgents/RandomAgent.py b/sciencebirdsagents/HeuristicAgents/RandomAgent.py
@@ -1,11 +1,12 @@
 import random
-
+import numpy as np
 from SBAgent import SBAgent
 from SBEnvironment.SBEnvironmentWrapper import SBEnvironmentWrapper
-
+import torch
 
 class RandomAgent(SBAgent):
-    def __init__(self, env: SBEnvironmentWrapper, level_selection_function, id: int = 28888, level_list: list = [], ):
+    def __init__(self, env: SBEnvironmentWrapper, level_selection_function, id: int = 28888, level_list: list = [],
+                 degree_range=None, ):
         SBAgent.__init__(self, level_list=level_list, env=env, id=id)
         # initialise a record of the levels to the agent
 
@@ -15,6 +16,7 @@ def __init__(self, env: SBEnvironmentWrapper, level_selection_function, id: int
         self.state_representation_type = 'symbolic'
         self.episode_rewards = {}
         self.did_win = {}
+        self.degree_range = degree_range
 
     def select_level(self):
         # you can choose to implement this by yourself, or just get it from the LevelSelectionSchema
@@ -23,6 +25,9 @@ def select_level(self):
 
     def select_action(self, state, mode=None):
+        """Pick a random action; ``state`` and ``mode`` are not used.
+
+        When ``self.degree_range`` is falsy, returns a list of three random ints.
+        Otherwise draws ``deg`` uniformly between ``degree_range[0]`` and ``degree_range[1]``
+        and returns the tuple ``(self.__degToShot(deg), deg)``.
+
+        NOTE(review): the two branches return different shapes (list vs. 2-tuple), so callers
+        have to know whether ``degree_range`` was set.
+        """
+        # drawn on every call, even when the degree branch below returns instead
         shot = [random.randint(-200, -10), random.randint(-200, 200), random.randint(50, 80)]
+        if self.degree_range:
+            deg = np.random.rand() * (self.degree_range[1] - self.degree_range[0]) + self.degree_range[0]
+            return self.__degToShot(deg), deg
         return shot
 
     def update_episode_rewards(self, current_level, eps_reward):
@@ -36,3 +41,13 @@ def update_winning(self, current_level, did_win):
             self.did_win[current_level] = [did_win]
         else:
             self.did_win[current_level].append(did_win)
+
+    def __degToShot(self, deg):
+        """Turn an angle in degrees into an (x, y) offset of length 200.
+
+        The angle is shifted by +90 degrees before its cosine and sine are taken. A single
+        angle yields a 1-D tensor of two values; k > 1 angles yield a k x 2 tensor.
+        """
+        radians = torch.deg2rad(torch.tensor(deg + 90))
+        x_column = 200 * torch.cos(radians).view(-1, 1)
+        y_column = 200 * torch.sin(radians).view(-1, 1)
+        offsets = torch.cat((x_column, y_column), 1)
+        # Collapse the batch dimension when there is only one angle.
+        return offsets[0] if offsets.size(0) == 1 else offsets
diff --git a/sciencebirdsagents/PhyreAgents/RPIN/configs/rpcin_cross_plan.yaml b/sciencebirdsagents/PhyreAgents/RPIN/configs/rpcin_cross_plan.yaml
@@ -0,0 +1,51 @@
+OUTPUT_DIR: ./outputs/phys
+DATA_ROOT: 'data/planning/'
+DATASET_ABS: 'PHYREO'  # online version of PHYRE data loader
+PHYRE_PROTOCAL: 'cross'
+PHYRE_FOLD: 0
+SOLVER:
+  BASE_LR: 2.0e-4  # single GPU LR
+  WEIGHT_DECAY: 3.0e-7
+  SCHEDULER: 'cosine'
+  MAX_ITERS: 6000000
+  VAL_INTERVAL: 600000
+  WARMUP_ITERS: 40000
+  BATCH_SIZE: 40 # single GPU Batch Size
+INPUT:
+  PRELOAD_TO_MEMORY: False
+  IMAGE_CHANNEL: 3
+RPIN:
+  BACKBONE: 'hourglass_bn'
+  ARCH: 'rpcin'
+  VE_FEAT_DIM: 256
+  IN_FEAT_DIM: 256
+  # prediction setting:
+  INPUT_SIZE: 1
+  PRED_SIZE_TRAIN: 10
+  PRED_SIZE_TEST: 10
+  # input setting:
+  INPUT_HEIGHT: 128
+  INPUT_WIDTH: 128
+  MAX_NUM_OBJS: 6
+  # data augmentation
+  HORIZONTAL_FLIP: True
+  VERTICAL_FLIP: False
+  # loss weight
+  POSITION_LOSS_WEIGHT: 1
+  # ----- RoIPooling Setting
+  ROI_POOL_SIZE: 5
+  ROI_POOL_SAMPLE_R: 2
+  # ----- mask prediction
+  MASK_LOSS_WEIGHT: 0.003
+  MASK_SIZE: 21
+  # ----- Architecture
+  N_EXTRA_ROI_F: 2
+  N_EXTRA_PRED_F: 0
+  N_EXTRA_SELFD_F: 0
+  N_EXTRA_RELD_F: 0
+  N_EXTRA_AFFECTOR_F: 0
+  N_EXTRA_AGGREGATOR_F: 0
+  EXTRA_F_KERNEL: 3
+  EXTRA_F_PADDING: 1
+  IMAGE_EXT: '.npy'
+  SEQ_CLS_LOSS_WEIGHT: 0.02
diff --git a/sciencebirdsagents/PhyreAgents/RPIN/configs/rpcin_cross_pred.yaml b/sciencebirdsagents/PhyreAgents/RPIN/configs/rpcin_cross_pred.yaml
@@ -0,0 +1,50 @@
+OUTPUT_DIR: ./outputs/phys
+DATA_ROOT: 'data/PHYRE_1fps_p100n400/'
+DATASET_ABS: 'PHYRE'
+PHYRE_PROTOCAL: 'cross'
+PHYRE_FOLD: 0
+SOLVER:
+  BASE_LR: 2.0e-4  # single GPU LR
+  WEIGHT_DECAY: 3.0e-7
+  SCHEDULER: 'cosine'
+  MAX_ITERS: 6000000
+  VAL_INTERVAL: 6000000
+  WARMUP_ITERS: 40000
+  BATCH_SIZE: 40 # single GPU Batch Size
+INPUT:
+  PRELOAD_TO_MEMORY: False
+  IMAGE_CHANNEL: 3
+RPIN:
+  BACKBONE: 'hourglass_bn'
+  ARCH: 'rpcin'
+  VE_FEAT_DIM: 256
+  IN_FEAT_DIM: 256
+  # prediction setting:
+  INPUT_SIZE: 1
+  PRED_SIZE_TRAIN: 5
+  PRED_SIZE_TEST: 10
+  # input setting:
+  INPUT_HEIGHT: 128
+  INPUT_WIDTH: 128
+  MAX_NUM_OBJS: 6
+  # data augmentation
+  HORIZONTAL_FLIP: True
+  VERTICAL_FLIP: False
+  # loss weight
+  POSITION_LOSS_WEIGHT: 1
+  # ----- RoIPooling Setting
+  ROI_POOL_SIZE: 5
+  ROI_POOL_SAMPLE_R: 2
+  # ----- mask prediction
+  MASK_LOSS_WEIGHT: 0.003
+  MASK_SIZE: 21
+  # ----- Architecture
+  N_EXTRA_ROI_F: 2
+  N_EXTRA_PRED_F: 0
+  N_EXTRA_SELFD_F: 0
+  N_EXTRA_RELD_F: 0
+  N_EXTRA_AFFECTOR_F: 0
+  N_EXTRA_AGGREGATOR_F: 0
+  EXTRA_F_KERNEL: 3
+  EXTRA_F_PADDING: 1
+  IMAGE_EXT: '.npy'