Update dockerscript and introduce loading of MC environment
jakvah committed Jul 30, 2020
1 parent 796e5b8 commit 6e65296
Showing 5 changed files with 38 additions and 29 deletions.
3 changes: 3 additions & 0 deletions README.md
@@ -273,8 +273,11 @@ In addition to [overwriting the starting conditions] of the agent (bit), there e

- Toggle hazards by passing ``activate_hazards = True`` as a keyword argument to the ``make()`` function. This will enrich the environment with hazards, whose number and size are specified in the [environment config file](gym_drill/envs/environment_config.py). See the [Adjust environment parameters](#adjust-environment-parameters) section for details. By default this is set to ``True``.


- Toggle Monte Carlo simulated training by passing ``monte_carlo = True`` as a keyword argument to the ``make()`` function. This ensures that an agent training in the environment is always exposed to an environment where a feasible path to all targets exists. This is done by first generating a set of random paths and then populating those paths with targets. The details of the Monte Carlo simulation are specified in the [environment config file](gym_drill/envs/environment_config.py). See the [Adjust environment parameters](#adjust-environment-parameters) section for details. By default this is set to ``False``.

- Toggle loading of a Monte Carlo generated environment by passing ``load = True`` as a keyword argument to the ``make()`` function. If ``True`` together with ``monte_carlo``, no new set of Monte Carlo simulated environments is generated; instead an existing set is loaded. This is recommended when plotting a trained agent, to avoid having to generate a new set of Monte Carlo environments.

- Toggle the episode log by passing ``activate_log = True`` as a keyword argument to the ``make()`` function. This will write the number of steps and the total reward from each episode to a file named "drill_log.txt". **NOTE: Using the log will greatly reduce performance during training.** It is recommended that the log is used when tweaking the reward system or during very thorough training. By default this is set to ``False``.

As an example, if you want to turn off hazards and Monte Carlo simulated training, but see the behind-the-scenes magic written in the log, you would do
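A minimal sketch of such a call is shown below; the environment id and the bit initialization values are placeholders, not taken from this commit, and the ``import gym_drill`` assumes the package registers its environment on import.

```python
import gym
import gym_drill  # assumed to register the drilling environment with gym

# "drill-v0" is a placeholder id; use the id that gym_drill actually registers.
env = gym.make(
    "drill-v0",
    bitInitialization=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0],  # placeholder angles, angular velocities and accelerations
    activate_hazards=False,   # turn off hazards
    monte_carlo=False,        # turn off Monte Carlo simulated training
    activate_log=True,        # write steps and total reward per episode to drill_log.txt
)
```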
2 changes: 2 additions & 0 deletions gym-drill/README.md
@@ -78,6 +78,8 @@ In addition to [overwriting the starting conditions] of the agent (bit), there e

- Toggle Monte Carlo simulated training by passing ``monte_carlo = True`` as a keyword argument to the ``make()`` function. This ensures that an agent training in the environment is always exposed to an environment where a feasible path to all targets exists. This is done by first generating a set of random paths and then populating those paths with targets. The details of the Monte Carlo simulation are specified in the [environment config file](gym_drill/envs/environment_config.py). See the [Adjust environment parameters](#adjust-environment-parameters) section for details. By default this is set to ``False``.

- Toggle loading of a Monte Carlo generated environment by passing ``load = True`` as a keyword argument to the ``make()`` function. If ``True`` together with ``monte_carlo``, no new set of Monte Carlo simulated environments is generated; instead an existing set is loaded. This is recommended when plotting a trained agent, to avoid having to generate a new set of Monte Carlo environments.

- Toggle the episode log by passing ``activate_log = True`` as a keyword argument to the ``make()`` function. This will write the number of steps and the total reward from each episode to a file named "drill_log.txt". **NOTE: Using the log will greatly reduce performance during training.** It is recommended that the log is used when tweaking the reward system or during very thorough training. By default this is set to ``False``.

As an example, if you want to turn off hazards and Monte Carlo simulated training, but see the behind-the-scenes magic written in the log, you would do
33 changes: 16 additions & 17 deletions gym-drill/agent_training.py
@@ -106,70 +106,69 @@ def save_model(model,save_name,*,folder_name = TRAINED_MODEL_FOLDER_DOCKER):

print("Results have been saved in ", save_location)

ENV_DISP = gym.make(ENV_name, bitInitialization= [uniform(0,2*np.pi),uniform(0,np.pi/4),0.0,0.0,0.0,0.0], activate_hazards = True,load = False)
# Will display model from trained_models folder. To override, specify FOLDERNAME in source_folder
def display_agent(model,*,num_episodes = 1,source_folder = TRAINED_MODEL_FOLDER_DOCKER,vector = False):
if not vector:
try:
model_to_load = source_folder + model
trained_model = DQN.load(model_to_load, ENV)
trained_model = DQN.load(model_to_load, ENV_DISP)
except Exception as e:
try:
source_folder = TRAINED_MODEL_FOLDER_LOCAL
model_to_load = source_folder + model
trained_model = DQN.load(model_to_load, ENV)
trained_model = DQN.load(model_to_load, ENV_DISP)
except Exception as e:
print("Failed to load model.")
print("If model is not inside the trained_model folder, override the source_folder to match the desired folder")
print(str(e))
os._exit(0)

# Show the result of the training
obs = ENV.reset()
obs = ENV_DISP.reset()
for episode in range (num_episodes):
done = False
while not done:
action, _states = trained_model.predict(obs)
obs, rewards, done, info = ENV.step(action)
obs, rewards, done, info = ENV_DISP.step(action)

fig_xy = ENV.get_xy_plane_figure()
fig_xz = ENV.get_xz_plane_figure()
fig_3d = ENV.get_3d_figure()
fig_xy = ENV_DISP.get_xy_plane_figure()
fig_xz = ENV_DISP.get_xz_plane_figure()
fig_3d = ENV_DISP.get_3d_figure()
print('[EPISODE ENDED]')
print("Showing plots at: http://127.0.0.1:8988/")
plt.show()

obs = ENV.reset()

obs = ENV_DISP.reset()

else:
print("Vectorized env not implemented yet")
def get_environment_figures(model,*,source_folder = TRAINED_MODEL_FOLDER_DOCKER,vector = False):
if not vector:
try:
model_to_load = source_folder + model
trained_model = DQN.load(model_to_load, ENV)
trained_model = DQN.load(model_to_load, ENV_DISP)
except Exception as e:
try:
source_folder = TRAINED_MODEL_FOLDER_LOCAL
model_to_load = source_folder + model
trained_model = DQN.load(model_to_load, ENV)
trained_model = DQN.load(model_to_load, ENV_DISP)
except Exception as e:
print("Failed to load model.")
print("If model is not inside the trained_model folder, override the source_folder to match the desired folder")
print(str(e))
os._exit(0)

# Show the result of the training
obs = ENV.reset()
obs = ENV_DISP.reset()
for episode in range (1):
done = False
while not done:
action, _states = trained_model.predict(obs)
obs, rewards, done, info = ENV.step(action)
obs, rewards, done, info = ENV_DISP.step(action)

fig_xy = ENV.get_xy_plane_figure()
fig_xz = ENV.get_xz_plane_figure()
fig_3d = ENV.get_3d_figure()
fig_xy = ENV_DISP.get_xy_plane_figure()
fig_xz = ENV_DISP.get_xz_plane_figure()
fig_3d = ENV_DISP.get_3d_figure()
return fig_xy,fig_xz,fig_3d
else:
print("Vectorized env not implemented yet")
11 changes: 7 additions & 4 deletions gym-drill/gym_drill/envs/drill_env.py
@@ -23,7 +23,7 @@ class DrillEnv(gym.Env):
'video.frames_per_second': 50
}

def __init__(self,startLocation,bitInitialization,*,activate_hazards=False,monte_carlo=False,activate_log=False):
def __init__(self,startLocation,bitInitialization,*,activate_hazards=False,monte_carlo=True,activate_log=False,load=True):
self.activate_log = activate_log
self.activate_hazards = activate_hazards
self.monte_carlo = monte_carlo
@@ -54,10 +54,10 @@ def __init__(self,startLocation,bitInitialization,*,activate_hazards=False,monte
self.initial_azimuth_angVel = bitInitialization[2]
self.initial_inclination_angVel = bitInitialization[3]
self.initial_azimuth_angAcc = bitInitialization[4]
self.initial_inclination_angAcc = bitInitialization[5]
self.initial_inclination_angAcc = bitInitialization[5]

# Generate feasible environments to train in using a Monte Carlo simulation
if self.monte_carlo:
if self.monte_carlo and not load:
print("Running", str(cfg.NUM_MONTE_CARLO_ENVS),"Monte Carlo simulations to generate target sets!")

rwp.generate_targets_hazards_to_file(cfg.NUM_TARGETS, cfg.NUM_HAZARDS,
@@ -66,7 +66,10 @@ def __init__(self,startLocation,bitInitialization,*,activate_hazards=False,monte
cfg.MC_PATH_LENGTH_BOUND[0], cfg.MC_PATH_LENGTH_BOUND[1],
[cfg.TARGET_BOUND_X[0],cfg.TARGET_BOUND_Y[0],cfg.TARGET_BOUND_Z[0]],
cfg.NUM_MONTE_CARLO_ENVS, cfg.ENVIRONMENT_FILENAME)

elif load and self.monte_carlo:
print("Using prexisting Monte Carlo generated environment")
print("Make sure it matches your trained models setting. See environment.txt for details!")


self.create_targets_and_hazards()
self.observation_space_container= ObservationSpace(cfg.SPACE_BOUNDS,cfg.TARGET_BOUNDS,cfg.HAZARD_BOUNDS,cfg.BIT_BOUNDS,self.targets,self.hazards,self.bitLocation)
18 changes: 10 additions & 8 deletions run.ps1
@@ -7,6 +7,7 @@ param (
[switch]$r = $false,
[switch]$auto = $false,
[switch]$a = $false,
[switch]$help = $false,

# Arguments for python script
$action = $args[0],
@@ -46,10 +46,8 @@ function run_tensorboard {
function delete_running($name) {
docker rm -f $name
}
function run {
run_container ; if($?) {run_tensorboard} ; if ($?) {run_python_script $python_filename $action $name $algorithm $timesteps $new_save_name}
}
function run2($script_action) {

function run($script_action) {
if ($script_action -eq "load") {
run_container ; if($?) {run_tensorboard} ; if ($?) {run_python_script $python_filename_load $action $name $algorithm $timesteps $new_save_name}
}
@@ -63,9 +62,12 @@ function run2($script_action) {
}

if ($build -or $b) {build_container}
#elseif ($run -or $r) {run} <- old
elseif ($run -or $r) {run2($action)}
#elseif ($auto -or $a) {build_container ; if ($?) {run}}
elseif ($run -or $r) {run($action)}
elseif ($auto -or $a) {build_container ; if ($?) {run($action)}}
elseif ($help){
Write-Output("Use -build to build, use -run to run og use -auto to do both")
}

else {
Write-Output("You must specify an action!")
Write-Output("You must specify a flag! For info, pass -help")
}
