Skip to content

Commit

Permalink
[CI] Reduce CI time (pytorch#1226)
Browse files Browse the repository at this point in the history
Co-authored-by: BY571 <sebastian.dittert@gmx.de>
  • Loading branch information
vmoens and BY571 authored Jun 7, 2023
1 parent 76bb23a commit e1d1874
Show file tree
Hide file tree
Showing 34 changed files with 420 additions and 325 deletions.
6 changes: 3 additions & 3 deletions .circleci/unittest/linux/scripts/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$lib_dir
export MKL_THREADING_LAYER=GNU
export CKPT_BACKEND=torch

python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 20
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 20 -k 'test_gym or test_dm_control_pixels or test_dm_control or test_tb'
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest --instafail -v --durations 20 --ignore test/test_distributed.py
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 200
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 200 -k 'test_gym or test_dm_control_pixels or test_dm_control or test_tb'
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest --instafail -v --durations 200 --ignore test/test_distributed.py
coverage combine
coverage xml -i
6 changes: 3 additions & 3 deletions .circleci/unittest/linux_distributed/scripts/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$lib_dir
export MKL_THREADING_LAYER=GNU
export CKPT_BACKEND=torch

python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 20
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 20 -k 'test_gym or test_dm_control_pixels or test_dm_control or test_tb'
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/test_distributed.py --instafail -v --durations 20
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 200
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 200 -k 'test_gym or test_dm_control_pixels or test_dm_control or test_tb'
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/test_distributed.py --instafail -v --durations 200
coverage combine
coverage xml -i
4 changes: 2 additions & 2 deletions .circleci/unittest/linux_examples/scripts/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ lib_dir="${env_dir}/lib"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$lib_dir
export MKL_THREADING_LAYER=GNU

python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 20
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 20
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 200
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 200

# With batched environments
python .circleci/unittest/helpers/coverage_run_parallel.py examples/ppo/ppo.py \
Expand Down
2 changes: 1 addition & 1 deletion .circleci/unittest/linux_libs/scripts_brax/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ export MAGNUM_LOG=verbose MAGNUM_GPU_VALIDATION=ON
# this workflow only tests the libs
python -c "import brax"

python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 20 --capture no -k TestBrax --error-for-skips
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 200 --capture no -k TestBrax --error-for-skips
coverage combine
coverage xml -i
2 changes: 1 addition & 1 deletion .circleci/unittest/linux_libs/scripts_d4rl/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,6 @@ conda deactivate && conda activate ./env
# this workflow only tests the libs
python -c "import gym, d4rl"

python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 20 --capture no -k TestD4RL --error-for-skips
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 200 --capture no -k TestD4RL --error-for-skips
coverage combine
coverage xml -i
2 changes: 1 addition & 1 deletion .circleci/unittest/linux_libs/scripts_envpool/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,6 @@ export MKL_THREADING_LAYER=GNU
# this workflow only tests the libs
python -c "import envpool"

python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 20 --capture no -k TestEnvPool --error-for-skips
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 200 --capture no -k TestEnvPool --error-for-skips
coverage combine
coverage xml -i
6 changes: 3 additions & 3 deletions .circleci/unittest/linux_libs/scripts_gym/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ lib_dir="${env_dir}/lib"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$lib_dir
export MKL_THREADING_LAYER=GNU

python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 20
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 20 -k 'test_gym'
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 200
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 200 -k 'test_gym'

export DISPLAY=':99.0'
Xvfb :99 -screen 0 1400x900x24 > /dev/null 2>&1 &
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 20 -k "gym" --error-for-skips
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 200 -k "gym" --error-for-skips
coverage combine
coverage xml -i
2 changes: 1 addition & 1 deletion .circleci/unittest/linux_libs/scripts_habitat/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,6 @@ env = HabitatEnv('HabitatRenderPick-v0')
env.reset()
"""

python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 20 --capture no -k TestHabitat --error-for-skips
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 200 --capture no -k TestHabitat --error-for-skips
coverage combine
coverage xml -i
2 changes: 1 addition & 1 deletion .circleci/unittest/linux_libs/scripts_jumanji/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ export MAGNUM_LOG=verbose MAGNUM_GPU_VALIDATION=ON
# this workflow only tests the libs
python -c "import jumanji"

python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 20 --capture no -k TestJumanji --error-for-skips
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 200 --capture no -k TestJumanji --error-for-skips
coverage combine
coverage xml -i
2 changes: 1 addition & 1 deletion .circleci/unittest/linux_libs/scripts_sklearn/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@ conda deactivate && conda activate ./env
# this workflow only tests the libs
python -c "import sklearn, pandas"

python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 20 --capture no -k TestOpenML --error-for-skips
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 200 --capture no -k TestOpenML --error-for-skips
coverage combine
coverage xml -i
2 changes: 1 addition & 1 deletion .circleci/unittest/linux_libs/scripts_vmas/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,6 @@ export MAGNUM_LOG=verbose MAGNUM_GPU_VALIDATION=ON
# this workflow only tests the libs
python -c "import vmas"

python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 20 --capture no -k TestVmas --error-for-skips
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 200 --capture no -k TestVmas --error-for-skips
coverage combine
coverage xml -i
8 changes: 4 additions & 4 deletions .circleci/unittest/linux_olddeps/scripts_gym_0_13/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ lib_dir="${env_dir}/lib"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$lib_dir
export MKL_THREADING_LAYER=GNU

python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 20
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 20 -k 'test_gym or test_dm_control_pixels or test_dm_control'
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 200
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 200 -k 'test_gym or test_dm_control_pixels or test_dm_control'

export DISPLAY=':99.0'
Xvfb :99 -screen 0 1400x900x24 > /dev/null 2>&1 &
CKPT_BACKEND=torch MUJOCO_GL=egl python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest --instafail -v --durations 20 --ignore test/test_distributed.py
#pytest --instafail -v --durations 20
CKPT_BACKEND=torch MUJOCO_GL=egl python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest --instafail -v --durations 200 --ignore test/test_distributed.py
#pytest --instafail -v --durations 200
#python test/test_libs.py
coverage combine
coverage xml -i
4 changes: 2 additions & 2 deletions .circleci/unittest/linux_optdeps/scripts/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ root_dir="$(git rev-parse --show-toplevel)"
export MKL_THREADING_LAYER=GNU
export CKPT_BACKEND=torch

#MUJOCO_GL=glfw pytest --cov=torchrl --junitxml=test-results/junit.xml -v --durations 20
MUJOCO_GL=egl python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest --instafail -v --durations 20 --ignore test/test_distributed.py
#MUJOCO_GL=glfw pytest --cov=torchrl --junitxml=test-results/junit.xml -v --durations 200
MUJOCO_GL=egl python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest --instafail -v --durations 200 --ignore test/test_distributed.py
coverage combine
coverage xml -i
6 changes: 3 additions & 3 deletions .circleci/unittest/linux_stable/scripts/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$lib_dir
export MKL_THREADING_LAYER=GNU
export CKPT_BACKEND=torch

python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 20
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 20 -k 'test_gym or test_dm_control_pixels or test_dm_control or test_tb'
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest --instafail -v --durations 20 --ignore test/test_distributed.py
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 200
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 200 -k 'test_gym or test_dm_control_pixels or test_dm_control or test_tb'
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest --instafail -v --durations 200 --ignore test/test_distributed.py
coverage combine
coverage xml -i
2 changes: 1 addition & 1 deletion .circleci/unittest/windows_optdepts/scripts/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
source "$this_dir/set_cuda_envs.sh"

python -m torch.utils.collect_env
pytest --junitxml=test-results/junit.xml -v --durations 20 --ignore test/test_distributed.py
pytest --junitxml=test-results/junit.xml -v --durations 200 --ignore test/test_distributed.py
6 changes: 3 additions & 3 deletions .github/workflows/nightly_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ jobs:
python -m torch.utils.collect_env
python -c "import torchrl; print(torchrl.__version__);from torchrl.data import ReplayBuffer"
EXIT_STATUS=0
pytest test/smoke_test.py -v --durations 20
pytest test/smoke_test.py -v --durations 200
exit $EXIT_STATUS
upload-wheel-linux:
Expand Down Expand Up @@ -270,7 +270,7 @@ jobs:
python3 -m torch.utils.collect_env
python3 -c "import torchrl; print(torchrl.__version__);from torchrl.data import ReplayBuffer"
EXIT_STATUS=0
pytest test/smoke_test.py -v --durations 20
pytest test/smoke_test.py -v --durations 200
exit $EXIT_STATUS
build-wheel-windows:
Expand Down Expand Up @@ -359,7 +359,7 @@ jobs:
python -m torch.utils.collect_env
python -c "import torchrl; print(torchrl.__version__);from torchrl.data import ReplayBuffer"
EXIT_STATUS=0
pytest test/smoke_test.py -v --durations 20
pytest test/smoke_test.py -v --durations 200
exit $EXIT_STATUS
upload-wheel-windows:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ jobs:
python -m torch.utils.collect_env
python -c "import torchrl; print(torchrl.__version__)"
EXIT_STATUS=0
pytest test/smoke_test.py -v --durations 20
pytest test/smoke_test.py -v --durations 200
exit $EXIT_STATUS
test-wheel-windows:
Expand Down Expand Up @@ -221,5 +221,5 @@ jobs:
python -m torch.utils.collect_env
python -c "import torchrl; print(torchrl.__version__)"
EXIT_STATUS=0
pytest test/smoke_test.py -v --durations 20
pytest test/smoke_test.py -v --durations 200
exit $EXIT_STATUS
11 changes: 11 additions & 0 deletions test/_utils_internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,17 @@ def get_available_devices():
return devices


def get_default_devices():
    """Return the list of devices used to parametrize device-dependent tests.

    The selection depends on how many CUDA devices ``torch`` reports:

    * none: only the CPU device is returned;
    * exactly one: only ``cuda:0`` is returned (the CPU is left out);
    * several: the choice is delegated to ``get_available_devices()``.

    Returns:
        A list of ``torch.device`` objects in the first two cases; in the
        multi-GPU case, whatever ``get_available_devices()`` returns
        (presumably a list of devices as well -- confirm against that helper).
    """
    num_cuda = torch.cuda.device_count()
    if num_cuda == 0:
        return [torch.device("cpu")]
    if num_cuda == 1:
        return [torch.device("cuda:0")]
    # More than one CUDA device: run on all devices.
    return get_available_devices()


def generate_seeds(seed, repeat):
seeds = [seed]
for _ in range(repeat - 1):
Expand Down
8 changes: 4 additions & 4 deletions test/test_actors.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pytest
import torch

from _utils_internal import get_available_devices
from _utils_internal import get_default_devices
from tensordict import TensorDict
from tensordict.nn import TensorDictModule
from torch import nn
Expand Down Expand Up @@ -408,7 +408,7 @@ def test_qvalue_hook_categorical_1_dim_batch(self, action_space, expected_action
assert (values == in_values).all()


@pytest.mark.parametrize("device", get_available_devices())
@pytest.mark.parametrize("device", get_default_devices())
def test_value_based_policy(device):
torch.manual_seed(0)
obs_dim = 4
Expand Down Expand Up @@ -481,7 +481,7 @@ def test_qvalactor_construct(
QValueActor(**kwargs)


@pytest.mark.parametrize("device", get_available_devices())
@pytest.mark.parametrize("device", get_default_devices())
def test_value_based_policy_categorical(device):
torch.manual_seed(0)
obs_dim = 4
Expand Down Expand Up @@ -512,7 +512,7 @@ def make_net():
assert (0 <= action).all() and (action < action_dim).all()


@pytest.mark.parametrize("device", get_available_devices())
@pytest.mark.parametrize("device", get_default_devices())
def test_actorcritic(device):
common_module = SafeModule(
module=nn.Linear(3, 4), in_keys=["obs"], out_keys=["hidden"], spec=None
Expand Down
58 changes: 36 additions & 22 deletions test/test_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def _is_consistent_device_type(
_os_is_windows and _python_is_3_10,
reason="Windows Access Violation in torch.multiprocessing / BrokenPipeError in multiprocessing.connection",
)
@pytest.mark.parametrize("num_env", [1, 2])
@pytest.mark.parametrize("num_env", [2])
@pytest.mark.parametrize("device", ["cuda", "cpu", None])
@pytest.mark.parametrize("policy_device", ["cuda", "cpu", None])
@pytest.mark.parametrize("storing_device", ["cuda", "cpu", None])
Expand Down Expand Up @@ -185,7 +185,9 @@ def env_fn(seed):
else:

def env_fn(seed):
env = ParallelEnv(
# 1226: faster execution
# env = ParallelEnv(
env = SerialEnv(
num_workers=num_env,
create_env_fn=make_make_env("vec"),
create_env_kwargs=[{"seed": i} for i in range(seed, seed + num_env)],
Expand Down Expand Up @@ -476,8 +478,16 @@ def make_env(seed):


@pytest.mark.parametrize("num_env", [1, 2])
@pytest.mark.parametrize("env_name", ["vec", "conv"])
def test_collector_batch_size(num_env, env_name, seed=100):
@pytest.mark.parametrize(
"env_name",
[
"vec",
],
) # 1226: for efficiency, we just test vec, not "conv"
def test_collector_batch_size(
num_env, env_name, seed=100, num_workers=2, frames_per_batch=20
):
"""Tests that there are 'frames_per_batch' frames in each batch of a collection."""
if num_env == 3 and _os_is_windows:
pytest.skip("Test timeout (> 10 min) on CI pipeline Windows machine with GPU")
if num_env == 1:
Expand All @@ -489,17 +499,16 @@ def env_fn():
else:

def env_fn():
env = ParallelEnv(
num_workers=num_env, create_env_fn=make_make_env(env_name)
)
# 1226: For efficiency, we don't use Parallel but Serial
# env = ParallelEnv(
env = SerialEnv(num_workers=num_env, create_env_fn=make_make_env(env_name))
return env

policy = make_policy(env_name)

torch.manual_seed(0)
np.random.seed(0)
num_workers = 2
frames_per_batch = 20

ccollector = MultiaSyncDataCollector(
create_env_fn=[env_fn for _ in range(num_workers)],
policy=policy,
Expand Down Expand Up @@ -644,8 +653,13 @@ def env_fn(seed):


@pytest.mark.parametrize("num_env", [1, 2])
@pytest.mark.parametrize("collector_class", [SyncDataCollector, aSyncDataCollector])
@pytest.mark.parametrize("env_name", ["conv", "vec"])
@pytest.mark.parametrize(
"collector_class",
[
SyncDataCollector,
],
) # aSyncDataCollector])
@pytest.mark.parametrize("env_name", ["vec"]) # 1226: removing "conv" for efficiency
def test_traj_len_consistency(num_env, env_name, collector_class, seed=100):
"""Tests that various frames_per_batch lead to the same results."""

Expand All @@ -669,9 +683,6 @@ def env_fn(seed):

policy = make_policy(env_name)

def make_frames_per_batch(frames_per_batch):
return -(-frames_per_batch // num_env) * num_env

collector1 = collector_class(
create_env_fn=env_fn,
create_env_kwargs={"seed": seed},
Expand Down Expand Up @@ -925,9 +936,10 @@ def make_env():
MultiSyncDataCollector,
],
)
@pytest.mark.parametrize("init_random_frames", [0, 50])
@pytest.mark.parametrize("explicit_spec", [False, True])
@pytest.mark.parametrize("split_trajs", [True, False])
@pytest.mark.parametrize("init_random_frames", [50]) # 1226: faster execution
@pytest.mark.parametrize(
"explicit_spec,split_trajs", [[True, True], [False, False]]
) # 1226: faster execution
def test_collector_output_keys(
collector_class, init_random_frames, explicit_spec, split_trajs
):
Expand Down Expand Up @@ -1265,7 +1277,9 @@ def env_fn(seed):
assert batch["collector"]["traj_ids"][0] != -1
assert batch["collector"]["traj_ids"][1] == -1

@pytest.mark.parametrize("env_name", ["conv", "vec"])
@pytest.mark.parametrize(
"env_name", ["vec"]
) # 1226: removing "conv" for efficiency
def test_multisync_collector_interruptor_mechanism(self, env_name, seed=100):

frames_per_batch = 800
Expand Down Expand Up @@ -1393,10 +1407,10 @@ def forward(self, td):
[
["cpu", "cuda"],
["cuda", "cpu"],
["cpu", "cuda:0"],
["cuda:0", "cpu"],
["cuda", "cuda:0"],
["cuda:0", "cuda"],
# ["cpu", "cuda:0"], # 1226: faster execution
# ["cuda:0", "cpu"],
# ["cuda", "cuda:0"],
# ["cuda:0", "cuda"],
],
)
def test_param_sync(self, give_weights, collector, policy_device, env_device):
Expand Down
Loading

0 comments on commit e1d1874

Please sign in to comment.