pytorch · vmoens · Mar 27, 2023 · Feb 13, 2023 · Mar 17, 2023 · Mar 17, 2023
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -586,6 +586,61 @@ jobs:
       - store_test_results:
           path: test-results
 
+  unittest_linux_sklearn_gpu:
+    <<: *binary_common
+    machine:
+      image: ubuntu-2004-cuda-11.4:202110-01
+    resource_class: gpu.nvidia.medium
+    environment:
+      image_name: "nvidia/cudagl:11.4.0-base"
+      TAR_OPTIONS: --no-same-owner
+      PYTHON_VERSION: << parameters.python_version >>
+      CU_VERSION: << parameters.cu_version >>
+
+    steps:
+      - checkout
+      - designate_upload_channel
+      - run:
+          name: Generate cache key
+          # This will refresh cache on Sundays, nightly build should generate new cache.
+          command: echo "$(date +"%Y-%U")" > .circleci-weekly
+      - restore_cache:
+          keys:
+            - env-v3-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux_libs/scripts_sklearn/environment.yml" }}-{{ checksum ".circleci-weekly" }}
+      - run:
+          name: Setup
+          command: docker run -e PYTHON_VERSION -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux_libs/scripts_sklearn/setup_env.sh
+      - save_cache:
+
+          key: env-v3-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux_libs/scripts_sklearn/environment.yml" }}-{{ checksum ".circleci-weekly" }}
+
+          paths:
+            - conda
+            - env
+      - run:
+          # Here we create an envlist file that contains some env variables that we want the docker container to be aware of.
+          # Normally, the CIRCLECI variable is set and available on all CI workflows: https://circleci.com/docs/2.0/env-vars/#built-in-environment-variables.
+          # They're available in all the other workflows (OSX and Windows).
+          # But here, we're running the unittest_linux_gpu workflows in a docker container, where those variables aren't accessible.
+          # So instead we dump the variables we need in env.list and we pass that file when invoking "docker run".
+          name: export CIRCLECI env var
+          command: echo "CIRCLECI=true" >> ./env.list
+      - run:
+          name: Install torchrl
+          command: docker run -e PYTHON_VERSION -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux_libs/scripts_sklearn/install.sh
+      - run:
+          name: Run tests
+          command: docker run --env-file ./env.list -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux_libs/scripts_sklearn/run_test.sh
+      - run:
+          name: Codecov upload
+          command: |
+            bash <(curl -s https://codecov.io/bash) -Z -F sklearn-gpu
+      - run:
+          name: Post Process
+          command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux_libs/scripts_sklearn/post_process.sh
+      - store_test_results:
+          path: test-results
+
 
   unittest_linux_jumanji_gpu:
     <<: *binary_common
@@ -1344,6 +1399,14 @@ workflows:
           cu_version: cu117
           name: unittest_linux_habitat_gpu_py3.8
           python_version: '3.8'
+#      - unittest_linux_d4rl_gpu:
+#          cu_version: cu117
+#          name: unittest_linux_d4rl_gpu_py3.8
+#          python_version: '3.8'
+      - unittest_linux_sklearn_gpu:
+          cu_version: cu117
+          name: unittest_linux_sklearn_gpu_py3.8
+          python_version: '3.8'
       - unittest_linux_d4rl_gpu:
           cu_version: cu117
           name: unittest_linux_d4rl_gpu_py3.8

diff --git a/.circleci/unittest/linux_examples/scripts/run_test.sh b/.circleci/unittest/linux_examples/scripts/run_test.sh
@@ -231,6 +231,7 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/iql/iql_onli
   mode=offline \
   collector_devices=cuda:0
 
+python .circleci/unittest/helpers/coverage_run_parallel.py examples/bandits/dqn.py --n_steps=100
 
 coverage combine
 coverage xml -i
diff --git a/.circleci/unittest/linux_libs/scripts_sklearn/environment.yml b/.circleci/unittest/linux_libs/scripts_sklearn/environment.yml
@@ -0,0 +1,20 @@
+channels:
+  - pytorch
+  - defaults
+dependencies:
+  - pip
+  - pip:
+    - hypothesis
+    - future
+    - cloudpickle
+    - pytest
+    - pytest-cov
+    - pytest-mock
+    - pytest-instafail
+    - pytest-rerunfailures
+    - expecttest
+    - pyyaml
+    - scipy
+    - hydra-core
+    - scikit-learn
+    - pandas
diff --git a/.circleci/unittest/linux_libs/scripts_sklearn/install.sh b/.circleci/unittest/linux_libs/scripts_sklearn/install.sh
@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+
+unset PYTORCH_VERSION
+# For unittest, nightly PyTorch is used as the following section,
+# so no need to set PYTORCH_VERSION.
+# In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config.
+apt-get update && apt-get install -y git wget gcc g++
+#apt-get update && apt-get install -y git wget freeglut3 freeglut3-dev
+
+set -e
+
+eval "$(./conda/bin/conda shell.bash hook)"
+conda activate ./env
+
+if [ "${CU_VERSION:-}" == cpu ] ; then
+    version="cpu"
+else
+    if [[ ${#CU_VERSION} -eq 4 ]]; then
+        CUDA_VERSION="${CU_VERSION:2:1}.${CU_VERSION:3:1}"
+    elif [[ ${#CU_VERSION} -eq 5 ]]; then
+        CUDA_VERSION="${CU_VERSION:2:2}.${CU_VERSION:4:1}"
+    fi
+    echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION ($CU_VERSION)"
+    version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")"
+fi
+
+
+# submodules
+git submodule sync && git submodule update --init --recursive
+
+printf "Installing PyTorch with %s\n" "${CU_VERSION}"
+if [ "${CU_VERSION:-}" == cpu ] ; then
+    # conda install -y pytorch torchvision cpuonly -c pytorch-nightly
+    # use pip to install pytorch as conda can frequently pick older release
+#    conda install -y pytorch cpuonly -c pytorch-nightly
+    pip3 install --pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cpu --force-reinstall
+else
+    pip3 install --pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cu116 --force-reinstall
+fi
+
+# install tensordict
+pip install git+https://github.com/pytorch-labs/tensordict.git
+
+# smoke test
+python -c "import functorch;import tensordict"
+
+printf "* Installing torchrl\n"
+pip3 install -e .
+
+# smoke test
+python -c "import torchrl"
diff --git a/.circleci/unittest/linux_libs/scripts_sklearn/post_process.sh b/.circleci/unittest/linux_libs/scripts_sklearn/post_process.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+set -e
+
+eval "$(./conda/bin/conda shell.bash hook)"
+conda activate ./env