[Feature] Habitat integration (pytorch#514)

ai4co · Oct 28, 2022 · fae718a · fae718a
1 parent af104c3
commit fae718a
Show file tree

Hide file tree

Showing 13 changed files with 685 additions and 72 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -347,6 +347,61 @@ jobs:
       - store_test_results:
           path: test-results
 
+  unittest_linux_habitat_gpu:  # GPU unit-test job for the Habitat integration; every script runs inside the "${image_name}" docker image
+    <<: *binary_common
+    machine:
+      image: ubuntu-2004-cuda-11.4:202110-01
+    resource_class: gpu.nvidia.medium
+    environment:
+      image_name: "nvidia/cudagl:11.4.0-base"
+      TAR_OPTIONS: --no-same-owner
+      PYTHON_VERSION: << parameters.python_version >>
+      CU_VERSION: << parameters.cu_version >>
+
+    steps:
+      - checkout
+      - designate_upload_channel
+      - run:
+          name: Generate cache key
+          # The key is "<year>-<week number>" (weeks start on Sunday), so the cache is refreshed once a week; nightly builds should generate the new cache.
+          command: echo "$(date +"%Y-%U")" > .circleci-weekly
+      - restore_cache:
+          keys:
+            - env-v3-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux_libs/scripts_habitat/environment.yml" }}-{{ checksum ".circleci-weekly" }}
+      - run:
+          name: Setup
+          command: docker run -e PYTHON_VERSION -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux_libs/scripts_habitat/setup_env.sh
+      - save_cache:
+
+          key: env-v3-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux_libs/scripts_habitat/environment.yml" }}-{{ checksum ".circleci-weekly" }}
+
+          paths:
+            - conda
+            - env
+      - run:
+          # Here we create an envlist file that contains some env variables that we want the docker container to be aware of.
+          # Normally, the CIRCLECI variable is set and available on all CI workflows: https://circleci.com/docs/2.0/env-vars/#built-in-environment-variables.
+          # It is available in all the other workflows (OSX and Windows).
+          # But here, the unittest_linux_habitat_gpu scripts run inside a docker container, where those variables aren't accessible.
+          # So instead we dump the variables we need in env.list and we pass that file when invoking "docker run".
+          name: export CIRCLECI env var
+          command: echo "CIRCLECI=true" >> ./env.list
+      - run:  # CU_VERSION is forwarded because install.sh branches on it (cpu vs. CUDA wheels)
+          name: Install torchrl
+          command: docker run -e PYTHON_VERSION -e CU_VERSION -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux_libs/scripts_habitat/install.sh
+      - run:
+          name: Run tests
+          command: docker run --env-file ./env.list -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux_libs/scripts_habitat/run_test.sh
+      - run:
+          name: Codecov upload
+          command: |
+            bash <(curl -s https://codecov.io/bash) -Z -F habitat-gpu
+      - run:  # the image name and -w $PWD are required: docker run treats its first positional argument as the image
+          name: Post Process
+          command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux_libs/scripts_habitat/post_process.sh
+      - store_test_results:
+          path: test-results
+
   unittest_linux_optdeps_gpu:
     <<: *binary_common
     machine:
@@ -546,20 +601,17 @@ jobs:
             - conda
             - env
       - run:
-          name: Install torchrl, run tests, upload codecov
+          name: Install torchrl, run tests
           command: |
             docker run -t --env=CUDA_VISIBLE_DEVICES="" --gpus all -v $PWD:$PWD -w $PWD -e UPLOAD_CHANNEL -e CU_VERSION "${image_name}" .circleci/unittest/linux_olddeps/scripts_gym_0_13/batch_scripts.sh
 #            docker run -t --gpus all -v $PWD:$PWD -w $PWD -e UPLOAD_CHANNEL -e CU_VERSION "${image_name}" .circleci/unittest/linux_olddeps/scripts_gym_0_13/batch_scripts.sh
-#      - run:
-#          name: Run tests
-#          command: docker run -t --gpus all -v $PWD:$PWD -w $PWD -e UPLOAD_CHANNEL -e CU_VERSION "${image_name}" .circleci/unittest/linux_olddeps/scripts_gym_0_13/run_test.sh
-#      - run:
-#          name: Codecov upload
-#          command: |
-#            docker run -t --gpus all -v $PWD:$PWD -w $PWD -e UPLOAD_CHANNEL -e CU_VERSION "${image_name}" <(curl -s https://codecov.io/bash) -Z -F linux-stable-cpu
-#      - run:
-#          name: Post process
-#          command: docker run -t --gpus all -v $PWD:$PWD -w $PWD -e UPLOAD_CHANNEL -e CU_VERSION "${image_name}" .circleci/unittest/linux_olddeps/scripts_gym_0_13/post_process.sh
+      - run:
+          name: Codecov upload
+          command: |
+            bash <(curl -s https://codecov.io/bash) -Z -F olddeps-gpu
+      - run:
+          name: Post process
+          command: docker run -t --gpus all -v $PWD:$PWD -w $PWD -e UPLOAD_CHANNEL -e CU_VERSION "${image_name}" .circleci/unittest/linux_olddeps/scripts_gym_0_13/post_process.sh
       - store_test_results:
           path: test-results
 
@@ -649,62 +701,6 @@ workflows:
           python_version: '3.10'
           wheel_docker_image: pytorch/manylinux-cuda102
 
-#      - binary_linux_wheel:
-#          conda_docker_image: pytorch/conda-builder:cuda102
-#          cu_version: cu102
-#          name: binary_linux_wheel_py3.7_cu102
-#          python_version: '3.7'
-#          wheel_docker_image: pytorch/manylinux-cuda102
-#
-#      - binary_linux_wheel:
-#          conda_docker_image: pytorch/conda-builder:cuda102
-#          cu_version: cu102
-#          name: binary_linux_wheel_py3.8_cu102
-#          python_version: '3.8'
-#          wheel_docker_image: pytorch/manylinux-cuda102
-#
-#      - binary_linux_wheel:
-#          conda_docker_image: pytorch/conda-builder:cuda102
-#          cu_version: cu102
-#          name: binary_linux_wheel_py3.9_cu102
-#          python_version: '3.9'
-#          wheel_docker_image: pytorch/manylinux-cuda102
-#
-#      - binary_linux_wheel:
-#          conda_docker_image: pytorch/conda-builder:cuda102
-#          cu_version: cu102
-#          name: binary_linux_wheel_py3.10_cu102
-#          python_version: '3.10'
-#          wheel_docker_image: pytorch/manylinux-cuda102
-
-#      - binary_linux_wheel:
-#          conda_docker_image: pytorch/conda-builder:cuda113
-#          cu_version: cu113
-#          name: binary_linux_wheel_py3.7_cu113
-#          python_version: '3.7'
-#          wheel_docker_image: pytorch/manylinux-cuda113
-#
-#      - binary_linux_wheel:
-#          conda_docker_image: pytorch/conda-builder:cuda113
-#          cu_version: cu113
-#          name: binary_linux_wheel_py3.8_cu113
-#          python_version: '3.8'
-#          wheel_docker_image: pytorch/manylinux-cuda113
-#
-#      - binary_linux_wheel:
-#          conda_docker_image: pytorch/conda-builder:cuda113
-#          cu_version: cu113
-#          name: binary_linux_wheel_py3.9_cu113
-#          python_version: '3.9'
-#          wheel_docker_image: pytorch/manylinux-cuda113
-#
-#      - binary_linux_wheel:
-#          conda_docker_image: pytorch/conda-builder:cuda113
-#          cu_version: cu113
-#          name: binary_linux_wheel_py3.10_cu113
-#          python_version: '3.10'
-#          wheel_docker_image: pytorch/manylinux-cuda113
-
       - binary_macos_wheel:
           conda_docker_image: pytorch/conda-builder:cpu
           cu_version: cpu
@@ -784,6 +780,11 @@ workflows:
           cu_version: cu113
           name: unittest_linux_stable_gpu_py3.8
           python_version: '3.8'
+      # Third-party library integrations (such as Habitat) are tested on Python 3.8 only
+      - unittest_linux_habitat_gpu:
+          cu_version: cu113
+          name: unittest_linux_habitat_gpu_py3.8
+          python_version: '3.8'
 
       - unittest_macos_cpu:
           cu_version: cpu

diff --git a/.circleci/unittest/linux_libs/scripts_habitat/environment.yml b/.circleci/unittest/linux_libs/scripts_habitat/environment.yml
@@ -0,0 +1,17 @@
+channels:  # conda environment spec for the Habitat unit-test job; presumably consumed by setup_env.sh (not shown) - confirm
+  - pytorch
+  - defaults
+dependencies:  # NOTE(review): no python version is pinned here; presumably the env is created with PYTHON_VERSION elsewhere - confirm
+  - pip  # conda package providing pip for the pip-managed list below
+  - pip:  # every entry in this sub-list is installed through pip rather than conda
+    - hypothesis
+    - future
+    - cloudpickle
+    - pytest  # test runner, followed by its plugins
+    - pytest-cov
+    - pytest-mock
+    - pytest-instafail
+    - expecttest
+    - pyyaml
+    - scipy
+    - hydra-core
diff --git a/.circleci/unittest/linux_libs/scripts_habitat/install.sh b/.circleci/unittest/linux_libs/scripts_habitat/install.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+
+unset PYTORCH_VERSION
+# For unittest, nightly PyTorch is used as the following section,
+# so no need to set PYTORCH_VERSION.
+# In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config.
+apt-get update && apt-get install -y git wget gcc g++  # runs before `set -e`, so a failure here does not abort the script
+
+set -e  # from this point on, abort at the first failing command
+
+eval "$(./conda/bin/conda shell.bash hook)"  # load conda's shell integration from the repo-local ./conda install
+conda activate ./env  # activate the environment stored in ./env
+
+if [ "${CU_VERSION:-}" == cpu ] ; then
+    version="cpu"
+else
+    if [[ ${#CU_VERSION} -eq 4 ]]; then  # four characters, e.g. cu92 -> 9.2
+        CUDA_VERSION="${CU_VERSION:2:1}.${CU_VERSION:3:1}"
+    elif [[ ${#CU_VERSION} -eq 5 ]]; then  # five characters, e.g. cu113 -> 11.3
+        CUDA_VERSION="${CU_VERSION:2:2}.${CU_VERSION:4:1}"
+    fi  # any other length (including an unset CU_VERSION) leaves CUDA_VERSION unset
+    echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION ($CU_VERSION)"
+    version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")"  # NOTE(review): `version` is not read again in this script
+fi
+
+
+# submodules
+git submodule sync && git submodule update --init --recursive
+
+printf "Installing PyTorch with %s\n" "${CU_VERSION}"
+if [ "${CU_VERSION:-}" == cpu ] ; then
+    # conda install -y pytorch torchvision cpuonly -c pytorch-nightly
+    # use pip to install pytorch as conda can frequently pick older release
+#    conda install -y pytorch cpuonly -c pytorch-nightly
+    pip3 install --pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cpu --force-reinstall
+else
+    pip3 install --pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cu116 --force-reinstall  # NOTE(review): index is pinned to cu116 regardless of CU_VERSION - confirm this is intended
+fi
+
+# smoke test
+python -c "import functorch"  # with `set -e` active, an import failure stops the script here
+
+printf "* Installing torchrl\n"
+pip3 install -e .  # editable install of the checked-out repository
+
+# smoke test
+python -c "import torchrl"  # verifies the editable install is importable
diff --git a/.circleci/unittest/linux_libs/scripts_habitat/post_process.sh b/.circleci/unittest/linux_libs/scripts_habitat/post_process.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+set -e  # abort at the first failing command
+
+eval "$(./conda/bin/conda shell.bash hook)"  # load conda's shell integration from the repo-local ./conda install
+conda activate ./env  # activate the environment stored in ./env; no further steps are visible in this script