diff --git a/.circleci/cimodel/data/caffe2_build_definitions.py b/.circleci/cimodel/data/caffe2_build_definitions.py index ecd9f74379..a419b5e473 100644 --- a/.circleci/cimodel/data/caffe2_build_definitions.py +++ b/.circleci/cimodel/data/caffe2_build_definitions.py @@ -14,7 +14,7 @@ DOCKER_IMAGE_PATH_BASE = "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/" -DOCKER_IMAGE_VERSION = 287 +DOCKER_IMAGE_VERSION = 301 @dataclass diff --git a/.circleci/config.yml b/.circleci/config.yml index cab4282a51..f313dc0cd2 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1835,7 +1835,7 @@ workflows: - master - /ci-all\/.*/ build_environment: "caffe2-py2-gcc4.8-ubuntu14.04-build" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc4.8-ubuntu14.04:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc4.8-ubuntu14.04:301" - caffe2_linux_test: name: caffe2_py2_gcc4_8_ubuntu14_04_test requires: @@ -1847,7 +1847,7 @@ workflows: - master - /ci-all\/.*/ build_environment: "caffe2-py2-gcc4.8-ubuntu14.04-test" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc4.8-ubuntu14.04:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc4.8-ubuntu14.04:301" resource_class: large - caffe2_linux_build: name: caffe2_py2_cuda9_0_cudnn7_ubuntu16_04_build @@ -1859,7 +1859,7 @@ workflows: - master - /ci-all\/.*/ build_environment: "caffe2-py2-cuda9.0-cudnn7-ubuntu16.04-build" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:301" - caffe2_linux_test: name: caffe2_py2_cuda9_0_cudnn7_ubuntu16_04_test requires: @@ -1872,14 +1872,14 @@ workflows: - /ci-all\/.*/ build_environment: "caffe2-py2-cuda9.0-cudnn7-ubuntu16.04-test" use_cuda_docker_runtime: "1" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:301" resource_class: gpu.medium - caffe2_linux_build: name: caffe2_cmake_cuda9_0_cudnn7_ubuntu16_04_build requires: - setup build_environment: "caffe2-cmake-cuda9.0-cudnn7-ubuntu16.04-build" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:301" - caffe2_linux_test: name: caffe2_cmake_cuda9_0_cudnn7_ubuntu16_04_test requires: @@ -1887,14 +1887,14 @@ workflows: - caffe2_cmake_cuda9_0_cudnn7_ubuntu16_04_build build_environment: "caffe2-cmake-cuda9.0-cudnn7-ubuntu16.04-test" use_cuda_docker_runtime: "1" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:301" resource_class: gpu.medium - caffe2_linux_build: name: caffe2_py2_cuda9_1_cudnn7_ubuntu16_04_build requires: - setup build_environment: "caffe2-py2-cuda9.1-cudnn7-ubuntu16.04-build" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.1-cudnn7-ubuntu16.04:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.1-cudnn7-ubuntu16.04:301" - caffe2_linux_test: name: caffe2_py2_cuda9_1_cudnn7_ubuntu16_04_test requires: @@ -1902,35 +1902,35 @@ workflows: - caffe2_py2_cuda9_1_cudnn7_ubuntu16_04_build build_environment: "caffe2-py2-cuda9.1-cudnn7-ubuntu16.04-test" use_cuda_docker_runtime: "1" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.1-cudnn7-ubuntu16.04:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.1-cudnn7-ubuntu16.04:301" resource_class: gpu.medium - caffe2_linux_build: name: caffe2_py2_mkl_ubuntu16_04_build requires: - setup build_environment: "caffe2-py2-mkl-ubuntu16.04-build" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-mkl-ubuntu16.04:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-mkl-ubuntu16.04:301" - caffe2_linux_test: name: caffe2_py2_mkl_ubuntu16_04_test requires: - setup - caffe2_py2_mkl_ubuntu16_04_build build_environment: "caffe2-py2-mkl-ubuntu16.04-test" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-mkl-ubuntu16.04:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-mkl-ubuntu16.04:301" resource_class: large - caffe2_linux_build: name: caffe2_onnx_py2_gcc5_ubuntu16_04_build requires: - setup build_environment: "caffe2-onnx-py2-gcc5-ubuntu16.04-build" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc5-ubuntu16.04:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc5-ubuntu16.04:301" - caffe2_linux_test: name: caffe2_onnx_py2_gcc5_ubuntu16_04_test requires: - setup - caffe2_onnx_py2_gcc5_ubuntu16_04_build build_environment: "caffe2-onnx-py2-gcc5-ubuntu16.04-test" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc5-ubuntu16.04:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc5-ubuntu16.04:301" resource_class: large - caffe2_linux_build: name: caffe2_py2_clang3_8_ubuntu16_04_build @@ -1942,7 +1942,7 @@ workflows: - master - /ci-all\/.*/ build_environment: "caffe2-py2-clang3.8-ubuntu16.04-build" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang3.8-ubuntu16.04:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang3.8-ubuntu16.04:301" build_only: "1" - caffe2_linux_build: name: caffe2_py2_clang3_9_ubuntu16_04_build @@ -1954,35 +1954,35 @@ workflows: - master - /ci-all\/.*/ build_environment: "caffe2-py2-clang3.9-ubuntu16.04-build" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang3.9-ubuntu16.04:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang3.9-ubuntu16.04:301" build_only: "1" - caffe2_linux_build: name: caffe2_py2_clang7_ubuntu16_04_build requires: - setup build_environment: "caffe2-py2-clang7-ubuntu16.04-build" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang7-ubuntu16.04:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang7-ubuntu16.04:301" build_only: "1" - caffe2_linux_build: name: caffe2_onnx_py3_6_clang7_ubuntu16_04_build requires: - setup build_environment: "caffe2-onnx-py3.6-clang7-ubuntu16.04-build" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py3.6-clang7-ubuntu16.04:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py3.6-clang7-ubuntu16.04:301" - caffe2_linux_test: name: caffe2_onnx_py3_6_clang7_ubuntu16_04_test requires: - setup - caffe2_onnx_py3_6_clang7_ubuntu16_04_build build_environment: "caffe2-onnx-py3.6-clang7-ubuntu16.04-test" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py3.6-clang7-ubuntu16.04:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py3.6-clang7-ubuntu16.04:301" resource_class: large - caffe2_linux_build: name: caffe2_py2_android_ubuntu16_04_build requires: - setup build_environment: "caffe2-py2-android-ubuntu16.04-build" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-android-ubuntu16.04:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-android-ubuntu16.04:301" build_only: "1" - caffe2_linux_build: name: caffe2_py2_cuda9_0_cudnn7_centos7_build @@ -1994,7 +1994,7 @@ workflows: - master - /ci-all\/.*/ build_environment: "caffe2-py2-cuda9.0-cudnn7-centos7-build" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-centos7:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-centos7:301" - caffe2_linux_test: name: caffe2_py2_cuda9_0_cudnn7_centos7_test requires: @@ -2007,7 +2007,7 @@ workflows: - /ci-all\/.*/ build_environment: "caffe2-py2-cuda9.0-cudnn7-centos7-test" use_cuda_docker_runtime: "1" - docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-centos7:287" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-centos7:301" resource_class: gpu.medium - caffe2_macos_build: name: caffe2_py2_ios_macos10_13_build diff --git a/aten/src/ATen/native/cuda/Embedding.cu b/aten/src/ATen/native/cuda/Embedding.cu index 8bb61c6398..359512ca6d 100644 --- a/aten/src/ATen/native/cuda/Embedding.cu +++ b/aten/src/ATen/native/cuda/Embedding.cu @@ -10,6 +10,7 @@ #include #include +#include #include #include diff --git a/aten/src/ATen/native/cuda/EmbeddingBag.cu b/aten/src/ATen/native/cuda/EmbeddingBag.cu index ed2307dd95..88b8bd3252 100644 --- a/aten/src/ATen/native/cuda/EmbeddingBag.cu +++ b/aten/src/ATen/native/cuda/EmbeddingBag.cu @@ -13,6 +13,7 @@ #include #include +#include #include #include diff --git a/aten/src/ATen/native/cuda/Unique.cu b/aten/src/ATen/native/cuda/Unique.cu index 753784d4ed..38eb5d9308 100644 --- a/aten/src/ATen/native/cuda/Unique.cu +++ b/aten/src/ATen/native/cuda/Unique.cu @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/aten/src/THC/generic/THCTensorMode.cu b/aten/src/THC/generic/THCTensorMode.cu index 4908a43a58..3290be54e0 100644 --- a/aten/src/THC/generic/THCTensorMode.cu +++ b/aten/src/THC/generic/THCTensorMode.cu @@ -2,6 +2,8 @@ #define THC_GENERIC_FILE "THC/generic/THCTensorMode.cu" #else +#include + void THCTensor_(calculateMode)(THCState *state, THCTensor *values, THCudaLongTensor *indices, diff --git a/aten/src/THCUNN/generic/LookupTable.cu b/aten/src/THCUNN/generic/LookupTable.cu index e6550736db..77b81b2819 100644 --- a/aten/src/THCUNN/generic/LookupTable.cu +++ b/aten/src/THCUNN/generic/LookupTable.cu @@ -2,6 +2,8 @@ #define THC_GENERIC_FILE "THCUNN/generic/LookupTable.cu" #else +#include + void THNN_(LookupTable_accGradParameters)( THCState *state, THCIndexTensor *input, diff --git a/caffe2/core/common_gpu.cc b/caffe2/core/common_gpu.cc index 33e88b76fa..2999faf5eb 100644 --- a/caffe2/core/common_gpu.cc +++ b/caffe2/core/common_gpu.cc @@ -257,6 +257,14 @@ const char* cublasGetErrorString(cublasStatus_t error) { #ifdef __HIP_PLATFORM_HCC__ case rocblas_status_invalid_size: return "rocblas_status_invalid_size"; + case rocblas_status_perf_degraded: + return "rocblas_status_perf_degraded"; + case rocblas_status_size_query_mismatch: + return "rocblas_status_size_query_mismatch"; + case rocblas_status_size_increased: + return "rocblas_status_size_increased"; + case rocblas_status_size_unchanged: + return "rocblas_status_size_unchanged"; #endif } // To suppress compiler warning. diff --git a/caffe2/operators/generate_proposals_op.cu b/caffe2/operators/generate_proposals_op.cu index ea5861deee..e619d50a5b 100644 --- a/caffe2/operators/generate_proposals_op.cu +++ b/caffe2/operators/generate_proposals_op.cu @@ -6,6 +6,10 @@ #include "caffe2/operators/generate_proposals_op_util_nms.h" #include "caffe2/operators/generate_proposals_op_util_nms_gpu.h" +#ifdef __HIP_PLATFORM_HCC__ +#include +#endif + using caffe2::utils::RotatedBox; namespace caffe2 { diff --git a/caffe2/operators/reduce_front_back_max_ops.cu b/caffe2/operators/reduce_front_back_max_ops.cu index 48693e132e..7001dbb1bf 100644 --- a/caffe2/operators/reduce_front_back_max_ops.cu +++ b/caffe2/operators/reduce_front_back_max_ops.cu @@ -2,6 +2,10 @@ #include "caffe2/core/context_gpu.h" #include "caffe2/operators/reduce_front_back_max_ops.h" +#ifdef __HIP_PLATFORM_HCC__ +#include +#endif + namespace caffe2 { /*** diff --git a/caffe2/operators/rmac_regions_op.cu b/caffe2/operators/rmac_regions_op.cu index ecc70240c6..8779bac9ad 100644 --- a/caffe2/operators/rmac_regions_op.cu +++ b/caffe2/operators/rmac_regions_op.cu @@ -3,7 +3,15 @@ #include "caffe2/core/context_gpu.h" #include "caffe2/operators/rmac_regions_op.h" +#ifdef __HIP_PLATFORM_HCC__ +#include +#endif + +#ifdef __HIP_PLATFORM_HCC__ +namespace rocprim { +#else namespace cub { +#endif template inline __host__ __device__ bool operator<( diff --git a/caffe2/operators/unique_ops.cu b/caffe2/operators/unique_ops.cu index 3813a016af..fc0e1f4b1f 100644 --- a/caffe2/operators/unique_ops.cu +++ b/caffe2/operators/unique_ops.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include "caffe2/core/context_gpu.h" namespace caffe2 { diff --git a/caffe2/utils/math_gpu.cu b/caffe2/utils/math_gpu.cu index 44966972d1..35cb970e75 100644 --- a/caffe2/utils/math_gpu.cu +++ b/caffe2/utils/math_gpu.cu @@ -622,10 +622,8 @@ CAFFE2_CUDA_EXPORT void Gemm( N, // ldd rocblas_datatype_f32_r, // compute type rocblas_gemm_algo_standard, // rocblas_gemm_algo - 0, // solution index, reserved for future use - 0, // flags, reserved for future use - NULL, // size of workspace - NULL)); // workspace + 0, // solution index, reserved for future use + 0)); // flags, reserved for future use #else CUBLAS_ENFORCE(cublasSgemmEx( context->cublas_handle(), @@ -1033,10 +1031,8 @@ CAFFE2_CUDA_EXPORT void GemmStridedBatched( batch_size, rocblas_datatype_f32_r, // compute type rocblas_gemm_algo_standard, // rocblas_gemm_algo - 0, // solution index, reserved for future use - 0, // flags, reserved for future use - NULL, // size of workspace - NULL)); // workspace + 0, // solution index, reserved for future use + 0)); // flags, reserved for future use #else CUBLAS_ENFORCE(cublasGemmStridedBatchedEx( context->cublas_handle(), @@ -1178,10 +1174,8 @@ CAFFE2_CUDA_EXPORT void Gemv( ldc, // ldd rocblas_datatype_f32_r, // compute type rocblas_gemm_algo_standard, // rocblas_gemm_algo - 0, // solution index, reserved for future use - 0, // flags, reserved for future use - NULL, // size of workspace - NULL)); // workspace + 0, // solution index, reserved for future use + 0)); // flags, reserved for future use #else CUBLAS_ENFORCE(cublasSgemmEx( context->cublas_handle(), diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index c1fa335d7b..0a033dbc9b 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -872,6 +872,7 @@ if(USE_ROCM) list(APPEND HIP_CXX_FLAGS -Wno-unused-command-line-argument) list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier) list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN) + list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP) if(CMAKE_BUILD_TYPE MATCHES Debug) list(APPEND HIP_CXX_FLAGS -g) @@ -887,13 +888,12 @@ if(USE_ROCM) endforeach() set(Caffe2_HIP_INCLUDE - ${hip_INCLUDE_DIRS} ${hcc_INCLUDE_DIRS} ${hsa_INCLUDE_DIRS} ${rocrand_INCLUDE_DIRS} ${hiprand_INCLUDE_DIRS} ${rocblas_INCLUDE_DIRS} ${miopen_INCLUDE_DIRS} ${thrust_INCLUDE_DIRS} $ ${Caffe2_HIP_INCLUDE}) - + ${thrust_INCLUDE_DIRS} ${hipcub_INCLUDE_DIRS} ${rocprim_INCLUDE_DIRS} ${miopen_INCLUDE_DIRS} ${rocblas_INCLUDE_DIRS} ${rocrand_INCLUDE_DIRS} ${hiprand_INCLUDE_DIRS} ${hip_INCLUDE_DIRS} ${hcc_INCLUDE_DIRS} ${hsa_INCLUDE_DIRS} $ ${Caffe2_HIP_INCLUDE}) # This is needed for library added by hip_add_library (same for hip_add_executable) hip_include_directories(${Caffe2_HIP_INCLUDE}) set(Caffe2_HIP_DEPENDENCY_LIBS - ${PYTORCH_HIP_HCC_LIBRARIES} ${PYTORCH_MIOPEN_LIBRARIES}) + ${PYTORCH_HIP_HCC_LIBRARIES} ${PYTORCH_MIOPEN_LIBRARIES} ${hipcub_LIBRARIES}) # Note [rocblas & rocfft cmake bug] # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake index 83b70e46c0..79a37a5025 100644 --- a/cmake/public/LoadHIP.cmake +++ b/cmake/public/LoadHIP.cmake @@ -80,6 +80,27 @@ ELSE() SET(MIOPEN_PATH $ENV{MIOPEN_PATH}) ENDIF() +# ROCPRIM_PATH +IF(NOT DEFINED ENV{ROCPRIM_PATH}) + SET(ROCPRIM_PATH ${ROCM_PATH}/rocprim) +ELSE() + SET(ROCPRIM_PATH $ENV{ROCPRIM_PATH}) +ENDIF() + +# HIPCUB_PATH +IF(NOT DEFINED ENV{HIPCUB_PATH}) + SET(HIPCUB_PATH ${ROCM_PATH}/hipcub) +ELSE() + SET(HIPCUB_PATH $ENV{HIPCUB_PATH}) +ENDIF() + +# ROCTHRUST_PATH +IF(NOT DEFINED ENV{ROCTHRUST_PATH}) + SET(ROCTHRUST_PATH ${ROCM_PATH}/rocthrust) +ELSE() + SET(ROCTHRUST_PATH $ENV{ROCTHRUST_PATH}) +ENDIF() + IF(NOT DEFINED ENV{PYTORCH_ROCM_ARCH}) SET(PYTORCH_ROCM_ARCH gfx803;gfx900;gfx906) ELSE() @@ -124,6 +145,9 @@ IF(HIP_FOUND) set(miopen_DIR ${MIOPEN_PATH}/lib/cmake/miopen) set(rocfft_DIR ${ROCFFT_PATH}/lib/cmake/rocfft) set(hipsparse_DIR ${HIPSPARSE_PATH}/lib/cmake/hipsparse) + set(rocprim_DIR ${ROCPRIM_PATH}/lib/cmake/rocprim) + set(hipcub_DIR ${HIPCUB_PATH}/lib/cmake/hipcub) + set(rocthrust_DIR ${ROCTHRUST_PATH}/lib/cmake/rocthrust) find_package_and_print_version(rocrand REQUIRED) find_package_and_print_version(hiprand REQUIRED) @@ -131,7 +155,10 @@ IF(HIP_FOUND) find_package_and_print_version(miopen REQUIRED) find_package_and_print_version(rocfft REQUIRED) find_package_and_print_version(hipsparse REQUIRED) - + find_package_and_print_version(rocprim REQUIRED) + find_package_and_print_version(hipcub REQUIRED) + find_package_and_print_version(rocthrust REQUIRED) + # TODO: hip_hcc has an interface include flag "-hc" which is only # recognizable by hcc, but not gcc and clang. Right now in our # setup, hcc is only used for linking, but it should be used to @@ -146,6 +173,4 @@ IF(HIP_FOUND) set(hcc_INCLUDE_DIRS ${HCC_PATH}/include) set(hsa_INCLUDE_DIRS ${HSA_PATH}/include) - set(thrust_INCLUDE_DIRS ${THRUST_PATH} ${THRUST_PATH}/thrust/system/cuda/detail/cub-hip) - ENDIF() diff --git a/docker/caffe2/jenkins/common/install_rocm.sh b/docker/caffe2/jenkins/common/install_rocm.sh index 886d13b903..82bc425111 100644 --- a/docker/caffe2/jenkins/common/install_rocm.sh +++ b/docker/caffe2/jenkins/common/install_rocm.sh @@ -27,7 +27,8 @@ install_ubuntu() { cxlactivitylogger \ hipsparse \ rocrand \ - hip-thrust \ + hipcub \ + rocthrust \ rccl } @@ -59,7 +60,8 @@ install_centos() { hipsparse \ rocrand \ rccl \ - hip-thrust + hipcub \ + rocthrust } # Install Python packages depending on the base OS diff --git a/test/test_sparse.py b/test/test_sparse.py index b69f24fe20..1243103e6e 100644 --- a/test/test_sparse.py +++ b/test/test_sparse.py @@ -273,6 +273,7 @@ def _to_dense_half_safe(self, tensor): else: return tensor.to_dense() + @skipIfRocm def test_to_sparse(self): shape = [10, 5, 19, 8] max_nnz = 1 diff --git a/test/test_torch.py b/test/test_torch.py index 5f310d1edc..08ec2fe88f 100644 --- a/test/test_torch.py +++ b/test/test_torch.py @@ -6857,6 +6857,7 @@ def get_random_mat_scale(n): test(u.mm(s.diag()).mm(v)) @skipIfNoLapack + @skipIfRocm def test_det_logdet_slogdet(self): self._test_det_logdet_slogdet(self, 'cpu') @@ -8970,6 +8971,7 @@ def test_unbiased(self): self.assertEqual(tensor.std(), tensor.std(unbiased=True)) self.assertEqual(tensor.std(unbiased=False), tensor.std(0, unbiased=False)) + @skipIfRocm def test_structseq_repr(self): a = torch.arange(250).reshape(5, 5, 10) expected = """ @@ -12368,6 +12370,7 @@ def run_test(device): if torch.cuda.is_available(): run_test(torch.device('cuda')) + @skipIfRocm def test_unique_dim(self): self.assertFalse(hasattr(torch, 'unique_dim')) diff --git a/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py b/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py index aaddc91287..7f0ae6d16a 100644 --- a/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py +++ b/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py @@ -277,6 +277,13 @@ ("cusparse.h", ("hipsparse.h", CONV_INCLUDE, API_RAND)), ("cufft.h", ("hipfft.h", CONV_INCLUDE, API_BLAS)), ("cufftXt.h", ("hipfft.h", CONV_INCLUDE, API_BLAS)), + ("thrust/system/cuda/", ("thrust/system/hip/", CONV_INCLUDE, API_BLAS)), + ("cub/util_allocator.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)), + ("cub/block/block_reduce.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)), + ("cub/cub.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)), + ("cub/block/block_load.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)), + ("cub/device/device_reduce.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)), + ("cub/device/device_scan.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)), ]) CUDA_IDENTIFIER_MAP = collections.OrderedDict([ @@ -2174,6 +2181,8 @@ ("cufftDestroy", ("hipfftDestroy", CONV_MATH_FUNC, API_FFT)), ("cufftGetVersion", ("hipfftGetVersion", CONV_MATH_FUNC, API_FFT)), ("cufftGetProperty", ("hipfftGetProperty", CONV_MATH_FUNC, API_FFT, HIP_UNSUPPORTED)), + ("thrust::cuda::", ("thrust::hip::", CONV_MATH_FUNC, API_BLAS)), + ("cub::", ("hipcub::", CONV_MATH_FUNC, API_BLAS)), ]) CUDA_SPARSE_MAP = collections.OrderedDict([