Skip to content

Commit

Permalink
Integrated RCCL with MSCCL++ for small message sizes (#1231)
Browse files Browse the repository at this point in the history
  • Loading branch information
corey-derochie-amd authored Jul 12, 2024
1 parent c755b9c commit 6dc47ee
Show file tree
Hide file tree
Showing 15 changed files with 441 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
*.gcov
/coverage/
build/
ext/
3 changes: 2 additions & 1 deletion .jenkins/common.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@ def runTestCommand (platform, project, gfilter, envars)

def command = """#!/usr/bin/env bash
set -x
export RUN_TEST_ROOT=\$(pwd)
cd ${project.paths.project_build_prefix}/build/release/test
${sudo} ulimit -l unlimited
ulimit -a
${sudo} ${envars} RCCL_ENABLE_SIGNALHANDLER=1 NCCL_DEBUG=INFO HSA_FORCE_FINE_GRAIN_PCIE=1 UT_MULTITHREAD=1 UT_PROCESS_MASK=1 ./rccl-UnitTests --gtest_filter=${gfilter} --gtest_output=xml --gtest_color=yes
${sudo} ${envars} LD_LIBRARY_PATH=\${RUN_TEST_ROOT}/${project.paths.project_build_prefix}/build/release:\${LD_LIBRARY_PATH} RCCL_ENABLE_SIGNALHANDLER=1 NCCL_DEBUG=INFO HSA_FORCE_FINE_GRAIN_PCIE=1 UT_MULTITHREAD=1 UT_PROCESS_MASK=1 ./rccl-UnitTests --gtest_filter=${gfilter} --gtest_output=xml --gtest_color=yes
"""

platform.runCommand(this, command)
Expand Down
31 changes: 31 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ option(BUILD_SHARED_LIBS "Build as shared library"
option(BUILD_TESTS "Build unit test programs" OFF)
option(COLLTRACE "Collective Trace Option" ON)
option(ENABLE_MSCCL_KERNEL "Enable MSCCL while compiling" ON)
option(ENABLE_MSCCLPP "Enable MSCCL++" ON)
option(ENABLE_IFC "Enable indirect function call" OFF)
option(INSTALL_DEPENDENCIES "Force install dependencies" OFF)
option(ROCTX "Enable ROCTX" OFF)
Expand Down Expand Up @@ -59,6 +60,8 @@ include(CheckSymbolExists)
include(cmake/Dependencies.cmake) # GTest, rocm-cmake, rocm_local_targets
include(cmake/Generator.cmake) # Configure functions that goes into RCCL

list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

# Build only for local GPU architecture
if (BUILD_LOCAL_GPU_TARGET_ONLY)
message(STATUS "Building only for local GPU target")
Expand Down Expand Up @@ -242,6 +245,15 @@ if (HAVE_KERNARG_PRELOAD)
message(STATUS "Kernarg preloading to SGPR enabled")
endif()

# MSCCL++ is only kept enabled when IBVerbs can be located; otherwise it is
# switched off for the rest of the configure step and a warning is printed.
if (ENABLE_MSCCLPP)
  find_package(IBVerbs)
  if (NOT IBVerbs_FOUND)
    message(WARNING "IBVerbs not found; disabling MSCCL++")
    set(ENABLE_MSCCLPP OFF)
  endif()
endif()

# Determine version from makefiles/version.mk and fill in templates
#==================================================================================================
## parse version from Makefile NCCL_MAJOR, NCCL_MINOR, NCCL_PATCH must exist
Expand Down Expand Up @@ -482,6 +494,14 @@ if (ENABLE_MSCCL_KERNEL)
list(APPEND SRC_FILES ${MSCCL_KERNEL_SOURCES})
endif()

# Append the MSCCL++ integration sources to SRC_FILES when MSCCL++ is enabled
if (ENABLE_MSCCLPP)
  set(MSCCLPP_SOURCES
    src/include/mscclpp/mscclpp_nccl.h
    src/misc/mscclpp/mscclpp_nccl.cc)
  list(APPEND SRC_FILES ${MSCCLPP_SOURCES})
endif()

# Hipify source files (copy of source generated into hipify directory)
#==================================================================================================
find_program(hipify-perl_executable hipify-perl)
Expand Down Expand Up @@ -563,6 +583,9 @@ endif()
if(ENABLE_MSCCL_KERNEL)
target_compile_definitions(rccl PRIVATE COMPILE_MSCCL_KERNEL)
endif()
# Define the ENABLE_MSCCLPP preprocessor symbol (PRIVATE to the rccl target)
# when MSCCL++ support is enabled
if(ENABLE_MSCCLPP)
target_compile_definitions(rccl PRIVATE ENABLE_MSCCLPP)
endif()
if(HAVE_ROCM_SMI64CONFIG)
target_compile_definitions(rccl PRIVATE USE_ROCM_SMI64CONFIG)
endif()
Expand Down Expand Up @@ -682,6 +705,11 @@ if (HAVE_KERNARG_PRELOAD)
target_link_options(rccl PRIVATE -Xoffload-linker -mllvm=-amdgpu-kernarg-preload-count=16)
endif()

# When MSCCL++ is enabled, run cmake/MSCCLPP.cmake and report it in the
# configure log
if(ENABLE_MSCCLPP)
include(cmake/MSCCLPP.cmake)
message(STATUS "Building RCCL with MSCCL++ support")
endif()

## Track linking time
set_property(TARGET rccl PROPERTY RULE_LAUNCH_LINK "${CMAKE_COMMAND} -E time")

Expand All @@ -699,6 +727,9 @@ file(COPY tools/msccl-unit-test-algorithms DESTINATION ${PROJECT_BINARY_DIR})
## Install Algorithm files under share folder
rocm_install(DIRECTORY ${PROJECT_BINARY_DIR}/msccl-algorithms DESTINATION ${CMAKE_INSTALL_DATADIR}/rccl)
rocm_install(DIRECTORY ${PROJECT_BINARY_DIR}/msccl-unit-test-algorithms DESTINATION ${CMAKE_INSTALL_DATADIR}/rccl)
# Install the files listed in MSCCLPP_OUT_LIBS into the library directory as
# part of the "runtime" component
if(ENABLE_MSCCLPP)
rocm_install(FILES ${MSCCLPP_OUT_LIBS} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT "runtime")
endif()

rocm_export_targets(
NAMESPACE roc::
Expand Down
2 changes: 1 addition & 1 deletion LICENSE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Attributions
Contains contributions from NVIDIA.

Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved.
Modifications Copyright (c) 2019-2023 Advanced Micro Devices, Inc. All rights reserved.
Modifications Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All rights reserved.
Modifications Copyright (c) Microsoft Corporation. Licensed under the MIT License.

Redistribution and use in source and binary forms, with or without
Expand Down
30 changes: 30 additions & 0 deletions NOTICES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,33 @@ Dependencies on NPKit (MIT License)
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE

_______________________________________________________________

Dependencies on MSCCL++ (MIT License)

Copyright (c) Microsoft Corporation.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE

See:

https://github.com/microsoft/mscclpp

for more information and license details.
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ RCCL build & installation helper script
--enable_backtrace Build with custom backtrace support
--disable-colltrace Build without collective trace
--disable-msccl-kernel Build without MSCCL kernels
--disable-mscclpp Build without MSCCL++ support
-f|--fast Quick-build RCCL (local gpu arch only, no backtrace, and collective trace support)
-h|--help Prints this help message
-i|--install Install RCCL library (see --prefix argument below)
Expand All @@ -45,6 +46,7 @@ RCCL build & installation helper script
--amdgpu_targets Only compile for specified GPU architecture(s). For multiple targets, separate by ';' (builds for all supported GPU architectures by default)
--no_clean Don't delete files if they already exist
--npkit-enable Compile with npkit enabled
--openmp-test-enable Enable OpenMP in rccl unit tests
--roctx-enable Compile with roctx enabled (example usage: rocprof --roctx-trace ./rccl-program)
-p|--package_build Build RCCL package
--prefix Specify custom directory to install RCCL to (default: `/opt/rocm`)
Expand Down Expand Up @@ -123,6 +125,13 @@ To manually run RCCL with NPKit enabled, environment variable `NPKIT_DUMP_DIR` n
To manually analyze NPKit dump results, please leverage [npkit_trace_generator.py](https://github.com/microsoft/NPKit/blob/main/rccl_samples/npkit_trace_generator.py).
## MSCCL/MSCCL++
RCCL integrates MSCCL (https://github.com/microsoft/msccl) and MSCCL++ (https://github.com/microsoft/mscclpp) to leverage the highly efficient GPU-GPU communication primitives for collective operations. Thanks to Microsoft Corporation for collaborating with us in this project.
MSCCL uses XMLs for different collective algorithms on different architectures. RCCL collectives can leverage those algorithms once the corresponding XML has been provided by the user. The XML files contain the sequence of send-recv and reduction operations to be executed by the kernel. On MI300X, MSCCL is enabled by default. On other platforms, the users may have to enable this by setting `RCCL_MSCCL_FORCE_ENABLE=1`.
On the other hand, RCCL allreduce and allgather collectives can leverage the efficient MSCCL++ communication kernels for certain message sizes. MSCCL++ support is available whenever MSCCL support is available. Users need to set the RCCL environment variable `RCCL_ENABLE_MSCCLPP=1` to run RCCL workloads with MSCCL++ support. The message size threshold for using MSCCL++ can be set with the environment variable `RCCL_MSCCLPP_THRESHOLD` (the default value is 1MB); RCCL will invoke MSCCL++ kernels for all message sizes less than or equal to that threshold.
## Library and API Documentation
Please refer to the [RCCL Documentation Site](https://rocm.docs.amd.com/projects/rccl/en/latest/) for current documentation.
Expand Down
39 changes: 39 additions & 0 deletions cmake/FindIBVerbs.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# MIT License
#
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Find module for the InfiniBand verbs library (ibverbs).
#
# Optional hint variables read by this module:
#   IBVERBS_INCLUDE_DIR - directory expected to contain infiniband/verbs.h
#   IBVERBS_LIB_DIR     - directory expected to contain the ibverbs library
#   IBVERBS_ROOT_DIR    - prefix; the prefix itself plus its include/ and lib/
#                         subdirectories are used as hints
#
# Results:
#   IBVerbs_FOUND        - set by find_package_handle_standard_args when both
#                          variables below were found
#   IBVERBS_INCLUDE_DIRS - cache entry: directory containing infiniband/verbs.h
#   IBVERBS_LIBRARIES    - cache entry: path of the ibverbs library

find_path(IBVERBS_INCLUDE_DIRS
  NAMES infiniband/verbs.h
  HINTS
    ${IBVERBS_INCLUDE_DIR}
    ${IBVERBS_ROOT_DIR}
    ${IBVERBS_ROOT_DIR}/include)

find_library(IBVERBS_LIBRARIES
  NAMES ibverbs
  HINTS
    ${IBVERBS_LIB_DIR}
    ${IBVERBS_ROOT_DIR}
    ${IBVERBS_ROOT_DIR}/lib)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(IBVerbs DEFAULT_MSG IBVERBS_INCLUDE_DIRS IBVERBS_LIBRARIES)

# Mark the cache entries created by find_path/find_library above as advanced.
# The names must match those entries exactly (IBVERBS_INCLUDE_DIRS, plural);
# IBVERBS_INCLUDE_DIR is only an input hint, not a result of this module.
mark_as_advanced(IBVERBS_INCLUDE_DIRS IBVERBS_LIBRARIES)
36 changes: 36 additions & 0 deletions cmake/Findmscclpp_nccl.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# MIT License
#
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Find module for the MSCCL++ NCCL-interface library (mscclpp_nccl).
#
# Input:
#   MSCCLPP_ROOT - prefix whose include/ and lib/ subdirectories are used as
#                  search hints
#
# Results:
#   mscclpp_nccl_FOUND   - set by find_package_handle_standard_args when both
#                          variables below were found
#   MSCCLPP_INCLUDE_DIRS - cache entry: directory containing mscclpp/gpu.hpp
#   MSCCLPP_LIBRARIES    - cache entry: path of the mscclpp_nccl library

# Header location
find_path(MSCCLPP_INCLUDE_DIRS
  NAMES mscclpp/gpu.hpp
  HINTS ${MSCCLPP_ROOT}/include)

# Library location
find_library(MSCCLPP_LIBRARIES
  NAMES mscclpp_nccl
  HINTS ${MSCCLPP_ROOT}/lib)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(mscclpp_nccl
  DEFAULT_MSG
  MSCCLPP_INCLUDE_DIRS
  MSCCLPP_LIBRARIES)

# Keep both result entries out of the default cache view
mark_as_advanced(MSCCLPP_INCLUDE_DIRS MSCCLPP_LIBRARIES)

70 changes: 70 additions & 0 deletions cmake/MSCCLPP.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# MIT License
#
# Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Locates the MSCCL++ NCCL-interface library (mscclpp_nccl). When it is not
# found under MSCCLPP_ROOT, MSCCL++ is downloaded, built and installed there
# via download_project(). Finally the contents of ${MSCCLPP_ROOT}/lib are
# copied into PROJECT_BINARY_DIR.
#
# Inputs:
#   ENABLE_MSCCLPP - nothing below the include() runs unless this is true
#   GPU_TARGETS    - forwarded to the MSCCL++ configure step
#   MSCCLPP_ROOT   - cache path; defaults to ext/mscclpp under the current
#                    source directory
#
# Outputs:
#   MSCCLPP_OUT_LIBS - list of the copied library files, as paths inside
#                      PROJECT_BINARY_DIR

# HIP dependency is handled earlier in the project cmake file
# when VerifyCompiler.cmake is included.

# For downloading, building, and installing required dependencies
include(cmake/DownloadProject.cmake)

if(ENABLE_MSCCLPP)
  set(MSCCLPP_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/ext/mscclpp CACHE PATH "")

  # Create the install prefix (including parents) with CMake's own file
  # command instead of shelling out to `mkdir -p`, so this works on any host
  # and a failure to create the directory is reported at configure time.
  file(MAKE_DIRECTORY "${MSCCLPP_ROOT}")

  # Make cmake/Findmscclpp_nccl.cmake visible to find_package()
  list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
  find_package(mscclpp_nccl)

  if(NOT mscclpp_nccl_FOUND)
    message(STATUS "MSCCL++ not found. Downloading and building MSCCL++.")

    # Download, build and install mscclpp at a pinned commit.
    # NOTE(review): GPU_TARGETS is expanded unquoted inside CMAKE_ARGS; if it
    # holds several ';'-separated targets, confirm that download_project()
    # forwards the whole list to the MSCCL++ configure step.
    download_project(PROJ                mscclpp_nccl
                     GIT_REPOSITORY      https://github.com/microsoft/mscclpp.git
                     GIT_TAG             b1b9d0626cfa40319c18c05f8c16650568395c29
                     INSTALL_DIR         ${MSCCLPP_ROOT}
                     CMAKE_ARGS          -DGPU_TARGETS=${GPU_TARGETS} -DBYPASS_GPU_CHECK=ON -DUSE_ROCM=ON -DCMAKE_BUILD_TYPE=Release -DBUILD_APPS_NCCL=ON -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
                     LOG_DOWNLOAD        TRUE
                     LOG_CONFIGURE       TRUE
                     LOG_BUILD           TRUE
                     LOG_INSTALL         TRUE
                     UPDATE_DISCONNECTED TRUE
    )

    # The freshly installed copy must now be discoverable
    find_package(mscclpp_nccl REQUIRED)
  endif()

  # Copy the outputs to the PROJECT_BINARY_DIR, list them in MSCCLPP_OUT_LIBS
  file(GLOB MSCCLPP_LIB_FILES "${MSCCLPP_ROOT}/lib/*")
  file(GLOB MSCCLPP_LIB_NAMES RELATIVE ${MSCCLPP_ROOT}/lib "${MSCCLPP_ROOT}/lib/*")
  set(MSCCLPP_OUT_LIBS "")
  foreach(LIB_NAME ${MSCCLPP_LIB_NAMES})
    list(APPEND MSCCLPP_OUT_LIBS ${PROJECT_BINARY_DIR}/${LIB_NAME})
  endforeach()
  file(COPY ${MSCCLPP_LIB_FILES} DESTINATION ${PROJECT_BINARY_DIR})
endif()
9 changes: 8 additions & 1 deletion install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ install_dependencies=false
install_library=false
install_prefix="${ROCM_PATH}"
msccl_kernel_enabled=true
mscclpp_enabled=true
num_parallel_jobs=$(nproc)
npkit_enabled=false
openmp_test_enabled=false
Expand All @@ -45,6 +46,7 @@ function display_help()
echo " --enable_backtrace Build with custom backtrace support"
echo " --disable-colltrace Build without collective trace"
echo " --disable-msccl-kernel Build without MSCCL kernels"
echo " --disable-mscclpp Build without MSCCL++ support"
echo " -f|--fast Quick-build RCCL (local gpu arch only, no backtrace, and collective trace support)"
echo " -h|--help Prints this help message"
echo " -i|--install Install RCCL library (see --prefix argument below)"
Expand Down Expand Up @@ -73,7 +75,7 @@ function display_help()
# check if we have a modern version of getopt that can handle whitespace and long parameters
getopt -T
if [[ "$?" -eq 4 ]]; then
GETOPT_PARSE=$(getopt --name "${0}" --options dfhij:lprt --longoptions address-sanitizer,dependencies,debug,enable_backtrace,disable-colltrace,disable-msccl-kernel,fast,help,install,jobs:,local_gpu_only,amdgpu_targets:,no_clean,npkit-enable,openmp-test-enable,roctx-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,time-trace,verbose -- "$@")
GETOPT_PARSE=$(getopt --name "${0}" --options dfhij:lprt --longoptions address-sanitizer,dependencies,debug,enable_backtrace,disable-colltrace,disable-msccl-kernel,disable-mscclpp,fast,help,install,jobs:,local_gpu_only,amdgpu_targets:,no_clean,npkit-enable,openmp-test-enable,roctx-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,time-trace,verbose -- "$@")
else
echo "Need a new version of getopt"
exit 1
Expand All @@ -94,6 +96,7 @@ while true; do
--enable_backtrace) build_bfd=true; shift ;;
--disable-colltrace) collective_trace=false; shift ;;
--disable-msccl-kernel) msccl_kernel_enabled=false; shift ;;
--disable-mscclpp) mscclpp_enabled=false; shift ;;
-f | --fast) build_local_gpu_only=true; collective_trace=false; msccl_kernel_enabled=false; shift ;;
-h | --help) display_help; exit 0 ;;
-i | --install) install_library=true; shift ;;
Expand Down Expand Up @@ -234,6 +237,10 @@ if [[ "${msccl_kernel_enabled}" == false ]]; then
cmake_common_options="${cmake_common_options} -DENABLE_MSCCL_KERNEL=OFF"
fi

if [[ "${mscclpp_enabled}" == false ]]; then
cmake_common_options="${cmake_common_options} -DENABLE_MSCCLPP=OFF"
fi

# Install dependencies
if [[ "${install_dependencies}" == true ]]; then
cmake_common_options="${cmake_common_options} -DINSTALL_DEPENDENCIES=ON"
Expand Down
7 changes: 7 additions & 0 deletions src/include/comm.h
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,13 @@ struct ncclComm {
// shared structures for finalization
int finalizeRankCnt;

#if defined(ENABLE_MSCCLPP)
// Whether this comm is compatible with MSCCLPP
bool mscclppCompatible;
struct mscclpp_ncclComm* mscclpp_comm;
size_t mscclpp_threshold;
#endif

// Whether this comm is compatible with MSCCL
bool mscclCompatible;
// group job to support multi-thread FT
Expand Down
Loading

0 comments on commit 6dc47ee

Please sign in to comment.