feat(dist): Add NCCL communicator and allreduce kernel
1 parent 58adc3f · commit d5e73fe
Showing 20 changed files with 665 additions and 13 deletions.
@@ -0,0 +1,165 @@
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
# From PyTorch:
#
# Copyright (c) 2016- Facebook, Inc (Adam Paszke)
# Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
# Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
# Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
# Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
# Copyright (c) 2011-2013 NYU (Clement Farabet)
# Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
# Copyright (c) 2006 Idiap Research Institute (Samy Bengio)
# Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
#
# From Caffe2:
#
# Copyright (c) 2016-present, Facebook Inc. All rights reserved.
#
# All contributions by Facebook:
# Copyright (c) 2016 Facebook Inc.
#
# All contributions by Google:
# Copyright (c) 2015 Google Inc.
# All rights reserved.
#
# All contributions by Yangqing Jia:
# Copyright (c) 2015 Yangqing Jia
# All rights reserved.
#
# All contributions by Kakao Brain:
# Copyright 2019-2020 Kakao Brain
#
# All contributions from Caffe:
# Copyright(c) 2013, 2014, 2015, the respective contributors
# All rights reserved.
#
# All other contributions:
# Copyright(c) 2015, 2016 the respective contributors
# All rights reserved.
#
# Caffe2 uses a copyright model similar to Caffe: each contributor holds
# copyright over their contributions to Caffe2. The project versioning records
# all such contribution and copyright details. If a contributor wants to further
# mark their specific copyright on a particular contribution, they should
# indicate their copyright solely in the commit message of the change when it is
# committed.
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America
#    and IDIAP Research Institute nor the names of its contributors may be
#    used to endorse or promote products derived from this software without
#    specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Find the NCCL libraries
#
# The following variables are optionally searched for defaults
#  NCCL_ROOT: Base directory where all NCCL components are found
#  NCCL_INCLUDE_DIR: Directory where NCCL header is found
#  NCCL_LIB_DIR: Directory where NCCL library is found
#
# The following are set after configuration is done:
#  NCCL_FOUND
#  NCCL_INCLUDE_DIRS
#  NCCL_LIBRARIES
#
# The path hints include CUDA_TOOLKIT_ROOT_DIR seeing as some folks
# install NCCL in the same location as the CUDA toolkit.
# See https://github.com/caffe2/caffe2/issues/1601

set(NCCL_INCLUDE_DIR $ENV{NCCL_INCLUDE_DIR} CACHE PATH "Folder contains NVIDIA NCCL headers")
set(NCCL_LIB_DIR $ENV{NCCL_LIB_DIR} CACHE PATH "Folder contains NVIDIA NCCL libraries")
set(NCCL_VERSION $ENV{NCCL_VERSION} CACHE STRING "Version of NCCL to build with")

if ($ENV{NCCL_ROOT_DIR})
  message(WARNING "NCCL_ROOT_DIR is deprecated. Please set NCCL_ROOT instead.")
endif()
list(APPEND NCCL_ROOT $ENV{NCCL_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR})
# Compatibility layer for CMake <3.12. NCCL_ROOT will be accounted in for searching paths and libraries for CMake >=3.12.
list(APPEND CMAKE_PREFIX_PATH ${NCCL_ROOT})

find_path(NCCL_INCLUDE_DIRS
  NAMES nccl.h
  HINTS ${NCCL_INCLUDE_DIR})

if (USE_STATIC_NCCL)
  message(STATUS "USE_STATIC_NCCL is set. Linking with static NCCL library.")
  set(NCCL_LIBNAME "nccl_static")
  if (NCCL_VERSION)  # Prefer the versioned library if a specific NCCL version is specified
    set(CMAKE_FIND_LIBRARY_SUFFIXES ".a.${NCCL_VERSION}" ${CMAKE_FIND_LIBRARY_SUFFIXES})
  endif()
else()
  set(NCCL_LIBNAME "nccl")
  if (NCCL_VERSION)  # Prefer the versioned library if a specific NCCL version is specified
    set(CMAKE_FIND_LIBRARY_SUFFIXES ".so.${NCCL_VERSION}" ${CMAKE_FIND_LIBRARY_SUFFIXES})
  endif()
endif()

find_library(NCCL_LIBRARIES
  NAMES ${NCCL_LIBNAME}
  HINTS ${NCCL_LIB_DIR})

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIRS NCCL_LIBRARIES)

if(NCCL_FOUND)  # obtaining NCCL version and some sanity checks
  set(NCCL_HEADER_FILE "${NCCL_INCLUDE_DIRS}/nccl.h")
  message(STATUS "Determining NCCL version from ${NCCL_HEADER_FILE}...")
  set(OLD_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES})
  list(APPEND CMAKE_REQUIRED_INCLUDES ${NCCL_INCLUDE_DIRS})
  include(CheckCXXSymbolExists)
  check_cxx_symbol_exists(NCCL_VERSION_CODE nccl.h NCCL_VERSION_DEFINED)

  if (NCCL_VERSION_DEFINED)
    set(file "${PROJECT_BINARY_DIR}/detect_nccl_version.cc")
    file(WRITE ${file} "
      #include <iostream>
      #include <nccl.h>
      int main()
      {
        std::cout << NCCL_MAJOR << '.' << NCCL_MINOR << '.' << NCCL_PATCH << std::endl;
        int x;
        ncclGetVersion(&x);
        return x == NCCL_VERSION_CODE;
      }
")
    try_run(NCCL_VERSION_MATCHED compile_result ${PROJECT_BINARY_DIR} ${file}
      RUN_OUTPUT_VARIABLE NCCL_VERSION_FROM_HEADER
      CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${NCCL_INCLUDE_DIRS}"
      LINK_LIBRARIES ${NCCL_LIBRARIES})
    if (NOT NCCL_VERSION_MATCHED)
      message(FATAL_ERROR "Found NCCL header version and library version do not match! \
(include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES}) Please set NCCL_INCLUDE_DIR and NCCL_LIB_DIR manually.")
    endif()
    message(STATUS "NCCL version: ${NCCL_VERSION_FROM_HEADER}")
  else()
    # message(STATUS "NCCL version < 2.3.5-5")
  endif()
  set(CMAKE_REQUIRED_INCLUDES ${OLD_CMAKE_REQUIRED_INCLUDES})

  message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES})")
  mark_as_advanced(NCCL_ROOT_DIR NCCL_INCLUDE_DIRS NCCL_LIBRARIES)
endif()
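For context, a consuming CMakeLists.txt would point CMake at this find-module and link the variables it sets. A minimal sketch, assuming the module sits in a cmake/ directory on CMAKE_MODULE_PATH and using myproject_kernels as a placeholder target name (neither appears in this commit):

# Hypothetical usage of the FindNCCL module above; NCCL_ROOT, NCCL_INCLUDE_DIR,
# and NCCL_LIB_DIR may be provided via the environment as the module documents.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")  # assumed module location
find_package(NCCL REQUIRED)

target_include_directories(myproject_kernels PRIVATE ${NCCL_INCLUDE_DIRS})
target_link_libraries(myproject_kernels PRIVATE ${NCCL_LIBRARIES})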
@@ -0,0 +1,14 @@
#ifndef KERNEL_COMMUNICATION_ATTRIBUTES_H
#define KERNEL_COMMUNICATION_ATTRIBUTES_H

namespace refactor::kernel {
    enum class AllReduceType {
        Sum,
        Avg,
        Min,
        Max,
        Prod
    };
}

#endif
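The CUDA source later in this commit calls getRedOp and getNcclDataType from a nccl utility namespace that is not rendered in this excerpt. A minimal sketch of what the enum-to-NCCL mapping might look like; the namespace placement and error handling are assumptions, while ncclRedOp_t and its values are real NCCL API (ncclAvg requires NCCL >= 2.10):

// Hypothetical sketch of the getRedOp helper used by nccl_kernel.cu below.
#include <nccl.h>
#include <stdexcept>
#include "kernel/attributes/communication.h"

namespace refactor::kernel::nccl {

    inline ncclRedOp_t getRedOp(AllReduceType type) {
        switch (type) {
            case AllReduceType::Sum:  return ncclSum;
            case AllReduceType::Avg:  return ncclAvg;// NCCL >= 2.10
            case AllReduceType::Min:  return ncclMin;
            case AllReduceType::Max:  return ncclMax;
            case AllReduceType::Prod: return ncclProd;
            default: throw std::runtime_error("unknown AllReduceType");// hypothetical error handling
        }
    }

}// namespace refactor::kernel::nccl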
@@ -0,0 +1,22 @@
#ifndef KERNEL_COLLECTOR_ALL_REDUCE_H
#define KERNEL_COLLECTOR_ALL_REDUCE_H

#include "../collector.h"
#include "../target.h"
#include "kernel/attributes/communication.h"

namespace refactor::kernel {

    struct AllReduceCollector final : public InfoCollector {
        Target target;
        AllReduceType type;

        constexpr AllReduceCollector(Target target_, AllReduceType type_) noexcept
            : InfoCollector(), target(target_), type(type_) {}

        std::vector<KernelBox>
        filter(TensorRefs inputs, TensorRefs outputs) const final;
    };
}// namespace refactor::kernel

#endif
@@ -0,0 +1,20 @@
#include "kernel/collectors/all_reduce.h" | ||
#include "../kernels/all_reduce/nccl_kernel.hh" | ||
namespace refactor::kernel { | ||
std::vector<KernelBox> | ||
AllReduceCollector::filter(TensorRefs inputs, TensorRefs outputs) const { | ||
std::vector<KernelBox> ans; | ||
switch (target) { | ||
case Target::Cpu: | ||
break; | ||
case Target::NvidiaGpu: | ||
if (auto ptr = AllReduceNccl::build(type, inputs[0], outputs[0]); ptr) { | ||
ans.emplace_back(std::move(ptr)); | ||
} | ||
break; | ||
default: | ||
UNREACHABLEX(void, "Unknown target"); | ||
} | ||
return ans; | ||
} | ||
}// namespace refactor::kernel |
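The collector returns a list of candidate kernels for the target rather than a single kernel, leaving the final choice to the caller. A hedged sketch of a call site, with the TensorRefs construction elided since it is project-specific:

// Hypothetical call site: ask the collector for candidates, take the first viable one.
AllReduceCollector collector(Target::NvidiaGpu, AllReduceType::Sum);
auto candidates = collector.filter(inputs, outputs);// inputs/outputs: TensorRefs (project-specific)
if (!candidates.empty()) {
    auto kernel = std::move(candidates.front());// KernelBox
    // ... lower the kernel to a Routine and run it (see nccl_kernel.cu below)
}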
@@ -0,0 +1,32 @@
#include "nccl_kernel.hh" | ||
|
||
namespace refactor::kernel { | ||
using K = AllReduceNccl; | ||
using DT = DataType; | ||
|
||
K::AllReduceNccl(AllReduceType opType_, DT dataType_, size_t size_) noexcept | ||
: opType(opType_), dataType(dataType_), size(size_) {} | ||
|
||
auto K::build(AllReduceType opType_, Tensor const &input, Tensor const &output) noexcept -> KernelBox { | ||
#ifndef USE_CUDA | ||
return nullptr; | ||
#endif | ||
if (input.elementsSize() != output.elementsSize() || | ||
input.dataType != output.dataType) { | ||
return nullptr; | ||
} | ||
|
||
return std::make_unique<K>(opType_, input.dataType, input.elementsSize()); | ||
} | ||
|
||
auto K::typeId() noexcept -> size_t { | ||
static uint8_t ID = 1; | ||
return reinterpret_cast<size_t>(&ID); | ||
} | ||
|
||
auto K::kernelTypeId() const noexcept -> size_t { return typeId(); } | ||
auto K::description() const noexcept -> std::string_view { | ||
return "Performing AllReduce using NCCL"; | ||
} | ||
|
||
}// namespace refactor::kernel |
@@ -0,0 +1,20 @@
#include "nccl_kernel.hh" | ||
#include "../../utilities/cuda/nccl_communicator.hh" | ||
#include <nccl.h> | ||
namespace refactor::kernel { | ||
using K = AllReduceNccl; | ||
using DT = DataType; | ||
using namespace nccl; | ||
|
||
Routine K::lower(Resources &res) const noexcept { | ||
return [count = size, | ||
redOp = getRedOp(opType), | ||
ncclDataType = getNcclDataType(dataType)](Resources &res, void const **inputs, void **outputs) { | ||
auto communicator = res.fetch<NcclCommunicator>(); | ||
auto input = inputs[0]; | ||
auto output = outputs[0]; | ||
checkNcclError(ncclAllReduce(input, output, count, ncclDataType, | ||
redOp, communicator->get(), 0));// TODO: use default stream for now | ||
}; | ||
} | ||
}// namespace refactor::kernel |
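nccl_communicator.hh itself is not among the files rendered above. Based on how it is used here — res.fetch<NcclCommunicator>(), communicator->get(), checkNcclError(...) — a minimal sketch of the wrapper might look as follows. The class shape, constructor signature, and error type are assumptions; ncclComm_t, ncclCommInitRank, ncclCommDestroy, ncclGetUniqueId, and ncclGetErrorString are real NCCL API:

// Hypothetical sketch of the communicator wrapper; NCCL calls are real,
// the surrounding class design is assumed, not taken from this commit.
#include <nccl.h>
#include <stdexcept>
#include <string>

#define checkNcclError(call)                                           \
    do {                                                               \
        ncclResult_t status_ = (call);                                 \
        if (status_ != ncclSuccess) {                                  \
            throw std::runtime_error(std::string("NCCL error: ") +     \
                                     ncclGetErrorString(status_));     \
        }                                                              \
    } while (0)

class NcclCommunicator {
    ncclComm_t comm;

public:
    // `uniqueId` must be created on rank 0 via ncclGetUniqueId and broadcast
    // to all ranks out of band (e.g. over MPI or a TCP store).
    NcclCommunicator(int worldSize, int rank, ncclUniqueId uniqueId) {
        checkNcclError(ncclCommInitRank(&comm, worldSize, uniqueId, rank));
    }
    ~NcclCommunicator() { ncclCommDestroy(comm); }

    NcclCommunicator(NcclCommunicator const &) = delete;
    NcclCommunicator &operator=(NcclCommunicator const &) = delete;

    ncclComm_t get() const { return comm; }
};

A companion getNcclDataType would switch over the project's DataType enum to ncclFloat32, ncclFloat16, ncclInt64, and so on, in the same style as the getRedOp sketch earlier.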
@@ -0,0 +1,28 @@
#ifndef KERNEL_ALLREDUCE_NCCL_KERNEL_HH
#define KERNEL_ALLREDUCE_NCCL_KERNEL_HH

#include "kernel/collectors/all_reduce.h"
#include "kernel/tensor.h"

namespace refactor::kernel {

    struct AllReduceNccl final : public Kernel {
        DataType dataType;
        AllReduceType opType;
        size_t size;

        AllReduceNccl(AllReduceType, DataType, size_t) noexcept;

        static KernelBox build(AllReduceType, Tensor const &, Tensor const &) noexcept;
        static size_t typeId() noexcept;

        size_t kernelTypeId() const noexcept final;
        std::string_view description() const noexcept final;
#ifdef USE_CUDA
        Routine lower(Resources &) const noexcept final;
#endif
    };

}// namespace refactor::kernel

#endif// KERNEL_ALLREDUCE_NCCL_KERNEL_HH
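Putting the pieces together, the flow this commit sets up appears to be: the collector builds an AllReduceNccl kernel, lower() turns it into a Routine closure, and the runtime invokes that closure with device pointers. A hedged per-rank sketch; the Resources setup and the device buffers deviceIn/deviceOut are placeholders, only the types come from this commit:

// Hypothetical end-to-end flow on one rank.
auto kernel = AllReduceNccl::build(AllReduceType::Sum, input, output);// input/output: Tensor const &
if (kernel) {
    Routine routine = kernel->lower(res);// res: Resources holding an NcclCommunicator

    void const *ins[]{deviceIn};// device pointer to this rank's input
    void *outs[]{deviceOut};    // device pointer receiving the reduced result
    routine(res, ins, outs);    // every rank ends up with the element-wise sum
}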