-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
e3febd9
commit 16870d6
Showing
17 changed files
with
666 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. | ||
# | ||
# From PyTorch: | ||
# | ||
# Copyright (c) 2016- Facebook, Inc (Adam Paszke) | ||
# Copyright (c) 2014- Facebook, Inc (Soumith Chintala) | ||
# Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) | ||
# Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) | ||
# Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) | ||
# Copyright (c) 2011-2013 NYU (Clement Farabet) | ||
# Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) | ||
# Copyright (c) 2006 Idiap Research Institute (Samy Bengio) | ||
# Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) | ||
# | ||
# From Caffe2: | ||
# | ||
# Copyright (c) 2016-present, Facebook Inc. All rights reserved. | ||
# | ||
# All contributions by Facebook: | ||
# Copyright (c) 2016 Facebook Inc. | ||
# | ||
# All contributions by Google: | ||
# Copyright (c) 2015 Google Inc. | ||
# All rights reserved. | ||
# | ||
# All contributions by Yangqing Jia: | ||
# Copyright (c) 2015 Yangqing Jia | ||
# All rights reserved. | ||
# | ||
# All contributions by Kakao Brain: | ||
# Copyright 2019-2020 Kakao Brain | ||
# | ||
# All contributions from Caffe: | ||
# Copyright(c) 2013, 2014, 2015, the respective contributors | ||
# All rights reserved. | ||
# | ||
# All other contributions: | ||
# Copyright(c) 2015, 2016 the respective contributors | ||
# All rights reserved. | ||
# | ||
# Caffe2 uses a copyright model similar to Caffe: each contributor holds | ||
# copyright over their contributions to Caffe2. The project versioning records | ||
# all such contribution and copyright details. If a contributor wants to further | ||
# mark their specific copyright on a particular contribution, they should | ||
# indicate their copyright solely in the commit message of the change when it is | ||
# committed. | ||
# | ||
# All rights reserved. | ||
# | ||
# Redistribution and use in source and binary forms, with or without | ||
# modification, are permitted provided that the following conditions are met: | ||
# | ||
# 1. Redistributions of source code must retain the above copyright | ||
# notice, this list of conditions and the following disclaimer. | ||
# | ||
# 2. Redistributions in binary form must reproduce the above copyright | ||
# notice, this list of conditions and the following disclaimer in the | ||
# documentation and/or other materials provided with the distribution. | ||
# | ||
# 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America | ||
# and IDIAP Research Institute nor the names of its contributors may be | ||
# used to endorse or promote products derived from this software without | ||
# specific prior written permission. | ||
# | ||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
# POSSIBILITY OF SUCH DAMAGE. | ||
# | ||
# Find the nccl libraries | ||
# | ||
# The following variables are optionally searched for defaults | ||
# NCCL_ROOT: Base directory where all NCCL components are foundHong Xu, 1 year ago: • Let CMake handle NCCL detection instead of ou… | ||
# NCCL_INCLUDE_DIR: Directory where NCCL header is foundPieter Noordhuis, 3 years ago: • Bump gloo | ||
# NCCL_LIB_DIR: Directory where NCCL library is found | ||
# | ||
# The following are set after configuration is done: | ||
# NCCL_FOUND | ||
# NCCL_INCLUDE_DIRS | ||
# NCCL_LIBRARIES | ||
# | ||
# The path hints include CUDA_TOOLKIT_ROOT_DIR seeing as some folks | ||
# install NCCL in the same location as the CUDA toolkit. | ||
# See https://github.com/caffe2/caffe2/issues/1601 | ||
|
||
set(NCCL_INCLUDE_DIR $ENV{NCCL_INCLUDE_DIR} CACHE PATH "Folder contains NVIDIA NCCL headers") | ||
set(NCCL_LIB_DIR $ENV{NCCL_LIB_DIR} CACHE PATH "Folder contains NVIDIA NCCL libraries") | ||
set(NCCL_VERSION $ENV{NCCL_VERSION} CACHE STRING "Version of NCCL to build with") | ||
|
||
if ($ENV{NCCL_ROOT_DIR}) | ||
message(WARNING "NCCL_ROOT_DIR is deprecated. Please set NCCL_ROOT instead.") | ||
endif() | ||
list(APPEND NCCL_ROOT $ENV{NCCL_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR}) | ||
# Compatible layer for CMake <3.12. NCCL_ROOT will be accounted in for searching paths and libraries for CMake >=3.12. | ||
list(APPEND CMAKE_PREFIX_PATH ${NCCL_ROOT}) | ||
|
||
find_path(NCCL_INCLUDE_DIRS | ||
NAMES nccl.h | ||
HINTS ${NCCL_INCLUDE_DIR}) | ||
|
||
if (USE_STATIC_NCCL) | ||
MESSAGE(STATUS "USE_STATIC_NCCL is set. Linking with static NCCL library.") | ||
SET(NCCL_LIBNAME "nccl_static") | ||
if (NCCL_VERSION) # Prefer the versioned library if a specific NCCL version is specified | ||
set(CMAKE_FIND_LIBRARY_SUFFIXES ".a.${NCCL_VERSION}" ${CMAKE_FIND_LIBRARY_SUFFIXES}) | ||
endif() | ||
else() | ||
SET(NCCL_LIBNAME "nccl") | ||
if (NCCL_VERSION) # Prefer the versioned library if a specific NCCL version is specified | ||
set(CMAKE_FIND_LIBRARY_SUFFIXES ".so.${NCCL_VERSION}" ${CMAKE_FIND_LIBRARY_SUFFIXES}) | ||
endif() | ||
endif() | ||
|
||
find_library(NCCL_LIBRARIES | ||
NAMES ${NCCL_LIBNAME} | ||
HINTS ${NCCL_LIB_DIR}) | ||
|
||
include(FindPackageHandleStandardArgs) | ||
find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIRS NCCL_LIBRARIES) | ||
|
||
if(NCCL_FOUND) # obtaining NCCL version and some sanity checks | ||
set (NCCL_HEADER_FILE "${NCCL_INCLUDE_DIRS}/nccl.h") | ||
message (STATUS "Determining NCCL version from ${NCCL_HEADER_FILE}...") | ||
set (OLD_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES}) | ||
list (APPEND CMAKE_REQUIRED_INCLUDES ${NCCL_INCLUDE_DIRS}) | ||
include(CheckCXXSymbolExists) | ||
check_cxx_symbol_exists(NCCL_VERSION_CODE nccl.h NCCL_VERSION_DEFINED) | ||
|
||
if (NCCL_VERSION_DEFINED) | ||
set(file "${PROJECT_BINARY_DIR}/detect_nccl_version.cc") | ||
file(WRITE ${file} " | ||
#include <iostream> | ||
#include <nccl.h> | ||
int main() | ||
{ | ||
std::cout << NCCL_MAJOR << '.' << NCCL_MINOR << '.' << NCCL_PATCH << std::endl; | ||
int x; | ||
ncclGetVersion(&x); | ||
return x == NCCL_VERSION_CODE; | ||
} | ||
") | ||
try_run(NCCL_VERSION_MATCHED compile_result ${PROJECT_BINARY_DIR} ${file} | ||
RUN_OUTPUT_VARIABLE NCCL_VERSION_FROM_HEADER | ||
CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${NCCL_INCLUDE_DIRS}" | ||
LINK_LIBRARIES ${NCCL_LIBRARIES}) | ||
if (NOT NCCL_VERSION_MATCHED) | ||
message(FATAL_ERROR "Found NCCL header version and library version do not match! \ | ||
(include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES}) Please set NCCL_INCLUDE_DIR and NCCL_LIB_DIR manually.") | ||
endif() | ||
message(STATUS "NCCL version: ${NCCL_VERSION_FROM_HEADER}") | ||
else() | ||
# message(STATUS "NCCL version < 2.3.5-5") | ||
endif () | ||
set (CMAKE_REQUIRED_INCLUDES ${OLD_CMAKE_REQUIRED_INCLUDES}) | ||
|
||
message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES})") | ||
mark_as_advanced(NCCL_ROOT_DIR NCCL_INCLUDE_DIRS NCCL_LIBRARIES) | ||
endif() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#ifndef KERNEL_COMMUNICATION_ATTRIBUTES_H | ||
#define KERNEL_COMMUNICATION_ATTRIBUTES_H | ||
|
||
namespace refactor::kernel { | ||
enum class AllReduceType { | ||
Sum, | ||
Avg, | ||
Min, | ||
Max, | ||
Prod | ||
}; | ||
} | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#ifndef KERNEL_COLLECTOR_ALL_REDUCE_H | ||
#define KERNEL_COLLECTOR_ALL_REDUCE_H | ||
|
||
#include "../collector.h" | ||
#include "kernel/attributes/communication.h" | ||
|
||
namespace refactor::kernel { | ||
|
||
struct AllReduceCollector final : public InfoCollector { | ||
|
||
AllReduceType type; | ||
|
||
constexpr AllReduceCollector(decltype(_target) target, AllReduceType type_) noexcept | ||
: InfoCollector(target), type(type_) {} | ||
|
||
std::vector<KernelBox> | ||
filter(TensorRefs inputs, TensorRefs outputs) const final; | ||
}; | ||
}// namespace refactor::kernel | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#include "kernel/collectors/all_reduce.h" | ||
#include "../kernels/all_reduce/nccl_kernel.hh" | ||
namespace refactor::kernel { | ||
std::vector<KernelBox> | ||
AllReduceCollector::filter(TensorRefs inputs, TensorRefs outputs) const { | ||
std::vector<KernelBox> ans; | ||
switch (_target) { | ||
case decltype(_target)::Cpu: | ||
break; | ||
case decltype(_target)::Nvidia: | ||
if (auto ptr = AllReduceNccl::build(type, inputs[0], outputs[0]); ptr) { | ||
ans.emplace_back(std::move(ptr)); | ||
} | ||
break; | ||
default: | ||
UNREACHABLEX(void, "Unknown target"); | ||
} | ||
return ans; | ||
} | ||
}// namespace refactor::kernel |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#include "nccl_kernel.hh" | ||
|
||
namespace refactor::kernel { | ||
using K = AllReduceNccl; | ||
using DT = DataType; | ||
|
||
K::AllReduceNccl(AllReduceType opType_, DT dataType_, size_t size_) noexcept | ||
: opType(opType_), dataType(dataType_), size(size_) {} | ||
|
||
auto K::build(AllReduceType opType_, Tensor const &input, Tensor const &output) noexcept -> KernelBox { | ||
#ifndef USE_CUDA | ||
return nullptr; | ||
#endif | ||
if (input.elementsSize() != output.elementsSize() || | ||
input.dataType != output.dataType) { | ||
return nullptr; | ||
} | ||
|
||
return std::make_unique<K>(opType_, input.dataType, input.elementsSize()); | ||
} | ||
|
||
auto K::typeId() noexcept -> size_t { | ||
static uint8_t ID = 1; | ||
return reinterpret_cast<size_t>(&ID); | ||
} | ||
|
||
auto K::kernelTypeId() const noexcept -> size_t { return typeId(); } | ||
auto K::description() const noexcept -> std::string_view { | ||
return "Performing AllReduce using NCCL"; | ||
} | ||
|
||
}// namespace refactor::kernel |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#include "nccl_kernel.hh" | ||
#include "../../utilities/cuda/nccl_communicator.hh" | ||
#include <nccl.h> | ||
namespace refactor::kernel { | ||
using K = AllReduceNccl; | ||
using DT = DataType; | ||
using namespace nccl; | ||
|
||
auto K::lower(Resources &res) const noexcept -> RoutineWorkspace{ | ||
return [count = size, | ||
redOp = getRedOp(opType), | ||
ncclDataType = getNcclDataType(dataType)](Resources &res, void *workspace, void const *const *inputs, void *const *outputs) { | ||
auto communicator = res.fetch<NcclCommunicator>(); | ||
auto input = inputs[0]; | ||
auto output = outputs[0]; | ||
checkNcclError(ncclAllReduce(input, output, count, ncclDataType, | ||
redOp, communicator->get(), 0));// TODO: use default stream for now | ||
}; | ||
} | ||
}// namespace refactor::kernel |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#ifndef KERNEL_ALLREDUCE_NCCL_KERNEL_HH | ||
#define KERNEL_ALLREDUCE_NCCL_KERNEL_HH | ||
|
||
#include "kernel/collectors/all_reduce.h" | ||
#include "kernel/tensor.h" | ||
|
||
namespace refactor::kernel { | ||
|
||
struct AllReduceNccl final : public Kernel { | ||
AllReduceType opType; | ||
DataType dataType; | ||
size_t size; | ||
|
||
AllReduceNccl(AllReduceType, DataType, size_t) noexcept; | ||
|
||
static KernelBox build(AllReduceType, Tensor const &, Tensor const &) noexcept; | ||
static size_t typeId() noexcept; | ||
|
||
size_t kernelTypeId() const noexcept final; | ||
std::string_view description() const noexcept final; | ||
#ifdef USE_CUDA | ||
RoutineWorkspace lower(Resources &) const noexcept final; | ||
#endif | ||
}; | ||
|
||
}// namespace refactor::kernel | ||
|
||
#endif// KERNEL_ALLREDUCE_NCCL_KERNEL_HH |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#include "common.h" | ||
#include "nccl_communicator.hh" | ||
#include <chrono> | ||
#include <cstdlib> | ||
#include <filesystem> | ||
#include <fstream> | ||
#include <thread> | ||
|
||
|
||
namespace refactor::kernel::nccl { | ||
NcclCommunicator::NcclCommunicator(int worldSize, int rank) : worldSize_(worldSize), rank_(rank) { | ||
const std::string filePath("./nccl_id.bin"); | ||
|
||
ncclUniqueId commId; | ||
|
||
if (rank == 0) { | ||
checkNcclError(ncclGetUniqueId(&commId)); | ||
std::ofstream ofs(filePath, std::ios::binary); | ||
ofs.write((char *) &commId, sizeof(ncclUniqueId)); | ||
|
||
} else { | ||
auto begin = std::chrono::steady_clock::now(); | ||
while (!std::filesystem::exists(filePath)) { | ||
auto now = std::chrono::steady_clock::now(); | ||
ASSERT(now < begin + std::chrono::seconds(10), | ||
"time limit (10s) exceeded."); | ||
std::this_thread::sleep_for(std::chrono::milliseconds(100)); | ||
} | ||
std::ifstream ifs(filePath, std::ios::binary); | ||
ifs.read((char *) &commId, sizeof(ncclUniqueId)); | ||
} | ||
checkNcclError(ncclCommInitRank(&comm, worldSize, commId, rank)); | ||
|
||
if (rank == 0) { | ||
std::filesystem::remove(filePath); | ||
} | ||
|
||
printf("Rank %d established NCCL communicator.\n", rank); | ||
} | ||
|
||
NcclCommunicator::~NcclCommunicator() { | ||
checkNcclError(ncclCommFinalize(comm)); | ||
checkNcclError(ncclCommDestroy(comm)); | ||
} | ||
|
||
auto NcclCommunicator::typeId() noexcept -> size_t { | ||
static uint8_t ID = 1; | ||
return reinterpret_cast<size_t>(&ID); | ||
} | ||
auto NcclCommunicator::build(int worldSize, int rank) noexcept -> runtime::ResourceBox { | ||
return std::make_unique<NcclCommunicator>(worldSize, rank); | ||
} | ||
|
||
auto NcclCommunicator::resourceTypeId() const noexcept -> size_t { | ||
return typeId(); | ||
} | ||
auto NcclCommunicator::description() const noexcept -> std::string_view { | ||
return "NcclCommunicator"; | ||
} | ||
|
||
}// namespace refactor::kernel::nccl |
Oops, something went wrong.