-
Notifications
You must be signed in to change notification settings - Fork 103
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
TL/CUDA: Linear Broadcast for GPU (#948)
Adding a linear CUDA Broadcast implementation with Active set feature support. It gives a functional improvement and parity with other communication libraries: - Ability to place many ranks on a single GPU - No GPU blocking; communication is initiated from the host - Active set can be used to emulate P2P send/receive on top of the broadcast collective
- Loading branch information
Showing
21 changed files
with
681 additions
and
110 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
/** | ||
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
* | ||
* See file LICENSE for terms. | ||
*/ | ||
|
||
#include "bcast.h" | ||
#include "components/mc/ucc_mc.h" | ||
|
||
ucc_base_coll_alg_info_t | ||
ucc_tl_cuda_bcast_algs[UCC_TL_CUDA_BCAST_ALG_LAST + 1] = { | ||
[UCC_TL_CUDA_BCAST_ALG_LINEAR] = {.id = UCC_TL_CUDA_BCAST_ALG_LINEAR, | ||
.name = "linear", | ||
.desc = "linear bcast algorithm"}, | ||
[UCC_TL_CUDA_BCAST_ALG_LAST] = {.id = 0, .name = NULL, .desc = NULL}}; | ||
|
||
ucc_status_t ucc_tl_cuda_bcast_init(ucc_base_coll_args_t *coll_args, | ||
ucc_base_team_t *tl_team, | ||
ucc_coll_task_t **task_p) | ||
{ | ||
ucc_tl_cuda_team_t *team = ucc_derived_of(tl_team, ucc_tl_cuda_team_t); | ||
|
||
if (ucc_tl_cuda_team_topo_is_fully_connected(team->topo)) { | ||
return ucc_tl_cuda_bcast_linear_init(coll_args, tl_team, task_p); | ||
} else { | ||
return UCC_ERR_NOT_SUPPORTED; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
/** | ||
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
* | ||
* See file LICENSE for terms. | ||
*/ | ||
|
||
#ifndef BCAST_H_ | ||
#define BCAST_H_ | ||
|
||
#include "tl_cuda.h" | ||
#include "tl_cuda_coll.h" | ||
|
||
/* Ids of the broadcast algorithms implemented by TL/CUDA. */
enum {
    UCC_TL_CUDA_BCAST_ALG_LINEAR = 0, /* linear broadcast */
    UCC_TL_CUDA_BCAST_ALG_LAST        /* one past the last valid algorithm id */
};
|
||
/* Broadcast algorithm info table: one entry per algorithm id plus a trailing
 * entry at index UCC_TL_CUDA_BCAST_ALG_LAST. */
extern ucc_base_coll_alg_info_t | ||
ucc_tl_cuda_bcast_algs[UCC_TL_CUDA_BCAST_ALG_LAST + 1]; | ||
|
||
/* Default algorithm-selection string for broadcast.
 * NOTE(review): presumably "cuda" is the memory type and "@0" picks algorithm
 * id 0 (UCC_TL_CUDA_BCAST_ALG_LINEAR) — confirm against the selection parser. */
#define UCC_TL_CUDA_BCAST_DEFAULT_ALG_SELECT_STR "bcast:cuda:@0" | ||
|
||
/* Broadcast initializer: calls ucc_tl_cuda_bcast_linear_init() when the team
 * topology is fully connected, otherwise returns UCC_ERR_NOT_SUPPORTED. */
ucc_status_t ucc_tl_cuda_bcast_init(ucc_base_coll_args_t *coll_args, | ||
ucc_base_team_t *tl_team, | ||
ucc_coll_task_t **task_p); | ||
|
||
/* Initializer for the linear broadcast algorithm (defined outside this header).
 * NOTE(review): presumably stores the created task in *task_p — confirm
 * against the implementation. */
ucc_status_t ucc_tl_cuda_bcast_linear_init(ucc_base_coll_args_t *coll_args, | ||
ucc_base_team_t *tl_team, | ||
ucc_coll_task_t **task_p); | ||
|
||
static inline int ucc_tl_cuda_bcast_alg_from_str(const char *str) | ||
{ | ||
int i; | ||
for (i = 0; i < UCC_TL_CUDA_BCAST_ALG_LAST; i++) { | ||
if (0 == strcasecmp(str, ucc_tl_cuda_bcast_algs[i].name)) { | ||
break; | ||
} | ||
} | ||
return i; | ||
} | ||
|
||
#endif |
Oops, something went wrong.