Commit

Merge pull request #1017 from ykempf/adapt_lb_cell_send_passes
Split cell transfer into a number of passes in load balance.
ykempf authored Sep 17, 2024
2 parents c81422d + 2836193 commit f995f8e
Showing 1 changed file with 51 additions and 1 deletion.
grid.cpp (52 changes: 51 additions & 1 deletion)
@@ -517,7 +517,57 @@ void balanceLoad(dccrg::Dccrg<SpatialCell,dccrg::Cartesian_Geometry>& mpiGrid, S

/*transfer cells in parts to preserve memory*/
phiprof::Timer transfersTimer {"Data transfers"};
- const uint64_t num_part_transfers=5;

// Idea: split the cell sending below into enough passes that no single pass sends more than
// transfer_block_fraction_limit of this task's total block count. This helps reduce memory peaks during load balancing.
creal transfer_block_fraction_limit = 0.1;
uint64_t num_part_transfers_local = 1, num_part_transfers, outgoing_block_count = 0, total_block_count = 0;
bool count_determined = false;
Real outgoing_block_fraction;

// Count the blocks in outgoing cells and the total block count on this task
for (unsigned int i=0; i<outgoing_cells_list.size(); i++) {
CellID cell_id=outgoing_cells_list[i];
SpatialCell* cell = mpiGrid[cell_id];
outgoing_block_count += cell->get_number_of_all_velocity_blocks();
}
for (unsigned int i=0; i<cells.size(); i++) {
CellID cell_id=cells[i];
SpatialCell* cell = mpiGrid[cell_id];
total_block_count += cell->get_number_of_all_velocity_blocks();
}
outgoing_block_fraction = (Real)outgoing_block_count / ((Real)total_block_count + 1);
// if we're not exceeding transfer_block_fraction_limit we're good
if(outgoing_block_fraction < transfer_block_fraction_limit) {
count_determined = true;
}
// otherwise we increase the number of chunks until all chunks are below transfer_block_fraction_limit
while(!count_determined) {
uint64_t transfer_part; // checked after the for loop to see whether it completed
for (transfer_part=0; transfer_part<num_part_transfers_local; transfer_part++) {
uint64_t transfer_part_block_count=0;
for (unsigned int i=0;i<outgoing_cells_list.size();i++){
CellID cell_id=outgoing_cells_list[i];
if (cell_id%num_part_transfers_local==transfer_part) {
transfer_part_block_count += mpiGrid[cell_id]->get_number_of_all_velocity_blocks();
}
}
outgoing_block_fraction = (Real)transfer_part_block_count / ((Real)total_block_count + 1);
if(outgoing_block_fraction > transfer_block_fraction_limit) {
num_part_transfers_local *= 2;
break; // out of for
}
}
if((transfer_part == num_part_transfers_local // the for loop above ran to completion without breaking out,
&& outgoing_block_fraction <= transfer_block_fraction_limit) // i.e. the last chunk checked was also within the limit,
|| num_part_transfers_local >= cells.size() // or we already have at least as many chunks as local cells
) {
count_determined = true; // otherwise a chunk was too big, num_part_transfers_local was doubled and we try again
}
}
// ...and finally agree on a common number of passes across all MPI tasks by taking the maximum.
MPI_Allreduce(&num_part_transfers_local, &num_part_transfers, 1, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD);

for (uint64_t transfer_part=0; transfer_part<num_part_transfers; transfer_part++) {
//Set transfers on/off for the incoming cells in this transfer set and prepare for receive
for (unsigned int i=0;i<incoming_cells_list.size();i++){
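For readers skimming the diff, the chunk-count logic above can be read as a standalone function. The following is a minimal sketch, not code from the repository: the function name chooseNumPasses, the per-cell block-count map, and the cap at the number of outgoing cells are illustrative assumptions (the committed code caps at the task's full local cell count, cells.size()).

#include <cstdint>
#include <unordered_map>

// Sketch: pick the smallest power-of-two number of passes such that no pass
// (the cells with cell_id % numPasses == pass) carries more than
// fractionLimit of the task's total block count.
uint64_t chooseNumPasses(const std::unordered_map<uint64_t, uint64_t>& outgoingBlocks, // cell ID -> block count
                         uint64_t totalBlocks,
                         double fractionLimit = 0.1) {
   uint64_t numPasses = 1;
   while (true) {
      bool allPassesSmallEnough = true;
      for (uint64_t pass = 0; pass < numPasses; ++pass) {
         uint64_t passBlocks = 0;
         for (const auto& [cellID, blocks] : outgoingBlocks) {
            if (cellID % numPasses == pass) {
               passBlocks += blocks;
            }
         }
         // The +1 in the denominator mirrors the committed code and avoids division by zero.
         if (static_cast<double>(passBlocks) / (static_cast<double>(totalBlocks) + 1.0) > fractionLimit) {
            allPassesSmallEnough = false;
            break;
         }
      }
      // Stop when every pass fits, or when splitting further makes no sense
      // (illustrative cap; the committed code uses the local cell count instead).
      if (allPassesSmallEnough || numPasses >= outgoingBlocks.size()) {
         return numPasses;
      }
      numPasses *= 2;
   }
}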

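As a hypothetical worked example (the numbers are illustrative, not from the commit): a task holding 1,000,000 blocks in total, of which 450,000 sit in cells about to be sent away, would exceed the 10% limit with a single pass (45%). Doubling gives roughly 22.5% per pass with 2 passes and 11.3% with 4, still too much, so the loop settles on 8 passes at roughly 5.6% each, assuming the modulo split spreads the outgoing cells evenly. The MPI_Allreduce with MPI_MAX then makes every rank run the same, largest, number of passes, presumably so that sending and receiving ranks stay in step during each per-pass transfer.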