Skip to content

Commit

Permalink
Capturing some debugging code.
Browse files Browse the repository at this point in the history
  • Loading branch information
bjpalmer committed Dec 13, 2024
1 parent 51d1582 commit a2f76a0
Show file tree
Hide file tree
Showing 2 changed files with 158 additions and 32 deletions.
39 changes: 21 additions & 18 deletions comex/src-mpi-pr/comex.c
Original file line number Diff line number Diff line change
Expand Up @@ -2804,13 +2804,13 @@ int comex_malloc(void *ptrs[], size_t size, comex_group_t group)
reg_entries_local[reg_entries_local_count++] = reg_entries[i];
}
}
// else if (g_state.hostid[reg_entries[i].rank]
// == g_state.hostid[my_world_rank])
else if (g_state.hostid[reg_entries[i].rank]
== g_state.hostid[my_world_rank])

else if (g_state.master[reg_entries[i].rank] ==
g_state.master[get_my_master_rank_with_same_hostid(g_state.rank,
g_state.node_size, smallest_rank_with_same_hostid, largest_rank_with_same_hostid,
num_progress_ranks_per_node, is_node_ranks_packed)] )
// else if (g_state.master[reg_entries[i].rank] ==
// g_state.master[get_my_master_rank_with_same_hostid(g_state.rank,
// g_state.node_size, smallest_rank_with_same_hostid, largest_rank_with_same_hostid,
// num_progress_ranks_per_node, is_node_ranks_packed)] )
#if 0
#if MASTER_IS_SMALLEST_SMP_RANK
else if (g_state.master[reg_entries[i].rank] ==
Expand Down Expand Up @@ -3215,6 +3215,9 @@ int comex_malloc_dev(void *ptrs[], size_t size, comex_group_t group)
reg_entries_local[reg_entries_local_count++] = reg_entries[i];
}
}
// else if (g_state.hostid[reg_entries[i].rank]
// == g_state.hostid[my_world_rank])

/* my master is the same as the master of the notifier */
else if (g_state.master[reg_entries[i].rank] ==
g_state.master[get_my_master_rank_with_same_hostid(g_state.rank,
Expand Down Expand Up @@ -7501,8 +7504,8 @@ STATIC void nb_put(void *src, void *dst, int bytes, int proc, nb_t *nb)

if (COMEX_ENABLE_PUT_SMP) {
/* put to SMP node */
// if (g_state.hostid[proc] == g_state.hostid[g_state.rank])
if (g_state.master[proc] == g_state.master[g_state.rank])
if (g_state.hostid[proc] == g_state.hostid[g_state.rank])
// if (g_state.master[proc] == g_state.master[g_state.rank])
{
reg_entry_t *reg_entry = NULL;
void *mapped_offset = NULL;
Expand Down Expand Up @@ -7714,8 +7717,8 @@ STATIC void nb_get(void *src, void *dst, int bytes, int proc, nb_t *nb)

if (COMEX_ENABLE_GET_SMP) {
/* get from SMP node */
// if (g_state.hostid[proc] == g_state.hostid[g_state.rank])
if (g_state.master[proc] == g_state.master[g_state.rank])
if (g_state.hostid[proc] == g_state.hostid[g_state.rank])
// if (g_state.master[proc] == g_state.master[g_state.rank])
{
reg_entry_t *reg_entry = NULL;
void *mapped_offset = NULL;
Expand Down Expand Up @@ -7905,8 +7908,8 @@ STATIC void nb_acc(int datatype, void *scale,

if (COMEX_ENABLE_ACC_SMP) {
/* acc to same SMP node */
// if (g_state.hostid[proc] == g_state.hostid[g_state.rank])
if (g_state.master[proc] == g_state.master[g_state.rank])
if (g_state.hostid[proc] == g_state.hostid[g_state.rank])
// if (g_state.master[proc] == g_state.master[g_state.rank])
{
reg_entry_t *reg_entry = NULL;
void *mapped_offset = NULL;
Expand Down Expand Up @@ -9585,8 +9588,8 @@ STATIC void nb_putv(
}
#endif
} else if (COMEX_ENABLE_PUT_SMP &&
// g_state.hostid[proc] == g_state.hostid[g_state.rank]) {
g_state.master[proc] == g_state.master[g_state.rank]) {
g_state.hostid[proc] == g_state.hostid[g_state.rank]) {
// g_state.master[proc] == g_state.master[g_state.rank]) {
/* put to process on same SMP node */
int j;
void **src;
Expand Down Expand Up @@ -9918,8 +9921,8 @@ STATIC void nb_getv(
#endif
}
else if (COMEX_ENABLE_GET_SMP &&
// g_state.hostid[proc] == g_state.hostid[g_state.rank]) {
g_state.master[proc] == g_state.master[g_state.rank]) {
g_state.hostid[proc] == g_state.hostid[g_state.rank]) {
// g_state.master[proc] == g_state.master[g_state.rank]) {
int j;
void **src;
void **dst;
Expand Down Expand Up @@ -10237,8 +10240,8 @@ STATIC void nb_accv(
#endif
}
else if (COMEX_ENABLE_PUT_SMP &&
// g_state.hostid[proc] == g_state.hostid[g_state.rank]) {
g_state.master[proc] == g_state.master[g_state.rank]) {
g_state.hostid[proc] == g_state.hostid[g_state.rank]) {
// g_state.master[proc] == g_state.master[g_state.rank]) {
if (fence_array[g_state.master[proc]]) {
_fence_master(g_state.master[proc]);
}
Expand Down
151 changes: 137 additions & 14 deletions global/testing/c_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,13 @@ void test_int_array(int on_device, int local_buf_on_device)
double tbeg;
double zero = 0.0;
int ok;

int me, lprocs, lpdx, lpdy;

/* get local rank and default group size */
me = GA_Nodeid();
lprocs = GA_Nnodes();
factor(lprocs,&lpdx,&lpdy);
printf("p[%d] lprocs: %d lpdx: %d lpdy: %d\n",wrank,lprocs,lpdx,lpdy);
tput = 0.0;
tget = 0.0;
tacc = 0.0;
Expand All @@ -182,29 +188,32 @@ void test_int_array(int on_device, int local_buf_on_device)
ndim = 2;
dims[0] = DIMSIZE;
dims[1] = DIMSIZE;
xinc = DIMSIZE/pdx;
yinc = DIMSIZE/pdy;
ipx = rank%pdx;
ipy = (rank-ipx)/pdx;
isx = (ipx+1)%pdx;
isy = (ipy+1)%pdy;
xinc = DIMSIZE/lpdx;
yinc = DIMSIZE/lpdy;
ipx = me%lpdx;
ipy = (me-ipx)/lpdx;
isx = (ipx+1)%lpdx;
isy = (ipy+1)%lpdy;
/* Guarantee some data exchange between nodes */
lo[0] = isx*xinc;
lo[1] = isy*yinc;
if (isx<pdx-1) {
if (isx<lpdx-1) {
hi[0] = (isx+1)*xinc-1;
} else {
hi[0] = DIMSIZE-1;
}
if (isy<pdy-1) {
if (isy<lpdy-1) {
hi[1] = (isy+1)*yinc-1;
} else {
hi[1] = DIMSIZE-1;
}
printf("p[%d] lo[0]: %d hi[0]: %d lo[1]: %d hi[i]: %d\n",
wrank,lo[0],hi[0],lo[1],hi[1]);
nelem = (hi[0]-lo[0]+1)*(hi[1]-lo[1]+1);

/* create a global array and initialize it to zero */
tbeg = GA_Wtime();
printf("p[%d] Got to 1\n",wrank);
g_a = NGA_Create_handle();
NGA_Set_data(g_a, ndim, dims, C_INT);
if (!on_device) {
Expand All @@ -214,6 +223,7 @@ void test_int_array(int on_device, int local_buf_on_device)
NGA_Allocate(g_a);
t_create += (GA_Wtime()-tbeg);

printf("p[%d] Got to 2\n",wrank);
/* allocate a local buffer and initialize it with values*/
nsize = (hi[0]-lo[0]+1)*(hi[1]-lo[1]+1);
if (local_buf_on_device) {
Expand All @@ -227,8 +237,10 @@ void test_int_array(int on_device, int local_buf_on_device)

for (n=0; n<NLOOP; n++) {
tbeg = GA_Wtime();
printf("p[%d] Got to 3\n",wrank);
GA_Zero(g_a);
GA_Fill(g_a,&zero);
printf("p[%d] Got to 4\n",wrank);
ld = (hi[1]-lo[1]+1);
if (local_buf_on_device) {
if (lo[0]<=hi[0] && lo[1]<=hi[1]) {
Expand Down Expand Up @@ -258,17 +270,20 @@ void test_int_array(int on_device, int local_buf_on_device)
t_chk += (GA_Wtime()-tbeg);
/* copy data to global array */
tbeg = GA_Wtime();
printf("p[%d] Got to 5\n",wrank);
NGA_Put(g_a, lo, hi, buf, &ld);
printf("p[%d] Got to 6\n",wrank);
tput += (GA_Wtime()-tbeg);
t_put += (GA_Wtime()-tbeg);
put_cnt += nsize;
tbeg = GA_Wtime();
GA_Sync();
t_sync += (GA_Wtime()-tbeg);
tbeg = GA_Wtime();
NGA_Distribution(g_a,rank,tlo,thi);
NGA_Distribution(g_a,me,tlo,thi);
printf("p[%d] Got to 7\n",wrank);
#if 1
if (rank == 0 && n == 0) printf("Completed NGA_Distribution\n");
if (me == 0 && n == 0) printf("Completed NGA_Distribution\n");
if (tlo[0]<=thi[0] && tlo[1]<=thi[1]) {
int tnelem = (thi[0]-tlo[0]+1)*(thi[1]-tlo[1]+1);
if (on_device) {
Expand Down Expand Up @@ -306,6 +321,7 @@ void test_int_array(int on_device, int local_buf_on_device)
GA_Sync();
t_sync += (GA_Wtime()-tbeg);
#endif
printf("p[%d] Got to 8\n",wrank);

/* zero out local buffer */
if (local_buf_on_device) {
Expand Down Expand Up @@ -509,8 +525,9 @@ void print_bw()
{
int iB = 1<<20;
double rB = 1.0/((double)iB);
int me = GA_Nodeid();
/* 12345678901234567890123456789012345678901234567890123456789012345 */
if (rank != 0) return;
if (me != 0) return;

printf("///////////////////////////////////////////////////////////////\n");
printf("// Put (MB/sec) // Get (MB/sec) // Acc (MB/sec) //\n");
Expand Down Expand Up @@ -1825,7 +1842,7 @@ int main(int argc, char **argv) {
printf("\n 1D arrays are of size %d\n",BLOCK1*nprocs);
printf("\n Number of loops in each test %d\n\n",NLOOP);
}
#if 1
#if 0
if (rank == 0) printf(" Testing integer array on device, local buffer on host\n");
test_int_array(1,0);
print_bw();
Expand Down Expand Up @@ -1908,7 +1925,6 @@ int main(int argc, char **argv) {
test_dbl_1d_array(0,local_buf_on_device);
print_bw();

#endif
if (rank == 0) printf(" Testing scatter/gather operations for double"
" array\n on device, local buffer on host\n");
test_dbl_scatter(1,0);
Expand All @@ -1929,6 +1945,113 @@ int main(int argc, char **argv) {
test_dbl_scatter(0,local_buf_on_device);
print_bw();

#endif
#if 1
/* Check to see if code runs on subgroups */
/* Don't run this test on an odd number of processors */
if (nprocs%2 == 0) {
int grp_size = nprocs/2;
int grp, world;
int *llist, *devIDs;
int ndev, icnt;
int me;
if (rank == 0) printf(" Test operations on subgroup\n");
free(list);
list = (int*)malloc(grp_size*sizeof(int));
for (i=0; i<grp_size; i++) {
if (rank < grp_size) {
list[i] = i;
} else {
list[i] = i + grp_size;
}
}
grp = GA_Pgroup_create(list, grp_size);
world = GA_Pgroup_get_default();
GA_Pgroup_set_default(grp);
me = GA_Nodeid();
printf("p[%d] rank: %d me: %d\n",wrank,rank,me);
printf("p[%d] world: %d grp_size: %d\n",wrank,world,grp_size);
llist = (int*)malloc(grp_size*sizeof(int));
devIDs = (int*)malloc(grp_size*sizeof(int));
GA_Device_host_list(llist, devIDs, &ndev, grp);
printf("p[%d] ndev: %d\n",wrank,ndev);
for (i=0; i<ndev; i++) {
printf("p[%d] list[%d]: %d devIDs[%d]: %d\n",wrank,i,llist[i],i,devIDs[i]);
}
free(llist);
free(devIDs);
if (ndev == grp_size) {
for (i=0; i<grp_size; i++) {
list[i] = i;
}
#if 1
if (rank < grp_size) {
if (me == 0) printf(" Testing operations on group 1\n");
#if 0
if (me == 0) printf(" Group 1: Testing integer array on"
" device, local buffer on host\n");
test_int_array(1,0);
print_bw();

if (me == 0) printf(" Group 1: Testing integer array on"
" device, local buffer on device\n");
test_int_array(1,local_buf_on_device);
print_bw();

#endif
if (me == 0) printf(" Group 1: Testing integer array on"
" host, local buffer on host\n");
test_int_array(0,0);
print_bw();
#if 0

if (me == 0) printf(" Group 1: Testing integer array on"
" host, local buffer on device\n");
test_int_array(0,local_buf_on_device);
print_bw();
#endif
}
#endif
fflush(stdout);
GA_Pgroup_sync(world);
#if 1
if (rank >= grp_size) {
if (me == 0) printf(" Testing operations on group 2\n");
#if 0
if (me == 0) printf(" Group 2: Testing integer array on"
" device, local buffer on host\n");
test_int_array(1,0);
print_bw();

if (me == 0) printf(" Group 2: Testing integer array on"
" device, local buffer on device\n");
test_int_array(1,local_buf_on_device);
print_bw();

#endif
if (me == 0) printf(" Group 2: Testing integer array on"
" host, local buffer on host\n");
test_int_array(0,0);
print_bw();

#if 0
if (me == 0) printf(" Group 2: Testing integer array on"
" host, local buffer on device\n");
test_int_array(0,local_buf_on_device);
print_bw();
#endif
}
#endif
} else {
printf("p[%d] Number of processors in group does not match"
" number of devices. Ndev: %d Grp_size: %d\n",rank,ndev,grp_size);
}
GA_Pgroup_set_default(world);
free(list);
}
#endif


t_tot = GA_Wtime()-tbeg;
/* Print out timing stats */
GA_Dgop(&t_put,1,"+");
Expand Down

0 comments on commit a2f76a0

Please sign in to comment.