Skip to content

Commit

Permalink
Working ocl_lazy
Browse files (browse the repository at this point in the history)
  • Loading branch information
pjdevs committed May 26, 2022
1 parent 08f3303 commit 4d7f439
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 23 deletions.
10 changes: 5 additions & 5 deletions kernel/c/life.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ void life_init_ocl_lazy (void)
{
life_init();

const size_t changed_size = (DIM / TILE_W) * (DIM / TILE_H) * sizeof (unsigned);
const size_t changed_size = (GPU_SIZE_X / GPU_TILE_W) * (GPU_SIZE_Y / GPU_TILE_H) * sizeof (unsigned);

last_changed_buffer = clCreateBuffer (context, CL_MEM_READ_WRITE, changed_size, NULL, NULL);
if (!last_changed_buffer)
Expand All @@ -98,7 +98,7 @@ void life_init_ocl_lazy (void)

unsigned *tmp = malloc(changed_size);

for (unsigned i = 0; i < ((DIM / TILE_W) * (DIM / TILE_H)); ++i)
for (unsigned i = 0; i < changed_size / sizeof(unsigned); ++i)
tmp[i] = 1;

err = clEnqueueWriteBuffer (queue, last_changed_buffer, CL_TRUE, 0,
Expand Down Expand Up @@ -292,7 +292,7 @@ unsigned life_compute_omp_tiled_barrier (unsigned nb_iter)
{
temp = do_tile (x, y, TILE_W, TILE_H, omp_get_thread_num());

#pragma omp critical
#pragma omp atomic
change |= temp;
}
}
Expand Down Expand Up @@ -324,7 +324,7 @@ unsigned life_compute_omp_tiled_task (unsigned nb_iter)
#pragma omp task
{
temp = do_tile (x, y, TILE_W, TILE_H, omp_get_thread_num());
#pragma omp critical
#pragma omp atomic
change |= temp;
}
}
Expand Down Expand Up @@ -379,7 +379,7 @@ unsigned life_compute_omp_tiled_lazy (unsigned nb_iter)

next_changed_table(y / TILE_H, x / TILE_W) = temp;

#pragma omp critical
#pragma omp atomic
change |= temp;
}
}
Expand Down
39 changes: 21 additions & 18 deletions kernel/ocl/life.cl
Original file line number Diff line number Diff line change
Expand Up @@ -28,41 +28,44 @@ __kernel void life_ocl_lazy (__global unsigned *in, __global unsigned *out, __gl
int xloc = get_local_id (0);
int yloc = get_local_id (1);

if (y <= 0 && y >= DIM - 1 && x <= 0 && x >= DIM - 1)
return;

local unsigned changed;

if (xloc == 0 && yloc == 0) {
changed = 0;

for (yloc = y - 1; yloc < y + 2; ++yloc)
for (xloc = x - 1; xloc < x + 2; ++xloc)
if (yloc >= 0 && yloc < (DIM / GPU_TILE_H) && xloc >= 0 && xloc < (DIM / GPU_TILE_W))
changed |= last_changed[yloc * (DIM / GPU_TILE_H) + xloc];
for (yloc = tile_y - 1; yloc < tile_y + 2; ++yloc)
for (xloc = tile_x - 1; xloc < tile_x + 2; ++xloc)
if (yloc >= 0 && yloc < (GPU_SIZE_Y / GPU_TILE_H) && xloc >= 0 && xloc < (GPU_SIZE_Y / GPU_TILE_W))
changed |= last_changed[yloc * (GPU_SIZE_Y / GPU_TILE_H) + xloc];

changed |= last_changed[tile_y * (DIM / GPU_TILE_H) + tile_x];
changed |= last_changed[tile_y * (GPU_SIZE_Y / GPU_TILE_H) + tile_x];
}

barrier(CLK_LOCAL_MEM_FENCE);
barrier (CLK_LOCAL_MEM_FENCE);

if (!changed) {
return;
}

unsigned n = 0;
unsigned me = in[y * DIM + x];
if (y > 0 && y < DIM - 1 && x > 0 && x < DIM - 1) {
unsigned n = 0;
unsigned me = in[y * DIM + x];

for (yloc = y - 1; yloc < y + 2; yloc++)
for (xloc = x - 1; xloc < x + 2; xloc++)
n += in[yloc * DIM + xloc];
for (int yloc = y - 1; yloc < y + 2; yloc++)
for (int xloc = x - 1; xloc < x + 2; xloc++)
n += in[yloc * DIM + xloc];

n = (n == 3 + me) | (n == 3);
n = (n == 3 + me) | (n == 3);

out[y * DIM + x] = n;
out[y * DIM + x] = n;

volatile __global unsigned* changed_ptr = next_changed + tile_y * (DIM / GPU_TILE_H) + tile_x;
atomic_or(changed_ptr, n != me);
volatile __global unsigned* changed_ptr = next_changed + tile_y * (GPU_SIZE_Y / GPU_TILE_H) + tile_x;
atomic_or(changed_ptr, n != me);
}
else {
volatile __global unsigned* changed_ptr = next_changed + tile_y * (GPU_SIZE_Y / GPU_TILE_H) + tile_x;
atomic_or(changed_ptr, 0);
}
}

// DO NOT MODIFY: this kernel updates the OpenGL texture buffer
Expand Down

0 comments on commit 4d7f439

Please sign in to comment.