From 4d7f4394d0e15ac84d5b45af83523ca551d3fd3d Mon Sep 17 00:00:00 2001 From: pjdevs Date: Thu, 26 May 2022 19:39:53 +0200 Subject: [PATCH] Working ocl_lazy --- kernel/c/life.c | 10 +++++----- kernel/ocl/life.cl | 39 +++++++++++++++++++++------------------ 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/kernel/c/life.c b/kernel/c/life.c index 2fe41c7..0de809d 100644 --- a/kernel/c/life.c +++ b/kernel/c/life.c @@ -84,7 +84,7 @@ void life_init_ocl_lazy (void) { life_init(); - const size_t changed_size = (DIM / TILE_W) * (DIM / TILE_H) * sizeof (unsigned); + const size_t changed_size = (GPU_SIZE_X / GPU_TILE_W) * (GPU_SIZE_Y / GPU_TILE_H) * sizeof (unsigned); last_changed_buffer = clCreateBuffer (context, CL_MEM_READ_WRITE, changed_size, NULL, NULL); if (!last_changed_buffer) @@ -98,7 +98,7 @@ void life_init_ocl_lazy (void) unsigned *tmp = malloc(changed_size); - for (unsigned i = 0; i < ((DIM / TILE_W) * (DIM / TILE_H)); ++i) + for (unsigned i = 0; i < changed_size / sizeof(unsigned); ++i) tmp[i] = 1; err = clEnqueueWriteBuffer (queue, last_changed_buffer, CL_TRUE, 0, @@ -292,7 +292,7 @@ unsigned life_compute_omp_tiled_barrier (unsigned nb_iter) { temp = do_tile (x, y, TILE_W, TILE_H, omp_get_thread_num()); - #pragma omp critical + #pragma omp atomic change |= temp; } } @@ -324,7 +324,7 @@ unsigned life_compute_omp_tiled_task (unsigned nb_iter) #pragma omp task { temp = do_tile (x, y, TILE_W, TILE_H, omp_get_thread_num()); - #pragma omp critical + #pragma omp atomic change |= temp; } } @@ -379,7 +379,7 @@ unsigned life_compute_omp_tiled_lazy (unsigned nb_iter) next_changed_table(y / TILE_H, x / TILE_W) = temp; - #pragma omp critical + #pragma omp atomic change |= temp; } } diff --git a/kernel/ocl/life.cl b/kernel/ocl/life.cl index 06ba114..0c483b9 100644 --- a/kernel/ocl/life.cl +++ b/kernel/ocl/life.cl @@ -28,41 +28,44 @@ __kernel void life_ocl_lazy (__global unsigned *in, __global unsigned *out, __gl int xloc = get_local_id (0); int yloc = get_local_id (1); - if (y <= 0 && y >= DIM - 1 && x <= 0 && x >= DIM - 1) - return; - local unsigned changed; if (xloc == 0 && yloc == 0) { changed = 0; - for (yloc = y - 1; yloc < y + 2; ++yloc) - for (xloc = x - 1; xloc < x + 2; ++xloc) - if (yloc >= 0 && yloc < (DIM / GPU_TILE_H) && xloc >= 0 && xloc < (DIM / GPU_TILE_W)) - changed |= last_changed[yloc * (DIM / GPU_TILE_H) + xloc]; + for (yloc = tile_y - 1; yloc < tile_y + 2; ++yloc) + for (xloc = tile_x - 1; xloc < tile_x + 2; ++xloc) + if (yloc >= 0 && yloc < (GPU_SIZE_Y / GPU_TILE_H) && xloc >= 0 && xloc < (GPU_SIZE_Y / GPU_TILE_W)) + changed |= last_changed[yloc * (GPU_SIZE_Y / GPU_TILE_H) + xloc]; - changed |= last_changed[tile_y * (DIM / GPU_TILE_H) + tile_x]; + changed |= last_changed[tile_y * (GPU_SIZE_Y / GPU_TILE_H) + tile_x]; } - barrier(CLK_LOCAL_MEM_FENCE); + barrier (CLK_LOCAL_MEM_FENCE); if (!changed) { return; } - unsigned n = 0; - unsigned me = in[y * DIM + x]; + if (y > 0 && y < DIM - 1 && x > 0 && x < DIM - 1) { + unsigned n = 0; + unsigned me = in[y * DIM + x]; - for (yloc = y - 1; yloc < y + 2; yloc++) - for (xloc = x - 1; xloc < x + 2; xloc++) - n += in[yloc * DIM + xloc]; + for (int yloc = y - 1; yloc < y + 2; yloc++) + for (int xloc = x - 1; xloc < x + 2; xloc++) + n += in[yloc * DIM + xloc]; - n = (n == 3 + me) | (n == 3); + n = (n == 3 + me) | (n == 3); - out[y * DIM + x] = n; + out[y * DIM + x] = n; - volatile __global unsigned* changed_ptr = next_changed + tile_y * (DIM / GPU_TILE_H) + tile_x; - atomic_or(changed_ptr, n != me); + volatile __global unsigned* changed_ptr = next_changed + tile_y * (GPU_SIZE_Y / GPU_TILE_H) + tile_x; + atomic_or(changed_ptr, n != me); + } + else { + volatile __global unsigned* changed_ptr = next_changed + tile_y * (GPU_SIZE_Y / GPU_TILE_H) + tile_x; + atomic_or(changed_ptr, 0); + } } // DO NOT MODIFY: this kernel updates the OpenGL texture buffer