From 1e75d637065f4c0969b2d83f4f4cbe96ea24ebc9 Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Sun, 15 Oct 2023 11:14:05 +0200 Subject: [PATCH] removed a comment, simplified bpp calculations --- src_c/simd_fill.h | 2 -- src_c/simd_surface_fill_avx2.c | 13 +++++-------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src_c/simd_fill.h b/src_c/simd_fill.h index 36d98afb09..05c30d095a 100644 --- a/src_c/simd_fill.h +++ b/src_c/simd_fill.h @@ -4,8 +4,6 @@ int _pg_has_avx2(); -// SSE2 functions - // AVX2 functions int surface_fill_blend_add_avx2(SDL_Surface *surface, SDL_Rect *rect, diff --git a/src_c/simd_surface_fill_avx2.c b/src_c/simd_surface_fill_avx2.c index aedfaffb11..101a20a2a0 100644 --- a/src_c/simd_surface_fill_avx2.c +++ b/src_c/simd_surface_fill_avx2.c @@ -29,19 +29,16 @@ _pg_has_avx2() #define SETUP_AVX2_FILLER(COLOR_PROCESS_CODE) \ /* initialize surface data */ \ int width = rect->w, height = rect->h; \ - int bpp = surface->format->BytesPerPixel; \ - int skip = (surface->pitch - width * bpp) >> 2; \ - int pxl_skip = bpp >> 2; \ + int skip = surface->pitch / 4 - width; \ /* indicates the number of pixels that can't be processed in 8-pixel \ * blocks */ \ int pxl_excess = width % 8; \ /* indicates the number of 8-pixel blocks that can be processed */ \ int n_iters_8 = width / 8; \ - int excess_skip = pxl_excess * pxl_skip, block_skip = pxl_skip * 8; \ int i; \ /* load pixel data */ \ - Uint32 *pixels = (Uint32 *)surface->pixels + \ - rect->y * (surface->pitch >> 2) + rect->x * pxl_skip; \ + Uint32 *pixels = \ + (Uint32 *)surface->pixels + rect->y * (surface->pitch / 4) + rect->x; \ \ __m256i mm256_dst; \ __m256i mask = \ @@ -69,7 +66,7 @@ _pg_has_avx2() /* store 8 pixels */ \ _mm256_storeu_si256((__m256i *)pixels, mm256_dst); \ \ - pixels += block_skip; \ + pixels += 8; \ } \ \ if (pxl_excess) { \ @@ -81,7 +78,7 @@ _pg_has_avx2() /* store up to 7 pixels */ \ _mm256_maskstore_epi32((int *)pixels, mask, mm256_dst); \ \ - pixels += excess_skip; \ + pixels += pxl_excess; \ } \ \ pixels += skip; \