diff --git a/meson.build b/meson.build index 7a42537a23..312d9f22a6 100644 --- a/meson.build +++ b/meson.build @@ -292,9 +292,13 @@ simd_sse2_neon_flags = [] if host_machine.cpu_family() == 'arm' # first check if compiler supports the flag, and use it then. Needed only # on 32-bit armv7. - flag = '-mfpu=neon' - if cc.has_argument(flag) - simd_sse2_neon_flags += flag + flags = ['-mfpu=neon', '-march=armv7-a'] + if ( + cc.has_argument(flags[0]) + and cc.has_argument(flags[1]) + and host_machine.cpu() == 'armv7l' + ) + simd_sse2_neon_flags += flags simd_sse2_neon = true endif elif host_machine.cpu_family() == 'aarch64' diff --git a/src_c/simd_fill.h b/src_c/simd_fill.h index 50157b917c..db80008c1d 100644 --- a/src_c/simd_fill.h +++ b/src_c/simd_fill.h @@ -21,11 +21,6 @@ #endif #endif -#if PG_ENABLE_ARM_NEON -// sse2neon.h is from here: https://github.com/DLTcollab/sse2neon -#include "include/sse2neon.h" -#endif /* PG_ENABLE_ARM_NEON */ - #if defined(__SSE2__) #define PG_ENABLE_SSE_NEON 1 #elif PG_ENABLE_ARM_NEON diff --git a/src_c/simd_shared.h b/src_c/simd_shared.h index d53ee5beb0..a697fffb0c 100644 --- a/src_c/simd_shared.h +++ b/src_c/simd_shared.h @@ -33,11 +33,6 @@ pg_has_avx2(); #endif #endif -#if PG_ENABLE_ARM_NEON -// sse2neon.h is from here: https://github.com/DLTcollab/sse2neon -#include "include/sse2neon.h" -#endif /* PG_ENABLE_ARM_NEON */ - /* This defines PG_ENABLE_SSE_NEON as True if either SSE or NEON is available * at compile time. Since we do compile time translation of SSE2->NEON, they * have the same code paths, so this reduces code duplication of those paths. diff --git a/src_c/simd_surface_fill_sse2.c b/src_c/simd_surface_fill_sse2.c index f095462c78..2d8a7e17b8 100644 --- a/src_c/simd_surface_fill_sse2.c +++ b/src_c/simd_surface_fill_sse2.c @@ -1,5 +1,10 @@ #include "simd_fill.h" +#if PG_ENABLE_ARM_NEON +// sse2neon.h is from here: https://github.com/DLTcollab/sse2neon +#include "include/sse2neon.h" +#endif /* PG_ENABLE_ARM_NEON */ + #define BAD_SSE2_FUNCTION_CALL \ printf( \ "Fatal Error: Attempted calling an SSE2 function when both compile " \