zilliztech
Showing with 106 additions and 95 deletions.

+0 −2 CMakeLists.txt

+1 −1 ci/docker/builder/cpu/ubuntu22.04/arm64/Dockerfile

+1 −1 cmake/libs/libfaiss.cmake

+20 −12 src/simd/distances_sve.cc

+20 −12 src/simd/distances_sve.h

+64 −67 src/simd/hook.cc
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -262,5 +262,3 @@ install(TARGETS knowhere
         DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
 install(DIRECTORY "${PROJECT_SOURCE_DIR}/include/knowhere"
         DESTINATION "${CMAKE_INSTALL_PREFIX}/include")
-
-
diff --git a/ci/docker/builder/cpu/ubuntu22.04/arm64/Dockerfile b/ci/docker/builder/cpu/ubuntu22.04/arm64/Dockerfile
@@ -17,7 +17,7 @@ RUN apt update \
     && unlink gcc && ln -s gcc-12 gcc \
     && unlink g++ && ln -s g++-12 g++ \
     && rm -rf /usr/lib/gcc/aarch64-linux-gnu/11 \ #Installed by default on ubuntu22.04, need to remove to fetch latest SVE header.
-    && unlink python3 && ln -s python3.11 python3 \ 
+    && unlink python3 && ln -s python3.11 python3 \
     && curl -sS https://bootstrap.pypa.io/get-pip.py | python3 \
     && export PATH=$PATH:$HOME/.local/bin \
     && pip3 install wheel \

diff --git a/cmake/libs/libfaiss.cmake b/cmake/libs/libfaiss.cmake
@@ -52,7 +52,7 @@ if(__AARCH64)
   # Add separate utils for NEON and SVE
   add_library(utils_neon OBJECT src/simd/distances_neon.cc)
   add_library(utils_sve OBJECT src/simd/distances_sve.cc)
-  
+
   target_compile_options(utils_neon PRIVATE -march=armv8-a+simd)
   target_compile_options(utils_sve PRIVATE -march=armv8-a+sve)
 

diff --git a/src/simd/distances_sve.cc b/src/simd/distances_sve.cc
@@ -9,16 +9,19 @@
 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 // or implied. See the License for the specific language governing permissions and limitations under the License.
 
-
 #include "distances_sve.h"
+
 #include <arm_sve.h>
+
 #include <cmath>
+
 #include "faiss/impl/platform_macros.h"
 #pragma GCC optimize("O3,fast-math,inline")
 #if defined(__ARM_FEATURE_SVE)
 namespace faiss {
 
-float fvec_L2sqr_sve(const float* x, const float* y, size_t d) {
+float
+fvec_L2sqr_sve(const float* x, const float* y, size_t d) {
     svfloat32_t sum = svdup_f32(0.0f);
     size_t i = 0;
 
@@ -38,7 +41,8 @@ float fvec_L2sqr_sve(const float* x, const float* y, size_t d) {
     return svaddv_f32(svptrue_b32(), sum);
 }
 
-float fvec_L1_sve(const float* x, const float* y, size_t d) {
+float
+fvec_L1_sve(const float* x, const float* y, size_t d) {
     svfloat32_t sum = svdup_f32(0.0f);
     size_t i = 0;
 
@@ -58,7 +62,8 @@ float fvec_L1_sve(const float* x, const float* y, size_t d) {
     return svaddv_f32(svptrue_b32(), sum);
 }
 
-float fvec_Linf_sve(const float* x, const float* y, size_t d) {
+float
+fvec_Linf_sve(const float* x, const float* y, size_t d) {
     svfloat32_t max_val = svdup_f32(0.0f);
     size_t i = 0;
 
@@ -78,7 +83,8 @@ float fvec_Linf_sve(const float* x, const float* y, size_t d) {
     return svmaxv_f32(svptrue_b32(), max_val);
 }
 
-float fvec_norm_L2sqr_sve(const float* x, size_t d) {
+float
+fvec_norm_L2sqr_sve(const float* x, size_t d) {
     svfloat32_t sum = svdup_f32(0.0f);
     size_t i = 0;
 
@@ -96,7 +102,8 @@ float fvec_norm_L2sqr_sve(const float* x, size_t d) {
     return svaddv_f32(svptrue_b32(), sum);
 }
 
-void fvec_madd_sve(size_t n, const float* a, float bf, const float* b, float* c) {
+void
+fvec_madd_sve(size_t n, const float* a, float bf, const float* b, float* c) {
     size_t i = 0;
     svfloat32_t bf_vec = svdup_f32(bf);
 
@@ -114,7 +121,8 @@ void fvec_madd_sve(size_t n, const float* a, float bf, const float* b, float* c)
     }
 }
 
-int fvec_madd_and_argmin_sve(size_t n, const float* a, float bf, const float* b, float* c) {
+int
+fvec_madd_and_argmin_sve(size_t n, const float* a, float bf, const float* b, float* c) {
     size_t i = 0;
     svfloat32_t min_val = svdup_f32(INFINITY);
     svuint32_t min_idx = svdup_u32(0);
@@ -147,8 +155,9 @@ int fvec_madd_and_argmin_sve(size_t n, const float* a, float bf, const float* b,
     return static_cast<int>(min_index);
 }
 
-void fvec_L2sqr_batch_4_sve(const float* x, const float* y0, const float* y1, const float* y2, const float* y3,
-                            const size_t d, float& dis0, float& dis1, float& dis2, float& dis3) {
+void
+fvec_L2sqr_batch_4_sve(const float* x, const float* y0, const float* y1, const float* y2, const float* y3,
+                       const size_t d, float& dis0, float& dis1, float& dis2, float& dis3) {
     float d0 = 0;
     float d1 = 0;
     float d2 = 0;
@@ -171,7 +180,6 @@ void fvec_L2sqr_batch_4_sve(const float* x, const float* y0, const float* y1, co
     dis3 = d3;
 }
 
+}  // namespace faiss
 
-} // namespace faiss
-
-#endif
+#endif
diff --git a/src/simd/distances_sve.h b/src/simd/distances_sve.h
@@ -9,29 +9,37 @@
 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 // or implied. See the License for the specific language governing permissions and limitations under the License.
 
+#include <arm_sve.h>
 
 #include <cstdint>
 #include <cstdio>
-#include <arm_sve.h>
 #if defined(__ARM_FEATURE_SVE)
 namespace faiss {
 
-float fvec_L2sqr_sve(const float* x, const float* y, size_t d);
+float
+fvec_L2sqr_sve(const float* x, const float* y, size_t d);
 
-float fvec_L1_sve(const float* x, const float* y, size_t d);
+float
+fvec_L1_sve(const float* x, const float* y, size_t d);
 
-float fvec_Linf_sve(const float* x, const float* y, size_t d);
+float
+fvec_Linf_sve(const float* x, const float* y, size_t d);
 
-float fvec_norm_L2sqr_sve(const float* x, size_t d);
+float
+fvec_norm_L2sqr_sve(const float* x, size_t d);
 
-void fvec_madd_sve(size_t n, const float* a, float bf, const float* b, float* c);
+void
+fvec_madd_sve(size_t n, const float* a, float bf, const float* b, float* c);
 
-int fvec_madd_and_argmin_sve(size_t n, const float* a, float bf, const float* b, float* c);
+int
+fvec_madd_and_argmin_sve(size_t n, const float* a, float bf, const float* b, float* c);
 
-int32_t ivec_L2sqr_sve(const int8_t* x, const int8_t* y, size_t d);
+int32_t
+ivec_L2sqr_sve(const int8_t* x, const int8_t* y, size_t d);
 
-void fvec_L2sqr_batch_4_sve(const float* x, const float* y0, const float* y1, const float* y2, const float* y3,
-                          const size_t d, float& dis0, float& dis1, float& dis2, float& dis3);
+void
+fvec_L2sqr_batch_4_sve(const float* x, const float* y0, const float* y1, const float* y2, const float* y3,
+                       const size_t d, float& dis0, float& dis1, float& dis2, float& dis3);
 
-} // namespace faiss
-#endif
+}  // namespace faiss
+#endif
diff --git a/src/simd/hook.cc b/src/simd/hook.cc
@@ -21,7 +21,7 @@
 #if defined(__ARM_FEATURE_SVE)
 #include "distances_sve.h"
 #endif
- 
+
 #if defined(__ARM_NEON)
 #include "distances_neon.h"
 #endif
@@ -139,17 +139,16 @@ enable_patch_for_fp32_bf16() {
 
 #if defined(__aarch64__)
 
-    #if defined(__ARM_NEON) && !defined(__ARM_FEATURE_SVE)
-
-        fvec_L2sqr = fvec_L2sqr_neon_bf16_patch;
-        fvec_L2sqr_batch_4 = fvec_L2sqr_batch_4_neon_bf16_patch;
-        fvec_inner_product = fvec_inner_product_neon_bf16_patch;
-        fvec_inner_product_batch_4 = fvec_inner_product_batch_4_neon_bf16_patch;
+#if defined(__ARM_NEON) && !defined(__ARM_FEATURE_SVE)
 
-    #endif
+    fvec_L2sqr = fvec_L2sqr_neon_bf16_patch;
+    fvec_L2sqr_batch_4 = fvec_L2sqr_batch_4_neon_bf16_patch;
+    fvec_inner_product = fvec_inner_product_neon_bf16_patch;
+    fvec_inner_product_batch_4 = fvec_inner_product_batch_4_neon_bf16_patch;
 
 #endif
 
+#endif
 }
 
 void
@@ -305,68 +304,66 @@ fvec_hook(std::string& simd_type) {
 
 #if defined(__aarch64__)
 
-    #if defined(__ARM_FEATURE_SVE)
-        // ToDo: Enable remaining functions on SVE
-
-        fvec_L2sqr = fvec_L2sqr_sve;
-        fvec_L1 = fvec_L1_sve;
-        fvec_Linf = fvec_Linf_sve;
-        fvec_norm_L2sqr = fvec_norm_L2sqr_sve;
-        fvec_madd = fvec_madd_sve;
-        fvec_madd_and_argmin = fvec_madd_and_argmin_sve;
-
-        fvec_inner_product = fvec_inner_product_neon;
-        fvec_L2sqr_ny = fvec_L2sqr_ny_neon;
-        fvec_inner_products_ny = fvec_inner_products_ny_neon;
-
-
-        ivec_inner_product = ivec_inner_product_neon;
-        ivec_L2sqr = ivec_L2sqr_neon;
-
-        fp16_vec_inner_product = fp16_vec_inner_product_neon;
-        fp16_vec_L2sqr = fp16_vec_L2sqr_neon;
-        fp16_vec_norm_L2sqr = fp16_vec_norm_L2sqr_neon;
-
-        bf16_vec_inner_product = bf16_vec_inner_product_neon;
-        bf16_vec_L2sqr = bf16_vec_L2sqr_neon;
-        bf16_vec_norm_L2sqr = bf16_vec_norm_L2sqr_neon;
-        fvec_L2sqr_batch_4 = fvec_L2sqr_batch_4_sve;
-        simd_type = "SVE";
-        support_pq_fast_scan = true;
-
-    #elif defined(__ARM_NEON)
-        // NEON functions
-        fvec_inner_product = fvec_inner_product_neon;
-        fvec_L2sqr = fvec_L2sqr_neon;
-        fvec_L1 = fvec_L1_neon;
-        fvec_Linf = fvec_Linf_neon;
-        fvec_norm_L2sqr = fvec_norm_L2sqr_neon;
-        fvec_L2sqr_ny = fvec_L2sqr_ny_neon;
-        fvec_inner_products_ny = fvec_inner_products_ny_neon;
-        fvec_madd = fvec_madd_neon;
-        fvec_madd_and_argmin = fvec_madd_and_argmin_neon;
-
-        ivec_inner_product = ivec_inner_product_neon;
-        ivec_L2sqr = ivec_L2sqr_neon;
-
-        fp16_vec_inner_product = fp16_vec_inner_product_neon;
-        fp16_vec_L2sqr = fp16_vec_L2sqr_neon;
-        fp16_vec_norm_L2sqr = fp16_vec_norm_L2sqr_neon;
-
-        bf16_vec_inner_product = bf16_vec_inner_product_neon;
-        bf16_vec_L2sqr = bf16_vec_L2sqr_neon;
-        bf16_vec_norm_L2sqr = bf16_vec_norm_L2sqr_neon;
-
-        fvec_inner_product_batch_4 = fvec_inner_product_batch_4_neon;
-        fvec_L2sqr_batch_4 = fvec_L2sqr_batch_4_neon;
-
-        simd_type = "NEON";
-        support_pq_fast_scan = true;
-
-    #endif
+#if defined(__ARM_FEATURE_SVE)
+    // ToDo: Enable remaining functions on SVE
+
+    fvec_L2sqr = fvec_L2sqr_sve;
+    fvec_L1 = fvec_L1_sve;
+    fvec_Linf = fvec_Linf_sve;
+    fvec_norm_L2sqr = fvec_norm_L2sqr_sve;
+    fvec_madd = fvec_madd_sve;
+    fvec_madd_and_argmin = fvec_madd_and_argmin_sve;
+
+    fvec_inner_product = fvec_inner_product_neon;
+    fvec_L2sqr_ny = fvec_L2sqr_ny_neon;
+    fvec_inner_products_ny = fvec_inner_products_ny_neon;
+
+    ivec_inner_product = ivec_inner_product_neon;
+    ivec_L2sqr = ivec_L2sqr_neon;
+
+    fp16_vec_inner_product = fp16_vec_inner_product_neon;
+    fp16_vec_L2sqr = fp16_vec_L2sqr_neon;
+    fp16_vec_norm_L2sqr = fp16_vec_norm_L2sqr_neon;
+
+    bf16_vec_inner_product = bf16_vec_inner_product_neon;
+    bf16_vec_L2sqr = bf16_vec_L2sqr_neon;
+    bf16_vec_norm_L2sqr = bf16_vec_norm_L2sqr_neon;
+    fvec_L2sqr_batch_4 = fvec_L2sqr_batch_4_sve;
+    simd_type = "SVE";
+    support_pq_fast_scan = true;
+
+#elif defined(__ARM_NEON)
+    // NEON functions
+    fvec_inner_product = fvec_inner_product_neon;
+    fvec_L2sqr = fvec_L2sqr_neon;
+    fvec_L1 = fvec_L1_neon;
+    fvec_Linf = fvec_Linf_neon;
+    fvec_norm_L2sqr = fvec_norm_L2sqr_neon;
+    fvec_L2sqr_ny = fvec_L2sqr_ny_neon;
+    fvec_inner_products_ny = fvec_inner_products_ny_neon;
+    fvec_madd = fvec_madd_neon;
+    fvec_madd_and_argmin = fvec_madd_and_argmin_neon;
+
+    ivec_inner_product = ivec_inner_product_neon;
+    ivec_L2sqr = ivec_L2sqr_neon;
+
+    fp16_vec_inner_product = fp16_vec_inner_product_neon;
+    fp16_vec_L2sqr = fp16_vec_L2sqr_neon;
+    fp16_vec_norm_L2sqr = fp16_vec_norm_L2sqr_neon;
+
+    bf16_vec_inner_product = bf16_vec_inner_product_neon;
+    bf16_vec_L2sqr = bf16_vec_L2sqr_neon;
+    bf16_vec_norm_L2sqr = bf16_vec_norm_L2sqr_neon;
+
+    fvec_inner_product_batch_4 = fvec_inner_product_batch_4_neon;
+    fvec_L2sqr_batch_4 = fvec_L2sqr_batch_4_neon;
+
+    simd_type = "NEON";
+    support_pq_fast_scan = true;
 
 #endif
 
+#endif
 
 // ToDo MG: include VSX intrinsics via distances_vsx once _ref tests succeed
 #if defined(__powerpc64__)
Original file line number	Diff line number	Diff line change
		@@ -262,5 +262,3 @@ install(TARGETS knowhere
		DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
		install(DIRECTORY "${PROJECT_SOURCE_DIR}/include/knowhere"
		DESTINATION "${CMAKE_INSTALL_PREFIX}/include")