Skip to content
Permalink

Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories. View the default comparison for this range or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also . Learn more about diff comparisons here.
base repository: zilliztech/knowhere
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: dde2c81e4e99d9fb38d612266b536117cb4e89e2
Choose a base ref
..
head repository: zilliztech/knowhere
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 9276ee818e8e6a03dff1bc32f092e3e68832c168
Choose a head ref
Showing with 106 additions and 95 deletions.
  1. +0 −2 CMakeLists.txt
  2. +1 −1 ci/docker/builder/cpu/ubuntu22.04/arm64/Dockerfile
  3. +1 −1 cmake/libs/libfaiss.cmake
  4. +20 −12 src/simd/distances_sve.cc
  5. +20 −12 src/simd/distances_sve.h
  6. +64 −67 src/simd/hook.cc
2 changes: 0 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -262,5 +262,3 @@ install(TARGETS knowhere
DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
install(DIRECTORY "${PROJECT_SOURCE_DIR}/include/knowhere"
DESTINATION "${CMAKE_INSTALL_PREFIX}/include")


2 changes: 1 addition & 1 deletion ci/docker/builder/cpu/ubuntu22.04/arm64/Dockerfile
Original file line number Diff line number Diff line change
@@ -17,7 +17,7 @@ RUN apt update \
&& unlink gcc && ln -s gcc-12 gcc \
&& unlink g++ && ln -s g++-12 g++ \
&& rm -rf /usr/lib/gcc/aarch64-linux-gnu/11 \ #Installed by default on ubuntu22.04, need to remove to fetch latest SVE header.
&& unlink python3 && ln -s python3.11 python3 \
&& unlink python3 && ln -s python3.11 python3 \
&& curl -sS https://bootstrap.pypa.io/get-pip.py | python3 \
&& export PATH=$PATH:$HOME/.local/bin \
&& pip3 install wheel \
2 changes: 1 addition & 1 deletion cmake/libs/libfaiss.cmake
Original file line number Diff line number Diff line change
@@ -52,7 +52,7 @@ if(__AARCH64)
# Add separate utils for NEON and SVE
add_library(utils_neon OBJECT src/simd/distances_neon.cc)
add_library(utils_sve OBJECT src/simd/distances_sve.cc)

target_compile_options(utils_neon PRIVATE -march=armv8-a+simd)
target_compile_options(utils_sve PRIVATE -march=armv8-a+sve)

32 changes: 20 additions & 12 deletions src/simd/distances_sve.cc
Original file line number Diff line number Diff line change
@@ -9,16 +9,19 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.


#include "distances_sve.h"

#include <arm_sve.h>

#include <cmath>

#include "faiss/impl/platform_macros.h"
#pragma GCC optimize("O3,fast-math,inline")
#if defined(__ARM_FEATURE_SVE)
namespace faiss {

float fvec_L2sqr_sve(const float* x, const float* y, size_t d) {
float
fvec_L2sqr_sve(const float* x, const float* y, size_t d) {
svfloat32_t sum = svdup_f32(0.0f);
size_t i = 0;

@@ -38,7 +41,8 @@ float fvec_L2sqr_sve(const float* x, const float* y, size_t d) {
return svaddv_f32(svptrue_b32(), sum);
}

float fvec_L1_sve(const float* x, const float* y, size_t d) {
float
fvec_L1_sve(const float* x, const float* y, size_t d) {
svfloat32_t sum = svdup_f32(0.0f);
size_t i = 0;

@@ -58,7 +62,8 @@ float fvec_L1_sve(const float* x, const float* y, size_t d) {
return svaddv_f32(svptrue_b32(), sum);
}

float fvec_Linf_sve(const float* x, const float* y, size_t d) {
float
fvec_Linf_sve(const float* x, const float* y, size_t d) {
svfloat32_t max_val = svdup_f32(0.0f);
size_t i = 0;

@@ -78,7 +83,8 @@ float fvec_Linf_sve(const float* x, const float* y, size_t d) {
return svmaxv_f32(svptrue_b32(), max_val);
}

float fvec_norm_L2sqr_sve(const float* x, size_t d) {
float
fvec_norm_L2sqr_sve(const float* x, size_t d) {
svfloat32_t sum = svdup_f32(0.0f);
size_t i = 0;

@@ -96,7 +102,8 @@ float fvec_norm_L2sqr_sve(const float* x, size_t d) {
return svaddv_f32(svptrue_b32(), sum);
}

void fvec_madd_sve(size_t n, const float* a, float bf, const float* b, float* c) {
void
fvec_madd_sve(size_t n, const float* a, float bf, const float* b, float* c) {
size_t i = 0;
svfloat32_t bf_vec = svdup_f32(bf);

@@ -114,7 +121,8 @@ void fvec_madd_sve(size_t n, const float* a, float bf, const float* b, float* c)
}
}

int fvec_madd_and_argmin_sve(size_t n, const float* a, float bf, const float* b, float* c) {
int
fvec_madd_and_argmin_sve(size_t n, const float* a, float bf, const float* b, float* c) {
size_t i = 0;
svfloat32_t min_val = svdup_f32(INFINITY);
svuint32_t min_idx = svdup_u32(0);
@@ -147,8 +155,9 @@ int fvec_madd_and_argmin_sve(size_t n, const float* a, float bf, const float* b,
return static_cast<int>(min_index);
}

void fvec_L2sqr_batch_4_sve(const float* x, const float* y0, const float* y1, const float* y2, const float* y3,
const size_t d, float& dis0, float& dis1, float& dis2, float& dis3) {
void
fvec_L2sqr_batch_4_sve(const float* x, const float* y0, const float* y1, const float* y2, const float* y3,
const size_t d, float& dis0, float& dis1, float& dis2, float& dis3) {
float d0 = 0;
float d1 = 0;
float d2 = 0;
@@ -171,7 +180,6 @@ void fvec_L2sqr_batch_4_sve(const float* x, const float* y0, const float* y1, co
dis3 = d3;
}

} // namespace faiss

} // namespace faiss

#endif
#endif
32 changes: 20 additions & 12 deletions src/simd/distances_sve.h
Original file line number Diff line number Diff line change
@@ -9,29 +9,37 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.

#include <arm_sve.h>

#include <cstdint>
#include <cstdio>
#include <arm_sve.h>
#if defined(__ARM_FEATURE_SVE)
namespace faiss {

float fvec_L2sqr_sve(const float* x, const float* y, size_t d);
float
fvec_L2sqr_sve(const float* x, const float* y, size_t d);

float fvec_L1_sve(const float* x, const float* y, size_t d);
float
fvec_L1_sve(const float* x, const float* y, size_t d);

float fvec_Linf_sve(const float* x, const float* y, size_t d);
float
fvec_Linf_sve(const float* x, const float* y, size_t d);

float fvec_norm_L2sqr_sve(const float* x, size_t d);
float
fvec_norm_L2sqr_sve(const float* x, size_t d);

void fvec_madd_sve(size_t n, const float* a, float bf, const float* b, float* c);
void
fvec_madd_sve(size_t n, const float* a, float bf, const float* b, float* c);

int fvec_madd_and_argmin_sve(size_t n, const float* a, float bf, const float* b, float* c);
int
fvec_madd_and_argmin_sve(size_t n, const float* a, float bf, const float* b, float* c);

int32_t ivec_L2sqr_sve(const int8_t* x, const int8_t* y, size_t d);
int32_t
ivec_L2sqr_sve(const int8_t* x, const int8_t* y, size_t d);

void fvec_L2sqr_batch_4_sve(const float* x, const float* y0, const float* y1, const float* y2, const float* y3,
const size_t d, float& dis0, float& dis1, float& dis2, float& dis3);
void
fvec_L2sqr_batch_4_sve(const float* x, const float* y0, const float* y1, const float* y2, const float* y3,
const size_t d, float& dis0, float& dis1, float& dis2, float& dis3);

} // namespace faiss
#endif
} // namespace faiss
#endif
131 changes: 64 additions & 67 deletions src/simd/hook.cc
Original file line number Diff line number Diff line change
@@ -21,7 +21,7 @@
#if defined(__ARM_FEATURE_SVE)
#include "distances_sve.h"
#endif

#if defined(__ARM_NEON)
#include "distances_neon.h"
#endif
@@ -139,17 +139,16 @@ enable_patch_for_fp32_bf16() {

#if defined(__aarch64__)

#if defined(__ARM_NEON) && !defined(__ARM_FEATURE_SVE)

fvec_L2sqr = fvec_L2sqr_neon_bf16_patch;
fvec_L2sqr_batch_4 = fvec_L2sqr_batch_4_neon_bf16_patch;
fvec_inner_product = fvec_inner_product_neon_bf16_patch;
fvec_inner_product_batch_4 = fvec_inner_product_batch_4_neon_bf16_patch;
#if defined(__ARM_NEON) && !defined(__ARM_FEATURE_SVE)

#endif
fvec_L2sqr = fvec_L2sqr_neon_bf16_patch;
fvec_L2sqr_batch_4 = fvec_L2sqr_batch_4_neon_bf16_patch;
fvec_inner_product = fvec_inner_product_neon_bf16_patch;
fvec_inner_product_batch_4 = fvec_inner_product_batch_4_neon_bf16_patch;

#endif

#endif
}

void
@@ -305,68 +304,66 @@ fvec_hook(std::string& simd_type) {

#if defined(__aarch64__)

#if defined(__ARM_FEATURE_SVE)
// ToDo: Enable remaining functions on SVE

fvec_L2sqr = fvec_L2sqr_sve;
fvec_L1 = fvec_L1_sve;
fvec_Linf = fvec_Linf_sve;
fvec_norm_L2sqr = fvec_norm_L2sqr_sve;
fvec_madd = fvec_madd_sve;
fvec_madd_and_argmin = fvec_madd_and_argmin_sve;

fvec_inner_product = fvec_inner_product_neon;
fvec_L2sqr_ny = fvec_L2sqr_ny_neon;
fvec_inner_products_ny = fvec_inner_products_ny_neon;


ivec_inner_product = ivec_inner_product_neon;
ivec_L2sqr = ivec_L2sqr_neon;

fp16_vec_inner_product = fp16_vec_inner_product_neon;
fp16_vec_L2sqr = fp16_vec_L2sqr_neon;
fp16_vec_norm_L2sqr = fp16_vec_norm_L2sqr_neon;

bf16_vec_inner_product = bf16_vec_inner_product_neon;
bf16_vec_L2sqr = bf16_vec_L2sqr_neon;
bf16_vec_norm_L2sqr = bf16_vec_norm_L2sqr_neon;
fvec_L2sqr_batch_4 = fvec_L2sqr_batch_4_sve;
simd_type = "SVE";
support_pq_fast_scan = true;

#elif defined(__ARM_NEON)
// NEON functions
fvec_inner_product = fvec_inner_product_neon;
fvec_L2sqr = fvec_L2sqr_neon;
fvec_L1 = fvec_L1_neon;
fvec_Linf = fvec_Linf_neon;
fvec_norm_L2sqr = fvec_norm_L2sqr_neon;
fvec_L2sqr_ny = fvec_L2sqr_ny_neon;
fvec_inner_products_ny = fvec_inner_products_ny_neon;
fvec_madd = fvec_madd_neon;
fvec_madd_and_argmin = fvec_madd_and_argmin_neon;

ivec_inner_product = ivec_inner_product_neon;
ivec_L2sqr = ivec_L2sqr_neon;

fp16_vec_inner_product = fp16_vec_inner_product_neon;
fp16_vec_L2sqr = fp16_vec_L2sqr_neon;
fp16_vec_norm_L2sqr = fp16_vec_norm_L2sqr_neon;

bf16_vec_inner_product = bf16_vec_inner_product_neon;
bf16_vec_L2sqr = bf16_vec_L2sqr_neon;
bf16_vec_norm_L2sqr = bf16_vec_norm_L2sqr_neon;

fvec_inner_product_batch_4 = fvec_inner_product_batch_4_neon;
fvec_L2sqr_batch_4 = fvec_L2sqr_batch_4_neon;

simd_type = "NEON";
support_pq_fast_scan = true;

#endif
#if defined(__ARM_FEATURE_SVE)
// ToDo: Enable remaining functions on SVE

fvec_L2sqr = fvec_L2sqr_sve;
fvec_L1 = fvec_L1_sve;
fvec_Linf = fvec_Linf_sve;
fvec_norm_L2sqr = fvec_norm_L2sqr_sve;
fvec_madd = fvec_madd_sve;
fvec_madd_and_argmin = fvec_madd_and_argmin_sve;

fvec_inner_product = fvec_inner_product_neon;
fvec_L2sqr_ny = fvec_L2sqr_ny_neon;
fvec_inner_products_ny = fvec_inner_products_ny_neon;

ivec_inner_product = ivec_inner_product_neon;
ivec_L2sqr = ivec_L2sqr_neon;

fp16_vec_inner_product = fp16_vec_inner_product_neon;
fp16_vec_L2sqr = fp16_vec_L2sqr_neon;
fp16_vec_norm_L2sqr = fp16_vec_norm_L2sqr_neon;

bf16_vec_inner_product = bf16_vec_inner_product_neon;
bf16_vec_L2sqr = bf16_vec_L2sqr_neon;
bf16_vec_norm_L2sqr = bf16_vec_norm_L2sqr_neon;
fvec_L2sqr_batch_4 = fvec_L2sqr_batch_4_sve;
simd_type = "SVE";
support_pq_fast_scan = true;

#elif defined(__ARM_NEON)
// NEON functions
fvec_inner_product = fvec_inner_product_neon;
fvec_L2sqr = fvec_L2sqr_neon;
fvec_L1 = fvec_L1_neon;
fvec_Linf = fvec_Linf_neon;
fvec_norm_L2sqr = fvec_norm_L2sqr_neon;
fvec_L2sqr_ny = fvec_L2sqr_ny_neon;
fvec_inner_products_ny = fvec_inner_products_ny_neon;
fvec_madd = fvec_madd_neon;
fvec_madd_and_argmin = fvec_madd_and_argmin_neon;

ivec_inner_product = ivec_inner_product_neon;
ivec_L2sqr = ivec_L2sqr_neon;

fp16_vec_inner_product = fp16_vec_inner_product_neon;
fp16_vec_L2sqr = fp16_vec_L2sqr_neon;
fp16_vec_norm_L2sqr = fp16_vec_norm_L2sqr_neon;

bf16_vec_inner_product = bf16_vec_inner_product_neon;
bf16_vec_L2sqr = bf16_vec_L2sqr_neon;
bf16_vec_norm_L2sqr = bf16_vec_norm_L2sqr_neon;

fvec_inner_product_batch_4 = fvec_inner_product_batch_4_neon;
fvec_L2sqr_batch_4 = fvec_L2sqr_batch_4_neon;

simd_type = "NEON";
support_pq_fast_scan = true;

#endif

#endif

// ToDo MG: include VSX intrinsics via distances_vsx once _ref tests succeed
#if defined(__powerpc64__)