Skip to content

Commit

Permalink
Add Int8Vector SIMD APIs for avx512/avx2/sse/ref
Browse files Browse the repository at this point in the history
Signed-off-by: CaiYudong <[email protected]>
  • Loading branch information
cydrain committed Feb 26, 2025
1 parent e886c73 commit 18afefb
Show file tree
Hide file tree
Showing 13 changed files with 531 additions and 1 deletion.
89 changes: 89 additions & 0 deletions src/simd/distances_avx.cc
Original file line number Diff line number Diff line change
Expand Up @@ -876,6 +876,95 @@ bf16_vec_L2sqr_batch_4_avx(const knowhere::bf16* x, const knowhere::bf16* y0, co
dis3 = _mm256_reduce_add_ps(msum_3);
}

///////////////////////////////////////////////////////////////////////////////
// int8

FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
float
int8_vec_inner_product_avx(const int8_t* x, const int8_t* y, size_t d) {
float res = 0;
FAISS_PRAGMA_IMPRECISE_LOOP
for (size_t i = 0; i < d; i++) {
res += (float)x[i] * (float)y[i];
}
return res;
}
FAISS_PRAGMA_IMPRECISE_FUNCTION_END

FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
float
int8_vec_L2sqr_avx(const int8_t* x, const int8_t* y, size_t d) {
float res = 0;
FAISS_PRAGMA_IMPRECISE_LOOP
for (size_t i = 0; i < d; i++) {
const float tmp = (float)x[i] - (float)y[i];
res += tmp * tmp;
}
return res;
}
FAISS_PRAGMA_IMPRECISE_FUNCTION_END

FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
float
int8_vec_norm_L2sqr_avx(const int8_t* x, size_t d) {
double res = 0;
FAISS_PRAGMA_IMPRECISE_LOOP
for (size_t i = 0; i < d; i++) {
res += (float)x[i] * (float)x[i];
}
return res;
}
FAISS_PRAGMA_IMPRECISE_FUNCTION_END

FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
void
int8_vec_inner_product_batch_4_avx(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2,
const int8_t* y3, const size_t d, float& dis0, float& dis1, float& dis2,
float& dis3) {
float d0 = 0, d1 = 0, d2 = 0, d3 = 0;

FAISS_PRAGMA_IMPRECISE_LOOP
for (size_t i = 0; i < d; ++i) {
auto x_i = (float)x[i];
d0 += x_i * (float)y0[i];
d1 += x_i * (float)y1[i];
d2 += x_i * (float)y2[i];
d3 += x_i * (float)y3[i];
}

dis0 = d0;
dis1 = d1;
dis2 = d2;
dis3 = d3;
}
FAISS_PRAGMA_IMPRECISE_FUNCTION_END

FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
void
int8_vec_L2sqr_batch_4_avx(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2, const int8_t* y3,
const size_t d, float& dis0, float& dis1, float& dis2, float& dis3) {
float d0 = 0, d1 = 0, d2 = 0, d3 = 0;

FAISS_PRAGMA_IMPRECISE_LOOP
for (size_t i = 0; i < d; ++i) {
auto x_i = (float)x[i];
const float q0 = x_i - (float)y0[i];
const float q1 = x_i - (float)y1[i];
const float q2 = x_i - (float)y2[i];
const float q3 = x_i - (float)y3[i];
d0 += q0 * q0;
d1 += q1 * q1;
d2 += q2 * q2;
d3 += q3 * q3;
}

dis0 = d0;
dis1 = d1;
dis2 = d2;
dis3 = d3;
}
FAISS_PRAGMA_IMPRECISE_FUNCTION_END

///////////////////////////////////////////////////////////////////////////////
// for cardinal

Expand Down
21 changes: 21 additions & 0 deletions src/simd/distances_avx.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,27 @@ bf16_vec_L2sqr_batch_4_avx(const knowhere::bf16* x, const knowhere::bf16* y0, co
const knowhere::bf16* y2, const knowhere::bf16* y3, const size_t d, float& dis0, float& dis1,
float& dis2, float& dis3);

///////////////////////////////////////////////////////////////////////////////
// int8

float
int8_vec_inner_product_avx(const int8_t* x, const int8_t* y, size_t d);

float
int8_vec_L2sqr_avx(const int8_t* x, const int8_t* y, size_t d);

float
int8_vec_norm_L2sqr_avx(const int8_t* x, size_t d);

void
int8_vec_inner_product_batch_4_avx(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2,
const int8_t* y3, const size_t d, float& dis0, float& dis1, float& dis2,
float& dis3);

void
int8_vec_L2sqr_batch_4_avx(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2, const int8_t* y3,
const size_t d, float& dis0, float& dis1, float& dis2, float& dis3);

///////////////////////////////////////////////////////////////////////////////
// for cardinal

Expand Down
89 changes: 89 additions & 0 deletions src/simd/distances_avx512.cc
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,95 @@ bf16_vec_L2sqr_batch_4_avx512(const knowhere::bf16* x, const knowhere::bf16* y0,
dis3 = _mm512_reduce_add_ps(m512_res_3);
}

///////////////////////////////////////////////////////////////////////////////
// int8

FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
float
int8_vec_inner_product_avx512(const int8_t* x, const int8_t* y, size_t d) {
float res = 0;
FAISS_PRAGMA_IMPRECISE_LOOP
for (size_t i = 0; i < d; i++) {
res += (float)x[i] * (float)y[i];
}
return res;
}
FAISS_PRAGMA_IMPRECISE_FUNCTION_END

FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
float
int8_vec_L2sqr_avx512(const int8_t* x, const int8_t* y, size_t d) {
float res = 0;
FAISS_PRAGMA_IMPRECISE_LOOP
for (size_t i = 0; i < d; i++) {
const float tmp = (float)x[i] - (float)y[i];
res += tmp * tmp;
}
return res;
}
FAISS_PRAGMA_IMPRECISE_FUNCTION_END

FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
float
int8_vec_norm_L2sqr_avx512(const int8_t* x, size_t d) {
double res = 0;
FAISS_PRAGMA_IMPRECISE_LOOP
for (size_t i = 0; i < d; i++) {
res += (float)x[i] * (float)x[i];
}
return res;
}
FAISS_PRAGMA_IMPRECISE_FUNCTION_END

FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
void
int8_vec_inner_product_batch_4_avx512(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2,
const int8_t* y3, const size_t d, float& dis0, float& dis1, float& dis2,
float& dis3) {
float d0 = 0, d1 = 0, d2 = 0, d3 = 0;

FAISS_PRAGMA_IMPRECISE_LOOP
for (size_t i = 0; i < d; ++i) {
auto x_i = (float)x[i];
d0 += x_i * (float)y0[i];
d1 += x_i * (float)y1[i];
d2 += x_i * (float)y2[i];
d3 += x_i * (float)y3[i];
}

dis0 = d0;
dis1 = d1;
dis2 = d2;
dis3 = d3;
}
FAISS_PRAGMA_IMPRECISE_FUNCTION_END

FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
void
int8_vec_L2sqr_batch_4_avx512(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2, const int8_t* y3,
const size_t d, float& dis0, float& dis1, float& dis2, float& dis3) {
float d0 = 0, d1 = 0, d2 = 0, d3 = 0;

FAISS_PRAGMA_IMPRECISE_LOOP
for (size_t i = 0; i < d; ++i) {
auto x_i = (float)x[i];
const float q0 = x_i - (float)y0[i];
const float q1 = x_i - (float)y1[i];
const float q2 = x_i - (float)y2[i];
const float q3 = x_i - (float)y3[i];
d0 += q0 * q0;
d1 += q1 * q1;
d2 += q2 * q2;
d3 += q3 * q3;
}

dis0 = d0;
dis1 = d1;
dis2 = d2;
dis3 = d3;
}
FAISS_PRAGMA_IMPRECISE_FUNCTION_END

///////////////////////////////////////////////////////////////////////////////
// for cardinal

Expand Down
21 changes: 21 additions & 0 deletions src/simd/distances_avx512.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,27 @@ bf16_vec_L2sqr_batch_4_avx512(const knowhere::bf16* x, const knowhere::bf16* y0,
const knowhere::bf16* y2, const knowhere::bf16* y3, const size_t d, float& dis0,
float& dis1, float& dis2, float& dis3);

///////////////////////////////////////////////////////////////////////////////
// int8

float
int8_vec_inner_product_avx512(const int8_t* x, const int8_t* y, size_t d);

float
int8_vec_L2sqr_avx512(const int8_t* x, const int8_t* y, size_t d);

float
int8_vec_norm_L2sqr_avx512(const int8_t* x, size_t d);

void
int8_vec_inner_product_batch_4_avx512(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2,
const int8_t* y3, const size_t d, float& dis0, float& dis1, float& dis2,
float& dis3);

void
int8_vec_L2sqr_batch_4_avx512(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2, const int8_t* y3,
const size_t d, float& dis0, float& dis1, float& dis2, float& dis3);

///////////////////////////////////////////////////////////////////////////////
// for cardinal

Expand Down
31 changes: 31 additions & 0 deletions src/simd/distances_neon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2113,6 +2113,37 @@ bf16_vec_L2sqr_batch_4_neon(const knowhere::bf16* x, const knowhere::bf16* y0, c
dis3 = vaddvq_f32(res.val[3]);
}

///////////////////////////////////////////////////////////////////////////////
// int8

float
int8_vec_inner_product_neon(const int8_t* x, const int8_t* y, size_t d) {
throw std::runtime_error("not implemented");
}

float
int8_vec_L2sqr_neon(const int8_t* x, const int8_t* y, size_t d) {
throw std::runtime_error("not implemented");
}

float
int8_vec_norm_L2sqr_neon(const int8_t* x, size_t d) {
throw std::runtime_error("not implemented");
}

void
int8_vec_inner_product_batch_4_neon(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2,
const int8_t* y3, const size_t d, float& dis0, float& dis1, float& dis2,
float& dis3) {
throw std::runtime_error("not implemented");
}

void
int8_vec_L2sqr_batch_4_neon(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2, const int8_t* y3,
const size_t d, float& dis0, float& dis1, float& dis2, float& dis3) {
throw std::runtime_error("not implemented");
}

///////////////////////////////////////////////////////////////////////////////
// for cardinal

Expand Down
21 changes: 21 additions & 0 deletions src/simd/distances_neon.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,27 @@ bf16_vec_L2sqr_batch_4_neon(const knowhere::bf16* x, const knowhere::bf16* y0, c
const knowhere::bf16* y2, const knowhere::bf16* y3, const size_t d, float& dis0,
float& dis1, float& dis2, float& dis3);

///////////////////////////////////////////////////////////////////////////////
// int8

float
int8_vec_inner_product_neon(const int8_t* x, const int8_t* y, size_t d);

float
int8_vec_L2sqr_neon(const int8_t* x, const int8_t* y, size_t d);

float
int8_vec_norm_L2sqr_neon(const int8_t* x, size_t d);

void
int8_vec_inner_product_batch_4_neon(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2,
const int8_t* y3, const size_t d, float& dis0, float& dis1, float& dis2,
float& dis3);

void
int8_vec_L2sqr_batch_4_neon(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2, const int8_t* y3,
const size_t d, float& dis0, float& dis1, float& dis2, float& dis3);

///////////////////////////////////////////////////////////////////////////////
// for cardinal

Expand Down
74 changes: 74 additions & 0 deletions src/simd/distances_ref.cc
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,80 @@ bf16_vec_L2sqr_batch_4_ref(const knowhere::bf16* x, const knowhere::bf16* y0, co
dis3 = d3;
}

///////////////////////////////////////////////////////////////////////////////
// int8

float
int8_vec_inner_product_ref(const int8_t* x, const int8_t* y, size_t d) {
float res = 0;
for (size_t i = 0; i < d; i++) {
res += (float)x[i] * (float)y[i];
}
return res;
}

float
int8_vec_L2sqr_ref(const int8_t* x, const int8_t* y, size_t d) {
float res = 0;
for (size_t i = 0; i < d; i++) {
const float tmp = (float)x[i] - (float)y[i];
res += tmp * tmp;
}
return res;
}

float
int8_vec_norm_L2sqr_ref(const int8_t* x, size_t d) {
double res = 0;
for (size_t i = 0; i < d; i++) {
res += (float)x[i] * (float)x[i];
}
return res;
}

void
int8_vec_inner_product_batch_4_ref(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2,
const int8_t* y3, const size_t d, float& dis0, float& dis1, float& dis2,
float& dis3) {
float d0 = 0, d1 = 0, d2 = 0, d3 = 0;

for (size_t i = 0; i < d; ++i) {
auto x_i = (float)x[i];
d0 += x_i * (float)y0[i];
d1 += x_i * (float)y1[i];
d2 += x_i * (float)y2[i];
d3 += x_i * (float)y3[i];
}

dis0 = d0;
dis1 = d1;
dis2 = d2;
dis3 = d3;
}

void
int8_vec_L2sqr_batch_4_ref(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2, const int8_t* y3,
const size_t d, float& dis0, float& dis1, float& dis2, float& dis3) {
float d0 = 0, d1 = 0, d2 = 0, d3 = 0;

for (size_t i = 0; i < d; ++i) {
auto x_i = (float)x[i];
const float q0 = x_i - (float)y0[i];
const float q1 = x_i - (float)y1[i];
const float q2 = x_i - (float)y2[i];
const float q3 = x_i - (float)y3[i];
d0 += q0 * q0;
d1 += q1 * q1;
d2 += q2 * q2;
d3 += q3 * q3;
}

dis0 = d0;
dis1 = d1;
dis2 = d2;
dis3 = d3;
}

///////////////////////////////////////////////////////////////////////////////
// for cardinal

Expand Down
Loading

0 comments on commit 18afefb

Please sign in to comment.