zilliztech · sre-ci-robot · Feb 28, 2025 · Feb 21, 2025 · foxspy · Feb 27, 2025
diff --git a/src/simd/distances_avx.cc b/src/simd/distances_avx.cc
@@ -876,6 +876,95 @@ bf16_vec_L2sqr_batch_4_avx(const knowhere::bf16* x, const knowhere::bf16* y0, co
     dis3 = _mm256_reduce_add_ps(msum_3);
 }
 
+///////////////////////////////////////////////////////////////////////////////
+// int8
+
+FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN  // macro defined outside this diff; presumably an optimization hint - confirm
+float  // int8 inner product: returns the sum over i in [0, d) of x[i] * y[i], converted to float
+int8_vec_inner_product_avx(const int8_t* x, const int8_t* y, size_t d) {
+    int32_t res = 0;  // integer accumulator; |term| <= 16384, NOTE(review): can overflow if d >= 131072 - confirm max d
+    FAISS_PRAGMA_IMPRECISE_LOOP  // loop-level hint defined elsewhere; the loop itself uses no explicit AVX intrinsics
+    for (size_t i = 0; i < d; i++) {
+        res += (int32_t)x[i] * (int32_t)y[i];  // widen both operands to 32 bits before multiplying
+    }
+    return (float)res;  // single int -> float conversion after the loop
+}
+FAISS_PRAGMA_IMPRECISE_FUNCTION_END
+
+FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN  // macro defined outside this diff; presumably an optimization hint - confirm
+float  // int8 squared L2 distance: returns the sum over i in [0, d) of (x[i] - y[i])^2, converted to float
+int8_vec_L2sqr_avx(const int8_t* x, const int8_t* y, size_t d) {
+    int32_t res = 0;  // integer accumulator; term <= 65025, NOTE(review): can overflow if d > 33025 - confirm max d
+    FAISS_PRAGMA_IMPRECISE_LOOP  // loop-level hint defined elsewhere; the loop itself uses no explicit AVX intrinsics
+    for (size_t i = 0; i < d; i++) {
+        const int32_t tmp = (int32_t)x[i] - (int32_t)y[i];  // widen first so the difference (-255..255) cannot wrap
+        res += tmp * tmp;
+    }
+    return (float)res;  // single int -> float conversion after the loop
+}
+FAISS_PRAGMA_IMPRECISE_FUNCTION_END
+
+FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN  // macro defined outside this diff; presumably an optimization hint - confirm
+float  // int8 squared L2 norm: returns the sum over i in [0, d) of x[i]^2, converted to float
+int8_vec_norm_L2sqr_avx(const int8_t* x, size_t d) {
+    int32_t res = 0;  // integer accumulator; term <= 16384, NOTE(review): can overflow if d >= 131072 - confirm max d
+    FAISS_PRAGMA_IMPRECISE_LOOP  // loop-level hint defined elsewhere; the loop itself uses no explicit AVX intrinsics
+    for (size_t i = 0; i < d; i++) {
+        res += (int32_t)x[i] * (int32_t)x[i];  // widen to 32 bits before squaring
+    }
+    return (float)res;  // single int -> float conversion after the loop
+}
+FAISS_PRAGMA_IMPRECISE_FUNCTION_END
+
+FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN  // macro defined outside this diff; presumably an optimization hint - confirm
+void  // inner products of x against y0..y3 over d elements in one pass; results are written to dis0..dis3
+int8_vec_inner_product_batch_4_avx(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2,
+                                   const int8_t* y3, const size_t d, float& dis0, float& dis1, float& dis2,
+                                   float& dis3) {
+    int32_t d0 = 0, d1 = 0, d2 = 0, d3 = 0;  // one integer accumulator per y vector; NOTE(review): no overflow guard
+
+    FAISS_PRAGMA_IMPRECISE_LOOP  // loop-level hint defined elsewhere; the loop itself uses no explicit AVX intrinsics
+    for (size_t i = 0; i < d; ++i) {
+        auto x_i = (int32_t)x[i];  // read and widen x[i] once, then reuse it for all four products
+        d0 += x_i * (int32_t)y0[i];
+        d1 += x_i * (int32_t)y1[i];
+        d2 += x_i * (int32_t)y2[i];
+        d3 += x_i * (int32_t)y3[i];
+    }
+
+    dis0 = (float)d0;  // outputs are only assigned here, after the loop, as int -> float conversions
+    dis1 = (float)d1;
+    dis2 = (float)d2;
+    dis3 = (float)d3;
+}
+FAISS_PRAGMA_IMPRECISE_FUNCTION_END
+
+FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN  // macro defined outside this diff; presumably an optimization hint - confirm
+void  // squared L2 distances from x to each of y0..y3 over d elements in one pass; results go to dis0..dis3
+int8_vec_L2sqr_batch_4_avx(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2, const int8_t* y3,
+                           const size_t d, float& dis0, float& dis1, float& dis2, float& dis3) {
+    int32_t d0 = 0, d1 = 0, d2 = 0, d3 = 0;  // one integer accumulator per y vector; NOTE(review): no overflow guard
+
+    FAISS_PRAGMA_IMPRECISE_LOOP  // loop-level hint defined elsewhere; the loop itself uses no explicit AVX intrinsics
+    for (size_t i = 0; i < d; ++i) {
+        auto x_i = (int32_t)x[i];  // read and widen x[i] once, then reuse it for all four differences
+        const int32_t q0 = x_i - (int32_t)y0[i];  // per-vector difference, computed in 32 bits
+        const int32_t q1 = x_i - (int32_t)y1[i];
+        const int32_t q2 = x_i - (int32_t)y2[i];
+        const int32_t q3 = x_i - (int32_t)y3[i];
+        d0 += q0 * q0;  // accumulate the squared differences
+        d1 += q1 * q1;
+        d2 += q2 * q2;
+        d3 += q3 * q3;
+    }
+
+    dis0 = (float)d0;  // outputs are only assigned here, after the loop, as int -> float conversions
+    dis1 = (float)d1;
+    dis2 = (float)d2;
+    dis3 = (float)d3;
+}
+FAISS_PRAGMA_IMPRECISE_FUNCTION_END
+
 ///////////////////////////////////////////////////////////////////////////////
 // for cardinal
 

diff --git a/src/simd/distances_avx.h b/src/simd/distances_avx.h
@@ -106,6 +106,27 @@ bf16_vec_L2sqr_batch_4_avx(const knowhere::bf16* x, const knowhere::bf16* y0, co
                            const knowhere::bf16* y2, const knowhere::bf16* y3, const size_t d, float& dis0, float& dis1,
                            float& dis2, float& dis3);
 
+///////////////////////////////////////////////////////////////////////////////
+// int8
+
+float  // returns the sum over i in [0, d) of x[i] * y[i]
+int8_vec_inner_product_avx(const int8_t* x, const int8_t* y, size_t d);
+
+float  // returns the sum over i in [0, d) of (x[i] - y[i])^2
+int8_vec_L2sqr_avx(const int8_t* x, const int8_t* y, size_t d);
+
+float  // returns the sum over i in [0, d) of x[i]^2
+int8_vec_norm_L2sqr_avx(const int8_t* x, size_t d);
+
+void  // inner product of x with each of y0..y3 (d elements each); results are written to dis0..dis3
+int8_vec_inner_product_batch_4_avx(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2,
+                                   const int8_t* y3, const size_t d, float& dis0, float& dis1, float& dis2,
+                                   float& dis3);
+
+void  // squared L2 distance from x to each of y0..y3 (d elements each); results are written to dis0..dis3
+int8_vec_L2sqr_batch_4_avx(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2, const int8_t* y3,
+                           const size_t d, float& dis0, float& dis1, float& dis2, float& dis3);
+
 ///////////////////////////////////////////////////////////////////////////////
 // for cardinal
 

diff --git a/src/simd/distances_avx512.cc b/src/simd/distances_avx512.cc
@@ -682,6 +682,95 @@ bf16_vec_L2sqr_batch_4_avx512(const knowhere::bf16* x, const knowhere::bf16* y0,
     dis3 = _mm512_reduce_add_ps(m512_res_3);
 }
 
+///////////////////////////////////////////////////////////////////////////////
+// int8
+
+FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN  // macro defined outside this diff; presumably an optimization hint - confirm
+float  // int8 inner product: returns the sum over i in [0, d) of x[i] * y[i], converted to float
+int8_vec_inner_product_avx512(const int8_t* x, const int8_t* y, size_t d) {
+    int32_t res = 0;  // integer accumulator; |term| <= 16384, NOTE(review): can overflow if d >= 131072 - confirm max d
+    FAISS_PRAGMA_IMPRECISE_LOOP  // loop-level hint defined elsewhere; the loop uses no explicit AVX-512 intrinsics
+    for (size_t i = 0; i < d; i++) {
+        res += (int32_t)x[i] * (int32_t)y[i];  // widen both operands to 32 bits before multiplying
+    }
+    return (float)res;  // single int -> float conversion after the loop
+}
+FAISS_PRAGMA_IMPRECISE_FUNCTION_END
+
+FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN  // macro defined outside this diff; presumably an optimization hint - confirm
+float  // int8 squared L2 distance: returns the sum over i in [0, d) of (x[i] - y[i])^2, converted to float
+int8_vec_L2sqr_avx512(const int8_t* x, const int8_t* y, size_t d) {
+    int32_t res = 0;  // integer accumulator; term <= 65025, NOTE(review): can overflow if d > 33025 - confirm max d
+    FAISS_PRAGMA_IMPRECISE_LOOP  // loop-level hint defined elsewhere; the loop uses no explicit AVX-512 intrinsics
+    for (size_t i = 0; i < d; i++) {
+        const int32_t tmp = (int32_t)x[i] - (int32_t)y[i];  // widen first so the difference (-255..255) cannot wrap
+        res += tmp * tmp;
+    }
+    return (float)res;  // single int -> float conversion after the loop
+}
+FAISS_PRAGMA_IMPRECISE_FUNCTION_END
+
+FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN  // macro defined outside this diff; presumably an optimization hint - confirm
+float  // int8 squared L2 norm: returns the sum over i in [0, d) of x[i]^2, converted to float
+int8_vec_norm_L2sqr_avx512(const int8_t* x, size_t d) {
+    int32_t res = 0;  // integer accumulator; term <= 16384, NOTE(review): can overflow if d >= 131072 - confirm max d
+    FAISS_PRAGMA_IMPRECISE_LOOP  // loop-level hint defined elsewhere; the loop uses no explicit AVX-512 intrinsics
+    for (size_t i = 0; i < d; i++) {
+        res += (int32_t)x[i] * (int32_t)x[i];  // widen to 32 bits before squaring
+    }
+    return (float)res;  // single int -> float conversion after the loop
+}
+FAISS_PRAGMA_IMPRECISE_FUNCTION_END
+
+FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN  // macro defined outside this diff; presumably an optimization hint - confirm
+void  // inner products of x against y0..y3 over d elements in one pass; results are written to dis0..dis3
+int8_vec_inner_product_batch_4_avx512(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2,
+                                      const int8_t* y3, const size_t d, float& dis0, float& dis1, float& dis2,
+                                      float& dis3) {
+    int32_t d0 = 0, d1 = 0, d2 = 0, d3 = 0;  // one integer accumulator per y vector; NOTE(review): no overflow guard
+
+    FAISS_PRAGMA_IMPRECISE_LOOP  // loop-level hint defined elsewhere; the loop uses no explicit AVX-512 intrinsics
+    for (size_t i = 0; i < d; ++i) {
+        auto x_i = (int32_t)x[i];  // read and widen x[i] once, then reuse it for all four products
+        d0 += x_i * (int32_t)y0[i];
+        d1 += x_i * (int32_t)y1[i];
+        d2 += x_i * (int32_t)y2[i];
+        d3 += x_i * (int32_t)y3[i];
+    }
+
+    dis0 = (float)d0;  // outputs are only assigned here, after the loop, as int -> float conversions
+    dis1 = (float)d1;
+    dis2 = (float)d2;
+    dis3 = (float)d3;
+}
+FAISS_PRAGMA_IMPRECISE_FUNCTION_END
+
+FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN  // macro defined outside this diff; presumably an optimization hint - confirm
+void  // squared L2 distances from x to each of y0..y3 over d elements in one pass; results go to dis0..dis3
+int8_vec_L2sqr_batch_4_avx512(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2, const int8_t* y3,
+                              const size_t d, float& dis0, float& dis1, float& dis2, float& dis3) {
+    int32_t d0 = 0, d1 = 0, d2 = 0, d3 = 0;  // one integer accumulator per y vector; NOTE(review): no overflow guard
+
+    FAISS_PRAGMA_IMPRECISE_LOOP  // loop-level hint defined elsewhere; the loop uses no explicit AVX-512 intrinsics
+    for (size_t i = 0; i < d; ++i) {
+        auto x_i = (int32_t)x[i];  // read and widen x[i] once, then reuse it for all four differences
+        const int32_t q0 = x_i - (int32_t)y0[i];  // per-vector difference, computed in 32 bits
+        const int32_t q1 = x_i - (int32_t)y1[i];
+        const int32_t q2 = x_i - (int32_t)y2[i];
+        const int32_t q3 = x_i - (int32_t)y3[i];
+        d0 += q0 * q0;  // accumulate the squared differences
+        d1 += q1 * q1;
+        d2 += q2 * q2;
+        d3 += q3 * q3;
+    }
+
+    dis0 = (float)d0;  // outputs are only assigned here, after the loop, as int -> float conversions
+    dis1 = (float)d1;
+    dis2 = (float)d2;
+    dis3 = (float)d3;
+}
+FAISS_PRAGMA_IMPRECISE_FUNCTION_END
+
 ///////////////////////////////////////////////////////////////////////////////
 // for cardinal
 

diff --git a/src/simd/distances_avx512.h b/src/simd/distances_avx512.h
@@ -100,6 +100,27 @@ bf16_vec_L2sqr_batch_4_avx512(const knowhere::bf16* x, const knowhere::bf16* y0,
                               const knowhere::bf16* y2, const knowhere::bf16* y3, const size_t d, float& dis0,
                               float& dis1, float& dis2, float& dis3);
 
+///////////////////////////////////////////////////////////////////////////////
+// int8
+
+float  // returns the sum over i in [0, d) of x[i] * y[i]
+int8_vec_inner_product_avx512(const int8_t* x, const int8_t* y, size_t d);
+
+float  // returns the sum over i in [0, d) of (x[i] - y[i])^2
+int8_vec_L2sqr_avx512(const int8_t* x, const int8_t* y, size_t d);
+
+float  // returns the sum over i in [0, d) of x[i]^2
+int8_vec_norm_L2sqr_avx512(const int8_t* x, size_t d);
+
+void  // inner product of x with each of y0..y3 (d elements each); results are written to dis0..dis3
+int8_vec_inner_product_batch_4_avx512(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2,
+                                      const int8_t* y3, const size_t d, float& dis0, float& dis1, float& dis2,
+                                      float& dis3);
+
+void  // squared L2 distance from x to each of y0..y3 (d elements each); results are written to dis0..dis3
+int8_vec_L2sqr_batch_4_avx512(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2, const int8_t* y3,
+                              const size_t d, float& dis0, float& dis1, float& dis2, float& dis3);
+
 ///////////////////////////////////////////////////////////////////////////////
 // for cardinal
 

diff --git a/src/simd/distances_neon.cc b/src/simd/distances_neon.cc
@@ -2113,6 +2113,85 @@ bf16_vec_L2sqr_batch_4_neon(const knowhere::bf16* x, const knowhere::bf16* y0, c
     dis3 = vaddvq_f32(res.val[3]);
 }
 
+///////////////////////////////////////////////////////////////////////////////
+// int8
+
+float  // int8 inner product: returns the sum over i in [0, d) of x[i] * y[i], converted to float
+int8_vec_inner_product_neon(const int8_t* x, const int8_t* y, size_t d) {
+    // TODO caiyd: scalar reference implementation used temporarily; no NEON intrinsics here yet
+    int32_t res = 0;  // integer accumulator; |term| <= 16384, NOTE(review): can overflow if d >= 131072 - confirm max d
+    for (size_t i = 0; i < d; i++) {
+        res += (int32_t)x[i] * (int32_t)y[i];  // widen both operands to 32 bits before multiplying
+    }
+    return (float)res;  // single int -> float conversion after the loop
+}
+
+float  // int8 squared L2 distance: returns the sum over i in [0, d) of (x[i] - y[i])^2, converted to float
+int8_vec_L2sqr_neon(const int8_t* x, const int8_t* y, size_t d) {
+    // TODO caiyd: scalar reference implementation used temporarily; no NEON intrinsics here yet
+    int32_t res = 0;  // integer accumulator; term <= 65025, NOTE(review): can overflow if d > 33025 - confirm max d
+    for (size_t i = 0; i < d; i++) {
+        const int32_t tmp = (int32_t)x[i] - (int32_t)y[i];  // widen first so the difference (-255..255) cannot wrap
+        res += tmp * tmp;
+    }
+    return (float)res;  // single int -> float conversion after the loop
+}
+
+float  // int8 squared L2 norm: returns the sum over i in [0, d) of x[i]^2, converted to float
+int8_vec_norm_L2sqr_neon(const int8_t* x, size_t d) {
+    // TODO caiyd: scalar reference implementation used temporarily; no NEON intrinsics here yet
+    int32_t res = 0;  // integer accumulator; term <= 16384, NOTE(review): can overflow if d >= 131072 - confirm max d
+    for (size_t i = 0; i < d; i++) {
+        res += (int32_t)x[i] * (int32_t)x[i];  // widen to 32 bits before squaring
+    }
+    return (float)res;  // single int -> float conversion after the loop
+}
+
+void  // inner products of x against y0..y3 over d elements in one pass; results are written to dis0..dis3
+int8_vec_inner_product_batch_4_neon(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2,
+                                    const int8_t* y3, const size_t d, float& dis0, float& dis1, float& dis2,
+                                    float& dis3) {
+    // TODO caiyd: scalar reference implementation used temporarily; no NEON intrinsics here yet
+    int32_t d0 = 0, d1 = 0, d2 = 0, d3 = 0;  // one integer accumulator per y vector; NOTE(review): no overflow guard
+
+    for (size_t i = 0; i < d; ++i) {
+        auto x_i = (int32_t)x[i];  // read and widen x[i] once, then reuse it for all four products
+        d0 += x_i * (int32_t)y0[i];
+        d1 += x_i * (int32_t)y1[i];
+        d2 += x_i * (int32_t)y2[i];
+        d3 += x_i * (int32_t)y3[i];
+    }
+
+    dis0 = (float)d0;  // outputs are only assigned here, after the loop, as int -> float conversions
+    dis1 = (float)d1;
+    dis2 = (float)d2;
+    dis3 = (float)d3;
+}
+
+void  // squared L2 distances from x to each of y0..y3 over d elements in one pass; results go to dis0..dis3
+int8_vec_L2sqr_batch_4_neon(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2, const int8_t* y3,
+                            const size_t d, float& dis0, float& dis1, float& dis2, float& dis3) {
+    // TODO caiyd: scalar reference implementation used temporarily; no NEON intrinsics here yet
+    int32_t d0 = 0, d1 = 0, d2 = 0, d3 = 0;  // one integer accumulator per y vector; NOTE(review): no overflow guard
+
+    for (size_t i = 0; i < d; ++i) {
+        auto x_i = (int32_t)x[i];  // read and widen x[i] once, then reuse it for all four differences
+        const int32_t q0 = x_i - (int32_t)y0[i];  // per-vector difference, computed in 32 bits
+        const int32_t q1 = x_i - (int32_t)y1[i];
+        const int32_t q2 = x_i - (int32_t)y2[i];
+        const int32_t q3 = x_i - (int32_t)y3[i];
+        d0 += q0 * q0;  // accumulate the squared differences
+        d1 += q1 * q1;
+        d2 += q2 * q2;
+        d3 += q3 * q3;
+    }
+
+    dis0 = (float)d0;  // outputs are only assigned here, after the loop, as int -> float conversions
+    dis1 = (float)d1;
+    dis2 = (float)d2;
+    dis3 = (float)d3;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 // for cardinal
 

diff --git a/src/simd/distances_neon.h b/src/simd/distances_neon.h
@@ -117,6 +117,27 @@ bf16_vec_L2sqr_batch_4_neon(const knowhere::bf16* x, const knowhere::bf16* y0, c
                             const knowhere::bf16* y2, const knowhere::bf16* y3, const size_t d, float& dis0,
                             float& dis1, float& dis2, float& dis3);
 
+///////////////////////////////////////////////////////////////////////////////
+// int8
+
+float  // returns the sum over i in [0, d) of x[i] * y[i]
+int8_vec_inner_product_neon(const int8_t* x, const int8_t* y, size_t d);
+
+float  // returns the sum over i in [0, d) of (x[i] - y[i])^2
+int8_vec_L2sqr_neon(const int8_t* x, const int8_t* y, size_t d);
+
+float  // returns the sum over i in [0, d) of x[i]^2
+int8_vec_norm_L2sqr_neon(const int8_t* x, size_t d);
+
+void  // inner product of x with each of y0..y3 (d elements each); results are written to dis0..dis3
+int8_vec_inner_product_batch_4_neon(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2,
+                                    const int8_t* y3, const size_t d, float& dis0, float& dis1, float& dis2,
+                                    float& dis3);
+
+void  // squared L2 distance from x to each of y0..y3 (d elements each); results are written to dis0..dis3
+int8_vec_L2sqr_batch_4_neon(const int8_t* x, const int8_t* y0, const int8_t* y1, const int8_t* y2, const int8_t* y3,
+                            const size_t d, float& dis0, float& dis1, float& dis2, float& dis3);
+
 ///////////////////////////////////////////////////////////////////////////////
 // for cardinal