diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index b75b68347de6a..f01f72228f3b0 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -1016,7 +1016,7 @@ endif() set(STARROCKS_LINK_LIBS ${STARROCKS_LINK_LIBS} ${WL_LINK_STATIC} -lbfd - ${WL_LINK_DYNAMIC} -lresolv -liberty -lc -lm -ldl -rdynamic -pthread -Wl,-wrap=__cxa_throw + ${WL_LINK_DYNAMIC} -lresolv -liberty -lc -lm -ldl -rdynamic -pthread -Wl,-wrap,__cxa_throw -Wl,-wrap,__floattidf ) # link gcov if WITH_GCOV is on diff --git a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h index e9fcd114493b8..640c93e2cd075 100644 --- a/be/src/exprs/runtime_filter.h +++ b/be/src/exprs/runtime_filter.h @@ -119,6 +119,15 @@ class SimdBlockFilter { const __m256i mask = make_mask(hash >> _log_num_buckets); __m256i* const bucket = &reinterpret_cast<__m256i*>(_directory)[bucket_idx]; _mm256_store_si256(bucket, _mm256_or_si256(*bucket, mask)); +#elif defined(__ARM_NEON) + uint32x4_t masks[2]; + make_mask(hash >> _log_num_buckets, masks); + uint32x4_t directory_1 = vld1q_u32(&_directory[bucket_idx][0]); + uint32x4_t directory_2 = vld1q_u32(&_directory[bucket_idx][4]); + directory_1 = vorrq_u32(directory_1, masks[0]); + directory_2 = vorrq_u32(directory_2, masks[1]); + vst1q_u32(&_directory[bucket_idx][0], directory_1); + vst1q_u32(&_directory[bucket_idx][4], directory_2); #else uint32_t masks[BITS_SET_PER_BLOCK]; make_mask(hash >> _log_num_buckets, masks); diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt index 88ff203f07730..99af74fea0934 100644 --- a/be/src/runtime/CMakeLists.txt +++ b/be/src/runtime/CMakeLists.txt @@ -106,6 +106,7 @@ set(RUNTIME_FILES dictionary_cache_sink.cpp type_pack.cpp customized_result_writer.cpp + int128_to_double.cpp ) set(RUNTIME_FILES ${RUNTIME_FILES} diff --git a/be/src/runtime/int128_to_double.cpp b/be/src/runtime/int128_to_double.cpp new file mode 100644 index 0000000000000..8f55ea783c202 --- /dev/null +++ b/be/src/runtime/int128_to_double.cpp @@ -0,0 +1,94 @@ +// Copyright 2021-present StarRocks, Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "runtime/int128_to_double.h" + +#include + +#include +#include + +#include "integer_overflow_arithmetics.h" +namespace starrocks { +double __wrap___floattidf(__int128 a) { + typedef double dst_t; + typedef uint64_t dst_rep_t; + typedef __uint128_t usrc_t; +#define DST_REP_C UINT64_C + + enum { + dstSigBits = 52, + }; + + if (a == 0) return 0.0; + + enum { + dstMantDig = dstSigBits + 1, + srcBits = sizeof(__int128) * CHAR_BIT, + srcIsSigned = ((__int128)-1) < 0, + }; + + const __int128 s = srcIsSigned ? a >> (srcBits - 1) : 0; + + a = (usrc_t)(a ^ s) - s; + int sd = srcBits - clz128(a); // number of significant digits + int e = sd - 1; // exponent + if (sd > dstMantDig) { + // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + // 12345678901234567890123456 + // 1 = msb 1 bit + // P = bit dstMantDig-1 bits to the right of 1 + // Q = bit dstMantDig bits to the right of 1 + // R = "or" of all bits to the right of Q + if (sd == dstMantDig + 1) { + a <<= 1; + } else if (sd == dstMantDig + 2) { + // Do nothing. + } else { + a = ((usrc_t)a >> (sd - (dstMantDig + 2))) | + ((a & ((usrc_t)(-1) >> ((srcBits + dstMantDig + 2) - sd))) != 0); + } + // finish: + a |= (a & 4) != 0; // Or P into R + ++a; // round - this step may add a significant bit + a >>= 2; // dump Q and R + // a is now rounded to dstMantDig or dstMantDig+1 bits + if (a & ((usrc_t)1 << dstMantDig)) { + a >>= 1; + ++e; + } + // a is now rounded to dstMantDig bits + } else { + a <<= (dstMantDig - sd); + // a is now rounded to dstMantDig bits + } + const int dstBits = sizeof(dst_t) * CHAR_BIT; + const dst_rep_t dstSignMask = DST_REP_C(1) << (dstBits - 1); + const int dstExpBits = dstBits - dstSigBits - 1; + const int dstExpBias = (1 << (dstExpBits - 1)) - 1; + const dst_rep_t dstSignificandMask = (DST_REP_C(1) << dstSigBits) - 1; + // Combine sign, exponent, and mantissa. + const dst_rep_t result = ((dst_rep_t)s & dstSignMask) | ((dst_rep_t)(e + dstExpBias) << dstSigBits) | + ((dst_rep_t)(a)&dstSignificandMask); + + const union { + dst_t f; + dst_rep_t i; + } rep = {.i = result}; + + DCHECK(std::abs(rep.f - __real___floattidf(a)) < 0.001); + return rep.f; +} +} // namespace starrocks \ No newline at end of file diff --git a/be/src/runtime/int128_to_double.h b/be/src/runtime/int128_to_double.h new file mode 100644 index 0000000000000..a228afb598dfe --- /dev/null +++ b/be/src/runtime/int128_to_double.h @@ -0,0 +1,22 @@ +// Copyright 2021-present StarRocks, Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +namespace starrocks { +extern "C" { +// origin from llvm-project +// https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/int_to_fp_impl.inc +// this implementation is 20x faster than gcc +double __wrap___floattidf(__int128 a); +double __real___floattidf(__int128 a); +} +} // namespace starrocks \ No newline at end of file diff --git a/be/src/runtime/integer_overflow_arithmetics.h b/be/src/runtime/integer_overflow_arithmetics.h index 4cadb645d2610..e9f6ece72cca8 100644 --- a/be/src/runtime/integer_overflow_arithmetics.h +++ b/be/src/runtime/integer_overflow_arithmetics.h @@ -113,25 +113,7 @@ inline int clz128(unsigned __int128 v) { } inline bool int128_mul_overflow(int128_t a, int128_t b, int128_t* c) { - if (a == 0 || b == 0) { - *c = 0; - return false; - } - - // sgn(x) - auto sa = a >> 127; - // sgn(y) - auto sb = b >> 127; - // abx(x), abs(y) - a = (a ^ sa) - sa; - b = (b ^ sb) - sb; - // sgn(x * y) - sa ^= sb; - *c = a * b; - // sgn(x * y) and abs(x) * abs(y) produces x * y; - *c = (*c ^ sa) - sa; - static constexpr auto int128_max = get_max(); - return clz128(a) + clz128(b) < sizeof(int128_t) || int128_max / a < b; + return __builtin_mul_overflow(a, b, c); } template <> diff --git a/thirdparty/patches/streamvbyte.patch b/thirdparty/patches/streamvbyte.patch index 2fcc3a64e33de..39a96f1895b25 100644 --- a/thirdparty/patches/streamvbyte.patch +++ b/thirdparty/patches/streamvbyte.patch @@ -1,9 +1,36 @@ +From 676d0175085a7996f909d9d2e63ab7b4683ef475 Mon Sep 17 00:00:00 2001 +From: before-Sunrise +Date: Tue, 14 Jan 2025 18:41:46 +0800 +Subject: [PATCH] patch + +Signed-off-by: before-Sunrise +--- + CMakeLists.txt | 5 +---- + include/streamvbyte.h | 2 +- + 2 files changed, 2 insertions(+), 5 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 39df85d..1e32b0c 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -32,10 +32,7 @@ if (MSVC) + endif() + # test for arm + if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)") +- set(BASE_FLAGS +- ${BASE_FLAGS} +- "-D__ARM_NEON__" +- ) ++ add_compile_options(-D__ARM_NEON__) + endif() + set(STREAMVBYTE_SRCS + ${PROJECT_SOURCE_DIR}/src/streamvbyte_encode.c diff --git a/include/streamvbyte.h b/include/streamvbyte.h index bc9533c..a6cbb1a 100644 --- a/include/streamvbyte.h +++ b/include/streamvbyte.h @@ -1,7 +1,7 @@ - + #ifndef INCLUDE_STREAMVBYTE_H_ #define INCLUDE_STREAMVBYTE_H_ -#define __STDC_FORMAT_MACROS @@ -11,3 +38,6 @@ index bc9533c..a6cbb1a 100644 #include #include // please use a C99-compatible compiler #include +-- +2.34.1 +