Skip to content

Commit

Permalink
[Opt](compression) Opt gzip decompress by libdeflate on X86 and X86_64 platforms.
Browse files Browse the repository at this point in the history
  • Loading branch information
kaka11chen committed Nov 24, 2023
1 parent dfe3a2d commit 08c3330
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 1 deletion.
5 changes: 5 additions & 0 deletions be/cmake/thirdparty.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,8 @@ if (OS_MACOSX)
add_thirdparty(iberty)
add_thirdparty(intl)
endif()

# libdeflate (registered under the name "deflate") is only added as a
# third-party dependency when the build target architecture is x86 or x86_64.
if ("${CMAKE_BUILD_TARGET_ARCH}" STREQUAL "x86" OR "${CMAKE_BUILD_TARGET_ARCH}" STREQUAL "x86_64")
add_thirdparty(deflate)
endif()
45 changes: 44 additions & 1 deletion be/src/util/block_compression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
#include <gen_cpp/parquet_types.h>
#include <gen_cpp/segment_v2.pb.h>
#include <glog/logging.h>
// Only used on x86 or x86_64
#if defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(__i386__) || \
defined(__i386) || defined(_M_IX86)
#include <libdeflate.h>
#endif
#include <limits.h>
#include <lz4/lz4.h>
#include <lz4/lz4frame.h>
Expand Down Expand Up @@ -1006,6 +1011,39 @@ class GzipBlockCompression final : public ZlibBlockCompression {
const static int MEM_LEVEL = 8;
};

// Only used on x86 or x86_64
#if defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(__i386__) || \
defined(__i386) || defined(_M_IX86)
/// Gzip codec whose decompression is performed by libdeflate.
///
/// Only `decompress` is overridden; everything else is inherited from
/// GzipBlockCompression.
class GzipBlockCompressionByLibdeflate final : public GzipBlockCompression {
public:
    GzipBlockCompressionByLibdeflate() : GzipBlockCompression() {}

    /// Returns the process-wide singleton instance of this codec.
    static GzipBlockCompressionByLibdeflate* instance() {
        static GzipBlockCompressionByLibdeflate s_instance;
        return &s_instance;
    }
    ~GzipBlockCompressionByLibdeflate() override = default;

    /// Decompresses the gzip stream in `input` into the buffer described by `output`.
    ///
    /// @param input  gzip-compressed bytes; an empty slice yields an empty result.
    /// @param output on entry, `data`/`size` describe the destination buffer and its
    ///               capacity; on success `size` is set to the number of bytes
    ///               actually written.
    /// @return Status::OK() on success, Status::InternalError if the decompressor
    ///         cannot be allocated or libdeflate reports a failure.
    Status decompress(const Slice& input, Slice* output) override {
        if (input.empty()) {
            output->size = 0;
            return Status::OK();
        }
        // One decompressor per thread, created on first use and released by
        // libdeflate_free_decompressor when the thread exits.
        thread_local std::unique_ptr<libdeflate_decompressor, void (*)(libdeflate_decompressor*)>
                decompressor {libdeflate_alloc_decompressor(), libdeflate_free_decompressor};
        if (!decompressor) {
            return Status::InternalError("libdeflate_alloc_decompressor error.");
        }
        std::size_t out_len = 0;
        const auto result = libdeflate_gzip_decompress(decompressor.get(), input.data, input.size,
                                                       output->data, output->size, &out_len);
        if (result != LIBDEFLATE_SUCCESS) {
            return Status::InternalError("libdeflate_gzip_decompress error, res={}", result);
        }
        // Because a non-null out_len pointer is passed, libdeflate may succeed
        // after writing fewer bytes than the buffer capacity; report the real
        // length back to the caller, as the empty-input branch already does.
        output->size = out_len;
        return Status::OK();
    }
};
#endif
Status get_block_compression_codec(segment_v2::CompressionTypePB type,
BlockCompressionCodec** codec) {
switch (type) {
Expand Down Expand Up @@ -1054,8 +1092,13 @@ Status get_block_compression_codec(tparquet::CompressionCodec::type parquet_code
*codec = ZstdBlockCompression::instance();
break;
case tparquet::CompressionCodec::GZIP:
// Only used on x86 or x86_64
#if defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(__i386__) || \
defined(__i386) || defined(_M_IX86)
*codec = GzipBlockCompressionByLibdeflate::instance();
#else
*codec = GzipBlockCompression::instance();
break;
#endif
default:
return Status::InternalError("unknown compression type({})", parquet_codec);
}
Expand Down
15 changes: 15 additions & 0 deletions thirdparty/build-thirdparty.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1691,6 +1691,20 @@ build_avx2neon() {
cp -r ./* "${TP_INSTALL_DIR}/include/avx2neon/"
}

# libdeflate
build_libdeflate() {
    # Presumably aborts when the unpacked libdeflate sources are missing
    # (check_if_source_exist is defined elsewhere in this script).
    check_if_source_exist "${LIBDEFLATE_SOURCE}"

    # Arguments for the CMake configure step: Release build installed into
    # the shared third-party prefix.
    local -a configure_args=(
        -G "${GENERATOR}"
        -DCMAKE_INSTALL_PREFIX="${TP_INSTALL_DIR}"
        -DCMAKE_BUILD_TYPE=Release
    )

    # Start from a clean build directory inside the source tree.
    cd "${TP_SOURCE_DIR}/${LIBDEFLATE_SOURCE}"
    rm -rf "${BUILD_DIR}"
    mkdir -p "${BUILD_DIR}"
    cd "${BUILD_DIR}"

    # Configure, build in parallel, then install.
    "${CMAKE_CMD}" "${configure_args[@]}" ..
    "${BUILD_SYSTEM}" -j "${PARALLEL}"
    "${BUILD_SYSTEM}" install
}

if [[ "${#packages[@]}" -eq 0 ]]; then
packages=(
libunixodbc
Expand Down Expand Up @@ -1754,6 +1768,7 @@ if [[ "${#packages[@]}" -eq 0 ]]; then
libunwind
dragonbox
avx2neon
libdeflate
)
if [[ "$(uname -s)" == 'Darwin' ]]; then
read -r -a packages <<<"binutils gettext ${packages[*]}"
Expand Down
7 changes: 7 additions & 0 deletions thirdparty/vars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,12 @@ AVX2NEON_NAME=v1.0.0.tar.gz
AVX2NEON_SOURCE=AvxToNeon-1.0.0
AVX2NEON_MD5SUM="692d0e0f8b885a86ebc5172a9d8ee8db"

# libdeflate v1.19: upstream archive URL, archive file name, name of the
# unpacked source directory, and the archive's MD5 checksum (presumably
# verified by the download step -- confirm against the download script).
LIBDEFLATE_DOWNLOAD="https://github.com/ebiggers/libdeflate/archive/refs/tags/v1.19.zip"
LIBDEFLATE_NAME="libdeflate-1.19.zip"
LIBDEFLATE_SOURCE="libdeflate-1.19"
LIBDEFLATE_MD5SUM="1ec42dfe7d777929ade295281560d750"

# all thirdparties which need to be downloaded is set in array TP_ARCHIVES
export TP_ARCHIVES=(
'LIBEVENT'
Expand Down Expand Up @@ -548,6 +554,7 @@ export TP_ARCHIVES=(
'HADOOP_LIBS'
'DRAGONBOX'
'AVX2NEON'
'LIBDEFLATE'
)

if [[ "$(uname -s)" == 'Darwin' ]]; then
Expand Down

0 comments on commit 08c3330

Please sign in to comment.