Skip to content

Commit

Permalink
Enhance ConcurrentBitset and BitsetView
Browse files Browse the repository at this point in the history
Signed-off-by: zhenshan.cao <[email protected]>
  • Loading branch information
czs007 committed Apr 6, 2022
1 parent b40dfb2 commit b3a5672
Show file tree
Hide file tree
Showing 12 changed files with 635 additions and 301 deletions.
12 changes: 8 additions & 4 deletions knowhere/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@ if ( LINUX )
set(depend_libs
faiss
pthread
knowhere_utils
)

if (KNOWHERE_SUPPORT_SPTAG)
Expand Down Expand Up @@ -179,10 +178,12 @@ if ( LINUX )
${vector_index_srcs}
${vector_offset_index_srcs}
)
target_include_directories(knowhere PUBLIC ${KNOWHERE_SOURCE_DIR}/knowere)
target_include_directories(knowhere PUBLIC ${KNOWHERE_SOURCE_DIR}/knowhere)
endif ()

target_link_libraries(knowhere ${depend_libs})
target_link_libraries(knowhere -Wl,--whole-archive knowhere_utils -Wl,--no-whole-archive)
#target_link_libraries(knowhere knowhere_utils)

set(KNOWHERE_INCLUDE_DIRS
${KNOWHERE_SOURCE_DIR}
${KNOWHERE_SOURCE_DIR}/thirdparty
Expand Down Expand Up @@ -212,7 +213,10 @@ if (MACOS)
)
endif ()

target_link_libraries(knowhere pthread knowhere_utils)
target_link_libraries(knowhere pthread)
target_link_libraries(knowhere -Wl,--whole-archive knowhere_utils -Wl,--no-whole-archive)
#target_link_libraries(knowhere knowhere_utils)

set(KNOWHERE_INCLUDE_DIRS ${KNOWHERE_SOURCE_DIR})
endif()

Expand Down
2 changes: 1 addition & 1 deletion knowhere/index/vector_index/Statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ class Statistics {

void
update_filter_percentage(const faiss::BitsetView bitset) {
double fps = !bitset.empty() ? static_cast<double>(bitset.count_1()) / bitset.size() : 0.0;
double fps = !bitset.empty() ? static_cast<double>(bitset.count()) / bitset.size() : 0.0;
filter_stat[static_cast<int>(fps * 100) / 5] += 1;
}

Expand Down
327 changes: 327 additions & 0 deletions knowhere/utils/Bitset.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,327 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.

#include <cstring>
#include <string>
#include <memory>
#include "Bitset.h"
#include "BitsetView.h"

namespace faiss {

// In-place bitwise AND with another bitset.
//
// Combines bitset_.size() bytes of `bitset`'s buffer into this bitset's buffer:
// first in 8-byte words, then the leftover (< 8) bytes one at a time.
// Returns *this so calls can be chained.
ConcurrentBitset&
ConcurrentBitset::operator&=(const ConcurrentBitset& bitset) {
    const size_t total_bytes = bitset_.size();
    const size_t word_count = total_bytes / 8;
    const size_t tail_begin = word_count * 8;

    auto dst_bytes = mutable_data();
    auto src_bytes = bitset.data();

    // Bulk part: whole 64-bit words.
    auto dst_words = reinterpret_cast<uint64_t*>(dst_bytes);
    auto src_words = reinterpret_cast<const uint64_t*>(src_bytes);
    for (size_t w = 0; w < word_count; ++w) {
        dst_words[w] &= src_words[w];
    }

    // Tail part: bytes that do not fill a whole word.
    for (size_t b = tail_begin; b < total_bytes; ++b) {
        dst_bytes[b] &= src_bytes[b];
    }

    return *this;
}

// In-place bitwise AND with a bitset view.
//
// Combines bitset_.size() bytes of the view's buffer into this bitset's buffer:
// first in 8-byte words, then the leftover (< 8) bytes one at a time.
// Returns *this so calls can be chained.
ConcurrentBitset&
ConcurrentBitset::operator&=(const BitsetView& view) {
    const size_t total_bytes = bitset_.size();
    const size_t word_count = total_bytes / 8;
    const size_t tail_begin = word_count * 8;

    auto dst_bytes = mutable_data();
    auto src_bytes = view.data();

    // Bulk part: whole 64-bit words.
    auto dst_words = reinterpret_cast<uint64_t*>(dst_bytes);
    auto src_words = reinterpret_cast<const uint64_t*>(src_bytes);
    for (size_t w = 0; w < word_count; ++w) {
        dst_words[w] &= src_words[w];
    }

    // Tail part: bytes that do not fill a whole word.
    for (size_t b = tail_begin; b < total_bytes; ++b) {
        dst_bytes[b] &= src_bytes[b];
    }

    return *this;
}

// Returns a newly allocated bitset holding the bitwise AND of this bitset and `bitset`.
//
// bitset_.size() bytes are read from both operands and written to the result,
// so `bitset` must provide at least as many bytes as this bitset.
// Neither operand is modified.
std::shared_ptr<ConcurrentBitset>
ConcurrentBitset::operator&(const ConcurrentBitset& bitset) const {
    // The result must be sized by bit capacity (size()), not by count():
    // count() is the number of *set* bits, which would under-allocate the
    // result buffer and let the loops below write past its end.
    // NOTE(review): assumes the single-argument constructor takes a bit count -- confirm against Bitset.h.
    auto result_bitset = std::make_shared<ConcurrentBitset>(size());

    auto result_8 = result_bitset->mutable_data();
    auto result_64 = reinterpret_cast<uint64_t*>(result_8);

    auto u8_1 = data();
    auto u8_2 = bitset.data();
    auto u64_1 = reinterpret_cast<const uint64_t*>(u8_1);
    auto u64_2 = reinterpret_cast<const uint64_t*>(u8_2);

    const size_t n8 = bitset_.size();
    const size_t n64 = n8 / 8;

    // Whole 64-bit words first.
    for (size_t i = 0; i < n64; i++) {
        result_64[i] = u64_1[i] & u64_2[i];
    }

    // Then the (< 8) trailing bytes that do not fill a word.
    for (size_t i = n64 * 8; i < n8; i++) {
        result_8[i] = u8_1[i] & u8_2[i];
    }

    return result_bitset;
}

// Returns a newly allocated bitset holding the bitwise AND of this bitset and `view`.
//
// bitset_.size() bytes are read from both operands and written to the result,
// so `view` must provide at least as many bytes as this bitset.
// Neither operand is modified.
std::shared_ptr<ConcurrentBitset>
ConcurrentBitset::operator&(const BitsetView& view) const {
    // The result must be sized by bit capacity (size()), not by view.count():
    // count() is the number of *set* bits, which would under-allocate the
    // result buffer and let the loops below write past its end.
    // NOTE(review): assumes the single-argument constructor takes a bit count -- confirm against Bitset.h.
    auto result_bitset = std::make_shared<ConcurrentBitset>(size());

    auto result_8 = result_bitset->mutable_data();
    auto result_64 = reinterpret_cast<uint64_t*>(result_8);

    auto u8_1 = data();
    auto u8_2 = view.data();
    auto u64_1 = reinterpret_cast<const uint64_t*>(u8_1);
    auto u64_2 = reinterpret_cast<const uint64_t*>(u8_2);

    const size_t n8 = bitset_.size();
    const size_t n64 = n8 / 8;

    // Whole 64-bit words first.
    for (size_t i = 0; i < n64; i++) {
        result_64[i] = u64_1[i] & u64_2[i];
    }

    // Then the (< 8) trailing bytes that do not fill a word.
    for (size_t i = n64 * 8; i < n8; i++) {
        result_8[i] = u8_1[i] & u8_2[i];
    }

    return result_bitset;
}


// In-place bitwise OR with another bitset.
//
// Combines bitset_.size() bytes of `bitset`'s buffer into this bitset's buffer:
// first in 8-byte words, then the leftover (< 8) bytes one at a time.
// Returns *this so calls can be chained.
ConcurrentBitset&
ConcurrentBitset::operator|=(const ConcurrentBitset& bitset) {
    const size_t total_bytes = bitset_.size();
    const size_t word_count = total_bytes / 8;
    const size_t tail_begin = word_count * 8;

    auto dst_bytes = mutable_data();
    auto src_bytes = bitset.data();

    // Bulk part: whole 64-bit words.
    auto dst_words = reinterpret_cast<uint64_t*>(dst_bytes);
    auto src_words = reinterpret_cast<const uint64_t*>(src_bytes);
    for (size_t w = 0; w < word_count; ++w) {
        dst_words[w] |= src_words[w];
    }

    // Tail part: bytes that do not fill a whole word.
    for (size_t b = tail_begin; b < total_bytes; ++b) {
        dst_bytes[b] |= src_bytes[b];
    }

    return *this;
}

// In-place bitwise OR with a bitset view.
//
// Combines bitset_.size() bytes of the view's buffer into this bitset's buffer:
// first in 8-byte words, then the leftover (< 8) bytes one at a time.
// Returns *this so calls can be chained.
ConcurrentBitset&
ConcurrentBitset::operator|=(const BitsetView& view) {
    const size_t total_bytes = bitset_.size();
    const size_t word_count = total_bytes / 8;
    const size_t tail_begin = word_count * 8;

    auto dst_bytes = mutable_data();
    auto src_bytes = view.data();

    // Bulk part: whole 64-bit words.
    auto dst_words = reinterpret_cast<uint64_t*>(dst_bytes);
    auto src_words = reinterpret_cast<const uint64_t*>(src_bytes);
    for (size_t w = 0; w < word_count; ++w) {
        dst_words[w] |= src_words[w];
    }

    // Tail part: bytes that do not fill a whole word.
    for (size_t b = tail_begin; b < total_bytes; ++b) {
        dst_bytes[b] |= src_bytes[b];
    }

    return *this;
}


// Returns a newly allocated bitset holding the bitwise OR of this bitset and `bitset`.
//
// bitset_.size() bytes are read from both operands and written to the result,
// so `bitset` must provide at least as many bytes as this bitset.
// Neither operand is modified.
std::shared_ptr<ConcurrentBitset>
ConcurrentBitset::operator|(const ConcurrentBitset& bitset) const {
    // The result must be sized by bit capacity (size()), not by count():
    // count() is the number of *set* bits, which would under-allocate the
    // result buffer and let the loops below write past its end.
    // NOTE(review): assumes the single-argument constructor takes a bit count -- confirm against Bitset.h.
    auto result_bitset = std::make_shared<ConcurrentBitset>(size());

    auto result_8 = result_bitset->mutable_data();
    auto result_64 = reinterpret_cast<uint64_t*>(result_8);

    auto u8_1 = data();
    auto u8_2 = bitset.data();
    auto u64_1 = reinterpret_cast<const uint64_t*>(u8_1);
    auto u64_2 = reinterpret_cast<const uint64_t*>(u8_2);

    const size_t n8 = bitset_.size();
    const size_t n64 = n8 / 8;

    // Whole 64-bit words first.
    for (size_t i = 0; i < n64; i++) {
        result_64[i] = u64_1[i] | u64_2[i];
    }

    // Then the (< 8) trailing bytes that do not fill a word.
    for (size_t i = n64 * 8; i < n8; i++) {
        result_8[i] = u8_1[i] | u8_2[i];
    }

    return result_bitset;
}

// Returns a newly allocated bitset holding the bitwise OR of this bitset and `view`.
//
// bitset_.size() bytes are read from both operands and written to the result,
// so `view` must provide at least as many bytes as this bitset.
// Neither operand is modified.
std::shared_ptr<ConcurrentBitset>
ConcurrentBitset::operator|(const BitsetView& view) const {
    // The result must be sized by bit capacity (size()), not by view.count():
    // count() is the number of *set* bits, which would under-allocate the
    // result buffer and let the loops below write past its end.
    // NOTE(review): assumes the single-argument constructor takes a bit count -- confirm against Bitset.h.
    auto result_bitset = std::make_shared<ConcurrentBitset>(size());

    auto result_8 = result_bitset->mutable_data();
    auto result_64 = reinterpret_cast<uint64_t*>(result_8);

    auto u8_1 = data();
    auto u8_2 = view.data();
    auto u64_1 = reinterpret_cast<const uint64_t*>(u8_1);
    auto u64_2 = reinterpret_cast<const uint64_t*>(u8_2);

    const size_t n8 = bitset_.size();
    const size_t n64 = n8 / 8;

    // Whole 64-bit words first.
    for (size_t i = 0; i < n64; i++) {
        result_64[i] = u64_1[i] | u64_2[i];
    }

    // Then the (< 8) trailing bytes that do not fill a word.
    for (size_t i = n64 * 8; i < n8; i++) {
        result_8[i] = u8_1[i] | u8_2[i];
    }

    return result_bitset;
}


// Flips every bit of the underlying buffer in place and returns *this.
//
// All bitset_.size() bytes are complemented: 8-byte words first, then the
// leftover (< 8) bytes individually.
ConcurrentBitset&
ConcurrentBitset::negate() {
    const size_t total_bytes = bitset_.size();
    const size_t word_count = total_bytes / 8;

    auto bytes = mutable_data();
    auto words = reinterpret_cast<uint64_t*>(bytes);

    // Bulk part: whole 64-bit words.
    for (size_t w = 0; w < word_count; ++w) {
        words[w] = ~words[w];
    }

    // Tail part: bytes that do not fill a whole word.
    for (size_t b = word_count * 8; b < total_bytes; ++b) {
        bytes[b] = ~bytes[b];
    }

    return *this;
}

size_t
ConcurrentBitset::count() const {
size_t ret = 0;
auto p_data = reinterpret_cast<const uint64_t *>(data());
auto len = size() >> 3;
//auto remainder = size() % 8;
auto popcount8 = [&](uint8_t x) -> int{
x = (x & 0x55) + ((x >> 1) & 0x55);
x = (x & 0x33) + ((x >> 2) & 0x33);
x = (x & 0x0F) + ((x >> 4) & 0x0F);
return x;
};
for (size_t i = 0; i < len; ++i) {
ret += __builtin_popcountl(*p_data);
p_data++;
}
auto p_byte = data() + (len << 3);
for (auto i = (len << 3); i < size(); ++i) {
ret += popcount8(*p_byte);
p_byte++;
}
return ret;
}

// Converts the bitset to a string of '0'/'1' characters of length size().
//
// Bit i is written at position size() - 1 - i, i.e. bit 0 is the right-most
// character.
ConcurrentBitset::operator std::string() const {
    const char one = '1';
    const char zero = '0';
    const size_t len = size();
    std::string s(len, zero);

    for (size_t i = 0; i < len; ++i) {
        if (test(id_type_t(i))) {
            // Set exactly one character. std::string::assign(count, ch) would
            // instead replace the whole string with `count` copies of `ch`.
            s[len - 1 - i] = one;
        }
    }
    return s;
}

// Equality of two bitsets: true when both are the same object, or when their
// size() and byte_size() agree and their buffers compare equal byte-for-byte
// over byte_size() bytes.
bool operator==(const ConcurrentBitset& lhs, const ConcurrentBitset& rhs) {
    const bool same_object = (std::addressof(lhs) == std::addressof(rhs));
    if (same_object) {
        return true;
    }

    // Differing bit or byte capacity can never compare equal.
    const bool same_shape = (lhs.size() == rhs.size()) && (lhs.byte_size() == rhs.byte_size());
    if (!same_shape) {
        return false;
    }

    return std::memcmp(lhs.data(), rhs.data(), lhs.byte_size()) == 0;
}

// Inequality, defined as the logical negation of operator==.
bool operator!=(const ConcurrentBitset& lhs, const ConcurrentBitset& rhs){
    const bool equal = (lhs == rhs);
    return !equal;
}

// Streams the bitset's std::string conversion to `os` and returns the stream.
std::ostream& operator<<(std::ostream& os, const ConcurrentBitset& bitset)
{
    const std::string text = static_cast<std::string>(bitset);
    os << text;
    return os;
}


} // namespace faiss
Loading

0 comments on commit b3a5672

Please sign in to comment.