Skip to content

Commit

Permalink
enhance: refine variable-length-type memory usage(#38736)
Browse files Browse the repository at this point in the history
Signed-off-by: MrPresent-Han <[email protected]>
  • Loading branch information
MrPresent-Han committed Jan 24, 2025
1 parent 6d8441a commit 058527d
Show file tree
Hide file tree
Showing 20 changed files with 335 additions and 188 deletions.
68 changes: 33 additions & 35 deletions internal/core/src/common/Chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,42 @@
namespace milvus {

// Returns string views over this chunk's rows together with their validity
// flags.
//
// NOTE(review): this span reads like a rendered diff - an earlier and a newer
// version of the function appear interleaved (two signatures, two
// reserve/loop headers, two return paths) and Codecov annotation text sits
// inside the AssertInfo calls. The comments below describe the newer lines.
//
// offset_len: optional (start row, row count) pair; when it holds no value,
//             all row_nums_ rows are returned.
// Returns: {views, validity}. validity is a copy of the matching slice of
//          valid_ when nullable_ is set, otherwise an empty vector.
//
// NOTE(review): the default argument is written on this out-of-class
// definition, while the declaration shown for Chunk.h takes the parameter
// without a default - callers that only see the header presumably cannot
// omit the argument; confirm this is intended.
std::pair<std::vector<std::string_view>, FixedVector<bool>>
StringChunk::StringViews() {
StringChunk::StringViews(
std::optional<std::pair<int64_t, int64_t>> offset_len = std::nullopt) {
// NOTE(review): start_offset is deduced as int from the literal 0, so the
// int64_t taken from offset_len below is narrowed on assignment.
auto start_offset = 0;
auto len = row_nums_;
if (offset_len.has_value()) {
start_offset = offset_len->first;
len = offset_len->second;
// Check 1: the start row must lie in [0, row_nums_).
AssertInfo(

Check warning on line 29 in internal/core/src/common/Chunk.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/common/Chunk.cpp#L27-L29

Added lines #L27 - L29 were not covered by tests
start_offset >= 0 && start_offset < row_nums_,
"Retrieve string views with out-of-bound offset:{}, len:{}, wrong",
start_offset,
len);
// Check 2: the row count must lie in (0, row_nums_].
AssertInfo(

Check warning on line 34 in internal/core/src/common/Chunk.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/common/Chunk.cpp#L34

Added line #L34 was not covered by tests
len > 0 && len <= row_nums_,
"Retrieve string views with out-of-bound offset:{}, len:{}, wrong",
start_offset,
len);
// Check 3: the requested range must end at or before the last row.
AssertInfo(

Check warning on line 39 in internal/core/src/common/Chunk.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/common/Chunk.cpp#L39

Added line #L39 was not covered by tests
start_offset + len <= row_nums_,
"Retrieve string views with out-of-bound offset:{}, len:{}, wrong",
start_offset,
len);
}

std::vector<std::string_view> ret;
ret.reserve(row_nums_);
for (int i = 0; i < row_nums_; i++) {
ret.reserve(len);
auto end_offset = start_offset + len;
for (auto i = start_offset; i < end_offset; i++) {
// Row i spans [offsets_[i], offsets_[i + 1]) inside data_; the read of
// offsets_[i + 1] means the table needs one entry past the last row visited.
ret.emplace_back(data_ + offsets_[i], offsets_[i + 1] - offsets_[i]);
}
return {ret, valid_};
// Only nullable chunks return validity flags, limited to the requested rows.
if (nullable_) {
FixedVector<bool> res_valid(valid_.begin() + start_offset,
valid_.begin() + end_offset);
return {ret, std::move(res_valid)};
}
return {ret, {}};
}

std::pair<std::vector<std::string_view>, FixedVector<bool>>
Expand All @@ -43,35 +72,4 @@ StringChunk::ViewsByOffsets(const FixedVector<int32_t>& offsets) {
return {ret, valid_res};
}

void
ArrayChunk::ConstructViews() {
views_.reserve(row_nums_);

for (int i = 0; i < row_nums_; ++i) {
int offset = offsets_lens_[2 * i];
int next_offset = offsets_lens_[2 * (i + 1)];
int len = offsets_lens_[2 * i + 1];
auto data_ptr = data_ + offset;
auto offsets_bytes_len = 0;
uint32_t* offsets_ptr = nullptr;
if (IsStringDataType(element_type_)) {
offsets_bytes_len = len * sizeof(uint32_t);
offsets_ptr = reinterpret_cast<uint32_t*>(data_ptr);
}
views_.emplace_back(data_ptr + offsets_bytes_len,
len,
next_offset - offset - offsets_bytes_len,
element_type_,
offsets_ptr);
}
}

// Wraps the views_ storage in a SpanBase with an element size of
// sizeof(ArrayView). The validity pointer is valid_.data() for nullable
// chunks and nullptr otherwise.
SpanBase
ArrayChunk::Span() const {
    auto validity = nullable_ ? valid_.data() : nullptr;
    return SpanBase(views_.data(), validity, views_.size(), sizeof(ArrayView));
}

} // namespace milvus
87 changes: 70 additions & 17 deletions internal/core/src/common/Chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "simdjson/common_defs.h"
#include "sys/mman.h"
#include "common/Types.h"

namespace milvus {
// NOTE(review): presumably padding reserved for mmap-backed string / array
// chunks; neither constant is referenced in the code visible here - confirm
// units and usage at the call sites.
constexpr uint64_t MMAP_STRING_PADDING = 1;
constexpr uint64_t MMAP_ARRAY_PADDING = 1;
Expand Down Expand Up @@ -132,8 +133,11 @@ class StringChunk : public Chunk {
StringChunk() = default;
// Forwards all arguments to Chunk and points offsets_ at the offset table
// that follows the null bitmap at the head of data.
//
// NOTE(review): this span reads like a rendered diff - the earlier lines
// (bitmap size always (row_nums + 7) / 8, uint64_t offsets) and the newer
// lines (bitmap skipped only when nullable, uint32_t offsets) appear
// interleaved.
StringChunk(int32_t row_nums, char* data, uint64_t size, bool nullable)
: Chunk(row_nums, data, size, nullable) {
auto null_bitmap_bytes_num = (row_nums + 7) / 8;
offsets_ = reinterpret_cast<uint64_t*>(data + null_bitmap_bytes_num);
// Newer lines: a non-nullable chunk has no bitmap bytes to skip.
auto null_bitmap_bytes_num = 0;
if (nullable) {
// One bit per row, rounded up to whole bytes.
null_bitmap_bytes_num = (row_nums + 7) / 8;
}
offsets_ = reinterpret_cast<uint32_t*>(data + null_bitmap_bytes_num);
}

std::string_view
Expand All @@ -146,7 +150,7 @@ class StringChunk : public Chunk {
}

std::pair<std::vector<std::string_view>, FixedVector<bool>>
StringViews();
StringViews(std::optional<std::pair<int64_t, int64_t>> offset_len);

int
binary_search_string(std::string_view target) {
Expand Down Expand Up @@ -181,13 +185,13 @@ class StringChunk : public Chunk {
return (*this)[idx].data();
}

// Returns the raw offsets_ pointer (mutable access, no copy).
// NOTE(review): two return types appear here (uint64_t* and uint32_t*) -
// presumably the earlier and the newer line of a rendered diff.
uint64_t*
uint32_t*
Offsets() {
return offsets_;
}

protected:
uint64_t* offsets_;
uint32_t* offsets_;
};

using JSONChunk = StringChunk;
Expand All @@ -200,22 +204,72 @@ class ArrayChunk : public Chunk {
milvus::DataType element_type,
bool nullable)
: Chunk(row_nums, data, size, nullable), element_type_(element_type) {
auto null_bitmap_bytes_num = (row_nums + 7) / 8;
auto null_bitmap_bytes_num = 0;
if (nullable) {
null_bitmap_bytes_num = (row_nums + 7) / 8;

Check warning on line 209 in internal/core/src/common/Chunk.h

View check run for this annotation

Codecov / codecov/patch

internal/core/src/common/Chunk.h#L209

Added line #L209 was not covered by tests
}
offsets_lens_ =
reinterpret_cast<uint64_t*>(data + null_bitmap_bytes_num);
ConstructViews();
reinterpret_cast<uint32_t*>(data + null_bitmap_bytes_num);
}

SpanBase
Span() const;

// Returns the ArrayView for row idx.
//
// NOTE(review): this span reads like a rendered diff - the earlier version
// (returning views_[idx]) and the newer version (building the view on each
// call from offsets_lens_) appear interleaved. The comments below describe
// the newer lines. No bounds check on idx is visible here.
ArrayView
View(int64_t idx) const {
return views_[idx];
View(int idx) const {
// offsets_lens_ holds two entries per row: [2*idx] is the row's start offset
// in data_, [2*idx + 1] its length value; [2*idx + 2] is the next row's
// start offset and is used as the end of this row's bytes.
int idx_off = 2 * idx;
auto offset = offsets_lens_[idx_off];
auto len = offsets_lens_[idx_off + 1];
auto next_offset = offsets_lens_[idx_off + 2];
auto data_ptr = data_ + offset;
uint32_t offsets_bytes_len = 0;
uint32_t* offsets_ptr = nullptr;
// For string element types the row begins with len uint32_t entries; they
// are passed on as the offsets pointer and the payload starts after them.
if (IsStringDataType(element_type_)) {
offsets_bytes_len = len * sizeof(uint32_t);
offsets_ptr = reinterpret_cast<uint32_t*>(data_ptr);
}

// Third argument: the row's byte span minus the leading offset entries.
return ArrayView(data_ptr + offsets_bytes_len,
len,
next_offset - offset - offsets_bytes_len,
element_type_,
offsets_ptr);
}

void
ConstructViews();
// Returns ArrayViews for a range of rows together with their validity flags.
//
// offset_len: optional (start row, row count) pair; when it holds no value,
//             all row_nums_ rows are returned.
// Returns: {views, validity}. validity is a copy of the matching slice of
//          valid_ when nullable_ is set, otherwise an empty vector.
//
// NOTE(review): the "Check warning ... Codecov" text inside this function
// looks like page annotation residue, not source code.
std::pair<std::vector<ArrayView>, FixedVector<bool>>
Views(std::optional<std::pair<int64_t, int64_t>> offset_len =
std::nullopt) const {
// NOTE(review): start_offset is deduced as int from the literal 0, so the
// int64_t taken from offset_len below is narrowed on assignment.
auto start_offset = 0;
auto len = row_nums_;
if (offset_len.has_value()) {
start_offset = offset_len->first;
len = offset_len->second;
// Check 1: the start row must lie in [0, row_nums_).
AssertInfo(start_offset >= 0 && start_offset < row_nums_,

Check warning on line 244 in internal/core/src/common/Chunk.h

View check run for this annotation

Codecov / codecov/patch

internal/core/src/common/Chunk.h#L242-L244

Added lines #L242 - L244 were not covered by tests
"Retrieve array views with out-of-bound offset:{}, "
"len:{}, wrong",
start_offset,
len);
// Check 2: the row count must lie in (0, row_nums_].
AssertInfo(len > 0 && len <= row_nums_,

Check warning on line 249 in internal/core/src/common/Chunk.h

View check run for this annotation

Codecov / codecov/patch

internal/core/src/common/Chunk.h#L249

Added line #L249 was not covered by tests
"Retrieve array views with out-of-bound offset:{}, "
"len:{}, wrong",
start_offset,
len);
// Check 3: the requested range must end at or before the last row.
AssertInfo(start_offset + len <= row_nums_,

Check warning on line 254 in internal/core/src/common/Chunk.h

View check run for this annotation

Codecov / codecov/patch

internal/core/src/common/Chunk.h#L254

Added line #L254 was not covered by tests
"Retrieve array views with out-of-bound offset:{}, "
"len:{}, wrong",
start_offset,
len);
}
std::vector<ArrayView> views;
views.reserve(len);
auto end_offset = start_offset + len;
// Each view is produced by View(i) for the requested rows only.
for (auto i = start_offset; i < end_offset; i++) {
views.emplace_back(View(i));
}
// Only nullable chunks return validity flags, limited to the requested rows.
if (nullable_) {
FixedVector<bool> res_valid(valid_.begin() + start_offset,
valid_.begin() + end_offset);
return {std::move(views), std::move(res_valid)};

Check warning on line 269 in internal/core/src/common/Chunk.h

View check run for this annotation

Codecov / codecov/patch

internal/core/src/common/Chunk.h#L267-L269

Added lines #L267 - L269 were not covered by tests
}
return {std::move(views), {}};
}

const char*
ValueAt(int64_t idx) const override {
Expand All @@ -225,8 +279,7 @@ class ArrayChunk : public Chunk {

private:
milvus::DataType element_type_;
uint64_t* offsets_lens_;
std::vector<ArrayView> views_;
uint32_t* offsets_lens_;
};

class SparseFloatVectorChunk : public Chunk {
Expand Down
6 changes: 6 additions & 0 deletions internal/core/src/common/ChunkTarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ class MmapChunkTarget : public ChunkTarget {
clear() {
pos = 0;
}

// Stores a 32-bit value at the current write position (buf + pos) and
// advances pos by four bytes.
// NOTE(review): no capacity check is visible here and the store goes through
// a reinterpret_cast'ed pointer - buf + pos is presumably in bounds and
// suitably aligned for uint32_t; confirm at the call sites.
void
write(uint32_t value) {
    auto* dest = reinterpret_cast<uint32_t*>(buf + pos);
    *dest = value;
    pos += sizeof(value);
}
};

public:
Expand Down
Loading

0 comments on commit 058527d

Please sign in to comment.