Skip to content

Commit

Permalink
[CB] Return Block manager asserts to destructors (#1569)
Browse files Browse the repository at this point in the history
# Ticket:
* [CVS-161217](https://jira.devtools.intel.com/browse/CVS-161217)

---------

Co-authored-by: Ilya Lavrenov <[email protected]>
  • Loading branch information
iefode and ilya-lavrenov authored Feb 7, 2025
1 parent d18d890 commit 8919eb1
Show file tree
Hide file tree
Showing 4 changed files with 156 additions and 31 deletions.
11 changes: 7 additions & 4 deletions src/cpp/src/block_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ class BlockAllocator {
size_t m_num_layers;
bool m_enable_prefix_caching;
ov::genai::OverwritableBlocksHashStore m_overwriteable_blocks;

public:
/**
* Constructs the BlockAllocator.
Expand All @@ -215,15 +216,17 @@ class BlockAllocator {
per_layer_block_list.push_back(std::make_shared<KVCacheBlock>(block_id));
}
}
}
else {
} else {
m_free_blocks_num = std::vector<size_t>(m_num_layers, 0);
}
}

~BlockAllocator() {
// sanity check to validate that all blocks are freed
// OPENVINO_ASSERT(m_total_num_blocks == m_free_blocks.size());
for (auto& free_block : m_free_blocks_num) {
size_t free_and_overwritable_block_cnt = free_block + num_overwriteable_blocks();
OPENVINO_ASSERT(m_total_num_blocks == free_and_overwritable_block_cnt, "Expected num free blocks: ", m_total_num_blocks, ", actual: ", free_and_overwritable_block_cnt);
}
}

void increase_kv_blocks_number(size_t new_kv_blocks_count) {
Expand Down Expand Up @@ -527,7 +530,7 @@ class BlockManager {

~BlockManager() {
// sanity check that all sequences are freed
// OPENVINO_ASSERT(m_block_table.empty());
OPENVINO_ASSERT(m_block_table.empty());
}

/**
Expand Down
102 changes: 76 additions & 26 deletions tests/cpp/block_allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ TEST_P(TestBlockAllocatorWithNumLayers, AllocatesBlocksAccordingToNumLayers) {
for (size_t i = 0; i < num_layers; i++) {
EXPECT_EQ(allocator.num_free_blocks(i), initial_num_free_blocks - 1);
}

allocator.free(blocks);
}

INSTANTIATE_TEST_SUITE_P(VariousNumLayers, TestBlockAllocatorWithNumLayers, ::testing::Values(1, 2, 15, 23, 42));
Expand All @@ -29,25 +31,31 @@ TEST(TestBlockAllocator, AllocatesBlocksIndependentlyToLayers) {
size_t initial_num_free_blocks = 10;
auto allocator = ov::genai::BlockAllocator(initial_num_free_blocks, false, num_layers);

allocator.allocate_block(0);
allocator.allocate_block(0);
std::map<ov::genai::KVCacheBlock::Ptr, size_t> blocks_to_release;
blocks_to_release.insert({allocator.allocate_block(0), 0});
blocks_to_release.insert({allocator.allocate_block(0), 0});
EXPECT_EQ(allocator.num_free_blocks(0), 8);
EXPECT_EQ(allocator.num_free_blocks(1), 10);
EXPECT_EQ(allocator.num_free_blocks(2), 10);

allocator.allocate_block(2);
blocks_to_release.insert({allocator.allocate_block(2), 2});

EXPECT_EQ(allocator.num_free_blocks(0), 8);
EXPECT_EQ(allocator.num_free_blocks(1), 10);
EXPECT_EQ(allocator.num_free_blocks(2), 9);

allocator.allocate_block(1);
allocator.allocate_block(1);
allocator.allocate_block(1);
blocks_to_release.insert({allocator.allocate_block(1), 1});
blocks_to_release.insert({allocator.allocate_block(1), 1});
blocks_to_release.insert({allocator.allocate_block(1), 1});

EXPECT_EQ(allocator.num_free_blocks(0), 8);
EXPECT_EQ(allocator.num_free_blocks(1), 7);
EXPECT_EQ(allocator.num_free_blocks(2), 9);

for (auto& block_to_release : blocks_to_release) {
ov::genai::KVCacheBlock::Ptr tmp = block_to_release.first;
allocator.free(tmp, block_to_release.second);
}
}

TEST(TestBlockAllocator, FreesBlocksIndependentlyFromLayers) {
Expand Down Expand Up @@ -80,6 +88,9 @@ TEST(TestBlockAllocator, FreesBlocksIndependentlyFromLayers) {
EXPECT_EQ(allocator.num_free_blocks(0), 9);
EXPECT_EQ(allocator.num_free_blocks(1), 9);
EXPECT_EQ(allocator.num_free_blocks(2), 10);

allocator.free(block_01, 0);
allocator.free(block_11, 1);
}

class PrefixCachingBlockAllocatorTest : public testing::Test {
Expand Down Expand Up @@ -108,22 +119,42 @@ TEST_F(PrefixCachingBlockAllocatorTest, OnlyAllocatesAndFreesBlocksFromAllLayers

TEST_F(PrefixCachingBlockAllocatorTest, HandlesFreesCorrectlyWithMixedHashFrees) {
// allocate one block so that there is something to free
allocator.allocate_block(0, cached_blocks_map);
allocator.allocate_block(1, cached_blocks_map);
allocator.allocate_block(2, cached_blocks_map);
auto a = allocator.allocate_block(0, cached_blocks_map);
auto b = allocator.allocate_block(1, cached_blocks_map);
auto c = allocator.allocate_block(2, cached_blocks_map);
ASSERT_EQ(allocator.num_free_blocks(0), 7);

ov::genai::BlocksPerLayer mixed_hash_blocks;
mixed_hash_blocks.reserve(num_layers);
auto hash_0_blocks = cached_blocks_map[0];
auto hash_1_blocks = cached_blocks_map[1];
std::copy(hash_0_blocks.begin(), hash_0_blocks.begin() + num_layers / 2, std::back_inserter(mixed_hash_blocks));
std::copy(hash_1_blocks.begin() + num_layers / 2, hash_1_blocks.end(), std::back_inserter(mixed_hash_blocks));
// allocator.free(cached_blocks_map);

EXPECT_NO_THROW(allocator.free(mixed_hash_blocks));
EXPECT_EQ(allocator.num_free_blocks(0), 8);
EXPECT_EQ(allocator.num_free_blocks(num_layers - 1), 8);
EXPECT_EQ(allocator.num_overwriteable_blocks(), 0); // mixed hash, can't store under blocks across layers under same hash
{
ov::genai::BlocksPerLayer mixed_hash_blocks;
mixed_hash_blocks.reserve(num_layers);
auto hash_0_blocks = cached_blocks_map[0];
auto hash_1_blocks = cached_blocks_map[1];
std::copy(hash_0_blocks.begin(), hash_0_blocks.begin() + num_layers / 2, std::back_inserter(mixed_hash_blocks));
std::copy(hash_1_blocks.begin() + num_layers / 2, hash_1_blocks.end(), std::back_inserter(mixed_hash_blocks));

EXPECT_NO_THROW(allocator.free(mixed_hash_blocks));
EXPECT_EQ(allocator.num_free_blocks(0), 8);
EXPECT_EQ(allocator.num_free_blocks(num_layers - 1), 8);
EXPECT_EQ(allocator.num_overwriteable_blocks(), 0); // mixed hash, can't store under blocks across layers under same hash
}

{
ov::genai::BlocksPerLayer mixed_hash_blocks;
mixed_hash_blocks.reserve(num_layers);
auto hash_0_blocks = cached_blocks_map[0];
auto hash_1_blocks = cached_blocks_map[1];
std::copy(hash_0_blocks.begin() + num_layers / 2, hash_0_blocks.end(), std::back_inserter(mixed_hash_blocks));
std::copy(hash_1_blocks.begin(), hash_1_blocks.begin() + num_layers / 2, std::back_inserter(mixed_hash_blocks));

EXPECT_NO_THROW(allocator.free(mixed_hash_blocks));
EXPECT_EQ(allocator.num_free_blocks(0), 9);
EXPECT_EQ(allocator.num_free_blocks(num_layers - 1), 9);
EXPECT_EQ(allocator.num_overwriteable_blocks(), 0); // mixed hash, can't store under blocks across layers under same hash
}

allocator.free(c);
}

TEST_F(PrefixCachingBlockAllocatorTest, AllocatesFromOverwriteableBlocksWhenFreePoolIsExhausted) {
Expand All @@ -137,25 +168,35 @@ TEST_F(PrefixCachingBlockAllocatorTest, AllocatesFromOverwriteableBlocksWhenFree

ASSERT_EQ(allocator.num_overwriteable_blocks(), 3);

std::vector<ov::genai::BlocksPerLayer> block_to_release;
for (size_t i = 0; i < initial_num_free_blocks - 3; i++) {
allocator.allocate_block(1337 + i, cached_blocks_map);
block_to_release.push_back(allocator.allocate_block(1337 + i, cached_blocks_map));
EXPECT_EQ(allocator.num_overwriteable_blocks(), 3);
}

EXPECT_EQ(allocator.num_overwriteable_blocks(), 3);
allocator.allocate_block(31337, cached_blocks_map);
block_to_release.push_back(allocator.allocate_block(31337, cached_blocks_map));
EXPECT_EQ(allocator.num_overwriteable_blocks(), 2);

for (auto& block : block_to_release) {
allocator.free(block);
}
}

TEST_F(PrefixCachingBlockAllocatorTest, ThrowsAtAllocationWhenFull) {
std::vector<ov::genai::BlocksPerLayer> blocks_to_release;
for (size_t i = 0; i < initial_num_free_blocks; i++) {
allocator.allocate_block(1337 + i, cached_blocks_map);
blocks_to_release.push_back(allocator.allocate_block(1337 + i, cached_blocks_map));
}

ASSERT_EQ(allocator.num_overwriteable_blocks(), 0);
ASSERT_EQ(allocator.num_free_blocks(0), 0);

EXPECT_THROW(allocator.allocate_block(31337, cached_blocks_map), ov::Exception);
EXPECT_THROW(blocks_to_release.push_back(allocator.allocate_block(31337, cached_blocks_map)), ov::Exception);

for (auto& block : blocks_to_release) {
allocator.free(block);
}
}

TEST_F(PrefixCachingBlockAllocatorTest, HandlesHashCollisionsAtFreeCorrectly) {
Expand All @@ -168,18 +209,22 @@ TEST_F(PrefixCachingBlockAllocatorTest, HandlesHashCollisionsAtFreeCorrectly) {
// double free
ASSERT_THROW(allocator.free(first_hash_0_block), ov::Exception);

allocator.allocate_block(1, cached_blocks_map);
ov::genai::BlocksPerLayer blocks_to_release = allocator.allocate_block(1, cached_blocks_map);
auto second_hash_0_block = allocator.allocate_block(0, cached_blocks_map);
EXPECT_EQ(allocator.num_overwriteable_blocks(), 1);

// this "free" should replace the old block with the same hash in the overwritable store
allocator.free(second_hash_0_block);
EXPECT_EQ(allocator.num_overwriteable_blocks(), 1);

std::map<uint64_t, ov::genai::BlocksPerLayer> empty_map{}; // to force allocator to take the block from overwritable store
auto internal_overwriteable_block = allocator.get_cached_block(0, empty_map);
for (size_t layer_idx = 0; layer_idx < internal_overwriteable_block.size(); layer_idx++) {
EXPECT_EQ(internal_overwriteable_block[layer_idx], second_hash_0_block[layer_idx]);
}
allocator.free(internal_overwriteable_block);

allocator.free(blocks_to_release);
}

TEST(TestBlockAllocator, CalculatesUsagePercentageCorrectly) {
Expand All @@ -196,6 +241,8 @@ TEST(TestBlockAllocator, CalculatesUsagePercentageCorrectly) {

allocator.free(one_block_from_each_layer);
EXPECT_NEAR(allocator.get_used_percentage(), 1.0, 1e-5);

allocator.free(one_block_from_some_layer, 7);
}


Expand All @@ -206,15 +253,18 @@ TEST(TestBlockAllocator, CalculatesUsagePercentageCorrectlyWithPrefixCaching) {
ASSERT_NEAR(allocator.get_used_percentage(), 0.0, 1e-5);

std::map<uint64_t, ov::genai::BlocksPerLayer> prefix_hash_map;

for (uint64_t mock_hash: {13, 42, 1337}) {
auto one_block_from_each_layer = allocator.allocate_block(mock_hash, prefix_hash_map);
allocator.allocate_block(mock_hash, prefix_hash_map);
}
ASSERT_NEAR(allocator.get_used_percentage(), 30.0, 1e-5);

allocator.free(prefix_hash_map[13]);
prefix_hash_map.erase(13);
ASSERT_NEAR(allocator.get_used_percentage(), 20.0, 1e-5);

allocator.allocate_block(13, prefix_hash_map);
ASSERT_NEAR(allocator.get_used_percentage(), 30.0, 1e-5);
for (auto& allocated_block : prefix_hash_map) {
allocator.free(prefix_hash_map[allocated_block.first]);
}
}
11 changes: 10 additions & 1 deletion tests/cpp/block_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ TEST(TestBlockManager, general_test) {
bm.fork_sequence(seq_id, 1);
EXPECT_TRUE(bm.has_block_table(1));
EXPECT_EQ(bm.get_block_table(1, 0).back()->get_references_count(), 2);

bm.free_sequence(0);
bm.free_sequence(1);
}

TEST(TestBlockManager, required_blocks_count) {
Expand Down Expand Up @@ -82,6 +83,10 @@ TEST(TestBlockManager, required_blocks_count) {
// require 1 extra block for each sequence in group
EXPECT_EQ(required_blocks, 5);
EXPECT_FALSE(bm.can_append_slots(sequence_group));

for (auto& sequence : sequence_group->get_sequences()) {
bm.free_sequence(sequence->get_id());
}
}


Expand All @@ -103,4 +108,8 @@ TEST(TestBlockManager, CanFreeBlocksFromSequence) {
size_t seq_id = sequence_group->get_sequences()[0]->get_id();
bm.free_blocks_from_sequence(seq_id, { {0}, {1}, {2} });
EXPECT_EQ(bm.num_free_blocks(), 6);

for (auto& sequence : sequence_group->get_sequences()) {
bm.free_sequence(sequence->get_id());
}
}
63 changes: 63 additions & 0 deletions tests/cpp/scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,12 @@ TEST(TestScheduler, general_test) {
// requests1[1] should be fully scheduled plus 1 slot for requests[0] for generate phase
EXPECT_EQ(out4.m_total_num_scheduled_tokens, requests[1]->get_context_len() + 1);
EXPECT_EQ(out4.is_prompt, false);

for (auto& req : requests) {
for (auto& seq : req->get_sequences()) {
scheduler.free_sequence(seq->get_id());
}
}
}

}
Expand Down Expand Up @@ -193,6 +199,12 @@ TEST_P(AppendSlotsSchedulerTest, test_append_slots_considers_all_sequences) {
EXPECT_EQ(out2.m_total_num_scheduled_tokens, 1);

EXPECT_FALSE(out2.is_prompt);

for (auto& req : requests) {
for (auto& seq : req->get_sequences()) {
scheduler.free_sequence(seq->get_id());
}
}
}

INSTANTIATE_TEST_SUITE_P(VariousSchedulerConfigs, AppendSlotsSchedulerTest,
Expand Down Expand Up @@ -286,6 +298,12 @@ TEST_P(PartialPreemptionSchedulerTest, test_partial_preemption) {
EXPECT_EQ(block_table2[2]->get_index(), 0);

EXPECT_FALSE(scheduler.has_block_table(idx0));

for (auto& req : requests) {
for (auto& seq : req->get_sequences()) {
scheduler.free_sequence(seq->get_id());
}
}
}

INSTANTIATE_TEST_SUITE_P(VariousSchedulerConfigs, PartialPreemptionSchedulerTest ,
Expand Down Expand Up @@ -393,6 +411,12 @@ TEST(TestScheduler, test_partial_preemption_beam_search) {
EXPECT_EQ(scheduler.get_block_tables(*seqs[2])[0].size(), 1);
EXPECT_EQ(scheduler.get_block_tables(*seqs[3])[0].size(), 1);
EXPECT_EQ(scheduler.get_block_tables(*seqs[4])[0].size(), 1);

for (auto& req : new_requests) {
for (auto& seq : req->get_sequences()) {
scheduler.free_sequence(seq->get_id());
}
}
}
}

Expand Down Expand Up @@ -492,6 +516,12 @@ TEST(TestScheduler, test_partially_preempted_prompt) {
EXPECT_EQ(block_table2[2]->get_index(), 0);

EXPECT_FALSE(scheduler.has_block_table(idx0));

for (auto& req : requests) {
for (auto& seq : req->get_sequences()) {
scheduler.free_sequence(seq->get_id());
}
}
}
}

Expand Down Expand Up @@ -555,6 +585,13 @@ TEST(TestScheduler, prefix_caching_test) {

histrory_tokens.insert(histrory_tokens.end(), prompt_tokens.begin(), prompt_tokens.end());
histrory_tokens.insert(histrory_tokens.end(), generated_ids.begin(), generated_ids.end());

for (auto& seq : sequence_group->get_sequences()) {
if (seq->get_id() == idx0) {
continue;
}
scheduler.free_sequence(seq->get_id());
}
}
}

Expand Down Expand Up @@ -755,6 +792,15 @@ TEST(TestScheduler, test_partially_preempted_prompt_not_allowed) {
ASSERT_EQ(block_table2[0][2]->get_index(), 0);

EXPECT_FALSE(scheduler.has_block_table(idx0));

for (auto& req : requests) {
for (auto& seq : req->get_sequences()) {
if (seq->get_id() == idx0) {
continue;
}
scheduler.free_sequence(seq->get_id());
}
}
}

TEST(TestScheduler, test_partially_preempted_prompt_not_allowed2) {
Expand Down Expand Up @@ -840,6 +886,15 @@ TEST(TestScheduler, test_partially_preempted_prompt_not_allowed2) {
ASSERT_EQ(block_table2[0][2]->get_index(), 0);

EXPECT_FALSE(scheduler.has_block_table(idx0));

for (auto& req : requests) {
for (auto& seq : req->get_sequences()) {
if (seq->get_id() == idx0) {
continue;
}
scheduler.free_sequence(seq->get_id());
}
}
}


Expand Down Expand Up @@ -947,4 +1002,12 @@ TEST(TestScheduler, FullyPreemptsCacheEvictedSequences) {
const std::vector<size_t> ref_block_table2_after_recompute{4, 5, 0, 1, 2}; // should restore the old state before first eviction in terms of block count
EXPECT_EQ(block_table2, ref_block_table2_after_recompute);

for (auto& req : requests) {
for (auto& seq : req->get_sequences()) {
if (seq->get_id() == idx1) {
continue;
}
scheduler.free_sequence(seq->get_id());
}
}
}

0 comments on commit 8919eb1

Please sign in to comment.