Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Enhancement] Avoid acquire multiple lock in the same stack for tablet report #55808

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions be/src/storage/tablet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1534,6 +1534,13 @@ bool Tablet::_contains_rowset(const RowsetId rowset_id) {
}

void Tablet::build_tablet_report_info(TTabletInfo* tablet_info) {
// primary key specified info will protected by independent lock context
// in TabletUpdates which means that those lock context can be sperated from
// the tablet meta_lock when trying to get the extra info
if (_updates) {
_updates->get_tablet_info_extra(tablet_info);
}

std::shared_lock rdlock(_meta_lock);
tablet_info->__set_tablet_id(_tablet_meta->tablet_id());
tablet_info->__set_schema_hash(_tablet_meta->schema_hash());
Expand All @@ -1546,9 +1553,7 @@ void Tablet::build_tablet_report_info(TTabletInfo* tablet_info) {
if (_tablet_meta->get_binlog_config() != nullptr) {
tablet_info->__set_binlog_config_version(_tablet_meta->get_binlog_config()->version);
}
if (_updates) {
_updates->get_tablet_info_extra(tablet_info);
} else {
if (_updates == nullptr) {
int64_t max_version = _timestamped_version_tracker.get_max_continuous_version();
auto max_rowset = rowset_with_max_version();
if (max_rowset != nullptr) {
Expand Down
41 changes: 24 additions & 17 deletions be/src/storage/tablet_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -983,28 +983,35 @@ Status TabletManager::report_all_tablets_info(std::map<TTabletId, TTablet>* tabl

StarRocksMetrics::instance()->report_all_tablets_requests_total.increment(1);

size_t max_tablet_rowset_num = 0;
std::vector<TabletSharedPtr> all_tablets;
for (const auto& tablets_shard : _tablets_shards) {
std::shared_lock rlock(tablets_shard.lock);
for (const auto& [tablet_id, tablet_ptr] : tablets_shard.tablet_map) {
TTablet t_tablet;
TTabletInfo tablet_info;
tablet_ptr->build_tablet_report_info(&tablet_info);
max_tablet_rowset_num = std::max(max_tablet_rowset_num, tablet_ptr->version_count());
// find expired transaction corresponding to this tablet
TabletInfo tinfo(tablet_id, tablet_ptr->schema_hash(), tablet_ptr->tablet_uid());
auto find = expire_txn_map.find(tinfo);
if (find != expire_txn_map.end()) {
tablet_info.__set_transaction_ids(find->second);
expire_txn_map.erase(find);
}
t_tablet.tablet_infos.push_back(tablet_info);
for (const auto& [_, tablet_ptr] : tablets_shard.tablet_map) {
all_tablets.push_back(tablet_ptr);
}
}

if (!t_tablet.tablet_infos.empty()) {
tablets_info->emplace(tablet_id, t_tablet);
}
size_t max_tablet_rowset_num = 0;
for (auto& tablet_ptr : all_tablets) {
TTablet t_tablet;
TTabletInfo tablet_info;
tablet_ptr->build_tablet_report_info(&tablet_info);
max_tablet_rowset_num = std::max(max_tablet_rowset_num, tablet_ptr->version_count());
// find expired transaction corresponding to this tablet
TabletInfo tinfo(tablet_ptr->tablet_id(), tablet_ptr->schema_hash(), tablet_ptr->tablet_uid());
auto find = expire_txn_map.find(tinfo);
if (find != expire_txn_map.end()) {
tablet_info.__set_transaction_ids(find->second);
expire_txn_map.erase(find);
}
t_tablet.tablet_infos.push_back(tablet_info);

if (!t_tablet.tablet_infos.empty()) {
tablets_info->emplace(tablet_ptr->tablet_id(), t_tablet);
}
}
all_tablets.clear();

LOG(INFO) << "Report all " << tablets_info->size()
<< " tablets info. max_tablet_rowset_num:" << max_tablet_rowset_num;
StarRocksMetrics::instance()->max_tablet_rowset_num.set_value(max_tablet_rowset_num);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The most risky bug in this code is:
Potential deadlock when accessing _tablets_shards.

You can modify the code like this:

std::vector<TabletSharedPtr> all_tablets;
for (const auto& tablets_shard : _tablets_shards) {
    // Ensure lock acquisition is done inside a smaller scope to prevent prolonged holding of locks
    {
        std::shared_lock rlock(tablets_shard.lock);
        for (const auto& [_, tablet_ptr] : tablets_shard.tablet_map) {
            all_tablets.push_back(tablet_ptr);
        }
    }
}

size_t max_tablet_rowset_num = 0;
for (auto& tablet_ptr : all_tablets) {
    TTablet t_tablet;
    TTabletInfo tablet_info;
    tablet_ptr->build_tablet_report_info(&tablet_info);
    max_tablet_rowset_num = std::max(max_tablet_rowset_num, tablet_ptr->version_count());
    // find expired transaction corresponding to this tablet
    TabletInfo tinfo(tablet_ptr->tablet_id(), tablet_ptr->schema_hash(), tablet_ptr->tablet_uid());
    auto find = expire_txn_map.find(tinfo);
    if (find != expire_txn_map.end()) {
        tablet_info.__set_transaction_ids(find->second);
        expire_txn_map.erase(find);
    }
    t_tablet.tablet_infos.push_back(tablet_info);

    if (!t_tablet.tablet_infos.empty()) {
        tablets_info->emplace(tablet_ptr->tablet_id(), t_tablet);
    }
}
all_tablets.clear();

Expand Down
14 changes: 14 additions & 0 deletions be/test/storage/tablet_mgr_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -516,4 +516,18 @@ TEST_F(TabletMgrTest, RemoveTabletInDiskDisable) {
StorageEngine::instance()->tablet_manager()->drop_tablets_on_error_root_path(tablet_info_vec);
}

TEST_F(TabletMgrTest, GetTabletReportInfo) {
TTabletId tablet_id = 4251234666;
TSchemaHash schema_hash = 3929134666;
TCreateTabletReq create_tablet_req = get_create_tablet_request(tablet_id, schema_hash);
Status create_st = StorageEngine::instance()->create_tablet(create_tablet_req);
ASSERT_TRUE(create_st.ok());

TReportRequest request;
request.__isset.tablets = true;
Status st_report = StorageEngine::instance()->tablet_manager()->report_all_tablets_info(&request.tablets);
ASSERT_TRUE(st_report.ok());
ASSERT_TRUE(request.tablets.size() == 1);
}

} // namespace starrocks
Loading