Skip to content

Commit

Permalink
NEBDUTY-965: avoid data race when reading WaitMode from rdma client c…
Browse files Browse the repository at this point in the history
…onfig

Rdma client config can be modified during initial hand shake that's why CM
thread overwrites it by original config when Qp created  in
`TClientEndpoint::CreateQP`

thread sanitizer reports errors since this Config.WaitMode is used in
completion poller thread. To avoid this error WaitMode is read only once during
the initialization and of the endpoint and we don't modify it any more

Thread sanitizer error looks like that
```
WARNING: ThreadSanitizer: data race (pid=2354960)
  Write of size 8 at 0x7b5c00000118 by thread T2:
    #0 __tsan_memcpy contrib/libs/clang16-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp:3165:3 (cloud-blockstore-libs-rdma-impl-ut+0x22f79f4) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #1 NCloud::NBlockStore::NRdma::(anonymous namespace)::TClientEndpoint::CreateQP() cloud/blockstore/libs/rdma/impl/client.cpp:591:16 (cloud-blockstore-libs-rdma-impl-ut+0x49e974a) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #2 NCloud::NBlockStore::NRdma::(anonymous namespace)::TClient::BeginConnect(NCloud::NBlockStore::NRdma::(anonymous namespace)::TClientEndpoint*) cloud/blockstore/libs/rdma/impl/client.cpp:1876:19 (cloud-blockstore-libs-rdma-impl-ut+0x49e85dc) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #3 NCloud::NBlockStore::NRdma::(anonymous namespace)::TClient::HandleConnectionEvent(std::__y1::unique_ptr<rdma_cm_event, int (*)(rdma_cm_event*)>) cloud/blockstore/libs/rdma/impl/client.cpp:1736:13 (cloud-blockstore-libs-rdma-impl-ut+0x49bbfb0) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #4 non-virtual thunk to NCloud::NBlockStore::NRdma::(anonymous namespace)::TClient::HandleConnectionEvent(std::__y1::unique_ptr<rdma_cm_event, int (*)(rdma_cm_event*)>) cloud/blockstore/libs/rdma/impl/client.cpp (cloud-blockstore-libs-rdma-impl-ut+0x49bc761) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #5 NCloud::NBlockStore::NRdma::(anonymous namespace)::TConnectionPoller::HandleConnectionEvents() cloud/blockstore/libs/rdma/impl/client.cpp:1282:27 (cloud-blockstore-libs-rdma-impl-ut+0x49d3dca) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #6 NCloud::NBlockStore::NRdma::(anonymous namespace)::TConnectionPoller::ThreadProc() cloud/blockstore/libs/rdma/impl/client.cpp:1254:25 (cloud-blockstore-libs-rdma-impl-ut+0x49d3870) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #7 non-virtual thunk to NCloud::NBlockStore::NRdma::(anonymous namespace)::TConnectionPoller::ThreadProc() cloud/blockstore/libs/rdma/impl/client.cpp (cloud-blockstore-libs-rdma-impl-ut+0x49d39d9) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #8 void* (anonymous namespace)::ThreadProcWrapper<ISimpleThread>(void*) util/system/thread.cpp:383:45 (cloud-blockstore-libs-rdma-impl-ut+0x2680796) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #9 (anonymous namespace)::TPosixThread::ThreadProxy(void*) util/system/thread.cpp:244:20 (cloud-blockstore-libs-rdma-impl-ut+0x268b82e) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
  Previous read of size 4 at 0x7b5c00000118 by thread T1:
    #0 NCloud::NBlockStore::NRdma::(anonymous namespace)::TClientEndpoint::AbortRequests() cloud/blockstore/libs/rdma/impl/client.cpp:814:16 (cloud-blockstore-libs-rdma-impl-ut+0x49c4494) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #1 NCloud::NBlockStore::NRdma::(anonymous namespace)::TCompletionPoller::HandlePollEvent(epoll_event const&) cloud/blockstore/libs/rdma/impl/client.cpp:1428:31 (cloud-blockstore-libs-rdma-impl-ut+0x49c4042) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #2 NCloud::NBlockStore::NRdma::(anonymous namespace)::TCompletionPoller::HandlePollEvents() cloud/blockstore/libs/rdma/impl/client.cpp:1446:17 (cloud-blockstore-libs-rdma-impl-ut+0x49c3736) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #3 void NCloud::NBlockStore::NRdma::(anonymous namespace)::TCompletionPoller::Execute<(NCloud::NBlockStore::NRdma::EWaitMode)0>() cloud/blockstore/libs/rdma/impl/client.cpp:1530:21 (cloud-blockstore-libs-rdma-impl-ut+0x49c3341) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #4 NCloud::NBlockStore::NRdma::(anonymous namespace)::TCompletionPoller::ThreadProc() cloud/blockstore/libs/rdma/impl/client.cpp:1398:17 (cloud-blockstore-libs-rdma-impl-ut+0x49c1c18) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #5 non-virtual thunk to NCloud::NBlockStore::NRdma::(anonymous namespace)::TCompletionPoller::ThreadProc() cloud/blockstore/libs/rdma/impl/client.cpp (cloud-blockstore-libs-rdma-impl-ut+0x49c1cf9) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #6 void* (anonymous namespace)::ThreadProcWrapper<ISimpleThread>(void*) util/system/thread.cpp:383:45 (cloud-blockstore-libs-rdma-impl-ut+0x2680796) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
    #7 (anonymous namespace)::TPosixThread::ThreadProxy(void*) util/system/thread.cpp:244:20 (cloud-blockstore-libs-rdma-impl-ut+0x268b82e) (BuildId: 49c919960fac17dc20567e23b8a866c9294bf128)
```
budevg committed Apr 4, 2024
1 parent a64cf5a commit 2e0f0f2
Showing 1 changed file with 7 additions and 5 deletions.
12 changes: 7 additions & 5 deletions cloud/blockstore/libs/rdma/impl/client.cpp
Original file line number Diff line number Diff line change
@@ -389,6 +389,7 @@ class TClientEndpoint final
// config might be adjusted during initial handshake
TClientConfigPtr OriginalConfig;
TClientConfig Config;
const EWaitMode WaitMode;
bool ResetConfig = false;

TCompletionPoller* Poller = nullptr;
@@ -524,6 +525,7 @@ TClientEndpoint::TClientEndpoint(
, Reconnect(config->MaxReconnectDelay)
, OriginalConfig(std::move(config))
, Config(*OriginalConfig)
, WaitMode(Config.WaitMode)
{
// user data attached to connection events
Connection->context = this;
@@ -769,14 +771,14 @@ void TClientEndpoint::SendRequest(
Counters->RequestEnqueued();
InputRequests.Enqueue(std::move(req));

if (Config.WaitMode == EWaitMode::Poll) {
if (WaitMode == EWaitMode::Poll) {
RequestEvent.Set();
}
}

bool TClientEndpoint::HandleInputRequests()
{
if (Config.WaitMode == EWaitMode::Poll) {
if (WaitMode == EWaitMode::Poll) {
RequestEvent.Clear();
}

@@ -811,7 +813,7 @@ bool TClientEndpoint::AbortRequests() noexcept
{
bool ret = false;

if (Config.WaitMode == EWaitMode::Poll) {
if (WaitMode == EWaitMode::Poll) {
DisconnectEvent.Clear();
}

@@ -856,7 +858,7 @@ bool TClientEndpoint::HandleCompletionEvents()
{
ibv_cq* cq = CompletionQueue.get();

if (Config.WaitMode == EWaitMode::Poll) {
if (WaitMode == EWaitMode::Poll) {
Verbs->GetCompletionEvent(cq);
Verbs->AckCompletionEvents(cq, 1);
Verbs->RequestCompletionEvent(cq, 0);
@@ -1113,7 +1115,7 @@ void TClientEndpoint::SetError() noexcept
RDMA_ERROR("flush error: " << e.what());
}

if (Config.WaitMode == EWaitMode::Poll) {
if (WaitMode == EWaitMode::Poll) {
DisconnectEvent.Set();
}
}

0 comments on commit 2e0f0f2

Please sign in to comment.