From 0fd208c959db3d3d360cd79c94f15e092a7fc6c3 Mon Sep 17 00:00:00 2001 From: Roger Johansson Date: Thu, 29 Feb 2024 19:29:54 +0100 Subject: [PATCH 01/16] debug cluster tests --- .../TestProvider.cs | 15 ++++---- src/Proto.Cluster/Cluster.cs | 35 ++++++++++++------- tests/Proto.Cluster.Tests/ClusterTests.cs | 3 +- .../RetryOnDeadLetterTests.cs | 2 +- 4 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/Proto.Cluster.TestProvider/TestProvider.cs b/src/Proto.Cluster.TestProvider/TestProvider.cs index 6cfdc3fa36..283a6b65ef 100644 --- a/src/Proto.Cluster.TestProvider/TestProvider.cs +++ b/src/Proto.Cluster.TestProvider/TestProvider.cs @@ -28,8 +28,9 @@ public TestProvider(TestProviderOptions options, InMemAgent agent) _agent = agent; } - public Task StartMemberAsync(Cluster cluster) + public async Task StartMemberAsync(Cluster cluster) { + await Task.Yield(); var memberList = cluster.MemberList; var (host, port) = cluster.System.GetAddress(); var kinds = cluster.GetClusterKinds(); @@ -46,30 +47,26 @@ public Task StartMemberAsync(Cluster cluster) Port = port } ); - - return Task.CompletedTask; } - public Task StartClientAsync(Cluster cluster) + public async Task StartClientAsync(Cluster cluster) { + await Task.Yield(); var memberList = cluster.MemberList; _id = cluster.System.Id; _memberList = memberList; _agent.StatusUpdate += AgentOnStatusUpdate; _agent.ForceUpdate(); - - return Task.CompletedTask; } - public Task ShutdownAsync(bool graceful) + public async Task ShutdownAsync(bool graceful) { + await Task.Delay(100); Logger.LogDebug("Unregistering service {Service}", _id); _ttlReportTimer?.Stop(); _agent.DeregisterService(_id); - - return Task.CompletedTask; } private void AgentOnStatusUpdate(object sender, EventArgs e) => NotifyStatuses(); diff --git a/src/Proto.Cluster/Cluster.cs b/src/Proto.Cluster/Cluster.cs index 688b7185e7..a17b87286d 100644 --- a/src/Proto.Cluster/Cluster.cs +++ b/src/Proto.Cluster/Cluster.cs @@ -285,46 +285,55 @@ private void InitIdentityProxy() => public async Task ShutdownAsync(bool graceful = true, string reason = "") { Logger.LogInformation("Stopping Cluster {Id}", System.Id); - + // Inform all members of the cluster that this node intends to leave. Also, let the MemberList know that this // node was the one that initiated the shutdown to prevent another shutdown from being called. Logger.LogInformation("Setting GracefullyLeft gossip state for {Id}", System.Id); MemberList.Stopping = true; await Gossip.SetStateAsync(GossipKeys.GracefullyLeft, new Empty()).ConfigureAwait(false); - + Logger.LogInformation("Waiting for two gossip intervals to pass for {Id}", System.Id); // In case provider shutdown is quick, let's wait at least 2 gossip intervals. await Task.Delay((int)Config.GossipInterval.TotalMilliseconds * 2).ConfigureAwait(false); Logger.LogInformation("Stopping cluster provider for {Id}", System.Id); // Deregister from configured cluster provider. - await Provider.ShutdownAsync(graceful).ConfigureAwait(false); + + //TODO: find out why this changes anything.... + //Provider.ShutdownAsync seems to freeze the system, sometimes.... + var t = Task.Run(async () => + { + await Task.Delay(100); + await Provider.ShutdownAsync(graceful); + }); + await t.WaitAsync(TimeSpan.FromSeconds(2)); + if (_clusterKindObserver != null) { ClusterMetrics.VirtualActorsCount.RemoveObserver(_clusterKindObserver); _clusterKindObserver = null; } - + if (_clusterMembersObserver != null) { ClusterMetrics.ClusterMembersCount.RemoveObserver(_clusterMembersObserver); _clusterMembersObserver = null; } - + // Cancel the primary CancellationToken first which will shut down a number of concurrent systems simultaneously. await System.ShutdownAsync(reason).ConfigureAwait(false); - + // Shut down the rest of the dependencies in reverse order that they were started. await Gossip.ShutdownAsync().ConfigureAwait(false); - - if (graceful) - { - await IdentityLookup.ShutdownAsync().ConfigureAwait(false); - } - + + if (graceful) + { + await IdentityLookup.ShutdownAsync().ConfigureAwait(false); + } + await Remote.ShutdownAsync(graceful).ConfigureAwait(false); - + _shutdownCompletedTcs.TrySetResult(true); Logger.LogInformation("Stopped Cluster {Id}", System.Id); } diff --git a/tests/Proto.Cluster.Tests/ClusterTests.cs b/tests/Proto.Cluster.Tests/ClusterTests.cs index 02b469e8d1..738eed386a 100644 --- a/tests/Proto.Cluster.Tests/ClusterTests.cs +++ b/tests/Proto.Cluster.Tests/ClusterTests.cs @@ -243,8 +243,9 @@ await Trace(async () => await CanGetResponseFromAllIdsOnAllNodes(ids, Members, 20000); var toBeRemoved = Members.Last(); + _testOutputHelper.WriteLine("provider " + Members.First().Provider.GetType().Name); _testOutputHelper.WriteLine("Removing node " + toBeRemoved.System.Id + " / " + toBeRemoved.System.Address); - await ClusterFixture.RemoveNode(toBeRemoved); + await ClusterFixture.RemoveNode(toBeRemoved).ConfigureAwait(false); _testOutputHelper.WriteLine("Removed node " + toBeRemoved.System.Id + " / " + toBeRemoved.System.Address); await ClusterFixture.SpawnMember(); diff --git a/tests/Proto.Cluster.Tests/RetryOnDeadLetterTests.cs b/tests/Proto.Cluster.Tests/RetryOnDeadLetterTests.cs index fb9f47dd57..1c91832a25 100644 --- a/tests/Proto.Cluster.Tests/RetryOnDeadLetterTests.cs +++ b/tests/Proto.Cluster.Tests/RetryOnDeadLetterTests.cs @@ -10,7 +10,7 @@ namespace Proto.Cluster.Tests; [Collection("ClusterTests")] public class RetryOnDeadLetterTests { - [Fact(Skip = "Flaky")] + [Fact] public async Task ShouldRetryRequestOnDeadLetterResponseRegardlessOfResponseType() { var fixture = new Fixture(1); From 8e2139354a2e1b3164574f3e315d4db3bd2a504f Mon Sep 17 00:00:00 2001 From: Roger Johansson Date: Thu, 29 Feb 2024 19:40:17 +0100 Subject: [PATCH 02/16] try again --- src/Proto.Cluster/Cluster.cs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Proto.Cluster/Cluster.cs b/src/Proto.Cluster/Cluster.cs index a17b87286d..f35bf03837 100644 --- a/src/Proto.Cluster/Cluster.cs +++ b/src/Proto.Cluster/Cluster.cs @@ -299,14 +299,14 @@ public async Task ShutdownAsync(bool graceful = true, string reason = "") Logger.LogInformation("Stopping cluster provider for {Id}", System.Id); // Deregister from configured cluster provider. - //TODO: find out why this changes anything.... - //Provider.ShutdownAsync seems to freeze the system, sometimes.... - var t = Task.Run(async () => - { - await Task.Delay(100); - await Provider.ShutdownAsync(graceful); - }); - await t.WaitAsync(TimeSpan.FromSeconds(2)); + // //TODO: find out why this changes anything.... + // //Provider.ShutdownAsync seems to freeze the system, sometimes.... + // var t = Task.Run(async () => + // { + // await Task.Delay(100); + // await Provider.ShutdownAsync(graceful); + // }); + // await t.WaitAsync(TimeSpan.FromSeconds(2)); if (_clusterKindObserver != null) From 933024680ef38f74b262357376776217d91885c7 Mon Sep 17 00:00:00 2001 From: Roger Johansson Date: Thu, 29 Feb 2024 19:48:31 +0100 Subject: [PATCH 03/16] disable otel --- .github/workflows/build-dev.yml | 2 -- .github/workflows/pull-request.yml | 2 -- 2 files changed, 4 deletions(-) diff --git a/.github/workflows/build-dev.yml b/.github/workflows/build-dev.yml index 74b8c28feb..0aa5bbc397 100644 --- a/.github/workflows/build-dev.yml +++ b/.github/workflows/build-dev.yml @@ -9,8 +9,6 @@ on: env: DOTNET_SYSTEM_CONSOLE_ALLOW_ANSI_COLOR_REDIRECTION: 1 TERM: xterm - OPENTELEMETRY_URL: http://otel.ornell.io:30798 - TRACEVIEW_URL: http://traceview.ornell.io jobs: test-slow: # slow tests that should run in parallel diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index d5200cc0af..855052d79c 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -5,8 +5,6 @@ on: [pull_request] env: DOTNET_SYSTEM_CONSOLE_ALLOW_ANSI_COLOR_REDIRECTION: 1 TERM: xterm - OPENTELEMETRY_URL: http://otel.ornell.io:30798 - TRACEVIEW_URL: http://traceview.ornell.io jobs: build: From 1a06e4e57f3ef7b22a808625054b06da91cb5958 Mon Sep 17 00:00:00 2001 From: Roger Johansson Date: Thu, 29 Feb 2024 19:53:50 +0100 Subject: [PATCH 04/16] =?UTF-8?q?don=C2=B4t=20timeout=20task.delay?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/Proto.Cluster.Tests/ClusterTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Proto.Cluster.Tests/ClusterTests.cs b/tests/Proto.Cluster.Tests/ClusterTests.cs index 738eed386a..768cae6a14 100644 --- a/tests/Proto.Cluster.Tests/ClusterTests.cs +++ b/tests/Proto.Cluster.Tests/ClusterTests.cs @@ -566,7 +566,7 @@ private async Task PingPong( if (response == null) { - await Task.Delay(200, token); + await Task.Delay(200); } } while (response == null && !token.IsCancellationRequested); From 1e9929a3acc076b37e4f6aaeaefeeb0b9b1f7829 Mon Sep 17 00:00:00 2001 From: Roger Johansson Date: Thu, 29 Feb 2024 19:59:54 +0100 Subject: [PATCH 05/16] rollback testprovider --- src/Proto.Cluster.TestProvider/TestProvider.cs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Proto.Cluster.TestProvider/TestProvider.cs b/src/Proto.Cluster.TestProvider/TestProvider.cs index 283a6b65ef..6cfdc3fa36 100644 --- a/src/Proto.Cluster.TestProvider/TestProvider.cs +++ b/src/Proto.Cluster.TestProvider/TestProvider.cs @@ -28,9 +28,8 @@ public TestProvider(TestProviderOptions options, InMemAgent agent) _agent = agent; } - public async Task StartMemberAsync(Cluster cluster) + public Task StartMemberAsync(Cluster cluster) { - await Task.Yield(); var memberList = cluster.MemberList; var (host, port) = cluster.System.GetAddress(); var kinds = cluster.GetClusterKinds(); @@ -47,26 +46,30 @@ public async Task StartMemberAsync(Cluster cluster) Port = port } ); + + return Task.CompletedTask; } - public async Task StartClientAsync(Cluster cluster) + public Task StartClientAsync(Cluster cluster) { - await Task.Yield(); var memberList = cluster.MemberList; _id = cluster.System.Id; _memberList = memberList; _agent.StatusUpdate += AgentOnStatusUpdate; _agent.ForceUpdate(); + + return Task.CompletedTask; } - public async Task ShutdownAsync(bool graceful) + public Task ShutdownAsync(bool graceful) { - await Task.Delay(100); Logger.LogDebug("Unregistering service {Service}", _id); _ttlReportTimer?.Stop(); _agent.DeregisterService(_id); + + return Task.CompletedTask; } private void AgentOnStatusUpdate(object sender, EventArgs e) => NotifyStatuses(); From 38f5e1b1184c00dccccbe4885c87134fd4b3dbbc Mon Sep 17 00:00:00 2001 From: Roger Johansson Date: Thu, 29 Feb 2024 20:03:26 +0100 Subject: [PATCH 06/16] rollback cluster test changes --- tests/Proto.Cluster.Tests/ClusterTests.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/Proto.Cluster.Tests/ClusterTests.cs b/tests/Proto.Cluster.Tests/ClusterTests.cs index 768cae6a14..c2c0811993 100644 --- a/tests/Proto.Cluster.Tests/ClusterTests.cs +++ b/tests/Proto.Cluster.Tests/ClusterTests.cs @@ -243,9 +243,8 @@ await Trace(async () => await CanGetResponseFromAllIdsOnAllNodes(ids, Members, 20000); var toBeRemoved = Members.Last(); - _testOutputHelper.WriteLine("provider " + Members.First().Provider.GetType().Name); _testOutputHelper.WriteLine("Removing node " + toBeRemoved.System.Id + " / " + toBeRemoved.System.Address); - await ClusterFixture.RemoveNode(toBeRemoved).ConfigureAwait(false); + await ClusterFixture.RemoveNode(toBeRemoved); _testOutputHelper.WriteLine("Removed node " + toBeRemoved.System.Id + " / " + toBeRemoved.System.Address); await ClusterFixture.SpawnMember(); From 45f8c5441cfc45cb4cc90ee5f3b69fa95ab456b8 Mon Sep 17 00:00:00 2001 From: Roger Johansson Date: Thu, 29 Feb 2024 20:06:45 +0100 Subject: [PATCH 07/16] random stuff --- src/Proto.Cluster/Cluster.cs | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/Proto.Cluster/Cluster.cs b/src/Proto.Cluster/Cluster.cs index f35bf03837..df8770de82 100644 --- a/src/Proto.Cluster/Cluster.cs +++ b/src/Proto.Cluster/Cluster.cs @@ -298,16 +298,13 @@ public async Task ShutdownAsync(bool graceful = true, string reason = "") Logger.LogInformation("Stopping cluster provider for {Id}", System.Id); // Deregister from configured cluster provider. - - // //TODO: find out why this changes anything.... - // //Provider.ShutdownAsync seems to freeze the system, sometimes.... - // var t = Task.Run(async () => - // { - // await Task.Delay(100); - // await Provider.ShutdownAsync(graceful); - // }); - // await t.WaitAsync(TimeSpan.FromSeconds(2)); + var t = Task.Run(async () => + { + await Task.Delay(100); + await Provider.ShutdownAsync(graceful); + }); + await t.WaitAsync(TimeSpan.FromSeconds(2)); if (_clusterKindObserver != null) { From 8b40994a861291fd52559e067c46516481f2d394 Mon Sep 17 00:00:00 2001 From: Roger Johansson Date: Thu, 29 Feb 2024 20:20:10 +0100 Subject: [PATCH 08/16] random stuff --- src/Proto.Cluster/Cluster.cs | 8 +------- tests/Proto.Cluster.Tests/ClusterFixture.cs | 6 ++++-- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/Proto.Cluster/Cluster.cs b/src/Proto.Cluster/Cluster.cs index df8770de82..baf98577c6 100644 --- a/src/Proto.Cluster/Cluster.cs +++ b/src/Proto.Cluster/Cluster.cs @@ -298,13 +298,7 @@ public async Task ShutdownAsync(bool graceful = true, string reason = "") Logger.LogInformation("Stopping cluster provider for {Id}", System.Id); // Deregister from configured cluster provider. - - var t = Task.Run(async () => - { - await Task.Delay(100); - await Provider.ShutdownAsync(graceful); - }); - await t.WaitAsync(TimeSpan.FromSeconds(2)); + await Provider.ShutdownAsync(graceful); if (_clusterKindObserver != null) { diff --git a/tests/Proto.Cluster.Tests/ClusterFixture.cs b/tests/Proto.Cluster.Tests/ClusterFixture.cs index 05b184a516..7648b027a9 100644 --- a/tests/Proto.Cluster.Tests/ClusterFixture.cs +++ b/tests/Proto.Cluster.Tests/ClusterFixture.cs @@ -182,12 +182,14 @@ public async Task RemoveNode(Cluster member, bool graceful = true) if (Members.Contains(member)) { Members.Remove(member); - await member.ShutdownAsync(graceful, "Stopped by ClusterFixture"); + var t = member.ShutdownAsync(graceful, "Stopped by ClusterFixture"); + await t.WaitAsync(TimeSpan.FromSeconds(5)); } else if (Clients.Contains(member)) { Clients.Remove(member); - await member.ShutdownAsync(graceful, "Stopped by ClusterFixture"); + var t = member.ShutdownAsync(graceful, "Stopped by ClusterFixture"); + await t.WaitAsync(TimeSpan.FromSeconds(5)); } else { From ba0a9c7925578f227566e0926d40915385bf9176 Mon Sep 17 00:00:00 2001 From: Roger Johansson Date: Thu, 29 Feb 2024 20:37:50 +0100 Subject: [PATCH 09/16] debugging... --- src/Proto.Cluster/Cluster.cs | 86 ++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/src/Proto.Cluster/Cluster.cs b/src/Proto.Cluster/Cluster.cs index baf98577c6..83949881f3 100644 --- a/src/Proto.Cluster/Cluster.cs +++ b/src/Proto.Cluster/Cluster.cs @@ -284,49 +284,49 @@ private void InitIdentityProxy() => /// Provide the reason for the shutdown, that can be used for diagnosing problems public async Task ShutdownAsync(bool graceful = true, string reason = "") { - Logger.LogInformation("Stopping Cluster {Id}", System.Id); - - // Inform all members of the cluster that this node intends to leave. Also, let the MemberList know that this - // node was the one that initiated the shutdown to prevent another shutdown from being called. - Logger.LogInformation("Setting GracefullyLeft gossip state for {Id}", System.Id); - MemberList.Stopping = true; - await Gossip.SetStateAsync(GossipKeys.GracefullyLeft, new Empty()).ConfigureAwait(false); - - Logger.LogInformation("Waiting for two gossip intervals to pass for {Id}", System.Id); - // In case provider shutdown is quick, let's wait at least 2 gossip intervals. - await Task.Delay((int)Config.GossipInterval.TotalMilliseconds * 2).ConfigureAwait(false); - - Logger.LogInformation("Stopping cluster provider for {Id}", System.Id); - // Deregister from configured cluster provider. - await Provider.ShutdownAsync(graceful); - - if (_clusterKindObserver != null) - { - ClusterMetrics.VirtualActorsCount.RemoveObserver(_clusterKindObserver); - _clusterKindObserver = null; - } - - if (_clusterMembersObserver != null) - { - ClusterMetrics.ClusterMembersCount.RemoveObserver(_clusterMembersObserver); - _clusterMembersObserver = null; - } - - // Cancel the primary CancellationToken first which will shut down a number of concurrent systems simultaneously. - await System.ShutdownAsync(reason).ConfigureAwait(false); - - // Shut down the rest of the dependencies in reverse order that they were started. - await Gossip.ShutdownAsync().ConfigureAwait(false); - - if (graceful) - { - await IdentityLookup.ShutdownAsync().ConfigureAwait(false); - } - - await Remote.ShutdownAsync(graceful).ConfigureAwait(false); - - _shutdownCompletedTcs.TrySetResult(true); - Logger.LogInformation("Stopped Cluster {Id}", System.Id); + // Logger.LogInformation("Stopping Cluster {Id}", System.Id); + // + // // Inform all members of the cluster that this node intends to leave. Also, let the MemberList know that this + // // node was the one that initiated the shutdown to prevent another shutdown from being called. + // Logger.LogInformation("Setting GracefullyLeft gossip state for {Id}", System.Id); + // MemberList.Stopping = true; + // await Gossip.SetStateAsync(GossipKeys.GracefullyLeft, new Empty()).ConfigureAwait(false); + // + // Logger.LogInformation("Waiting for two gossip intervals to pass for {Id}", System.Id); + // // In case provider shutdown is quick, let's wait at least 2 gossip intervals. + // await Task.Delay((int)Config.GossipInterval.TotalMilliseconds * 2).ConfigureAwait(false); + // + // Logger.LogInformation("Stopping cluster provider for {Id}", System.Id); + // // Deregister from configured cluster provider. + // await Provider.ShutdownAsync(graceful); + // + // if (_clusterKindObserver != null) + // { + // ClusterMetrics.VirtualActorsCount.RemoveObserver(_clusterKindObserver); + // _clusterKindObserver = null; + // } + // + // if (_clusterMembersObserver != null) + // { + // ClusterMetrics.ClusterMembersCount.RemoveObserver(_clusterMembersObserver); + // _clusterMembersObserver = null; + // } + // + // // Cancel the primary CancellationToken first which will shut down a number of concurrent systems simultaneously. + // await System.ShutdownAsync(reason).ConfigureAwait(false); + // + // // Shut down the rest of the dependencies in reverse order that they were started. + // await Gossip.ShutdownAsync().ConfigureAwait(false); + // + // if (graceful) + // { + // await IdentityLookup.ShutdownAsync().ConfigureAwait(false); + // } + // + // await Remote.ShutdownAsync(graceful).ConfigureAwait(false); + // + // _shutdownCompletedTcs.TrySetResult(true); + // Logger.LogInformation("Stopped Cluster {Id}", System.Id); } /// From 65abc07f5c3ccb4e9e003097a01f53c9b75a1672 Mon Sep 17 00:00:00 2001 From: Roger Johansson Date: Thu, 29 Feb 2024 20:38:50 +0100 Subject: [PATCH 10/16] debugging --- src/Proto.Cluster/Cluster.cs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Proto.Cluster/Cluster.cs b/src/Proto.Cluster/Cluster.cs index 83949881f3..1666264a5e 100644 --- a/src/Proto.Cluster/Cluster.cs +++ b/src/Proto.Cluster/Cluster.cs @@ -284,14 +284,14 @@ private void InitIdentityProxy() => /// Provide the reason for the shutdown, that can be used for diagnosing problems public async Task ShutdownAsync(bool graceful = true, string reason = "") { - // Logger.LogInformation("Stopping Cluster {Id}", System.Id); - // - // // Inform all members of the cluster that this node intends to leave. Also, let the MemberList know that this - // // node was the one that initiated the shutdown to prevent another shutdown from being called. - // Logger.LogInformation("Setting GracefullyLeft gossip state for {Id}", System.Id); - // MemberList.Stopping = true; - // await Gossip.SetStateAsync(GossipKeys.GracefullyLeft, new Empty()).ConfigureAwait(false); - // + Logger.LogInformation("Stopping Cluster {Id}", System.Id); + + // Inform all members of the cluster that this node intends to leave. Also, let the MemberList know that this + // node was the one that initiated the shutdown to prevent another shutdown from being called. + Logger.LogInformation("Setting GracefullyLeft gossip state for {Id}", System.Id); + MemberList.Stopping = true; + await Gossip.SetStateAsync(GossipKeys.GracefullyLeft, new Empty()).ConfigureAwait(false); + // Logger.LogInformation("Waiting for two gossip intervals to pass for {Id}", System.Id); // // In case provider shutdown is quick, let's wait at least 2 gossip intervals. // await Task.Delay((int)Config.GossipInterval.TotalMilliseconds * 2).ConfigureAwait(false); From b93fa8d59667c454ed7f0cd512beb5da793f34ac Mon Sep 17 00:00:00 2001 From: Roger Johansson Date: Thu, 29 Feb 2024 20:44:01 +0100 Subject: [PATCH 11/16] try again --- src/Proto.Cluster/Cluster.cs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Proto.Cluster/Cluster.cs b/src/Proto.Cluster/Cluster.cs index 1666264a5e..6686134244 100644 --- a/src/Proto.Cluster/Cluster.cs +++ b/src/Proto.Cluster/Cluster.cs @@ -315,18 +315,18 @@ public async Task ShutdownAsync(bool graceful = true, string reason = "") // // Cancel the primary CancellationToken first which will shut down a number of concurrent systems simultaneously. // await System.ShutdownAsync(reason).ConfigureAwait(false); // - // // Shut down the rest of the dependencies in reverse order that they were started. - // await Gossip.ShutdownAsync().ConfigureAwait(false); - // - // if (graceful) - // { - // await IdentityLookup.ShutdownAsync().ConfigureAwait(false); - // } - // - // await Remote.ShutdownAsync(graceful).ConfigureAwait(false); - // - // _shutdownCompletedTcs.TrySetResult(true); - // Logger.LogInformation("Stopped Cluster {Id}", System.Id); + // Shut down the rest of the dependencies in reverse order that they were started. + await Gossip.ShutdownAsync().ConfigureAwait(false); + + if (graceful) + { + await IdentityLookup.ShutdownAsync().ConfigureAwait(false); + } + + await Remote.ShutdownAsync(graceful).ConfigureAwait(false); + + _shutdownCompletedTcs.TrySetResult(true); + Logger.LogInformation("Stopped Cluster {Id}", System.Id); } /// From bda950c20b23dbe8dfee71f0f00c74acdadab60d Mon Sep 17 00:00:00 2001 From: Roger Johansson Date: Thu, 29 Feb 2024 20:49:40 +0100 Subject: [PATCH 12/16] debug --- src/Proto.Cluster/Cluster.cs | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/Proto.Cluster/Cluster.cs b/src/Proto.Cluster/Cluster.cs index 6686134244..f6b10bfd0c 100644 --- a/src/Proto.Cluster/Cluster.cs +++ b/src/Proto.Cluster/Cluster.cs @@ -292,10 +292,10 @@ public async Task ShutdownAsync(bool graceful = true, string reason = "") MemberList.Stopping = true; await Gossip.SetStateAsync(GossipKeys.GracefullyLeft, new Empty()).ConfigureAwait(false); - // Logger.LogInformation("Waiting for two gossip intervals to pass for {Id}", System.Id); - // // In case provider shutdown is quick, let's wait at least 2 gossip intervals. - // await Task.Delay((int)Config.GossipInterval.TotalMilliseconds * 2).ConfigureAwait(false); - // + Logger.LogInformation("Waiting for two gossip intervals to pass for {Id}", System.Id); + // In case provider shutdown is quick, let's wait at least 2 gossip intervals. + await Task.Delay((int)Config.GossipInterval.TotalMilliseconds * 2).ConfigureAwait(false); + // Logger.LogInformation("Stopping cluster provider for {Id}", System.Id); // // Deregister from configured cluster provider. // await Provider.ShutdownAsync(graceful); @@ -313,18 +313,18 @@ public async Task ShutdownAsync(bool graceful = true, string reason = "") // } // // // Cancel the primary CancellationToken first which will shut down a number of concurrent systems simultaneously. - // await System.ShutdownAsync(reason).ConfigureAwait(false); + await System.ShutdownAsync(reason).ConfigureAwait(false); // // Shut down the rest of the dependencies in reverse order that they were started. - await Gossip.ShutdownAsync().ConfigureAwait(false); - - if (graceful) - { - await IdentityLookup.ShutdownAsync().ConfigureAwait(false); - } - - await Remote.ShutdownAsync(graceful).ConfigureAwait(false); - + // await Gossip.ShutdownAsync().ConfigureAwait(false); + // + // if (graceful) + // { + // await IdentityLookup.ShutdownAsync().ConfigureAwait(false); + // } + // + // await Remote.ShutdownAsync(graceful).ConfigureAwait(false); + // _shutdownCompletedTcs.TrySetResult(true); Logger.LogInformation("Stopped Cluster {Id}", System.Id); } From 5be9b77285a708e08d4f1b969969bfb477d008d2 Mon Sep 17 00:00:00 2001 From: Roger Johansson Date: Thu, 29 Feb 2024 20:57:53 +0100 Subject: [PATCH 13/16] debug --- src/Proto.Cluster/Cluster.cs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/Proto.Cluster/Cluster.cs b/src/Proto.Cluster/Cluster.cs index f6b10bfd0c..f42893cc8a 100644 --- a/src/Proto.Cluster/Cluster.cs +++ b/src/Proto.Cluster/Cluster.cs @@ -300,19 +300,19 @@ public async Task ShutdownAsync(bool graceful = true, string reason = "") // // Deregister from configured cluster provider. // await Provider.ShutdownAsync(graceful); // - // if (_clusterKindObserver != null) - // { - // ClusterMetrics.VirtualActorsCount.RemoveObserver(_clusterKindObserver); - // _clusterKindObserver = null; - // } - // - // if (_clusterMembersObserver != null) - // { - // ClusterMetrics.ClusterMembersCount.RemoveObserver(_clusterMembersObserver); - // _clusterMembersObserver = null; - // } - // - // // Cancel the primary CancellationToken first which will shut down a number of concurrent systems simultaneously. + if (_clusterKindObserver != null) + { + ClusterMetrics.VirtualActorsCount.RemoveObserver(_clusterKindObserver); + _clusterKindObserver = null; + } + + if (_clusterMembersObserver != null) + { + ClusterMetrics.ClusterMembersCount.RemoveObserver(_clusterMembersObserver); + _clusterMembersObserver = null; + } + + // Cancel the primary CancellationToken first which will shut down a number of concurrent systems simultaneously. await System.ShutdownAsync(reason).ConfigureAwait(false); // // Shut down the rest of the dependencies in reverse order that they were started. From 1cf5b658b903192ee4f4d8ff80c6e31d51007370 Mon Sep 17 00:00:00 2001 From: Roger Johansson Date: Thu, 29 Feb 2024 21:06:01 +0100 Subject: [PATCH 14/16] . --- src/Proto.Cluster/Cluster.cs | 22 +++++++++++----------- src/Proto.Remote/GrpcNet/GrpcNetRemote.cs | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Proto.Cluster/Cluster.cs b/src/Proto.Cluster/Cluster.cs index f42893cc8a..d376d1d0a6 100644 --- a/src/Proto.Cluster/Cluster.cs +++ b/src/Proto.Cluster/Cluster.cs @@ -312,19 +312,19 @@ public async Task ShutdownAsync(bool graceful = true, string reason = "") _clusterMembersObserver = null; } + // Shut down the rest of the dependencies in reverse order that they were started. + await Gossip.ShutdownAsync().ConfigureAwait(false); + + if (graceful) + { + await IdentityLookup.ShutdownAsync().ConfigureAwait(false); + } + + await Remote.ShutdownAsync(graceful).ConfigureAwait(false); + // Cancel the primary CancellationToken first which will shut down a number of concurrent systems simultaneously. await System.ShutdownAsync(reason).ConfigureAwait(false); - // - // Shut down the rest of the dependencies in reverse order that they were started. - // await Gossip.ShutdownAsync().ConfigureAwait(false); - // - // if (graceful) - // { - // await IdentityLookup.ShutdownAsync().ConfigureAwait(false); - // } - // - // await Remote.ShutdownAsync(graceful).ConfigureAwait(false); - // + _shutdownCompletedTcs.TrySetResult(true); Logger.LogInformation("Stopped Cluster {Id}", System.Id); } diff --git a/src/Proto.Remote/GrpcNet/GrpcNetRemote.cs b/src/Proto.Remote/GrpcNet/GrpcNetRemote.cs index 038441bcd5..1c873517f5 100644 --- a/src/Proto.Remote/GrpcNet/GrpcNetRemote.cs +++ b/src/Proto.Remote/GrpcNet/GrpcNetRemote.cs @@ -161,7 +161,7 @@ public async Task ShutdownAsync(bool graceful = true) if (_host is not null) { - await _host.StopAsync().ConfigureAwait(false); + await _host.StopAsync().WaitAsync(TimeSpan.FromSeconds(5)).ConfigureAwait(false); } } } From 862ba9ccd7965c9df7500f10813e1d09570be4aa Mon Sep 17 00:00:00 2001 From: Roger Johansson Date: Thu, 29 Feb 2024 21:13:01 +0100 Subject: [PATCH 15/16] now then? --- src/Proto.Cluster/Cluster.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Proto.Cluster/Cluster.cs b/src/Proto.Cluster/Cluster.cs index d376d1d0a6..8df5e602f9 100644 --- a/src/Proto.Cluster/Cluster.cs +++ b/src/Proto.Cluster/Cluster.cs @@ -296,10 +296,10 @@ public async Task ShutdownAsync(bool graceful = true, string reason = "") // In case provider shutdown is quick, let's wait at least 2 gossip intervals. await Task.Delay((int)Config.GossipInterval.TotalMilliseconds * 2).ConfigureAwait(false); - // Logger.LogInformation("Stopping cluster provider for {Id}", System.Id); + Logger.LogInformation("Stopping cluster provider for {Id}", System.Id); // // Deregister from configured cluster provider. - // await Provider.ShutdownAsync(graceful); - // + await Provider.ShutdownAsync(graceful); + if (_clusterKindObserver != null) { ClusterMetrics.VirtualActorsCount.RemoveObserver(_clusterKindObserver); From 4c1bdc6dc0d5ed642a7de12e6857e27be85de730 Mon Sep 17 00:00:00 2001 From: Roger Johansson Date: Thu, 29 Feb 2024 21:21:52 +0100 Subject: [PATCH 16/16] cleanup --- src/Proto.Cluster/Cluster.cs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Proto.Cluster/Cluster.cs b/src/Proto.Cluster/Cluster.cs index 8df5e602f9..23718d8c68 100644 --- a/src/Proto.Cluster/Cluster.cs +++ b/src/Proto.Cluster/Cluster.cs @@ -285,46 +285,46 @@ private void InitIdentityProxy() => public async Task ShutdownAsync(bool graceful = true, string reason = "") { Logger.LogInformation("Stopping Cluster {Id}", System.Id); - + // Inform all members of the cluster that this node intends to leave. Also, let the MemberList know that this // node was the one that initiated the shutdown to prevent another shutdown from being called. Logger.LogInformation("Setting GracefullyLeft gossip state for {Id}", System.Id); MemberList.Stopping = true; await Gossip.SetStateAsync(GossipKeys.GracefullyLeft, new Empty()).ConfigureAwait(false); - + Logger.LogInformation("Waiting for two gossip intervals to pass for {Id}", System.Id); // In case provider shutdown is quick, let's wait at least 2 gossip intervals. await Task.Delay((int)Config.GossipInterval.TotalMilliseconds * 2).ConfigureAwait(false); - + Logger.LogInformation("Stopping cluster provider for {Id}", System.Id); - // // Deregister from configured cluster provider. + // Deregister from configured cluster provider. await Provider.ShutdownAsync(graceful); - + if (_clusterKindObserver != null) { ClusterMetrics.VirtualActorsCount.RemoveObserver(_clusterKindObserver); _clusterKindObserver = null; } - + if (_clusterMembersObserver != null) { ClusterMetrics.ClusterMembersCount.RemoveObserver(_clusterMembersObserver); _clusterMembersObserver = null; } - + // Shut down the rest of the dependencies in reverse order that they were started. await Gossip.ShutdownAsync().ConfigureAwait(false); - - if (graceful) - { - await IdentityLookup.ShutdownAsync().ConfigureAwait(false); - } - + + if (graceful) + { + await IdentityLookup.ShutdownAsync().ConfigureAwait(false); + } + await Remote.ShutdownAsync(graceful).ConfigureAwait(false); - + // Cancel the primary CancellationToken first which will shut down a number of concurrent systems simultaneously. await System.ShutdownAsync(reason).ConfigureAwait(false); - + _shutdownCompletedTcs.TrySetResult(true); Logger.LogInformation("Stopped Cluster {Id}", System.Id); }