-
Notifications
You must be signed in to change notification settings - Fork 46
/
Copy pathWatchdog.cs
151 lines (139 loc) · 6.67 KB
/
Watchdog.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
using System;
using System.Collections.Concurrent;
using System.Diagnostics;
using System.Linq;
using System.Threading.Tasks;
using CompatBot.Commands;
using CompatBot.Database.Providers;
using CompatBot.EventHandlers;
using DSharpPlus;
using DSharpPlus.EventArgs;
using Microsoft.ApplicationInsights;
using NLog;
namespace CompatBot;
/// <summary>
/// Background health monitoring for the bot: detects stalled gateway connections,
/// reacts to selected log messages, and reports latency / memory metrics.
/// </summary>
internal static class Watchdog
{
    /// <summary>
    /// GC committed-bytes level above which <see cref="SendMetrics"/> requests a bot restart.
    /// </summary>
    private const long GcCommittedBytesRestartThreshold = 3_000_000_000;

    /// <summary>
    /// Timestamps of disconnect events seen by <see cref="OnLogHandler"/>.
    /// The queue is cleared when a "Session resumed" message is logged.
    /// </summary>
    public static readonly ConcurrentQueue<DateTime> DisconnectTimestamps = new();

    /// <summary>
    /// Time elapsed since the last incoming message.
    /// NOTE(review): nothing in this class restarts the stopwatch; presumably an event handler
    /// elsewhere does so through this public field - confirm.
    /// </summary>
    public static readonly Stopwatch TimeSinceLastIncomingMessage = Stopwatch.StartNew();

    /// <summary>
    /// True when no disconnects are pending and a message was received within
    /// <c>Config.IncomingMessageCheckIntervalInMin</c>.
    /// </summary>
    private static bool IsOk => DisconnectTimestamps.IsEmpty && TimeSinceLastIncomingMessage.Elapsed < Config.IncomingMessageCheckIntervalInMin;

    // Client captured by Watch() so that OnLogHandler can refresh the bot status.
    private static DiscordClient? discordClient;

    /// <summary>
    /// Main watchdog loop. On every tick it checks sudoers' custom statuses for restart requests,
    /// then verifies connection health; an unhealthy connection triggers a reconnect and,
    /// if that does not help, a bot restart.
    /// </summary>
    /// <param name="client">Discord client to monitor and reconnect.</param>
    /// <returns>A task that completes when <c>Config.Cts</c> is cancelled.</returns>
    /// <exception cref="OperationCanceledException">
    /// Thrown when cancellation is requested while waiting for the next tick.
    /// </exception>
    public static async Task Watch(DiscordClient client)
    {
        discordClient = client;
        do
        {
            await Task.Delay(Config.SocketDisconnectCheckIntervalInSec, Config.Cts.Token).ConfigureAwait(false);
            await CheckRestartRequests(client).ConfigureAwait(false);
            if (IsOk)
                continue;

            try
            {
                Config.TelemetryClient?.TrackEvent("socket-deadlock-potential");
                Config.Log.Warn("Potential socket deadlock detected, reconnecting...");
                await client.ReconnectAsync(true).ConfigureAwait(false);
                // give the new connection one full check interval to prove itself
                await Task.Delay(Config.SocketDisconnectCheckIntervalInSec, Config.Cts.Token).ConfigureAwait(false);
                if (IsOk)
                {
                    Config.Log.Info("Looks like we're back in business");
                    continue;
                }

                Config.TelemetryClient?.TrackEvent("socket-deadlock-for-sure");
                Config.Log.Error("Hard reconnect failed, restarting...");
                Sudo.Bot.Restart(Program.InvalidChannelId, $@"Restarted to reset potential socket deadlock (last incoming message event: {TimeSinceLastIncomingMessage.Elapsed:h\:mm\:ss} ago)");
            }
            catch (Exception e)
            {
                Config.Log.Error(e);
            }
        } while (!Config.Cts.IsCancellationRequested);
    }

    /// <summary>
    /// Looks for a custom status of the form <c>restart ... &lt;bot id&gt;</c> on every sudoer and
    /// restarts the bot when the trailing id matches the current bot user.
    /// A failure to look up one user is logged and does not stop the watchdog.
    /// </summary>
    /// <param name="client">Discord client used to resolve users.</param>
    private static async Task CheckRestartRequests(DiscordClient client)
    {
        // Snapshot first: the loop awaits between items, so lazily enumerating the
        // moderator collection could fail if it is modified in the meantime.
        var sudoers = ModProvider.Mods.Values.Where(m => m.Sudoer).ToList();
        foreach (var sudoer in sudoers)
        {
            try
            {
                var user = await client.GetUserAsync(sudoer.DiscordId).ConfigureAwait(false);
                if (user?.Presence?.Activity?.CustomStatus?.Name is string cmd
                    && cmd.StartsWith("restart", StringComparison.Ordinal))
                {
                    // the last token of the status is expected to be the id of the targeted bot instance
                    var instance = cmd.Split(' ', StringSplitOptions.RemoveEmptyEntries).LastOrDefault();
                    if (ulong.TryParse(instance, out var botId) && botId == client.CurrentUser.Id)
                    {
                        Config.Log.Warn($"Found request to restart on {user.Username}#{user.Discriminator}'s custom status");
                        Sudo.Bot.Restart(Program.InvalidChannelId, $"Restarted by request from {user.Username}#{user.Discriminator}'s custom status");
                    }
                }
            }
            catch (Exception e)
            {
                // The health check below must still run even if a user lookup fails.
                Config.Log.Warn($"Failed to check custom status of sudoer {sudoer.DiscordId}: {e}");
            }
        }
    }

    /// <summary>
    /// Inspects a log entry and reacts to known messages: tracks disconnects and session resumes,
    /// refreshes the bot status, records rate-limit telemetry, and restarts on fatal runtime errors.
    /// </summary>
    /// <param name="level">Log level name; compared against the <see cref="LogLevel"/> member names.</param>
    /// <param name="message">Text of the log entry.</param>
    public static void OnLogHandler(string level, string message)
    {
        switch (level)
        {
            case nameof(LogLevel.Info):
                if (message.Contains("Session resumed"))
                    DisconnectTimestamps.Clear();
                break;

            case nameof(LogLevel.Warn):
                // read the static field once so the null check and the use see the same value
                if (message.Contains("Dispatch:PRESENCES_REPLACE")
                    && discordClient is { } activeClient)
                {
                    // NOTE(review): blocks the logging thread until the refresh completes (sync-over-async);
                    // kept because this handler is synchronous - confirm this cannot deadlock.
                    BotStatusMonitor.RefreshAsync(activeClient).ConfigureAwait(false).GetAwaiter().GetResult();
                }
                else if (message.Contains("Pre-emptive ratelimit triggered"))
                    Config.TelemetryClient?.TrackEvent("preemptive-rate-limit");
                break;

            case nameof(LogLevel.Error):
                if (message.Contains("System.Threading.Tasks.TaskSchedulerException")
                    || message.Contains("System.OutOfMemoryException"))
                    Sudo.Bot.RestartNoSaving();
                break;

            case nameof(LogLevel.Fatal):
                if (message.Contains("Socket connection terminated")
                    || message.Contains("heartbeats were skipped. Issuing reconnect."))
                    DisconnectTimestamps.Enqueue(DateTime.UtcNow);
                break;
        }
    }

    /// <summary>
    /// Message-created event handler that reports gateway latency, user-to-bot delay and the
    /// time since the previous incoming message to telemetry, when telemetry is configured.
    /// </summary>
    /// <param name="c">Client that received the message.</param>
    /// <param name="args">Event data with the received message.</param>
    /// <returns>An already completed task.</returns>
    public static Task OnMessageCreated(DiscordClient c, MessageCreateEventArgs args)
    {
        if (Config.TelemetryClient is TelemetryClient tc)
        {
            var userToBotDelay = (DateTime.UtcNow - args.Message.Timestamp.UtcDateTime).TotalMilliseconds;
            tc.TrackMetric("gw-latency", c.Ping);
            tc.TrackMetric("user-to-bot-latency", userToBotDelay);
            tc.TrackMetric("time-since-last-incoming-message", TimeSinceLastIncomingMessage.ElapsedMilliseconds);
        }
        return Task.CompletedTask;
    }

    /// <summary>
    /// Periodically reports latency and memory metrics to telemetry (when configured) and requests
    /// a restart when the GC committed memory exceeds <see cref="GcCommittedBytesRestartThreshold"/>.
    /// The memory guard runs regardless of whether telemetry is configured.
    /// </summary>
    /// <param name="client">Client whose gateway ping is reported.</param>
    /// <returns>A task that completes once <c>Config.Cts</c> is cancelled.</returns>
    public static async Task SendMetrics(DiscordClient client)
    {
        do
        {
            try
            {
                await Task.Delay(Config.MetricsIntervalInSec, Config.Cts.Token).ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                // shutting down: stop right away instead of sitting out the rest of the interval
                break;
            }

            var gcMemInfo = GC.GetGCMemoryInfo();
            if (Config.TelemetryClient is TelemetryClient tc)
            {
                using var process = Process.GetCurrentProcess();
                tc.TrackMetric("gw-latency", client.Ping);
                tc.TrackMetric("memory-gc-total", gcMemInfo.HeapSizeBytes);
                tc.TrackMetric("memory-gc-load", gcMemInfo.MemoryLoadBytes);
                tc.TrackMetric("memory-gc-committed", gcMemInfo.TotalCommittedBytes);
                tc.TrackMetric("memory-process-private", process.PrivateMemorySize64);
                tc.TrackMetric("memory-process-ws", process.WorkingSet64);
                tc.TrackMetric("github-limit-remaining", GithubClient.Client.RateLimitRemaining);
                // flush before a possible restart so the last sample is not lost
                tc.Flush();
            }

            if (gcMemInfo.TotalCommittedBytes > GcCommittedBytesRestartThreshold)
                Sudo.Bot.Restart(Program.InvalidChannelId, "GC Memory overcommitment");
        } while (!Config.Cts.IsCancellationRequested);
    }

    /// <summary>
    /// Logs process and GC memory statistics immediately and then once per hour
    /// until <c>Config.Cts</c> is cancelled.
    /// </summary>
    /// <returns>A task that completes once <c>Config.Cts</c> is cancelled.</returns>
    public static async Task CheckGCStats()
    {
        do
        {
            var gcMemInfo = GC.GetGCMemoryInfo();
            using var process = Process.GetCurrentProcess();
            Config.Log.Info($"Process memory stats:\n" +
                            $"GC Heap: {gcMemInfo.HeapSizeBytes}\n" +
                            $"Private: {process.PrivateMemorySize64}\n" +
                            $"Working set: {process.WorkingSet64}\n" +
                            $"Virtual: {process.VirtualMemorySize64}\n" +
                            $"Paged: {process.PagedMemorySize64}\n" +
                            $"Paged system: {process.PagedSystemMemorySize64}\n" +
                            $"Non-paged system: {process.NonpagedSystemMemorySize64}");
            try
            {
                await Task.Delay(TimeSpan.FromHours(1), Config.Cts.Token).ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                // shutting down: do not hold the task open for the rest of the hour
                break;
            }
        } while (!Config.Cts.IsCancellationRequested);
    }
}