Add graceful shutdown on SIGTERM/SIGINT

This commit is contained in:
Ske 2020-05-05 18:12:34 +02:00
parent a052c8331c
commit 0b41b4f6b8
2 changed files with 92 additions and 36 deletions

View File

@ -1,6 +1,7 @@
using System; using System;
using System.Linq; using System.Linq;
using System.Net.WebSockets; using System.Net.WebSockets;
using System.Threading;
using System.Threading.Tasks; using System.Threading.Tasks;
using App.Metrics; using App.Metrics;
@ -30,7 +31,7 @@ namespace PluralKit.Bot
private readonly PeriodicStatCollector _collector; private readonly PeriodicStatCollector _collector;
private readonly IMetrics _metrics; private readonly IMetrics _metrics;
private Task _periodicTask; // Never read, just kept here for GC reasons private Timer _periodicTask; // Never read, just kept here for GC reasons
public Bot(DiscordShardedClient client, ILifetimeScope services, ILogger logger, PeriodicStatCollector collector, IMetrics metrics) public Bot(DiscordShardedClient client, ILifetimeScope services, ILogger logger, PeriodicStatCollector collector, IMetrics metrics)
{ {
@ -55,11 +56,33 @@ namespace PluralKit.Bot
_client.MessagesBulkDeleted += HandleEvent; _client.MessagesBulkDeleted += HandleEvent;
_client.MessageReactionAdded += HandleEvent; _client.MessageReactionAdded += HandleEvent;
// Update shard status for shards immediately on connect
_client.Ready += args => UpdateBotStatus(args.Client);
_client.Resumed += args => UpdateBotStatus(args.Client);
// Init the shard stuff // Init the shard stuff
_services.Resolve<ShardInfoService>().Init(_client); _services.Resolve<ShardInfoService>().Init(_client);
// Not awaited, just needs to run in the background // Not awaited, just needs to run in the background
_periodicTask = UpdatePeriodic(); // Trying our best to run it at whole minute boundaries (xx:00), with ~250ms buffer
// This *probably* doesn't matter in practice but I just think it's neat, y'know.
var timeNow = SystemClock.Instance.GetCurrentInstant();
var timeTillNextWholeMinute = TimeSpan.FromMilliseconds(60000 - timeNow.ToUnixTimeMilliseconds() % 60000 + 250);
_periodicTask = new Timer(_ =>
{
var __ = UpdatePeriodic();
}, null, timeTillNextWholeMinute, TimeSpan.FromMinutes(1));
}
public async Task Shutdown()
{
// This will stop the timer and prevent any subsequent invocations
await _periodicTask.DisposeAsync();
// Send users a lil status message
// We're not actually properly disconnecting from the gateway (lol) so it'll linger for a few minutes
// Should be plenty of time for the bot to connect again next startup and set the real status
await _client.UpdateStatusAsync(new DiscordActivity("Restarting... (please wait)"));
} }
private Task HandleEvent<T>(T evt) where T: DiscordEventArgs private Task HandleEvent<T>(T evt) where T: DiscordEventArgs
@ -123,28 +146,32 @@ namespace PluralKit.Bot
private async Task UpdatePeriodic() private async Task UpdatePeriodic()
{ {
while (true) _logger.Information("Running once-per-minute scheduled tasks");
{
// Run at every whole minute (:00), mostly because I feel like it
var timeNow = SystemClock.Instance.GetCurrentInstant();
var timeTillNextWholeMinute = 60000 - (timeNow.ToUnixTimeMilliseconds() % 60000);
await Task.Delay((int) timeTillNextWholeMinute);
// Change bot status await UpdateBotStatus();
var totalGuilds = _client.ShardClients.Values.Sum(c => c.Guilds.Count);
try // DiscordClient may throw an exception if the socket is closed (e.g just after OP 7 received)
{
foreach (var c in _client.ShardClients.Values)
await c.UpdateStatusAsync(new DiscordActivity($"pk;help | in {totalGuilds} servers | shard #{c.ShardId}"));
}
catch (WebSocketException) { }
// Collect some stats, submit them to the metrics backend // Collect some stats, submit them to the metrics backend
await _collector.CollectStats(); await _collector.CollectStats();
await Task.WhenAll(((IMetricsRoot) _metrics).ReportRunner.RunAllAsync()); await Task.WhenAll(((IMetricsRoot) _metrics).ReportRunner.RunAllAsync());
_logger.Information("Submitted metrics to backend"); _logger.Information("Submitted metrics to backend");
} }
private async Task UpdateBotStatus(DiscordClient specificShard = null)
{
var totalGuilds = _client.ShardClients.Values.Sum(c => c.Guilds.Count);
try // DiscordClient may throw an exception if the socket is closed (e.g just after OP 7 received)
{
Task UpdateStatus(DiscordClient shard) =>
shard.UpdateStatusAsync(new DiscordActivity($"pk;help | in {totalGuilds} servers | shard #{shard.ShardId}"));
if (specificShard != null)
await UpdateStatus(specificShard);
else // Run shard updates concurrently
await Task.WhenAll(_client.ShardClients.Values.Select(UpdateStatus));
} }
catch (WebSocketException) { }
}
private void FrameworkLog(object sender, DebugLogMessageEventArgs args) private void FrameworkLog(object sender, DebugLogMessageEventArgs args)
{ {
// Bridge D#+ logging to Serilog // Bridge D#+ logging to Serilog

View File

@ -11,6 +11,7 @@ using Microsoft.Extensions.Configuration;
using PluralKit.Core; using PluralKit.Core;
using Serilog; using Serilog;
using Serilog.Core;
namespace PluralKit.Bot namespace PluralKit.Bot
{ {
@ -41,39 +42,67 @@ namespace PluralKit.Bot
await services.Resolve<DiscordShardedClient>().StartAsync(); await services.Resolve<DiscordShardedClient>().StartAsync();
// Start the bot stuff and let it register things // Start the bot stuff and let it register things
services.Resolve<Bot>().Init(); var bot = services.Resolve<Bot>();
bot.Init();
// Lastly, we just... wait. Everything else is handled in the DiscordClient event loop // Lastly, we just... wait. Everything else is handled in the DiscordClient event loop
try
{
await Task.Delay(-1, ct); await Task.Delay(-1, ct);
}
catch (TaskCanceledException)
{
// Once the CancellationToken fires, we need to shut stuff down
// (generally happens given a SIGINT (Ctrl-C) or SIGTERM, see calling wrapper)
await bot.Shutdown();
}
}); });
} }
private static async Task RunWrapper(IContainer services, Func<CancellationToken, Task> taskFunc) private static async Task RunWrapper(IContainer services, Func<CancellationToken, Task> taskFunc)
{ {
// This function does a couple things: // This function does a couple things:
// - Creates a CancellationToken that'll cancel tasks once we get a Ctrl-C / SIGINT // - Creates a CancellationToken that'll cancel tasks once needed
// - Wraps the given function in an exception handler that properly logs errors // - Wraps the given function in an exception handler that properly logs errors
// - Adds a SIGINT (Ctrl-C) listener through Console.CancelKeyPress to gracefully shut down
// - Adds a SIGTERM (kill, systemctl stop, docker stop) listener through AppDomain.ProcessExit (same as above)
var logger = services.Resolve<ILogger>().ForContext<Init>(); var logger = services.Resolve<ILogger>().ForContext<Init>();
var cts = new CancellationTokenSource(); var shutdown = new TaskCompletionSource<object>();
Console.CancelKeyPress += delegate { cts.Cancel(); }; var gracefulShutdownCts = new CancellationTokenSource();
Console.CancelKeyPress += delegate
{
// ReSharper disable once AccessToDisposedClosure (will only be hit before the below disposal)
logger.Information("Received SIGINT/Ctrl-C, attempting graceful shutdown...");
gracefulShutdownCts.Cancel();
};
AppDomain.CurrentDomain.ProcessExit += (_, __) =>
{
// This callback is fired when a SIGTERM is sent (SIGKILL cannot be caught by the process).
// The runtime will kill the program as soon as this callback is finished, so we have to
// block on the shutdown task's completion to ensure everything is sorted by the time this returns.
// ReSharper disable once AccessToDisposedClosure (it's only disposed after the block)
logger.Information("Received SIGKILL event, attempting graceful shutdown...");
gracefulShutdownCts.Cancel();
var ___ = shutdown.Task.Result; // Blocking! This is the only time it's justified...
};
try try
{ {
await taskFunc(cts.Token); await taskFunc(gracefulShutdownCts.Token);
} logger.Information("Shutdown complete. Have a nice day~");
catch (TaskCanceledException e) when (e.CancellationToken == cts.Token)
{
// The CancellationToken we made got triggered - this is normal!
// Therefore, exception handler is empty.
} }
catch (Exception e) catch (Exception e)
{ {
logger.Fatal(e, "Error while running bot"); logger.Fatal(e, "Error while running bot");
}
// Allow the log buffer to flush properly before exiting // Allow the log buffer to flush properly before exiting
await Task.Delay(1000, cts.Token); ((Logger) logger).Dispose();
} shutdown.SetResult(null);
} }
private static IContainer BuildContainer(IConfiguration config) private static IContainer BuildContainer(IConfiguration config)