diff --git a/PluralKit.Bot/BotMetrics.cs b/PluralKit.Bot/BotMetrics.cs index eca473a0..832a2b84 100644 --- a/PluralKit.Bot/BotMetrics.cs +++ b/PluralKit.Bot/BotMetrics.cs @@ -40,46 +40,6 @@ public static class BotMetrics Context = "Bot" }; - public static GaugeOptions MembersTotal => new() - { - Name = "Members total", - MeasurementUnit = Unit.None, - Context = "Bot" - }; - - public static GaugeOptions MembersOnline => new() - { - Name = "Members online", - MeasurementUnit = Unit.None, - Context = "Bot" - }; - - public static GaugeOptions Guilds => new() - { - Name = "Guilds", - MeasurementUnit = Unit.None, - Context = "Bot" - }; - public static GaugeOptions Channels => new() - { - Name = "Channels", - MeasurementUnit = Unit.None, - Context = "Bot" - }; - - public static GaugeOptions ShardLatency => new() - { - Name = "Shard Latency", - Context = "Bot" - }; - - public static GaugeOptions ShardsConnected => new() - { - Name = "Shards Connected", - Context = "Bot", - MeasurementUnit = Unit.Connections - }; - public static MeterOptions WebhookCacheMisses => new() { Name = "Webhook cache misses", @@ -87,13 +47,6 @@ public static class BotMetrics MeasurementUnit = Unit.Calls }; - public static GaugeOptions WebhookCacheSize => new() - { - Name = "Webhook Cache Size", - Context = "Bot", - MeasurementUnit = Unit.Items - }; - public static TimerOptions WebhookResponseTime => new() { Name = "Webhook Response Time", diff --git a/PluralKit.Bot/Commands/Checks.cs b/PluralKit.Bot/Commands/Checks.cs index b783f1be..cbfa79ae 100644 --- a/PluralKit.Bot/Commands/Checks.cs +++ b/PluralKit.Bot/Commands/Checks.cs @@ -156,7 +156,7 @@ public class Checks throw new PKSyntaxError("You need to specify a channel."); var error = "Channel not found or you do not have permissions to access it."; - + // todo: this breaks if channel is not in cache and bot does not have View Channel permissions var channel = await ctx.MatchChannel(); if (channel == null || channel.GuildId == null) diff --git a/PluralKit.Bot/Services/PeriodicStatCollector.cs b/PluralKit.Bot/Services/PeriodicStatCollector.cs index 39117613..92c5ea05 100644 --- a/PluralKit.Bot/Services/PeriodicStatCollector.cs +++ b/PluralKit.Bot/Services/PeriodicStatCollector.cs @@ -4,6 +4,8 @@ using App.Metrics; using Myriad.Cache; +using Newtonsoft.Json; + using NodaTime.Extensions; using PluralKit.Core; @@ -20,17 +22,20 @@ public class PeriodicStatCollector private readonly DbConnectionCountHolder _countHolder; private readonly CpuStatService _cpu; + private readonly BotConfig _botConfig; + private readonly CoreConfig _config; private readonly ILogger _logger; private readonly IMetrics _metrics; private readonly ModelRepository _repo; + private readonly RedisService _redis; private readonly WebhookCacheService _webhookCache; public PeriodicStatCollector(IMetrics metrics, ILogger logger, WebhookCacheService webhookCache, DbConnectionCountHolder countHolder, CpuStatService cpu, ModelRepository repo, - IDiscordCache cache) + BotConfig botConfig, CoreConfig config, RedisService redis, IDiscordCache cache) { _metrics = metrics; _webhookCache = webhookCache; @@ -38,6 +43,9 @@ public class PeriodicStatCollector _cpu = cpu; _repo = repo; _cache = cache; + _botConfig = botConfig; + _config = config; + _redis = redis; _logger = logger.ForContext(); } @@ -59,19 +67,19 @@ public class PeriodicStatCollector channelCount++; } - _metrics.Measure.Gauge.SetValue(BotMetrics.Guilds, guildCount); - _metrics.Measure.Gauge.SetValue(BotMetrics.Channels, channelCount); - - // Aggregate DB stats - // just fetching from database here - actual updating of the data is done in PluralKit.ScheduledTasks - // if you're not running ScheduledTasks and want up-to-date counts, uncomment the following line: - // await _repo.UpdateStats(); - var counts = await _repo.GetStats(); - _metrics.Measure.Gauge.SetValue(CoreMetrics.SystemCount, counts.SystemCount); - _metrics.Measure.Gauge.SetValue(CoreMetrics.MemberCount, counts.MemberCount); - _metrics.Measure.Gauge.SetValue(CoreMetrics.GroupCount, counts.GroupCount); - _metrics.Measure.Gauge.SetValue(CoreMetrics.SwitchCount, counts.SwitchCount); - _metrics.Measure.Gauge.SetValue(CoreMetrics.MessageCount, counts.MessageCount); + if (_config.UseRedisMetrics) + { + var db = _redis.Connection.GetDatabase(); + await db.HashSetAsync("pluralkit:cluster_stats", new StackExchange.Redis.HashEntry[] { + new(_botConfig.Cluster.NodeIndex, JsonConvert.SerializeObject(new ClusterMetricInfo + { + GuildCount = guildCount, + ChannelCount = channelCount, + DatabaseConnectionCount = _countHolder.ConnectionCount, + WebhookCacheSize = _webhookCache.CacheSize, + })), + }); + } // Process info var process = Process.GetCurrentProcess(); @@ -82,12 +90,6 @@ public class PeriodicStatCollector _metrics.Measure.Gauge.SetValue(CoreMetrics.ProcessHandles, process.HandleCount); _metrics.Measure.Gauge.SetValue(CoreMetrics.CpuUsage, await _cpu.EstimateCpuUsage()); - // Database info - _metrics.Measure.Gauge.SetValue(CoreMetrics.DatabaseConnections, _countHolder.ConnectionCount); - - // Other shiz - _metrics.Measure.Gauge.SetValue(BotMetrics.WebhookCacheSize, _webhookCache.CacheSize); - stopwatch.Stop(); _logger.Debug("Updated metrics in {Time}", stopwatch.ElapsedDuration()); } diff --git a/PluralKit.Core/CoreConfig.cs b/PluralKit.Core/CoreConfig.cs index 7623005d..5c2a11f0 100644 --- a/PluralKit.Core/CoreConfig.cs +++ b/PluralKit.Core/CoreConfig.cs @@ -6,6 +6,7 @@ public class CoreConfig { public string Database { get; set; } public string RedisAddr { get; set; } + public bool UseRedisMetrics { get; set; } = false; public string SentryUrl { get; set; } public string InfluxUrl { get; set; } public string InfluxDb { get; set; } diff --git a/PluralKit.Core/CoreMetrics.cs b/PluralKit.Core/CoreMetrics.cs index 9cb6bdb2..09856bb3 100644 --- a/PluralKit.Core/CoreMetrics.cs +++ b/PluralKit.Core/CoreMetrics.cs @@ -102,4 +102,12 @@ public static class CoreMetrics MeasurementUnit = Unit.Connections, Context = "Database" }; +} + +public record ClusterMetricInfo +{ + public int GuildCount; + public int ChannelCount; + public int DatabaseConnectionCount; + public int WebhookCacheSize; } \ No newline at end of file diff --git a/PluralKit.ScheduledTasks/Metrics.cs b/PluralKit.ScheduledTasks/Metrics.cs new file mode 100644 index 00000000..461f7351 --- /dev/null +++ b/PluralKit.ScheduledTasks/Metrics.cs @@ -0,0 +1,26 @@ +using App.Metrics; +using App.Metrics.Gauge; + +public static class Metrics +{ + public static GaugeOptions Guilds => new() + { + Name = "Guilds", + MeasurementUnit = Unit.None, + Context = "Bot" + }; + public static GaugeOptions Channels => new() + { + Name = "Channels", + MeasurementUnit = Unit.None, + Context = "Bot" + }; + + public static GaugeOptions WebhookCacheSize => new() + { + Name = "Webhook Cache Size", + Context = "Bot", + MeasurementUnit = Unit.Items + }; + +} \ No newline at end of file diff --git a/PluralKit.ScheduledTasks/Startup.cs b/PluralKit.ScheduledTasks/Startup.cs index 6aff729f..3c51e3ae 100644 --- a/PluralKit.ScheduledTasks/Startup.cs +++ b/PluralKit.ScheduledTasks/Startup.cs @@ -19,6 +19,11 @@ internal class Startup await BuildInfoService.LoadVersion(); var services = BuildContainer(config); + + var cfg = services.Resolve(); + if (cfg.UseRedisMetrics) + await services.Resolve().InitAsync(cfg); + services.Resolve().Run(); await Task.Delay(-1); diff --git a/PluralKit.ScheduledTasks/TaskHandler.cs b/PluralKit.ScheduledTasks/TaskHandler.cs index 71f25deb..c4767e6a 100644 --- a/PluralKit.ScheduledTasks/TaskHandler.cs +++ b/PluralKit.ScheduledTasks/TaskHandler.cs @@ -1,11 +1,16 @@ using System; +using System.Linq; using System.Diagnostics; using System.Threading; using System.Threading.Tasks; +using App.Metrics; + using NodaTime; using NodaTime.Extensions; +using Newtonsoft.Json; + using PluralKit.Core; using Serilog; @@ -16,16 +21,23 @@ public class TaskHandler { private static readonly Duration CommandMessageRetention = Duration.FromHours(24); private readonly IDatabase _db; + private readonly RedisService _redis; + private readonly bool _useRedisMetrics; private readonly ILogger _logger; + private readonly IMetrics _metrics; private readonly ModelRepository _repo; private Timer _periodicTask; - public TaskHandler(ILogger logger, IDatabase db, ModelRepository repo) + public TaskHandler(ILogger logger, IMetrics metrics, CoreConfig config, IDatabase db, RedisService redis, ModelRepository repo) { _logger = logger; + _metrics = metrics; _db = db; + _redis = redis; _repo = repo; + + _useRedisMetrics = config.UseRedisMetrics; } public void Run() @@ -49,6 +61,10 @@ public class TaskHandler _logger.Information("Updating database stats..."); await _repo.UpdateStats(); + // Collect bot cluster statistics from Redis (if it's enabled) + if (_useRedisMetrics) + await CollectBotStats(); + // Clean up message cache in postgres await CleanupOldMessages(); @@ -56,6 +72,32 @@ public class TaskHandler _logger.Information("Ran scheduled tasks in {Time}", stopwatch.ElapsedDuration()); } + private async Task CollectBotStats() + { + var redisStats = await _redis.Connection.GetDatabase().HashGetAllAsync("pluralkit:cluster_stats"); + + var stats = redisStats.Select(v => JsonConvert.DeserializeObject(v.Value)); + + _metrics.Measure.Gauge.SetValue(Metrics.Guilds, stats.Sum(x => x.GuildCount)); + _metrics.Measure.Gauge.SetValue(Metrics.Channels, stats.Sum(x => x.ChannelCount)); + + // Aggregate DB stats + // just fetching from database here - actual updating of the data is done elsewiere + var counts = await _repo.GetStats(); + _metrics.Measure.Gauge.SetValue(CoreMetrics.SystemCount, counts.SystemCount); + _metrics.Measure.Gauge.SetValue(CoreMetrics.MemberCount, counts.MemberCount); + _metrics.Measure.Gauge.SetValue(CoreMetrics.GroupCount, counts.GroupCount); + _metrics.Measure.Gauge.SetValue(CoreMetrics.SwitchCount, counts.SwitchCount); + _metrics.Measure.Gauge.SetValue(CoreMetrics.MessageCount, counts.MessageCount); + + // Database info + // this is pretty much always inaccurate but oh well + _metrics.Measure.Gauge.SetValue(CoreMetrics.DatabaseConnections, stats.Sum(x => x.DatabaseConnectionCount)); + + // Other shiz + _metrics.Measure.Gauge.SetValue(Metrics.WebhookCacheSize, stats.Sum(x => x.WebhookCacheSize)); + } + private async Task CleanupOldMessages() { var deleteThresholdInstant = SystemClock.Instance.GetCurrentInstant() - CommandMessageRetention;