feat: aggregate cluster statistics and send to influx with scheduled tasks worker
This commit is contained in:
parent
857e20b601
commit
8909330db2
@ -40,46 +40,6 @@ public static class BotMetrics
|
||||
Context = "Bot"
|
||||
};
|
||||
|
||||
public static GaugeOptions MembersTotal => new()
|
||||
{
|
||||
Name = "Members total",
|
||||
MeasurementUnit = Unit.None,
|
||||
Context = "Bot"
|
||||
};
|
||||
|
||||
public static GaugeOptions MembersOnline => new()
|
||||
{
|
||||
Name = "Members online",
|
||||
MeasurementUnit = Unit.None,
|
||||
Context = "Bot"
|
||||
};
|
||||
|
||||
public static GaugeOptions Guilds => new()
|
||||
{
|
||||
Name = "Guilds",
|
||||
MeasurementUnit = Unit.None,
|
||||
Context = "Bot"
|
||||
};
|
||||
public static GaugeOptions Channels => new()
|
||||
{
|
||||
Name = "Channels",
|
||||
MeasurementUnit = Unit.None,
|
||||
Context = "Bot"
|
||||
};
|
||||
|
||||
public static GaugeOptions ShardLatency => new()
|
||||
{
|
||||
Name = "Shard Latency",
|
||||
Context = "Bot"
|
||||
};
|
||||
|
||||
public static GaugeOptions ShardsConnected => new()
|
||||
{
|
||||
Name = "Shards Connected",
|
||||
Context = "Bot",
|
||||
MeasurementUnit = Unit.Connections
|
||||
};
|
||||
|
||||
public static MeterOptions WebhookCacheMisses => new()
|
||||
{
|
||||
Name = "Webhook cache misses",
|
||||
@ -87,13 +47,6 @@ public static class BotMetrics
|
||||
MeasurementUnit = Unit.Calls
|
||||
};
|
||||
|
||||
public static GaugeOptions WebhookCacheSize => new()
|
||||
{
|
||||
Name = "Webhook Cache Size",
|
||||
Context = "Bot",
|
||||
MeasurementUnit = Unit.Items
|
||||
};
|
||||
|
||||
public static TimerOptions WebhookResponseTime => new()
|
||||
{
|
||||
Name = "Webhook Response Time",
|
||||
|
@ -156,7 +156,7 @@ public class Checks
|
||||
throw new PKSyntaxError("You need to specify a channel.");
|
||||
|
||||
var error = "Channel not found or you do not have permissions to access it.";
|
||||
|
||||
|
||||
// todo: this breaks if channel is not in cache and bot does not have View Channel permissions
|
||||
var channel = await ctx.MatchChannel();
|
||||
if (channel == null || channel.GuildId == null)
|
||||
|
@ -4,6 +4,8 @@ using App.Metrics;
|
||||
|
||||
using Myriad.Cache;
|
||||
|
||||
using Newtonsoft.Json;
|
||||
|
||||
using NodaTime.Extensions;
|
||||
|
||||
using PluralKit.Core;
|
||||
@ -20,17 +22,20 @@ public class PeriodicStatCollector
|
||||
|
||||
private readonly DbConnectionCountHolder _countHolder;
|
||||
private readonly CpuStatService _cpu;
|
||||
private readonly BotConfig _botConfig;
|
||||
private readonly CoreConfig _config;
|
||||
|
||||
private readonly ILogger _logger;
|
||||
private readonly IMetrics _metrics;
|
||||
|
||||
private readonly ModelRepository _repo;
|
||||
private readonly RedisService _redis;
|
||||
|
||||
private readonly WebhookCacheService _webhookCache;
|
||||
|
||||
public PeriodicStatCollector(IMetrics metrics, ILogger logger, WebhookCacheService webhookCache,
|
||||
DbConnectionCountHolder countHolder, CpuStatService cpu, ModelRepository repo,
|
||||
IDiscordCache cache)
|
||||
BotConfig botConfig, CoreConfig config, RedisService redis, IDiscordCache cache)
|
||||
{
|
||||
_metrics = metrics;
|
||||
_webhookCache = webhookCache;
|
||||
@ -38,6 +43,9 @@ public class PeriodicStatCollector
|
||||
_cpu = cpu;
|
||||
_repo = repo;
|
||||
_cache = cache;
|
||||
_botConfig = botConfig;
|
||||
_config = config;
|
||||
_redis = redis;
|
||||
_logger = logger.ForContext<PeriodicStatCollector>();
|
||||
}
|
||||
|
||||
@ -59,19 +67,19 @@ public class PeriodicStatCollector
|
||||
channelCount++;
|
||||
}
|
||||
|
||||
_metrics.Measure.Gauge.SetValue(BotMetrics.Guilds, guildCount);
|
||||
_metrics.Measure.Gauge.SetValue(BotMetrics.Channels, channelCount);
|
||||
|
||||
// Aggregate DB stats
|
||||
// just fetching from database here - actual updating of the data is done in PluralKit.ScheduledTasks
|
||||
// if you're not running ScheduledTasks and want up-to-date counts, uncomment the following line:
|
||||
// await _repo.UpdateStats();
|
||||
var counts = await _repo.GetStats();
|
||||
_metrics.Measure.Gauge.SetValue(CoreMetrics.SystemCount, counts.SystemCount);
|
||||
_metrics.Measure.Gauge.SetValue(CoreMetrics.MemberCount, counts.MemberCount);
|
||||
_metrics.Measure.Gauge.SetValue(CoreMetrics.GroupCount, counts.GroupCount);
|
||||
_metrics.Measure.Gauge.SetValue(CoreMetrics.SwitchCount, counts.SwitchCount);
|
||||
_metrics.Measure.Gauge.SetValue(CoreMetrics.MessageCount, counts.MessageCount);
|
||||
if (_config.UseRedisMetrics)
|
||||
{
|
||||
var db = _redis.Connection.GetDatabase();
|
||||
await db.HashSetAsync("pluralkit:cluster_stats", new StackExchange.Redis.HashEntry[] {
|
||||
new(_botConfig.Cluster.NodeIndex, JsonConvert.SerializeObject(new ClusterMetricInfo
|
||||
{
|
||||
GuildCount = guildCount,
|
||||
ChannelCount = channelCount,
|
||||
DatabaseConnectionCount = _countHolder.ConnectionCount,
|
||||
WebhookCacheSize = _webhookCache.CacheSize,
|
||||
})),
|
||||
});
|
||||
}
|
||||
|
||||
// Process info
|
||||
var process = Process.GetCurrentProcess();
|
||||
@ -82,12 +90,6 @@ public class PeriodicStatCollector
|
||||
_metrics.Measure.Gauge.SetValue(CoreMetrics.ProcessHandles, process.HandleCount);
|
||||
_metrics.Measure.Gauge.SetValue(CoreMetrics.CpuUsage, await _cpu.EstimateCpuUsage());
|
||||
|
||||
// Database info
|
||||
_metrics.Measure.Gauge.SetValue(CoreMetrics.DatabaseConnections, _countHolder.ConnectionCount);
|
||||
|
||||
// Other shiz
|
||||
_metrics.Measure.Gauge.SetValue(BotMetrics.WebhookCacheSize, _webhookCache.CacheSize);
|
||||
|
||||
stopwatch.Stop();
|
||||
_logger.Debug("Updated metrics in {Time}", stopwatch.ElapsedDuration());
|
||||
}
|
||||
|
@ -6,6 +6,7 @@ public class CoreConfig
|
||||
{
|
||||
public string Database { get; set; }
|
||||
public string RedisAddr { get; set; }
|
||||
public bool UseRedisMetrics { get; set; } = false;
|
||||
public string SentryUrl { get; set; }
|
||||
public string InfluxUrl { get; set; }
|
||||
public string InfluxDb { get; set; }
|
||||
|
@ -102,4 +102,12 @@ public static class CoreMetrics
|
||||
MeasurementUnit = Unit.Connections,
|
||||
Context = "Database"
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Snapshot of one bot cluster's statistics. Instances are serialized to JSON and
/// stored in the "pluralkit:cluster_stats" Redis hash, then read back and summed
/// by the scheduled tasks worker.
/// </summary>
public record ClusterMetricInfo
{
    // Exposed as auto-properties rather than public fields: the members stay
    // settable (object initializers and plain assignment keep working), while
    // serializers that only look at properties will still see them.

    /// <summary>Guild count reported by the cluster.</summary>
    public int GuildCount { get; set; }

    /// <summary>Channel count reported by the cluster.</summary>
    public int ChannelCount { get; set; }

    /// <summary>Database connection count reported by the cluster.</summary>
    public int DatabaseConnectionCount { get; set; }

    /// <summary>Webhook cache size reported by the cluster.</summary>
    public int WebhookCacheSize { get; set; }
}
|
26
PluralKit.ScheduledTasks/Metrics.cs
Normal file
26
PluralKit.ScheduledTasks/Metrics.cs
Normal file
@ -0,0 +1,26 @@
|
||||
using App.Metrics;
|
||||
using App.Metrics.Gauge;
|
||||
|
||||
/// <summary>
/// Gauge definitions used by the scheduled tasks worker when it reports
/// statistics summed across bot clusters. Each property builds a fresh
/// <see cref="GaugeOptions"/> instance on every access.
/// </summary>
public static class Metrics
{
    /// <summary>Gauge "Guilds" in the "Bot" context.</summary>
    public static GaugeOptions Guilds
    {
        get
        {
            return new GaugeOptions
            {
                Context = "Bot",
                Name = "Guilds",
                MeasurementUnit = Unit.None
            };
        }
    }

    /// <summary>Gauge "Channels" in the "Bot" context.</summary>
    public static GaugeOptions Channels
    {
        get
        {
            return new GaugeOptions
            {
                Context = "Bot",
                Name = "Channels",
                MeasurementUnit = Unit.None
            };
        }
    }

    /// <summary>Gauge "Webhook Cache Size" in the "Bot" context, measured in items.</summary>
    public static GaugeOptions WebhookCacheSize
    {
        get
        {
            return new GaugeOptions
            {
                Context = "Bot",
                Name = "Webhook Cache Size",
                MeasurementUnit = Unit.Items
            };
        }
    }
}
|
@ -19,6 +19,11 @@ internal class Startup
|
||||
await BuildInfoService.LoadVersion();
|
||||
|
||||
var services = BuildContainer(config);
|
||||
|
||||
var cfg = services.Resolve<CoreConfig>();
|
||||
if (cfg.UseRedisMetrics)
|
||||
await services.Resolve<RedisService>().InitAsync(cfg);
|
||||
|
||||
services.Resolve<TaskHandler>().Run();
|
||||
|
||||
await Task.Delay(-1);
|
||||
|
@ -1,11 +1,16 @@
|
||||
using System;
|
||||
using System.Linq;
|
||||
using System.Diagnostics;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
using App.Metrics;
|
||||
|
||||
using NodaTime;
|
||||
using NodaTime.Extensions;
|
||||
|
||||
using Newtonsoft.Json;
|
||||
|
||||
using PluralKit.Core;
|
||||
|
||||
using Serilog;
|
||||
@ -16,16 +21,23 @@ public class TaskHandler
|
||||
{
|
||||
private static readonly Duration CommandMessageRetention = Duration.FromHours(24);
|
||||
private readonly IDatabase _db;
|
||||
private readonly RedisService _redis;
|
||||
private readonly bool _useRedisMetrics;
|
||||
|
||||
private readonly ILogger _logger;
|
||||
private readonly IMetrics _metrics;
|
||||
private readonly ModelRepository _repo;
|
||||
private Timer _periodicTask;
|
||||
|
||||
public TaskHandler(ILogger logger, IDatabase db, ModelRepository repo)
|
||||
public TaskHandler(ILogger logger, IMetrics metrics, CoreConfig config, IDatabase db, RedisService redis, ModelRepository repo)
|
||||
{
|
||||
_logger = logger;
|
||||
_metrics = metrics;
|
||||
_db = db;
|
||||
_redis = redis;
|
||||
_repo = repo;
|
||||
|
||||
_useRedisMetrics = config.UseRedisMetrics;
|
||||
}
|
||||
|
||||
public void Run()
|
||||
@ -49,6 +61,10 @@ public class TaskHandler
|
||||
_logger.Information("Updating database stats...");
|
||||
await _repo.UpdateStats();
|
||||
|
||||
// Collect bot cluster statistics from Redis (if it's enabled)
|
||||
if (_useRedisMetrics)
|
||||
await CollectBotStats();
|
||||
|
||||
// Clean up message cache in postgres
|
||||
await CleanupOldMessages();
|
||||
|
||||
@ -56,6 +72,32 @@ public class TaskHandler
|
||||
_logger.Information("Ran scheduled tasks in {Time}", stopwatch.ElapsedDuration());
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads every entry of the "pluralkit:cluster_stats" Redis hash, sums the
/// per-cluster values, and reports the totals as gauges together with the
/// database-wide counts fetched from the repository.
/// </summary>
private async Task CollectBotStats()
{
    var redisStats = await _redis.Connection.GetDatabase().HashGetAllAsync("pluralkit:cluster_stats");

    // Materialize once: a bare Select() is lazy, so each Sum() below would
    // otherwise re-deserialize the JSON of every hash entry.
    // Entries that deserialize to null (e.g. an empty value) are skipped
    // rather than failing the whole scheduled run with a NullReferenceException.
    var stats = redisStats
        .Select(v => JsonConvert.DeserializeObject<ClusterMetricInfo>(v.Value))
        .Where(x => x is not null)
        .ToList();

    _metrics.Measure.Gauge.SetValue(Metrics.Guilds, stats.Sum(x => x.GuildCount));
    _metrics.Measure.Gauge.SetValue(Metrics.Channels, stats.Sum(x => x.ChannelCount));

    // Aggregate DB stats
    // just fetching from database here - actual updating of the data is done elsewhere
    var counts = await _repo.GetStats();
    _metrics.Measure.Gauge.SetValue(CoreMetrics.SystemCount, counts.SystemCount);
    _metrics.Measure.Gauge.SetValue(CoreMetrics.MemberCount, counts.MemberCount);
    _metrics.Measure.Gauge.SetValue(CoreMetrics.GroupCount, counts.GroupCount);
    _metrics.Measure.Gauge.SetValue(CoreMetrics.SwitchCount, counts.SwitchCount);
    _metrics.Measure.Gauge.SetValue(CoreMetrics.MessageCount, counts.MessageCount);

    // Database info
    // this is pretty much always inaccurate but oh well
    _metrics.Measure.Gauge.SetValue(CoreMetrics.DatabaseConnections, stats.Sum(x => x.DatabaseConnectionCount));

    // Other shiz
    _metrics.Measure.Gauge.SetValue(Metrics.WebhookCacheSize, stats.Sum(x => x.WebhookCacheSize));
}
|
||||
|
||||
private async Task CleanupOldMessages()
|
||||
{
|
||||
var deleteThresholdInstant = SystemClock.Instance.GetCurrentInstant() - CommandMessageRetention;
|
||||
|
Loading…
Reference in New Issue
Block a user