Add InfluxDB/Grafana monitoring
This commit is contained in:
parent
9c0811afe8
commit
0b8488606b
@ -1 +1 @@
|
||||
from . import commands, db, proxy
|
||||
from . import commands, db, proxy, stats
|
@ -1,6 +1,8 @@
|
||||
from datetime import datetime
|
||||
import logging
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
|
||||
import discord
|
||||
|
||||
@ -38,7 +40,7 @@ async def on_message(message):
|
||||
# Split into args. shlex sucks so we don't bother with quotes
|
||||
args = message.content.split(" ")
|
||||
|
||||
from pluralkit import proxy, utils
|
||||
from pluralkit import proxy, utils, stats
|
||||
|
||||
command_items = utils.command_map.items()
|
||||
command_items = sorted(command_items, key=lambda x: len(x[0]), reverse=True)
|
||||
@ -54,7 +56,14 @@ async def on_message(message):
|
||||
args = []
|
||||
|
||||
async with client.pool.acquire() as conn:
|
||||
time_before = time.perf_counter()
|
||||
await func(conn, message, args)
|
||||
time_after = time.perf_counter()
|
||||
|
||||
# Report command time stats
|
||||
execution_time = time_after - time_before
|
||||
response_time = (datetime.now() - message.timestamp).total_seconds()
|
||||
await stats.report_command(command, execution_time, response_time)
|
||||
return
|
||||
|
||||
# Try doing proxy parsing
|
||||
@ -82,7 +91,7 @@ async def on_socket_raw_receive(msg):
|
||||
pass
|
||||
|
||||
async def run():
|
||||
from pluralkit import db
|
||||
from pluralkit import db, stats
|
||||
try:
|
||||
logger.info("Connecting to database...")
|
||||
pool = await db.connect()
|
||||
@ -92,6 +101,7 @@ async def run():
|
||||
await db.create_tables(conn)
|
||||
|
||||
logger.info("Connecting to InfluxDB...")
|
||||
await stats.connect()
|
||||
|
||||
client.pool = pool
|
||||
logger.info("Connecting to Discord...")
|
||||
|
@ -3,9 +3,9 @@ import time
|
||||
import asyncpg
|
||||
import asyncpg.exceptions
|
||||
|
||||
from pluralkit import stats
|
||||
from pluralkit.bot import logger
|
||||
|
||||
|
||||
async def connect():
|
||||
while True:
|
||||
try:
|
||||
@ -17,11 +17,17 @@ async def connect():
|
||||
def db_wrap(func):
    """Wrap an async database function with timing, stats and error logging.

    The returned coroutine function awaits ``func`` with the arguments it was
    given, logs how long the call took, and reports the duration and outcome
    through ``stats.report_db_query``.

    Args:
        func: The coroutine function that performs the database query.

    Returns:
        A coroutine function that keeps ``func``'s ``__name__`` and
        ``__doc__`` (via ``functools.wraps``). It returns whatever ``func``
        returns, or ``None`` when ``asyncpg.exceptions.PostgresError`` was
        raised: that error is logged and reported as a failed query instead
        of being propagated to the caller.
    """
    # Imported locally so this block does not need a new file-level import.
    import functools

    @functools.wraps(func)
    async def inner(*args, **kwargs):
        before = time.perf_counter()
        try:
            res = await func(*args, **kwargs)
            after = time.perf_counter()

            logger.debug(" - DB call {} took {:.2f} ms".format(func.__name__, (after - before) * 1000))
            await stats.report_db_query(func.__name__, after - before, True)

            return res
        except asyncpg.exceptions.PostgresError:
            # Measure up to the point of failure so failed queries still show up in the stats.
            await stats.report_db_query(func.__name__, time.perf_counter() - before, False)
            logger.exception("Error from database query {}".format(func.__name__))
    return inner
|
||||
|
||||
@db_wrap
|
||||
@ -223,6 +229,14 @@ async def update_server(conn, server_id: str, logging_channel_id: str):
|
||||
logger.debug("Updating server settings (id={}, log_channel={})".format(server_id, logging_channel_id))
|
||||
await conn.execute("insert into servers (id, log_channel) values ($1, $2) on conflict (id) do update set log_channel = $2", int(server_id), logging_channel_id)
|
||||
|
||||
@db_wrap
async def member_count(conn):
    """Return the value of ``select count(*)`` over the ``members`` table.

    Args:
        conn: Database connection exposing an awaitable ``fetchval``.
    """
    total = await conn.fetchval("select count(*) from members")
    return total
|
||||
|
||||
@db_wrap
async def system_count(conn):
    """Return the value of ``select count(*)`` over the ``systems`` table.

    Args:
        conn: Database connection exposing an awaitable ``fetchval``.
    """
    total = await conn.fetchval("select count(*) from systems")
    return total
|
||||
|
||||
async def create_tables(conn):
|
||||
await conn.execute("""create table if not exists systems (
|
||||
id serial primary key,
|
||||
|
@ -6,7 +6,7 @@ import time
|
||||
import aiohttp
|
||||
import discord
|
||||
|
||||
from pluralkit import db
|
||||
from pluralkit import db, stats
|
||||
from pluralkit.bot import client, logger
|
||||
|
||||
def make_log_embed(hook_message, member, channel_name):
|
||||
@ -98,20 +98,28 @@ async def send_hook_message(member, hook_id, hook_token, text=None, image_url=No
|
||||
fd.add_field("file", image_resp.content, content_type=image_resp.content_type, filename=image_resp.url.name)
|
||||
|
||||
# Send the actual webhook request, and wait for a response
|
||||
time_before = time.perf_counter()
|
||||
try:
|
||||
async with session.post("https://discordapp.com/api/v6/webhooks/{}/{}?wait=true".format(hook_id, hook_token),
|
||||
data=fd,
|
||||
headers=req_headers) as resp:
|
||||
if resp.status == 200:
|
||||
resp_data = await resp.json()
|
||||
|
||||
# Make a fake message object for passing on - this is slightly broken but works for most things
|
||||
msg = discord.Message(reactions=[], **resp_data)
|
||||
|
||||
# Make sure it's added to the client's message cache - otherwise events r
|
||||
#client.messages.append(msg)
|
||||
# Report to stats
|
||||
await stats.report_webhook(time.perf_counter() - time_before, True)
|
||||
return msg
|
||||
else:
|
||||
await stats.report_webhook(time.perf_counter() - time_before, False)
|
||||
|
||||
# Fake a Discord exception, also because #yolo
|
||||
raise discord.HTTPException(resp, await resp.text())
|
||||
except aiohttp.ClientResponseError:
|
||||
await stats.report_webhook(time.perf_counter() - time_before, False)
|
||||
logger.exception("Error while sending webhook message")
|
||||
|
||||
|
||||
async def proxy_message(conn, member, trigger_message, inner):
|
||||
|
29
bot/pluralkit/stats.py
Normal file
29
bot/pluralkit/stats.py
Normal file
@ -0,0 +1,29 @@
|
||||
from aioinflux import InfluxDBClient
|
||||
|
||||
from pluralkit.bot import logger
|
||||
|
||||
client = None
|
||||
async def connect():
    """Create the module-level InfluxDB client and issue a create-database request.

    Rebinds the global ``client`` to a new ``InfluxDBClient`` pointed at the
    ``influx`` host, then asks the server to create the ``pluralkit`` database.
    """
    global client

    database = "pluralkit"
    client = InfluxDBClient(host="influx", db=database)
    await client.create_database(db=database)
|
||||
|
||||
async def report_db_query(query_name, time, success):
    """Write one point to the ``database_query`` measurement.

    Args:
        query_name: Name of the query; stored as the ``query`` tag.
        time: Duration of the query; stored as the ``response_time`` field.
        success: Whether the query succeeded; stored as ``int(success)`` in
            the ``success`` field.

    Returns:
        None. When the module-level ``client`` is still ``None`` (``connect()``
        has not run), the point is dropped and nothing is written.
    """
    if client is None:
        # Metrics are best-effort: without a client there is nowhere to send
        # the point, and failing here would break the query being measured.
        return

    await client.write({
        "measurement": "database_query",
        "tags": {"query": query_name},
        "fields": {"response_time": time, "success": int(success)},
    })
|
||||
|
||||
async def report_command(command_name, execution_time, response_time):
    """Write one point to the ``command`` measurement.

    Args:
        command_name: Name of the command; stored as the ``command`` tag.
        execution_time: Stored as the ``execution_time`` field.
        response_time: Stored as the ``response_time`` field.

    Returns:
        None. When the module-level ``client`` is still ``None`` (``connect()``
        has not run), the point is dropped and nothing is written.
    """
    if client is None:
        # Metrics are best-effort: without a client there is nowhere to send
        # the point, and failing here would break the command being measured.
        return

    await client.write({
        "measurement": "command",
        "tags": {"command": command_name},
        "fields": {"execution_time": execution_time, "response_time": response_time},
    })
|
||||
|
||||
async def report_webhook(time, success):
    """Write one point to the ``webhook`` measurement.

    Args:
        time: Duration of the webhook request; stored as the
            ``response_time`` field.
        success: Whether the request succeeded; stored as ``int(success)`` in
            the ``success`` field.

    Returns:
        None. When the module-level ``client`` is still ``None`` (``connect()``
        has not run), the point is dropped and nothing is written.
    """
    if client is None:
        # Metrics are best-effort: without a client there is nowhere to send
        # the point, and failing here would break the webhook being measured.
        return

    await client.write({
        "measurement": "webhook",
        "fields": {"response_time": time, "success": int(success)},
    })
|
@ -4,6 +4,7 @@ services:
|
||||
build: bot
|
||||
depends_on:
|
||||
- db
|
||||
- influx
|
||||
environment:
|
||||
- CLIENT_ID
|
||||
- TOKEN
|
||||
@ -12,5 +13,19 @@ services:
|
||||
volumes:
|
||||
- "db_data:/var/lib/postgresql/data"
|
||||
restart: always
|
||||
influx:
|
||||
image: influxdb:alpine
|
||||
volumes:
|
||||
- "influx_data:/var/lib/influxdb:Z"
|
||||
restart: always
|
||||
grafana:
|
||||
build: grafana
|
||||
depends_on:
|
||||
- influx
|
||||
ports:
|
||||
- "3000:3000"
|
||||
restart: always
|
||||
|
||||
volumes:
|
||||
db_data:
|
||||
influx_data:
|
3
grafana/Dockerfile
Normal file
3
grafana/Dockerfile
Normal file
@ -0,0 +1,3 @@
|
||||
FROM grafana/grafana
|
||||
|
||||
COPY . /etc/grafana
|
537
grafana/dashboards/pluralkit.json
Normal file
537
grafana/dashboards/pluralkit.json
Normal file
@ -0,0 +1,537 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"limit": 100,
|
||||
"name": "Annotations & Alerts",
|
||||
"showIn": 0,
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"id": 1,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "InfluxDB",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 6,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "Webhook success rate",
|
||||
"yaxis": 2
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"alias": "Webhook response time",
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"linear"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"measurement": "webhook",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"response_time"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "mean"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"alias": "Webhook success rate",
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"linear"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"measurement": "webhook",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"refId": "B",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"success"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "mean"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Webhook executions",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "s",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": "0",
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "percentunit",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": "1",
|
||||
"min": "0",
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "InfluxDB",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"hideEmpty": false,
|
||||
"hideZero": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "Database success rate (%)",
|
||||
"yaxis": 2
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"alias": "Database response time",
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"linear"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"measurement": "database_query",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"response_time"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "mean"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"alias": "Database success rate (%)",
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"linear"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"measurement": "database_query",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "SELECT mean(\"success\") FROM \"database_query\" WHERE $timeFilter GROUP BY time($__interval) fill(linear)",
|
||||
"rawQuery": false,
|
||||
"refId": "B",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"success"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "mean"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Database Queries",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"transparent": false,
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "s",
|
||||
"label": "",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": "0",
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"decimals": null,
|
||||
"format": "percentunit",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": "1",
|
||||
"min": "0",
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "InfluxDB",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 9
|
||||
},
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"alias": "Command execution time",
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"linear"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"measurement": "command",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"execution_time"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "mean"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"alias": "Command response time",
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"linear"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"measurement": "command",
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"refId": "B",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"response_time"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "mean"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Commands",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "s",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": "0",
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"refresh": false,
|
||||
"schemaVersion": 16,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-30m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "PluralKit Stats",
|
||||
"uid": "pk",
|
||||
"version": 1
|
||||
}
|
12
grafana/grafana.ini
Normal file
12
grafana/grafana.ini
Normal file
@ -0,0 +1,12 @@
|
||||
instance_name = pluralkit
|
||||
|
||||
[users]
|
||||
allow_sign_up = false
|
||||
allow_org_create = false
|
||||
|
||||
[auth]
|
||||
disable_login_form = true
|
||||
|
||||
[auth.anonymous]
|
||||
enabled = true
|
||||
org_role = Viewer
|
8
grafana/provisioning/dashboards/pluralkit.yml
Normal file
8
grafana/provisioning/dashboards/pluralkit.yml
Normal file
@ -0,0 +1,8 @@
|
||||
apiVersion: 1
|
||||
providers:
|
||||
- name: "pluralkit"
|
||||
orgId: 1
|
||||
folder: ''
|
||||
type: file
|
||||
options:
|
||||
path: /etc/grafana/dashboards
|
9
grafana/provisioning/datasources/pluralkit.yml
Normal file
9
grafana/provisioning/datasources/pluralkit.yml
Normal file
@ -0,0 +1,9 @@
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: InfluxDB
|
||||
type: influxdb
|
||||
database: pluralkit
|
||||
access: proxy
|
||||
url: http://influx:8086
|
||||
editable: false
|
Loading…
Reference in New Issue
Block a user