From 0b8488606be3f37cf3cff4d96b162e8abbd7d942 Mon Sep 17 00:00:00 2001 From: Ske Date: Mon, 16 Jul 2018 20:53:41 +0200 Subject: [PATCH] Add InfluxDB/Grafana monitoring --- bot/pluralkit/__init__.py | 2 +- bot/pluralkit/bot.py | 14 +- bot/pluralkit/db.py | 24 +- bot/pluralkit/proxy.py | 36 +- bot/pluralkit/stats.py | 29 + docker-compose.yml | 17 +- grafana/Dockerfile | 3 + grafana/dashboards/pluralkit.json | 537 ++++++++++++++++++ grafana/grafana.ini | 12 + grafana/provisioning/dashboards/pluralkit.yml | 8 + .../provisioning/datasources/pluralkit.yml | 9 + 11 files changed, 668 insertions(+), 23 deletions(-) create mode 100644 bot/pluralkit/stats.py create mode 100644 grafana/Dockerfile create mode 100644 grafana/dashboards/pluralkit.json create mode 100644 grafana/grafana.ini create mode 100644 grafana/provisioning/dashboards/pluralkit.yml create mode 100644 grafana/provisioning/datasources/pluralkit.yml diff --git a/bot/pluralkit/__init__.py b/bot/pluralkit/__init__.py index b2a23fcc..65af38e0 100644 --- a/bot/pluralkit/__init__.py +++ b/bot/pluralkit/__init__.py @@ -1 +1 @@ -from . import commands, db, proxy +from . import commands, db, proxy, stats \ No newline at end of file diff --git a/bot/pluralkit/bot.py b/bot/pluralkit/bot.py index 307d288d..f3ca66a6 100644 --- a/bot/pluralkit/bot.py +++ b/bot/pluralkit/bot.py @@ -1,6 +1,8 @@ +from datetime import datetime import logging import json import os +import time import discord @@ -38,7 +40,7 @@ async def on_message(message): # Split into args. 
shlex sucks so we don't bother with quotes args = message.content.split(" ") - from pluralkit import proxy, utils + from pluralkit import proxy, utils, stats command_items = utils.command_map.items() command_items = sorted(command_items, key=lambda x: len(x[0]), reverse=True) @@ -54,7 +56,14 @@ async def on_message(message): args = [] async with client.pool.acquire() as conn: + time_before = time.perf_counter() await func(conn, message, args) + time_after = time.perf_counter() + + # Report command time stats (message.timestamp is a naive UTC datetime, so compare against utcnow, not local now) + execution_time = time_after - time_before + response_time = (datetime.utcnow() - message.timestamp).total_seconds() + await stats.report_command(command, execution_time, response_time) return # Try doing proxy parsing @@ -82,7 +91,7 @@ async def on_socket_raw_receive(msg): pass async def run(): - from pluralkit import db + from pluralkit import db, stats try: logger.info("Connecting to database...") pool = await db.connect() @@ -92,6 +101,7 @@ async def run(): await db.create_tables(conn) logger.info("Connecting to InfluxDB...") + await stats.connect() client.pool = pool logger.info("Connecting to Discord...") diff --git a/bot/pluralkit/db.py b/bot/pluralkit/db.py index d8c34d6e..62532ddf 100644 --- a/bot/pluralkit/db.py +++ b/bot/pluralkit/db.py @@ -3,9 +3,9 @@ import time import asyncpg import asyncpg.exceptions +from pluralkit import stats from pluralkit.bot import logger - async def connect(): while True: try: @@ -17,11 +17,17 @@ async def connect(): def db_wrap(func): async def inner(*args, **kwargs): before = time.perf_counter() - res = await func(*args, **kwargs) - after = time.perf_counter() + try: + res = await func(*args, **kwargs) + after = time.perf_counter() - logger.debug(" - DB call {} took {:.2f} ms".format(func.__name__, (after - before) * 1000)) - return res + logger.debug(" - DB call {} took {:.2f} ms".format(func.__name__, (after - before) * 1000)) + await stats.report_db_query(func.__name__, after - before, True) + + return res + except
asyncpg.exceptions.PostgresError: + await stats.report_db_query(func.__name__, time.perf_counter() - before, False) + logger.exception("Error from database query {}".format(func.__name__)) return inner @db_wrap @@ -223,6 +229,14 @@ async def update_server(conn, server_id: str, logging_channel_id: str): logger.debug("Updating server settings (id={}, log_channel={})".format(server_id, logging_channel_id)) await conn.execute("insert into servers (id, log_channel) values ($1, $2) on conflict (id) do update set log_channel = $2", int(server_id), logging_channel_id) +@db_wrap +async def member_count(conn): + return await conn.fetchval("select count(*) from members") + +@db_wrap +async def system_count(conn): + return await conn.fetchval("select count(*) from systems") + async def create_tables(conn): await conn.execute("""create table if not exists systems ( id serial primary key, diff --git a/bot/pluralkit/proxy.py b/bot/pluralkit/proxy.py index 73146f30..de048d33 100644 --- a/bot/pluralkit/proxy.py +++ b/bot/pluralkit/proxy.py @@ -6,7 +6,7 @@ import time import aiohttp import discord -from pluralkit import db +from pluralkit import db, stats from pluralkit.bot import client, logger def make_log_embed(hook_message, member, channel_name): @@ -98,20 +98,28 @@ async def send_hook_message(member, hook_id, hook_token, text=None, image_url=No fd.add_field("file", image_resp.content, content_type=image_resp.content_type, filename=image_resp.url.name) # Send the actual webhook request, and wait for a response - async with session.post("https://discordapp.com/api/v6/webhooks/{}/{}?wait=true".format(hook_id, hook_token), - data=fd, - headers=req_headers) as resp: - if resp.status == 200: - resp_data = await resp.json() - # Make a fake message object for passing on - this is slightly broken but works for most things - msg = discord.Message(reactions=[], **resp_data) + time_before = time.perf_counter() + try: + async with 
session.post("https://discordapp.com/api/v6/webhooks/{}/{}?wait=true".format(hook_id, hook_token), + data=fd, + headers=req_headers) as resp: + if resp.status == 200: + resp_data = await resp.json() - # Make sure it's added to the client's message cache - otherwise events r - #client.messages.append(msg) - return msg - else: - # Fake a Discord exception, also because #yolo - raise discord.HTTPException(resp, await resp.text()) + # Make a fake message object for passing on - this is slightly broken but works for most things + msg = discord.Message(reactions=[], **resp_data) + + # Report to stats + await stats.report_webhook(time.perf_counter() - time_before, True) + return msg + else: + await stats.report_webhook(time.perf_counter() - time_before, False) + + # Fake a Discord exception, also because #yolo + raise discord.HTTPException(resp, await resp.text()) + except aiohttp.ClientResponseError: + await stats.report_webhook(time.perf_counter() - time_before, False) + logger.exception("Error while sending webhook message") async def proxy_message(conn, member, trigger_message, inner): diff --git a/bot/pluralkit/stats.py b/bot/pluralkit/stats.py new file mode 100644 index 00000000..3077f935 --- /dev/null +++ b/bot/pluralkit/stats.py @@ -0,0 +1,29 @@ +from aioinflux import InfluxDBClient + +from pluralkit.bot import logger + +client = None +async def connect(): + global client + client = InfluxDBClient(host="influx", db="pluralkit") + await client.create_database(db="pluralkit") + +async def report_db_query(query_name, time, success): + await client.write({ + "measurement": "database_query", + "tags": {"query": query_name}, + "fields": {"response_time": time, "success": int(success)} + }) + +async def report_command(command_name, execution_time, response_time): + await client.write({ + "measurement": "command", + "tags": {"command": command_name}, + "fields": {"execution_time": execution_time, "response_time": response_time} + }) + +async def report_webhook(time, 
success): + await client.write({ + "measurement": "webhook", + "fields": {"response_time": time, "success": int(success)} + }) \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 36b0487f..1de8ab86 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,6 +4,7 @@ services: build: bot depends_on: - db + - influx environment: - CLIENT_ID - TOKEN @@ -12,5 +13,19 @@ services: volumes: - "db_data:/var/lib/postgresql/data" restart: always + influx: + image: influxdb:alpine + volumes: + - "influx_data:/var/lib/influxdb:Z" + restart: always + grafana: + build: grafana + depends_on: + - influx + ports: + - "3000:3000" + restart: always + volumes: - db_data: \ No newline at end of file + db_data: + influx_data: \ No newline at end of file diff --git a/grafana/Dockerfile b/grafana/Dockerfile new file mode 100644 index 00000000..99b8379a --- /dev/null +++ b/grafana/Dockerfile @@ -0,0 +1,3 @@ +FROM grafana/grafana + +COPY . /etc/grafana diff --git a/grafana/dashboards/pluralkit.json b/grafana/dashboards/pluralkit.json new file mode 100644 index 00000000..79ee1dcf --- /dev/null +++ b/grafana/dashboards/pluralkit.json @@ -0,0 +1,537 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "limit": 100, + "name": "Annotations & Alerts", + "showIn": 0, + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": 1, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "InfluxDB", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + 
"points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Webhook success rate", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Webhook response time", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "webhook", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "response_time" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "Webhook success rate", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "webhook", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "success" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Webhook executions", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "InfluxDB", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + 
"max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "Database success rate (%)", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Database response time (ms)", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "database_query", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "response_time" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "Database success rate (%)", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "database_query", + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"success\") FROM \"database_query\" WHERE $timeFilter GROUP BY time($__interval) fill(linear)", + "rawQuery": false, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "success" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Database Queries", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "decimals": null, + "format":
"percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "InfluxDB", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Command execution time", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "command", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "execution_time" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "Command response time", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "command", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "response_time" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Commands", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", 
+ "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": false, + "schemaVersion": 16, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "PluralKit Stats", + "uid": "pk", + "version": 1 +} \ No newline at end of file diff --git a/grafana/grafana.ini b/grafana/grafana.ini new file mode 100644 index 00000000..13b28285 --- /dev/null +++ b/grafana/grafana.ini @@ -0,0 +1,12 @@ +instance_name = pluralkit + +[security] +allow_sign_up = false +allow_org_create = false + +[auth] +disable_login_form = true + +[auth.anonymous] +enabled = true +org_role = Viewer \ No newline at end of file diff --git a/grafana/provisioning/dashboards/pluralkit.yml b/grafana/provisioning/dashboards/pluralkit.yml new file mode 100644 index 00000000..7a642c10 --- /dev/null +++ b/grafana/provisioning/dashboards/pluralkit.yml @@ -0,0 +1,8 @@ +apiVersion: 1 +providers: +- name: "pluralkit" + orgId: 1 + folder: '' + type: file + options: + path: /etc/grafana/dashboards \ No newline at end of file diff --git a/grafana/provisioning/datasources/pluralkit.yml b/grafana/provisioning/datasources/pluralkit.yml new file mode 100644 index 00000000..ec172e8f --- /dev/null +++ b/grafana/provisioning/datasources/pluralkit.yml @@ -0,0 +1,9 @@ +apiVersion: 1 + +datasources: +- name: InfluxDB + type: influxdb + database: pluralkit + access: proxy + url: http://influx:8086 + editable: false \ No newline at end of file