From fcc270f1219db8ca9a3eaa81ce99db4c08ffef89 Mon Sep 17 00:00:00 2001 From: spiral Date: Sat, 18 Mar 2023 23:06:55 -0400 Subject: [PATCH] feat(api): prometheus metrics --- Cargo.lock | 233 ++++++++++++++++++++++- lib/libpk/Cargo.toml | 2 + lib/libpk/src/_config.rs | 7 + lib/libpk/src/lib.rs | 10 + services/api/Cargo.toml | 1 + services/api/src/main.rs | 3 +- services/api/src/middleware/logger.rs | 29 ++- services/api/src/middleware/ratelimit.rs | 11 +- 8 files changed, 283 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5e23e382..c2a6e683 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -39,6 +39,7 @@ dependencies = [ "hyper-reverse-proxy", "lazy_static", "libpk", + "metrics", "tokio", "tower", "tracing", @@ -164,6 +165,12 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bumpalo" +version = "3.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" + [[package]] name = "bytes" version = "1.4.0" @@ -226,6 +233,28 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "338089f42c427b86394a5ee60ff321da23a5c89c9d89514c829687b26359fcff" +[[package]] +name = "crossbeam-epoch" +version = "0.9.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" +dependencies = [ + "cfg-if", +] + [[package]] name = "crypto-common" version = "0.1.6" @@ -450,7 +479,7 @@ checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" dependencies = [ "cfg-if", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", ] [[package]] @@ -623,12 +652,27 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "ipnet" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146" + [[package]] name = "itoa" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440" +[[package]] +name = "js-sys" +version = "0.3.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "json5" version = "0.4.1" @@ -660,6 +704,8 @@ dependencies = [ "config", "gethostname", "lazy_static", + "metrics", + "metrics-exporter-prometheus", "serde", "tokio", "tracing", @@ -692,6 +738,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "mach" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa" +dependencies = [ + "libc", +] + [[package]] name = "match_cfg" version = "0.1.0" @@ -719,6 +774,73 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "memoffset" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" +dependencies = [ + "autocfg", +] + +[[package]] +name = "metrics" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b9b8653cec6897f73b519a43fba5ee3d50f62fe9af80b428accdcc093b4a849" +dependencies = [ + "ahash", + "metrics-macros", + "portable-atomic", +] + +[[package]] +name = "metrics-exporter-prometheus" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8603921e1f54ef386189335f288441af761e0fc61bcb552168d9cedfe63ebc70" +dependencies = [ + "hyper", + "indexmap", + "ipnet", + "metrics", + "metrics-util", + "parking_lot 0.12.1", + "portable-atomic", + "quanta", + "thiserror", + "tokio", + "tracing", +] + +[[package]] +name = "metrics-macros" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "731f8ecebd9f3a4aa847dfe75455e4757a45da40a7793d2f0b1f9b6ed18b23f3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "metrics-util" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d24dc2dbae22bff6f1f9326ffce828c9f07ef9cc1e8002e5279f845432a30a" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", + "hashbrown", + "metrics", + "num_cpus", + "parking_lot 0.12.1", + "portable-atomic", + "quanta", + "sketches-ddsketch", +] + [[package]] name = "mime" version = "0.3.16" @@ -739,7 +861,7 @@ checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" dependencies = [ "libc", "log", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "windows-sys 0.42.0", ] @@ -946,6 +1068,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "portable-atomic" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26f6a7b87c2e435a3241addceeeff740ff8b7e76b74c13bf9acb17fa454ea00b" + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -971,6 +1099,22 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "quanta" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7e31331286705f455e56cca62e0e717158474ff02b7936c1fa596d983f4ae27" +dependencies = [ + "crossbeam-utils", + "libc", + "mach", + "once_cell", + "raw-cpuid", + "wasi 0.10.2+wasi-snapshot-preview1", + "web-sys", + "winapi", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -1016,6 +1160,15 @@ dependencies = [ "getrandom", ] +[[package]] +name = "raw-cpuid" +version = "10.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" +dependencies = [ + "bitflags", +] + [[package]] name = "redis-protocol" version = "4.1.0" @@ -1204,6 +1357,12 @@ dependencies = [ "libc", ] +[[package]] +name = "sketches-ddsketch" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ceb945e54128e09c43d8e4f1277851bd5044c6fc540bbaa2ad888f60b3da9ae7" + [[package]] name = "slab" version = "0.4.7" @@ -1598,12 +1757,82 @@ dependencies = [ "try-lock", ] +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasm-bindgen" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" + +[[package]] +name = "web-sys" +version = "0.3.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi" version = "0.3.9" diff --git a/lib/libpk/Cargo.toml b/lib/libpk/Cargo.toml index 5ae30d86..bd27edb3 100644 --- a/lib/libpk/Cargo.toml +++ b/lib/libpk/Cargo.toml @@ -8,6 +8,8 @@ anyhow = { workspace = true } config = "0.13.3" gethostname = "0.4.1" lazy_static = { workspace = true } +metrics = "0.20.1" +metrics-exporter-prometheus = { version = "0.11.0", default-features = false, features = ["tokio", "http-listener", "tracing"] } serde = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } diff --git a/lib/libpk/src/_config.rs b/lib/libpk/src/_config.rs index ee2b48e1..f49db243 100644 --- a/lib/libpk/src/_config.rs +++ b/lib/libpk/src/_config.rs @@ -33,11 +33,18 @@ pub struct ApiConfig { pub remote_url: String, } +fn _metrics_default() -> bool { + false +} + #[derive(Deserialize, Debug)] pub struct PKConfig { pub discord: DiscordConfig, pub api: ApiConfig, + #[serde(default = "_metrics_default")] + pub run_metrics_server: bool, + pub(crate) gelf_log_url: Option, } diff --git a/lib/libpk/src/lib.rs b/lib/libpk/src/lib.rs index 785dedb4..26284efc 100644 --- a/lib/libpk/src/lib.rs +++ b/lib/libpk/src/lib.rs @@ -1,4 +1,5 @@ use gethostname::gethostname; +use metrics_exporter_prometheus::PrometheusBuilder; use tracing_subscriber::{prelude::__tracing_subscriber_SubscriberExt, EnvFilter, Registry}; mod _config; @@ -25,3 +26,12 @@ pub fn init_logging(component: &str) -> anyhow::Result<()> { Ok(()) } + +pub fn init_metrics() -> anyhow::Result<()> { + if config.run_metrics_server { + // automatically spawns a http listener at :9000 + let builder = PrometheusBuilder::new(); + builder.install()?; + } + Ok(()) +} diff --git a/services/api/Cargo.toml b/services/api/Cargo.toml index d38d1091..67bc0b99 100644 --- a/services/api/Cargo.toml +++ b/services/api/Cargo.toml @@ -11,6 +11,7 @@ http = "0.2.8" hyper-reverse-proxy = "0.5.1" lazy_static = "1.4.0" libpk = { path = "../../lib/libpk" } +metrics = "0.20.1" tokio = { workspace = true } tower = "0.4.13" tracing = { workspace = true } diff --git a/services/api/src/main.rs b/services/api/src/main.rs index 5298c2d2..074c5b55 100644 --- a/services/api/src/main.rs +++ b/services/api/src/main.rs @@ -12,6 +12,7 @@ mod util; #[tokio::main] async fn main() -> anyhow::Result<()> { libpk::init_logging("api")?; + libpk::init_metrics()?; info!("hello world"); // processed upside down (???) so we have to put middleware at the end @@ -69,8 +70,8 @@ async fn main() -> anyhow::Result<()> { .route("/v2/members/:member_id/oembed.json", get(util::rproxy)) .route("/v2/groups/:group_id/oembed.json", get(util::rproxy)) - .layer(middleware::ratelimit::ratelimiter(middleware::ratelimit::do_request_ratelimited)) // this sucks .layer(axum::middleware::from_fn(middleware::logger)) + .layer(middleware::ratelimit::ratelimiter(middleware::ratelimit::do_request_ratelimited)) // this sucks .layer(axum::middleware::from_fn(middleware::ignore_invalid_routes)) .layer(axum::middleware::from_fn(middleware::cors)) diff --git a/services/api/src/middleware/logger.rs b/services/api/src/middleware/logger.rs index 09d2136b..eaf26c8b 100644 --- a/services/api/src/middleware/logger.rs +++ b/services/api/src/middleware/logger.rs @@ -1,10 +1,15 @@ use std::time::Instant; use axum::{extract::MatchedPath, http::Request, middleware::Next, response::Response}; -use tracing::{info, span, Instrument, Level}; +use metrics::histogram; +use tracing::{info, span, warn, Instrument, Level}; use crate::util::header_or_unknown; +// log any requests that take longer than 2 seconds +// todo: change as necessary +const MIN_LOG_TIME: u128 = 2_000; + pub async fn logger(request: Request, next: Next) -> Response { let method = request.method().clone(); @@ -12,14 +17,14 @@ pub async fn logger(request: Request, next: Next) -> Response { let remote_ip = header_or_unknown(request.headers().get("Fly-Client-IP")); let user_agent = header_or_unknown(request.headers().get("User-Agent")); - let path = request + let endpoint = request .extensions() .get::() .cloned() .map(|v| v.as_str().to_string()) .unwrap_or("unknown".to_string()); - // todo: prometheus metrics + let uri = request.uri().clone(); let request_id_span = span!( Level::INFO, @@ -27,7 +32,7 @@ pub async fn logger(request: Request, next: Next) -> Response { request_id, remote_ip, method = method.as_str(), - path, + endpoint = endpoint.clone(), user_agent ); @@ -35,7 +40,21 @@ pub async fn logger(request: Request, next: Next) -> Response { let response = next.run(request).instrument(request_id_span).await; let elapsed = start.elapsed().as_millis(); - info!("handled request for {} {} in {}ms", method, path, elapsed); + info!( + "handled request for {} {} in {}ms", + method, endpoint, elapsed + ); + histogram!("pk_http_requests", (elapsed as f64) / 1_000_f64, "method" => method.to_string(), "endpoint" => endpoint.clone()); + + if elapsed > MIN_LOG_TIME { + warn!( + "request to {} full path {} (endpoint {}) took a long time ({}ms)!", + method, + uri.path(), + endpoint, + elapsed + ) + } response } diff --git a/services/api/src/middleware/ratelimit.rs b/services/api/src/middleware/ratelimit.rs index d95ee04d..daad827a 100644 --- a/services/api/src/middleware/ratelimit.rs +++ b/services/api/src/middleware/ratelimit.rs @@ -8,7 +8,8 @@ use axum::{ }; use fred::{pool::RedisPool, prelude::LuaInterface, types::ReconnectPolicy, util::sha1_hash}; use http::{HeaderValue, StatusCode}; -use tracing::{error, info, warn}; +use metrics::increment_counter; +use tracing::{debug, error, info, warn}; use crate::util::{header_or_unknown, json_err}; @@ -107,13 +108,13 @@ pub async fn do_request_ratelimited( let mut response = if remaining > 0 { next.run(request).await } else { - println!("{}", reset_after); + let retry_after = (retry_after * 1_000_f64).ceil() as u64; + debug!("ratelimited request from {rl_key}, retry_after={retry_after}"); + increment_counter!("pk_http_requests_ratelimited"); json_err( StatusCode::TOO_MANY_REQUESTS, format!( - // todo: the retry_after is horribly wrong - r#"{{"message":"429: too many requests","retry_after":{},"code":0}}"#, - (retry_after * 1_000_f64).ceil() as u64 + r#"{{"message":"429: too many requests","retry_after":{retry_after},"code":0}}"#, ), ) };