diff --git a/.gitignore b/.gitignore index 44ae98d5..aa8d987d 100644 --- a/.gitignore +++ b/.gitignore @@ -61,3 +61,7 @@ $RECYCLE.BIN/ ### Rust target/ logs/ + +flamegraph.svg +perf.data +perf.data.old \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json index b0f99ce6..84701cd6 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -103,6 +103,28 @@ ], "cwd": "${workspaceFolder}/target/debug/" }, + { + "type": "lldb", + "request": "launch", + "name": "Debug veilid-tools unit test", + "cargo": { + "args": [ + "test", + "--no-run", + "--features=rt-tokio", + "--manifest-path", + "veilid-tools/Cargo.toml" + ], + "filter": { + "kind": "cdylib", + "name": "veilid-tools" + } + }, + "args": [ + "${selectedText}" + ], + "cwd": "${workspaceFolder}/target/debug/" + }, { "type": "lldb", "request": "launch", diff --git a/Cargo.lock b/Cargo.lock index f7d5ed30..cb542298 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6467,9 +6467,9 @@ dependencies = [ "netlink-packet-route", "netlink-sys", "nix 0.26.2", + "num-traits", "once_cell", "owning_ref", - "owo-colors", "paranoid-android", "parking_lot 0.12.1", "paste", @@ -6606,8 +6606,10 @@ dependencies = [ "async_executors", "backtrace", "cfg-if 1.0.0", + "chrono", "console_error_panic_hook", "eyre", + "flume", "fn_name", "futures-util", "jni 0.21.1", @@ -6622,10 +6624,10 @@ dependencies = [ "nix 0.26.2", "once_cell", "oslog", - "owo-colors", "paranoid-android", "parking_lot 0.11.2", "rand 0.7.3", + "range-set-blaze", "rust-fsm", "send_wrapper 0.6.0", "serial_test", diff --git a/doc/config/sample.config b/doc/config/sample.config index 41a6a2f7..b59cd7c2 100644 --- a/doc/config/sample.config +++ b/doc/config/sample.config @@ -48,7 +48,8 @@ core: max_connection_frequency_per_min: 128 client_whitelist_timeout_ms: 300000 reverse_connection_receipt_time_ms: 5000 - hole_punch_receipt_time_ms: 5000 + hole_punch_receipt_time_ms: 5000 + network_key_password: null routing_table: node_id: null node_id_secret: null diff --git a/doc/config/veilid-server-config.md b/doc/config/veilid-server-config.md index a76b328b..116d2dec 100644 --- a/doc/config/veilid-server-config.md +++ b/doc/config/veilid-server-config.md @@ -188,8 +188,9 @@ network: client_whitelist_timeout_ms: 300000 reverse_connection_receipt_time_ms: 5000 hole_punch_receipt_time_ms: 5000 - node_id: '' - node_id_secret: '' + network_key_password: null + node_id: null + node_id_secret: null bootstrap: ['bootstrap.veilid.net'] upnp: true detect_address_changes: true diff --git a/scripts/tools/keytool.py b/scripts/tools/keytool.py new file mode 100644 index 00000000..90323314 --- /dev/null +++ b/scripts/tools/keytool.py @@ -0,0 +1,99 @@ +import base64 +import sys +import argparse + +def urlsafe_b64encode_no_pad(b: bytes) -> str: + """ + Removes any `=` used as padding from the encoded string. + """ + return base64.urlsafe_b64encode(b).decode().rstrip("=") + + +def urlsafe_b64decode_no_pad(s: str) -> bytes: + """ + Adds back in the required padding before decoding. 
+ """ + padding = 4 - (len(s) % 4) + s = s + ("=" * padding) + return base64.urlsafe_b64decode(s) + + +def do_value(args): + + key = urlsafe_b64decode_no_pad(args.key) + + print("key:", key.hex()) + + +def dist(key1: bytes, key2: bytes) -> bytes: + distance = bytearray(len(key1)) + for n in range(len(key1)): + distance[n] = key1[n] ^ key2[n] + + return bytes(distance) + + +def do_distance(args): + + key1 = urlsafe_b64decode_no_pad(args.key1) + key2 = urlsafe_b64decode_no_pad(args.key2) + + print("key1:", key1.hex()) + print("key2:", key2.hex()) + + distance = dist(key1, key2) + print("dist:", distance.hex()) + +def keycmp(key1: bytes, key2: bytes) -> int: + for n in range(len(key1)): + if key1[n] < key2[n]: + return -1 + if key1[n] > key2[n]: + return 1 + return 0 + +def do_closer(args): + + key = urlsafe_b64decode_no_pad(args.key) + near = urlsafe_b64decode_no_pad(args.near) + far = urlsafe_b64decode_no_pad(args.far) + + print(" key:", key.hex()) + print("near:", near.hex()) + print(" far:", far.hex()) + + distance_near = dist(key, near) + distance_far = dist(key, far) + + print(" dn:", distance_near.hex()) + print(" df:", distance_far.hex()) + + c = keycmp(distance_near, distance_far) + print(" cmp:", c) + +def main(): + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers(required=True) + + parser_value = subparsers.add_parser('value') + parser_value.add_argument('key', type=str) + parser_value.set_defaults(func=do_value) + + parser_value = subparsers.add_parser('distance') + parser_value.add_argument('key1', type=str) + parser_value.add_argument('key2', type=str) + parser_value.set_defaults(func=do_distance) + + parser_value = subparsers.add_parser('closer') + parser_value.add_argument('key', type=str) + parser_value.add_argument('near', type=str) + parser_value.add_argument('far', type=str) + parser_value.set_defaults(func=do_closer) + + args = parser.parse_args() + args.func(args) + + sys.exit(0) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/veilid-cli/src/command_processor.rs b/veilid-cli/src/command_processor.rs index c6ab46c7..72311f8c 100644 --- a/veilid-cli/src/command_processor.rs +++ b/veilid-cli/src/command_processor.rs @@ -46,6 +46,7 @@ struct CommandProcessorInner { server_addr: Option, connection_waker: Eventual, last_call_id: Option, + enable_app_messages: bool, } #[derive(Clone)] @@ -66,6 +67,7 @@ impl CommandProcessor { server_addr: None, connection_waker: Eventual::new(), last_call_id: None, + enable_app_messages: false, })), } } @@ -122,6 +124,10 @@ reply reply to an AppCall not handled directly by must be exact call id reported in VeilidUpdate can be a string (left trimmed) or it can start with a '#' followed by a string of undelimited hex bytes +enable [flag] set a flag +disable [flag] unset a flag + valid flags in include: + app_messages "# .to_owned(), ); @@ -305,6 +311,52 @@ reply reply to an AppCall not handled directly by Ok(()) } + pub fn cmd_enable(&self, rest: Option, callback: UICallback) -> Result<(), String> { + trace!("CommandProcessor::cmd_enable"); + + let ui = self.ui_sender(); + let this = self.clone(); + spawn_detached_local(async move { + let flag = rest.clone().unwrap_or_default(); + match flag.as_str() { + "app_messages" => { + this.inner.lock().enable_app_messages = true; + ui.add_node_event(Level::Info, format!("flag enabled: {}", flag)); + ui.send_callback(callback); + } + _ => { + ui.add_node_event(Level::Error, format!("unknown flag: {}", flag)); + ui.send_callback(callback); + return; + } 
+ } + }); + Ok(()) + } + + pub fn cmd_disable(&self, rest: Option, callback: UICallback) -> Result<(), String> { + trace!("CommandProcessor::cmd_disable"); + + let ui = self.ui_sender(); + let this = self.clone(); + spawn_detached_local(async move { + let flag = rest.clone().unwrap_or_default(); + match flag.as_str() { + "app_messages" => { + this.inner.lock().enable_app_messages = false; + ui.add_node_event(Level::Info, format!("flag disabled: {}", flag)); + ui.send_callback(callback); + } + _ => { + ui.add_node_event(Level::Error, format!("unknown flag: {}", flag)); + ui.send_callback(callback); + return; + } + } + }); + Ok(()) + } + pub fn run_command(&self, command_line: &str, callback: UICallback) -> Result<(), String> { // let (cmd, rest) = Self::word_split(command_line); @@ -319,6 +371,8 @@ reply reply to an AppCall not handled directly by "debug" => self.cmd_debug(rest, callback), "change_log_level" => self.cmd_change_log_level(rest, callback), "reply" => self.cmd_reply(rest, callback), + "enable" => self.cmd_enable(rest, callback), + "disable" => self.cmd_disable(rest, callback), _ => { let ui = self.ui_sender(); ui.send_callback(callback); @@ -472,6 +526,10 @@ reply reply to an AppCall not handled directly by } pub fn update_app_message(&self, msg: &json::JsonValue) { + if !self.inner.lock().enable_app_messages { + return; + } + let message = json_str_vec_u8(&msg["message"]); // check is message body is ascii printable @@ -482,19 +540,34 @@ reply reply to an AppCall not handled directly by } } + let (message, truncated) = if message.len() > 64 { + (&message[0..64], true) + } else { + (&message[..], false) + }; + let strmsg = if printable { - String::from_utf8_lossy(&message).to_string() + format!("\"{}\"", String::from_utf8_lossy(&message).to_string()) } else { hex::encode(message) }; self.inner().ui_sender.add_node_event( Level::Info, - format!("AppMessage ({:?}): {}", msg["sender"], strmsg), + format!( + "AppMessage ({:?}): {}{}", + msg["sender"], + strmsg, + if truncated { "..." } else { "" } + ), ); } pub fn update_app_call(&self, call: &json::JsonValue) { + if !self.inner.lock().enable_app_messages { + return; + } + let message = json_str_vec_u8(&call["message"]); // check is message body is ascii printable @@ -505,10 +578,16 @@ reply reply to an AppCall not handled directly by } } - let strmsg = if printable { - String::from_utf8_lossy(&message).to_string() + let (message, truncated) = if message.len() > 64 { + (&message[0..64], true) } else { - format!("#{}", hex::encode(&message)) + (&message[..], false) + }; + + let strmsg = if printable { + format!("\"{}\"", String::from_utf8_lossy(&message).to_string()) + } else { + hex::encode(message) }; let id = json_str_u64(&call["call_id"]); @@ -516,8 +595,11 @@ reply reply to an AppCall not handled directly by self.inner().ui_sender.add_node_event( Level::Info, format!( - "AppCall ({:?}) id = {:016x} : {}", - call["sender"], id, strmsg + "AppCall ({:?}) id = {:016x} : {}{}", + call["sender"], + id, + strmsg, + if truncated { "..." 
} else { "" } ), ); diff --git a/veilid-core/Cargo.toml b/veilid-core/Cargo.toml index 8919c73a..783b966d 100644 --- a/veilid-core/Cargo.toml +++ b/veilid-core/Cargo.toml @@ -15,6 +15,7 @@ crypto-test = ["enable-crypto-vld0", "enable-crypto-none"] crypto-test-none = ["enable-crypto-none"] enable-crypto-vld0 = [] enable-crypto-none = [] +verbose-tracing = [] rt-async-std = ["async-std", "async-std-resolver", "async_executors/async_std", "rtnetlink/smol_socket", "veilid-tools/rt-async-std"] rt-tokio = ["tokio", "tokio-util", "tokio-stream", "trust-dns-resolver/tokio-runtime", "async_executors/tokio_tp", "async_executors/tokio_io", "async_executors/tokio_timer", "rtnetlink/tokio_socket", "veilid-tools/rt-tokio"] rt-wasm-bindgen = ["veilid-tools/rt-wasm-bindgen", "async_executors/bindgen"] @@ -22,6 +23,7 @@ rt-wasm-bindgen = ["veilid-tools/rt-wasm-bindgen", "async_executors/bindgen"] veilid_core_android_tests = ["dep:paranoid-android"] veilid_core_ios_tests = ["dep:tracing-oslog"] tracking = [] +network-result-extra = ["veilid-tools/network-result-extra"] [dependencies] veilid-tools = { path = "../veilid-tools", features = [ "tracing" ] } @@ -52,8 +54,8 @@ owning_ref = "^0" flume = { version = "^0", features = ["async"] } enumset = { version= "^1", features = ["serde"] } backtrace = { version = "^0" } -owo-colors = "^3" stop-token = { version = "^0", default-features = false } +num-traits = "0.2.15" ed25519-dalek = { version = "^1", default_features = false, features = ["alloc", "u64_backend"] } x25519-dalek = { version = "^1", default_features = false, features = ["u64_backend"] } diff --git a/veilid-core/src/attachment_manager.rs b/veilid-core/src/attachment_manager.rs index 1e92567d..a373bea2 100644 --- a/veilid-core/src/attachment_manager.rs +++ b/veilid-core/src/attachment_manager.rs @@ -196,7 +196,6 @@ impl AttachmentManager { if let Err(err) = netman.startup().await { error!("network startup failed: {}", err); netman.shutdown().await; - restart = true; break; } diff --git a/veilid-core/src/crypto/byte_array_types.rs b/veilid-core/src/crypto/byte_array_types.rs index 5590e467..9d60d4f4 100644 --- a/veilid-core/src/crypto/byte_array_types.rs +++ b/veilid-core/src/crypto/byte_array_types.rs @@ -77,18 +77,7 @@ where macro_rules! byte_array_type { ($name:ident, $size:expr, $encoded_size:expr) => { - #[derive( - Clone, - Copy, - Hash, - Eq, - PartialEq, - PartialOrd, - Ord, - RkyvArchive, - RkyvSerialize, - RkyvDeserialize, - )] + #[derive(Clone, Copy, Hash, RkyvArchive, RkyvSerialize, RkyvDeserialize)] #[archive_attr(repr(C), derive(CheckBytes, Hash, Eq, PartialEq, PartialOrd, Ord))] pub struct $name { pub bytes: [u8; $size], @@ -125,6 +114,32 @@ macro_rules! 
byte_array_type { } } + impl PartialOrd for $name { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } + } + + impl Ord for $name { + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + for n in 0..$size { + let c = self.bytes[n].cmp(&other.bytes[n]); + if c != core::cmp::Ordering::Equal { + return c; + } + } + core::cmp::Ordering::Equal + } + } + + impl PartialEq for $name { + fn eq(&self, other: &Self) -> bool { + self.bytes == other.bytes + } + } + + impl Eq for $name {} + impl $name { pub fn new(bytes: [u8; $size]) -> Self { Self { bytes } diff --git a/veilid-core/src/crypto/envelope.rs b/veilid-core/src/crypto/envelope.rs index c123ea52..21c940f4 100644 --- a/veilid-core/src/crypto/envelope.rs +++ b/veilid-core/src/crypto/envelope.rs @@ -66,7 +66,11 @@ impl Envelope { } } - pub fn from_signed_data(crypto: Crypto, data: &[u8]) -> VeilidAPIResult { + pub fn from_signed_data( + crypto: Crypto, + data: &[u8], + network_key: &Option, + ) -> VeilidAPIResult { // Ensure we are at least the length of the envelope // Silent drop here, as we use zero length packets as part of the protocol for hole punching if data.len() < MIN_ENVELOPE_SIZE { @@ -135,9 +139,22 @@ impl Envelope { let recipient_id_slice: [u8; PUBLIC_KEY_LENGTH] = data[0x4A..0x6A] .try_into() .map_err(VeilidAPIError::internal)?; - let nonce: Nonce = Nonce::new(nonce_slice); - let sender_id = PublicKey::new(sender_id_slice); - let recipient_id = PublicKey::new(recipient_id_slice); + let mut nonce: Nonce = Nonce::new(nonce_slice); + let mut sender_id = PublicKey::new(sender_id_slice); + let mut recipient_id = PublicKey::new(recipient_id_slice); + + // Apply network key (not the best, but it will keep networks from colliding without much overhead) + if let Some(nk) = network_key.as_ref() { + for n in 0..NONCE_LENGTH { + nonce.bytes[n] ^= nk.bytes[n]; + } + for n in 0..CRYPTO_KEY_LENGTH { + sender_id.bytes[n] ^= nk.bytes[n]; + } + for n in 0..CRYPTO_KEY_LENGTH { + recipient_id.bytes[n] ^= nk.bytes[n]; + } + } // Ensure sender_id and recipient_id are not the same if sender_id == recipient_id { @@ -175,13 +192,20 @@ impl Envelope { crypto: Crypto, data: &[u8], node_id_secret: &SecretKey, + network_key: &Option, ) -> VeilidAPIResult> { // Get DH secret let vcrypto = crypto .get(self.crypto_kind) .expect("need to ensure only valid crypto kinds here"); - let dh_secret = vcrypto.cached_dh(&self.sender_id, node_id_secret)?; + let mut dh_secret = vcrypto.cached_dh(&self.sender_id, node_id_secret)?; + // Apply network key + if let Some(nk) = network_key.as_ref() { + for n in 0..CRYPTO_KEY_LENGTH { + dh_secret.bytes[n] ^= nk.bytes[n]; + } + } // Decrypt message without authentication let body = vcrypto.crypt_no_auth_aligned_8( &data[0x6A..data.len() - 64], @@ -197,6 +221,7 @@ impl Envelope { crypto: Crypto, body: &[u8], node_id_secret: &SecretKey, + network_key: &Option, ) -> VeilidAPIResult> { // Ensure body isn't too long let envelope_size: usize = body.len() + MIN_ENVELOPE_SIZE; @@ -207,7 +232,7 @@ impl Envelope { let vcrypto = crypto .get(self.crypto_kind) .expect("need to ensure only valid crypto kinds here"); - let dh_secret = vcrypto.cached_dh(&self.recipient_id, node_id_secret)?; + let mut dh_secret = vcrypto.cached_dh(&self.recipient_id, node_id_secret)?; // Write envelope body let mut data = vec![0u8; envelope_size]; @@ -229,6 +254,22 @@ impl Envelope { // Write recipient node id data[0x4A..0x6A].copy_from_slice(&self.recipient_id.bytes); + // Apply network key (not the best, but it will keep networks from 
colliding without much overhead) + if let Some(nk) = network_key.as_ref() { + for n in 0..SECRET_KEY_LENGTH { + dh_secret.bytes[n] ^= nk.bytes[n]; + } + for n in 0..NONCE_LENGTH { + data[0x12 + n] ^= nk.bytes[n]; + } + for n in 0..CRYPTO_KEY_LENGTH { + data[0x2A + n] ^= nk.bytes[n]; + } + for n in 0..CRYPTO_KEY_LENGTH { + data[0x4A + n] ^= nk.bytes[n]; + } + } + // Encrypt and authenticate message let encrypted_body = vcrypto.crypt_no_auth_unaligned(body, &self.nonce.bytes, &dh_secret); diff --git a/veilid-core/src/crypto/tests/test_envelope_receipt.rs b/veilid-core/src/crypto/tests/test_envelope_receipt.rs index 1e2698fc..1b8a41f6 100644 --- a/veilid-core/src/crypto/tests/test_envelope_receipt.rs +++ b/veilid-core/src/crypto/tests/test_envelope_receipt.rs @@ -3,8 +3,16 @@ use super::*; pub async fn test_envelope_round_trip( envelope_version: EnvelopeVersion, vcrypto: CryptoSystemVersion, + network_key: Option, ) { - info!("--- test envelope round trip ---"); + if network_key.is_some() { + info!( + "--- test envelope round trip {} w/network key ---", + vcrypto.kind() + ); + } else { + info!("--- test envelope round trip {} ---", vcrypto.kind()); + } // Create envelope let ts = Timestamp::from(0x12345678ABCDEF69u64); @@ -25,15 +33,15 @@ pub async fn test_envelope_round_trip( // Serialize to bytes let enc_data = envelope - .to_encrypted_data(vcrypto.crypto(), body, &sender_secret) + .to_encrypted_data(vcrypto.crypto(), body, &sender_secret, &network_key) .expect("failed to encrypt data"); // Deserialize from bytes - let envelope2 = Envelope::from_signed_data(vcrypto.crypto(), &enc_data) + let envelope2 = Envelope::from_signed_data(vcrypto.crypto(), &enc_data, &network_key) .expect("failed to deserialize envelope from data"); let body2 = envelope2 - .decrypt_body(vcrypto.crypto(), &enc_data, &recipient_secret) + .decrypt_body(vcrypto.crypto(), &enc_data, &recipient_secret, &network_key) .expect("failed to decrypt envelope body"); // Compare envelope and body @@ -45,13 +53,13 @@ pub async fn test_envelope_round_trip( let mut mod_enc_data = enc_data.clone(); mod_enc_data[enc_data_len - 1] ^= 0x80u8; assert!( - Envelope::from_signed_data(vcrypto.crypto(), &mod_enc_data).is_err(), + Envelope::from_signed_data(vcrypto.crypto(), &mod_enc_data, &network_key).is_err(), "should have failed to decode envelope with modified signature" ); let mut mod_enc_data2 = enc_data.clone(); mod_enc_data2[enc_data_len - 65] ^= 0x80u8; assert!( - Envelope::from_signed_data(vcrypto.crypto(), &mod_enc_data2).is_err(), + Envelope::from_signed_data(vcrypto.crypto(), &mod_enc_data2, &network_key).is_err(), "should have failed to decode envelope with modified data" ); } @@ -97,7 +105,9 @@ pub async fn test_all() { for v in VALID_CRYPTO_KINDS { let vcrypto = crypto.get(v).unwrap(); - test_envelope_round_trip(ev, vcrypto.clone()).await; + test_envelope_round_trip(ev, vcrypto.clone(), None).await; + test_envelope_round_trip(ev, vcrypto.clone(), Some(vcrypto.random_shared_secret())) + .await; test_receipt_round_trip(ev, vcrypto).await; } } diff --git a/veilid-core/src/crypto/vld0/mod.rs b/veilid-core/src/crypto/vld0/mod.rs index 3d77c9c2..680e7468 100644 --- a/veilid-core/src/crypto/vld0/mod.rs +++ b/veilid-core/src/crypto/vld0/mod.rs @@ -176,10 +176,10 @@ impl CryptoSystem for CryptoSystemVLD0 { } // Distance Metric fn distance(&self, key1: &PublicKey, key2: &PublicKey) -> CryptoKeyDistance { - let mut bytes = [0u8; PUBLIC_KEY_LENGTH]; + let mut bytes = [0u8; CRYPTO_KEY_LENGTH]; - for (n, byte) in bytes.iter_mut().enumerate() 
{ - *byte = key1.bytes[n] ^ key2.bytes[n]; + for n in 0..CRYPTO_KEY_LENGTH { + bytes[n] = key1.bytes[n] ^ key2.bytes[n]; } CryptoKeyDistance::new(bytes) diff --git a/veilid-core/src/lib.rs b/veilid-core/src/lib.rs index f49417d9..ef3bdeb9 100644 --- a/veilid-core/src/lib.rs +++ b/veilid-core/src/lib.rs @@ -97,7 +97,6 @@ use tracing::*; use veilid_tools::*; type RkyvDefaultValidator<'t> = rkyv::validation::validators::DefaultValidator<'t>; use futures_util::stream::FuturesUnordered; -use owo_colors::OwoColorize; use schemars::{schema_for, JsonSchema}; use serde::*; use stop_token::*; diff --git a/veilid-core/src/network_manager/connection_handle.rs b/veilid-core/src/network_manager/connection_handle.rs index 3dc0adb5..dbf53117 100644 --- a/veilid-core/src/network_manager/connection_handle.rs +++ b/veilid-core/src/network_manager/connection_handle.rs @@ -34,14 +34,14 @@ impl ConnectionHandle { self.descriptor.clone() } - #[instrument(level="trace", skip(self, message), fields(message.len = message.len()))] + #[cfg_attr(feature="verbose-tracing", instrument(level="trace", skip(self, message), fields(message.len = message.len())))] pub fn send(&self, message: Vec) -> ConnectionHandleSendResult { match self.channel.send((Span::current().id(), message)) { Ok(()) => ConnectionHandleSendResult::Sent, Err(e) => ConnectionHandleSendResult::NotSent(e.0 .1), } } - #[instrument(level="trace", skip(self, message), fields(message.len = message.len()))] + #[cfg_attr(feature="verbose-tracing", instrument(level="trace", skip(self, message), fields(message.len = message.len())))] pub async fn send_async(&self, message: Vec) -> ConnectionHandleSendResult { match self .channel diff --git a/veilid-core/src/network_manager/connection_manager.rs b/veilid-core/src/network_manager/connection_manager.rs index 342925c9..1d1445d5 100644 --- a/veilid-core/src/network_manager/connection_manager.rs +++ b/veilid-core/src/network_manager/connection_manager.rs @@ -256,8 +256,8 @@ impl ConnectionManager { log_net!( "== get_or_create_connection local_addr={:?} dial_info={:?}", - local_addr.green(), - dial_info.green() + local_addr, + dial_info ); // Kill off any possibly conflicting connections @@ -273,8 +273,8 @@ impl ConnectionManager { { log_net!( "== Returning existing connection local_addr={:?} peer_address={:?}", - local_addr.green(), - peer_address.green() + local_addr, + peer_address ); return Ok(NetworkResult::Value(conn)); @@ -300,8 +300,8 @@ impl ConnectionManager { { log_net!( "== Returning existing connection in race local_addr={:?} peer_address={:?}", - local_addr.green(), - peer_address.green() + local_addr, + peer_address ); return Ok(NetworkResult::Value(conn)); diff --git a/veilid-core/src/network_manager/direct_boot.rs b/veilid-core/src/network_manager/direct_boot.rs new file mode 100644 index 00000000..d739c54d --- /dev/null +++ b/veilid-core/src/network_manager/direct_boot.rs @@ -0,0 +1,59 @@ +use super::*; + +impl NetworkManager { + // Direct bootstrap request handler (separate fallback mechanism from cheaper TXT bootstrap mechanism) + #[instrument(level = "trace", skip(self), ret, err)] + pub(crate) async fn handle_boot_request( + &self, + descriptor: ConnectionDescriptor, + ) -> EyreResult> { + let routing_table = self.routing_table(); + + // Get a bunch of nodes with the various + let bootstrap_nodes = routing_table.find_bootstrap_nodes_filtered(2); + + // Serialize out peer info + let bootstrap_peerinfo: Vec = bootstrap_nodes + .iter() + .filter_map(|nr| 
nr.make_peer_info(RoutingDomain::PublicInternet)) + .collect(); + let json_bytes = serialize_json(bootstrap_peerinfo).as_bytes().to_vec(); + + // Reply with a chunk of signed routing table + match self + .net() + .send_data_to_existing_connection(descriptor, json_bytes) + .await? + { + None => { + // Bootstrap reply was sent + Ok(NetworkResult::value(())) + } + Some(_) => Ok(NetworkResult::no_connection_other( + "bootstrap reply could not be sent", + )), + } + } + + // Direct bootstrap request + #[instrument(level = "trace", err, skip(self))] + pub async fn boot_request(&self, dial_info: DialInfo) -> EyreResult> { + let timeout_ms = self.with_config(|c| c.network.rpc.timeout_ms); + // Send boot magic to requested peer address + let data = BOOT_MAGIC.to_vec(); + + let out_data: Vec = network_result_value_or_log!(self + .net() + .send_recv_data_unbound_to_dial_info(dial_info, data, timeout_ms) + .await? => [ format!(": dial_info={}, data.len={}", dial_info, data.len()) ] + { + return Ok(Vec::new()); + }); + + let bootstrap_peerinfo: Vec = + deserialize_json(std::str::from_utf8(&out_data).wrap_err("bad utf8 in boot peerinfo")?) + .wrap_err("failed to deserialize boot peerinfo")?; + + Ok(bootstrap_peerinfo) + } +} diff --git a/veilid-core/src/network_manager/mod.rs b/veilid-core/src/network_manager/mod.rs index 68623a9e..02187f24 100644 --- a/veilid-core/src/network_manager/mod.rs +++ b/veilid-core/src/network_manager/mod.rs @@ -5,6 +5,8 @@ mod native; #[cfg(target_arch = "wasm32")] mod wasm; +mod direct_boot; +mod send_data; mod connection_handle; mod connection_limits; mod connection_manager; @@ -12,6 +14,7 @@ mod connection_table; mod network_connection; mod tasks; mod types; +mod stats; pub mod tests; @@ -20,6 +23,9 @@ pub mod tests; pub use connection_manager::*; pub use network_connection::*; pub use types::*; +pub use send_data::*; +pub use direct_boot::*; +pub use stats::*; //////////////////////////////////////////////////////////////////////////////////////// use connection_handle::*; @@ -42,6 +48,7 @@ use wasm::*; pub const MAX_MESSAGE_SIZE: usize = MAX_ENVELOPE_SIZE; pub const IPADDR_TABLE_SIZE: usize = 1024; pub const IPADDR_MAX_INACTIVE_DURATION_US: TimestampDuration = TimestampDuration::new(300_000_000u64); // 5 minutes +pub const NODE_CONTACT_METHOD_CACHE_SIZE: usize = 1024; pub const PUBLIC_ADDRESS_CHANGE_DETECTION_COUNT: usize = 3; pub const PUBLIC_ADDRESS_CHECK_CACHE_SIZE: usize = 8; pub const PUBLIC_ADDRESS_CHECK_TASK_INTERVAL_SECS: u32 = 60; @@ -67,38 +74,6 @@ struct NetworkComponents { receipt_manager: ReceiptManager, } -// Statistics per address -#[derive(Clone, Default)] -pub struct PerAddressStats { - last_seen_ts: Timestamp, - transfer_stats_accounting: TransferStatsAccounting, - transfer_stats: TransferStatsDownUp, -} - -#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] -pub struct PerAddressStatsKey(IpAddr); - -impl Default for PerAddressStatsKey { - fn default() -> Self { - Self(IpAddr::V4(Ipv4Addr::UNSPECIFIED)) - } -} - -// Statistics about the low-level network -#[derive(Clone)] -pub struct NetworkManagerStats { - self_stats: PerAddressStats, - per_address_stats: LruCache, -} - -impl Default for NetworkManagerStats { - fn default() -> Self { - Self { - self_stats: PerAddressStats::default(), - per_address_stats: LruCache::new(IPADDR_TABLE_SIZE), - } - } -} #[derive(Debug)] struct ClientWhitelistEntry { @@ -130,6 +105,13 @@ pub(crate) enum NodeContactMethod { /// Must use outbound relay to reach the node OutboundRelay(NodeRef), } +#[derive(Copy, Clone, Debug, 
PartialEq, Eq, Ord, PartialOrd, Hash)] +struct NodeContactMethodCacheKey { + own_node_info_ts: Option, + target_node_info_ts: Timestamp, + target_node_ref_filter: Option, + target_node_ref_sequencing: Sequencing, +} #[derive(Copy, Clone, Debug, PartialEq, Eq, Ord, PartialOrd, Hash)] struct PublicAddressCheckCacheKey(ProtocolType, AddressType); @@ -138,6 +120,7 @@ struct PublicAddressCheckCacheKey(ProtocolType, AddressType); struct NetworkManagerInner { stats: NetworkManagerStats, client_whitelist: LruCache, + node_contact_method_cache: LruCache, public_address_check_cache: BTreeMap>, public_address_inconsistencies_table: @@ -160,6 +143,8 @@ struct NetworkManagerUnlockedInner { // Background processes rolling_transfers_task: TickTask, public_address_check_task: TickTask, + // Network Key + network_key: Option, } #[derive(Clone)] @@ -173,6 +158,7 @@ impl NetworkManager { NetworkManagerInner { stats: NetworkManagerStats::default(), client_whitelist: LruCache::new_unbounded(), + node_contact_method_cache: LruCache::new(NODE_CONTACT_METHOD_CACHE_SIZE), public_address_check_cache: BTreeMap::new(), public_address_inconsistencies_table: BTreeMap::new(), } @@ -185,6 +171,7 @@ impl NetworkManager { #[cfg(feature="unstable-blockstore")] block_store: BlockStore, crypto: Crypto, + network_key: Option, ) -> NetworkManagerUnlockedInner { NetworkManagerUnlockedInner { config, @@ -199,6 +186,7 @@ impl NetworkManager { update_callback: RwLock::new(None), rolling_transfers_task: TickTask::new(ROLLING_TRANSFERS_INTERVAL_SECS), public_address_check_task: TickTask::new(PUBLIC_ADDRESS_CHECK_TASK_INTERVAL_SECS), + network_key, } } @@ -211,6 +199,37 @@ impl NetworkManager { block_store: BlockStore, crypto: Crypto, ) -> Self { + + // Make the network key + let network_key = { + let c = config.get(); + let network_key_password = if let Some(nkp) = c.network.network_key_password.clone() { + Some(nkp) + } else { + if c.network.routing_table.bootstrap.contains(&"bootstrap.veilid.net".to_owned()) { + None + } else { + Some(c.network.routing_table.bootstrap.join(",")) + } + }; + + let network_key = if let Some(network_key_password) = network_key_password { + if !network_key_password.is_empty() { + info!("Using network key"); + + let bcs = crypto.best(); + // Yes the use of the salt this way is generally bad, but this just needs to be hashed + Some(bcs.derive_shared_secret(network_key_password.as_bytes(), network_key_password.as_bytes()).expect("failed to derive network key")) + } else { + None + } + } else { + None + }; + + network_key + }; + let this = Self { inner: Arc::new(Mutex::new(Self::new_inner())), unlocked_inner: Arc::new(Self::new_unlocked_inner( @@ -221,6 +240,7 @@ impl NetworkManager { #[cfg(feature="unstable-blockstore")] block_store, crypto, + network_key, )), }; @@ -756,7 +776,7 @@ impl NetworkManager { } /// Builds an envelope for sending over the network - #[instrument(level = "trace", skip(self, body), err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, body), err))] fn build_envelope>( &self, dest_node_id: TypedKey, @@ -779,7 +799,7 @@ impl NetworkManager { // Encode envelope let envelope = Envelope::new(version, node_id.kind, ts, nonce, node_id.value, dest_node_id.value); envelope - .to_encrypted_data(self.crypto(), body.as_ref(), &node_id_secret) + .to_encrypted_data(self.crypto(), body.as_ref(), &node_id_secret, &self.unlocked_inner.network_key) .wrap_err("envelope failed to encode") } @@ -787,7 +807,7 @@ impl NetworkManager { /// node_ref is the direct destination 
to which the envelope will be sent /// If 'destination_node_ref' is specified, it can be different than the node_ref being sent to /// which will cause the envelope to be relayed - #[instrument(level = "trace", skip(self, body), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, body), ret, err))] pub async fn send_envelope>( &self, node_ref: NodeRef, @@ -798,7 +818,7 @@ impl NetworkManager { let destination_node_ref = destination_node_ref.as_ref().unwrap_or(&node_ref).clone(); if !node_ref.same_entry(&destination_node_ref) { - log_net!( + log_net!( "sending envelope to {:?} via {:?}", destination_node_ref, node_ref @@ -826,7 +846,7 @@ impl NetworkManager { } /// Called by the RPC handler when we want to issue an direct receipt - #[instrument(level = "trace", skip(self, rcpt_data), err)] + #[instrument(level = "debug", skip(self, rcpt_data), err)] pub async fn send_out_of_band_receipt( &self, dial_info: DialInfo, @@ -839,473 +859,26 @@ impl NetworkManager { // should not be subject to our ability to decode it // Send receipt directly - log_net!(debug "send_out_of_band_receipt: dial_info={}", dial_info); network_result_value_or_log!(self .net() .send_data_unbound_to_dial_info(dial_info, rcpt_data) - .await? => { + .await? => [ format!(": dial_info={}, rcpt_data.len={}", dial_info, rcpt_data.len()) ] { return Ok(()); } ); Ok(()) } - /// Send a reverse connection signal and wait for the return receipt over it - /// Then send the data across the new connection - /// Only usable for PublicInternet routing domain - #[instrument(level = "trace", skip(self, data), err)] - pub async fn do_reverse_connect( - &self, - relay_nr: NodeRef, - target_nr: NodeRef, - data: Vec, - ) -> EyreResult> { - // Build a return receipt for the signal - let receipt_timeout = ms_to_us( - self.unlocked_inner - .config - .get() - .network - .reverse_connection_receipt_time_ms, - ); - let (receipt, eventual_value) = self.generate_single_shot_receipt(receipt_timeout, [])?; - - // Get target routing domain - let Some(routing_domain) = target_nr.best_routing_domain() else { - return Ok(NetworkResult::no_connection_other("No routing domain for target")); - }; - - // Get our peer info - let Some(peer_info) = self - .routing_table() - .get_own_peer_info(routing_domain) else { - return Ok(NetworkResult::no_connection_other("Own peer info not available")); - }; - - // Issue the signal - let rpc = self.rpc_processor(); - network_result_try!(rpc - .rpc_call_signal( - Destination::relay(relay_nr, target_nr.clone()), - SignalInfo::ReverseConnect { receipt, peer_info }, - ) - .await - .wrap_err("failed to send signal")?); - - // Wait for the return receipt - let inbound_nr = match eventual_value.await.take_value().unwrap() { - ReceiptEvent::ReturnedPrivate { private_route: _ } - | ReceiptEvent::ReturnedOutOfBand - | ReceiptEvent::ReturnedSafety => { - return Ok(NetworkResult::invalid_message( - "reverse connect receipt should be returned in-band", - )); - } - ReceiptEvent::ReturnedInBand { inbound_noderef } => inbound_noderef, - ReceiptEvent::Expired => { - return Ok(NetworkResult::timeout()); - } - ReceiptEvent::Cancelled => { - return Ok(NetworkResult::no_connection_other(format!("reverse connect receipt cancelled from {}", target_nr))) - } - }; - - // We expect the inbound noderef to be the same as the target noderef - // if they aren't the same, we should error on this and figure out what then hell is up - if !target_nr.same_entry(&inbound_nr) { - bail!("unexpected noderef mismatch on reverse 
connect"); - } - - // And now use the existing connection to send over - if let Some(descriptor) = inbound_nr.last_connection() { - match self - .net() - .send_data_to_existing_connection(descriptor, data) - .await? - { - None => Ok(NetworkResult::value(descriptor)), - Some(_) => Ok(NetworkResult::no_connection_other( - "unable to send over reverse connection", - )), - } - } else { - bail!("no reverse connection available") - } - } - - /// Send a hole punch signal and do a negotiating ping and wait for the return receipt - /// Then send the data across the new connection - /// Only usable for PublicInternet routing domain - #[instrument(level = "trace", skip(self, data), err)] - pub async fn do_hole_punch( - &self, - relay_nr: NodeRef, - target_nr: NodeRef, - data: Vec, - ) -> EyreResult> { - // Ensure we are filtered down to UDP (the only hole punch protocol supported today) - assert!(target_nr - .filter_ref() - .map(|nrf| nrf.dial_info_filter.protocol_type_set - == ProtocolTypeSet::only(ProtocolType::UDP)) - .unwrap_or_default()); - - // Build a return receipt for the signal - let receipt_timeout = ms_to_us( - self.unlocked_inner - .config - .get() - .network - .hole_punch_receipt_time_ms, - ); - let (receipt, eventual_value) = self.generate_single_shot_receipt(receipt_timeout, [])?; - - // Get target routing domain - let Some(routing_domain) = target_nr.best_routing_domain() else { - return Ok(NetworkResult::no_connection_other("No routing domain for target")); - }; - - // Get our peer info - let Some(peer_info) = self - .routing_table() - .get_own_peer_info(routing_domain) else { - return Ok(NetworkResult::no_connection_other("Own peer info not available")); - }; - - // Get the udp direct dialinfo for the hole punch - let hole_punch_did = target_nr - .first_filtered_dial_info_detail() - .ok_or_else(|| eyre!("No hole punch capable dialinfo found for node"))?; - - // Do our half of the hole punch by sending an empty packet - // Both sides will do this and then the receipt will get sent over the punched hole - // Don't bother storing the returned connection descriptor as the 'last connection' because the other side of the hole - // punch should come through and create a real 'last connection' for us if this succeeds - network_result_try!( - self.net() - .send_data_to_dial_info(hole_punch_did.dial_info, Vec::new()) - .await? 
- ); - - // Issue the signal - let rpc = self.rpc_processor(); - network_result_try!(rpc - .rpc_call_signal( - Destination::relay(relay_nr, target_nr.clone()), - SignalInfo::HolePunch { receipt, peer_info }, - ) - .await - .wrap_err("failed to send signal")?); - - // Wait for the return receipt - let inbound_nr = match eventual_value.await.take_value().unwrap() { - ReceiptEvent::ReturnedPrivate { private_route: _ } - | ReceiptEvent::ReturnedOutOfBand - | ReceiptEvent::ReturnedSafety => { - return Ok(NetworkResult::invalid_message( - "hole punch receipt should be returned in-band", - )); - } - ReceiptEvent::ReturnedInBand { inbound_noderef } => inbound_noderef, - ReceiptEvent::Expired => { - return Ok(NetworkResult::timeout()); - } - ReceiptEvent::Cancelled => { - return Ok(NetworkResult::no_connection_other(format!("hole punch receipt cancelled from {}", target_nr))) - } - }; - - // We expect the inbound noderef to be the same as the target noderef - // if they aren't the same, we should error on this and figure out what then hell is up - if !target_nr.same_entry(&inbound_nr) { - bail!( - "unexpected noderef mismatch on hole punch {}, expected {}", - inbound_nr, - target_nr - ); - } - - // And now use the existing connection to send over - if let Some(descriptor) = inbound_nr.last_connection() { - match self - .net() - .send_data_to_existing_connection(descriptor, data) - .await? - { - None => Ok(NetworkResult::value(descriptor)), - Some(_) => Ok(NetworkResult::no_connection_other( - "unable to send over hole punch", - )), - } - } else { - bail!("no hole punch available") - } - } - - /// Figure out how to reach a node from our own node over the best routing domain and reference the nodes we want to access - /// Uses NodeRefs to ensure nodes are referenced, this is not a part of 'RoutingTable' because RoutingTable is not - /// allowed to use NodeRefs due to recursive locking - #[instrument(level = "trace", skip(self), ret)] - pub(crate) fn get_node_contact_method( - &self, - target_node_ref: NodeRef, - ) -> EyreResult { - let routing_table = self.routing_table(); - - // Figure out the best routing domain to get the contact method over - let routing_domain = match target_node_ref.best_routing_domain() { - Some(rd) => rd, - None => { - log_net!("no routing domain for node {:?}", target_node_ref); - return Ok(NodeContactMethod::Unreachable); - } - }; - - // Node A is our own node - // Use whatever node info we've calculated so far - let peer_a = routing_table.get_best_effort_own_peer_info(routing_domain); - - // Node B is the target node - let peer_b = match target_node_ref.make_peer_info(routing_domain) { - Some(ni) => ni, - None => { - log_net!("no node info for node {:?}", target_node_ref); - return Ok(NodeContactMethod::Unreachable); - } - }; - - // Dial info filter comes from the target node ref - let dial_info_filter = target_node_ref.dial_info_filter(); - let mut sequencing = target_node_ref.sequencing(); - - // If the node has had lost questions or failures to send, prefer sequencing - // to improve reliability. The node may be experiencing UDP fragmentation drops - // or other firewalling issues and may perform better with TCP. 
- let unreliable = target_node_ref.peer_stats().rpc_stats.failed_to_send > 0 || target_node_ref.peer_stats().rpc_stats.recent_lost_answers > 0; - if unreliable && sequencing < Sequencing::PreferOrdered { - sequencing = Sequencing::PreferOrdered; - } - - // Get the best contact method with these parameters from the routing domain - let cm = routing_table.get_contact_method( - routing_domain, - &peer_a, - &peer_b, - dial_info_filter, - sequencing, - ); - - // Translate the raw contact method to a referenced contact method - Ok(match cm { - ContactMethod::Unreachable => NodeContactMethod::Unreachable, - ContactMethod::Existing => NodeContactMethod::Existing, - ContactMethod::Direct(di) => NodeContactMethod::Direct(di), - ContactMethod::SignalReverse(relay_key, target_key) => { - let relay_nr = routing_table - .lookup_and_filter_noderef(relay_key, routing_domain.into(), dial_info_filter)? - .ok_or_else(|| eyre!("couldn't look up relay"))?; - if !target_node_ref.node_ids().contains(&target_key) { - bail!("signalreverse target noderef didn't match target key: {:?} != {} for relay {}", target_node_ref, target_key, relay_key ); - } - NodeContactMethod::SignalReverse(relay_nr, target_node_ref) - } - ContactMethod::SignalHolePunch(relay_key, target_key) => { - let relay_nr = routing_table - .lookup_and_filter_noderef(relay_key, routing_domain.into(), dial_info_filter)? - .ok_or_else(|| eyre!("couldn't look up relay"))?; - if !target_node_ref.node_ids().contains(&target_key) { - bail!("signalholepunch target noderef didn't match target key: {:?} != {} for relay {}", target_node_ref, target_key, relay_key ); - } - // if any other protocol were possible here we could update this and do_hole_punch - // but tcp hole punch is very very unreliable it seems - let udp_target_node_ref = target_node_ref.filtered_clone(NodeRefFilter::new().with_protocol_type(ProtocolType::UDP)); - - NodeContactMethod::SignalHolePunch(relay_nr, udp_target_node_ref) - } - ContactMethod::InboundRelay(relay_key) => { - let relay_nr = routing_table - .lookup_and_filter_noderef(relay_key, routing_domain.into(), dial_info_filter)? - .ok_or_else(|| eyre!("couldn't look up relay"))?; - NodeContactMethod::InboundRelay(relay_nr) - } - ContactMethod::OutboundRelay(relay_key) => { - let relay_nr = routing_table - .lookup_and_filter_noderef(relay_key, routing_domain.into(), dial_info_filter)? - .ok_or_else(|| eyre!("couldn't look up relay"))?; - NodeContactMethod::OutboundRelay(relay_nr) - } - }) - } - - /// Send raw data to a node - /// - /// We may not have dial info for a node, but have an existing connection for it - /// because an inbound connection happened first, and no FindNodeQ has happened to that - /// node yet to discover its dial info. The existing connection should be tried first - /// in this case. - /// - /// Sending to a node requires determining a NetworkClass compatible mechanism - pub fn send_data( - &self, - node_ref: NodeRef, - data: Vec, - ) -> SendPinBoxFuture>> { - let this = self.clone(); - Box::pin( - async move { - // info!("{}", format!("send_data to: {:?}", node_ref).red()); - - // First try to send data to the last socket we've seen this peer on - let data = if let Some(connection_descriptor) = node_ref.last_connection() { - // info!( - // "{}", - // format!("last_connection to: {:?}", connection_descriptor).red() - // ); - - match this - .net() - .send_data_to_existing_connection(connection_descriptor, data) - .await? 
- { - None => { - // info!( - // "{}", - // format!("sent to existing connection: {:?}", connection_descriptor) - // .red() - // ); - - // Update timestamp for this last connection since we just sent to it - node_ref.set_last_connection(connection_descriptor, get_aligned_timestamp()); - - return Ok(NetworkResult::value(SendDataKind::Existing( - connection_descriptor, - ))); - } - Some(d) => d, - } - } else { - data - }; - - // info!("{}", "no existing connection".red()); - - // If we don't have last_connection, try to reach out to the peer via its dial info - let contact_method = this.get_node_contact_method(node_ref.clone())?; - log_net!( - "send_data via {:?} to dialinfo {:?}", - contact_method, - node_ref - ); - match contact_method { - NodeContactMethod::OutboundRelay(relay_nr) - | NodeContactMethod::InboundRelay(relay_nr) => { - network_result_try!(this.send_data(relay_nr, data).await?); - Ok(NetworkResult::value(SendDataKind::Indirect)) - } - NodeContactMethod::Direct(dial_info) => { - let connection_descriptor = network_result_try!( - this.net().send_data_to_dial_info(dial_info, data).await? - ); - // If we connected to this node directly, save off the last connection so we can use it again - node_ref.set_last_connection(connection_descriptor, get_aligned_timestamp()); - - Ok(NetworkResult::value(SendDataKind::Direct( - connection_descriptor, - ))) - } - NodeContactMethod::SignalReverse(relay_nr, target_node_ref) => { - let connection_descriptor = network_result_try!( - this.do_reverse_connect(relay_nr, target_node_ref, data) - .await? - ); - Ok(NetworkResult::value(SendDataKind::Direct( - connection_descriptor, - ))) - } - NodeContactMethod::SignalHolePunch(relay_nr, target_node_ref) => { - let connection_descriptor = network_result_try!( - this.do_hole_punch(relay_nr, target_node_ref, data).await? - ); - Ok(NetworkResult::value(SendDataKind::Direct( - connection_descriptor, - ))) - } - NodeContactMethod::Existing => Ok(NetworkResult::no_connection_other( - "should have found an existing connection", - )), - NodeContactMethod::Unreachable => Ok(NetworkResult::no_connection_other( - "Can't send to this node", - )), - } - } - .instrument(trace_span!("send_data")), - ) - } - - // Direct bootstrap request handler (separate fallback mechanism from cheaper TXT bootstrap mechanism) - #[instrument(level = "trace", skip(self), ret, err)] - async fn handle_boot_request( - &self, - descriptor: ConnectionDescriptor, - ) -> EyreResult> { - let routing_table = self.routing_table(); - - // Get a bunch of nodes with the various - let bootstrap_nodes = routing_table.find_bootstrap_nodes_filtered(2); - - // Serialize out peer info - let bootstrap_peerinfo: Vec = bootstrap_nodes - .iter() - .filter_map(|nr| nr.make_peer_info(RoutingDomain::PublicInternet)) - .collect(); - let json_bytes = serialize_json(bootstrap_peerinfo).as_bytes().to_vec(); - - // Reply with a chunk of signed routing table - match self - .net() - .send_data_to_existing_connection(descriptor, json_bytes) - .await? 
- { - None => { - // Bootstrap reply was sent - Ok(NetworkResult::value(())) - } - Some(_) => Ok(NetworkResult::no_connection_other( - "bootstrap reply could not be sent", - )), - } - } - - // Direct bootstrap request - #[instrument(level = "trace", err, skip(self))] - pub async fn boot_request(&self, dial_info: DialInfo) -> EyreResult> { - let timeout_ms = self.with_config(|c| c.network.rpc.timeout_ms); - // Send boot magic to requested peer address - let data = BOOT_MAGIC.to_vec(); - let out_data: Vec = network_result_value_or_log!(self - .net() - .send_recv_data_unbound_to_dial_info(dial_info, data, timeout_ms) - .await? => - { - return Ok(Vec::new()); - }); - - let bootstrap_peerinfo: Vec = - deserialize_json(std::str::from_utf8(&out_data).wrap_err("bad utf8 in boot peerinfo")?) - .wrap_err("failed to deserialize boot peerinfo")?; - - Ok(bootstrap_peerinfo) - } - // Called when a packet potentially containing an RPC envelope is received by a low-level // network protocol handler. Processes the envelope, authenticates and decrypts the RPC message // and passes it to the RPC handler - #[instrument(level = "trace", ret, err, skip(self, data), fields(data.len = data.len()))] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", ret, err, skip(self, data), fields(data.len = data.len())))] async fn on_recv_envelope( &self, - data: &[u8], + data: &mut [u8], connection_descriptor: ConnectionDescriptor, ) -> EyreResult { + #[cfg(feature="verbose-tracing")] let root = span!( parent: None, Level::TRACE, @@ -1313,6 +886,7 @@ impl NetworkManager { "data.len" = data.len(), "descriptor" = ?connection_descriptor ); + #[cfg(feature="verbose-tracing")] let _root_enter = root.enter(); log_net!( @@ -1354,18 +928,18 @@ impl NetworkManager { // Is this a direct bootstrap request instead of an envelope? if data[0..4] == *BOOT_MAGIC { - network_result_value_or_log!(self.handle_boot_request(connection_descriptor).await? => {}); + network_result_value_or_log!(self.handle_boot_request(connection_descriptor).await? => [ format!(": connection_descriptor={:?}", connection_descriptor) ] {}); return Ok(true); } // Is this an out-of-band receipt instead of an envelope? 
if data[0..3] == *RECEIPT_MAGIC { - network_result_value_or_log!(self.handle_out_of_band_receipt(data).await => {}); + network_result_value_or_log!(self.handle_out_of_band_receipt(data).await => [ format!(": data.len={}", data.len()) ] {}); return Ok(true); } // Decode envelope header (may fail signature validation) - let envelope = match Envelope::from_signed_data(self.crypto(), data) { + let envelope = match Envelope::from_signed_data(self.crypto(), data, &self.unlocked_inner.network_key) { Ok(v) => v, Err(e) => { log_net!(debug "envelope failed to decode: {}", e); @@ -1387,8 +961,9 @@ impl NetworkManager { if let Some(tsbehind) = tsbehind { if tsbehind.as_u64() != 0 && (ts > ets && ts.saturating_sub(ets) > tsbehind) { log_net!(debug - "envelope time was too far in the past: {}ms ", - timestamp_to_secs(ts.saturating_sub(ets).as_u64()) * 1000f64 + "Timestamp behind: {}ms ({})", + timestamp_to_secs(ts.saturating_sub(ets).as_u64()) * 1000f64, + connection_descriptor.remote() ); return Ok(false); } @@ -1396,8 +971,9 @@ impl NetworkManager { if let Some(tsahead) = tsahead { if tsahead.as_u64() != 0 && (ts < ets && ets.saturating_sub(ts) > tsahead) { log_net!(debug - "envelope time was too far in the future: {}ms", - timestamp_to_secs(ets.saturating_sub(ts).as_u64()) * 1000f64 + "Timestamp ahead: {}ms ({})", + timestamp_to_secs(ets.saturating_sub(ts).as_u64()) * 1000f64, + connection_descriptor.remote() ); return Ok(false); } @@ -1443,8 +1019,20 @@ impl NetworkManager { }; if let Some(relay_nr) = some_relay_nr { + // Force sequencing if this came in sequenced. + // The sender did the prefer/ensure calculation when it did get_contact_method, + // so we don't need to do it here. + let relay_nr = if connection_descriptor.remote().protocol_type().is_ordered() { + let mut relay_nr = relay_nr.clone(); + relay_nr.set_sequencing(Sequencing::EnsureOrdered); + relay_nr + } else { + relay_nr + }; + // Relay the packet to the desired destination log_net!("relaying {} bytes to {}", data.len(), relay_nr); + network_result_value_or_log!(match self.send_data(relay_nr, data.to_vec()) .await { Ok(v) => v, @@ -1452,7 +1040,7 @@ impl NetworkManager { log_net!(debug "failed to forward envelope: {}" ,e); return Ok(false); } - } => { + } => [ format!(": relay_nr={}, data.len={}", relay_nr, data.len()) ] { return Ok(false); } ); @@ -1466,7 +1054,7 @@ impl NetworkManager { // Decrypt the envelope body let body = match envelope - .decrypt_body(self.crypto(), data, &node_id_secret) { + .decrypt_body(self.crypto(), data, &node_id_secret, &self.unlocked_inner.network_key) { Ok(v) => v, Err(e) => { log_net!(debug "failed to decrypt envelope body: {}",e); @@ -1505,108 +1093,6 @@ impl NetworkManager { Ok(true) } - // Callbacks from low level network for statistics gathering - pub fn stats_packet_sent(&self, addr: IpAddr, bytes: ByteCount) { - let inner = &mut *self.inner.lock(); - inner - .stats - .self_stats - .transfer_stats_accounting - .add_up(bytes); - inner - .stats - .per_address_stats - .entry(PerAddressStatsKey(addr), |_k,_v| { - // do nothing on LRU evict - }) - .or_insert(PerAddressStats::default()) - .transfer_stats_accounting - .add_up(bytes); - } - - pub fn stats_packet_rcvd(&self, addr: IpAddr, bytes: ByteCount) { - let inner = &mut *self.inner.lock(); - inner - .stats - .self_stats - .transfer_stats_accounting - .add_down(bytes); - inner - .stats - .per_address_stats - .entry(PerAddressStatsKey(addr), |_k,_v| { - // do nothing on LRU evict - }) - .or_insert(PerAddressStats::default()) - 
.transfer_stats_accounting - .add_down(bytes); - } - - // Get stats - pub fn get_stats(&self) -> NetworkManagerStats { - let inner = self.inner.lock(); - inner.stats.clone() - } - - pub fn get_veilid_state(&self) -> VeilidStateNetwork { - let has_state = self - .unlocked_inner - .components - .read() - .as_ref() - .map(|c| c.net.is_started()) - .unwrap_or(false); - - if !has_state { - return VeilidStateNetwork { - started: false, - bps_down: 0.into(), - bps_up: 0.into(), - peers: Vec::new(), - - }; - } - let routing_table = self.routing_table(); - - let (bps_down, bps_up) = { - let inner = self.inner.lock(); - ( - inner.stats.self_stats.transfer_stats.down.average, - inner.stats.self_stats.transfer_stats.up.average, - ) - }; - - VeilidStateNetwork { - started: true, - bps_down, - bps_up, - peers: { - let mut out = Vec::new(); - for (k, v) in routing_table.get_recent_peers() { - if let Ok(Some(nr)) = routing_table.lookup_node_ref(k) { - let peer_stats = nr.peer_stats(); - let peer = PeerTableData { - node_ids: nr.node_ids().iter().copied().collect(), - peer_address: v.last_connection.remote().to_string(), - peer_stats, - }; - out.push(peer); - } - } - out - }, - } - } - - fn send_network_update(&self) { - let update_cb = self.unlocked_inner.update_callback.read().clone(); - if update_cb.is_none() { - return; - } - let state = self.get_veilid_state(); - (update_cb.unwrap())(VeilidUpdate::Network(state)); - } - // Determine if a local IP address has changed // this means we should restart the low level network and and recreate all of our dial info // Wait until we have received confirmation from N different peers @@ -1628,8 +1114,8 @@ impl NetworkManager { connection_descriptor: ConnectionDescriptor, // the connection descriptor used reporting_peer: NodeRef, // the peer's noderef reporting the socket address ) { - // debug code - //info!("report_global_socket_address\nsocket_address: {:#?}\nconnection_descriptor: {:#?}\nreporting_peer: {:#?}", socket_address, connection_descriptor, reporting_peer); + #[cfg(feature = "verbose-tracing")] + debug!("report_global_socket_address\nsocket_address: {:#?}\nconnection_descriptor: {:#?}\nreporting_peer: {:#?}", socket_address, connection_descriptor, reporting_peer); // Ignore these reports if we are currently detecting public dial info let net = self.net(); diff --git a/veilid-core/src/network_manager/native/mod.rs b/veilid-core/src/network_manager/native/mod.rs index 319a68c5..8a3a7023 100644 --- a/veilid-core/src/network_manager/native/mod.rs +++ b/veilid-core/src/network_manager/native/mod.rs @@ -358,7 +358,7 @@ impl Network { // This creates a short-lived connection in the case of connection-oriented protocols // for the purpose of sending this one message. // This bypasses the connection table as it is not a 'node to node' connection. - #[instrument(level="trace", err, skip(self, data), fields(data.len = data.len()))] + #[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))] pub async fn send_data_unbound_to_dial_info( &self, dial_info: DialInfo, @@ -416,7 +416,7 @@ impl Network { // This creates a short-lived connection in the case of connection-oriented protocols // for the purpose of sending this one message. // This bypasses the connection table as it is not a 'node to node' connection. 
- #[instrument(level="trace", err, skip(self, data), fields(data.len = data.len()))] + #[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))] pub async fn send_recv_data_unbound_to_dial_info( &self, dial_info: DialInfo, @@ -496,7 +496,7 @@ impl Network { } } - #[instrument(level="trace", err, skip(self, data), fields(data.len = data.len()))] + #[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))] pub async fn send_data_to_existing_connection( &self, descriptor: ConnectionDescriptor, @@ -515,7 +515,8 @@ impl Network { network_result_value_or_log!(ph.clone() .send_message(data.clone(), peer_socket_addr) .await - .wrap_err("sending data to existing conection")? => { return Ok(Some(data)); } ); + .wrap_err("sending data to existing conection")? => [ format!(": data.len={}, descriptor={:?}", data.len(), descriptor) ] + { return Ok(Some(data)); } ); // Network accounting self.network_manager() @@ -556,7 +557,7 @@ impl Network { // Send data directly to a dial info, possibly without knowing which node it is going to // Returns a descriptor for the connection used to send the data - #[instrument(level="trace", err, skip(self, data), fields(data.len = data.len()))] + #[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))] pub async fn send_data_to_dial_info( &self, dial_info: DialInfo, diff --git a/veilid-core/src/network_manager/native/network_class_discovery.rs b/veilid-core/src/network_manager/native/network_class_discovery.rs index 0d7de718..01dbde9d 100644 --- a/veilid-core/src/network_manager/native/network_class_discovery.rs +++ b/veilid-core/src/network_manager/native/network_class_discovery.rs @@ -92,7 +92,7 @@ impl DiscoveryContext { ); return None; } - } => { + } => [ format!(": node_ref={}", node_ref) ] { return None; } ); diff --git a/veilid-core/src/network_manager/native/network_udp.rs b/veilid-core/src/network_manager/native/network_udp.rs index 58203c9d..002ccf06 100644 --- a/veilid-core/src/network_manager/native/network_udp.rs +++ b/veilid-core/src/network_manager/native/network_udp.rs @@ -67,7 +67,6 @@ impl Network { { Ok(Ok((size, descriptor))) => { // XXX: Limit the number of packets from the same IP address? 
- log_net!("UDP packet: {:?}", descriptor); // Network accounting network_manager.stats_packet_rcvd( @@ -77,7 +76,7 @@ impl Network { // Pass it up for processing if let Err(e) = network_manager - .on_recv_envelope(&data[..size], descriptor) + .on_recv_envelope(&mut data[..size], descriptor) .await { log_net!(debug "failed to process received udp envelope: {}", e); diff --git a/veilid-core/src/network_manager/native/protocol/tcp.rs b/veilid-core/src/network_manager/native/protocol/tcp.rs index 847c8608..c82fc04d 100644 --- a/veilid-core/src/network_manager/native/protocol/tcp.rs +++ b/veilid-core/src/network_manager/native/protocol/tcp.rs @@ -56,10 +56,11 @@ impl RawTcpNetworkConnection { stream.flush().await.into_network_result() } - #[instrument(level="trace", err, skip(self, message), fields(network_result, message.len = message.len()))] + #[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, message), fields(network_result, message.len = message.len())))] pub async fn send(&self, message: Vec) -> io::Result> { let mut stream = self.stream.clone(); let out = Self::send_internal(&mut stream, message).await?; + #[cfg(feature = "verbose-tracing")] tracing::Span::current().record("network_result", &tracing::field::display(&out)); Ok(out) } @@ -87,11 +88,15 @@ impl RawTcpNetworkConnection { Ok(NetworkResult::Value(out)) } - // #[instrument(level = "trace", err, skip(self), fields(network_result))] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", err, skip(self), fields(network_result)) + )] pub async fn recv(&self) -> io::Result>> { let mut stream = self.stream.clone(); let out = Self::recv_internal(&mut stream).await?; - //tracing::Span::current().record("network_result", &tracing::field::display(&out)); + #[cfg(feature = "verbose-tracing")] + tracing::Span::current().record("network_result", &tracing::field::display(&out)); Ok(out) } } @@ -143,7 +148,7 @@ impl RawTcpProtocolHandler { ps, )); - log_net!(debug "TCP: on_accept_async from: {}", socket_addr); + log_net!(debug "Connection accepted from: {} (TCP)", socket_addr); Ok(Some(conn)) } diff --git a/veilid-core/src/network_manager/native/protocol/udp.rs b/veilid-core/src/network_manager/native/protocol/udp.rs index a8fe5ce8..a8d33b5a 100644 --- a/veilid-core/src/network_manager/native/protocol/udp.rs +++ b/veilid-core/src/network_manager/native/protocol/udp.rs @@ -4,22 +4,38 @@ use sockets::*; #[derive(Clone)] pub struct RawUdpProtocolHandler { socket: Arc, + assembly_buffer: AssemblyBuffer, } impl RawUdpProtocolHandler { pub fn new(socket: Arc) -> Self { - Self { socket } + Self { + socket, + assembly_buffer: AssemblyBuffer::new(), + } } - // #[instrument(level = "trace", err, skip(self, data), fields(data.len = data.len(), ret.len, ret.descriptor))] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", err, skip(self, data), fields(data.len = data.len(), ret.len, ret.descriptor)))] pub async fn recv_message(&self, data: &mut [u8]) -> io::Result<(usize, ConnectionDescriptor)> { - let (size, descriptor) = loop { + let (message_len, descriptor) = loop { + // Get a packet let (size, remote_addr) = network_result_value_or_log!(self.socket.recv_from(data).await.into_network_result()? 
=> continue); - if size > MAX_MESSAGE_SIZE { - log_net!(debug "{}({}) at {}@{}:{}", "Invalid message".green(), "received too large UDP message", file!(), line!(), column!()); + + // Insert into assembly buffer + let Some(message) = self.assembly_buffer.insert_frame(&data[0..size], remote_addr) else { + continue; + }; + + // Check length of reassembled message (same for all protocols) + if message.len() > MAX_MESSAGE_SIZE { + log_net!(debug "{}({}) at {}@{}:{}", "Invalid message", "received too large UDP message", file!(), line!(), column!()); continue; } + // Copy the assembled message out if we got one + data[0..message.len()].copy_from_slice(&message); + + // Return a connection descriptor and the amount of data in the message let peer_addr = PeerAddress::new( SocketAddress::from_socket_addr(remote_addr), ProtocolType::UDP, ); @@ -30,25 +46,48 @@ impl RawUdpProtocolHandler { SocketAddress::from_socket_addr(local_socket_addr), ); - break (size, descriptor); + break (message.len(), descriptor); }; - // tracing::Span::current().record("ret.len", &size); - // tracing::Span::current().record("ret.descriptor", &format!("{:?}", descriptor).as_str()); - Ok((size, descriptor)) + #[cfg(feature = "verbose-tracing")] + tracing::Span::current().record("ret.len", &message_len); + #[cfg(feature = "verbose-tracing")] + tracing::Span::current().record("ret.descriptor", &format!("{:?}", descriptor).as_str()); + Ok((message_len, descriptor)) } - #[instrument(level = "trace", err, skip(self, data), fields(data.len = data.len(), ret.len, ret.descriptor))] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", err, skip(self, data), fields(data.len = data.len(), ret.len, ret.descriptor)))] pub async fn send_message( &self, data: Vec, - socket_addr: SocketAddr, + remote_addr: SocketAddr, ) -> io::Result> { if data.len() > MAX_MESSAGE_SIZE { bail_io_error_other!("sending too large UDP message"); } + + // Fragment and send + let sender = |framed_chunk: Vec, remote_addr: SocketAddr| async move { + let len = network_result_try!(self + .socket + .send_to(&framed_chunk, remote_addr) + .await + .into_network_result()?); + if len != framed_chunk.len() { + bail_io_error_other!("UDP partial send") + } + Ok(NetworkResult::value(())) + }; + + network_result_try!( + self.assembly_buffer + .split_message(data, remote_addr, sender) + .await?
+ ); + + // Return a connection descriptor for the sent message let peer_addr = PeerAddress::new( - SocketAddress::from_socket_addr(socket_addr), + SocketAddress::from_socket_addr(remote_addr), ProtocolType::UDP, ); let local_socket_addr = self.socket.local_addr()?; @@ -58,16 +97,9 @@ impl RawUdpProtocolHandler { SocketAddress::from_socket_addr(local_socket_addr), ); - let len = network_result_try!(self - .socket - .send_to(&data, socket_addr) - .await - .into_network_result()?); - if len != data.len() { - bail_io_error_other!("UDP partial send") - } - + #[cfg(feature = "verbose-tracing")] tracing::Span::current().record("ret.len", &len); + #[cfg(feature = "verbose-tracing")] tracing::Span::current().record("ret.descriptor", &format!("{:?}", descriptor).as_str()); Ok(NetworkResult::value(descriptor)) } diff --git a/veilid-core/src/network_manager/native/protocol/ws.rs b/veilid-core/src/network_manager/native/protocol/ws.rs index 89e3c46d..ab421193 100644 --- a/veilid-core/src/network_manager/native/protocol/ws.rs +++ b/veilid-core/src/network_manager/native/protocol/ws.rs @@ -72,7 +72,7 @@ where // .map_err(to_io_error_other) // } - #[instrument(level = "trace", err, skip(self, message), fields(network_result, message.len = message.len()))] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", err, skip(self, message), fields(network_result, message.len = message.len())))] pub async fn send(&self, message: Vec) -> io::Result> { if message.len() > MAX_MESSAGE_SIZE { bail_io_error_other!("received too large WS message"); @@ -82,6 +82,7 @@ where Err(e) => err_to_network_result(e), }; if !out.is_value() { + #[cfg(feature = "verbose-tracing")] tracing::Span::current().record("network_result", &tracing::field::display(&out)); return Ok(out); } @@ -89,11 +90,13 @@ where Ok(v) => NetworkResult::value(v), Err(e) => err_to_network_result(e), }; + + #[cfg(feature = "verbose-tracing")] tracing::Span::current().record("network_result", &tracing::field::display(&out)); Ok(out) } - // #[instrument(level = "trace", err, skip(self), fields(network_result, ret.len))] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", err, skip(self), fields(network_result, ret.len)))] pub async fn recv(&self) -> io::Result>> { let out = match self.stream.clone().next().await { Some(Ok(Message::Binary(v))) => { @@ -120,7 +123,8 @@ where )), }; - // tracing::Span::current().record("network_result", &tracing::field::display(&out)); + #[cfg(feature = "verbose-tracing")] + tracing::Span::current().record("network_result", &tracing::field::display(&out)); Ok(out) } } @@ -212,7 +216,7 @@ impl WebsocketProtocolHandler { ws_stream, )); - log_net!(debug "{}: on_accept_async from: {}", if self.arc.tls { "WSS" } else { "WS" }, socket_addr); + log_net!(debug "Connection accepted from: {} ({})", socket_addr, if self.arc.tls { "WSS" } else { "WS" }); Ok(Some(conn)) } diff --git a/veilid-core/src/network_manager/network_connection.rs b/veilid-core/src/network_manager/network_connection.rs index dfd2e326..333ae677 100644 --- a/veilid-core/src/network_manager/network_connection.rs +++ b/veilid-core/src/network_manager/network_connection.rs @@ -179,7 +179,7 @@ impl NetworkConnection { } } - #[instrument(level="trace", skip(message, stats), fields(message.len = message.len()), ret)] + #[cfg_attr(feature="verbose-tracing", instrument(level="trace", skip(message, stats), fields(message.len = message.len()), ret))] async fn send_internal( protocol_connection: &ProtocolNetworkConnection, stats: Arc>, @@ -194,7 +194,7 
@@ impl NetworkConnection { Ok(NetworkResult::Value(out)) } - #[instrument(level="trace", skip(stats), fields(ret.len))] + #[cfg_attr(feature="verbose-tracing", instrument(level="trace", skip(stats), fields(ret.len)))] async fn recv_internal( protocol_connection: &ProtocolNetworkConnection, stats: Arc>, @@ -205,6 +205,7 @@ impl NetworkConnection { let mut stats = stats.lock(); stats.last_message_recv_time.max_assign(Some(ts)); + #[cfg(feature = "verbose-tracing")] tracing::Span::current().record("ret.len", out.len()); Ok(NetworkResult::Value(out)) @@ -235,7 +236,7 @@ impl NetworkConnection { Box::pin(async move { log_net!( "== Starting process_connection loop for id={}, {:?}", connection_id, - descriptor.green() + descriptor ); let network_manager = connection_manager.network_manager(); @@ -248,7 +249,7 @@ impl NetworkConnection { let new_timer = || { sleep(connection_manager.connection_inactivity_timeout_ms()).then(|_| async { // timeout - log_net!("== Connection timeout on {:?}", descriptor.green()); + log_net!("== Connection timeout on {:?}", descriptor); RecvLoopAction::Timeout }) }; @@ -300,14 +301,18 @@ impl NetworkConnection { .then(|res| async { match res { Ok(v) => { - - let message = network_result_value_or_log!(v => { + if v.is_no_connection() { + let peer_address = protocol_connection.descriptor().remote(); + log_net!(debug "Connection closed from: {} ({})", peer_address.socket_address().to_socket_addr(), peer_address.protocol_type()); + return RecvLoopAction::Finish; + } + let mut message = network_result_value_or_log!(v => [ format!(": protocol_connection={:?}", protocol_connection) ] { return RecvLoopAction::Finish; }); // Pass received messages up to the network manager for processing if let Err(e) = network_manager - .on_recv_envelope(message.as_slice(), descriptor) + .on_recv_envelope(message.as_mut_slice(), descriptor) .await { log_net!(debug "failed to process received envelope: {}", e); @@ -361,7 +366,7 @@ impl NetworkConnection { log_net!( "== Connection loop finished descriptor={:?}", - descriptor.green() + descriptor ); // Let the connection manager know the receive loop exited diff --git a/veilid-core/src/network_manager/send_data.rs b/veilid-core/src/network_manager/send_data.rs new file mode 100644 index 00000000..8774f5ad --- /dev/null +++ b/veilid-core/src/network_manager/send_data.rs @@ -0,0 +1,610 @@ +use super::*; + +impl NetworkManager { + /// Send raw data to a node + /// + /// We may not have dial info for a node, but have an existing connection for it + /// because an inbound connection happened first, and no FindNodeQ has happened to that + /// node yet to discover its dial info. The existing connection should be tried first + /// in this case, if it matches the node ref's filters and no more permissive connection + /// could be established. 
+ /// + /// Sending to a node requires determining a NetworkClass compatible mechanism + pub fn send_data( + &self, + target_node_ref: NodeRef, + data: Vec, + ) -> SendPinBoxFuture>> { + let this = self.clone(); + Box::pin( + async move { + // Get the best way to contact this node + let contact_method = this.get_node_contact_method(target_node_ref.clone())?; + + // If we need to relay, do it + let (contact_method, node_ref, relayed) = match contact_method { + NodeContactMethod::OutboundRelay(relay_nr) + | NodeContactMethod::InboundRelay(relay_nr) => { + let cm = this.get_node_contact_method(relay_nr.clone())?; + (cm, relay_nr, true) + } + cm => (cm, target_node_ref.clone(), false), + }; + + #[cfg(feature = "verbose-tracing")] + debug!( + "ContactMethod: {:?} for {:?}", + contact_method, target_node_ref + ); + + // Try the contact method + let sdk = match contact_method { + NodeContactMethod::OutboundRelay(relay_nr) + | NodeContactMethod::InboundRelay(relay_nr) => { + // Relay loop or multiple relays + bail!( + "Relay loop or multiple relays detected: {} -> {} -> {}", + target_node_ref, + node_ref, + relay_nr + ); + } + NodeContactMethod::Direct(dial_info) => { + network_result_try!( + this.send_data_ncm_direct(node_ref, dial_info, data).await? + ) + } + NodeContactMethod::SignalReverse(relay_nr, target_node_ref) => { + network_result_try!( + this.send_data_ncm_signal_reverse(relay_nr, target_node_ref, data) + .await? + ) + } + NodeContactMethod::SignalHolePunch(relay_nr, target_node_ref) => { + network_result_try!( + this.send_data_ncm_signal_hole_punch(relay_nr, target_node_ref, data) + .await? + ) + } + NodeContactMethod::Existing => { + network_result_try!( + this.send_data_ncm_existing(target_node_ref, data).await? + ) + } + NodeContactMethod::Unreachable => { + network_result_try!( + this.send_data_ncm_unreachable(target_node_ref, data) + .await? + ) + } + }; + + if relayed { + return Ok(NetworkResult::value(SendDataKind::Indirect)); + } + Ok(NetworkResult::value(sdk)) + } + .instrument(trace_span!("send_data")), + ) + } + + /// Send data using NodeContactMethod::Existing + async fn send_data_ncm_existing( + &self, + target_node_ref: NodeRef, + data: Vec, + ) -> EyreResult> { + // First try to send data to the last socket we've seen this peer on + let Some(connection_descriptor) = target_node_ref.last_connection() else { + return Ok(NetworkResult::no_connection_other( + "should have found an existing connection", + )); + }; + + if self + .net() + .send_data_to_existing_connection(connection_descriptor, data) + .await? + .is_some() + { + return Ok(NetworkResult::no_connection_other( + "failed to send to existing connection", + )); + } + + // Update timestamp for this last connection since we just sent to it + target_node_ref.set_last_connection(connection_descriptor, get_aligned_timestamp()); + + Ok(NetworkResult::value(SendDataKind::Existing( + connection_descriptor, + ))) + } + + /// Send data using NodeContactMethod::Unreachable + async fn send_data_ncm_unreachable( + &self, + target_node_ref: NodeRef, + data: Vec, + ) -> EyreResult> { + // Try to send data to the last socket we've seen this peer on + let Some(connection_descriptor) = target_node_ref.last_connection() else { + return Ok(NetworkResult::no_connection_other( + "Node is not reachable and has no existing connection", + )); + }; + + if self + .net() + .send_data_to_existing_connection(connection_descriptor, data) + .await? 
+ .is_some() + { + return Ok(NetworkResult::no_connection_other( + "failed to send to existing connection", + )); + } + + // Update timestamp for this last connection since we just sent to it + target_node_ref.set_last_connection(connection_descriptor, get_aligned_timestamp()); + + Ok(NetworkResult::value(SendDataKind::Existing( + connection_descriptor, + ))) + } + + /// Send data using NodeContactMethod::SignalReverse + async fn send_data_ncm_signal_reverse( + &self, + relay_nr: NodeRef, + target_node_ref: NodeRef, + data: Vec, + ) -> EyreResult> { + // First try to send data to the last socket we've seen this peer on + let data = if let Some(connection_descriptor) = target_node_ref.last_connection() { + match self + .net() + .send_data_to_existing_connection(connection_descriptor, data) + .await? + { + None => { + // Update timestamp for this last connection since we just sent to it + target_node_ref + .set_last_connection(connection_descriptor, get_aligned_timestamp()); + + return Ok(NetworkResult::value(SendDataKind::Existing( + connection_descriptor, + ))); + } + Some(data) => { + // Couldn't send data to existing connection + // so pass the data back out + data + } + } + } else { + // No last connection + data + }; + + let connection_descriptor = network_result_try!( + self.do_reverse_connect(relay_nr, target_node_ref, data) + .await? + ); + Ok(NetworkResult::value(SendDataKind::Direct( + connection_descriptor, + ))) + } + + /// Send data using NodeContactMethod::SignalHolePunch + async fn send_data_ncm_signal_hole_punch( + &self, + relay_nr: NodeRef, + target_node_ref: NodeRef, + data: Vec, + ) -> EyreResult> { + // First try to send data to the last socket we've seen this peer on + let data = if let Some(connection_descriptor) = target_node_ref.last_connection() { + match self + .net() + .send_data_to_existing_connection(connection_descriptor, data) + .await? + { + None => { + // Update timestamp for this last connection since we just sent to it + target_node_ref + .set_last_connection(connection_descriptor, get_aligned_timestamp()); + + return Ok(NetworkResult::value(SendDataKind::Existing( + connection_descriptor, + ))); + } + Some(data) => { + // Couldn't send data to existing connection + // so pass the data back out + data + } + } + } else { + // No last connection + data + }; + + let connection_descriptor = + network_result_try!(self.do_hole_punch(relay_nr, target_node_ref, data).await?); + Ok(NetworkResult::value(SendDataKind::Direct( + connection_descriptor, + ))) + } + + /// Send data using NodeContactMethod::Direct + async fn send_data_ncm_direct( + &self, + node_ref: NodeRef, + dial_info: DialInfo, + data: Vec, + ) -> EyreResult> { + // Since we have the best dial info already, we can find a connection to use by protocol type + let node_ref = node_ref.filtered_clone(NodeRefFilter::from(dial_info.make_filter())); + + // First try to send data to the last socket we've seen this peer on + let data = if let Some(connection_descriptor) = node_ref.last_connection() { + #[cfg(feature = "verbose-tracing")] + debug!( + "ExistingConnection: {:?} for {:?}", + connection_descriptor, node_ref + ); + + match self + .net() + .send_data_to_existing_connection(connection_descriptor, data) + .await? 
+ { + None => { + // Update timestamp for this last connection since we just sent to it + node_ref.set_last_connection(connection_descriptor, get_aligned_timestamp()); + + return Ok(NetworkResult::value(SendDataKind::Existing( + connection_descriptor, + ))); + } + Some(d) => d, + } + } else { + data + }; + + // New direct connection was necessary for this dial info + let connection_descriptor = + network_result_try!(self.net().send_data_to_dial_info(dial_info, data).await?); + + // If we connected to this node directly, save off the last connection so we can use it again + node_ref.set_last_connection(connection_descriptor, get_aligned_timestamp()); + + Ok(NetworkResult::value(SendDataKind::Direct( + connection_descriptor, + ))) + } + + /// Figure out how to reach a node from our own node over the best routing domain and reference the nodes we want to access + /// Uses NodeRefs to ensure nodes are referenced, this is not a part of 'RoutingTable' because RoutingTable is not + /// allowed to use NodeRefs due to recursive locking + pub(crate) fn get_node_contact_method( + &self, + target_node_ref: NodeRef, + ) -> EyreResult { + let routing_table = self.routing_table(); + + // Figure out the best routing domain to get the contact method over + let routing_domain = match target_node_ref.best_routing_domain() { + Some(rd) => rd, + None => { + log_net!("no routing domain for node {:?}", target_node_ref); + return Ok(NodeContactMethod::Unreachable); + } + }; + + // Get cache key + let ncm_key = NodeContactMethodCacheKey { + own_node_info_ts: routing_table.get_own_node_info_ts(routing_domain), + target_node_info_ts: target_node_ref.node_info_ts(routing_domain), + target_node_ref_filter: target_node_ref.filter_ref().cloned(), + target_node_ref_sequencing: target_node_ref.sequencing(), + }; + if let Some(ncm) = self.inner.lock().node_contact_method_cache.get(&ncm_key) { + return Ok(ncm.clone()); + } + + // Node A is our own node + // Use whatever node info we've calculated so far + let peer_a = routing_table.get_best_effort_own_peer_info(routing_domain); + + // Node B is the target node + let peer_b = match target_node_ref.make_peer_info(routing_domain) { + Some(ni) => ni, + None => { + log_net!("no node info for node {:?}", target_node_ref); + return Ok(NodeContactMethod::Unreachable); + } + }; + + // Dial info filter comes from the target node ref + let dial_info_filter = target_node_ref.dial_info_filter(); + let sequencing = target_node_ref.sequencing(); + + // If the node has had lost questions or failures to send, prefer sequencing + // to improve reliability. The node may be experiencing UDP fragmentation drops + // or other firewalling issues and may perform better with TCP. 
+ // let unreliable = target_node_ref.peer_stats().rpc_stats.failed_to_send > 2 || target_node_ref.peer_stats().rpc_stats.recent_lost_answers > 2; + // if unreliable && sequencing < Sequencing::PreferOrdered { + // log_net!(debug "Node contact failing over to Ordered for {}", target_node_ref.to_string().cyan()); + // sequencing = Sequencing::PreferOrdered; + // } + + // Get the best contact method with these parameters from the routing domain + let cm = routing_table.get_contact_method( + routing_domain, + &peer_a, + &peer_b, + dial_info_filter, + sequencing, + ); + + // Translate the raw contact method to a referenced contact method + let ncm = match cm { + ContactMethod::Unreachable => NodeContactMethod::Unreachable, + ContactMethod::Existing => NodeContactMethod::Existing, + ContactMethod::Direct(di) => NodeContactMethod::Direct(di), + ContactMethod::SignalReverse(relay_key, target_key) => { + let mut relay_nr = routing_table + .lookup_and_filter_noderef(relay_key, routing_domain.into(), dial_info_filter)? + .ok_or_else(|| eyre!("couldn't look up relay"))?; + if !target_node_ref.node_ids().contains(&target_key) { + bail!("signalreverse target noderef didn't match target key: {:?} != {} for relay {}", target_node_ref, target_key, relay_key ); + } + relay_nr.set_sequencing(sequencing); + NodeContactMethod::SignalReverse(relay_nr, target_node_ref) + } + ContactMethod::SignalHolePunch(relay_key, target_key) => { + let mut relay_nr = routing_table + .lookup_and_filter_noderef(relay_key, routing_domain.into(), dial_info_filter)? + .ok_or_else(|| eyre!("couldn't look up relay"))?; + if !target_node_ref.node_ids().contains(&target_key) { + bail!("signalholepunch target noderef didn't match target key: {:?} != {} for relay {}", target_node_ref, target_key, relay_key ); + } + relay_nr.set_sequencing(sequencing); + + // if any other protocol were possible here we could update this and do_hole_punch + // but tcp hole punch is very very unreliable it seems + let udp_target_node_ref = target_node_ref + .filtered_clone(NodeRefFilter::new().with_protocol_type(ProtocolType::UDP)); + + NodeContactMethod::SignalHolePunch(relay_nr, udp_target_node_ref) + } + ContactMethod::InboundRelay(relay_key) => { + let mut relay_nr = routing_table + .lookup_and_filter_noderef(relay_key, routing_domain.into(), dial_info_filter)? + .ok_or_else(|| eyre!("couldn't look up relay"))?; + relay_nr.set_sequencing(sequencing); + NodeContactMethod::InboundRelay(relay_nr) + } + ContactMethod::OutboundRelay(relay_key) => { + let mut relay_nr = routing_table + .lookup_and_filter_noderef(relay_key, routing_domain.into(), dial_info_filter)? 
+ .ok_or_else(|| eyre!("couldn't look up relay"))?; + relay_nr.set_sequencing(sequencing); + NodeContactMethod::OutboundRelay(relay_nr) + } + }; + + // Cache this + self.inner + .lock() + .node_contact_method_cache + .insert(ncm_key, ncm.clone(), |_, _| {}); + Ok(ncm) + } + + /// Send a reverse connection signal and wait for the return receipt over it + /// Then send the data across the new connection + /// Only usable for PublicInternet routing domain + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self, data), err) + )] + async fn do_reverse_connect( + &self, + relay_nr: NodeRef, + target_nr: NodeRef, + data: Vec, + ) -> EyreResult> { + // Build a return receipt for the signal + let receipt_timeout = ms_to_us( + self.unlocked_inner + .config + .get() + .network + .reverse_connection_receipt_time_ms, + ); + let (receipt, eventual_value) = self.generate_single_shot_receipt(receipt_timeout, [])?; + + // Get target routing domain + let Some(routing_domain) = target_nr.best_routing_domain() else { + return Ok(NetworkResult::no_connection_other("No routing domain for target")); + }; + + // Get our peer info + let Some(peer_info) = self + .routing_table() + .get_own_peer_info(routing_domain) else { + return Ok(NetworkResult::no_connection_other("Own peer info not available")); + }; + + // Issue the signal + let rpc = self.rpc_processor(); + network_result_try!(rpc + .rpc_call_signal( + Destination::relay(relay_nr, target_nr.clone()), + SignalInfo::ReverseConnect { receipt, peer_info }, + ) + .await + .wrap_err("failed to send signal")?); + + // Wait for the return receipt + let inbound_nr = match eventual_value.await.take_value().unwrap() { + ReceiptEvent::ReturnedPrivate { private_route: _ } + | ReceiptEvent::ReturnedOutOfBand + | ReceiptEvent::ReturnedSafety => { + return Ok(NetworkResult::invalid_message( + "reverse connect receipt should be returned in-band", + )); + } + ReceiptEvent::ReturnedInBand { inbound_noderef } => inbound_noderef, + ReceiptEvent::Expired => { + return Ok(NetworkResult::timeout()); + } + ReceiptEvent::Cancelled => { + return Ok(NetworkResult::no_connection_other(format!( + "reverse connect receipt cancelled from {}", + target_nr + ))) + } + }; + + // We expect the inbound noderef to be the same as the target noderef + // if they aren't the same, we should error on this and figure out what the hell is up + if !target_nr.same_entry(&inbound_nr) { + bail!("unexpected noderef mismatch on reverse connect"); + } + + // And now use the existing connection to send over + if let Some(descriptor) = inbound_nr.last_connection() { + match self + .net() + .send_data_to_existing_connection(descriptor, data) + .await?
+ { + None => Ok(NetworkResult::value(descriptor)), + Some(_) => Ok(NetworkResult::no_connection_other( + "unable to send over reverse connection", + )), + } + } else { + bail!("no reverse connection available") + } + } + + /// Send a hole punch signal and do a negotiating ping and wait for the return receipt + /// Then send the data across the new connection + /// Only usable for PublicInternet routing domain + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self, data), err) + )] + async fn do_hole_punch( + &self, + relay_nr: NodeRef, + target_nr: NodeRef, + data: Vec, + ) -> EyreResult> { + // Ensure we are filtered down to UDP (the only hole punch protocol supported today) + assert!(target_nr + .filter_ref() + .map(|nrf| nrf.dial_info_filter.protocol_type_set + == ProtocolTypeSet::only(ProtocolType::UDP)) + .unwrap_or_default()); + + // Build a return receipt for the signal + let receipt_timeout = ms_to_us( + self.unlocked_inner + .config + .get() + .network + .hole_punch_receipt_time_ms, + ); + let (receipt, eventual_value) = self.generate_single_shot_receipt(receipt_timeout, [])?; + + // Get target routing domain + let Some(routing_domain) = target_nr.best_routing_domain() else { + return Ok(NetworkResult::no_connection_other("No routing domain for target")); + }; + + // Get our peer info + let Some(peer_info) = self + .routing_table() + .get_own_peer_info(routing_domain) else { + return Ok(NetworkResult::no_connection_other("Own peer info not available")); + }; + + // Get the udp direct dialinfo for the hole punch + let hole_punch_did = target_nr + .first_filtered_dial_info_detail() + .ok_or_else(|| eyre!("No hole punch capable dialinfo found for node"))?; + + // Do our half of the hole punch by sending an empty packet + // Both sides will do this and then the receipt will get sent over the punched hole + // Don't bother storing the returned connection descriptor as the 'last connection' because the other side of the hole + // punch should come through and create a real 'last connection' for us if this succeeds + network_result_try!( + self.net() + .send_data_to_dial_info(hole_punch_did.dial_info, Vec::new()) + .await? 
+ ); + + // Issue the signal + let rpc = self.rpc_processor(); + network_result_try!(rpc + .rpc_call_signal( + Destination::relay(relay_nr, target_nr.clone()), + SignalInfo::HolePunch { receipt, peer_info }, + ) + .await + .wrap_err("failed to send signal")?); + + // Wait for the return receipt + let inbound_nr = match eventual_value.await.take_value().unwrap() { + ReceiptEvent::ReturnedPrivate { private_route: _ } + | ReceiptEvent::ReturnedOutOfBand + | ReceiptEvent::ReturnedSafety => { + return Ok(NetworkResult::invalid_message( + "hole punch receipt should be returned in-band", + )); + } + ReceiptEvent::ReturnedInBand { inbound_noderef } => inbound_noderef, + ReceiptEvent::Expired => { + return Ok(NetworkResult::timeout()); + } + ReceiptEvent::Cancelled => { + return Ok(NetworkResult::no_connection_other(format!( + "hole punch receipt cancelled from {}", + target_nr + ))) + } + }; + + // We expect the inbound noderef to be the same as the target noderef + // if they aren't the same, we should error on this and figure out what the hell is up + if !target_nr.same_entry(&inbound_nr) { + bail!( + "unexpected noderef mismatch on hole punch {}, expected {}", + inbound_nr, + target_nr + ); + } + + // And now use the existing connection to send over + if let Some(descriptor) = inbound_nr.last_connection() { + match self + .net() + .send_data_to_existing_connection(descriptor, data) + .await? + { + None => Ok(NetworkResult::value(descriptor)), + Some(_) => Ok(NetworkResult::no_connection_other( + "unable to send over hole punch", + )), + } + } else { + bail!("no hole punch available") + } + } +} diff --git a/veilid-core/src/network_manager/stats.rs b/veilid-core/src/network_manager/stats.rs new file mode 100644 index 00000000..d5660470 --- /dev/null +++ b/veilid-core/src/network_manager/stats.rs @@ -0,0 +1,137 @@ +use super::*; + +// Statistics per address +#[derive(Clone, Default)] +pub struct PerAddressStats { + pub last_seen_ts: Timestamp, + pub transfer_stats_accounting: TransferStatsAccounting, + pub transfer_stats: TransferStatsDownUp, +} + +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +pub struct PerAddressStatsKey(IpAddr); + +impl Default for PerAddressStatsKey { + fn default() -> Self { + Self(IpAddr::V4(Ipv4Addr::UNSPECIFIED)) + } +} + +// Statistics about the low-level network +#[derive(Clone)] +pub struct NetworkManagerStats { + pub self_stats: PerAddressStats, + pub per_address_stats: LruCache, +} + +impl Default for NetworkManagerStats { + fn default() -> Self { + Self { + self_stats: PerAddressStats::default(), + per_address_stats: LruCache::new(IPADDR_TABLE_SIZE), + } + } +} + +impl NetworkManager { + // Callbacks from low level network for statistics gathering + pub fn stats_packet_sent(&self, addr: IpAddr, bytes: ByteCount) { + let inner = &mut *self.inner.lock(); + inner + .stats + .self_stats + .transfer_stats_accounting + .add_up(bytes); + inner + .stats + .per_address_stats + .entry(PerAddressStatsKey(addr), |_k, _v| { + // do nothing on LRU evict + }) + .or_insert(PerAddressStats::default()) + .transfer_stats_accounting + .add_up(bytes); + } + + pub fn stats_packet_rcvd(&self, addr: IpAddr, bytes: ByteCount) { + let inner = &mut *self.inner.lock(); + inner + .stats + .self_stats + .transfer_stats_accounting + .add_down(bytes); + inner + .stats + .per_address_stats + .entry(PerAddressStatsKey(addr), |_k, _v| { + // do nothing on LRU evict + }) + .or_insert(PerAddressStats::default()) + .transfer_stats_accounting + .add_down(bytes); + } + + // Get stats + pub fn
get_stats(&self) -> NetworkManagerStats { + let inner = self.inner.lock(); + inner.stats.clone() + } + + pub fn get_veilid_state(&self) -> VeilidStateNetwork { + let has_state = self + .unlocked_inner + .components + .read() + .as_ref() + .map(|c| c.net.is_started()) + .unwrap_or(false); + + if !has_state { + return VeilidStateNetwork { + started: false, + bps_down: 0.into(), + bps_up: 0.into(), + peers: Vec::new(), + }; + } + let routing_table = self.routing_table(); + + let (bps_down, bps_up) = { + let inner = self.inner.lock(); + ( + inner.stats.self_stats.transfer_stats.down.average, + inner.stats.self_stats.transfer_stats.up.average, + ) + }; + + VeilidStateNetwork { + started: true, + bps_down, + bps_up, + peers: { + let mut out = Vec::new(); + for (k, v) in routing_table.get_recent_peers() { + if let Ok(Some(nr)) = routing_table.lookup_node_ref(k) { + let peer_stats = nr.peer_stats(); + let peer = PeerTableData { + node_ids: nr.node_ids().iter().copied().collect(), + peer_address: v.last_connection.remote().to_string(), + peer_stats, + }; + out.push(peer); + } + } + out + }, + } + } + + pub(super) fn send_network_update(&self) { + let update_cb = self.unlocked_inner.update_callback.read().clone(); + if update_cb.is_none() { + return; + } + let state = self.get_veilid_state(); + (update_cb.unwrap())(VeilidUpdate::Network(state)); + } +} diff --git a/veilid-core/src/network_manager/types/connection_descriptor.rs b/veilid-core/src/network_manager/types/connection_descriptor.rs index 1046838f..54481c39 100644 --- a/veilid-core/src/network_manager/types/connection_descriptor.rs +++ b/veilid-core/src/network_manager/types/connection_descriptor.rs @@ -30,9 +30,7 @@ pub struct ConnectionDescriptor { impl ConnectionDescriptor { pub fn new(remote: PeerAddress, local: SocketAddress) -> Self { - assert!( - !remote.protocol_type().is_connection_oriented() || !local.address().is_unspecified() - ); + assert!(!remote.protocol_type().is_ordered() || !local.address().is_unspecified()); Self { remote, diff --git a/veilid-core/src/network_manager/types/dial_info/mod.rs b/veilid-core/src/network_manager/types/dial_info/mod.rs index a6a12c69..5eec84f5 100644 --- a/veilid-core/src/network_manager/types/dial_info/mod.rs +++ b/veilid-core/src/network_manager/types/dial_info/mod.rs @@ -491,13 +491,9 @@ impl DialInfo { } pub fn ordered_sequencing_sort(a: &DialInfo, b: &DialInfo) -> core::cmp::Ordering { - let ca = a.protocol_type().sort_order(Sequencing::EnsureOrdered); - let cb = b.protocol_type().sort_order(Sequencing::EnsureOrdered); - if ca < cb { - return core::cmp::Ordering::Less; - } - if ca > cb { - return core::cmp::Ordering::Greater; + let s = ProtocolType::ordered_sequencing_sort(a.protocol_type(), b.protocol_type()); + if s != core::cmp::Ordering::Equal { + return s; } match (a, b) { (DialInfo::UDP(a), DialInfo::UDP(b)) => a.cmp(b), diff --git a/veilid-core/src/network_manager/types/dial_info/tcp.rs b/veilid-core/src/network_manager/types/dial_info/tcp.rs index 0f93273d..ce7de8b4 100644 --- a/veilid-core/src/network_manager/types/dial_info/tcp.rs +++ b/veilid-core/src/network_manager/types/dial_info/tcp.rs @@ -18,4 +18,4 @@ use super::*; #[archive_attr(repr(C), derive(CheckBytes))] pub struct DialInfoTCP { pub socket_address: SocketAddress, -} \ No newline at end of file +} diff --git a/veilid-core/src/network_manager/types/dial_info_filter.rs b/veilid-core/src/network_manager/types/dial_info_filter.rs index c3635957..c51a5acb 100644 --- a/veilid-core/src/network_manager/types/dial_info_filter.rs 
+++ b/veilid-core/src/network_manager/types/dial_info_filter.rs @@ -7,6 +7,7 @@ use super::*; Eq, PartialOrd, Ord, + Hash, Serialize, Deserialize, RkyvArchive, @@ -61,6 +62,20 @@ impl DialInfoFilter { pub fn is_dead(&self) -> bool { self.protocol_type_set.is_empty() || self.address_type_set.is_empty() } + pub fn with_sequencing(self, sequencing: Sequencing) -> (bool, DialInfoFilter) { + // Get first filtered dialinfo + match sequencing { + Sequencing::NoPreference => (false, self), + Sequencing::PreferOrdered => (true, self), + Sequencing::EnsureOrdered => ( + true, + self.filtered( + &DialInfoFilter::all().with_protocol_type_set(ProtocolType::all_ordered_set()), + ), + ), + } + // return ordered sort and filter with ensure applied + } } impl fmt::Debug for DialInfoFilter { @@ -80,7 +95,24 @@ impl fmt::Debug for DialInfoFilter { } } +impl From for DialInfoFilter { + fn from(other: ProtocolType) -> Self { + Self { + protocol_type_set: ProtocolTypeSet::from(other), + address_type_set: AddressTypeSet::all(), + } + } +} + +impl From for DialInfoFilter { + fn from(other: AddressType) -> Self { + Self { + protocol_type_set: ProtocolTypeSet::all(), + address_type_set: AddressTypeSet::from(other), + } + } +} + pub trait MatchesDialInfoFilter { fn matches_filter(&self, filter: &DialInfoFilter) -> bool; } - diff --git a/veilid-core/src/network_manager/types/protocol_type.rs b/veilid-core/src/network_manager/types/protocol_type.rs index 4ba47000..41167580 100644 --- a/veilid-core/src/network_manager/types/protocol_type.rs +++ b/veilid-core/src/network_manager/types/protocol_type.rs @@ -25,7 +25,7 @@ pub enum ProtocolType { } impl ProtocolType { - pub fn is_connection_oriented(&self) -> bool { + pub fn is_ordered(&self) -> bool { matches!( self, ProtocolType::TCP | ProtocolType::WS | ProtocolType::WSS @@ -72,6 +72,18 @@ impl ProtocolType { pub fn all_ordered_set() -> ProtocolTypeSet { ProtocolType::TCP | ProtocolType::WS | ProtocolType::WSS } + + pub fn ordered_sequencing_sort(a: Self, b: Self) -> core::cmp::Ordering { + let ca = a.sort_order(Sequencing::EnsureOrdered); + let cb = b.sort_order(Sequencing::EnsureOrdered); + if ca < cb { + return core::cmp::Ordering::Less; + } + if ca > cb { + return core::cmp::Ordering::Greater; + } + core::cmp::Ordering::Equal + } } impl fmt::Display for ProtocolType { diff --git a/veilid-core/src/network_manager/wasm/mod.rs b/veilid-core/src/network_manager/wasm/mod.rs index e5db561e..4b87a071 100644 --- a/veilid-core/src/network_manager/wasm/mod.rs +++ b/veilid-core/src/network_manager/wasm/mod.rs @@ -79,7 +79,7 @@ impl Network { ///////////////////////////////////////////////////////////////// - #[instrument(level="trace", err, skip(self, data), fields(data.len = data.len()))] + #[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))] pub async fn send_data_unbound_to_dial_info( &self, dial_info: DialInfo, @@ -119,7 +119,7 @@ impl Network { // This creates a short-lived connection in the case of connection-oriented protocols // for the purpose of sending this one message. // This bypasses the connection table as it is not a 'node to node' connection. 
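The with_sequencing helper added to DialInfoFilter above centralizes the match-on-Sequencing logic that later hunks remove from node_ref.rs and routing_domains.rs: it reports whether callers should sort ordered protocols first and, for EnsureOrdered, narrows the filter to ordered protocols only. A minimal self-contained sketch of that contract, using simplified stand-in types rather than the real Veilid ones:

#[derive(Clone, Copy)]
enum Sequencing {
    NoPreference,
    PreferOrdered,
    EnsureOrdered,
}

#[derive(Clone, Debug, PartialEq)]
struct Filter {
    ordered_protocols_only: bool,
}

impl Filter {
    // Mirrors the shape of DialInfoFilter::with_sequencing in this patch:
    // returns (prefer ordered sort?, possibly narrowed filter).
    fn with_sequencing(self, sequencing: Sequencing) -> (bool, Filter) {
        match sequencing {
            Sequencing::NoPreference => (false, self),
            Sequencing::PreferOrdered => (true, self),
            Sequencing::EnsureOrdered => (true, Filter { ordered_protocols_only: true }),
        }
    }
}

Callers in this patch then use the boolean to decide whether to apply DialInfoDetail::ordered_sequencing_sort and check the returned filter with is_dead() before trying to match dial info.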
- #[instrument(level="trace", err, skip(self, data), fields(data.len = data.len()))] + #[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))] pub async fn send_recv_data_unbound_to_dial_info( &self, dial_info: DialInfo, @@ -167,7 +167,7 @@ impl Network { } } - #[instrument(level="trace", err, skip(self, data), fields(data.len = data.len()))] + #[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))] pub async fn send_data_to_existing_connection( &self, descriptor: ConnectionDescriptor, @@ -212,7 +212,7 @@ impl Network { Ok(Some(data)) } - #[instrument(level="trace", err, skip(self, data), fields(data.len = data.len()))] + #[cfg_attr(feature="verbose-tracing", instrument(level="trace", err, skip(self, data), fields(data.len = data.len())))] pub async fn send_data_to_dial_info( &self, dial_info: DialInfo, diff --git a/veilid-core/src/network_manager/wasm/protocol/ws.rs b/veilid-core/src/network_manager/wasm/protocol/ws.rs index 56082a86..127122b3 100644 --- a/veilid-core/src/network_manager/wasm/protocol/ws.rs +++ b/veilid-core/src/network_manager/wasm/protocol/ws.rs @@ -64,7 +64,7 @@ impl WebsocketNetworkConnection { // self.inner.ws_meta.close().await.map_err(to_io).map(drop) // } - #[instrument(level = "trace", err, skip(self, message), fields(network_result, message.len = message.len()))] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", err, skip(self, message), fields(network_result, message.len = message.len())))] pub async fn send(&self, message: Vec) -> io::Result> { if message.len() > MAX_MESSAGE_SIZE { bail_io_error_other!("sending too large WS message"); @@ -79,11 +79,12 @@ impl WebsocketNetworkConnection { .map_err(to_io) .into_network_result()?; + #[cfg(feature = "verbose-tracing")] tracing::Span::current().record("network_result", &tracing::field::display(&out)); Ok(out) } - // #[instrument(level = "trace", err, skip(self), fields(network_result, ret.len))] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", err, skip(self), fields(network_result, ret.len)))] pub async fn recv(&self) -> io::Result>> { let out = match SendWrapper::new(self.inner.ws_stream.clone().next()).await { Some(WsMessage::Binary(v)) => { @@ -103,7 +104,8 @@ impl WebsocketNetworkConnection { ))); } }; - // tracing::Span::current().record("network_result", &tracing::field::display(&out)); + #[cfg(feature = "verbose-tracing")] + tracing::Span::current().record("network_result", &tracing::field::display(&out)); Ok(out) } } diff --git a/veilid-core/src/routing_table/bucket_entry.rs b/veilid-core/src/routing_table/bucket_entry.rs index f133f4e5..be41c1b9 100644 --- a/veilid-core/src/routing_table/bucket_entry.rs +++ b/veilid-core/src/routing_table/bucket_entry.rs @@ -316,7 +316,7 @@ impl BucketEntryInner { let last_connections = self.last_connections( rti, true, - Some(NodeRefFilter::new().with_routing_domain(routing_domain)), + NodeRefFilter::from(routing_domain), ); !last_connections.is_empty() } @@ -370,7 +370,7 @@ impl BucketEntryInner { let last_connections = self.last_connections( rti, true, - Some(NodeRefFilter::new().with_routing_domain_set(routing_domain_set)), + NodeRefFilter::from(routing_domain_set), ); for lc in last_connections { if let Some(rd) = @@ -412,7 +412,7 @@ impl BucketEntryInner { &self, rti: &RoutingTableInner, only_live: bool, - filter: Option, + filter: NodeRefFilter, ) -> Vec<(ConnectionDescriptor, Timestamp)> { let 
connection_manager = rti.unlocked_inner.network_manager.connection_manager(); @@ -421,26 +421,13 @@ impl BucketEntryInner { .last_connections .iter() .filter_map(|(k, v)| { - let include = if let Some(filter) = &filter { + let include = { let remote_address = v.0.remote_address().address(); - if let Some(routing_domain) = rti.routing_domain_for_address(remote_address) { - if filter.routing_domain_set.contains(routing_domain) + rti.routing_domain_for_address(remote_address).map(|rd| { + filter.routing_domain_set.contains(rd) && filter.dial_info_filter.protocol_type_set.contains(k.0) && filter.dial_info_filter.address_type_set.contains(k.1) - { - // matches filter - true - } else { - // does not match filter - false - } - } else { - // no valid routing domain - false - } - } else { - // no filter - true + }).unwrap_or(false) }; if !include { @@ -454,7 +441,7 @@ impl BucketEntryInner { // Check if the connection is still considered live let alive = // Should we check the connection table? - if v.0.protocol_type().is_connection_oriented() { + if v.0.protocol_type().is_ordered() { // Look the connection up in the connection manager and see if it's still there connection_manager.get_connection(v.0).is_some() } else { @@ -471,8 +458,10 @@ impl BucketEntryInner { } }) .collect(); - // Sort with newest timestamps first - out.sort_by(|a, b| b.1.cmp(&a.1)); + // Sort with newest timestamps + out.sort_by(|a, b| { + b.1.cmp(&a.1) + }); out } diff --git a/veilid-core/src/routing_table/find_peers.rs b/veilid-core/src/routing_table/find_peers.rs index 2a240e26..e7f289ea 100644 --- a/veilid-core/src/routing_table/find_peers.rs +++ b/veilid-core/src/routing_table/find_peers.rs @@ -99,8 +99,8 @@ impl RoutingTable { }, ); - // xxx test // Validate peers returned are, in fact, closer to the key than the node we sent this to + // This same test is used on the other side so we vet things here let valid = match Self::verify_peers_closer(vcrypto2, own_node_id, key, &closest_nodes) { Ok(v) => v, Err(e) => { @@ -108,13 +108,16 @@ impl RoutingTable { } }; if !valid { - panic!("non-closer peers returned"); + error!( + "non-closer peers returned: own_node_id={:#?} key={:#?} closest_nodes={:#?}", + own_node_id, key, closest_nodes + ); } NetworkResult::value(closest_nodes) } - /// Determine if set of peers is closer to key_near than key_far + /// Determine if set of peers is closer to key_near than key_far is to key_near pub(crate) fn verify_peers_closer( vcrypto: CryptoSystemVersion, key_far: TypedKey, @@ -128,14 +131,30 @@ impl RoutingTable { } let mut closer = true; + let d_far = vcrypto.distance(&key_far.value, &key_near.value); for peer in peers { let Some(key_peer) = peer.node_ids().get(kind) else { bail!("peers need to have a key with the same cryptosystem"); }; let d_near = vcrypto.distance(&key_near.value, &key_peer.value); - let d_far = vcrypto.distance(&key_far.value, &key_peer.value); if d_far < d_near { + let warning = format!( + r#"peer: {} +near (key): {} +far (self): {} + d_near: {} + d_far: {} + cmp: {:?}"#, + key_peer.value, + key_near.value, + key_far.value, + d_near, + d_far, + d_near.cmp(&d_far) + ); + warn!("{}", warning); closer = false; + break; } } diff --git a/veilid-core/src/routing_table/mod.rs b/veilid-core/src/routing_table/mod.rs index 42b4be0f..91143fb3 100644 --- a/veilid-core/src/routing_table/mod.rs +++ b/veilid-core/src/routing_table/mod.rs @@ -518,7 +518,10 @@ impl RoutingTable { } /// Look up the best way for two nodes to reach each other over a specific routing domain - 
#[instrument(level = "trace", skip(self), ret)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self), ret) + )] pub fn get_contact_method( &self, routing_domain: RoutingDomain, @@ -1028,7 +1031,7 @@ impl RoutingTable { .sort_and_clean_closest_noderefs(node_id, closest_nodes) } - #[instrument(level = "trace", skip(self), ret)] + #[instrument(level = "trace", skip(self, peers), ret)] pub fn register_find_node_answer( &self, crypto_kind: CryptoKind, @@ -1118,7 +1121,7 @@ impl RoutingTable { return; } Ok(v) => v, - } => { + } => [ format!(": crypto_kind={} node_ref={} wide={}", crypto_kind, node_ref, wide) ] { return; }); @@ -1134,107 +1137,13 @@ impl RoutingTable { continue; } Ok(v) => v, - } => { + } => [ format!(": crypto_kind={} closest_nr={} wide={}", crypto_kind, closest_nr, wide) ] { // Do nothing with non-values continue; }); } } } - - pub fn make_public_internet_relay_node_filter(&self) -> impl Fn(&BucketEntryInner) -> bool { - // Get all our outbound protocol/address types - let outbound_dif = self.get_outbound_dial_info_filter(RoutingDomain::PublicInternet); - let mapped_port_info = self.get_low_level_port_info(); - - move |e: &BucketEntryInner| { - // Ensure this node is not on the local network - if e.has_node_info(RoutingDomain::LocalNetwork.into()) { - return false; - } - - // Disqualify nodes that don't cover all our inbound ports for tcp and udp - // as we need to be able to use the relay for keepalives for all nat mappings - let mut low_level_protocol_ports = mapped_port_info.low_level_protocol_ports.clone(); - - let can_serve_as_relay = e - .node_info(RoutingDomain::PublicInternet) - .map(|n| { - let dids = n.all_filtered_dial_info_details( - Some(DialInfoDetail::ordered_sequencing_sort), // By default, choose connection-oriented protocol for relay - |did| did.matches_filter(&outbound_dif), - ); - for did in &dids { - let pt = did.dial_info.protocol_type(); - let at = did.dial_info.address_type(); - if let Some((llpt, port)) = mapped_port_info.protocol_to_port.get(&(pt, at)) - { - low_level_protocol_ports.remove(&(*llpt, at, *port)); - } - } - low_level_protocol_ports.is_empty() - }) - .unwrap_or(false); - if !can_serve_as_relay { - return false; - } - - true - } - } - - #[instrument(level = "trace", skip(self), ret)] - pub fn find_inbound_relay( - &self, - routing_domain: RoutingDomain, - cur_ts: Timestamp, - ) -> Option { - // Get relay filter function - let relay_node_filter = match routing_domain { - RoutingDomain::PublicInternet => self.make_public_internet_relay_node_filter(), - RoutingDomain::LocalNetwork => { - unimplemented!(); - } - }; - - // Go through all entries and find fastest entry that matches filter function - let inner = self.inner.read(); - let inner = &*inner; - let mut best_inbound_relay: Option> = None; - - // Iterate all known nodes for candidates - inner.with_entries(cur_ts, BucketEntryState::Unreliable, |rti, entry| { - let entry2 = entry.clone(); - entry.with(rti, |rti, e| { - // Ensure we have the node's status - if let Some(node_status) = e.node_status(routing_domain) { - // Ensure the node will relay - if node_status.will_relay() { - // Compare against previous candidate - if let Some(best_inbound_relay) = best_inbound_relay.as_mut() { - // Less is faster - let better = best_inbound_relay.with(rti, |_rti, best| { - // choose low latency stability for relays - BucketEntryInner::cmp_fastest_reliable(cur_ts, e, best) - == std::cmp::Ordering::Less - }); - // Now apply filter function and see if this node should be 
included - if better && relay_node_filter(e) { - *best_inbound_relay = entry2; - } - } else if relay_node_filter(e) { - // Always store the first candidate - best_inbound_relay = Some(entry2); - } - } - } - }); - // Don't end early, iterate through all entries - Option::<()>::None - }); - // Return the best inbound relay noderef - best_inbound_relay.map(|e| NodeRef::new(self.clone(), e, None)) - } } impl core::ops::Deref for RoutingTable { diff --git a/veilid-core/src/routing_table/node_ref.rs b/veilid-core/src/routing_table/node_ref.rs index 5cb53c64..09c968a1 100644 --- a/veilid-core/src/routing_table/node_ref.rs +++ b/veilid-core/src/routing_table/node_ref.rs @@ -217,25 +217,24 @@ pub trait NodeRefBase: Sized { fn first_filtered_dial_info_detail(&self) -> Option { let routing_domain_set = self.routing_domain_set(); let dial_info_filter = self.dial_info_filter(); + let sequencing = self.common().sequencing; + let (ordered, dial_info_filter) = dial_info_filter.with_sequencing(sequencing); - let (sort, dial_info_filter) = match self.common().sequencing { - Sequencing::NoPreference => (None, dial_info_filter), - Sequencing::PreferOrdered => ( - Some(DialInfoDetail::ordered_sequencing_sort), - dial_info_filter, - ), - Sequencing::EnsureOrdered => ( - Some(DialInfoDetail::ordered_sequencing_sort), - dial_info_filter.filtered( - &DialInfoFilter::all().with_protocol_type_set(ProtocolType::all_ordered_set()), - ), - ), + let sort = if ordered { + Some(DialInfoDetail::ordered_sequencing_sort) + } else { + None }; + if dial_info_filter.is_dead() { + return None; + } + + let filter = |did: &DialInfoDetail| did.matches_filter(&dial_info_filter); + self.operate(|_rt, e| { for routing_domain in routing_domain_set { if let Some(ni) = e.node_info(routing_domain) { - let filter = |did: &DialInfoDetail| did.matches_filter(&dial_info_filter); if let Some(did) = ni.first_filtered_dial_info_detail(sort, filter) { return Some(did); } @@ -278,11 +277,22 @@ pub trait NodeRefBase: Sized { out } + /// Get the most recent 'last connection' to this node + /// Filtered first and then sorted by ordering preference and then by most recent fn last_connection(&self) -> Option { - // Get the last connections and the last time we saw anything with this connection - // Filtered first and then sorted by most recent self.operate(|rti, e| { - let last_connections = e.last_connections(rti, true, self.common().filter.clone()); + // apply sequencing to filter and get sort + let sequencing = self.common().sequencing; + let filter = self.common().filter.clone().unwrap_or_default(); + let (ordered, filter) = filter.with_sequencing(sequencing); + let mut last_connections = e.last_connections(rti, true, filter); + + if ordered { + last_connections.sort_by(|a, b| { + ProtocolType::ordered_sequencing_sort(a.0.protocol_type(), b.0.protocol_type()) + }); + } + last_connections.first().map(|x| x.0) }) } diff --git a/veilid-core/src/routing_table/node_ref_filter.rs b/veilid-core/src/routing_table/node_ref_filter.rs index 934d93a1..9c4d44cb 100644 --- a/veilid-core/src/routing_table/node_ref_filter.rs +++ b/veilid-core/src/routing_table/node_ref_filter.rs @@ -1,6 +1,6 @@ use super::*; -#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct NodeRefFilter { pub routing_domain_set: RoutingDomainSet, pub dial_info_filter: DialInfoFilter, @@ -19,7 +19,6 @@ impl NodeRefFilter { dial_info_filter: DialInfoFilter::all(), } } - pub fn with_routing_domain(mut self, 
routing_domain: RoutingDomain) -> Self { self.routing_domain_set = routing_domain.into(); self @@ -58,4 +57,54 @@ impl NodeRefFilter { pub fn is_dead(&self) -> bool { self.dial_info_filter.is_dead() || self.routing_domain_set.is_empty() } + pub fn with_sequencing(mut self, sequencing: Sequencing) -> (bool, Self) { + let (ordered, dif) = self.dial_info_filter.with_sequencing(sequencing); + self.dial_info_filter = dif; + (ordered, self) + } +} + +impl From for NodeRefFilter { + fn from(other: RoutingDomain) -> Self { + Self { + routing_domain_set: other.into(), + dial_info_filter: DialInfoFilter::all(), + } + } +} + +impl From for NodeRefFilter { + fn from(other: RoutingDomainSet) -> Self { + Self { + routing_domain_set: other, + dial_info_filter: DialInfoFilter::all(), + } + } +} + +impl From for NodeRefFilter { + fn from(other: DialInfoFilter) -> Self { + Self { + routing_domain_set: RoutingDomainSet::all(), + dial_info_filter: other, + } + } +} + +impl From for NodeRefFilter { + fn from(other: ProtocolType) -> Self { + Self { + routing_domain_set: RoutingDomainSet::all(), + dial_info_filter: DialInfoFilter::from(other), + } + } +} + +impl From for NodeRefFilter { + fn from(other: AddressType) -> Self { + Self { + routing_domain_set: RoutingDomainSet::all(), + dial_info_filter: DialInfoFilter::from(other), + } + } } diff --git a/veilid-core/src/routing_table/route_spec_store/route_spec_store.rs b/veilid-core/src/routing_table/route_spec_store/route_spec_store.rs index db490b08..83167860 100644 --- a/veilid-core/src/routing_table/route_spec_store/route_spec_store.rs +++ b/veilid-core/src/routing_table/route_spec_store/route_spec_store.rs @@ -533,7 +533,7 @@ impl RouteSpecStore { } /// validate data using a private route's key and signature chain - #[instrument(level = "trace", skip(self, data, callback), ret)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, data, callback), ret))] pub fn with_signature_validated_route( &self, public_key: &TypedKey, @@ -593,7 +593,7 @@ impl RouteSpecStore { Some(callback(rssd, rsd)) } - #[instrument(level = "trace", skip(self), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self), ret, err))] async fn test_allocated_route(&self, private_route_id: RouteId) -> EyreResult { // Make loopback route to test with let dest = { @@ -616,8 +616,8 @@ impl RouteSpecStore { let private_route = self.assemble_private_route(&key, None)?; // Always test routes with safety routes that are more likely to succeed let stability = Stability::Reliable; - // Routes can test with whatever sequencing they were allocated with - let sequencing = Sequencing::NoPreference; + // Routes should test with the most likely to succeed sequencing they are capable of + let sequencing = Sequencing::PreferOrdered; let safety_spec = SafetySpec { preferred_route: Some(private_route_id), @@ -657,12 +657,17 @@ impl RouteSpecStore { bail!("no best key to test remote route"); }; + // Always test routes with safety routes that are more likely to succeed + let stability = Stability::Reliable; + // Routes should test with the most likely to succeed sequencing they are capable of + let sequencing = Sequencing::PreferOrdered; + // Get a safety route that is good enough let safety_spec = SafetySpec { preferred_route: None, hop_count: self.unlocked_inner.default_route_hop_count, - stability: Stability::default(), - sequencing: Sequencing::default(), + stability, + sequencing, }; let safety_selection = SafetySelection::Safe(safety_spec); @@ 
-711,7 +716,7 @@ impl RouteSpecStore { } /// Test an allocated route for continuity - #[instrument(level = "trace", skip(self), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self), ret, err))] pub async fn test_route(&self, id: RouteId) -> EyreResult { let is_remote = self.is_route_id_remote(&id); if is_remote { @@ -1078,7 +1083,7 @@ impl RouteSpecStore { } /// Get an allocated route that matches a particular safety spec - #[instrument(level = "trace", skip(self, inner, rti), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, inner, rti), ret, err))] fn get_route_for_safety_spec_inner( &self, inner: &mut RouteSpecStoreInner, @@ -1149,7 +1154,7 @@ impl RouteSpecStore { } /// Get a private route to use for the answer to question - #[instrument(level = "trace", skip(self), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self), ret, err))] pub fn get_private_route_for_safety_spec( &self, crypto_kind: CryptoKind, @@ -1258,7 +1263,7 @@ impl RouteSpecStore { /// Assemble a single private route for publication /// Returns a PrivateRoute object for an allocated private route key - #[instrument(level = "trace", skip(self), err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self), err))] pub fn assemble_private_route( &self, key: &PublicKey, @@ -1285,7 +1290,7 @@ impl RouteSpecStore { /// Assemble private route set for publication /// Returns a vec of PrivateRoute objects for an allocated private route - #[instrument(level = "trace", skip(self), err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self), err))] pub fn assemble_private_routes( &self, id: &RouteId, @@ -1311,7 +1316,7 @@ impl RouteSpecStore { /// Import a remote private route for compilation /// It is safe to import the same route more than once and it will return the same route id /// Returns a route set id - #[instrument(level = "trace", skip(self, blob), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, blob), ret, err))] pub fn import_remote_private_route(&self, blob: Vec) -> EyreResult { let cur_ts = get_aligned_timestamp(); @@ -1342,7 +1347,7 @@ impl RouteSpecStore { } /// Release a remote private route that is no longer in use - #[instrument(level = "trace", skip(self), ret)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self), ret))] pub fn release_remote_private_route(&self, id: RouteId) -> bool { let inner = &mut *self.inner.lock(); inner.cache.remove_remote_private_route(id) diff --git a/veilid-core/src/routing_table/route_spec_store/route_stats.rs b/veilid-core/src/routing_table/route_spec_store/route_stats.rs index 93b4f304..1327e87f 100644 --- a/veilid-core/src/routing_table/route_spec_store/route_stats.rs +++ b/veilid-core/src/routing_table/route_spec_store/route_stats.rs @@ -61,6 +61,9 @@ impl RouteStats { pub fn record_sent(&mut self, cur_ts: Timestamp, bytes: ByteCount) { self.last_sent_ts = Some(cur_ts); self.transfer_stats_accounting.add_up(bytes); + + // If we sent successfully, then reset 'failed_to_send' + self.failed_to_send = 0; } /// Mark a route as having been sent to @@ -101,6 +104,8 @@ impl RouteStats { self.last_tested_ts = None; self.last_sent_ts = None; self.last_received_ts = None; + self.failed_to_send = 0; + self.questions_lost = 0; } /// Check if a route needs testing diff --git a/veilid-core/src/routing_table/routing_domains.rs 
b/veilid-core/src/routing_table/routing_domains.rs index 096d777d..e9f96a1b 100644 --- a/veilid-core/src/routing_table/routing_domains.rs +++ b/veilid-core/src/routing_table/routing_domains.rs @@ -223,7 +223,7 @@ impl Default for PublicInternetRoutingDomainDetail { } } -fn first_filtered_dial_info_detail( +fn first_filtered_dial_info_detail_between_nodes( from_node: &NodeInfo, to_node: &NodeInfo, dial_info_filter: &DialInfoFilter, @@ -235,28 +235,21 @@ fn first_filtered_dial_info_detail( .with_protocol_type_set(from_node.outbound_protocols()), ); - // Get first filtered dialinfo - let (sort, dial_info_filter) = match sequencing { - Sequencing::NoPreference => (None, dial_info_filter), - Sequencing::PreferOrdered => ( - Some(DialInfoDetail::ordered_sequencing_sort), - dial_info_filter, - ), - Sequencing::EnsureOrdered => ( - Some(DialInfoDetail::ordered_sequencing_sort), - dial_info_filter.filtered( - &DialInfoFilter::all().with_protocol_type_set(ProtocolType::all_ordered_set()), - ), - ), + // Apply sequencing and get sort + let (ordered, dial_info_filter) = dial_info_filter.with_sequencing(sequencing); + let sort = if ordered { + Some(DialInfoDetail::ordered_sequencing_sort) + } else { + None }; + // If the filter is dead then we won't be able to connect if dial_info_filter.is_dead() { return None; } - let direct_filter = |did: &DialInfoDetail| did.matches_filter(&dial_info_filter); - // Get the best match dial info for node B if we have it + let direct_filter = |did: &DialInfoDetail| did.matches_filter(&dial_info_filter); to_node.first_filtered_dial_info_detail(sort, direct_filter) } @@ -294,7 +287,7 @@ impl RoutingDomainDetail for PublicInternetRoutingDomainDetail { // Get the best match dial info for node B if we have it if let Some(target_did) = - first_filtered_dial_info_detail(node_a, node_b, &dial_info_filter, sequencing) + first_filtered_dial_info_detail_between_nodes(node_a, node_b, &dial_info_filter, sequencing) { // Do we need to signal before going inbound? if !target_did.class.requires_signal() { @@ -319,7 +312,7 @@ impl RoutingDomainDetail for PublicInternetRoutingDomainDetail { }; // Can node A reach the inbound relay directly? - if first_filtered_dial_info_detail( + if first_filtered_dial_info_detail_between_nodes( node_a, node_b_relay, &dial_info_filter, @@ -332,7 +325,7 @@ impl RoutingDomainDetail for PublicInternetRoutingDomainDetail { ///////// Reverse connection // Get the best match dial info for an reverse inbound connection from node B to node A - if let Some(reverse_did) = first_filtered_dial_info_detail( + if let Some(reverse_did) = first_filtered_dial_info_detail_between_nodes( node_b, node_a, &dial_info_filter, @@ -358,14 +351,14 @@ impl RoutingDomainDetail for PublicInternetRoutingDomainDetail { let udp_dial_info_filter = dial_info_filter .clone() .filtered(&DialInfoFilter::all().with_protocol_type(ProtocolType::UDP)); - if let Some(target_udp_did) = first_filtered_dial_info_detail( + if let Some(target_udp_did) = first_filtered_dial_info_detail_between_nodes( node_a, node_b, &udp_dial_info_filter, sequencing, ) { // Does node A have a direct udp dialinfo that node B can reach? - if let Some(reverse_udp_did) = first_filtered_dial_info_detail( + if let Some(reverse_udp_did) = first_filtered_dial_info_detail_between_nodes( node_b, node_a, &udp_dial_info_filter, @@ -407,7 +400,7 @@ impl RoutingDomainDetail for PublicInternetRoutingDomainDetail { }; // Can we reach the full relay? 
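// --- Editor's note, not part of this patch ---
// Several hunks above (node_ref.rs and routing_domains.rs) replace a repeated `match` on
// `Sequencing` with a single call to `with_sequencing(sequencing)` that returns
// `(ordered, filter)`. The helper's body is not shown in this diff; the sketch below is
// reconstructed from the removed match arms and the call sites, and is an assumption about
// `DialInfoFilter::with_sequencing`, not the actual implementation elsewhere in the tree.
pub fn with_sequencing(self, sequencing: Sequencing) -> (bool, Self) {
    match sequencing {
        // No ordering requested: caller applies no sort, filter unchanged
        Sequencing::NoPreference => (false, self),
        // Prefer ordered: caller sorts ordered dial info first, but unordered stays usable
        Sequencing::PreferOrdered => (true, self),
        // Ensure ordered: caller sorts, and the filter is restricted to ordered protocols
        Sequencing::EnsureOrdered => (
            true,
            self.filtered(
                &DialInfoFilter::all()
                    .with_protocol_type_set(ProtocolType::all_ordered_set()),
            ),
        ),
    }
}
// --- end editor's note ---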
- if first_filtered_dial_info_detail( + if first_filtered_dial_info_detail_between_nodes( node_a, &node_b_relay, &dial_info_filter, diff --git a/veilid-core/src/routing_table/routing_table_inner.rs b/veilid-core/src/routing_table/routing_table_inner.rs index 4f5af94c..79297831 100644 --- a/veilid-core/src/routing_table/routing_table_inner.rs +++ b/veilid-core/src/routing_table/routing_table_inner.rs @@ -111,6 +111,9 @@ impl RoutingTableInner { routing_domain_set: RoutingDomainSet, filter: &DialInfoFilter, ) -> Option { + if filter.is_dead() || routing_domain_set.is_empty() { + return None; + } for routing_domain in routing_domain_set { let did = self.with_routing_domain(routing_domain, |rd| { for did in rd.common().dial_info_details() { @@ -133,6 +136,9 @@ impl RoutingTableInner { filter: &DialInfoFilter, ) -> Vec { let mut ret = Vec::new(); + if filter.is_dead() || routing_domain_set.is_empty() { + return ret; + } for routing_domain in routing_domain_set { self.with_routing_domain(routing_domain, |rd| { for did in rd.common().dial_info_details() { @@ -201,7 +207,10 @@ impl RoutingTableInner { true } - #[instrument(level = "trace", skip(self), ret)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self), ret) + )] pub fn get_contact_method( &self, routing_domain: RoutingDomain, diff --git a/veilid-core/src/routing_table/tasks/bootstrap.rs b/veilid-core/src/routing_table/tasks/bootstrap.rs index f811e4aa..a23b5688 100644 --- a/veilid-core/src/routing_table/tasks/bootstrap.rs +++ b/veilid-core/src/routing_table/tasks/bootstrap.rs @@ -377,11 +377,8 @@ impl RoutingTable { // Ensure we got the signed peer info if !nr.signed_node_info_has_valid_signature(RoutingDomain::PublicInternet) { - log_rtab!(warn - "bootstrap at {:?} did not return valid signed node info", - nr - ); - // If this node info is invalid, it will time out after being unpingable + log_rtab!(warn "bootstrap server is not responding"); + log_rtab!(debug "bootstrap server is not responding: {}", nr); } else { // otherwise this bootstrap is valid, lets ask it to find ourselves now routing_table.reverse_find_node(crypto_kind, nr, true).await diff --git a/veilid-core/src/routing_table/tasks/private_route_management.rs b/veilid-core/src/routing_table/tasks/private_route_management.rs index 3f44a529..a5fe37d8 100644 --- a/veilid-core/src/routing_table/tasks/private_route_management.rs +++ b/veilid-core/src/routing_table/tasks/private_route_management.rs @@ -198,12 +198,12 @@ impl RoutingTable { // Newly allocated routes let mut newly_allocated_routes = Vec::new(); for _n in 0..routes_to_allocate { - // Parameters here must be the default safety route spec + // Parameters here must be the most inclusive safety route spec // These will be used by test_remote_route as well if let Some(k) = rss.allocate_route( &VALID_CRYPTO_KINDS, Stability::default(), - Sequencing::default(), + Sequencing::EnsureOrdered, default_route_hop_count, DirectionSet::all(), &[], diff --git a/veilid-core/src/routing_table/tasks/relay_management.rs b/veilid-core/src/routing_table/tasks/relay_management.rs index 8d980c04..37d5ec5f 100644 --- a/veilid-core/src/routing_table/tasks/relay_management.rs +++ b/veilid-core/src/routing_table/tasks/relay_management.rs @@ -81,4 +81,97 @@ impl RoutingTable { Ok(()) } + + pub fn make_public_internet_relay_node_filter(&self) -> impl Fn(&BucketEntryInner) -> bool { + // Get all our outbound protocol/address types + let outbound_dif = self.get_outbound_dial_info_filter(RoutingDomain::PublicInternet); 
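// --- Editor's note, not part of this patch ---
// A recurring change in this patch wraps `#[instrument(...)]` in
// `#[cfg_attr(feature = "verbose-tracing", ...)]` so trace spans are only compiled in when
// the `verbose-tracing` cargo feature is enabled; with the feature off the attribute expands
// to nothing and the function carries no tracing overhead. A minimal illustration, with a
// hypothetical function and parameter names:
#[cfg_attr(feature = "verbose-tracing", tracing::instrument(level = "trace", skip(payload)))]
fn example_send(op_id: u64, payload: &[u8]) -> usize {
    // With `verbose-tracing` off this is a plain function; with it on, a trace-level span
    // named `example_send` records `op_id` (payload is skipped) around the body.
    payload.len()
}
// --- end editor's note ---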
+ let mapped_port_info = self.get_low_level_port_info(); + + move |e: &BucketEntryInner| { + // Ensure this node is not on the local network + if e.has_node_info(RoutingDomain::LocalNetwork.into()) { + return false; + } + + // Disqualify nodes that don't cover all our inbound ports for tcp and udp + // as we need to be able to use the relay for keepalives for all nat mappings + let mut low_level_protocol_ports = mapped_port_info.low_level_protocol_ports.clone(); + + let can_serve_as_relay = e + .node_info(RoutingDomain::PublicInternet) + .map(|n| { + let dids = n.all_filtered_dial_info_details(DialInfoDetail::NO_SORT, |did| { + did.matches_filter(&outbound_dif) + }); + for did in &dids { + let pt = did.dial_info.protocol_type(); + let at = did.dial_info.address_type(); + if let Some((llpt, port)) = mapped_port_info.protocol_to_port.get(&(pt, at)) + { + low_level_protocol_ports.remove(&(*llpt, at, *port)); + } + } + low_level_protocol_ports.is_empty() + }) + .unwrap_or(false); + if !can_serve_as_relay { + return false; + } + + true + } + } + + #[instrument(level = "trace", skip(self), ret)] + pub fn find_inbound_relay( + &self, + routing_domain: RoutingDomain, + cur_ts: Timestamp, + ) -> Option { + // Get relay filter function + let relay_node_filter = match routing_domain { + RoutingDomain::PublicInternet => self.make_public_internet_relay_node_filter(), + RoutingDomain::LocalNetwork => { + unimplemented!(); + } + }; + + // Go through all entries and find fastest entry that matches filter function + let inner = self.inner.read(); + let inner = &*inner; + let mut best_inbound_relay: Option> = None; + + // Iterate all known nodes for candidates + inner.with_entries(cur_ts, BucketEntryState::Unreliable, |rti, entry| { + let entry2 = entry.clone(); + entry.with(rti, |rti, e| { + // Ensure we have the node's status + if let Some(node_status) = e.node_status(routing_domain) { + // Ensure the node will relay + if node_status.will_relay() { + // Compare against previous candidate + if let Some(best_inbound_relay) = best_inbound_relay.as_mut() { + // Less is faster + let better = best_inbound_relay.with(rti, |_rti, best| { + // choose low latency stability for relays + BucketEntryInner::cmp_fastest_reliable(cur_ts, e, best) + == std::cmp::Ordering::Less + }); + // Now apply filter function and see if this node should be included + if better && relay_node_filter(e) { + *best_inbound_relay = entry2; + } + } else if relay_node_filter(e) { + // Always store the first candidate + best_inbound_relay = Some(entry2); + } + } + } + }); + // Don't end early, iterate through all entries + Option::<()>::None + }); + // Return the best inbound relay noderef + best_inbound_relay.map(|e| NodeRef::new(self.clone(), e, None)) + } } diff --git a/veilid-core/src/routing_table/types/dial_info_detail.rs b/veilid-core/src/routing_table/types/dial_info_detail.rs index 22adf233..52e51492 100644 --- a/veilid-core/src/routing_table/types/dial_info_detail.rs +++ b/veilid-core/src/routing_table/types/dial_info_detail.rs @@ -29,13 +29,11 @@ impl MatchesDialInfoFilter for DialInfoDetail { impl DialInfoDetail { pub fn ordered_sequencing_sort(a: &DialInfoDetail, b: &DialInfoDetail) -> core::cmp::Ordering { - if a.class < b.class { - return core::cmp::Ordering::Less; + let c = DialInfo::ordered_sequencing_sort(&a.dial_info, &b.dial_info); + if c != core::cmp::Ordering::Equal { + return c; } - if a.class > b.class { - return core::cmp::Ordering::Greater; - } - DialInfo::ordered_sequencing_sort(&a.dial_info, &b.dial_info) + 
a.class.cmp(&b.class) } pub const NO_SORT: std::option::Option< for<'r, 's> fn(&'r DialInfoDetail, &'s DialInfoDetail) -> std::cmp::Ordering, diff --git a/veilid-core/src/routing_table/types/signed_node_info.rs b/veilid-core/src/routing_table/types/signed_node_info.rs index 60e99c89..fe2ecbc1 100644 --- a/veilid-core/src/routing_table/types/signed_node_info.rs +++ b/veilid-core/src/routing_table/types/signed_node_info.rs @@ -71,7 +71,7 @@ impl SignedNodeInfo { match sequencing { Sequencing::NoPreference | Sequencing::PreferOrdered => return true, Sequencing::EnsureOrdered => { - if did.dial_info.protocol_type().is_connection_oriented() { + if did.dial_info.protocol_type().is_ordered() { return true; } } @@ -85,7 +85,7 @@ impl SignedNodeInfo { match sequencing { Sequencing::NoPreference | Sequencing::PreferOrdered => return true, Sequencing::EnsureOrdered => { - if did.dial_info.protocol_type().is_connection_oriented() { + if did.dial_info.protocol_type().is_ordered() { return true; } } diff --git a/veilid-core/src/rpc_processor/coders/operations/operation_route.rs b/veilid-core/src/rpc_processor/coders/operations/operation_route.rs index 7d72bd4c..38a7c3a1 100644 --- a/veilid-core/src/rpc_processor/coders/operations/operation_route.rs +++ b/veilid-core/src/rpc_processor/coders/operations/operation_route.rs @@ -1,6 +1,6 @@ use super::*; -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct RoutedOperation { sequencing: Sequencing, signatures: Vec, @@ -8,6 +8,17 @@ pub struct RoutedOperation { data: Vec, } +impl fmt::Debug for RoutedOperation { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RoutedOperation") + .field("sequencing", &self.sequencing) + .field("signatures.len", &self.signatures.len()) + .field("nonce", &self.nonce) + .field("data(len)", &self.data.len()) + .finish() + } +} + impl RoutedOperation { pub fn new(sequencing: Sequencing, nonce: Nonce, data: Vec) -> Self { Self { diff --git a/veilid-core/src/rpc_processor/coders/operations/operation_value_changed.rs b/veilid-core/src/rpc_processor/coders/operations/operation_value_changed.rs index d7a815a2..b138d884 100644 --- a/veilid-core/src/rpc_processor/coders/operations/operation_value_changed.rs +++ b/veilid-core/src/rpc_processor/coders/operations/operation_value_changed.rs @@ -4,7 +4,7 @@ use crate::storage_manager::SignedValueData; #[derive(Debug, Clone)] pub struct RPCOperationValueChanged { key: TypedKey, - subkeys: Vec, + subkeys: ValueSubkeyRangeSet, count: u32, value: SignedValueData, } @@ -12,7 +12,7 @@ pub struct RPCOperationValueChanged { impl RPCOperationValueChanged { pub fn new( key: TypedKey, - subkeys: Vec, + subkeys: ValueSubkeyRangeSet, count: u32, value: SignedValueData, ) -> Self { @@ -32,7 +32,7 @@ impl RPCOperationValueChanged { pub fn key(&self) -> &TypedKey { &self.key } - pub fn subkeys(&self) -> &[ValueSubkeyRange] { + pub fn subkeys(&self) -> &ValueSubkeyRangeSet { &self.subkeys } pub fn count(&self) -> u32 { @@ -41,7 +41,7 @@ impl RPCOperationValueChanged { pub fn value(&self) -> &SignedValueData { &self.value } - pub fn destructure(self) -> (TypedKey, Vec, u32, SignedValueData) { + pub fn destructure(self) -> (TypedKey, ValueSubkeyRangeSet, u32, SignedValueData) { (self.key, self.subkeys, self.count, self.value) } @@ -52,25 +52,20 @@ impl RPCOperationValueChanged { let key = decode_typed_key(&k_reader)?; let sk_reader = reader.get_subkeys().map_err(RPCError::protocol)?; - let mut subkeys = Vec::::with_capacity( - sk_reader - .len() - .try_into() - 
.map_err(RPCError::map_protocol("too many subkey ranges"))?, - ); + let mut subkeys = ValueSubkeyRangeSet::new(); for skr in sk_reader.iter() { let vskr = (skr.get_start(), skr.get_end()); if vskr.0 > vskr.1 { return Err(RPCError::protocol("invalid subkey range")); } if let Some(lvskr) = subkeys.last() { - if lvskr.1 >= vskr.0 { + if lvskr >= vskr.0 { return Err(RPCError::protocol( "subkey range out of order or not merged", )); } } - subkeys.push(vskr); + subkeys.ranges_insert(vskr.0..=vskr.1); } let count = reader.get_count(); let v_reader = reader.get_value().map_err(RPCError::protocol)?; @@ -91,14 +86,14 @@ impl RPCOperationValueChanged { let mut sk_builder = builder.reborrow().init_subkeys( self.subkeys - .len() + .ranges_len() .try_into() .map_err(RPCError::map_internal("invalid subkey range list length"))?, ); - for (i, skr) in self.subkeys.iter().enumerate() { + for (i, skr) in self.subkeys.ranges().enumerate() { let mut skr_builder = sk_builder.reborrow().get(i as u32); - skr_builder.set_start(skr.0); - skr_builder.set_end(skr.1); + skr_builder.set_start(*skr.start()); + skr_builder.set_end(*skr.end()); } builder.set_count(self.count); diff --git a/veilid-core/src/rpc_processor/coders/operations/operation_watch_value.rs b/veilid-core/src/rpc_processor/coders/operations/operation_watch_value.rs index 9da63df7..d2aefa79 100644 --- a/veilid-core/src/rpc_processor/coders/operations/operation_watch_value.rs +++ b/veilid-core/src/rpc_processor/coders/operations/operation_watch_value.rs @@ -6,7 +6,7 @@ const MAX_WATCH_VALUE_A_PEERS_LEN: usize = 20; #[derive(Debug, Clone)] pub struct RPCOperationWatchValueQ { key: TypedKey, - subkeys: Vec, + subkeys: ValueSubkeyRangeSet, expiration: u64, count: u32, watcher: PublicKey, @@ -16,7 +16,7 @@ pub struct RPCOperationWatchValueQ { impl RPCOperationWatchValueQ { pub fn new( key: TypedKey, - subkeys: Vec, + subkeys: ValueSubkeyRangeSet, expiration: u64, count: u32, watcher: PublicKey, @@ -41,9 +41,9 @@ impl RPCOperationWatchValueQ { Vec::with_capacity(PUBLIC_KEY_LENGTH + 4 + (self.subkeys.len() * 8) + 8 + 4); sig_data.extend_from_slice(&self.key.kind.0); sig_data.extend_from_slice(&self.key.value.bytes); - for sk in &self.subkeys { - sig_data.extend_from_slice(&sk.0.to_le_bytes()); - sig_data.extend_from_slice(&sk.1.to_le_bytes()); + for sk in self.subkeys.ranges() { + sig_data.extend_from_slice(&sk.start().to_le_bytes()); + sig_data.extend_from_slice(&sk.end().to_le_bytes()); } sig_data.extend_from_slice(&self.expiration.to_le_bytes()); sig_data.extend_from_slice(&self.count.to_le_bytes()); @@ -66,7 +66,7 @@ impl RPCOperationWatchValueQ { pub fn key(&self) -> &TypedKey { &self.key } - pub fn subkeys(&self) -> &[ValueSubkeyRange] { + pub fn subkeys(&self) -> &ValueSubkeyRangeSet { &self.subkeys } pub fn expiration(&self) -> u64 { @@ -86,7 +86,7 @@ impl RPCOperationWatchValueQ { self, ) -> ( TypedKey, - Vec, + ValueSubkeyRangeSet, u64, u32, PublicKey, @@ -112,25 +112,20 @@ impl RPCOperationWatchValueQ { if sk_reader.len() as usize > MAX_WATCH_VALUE_Q_SUBKEYS_LEN { return Err(RPCError::protocol("WatchValueQ subkeys length too long")); } - let mut subkeys = Vec::::with_capacity( - sk_reader - .len() - .try_into() - .map_err(RPCError::map_protocol("too many subkey ranges"))?, - ); + let mut subkeys = ValueSubkeyRangeSet::new(); for skr in sk_reader.iter() { let vskr = (skr.get_start(), skr.get_end()); if vskr.0 > vskr.1 { return Err(RPCError::protocol("invalid subkey range")); } if let Some(lvskr) = subkeys.last() { - if lvskr.1 >= vskr.0 { + if lvskr 
>= vskr.0 { return Err(RPCError::protocol( "subkey range out of order or not merged", )); } } - subkeys.push(vskr); + subkeys.ranges_insert(vskr.0..=vskr.1); } let expiration = reader.get_expiration(); @@ -165,10 +160,10 @@ impl RPCOperationWatchValueQ { .try_into() .map_err(RPCError::map_internal("invalid subkey range list length"))?, ); - for (i, skr) in self.subkeys.iter().enumerate() { + for (i, skr) in self.subkeys.ranges().enumerate() { let mut skr_builder = sk_builder.reborrow().get(i as u32); - skr_builder.set_start(skr.0); - skr_builder.set_end(skr.1); + skr_builder.set_start(*skr.start()); + skr_builder.set_end(*skr.end()); } builder.set_expiration(self.expiration); builder.set_count(self.count); diff --git a/veilid-core/src/rpc_processor/destination.rs b/veilid-core/src/rpc_processor/destination.rs index 9ffe6995..9e82758a 100644 --- a/veilid-core/src/rpc_processor/destination.rs +++ b/veilid-core/src/rpc_processor/destination.rs @@ -44,7 +44,7 @@ impl Destination { } } pub fn relay(relay: NodeRef, target: NodeRef) -> Self { - let sequencing = relay.sequencing(); + let sequencing = relay.sequencing().max(target.sequencing()); Self::Relay { relay, target, diff --git a/veilid-core/src/rpc_processor/mod.rs b/veilid-core/src/rpc_processor/mod.rs index f9b41323..1e41497c 100644 --- a/veilid-core/src/rpc_processor/mod.rs +++ b/veilid-core/src/rpc_processor/mod.rs @@ -110,6 +110,28 @@ impl RPCMessageHeader { RPCMessageHeaderDetail::PrivateRouted(p) => p.direct.envelope.get_crypto_kind(), } } + // pub fn direct_peer_noderef(&self) -> NodeRef { + // match &self.detail { + // RPCMessageHeaderDetail::Direct(d) => d.peer_noderef.clone(), + // RPCMessageHeaderDetail::SafetyRouted(s) => s.direct.peer_noderef.clone(), + // RPCMessageHeaderDetail::PrivateRouted(p) => p.direct.peer_noderef.clone(), + // } + // } + pub fn direct_sender_node_id(&self) -> TypedKey { + match &self.detail { + RPCMessageHeaderDetail::Direct(d) => { + TypedKey::new(d.envelope.get_crypto_kind(), d.envelope.get_sender_id()) + } + RPCMessageHeaderDetail::SafetyRouted(s) => TypedKey::new( + s.direct.envelope.get_crypto_kind(), + s.direct.envelope.get_sender_id(), + ), + RPCMessageHeaderDetail::PrivateRouted(p) => TypedKey::new( + p.direct.envelope.get_crypto_kind(), + p.direct.envelope.get_sender_id(), + ), + } + } } #[derive(Debug)] @@ -182,7 +204,7 @@ impl Answer { } } -/// An operation that has been fully prepared for envelope r +/// An operation that has been fully prepared for envelope struct RenderedOperation { /// The rendered operation bytes message: Vec, @@ -200,6 +222,20 @@ struct RenderedOperation { reply_private_route: Option, } +impl fmt::Debug for RenderedOperation { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RenderedOperation") + .field("message(len)", &self.message.len()) + .field("destination_node_ref", &self.destination_node_ref) + .field("node_ref", &self.node_ref) + .field("hop_count", &self.hop_count) + .field("safety_route", &self.safety_route) + .field("remote_private_route", &self.remote_private_route) + .field("reply_private_route", &self.reply_private_route) + .finish() + } +} + /// Node information exchanged during every RPC message #[derive(Default, Debug, Clone)] pub struct SenderPeerInfo { @@ -431,7 +467,7 @@ impl RPCProcessor { .await { Ok(v) => { - let v = network_result_value_or_log!(v => { + let v = network_result_value_or_log!(v => [ format!(": node_id={} count={} fanout={} fanout={} safety_selection={:?}", node_id, count, fanout, timeout_us, 
safety_selection) ] { // Any other failures, just try the next node return Ok(None); }); @@ -525,10 +561,14 @@ impl RPCProcessor { }) } - #[instrument(level = "trace", skip(self, waitable_reply), err)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self, waitable_reply), err) + )] async fn wait_for_reply( &self, waitable_reply: WaitableReply, + debug_string: String, ) -> Result, RPCError> { let out = self .unlocked_inner @@ -536,7 +576,18 @@ impl RPCProcessor { .wait_for_op(waitable_reply.handle, waitable_reply.timeout_us) .await; match &out { - Err(_) | Ok(TimeoutOr::Timeout) => { + Err(e) => { + log_rpc!(debug "RPC Lost ({}): {}", debug_string, e); + self.record_question_lost( + waitable_reply.send_ts, + waitable_reply.node_ref.clone(), + waitable_reply.safety_route, + waitable_reply.remote_private_route, + waitable_reply.reply_private_route, + ); + } + Ok(TimeoutOr::Timeout) => { + log_rpc!(debug "RPC Lost ({}): Timeout", debug_string); self.record_question_lost( waitable_reply.send_ts, waitable_reply.node_ref.clone(), @@ -648,7 +699,10 @@ impl RPCProcessor { /// Produce a byte buffer that represents the wire encoding of the entire /// unencrypted envelope body for a RPC message. This incorporates /// wrapping a private and/or safety route if they are specified. - #[instrument(level = "debug", skip(self, operation), err)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "debug", skip(self, operation), err) + )] fn render_operation( &self, dest: Destination, @@ -708,6 +762,10 @@ impl RPCProcessor { if sequencing > node_ref.sequencing() { node_ref.set_sequencing(sequencing) } + let mut destination_node_ref = destination_node_ref.clone(); + if sequencing > destination_node_ref.sequencing() { + destination_node_ref.set_sequencing(sequencing) + } // Reply private route should be None here, even for questions assert!(reply_private_route.is_none()); @@ -776,7 +834,10 @@ impl RPCProcessor { /// routing table caching when it is okay to do so /// Also check target's timestamp of our own node info, to see if we should send that /// And send our timestamp of the target's node info so they can determine if they should update us on their next rpc - #[instrument(level = "trace", skip(self), ret)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self), ret) + )] fn get_sender_peer_info(&self, dest: &Destination) -> SenderPeerInfo { // Don't do this if the sender is to remain private // Otherwise we would be attaching the original sender's identity to the final destination, @@ -1077,7 +1138,10 @@ impl RPCProcessor { /// Issue a question over the network, possibly using an anonymized route /// Optionally keeps a context to be passed to the answer processor when an answer is received - #[instrument(level = "debug", skip(self, question), err)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "debug", skip(self, question), err) + )] async fn question( &self, dest: Destination, @@ -1092,7 +1156,8 @@ impl RPCProcessor { let op_id = operation.op_id(); // Log rpc send - trace!(target: "rpc_message", dir = "send", kind = "question", op_id = op_id.as_u64(), desc = operation.kind().desc(), ?dest); + #[cfg(feature = "verbose-tracing")] + debug!(target: "rpc_message", dir = "send", kind = "question", op_id = op_id.as_u64(), desc = operation.kind().desc(), ?dest); // Produce rendered operation let RenderedOperation { @@ -1118,17 +1183,31 @@ impl RPCProcessor { // Send question let bytes: ByteCount = (message.len() as 
u64).into(); let send_ts = get_aligned_timestamp(); - let send_data_kind = network_result_try!(self + #[allow(unused_variables)] + let message_len = message.len(); + let res = self .network_manager() - .send_envelope(node_ref.clone(), Some(destination_node_ref), message) + .send_envelope( + node_ref.clone(), + Some(destination_node_ref.clone()), + message, + ) .await .map_err(|e| { // If we're returning an error, clean up - self.record_send_failure(RPCKind::Question, send_ts, node_ref.clone(), safety_route, remote_private_route); + self.record_send_failure( + RPCKind::Question, + send_ts, + node_ref.clone(), + safety_route, + remote_private_route, + ); RPCError::network(e) - })? => { + })?; + let send_data_kind = network_result_value_or_log!( res => [ format!(": node_ref={}, destination_node_ref={}, message.len={}", node_ref, destination_node_ref, message_len) ] { // If we couldn't send we're still cleaning up self.record_send_failure(RPCKind::Question, send_ts, node_ref.clone(), safety_route, remote_private_route); + network_result_raise!(res); } ); @@ -1156,7 +1235,10 @@ impl RPCProcessor { } /// Issue a statement over the network, possibly using an anonymized route - #[instrument(level = "debug", skip(self, statement), err)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "debug", skip(self, statement), err) + )] async fn statement( &self, dest: Destination, @@ -1169,7 +1251,8 @@ impl RPCProcessor { let operation = RPCOperation::new_statement(statement, spi); // Log rpc send - trace!(target: "rpc_message", dir = "send", kind = "statement", op_id = operation.op_id().as_u64(), desc = operation.kind().desc(), ?dest); + #[cfg(feature = "verbose-tracing")] + debug!(target: "rpc_message", dir = "send", kind = "statement", op_id = operation.op_id().as_u64(), desc = operation.kind().desc(), ?dest); // Produce rendered operation let RenderedOperation { @@ -1185,17 +1268,31 @@ impl RPCProcessor { // Send statement let bytes: ByteCount = (message.len() as u64).into(); let send_ts = get_aligned_timestamp(); - let _send_data_kind = network_result_try!(self + #[allow(unused_variables)] + let message_len = message.len(); + let res = self .network_manager() - .send_envelope(node_ref.clone(), Some(destination_node_ref), message) + .send_envelope( + node_ref.clone(), + Some(destination_node_ref.clone()), + message, + ) .await .map_err(|e| { // If we're returning an error, clean up - self.record_send_failure(RPCKind::Statement, send_ts, node_ref.clone(), safety_route, remote_private_route); + self.record_send_failure( + RPCKind::Statement, + send_ts, + node_ref.clone(), + safety_route, + remote_private_route, + ); RPCError::network(e) - })? 
=> { + })?; + let _send_data_kind = network_result_value_or_log!( res => [ format!(": node_ref={}, destination_node_ref={}, message.len={}", node_ref, destination_node_ref, message_len) ] { // If we couldn't send we're still cleaning up self.record_send_failure(RPCKind::Statement, send_ts, node_ref.clone(), safety_route, remote_private_route); + network_result_raise!(res); } ); @@ -1213,7 +1310,10 @@ impl RPCProcessor { } /// Issue a reply over the network, possibly using an anonymized route /// The request must want a response, or this routine fails - #[instrument(level = "debug", skip(self, request, answer), err)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "debug", skip(self, request, answer), err) + )] async fn answer( &self, request: RPCMessage, @@ -1229,7 +1329,8 @@ impl RPCProcessor { let operation = RPCOperation::new_answer(&request.operation, answer, spi); // Log rpc send - trace!(target: "rpc_message", dir = "send", kind = "answer", op_id = operation.op_id().as_u64(), desc = operation.kind().desc(), ?dest); + #[cfg(feature = "verbose-tracing")] + debug!(target: "rpc_message", dir = "send", kind = "answer", op_id = operation.op_id().as_u64(), desc = operation.kind().desc(), ?dest); // Produce rendered operation let RenderedOperation { @@ -1245,16 +1346,31 @@ impl RPCProcessor { // Send the reply let bytes: ByteCount = (message.len() as u64).into(); let send_ts = get_aligned_timestamp(); - network_result_try!(self.network_manager() - .send_envelope(node_ref.clone(), Some(destination_node_ref), message) + #[allow(unused_variables)] + let message_len = message.len(); + let res = self + .network_manager() + .send_envelope( + node_ref.clone(), + Some(destination_node_ref.clone()), + message, + ) .await .map_err(|e| { // If we're returning an error, clean up - self.record_send_failure(RPCKind::Answer, send_ts, node_ref.clone(), safety_route, remote_private_route); + self.record_send_failure( + RPCKind::Answer, + send_ts, + node_ref.clone(), + safety_route, + remote_private_route, + ); RPCError::network(e) - })? 
=> { + })?; + let _send_data_kind = network_result_value_or_log!( res => [ format!(": node_ref={}, destination_node_ref={}, message.len={}", node_ref, destination_node_ref, message_len) ] { // If we couldn't send we're still cleaning up self.record_send_failure(RPCKind::Answer, send_ts, node_ref.clone(), safety_route, remote_private_route); + network_result_raise!(res); } ); @@ -1321,7 +1437,10 @@ impl RPCProcessor { } ////////////////////////////////////////////////////////////////////// - #[instrument(level = "trace", skip(self, encoded_msg), err)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self, encoded_msg), err) + )] async fn process_rpc_message( &self, encoded_msg: RPCMessageEncoded, @@ -1474,7 +1593,8 @@ impl RPCProcessor { let rpc_worker_span = span!(parent: None, Level::TRACE, "rpc_worker recv"); // xxx: causes crash (Missing otel data span extensions) // rpc_worker_span.follows_from(span_id); - let res = match self + + network_result_value_or_log!(match self .process_rpc_message(msg) .instrument(rpc_worker_span) .await @@ -1485,13 +1605,14 @@ impl RPCProcessor { } Ok(v) => v, - }; - - network_result_value_or_log!(res => {}); + } => [ format!(": msg.header={:?}", msg.header) ] {}); } } - #[instrument(level = "trace", skip(self, body), err)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self, body), err) + )] pub fn enqueue_direct_message( &self, envelope: Envelope, @@ -1500,19 +1621,22 @@ impl RPCProcessor { routing_domain: RoutingDomain, body: Vec, ) -> EyreResult<()> { + let header = RPCMessageHeader { + detail: RPCMessageHeaderDetail::Direct(RPCMessageHeaderDetailDirect { + envelope, + peer_noderef, + connection_descriptor, + routing_domain, + }), + timestamp: get_aligned_timestamp(), + body_len: ByteCount::new(body.len() as u64), + }; + let msg = RPCMessageEncoded { - header: RPCMessageHeader { - detail: RPCMessageHeaderDetail::Direct(RPCMessageHeaderDetailDirect { - envelope, - peer_noderef, - connection_descriptor, - routing_domain, - }), - timestamp: get_aligned_timestamp(), - body_len: ByteCount::new(body.len() as u64), - }, + header, data: RPCMessageData { contents: body }, }; + let send_channel = { let inner = self.inner.lock(); inner.send_channel.as_ref().unwrap().clone() @@ -1524,7 +1648,10 @@ impl RPCProcessor { Ok(()) } - #[instrument(level = "trace", skip(self, body), err)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self, body), err) + )] fn enqueue_safety_routed_message( &self, direct: RPCMessageHeaderDetailDirect, @@ -1532,16 +1659,18 @@ impl RPCProcessor { sequencing: Sequencing, body: Vec, ) -> EyreResult<()> { + let header = RPCMessageHeader { + detail: RPCMessageHeaderDetail::SafetyRouted(RPCMessageHeaderDetailSafetyRouted { + direct, + remote_safety_route, + sequencing, + }), + timestamp: get_aligned_timestamp(), + body_len: (body.len() as u64).into(), + }; + let msg = RPCMessageEncoded { - header: RPCMessageHeader { - detail: RPCMessageHeaderDetail::SafetyRouted(RPCMessageHeaderDetailSafetyRouted { - direct, - remote_safety_route, - sequencing, - }), - timestamp: get_aligned_timestamp(), - body_len: (body.len() as u64).into(), - }, + header, data: RPCMessageData { contents: body }, }; let send_channel = { @@ -1555,7 +1684,10 @@ impl RPCProcessor { Ok(()) } - #[instrument(level = "trace", skip(self, body), err)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self, body), err) + )] fn enqueue_private_routed_message( 
&self, direct: RPCMessageHeaderDetailDirect, @@ -1564,21 +1696,22 @@ impl RPCProcessor { safety_spec: SafetySpec, body: Vec, ) -> EyreResult<()> { + let header = RPCMessageHeader { + detail: RPCMessageHeaderDetail::PrivateRouted(RPCMessageHeaderDetailPrivateRouted { + direct, + remote_safety_route, + private_route, + safety_spec, + }), + timestamp: get_aligned_timestamp(), + body_len: (body.len() as u64).into(), + }; + let msg = RPCMessageEncoded { - header: RPCMessageHeader { - detail: RPCMessageHeaderDetail::PrivateRouted( - RPCMessageHeaderDetailPrivateRouted { - direct, - remote_safety_route, - private_route, - safety_spec, - }, - ), - timestamp: get_aligned_timestamp(), - body_len: (body.len() as u64).into(), - }, + header, data: RPCMessageData { contents: body }, }; + let send_channel = { let inner = self.inner.lock(); inner.send_channel.as_ref().unwrap().clone() diff --git a/veilid-core/src/rpc_processor/operation_waiter.rs b/veilid-core/src/rpc_processor/operation_waiter.rs index 2f39e200..3588e10e 100644 --- a/veilid-core/src/rpc_processor/operation_waiter.rs +++ b/veilid-core/src/rpc_processor/operation_waiter.rs @@ -114,7 +114,10 @@ where } /// Complete the app call - #[instrument(level = "trace", skip(self, message), err)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self, message), err) + )] pub async fn complete_op_waiter(&self, op_id: OperationId, message: T) -> Result<(), RPCError> { let waiting_op = { let mut inner = self.inner.lock(); @@ -152,7 +155,7 @@ where .into_timeout_or(); Ok(res .on_timeout(|| { - log_rpc!(debug "op wait timed out: {}", handle.op_id); + // log_rpc!(debug "op wait timed out: {}", handle.op_id); // debug_print_backtrace(); self.cancel_op_waiter(handle.op_id); }) diff --git a/veilid-core/src/rpc_processor/rpc_app_call.rs b/veilid-core/src/rpc_processor/rpc_app_call.rs index f3de4828..cf0318de 100644 --- a/veilid-core/src/rpc_processor/rpc_app_call.rs +++ b/veilid-core/src/rpc_processor/rpc_app_call.rs @@ -3,12 +3,17 @@ use super::*; impl RPCProcessor { // Sends a high level app request and wait for response // Can be sent via all methods including relays and routes - #[instrument(level = "trace", skip(self), ret, err)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self, message), fields(message.len = message.len(), ret.latency, ret.len), err) + )] pub async fn rpc_call_app_call( self, dest: Destination, message: Vec, ) -> Result>>, RPCError> { + let debug_string = format!("AppCall(message(len)={}) => {}", message.len(), dest); + let app_call_q = RPCOperationAppCallQ::new(message)?; let question = RPCQuestion::new( network_result_try!(self.get_destination_respond_to(&dest)?), @@ -19,7 +24,7 @@ impl RPCProcessor { let waitable_reply = network_result_try!(self.question(dest, question, None).await?); // Wait for reply - let (msg, latency) = match self.wait_for_reply(waitable_reply).await? { + let (msg, latency) = match self.wait_for_reply(waitable_reply, debug_string).await? 
{ TimeoutOr::Timeout => return Ok(NetworkResult::Timeout), TimeoutOr::Value(v) => v, }; @@ -36,10 +41,14 @@ impl RPCProcessor { let a_message = app_call_a.destructure(); + #[cfg(feature = "verbose-tracing")] + tracing::Span::current().record("ret.latency", latency.as_u64()); + #[cfg(feature = "verbose-tracing")] + tracing::Span::current().record("ret.len", a_message.len()); Ok(NetworkResult::value(Answer::new(latency, a_message))) } - #[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))] pub(crate) async fn process_app_call_q( &self, msg: RPCMessage, diff --git a/veilid-core/src/rpc_processor/rpc_app_message.rs b/veilid-core/src/rpc_processor/rpc_app_message.rs index 726515a4..ba49c701 100644 --- a/veilid-core/src/rpc_processor/rpc_app_message.rs +++ b/veilid-core/src/rpc_processor/rpc_app_message.rs @@ -3,7 +3,10 @@ use super::*; impl RPCProcessor { // Sends a high level app message // Can be sent via all methods including relays and routes - #[instrument(level = "trace", skip(self), ret, err)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self, message), fields(message.len = message.len()), err) + )] pub async fn rpc_call_app_message( self, dest: Destination, @@ -16,7 +19,7 @@ impl RPCProcessor { self.statement(dest, statement).await } - #[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))] pub(crate) async fn process_app_message( &self, msg: RPCMessage, diff --git a/veilid-core/src/rpc_processor/rpc_cancel_tunnel.rs b/veilid-core/src/rpc_processor/rpc_cancel_tunnel.rs index 36cd9edf..4698234a 100644 --- a/veilid-core/src/rpc_processor/rpc_cancel_tunnel.rs +++ b/veilid-core/src/rpc_processor/rpc_cancel_tunnel.rs @@ -1,7 +1,7 @@ use super::*; impl RPCProcessor { - #[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))] pub(crate) async fn process_cancel_tunnel_q( &self, msg: RPCMessage, diff --git a/veilid-core/src/rpc_processor/rpc_complete_tunnel.rs b/veilid-core/src/rpc_processor/rpc_complete_tunnel.rs index 5143f143..307e659f 100644 --- a/veilid-core/src/rpc_processor/rpc_complete_tunnel.rs +++ b/veilid-core/src/rpc_processor/rpc_complete_tunnel.rs @@ -1,7 +1,7 @@ use super::*; impl RPCProcessor { - #[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))] pub(crate) async fn process_complete_tunnel_q( &self, msg: RPCMessage, diff --git a/veilid-core/src/rpc_processor/rpc_find_block.rs b/veilid-core/src/rpc_processor/rpc_find_block.rs index f4fe9057..3bfe81ea 100644 --- a/veilid-core/src/rpc_processor/rpc_find_block.rs +++ b/veilid-core/src/rpc_processor/rpc_find_block.rs @@ -1,7 +1,7 @@ use super::*; impl RPCProcessor { - #[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))] pub(crate) async fn process_find_block_q( &self, msg: RPCMessage, diff --git 
a/veilid-core/src/rpc_processor/rpc_find_node.rs b/veilid-core/src/rpc_processor/rpc_find_node.rs index 8b7ded6b..afd05e18 100644 --- a/veilid-core/src/rpc_processor/rpc_find_node.rs +++ b/veilid-core/src/rpc_processor/rpc_find_node.rs @@ -7,7 +7,10 @@ impl RPCProcessor { /// Because this leaks information about the identity of the node itself, /// replying to this request received over a private route will leak /// the identity of the node and defeat the private route. - #[instrument(level = "trace", skip(self), ret, err)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self), err) + )] pub async fn rpc_call_find_node( self, dest: Destination, @@ -32,11 +35,13 @@ impl RPCProcessor { find_node_q_detail, ); + let debug_string = format!("FindNode(node_id={}) => {}", node_id, dest); + // Send the find_node request let waitable_reply = network_result_try!(self.question(dest, find_node_q, None).await?); // Wait for reply - let (msg, latency) = match self.wait_for_reply(waitable_reply).await? { + let (msg, latency) = match self.wait_for_reply(waitable_reply, debug_string).await? { TimeoutOr::Timeout => return Ok(NetworkResult::Timeout), TimeoutOr::Value(v) => v, }; @@ -65,7 +70,7 @@ impl RPCProcessor { Ok(NetworkResult::value(Answer::new(latency, peers))) } - #[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))] pub(crate) async fn process_find_node_q( &self, msg: RPCMessage, diff --git a/veilid-core/src/rpc_processor/rpc_get_value.rs b/veilid-core/src/rpc_processor/rpc_get_value.rs index cd82f067..324279f3 100644 --- a/veilid-core/src/rpc_processor/rpc_get_value.rs +++ b/veilid-core/src/rpc_processor/rpc_get_value.rs @@ -15,7 +15,17 @@ impl RPCProcessor { /// Because this leaks information about the identity of the node itself, /// replying to this request received over a private route will leak /// the identity of the node and defeat the private route. - #[instrument(level = "trace", skip(self), ret, err)] + + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self, last_descriptor), + fields(ret.value.data.len, + ret.value.data.seq, + ret.value.data.writer, + ret.peers.len, + ret.latency + ),err) + )] pub async fn rpc_call_get_value( self, dest: Destination, @@ -39,6 +49,18 @@ impl RPCProcessor { return Err(RPCError::internal("No node id for crypto kind")); }; + let debug_string = format!( + "OUT ==> GetValueQ({} #{}{}) => {}", + key, + subkey, + if last_descriptor.is_some() { + " +lastdesc" + } else { + "" + }, + dest + ); + // Send the getvalue question let get_value_q = RPCOperationGetValueQ::new(key, subkey, last_descriptor.is_none()); let question = RPCQuestion::new( @@ -52,13 +74,15 @@ impl RPCProcessor { vcrypto: vcrypto.clone(), }); + log_rpc!(debug "{}", debug_string); + let waitable_reply = network_result_try!( self.question(dest, question, Some(question_context)) .await? ); // Wait for reply - let (msg, latency) = match self.wait_for_reply(waitable_reply).await? { + let (msg, latency) = match self.wait_for_reply(waitable_reply, debug_string).await? 
{ TimeoutOr::Timeout => return Ok(NetworkResult::Timeout), TimeoutOr::Value(v) => v, }; @@ -75,6 +99,28 @@ impl RPCProcessor { let (value, peers, descriptor) = get_value_a.destructure(); + let debug_string_value = value.as_ref().map(|v| { + format!(" len={} writer={}", + v.value_data().data().len(), + v.value_data().writer(), + ) + }).unwrap_or_default(); + + let debug_string_answer = format!( + "OUT <== GetValueA({} #{}{}{} peers={})", + key, + subkey, + debug_string_value, + if descriptor.is_some() { + " +desc" + } else { + "" + }, + peers.len(), + ); + + log_rpc!(debug "{}", debug_string_answer); + // Validate peers returned are, in fact, closer to the key than the node we sent this to let valid = match RoutingTable::verify_peers_closer(vcrypto, target_node_id, key, &peers) { Ok(v) => v, @@ -89,6 +135,17 @@ impl RPCProcessor { return Ok(NetworkResult::invalid_message("non-closer peers returned")); } + #[cfg(feature = "verbose-tracing")] + tracing::Span::current().record("ret.latency", latency.as_u64()); + #[cfg(feature = "verbose-tracing")] + if let Some(value) = &value { + tracing::Span::current().record("ret.value.data.len", value.value_data().data().len()); + tracing::Span::current().record("ret.value.data.seq", value.value_data().seq()); + tracing::Span::current().record("ret.value.data.writer", value.value_data().writer().to_string()); + } + #[cfg(feature = "verbose-tracing")] + tracing::Span::current().record("ret.peers.len", peers.len()); + Ok(NetworkResult::value(Answer::new( latency, GetValueAnswer { @@ -99,7 +156,7 @@ impl RPCProcessor { ))) } - #[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))] pub(crate) async fn process_get_value_q( &self, msg: RPCMessage, @@ -131,13 +188,50 @@ impl RPCProcessor { let routing_table = self.routing_table(); let closer_to_key_peers = network_result_try!(routing_table.find_peers_closer_to_key(key)); + let debug_string = format!( + "IN <=== GetValueQ({} #{}{}) <== {}", + key, + subkey, + if want_descriptor { + " +wantdesc" + } else { + "" + }, + msg.header.direct_sender_node_id() + ); + + log_rpc!(debug "{}", debug_string); + // See if we have this record ourselves let storage_manager = self.storage_manager(); let subkey_result = network_result_try!(storage_manager .inbound_get_value(key, subkey, want_descriptor) .await .map_err(RPCError::internal)?); + + let debug_string_value = subkey_result.value.as_ref().map(|v| { + format!(" len={} writer={}", + v.value_data().data().len(), + v.value_data().writer(), + ) + }).unwrap_or_default(); + let debug_string_answer = format!( + "IN ===> GetValueA({} #{}{}{} peers={}) ==> {}", + key, + subkey, + debug_string_value, + if subkey_result.descriptor.is_some() { + " +desc" + } else { + "" + }, + closer_to_key_peers.len(), + msg.header.direct_sender_node_id() + ); + + log_rpc!(debug "{}", debug_string_answer); + // Make GetValue answer let get_value_a = RPCOperationGetValueA::new( subkey_result.value, diff --git a/veilid-core/src/rpc_processor/rpc_return_receipt.rs b/veilid-core/src/rpc_processor/rpc_return_receipt.rs index 383dcc6d..66397c72 100644 --- a/veilid-core/src/rpc_processor/rpc_return_receipt.rs +++ b/veilid-core/src/rpc_processor/rpc_return_receipt.rs @@ -3,7 +3,10 @@ use super::*; impl RPCProcessor { // Sends a unidirectional in-band return receipt // Can be sent via all methods including relays and routes - #[instrument(level = 
"trace", skip(self, receipt), ret, err)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self, receipt), ret, err) + )] pub async fn rpc_call_return_receipt>( self, dest: Destination, @@ -20,7 +23,7 @@ impl RPCProcessor { Ok(NetworkResult::value(())) } - #[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))] pub(crate) async fn process_return_receipt( &self, msg: RPCMessage, diff --git a/veilid-core/src/rpc_processor/rpc_route.rs b/veilid-core/src/rpc_processor/rpc_route.rs index b397f26c..7e45bccd 100644 --- a/veilid-core/src/rpc_processor/rpc_route.rs +++ b/veilid-core/src/rpc_processor/rpc_route.rs @@ -1,7 +1,7 @@ use super::*; impl RPCProcessor { - #[instrument(level = "trace", skip_all, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip_all, err))] async fn process_route_safety_route_hop( &self, routed_operation: RoutedOperation, @@ -52,7 +52,7 @@ impl RPCProcessor { .await } - #[instrument(level = "trace", skip_all, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip_all, err))] async fn process_route_private_route_hop( &self, routed_operation: RoutedOperation, @@ -99,7 +99,7 @@ impl RPCProcessor { /// Note: it is important that we never respond with a safety route to questions that come /// in without a private route. Giving away a safety route when the node id is known is /// a privacy violation! - #[instrument(level = "trace", skip_all, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip_all, err))] fn process_safety_routed_operation( &self, detail: RPCMessageHeaderDetailDirect, @@ -141,7 +141,7 @@ impl RPCProcessor { } /// Process a routed operation that came in over both a safety route and a private route - #[instrument(level = "trace", skip_all, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip_all, err))] fn process_private_routed_operation( &self, detail: RPCMessageHeaderDetailDirect, @@ -209,7 +209,7 @@ impl RPCProcessor { Ok(NetworkResult::value(())) } - #[instrument(level = "trace", skip_all, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip_all, err))] fn process_routed_operation( &self, detail: RPCMessageHeaderDetailDirect, @@ -239,7 +239,7 @@ impl RPCProcessor { ) } } - #[instrument(level = "trace", skip_all, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip_all, err))] pub(crate) async fn process_private_route_first_hop( &self, mut routed_operation: RoutedOperation, @@ -360,7 +360,7 @@ impl RPCProcessor { Ok(NetworkResult::value(route_hop)) } - #[instrument(level = "trace", skip(self, msg), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self), ret, err))] pub(crate) async fn process_route( &self, msg: RPCMessage, diff --git a/veilid-core/src/rpc_processor/rpc_set_value.rs b/veilid-core/src/rpc_processor/rpc_set_value.rs index e36852af..9d29a432 100644 --- a/veilid-core/src/rpc_processor/rpc_set_value.rs +++ b/veilid-core/src/rpc_processor/rpc_set_value.rs @@ -14,6 +14,20 @@ impl RPCProcessor { /// Because this leaks information about the identity of the node itself, /// replying to this request received over a private route will leak /// the identity of the node and defeat the private route. 
+ #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self, value, descriptor), + fields(value.data.len = value.value_data().data().len(), + value.data.seq = value.value_data().seq(), + value.data.writer = value.value_data().writer().to_string(), + ret.set, + ret.value.data.len, + ret.value.data.seq, + ret.value.data.writer, + ret.peers.len, + ret.latency + ), err) + )] pub async fn rpc_call_set_value( self, dest: Destination, @@ -39,6 +53,20 @@ impl RPCProcessor { return Err(RPCError::internal("No node id for crypto kind")); }; + let debug_string = format!( + "OUT ==> SetValueQ({} #{} len={} writer={}{}) => {}", + key, + subkey, + value.value_data().data().len(), + value.value_data().writer(), + if send_descriptor { + " +senddesc" + } else { + "" + }, + dest + ); + // Send the setvalue question let set_value_q = RPCOperationSetValueQ::new( key, @@ -59,13 +87,17 @@ impl RPCProcessor { subkey, vcrypto: vcrypto.clone(), }); + + log_rpc!(debug "{}", debug_string); + let waitable_reply = network_result_try!( self.question(dest, question, Some(question_context)) .await? ); + // Wait for reply - let (msg, latency) = match self.wait_for_reply(waitable_reply).await? { + let (msg, latency) = match self.wait_for_reply(waitable_reply, debug_string).await? { TimeoutOr::Timeout => return Ok(NetworkResult::Timeout), TimeoutOr::Value(v) => v, }; @@ -81,6 +113,28 @@ impl RPCProcessor { }; let (set, value, peers) = set_value_a.destructure(); + + let debug_string_value = value.as_ref().map(|v| { + format!(" len={} writer={}", + v.value_data().data().len(), + v.value_data().writer(), + ) + }).unwrap_or_default(); + + let debug_string_answer = format!( + "OUT <== SetValueA({} #{}{}{} peers={})", + key, + subkey, + if set { + " +set" + } else { + "" + }, + debug_string_value, + peers.len(), + ); + + log_rpc!(debug "{}", debug_string_answer); // Validate peers returned are, in fact, closer to the key than the node we sent this to let valid = match RoutingTable::verify_peers_closer(vcrypto, target_node_id, key, &peers) { @@ -96,13 +150,26 @@ impl RPCProcessor { return Ok(NetworkResult::invalid_message("non-closer peers returned")); } + #[cfg(feature = "verbose-tracing")] + tracing::Span::current().record("ret.latency", latency.as_u64()); + #[cfg(feature = "verbose-tracing")] + tracing::Span::current().record("ret.set", set); + #[cfg(feature = "verbose-tracing")] + if let Some(value) = &value { + tracing::Span::current().record("ret.value.data.len", value.value_data().data().len()); + tracing::Span::current().record("ret.value.data.seq", value.value_data().seq()); + tracing::Span::current().record("ret.value.data.writer", value.value_data().writer().to_string()); + } + #[cfg(feature = "verbose-tracing")] + tracing::Span::current().record("ret.peers.len", peers.len()); + Ok(NetworkResult::value(Answer::new( latency, SetValueAnswer { set, value, peers }, ))) } - #[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))] pub(crate) async fn process_set_value_q( &self, msg: RPCMessage, @@ -134,6 +201,22 @@ impl RPCProcessor { let routing_table = self.routing_table(); let closer_to_key_peers = network_result_try!(routing_table.find_peers_closer_to_key(key)); + let debug_string = format!( + "IN <=== SetValueQ({} #{} len={} writer={}{}) <== {}", + key, + subkey, + value.value_data().data().len(), + value.value_data().writer(), + if 
descriptor.is_some() { + " +desc" + } else { + "" + }, + msg.header.direct_sender_node_id() + ); + + log_rpc!(debug "{}", debug_string); + // If there are less than 'set_value_count' peers that are closer, then store here too let set_value_count = { let c = self.config.get(); @@ -155,6 +238,29 @@ impl RPCProcessor { (true, new_value) }; + let debug_string_value = new_value.as_ref().map(|v| { + format!(" len={} writer={}", + v.value_data().data().len(), + v.value_data().writer(), + ) + }).unwrap_or_default(); + + let debug_string_answer = format!( + "IN ===> SetValueA({} #{}{}{} peers={}) ==> {}", + key, + subkey, + if set { + " +set" + } else { + "" + }, + debug_string_value, + closer_to_key_peers.len(), + msg.header.direct_sender_node_id() + ); + + log_rpc!(debug "{}", debug_string_answer); + // Make SetValue answer let set_value_a = RPCOperationSetValueA::new(set, new_value, closer_to_key_peers)?; diff --git a/veilid-core/src/rpc_processor/rpc_signal.rs b/veilid-core/src/rpc_processor/rpc_signal.rs index 57f81513..224a9bac 100644 --- a/veilid-core/src/rpc_processor/rpc_signal.rs +++ b/veilid-core/src/rpc_processor/rpc_signal.rs @@ -3,7 +3,10 @@ use super::*; impl RPCProcessor { // Sends a unidirectional signal to a node // Can be sent via relays but not routes. For routed 'signal' like capabilities, use AppMessage. - #[instrument(level = "trace", skip(self), ret, err)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self), ret, err) + )] pub async fn rpc_call_signal( self, dest: Destination, @@ -29,7 +32,7 @@ impl RPCProcessor { self.statement(dest, statement).await } - #[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))] pub(crate) async fn process_signal( &self, msg: RPCMessage, diff --git a/veilid-core/src/rpc_processor/rpc_start_tunnel.rs b/veilid-core/src/rpc_processor/rpc_start_tunnel.rs index 881f9f51..ce9bce42 100644 --- a/veilid-core/src/rpc_processor/rpc_start_tunnel.rs +++ b/veilid-core/src/rpc_processor/rpc_start_tunnel.rs @@ -1,7 +1,7 @@ use super::*; impl RPCProcessor { - #[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))] pub(crate) async fn process_start_tunnel_q( &self, msg: RPCMessage, diff --git a/veilid-core/src/rpc_processor/rpc_status.rs b/veilid-core/src/rpc_processor/rpc_status.rs index 323789b3..22bb3817 100644 --- a/veilid-core/src/rpc_processor/rpc_status.rs +++ b/veilid-core/src/rpc_processor/rpc_status.rs @@ -15,7 +15,10 @@ impl RPCProcessor { // direct -> node status + sender info // safety -> node status // private -> nothing - #[instrument(level = "trace", skip(self), ret, err)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self), ret, err) + )] pub async fn rpc_call_status( self, dest: Destination, @@ -101,6 +104,8 @@ impl RPCProcessor { RPCQuestionDetail::StatusQ(status_q), ); + let debug_string = format!("Status => {}", dest); + // Send the info request let waitable_reply = network_result_try!(self.question(dest.clone(), question, None).await?); @@ -109,7 +114,7 @@ impl RPCProcessor { let send_data_kind = waitable_reply.send_data_kind; // Wait for reply - let (msg, latency) = match self.wait_for_reply(waitable_reply).await? 
{ + let (msg, latency) = match self.wait_for_reply(waitable_reply, debug_string).await? { TimeoutOr::Timeout => return Ok(NetworkResult::Timeout), TimeoutOr::Value(v) => v, }; @@ -208,7 +213,7 @@ impl RPCProcessor { Ok(NetworkResult::value(Answer::new(latency, opt_sender_info))) } - #[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))] pub(crate) async fn process_status_q( &self, msg: RPCMessage, diff --git a/veilid-core/src/rpc_processor/rpc_supply_block.rs b/veilid-core/src/rpc_processor/rpc_supply_block.rs index 234fb71b..5b27a512 100644 --- a/veilid-core/src/rpc_processor/rpc_supply_block.rs +++ b/veilid-core/src/rpc_processor/rpc_supply_block.rs @@ -1,7 +1,7 @@ use super::*; impl RPCProcessor { - #[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))] pub(crate) async fn process_supply_block_q( &self, msg: RPCMessage, diff --git a/veilid-core/src/rpc_processor/rpc_validate_dial_info.rs b/veilid-core/src/rpc_processor/rpc_validate_dial_info.rs index e1530359..2ff38e4e 100644 --- a/veilid-core/src/rpc_processor/rpc_validate_dial_info.rs +++ b/veilid-core/src/rpc_processor/rpc_validate_dial_info.rs @@ -2,7 +2,10 @@ use super::*; impl RPCProcessor { // Can only be sent directly, not via relays or routes - #[instrument(level = "trace", skip(self), ret, err)] + #[cfg_attr( + feature = "verbose-tracing", + instrument(level = "trace", skip(self), ret, err) + )] pub async fn rpc_call_validate_dial_info( self, peer: NodeRef, @@ -23,7 +26,7 @@ impl RPCProcessor { // Send the validate_dial_info request // This can only be sent directly, as relays can not validate dial info network_result_value_or_log!(self.statement(Destination::direct(peer), statement) - .await? => { + .await? => [ format!(": peer={} statement={:?}", peer, statement) ] { return Ok(false); } ); @@ -33,7 +36,7 @@ impl RPCProcessor { ReceiptEvent::ReturnedPrivate { private_route: _ } | ReceiptEvent::ReturnedInBand { inbound_noderef: _ } | ReceiptEvent::ReturnedSafety => { - log_net!(debug "validate_dial_info receipt should be returned out-of-band".green()); + log_net!(debug "validate_dial_info receipt should be returned out-of-band"); Ok(false) } ReceiptEvent::ReturnedOutOfBand => { @@ -41,7 +44,7 @@ impl RPCProcessor { Ok(true) } ReceiptEvent::Expired => { - log_net!(debug "validate_dial_info receipt expired".green()); + log_net!(debug "validate_dial_info receipt expired"); Ok(false) } ReceiptEvent::Cancelled => { @@ -50,7 +53,7 @@ impl RPCProcessor { } } - #[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))] pub(crate) async fn process_validate_dial_info( &self, msg: RPCMessage, @@ -138,7 +141,7 @@ impl RPCProcessor { // Send the validate_dial_info request // This can only be sent directly, as relays can not validate dial info network_result_value_or_log!(self.statement(Destination::direct(peer), statement) - .await? => { + .await? 
=> [ format!(": peer={} statement={:?}", peer, statement) ] { continue; } ); diff --git a/veilid-core/src/rpc_processor/rpc_value_changed.rs b/veilid-core/src/rpc_processor/rpc_value_changed.rs index 6df3c332..621569e3 100644 --- a/veilid-core/src/rpc_processor/rpc_value_changed.rs +++ b/veilid-core/src/rpc_processor/rpc_value_changed.rs @@ -1,7 +1,7 @@ use super::*; impl RPCProcessor { - #[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), err))] pub(crate) async fn process_value_changed( &self, msg: RPCMessage, diff --git a/veilid-core/src/rpc_processor/rpc_watch_value.rs b/veilid-core/src/rpc_processor/rpc_watch_value.rs index 08b3ee23..0f4d24d1 100644 --- a/veilid-core/src/rpc_processor/rpc_watch_value.rs +++ b/veilid-core/src/rpc_processor/rpc_watch_value.rs @@ -1,7 +1,7 @@ use super::*; impl RPCProcessor { - #[instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err)] + #[cfg_attr(feature="verbose-tracing", instrument(level = "trace", skip(self, msg), fields(msg.operation.op_id), ret, err))] pub(crate) async fn process_watch_value_q( &self, msg: RPCMessage, diff --git a/veilid-core/src/storage_manager/debug.rs b/veilid-core/src/storage_manager/debug.rs index 0c47b56a..714061c0 100644 --- a/veilid-core/src/storage_manager/debug.rs +++ b/veilid-core/src/storage_manager/debug.rs @@ -15,4 +15,24 @@ impl StorageManager { }; remote_record_store.debug_records() } + pub(crate) async fn purge_local_records(&self, reclaim: Option) -> String { + let mut inner = self.inner.lock().await; + let Some(local_record_store) = &mut inner.local_record_store else { + return "not initialized".to_owned(); + }; + let reclaimed = local_record_store + .reclaim_space(reclaim.unwrap_or(usize::MAX)) + .await; + return format!("Local records purged: reclaimed {} bytes", reclaimed); + } + pub(crate) async fn purge_remote_records(&self, reclaim: Option) -> String { + let mut inner = self.inner.lock().await; + let Some(remote_record_store) = &mut inner.remote_record_store else { + return "not initialized".to_owned(); + }; + let reclaimed = remote_record_store + .reclaim_space(reclaim.unwrap_or(usize::MAX)) + .await; + return format!("Remote records purged: reclaimed {} bytes", reclaimed); + } } diff --git a/veilid-core/src/storage_manager/get_value.rs b/veilid-core/src/storage_manager/get_value.rs index b05e4de5..b2a34303 100644 --- a/veilid-core/src/storage_manager/get_value.rs +++ b/veilid-core/src/storage_manager/get_value.rs @@ -58,13 +58,13 @@ impl StorageManager { let vres = rpc_processor .clone() .rpc_call_get_value( - Destination::direct(next_node).with_safety(safety_selection), + Destination::direct(next_node.clone()).with_safety(safety_selection), key, subkey, last_descriptor, ) .await?; - let gva = network_result_value_or_log!(vres => { + let gva = network_result_value_or_log!(vres => [ format!(": next_node={} safety_selection={:?} key={} subkey={}", next_node, safety_selection, key, subkey) ] { // Any other failures, just try the next node return Ok(None); }); diff --git a/veilid-core/src/storage_manager/limited_size.rs b/veilid-core/src/storage_manager/limited_size.rs new file mode 100644 index 00000000..dbb82bec --- /dev/null +++ b/veilid-core/src/storage_manager/limited_size.rs @@ -0,0 +1,111 @@ +use super::*; +use num_traits::{PrimInt, Unsigned}; + +#[derive(ThisError, Debug, Clone, Copy, Eq, PartialEq)] +pub enum LimitError { + 
#[error("limit overflow")] + OverLimit, +} + +#[derive(ThisError, Debug, Clone, Copy, Eq, PartialEq)] +pub enum NumericError { + #[error("numeric overflow")] + Overflow, + #[error("numeric underflow")] + Underflow, +} + +#[derive(Debug, Clone)] +pub struct LimitedSize { + description: String, + value: T, + limit: Option, + uncommitted_value: Option, +} + +impl LimitedSize { + pub fn new(description: &str, value: T, limit: Option) -> Self { + Self { + description: description.to_owned(), + value, + limit, + uncommitted_value: None, + } + } + + fn current_value(&self) -> T { + self.uncommitted_value.unwrap_or(self.value) + } + + pub fn set(&mut self, new_value: T) { + self.uncommitted_value = Some(new_value); + } + + pub fn add(&mut self, v: T) -> Result { + let current_value = self.current_value(); + let max_v = T::max_value() - current_value; + if v > max_v { + return Err(NumericError::Overflow); + } + let new_value = current_value + v; + self.uncommitted_value = Some(new_value); + Ok(new_value) + } + pub fn sub(&mut self, v: T) -> Result { + let current_value = self.current_value(); + let max_v = current_value - T::min_value(); + if v > max_v { + return Err(NumericError::Underflow); + } + let new_value = current_value - v; + self.uncommitted_value = Some(new_value); + Ok(new_value) + } + pub fn saturating_sub(&mut self, mut v: T) -> T { + let current_value = self.current_value(); + let max_v = current_value - T::min_value(); + if v > max_v { + log_stor!(debug "Numeric underflow ({})", self.description); + v = max_v; + } + let new_value = current_value - v; + self.uncommitted_value = Some(new_value); + new_value + } + + pub fn check_limit(&self) -> bool { + if let Some(uncommitted_value) = self.uncommitted_value { + if let Some(limit) = self.limit { + if uncommitted_value > limit { + return false; + } + } + } + true + } + + pub fn commit(&mut self) -> Result { + if let Some(uncommitted_value) = self.uncommitted_value { + if let Some(limit) = self.limit { + if uncommitted_value > limit { + log_stor!(debug "Commit over limit failed ({}): {} > {}", self.description, uncommitted_value, limit); + return Err(LimitError::OverLimit); + } + } + log_stor!(debug "Commit ({}): {} => {}", self.description, self.value, uncommitted_value); + self.value = uncommitted_value; + } + Ok(self.value) + } + + pub fn rollback(&mut self) -> T { + if let Some(uv) = self.uncommitted_value { + log_stor!(debug "Rollback ({}): {} (drop {})", self.description, self.value, uv); + } + return self.value; + } + + pub fn get(&self) -> T { + return self.value; + } +} diff --git a/veilid-core/src/storage_manager/mod.rs b/veilid-core/src/storage_manager/mod.rs index 56a906bc..2fb7ab80 100644 --- a/veilid-core/src/storage_manager/mod.rs +++ b/veilid-core/src/storage_manager/mod.rs @@ -1,6 +1,7 @@ mod debug; mod get_value; mod keys; +mod limited_size; mod record_store; mod record_store_limits; mod set_value; @@ -9,6 +10,7 @@ mod tasks; mod types; use keys::*; +use limited_size::*; use record_store::*; use record_store_limits::*; use storage_manager_inner::*; @@ -95,6 +97,7 @@ impl StorageManager { Ok(()) } + #[instrument(level = "debug", skip_all)] pub async fn terminate(&self) { debug!("starting storage manager shutdown"); diff --git a/veilid-core/src/storage_manager/record_store.rs b/veilid-core/src/storage_manager/record_store.rs index 671835ce..f65e4a38 100644 --- a/veilid-core/src/storage_manager/record_store.rs +++ b/veilid-core/src/storage_manager/record_store.rs @@ -21,8 +21,8 @@ where subkey_table: Option, record_index: 
LruCache>, subkey_cache: LruCache, - subkey_cache_total_size: usize, - total_storage_space: usize, + subkey_cache_total_size: LimitedSize, + total_storage_space: LimitedSize, dead_records: Vec<(RecordTableKey, Record)>, changed_records: HashSet, @@ -47,6 +47,13 @@ where { pub fn new(table_store: TableStore, name: &str, limits: RecordStoreLimits) -> Self { let subkey_cache_size = limits.subkey_cache_size as usize; + let limit_subkey_cache_total_size = limits + .max_subkey_cache_memory_mb + .map(|mb| mb * 1_048_576usize); + let limit_max_storage_space = limits + .max_storage_space_mb + .map(|mb| mb as u64 * 1_048_576u64); + Self { table_store, name: name.to_owned(), @@ -55,8 +62,16 @@ where subkey_table: None, record_index: LruCache::new(limits.max_records.unwrap_or(usize::MAX)), subkey_cache: LruCache::new(subkey_cache_size), - subkey_cache_total_size: 0, - total_storage_space: 0, + subkey_cache_total_size: LimitedSize::new( + "subkey_cache_total_size", + 0, + limit_subkey_cache_total_size, + ), + total_storage_space: LimitedSize::new( + "total_storage_space", + 0, + limit_max_storage_space, + ), dead_records: Vec::new(), changed_records: HashSet::new(), purge_dead_records_mutex: Arc::new(AsyncMutex::new(())), @@ -89,8 +104,17 @@ where let mut dead_records = Vec::new(); for ri in record_index_saved { // total the storage space - self.total_storage_space += mem::size_of::(); - self.total_storage_space += ri.1.total_size(); + self.total_storage_space + .add(mem::size_of::() as u64) + .unwrap(); + self.total_storage_space + .add(ri.1.total_size() as u64) + .unwrap(); + if let Err(_) = self.total_storage_space.commit() { + // If we overflow the limit, kill off the record + dead_records.push((ri.0, ri.1)); + continue; + } // add to index and ensure we deduplicate in the case of an error if let Some(v) = self.record_index.insert(ri.0, ri.1, |k, v| { @@ -130,24 +154,32 @@ where // Old data dead_size += old_record_data.total_size(); } - self.subkey_cache_total_size -= dead_size; - self.subkey_cache_total_size += record_data_total_size; + self.subkey_cache_total_size.sub(dead_size).unwrap(); + self.subkey_cache_total_size + .add(record_data_total_size) + .unwrap(); // Purge over size limit - if let Some(max_subkey_cache_memory_mb) = self.limits.max_subkey_cache_memory_mb { - while self.subkey_cache_total_size > (max_subkey_cache_memory_mb * 1_048_576usize) { - if let Some((_, v)) = self.subkey_cache.remove_lru() { - self.subkey_cache_total_size -= v.total_size(); - } else { - break; - } + while self.subkey_cache_total_size.commit().is_err() { + if let Some((_, v)) = self.subkey_cache.remove_lru() { + self.subkey_cache_total_size.saturating_sub(v.total_size()); + } else { + self.subkey_cache_total_size.rollback(); + + log_stor!(error "subkey cache should not be empty, has {} bytes unaccounted for",self.subkey_cache_total_size.get()); + + self.subkey_cache_total_size.set(0); + self.subkey_cache_total_size.commit().unwrap(); + break; } } } fn remove_from_subkey_cache(&mut self, key: SubkeyTableKey) { if let Some(dead_record_data) = self.subkey_cache.remove(&key) { - self.subkey_cache_total_size -= dead_record_data.total_size(); + self.subkey_cache_total_size + .saturating_sub(dead_record_data.total_size()); + self.subkey_cache_total_size.commit().unwrap(); } } @@ -206,8 +238,11 @@ where } // Remove from total size - self.total_storage_space -= mem::size_of::(); - self.total_storage_space -= v.total_size(); + self.total_storage_space + .saturating_sub(mem::size_of::() as u64); + self.total_storage_space 
+ .saturating_sub(v.total_size() as u64); + self.total_storage_space.commit().unwrap(); } if let Err(e) = rt_xact.commit().await { log_stor!(error "failed to commit record table transaction: {}", e); @@ -258,12 +293,12 @@ where }; // If over size limit, dont create record - let new_total_storage_space = - self.total_storage_space + mem::size_of::() + record.total_size(); - if let Some(max_storage_space_mb) = &self.limits.max_storage_space_mb { - if new_total_storage_space > (max_storage_space_mb * 1_048_576usize) { - apibail_try_again!(); - } + self.total_storage_space + .add((mem::size_of::() + record.total_size()) as u64) + .unwrap(); + if !self.total_storage_space.check_limit() { + self.total_storage_space.rollback(); + apibail_try_again!(); } // Save to record table @@ -286,7 +321,7 @@ where } // Update storage space - self.total_storage_space = new_total_storage_space; + self.total_storage_space.commit().unwrap(); Ok(()) } @@ -482,12 +517,14 @@ where } // Check new total storage space - let new_total_storage_space = - self.total_storage_space + new_record_data_size - prior_record_data_size; - if let Some(max_storage_space_mb) = self.limits.max_storage_space_mb { - if new_total_storage_space > (max_storage_space_mb * 1_048_576usize) { - apibail_try_again!(); - } + self.total_storage_space + .sub(prior_record_data_size as u64) + .unwrap(); + self.total_storage_space + .add(new_record_data_size as u64) + .unwrap(); + if !self.total_storage_space.check_limit() { + apibail_try_again!(); } // Write subkey @@ -506,6 +543,9 @@ where }) .expect("record should still be here"); + // Update storage space + self.total_storage_space.commit().unwrap(); + Ok(()) } @@ -513,16 +553,19 @@ where /// This will force a garbage collection of the space immediately /// If zero is passed in here, a garbage collection will be performed of dead records /// without removing any live records - pub async fn reclaim_space(&mut self, space: usize) { + pub async fn reclaim_space(&mut self, space: usize) -> usize { let mut reclaimed = 0usize; while reclaimed < space { if let Some((k, v)) = self.record_index.remove_lru() { reclaimed += mem::size_of::(); reclaimed += v.total_size(); self.add_dead_record(k, v); + } else { + break; } } self.purge_dead_records(false).await; + reclaimed } pub(super) fn debug_records(&self) -> String { @@ -532,9 +575,9 @@ where out += "Record Index:\n"; for (rik, rec) in &self.record_index { out += &format!( - " {} @ {} len={} subkeys={}\n", + " {} age={} len={} subkeys={}\n", rik.key.to_string(), - rec.last_touched().as_u64(), + debug_duration(get_timestamp() - rec.last_touched().as_u64()), rec.record_data_size(), rec.stored_subkeys(), ); @@ -542,9 +585,9 @@ where out += &format!("Subkey Cache Count: {}\n", self.subkey_cache.len()); out += &format!( "Subkey Cache Total Size: {}\n", - self.subkey_cache_total_size + self.subkey_cache_total_size.get() ); - out += &format!("Total Storage Space: {}\n", self.total_storage_space); + out += &format!("Total Storage Space: {}\n", self.total_storage_space.get()); out += &format!("Dead Records: {}\n", self.dead_records.len()); for dr in &self.dead_records { out += &format!(" {}\n", dr.0.key.to_string()); diff --git a/veilid-core/src/storage_manager/set_value.rs b/veilid-core/src/storage_manager/set_value.rs index 0d79dfe1..5222f89a 100644 --- a/veilid-core/src/storage_manager/set_value.rs +++ b/veilid-core/src/storage_manager/set_value.rs @@ -62,7 +62,7 @@ impl StorageManager { let vres = rpc_processor .clone() .rpc_call_set_value( - 
Destination::direct(next_node).with_safety(safety_selection), + Destination::direct(next_node.clone()).with_safety(safety_selection), key, subkey, value, @@ -70,7 +70,7 @@ impl StorageManager { send_descriptor, ) .await?; - let sva = network_result_value_or_log!(vres => { + let sva = network_result_value_or_log!(vres => [ format!(": next_node={} safety_selection={:?} key={} subkey={} send_descriptor={}", next_node, safety_selection, key, subkey, send_descriptor) ] { // Any other failures, just try the next node and pretend this one never happened return Ok(None); }); diff --git a/veilid-core/src/tests/common/test_veilid_config.rs b/veilid-core/src/tests/common/test_veilid_config.rs index f7951d6d..ebb4838d 100644 --- a/veilid-core/src/tests/common/test_veilid_config.rs +++ b/veilid-core/src/tests/common/test_veilid_config.rs @@ -196,6 +196,7 @@ fn config_callback(key: String) -> ConfigCallbackReturn { "network.client_whitelist_timeout_ms" => Ok(Box::new(300_000u32)), "network.reverse_connection_receipt_time_ms" => Ok(Box::new(5_000u32)), "network.hole_punch_receipt_time_ms" => Ok(Box::new(5_000u32)), + "network.network_key_password" => Ok(Box::new(Option::::None)), "network.routing_table.node_id" => Ok(Box::new(TypedKeySet::new())), "network.routing_table.node_id_secret" => Ok(Box::new(TypedSecretSet::new())), "network.routing_table.bootstrap" => Ok(Box::new(Vec::::new())), @@ -330,6 +331,7 @@ pub async fn test_config() { assert_eq!(inner.network.client_whitelist_timeout_ms, 300_000u32); assert_eq!(inner.network.reverse_connection_receipt_time_ms, 5_000u32); assert_eq!(inner.network.hole_punch_receipt_time_ms, 5_000u32); + assert_eq!(inner.network.network_key_password, Option::::None); assert_eq!(inner.network.rpc.concurrency, 2u32); assert_eq!(inner.network.rpc.queue_size, 1024u32); assert_eq!(inner.network.rpc.timeout_ms, 5_000u32); diff --git a/veilid-core/src/veilid_api/api.rs b/veilid-core/src/veilid_api/api.rs index e694f111..5953a251 100644 --- a/veilid-core/src/veilid_api/api.rs +++ b/veilid-core/src/veilid_api/api.rs @@ -142,7 +142,6 @@ impl VeilidAPI { } /// Connect to the network - #[instrument(level = "debug", err, skip_all)] pub async fn attach(&self) -> VeilidAPIResult<()> { let attachment_manager = self.attachment_manager()?; if !attachment_manager.attach().await { @@ -152,7 +151,6 @@ impl VeilidAPI { } /// Disconnect from the network - #[instrument(level = "debug", err, skip_all)] pub async fn detach(&self) -> VeilidAPIResult<()> { let attachment_manager = self.attachment_manager()?; if !attachment_manager.detach().await { @@ -164,7 +162,6 @@ impl VeilidAPI { //////////////////////////////////////////////////////////////// // Routing Context - #[instrument(level = "debug", skip(self))] pub fn routing_context(&self) -> RoutingContext { RoutingContext::new(self.clone()) } @@ -175,7 +172,6 @@ impl VeilidAPI { /// Allocate a new private route set with default cryptography and network options /// Returns a route id and a publishable 'blob' with the route encrypted with each crypto kind /// Those nodes importing the blob will have their choice of which crypto kind to use - #[instrument(level = "debug", skip(self))] pub async fn new_private_route(&self) -> VeilidAPIResult<(RouteId, Vec)> { self.new_custom_private_route( &VALID_CRYPTO_KINDS, @@ -186,7 +182,6 @@ impl VeilidAPI { } /// - #[instrument(level = "debug", skip(self))] pub async fn new_custom_private_route( &self, crypto_kinds: &[CryptoKind], @@ -238,14 +233,12 @@ impl VeilidAPI { Ok((route_id, blob)) } - 
#[instrument(level = "debug", skip(self))] pub fn import_remote_private_route(&self, blob: Vec) -> VeilidAPIResult { let rss = self.routing_table()?.route_spec_store(); rss.import_remote_private_route(blob) .map_err(|e| VeilidAPIError::invalid_argument(e, "blob", "private route blob")) } - #[instrument(level = "debug", skip(self))] pub fn release_private_route(&self, route_id: RouteId) -> VeilidAPIResult<()> { let rss = self.routing_table()?.route_spec_store(); if !rss.release_route(route_id) { @@ -257,7 +250,6 @@ impl VeilidAPI { //////////////////////////////////////////////////////////////// // App Calls - #[instrument(level = "debug", skip(self))] pub async fn app_call_reply( &self, call_id: OperationId, @@ -274,7 +266,6 @@ impl VeilidAPI { // Tunnel Building #[cfg(feature = "unstable-tunnels")] - #[instrument(level = "debug", err, skip(self))] pub async fn start_tunnel( &self, _endpoint_mode: TunnelMode, @@ -284,7 +275,6 @@ impl VeilidAPI { } #[cfg(feature = "unstable-tunnels")] - #[instrument(level = "debug", err, skip(self))] pub async fn complete_tunnel( &self, _endpoint_mode: TunnelMode, @@ -295,7 +285,6 @@ impl VeilidAPI { } #[cfg(feature = "unstable-tunnels")] - #[instrument(level = "debug", err, skip(self))] pub async fn cancel_tunnel(&self, _tunnel_id: TunnelId) -> VeilidAPIResult { panic!("unimplemented"); } diff --git a/veilid-core/src/veilid_api/debug.rs b/veilid-core/src/veilid_api/debug.rs index 36ae4d4a..c40c46df 100644 --- a/veilid-core/src/veilid_api/debug.rs +++ b/veilid-core/src/veilid_api/debug.rs @@ -31,6 +31,14 @@ fn get_string(text: &str) -> Option { Some(text.to_owned()) } +fn get_subkeys(text: &str) -> Option { + if let Some(n) = get_number(text) { + Some(ValueSubkeyRangeSet::single(n.try_into().ok()?)) + } else { + ValueSubkeyRangeSet::from_str(text).ok() + } +} + fn get_route_id( rss: RouteSpecStore, allow_allocated: bool, @@ -901,6 +909,31 @@ impl VeilidAPI { return Ok(out); } + async fn debug_record_purge(&self, args: Vec) -> VeilidAPIResult { + // [bytes] + let storage_manager = self.storage_manager()?; + + let scope = get_debug_argument_at(&args, 1, "debug_record_purge", "scope", get_string)?; + let bytes = get_debug_argument_at(&args, 2, "debug_record_purge", "bytes", get_number).ok(); + let out = match scope.as_str() { + "local" => storage_manager.purge_local_records(bytes).await, + "remote" => storage_manager.purge_remote_records(bytes).await, + _ => "Invalid scope\n".to_owned(), + }; + return Ok(out); + } + async fn debug_record_get(&self, args: Vec) -> VeilidAPIResult { + let storage_manager = self.storage_manager()?; + + let key = get_debug_argument_at(&args, 1, "debug_record_get", "key", get_typed_key)?; + let subkeys = + get_debug_argument_at(&args, 2, "debug_record_subkeys", "subkeys", get_string)?; + + // let rc = self.routing_context(); + + return Ok("TODO".to_owned()); + } + async fn debug_record(&self, args: String) -> VeilidAPIResult { let args: Vec = args.split_whitespace().map(|s| s.to_owned()).collect(); @@ -908,6 +941,10 @@ impl VeilidAPI { if command == "list" { self.debug_record_list(args).await + } else if command == "purge" { + self.debug_record_purge(args).await + } else if command == "get" { + self.debug_record_get(args).await } else { Ok(">>> Unknown command\n".to_owned()) } @@ -936,7 +973,9 @@ impl VeilidAPI { list import test - record list + record list + purge [bytes] + get is: * direct: [+][] @@ -949,6 +988,9 @@ impl VeilidAPI { is: udp|tcp|ws|wss is: ipv4|ipv6 is: public|local + is: + * a number: 2 + * a comma-separated 
inclusive range list: 1..=3,5..=8 "# .to_owned()) } diff --git a/veilid-core/src/veilid_api/json_api/process.rs b/veilid-core/src/veilid_api/json_api/process.rs index 023820d0..f27a7c9a 100644 --- a/veilid-core/src/veilid_api/json_api/process.rs +++ b/veilid-core/src/veilid_api/json_api/process.rs @@ -259,11 +259,11 @@ impl JsonRequestProcessor { .add_routing_context(routing_context.clone().with_sequencing(sequencing)), } } - RoutingContextRequestOp::AppCall { target, request } => { + RoutingContextRequestOp::AppCall { target, message } => { RoutingContextResponseOp::AppCall { result: to_json_api_result_with_vec_u8( self.parse_target(target) - .then(|tr| async { routing_context.app_call(tr?, request).await }) + .then(|tr| async { routing_context.app_call(tr?, message).await }) .await, ), } diff --git a/veilid-core/src/veilid_api/json_api/routing_context.rs b/veilid-core/src/veilid_api/json_api/routing_context.rs index 94c55a9e..4ec8a881 100644 --- a/veilid-core/src/veilid_api/json_api/routing_context.rs +++ b/veilid-core/src/veilid_api/json_api/routing_context.rs @@ -29,7 +29,7 @@ pub enum RoutingContextRequestOp { target: String, #[serde(with = "json_as_base64")] #[schemars(with = "String")] - request: Vec, + message: Vec, }, AppMessage { target: String, diff --git a/veilid-core/src/veilid_api/routing_context.rs b/veilid-core/src/veilid_api/routing_context.rs index 26a8cc12..37989be1 100644 --- a/veilid-core/src/veilid_api/routing_context.rs +++ b/veilid-core/src/veilid_api/routing_context.rs @@ -137,15 +137,14 @@ impl RoutingContext { //////////////////////////////////////////////////////////////// // App-level Messaging - #[instrument(level = "debug", err, skip(self))] - pub async fn app_call(&self, target: Target, request: Vec) -> VeilidAPIResult> { + pub async fn app_call(&self, target: Target, message: Vec) -> VeilidAPIResult> { let rpc_processor = self.api.rpc_processor()?; // Get destination let dest = self.get_destination(target).await?; // Send app message - let answer = match rpc_processor.rpc_call_app_call(dest, request).await { + let answer = match rpc_processor.rpc_call_app_call(dest, message).await { Ok(NetworkResult::Value(v)) => v, Ok(NetworkResult::Timeout) => apibail_timeout!(), Ok(NetworkResult::ServiceUnavailable(e)) => { @@ -165,7 +164,6 @@ impl RoutingContext { Ok(answer.answer) } - #[instrument(level = "debug", err, skip(self))] pub async fn app_message(&self, target: Target, message: Vec) -> VeilidAPIResult<()> { let rpc_processor = self.api.rpc_processor()?; diff --git a/veilid-core/src/veilid_api/tests/fixtures.rs b/veilid-core/src/veilid_api/tests/fixtures.rs index bb503260..5373d7dd 100644 --- a/veilid-core/src/veilid_api/tests/fixtures.rs +++ b/veilid-core/src/veilid_api/tests/fixtures.rs @@ -110,6 +110,7 @@ pub fn fix_veilidconfiginner() -> VeilidConfigInner { client_whitelist_timeout_ms: 7000, reverse_connection_receipt_time_ms: 8000, hole_punch_receipt_time_ms: 9000, + network_key_password: None, routing_table: VeilidConfigRoutingTable { node_id: TypedKeySet::new(), node_id_secret: TypedSecretSet::new(), diff --git a/veilid-core/src/veilid_api/types/dht/mod.rs b/veilid-core/src/veilid_api/types/dht/mod.rs index 3a830b47..8b757907 100644 --- a/veilid-core/src/veilid_api/types/dht/mod.rs +++ b/veilid-core/src/veilid_api/types/dht/mod.rs @@ -12,7 +12,5 @@ pub use value_subkey_range_set::*; /// Value subkey pub type ValueSubkey = u32; -/// Value subkey range -pub type ValueSubkeyRange = (u32, u32); /// Value sequence number pub type ValueSeqNum = u32; diff 
--git a/veilid-core/src/veilid_api/types/dht/value_subkey_range_set.rs b/veilid-core/src/veilid_api/types/dht/value_subkey_range_set.rs index 68298f37..241223c1 100644 --- a/veilid-core/src/veilid_api/types/dht/value_subkey_range_set.rs +++ b/veilid-core/src/veilid_api/types/dht/value_subkey_range_set.rs @@ -41,6 +41,28 @@ impl ValueSubkeyRangeSet { } } +impl FromStr for ValueSubkeyRangeSet { + type Err = VeilidAPIError; + + fn from_str(value: &str) -> Result { + let mut data = RangeSetBlaze::::new(); + + for r in value.split(",") { + let r = r.trim(); + let Some((ss, es)) = r.split_once("..=") else { + return Err(VeilidAPIError::parse_error("can not parse ValueSubkeyRangeSet", r)); + }; + let sn = ValueSubkey::from_str(ss) + .map_err(|e| VeilidAPIError::parse_error("could not parse ValueSubkey", e))?; + let en = ValueSubkey::from_str(es) + .map_err(|e| VeilidAPIError::parse_error("could not parse ValueSubkey", e))?; + data.ranges_insert(sn..=en); + } + + Ok(ValueSubkeyRangeSet { data }) + } +} + impl Deref for ValueSubkeyRangeSet { type Target = RangeSetBlaze; diff --git a/veilid-core/src/veilid_config.rs b/veilid-core/src/veilid_config.rs index 98badf13..07ddf337 100644 --- a/veilid-core/src/veilid_config.rs +++ b/veilid-core/src/veilid_config.rs @@ -384,6 +384,7 @@ pub struct VeilidConfigNetwork { pub client_whitelist_timeout_ms: u32, pub reverse_connection_receipt_time_ms: u32, pub hole_punch_receipt_time_ms: u32, + pub network_key_password: Option, pub routing_table: VeilidConfigRoutingTable, pub rpc: VeilidConfigRPC, pub dht: VeilidConfigDHT, @@ -695,6 +696,7 @@ impl VeilidConfig { get_config!(inner.network.client_whitelist_timeout_ms); get_config!(inner.network.reverse_connection_receipt_time_ms); get_config!(inner.network.hole_punch_receipt_time_ms); + get_config!(inner.network.network_key_password); get_config!(inner.network.routing_table.node_id); get_config!(inner.network.routing_table.node_id_secret); get_config!(inner.network.routing_table.bootstrap); diff --git a/veilid-flutter/lib/routing_context.dart b/veilid-flutter/lib/routing_context.dart index 39956503..ec547f56 100644 --- a/veilid-flutter/lib/routing_context.dart +++ b/veilid-flutter/lib/routing_context.dart @@ -266,7 +266,7 @@ abstract class VeilidRoutingContext { Future deleteDHTRecord(TypedKey key); Future getDHTValue(TypedKey key, int subkey, bool forceRefresh); Future setDHTValue(TypedKey key, int subkey, Uint8List data); - Future watchDHTValues( - TypedKey key, ValueSubkeyRange subkeys, Timestamp expiration, int count); - Future cancelDHTWatch(TypedKey key, ValueSubkeyRange subkeys); + Future watchDHTValues(TypedKey key, List subkeys, + Timestamp expiration, int count); + Future cancelDHTWatch(TypedKey key, List subkeys); } diff --git a/veilid-flutter/lib/veilid_config.dart b/veilid-flutter/lib/veilid_config.dart index 86a32406..e6a5001e 100644 --- a/veilid-flutter/lib/veilid_config.dart +++ b/veilid-flutter/lib/veilid_config.dart @@ -703,6 +703,7 @@ class VeilidConfigNetwork { int clientWhitelistTimeoutMs; int reverseConnectionReceiptTimeMs; int holePunchReceiptTimeMs; + String? 
networkKeyPassword; VeilidConfigRoutingTable routingTable; VeilidConfigRPC rpc; VeilidConfigDHT dht; @@ -723,6 +724,7 @@ class VeilidConfigNetwork { required this.clientWhitelistTimeoutMs, required this.reverseConnectionReceiptTimeMs, required this.holePunchReceiptTimeMs, + this.networkKeyPassword, required this.routingTable, required this.rpc, required this.dht, @@ -745,6 +747,7 @@ class VeilidConfigNetwork { 'client_whitelist_timeout_ms': clientWhitelistTimeoutMs, 'reverse_connection_receipt_time_ms': reverseConnectionReceiptTimeMs, 'hole_punch_receipt_time_ms': holePunchReceiptTimeMs, + 'network_key_password': networkKeyPassword, 'routing_table': routingTable.toJson(), 'rpc': rpc.toJson(), 'dht': dht.toJson(), @@ -770,6 +773,7 @@ class VeilidConfigNetwork { reverseConnectionReceiptTimeMs = json['reverse_connection_receipt_time_ms'], holePunchReceiptTimeMs = json['hole_punch_receipt_time_ms'], + networkKeyPassword = json['network_key_password'], routingTable = VeilidConfigRoutingTable.fromJson(json['routing_table']), rpc = VeilidConfigRPC.fromJson(json['rpc']), dht = VeilidConfigDHT.fromJson(json['dht']), diff --git a/veilid-flutter/lib/veilid_ffi.dart b/veilid-flutter/lib/veilid_ffi.dart index f39773e8..1bf46e84 100644 --- a/veilid-flutter/lib/veilid_ffi.dart +++ b/veilid-flutter/lib/veilid_ffi.dart @@ -710,7 +710,7 @@ class VeilidRoutingContextFFI implements VeilidRoutingContext { } @override - Future watchDHTValues(TypedKey key, ValueSubkeyRange subkeys, + Future watchDHTValues(TypedKey key, List subkeys, Timestamp expiration, int count) async { final nativeKey = jsonEncode(key).toNativeUtf8(); final nativeSubkeys = jsonEncode(subkeys).toNativeUtf8(); @@ -726,7 +726,8 @@ class VeilidRoutingContextFFI implements VeilidRoutingContext { } @override - Future cancelDHTWatch(TypedKey key, ValueSubkeyRange subkeys) async { + Future cancelDHTWatch( + TypedKey key, List subkeys) async { final nativeKey = jsonEncode(key).toNativeUtf8(); final nativeSubkeys = jsonEncode(subkeys).toNativeUtf8(); diff --git a/veilid-flutter/lib/veilid_js.dart b/veilid-flutter/lib/veilid_js.dart index f1c5f84b..08a88140 100644 --- a/veilid-flutter/lib/veilid_js.dart +++ b/veilid-flutter/lib/veilid_js.dart @@ -129,7 +129,7 @@ class VeilidRoutingContextJS implements VeilidRoutingContext { } @override - Future watchDHTValues(TypedKey key, ValueSubkeyRange subkeys, + Future watchDHTValues(TypedKey key, List subkeys, Timestamp expiration, int count) async { final ts = await _wrapApiPromise(js_util.callMethod( wasm, "routing_context_watch_dht_values", [ @@ -143,7 +143,7 @@ class VeilidRoutingContextJS implements VeilidRoutingContext { } @override - Future cancelDHTWatch(TypedKey key, ValueSubkeyRange subkeys) { + Future cancelDHTWatch(TypedKey key, List subkeys) { return _wrapApiPromise(js_util.callMethod( wasm, "routing_context_cancel_dht_watch", diff --git a/veilid-flutter/rust/src/dart_ffi.rs b/veilid-flutter/rust/src/dart_ffi.rs index ba969ee7..9aae4eeb 100644 --- a/veilid-flutter/rust/src/dart_ffi.rs +++ b/veilid-flutter/rust/src/dart_ffi.rs @@ -264,7 +264,6 @@ pub extern "C" fn initialize_veilid_core(platform_config: FfiStr) { } #[no_mangle] -#[instrument(level = "debug")] pub extern "C" fn change_log_level(layer: FfiStr, log_level: FfiStr) { // get layer to change level on let layer = layer.into_opt_string().unwrap_or("all".to_owned()); @@ -1502,7 +1501,6 @@ pub struct VeilidVersion { } #[no_mangle] -#[instrument] pub extern "C" fn veilid_version() -> VeilidVersion { let (major, minor, patch) = 
veilid_core::veilid_version(); VeilidVersion { diff --git a/veilid-python/poetry_install.sh b/veilid-python/poetry_install.sh index c83dad5c..21eb50f6 100755 --- a/veilid-python/poetry_install.sh +++ b/veilid-python/poetry_install.sh @@ -1,3 +1,4 @@ #!/bin/bash +poetry config --local virtualenvs.in-project true export PYTHON_KEYRING_BACKEND=keyring.backends.null.Keyring poetry install diff --git a/veilid-python/tests/test_dht.py b/veilid-python/tests/test_dht.py index af17bc64..8bce06e8 100644 --- a/veilid-python/tests/test_dht.py +++ b/veilid-python/tests/test_dht.py @@ -67,10 +67,15 @@ async def test_set_get_dht_value(api_connection: veilid.VeilidAPI): vd2 = await rc.get_dht_value(rec.key, 0, False) assert vd2 != None + vd3 = await rc.get_dht_value(rec.key, 0, True) + assert vd3 != None + print("vd: {}", vd.__dict__) print("vd2: {}", vd2.__dict__) + print("vd3: {}", vd3.__dict__) assert vd == vd2 + assert vd2 == vd3 await rc.close_dht_record(rec.key) await rc.delete_dht_record(rec.key) diff --git a/veilid-python/tests/test_routing_context.py b/veilid-python/tests/test_routing_context.py index bf20d77c..d92b133b 100644 --- a/veilid-python/tests/test_routing_context.py +++ b/veilid-python/tests/test_routing_context.py @@ -1,6 +1,9 @@ # Routing context veilid tests import asyncio +import random +import sys +import os import pytest import veilid @@ -15,11 +18,11 @@ from .conftest import server_info async def test_routing_contexts(api_connection: veilid.VeilidAPI): rc = await api_connection.new_routing_context() async with rc: - rcp = await rc.with_privacy(release = False) + rcp = await rc.with_privacy(release=False) async with rcp: - rcps = await rcp.with_sequencing(veilid.Sequencing.ENSURE_ORDERED, release = False) + rcps = await rcp.with_sequencing(veilid.Sequencing.ENSURE_ORDERED, release=False) async with rcps: - rcpscp = await rcps.with_custom_privacy(veilid.Stability.RELIABLE, release = False) + rcpscp = await rcps.with_custom_privacy(veilid.Stability.RELIABLE, release=False) await rcpscp.release() @@ -43,7 +46,7 @@ async def test_routing_context_app_message_loopback(): # make a routing context that uses a safety route rc = await (await api.new_routing_context()).with_privacy() async with rc: - + # make a new local private route prl, blob = await api.new_private_route() @@ -80,7 +83,7 @@ async def test_routing_context_app_call_loopback(): # make a routing context that uses a safety route rc = await (await api.new_routing_context()).with_privacy() async with rc: - + # make a new local private route prl, blob = await api.new_private_route() @@ -109,3 +112,158 @@ async def test_routing_context_app_call_loopback(): # now we should get the reply from the call result = await app_call_task assert result == reply + + +@pytest.mark.asyncio +async def test_routing_context_app_message_loopback_big_packets(): + + app_message_queue: asyncio.Queue = asyncio.Queue() + + global got_message + got_message = 0 + async def app_message_queue_update_callback(update: veilid.VeilidUpdate): + if update.kind == veilid.VeilidUpdateKind.APP_MESSAGE: + global got_message + got_message += 1 + sys.stdout.write("{} ".format(got_message)) + await app_message_queue.put(update) + + sent_messages: set[bytes] = set() + + hostname, port = server_info() + api = await veilid.json_api_connect( + hostname, port, app_message_queue_update_callback + ) + async with api: + # purge routes to ensure we start fresh + await api.debug("purge routes") + + # make a routing context that uses a safety route + rc = await (await (await 
api.new_routing_context()).with_privacy()).with_sequencing(veilid.Sequencing.ENSURE_ORDERED) + async with rc: + + # make a new local private route + prl, blob = await api.new_private_route() + + # import it as a remote route as well so we can send to it + prr = await api.import_remote_private_route(blob) + + # do this test 1000 times + for _ in range(1000): + + # send a random sized random app message to our own private route + message = random.randbytes(random.randint(0, 32768)) + await rc.app_message(prr, message) + + sent_messages.add(message) + + # we should get the same messages back + print(len(sent_messages)) + for n in range(len(sent_messages)): + print(n) + update: veilid.VeilidUpdate = await asyncio.wait_for( + app_message_queue.get(), timeout=10 + ) + assert isinstance(update.detail, veilid.VeilidAppMessage) + + assert update.detail.message in sent_messages + +@pytest.mark.asyncio +async def test_routing_context_app_call_loopback_big_packets(): + global got_message + got_message = 0 + + app_call_queue: asyncio.Queue = asyncio.Queue() + + async def app_call_queue_update_callback(update: veilid.VeilidUpdate): + if update.kind == veilid.VeilidUpdateKind.APP_CALL: + await app_call_queue.put(update) + + async def app_call_queue_task_handler(api: veilid.VeilidAPI): + while True: + update = await app_call_queue.get() + + global got_message + got_message += 1 + + sys.stdout.write("{} ".format(got_message)) + sys.stdout.flush() + + await api.app_call_reply(update.detail.call_id, update.detail.message) + + hostname, port = server_info() + api = await veilid.json_api_connect( + hostname, port, app_call_queue_update_callback + ) + async with api: + # purge routes to ensure we start fresh + await api.debug("purge routes") + + app_call_task = asyncio.create_task( + app_call_queue_task_handler(api), name="app call task" + ) + + # make a routing context that uses a safety route + rc = await (await (await api.new_routing_context()).with_privacy()).with_sequencing(veilid.Sequencing.ENSURE_ORDERED) + async with rc: + + # make a new local private route + prl, blob = await api.new_private_route() + + # import it as a remote route as well so we can send to it + prr = await api.import_remote_private_route(blob) + + # do this test 10 times + for _ in range(10): + + # send a random sized random app message to our own private route + message = random.randbytes(random.randint(0, 32768)) + out_message = await rc.app_call(prr, message) + + assert message == out_message + + app_call_task.cancel() + + +@pytest.mark.skipif(os.getenv("NOSKIP")!="1", reason="unneeded test, only for performance check") +@pytest.mark.asyncio +async def test_routing_context_app_message_loopback_bandwidth(): + + app_message_queue: asyncio.Queue = asyncio.Queue() + + async def app_message_queue_update_callback(update: veilid.VeilidUpdate): + if update.kind == veilid.VeilidUpdateKind.APP_MESSAGE: + await app_message_queue.put(True) + + hostname, port = server_info() + api = await veilid.json_api_connect( + hostname, port, app_message_queue_update_callback + ) + async with api: + # purge routes to ensure we start fresh + await api.debug("purge routes") + + # make a routing context that uses a safety route + #rc = await (await (await api.new_routing_context()).with_privacy()).with_sequencing(veilid.Sequencing.ENSURE_ORDERED) + #rc = await (await api.new_routing_context()).with_privacy() + rc = await api.new_routing_context() + async with rc: + + # make a new local private route + prl, blob = await api.new_private_route() + + # import it 
as a remote route as well so we can send to it + prr = await api.import_remote_private_route(blob) + + # do this test 1000 times + message = random.randbytes(16384) + for _ in range(10000): + + # send a random sized random app message to our own private route + await rc.app_message(prr, message) + + # we should get the same number of messages back (not storing all that data) + for _ in range(10000): + await asyncio.wait_for( + app_message_queue.get(), timeout=10 + ) diff --git a/veilid-python/veilid/api.py b/veilid-python/veilid/api.py index e468eb6d..9008bbf4 100644 --- a/veilid-python/veilid/api.py +++ b/veilid-python/veilid/api.py @@ -166,7 +166,7 @@ class TableDb(ABC): class CryptoSystem(ABC): - + async def __aenter__(self) -> Self: return self diff --git a/veilid-python/veilid/config.py b/veilid-python/veilid/config.py index 59522c50..9533a64b 100644 --- a/veilid-python/veilid/config.py +++ b/veilid-python/veilid/config.py @@ -203,6 +203,7 @@ class VeilidConfigNetwork(ConfigBase): client_whitelist_timeout_ms: int reverse_connection_receipt_time_ms: int hole_punch_receipt_time_ms: int + network_key_password: Optional[str] routing_table: VeilidConfigRoutingTable rpc: VeilidConfigRPC dht: VeilidConfigDHT diff --git a/veilid-python/veilid/json_api.py b/veilid-python/veilid/json_api.py index 6881395a..4725246e 100644 --- a/veilid-python/veilid/json_api.py +++ b/veilid-python/veilid/json_api.py @@ -135,6 +135,8 @@ class _JsonVeilidAPI(VeilidAPI): # Resolve the request's future to the response json if reqfuture is not None: reqfuture.set_result(j) + else: + print("Missing id: {}", id) async def handle_recv_messages(self): # Read lines until we're done @@ -485,7 +487,7 @@ class _JsonRoutingContext(RoutingContext): await self.release() return self.__class__(self.api, new_rc_id) - async def app_call(self, target: TypedKey | RouteId, request: bytes) -> bytes: + async def app_call(self, target: TypedKey | RouteId, message: bytes) -> bytes: return urlsafe_b64decode_no_pad( raise_api_result( await self.api.send_ndjson_request( @@ -494,7 +496,7 @@ class _JsonRoutingContext(RoutingContext): rc_id=self.rc_id, rc_op=RoutingContextOperation.APP_CALL, target=target, - request=request, + message=message, ) ) ) diff --git a/veilid-python/veilid/schema/RecvMessage.json b/veilid-python/veilid/schema/RecvMessage.json index 073e0bf1..142ebd10 100644 --- a/veilid-python/veilid/schema/RecvMessage.json +++ b/veilid-python/veilid/schema/RecvMessage.json @@ -3516,6 +3516,12 @@ "format": "uint32", "minimum": 0.0 }, + "network_key_password": { + "type": [ + "string", + "null" + ] + }, "protocol": { "$ref": "#/definitions/VeilidConfigProtocol" }, diff --git a/veilid-python/veilid/schema/Request.json b/veilid-python/veilid/schema/Request.json index 82119da0..83ae07e8 100644 --- a/veilid-python/veilid/schema/Request.json +++ b/veilid-python/veilid/schema/Request.json @@ -259,20 +259,20 @@ { "type": "object", "required": [ + "message", "rc_op", - "request", "target" ], "properties": { + "message": { + "type": "string" + }, "rc_op": { "type": "string", "enum": [ "AppCall" ] }, - "request": { - "type": "string" - }, "target": { "type": "string" } diff --git a/veilid-server/Cargo.toml b/veilid-server/Cargo.toml index 11317371..a2193b74 100644 --- a/veilid-server/Cargo.toml +++ b/veilid-server/Cargo.toml @@ -17,6 +17,7 @@ crypto-test-none = ["rt-tokio", "veilid-core/crypto-test-none"] rt-async-std = ["veilid-core/rt-async-std", "async-std", "opentelemetry/rt-async-std", "opentelemetry-otlp/grpc-sys"] rt-tokio = 
["veilid-core/rt-tokio", "tokio", "tokio-stream", "tokio-util", "opentelemetry/rt-tokio", "console-subscriber"] tracking = ["veilid-core/tracking"] +network-result-extra = ["veilid-core/network-result-extra"] [dependencies] veilid-core = { path = "../veilid-core", default-features = false } diff --git a/veilid-server/src/client_api.rs b/veilid-server/src/client_api.rs index d13b2a7d..d4ea2541 100644 --- a/veilid-server/src/client_api.rs +++ b/veilid-server/src/client_api.rs @@ -76,7 +76,6 @@ impl ClientApi { crate::server::shutdown(); } - #[instrument(level = "trace", skip_all)] fn change_log_level( &self, layer: String, @@ -107,7 +106,6 @@ impl ClientApi { trace!("ClientApi::stop: stopped"); } - #[instrument(level = "trace", skip(self), err)] async fn handle_incoming(self, bind_addr: SocketAddr) -> std::io::Result<()> { let listener = TcpListener::bind(bind_addr).await?; debug!("Client API listening on: {:?}", bind_addr); @@ -402,7 +400,6 @@ impl ClientApi { awg.done(); } - #[instrument(level = "trace", skip(self))] pub fn handle_update(&self, veilid_update: veilid_core::VeilidUpdate) { // serialize update to NDJSON let veilid_update = serialize_json(json_api::RecvMessage::Update(veilid_update)) + "\n"; diff --git a/veilid-server/src/cmdline.rs b/veilid-server/src/cmdline.rs index cdc4158a..4701b791 100644 --- a/veilid-server/src/cmdline.rs +++ b/veilid-server/src/cmdline.rs @@ -152,6 +152,11 @@ fn do_clap_matches(default_config_path: &OsStr) -> Result EyreResult<(Settings, ArgMatches)> { if matches.occurrences_of("new-password") != 0 { settingsrw.core.protected_store.new_device_encryption_key_password = Some(matches.value_of("new-password").unwrap().to_owned()); } + if matches.occurrences_of("network-key") != 0 { + settingsrw.core.network.network_key_password = Some(matches.value_of("new-password").unwrap().to_owned()); + } if matches.occurrences_of("dump-txt-record") != 0 { // Turn off terminal logging so we can be interactive @@ -287,8 +295,10 @@ pub fn process_command_line() -> EyreResult<(Settings, ArgMatches)> { let mut out: Vec = Vec::new(); for x in x.split(',') { let x = x.trim().to_string(); - println!(" {}", x); - out.push(x); + if !x.is_empty() { + println!(" {}", x); + out.push(x); + } } out } diff --git a/veilid-server/src/settings.rs b/veilid-server/src/settings.rs index 0fc09ce9..772742e9 100644 --- a/veilid-server/src/settings.rs +++ b/veilid-server/src/settings.rs @@ -69,6 +69,7 @@ core: client_whitelist_timeout_ms: 300000 reverse_connection_receipt_time_ms: 5000 hole_punch_receipt_time_ms: 5000 + network_key_password: null routing_table: node_id: null node_id_secret: null @@ -582,6 +583,7 @@ pub struct Network { pub client_whitelist_timeout_ms: u32, pub reverse_connection_receipt_time_ms: u32, pub hole_punch_receipt_time_ms: u32, + pub network_key_password: Option, pub routing_table: RoutingTable, pub rpc: Rpc, pub dht: Dht, @@ -994,6 +996,7 @@ impl Settings { set_config_value!(inner.core.network.client_whitelist_timeout_ms, value); set_config_value!(inner.core.network.reverse_connection_receipt_time_ms, value); set_config_value!(inner.core.network.hole_punch_receipt_time_ms, value); + set_config_value!(inner.core.network.network_key_password, value); set_config_value!(inner.core.network.routing_table.node_id, value); set_config_value!(inner.core.network.routing_table.node_id_secret, value); set_config_value!(inner.core.network.routing_table.bootstrap, value); @@ -1174,6 +1177,9 @@ impl Settings { "network.hole_punch_receipt_time_ms" => { 
Ok(Box::new(inner.core.network.hole_punch_receipt_time_ms)) } + "network.network_key_password" => { + Ok(Box::new(inner.core.network.network_key_password.clone())) + } "network.routing_table.node_id" => Ok(Box::new( inner .core @@ -1575,6 +1581,7 @@ mod tests { assert_eq!(s.core.network.client_whitelist_timeout_ms, 300_000u32); assert_eq!(s.core.network.reverse_connection_receipt_time_ms, 5_000u32); assert_eq!(s.core.network.hole_punch_receipt_time_ms, 5_000u32); + assert_eq!(s.core.network.network_key_password, None); assert_eq!(s.core.network.routing_table.node_id, None); assert_eq!(s.core.network.routing_table.node_id_secret, None); // diff --git a/veilid-tools/Cargo.toml b/veilid-tools/Cargo.toml index 1deb901a..4b3875da 100644 --- a/veilid-tools/Cargo.toml +++ b/veilid-tools/Cargo.toml @@ -18,6 +18,8 @@ rt-wasm-bindgen = [ "async_executors/bindgen", "async_executors/timer"] veilid_tools_android_tests = [ "dep:paranoid-android" ] veilid_tools_ios_tests = [ "dep:oslog", "dep:tracing-oslog" ] tracing = [ "dep:tracing", "dep:tracing-subscriber" ] +network-result-extra = [] +network-result-info = [] [dependencies] tracing = { version = "^0", features = ["log", "attributes"], optional = true } @@ -30,12 +32,13 @@ thiserror = "^1" futures-util = { version = "^0", default_features = false, features = ["alloc"] } parking_lot = "^0" once_cell = "^1" -owo-colors = "^3" stop-token = { version = "^0", default-features = false } rand = "^0.7" rust-fsm = "^0" backtrace = "^0" fn_name = "^0" +range-set-blaze = "0.1.5" +flume = { version = "^0", features = ["async"] } # Dependencies for native builds only # Linux, Windows, Mac, iOS, Android @@ -45,6 +48,7 @@ tokio = { version = "^1", features = ["full"], optional = true} tokio-util = { version = "^0", features = ["compat"], optional = true} maplit = "^1" futures-util = { version = "^0", default-features = false, features = ["async-await", "sink", "std", "io"] } +chrono = "^0" libc = "^0" nix = "^0" diff --git a/veilid-tools/src/assembly_buffer.rs b/veilid-tools/src/assembly_buffer.rs new file mode 100644 index 00000000..d60e7108 --- /dev/null +++ b/veilid-tools/src/assembly_buffer.rs @@ -0,0 +1,383 @@ +use super::*; +use range_set_blaze::RangeSetBlaze; +use std::io::{Error, ErrorKind}; +use std::sync::atomic::{AtomicU16, Ordering}; + +// AssemblyBuffer Version 1 properties +const VERSION_1: u8 = 1; +type LengthType = u16; +type SequenceType = u16; +const HEADER_LEN: usize = 8; +const MAX_LEN: usize = LengthType::MAX as usize; + +// XXX: keep statistics on all drops and why we dropped them +// XXX: move to config eventually? 
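The constants defined here fix the version-1 wire format used by `frame_chunk` and `insert_frame` later in this file: an 8-byte header (version byte, reserved byte, then big-endian u16 sequence number, chunk offset, and total message length) followed by the chunk payload. A small sketch of encoding and decoding that header under the same layout; the helper names are illustrative and not part of the patch:

// Illustrative header codec for the AssemblyBuffer v1 frame; field order and
// widths mirror frame_chunk()/insert_frame() below, the function names are hypothetical.
fn encode_header(seq: u16, off: u16, len: u16) -> [u8; 8] {
    let mut h = [0u8; 8];
    h[0] = 1; // VERSION_1
    h[1] = 0; // reserved
    h[2..4].copy_from_slice(&seq.to_be_bytes()); // message sequence number
    h[4..6].copy_from_slice(&off.to_be_bytes()); // offset of this chunk within the message
    h[6..8].copy_from_slice(&len.to_be_bytes()); // total message length
    h
}

fn decode_header(frame: &[u8]) -> Option<(u16, u16, u16)> {
    if frame.len() < 8 || frame[0] != 1 {
        return None; // truncated frame or unknown version
    }
    let seq = u16::from_be_bytes(frame[2..4].try_into().ok()?);
    let off = u16::from_be_bytes(frame[4..6].try_into().ok()?);
    let len = u16::from_be_bytes(frame[6..8].try_into().ok()?);
    Some((seq, off, len))
}

A message that fits in a single fragment is framed the same way, with off == 0 and len equal to the chunk length, which is how insert_frame recognizes it as complete without allocating an assembly.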
+const FRAGMENT_LEN: usize = 1280 - HEADER_LEN; +const MAX_CONCURRENT_HOSTS: usize = 256; +const MAX_ASSEMBLIES_PER_HOST: usize = 256; +const MAX_BUFFER_PER_HOST: usize = 256 * 1024; +const MAX_ASSEMBLY_AGE_US: u64 = 10_000_000; + +///////////////////////////////////////////////////////// + +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +struct PeerKey { + remote_addr: SocketAddr, +} + +#[derive(Clone, Eq, PartialEq)] +struct MessageAssembly { + timestamp: u64, + seq: SequenceType, + data: Vec, + parts: RangeSetBlaze, +} + +#[derive(Clone, Eq, PartialEq)] +struct PeerMessages { + total_buffer: usize, + assemblies: VecDeque, +} + +impl PeerMessages { + pub fn new() -> Self { + Self { + total_buffer: 0, + assemblies: VecDeque::new(), + } + } + + fn merge_in_data( + &mut self, + timestamp: u64, + ass: usize, + off: LengthType, + len: LengthType, + chunk: &[u8], + ) -> bool { + let assembly = &mut self.assemblies[ass]; + + // Ensure the new fragment hasn't redefined the message length, reusing the same seq + if assembly.data.len() != len as usize { + // Drop the assembly and just go with the new fragment as starting a new assembly + let seq = assembly.seq; + drop(assembly); + self.remove_assembly(ass); + self.new_assembly(timestamp, seq, off, len, chunk); + return false; + } + + let part_start = off; + let part_end = off + chunk.len() as LengthType - 1; + let part = RangeSetBlaze::from_iter([part_start..=part_end]); + + // if fragments overlap, drop the old assembly and go with a new one + if !assembly.parts.is_disjoint(&part) { + let seq = assembly.seq; + drop(assembly); + self.remove_assembly(ass); + self.new_assembly(timestamp, seq, off, len, chunk); + return false; + } + + // Merge part + assembly.parts |= part; + assembly.data[part_start as usize..=part_end as usize].copy_from_slice(chunk); + + // Check to see if this part is done + if assembly.parts.ranges_len() == 1 + && assembly.parts.first().unwrap() == 0 + && assembly.parts.last().unwrap() == len - 1 + { + return true; + } + false + } + + fn new_assembly( + &mut self, + timestamp: u64, + seq: SequenceType, + off: LengthType, + len: LengthType, + chunk: &[u8], + ) -> usize { + // ensure we have enough space for the new assembly + self.reclaim_space(len as usize); + + // make the assembly + let part_start = off; + let part_end = off + chunk.len() as LengthType - 1; + + let mut assembly = MessageAssembly { + timestamp, + seq, + data: vec![0u8; len as usize], + parts: RangeSetBlaze::from_iter([part_start..=part_end]), + }; + assembly.data[part_start as usize..=part_end as usize].copy_from_slice(chunk); + + // Add the buffer length in + self.total_buffer += assembly.data.len(); + self.assemblies.push_front(assembly); + + // Was pushed front, return the front index + 0 + } + + fn remove_assembly(&mut self, index: usize) -> MessageAssembly { + let assembly = self.assemblies.remove(index).unwrap(); + self.total_buffer -= assembly.data.len(); + assembly + } + + fn truncate_assemblies(&mut self, new_len: usize) { + for an in new_len..self.assemblies.len() { + self.total_buffer -= self.assemblies[an].data.len(); + } + self.assemblies.truncate(new_len); + } + + fn reclaim_space(&mut self, needed_space: usize) { + // If we have too many assemblies or too much buffer rotate some out + while self.assemblies.len() > (MAX_ASSEMBLIES_PER_HOST - 1) + || self.total_buffer > (MAX_BUFFER_PER_HOST - needed_space) + { + self.remove_assembly(self.assemblies.len() - 1); + } + } + + pub fn insert_fragment( + &mut self, + seq: SequenceType, + 
+ + pub fn insert_fragment( + &mut self, + seq: SequenceType, + off: LengthType, + len: LengthType, + chunk: &[u8], + ) -> Option<Vec<u8>> { + // Get the current timestamp + let cur_ts = get_timestamp(); + + // Get the assembly this belongs to by its sequence number + let mut ass = None; + for an in 0..self.assemblies.len() { + // If this assembly's timestamp is too old, then everything after it will be too, drop em all + let age = cur_ts.saturating_sub(self.assemblies[an].timestamp); + if age > MAX_ASSEMBLY_AGE_US { + self.truncate_assemblies(an); + break; + } + // If this assembly has a matching seq, then assemble with it + if self.assemblies[an].seq == seq { + ass = Some(an); + } + } + if ass.is_none() { + // Add a new assembly to the front and return the first index + self.new_assembly(cur_ts, seq, off, len, chunk); + return None; + } + let ass = ass.unwrap(); + + // Now that we have an assembly, merge in the fragment + let done = self.merge_in_data(cur_ts, ass, off, len, chunk); + + // If the assembly is now equal to the entire range, then return it + if done { + let assembly = self.remove_assembly(ass); + return Some(assembly.data); + } + + // Otherwise, do nothing + None + } +} + +///////////////////////////////////////////////////////// + +struct AssemblyBufferInner { + peer_message_map: HashMap<PeerKey, PeerMessages>, +} + +struct AssemblyBufferUnlockedInner { + outbound_lock_table: AsyncTagLockTable<SocketAddr>, + next_seq: AtomicU16, +} + +/// Packet reassembly and fragmentation handler +/// No retry, no acknowledgment, no flow control +/// Just trying to survive lower path MTU for larger messages +#[derive(Clone)] +pub struct AssemblyBuffer { + inner: Arc<Mutex<AssemblyBufferInner>>, + unlocked_inner: Arc<AssemblyBufferUnlockedInner>, +} + +impl AssemblyBuffer { + fn new_unlocked_inner() -> AssemblyBufferUnlockedInner { + AssemblyBufferUnlockedInner { + outbound_lock_table: AsyncTagLockTable::new(), + next_seq: AtomicU16::new(0), + } + } + fn new_inner() -> AssemblyBufferInner { + AssemblyBufferInner { + peer_message_map: HashMap::new(), + } + } + + pub fn new() -> Self { + Self { + inner: Arc::new(Mutex::new(Self::new_inner())), + unlocked_inner: Arc::new(Self::new_unlocked_inner()), + } + }
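Before reading insert_frame() below, it may help to see the 8-byte version-1 header spelled out. An editor-added illustration (not part of the patch); the byte offsets are the ones insert_frame() and frame_chunk() use:

    fn main() {
        // Hypothetical second fragment of a 3000-byte message: seq 0x0102,
        // chunk covering bytes 1272..=2543 of the message
        let mut frame = vec![1u8, 0u8];                     // [0] version, [1] reserved
        frame.extend_from_slice(&0x0102u16.to_be_bytes());  // [2..4] sequence number
        frame.extend_from_slice(&1272u16.to_be_bytes());    // [4..6] offset of chunk in message
        frame.extend_from_slice(&3000u16.to_be_bytes());    // [6..8] total message length
        frame.extend_from_slice(&[0u8; 1272]);              // [8..] chunk payload

        // The same decode insert_frame() performs
        let seq = u16::from_be_bytes(frame[2..4].try_into().unwrap());
        let off = u16::from_be_bytes(frame[4..6].try_into().unwrap());
        let len = u16::from_be_bytes(frame[6..8].try_into().unwrap());
        assert_eq!((seq, off, len), (0x0102, 1272, 3000));
        assert_eq!(frame.len(), 8 + 1272);
    }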
+ + /// Receive a packet chunk and add to the message assembly + /// if a message has been completely reassembled, return it + pub fn insert_frame(&self, frame: &[u8], remote_addr: SocketAddr) -> Option<Vec<u8>> { + // If we receive a zero length frame, pass it through + if frame.len() == 0 { + return Some(frame.to_vec()); + } + + // If we receive a frame smaller than or equal to the length of the header, drop it + // or if this frame is larger than our max message length, then drop it + if frame.len() <= HEADER_LEN || frame.len() > MAX_LEN { + return None; + } + + // --- Decode the header + + // Drop versions we don't understand + if frame[0] != VERSION_1 { + return None; + } + // Version 1 header + let seq = SequenceType::from_be_bytes(frame[2..4].try_into().unwrap()); + let off = LengthType::from_be_bytes(frame[4..6].try_into().unwrap()); + let len = LengthType::from_be_bytes(frame[6..HEADER_LEN].try_into().unwrap()); + let chunk = &frame[HEADER_LEN..]; + + // See if we have a whole message and not a fragment + if off == 0 && len as usize == chunk.len() { + return Some(chunk.to_vec()); + } + + // Drop fragments with offsets greater than or equal to the message length + if off >= len { + return None; + } + // Drop fragments where the chunk would be applied beyond the message length + if off as usize + chunk.len() > len as usize { + return None; + } + + // Get or create the peer message assemblies + // and drop the packet if we have too many peers + let mut inner = self.inner.lock(); + let peer_key = PeerKey { remote_addr }; + let peer_count = inner.peer_message_map.len(); + match inner.peer_message_map.entry(peer_key) { + std::collections::hash_map::Entry::Occupied(mut e) => { + let peer_messages = e.get_mut(); + + // Insert the fragment and see what comes out + let out = peer_messages.insert_fragment(seq, off, len, chunk); + + // If we are returning a message, see if there are any more assemblies for this peer + // If not, remove the peer + if out.is_some() { + if peer_messages.assemblies.len() == 0 { + e.remove(); + } + } + + out + } + std::collections::hash_map::Entry::Vacant(v) => { + // See if we have room for one more + if peer_count == MAX_CONCURRENT_HOSTS { + return None; + } + // Add the peer + let peer_messages = v.insert(PeerMessages::new()); + + // Insert the fragment and see what comes out + peer_messages.insert_fragment(seq, off, len, chunk) + } + } + } + + /// Add framing to chunk to send to the wire + fn frame_chunk(chunk: &[u8], offset: usize, message_len: usize, seq: SequenceType) -> Vec<u8> { + assert!(chunk.len() > 0); + assert!(message_len <= MAX_LEN); + assert!(offset + chunk.len() <= message_len); + + let off: LengthType = offset as LengthType; + let len: LengthType = message_len as LengthType; + + unsafe { + // Uninitialized vector, careful! + let mut out = unaligned_u8_vec_uninit(chunk.len() + HEADER_LEN); + + // Write out header + out[0] = VERSION_1; + out[1] = 0; // reserved + out[2..4].copy_from_slice(&seq.to_be_bytes()); // sequence number + out[4..6].copy_from_slice(&off.to_be_bytes()); // offset of chunk inside message + out[6..HEADER_LEN].copy_from_slice(&len.to_be_bytes()); // total length of message + + // Write out body + out[HEADER_LEN..].copy_from_slice(chunk); + out + } + } + + /// Split a message into packets and send them serially, ensuring + /// that they are sent consecutively to a particular remote address, + /// never interleaving packets from one message and another, to minimize reassembly problems + pub async fn split_message<S, F>( + &self, + data: Vec<u8>, + remote_addr: SocketAddr, + mut sender: S, + ) -> std::io::Result<NetworkResult<()>> + where + S: FnMut(Vec<u8>, SocketAddr) -> F, + F: Future<Output = std::io::Result<NetworkResult<()>>>, + { + if data.len() > MAX_LEN { + return Err(Error::from(ErrorKind::InvalidData)); + } + + // Do not frame or split anything zero bytes long, just send it + if data.len() == 0 { + return sender(data, remote_addr).await; + } + + // Lock per remote addr + let _tag_lock = self + .unlocked_inner + .outbound_lock_table + .lock_tag(remote_addr) + .await; + + // Get a message seq + let seq = self.unlocked_inner.next_seq.fetch_add(1, Ordering::Relaxed); + + // Chunk it up + let mut offset = 0usize; + let message_len = data.len(); + for chunk in data.chunks(FRAGMENT_LEN) { + // Frame chunk + let framed_chunk = Self::frame_chunk(chunk, offset, message_len, seq); + // Send chunk + network_result_try!(sender(framed_chunk, remote_addr).await?); + // Go to next chunk + offset += chunk.len() + } + + Ok(NetworkResult::value(())) + } +}
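End of the new module. As an editor-added usage sketch (not part of the patch), this is roughly how the API above is driven; it mirrors the unit tests added further down and assumes veilid_tools::{AssemblyBuffer, NetworkResult} plus the flume crate are in scope:

    use std::net::SocketAddr;
    use veilid_tools::{AssemblyBuffer, NetworkResult};

    async fn round_trip() {
        let assbuf_out = AssemblyBuffer::new();
        let assbuf_in = AssemblyBuffer::new();
        let (net_tx, net_rx) = flume::unbounded();

        // "Network" sender: just queue the framed chunks in order
        let sender = |framed_chunk: Vec<u8>, remote_addr: SocketAddr| {
            let net_tx = net_tx.clone();
            async move {
                net_tx.send_async((framed_chunk, remote_addr)).await.expect("should send");
                Ok(NetworkResult::value(()))
            }
        };

        // A 5000-byte message is larger than one fragment, so it gets split
        let remote_addr: SocketAddr = "1.2.3.4:5678".parse().unwrap();
        let message = vec![0xAAu8; 5000];
        assert!(matches!(
            assbuf_out.split_message(message.clone(), remote_addr, sender).await,
            Ok(NetworkResult::Value(()))
        ));

        // Feed the frames back in; the last one completes the reassembly
        drop(net_tx);
        let mut reassembled = None;
        while let Ok((frame, from)) = net_rx.recv_async().await {
            if let Some(out) = assbuf_in.insert_frame(&frame, from) {
                reassembled = Some(out);
            }
        }
        assert_eq!(reassembled, Some(message));
    }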
diff --git a/veilid-tools/src/lib.rs b/veilid-tools/src/lib.rs index 0e2f1d94..a2844887 100644 --- a/veilid-tools/src/lib.rs +++ b/veilid-tools/src/lib.rs @@ -1,4 +1,5 @@ // mod bump_port; +mod assembly_buffer; mod async_peek_stream; mod async_tag_lock; mod callback_state_machine; @@ -88,6 +89,7 @@ cfg_if! { } // pub use bump_port::*; +pub use assembly_buffer::*; pub use async_peek_stream::*; pub use async_tag_lock::*; pub use callback_state_machine::*; diff --git a/veilid-tools/src/network_result.rs b/veilid-tools/src/network_result.rs index e73a0599..ef782a1c 100644 --- a/veilid-tools/src/network_result.rs +++ b/veilid-tools/src/network_result.rs @@ -271,6 +271,20 @@ impl Error for NetworkResult {} ////////////////////////////////////////////////////////////////// // Non-fallible network result macros +#[macro_export] +macro_rules! network_result_raise { + ($r: expr) => { + match $r { + NetworkResult::Timeout => return Ok(NetworkResult::Timeout), + NetworkResult::ServiceUnavailable(s) => return Ok(NetworkResult::ServiceUnavailable(s)), + NetworkResult::NoConnection(e) => return Ok(NetworkResult::NoConnection(e)), + NetworkResult::AlreadyExists(e) => return Ok(NetworkResult::AlreadyExists(e)), + NetworkResult::InvalidMessage(s) => return Ok(NetworkResult::InvalidMessage(s)), + NetworkResult::Value(_) => panic!("Can not raise value"), + } + }; +} + #[macro_export] macro_rules! network_result_try { ($r: expr) => { @@ -313,89 +327,102 @@ macro_rules! network_result_try { #[macro_export] macro_rules! log_network_result { ($text:expr) => { - // cfg_if::cfg_if! { - // if #[cfg(debug_assertions)] { - // info!(target: "network_result", "{}", $text) - // } else { - debug!(target: "network_result", "{}", $text) - // } - // } + cfg_if::cfg_if! { + if #[cfg(feature="network-result-extra")] { + info!(target: "network_result", "{}", format!("{}", $text)) + } else { + debug!(target: "network_result", "{}", format!("{}", $text)) + } + } }; ($fmt:literal, $($arg:expr),+) => { - // cfg_if::cfg_if! { - // if #[cfg(debug_assertions)] { - // info!(target: "network_result", $fmt, $($arg),+); - // } else { - debug!(target: "network_result", $fmt, $($arg),+); - // } - // } + cfg_if::cfg_if! { + if #[cfg(feature="network-result-extra")] { + info!(target: "network_result", "{}", format!($fmt, $($arg),+)); + } else { + debug!(target: "network_result", "{}", format!($fmt, $($arg),+)); + } + } }; } #[macro_export] macro_rules!
network_result_value_or_log { - ($r: expr => $f:tt) => { + ($r:expr => $f:expr) => { + network_result_value_or_log!($r => [ "" ] $f ) + }; + ($r:expr => [ $d:expr ] $f:expr) => { { + #[cfg(feature="network-result-extra")] + let __extra_message = $d; + #[cfg(not(feature="network-result-extra"))] + let __extra_message = ""; match $r { NetworkResult::Timeout => { log_network_result!( - "{} at {}@{}:{} in {}", + "{} at {}@{}:{} in {}{}", "Timeout", file!(), line!(), column!(), - fn_name::uninstantiated!() + fn_name::uninstantiated!(), + __extra_message ); $f } - NetworkResult::ServiceUnavailable(s) => { + NetworkResult::ServiceUnavailable(ref s) => { log_network_result!( - "{}({}) at {}@{}:{} in {}", + "{}({}) at {}@{}:{} in {}{}", "ServiceUnavailable", s, file!(), line!(), column!(), - fn_name::uninstantiated!() + fn_name::uninstantiated!(), + __extra_message ); $f } - NetworkResult::NoConnection(e) => { + NetworkResult::NoConnection(ref e) => { log_network_result!( - "{}({}) at {}@{}:{} in {}", + "{}({}) at {}@{}:{} in {}{}", "No connection", e.to_string(), file!(), line!(), column!(), - fn_name::uninstantiated!() + fn_name::uninstantiated!(), + __extra_message ); $f } - NetworkResult::AlreadyExists(e) => { + NetworkResult::AlreadyExists(ref e) => { log_network_result!( - "{}({}) at {}@{}:{} in {}", + "{}({}) at {}@{}:{} in {}{}", "Already exists", e.to_string(), file!(), line!(), column!(), - fn_name::uninstantiated!() + fn_name::uninstantiated!(), + __extra_message ); $f } - NetworkResult::InvalidMessage(s) => { + NetworkResult::InvalidMessage(ref s) => { log_network_result!( - "{}({}) at {}@{}:{} in {}", + "{}({}) at {}@{}:{} in {}{}", "Invalid message", s, file!(), line!(), column!(), - fn_name::uninstantiated!() + fn_name::uninstantiated!(), + __extra_message ); $f } NetworkResult::Value(v) => v, } - }; + } }; + } diff --git a/veilid-tools/src/tests/native/mod.rs b/veilid-tools/src/tests/native/mod.rs index 75a481a8..65da6ca5 100644 --- a/veilid-tools/src/tests/native/mod.rs +++ b/veilid-tools/src/tests/native/mod.rs @@ -1,6 +1,7 @@ //! Test suite for Native #![cfg(not(target_arch = "wasm32"))] +mod test_assembly_buffer; mod test_async_peek_stream; use super::*; @@ -16,6 +17,8 @@ pub async fn run_all_tests() { test_async_peek_stream::test_all().await; info!("TEST: exec_test_async_tag_lock"); test_async_tag_lock::test_all().await; + info!("TEST: exec_test_assembly_buffer"); + test_assembly_buffer::test_all().await; info!("Finished unit tests"); } @@ -96,5 +99,14 @@ cfg_if! 
{ test_async_tag_lock::test_all().await; }); } + + #[test] + #[serial] + fn run_test_assembly_buffer() { + setup(); + block_on(async { + test_assembly_buffer::test_all().await; + }); + } } } diff --git a/veilid-tools/src/tests/native/test_assembly_buffer.rs b/veilid-tools/src/tests/native/test_assembly_buffer.rs new file mode 100644 index 00000000..52550dc3 --- /dev/null +++ b/veilid-tools/src/tests/native/test_assembly_buffer.rs @@ -0,0 +1,406 @@ +use rand::seq::SliceRandom; + +use crate::*; + +fn random_sockaddr() -> SocketAddr { + if get_random_u32() & 1 == 0 { + let mut addr = [0u8; 16]; + random_bytes(&mut addr); + let port = get_random_u32() as u16; + SocketAddr::V6(SocketAddrV6::new(Ipv6Addr::from(addr), port, 0, 0)) + } else { + let mut addr = [0u8; 4]; + random_bytes(&mut addr); + let port = get_random_u32() as u16; + SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::from(addr), port)) + } +} + +pub async fn test_single_out_in() { + info!("-- test_single_out_in"); + let assbuf_out = AssemblyBuffer::new(); + let assbuf_in = AssemblyBuffer::new(); + let (net_tx, net_rx) = flume::unbounded(); + let sender = |framed_chunk: Vec<u8>, remote_addr: SocketAddr| { + let net_tx = net_tx.clone(); + async move { + net_tx + .send_async((framed_chunk, remote_addr)) + .await + .expect("should send"); + Ok(NetworkResult::value(())) + } + }; + + for _ in 0..1000 { + let random_len = (get_random_u32() % 1000) as usize; + let mut message = vec![1u8; random_len]; + random_bytes(&mut message); + let remote_addr = random_sockaddr(); + + // Send single message below fragmentation limit + assert!(matches!( + assbuf_out + .split_message(message.clone(), remote_addr, sender) + .await, + Ok(NetworkResult::Value(())) + )); + + // Ensure we didn't fragment + let (frame, r_remote_addr) = net_rx.recv_async().await.expect("should recv"); + + // Send to input + let r_message = assbuf_in + .insert_frame(&frame, r_remote_addr) + .expect("should get one out"); + + // We should have gotten the same message + assert_eq!(r_message, message); + assert_eq!(r_remote_addr, remote_addr); + } + + // Should have consumed everything + assert!(net_rx.is_empty()) +} + +pub async fn test_one_frag_out_in() { + info!("-- test_one_frag_out_in"); + let assbuf_out = AssemblyBuffer::new(); + let assbuf_in = AssemblyBuffer::new(); + let (net_tx, net_rx) = flume::unbounded(); + let sender = |framed_chunk: Vec<u8>, remote_addr: SocketAddr| { + let net_tx = net_tx.clone(); + async move { + net_tx + .send_async((framed_chunk, remote_addr)) + .await + .expect("should send"); + Ok(NetworkResult::value(())) + } + }; + + let mut all_sent = HashSet::new(); + + // Sending + println!("sending"); + for _ in 0..10000 { + let random_len = (get_random_u32() % 1000) as usize + 1280; + let mut message = vec![1u8; random_len]; + random_bytes(&mut message); + let remote_addr = random_sockaddr(); + + // Send single message above fragmentation limit + all_sent.insert((message.clone(), remote_addr)); + assert!(matches!( + assbuf_out + .split_message(message.clone(), remote_addr, sender) + .await, + Ok(NetworkResult::Value(())) + )); + } + + println!("all_sent len={}", all_sent.len()); + + println!("fragments sent = {}", net_rx.len()); + + drop(net_tx); + + // Receiving + println!("receiving"); + + while let Ok((frame, r_remote_addr)) = net_rx.recv_async().await { + // Send to input + let r_message = assbuf_in.insert_frame(&frame, r_remote_addr); + + // We should have gotten the same message + if let Some(r_message) = r_message { + assert!(all_sent.remove(&(r_message,
r_remote_addr))); + } + } + println!("all_sent len={}", all_sent.len()); + + // Should have dropped no packets + assert_eq!(all_sent.len(), 0); +} + +pub async fn test_many_frags_out_in() { + info!("-- test_many_frags_out_in"); + let assbuf_out = AssemblyBuffer::new(); + let assbuf_in = AssemblyBuffer::new(); + let (net_tx, net_rx) = flume::unbounded(); + let sender = |framed_chunk: Vec<u8>, remote_addr: SocketAddr| { + let net_tx = net_tx.clone(); + async move { + net_tx + .send_async((framed_chunk, remote_addr)) + .await + .expect("should send"); + Ok(NetworkResult::value(())) + } + }; + + let mut all_sent = HashSet::new(); + + // Sending + let mut total_sent_size = 0usize; + println!("sending"); + for _ in 0..1000 { + let random_len = (get_random_u32() % 65536) as usize; + total_sent_size += random_len; + let mut message = vec![1u8; random_len]; + random_bytes(&mut message); + let remote_addr = random_sockaddr(); + + // Send single message + all_sent.insert((message.clone(), remote_addr)); + assert!(matches!( + assbuf_out + .split_message(message.clone(), remote_addr, sender) + .await, + Ok(NetworkResult::Value(())) + )); + } + + println!("all_sent len={}", all_sent.len()); + println!("total_sent_size = {}", total_sent_size); + println!("fragments sent = {}", net_rx.len()); + + drop(net_tx); + + // Receiving + println!("receiving"); + + while let Ok((frame, r_remote_addr)) = net_rx.recv_async().await { + // Send to input + let r_message = assbuf_in.insert_frame(&frame, r_remote_addr); + + // We should have gotten the same message + if let Some(r_message) = r_message { + assert!(all_sent.remove(&(r_message, r_remote_addr))); + } + } + println!("all_sent len={}", all_sent.len()); + + // Should have dropped no packets + assert_eq!(all_sent.len(), 0); +} + +pub async fn test_many_frags_out_in_single_host() { + info!("-- test_many_frags_out_in_single_host"); + let assbuf_out = AssemblyBuffer::new(); + let assbuf_in = AssemblyBuffer::new(); + let (net_tx, net_rx) = flume::unbounded(); + let sender = |framed_chunk: Vec<u8>, remote_addr: SocketAddr| { + let net_tx = net_tx.clone(); + async move { + net_tx + .send_async((framed_chunk, remote_addr)) + .await + .expect("should send"); + Ok(NetworkResult::value(())) + } + }; + + let mut all_sent = HashSet::new(); + + // Sending + let mut total_sent_size = 0usize; + println!("sending"); + for _ in 0..1000 { + let random_len = (get_random_u32() % 65536) as usize; + total_sent_size += random_len; + let mut message = vec![1u8; random_len]; + random_bytes(&mut message); + let remote_addr = SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::new(1, 2, 3, 4), 5678)); + + // Send single message + all_sent.insert((message.clone(), remote_addr)); + assert!(matches!( + assbuf_out + .split_message(message.clone(), remote_addr, sender) + .await, + Ok(NetworkResult::Value(())) + )); + } + + println!("all_sent len={}", all_sent.len()); + println!("total_sent_size = {}", total_sent_size); + println!("fragments sent = {}", net_rx.len()); + + drop(net_tx); + + // Receiving + println!("receiving"); + + while let Ok((frame, r_remote_addr)) = net_rx.recv_async().await { + // Send to input + let r_message = assbuf_in.insert_frame(&frame, r_remote_addr); + + // We should have gotten the same message + if let Some(r_message) = r_message { + assert!(all_sent.remove(&(r_message, r_remote_addr))); + } + } + println!("all_sent len={}", all_sent.len()); + + // Should have dropped no packets + assert_eq!(all_sent.len(), 0); +} + +pub async fn test_many_frags_with_drops() { + info!("-- 
test_many_frags_with_drops"); + let assbuf_out = AssemblyBuffer::new(); + let assbuf_in = AssemblyBuffer::new(); + let (net_tx, net_rx) = flume::unbounded(); + + let first = Arc::new(AtomicBool::new(true)); + + let sender = |framed_chunk: Vec<u8>, remote_addr: SocketAddr| { + let net_tx = net_tx.clone(); + let first = first.clone(); + async move { + // Send only first packet, drop rest + if first.swap(false, Ordering::Relaxed) { + net_tx + .send_async((framed_chunk, remote_addr)) + .await + .expect("should send"); + } + Ok(NetworkResult::value(())) + } + }; + + let mut all_sent = HashSet::new(); + + // Sending + let mut total_sent_size = 0usize; + let mut total_fragged = 0usize; + println!("sending"); + for _ in 0..1000 { + let random_len = (get_random_u32() % 65536) as usize; + if random_len > 1272 { + total_fragged += 1; + } + total_sent_size += random_len; + let mut message = vec![1u8; random_len]; + random_bytes(&mut message); + let remote_addr = random_sockaddr(); + + // Send single message + all_sent.insert((message.clone(), remote_addr)); + assert!(matches!( + assbuf_out + .split_message(message.clone(), remote_addr, sender) + .await, + Ok(NetworkResult::Value(())) + )); + + first.store(true, Ordering::Relaxed); + } + + println!("all_sent len={}", all_sent.len()); + println!("total_sent_size = {}", total_sent_size); + println!("fragments sent = {}", net_rx.len()); + println!("total_fragged = {}", total_fragged); + drop(net_tx); + + // Receiving + println!("receiving"); + + while let Ok((frame, r_remote_addr)) = net_rx.recv_async().await { + // Send to input + let r_message = assbuf_in.insert_frame(&frame, r_remote_addr); + + // We should have gotten the same message + if let Some(r_message) = r_message { + assert!(all_sent.remove(&(r_message, r_remote_addr))); + } + } + println!("all_sent len={}", all_sent.len()); + + // Should have dropped all fragged packets + assert_eq!(all_sent.len(), total_fragged); +} + +pub async fn test_many_frags_reordered() { + info!("-- test_many_frags_reordered"); + let assbuf_out = AssemblyBuffer::new(); + let assbuf_in = AssemblyBuffer::new(); + let (net_tx, net_rx) = flume::unbounded(); + + let reorder_buffer = Arc::new(Mutex::new(Vec::new())); + let sender = |framed_chunk: Vec<u8>, remote_addr: SocketAddr| { + let reorder_buffer = reorder_buffer.clone(); + async move { + reorder_buffer.lock().push((framed_chunk, remote_addr)); + Ok(NetworkResult::Value(())) + } + }; + + let mut all_sent = HashSet::new(); + + // Sending + let mut total_sent_size = 0usize; + let mut rng = rand::thread_rng(); + println!("sending"); + for _ in 0..1000 { + let random_len = (get_random_u32() % 65536) as usize; + total_sent_size += random_len; + let mut message = vec![1u8; random_len]; + random_bytes(&mut message); + let remote_addr = SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::new(1, 2, 3, 4), 5678)); + + // Send single message + all_sent.insert((message.clone(), remote_addr)); + assert!(matches!( + assbuf_out + .split_message(message.clone(), remote_addr, sender) + .await, + Ok(NetworkResult::Value(())) + )); + + // Shuffle fragments + let items = { + let mut rbinner = reorder_buffer.lock(); + rbinner.shuffle(&mut rng); + let items = rbinner.clone(); + rbinner.clear(); + items + }; + for p in items { + net_tx.send_async(p).await.expect("should send"); + } + } + + println!("all_sent len={}", all_sent.len()); + println!("total_sent_size = {}", total_sent_size); + println!("fragments sent = {}", net_rx.len()); + + drop(net_tx); + + // Receiving + println!("receiving"); + + while let 
Ok((frame, r_remote_addr)) = net_rx.recv_async().await { + // Send to input + let r_message = assbuf_in.insert_frame(&frame, r_remote_addr); + + // We should have gotten the same message + if let Some(r_message) = r_message { + assert!(all_sent.remove(&(r_message, r_remote_addr))); + } + } + println!("all_sent len={}", all_sent.len()); + + // Should have dropped no packets + assert_eq!(all_sent.len(), 0); +} + +pub async fn test_all() { + test_single_out_in().await; + test_one_frag_out_in().await; + test_many_frags_out_in().await; + test_many_frags_out_in_single_host().await; + test_many_frags_with_drops().await; + test_many_frags_reordered().await; +} diff --git a/veilid-tools/src/timestamp.rs b/veilid-tools/src/timestamp.rs index 4b5c187e..48fb4b89 100644 --- a/veilid-tools/src/timestamp.rs +++ b/veilid-tools/src/timestamp.rs @@ -11,8 +11,46 @@ cfg_if! { panic!("WASM requires browser environment"); } } + + pub fn debug_ts(ts: u64) -> String { + if is_browser() { + let now = Date::new_0(); now.set_time(Date::now()); + let date = Date::new_0(); + date.set_time((ts / 1000u64) as f64); + + let show_year = now.get_utc_full_year() != date.get_utc_full_year(); + let show_month = show_year || now.get_utc_month() != date.get_utc_month(); + let show_date = show_month || now.get_utc_date() != date.get_utc_date(); + + format!("{}{}{}{}", + if show_year { + format!("{:04}/",date.get_utc_full_year()) + } else { + "".to_owned() + }, + if show_month { + format!("{:02}/",date.get_utc_month()) + } else { + "".to_owned() + }, + if show_date { + format!("{:02}-",date.get_utc_date()) + } else { + "".to_owned() + }, + format!("{:02}:{:02}:{:02}.{:04}", + date.get_utc_hours(), + date.get_utc_minutes(), + date.get_utc_seconds(), + date.get_utc_milliseconds() + )) + } else { + panic!("WASM requires browser environment"); + } + } } else { use std::time::{SystemTime, UNIX_EPOCH}; + use chrono::{Datelike, Timelike}; pub fn get_timestamp() -> u64 { match SystemTime::now().duration_since(UNIX_EPOCH) { @@ -21,5 +59,80 @@ cfg_if! { } } + pub fn debug_ts(ts: u64) -> String { + let now = chrono::DateTime::<chrono::Utc>::from(SystemTime::now()); + let date = chrono::DateTime::<chrono::Utc>::from(UNIX_EPOCH + Duration::from_micros(ts)); + + let show_year = now.year() != date.year(); + let show_month = show_year || now.month() != date.month(); + let show_date = show_month || now.day() != date.day(); + + format!("{}{}{}{}", + if show_year { + format!("{:04}/",date.year()) + } else { + "".to_owned() + }, + if show_month { + format!("{:02}/",date.month()) + } else { + "".to_owned() + }, + if show_date { + format!("{:02}-",date.day()) + } else { + "".to_owned() + }, + format!("{:02}:{:02}:{:02}.{:04}", + date.hour(), + date.minute(), + date.second(), + date.nanosecond()/1_000_000 + )) + + } } } + +const DAY: u64 = 1_000_000u64 * 60 * 60 * 24; +const HOUR: u64 = 1_000_000u64 * 60 * 60; +const MIN: u64 = 1_000_000u64 * 60; +const SEC: u64 = 1_000_000u64; +const MSEC: u64 = 1_000u64; + +pub fn debug_duration(dur: u64) -> String { + let days = dur / DAY; + let dur = dur % DAY; + let hours = dur / HOUR; + let dur = dur % HOUR; + let mins = dur / MIN; + let dur = dur % MIN; + let secs = dur / SEC; + let dur = dur % SEC; + let msecs = dur / MSEC; + + format!( + "{}{}{}{}.{:03}", + if days != 0 { + format!("{}d", days) + } else { + "".to_owned() + }, + if hours != 0 { + format!("{}h", hours) + } else { + "".to_owned() + }, + if mins != 0 { + format!("{}m", mins) + } else { + "".to_owned() + }, + if secs != 0 { + format!("{}s", secs) + } else { + "".to_owned() + }, + msecs + ) +}
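For reference, an editor-added example (not part of the patch) of the strings debug_duration() produces; durations are in microseconds, and the assumption here is that the helper is re-exported at the veilid_tools crate root like the other modules:

    use veilid_tools::debug_duration;

    fn main() {
        // 1 hour, 2 minutes, 5 seconds, 1 millisecond, expressed in microseconds
        let dur: u64 = 3_600_000_000 + 2 * 60_000_000 + 5 * 1_000_000 + 1_000;
        assert_eq!(debug_duration(dur), "1h2m5s.001");

        // Zero-valued units are omitted entirely; milliseconds always print
        assert_eq!(debug_duration(250_000), ".250");
    }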