reliability work

This commit is contained in:
John Smith 2023-06-24 11:16:34 -04:00
parent acebcb7947
commit 197b7fef6e
14 changed files with 191 additions and 84 deletions

View File

@ -1122,6 +1122,7 @@ impl NetworkManager {
// or other firewalling issues and may perform better with TCP.
let unreliable = target_node_ref.peer_stats().rpc_stats.failed_to_send > 2 || target_node_ref.peer_stats().rpc_stats.recent_lost_answers > 2;
if unreliable && sequencing < Sequencing::PreferOrdered {
log_net!(debug "Node contact failing over to Ordered for {}", target_node_ref.to_string().cyan());
sequencing = Sequencing::PreferOrdered;
}

View File

@ -491,13 +491,9 @@ impl DialInfo {
}
pub fn ordered_sequencing_sort(a: &DialInfo, b: &DialInfo) -> core::cmp::Ordering {
let ca = a.protocol_type().sort_order(Sequencing::EnsureOrdered);
let cb = b.protocol_type().sort_order(Sequencing::EnsureOrdered);
if ca < cb {
return core::cmp::Ordering::Less;
}
if ca > cb {
return core::cmp::Ordering::Greater;
let s = ProtocolType::ordered_sequencing_sort(a.protocol_type(), b.protocol_type());
if s != core::cmp::Ordering::Equal {
return s;
}
match (a, b) {
(DialInfo::UDP(a), DialInfo::UDP(b)) => a.cmp(b),

View File

@ -61,6 +61,20 @@ impl DialInfoFilter {
pub fn is_dead(&self) -> bool {
self.protocol_type_set.is_empty() || self.address_type_set.is_empty()
}
pub fn with_sequencing(mut self, sequencing: Sequencing) -> (bool, DialInfoFilter) {
// Get first filtered dialinfo
match sequencing {
Sequencing::NoPreference => (false, self),
Sequencing::PreferOrdered => (true, self),
Sequencing::EnsureOrdered => (
true,
self.filtered(
&DialInfoFilter::all().with_protocol_type_set(ProtocolType::all_ordered_set()),
),
),
}
// return ordered sort and filter with ensure applied
}
}
impl fmt::Debug for DialInfoFilter {
@ -80,7 +94,24 @@ impl fmt::Debug for DialInfoFilter {
}
}
impl From<ProtocolType> for DialInfoFilter {
fn from(other: ProtocolType) -> Self {
Self {
protocol_type_set: ProtocolTypeSet::from(other),
address_type_set: AddressTypeSet::all(),
}
}
}
impl From<AddressType> for DialInfoFilter {
fn from(other: AddressType) -> Self {
Self {
protocol_type_set: ProtocolTypeSet::all(),
address_type_set: AddressTypeSet::from(other),
}
}
}
pub trait MatchesDialInfoFilter {
fn matches_filter(&self, filter: &DialInfoFilter) -> bool;
}

View File

@ -72,6 +72,18 @@ impl ProtocolType {
pub fn all_ordered_set() -> ProtocolTypeSet {
ProtocolType::TCP | ProtocolType::WS | ProtocolType::WSS
}
pub fn ordered_sequencing_sort(a: Self, b: Self) -> core::cmp::Ordering {
let ca = a.sort_order(Sequencing::EnsureOrdered);
let cb = b.sort_order(Sequencing::EnsureOrdered);
if ca < cb {
return core::cmp::Ordering::Less;
}
if ca > cb {
return core::cmp::Ordering::Greater;
}
core::cmp::Ordering::Equal
}
}
impl fmt::Display for ProtocolType {

View File

@ -316,7 +316,8 @@ impl BucketEntryInner {
let last_connections = self.last_connections(
rti,
true,
Some(NodeRefFilter::new().with_routing_domain(routing_domain)),
NodeRefFilter::from(routing_domain),
false,
);
!last_connections.is_empty()
}
@ -370,7 +371,8 @@ impl BucketEntryInner {
let last_connections = self.last_connections(
rti,
true,
Some(NodeRefFilter::new().with_routing_domain_set(routing_domain_set)),
NodeRefFilter::from(routing_domain_set),
false
);
for lc in last_connections {
if let Some(rd) =
@ -412,7 +414,8 @@ impl BucketEntryInner {
&self,
rti: &RoutingTableInner,
only_live: bool,
filter: Option<NodeRefFilter>,
filter: NodeRefFilter,
ordered: bool,
) -> Vec<(ConnectionDescriptor, Timestamp)> {
let connection_manager =
rti.unlocked_inner.network_manager.connection_manager();
@ -421,26 +424,13 @@ impl BucketEntryInner {
.last_connections
.iter()
.filter_map(|(k, v)| {
let include = if let Some(filter) = &filter {
let include = {
let remote_address = v.0.remote_address().address();
if let Some(routing_domain) = rti.routing_domain_for_address(remote_address) {
if filter.routing_domain_set.contains(routing_domain)
rti.routing_domain_for_address(remote_address).map(|rd| {
filter.routing_domain_set.contains(rd)
&& filter.dial_info_filter.protocol_type_set.contains(k.0)
&& filter.dial_info_filter.address_type_set.contains(k.1)
{
// matches filter
true
} else {
// does not match filter
false
}
} else {
// no valid routing domain
false
}
} else {
// no filter
true
}).unwrap_or(false)
};
if !include {
@ -471,8 +461,16 @@ impl BucketEntryInner {
}
})
.collect();
// Sort with newest timestamps first
out.sort_by(|a, b| b.1.cmp(&a.1));
// Sort with ordering preference first and then sort with newest timestamps
out.sort_by(|a, b| {
if ordered {
let s = ProtocolType::ordered_sequencing_sort(a.0.protocol_type(), b.0.protocol_type());
if s != core::cmp::Ordering::Equal {
return s;
}
}
b.1.cmp(&a.1)
});
out
}

View File

@ -217,25 +217,24 @@ pub trait NodeRefBase: Sized {
fn first_filtered_dial_info_detail(&self) -> Option<DialInfoDetail> {
let routing_domain_set = self.routing_domain_set();
let dial_info_filter = self.dial_info_filter();
let sequencing = self.common().sequencing;
let (ordered, dial_info_filter) = dial_info_filter.with_sequencing(sequencing);
let (sort, dial_info_filter) = match self.common().sequencing {
Sequencing::NoPreference => (None, dial_info_filter),
Sequencing::PreferOrdered => (
Some(DialInfoDetail::ordered_sequencing_sort),
dial_info_filter,
),
Sequencing::EnsureOrdered => (
Some(DialInfoDetail::ordered_sequencing_sort),
dial_info_filter.filtered(
&DialInfoFilter::all().with_protocol_type_set(ProtocolType::all_ordered_set()),
),
),
let sort = if ordered {
Some(DialInfoDetail::ordered_sequencing_sort)
} else {
None
};
if dial_info_filter.is_dead() {
return None;
}
let filter = |did: &DialInfoDetail| did.matches_filter(&dial_info_filter);
self.operate(|_rt, e| {
for routing_domain in routing_domain_set {
if let Some(ni) = e.node_info(routing_domain) {
let filter = |did: &DialInfoDetail| did.matches_filter(&dial_info_filter);
if let Some(did) = ni.first_filtered_dial_info_detail(sort, filter) {
return Some(did);
}
@ -280,9 +279,13 @@ pub trait NodeRefBase: Sized {
fn last_connection(&self) -> Option<ConnectionDescriptor> {
// Get the last connections and the last time we saw anything with this connection
// Filtered first and then sorted by most recent
// Filtered first and then sorted by sequencing and then by most recent
self.operate(|rti, e| {
let last_connections = e.last_connections(rti, true, self.common().filter.clone());
// apply sequencing to filter and get sort
let sequencing = self.common().sequencing;
let filter = self.common().filter.clone().unwrap_or_default();
let (ordered, filter) = filter.with_sequencing(sequencing);
let last_connections = e.last_connections(rti, true, filter, ordered);
last_connections.first().map(|x| x.0)
})
}

View File

@ -19,7 +19,6 @@ impl NodeRefFilter {
dial_info_filter: DialInfoFilter::all(),
}
}
pub fn with_routing_domain(mut self, routing_domain: RoutingDomain) -> Self {
self.routing_domain_set = routing_domain.into();
self
@ -58,4 +57,54 @@ impl NodeRefFilter {
pub fn is_dead(&self) -> bool {
self.dial_info_filter.is_dead() || self.routing_domain_set.is_empty()
}
pub fn with_sequencing(mut self, sequencing: Sequencing) -> (bool, Self) {
let (ordered, dif) = self.dial_info_filter.with_sequencing(sequencing);
self.dial_info_filter = dif;
(ordered, self)
}
}
impl From<RoutingDomain> for NodeRefFilter {
fn from(other: RoutingDomain) -> Self {
Self {
routing_domain_set: other.into(),
dial_info_filter: DialInfoFilter::all(),
}
}
}
impl From<RoutingDomainSet> for NodeRefFilter {
fn from(other: RoutingDomainSet) -> Self {
Self {
routing_domain_set: other,
dial_info_filter: DialInfoFilter::all(),
}
}
}
impl From<DialInfoFilter> for NodeRefFilter {
fn from(other: DialInfoFilter) -> Self {
Self {
routing_domain_set: RoutingDomainSet::all(),
dial_info_filter: other,
}
}
}
impl From<ProtocolType> for NodeRefFilter {
fn from(other: ProtocolType) -> Self {
Self {
routing_domain_set: RoutingDomainSet::all(),
dial_info_filter: DialInfoFilter::from(other),
}
}
}
impl From<AddressType> for NodeRefFilter {
fn from(other: AddressType) -> Self {
Self {
routing_domain_set: RoutingDomainSet::all(),
dial_info_filter: DialInfoFilter::from(other),
}
}
}

View File

@ -616,8 +616,8 @@ impl RouteSpecStore {
let private_route = self.assemble_private_route(&key, None)?;
// Always test routes with safety routes that are more likely to succeed
let stability = Stability::Reliable;
// Routes can test with whatever sequencing they were allocated with
let sequencing = Sequencing::NoPreference;
// Routes should test with the most likely to succeed sequencing they are capable of
let sequencing = Sequencing::PreferOrdered;
let safety_spec = SafetySpec {
preferred_route: Some(private_route_id),
@ -657,12 +657,17 @@ impl RouteSpecStore {
bail!("no best key to test remote route");
};
// Always test routes with safety routes that are more likely to succeed
let stability = Stability::Reliable;
// Routes should test with the most likely to succeed sequencing they are capable of
let sequencing = Sequencing::PreferOrdered;
// Get a safety route that is good enough
let safety_spec = SafetySpec {
preferred_route: None,
hop_count: self.unlocked_inner.default_route_hop_count,
stability: Stability::default(),
sequencing: Sequencing::default(),
stability,
sequencing,
};
let safety_selection = SafetySelection::Safe(safety_spec);

View File

@ -61,6 +61,9 @@ impl RouteStats {
pub fn record_sent(&mut self, cur_ts: Timestamp, bytes: ByteCount) {
self.last_sent_ts = Some(cur_ts);
self.transfer_stats_accounting.add_up(bytes);
// If we sent successfully, then reset 'failed_to_send'
self.failed_to_send = 0;
}
/// Mark a route as having been sent to
@ -101,6 +104,8 @@ impl RouteStats {
self.last_tested_ts = None;
self.last_sent_ts = None;
self.last_received_ts = None;
self.failed_to_send = 0;
self.questions_lost = 0;
}
/// Check if a route needs testing

View File

@ -223,7 +223,7 @@ impl Default for PublicInternetRoutingDomainDetail {
}
}
fn first_filtered_dial_info_detail(
fn first_filtered_dial_info_detail_between_nodes(
from_node: &NodeInfo,
to_node: &NodeInfo,
dial_info_filter: &DialInfoFilter,
@ -235,28 +235,21 @@ fn first_filtered_dial_info_detail(
.with_protocol_type_set(from_node.outbound_protocols()),
);
// Get first filtered dialinfo
let (sort, dial_info_filter) = match sequencing {
Sequencing::NoPreference => (None, dial_info_filter),
Sequencing::PreferOrdered => (
Some(DialInfoDetail::ordered_sequencing_sort),
dial_info_filter,
),
Sequencing::EnsureOrdered => (
Some(DialInfoDetail::ordered_sequencing_sort),
dial_info_filter.filtered(
&DialInfoFilter::all().with_protocol_type_set(ProtocolType::all_ordered_set()),
),
),
// Apply sequencing and get sort
let (ordered, dial_info_filter) = dial_info_filter.with_sequencing(sequencing);
let sort = if ordered {
Some(DialInfoDetail::ordered_sequencing_sort)
} else {
None
};
// If the filter is dead then we won't be able to connect
if dial_info_filter.is_dead() {
return None;
}
let direct_filter = |did: &DialInfoDetail| did.matches_filter(&dial_info_filter);
// Get the best match dial info for node B if we have it
let direct_filter = |did: &DialInfoDetail| did.matches_filter(&dial_info_filter);
to_node.first_filtered_dial_info_detail(sort, direct_filter)
}
@ -294,7 +287,7 @@ impl RoutingDomainDetail for PublicInternetRoutingDomainDetail {
// Get the best match dial info for node B if we have it
if let Some(target_did) =
first_filtered_dial_info_detail(node_a, node_b, &dial_info_filter, sequencing)
first_filtered_dial_info_detail_between_nodes(node_a, node_b, &dial_info_filter, sequencing)
{
// Do we need to signal before going inbound?
if !target_did.class.requires_signal() {
@ -319,7 +312,7 @@ impl RoutingDomainDetail for PublicInternetRoutingDomainDetail {
};
// Can node A reach the inbound relay directly?
if first_filtered_dial_info_detail(
if first_filtered_dial_info_detail_between_nodes(
node_a,
node_b_relay,
&dial_info_filter,
@ -332,7 +325,7 @@ impl RoutingDomainDetail for PublicInternetRoutingDomainDetail {
///////// Reverse connection
// Get the best match dial info for an reverse inbound connection from node B to node A
if let Some(reverse_did) = first_filtered_dial_info_detail(
if let Some(reverse_did) = first_filtered_dial_info_detail_between_nodes(
node_b,
node_a,
&dial_info_filter,
@ -358,14 +351,14 @@ impl RoutingDomainDetail for PublicInternetRoutingDomainDetail {
let udp_dial_info_filter = dial_info_filter
.clone()
.filtered(&DialInfoFilter::all().with_protocol_type(ProtocolType::UDP));
if let Some(target_udp_did) = first_filtered_dial_info_detail(
if let Some(target_udp_did) = first_filtered_dial_info_detail_between_nodes(
node_a,
node_b,
&udp_dial_info_filter,
sequencing,
) {
// Does node A have a direct udp dialinfo that node B can reach?
if let Some(reverse_udp_did) = first_filtered_dial_info_detail(
if let Some(reverse_udp_did) = first_filtered_dial_info_detail_between_nodes(
node_b,
node_a,
&udp_dial_info_filter,
@ -407,7 +400,7 @@ impl RoutingDomainDetail for PublicInternetRoutingDomainDetail {
};
// Can we reach the full relay?
if first_filtered_dial_info_detail(
if first_filtered_dial_info_detail_between_nodes(
node_a,
&node_b_relay,
&dial_info_filter,

View File

@ -111,6 +111,9 @@ impl RoutingTableInner {
routing_domain_set: RoutingDomainSet,
filter: &DialInfoFilter,
) -> Option<DialInfoDetail> {
if filter.is_dead() || routing_domain_set.is_empty() {
return None;
}
for routing_domain in routing_domain_set {
let did = self.with_routing_domain(routing_domain, |rd| {
for did in rd.common().dial_info_details() {
@ -133,6 +136,9 @@ impl RoutingTableInner {
filter: &DialInfoFilter,
) -> Vec<DialInfoDetail> {
let mut ret = Vec::new();
if filter.is_dead() || routing_domain_set.is_empty() {
return ret;
}
for routing_domain in routing_domain_set {
self.with_routing_domain(routing_domain, |rd| {
for did in rd.common().dial_info_details() {

View File

@ -377,11 +377,8 @@ impl RoutingTable {
// Ensure we got the signed peer info
if !nr.signed_node_info_has_valid_signature(RoutingDomain::PublicInternet) {
log_rtab!(warn
"bootstrap at {:?} did not return valid signed node info",
nr
);
// If this node info is invalid, it will time out after being unpingable
log_rtab!(warn "bootstrap server is not responding");
log_rtab!(debug "bootstrap server is not responding: {}", nr);
} else {
// otherwise this bootstrap is valid, lets ask it to find ourselves now
routing_table.reverse_find_node(crypto_kind, nr, true).await

View File

@ -121,6 +121,8 @@ async def test_routing_context_app_message_loopback_big_packets():
if update.kind == veilid.VeilidUpdateKind.APP_MESSAGE:
await app_message_queue.put(update)
sent_messages: set[bytes] = set()
hostname, port = server_info()
api = await veilid.json_api_connect(
hostname, port, app_message_queue_update_callback
@ -130,8 +132,7 @@ async def test_routing_context_app_message_loopback_big_packets():
await api.debug("purge routes")
# make a routing context that uses a safety route
#rc = await (await (await api.new_routing_context()).with_privacy()).with_sequencing(veilid.Sequencing.ENSURE_ORDERED)
rc = await (await api.new_routing_context()).with_privacy()
rc = await (await (await api.new_routing_context()).with_privacy()).with_sequencing(veilid.Sequencing.ENSURE_ORDERED)
async with rc:
# make a new local private route
@ -140,17 +141,21 @@ async def test_routing_context_app_message_loopback_big_packets():
# import it as a remote route as well so we can send to it
prr = await api.import_remote_private_route(blob)
# do this test 10 times
for _ in range(10):
# do this test 100 times
for _ in range(1000):
# send a random sized random app message to our own private route
message = random.randbytes(random.randint(0, 32768))
await rc.app_message(prr, message)
# we should get the same message back
sent_messages.add(message)
# we should get the same messages back
for _ in range(len(sent_messages)):
update: veilid.VeilidUpdate = await asyncio.wait_for(
app_message_queue.get(), timeout=10
)
assert isinstance(update.detail, veilid.VeilidAppMessage)
assert update.detail.message == message
assert update.detail.message in sent_messages

View File

@ -3516,6 +3516,12 @@
"format": "uint32",
"minimum": 0.0
},
"network_key_password": {
"type": [
"string",
"null"
]
},
"protocol": {
"$ref": "#/definitions/VeilidConfigProtocol"
},