refactor checkpoint

This commit is contained in:
John Smith
2022-09-03 13:57:25 -04:00
parent 9966d25672
commit e0a5b1bd69
30 changed files with 1274 additions and 838 deletions

View File

@@ -552,33 +552,29 @@ impl NetworkManager {
// Run the rolling transfers task
self.unlocked_inner.rolling_transfers_task.tick().await?;
// Process global peer scope ticks
// These only make sense when connected to the actual internet and not just the local network
// Must have at least one outbound protocol enabled, and one global peer scope address family enabled
let global_peer_scope_enabled =
!protocol_config.outbound.is_empty() && !protocol_config.family_global.is_empty();
if global_peer_scope_enabled {
// Run the relay management task
self.unlocked_inner.relay_management_task.tick().await?;
// Run the relay management task
self.unlocked_inner.relay_management_task.tick().await?;
// If routing table has no live entries, then add the bootstrap nodes to it
let live_entry_count = routing_table.get_entry_count(BucketEntryState::Unreliable);
if live_entry_count == 0 {
self.unlocked_inner.bootstrap_task.tick().await?;
}
// If we still don't have enough peers, find nodes until we do
let min_peer_count = {
let c = self.config.get();
c.network.dht.min_peer_count as usize
};
if live_entry_count < min_peer_count {
self.unlocked_inner.peer_minimum_refresh_task.tick().await?;
}
// Ping validate some nodes to groom the table
self.unlocked_inner.ping_validator_task.tick().await?;
// See how many live PublicInternet entries we have
let live_public_internet_entry_count = routing_table.get_entry_count(
RoutingTableSet::only(RoutingTable::PublicInternet),
BucketEntryState::Unreliable,
);
let min_peer_count = {
let c = self.config.get();
c.network.dht.min_peer_count as usize
};
// If none, then add the bootstrap nodes to it
if live_public_internet_entry_count == 0 {
self.unlocked_inner.bootstrap_task.tick().await?;
}
// If we still don't have enough peers, find nodes until we do
else if live_public_internet_entry_count < min_peer_count {
self.unlocked_inner.peer_minimum_refresh_task.tick().await?;
}
// Ping validate some nodes to groom the table
self.unlocked_inner.ping_validator_task.tick().await?;
// Run the routing table tick
routing_table.tick().await?;
@@ -605,21 +601,18 @@ impl NetworkManager {
}
// Get our node's capabilities
pub fn generate_node_status(&self) -> NodeStatus {
let peer_info = self
fn generate_public_internet_node_status(&self) -> PublicInternetNodeStatus {
let node_info = self
.routing_table()
.get_own_peer_info(RoutingDomain::PublicInternet);
.get_own_node_info(RoutingDomain::PublicInternet);
let will_route = peer_info.signed_node_info.node_info.can_inbound_relay(); // xxx: eventually this may have more criteria added
let will_tunnel = peer_info.signed_node_info.node_info.can_inbound_relay(); // xxx: we may want to restrict by battery life and network bandwidth at some point
let will_signal = peer_info.signed_node_info.node_info.can_signal();
let will_relay = peer_info.signed_node_info.node_info.can_inbound_relay();
let will_validate_dial_info = peer_info
.signed_node_info
.node_info
.can_validate_dial_info();
let will_route = node_info.can_inbound_relay(); // xxx: eventually this may have more criteria added
let will_tunnel = node_info.can_inbound_relay(); // xxx: we may want to restrict by battery life and network bandwidth at some point
let will_signal = node_info.can_signal();
let will_relay = node_info.can_inbound_relay();
let will_validate_dial_info = node_info.can_validate_dial_info();
NodeStatus {
PublicInternetNodeStatus {
will_route,
will_tunnel,
will_signal,
@@ -627,6 +620,30 @@ impl NetworkManager {
will_validate_dial_info,
}
}
fn generate_local_network_node_status(&self) -> LocalNetworkNodeStatus {
let node_info = self
.routing_table()
.get_own_node_info(RoutingDomain::LocalNetwork);
let will_relay = node_info.can_inbound_relay();
let will_validate_dial_info = node_info.can_validate_dial_info();
LocalNetworkNodeStatus {
will_relay,
will_validate_dial_info,
}
}
pub fn generate_node_status(&self, routing_domain: RoutingDomain) -> NodeStatus {
match routing_domain {
RoutingDomain::PublicInternet => {
NodeStatus::PublicInternet(self.generate_public_internet_node_status())
}
RoutingDomain::LocalNetwork => {
NodeStatus::LocalNetwork(self.generate_local_network_node_status())
}
}
}
// Return what protocols we have enabled
pub fn get_protocol_config(&self) -> ProtocolConfig {
@@ -650,6 +667,12 @@ impl NetworkManager {
.clone(),
}
}
pub fn get_inbound_node_ref_filter(&self, routing_domain: RoutingDomain) -> NodeRefFilter {
let dif = self.get_inbound_dial_info_filter(routing_domain);
NodeRefFilter::new()
.with_routing_domain(routing_domain)
.with_dial_info_filter(dif)
}
// Return a dial info filter for what we can send out
pub fn get_outbound_dial_info_filter(&self, routing_domain: RoutingDomain) -> DialInfoFilter {
@@ -667,6 +690,12 @@ impl NetworkManager {
.clone(),
}
}
pub fn get_outbound_node_ref_filter(&self, routing_domain: RoutingDomain) -> NodeRefFilter {
let dif = self.get_outbound_dial_info_filter(routing_domain);
NodeRefFilter::new()
.with_routing_domain(routing_domain)
.with_dial_info_filter(dif)
}
// Generates a multi-shot/normal receipt
#[instrument(level = "trace", skip(self, extra_data, callback), err)]
@@ -949,12 +978,11 @@ impl NetworkManager {
#[instrument(level = "trace", skip(self), ret)]
fn get_contact_method_public(&self, target_node_ref: NodeRef) -> ContactMethod {
// Scope noderef down to protocols we can do outbound
let public_outbound_dif = self.get_outbound_dial_info_filter(RoutingDomain::PublicInternet);
let target_node_ref = target_node_ref.filtered_clone(public_outbound_dif.clone());
let public_outbound_nrf = self.get_outbound_node_ref_filter(RoutingDomain::PublicInternet);
let target_node_ref = target_node_ref.filtered_clone(public_outbound_nrf.clone());
// Get the best match internet dial info if we have it
let opt_target_public_did =
target_node_ref.first_filtered_dial_info_detail(Some(RoutingDomain::PublicInternet));
let opt_target_public_did = target_node_ref.first_filtered_dial_info_detail();
if let Some(target_public_did) = opt_target_public_did {
// Do we need to signal before going inbound?
if !target_public_did.class.requires_signal() {
@@ -966,12 +994,9 @@ impl NetworkManager {
// Note that .relay() never returns our own node. We can't relay to ourselves.
if let Some(inbound_relay_nr) = target_node_ref.relay(RoutingDomain::PublicInternet) {
// Scope down to protocols we can do outbound
let inbound_relay_nr = inbound_relay_nr.filtered_clone(public_outbound_dif.clone());
let inbound_relay_nr = inbound_relay_nr.filtered_clone(public_outbound_nrf.clone());
// Can we reach the inbound relay?
if inbound_relay_nr
.first_filtered_dial_info_detail(Some(RoutingDomain::PublicInternet))
.is_some()
{
if inbound_relay_nr.first_filtered_dial_info_detail().is_some() {
// Can we receive anything inbound ever?
let our_network_class = self
.get_network_class(RoutingDomain::PublicInternet)
@@ -985,11 +1010,11 @@ impl NetworkManager {
let reverse_dif = self
.get_inbound_dial_info_filter(RoutingDomain::PublicInternet)
.filtered(
target_node_ref
&target_node_ref
.node_info_outbound_filter(RoutingDomain::PublicInternet),
);
if let Some(reverse_did) = routing_table.first_filtered_dial_info_detail(
Some(RoutingDomain::PublicInternet),
RoutingDomainSet::only(RoutingDomain::PublicInternet),
&reverse_dif,
) {
// Ensure we aren't on the same public IP address (no hairpin nat)
@@ -1010,16 +1035,16 @@ impl NetworkManager {
// Does the target have a direct udp dialinfo we can reach?
let udp_target_nr = target_node_ref.filtered_clone(
DialInfoFilter::global().with_protocol_type(ProtocolType::UDP),
NodeRefFilter::new().with_protocol_type(ProtocolType::UDP),
);
if let Some(target_udp_dialinfo_detail) = udp_target_nr
.first_filtered_dial_info_detail(Some(RoutingDomain::PublicInternet))
if let Some(target_udp_dialinfo_detail) =
udp_target_nr.first_filtered_dial_info_detail()
{
// Does the self node have a direct udp dialinfo the target can reach?
let inbound_udp_dif = self
.get_inbound_dial_info_filter(RoutingDomain::PublicInternet)
.filtered(
target_node_ref
&target_node_ref
.node_info_outbound_filter(RoutingDomain::PublicInternet),
)
.filtered(
@@ -1027,7 +1052,7 @@ impl NetworkManager {
);
if let Some(self_udp_dialinfo_detail) = routing_table
.first_filtered_dial_info_detail(
Some(RoutingDomain::PublicInternet),
RoutingDomainSet::only(RoutingDomain::PublicInternet),
&inbound_udp_dif,
)
{
@@ -1056,7 +1081,7 @@ impl NetworkManager {
{
// Can we reach the full relay?
if target_inbound_relay_nr
.first_filtered_dial_info_detail(Some(RoutingDomain::PublicInternet))
.first_filtered_dial_info_detail()
.is_some()
{
return ContactMethod::InboundRelay(target_inbound_relay_nr);
@@ -1078,15 +1103,13 @@ impl NetworkManager {
fn get_contact_method_local(&self, target_node_ref: NodeRef) -> ContactMethod {
// Scope noderef down to protocols we can do outbound
let local_outbound_dif = self.get_outbound_dial_info_filter(RoutingDomain::LocalNetwork);
let target_node_ref = target_node_ref.filtered_clone(local_outbound_dif);
let target_node_ref = target_node_ref.filtered_clone(NodeRefFilter::local_outbound_dif);
// Get the best matching local direct dial info if we have it
// ygjghhtiygiukuymyg
if target_node_ref.is_filter_dead() {
return ContactMethod::Unreachable;
}
let opt_target_local_did =
target_node_ref.first_filtered_dial_info_detail(Some(RoutingDomain::LocalNetwork));
let opt_target_local_did = target_node_ref.first_filtered_dial_info_detail();
if let Some(target_local_did) = opt_target_local_did {
return ContactMethod::Direct(target_local_did.dial_info);
}
@@ -1096,21 +1119,18 @@ impl NetworkManager {
// Figure out how to reach a node
#[instrument(level = "trace", skip(self), ret)]
pub(crate) fn get_contact_method(&self, target_node_ref: NodeRef) -> ContactMethod {
// Try local first
let out = self.get_contact_method_local(target_node_ref.clone());
if !matches!(out, ContactMethod::Unreachable) {
return out;
}
let routing_domain = match target_node_ref.best_routing_domain() {
Some(rd) => rd,
None => {
log_net!("no routing domain for node {:?}", target_node_ref);
return ContactMethod::Unreachable;
}
};
// Try public next
let out = self.get_contact_method_public(target_node_ref.clone());
if !matches!(out, ContactMethod::Unreachable) {
return out;
match routing_domain {
RoutingDomain::LocalNetwork => self.get_contact_method_local(target_node_ref),
RoutingDomain::PublicInternet => self.get_contact_method_public(target_node_ref),
}
// Otherwise, we can't reach this node
log_net!("unable to reach node {:?}", target_node_ref);
ContactMethod::Unreachable
}
// Send a reverse connection signal and wait for the return receipt over it
@@ -1461,6 +1481,18 @@ impl NetworkManager {
return Ok(false);
}
// Get the routing domain for this data
let routing_domain = match self
.routing_table()
.routing_domain_for_address(connection_descriptor.remote().address())
{
Some(rd) => rd,
None => {
log_net!(debug "no routing domain for envelope received from {:?}", connection_descriptor);
return Ok(false);
}
};
// Is this a direct bootstrap request instead of an envelope?
if data[0..4] == *BOOT_MAGIC {
network_result_value_or_log!(debug self.handle_boot_request(connection_descriptor).await? => {});
@@ -1588,7 +1620,13 @@ impl NetworkManager {
// xxx: deal with spoofing and flooding here?
// Pass message to RPC system
rpc.enqueue_message(envelope, body, source_noderef, connection_descriptor)?;
rpc.enqueue_message(
envelope,
body,
source_noderef,
connection_descriptor,
routing_domain,
)?;
// Inform caller that we dealt with the envelope locally
Ok(true)
@@ -1669,7 +1707,7 @@ impl NetworkManager {
// Determine if a local IP address has changed
// this means we should restart the low level network and and recreate all of our dial info
// Wait until we have received confirmation from N different peers
pub fn report_local_socket_address(
pub fn report_local_network_socket_address(
&self,
_socket_address: SocketAddress,
_connection_descriptor: ConnectionDescriptor,
@@ -1681,7 +1719,7 @@ impl NetworkManager {
// Determine if a global IP address has changed
// this means we should recreate our public dial info if it is not static and rediscover it
// Wait until we have received confirmation from N different peers
pub fn report_global_socket_address(
pub fn report_public_internet_socket_address(
&self,
socket_address: SocketAddress, // the socket address as seen by the remote peer
connection_descriptor: ConnectionDescriptor, // the connection descriptor used

View File

@@ -603,6 +603,29 @@ impl Network {
// initialize interfaces
self.unlocked_inner.interfaces.refresh().await?;
// build the set of networks we should consider for the 'LocalNetwork' routing domain
let mut local_networks: HashSet<(IpAddr, IpAddr)> = HashSet::new();
self.unlocked_inner
.interfaces
.with_interfaces(|interfaces| {
for (name, intf) in interfaces {
// Skip networks that we should never encounter
if intf.is_loopback() || !intf.is_running() {
return;
}
// Add network to local networks table
for addr in intf.addrs {
let netmask = addr.if_addr().netmask();
let network_ip = ipaddr_apply_netmask(addr.if_addr().ip(), netmask);
local_networks.insert((network_ip, netmask));
}
}
});
let local_networks: Vec<(IpAddr, IpAddr)> = local_networks.into_iter().collect();
self.unlocked_inner
.routing_table
.configure_local_network_routing_domain(local_networks);
// determine if we have ipv4/ipv6 addresses
{
let mut inner = self.inner.lock();

View File

@@ -367,13 +367,12 @@ xxx write routing table sieve for routing domain from dialinfo and local network
pub(super) async fn start_ws_listeners(&self) -> EyreResult<()> {
trace!("starting ws listeners");
let routing_table = self.routing_table();
let (listen_address, url, path, enable_local_peer_scope, detect_address_changes) = {
let (listen_address, url, path, detect_address_changes) = {
let c = self.config.get();
(
c.network.protocol.ws.listen_address.clone(),
c.network.protocol.ws.url.clone(),
c.network.protocol.ws.path.clone(),
c.network.enable_local_peer_scope,
c.network.detect_address_changes,
)
};
@@ -586,12 +585,11 @@ xxx write routing table sieve for routing domain from dialinfo and local network
trace!("starting tcp listeners");
let routing_table = self.routing_table();
let (listen_address, public_address, enable_local_peer_scope, detect_address_changes) = {
let (listen_address, public_address, detect_address_changes) = {
let c = self.config.get();
(
c.network.protocol.tcp.listen_address.clone(),
c.network.protocol.tcp.public_address.clone(),
c.network.enable_local_peer_scope,
c.network.detect_address_changes,
)
};

View File

@@ -197,7 +197,11 @@ impl NetworkManager {
let routing_table = routing_table.clone();
unord.push(
// lets ask bootstrap to find ourselves now
async move { routing_table.reverse_find_node(nr, true).await },
async move {
routing_table
.reverse_find_node(RoutingDomain::PublicInternet, nr, true)
.await
},
);
}
}
@@ -299,7 +303,9 @@ impl NetworkManager {
unord.push(async move {
// Need VALID signed peer info, so ask bootstrap to find_node of itself
// which will ensure it has the bootstrap's signed peer info as part of the response
let _ = routing_table.find_target(nr.clone()).await;
let _ = routing_table
.find_target(RoutingDomain::PublicInternet, nr.clone())
.await;
// Ensure we got the signed peer info
if !nr.has_valid_signed_node_info(Some(RoutingDomain::PublicInternet)) {
@@ -310,7 +316,9 @@ impl NetworkManager {
// If this node info is invalid, it will time out after being unpingable
} else {
// otherwise this bootstrap is valid, lets ask it to find ourselves now
routing_table.reverse_find_node(nr, true).await
routing_table
.reverse_find_node(RoutingDomain::PublicInternet, nr, true)
.await
}
});
}
@@ -324,33 +332,35 @@ impl NetworkManager {
// Ping each node in the routing table if they need to be pinged
// to determine their reliability
#[instrument(level = "trace", skip(self), err)]
pub(super) async fn ping_validator_task_routine(
self,
stop_token: StopToken,
_last_ts: u64,
fn ping_validator_public_internet(
&self,
cur_ts: u64,
unord: &mut FuturesUnordered,
) -> EyreResult<()> {
let rpc = self.rpc_processor();
let routing_table = self.routing_table();
let mut unord = FuturesUnordered::new();
// Get all nodes needing pings in the PublicInternet routing domain
let node_refs = routing_table.get_nodes_needing_ping(RoutingDomain::PublicInternet, cur_ts);
let node_refs = routing_table.get_nodes_needing_ping(cur_ts);
// Look up any NAT mappings we may need to try to preserve with keepalives
let mut mapped_port_info = routing_table.get_mapped_port_info();
let opt_public_internet_relay_nr = routing_table.relay_node(RoutingDomain::PublicInternet);
let opt_public_internet_relay_id = opt_public_internet_relay_nr.map(|nr| nr.node_id());
// let opt_local_network_relay_nr = routing_table.relay_node(RoutingDomain::LocalNetwork);
// let opt_local_network_relay_id = opt_local_network_relay_nr.map(|nr| nr.node_id());
// Public Internet Routing Domain
// Get the PublicInternet relay if we are using one
let opt_relay_nr = routing_table.relay_node(RoutingDomain::PublicInternet);
let opt_relay_id = opt_relay_nr.map(|nr| nr.node_id());
// Get our publicinternet dial info
let dids = routing_table.all_filtered_dial_info_details(
Some(RoutingDomain::PublicInternet),
&DialInfoFilter::global(),
RoutingDomainSet::only(RoutingDomain::PublicInternet),
&DialInfoFilter::all(),
);
// For all nodes needing pings, figure out how many and over what protocols
for nr in node_refs {
let rpc = rpc.clone();
if Some(nr.node_id()) == opt_public_internet_relay_id {
// If this is a relay, let's check for NAT keepalives
let mut did_pings = false;
if Some(nr.node_id()) == opt_relay_id {
// Relay nodes get pinged over all protocols we have inbound dialinfo for
// This is so we can preserve the inbound NAT mappings at our router
for did in &dids {
@@ -370,19 +380,84 @@ impl NetworkManager {
};
if needs_ping {
let rpc = rpc.clone();
let dif = did.dial_info.make_filter(true);
let dif = did.dial_info.make_filter();
let nr_filtered = nr.filtered_clone(dif);
log_net!("--> Keepalive ping to {:?}", nr_filtered);
unord.push(async move { rpc.rpc_call_status(nr_filtered).await }.boxed());
unord.push(
async move {
rpc.rpc_call_status(Some(routing_domain), nr_filtered).await
}
.boxed(),
);
did_pings = true;
}
}
} else {
// Just do a single ping with the best protocol for all the other nodes
unord.push(async move { rpc.rpc_call_status(nr).await }.boxed());
}
// Just do a single ping with the best protocol for all the other nodes,
// ensuring that we at least ping a relay with -something- even if we didnt have
// any mapped ports to preserve
if !did_pings {
let rpc = rpc.clone();
unord.push(
async move { rpc.rpc_call_status(Some(routing_domain), nr).await }.boxed(),
);
}
}
// Wait for futures to complete
Ok(())
}
// Ping each node in the LocalNetwork routing domain if they
// need to be pinged to determine their reliability
#[instrument(level = "trace", skip(self), err)]
fn ping_validator_local_network(
&self,
cur_ts: u64,
unord: &mut FuturesUnordered,
) -> EyreResult<()> {
let rpc = self.rpc_processor();
let routing_table = self.routing_table();
// Get all nodes needing pings in the LocalNetwork routing domain
let node_refs = routing_table.get_nodes_needing_ping(RoutingDomain::LocalNetwork, cur_ts);
// Get our LocalNetwork dial info
let dids = routing_table.all_filtered_dial_info_details(
RoutingDomainSet::only(RoutingDomain::LocalNetwork),
&DialInfoFilter::all(),
);
// For all nodes needing pings, figure out how many and over what protocols
for nr in node_refs {
let rpc = rpc.clone();
// Just do a single ping with the best protocol for all the nodes
unord.push(async move { rpc.rpc_call_status(Some(routing_domain), nr).await }.boxed());
}
Ok(())
}
// Ping each node in the routing table if they need to be pinged
// to determine their reliability
#[instrument(level = "trace", skip(self), err)]
pub(super) async fn ping_validator_task_routine(
self,
stop_token: StopToken,
_last_ts: u64,
cur_ts: u64,
) -> EyreResult<()> {
let rpc = self.rpc_processor();
let routing_table = self.routing_table();
let mut unord = FuturesUnordered::new();
// PublicInternet
self.ping_validator_public_internet(cur_ts, &mut unord)?;
// LocalNetwork
self.ping_validator_local_network(cur_ts, &mut unord)?;
// Wait for ping futures to complete in parallel
while let Ok(Some(_)) = unord.next().timeout_at(stop_token.clone()).await {}
Ok(())
@@ -390,24 +465,38 @@ impl NetworkManager {
// Ask our remaining peers to give us more peers before we go
// back to the bootstrap servers to keep us from bothering them too much
// This only adds PublicInternet routing domain peers. The discovery
// mechanism for LocalNetwork suffices for locating all the local network
// peers that are available. This, however, may query other LocalNetwork
// nodes for their PublicInternet peers, which is a very fast way to get
// a new node online.
#[instrument(level = "trace", skip(self), err)]
pub(super) async fn peer_minimum_refresh_task_routine(
self,
stop_token: StopToken,
) -> EyreResult<()> {
let routing_table = self.routing_table();
let cur_ts = intf::get_timestamp();
let mut unord = FuturesOrdered::new();
let min_peer_count = {
let c = self.config.get();
c.network.dht.min_peer_count as usize
};
// get list of all peers we know about, even the unreliable ones, and ask them to find nodes close to our node too
let noderefs = routing_table.get_all_nodes(cur_ts);
// do peer minimum search concurrently
let mut unord = FuturesUnordered::new();
// For the PublicInternet routing domain, get list of all peers we know about
// even the unreliable ones, and ask them to find nodes close to our node too
let noderefs = routing_table.find_fastest_nodes(
min_peer_count,
None,
|k: DHTKey, v: Option<Arc<BucketEntry>>| {
NodeRef::new(self.clone(), k, v.unwrap().clone(), None)
},
);
for nr in noderefs {
log_net!("--- peer minimum search with {:?}", nr);
let routing_table = routing_table.clone();
unord.push(async move { routing_table.reverse_find_node(nr, false).await });
}
// do peer minimum search in order from fastest to slowest
while let Ok(Some(_)) = unord.next().timeout_at(stop_token.clone()).await {}
Ok(())