many fixes for bootstrap and public internet connectivity
@@ -23,6 +23,9 @@ const UNRELIABLE_PING_INTERVAL_SECS: u32 = 5;
 // remains valid, as well as to make sure we remain in any relay node's routing table
 const KEEPALIVE_PING_INTERVAL_SECS: u32 = 20;
 
+// How many times do we try to ping a never-reached node before we call it dead
+const NEVER_REACHED_PING_COUNT: u32 = 3;
+
 // Do not change order here, it will mess up other sorts
 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
 pub enum BucketEntryState {
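
Timestamps in this code are u64 microseconds, while the tuning constants above are expressed in seconds, hence the `* 1000000u64` scaling that recurs in the checks below. A minimal standalone sketch of that convention (the `secs_to_us` helper and the span value are illustrative assumptions, not part of this codebase):

    const UNRELIABLE_PING_SPAN_SECS: u32 = 60; // assumed value, for illustration only

    fn secs_to_us(secs: u32) -> u64 {
        secs as u64 * 1_000_000u64
    }

    fn main() {
        let cur_ts: u64 = 1_650_000_000_000_000; // "now", in microseconds
        let first_seen_ts: u64 = cur_ts - secs_to_us(90);
        // The same comparison check_reliable() performs below:
        let reliable =
            cur_ts.saturating_sub(first_seen_ts) >= secs_to_us(UNRELIABLE_PING_SPAN_SECS);
        assert!(reliable);
    }
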
@@ -58,7 +61,6 @@ impl BucketEntry {
             transfer_stats_accounting: TransferStatsAccounting::new(),
             peer_stats: PeerStats {
                 time_added: now,
-                last_seen: None,
                 rpc_stats: RPCStats::default(),
                 latency: None,
                 transfer: TransferStatsDownUp::default(),
@@ -129,7 +131,7 @@ impl BucketEntry {
 
     pub fn has_valid_signed_node_info(&self) -> bool {
         if let Some(sni) = &self.opt_signed_node_info {
-            sni.signature.valid
+            sni.is_valid()
         } else {
             false
         }
@@ -213,8 +215,13 @@ impl BucketEntry {
 
     ///// state machine handling
     pub(super) fn check_reliable(&self, cur_ts: u64) -> bool {
-        // if we have had consecutive ping replies for longer that UNRELIABLE_PING_SPAN_SECS
-        match self.peer_stats.rpc_stats.first_consecutive_answer_time {
+        // If we have had any failures to send, this is not reliable
+        if self.peer_stats.rpc_stats.failed_to_send > 0 {
+            return false;
+        }
+
+        // if we have seen the node consistently for longer than UNRELIABLE_PING_SPAN_SECS
+        match self.peer_stats.rpc_stats.first_consecutive_seen_ts {
             None => false,
             Some(ts) => {
                 cur_ts.saturating_sub(ts) >= (UNRELIABLE_PING_SPAN_SECS as u64 * 1000000u64)
@@ -222,10 +229,15 @@ impl BucketEntry {
         }
     }
     pub(super) fn check_dead(&self, cur_ts: u64) -> bool {
+        // If we have failed to send NEVER_REACHED_PING_COUNT times in a row, the node is dead
+        if self.peer_stats.rpc_stats.failed_to_send >= NEVER_REACHED_PING_COUNT {
+            return true;
+        }
+
-        // if we have not heard from the node at all for the duration of the unreliable ping span
-        // a node is not dead if we haven't heard from it yet
-        match self.peer_stats.last_seen {
-            None => false,
+        // a node is not dead if we haven't heard from it yet,
+        // but we give it NEVER_REACHED_PING_COUNT chances to ping before we say it's dead
+        match self.peer_stats.rpc_stats.last_seen_ts {
+            None => self.peer_stats.rpc_stats.recent_lost_answers < NEVER_REACHED_PING_COUNT,
             Some(ts) => {
                 cur_ts.saturating_sub(ts) >= (UNRELIABLE_PING_SPAN_SECS as u64 * 1000000u64)
             }
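
Taken together, check_reliable() and check_dead() form a small state machine over the RPC statistics: a node is reliable after a full span of consecutive visibility with no send failures, and dead after repeated send failures or a full span of silence. A simplified, self-contained sketch of the death check under the same rules (field names are stand-ins for the real RPCStats, and the span value is assumed):

    const NEVER_REACHED_PING_COUNT: u32 = 3;
    const UNRELIABLE_PING_SPAN_SECS: u32 = 60; // assumed value

    struct Stats {
        failed_to_send: u32,
        recent_lost_answers: u32,
        last_seen_ts: Option<u64>,
    }

    fn is_dead(s: &Stats, cur_ts: u64) -> bool {
        if s.failed_to_send >= NEVER_REACHED_PING_COUNT {
            return true;
        }
        match s.last_seen_ts {
            // never seen: dead only after NEVER_REACHED_PING_COUNT lost answers
            None => s.recent_lost_answers >= NEVER_REACHED_PING_COUNT,
            // seen before: dead after a full unreliable span of silence
            Some(ts) => {
                cur_ts.saturating_sub(ts) >= UNRELIABLE_PING_SPAN_SECS as u64 * 1_000_000
            }
        }
    }

    fn main() {
        let s = Stats { failed_to_send: 0, recent_lost_answers: 3, last_seen_ts: None };
        // Never seen, and three straight pings went unanswered: dead
        assert!(is_dead(&s, 0));
    }
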
@@ -233,9 +245,20 @@ impl BucketEntry {
     }
 
     fn needs_constant_ping(&self, cur_ts: u64, interval: u64) -> bool {
-        match self.peer_stats.last_seen {
+        // If we have not either seen the node, nor asked it a question in the last 'interval'
+        // then we should ping it
+        let latest_contact_time = self
+            .peer_stats
+            .rpc_stats
+            .last_seen_ts
+            .max(self.peer_stats.rpc_stats.last_question);
+
+        match latest_contact_time {
             None => true,
-            Some(last_seen) => cur_ts.saturating_sub(last_seen) >= (interval * 1000000u64),
+            Some(latest_contact_time) => {
+                // If we haven't done anything with this node in 'interval' seconds
+                cur_ts.saturating_sub(latest_contact_time) >= (interval * 1000000u64)
+            }
         }
     }
 
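
The `last_seen_ts.max(last_question)` expression works because `Option<u64>` is `Ord` with `None` ordered below every `Some`, so the max yields the latest contact whenever either timestamp exists. A quick standalone demonstration:

    fn main() {
        let last_seen_ts: Option<u64> = None;
        let last_question: Option<u64> = Some(1_000_000);
        // None sorts below any Some, so the Some side wins
        assert_eq!(last_seen_ts.max(last_question), Some(1_000_000));
        assert_eq!(Some(5u64).max(Some(9u64)), Some(9));
        assert_eq!(None::<u64>.max(None), None);
    }
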
@@ -259,19 +282,26 @@ impl BucketEntry {
         match state {
             BucketEntryState::Reliable => {
                 // If we are in a reliable state, we need a ping on an exponential scale
-                match self.peer_stats.last_seen {
-                    None => true,
-                    Some(last_seen) => {
-                        let first_consecutive_answer_time = self
-                            .peer_stats
-                            .rpc_stats
-                            .first_consecutive_answer_time
-                            .unwrap();
-                        let start_of_reliable_time = first_consecutive_answer_time
+                let latest_contact_time = self
+                    .peer_stats
+                    .rpc_stats
+                    .last_seen_ts
+                    .max(self.peer_stats.rpc_stats.last_question);
+
+                match latest_contact_time {
+                    None => {
+                        error!("Peer is reliable, but not seen!");
+                        true
+                    }
+                    Some(latest_contact_time) => {
+                        let first_consecutive_seen_ts =
+                            self.peer_stats.rpc_stats.first_consecutive_seen_ts.unwrap();
+                        let start_of_reliable_time = first_consecutive_seen_ts
                             + ((UNRELIABLE_PING_SPAN_SECS - UNRELIABLE_PING_INTERVAL_SECS) as u64
                                 * 1_000_000u64);
                         let reliable_cur = cur_ts.saturating_sub(start_of_reliable_time);
-                        let reliable_last = last_seen.saturating_sub(start_of_reliable_time);
+                        let reliable_last =
+                            latest_contact_time.saturating_sub(start_of_reliable_time);
 
                         retry_falloff_log(
                             reliable_last,
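
The retry_falloff_log() implementation is not shown in this diff; conceptually it answers "has this node been quiet long enough, on a logarithmic schedule, to warrant another ping?", so reliable nodes get pinged ever more rarely up to a cap. A hypothetical sketch of such a falloff, an illustration of the idea rather than the actual function:

    // Ping is due when `cur` has crossed into a later power-of-`base` bucket
    // than `last`, bounded below by min_us and above by max_us.
    fn retry_falloff_log_sketch(last: u64, cur: u64, min_us: u64, max_us: u64, base: f64) -> bool {
        if cur < min_us {
            false // still inside the minimum interval: no ping yet
        } else if cur.saturating_sub(last) >= max_us {
            true // never wait longer than the maximum interval
        } else if last < min_us {
            true // first ping once the minimum interval has elapsed
        } else {
            // Ping when cur and last fall into different logarithmic buckets
            (cur as f64 / min_us as f64).log(base).floor()
                > (last as f64 / min_us as f64).log(base).floor()
        }
    }

    fn main() {
        let min = 1_000_000u64; // 1 second
        let max = 60_000_000u64; // 1 minute cap
        assert!(!retry_falloff_log_sketch(0, 500_000, min, max, 2.0)); // too early
        assert!(retry_falloff_log_sketch(0, 1_500_000, min, max, 2.0)); // first ping due
        assert!(!retry_falloff_log_sketch(2_500_000, 3_000_000, min, max, 2.0)); // same bucket
    }
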
@@ -292,37 +322,44 @@ impl BucketEntry {
     }
 
     pub(super) fn touch_last_seen(&mut self, ts: u64) {
+        // If we've heard from the node at all, we can always restart our lost ping count
+        self.peer_stats.rpc_stats.recent_lost_answers = 0;
         // Mark the node as seen
-        self.peer_stats.last_seen = Some(ts);
+        if self
+            .peer_stats
+            .rpc_stats
+            .first_consecutive_seen_ts
+            .is_none()
+        {
+            self.peer_stats.rpc_stats.first_consecutive_seen_ts = Some(ts);
+        }
+
+        self.peer_stats.rpc_stats.last_seen_ts = Some(ts);
     }
 
     pub(super) fn state_debug_info(&self, cur_ts: u64) -> String {
-        let first_consecutive_answer_time = if let Some(first_consecutive_answer_time) =
-            self.peer_stats.rpc_stats.first_consecutive_answer_time
+        let first_consecutive_seen_ts = if let Some(first_consecutive_seen_ts) =
+            self.peer_stats.rpc_stats.first_consecutive_seen_ts
         {
             format!(
                 "{}s ago",
-                timestamp_to_secs(cur_ts.saturating_sub(first_consecutive_answer_time))
+                timestamp_to_secs(cur_ts.saturating_sub(first_consecutive_seen_ts))
            )
        } else {
            "never".to_owned()
        };
-        let last_seen = if let Some(last_seen) = self.peer_stats.last_seen {
+        let last_seen_ts_str = if let Some(last_seen_ts) = self.peer_stats.rpc_stats.last_seen_ts {
            format!(
                "{}s ago",
-                timestamp_to_secs(cur_ts.saturating_sub(last_seen))
+                timestamp_to_secs(cur_ts.saturating_sub(last_seen_ts))
            )
        } else {
            "never".to_owned()
        };
 
        format!(
-            "state: {:?}, first_consecutive_answer_time: {}, last_seen: {}",
+            "state: {:?}, first_consecutive_seen_ts: {}, last_seen_ts: {}",
            self.state(cur_ts),
-            first_consecutive_answer_time,
-            last_seen
+            first_consecutive_seen_ts,
+            last_seen_ts_str
        )
    }
 
@@ -332,11 +369,10 @@ impl BucketEntry {
     pub(super) fn question_sent(&mut self, ts: u64, bytes: u64, expects_answer: bool) {
         self.transfer_stats_accounting.add_up(bytes);
         self.peer_stats.rpc_stats.messages_sent += 1;
+        self.peer_stats.rpc_stats.failed_to_send = 0;
         if expects_answer {
             self.peer_stats.rpc_stats.questions_in_flight += 1;
         }
-        if self.peer_stats.last_seen.is_none() {
-            self.peer_stats.last_seen = Some(ts);
-        }
+        self.peer_stats.rpc_stats.last_question = Some(ts);
     }
     pub(super) fn question_rcvd(&mut self, ts: u64, bytes: u64) {
@@ -344,33 +380,40 @@ impl BucketEntry {
         self.peer_stats.rpc_stats.messages_rcvd += 1;
         self.touch_last_seen(ts);
     }
-    pub(super) fn answer_sent(&mut self, _ts: u64, bytes: u64) {
+    pub(super) fn answer_sent(&mut self, bytes: u64) {
         self.transfer_stats_accounting.add_up(bytes);
         self.peer_stats.rpc_stats.messages_sent += 1;
+        self.peer_stats.rpc_stats.failed_to_send = 0;
     }
     pub(super) fn answer_rcvd(&mut self, send_ts: u64, recv_ts: u64, bytes: u64) {
         self.transfer_stats_accounting.add_down(bytes);
         self.peer_stats.rpc_stats.messages_rcvd += 1;
         self.peer_stats.rpc_stats.questions_in_flight -= 1;
-        if self
-            .peer_stats
-            .rpc_stats
-            .first_consecutive_answer_time
-            .is_none()
-        {
-            self.peer_stats.rpc_stats.first_consecutive_answer_time = Some(recv_ts);
-        }
         self.record_latency(recv_ts - send_ts);
         self.touch_last_seen(recv_ts);
+        self.peer_stats.rpc_stats.recent_lost_answers = 0;
     }
-    pub(super) fn question_lost(&mut self, _ts: u64) {
-        self.peer_stats.rpc_stats.first_consecutive_answer_time = None;
+    pub(super) fn question_lost(&mut self) {
+        self.peer_stats.rpc_stats.first_consecutive_seen_ts = None;
         self.peer_stats.rpc_stats.questions_in_flight -= 1;
+        self.peer_stats.rpc_stats.recent_lost_answers += 1;
     }
+    pub(super) fn failed_to_send(&mut self, ts: u64, expects_answer: bool) {
+        if expects_answer {
+            self.peer_stats.rpc_stats.last_question = Some(ts);
+        }
+        self.peer_stats.rpc_stats.failed_to_send += 1;
+        self.peer_stats.rpc_stats.first_consecutive_seen_ts = None;
+    }
 }
 
 impl Drop for BucketEntry {
     fn drop(&mut self) {
-        assert_eq!(self.ref_count, 0);
+        if self.ref_count != 0 {
+            panic!(
+                "bucket entry dropped with non-zero refcount: {:#?}",
+                self.node_info()
+            )
+        }
     }
 }
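
The Drop change trades a bare assertion for a panic that carries the offending entry's node info, which makes a leaked reference identifiable from the backtrace. A minimal sketch of the same drop-time invariant check on a simplified type:

    struct Entry {
        ref_count: u32,
        debug_name: String,
    }

    impl Drop for Entry {
        fn drop(&mut self) {
            if self.ref_count != 0 {
                // Carries context, unlike a bare assert_eq!(self.ref_count, 0)
                panic!(
                    "entry dropped with non-zero refcount {}: {:?}",
                    self.ref_count, self.debug_name
                );
            }
        }
    }

    fn main() {
        let e = Entry { ref_count: 0, debug_name: "node".to_string() };
        drop(e); // ref_count is zero, so this drop passes the check
    }
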
@@ -75,25 +75,30 @@ impl RoutingTable {
         let mut cnt = 0;
         out += &format!("Entries: {}\n", inner.bucket_entry_count);
         while b < blen {
-            if inner.buckets[b].entries().len() > 0 {
-                out += &format!("  Bucket #{}:\n", b);
-                for e in inner.buckets[b].entries() {
-                    let state = e.1.state(cur_ts);
-                    if state >= min_state {
-                        out += &format!(
-                            "    {} [{}]\n",
-                            e.0.encode(),
-                            match state {
-                                BucketEntryState::Reliable => "R",
-                                BucketEntryState::Unreliable => "U",
-                                BucketEntryState::Dead => "D",
-                            }
-                        );
-
-                        cnt += 1;
-                        if cnt >= limit {
-                            break;
-                        }
-                    }
-                }
-            }
+            let filtered_entries: Vec<(&DHTKey, &BucketEntry)> = inner.buckets[b]
+                .entries()
+                .filter(|e| {
+                    let state = e.1.state(cur_ts);
+                    state >= min_state
+                })
+                .collect();
+            if !filtered_entries.is_empty() {
+                out += &format!("  Bucket #{}:\n", b);
+                for e in filtered_entries {
+                    let state = e.1.state(cur_ts);
+                    out += &format!(
+                        "    {} [{}]\n",
+                        e.0.encode(),
+                        match state {
+                            BucketEntryState::Reliable => "R",
+                            BucketEntryState::Unreliable => "U",
+                            BucketEntryState::Dead => "D",
+                        }
+                    );
+
+                    cnt += 1;
+                    if cnt >= limit {
+                        break;
+                    }
+                }
+            }
             if cnt >= limit {
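
The debug-dump restructure above follows a collect-then-render pattern: filter the bucket first so the "Bucket #N" header is emitted only when at least one entry survives the state filter. A generic standalone sketch with simplified types:

    // Stand-in types: (node key, state) pairs instead of real bucket entries.
    fn render_bucket(bucket_idx: usize, entries: &[(u64, u8)], min_state: u8) -> String {
        let mut out = String::new();
        // Filter first, so the header is emitted only for non-empty results
        let filtered: Vec<&(u64, u8)> =
            entries.iter().filter(|e| e.1 >= min_state).collect();
        if !filtered.is_empty() {
            out += &format!("  Bucket #{}:\n", bucket_idx);
            for e in filtered {
                out += &format!("    {} [{}]\n", e.0, e.1);
            }
        }
        out
    }

    fn main() {
        // No entry reaches state 2: no header, no output at all
        assert_eq!(render_bucket(0, &[(10, 1)], 2), "");
        // One entry qualifies: header plus one entry line
        assert!(!render_bucket(1, &[(11, 2), (12, 1)], 2).is_empty());
    }
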
@@ -72,10 +72,13 @@ impl RoutingTable {
         }
     }
 
-    pub fn filter_has_valid_signed_node_info(kv: &(&DHTKey, Option<&mut BucketEntry>)) -> bool {
+    pub fn filter_has_valid_signed_node_info(
+        kv: &(&DHTKey, Option<&mut BucketEntry>),
+        own_peer_info_is_valid: bool,
+    ) -> bool {
         match &kv.1 {
-            None => true,
-            Some(b) => b.has_node_info(),
+            None => own_peer_info_is_valid,
+            Some(b) => b.has_valid_signed_node_info(),
         }
     }
 
@@ -117,10 +120,11 @@ impl RoutingTable {
             nodes.push(selfkv);
         }
         // add all nodes from buckets
+        // Can't use with_entries() here due to lifetime issues
         for b in &mut inner.buckets {
             for (k, v) in b.entries_mut() {
                 // Don't bother with dead nodes
-                if !v.check_dead(cur_ts) {
+                if v.state(cur_ts) >= BucketEntryState::Unreliable {
                     // Apply filter
                     let kv = (k, Some(v));
                     if filter(&kv) {
@@ -159,13 +163,11 @@ impl RoutingTable {
             // filter
             |kv| {
                 if kv.1.is_none() {
-                    // filter out self peer, as it is irrelevant to the 'fastest nodes' search
-                    return false;
+                    // always filter out self peer, as it is irrelevant to the 'fastest nodes' search
+                    false
+                } else {
+                    filter.as_ref().map(|f| f(kv)).unwrap_or(true)
                 }
-                if filter.is_some() && !filter.as_ref().unwrap()(kv) {
-                    return false;
-                }
-                true
             },
             // sort
             |(a_key, a_entry), (b_key, b_entry)| {
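
The rewritten closure uses the "optional predicate" idiom: when no filter closure is supplied, every candidate passes. A standalone sketch of the same pattern with hypothetical names:

    fn apply_filter<T, F: Fn(&T) -> bool>(items: Vec<T>, filter: Option<F>) -> Vec<T> {
        items
            .into_iter()
            // With no filter, map() yields None and unwrap_or(true) lets the item through
            .filter(|item| filter.as_ref().map(|f| f(item)).unwrap_or(true))
            .collect()
    }

    fn main() {
        let evens = apply_filter(vec![1, 2, 3, 4], Some(|n: &i32| n % 2 == 0));
        assert_eq!(evens, vec![2, 4]);
        let all = apply_filter(vec![1, 2, 3], None::<fn(&i32) -> bool>);
        assert_eq!(all, vec![1, 2, 3]);
    }
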
@@ -237,16 +239,7 @@ impl RoutingTable {
             node_count,
             cur_ts,
             // filter
-            |kv| {
-                if kv.1.is_none() {
-                    // include self peer, as it is relevant to the 'closest nodes' search
-                    return true;
-                }
-                if filter.is_some() && !filter.as_ref().unwrap()(kv) {
-                    return false;
-                }
-                true
-            },
+            |kv| filter.as_ref().map(|f| f(kv)).unwrap_or(true),
             // sort
             |(a_key, a_entry), (b_key, b_entry)| {
                 // same nodes are always the same
@@ -311,11 +311,11 @@ impl RoutingTable {
|
||||
|
||||
// Public dial info changed, go through all nodes and reset their 'seen our node info' bit
|
||||
if matches!(domain, RoutingDomain::PublicInternet) {
|
||||
for bucket in &mut inner.buckets {
|
||||
for entry in bucket.entries_mut() {
|
||||
entry.1.set_seen_our_node_info(false);
|
||||
}
|
||||
}
|
||||
let cur_ts = intf::get_timestamp();
|
||||
Self::with_entries(&mut *inner, cur_ts, BucketEntryState::Dead, |_, e| {
|
||||
e.set_seen_our_node_info(false);
|
||||
Option::<()>::None
|
||||
});
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -393,26 +393,18 @@ impl RoutingTable {
             let mut inner = this.inner.lock();
             let mut node_refs = Vec::<NodeRef>::with_capacity(inner.bucket_entry_count);
             let cur_ts = intf::get_timestamp();
-            for bucket in &mut inner.buckets {
-                for entry in bucket.entries_mut() {
-                    match entry.1.state(cur_ts) {
-                        BucketEntryState::Reliable | BucketEntryState::Unreliable => {
-                            // Only update nodes that haven't seen our node info yet
-                            if !entry.1.has_seen_our_node_info() {
-                                node_refs.push(NodeRef::new(
-                                    this.clone(),
-                                    *entry.0,
-                                    entry.1,
-                                    None,
-                                ));
-                            }
-                        }
-                        BucketEntryState::Dead => {
-                            // do nothing
-                        }
-                    }
-                }
-            }
+            Self::with_entries(&mut *inner, cur_ts, BucketEntryState::Unreliable, |k, e| {
+                // Only update nodes that haven't seen our node info yet
+                if !e.has_seen_our_node_info() {
+                    node_refs.push(NodeRef::new(
+                        this.clone(),
+                        *k,
+                        e,
+                        None,
+                    ));
+                }
+                Option::<()>::None
+            });
             node_refs
         };
@@ -458,8 +450,8 @@ impl RoutingTable {
         for bucket in &mut inner.buckets {
             bucket.kick(0);
         }
-        log_rtab!(
-            "Routing table purge complete. Routing table now has {} nodes",
+        log_rtab!(debug
+            "Routing table purge complete. Routing table now has {} nodes",
             inner.bucket_entry_count
         );
     }
@@ -473,7 +465,7 @@ impl RoutingTable {
         if let Some(dead_node_ids) = bucket.kick(bucket_depth) {
             // Remove counts
             inner.bucket_entry_count -= dead_node_ids.len();
-            log_rtab!("Routing table now has {} nodes", inner.bucket_entry_count);
+            log_rtab!(debug "Routing table now has {} nodes", inner.bucket_entry_count);
 
             // Now purge the routing table inner vectors
             //let filter = |k: &DHTKey| dead_node_ids.contains(k);
@@ -490,6 +482,34 @@ impl RoutingTable {
             .unwrap()
     }
 
+    fn get_entry_count(inner: &mut RoutingTableInner, min_state: BucketEntryState) -> usize {
+        let mut count = 0usize;
+        let cur_ts = intf::get_timestamp();
+        Self::with_entries(inner, cur_ts, min_state, |_, _| {
+            count += 1;
+            Option::<()>::None
+        });
+        count
+    }
+
+    fn with_entries<T, F: FnMut(&DHTKey, &mut BucketEntry) -> Option<T>>(
+        inner: &mut RoutingTableInner,
+        cur_ts: u64,
+        min_state: BucketEntryState,
+        mut f: F,
+    ) -> Option<T> {
+        for bucket in &mut inner.buckets {
+            for entry in bucket.entries_mut() {
+                if entry.1.state(cur_ts) >= min_state {
+                    if let Some(out) = f(entry.0, entry.1) {
+                        return Some(out);
+                    }
+                }
+            }
+        }
+        None
+    }
+
     fn drop_node_ref(&self, node_id: DHTKey) {
         // Reduce ref count on entry
         let mut inner = self.inner.lock();
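
with_entries() centralizes the bucket iteration that several routines in this commit now share: visit every entry at or above min_state, and stop early as soon as the callback returns Some. A simplified standalone analogue showing both usage styles (the types here are stand-ins, not the real RoutingTableInner or BucketEntry):

    fn with_items<T, F: FnMut(&u32) -> Option<T>>(items: &[u32], min: u32, mut f: F) -> Option<T> {
        for item in items {
            if *item >= min {
                if let Some(out) = f(item) {
                    return Some(out); // early exit on Some
                }
            }
        }
        None
    }

    fn main() {
        // Counting style (like get_entry_count): the callback never early-exits
        let mut count = 0usize;
        let _ = with_items(&[1, 5, 7, 2], 3, |_| {
            count += 1;
            Option::<()>::None
        });
        assert_eq!(count, 2);

        // Search style: return Some to stop at the first qualifying item
        let found = with_items(&[1, 5, 7, 2], 3, |v| Some(*v));
        assert_eq!(found, Some(5));
    }
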
@@ -536,7 +556,8 @@ impl RoutingTable {
             None => {
                 // Make new entry
                 inner.bucket_entry_count += 1;
-                log_rtab!("Routing table now has {} nodes", inner.bucket_entry_count);
+                let cnt = inner.bucket_entry_count;
+                log_rtab!(debug "Routing table now has {} nodes, {} live", cnt, Self::get_entry_count(&mut *inner, BucketEntryState::Unreliable));
                 let bucket = &mut inner.buckets[idx];
                 let nr = bucket.add_entry(node_id);
 
@@ -639,38 +660,32 @@ impl RoutingTable {
         let mut best_inbound_relay: Option<NodeRef> = None;
 
         // Iterate all known nodes for candidates
-        for b in &mut inner.buckets {
-            for (k, entry) in b.entries_mut() {
-                // Ensure it's not dead
-                if !matches!(entry.state(cur_ts), BucketEntryState::Dead) {
-                    // Ensure this node is not on our local network
-                    if !entry
-                        .local_node_info()
-                        .map(|l| l.has_dial_info())
-                        .unwrap_or(false)
-                    {
-                        // Ensure we have the node's status
-                        if let Some(node_status) = &entry.peer_stats().status {
-                            // Ensure the node will relay
-                            if node_status.will_relay {
-                                if let Some(best_inbound_relay) = best_inbound_relay.as_mut() {
-                                    if best_inbound_relay.operate(|best| {
-                                        BucketEntry::cmp_fastest_reliable(cur_ts, best, entry)
-                                    }) == std::cmp::Ordering::Greater
-                                    {
-                                        *best_inbound_relay =
-                                            NodeRef::new(self.clone(), *k, entry, None);
-                                    }
-                                } else {
-                                    best_inbound_relay =
-                                        Some(NodeRef::new(self.clone(), *k, entry, None));
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
+        Self::with_entries(&mut *inner, cur_ts, BucketEntryState::Unreliable, |k, e| {
+            // Ensure this node is not on our local network
+            if !e
+                .local_node_info()
+                .map(|l| l.has_dial_info())
+                .unwrap_or(false)
+            {
+                // Ensure we have the node's status
+                if let Some(node_status) = &e.peer_stats().status {
+                    // Ensure the node will relay
+                    if node_status.will_relay {
+                        if let Some(best_inbound_relay) = best_inbound_relay.as_mut() {
+                            if best_inbound_relay
+                                .operate(|best| BucketEntry::cmp_fastest_reliable(cur_ts, best, e))
+                                == std::cmp::Ordering::Greater
+                            {
+                                *best_inbound_relay = NodeRef::new(self.clone(), *k, e, None);
+                            }
+                        } else {
+                            best_inbound_relay = Some(NodeRef::new(self.clone(), *k, e, None));
+                        }
+                    }
+                }
+            }
+            Option::<()>::None
+        });
 
         best_inbound_relay
     }
@@ -771,11 +786,105 @@ impl RoutingTable {
         }
     }
 
-    async fn resolve_bootstrap(&self, bootstrap: Vec<String>) -> Result<Vec<String>, String> {
-        let mut out = Vec::<String>::new();
-        //
+    // Bootstrap lookup process
+    async fn resolve_bootstrap(&self, bootstrap: Vec<String>) -> Result<Vec<NodeDialInfo>, String> {
+        let mut out = Vec::<NodeDialInfo>::new();
+
+        // Resolve from bootstrap root to bootstrap hostnames
+        let mut bsnames = Vec::<String>::new();
+        for bh in bootstrap {
+            // Get TXT record for bootstrap (bootstrap.veilid.net, or similar)
+            let records = intf::txt_lookup(&bh).await?;
+            for record in records {
+                // Split the bootstrap name record by commas
+                for rec in record.split(',') {
+                    let rec = rec.trim();
+                    // If the name specified is fully qualified, go with it
+                    let bsname = if rec.ends_with('.') {
+                        rec.to_string()
+                    }
+                    // If the name is not fully qualified, prepend it to the bootstrap name
+                    else {
+                        format!("{}.{}", rec, bh)
+                    };
+
+                    // Add to the list of bootstrap names to look up
+                    bsnames.push(bsname);
+                }
+            }
+        }
+
+        // Get bootstrap nodes from hostnames concurrently
+        let mut unord = FuturesUnordered::new();
+        for bsname in bsnames {
+            unord.push(async move {
+                // look up bootstrap node txt records
+                let bsnirecords = match intf::txt_lookup(&bsname).await {
+                    Err(e) => {
+                        warn!("bootstrap node txt lookup failed for {}: {}", bsname, e);
+                        return None;
+                    }
+                    Ok(v) => v,
+                };
+                // for each record resolve into node dial info strings
+                let mut nodedialinfos: Vec<NodeDialInfo> = Vec::new();
+                for bsnirecord in bsnirecords {
+                    // split bootstrap node record by commas. example:
+                    // 7lxDEabK_qgjbe38RtBa3IZLrud84P6NhGP-pRTZzdQ,tcp://bootstrap-dev-alpha.veilid.net:5150,udp://bootstrap-dev-alpha.veilid.net:5150,ws://bootstrap-dev-alpha.veilid.net:5150/ws
+                    let mut records = bsnirecord.split(',').map(|x| x.trim());
+                    let node_id_str = match records.next() {
+                        Some(v) => v,
+                        None => {
+                            warn!("no node id specified in bootstrap node txt record");
+                            continue;
+                        }
+                    };
+                    // Decode the node id
+                    let node_id_key = match DHTKey::try_decode(node_id_str) {
+                        Ok(v) => v,
+                        Err(e) => {
+                            warn!(
+                                "Invalid node id in bootstrap node record {}: {}",
+                                node_id_str, e
+                            );
+                            continue;
+                        }
+                    };
+
+                    // If this is our own node id, then we skip it for bootstrap, in case we are a bootstrap node
+                    if self.node_id() == node_id_key {
+                        continue;
+                    }
+
+                    // Resolve each record and store in node dial infos list
+                    let node_id = NodeId::new(node_id_key);
+                    for rec in records {
+                        let rec = rec.trim();
+                        let dial_infos = match DialInfo::try_vec_from_url(rec) {
+                            Ok(dis) => dis,
+                            Err(e) => {
+                                warn!("Couldn't resolve bootstrap node dial info {}: {}", rec, e);
+                                continue;
+                            }
+                        };
+
+                        for dial_info in dial_infos {
+                            nodedialinfos.push(NodeDialInfo {
+                                node_id: node_id.clone(),
+                                dial_info,
+                            })
+                        }
+                    }
+                }
+                Some(nodedialinfos)
+            });
+        }
+        while let Some(ndis) = unord.next().await {
+            if let Some(mut ndis) = ndis {
+                out.append(&mut ndis);
+            }
+        }
 
         Ok(out)
     }
 
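
A minimal sketch of splitting one such TXT record into its node id and dial-info URL fields. The real code goes through DHTKey::try_decode and DialInfo::try_vec_from_url; this only illustrates the comma-separated layout shown in the comment above:

    fn parse_bootstrap_record(record: &str) -> Option<(String, Vec<String>)> {
        let mut fields = record.split(',').map(|x| x.trim().to_string());
        let node_id = fields.next()?;
        if node_id.is_empty() {
            return None; // no node id specified
        }
        // Remaining fields are dial info URLs for that node id
        let dial_info_urls: Vec<String> = fields.collect();
        Some((node_id, dial_info_urls))
    }

    fn main() {
        let rec = "7lxDEabK_qgjbe38RtBa3IZLrud84P6NhGP-pRTZzdQ,\
                   tcp://bootstrap-dev-alpha.veilid.net:5150,\
                   udp://bootstrap-dev-alpha.veilid.net:5150";
        let (id, urls) = parse_bootstrap_record(rec).unwrap();
        assert_eq!(id, "7lxDEabK_qgjbe38RtBa3IZLrud84P6NhGP-pRTZzdQ");
        assert_eq!(urls.len(), 2);
    }
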
@@ -791,8 +900,18 @@ impl RoutingTable {
         log_rtab!("--- bootstrap_task");
 
         // If we aren't specifying a bootstrap node list explicitly, then pull from the bootstrap server(s)
-        let bootstrap_nodes = if !bootstrap_nodes.is_empty() {
-            bootstrap_nodes
+        let bootstrap_node_dial_infos = if !bootstrap_nodes.is_empty() {
+            let mut bsnvec = Vec::new();
+            for b in bootstrap_nodes {
+                let ndis = NodeDialInfo::from_str(b.as_str())
+                    .map_err(map_to_string)
+                    .map_err(logthru_rtab!(
+                        "Invalid node dial info in bootstrap entry: {}",
+                        b
+                    ))?;
+                bsnvec.push(ndis);
+            }
+            bsnvec
         } else {
             // Resolve bootstrap servers and recurse their TXT entries
             self.resolve_bootstrap(bootstrap).await?
@@ -800,16 +919,13 @@ impl RoutingTable {
|
||||
|
||||
// Map all bootstrap entries to a single key with multiple dialinfo
|
||||
let mut bsmap: BTreeMap<DHTKey, Vec<DialInfoDetail>> = BTreeMap::new();
|
||||
for b in bootstrap_nodes {
|
||||
let ndis = NodeDialInfo::from_str(b.as_str())
|
||||
.map_err(map_to_string)
|
||||
.map_err(logthru_rtab!("Invalid dial info in bootstrap entry: {}", b))?;
|
||||
let node_id = ndis.node_id.key;
|
||||
for ndi in bootstrap_node_dial_infos {
|
||||
let node_id = ndi.node_id.key;
|
||||
bsmap
|
||||
.entry(node_id)
|
||||
.or_insert_with(Vec::new)
|
||||
.push(DialInfoDetail {
|
||||
dial_info: ndis.dial_info,
|
||||
dial_info: ndi.dial_info,
|
||||
class: DialInfoClass::Direct, // Bootstraps are always directly reachable
|
||||
});
|
||||
}
|
||||
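
The BTreeMap entry().or_insert_with(Vec::new).push() chain above is the standard grouping idiom: collapse many (key, value) records into one map entry per key holding all of that key's values. A tiny standalone demonstration:

    use std::collections::BTreeMap;

    fn main() {
        let records = [("a", 1), ("b", 2), ("a", 3)];
        let mut map: BTreeMap<&str, Vec<i32>> = BTreeMap::new();
        for (k, v) in records {
            // First occurrence creates the Vec; later ones append to it
            map.entry(k).or_insert_with(Vec::new).push(v);
        }
        assert_eq!(map["a"], vec![1, 3]);
        assert_eq!(map["b"], vec![2]);
    }
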
@@ -846,13 +962,15 @@ impl RoutingTable {
                         "bootstrap at {:?} did not return valid signed node info",
                         nr
                     );
-                    // xxx: delete the node?
+                    // If this node info is invalid, it will time out after being unpingable
+                } else {
+                    // otherwise this bootstrap is valid, let's ask it to find ourselves now
+                    this.reverse_find_node(nr, true).await
                 }
             });
         }
 
         // Wait for all bootstrap operations to complete before we complete the singlefuture
         while unord.next().await.is_some() {}
         Ok(())
     }
@@ -865,15 +983,20 @@ impl RoutingTable {
     async fn peer_minimum_refresh_task_routine(self) -> Result<(), String> {
         log_rtab!("--- peer_minimum_refresh task");
 
-        // get list of all peers we know about, even the unreliable ones, and ask them to bootstrap too
+        // get list of all peers we know about, even the unreliable ones, and ask them to find nodes close to our node too
         let noderefs = {
             let mut inner = self.inner.lock();
             let mut noderefs = Vec::<NodeRef>::with_capacity(inner.bucket_entry_count);
-            for b in &mut inner.buckets {
-                for (k, entry) in b.entries_mut() {
-                    noderefs.push(NodeRef::new(self.clone(), *k, entry, None))
-                }
-            }
+            let cur_ts = intf::get_timestamp();
+            Self::with_entries(
+                &mut *inner,
+                cur_ts,
+                BucketEntryState::Unreliable,
+                |k, entry| {
+                    noderefs.push(NodeRef::new(self.clone(), *k, entry, None));
+                    Option::<()>::None
+                },
+            );
             noderefs
         };
         log_rtab!("  refreshing with nodes: {:?}", noderefs);
@@ -892,32 +1015,31 @@ impl RoutingTable {
     // Ping each node in the routing table if they need to be pinged
     // to determine their reliability
     async fn ping_validator_task_routine(self, _last_ts: u64, cur_ts: u64) -> Result<(), String> {
-        log_rtab!("--- ping_validator task");
+        // log_rtab!("--- ping_validator task");
 
         let rpc = self.rpc_processor();
         let netman = self.network_manager();
         let relay_node_id = netman.relay_node().map(|nr| nr.node_id());
 
         let mut inner = self.inner.lock();
-        for b in &mut inner.buckets {
-            for (k, entry) in b.entries_mut() {
-                if entry.needs_ping(k, cur_ts, relay_node_id) {
-                    let nr = NodeRef::new(self.clone(), *k, entry, None);
-                    log_rtab!(
-                        "    --- ping validating: {:?} ({})",
-                        nr,
-                        entry.state_debug_info(cur_ts)
-                    );
-                    intf::spawn_local(rpc.clone().rpc_call_status(nr)).detach();
-                }
-            }
-        }
+        Self::with_entries(&mut *inner, cur_ts, BucketEntryState::Unreliable, |k, e| {
+            if e.needs_ping(k, cur_ts, relay_node_id) {
+                let nr = NodeRef::new(self.clone(), *k, e, None);
+                log_rtab!(
+                    "    --- ping validating: {:?} ({})",
+                    nr,
+                    e.state_debug_info(cur_ts)
+                );
+                intf::spawn_local(rpc.clone().rpc_call_status(nr)).detach();
+            }
+            Option::<()>::None
+        });
         Ok(())
     }
 
     // Compute transfer statistics to determine how 'fast' a node is
     async fn rolling_transfers_task_routine(self, last_ts: u64, cur_ts: u64) -> Result<(), String> {
-        log_rtab!("--- rolling_transfers task");
+        // log_rtab!("--- rolling_transfers task");
         let inner = &mut *self.inner.lock();
 
         // Roll our own node's transfers
@@ -940,8 +1062,8 @@ impl RoutingTable {
         // Do rolling transfers every ROLLING_TRANSFERS_INTERVAL_SECS secs
         self.unlocked_inner.rolling_transfers_task.tick().await?;
 
-        // If routing table is empty, then add the bootstrap nodes to it
-        if self.inner.lock().bucket_entry_count == 0 {
+        // If routing table has no live entries, then add the bootstrap nodes to it
+        if Self::get_entry_count(&mut *self.inner.lock(), BucketEntryState::Unreliable) == 0 {
             self.unlocked_inner.bootstrap_task.tick().await?;
         }
 
@@ -950,7 +1072,9 @@ impl RoutingTable {
             let c = self.config.get();
             c.network.dht.min_peer_count as usize
         };
-        if self.inner.lock().bucket_entry_count < min_peer_count {
+        if Self::get_entry_count(&mut *self.inner.lock(), BucketEntryState::Unreliable)
+            < min_peer_count
+        {
             self.unlocked_inner.peer_minimum_refresh_task.tick().await?;
         }
         // Ping validate some nodes to groom the table
@@ -987,13 +1111,13 @@ impl RoutingTable {
             e.question_rcvd(ts, bytes);
         })
     }
-    pub fn stats_answer_sent(&self, node_ref: NodeRef, ts: u64, bytes: u64) {
+    pub fn stats_answer_sent(&self, node_ref: NodeRef, bytes: u64) {
         self.inner
             .lock()
             .self_transfer_stats_accounting
             .add_up(bytes);
         node_ref.operate(|e| {
-            e.answer_sent(ts, bytes);
+            e.answer_sent(bytes);
         })
     }
     pub fn stats_answer_rcvd(&self, node_ref: NodeRef, send_ts: u64, recv_ts: u64, bytes: u64) {
@@ -1009,9 +1133,14 @@ impl RoutingTable {
             e.answer_rcvd(send_ts, recv_ts, bytes);
         })
     }
-    pub fn stats_question_lost(&self, node_ref: NodeRef, ts: u64) {
+    pub fn stats_question_lost(&self, node_ref: NodeRef) {
         node_ref.operate(|e| {
-            e.question_lost(ts);
+            e.question_lost();
         })
     }
+    pub fn stats_failed_to_send(&self, node_ref: NodeRef, ts: u64, expects_answer: bool) {
+        node_ref.operate(|e| {
+            e.failed_to_send(ts, expects_answer);
+        })
+    }
 
@@ -214,8 +214,8 @@ impl NodeRef {
         // Get the last connection and the last time we saw anything with this connection
         let (last_connection, last_seen) = self.operate(|e| {
             if let Some((last_connection, connection_ts)) = e.last_connection() {
-                if let Some(last_seen) = e.peer_stats().last_seen {
-                    Some((last_connection, u64::max(last_seen, connection_ts)))
+                if let Some(last_seen_ts) = e.peer_stats().rpc_stats.last_seen_ts {
+                    Some((last_connection, u64::max(last_seen_ts, connection_ts)))
                 } else {
                     Some((last_connection, connection_ts))
                 }