various fixes, including node_ref last_connection sorting problem

This commit is contained in:
John Smith
2022-10-04 11:27:38 -04:00
parent 0a01c0d23e
commit 4b2164a546
16 changed files with 740 additions and 517 deletions

View File

@@ -26,6 +26,7 @@ struct ConnectionManagerArc {
connection_initial_timeout_ms: u32,
connection_inactivity_timeout_ms: u32,
connection_table: ConnectionTable,
address_lock_table: AsyncTagLockTable<SocketAddr>,
inner: Mutex<Option<ConnectionManagerInner>>,
}
impl core::fmt::Debug for ConnectionManagerArc {
@@ -69,6 +70,7 @@ impl ConnectionManager {
connection_initial_timeout_ms,
connection_inactivity_timeout_ms,
connection_table: ConnectionTable::new(config),
address_lock_table: AsyncTagLockTable::new(),
inner: Mutex::new(None),
}
}
@@ -196,7 +198,7 @@ impl ConnectionManager {
}
// Returns a network connection if one already is established
#[instrument(level = "trace", skip(self), ret)]
//#[instrument(level = "trace", skip(self), ret)]
pub fn get_connection(&self, descriptor: ConnectionDescriptor) -> Option<ConnectionHandle> {
self.arc
.connection_table
@@ -236,11 +238,6 @@ impl ConnectionManager {
did_kill
}
/// Lock remote address
// async fn lock_remote_address(&self, remote_addr: SocketAddr) -> {
// }
/// Called when we want to create a new connection or get the current one that already exists
/// This will kill off any connections that are in conflict with the new connection to be made
/// in order to make room for the new connection in the system's connection table
@@ -251,18 +248,17 @@ impl ConnectionManager {
local_addr: Option<SocketAddr>,
dial_info: DialInfo,
) -> EyreResult<NetworkResult<ConnectionHandle>> {
warn!(
// Async lock on the remote address for atomicity per remote
let peer_address = dial_info.to_peer_address();
let remote_addr = peer_address.to_socket_addr();
let _lock_guard = self.arc.address_lock_table.lock_tag(remote_addr);
log_net!(
"== get_or_create_connection local_addr={:?} dial_info={:?}",
local_addr.green(),
dial_info.green()
);
// Make a connection descriptor for this dialinfo
let peer_address = dial_info.to_peer_address();
// Async lock on the remote address for atomicity
//let _lock_guard = self.lock_remote_address(peer_address.to_socket_addr());
// Kill off any possibly conflicting connections
let did_kill = self.kill_off_colliding_connections(&dial_info).await;
let mut retry_count = if did_kill { 2 } else { 0 };
@@ -299,6 +295,22 @@ impl ConnectionManager {
}
Err(e) => {
if retry_count == 0 {
// Try one last time to return a connection from the table, in case
// an 'accept' happened at literally the same time as our connect
if let Some(conn) = self
.arc
.connection_table
.get_last_connection_by_remote(peer_address)
{
log_net!(
"== Returning existing connection in race local_addr={:?} peer_address={:?}",
local_addr.green(),
peer_address.green()
);
return Ok(NetworkResult::Value(conn));
}
return Err(e).wrap_err("failed to connect");
}
}

View File

@@ -144,7 +144,7 @@ impl ConnectionTable {
let mut out_conn = None;
if inner.conn_by_id[protocol_index].len() > inner.max_connections[protocol_index] {
if let Some((lruk, lru_conn)) = inner.conn_by_id[protocol_index].remove_lru() {
debug!("connection lru out: {:?}", lru_conn);
log_net!(debug "connection lru out: {:?}", lru_conn);
out_conn = Some(lru_conn);
Self::remove_connection_records(&mut *inner, lruk);
}
@@ -158,7 +158,8 @@ impl ConnectionTable {
Ok(out_conn)
}
#[instrument(level = "trace", skip(self), ret)]
//#[instrument(level = "trace", skip(self), ret)]
#[allow(dead_code)]
pub fn get_connection_by_id(&self, id: NetworkConnectionId) -> Option<ConnectionHandle> {
let mut inner = self.inner.lock();
let protocol_index = *inner.protocol_index_by_id.get(&id)?;
@@ -166,7 +167,7 @@ impl ConnectionTable {
Some(out.get_handle())
}
#[instrument(level = "trace", skip(self), ret)]
//#[instrument(level = "trace", skip(self), ret)]
pub fn get_connection_by_descriptor(
&self,
descriptor: ConnectionDescriptor,
@@ -179,7 +180,7 @@ impl ConnectionTable {
Some(out.get_handle())
}
#[instrument(level = "trace", skip(self), ret)]
//#[instrument(level = "trace", skip(self), ret)]
pub fn get_last_connection_by_remote(&self, remote: PeerAddress) -> Option<ConnectionHandle> {
let mut inner = self.inner.lock();
@@ -189,7 +190,8 @@ impl ConnectionTable {
Some(out.get_handle())
}
#[instrument(level = "trace", skip(self), ret)]
//#[instrument(level = "trace", skip(self), ret)]
#[allow(dead_code)]
pub fn get_connection_ids_by_remote(&self, remote: PeerAddress) -> Vec<NetworkConnectionId> {
let inner = self.inner.lock();
inner

View File

@@ -1949,47 +1949,53 @@ impl NetworkManager {
.clone()
.unlocked_inner
.node_info_update_single_future
.single_spawn(async move {
// Only update if we actually have valid signed node info for this routing domain
if !this.routing_table().has_valid_own_node_info(routing_domain) {
trace!(
.single_spawn(
async move {
// Only update if we actually have valid signed node info for this routing domain
if !this.routing_table().has_valid_own_node_info(routing_domain) {
trace!(
"not sending node info update because our network class is not yet valid"
);
return;
return;
}
// Get the list of refs to all nodes to update
let cur_ts = intf::get_timestamp();
let node_refs =
this.routing_table()
.get_nodes_needing_updates(routing_domain, cur_ts, all);
// Send the updates
log_net!(debug "Sending node info updates to {} nodes", node_refs.len());
let mut unord = FuturesUnordered::new();
for nr in node_refs {
let rpc = this.rpc_processor();
unord.push(
async move {
// Update the node
if let Err(e) = rpc
.rpc_call_node_info_update(nr.clone(), routing_domain)
.await
{
// Not fatal, but we should be able to see if this is happening
trace!("failed to send node info update to {:?}: {}", nr, e);
return;
}
// Mark the node as having seen our node info
nr.set_seen_our_node_info(routing_domain);
}
.instrument(Span::current()),
);
}
// Wait for futures to complete
while unord.next().await.is_some() {}
log_rtab!(debug "Finished sending node updates");
}
// Get the list of refs to all nodes to update
let cur_ts = intf::get_timestamp();
let node_refs =
this.routing_table()
.get_nodes_needing_updates(routing_domain, cur_ts, all);
// Send the updates
log_net!(debug "Sending node info updates to {} nodes", node_refs.len());
let mut unord = FuturesUnordered::new();
for nr in node_refs {
let rpc = this.rpc_processor();
unord.push(async move {
// Update the node
if let Err(e) = rpc
.rpc_call_node_info_update(nr.clone(), routing_domain)
.await
{
// Not fatal, but we should be able to see if this is happening
trace!("failed to send node info update to {:?}: {}", nr, e);
return;
}
// Mark the node as having seen our node info
nr.set_seen_our_node_info(routing_domain);
});
}
// Wait for futures to complete
while unord.next().await.is_some() {}
log_rtab!(debug "Finished sending node updates");
})
.instrument(Span::current()),
)
.await;
}
}

View File

@@ -255,6 +255,13 @@ impl DiscoveryContext {
{
return Some(external_mapped_dial_info);
} else {
warn!("UPNP port mapping succeeded but port {}/{} is still unreachable.\nYou may need to add a local firewall allowed port on this machine.\n",
local_port, match llpt {
LowLevelProtocolType::UDP => "udp",
LowLevelProtocolType::TCP => "tcp",
}
);
// release the mapping if we're still unreachable
let _ = self
.net
@@ -628,6 +635,7 @@ impl Network {
}
Some(vec![udpv4_context])
}
.instrument(trace_span!("do_public_dial_info_check UDPv4"))
.boxed(),
);
}
@@ -647,6 +655,7 @@ impl Network {
}
Some(vec![udpv6_context])
}
.instrument(trace_span!("do_public_dial_info_check UDPv6"))
.boxed(),
);
}
@@ -669,6 +678,7 @@ impl Network {
}
Some(vec![tcpv4_context])
}
.instrument(trace_span!("do_public_dial_info_check TCPv4"))
.boxed(),
);
}
@@ -688,6 +698,7 @@ impl Network {
}
Some(vec![wsv4_context])
}
.instrument(trace_span!("do_public_dial_info_check WSv4"))
.boxed(),
);
}
@@ -710,6 +721,7 @@ impl Network {
}
Some(vec![tcpv6_context])
}
.instrument(trace_span!("do_public_dial_info_check TCPv6"))
.boxed(),
);
}
@@ -729,6 +741,7 @@ impl Network {
}
Some(vec![wsv6_context])
}
.instrument(trace_span!("do_public_dial_info_check WSv6"))
.boxed(),
);
}

View File

@@ -109,11 +109,11 @@ impl Network {
};
// XXX
warn!(
"DEBUGACCEPT: local={} remote={}",
tcp_stream.local_addr().unwrap(),
tcp_stream.peer_addr().unwrap(),
);
// warn!(
// "DEBUGACCEPT: local={} remote={}",
// tcp_stream.local_addr().unwrap(),
// tcp_stream.peer_addr().unwrap(),
// );
let listener_state = listener_state.clone();
let connection_manager = connection_manager.clone();

View File

@@ -84,7 +84,7 @@ impl Network {
}
}
}
};
}.instrument(Span::current());
protocol_handlers_unordered.push(ph_future);
}

View File

@@ -185,7 +185,7 @@ pub async fn nonblocking_connect(
let socket2_addr = socket2::SockAddr::from(addr);
// XXX
let bind_local_addr = socket.local_addr().unwrap().as_socket().unwrap();
//let bind_local_addr = socket.local_addr().unwrap().as_socket().unwrap();
// Connect to the remote address
match socket.connect(&socket2_addr) {
@@ -197,24 +197,24 @@ pub async fn nonblocking_connect(
}
.map_err(|e| {
// XXX
warn!(
"DEBUGCONNECT XXXFAILXXX: bind={} local={} remote={}\nbacktrace={:?}",
bind_local_addr,
socket.local_addr().unwrap().as_socket().unwrap(),
addr,
backtrace::Backtrace::new(),
);
// warn!(
// "DEBUGCONNECT XXXFAILXXX: bind={} local={} remote={}\nbacktrace={:?}",
// bind_local_addr,
// socket.local_addr().unwrap().as_socket().unwrap(),
// addr,
// backtrace::Backtrace::new(),
// );
e
})?;
// XXX
warn!(
"DEBUGCONNECT: bind={} local={} remote={}\nbacktrace={:?}",
bind_local_addr,
socket.local_addr().unwrap().as_socket().unwrap(),
addr,
backtrace::Backtrace::new(),
);
// warn!(
// "DEBUGCONNECT: bind={} local={} remote={}\nbacktrace={:?}",
// bind_local_addr,
// socket.local_addr().unwrap().as_socket().unwrap(),
// addr,
// backtrace::Backtrace::new(),
// );
let async_stream = Async::new(std::net::TcpStream::from(socket))?;

View File

@@ -210,11 +210,13 @@ impl NetworkConnection {
Ok(NetworkResult::Value(out))
}
/// Returns a copy of this connection's current statistics.
///
/// Takes the lock on `self.stats` and clones the guarded value, so the
/// caller receives its own `NetworkConnectionStats` rather than a reference
/// into the locked state. The lock guard is released when this method returns.
#[allow(dead_code)]
pub fn stats(&self) -> NetworkConnectionStats {
let stats = self.stats.lock();
stats.clone()
}
/// Returns the `established_time` value stored on this connection.
// NOTE(review): the unit and epoch of this `u64` are not visible here —
// presumably the timestamp recorded when the connection was set up; confirm.
#[allow(dead_code)]
pub fn established_time(&self) -> u64 {
self.established_time
}
@@ -260,10 +262,11 @@ impl NetworkConnection {
need_sender = false;
let sender_fut = receiver.recv_async().then(|res| async {
match res {
Ok((span_id, message)) => {
Ok((_span_id, message)) => {
let recv_span = span!(parent: None, Level::TRACE, "process_connection recv");
recv_span.follows_from(span_id);
let recv_span = span!(Level::TRACE, "process_connection recv");
// xxx: causes crash (Missing otel data span extensions)
// recv_span.follows_from(span_id);
// send the packet
if let Err(e) = Self::send_internal(