various fixes, including node_ref last_connection sorting problem
@@ -26,6 +26,7 @@ struct ConnectionManagerArc {
connection_initial_timeout_ms: u32,
connection_inactivity_timeout_ms: u32,
connection_table: ConnectionTable,
address_lock_table: AsyncTagLockTable<SocketAddr>,
inner: Mutex<Option<ConnectionManagerInner>>,
}
impl core::fmt::Debug for ConnectionManagerArc {
@@ -69,6 +70,7 @@ impl ConnectionManager {
connection_initial_timeout_ms,
connection_inactivity_timeout_ms,
connection_table: ConnectionTable::new(config),
address_lock_table: AsyncTagLockTable::new(),
inner: Mutex::new(None),
}
}
@@ -196,7 +198,7 @@ impl ConnectionManager {
}

// Returns a network connection if one already is established
#[instrument(level = "trace", skip(self), ret)]
//#[instrument(level = "trace", skip(self), ret)]
pub fn get_connection(&self, descriptor: ConnectionDescriptor) -> Option<ConnectionHandle> {
self.arc
.connection_table
@@ -236,11 +238,6 @@ impl ConnectionManager {
did_kill
}

/// Locak remote address
// async fn lock_remote_address(&self, remote_addr: SocketAddr) -> {

// }

/// Called when we want to create a new connection or get the current one that already exists
/// This will kill off any connections that are in conflict with the new connection to be made
/// in order to make room for the new connection in the system's connection table
@@ -251,18 +248,17 @@ impl ConnectionManager {
local_addr: Option<SocketAddr>,
dial_info: DialInfo,
) -> EyreResult<NetworkResult<ConnectionHandle>> {
warn!(
// Async lock on the remote address for atomicity per remote
let peer_address = dial_info.to_peer_address();
let remote_addr = peer_address.to_socket_addr();
let _lock_guard = self.arc.address_lock_table.lock_tag(remote_addr);

log_net!(
"== get_or_create_connection local_addr={:?} dial_info={:?}",
local_addr.green(),
dial_info.green()
);

// Make a connection descriptor for this dialinfo
let peer_address = dial_info.to_peer_address();

// Async lock on the remote address for atomicity
//let _lock_guard = self.lock_remote_address(peer_address.to_socket_addr());

// Kill off any possibly conflicting connections
let did_kill = self.kill_off_colliding_connections(&dial_info).await;
let mut retry_count = if did_kill { 2 } else { 0 };
@@ -299,6 +295,22 @@ impl ConnectionManager {
}
Err(e) => {
if retry_count == 0 {
// Try one last time to return a connection from the table, in case
// an 'accept' happened at literally the same time as our connect
if let Some(conn) = self
.arc
.connection_table
.get_last_connection_by_remote(peer_address)
{
log_net!(
"== Returning existing connection in race local_addr={:?} peer_address={:?}",
local_addr.green(),
peer_address.green()
);

return Ok(NetworkResult::Value(conn));
}

return Err(e).wrap_err("failed to connect");
}
}
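The added address_lock_table gives get_or_create_connection a per-remote critical section: the table lookup, the kill of colliding connections, and the connect itself all run under a lock keyed by the remote SocketAddr. Below is a minimal sketch of that kind of keyed async lock, using tokio primitives and illustrative names; it is not the actual veilid-core AsyncTagLockTable.

```rust
use std::{collections::HashMap, net::SocketAddr, sync::Arc};
use tokio::sync::{Mutex, OwnedMutexGuard};

/// One async mutex per remote address; the guard is held across the
/// "lookup -> kill colliding -> connect -> insert" sequence.
#[derive(Default)]
struct AddressLockTable {
    locks: std::sync::Mutex<HashMap<SocketAddr, Arc<Mutex<()>>>>,
}

impl AddressLockTable {
    /// Await the lock for this address; dropping the guard releases it.
    /// (Entries are never cleaned up in this sketch.)
    async fn lock_tag(&self, addr: SocketAddr) -> OwnedMutexGuard<()> {
        let lock = {
            let mut locks = self.locks.lock().unwrap();
            locks
                .entry(addr)
                .or_insert_with(|| Arc::new(Mutex::new(())))
                .clone()
        };
        lock.lock_owned().await
    }
}
```

With the outbound path serialized per remote, the remaining race is against an inbound accept, which is why the error path in the hunk above falls back to get_last_connection_by_remote before giving up.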

@@ -144,7 +144,7 @@ impl ConnectionTable {
let mut out_conn = None;
if inner.conn_by_id[protocol_index].len() > inner.max_connections[protocol_index] {
if let Some((lruk, lru_conn)) = inner.conn_by_id[protocol_index].remove_lru() {
debug!("connection lru out: {:?}", lru_conn);
log_net!(debug "connection lru out: {:?}", lru_conn);
out_conn = Some(lru_conn);
Self::remove_connection_records(&mut *inner, lruk);
}
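For context on the hunk above (only the logging changes): the table holds per-protocol LRU maps and evicts the least recently used connection once a protocol exceeds its configured maximum. A rough sketch of that cap-and-evict step, using the lru crate and a placeholder record type:

```rust
use lru::LruCache;

type ConnectionId = u64;

/// After inserting a connection, evict the least-recently-used one if the
/// per-protocol cap is exceeded, returning it so the caller can close it.
fn enforce_connection_cap(
    conn_by_id: &mut LruCache<ConnectionId, String>, // String stands in for the connection record
    max_connections: usize,
) -> Option<(ConnectionId, String)> {
    if conn_by_id.len() > max_connections {
        return conn_by_id.pop_lru();
    }
    None
}

fn main() {
    let mut table = LruCache::unbounded();
    for id in 0..4u64 {
        table.put(id, format!("conn {id}"));
    }
    if let Some((id, conn)) = enforce_connection_cap(&mut table, 3) {
        println!("evicted {id}: {conn}"); // evicts the least recently used entry (id 0)
    }
}
```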
@@ -158,7 +158,8 @@ impl ConnectionTable {
Ok(out_conn)
}

#[instrument(level = "trace", skip(self), ret)]
//#[instrument(level = "trace", skip(self), ret)]
#[allow(dead_code)]
pub fn get_connection_by_id(&self, id: NetworkConnectionId) -> Option<ConnectionHandle> {
let mut inner = self.inner.lock();
let protocol_index = *inner.protocol_index_by_id.get(&id)?;
@@ -166,7 +167,7 @@ impl ConnectionTable {
Some(out.get_handle())
}

#[instrument(level = "trace", skip(self), ret)]
//#[instrument(level = "trace", skip(self), ret)]
pub fn get_connection_by_descriptor(
&self,
descriptor: ConnectionDescriptor,
@@ -179,7 +180,7 @@ impl ConnectionTable {
Some(out.get_handle())
}

#[instrument(level = "trace", skip(self), ret)]
//#[instrument(level = "trace", skip(self), ret)]
pub fn get_last_connection_by_remote(&self, remote: PeerAddress) -> Option<ConnectionHandle> {
let mut inner = self.inner.lock();

@@ -189,7 +190,8 @@ impl ConnectionTable {
Some(out.get_handle())
}

#[instrument(level = "trace", skip(self), ret)]
//#[instrument(level = "trace", skip(self), ret)]
#[allow(dead_code)]
pub fn get_connection_ids_by_remote(&self, remote: PeerAddress) -> Vec<NetworkConnectionId> {
let inner = self.inner.lock();
inner

@@ -1949,47 +1949,53 @@ impl NetworkManager {
.clone()
.unlocked_inner
.node_info_update_single_future
.single_spawn(async move {
// Only update if we actually have valid signed node info for this routing domain
if !this.routing_table().has_valid_own_node_info(routing_domain) {
trace!(
.single_spawn(
async move {
// Only update if we actually have valid signed node info for this routing domain
if !this.routing_table().has_valid_own_node_info(routing_domain) {
trace!(
"not sending node info update because our network class is not yet valid"
);
return;
return;
}

// Get the list of refs to all nodes to update
let cur_ts = intf::get_timestamp();
let node_refs =
this.routing_table()
.get_nodes_needing_updates(routing_domain, cur_ts, all);

// Send the updates
log_net!(debug "Sending node info updates to {} nodes", node_refs.len());
let mut unord = FuturesUnordered::new();
for nr in node_refs {
let rpc = this.rpc_processor();
unord.push(
async move {
// Update the node
if let Err(e) = rpc
.rpc_call_node_info_update(nr.clone(), routing_domain)
.await
{
// Not fatal, but we should be able to see if this is happening
trace!("failed to send node info update to {:?}: {}", nr, e);
return;
}

// Mark the node as having seen our node info
nr.set_seen_our_node_info(routing_domain);
}
.instrument(Span::current()),
);
}

// Wait for futures to complete
while unord.next().await.is_some() {}

log_rtab!(debug "Finished sending node updates");
}

// Get the list of refs to all nodes to update
let cur_ts = intf::get_timestamp();
let node_refs =
this.routing_table()
.get_nodes_needing_updates(routing_domain, cur_ts, all);

// Send the updates
log_net!(debug "Sending node info updates to {} nodes", node_refs.len());
let mut unord = FuturesUnordered::new();
for nr in node_refs {
let rpc = this.rpc_processor();
unord.push(async move {
// Update the node
if let Err(e) = rpc
.rpc_call_node_info_update(nr.clone(), routing_domain)
.await
{
// Not fatal, but we should be able to see if this is happening
trace!("failed to send node info update to {:?}: {}", nr, e);
return;
}

// Mark the node as having seen our node info
nr.set_seen_our_node_info(routing_domain);
});
}

// Wait for futures to complete
while unord.next().await.is_some() {}

log_rtab!(debug "Finished sending node updates");
})
.instrument(Span::current()),
)
.await;
}
}
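The node-info-update body appears twice in the hunk above because the whole block is rewrapped: the future handed to single_spawn, and each per-node future pushed onto the FuturesUnordered, is now wrapped with .instrument(Span::current()) so the spawned work stays attached to the caller's trace. A small sketch of that pattern, with tokio::spawn standing in for the single-spawn helper and a dummy update body:

```rust
use futures_util::{stream::FuturesUnordered, StreamExt};
use tracing::{Instrument, Span};

async fn send_updates(node_ids: Vec<u64>) {
    // Spawn the batch under the caller's current span rather than as a root span.
    tokio::spawn(
        async move {
            let mut unord = FuturesUnordered::new();
            for id in node_ids {
                unord.push(
                    async move {
                        tracing::trace!("updating node {}", id);
                        // ... the rpc_call_node_info_update equivalent would go here ...
                    }
                    // Each per-node future is instrumented the same way.
                    .instrument(Span::current()),
                );
            }
            // Wait for all the per-node updates to finish
            while unord.next().await.is_some() {}
        }
        .instrument(Span::current()),
    );
}
```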

@@ -255,6 +255,13 @@ impl DiscoveryContext {
{
return Some(external_mapped_dial_info);
} else {
warn!("UPNP port mapping succeeded but port {}/{} is still unreachable.\nYou may need to add a local firewall allowed port on this machine.\n",
local_port, match llpt {
LowLevelProtocolType::UDP => "udp",
LowLevelProtocolType::TCP => "tcp",
}
);

// release the mapping if we're still unreachable
let _ = self
.net
@@ -628,6 +635,7 @@ impl Network {
}
Some(vec![udpv4_context])
}
.instrument(trace_span!("do_public_dial_info_check UDPv4"))
.boxed(),
);
}
@@ -647,6 +655,7 @@ impl Network {
}
Some(vec![udpv6_context])
}
.instrument(trace_span!("do_public_dial_info_check UDPv6"))
.boxed(),
);
}
@@ -669,6 +678,7 @@ impl Network {
}
Some(vec![tcpv4_context])
}
.instrument(trace_span!("do_public_dial_info_check TCPv4"))
.boxed(),
);
}
@@ -688,6 +698,7 @@ impl Network {
}
Some(vec![wsv4_context])
}
.instrument(trace_span!("do_public_dial_info_check WSv4"))
.boxed(),
);
}
@@ -710,6 +721,7 @@ impl Network {
}
Some(vec![tcpv6_context])
}
.instrument(trace_span!("do_public_dial_info_check TCPv6"))
.boxed(),
);
}
@@ -729,6 +741,7 @@ impl Network {
}
Some(vec![wsv6_context])
}
.instrument(trace_span!("do_public_dial_info_check WSv6"))
.boxed(),
);
}
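Each of the dial-info-check hunks above adds one line: the already-boxed per-protocol check future gets its own named trace span before being pushed for concurrent execution. A sketch of the same shape, with placeholder check bodies and the protocol carried as a span field rather than in the span name:

```rust
use futures_util::{future::BoxFuture, stream::FuturesUnordered, FutureExt, StreamExt};
use tracing::{trace_span, Instrument};

async fn run_dial_info_checks() {
    let mut unord: FuturesUnordered<BoxFuture<'static, &'static str>> = FuturesUnordered::new();

    for proto in ["UDPv4", "UDPv6", "TCPv4", "WSv4"] {
        unord.push(
            async move {
                // ... the actual public dial info check for this protocol ...
                proto
            }
            // Give each boxed check its own span so the checks are distinguishable in traces.
            .instrument(trace_span!("do_public_dial_info_check", protocol = proto))
            .boxed(),
        );
    }

    while let Some(proto) = unord.next().await {
        tracing::debug!("finished dial info check for {}", proto);
    }
}
```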

@@ -109,11 +109,11 @@ impl Network {
};

// XXX
warn!(
"DEBUGACCEPT: local={} remote={}",
tcp_stream.local_addr().unwrap(),
tcp_stream.peer_addr().unwrap(),
);
// warn!(
// "DEBUGACCEPT: local={} remote={}",
// tcp_stream.local_addr().unwrap(),
// tcp_stream.peer_addr().unwrap(),
// );

let listener_state = listener_state.clone();
let connection_manager = connection_manager.clone();

@@ -84,7 +84,7 @@ impl Network {
}
}
}
};
}.instrument(Span::current());

protocol_handlers_unordered.push(ph_future);
}

@@ -185,7 +185,7 @@ pub async fn nonblocking_connect(
let socket2_addr = socket2::SockAddr::from(addr);

// XXX
let bind_local_addr = socket.local_addr().unwrap().as_socket().unwrap();
//let bind_local_addr = socket.local_addr().unwrap().as_socket().unwrap();

// Connect to the remote address
match socket.connect(&socket2_addr) {
@@ -197,24 +197,24 @@ pub async fn nonblocking_connect(
}
.map_err(|e| {
// XXX
warn!(
"DEBUGCONNECT XXXFAILXXX: bind={} local={} remote={}\nbacktrace={:?}",
bind_local_addr,
socket.local_addr().unwrap().as_socket().unwrap(),
addr,
backtrace::Backtrace::new(),
);
// warn!(
// "DEBUGCONNECT XXXFAILXXX: bind={} local={} remote={}\nbacktrace={:?}",
// bind_local_addr,
// socket.local_addr().unwrap().as_socket().unwrap(),
// addr,
// backtrace::Backtrace::new(),
// );
e
})?;

// XXX
warn!(
"DEBUGCONNECT: bind={} local={} remote={}\nbacktrace={:?}",
bind_local_addr,
socket.local_addr().unwrap().as_socket().unwrap(),
addr,
backtrace::Backtrace::new(),
);
// warn!(
// "DEBUGCONNECT: bind={} local={} remote={}\nbacktrace={:?}",
// bind_local_addr,
// socket.local_addr().unwrap().as_socket().unwrap(),
// addr,
// backtrace::Backtrace::new(),
// );

let async_stream = Async::new(std::net::TcpStream::from(socket))?;


@@ -210,11 +210,13 @@ impl NetworkConnection {
Ok(NetworkResult::Value(out))
}

#[allow(dead_code)]
pub fn stats(&self) -> NetworkConnectionStats {
let stats = self.stats.lock();
stats.clone()
}

#[allow(dead_code)]
pub fn established_time(&self) -> u64 {
self.established_time
}
@@ -260,10 +262,11 @@ impl NetworkConnection {
need_sender = false;
let sender_fut = receiver.recv_async().then(|res| async {
match res {
Ok((span_id, message)) => {
Ok((_span_id, message)) => {

let recv_span = span!(parent: None, Level::TRACE, "process_connection recv");
recv_span.follows_from(span_id);
let recv_span = span!(Level::TRACE, "process_connection recv");
// xxx: causes crash (Missing otel data span extensions)
// recv_span.follows_from(span_id);

// send the packet
if let Err(e) = Self::send_internal(
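On the recv-span change in this last hunk: the per-message span was previously created as an explicit root (parent: None) and linked to the sender's span id with follows_from, and that link is what triggered the crash noted in the comment, so the workaround is a plain child span of whatever span is current, with the link disabled. A sketch of both variants, with the channel's span id modeled as an Option<Id>:

```rust
use tracing::span::Id;
use tracing::{span, Level, Span};

// (a) What the code did before: a detached root span, causally linked to the
//     span id that arrived with the message over the channel.
fn recv_span_linked(sender_span_id: Option<Id>) -> Span {
    let recv_span = span!(parent: None, Level::TRACE, "process_connection recv");
    recv_span.follows_from(sender_span_id);
    recv_span
}

// (b) The workaround: an ordinary child span of the current span, with no
//     follows_from link (the otel layer crashed on the link above).
fn recv_span_plain() -> Span {
    span!(Level::TRACE, "process_connection recv")
}
```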