better race condition handling

This commit is contained in:
John Smith
2022-10-04 13:09:03 -04:00
parent 4b2164a546
commit 7ed6b44d21
9 changed files with 120 additions and 56 deletions

View File

@@ -289,28 +289,29 @@ impl ConnectionManager {
.await;
match result_net_res {
Ok(net_res) => {
if net_res.is_value() || retry_count == 0 {
break net_res;
}
}
Err(e) => {
if retry_count == 0 {
// Try one last time to return a connection from the table, in case
// an 'accept' happened at literally the same time as our connect
// If the connection 'already exists', then try one last time to return a connection from the table, in case
// an 'accept' happened at literally the same time as our connect
if net_res.is_already_exists() {
if let Some(conn) = self
.arc
.connection_table
.get_last_connection_by_remote(peer_address)
{
log_net!(
"== Returning existing connection in race local_addr={:?} peer_address={:?}",
local_addr.green(),
peer_address.green()
);
"== Returning existing connection in race local_addr={:?} peer_address={:?}",
local_addr.green(),
peer_address.green()
);
return Ok(NetworkResult::Value(conn));
}
}
if net_res.is_value() || retry_count == 0 {
break net_res;
}
}
Err(e) => {
if retry_count == 0 {
return Err(e).wrap_err("failed to connect");
}
}

View File

@@ -223,7 +223,15 @@ impl NetworkManager {
this.unlocked_inner
.rolling_transfers_task
.set_routine(move |s, l, t| {
Box::pin(this2.clone().rolling_transfers_task_routine(s, l, t))
Box::pin(
this2
.clone()
.rolling_transfers_task_routine(s, l, t)
.instrument(trace_span!(
parent: None,
"NetworkManager rolling transfers task routine"
)),
)
});
}
// Set relay management tick task
@@ -232,7 +240,12 @@ impl NetworkManager {
this.unlocked_inner
.relay_management_task
.set_routine(move |s, l, t| {
Box::pin(this2.clone().relay_management_task_routine(s, l, t))
Box::pin(
this2
.clone()
.relay_management_task_routine(s, l, t)
.instrument(trace_span!(parent: None, "relay management task routine")),
)
});
}
// Set bootstrap tick task
@@ -240,7 +253,14 @@ impl NetworkManager {
let this2 = this.clone();
this.unlocked_inner
.bootstrap_task
.set_routine(move |s, _l, _t| Box::pin(this2.clone().bootstrap_task_routine(s)));
.set_routine(move |s, _l, _t| {
Box::pin(
this2
.clone()
.bootstrap_task_routine(s)
.instrument(trace_span!(parent: None, "bootstrap task routine")),
)
});
}
// Set peer minimum refresh tick task
{
@@ -248,7 +268,15 @@ impl NetworkManager {
this.unlocked_inner
.peer_minimum_refresh_task
.set_routine(move |s, _l, _t| {
Box::pin(this2.clone().peer_minimum_refresh_task_routine(s))
Box::pin(
this2
.clone()
.peer_minimum_refresh_task_routine(s)
.instrument(trace_span!(
parent: None,
"peer minimum refresh task routine"
)),
)
});
}
// Set ping validator tick task
@@ -257,7 +285,12 @@ impl NetworkManager {
this.unlocked_inner
.ping_validator_task
.set_routine(move |s, l, t| {
Box::pin(this2.clone().ping_validator_task_routine(s, l, t))
Box::pin(
this2
.clone()
.ping_validator_task_routine(s, l, t)
.instrument(trace_span!(parent: None, "ping validator task routine")),
)
});
}
// Set public address check task
@@ -266,7 +299,15 @@ impl NetworkManager {
this.unlocked_inner
.public_address_check_task
.set_routine(move |s, l, t| {
Box::pin(this2.clone().public_address_check_task_routine(s, l, t))
Box::pin(
this2
.clone()
.public_address_check_task_routine(s, l, t)
.instrument(trace_span!(
parent: None,
"public address check task routine"
)),
)
});
}
this

View File

@@ -462,11 +462,13 @@ impl Network {
// receive single response
let mut out = vec![0u8; MAX_MESSAGE_SIZE];
let (recv_len, recv_addr) =
network_result_try!(timeout(timeout_ms, h.recv_message(&mut out))
.await
.into_network_result())
.wrap_err("recv_message failure")?;
let (recv_len, recv_addr) = network_result_try!(timeout(
timeout_ms,
h.recv_message(&mut out).instrument(Span::current())
)
.await
.into_network_result())
.wrap_err("recv_message failure")?;
let recv_socket_addr = recv_addr.remote_address().to_socket_addr();
self.network_manager()

View File

@@ -54,7 +54,7 @@ impl Network {
loop {
match ph
.recv_message(&mut data)
.recv_message(&mut data).instrument(Span::current())
.timeout_at(stop_token.clone())
.await
{

View File

@@ -172,7 +172,8 @@ pub fn new_bound_first_tcp_socket(local_address: SocketAddr) -> io::Result<Socke
}
// Non-blocking connect is tricky when you want to start with a prepared socket
#[instrument(level = "trace", ret, err)]
// Errors should not be logged as they are valid conditions for this function
#[instrument(level = "trace", ret)]
pub async fn nonblocking_connect(
socket: Socket,
addr: SocketAddr,
@@ -184,9 +185,6 @@ pub async fn nonblocking_connect(
// Make socket2 SockAddr
let socket2_addr = socket2::SockAddr::from(addr);
// XXX
//let bind_local_addr = socket.local_addr().unwrap().as_socket().unwrap();
// Connect to the remote address
match socket.connect(&socket2_addr) {
Ok(()) => Ok(()),
@@ -194,28 +192,7 @@ pub async fn nonblocking_connect(
Err(err) if err.raw_os_error() == Some(libc::EINPROGRESS) => Ok(()),
Err(err) if err.kind() == std::io::ErrorKind::WouldBlock => Ok(()),
Err(e) => Err(e),
}
.map_err(|e| {
// XXX
// warn!(
// "DEBUGCONNECT XXXFAILXXX: bind={} local={} remote={}\nbacktrace={:?}",
// bind_local_addr,
// socket.local_addr().unwrap().as_socket().unwrap(),
// addr,
// backtrace::Backtrace::new(),
// );
e
})?;
// XXX
// warn!(
// "DEBUGCONNECT: bind={} local={} remote={}\nbacktrace={:?}",
// bind_local_addr,
// socket.local_addr().unwrap().as_socket().unwrap(),
// addr,
// backtrace::Backtrace::new(),
// );
}?;
let async_stream = Async::new(std::net::TcpStream::from(socket))?;
// The stream becomes writable when connected