better race condition handling

This commit is contained in:
John Smith 2022-10-04 13:09:03 -04:00
parent 4b2164a546
commit 7ed6b44d21
9 changed files with 120 additions and 56 deletions

View File

@ -289,14 +289,9 @@ impl ConnectionManager {
.await; .await;
match result_net_res { match result_net_res {
Ok(net_res) => { Ok(net_res) => {
if net_res.is_value() || retry_count == 0 { // If the connection 'already exists', then try one last time to return a connection from the table, in case
break net_res;
}
}
Err(e) => {
if retry_count == 0 {
// Try one last time to return a connection from the table, in case
// an 'accept' happened at literally the same time as our connect // an 'accept' happened at literally the same time as our connect
if net_res.is_already_exists() {
if let Some(conn) = self if let Some(conn) = self
.arc .arc
.connection_table .connection_table
@ -310,7 +305,13 @@ impl ConnectionManager {
return Ok(NetworkResult::Value(conn)); return Ok(NetworkResult::Value(conn));
} }
}
if net_res.is_value() || retry_count == 0 {
break net_res;
}
}
Err(e) => {
if retry_count == 0 {
return Err(e).wrap_err("failed to connect"); return Err(e).wrap_err("failed to connect");
} }
} }

View File

@ -223,7 +223,15 @@ impl NetworkManager {
this.unlocked_inner this.unlocked_inner
.rolling_transfers_task .rolling_transfers_task
.set_routine(move |s, l, t| { .set_routine(move |s, l, t| {
Box::pin(this2.clone().rolling_transfers_task_routine(s, l, t)) Box::pin(
this2
.clone()
.rolling_transfers_task_routine(s, l, t)
.instrument(trace_span!(
parent: None,
"NetworkManager rolling transfers task routine"
)),
)
}); });
} }
// Set relay management tick task // Set relay management tick task
@ -232,7 +240,12 @@ impl NetworkManager {
this.unlocked_inner this.unlocked_inner
.relay_management_task .relay_management_task
.set_routine(move |s, l, t| { .set_routine(move |s, l, t| {
Box::pin(this2.clone().relay_management_task_routine(s, l, t)) Box::pin(
this2
.clone()
.relay_management_task_routine(s, l, t)
.instrument(trace_span!(parent: None, "relay management task routine")),
)
}); });
} }
// Set bootstrap tick task // Set bootstrap tick task
@ -240,7 +253,14 @@ impl NetworkManager {
let this2 = this.clone(); let this2 = this.clone();
this.unlocked_inner this.unlocked_inner
.bootstrap_task .bootstrap_task
.set_routine(move |s, _l, _t| Box::pin(this2.clone().bootstrap_task_routine(s))); .set_routine(move |s, _l, _t| {
Box::pin(
this2
.clone()
.bootstrap_task_routine(s)
.instrument(trace_span!(parent: None, "bootstrap task routine")),
)
});
} }
// Set peer minimum refresh tick task // Set peer minimum refresh tick task
{ {
@ -248,7 +268,15 @@ impl NetworkManager {
this.unlocked_inner this.unlocked_inner
.peer_minimum_refresh_task .peer_minimum_refresh_task
.set_routine(move |s, _l, _t| { .set_routine(move |s, _l, _t| {
Box::pin(this2.clone().peer_minimum_refresh_task_routine(s)) Box::pin(
this2
.clone()
.peer_minimum_refresh_task_routine(s)
.instrument(trace_span!(
parent: None,
"peer minimum refresh task routine"
)),
)
}); });
} }
// Set ping validator tick task // Set ping validator tick task
@ -257,7 +285,12 @@ impl NetworkManager {
this.unlocked_inner this.unlocked_inner
.ping_validator_task .ping_validator_task
.set_routine(move |s, l, t| { .set_routine(move |s, l, t| {
Box::pin(this2.clone().ping_validator_task_routine(s, l, t)) Box::pin(
this2
.clone()
.ping_validator_task_routine(s, l, t)
.instrument(trace_span!(parent: None, "ping validator task routine")),
)
}); });
} }
// Set public address check task // Set public address check task
@ -266,7 +299,15 @@ impl NetworkManager {
this.unlocked_inner this.unlocked_inner
.public_address_check_task .public_address_check_task
.set_routine(move |s, l, t| { .set_routine(move |s, l, t| {
Box::pin(this2.clone().public_address_check_task_routine(s, l, t)) Box::pin(
this2
.clone()
.public_address_check_task_routine(s, l, t)
.instrument(trace_span!(
parent: None,
"public address check task routine"
)),
)
}); });
} }
this this

View File

@ -462,8 +462,10 @@ impl Network {
// receive single response // receive single response
let mut out = vec![0u8; MAX_MESSAGE_SIZE]; let mut out = vec![0u8; MAX_MESSAGE_SIZE];
let (recv_len, recv_addr) = let (recv_len, recv_addr) = network_result_try!(timeout(
network_result_try!(timeout(timeout_ms, h.recv_message(&mut out)) timeout_ms,
h.recv_message(&mut out).instrument(Span::current())
)
.await .await
.into_network_result()) .into_network_result())
.wrap_err("recv_message failure")?; .wrap_err("recv_message failure")?;

View File

@ -54,7 +54,7 @@ impl Network {
loop { loop {
match ph match ph
.recv_message(&mut data) .recv_message(&mut data).instrument(Span::current())
.timeout_at(stop_token.clone()) .timeout_at(stop_token.clone())
.await .await
{ {

View File

@ -172,7 +172,8 @@ pub fn new_bound_first_tcp_socket(local_address: SocketAddr) -> io::Result<Socke
} }
// Non-blocking connect is tricky when you want to start with a prepared socket // Non-blocking connect is tricky when you want to start with a prepared socket
#[instrument(level = "trace", ret, err)] // Errors should not be logged as they are valid conditions for this function
#[instrument(level = "trace", ret)]
pub async fn nonblocking_connect( pub async fn nonblocking_connect(
socket: Socket, socket: Socket,
addr: SocketAddr, addr: SocketAddr,
@ -184,9 +185,6 @@ pub async fn nonblocking_connect(
// Make socket2 SockAddr // Make socket2 SockAddr
let socket2_addr = socket2::SockAddr::from(addr); let socket2_addr = socket2::SockAddr::from(addr);
// XXX
//let bind_local_addr = socket.local_addr().unwrap().as_socket().unwrap();
// Connect to the remote address // Connect to the remote address
match socket.connect(&socket2_addr) { match socket.connect(&socket2_addr) {
Ok(()) => Ok(()), Ok(()) => Ok(()),
@ -194,28 +192,7 @@ pub async fn nonblocking_connect(
Err(err) if err.raw_os_error() == Some(libc::EINPROGRESS) => Ok(()), Err(err) if err.raw_os_error() == Some(libc::EINPROGRESS) => Ok(()),
Err(err) if err.kind() == std::io::ErrorKind::WouldBlock => Ok(()), Err(err) if err.kind() == std::io::ErrorKind::WouldBlock => Ok(()),
Err(e) => Err(e), Err(e) => Err(e),
} }?;
.map_err(|e| {
// XXX
// warn!(
// "DEBUGCONNECT XXXFAILXXX: bind={} local={} remote={}\nbacktrace={:?}",
// bind_local_addr,
// socket.local_addr().unwrap().as_socket().unwrap(),
// addr,
// backtrace::Backtrace::new(),
// );
e
})?;
// XXX
// warn!(
// "DEBUGCONNECT: bind={} local={} remote={}\nbacktrace={:?}",
// bind_local_addr,
// socket.local_addr().unwrap().as_socket().unwrap(),
// addr,
// backtrace::Backtrace::new(),
// );
let async_stream = Async::new(std::net::TcpStream::from(socket))?; let async_stream = Async::new(std::net::TcpStream::from(socket))?;
// The stream becomes writable when connected // The stream becomes writable when connected

View File

@ -125,7 +125,15 @@ impl RoutingTable {
this.unlocked_inner this.unlocked_inner
.rolling_transfers_task .rolling_transfers_task
.set_routine(move |s, l, t| { .set_routine(move |s, l, t| {
Box::pin(this2.clone().rolling_transfers_task_routine(s, l, t)) Box::pin(
this2
.clone()
.rolling_transfers_task_routine(s, l, t)
.instrument(trace_span!(
parent: None,
"RoutingTable rolling transfers task routine"
)),
)
}); });
} }
@ -135,7 +143,12 @@ impl RoutingTable {
this.unlocked_inner this.unlocked_inner
.kick_buckets_task .kick_buckets_task
.set_routine(move |s, l, t| { .set_routine(move |s, l, t| {
Box::pin(this2.clone().kick_buckets_task_routine(s, l, t)) Box::pin(
this2
.clone()
.kick_buckets_task_routine(s, l, t)
.instrument(trace_span!(parent: None, "kick buckets task routine")),
)
}); });
} }
this this

View File

@ -406,6 +406,9 @@ impl VeilidAPI {
NetworkResult::NoConnection(e) => { NetworkResult::NoConnection(e) => {
return Ok(format!("NoConnection({})", e)); return Ok(format!("NoConnection({})", e));
} }
NetworkResult::AlreadyExists(e) => {
return Ok(format!("AlreadyExists({})", e));
}
NetworkResult::InvalidMessage(e) => { NetworkResult::InvalidMessage(e) => {
return Ok(format!("InvalidMessage({})", e)); return Ok(format!("InvalidMessage({})", e));
} }

View File

@ -130,11 +130,12 @@ impl RoutingContext {
let answer = match rpc_processor.rpc_call_app_call(dest, request).await { let answer = match rpc_processor.rpc_call_app_call(dest, request).await {
Ok(NetworkResult::Value(v)) => v, Ok(NetworkResult::Value(v)) => v,
Ok(NetworkResult::Timeout) => return Err(VeilidAPIError::Timeout), Ok(NetworkResult::Timeout) => return Err(VeilidAPIError::Timeout),
Ok(NetworkResult::NoConnection(e)) => { Ok(NetworkResult::NoConnection(e)) | Ok(NetworkResult::AlreadyExists(e)) => {
return Err(VeilidAPIError::NoConnection { return Err(VeilidAPIError::NoConnection {
message: e.to_string(), message: e.to_string(),
}) })
} }
Ok(NetworkResult::InvalidMessage(message)) => { Ok(NetworkResult::InvalidMessage(message)) => {
return Err(VeilidAPIError::Generic { message }) return Err(VeilidAPIError::Generic { message })
} }
@ -159,7 +160,7 @@ impl RoutingContext {
match rpc_processor.rpc_call_app_message(dest, message).await { match rpc_processor.rpc_call_app_message(dest, message).await {
Ok(NetworkResult::Value(())) => {} Ok(NetworkResult::Value(())) => {}
Ok(NetworkResult::Timeout) => return Err(VeilidAPIError::Timeout), Ok(NetworkResult::Timeout) => return Err(VeilidAPIError::Timeout),
Ok(NetworkResult::NoConnection(e)) => { Ok(NetworkResult::NoConnection(e)) | Ok(NetworkResult::AlreadyExists(e)) => {
return Err(VeilidAPIError::NoConnection { return Err(VeilidAPIError::NoConnection {
message: e.to_string(), message: e.to_string(),
}) })

View File

@ -35,6 +35,7 @@ impl<T> IoNetworkResultExt<T> for io::Result<T> {
| io::ErrorKind::ConnectionReset | io::ErrorKind::ConnectionReset
| io::ErrorKind::HostUnreachable | io::ErrorKind::HostUnreachable
| io::ErrorKind::NetworkUnreachable => Ok(NetworkResult::NoConnection(e)), | io::ErrorKind::NetworkUnreachable => Ok(NetworkResult::NoConnection(e)),
io::ErrorKind::AddrNotAvailable => Ok(NetworkResult::AlreadyExists(e)),
_ => Err(e), _ => Err(e),
}, },
#[cfg(not(feature = "io_error_more"))] #[cfg(not(feature = "io_error_more"))]
@ -50,6 +51,7 @@ impl<T> IoNetworkResultExt<T> for io::Result<T> {
io::ErrorKind::ConnectionAborted io::ErrorKind::ConnectionAborted
| io::ErrorKind::ConnectionRefused | io::ErrorKind::ConnectionRefused
| io::ErrorKind::ConnectionReset => Ok(NetworkResult::NoConnection(e)), | io::ErrorKind::ConnectionReset => Ok(NetworkResult::NoConnection(e)),
io::ErrorKind::AddrNotAvailable => Ok(NetworkResult::AlreadyExists(e)),
_ => Err(e), _ => Err(e),
} }
} }
@ -66,6 +68,7 @@ impl<T, E> NetworkResultResultExt<T, E> for NetworkResult<Result<T, E>> {
match self { match self {
NetworkResult::Timeout => Ok(NetworkResult::<T>::Timeout), NetworkResult::Timeout => Ok(NetworkResult::<T>::Timeout),
NetworkResult::NoConnection(e) => Ok(NetworkResult::<T>::NoConnection(e)), NetworkResult::NoConnection(e) => Ok(NetworkResult::<T>::NoConnection(e)),
NetworkResult::AlreadyExists(e) => Ok(NetworkResult::<T>::AlreadyExists(e)),
NetworkResult::InvalidMessage(s) => Ok(NetworkResult::<T>::InvalidMessage(s)), NetworkResult::InvalidMessage(s) => Ok(NetworkResult::<T>::InvalidMessage(s)),
NetworkResult::Value(Ok(v)) => Ok(NetworkResult::<T>::Value(v)), NetworkResult::Value(Ok(v)) => Ok(NetworkResult::<T>::Value(v)),
NetworkResult::Value(Err(e)) => Err(e), NetworkResult::Value(Err(e)) => Err(e),
@ -90,6 +93,7 @@ impl<T> FoldedNetworkResultExt<T> for io::Result<TimeoutOr<T>> {
| io::ErrorKind::ConnectionReset | io::ErrorKind::ConnectionReset
| io::ErrorKind::HostUnreachable | io::ErrorKind::HostUnreachable
| io::ErrorKind::NetworkUnreachable => Ok(NetworkResult::NoConnection(e)), | io::ErrorKind::NetworkUnreachable => Ok(NetworkResult::NoConnection(e)),
io::ErrorKind::AddrNotAvailable => Ok(NetworkResult::AlreadyExists(e)),
_ => Err(e), _ => Err(e),
}, },
#[cfg(not(feature = "io_error_more"))] #[cfg(not(feature = "io_error_more"))]
@ -105,6 +109,7 @@ impl<T> FoldedNetworkResultExt<T> for io::Result<TimeoutOr<T>> {
io::ErrorKind::ConnectionAborted io::ErrorKind::ConnectionAborted
| io::ErrorKind::ConnectionRefused | io::ErrorKind::ConnectionRefused
| io::ErrorKind::ConnectionReset => Ok(NetworkResult::NoConnection(e)), | io::ErrorKind::ConnectionReset => Ok(NetworkResult::NoConnection(e)),
io::ErrorKind::AddrNotAvailable => Ok(NetworkResult::AlreadyExists(e)),
_ => Err(e), _ => Err(e),
} }
} }
@ -124,6 +129,7 @@ impl<T> FoldedNetworkResultExt<T> for io::Result<NetworkResult<T>> {
| io::ErrorKind::ConnectionReset | io::ErrorKind::ConnectionReset
| io::ErrorKind::HostUnreachable | io::ErrorKind::HostUnreachable
| io::ErrorKind::NetworkUnreachable => Ok(NetworkResult::NoConnection(e)), | io::ErrorKind::NetworkUnreachable => Ok(NetworkResult::NoConnection(e)),
io::ErrorKind::AddrNotAvailable => Ok(NetworkResult::AlreadyExists(e)),
_ => Err(e), _ => Err(e),
}, },
#[cfg(not(feature = "io_error_more"))] #[cfg(not(feature = "io_error_more"))]
@ -139,6 +145,7 @@ impl<T> FoldedNetworkResultExt<T> for io::Result<NetworkResult<T>> {
io::ErrorKind::ConnectionAborted io::ErrorKind::ConnectionAborted
| io::ErrorKind::ConnectionRefused | io::ErrorKind::ConnectionRefused
| io::ErrorKind::ConnectionReset => Ok(NetworkResult::NoConnection(e)), | io::ErrorKind::ConnectionReset => Ok(NetworkResult::NoConnection(e)),
io::ErrorKind::AddrNotAvailable => Ok(NetworkResult::AlreadyExists(e)),
_ => Err(e), _ => Err(e),
} }
} }
@ -153,6 +160,7 @@ impl<T> FoldedNetworkResultExt<T> for io::Result<NetworkResult<T>> {
pub enum NetworkResult<T> { pub enum NetworkResult<T> {
Timeout, Timeout,
NoConnection(io::Error), NoConnection(io::Error),
AlreadyExists(io::Error),
InvalidMessage(String), InvalidMessage(String),
Value(T), Value(T),
} }
@ -170,7 +178,9 @@ impl<T> NetworkResult<T> {
pub fn invalid_message<S: ToString>(s: S) -> Self { pub fn invalid_message<S: ToString>(s: S) -> Self {
Self::InvalidMessage(s.to_string()) Self::InvalidMessage(s.to_string())
} }
pub fn already_exists(e: io::Error) -> Self {
Self::AlreadyExists(e)
}
pub fn value(value: T) -> Self { pub fn value(value: T) -> Self {
Self::Value(value) Self::Value(value)
} }
@ -181,6 +191,9 @@ impl<T> NetworkResult<T> {
pub fn is_no_connection(&self) -> bool { pub fn is_no_connection(&self) -> bool {
matches!(self, Self::NoConnection(_)) matches!(self, Self::NoConnection(_))
} }
pub fn is_already_exists(&self) -> bool {
matches!(self, Self::AlreadyExists(_))
}
pub fn is_value(&self) -> bool { pub fn is_value(&self) -> bool {
matches!(self, Self::Value(_)) matches!(self, Self::Value(_))
} }
@ -188,6 +201,7 @@ impl<T> NetworkResult<T> {
match self { match self {
Self::Timeout => NetworkResult::<X>::Timeout, Self::Timeout => NetworkResult::<X>::Timeout,
Self::NoConnection(e) => NetworkResult::<X>::NoConnection(e), Self::NoConnection(e) => NetworkResult::<X>::NoConnection(e),
Self::AlreadyExists(e) => NetworkResult::<X>::AlreadyExists(e),
Self::InvalidMessage(s) => NetworkResult::<X>::InvalidMessage(s), Self::InvalidMessage(s) => NetworkResult::<X>::InvalidMessage(s),
Self::Value(v) => NetworkResult::<X>::Value(f(v)), Self::Value(v) => NetworkResult::<X>::Value(f(v)),
} }
@ -196,6 +210,7 @@ impl<T> NetworkResult<T> {
match self { match self {
Self::Timeout => Err(io::Error::new(io::ErrorKind::TimedOut, "Timed out")), Self::Timeout => Err(io::Error::new(io::ErrorKind::TimedOut, "Timed out")),
Self::NoConnection(e) => Err(e), Self::NoConnection(e) => Err(e),
Self::AlreadyExists(e) => Err(e),
Self::InvalidMessage(s) => Err(io::Error::new( Self::InvalidMessage(s) => Err(io::Error::new(
io::ErrorKind::InvalidData, io::ErrorKind::InvalidData,
format!("Invalid message: {}", s), format!("Invalid message: {}", s),
@ -230,6 +245,7 @@ impl<T: Debug> Debug for NetworkResult<T> {
match self { match self {
Self::Timeout => write!(f, "Timeout"), Self::Timeout => write!(f, "Timeout"),
Self::NoConnection(e) => f.debug_tuple("NoConnection").field(e).finish(), Self::NoConnection(e) => f.debug_tuple("NoConnection").field(e).finish(),
Self::AlreadyExists(e) => f.debug_tuple("AlreadyExists").field(e).finish(),
Self::InvalidMessage(s) => f.debug_tuple("InvalidMessage").field(s).finish(), Self::InvalidMessage(s) => f.debug_tuple("InvalidMessage").field(s).finish(),
Self::Value(v) => f.debug_tuple("Value").field(v).finish(), Self::Value(v) => f.debug_tuple("Value").field(v).finish(),
} }
@ -241,6 +257,7 @@ impl<T> Display for NetworkResult<T> {
match self { match self {
Self::Timeout => write!(f, "Timeout"), Self::Timeout => write!(f, "Timeout"),
Self::NoConnection(e) => write!(f, "NoConnection({})", e.kind()), Self::NoConnection(e) => write!(f, "NoConnection({})", e.kind()),
Self::AlreadyExists(e) => write!(f, "AlreadyExists({})", e.kind()),
Self::InvalidMessage(s) => write!(f, "InvalidMessage({})", s), Self::InvalidMessage(s) => write!(f, "InvalidMessage({})", s),
Self::Value(_) => write!(f, "Value"), Self::Value(_) => write!(f, "Value"),
} }
@ -258,6 +275,7 @@ macro_rules! network_result_try {
match $r { match $r {
NetworkResult::Timeout => return Ok(NetworkResult::Timeout), NetworkResult::Timeout => return Ok(NetworkResult::Timeout),
NetworkResult::NoConnection(e) => return Ok(NetworkResult::NoConnection(e)), NetworkResult::NoConnection(e) => return Ok(NetworkResult::NoConnection(e)),
NetworkResult::AlreadyExists(e) => return Ok(NetworkResult::AlreadyExists(e)),
NetworkResult::InvalidMessage(s) => return Ok(NetworkResult::InvalidMessage(s)), NetworkResult::InvalidMessage(s) => return Ok(NetworkResult::InvalidMessage(s)),
NetworkResult::Value(v) => v, NetworkResult::Value(v) => v,
} }
@ -272,6 +290,10 @@ macro_rules! network_result_try {
$f; $f;
return Ok(NetworkResult::NoConnection(e)); return Ok(NetworkResult::NoConnection(e));
} }
NetworkResult::AlreadyExists(e) => {
$f;
return Ok(NetworkResult::AlreadyExists(e));
}
NetworkResult::InvalidMessage(s) => { NetworkResult::InvalidMessage(s) => {
$f; $f;
return Ok(NetworkResult::InvalidMessage(s)); return Ok(NetworkResult::InvalidMessage(s));
@ -293,6 +315,10 @@ macro_rules! network_result_value_or_log {
log_net!($level "{}({}) at {}@{}:{}", "No connection".green(), e.to_string(), file!(), line!(), column!()); log_net!($level "{}({}) at {}@{}:{}", "No connection".green(), e.to_string(), file!(), line!(), column!());
$f $f
} }
NetworkResult::AlreadyExists(e) => {
log_net!($level "{}({}) at {}@{}:{}", "Already exists".green(), e.to_string(), file!(), line!(), column!());
$f
}
NetworkResult::InvalidMessage(s) => { NetworkResult::InvalidMessage(s) => {
log_net!($level "{}({}) at {}@{}:{}", "Invalid message".green(), s, file!(), line!(), column!()); log_net!($level "{}({}) at {}@{}:{}", "Invalid message".green(), s, file!(), line!(), column!());
$f $f