checkpoint

This commit is contained in:
John Smith
2023-05-06 21:09:40 -04:00
parent e2c5691d7e
commit 1fe5004eef
23 changed files with 627 additions and 203 deletions

View File

@@ -1,46 +1,75 @@
use super::*;
/// The result of the do_get_value operation
pub struct DoGetValueResult {
/// The subkey value if we got one
pub value: Option<SignedValueData>,
/// The descriptor if we got a fresh one, or None if no descriptor was needed
pub descriptor: Option<SignedValueDescriptor>,
}
/// The context of the do_get_value operation
struct DoGetValueContext {
/// The latest value of the subkey; this may be the value passed in
pub value: Option<SignedValueData>,
/// The consensus count for the value we have received
pub value_count: usize,
/// The descriptor if we got a fresh one, or None if no descriptor was needed
pub descriptor: Option<SignedValueDescriptor>,
/// The parsed schema from the descriptor if we have one
pub schema: Option<DHTSchema>,
}
impl StorageManager {
pub async fn do_get_value(
&self,
mut inner: AsyncMutexGuardArc<StorageManagerInner>,
rpc_processor: RPCProcessor,
key: TypedKey,
subkey: ValueSubkey,
min_seq: ValueSeqNum,
last_value: Option<SignedValueData>,
last_descriptor: Option<SignedValueDescriptor>,
safety_selection: SafetySelection,
) -> Result<Option<DoGetValueResult>, VeilidAPIError> {
let Some(rpc_processor) = inner.rpc_processor.clone() else {
apibail_not_initialized!();
};
) -> Result<DoGetValueResult, VeilidAPIError> {
let routing_table = rpc_processor.routing_table();
// Get the DHT parameters for 'GetValue'
let (count, fanout, timeout) = {
let (key_count, consensus_count, fanout, timeout_us) = {
let c = self.unlocked_inner.config.get();
(
c.network.dht.max_find_node_count as usize,
c.network.dht.get_value_count as usize,
c.network.dht.get_value_fanout as usize,
TimestampDuration::from(ms_to_us(c.network.dht.get_value_timeout_ms)),
)
};
// Make do-get-value answer context
let schema = if let Some(d) = &last_descriptor {
Some(d.schema()?)
} else {
None
};
let context = Arc::new(Mutex::new(DoGetValueContext {
value: last_value,
value_count: 0,
descriptor: last_descriptor.clone(),
schema,
}));
// Routine to call to generate fanout
let call_routine = |next_node: NodeRef| {
let rpc_processor = rpc_processor.clone();
let context = context.clone();
let last_descriptor = last_descriptor.clone();
async move {
match rpc_processor
.clone()
.rpc_call_get_value(
Destination::direct(next_node).with_safety(safety_selection),
key, subkey, last_descriptor
key,
subkey,
last_descriptor,
)
.await
{
@@ -49,12 +78,61 @@ impl StorageManager {
// Any other failures, just try the next node
return Ok(None);
});
// Keep the descriptor if we got one. If we had a last_descriptor it will
// already have been validated by rpc_call_get_value
if let Some(descriptor) = v.answer.descriptor {
let mut ctx = context.lock();
if ctx.descriptor.is_none() && ctx.schema.is_none() {
ctx.schema =
Some(descriptor.schema().map_err(RPCError::invalid_format)?);
ctx.descriptor = Some(descriptor);
}
}
// Keep the value if we got one, it is newer, and it passes schema validation
if let Some(value) = v.answer.value {
// See if this is even a candidate: enforce the minimum sequence number
// (TODO: also apply min_seq to OperationGetValueQ)
if value.value_data().seq() < min_seq {
// Sequence number is too old, ignore this value
// Move to the next node
return Ok(None);
}
let mut ctx = context.lock();
// Ensure we have a schema and descriptor
let (Some(descriptor), Some(schema)) = (&ctx.descriptor, &ctx.schema) else {
// Got a value but no descriptor for it
// Move to the next node
return Ok(None);
};
// Validate with schema
if !schema.check_subkey_value_data(
descriptor.owner(),
subkey,
value.value_data(),
) {
// Validation failed, ignore this value
// Move to the next node
return Ok(None);
}
// If we have a prior value, see if this is a newer sequence number
if let Some(prior_value) = &ctx.value {
let prior_seq = prior_value.value_data().seq();
let new_seq = value.value_data().seq();
if new_seq == prior_seq {
// If sequence number is the same, the data should be the same
if prior_value.value_data() != value.value_data() {
// Move to the next node
return Ok(None);
}
// Increase the consensus count for the existing value
ctx.value_count += 1;
} else if new_seq > prior_seq {
// If the sequence number is greater, go with it
ctx.value = Some(value);
ctx.value_count = 1;
} else {
// If the sequence number is older, ignore it
}
} else {
// If we have no prior value, keep this one and start the consensus count
ctx.value = Some(value);
ctx.value_count = 1;
}
}
// Return peers if we have some
@@ -66,13 +144,11 @@ impl StorageManager {
};
// Routine to call to check if we're done at each step
let check_done = |closest_nodes: &[NodeRef]| {
// If the node we want to locate is one of the closest nodes, return it immediately
if let Some(out) = closest_nodes
.iter()
.find(|x| x.node_ids().contains(&node_id))
{
return Some(out.clone());
let check_done = |_closest_nodes: &[NodeRef]| {
// If we have reached sufficient consensus, return done
let ctx = context.lock();
if ctx.value.is_some() && ctx.descriptor.is_some() && ctx.value_count >= consensus_count {
return Some(());
}
None
};
@@ -80,28 +156,33 @@ impl StorageManager {
// Call the fanout
let fanout_call = FanoutCall::new(
routing_table.clone(),
node_id,
count,
key,
key_count,
fanout,
timeout_us,
call_routine,
check_done,
);
fanout_call.run().await
// Search in preferred cryptosystem order
let nr = this
.search_dht_single_key(node_id, count, fanout, timeout, safety_selection)
.await?;
if let Some(nr) = &nr {
if nr.node_ids().contains(&node_id) {
// found a close node, but not exact within our configured resolve_node timeout
return Ok(None);
match fanout_call.run().await {
// If we don't finish in the timeout (too much time passed checking for consensus)
TimeoutOr::Timeout |
// If we finished with consensus (enough nodes returning the same value)
TimeoutOr::Value(Ok(Some(()))) |
// If we finished without consensus (ran out of nodes before getting consensus)
TimeoutOr::Value(Ok(None)) => {
// Return the best answer we've got
let ctx = context.lock();
Ok(DoGetValueResult{
value: ctx.value.clone(),
descriptor: ctx.descriptor.clone(),
})
}
// Failed
TimeoutOr::Value(Err(e)) => {
// If we finished with an error, return that
Err(e.into())
}
}
Ok(nr)
}
}
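
For orientation: the new do_get_value drives a FanoutCall with two closures, call_routine (query one node and fold its answer into the shared context) and check_done (stop once enough nodes agree). Below is a minimal sketch of that shape; the simple_fanout helper, the u64 node and value stand-ins, and the consensus threshold are hypothetical simplifications, not the real FanoutCall API.

```rust
use std::sync::{Arc, Mutex};

/// Hypothetical shared context, mirroring DoGetValueContext in spirit.
struct Context {
    value: Option<u64>, // stands in for SignedValueData
    value_count: usize, // consensus count for the current value
}

/// Toy fanout: poll candidate nodes one by one until check_done reports
/// consensus or we run out of candidates.
fn simple_fanout<C, D>(candidates: &[u64], mut call_routine: C, check_done: D) -> Option<()>
where
    C: FnMut(u64),
    D: Fn() -> Option<()>,
{
    for &node in candidates {
        call_routine(node);
        if check_done().is_some() {
            return Some(());
        }
    }
    None
}

fn main() {
    let consensus_count = 2;
    let context = Arc::new(Mutex::new(Context { value: None, value_count: 0 }));

    // call_routine: "ask" a node for its value and fold the answer into
    // the shared context, counting agreement on the current value.
    let ctx = context.clone();
    let call_routine = move |node: u64| {
        let answer = node % 2; // pretend: even nodes answer 0, odd nodes answer 1
        let mut ctx = ctx.lock().unwrap();
        match ctx.value {
            Some(v) if v == answer => ctx.value_count += 1,
            _ => {
                ctx.value = Some(answer);
                ctx.value_count = 1;
            }
        }
    };

    // check_done: done once consensus_count nodes agreed on the same value.
    let ctx = context.clone();
    let check_done = move || {
        let ctx = ctx.lock().unwrap();
        (ctx.value_count >= consensus_count).then_some(())
    };

    let done = simple_fanout(&[1, 3, 5, 2], call_routine, check_done);
    let ctx = context.lock().unwrap();
    println!("consensus: {}, value: {:?}", done.is_some(), ctx.value);
}
```

The real fanout is parallel, timeout-bounded, and routing-table driven; the sequential loop here only makes the consensus-counting logic easy to follow.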

View File

@@ -203,7 +203,7 @@ impl StorageManager {
/// # DHT Key = Hash(ownerKeyKind) of: [ ownerKeyValue, schema ]
fn get_key<D>(vcrypto: CryptoSystemVersion, record: &Record<D>) -> TypedKey
where
D: RkyvArchive + RkyvSerialize<RkyvSerializer>,
D: Clone + RkyvArchive + RkyvSerialize<RkyvSerializer>,
for<'t> <D as RkyvArchive>::Archived: CheckBytes<RkyvDefaultValidator<'t>>,
<D as RkyvArchive>::Archived: RkyvDeserialize<D, SharedDeserializeMap>,
{
@@ -237,6 +237,11 @@ impl StorageManager {
apibail_generic!("unsupported cryptosystem");
};
// Get local record store
let Some(local_record_store) = inner.local_record_store.as_mut() else {
apibail_not_initialized!();
};
// Compile the dht schema
let schema_data = schema.compile();
@@ -257,7 +262,6 @@ impl StorageManager {
let record =
Record::<LocalRecordDetail>::new(cur_ts, signed_value_descriptor, local_record_detail)?;
let local_record_store = inner.local_record_store.as_mut().unwrap();
let dht_key = Self::get_key(vcrypto.clone(), &record);
local_record_store.new_record(dht_key, record).await?;
@@ -266,23 +270,16 @@ impl StorageManager {
.await
}
async fn open_record_inner(
fn open_record_inner_check_existing(
&self,
mut inner: AsyncMutexGuardArc<StorageManagerInner>,
key: TypedKey,
writer: Option<KeyPair>,
safety_selection: SafetySelection,
) -> Result<DHTRecordDescriptor, VeilidAPIError> {
// Ensure the record is closed
if inner.opened_records.contains_key(&key) {
return Err(VeilidAPIError::generic(
"record is already open and should be closed first",
));
}
// Get cryptosystem
let Some(vcrypto) = self.unlocked_inner.crypto.get(key.kind) else {
apibail_generic!("unsupported cryptosystem");
) -> Option<Result<DHTRecordDescriptor, VeilidAPIError>> {
// Get local record store
let Some(local_record_store) = inner.local_record_store.as_mut() else {
return Some(Err(VeilidAPIError::not_initialized()));
};
// See if we have a local record already or not
@@ -295,45 +292,124 @@ impl StorageManager {
// Return record details
(r.owner().clone(), r.schema())
};
if let Some((owner, schema)) = inner
.local_record_store
.as_mut()
.unwrap()
.with_record_mut(key, cb)
{
// Had local record
let Some((owner, schema)) = local_record_store.with_record_mut(key, cb) else {
return None;
};
// Had local record
// If the writer we chose is also the owner, we have the owner secret
// Otherwise this is just another subkey writer
let owner_secret = if let Some(writer) = writer {
if writer.key == owner {
Some(writer.secret)
} else {
None
}
// If the writer we chose is also the owner, we have the owner secret
// Otherwise this is just another subkey writer
let owner_secret = if let Some(writer) = writer {
if writer.key == owner {
Some(writer.secret)
} else {
None
};
// Write open record
inner.opened_records.insert(key, OpenedRecord::new(writer));
// Make DHT Record Descriptor to return
let descriptor = DHTRecordDescriptor::new(key, owner, owner_secret, schema);
Ok(descriptor)
}
} else {
// No record yet, try to get it from the network
self.do_get_value(inner, key, 0, safety_selection).await
None
};
// Write open record
inner.opened_records.insert(key, OpenedRecord::new(writer));
// Make DHT Record Descriptor to return
let descriptor = DHTRecordDescriptor::new(key, owner, owner_secret, schema);
Some(Ok(descriptor))
}
async fn open_record_inner(
&self,
inner: AsyncMutexGuardArc<StorageManagerInner>,
key: TypedKey,
writer: Option<KeyPair>,
safety_selection: SafetySelection,
) -> Result<DHTRecordDescriptor, VeilidAPIError> {
// Ensure the record is closed
if inner.opened_records.contains_key(&key) {
apibail_generic!("record is already open and should be closed first");
}
// See if we have a local record already or not
if let Some(res) =
self.open_record_inner_check_existing(inner, key, writer, safety_selection)
{
return res;
}
// No record yet, try to get it from the network
// Get rpc processor and drop mutex so we don't block while getting the value from the network
let rpc_processor = {
let inner = self.lock().await?;
let Some(rpc_processor) = inner.rpc_processor.clone() else {
// Offline, try again later
apibail_try_again!();
};
rpc_processor
};
// No last descriptor, no last value
let subkey: ValueSubkey = 0;
let result = self
.do_get_value(rpc_processor, key, subkey, None, None, safety_selection)
.await?;
// If we got nothing back, the key wasn't found
if result.value.is_none() && result.descriptor.is_none() {
// No result
apibail_key_not_found!(key);
};
// Must have descriptor
let Some(signed_value_descriptor) = result.descriptor else {
// No descriptor for new record, can't store this
apibail_generic!("no descriptor");
};
let owner = signed_value_descriptor.owner().clone();
// If the writer we chose is also the owner, we have the owner secret
// Otherwise this is just another subkey writer
let owner_secret = if let Some(writer) = writer {
if writer.key == owner {
Some(writer.secret)
} else {
None
}
} else {
None
};
let schema = signed_value_descriptor.schema()?;
// Reopen inner to store value we just got
let mut inner = self.lock().await?;
// Get local record store
let Some(local_record_store) = inner.local_record_store.as_mut() else {
apibail_not_initialized!();
};
// Make and store a new record for this descriptor
let record = Record::<LocalRecordDetail>::new(
get_aligned_timestamp(),
signed_value_descriptor,
LocalRecordDetail { safety_selection },
)?;
local_record_store.new_record(key, record).await?;
// If we got a subkey with the get_value, it has already been validated against the schema, so store it
if let Some(signed_value_data) = result.value {
// Write subkey to local store
local_record_store
.set_subkey(key, subkey, signed_value_data)
.await?;
}
// Write open record
inner.opened_records.insert(key, OpenedRecord::new(writer));
// Make DHT Record Descriptor to return
let descriptor = DHTRecordDescriptor::new(key, owner, owner_secret, schema);
Ok(descriptor)
}
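
The rewritten open path also illustrates the locking discipline used in this commit: clone what you need (here, rpc_processor) out of the guarded state, drop the guard before awaiting network I/O, then re-lock to store the result. A minimal sketch of that pattern, assuming tokio's async Mutex and a hypothetical fetch_from_network standing in for do_get_value:

```rust
use std::sync::Arc;
use tokio::sync::Mutex;

struct State {
    cached: Option<String>,
}

// Hypothetical network call; stands in for do_get_value.
async fn fetch_from_network() -> String {
    "value-from-network".to_string()
}

async fn get_or_fetch(state: Arc<Mutex<State>>) -> String {
    // Fast path: check the cache while holding the lock, then release it.
    {
        let inner = state.lock().await;
        if let Some(v) = &inner.cached {
            return v.clone();
        }
    } // guard dropped here, so other tasks are not blocked during the fetch

    // Slow path: network I/O happens with no lock held.
    let fetched = fetch_from_network().await;

    // Re-lock only to store the result.
    let mut inner = state.lock().await;
    inner.cached = Some(fetched.clone());
    fetched
}

#[tokio::main]
async fn main() {
    let state = Arc::new(Mutex::new(State { cached: None }));
    println!("{}", get_or_fetch(state.clone()).await);
    println!("{}", get_or_fetch(state).await); // second call hits the cache
}
```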
pub async fn open_record(
@@ -349,32 +425,34 @@ impl StorageManager {
async fn close_record_inner(
&self,
mut inner: AsyncMutexGuardArc<StorageManagerInner>,
inner: &mut AsyncMutexGuardArc<StorageManagerInner>,
key: TypedKey,
) -> Result<(), VeilidAPIError> {
let Some(opened_record) = inner.opened_records.remove(&key) else {
let Some(_opened_record) = inner.opened_records.remove(&key) else {
apibail_generic!("record not open");
};
Ok(())
}
pub async fn close_record(&self, key: TypedKey) -> Result<(), VeilidAPIError> {
let inner = self.lock().await?;
self.close_record_inner(inner, key).await
let mut inner = self.lock().await?;
self.close_record_inner(&mut inner, key).await
}
pub async fn delete_record(&self, key: TypedKey) -> Result<(), VeilidAPIError> {
let inner = self.lock().await?;
let mut inner = self.lock().await?;
// Ensure the record is closed
if inner.opened_records.contains_key(&key) {
self.close_record_inner(inner, key).await?;
self.close_record_inner(&mut inner, key).await?;
}
// Remove the record from the local store
//inner.local_record_store.unwrap().de
let Some(local_record_store) = inner.local_record_store.as_mut() else {
apibail_not_initialized!();
};
unimplemented!();
// Remove the record from the local store
local_record_store.delete_record(key).await
}
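
Note the signature change above on close_record_inner: the guard is now borrowed as &mut rather than consumed, which is what lets delete_record keep using inner after the call. A minimal sketch contrasting the two styles, with tokio's OwnedMutexGuard standing in for AsyncMutexGuardArc and a hypothetical Inner type:

```rust
use std::sync::Arc;
use tokio::sync::{Mutex, OwnedMutexGuard};

struct Inner {
    open: Vec<u32>,
}

// Consuming the guard: after this call the caller has lost the lock
// and must re-acquire it to do anything else.
fn close_consuming(mut inner: OwnedMutexGuard<Inner>, key: u32) {
    inner.open.retain(|k| *k != key);
} // guard dropped, lock released

// Borrowing the guard: the caller keeps the lock and can continue
// mutating state within the same critical section.
fn close_borrowing(inner: &mut OwnedMutexGuard<Inner>, key: u32) {
    inner.open.retain(|k| *k != key);
}

#[tokio::main]
async fn main() {
    let state = Arc::new(Mutex::new(Inner { open: vec![1, 2, 3] }));

    // Borrowing style: close a record, then keep working under the same lock.
    let mut guard = state.clone().lock_owned().await;
    close_borrowing(&mut guard, 2);
    guard.open.push(4); // still holding the lock
    drop(guard);

    // Consuming style: the guard is gone after the call, lock released.
    let guard = state.clone().lock_owned().await;
    close_consuming(guard, 4);
    println!("open records: {:?}", state.lock().await.open);
}
```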
pub async fn get_value(

View File

@@ -9,7 +9,7 @@ use hashlink::LruCache;
pub struct RecordStore<D>
where
D: RkyvArchive + RkyvSerialize<RkyvSerializer>,
D: Clone + RkyvArchive + RkyvSerialize<RkyvSerializer>,
for<'t> <D as RkyvArchive>::Archived: CheckBytes<RkyvDefaultValidator<'t>>,
<D as RkyvArchive>::Archived: RkyvDeserialize<D, SharedDeserializeMap>,
{
@@ -32,7 +32,7 @@ where
impl<D> RecordStore<D>
where
D: RkyvArchive + RkyvSerialize<RkyvSerializer>,
D: Clone + RkyvArchive + RkyvSerialize<RkyvSerializer>,
for<'t> <D as RkyvArchive>::Archived: CheckBytes<RkyvDefaultValidator<'t>>,
<D as RkyvArchive>::Archived: RkyvDeserialize<D, SharedDeserializeMap>,
{
@@ -169,6 +169,11 @@ where
let st_xact = subkey_table.transact();
let dead_records = mem::take(&mut self.dead_records);
for (k, v) in dead_records {
// Record should already be gone from index
if self.record_index.contains_key(&k) {
log_stor!(error "dead record found in index: {:?}", k);
}
// Delete record
rt_xact.delete(0, &k.bytes());
@@ -205,7 +210,6 @@ where
}
let record_table = self.record_table.clone().unwrap();
let subkey_table = self.subkey_table.clone().unwrap();
let rt_xact = record_table.transact();
let changed_records = mem::take(&mut self.changed_records);
@@ -277,6 +281,22 @@ where
Ok(())
}
pub async fn delete_record(&mut self, key: TypedKey) -> Result<(), VeilidAPIError> {
// Get the record table key
let rtk = RecordTableKey { key };
// Remove record from the index
let Some(record) = self.record_index.remove(&rtk) else {
apibail_key_not_found!(key);
};
self.add_dead_record(rtk, record);
self.purge_dead_records(false).await;
Ok(())
}
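
delete_record reuses the store's tombstone machinery: removing a record only moves it from the index into dead_records, and purge_dead_records performs the physical deletes (in the real store, as a transaction over the record and subkey tables). A minimal sketch of that two-phase shape, with HashMaps standing in for the persistent tables:

```rust
use std::collections::HashMap;

struct Store {
    index: HashMap<u64, String>,      // live records
    dead_records: Vec<(u64, String)>, // tombstones awaiting purge
    table: HashMap<u64, String>,      // stands in for the persistent record table
}

impl Store {
    // Phase 1: logically delete. Cheap, no I/O.
    fn delete_record(&mut self, key: u64) -> Result<(), String> {
        let Some(record) = self.index.remove(&key) else {
            return Err(format!("key not found: {key}"));
        };
        self.dead_records.push((key, record));
        Ok(())
    }

    // Phase 2: physically purge. In the real store this happens inside a
    // transaction and also clears the record's subkeys.
    fn purge_dead_records(&mut self) {
        for (key, _record) in std::mem::take(&mut self.dead_records) {
            // A dead record should already be gone from the index.
            debug_assert!(!self.index.contains_key(&key));
            self.table.remove(&key);
        }
    }
}

fn main() {
    let mut store = Store {
        index: HashMap::from([(1, "a".into()), (2, "b".into())]),
        dead_records: Vec::new(),
        table: HashMap::from([(1, "a".into()), (2, "b".into())]),
    };
    store.delete_record(1).unwrap();
    store.purge_dead_records();
    println!("live: {}, persisted: {}", store.index.len(), store.table.len());
}
```

Splitting the delete into tombstone and purge phases keeps the hot path cheap and lets many deletes share one batched transaction.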
pub fn with_record<R, F>(&mut self, key: TypedKey, f: F) -> Option<R>
where
F: FnOnce(&Record<D>) -> R,
@@ -319,7 +339,7 @@ where
out
}
pub async fn get_subkey<R, F>(
pub async fn get_subkey(
&mut self,
key: TypedKey,
subkey: ValueSubkey,
@@ -371,7 +391,7 @@ where
return Ok(None);
}
pub async fn set_subkey<R, F>(
pub async fn set_subkey(
&mut self,
key: TypedKey,
subkey: ValueSubkey,

View File

@@ -6,7 +6,7 @@ use super::*;
#[archive_attr(repr(C), derive(CheckBytes))]
pub struct Record<D>
where
D: RkyvArchive + RkyvSerialize<RkyvSerializer>,
D: Clone + RkyvArchive + RkyvSerialize<RkyvSerializer>,
for<'t> <D as RkyvArchive>::Archived: CheckBytes<RkyvDefaultValidator<'t>>,
<D as RkyvArchive>::Archived: RkyvDeserialize<D, SharedDeserializeMap>,
{
@@ -19,7 +19,7 @@ where
impl<D> Record<D>
where
D: RkyvArchive + RkyvSerialize<RkyvSerializer>,
D: Clone + RkyvArchive + RkyvSerialize<RkyvSerializer>,
for<'t> <D as RkyvArchive>::Archived: CheckBytes<RkyvDefaultValidator<'t>>,
<D as RkyvArchive>::Archived: RkyvDeserialize<D, SharedDeserializeMap>,
{