Handle removing slots during account scans (#17471)
This commit is contained in:
@@ -10,7 +10,7 @@ use log::*;
|
||||
use ouroboros::self_referencing;
|
||||
use solana_measure::measure::Measure;
|
||||
use solana_sdk::{
|
||||
clock::Slot,
|
||||
clock::{BankId, Slot},
|
||||
pubkey::{Pubkey, PUBKEY_BYTES},
|
||||
};
|
||||
use std::{
|
||||
@@ -25,15 +25,16 @@ use std::{
|
||||
},
|
||||
sync::{
|
||||
atomic::{AtomicU64, Ordering},
|
||||
Arc, RwLock, RwLockReadGuard, RwLockWriteGuard,
|
||||
Arc, Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard,
|
||||
},
|
||||
};
|
||||
use thiserror::Error;
|
||||
|
||||
pub const ITER_BATCH_SIZE: usize = 1000;
|
||||
|
||||
pub type ScanResult<T> = Result<T, ScanError>;
|
||||
pub type SlotList<T> = Vec<(Slot, T)>;
|
||||
pub type SlotSlice<'s, T> = &'s [(Slot, T)];
|
||||
|
||||
pub type RefCount = u64;
|
||||
pub type AccountMap<K, V> = BTreeMap<K, V>;
|
||||
|
||||
@@ -55,6 +56,12 @@ impl IsCached for u64 {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Error, Debug, PartialEq)]
|
||||
pub enum ScanError {
|
||||
#[error("Node detected it replayed bad version of slot {slot:?} with id {bank_id:?}, thus the scan on said slot was aborted")]
|
||||
SlotRemoved { slot: Slot, bank_id: BankId },
|
||||
}
|
||||
|
||||
enum ScanTypes<R: RangeBounds<Pubkey>> {
|
||||
Unindexed(Option<R>),
|
||||
Indexed(IndexKey),
|
||||
@@ -458,6 +465,10 @@ impl RollingBitField {
|
||||
std::mem::swap(&mut n, self);
|
||||
}
|
||||
|
||||
pub fn max(&self) -> u64 {
|
||||
self.max
|
||||
}
|
||||
|
||||
pub fn get_all(&self) -> Vec<u64> {
|
||||
let mut all = Vec::with_capacity(self.count);
|
||||
self.excess.iter().for_each(|slot| all.push(*slot));
|
||||
@@ -584,6 +595,22 @@ type MapType<T> = AccountMap<Pubkey, AccountMapEntry<T>>;
|
||||
type AccountMapsWriteLock<'a, T> = RwLockWriteGuard<'a, AccountMap<Pubkey, AccountMapEntry<T>>>;
|
||||
type AccountMapsReadLock<'a, T> = RwLockReadGuard<'a, AccountMap<Pubkey, AccountMapEntry<T>>>;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct ScanSlotTracker {
|
||||
is_removed: bool,
|
||||
ref_count: u64,
|
||||
}
|
||||
|
||||
impl ScanSlotTracker {
|
||||
pub fn is_removed(&self) -> bool {
|
||||
self.is_removed
|
||||
}
|
||||
|
||||
pub fn mark_removed(&mut self) {
|
||||
self.is_removed = true;
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct AccountsIndex<T> {
|
||||
pub account_maps: RwLock<MapType<T>>,
|
||||
@@ -592,6 +619,17 @@ pub struct AccountsIndex<T> {
|
||||
spl_token_owner_index: SecondaryIndex<RwLockSecondaryIndexEntry>,
|
||||
roots_tracker: RwLock<RootsTracker>,
|
||||
ongoing_scan_roots: RwLock<BTreeMap<Slot, u64>>,
|
||||
// Each scan has some latest slot `S` that is the tip of the fork the scan
|
||||
// is iterating over. The unique id of that slot `S` is recorded here (note we don't use
|
||||
// `S` as the id because there can be more than one version of a slot `S`). If a fork
|
||||
// is abandoned, all of the slots on that fork up to `S` will be removed via
|
||||
// `AccountsDb::remove_unrooted_slots()`. When the scan finishes, it'll realize that the
|
||||
// results of the scan may have been corrupted by `remove_unrooted_slots` and abort its results.
|
||||
//
|
||||
// `removed_bank_ids` tracks all the slot ids that were removed via `remove_unrooted_slots()` so any attempted scans
|
||||
// on any of these slots fails. This is safe to purge once the associated Bank is dropped and
|
||||
// scanning the fork with that Bank at the tip is no longer possible.
|
||||
pub removed_bank_ids: Mutex<HashSet<BankId>>,
|
||||
zero_lamport_pubkeys: DashSet<Pubkey>,
|
||||
}
|
||||
|
||||
@@ -610,6 +648,7 @@ impl<T> Default for AccountsIndex<T> {
|
||||
),
|
||||
roots_tracker: RwLock::<RootsTracker>::default(),
|
||||
ongoing_scan_roots: RwLock::<BTreeMap<Slot, u64>>::default(),
|
||||
removed_bank_ids: Mutex::<HashSet<BankId>>::default(),
|
||||
zero_lamport_pubkeys: DashSet::<Pubkey>::default(),
|
||||
}
|
||||
}
|
||||
@@ -627,12 +666,24 @@ impl<T: 'static + Clone + IsCached + ZeroLamport> AccountsIndex<T> {
|
||||
&self,
|
||||
metric_name: &'static str,
|
||||
ancestors: &Ancestors,
|
||||
scan_bank_id: BankId,
|
||||
func: F,
|
||||
scan_type: ScanTypes<R>,
|
||||
) where
|
||||
) -> Result<(), ScanError>
|
||||
where
|
||||
F: FnMut(&Pubkey, (&T, Slot)),
|
||||
R: RangeBounds<Pubkey>,
|
||||
{
|
||||
{
|
||||
let locked_removed_bank_ids = self.removed_bank_ids.lock().unwrap();
|
||||
if locked_removed_bank_ids.contains(&scan_bank_id) {
|
||||
return Err(ScanError::SlotRemoved {
|
||||
slot: ancestors.max_slot(),
|
||||
bank_id: scan_bank_id,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let max_root = {
|
||||
let mut w_ongoing_scan_roots = self
|
||||
// This lock is also grabbed by clean_accounts(), so clean
|
||||
@@ -809,6 +860,23 @@ impl<T: 'static + Clone + IsCached + ZeroLamport> AccountsIndex<T> {
|
||||
ongoing_scan_roots.remove(&max_root);
|
||||
}
|
||||
}
|
||||
|
||||
// If the fork with tip at bank `scan_bank_id` was removed during our scan, then the scan
|
||||
// may have been corrupted, so abort the results.
|
||||
let was_scan_corrupted = self
|
||||
.removed_bank_ids
|
||||
.lock()
|
||||
.unwrap()
|
||||
.contains(&scan_bank_id);
|
||||
|
||||
if was_scan_corrupted {
|
||||
Err(ScanError::SlotRemoved {
|
||||
slot: ancestors.max_slot(),
|
||||
bank_id: scan_bank_id,
|
||||
})
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn do_unchecked_scan_accounts<F, R>(
|
||||
@@ -1008,7 +1076,12 @@ impl<T: 'static + Clone + IsCached + ZeroLamport> AccountsIndex<T> {
|
||||
}
|
||||
|
||||
/// call func with every pubkey and index visible from a given set of ancestors
|
||||
pub(crate) fn scan_accounts<F>(&self, ancestors: &Ancestors, func: F)
|
||||
pub(crate) fn scan_accounts<F>(
|
||||
&self,
|
||||
ancestors: &Ancestors,
|
||||
scan_bank_id: BankId,
|
||||
func: F,
|
||||
) -> Result<(), ScanError>
|
||||
where
|
||||
F: FnMut(&Pubkey, (&T, Slot)),
|
||||
{
|
||||
@@ -1016,9 +1089,10 @@ impl<T: 'static + Clone + IsCached + ZeroLamport> AccountsIndex<T> {
|
||||
self.do_checked_scan_accounts(
|
||||
"",
|
||||
ancestors,
|
||||
scan_bank_id,
|
||||
func,
|
||||
ScanTypes::Unindexed(None::<Range<Pubkey>>),
|
||||
);
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn unchecked_scan_accounts<F>(
|
||||
@@ -1048,7 +1122,13 @@ impl<T: 'static + Clone + IsCached + ZeroLamport> AccountsIndex<T> {
|
||||
}
|
||||
|
||||
/// call func with every pubkey and index visible from a given set of ancestors
|
||||
pub(crate) fn index_scan_accounts<F>(&self, ancestors: &Ancestors, index_key: IndexKey, func: F)
|
||||
pub(crate) fn index_scan_accounts<F>(
|
||||
&self,
|
||||
ancestors: &Ancestors,
|
||||
scan_bank_id: BankId,
|
||||
index_key: IndexKey,
|
||||
func: F,
|
||||
) -> Result<(), ScanError>
|
||||
where
|
||||
F: FnMut(&Pubkey, (&T, Slot)),
|
||||
{
|
||||
@@ -1056,9 +1136,10 @@ impl<T: 'static + Clone + IsCached + ZeroLamport> AccountsIndex<T> {
|
||||
self.do_checked_scan_accounts(
|
||||
"",
|
||||
ancestors,
|
||||
scan_bank_id,
|
||||
func,
|
||||
ScanTypes::<Range<Pubkey>>::Indexed(index_key),
|
||||
);
|
||||
)
|
||||
}
|
||||
|
||||
pub fn get_rooted_entries(&self, slice: SlotSlice<T>, max: Option<Slot>) -> SlotList<T> {
|
||||
|
Reference in New Issue
Block a user