Prevent scans on unrooted slots from seeing partial clean (#13628)

Co-authored-by: Carl Lin <carl@solana.com>
2020-11-20 13:01:04 -08:00
parent 62fa8b0ed8
commit 791fb17437
7 changed files with 500 additions and 62 deletions
--- a/runtime/src/accounts_db.rs
+++ b/runtime/src/accounts_db.rs
@ -3043,7 +3043,7 @@ pub mod tests {
        assert_eq!(&db.load_slow(&ancestors, &key).unwrap().0, &account1);

        let accounts: Vec<Account> =
-            db.scan_accounts(&ancestors, |accounts: &mut Vec<Account>, option| {
+            db.unchecked_scan_accounts(&ancestors, |accounts: &mut Vec<Account>, option| {
                if let Some(data) = option {
                    accounts.push(data.1);
                }
@ -4368,7 +4368,7 @@ pub mod tests {

        let ancestors = vec![(0, 0)].into_iter().collect();
        let accounts: Vec<Account> =
-            db.scan_accounts(&ancestors, |accounts: &mut Vec<Account>, option| {
+            db.unchecked_scan_accounts(&ancestors, |accounts: &mut Vec<Account>, option| {
                if let Some(data) = option {
                    accounts.push(data.1);
                }
@ -4377,7 +4377,7 @@ pub mod tests {

        let ancestors = vec![(1, 1), (0, 0)].into_iter().collect();
        let accounts: Vec<Account> =
-            db.scan_accounts(&ancestors, |accounts: &mut Vec<Account>, option| {
+            db.unchecked_scan_accounts(&ancestors, |accounts: &mut Vec<Account>, option| {
                if let Some(data) = option {
                    accounts.push(data.1);
                }
--- a/runtime/src/accounts_index.rs
+++ b/runtime/src/accounts_index.rs
@ -220,9 +220,124 @@ impl<T: 'static + Clone> AccountsIndex<T> {
            // deadlock
            let max_root = self.max_root();
            *w_ongoing_scan_roots.entry(max_root).or_default() += 1;
+
            max_root
        };

+        // First we show that for any bank `B` that is a descendant of
+        // the current `max_root`, it must be true that `B.ancestors.contains(max_root)`,
+        // regardless of the pattern of `squash()` behavior, where `ancestors` is the set
+        // of ancestors that is tracked in each bank.
+        //
+        // Proof: At startup, if starting from a snapshot, `generate_index()` adds all banks
+        // in the snapshot to the index via `add_root()` and so `max_root` will be the
+        // greatest of these. Thus the claim holds at startup since there are no
+        // descendants of `max_root`.
+        //
+        // Now we proceed by induction on each `BankForks::set_root()`.
+        // Assume the claim holds when the `max_root` is `R`. Call the set of
+        // descendants of `R` present in BankForks `R_descendants`.
+        //
+        // Then for any banks `B` in `R_descendants`, it must be that `B.ancestors.contains(S)`,
+        // where `S` is any ancestor of `B` such that `S >= R`.
+        //
+        // For example:
+        //          `R` -> `A` -> `C` -> `B`
+        // Then `B.ancestors == {R, A, C}`
+        //
+        // Next we call `BankForks::set_root()` at some descendant of `R`, `R_new`,
+        // where `R_new > R`.
+        //
+        // When we squash `R_new`, `max_root` in the AccountsIndex here is now set to `R_new`,
+        // and all nondescendants of `R_new` are pruned.
+        //
+        // Now consider any outstanding references to banks in the system that are descended from
+        // `max_root == R_new`. Take any one of these references and call it `B`. Because `B` is
+        // a descendant of `R_new`, this means `B` was also a descendant of `R`. Thus `B`
+        // must be a member of `R_descendants` because `B` was constructed and added to
+        // BankForks before the `set_root`.
+        //
+        // This means by the guarantees of `R_descendants` described above, because
+        // `R_new` is an ancestor of `B`, and `R < R_new < B`, then `B.ancestors.contains(R_new)`.
+        //
+        // Now until the next `set_root`, any new banks constructed from `new_from_parent` will
+        // also have `max_root == R_new` in their ancestor set, so the claim holds for those descendants
+        // as well. Once the next `set_root` happens, we once again update `max_root` and the same
+        // inductive argument can be applied again to show the claim holds.
+
+        // Check that the `max_root` is present in `ancestors`. From the proof above, if
+        // `max_root` is not present in `ancestors`, this means the bank `B` with the
+        // given `ancestors` is not descended from `max_root`, which means
+        // either:
+        // 1) `B` is on a different fork or
+        // 2) `B` is an ancestor of `max_root`.
+        // In both cases we can ignore the given ancestors and instead just rely on the roots
+        // present as `max_root` indicates the roots present in the index are more up to date
+        // than the ancestors given.
+        let empty = HashMap::new();
+        let ancestors = if ancestors.contains_key(&max_root) {
+            ancestors
+        } else {
+            /*
+            This takes care of edge cases like:
+
+            Diagram 1:
+
+                        slot 0
+                          |
+                        slot 1
+                      /        \
+                 slot 2         |
+                    |       slot 3 (max root)
+            slot 4 (scan)
+
+            By the time the scan on slot 4 is called, slot 2 may already have been
+            cleaned by a clean on slot 3, but slot 4 may not have been cleaned.
+            The state in slot 2 would have been purged and is not saved in any roots.
+            In this case, a scan on slot 4 wouldn't accurately reflect the state when bank 4
+            was frozen. In cases like this, we default to a scan on the latest roots by
+            removing all `ancestors`.
+            */
+            &empty
+        };
+
+        /*
+        Now there are two cases, either `ancestors` is empty or nonempty:
+
+        1) If ancestors is empty, then this is the same as a scan on a rooted bank,
+        and `ongoing_scan_roots` provides protection against cleanup of roots necessary
+        for the scan, and passing `Some(max_root)` to `do_scan_accounts()` ensures newer
+        roots don't appear in the scan.
+
+        2) If ancestors is non-empty, then from the `ancestors.contains_key(&max_root)` check above,
+        we know that the fork structure must look something like:
+
+        Diagram 2:
+
+                Build fork structure:
+                        slot 0
+                          |
+                    slot 1 (max_root)
+                    /            \
+             slot 2              |
+                |            slot 3 (potential newer max root)
+              slot 4
+                |
+             slot 5 (scan)
+
+        Consider both types of ancestors, ancestor <= `max_root` and
+        ancestor > `max_root`, where `max_root == 1` as illustrated above.
+
+        a) The set of `ancestors <= max_root` are all rooted, which means their state
+        is protected by the same guarantees as 1).
+
+        b) As for the `ancestors > max_root`, those banks have at least one reference discoverable
+        through the chain of `Bank::BankRc::parent` starting from the calling bank. For instance
+        bank 5's parent reference keeps bank 4 alive, which will prevent the `Bank::drop()` from
+        running and cleaning up bank 4. Furthermore, no cleans can happen past the saved max_root == 1,
+        so a potential newer max root at 3 will not clean up any of the ancestors > 1, so slot 4
+        will not be cleaned in the middle of the scan either.
+        */
        self.do_scan_accounts(ancestors, func, range, Some(max_root));
        {
            let mut ongoing_scan_roots = self.ongoing_scan_roots.write().unwrap();
@ -246,6 +361,9 @@ impl<T: 'static + Clone> AccountsIndex<T> {
        self.do_scan_accounts(ancestors, func, range, None);
    }

+    // Scan accounts and return latest version of each account that is either:
+    // 1) rooted or
+    // 2) present in ancestors
    fn do_scan_accounts<'a, F, R>(
        &'a self,
        ancestors: &Ancestors,
@ -636,7 +754,7 @@ mod tests {
        assert!(index.get(&key.pubkey(), None, None).is_none());

        let mut num = 0;
-        index.scan_accounts(&ancestors, |_pubkey, _index| num += 1);
+        index.unchecked_scan_accounts(&ancestors, |_pubkey, _index| num += 1);
        assert_eq!(num, 0);
    }

@ -653,7 +771,7 @@ mod tests {
        assert!(index.get(&key.pubkey(), None, None).is_none());

        let mut num = 0;
-        index.scan_accounts(&ancestors, |_pubkey, _index| num += 1);
+        index.unchecked_scan_accounts(&ancestors, |_pubkey, _index| num += 1);
        assert_eq!(num, 0);
    }

@ -669,7 +787,7 @@ mod tests {
        assert!(index.get(&key.pubkey(), Some(&ancestors), None).is_none());

        let mut num = 0;
-        index.scan_accounts(&ancestors, |_pubkey, _index| num += 1);
+        index.unchecked_scan_accounts(&ancestors, |_pubkey, _index| num += 1);
        assert_eq!(num, 0);
    }

@ -687,7 +805,7 @@ mod tests {

        let mut num = 0;
        let mut found_key = false;
-        index.scan_accounts(&ancestors, |pubkey, _index| {
+        index.unchecked_scan_accounts(&ancestors, |pubkey, _index| {
            if pubkey == &key.pubkey() {
                found_key = true
            };
@ -813,7 +931,7 @@ mod tests {
        let ancestors: Ancestors = HashMap::new();

        let mut scanned_keys = HashSet::new();
-        index.scan_accounts(&ancestors, |pubkey, _index| {
+        index.unchecked_scan_accounts(&ancestors, |pubkey, _index| {
            scanned_keys.insert(*pubkey);
        });
        assert_eq!(scanned_keys.len(), num_pubkeys);
@ -1011,7 +1129,7 @@ mod tests {

        let mut num = 0;
        let mut found_key = false;
-        index.scan_accounts(&Ancestors::new(), |pubkey, _index| {
+        index.unchecked_scan_accounts(&Ancestors::new(), |pubkey, _index| {
            if pubkey == &key.pubkey() {
                found_key = true;
                assert_eq!(_index, (&true, 3));
--- a/runtime/src/bank.rs
+++ b/runtime/src/bank.rs
@ -1775,7 +1775,6 @@ impl Bank {
        //this bank and all its parents are now on the rooted path
        let mut roots = vec![self.slot()];
        roots.append(&mut self.parents().iter().map(|p| p.slot()).collect());
-        *self.rc.parent.write().unwrap() = None;

        let mut squash_accounts_time = Measure::start("squash_accounts_time");
        for slot in roots.iter().rev() {
@ -1784,6 +1783,8 @@ impl Bank {
        }
        squash_accounts_time.stop();

+        *self.rc.parent.write().unwrap() = None;
+
        let mut squash_cache_time = Measure::start("squash_cache_time");
        roots
            .iter()
@ -3506,6 +3507,13 @@ impl Bank {
        parents
    }

+    /// Returns this bank first, followed by every bank yielded by `self.parents()`
+    pub fn parents_inclusive(self: &Arc<Self>) -> Vec<Arc<Bank>> {
+        let mut all = vec![self.clone()];
+        all.extend(self.parents().into_iter());
+        all
+    }
+
    pub fn store_account(&self, pubkey: &Pubkey, account: &Account) {
        self.rc.accounts.store_slow(self.slot(), pubkey, account);

@ -3686,7 +3694,7 @@ impl Bank {
    }

    pub fn get_largest_accounts(
-        &self,
+        self: &Arc<Self>,
        num: usize,
        filter_by_address: &HashSet<Pubkey>,
        filter: AccountAddressFilter,
@ -10670,6 +10678,86 @@ pub(crate) mod tests {
        update_thread.join().unwrap();
    }

+    #[test]
+    fn test_store_scan_consistency_unrooted() {
+        test_store_scan_consistency(
+            |bank0, bank_to_scan_sender, pubkeys_to_modify, program_id, starting_lamports| {
+                let mut current_major_fork_bank = bank0;
+                loop {
+                    let mut current_minor_fork_bank = current_major_fork_bank.clone();
+                    let num_new_banks = 2;
+                    let lamports = current_minor_fork_bank.slot() + starting_lamports + 1;
+                    // Store account updates on each of the new banks created on the minor fork
+                    for pubkeys_to_modify in &pubkeys_to_modify
+                        .iter()
+                        .chunks(pubkeys_to_modify.len() / num_new_banks)
+                    {
+                        current_minor_fork_bank = Arc::new(Bank::new_from_parent(
+                            &current_minor_fork_bank,
+                            &solana_sdk::pubkey::new_rand(),
+                            current_minor_fork_bank.slot() + 2,
+                        ));
+                        let account = Account::new(lamports, 0, &program_id);
+                        // Write partial updates to each of the banks in the minor fork so if any of them
+                        // get cleaned up, there will be keys with the wrong account value/missing.
+                        for key in pubkeys_to_modify {
+                            current_minor_fork_bank.store_account(key, &account);
+                        }
+                        current_minor_fork_bank.freeze();
+                    }
+
+                    // All the parent banks made in this iteration of the loop
+                    // are currently discoverable, previous parents should have
+                    // been squashed
+                    assert_eq!(
+                        current_minor_fork_bank.parents_inclusive().len(),
+                        num_new_banks + 1,
+                    );
+
+                    // `next_major_bank` needs to be sandwiched between the minor fork banks
+                    // That way, after the squash(), the minor fork has the potential to see a
+                    // *partial* clean of the banks < `next_major_bank`.
+                    current_major_fork_bank = Arc::new(Bank::new_from_parent(
+                        &current_major_fork_bank,
+                        &solana_sdk::pubkey::new_rand(),
+                        current_minor_fork_bank.slot() - 1,
+                    ));
+                    let lamports = current_major_fork_bank.slot() + starting_lamports + 1;
+                    let account = Account::new(lamports, 0, &program_id);
+                    for key in pubkeys_to_modify.iter() {
+                        // Store rooted updates to these pubkeys such that the minor
+                        // fork updates to the same keys will be deleted by clean
+                        current_major_fork_bank.store_account(key, &account);
+                    }
+
+                    // Send the last new bank to the scan thread to perform the scan.
+                    // Meanwhile this thread will continually set roots on a separate fork
+                    // and squash.
+                    /*
+                                bank 0
+                             /         \
+                     minor bank 1       \
+                          /         current_major_fork_bank
+                     minor bank 2
+
+                    */
+                    // The capacity of the channel is 1 so that this thread will wait for the scan to finish before starting
+                    // the next iteration, allowing the scan to stay in sync with these updates
+                    // such that every scan will see this interruption.
+                    current_major_fork_bank.freeze();
+                    current_major_fork_bank.squash();
+                    if bank_to_scan_sender.send(current_minor_fork_bank).is_err() {
+                        // Channel was disconnected, exit
+                        return;
+                    }
+
+                    // Try to get clean to overlap with the scan
+                    current_major_fork_bank.clean_accounts(false);
+                }
+            },
+        )
+    }
+
    #[test]
    fn test_store_scan_consistency_root() {
        test_store_scan_consistency(