Prevent scans on unrooted slots from seeing partial clean (#13628)

Co-authored-by: Carl Lin <carl@solana.com>
carllin
2020-11-20 13:01:04 -08:00
committed by GitHub
parent 62fa8b0ed8
commit 791fb17437
7 changed files with 500 additions and 62 deletions


@@ -220,9 +220,124 @@ impl<T: 'static + Clone> AccountsIndex<T> {
// deadlock
let max_root = self.max_root();
*w_ongoing_scan_roots.entry(max_root).or_default() += 1;
max_root
};
// First we show that for any bank `B` that is a descendant of
// the current `max_root`, it must be true that `B.ancestors.contains(max_root)`,
// regardless of the pattern of `squash()` behavior, where `ancestors` is the set
// of ancestors that is tracked in each bank.
//
// Proof: At startup, if starting from a snapshot, generate_index() adds all banks
// in the snapshot to the index via `add_root()` and so `max_root` will be the
// greatest of these. Thus the claim holds at startup since there are no
// descendants of `max_root`.
//
// Now we proceed by induction on each `BankForks::set_root()`.
// Assume the claim holds when the `max_root` is `R`. Call the set of
// descendants of `R` present in BankForks `R_descendants`.
//
// Then for any bank `B` in `R_descendants`, it must be that `B.ancestors.contains(S)`,
// where `S` is any ancestor of `B` such that `S >= R`.
//
// For example:
// `R` -> `A` -> `C` -> `B`
// Then `B.ancestors == {R, A, C}`
//
// Next we call `BankForks::set_root()` at some descendant of `R`, `R_new`,
// where `R_new > R`.
//
// When we squash `R_new`, `max_root` in the AccountsIndex here is now set to `R_new`,
// and all nondescendants of `R_new` are pruned.
//
// Now consider any outstanding references to banks in the system that are descended from
// `max_root == R_new`. Take any one of these references and call it `B`. Because `B` is
// a descendant of `R_new`, this means `B` was also a descendant of `R`. Thus `B`
// must be a member of `R_descendants` because `B` was constructed and added to
// BankForks before the `set_root`.
//
// This means by the guarantees of `R_descendants` described above, because
// `R_new` is an ancestor of `B`, and `R < R_new < B`, then `B.ancestors.contains(R_new)`.
//
// Now until the next `set_root`, any new banks constructed from `new_from_parent` will
// also have `max_root == R_new` in their ancestor set, so the claim holds for those descendants
// as well. Once the next `set_root` happens, we once again update `max_root` and the same
// inductive argument can be applied again to show the claim holds.
// Check that the `max_root` is present in `ancestors`. From the proof above, if
// `max_root` is not present in `ancestors`, this means the bank `B` with the
// given `ancestors` is not descended from `max_root`, which means
// either:
// 1) `B` is on a different fork or
// 2) `B` is an ancestor of `max_root`.
// In both cases we can ignore the given ancestors and instead just rely on the roots,
// because `max_root` indicates the roots present in the index are more up to date
// than the given ancestors.
let empty = HashMap::new();
let ancestors = if ancestors.contains_key(&max_root) {
ancestors
} else {
/*
This takes care of edge cases like:
Diagram 1:
           slot 0
             |
           slot 1
          /      \
     slot 2       \
        |       slot 3 (max root)
     slot 4 (scan)
By the time the scan on slot 4 is called, slot 2 may already have been
cleaned by a clean on slot 3, but slot 4 may not have been cleaned.
The state in slot 2 would have been purged and is not saved in any roots.
In this case, a scan on slot 4 wouldn't accurately reflect the state when bank 4
was frozen. In cases like this, we default to a scan on the latest roots by
removing all `ancestors`.
*/
&empty
};
/*
Now there are two cases, either `ancestors` is empty or nonempty:
1) If ancestors is empty, then this is the same as a scan on a rooted bank;
`ongoing_scan_roots` protects the roots needed by the scan from cleanup,
and passing `Some(max_root)` to `do_scan_accounts()` ensures newer
roots don't appear in the scan.
2) If ancestors is non-empty, then from the `ancestors.contains_key(&max_root)` check above, we know
that the fork structure must look something like:
Diagram 2:
Build fork structure:
           slot 0
             |
           slot 1 (max_root)
          /      \
     slot 2       \
        |       slot 3 (potential newer max root)
     slot 4
        |
     slot 5 (scan)
Consider both types of ancestors, ancestor <= `max_root` and
ancestor > `max_root`, where `max_root == 1` as illustrated above.
a) The ancestors `<= max_root` are all rooted, which means their state
is protected by the same guarantees as in case 1).
b) As for the ancestors `> max_root`, those banks have at least one reference discoverable
through the chain of `Bank::BankRc::parent` starting from the calling bank. For instance,
bank 5's parent reference keeps bank 4 alive, which will prevent `Bank::drop()` from
running and cleaning up bank 4. Furthermore, no cleans can happen past the saved `max_root == 1`,
so a potential newer max root at 3 will not clean up any of the ancestors > 1; thus slot 4
will not be cleaned in the middle of the scan either.
*/
self.do_scan_accounts(ancestors, func, range, Some(max_root));
{
let mut ongoing_scan_roots = self.ongoing_scan_roots.write().unwrap();
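Taken together, the hunk above is a scan guard: record the current `max_root` in `ongoing_scan_roots` under its write lock so clean cannot purge state at or below it, drop the caller's ancestors if they do not contain `max_root`, run the scan capped at `Some(max_root)`, and release the refcount afterwards. Below is a minimal, self-contained sketch of that pattern with simplified types; `ScanGuardedIndex` and `checked_scan` are illustrative names, not the crate's API.

use std::collections::{HashMap, HashSet};
use std::sync::RwLock;

type Slot = u64;
type Ancestors = HashMap<Slot, usize>;

struct ScanGuardedIndex {
    roots: RwLock<HashSet<Slot>>,
    // Refcount per root that an in-progress scan depends on; clean must not
    // purge state at or below a slot with a nonzero count here.
    ongoing_scan_roots: RwLock<HashMap<Slot, u64>>,
}

impl ScanGuardedIndex {
    fn max_root(&self) -> Slot {
        self.roots.read().unwrap().iter().copied().max().unwrap_or(0)
    }

    fn checked_scan(&self, ancestors: &Ancestors, mut func: impl FnMut(Slot)) {
        // 1) Pin `max_root` while holding the `ongoing_scan_roots` write lock,
        //    so clean (which takes the same lock) cannot advance past it mid-scan.
        let max_root = {
            let mut w = self.ongoing_scan_roots.write().unwrap();
            let max_root = self.max_root();
            *w.entry(max_root).or_default() += 1;
            max_root
        };

        // 2) If the caller's ancestors do not include `max_root`, the scanned bank
        //    is not a descendant of it; fall back to scanning roots only.
        let empty = Ancestors::new();
        let ancestors = if ancestors.contains_key(&max_root) {
            ancestors
        } else {
            &empty
        };

        // 3) Hypothetical scan body: visit the ancestors plus roots <= max_root.
        for (&slot, _) in ancestors {
            func(slot);
        }
        for &slot in self
            .roots
            .read()
            .unwrap()
            .iter()
            .filter(|&&s| s <= max_root && !ancestors.contains_key(&s))
        {
            func(slot);
        }

        // 4) Release the guard; clean may again purge past `max_root` once no scan needs it.
        let mut w = self.ongoing_scan_roots.write().unwrap();
        let count = w.get_mut(&max_root).unwrap();
        *count -= 1;
        if *count == 0 {
            w.remove(&max_root);
        }
    }
}

fn main() {
    let index = ScanGuardedIndex {
        roots: RwLock::new([0, 1].into_iter().collect()),
        ongoing_scan_roots: RwLock::new(HashMap::new()),
    };
    // Scan a bank at slot 3 whose ancestors include the current max root (1).
    let ancestors: Ancestors = [(1, 0), (2, 0), (3, 0)].into_iter().collect();
    index.checked_scan(&ancestors, |slot| println!("visited slot {slot}"));
}

Note that the lock ordering in the sketch mirrors the comment at the top of the hunk: the roots lock is read while the `ongoing_scan_roots` write lock is held, and taking them in the inverse order must be avoided to prevent deadlock.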
@@ -246,6 +361,9 @@ impl<T: 'static + Clone> AccountsIndex<T> {
self.do_scan_accounts(ancestors, func, range, None);
}
// Scan accounts and return latest version of each account that is either:
// 1) rooted or
// 2) present in ancestors
fn do_scan_accounts<'a, F, R>(
&'a self,
ancestors: &Ancestors,
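The new doc comment above states the selection rule `do_scan_accounts` applies per account: among all slots the account appears in, take the newest one that is either rooted (and, when a cap is passed, no newer than `max_root`) or present in the caller's ancestors. A minimal sketch of that rule over a simplified per-account slot list follows; `SlotEntry` and `latest_visible` are hypothetical names used only for illustration.

use std::collections::{HashMap, HashSet};

type Slot = u64;
type Ancestors = HashMap<Slot, usize>;

// Hypothetical per-account entry: one write of the account at `slot`.
struct SlotEntry<T> {
    slot: Slot,
    value: T,
}

// Return the newest entry whose slot is either rooted (respecting an optional
// `max_root` cap) or present in `ancestors`.
fn latest_visible<'a, T>(
    slot_list: &'a [SlotEntry<T>],
    ancestors: &Ancestors,
    roots: &HashSet<Slot>,
    max_root: Option<Slot>,
) -> Option<&'a SlotEntry<T>> {
    slot_list
        .iter()
        .filter(|entry| {
            let rooted = roots.contains(&entry.slot)
                && max_root.map_or(true, |cap| entry.slot <= cap);
            rooted || ancestors.contains_key(&entry.slot)
        })
        .max_by_key(|entry| entry.slot)
}

fn main() {
    let roots: HashSet<Slot> = [1].into_iter().collect();
    let ancestors: Ancestors = [(4, 0)].into_iter().collect();
    let history = vec![
        SlotEntry { slot: 1, value: "rooted write" },
        SlotEntry { slot: 2, value: "write on a pruned fork" },
        SlotEntry { slot: 4, value: "write in an ancestor of the scanned bank" },
    ];
    // Slot 2 is neither rooted nor an ancestor, so the entry at slot 4 wins.
    let latest = latest_visible(&history, &ancestors, &roots, Some(1)).unwrap();
    println!("latest visible slot: {} ({})", latest.slot, latest.value);
}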
@@ -636,7 +754,7 @@ mod tests {
assert!(index.get(&key.pubkey(), None, None).is_none());
let mut num = 0;
-index.scan_accounts(&ancestors, |_pubkey, _index| num += 1);
+index.unchecked_scan_accounts(&ancestors, |_pubkey, _index| num += 1);
assert_eq!(num, 0);
}
@@ -653,7 +771,7 @@ mod tests {
assert!(index.get(&key.pubkey(), None, None).is_none());
let mut num = 0;
-index.scan_accounts(&ancestors, |_pubkey, _index| num += 1);
+index.unchecked_scan_accounts(&ancestors, |_pubkey, _index| num += 1);
assert_eq!(num, 0);
}
@@ -669,7 +787,7 @@ mod tests {
assert!(index.get(&key.pubkey(), Some(&ancestors), None).is_none());
let mut num = 0;
-index.scan_accounts(&ancestors, |_pubkey, _index| num += 1);
+index.unchecked_scan_accounts(&ancestors, |_pubkey, _index| num += 1);
assert_eq!(num, 0);
}
@@ -687,7 +805,7 @@ mod tests {
let mut num = 0;
let mut found_key = false;
-index.scan_accounts(&ancestors, |pubkey, _index| {
+index.unchecked_scan_accounts(&ancestors, |pubkey, _index| {
if pubkey == &key.pubkey() {
found_key = true
};
@@ -813,7 +931,7 @@ mod tests {
let ancestors: Ancestors = HashMap::new();
let mut scanned_keys = HashSet::new();
-index.scan_accounts(&ancestors, |pubkey, _index| {
+index.unchecked_scan_accounts(&ancestors, |pubkey, _index| {
scanned_keys.insert(*pubkey);
});
assert_eq!(scanned_keys.len(), num_pubkeys);
@@ -1011,7 +1129,7 @@ mod tests {
let mut num = 0;
let mut found_key = false;
-index.scan_accounts(&Ancestors::new(), |pubkey, _index| {
+index.unchecked_scan_accounts(&Ancestors::new(), |pubkey, _index| {
if pubkey == &key.pubkey() {
found_key = true;
assert_eq!(_index, (&true, 3));
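Throughout the test hunks, index-level tests that insert entries at unrooted slots and scan them through explicit ancestors were switched from `scan_accounts` to the new `unchecked_scan_accounts`, which, as the hunk around `self.do_scan_accounts(ancestors, func, range, None)` suggests, performs the traversal with no `max_root` cap and none of the root pinning or ancestor dropping added to the checked path. Below is a self-contained, test-shaped sketch of that unchecked traversal over a toy index; `ToyIndex` and `unchecked_scan` are illustrative names, not the crate's API.

use std::collections::{HashMap, HashSet};

type Slot = u64;
type Pubkey = [u8; 32];
type Ancestors = HashMap<Slot, usize>;

// Toy stand-in for the account index: every (slot, value) version per pubkey.
struct ToyIndex<T> {
    map: HashMap<Pubkey, Vec<(Slot, T)>>,
    roots: HashSet<Slot>,
}

impl<T> ToyIndex<T> {
    // Unchecked scan: visit the newest version of each key that is either
    // rooted or present in `ancestors`, with no `max_root` cap and no
    // ongoing-scan bookkeeping.
    fn unchecked_scan(&self, ancestors: &Ancestors, mut func: impl FnMut(&Pubkey, &T)) {
        for (pubkey, versions) in &self.map {
            let newest = versions
                .iter()
                .filter(|(slot, _)| self.roots.contains(slot) || ancestors.contains_key(slot))
                .max_by_key(|(slot, _)| *slot);
            if let Some((_, value)) = newest {
                func(pubkey, value);
            }
        }
    }
}

fn main() {
    // Mirror the test pattern: one entry at unrooted slot 0, scanned via ancestors.
    let key: Pubkey = [7u8; 32];
    let index = ToyIndex {
        map: HashMap::from([(key, vec![(0, true)])]),
        roots: HashSet::new(),
    };
    let ancestors: Ancestors = HashMap::from([(0, 0)]);

    let mut num = 0;
    let mut found_key = false;
    index.unchecked_scan(&ancestors, |pubkey, _value| {
        if pubkey == &key {
            found_key = true;
        }
        num += 1;
    });
    assert_eq!(num, 1);
    assert!(found_key);
    println!("scanned {num} account(s), found_key = {found_key}");
}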