From d1fbf77f3fc168babb6f0bad24b12563bf3ab68e Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Thu, 3 Jun 2021 05:07:02 +0000 Subject: [PATCH] RollingBitField: bug fixes and add tests (#17525) (#17674) * RollingBitField: bug fixes and add tests * respond to feedback (cherry picked from commit 8924fbf6a03158c894be2725327ed6ca9800ee8e) Co-authored-by: Jeff Washington (jwash) <75863576+jeffwashington@users.noreply.github.com> --- runtime/src/accounts_index.rs | 141 ++++++++++++++++++++++++++++++++-- 1 file changed, 134 insertions(+), 7 deletions(-) diff --git a/runtime/src/accounts_index.rs b/runtime/src/accounts_index.rs index b3920ade71..5217ab3134 100644 --- a/runtime/src/accounts_index.rs +++ b/runtime/src/accounts_index.rs @@ -223,7 +223,7 @@ impl WriteAccountMapEntry { } } -#[derive(Debug, Default)] +#[derive(Debug, Default, AbiExample, Clone)] pub struct RollingBitField { max_width: u64, min: u64, @@ -232,9 +232,26 @@ pub struct RollingBitField { count: usize, // These are items that are true and lower than min. // They would cause us to exceed max_width if we stored them in our bit field. - // We only expect these items in conditions where there is some other bug in the system. + // We only expect these items in conditions where there is some other bug in the system + // or in testing when large ranges are created. excess: HashSet, } + +impl PartialEq for RollingBitField { + fn eq(&self, other: &Self) -> bool { + // 2 instances could have different internal data for the same values, + // so we have to compare data. + self.len() == other.len() && { + for item in self.get_all() { + if !other.contains(&item) { + return false; + } + } + true + } + } +} + // functionally similar to a hashset // Relies on there being a sliding window of key values. The key values continue to increase. // Old key values are removed from the lesser values and do not accumulate. @@ -264,7 +281,7 @@ impl RollingBitField { } pub fn insert(&mut self, key: u64) { - let bits_empty = self.count == 0 || self.count == self.excess.len(); + let mut bits_empty = self.count == 0 || self.all_items_in_excess(); let update_bits = if bits_empty { true // nothing in bits, so in range } else if key < self.min { @@ -293,6 +310,12 @@ impl RollingBitField { let address = self.get_address(&key); self.bits.set(address, false); self.purge(&key); + + if self.all_items_in_excess() { + // if we moved the last existing item to excess, then we are ready to insert the new item in the bits + bits_empty = true; + break; + } } true // moved things to excess if necessary, so update bits with the new entry @@ -309,6 +332,13 @@ impl RollingBitField { } else { self.min = std::cmp::min(self.min, key); self.max = std::cmp::max(self.max, key + 1); + assert!( + self.min + self.max_width >= self.max, + "min: {}, max: {}, max_width: {}", + self.min, + self.max, + self.max_width + ); } self.count += 1; } @@ -340,9 +370,13 @@ impl RollingBitField { } } + fn all_items_in_excess(&self) -> bool { + self.excess.len() == self.count + } + // after removing 'key' where 'key' = min, make min the correct new min value fn purge(&mut self, key: &u64) { - if self.count > 0 { + if self.count > 0 && !self.all_items_in_excess() { if key == &self.min { let start = self.min + 1; // min just got removed for key in start..self.max { @@ -353,8 +387,18 @@ impl RollingBitField { } } } else { - self.min = Slot::default(); - self.max = Slot::default(); + // The idea is that there are no items in the bitfield anymore. + // But, there MAY be items in excess. The model works such that items < min go into excess. + // So, after purging all items from bitfield, we hold max to be what it previously was, but set min to max. + // Thus, if we lookup >= max, answer is always false without having to look in excess. + // If we changed max here to 0, we would lose the ability to know the range of items in excess (if any). + // So, now, with min updated = max: + // If we lookup < max, then we first check min. + // If >= min, then we look in bitfield. + // Otherwise, we look in excess since the request is < min. + // So, resetting min like this after a remove results in the correct behavior for the model. + // Later, if we insert and there are 0 items total (excess + bitfield), then we reset min/max to reflect the new item only. + self.min = self.max; } } @@ -364,9 +408,12 @@ impl RollingBitField { self.bits.get(address) } + // This is the 99% use case. + // This needs be fast for the most common case of asking for key >= min. pub fn contains(&self, key: &u64) -> bool { if key < &self.max { if key >= &self.min { + // in the bitfield range self.contains_assume_in_range(key) } else { self.excess.contains(key) @@ -1629,6 +1676,78 @@ pub mod tests { ) } + #[test] + fn test_bitfield_delete_non_excess() { + solana_logger::setup(); + let len = 16; + let mut bitfield = RollingBitField::new(len); + + bitfield.insert(0); + let too_big = len + 1; + bitfield.insert(too_big); + assert!(bitfield.contains(&0)); + assert!(bitfield.contains(&too_big)); + assert_eq!(bitfield.len(), 2); + assert_eq!(bitfield.excess.len(), 1); + assert_eq!(bitfield.min, too_big); + assert_eq!(bitfield.max, too_big + 1); + + // delete the thing that is NOT in excess + bitfield.remove(&too_big); + assert_eq!(bitfield.min, too_big + 1); + assert_eq!(bitfield.max, too_big + 1); + let too_big_times_2 = too_big * 2; + bitfield.insert(too_big_times_2); + assert!(bitfield.contains(&0)); + assert!(bitfield.contains(&too_big_times_2)); + assert_eq!(bitfield.len(), 2); + assert_eq!(bitfield.excess.len(), 1); + assert_eq!(bitfield.min, too_big_times_2); + assert_eq!(bitfield.max, too_big_times_2 + 1); + + bitfield.remove(&0); + bitfield.remove(&too_big_times_2); + assert!(bitfield.is_empty()); + let other = 5; + bitfield.insert(other); + assert!(bitfield.contains(&other)); + assert!(bitfield.excess.is_empty()); + assert_eq!(bitfield.min, other); + assert_eq!(bitfield.max, other + 1); + } + + #[test] + fn test_bitfield_insert_excess() { + solana_logger::setup(); + let len = 16; + let mut bitfield = RollingBitField::new(len); + + bitfield.insert(0); + let too_big = len + 1; + bitfield.insert(too_big); + assert!(bitfield.contains(&0)); + assert!(bitfield.contains(&too_big)); + assert_eq!(bitfield.len(), 2); + assert_eq!(bitfield.excess.len(), 1); + assert!(bitfield.excess.contains(&0)); + assert_eq!(bitfield.min, too_big); + assert_eq!(bitfield.max, too_big + 1); + + // delete the thing that IS in excess + // this does NOT affect min/max + bitfield.remove(&0); + assert_eq!(bitfield.min, too_big); + assert_eq!(bitfield.max, too_big + 1); + // re-add to excess + bitfield.insert(0); + assert!(bitfield.contains(&0)); + assert!(bitfield.contains(&too_big)); + assert_eq!(bitfield.len(), 2); + assert_eq!(bitfield.excess.len(), 1); + assert_eq!(bitfield.min, too_big); + assert_eq!(bitfield.max, too_big + 1); + } + #[test] fn test_bitfield_permutations() { solana_logger::setup(); @@ -1934,7 +2053,7 @@ pub mod tests { assert!(!tester.remove(&slot)); } - fn compare(hashset: &HashSet, bitfield: &RollingBitField) { + fn compare_internal(hashset: &HashSet, bitfield: &RollingBitField) { assert_eq!(hashset.len(), bitfield.len()); assert_eq!(hashset.is_empty(), bitfield.is_empty()); if !bitfield.is_empty() { @@ -1967,6 +2086,14 @@ pub mod tests { } } + fn compare(hashset: &HashSet, bitfield: &RollingBitField) { + compare_internal(hashset, bitfield); + let clone = bitfield.clone(); + compare_internal(hashset, &clone); + assert!(clone.eq(&bitfield)); + assert_eq!(clone, *bitfield); + } + #[test] fn test_bitfield_functionality() { solana_logger::setup();