diff --git a/src/bloom.rs b/src/bloom.rs index 3c59713ac0..3027db5d63 100644 --- a/src/bloom.rs +++ b/src/bloom.rs @@ -1,10 +1,16 @@ //! Simple Bloom Filter -use crate::bloom_hash_index::BloomHashIndex; use bv::BitVec; use rand::{self, Rng}; +use solana_sdk::hash::hashv; use std::cmp; use std::marker::PhantomData; +/// Generate a stable hash of `self` for each `hash_index` +/// Best effort can be made for uniqueness of each hash. +pub trait BloomHashIndex { + fn hash_at_index(&self, hash_index: u64) -> u64; +} + #[derive(Serialize, Deserialize, Default, Clone, Debug, PartialEq)] pub struct Bloom { pub keys: Vec, @@ -13,6 +19,14 @@ pub struct Bloom { } impl Bloom { + pub fn new(num_bits: usize, keys: Vec) -> Self { + let bits = BitVec::new_fill(false, num_bits as u64); + Bloom { + keys, + bits, + _phantom: Default::default(), + } + } /// create filter optimal for num size given the `false_rate` /// the keys are randomized for picking data out of a collision resistant hash of size /// `keysize` bytes @@ -24,15 +38,13 @@ impl Bloom { let num_bits = cmp::max(1, cmp::min(min_num_bits, max_bits)); let num_keys = ((num_bits as f64 / num as f64) * 2f64.log(2f64)).round() as usize; let keys: Vec = (0..num_keys).map(|_| rand::thread_rng().gen()).collect(); - let bits = BitVec::new_fill(false, num_bits as u64); - Bloom { - keys, - bits, - _phantom: Default::default(), - } + Self::new(num_bits, keys) } fn pos(&self, key: &T, k: u64) -> u64 { - key.hash(k) % self.bits.len() + key.hash_at_index(k) % self.bits.len() + } + pub fn clear(&mut self) { + self.bits.clear(); } pub fn add(&mut self, key: &T) { for k in &self.keys { @@ -51,10 +63,54 @@ impl Bloom { } } +fn to_slice(v: u64) -> [u8; 8] { + [ + v as u8, + (v >> 8) as u8, + (v >> 16) as u8, + (v >> 24) as u8, + (v >> 32) as u8, + (v >> 40) as u8, + (v >> 48) as u8, + (v >> 56) as u8, + ] +} + +fn from_slice(v: &[u8]) -> u64 { + u64::from(v[0]) + | u64::from(v[1]) << 8 + | u64::from(v[2]) << 16 + | u64::from(v[3]) << 24 + | u64::from(v[4]) << 32 + | u64::from(v[5]) << 40 + | u64::from(v[6]) << 48 + | u64::from(v[7]) << 56 +} + +fn slice_hash(slice: &[u8], hash_index: u64) -> u64 { + let hash = hashv(&[slice, &to_slice(hash_index)]); + from_slice(hash.as_ref()) +} + +impl> BloomHashIndex for T { + fn hash_at_index(&self, hash_index: u64) -> u64 { + slice_hash(self.as_ref(), hash_index) + } +} + #[cfg(test)] mod test { use super::*; use solana_sdk::hash::{hash, Hash}; + #[test] + fn test_slice() { + assert_eq!(from_slice(&to_slice(10)), 10); + assert_eq!(from_slice(&to_slice(0x7fff7fff)), 0x7fff7fff); + assert_eq!( + from_slice(&to_slice(0x7fff7fff7fff7fff)), + 0x7fff7fff7fff7fff + ); + } #[test] fn test_bloom_filter() { diff --git a/src/bloom_hash_index.rs b/src/bloom_hash_index.rs deleted file mode 100644 index a6c4931937..0000000000 --- a/src/bloom_hash_index.rs +++ /dev/null @@ -1,31 +0,0 @@ -use solana_sdk::hash::Hash; -use solana_sdk::pubkey::Pubkey; - -fn slice_hash(slice: &[u8], hash_index: u64) -> u64 { - let len = slice.len(); - assert!(len < 256); - let mut rv = 0u64; - for i in 0..8 { - let pos = (hash_index >> i) & 0xff; - rv |= u64::from(slice[pos as usize % len]) << i; - } - rv -} - -/// Generate a stable hash of `self` for each `hash_index` -/// Best effort can be made for uniqueness of each hash. -pub trait BloomHashIndex { - fn hash(&self, hash_index: u64) -> u64; -} - -impl BloomHashIndex for Pubkey { - fn hash(&self, hash_index: u64) -> u64 { - slice_hash(self.as_ref(), hash_index) - } -} - -impl BloomHashIndex for Hash { - fn hash(&self, hash_index: u64) -> u64 { - slice_hash(self.as_ref(), hash_index) - } -} diff --git a/src/lib.rs b/src/lib.rs index 697d11952f..24c88634bf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,6 @@ pub mod bank; pub mod banking_stage; pub mod blob_fetch_stage; pub mod bloom; -pub mod bloom_hash_index; pub mod broadcast_service; #[cfg(feature = "chacha")] pub mod chacha;