bloom for forking (#2431)

* bloom for forking
* clippy fixes
* remove bloom_hash_index
This commit is contained in:
Rob Walker
2019-01-15 13:56:54 -08:00
committed by GitHub
parent 4e2663023b
commit 45c247fa5b
3 changed files with 64 additions and 40 deletions

View File

@ -1,10 +1,16 @@
//! Simple Bloom Filter //! Simple Bloom Filter
use crate::bloom_hash_index::BloomHashIndex;
use bv::BitVec; use bv::BitVec;
use rand::{self, Rng}; use rand::{self, Rng};
use solana_sdk::hash::hashv;
use std::cmp; use std::cmp;
use std::marker::PhantomData; use std::marker::PhantomData;
/// Source of the hash values a `Bloom` filter uses to pick bit positions.
///
/// Implementations generate a stable hash of `self` for each `hash_index`:
/// the same value and index must always produce the same result.
/// Different indices should, on a best-effort basis, produce different hashes.
pub trait BloomHashIndex {
/// Returns the hash of `self` selected by `hash_index`.
///
/// `Bloom::pos` reduces this value modulo the number of bits in the filter.
fn hash_at_index(&self, hash_index: u64) -> u64;
}
#[derive(Serialize, Deserialize, Default, Clone, Debug, PartialEq)] #[derive(Serialize, Deserialize, Default, Clone, Debug, PartialEq)]
pub struct Bloom<T: BloomHashIndex> { pub struct Bloom<T: BloomHashIndex> {
pub keys: Vec<u64>, pub keys: Vec<u64>,
@ -13,6 +19,14 @@ pub struct Bloom<T: BloomHashIndex> {
} }
impl<T: BloomHashIndex> Bloom<T> { impl<T: BloomHashIndex> Bloom<T> {
pub fn new(num_bits: usize, keys: Vec<u64>) -> Self {
let bits = BitVec::new_fill(false, num_bits as u64);
Bloom {
keys,
bits,
_phantom: Default::default(),
}
}
/// create filter optimal for num size given the `false_rate` /// create filter optimal for num size given the `false_rate`
/// the keys are randomized for picking data out of a collision resistant hash of size /// the keys are randomized for picking data out of a collision resistant hash of size
/// `keysize` bytes /// `keysize` bytes
@ -24,15 +38,13 @@ impl<T: BloomHashIndex> Bloom<T> {
let num_bits = cmp::max(1, cmp::min(min_num_bits, max_bits)); let num_bits = cmp::max(1, cmp::min(min_num_bits, max_bits));
let num_keys = ((num_bits as f64 / num as f64) * 2f64.log(2f64)).round() as usize; let num_keys = ((num_bits as f64 / num as f64) * 2f64.log(2f64)).round() as usize;
let keys: Vec<u64> = (0..num_keys).map(|_| rand::thread_rng().gen()).collect(); let keys: Vec<u64> = (0..num_keys).map(|_| rand::thread_rng().gen()).collect();
let bits = BitVec::new_fill(false, num_bits as u64); Self::new(num_bits, keys)
Bloom {
keys,
bits,
_phantom: Default::default(),
}
} }
fn pos(&self, key: &T, k: u64) -> u64 { fn pos(&self, key: &T, k: u64) -> u64 {
key.hash(k) % self.bits.len() key.hash_at_index(k) % self.bits.len()
}
pub fn clear(&mut self) {
self.bits.clear();
} }
pub fn add(&mut self, key: &T) { pub fn add(&mut self, key: &T) {
for k in &self.keys { for k in &self.keys {
@ -51,10 +63,54 @@ impl<T: BloomHashIndex> Bloom<T> {
} }
} }
/// Encodes `v` as its eight little-endian bytes (least-significant byte first).
///
/// Despite the name, the result is a fixed-size array; borrow it to obtain a
/// slice. This is the inverse of `from_slice`.
fn to_slice(v: u64) -> [u8; 8] {
    // The standard library already provides this exact encoding.
    v.to_le_bytes()
}
/// Decodes the first eight bytes of `v` as a little-endian `u64`.
///
/// Bytes beyond the eighth are ignored, so a longer digest can be passed
/// directly. This is the inverse of `to_slice`.
///
/// # Panics
///
/// Panics if `v` holds fewer than eight bytes.
fn from_slice(v: &[u8]) -> u64 {
    let mut bytes = [0u8; 8];
    // `&v[..8]` enforces the minimum length; anything longer is truncated.
    bytes.copy_from_slice(&v[..8]);
    u64::from_le_bytes(bytes)
}
/// Derives the `hash_index`-th 64-bit hash of `slice`.
///
/// `slice` and the little-endian encoding of `hash_index` are passed together
/// to `hashv` (presumably hashed as one concatenated input — confirm against
/// `solana_sdk::hash`). The first eight bytes of the resulting digest are
/// decoded as a little-endian `u64`.
fn slice_hash(slice: &[u8], hash_index: u64) -> u64 {
    let index_bytes = to_slice(hash_index);
    let digest = hashv(&[slice, &index_bytes]);
    from_slice(digest.as_ref())
}
/// Blanket implementation: any type that can be viewed as a byte slice is
/// usable as a bloom filter key, hashed through `slice_hash`.
impl<T> BloomHashIndex for T
where
    T: AsRef<[u8]>,
{
    fn hash_at_index(&self, hash_index: u64) -> u64 {
        let bytes: &[u8] = self.as_ref();
        slice_hash(bytes, hash_index)
    }
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::*; use super::*;
use solana_sdk::hash::{hash, Hash}; use solana_sdk::hash::{hash, Hash};
/// `to_slice` followed by `from_slice` must reproduce the original value.
#[test]
fn test_slice() {
    let samples: [u64; 3] = [10, 0x7fff7fff, 0x7fff7fff7fff7fff];
    for &value in samples.iter() {
        assert_eq!(from_slice(&to_slice(value)), value);
    }
}
#[test] #[test]
fn test_bloom_filter() { fn test_bloom_filter() {

View File

@ -1,31 +0,0 @@
use solana_sdk::hash::Hash;
use solana_sdk::pubkey::Pubkey;
/// Builds a 64-bit value by sampling eight bytes of `slice`.
///
/// Byte `i` of `hash_index` (taken modulo `slice.len()`) selects the position
/// in `slice` whose value becomes byte `i` of the result.
///
/// The previous version shifted by `i` bits instead of `8 * i`, so the eight
/// sampled bytes overlapped and the result never exceeded 15 bits.
///
/// # Panics
///
/// Panics if `slice` is empty or holds 256 or more bytes. The empty case used
/// to surface as a remainder-by-zero panic; it is now an explicit precondition.
fn slice_hash(slice: &[u8], hash_index: u64) -> u64 {
    let len = slice.len();
    assert!(
        len > 0 && len < 256,
        "slice_hash requires between 1 and 255 bytes, got {}",
        len
    );
    (0..8u32).fold(0u64, |rv, i| {
        // Work in whole bytes: byte `i` of the index picks byte `i` of the output.
        let shift = 8 * i;
        let pos = ((hash_index >> shift) & 0xff) as usize;
        rv | (u64::from(slice[pos % len]) << shift)
    })
}
/// Source of the hash values used to index a bloom filter.
///
/// Implementations generate a stable hash of `self` for each `hash_index`:
/// the same value and index must always produce the same result.
/// Different indices should, on a best-effort basis, produce different hashes.
pub trait BloomHashIndex {
/// Returns the hash of `self` selected by `hash_index`.
fn hash(&self, hash_index: u64) -> u64;
}
/// Hashes a `Pubkey` by handing its byte representation to `slice_hash`.
impl BloomHashIndex for Pubkey {
    fn hash(&self, hash_index: u64) -> u64 {
        let bytes: &[u8] = self.as_ref();
        slice_hash(bytes, hash_index)
    }
}
/// Hashes a `Hash` by handing its byte representation to `slice_hash`.
impl BloomHashIndex for Hash {
    fn hash(&self, hash_index: u64) -> u64 {
        let bytes: &[u8] = self.as_ref();
        slice_hash(bytes, hash_index)
    }
}

View File

@ -14,7 +14,6 @@ pub mod bank;
pub mod banking_stage; pub mod banking_stage;
pub mod blob_fetch_stage; pub mod blob_fetch_stage;
pub mod bloom; pub mod bloom;
pub mod bloom_hash_index;
pub mod broadcast_service; pub mod broadcast_service;
#[cfg(feature = "chacha")] #[cfg(feature = "chacha")]
pub mod chacha; pub mod chacha;