bloom for forking (#2431)
* bloom for forking * clippy fixes * remove bloom_hash_index
This commit is contained in:
72
src/bloom.rs
72
src/bloom.rs
@ -1,10 +1,16 @@
|
|||||||
//! Simple Bloom Filter
|
//! Simple Bloom Filter
|
||||||
use crate::bloom_hash_index::BloomHashIndex;
|
|
||||||
use bv::BitVec;
|
use bv::BitVec;
|
||||||
use rand::{self, Rng};
|
use rand::{self, Rng};
|
||||||
|
use solana_sdk::hash::hashv;
|
||||||
use std::cmp;
|
use std::cmp;
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
|
/// Produces a stable `u64` hash of `self` for every `hash_index`.
///
/// Implementations should make a best effort to return a distinct hash for
/// each index.
pub trait BloomHashIndex {
    /// Returns the hash of `self` that corresponds to `hash_index`.
    fn hash_at_index(&self, hash_index: u64) -> u64;
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Default, Clone, Debug, PartialEq)]
|
#[derive(Serialize, Deserialize, Default, Clone, Debug, PartialEq)]
|
||||||
pub struct Bloom<T: BloomHashIndex> {
|
pub struct Bloom<T: BloomHashIndex> {
|
||||||
pub keys: Vec<u64>,
|
pub keys: Vec<u64>,
|
||||||
@ -13,6 +19,14 @@ pub struct Bloom<T: BloomHashIndex> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<T: BloomHashIndex> Bloom<T> {
|
impl<T: BloomHashIndex> Bloom<T> {
|
||||||
|
pub fn new(num_bits: usize, keys: Vec<u64>) -> Self {
|
||||||
|
let bits = BitVec::new_fill(false, num_bits as u64);
|
||||||
|
Bloom {
|
||||||
|
keys,
|
||||||
|
bits,
|
||||||
|
_phantom: Default::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
/// create filter optimal for num size given the `false_rate`
|
/// create filter optimal for num size given the `false_rate`
|
||||||
/// the keys are randomized for picking data out of a collision resistant hash of size
|
/// the keys are randomized for picking data out of a collision resistant hash of size
|
||||||
/// `keysize` bytes
|
/// `keysize` bytes
|
||||||
@ -24,15 +38,13 @@ impl<T: BloomHashIndex> Bloom<T> {
|
|||||||
let num_bits = cmp::max(1, cmp::min(min_num_bits, max_bits));
|
let num_bits = cmp::max(1, cmp::min(min_num_bits, max_bits));
|
||||||
let num_keys = ((num_bits as f64 / num as f64) * 2f64.log(2f64)).round() as usize;
|
let num_keys = ((num_bits as f64 / num as f64) * 2f64.log(2f64)).round() as usize;
|
||||||
let keys: Vec<u64> = (0..num_keys).map(|_| rand::thread_rng().gen()).collect();
|
let keys: Vec<u64> = (0..num_keys).map(|_| rand::thread_rng().gen()).collect();
|
||||||
let bits = BitVec::new_fill(false, num_bits as u64);
|
Self::new(num_bits, keys)
|
||||||
Bloom {
|
|
||||||
keys,
|
|
||||||
bits,
|
|
||||||
_phantom: Default::default(),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
fn pos(&self, key: &T, k: u64) -> u64 {
|
fn pos(&self, key: &T, k: u64) -> u64 {
|
||||||
key.hash(k) % self.bits.len()
|
key.hash_at_index(k) % self.bits.len()
|
||||||
|
}
|
||||||
|
pub fn clear(&mut self) {
|
||||||
|
self.bits.clear();
|
||||||
}
|
}
|
||||||
pub fn add(&mut self, key: &T) {
|
pub fn add(&mut self, key: &T) {
|
||||||
for k in &self.keys {
|
for k in &self.keys {
|
||||||
@ -51,10 +63,54 @@ impl<T: BloomHashIndex> Bloom<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Encodes `v` as eight bytes, least-significant byte first.
///
/// This is the inverse of `from_slice`: index 0 of the returned array holds
/// bits 0..8 of `v`, and index 7 holds bits 56..64.
fn to_slice(v: u64) -> [u8; 8] {
    // The standard library already provides the little-endian encoding that
    // the manual shift-and-truncate sequence spelled out byte by byte.
    v.to_le_bytes()
}
|
||||||
|
|
||||||
|
/// Decodes a `u64` from the first eight bytes of `v`, least-significant byte
/// first.
///
/// Bytes past index 7 are ignored, so a longer buffer may be passed directly.
///
/// # Panics
///
/// Panics if `v` holds fewer than eight bytes.
fn from_slice(v: &[u8]) -> u64 {
    let mut bytes = [0u8; 8];
    // Slicing to exactly eight bytes keeps the panic on short input and lets
    // longer inputs through unchanged.
    bytes.copy_from_slice(&v[..8]);
    u64::from_le_bytes(bytes)
}
|
||||||
|
|
||||||
|
fn slice_hash(slice: &[u8], hash_index: u64) -> u64 {
|
||||||
|
let hash = hashv(&[slice, &to_slice(hash_index)]);
|
||||||
|
from_slice(hash.as_ref())
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: AsRef<[u8]>> BloomHashIndex for T {
|
||||||
|
fn hash_at_index(&self, hash_index: u64) -> u64 {
|
||||||
|
slice_hash(self.as_ref(), hash_index)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
use solana_sdk::hash::{hash, Hash};
|
use solana_sdk::hash::{hash, Hash};
|
||||||
|
#[test]
fn test_slice() {
    // Round-trip a one-byte value, a four-byte value and a value that uses
    // all eight bytes.
    for &value in &[10u64, 0x7fff7fff, 0x7fff7fff7fff7fff] {
        assert_eq!(from_slice(&to_slice(value)), value);
    }
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_bloom_filter() {
|
fn test_bloom_filter() {
|
||||||
|
@ -1,31 +0,0 @@
|
|||||||
use solana_sdk::hash::Hash;
|
|
||||||
use solana_sdk::pubkey::Pubkey;
|
|
||||||
|
|
||||||
fn slice_hash(slice: &[u8], hash_index: u64) -> u64 {
|
|
||||||
let len = slice.len();
|
|
||||||
assert!(len < 256);
|
|
||||||
let mut rv = 0u64;
|
|
||||||
for i in 0..8 {
|
|
||||||
let pos = (hash_index >> i) & 0xff;
|
|
||||||
rv |= u64::from(slice[pos as usize % len]) << i;
|
|
||||||
}
|
|
||||||
rv
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Generate a stable hash of `self` for each `hash_index`
|
|
||||||
/// Best effort can be made for uniqueness of each hash.
|
|
||||||
pub trait BloomHashIndex {
|
|
||||||
fn hash(&self, hash_index: u64) -> u64;
|
|
||||||
}
|
|
||||||
|
|
||||||
impl BloomHashIndex for Pubkey {
|
|
||||||
fn hash(&self, hash_index: u64) -> u64 {
|
|
||||||
slice_hash(self.as_ref(), hash_index)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl BloomHashIndex for Hash {
|
|
||||||
fn hash(&self, hash_index: u64) -> u64 {
|
|
||||||
slice_hash(self.as_ref(), hash_index)
|
|
||||||
}
|
|
||||||
}
|
|
@ -14,7 +14,6 @@ pub mod bank;
|
|||||||
pub mod banking_stage;
|
pub mod banking_stage;
|
||||||
pub mod blob_fetch_stage;
|
pub mod blob_fetch_stage;
|
||||||
pub mod bloom;
|
pub mod bloom;
|
||||||
pub mod bloom_hash_index;
|
|
||||||
pub mod broadcast_service;
|
pub mod broadcast_service;
|
||||||
#[cfg(feature = "chacha")]
|
#[cfg(feature = "chacha")]
|
||||||
pub mod chacha;
|
pub mod chacha;
|
||||||
|
Reference in New Issue
Block a user