Persist cost table to blockstore (#18123)

* Add a `ProgramCosts` column family to blockstore, implement LedgerColumn; add `delete_cf` to Rocks
* Add ProgramCosts to the compaction exclusion list alongside TransactionStatusIndex, kept in one place: `excludes_from_compaction()`

* Write the cost table to blockstore after `replay_stage` has replayed active banks; add stats to measure persist time
* Delete programs from `ProgramCosts` in blockstore when they are removed from the in-memory cost_table
* Only persist to blockstore when the cost_table has changed
* Restore the cost table during validator startup

* Offload `cost_model`-related operations from the replay main thread to a dedicated service thread; add a channel to send execute_timings between the threads (see the sketch below)
* Move `cost_update_service` to its own module; replay_stage is now decoupled from cost_model
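The threading change in the last two bullets follows a standard producer/consumer hand-off. Below is a minimal sketch of that pattern with hypothetical names: `ExecuteTimings` is reduced to a single field and `spawn_cost_update_service` is illustrative, not the commit's actual API.

use std::sync::mpsc::{channel, Receiver};
use std::thread::{self, JoinHandle};

// Stand-in for the per-bank timing data replay_stage produces.
struct ExecuteTimings {
    execute_us: u64,
}

fn spawn_cost_update_service(receiver: Receiver<ExecuteTimings>) -> JoinHandle<()> {
    thread::spawn(move || {
        // The dedicated thread owns all cost_model updates, keeping the
        // bookkeeping off the replay main thread.
        for timings in receiver.iter() {
            // ...feed `timings` into the cost model; persist if it changed...
            let _ = timings.execute_us;
        }
    })
}

fn main() {
    let (sender, receiver) = channel();
    let service = spawn_cost_update_service(receiver);
    // replay_stage side: a cheap, non-blocking send after replaying a bank.
    sender.send(ExecuteTimings { execute_us: 1_234 }).unwrap();
    drop(sender); // closing the channel lets the service thread drain and exit
    service.join().unwrap();
}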
Commit 5e424826ba (parent 05924423c2)
Tao Zhu, committed by GitHub, 2021-07-01 11:32:41 -05:00
9 changed files with 575 additions and 149 deletions

ledger/src/blockstore.rs

@@ -143,6 +143,7 @@ pub struct Blockstore {
blocktime_cf: LedgerColumn<cf::Blocktime>,
perf_samples_cf: LedgerColumn<cf::PerfSamples>,
block_height_cf: LedgerColumn<cf::BlockHeight>,
program_costs_cf: LedgerColumn<cf::ProgramCosts>,
last_root: Arc<RwLock<Slot>>,
insert_shreds_lock: Arc<Mutex<()>>,
pub new_shreds_signals: Vec<SyncSender<bool>>,
@@ -342,6 +343,7 @@ impl Blockstore {
let blocktime_cf = db.column();
let perf_samples_cf = db.column();
let block_height_cf = db.column();
let program_costs_cf = db.column();
let db = Arc::new(db);
@@ -390,6 +392,7 @@ impl Blockstore {
blocktime_cf,
perf_samples_cf,
block_height_cf,
program_costs_cf,
new_shreds_signals: vec![],
completed_slots_senders: vec![],
insert_shreds_lock: Arc::new(Mutex::new(())),
@@ -2686,6 +2689,26 @@ impl Blockstore {
self.perf_samples_cf.put(index, perf_sample)
}
pub fn read_program_costs(&self) -> Result<Vec<(Pubkey, u64)>> {
Ok(self
.db
.iter::<cf::ProgramCosts>(IteratorMode::End)?
.map(|(pubkey, data)| {
let program_cost: ProgramCost = deserialize(&data).unwrap();
(pubkey, program_cost.cost)
})
.collect())
}
pub fn write_program_cost(&self, key: &Pubkey, value: &u64) -> Result<()> {
self.program_costs_cf
.put(*key, &ProgramCost { cost: *value })
}
pub fn delete_program_cost(&self, key: &Pubkey) -> Result<()> {
self.program_costs_cf.delete(*key)
}
/// Returns the entry vector for the slot starting with `shred_start_index`
pub fn get_slot_entries(&self, slot: Slot, shred_start_index: u64) -> Result<Vec<Entry>> {
self.get_slot_entries_with_shred_info(slot, shred_start_index, false)
@@ -8850,4 +8873,126 @@ pub mod tests {
Blockstore::destroy(&blockstore_path).expect("Expected successful database destruction");
}
#[test]
fn test_read_write_cost_table() {
let blockstore_path = get_tmp_ledger_path!();
{
let blockstore = Blockstore::open(&blockstore_path).unwrap();
let num_entries: usize = 10;
let mut cost_table: HashMap<Pubkey, u64> = HashMap::new();
for x in 1..num_entries + 1 {
cost_table.insert(Pubkey::new_unique(), (x + 100) as u64);
}
// write to db
for (key, cost) in cost_table.iter() {
blockstore
.write_program_cost(key, cost)
.expect("write a program");
}
// read back from db
let read_back = blockstore.read_program_costs().expect("read programs");
// verify
assert_eq!(read_back.len(), cost_table.len());
for (read_key, read_cost) in read_back {
assert_eq!(read_cost, *cost_table.get(&read_key).unwrap());
}
// update value, write to db
for val in cost_table.values_mut() {
*val += 100;
}
for (key, cost) in cost_table.iter() {
blockstore
.write_program_cost(key, cost)
.expect("write a program");
}
// add a new record
let new_program_key = Pubkey::new_unique();
let new_program_cost = 999;
blockstore
.write_program_cost(&new_program_key, &new_program_cost)
.unwrap();
// confirm value updated
let read_back = blockstore.read_program_costs().expect("read programs");
// verify
assert_eq!(read_back.len(), cost_table.len() + 1);
for (key, cost) in cost_table.iter() {
assert_eq!(*cost, read_back.iter().find(|(k, _v)| k == key).unwrap().1);
}
assert_eq!(
new_program_cost,
read_back
.iter()
.find(|(k, _v)| *k == new_program_key)
.unwrap()
.1
);
// test delete
blockstore
.delete_program_cost(&new_program_key)
.expect("delete a progrma");
let read_back = blockstore.read_program_costs().expect("read programs");
// verify
assert_eq!(read_back.len(), cost_table.len());
for (read_key, read_cost) in read_back {
assert_eq!(read_cost, *cost_table.get(&read_key).unwrap());
}
}
Blockstore::destroy(&blockstore_path).expect("Expected successful database destruction");
}
#[test]
fn test_delete_old_records_from_cost_table() {
let blockstore_path = get_tmp_ledger_path!();
{
let blockstore = Blockstore::open(&blockstore_path).unwrap();
let num_entries: usize = 10;
let mut cost_table: HashMap<Pubkey, u64> = HashMap::new();
for x in 1..num_entries + 1 {
cost_table.insert(Pubkey::new_unique(), (x + 100) as u64);
}
// write to db
for (key, cost) in cost_table.iter() {
blockstore
.write_program_cost(key, cost)
.expect("write a program");
}
// remove a record
let mut removed_key = Pubkey::new_unique();
for (key, cost) in cost_table.iter() {
if *cost == 101_u64 {
removed_key = *key;
break;
}
}
cost_table.remove(&removed_key);
// delete records from blockstore if they are no longer in cost_table
let db_records = blockstore.read_program_costs().expect("read programs");
db_records.iter().for_each(|(pubkey, _)| {
if !cost_table.iter().any(|(key, _)| key == pubkey) {
assert_eq!(*pubkey, removed_key);
blockstore
.delete_program_cost(pubkey)
.expect("delete old program");
}
});
// read back from db
let read_back = blockstore.read_program_costs().expect("read programs");
// verify
assert_eq!(read_back.len(), cost_table.len());
for (read_key, read_cost) in read_back {
assert_eq!(read_cost, *cost_table.get(&read_key).unwrap());
}
}
Blockstore::destroy(&blockstore_path).expect("Expected successful database destruction");
}
}
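For orientation, here is a hedged usage sketch of the three methods added above. It assumes a scratch ledger created with the same `get_tmp_ledger_path!` macro the tests use, plus the `solana-ledger` and `solana-sdk` crates as dependencies; it is an illustration, not part of the commit.

use solana_ledger::{blockstore::Blockstore, get_tmp_ledger_path};
use solana_sdk::pubkey::Pubkey;

fn main() {
    let ledger_path = get_tmp_ledger_path!();
    let blockstore = Blockstore::open(&ledger_path).unwrap();

    // Upsert one program's cost; write_program_cost overwrites any prior value.
    let program = Pubkey::new_unique();
    blockstore.write_program_cost(&program, &2_500).unwrap();

    // read_program_costs returns every (program, cost) pair in the column family.
    for (pubkey, cost) in blockstore.read_program_costs().unwrap() {
        println!("{}: {} units", pubkey, cost);
    }

    // Remove the record when the program leaves the in-memory cost table.
    blockstore.delete_program_cost(&program).unwrap();
}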

ledger/src/blockstore_db.rs

@@ -22,7 +22,7 @@ use solana_sdk::{
};
use solana_storage_proto::convert::generated;
use std::{
- collections::HashMap,
+ collections::{HashMap, HashSet},
ffi::{CStr, CString},
fs,
marker::PhantomData,
@@ -71,6 +71,8 @@ const BLOCKTIME_CF: &str = "blocktime";
const PERF_SAMPLES_CF: &str = "perf_samples";
/// Column family for BlockHeight
const BLOCK_HEIGHT_CF: &str = "block_height";
/// Column family for ProgramCosts
const PROGRAM_COSTS_CF: &str = "program_costs";
// 1 day is chosen for the same reason as DEFAULT_COMPACTION_SLOT_INTERVAL
const PERIODIC_COMPACTION_SECONDS: u64 = 60 * 60 * 24;
@@ -174,6 +176,10 @@ pub mod columns {
#[derive(Debug)]
/// The block height column
pub struct BlockHeight;
#[derive(Debug)]
/// The program costs column
pub struct ProgramCosts;
}
pub enum AccessType {
@@ -258,8 +264,8 @@ impl Rocks {
) -> Result<Rocks> {
use columns::{
AddressSignatures, BlockHeight, Blocktime, DeadSlots, DuplicateSlots, ErasureMeta,
- Index, Orphans, PerfSamples, Rewards, Root, ShredCode, ShredData, SlotMeta,
- TransactionStatus, TransactionStatusIndex,
+ Index, Orphans, PerfSamples, ProgramCosts, Rewards, Root, ShredCode, ShredData,
+ SlotMeta, TransactionStatus, TransactionStatusIndex,
};
fs::create_dir_all(&path)?;
@@ -340,6 +346,10 @@ impl Rocks {
BlockHeight::NAME,
get_cf_options::<BlockHeight>(&access_type, &oldest_slot),
);
let program_costs_cf_descriptor = ColumnFamilyDescriptor::new(
ProgramCosts::NAME,
get_cf_options::<ProgramCosts>(&access_type, &oldest_slot),
);
// Don't forget to add to both run_purge_with_stats() and
// compact_storage() in ledger/src/blockstore/blockstore_purge.rs!!
@@ -363,6 +373,7 @@ impl Rocks {
(Blocktime::NAME, blocktime_cf_descriptor),
(PerfSamples::NAME, perf_samples_cf_descriptor),
(BlockHeight::NAME, block_height_cf_descriptor),
(ProgramCosts::NAME, program_costs_cf_descriptor),
];
let cf_names: Vec<_> = cfs.iter().map(|c| c.0).collect();
@@ -403,9 +414,9 @@ impl Rocks {
// This is only needed for LedgerCleanupService, so guard with PrimaryOnly (i.e. running solana-validator)
if matches!(access_type, AccessType::PrimaryOnly) {
for cf_name in cf_names {
// this special column family must be excluded from LedgerCleanupService's rocksdb
// these special column families must be excluded from LedgerCleanupService's rocksdb
// compactions
if cf_name == TransactionStatusIndex::NAME {
if excludes_from_compaction(cf_name) {
continue;
}
@@ -463,8 +474,8 @@ impl Rocks {
fn columns(&self) -> Vec<&'static str> {
use columns::{
AddressSignatures, BlockHeight, Blocktime, DeadSlots, DuplicateSlots, ErasureMeta,
- Index, Orphans, PerfSamples, Rewards, Root, ShredCode, ShredData, SlotMeta,
- TransactionStatus, TransactionStatusIndex,
+ Index, Orphans, PerfSamples, ProgramCosts, Rewards, Root, ShredCode, ShredData,
+ SlotMeta, TransactionStatus, TransactionStatusIndex,
};
vec![
@@ -484,6 +495,7 @@ impl Rocks {
Blocktime::NAME,
PerfSamples::NAME,
BlockHeight::NAME,
ProgramCosts::NAME,
]
}
@@ -509,6 +521,11 @@ impl Rocks {
Ok(())
}
fn delete_cf(&self, cf: &ColumnFamily, key: &[u8]) -> Result<()> {
self.0.delete_cf(cf, key)?;
Ok(())
}
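The new `delete_cf` is a thin wrapper over the rust-rocksdb crate's column-family delete. For readers unfamiliar with that API, a self-contained sketch against the `rocksdb` crate directly; the path and CF name are illustrative, and exact signatures vary slightly across crate versions:

use rocksdb::{ColumnFamilyDescriptor, Options, DB};

fn main() {
    let mut db_opts = Options::default();
    db_opts.create_if_missing(true);
    db_opts.create_missing_column_families(true);
    let cfs = vec![ColumnFamilyDescriptor::new("program_costs", Options::default())];
    let db = DB::open_cf_descriptors(&db_opts, "/tmp/example-db", cfs).unwrap();

    // Look up the handle, write a key into the CF, then delete it again.
    let cf = db.cf_handle("program_costs").unwrap();
    db.put_cf(cf, b"key", b"value").unwrap();
    db.delete_cf(cf, b"key").unwrap();
    assert!(db.get_cf(cf, b"key").unwrap().is_none());
}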
fn iterator_cf<C>(&self, cf: &ColumnFamily, iterator_mode: IteratorMode<C::Index>) -> DBIterator
where
C: Column,
@@ -750,6 +767,39 @@ impl TypedColumn for columns::BlockHeight {
type Type = u64;
}
impl ColumnName for columns::ProgramCosts {
const NAME: &'static str = PROGRAM_COSTS_CF;
}
impl TypedColumn for columns::ProgramCosts {
type Type = blockstore_meta::ProgramCost;
}
impl Column for columns::ProgramCosts {
type Index = Pubkey;
fn key(pubkey: Pubkey) -> Vec<u8> {
let mut key = vec![0; 32]; // size_of Pubkey
key[0..32].clone_from_slice(&pubkey.as_ref()[0..32]);
key
}
fn index(key: &[u8]) -> Self::Index {
Pubkey::new(&key[0..32])
}
fn primary_index(_index: Self::Index) -> u64 {
unimplemented!()
}
fn slot(_index: Self::Index) -> Slot {
unimplemented!()
}
#[allow(clippy::wrong_self_convention)]
fn as_index(_index: u64) -> Self::Index {
Pubkey::default()
}
}
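Unlike the slot-keyed columns, this column is keyed directly by the 32-byte `Pubkey`, which is why `primary_index()` and `slot()` are deliberately `unimplemented!()`. A standalone sketch of the same fixed-width key codec; the free functions here are illustrative stand-ins for the trait methods:

use solana_sdk::pubkey::Pubkey;

// Encode: a Pubkey is exactly 32 bytes, so the key is a straight copy.
fn key(pubkey: &Pubkey) -> Vec<u8> {
    pubkey.as_ref().to_vec()
}

// Decode: rebuild the Pubkey from the key's 32 bytes.
fn index(key: &[u8]) -> Pubkey {
    Pubkey::new(&key[0..32])
}

fn main() {
    let pk = Pubkey::new_unique();
    assert_eq!(index(&key(&pk)), pk); // the codec round-trips losslessly
}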
impl Column for columns::ShredCode {
type Index = (u64, u64);
@@ -1113,6 +1163,10 @@ where
self.backend
.put_cf(self.handle(), &C::key(key), &serialized_value)
}
pub fn delete(&self, key: C::Index) -> Result<()> {
self.backend.delete_cf(self.handle(), &C::key(key))
}
}
impl<C> LedgerColumn<C>
@@ -1260,11 +1314,9 @@ fn get_cf_options<C: 'static + Column + ColumnName>(
options.set_max_bytes_for_level_base(total_size_base);
options.set_target_file_size_base(file_size_base);
- // TransactionStatusIndex must be excluded from LedgerCleanupService's rocksdb
+ // TransactionStatusIndex and ProgramCosts must be excluded from LedgerCleanupService's rocksdb
// compactions....
- if matches!(access_type, AccessType::PrimaryOnly)
-     && C::NAME != columns::TransactionStatusIndex::NAME
- {
+ if matches!(access_type, AccessType::PrimaryOnly) && !excludes_from_compaction(C::NAME) {
options.set_compaction_filter_factory(PurgedSlotFilterFactory::<C> {
oldest_slot: oldest_slot.clone(),
name: CString::new(format!("purged_slot_filter_factory({})", C::NAME)).unwrap(),
@@ -1304,6 +1356,18 @@ fn get_db_options(access_type: &AccessType) -> Options {
options
}
fn excludes_from_compaction(cf_name: &str) -> bool {
// list of column families that must be excluded from compaction:
let no_compaction_cfs: HashSet<&'static str> = vec![
columns::TransactionStatusIndex::NAME,
columns::ProgramCosts::NAME,
]
.into_iter()
.collect();
no_compaction_cfs.contains(cf_name)
}
#[cfg(test)]
pub mod tests {
use super::*;
@@ -1356,4 +1420,14 @@ pub mod tests {
CompactionDecision::Keep
);
}
#[test]
fn test_excludes_from_compaction() {
// currently there are two CFs excluded from compaction:
assert!(excludes_from_compaction(
columns::TransactionStatusIndex::NAME
));
assert!(excludes_from_compaction(columns::ProgramCosts::NAME));
assert!(!excludes_from_compaction("something else"));
}
}

ledger/src/blockstore_meta.rs

@@ -253,6 +253,11 @@ pub struct PerfSample {
pub sample_period_secs: u16,
}
#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)]
pub struct ProgramCost {
pub cost: u64,
}
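Values of this type are (de)serialized through the blockstore's bincode-based `TypedColumn` plumbing (see the `deserialize` call in `read_program_costs()` above). A minimal round-trip sketch, assuming the `serde` and `bincode` crates the ledger code already depends on:

use serde::{Deserialize, Serialize};

#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)]
pub struct ProgramCost {
    pub cost: u64,
}

fn main() {
    let original = ProgramCost { cost: 2_500 };
    let bytes = bincode::serialize(&original).unwrap();
    assert_eq!(bytes.len(), 8); // one fixed-width u64 field -> 8 bytes
    let decoded: ProgramCost = bincode::deserialize(&bytes).unwrap();
    assert_eq!(decoded, original);
}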
#[cfg(test)]
mod test {
use super::*;