Reject blocks for costs above the max block cost (#18994)

* added realtime cost checking logic to reject block that would exceed max limit:
- defines max limits at block_cost_limits.rs
- right after each batch's execution, accumulate its cost and check against the
  limit, return error if limit is exceeded

* update abi that changed due to adding additional TransactionError

* To avoid counting stats multiple times, only accumulate execute-timing when a bank is completed

* gate it by a feature

* move cost const def into block_cost_limits.rs

* redefine the cost for signature and account access, removed signer part as it is not well defined for now

* check if per_program_timings of execute_timings is empty before sending
This commit is contained in:
Tao Zhu
2021-08-12 10:48:47 -05:00
committed by GitHub
parent 9d8594a046
commit 414d904959
10 changed files with 159 additions and 47 deletions

View File

@ -0,0 +1,42 @@
//! defines block cost related limits
//!
// see https://github.com/solana-labs/solana/issues/18944
// and https://github.com/solana-labs/solana/pull/18994#issuecomment-896128992
//
// Cost-model knobs; values are derived in the linked issue/PR discussion.
pub const MAX_BLOCK_TIME_US: u64 = 400_000; // target: at most 400ms per block
pub const AVG_INSTRUCTION_TIME_US: u64 = 1_000; // mean instruction execution time
pub const SYSTEM_PARALLELISM: u64 = 10;
pub const MAX_INSTRUCTION_COST: u64 = 200_000;
pub const MAX_NUMBER_BPF_INSTRUCTIONS_PER_ACCOUNT: u64 = 200;

/// Number of instructions that fit in one block, given the block time budget
/// and the assumed system parallelism.
pub const fn max_instructions_per_block() -> u64 {
    SYSTEM_PARALLELISM * (MAX_BLOCK_TIME_US / AVG_INSTRUCTION_TIME_US)
}

/// Maximum total cost (in compute units) allowed in a single block.
pub const fn block_cost_max() -> u64 {
    max_instructions_per_block() * MAX_INSTRUCTION_COST
}

/// Maximum total cost chargeable against a single account within one block.
pub const fn account_cost_max() -> u64 {
    max_instructions_per_block() * MAX_INSTRUCTION_COST
}

/// Compute units per microsecond of execution time.
pub const fn compute_unit_to_us_ratio() -> u64 {
    block_cost_max() / MAX_BLOCK_TIME_US
}

/// Cost of one signature verification (averages ~10us).
pub const fn signature_cost() -> u64 {
    10 * compute_unit_to_us_ratio()
}

/// Cost of one account read (averages ~5us).
pub const fn account_read_cost() -> u64 {
    5 * compute_unit_to_us_ratio()
}

/// Cost of one account write (averages ~25us).
pub const fn account_write_cost() -> u64 {
    25 * compute_unit_to_us_ratio()
}

View File

@ -1,6 +1,7 @@
use crate::{
block_error::BlockError, blockstore::Blockstore, blockstore_db::BlockstoreError,
blockstore_meta::SlotMeta, leader_schedule_cache::LeaderScheduleCache,
block_cost_limits::*, block_error::BlockError, blockstore::Blockstore,
blockstore_db::BlockstoreError, blockstore_meta::SlotMeta,
leader_schedule_cache::LeaderScheduleCache,
};
use chrono_humanize::{Accuracy, HumanTime, Tense};
use crossbeam_channel::Sender;
@ -31,6 +32,7 @@ use solana_runtime::{
};
use solana_sdk::{
clock::{Slot, MAX_PROCESSING_AGE},
feature_set,
genesis_config::GenesisConfig,
hash::Hash,
pubkey::Pubkey,
@ -48,11 +50,40 @@ use std::{
convert::TryFrom,
path::PathBuf,
result,
sync::Arc,
sync::{Arc, RwLock},
time::{Duration, Instant},
};
use thiserror::Error;
// Tracks the remaining block cost capacity: the number of compute-units
// still allowed under the max block cost limit.
#[derive(Debug)]
pub struct BlockCostCapacityMeter {
    // total compute-unit budget for the block
    pub capacity: u64,
    // compute-units consumed so far
    pub accumulated_cost: u64,
}
impl Default for BlockCostCapacityMeter {
    /// A meter whose capacity is the configured maximum block cost.
    fn default() -> Self {
        Self::new(block_cost_max())
    }
}
impl BlockCostCapacityMeter {
    /// Creates a meter with the given compute-unit capacity and no cost
    /// accumulated yet.
    pub fn new(capacity_limit: u64) -> Self {
        Self {
            capacity: capacity_limit,
            accumulated_cost: 0_u64,
        }
    }

    /// Adds `cost` to the accumulated total and returns the remaining
    /// capacity (zero once the limit is reached or exceeded).
    pub fn accumulate(&mut self, cost: u64) -> u64 {
        // Saturate instead of `+=` so a pathological cost sum cannot
        // overflow (panic in debug, wrap in release); once saturated the
        // remaining capacity is 0 anyway, matching the saturating_sub below.
        self.accumulated_cost = self.accumulated_cost.saturating_add(cost);
        self.capacity.saturating_sub(self.accumulated_cost)
    }
}
pub type BlockstoreProcessorResult =
result::Result<(BankForks, LeaderScheduleCache), BlockstoreProcessorError>;
@ -100,12 +131,25 @@ fn get_first_error(
first_err
}
/// Sums, over every program seen by `execute_timings`, the average number of
/// compute units per invocation of that program.
fn aggregate_total_execution_units(execute_timings: &ExecuteTimings) -> u64 {
    execute_timings
        .details
        .per_program_timings
        .iter()
        .filter(|(_, timing)| timing.count >= 1) // skip to avoid div-by-zero
        .map(|(program_id, timing)| {
            trace!("aggregated execution cost of {:?} {:?}", program_id, timing);
            timing.accumulated_units / timing.count as u64
        })
        .sum()
}
fn execute_batch(
batch: &TransactionBatch,
bank: &Arc<Bank>,
transaction_status_sender: Option<&TransactionStatusSender>,
replay_vote_sender: Option<&ReplayVoteSender>,
timings: &mut ExecuteTimings,
cost_capacity_meter: Arc<RwLock<BlockCostCapacityMeter>>,
) -> Result<()> {
let record_token_balances = transaction_status_sender.is_some();
@ -117,6 +161,8 @@ fn execute_batch(
vec![]
};
let pre_process_units: u64 = aggregate_total_execution_units(timings);
let (tx_results, balances, inner_instructions, transaction_logs) =
batch.bank().load_execute_and_commit_transactions(
batch,
@ -127,6 +173,29 @@ fn execute_batch(
timings,
);
if bank
.feature_set
.is_active(&feature_set::gate_large_block::id())
{
let execution_cost_units = aggregate_total_execution_units(timings) - pre_process_units;
let remaining_block_cost_cap = cost_capacity_meter
.write()
.unwrap()
.accumulate(execution_cost_units);
debug!(
"bank {} executed a batch, number of transactions {}, total execute cu {}, remaining block cost cap {}",
bank.slot(),
batch.sanitized_transactions().len(),
execution_cost_units,
remaining_block_cost_cap,
);
if remaining_block_cost_cap == 0_u64 {
return Err(TransactionError::WouldExceedMaxBlockCostLimit);
}
}
bank_utils::find_and_send_votes(
batch.sanitized_transactions(),
&tx_results,
@ -174,6 +243,7 @@ fn execute_batches(
transaction_status_sender: Option<&TransactionStatusSender>,
replay_vote_sender: Option<&ReplayVoteSender>,
timings: &mut ExecuteTimings,
cost_capacity_meter: Arc<RwLock<BlockCostCapacityMeter>>,
) -> Result<()> {
inc_new_counter_debug!("bank-par_execute_entries-count", batches.len());
let (results, new_timings): (Vec<Result<()>>, Vec<ExecuteTimings>) =
@ -189,6 +259,7 @@ fn execute_batches(
transaction_status_sender,
replay_vote_sender,
&mut timings,
cost_capacity_meter.clone(),
);
if let Some(entry_callback) = entry_callback {
entry_callback(bank);
@ -233,6 +304,7 @@ pub fn process_entries(
transaction_status_sender,
replay_vote_sender,
&mut timings,
Arc::new(RwLock::new(BlockCostCapacityMeter::default())),
);
debug!("process_entries: {:?}", timings);
@ -248,6 +320,7 @@ fn process_entries_with_callback(
transaction_status_sender: Option<&TransactionStatusSender>,
replay_vote_sender: Option<&ReplayVoteSender>,
timings: &mut ExecuteTimings,
cost_capacity_meter: Arc<RwLock<BlockCostCapacityMeter>>,
) -> Result<()> {
// accumulator for entries that can be processed in parallel
let mut batches = vec![];
@ -269,6 +342,7 @@ fn process_entries_with_callback(
transaction_status_sender,
replay_vote_sender,
timings,
cost_capacity_meter.clone(),
)?;
batches.clear();
for hash in &tick_hashes {
@ -320,6 +394,7 @@ fn process_entries_with_callback(
transaction_status_sender,
replay_vote_sender,
timings,
cost_capacity_meter.clone(),
)?;
batches.clear();
}
@ -334,6 +409,7 @@ fn process_entries_with_callback(
transaction_status_sender,
replay_vote_sender,
timings,
cost_capacity_meter,
)?;
for hash in tick_hashes {
bank.register_tick(hash);
@ -820,6 +896,7 @@ pub fn confirm_slot(
let mut replay_elapsed = Measure::start("replay_elapsed");
let mut execute_timings = ExecuteTimings::default();
let cost_capacity_meter = Arc::new(RwLock::new(BlockCostCapacityMeter::default()));
// Note: This will shuffle entries' transactions in-place.
let process_result = process_entries_with_callback(
bank,
@ -829,6 +906,7 @@ pub fn confirm_slot(
transaction_status_sender,
replay_vote_sender,
&mut execute_timings,
cost_capacity_meter,
)
.map_err(BlockstoreProcessorError::from);
replay_elapsed.stop();

View File

@ -10,6 +10,7 @@ pub mod block_error;
#[macro_use]
pub mod blockstore;
pub mod ancestor_iterator;
pub mod block_cost_limits;
pub mod blockstore_db;
pub mod blockstore_meta;
pub mod blockstore_processor;