Reject blocks for costs above the max block cost (#18994)

* added realtime cost checking logic to reject block that would exceed max limit:
- defines max limits at block_cost_limits.rs
- right after each batch's execution, accumulate its cost and check against the
  limit, return error if limit is exceeded

* update abi that changed due to adding additional TransactionError

* To avoid counting stats multiple times, only accumulate execute-timing when a bank is completed

* gate it by a feature

* move cost const def into block_cost_limits.rs

* redefine the cost for signature and account access, removed signer part as it is not well defined for now

* check if per_program_timings of execute_timings is empty before sending
This commit is contained in:
Tao Zhu
2021-08-12 10:48:47 -05:00
committed by GitHub
parent 9d8594a046
commit 414d904959
10 changed files with 159 additions and 47 deletions

View File

@ -0,0 +1,42 @@
//! defines block cost related limits
//!
// see https://github.com/solana-labs/solana/issues/18944
// and https://github.com/solana-labs/solana/pull/18994#issuecomment-896128992
//
// Cost-model knobs; values are derived in the linked issue/PR discussion.
pub const MAX_BLOCK_TIME_US: u64 = 400_000; // target: at most 400ms per block
pub const AVG_INSTRUCTION_TIME_US: u64 = 1_000; // mean instruction execution time
pub const SYSTEM_PARALLELISM: u64 = 10;
pub const MAX_INSTRUCTION_COST: u64 = 200_000;
pub const MAX_NUMBER_BPF_INSTRUCTIONS_PER_ACCOUNT: u64 = 200;

/// Number of instructions that fit in one block, given the block time budget
/// and the assumed system parallelism.
pub const fn max_instructions_per_block() -> u64 {
    SYSTEM_PARALLELISM * (MAX_BLOCK_TIME_US / AVG_INSTRUCTION_TIME_US)
}

/// Maximum total cost (in compute units) allowed in a single block.
pub const fn block_cost_max() -> u64 {
    max_instructions_per_block() * MAX_INSTRUCTION_COST
}

/// Maximum total cost chargeable against a single account within one block.
pub const fn account_cost_max() -> u64 {
    max_instructions_per_block() * MAX_INSTRUCTION_COST
}

/// Compute units per microsecond of execution time.
pub const fn compute_unit_to_us_ratio() -> u64 {
    block_cost_max() / MAX_BLOCK_TIME_US
}

/// Cost of one signature verification (averages ~10us).
pub const fn signature_cost() -> u64 {
    10 * compute_unit_to_us_ratio()
}

/// Cost of one account read (averages ~5us).
pub const fn account_read_cost() -> u64 {
    5 * compute_unit_to_us_ratio()
}

/// Cost of one account write (averages ~25us).
pub const fn account_write_cost() -> u64 {
    25 * compute_unit_to_us_ratio()
}

View File

@ -1,6 +1,7 @@
use crate::{
block_error::BlockError, blockstore::Blockstore, blockstore_db::BlockstoreError,
blockstore_meta::SlotMeta, leader_schedule_cache::LeaderScheduleCache,
block_cost_limits::*, block_error::BlockError, blockstore::Blockstore,
blockstore_db::BlockstoreError, blockstore_meta::SlotMeta,
leader_schedule_cache::LeaderScheduleCache,
};
use chrono_humanize::{Accuracy, HumanTime, Tense};
use crossbeam_channel::Sender;
@ -31,6 +32,7 @@ use solana_runtime::{
};
use solana_sdk::{
clock::{Slot, MAX_PROCESSING_AGE},
feature_set,
genesis_config::GenesisConfig,
hash::Hash,
pubkey::Pubkey,
@ -48,11 +50,40 @@ use std::{
convert::TryFrom,
path::PathBuf,
result,
sync::Arc,
sync::{Arc, RwLock},
time::{Duration, Instant},
};
use thiserror::Error;
// Tracks the remaining block cost capacity: the number of compute-units
// still allowed under the max block cost limit.
#[derive(Debug)]
pub struct BlockCostCapacityMeter {
    // total compute-unit budget for the block
    pub capacity: u64,
    // compute-units consumed so far
    pub accumulated_cost: u64,
}
impl Default for BlockCostCapacityMeter {
    /// A meter whose capacity is the configured maximum block cost.
    fn default() -> Self {
        Self::new(block_cost_max())
    }
}
impl BlockCostCapacityMeter {
    /// Creates a meter with the given compute-unit capacity and no cost
    /// accumulated yet.
    pub fn new(capacity_limit: u64) -> Self {
        Self {
            capacity: capacity_limit,
            accumulated_cost: 0_u64,
        }
    }

    /// Adds `cost` to the accumulated total and returns the remaining
    /// capacity (zero once the limit is reached or exceeded).
    pub fn accumulate(&mut self, cost: u64) -> u64 {
        // Saturate instead of `+=` so a pathological cost sum cannot
        // overflow (panic in debug, wrap in release); once saturated the
        // remaining capacity is 0 anyway, matching the saturating_sub below.
        self.accumulated_cost = self.accumulated_cost.saturating_add(cost);
        self.capacity.saturating_sub(self.accumulated_cost)
    }
}
pub type BlockstoreProcessorResult =
result::Result<(BankForks, LeaderScheduleCache), BlockstoreProcessorError>;
@ -100,12 +131,25 @@ fn get_first_error(
first_err
}
/// Sums, over every program seen by `execute_timings`, the average number of
/// compute units per invocation of that program.
fn aggregate_total_execution_units(execute_timings: &ExecuteTimings) -> u64 {
    execute_timings
        .details
        .per_program_timings
        .iter()
        .filter(|(_, timing)| timing.count >= 1) // skip to avoid div-by-zero
        .map(|(program_id, timing)| {
            trace!("aggregated execution cost of {:?} {:?}", program_id, timing);
            timing.accumulated_units / timing.count as u64
        })
        .sum()
}
fn execute_batch(
batch: &TransactionBatch,
bank: &Arc<Bank>,
transaction_status_sender: Option<&TransactionStatusSender>,
replay_vote_sender: Option<&ReplayVoteSender>,
timings: &mut ExecuteTimings,
cost_capacity_meter: Arc<RwLock<BlockCostCapacityMeter>>,
) -> Result<()> {
let record_token_balances = transaction_status_sender.is_some();
@ -117,6 +161,8 @@ fn execute_batch(
vec![]
};
let pre_process_units: u64 = aggregate_total_execution_units(timings);
let (tx_results, balances, inner_instructions, transaction_logs) =
batch.bank().load_execute_and_commit_transactions(
batch,
@ -127,6 +173,29 @@ fn execute_batch(
timings,
);
if bank
.feature_set
.is_active(&feature_set::gate_large_block::id())
{
let execution_cost_units = aggregate_total_execution_units(timings) - pre_process_units;
let remaining_block_cost_cap = cost_capacity_meter
.write()
.unwrap()
.accumulate(execution_cost_units);
debug!(
"bank {} executed a batch, number of transactions {}, total execute cu {}, remaining block cost cap {}",
bank.slot(),
batch.sanitized_transactions().len(),
execution_cost_units,
remaining_block_cost_cap,
);
if remaining_block_cost_cap == 0_u64 {
return Err(TransactionError::WouldExceedMaxBlockCostLimit);
}
}
bank_utils::find_and_send_votes(
batch.sanitized_transactions(),
&tx_results,
@ -174,6 +243,7 @@ fn execute_batches(
transaction_status_sender: Option<&TransactionStatusSender>,
replay_vote_sender: Option<&ReplayVoteSender>,
timings: &mut ExecuteTimings,
cost_capacity_meter: Arc<RwLock<BlockCostCapacityMeter>>,
) -> Result<()> {
inc_new_counter_debug!("bank-par_execute_entries-count", batches.len());
let (results, new_timings): (Vec<Result<()>>, Vec<ExecuteTimings>) =
@ -189,6 +259,7 @@ fn execute_batches(
transaction_status_sender,
replay_vote_sender,
&mut timings,
cost_capacity_meter.clone(),
);
if let Some(entry_callback) = entry_callback {
entry_callback(bank);
@ -233,6 +304,7 @@ pub fn process_entries(
transaction_status_sender,
replay_vote_sender,
&mut timings,
Arc::new(RwLock::new(BlockCostCapacityMeter::default())),
);
debug!("process_entries: {:?}", timings);
@ -248,6 +320,7 @@ fn process_entries_with_callback(
transaction_status_sender: Option<&TransactionStatusSender>,
replay_vote_sender: Option<&ReplayVoteSender>,
timings: &mut ExecuteTimings,
cost_capacity_meter: Arc<RwLock<BlockCostCapacityMeter>>,
) -> Result<()> {
// accumulator for entries that can be processed in parallel
let mut batches = vec![];
@ -269,6 +342,7 @@ fn process_entries_with_callback(
transaction_status_sender,
replay_vote_sender,
timings,
cost_capacity_meter.clone(),
)?;
batches.clear();
for hash in &tick_hashes {
@ -320,6 +394,7 @@ fn process_entries_with_callback(
transaction_status_sender,
replay_vote_sender,
timings,
cost_capacity_meter.clone(),
)?;
batches.clear();
}
@ -334,6 +409,7 @@ fn process_entries_with_callback(
transaction_status_sender,
replay_vote_sender,
timings,
cost_capacity_meter,
)?;
for hash in tick_hashes {
bank.register_tick(hash);
@ -820,6 +896,7 @@ pub fn confirm_slot(
let mut replay_elapsed = Measure::start("replay_elapsed");
let mut execute_timings = ExecuteTimings::default();
let cost_capacity_meter = Arc::new(RwLock::new(BlockCostCapacityMeter::default()));
// Note: This will shuffle entries' transactions in-place.
let process_result = process_entries_with_callback(
bank,
@ -829,6 +906,7 @@ pub fn confirm_slot(
transaction_status_sender,
replay_vote_sender,
&mut execute_timings,
cost_capacity_meter,
)
.map_err(BlockstoreProcessorError::from);
replay_elapsed.stop();

View File

@ -10,6 +10,7 @@ pub mod block_error;
#[macro_use]
pub mod blockstore;
pub mod ancestor_iterator;
pub mod block_cost_limits;
pub mod blockstore_db;
pub mod blockstore_meta;
pub mod blockstore_processor;