1. Persist to blockstore less frequently;

2. reduce alpha for EMA to 1 percent to have roughly 200 data points for estimation
2022-02-04 18:57:02 -06:00
parent 6587dbfa47
commit 7aa1fb4e24
4 changed files with 232 additions and 176 deletions
--- a/runtime/src/cost_model.rs
+++ b/runtime/src/cost_model.rs
@ -96,17 +96,11 @@ impl CostModel {
        tx_cost
    }

-    pub fn upsert_instruction_cost(
-        &mut self,
-        program_key: &Pubkey,
-        cost: u64,
-    ) -> Result<u64, &'static str> {
+    // update-or-insert op is always successful. However the result of upsert, e.g. the aggregated
+    // value, requires additional calculation, which should only be invoked when needed.
+    pub fn upsert_instruction_cost(&mut self, program_key: &Pubkey, cost: u64) {
        self.instruction_execution_cost_table
            .upsert(program_key, cost);
-        match self.instruction_execution_cost_table.get_cost(program_key) {
-            Some(cost) => Ok(cost),
-            None => Err("failed to upsert to ExecuteCostTable"),
-        }
    }

    pub fn find_instruction_cost(&self, program_key: &Pubkey) -> u64 {
@ -115,7 +109,7 @@ impl CostModel {
            None => {
                let default_value = self.instruction_execution_cost_table.get_default();
                debug!(
-                    "Program key {:?} does not have assigned cost, using default value {}",
+                    "instruction {:?} does not have aggregated cost, using default {}",
                    program_key, default_value
                );
                default_value
@ -123,6 +117,10 @@ impl CostModel {
        }
    }

+    pub fn get_program_keys(&self) -> Vec<&Pubkey> {
+        self.instruction_execution_cost_table.get_program_keys()
+    }
+
    fn get_signature_cost(&self, transaction: &SanitizedTransaction) -> u64 {
        transaction.signatures().len() as u64 * SIGNATURE_COST
    }
@ -246,6 +244,7 @@ mod tests {
            transaction::Transaction,
        },
        std::{
+            collections::HashMap,
            str::FromStr,
            sync::{Arc, RwLock},
            thread::{self, JoinHandle},
@ -269,13 +268,11 @@ mod tests {
        let mut testee = CostModel::default();

        let known_key = Pubkey::from_str("known11111111111111111111111111111111111111").unwrap();
-        testee.upsert_instruction_cost(&known_key, 100).unwrap();
+        testee.upsert_instruction_cost(&known_key, 100);
        // find cost for known programs
        assert_eq!(100, testee.find_instruction_cost(&known_key));

-        testee
-            .upsert_instruction_cost(&bpf_loader::id(), 1999)
-            .unwrap();
+        testee.upsert_instruction_cost(&bpf_loader::id(), 1999);
        assert_eq!(1999, testee.find_instruction_cost(&bpf_loader::id()));

        // unknown program is assigned with default cost
@ -287,6 +284,35 @@ mod tests {
        );
    }

+    #[test]
+    fn test_iterating_instruction_cost_by_program_keys() {
+        solana_logger::setup();
+        let mut testee = CostModel::default();
+
+        let mut test_key_and_cost = HashMap::<Pubkey, u64>::new();
+        (0u64..10u64).for_each(|n| {
+            test_key_and_cost.insert(Pubkey::new_unique(), n);
+        });
+
+        test_key_and_cost.iter().for_each(|(key, cost)| {
+            testee.upsert_instruction_cost(key, *cost);
+            info!("key {:?} cost {}", key, cost);
+        });
+
+        let keys = testee.get_program_keys();
+        // verify each key has pre-set value
+        keys.iter().for_each(|key| {
+            let expected_cost = test_key_and_cost.get(key).unwrap();
+            info!(
+                "check key {:?} expect {} find {}",
+                key,
+                expected_cost,
+                testee.find_instruction_cost(key)
+            );
+            assert_eq!(*expected_cost, testee.find_instruction_cost(key));
+        });
+    }
+
    #[test]
    fn test_cost_model_data_len_cost() {
        let lamports = 0;
@ -351,9 +377,7 @@ mod tests {
        let expected_cost = 8;

        let mut testee = CostModel::default();
-        testee
-            .upsert_instruction_cost(&system_program::id(), expected_cost)
-            .unwrap();
+        testee.upsert_instruction_cost(&system_program::id(), expected_cost);
        assert_eq!(
            expected_cost,
            testee.get_transaction_cost(&simple_transaction)
@ -381,9 +405,7 @@ mod tests {
        let expected_cost = program_cost * 2;

        let mut testee = CostModel::default();
-        testee
-            .upsert_instruction_cost(&system_program::id(), program_cost)
-            .unwrap();
+        testee.upsert_instruction_cost(&system_program::id(), program_cost);
        assert_eq!(expected_cost, testee.get_transaction_cost(&tx));
    }

@ -464,7 +486,7 @@ mod tests {
        );

        // insert instruction cost to table
-        assert!(cost_model.upsert_instruction_cost(&key1, cost1).is_ok());
+        cost_model.upsert_instruction_cost(&key1, cost1);

        // now it is known insturction with known cost
        assert_eq!(cost1, cost_model.find_instruction_cost(&key1));
@ -484,9 +506,7 @@ mod tests {
        let expected_execution_cost = 8;

        let mut cost_model = CostModel::default();
-        cost_model
-            .upsert_instruction_cost(&system_program::id(), expected_execution_cost)
-            .unwrap();
+        cost_model.upsert_instruction_cost(&system_program::id(), expected_execution_cost);
        let tx_cost = cost_model.calculate_cost(&tx);
        assert_eq!(expected_account_cost, tx_cost.write_lock_cost);
        assert_eq!(expected_execution_cost, tx_cost.execution_cost);
@ -498,17 +518,17 @@ mod tests {
        let key1 = Pubkey::new_unique();
        let cost1 = 100;
        let cost2 = 200;
-        // updated_cost = (mean + 2*std)
-        let updated_cost = 238;
+        // updated_cost = (mean + 2*std) of [100, 200] => 120.899
+        let updated_cost = 121;

        let mut cost_model = CostModel::default();

        // insert instruction cost to table
-        assert!(cost_model.upsert_instruction_cost(&key1, cost1).is_ok());
+        cost_model.upsert_instruction_cost(&key1, cost1);
        assert_eq!(cost1, cost_model.find_instruction_cost(&key1));

        // update instruction cost
-        assert!(cost_model.upsert_instruction_cost(&key1, cost2).is_ok());
+        cost_model.upsert_instruction_cost(&key1, cost2);
        assert_eq!(updated_cost, cost_model.find_instruction_cost(&key1));
    }

@ -550,8 +570,8 @@ mod tests {
                if i == 5 {
                    thread::spawn(move || {
                        let mut cost_model = cost_model.write().unwrap();
-                        assert!(cost_model.upsert_instruction_cost(&prog1, cost1).is_ok());
-                        assert!(cost_model.upsert_instruction_cost(&prog2, cost2).is_ok());
+                        cost_model.upsert_instruction_cost(&prog1, cost1);
+                        cost_model.upsert_instruction_cost(&prog2, cost2);
                    })
                } else {
                    thread::spawn(move || {
--- a/runtime/src/execute_cost_table.rs
+++ b/runtime/src/execute_cost_table.rs
@ -4,7 +4,10 @@
 /// When its capacity limit is reached, it prunes old and less-used programs
 /// to make room for new ones.
 use log::*;
-use {solana_sdk::pubkey::Pubkey, std::collections::HashMap};
+use {
+    solana_sdk::pubkey::Pubkey,
+    std::collections::{hash_map::Entry, HashMap},
+};

 // prune is rather expensive op, free up bulk space in each operation
 // would be more efficient. PRUNE_RATIO defines the after prune table
@ -18,7 +21,8 @@ const DEFAULT_CAPACITY: usize = 1024;
 // The coefficient represents the degree of weighting decrease in EMA,
 // a constant smoothing factor between 0 and 1. A higher alpha
 // discounts older observations faster.
-const COEFFICIENT: f64 = 0.4;
+// Set using `alpha = 2/(N+1)` where N is 200 samples, which gives roughly 0.01
+const COEFFICIENT: f64 = 0.01;

 #[derive(Debug, Default)]
 struct AggregatedVarianceStats {
@ -53,19 +57,27 @@ impl ExecuteCostTable {
        self.table.len()
    }

-    // default prorgam cost to max
+    // default program cost to max
    pub fn get_default(&self) -> u64 {
-        // default max comoute units per program
+        // default max compute units per program
        200_000u64
    }

    // returns None if program doesn't exist in table. In this case,
-    // it is advised to call `get_default()` for default program costdefault/
+    // it is advised to call `get_default()` for default program cost.
    // Program cost is estimated as 2 standard deviations above mean, eg
    // cost = (mean + 2 * std)
    pub fn get_cost(&self, key: &Pubkey) -> Option<u64> {
        let aggregated = self.table.get(key)?;
-        Some((aggregated.ema + 2.0 * aggregated.ema_var.sqrt()).ceil() as u64)
+        let cost_f64 = (aggregated.ema + 2.0 * aggregated.ema_var.sqrt()).ceil();
+
+        // check if cost:f64 can be losslessly converted to u64, otherwise return None
+        let cost_u64 = cost_f64 as u64;
+        if cost_f64 == cost_u64 as f64 {
+            Some(cost_u64)
+        } else {
+            None
+        }
    }

    pub fn upsert(&mut self, key: &Pubkey, value: u64) {
@ -77,21 +89,21 @@ impl ExecuteCostTable {

        // exponential moving average algorithm
        // https://en.wikipedia.org/wiki/Moving_average#Exponentially_weighted_moving_variance_and_standard_deviation
-        if self.table.contains_key(key) {
-            let aggregated = self.table.get_mut(key).unwrap();
-            let theta = value as f64 - aggregated.ema;
-            aggregated.ema += theta * COEFFICIENT;
-            aggregated.ema_var =
-                (1.0 - COEFFICIENT) * (aggregated.ema_var + COEFFICIENT * theta * theta)
-        } else {
-            // the starting values
-            self.table.insert(
-                *key,
-                AggregatedVarianceStats {
+        match self.table.entry(*key) {
+            Entry::Occupied(mut entry) => {
+                let aggregated = entry.get_mut();
+                let theta = value as f64 - aggregated.ema;
+                aggregated.ema += theta * COEFFICIENT;
+                aggregated.ema_var =
+                    (1.0 - COEFFICIENT) * (aggregated.ema_var + COEFFICIENT * theta * theta);
+            }
+            Entry::Vacant(entry) => {
+                // the starting values
+                entry.insert(AggregatedVarianceStats {
                    ema: value as f64,
                    ema_var: 0.0,
-                },
-            );
+                });
+            }
        }

        let (count, timestamp) = self
@ -102,6 +114,10 @@ impl ExecuteCostTable {
        *timestamp = Self::micros_since_epoch();
    }

+    pub fn get_program_keys(&self) -> Vec<&Pubkey> {
+        self.table.keys().collect()
+    }
+
    // prune the old programs so the table contains `new_size` of records,
    // where `old` is defined as weighted age, which is negatively correlated
    // with program's age and
@ -189,9 +205,9 @@ mod tests {
        let key2 = Pubkey::new_unique();
        let key3 = Pubkey::new_unique();

-        // simulate a lot of occurences to key1, so even there're longer than
+        // simulate a lot of occurrences to key1, so even there're longer than
        // usual delay between upsert(key1..) and upsert(key2, ..), test
-        // would still satisfy as key1 has enough occurences to compensate
+        // would still satisfy as key1 has enough occurrences to compensate
        // its age.
        for i in 0..1000 {
            testee.upsert(&key1, i);
@ -235,8 +251,8 @@ mod tests {
        // update 1st record
        testee.upsert(&key1, cost2);
        assert_eq!(2, testee.get_count());
-        // expected key1 cost = (mean + 2*std) = (105 + 2*5) = 115
-        let expected_cost = 114;
+        // expected key1 cost = (mean + 2*std) of [100, 110] with alpha=0.01 => ceil(102.09) = 103
+        let expected_cost = 103;
        assert_eq!(expected_cost, testee.get_cost(&key1).unwrap());
        assert_eq!(cost2, testee.get_cost(&key2).unwrap());
    }
@ -280,10 +296,29 @@ mod tests {
        testee.upsert(&key4, cost4);
        assert_eq!(2, testee.get_count());
        assert!(testee.get_cost(&key1).is_none());
-        // expected key2 cost = (mean + 2*std) = (105 + 2*5) = 115
-        let expected_cost_2 = 116;
+        // expected key2 cost = (mean + 2*std) of [110, 100] => 112
+        let expected_cost_2 = 112;
        assert_eq!(expected_cost_2, testee.get_cost(&key2).unwrap());
        assert!(testee.get_cost(&key3).is_none());
        assert_eq!(cost4, testee.get_cost(&key4).unwrap());
    }
+
+    #[test]
+    fn test_get_cost_overflow_u64() {
+        solana_logger::setup();
+        let mut testee = ExecuteCostTable::default();
+
+        let key1 = Pubkey::new_unique();
+        let cost1: u64 = f64::MAX as u64;
+        let cost2: u64 = u64::MAX / 2; // create large variance so the final result will overflow
+
+        // insert one record
+        testee.upsert(&key1, cost1);
+        assert_eq!(1, testee.get_count());
+        assert_eq!(cost1, testee.get_cost(&key1).unwrap());
+
+        // update cost
+        testee.upsert(&key1, cost2);
+        assert!(testee.get_cost(&key1).is_none());
+    }
 }