- estimate a program's cost as 2 standard deviations above the mean

- replaced get_average / get_mode with get_default, which assigns the max compute units to an unknown program
2022-01-04 19:01:46 -06:00
parent c899685cb2
commit a25ac1c988
3 changed files with 169 additions and 214 deletions
--- a/runtime/src/cost_model.rs
+++ b/runtime/src/cost_model.rs
@@ -11,7 +11,6 @@ use {
        instruction::CompiledInstruction, program_utils::limited_deserialize, pubkey::Pubkey,
        system_instruction::SystemInstruction, system_program, transaction::SanitizedTransaction,
    },
-    std::collections::HashMap,
 };

 const MAX_WRITABLE_ACCOUNTS: usize = 256;
@@ -79,28 +78,9 @@ impl CostModel {
            .map(|(key, cost)| (key, cost))
            .chain(BUILT_IN_INSTRUCTION_COSTS.iter())
            .for_each(|(program_id, cost)| {
-                match self
-                    .instruction_execution_cost_table
-                    .upsert(program_id, *cost)
-                {
-                    Some(c) => {
-                        debug!(
-                            "initiating cost table, instruction {:?} has cost {}",
-                            program_id, c
-                        );
-                    }
-                    None => {
-                        debug!(
-                            "initiating cost table, failed for instruction {:?}",
-                            program_id
-                        );
-                    }
-                }
+                self.instruction_execution_cost_table
+                    .upsert(program_id, *cost);
            });
-        debug!(
-            "restored cost model instruction cost table from blockstore, current values: {:?}",
-            self.get_instruction_cost_table()
-        );
    }

    pub fn calculate_cost(&self, transaction: &SanitizedTransaction) -> TransactionCost {
@@ -124,22 +104,18 @@ impl CostModel {
        self.instruction_execution_cost_table
            .upsert(program_key, cost);
        match self.instruction_execution_cost_table.get_cost(program_key) {
-            Some(cost) => Ok(*cost),
+            Some(cost) => Ok(cost),
            None => Err("failed to upsert to ExecuteCostTable"),
        }
    }

-    pub fn get_instruction_cost_table(&self) -> &HashMap<Pubkey, u64> {
-        self.instruction_execution_cost_table.get_cost_table()
-    }
-
    pub fn find_instruction_cost(&self, program_key: &Pubkey) -> u64 {
        match self.instruction_execution_cost_table.get_cost(program_key) {
-            Some(cost) => *cost,
+            Some(cost) => cost,
            None => {
-                let default_value = self.instruction_execution_cost_table.get_mode();
+                let default_value = self.instruction_execution_cost_table.get_default();
                debug!(
-                    "Program key {:?} does not have assigned cost, using mode {}",
+                    "Program key {:?} does not have assigned cost, using default value {}",
                    program_key, default_value
                );
                default_value
@@ -304,7 +280,7 @@ mod tests {

        // unknown program is assigned with default cost
        assert_eq!(
-            testee.instruction_execution_cost_table.get_mode(),
+            testee.instruction_execution_cost_table.get_default(),
            testee.find_instruction_cost(
                &Pubkey::from_str("unknown111111111111111111111111111111111111").unwrap()
            )
@@ -439,7 +415,7 @@ mod tests {
        let result = testee.get_transaction_cost(&tx);

        // expected cost for two random/unknown program is
-        let expected_cost = testee.instruction_execution_cost_table.get_mode() * 2;
+        let expected_cost = testee.instruction_execution_cost_table.get_default() * 2;
        assert_eq!(expected_cost, result);
    }

@@ -483,7 +459,7 @@ mod tests {
        let mut cost_model = CostModel::default();
        // Using default cost for unknown instruction
        assert_eq!(
-            cost_model.instruction_execution_cost_table.get_mode(),
+            cost_model.instruction_execution_cost_table.get_default(),
            cost_model.find_instruction_cost(&key1)
        );

@@ -522,7 +498,8 @@ mod tests {
        let key1 = Pubkey::new_unique();
        let cost1 = 100;
        let cost2 = 200;
-        let updated_cost = (cost1 + cost2) / 2;
+        // updated_cost = (mean + 2*std) = 150 + 2 * 50 = 250
+        let updated_cost = 250;

        let mut cost_model = CostModel::default();

--- a/runtime/src/execute_cost_table.rs
+++ b/runtime/src/execute_cost_table.rs
@@ -15,10 +15,17 @@ const OCCURRENCES_WEIGHT: i64 = 100;

 const DEFAULT_CAPACITY: usize = 1024;

-#[derive(AbiExample, Debug)]
+#[derive(Debug, Default)]
+struct AggregatedVarianceStats {
+    count: u64,
+    mean: f64,
+    squared_mean_distance: f64,
+}
+
+#[derive(Debug)]
 pub struct ExecuteCostTable {
    capacity: usize,
-    table: HashMap<Pubkey, u64>,
+    table: HashMap<Pubkey, AggregatedVarianceStats>,
    occurrences: HashMap<Pubkey, (usize, u128)>,
 }

@@ -37,55 +44,50 @@ impl ExecuteCostTable {
        }
    }

-    pub fn get_cost_table(&self) -> &HashMap<Pubkey, u64> {
-        &self.table
-    }
-
+    // number of programs in table
    pub fn get_count(&self) -> usize {
        self.table.len()
    }

-    // instead of assigning unknown program with a configured/hard-coded cost
-    // use average or mode function to make a educated guess.
-    pub fn get_average(&self) -> u64 {
-        if self.table.is_empty() {
-            0
-        } else {
-            self.table.iter().map(|(_, value)| value).sum::<u64>() / self.get_count() as u64
-        }
-    }
-
-    pub fn get_mode(&self) -> u64 {
-        if self.occurrences.is_empty() {
-            0
-        } else {
-            let key = self
-                .occurrences
-                .iter()
-                .max_by_key(|&(_, count)| count)
-                .map(|(key, _)| key)
-                .expect("cannot find mode from cost table");
-
-            *self.table.get(key).unwrap()
-        }
+    // default program cost is set to the max
+    pub fn get_default(&self) -> u64 {
+        // default max compute units per program
+        200_000u64
    }

    // returns None if program doesn't exist in table. In this case,
-    // client is advised to call `get_average()` or `get_mode()` to
-    // assign a 'default' value for new program.
-    pub fn get_cost(&self, key: &Pubkey) -> Option<&u64> {
-        self.table.get(key)
+    // it is advised to call `get_default()` for the default program cost.
+    // Mean and std are calculated using Welford's algorithm:
+    // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
+    // Program cost is estimated as 2 standard deviations above mean, eg
+    // cost = (mean + 2 * std)
+    pub fn get_cost(&self, key: &Pubkey) -> Option<u64> {
+        let aggregated = self.table.get(key)?;
+        if aggregated.count < 1 {
+            None
+        } else {
+            let variance = aggregated.squared_mean_distance / aggregated.count as f64;
+            Some((aggregated.mean + 2.0 * variance.sqrt()).ceil() as u64)
+        }
    }

-    pub fn upsert(&mut self, key: &Pubkey, value: u64) -> Option<u64> {
-        let need_to_add = self.table.get(key).is_none();
+    pub fn upsert(&mut self, key: &Pubkey, value: u64) {
+        let need_to_add = !self.table.contains_key(key);
        let current_size = self.get_count();
        if current_size == self.capacity && need_to_add {
            self.prune_to(&((current_size as f64 * PRUNE_RATIO) as usize));
        }

-        let program_cost = self.table.entry(*key).or_insert(value);
-        *program_cost = (*program_cost + value) / 2;
+        // Welford's algorithm
+        let aggregated = self
+            .table
+            .entry(*key)
+            .or_insert_with(AggregatedVarianceStats::default);
+        aggregated.count += 1;
+        let delta = value as f64 - aggregated.mean;
+        aggregated.mean += delta / aggregated.count as f64;
+        let delta_2 = value as f64 - aggregated.mean;
+        aggregated.squared_mean_distance += delta * delta_2;

        let (count, timestamp) = self
            .occurrences
@@ -93,8 +95,6 @@ impl ExecuteCostTable {
            .or_insert((0, Self::micros_since_epoch()));
        *count += 1;
        *timestamp = Self::micros_since_epoch();
-
-        Some(*program_cost)
    }

    // prune the old programs so the table contains `new_size` of records,
@@ -219,25 +219,21 @@ mod tests {
        // insert one record
        testee.upsert(&key1, cost1);
        assert_eq!(1, testee.get_count());
-        assert_eq!(cost1, testee.get_average());
-        assert_eq!(cost1, testee.get_mode());
-        assert_eq!(&cost1, testee.get_cost(&key1).unwrap());
+        assert_eq!(cost1, testee.get_cost(&key1).unwrap());

        // insert 2nd record
        testee.upsert(&key2, cost2);
        assert_eq!(2, testee.get_count());
-        assert_eq!((cost1 + cost2) / 2_u64, testee.get_average());
-        assert_eq!(cost2, testee.get_mode());
-        assert_eq!(&cost1, testee.get_cost(&key1).unwrap());
-        assert_eq!(&cost2, testee.get_cost(&key2).unwrap());
+        assert_eq!(cost1, testee.get_cost(&key1).unwrap());
+        assert_eq!(cost2, testee.get_cost(&key2).unwrap());

        // update 1st record
        testee.upsert(&key1, cost2);
        assert_eq!(2, testee.get_count());
-        assert_eq!(((cost1 + cost2) / 2 + cost2) / 2, testee.get_average());
-        assert_eq!((cost1 + cost2) / 2, testee.get_mode());
-        assert_eq!(&((cost1 + cost2) / 2), testee.get_cost(&key1).unwrap());
-        assert_eq!(&cost2, testee.get_cost(&key2).unwrap());
+        // expected key1 cost = (mean + 2*std) = (105 + 2*5) = 115
+        let expected_cost = 115;
+        assert_eq!(expected_cost, testee.get_cost(&key1).unwrap());
+        assert_eq!(cost2, testee.get_cost(&key2).unwrap());
    }

    #[test]
@@ -258,33 +254,31 @@ mod tests {
        // insert one record
        testee.upsert(&key1, cost1);
        assert_eq!(1, testee.get_count());
-        assert_eq!(&cost1, testee.get_cost(&key1).unwrap());
+        assert_eq!(cost1, testee.get_cost(&key1).unwrap());

        // insert 2nd record
        testee.upsert(&key2, cost2);
        assert_eq!(2, testee.get_count());
-        assert_eq!(&cost1, testee.get_cost(&key1).unwrap());
-        assert_eq!(&cost2, testee.get_cost(&key2).unwrap());
+        assert_eq!(cost1, testee.get_cost(&key1).unwrap());
+        assert_eq!(cost2, testee.get_cost(&key2).unwrap());

        // insert 3rd record, pushes out the oldest (eg 1st) record
        testee.upsert(&key3, cost3);
        assert_eq!(2, testee.get_count());
-        assert_eq!((cost2 + cost3) / 2_u64, testee.get_average());
-        assert_eq!(cost3, testee.get_mode());
        assert!(testee.get_cost(&key1).is_none());
-        assert_eq!(&cost2, testee.get_cost(&key2).unwrap());
-        assert_eq!(&cost3, testee.get_cost(&key3).unwrap());
+        assert_eq!(cost2, testee.get_cost(&key2).unwrap());
+        assert_eq!(cost3, testee.get_cost(&key3).unwrap());

        // update 2nd record, so the 3rd becomes the oldest
        // add 4th record, pushes out 3rd key
        testee.upsert(&key2, cost1);
        testee.upsert(&key4, cost4);
-        assert_eq!(((cost1 + cost2) / 2 + cost4) / 2_u64, testee.get_average());
-        assert_eq!((cost1 + cost2) / 2, testee.get_mode());
        assert_eq!(2, testee.get_count());
        assert!(testee.get_cost(&key1).is_none());
-        assert_eq!(&((cost1 + cost2) / 2), testee.get_cost(&key2).unwrap());
+        // expected key2 cost = (mean + 2*std) = (105 + 2*5) = 115
+        let expected_cost_2 = 115;
+        assert_eq!(expected_cost_2, testee.get_cost(&key2).unwrap());
        assert!(testee.get_cost(&key3).is_none());
-        assert_eq!(&cost4, testee.get_cost(&key4).unwrap());
+        assert_eq!(cost4, testee.get_cost(&key4).unwrap());
    }
 }