- estimate a program cost as 2 standard deviations above mean

- replaced get_average / get_mode with get_default to assign max units to unknown program
This commit is contained in:
Tao Zhu
2022-01-04 19:01:46 -06:00
committed by Tao Zhu
parent c899685cb2
commit a25ac1c988
3 changed files with 169 additions and 214 deletions

View File

@ -11,7 +11,6 @@ use {
instruction::CompiledInstruction, program_utils::limited_deserialize, pubkey::Pubkey,
system_instruction::SystemInstruction, system_program, transaction::SanitizedTransaction,
},
std::collections::HashMap,
};
const MAX_WRITABLE_ACCOUNTS: usize = 256;
@ -79,28 +78,9 @@ impl CostModel {
.map(|(key, cost)| (key, cost))
.chain(BUILT_IN_INSTRUCTION_COSTS.iter())
.for_each(|(program_id, cost)| {
match self
.instruction_execution_cost_table
.upsert(program_id, *cost)
{
Some(c) => {
debug!(
"initiating cost table, instruction {:?} has cost {}",
program_id, c
);
}
None => {
debug!(
"initiating cost table, failed for instruction {:?}",
program_id
);
}
}
self.instruction_execution_cost_table
.upsert(program_id, *cost);
});
debug!(
"restored cost model instruction cost table from blockstore, current values: {:?}",
self.get_instruction_cost_table()
);
}
pub fn calculate_cost(&self, transaction: &SanitizedTransaction) -> TransactionCost {
@ -124,22 +104,18 @@ impl CostModel {
self.instruction_execution_cost_table
.upsert(program_key, cost);
match self.instruction_execution_cost_table.get_cost(program_key) {
Some(cost) => Ok(*cost),
Some(cost) => Ok(cost),
None => Err("failed to upsert to ExecuteCostTable"),
}
}
pub fn get_instruction_cost_table(&self) -> &HashMap<Pubkey, u64> {
self.instruction_execution_cost_table.get_cost_table()
}
pub fn find_instruction_cost(&self, program_key: &Pubkey) -> u64 {
match self.instruction_execution_cost_table.get_cost(program_key) {
Some(cost) => *cost,
Some(cost) => cost,
None => {
let default_value = self.instruction_execution_cost_table.get_mode();
let default_value = self.instruction_execution_cost_table.get_default();
debug!(
"Program key {:?} does not have assigned cost, using mode {}",
"Program key {:?} does not have assigned cost, using default value {}",
program_key, default_value
);
default_value
@ -304,7 +280,7 @@ mod tests {
// unknown program is assigned with default cost
assert_eq!(
testee.instruction_execution_cost_table.get_mode(),
testee.instruction_execution_cost_table.get_default(),
testee.find_instruction_cost(
&Pubkey::from_str("unknown111111111111111111111111111111111111").unwrap()
)
@ -439,7 +415,7 @@ mod tests {
let result = testee.get_transaction_cost(&tx);
// expected cost for two random/unknown programs is twice the default cost
let expected_cost = testee.instruction_execution_cost_table.get_mode() * 2;
let expected_cost = testee.instruction_execution_cost_table.get_default() * 2;
assert_eq!(expected_cost, result);
}
@ -483,7 +459,7 @@ mod tests {
let mut cost_model = CostModel::default();
// Using default cost for unknown instruction
assert_eq!(
cost_model.instruction_execution_cost_table.get_mode(),
cost_model.instruction_execution_cost_table.get_default(),
cost_model.find_instruction_cost(&key1)
);
@ -522,7 +498,8 @@ mod tests {
let key1 = Pubkey::new_unique();
let cost1 = 100;
let cost2 = 200;
let updated_cost = (cost1 + cost2) / 2;
// updated_cost = (mean + 2*std) = 150 + 2 * 50 = 250
let updated_cost = 250;
let mut cost_model = CostModel::default();

View File

@ -15,10 +15,17 @@ const OCCURRENCES_WEIGHT: i64 = 100;
const DEFAULT_CAPACITY: usize = 1024;
#[derive(AbiExample, Debug)]
/// Running statistics for one program's observed costs, maintained
/// incrementally (Welford's online algorithm) so that mean and variance
/// can be derived without storing the individual samples.
#[derive(Debug, Default)]
struct AggregatedVarianceStats {
// number of cost samples folded in so far
count: u64,
// running mean of the samples
mean: f64,
// running sum of squared distances from the mean;
// dividing it by `count` yields the variance
squared_mean_distance: f64,
}
#[derive(Debug)]
pub struct ExecuteCostTable {
capacity: usize,
table: HashMap<Pubkey, u64>,
table: HashMap<Pubkey, AggregatedVarianceStats>,
occurrences: HashMap<Pubkey, (usize, u128)>,
}
@ -37,55 +44,50 @@ impl ExecuteCostTable {
}
}
pub fn get_cost_table(&self) -> &HashMap<Pubkey, u64> {
&self.table
}
// number of programs in table
pub fn get_count(&self) -> usize {
self.table.len()
}
// instead of assigning unknown program with a configured/hard-coded cost
// use average or mode function to make a educated guess.
pub fn get_average(&self) -> u64 {
if self.table.is_empty() {
0
} else {
self.table.iter().map(|(_, value)| value).sum::<u64>() / self.get_count() as u64
}
}
pub fn get_mode(&self) -> u64 {
if self.occurrences.is_empty() {
0
} else {
let key = self
.occurrences
.iter()
.max_by_key(|&(_, count)| count)
.map(|(key, _)| key)
.expect("cannot find mode from cost table");
*self.table.get(key).unwrap()
}
// default program cost to max; callers use this value for programs
// that have no recorded cost in the table
pub fn get_default(&self) -> u64 {
// default max compute units per program
200_000u64
}
// returns None if program doesn't exist in table. In this case,
// client is advised to call `get_average()` or `get_mode()` to
// assign a 'default' value for new program.
pub fn get_cost(&self, key: &Pubkey) -> Option<&u64> {
self.table.get(key)
// it is advised to call `get_default()` for default program cost.
// using Welford's Algorithm to calculate mean and std:
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
// Program cost is estimated as 2 standard deviations above mean, eg
// cost = (mean + 2 * std)
pub fn get_cost(&self, key: &Pubkey) -> Option<u64> {
// `?` returns None right away when the program has no entry in the table
let aggregated = self.table.get(key)?;
if aggregated.count < 1 {
// an entry without any samples has no statistics to report
None
} else {
// population variance: accumulated squared distance divided by sample count
let variance = aggregated.squared_mean_distance / aggregated.count as f64;
// mean + 2 * std, rounded up to a whole number of units
Some((aggregated.mean + 2.0 * variance.sqrt()).ceil() as u64)
}
}
pub fn upsert(&mut self, key: &Pubkey, value: u64) -> Option<u64> {
let need_to_add = self.table.get(key).is_none();
pub fn upsert(&mut self, key: &Pubkey, value: u64) {
let need_to_add = !self.table.contains_key(key);
let current_size = self.get_count();
if current_size == self.capacity && need_to_add {
self.prune_to(&((current_size as f64 * PRUNE_RATIO) as usize));
}
let program_cost = self.table.entry(*key).or_insert(value);
*program_cost = (*program_cost + value) / 2;
// Welford's algorithm
let aggregated = self
.table
.entry(*key)
.or_insert_with(AggregatedVarianceStats::default);
aggregated.count += 1;
let delta = value as f64 - aggregated.mean;
aggregated.mean += delta / aggregated.count as f64;
let delta_2 = value as f64 - aggregated.mean;
aggregated.squared_mean_distance += delta * delta_2;
let (count, timestamp) = self
.occurrences
@ -93,8 +95,6 @@ impl ExecuteCostTable {
.or_insert((0, Self::micros_since_epoch()));
*count += 1;
*timestamp = Self::micros_since_epoch();
Some(*program_cost)
}
// prune the old programs so the table contains `new_size` of records,
@ -219,25 +219,21 @@ mod tests {
// insert one record
testee.upsert(&key1, cost1);
assert_eq!(1, testee.get_count());
assert_eq!(cost1, testee.get_average());
assert_eq!(cost1, testee.get_mode());
assert_eq!(&cost1, testee.get_cost(&key1).unwrap());
assert_eq!(cost1, testee.get_cost(&key1).unwrap());
// insert 2nd record
testee.upsert(&key2, cost2);
assert_eq!(2, testee.get_count());
assert_eq!((cost1 + cost2) / 2_u64, testee.get_average());
assert_eq!(cost2, testee.get_mode());
assert_eq!(&cost1, testee.get_cost(&key1).unwrap());
assert_eq!(&cost2, testee.get_cost(&key2).unwrap());
assert_eq!(cost1, testee.get_cost(&key1).unwrap());
assert_eq!(cost2, testee.get_cost(&key2).unwrap());
// update 1st record
testee.upsert(&key1, cost2);
assert_eq!(2, testee.get_count());
assert_eq!(((cost1 + cost2) / 2 + cost2) / 2, testee.get_average());
assert_eq!((cost1 + cost2) / 2, testee.get_mode());
assert_eq!(&((cost1 + cost2) / 2), testee.get_cost(&key1).unwrap());
assert_eq!(&cost2, testee.get_cost(&key2).unwrap());
// expected key1 cost = (mean + 2*std) = (105 + 2*5) = 115
let expected_cost = 115;
assert_eq!(expected_cost, testee.get_cost(&key1).unwrap());
assert_eq!(cost2, testee.get_cost(&key2).unwrap());
}
#[test]
@ -258,33 +254,31 @@ mod tests {
// insert one record
testee.upsert(&key1, cost1);
assert_eq!(1, testee.get_count());
assert_eq!(&cost1, testee.get_cost(&key1).unwrap());
assert_eq!(cost1, testee.get_cost(&key1).unwrap());
// insert 2nd record
testee.upsert(&key2, cost2);
assert_eq!(2, testee.get_count());
assert_eq!(&cost1, testee.get_cost(&key1).unwrap());
assert_eq!(&cost2, testee.get_cost(&key2).unwrap());
assert_eq!(cost1, testee.get_cost(&key1).unwrap());
assert_eq!(cost2, testee.get_cost(&key2).unwrap());
// insert 3rd record, pushes out the oldest (eg 1st) record
testee.upsert(&key3, cost3);
assert_eq!(2, testee.get_count());
assert_eq!((cost2 + cost3) / 2_u64, testee.get_average());
assert_eq!(cost3, testee.get_mode());
assert!(testee.get_cost(&key1).is_none());
assert_eq!(&cost2, testee.get_cost(&key2).unwrap());
assert_eq!(&cost3, testee.get_cost(&key3).unwrap());
assert_eq!(cost2, testee.get_cost(&key2).unwrap());
assert_eq!(cost3, testee.get_cost(&key3).unwrap());
// update 2nd record, so the 3rd becomes the oldest
// add 4th record, pushes out 3rd key
testee.upsert(&key2, cost1);
testee.upsert(&key4, cost4);
assert_eq!(((cost1 + cost2) / 2 + cost4) / 2_u64, testee.get_average());
assert_eq!((cost1 + cost2) / 2, testee.get_mode());
assert_eq!(2, testee.get_count());
assert!(testee.get_cost(&key1).is_none());
assert_eq!(&((cost1 + cost2) / 2), testee.get_cost(&key2).unwrap());
// expected key2 cost = (mean + 2*std) = (105 + 2*5) = 115
let expected_cost_2 = 115;
assert_eq!(expected_cost_2, testee.get_cost(&key2).unwrap());
assert!(testee.get_cost(&key3).is_none());
assert_eq!(&cost4, testee.get_cost(&key4).unwrap());
assert_eq!(cost4, testee.get_cost(&key4).unwrap());
}
}