- Encapsulate QoS Service metrics reporting within QosServioce, so client (#21191)
code (eg banking_stage) doesn't need to worry about it. - Remove dead cost_* stats from banking_stage, clean up call path.
This commit is contained in:
@@ -9,58 +9,69 @@ use {
|
||||
cost_model::{CostModel, TransactionCost},
|
||||
cost_tracker::CostTrackerError,
|
||||
},
|
||||
solana_sdk::transaction::{self, SanitizedTransaction, TransactionError},
|
||||
std::sync::{Arc, RwLock},
|
||||
solana_sdk::{
|
||||
timing::AtomicInterval,
|
||||
transaction::{self, SanitizedTransaction, TransactionError},
|
||||
},
|
||||
std::{
|
||||
sync::{
|
||||
atomic::{AtomicBool, AtomicU64, Ordering},
|
||||
Arc, RwLock,
|
||||
},
|
||||
thread::{self, Builder, JoinHandle},
|
||||
time::Duration,
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct QosServiceStats {
|
||||
compute_cost_time: u64,
|
||||
cost_tracking_time: u64,
|
||||
selected_txs_count: u64,
|
||||
retried_txs_per_block_limit_count: u64,
|
||||
retried_txs_per_account_limit_count: u64,
|
||||
}
|
||||
|
||||
impl QosServiceStats {
|
||||
pub fn report(&mut self) {
|
||||
datapoint_info!(
|
||||
"qos-service-stats",
|
||||
("compute_cost_time", self.compute_cost_time, i64),
|
||||
("cost_tracking_time", self.cost_tracking_time, i64),
|
||||
("selected_txs_count", self.selected_txs_count, i64),
|
||||
(
|
||||
"retried_txs_per_block_limit_count",
|
||||
self.retried_txs_per_block_limit_count,
|
||||
i64
|
||||
),
|
||||
(
|
||||
"retried_txs_per_account_limit_count",
|
||||
self.retried_txs_per_account_limit_count,
|
||||
i64
|
||||
),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub struct QosService {
|
||||
cost_model: Arc<RwLock<CostModel>>,
|
||||
metrics: Arc<QosServiceMetrics>,
|
||||
reporting_thread: Option<JoinHandle<()>>,
|
||||
running_flag: Arc<AtomicBool>,
|
||||
}
|
||||
|
||||
impl Drop for QosService {
|
||||
fn drop(&mut self) {
|
||||
self.running_flag.store(false, Ordering::Relaxed);
|
||||
self.reporting_thread
|
||||
.take()
|
||||
.unwrap()
|
||||
.join()
|
||||
.expect("qos service metrics reporting thread failed to join");
|
||||
}
|
||||
}
|
||||
|
||||
impl QosService {
|
||||
pub fn new(cost_model: Arc<RwLock<CostModel>>) -> Self {
|
||||
Self { cost_model }
|
||||
let running_flag = Arc::new(AtomicBool::new(true));
|
||||
let metrics = Arc::new(QosServiceMetrics::default());
|
||||
|
||||
let running_flag_clone = running_flag.clone();
|
||||
let metrics_clone = metrics.clone();
|
||||
let reporting_thread = Some(
|
||||
Builder::new()
|
||||
.name("solana-qos-service-metrics-repoting".to_string())
|
||||
.spawn(move || {
|
||||
Self::reporting_loop(running_flag_clone, metrics_clone);
|
||||
})
|
||||
.unwrap(),
|
||||
);
|
||||
Self {
|
||||
cost_model,
|
||||
metrics,
|
||||
reporting_thread,
|
||||
running_flag,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn compute_transaction_costs<'a>(
|
||||
&self,
|
||||
transactions: impl Iterator<Item = &'a SanitizedTransaction>,
|
||||
demote_program_write_locks: bool,
|
||||
stats: &mut QosServiceStats,
|
||||
) -> Vec<TransactionCost> {
|
||||
let mut compute_cost_time = Measure::start("compute_cost_time");
|
||||
let cost_model = self.cost_model.read().unwrap();
|
||||
let txs_costs = transactions
|
||||
let txs_costs: Vec<_> = transactions
|
||||
.map(|tx| {
|
||||
let cost = cost_model.calculate_cost(tx, demote_program_write_locks);
|
||||
debug!(
|
||||
@@ -73,7 +84,12 @@ impl QosService {
|
||||
})
|
||||
.collect();
|
||||
compute_cost_time.stop();
|
||||
stats.compute_cost_time += compute_cost_time.as_us();
|
||||
self.metrics
|
||||
.compute_cost_time
|
||||
.fetch_add(compute_cost_time.as_us(), Ordering::Relaxed);
|
||||
self.metrics
|
||||
.compute_cost_count
|
||||
.fetch_add(txs_costs.len() as u64, Ordering::Relaxed);
|
||||
txs_costs
|
||||
}
|
||||
|
||||
@@ -84,7 +100,6 @@ impl QosService {
|
||||
transactions: impl Iterator<Item = &'a SanitizedTransaction>,
|
||||
transactions_costs: impl Iterator<Item = &'a TransactionCost>,
|
||||
bank: &Arc<Bank>,
|
||||
stats: &mut QosServiceStats,
|
||||
) -> Vec<transaction::Result<()>> {
|
||||
let mut cost_tracking_time = Measure::start("cost_tracking_time");
|
||||
let mut cost_tracker = bank.write_cost_tracker().unwrap();
|
||||
@@ -93,18 +108,18 @@ impl QosService {
|
||||
.map(|(tx, cost)| match cost_tracker.try_add(tx, cost) {
|
||||
Ok(current_block_cost) => {
|
||||
debug!("slot {:?}, transaction {:?}, cost {:?}, fit into current block, current block cost {}", bank.slot(), tx, cost, current_block_cost);
|
||||
stats.selected_txs_count += 1;
|
||||
self.metrics.selected_txs_count.fetch_add(1, Ordering::Relaxed);
|
||||
Ok(())
|
||||
},
|
||||
Err(e) => {
|
||||
debug!("slot {:?}, transaction {:?}, cost {:?}, not fit into current block, '{:?}'", bank.slot(), tx, cost, e);
|
||||
match e {
|
||||
CostTrackerError::WouldExceedBlockMaxLimit => {
|
||||
stats.retried_txs_per_block_limit_count += 1;
|
||||
self.metrics.retried_txs_per_block_limit_count.fetch_add(1, Ordering::Relaxed);
|
||||
Err(TransactionError::WouldExceedMaxBlockCostLimit)
|
||||
}
|
||||
CostTrackerError::WouldExceedAccountMaxLimit => {
|
||||
stats.retried_txs_per_account_limit_count += 1;
|
||||
self.metrics.retried_txs_per_account_limit_count.fetch_add(1, Ordering::Relaxed);
|
||||
Err(TransactionError::WouldExceedMaxAccountCostLimit)
|
||||
}
|
||||
}
|
||||
@@ -112,9 +127,72 @@ impl QosService {
|
||||
})
|
||||
.collect();
|
||||
cost_tracking_time.stop();
|
||||
stats.cost_tracking_time += cost_tracking_time.as_us();
|
||||
self.metrics
|
||||
.cost_tracking_time
|
||||
.fetch_add(cost_tracking_time.as_us(), Ordering::Relaxed);
|
||||
select_results
|
||||
}
|
||||
|
||||
fn reporting_loop(running_flag: Arc<AtomicBool>, metrics: Arc<QosServiceMetrics>) {
|
||||
while running_flag.load(Ordering::Relaxed) {
|
||||
// hardcode to report every 1000ms
|
||||
metrics.report(1000u64);
|
||||
thread::sleep(Duration::from_millis(100));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct QosServiceMetrics {
|
||||
last_report: AtomicInterval,
|
||||
compute_cost_time: AtomicU64,
|
||||
compute_cost_count: AtomicU64,
|
||||
cost_tracking_time: AtomicU64,
|
||||
selected_txs_count: AtomicU64,
|
||||
retried_txs_per_block_limit_count: AtomicU64,
|
||||
retried_txs_per_account_limit_count: AtomicU64,
|
||||
}
|
||||
|
||||
impl QosServiceMetrics {
|
||||
pub fn report(&self, report_interval_ms: u64) {
|
||||
if self.last_report.should_update(report_interval_ms) {
|
||||
datapoint_info!(
|
||||
"qos-service-stats",
|
||||
(
|
||||
"compute_cost_time",
|
||||
self.compute_cost_time.swap(0, Ordering::Relaxed) as i64,
|
||||
i64
|
||||
),
|
||||
(
|
||||
"compute_cost_count",
|
||||
self.compute_cost_count.swap(0, Ordering::Relaxed) as i64,
|
||||
i64
|
||||
),
|
||||
(
|
||||
"cost_tracking_time",
|
||||
self.cost_tracking_time.swap(0, Ordering::Relaxed) as i64,
|
||||
i64
|
||||
),
|
||||
(
|
||||
"selected_txs_count",
|
||||
self.selected_txs_count.swap(0, Ordering::Relaxed) as i64,
|
||||
i64
|
||||
),
|
||||
(
|
||||
"retried_txs_per_block_limit_count",
|
||||
self.retried_txs_per_block_limit_count
|
||||
.swap(0, Ordering::Relaxed) as i64,
|
||||
i64
|
||||
),
|
||||
(
|
||||
"retried_txs_per_account_limit_count",
|
||||
self.retried_txs_per_account_limit_count
|
||||
.swap(0, Ordering::Relaxed) as i64,
|
||||
i64
|
||||
),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -135,7 +213,7 @@ mod tests {
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn test_compute_transactions_costs() {
|
||||
fn test_compute_transaction_costs() {
|
||||
solana_logger::setup();
|
||||
|
||||
// make a vec of txs
|
||||
@@ -158,11 +236,7 @@ mod tests {
|
||||
|
||||
let cost_model = Arc::new(RwLock::new(CostModel::default()));
|
||||
let qos_service = QosService::new(cost_model.clone());
|
||||
let txs_costs = qos_service.compute_transaction_costs(
|
||||
txs.iter(),
|
||||
false,
|
||||
&mut QosServiceStats::default(),
|
||||
);
|
||||
let txs_costs = qos_service.compute_transaction_costs(txs.iter(), false);
|
||||
|
||||
// verify the size of txs_costs and its contents
|
||||
assert_eq!(txs_costs.len(), txs.len());
|
||||
@@ -214,23 +288,14 @@ mod tests {
|
||||
let txs = vec![transfer_tx.clone(), vote_tx.clone(), transfer_tx, vote_tx];
|
||||
|
||||
let qos_service = QosService::new(cost_model);
|
||||
let txs_costs = qos_service.compute_transaction_costs(
|
||||
txs.iter(),
|
||||
false,
|
||||
&mut QosServiceStats::default(),
|
||||
);
|
||||
let txs_costs = qos_service.compute_transaction_costs(txs.iter(), false);
|
||||
|
||||
// set cost tracker limit to fit 1 transfer tx, vote tx bypasses limit check
|
||||
let cost_limit = transfer_tx_cost;
|
||||
bank.write_cost_tracker()
|
||||
.unwrap()
|
||||
.set_limits(cost_limit, cost_limit);
|
||||
let results = qos_service.select_transactions_per_cost(
|
||||
txs.iter(),
|
||||
txs_costs.iter(),
|
||||
&bank,
|
||||
&mut QosServiceStats::default(),
|
||||
);
|
||||
let results = qos_service.select_transactions_per_cost(txs.iter(), txs_costs.iter(), &bank);
|
||||
|
||||
// verify that first transfer tx and all votes are allowed
|
||||
assert_eq!(results.len(), txs.len());
|
||||
@@ -239,4 +304,46 @@ mod tests {
|
||||
assert!(results[2].is_err());
|
||||
assert!(results[3].is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_async_report_metrics() {
|
||||
solana_logger::setup();
|
||||
|
||||
// make a vec of txs
|
||||
let txs_count = 2048usize;
|
||||
let keypair = Keypair::new();
|
||||
let transfer_tx = SanitizedTransaction::from_transaction_for_tests(
|
||||
system_transaction::transfer(&keypair, &keypair.pubkey(), 1, Hash::default()),
|
||||
);
|
||||
let mut txs_1 = Vec::with_capacity(txs_count);
|
||||
let mut txs_2 = Vec::with_capacity(txs_count);
|
||||
for _i in 0..txs_count {
|
||||
txs_1.push(transfer_tx.clone());
|
||||
txs_2.push(transfer_tx.clone());
|
||||
}
|
||||
|
||||
let cost_model = Arc::new(RwLock::new(CostModel::default()));
|
||||
let qos_service = Arc::new(QosService::new(cost_model));
|
||||
let qos_service_1 = qos_service.clone();
|
||||
let qos_service_2 = qos_service.clone();
|
||||
|
||||
let th_1 = thread::spawn(move || {
|
||||
qos_service_1.compute_transaction_costs(txs_1.iter(), false);
|
||||
});
|
||||
|
||||
let th_2 = thread::spawn(move || {
|
||||
qos_service_2.compute_transaction_costs(txs_2.iter(), false);
|
||||
});
|
||||
|
||||
th_1.join().expect("qos service 1 faield to join");
|
||||
th_2.join().expect("qos service 2 faield to join");
|
||||
|
||||
assert_eq!(
|
||||
txs_count as u64 * 2,
|
||||
qos_service
|
||||
.metrics
|
||||
.compute_cost_count
|
||||
.load(Ordering::Relaxed)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user