* Discard pre hard fork persisted tower if hard-forking
* Relax config.require_tower
* Add cluster test
* nits
* Remove unnecessary check
Co-authored-by: Ryo Onodera <ryoqun@gmail.com>
Co-authored-by: Carl Lin <carl@solana.com>
(cherry picked from commit 9821a7754c)
Co-authored-by: carllin <wumu727@gmail.com>
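
In short: if the validator is restarting into a hard fork (its root bank slot equals the --wait-for-supermajority slot), the tower persisted before the fork describes a chain that is being abandoned, so tower restoration is deliberately failed and the --require-tower check is relaxed so the node can rebuild its tower from the root bank. A condensed sketch of that decision, simplified from the post_process_restored_tower() change below (names follow the diff; this is not the literal code):

    // Sketch only: the restore-time decision added below, with logging and plumbing elided.
    fn discard_tower_if_hard_forking(
        restored: Result<Tower, TowerError>,
        wait_for_supermajority: Option<Slot>,
        root_slot: Slot,
        should_require_tower: &mut bool,
    ) -> Result<Tower, TowerError> {
        if let Some(hard_fork_slot) = wait_for_supermajority {
            if root_slot == hard_fork_slot {
                // The persisted tower predates the hard fork; don't reuse it.
                *should_require_tower = false;
                return Err(TowerError::HardFork(hard_fork_slot));
            }
        }
        restored
    }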
@@ -1182,6 +1182,9 @@ pub enum TowerError {
     #[error("The tower is fatally inconsistent with blockstore: {0}")]
     FatallyInconsistent(&'static str),
+
+    #[error("The tower is useless because of new hard fork: {0}")]
+    HardFork(Slot),
 }
 
 impl TowerError {
@@ -720,11 +720,38 @@ fn post_process_restored_tower(
     ledger_path: &Path,
     bank_forks: &BankForks,
 ) -> Tower {
+    let mut should_require_tower = config.require_tower;
+
     restored_tower
         .and_then(|tower| {
             let root_bank = bank_forks.root_bank();
             let slot_history = root_bank.get_slot_history();
-            tower.adjust_lockouts_after_replay(root_bank.slot(), &slot_history)
+            let tower = tower.adjust_lockouts_after_replay(root_bank.slot(), &slot_history);
+
+            if let Some(wait_slot_for_supermajority) = config.wait_for_supermajority {
+                if root_bank.slot() == wait_slot_for_supermajority {
+                    // intentionally fail to restore tower; we're supposedly in a new hard fork; past
+                    // out-of-chain vote state doesn't make sense at all
+                    // what if --wait-for-supermajority again if the validator restarted?
+                    let message = format!("Hardfork is detected; discarding tower restoration result: {:?}", tower);
+                    datapoint_error!(
+                        "tower_error",
+                        (
+                            "error",
+                            message,
+                            String
+                        ),
+                    );
+                    error!("{}", message);
+
+                    // unconditionally relax tower requirement so that we can always restore tower
+                    // from root bank.
+                    should_require_tower = false;
+                    return Err(crate::consensus::TowerError::HardFork(wait_slot_for_supermajority));
+                }
+            }
+
+            tower
         })
         .unwrap_or_else(|err| {
             let voting_has_been_active =
@@ -739,7 +766,7 @@ fn post_process_restored_tower(
                     ),
                 );
             }
-            if config.require_tower && voting_has_been_active {
+            if should_require_tower && voting_has_been_active {
                 error!("Requested mandatory tower restore failed: {}", err);
                 error!(
                     "And there is an existing vote_account containing actual votes. \
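
With the two hunks above, a hard-fork restart surfaces as TowerError::HardFork inside the and_then closure, falls through to the existing unwrap_or_else recovery that rebuilds a tower from the root bank, and the should_require_tower = false relaxation keeps that intentional failure from tripping the mandatory --require-tower error. A hypothetical helper for telling the intentional discard apart from genuinely fatal tower errors could look like this (illustration only, not part of the change):

    // Hypothetical: true when the restore failure is the deliberate hard-fork discard.
    fn is_hard_fork_discard(err: &crate::consensus::TowerError) -> bool {
        matches!(err, crate::consensus::TowerError::HardFork(_))
    }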
@@ -40,5 +40,15 @@ pub trait Cluster {
     fn get_contact_info(&self, pubkey: &Pubkey) -> Option<&ContactInfo>;
     fn exit_node(&mut self, pubkey: &Pubkey) -> ClusterValidatorInfo;
     fn restart_node(&mut self, pubkey: &Pubkey, cluster_validator_info: ClusterValidatorInfo);
+    fn create_restart_context(
+        &mut self,
+        pubkey: &Pubkey,
+        cluster_validator_info: &mut ClusterValidatorInfo,
+    ) -> (solana_core::cluster_info::Node, Option<ContactInfo>);
+    fn restart_node_with_context(
+        cluster_validator_info: ClusterValidatorInfo,
+        restart_context: (solana_core::cluster_info::Node, Option<ContactInfo>),
+    ) -> ClusterValidatorInfo;
+    fn add_node(&mut self, pubkey: &Pubkey, cluster_validator_info: ClusterValidatorInfo);
     fn exit_restart_node(&mut self, pubkey: &Pubkey, config: ValidatorConfig);
 }
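
The trait is split this way so the blocking part of a restart can run without holding the cluster lock: create_restart_context needs &mut self only briefly, while restart_node_with_context takes no self at all and may block inside Validator::new() (for example under --wait-for-supermajority). The intended call pattern mirrors the new cluster test further below; a sketch, where cluster, pubkey, and validator_info are assumed bindings and validator_info comes from exit_node():

    // Sketch of the split restart flow across a thread; not code from the tree.
    let cluster = std::sync::Arc::new(std::sync::Mutex::new(local_cluster));
    let cluster_for_thread = cluster.clone();
    let handle = std::thread::spawn(move || {
        // Hold the lock only long enough to build the restart context...
        let restart_context = cluster_for_thread
            .lock()
            .unwrap()
            .create_restart_context(&pubkey, &mut validator_info);
        // ...then do the possibly-blocking restart without the lock held.
        let restarted = LocalCluster::restart_node_with_context(validator_info, restart_context);
        cluster_for_thread.lock().unwrap().add_node(&pubkey, restarted);
    });
    handle.join().unwrap();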
@@ -615,7 +615,11 @@ impl Cluster for LocalCluster {
         node
     }
 
-    fn restart_node(&mut self, pubkey: &Pubkey, mut cluster_validator_info: ClusterValidatorInfo) {
+    fn create_restart_context(
+        &mut self,
+        pubkey: &Pubkey,
+        cluster_validator_info: &mut ClusterValidatorInfo,
+    ) -> (solana_core::cluster_info::Node, Option<ContactInfo>) {
         // Update the stored ContactInfo for this node
         let node = Node::new_localhost_with_pubkey(&pubkey);
         cluster_validator_info.info.contact_info = node.info.clone();
@@ -627,10 +631,28 @@ impl Cluster for LocalCluster {
                 self.entry_point_info = node.info.clone();
                 None
             } else {
-                Some(&self.entry_point_info)
+                Some(self.entry_point_info.clone())
             }
         };
 
+        (node, entry_point_info)
+    }
+
+    fn restart_node(&mut self, pubkey: &Pubkey, mut cluster_validator_info: ClusterValidatorInfo) {
+        let restart_context = self.create_restart_context(pubkey, &mut cluster_validator_info);
+        let cluster_validator_info =
+            Self::restart_node_with_context(cluster_validator_info, restart_context);
+        self.add_node(pubkey, cluster_validator_info);
+    }
+
+    fn add_node(&mut self, pubkey: &Pubkey, cluster_validator_info: ClusterValidatorInfo) {
+        self.validators.insert(*pubkey, cluster_validator_info);
+    }
+
+    fn restart_node_with_context(
+        mut cluster_validator_info: ClusterValidatorInfo,
+        (node, entry_point_info): (Node, Option<ContactInfo>),
+    ) -> ClusterValidatorInfo {
         // Restart the node
         let validator_info = &cluster_validator_info.info;
         cluster_validator_info.config.account_paths =
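
Note the Some(self.entry_point_info.clone()) change: the restart context returned by create_restart_context may outlive the &mut self borrow (it is typically consumed on another thread), so it has to own its ContactInfo rather than borrow it from the cluster. The owned value is turned back into the borrowed form the restart call expects at the point of use, as the next hunk shows with entry_point_info.as_ref(); a one-line sketch of that conversion:

    // Owned context -> borrowed argument at the call site.
    let entry_point_ref: Option<&ContactInfo> = entry_point_info.as_ref();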
@@ -641,12 +663,11 @@ impl Cluster for LocalCluster {
             &validator_info.ledger_path,
             &validator_info.voting_keypair.pubkey(),
             vec![validator_info.voting_keypair.clone()],
-            entry_point_info,
+            entry_point_info.as_ref(),
             &cluster_validator_info.config,
         );
 
         cluster_validator_info.validator = Some(restarted_node);
-        self.validators.insert(*pubkey, cluster_validator_info);
+        cluster_validator_info
     }
 
     fn exit_restart_node(&mut self, pubkey: &Pubkey, validator_config: ValidatorConfig) {
@@ -1983,6 +1983,122 @@ fn test_future_tower_master_slave() {
     do_test_future_tower(ClusterMode::MasterSlave);
 }
 
+#[test]
+fn test_hard_fork_invalidates_tower() {
+    solana_logger::setup();
+
+    // First set up the cluster with 2 nodes
+    let slots_per_epoch = 2048;
+    let node_stakes = vec![60, 40];
+
+    let validator_keys = vec![
+        "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
+        "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
+    ]
+    .iter()
+    .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
+    .take(node_stakes.len())
+    .collect::<Vec<_>>();
+    let validators = validator_keys
+        .iter()
+        .map(|(kp, _)| kp.pubkey())
+        .collect::<Vec<_>>();
+
+    let validator_a_pubkey = validators[0];
+    let validator_b_pubkey = validators[1];
+
+    let config = ClusterConfig {
+        cluster_lamports: 100_000,
+        node_stakes: node_stakes.clone(),
+        validator_configs: vec![ValidatorConfig::default(); node_stakes.len()],
+        validator_keys: Some(validator_keys),
+        slots_per_epoch,
+        stakers_slot_offset: slots_per_epoch,
+        skip_warmup_slots: true,
+        ..ClusterConfig::default()
+    };
+    let cluster = std::sync::Arc::new(std::sync::Mutex::new(LocalCluster::new(&config)));
+
+    let val_a_ledger_path = cluster.lock().unwrap().ledger_path(&validator_a_pubkey);
+
+    let min_root = 15;
+    loop {
+        sleep(Duration::from_millis(100));
+
+        if let Some(root) = root_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
+            if root >= min_root {
+                break;
+            }
+        }
+    }
+
+    let mut validator_a_info = cluster.lock().unwrap().exit_node(&validator_a_pubkey);
+    let mut validator_b_info = cluster.lock().unwrap().exit_node(&validator_b_pubkey);
+
+    // setup hard fork at slot < a previously rooted slot!
+    let hard_fork_slot = min_root - 5;
+    let hard_fork_slots = Some(vec![hard_fork_slot]);
+    let mut hard_forks = solana_sdk::hard_forks::HardForks::default();
+    hard_forks.register(hard_fork_slot);
+
+    let expected_shred_version = solana_sdk::shred_version::compute_shred_version(
+        &cluster.lock().unwrap().genesis_config.hash(),
+        Some(&hard_forks),
+    );
+
+    validator_a_info.config.new_hard_forks = hard_fork_slots.clone();
+    validator_a_info.config.wait_for_supermajority = Some(hard_fork_slot);
+    validator_a_info.config.expected_shred_version = Some(expected_shred_version);
+
+    validator_b_info.config.new_hard_forks = hard_fork_slots;
+    validator_b_info.config.wait_for_supermajority = Some(hard_fork_slot);
+    validator_b_info.config.expected_shred_version = Some(expected_shred_version);
+
+    // restart validator A first
+    let cluster_for_a = cluster.clone();
+    // Spawn a thread because wait_for_supermajority blocks in Validator::new()!
+    let thread = std::thread::spawn(move || {
+        let restart_context = cluster_for_a
+            .lock()
+            .unwrap()
+            .create_restart_context(&validator_a_pubkey, &mut validator_a_info);
+        let restarted_validator_info =
+            LocalCluster::restart_node_with_context(validator_a_info, restart_context);
+        cluster_for_a
+            .lock()
+            .unwrap()
+            .add_node(&validator_a_pubkey, restarted_validator_info);
+    });
+
+    // test validator A actually to wait for supermajority
+    let mut last_vote = None;
+    for _ in 0..10 {
+        sleep(Duration::from_millis(1000));
+
+        let new_last_vote = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap();
+        if let Some(last_vote) = last_vote {
+            assert_eq!(last_vote, new_last_vote);
+        } else {
+            last_vote = Some(new_last_vote);
+        }
+    }
+
+    // restart validator B normally
+    cluster
+        .lock()
+        .unwrap()
+        .restart_node(&validator_b_pubkey, validator_b_info);
+
+    // validator A should now start so join its thread here
+    thread.join().unwrap();
+
+    // new slots should be rooted after hard-fork cluster relaunch
+    cluster
+        .lock()
+        .unwrap()
+        .check_for_new_roots(16, &"hard fork");
+}
+
 #[test]
 #[serial]
 fn test_no_optimistic_confirmation_violation_with_tower() {
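
The test places the hard fork at min_root - 5, a slot validator A has already rooted, so A's persisted tower is guaranteed to contain votes past the fork and must be discarded on restart; the ten-second loop then checks that A's last vote stays frozen while it waits alone (60 of 100 stake) for supermajority, until B restarts and new roots appear. Both validators must also agree on the post-fork shred version, which is derived from the genesis hash plus the registered hard forks; a minimal sketch of that derivation, using the same calls as the test (hard_fork_slot and genesis_config assumed in scope):

    // Sketch: deriving the expected shred version after registering a hard fork.
    let mut hard_forks = solana_sdk::hard_forks::HardForks::default();
    hard_forks.register(hard_fork_slot);
    let expected_shred_version = solana_sdk::shred_version::compute_shred_version(
        &genesis_config.hash(),
        Some(&hard_forks),
    );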