Add snapshotting integration test (#5519)

* Add snapshotting integration test

* Update ContactInfo on restart in local cluster nodes
This commit is contained in:
carllin 2019-08-21 23:59:11 -07:00 committed by GitHub
parent c18ea3ccc9
commit 087c43b9ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 214 additions and 28 deletions

2
Cargo.lock generated
View File

@ -3535,6 +3535,7 @@ name = "solana-local-cluster"
version = "0.18.0-pre2" version = "0.18.0-pre2"
dependencies = [ dependencies = [
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
"serial_test 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "serial_test 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serial_test_derive 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "serial_test_derive 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"solana-client 0.18.0-pre2", "solana-client 0.18.0-pre2",
@ -3547,6 +3548,7 @@ dependencies = [
"solana-storage-program 0.18.0-pre2", "solana-storage-program 0.18.0-pre2",
"solana-vote-api 0.18.0-pre2", "solana-vote-api 0.18.0-pre2",
"symlink 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "symlink 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]] [[package]]

View File

@ -238,6 +238,7 @@ impl BankForks {
.snapshot_config .snapshot_config
.as_ref() .as_ref()
.expect("Called package_snapshot without a snapshot configuration"); .expect("Called package_snapshot without a snapshot configuration");
info!("setting snapshot root: {}", root);
if root - self.slots_since_snapshot[0] >= config.snapshot_interval_slots as u64 { if root - self.slots_since_snapshot[0] >= config.snapshot_interval_slots as u64 {
let mut snapshot_time = Measure::start("total-snapshot-ms"); let mut snapshot_time = Measure::start("total-snapshot-ms");
let r = self.generate_snapshot( let r = self.generate_snapshot(

View File

@ -1,8 +1,9 @@
use crate::validator::ValidatorConfig;
use solana_client::thin_client::ThinClient; use solana_client::thin_client::ThinClient;
use solana_sdk::pubkey::Pubkey; use solana_sdk::pubkey::Pubkey;
pub trait Cluster { pub trait Cluster {
fn get_node_pubkeys(&self) -> Vec<Pubkey>; fn get_node_pubkeys(&self) -> Vec<Pubkey>;
fn get_validator_client(&self, pubkey: &Pubkey) -> Option<ThinClient>; fn get_validator_client(&self, pubkey: &Pubkey) -> Option<ThinClient>;
fn restart_node(&mut self, pubkey: Pubkey); fn restart_node(&mut self, pubkey: Pubkey, config: &ValidatorConfig);
} }

View File

@ -14,6 +14,7 @@ use std::fs::File;
use std::io::{BufReader, BufWriter, Error as IOError, ErrorKind}; use std::io::{BufReader, BufWriter, Error as IOError, ErrorKind};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use tar::Archive; use tar::Archive;
use tempfile::TempDir;
const SNAPSHOT_STATUS_CACHE_FILE_NAME: &str = "status_cache"; const SNAPSHOT_STATUS_CACHE_FILE_NAME: &str = "status_cache";
@ -57,8 +58,6 @@ pub fn package_snapshot<P: AsRef<Path>, Q: AsRef<Path>>(
snapshot_package_output_file: P, snapshot_package_output_file: P,
snapshot_path: Q, snapshot_path: Q,
) -> Result<SnapshotPackage> { ) -> Result<SnapshotPackage> {
let slot = bank.slot();
// Hard link all the snapshots we need for this package // Hard link all the snapshots we need for this package
let snapshot_hard_links_dir = tempfile::tempdir_in(snapshot_path)?; let snapshot_hard_links_dir = tempfile::tempdir_in(snapshot_path)?;
@ -73,7 +72,7 @@ pub fn package_snapshot<P: AsRef<Path>, Q: AsRef<Path>>(
// Create a snapshot package // Create a snapshot package
info!( info!(
"Snapshot for bank: {} has {} account storage entries", "Snapshot for bank: {} has {} account storage entries",
slot, bank.slot(),
account_storage_entries.len() account_storage_entries.len()
); );
@ -172,6 +171,20 @@ pub fn remove_snapshot<P: AsRef<Path>>(slot: u64, snapshot_path: P) -> Result<()
Ok(()) Ok(())
} }
/// Reads the slot of the bank stored in a snapshot archive.
///
/// The archive at `snapshot_tar` is unpacked into a fresh temporary directory, the
/// snapshots under `TAR_SNAPSHOTS_DIR` are enumerated with `get_snapshot_paths`, and
/// the bank in the last entry of that listing is deserialized so its slot can be
/// returned.
///
/// # Errors
///
/// Fails if the temporary directory cannot be created, the archive cannot be
/// unpacked, no snapshot is found in the unpacked directory, the snapshot file
/// cannot be opened, or the bank cannot be deserialized.
pub fn bank_slot_from_archive<P: AsRef<Path>>(snapshot_tar: P) -> Result<u64> {
    // Scratch directory that receives the unpacked archive; it must stay alive
    // until the snapshot file has been read.
    let unpack_dir = TempDir::new()?;
    untar_snapshot_in(&snapshot_tar, &unpack_dir)?;

    let snapshots_dir = unpack_dir.path().join(TAR_SNAPSHOTS_DIR);
    let all_snapshots = get_snapshot_paths(&snapshots_dir);
    let newest = all_snapshots.last();
    let newest =
        newest.ok_or_else(|| get_io_error("No snapshots found in snapshots directory"))?;

    let mut reader = BufReader::new(File::open(&newest.snapshot_file_path)?);
    let bank: Bank = deserialize_from(&mut reader).map_err(|e| get_io_error(&e.to_string()))?;
    let slot = bank.slot();
    Ok(slot)
}
pub fn bank_from_archive<P: AsRef<Path>>( pub fn bank_from_archive<P: AsRef<Path>>(
account_paths: String, account_paths: String,
snapshot_config: &SnapshotConfig, snapshot_config: &SnapshotConfig,

View File

@ -10,6 +10,7 @@ homepage = "https://solana.com/"
[dependencies] [dependencies]
log = "0.4.8" log = "0.4.8"
rand = "0.6.5"
solana-core = { path = "../core", version = "0.18.0-pre2" } solana-core = { path = "../core", version = "0.18.0-pre2" }
solana-client = { path = "../client", version = "0.18.0-pre2" } solana-client = { path = "../client", version = "0.18.0-pre2" }
solana-logger = { path = "../logger", version = "0.18.0-pre2" } solana-logger = { path = "../logger", version = "0.18.0-pre2" }
@ -20,6 +21,7 @@ solana-storage-api = { path = "../programs/storage_api", version = "0.18.0-pre2"
solana-storage-program = { path = "../programs/storage_program", version = "0.18.0-pre2" } solana-storage-program = { path = "../programs/storage_program", version = "0.18.0-pre2" }
solana-vote-api = { path = "../programs/vote_api", version = "0.18.0-pre2" } solana-vote-api = { path = "../programs/vote_api", version = "0.18.0-pre2" }
symlink = "0.1.0" symlink = "0.1.0"
tempfile = "3.1.0"
[dev-dependencies] [dev-dependencies]
serial_test = "0.2.0" serial_test = "0.2.0"

View File

@ -1,3 +1,4 @@
use rand::{thread_rng, Rng};
use solana_client::thin_client::create_client; use solana_client::thin_client::create_client;
/// Cluster independent integration tests /// Cluster independent integration tests
/// ///
@ -25,7 +26,12 @@ use solana_sdk::{
}, },
transport::TransportError, transport::TransportError,
}; };
use std::{collections::HashSet, path::Path, thread::sleep, time::Duration}; use std::{
collections::{HashMap, HashSet},
path::Path,
thread::sleep,
time::Duration,
};
const DEFAULT_SLOT_MILLIS: u64 = (DEFAULT_TICKS_PER_SLOT * 1000) / DEFAULT_TICKS_PER_SECOND; const DEFAULT_SLOT_MILLIS: u64 = (DEFAULT_TICKS_PER_SLOT * 1000) / DEFAULT_TICKS_PER_SECOND;
@ -65,8 +71,25 @@ pub fn spend_and_verify_all_nodes<S: ::std::hash::BuildHasher>(
} }
} }
pub fn send_many_transactions(node: &ContactInfo, funding_keypair: &Keypair, num_txs: u64) { pub fn verify_balances<S: ::std::hash::BuildHasher>(
expected_balances: HashMap<Pubkey, u64, S>,
node: &ContactInfo,
) {
let client = create_client(node.client_facing_addr(), FULLNODE_PORT_RANGE); let client = create_client(node.client_facing_addr(), FULLNODE_PORT_RANGE);
for (pk, b) in expected_balances {
let bal = client.poll_get_balance(&pk).expect("balance in source");
assert_eq!(bal, b);
}
}
pub fn send_many_transactions(
node: &ContactInfo,
funding_keypair: &Keypair,
max_tokens_per_transfer: u64,
num_txs: u64,
) -> HashMap<Pubkey, u64> {
let client = create_client(node.client_facing_addr(), FULLNODE_PORT_RANGE);
let mut expected_balances = HashMap::new();
for _ in 0..num_txs { for _ in 0..num_txs {
let random_keypair = Keypair::new(); let random_keypair = Keypair::new();
let bal = client let bal = client
@ -74,12 +97,23 @@ pub fn send_many_transactions(node: &ContactInfo, funding_keypair: &Keypair, num
.expect("balance in source"); .expect("balance in source");
assert!(bal > 0); assert!(bal > 0);
let (blockhash, _fee_calculator) = client.get_recent_blockhash().unwrap(); let (blockhash, _fee_calculator) = client.get_recent_blockhash().unwrap();
let mut transaction = let transfer_amount = thread_rng().gen_range(1, max_tokens_per_transfer);
system_transaction::transfer(&funding_keypair, &random_keypair.pubkey(), 1, blockhash);
let mut transaction = system_transaction::transfer(
&funding_keypair,
&random_keypair.pubkey(),
transfer_amount,
blockhash,
);
client client
.retry_transfer(&funding_keypair, &mut transaction, 5) .retry_transfer(&funding_keypair, &mut transaction, 5)
.unwrap(); .unwrap();
expected_balances.insert(random_keypair.pubkey(), transfer_amount);
} }
expected_balances
} }
pub fn fullnode_exit(entry_point_info: &ContactInfo, nodes: usize) { pub fn fullnode_exit(entry_point_info: &ContactInfo, nodes: usize) {

View File

@ -9,3 +9,5 @@ extern crate solana_core;
#[macro_use] #[macro_use]
extern crate solana_storage_program; extern crate solana_storage_program;
extern crate tempfile;

View File

@ -585,19 +585,28 @@ impl Cluster for LocalCluster {
}) })
} }
fn restart_node(&mut self, pubkey: Pubkey) { fn restart_node(&mut self, pubkey: Pubkey, config: &ValidatorConfig) {
// Shut down the fullnode // Shut down the fullnode
let mut node = self.fullnodes.remove(&pubkey).unwrap(); let mut node = self.fullnodes.remove(&pubkey).unwrap();
node.exit(); node.exit();
node.join().unwrap(); node.join().unwrap();
// Restart the node // Update the stored ContactInfo for this node
let fullnode_info = &self.fullnode_infos[&pubkey].info; let node_pubkey = &self.fullnode_infos[&pubkey].info.keypair.pubkey();
let config = &self.fullnode_infos[&pubkey].config; let node = Node::new_localhost_with_pubkey(&node_pubkey);
let node = Node::new_localhost_with_pubkey(&fullnode_info.keypair.pubkey()); self.fullnode_infos
.get_mut(&pubkey)
.unwrap()
.info
.contact_info = node.info.clone();
if pubkey == self.entry_point_info.id { if pubkey == self.entry_point_info.id {
self.entry_point_info = node.info.clone(); self.entry_point_info = node.info.clone();
} }
// Restart the node
self.fullnode_infos.get_mut(&pubkey).unwrap().config = config.clone();
let fullnode_info = &self.fullnode_infos[&pubkey].info;
let restarted_node = Validator::new( let restarted_node = Validator::new(
node, node,
&fullnode_info.keypair, &fullnode_info.keypair,

View File

@ -3,16 +3,25 @@ extern crate solana_core;
use log::*; use log::*;
use serial_test_derive::serial; use serial_test_derive::serial;
use solana_core::{ use solana_core::{
blocktree::Blocktree, broadcast_stage::BroadcastStageType, cluster::Cluster, bank_forks::SnapshotConfig, blocktree::Blocktree, broadcast_stage::BroadcastStageType,
gossip_service::discover_cluster, validator::ValidatorConfig, cluster::Cluster, gossip_service::discover_cluster, snapshot_utils, validator::ValidatorConfig,
}; };
use solana_local_cluster::{ use solana_local_cluster::{
cluster_tests, cluster_tests,
local_cluster::{ClusterConfig, LocalCluster}, local_cluster::{ClusterConfig, LocalCluster},
}; };
use solana_runtime::epoch_schedule::{EpochSchedule, MINIMUM_SLOTS_PER_EPOCH}; use solana_runtime::{
accounts_db::AccountsDB,
epoch_schedule::{EpochSchedule, MINIMUM_SLOTS_PER_EPOCH},
};
use solana_sdk::{client::SyncClient, poh_config::PohConfig, timing}; use solana_sdk::{client::SyncClient, poh_config::PohConfig, timing};
use std::{collections::HashSet, thread::sleep, time::Duration}; use std::{
collections::{HashMap, HashSet},
fs,
thread::sleep,
time::Duration,
};
use tempfile::TempDir;
#[test] #[test]
#[serial] #[serial]
@ -232,7 +241,7 @@ fn test_forwarding() {
.unwrap(); .unwrap();
// Confirm that transactions were forwarded to and processed by the leader. // Confirm that transactions were forwarded to and processed by the leader.
cluster_tests::send_many_transactions(&validator_info, &cluster.funding_keypair, 20); cluster_tests::send_many_transactions(&validator_info, &cluster.funding_keypair, 10, 20);
} }
#[test] #[test]
@ -242,10 +251,11 @@ fn test_restart_node() {
error!("test_restart_node"); error!("test_restart_node");
let slots_per_epoch = MINIMUM_SLOTS_PER_EPOCH as u64; let slots_per_epoch = MINIMUM_SLOTS_PER_EPOCH as u64;
let ticks_per_slot = 16; let ticks_per_slot = 16;
let validator_config = ValidatorConfig::default();
let mut cluster = LocalCluster::new(&ClusterConfig { let mut cluster = LocalCluster::new(&ClusterConfig {
node_stakes: vec![3], node_stakes: vec![3],
cluster_lamports: 100, cluster_lamports: 100,
validator_configs: vec![ValidatorConfig::default()], validator_configs: vec![validator_config.clone()],
ticks_per_slot, ticks_per_slot,
slots_per_epoch, slots_per_epoch,
..ClusterConfig::default() ..ClusterConfig::default()
@ -257,14 +267,19 @@ fn test_restart_node() {
timing::DEFAULT_TICKS_PER_SLOT, timing::DEFAULT_TICKS_PER_SLOT,
slots_per_epoch, slots_per_epoch,
); );
cluster.restart_node(nodes[0]); cluster.restart_node(nodes[0], &validator_config);
cluster_tests::sleep_n_epochs( cluster_tests::sleep_n_epochs(
0.5, 0.5,
&cluster.genesis_block.poh_config, &cluster.genesis_block.poh_config,
timing::DEFAULT_TICKS_PER_SLOT, timing::DEFAULT_TICKS_PER_SLOT,
slots_per_epoch, slots_per_epoch,
); );
cluster_tests::send_many_transactions(&cluster.entry_point_info, &cluster.funding_keypair, 1); cluster_tests::send_many_transactions(
&cluster.entry_point_info,
&cluster.funding_keypair,
10,
1,
);
} }
#[test] #[test]
@ -282,6 +297,100 @@ fn test_listener_startup() {
assert_eq!(cluster_nodes.len(), 4); assert_eq!(cluster_nodes.len(), 4);
} }
/// Repeatedly restarts a single snapshotting validator and checks that the balances
/// created before each restart are still visible afterwards, and that the node keeps
/// accepting transactions.
#[test]
#[serial]
fn test_snapshots_restart_validity() {
    // Scratch layout: one directory for bank state snapshots, one for the packaged tar.
    let scratch_dir = TempDir::new().unwrap();
    let bank_states_dir = scratch_dir.path().join("bank_states");
    let tar_output_dir = scratch_dir.path().join("tar");
    let snapshot_interval_slots = 25;

    fs::create_dir_all(&bank_states_dir).expect("Failed to create snapshots bank state directory");
    fs::create_dir_all(&tar_output_dir).expect("Failed to create snapshots tar directory");

    // Configure the single validator so that it produces snapshots.
    let mut validator_config = ValidatorConfig::default();
    validator_config.rpc_config.enable_fullnode_exit = true;
    validator_config.snapshot_config = Some(SnapshotConfig::new(
        bank_states_dir,
        tar_output_dir.clone(),
        snapshot_interval_slots,
    ));

    let num_account_paths = 4;
    let (initial_dirs, initial_paths) = generate_account_paths(num_account_paths);
    // Hold on to every set of account directories handed out during the test.
    let mut live_account_dirs = vec![initial_dirs];
    validator_config.account_paths = Some(initial_paths);

    let cluster_config = ClusterConfig {
        node_stakes: vec![10000],
        cluster_lamports: 100000,
        validator_configs: vec![validator_config.clone()],
        ..ClusterConfig::default()
    };

    // Boot once, then restart the node from its snapshot `num_runs` times.
    let num_runs = 3;
    let mut expected_balances = HashMap::new();
    let mut cluster = LocalCluster::new(&cluster_config);
    for _ in 0..num_runs {
        // Fund a batch of fresh accounts and remember what each one should hold.
        trace!("Sending transactions");
        let funded = cluster_tests::send_many_transactions(
            &cluster.entry_point_info,
            &cluster.funding_keypair,
            10,
            10,
        );
        expected_balances.extend(funded);

        // Slot observed after the transfers above were sent.
        let client = cluster
            .get_validator_client(&cluster.entry_point_info.id)
            .unwrap();
        let last_slot = client.get_slot().expect("Couldn't get slot");

        // Block until a snapshot of a bank at or past `last_slot` exists, so the archive
        // is known to contain the transfers just made.
        // NOTE(review): this wait has no timeout; it only ends when the condition holds
        // or reading the archive panics.
        let tar = snapshot_utils::get_snapshot_tar_path(&tar_output_dir);
        trace!("Waiting for tar to be generated");
        while !tar.exists() || snapshot_utils::bank_slot_from_archive(&tar).unwrap() < last_slot {
            sleep(Duration::from_millis(100));
        }

        // Use brand new account paths for the restarted node: fullnode exit is not
        // guaranteed to clean up RPC threads, and those may delete the old accounts
        // at any point while exiting.
        let (fresh_dirs, fresh_paths) = generate_account_paths(num_account_paths);
        live_account_dirs.push(fresh_dirs);
        validator_config.account_paths = Some(fresh_paths);

        // Bring the node back up with the updated configuration.
        trace!("Restarting cluster from snapshot");
        let node_pubkeys = cluster.get_node_pubkeys();
        cluster.restart_node(node_pubkeys[0], &validator_config);

        // Every balance recorded so far must be visible on the restarted validator.
        trace!("Verifying balances");
        cluster_tests::verify_balances(expected_balances.clone(), &cluster.entry_point_info);

        // The restarted validator must still process new transfers.
        trace!("Spending and verifying");
        cluster_tests::spend_and_verify_all_nodes(
            &cluster.entry_point_info,
            &cluster.funding_keypair,
            1,
            HashSet::new(),
        );
    }
}
#[allow(unused_attributes)] #[allow(unused_attributes)]
#[test] #[test]
#[serial] #[serial]
@ -453,3 +562,15 @@ fn run_repairman_catchup(num_repairmen: u64) {
sleep(Duration::from_secs(1)); sleep(Duration::from_secs(1));
} }
} }
/// Creates `num_account_paths` temporary directories for account storage.
///
/// Returns the directory handles together with the directories' paths combined into a
/// single string by `AccountsDB::format_paths`.
///
/// Panics if a temporary directory cannot be created or a path is not valid UTF-8.
fn generate_account_paths(num_account_paths: usize) -> (Vec<TempDir>, String) {
    // Phase one: create every directory.
    let mut dirs: Vec<TempDir> = Vec::with_capacity(num_account_paths);
    for _ in 0..num_account_paths {
        dirs.push(TempDir::new().unwrap());
    }

    // Phase two: render each directory path as an owned string.
    let mut path_strings: Vec<String> = Vec::with_capacity(dirs.len());
    for dir in &dirs {
        path_strings.push(dir.path().to_str().unwrap().to_string());
    }

    let joined_paths = AccountsDB::format_paths(path_strings);
    (dirs, joined_paths)
}

View File

@ -427,13 +427,10 @@ impl AccountsDB {
} }
} }
pub fn paths(&self) -> String { pub fn format_paths<P: AsRef<Path>>(paths: Vec<P>) -> String {
let paths: Vec<String> = self let paths: Vec<String> = paths
.paths
.read()
.unwrap()
.iter() .iter()
.map(|p| p.to_str().unwrap().to_owned()) .map(|p| p.as_ref().to_str().unwrap().to_owned())
.collect(); .collect();
paths.join(",") paths.join(",")
} }
@ -1417,7 +1414,11 @@ pub mod tests {
let buf = writer.into_inner(); let buf = writer.into_inner();
let mut reader = BufReader::new(&buf[..]); let mut reader = BufReader::new(&buf[..]);
let daccounts = AccountsDB::new(None); let daccounts = AccountsDB::new(None);
let local_paths = daccounts.paths();
let local_paths = {
let paths = daccounts.paths.read().unwrap();
AccountsDB::format_paths(paths.to_vec())
};
let copied_accounts = TempDir::new().unwrap(); let copied_accounts = TempDir::new().unwrap();
// Simulate obtaining a copy of the AppendVecs from a tarball // Simulate obtaining a copy of the AppendVecs from a tarball
copy_append_vecs(&accounts, copied_accounts.path()).unwrap(); copy_append_vecs(&accounts, copied_accounts.path()).unwrap();