// solana/tests/multinode.rs

#[macro_use]
extern crate log;
extern crate bincode;
extern crate serde_json;
extern crate solana;
use solana::crdt::TestNode;
use solana::crdt::{Crdt, ReplicatedData};
use solana::entry_writer::EntryWriter;
use solana::fullnode::{FullNode, InFile, OutFile};
use solana::logger;
use solana::mint::Mint;
use solana::ncp::Ncp;
use solana::signature::{KeyPair, KeyPairUtil, PublicKey};
use solana::streamer::default_window;
use solana::thin_client::ThinClient;
use std::fs::File;
use std::net::UdpSocket;
use std::sync::atomic::AtomicBool;
use std::sync::{Arc, RwLock};
use std::thread::sleep;
use std::time::Duration;
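
/// Spy on the gossip network with a throwaway TestNode and wait (up to 30
/// seconds) until at least `num_nodes` fully-addressed nodes other than the
/// spy appear in its crdt table. Returns those nodes, including the leader.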
fn converge(leader: &ReplicatedData, num_nodes: usize) -> Vec<ReplicatedData> {
    //lets spy on the network
    let exit = Arc::new(AtomicBool::new(false));
    let mut spy = TestNode::new();
    let daddr = "0.0.0.0:0".parse().unwrap();
    let me = spy.data.id.clone();
    spy.data.contact_info.tvu = daddr;
    spy.data.contact_info.rpu = daddr;
    let mut spy_crdt = Crdt::new(spy.data);
    spy_crdt.insert(&leader);
    spy_crdt.set_leader(leader.id);
    let spy_ref = Arc::new(RwLock::new(spy_crdt));
    let spy_window = default_window();
    let ncp = Ncp::new(
        spy_ref.clone(),
        spy_window,
        spy.sockets.gossip,
        spy.sockets.gossip_send,
        exit.clone(),
    ).unwrap();

    //wait for the network to converge
    let mut converged = false;
    let mut rv = vec![];
    for _ in 0..30 {
        let num = spy_ref.read().unwrap().convergence();
        let mut v: Vec<ReplicatedData> = spy_ref
            .read()
            .unwrap()
            .table
            .values()
            .into_iter()
            .filter(|x| x.id != me)
            .filter(|x| x.contact_info.rpu != daddr)
            .cloned()
            .collect();
        if num >= num_nodes as u64 && v.len() >= num_nodes {
            rv.append(&mut v);
            converged = true;
            break;
        }
        sleep(Duration::new(1, 0));
    }
    assert!(converged);
    ncp.close().unwrap();
    rv
}
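
/// Create a Mint with `num` tokens and write its genesis entries to a log
/// file under target/, returning the mint and the ledger path.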
fn genesis(num: i64) -> (Mint, String) {
    let mint = Mint::new(num);
    let id = {
        let ids: Vec<_> = mint.pubkey().iter().map(|id| format!("{}", id)).collect();
        ids.join("")
    };
    let path = format!("target/test_multi_node_dynamic_network-{}.log", id);
    let mut writer = File::create(path.clone()).unwrap();
    EntryWriter::write_entries(&mut writer, mint.create_entries()).unwrap();
    (mint, path.to_string())
}
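
/// Start a leader plus N validators from the same genesis ledger, verify a
/// transfer is visible on every converged node, then boot one more validator
/// and check that it catches up to the same balance as everyone else.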
#[test]
fn test_multi_node_validator_catchup_from_zero() {
    logger::setup();
    const N: usize = 5;
    trace!("test_multi_node_validator_catchup_from_zero");
    let leader = TestNode::new();
    let leader_data = leader.data.clone();
    let bob_pubkey = KeyPair::new().pubkey();
    let (alice, ledger_path) = genesis(10_000);
    let server = FullNode::new(
        leader,
        true,
        InFile::Path(ledger_path.clone()),
        None,
        None,
        None,
    );

    let mut nodes = vec![server];
    for _ in 0..N {
        let keypair = KeyPair::new();
        let validator = TestNode::new_with_pubkey(keypair.pubkey());
        let mut val = FullNode::new(
            validator,
            false,
            InFile::Path(ledger_path.clone()),
            Some(keypair),
            Some(leader_data.contact_info.ncp),
            None,
        );
        nodes.push(val);
    }
    let servers = converge(&leader_data, N + 1);
    //contains the leader addr as well
    assert_eq!(servers.len(), N + 1);

    //verify leader can do transfer
    let leader_balance =
        send_tx_and_retry_get_balance(&leader_data, &alice, &bob_pubkey, None).unwrap();
    assert_eq!(leader_balance, 500);

    //verify validator has the same balance
    let mut success = 0usize;
    for server in servers.iter() {
        info!("0server: {:x}", server.debug_id());
        let mut client = mk_client(server);
        if let Ok(bal) = client.poll_get_balance(&bob_pubkey) {
            info!("validator balance {}", bal);
            if bal == leader_balance {
                success += 1;
            }
        }
    }
    assert_eq!(success, servers.len());

    success = 0;
    // start up another validator, converge and then check everyone's balances
    let keypair = KeyPair::new();
    let validator = TestNode::new_with_pubkey(keypair.pubkey());
    let val = FullNode::new(
        validator,
        false,
        InFile::Path(ledger_path.clone()),
        Some(keypair),
        Some(leader_data.contact_info.ncp),
        None,
    );
    nodes.push(val);
    //contains the leader and new node
    let servers = converge(&leader_data, N + 2);

    let mut leader_balance =
        send_tx_and_retry_get_balance(&leader_data, &alice, &bob_pubkey, None).unwrap();
    info!("leader balance {}", leader_balance);
    loop {
        let mut client = mk_client(&leader_data);
        leader_balance = client.poll_get_balance(&bob_pubkey).unwrap();
        if leader_balance == 1000 {
            break;
        }
        sleep(Duration::from_millis(300));
    }
    assert_eq!(leader_balance, 1000);

    for server in servers.iter() {
        let mut client = mk_client(server);
        info!("1server: {:x}", server.debug_id());
        for _ in 0..15 {
            if let Ok(bal) = client.poll_get_balance(&bob_pubkey) {
                info!("validator balance {}", bal);
                if bal == leader_balance {
                    success += 1;
                    break;
                }
            }
            sleep(Duration::from_millis(500));
        }
    }
    assert_eq!(success, servers.len());

    for node in nodes {
        node.close().unwrap();
    }
}
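
/// Start a leader and N validators, send a single transfer through the
/// leader, and assert that every converged node reports the same balance.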
#[test]
fn test_multi_node_basic() {
    logger::setup();
    const N: usize = 5;
    trace!("test_multi_node_basic");
    let leader = TestNode::new();
    let leader_data = leader.data.clone();
    let bob_pubkey = KeyPair::new().pubkey();
    let (alice, ledger_path) = genesis(10_000);
    let server = FullNode::new(
        leader,
        true,
        InFile::Path(ledger_path.clone()),
        None,
        None,
        None,
    );

    let mut nodes = vec![server];
    for _ in 0..N {
        let keypair = KeyPair::new();
        let validator = TestNode::new_with_pubkey(keypair.pubkey());
        let val = FullNode::new(
            validator,
            false,
            InFile::Path(ledger_path.clone()),
            Some(keypair),
            Some(leader_data.contact_info.ncp),
            None,
        );
        nodes.push(val);
    }
    let servers = converge(&leader_data, N + 1);
    //contains the leader addr as well
    assert_eq!(servers.len(), N + 1);

    //verify leader can do transfer
    let leader_balance =
        send_tx_and_retry_get_balance(&leader_data, &alice, &bob_pubkey, None).unwrap();
    assert_eq!(leader_balance, 500);

    //verify validator has the same balance
    let mut success = 0usize;
    for server in servers.iter() {
        let mut client = mk_client(server);
        if let Ok(bal) = client.poll_get_balance(&bob_pubkey) {
            trace!("validator balance {}", bal);
            if bal == leader_balance {
                success += 1;
            }
        }
    }
    assert_eq!(success, servers.len());

    for node in nodes {
        node.close().unwrap();
    }
    std::fs::remove_file(ledger_path).unwrap();
}
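
/// Boot a validator from the ledger file the leader has been writing and
/// verify it reports the balance produced by the earlier transfers.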
#[test]
fn test_boot_validator_from_file() {
    logger::setup();
    let leader = TestNode::new();
    let bob_pubkey = KeyPair::new().pubkey();
    let (alice, ledger_path) = genesis(100_000);
    let leader_data = leader.data.clone();
    let leader_fullnode = FullNode::new(
        leader,
        true,
        InFile::Path(ledger_path.clone()),
        None,
        None,
        Some(OutFile::Path(ledger_path.clone())),
    );
    let leader_balance =
        send_tx_and_retry_get_balance(&leader_data, &alice, &bob_pubkey, Some(500)).unwrap();
    assert_eq!(leader_balance, 500);
    let leader_balance =
        send_tx_and_retry_get_balance(&leader_data, &alice, &bob_pubkey, Some(1000)).unwrap();
    assert_eq!(leader_balance, 1000);

    let keypair = KeyPair::new();
    let validator = TestNode::new_with_pubkey(keypair.pubkey());
    let validator_data = validator.data.clone();
    let val_fullnode = FullNode::new(
        validator,
        false,
        InFile::Path(ledger_path.clone()),
        Some(keypair),
        Some(leader_data.contact_info.ncp),
        None,
    );
    let mut client = mk_client(&validator_data);
    let getbal = retry_get_balance(&mut client, &bob_pubkey, Some(leader_balance));
    assert!(getbal == Some(leader_balance));

    leader_fullnode.close().unwrap();
    val_fullnode.close().unwrap();
    std::fs::remove_file(ledger_path).unwrap();
}
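
/// Start a leader FullNode that reads from and writes back to `ledger_path`.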
fn create_leader(ledger_path: &str) -> (ReplicatedData, FullNode) {
    let leader = TestNode::new();
    let leader_data = leader.data.clone();
    let leader_fullnode = FullNode::new(
        leader,
        true,
        InFile::Path(ledger_path.to_string()),
        None,
        None,
        // ledger_path is a &str here, so build an owned String for OutFile::Path
        Some(OutFile::Path(ledger_path.to_string())),
    );
    (leader_data, leader_fullnode)
}

#[test]
fn test_leader_restart_validator_start_from_old_ledger() {
    // this test verifies that a freshly started leader makes its ledger available
    // in the repair window to validators that are started with an older
    // ledger (currently up to WINDOW_SIZE entries)
    logger::setup();

    let (alice, ledger_path) = genesis(100_000);
    let bob_pubkey = KeyPair::new().pubkey();

    let (leader_data, leader_fullnode) = create_leader(&ledger_path);

    // lengthen the ledger
    let leader_balance =
        send_tx_and_retry_get_balance(&leader_data, &alice, &bob_pubkey, Some(500)).unwrap();
    assert_eq!(leader_balance, 500);

    // create a "stale" ledger by copying current ledger
    let mut stale_ledger_path = ledger_path.clone();
    stale_ledger_path.insert_str(ledger_path.rfind("/").unwrap() + 1, "stale_");
    std::fs::copy(&ledger_path, &stale_ledger_path)
        .expect(format!("copy {} to {}", &ledger_path, &stale_ledger_path,).as_str());

    // restart the leader
    leader_fullnode.close().unwrap();
    let (leader_data, leader_fullnode) = create_leader(&ledger_path);

    // lengthen the ledger
    let leader_balance =
        send_tx_and_retry_get_balance(&leader_data, &alice, &bob_pubkey, Some(1000)).unwrap();
    assert_eq!(leader_balance, 1000);

    // restart the leader
    leader_fullnode.close().unwrap();
    let (leader_data, leader_fullnode) = create_leader(&ledger_path);

    // start validator from old ledger
    let keypair = KeyPair::new();
    let validator = TestNode::new_with_pubkey(keypair.pubkey());
    let validator_data = validator.data.clone();
    let val_fullnode = FullNode::new(
        validator,
        false,
        InFile::Path(stale_ledger_path.clone()),
        Some(keypair),
        Some(leader_data.contact_info.ncp),
        None,
    );

    // trigger broadcast, validator should catch up from leader, whose window contains
    // the entries missing from the stale ledger
    // send requests so the validator eventually sees a gap and requests a repair
    let mut expected = 1500;
    let mut client = mk_client(&validator_data);
    for _ in 0..10 {
        let leader_balance =
            send_tx_and_retry_get_balance(&leader_data, &alice, &bob_pubkey, Some(expected))
                .unwrap();
        assert_eq!(leader_balance, expected);
        let getbal = retry_get_balance(&mut client, &bob_pubkey, Some(leader_balance));
        if getbal == Some(leader_balance) {
            break;
        }
        expected += 500;
    }
    let getbal = retry_get_balance(&mut client, &bob_pubkey, Some(expected));
    assert_eq!(getbal, Some(expected));

    leader_fullnode.close().unwrap();
    val_fullnode.close().unwrap();
    std::fs::remove_file(ledger_path).unwrap();
    std::fs::remove_file(stale_ledger_path).unwrap();
}
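
/// Start a leader plus N validators, each funded with 500 tokens, then issue
/// a series of transfers through the leader and log how many validators
/// converge on each expected balance.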
//TODO: this test will run a long time so it's disabled for CI
#[test]
#[ignore]
fn test_multi_node_dynamic_network() {
    logger::setup();
    const N: usize = 60;
    let leader = TestNode::new();
    let bob_pubkey = KeyPair::new().pubkey();
    let (alice, ledger_path) = genesis(100_000);
    let leader_data = leader.data.clone();
    let server = FullNode::new(
        leader,
        true,
        InFile::Path(ledger_path.clone()),
        None,
        None,
        Some(OutFile::Path(ledger_path.clone())),
    );
    info!("{:x} LEADER", leader_data.debug_id());

    let leader_balance =
        send_tx_and_retry_get_balance(&leader_data, &alice, &bob_pubkey, Some(500)).unwrap();
    assert_eq!(leader_balance, 500);
    let leader_balance =
        send_tx_and_retry_get_balance(&leader_data, &alice, &bob_pubkey, Some(1000)).unwrap();
    assert_eq!(leader_balance, 1000);

    let validators: Vec<(ReplicatedData, FullNode)> = (0..N)
        .into_iter()
        .map(|n| {
            let keypair = KeyPair::new();
            let validator = TestNode::new_with_pubkey(keypair.pubkey());
            let rd = validator.data.clone();
            //send some tokens to the new validator
            let bal =
                send_tx_and_retry_get_balance(&leader_data, &alice, &keypair.pubkey(), Some(500));
            assert_eq!(bal, Some(500));
            let val = FullNode::new(
                validator,
                false,
                InFile::Path(ledger_path.clone()),
                Some(keypair),
                Some(leader_data.contact_info.ncp),
                Some(OutFile::Path(ledger_path.clone())),
            );
            info!("started[{}/{}] {:x}", n, N, rd.debug_id());
            (rd, val)
        })
        .collect();

    for i in 0..N {
        //verify leader can do transfer
        let expected = ((i + 3) * 500) as i64;
        let leader_balance =
            send_tx_and_retry_get_balance(&leader_data, &alice, &bob_pubkey, Some(expected))
                .unwrap();
        if leader_balance != expected {
            info!(
                "leader dropped transaction {} {:?} {:?}",
                i, leader_balance, expected
            );
        }
        //verify all validators have the same balance
        {
            let mut success = 0usize;
            let mut distance = 0i64;
            for server in validators.iter() {
                let mut client = mk_client(&server.0);
                trace!("{:x} {} get_balance start", server.0.debug_id(), i);
                let getbal = retry_get_balance(&mut client, &bob_pubkey, Some(expected));
                trace!(
                    "{:x} {} get_balance: {:?} expected: {}",
                    server.0.debug_id(),
                    i,
                    getbal,
                    expected
                );
                let bal = getbal.unwrap_or(0);
                distance += (leader_balance - bal) / 500;
                if let Some(bal) = getbal {
                    if bal == leader_balance {
                        success += 1;
                    }
                }
            }
            info!(
                "SUCCESS[{}] {} out of {} distance: {}",
                i,
                success,
                validators.len(),
                distance
            );
            //assert_eq!(success, validators.len());
        }
    }

    for (_, node) in validators {
        node.close().unwrap();
    }
    server.close().unwrap();
    std::fs::remove_file(ledger_path).unwrap();
}
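
/// Build a ThinClient pointed at `leader`'s rpu and tpu addresses, using
/// fresh UDP sockets and a one second read timeout on requests.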
fn mk_client(leader: &ReplicatedData) -> ThinClient {
    let requests_socket = UdpSocket::bind("0.0.0.0:0").unwrap();
    requests_socket
        .set_read_timeout(Some(Duration::new(1, 0)))
        .unwrap();
    let transactions_socket = UdpSocket::bind("0.0.0.0:0").unwrap();
    let daddr = "0.0.0.0:0".parse().unwrap();
    assert!(leader.contact_info.rpu != daddr);
    assert!(leader.contact_info.tpu != daddr);
    ThinClient::new(
        leader.contact_info.rpu,
        requests_socket,
        leader.contact_info.tpu,
        transactions_socket,
    )
}
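
/// Poll the balance until it matches `expected` or the retry limit is hit,
/// returning the last observed value; with no expectation the first poll
/// result is returned immediately.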
fn retry_get_balance(
    client: &mut ThinClient,
    bob_pubkey: &PublicKey,
    expected: Option<i64>,
) -> Option<i64> {
    const LAST: usize = 20;
    for run in 0..(LAST + 1) {
        let out = client.poll_get_balance(bob_pubkey);
        if expected.is_none() || run == LAST {
            return out.ok().clone();
        }
        trace!("retry_get_balance[{}] {:?} {:?}", run, out, expected);
        if let (Some(e), Ok(o)) = (expected, out) {
            if o == e {
                return Some(o);
            }
        }
    }
    None
}
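
/// Transfer 500 tokens from the mint to `bob_pubkey` via the leader, then
/// use retry_get_balance to wait for the balance to match `expected`.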
fn send_tx_and_retry_get_balance(
    leader: &ReplicatedData,
    alice: &Mint,
    bob_pubkey: &PublicKey,
    expected: Option<i64>,
) -> Option<i64> {
    let mut client = mk_client(leader);
    trace!("getting leader last_id");
    let last_id = client.get_last_id();
    info!("executing leader transfer");
    let _sig = client
        .transfer(500, &alice.keypair(), *bob_pubkey, &last_id)
        .unwrap();
    retry_get_balance(&mut client, bob_pubkey, expected)
}