Accountsdb replication installment 2 (#19325)

This is the 2nd installment for the AccountsDb replication.

Summary of Changes

The basic Google Protocol Buffers protocol for replicating updated slots and accounts. tonic/tokio is used to transport the messages.

The basic framework of the client and server for replicating slots and accounts. Persisting the accounts on the replica side will be done in the next PR; for now, the accounts are streamed to the replica-node and then discarded. Replication of information about the Bank is also not done in this PR; it will be addressed in the next PR to limit the change size.

Functionality used by both the client and server sides is encapsulated in the replica-lib crate.

There is no impact to the existing validator by default.

Tests:

Observe the confirmed slots replicated to the replica-node.
Observe the accounts for the confirmed slot are received at the replica-node side.
This commit is contained in:
Lijun Wang
2021-09-01 14:10:16 -07:00
committed by GitHub
parent 27c2180db9
commit 8378e8790f
28 changed files with 994 additions and 27 deletions

View File

@@ -21,6 +21,8 @@ jsonrpc-derive = "17.0.0"
jsonrpc-ipc-server = "17.0.0"
jsonrpc-server-utils= "17.0.0"
log = "0.4.11"
prost = "0.8.0"
prost-types = "0.8.0"
rand = "0.7.0"
serde = "1.0.130"
solana-clap-utils = { path = "../clap-utils", version = "=1.8.0" }
@@ -34,11 +36,13 @@ solana-logger = { path = "../logger", version = "=1.8.0" }
solana-metrics = { path = "../metrics", version = "=1.8.0" }
solana-net-utils = { path = "../net-utils", version = "=1.8.0" }
solana-rpc = { path = "../rpc", version = "=1.8.0" }
solana-replica-lib = { path = "../replica-lib", version = "=1.8.0" }
solana-runtime = { path = "../runtime", version = "=1.8.0" }
solana-sdk = { path = "../sdk", version = "=1.8.0" }
solana-streamer = { path = "../streamer", version = "=1.8.0" }
solana-version = { path = "../version", version = "=1.8.0" }
solana-validator = { path = "../validator", version = "=1.8.0" }
tonic = { version = "0.5.0", features = ["tls", "transport"] }
[dev-dependencies]
solana-core = { path = "../core", version = "=1.8.0" }
@@ -51,3 +55,6 @@ tempfile = "3.2.0"
[package.metadata.docs.rs]
targets = ["x86_64-unknown-linux-gnu"]
[build-dependencies]
tonic-build = "0.5.1"

View File

@@ -0,0 +1,81 @@
//! Module responsible for replicating AccountsDb data from a peer into the local AccountsDb of the replica-node.
use {
log::*,
solana_replica_lib::accountsdb_repl_client::{
AccountsDbReplClientService, AccountsDbReplClientServiceConfig, ReplicaRpcError,
},
solana_sdk::{clock::Slot, pubkey::Pubkey},
std::{
thread::{self, sleep, Builder, JoinHandle},
time::Duration,
},
};
/// Service that drives AccountsDb replication on a dedicated background thread.
///
/// The thread is spawned by `AccountsDbReplService::new` and can be waited on
/// with `AccountsDbReplService::join`.
pub struct AccountsDbReplService {
/// Handle of the spawned replication thread; consumed by `join`.
thread: JoinHandle<()>,
}
impl AccountsDbReplService {
pub fn new(
last_replicated_slot: Slot,
config: AccountsDbReplClientServiceConfig,
) -> Result<Self, ReplicaRpcError> {
let accountsdb_repl_client = AccountsDbReplClientService::new(config)?;
let thread = Builder::new()
.name("sol-accountsdb-repl-svc".to_string())
.spawn(move || {
Self::run_service(last_replicated_slot, accountsdb_repl_client);
})
.unwrap();
Ok(Self { thread })
}
fn replicate_accounts_for_slot(
accountsdb_repl_client: &mut AccountsDbReplClientService,
slot: Slot,
) {
match accountsdb_repl_client.get_slot_accounts(slot) {
Err(err) => {
error!(
"Ran into error getting accounts for slot {:?}, error: {:?}",
slot, err
);
}
Ok(accounts) => {
for account in accounts.iter() {
debug!(
"Received account: {:?}",
Pubkey::new(&account.account_meta.as_ref().unwrap().pubkey)
);
}
}
}
}
fn run_service(
mut last_replicated_slot: Slot,
mut accountsdb_repl_client: AccountsDbReplClientService,
) {
loop {
match accountsdb_repl_client.get_confirmed_slots(last_replicated_slot) {
Ok(slots) => {
info!("Received updated slots: {:?}", slots);
if !slots.is_empty() {
for slot in slots.iter() {
Self::replicate_accounts_for_slot(&mut accountsdb_repl_client, *slot);
}
last_replicated_slot = slots[slots.len() - 1];
}
}
Err(err) => {
error!("Ran into error getting updated slots: {:?}", err);
}
}
sleep(Duration::from_millis(200));
}
}
pub fn join(self) -> thread::Result<()> {
self.thread.join()
}
}

View File

@@ -1,2 +1,5 @@
#![allow(clippy::integer_arithmetic)]
pub mod accountsdb_repl_service;
pub mod replica_node;
pub mod replica_util;

View File

@@ -67,12 +67,28 @@ pub fn main() {
.help("Use DIR as snapshot location [default: --ledger value]"),
)
.arg(
Arg::with_name("peer")
.long("peer")
.value_name("IP:PORT")
Arg::with_name("peer_address")
.long("peer-address")
.value_name("IP")
.takes_value(true)
.required(true)
.help("The the IP:PORT for the peer validator/replica to download from"),
.help("The the address for the peer validator/replica to download from"),
)
.arg(
Arg::with_name("peer_rpc_port")
.long("peer-rpc-port")
.value_name("PORT")
.takes_value(true)
.required(true)
.help("The the PORT for the peer validator/replica from which to download the snapshots"),
)
.arg(
Arg::with_name("peer_accountsdb_repl_port")
.long("peer-accountsdb-repl-port")
.value_name("PORT")
.takes_value(true)
.required(true)
.help("The the PORT for the peer validator/replica serving the AccountsDb replication"),
)
.arg(
Arg::with_name("peer_pubkey")
@@ -296,19 +312,30 @@ pub fn main() {
vec![ledger_path.join("accounts")]
};
let rpc_source_addr =
solana_net_utils::parse_host_port(matches.value_of("peer").unwrap_or_else(|| {
let peer_address = solana_net_utils::parse_host(matches.value_of("peer_address").unwrap())
.expect("invalid peer_address");
let peer_rpc_port = value_t!(matches, "peer_rpc_port", u16).unwrap_or_else(|_| {
clap::Error::with_description(
"The --peer-rpc-port <PORT> argument is required",
clap::ErrorKind::ArgumentNotFound,
)
.exit();
});
let rpc_peer_addr = SocketAddr::new(peer_address, peer_rpc_port);
let peer_accountsdb_repl_port = value_t!(matches, "peer_accountsdb_repl_port", u16)
.unwrap_or_else(|_| {
clap::Error::with_description(
"The --peer <IP:PORT> argument is required",
"The --peer-accountsdb-repl-port <PORT> argument is required",
clap::ErrorKind::ArgumentNotFound,
)
.exit();
}))
.unwrap_or_else(|e| {
eprintln!("failed to parse entrypoint address: {}", e);
exit(1);
});
let accountsdb_repl_peer_addr = SocketAddr::new(peer_address, peer_accountsdb_repl_port);
let rpc_port = value_t!(matches, "rpc_port", u16).unwrap_or_else(|_| {
clap::Error::with_description(
"The --rpc-port <PORT> argument is required",
@@ -358,7 +385,8 @@ pub fn main() {
);
let config = ReplicaNodeConfig {
rpc_source_addr,
rpc_peer_addr,
accountsdb_repl_peer_addr: Some(accountsdb_repl_peer_addr),
rpc_addr: rpc_addrs.0,
rpc_pubsub_addr: rpc_addrs.1,
ledger_path,
@@ -376,6 +404,6 @@ pub fn main() {
replica_exit: Arc::new(RwLock::new(Exit::default())),
};
let validator = ReplicaNode::new(config);
validator.join();
let replica = ReplicaNode::new(config);
replica.join();
}

View File

@@ -1,4 +1,5 @@
use {
crate::accountsdb_repl_service::AccountsDbReplService,
crossbeam_channel::unbounded,
log::*,
solana_download_utils::download_snapshot,
@@ -8,6 +9,7 @@ use {
blockstore::Blockstore, blockstore_db::AccessType, blockstore_processor,
leader_schedule_cache::LeaderScheduleCache,
},
solana_replica_lib::accountsdb_repl_client::AccountsDbReplClientServiceConfig,
solana_rpc::{
max_slots::MaxSlots,
optimistically_confirmed_bank_tracker::{
@@ -40,7 +42,8 @@ use {
};
pub struct ReplicaNodeConfig {
pub rpc_source_addr: SocketAddr,
pub rpc_peer_addr: SocketAddr,
pub accountsdb_repl_peer_addr: Option<SocketAddr>,
pub rpc_addr: SocketAddr,
pub rpc_pubsub_addr: SocketAddr,
pub ledger_path: PathBuf,
@@ -62,6 +65,7 @@ pub struct ReplicaNode {
json_rpc_service: Option<JsonRpcService>,
pubsub_service: Option<PubSubService>,
optimistically_confirmed_bank_tracker: Option<OptimisticallyConfirmedBankTracker>,
accountsdb_repl_service: Option<AccountsDbReplService>,
}
// Struct maintaining information about banks
@@ -86,7 +90,7 @@ fn initialize_from_snapshot(
);
download_snapshot(
&replica_config.rpc_source_addr,
&replica_config.rpc_peer_addr,
&replica_config.snapshot_archives_dir,
replica_config.snapshot_info,
false,
@@ -240,6 +244,7 @@ fn start_client_rpc_services(
bank_forks.clone(),
optimistically_confirmed_bank.clone(),
subscriptions.clone(),
None,
)),
)
}
@@ -247,7 +252,7 @@ fn start_client_rpc_services(
impl ReplicaNode {
pub fn new(replica_config: ReplicaNodeConfig) -> Self {
let genesis_config = download_then_check_genesis_hash(
&replica_config.rpc_source_addr,
&replica_config.rpc_peer_addr,
&replica_config.ledger_path,
None,
MAX_GENESIS_ARCHIVE_UNPACKED_SIZE,
@@ -279,10 +284,31 @@ impl ReplicaNode {
&replica_config.socket_addr_space,
);
let accountsdb_repl_client_config = AccountsDbReplClientServiceConfig {
worker_threads: 1,
replica_server_addr: replica_config.accountsdb_repl_peer_addr.unwrap(),
};
let last_replicated_slot = bank_info.bank_forks.read().unwrap().root_bank().slot();
info!(
"Starting AccountsDbReplService from slot {:?}",
last_replicated_slot
);
let accountsdb_repl_service = Some(
AccountsDbReplService::new(last_replicated_slot, accountsdb_repl_client_config)
.expect("Failed to start AccountsDb replication service"),
);
info!(
"Started AccountsDbReplService from slot {:?}",
last_replicated_slot
);
ReplicaNode {
json_rpc_service,
pubsub_service,
optimistically_confirmed_bank_tracker,
accountsdb_repl_service,
}
}
@@ -302,5 +328,10 @@ impl ReplicaNode {
.join()
.expect("optimistically_confirmed_bank_tracker");
}
if let Some(accountsdb_repl_service) = self.accountsdb_repl_service {
accountsdb_repl_service
.join()
.expect("accountsdb_repl_service");
}
}
}

View File

@@ -19,7 +19,10 @@ use {
net::{SocketAddr, UdpSocket},
path::Path,
process::exit,
sync::{atomic::AtomicBool, Arc},
sync::{
atomic::{AtomicBool, Ordering},
Arc,
},
thread::sleep,
time::{Duration, Instant},
},
@@ -264,5 +267,9 @@ pub fn get_rpc_peer_info(
);
let rpc_node_details = rpc_node_details.unwrap();
// We no longer need the gossip node, stop it:
let gossip_exit_flag = gossip.1;
gossip_exit_flag.store(true, Ordering::Relaxed);
(gossip.0, rpc_node_details.0, rpc_node_details.1)
}

View File

@@ -9,6 +9,7 @@ use {
local_cluster::{ClusterConfig, LocalCluster},
validator_configs::*,
},
solana_replica_lib::accountsdb_repl_server::AccountsDbReplServiceConfig,
solana_replica_node::{
replica_node::{ReplicaNode, ReplicaNodeConfig},
replica_util,
@@ -132,11 +133,22 @@ fn setup_snapshot_validator_config(
// Create the account paths
let (account_storage_dirs, account_storage_paths) = generate_account_paths(num_account_paths);
let bind_ip_addr = IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1));
let accountsdb_repl_port =
solana_net_utils::find_available_port_in_range(bind_ip_addr, (1024, 65535)).unwrap();
let replica_server_addr = SocketAddr::new(bind_ip_addr, accountsdb_repl_port);
let accountsdb_repl_service_config = Some(AccountsDbReplServiceConfig {
worker_threads: 1,
replica_server_addr,
});
// Create the validator config
let validator_config = ValidatorConfig {
snapshot_config: Some(snapshot_config),
account_paths: account_storage_paths,
accounts_hash_interval_slots: snapshot_interval_slots,
accountsdb_repl_service_config,
..ValidatorConfig::default()
};
@@ -259,7 +271,14 @@ fn test_replica_bootstrap() {
info!("The cluster info:\n{:?}", cluster_info.contact_info_trace());
let config = ReplicaNodeConfig {
rpc_source_addr: contact_info.rpc,
rpc_peer_addr: contact_info.rpc,
accountsdb_repl_peer_addr: Some(
leader_snapshot_test_config
.validator_config
.accountsdb_repl_service_config
.unwrap()
.replica_server_addr,
),
rpc_addr,
rpc_pubsub_addr,
ledger_path: ledger_path.to_path_buf(),