Accountsdb replication installment 2 (#19325)
This is the second installment of the AccountsDb replication work. Summary of changes: Adds the basic Google Protocol Buffers protocol for replicating updated slots and accounts; tonic/tokio is used to transport the messages. Adds the basic framework of the client and server for replicating slots and accounts. Persisting the accounts on the replica side will be done in the next PR; for now, the accounts are streamed to the replica node and dumped. Replication of Bank information is also not done in this PR; it will be addressed in the next PR to limit the change size. Functionality used by both the client and the server is encapsulated in the replica-lib crate. There is no impact on the existing validator by default. Tests: Observed that the confirmed slots are replicated to the replica node. Observed that the accounts for the confirmed slots are received on the replica-node side.
This commit is contained in:
@@ -21,6 +21,8 @@ jsonrpc-derive = "17.0.0"
|
||||
jsonrpc-ipc-server = "17.0.0"
|
||||
jsonrpc-server-utils= "17.0.0"
|
||||
log = "0.4.11"
|
||||
prost = "0.8.0"
|
||||
prost-types = "0.8.0"
|
||||
rand = "0.7.0"
|
||||
serde = "1.0.130"
|
||||
solana-clap-utils = { path = "../clap-utils", version = "=1.8.0" }
|
||||
@@ -34,11 +36,13 @@ solana-logger = { path = "../logger", version = "=1.8.0" }
|
||||
solana-metrics = { path = "../metrics", version = "=1.8.0" }
|
||||
solana-net-utils = { path = "../net-utils", version = "=1.8.0" }
|
||||
solana-rpc = { path = "../rpc", version = "=1.8.0" }
|
||||
solana-replica-lib = { path = "../replica-lib", version = "=1.8.0" }
|
||||
solana-runtime = { path = "../runtime", version = "=1.8.0" }
|
||||
solana-sdk = { path = "../sdk", version = "=1.8.0" }
|
||||
solana-streamer = { path = "../streamer", version = "=1.8.0" }
|
||||
solana-version = { path = "../version", version = "=1.8.0" }
|
||||
solana-validator = { path = "../validator", version = "=1.8.0" }
|
||||
tonic = { version = "0.5.0", features = ["tls", "transport"] }
|
||||
|
||||
[dev-dependencies]
|
||||
solana-core = { path = "../core", version = "=1.8.0" }
|
||||
@@ -51,3 +55,6 @@ tempfile = "3.2.0"
|
||||
|
||||
[package.metadata.docs.rs]
|
||||
targets = ["x86_64-unknown-linux-gnu"]
|
||||
|
||||
[build-dependencies]
|
||||
tonic-build = "0.5.1"
|
||||
|
81
replica-node/src/accountsdb_repl_service.rs
Normal file
81
replica-node/src/accountsdb_repl_service.rs
Normal file
@@ -0,0 +1,81 @@
|
||||
//! Module responsible for replicating AccountsDb data from a peer into the local AccountsDb of the replica node.
|
||||
use {
|
||||
log::*,
|
||||
solana_replica_lib::accountsdb_repl_client::{
|
||||
AccountsDbReplClientService, AccountsDbReplClientServiceConfig, ReplicaRpcError,
|
||||
},
|
||||
solana_sdk::{clock::Slot, pubkey::Pubkey},
|
||||
std::{
|
||||
thread::{self, sleep, Builder, JoinHandle},
|
||||
time::Duration,
|
||||
},
|
||||
};
|
||||
|
||||
pub struct AccountsDbReplService {
|
||||
thread: JoinHandle<()>,
|
||||
}
|
||||
|
||||
impl AccountsDbReplService {
|
||||
pub fn new(
|
||||
last_replicated_slot: Slot,
|
||||
config: AccountsDbReplClientServiceConfig,
|
||||
) -> Result<Self, ReplicaRpcError> {
|
||||
let accountsdb_repl_client = AccountsDbReplClientService::new(config)?;
|
||||
let thread = Builder::new()
|
||||
.name("sol-accountsdb-repl-svc".to_string())
|
||||
.spawn(move || {
|
||||
Self::run_service(last_replicated_slot, accountsdb_repl_client);
|
||||
})
|
||||
.unwrap();
|
||||
Ok(Self { thread })
|
||||
}
|
||||
|
||||
fn replicate_accounts_for_slot(
|
||||
accountsdb_repl_client: &mut AccountsDbReplClientService,
|
||||
slot: Slot,
|
||||
) {
|
||||
match accountsdb_repl_client.get_slot_accounts(slot) {
|
||||
Err(err) => {
|
||||
error!(
|
||||
"Ran into error getting accounts for slot {:?}, error: {:?}",
|
||||
slot, err
|
||||
);
|
||||
}
|
||||
Ok(accounts) => {
|
||||
for account in accounts.iter() {
|
||||
debug!(
|
||||
"Received account: {:?}",
|
||||
Pubkey::new(&account.account_meta.as_ref().unwrap().pubkey)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn run_service(
|
||||
mut last_replicated_slot: Slot,
|
||||
mut accountsdb_repl_client: AccountsDbReplClientService,
|
||||
) {
|
||||
loop {
|
||||
match accountsdb_repl_client.get_confirmed_slots(last_replicated_slot) {
|
||||
Ok(slots) => {
|
||||
info!("Received updated slots: {:?}", slots);
|
||||
if !slots.is_empty() {
|
||||
for slot in slots.iter() {
|
||||
Self::replicate_accounts_for_slot(&mut accountsdb_repl_client, *slot);
|
||||
}
|
||||
last_replicated_slot = slots[slots.len() - 1];
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
error!("Ran into error getting updated slots: {:?}", err);
|
||||
}
|
||||
}
|
||||
sleep(Duration::from_millis(200));
|
||||
}
|
||||
}
|
||||
|
||||
pub fn join(self) -> thread::Result<()> {
|
||||
self.thread.join()
|
||||
}
|
||||
}
|
@@ -1,2 +1,5 @@
|
||||
#![allow(clippy::integer_arithmetic)]
|
||||
|
||||
pub mod accountsdb_repl_service;
|
||||
pub mod replica_node;
|
||||
pub mod replica_util;
|
||||
|
@@ -67,12 +67,28 @@ pub fn main() {
|
||||
.help("Use DIR as snapshot location [default: --ledger value]"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("peer")
|
||||
.long("peer")
|
||||
.value_name("IP:PORT")
|
||||
Arg::with_name("peer_address")
|
||||
.long("peer-address")
|
||||
.value_name("IP")
|
||||
.takes_value(true)
|
||||
.required(true)
|
||||
.help("The the IP:PORT for the peer validator/replica to download from"),
|
||||
.help("The the address for the peer validator/replica to download from"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("peer_rpc_port")
|
||||
.long("peer-rpc-port")
|
||||
.value_name("PORT")
|
||||
.takes_value(true)
|
||||
.required(true)
|
||||
.help("The the PORT for the peer validator/replica from which to download the snapshots"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("peer_accountsdb_repl_port")
|
||||
.long("peer-accountsdb-repl-port")
|
||||
.value_name("PORT")
|
||||
.takes_value(true)
|
||||
.required(true)
|
||||
.help("The the PORT for the peer validator/replica serving the AccountsDb replication"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("peer_pubkey")
|
||||
@@ -296,19 +312,30 @@ pub fn main() {
|
||||
vec![ledger_path.join("accounts")]
|
||||
};
|
||||
|
||||
let rpc_source_addr =
|
||||
solana_net_utils::parse_host_port(matches.value_of("peer").unwrap_or_else(|| {
|
||||
let peer_address = solana_net_utils::parse_host(matches.value_of("peer_address").unwrap())
|
||||
.expect("invalid peer_address");
|
||||
|
||||
let peer_rpc_port = value_t!(matches, "peer_rpc_port", u16).unwrap_or_else(|_| {
|
||||
clap::Error::with_description(
|
||||
"The --peer-rpc-port <PORT> argument is required",
|
||||
clap::ErrorKind::ArgumentNotFound,
|
||||
)
|
||||
.exit();
|
||||
});
|
||||
|
||||
let rpc_peer_addr = SocketAddr::new(peer_address, peer_rpc_port);
|
||||
|
||||
let peer_accountsdb_repl_port = value_t!(matches, "peer_accountsdb_repl_port", u16)
|
||||
.unwrap_or_else(|_| {
|
||||
clap::Error::with_description(
|
||||
"The --peer <IP:PORT> argument is required",
|
||||
"The --peer-accountsdb-repl-port <PORT> argument is required",
|
||||
clap::ErrorKind::ArgumentNotFound,
|
||||
)
|
||||
.exit();
|
||||
}))
|
||||
.unwrap_or_else(|e| {
|
||||
eprintln!("failed to parse entrypoint address: {}", e);
|
||||
exit(1);
|
||||
});
|
||||
|
||||
let accountsdb_repl_peer_addr = SocketAddr::new(peer_address, peer_accountsdb_repl_port);
|
||||
|
||||
let rpc_port = value_t!(matches, "rpc_port", u16).unwrap_or_else(|_| {
|
||||
clap::Error::with_description(
|
||||
"The --rpc-port <PORT> argument is required",
|
||||
@@ -358,7 +385,8 @@ pub fn main() {
|
||||
);
|
||||
|
||||
let config = ReplicaNodeConfig {
|
||||
rpc_source_addr,
|
||||
rpc_peer_addr,
|
||||
accountsdb_repl_peer_addr: Some(accountsdb_repl_peer_addr),
|
||||
rpc_addr: rpc_addrs.0,
|
||||
rpc_pubsub_addr: rpc_addrs.1,
|
||||
ledger_path,
|
||||
@@ -376,6 +404,6 @@ pub fn main() {
|
||||
replica_exit: Arc::new(RwLock::new(Exit::default())),
|
||||
};
|
||||
|
||||
let validator = ReplicaNode::new(config);
|
||||
validator.join();
|
||||
let replica = ReplicaNode::new(config);
|
||||
replica.join();
|
||||
}
|
||||
|
@@ -1,4 +1,5 @@
|
||||
use {
|
||||
crate::accountsdb_repl_service::AccountsDbReplService,
|
||||
crossbeam_channel::unbounded,
|
||||
log::*,
|
||||
solana_download_utils::download_snapshot,
|
||||
@@ -8,6 +9,7 @@ use {
|
||||
blockstore::Blockstore, blockstore_db::AccessType, blockstore_processor,
|
||||
leader_schedule_cache::LeaderScheduleCache,
|
||||
},
|
||||
solana_replica_lib::accountsdb_repl_client::AccountsDbReplClientServiceConfig,
|
||||
solana_rpc::{
|
||||
max_slots::MaxSlots,
|
||||
optimistically_confirmed_bank_tracker::{
|
||||
@@ -40,7 +42,8 @@ use {
|
||||
};
|
||||
|
||||
pub struct ReplicaNodeConfig {
|
||||
pub rpc_source_addr: SocketAddr,
|
||||
pub rpc_peer_addr: SocketAddr,
|
||||
pub accountsdb_repl_peer_addr: Option<SocketAddr>,
|
||||
pub rpc_addr: SocketAddr,
|
||||
pub rpc_pubsub_addr: SocketAddr,
|
||||
pub ledger_path: PathBuf,
|
||||
@@ -62,6 +65,7 @@ pub struct ReplicaNode {
|
||||
json_rpc_service: Option<JsonRpcService>,
|
||||
pubsub_service: Option<PubSubService>,
|
||||
optimistically_confirmed_bank_tracker: Option<OptimisticallyConfirmedBankTracker>,
|
||||
accountsdb_repl_service: Option<AccountsDbReplService>,
|
||||
}
|
||||
|
||||
// Struct maintaining information about banks
|
||||
@@ -86,7 +90,7 @@ fn initialize_from_snapshot(
|
||||
);
|
||||
|
||||
download_snapshot(
|
||||
&replica_config.rpc_source_addr,
|
||||
&replica_config.rpc_peer_addr,
|
||||
&replica_config.snapshot_archives_dir,
|
||||
replica_config.snapshot_info,
|
||||
false,
|
||||
@@ -240,6 +244,7 @@ fn start_client_rpc_services(
|
||||
bank_forks.clone(),
|
||||
optimistically_confirmed_bank.clone(),
|
||||
subscriptions.clone(),
|
||||
None,
|
||||
)),
|
||||
)
|
||||
}
|
||||
@@ -247,7 +252,7 @@ fn start_client_rpc_services(
|
||||
impl ReplicaNode {
|
||||
pub fn new(replica_config: ReplicaNodeConfig) -> Self {
|
||||
let genesis_config = download_then_check_genesis_hash(
|
||||
&replica_config.rpc_source_addr,
|
||||
&replica_config.rpc_peer_addr,
|
||||
&replica_config.ledger_path,
|
||||
None,
|
||||
MAX_GENESIS_ARCHIVE_UNPACKED_SIZE,
|
||||
@@ -279,10 +284,31 @@ impl ReplicaNode {
|
||||
&replica_config.socket_addr_space,
|
||||
);
|
||||
|
||||
let accountsdb_repl_client_config = AccountsDbReplClientServiceConfig {
|
||||
worker_threads: 1,
|
||||
replica_server_addr: replica_config.accountsdb_repl_peer_addr.unwrap(),
|
||||
};
|
||||
|
||||
let last_replicated_slot = bank_info.bank_forks.read().unwrap().root_bank().slot();
|
||||
info!(
|
||||
"Starting AccountsDbReplService from slot {:?}",
|
||||
last_replicated_slot
|
||||
);
|
||||
let accountsdb_repl_service = Some(
|
||||
AccountsDbReplService::new(last_replicated_slot, accountsdb_repl_client_config)
|
||||
.expect("Failed to start AccountsDb replication service"),
|
||||
);
|
||||
|
||||
info!(
|
||||
"Started AccountsDbReplService from slot {:?}",
|
||||
last_replicated_slot
|
||||
);
|
||||
|
||||
ReplicaNode {
|
||||
json_rpc_service,
|
||||
pubsub_service,
|
||||
optimistically_confirmed_bank_tracker,
|
||||
accountsdb_repl_service,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -302,5 +328,10 @@ impl ReplicaNode {
|
||||
.join()
|
||||
.expect("optimistically_confirmed_bank_tracker");
|
||||
}
|
||||
if let Some(accountsdb_repl_service) = self.accountsdb_repl_service {
|
||||
accountsdb_repl_service
|
||||
.join()
|
||||
.expect("accountsdb_repl_service");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -19,7 +19,10 @@ use {
|
||||
net::{SocketAddr, UdpSocket},
|
||||
path::Path,
|
||||
process::exit,
|
||||
sync::{atomic::AtomicBool, Arc},
|
||||
sync::{
|
||||
atomic::{AtomicBool, Ordering},
|
||||
Arc,
|
||||
},
|
||||
thread::sleep,
|
||||
time::{Duration, Instant},
|
||||
},
|
||||
@@ -264,5 +267,9 @@ pub fn get_rpc_peer_info(
|
||||
);
|
||||
let rpc_node_details = rpc_node_details.unwrap();
|
||||
|
||||
// We no longer need the gossip node, stop it:
|
||||
let gossip_exit_flag = gossip.1;
|
||||
gossip_exit_flag.store(true, Ordering::Relaxed);
|
||||
|
||||
(gossip.0, rpc_node_details.0, rpc_node_details.1)
|
||||
}
|
||||
|
@@ -9,6 +9,7 @@ use {
|
||||
local_cluster::{ClusterConfig, LocalCluster},
|
||||
validator_configs::*,
|
||||
},
|
||||
solana_replica_lib::accountsdb_repl_server::AccountsDbReplServiceConfig,
|
||||
solana_replica_node::{
|
||||
replica_node::{ReplicaNode, ReplicaNodeConfig},
|
||||
replica_util,
|
||||
@@ -132,11 +133,22 @@ fn setup_snapshot_validator_config(
|
||||
// Create the account paths
|
||||
let (account_storage_dirs, account_storage_paths) = generate_account_paths(num_account_paths);
|
||||
|
||||
let bind_ip_addr = IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1));
|
||||
let accountsdb_repl_port =
|
||||
solana_net_utils::find_available_port_in_range(bind_ip_addr, (1024, 65535)).unwrap();
|
||||
let replica_server_addr = SocketAddr::new(bind_ip_addr, accountsdb_repl_port);
|
||||
|
||||
let accountsdb_repl_service_config = Some(AccountsDbReplServiceConfig {
|
||||
worker_threads: 1,
|
||||
replica_server_addr,
|
||||
});
|
||||
|
||||
// Create the validator config
|
||||
let validator_config = ValidatorConfig {
|
||||
snapshot_config: Some(snapshot_config),
|
||||
account_paths: account_storage_paths,
|
||||
accounts_hash_interval_slots: snapshot_interval_slots,
|
||||
accountsdb_repl_service_config,
|
||||
..ValidatorConfig::default()
|
||||
};
|
||||
|
||||
@@ -259,7 +271,14 @@ fn test_replica_bootstrap() {
|
||||
info!("The cluster info:\n{:?}", cluster_info.contact_info_trace());
|
||||
|
||||
let config = ReplicaNodeConfig {
|
||||
rpc_source_addr: contact_info.rpc,
|
||||
rpc_peer_addr: contact_info.rpc,
|
||||
accountsdb_repl_peer_addr: Some(
|
||||
leader_snapshot_test_config
|
||||
.validator_config
|
||||
.accountsdb_repl_service_config
|
||||
.unwrap()
|
||||
.replica_server_addr,
|
||||
),
|
||||
rpc_addr,
|
||||
rpc_pubsub_addr,
|
||||
ledger_path: ledger_path.to_path_buf(),
|
||||
|
Reference in New Issue
Block a user