adds shred-version to ip-echo-server response (backport #18066) (#18113)

* adds shred-version to ip-echo-server response

When starting a validator, the node initially joins gossip with
shred_version = 0, until it adopts the entrypoint's shred-version:
https://github.com/solana-labs/solana/blob/9b182f408/validator/src/main.rs#L417

Depending on the load on the entrypoint, adopting the entrypoint's
shred-version through gossip is sometimes very slow. It also causes
several problems in gossip, because we have to partially support
shred_version == 0, which is a source of crds values leaking from one
cluster to another; e.g. see
https://github.com/solana-labs/solana/pull/17899
and the other linked issues there.

In order to remove shred_version == 0 from gossip, this commit adds
shred-version to ip-echo-server response. Once the entrypoints are
updated, on validator start-up, if --expected_shred_version is not
specified we will obtain shred-version from the entrypoint using
ip-echo-server.

(cherry picked from commit 598093b5db)

# Conflicts:
#	Cargo.lock
#	net-utils/Cargo.toml
#	programs/bpf/Cargo.lock

* removes backport merge conflicts

* obtains shred-version from entrypoint's ip-echo-server in validator-main

(cherry picked from commit 58e115275a)

Co-authored-by: behzad nouri <behzadnouri@gmail.com>
This commit is contained in:
mergify[bot]
2021-06-21 23:04:08 +00:00
committed by GitHub
parent 090c801cc6
commit 363b75619f
9 changed files with 160 additions and 46 deletions

View File

@@ -991,6 +991,28 @@ fn rpc_bootstrap(
}
}
fn get_cluster_shred_version(entrypoints: &[SocketAddr]) -> Option<u16> {
let entrypoints = {
let mut index: Vec<_> = (0..entrypoints.len()).collect();
index.shuffle(&mut rand::thread_rng());
index.into_iter().map(|i| &entrypoints[i])
};
for entrypoint in entrypoints {
match solana_net_utils::get_cluster_shred_version(entrypoint) {
Err(err) => eprintln!("get_cluster_shred_version failed: {}, {}", entrypoint, err),
Ok(0) => eprintln!("zero sherd-version from entrypoint: {}", entrypoint),
Ok(shred_version) => {
info!(
"obtained shred-version {} from {}",
shred_version, entrypoint
);
return Some(shred_version);
}
}
}
None
}
pub fn main() {
let default_dynamic_port_range =
&format!("{}-{}", VALIDATOR_PORT_RANGE.0, VALIDATOR_PORT_RANGE.1);
@@ -2133,6 +2155,25 @@ pub fn main() {
} else {
AccountShrinkThreshold::IndividalStore { shrink_ratio }
};
let entrypoint_addrs = values_t!(matches, "entrypoint", String)
.unwrap_or_default()
.into_iter()
.map(|entrypoint| {
solana_net_utils::parse_host_port(&entrypoint).unwrap_or_else(|e| {
eprintln!("failed to parse entrypoint address: {}", e);
exit(1);
})
})
.collect::<HashSet<_>>()
.into_iter()
.collect::<Vec<_>>();
// TODO: Once entrypoints are updated to return shred-version, this should
// abort if it fails to obtain a shred-version, so that nodes always join
// gossip with a valid shred-version. The code to adopt entrypoint shred
// version can then be deleted from gossip and get_rpc_node above.
let expected_shred_version = value_t!(matches, "expected_shred_version", u16)
.ok()
.or_else(|| get_cluster_shred_version(&entrypoint_addrs));
let mut validator_config = ValidatorConfig {
require_tower: matches.is_present("require_tower"),
@@ -2145,7 +2186,7 @@ pub fn main() {
expected_bank_hash: matches
.value_of("expected_bank_hash")
.map(|s| Hash::from_str(s).unwrap()),
expected_shred_version: value_t!(matches, "expected_shred_version", u16).ok(),
expected_shred_version,
new_hard_forks: hardforks_of(&matches, "hard_forks"),
rpc_config: JsonRpcConfig {
enable_rpc_transaction_history: matches.is_present("enable_rpc_transaction_history"),
@@ -2396,19 +2437,6 @@ pub fn main() {
validator_config.halt_on_trusted_validators_accounts_hash_mismatch = true;
}
let entrypoint_addrs = values_t!(matches, "entrypoint", String)
.unwrap_or_default()
.into_iter()
.map(|entrypoint| {
solana_net_utils::parse_host_port(&entrypoint).unwrap_or_else(|e| {
eprintln!("failed to parse entrypoint address: {}", e);
exit(1);
})
})
.collect::<HashSet<_>>()
.into_iter()
.collect::<Vec<_>>();
let public_rpc_addr = matches.value_of("public_rpc_addr").map(|addr| {
solana_net_utils::parse_host_port(addr).unwrap_or_else(|e| {
eprintln!("failed to parse public rpc address: {}", e);