Compare commits

...

16 Commits

Author SHA1 Message Date
ff9bd2f512 Fix the output from Gossip Discovery (#4070) 2019-04-29 14:59:01 -07:00
25810ce729 Remove Bench Exchange Contract Execution graph 2019-04-29 14:28:09 -07:00
82c7f0e366 testnet-demo: use more low quota nodes 2019-04-29 12:08:39 -07:00
012d05f10b Increase testnet-demo node count a little 2019-04-29 09:10:18 -07:00
f853595efb testnet-demo now runs across more GCE zones (#4053)
* testnet-demo now runs across more GCE zones

* Save zone info to config file

* Add geoip whitelist for common data centers

* Skip more of start

* Include -x for config

* Fetch private key from first validator node if necessary

* Correct -r propagation
2019-04-28 19:50:02 -07:00
09e4f7e49c Correctly terminate instances across multiple zones 2019-04-28 09:09:34 -07:00
cb37072ed7 Switch testnet-demo to influxcloud 2019-04-27 22:12:30 -07:00
0b109d3340 Correct us-central1-b zone name 2019-04-27 21:43:31 -07:00
dcdc5b8cf7 testnet-demo: skip over validator nodes that fail to boot 2019-04-27 21:34:02 -07:00
1a7c30bb86 Use GPU nodes for blockstreamer as well if rest of testnet has GPUs (#4046) (#4048)
automerge
2019-04-27 21:31:01 -07:00
3ebc14f965 Blockstreamer annotation fix for non buildkite deployments (#4045) (#4047)
automerge
2019-04-27 21:01:26 -07:00
cf589efbbf Performance metrics computation methodology (#4041) (#4044)
automerge
2019-04-27 16:59:45 -07:00
94d5c64281 testnet-demo: add more GCE zones, remove client 2019-04-27 16:53:05 -07:00
566de1fd0e Add DNS resolution for network/drone arguments (#4037)
automerge
2019-04-27 10:00:41 -07:00
cb0f367084 Avoid inaccurate PATH nagging 2019-04-27 15:32:23 +00:00
e08e1fe6ac Add " 2019-04-27 07:41:55 -07:00
14 changed files with 314 additions and 365 deletions

View File

@ -1,10 +1,10 @@
use std::net::SocketAddr;
use std::time::Duration;
use clap::{crate_description, crate_name, crate_version, value_t, App, Arg, ArgMatches}; use clap::{crate_description, crate_name, crate_version, value_t, App, Arg, ArgMatches};
use solana::gen_keys::GenKeys; use solana::gen_keys::GenKeys;
use solana_drone::drone::DRONE_PORT; use solana_drone::drone::DRONE_PORT;
use solana_sdk::signature::{read_keypair, Keypair, KeypairUtil}; use solana_sdk::signature::{read_keypair, Keypair, KeypairUtil};
use std::net::SocketAddr;
use std::process::exit;
use std::time::Duration;
pub struct Config { pub struct Config {
pub network_addr: SocketAddr, pub network_addr: SocketAddr,
@ -146,16 +146,17 @@ pub fn build_args<'a, 'b>() -> App<'a, 'b> {
pub fn extract_args<'a>(matches: &ArgMatches<'a>) -> Config { pub fn extract_args<'a>(matches: &ArgMatches<'a>) -> Config {
let mut args = Config::default(); let mut args = Config::default();
args.network_addr = matches args.network_addr = solana_netutil::parse_host_port(matches.value_of("network").unwrap())
.value_of("network") .unwrap_or_else(|e| {
.unwrap() eprintln!("failed to parse network address: {}", e);
.parse() exit(1)
.expect("Failed to parse network"); });
args.drone_addr = matches
.value_of("drone") args.drone_addr = solana_netutil::parse_host_port(matches.value_of("drone").unwrap())
.unwrap() .unwrap_or_else(|e| {
.parse() eprintln!("failed to parse drone address: {}", e);
.expect("Failed to parse drone address"); exit(1)
});
if matches.is_present("identity") { if matches.is_present("identity") {
args.identity = read_keypair(matches.value_of("identity").unwrap()) args.identity = read_keypair(matches.value_of("identity").unwrap())

View File

@ -20,6 +20,7 @@
- [Ledger Replication](ledger-replication.md) - [Ledger Replication](ledger-replication.md)
- [Secure Vote Signing](vote-signing.md) - [Secure Vote Signing](vote-signing.md)
- [Staking Delegation and Rewards](stake-delegation-and-rewards.md) - [Staking Delegation and Rewards](stake-delegation-and-rewards.md)
- [Performance Metrics](performance-metrics.md)
- [Anatomy of a Fullnode](fullnode.md) - [Anatomy of a Fullnode](fullnode.md)
- [TPU](tpu.md) - [TPU](tpu.md)

View File

@ -0,0 +1,29 @@
# Performance Metrics
Solana cluster performance is measured as the average number of transactions per second
that the network can sustain (TPS), and as how long it takes for a transaction to be
confirmed by a supermajority of the cluster (Confirmation Time).
Each cluster node maintains various counters that are incremented on certain events.
These counters are periodically uploaded to a cloud-based database. Solana's metrics
dashboard fetches these counters, computes the performance metrics, and displays
them on the dashboard.
## TPS
The leader node's banking stage maintains a count of transactions that it processed.
The dashboard displays the count averaged over a 2-second period in the TPS time series
graph. The dashboard also shows the per-second mean, maximum and total TPS as a running
counter.
## Confirmation Time
Each validator node maintains a list of active ledger forks that are visible to the node.
A fork is considered to be frozen when the node has received and processed all entries
corresponding to the fork. A fork is considered to be confirmed when it receives a cumulative
supermajority vote and one of its child forks is frozen.
The node assigns a timestamp to every new fork, and computes the time it took to confirm
the fork. This time is reflected as validator confirmation time in performance metrics.
The performance dashboard displays the average of each validator node's confirmation time
as a time series graph.

View File

@ -11,9 +11,11 @@ clientNodeCount=0
additionalFullNodeCount=10 additionalFullNodeCount=10
publicNetwork=false publicNetwork=false
stopNetwork=false stopNetwork=false
skipSetup=false reuseLedger=false
skipCreate=false
skipStart=false skipStart=false
externalNode=false externalNode=false
failOnValidatorBootupFailure=true
tarChannelOrTag=edge tarChannelOrTag=edge
delete=false delete=false
enableGpu=false enableGpu=false
@ -55,8 +57,10 @@ Deploys a CD testnet
-r - Reuse existing node/ledger configuration from a -r - Reuse existing node/ledger configuration from a
previous |start| (ie, don't run ./multinode-demo/setup.sh). previous |start| (ie, don't run ./multinode-demo/setup.sh).
-x - External node. Default: false -x - External node. Default: false
-e - Skip create. Assume the nodes have already been created
-s - Skip start. Nodes will still be created or configured, but network software will not be started. -s - Skip start. Nodes will still be created or configured, but network software will not be started.
-S - Stop network software without tearing down nodes. -S - Stop network software without tearing down nodes.
-f - Discard validator nodes that didn't bootup successfully
Note: the SOLANA_METRICS_CONFIG environment variable is used to configure Note: the SOLANA_METRICS_CONFIG environment variable is used to configure
metrics metrics
@ -66,7 +70,7 @@ EOF
zone=() zone=()
while getopts "h?p:Pn:c:t:gG:a:Dbd:rusxz:p:C:S" opt; do while getopts "h?p:Pn:c:t:gG:a:Dbd:rusxz:p:C:Sfe" opt; do
case $opt in case $opt in
h | \?) h | \?)
usage usage
@ -119,7 +123,10 @@ while getopts "h?p:Pn:c:t:gG:a:Dbd:rusxz:p:C:S" opt; do
delete=true delete=true
;; ;;
r) r)
skipSetup=true reuseLedger=true
;;
e)
skipCreate=true
;; ;;
s) s)
skipStart=true skipStart=true
@ -127,6 +134,9 @@ while getopts "h?p:Pn:c:t:gG:a:Dbd:rusxz:p:C:S" opt; do
x) x)
externalNode=true externalNode=true
;; ;;
f)
failOnValidatorBootupFailure=false
;;
u) u)
blockstreamer=true blockstreamer=true
;; ;;
@ -170,15 +180,15 @@ for val in "${zone[@]}"; do
done done
if $stopNetwork; then if $stopNetwork; then
skipSetup=true skipCreate=true
fi fi
if $delete; then if $delete; then
skipSetup=false skipCreate=false
fi fi
# Create the network # Create the network
if ! $skipSetup; then if ! $skipCreate; then
echo "--- $cloudProvider.sh delete" echo "--- $cloudProvider.sh delete"
# shellcheck disable=SC2068 # shellcheck disable=SC2068
time net/"$cloudProvider".sh delete ${zone_args[@]} -p "$netName" ${externalNode:+-x} time net/"$cloudProvider".sh delete ${zone_args[@]} -p "$netName" ${externalNode:+-x}
@ -224,6 +234,10 @@ if ! $skipSetup; then
create_args+=(-x) create_args+=(-x)
fi fi
if ! $failOnValidatorBootupFailure; then
create_args+=(-f)
fi
time net/"$cloudProvider".sh create "${create_args[@]}" time net/"$cloudProvider".sh create "${create_args[@]}"
else else
echo "--- $cloudProvider.sh config" echo "--- $cloudProvider.sh config"
@ -236,6 +250,14 @@ else
config_args+=(-P) config_args+=(-P)
fi fi
if $externalNode; then
config_args+=(-x)
fi
if ! $failOnValidatorBootupFailure; then
config_args+=(-f)
fi
time net/"$cloudProvider".sh config "${config_args[@]}" time net/"$cloudProvider".sh config "${config_args[@]}"
fi fi
net/init-metrics.sh -e net/init-metrics.sh -e
@ -249,35 +271,35 @@ if $stopNetwork; then
exit 0 exit 0
fi fi
echo --- net.sh start
maybeRejectExtraNodes=
if ! $publicNetwork; then
maybeRejectExtraNodes="-o rejectExtraNodes"
fi
maybeNoValidatorSanity=
if [[ -n $NO_VALIDATOR_SANITY ]]; then
maybeNoValidatorSanity="-o noValidatorSanity"
fi
maybeNoLedgerVerify=
if [[ -n $NO_LEDGER_VERIFY ]]; then
maybeNoLedgerVerify="-o noLedgerVerify"
fi
maybeSkipSetup=
if $skipSetup; then
maybeSkipSetup="-r"
fi
ok=true ok=true
if ! $skipStart; then if ! $skipStart; then
( (
if $skipSetup; then if $skipCreate; then
# TODO: Enable rolling updates # TODO: Enable rolling updates
#op=update #op=update
op=restart op=restart
else else
op=start op=start
fi fi
echo "--- net.sh $op"
maybeRejectExtraNodes=
if ! $publicNetwork; then
maybeRejectExtraNodes="-o rejectExtraNodes"
fi
maybeNoValidatorSanity=
if [[ -n $NO_VALIDATOR_SANITY ]]; then
maybeNoValidatorSanity="-o noValidatorSanity"
fi
maybeNoLedgerVerify=
if [[ -n $NO_LEDGER_VERIFY ]]; then
maybeNoLedgerVerify="-o noLedgerVerify"
fi
maybeReuseLedger=
if $reuseLedger; then
maybeReuseLedger="-r"
fi
maybeUpdateManifestKeypairFile= maybeUpdateManifestKeypairFile=
# shellcheck disable=SC2154 # SOLANA_INSTALL_UPDATE_MANIFEST_KEYPAIR_x86_64_unknown_linux_gnu comes from .buildkite/env/ # shellcheck disable=SC2154 # SOLANA_INSTALL_UPDATE_MANIFEST_KEYPAIR_x86_64_unknown_linux_gnu comes from .buildkite/env/
@ -289,7 +311,7 @@ if ! $skipStart; then
# shellcheck disable=SC2086 # Don't want to double quote the $maybeXYZ variables # shellcheck disable=SC2086 # Don't want to double quote the $maybeXYZ variables
time net/net.sh $op -t "$tarChannelOrTag" \ time net/net.sh $op -t "$tarChannelOrTag" \
$maybeUpdateManifestKeypairFile \ $maybeUpdateManifestKeypairFile \
$maybeSkipSetup \ $maybeReuseLedger \
$maybeRejectExtraNodes \ $maybeRejectExtraNodes \
$maybeNoValidatorSanity \ $maybeNoValidatorSanity \
$maybeNoLedgerVerify $maybeNoLedgerVerify

View File

@ -52,7 +52,7 @@ steps:
value: "create-and-start" value: "create-and-start"
- label: "Create testnet, but do not start software. If the testnet already exists it will be deleted and re-created" - label: "Create testnet, but do not start software. If the testnet already exists it will be deleted and re-created"
value: "create" value: "create"
- label: "Start network software on an existing testnet. If software is already running it will be restarted." - label: "Start network software on an existing testnet. If software is already running it will be restarted"
value: "start" value: "start"
- label: "Stop network software without deleting testnet nodes" - label: "Stop network software without deleting testnet nodes"
value: "stop" value: "stop"
@ -62,11 +62,11 @@ steps:
value: "sanity-or-restart" value: "sanity-or-restart"
- label: "Sanity check only" - label: "Sanity check only"
value: "sanity" value: "sanity"
- label: "Delete the testnet. - label: "Delete the testnet"
value: "delete" value: "delete"
- label: "Enable/unlock the testnet." - label: "Enable/unlock the testnet"
value: "enable" value: "enable"
- label: "Delete and then lock the testnet from further operation until it is re-enabled." - label: "Delete and then lock the testnet from further operation until it is re-enabled"
value: "disable" value: "disable"
- command: "ci/$(basename "$0")" - command: "ci/$(basename "$0")"
agents: agents:
@ -81,7 +81,40 @@ eval "$(ci/channel-info.sh)"
EC2_ZONES=(us-west-1a sa-east-1a ap-northeast-2a eu-central-1a ca-central-1a) EC2_ZONES=(us-west-1a sa-east-1a ap-northeast-2a eu-central-1a ca-central-1a)
GCE_ZONES=(us-west1-b asia-east2-a europe-west4-a southamerica-east1-b us-east4-c)
# GCE zones with _lots_ of quota
GCE_ZONES=(
us-west1-a
us-central1-a
us-east1-b
europe-west4-a
us-west1-b
us-central1-b
us-east1-c
europe-west4-b
us-west1-c
us-east1-d
europe-west4-c
)
# GCE zones with enough quota for one CPU-only fullnode
GCE_LOW_QUOTA_ZONES=(
asia-east2-a
asia-northeast1-b
asia-northeast2-b
asia-south1-c
asia-southeast1-b
australia-southeast1-b
europe-north1-a
europe-west2-b
europe-west3-c
europe-west6-a
northamerica-northeast1-a
southamerica-east1-b
)
case $TESTNET in case $TESTNET in
testnet-edge|testnet-edge-perf) testnet-edge|testnet-edge-perf)
CHANNEL_OR_TAG=edge CHANNEL_OR_TAG=edge
@ -107,7 +140,9 @@ testnet-perf)
testnet-demo) testnet-demo)
CHANNEL_OR_TAG=beta CHANNEL_OR_TAG=beta
CHANNEL_BRANCH=$BETA_CHANNEL CHANNEL_BRANCH=$BETA_CHANNEL
: "${GCE_NODE_COUNT:=200}" : "${GCE_NODE_COUNT:=150}"
: "${GCE_LOW_QUOTA_NODE_COUNT:=70}"
: "${TESTNET_DB_HOST:=https://clocktower-f1d56615.influxcloud.net:8086}"
;; ;;
*) *)
echo "Error: Invalid TESTNET=$TESTNET" echo "Error: Invalid TESTNET=$TESTNET"
@ -123,6 +158,10 @@ GCE_ZONE_ARGS=()
for val in "${GCE_ZONES[@]}"; do for val in "${GCE_ZONES[@]}"; do
GCE_ZONE_ARGS+=("-z $val") GCE_ZONE_ARGS+=("-z $val")
done done
GCE_LOW_QUOTA_ZONE_ARGS=()
for val in "${GCE_LOW_QUOTA_ZONES[@]}"; do
GCE_LOW_QUOTA_ZONE_ARGS+=("-z $val")
done
if [[ -n $TESTNET_DB_HOST ]]; then if [[ -n $TESTNET_DB_HOST ]]; then
SOLANA_METRICS_PARTIAL_CONFIG="host=$TESTNET_DB_HOST,$SOLANA_METRICS_PARTIAL_CONFIG" SOLANA_METRICS_PARTIAL_CONFIG="host=$TESTNET_DB_HOST,$SOLANA_METRICS_PARTIAL_CONFIG"
@ -151,6 +190,7 @@ steps:
TESTNET_DB_HOST: "$TESTNET_DB_HOST" TESTNET_DB_HOST: "$TESTNET_DB_HOST"
EC2_NODE_COUNT: "$EC2_NODE_COUNT" EC2_NODE_COUNT: "$EC2_NODE_COUNT"
GCE_NODE_COUNT: "$GCE_NODE_COUNT" GCE_NODE_COUNT: "$GCE_NODE_COUNT"
GCE_LOW_QUOTA_NODE_COUNT: "$GCE_LOW_QUOTA_NODE_COUNT"
EOF EOF
) | buildkite-agent pipeline upload ) | buildkite-agent pipeline upload
exit 0 exit 0
@ -227,7 +267,7 @@ sanity() {
ok=true ok=true
if [[ -n $GCE_NODE_COUNT ]]; then if [[ -n $GCE_NODE_COUNT ]]; then
NO_LEDGER_VERIFY=1 \ NO_LEDGER_VERIFY=1 \
ci/testnet-sanity.sh demo-testnet-solana-com gce "${GCE_ZONES[0]}" || ok=false ci/testnet-sanity.sh demo-testnet-solana-com gce "${GCE_ZONES[0]}" -f || ok=false
else else
echo "Error: no GCE nodes" echo "Error: no GCE nodes"
ok=false ok=false
@ -270,7 +310,7 @@ deploy() {
set -x set -x
ci/testnet-deploy.sh -p edge-testnet-solana-com -C ec2 -z us-west-1a \ ci/testnet-deploy.sh -p edge-testnet-solana-com -C ec2 -z us-west-1a \
-t "$CHANNEL_OR_TAG" -n 3 -c 0 -u -P -a eipalloc-0ccd4f2239886fa94 \ -t "$CHANNEL_OR_TAG" -n 3 -c 0 -u -P -a eipalloc-0ccd4f2239886fa94 \
${skipCreate:+-r} \ ${skipCreate:+-e} \
${skipStart:+-s} \ ${skipStart:+-s} \
${maybeStop:+-S} \ ${maybeStop:+-S} \
${maybeDelete:+-D} ${maybeDelete:+-D}
@ -285,7 +325,7 @@ deploy() {
ci/testnet-deploy.sh -p edge-perf-testnet-solana-com -C ec2 -z us-west-2b \ ci/testnet-deploy.sh -p edge-perf-testnet-solana-com -C ec2 -z us-west-2b \
-g -t "$CHANNEL_OR_TAG" -c 2 \ -g -t "$CHANNEL_OR_TAG" -c 2 \
-b \ -b \
${skipCreate:+-r} \ ${skipCreate:+-e} \
${skipStart:+-s} \ ${skipStart:+-s} \
${maybeStop:+-S} \ ${maybeStop:+-S} \
${maybeDelete:+-D} ${maybeDelete:+-D}
@ -298,7 +338,7 @@ deploy() {
ci/testnet-deploy.sh -p beta-testnet-solana-com -C ec2 -z us-west-1a \ ci/testnet-deploy.sh -p beta-testnet-solana-com -C ec2 -z us-west-1a \
-t "$CHANNEL_OR_TAG" -n 3 -c 0 -u -P -a eipalloc-0f286cf8a0771ce35 \ -t "$CHANNEL_OR_TAG" -n 3 -c 0 -u -P -a eipalloc-0f286cf8a0771ce35 \
-b \ -b \
${skipCreate:+-r} \ ${skipCreate:+-e} \
${skipStart:+-s} \ ${skipStart:+-s} \
${maybeStop:+-S} \ ${maybeStop:+-S} \
${maybeDelete:+-D} ${maybeDelete:+-D}
@ -313,7 +353,7 @@ deploy() {
ci/testnet-deploy.sh -p beta-perf-testnet-solana-com -C ec2 -z us-west-2b \ ci/testnet-deploy.sh -p beta-perf-testnet-solana-com -C ec2 -z us-west-2b \
-g -t "$CHANNEL_OR_TAG" -c 2 \ -g -t "$CHANNEL_OR_TAG" -c 2 \
-b \ -b \
${skipCreate:+-r} \ ${skipCreate:+-e} \
${skipStart:+-s} \ ${skipStart:+-s} \
${maybeStop:+-S} \ ${maybeStop:+-S} \
${maybeDelete:+-D} ${maybeDelete:+-D}
@ -330,7 +370,7 @@ deploy() {
# shellcheck disable=SC2068 # shellcheck disable=SC2068
ci/testnet-deploy.sh -p testnet-solana-com -C ec2 ${EC2_ZONE_ARGS[@]} \ ci/testnet-deploy.sh -p testnet-solana-com -C ec2 ${EC2_ZONE_ARGS[@]} \
-t "$CHANNEL_OR_TAG" -n "$EC2_NODE_COUNT" -c 0 -u -P -a eipalloc-0fa502bf95f6f18b2 \ -t "$CHANNEL_OR_TAG" -n "$EC2_NODE_COUNT" -c 0 -u -P -a eipalloc-0fa502bf95f6f18b2 \
${skipCreate:+-r} \ ${skipCreate:+-e} \
${maybeSkipStart:+-s} \ ${maybeSkipStart:+-s} \
${maybeStop:+-S} \ ${maybeStop:+-S} \
${maybeDelete:+-D} ${maybeDelete:+-D}
@ -339,11 +379,11 @@ deploy() {
# shellcheck disable=SC2068 # shellcheck disable=SC2068
ci/testnet-deploy.sh -p testnet-solana-com -C gce ${GCE_ZONE_ARGS[@]} \ ci/testnet-deploy.sh -p testnet-solana-com -C gce ${GCE_ZONE_ARGS[@]} \
-t "$CHANNEL_OR_TAG" -n "$GCE_NODE_COUNT" -c 0 -P \ -t "$CHANNEL_OR_TAG" -n "$GCE_NODE_COUNT" -c 0 -P \
${skipCreate:+-r} \ ${skipCreate:+-e} \
${skipStart:+-s} \ ${skipStart:+-s} \
${maybeStop:+-S} \ ${maybeStop:+-S} \
${maybeDelete:+-D} \ ${maybeDelete:+-D} \
${EC2_NODE_COUNT:+-x} -x
fi fi
) )
;; ;;
@ -358,7 +398,7 @@ deploy() {
-t "$CHANNEL_OR_TAG" -c 2 \ -t "$CHANNEL_OR_TAG" -c 2 \
-b \ -b \
-d pd-ssd \ -d pd-ssd \
${skipCreate:+-r} \ ${skipCreate:+-e} \
${skipStart:+-s} \ ${skipStart:+-s} \
${maybeStop:+-S} \ ${maybeStop:+-S} \
${maybeDelete:+-D} ${maybeDelete:+-D}
@ -367,12 +407,25 @@ deploy() {
testnet-demo) testnet-demo)
( (
set -x set -x
if [[ -n $GCE_NODE_COUNT ]]; then
if [[ -n $GCE_LOW_QUOTA_NODE_COUNT ]] || [[ -n $skipStart ]]; then
maybeSkipStart="skip"
fi
# shellcheck disable=SC2068
ci/testnet-deploy.sh -p demo-testnet-solana-com -C gce ${GCE_ZONE_ARGS[@]} \
-t "$CHANNEL_OR_TAG" -n "$GCE_NODE_COUNT" -c 0 -P -u -f \
-a demo-testnet-solana-com \
${skipCreate:+-e} \
${maybeSkipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D}
if [[ -n $GCE_LOW_QUOTA_NODE_COUNT ]]; then
# shellcheck disable=SC2068 # shellcheck disable=SC2068
ci/testnet-deploy.sh -p testnet-demo -C gce ${GCE_ZONE_ARGS[@]} \ ci/testnet-deploy.sh -p demo-testnet-solana-com2 -C gce ${GCE_LOW_QUOTA_ZONE_ARGS[@]} \
-t "$CHANNEL_OR_TAG" -n "$GCE_NODE_COUNT" -c 1 -P -u \ -t "$CHANNEL_OR_TAG" -n "$GCE_LOW_QUOTA_NODE_COUNT" -c 0 -P -f -x \
-a demo-testnet-solana-com \ ${skipCreate:+-e} \
${skipCreate:+-r} \
${skipStart:+-s} \ ${skipStart:+-s} \
${maybeStop:+-S} \ ${maybeStop:+-S} \
${maybeDelete:+-D} ${maybeDelete:+-D}

View File

@ -249,7 +249,7 @@ impl ClusterInfo {
let nodes: Vec<_> = self let nodes: Vec<_> = self
.all_peers() .all_peers()
.into_iter() .into_iter()
.map(|node| { .map(|(node, last_updated)| {
if !ContactInfo::is_valid_address(&node.gossip) { if !ContactInfo::is_valid_address(&node.gossip) {
spy_nodes += 1; spy_nodes += 1;
} }
@ -266,7 +266,7 @@ impl ClusterInfo {
tpu: {:20} | |\n \ tpu: {:20} | |\n \
rpc: {:20} | |\n", rpc: {:20} | |\n",
addr_to_string(&node.gossip), addr_to_string(&node.gossip),
now.saturating_sub(node.wallclock), now.saturating_sub(last_updated),
node.id, node.id,
if node.id == my_id { "(me)" } else { "" }.to_string(), if node.id == my_id { "(me)" } else { "" }.to_string(),
addr_to_string(&node.tpu), addr_to_string(&node.tpu),
@ -347,14 +347,17 @@ impl ClusterInfo {
.collect() .collect()
} }
// All nodes in gossip, including spy nodes // All nodes in gossip (including spy nodes) and the last time we heard about them
pub(crate) fn all_peers(&self) -> Vec<ContactInfo> { pub(crate) fn all_peers(&self) -> Vec<(ContactInfo, u64)> {
self.gossip self.gossip
.crds .crds
.table .table
.values() .values()
.filter_map(|x| x.value.contact_info()) .filter_map(|x| {
.cloned() x.value
.contact_info()
.map(|ci| (ci.clone(), x.local_timestamp))
})
.collect() .collect()
} }

View File

@ -294,7 +294,7 @@ impl RpcSol for RpcSolImpl {
Ok(cluster_info Ok(cluster_info
.all_peers() .all_peers()
.iter() .iter()
.filter_map(|contact_info| { .filter_map(|(contact_info, _)| {
if ContactInfo::is_valid_address(&contact_info.gossip) { if ContactInfo::is_valid_address(&contact_info.gossip) {
Some(RpcContactInfo { Some(RpcContactInfo {
id: contact_info.id.to_string(), id: contact_info.id.to_string(),

View File

@ -250,7 +250,10 @@ fn get_update_manifest(
fn check_env_path_for_bin_dir(config: &Config) { fn check_env_path_for_bin_dir(config: &Config) {
use std::env; use std::env;
let bin_dir = config.active_release_bin_dir(); let bin_dir = config
.active_release_bin_dir()
.canonicalize()
.unwrap_or_default();
let found = match env::var_os("PATH") { let found = match env::var_os("PATH") {
Some(paths) => env::split_paths(&paths).any(|path| { Some(paths) => env::split_paths(&paths).any(|path| {
if let Ok(path) = path.canonicalize() { if let Ok(path) = path.canonicalize() {

View File

@ -4410,268 +4410,6 @@
"alignLevel": null "alignLevel": null
} }
}, },
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"h": 5,
"w": 8,
"x": 16,
"y": 49
},
"id": 56,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") AS \"do_account_request\" FROM \"$testnet\".\"autogen\".\"counter-exchange-do-account-request\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
},
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") / 3 AS \"transfer_request\" FROM \"$testnet\".\"autogen\".\"counter-exchange-transfer-request\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time(3s)",
"rawQuery": true,
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
},
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") / 3 AS \"trade_request\" FROM \"$testnet\".\"autogen\".\"counter-exchange-trade-request\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time(3s)",
"rawQuery": true,
"refId": "C",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
},
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") / 3 AS \"trade_cancellation\" FROM \"$testnet\".\"autogen\".\"counter-exchange-trade-cancellation\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time(3s)",
"rawQuery": true,
"refId": "D",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
},
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") / 3 AS \"swap_request\" FROM \"$testnet\".\"autogen\".\"counter-exchange-swap-request\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time(3s)",
"rawQuery": true,
"refId": "E",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Bench Exchange Contract Execution",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{ {
"collapsed": false, "collapsed": false,
"gridPos": { "gridPos": {

View File

@ -19,6 +19,7 @@ mkdir -p "$netConfigDir" "$netLogDir"
source "$(dirname "${BASH_SOURCE[0]}")"/../scripts/configure-metrics.sh source "$(dirname "${BASH_SOURCE[0]}")"/../scripts/configure-metrics.sh
configFile="$netConfigDir/config" configFile="$netConfigDir/config"
geoipConfigFile="$netConfigDir/geoip.yml"
entrypointIp= entrypointIp=
publicNetwork= publicNetwork=
@ -28,10 +29,13 @@ externalNodeSshKey=
sshOptions=() sshOptions=()
fullnodeIpList=() fullnodeIpList=()
fullnodeIpListPrivate=() fullnodeIpListPrivate=()
fullnodeIpListZone=()
clientIpList=() clientIpList=()
clientIpListPrivate=() clientIpListPrivate=()
clientIpListZone=()
blockstreamerIpList=() blockstreamerIpList=()
blockstreamerIpListPrivate=() blockstreamerIpListPrivate=()
blockstreamerIpListZone=()
leaderRotation= leaderRotation=
buildSshOptions() { buildSshOptions() {

View File

@ -163,11 +163,13 @@ while getopts "h?p:Pn:c:z:gG:a:d:buxf" opt; do
enableGpu=true enableGpu=true
bootstrapLeaderMachineType=$gpuBootstrapLeaderMachineType bootstrapLeaderMachineType=$gpuBootstrapLeaderMachineType
fullNodeMachineType=$bootstrapLeaderMachineType fullNodeMachineType=$bootstrapLeaderMachineType
blockstreamerMachineType=$bootstrapLeaderMachineType
;; ;;
G) G)
enableGpu=true enableGpu=true
bootstrapLeaderMachineType="$OPTARG" bootstrapLeaderMachineType="$OPTARG"
fullNodeMachineType=$bootstrapLeaderMachineType fullNodeMachineType=$bootstrapLeaderMachineType
blockstreamerMachineType=$bootstrapLeaderMachineType
;; ;;
a) a)
customAddress=$OPTARG customAddress=$OPTARG
@ -225,6 +227,7 @@ esac
# name - name of the instance # name - name of the instance
# publicIp - The public IP address of this instance # publicIp - The public IP address of this instance
# privateIp - The private IP address of this instance # privateIp - The private IP address of this instance
# zone - Zone of this instance
# count - Monotonically increasing count for each # count - Monotonically increasing count for each
# invocation of cmd, starting at 1 # invocation of cmd, starting at 1
# ... - Extra args to cmd.. # ... - Extra args to cmd..
@ -240,11 +243,70 @@ cloud_ForEachInstance() {
declare name publicIp privateIp declare name publicIp privateIp
IFS=: read -r name publicIp privateIp zone < <(echo "$info") IFS=: read -r name publicIp privateIp zone < <(echo "$info")
eval "$cmd" "$name" "$publicIp" "$privateIp" "$count" "$@" eval "$cmd" "$name" "$publicIp" "$privateIp" "$zone" "$count" "$@"
count=$((count + 1)) count=$((count + 1))
done done
} }
# Map a cloud provider zone name to an approximate "[lat, long]" pair for the
# data center hosting that zone.  Used in place of ordinary geoip lookups,
# which can be wildly inaccurate for cloud provider IP addresses.
#
#   $1 - zone name (matched by region prefix, e.g. "us-west1-a")
#
# Prints the location on stdout, or prints nothing when the zone's region is
# not in the table below.  Always returns success.
zoneLocation() {
  declare zoneName="$1"
  declare latlng=

  # Region prefixes are mutually exclusive, so at most one arm matches.
  case "$zoneName" in
  us-west1*)                latlng="[45.5946, -121.1787]" ;;
  us-central1*)             latlng="[41.2619, -95.8608]" ;;
  us-east1*)                latlng="[33.1960, -80.0131]" ;;
  asia-east2*)              latlng="[22.3193, 114.1694]" ;;
  asia-northeast1*)         latlng="[35.6762, 139.6503]" ;;
  asia-northeast2*)         latlng="[34.6937, 135.5023]" ;;
  asia-south1*)             latlng="[19.0760, 72.8777]" ;;
  asia-southeast1*)         latlng="[1.3404, 103.7090]" ;;
  australia-southeast1*)    latlng="[-33.8688, 151.2093]" ;;
  europe-north1*)           latlng="[60.5693, 27.1878]" ;;
  europe-west2*)            latlng="[51.5074, -0.1278]" ;;
  europe-west3*)            latlng="[50.1109, 8.6821]" ;;
  europe-west4*)            latlng="[53.4386, 6.8355]" ;;
  europe-west6*)            latlng="[47.3769, 8.5417]" ;;
  northamerica-northeast1*) latlng="[45.5017, -73.5673]" ;;
  southamerica-east1*)      latlng="[-23.5505, -46.6333]" ;;
  esac

  # Unknown regions produce no output at all (not even an empty line).
  if [[ -n $latlng ]]; then
    printf '%s\n' "$latlng"
  fi
}
prepareInstancesAndWriteConfigFile() { prepareInstancesAndWriteConfigFile() {
$metricsWriteDatapoint "testnet-deploy net-config-begin=1" $metricsWriteDatapoint "testnet-deploy net-config-begin=1"
@ -252,6 +314,7 @@ prepareInstancesAndWriteConfigFile() {
echo "Appending to existing config file" echo "Appending to existing config file"
echo "externalNodeSshKey=$sshPrivateKey" >> "$configFile" echo "externalNodeSshKey=$sshPrivateKey" >> "$configFile"
else else
rm -f "$geoipConfigFile"
cat >> "$configFile" <<EOF cat >> "$configFile" <<EOF
# autogenerated at $(date) # autogenerated at $(date)
netBasename=$prefix netBasename=$prefix
@ -260,6 +323,7 @@ sshPrivateKey=$sshPrivateKey
leaderRotation=$leaderRotation leaderRotation=$leaderRotation
EOF EOF
fi fi
touch "$geoipConfigFile"
buildSshOptions buildSshOptions
@ -267,11 +331,13 @@ EOF
declare name="$1" declare name="$1"
declare publicIp="$2" declare publicIp="$2"
declare privateIp="$3" declare privateIp="$3"
declare zone="$4"
#declare index="$5"
declare failOnFailure="$5" declare failOnFailure="$6"
declare arrayName="$6" declare arrayName="$7"
# This check should eventually be moved to cloud provider specific script # This check should eventually be moved to cloud provider specific script
if [ "$publicIp" = "TERMINATED" ] || [ "$privateIp" = "TERMINATED" ]; then if [ "$publicIp" = "TERMINATED" ] || [ "$privateIp" = "TERMINATED" ]; then
if $failOnFailure; then if $failOnFailure; then
exit 1 exit 1
@ -284,7 +350,7 @@ EOF
echo "Waiting for $name to finish booting..." echo "Waiting for $name to finish booting..."
( (
set -x +e set -x +e
for i in $(seq 1 20); do for i in $(seq 1 30); do
timeout --preserve-status --foreground 20s ssh "${sshOptions[@]}" "$publicIp" "ls -l /.instance-startup-complete" timeout --preserve-status --foreground 20s ssh "${sshOptions[@]}" "$publicIp" "ls -l /.instance-startup-complete"
ret=$? ret=$?
if [[ $ret -eq 0 ]]; then if [[ $ret -eq 0 ]]; then
@ -303,21 +369,21 @@ EOF
exit 1 exit 1
fi fi
else else
echo "$arrayName+=($publicIp) # $name" >> "$configFile" {
echo "${arrayName}Private+=($privateIp) # $name" >> "$configFile" echo "$arrayName+=($publicIp) # $name"
echo "${arrayName}Private+=($privateIp) # $name"
echo "${arrayName}Zone+=($zone) # $name"
} >> "$configFile"
declare latlng=
latlng=$(zoneLocation "$zone")
if [[ -n $latlng ]]; then
echo "$publicIp: $latlng" >> "$geoipConfigFile"
fi
fi fi
} }
if $externalNodes; then fetchPrivateKey() {
echo "Bootstrap leader is already configured"
else
echo "Looking for bootstrap leader instance..."
cloud_FindInstance "$prefix-bootstrap-leader"
[[ ${#instances[@]} -eq 1 ]] || {
echo "Unable to find bootstrap leader"
exit 1
}
( (
declare nodeName declare nodeName
declare nodeIp declare nodeIp
@ -336,7 +402,9 @@ EOF
set -x -o pipefail set -x -o pipefail
for i in $(seq 1 30); do for i in $(seq 1 30); do
if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey" "$nodeZone"; then if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey" "$nodeZone"; then
break if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa.pub "$sshPrivateKey.pub" "$nodeZone"; then
break
fi
fi fi
sleep 1 sleep 1
@ -348,6 +416,20 @@ EOF
fi fi
) )
}
if $externalNodes; then
echo "Bootstrap leader is already configured"
else
echo "Looking for bootstrap leader instance..."
cloud_FindInstance "$prefix-bootstrap-leader"
[[ ${#instances[@]} -eq 1 ]] || {
echo "Unable to find bootstrap leader"
exit 1
}
fetchPrivateKey
echo "fullnodeIpList=()" >> "$configFile" echo "fullnodeIpList=()" >> "$configFile"
echo "fullnodeIpListPrivate=()" >> "$configFile" echo "fullnodeIpListPrivate=()" >> "$configFile"
cloud_ForEachInstance recordInstanceIp true fullnodeIpList cloud_ForEachInstance recordInstanceIp true fullnodeIpList
@ -361,6 +443,8 @@ EOF
echo "Unable to find additional fullnodes" echo "Unable to find additional fullnodes"
exit 1 exit 1
} }
fetchPrivateKey
cloud_ForEachInstance recordInstanceIp "$failOnValidatorBootupFailure" fullnodeIpList cloud_ForEachInstance recordInstanceIp "$failOnValidatorBootupFailure" fullnodeIpList
done done
fi fi
@ -586,29 +670,33 @@ info)
declare nodeType=$1 declare nodeType=$1
declare ip=$2 declare ip=$2
declare ipPrivate=$3 declare ipPrivate=$3
printf " %-16s | %-15s | %-15s\n" "$nodeType" "$ip" "$ipPrivate" declare zone=$4
printf " %-16s | %-15s | %-15s | %s\n" "$nodeType" "$ip" "$ipPrivate" "$zone"
} }
printNode "Node Type" "Public IP" "Private IP" printNode "Node Type" "Public IP" "Private IP" "Zone"
echo "-------------------+-----------------+-----------------" echo "-------------------+-----------------+-----------------+--------------"
nodeType=bootstrap-leader nodeType=bootstrap-leader
for i in $(seq 0 $(( ${#fullnodeIpList[@]} - 1)) ); do for i in $(seq 0 $(( ${#fullnodeIpList[@]} - 1)) ); do
ipAddress=${fullnodeIpList[$i]} ipAddress=${fullnodeIpList[$i]}
ipAddressPrivate=${fullnodeIpListPrivate[$i]} ipAddressPrivate=${fullnodeIpListPrivate[$i]}
printNode $nodeType "$ipAddress" "$ipAddressPrivate" zone=${fullnodeIpListZone[$i]}
printNode $nodeType "$ipAddress" "$ipAddressPrivate" "$zone"
nodeType=fullnode nodeType=fullnode
done done
for i in $(seq 0 $(( ${#clientIpList[@]} - 1)) ); do for i in $(seq 0 $(( ${#clientIpList[@]} - 1)) ); do
ipAddress=${clientIpList[$i]} ipAddress=${clientIpList[$i]}
ipAddressPrivate=${clientIpListPrivate[$i]} ipAddressPrivate=${clientIpListPrivate[$i]}
printNode bench-tps "$ipAddress" "$ipAddressPrivate" zone=${clientIpListZone[$i]}
printNode bench-tps "$ipAddress" "$ipAddressPrivate" "$zone"
done done
for i in $(seq 0 $(( ${#blockstreamerIpList[@]} - 1)) ); do for i in $(seq 0 $(( ${#blockstreamerIpList[@]} - 1)) ); do
ipAddress=${blockstreamerIpList[$i]} ipAddress=${blockstreamerIpList[$i]}
ipAddressPrivate=${blockstreamerIpListPrivate[$i]} ipAddressPrivate=${blockstreamerIpListPrivate[$i]}
printNode blockstreamer "$ipAddress" "$ipAddressPrivate" zone=${blockstreamerIpListZone[$i]}
printNode blockstreamer "$ipAddress" "$ipAddressPrivate" "$zone"
done done
;; ;;
*) *)

View File

@ -188,7 +188,7 @@ else
fi fi
annotate() { annotate() {
${BUILDKITE:-false} && { [[ -z $BUILDKITE ]] || {
buildkite-agent annotate "$@" buildkite-agent annotate "$@"
} }
} }

View File

@ -135,6 +135,7 @@ local|tar)
scp "$entrypointIp":~/solana/config-local/mint-id.json config-local/ scp "$entrypointIp":~/solana/config-local/mint-id.json config-local/
./multinode-demo/drone.sh > drone.log 2>&1 & ./multinode-demo/drone.sh > drone.log 2>&1 &
export BLOCKEXPLORER_GEOIP_WHITELIST=$PWD/net/config/geoip.yml
npm install @solana/blockexplorer@1 npm install @solana/blockexplorer@1
npx solana-blockexplorer > blockexplorer.log 2>&1 & npx solana-blockexplorer > blockexplorer.log 2>&1 &

View File

@ -310,18 +310,24 @@ cloud_DeleteInstances() {
return return
fi fi
declare names=("${instances[@]/:*/}") # Terminate the instances
declare zones=("${instances[@]/*:/}") for instance in "${instances[@]}"; do
declare region= declare name="${instance/:*/}"
region=$(__cloud_GetRegion "${zones[0]}") declare zone="${instance/*:/}"
declare region=
( region=$(__cloud_GetRegion "$zone")
set -x (
aws ec2 terminate-instances --region "$region" --instance-ids "${names[@]}" set -x
) aws ec2 terminate-instances --region "$region" --instance-ids "$name"
)
done
# Wait until the instances are terminated # Wait until the instances are terminated
for name in "${names[@]}"; do for instance in "${instances[@]}"; do
declare name="${instance/:*/}"
declare zone="${instance/*:/}"
declare region=
region=$(__cloud_GetRegion "$zone")
while true; do while true; do
declare instanceState declare instanceState
instanceState=$(\ instanceState=$(\