Compare commits

...

16 Commits

Author SHA1 Message Date
ff9bd2f512 Fix the output from Gossip Discovery (#4070) 2019-04-29 14:59:01 -07:00
25810ce729 Remove Bench Exchange Contract Execution graph 2019-04-29 14:28:09 -07:00
82c7f0e366 testnet-demo: use more low quota nodes 2019-04-29 12:08:39 -07:00
012d05f10b Increase testnet-demo node count a little 2019-04-29 09:10:18 -07:00
f853595efb testnet-demo now runs across more GCE zones (#4053)
* testnet-demo now runs across more GCE zones

* Save zone info to config file

* Add geoip whitelist for common data centers

* Skip more of start

* Include -x for config

* Fetch private key from first validator node if necessary

* Correct -r propagation
2019-04-28 19:50:02 -07:00
09e4f7e49c Correctly terminate instances across multiple zones 2019-04-28 09:09:34 -07:00
cb37072ed7 Switch testnet-demo to influxcloud 2019-04-27 22:12:30 -07:00
0b109d3340 Correct us-central1-b zone name 2019-04-27 21:43:31 -07:00
dcdc5b8cf7 testnet-demo: skip over validator nodes that fail to boot 2019-04-27 21:34:02 -07:00
1a7c30bb86 Use GPU nodes for blockstreamer as well if rest of testnet has GPUs (#4046) (#4048)
automerge
2019-04-27 21:31:01 -07:00
3ebc14f965 Blockstreamer annotation fix for non buildkite deployments (#4045) (#4047)
automerge
2019-04-27 21:01:26 -07:00
cf589efbbf Performance metrics computation methodology (#4041) (#4044)
automerge
2019-04-27 16:59:45 -07:00
94d5c64281 testnet-demo: add more GCE zones, remove client 2019-04-27 16:53:05 -07:00
566de1fd0e Add DNS resolution for network/drone arguments (#4037)
automerge
2019-04-27 10:00:41 -07:00
cb0f367084 Avoid inaccurate PATH nagging 2019-04-27 15:32:23 +00:00
e08e1fe6ac Add " 2019-04-27 07:41:55 -07:00
14 changed files with 314 additions and 365 deletions

View File

@ -1,10 +1,10 @@
use std::net::SocketAddr;
use std::time::Duration;
use clap::{crate_description, crate_name, crate_version, value_t, App, Arg, ArgMatches};
use solana::gen_keys::GenKeys;
use solana_drone::drone::DRONE_PORT;
use solana_sdk::signature::{read_keypair, Keypair, KeypairUtil};
use std::net::SocketAddr;
use std::process::exit;
use std::time::Duration;
pub struct Config {
pub network_addr: SocketAddr,
@ -146,16 +146,17 @@ pub fn build_args<'a, 'b>() -> App<'a, 'b> {
pub fn extract_args<'a>(matches: &ArgMatches<'a>) -> Config {
let mut args = Config::default();
args.network_addr = matches
.value_of("network")
.unwrap()
.parse()
.expect("Failed to parse network");
args.drone_addr = matches
.value_of("drone")
.unwrap()
.parse()
.expect("Failed to parse drone address");
args.network_addr = solana_netutil::parse_host_port(matches.value_of("network").unwrap())
.unwrap_or_else(|e| {
eprintln!("failed to parse network address: {}", e);
exit(1)
});
args.drone_addr = solana_netutil::parse_host_port(matches.value_of("drone").unwrap())
.unwrap_or_else(|e| {
eprintln!("failed to parse drone address: {}", e);
exit(1)
});
if matches.is_present("identity") {
args.identity = read_keypair(matches.value_of("identity").unwrap())

View File

@ -20,6 +20,7 @@
- [Ledger Replication](ledger-replication.md)
- [Secure Vote Signing](vote-signing.md)
- [Staking Delegation and Rewards](stake-delegation-and-rewards.md)
- [Performance Metrics](performance-metrics.md)
- [Anatomy of a Fullnode](fullnode.md)
- [TPU](tpu.md)

View File

@ -0,0 +1,29 @@
# Performance Metrics
Solana cluster performance is measured as the average number of transactions per second
that the network can sustain (TPS), and how long it takes for a transaction to be
confirmed by a super majority of the cluster (Confirmation Time).
Each cluster node maintains various counters that are incremented on certain events.
These counters are periodically uploaded to a cloud-based database. Solana's metrics
dashboard fetches these counters, computes the performance metrics, and displays
them.
## TPS
The leader node's banking stage maintains a count of transactions that it processed.
The dashboard displays the count averaged over a 2-second period in the TPS time series
graph. The dashboard also shows the per-second mean, maximum and total TPS as a running
counter.
## Confirmation Time
Each validator node maintains a list of active ledger forks that are visible to the node.
A fork is considered to be frozen when the node has received and processed all entries
corresponding to the fork. A fork is considered to be confirmed when it receives a cumulative
super majority vote and one of its child forks is frozen.
The node assigns a timestamp to every new fork, and computes the time it took to confirm
the fork. This time is reflected as validator confirmation time in performance metrics.
The performance dashboard displays the average of each validator node's confirmation time
as a time series graph.

View File

@ -11,9 +11,11 @@ clientNodeCount=0
additionalFullNodeCount=10
publicNetwork=false
stopNetwork=false
skipSetup=false
reuseLedger=false
skipCreate=false
skipStart=false
externalNode=false
failOnValidatorBootupFailure=true
tarChannelOrTag=edge
delete=false
enableGpu=false
@ -55,8 +57,10 @@ Deploys a CD testnet
-r - Reuse existing node/ledger configuration from a
previous |start| (ie, don't run ./multinode-demo/setup.sh).
-x - External node. Default: false
-e - Skip create. Assume the nodes have already been created
-s - Skip start. Nodes will still be created or configured, but network software will not be started.
-S - Stop network software without tearing down nodes.
-f - Discard validator nodes that didn't bootup successfully
Note: the SOLANA_METRICS_CONFIG environment variable is used to configure
metrics
@ -66,7 +70,7 @@ EOF
zone=()
while getopts "h?p:Pn:c:t:gG:a:Dbd:rusxz:p:C:S" opt; do
while getopts "h?p:Pn:c:t:gG:a:Dbd:rusxz:p:C:Sfe" opt; do
case $opt in
h | \?)
usage
@ -119,7 +123,10 @@ while getopts "h?p:Pn:c:t:gG:a:Dbd:rusxz:p:C:S" opt; do
delete=true
;;
r)
skipSetup=true
reuseLedger=true
;;
e)
skipCreate=true
;;
s)
skipStart=true
@ -127,6 +134,9 @@ while getopts "h?p:Pn:c:t:gG:a:Dbd:rusxz:p:C:S" opt; do
x)
externalNode=true
;;
f)
failOnValidatorBootupFailure=false
;;
u)
blockstreamer=true
;;
@ -170,15 +180,15 @@ for val in "${zone[@]}"; do
done
if $stopNetwork; then
skipSetup=true
skipCreate=true
fi
if $delete; then
skipSetup=false
skipCreate=false
fi
# Create the network
if ! $skipSetup; then
if ! $skipCreate; then
echo "--- $cloudProvider.sh delete"
# shellcheck disable=SC2068
time net/"$cloudProvider".sh delete ${zone_args[@]} -p "$netName" ${externalNode:+-x}
@ -224,6 +234,10 @@ if ! $skipSetup; then
create_args+=(-x)
fi
if ! $failOnValidatorBootupFailure; then
create_args+=(-f)
fi
time net/"$cloudProvider".sh create "${create_args[@]}"
else
echo "--- $cloudProvider.sh config"
@ -236,6 +250,14 @@ else
config_args+=(-P)
fi
if $externalNode; then
config_args+=(-x)
fi
if ! $failOnValidatorBootupFailure; then
config_args+=(-f)
fi
time net/"$cloudProvider".sh config "${config_args[@]}"
fi
net/init-metrics.sh -e
@ -249,35 +271,35 @@ if $stopNetwork; then
exit 0
fi
echo --- net.sh start
maybeRejectExtraNodes=
if ! $publicNetwork; then
maybeRejectExtraNodes="-o rejectExtraNodes"
fi
maybeNoValidatorSanity=
if [[ -n $NO_VALIDATOR_SANITY ]]; then
maybeNoValidatorSanity="-o noValidatorSanity"
fi
maybeNoLedgerVerify=
if [[ -n $NO_LEDGER_VERIFY ]]; then
maybeNoLedgerVerify="-o noLedgerVerify"
fi
maybeSkipSetup=
if $skipSetup; then
maybeSkipSetup="-r"
fi
ok=true
if ! $skipStart; then
(
if $skipSetup; then
if $skipCreate; then
# TODO: Enable rolling updates
#op=update
op=restart
else
op=start
fi
echo "--- net.sh $op"
maybeRejectExtraNodes=
if ! $publicNetwork; then
maybeRejectExtraNodes="-o rejectExtraNodes"
fi
maybeNoValidatorSanity=
if [[ -n $NO_VALIDATOR_SANITY ]]; then
maybeNoValidatorSanity="-o noValidatorSanity"
fi
maybeNoLedgerVerify=
if [[ -n $NO_LEDGER_VERIFY ]]; then
maybeNoLedgerVerify="-o noLedgerVerify"
fi
maybeReuseLedger=
if $reuseLedger; then
maybeReuseLedger="-r"
fi
maybeUpdateManifestKeypairFile=
# shellcheck disable=SC2154 # SOLANA_INSTALL_UPDATE_MANIFEST_KEYPAIR_x86_64_unknown_linux_gnu comes from .buildkite/env/
@ -289,7 +311,7 @@ if ! $skipStart; then
# shellcheck disable=SC2086 # Don't want to double quote the $maybeXYZ variables
time net/net.sh $op -t "$tarChannelOrTag" \
$maybeUpdateManifestKeypairFile \
$maybeSkipSetup \
$maybeReuseLedger \
$maybeRejectExtraNodes \
$maybeNoValidatorSanity \
$maybeNoLedgerVerify

View File

@ -52,7 +52,7 @@ steps:
value: "create-and-start"
- label: "Create testnet, but do not start software. If the testnet already exists it will be deleted and re-created"
value: "create"
- label: "Start network software on an existing testnet. If software is already running it will be restarted."
- label: "Start network software on an existing testnet. If software is already running it will be restarted"
value: "start"
- label: "Stop network software without deleting testnet nodes"
value: "stop"
@ -62,11 +62,11 @@ steps:
value: "sanity-or-restart"
- label: "Sanity check only"
value: "sanity"
- label: "Delete the testnet.
- label: "Delete the testnet"
value: "delete"
- label: "Enable/unlock the testnet."
- label: "Enable/unlock the testnet"
value: "enable"
- label: "Delete and then lock the testnet from further operation until it is re-enabled."
- label: "Delete and then lock the testnet from further operation until it is re-enabled"
value: "disable"
- command: "ci/$(basename "$0")"
agents:
@ -81,7 +81,40 @@ eval "$(ci/channel-info.sh)"
EC2_ZONES=(us-west-1a sa-east-1a ap-northeast-2a eu-central-1a ca-central-1a)
GCE_ZONES=(us-west1-b asia-east2-a europe-west4-a southamerica-east1-b us-east4-c)
# GCE zones with _lots_ of quota
GCE_ZONES=(
us-west1-a
us-central1-a
us-east1-b
europe-west4-a
us-west1-b
us-central1-b
us-east1-c
europe-west4-b
us-west1-c
us-east1-d
europe-west4-c
)
# GCE zones with enough quota for one CPU-only fullnode
GCE_LOW_QUOTA_ZONES=(
asia-east2-a
asia-northeast1-b
asia-northeast2-b
asia-south1-c
asia-southeast1-b
australia-southeast1-b
europe-north1-a
europe-west2-b
europe-west3-c
europe-west6-a
northamerica-northeast1-a
southamerica-east1-b
)
case $TESTNET in
testnet-edge|testnet-edge-perf)
CHANNEL_OR_TAG=edge
@ -107,7 +140,9 @@ testnet-perf)
testnet-demo)
CHANNEL_OR_TAG=beta
CHANNEL_BRANCH=$BETA_CHANNEL
: "${GCE_NODE_COUNT:=200}"
: "${GCE_NODE_COUNT:=150}"
: "${GCE_LOW_QUOTA_NODE_COUNT:=70}"
: "${TESTNET_DB_HOST:=https://clocktower-f1d56615.influxcloud.net:8086}"
;;
*)
echo "Error: Invalid TESTNET=$TESTNET"
@ -123,6 +158,10 @@ GCE_ZONE_ARGS=()
for val in "${GCE_ZONES[@]}"; do
GCE_ZONE_ARGS+=("-z $val")
done
GCE_LOW_QUOTA_ZONE_ARGS=()
for val in "${GCE_LOW_QUOTA_ZONES[@]}"; do
GCE_LOW_QUOTA_ZONE_ARGS+=("-z $val")
done
if [[ -n $TESTNET_DB_HOST ]]; then
SOLANA_METRICS_PARTIAL_CONFIG="host=$TESTNET_DB_HOST,$SOLANA_METRICS_PARTIAL_CONFIG"
@ -151,6 +190,7 @@ steps:
TESTNET_DB_HOST: "$TESTNET_DB_HOST"
EC2_NODE_COUNT: "$EC2_NODE_COUNT"
GCE_NODE_COUNT: "$GCE_NODE_COUNT"
GCE_LOW_QUOTA_NODE_COUNT: "$GCE_LOW_QUOTA_NODE_COUNT"
EOF
) | buildkite-agent pipeline upload
exit 0
@ -227,7 +267,7 @@ sanity() {
ok=true
if [[ -n $GCE_NODE_COUNT ]]; then
NO_LEDGER_VERIFY=1 \
ci/testnet-sanity.sh demo-testnet-solana-com gce "${GCE_ZONES[0]}" || ok=false
ci/testnet-sanity.sh demo-testnet-solana-com gce "${GCE_ZONES[0]}" -f || ok=false
else
echo "Error: no GCE nodes"
ok=false
@ -270,7 +310,7 @@ deploy() {
set -x
ci/testnet-deploy.sh -p edge-testnet-solana-com -C ec2 -z us-west-1a \
-t "$CHANNEL_OR_TAG" -n 3 -c 0 -u -P -a eipalloc-0ccd4f2239886fa94 \
${skipCreate:+-r} \
${skipCreate:+-e} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D}
@ -285,7 +325,7 @@ deploy() {
ci/testnet-deploy.sh -p edge-perf-testnet-solana-com -C ec2 -z us-west-2b \
-g -t "$CHANNEL_OR_TAG" -c 2 \
-b \
${skipCreate:+-r} \
${skipCreate:+-e} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D}
@ -298,7 +338,7 @@ deploy() {
ci/testnet-deploy.sh -p beta-testnet-solana-com -C ec2 -z us-west-1a \
-t "$CHANNEL_OR_TAG" -n 3 -c 0 -u -P -a eipalloc-0f286cf8a0771ce35 \
-b \
${skipCreate:+-r} \
${skipCreate:+-e} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D}
@ -313,7 +353,7 @@ deploy() {
ci/testnet-deploy.sh -p beta-perf-testnet-solana-com -C ec2 -z us-west-2b \
-g -t "$CHANNEL_OR_TAG" -c 2 \
-b \
${skipCreate:+-r} \
${skipCreate:+-e} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D}
@ -330,7 +370,7 @@ deploy() {
# shellcheck disable=SC2068
ci/testnet-deploy.sh -p testnet-solana-com -C ec2 ${EC2_ZONE_ARGS[@]} \
-t "$CHANNEL_OR_TAG" -n "$EC2_NODE_COUNT" -c 0 -u -P -a eipalloc-0fa502bf95f6f18b2 \
${skipCreate:+-r} \
${skipCreate:+-e} \
${maybeSkipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D}
@ -339,11 +379,11 @@ deploy() {
# shellcheck disable=SC2068
ci/testnet-deploy.sh -p testnet-solana-com -C gce ${GCE_ZONE_ARGS[@]} \
-t "$CHANNEL_OR_TAG" -n "$GCE_NODE_COUNT" -c 0 -P \
${skipCreate:+-r} \
${skipCreate:+-e} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D} \
${EC2_NODE_COUNT:+-x}
-x
fi
)
;;
@ -358,7 +398,7 @@ deploy() {
-t "$CHANNEL_OR_TAG" -c 2 \
-b \
-d pd-ssd \
${skipCreate:+-r} \
${skipCreate:+-e} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D}
@ -367,12 +407,25 @@ deploy() {
testnet-demo)
(
set -x
if [[ -n $GCE_NODE_COUNT ]]; then
if [[ -n $GCE_LOW_QUOTA_NODE_COUNT ]] || [[ -n $skipStart ]]; then
maybeSkipStart="skip"
fi
# shellcheck disable=SC2068
ci/testnet-deploy.sh -p demo-testnet-solana-com -C gce ${GCE_ZONE_ARGS[@]} \
-t "$CHANNEL_OR_TAG" -n "$GCE_NODE_COUNT" -c 0 -P -u -f \
-a demo-testnet-solana-com \
${skipCreate:+-e} \
${maybeSkipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D}
if [[ -n $GCE_LOW_QUOTA_NODE_COUNT ]]; then
# shellcheck disable=SC2068
ci/testnet-deploy.sh -p testnet-demo -C gce ${GCE_ZONE_ARGS[@]} \
-t "$CHANNEL_OR_TAG" -n "$GCE_NODE_COUNT" -c 1 -P -u \
-a demo-testnet-solana-com \
${skipCreate:+-r} \
ci/testnet-deploy.sh -p demo-testnet-solana-com2 -C gce ${GCE_LOW_QUOTA_ZONE_ARGS[@]} \
-t "$CHANNEL_OR_TAG" -n "$GCE_LOW_QUOTA_NODE_COUNT" -c 0 -P -f -x \
${skipCreate:+-e} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D}

View File

@ -249,7 +249,7 @@ impl ClusterInfo {
let nodes: Vec<_> = self
.all_peers()
.into_iter()
.map(|node| {
.map(|(node, last_updated)| {
if !ContactInfo::is_valid_address(&node.gossip) {
spy_nodes += 1;
}
@ -266,7 +266,7 @@ impl ClusterInfo {
tpu: {:20} | |\n \
rpc: {:20} | |\n",
addr_to_string(&node.gossip),
now.saturating_sub(node.wallclock),
now.saturating_sub(last_updated),
node.id,
if node.id == my_id { "(me)" } else { "" }.to_string(),
addr_to_string(&node.tpu),
@ -347,14 +347,17 @@ impl ClusterInfo {
.collect()
}
// All nodes in gossip, including spy nodes
pub(crate) fn all_peers(&self) -> Vec<ContactInfo> {
// All nodes in gossip (including spy nodes) and the last time we heard about them
pub(crate) fn all_peers(&self) -> Vec<(ContactInfo, u64)> {
self.gossip
.crds
.table
.values()
.filter_map(|x| x.value.contact_info())
.cloned()
.filter_map(|x| {
x.value
.contact_info()
.map(|ci| (ci.clone(), x.local_timestamp))
})
.collect()
}

View File

@ -294,7 +294,7 @@ impl RpcSol for RpcSolImpl {
Ok(cluster_info
.all_peers()
.iter()
.filter_map(|contact_info| {
.filter_map(|(contact_info, _)| {
if ContactInfo::is_valid_address(&contact_info.gossip) {
Some(RpcContactInfo {
id: contact_info.id.to_string(),

View File

@ -250,7 +250,10 @@ fn get_update_manifest(
fn check_env_path_for_bin_dir(config: &Config) {
use std::env;
let bin_dir = config.active_release_bin_dir();
let bin_dir = config
.active_release_bin_dir()
.canonicalize()
.unwrap_or_default();
let found = match env::var_os("PATH") {
Some(paths) => env::split_paths(&paths).any(|path| {
if let Ok(path) = path.canonicalize() {

View File

@ -4410,268 +4410,6 @@
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"h": 5,
"w": 8,
"x": 16,
"y": 49
},
"id": 56,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") AS \"do_account_request\" FROM \"$testnet\".\"autogen\".\"counter-exchange-do-account-request\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
},
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") / 3 AS \"transfer_request\" FROM \"$testnet\".\"autogen\".\"counter-exchange-transfer-request\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time(3s)",
"rawQuery": true,
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
},
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") / 3 AS \"trade_request\" FROM \"$testnet\".\"autogen\".\"counter-exchange-trade-request\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time(3s)",
"rawQuery": true,
"refId": "C",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
},
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") / 3 AS \"trade_cancellation\" FROM \"$testnet\".\"autogen\".\"counter-exchange-trade-cancellation\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time(3s)",
"rawQuery": true,
"refId": "D",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
},
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") / 3 AS \"swap_request\" FROM \"$testnet\".\"autogen\".\"counter-exchange-swap-request\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time(3s)",
"rawQuery": true,
"refId": "E",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Bench Exchange Contract Execution",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"collapsed": false,
"gridPos": {

View File

@ -19,6 +19,7 @@ mkdir -p "$netConfigDir" "$netLogDir"
source "$(dirname "${BASH_SOURCE[0]}")"/../scripts/configure-metrics.sh
configFile="$netConfigDir/config"
geoipConfigFile="$netConfigDir/geoip.yml"
entrypointIp=
publicNetwork=
@ -28,10 +29,13 @@ externalNodeSshKey=
sshOptions=()
fullnodeIpList=()
fullnodeIpListPrivate=()
fullnodeIpListZone=()
clientIpList=()
clientIpListPrivate=()
clientIpListZone=()
blockstreamerIpList=()
blockstreamerIpListPrivate=()
blockstreamerIpListZone=()
leaderRotation=
buildSshOptions() {

View File

@ -163,11 +163,13 @@ while getopts "h?p:Pn:c:z:gG:a:d:buxf" opt; do
enableGpu=true
bootstrapLeaderMachineType=$gpuBootstrapLeaderMachineType
fullNodeMachineType=$bootstrapLeaderMachineType
blockstreamerMachineType=$bootstrapLeaderMachineType
;;
G)
enableGpu=true
bootstrapLeaderMachineType="$OPTARG"
fullNodeMachineType=$bootstrapLeaderMachineType
blockstreamerMachineType=$bootstrapLeaderMachineType
;;
a)
customAddress=$OPTARG
@ -225,6 +227,7 @@ esac
# name - name of the instance
# publicIp - The public IP address of this instance
# privateIp - The private IP address of this instance
# zone - Zone of this instance
# count - Monotonically increasing count for each
# invocation of cmd, starting at 1
# ... - Extra args to cmd..
@ -240,11 +243,70 @@ cloud_ForEachInstance() {
declare name publicIp privateIp
IFS=: read -r name publicIp privateIp zone < <(echo "$info")
eval "$cmd" "$name" "$publicIp" "$privateIp" "$count" "$@"
eval "$cmd" "$name" "$publicIp" "$privateIp" "$zone" "$count" "$@"
count=$((count + 1))
done
}
# Map a cloud provider zone name to an approximate "[lat, long]" pair for the
# data center hosting it.  Regular geoip lookups on cloud provider IP addresses
# can be wildly inaccurate, so the coordinates are hard-coded per region prefix.
#
#   $1 - zone name (matched by region prefix, e.g. any us-west1* zone)
#
# Prints the location on stdout.  Prints nothing for a zone that is not in the
# table, so callers can test the output for emptiness.
zoneLocation() {
  local zoneName="$1"
  local latlng=

  case "$zoneName" in
    us-west1*)                latlng="[45.5946, -121.1787]" ;;
    us-central1*)             latlng="[41.2619, -95.8608]" ;;
    us-east1*)                latlng="[33.1960, -80.0131]" ;;
    asia-east2*)              latlng="[22.3193, 114.1694]" ;;
    asia-northeast1*)         latlng="[35.6762, 139.6503]" ;;
    asia-northeast2*)         latlng="[34.6937, 135.5023]" ;;
    asia-south1*)             latlng="[19.0760, 72.8777]" ;;
    asia-southeast1*)         latlng="[1.3404, 103.7090]" ;;
    australia-southeast1*)    latlng="[-33.8688, 151.2093]" ;;
    europe-north1*)           latlng="[60.5693, 27.1878]" ;;
    europe-west2*)            latlng="[51.5074, -0.1278]" ;;
    europe-west3*)            latlng="[50.1109, 8.6821]" ;;
    europe-west4*)            latlng="[53.4386, 6.8355]" ;;
    europe-west6*)            latlng="[47.3769, 8.5417]" ;;
    northamerica-northeast1*) latlng="[45.5017, -73.5673]" ;;
    southamerica-east1*)      latlng="[-23.5505, -46.6333]" ;;
  esac

  # Unknown zones leave latlng empty: emit nothing and still return success.
  if [[ -n $latlng ]]; then
    echo "$latlng"
  fi
}
prepareInstancesAndWriteConfigFile() {
$metricsWriteDatapoint "testnet-deploy net-config-begin=1"
@ -252,6 +314,7 @@ prepareInstancesAndWriteConfigFile() {
echo "Appending to existing config file"
echo "externalNodeSshKey=$sshPrivateKey" >> "$configFile"
else
rm -f "$geoipConfigFile"
cat >> "$configFile" <<EOF
# autogenerated at $(date)
netBasename=$prefix
@ -260,6 +323,7 @@ sshPrivateKey=$sshPrivateKey
leaderRotation=$leaderRotation
EOF
fi
touch "$geoipConfigFile"
buildSshOptions
@ -267,11 +331,13 @@ EOF
declare name="$1"
declare publicIp="$2"
declare privateIp="$3"
declare zone="$4"
#declare index="$5"
declare failOnFailure="$5"
declare arrayName="$6"
declare failOnFailure="$6"
declare arrayName="$7"
# This check should eventually be moved to cloud provider specific script
# This check should eventually be moved to cloud provider specific script
if [ "$publicIp" = "TERMINATED" ] || [ "$privateIp" = "TERMINATED" ]; then
if $failOnFailure; then
exit 1
@ -284,7 +350,7 @@ EOF
echo "Waiting for $name to finish booting..."
(
set -x +e
for i in $(seq 1 20); do
for i in $(seq 1 30); do
timeout --preserve-status --foreground 20s ssh "${sshOptions[@]}" "$publicIp" "ls -l /.instance-startup-complete"
ret=$?
if [[ $ret -eq 0 ]]; then
@ -303,21 +369,21 @@ EOF
exit 1
fi
else
echo "$arrayName+=($publicIp) # $name" >> "$configFile"
echo "${arrayName}Private+=($privateIp) # $name" >> "$configFile"
{
echo "$arrayName+=($publicIp) # $name"
echo "${arrayName}Private+=($privateIp) # $name"
echo "${arrayName}Zone+=($zone) # $name"
} >> "$configFile"
declare latlng=
latlng=$(zoneLocation "$zone")
if [[ -n $latlng ]]; then
echo "$publicIp: $latlng" >> "$geoipConfigFile"
fi
fi
}
if $externalNodes; then
echo "Bootstrap leader is already configured"
else
echo "Looking for bootstrap leader instance..."
cloud_FindInstance "$prefix-bootstrap-leader"
[[ ${#instances[@]} -eq 1 ]] || {
echo "Unable to find bootstrap leader"
exit 1
}
fetchPrivateKey() {
(
declare nodeName
declare nodeIp
@ -336,7 +402,9 @@ EOF
set -x -o pipefail
for i in $(seq 1 30); do
if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey" "$nodeZone"; then
break
if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa.pub "$sshPrivateKey.pub" "$nodeZone"; then
break
fi
fi
sleep 1
@ -348,6 +416,20 @@ EOF
fi
)
}
if $externalNodes; then
echo "Bootstrap leader is already configured"
else
echo "Looking for bootstrap leader instance..."
cloud_FindInstance "$prefix-bootstrap-leader"
[[ ${#instances[@]} -eq 1 ]] || {
echo "Unable to find bootstrap leader"
exit 1
}
fetchPrivateKey
echo "fullnodeIpList=()" >> "$configFile"
echo "fullnodeIpListPrivate=()" >> "$configFile"
cloud_ForEachInstance recordInstanceIp true fullnodeIpList
@ -361,6 +443,8 @@ EOF
echo "Unable to find additional fullnodes"
exit 1
}
fetchPrivateKey
cloud_ForEachInstance recordInstanceIp "$failOnValidatorBootupFailure" fullnodeIpList
done
fi
@ -586,29 +670,33 @@ info)
declare nodeType=$1
declare ip=$2
declare ipPrivate=$3
printf " %-16s | %-15s | %-15s\n" "$nodeType" "$ip" "$ipPrivate"
declare zone=$4
printf " %-16s | %-15s | %-15s | %s\n" "$nodeType" "$ip" "$ipPrivate" "$zone"
}
printNode "Node Type" "Public IP" "Private IP"
echo "-------------------+-----------------+-----------------"
printNode "Node Type" "Public IP" "Private IP" "Zone"
echo "-------------------+-----------------+-----------------+--------------"
nodeType=bootstrap-leader
for i in $(seq 0 $(( ${#fullnodeIpList[@]} - 1)) ); do
ipAddress=${fullnodeIpList[$i]}
ipAddressPrivate=${fullnodeIpListPrivate[$i]}
printNode $nodeType "$ipAddress" "$ipAddressPrivate"
zone=${fullnodeIpListZone[$i]}
printNode $nodeType "$ipAddress" "$ipAddressPrivate" "$zone"
nodeType=fullnode
done
for i in $(seq 0 $(( ${#clientIpList[@]} - 1)) ); do
ipAddress=${clientIpList[$i]}
ipAddressPrivate=${clientIpListPrivate[$i]}
printNode bench-tps "$ipAddress" "$ipAddressPrivate"
zone=${clientIpListZone[$i]}
printNode bench-tps "$ipAddress" "$ipAddressPrivate" "$zone"
done
for i in $(seq 0 $(( ${#blockstreamerIpList[@]} - 1)) ); do
ipAddress=${blockstreamerIpList[$i]}
ipAddressPrivate=${blockstreamerIpListPrivate[$i]}
printNode blockstreamer "$ipAddress" "$ipAddressPrivate"
zone=${blockstreamerIpListZone[$i]}
printNode blockstreamer "$ipAddress" "$ipAddressPrivate" "$zone"
done
;;
*)

View File

@ -188,7 +188,7 @@ else
fi
annotate() {
${BUILDKITE:-false} && {
[[ -z $BUILDKITE ]] || {
buildkite-agent annotate "$@"
}
}

View File

@ -135,6 +135,7 @@ local|tar)
scp "$entrypointIp":~/solana/config-local/mint-id.json config-local/
./multinode-demo/drone.sh > drone.log 2>&1 &
export BLOCKEXPLORER_GEOIP_WHITELIST=$PWD/net/config/geoip.yml
npm install @solana/blockexplorer@1
npx solana-blockexplorer > blockexplorer.log 2>&1 &

View File

@ -310,18 +310,24 @@ cloud_DeleteInstances() {
return
fi
declare names=("${instances[@]/:*/}")
declare zones=("${instances[@]/*:/}")
declare region=
region=$(__cloud_GetRegion "${zones[0]}")
(
set -x
aws ec2 terminate-instances --region "$region" --instance-ids "${names[@]}"
)
# Terminate the instances
for instance in "${instances[@]}"; do
declare name="${instance/:*/}"
declare zone="${instance/*:/}"
declare region=
region=$(__cloud_GetRegion "$zone")
(
set -x
aws ec2 terminate-instances --region "$region" --instance-ids "$name"
)
done
# Wait until the instances are terminated
for name in "${names[@]}"; do
for instance in "${instances[@]}"; do
declare name="${instance/:*/}"
declare zone="${instance/*:/}"
declare region=
region=$(__cloud_GetRegion "$zone")
while true; do
declare instanceState
instanceState=$(\