Push perf test results to slack app (#6371)
* Add script to publish testnet results to slack
* Obscure webhook URL
* fixup
* Replace read with cat redirection
* Turn back on net restart
* Pick nits
* Make symlink before trying to delete its contents
* Display test config in slack and pick Trents nit not to maybe rm -rf /*
* Clean up results print
* Minor nits
* Turn the test settings back up to 11
* typo
* Shellcheck
* Just a few more fields
* fix payload formatting
* Del clear-config.sh
* Mount secondary
* Add commit SHA link and Grafana time range URL
* Add fancy buttons instead of text URLs
* Tighten up test config display
* Fixup display nits
* chellsheck
* Rebase and fix typo
This commit is contained in:
@ -113,11 +113,14 @@ clear_config_dir() {
|
|||||||
SECONDARY_DISK_MOUNT_POINT=/mnt/extra-disk
|
SECONDARY_DISK_MOUNT_POINT=/mnt/extra-disk
|
||||||
setup_secondary_mount() {
|
setup_secondary_mount() {
|
||||||
# If there is a secondary disk, symlink the config/ dir there
|
# If there is a secondary disk, symlink the config/ dir there
|
||||||
if [[ -d $SECONDARY_DISK_MOUNT_POINT ]] && \
|
(
|
||||||
[[ -w $SECONDARY_DISK_MOUNT_POINT ]]; then
|
set -x
|
||||||
mkdir -p $SECONDARY_DISK_MOUNT_POINT/config
|
if [[ -d $SECONDARY_DISK_MOUNT_POINT ]] && \
|
||||||
rm -rf "$SOLANA_CONFIG_DIR"
|
[[ -w $SECONDARY_DISK_MOUNT_POINT ]]; then
|
||||||
ln -sfT $SECONDARY_DISK_MOUNT_POINT/config "$SOLANA_CONFIG_DIR"
|
mkdir -p $SECONDARY_DISK_MOUNT_POINT/config
|
||||||
fi
|
rm -rf "$SOLANA_CONFIG_DIR"
|
||||||
|
ln -sfT $SECONDARY_DISK_MOUNT_POINT/config "$SOLANA_CONFIG_DIR"
|
||||||
|
fi
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -152,6 +152,7 @@ EOF
|
|||||||
set -x
|
set -x
|
||||||
if [[ $skipSetup != true ]]; then
|
if [[ $skipSetup != true ]]; then
|
||||||
clear_config_dir "$SOLANA_CONFIG_DIR"
|
clear_config_dir "$SOLANA_CONFIG_DIR"
|
||||||
|
setup_secondary_mount
|
||||||
|
|
||||||
if [[ -n $internalNodesLamports ]]; then
|
if [[ -n $internalNodesLamports ]]; then
|
||||||
echo "---" >> config/fullnode-balances.yml
|
echo "---" >> config/fullnode-balances.yml
|
||||||
@ -248,6 +249,7 @@ EOF
|
|||||||
fi
|
fi
|
||||||
if [[ $skipSetup != true ]]; then
|
if [[ $skipSetup != true ]]; then
|
||||||
clear_config_dir "$SOLANA_CONFIG_DIR"
|
clear_config_dir "$SOLANA_CONFIG_DIR"
|
||||||
|
setup_secondary_mount
|
||||||
[[ -z $internalNodesLamports ]] || net/scripts/rsync-retry.sh -vPrc \
|
[[ -z $internalNodesLamports ]] || net/scripts/rsync-retry.sh -vPrc \
|
||||||
"$entrypointIp":~/solana/config/fullnode-"$nodeIndex"-identity.json config/fullnode-identity.json
|
"$entrypointIp":~/solana/config/fullnode-"$nodeIndex"-identity.json config/fullnode-identity.json
|
||||||
fi
|
fi
|
||||||
|
@ -2,13 +2,14 @@ steps:
|
|||||||
- command: "system-test/testnet-performance/testnet-automation.sh"
|
- command: "system-test/testnet-performance/testnet-automation.sh"
|
||||||
label: "COLO performance testnet GPU enabled"
|
label: "COLO performance testnet GPU enabled"
|
||||||
env:
|
env:
|
||||||
|
UPLOAD_RESULTS_TO_SLACK: "true"
|
||||||
CLOUD_PROVIDER: "colo"
|
CLOUD_PROVIDER: "colo"
|
||||||
TESTNET_TAG: "colo-edge-perf-gpu-enabled"
|
TESTNET_TAG: "colo-edge-perf-gpu-enabled"
|
||||||
RAMP_UP_TIME: 60
|
RAMP_UP_TIME: 0
|
||||||
TEST_DURATION: 300
|
TEST_DURATION: 600
|
||||||
NUMBER_OF_VALIDATOR_NODES: 4
|
NUMBER_OF_VALIDATOR_NODES: 4
|
||||||
NUMBER_OF_CLIENT_NODES: 2
|
NUMBER_OF_CLIENT_NODES: 2
|
||||||
CLIENT_OPTIONS: "bench-tps=2=--tx_count 80000 --thread-batch-sleep-ms 1000"
|
CLIENT_OPTIONS: "bench-tps=2=--tx_count 15000 --thread-batch-sleep-ms 250"
|
||||||
ADDITIONAL_FLAGS: ""
|
ADDITIONAL_FLAGS: ""
|
||||||
agents:
|
agents:
|
||||||
- "queue=colo-deploy"
|
- "queue=colo-deploy"
|
||||||
|
@ -2,6 +2,7 @@ steps:
|
|||||||
- command: "system-test/testnet-performance/testnet-automation.sh"
|
- command: "system-test/testnet-performance/testnet-automation.sh"
|
||||||
label: "GCE performance testnets CPU ONLY"
|
label: "GCE performance testnets CPU ONLY"
|
||||||
env:
|
env:
|
||||||
|
UPLOAD_RESULTS_TO_SLACK: "true"
|
||||||
CLOUD_PROVIDER: "gce"
|
CLOUD_PROVIDER: "gce"
|
||||||
TESTNET_TAG: "gce-edge-perf-cpu-only"
|
TESTNET_TAG: "gce-edge-perf-cpu-only"
|
||||||
RAMP_UP_TIME: 60
|
RAMP_UP_TIME: 60
|
||||||
|
@ -2,14 +2,15 @@ steps:
|
|||||||
- command: "system-test/testnet-performance/testnet-automation.sh"
|
- command: "system-test/testnet-performance/testnet-automation.sh"
|
||||||
label: "GCE performance testnets GPU ENABLED"
|
label: "GCE performance testnets GPU ENABLED"
|
||||||
env:
|
env:
|
||||||
|
UPLOAD_RESULTS_TO_SLACK: "true"
|
||||||
CLOUD_PROVIDER: "gce"
|
CLOUD_PROVIDER: "gce"
|
||||||
TESTNET_TAG: "gce-edge-perf-gpu-enabled"
|
TESTNET_TAG: "gce-edge-perf-gpu-enabled"
|
||||||
RAMP_UP_TIME: 60
|
RAMP_UP_TIME: 0
|
||||||
TEST_DURATION: 300
|
TEST_DURATION: 600
|
||||||
NUMBER_OF_VALIDATOR_NODES: 10
|
NUMBER_OF_VALIDATOR_NODES: 50
|
||||||
VALIDATOR_NODE_MACHINE_TYPE: "--machine-type n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100"
|
VALIDATOR_NODE_MACHINE_TYPE: "--machine-type n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100"
|
||||||
NUMBER_OF_CLIENT_NODES: 1
|
NUMBER_OF_CLIENT_NODES: 2
|
||||||
CLIENT_OPTIONS: "bench-tps=1=--tx_count 80000 --thread-batch-sleep-ms 1000"
|
CLIENT_OPTIONS: "bench-tps=2=--tx_count 15000 --thread-batch-sleep-ms 250"
|
||||||
TESTNET_ZONES: "us-west1-a,us-west1-b,us-central1-a,europe-west4-a"
|
TESTNET_ZONES: "us-west1-a,us-west1-b,us-central1-a,europe-west4-a"
|
||||||
ADDITIONAL_FLAGS: ""
|
ADDITIONAL_FLAGS: ""
|
||||||
agents:
|
agents:
|
||||||
|
@ -2,6 +2,9 @@
|
|||||||
import sys, json
|
import sys, json
|
||||||
|
|
||||||
data=json.load(sys.stdin)
|
data=json.load(sys.stdin)
|
||||||
print[\
|
|
||||||
([result['series'][0]['columns'][1].encode(), result['series'][0]['values'][0][1]]) \
|
if 'results' in data:
|
||||||
for result in data['results']]
|
for result in data['results']:
|
||||||
|
print result['series'][0]['columns'][1].encode() + ': ' + str(result['series'][0]['values'][0][1])
|
||||||
|
else:
|
||||||
|
print "No results returned from CURL request"
|
||||||
|
@ -7,10 +7,9 @@ set -e
|
|||||||
|
|
||||||
# TODO: Remove all default values, force explicitness in the testcase definition
|
# TODO: Remove all default values, force explicitness in the testcase definition
|
||||||
[[ -n $TEST_DURATION ]] || TEST_DURATION=300
|
[[ -n $TEST_DURATION ]] || TEST_DURATION=300
|
||||||
[[ -n $RAMP_UP_TIME ]] || RAMP_UP_TIME=60
|
[[ -n $RAMP_UP_TIME ]] || RAMP_UP_TIME=0
|
||||||
[[ -n $NUMBER_OF_VALIDATOR_NODES ]] || NUMBER_OF_VALIDATOR_NODES=2
|
[[ -n $NUMBER_OF_VALIDATOR_NODES ]] || NUMBER_OF_VALIDATOR_NODES=2
|
||||||
[[ -n $NUMBER_OF_CLIENT_NODES ]] || NUMBER_OF_CLIENT_NODES=1
|
[[ -n $NUMBER_OF_CLIENT_NODES ]] || NUMBER_OF_CLIENT_NODES=1
|
||||||
[[ -n $TESTNET_ZONES ]] || TESTNET_ZONES="us-west1-a"
|
|
||||||
|
|
||||||
function collect_logs {
|
function collect_logs {
|
||||||
echo --- collect logs from remote nodes
|
echo --- collect logs from remote nodes
|
||||||
@ -26,6 +25,11 @@ function collect_logs {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function cleanup_testnet {
|
function cleanup_testnet {
|
||||||
|
FINISH_UNIX_MSECS="$(($(date +%s%N)/1000000))"
|
||||||
|
if [[ -n $UPLOAD_RESULTS_TO_SLACK ]] ; then
|
||||||
|
upload_results_to_slack
|
||||||
|
fi
|
||||||
|
|
||||||
(
|
(
|
||||||
set +e
|
set +e
|
||||||
collect_logs
|
collect_logs
|
||||||
@ -101,9 +105,9 @@ launchTestnet() {
|
|||||||
|
|
||||||
echo --- start "$NUMBER_OF_VALIDATOR_NODES" node test
|
echo --- start "$NUMBER_OF_VALIDATOR_NODES" node test
|
||||||
if [[ -n $CHANNEL ]]; then
|
if [[ -n $CHANNEL ]]; then
|
||||||
net/net.sh start -t "$CHANNEL" "$maybeClientOptions" "$CLIENT_OPTIONS"
|
net/net.sh restart -t "$CHANNEL" "$maybeClientOptions" "$CLIENT_OPTIONS"
|
||||||
else
|
else
|
||||||
net/net.sh start -T solana-release*.tar.bz2 "$maybeClientOptions" "$CLIENT_OPTIONS"
|
net/net.sh restart -T solana-release*.tar.bz2 "$maybeClientOptions" "$CLIENT_OPTIONS"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo --- wait "$RAMP_UP_TIME" seconds for network throughput to stabilize
|
echo --- wait "$RAMP_UP_TIME" seconds for network throughput to stabilize
|
||||||
@ -128,27 +132,27 @@ launchTestnet() {
|
|||||||
)'
|
)'
|
||||||
|
|
||||||
declare q_mean_confirmation='
|
declare q_mean_confirmation='
|
||||||
SELECT round(mean("duration_ms")) as "mean_confirmation"
|
SELECT round(mean("duration_ms")) as "mean_confirmation_ms"
|
||||||
FROM "'$TESTNET_TAG'"."autogen"."validator-confirmation"
|
FROM "'$TESTNET_TAG'"."autogen"."validator-confirmation"
|
||||||
WHERE time > now() - '"$TEST_DURATION"'s'
|
WHERE time > now() - '"$TEST_DURATION"'s'
|
||||||
|
|
||||||
declare q_max_confirmation='
|
declare q_max_confirmation='
|
||||||
SELECT round(max("duration_ms")) as "max_confirmation"
|
SELECT round(max("duration_ms")) as "max_confirmation_ms"
|
||||||
FROM "'$TESTNET_TAG'"."autogen"."validator-confirmation"
|
FROM "'$TESTNET_TAG'"."autogen"."validator-confirmation"
|
||||||
WHERE time > now() - '"$TEST_DURATION"'s'
|
WHERE time > now() - '"$TEST_DURATION"'s'
|
||||||
|
|
||||||
declare q_99th_confirmation='
|
declare q_99th_confirmation='
|
||||||
SELECT round(percentile("duration_ms", 99)) as "99th_confirmation"
|
SELECT round(percentile("duration_ms", 99)) as "99th_percentile_confirmation_ms"
|
||||||
FROM "'$TESTNET_TAG'"."autogen"."validator-confirmation"
|
FROM "'$TESTNET_TAG'"."autogen"."validator-confirmation"
|
||||||
WHERE time > now() - '"$TEST_DURATION"'s'
|
WHERE time > now() - '"$TEST_DURATION"'s'
|
||||||
|
|
||||||
RESULTS_FILE="$TESTNET_TAG"_SUMMARY_STATS_"$NUMBER_OF_VALIDATOR_NODES".log
|
|
||||||
curl -G "${INFLUX_HOST}/query?u=ro&p=topsecret" \
|
curl -G "${INFLUX_HOST}/query?u=ro&p=topsecret" \
|
||||||
--data-urlencode "db=${TESTNET_TAG}" \
|
--data-urlencode "db=${TESTNET_TAG}" \
|
||||||
--data-urlencode "q=$q_mean_tps;$q_max_tps;$q_mean_confirmation;$q_max_confirmation;$q_99th_confirmation" |
|
--data-urlencode "q=$q_mean_tps;$q_max_tps;$q_mean_confirmation;$q_max_confirmation;$q_99th_confirmation" |
|
||||||
python system-test/testnet-performance/testnet-automation-json-parser.py >>"$RESULTS_FILE"
|
python system-test/testnet-performance/testnet-automation-json-parser.py >>"$RESULT_FILE"
|
||||||
|
|
||||||
upload-ci-artifact "$RESULTS_FILE"
|
RESULT_DETAILS=$(<"$RESULT_FILE")
|
||||||
|
upload-ci-artifact "$RESULT_FILE"
|
||||||
}
|
}
|
||||||
|
|
||||||
cd "$(dirname "$0")/../.."
|
cd "$(dirname "$0")/../.."
|
||||||
@ -169,10 +173,33 @@ fi
|
|||||||
|
|
||||||
# shellcheck disable=SC1091
|
# shellcheck disable=SC1091
|
||||||
source ci/upload-ci-artifact.sh
|
source ci/upload-ci-artifact.sh
|
||||||
|
source system-test/testnet-performance/upload_results_to_slack.sh
|
||||||
|
|
||||||
maybeClientOptions=${CLIENT_OPTIONS:+"-c"}
|
maybeClientOptions=${CLIENT_OPTIONS:+"-c"}
|
||||||
maybeMachineType=${VALIDATOR_NODE_MACHINE_TYPE:+"-G"}
|
maybeMachineType=${VALIDATOR_NODE_MACHINE_TYPE:+"-G"}
|
||||||
|
|
||||||
IFS=, read -r -a TESTNET_CLOUD_ZONES <<<"${TESTNET_ZONES}"
|
IFS=, read -r -a TESTNET_CLOUD_ZONES <<<"${TESTNET_ZONES}"
|
||||||
|
|
||||||
|
RESULT_FILE="$TESTNET_TAG"_SUMMARY_STATS_"$NUMBER_OF_VALIDATOR_NODES".log
|
||||||
|
rm -f $RESULT_FILE
|
||||||
|
RESULT_DETAILS="Test failed to finish"
|
||||||
|
|
||||||
|
TEST_PARAMS_TO_DISPLAY=(CLOUD_PROVIDER \
|
||||||
|
NUMBER_OF_VALIDATOR_NODES \
|
||||||
|
VALIDATOR_NODE_MACHINE_TYPE \
|
||||||
|
NUMBER_OF_CLIENT_NODES \
|
||||||
|
CLIENT_OPTIONS \
|
||||||
|
TESTNET_ZONES \
|
||||||
|
TEST_DURATION \
|
||||||
|
ADDITIONAL_FLAGS)
|
||||||
|
|
||||||
|
TEST_CONFIGURATION=
|
||||||
|
for i in "${TEST_PARAMS_TO_DISPLAY[@]}" ; do
|
||||||
|
if [[ -n ${!i} ]] ; then
|
||||||
|
TEST_CONFIGURATION+="${i} = ${!i} | "
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
START_UNIX_MSECS="$(($(date +%s%N)/1000000))"
|
||||||
|
|
||||||
launchTestnet
|
launchTestnet
|
||||||
|
102
system-test/testnet-performance/upload_results_to_slack.sh
Executable file
102
system-test/testnet-performance/upload_results_to_slack.sh
Executable file
@ -0,0 +1,102 @@
|
|||||||
|
# Escape a string so it can be embedded inside a JSON string literal.
# Handles backslash, double quote, newline, carriage return and tab; the
# backslash substitution must run first so later escapes are not doubled.
_slack_json_escape() {
  local text=$1
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
}

# Post a summary of the current test run to the Slack webhook.
#
# Reads (all optional unless noted):
#   SLACK_WEBHOOK_URL    - required; destination of the POST
#   BUILDKITE_MESSAGE    - headline text
#   BUILDKITE_COMMIT     - commit SHA, used for the commit button
#   BUILDKITE_BUILD_URL  - target of the build button
#   CHANNEL, TESTNET_TAG, START_UNIX_MSECS, FINISH_UNIX_MSECS
#                        - used to build the Grafana link
#   TEST_CONFIGURATION   - text shown in the "Test Configuration" section
#   RESULT_DETAILS       - text shown in the "Result Details" section
#
# Sets the globals COMMIT_BUTTON_TEXT, COMMIT_URL, BUILD_BUTTON_TEXT,
# GRAFANA_URL and payLoad, and fills in defaults for BUILDKITE_MESSAGE,
# BUILDKITE_BUILD_URL, RESULT_DETAILS and TEST_CONFIGURATION when unset.
#
# NOTE: this function calls `exit 1` when SLACK_WEBHOOK_URL is empty, which
# terminates the calling shell because the file is meant to be sourced.
upload_results_to_slack() {
  echo --- Uploading results to Slack Performance Results App

  if [[ -z $SLACK_WEBHOOK_URL ]] ; then
    echo "SLACK_WEBHOOK_URL undefined"
    exit 1
  fi

  [[ -n $BUILDKITE_MESSAGE ]] || BUILDKITE_MESSAGE="Message not defined"

  if [[ -n $BUILDKITE_COMMIT ]] ; then
    # Abbreviate the SHA to its first 8 characters for the button label
    COMMIT_BUTTON_TEXT="${BUILDKITE_COMMIT:0:8}"
    COMMIT_URL="https://github.com/solana-labs/solana/commit/${BUILDKITE_COMMIT}"
  else
    COMMIT_BUTTON_TEXT="Commit not defined"
    COMMIT_URL="https://github.com/solana-labs/solana/commits/master"
  fi

  if [[ -n $BUILDKITE_BUILD_URL ]] ; then
    BUILD_BUTTON_TEXT="Build Kite Job"
  else
    BUILD_BUTTON_TEXT="Build URL not defined"
    BUILDKITE_BUILD_URL="https://buildkite.com/solana-labs/"
  fi

  GRAFANA_URL="https://metrics.solana.com:3000/d/testnet-${CHANNEL:-edge}/testnet-monitor-${CHANNEL:-edge}?var-testnet=${TESTNET_TAG:-testnet-automation}&from=${START_UNIX_MSECS:-0}&to=${FINISH_UNIX_MSECS:-0}"

  [[ -n $RESULT_DETAILS ]] || RESULT_DETAILS="Undefined"
  [[ -n $TEST_CONFIGURATION ]] || TEST_CONFIGURATION="Undefined"

  # Every interpolated value is JSON-escaped first: the commit message and
  # the (multi-line) result details can contain quotes, backslashes or
  # newlines, any of which would otherwise produce an invalid payload.
  local message commitText commitUrl buildText buildUrl grafanaUrl testConfig resultDetails
  message="$(_slack_json_escape "$BUILDKITE_MESSAGE")"
  commitText="$(_slack_json_escape "$COMMIT_BUTTON_TEXT")"
  commitUrl="$(_slack_json_escape "$COMMIT_URL")"
  buildText="$(_slack_json_escape "$BUILD_BUTTON_TEXT")"
  buildUrl="$(_slack_json_escape "$BUILDKITE_BUILD_URL")"
  grafanaUrl="$(_slack_json_escape "$GRAFANA_URL")"
  testConfig="$(_slack_json_escape "$TEST_CONFIGURATION")"
  resultDetails="$(_slack_json_escape "$RESULT_DETAILS")"

  payLoad="$(cat <<EOF
{
  "blocks": [
    {
      "type": "section",
      "text": {
        "type": "mrkdwn",
        "text": "*New Build: $message*"
      }
    },
    {
      "type": "actions",
      "elements": [
        {
          "type": "button",
          "text": {
            "type": "plain_text",
            "text": "$commitText",
            "emoji": true
          },
          "url": "$commitUrl"
        },
        {
          "type": "button",
          "text": {
            "type": "plain_text",
            "text": "$buildText",
            "emoji": true
          },
          "url": "$buildUrl"
        },
        {
          "type": "button",
          "text": {
            "type": "plain_text",
            "text": "Grafana",
            "emoji": true
          },
          "url": "$grafanaUrl"
        }
      ]
    },
    {
      "type": "divider"
    },
    {
      "type": "section",
      "text": {
        "type": "mrkdwn",
        "text": "Test Configuration: \n\`\`\`$testConfig\`\`\`"
      }
    },
    {
      "type": "divider"
    },
    {
      "type": "section",
      "text": {
        "type": "mrkdwn",
        "text": "Result Details: \n\`\`\`$resultDetails\`\`\`"
      }
    }
  ]
}
EOF
)"

  curl -X POST \
    -H 'Content-type: application/json' \
    --data "$payLoad" \
    "$SLACK_WEBHOOK_URL"
}
|
Reference in New Issue
Block a user