Add more metrics
This commit is contained in:
		
							
								
								
									
										10
									
								
								net/gce.sh
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								net/gce.sh
									
									
									
									
									
								
							@@ -110,7 +110,7 @@ done
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
prepareInstancesAndWriteConfigFile() {
 | 
			
		||||
  $metricsWriteDatapoint "testnet-deploy net-config-start=1"
 | 
			
		||||
  $metricsWriteDatapoint "testnet-deploy net-config-begin=1"
 | 
			
		||||
 | 
			
		||||
  cat >> "$configFile" <<EOF
 | 
			
		||||
# autogenerated at $(date)
 | 
			
		||||
@@ -183,15 +183,15 @@ EOF
 | 
			
		||||
 | 
			
		||||
case $command in
 | 
			
		||||
delete)
 | 
			
		||||
  $metricsWriteDatapoint "testnet-deploy net-delete-start=1"
 | 
			
		||||
  $metricsWriteDatapoint "testnet-deploy net-delete-begin=1"
 | 
			
		||||
 | 
			
		||||
  gcloud_FindInstances "name~^$prefix-"
 | 
			
		||||
 | 
			
		||||
  if [[ ${#instances[@]} -eq 0 ]]; then
 | 
			
		||||
    echo "No instances found matching '^$prefix-'"
 | 
			
		||||
    exit 0
 | 
			
		||||
  fi
 | 
			
		||||
  else
 | 
			
		||||
    gcloud_DeleteInstances "$yes"
 | 
			
		||||
  fi
 | 
			
		||||
  rm -f "$configFile"
 | 
			
		||||
 | 
			
		||||
  $metricsWriteDatapoint "testnet-deploy net-delete-complete=1"
 | 
			
		||||
@@ -200,7 +200,7 @@ delete)
 | 
			
		||||
create)
 | 
			
		||||
  [[ -n $validatorNodeCount ]] || usage "Need number of nodes"
 | 
			
		||||
 | 
			
		||||
  $metricsWriteDatapoint "testnet-deploy net-create=1"
 | 
			
		||||
  $metricsWriteDatapoint "testnet-deploy net-create-begin=1"
 | 
			
		||||
 | 
			
		||||
  echo "Network composition:"
 | 
			
		||||
  echo "Leader = $leaderMachineType (GPU=${leaderAccelerator:-none})"
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										16
									
								
								net/net.sh
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								net/net.sh
									
									
									
									
									
								
							@@ -188,13 +188,20 @@ startClient() {
 | 
			
		||||
 | 
			
		||||
sanity() {
 | 
			
		||||
  declare expectedNodeCount=$((${#validatorIpList[@]} + 1))
 | 
			
		||||
  declare ok=true
 | 
			
		||||
 | 
			
		||||
  echo "--- Sanity"
 | 
			
		||||
  $metricsWriteDatapoint "testnet-deploy net-sanity-begin=1"
 | 
			
		||||
 | 
			
		||||
  (
 | 
			
		||||
    set -x
 | 
			
		||||
    # shellcheck disable=SC2029 # remote-client.sh args are expanded on client side intentionally
 | 
			
		||||
    ssh "${sshOptions[@]}" "$leaderIp" \
 | 
			
		||||
      "./solana/net/remote/remote-sanity.sh $sanityExtraArgs"
 | 
			
		||||
  ) || exit 1
 | 
			
		||||
  ) || ok=false
 | 
			
		||||
 | 
			
		||||
  $metricsWriteDatapoint "testnet-deploy net-sanity-complete=1"
 | 
			
		||||
  $ok || exit 1
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
start() {
 | 
			
		||||
@@ -231,6 +238,7 @@ start() {
 | 
			
		||||
  esac
 | 
			
		||||
 | 
			
		||||
  echo "Deployment started at $(date)"
 | 
			
		||||
  $metricsWriteDatapoint "testnet-deploy net-start-begin=1"
 | 
			
		||||
 | 
			
		||||
  SECONDS=0
 | 
			
		||||
  declare leaderDeployTime=
 | 
			
		||||
@@ -263,7 +271,7 @@ start() {
 | 
			
		||||
    startClient "$ipAddress" "$netLogDir/client-$ipAddress.log"
 | 
			
		||||
  done
 | 
			
		||||
  clientDeployTime=$SECONDS
 | 
			
		||||
  $metricsWriteDatapoint "testnet-deploy start=1"
 | 
			
		||||
  $metricsWriteDatapoint "testnet-deploy net-start-complete=1"
 | 
			
		||||
 | 
			
		||||
  if [[ $deployMethod = "snap" ]]; then
 | 
			
		||||
    IFS=\  read -r _ networkVersion _ < <(
 | 
			
		||||
@@ -303,8 +311,7 @@ stop_node() {
 | 
			
		||||
 | 
			
		||||
stop() {
 | 
			
		||||
  SECONDS=0
 | 
			
		||||
 | 
			
		||||
  $metricsWriteDatapoint "testnet-deploy stop=1"
 | 
			
		||||
  $metricsWriteDatapoint "testnet-deploy net-stop-begin=1"
 | 
			
		||||
 | 
			
		||||
  stop_node "$leaderIp"
 | 
			
		||||
 | 
			
		||||
@@ -312,6 +319,7 @@ stop() {
 | 
			
		||||
    stop_node "$ipAddress"
 | 
			
		||||
  done
 | 
			
		||||
 | 
			
		||||
  $metricsWriteDatapoint "testnet-deploy net-stop-complete=1"
 | 
			
		||||
  echo "Stopping nodes took $SECONDS seconds"
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -54,16 +54,13 @@ esac
 | 
			
		||||
 | 
			
		||||
scripts/oom-monitor.sh > oom-monitor.log 2>&1 &
 | 
			
		||||
 | 
			
		||||
set +e
 | 
			
		||||
while true; do
 | 
			
		||||
  echo "=== Client start: $(date)" >> client.log
 | 
			
		||||
  $metricsWriteDatapoint "testnet-deploy client-begin=1"
 | 
			
		||||
  clientCommand="$solana_bench_tps --num-nodes $numNodes --seconds 600 --sustained --threads $threadCount"
 | 
			
		||||
  echo "$ $clientCommand" >> client.log
 | 
			
		||||
 | 
			
		||||
  set +e
 | 
			
		||||
  $clientCommand >> client.log 2>&1
 | 
			
		||||
  set -e
 | 
			
		||||
 | 
			
		||||
  $metricsWriteDatapoint "testnet-deploy,name=$netBasename clientexit=1"
 | 
			
		||||
  echo Error: bench-tps should never exit | tee -a client.log
 | 
			
		||||
  $metricsWriteDatapoint "testnet-deploy client-complete=1"
 | 
			
		||||
done
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user