From f0c39cc84d61e9e7ce72451f27d22cd671b3c570 Mon Sep 17 00:00:00 2001
From: pgarg66
Date: Tue, 17 Jul 2018 13:48:25 -0700
Subject: [PATCH] Remote multinode scripts cleanup (#666)

- Also added support for stop nodes
---
 multinode-demo/remote_leader.sh    |   2 -
 multinode-demo/remote_validator.sh |   3 -
 multinode-demo/start_nodes.sh      | 192 +++++++++++++++++++----------
 3 files changed, 129 insertions(+), 68 deletions(-)

diff --git a/multinode-demo/remote_leader.sh b/multinode-demo/remote_leader.sh
index f0502ee1f8..59c58c46d2 100755
--- a/multinode-demo/remote_leader.sh
+++ b/multinode-demo/remote_leader.sh
@@ -2,8 +2,6 @@
 
 [[ -n $FORCE ]] || exit
 
-mkdir -p ~/.ssh ~/solana ~/.cargo/bin
-sudo apt-get --assume-yes install rsync libssl-dev
 chmod 600 ~/.ssh/authorized_keys ~/.ssh/id_rsa
 
 PATH="$HOME"/.cargo/bin:"$PATH"
diff --git a/multinode-demo/remote_validator.sh b/multinode-demo/remote_validator.sh
index 0a8dc9bdbe..28a9f5a9d2 100755
--- a/multinode-demo/remote_validator.sh
+++ b/multinode-demo/remote_validator.sh
@@ -2,8 +2,6 @@
 
 [[ -n $FORCE ]] || exit
 
-mkdir -p ~/.ssh ~/solana ~/.cargo/bin
-sudo apt-get --assume-yes install rsync libssl-dev
 chmod 600 ~/.ssh/authorized_keys ~/.ssh/id_rsa
 
 PATH="$HOME"/.cargo/bin:"$PATH"
@@ -12,7 +10,6 @@ ssh-keygen -R "$1"
 ssh-keyscan "$1" >>~/.ssh/known_hosts 2>/dev/null
 
 rsync -vPrz "$1":~/.cargo/bin/solana* ~/.cargo/bin/
-rsync -vPrz "$1":~/solana/fetch-perf-libs.sh ~/solana/
 
 # Run setup
 USE_INSTALL=1 ./multinode-demo/setup.sh -p
diff --git a/multinode-demo/start_nodes.sh b/multinode-demo/start_nodes.sh
index 5550fa5dc6..39b4721436 100755
--- a/multinode-demo/start_nodes.sh
+++ b/multinode-demo/start_nodes.sh
@@ -1,62 +1,89 @@
 #!/bin/bash
 
-ip_addr_file=$1
-remote_user=$2
-ssh_keys=$3
+command=$1
+ip_addr_file=
+remote_user=
+ssh_keys=
+
+shift
 
 usage() {
-  echo -e "\\tUsage: $0 <IP Address array> <username> [path to ssh keys]\\n"
-  echo -e "\\t <IP Address array>: A bash script that exports an array of IP addresses, ip_addr_array. Elements of the array are public IP address of remote nodes."
-  echo -e "\\t <username>: The username for logging into remote nodes."
-  echo -e "\\t [path to ssh keys]: The public/private key pair that remote nodes can use to perform rsync and ssh among themselves. Must contain pub, priv and authorized_keys.\\n"
-  exit 1
+  exitcode=0
+  if [[ -n "$1" ]]; then
+    exitcode=1
+    echo "Error: $*"
+  fi
+  cat <<EOF
+usage: $0 <start|stop> <-f IP Addr Array file> <-u username> [-k ssh-keys]
+
+Manage a GCE multinode network
+
+ start|stop    - Create or delete the network
+ -f file       - A bash script that exports an array of IP addresses, ip_addr_array.
+                 Elements of the array are public IP address of remote nodes.
+ -u username   - The username for logging into remote nodes.
+ -k ssh-keys   - Path to public/private key pair that remote nodes can use to perform
+                 rsync and ssh among themselves. Must contain pub, and priv keys.
+
+EOF
+  exit $exitcode
 }
 
+while getopts "h?f:u:k:" opt; do
+  case $opt in
+  h | \?)
+    usage
+    ;;
+  f)
+    ip_addr_file=$OPTARG
+    ;;
+  u)
+    remote_user=$OPTARG
+    ;;
+  k)
+    ssh_keys=$OPTARG
+    ;;
+  *)
+    usage "Error: unhandled option: $opt"
+    ;;
+  esac
+done
+
+set -e
+
 # Sample IP Address array file contents
 # ip_addr_array=(192.168.1.1 192.168.1.5 192.168.2.2)
 
-if [[ -z "$ip_addr_file" ]]; then
-  usage
-fi
-
-if [[ -z "$remote_user" ]]; then
-  usage
-fi
-
-echo "Build started at $(date)"
-SECONDS=0
-# Build and install locally
-PATH="$HOME"/.cargo/bin:"$PATH"
-cargo install --force
-
-build_time=$SECONDS
-echo "Build took $SECONDS seconds"
+[[ -n $command ]] || usage "Need a command (start|stop)"
+[[ -n $ip_addr_file ]] || usage "Need a file with IP address array"
+[[ -n $remote_user ]] || usage "Need the username for remote nodes"
 
 ip_addr_array=()
 # Get IP address array
 # shellcheck source=/dev/null
 source "$ip_addr_file"
 
-echo "Deployment started at $(date)"
-SECONDS=0
-count=0
-leader_ip=
-leader_time=
+build_project() {
+  echo "Build started at $(date)"
+  SECONDS=0
 
-mkdir -p log
+  # Build and install locally
+  PATH="$HOME"/.cargo/bin:"$PATH"
+  cargo install --force
 
-common_setup() {
+  echo "Build took $SECONDS seconds"
+}
+
+common_start_setup() {
   ip_addr=$1
 
   # Killing sshguard for now. TODO: Find a better solution
   # sshguard is blacklisting IP address after ssh-keyscan and ssh login attempts
-  ssh -n -f "$remote_user@$ip_addr" "
+  ssh -n -f "$remote_user@$ip_addr" " \
     set -ex; \
     sudo service sshguard stop; \
     sudo apt-get --assume-yes install rsync libssl-dev; \
-    pkill -9 solana-; \
-    pkill -9 validator; \
-    pkill -9 leader; \
+    mkdir -p ~/.ssh ~/solana ~/.cargo/bin; \
   " >log/"$ip_addr".log
 
   # If provided, deploy SSH keys
@@ -69,8 +96,8 @@ common_setup() {
   fi
 }
 
-leader() {
-  common_setup "$1"
+start_leader() {
+  common_start_setup "$1"
 
   {
     rsync -vPrz ~/.cargo/bin/solana* "$remote_user@$ip_addr":~/.cargo/bin/
@@ -84,37 +111,76 @@ leader() {
   SECONDS=0
 }
 
-validator() {
-  common_setup "$1"
+start_validator() {
+  common_start_setup "$1"
 
   ssh "$remote_user@$ip_addr" "rsync -vPrz ""$remote_user@$leader_ip"":~/solana/multinode-demo ~/solana/" >>log/"$1".log
   ssh -n -f "$remote_user@$ip_addr" "cd solana; FORCE=1 ./multinode-demo/remote_validator.sh $leader_ip" >>log/"$1".log
 }
 
-for ip_addr in "${ip_addr_array[@]}"; do
-  ssh-keygen -R "$ip_addr" >log/local.log
-  ssh-keyscan "$ip_addr" >>~/.ssh/known_hosts 2>/dev/null
+start_all_nodes() {
+  echo "Deployment started at $(date)"
+  SECONDS=0
+  count=0
+  leader_ip=
+  leader_time=
 
-  if ((!count)); then
-    # Start the leader on the first node
-    echo "Leader node $ip_addr, killing previous instance and restarting"
-    leader "$ip_addr"
-  else
-    # Start validator on all other nodes
-    echo "Validator[$count] node $ip_addr, killing previous instance and restarting"
-    validator "$ip_addr" &
-    # TBD: Remove the sleep or reduce time once GCP login quota is increased
+  mkdir -p log
+
+  for ip_addr in "${ip_addr_array[@]}"; do
+    ssh-keygen -R "$ip_addr" >log/local.log
+    ssh-keyscan "$ip_addr" >>~/.ssh/known_hosts 2>/dev/null
+
+    if ((!count)); then
+      # Start the leader on the first node
+      echo "Leader node $ip_addr, killing previous instance and restarting"
+      start_leader "$ip_addr"
+    else
+      # Start validator on all other nodes
+      echo "Validator[$count] node $ip_addr, killing previous instance and restarting"
+      start_validator "$ip_addr" &
+      # TBD: Remove the sleep or reduce time once GCP login quota is increased
+      sleep 2
+    fi
+
+    ((count = count + 1))
+  done
+
+  wait
+
+  ((validator_count = count - 1))
+
+  echo "Deployment finished at $(date)"
+  echo "Leader deployment too $leader_time seconds"
+  echo "$validator_count Validator deployment took $SECONDS seconds"
+}
+
+stop_all_nodes() {
+  SECONDS=0
+  local count=0
+  for ip_addr in "${ip_addr_array[@]}"; do
+    echo "Stopping node[$count] $ip_addr. Remote user $remote_user"
+
+    ssh -n -f "$remote_user@$ip_addr" " \
+      set -ex; \
+      sudo service sshguard stop; \
+      pkill -9 solana-; \
+      pkill -9 validator; \
+      pkill -9 leader; \
+    "
     sleep 2
-  fi
+    ((count = count + 1))
+    echo "Stopped node[$count] $ip_addr"
+  done
+  echo "Stopping $count nodes took $SECONDS seconds"
+}
 
-  ((count++))
-done
-
-wait
-
-((validator_count = count - 1))
-
-echo "Deployment finished at $(date)"
-echo "Build took $build_time seconds"
-echo "Leader deployment too $leader_time seconds"
-echo "$validator_count Validator deployment took $SECONDS seconds"
+if [[ $command == "start" ]]; then
+  #build_project
+  stop_all_nodes
+  start_all_nodes
+elif [[ $command == "stop" ]]; then
+  stop_all_nodes
+else
+  usage "Unknown command: $command"
+fi