cherry pick readme update

Fix missing recycle in recv_from (#1205 )
In the error case that i>0 (we have blobs to send) we break out of the loop and do not push the allocated r to the v array. We should recycle this blob, otherwise it will be dropped.
2018-09-13 19:19:48 -07:00 · 2018-09-13 10:27:24 -07:00 · 2018-09-13 10:27:24 -07:00 · 2018-09-12 09:24:42 -07:00 · 2018-09-11 16:56:54 -07:00 · 2018-09-11 16:52:45 -07:00
132 changed files with 7221 additions and 4222 deletions
--- a/.buildkite/hooks/post-command
+++ b/.buildkite/hooks/post-command
@ -41,5 +41,5 @@ else

  point="job_stats,$point_tags $point_fields"

-  multinode-demo/metrics_write_datapoint.sh "$point" || true
+  scripts/metrics-write-datapoint.sh "$point" || true
 fi
--- a/.buildkite/hooks/pre-command
+++ b/.buildkite/hooks/pre-command
@ -1,13 +1,27 @@
 #!/bin/bash -e

-[[ -n "$CARGO_TARGET_CACHE_NAME" ]] || exit 0
+# Ensure the pattern "+++ ..." never occurs when |set -x| is set, as buildkite
+# interprets this as the start of a log group.
+# Ref: https://buildkite.com/docs/pipelines/managing-log-output
+export PS4="++"

 #
 # Restore target/ from the previous CI build on this machine
 #
-(
+[[ -z "$CARGO_TARGET_CACHE_NAME" ]] || ( # only restore when a cache name is configured
  d=$HOME/cargo-target-cache/"$CARGO_TARGET_CACHE_NAME"
+
+  if [[ -d $d ]]; then
+    du -hs "$d"
+    read -r cacheSizeInGB _ < <(du -s --block-size=1000000000 "$d")
+    if [[ $cacheSizeInGB -gt 5 ]]; then
+      echo "$d has gotten too large, removing it"
+      rm -rf "$d"
+    fi
+  fi
+
  mkdir -p "$d"/target
  set -x
  rsync -a --delete --link-dest="$d" "$d"/target .
 )
+
--- a/Cargo.toml
+++ b/Cargo.toml
@ -1,7 +1,7 @@
 [package]
 name = "solana"
 description = "Blockchain, Rebuilt for Scale"
-version = "0.7.1"
+version = "0.8.0"
 documentation = "https://docs.rs/solana"
 homepage = "http://solana.com/"
 readme = "README.md"
@ -61,24 +61,28 @@ unstable = []
 ipv6 = []
 cuda = []
 erasure = []
+test = []

 [dependencies]
 atty = "0.2"
 bincode = "1.0.0"
 bs58 = "0.2.0"
 byteorder = "1.2.1"
+bytes = "0.4"
 chrono = { version = "0.4.0", features = ["serde"] }
 clap = "2.31"
 dirs = "1.0.2"
 env_logger = "0.5.12"
-futures = "0.1.21"
-generic-array = { version = "0.11.1", default-features = false, features = ["serde"] }
+generic-array = { version = "0.12.0", default-features = false, features = ["serde"] }
 getopts = "0.2"
 influx_db_client = "0.3.4"
+jsonrpc-core = { git = "https://github.com/paritytech/jsonrpc", rev = "4b6060b" }
+jsonrpc-http-server = { git = "https://github.com/paritytech/jsonrpc", rev = "4b6060b" }
+jsonrpc-macros = { git = "https://github.com/paritytech/jsonrpc", rev = "4b6060b" }
 itertools = "0.7.8"
-libc = "0.2.1"
 log = "0.4.2"
 matches = "0.1.6"
+nix = "0.11.0"
 pnet_datalink = "0.21.0"
 rand = "0.5.1"
 rayon = "1.0.0"
@ -88,32 +92,23 @@ sha2 = "0.7.0"
 serde = "1.0.27"
 serde_derive = "1.0.27"
 serde_json = "1.0.10"
+socket2 = "0.3.8"
 sys-info = "0.5.6"
 tokio = "0.1"
 tokio-codec = "0.1"
-tokio-core = "0.1.17"
-tokio-io = "0.1"
 untrusted = "0.6.2"

-[dev-dependencies]
-criterion = "0.2"
-
 [[bench]]
 name = "bank"
-harness = false

 [[bench]]
 name = "banking_stage"
-harness = false

 [[bench]]
 name = "ledger"
-harness = false

 [[bench]]
 name = "signature"
-harness = false

 [[bench]]
 name = "sigverify"
-harness = false
--- a/README.md
+++ b/README.md
@ -17,7 +17,11 @@ All claims, content, designs, algorithms, estimates, roadmaps, specifications, a
 Introduction
 ===

-It's possible for a centralized database to process 710,000 transactions per second on a standard gigabit network if the transactions are, on average, no more than 176 bytes. A centralized database can also replicate itself and maintain high availability without significantly compromising that transaction rate using the distributed system technique known as Optimistic Concurrency Control [H.T.Kung, J.T.Robinson (1981)]. At Solana, we're demonstrating that these same theoretical limits apply just as well to blockchain on an adversarial network. The key ingredient? Finding a way to share time when nodes can't trust one-another. Once nodes can trust time, suddenly ~40 years of distributed systems research becomes applicable to blockchain! Furthermore, and much to our surprise, it can implemented using a mechanism that has existed in Bitcoin since day one. The Bitcoin feature is called nLocktime and it can be used to postdate transactions using block height instead of a timestamp. As a Bitcoin client, you'd use block height instead of a timestamp if you don't trust the network. Block height turns out to be an instance of what's being called a Verifiable Delay Function in cryptography circles. It's a cryptographically secure way to say time has passed. In Solana, we use a far more granular verifiable delay function, a SHA 256 hash chain, to checkpoint the ledger and coordinate consensus. With it, we implement Optimistic Concurrency Control and are now well in route towards that theoretical limit of 710,000 transactions per second.
+It's possible for a centralized database to process 710,000 transactions per second on a standard gigabit network if the transactions are, on average, no more than 176 bytes. A centralized database can also replicate itself and maintain high availability without significantly compromising that transaction rate using the distributed system technique known as Optimistic Concurrency Control [\[H.T.Kung, J.T.Robinson (1981)\]](http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.65.4735). At Solana, we're demonstrating that these same theoretical limits apply just as well to blockchain on an adversarial network. The key ingredient? Finding a way to share time when nodes can't trust one-another. Once nodes can trust time, suddenly ~40 years of distributed systems research becomes applicable to blockchain!
+
+> Perhaps the most striking difference between algorithms obtained by our method and ones based upon timeout is that using timeout produces a traditional distributed algorithm in which the processes operate asynchronously, while our method produces a globally synchronous one in which every process does the same thing at (approximately) the same time. Our method seems to contradict the whole purpose of distributed processing, which is to permit different processes to operate independently and perform different functions. However, if a distributed system is really a single system, then the processes must be synchronized in some way. Conceptually, the easiest way to synchronize processes is to get them all to do the same thing at the same time. Therefore, our method is used to implement a kernel that performs the necessary synchronization--for example, making sure that two different processes do not try to modify a file at the same time. Processes might spend only a small fraction of their time executing the synchronizing kernel; the rest of the time, they can operate independently--e.g., accessing different files. This is an approach we have advocated even when fault-tolerance is not required. The method's basic simplicity makes it easier to understand the precise properties of a system, which is crucial if one is to know just how fault-tolerant the system is. [\[L.Lamport (1984)\]](http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.71.1078)
+
+Furthermore, and much to our surprise, it can be implemented using a mechanism that has existed in Bitcoin since day one. The Bitcoin feature is called nLocktime and it can be used to postdate transactions using block height instead of a timestamp. As a Bitcoin client, you'd use block height instead of a timestamp if you don't trust the network. Block height turns out to be an instance of what's being called a Verifiable Delay Function in cryptography circles. It's a cryptographically secure way to say time has passed. In Solana, we use a far more granular verifiable delay function, a SHA-256 hash chain, to checkpoint the ledger and coordinate consensus. With it, we implement Optimistic Concurrency Control and are now well en route towards that theoretical limit of 710,000 transactions per second.


 Testnet Demos
@ -58,7 +62,7 @@ your odds of success if you check out the
 before proceeding:

 ```bash
-$ git checkout v0.7.0-beta
+$ git checkout v0.8.0
 ```

 Configuration Setup
@ -92,45 +96,47 @@ Before you start a fullnode, make sure you know the IP address of the machine yo
 want to be the leader for the demo, and make sure that udp ports 8000-10000 are
 open on all the machines you want to test with.

-Now start the server:
+Now start the server in a separate shell:

 ```bash
 $ ./multinode-demo/leader.sh
 ```

-Wait a few seconds for the server to initialize. It will print "Ready." when it's ready to
+Wait a few seconds for the server to initialize. It will print "leader ready..." when it's ready to
 receive transactions. The leader will request some tokens from the drone if it doesn't have any.
 The drone does not need to be running for subsequent leader starts.

 Multinode Testnet
 ---

-To run a multinode testnet, after starting a leader node, spin up some validator nodes:
+To run a multinode testnet, after starting a leader node, spin up some validator nodes in
+separate shells:

 ```bash
-$ ./multinode-demo/validator.sh ubuntu@10.0.1.51:~/solana 10.0.1.51
+$ ./multinode-demo/validator.sh
 ```

 To run a performance-enhanced leader or validator (on Linux),
 [CUDA 9.2](https://developer.nvidia.com/cuda-downloads) must be installed on
 your system:
+
 ```bash
 $ ./fetch-perf-libs.sh
 $ SOLANA_CUDA=1 ./multinode-demo/leader.sh
-$ SOLANA_CUDA=1 ./multinode-demo/validator.sh ubuntu@10.0.1.51:~/solana 10.0.1.51
-
+$ SOLANA_CUDA=1 ./multinode-demo/validator.sh
 ```


-
 Testnet Client Demo
 ---

-Now that your singlenode or multinode testnet is up and running, in a separate shell, let's send it some transactions! Note we pass in
-the JSON configuration file here, not the genesis ledger.
+Now that your singlenode or multinode testnet is up and running let's send it
+some transactions!
+
+In a separate shell start the client:

 ```bash
-$ ./multinode-demo/client.sh ubuntu@10.0.1.51:~/solana 2 #The leader machine and the total number of nodes in the network
+$ ./multinode-demo/client.sh # runs against localhost by default
 ```

 What just happened? The client demo spins up several threads to send 500,000 transactions
@ -142,21 +148,35 @@ demo completes after it has convinced itself the testnet won't process any addit
 transactions. You should see several TPS measurements printed to the screen. In the
 multinode variation, you'll see TPS measurements for each validator node as well.

+Public Testnet
+--------------
+In this example the client connects to our public testnet. To run validators on the testnet you would need to open udp ports `8000-10000`.
+
+```bash
+$ ./multinode-demo/client.sh --network $(dig +short testnet.solana.com):8001 --identity config-private/client-id.json --duration 60
+```
+
+You can observe the effects of your client's transactions on our [dashboard](https://metrics.solana.com:3000/d/testnet/testnet-hud?orgId=2&from=now-30m&to=now&refresh=5s&var-testnet=testnet)
+
+
 Linux Snap
 ---
 A Linux [Snap](https://snapcraft.io/) is available, which can be used to
 easily get Solana running on supported Linux systems without building anything
 from source.  The `edge` Snap channel is updated daily with the latest
 development from the `master` branch.  To install:
+
 ```bash
 $ sudo snap install solana --edge --devmode
 ```
+
 (`--devmode` flag is required only for `solana.fullnode-cuda`)

 Once installed the usual Solana programs will be available as `solana.*` instead
 of `solana-*`.  For example, `solana.fullnode` instead of `solana-fullnode`.

 Update to the latest version at any time with:
+
 ```bash
 $ snap info solana
 $ sudo snap refresh solana --devmode
@ -176,6 +196,7 @@ contains the latest log, and the files `*.s` (if present) contain older rotated
 logs.

 Disable the daemon at any time by running:
+
 ```bash
 $ sudo snap set solana mode=
 ```
@ -184,11 +205,13 @@ Runtime configuration files for the daemon can be found in
 `/var/snap/solana/current/config`.

 #### Leader daemon
+
 ```bash
 $ sudo snap set solana mode=leader
 ```

 If CUDA is available:
+
 ```bash
 $ sudo snap set solana mode=leader enable-cuda=1
 ```
@ -211,26 +234,31 @@ to port tcp:873, tcp:9900 and the port range udp:8000-udp:10000**


 To run both the Leader and Drone:
+
 ```bash
 $ sudo snap set solana mode=leader+drone

 ```

 #### Validator daemon
+
 ```bash
 $ sudo snap set solana mode=validator

 ```
 If CUDA is available:
+
 ```bash
 $ sudo snap set solana mode=validator enable-cuda=1
 ```

 By default the validator will connect to **testnet.solana.com**, override
 the leader IP address by running:
+
 ```bash
 $ sudo snap set solana mode=validator leader-address=127.0.0.1 #<-- change IP address
 ```
+
 It's assumed that the leader will be running `rsync` configured as described in
 the previous **Leader daemon** section.

@ -254,9 +282,10 @@ If your rustc version is lower than 1.26.1, please update it:
 $ rustup update
 ```

-On Linux systems you may need to install libssl-dev and pkg-config.  On Ubuntu:
+On Linux systems you may need to install libssl-dev, pkg-config, zlib1g-dev, etc.  On Ubuntu:
+
 ```bash
-$ sudo apt-get install libssl-dev pkg-config
+$ sudo apt-get install libssl-dev pkg-config zlib1g-dev
 ```

 Download the source code:
@ -276,6 +305,7 @@ $ cargo test
 ```

 To emulate all the tests that will run on a Pull Request, run:
+
 ```bash
 $ ./ci/run-local.sh
 ```
@ -284,17 +314,21 @@ Debugging
 ---

 There are some useful debug messages in the code, you can enable them on a per-module and per-level
-basis with the normal RUST\_LOG environment variable. Run the fullnode with this syntax:
+basis.  Before running a leader or validator set the normal RUST\_LOG environment variable.
+
+For example, to enable info everywhere and debug only in the solana::banking_stage module:
+
 ```bash
-$ RUST_LOG=solana::streamer=debug,solana::server=info cat genesis.log | ./target/release/solana-fullnode > transactions0.log
+$ export RUST_LOG=info,solana::banking_stage=debug
 ```
-to see the debug and info sections for streamer and server respectively. Generally
-we are using debug for infrequent debug messages, trace for potentially frequent messages and
-info for performance-related logging.

-Attaching to a running process with gdb:
+Generally we are using debug for infrequent debug messages, trace for potentially frequent
+messages and info for performance-related logging.

-```
+You can also attach to a running process with GDB.  The leader's process is named
+_solana-fullnode_:
+
+```bash
 $ sudo gdb
 attach <PID>
 set logging on
@ -318,6 +352,11 @@ Run the benchmarks:
 $ cargo +nightly bench --features="unstable"
 ```

+Release Process
+---
+The release process for this project is described [here](rfcs/rfc-005-branches-tags-and-channels.md).
+
+
 Code coverage
 ---

--- a/_config.yml
+++ b/_config.yml
@ -1 +0,0 @@
-theme: jekyll-theme-slate
--- a/benches/bank.rs
+++ b/benches/bank.rs
@ -1,18 +1,19 @@
-#[macro_use]
-extern crate criterion;
+#![feature(test)]
 extern crate bincode;
 extern crate rayon;
 extern crate solana;
+extern crate test;

 use bincode::serialize;
-use criterion::{Bencher, Criterion};
 use rayon::prelude::*;
 use solana::bank::*;
 use solana::hash::hash;
 use solana::mint::Mint;
 use solana::signature::{Keypair, KeypairUtil};
 use solana::transaction::Transaction;
+use test::Bencher;

+#[bench]
 fn bench_process_transaction(bencher: &mut Bencher) {
    let mint = Mint::new(100_000_000);
    let bank = Bank::new(&mint);
@ -39,28 +40,10 @@ fn bench_process_transaction(bencher: &mut Bencher) {
        })
        .collect();

-    bencher.iter_with_setup(
-        || {
-            // Since benchmarker runs this multiple times, we need to clear the signatures.
-            bank.clear_signatures();
-            transactions.clone()
-        },
-        |transactions| {
-            let results = bank.process_transactions(transactions);
-            assert!(results.iter().all(Result::is_ok));
-        },
-    )
+    bencher.iter(|| {
+        // Since benchmarker runs this multiple times, we need to clear the signatures.
+        bank.clear_signatures();
+        let results = bank.process_transactions(transactions.clone());
+        assert!(results.iter().all(Result::is_ok));
+    })
 }
-
-fn bench(criterion: &mut Criterion) {
-    criterion.bench_function("bench_process_transaction", |bencher| {
-        bench_process_transaction(bencher);
-    });
-}
-
-criterion_group!(
-    name = benches;
-    config = Criterion::default().sample_size(2);
-    targets = bench
-);
-criterion_main!(benches);
--- a/benches/banking_stage.rs
+++ b/benches/banking_stage.rs
@ -1,10 +1,9 @@
+#![feature(test)]
 extern crate bincode;
-#[macro_use]
-extern crate criterion;
 extern crate rayon;
 extern crate solana;
+extern crate test;

-use criterion::{Bencher, Criterion};
 use rayon::prelude::*;
 use solana::bank::Bank;
 use solana::banking_stage::BankingStage;
@ -16,6 +15,7 @@ use solana::transaction::Transaction;
 use std::iter;
 use std::sync::mpsc::{channel, Receiver};
 use std::sync::Arc;
+use test::Bencher;

 // use self::test::Bencher;
 // use bank::{Bank, MAX_ENTRY_IDS};
@ -95,6 +95,7 @@ fn check_txs(receiver: &Receiver<Signal>, ref_tx_count: usize) {
    assert_eq!(total, ref_tx_count);
 }

+#[bench]
 fn bench_banking_stage_multi_accounts(bencher: &mut Bencher) {
    let tx = 10_000_usize;
    let mint_total = 1_000_000_000_000;
@ -145,7 +146,6 @@ fn bench_banking_stage_multi_accounts(bencher: &mut Bencher) {
                })
                .collect();

-        let verified_setup_len = verified_setup.len();
        verified_sender.send(verified_setup).unwrap();
        BankingStage::process_packets(&bank, &verified_receiver, &signal_sender, &packet_recycler)
            .unwrap();
@ -160,7 +160,6 @@ fn bench_banking_stage_multi_accounts(bencher: &mut Bencher) {
            })
            .collect();

-        let verified_len = verified.len();
        verified_sender.send(verified).unwrap();
        BankingStage::process_packets(&bank, &verified_receiver, &signal_sender, &packet_recycler)
            .unwrap();
@ -169,6 +168,7 @@ fn bench_banking_stage_multi_accounts(bencher: &mut Bencher) {
    });
 }

+#[bench]
 fn bench_banking_stage_single_from(bencher: &mut Bencher) {
    let tx = 10_000_usize;
    let mint = Mint::new(1_000_000_000_000);
@ -203,7 +203,6 @@ fn bench_banking_stage_single_from(bencher: &mut Bencher) {
                (x, iter::repeat(1).take(len).collect())
            })
            .collect();
-        let verified_len = verified.len();
        verified_sender.send(verified).unwrap();
        BankingStage::process_packets(&bank, &verified_receiver, &signal_sender, &packet_recycler)
            .unwrap();
@ -211,19 +210,3 @@ fn bench_banking_stage_single_from(bencher: &mut Bencher) {
        check_txs(&signal_receiver, tx);
    });
 }
-
-fn bench(criterion: &mut Criterion) {
-    criterion.bench_function("bench_banking_stage_multi_accounts", |bencher| {
-        bench_banking_stage_multi_accounts(bencher);
-    });
-    criterion.bench_function("bench_process_stage_single_from", |bencher| {
-        bench_banking_stage_single_from(bencher);
-    });
-}
-
-criterion_group!(
-    name = benches;
-    config = Criterion::default().sample_size(2);
-    targets = bench
-);
-criterion_main!(benches);
--- a/benches/ledger.rs
+++ b/benches/ledger.rs
@ -1,15 +1,15 @@
-#[macro_use]
-extern crate criterion;
+#![feature(test)]
 extern crate solana;
+extern crate test;

-use criterion::{Bencher, Criterion};
 use solana::hash::{hash, Hash};
 use solana::ledger::{next_entries, reconstruct_entries_from_blobs, Block};
 use solana::packet::BlobRecycler;
 use solana::signature::{Keypair, KeypairUtil};
 use solana::transaction::Transaction;
-use std::collections::VecDeque;
+use test::Bencher;

+#[bench]
 fn bench_block_to_blobs_to_block(bencher: &mut Bencher) {
    let zero = Hash::default();
    let one = hash(&zero.as_ref());
@ -20,21 +20,7 @@ fn bench_block_to_blobs_to_block(bencher: &mut Bencher) {

    let blob_recycler = BlobRecycler::default();
    bencher.iter(|| {
-        let mut blob_q = VecDeque::new();
-        entries.to_blobs(&blob_recycler, &mut blob_q);
-        assert_eq!(reconstruct_entries_from_blobs(blob_q).unwrap(), entries);
+        let blobs = entries.to_blobs(&blob_recycler);
+        assert_eq!(reconstruct_entries_from_blobs(blobs).unwrap(), entries);
    });
 }
-
-fn bench(criterion: &mut Criterion) {
-    criterion.bench_function("bench_block_to_blobs_to_block", |bencher| {
-        bench_block_to_blobs_to_block(bencher);
-    });
-}
-
-criterion_group!(
-    name = benches;
-    config = Criterion::default().sample_size(2);
-    targets = bench
-);
-criterion_main!(benches);
--- a/benches/signature.rs
+++ b/benches/signature.rs
@ -1,24 +1,12 @@
-#[macro_use]
-extern crate criterion;
+#![feature(test)]
 extern crate solana;
+extern crate test;

-use criterion::{Bencher, Criterion};
 use solana::signature::GenKeys;
+use test::Bencher;

+#[bench]
 fn bench_gen_keys(b: &mut Bencher) {
    let mut rnd = GenKeys::new([0u8; 32]);
    b.iter(|| rnd.gen_n_keypairs(1000));
 }
-
-fn bench(criterion: &mut Criterion) {
-    criterion.bench_function("bench_gen_keys", |bencher| {
-        bench_gen_keys(bencher);
-    });
-}
-
-criterion_group!(
-    name = benches;
-    config = Criterion::default().sample_size(2);
-    targets = bench
-);
-criterion_main!(benches);
--- a/benches/sigverify.rs
+++ b/benches/sigverify.rs
@ -1,14 +1,15 @@
-#[macro_use]
-extern crate criterion;
+#![feature(test)]
 extern crate bincode;
 extern crate rayon;
 extern crate solana;
+extern crate test;

-use criterion::{Bencher, Criterion};
 use solana::packet::{to_packets, PacketRecycler};
 use solana::sigverify;
 use solana::transaction::test_tx;
+use test::Bencher;

+#[bench]
 fn bench_sigverify(bencher: &mut Bencher) {
    let tx = test_tx();

@ -21,16 +22,3 @@ fn bench_sigverify(bencher: &mut Bencher) {
        let _ans = sigverify::ed25519_verify(&batches);
    })
 }
-
-fn bench(criterion: &mut Criterion) {
-    criterion.bench_function("bench_sigverify", |bencher| {
-        bench_sigverify(bencher);
-    });
-}
-
-criterion_group!(
-    name = benches;
-    config = Criterion::default().sample_size(2);
-    targets = bench
-);
-criterion_main!(benches);
--- a/build.rs
+++ b/build.rs
@ -1,15 +1,33 @@
 use std::env;
+use std::fs;

 fn main() {
-    println!("cargo:rustc-link-search=native=.");
-    if !env::var("CARGO_FEATURE_CUDA").is_err() {
+    println!("cargo:rerun-if-changed=target/perf-libs");
+    println!("cargo:rerun-if-changed=build.rs");
+
+    // Ensure target/perf-libs/ exists.  It's been observed that
+    // a cargo:rerun-if-changed= directive with a non-existent
+    // directory triggers a rebuild on every |cargo build| invocation
+    fs::create_dir("target/perf-libs").unwrap_or_else(|err| {
+        if err.kind() != std::io::ErrorKind::AlreadyExists {
+            panic!("Unable to create target/perf-libs: {:?}", err);
+        }
+    });
+
+    let cuda = !env::var("CARGO_FEATURE_CUDA").is_err();
+    let erasure = !env::var("CARGO_FEATURE_ERASURE").is_err();
+
+    if cuda || erasure {
+        println!("cargo:rustc-link-search=native=target/perf-libs");
+    }
+    if cuda {
        println!("cargo:rustc-link-lib=static=cuda_verify_ed25519");
        println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64");
        println!("cargo:rustc-link-lib=dylib=cudart");
        println!("cargo:rustc-link-lib=dylib=cuda");
        println!("cargo:rustc-link-lib=dylib=cudadevrt");
    }
-    if !env::var("CARGO_FEATURE_ERASURE").is_err() {
+    if erasure {
        println!("cargo:rustc-link-lib=dylib=Jerasure");
        println!("cargo:rustc-link-lib=dylib=gf_complete");
    }
--- a/ci/audit.sh
+++ b/ci/audit.sh
@ -16,10 +16,10 @@ _() {

 maybe_cargo_install() {
  for cmd in "$@"; do
-    set +e 
+    set +e
    cargo "$cmd" --help > /dev/null 2>&1
    declare exitcode=$?
-    set -e 
+    set -e
    if [[ $exitcode -eq 101 ]]; then
      _ cargo install cargo-"$cmd"
    fi
@ -29,4 +29,4 @@ maybe_cargo_install() {
 maybe_cargo_install audit tree

 _ cargo tree
-_ cargo audit
+_ cargo audit || true
--- a/ci/buildkite.yml
+++ b/ci/buildkite.yml
@ -1,18 +1,18 @@
 steps:
-  - command: "ci/docker-run.sh solanalabs/rust ci/test-stable.sh"
+  - command: "ci/docker-run.sh solanalabs/rust:1.28.0 ci/test-stable.sh"
    name: "stable [public]"
    env:
      CARGO_TARGET_CACHE_NAME: "stable"
    timeout_in_minutes: 30
-  - command: "ci/docker-run.sh solanalabs/rust ci/test-bench.sh"
-    name: "bench [public]"
-    env:
-      CARGO_TARGET_CACHE_NAME: "stable"
-    timeout_in_minutes: 30
+    #  - command: "ci/docker-run.sh solanalabs/rust-nightly ci/test-bench.sh"
+    #    name: "bench [public]"
+    #    env:
+    #      CARGO_TARGET_CACHE_NAME: "nightly"
+    #    timeout_in_minutes: 30
  - command: "ci/shellcheck.sh"
    name: "shellcheck [public]"
    timeout_in_minutes: 20
-  - command: "ci/docker-run.sh solanalabs/rust-nightly ci/test-nightly.sh"
+  - command: "ci/docker-run.sh solanalabs/rust-nightly:2018-09-03 ci/test-nightly.sh || true"
    name: "nightly [public]"
    env:
      CARGO_TARGET_CACHE_NAME: "nightly"
@ -24,8 +24,8 @@ steps:
    timeout_in_minutes: 20
    agents:
      - "queue=cuda"
-  - command: "ci/test-large-network.sh"
-    name: "large-network [public]"
+  - command: "ci/test-large-network.sh || true"
+    name: "large-network [public] [ignored]"
    env:
      CARGO_TARGET_CACHE_NAME: "stable"
    timeout_in_minutes: 20
--- a/ci/channel-info.sh
+++ b/ci/channel-info.sh
@ -0,0 +1,91 @@
+#!/bin/bash
+#
+# Computes the current branch names of the edge, beta and stable
+# channels, as well as the latest tagged release for beta and stable.
+#
+# stdout of this script may be eval-ed
+#
+
+here="$(dirname "$0")"
+
+# shellcheck source=ci/semver_bash/semver.sh
+source "$here"/semver_bash/semver.sh
+
+remote=https://github.com/solana-labs/solana.git
+
+# Fetch all vX.Y.Z tags (both dots in the pattern below are matched literally)
+#
+# NOTE: pre-release tags are explicitly ignored
+#
+# shellcheck disable=SC2207
+tags=( \
+  $(git ls-remote --tags $remote \
+    | cut -c52- \
+    | grep '^v[[:digit:]][[:digit:]]*\.[[:digit:]][[:digit:]]*\.[[:digit:]][[:digit:]]*$' \
+    | cut -c2- \
+  ) \
+)
+
+# Fetch all the vX.Y branches
+#
+# shellcheck disable=SC2207
+heads=( \
+  $(git ls-remote --heads $remote \
+    | cut -c53- \
+    | grep '^v[[:digit:]][[:digit:]]*\.[[:digit:]][[:digit:]]*$' \
+    | cut -c2- \
+  ) \
+)
+
+# Figure the beta channel by looking for the largest vX.Y branch
+beta=
+for head in "${heads[@]}"; do
+  if [[ -n $beta ]]; then
+    if semverLT "$head.0" "$beta.0"; then
+      continue
+    fi
+  fi
+  beta=$head
+done
+
+# Figure the stable channel by looking for the second largest vX.Y branch
+stable=
+for head in "${heads[@]}"; do
+  if [[ $head = "$beta" ]]; then
+    continue
+  fi
+  if [[ -n $stable ]]; then
+    if semverLT "$head.0" "$stable.0"; then
+      continue
+    fi
+  fi
+  stable=$head
+done
+
+for tag in "${tags[@]}"; do # pick the highest X.Y.Z tag belonging to each channel branch
+  if [[ -n $beta && $tag = "$beta".* ]]; then # match "X.Y." so 0.1 never claims 0.10.z
+    if [[ -n $beta_tag ]]; then
+      if semverLT "$tag" "$beta_tag"; then
+        continue
+      fi
+    fi
+    beta_tag=$tag
+  fi
+
+  if [[ -n $stable && $tag = "$stable".* ]]; then # same "X.Y." prefix rule as for beta
+    if [[ -n $stable_tag ]]; then
+      if semverLT "$tag" "$stable_tag"; then
+        continue
+      fi
+    fi
+    stable_tag=$tag
+  fi
+done
+
+echo EDGE_CHANNEL=master
+echo BETA_CHANNEL="${beta:+v$beta}"
+echo STABLE_CHANNEL="${stable:+v$stable}"
+echo BETA_CHANNEL_LATEST_TAG="${beta_tag:+v$beta_tag}"
+echo STABLE_CHANNEL_LATEST_TAG="${stable_tag:+v$stable_tag}"
+
+exit 0
--- a/ci/docker-run.sh
+++ b/ci/docker-run.sh
@ -1,22 +1,30 @@
 #!/bin/bash -e

 usage() {
-  echo "Usage: $0 [docker image name] [command]"
+  echo "Usage: $0 [--nopull] [docker image name] [command]"
  echo
  echo Runs command in the specified docker image with
-  echo a CI-appropriate environment
+  echo a CI-appropriate environment.
+  echo
+  echo "--nopull   Skip the dockerhub image update"
  echo
 }

 cd "$(dirname "$0")/.."

+NOPULL=false
+if [[ $1 = --nopull ]]; then
+  NOPULL=true
+  shift
+fi
+
 IMAGE="$1"
 if [[ -z "$IMAGE" ]]; then
  echo Error: image not defined
  exit 1
 fi

-docker pull "$IMAGE"
+$NOPULL || docker pull "$IMAGE"
 shift

 ARGS=(
@ -26,9 +34,14 @@ ARGS=(
 )

 if [[ -n $CI ]]; then
+  # Share the real ~/.cargo between docker containers in CI for speed
  ARGS+=(--volume "$HOME:/home")
-  ARGS+=(--env "CARGO_HOME=/home/.cargo")
+else
+  # Avoid sharing ~/.cargo when building locally to avoid a mixed macOS/Linux
+  # ~/.cargo
+  ARGS+=(--volume "$PWD:/home")
 fi
+ARGS+=(--env "CARGO_HOME=/home/.cargo")

 # kcov tries to set the personality of the binary which docker
 # doesn't allow by default.
--- a/ci/docker-rust-nightly/Dockerfile
+++ b/ci/docker-rust-nightly/Dockerfile
@ -1,9 +1,10 @@
-FROM rustlang/rust:nightly
+FROM solanalabs/rust
+ARG date

-RUN rustup component add clippy-preview --toolchain=nightly && \
-    echo deb http://ftp.debian.org/debian stretch-backports main >> /etc/apt/sources.list && \
-    apt update && \
-    apt install -y \
-      llvm-6.0 \
-      && \
-    rm -rf /var/lib/apt/lists/*
+RUN set -x && \
+    rustup install nightly-$date && \
+    rustup default nightly-$date && \
+    rustup component add clippy-preview --toolchain=nightly-$date && \
+    rustc --version && \
+    cargo --version && \
+    cargo +nightly-$date install cargo-cov
--- a/ci/docker-rust-nightly/README.md
+++ b/ci/docker-rust-nightly/README.md
@ -1,6 +1,36 @@
 Docker image containing rust nightly and some preinstalled crates used in CI.

-This image may be manually updated by running `./build.sh` if you are a member
+This image may be manually updated by running `CI=true ./build.sh` if you are a member
 of the [Solana Labs](https://hub.docker.com/u/solanalabs/) Docker Hub
 organization, but it is also automatically updated periodically by
 [this automation](https://buildkite.com/solana-labs/solana-ci-docker-rust-nightly).
+
+## Moving to a newer nightly
+
+We pin the version of nightly (see the `ARG date` line in `Dockerfile`)
+to avoid the build breaking at unexpected times, as occasionally nightly will
+introduce breaking changes.
+
+To update the pinned version:
+1. Run `ci/docker-rust-nightly/build.sh` to rebuild the nightly image locally,
+   or potentially `ci/docker-rust-nightly/build.sh YYYY-MM-DD` if there's a
+   specific YYYY-MM-DD that is desired (default is today's build).
+1. Run `SOLANA_DOCKER_RUN_NOSETUID=1 ci/docker-run.sh --nopull solanalabs/rust-nightly:YYYY-MM-DD ci/test-nightly.sh`
+   to confirm the new nightly image builds.  Fix any issues as needed
+1. Run `docker login` to enable pushing images to Docker Hub, if you're authorized.
+1. Run `CI=true ci/docker-rust-nightly/build.sh YYYY-MM-DD` to push the new nightly image to dockerhub.com.
+1. Modify the `solanalabs/rust-nightly:YYYY-MM-DD` reference in `ci/buildkite.yml` from the previous to
+   new *YYYY-MM-DD* value, send a PR with this change and any codebase adjustments needed.
+
+## Troubleshooting
+
+### Resource is denied
+
+When running `CI=true ci/docker-rust-nightly/build.sh`, you see:
+
+```
+denied: requested access to the resource is denied
+```
+
+Run `docker login` to enable pushing images to Docker Hub. Contact @mvines or @garious
+to get write access.
--- a/ci/docker-rust-nightly/build.sh
+++ b/ci/docker-rust-nightly/build.sh
@ -2,5 +2,12 @@

 cd "$(dirname "$0")"

-docker build -t solanalabs/rust-nightly .
-docker push solanalabs/rust-nightly
+# Build the image for the nightly date given as the first argument, falling
+# back to today's date when none is supplied.
+nightlyDate=${1:-$(date +%Y-%m-%d)}
+imageTag=solanalabs/rust-nightly:"$nightlyDate"
+docker build -t "$imageTag" --build-arg date="$nightlyDate" .
+
+# Only publish when $CI is set; otherwise just print the push command that
+# would have been run.
+if [[ -n $CI ]]; then
+  docker push "$imageTag"
+else
+  echo "Not CI, skipping |docker push|"
+  echo docker push "$imageTag"
+fi
--- a/ci/docker-rust/Dockerfile
+++ b/ci/docker-rust/Dockerfile
@ -1,15 +1,23 @@
+# Note: when the rust version (1.28) is changed also modify
+# ci/buildkite.yml to pick up the new image tag
 FROM rust:1.28

-RUN apt update && \
+RUN set -x && \
+    apt update && \
    apt-get install apt-transport-https && \
    echo deb https://apt.buildkite.com/buildkite-agent stable main > /etc/apt/sources.list.d/buildkite-agent.list && \
+    echo deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-6.0 main > /etc/apt/sources.list.d/llvm.list && \
    apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 32A37959C2FA5C3C99EFBC32A79206696452D198 && \
+    wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
    apt update && \
    apt install -y \
      buildkite-agent \
+      cmake \
+      llvm-6.0 \
      rsync \
      sudo \
-      cmake \
      && \
    rustup component add rustfmt-preview && \
-    rm -rf /var/lib/apt/lists/*
+    rm -rf /var/lib/apt/lists/* && \
+    rustc --version && \
+    cargo --version
--- a/ci/docker-rust/build.sh
+++ b/ci/docker-rust/build.sh
@ -3,4 +3,9 @@
 cd "$(dirname "$0")"

 docker build -t solanalabs/rust .
+
+# Tag the freshly built image with the rustc version it reports, so the image
+# can be referenced by a fixed version tag.  --rm removes the throwaway
+# container once rustc has printed its version instead of leaving a stopped
+# container behind on every build.
+read -r rustc version _ < <(docker run --rm solanalabs/rust rustc --version)
+# Fail explicitly (rather than relying on errexit) if the output does not look
+# like "rustc <version> ...", so a bogus tag is never created.
+if [[ $rustc != rustc || -z $version ]]; then
+  echo "Error: unexpected |rustc --version| output: $rustc $version"
+  exit 1
+fi
+docker tag solanalabs/rust:latest solanalabs/rust:"$version"
+
 docker push solanalabs/rust
--- a/ci/hoover.sh
+++ b/ci/hoover.sh
@ -40,10 +40,10 @@ echo --- Remove unused docker networks
  docker network prune -f
 )

-echo "--- Delete /tmp files older than 1 day owned by $(whoami)"
+echo "--- Delete /tmp files older than 1 day owned by $(id -un)"
 (
  set -x
-  find /tmp -maxdepth 1 -user "$(whoami)" -mtime +1 -print0 | xargs -0 rm -rf
+  find /tmp -maxdepth 1 -user "$(id -un)" -mtime +1 -print0 | xargs -0 rm -rf
 )

 echo --- Deleting stale buildkite agent build directories
--- a/ci/install-earlyoom.sh
+++ b/ci/install-earlyoom.sh
@ -1,32 +0,0 @@
-#!/bin/bash -x
-#
-# Install EarlyOOM
-#
-
-[[ $(uname) = Linux ]] || exit 1
-
-# 64 - enable signalling of processes (term, kill, oom-kill)
-# TODO: This setting will not persist across reboots
-sysrq=$(( $(cat /proc/sys/kernel/sysrq) | 64 ))
-sudo sysctl -w kernel.sysrq=$sysrq
-
-if command -v earlyoom; then
-  sudo systemctl status earlyoom
-  exit 0
-fi
-
-wget http://ftp.us.debian.org/debian/pool/main/e/earlyoom/earlyoom_1.1-2_amd64.deb
-sudo apt install --quiet --yes ./earlyoom_1.1-2_amd64.deb
-
-cat > earlyoom <<OOM
-# use the kernel OOM killer, trigger at 20% available RAM,
-EARLYOOM_ARGS="-k -m 20"
-OOM
-sudo cp earlyoom /etc/default/
-rm earlyoom
-
-sudo systemctl stop earlyoom
-sudo systemctl enable earlyoom
-sudo systemctl start earlyoom
-
-exit 0
--- a/ci/localnet-sanity.sh
+++ b/ci/localnet-sanity.sh
@ -6,9 +6,9 @@

 cd "$(dirname "$0")"/..
 source ci/upload_ci_artifact.sh
-source multinode-demo/common.sh
+source scripts/configure-metrics.sh

-./multinode-demo/setup.sh
+multinode-demo/setup.sh

 backgroundCommands="drone leader validator validator-x"
 pids=()
@ -16,7 +16,7 @@ pids=()
 for cmd in $backgroundCommands; do
  echo "--- Start $cmd"
  rm -f log-"$cmd".txt
-  ./multinode-demo/"$cmd".sh > log-"$cmd".txt 2>&1 &
+  multinode-demo/"$cmd".sh > log-"$cmd".txt 2>&1 &
  declare pid=$!
  pids+=("$pid")
  echo "pid: $pid"
@ -64,21 +64,28 @@ flag_error() {
 echo "--- Wallet sanity"
 (
  set -x
-  multinode-demo/test/wallet-sanity.sh
+  scripts/wallet-sanity.sh
 ) || flag_error

 echo "--- Node count"
 (
+  source multinode-demo/common.sh
  set -x
-  ./multinode-demo/client.sh "$PWD" 3 -c --addr 127.0.0.1
+  client_id=/tmp/client-id.json-$$
+  $solana_keygen -o $client_id
+  $solana_bench_tps --identity $client_id --num-nodes 3 --converge-only
+  rm -rf $client_id
 ) || flag_error

 killBackgroundCommands

 echo "--- Ledger verification"
 (
+  source multinode-demo/common.sh
  set -x
-  $solana_ledger_tool --ledger "$SOLANA_CONFIG_DIR"/ledger verify
+  cp -R "$SOLANA_CONFIG_DIR"/ledger /tmp/ledger-$$
+  $solana_ledger_tool --ledger /tmp/ledger-$$ verify
+  rm -rf /tmp/ledger-$$
 ) || flag_error

 echo +++
--- a/ci/semver_bash/LICENSE
+++ b/ci/semver_bash/LICENSE
@ -0,0 +1,26 @@
+Copyright (c) 2013, Ray Bejjani
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met: 
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer. 
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies, 
+either expressed or implied, of the FreeBSD Project.
--- a/ci/semver_bash/README.md
+++ b/ci/semver_bash/README.md
@ -0,0 +1,31 @@
+semver_bash is a bash parser for semantic versioning
+====================================================
+
+[Semantic Versioning](http://semver.org/) is a set of guidelines that help keep
+version and version management sane. This is a bash based parser to help manage
+a project's versions. Use it from a Makefile or any scripts you use in your
+project.
+
+Usage
+-----
+semver_bash can be used from the command line as:  
+
+    $ ./semver.sh "3.2.1" "3.2.1-alpha"  
+    3.2.1 -> M: 3 m:2 p:1 s:  
+    3.2.1-alpha -> M: 3 m:2 p:1 s:-alpha  
+    3.2.1 == 3.2.1-alpha -> 1.  
+    3.2.1 < 3.2.1-alpha -> 1.  
+    3.2.1 > 3.2.1-alpha -> 0.
+
+
+Alternatively, you can source it from within a script:
+
+    . ./semver.sh  
+    
+    local MAJOR=0  
+    local MINOR=0  
+    local PATCH=0  
+    local SPECIAL=""
+    
+    semverParseInto "1.2.3" MAJOR MINOR PATCH SPECIAL  
+    semverParseInto "3.2.1" MAJOR MINOR PATCH SPECIAL  
--- a/ci/semver_bash/semver.sh
+++ b/ci/semver_bash/semver.sh
@ -0,0 +1,130 @@
+#!/usr/bin/env bash
+# NOTE: bash is required -- this file uses `function`, `local` and `[[ ]]`,
+# none of which are available in a plain POSIX sh such as dash.
+
+# semverParseInto VERSION MAJOR_VAR MINOR_VAR PATCH_VAR SPECIAL_VAR
+#
+# Splits VERSION with sed and stores the pieces, via eval, into the variables
+# whose *names* are passed as arguments 2-5.  RE skips any leading non-digit
+# characters (e.g. the "R" in "R1.3.2"), then captures three dot-separated
+# runs of digits followed by a suffix made of letters, digits and dashes.
+function semverParseInto() {
+    local RE='[^0-9]*\([0-9]*\)[.]\([0-9]*\)[.]\([0-9]*\)\([0-9A-Za-z-]*\)'
+    #MAJOR
+    eval $2=`echo $1 | sed -e "s#$RE#\1#"`
+    #MINOR
+    eval $3=`echo $1 | sed -e "s#$RE#\2#"`
+    #PATCH
+    eval $4=`echo $1 | sed -e "s#$RE#\3#"`
+    #SPECIAL
+    eval $5=`echo $1 | sed -e "s#$RE#\4#"`
+}
+
+# semverEQ A B
+#
+# Returns 0 when versions A and B have the same major, minor and patch numbers
+# and the same suffix; returns 1 otherwise.
+function semverEQ() {
+    local MAJOR_A=0 MINOR_A=0 PATCH_A=0 SPECIAL_A=0
+    local MAJOR_B=0 MINOR_B=0 PATCH_B=0 SPECIAL_B=0
+
+    semverParseInto $1 MAJOR_A MINOR_A PATCH_A SPECIAL_A
+    semverParseInto $2 MAJOR_B MINOR_B PATCH_B SPECIAL_B
+
+    # Any single differing component makes the versions unequal.
+    if [ $MAJOR_A -ne $MAJOR_B ] ||
+       [ $MINOR_A -ne $MINOR_B ] ||
+       [ $PATCH_A -ne $PATCH_B ] ||
+       [[ "_$SPECIAL_A" != "_$SPECIAL_B" ]]; then
+        return 1
+    fi
+
+    return 0
+}
+
+# semverLT A B
+#
+# Returns 0 when version A sorts strictly before version B, 1 otherwise.
+#
+# The numeric fields are compared most-significant first (major, minor,
+# patch); the first field that differs decides the result.  Only when all
+# three are equal is the suffix consulted: a version with a suffix sorts
+# before the same version without one, and two suffixes are compared as
+# strings.
+function semverLT() {
+    local MAJOR_A=0
+    local MINOR_A=0
+    local PATCH_A=0
+    local SPECIAL_A=0
+
+    local MAJOR_B=0
+    local MINOR_B=0
+    local PATCH_B=0
+    local SPECIAL_B=0
+
+    semverParseInto $1 MAJOR_A MINOR_A PATCH_A SPECIAL_A
+    semverParseInto $2 MAJOR_B MINOR_B PATCH_B SPECIAL_B
+
+    # A greater field must end the comparison too; otherwise e.g.
+    # "2.0.0-alpha" would fall through to the suffix rules below and be
+    # reported as less than "1.0.0".
+    if [ $MAJOR_A -lt $MAJOR_B ]; then
+        return 0
+    fi
+    if [ $MAJOR_A -gt $MAJOR_B ]; then
+        return 1
+    fi
+
+    if [ $MINOR_A -lt $MINOR_B ]; then
+        return 0
+    fi
+    if [ $MINOR_A -gt $MINOR_B ]; then
+        return 1
+    fi
+
+    if [ $PATCH_A -lt $PATCH_B ]; then
+        return 0
+    fi
+    if [ $PATCH_A -gt $PATCH_B ]; then
+        return 1
+    fi
+
+    # major.minor.patch are identical from here on; only the suffix differs.
+    if [[ "_$SPECIAL_A"  == "_" ]] && [[ "_$SPECIAL_B"  == "_" ]] ; then
+        return 1
+    fi
+    if [[ "_$SPECIAL_A"  == "_" ]] && [[ "_$SPECIAL_B"  != "_" ]] ; then
+        return 1
+    fi
+    if [[ "_$SPECIAL_A"  != "_" ]] && [[ "_$SPECIAL_B"  == "_" ]] ; then
+        return 0
+    fi
+
+    if [[ "_$SPECIAL_A" < "_$SPECIAL_B" ]]; then
+        return 0
+    fi
+
+    return 1
+
+}
+
+# semverGT A B
+#
+# Returns 0 when A is neither equal to nor less than B (according to semverEQ
+# and semverLT), 1 otherwise.
+function semverGT() {
+    local notEqual notLess
+
+    semverEQ $1 $2
+    notEqual=$?
+
+    semverLT $1 $2
+    notLess=$?
+
+    if [ $notEqual -eq 0 ] || [ $notLess -eq 0 ]; then
+        return 1
+    fi
+    return 0
+}
+
+# When this file is run as "semver.sh" (rather than sourced under another
+# script name), parse the two versions given as $1 and $2 and print the exit
+# status of each comparison (0 means the relation holds, 1 means it does not).
+if [ "___semver.sh" == "___`basename $0`" ]; then
+
+MAJOR=0
+MINOR=0
+PATCH=0
+SPECIAL=""
+
+semverParseInto $1 MAJOR MINOR PATCH SPECIAL
+echo "$1 -> M: $MAJOR m:$MINOR p:$PATCH s:$SPECIAL"
+
+semverParseInto $2 MAJOR MINOR PATCH SPECIAL
+echo "$2 -> M: $MAJOR m:$MINOR p:$PATCH s:$SPECIAL"
+
+semverEQ $1 $2
+echo "$1 == $2 -> $?."
+
+semverLT $1 $2
+echo "$1 < $2 -> $?."
+
+semverGT $1 $2
+echo "$1 > $2 -> $?."
+
+fi
--- a/ci/semver_bash/semver_test.sh
+++ b/ci/semver_bash/semver_test.sh
@ -0,0 +1,151 @@
+#!/usr/bin/env bash
+
+. ./semver.sh
+
+semverTest() {
+local A=R1.3.2
+local B=R2.3.2
+local C=R1.4.2
+local D=R1.3.3
+local E=R1.3.2a
+local F=R1.3.2b
+local G=R1.2.3
+
+local MAJOR=0
+local MINOR=0
+local PATCH=0
+local SPECIAL=""
+
+semverParseInto $A MAJOR MINOR PATCH SPECIAL
+echo "$A -> M:$MAJOR m:$MINOR p:$PATCH s:$SPECIAL. Expect M:1 m:3 p:2 s:"
+semverParseInto $E MAJOR MINOR PATCH SPECIAL
+echo "$E -> M:$MAJOR m:$MINOR p:$PATCH s:$SPECIAL. Expect M:1 m:3 p:2 s:a"
+
+echo "Equality comparisions"
+semverEQ $A $A
+echo "$A == $A -> $?. Expect 0."
+
+semverLT $A $A
+echo "$A < $A -> $?. Expect 1."
+
+semverGT $A $A
+echo "$A > $A -> $?. Expect 1."
+
+
+echo "Major number comparisions"
+semverEQ $A $B
+echo "$A == $B -> $?. Expect 1."
+
+semverLT $A $B
+echo "$A < $B -> $?. Expect 0."
+
+semverGT $A $B
+echo "$A > $B -> $?. Expect 1."
+
+semverEQ $B $A
+echo "$B == $A -> $?. Expect 1."
+
+semverLT $B $A
+echo "$B < $A -> $?. Expect 1."
+
+semverGT $B $A
+echo "$B > $A -> $?. Expect 0."
+
+
+echo "Minor number comparisions"
+semverEQ $A $C
+echo "$A == $C -> $?. Expect 1."
+
+semverLT $A $C
+echo "$A < $C -> $?. Expect 0."
+
+semverGT $A $C
+echo "$A > $C -> $?. Expect 1."
+
+semverEQ $C $A
+echo "$C == $A -> $?. Expect 1."
+
+semverLT $C $A
+echo "$C < $A -> $?. Expect 1."
+
+semverGT $C $A
+echo "$C > $A -> $?. Expect 0."
+
+echo "patch number comparisions"
+semverEQ $A $D
+echo "$A == $D -> $?. Expect 1."
+
+semverLT $A $D
+echo "$A < $D -> $?. Expect 0."
+
+semverGT $A $D
+echo "$A > $D -> $?. Expect 1."
+
+semverEQ $D $A
+echo "$D == $A -> $?. Expect 1."
+
+semverLT $D $A
+echo "$D < $A -> $?. Expect 1."
+
+semverGT $D $A
+echo "$D > $A -> $?. Expect 0."
+
+echo "special section vs no special comparisions"
+semverEQ $A $E
+echo "$A == $E -> $?. Expect 1."
+
+semverLT $A $E
+echo "$A < $E -> $?. Expect 1."
+
+semverGT $A $E
+echo "$A > $E -> $?. Expect 0."
+
+semverEQ $E $A
+echo "$E == $A -> $?. Expect 1."
+
+semverLT $E $A
+echo "$E < $A -> $?. Expect 0."
+
+semverGT $E $A
+echo "$E > $A -> $?. Expect 1."
+
+echo "special section vs special comparisions"
+semverEQ $E $F
+echo "$E == $F -> $?. Expect 1."
+
+semverLT $E $F
+echo "$E < $F -> $?. Expect 0."
+
+semverGT $E $F
+echo "$E > $F -> $?. Expect 1."
+
+semverEQ $F $E
+echo "$F == $E -> $?. Expect 1."
+
+semverLT $F $E
+echo "$F < $E -> $?. Expect 1."
+
+semverGT $F $E
+echo "$F > $E -> $?. Expect 0."
+
+echo "Minor and patch number comparisons"
+semverEQ $A $G
+echo "$A == $G -> $?. Expect 1."
+
+semverLT $A $G
+echo "$A < $G -> $?. Expect 1."
+
+semverGT $A $G
+echo "$A > $G -> $?. Expect 0."
+
+semverEQ $G $A
+echo "$G == $A -> $?. Expect 1."
+
+semverLT $G $A
+echo "$G < $A -> $?. Expect 0."
+
+semverGT $G $A
+echo "$G > $A -> $?. Expect 1."
+}
+
+semverTest
--- a/ci/shellcheck.sh
+++ b/ci/shellcheck.sh
@ -6,6 +6,7 @@ cd "$(dirname "$0")/.."

 set -x
 find . -name "*.sh" \
+    -not -regex ".*/ci/semver_bash/.*" \
    -not -regex ".*/.cargo/.*" \
    -not -regex ".*/node_modules/.*" \
    -not -regex ".*/target/.*" \
--- a/ci/snap.sh
+++ b/ci/snap.sh
@ -7,16 +7,21 @@ if [[ -z $BUILDKITE_BRANCH ]] || ./ci/is-pr.sh; then
  DRYRUN="echo"
 fi

-# BUILDKITE_TAG is the normal environment variable set by Buildkite.  However
-# when this script is run from a triggered pipeline, TRIGGERED_BUILDKITE_TAG is
-# used instead of BUILDKITE_TAG (due to Buildkite limitations that prevents
-# BUILDKITE_TAG from propagating through to triggered pipelines)
-if [[ -z "$BUILDKITE_TAG" && -z "$TRIGGERED_BUILDKITE_TAG" ]]; then
+eval "$(ci/channel-info.sh)"
+
+if [[ $BUILDKITE_BRANCH = "$STABLE_CHANNEL" ]]; then
+  SNAP_CHANNEL=stable
+elif [[ $BUILDKITE_BRANCH = "$EDGE_CHANNEL" ]]; then
  SNAP_CHANNEL=edge
-else
+elif [[ $BUILDKITE_BRANCH = "$BETA_CHANNEL" ]]; then
  SNAP_CHANNEL=beta
 fi

+if [[ -z $SNAP_CHANNEL ]]; then
+  echo Unable to determine channel to publish into, exiting.
+  exit 0
+fi
+
 if [[ -z $DRYRUN ]]; then
  [[ -n $SNAPCRAFT_CREDENTIALS_KEY ]] || {
    echo SNAPCRAFT_CREDENTIALS_KEY not defined
@ -39,15 +44,18 @@ set -x

 echo --- checking for multilog
 if [[ ! -x /usr/bin/multilog ]]; then
-  echo "multilog not found, install with: sudo apt-get install -y daemontools"
-  exit 1
+  if [[ -z $CI ]]; then
+    echo "multilog not found, install with: sudo apt-get install -y daemontools"
+    exit 1
+  fi
+  sudo apt-get install -y daemontools
 fi

-echo --- build
+echo --- build: $SNAP_CHANNEL channel
 snapcraft

 source ci/upload_ci_artifact.sh
 upload_ci_artifact solana_*.snap

-echo --- publish
+echo --- publish: $SNAP_CHANNEL channel
 $DRYRUN snapcraft push solana_*.snap --release $SNAP_CHANNEL
--- a/ci/test-bench.sh
+++ b/ci/test-bench.sh
@ -2,7 +2,7 @@

 cd "$(dirname "$0")/.."

-ci/version-check.sh stable
+ci/version-check.sh nightly
 export RUST_BACKTRACE=1

 _() {
@ -10,4 +10,4 @@ _() {
  "$@"
 }

-_ cargo bench --verbose
+_ cargo bench --features=unstable --verbose
--- a/ci/test-large-network.sh
+++ b/ci/test-large-network.sh
@ -12,7 +12,7 @@ fi
 export RUST_BACKTRACE=1

 ./fetch-perf-libs.sh
-export LD_LIBRARY_PATH+=:$PWD
+export LD_LIBRARY_PATH=$PWD/target/perf-libs:$LD_LIBRARY_PATH

 export RUST_LOG=multinode=info

--- a/ci/test-nightly.sh
+++ b/ci/test-nightly.sh
@ -11,8 +11,11 @@ _() {
 }

 _ cargo build --verbose --features unstable
-_ cargo test --verbose --features unstable
-_ cargo clippy -- --deny=warnings
+_ cargo test --verbose --features=unstable
+
+# TODO: Re-enable warnings-as-errors after clippy offers a way to not warn on unscoped lint names.
+#_ cargo clippy -- --deny=warnings
+_ cargo clippy

 exit 0

@ -28,4 +31,3 @@ if [[ -z "$CODECOV_TOKEN" ]]; then
 else
  bash <(curl -s https://codecov.io/bash) -x 'llvm-cov-6.0 gcov'
 fi
-
--- a/ci/test-stable-perf.sh
+++ b/ci/test-stable-perf.sh
@ -11,7 +11,7 @@ fi
 export RUST_BACKTRACE=1

 ./fetch-perf-libs.sh
-export LD_LIBRARY_PATH=$PWD:/usr/local/cuda/lib64
+export LD_LIBRARY_PATH=$PWD/target/perf-libs:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
 export PATH=$PATH:/usr/local/cuda/bin

 _() {
--- a/ci/test-stable.sh
+++ b/ci/test-stable.sh
@ -22,4 +22,4 @@ echo --- ci/localnet-sanity.sh
  USE_INSTALL=1 ci/localnet-sanity.sh
 )

-_ ci/audit.sh
+_ ci/audit.sh || true
--- a/ci/testnet-deploy.sh
+++ b/ci/testnet-deploy.sh
@ -1,471 +1,113 @@
 #!/bin/bash -e
-#
-# Deploys the Solana software running on the testnet full nodes
-#
-# This script must be run by a user/machine that has successfully authenticated
-# with GCP and has sufficient permission.
-#
-here=$(dirname "$0")
-metrics_write_datapoint="$here"/../multinode-demo/metrics_write_datapoint.sh

-# TODO: Switch over to rolling updates
-ROLLING_UPDATE=false
-#ROLLING_UPDATE=true
+cd "$(dirname "$0")"/..

-if [[ -z $SOLANA_METRICS_CONFIG ]]; then
-  echo Error: SOLANA_METRICS_CONFIG environment variable is unset
-  exit 1
-fi
+zone=
+leaderAddress=
+clientNodeCount=0
+validatorNodeCount=10
+publicNetwork=false
+snapChannel=edge
+delete=false

-# Default to edge channel.  To select the beta channel:
-#   export SOLANA_SNAP_CHANNEL=beta
-if [[ -z $SOLANA_SNAP_CHANNEL ]]; then
-  SOLANA_SNAP_CHANNEL=edge
-fi
+usage() {
+  exitcode=0
+  if [[ -n "$1" ]]; then
+    exitcode=1
+    echo "Error: $*"
+  fi
+  cat <<EOF
+usage: $0 [name] [zone] [options...]

-# Select default network URL based on SOLANA_SNAP_CHANNEL if SOLANA_NET_ENTRYPOINT is
-# unspecified
-if [[ -z $SOLANA_NET_ENTRYPOINT ]]; then
-  case $SOLANA_SNAP_CHANNEL in
-  edge)
-    SOLANA_NET_ENTRYPOINT=master.testnet.solana.com
-    unset SOLANA_NET_NAME
+Deploys a CD testnet
+
+  name  - name of the network
+  zone  - GCE zone to deploy the network into
+
+  options:
+   -s edge|beta|stable  - Deploy the specified Snap release channel
+                          (default: $snapChannel)
+   -n [number]          - Number of validator nodes (default: $validatorNodeCount)
+   -c [number]          - Number of client nodes (default: $clientNodeCount)
+   -P                   - Use public network IP addresses (default: $publicNetwork)
+   -a [address]         - Set the leader node's external IP address to this GCE address
+   -d                   - Delete the network
+
+   Note: the SOLANA_METRICS_CONFIG environment variable is used to configure
+         metrics
+EOF
+  exit $exitcode
+}
+
+netName=$1
+zone=$2
+[[ -n $netName ]] || usage
+[[ -n $zone ]] || usage "Zone not specified"
+shift 2
+
+while getopts "h?p:Pn:c:s:a:d" opt; do
+  case $opt in
+  h | \?)
+    usage
    ;;
-  beta)
-    SOLANA_NET_ENTRYPOINT=testnet.solana.com
-    unset SOLANA_NET_NAME
+  P)
+    publicNetwork=true
+    ;;
+  n)
+    validatorNodeCount=$OPTARG
+    ;;
+  c)
+    clientNodeCount=$OPTARG
+    ;;
+  s)
+    case $OPTARG in
+    edge|beta|stable)
+      snapChannel=$OPTARG
+      ;;
+    *)
+      usage "Invalid snap channel: $OPTARG"
+      ;;
+    esac
+    ;;
+  a)
+    leaderAddress=$OPTARG
+    ;;
+  d)
+    delete=true
    ;;
  *)
-    echo Error: Unknown SOLANA_SNAP_CHANNEL=$SOLANA_SNAP_CHANNEL
-    exit 1
+    # usage() already prefixes its argument with "Error: ".
+    usage "unhandled option: $opt"
    ;;
  esac
+done
+
+
+gce_create_args=(
+  -a "$leaderAddress"
+  -c "$clientNodeCount"
+  -n "$validatorNodeCount"
+  -g
+  -p "$netName"
+  -z "$zone"
+)
+
+if $publicNetwork; then
+  gce_create_args+=(-P)
 fi

-if [[ -z $SOLANA_NET_NAME ]]; then
-  SOLANA_NET_NAME=${SOLANA_NET_ENTRYPOINT//./-}
+set -x
+
+echo --- gce.sh delete
+time net/gce.sh delete -p "$netName"
+if $delete; then
+  exit 0
 fi

-: ${SOLANA_NET_NAME:?$SOLANA_NET_ENTRYPOINT}
-netBasename=${SOLANA_NET_NAME/-*/}
-if [[ $netBasename != testnet ]]; then
-  netBasename="testnet-$netBasename"
-fi
+echo --- gce.sh create
+time net/gce.sh create "${gce_create_args[@]}"
+net/init-metrics.sh -e

-# Figure installation command
-SNAP_INSTALL_CMD="\
-  for i in {1..3}; do \
-    sudo snap install solana --$SOLANA_SNAP_CHANNEL --devmode && break;
-    sleep 1; \
-  done \
-"
-LOCAL_SNAP=$1
-if [[ -n $LOCAL_SNAP ]]; then
-  if [[ ! -f $LOCAL_SNAP ]]; then
-    echo "Error: $LOCAL_SNAP is not a file"
-    exit 1
-  fi
-  SNAP_INSTALL_CMD="sudo snap install ~/solana_local.snap --devmode --dangerous"
-fi
-SNAP_INSTALL_CMD="sudo snap remove solana; $SNAP_INSTALL_CMD"
-
-EARLYOOM_INSTALL_CMD="\
-  wget -O install-earlyoom.sh https://raw.githubusercontent.com/solana-labs/solana/master/ci/install-earlyoom.sh; \
-  bash install-earlyoom.sh \
-"
-SNAP_INSTALL_CMD="$EARLYOOM_INSTALL_CMD; $SNAP_INSTALL_CMD"
-
-# `export SKIP_INSTALL=1` to reset the network without reinstalling the snap
-if [[ -n $SKIP_INSTALL ]]; then
-  SNAP_INSTALL_CMD="echo Install skipped"
-fi
-
-echo "+++ Configuration for $netBasename"
-publicUrl="$SOLANA_NET_ENTRYPOINT"
-if [[ $publicUrl = testnet.solana.com ]]; then
-  publicIp="" # Use default value
-else
-  publicIp=$(dig +short $publicUrl | head -n1)
-fi
-
-echo "Network name: $SOLANA_NET_NAME"
-echo "Network entry point URL: $publicUrl ($publicIp)"
-echo "Snap channel: $SOLANA_SNAP_CHANNEL"
-echo "Install command: $SNAP_INSTALL_CMD"
-echo "Setup args: $SOLANA_SETUP_ARGS"
-[[ -z $LOCAL_SNAP ]] || echo "Local snap: $LOCAL_SNAP"
-
-vmlist=() # Each array element is formatted as "class:vmName:vmZone:vmPublicIp"
-
-vm_exec() {
-  declare vmName=$1
-  declare vmZone=$2
-  declare vmPublicIp=$3
-  declare message=$4
-  declare cmd=$5
-
-  echo "--- $message $vmName in zone $vmZone ($vmPublicIp)"
-  ssh -o BatchMode=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-    testnet-deploy@"$vmPublicIp" "$cmd"
-}
-
-#
-# vm_foreach [cmd] [extra args to cmd]
-# where
-#   cmd   - the command to execute on each VM
-#           The command will receive three fixed arguments, followed by any
-#           additionl arguments supplied to vm_foreach:
-#               vmName - GCP name of the VM
-#               vmZone - The GCP zone the VM is located in
-#               vmPublicIp - The public IP address of this VM
-#               vmClass - The 'class' of this VM
-#               count  - Monotonically increasing count for each
-#                        invocation of cmd, starting at 1
-#               ...    - Extra args to cmd..
-#
-#
-vm_foreach() {
-  declare cmd=$1
-  shift
-
-  declare count=1
-  for info in "${vmlist[@]}"; do
-    declare vmClass vmName vmZone vmPublicIp
-    IFS=: read -r vmClass vmName vmZone vmPublicIp < <(echo "$info")
-
-    eval "$cmd" "$vmName" "$vmZone" "$vmPublicIp" "$vmClass" "$count" "$@"
-    count=$((count + 1))
-  done
-}
-
-#
-# vm_foreach_in_class [class] [cmd]
-# where
-#   class - the desired VM class to operate on
-#   cmd   - the command to execute on each VM in the desired class.
-#           The command will receive three arguments:
-#               vmName - GCP name of the VM
-#               vmZone - The GCP zone the VM is located in
-#               vmPublicIp - The public IP address of this VM
-#               count  - Monotonically increasing count for each
-#                        invocation of cmd, starting at 1
-#
-#
-_run_cmd_if_class() {
-  declare vmName=$1
-  declare vmZone=$2
-  declare vmPublicIp=$3
-  declare vmClass=$4
-  declare count=$5
-  declare class=$6
-  declare cmd=$7
-  if [[ $class = "$vmClass" ]]; then
-    eval "$cmd" "$vmName" "$vmZone" "$vmPublicIp" "$count"
-  fi
-}
-
-vm_foreach_in_class() {
-  declare class=$1
-  declare cmd=$2
-  vm_foreach _run_cmd_if_class "$1" "$2"
-}
-
-#
-# Load all VMs matching the specified filter and tag them with the specified
-# class into the `vmlist` array.
-findVms() {
-  declare class="$1"
-  declare filter="$2"
-  gcloud compute instances list --filter="$filter"
-  while read -r vmName vmZone vmPublicIp status; do
-    if [[ $status != RUNNING ]]; then
-      echo "Warning: $vmName is not RUNNING, ignoring it."
-      continue
-    fi
-    vmlist+=("$class:$vmName:$vmZone:$vmPublicIp")
-  done < <(gcloud compute instances list \
-             --filter="$filter" \
-             --format 'value(name,zone,networkInterfaces[0].accessConfigs[0].natIP,status)')
-}
-
-wait_for_pids() {
-  echo "--- Waiting for $*"
-  for pid in "${pids[@]}"; do
-    declare ok=true
-    wait "$pid" || ok=false
-    cat "log-$pid.txt"
-    if ! $ok; then
-      echo ^^^ +++
-      exit 1
-    fi
-    rm "log-$pid.txt"
-  done
-}
-
-delete_unreachable_validators() {
-  declare vmName=$1
-  declare vmZone=$2
-  declare vmPublicIp=$3
-
-  touch "log-$vmName.txt"
-  (
-    SECONDS=0
-    if ! vm_exec "$vmName" "$vmZone" "$vmPublicIp" "Checking $vmName" uptime; then
-      echo "^^^ +++"
-
-      # Validators are managed by a Compute Engine Instance Group, so deleting
-      # one will just cause a new one to be spawned.
-      echo "Warning: $vmName is unreachable, deleting it"
-      gcloud compute instances delete "$vmName" --zone "$vmZone"
-    fi
-    echo "validator checked in ${SECONDS} seconds"
-  ) >> "log-$vmName.txt" 2>&1 &
-  declare pid=$!
-
-  # Rename log file so it can be discovered later by $pid
-  mv "log-$vmName.txt" "log-$pid.txt"
-  pids+=("$pid")
-}
-
-
-echo "Validator nodes (unverified):"
-findVms validator "name~^$SOLANA_NET_NAME-validator-"
-pids=()
-vm_foreach_in_class validator delete_unreachable_validators
-wait_for_pids validator sanity check
-vmlist=()
-
-echo "Leader node:"
-findVms leader "name=$SOLANA_NET_NAME"
-[[ ${#vmlist[@]} = 1 ]] || {
-  echo "Unable to find $SOLANA_NET_NAME"
-  exit 1
-}
-
-echo "Client node(s):"
-findVms client "name~^$SOLANA_NET_NAME-client"
-
-echo "Validator nodes:"
-findVms validator "name~^$SOLANA_NET_NAME-validator-"
-
-fullnode_count=0
-inc_fullnode_count() {
-  fullnode_count=$((fullnode_count + 1))
-}
-vm_foreach_in_class leader inc_fullnode_count
-vm_foreach_in_class validator inc_fullnode_count
-
-# Add "network stopping" datapoint
-$metrics_write_datapoint "testnet-deploy,name=$netBasename stop=1"
-
-client_start() {
-  declare vmName=$1
-  declare vmZone=$2
-  declare vmPublicIp=$3
-  declare count=$4
-
-  vm_exec "$vmName" "$vmZone" "$vmPublicIp" \
-    "Starting client $count:" \
-    "\
-      set -x;
-      snap info solana; \
-      sudo snap get solana; \
-      threadCount=\$(nproc); \
-      if [[ \$threadCount -gt 4 ]]; then threadCount=4; fi; \
-      tmux kill-session -t solana; \
-      tmux new -s solana -d \" \
-          set -x; \
-          sudo rm /tmp/solana.log; \
-          while : ; do \
-              /snap/bin/solana.bench-tps $SOLANA_NET_ENTRYPOINT $fullnode_count --loop -s 600 --sustained -t \$threadCount 2>&1 | tee -a /tmp/solana.log; \
-              echo 'https://metrics.solana.com:8086/write?db=${INFLUX_DATABASE}&u=${INFLUX_USERNAME}&p=${INFLUX_PASSWORD}' \
-                | xargs curl --max-time 5 -XPOST --data-binary 'testnet-deploy,name=$netBasename clientexit=1'; \
-              echo Error: bench-tps should never exit | tee -a /tmp/solana.log; \
-          done; \
-          bash \
-        \"; \
-      sleep 2; \
-      tmux capture-pane -t solana -p -S -100; \
-      tail /tmp/solana.log; \
-  "
-}
-
-client_stop() {
-  declare vmName=$1
-  declare vmZone=$2
-  declare vmPublicIp=$3
-  declare count=$4
-
-  touch "log-$vmName.txt"
-  (
-    SECONDS=0
-    vm_exec "$vmName" "$vmZone" "$vmPublicIp" \
-      "Stopping client $vmName ($count):" \
-      "\
-        set -x;
-        tmux list-sessions; \
-        tmux capture-pane -t solana -p; \
-        tmux kill-session -t solana; \
-        $SNAP_INSTALL_CMD; \
-        sudo snap set solana metrics-config=$SOLANA_METRICS_CONFIG \
-          rust-log=$RUST_LOG \
-          default-metrics-rate=$SOLANA_DEFAULT_METRICS_RATE \
-        ; \
-      "
-    echo "Client stopped in ${SECONDS} seconds"
-  ) >> "log-$vmName.txt" 2>&1 &
-  declare pid=$!
-
-  # Rename log file so it can be discovered later by $pid
-  mv "log-$vmName.txt" "log-$pid.txt"
-  pids+=("$pid")
-}
-
-fullnode_start() {
-  declare class=$1
-  declare vmName=$2
-  declare vmZone=$3
-  declare vmPublicIp=$4
-  declare count=$5
-
-  touch "log-$vmName.txt"
-  (
-    SECONDS=0
-    commonNodeConfig="\
-      rust-log=$RUST_LOG \
-      default-metrics-rate=$SOLANA_DEFAULT_METRICS_RATE \
-      metrics-config=$SOLANA_METRICS_CONFIG \
-      setup-args=$SOLANA_SETUP_ARGS \
-    "
-    if [[ $class = leader ]]; then
-      nodeConfig="mode=leader+drone $commonNodeConfig"
-      if [[ -n $SOLANA_CUDA ]]; then
-        nodeConfig="$nodeConfig enable-cuda=1"
-      fi
-    else
-      nodeConfig="mode=validator leader-address=$publicIp $commonNodeConfig"
-    fi
-
-    vm_exec "$vmName" "$vmZone" "$vmPublicIp" "Starting $class $count:" \
-      "\
-        set -ex; \
-        logmarker='solana deploy $(date)/$RANDOM'; \
-        logger \"\$logmarker\"; \
-        $SNAP_INSTALL_CMD; \
-        sudo snap set solana $nodeConfig; \
-        snap info solana; \
-        sudo snap get solana; \
-        echo Slight delay to get more syslog output; \
-        sleep 2; \
-        sudo grep -Pzo \"\$logmarker(.|\\n)*\" /var/log/syslog \
-      "
-    echo "Succeeded in ${SECONDS} seconds"
-  ) >> "log-$vmName.txt" 2>&1 &
-  declare pid=$!
-
-  # Rename log file so it can be discovered later by $pid
-  mv "log-$vmName.txt" "log-$pid.txt"
-
-  pids+=("$pid")
-}
-
-leader_start() {
-  fullnode_start leader "$@"
-}
-
-validator_start() {
-  fullnode_start validator "$@"
-}
-
-fullnode_stop() {
-  declare vmName=$1
-  declare vmZone=$2
-  declare vmPublicIp=$3
-  declare count=$4
-
-  touch "log-$vmName.txt"
-  (
-    SECONDS=0
-    # Try to ping the machine first.  When a machine (validator) is restarted,
-    # there can be a delay between when the instance is reported as RUNNING and when
-    # it's reachable over the network
-    timeout 30s bash -c "set -o pipefail; until ping -c 3 $vmPublicIp | tr - _; do echo .; done"
-    vm_exec "$vmName" "$vmZone" "$vmPublicIp" "Shutting down" "\
-      if snap list solana; then \
-        sudo snap set solana mode=; \
-      fi"
-    echo "Succeeded in ${SECONDS} seconds"
-  ) >> "log-$vmName.txt" 2>&1 &
-  declare pid=$!
-
-  # Rename log file so it can be discovered later by $pid
-  mv "log-$vmName.txt" "log-$pid.txt"
-
-  pids+=("$pid")
-}
-
-if [[ -n $LOCAL_SNAP ]]; then
-  echo "--- Transferring $LOCAL_SNAP to node(s)"
-
-  transfer_local_snap() {
-    declare vmName=$1
-    declare vmZone=$2
-    declare vmPublicIp=$3
-    declare vmClass=$4
-    declare count=$5
-
-    echo "--- $vmName in zone $vmZone ($count)"
-    SECONDS=0
-    scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-      "$LOCAL_SNAP" testnet-deploy@"$vmPublicIp":solana_local.snap
-    echo "Succeeded in ${SECONDS} seconds"
-  }
-  vm_foreach transfer_local_snap
-fi
-
-echo "--- Stopping client node(s)"
-pids=()
-vm_foreach_in_class client client_stop
-client_stop_pids=("${pids[@]}")
-
-if ! $ROLLING_UPDATE; then
-  pids=()
-  echo "--- Shutting down all full nodes"
-  vm_foreach_in_class leader fullnode_stop
-  vm_foreach_in_class validator fullnode_stop
-  wait_for_pids fullnode shutdown
-fi
-
-pids=()
-echo --- Starting leader node
-vm_foreach_in_class leader leader_start
-wait_for_pids leader
-
-pids=()
-echo --- Starting validator nodes
-vm_foreach_in_class validator validator_start
-wait_for_pids validators
-
-echo "--- $publicUrl sanity test"
-if [[ -z $CI ]]; then
-  # TODO: ssh into a node and run testnet-sanity.sh there.  It's not safe to
-  #       assume the correct Snap is installed on the current non-CI machine
-  echo Skipped for non-CI deploy
-  snapVersion=unknown
-else
-  (
-    set -x
-    USE_SNAP=1 ci/testnet-sanity.sh $publicUrl $fullnode_count
-  )
-  IFS=\  read -r _ snapVersion _ < <(snap info solana | grep "^installed:")
-  snapVersion=${snapVersion/0+git./}
-fi
-
-pids=("${client_stop_pids[@]}")
-wait_for_pids client shutdown
-vm_foreach_in_class client client_start
-
-# Add "network started" datapoint
-$metrics_write_datapoint "testnet-deploy,name=$netBasename start=1,version=\"$snapVersion\""
+echo --- net.sh start
+time net/net.sh start -s "$snapChannel"

 exit 0
--- a/ci/testnet-sanity.sh
+++ b/ci/testnet-sanity.sh
@ -1,66 +1,36 @@
 #!/bin/bash -e
-#
-# Perform a quick sanity test on the specific testnet
-#

 cd "$(dirname "$0")/.."
-source multinode-demo/common.sh

-NET_URL=$1
-if [[ -z $NET_URL ]]; then
-  NET_URL=testnet.solana.com
-fi
-
-EXPECTED_NODE_COUNT=$2
-if [[ -z $EXPECTED_NODE_COUNT ]]; then
-  EXPECTED_NODE_COUNT=50
-fi
-
-echo "--- $NET_URL: verify ledger"
-if [[ -d /var/snap/solana/current/config/ledger ]]; then
-  # Note: here we assume this script is actually running on the leader node...
-  sudo solana.ledger-tool --ledger /var/snap/solana/current/config/ledger verify
-else
-  echo "^^^ +++"
-  echo "Ledger verify skipped"
-fi
-
-echo "--- $NET_URL: wallet sanity"
-(
-  set -x
-  multinode-demo/test/wallet-sanity.sh $NET_URL
-)
-
-echo "--- $NET_URL: node count"
-if [[ -n "$USE_SNAP" ]]; then
-  # TODO: Merge client.sh functionality into solana-bench-tps proper and
-  #       remove this USE_SNAP case
-  cmd=$solana_bench_tps
-else
-  cmd=multinode-demo/client.sh
-fi
-
-(
-  set -x
-  $cmd $NET_URL $EXPECTED_NODE_COUNT -c
-)
-
-echo "--- $NET_URL: validator sanity"
-if [[ -z $NO_VALIDATOR_SANITY ]]; then
-  (
-    ./multinode-demo/setup.sh -t validator
-    set -e pipefail
-    timeout 10s ./multinode-demo/validator.sh "$NET_URL" 2>&1 | tee validator.log
-  )
-  wc -l validator.log
-  if grep -C100 panic validator.log; then
-    echo "^^^ +++ Panic observed"
-    exit 1
-  else
-    echo "Validator log looks ok"
+usage() {
+  exitcode=0
+  if [[ -n "$1" ]]; then
+    exitcode=1
+    echo "Error: $*"
  fi
-else
-  echo "^^^ +++ Validator sanity disabled (NO_VALIDATOR_SANITY defined)"
-fi
+  cat <<EOF
+usage: $0 [name]
+
+Sanity check a CD testnet
+
+  name  - name of the network
+
+  Note: the SOLANA_METRICS_CONFIG environment variable is used to configure
+        metrics
+EOF
+  exit $exitcode
+}
+
+netName=$1
+[[ -n $netName ]] || usage ""
+
+set -x
+echo --- gce.sh config
+net/gce.sh config -p "$netName"
+net/init-metrics.sh -e
+echo --- net.sh sanity
+net/net.sh sanity \
+  ${NO_LEDGER_VERIFY:+-o noLedgerVerify} \
+  ${NO_VALIDATOR_SANITY:+-o noValidatorSanity} \

 exit 0
--- a/doc/json-rpc.md
+++ b/doc/json-rpc.md
@ -0,0 +1,178 @@
+Solana JSON RPC API
+===
+
+Solana nodes accept HTTP requests using the [JSON-RPC 2.0](https://www.jsonrpc.org/specification) specification.
+
+To interact with a Solana node inside a JavaScript application, use the [solana-web3.js](https://github.com/solana-labs/solana-web3.js) library, which gives a convenient interface for the RPC methods.
+
+RPC Endpoint
+---
+
+**Default port:** 8899  
+e.g. http://localhost:8899, http://192.168.1.88:8899
+
+Methods
+---
+
+* [confirmTransaction](#confirmtransaction)
+* [getAddress](#getaddress)
+* [getBalance](#getbalance)
+* [getLastId](#getlastid)
+* [getTransactionCount](#gettransactioncount)
+* [requestAirdrop](#requestairdrop)
+* [sendTransaction](#sendtransaction)
+
+Request Formatting
+---
+
+To make a JSON-RPC request, send an HTTP POST request with a `Content-Type: application/json` header. The JSON request data should contain 4 fields:
+
+* `jsonrpc`, set to `"2.0"`
+* `id`, a unique client-generated identifying integer
+* `method`, a string containing the method to be invoked
+* `params`, a JSON array of ordered parameter values
+
+Example using curl:
+```bash
+curl -X POST -H "Content-Type: application/json" -d '{"jsonrpc":"2.0", "id":1, "method":"getBalance", "params":["83astBRguLMdt2h5U1Tpdq5tjFoJ6noeGwaY3mDLVcri"]}' 192.168.1.88:8899
+```
+
+The response output will be a JSON object with the following fields:
+
+* `jsonrpc`, matching the request specification
+* `id`, matching the request identifier
+* `result`, requested data or success confirmation
+
+Requests can be sent in batches by sending an array of JSON-RPC request objects as the data for a single POST.
+
+Definitions
+---
+
+* Hash: A SHA-256 hash of a chunk of data.
+* Pubkey: The public key of an Ed25519 key-pair.
+* Signature: An Ed25519 signature of a chunk of data.
+* Transaction: A Solana instruction signed by a client key-pair.
+
+JSON RPC API Reference
+---
+
+### confirmTransaction
+Returns a transaction receipt
+
+##### Parameters:
+* `string` - Signature of Transaction to confirm, as base-58 encoded string
+
+##### Results:
+* `boolean` - Transaction status, true if Transaction is confirmed
+
+##### Example:
+```bash
+// Request
+curl -X POST -H "Content-Type: application/json" -d '{"jsonrpc":"2.0", "id":1, "method":"confirmTransaction", "params":["5VERv8NMvzbJMEkV8xnrLkEaWRtSz9CosKDYjCJjBRnbJLgp8uirBgmQpjKhoR4tjF3ZpRzrFmBV6UjKdiSZkQUW"]}' http://localhost:8899
+
+// Result
+{"jsonrpc":"2.0","result":true,"id":1}
+```
+
+---
+
+### getBalance
+Returns the balance of the account of the provided Pubkey
+
+##### Parameters:
+* `string` - Pubkey of account to query, as base-58 encoded string
+
+##### Results:
+* `integer` - quantity, as a signed 64-bit integer
+
+##### Example:
+```bash
+// Request
+curl -X POST -H "Content-Type: application/json" -d '{"jsonrpc":"2.0", "id":1, "method":"getBalance", "params":["83astBRguLMdt2h5U1Tpdq5tjFoJ6noeGwaY3mDLVcri"]}' http://localhost:8899
+
+// Result
+{"jsonrpc":"2.0","result":0,"id":1}
+```
+
+---
+
+### getLastId
+Returns the last entry ID from the ledger
+
+##### Parameters:
+None
+
+##### Results:
+* `string` - the ID of last entry, a Hash as base-58 encoded string
+
+##### Example:
+```bash
+// Request
+curl -X POST -H "Content-Type: application/json" -d '{"jsonrpc":"2.0","id":1, "method":"getLastId"}' http://localhost:8899
+
+// Result
+{"jsonrpc":"2.0","result":"GH7ome3EiwEr7tu9JuTh2dpYWBJK3z69Xm1ZE3MEE6JC","id":1}
+```
+
+---
+
+### getTransactionCount
+Returns the current Transaction count from the ledger
+
+##### Parameters:
+None
+
+##### Results:
+* `integer` - count, as an unsigned 64-bit integer
+
+##### Example:
+```bash
+// Request
+curl -X POST -H "Content-Type: application/json" -d '{"jsonrpc":"2.0","id":1, "method":"getTransactionCount"}' http://localhost:8899
+
+// Result
+{"jsonrpc":"2.0","result":268,"id":1}
+```
+
+---
+
+### requestAirdrop
+Requests an airdrop of tokens to a Pubkey
+
+##### Parameters:
+* `string` - Pubkey of account to receive tokens, as base-58 encoded string
+* `integer` - token quantity, as a signed 64-bit integer
+
+##### Results:
+* `string` - Transaction Signature of airdrop, as base-58 encoded string
+
+##### Example:
+```bash
+// Request
+curl -X POST -H "Content-Type: application/json" -d '{"jsonrpc":"2.0","id":1, "method":"requestAirdrop", "params":["83astBRguLMdt2h5U1Tpdq5tjFoJ6noeGwaY3mDLVcri", 50]}' http://localhost:8899
+
+// Result
+{"jsonrpc":"2.0","result":"5VERv8NMvzbJMEkV8xnrLkEaWRtSz9CosKDYjCJjBRnbJLgp8uirBgmQpjKhoR4tjF3ZpRzrFmBV6UjKdiSZkQUW","id":1}
+```
+
+---
+
+### sendTransaction
+Creates a new transaction
+
+##### Parameters:
+* `array` - array of octets containing a fully-signed Transaction
+
+##### Results:
+* `string` - Transaction Signature, as base-58 encoded string
+
+##### Example:
+```bash
+// Request
+curl -X POST -H "Content-Type: application/json" -d '{"jsonrpc":"2.0","id":1, "method":"sendTransaction", "params":[[61, 98, 55, 49, 15, 187, 41, 215, 176, 49, 234, 229, 228, 77, 129, 221, 239, 88, 145, 227, 81, 158, 223, 123, 14, 229, 235, 247, 191, 115, 199, 71, 121, 17, 32, 67, 63, 209, 239, 160, 161, 2, 94, 105, 48, 159, 235, 235, 93, 98, 172, 97, 63, 197, 160, 164, 192, 20, 92, 111, 57, 145, 251, 6, 40, 240, 124, 194, 149, 155, 16, 138, 31, 113, 119, 101, 212, 128, 103, 78, 191, 80, 182, 234, 216, 21, 121, 243, 35, 100, 122, 68, 47, 57, 13, 39, 0, 0, 0, 0, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 0, 0, 0, 0, 0, 40, 240, 124, 194, 149, 155, 16, 138, 31, 113, 119, 101, 212, 128, 103, 78, 191, 80, 182, 234, 216, 21, 121, 243, 35, 100, 122, 68, 47, 57, 11, 12, 106, 49, 74, 226, 201, 16, 161, 192, 28, 84, 124, 97, 190, 201, 171, 186, 6, 18, 70, 142, 89, 185, 176, 154, 115, 61, 26, 163, 77, 1, 88, 98, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]}' http://localhost:8899
+
+// Result
+{"jsonrpc":"2.0","result":"2EBVM6cB8vAAD93Ktr6Vd8p67XPbQzCJX47MpReuiCXJAtcjaxpvWpcg9Ege1Nr5Tk3a2GFrByT7WPBjdsTycY9b","id":1}
+```
+
+---
--- a/doc/testnet.md
+++ b/doc/testnet.md
@ -33,3 +33,12 @@ variable `RAYON_NUM_THREADS=<xx>`
 ## How can I test a change on the testnet?

 Currently, a merged PR is the only way to test a change on the testnet.
+
+## Adjusting the number of clients or validators on the testnet
+
+1. Go to the [GCP Instance Group](https://console.cloud.google.com/compute/instanceGroups/list?project=principal-lane-200702) tab
+2. Find the client or validator instance group you'd like to adjust
+3. Edit it (pencil icon), change the "Number of instances", then click the "Save" button
+4. Refresh until the change to number of instances has been executed
+5. Click the "New Build" button on the [testnet-deploy](https://buildkite.com/solana-labs/testnet-deploy/)
+   buildkite job to initiate a redeploy of the network with the updated instance count.
--- a/fetch-perf-libs.sh
+++ b/fetch-perf-libs.sh
@ -10,28 +10,30 @@ if [[ $(uname -m) != x86_64 ]]; then
  exit 1
 fi

+mkdir -p target/perf-libs
 (
-  set -x
-  curl -o solana-perf.tgz \
-    https://solana-perf.s3.amazonaws.com/master/x86_64-unknown-linux-gnu/solana-perf.tgz
-  tar zxvf solana-perf.tgz
-)
+  cd target/perf-libs
+  (
+    set -x
+    curl https://solana-perf.s3.amazonaws.com/master/x86_64-unknown-linux-gnu/solana-perf.tgz | tar zxvf -
+  )

-if [[ -r /usr/local/cuda/version.txt && -r cuda-version.txt ]]; then
-  if ! diff /usr/local/cuda/version.txt cuda-version.txt > /dev/null; then
+  if [[ -r /usr/local/cuda/version.txt && -r cuda-version.txt ]]; then
+    if ! diff /usr/local/cuda/version.txt cuda-version.txt > /dev/null; then
+        echo ==============================================
+        echo Warning: possible CUDA version mismatch
+        echo
+        echo "Expected version: $(cat cuda-version.txt)"
+        echo "Detected version: $(cat /usr/local/cuda/version.txt)"
+        echo ==============================================
+    fi
+  else
    echo ==============================================
-    echo Warning: possible CUDA version mismatch
-    echo
-    echo "Expected version: $(cat cuda-version.txt)"
-    echo "Detected version: $(cat /usr/local/cuda/version.txt)"
+    echo Warning: unable to validate CUDA version
    echo ==============================================
  fi
-else
-  echo ==============================================
-  echo Warning: unable to validate CUDA version
-  echo ==============================================
-fi

-echo "Downloaded solana-perf version: $(cat solana-perf-HEAD.txt)"
+  echo "Downloaded solana-perf version: $(cat solana-perf-HEAD.txt)"
+)

 exit 0
--- a/multinode-demo/client.sh
+++ b/multinode-demo/client.sh
@ -1,66 +1,25 @@
 #!/bin/bash -e
-#
-USAGE=" usage: $0 [leader_url] [num_nodes] [--loop] [extra args]
-
- leader_url       URL to the leader (defaults to ..)
- num_nodes        Minimum number of nodes to look for while converging
- --loop           Add this flag to cause the program to loop infinitely
- \"extra args\"     Any additional arguments are pass along to solana-bench-tps
-"

 here=$(dirname "$0")
 # shellcheck source=multinode-demo/common.sh
 source "$here"/common.sh

-leader=$1
-if [[ -n $leader ]]; then
-  if [[ $leader == "-h" || $leader == "--help" ]]; then
-    echo "$USAGE"
-    exit 0
+usage() {
+  if [[ -n $1 ]]; then
+    echo "$*"
+    echo
  fi
-  shift
+  echo "usage: $0 [extra args]"
+  echo
+  echo " Run bench-tps "
+  echo
+  echo "   extra args: additional arguments are pass along to solana-bench-tps"
+  echo
+  exit 1
+}
+
+if [[ -z $1 ]]; then # default behavior
+  $solana_bench_tps --identity config-private/client-id.json --network 127.0.0.1:8001 --duration 90
 else
-  if [[ -d "$SNAP" ]]; then
-    leader=testnet.solana.com # Default to testnet when running as a Snap
-  else
-    leader=$here/.. # Default to local solana repo
-  fi
+  $solana_bench_tps "$@"
 fi
-
-count=$1
-if [[ -n $count ]]; then
-  shift
-else
-  count=1
-fi
-
-loop=
-if [[ $1 = --loop ]]; then
-  loop=1
-  shift
-fi
-
-rsync_leader_url=$(rsync_url "$leader")
-(
-  set -x
-  mkdir -p "$SOLANA_CONFIG_CLIENT_DIR"
-  $rsync -vPz "$rsync_leader_url"/config/leader.json "$SOLANA_CONFIG_CLIENT_DIR"/
-
-  client_json="$SOLANA_CONFIG_CLIENT_DIR"/client.json
-  [[ -r $client_json ]] || $solana_keygen -o "$client_json"
-)
-
-iteration=0
-set -x
-while true; do
-    $solana_bench_tps \
-      -n "$count" \
-      -l "$SOLANA_CONFIG_CLIENT_DIR"/leader.json \
-      -k "$SOLANA_CONFIG_CLIENT_DIR"/client.json \
-      "$@"
-  [[ -n $loop ]] || exit 0
-  iteration=$((iteration + 1))
-  echo ------------------------------------------------------------------------
-  echo "Iteration: $iteration"
-  echo ------------------------------------------------------------------------
-done
--- a/multinode-demo/common.sh
+++ b/multinode-demo/common.sh
@ -1,7 +1,11 @@
 # |source| this file
 #
-# Disable complaints about unused variables in this file:
+# Common utilities shared by other scripts in this directory
+#
+# The following directive disables complaints about unused variables in this
+# file:
 # shellcheck disable=2034
+#

 rsync=rsync
 leader_logger="cat"
@ -41,12 +45,6 @@ if [[ -d $SNAP ]]; then # Running inside a Linux Snap?
  # 0700
  mkdir -p "$SNAP_DATA"/{drone,leader,validator}

-  SOLANA_METRICS_CONFIG="$(snapctl get metrics-config)"
-  SOLANA_DEFAULT_METRICS_RATE="$(snapctl get default-metrics-rate)"
-  export SOLANA_DEFAULT_METRICS_RATE
-  SOLANA_CUDA="$(snapctl get enable-cuda)"
-  RUST_LOG="$(snapctl get rust-log)"
-
 elif [[ -n $USE_SNAP ]]; then # Use the Linux Snap binaries
  solana_program() {
    declare program="$1"
@ -80,7 +78,7 @@ else
    fi

    # Locate perf libs downloaded by |./fetch-perf-libs.sh|
-    LD_LIBRARY_PATH=$(cd "$here" && dirname "$PWD"):$LD_LIBRARY_PATH
+    LD_LIBRARY_PATH=$(cd "$here" && dirname "$PWD"/target/perf-libs):$LD_LIBRARY_PATH
    export LD_LIBRARY_PATH
  fi
 fi
@ -98,50 +96,8 @@ solana_ledger_tool=$(solana_program ledger-tool)
 export RUST_LOG=${RUST_LOG:-solana=info} # if RUST_LOG is unset, default to info
 export RUST_BACKTRACE=1

-
-# The SOLANA_METRICS_CONFIG environment variable is formatted as a
-# comma-delimited list of parameters. All parameters are optional.
-#
-# Example:
-#   export SOLANA_METRICS_CONFIG="host=<metrics host>,db=<database name>,u=<username>,p=<password>"
-#
-configure_metrics() {
-  [[ -n $SOLANA_METRICS_CONFIG ]] || return 0
-
-  declare metrics_params
-  IFS=',' read -r -a metrics_params <<< "$SOLANA_METRICS_CONFIG"
-  for param in "${metrics_params[@]}"; do
-    IFS='=' read -r -a pair <<< "$param"
-    if [[ ${#pair[@]} != 2 ]]; then
-      echo Error: invalid metrics parameter: "$param" >&2
-    else
-      declare name="${pair[0]}"
-      declare value="${pair[1]}"
-      case "$name" in
-      host)
-        export INFLUX_HOST="$value"
-        echo INFLUX_HOST="$INFLUX_HOST" >&2
-        ;;
-      db)
-        export INFLUX_DATABASE="$value"
-        echo INFLUX_DATABASE="$INFLUX_DATABASE" >&2
-        ;;
-      u)
-        export INFLUX_USERNAME="$value"
-        echo INFLUX_USERNAME="$INFLUX_USERNAME" >&2
-        ;;
-      p)
-        export INFLUX_PASSWORD="$value"
-        echo INFLUX_PASSWORD="********" >&2
-        ;;
-      *)
-        echo Error: Unknown metrics parameter name: "$name" >&2
-        ;;
-      esac
-    fi
-  done
-}
-configure_metrics
+# shellcheck source=scripts/configure-metrics.sh
+source "$(dirname "${BASH_SOURCE[0]}")"/../scripts/configure-metrics.sh

 tune_networking() {
  # Skip in CI
@ -154,7 +110,7 @@ tune_networking() {
      # test the existence of the sysctls before trying to set them
      # go ahead and return true and don't exit if these calls fail
      sysctl net.core.rmem_max 2>/dev/null 1>/dev/null &&
-          sudo sysctl -w net.core.rmem_max=26214400 1>/dev/null 2>/dev/null
+          sudo sysctl -w net.core.rmem_max=67108864 1>/dev/null 2>/dev/null

      sysctl net.core.rmem_default 2>/dev/null 1>/dev/null &&
          sudo sysctl -w net.core.rmem_default=26214400 1>/dev/null 2>/dev/null
@ -173,20 +129,6 @@ tune_networking() {
  fi
 }

-oom_score_adj() {
-  declare pid=$1
-  declare score=$2
-  if [[ $(uname) != Linux ]]; then
-    return
-  fi
-
-  echo "$score" > "/proc/$pid/oom_score_adj" || true
-  declare currentScore
-  currentScore=$(cat "/proc/$pid/oom_score_adj" || true)
-  if [[ $score != "$currentScore" ]]; then
-    echo "Failed to set oom_score_adj to $score for pid $pid (current score: $currentScore)"
-  fi
-}

 SOLANA_CONFIG_DIR=${SNAP_DATA:-$PWD}/config
 SOLANA_CONFIG_PRIVATE_DIR=${SNAP_DATA:-$PWD}/config-private
@ -211,3 +153,50 @@ rsync_url() { # adds the 'rsync://` prefix to URLs that need it
  # Default to rsync:// URL
  echo "rsync://$url"
 }
+
+# called from drone, validator, client
+find_leader() {
+  declare leader leader_address
+  declare shift=0
+
+  if [[ -d $SNAP ]]; then
+    if [[ -n $1 ]]; then
+      usage "Error: unexpected parameter: $1"
+    fi
+
+    # Select leader from the Snap configuration
+    leader_ip=$(snapctl get leader-ip)
+    if [[ -z $leader_ip ]]; then
+      leader=testnet.solana.com
+      leader_ip=$(dig +short "${leader%:*}" | head -n1)
+      if [[ -z $leader_ip ]]; then
+          usage "Error: unable to resolve IP address for $leader"
+      fi
+    fi
+    leader=$leader_ip
+    leader_address=$leader_ip:8001
+  else
+    if [[ -z $1 ]]; then
+      leader=${here}/..        # Default to local tree for rsync
+      leader_address=127.0.0.1:8001 # Default to local leader
+    elif [[ -z $2 ]]; then
+      leader=$1
+
+      declare leader_ip
+      leader_ip=$(dig +short "${leader%:*}" | head -n1)
+
+      if [[ -z $leader_ip ]]; then
+          usage "Error: unable to resolve IP address for $leader"
+      fi
+
+      leader_address=$leader_ip:8001
+      shift=1
+    else
+      leader=$1
+      leader_address=$2
+      shift=2
+    fi
+  fi
+
+  echo "$leader" "$leader_address" "$shift"
+}
--- a/multinode-demo/drone.sh
+++ b/multinode-demo/drone.sh
@ -1,28 +1,26 @@
 #!/bin/bash
 #
-# usage: $0 <rsync network path to solana repo on leader machine>
+# Starts an instance of solana-drone
 #
-
 here=$(dirname "$0")
+
 # shellcheck source=multinode-demo/common.sh
 source "$here"/common.sh
-SOLANA_CONFIG_DIR="$SOLANA_CONFIG_DIR"-drone

-if [[ -d "$SNAP" ]]; then
-  # Exit if mode is not yet configured
-  # (typically the case after the Snap is first installed)
-  [[ -n "$(snapctl get mode)" ]] || exit 0
-
-  # Select leader from the Snap configuration
-  leader_address="$(snapctl get leader-address)"
-  if [[ -z "$leader_address" ]]; then
-    # Assume drone is running on the same node as the leader by default
-    leader_address="localhost"
+usage() {
+  if [[ -n $1 ]]; then
+    echo "$*"
+    echo
  fi
-  leader="$leader_address"
-else
-  leader=${1:-${here}/..}  # Default to local tree for data
-fi
+  echo "usage: $0 [network entry point]"
+  echo
+  echo " Run an airdrop drone for the specified network"
+  echo
+  exit 1
+}
+
+read -r _ leader_address shift < <(find_leader "${@:1:1}")
+shift "$shift"

 [[ -f "$SOLANA_CONFIG_PRIVATE_DIR"/mint.json ]] || {
  echo "$SOLANA_CONFIG_PRIVATE_DIR/mint.json not found, create it by running:"
@ -31,16 +29,12 @@ fi
  exit 1
 }

-rsync_leader_url=$(rsync_url "$leader")
 set -ex
-mkdir -p "$SOLANA_CONFIG_DIR"
-$rsync -vPz "$rsync_leader_url"/config/leader.json "$SOLANA_CONFIG_DIR"/
-

 trap 'kill "$pid" && wait "$pid"' INT TERM
 $solana_drone \
-  -l "$SOLANA_CONFIG_DIR"/leader.json -k "$SOLANA_CONFIG_PRIVATE_DIR"/mint.json \
+  --keypair "$SOLANA_CONFIG_PRIVATE_DIR"/mint.json \
+  --network "$leader_address" \
  > >($drone_logger) 2>&1 &
 pid=$!
-oom_score_adj "$pid" 1000
 wait "$pid"
--- a/multinode-demo/gce_multinode.sh
+++ b/multinode-demo/gce_multinode.sh
@ -1,80 +0,0 @@
-#!/bin/bash
-
-command=$1
-prefix=
-num_nodes=
-out_file=
-image_name="ubuntu-16-04-cuda-9-2-new"
-
-shift
-
-usage() {
-  exitcode=0
-  if [[ -n "$1" ]]; then
-    exitcode=1
-    echo "Error: $*"
-  fi
-  cat <<EOF
-usage: $0 <create|delete> <-p prefix> <-n num_nodes> <-o file> [-i image-name]
-
-Manage a GCE multinode network
-
- create|delete    - Create or delete the network
- -p prefix        - A common prefix for node names, to avoid collision
- -n num_nodes     - Number of nodes
- -o out_file      - Used for create option. Outputs an array of IP addresses
-                    of new nodes to the file
- -i image_name    - Existing image on GCE (default $image_name)
-
-EOF
-  exit $exitcode
-}
-
-while getopts "h?p:i:n:o:" opt; do
-  case $opt in
-  h | \?)
-    usage
-    ;;
-  p)
-    prefix=$OPTARG
-    ;;
-  i)
-    image_name=$OPTARG
-    ;;
-  o)
-    out_file=$OPTARG
-    ;;
-  n)
-    num_nodes=$OPTARG
-    ;;
-  *)
-    usage "Error: unhandled option: $opt"
-    ;;
-  esac
-done
-
-set -e
-
-[[ -n $command ]] || usage "Need a command (create|delete)"
-
-[[ -n $prefix ]] || usage "Need a prefix for GCE instance names"
-
-[[ -n $num_nodes ]] || usage "Need number of nodes"
-
-nodes=()
-for i in $(seq 1 "$num_nodes"); do
-  nodes+=("$prefix$i")
-done
-
-if [[ $command == "create" ]]; then
-  [[ -n $out_file ]] || usage "Need an outfile to store IP Addresses"
-
-  ip_addr_list=$(gcloud beta compute instances create "${nodes[@]}" --zone=us-west1-b --tags=testnet \
-    --image="$image_name" | awk '/RUNNING/ {print $5}')
-
-  echo "ip_addr_array=($ip_addr_list)" >"$out_file"
-elif [[ $command == "delete" ]]; then
-  gcloud beta compute instances delete "${nodes[@]}"
-else
-  usage "Unknown command: $command"
-fi
--- a/multinode-demo/leader.sh
+++ b/multinode-demo/leader.sh
@ -1,9 +1,15 @@
 #!/bin/bash
+#
+# Starts a leader node
+#

 here=$(dirname "$0")
 # shellcheck source=multinode-demo/common.sh
 source "$here"/common.sh

+# shellcheck source=scripts/oom-score-adj.sh
+source "$here"/../scripts/oom-score-adj.sh
+
 if [[ -d "$SNAP" ]]; then
  # Exit if mode is not yet configured
  # (typically the case after the Snap is first installed)
--- a/multinode-demo/remote_leader.sh
+++ b/multinode-demo/remote_leader.sh
@ -1,14 +0,0 @@
-#!/bin/bash -e
-
-[[ -n $FORCE ]] || exit
-
-chmod 600 ~/.ssh/authorized_keys ~/.ssh/id_rsa
-
-PATH="$HOME"/.cargo/bin:"$PATH"
-
-./fetch-perf-libs.sh
-
-# Run setup
-USE_INSTALL=1 ./multinode-demo/setup.sh -p
-USE_INSTALL=1 ./multinode-demo/drone.sh >drone.log 2>&1 &
-USE_INSTALL=1 SOLANA_CUDA=1 ./multinode-demo/leader.sh >leader.log 2>&1 &
--- a/multinode-demo/remote_nodes.sh
+++ b/multinode-demo/remote_nodes.sh
@ -1,185 +0,0 @@
-#!/bin/bash
-
-command=$1
-ip_addr_file=
-remote_user=
-ssh_keys=
-
-shift
-
-usage() {
-  exitcode=0
-  if [[ -n "$1" ]]; then
-    exitcode=1
-    echo "Error: $*"
-  fi
-  cat <<EOF
-usage: $0 <start|stop> <-f IP Addr Array file> <-u username> [-k ssh-keys]
-
-Manage a GCE multinode network
-
- start|stop    - Create or delete the network
- -f file       - A bash script that exports an array of IP addresses, ip_addr_array.
-                 Elements of the array are public IP address of remote nodes.
- -u username   - The username for logging into remote nodes.
- -k ssh-keys   - Path to public/private key pair that remote nodes can use to perform
-                 rsync and ssh among themselves. Must contain pub, and priv keys.
-
-EOF
-  exit $exitcode
-}
-
-while getopts "h?f:u:k:" opt; do
-  case $opt in
-  h | \?)
-    usage
-    ;;
-  f)
-    ip_addr_file=$OPTARG
-    ;;
-  u)
-    remote_user=$OPTARG
-    ;;
-  k)
-    ssh_keys=$OPTARG
-    ;;
-  *)
-    usage "Error: unhandled option: $opt"
-    ;;
-  esac
-done
-
-set -e
-
-# Sample IP Address array file contents
-# ip_addr_array=(192.168.1.1 192.168.1.5 192.168.2.2)
-
-[[ -n $command ]] || usage "Need a command (start|stop)"
-[[ -n $ip_addr_file ]] || usage "Need a file with IP address array"
-[[ -n $remote_user ]] || usage "Need the username for remote nodes"
-
-ip_addr_array=()
-# Get IP address array
-# shellcheck source=/dev/null
-source "$ip_addr_file"
-
-build_project() {
-  echo "Build started at $(date)"
-  SECONDS=0
-
-  # Build and install locally
-  PATH="$HOME"/.cargo/bin:"$PATH"
-  cargo install --force
-
-  echo "Build took $SECONDS seconds"
-}
-
-common_start_setup() {
-  ip_addr=$1
-
-  # Killing sshguard for now. TODO: Find a better solution
-  # sshguard is blacklisting IP address after ssh-keyscan and ssh login attempts
-  ssh "$remote_user@$ip_addr" " \
-    set -ex; \
-    sudo service sshguard stop; \
-    sudo apt-get --assume-yes install rsync libssl-dev; \
-    mkdir -p ~/.ssh ~/solana ~/.cargo/bin; \
-  " >log/"$ip_addr".log
-
-  # If provided, deploy SSH keys
-  if [[ -n $ssh_keys ]]; then
-    {
-      rsync -vPrz "$ssh_keys"/id_rsa "$remote_user@$ip_addr":~/.ssh/
-      rsync -vPrz "$ssh_keys"/id_rsa.pub "$remote_user@$ip_addr":~/.ssh/
-      rsync -vPrz "$ssh_keys"/id_rsa.pub "$remote_user@$ip_addr":~/.ssh/authorized_keys
-      rsync -vPrz ./multinode-demo "$remote_user@$ip_addr":~/solana/
-    } >>log/"$ip_addr".log
-  fi
-}
-
-start_leader() {
-  common_start_setup "$1"
-
-  {
-    rsync -vPrz ~/.cargo/bin/solana* "$remote_user@$ip_addr":~/.cargo/bin/
-    rsync -vPrz ./fetch-perf-libs.sh "$remote_user@$ip_addr":~/solana/
-    ssh -n -f "$remote_user@$ip_addr" 'cd solana; FORCE=1 ./multinode-demo/remote_leader.sh'
-  } >>log/"$1".log
-
-  leader_ip=$1
-  leader_time=$SECONDS
-  SECONDS=0
-}
-
-start_validator() {
-  common_start_setup "$1"
-
-  ssh -n -f "$remote_user@$ip_addr" "cd solana; FORCE=1 ./multinode-demo/remote_validator.sh $leader_ip" >>log/"$1".log
-}
-
-start_all_nodes() {
-  echo "Deployment started at $(date)"
-  SECONDS=0
-  count=0
-  leader_ip=
-  leader_time=
-
-  mkdir -p log
-
-  for ip_addr in "${ip_addr_array[@]}"; do
-    if ((!count)); then
-      # Start the leader on the first node
-      echo "Leader node $ip_addr, killing previous instance and restarting"
-      start_leader "$ip_addr"
-    else
-      # Start validator on all other nodes
-      echo "Validator[$count] node $ip_addr, killing previous instance and restarting"
-      start_validator "$ip_addr" &
-      # TBD: Remove the sleep or reduce time once GCP login quota is increased
-      sleep 2
-    fi
-
-    ((count = count + 1))
-  done
-
-  wait
-
-  ((validator_count = count - 1))
-
-  echo "Deployment finished at $(date)"
-  echo "Leader deployment too $leader_time seconds"
-  echo "$validator_count Validator deployment took $SECONDS seconds"
-}
-
-stop_all_nodes() {
-  SECONDS=0
-  local count=0
-  for ip_addr in "${ip_addr_array[@]}"; do
-    ssh-keygen -R "$ip_addr" >log/local.log
-    ssh-keyscan "$ip_addr" >>~/.ssh/known_hosts 2>/dev/null
-
-    echo "Stopping node[$count] $ip_addr. Remote user $remote_user"
-
-    ssh -n -f "$remote_user@$ip_addr" " \
-    set -ex; \
-    sudo service sshguard stop; \
-    pkill -9 solana-; \
-    pkill -9 validator; \
-    pkill -9 leader; \
-    "
-    sleep 2
-    ((count = count + 1))
-    echo "Stopped node[$count] $ip_addr"
-  done
-  echo "Stopping $count nodes took $SECONDS seconds"
-}
-
-if [[ $command == "start" ]]; then
-  build_project
-  stop_all_nodes
-  start_all_nodes
-elif [[ $command == "stop" ]]; then
-  stop_all_nodes
-else
-  usage "Unknown command: $command"
-fi
--- a/multinode-demo/remote_validator.sh
+++ b/multinode-demo/remote_validator.sh
@ -1,17 +0,0 @@
-#!/bin/bash -e
-
-[[ -n $FORCE ]] || exit
-
-chmod 600 ~/.ssh/authorized_keys ~/.ssh/id_rsa
-
-PATH="$HOME"/.cargo/bin:"$PATH"
-
-touch ~/.ssh/known_hosts
-ssh-keygen -R "$1" 2>/dev/null
-ssh-keyscan "$1" >>~/.ssh/known_hosts 2>/dev/null
-
-rsync -vPrz "$1":~/.cargo/bin/solana* ~/.cargo/bin/
-
-# Run setup
-USE_INSTALL=1 ./multinode-demo/setup.sh -p
-USE_INSTALL=1 ./multinode-demo/validator.sh "$1":~/solana "$1" >validator.log 2>&1
--- a/multinode-demo/setup.sh
+++ b/multinode-demo/setup.sh
@ -1,4 +1,7 @@
 #!/bin/bash
+#
+# Creates a fullnode configuration
+#

 here=$(dirname "$0")
 # shellcheck source=multinode-demo/common.sh
@ -31,6 +34,7 @@ ip_address_arg=-l
 num_tokens=1000000000
 node_type_leader=true
 node_type_validator=true
+node_type_client=true
 while getopts "h?n:lpt:" opt; do
  case $opt in
  h|\?)
@ -52,10 +56,17 @@ while getopts "h?n:lpt:" opt; do
    leader)
      node_type_leader=true
      node_type_validator=false
+      node_type_client=false
      ;;
    validator)
      node_type_leader=false
      node_type_validator=true
+      node_type_client=false
+      ;;
+    client)
+      node_type_leader=false
+      node_type_validator=false
+      node_type_client=true
      ;;
    *)
      usage "Error: unknown node type: $node_type"
@ -69,25 +80,27 @@ while getopts "h?n:lpt:" opt; do
 done


-leader_address_args=("$ip_address_arg")
-validator_address_args=("$ip_address_arg" -b 9000)
-leader_id_path="$SOLANA_CONFIG_PRIVATE_DIR"/leader-id.json
-validator_id_path="$SOLANA_CONFIG_PRIVATE_DIR"/validator-id.json
-mint_path="$SOLANA_CONFIG_PRIVATE_DIR"/mint.json
-
 set -e

-for i in "$SOLANA_CONFIG_DIR" "$SOLANA_CONFIG_PRIVATE_DIR" "$SOLANA_CONFIG_VALIDATOR_DIR"; do
+for i in "$SOLANA_CONFIG_DIR" "$SOLANA_CONFIG_VALIDATOR_DIR" "$SOLANA_CONFIG_PRIVATE_DIR"; do
  echo "Cleaning $i"
  rm -rvf "$i"
  mkdir -p "$i"
 done

-
-$solana_keygen -o "$leader_id_path"
-$solana_keygen -o "$validator_id_path"
+if $node_type_client; then
+  client_id_path="$SOLANA_CONFIG_PRIVATE_DIR"/client-id.json
+  $solana_keygen -o "$client_id_path"
+  ls -lhR "$SOLANA_CONFIG_PRIVATE_DIR"/
+fi

 if $node_type_leader; then
+  leader_address_args=("$ip_address_arg")
+  leader_id_path="$SOLANA_CONFIG_PRIVATE_DIR"/leader-id.json
+  mint_path="$SOLANA_CONFIG_PRIVATE_DIR"/mint.json
+
+  $solana_keygen -o "$leader_id_path"
+
  echo "Creating $mint_path with $num_tokens tokens"
  $solana_keygen -o "$mint_path"

@ -96,15 +109,20 @@ if $node_type_leader; then

  echo "Creating $SOLANA_CONFIG_DIR/leader.json"
  $solana_fullnode_config --keypair="$leader_id_path" "${leader_address_args[@]}" > "$SOLANA_CONFIG_DIR"/leader.json
+
+  ls -lhR "$SOLANA_CONFIG_DIR"/
+  ls -lhR "$SOLANA_CONFIG_PRIVATE_DIR"/
 fi


 if $node_type_validator; then
+  validator_address_args=("$ip_address_arg" -b 9000)
+  validator_id_path="$SOLANA_CONFIG_PRIVATE_DIR"/validator-id.json
+
+  $solana_keygen -o "$validator_id_path"
+
  echo "Creating $SOLANA_CONFIG_VALIDATOR_DIR/validator.json"
  $solana_fullnode_config --keypair="$validator_id_path" "${validator_address_args[@]}" > "$SOLANA_CONFIG_VALIDATOR_DIR"/validator.json
-fi

-ls -lhR "$SOLANA_CONFIG_DIR"/
-if $node_type_leader; then
-  ls -lhR "$SOLANA_CONFIG_PRIVATE_DIR"
+  ls -lhR "$SOLANA_CONFIG_VALIDATOR_DIR"/
 fi
--- a/multinode-demo/validator-x.sh
+++ b/multinode-demo/validator-x.sh
@ -1,4 +1,8 @@
 #!/bin/bash
+#
+# Start a dynamically-configured validator node
+#
+
 here=$(dirname "$0")

 exec "$here"/validator.sh -x "$@"
--- a/multinode-demo/validator.sh
+++ b/multinode-demo/validator.sh
@ -1,16 +1,31 @@
 #!/bin/bash
+#
+# Start a validator node
+#
 here=$(dirname "$0")
 # shellcheck source=multinode-demo/common.sh
 source "$here"/common.sh

+# shellcheck source=scripts/oom-score-adj.sh
+source "$here"/../scripts/oom-score-adj.sh
+
+if [[ -d "$SNAP" ]]; then
+  # Exit if mode is not yet configured
+  # (typically the case after the Snap is first installed)
+  [[ -n "$(snapctl get mode)" ]] || exit 0
+fi
+
 usage() {
  if [[ -n $1 ]]; then
    echo "$*"
    echo
  fi
-  echo "usage: $0 [-x] [rsync network path to solana repo on leader machine] [network ip address of leader]"
-  echo ""
-  echo "       -x: runs a new, dynamically-configured validator"
+  echo "usage: $0 [-x] [rsync network path to leader] [network entry point]"
+  echo
+  echo " Start a validator on the specified network"
+  echo
+  echo "   -x: runs a new, dynamically-configured validator"
+  echo
  exit 1
 }

@ -29,34 +44,8 @@ if [[ -n $3 ]]; then
  usage
 fi

-if [[ -d $SNAP ]]; then
-  # Exit if mode is not yet configured
-  # (typically the case after the Snap is first installed)
-  [[ -n $(snapctl get mode) ]] || exit 0
-
-  # Select leader from the Snap configuration
-  leader_address=$(snapctl get leader-address)
-  if [[ -z $leader_address ]]; then
-    # Assume public testnet by default
-    leader_address=35.227.93.37  # testnet.solana.com
-  fi
-  leader=$leader_address
-else
-  if [[ -z $1 ]]; then
-    leader=${1:-${here}/..}    # Default to local tree for data
-    leader_address=${2:-127.0.0.1}  # Default to local leader
-  elif [[ -z $2 ]]; then
-    leader=$1
-    leader_address=$(dig +short "${leader%:*}" | head -n1)
-    if [[ -z $leader_address ]]; then
-      usage "Error: unable to resolve IP address for $leader"
-    fi
-  else
-    leader=$1
-    leader_address=$2
-  fi
-fi
-leader_port=8001
+read -r leader leader_address shift < <(find_leader "${@:1:2}")
+shift "$shift"

 if [[ -n $SOLANA_CUDA ]]; then
  program=$solana_fullnode_cuda
@ -103,7 +92,7 @@ $rsync -vPr "$rsync_leader_url"/config/ "$SOLANA_LEADER_CONFIG_DIR"
 trap 'kill "$pid" && wait "$pid"' INT TERM
 $program \
  --identity "$validator_json_path" \
-  --testnet "$leader_address:$leader_port" \
+  --network "$leader_address" \
  --ledger "$SOLANA_LEADER_CONFIG_DIR"/ledger \
  > >($validator_logger) 2>&1 &
 pid=$!
--- a/multinode-demo/wallet.sh
+++ b/multinode-demo/wallet.sh
@ -1,5 +1,7 @@
 #!/bin/bash
 #
+# Runs solana-wallet against the specified network
+#
 # usage: $0 <rsync network path to solana repo on leader machine>"
 #

@ -7,6 +9,9 @@ here=$(dirname "$0")
 # shellcheck source=multinode-demo/common.sh
 source "$here"/common.sh

+# shellcheck source=scripts/oom-score-adj.sh
+source "$here"/../scripts/oom-score-adj.sh
+
 # if $1 isn't host:path, something.com, or a valid local path
 if [[ ${1%:} != "$1" || "$1" =~ [^.]\.[^.] || -d $1 ]]; then
  leader=$1 # interpret
@ -42,4 +47,4 @@ fi

 # shellcheck disable=SC2086 # $solana_wallet should not be quoted
 exec $solana_wallet \
-  -l "$SOLANA_CONFIG_CLIENT_DIR"/leader.json -k "$client_id_path" "$@"
+  -l "$SOLANA_CONFIG_CLIENT_DIR"/leader.json -k "$client_id_path" --timeout 10 "$@"
--- a/net/.gitignore
+++ b/net/.gitignore
@ -0,0 +1,2 @@
+/config/
+/log/
--- a/net/README.md
+++ b/net/README.md
@ -0,0 +1,66 @@
+
+# Network Management
+This directory contains scripts useful for working with a test network.  It's
+intended to be both dev and CD friendly.
+
+### User Account Prerequisites
+
+Log in to GCP with:
+```bash
+$ gcloud auth login
+```
+
+Also ensure that `$(whoami)` is the name of an InfluxDB user account with enough
+access to create a new database.
+
+## Quick Start
+```bash
+$ cd net/
+$ ./gce.sh create -n 5 -c 1  #<-- Create a GCE testnet with 5 validators, 1 client (billing starts here)
+$ ./init-metrics.sh $(whoami)   #<-- Configure a metrics database for the testnet
+$ ./net.sh start             #<-- Deploy the network from the local workspace
+$ ./ssh.sh                   #<-- Details on how to ssh into any testnet node
+$ ./gce.sh delete            #<-- Dispose of the network (billing stops here)
+```
+
+## Tips
+
+### Running the network over public IP addresses
+By default private IP addresses are used with all instances in the same
+availability zone to avoid GCE network egress charges.  However, to run the
+network over public IP addresses:
+```bash
+$ ./gce.sh create -P ...
+```
+
+### Deploying a Snap-based network
+To deploy the latest pre-built `edge` channel Snap (i.e., latest from the `master`
+branch), once the testnet has been created run:
+
+```bash
+$ ./net.sh start -s edge
+```
+
+### Enabling CUDA
+First ensure the network instances are created with GPU enabled:
+```bash
+$ ./gce.sh create -g ...
+```
+
+If deploying a Snap-based network nothing further is required, as GPU presence
+is detected at runtime and the CUDA build is auto selected.
+
+If deploying a locally-built network, first run `./fetch-perf-libs.sh` then
+ensure the `cuda` feature is specified at network start:
+```bash
+$ ./net.sh start -f "cuda,erasure"
+```
+
+### How to interact with a CD testnet deployed by ci/testnet-deploy.sh
+
+Taking **master-testnet-solana-com** as an example, configure your workspace for
+the testnet using:
+```
+$ ./gce.sh config -p master-testnet-solana-com
+$ ./ssh.sh                                     # <-- Details on how to ssh into any testnet node
+```
--- a/net/common.sh
+++ b/net/common.sh
@ -0,0 +1,58 @@
+# |source| this file
+#
+# Common utilities shared by other scripts in this directory
+#
+# The following directive disables complaints about unused variables in this
+# file:
+# shellcheck disable=2034
+#
+
+netDir=$(
+  cd "$(dirname "${BASH_SOURCE[0]}")" || exit
+  echo "$PWD"
+)
+netConfigDir="$netDir"/config
+netLogDir="$netDir"/log
+mkdir -p "$netConfigDir" "$netLogDir"
+
+# shellcheck source=scripts/configure-metrics.sh
+source "$(dirname "${BASH_SOURCE[0]}")"/../scripts/configure-metrics.sh
+
+configFile="$netConfigDir/config"
+
+entrypointIp=
+publicNetwork=
+leaderIp=
+netBasename=
+sshPrivateKey=
+clientIpList=()
+sshOptions=()
+validatorIpList=()
+
+buildSshOptions() {
+  sshOptions=(
+    -o "BatchMode=yes"
+    -o "StrictHostKeyChecking=no"
+    -o "UserKnownHostsFile=/dev/null"
+    -o "User=solana"
+    -o "IdentityFile=$sshPrivateKey"
+    -o "LogLevel=ERROR"
+    -F /dev/null
+  )
+}
+
+loadConfigFile() {
+  [[ -r $configFile ]] || usage "Config file unreadable: $configFile"
+
+  # shellcheck source=/dev/null
+  source "$configFile"
+  [[ -n "$entrypointIp" ]] || usage "Config file invalid, entrypointIp unspecified: $configFile"
+  [[ -n "$publicNetwork" ]] || usage "Config file invalid, publicNetwork unspecified: $configFile"
+  [[ -n "$leaderIp" ]] || usage "Config file invalid, leaderIp unspecified: $configFile"
+  [[ -n "$netBasename" ]] || usage "Config file invalid, netBasename unspecified: $configFile"
+  [[ -n $sshPrivateKey ]] || usage "Config file invalid, sshPrivateKey unspecified: $configFile"
+  [[ ${#validatorIpList[@]} -gt 0 ]] || usage "Config file invalid, validatorIpList unspecified: $configFile"
+
+  buildSshOptions
+  configureMetrics
+}
--- a/net/gce.sh
+++ b/net/gce.sh
@ -0,0 +1,336 @@
+#!/bin/bash -e
+
+here=$(dirname "$0")
+# shellcheck source=net/scripts/gcloud.sh
+source "$here"/scripts/gcloud.sh
+# shellcheck source=net/common.sh
+source "$here"/common.sh
+
+prefix=testnet-dev-${USER//[^A-Za-z0-9]/}
+validatorNodeCount=5
+clientNodeCount=1
+leaderBootDiskSize=1TB
+leaderMachineType=n1-standard-16
+leaderAccelerator=
+validatorMachineType=n1-standard-4
+validatorBootDiskSize=$leaderBootDiskSize
+validatorAccelerator=
+clientMachineType=n1-standard-16
+clientBootDiskSize=40GB
+clientAccelerator=
+
+imageName="ubuntu-16-04-cuda-9-2-new"
+publicNetwork=false
+zone="us-west1-b"
+leaderAddress=
+
+usage() {
+  exitcode=0
+  if [[ -n "$1" ]]; then
+    exitcode=1
+    echo "Error: $*"
+  fi
+  cat <<EOF
+usage: $0 [create|config|delete] [common options] [command-specific options]
+
+Configure a GCE-based testnet
+
+ create - create a new testnet (implies 'config')
+ config - configure the testnet and write a config file describing it
+ delete - delete the testnet
+
+ common options:
+   -p [prefix]      - Optional common prefix for instance names to avoid
+                      collisions (default: $prefix)
+
+ create-specific options:
+   -n [number]      - Number of validator nodes (default: $validatorNodeCount)
+   -c [number]      - Number of client nodes (default: $clientNodeCount)
+   -P               - Use public network IP addresses (default: $publicNetwork)
+   -z [zone]        - GCP Zone for the nodes (default: $zone)
+   -i [imageName]   - Existing image on GCE (default: $imageName)
+   -g               - Enable GPU
+   -a [address]     - Set the leader node's external IP address to this GCE address
+
+ config-specific options:
+   none
+
+ delete-specific options:
+   none
+
+EOF
+  exit $exitcode
+}
+
+
+command=$1
+[[ -n $command ]] || usage
+shift
+[[ $command = create || $command = config || $command = delete ]] || usage "Invalid command: $command"
+
+while getopts "h?p:Pi:n:c:z:ga:" opt; do
+  case $opt in
+  h | \?)
+    usage
+    ;;
+  p)
+    [[ ${OPTARG//[^A-Za-z0-9-]/} == "$OPTARG" ]] || usage "Invalid prefix: \"$OPTARG\", alphanumeric only"
+    prefix=$OPTARG
+    ;;
+  P)
+    publicNetwork=true
+    ;;
+  i)
+    imageName=$OPTARG
+    ;;
+  n)
+    validatorNodeCount=$OPTARG
+    ;;
+  c)
+    clientNodeCount=$OPTARG
+    ;;
+  z)
+    zone=$OPTARG
+    ;;
+  g)
+    leaderAccelerator="count=4,type=nvidia-tesla-k80"
+    ;;
+  a)
+    leaderAddress=$OPTARG
+    ;;
+  *)
+    usage "Error: unhandled option: $opt"
+    ;;
+  esac
+done
+shift $((OPTIND - 1))
+
+[[ -z $1 ]] || usage "Unexpected argument: $1"
+sshPrivateKey="$netConfigDir/id_$prefix"
+
+prepareInstancesAndWriteConfigFile() {  # locate leader/validator/client instances and append their details to $configFile
+  $metricsWriteDatapoint "testnet-deploy net-config-begin=1"
+
+  cat >> "$configFile" <<EOF
+# autogenerated at $(date)
+netBasename=$prefix
+publicNetwork=$publicNetwork
+sshPrivateKey=$sshPrivateKey
+EOF
+
+  buildSshOptions
+
+  recordInstanceIp() {  # per-instance callback: appends "<arrayName>+=(<publicIp>)" to $configFile
+    declare name="$1"
+    declare publicIp="$3"
+    declare privateIp="$4"
+
+    declare arrayName="$6"  # NOTE(review): presumably the extra argument forwarded by gcloud_ForEachInstance -- confirm
+
+    echo "$arrayName+=($publicIp)  # $name" >> "$configFile"
+    if [[ $arrayName = "leaderIp" ]]; then
+      if $publicNetwork; then
+        echo "entrypointIp=$publicIp" >> "$configFile"
+      else
+        echo "entrypointIp=$privateIp" >> "$configFile"
+      fi
+    fi
+  }
+
+  waitForStartupComplete() {  # per-instance callback: polls over ssh (up to 30 tries) for /.gce-startup-complete
+    declare name="$1"
+    declare publicIp="$3"
+
+    echo "Waiting for $name to finish booting..."
+    (
+      for i in $(seq 1 30); do
+        if (set -x; ssh "${sshOptions[@]}" "$publicIp" "test -f /.gce-startup-complete"); then
+          break
+        fi
+        sleep 2
+        echo "Retry $i..."
+      done
+    )
+  }
+
+  echo "Looking for leader instance..."
+  gcloud_FindInstances "name=$prefix-leader" show
+  [[ ${#instances[@]} -eq 1 ]] || {
+    echo "Unable to find leader"
+    exit 1
+  }
+
+  echo "Fetching $sshPrivateKey from ${instances[0]%%:*}"  # instance name is the first ':'-separated field
+  (
+    rm -rf "$sshPrivateKey"{,.pub}  # drop the stale private key and its .pub companion
+
+    declare leaderName
+    declare leaderZone
+    declare leaderIp
+    IFS=: read -r leaderName leaderZone leaderIp _ < <(echo "${instances[0]}")
+
+    set -x
+
+    # Try to ping the machine first.  There can be a delay between when the
+    # instance is reported as RUNNING and when it's reachable over the network
+    timeout 30s bash -c "set -o pipefail; until ping -c 3 $leaderIp | tr - _; do echo .; done"
+
+    # Try to scp in a couple times, sshd may not yet be up even though the
+    # machine can be pinged...
+    set -o pipefail
+    for i in $(seq 1 10); do
+      if gcloud compute scp --zone "$leaderZone" \
+          "$leaderName:/solana-id_ecdsa" "$sshPrivateKey"; then
+        break
+      fi
+      sleep 1
+      echo "Retry $i..."
+    done
+
+    chmod 400 "$sshPrivateKey"
+  )
+
+  echo "leaderIp=()" >> "$configFile"
+  gcloud_ForEachInstance recordInstanceIp leaderIp
+  gcloud_ForEachInstance waitForStartupComplete
+
+  echo "Looking for validator instances..."
+  gcloud_FindInstances "name~^$prefix-validator" show
+  [[ ${#instances[@]} -gt 0 ]] || {
+    echo "Unable to find validators"
+    exit 1
+  }
+  echo "validatorIpList=()" >> "$configFile"
+  gcloud_ForEachInstance recordInstanceIp validatorIpList
+  gcloud_ForEachInstance waitForStartupComplete
+
+  echo "clientIpList=()" >> "$configFile"
+  echo "Looking for client instances..."
+  gcloud_FindInstances "name~^$prefix-client" show
+  [[ ${#instances[@]} -eq 0 ]] || {  # clients are optional: only record them when some exist
+    gcloud_ForEachInstance recordInstanceIp clientIpList
+    gcloud_ForEachInstance waitForStartupComplete
+  }
+
+  echo "Wrote $configFile"
+  $metricsWriteDatapoint "testnet-deploy net-config-complete=1"
+}
+
+case $command in
+delete)
+  $metricsWriteDatapoint "testnet-deploy net-delete-begin=1"
+
+  # Delete the leader node first to prevent unusual metrics on the dashboard
+  # during shutdown.
+  # TODO: It would be better to fully cut-off metrics reporting before any
+  # instances are deleted.
+  for filter in "^$prefix-leader" "^$prefix-"; do
+    gcloud_FindInstances "name~$filter"
+
+    if [[ ${#instances[@]} -eq 0 ]]; then
+      echo "No instances found matching '$filter'"
+    else
+      gcloud_DeleteInstances true
+    fi
+  done
+  rm -f "$configFile"
+
+  $metricsWriteDatapoint "testnet-deploy net-delete-complete=1"
+  ;;
+
+create)
+  [[ -n $validatorNodeCount ]] || usage "Need number of nodes"
+
+  $metricsWriteDatapoint "testnet-deploy net-create-begin=1"
+
+  rm -rf "$sshPrivateKey"{,.pub}
+  ssh-keygen -t ecdsa -N '' -f "$sshPrivateKey"
+
+  printNetworkInfo() {
+    cat <<EOF
+========================================================================================
+
+Network composition:
+  Leader = $leaderMachineType (GPU=${leaderAccelerator:-none})
+  Validators = $validatorNodeCount x $validatorMachineType (GPU=${validatorAccelerator:-none})
+  Client(s) = $clientNodeCount x $clientMachineType (GPU=${clientAccelerator:-none})
+
+========================================================================================
+
+EOF
+  }
+  printNetworkInfo
+
+  declare startupScript="$netConfigDir"/gce-startup-script.sh
+  cat > "$startupScript" <<EOF
+#!/bin/bash -ex
+# autogenerated at $(date)
+
+cat > /etc/motd <<EOM
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+  This instance has not been fully configured.
+  See "startup-script" log messages in /var/log/syslog for status:
+    $ sudo cat /var/log/syslog | grep startup-script
+
+  To block until setup is complete, run:
+    $ until [[ -f /.gce-startup-complete ]]; do sleep 1; done
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+EOM
+
+# Place the generated private key at /solana-id_ecdsa so it's retrievable by anybody
+# who is able to log into this machine
+cat > /solana-id_ecdsa <<EOK
+$(cat "$sshPrivateKey")
+EOK
+cat > /solana-id_ecdsa.pub <<EOK
+$(cat "$sshPrivateKey.pub")
+EOK
+chmod 444 /solana-id_ecdsa
+
+USER=\$(id -un)
+
+$(
+  cd "$here"/scripts/
+  cat \
+    disable-background-upgrades.sh \
+    create-solana-user.sh \
+    install-earlyoom.sh \
+    install-rsync.sh \
+    install-libssl-compatability.sh \
+)
+
+cat > /etc/motd <<EOM
+$(printNetworkInfo)
+EOM
+
+touch /.gce-startup-complete
+
+EOF
+
+  gcloud_CreateInstances "$prefix-leader" 1 "$zone" \
+    "$imageName" "$leaderMachineType" "$leaderBootDiskSize" "$leaderAccelerator" \
+    "$startupScript" "$leaderAddress"
+
+  gcloud_CreateInstances "$prefix-validator" "$validatorNodeCount" "$zone" \
+    "$imageName" "$validatorMachineType" "$validatorBootDiskSize" "$validatorAccelerator" \
+    "$startupScript" ""
+
+  if [[ $clientNodeCount -gt 0 ]]; then
+    gcloud_CreateInstances "$prefix-client" "$clientNodeCount" "$zone" \
+      "$imageName" "$clientMachineType" "$clientBootDiskSize" "$clientAccelerator" \
+      "$startupScript" ""
+  fi
+
+  $metricsWriteDatapoint "testnet-deploy net-create-complete=1"
+
+  prepareInstancesAndWriteConfigFile
+  ;;
+
+config)
+  prepareInstancesAndWriteConfigFile
+  ;;
+*)
+  usage "Unknown command: $command"
+esac
--- a/net/init-metrics.sh
+++ b/net/init-metrics.sh
@ -0,0 +1,80 @@
+#!/bin/bash -e
+
+here=$(dirname "$0")
+# shellcheck source=net/common.sh
+source "$here"/common.sh
+
+usage() {
+  exitcode=0
+  if [[ -n "$1" ]]; then
+    exitcode=1
+    echo "Error: $*"
+  fi
+  cat <<EOF
+usage: $0 [-e] [-d] [username]
+
+Creates a testnet dev metrics database
+
+  username        InfluxDB user with access to create a new database
+  -d              Delete the database instead of creating it
+  -e              Assume database already exists and SOLANA_METRICS_CONFIG is
+                  defined in the environment already
+
+EOF
+  exit $exitcode
+}
+
+loadConfigFile
+
+useEnv=false
+delete=false
+while getopts "hde" opt; do
+  case $opt in
+  h|\?)
+    usage
+    exit 0
+    ;;
+  d)
+    delete=true
+    ;;
+  e)
+    useEnv=true
+    ;;
+  *)
+    usage "Error: unhandled option: $opt"
+    ;;
+  esac
+done
+shift $((OPTIND - 1))
+
+if $useEnv; then
+  [[ -n $SOLANA_METRICS_CONFIG ]] ||
+    usage "Error: SOLANA_METRICS_CONFIG is not defined in the environment"
+else
+  username=$1
+  [[ -n "$username" ]] || usage "username not specified"
+
+  read -rs -p "InfluxDB password for $username: " password
+  [[ -n $password ]] || { echo "Password not specified"; exit 1; }
+  echo
+
+  query() {
+    echo "$*"
+    curl -XPOST \
+      "https://metrics.solana.com:8086/query?u=${username}&p=${password}" \
+      --data-urlencode "q=$*"
+  }
+
+  query "DROP DATABASE \"$netBasename\""
+  ! $delete || exit 0
+  query "CREATE DATABASE \"$netBasename\""
+  query "ALTER RETENTION POLICY autogen ON \"$netBasename\" DURATION 7d"
+  query "GRANT READ ON \"$netBasename\" TO \"ro\""
+  query "GRANT WRITE ON \"$netBasename\" TO \"scratch_writer\""
+
+  SOLANA_METRICS_CONFIG="db=$netBasename,u=scratch_writer,p=topsecret"
+fi
+
+echo "export SOLANA_METRICS_CONFIG=\"$SOLANA_METRICS_CONFIG\"" >> "$configFile"
+
+exit 0
--- a/net/net.sh
+++ b/net/net.sh
@ -0,0 +1,352 @@
+#!/bin/bash -e
+
+here=$(dirname "$0")
+SOLANA_ROOT="$(cd "$here"/..; pwd)"
+
+# shellcheck source=net/common.sh
+source "$here"/common.sh
+
+usage() {  # print help and exit: status 1 (with "Error: ..." first) when a message is passed, otherwise 0
+  exitcode=0
+  if [[ -n "$1" ]]; then
+    exitcode=1
+    echo "Error: $*"
+  fi
+  cat <<EOF
+usage: $0 [start|stop|restart|sanity] [command-specific options]
+
+Operate a configured testnet
+
+ start    - Start the network
+ sanity   - Sanity check the network
+ stop     - Stop the network
+ restart  - Shortcut for stop then start
+
+ start-specific options:
+   -S [snapFilename]    - Deploy the specified Snap file
+   -s edge|beta|stable  - Deploy the latest Snap on the specified Snap release channel
+   -f [cargoFeatures]   - List of |cargo --features=| to activate
+                          (ignored if -s or -S is specified)
+
+   Note: if RUST_LOG is set in the environment it will be propagated into the
+         network nodes.
+
+ sanity/start-specific options:
+   -o noLedgerVerify    - Skip ledger verification
+   -o noValidatorSanity - Skip validator sanity
+
+ stop-specific options:
+   none
+
+EOF
+  exit $exitcode
+}
+
+snapChannel=
+snapFilename=
+deployMethod=local
+sanityExtraArgs=
+cargoFeatures=
+
+command=$1
+[[ -n $command ]] || usage
+shift
+
+while getopts "h?S:s:o:f:" opt; do
+  case $opt in
+  h | \?)
+    usage
+    ;;
+  S)
+    snapFilename=$OPTARG
+    [[ -f $snapFilename ]] || usage "Snap not readable: $snapFilename"
+    deployMethod=snap
+    ;;
+  s)
+    case $OPTARG in
+    edge|beta|stable)
+      snapChannel=$OPTARG
+      deployMethod=snap
+      ;;
+    *)
+      usage "Invalid snap channel: $OPTARG"
+      ;;
+    esac
+    ;;
+  f)
+    cargoFeatures=$OPTARG
+    ;;
+  o)
+    case $OPTARG in
+    noLedgerVerify|noValidatorSanity)
+      sanityExtraArgs="$sanityExtraArgs -o $OPTARG"
+      ;;
+    *)
+      echo "Error: unknown option: $OPTARG"
+      exit 1
+      ;;
+    esac
+    ;;
+  *)
+    usage "Error: unhandled option: $opt"
+    ;;
+  esac
+done
+
+loadConfigFile
+expectedNodeCount=$((${#validatorIpList[@]} + 1))
+
+build() {
+  declare MAYBE_DOCKER=
+  if [[ $(uname) != Linux ]]; then
+    MAYBE_DOCKER="ci/docker-run.sh solanalabs/rust"
+  fi
+  SECONDS=0
+  (
+    cd "$SOLANA_ROOT"
+    echo "--- Build started at $(date)"
+
+    set -x
+    rm -rf farf
+    $MAYBE_DOCKER cargo install --features="$cargoFeatures" --root farf
+  )
+  echo "Build took $SECONDS seconds"
+}
+
+startCommon() {
+  declare ipAddress=$1
+  test -d "$SOLANA_ROOT"
+  ssh "${sshOptions[@]}" "$ipAddress" "mkdir -p ~/solana ~/.cargo/bin"
+  rsync -vPrc -e "ssh ${sshOptions[*]}" \
+    "$SOLANA_ROOT"/{fetch-perf-libs.sh,scripts,net,multinode-demo} \
+    "$ipAddress":~/solana/
+}
+
+startLeader() {
+  declare ipAddress=$1
+  declare logFile="$2"
+  echo "--- Starting leader: $leaderIp"
+  echo "start log: $logFile"
+
+  # Deploy local binaries to leader.  Validators and clients later fetch the
+  # binaries from the leader.
+  (
+    set -x
+    startCommon "$ipAddress" || exit 1
+    case $deployMethod in
+    snap)
+      rsync -vPrc -e "ssh ${sshOptions[*]}" "$snapFilename" "$ipAddress:~/solana/solana.snap"
+      ;;
+    local)
+      rsync -vPrc -e "ssh ${sshOptions[*]}" "$SOLANA_ROOT"/farf/bin/* "$ipAddress:~/.cargo/bin/"
+      ;;
+    *)
+      usage "Internal error: invalid deployMethod: $deployMethod"
+      ;;
+    esac
+
+    ssh "${sshOptions[@]}" -n "$ipAddress" \
+      "./solana/net/remote/remote-node.sh $deployMethod leader $publicNetwork $entrypointIp $expectedNodeCount \"$RUST_LOG\""
+  ) >> "$logFile" 2>&1 || {
+    cat "$logFile"
+    echo "^^^ +++"
+    exit 1
+  }
+}
+
+startValidator() {  # deploy and launch one validator in the background; $1 is the validator's IP address
+  declare ipAddress=$1
+  declare logFile="$netLogDir/validator-$ipAddress.log"
+
+  echo "--- Starting validator: $ipAddress"
+  echo "start log: $logFile"
+  (
+    set -x
+    startCommon "$ipAddress"
+    ssh "${sshOptions[@]}" -n "$ipAddress" \
+      "./solana/net/remote/remote-node.sh $deployMethod validator $publicNetwork $entrypointIp $expectedNodeCount \"$RUST_LOG\""
+  ) >> "$logFile" 2>&1 &  # backgrounded so that validators are deployed concurrently
+  declare pid=$!
+  ln -sfT "validator-$ipAddress.log" "$netLogDir/validator-$pid.log"  # lets a failed pid be mapped back to its log
+  pids+=("$pid")  # the caller waits on every pid collected here
+}
+
+startClient() {  # deploy and launch one client; $1 is its IP address, $2 the local log file to append to
+  declare ipAddress=$1
+  declare logFile="$2"
+  echo "--- Starting client: $ipAddress"
+  echo "start log: $logFile"
+  (
+    set -x
+    startCommon "$ipAddress" || exit 1  # -e is ignored inside the left side of ||, so bail out explicitly
+    ssh "${sshOptions[@]}" -f "$ipAddress" \
+      "./solana/net/remote/remote-client.sh $deployMethod $entrypointIp $expectedNodeCount \"$RUST_LOG\""
+  ) >> "$logFile" 2>&1 || {
+    cat "$logFile"
+    echo "^^^ +++"
+    exit 1
+  }
+}
+
+sanity() {  # run remote-sanity.sh on the leader over ssh; exits 1 if it fails
+  declare expectedNodeCount=$((${#validatorIpList[@]} + 1))
+  declare ok=true
+
+  echo "--- Sanity"
+  $metricsWriteDatapoint "testnet-deploy net-sanity-begin=1"
+
+  (
+    set -x
+    # shellcheck disable=SC2029 # remote-sanity.sh args are expanded on client side intentionally
+    ssh "${sshOptions[@]}" "$leaderIp" \
+      "./solana/net/remote/remote-sanity.sh $sanityExtraArgs"
+  ) || ok=false
+
+  $metricsWriteDatapoint "testnet-deploy net-sanity-complete=1"
+  $ok || exit 1
+}
+
+start() {
+  case $deployMethod in
+  snap)
+    if [[ -n $snapChannel ]]; then
+      rm -f "$SOLANA_ROOT"/solana_*.snap
+      if [[ $(uname) != Linux ]]; then
+        (
+          set -x
+          SOLANA_DOCKER_RUN_NOSETUID=1 "$SOLANA_ROOT"/ci/docker-run.sh ubuntu:18.04 bash -c "
+            set -ex;
+            apt-get -qq update;
+            apt-get -qq -y install snapd;
+            snap download --channel=$snapChannel solana;
+          "
+        )
+      else
+        (
+          cd "$SOLANA_ROOT"
+          snap download --channel="$snapChannel" solana
+        )
+      fi
+      snapFilename="$(echo "$SOLANA_ROOT"/solana_*.snap)"
+      [[ -r $snapFilename ]] || {
+        echo "Error: Snap not readable: $snapFilename"
+        exit 1
+      }
+    fi
+    ;;
+  local)
+    build
+    ;;
+  *)
+    usage "Internal error: invalid deployMethod: $deployMethod"
+    ;;
+  esac
+
+  echo "Deployment started at $(date)"
+  $metricsWriteDatapoint "testnet-deploy net-start-begin=1"
+
+  SECONDS=0
+  declare leaderDeployTime=
+  startLeader "$leaderIp" "$netLogDir/leader-$leaderIp.log"
+  leaderDeployTime=$SECONDS
+  $metricsWriteDatapoint "testnet-deploy net-leader-started=1"
+
+  SECONDS=0
+  pids=()
+  for ipAddress in "${validatorIpList[@]}"; do
+    startValidator "$ipAddress"
+  done
+
+  for pid in "${pids[@]}"; do
+    declare ok=true
+    wait "$pid" || ok=false
+    if ! $ok; then
+      cat "$netLogDir/validator-$pid.log"
+      echo ^^^ +++
+      exit 1
+    fi
+  done
+
+  $metricsWriteDatapoint "testnet-deploy net-validators-started=1"
+  validatorDeployTime=$SECONDS
+
+  sanity
+
+  SECONDS=0
+  for ipAddress in "${clientIpList[@]}"; do
+    startClient "$ipAddress" "$netLogDir/client-$ipAddress.log"
+  done
+  clientDeployTime=$SECONDS
+  $metricsWriteDatapoint "testnet-deploy net-start-complete=1"
+
+  if [[ $deployMethod = "snap" ]]; then
+    declare networkVersion=unknown
+    IFS=\  read -r _ networkVersion _ < <(
+      ssh "${sshOptions[@]}" "$leaderIp" \
+        "snap info solana | grep \"^installed:\""
+    )
+    networkVersion=${networkVersion/0+git./}
+    $metricsWriteDatapoint "testnet-deploy version=\"$networkVersion\""
+  fi
+
+  echo
+  echo "+++ Deployment Successful"
+  echo "Leader deployment took $leaderDeployTime seconds"
+  echo "Validator deployment (${#validatorIpList[@]} instances) took $validatorDeployTime seconds"
+  echo "Client deployment (${#clientIpList[@]} instances) took $clientDeployTime seconds"
+  echo "Network start logs in $netLogDir:"
+  ls -l "$netLogDir"
+}
+
+
+stopNode() {
+  local ipAddress=$1
+  echo "--- Stopping node: $ipAddress"
+  (
+    set -x
+    ssh "${sshOptions[@]}" "$ipAddress" "
+      set -x
+      if snap list solana; then
+        sudo snap set solana mode=
+        sudo snap remove solana
+      fi
+      ! tmux list-sessions || tmux kill-session
+      for pattern in solana- remote- oom-monitor net-stats; do
+        pkill -9 \$pattern
+      done
+    "
+  ) || true
+}
+
+stop() {
+  SECONDS=0
+  $metricsWriteDatapoint "testnet-deploy net-stop-begin=1"
+
+  stopNode "$leaderIp"
+
+  for ipAddress in "${validatorIpList[@]}" "${clientIpList[@]}"; do
+    stopNode "$ipAddress"
+  done
+
+  $metricsWriteDatapoint "testnet-deploy net-stop-complete=1"
+  echo "Stopping nodes took $SECONDS seconds"
+}
+
+case $command in
+restart)
+  stop
+  start
+  ;;
+start)
+  start
+  ;;
+sanity)
+  sanity
+  ;;
+stop)
+  stop
+  ;;
+*)
+  echo "Internal error: Unknown command: $command"
+  exit 1
+esac
--- a/net/remote/README.md
+++ b/net/remote/README.md
@ -0,0 +1 @@
+Scripts that run on the remote testnet nodes
--- a/net/remote/remote-client.sh
+++ b/net/remote/remote-client.sh
@ -0,0 +1,83 @@
+#!/bin/bash -e
+
+cd "$(dirname "$0")"/../..
+
+echo "$(date) | $0 $*" > client.log
+
+deployMethod="$1"
+entrypointIp="$2"
+numNodes="$3"
+RUST_LOG="$4"
+export RUST_LOG=${RUST_LOG:-solana=info} # if RUST_LOG is unset, default to info
+
+missing() {
+  echo "Error: $1 not specified"
+  exit 1
+}
+
+[[ -n $deployMethod ]] || missing deployMethod
+[[ -n $entrypointIp ]] || missing entrypointIp
+[[ -n $numNodes ]]     || missing numNodes
+
+source net/common.sh
+loadConfigFile
+
+threadCount=$(nproc)
+if [[ $threadCount -gt 4 ]]; then
+  threadCount=4
+fi
+
+case $deployMethod in
+snap)
+  net/scripts/rsync-retry.sh -vPrc "$entrypointIp:~/solana/solana.snap" .
+  sudo snap install solana.snap --devmode --dangerous
+
+  solana_bench_tps=/snap/bin/solana.bench-tps
+  solana_keygen=/snap/bin/solana.keygen
+  ;;
+local)
+  PATH="$HOME"/.cargo/bin:"$PATH"
+  export USE_INSTALL=1
+  export SOLANA_DEFAULT_METRICS_RATE=1
+
+  net/scripts/rsync-retry.sh -vPrc "$entrypointIp:~/.cargo/bin/solana*" ~/.cargo/bin/
+  solana_bench_tps=solana-bench-tps
+  solana_keygen=solana-keygen
+  ;;
+*)
+  echo "Unknown deployment method: $deployMethod"
+  exit 1
+esac
+
+scripts/oom-monitor.sh > oom-monitor.log 2>&1 &
+scripts/net-stats.sh  > net-stats.log 2>&1 &
+
+! tmux list-sessions || tmux kill-session
+
+clientCommand="\
+  $solana_bench_tps \
+    --network $entrypointIp:8001 \
+    --identity client.json \
+    --num-nodes $numNodes \
+    --duration 600 \
+    --sustained \
+    --threads $threadCount \
+"
+
+keygenCommand="$solana_keygen -o client.json"
+tmux new -s solana-bench-tps -d "
+  [[ -r client.json ]] || {
+    echo '$ $keygenCommand'  | tee -a client.log
+    $keygenCommand >> client.log 2>&1
+  }
+
+  while true; do
+    echo === Client start: \$(date) | tee -a client.log
+    $metricsWriteDatapoint 'testnet-deploy client-begin=1'
+    echo '$ $clientCommand' | tee -a client.log
+    $clientCommand >> client.log 2>&1
+    $metricsWriteDatapoint 'testnet-deploy client-complete=1'
+  done
+"
+sleep 1
+tmux capture-pane -t solana-bench-tps -p -S -100
--- a/net/remote/remote-node.sh
+++ b/net/remote/remote-node.sh
@ -0,0 +1,113 @@
+#!/bin/bash -e
+
+cd "$(dirname "$0")"/../..
+
+deployMethod="$1"
+nodeType="$2"
+publicNetwork="$3"
+entrypointIp="$4"
+numNodes="$5"
+RUST_LOG="$6"
+
+missing() {
+  echo "Error: $1 not specified"
+  exit 1
+}
+
+[[ -n $deployMethod ]]  || missing deployMethod
+[[ -n $nodeType ]]      || missing nodeType
+[[ -n $publicNetwork ]] || missing publicNetwork
+[[ -n $entrypointIp ]]  || missing entrypointIp
+[[ -n $numNodes ]]      || missing numNodes
+
+cat > deployConfig <<EOF
+deployMethod="$deployMethod"
+entrypointIp="$entrypointIp"
+numNodes="$numNodes"
+EOF
+
+source net/common.sh
+loadConfigFile
+
+if [[ $publicNetwork = true ]]; then
+  setupArgs="-p"
+else
+  setupArgs="-l"
+fi
+
+
+case $deployMethod in
+snap)
+  SECONDS=0
+  [[ $nodeType = leader ]] ||
+    net/scripts/rsync-retry.sh -vPrc "$entrypointIp:~/solana/solana.snap" .
+  sudo snap install solana.snap --devmode --dangerous
+
+  commonNodeConfig="\
+    leader-ip=$entrypointIp \
+    default-metrics-rate=1 \
+    metrics-config=$SOLANA_METRICS_CONFIG \
+    rust-log=$RUST_LOG \
+    setup-args=$setupArgs \
+  "
+
+  if [[ -e /dev/nvidia0 ]]; then
+    commonNodeConfig="$commonNodeConfig enable-cuda=1"
+  fi
+
+  if [[ $nodeType = leader ]]; then
+    nodeConfig="mode=leader+drone $commonNodeConfig"
+    ln -sf -T /var/snap/solana/current/leader/current leader.log
+    ln -sf -T /var/snap/solana/current/drone/current drone.log
+  else
+    nodeConfig="mode=validator $commonNodeConfig"
+    ln -sf -T /var/snap/solana/current/validator/current validator.log
+  fi
+
+  logmarker="solana deploy $(date)/$RANDOM"
+  logger "$logmarker"
+
+  # shellcheck disable=SC2086 # Don't want to double quote "$nodeConfig"
+  sudo snap set solana $nodeConfig
+  snap info solana
+  sudo snap get solana
+  echo Slight delay to get more syslog output
+  sleep 2
+  sudo grep -Pzo "$logmarker(.|\\n)*" /var/log/syslog
+
+  echo "Succeeded in ${SECONDS} seconds"
+  ;;
+local)
+  PATH="$HOME"/.cargo/bin:"$PATH"
+  export USE_INSTALL=1
+  export RUST_LOG
+  export SOLANA_DEFAULT_METRICS_RATE=1
+
+  ./fetch-perf-libs.sh
+  export LD_LIBRARY_PATH="$PWD/target/perf-libs:$LD_LIBRARY_PATH"
+
+  scripts/oom-monitor.sh  > oom-monitor.log 2>&1 &
+  scripts/net-stats.sh  > net-stats.log 2>&1 &
+
+  case $nodeType in
+  leader)
+    ./multinode-demo/setup.sh -t leader $setupArgs
+    ./multinode-demo/drone.sh > drone.log 2>&1 &
+    ./multinode-demo/leader.sh > leader.log 2>&1 &
+    ;;
+  validator)
+    net/scripts/rsync-retry.sh -vPrc "$entrypointIp:~/.cargo/bin/solana*" ~/.cargo/bin/
+
+    ./multinode-demo/setup.sh -t validator $setupArgs
+    ./multinode-demo/validator.sh "$entrypointIp":~/solana "$entrypointIp:8001" >validator.log 2>&1 &
+    ;;
+  *)
+    echo "Error: unknown node type: $nodeType"
+    exit 1
+    ;;
+  esac
+  ;;
+*)
+  echo "Unknown deployment method: $deployMethod"
+  exit 1
+esac
--- a/net/remote/remote-sanity.sh
+++ b/net/remote/remote-sanity.sh
@ -0,0 +1,138 @@
+#!/bin/bash -e
+#
+# This script is to be run on the leader node
+#
+
+cd "$(dirname "$0")"/../..
+
+deployMethod=
+entrypointIp=
+numNodes=
+
+[[ -r deployConfig ]] || {
+  echo deployConfig missing
+  exit 1
+}
+# shellcheck source=/dev/null # deployConfig is written by remote-node.sh
+source deployConfig
+
+missing() {
+  echo "Error: $1 not specified"
+  exit 1
+}
+
+[[ -n $deployMethod ]] || missing deployMethod
+[[ -n $entrypointIp ]] || missing entrypointIp
+[[ -n $numNodes ]]     || missing numNodes
+
+# Consume leading "-o <option>" pairs.  Each recognized option switches off one
+# of the sanity stages further down; anything else is a fatal error.
+ledgerVerify=true
+validatorSanity=true
+while [[ $1 = -o ]]; do
+  opt="$2"
+  shift 2
+  if [[ $opt = noLedgerVerify ]]; then
+    ledgerVerify=false
+  elif [[ $opt = noValidatorSanity ]]; then
+    validatorSanity=false
+  else
+    echo "Error: unknown option: $opt"
+    exit 1
+  fi
+done
+
+source net/common.sh
+loadConfigFile
+
+case $deployMethod in
+snap)
+  PATH="/snap/bin:$PATH"
+  export USE_SNAP=1
+  entrypointRsyncUrl="$entrypointIp"
+
+  solana_bench_tps=solana.bench-tps
+  solana_ledger_tool=solana.ledger-tool
+  solana_keygen=solana.keygen
+
+  ledger=/var/snap/solana/current/config/ledger
+  client_id=~/snap/solana/current/config/client-id.json
+
+  ;;
+local)
+  PATH="$HOME"/.cargo/bin:"$PATH"
+  export USE_INSTALL=1
+  entrypointRsyncUrl="$entrypointIp:~/solana"
+
+  solana_bench_tps=solana-bench-tps
+  solana_ledger_tool=solana-ledger-tool
+  solana_keygen=solana-keygen
+
+  ledger=config/ledger
+  client_id=config/client-id.json
+  ;;
+*)
+  echo "Unknown deployment method: $deployMethod"
+  exit 1
+esac
+
+
+# NOTE(review): the "---", "+++" and "^^^ +++" strings echoed below look like
+# Buildkite log-group markers (collapsed group, expanded group, and "expand the
+# previous group") -- confirm against the CI that consumes this output.
+
+# Stage 1: run the wallet sanity script against the entrypoint.
+echo "--- $entrypointIp: wallet sanity"
+(
+  set -x
+  scripts/wallet-sanity.sh "$entrypointRsyncUrl"
+)
+
+# Stage 2: generate a client identity and run bench-tps in --converge-only
+# mode, passing the expected node count.
+echo "+++ $entrypointIp: node count ($numNodes expected)"
+(
+  set -x
+  $solana_keygen -o "$client_id"
+  $solana_bench_tps --network "$entrypointIp:8001" --identity "$client_id" --num-nodes "$numNodes" --converge-only
+)
+
+# Stage 3: verify a copy of the ledger, unless disabled with "-o noLedgerVerify"
+# or the ledger directory is absent.
+echo "--- $entrypointIp: verify ledger"
+if $ledgerVerify; then
+  if [[ -d $ledger ]]; then
+    (
+      set -x
+      # Verification runs on a copy under /var/tmp rather than on $ledger itself
+      rm -rf /var/tmp/ledger-verify
+      du -hs "$ledger"
+      time cp -r "$ledger" /var/tmp/ledger-verify
+      time $solana_ledger_tool --ledger /var/tmp/ledger-verify verify
+    )
+  else
+    echo "^^^ +++"
+    echo "Ledger verify skipped: directory does not exist: $ledger"
+  fi
+else
+  echo "^^^ +++"
+  echo "Note: ledger verify disabled"
+fi
+
+
+# Stage 4: start a throwaway validator against the network for ten seconds,
+# capture its output in validator.log and fail if the log mentions a panic.
+# Skipped when "-o noValidatorSanity" was given.
+echo "--- $entrypointIp: validator sanity"
+if $validatorSanity; then
+  (
+    set -ex -o pipefail
+    # errexit is ignored inside a subshell that sits on the left-hand side of
+    # `||`, so a setup failure has to be propagated explicitly; otherwise the
+    # validator would be started on top of a broken setup.
+    ./multinode-demo/setup.sh -t validator || exit 1
+    timeout 10s ./multinode-demo/validator.sh "$entrypointRsyncUrl" "$entrypointIp:8001" 2>&1 | tee validator.log
+  ) || {
+    exitcode=$?
+    # 124 is how timeout(1) reports that it had to stop the command, which is
+    # the expected outcome here.  Anything else is a real failure.
+    [[ $exitcode -eq 124 ]] || exit $exitcode
+  }
+  wc -l validator.log
+  if grep -C100 panic validator.log; then
+    echo "^^^ +++"
+    echo "Panic observed"
+    exit 1
+  else
+    echo "Validator log looks ok"
+  fi
+else
+  echo "^^^ +++"
+  echo "Note: validator sanity disabled"
+fi
+
+echo --- Pass
--- a/net/scripts/create-solana-user.sh
+++ b/net/scripts/create-solana-user.sh
@ -0,0 +1,27 @@
+#!/bin/bash -ex
+
+[[ $(uname) = Linux ]] || exit 1
+[[ $USER = root ]] || exit 1
+
+adduser solana --gecos "" --disabled-password --quiet
+adduser solana sudo
+echo "solana ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
+id solana
+
+[[ -r /solana-id_ecdsa ]] || exit 1
+[[ -r /solana-id_ecdsa.pub ]] || exit 1
+
+sudo -u solana bash -c "
+  mkdir -p /home/solana/.ssh/
+  cd /home/solana/.ssh/
+  cp /solana-id_ecdsa.pub authorized_keys
+  umask 377
+  cp /solana-id_ecdsa id_ecdsa
+  echo \"
+    Host *
+    BatchMode yes
+    IdentityFile ~/.ssh/id_ecdsa
+    StrictHostKeyChecking no
+  \" > config
+"
+
--- a/net/scripts/disable-background-upgrades.sh
+++ b/net/scripts/disable-background-upgrades.sh
@ -0,0 +1,20 @@
+#!/bin/bash -ex
+#
+# Stop background package upgrades so that they cannot get in the way of
+# |apt-get|
+#
+# TODO: This approach is pretty uncompromising.  An alternative solution that
+#       doesn't involve deleting system files would be welcome.
+
+[[ $(uname) = Linux ]] || exit 1
+[[ $USER = root ]] || exit 1
+
+# Remove the upgrade programs so they cannot be started again...
+rm -rf /usr/lib/apt/apt.systemd.daily
+rm -rf /usr/bin/unattended-upgrade
+
+# ...and stop any instance that is already running (it is fine if none is)
+for proc in apt.systemd.daily unattended-upgrade; do
+  killall "$proc" || true
+done
+
+# Block until nothing has the dpkg lock file open any more
+until ! fuser /var/lib/dpkg/lock; do
+  echo Waiting for lock release...
+  sleep 1
+done
+
--- a/net/scripts/gcloud.sh
+++ b/net/scripts/gcloud.sh
@ -0,0 +1,187 @@
+# |source| this file
+#
+# Utilities for working with gcloud
+#
+
+
+#
+# gcloud_FindInstances [filter] [options]
+#
+# Find instances matching the specified pattern.
+#
+# For each matching instance, an entry in the `instances` array will be added with the
+# following information about the instance:
+#   "name:zone:public IP:private IP"
+#
+# filter   - The instances to filter on
+# options  - If set to the string "show", the list of instances will be echoed
+#            to stdout
+#
+# examples:
+#   $ gcloud_FindInstances "name=exact-machine-name"
+#   $ gcloud_FindInstances "name~^all-machines-with-a-common-machine-prefix"
+#
+gcloud_FindInstances() {
+  declare filter="$1"
+  declare options="$2"
+  # Global result array, rebuilt from scratch on every call
+  instances=()
+
+  declare name zone publicIp privateIp status
+  while read -r name zone publicIp privateIp status; do
+    # Only instances that are up are reported
+    [[ $status = RUNNING ]] || {
+      echo "Warning: $name is not RUNNING, ignoring it."
+      continue
+    }
+
+    [[ $options != show ]] ||
+      printf "%-30s | %-16s publicIp=%-16s privateIp=%s\n" "$name" "$zone" "$publicIp" "$privateIp"
+
+    instances+=("$name:$zone:$publicIp:$privateIp")
+  done < <(gcloud compute instances list \
+             --filter="$filter" \
+             --format 'value(name,zone,networkInterfaces[0].accessConfigs[0].natIP,networkInterfaces[0].networkIP,status)')
+}
+
+#
+# gcloud_ForEachInstance [cmd] [extra args to cmd]
+#
+# Execute a command for each element in the `instances` array
+#
+#   cmd   - The command to execute on each instance
+#           The command will receive arguments followed by any
+#           additional arguments supplied to gcloud_ForEachInstance:
+#               name     - name of the instance
+#               zone     - zone the instance is located in
+#               publicIp - The public IP address of this instance
+#               privateIp - The private IP address of this instance
+#               count    - Monotonically increasing count for each
+#                          invocation of cmd, starting at 1
+#               ...      - Extra args to cmd..
+#
+#
+gcloud_ForEachInstance() {
+  declare cmd="$1"
+  shift
+  [[ -n $cmd ]] || { echo gcloud_ForEachInstance: cmd not specified; exit 1; }
+
+  declare count=1
+  for info in "${instances[@]}"; do
+    declare name zone publicIp privateIp
+    IFS=: read -r name zone publicIp privateIp < <(echo "$info")
+
+    eval "$cmd" "$name" "$zone" "$publicIp" "$privateIp" "$count" "$@"
+    count=$((count + 1))
+  done
+}
+
+#
+# gcloud_CreateInstances [namePrefix] [numNodes] [zone] [imageName]
+#                        [machineType] [bootDiskSize] [accelerator]
+#                        [startupScript] [address]
+#
+# Creates one or more identical instances.
+#
+# namePrefix    - unique string to prefix all the instance names with
+# numNodes      - number of instances to create
+# zone          - zone to create the instances in
+# imageName     - Disk image for the instances
+# machineType   - GCE machine type
+# bootDiskSize  - Optional size of the boot disk
+# accelerator   - Optional accelerator to attach to the instance(s), see
+#                 eg, request 4 K80 GPUs with "count=4,type=nvidia-tesla-k80"
+# startupScript - Optional startup script to execute when the instance boots
+# address       - Optional name of the GCE static IP address to attach to the
+#                 instance.  Requires that |numNodes| = 1 and that addressName
+#                 has been provisioned in the GCE region that is hosting |zone|
+#
+# Tip: use gcloud_FindInstances to locate the instances once this function
+#      returns
+gcloud_CreateInstances() {
+  declare namePrefix="$1"
+  declare numNodes="$2"
+  declare zone="$3"
+  declare imageName="$4"
+  declare machineType="$5"
+  declare optionalBootDiskSize="$6"
+  declare optionalAccelerator="$7"
+  declare optionalStartupScript="$8"
+  declare optionalAddress="$9"
+
+  declare nodes
+  if [[ $numNodes = 1 ]]; then
+    nodes=("$namePrefix")
+  else
+    read -ra nodes <<<$(seq -f "${namePrefix}%0${#numNodes}g" 1 "$numNodes")
+  fi
+
+  declare -a args
+  args=(
+    "--zone=$zone"
+    "--tags=testnet"
+    "--image=$imageName"
+    "--machine-type=$machineType"
+  )
+  if [[ -n $optionalBootDiskSize ]]; then
+    args+=(
+      "--boot-disk-size=$optionalBootDiskSize"
+    )
+  fi
+  if [[ -n $optionalAccelerator ]]; then
+    args+=(
+      "--accelerator=$optionalAccelerator"
+      --maintenance-policy TERMINATE
+      --restart-on-failure
+    )
+  fi
+  if [[ -n $optionalStartupScript ]]; then
+    args+=(
+      --metadata-from-file "startup-script=$optionalStartupScript"
+    )
+  fi
+
+  if [[ -n $optionalAddress ]]; then
+    [[ $numNodes = 1 ]] || {
+      echo "Error: address may not be supplied when provisioning multiple nodes: $optionalAddress"
+      exit 1
+    }
+    args+=(
+      "--address=$optionalAddress"
+    )
+  fi
+
+  (
+    set -x
+    gcloud beta compute instances create "${nodes[@]}" "${args[@]}"
+  )
+}
+
+#
+# gcloud_DeleteInstances [yes]
+#
+# Deletes all the instances listed in the `instances` array
+#
+# If yes = "true", skip the delete confirmation
+#
+gcloud_DeleteInstances() {
+  declare maybeQuiet=
+  if [[ $1 = true ]]; then
+    maybeQuiet=--quiet
+  fi
+
+  # Test the number of elements in the array.  (${#instances[0]} would be the
+  # string length of the first entry instead.)
+  if [[ ${#instances[@]} -eq 0 ]]; then
+    echo No instances to delete
+    return
+  fi
+  # Each entry is "name:zone:public IP:private IP"; keep only the name
+  declare names=("${instances[@]/:*/}")
+
+  # Assume all instances are in the same zone
+  # TODO: One day this assumption will be invalid
+  declare zone
+  IFS=: read -r _ zone _ < <(echo "${instances[0]}")
+
+  (
+    set -x
+    # $maybeQuiet is deliberately unquoted so that it vanishes when empty
+    gcloud beta compute instances delete --zone "$zone" $maybeQuiet "${names[@]}"
+  )
+}
+
--- a/net/scripts/install-earlyoom.sh
+++ b/net/scripts/install-earlyoom.sh
@ -0,0 +1,30 @@
+#!/bin/bash -ex
+#
+# Install EarlyOOM
+#
+
+# Only meaningful on Linux, and only when run as root
+[[ $(uname) = Linux ]] || exit 1
+[[ $USER = root ]] || exit 1
+
+# 64 - enable signalling of processes (term, kill, oom-kill)
+# TODO: This setting will not persist across reboots
+# The bit is OR-ed into whatever value kernel.sysrq currently holds
+sysctl -w kernel.sysrq=$(( $(cat /proc/sys/kernel/sysrq) | 64 ))
+
+if command -v earlyoom; then
+  # Already installed: just report the state of the service
+  systemctl status earlyoom
+else
+  # Fetch and install the package
+  wget http://ftp.us.debian.org/debian/pool/main/e/earlyoom/earlyoom_1.1-2_amd64.deb
+  apt install --quiet --yes ./earlyoom_1.1-2_amd64.deb
+
+  # Write the service defaults to a scratch file in the current directory,
+  # copy it into /etc/default/ and remove the scratch file
+  cat > earlyoom <<OOM
+  # use the kernel OOM killer, trigger at 20% available RAM,
+  EARLYOOM_ARGS="-k -m 20"
+OOM
+  cp earlyoom /etc/default/
+  rm earlyoom
+
+  # Restart the service now that the new defaults file is in place
+  systemctl stop earlyoom
+  systemctl enable earlyoom
+  systemctl start earlyoom
+fi
+
--- a/net/scripts/install-libssl-compatability.sh
+++ b/net/scripts/install-libssl-compatability.sh
@ -0,0 +1,18 @@
+#!/bin/bash -ex
+#
+# Install the libssl packages that prebuilt binaries need
+#
+
+[[ $(uname) = Linux ]] || exit 1
+[[ $USER = root ]] || exit 1
+
+# libssl-dev: for compatibility with binaries built on an Ubuntu machine...
+apt-get update
+apt-get --assume-yes install libssl-dev
+
+# libssl1.1: for compatibility with binaries built in the solanalabs/rust
+# docker image
+#
+# cc: https://github.com/solana-labs/solana/issues/1090
+# cc: https://packages.ubuntu.com/bionic/amd64/libssl1.1/download
+libssl11Deb=libssl1.1_1.1.0g-2ubuntu4.1_amd64.deb
+wget http://security.ubuntu.com/ubuntu/pool/main/o/openssl/"$libssl11Deb"
+dpkg -i "$libssl11Deb"
+rm "$libssl11Deb"
+
--- a/net/scripts/install-rsync.sh
+++ b/net/scripts/install-rsync.sh
@ -0,0 +1,19 @@
+#!/bin/bash -ex
+#
+# Rsync setup for Snap builds
+#
+
+[[ $(uname) = Linux ]] || exit 1
+[[ $USER = root ]] || exit 1
+
+apt-get --assume-yes install rsync
+cat > /etc/rsyncd.conf <<-EOF
+[config]
+path = /var/snap/solana/current/config
+hosts allow = *
+read only = true
+EOF
+
+systemctl enable rsync
+systemctl start rsync
+
--- a/net/scripts/rsync-retry.sh
+++ b/net/scripts/rsync-retry.sh
@ -0,0 +1,12 @@
+#!/bin/bash
+#
+# rsync wrapper that retries a few times on failure
+#
+# All arguments are handed to rsync unchanged.  Exits 0 as soon as one attempt
+# succeeds, or 1 once every attempt has failed.
+#
+
+for i in $(seq 1 5); do
+  (
+    set -x
+    rsync "$@"
+  ) && exit 0
+  echo Retry "$i"...
+done
+
+# Every attempt failed.  Without an explicit status the script would end with
+# the status of the final |echo| (0) and callers would believe the transfer
+# had succeeded.
+exit 1
--- a/net/ssh.sh
+++ b/net/ssh.sh
@ -0,0 +1,69 @@
+#!/bin/bash
+
+here=$(dirname "$0")
+# shellcheck source=net/common.sh
+source "$here"/common.sh
+
+# Print the help text and exit.  When a message is supplied it is reported as
+# an error first and the exit status is 1; otherwise the exit status is 0.
+usage() {
+  if [[ -z "$1" ]]; then
+    exitcode=0
+  else
+    exitcode=1
+    echo "Error: $*"
+  fi
+  cat <<EOF
+usage: $0 [ipAddress] [extra ssh arguments]
+
+ssh into a node
+
+ ipAddress     - IP address of the desired node.
+
+If ipAddress is unspecified, a list of available nodes will be displayed.
+
+EOF
+  exit $exitcode
+}
+
+# The only recognized option is -h.  Both -h and any unknown option (which
+# getopts reports as "?") print the usage text and exit with status 0.
+while getopts "h?" opt; do
+  case $opt in
+  h | \?)
+    usage
+    ;;
+  *)
+    usage "Error: unhandled option: $opt"
+    ;;
+  esac
+done
+
+loadConfigFile
+
+# The first argument, if any, is the node to connect to; everything after it is
+# forwarded to ssh.
+ipAddress=$1
+shift
+if [[ -n "$ipAddress" ]]; then
+  set -x
+  # exec replaces this script with ssh, so nothing below runs in this case
+  exec ssh "${sshOptions[@]}" "$ipAddress" "$@"
+fi
+
+# Print one line of the node listing: the command that opens a shell on the
+# node, followed by the command that tails the node's log.
+#
+#   $1 - node type; also the base name of the log file under solana/
+#   $2 - IP address of the node
+printNode() {
+  declare nodeType=$1
+  declare ip=$2
+  # Everything variable is passed as an argument rather than embedded in the
+  # format, so a stray '%' or '\' in $0 cannot be misread as a printf directive.
+  printf "  %-25s | For logs run: %s %s tail -f solana/%s.log\n" "$0 $ip" "$0" "$ip" "$nodeType"
+}
+
+# No IP address was given: list every known node, grouped by role.
+echo Leader:
+printNode leader "$leaderIp"
+echo
+
+echo Validators:
+for ipAddress in "${validatorIpList[@]}"; do
+  printNode validator "$ipAddress"
+done
+echo
+
+echo Clients:
+if ((${#clientIpList[@]} > 0)); then
+  for ipAddress in "${clientIpList[@]}"; do
+    printNode client "$ipAddress"
+  done
+else
+  echo "  None"
+fi
+
+exit 0
--- a/rfcs/rfc-001-smart-contracts-engine.md
+++ b/rfcs/rfc-001-smart-contracts-engine.md
@ -4,7 +4,7 @@ The goal of this RFC is to define a set of constraints for APIs and runtime such

 ## Version

-version 0.1 
+version 0.2 

 ## Toolchain Stack

@ -37,154 +37,175 @@ version 0.1

 In Figure 1 an untrusted client, creates a program in the front-end language of her choice, (like C/C++/Rust/Lua), and compiles it with LLVM to a position independent shared object ELF, targeting BPF bytecode. Solana will safely load and execute the ELF.

-## Bytecode
-
-Our bytecode is based on Berkley Packet Filter. The requirements for BPF overlap almost exactly with the requirements we have:
-
-1. Deterministic amount of time to execute the code
-2. Bytecode that is portable between machine instruction sets
-3. Verified memory accesses
-4. Fast to load the object, verify the bytecode and JIT to local machine instruction set
-
-For 1, that means that loops are unrolled, and for any jumps back we can guard them with a check against the number of instruction that have been executed at this point.  If the limit is reached, the program yields its execution.  This involves saving the stack and current instruction index.
-
-For 2, the BPF bytecode already easily maps to x86–64, arm64 and other instruction sets. 
-
-For 3, every load and store that is relative can be checked to be within the expected memory that is passed into the ELF.  Dynamic load and stores can do a runtime check against available memory, these will be slow and should be avoided.
-
-For 4, Fully linked PIC ELF with just a single RX segment. Effectively we are linking a shared object with `-fpic -target bpf` and with a linker script to collect everything into a single RX segment. Writable globals are not supported.
-
-### Address Checks
-
-The interface to the module takes a `&mut Vec<Vec<u8>>` in rust, or a `int sz, void* data[sz], int szs[sz]` in `C`.  Given the module's bytecode, for each method, we need to analyze the bounds on load and stores into each buffer the module uses.  This check needs to be done `on chain`, and after those bounds are computed we can verify that the user supplied array of buffers will not cause a memory fault.  For load and stores that we cannot analyze, we can replace with a `safe_load` and `safe_store` instruction that will check the table for access.
-
-## Loader
-The loader is our first smart contract. The job of this contract is to load the actual program with its own instance data.  The loader will verify the bytecode and that the object implements the expected entry points.
-
-Since there is only one RX segment, the context for the contract instance is passed into each entry point as well as the event data for that entry point.
-
-A client will create a transaction to create a new loader instance:
-
-`Solana_NewLoader(Loader Instance PubKey, proof of key ownership, space I need for my elf)`
-
-A client will then do a bunch of transactions to load its elf into the loader instance they created:
-
-`Loader_UploadElf(Loader Instance PubKey, proof of key ownership, pos start, pos end, data)`
-
-At this point the client can create a new instance of the module with its own instance address:
-
-`Loader_NewInstance(Loader Instance PubKey, proof of key ownership, Instance PubKey, proof of key ownership)`
-
-Once the instance has been created, the client may need to upload more user data to solana to configure this instance:
-
-`Instance_UploadModuleData(Instance PubKey, proof of key ownership, pos start, pos end, data)`
-
-Now clients can `start` the instance:
-
-`Instance_Start(Instance PubKey, proof of key ownership)`
-
 ## Runtime

-Our goal with the runtime is to have a general purpose execution environment that is highly parallelizable and doesn't require dynamic resource management. We want to execute as many contracts as we can in parallel, and have them pass or fail without a destructive state change.
-
-### State and Entry Point
-
-State is addressed by an account which is at the moment simply the PubKey.  Our goal is to eliminate dynamic memory allocation in the smart contract itself, so the contract is a function that takes a mapping of [(PubKey,State)] and returns [(PubKey, State')].  The output of keys is a subset of the input.  Three basic kinds of state exist:
-
-* Instance State
-* Participant State
-* Caller State
-
-There isn't any difference in how each is implemented, but conceptually Participant State is memory that is allocated for each participant in the contract.  Instance State is memory that is allocated for the contract itself, and Caller State is memory that the transactions caller has allocated.
+The goal with the runtime is to have a general purpose execution environment that is highly parallelizable and doesn't require dynamic resource management. The goal is to execute as many contracts as possible in parallel, and have them pass or fail without a destructive state change.


-### Call
+### State

+State is addressed by an account which is at the moment simply the Pubkey.  Our goal is to eliminate memory allocation from within the smart contract itself.  Thus the client of the contract provides all the state that is necessary for the contract to execute in the transaction itself.  The runtime interacts with the contract through a state transition function, which takes a mapping of [(Pubkey,State)] and returns [(Pubkey, State')].  The State is an opaque type to the runtime, a `Vec<u8>`, the contents of which the contract has full control over.
+
+### Call Structure
 ```
-void call(
-    const struct instance_data *data,
-    const uint8_t kind[],  //instance|participant|caller|read|write
-    const uint8_t *keys[],
-    uint8_t *data[],
-    int num,
-    uint8_t dirty[],        //dirty memory bits
-    uint8_t *userdata,      //current transaction data
-);
+/// Call definition
+/// Signed portion
+#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
+pub struct CallData {
+    /// Each Pubkey in this vector is mapped to a corresponding `Page` that is loaded for contract execution
+    /// In a simple pay transaction `key[0]` is the token owner's key and `key[1]` is the recipient's key.
+    pub keys: Vec<Pubkey>,
+
+    /// The Pubkeys that are required to have a proof.  The proofs are a `Vec<Signature>` which is encoded alongside this data structure
+    /// Each Signature signs the `required_proofs` vector as well as the `keys` vectors.  The transaction is valid if and only if all
+    /// the required signatures are present and the public key vector is unchanged between signatures.
+    pub required_proofs: Vec<u8>,
+
+    /// PoH data
+    /// last PoH hash observed by the sender
+    pub last_id: Hash,
+
+    /// Program
+    /// The address of the program we want to call.  ContractId is just a Pubkey that is the address of the loaded code that will execute this Call.
+    pub contract_id: ContractId,
+    /// OS scheduling fee
+    pub fee: i64,
+    /// struct version to prevent duplicate spends
+    /// Calls with a version <= Page.version are rejected
+    pub version: u64,
+    /// method to call in the contract
+    pub method: u8,
+    /// userdata in bytes
+    pub userdata: Vec<u8>,
+}
+
+#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
+pub struct Call {
+    /// Signatures and Keys
+    /// (signature, key index)
+    /// This vector contains a tuple of signatures, and the key index the signature is for
+    /// proofs[0] is always key[0]
+    pub proofs: Vec<Signature>,
+    pub data: CallData,
+}
 ```

-To call this operation, the transaction that is destined to the contract instance specifies what keyed state it should present to the `call` function.  To allocate the state memory or a call context, the client has to first call a function on the contract with the designed address that will own the state.
+At its core, this is just a set of Pubkeys and Signatures with a bit of metadata.  The contract Pubkey routes this transaction into that contract's entry point.  `version` is used for dropping retransmitted requests.

-At its core, this is a system call that requires cryptographic proof of ownership of memory regions instead of an OS that checks page tables for access rights.
-
-* `Instance_AllocateContext(Instance PubKey, My PubKey, Proof of key ownership)`
-
-Any transaction can then call `call` on the contract with a set of keys.  It's up to the contract itself to manage ownership:
-
-* `Instance_Call(Instance PubKey, [Context PubKeys], proofs of ownership, userdata...)`
-
-Contracts should be able to read any state that is part of solana, but only write to state that the contract allocated.
-
-#### Caller State
-
-Caller `state` is memory allocated for the `call` that belongs to the public key that is issuing the `call`.  This is the caller's context.
-
-#### Instance State
-
-Instance `state` is memory that belongs to this contract instance.  We may also need module-wide `state` as well.
-
-#### Participant State
-
-Participant `state` is any other memory.  In some cases it may make sense to have these allocated as part of the call by the caller.
-
-### Reduce
-
-Some operations on the contract will require iteration over all the keys.  To make this parallelizable the iteration is broken up into reduce calls which are combined.
-
-```
-void reduce_m(
-    const struct instance_data *data,
-    const uint8_t *keys[],
-    const uint8_t *data[],
-    int num,
-    uint8_t *reduce_data,
-);
-
-void reduce_r(
-    const struct instance_data *data,
-    const uint8_t *reduce_data[],
-    int num,
-    uint8_t *reduce_data,
-);
-```
+Contracts should be able to read any state that is part of the runtime, but only write to state that the contract allocated.

 ### Execution

-Transactions are batched and processed in parallel at each stage.
-```
-+-----------+    +--------------+      +-----------+    +---------------+
-| sigverify |-+->| debit commit |---+->| execution |-+->| memory commit |
-+-----------+ |  +--------------+   |  +-----------+ |  +---------------+
-              |                     |                |
-              |  +---------------+  |                |  +--------------+
-              |->| memory verify |->+                +->| debit undo   |
-                 +---------------+                   |  +--------------+
-                                                     |
-                                                     |  +---------------+
-                                                     +->| credit commit |
-                                                        +---------------+
-
+Calls are batched and processed in a pipeline:

 ```
-The `debit verify` stage is very similar to `memory verify`.  Proof of key ownership is used to check if the callers key has some state allocated with the contract, then the memory is loaded and executed.  After execution stage, the dirty pages are written back by the contract.  Because know all the memory accesses during execution, we can batch transactions that do not interfere with each other.  We can also apply the `debit undo` and `credit commit` stages of the transaction.  `debit undo` is run in case of an exception during contract execution, only transfers may be reversed, fees are commited to solana.
++-----------+    +-------------+    +--------------+    +--------------------+    
+| sigverify |--->| lock memory |--->| validate fee |--->| allocate new pages |--->
++-----------+    +-------------+    +--------------+    +--------------------+    
+                                
+    +------------+    +---------+    +--------------+    +--------------+   
+--->| load pages |--->| execute |--->|unlock memory |--->| commit pages |   
+    +------------+    +---------+    +--------------+    +--------------+   

-### GPU execution
+```

-A single contract can read and write to separate key pairs without interference.  These separate calls to the same contract can execute on the same GPU thread over different memory using different SIMD lanes.
+At the `execute` stage, the loaded pages have no data dependencies, so all the contracts can be executed in parallel. 
+## Memory Management
+```
+pub struct Page {
+    /// key that indexes this page
+    /// prove ownership of this key to spend from this Page
+    owner: Pubkey,
+    /// contract that owns this page
+    /// contract can write to the data that is in `memory` vector
+    contract: Pubkey,
+    /// balance that belongs to owner
+    balance: u64,
+    /// version of the structure, public for testing
+    version: u64,
+    /// hash of the page data
+    memhash: Hash,
+    /// The following could be in a separate structure
+    memory: Vec<u8>,
+}
+```
+
+The guarantees that the runtime enforces:
+    1. The contract code is the only code that will modify the contents of `memory`
+    2. Total balances on all the pages is equal before and after execution of a call
+    3. Balances of each of the pages not owned by the contract must be equal to or greater after the call than before the call.
+
+## Entry Point
+Execution of the contract involves mapping the contract's public key to an entry point which takes a pointer to the transaction, and an array of loaded pages.
+```
+// Find the method
+match (tx.contract, tx.method) {
+    // system interface
+    // everyone has the same reallocate
+    (_, 0) => system_0_realloc(&tx, &mut call_pages),
+    (_, 1) => system_1_assign(&tx, &mut call_pages),
+    // contract methods
+    (DEFAULT_CONTRACT, 128) => default_contract_128_move_funds(&tx, &mut call_pages),
+    (contract, method) => //... 
+```
+
+The first 128 methods (0 through 127) are reserved for the system interface, which implements allocation and assignment of memory.  The rest, including the method for moving funds, are implemented by the contract itself.
+
+## System Interface
+```
+/// SYSTEM interface, same for every contract, methods 0 to 127
+/// method 0
+/// reallocate
+/// resize the memory that is associated with the caller's page
+pub fn system_0_realloc(call: &Call, pages: &mut Vec<Page>) {
+    if call.contract == DEFAULT_CONTRACT {
+        let size: u64 = deserialize(&call.userdata).unwrap();
+        pages[0].memory.resize(size as usize, 0u8);
+    }
+}
+/// method 1
+/// assign
+/// assign the page to a contract
+pub fn system_1_assign(call: &Call, pages: &mut Vec<Page>) {
+    let contract = deserialize(&call.userdata).unwrap();
+    if call.contract == DEFAULT_CONTRACT {
+        pages[0].contract = contract;
+        //zero out the memory in pages[0].memory
+        //Contracts need to own the state of that data otherwise a user could fabricate the state and
+        //manipulate the contract
+        pages[0].memory.clear();
+    }
+} 
+```
+The first method resizes the memory that is associated with the caller's page.  The second system call assigns the page to the contract.  Both methods check if the current contract is 0, otherwise the method does nothing and the caller spent their fees.
+
+This ensures that when memory is assigned to the contract the initial state of all the bytes is 0, and the contract itself is the only thing that can modify that state.
+
+## Simplest contract
+```
+/// DEFAULT_CONTRACT interface
+/// All contracts start with 128
+/// method 128
+/// move_funds
+/// spend the funds from the call to the first recipient's
+pub fn default_contract_128_move_funds(call: &Call, pages: &mut Vec<Page>) {
+    let amount: u64 = deserialize(&call.userdata).unwrap();
+    if pages[0].balance >= amount  {
+        pages[0].balance -= amount;
+        pages[1].balance += amount;
+    }
+}
+``` 
+
+This simply moves the amount from page[0], which is the caller's page, to page[1], which is the recipient's page.

 ## Notes

 1. There is no dynamic memory allocation.
-2. Persistant Memory is allocated to a Key with ownership
+2. Persistent Memory is allocated to a Key with ownership
 3. Contracts can `call` to update key owned state
-4. Contracts can `reduce` over the memory to aggregate state
-5. `call` is just a *syscall* that does a cryptographic check of memory owndershp
+4. `call` is just a *syscall* that does a cryptographic check of memory ownership
+5. Kernel guarantees that when memory is assigned to the contract its state is 0
+6. Kernel guarantees that the contract is the only thing that can modify memory that it is assigned to
+7. Kernel guarantees that the contract can only spend tokens that are in pages that are assigned to it
+8. Kernel guarantees the balances belonging to pages are balanced before and after the call
--- a/rfcs/rfc-005-branches-tags-and-channels.md
+++ b/rfcs/rfc-005-branches-tags-and-channels.md
@ -0,0 +1,59 @@
+```
+========================= master branch (edge channel) =======================>
+         \                      \                     \
+          \___v0.7.0 tag         \                     \
+           \                      \         v0.9.0 tag__\
+            \          v0.8.0 tag__\                     \
+ v0.7.1 tag__\                      \                 v0.9 branch (beta channel)
+              \___v0.7.2 tag         \___v0.8.1 tag
+               \                      \
+                \                      \
+           v0.7 branch         v0.8 branch (stable channel)
+
+```
+
+## Branches and Tags
+
+### master branch
+All new development occurs on the `master` branch.
+
+Bug fixes that affect a `vX.Y` branch are first made on `master`.  This is to
+allow a fix some soak time on `master` before it is applied to one or more
+stabilization branches.
+
+Merging to `master` first also helps ensure that fixes applied to one release
+are present for future releases.  (Sometimes the joy of landing a critical
+release blocker in a branch causes you to forget to propagate back to
+`master`!)
+
+Once the bug fix lands on `master` it is cherry-picked into the `vX.Y` branch
+and potentially the `vX.Y-1` branch.  The exception to this rule is when a bug
+fix for `vX.Y` doesn't apply to `master` or `vX.Y-1`.
+
+Immediately after a new stabilization branch is forged, the `Cargo.toml` minor
+version (*Y*) in the `master` branch is incremented by the release engineer.
+Incrementing the major version of the `master` branch is outside the scope of
+this document.
+
+### v*X.Y* stabilization branches
+These are stabilization branches for a given milestone.  They are created off
+the `master` branch as late as possible prior to the milestone release.
+
+### v*X.Y.Z* release tag
+The release tags are created as desired by the owner of the given stabilization
+branch, and cause that *X.Y.Z* release to be shipped to https://crates.io,
+https://snapcraft.io/, and elsewhere.
+
+Immediately after a new v*X.Y.Z* release tag has been created, the `Cargo.toml`
+patch version number (*Z*) of the stabilization branch is incremented by the
+release engineer.
+
+## Channels
+Channels are used by end-users (humans and bots) to consume the branches
+described in the previous section, so they may automatically update to the most
+recent version matching their desired stability.
+
+There are three release channels that map to branches as follows:
+* edge - tracks the `master` branch, least stable.
+* beta - tracks the largest (and latest) `vX.Y` stabilization branch, more stable.
+* stable - tracks the second largest `vX.Y` stabilization branch, most stable.
--- a/scripts/configure-metrics.sh
+++ b/scripts/configure-metrics.sh
@ -0,0 +1,51 @@
+# |source| this file
+#
+# The SOLANA_METRICS_CONFIG environment variable is formatted as a
+# comma-delimited list of parameters. All parameters are optional.
+#
+# Example:
+#   export SOLANA_METRICS_CONFIG="host=<metrics host>,db=<database name>,u=<username>,p=<password>"
+#
+# The following directive disables complaints about unused variables in this
+# file:
+# shellcheck disable=2034
+#
+metricsWriteDatapoint="$(dirname "${BASH_SOURCE[0]}")"/metrics-write-datapoint.sh
+
+# Parses SOLANA_METRICS_CONFIG and exports the matching INFLUX_* variables.
+#
+# Each comma-separated entry has the form name=value, where name is one of
+# host, db, u or p.  Everything after the first '=' is the value, so a value
+# (a password in particular) may itself contain '='.  Entries without a value
+# and entries with an unknown name are reported on stderr and skipped.
+# Does nothing when SOLANA_METRICS_CONFIG is empty or unset.
+configureMetrics() {
+  [[ -n $SOLANA_METRICS_CONFIG ]] || return 0
+
+  # Declare every working variable so nothing leaks into the shell that
+  # sourced this file.
+  declare metricsParams param name value
+  IFS=',' read -r -a metricsParams <<< "$SOLANA_METRICS_CONFIG"
+  for param in "${metricsParams[@]}"; do
+    # Split on the first '=' only.  Splitting with IFS='=' would reject values
+    # that contain '=' and silently drop a trailing '=' (e.g. base64 padding).
+    name="${param%%=*}"
+    value="${param#*=}"
+    if [[ $param != *=* || -z $value ]]; then
+      echo Error: invalid metrics parameter: "$param" >&2
+      continue
+    fi
+
+    case "$name" in
+    host)
+      export INFLUX_HOST="$value"
+      echo INFLUX_HOST="$INFLUX_HOST" >&2
+      ;;
+    db)
+      export INFLUX_DATABASE="$value"
+      echo INFLUX_DATABASE="$INFLUX_DATABASE" >&2
+      ;;
+    u)
+      export INFLUX_USERNAME="$value"
+      echo INFLUX_USERNAME="$INFLUX_USERNAME" >&2
+      ;;
+    p)
+      export INFLUX_PASSWORD="$value"
+      # Never echo the real password into the logs.
+      echo INFLUX_PASSWORD="********" >&2
+      ;;
+    *)
+      echo Error: Unknown metrics parameter name: "$name" >&2
+      ;;
+    esac
+  done
+}
+configureMetrics
--- a/multinode-demo/metrics_write_datapoint.sh
+++ b/multinode-demo/metrics_write_datapoint.sh
@ -1,4 +1,7 @@
 #!/bin/bash -e
+#
+# Send a metrics datapoint
+#

 point=$1
 if [[ -z $point ]]; then
--- a/scripts/net-stats.sh
+++ b/scripts/net-stats.sh
@ -0,0 +1,49 @@
+#!/bin/bash -e
+#
+# Reports network statistics
+#
+
+[[ $(uname) == Linux ]] || exit 0
+
+cd "$(dirname "$0")"
+
+# shellcheck source=scripts/configure-metrics.sh
+source configure-metrics.sh
+
+packets_received=0
+packets_received_diff=0
+receive_errors=0
+receive_errors_diff=0
+rcvbuf_errors=0
+rcvbuf_errors_diff=0
+
+# Samples the counters printed by netstat and refreshes the globals declared
+# above: each counter global receives the latest absolute value and the
+# matching *_diff global receives the change since the previous call.
+update_netstat() {
+  declare net_stat
+  # Capture the netstat output once so all three counters come from the same
+  # snapshot.
+  net_stat=$(netstat -suna)
+
+  declare stats
+  # Each awk call below prints the selected field of the last matching line,
+  # or 0 when no line matches.
+  stats=$(echo "$net_stat" | awk 'BEGIN {tmp_var = 0} /packets received/ {tmp_var = $1} END { print tmp_var }')
+  packets_received_diff=$((stats - packets_received))
+  packets_received="$stats"
+
+  stats=$(echo "$net_stat" | awk 'BEGIN {tmp_var = 0} /packet receive errors/ {tmp_var = $1} END { print tmp_var }')
+  receive_errors_diff=$((stats - receive_errors))
+  receive_errors="$stats"
+
+  # Unlike the two counters above, this value is taken from the second field.
+  stats=$(echo "$net_stat" | awk 'BEGIN {tmp_var = 0} /RcvbufErrors/ {tmp_var = $2} END { print tmp_var }')
+  rcvbuf_errors_diff=$((stats - rcvbuf_errors))
+  rcvbuf_errors="$stats"
+}
+
+# Prime the counters so the first report below covers one interval rather than
+# the absolute counter values.
+update_netstat
+
+# Once per second: sample, print the per-interval deltas and submit them as a
+# "net-stats" datapoint tagged with this host's name.
+while true; do
+  update_netstat
+  report="packets_received=$packets_received_diff,receive_errors=$receive_errors_diff,rcvbuf_errors=$rcvbuf_errors_diff"
+
+  echo "$report"
+  ./metrics-write-datapoint.sh "net-stats,hostname=$HOSTNAME $report"
+  sleep 1
+done
+
+# Only reached if the loop above terminates; report that as a failure.
+exit 1
--- a/multinode-demo/oom_monitor.sh
+++ b/multinode-demo/oom_monitor.sh
@ -3,19 +3,21 @@
 # Reports Linux OOM Killer activity
 #

-here=$(dirname "$0")
-# shellcheck source=multinode-demo/common.sh
-source "$here"/common.sh
+cd "$(dirname "$0")"

-if [[ $(uname) != Linux ]]; then
-  exit 0
-fi
+# shellcheck source=scripts/oom-score-adj.sh
+source oom-score-adj.sh
+
+# shellcheck source=scripts/configure-metrics.sh
+source configure-metrics.sh
+
+[[ $(uname) = Linux ]] || exit 0

 syslog=/var/log/syslog
-if [[ ! -r $syslog ]]; then
+[[ -r $syslog ]] || {
  echo Unable to read $syslog
-  exit 0
-fi
+  exit 1
+}

 # Adjust OOM score to reduce the chance that this script will be killed
 # during an Out of Memory event since the purpose of this script is to
@ -24,9 +26,10 @@ oom_score_adj "self" -500

 while read -r victim; do
  echo "Out of memory event detected, $victim killed"
-  "$here"/metrics_write_datapoint.sh "oom-killer,victim=$victim killed=1"
+  ./metrics-write-datapoint.sh "oom-killer,victim=$victim,hostname=$HOSTNAME killed=1"
 done < <( \
  tail --follow=name --retry -n0 $syslog \
  | sed --unbuffered -n 's/^.* Out of memory: Kill process [1-9][0-9]* (\([^)]*\)) .*/\1/p' \
 )
+
 exit 1
--- a/scripts/oom-score-adj.sh
+++ b/scripts/oom-score-adj.sh
@ -0,0 +1,20 @@
+# |source| this file
+#
+# Adjusts the OOM score for the specified process.  Linux only
+#
+# usage: oom_score_adj [pid] [score]
+#
+oom_score_adj() {
+  declare pid=$1
+  declare score=$2
+  # The /proc interface used below is only attempted on Linux; do nothing
+  # elsewhere.
+  if [[ $(uname) != Linux ]]; then
+    return
+  fi
+
+  # Best effort: "|| true" swallows the failure status of the write (and of the
+  # read-back), so a failure is reported by the comparison below instead.
+  echo "$score" > "/proc/$pid/oom_score_adj" || true
+  declare currentScore
+  currentScore=$(cat "/proc/$pid/oom_score_adj" || true)
+  # Read the value back to confirm that the requested score was applied.
+  if [[ $score != "$currentScore" ]]; then
+    echo "Failed to set oom_score_adj to $score for pid $pid (current score: $currentScore)"
+  fi
+}
--- a/scripts/perf-stats.py
+++ b/scripts/perf-stats.py
@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+
+import json
+import sys
+
+# Per-counter statistics, keyed by counter name and filled in while the log is
+# parsed below.
+stages_data = {}
+
+# Exactly one argument is required: the log file to analyse.
+if len(sys.argv) != 2:
+    print("USAGE: {} <input file>".format(sys.argv[0]))
+    sys.exit(1)
+
+with open(sys.argv[1]) as fh:
+    for line in fh.readlines():
+        # Only lines containing "COUNTER" are considered; the JSON payload is
+        # everything from the first "{" to the end of the line.
+        if "COUNTER" in line:
+            json_part = line[line.find("{"):]
+            x = json.loads(json_part)
+            counter = x['name']
+            if not (counter in stages_data):
+                stages_data[counter] = {'first_ts': x['now'], 'last_ts': x['now'], 'last_count': 0,
+                                        'data': [], 'max_speed': 0, 'min_speed': 9999999999.0,
+                                        'count': 0,
+                                        'max_speed_ts': 0, 'min_speed_ts': 0}
+            stages_data[counter]['count'] += 1
+            count_since_last = x['counts'] - stages_data[counter]['last_count']
+            time_since_last = float(x['now'] - stages_data[counter]['last_ts'])
+            # The first sample of a counter has time_since_last == 0, so it only
+            # establishes the baseline; the guard also avoids dividing by zero.
+            if time_since_last > 1:
+                # NOTE(review): the 1000.0 factor presumably converts a
+                # millisecond 'now' into a per-second rate -- confirm.
+                speed = 1000.0 * (count_since_last / time_since_last)
+                stages_data[counter]['data'].append(speed)
+                if speed > stages_data[counter]['max_speed']:
+                    stages_data[counter]['max_speed'] = speed
+                    stages_data[counter]['max_speed_ts'] = x['now']
+                if speed < stages_data[counter]['min_speed']:
+                    stages_data[counter]['min_speed'] = speed
+                    stages_data[counter]['min_speed_ts'] = x['now']
+            stages_data[counter]['last_ts'] = x['now']
+            stages_data[counter]['last_count'] = x['counts']
+
+# Print a summary for every counter found in the log: extremes, sample counts,
+# the middle and 80th-percentile samples, the average, and the fastest samples.
+for stage in stages_data.keys():
+    stages_data[stage]['data'].sort()
+    #mean_index = stages_data[stage]['count'] / 2
+    # NOTE: "mean" is the middle element of the sorted samples (i.e. the
+    # median); the output label is kept unchanged for existing consumers.
+    mean = 0
+    average = 0
+    eightieth = 0
+    data_len = len(stages_data[stage]['data'])
+    mean_index = int(data_len / 2)
+    eightieth_index = int(data_len * 0.8)
+    #print("mean idx: {} data.len: {}".format(mean_index, data_len))
+    # Counters that never produced a speed sample keep the zero defaults.
+    if data_len > 0:
+        mean = stages_data[stage]['data'][mean_index]
+        average = float(sum(stages_data[stage]['data'])) / data_len
+        eightieth = stages_data[stage]['data'][eightieth_index]
+    print("stage: {} max: {:,.2f} min: {:.2f} count: {} total: {} mean: {:,.2f} average: {:,.2f} 80%: {:,.2f}".format(stage,
+                                                       stages_data[stage]['max_speed'],
+                                                       stages_data[stage]['min_speed'],
+                                                       stages_data[stage]['count'],
+                                                       stages_data[stage]['last_count'],
+                                                       mean, average, eightieth))
+    num = 5
+    idx = -1
+    if data_len >= num:
+        print("    top {}: ".format(num), end='')
+        # Walk the sorted samples from the fastest downwards.
+        for x in range(0, num):
+            print("{:,.2f}  ".format(stages_data[stage]['data'][idx]), end='')
+            idx -= 1
+            # Stop when the samples run out (with data_len == num the next
+            # index would fall before the start of the list and raise
+            # IndexError) or when the next sample is below the average.
+            if -idx > data_len or stages_data[stage]['data'][idx] < average:
+                break
+        print("")
+    print("    max_ts: {} min_ts: {}".format(stages_data[stage]['max_speed_ts'], stages_data[stage]['min_speed_ts']))
+    print("\n")
+
--- a/scripts/snap-config-to-env.sh
+++ b/scripts/snap-config-to-env.sh
@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# Snap daemons have no access to the environment so |snap set solana ...| is
+# used to set runtime configuration.
+#
+# This script exports the snap runtime configuration options back as
+# environment variables before invoking the specified program
+#
+
+if [[ -d $SNAP ]]; then # Running inside a Linux Snap?
+  # Fetch each runtime option from the snap configuration.
+  RUST_LOG="$(snapctl get rust-log)"
+  SOLANA_CUDA="$(snapctl get enable-cuda)"
+  SOLANA_DEFAULT_METRICS_RATE="$(snapctl get default-metrics-rate)"
+  SOLANA_METRICS_CONFIG="$(snapctl get metrics-config)"
+
+  # Export the options so the program started below sees them.
+  export RUST_LOG
+  export SOLANA_CUDA
+  export SOLANA_DEFAULT_METRICS_RATE
+  export SOLANA_METRICS_CONFIG
+fi
+
+# Replace this shell with the program (and arguments) given on the command
+# line.
+exec "$@"
--- a/multinode-demo/test/wallet-sanity.sh
+++ b/multinode-demo/test/wallet-sanity.sh
@ -3,15 +3,14 @@
 # Wallet sanity test
 #

-here=$(dirname "$0")
-cd "$here"
+cd "$(dirname "$0")"/..

 if [[ -n "$USE_SNAP" ]]; then
  # TODO: Merge wallet.sh functionality into solana-wallet proper and
  #       remove this USE_SNAP case
  wallet="solana.wallet $1"
 else
-  wallet="../wallet.sh $1"
+  wallet="multinode-demo/wallet.sh $1"
 fi

 # Tokens transferred to this address are lost forever...
@ -35,7 +34,7 @@ pay_and_confirm() {

 $wallet reset
 $wallet address
-check_balance_output "Your balance is: 0"
+check_balance_output "No account found" "Your balance is: 0"
 $wallet airdrop --tokens 60
 check_balance_output "Your balance is: 60"
 $wallet airdrop --tokens 40
--- a/snap/hooks/configure
+++ b/snap/hooks/configure
@ -5,6 +5,7 @@ snapctl stop --disable solana.daemon-drone
 snapctl stop --disable solana.daemon-leader
 snapctl stop --disable solana.daemon-validator
 snapctl stop --disable solana.daemon-oom-monitor
+snapctl stop --disable solana.daemon-net-stats

 mode="$(snapctl get mode)"
 if [[ -z "$mode" ]]; then
@ -18,16 +19,16 @@ setup_args="$(snapctl get setup-args)"

 case $mode in
 leader+drone)
-  "$SNAP"/bin/setup.sh -t leader $num_tokens -p $setup_args
+  "$SNAP"/multinode-demo/setup.sh -t leader $num_tokens -p $setup_args
  snapctl start --enable solana.daemon-drone
  snapctl start --enable solana.daemon-leader
  ;;
 leader)
-  "$SNAP"/bin/setup.sh -t leader $num_tokens -p $setup_args
+  "$SNAP"/multinode-demo/setup.sh -t leader $num_tokens -p $setup_args
  snapctl start --enable solana.daemon-leader
  ;;
 validator)
-  "$SNAP"/bin/setup.sh -t validator -p $setup_args
+  "$SNAP"/multinode-demo/setup.sh -t validator -p $setup_args
  snapctl start --enable solana.daemon-validator
  ;;
 *)
@ -37,3 +38,4 @@ validator)
 esac

 snapctl start --enable solana.daemon-oom-monitor
+snapctl start --enable solana.daemon-net-stats
--- a/snap/snapcraft.yaml
+++ b/snap/snapcraft.yaml
@ -49,41 +49,44 @@ apps:
    plugs:
      - home
  bench-tps:
-    # TODO: Merge client.sh functionality into solana-bench-tps proper
-    command: client.sh
-    #command: solana-bench-tps
+    command: solana-bench-tps
    plugs:
      - network
      - network-bind
      - home
  wallet:
    # TODO: Merge wallet.sh functionality into solana-wallet proper
-    command: wallet.sh
+    command: multinode-demo/wallet.sh
    #command: solana-wallet
    plugs:
      - network
      - home
  daemon-validator:
    daemon: simple
-    command: validator.sh
+    command: scripts/snap-config-to-env.sh $SNAP/multinode-demo/validator.sh
    plugs:
      - network
      - network-bind
  daemon-leader:
    daemon: simple
-    command: leader.sh
+    command: scripts/snap-config-to-env.sh $SNAP/multinode-demo/leader.sh
    plugs:
      - network
      - network-bind
  daemon-drone:
    daemon: simple
-    command: drone.sh
+    command: scripts/snap-config-to-env.sh $SNAP/multinode-demo/drone.sh
    plugs:
      - network
      - network-bind
  daemon-oom-monitor:
    daemon: simple
-    command: oom_monitor.sh
+    command: scripts/snap-config-to-env.sh $SNAP/scripts/oom-monitor.sh
+    plugs:
+      - network
+  daemon-net-stats:
+    daemon: simple
+    command: scripts/snap-config-to-env.sh $SNAP/scripts/net-stats.sh
    plugs:
      - network

@ -92,6 +95,8 @@ parts:
    plugin: nil
    prime:
      - bin
+      - multinode-demo
+      - scripts
      - usr/lib
    override-build: |
      # Install CUDA 9.2 runtime
@ -108,20 +113,25 @@ parts:
      rm -rf $SNAPCRAFT_PART_INSTALL/bin/*
      mv $SNAPCRAFT_PART_INSTALL/solana-fullnode $SNAPCRAFT_PART_INSTALL/bin/solana-fullnode-cuda
      mkdir -p $SNAPCRAFT_PART_INSTALL/usr/lib/
-      cp -f libJerasure.so $SNAPCRAFT_PART_INSTALL/usr/lib/libJerasure.so.2
-      cp -f libgf_complete.so $SNAPCRAFT_PART_INSTALL/usr/lib/libgf_complete.so.1
+      cp -f target/perf-libs/libJerasure.so $SNAPCRAFT_PART_INSTALL/usr/lib/libJerasure.so.2
+      cp -f target/perf-libs/libgf_complete.so $SNAPCRAFT_PART_INSTALL/usr/lib/libgf_complete.so.1

      # Build/install all other programs
      cargo install --root $SNAPCRAFT_PART_INSTALL --bins

-      # Install multinode scripts
-      mkdir -p $SNAPCRAFT_PART_INSTALL/bin
-      cp -av multinode-demo/* $SNAPCRAFT_PART_INSTALL/bin/
+      # Install multinode-demo/
+      mkdir -p $SNAPCRAFT_PART_INSTALL/multinode-demo/
+      cp -av multinode-demo/* $SNAPCRAFT_PART_INSTALL/multinode-demo/

-      # TODO: build curl,rsync/multilog from source instead of sneaking it in from the host
-      # system...
+      # Install scripts/
+      mkdir -p $SNAPCRAFT_PART_INSTALL/scripts/
+      cp -av scripts/* $SNAPCRAFT_PART_INSTALL/scripts/
+
+      # TODO: build curl,dig,rsync/multilog from source instead of sneaking it
+      # in from the host system...
      set -x
      mkdir -p $SNAPCRAFT_PART_INSTALL/bin
      cp -av /usr/bin/curl $SNAPCRAFT_PART_INSTALL/bin/
+      cp -av /usr/bin/dig $SNAPCRAFT_PART_INSTALL/bin/
      cp -av /usr/bin/multilog $SNAPCRAFT_PART_INSTALL/bin/
      cp -av /usr/bin/rsync $SNAPCRAFT_PART_INSTALL/bin/
--- a/src/bank.rs
+++ b/src/bank.rs
@ -1,14 +1,13 @@
-//! The `bank` module tracks client balances and the progress of smart
+//! The `bank` module tracks client accounts and the progress of smart
 //! contracts. It offers a high-level API that signs transactions
 //! on behalf of the caller, and a low-level API for when they have
 //! already been signed and verified.

-extern crate libc;
-
+use bincode::{deserialize, serialize};
 use chrono::prelude::*;
 use counter::Counter;
 use entry::Entry;
-use hash::Hash;
+use hash::{hash, Hash};
 use itertools::Itertools;
 use ledger::Block;
 use log::Level;
@ -17,7 +16,7 @@ use payment_plan::{Payment, PaymentPlan, Witness};
 use signature::{Keypair, Pubkey, Signature};
 use std;
 use std::collections::hash_map::Entry::Occupied;
-use std::collections::{HashMap, HashSet, VecDeque};
+use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
 use std::result;
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::RwLock;
@ -37,7 +36,7 @@ pub const MAX_ENTRY_IDS: usize = 1024 * 16;
 pub const VERIFY_BLOCK_SIZE: usize = 16;

 /// Reasons a transaction might be rejected.
-#[derive(Debug, PartialEq, Eq)]
+#[derive(Debug, PartialEq, Eq, Clone)]
 pub enum BankError {
    /// Attempt to debit from `Pubkey`, but no found no record of a prior credit.
    AccountNotFound(Pubkey),
@ -62,19 +61,32 @@ pub enum BankError {

    /// Proof of History verification failed.
    LedgerVerificationFailed,
+    /// Contract's transaction token balance does not equal the balance after the transaction
+    UnbalancedTransaction(Signature),
+    /// Contract location Pubkey already contains userdata
+    ContractAlreadyPending(Pubkey),
 }

 pub type Result<T> = result::Result<T, BankError>;
-
+/// An Account with userdata that is stored on chain.
+///
+/// `Default` yields an account with zero tokens and empty userdata.
+#[derive(Serialize, Deserialize, Debug, Clone, Default)]
+pub struct Account {
+    /// Tokens held by the account.
+    pub tokens: i64,
+    /// User data.
+    /// A transaction can write to its userdata; the budget contract code in
+    /// this module stores its serialized pending payment plans here.
+    pub userdata: Vec<u8>,
+}
+/// Tallies of `AccountNotFound` failures collected while loading accounts for
+/// a batch of transactions, so the metrics counters can be bumped once per
+/// batch.
+#[derive(Default)]
+struct ErrorCounters {
+    /// Source account missing while this bank is not the leader.
+    account_not_found_validator: usize,
+    /// Source account missing while this bank is the leader.
+    account_not_found_leader: usize,
+    /// Source account missing for a `NewVote` transaction (counted in addition
+    /// to one of the two counters above).
+    account_not_found_vote: usize,
+}
 /// The state of all accounts and contracts after processing its entries.
 pub struct Bank {
    /// A map of account public keys to the balance in that account.
-    balances: RwLock<HashMap<Pubkey, i64>>,
-
-    /// A map of smart contract transaction signatures to what remains of its payment
-    /// plan. Each transaction that targets the plan should cause it to be reduced.
-    /// Once it cannot be reduced, final payments are made and it is discarded.
-    pending: RwLock<HashMap<Signature, Plan>>,
+    accounts: RwLock<HashMap<Pubkey, Account>>,

    /// A FIFO queue of `last_id` items, where each item is a set of signatures
    /// that have been processed using that `last_id`. Rejected `last_id`
@ -100,8 +112,7 @@ pub struct Bank {
 impl Default for Bank {
    fn default() -> Self {
        Bank {
-            balances: RwLock::new(HashMap::new()),
-            pending: RwLock::new(HashMap::new()),
+            accounts: RwLock::new(HashMap::new()),
            last_ids: RwLock::new(VecDeque::new()),
            last_ids_sigs: RwLock::new(HashMap::new()),
            transaction_count: AtomicUsize::new(0),
@ -121,7 +132,11 @@ impl Bank {
    /// Create an Bank using a deposit.
    pub fn new_from_deposit(deposit: &Payment) -> Self {
        let bank = Self::default();
-        bank.apply_payment(deposit, &mut bank.balances.write().unwrap());
+        {
+            let mut accounts = bank.accounts.write().unwrap();
+            let account = accounts.entry(deposit.to).or_insert_with(Account::default);
+            Self::apply_payment(deposit, account);
+        }
        bank
    }

@ -136,9 +151,10 @@ impl Bank {
        bank
    }

-    /// Commit funds to the `payment.to` party.
-    fn apply_payment(&self, payment: &Payment, balances: &mut HashMap<Pubkey, i64>) {
-        *balances.entry(payment.to).or_insert(0) += payment.tokens;
+    /// Commit funds to the given account
+    fn apply_payment(payment: &Payment, account: &mut Account) {
+        trace!("apply payments {}", payment.tokens);
+        account.tokens += payment.tokens;
    }

    /// Return the last entry ID registered.
@ -160,23 +176,6 @@ impl Bank {
        Ok(())
    }

-    /// Forget the given `signature` because its transaction was rejected.
-    fn forget_signature(signatures: &mut HashSet<Signature>, signature: &Signature) {
-        signatures.remove(signature);
-    }
-
-    /// Forget the given `signature` with `last_id` because the transaction was rejected.
-    fn forget_signature_with_last_id(&self, signature: &Signature, last_id: &Hash) {
-        if let Some(entry) = self
-            .last_ids_sigs
-            .write()
-            .expect("'last_ids' read lock in forget_signature_with_last_id")
-            .get_mut(last_id)
-        {
-            Self::forget_signature(&mut entry.0, signature);
-        }
-    }
-
    /// Forget all signatures. Useful for benchmarking.
    pub fn clear_signatures(&self) {
        for (_, sigs) in self.last_ids_sigs.write().unwrap().iter_mut() {
@ -233,121 +232,236 @@ impl Bank {
        last_ids.push_back(*last_id);
    }

-    /// Deduct tokens from the 'from' address the account has sufficient
-    /// funds and isn't a duplicate.
-    fn apply_debits(&self, tx: &Transaction, bals: &mut HashMap<Pubkey, i64>) -> Result<()> {
-        let mut purge = false;
+    /// Deduct tokens from the source account if it has sufficient funds and the contract isn't
+    /// pending
+    fn apply_debits_to_budget_payment_plan(
+        tx: &Transaction,
+        accounts: &mut [Account],
+        instruction: &Instruction,
+    ) -> Result<()> {
        {
-            let option = bals.get_mut(&tx.from);
-            if option.is_none() {
-                // TODO: this is gnarly because the counters are static atomics
-                if !self.is_leader {
-                    inc_new_counter_info!("bank-appy_debits-account_not_found-validator", 1);
-                } else if let Instruction::NewVote(_) = &tx.instruction {
-                    inc_new_counter_info!("bank-appy_debits-vote_account_not_found", 1);
-                } else {
-                    inc_new_counter_info!("bank-appy_debits-generic_account_not_found", 1);
-                }
-                return Err(BankError::AccountNotFound(tx.from));
-            }
-            let bal = option.unwrap();
-
-            self.reserve_signature_with_last_id(&tx.signature, &tx.last_id)?;
-
-            if let Instruction::NewContract(contract) = &tx.instruction {
+            let tokens = if !accounts[0].userdata.is_empty() {
+                0
+            } else {
+                accounts[0].tokens
+            };
+            if let Instruction::NewContract(contract) = &instruction {
                if contract.tokens < 0 {
                    return Err(BankError::NegativeTokens);
                }

-                if *bal < contract.tokens {
-                    self.forget_signature_with_last_id(&tx.signature, &tx.last_id);
-                    return Err(BankError::InsufficientFunds(tx.from));
-                } else if *bal == contract.tokens {
-                    purge = true;
+                if tokens < contract.tokens {
+                    return Err(BankError::InsufficientFunds(tx.keys[0]));
                } else {
-                    *bal -= contract.tokens;
+                    let bal = &mut accounts[0];
+                    bal.tokens -= contract.tokens;
                }
            };
        }
-
-        if purge {
-            bals.remove(&tx.from);
-        }
-
        Ok(())
    }

    /// Apply only a transaction's credits.
    /// Note: It is safe to apply credits from multiple transactions in parallel.
-    fn apply_credits(&self, tx: &Transaction, balances: &mut HashMap<Pubkey, i64>) {
-        match &tx.instruction {
+    fn apply_credits_to_budget_payment_plan(
+        tx: &Transaction,
+        accounts: &mut [Account],
+        instruction: &Instruction,
+    ) -> Result<()> {
+        match instruction {
            Instruction::NewContract(contract) => {
                let plan = contract.plan.clone();
                if let Some(payment) = plan.final_payment() {
-                    self.apply_payment(&payment, balances);
+                    Self::apply_payment(&payment, &mut accounts[1]);
+                    Ok(())
+                } else if !accounts[1].userdata.is_empty() {
+                    Err(BankError::ContractAlreadyPending(tx.keys[1]))
                } else {
-                    let mut pending = self
-                        .pending
-                        .write()
-                        .expect("'pending' write lock in apply_credits");
+                    let mut pending = HashMap::new();
                    pending.insert(tx.signature, plan);
+                    //TODO this is a temporary on demand allocation
+                    //until system contract requires explicit allocation of memory
+                    accounts[1].userdata = serialize(&pending).unwrap();
+                    accounts[1].tokens += contract.tokens;
+                    Ok(())
                }
            }
            Instruction::ApplyTimestamp(dt) => {
-                let _ = self.apply_timestamp(tx.from, *dt);
+                Self::apply_timestamp(tx.keys[0], *dt, &mut accounts[1]);
+                Ok(())
            }
            Instruction::ApplySignature(signature) => {
-                let _ = self.apply_signature(tx.from, *signature);
+                Self::apply_signature(tx.keys[0], *signature, accounts);
+                Ok(())
            }
            Instruction::NewVote(_vote) => {
-                trace!("GOT VOTE! last_id={:?}", &tx.last_id.as_ref()[..8]);
                // TODO: record the vote in the stake table...
+                trace!("GOT VOTE! last_id={}", tx.last_id);
+                Ok(())
            }
        }
    }
+    /// Budget DSL contract interface
+    /// * tx - the transaction
+    /// * accounts[0] - The source of the tokens
+    /// * accounts[1] - The contract context.  Once the contract has been completed, the tokens can
+    /// be spent from this account.
+    ///
+    /// Debits are applied first and credits second; the first error is returned.  If the credit
+    /// step fails, `accounts` may already reflect the debit.
+    pub fn process_transaction_of_budget_instruction(
+        tx: &Transaction,
+        accounts: &mut [Account],
+    ) -> Result<()> {
+        let instruction = tx.instruction();
+        Self::apply_debits_to_budget_payment_plan(tx, accounts, &instruction)?;
+        Self::apply_credits_to_budget_payment_plan(tx, accounts, &instruction)
+    }
+    //TODO the contract needs to provide a "get_balance" introspection call of the userdata
+    /// Returns the spendable balance of `account` as seen by the budget contract.
+    ///
+    /// The balance is 0 while the account's userdata deserializes to a non-empty map of pending
+    /// payment plans.  Otherwise (an empty map, or userdata that does not deserialize) it is
+    /// `account.tokens`.
+    pub fn get_balance_of_budget_payment_plan(account: &Account) -> i64 {
+        if let Ok(pending) = deserialize(&account.userdata) {
+            // The annotation pins the type that `deserialize` is asked to produce.
+            let pending: HashMap<Signature, Plan> = pending;
+            if !pending.is_empty() {
+                0
+            } else {
+                account.tokens
+            }
+        } else {
+            account.tokens
+        }
+    }

    /// Process a Transaction. If it contains a payment plan that requires a witness
    /// to progress, the payment plan will be stored in the bank.
    pub fn process_transaction(&self, tx: &Transaction) -> Result<()> {
-        let bals = &mut self.balances.write().unwrap();
-        self.apply_debits(tx, bals)?;
-        self.apply_credits(tx, bals);
-        self.transaction_count.fetch_add(1, Ordering::Relaxed);
-        Ok(())
+        match self.process_transactions(vec![tx.clone()])[0] {
+            Err(ref e) => {
+                info!("process_transaction error: {:?}", e);
+                Err((*e).clone())
+            }
+            Ok(_) => Ok(()),
+        }
+    }
+
+    /// Copies the accounts named by `tx.keys` out of `accounts` so the transaction can be
+    /// executed against the copies.
+    ///
+    /// Keys that have no stored account are represented by `Account::default()`.  On success
+    /// the transaction's signature has been reserved and `tx.fee` has already been deducted
+    /// from the first returned account.
+    ///
+    /// # Errors
+    /// * `BankError::AccountNotFound` - `tx.keys[0]` has no stored account; the matching
+    ///   `error_counters` fields are incremented.
+    /// * `BankError::InsufficientFunds` - the account at `tx.keys[0]` cannot cover `tx.fee`.
+    /// * Any error returned by `reserve_signature_with_last_id`.
+    fn load_account(
+        &self,
+        tx: &Transaction,
+        accounts: &HashMap<Pubkey, Account>,
+        error_counters: &mut ErrorCounters,
+    ) -> Result<Vec<Account>> {
+        // Look the source account up once and branch on the result.
+        match accounts.get(&tx.keys[0]) {
+            None => {
+                if !self.is_leader {
+                    error_counters.account_not_found_validator += 1;
+                } else {
+                    error_counters.account_not_found_leader += 1;
+                }
+                if let Instruction::NewVote(_vote) = tx.instruction() {
+                    error_counters.account_not_found_vote += 1;
+                }
+                Err(BankError::AccountNotFound(*tx.from()))
+            }
+            Some(payer) if payer.tokens < tx.fee => Err(BankError::InsufficientFunds(*tx.from())),
+            Some(_) => {
+                // Copy all the accounts
+                let mut called_accounts: Vec<Account> = tx
+                    .keys
+                    .iter()
+                    .map(|key| accounts.get(key).cloned().unwrap_or_default())
+                    .collect();
+                // There is no way to predict what contract will execute without an error
+                // If a fee can pay for execution then the contract will be scheduled
+                self.reserve_signature_with_last_id(&tx.signature, &tx.last_id)?;
+                called_accounts[0].tokens -= tx.fee;
+                Ok(called_accounts)
+            }
+        }
+    }
+    /// Loads the accounts for every transaction in `txs`, in order.
+    ///
+    /// Returns one entry per transaction: the copied accounts, or the error that prevented
+    /// that transaction's accounts from being loaded.  `error_counters` accumulates across
+    /// the whole batch.
+    fn load_accounts(
+        &self,
+        txs: &[Transaction],
+        accounts: &HashMap<Pubkey, Account>,
+        error_counters: &mut ErrorCounters,
+    ) -> Vec<Result<Vec<Account>>> {
+        txs.iter()
+            .map(|tx| self.load_account(tx, accounts, error_counters))
+            .collect()
+    }
+
+    /// Runs the budget contract for `tx` against `accounts` and verifies the outcome.
+    ///
+    /// Returns `tx` back to the caller on success.
+    ///
+    /// # Errors
+    /// * `BankError::UnbalancedTransaction` - the token total across `accounts` changed; this
+    ///   takes precedence over any error reported by the contract.
+    /// * Otherwise, the error returned by the contract, if any.
+    pub fn execute_transaction(tx: Transaction, accounts: &mut [Account]) -> Result<Transaction> {
+        let pre_total: i64 = accounts.iter().map(|a| a.tokens).sum();
+
+        // TODO next steps is to add hooks to call arbitrary contracts here
+        // Call the contract method
+        // It's up to the contract to implement its own rules on moving funds
+        let e = Self::process_transaction_of_budget_instruction(&tx, accounts);
+
+        // Verify the transaction
+        // TODO, At the moment there is only 1 contract, so 1-3 are not checked
+        // 1. For accounts assigned to the contract, the total sum of all the tokens in these accounts cannot increase.
+        // 2. For accounts unassigned to the contract, the individual balance of each accounts cannot decrease.
+        // 3. For accounts unassigned to the contract, the userdata cannot change.
+
+        // 4. The total sum of all the tokens in all the pages cannot change.
+        let post_total: i64 = accounts.iter().map(|a| a.tokens).sum();
+        if pre_total != post_total {
+            Err(BankError::UnbalancedTransaction(tx.signature))
+        } else if let Err(err) = e {
+            Err(err)
+        } else {
+            Ok(tx)
+        }
+    }
+
+    /// Writes the executed account copies back into `accounts`.
+    ///
+    /// `res` and `loaded` are walked in lockstep.  Only transactions whose accounts were loaded
+    /// and whose execution succeeded are stored; for those, each key in `tx.keys` is paired
+    /// with the account copy at the same position.  An account left with zero tokens is removed
+    /// from the map instead of being stored.
+    pub fn store_accounts(
+        res: &Vec<Result<Transaction>>,
+        loaded: &Vec<Result<Vec<Account>>>,
+        accounts: &mut HashMap<Pubkey, Account>,
+    ) {
+        for (racc, rtx) in loaded.iter().zip(res.iter()) {
+            if let (Ok(acc), Ok(tx)) = (racc, rtx) {
+                for (key, account) in tx.keys.iter().zip(acc.iter()) {
+                    if account.tokens == 0 {
+                        //purge if 0
+                        accounts.remove(key);
+                    } else {
+                        // `insert` overwrites any existing entry for the key.
+                        accounts.insert(*key, account.clone());
+                    }
+                }
+            }
+        }
    }

    /// Process a batch of transactions.
    #[must_use]
    pub fn process_transactions(&self, txs: Vec<Transaction>) -> Vec<Result<Transaction>> {
-        let bals = &mut self.balances.write().unwrap();
        debug!("processing Transactions {}", txs.len());
+        // TODO right now a single write lock is held for the duration of processing all the
+        // transactions
+        // To break this lock each account needs to be locked to prevent concurrent access
+        let mut accounts = self.accounts.write().unwrap();
        let txs_len = txs.len();
+        let mut error_counters = ErrorCounters::default();
        let now = Instant::now();
-        let results: Vec<_> = txs
-            .into_iter()
-            .map(|tx| self.apply_debits(&tx, bals).map(|_| tx))
-            .collect(); // Calling collect() here forces all debits to complete before moving on.
-
-        let debits = now.elapsed();
+        let mut loaded_accounts = self.load_accounts(&txs, &mut accounts, &mut error_counters);
+        let load_elapsed = now.elapsed();
        let now = Instant::now();

-        let res: Vec<_> = results
-            .into_iter()
-            .map(|result| {
-                result.map(|tx| {
-                    self.apply_credits(&tx, bals);
-                    tx
-                })
+        let res: Vec<Result<Transaction>> = loaded_accounts
+            .iter_mut()
+            .zip(txs.into_iter())
+            .map(|(acc, tx)| match acc {
+                Err(e) => Err(e.clone()),
+                Ok(ref mut accounts) => Self::execute_transaction(tx, accounts),
            })
            .collect();
-
+        let execution_elapsed = now.elapsed();
+        let now = Instant::now();
+        Self::store_accounts(&res, &loaded_accounts, &mut accounts);
+        let write_elapsed = now.elapsed();
        debug!(
-            "debits: {} us credits: {:?} us tx: {}",
-            duration_as_us(&debits),
-            duration_as_us(&now.elapsed()),
+            "load: {} us execution: {} us write: {} us tx: {}",
+            duration_as_us(&load_elapsed),
+            duration_as_us(&execution_elapsed),
+            duration_as_us(&write_elapsed),
            txs_len
        );
-
        let mut tx_count = 0;
        let mut err_count = 0;
        for r in &res {
@ -355,13 +469,34 @@ impl Bank {
                tx_count += 1;
            } else {
                if err_count == 0 {
-                    info!("tx error: {:?}", r);
+                    trace!("tx error: {:?}", r);
                }
                err_count += 1;
            }
        }
        if err_count > 0 {
            info!("{} errors of {} txs", err_count, err_count + tx_count);
+            if !self.is_leader {
+                inc_new_counter_info!("bank-process_transactions_err-validator", err_count);
+                inc_new_counter_info!(
+                    "bank-appy_debits-account_not_found-validator",
+                    error_counters.account_not_found_validator
+                );
+            } else {
+                inc_new_counter_info!("bank-process_transactions_err-leader", err_count);
+                inc_new_counter_info!(
+                    "bank-appy_debits-account_not_found-leader",
+                    error_counters.account_not_found_leader
+                );
+                inc_new_counter_info!(
+                    "bank-appy_debits-vote_account_not_found",
+                    error_counters.account_not_found_vote
+                );
+            }
+        }
+        let cur_tx_count = self.transaction_count.load(Ordering::Relaxed);
+        if ((cur_tx_count + tx_count) & !(262144 - 1)) > cur_tx_count & !(262144 - 1) {
+            info!("accounts.len: {}", accounts.len());
        }
        self.transaction_count
            .fetch_add(tx_count, Ordering::Relaxed);
@ -406,13 +541,11 @@ impl Bank {
    }

    /// Process an ordered list of entries.
-    pub fn process_entries(&self, entries: Vec<Entry>) -> Result<u64> {
-        let mut entry_count = 0;
+    pub fn process_entries(&self, entries: Vec<Entry>) -> Result<()> {
        for entry in entries {
-            entry_count += 1;
            self.process_entry(entry)?;
        }
-        Ok(entry_count)
+        Ok(())
    }

    /// Append entry blocks to the ledger, verifying them along the way.
@ -461,13 +594,18 @@ impl Bank {
            .expect("invalid ledger: need at least 2 entries");
        {
            let tx = &entry1.transactions[0];
-            let deposit = if let Instruction::NewContract(contract) = &tx.instruction {
+            let instruction = tx.instruction();
+            let deposit = if let Instruction::NewContract(contract) = instruction {
                contract.plan.final_payment()
            } else {
                None
            }.expect("invalid ledger, needs to start with a contract");
-
-            self.apply_payment(&deposit, &mut self.balances.write().unwrap());
+            {
+                let mut accounts = self.accounts.write().unwrap();
+                let entry = accounts.entry(tx.keys[0]).or_insert_with(Account::default);
+                Self::apply_payment(&deposit, entry);
+                trace!("applied genesis payment {:?} {:?}", deposit, entry);
+            }
        }
        self.register_entry_id(&entry0.id);
        self.register_entry_id(&entry1.id);
@ -489,39 +627,40 @@ impl Bank {

    /// Process a Witness Signature. Any payment plans waiting on this signature
    /// will progress one step.
-    fn apply_signature(&self, from: Pubkey, signature: Signature) -> Result<()> {
-        if let Occupied(mut e) = self
-            .pending
-            .write()
-            .expect("write() in apply_signature")
-            .entry(signature)
-        {
+    fn apply_signature(from: Pubkey, signature: Signature, account: &mut [Account]) {
+        let mut pending: HashMap<Signature, Plan> =
+            deserialize(&account[1].userdata).unwrap_or(HashMap::new());
+        if let Occupied(mut e) = pending.entry(signature) {
            e.get_mut().apply_witness(&Witness::Signature, &from);
            if let Some(payment) = e.get().final_payment() {
-                self.apply_payment(&payment, &mut self.balances.write().unwrap());
+                //move the tokens back to the from account
+                account[0].tokens += payment.tokens;
+                account[1].tokens -= payment.tokens;
                e.remove_entry();
            }
        };
-
-        Ok(())
+        //TODO this allocation needs to be changed once the runtime only allows for explicitly
+        //allocated memory
+        account[1].userdata = if pending.is_empty() {
+            vec![]
+        } else {
+            serialize(&pending).unwrap()
+        };
    }

    /// Process a Witness Timestamp. Any payment plans waiting on this timestamp
    /// will progress one step.
-    fn apply_timestamp(&self, from: Pubkey, dt: DateTime<Utc>) -> Result<()> {
+    fn apply_timestamp(from: Pubkey, dt: DateTime<Utc>, account: &mut Account) {
+        let mut pending: HashMap<Signature, Plan> =
+            deserialize(&account.userdata).unwrap_or(HashMap::new());
        // Check to see if any timelocked transactions can be completed.
        let mut completed = vec![];

        // Hold 'pending' write lock until the end of this function. Otherwise another thread can
        // double-spend if it enters before the modified plan is removed from 'pending'.
-        let mut pending = self
-            .pending
-            .write()
-            .expect("'pending' write lock in apply_timestamp");
        for (key, plan) in pending.iter_mut() {
            plan.apply_witness(&Witness::Timestamp(dt), &from);
-            if let Some(payment) = plan.final_payment() {
-                self.apply_payment(&payment, &mut self.balances.write().unwrap());
+            if let Some(_payment) = plan.final_payment() {
                completed.push(key.clone());
            }
        }
@ -529,8 +668,13 @@ impl Bank {
        for key in completed {
            pending.remove(&key);
        }
-
-        Ok(())
+        //TODO this allocation needs to be changed once the runtime only allows for explicitly
+        //allocated memory
+        account.userdata = if pending.is_empty() {
+            vec![]
+        } else {
+            serialize(&pending).unwrap()
+        };
    }

    /// Create, sign, and process a Transaction from `keypair` to `to` of
@ -564,11 +708,17 @@ impl Bank {
    }

    pub fn get_balance(&self, pubkey: &Pubkey) -> i64 {
-        let bals = self
-            .balances
+        self.get_account(pubkey)
+            .map(|x| Self::get_balance_of_budget_payment_plan(&x))
+            .unwrap_or(0)
+    }
+
+    pub fn get_account(&self, pubkey: &Pubkey) -> Option<Account> {
+        let accounts = self
+            .accounts
            .read()
-            .expect("'balances' read lock in get_balance");
-        bals.get(pubkey).cloned().unwrap_or(0)
+            .expect("'accounts' read lock in get_balance");
+        accounts.get(pubkey).cloned()
    }

    pub fn transaction_count(&self) -> usize {
@ -588,6 +738,16 @@ impl Bank {
        false
    }

+    /// Hash the `accounts` HashMap. This represents a validator's interpretation
+    ///  of the ledger up to the `last_id`, to be sent back to the leader when voting.
+    pub fn hash_internal_state(&self) -> Hash {
+        let mut ordered_accounts = BTreeMap::new();
+        for (pubkey, account) in self.accounts.read().unwrap().iter() {
+            ordered_accounts.insert(*pubkey, account.clone());
+        }
+        hash(&serialize(&ordered_accounts).unwrap())
+    }
+
    pub fn finality(&self) -> usize {
        self.finality_time.load(Ordering::Relaxed)
    }
@ -607,11 +767,18 @@ mod tests {
    use hash::hash;
    use ledger;
    use packet::BLOB_DATA_SIZE;
-    use signature::KeypairUtil;
+    use signature::{GenKeys, KeypairUtil};
    use std;
    use std::io::{BufReader, Cursor, Seek, SeekFrom};
    use std::mem::size_of;

+    #[test]
+    fn test_bank_new() {
+        let mint = Mint::new(10_000);
+        let bank = Bank::new(&mint);
+        assert_eq!(bank.get_balance(&mint.pubkey()), 10_000);
+    }
+
    #[test]
    fn test_two_payments_to_one_party() {
        let mint = Mint::new(10_000);
@ -641,6 +808,23 @@ mod tests {
        assert_eq!(bank.transaction_count(), 0);
    }

+    // TODO: This test verifies potentially undesirable behavior
+    // See github issue 1157 (https://github.com/solana-labs/solana/issues/1157)
+    #[test]
+    fn test_detect_failed_duplicate_transactions_issue_1157() {
+        let mint = Mint::new(1);
+        let bank = Bank::new(&mint);
+
+        let tx = Transaction::new(&mint.keypair(), mint.keypair().pubkey(), -1, mint.last_id());
+        let signature = tx.signature;
+        assert!(!bank.has_signature(&signature));
+        assert_eq!(
+            bank.process_transaction(&tx),
+            Err(BankError::NegativeTokens)
+        );
+        assert!(bank.has_signature(&signature));
+    }
+
    #[test]
    fn test_account_not_found() {
        let mint = Mint::new(1);
@ -661,6 +845,7 @@ mod tests {
        bank.transfer(1_000, &mint.keypair(), pubkey, mint.last_id())
            .unwrap();
        assert_eq!(bank.transaction_count(), 1);
+        assert_eq!(bank.get_balance(&pubkey), 1_000);
        assert_eq!(
            bank.transfer(10_001, &mint.keypair(), pubkey, mint.last_id()),
            Err(BankError::InsufficientFunds(mint.pubkey()))
@ -684,39 +869,46 @@ mod tests {

    #[test]
    fn test_transfer_on_date() {
-        let mint = Mint::new(1);
+        let mint = Mint::new(2);
        let bank = Bank::new(&mint);
        let pubkey = Keypair::new().pubkey();
        let dt = Utc::now();
        bank.transfer_on_date(1, &mint.keypair(), pubkey, dt, mint.last_id())
            .unwrap();

-        // Mint's balance will be zero because all funds are locked up.
-        assert_eq!(bank.get_balance(&mint.pubkey()), 0);
+        // Mint's balance will be 1 because 1 of the tokens is locked up
+        assert_eq!(bank.get_balance(&mint.pubkey()), 1);

        // tx count is 1, because debits were applied.
        assert_eq!(bank.transaction_count(), 1);

-        // pubkey's balance will be None because the funds have not been
+        // pubkey's balance will be 0 because the funds have not been
        // sent.
        assert_eq!(bank.get_balance(&pubkey), 0);

        // Now, acknowledge the time in the condition occurred and
        // that pubkey's funds are now available.
-        bank.apply_timestamp(mint.pubkey(), dt).unwrap();
+        let tx = Transaction::new_timestamp(&mint.keypair(), pubkey, dt, bank.last_id());
+        let res = bank.process_transaction(&tx);
+        assert!(res.is_ok());
        assert_eq!(bank.get_balance(&pubkey), 1);

-        // tx count is still 1, because we chose not to count timestamp transactions
-        // tx count.
-        assert_eq!(bank.transaction_count(), 1);
+        // tx count is 2
+        assert_eq!(bank.transaction_count(), 2);

-        bank.apply_timestamp(mint.pubkey(), dt).unwrap(); // <-- Attack! Attempt to process completed transaction.
-        assert_ne!(bank.get_balance(&pubkey), 2);
+        // try to replay the timestamp contract
+        bank.register_entry_id(&hash(bank.last_id().as_ref()));
+        let tx = Transaction::new_timestamp(&mint.keypair(), pubkey, dt, bank.last_id());
+        let res = bank.process_transaction(&tx);
+        assert!(res.is_ok());
+
+        assert_eq!(bank.get_balance(&pubkey), 1);
    }

    #[test]
    fn test_cancel_transfer() {
-        let mint = Mint::new(1);
+        // mint needs to have a balance to modify the external contract
+        let mint = Mint::new(2);
        let bank = Bank::new(&mint);
        let pubkey = Keypair::new().pubkey();
        let dt = Utc::now();
@ -727,23 +919,31 @@ mod tests {
        // Assert the debit counts as a transaction.
        assert_eq!(bank.transaction_count(), 1);

-        // Mint's balance will be zero because all funds are locked up.
-        assert_eq!(bank.get_balance(&mint.pubkey()), 0);
-
-        // pubkey's balance will be None because the funds have not been
-        // sent.
-        assert_eq!(bank.get_balance(&pubkey), 0);
-
-        // Now, cancel the trancaction. Mint gets her funds back, pubkey never sees them.
-        bank.apply_signature(mint.pubkey(), signature).unwrap();
+        // Mint's balance will be 1 because 1 of the tokens is locked up.
        assert_eq!(bank.get_balance(&mint.pubkey()), 1);
+
+        // pubkey's balance will be 0 because the funds are locked up
        assert_eq!(bank.get_balance(&pubkey), 0);

-        // Assert cancel doesn't cause count to go backward.
-        assert_eq!(bank.transaction_count(), 1);
+        // Now, cancel the transaction. Mint gets her funds back, pubkey never sees them.
+        let tx = Transaction::new_signature(&mint.keypair(), pubkey, signature, bank.last_id());
+        let res = bank.process_transaction(&tx);
+        assert!(res.is_ok());
+        assert_eq!(bank.get_balance(&pubkey), 0);
+        assert_eq!(bank.get_balance(&mint.pubkey()), 2);

-        bank.apply_signature(mint.pubkey(), signature).unwrap(); // <-- Attack! Attempt to cancel completed transaction.
-        assert_ne!(bank.get_balance(&mint.pubkey()), 2);
+        // Assert cancel counts as a tx
+        assert_eq!(bank.transaction_count(), 2);
+
+        // try to replay the signature contract
+        bank.register_entry_id(&hash(bank.last_id().as_ref()));
+        let tx = Transaction::new_signature(&mint.keypair(), pubkey, signature, bank.last_id());
+        let res = bank.process_transaction(&tx); //<-- attack! try to get budget dsl to pay out with another signature
+        assert!(res.is_ok());
+        // balance is still 2 for the mint
+        assert_eq!(bank.get_balance(&mint.pubkey()), 2);
+        // balance is still 0 for the contract
+        assert_eq!(bank.get_balance(&pubkey), 0);
    }

    #[test]
@ -762,13 +962,13 @@ mod tests {
    }

    #[test]
-    fn test_forget_signature() {
+    fn test_clear_signatures() {
        let mint = Mint::new(1);
        let bank = Bank::new(&mint);
        let signature = Signature::default();
        bank.reserve_signature_with_last_id(&signature, &mint.last_id())
            .unwrap();
-        bank.forget_signature_with_last_id(&signature, &mint.last_id());
+        bank.clear_signatures();
        assert!(
            bank.reserve_signature_with_last_id(&signature, &mint.last_id())
                .is_ok()
@ -881,6 +1081,19 @@ mod tests {
        entries.into_iter()
    }

+    fn create_sample_block_with_next_entries_using_keypairs(
+        mint: &Mint,
+        keypairs: &[Keypair],
+    ) -> impl Iterator<Item = Entry> {
+        let hash = mint.last_id();
+        let transactions: Vec<_> = keypairs
+            .iter()
+            .map(|keypair| Transaction::new(&mint.keypair(), keypair.pubkey(), 1, hash))
+            .collect();
+        let entries = ledger::next_entries(&hash, 0, transactions);
+        entries.into_iter()
+    }
+
    fn create_sample_block(mint: &Mint, length: usize) -> impl Iterator<Item = Entry> {
        let mut entries = Vec::with_capacity(length);
        let mut hash = mint.last_id();
@ -910,6 +1123,15 @@ mod tests {
        (genesis.into_iter().chain(block), mint.pubkey())
    }

+    fn create_sample_ledger_with_mint_and_keypairs(
+        mint: &Mint,
+        keypairs: &[Keypair],
+    ) -> impl Iterator<Item = Entry> {
+        let genesis = mint.create_entries();
+        let block = create_sample_block_with_next_entries_using_keypairs(mint, keypairs);
+        genesis.into_iter().chain(block)
+    }
+
    #[test]
    fn test_process_ledger() {
        let (ledger, pubkey) = create_sample_ledger(1);
@ -997,11 +1219,38 @@ mod tests {
        assert!(!validator_bank.is_leader);
    }
    #[test]
+    fn test_hash_internal_state() {
+        let mint = Mint::new(2_000);
+        let seed = [0u8; 32];
+        let mut rnd = GenKeys::new(seed);
+        let keypairs = rnd.gen_n_keypairs(5);
+        let ledger0 = create_sample_ledger_with_mint_and_keypairs(&mint, &keypairs);
+        let ledger1 = create_sample_ledger_with_mint_and_keypairs(&mint, &keypairs);
+
+        let bank0 = Bank::default();
+        bank0.process_ledger(ledger0).unwrap();
+        let bank1 = Bank::default();
+        bank1.process_ledger(ledger1).unwrap();
+
+        let initial_state = bank0.hash_internal_state();
+
+        assert_eq!(bank1.hash_internal_state(), initial_state);
+
+        let pubkey = keypairs[0].pubkey();
+        bank0
+            .transfer(1_000, &mint.keypair(), pubkey, mint.last_id())
+            .unwrap();
+        assert_ne!(bank0.hash_internal_state(), initial_state);
+        bank1
+            .transfer(1_000, &mint.keypair(), pubkey, mint.last_id())
+            .unwrap();
+        assert_eq!(bank0.hash_internal_state(), bank1.hash_internal_state());
+    }
+    #[test]
    fn test_finality() {
        let def_bank = Bank::default();
        assert_eq!(def_bank.finality(), std::usize::MAX);
        def_bank.set_finality(90);
        assert_eq!(def_bank.finality(), 90);
    }
-
 }
--- a/src/banking_stage.rs
+++ b/src/banking_stage.rs
@ -114,7 +114,7 @@ impl BankingStage {
            signal_sender.send(Signal::Transactions(transactions))?;
            debug!("done process_transactions");

-            packet_recycler.recycle(msgs);
+            packet_recycler.recycle(msgs, "process_transactions");
        }
        let total_time_s = timing::duration_as_s(&proc_start.elapsed());
        let total_time_ms = timing::duration_as_ms(&proc_start.elapsed());
--- a/src/bin/bench-streamer.rs
+++ b/src/bin/bench-streamer.rs
@ -1,9 +1,13 @@
+extern crate clap;
 extern crate solana;

+use clap::{App, Arg};
+use solana::netutil::bind_to;
 use solana::packet::{Packet, PacketRecycler, BLOB_SIZE, PACKET_DATA_SIZE};
 use solana::result::Result;
 use solana::streamer::{receiver, PacketReceiver};
-use std::net::{SocketAddr, UdpSocket};
+use std::cmp::max;
+use std::net::{IpAddr, Ipv4Addr, SocketAddr, UdpSocket};
 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
 use std::sync::mpsc::channel;
 use std::sync::Arc;
@ -49,28 +53,62 @@ fn sink(
        let timer = Duration::new(1, 0);
        if let Ok(msgs) = r.recv_timeout(timer) {
            rvs.fetch_add(msgs.read().unwrap().packets.len(), Ordering::Relaxed);
-            recycler.recycle(msgs);
+            recycler.recycle(msgs, "sink");
        }
    })
 }

 fn main() -> Result<()> {
-    let read = UdpSocket::bind("127.0.0.1:0")?;
-    read.set_read_timeout(Some(Duration::new(1, 0)))?;
+    let mut num_sockets = 1usize;
+
+    let matches = App::new("solana-bench-streamer")
+        .arg(
+            Arg::with_name("num-recv-sockets")
+                .long("num-recv-sockets")
+                .value_name("NUM")
+                .takes_value(true)
+                .help("Use NUM receive sockets"),
+        )
+        .get_matches();
+
+    if let Some(n) = matches.value_of("num-recv-sockets") {
+        num_sockets = max(num_sockets, n.to_string().parse().expect("integer"));
+    }
+
+    let mut port = 0;
+    let mut addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 0);

-    let addr = read.local_addr()?;
    let exit = Arc::new(AtomicBool::new(false));
    let pack_recycler = PacketRecycler::default();

-    let (s_reader, r_reader) = channel();
-    let t_reader = receiver(read, exit.clone(), pack_recycler.clone(), s_reader);
+    let mut read_channels = Vec::new();
+    let mut read_threads = Vec::new();
+    for _ in 0..num_sockets {
+        let read = bind_to(port, false).unwrap();
+        read.set_read_timeout(Some(Duration::new(1, 0))).unwrap();
+
+        addr = read.local_addr().unwrap();
+        port = addr.port();
+
+        let (s_reader, r_reader) = channel();
+        read_channels.push(r_reader);
+        read_threads.push(receiver(
+            Arc::new(read),
+            exit.clone(),
+            pack_recycler.clone(),
+            s_reader,
+        ));
+    }
+
    let t_producer1 = producer(&addr, &pack_recycler, exit.clone());
    let t_producer2 = producer(&addr, &pack_recycler, exit.clone());
    let t_producer3 = producer(&addr, &pack_recycler, exit.clone());

    let rvs = Arc::new(AtomicUsize::new(0));
-    let t_sink = sink(pack_recycler.clone(), exit.clone(), rvs.clone(), r_reader);
-
+    let sink_threads: Vec<_> = read_channels
+        .into_iter()
+        .map(|r_reader| sink(pack_recycler.clone(), exit.clone(), rvs.clone(), r_reader))
+        .collect();
    let start = SystemTime::now();
    let start_val = rvs.load(Ordering::Relaxed);
    sleep(Duration::new(5, 0));
@ -81,10 +119,14 @@ fn main() -> Result<()> {
    let fcount = (end_val - start_val) as f64;
    println!("performance: {:?}", fcount / ftime);
    exit.store(true, Ordering::Relaxed);
-    t_reader.join()?;
+    for t_reader in read_threads {
+        t_reader.join()?;
+    }
    t_producer1.join()?;
    t_producer2.join()?;
    t_producer3.join()?;
-    t_sink.join()?;
+    for t_sink in sink_threads {
+        t_sink.join()?;
+    }
    Ok(())
 }
--- a/src/bin/bench-tps.rs
+++ b/src/bin/bench-tps.rs
@ -4,6 +4,7 @@ extern crate clap;
 extern crate influx_db_client;
 extern crate rayon;
 extern crate serde_json;
+#[macro_use]
 extern crate solana;

 use clap::{App, Arg};
@ -12,24 +13,22 @@ use rayon::prelude::*;
 use solana::client::mk_client;
 use solana::crdt::{Crdt, NodeInfo};
 use solana::drone::DRONE_PORT;
-use solana::fullnode::Config;
 use solana::hash::Hash;
 use solana::logger;
 use solana::metrics;
-use solana::nat::{get_public_ip_addr, udp_random_bind};
 use solana::ncp::Ncp;
+use solana::packet::BlobRecycler;
 use solana::service::Service;
 use solana::signature::{read_keypair, GenKeys, Keypair, KeypairUtil};
-use solana::thin_client::ThinClient;
+use solana::thin_client::{poll_gossip_for_leader, ThinClient};
 use solana::timing::{duration_as_ms, duration_as_s};
 use solana::transaction::Transaction;
 use solana::wallet::request_airdrop;
 use solana::window::default_window;
 use std::collections::VecDeque;
-use std::fs::File;
-use std::net::{IpAddr, Ipv4Addr, SocketAddr, UdpSocket};
+use std::net::SocketAddr;
 use std::process::exit;
-use std::sync::atomic::{AtomicBool, AtomicIsize, Ordering};
+use std::sync::atomic::{AtomicBool, AtomicIsize, AtomicUsize, Ordering};
 use std::sync::{Arc, RwLock};
 use std::thread::sleep;
 use std::thread::Builder;
@ -143,7 +142,13 @@ fn send_barrier_transaction(barrier_client: &mut ThinClient, last_id: &mut Hash,
            );

            // Sanity check that the client balance is still 1
-            let balance = barrier_client.poll_get_balance(&id.pubkey()).unwrap_or(-1);
+            let balance = barrier_client
+                .poll_balance_with_timeout(
+                    &id.pubkey(),
+                    &Duration::from_millis(100),
+                    &Duration::from_secs(10),
+                )
+                .expect("Failed to get balance");
            if balance != 1 {
                panic!("Expected an account balance of 1 (balance: {}", balance);
            }
@ -228,6 +233,7 @@ fn do_tx_transfers(
    shared_txs: &Arc<RwLock<VecDeque<Vec<Transaction>>>>,
    leader: &NodeInfo,
    shared_tx_thread_count: &Arc<AtomicIsize>,
+    total_tx_sent_count: &Arc<AtomicUsize>,
 ) {
    let client = mk_client(&leader);
    loop {
@ -249,6 +255,7 @@ fn do_tx_transfers(
                client.transfer_signed(&tx).unwrap();
            }
            shared_tx_thread_count.fetch_add(-1, Ordering::Relaxed);
+            total_tx_sent_count.fetch_add(tx_len, Ordering::Relaxed);
            println!(
                "Tx send done. {} ms {} tps",
                duration_as_ms(&transfer_start.elapsed()),
@ -275,33 +282,48 @@ fn airdrop_tokens(client: &mut ThinClient, leader: &NodeInfo, id: &Keypair, tx_c
    let mut drone_addr = leader.contact_info.tpu;
    drone_addr.set_port(DRONE_PORT);

-    let starting_balance = client.poll_get_balance(&id.pubkey()).unwrap();
+    let starting_balance = client.poll_get_balance(&id.pubkey()).unwrap_or(0);
    metrics_submit_token_balance(starting_balance);
+    println!("starting balance {}", starting_balance);

    if starting_balance < tx_count {
        let airdrop_amount = tx_count - starting_balance;
        println!(
-            "Airdropping {:?} tokens from {}",
-            airdrop_amount, drone_addr
+            "Airdropping {:?} tokens from {} for {}",
+            airdrop_amount,
+            drone_addr,
+            id.pubkey(),
        );

-        let previous_balance = starting_balance;
-        request_airdrop(&drone_addr, &id.pubkey(), airdrop_amount as u64).unwrap();
+        if let Err(e) = request_airdrop(&drone_addr, &id.pubkey(), airdrop_amount as u64) {
+            panic!(
+                "Error requesting airdrop: {:?} to addr: {:?} amount: {}",
+                e, drone_addr, airdrop_amount
+            );
+        }

        // TODO: return airdrop Result from Drone instead of polling the
        //       network
-        let mut current_balance = previous_balance;
+        let mut current_balance = starting_balance;
        for _ in 0..20 {
            sleep(Duration::from_millis(500));
-            current_balance = client.poll_get_balance(&id.pubkey()).unwrap();
+            current_balance = client.poll_get_balance(&id.pubkey()).unwrap_or_else(|e| {
+                println!("airdrop error {}", e);
+                starting_balance
+            });
            if starting_balance != current_balance {
                break;
            }
-            println!(".");
+            println!("current balance {}...", current_balance);
        }
        metrics_submit_token_balance(current_balance);
        if current_balance - starting_balance != airdrop_amount {
-            println!("Airdrop failed!");
+            println!(
+                "Airdrop failed! {} {} {}",
+                id.pubkey(),
+                current_balance,
+                starting_balance
+            );
            exit(1);
        }
    }
@ -311,10 +333,11 @@ fn compute_and_report_stats(
    maxes: &Arc<RwLock<Vec<(SocketAddr, NodeStats)>>>,
    sample_period: u64,
    tx_send_elapsed: &Duration,
+    total_tx_send_count: usize,
 ) {
    // Compute/report stats
    let mut max_of_maxes = 0.0;
-    let mut total_txs = 0;
+    let mut max_tx_count = 0;
    let mut nodes_with_zero_tps = 0;
    let mut total_maxes = 0.0;
    println!(" Node address        |       Max TPS | Total Transactions");
@ -342,7 +365,9 @@ fn compute_and_report_stats(
        if stats.tps > max_of_maxes {
            max_of_maxes = stats.tps;
        }
-        total_txs += stats.tx;
+        if stats.tx > max_tx_count {
+            max_tx_count = stats.tx;
+        }
    }

    if total_maxes > 0.0 {
@ -355,166 +380,155 @@ fn compute_and_report_stats(
    }

    println!(
-        "\nHighest TPS: {:.2} sampling period {}s total transactions: {} clients: {}",
+        "\nHighest TPS: {:.2} sampling period {}s max transactions: {} clients: {} drop rate: {:.2}",
        max_of_maxes,
        sample_period,
-        total_txs,
-        maxes.read().unwrap().len()
+        max_tx_count,
+        maxes.read().unwrap().len(),
+        (total_tx_send_count as u64 - max_tx_count) as f64 / total_tx_send_count as f64,
    );
    println!(
        "\tAverage TPS: {}",
-        total_txs as f32 / duration_as_s(tx_send_elapsed)
+        max_tx_count as f32 / duration_as_s(tx_send_elapsed)
    );
 }

+// First transfer 3/4 of the tokens to the dest accounts
+// then ping-pong 1/4 of the tokens back to the other account
+// this leaves 1/4 token buffer in each account
+fn should_switch_directions(num_tokens_per_account: i64, i: i64) -> bool {
+    i % (num_tokens_per_account / 4) == 0 && (i >= (3 * num_tokens_per_account) / 4)
+}
+
 fn main() {
    logger::setup();
    metrics::set_panic_hook("bench-tps");
-    let mut threads = 4usize;
-    let mut num_nodes = 1usize;
-    let mut time_sec = 90;
-    let mut sustained = false;
-    let mut tx_count = 500_000;

    let matches = App::new("solana-bench-tps")
        .version(crate_version!())
        .arg(
-            Arg::with_name("leader")
-                .short("l")
-                .long("leader")
-                .value_name("PATH")
-                .takes_value(true)
-                .help("/path/to/leader.json"),
-        )
-        .arg(
-            Arg::with_name("keypair")
-                .short("k")
-                .long("keypair")
-                .value_name("PATH")
-                .takes_value(true)
-                .default_value("~/.config/solana/id.json")
-                .help("/path/to/id.json"),
-        )
-        .arg(
-            Arg::with_name("num_nodes")
+            Arg::with_name("network")
                .short("n")
-                .long("nodes")
-                .value_name("NUMBER")
+                .long("network")
+                .value_name("HOST:PORT")
                .takes_value(true)
-                .help("number of nodes to converge to"),
+                .help("rendezvous with the network at this gossip entry point, defaults to 127.0.0.1:8001"),
+        )
+        .arg(
+            Arg::with_name("identity")
+                .short("i")
+                .long("identity")
+                .value_name("PATH")
+                .takes_value(true)
+                .required(true)
+                .help("file containing a client identity (keypair)"),
+        )
+        .arg(
+            Arg::with_name("num-nodes")
+                .short("N")
+                .long("num-nodes")
+                .value_name("NUM")
+                .takes_value(true)
+                .help("wait for NUM nodes to converge"),
        )
        .arg(
            Arg::with_name("threads")
                .short("t")
                .long("threads")
-                .value_name("NUMBER")
+                .value_name("NUM")
                .takes_value(true)
                .help("number of threads"),
        )
        .arg(
-            Arg::with_name("seconds")
-                .short("s")
-                .long("sec")
-                .value_name("NUMBER")
+            Arg::with_name("duration")
+                .long("duration")
+                .value_name("SECS")
                .takes_value(true)
-                .help("send transactions for this many seconds"),
+                .help("run benchmark for SECS seconds then exit, default is forever"),
        )
        .arg(
-            Arg::with_name("converge_only")
-                .short("c")
+            Arg::with_name("converge-only")
+                .long("converge-only")
                .help("exit immediately after converging"),
        )
-        .arg(
-            Arg::with_name("addr")
-                .short("a")
-                .long("addr")
-                .value_name("IPADDR")
-                .takes_value(true)
-                .help("address to advertise to the network"),
-        )
        .arg(
            Arg::with_name("sustained")
                .long("sustained")
-                .help("Use sustained performance mode vs. peak mode. This overlaps the tx generation with transfers."),
+                .help("use sustained performance mode vs. peak mode. This overlaps the tx generation with transfers."),
        )
        .arg(
            Arg::with_name("tx_count")
                .long("tx_count")
-                .value_name("NUMBER")
+                .value_name("NUM")
                .takes_value(true)
-                .help("number of transactions to send in a single batch")
+                .help("number of transactions to send per batch")
        )
        .get_matches();

-    let leader: NodeInfo;
-    if let Some(l) = matches.value_of("leader") {
-        leader = read_leader(l).node_info;
-    } else {
-        let server_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 8000);
-        leader = NodeInfo::new_leader(&server_addr);
-    };
-
-    let id = read_keypair(matches.value_of("keypair").unwrap()).expect("client keypair");
-
-    if let Some(t) = matches.value_of("threads") {
-        threads = t.to_string().parse().expect("integer");
-    }
-
-    if let Some(n) = matches.value_of("num_nodes") {
-        num_nodes = n.to_string().parse().expect("integer");
-    }
-
-    if let Some(s) = matches.value_of("seconds") {
-        time_sec = s.to_string().parse().expect("integer");
-    }
-
-    let addr = if let Some(s) = matches.value_of("addr") {
-        s.to_string().parse().unwrap_or_else(|e| {
-            eprintln!("failed to parse {} as IP address error: {:?}", s, e);
-            exit(1);
+    let network = if let Some(addr) = matches.value_of("network") {
+        addr.parse().unwrap_or_else(|e| {
+            eprintln!("failed to parse network: {}", e);
+            exit(1)
        })
    } else {
-        get_public_ip_addr().unwrap_or_else(|e| {
-            eprintln!("failed to get public IP, try --addr? error: {:?}", e);
-            exit(1);
-        })
+        socketaddr!("127.0.0.1:8001")
    };

-    if let Some(s) = matches.value_of("tx_count") {
-        tx_count = s.to_string().parse().expect("integer");
-    }
+    let id =
+        read_keypair(matches.value_of("identity").unwrap()).expect("can't read client identity");

-    if matches.is_present("sustained") {
-        sustained = true;
-    }
+    let threads = if let Some(t) = matches.value_of("threads") {
+        t.to_string().parse().expect("can't parse threads")
+    } else {
+        4usize
+    };
+
+    let num_nodes = if let Some(n) = matches.value_of("num-nodes") {
+        n.to_string().parse().expect("can't parse num-nodes")
+    } else {
+        1usize
+    };
+
+    let duration = if let Some(s) = matches.value_of("duration") {
+        Duration::new(s.to_string().parse().expect("can't parse duration"), 0)
+    } else {
+        Duration::new(std::u64::MAX, 0)
+    };
+
+    let tx_count = if let Some(s) = matches.value_of("tx_count") {
+        s.to_string().parse().expect("can't parse tx_count")
+    } else {
+        500_000
+    };
+
+    let sustained = matches.is_present("sustained");
+
+    println!("Looking for leader at {:?}", network);
+    let leader = poll_gossip_for_leader(network, None).expect("unable to find leader on network");

    let exit_signal = Arc::new(AtomicBool::new(false));
    let mut c_threads = vec![];
-    let validators = converge(&leader, &exit_signal, num_nodes, &mut c_threads, addr);
+    let (nodes, leader) = converge(&leader, &exit_signal, num_nodes, &mut c_threads);

-    println!(" Node address         | Node identifier");
-    println!("----------------------+------------------");
-    for node in &validators {
-        println!(
-            " {:20} | {:16x}",
-            node.contact_info.tpu.to_string(),
-            node.debug_id()
-        );
-    }
-    println!("Nodes: {}", validators.len());
-
-    if validators.len() < num_nodes {
+    if nodes.len() < num_nodes {
        println!(
            "Error: Insufficient nodes discovered.  Expecting {} or more",
            num_nodes
        );
        exit(1);
    }
+    if leader.is_none() {
+        println!("no leader");
+        exit(1);
+    }

-    if matches.is_present("converge_only") {
+    if matches.is_present("converge-only") {
        return;
    }

+    let leader = leader.unwrap();
+
+    println!("leader is at {} {}", leader.contact_info.rpu, leader.id);
    let mut client = mk_client(&leader);
    let mut barrier_client = mk_client(&leader);

@ -527,7 +541,20 @@ fn main() {
    let barrier_id = rnd.gen_n_keypairs(1).pop().unwrap();

    println!("Get tokens...");
-    airdrop_tokens(&mut client, &leader, &id, tx_count);
+    let num_tokens_per_account = 20;
+
+    // Sample the first keypair, see if it has tokens, if so then resume
+    // to avoid token loss
+    let keypair0_balance = client.poll_get_balance(&keypairs[0].pubkey()).unwrap_or(0);
+
+    if num_tokens_per_account > keypair0_balance {
+        airdrop_tokens(
+            &mut client,
+            &leader,
+            &id,
+            (num_tokens_per_account - keypair0_balance) * tx_count,
+        );
+    }
    airdrop_tokens(&mut barrier_client, &leader, &barrier_id, 1);

    println!("Get last ID...");
@ -542,7 +569,7 @@ fn main() {
    let maxes = Arc::new(RwLock::new(Vec::new()));
    let sample_period = 1; // in seconds
    println!("Sampling TPS every {} second...", sample_period);
-    let v_threads: Vec<_> = validators
+    let v_threads: Vec<_> = nodes
        .into_iter()
        .map(|v| {
            let exit_signal = exit_signal.clone();
@ -560,6 +587,7 @@ fn main() {
        Arc::new(RwLock::new(VecDeque::new()));

    let shared_tx_active_thread_count = Arc::new(AtomicIsize::new(0));
+    let total_tx_sent_count = Arc::new(AtomicUsize::new(0));

    let s_threads: Vec<_> = (0..threads)
        .map(|_| {
@ -567,6 +595,7 @@ fn main() {
            let shared_txs = shared_txs.clone();
            let leader = leader.clone();
            let shared_tx_active_thread_count = shared_tx_active_thread_count.clone();
+            let total_tx_sent_count = total_tx_sent_count.clone();
            Builder::new()
                .name("solana-client-sender".to_string())
                .spawn(move || {
@ -575,6 +604,7 @@ fn main() {
                        &shared_txs,
                        &leader,
                        &shared_tx_active_thread_count,
+                        &total_tx_sent_count,
                    );
                })
                .unwrap()
@ -582,10 +612,10 @@ fn main() {
        .collect();

    // generate and send transactions for the specified duration
-    let time = Duration::new(time_sec, 0);
-    let now = Instant::now();
+    let start = Instant::now();
    let mut reclaim_tokens_back_to_source_account = false;
-    while now.elapsed() < time || reclaim_tokens_back_to_source_account {
+    let mut i = keypair0_balance;
+    while start.elapsed() < duration {
        let balance = client.poll_get_balance(&id.pubkey()).unwrap_or(-1);
        metrics_submit_token_balance(balance);

@ -600,8 +630,6 @@ fn main() {
            threads,
            reclaim_tokens_back_to_source_account,
        );
-        reclaim_tokens_back_to_source_account = !reclaim_tokens_back_to_source_account;
-
        // In sustained mode overlap the transfers with generation
        // this has higher average performance but lower peak performance
        // in tested environments.
@ -614,6 +642,11 @@ fn main() {
        // transactions sent by `generate_txs()` so instead send and confirm a single transaction
        // to validate the network is still functional.
        send_barrier_transaction(&mut barrier_client, &mut last_id, &barrier_id);
+
+        i += 1;
+        if should_switch_directions(num_tokens_per_account, i) {
+            reclaim_tokens_back_to_source_account = !reclaim_tokens_back_to_source_account;
+        }
    }

    // Stop the sampling threads so it will collect the stats
@ -637,7 +670,12 @@ fn main() {
    let balance = client.poll_get_balance(&id.pubkey()).unwrap_or(-1);
    metrics_submit_token_balance(balance);

-    compute_and_report_stats(&maxes, sample_period, &now.elapsed());
+    compute_and_report_stats(
+        &maxes,
+        sample_period,
+        &start.elapsed(),
+        total_tx_sent_count.load(Ordering::Relaxed),
+    );

    // join the crdt client threads
    for t in c_threads {
@ -645,71 +683,78 @@ fn main() {
    }
 }

-fn spy_node(addr: IpAddr) -> (NodeInfo, UdpSocket) {
-    let gossip_socket = udp_random_bind(8000, 10000, 5).unwrap();
-
-    let gossip_addr = SocketAddr::new(addr, gossip_socket.local_addr().unwrap().port());
-
-    let pubkey = Keypair::new().pubkey();
-    let daddr = "0.0.0.0:0".parse().unwrap();
-    assert!(!gossip_addr.ip().is_unspecified());
-    assert!(!gossip_addr.ip().is_multicast());
-    let node = NodeInfo::new(pubkey, gossip_addr, daddr, daddr, daddr, daddr);
-    (node, gossip_socket)
-}
-
 fn converge(
    leader: &NodeInfo,
    exit_signal: &Arc<AtomicBool>,
    num_nodes: usize,
    threads: &mut Vec<JoinHandle<()>>,
-    addr: IpAddr,
-) -> Vec<NodeInfo> {
+) -> (Vec<NodeInfo>, Option<NodeInfo>) {
    //lets spy on the network
-    let (spy, spy_gossip) = spy_node(addr);
-    let mut spy_crdt = Crdt::new(spy).expect("Crdt::new");
+    let (node, gossip_socket) = Crdt::spy_node();
+    let mut spy_crdt = Crdt::new(node).expect("Crdt::new");
    spy_crdt.insert(&leader);
    spy_crdt.set_leader(leader.id);
    let spy_ref = Arc::new(RwLock::new(spy_crdt));
-    let window = default_window();
-    let gossip_send_socket = udp_random_bind(8000, 10000, 5).unwrap();
+    let window = Arc::new(RwLock::new(default_window()));
    let ncp = Ncp::new(
        &spy_ref,
-        window.clone(),
+        window,
+        BlobRecycler::default(),
        None,
-        spy_gossip,
-        gossip_send_socket,
+        gossip_socket,
        exit_signal.clone(),
-    ).expect("DataReplicator::new");
+    );
    let mut v: Vec<NodeInfo> = vec![];
-    //wait for the network to converge, 30 seconds should be plenty
+    // wait for the network to converge, 30 seconds should be plenty
    for _ in 0..30 {
-        v = spy_ref
-            .read()
-            .unwrap()
-            .table
-            .values()
-            .into_iter()
-            .filter(|x| Crdt::is_valid_address(x.contact_info.rpu))
-            .cloned()
-            .collect();
-        if v.len() >= num_nodes {
-            println!("CONVERGED!");
-            break;
-        } else {
-            println!(
-                "{} node(s) discovered (looking for {} or more)",
-                v.len(),
-                num_nodes
-            );
+        {
+            let spy_ref = spy_ref.read().unwrap();
+
+            println!("{}", spy_ref.node_info_trace());
+
+            if spy_ref.leader_data().is_some() {
+                v = spy_ref
+                    .table
+                    .values()
+                    .filter(|x| Crdt::is_valid_address(&x.contact_info.rpu))
+                    .cloned()
+                    .collect();
+
+                if v.len() >= num_nodes {
+                    println!("CONVERGED!");
+                    break;
+                } else {
+                    println!(
+                        "{} node(s) discovered (looking for {} or more)",
+                        v.len(),
+                        num_nodes
+                    );
+                }
+            }
        }
        sleep(Duration::new(1, 0));
    }
    threads.extend(ncp.thread_hdls().into_iter());
-    v
+    let leader = spy_ref.read().unwrap().leader_data().cloned();
+    (v, leader)
 }

-fn read_leader(path: &str) -> Config {
-    let file = File::open(path).unwrap_or_else(|_| panic!("file not found: {}", path));
-    serde_json::from_reader(file).unwrap_or_else(|_| panic!("failed to parse {}", path))
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_switch_directions() {
+        assert_eq!(should_switch_directions(20, 0), false);
+        assert_eq!(should_switch_directions(20, 1), false);
+        assert_eq!(should_switch_directions(20, 14), false);
+        assert_eq!(should_switch_directions(20, 15), true);
+        assert_eq!(should_switch_directions(20, 16), false);
+        assert_eq!(should_switch_directions(20, 19), false);
+        assert_eq!(should_switch_directions(20, 20), true);
+        assert_eq!(should_switch_directions(20, 21), false);
+        assert_eq!(should_switch_directions(20, 99), false);
+        assert_eq!(should_switch_directions(20, 100), true);
+        assert_eq!(should_switch_directions(20, 101), false);
+    }
 }
--- a/src/bin/drone.rs
+++ b/src/bin/drone.rs
@ -1,40 +1,53 @@
 extern crate bincode;
+extern crate bytes;
 #[macro_use]
 extern crate clap;
+extern crate log;
 extern crate serde_json;
 extern crate solana;
 extern crate tokio;
 extern crate tokio_codec;
-extern crate tokio_io;

-use bincode::deserialize;
+use bincode::{deserialize, serialize};
+use bytes::Bytes;
 use clap::{App, Arg};
-use solana::crdt::NodeInfo;
 use solana::drone::{Drone, DroneRequest, DRONE_PORT};
-use solana::fullnode::Config;
 use solana::logger;
 use solana::metrics::set_panic_hook;
 use solana::signature::read_keypair;
-use std::fs::File;
-use std::net::{IpAddr, Ipv4Addr, SocketAddr};
+use std::error;
+use std::io;
+use std::net::{Ipv4Addr, SocketAddr};
+use std::process::exit;
 use std::sync::{Arc, Mutex};
 use std::thread;
 use tokio::net::TcpListener;
 use tokio::prelude::*;
 use tokio_codec::{BytesCodec, Decoder};

-fn main() {
+macro_rules! socketaddr {
+    ($ip:expr, $port:expr) => {
+        SocketAddr::from((Ipv4Addr::from($ip), $port))
+    };
+    ($str:expr) => {{
+        let a: SocketAddr = $str.parse().unwrap();
+        a
+    }};
+}
+
+fn main() -> Result<(), Box<error::Error>> {
    logger::setup();
    set_panic_hook("drone");
    let matches = App::new("drone")
        .version(crate_version!())
        .arg(
-            Arg::with_name("leader")
-                .short("l")
-                .long("leader")
-                .value_name("PATH")
+            Arg::with_name("network")
+                .short("n")
+                .long("network")
+                .value_name("HOST:PORT")
                .takes_value(true)
-                .help("/path/to/leader.json"),
+                .required(true)
+                .help("rendezvous with the network at this gossip entry point"),
        )
        .arg(
            Arg::with_name("keypair")
@ -43,57 +56,55 @@ fn main() {
                .value_name("PATH")
                .takes_value(true)
                .required(true)
-                .help("/path/to/mint.json"),
+                .help("File to read the client's keypair from"),
        )
        .arg(
-            Arg::with_name("time")
-                .short("t")
-                .long("time")
+            Arg::with_name("slice")
+                .long("slice")
                .value_name("SECONDS")
                .takes_value(true)
-                .help("time slice over which to limit requests to drone"),
+                .help("Time slice over which to limit requests to drone"),
        )
        .arg(
            Arg::with_name("cap")
-                .short("c")
                .long("cap")
                .value_name("NUMBER")
                .takes_value(true)
-                .help("request limit for time slice"),
+                .help("Request limit for time slice"),
        )
        .get_matches();

-    let leader: NodeInfo;
-    if let Some(l) = matches.value_of("leader") {
-        leader = read_leader(l).node_info;
-    } else {
-        let server_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 8000);
-        leader = NodeInfo::new_leader(&server_addr);
-    };
+    let network = matches
+        .value_of("network")
+        .unwrap()
+        .parse()
+        .unwrap_or_else(|e| {
+            eprintln!("failed to parse network: {}", e);
+            exit(1)
+        });

    let mint_keypair =
-        read_keypair(matches.value_of("keypair").expect("keypair")).expect("client keypair");
+        read_keypair(matches.value_of("keypair").unwrap()).expect("failed to read client keypair");

    let time_slice: Option<u64>;
-    if let Some(t) = matches.value_of("time") {
-        time_slice = Some(t.to_string().parse().expect("integer"));
+    if let Some(secs) = matches.value_of("slice") {
+        time_slice = Some(secs.to_string().parse().expect("failed to parse slice"));
    } else {
        time_slice = None;
    }
    let request_cap: Option<u64>;
    if let Some(c) = matches.value_of("cap") {
-        request_cap = Some(c.to_string().parse().expect("integer"));
+        request_cap = Some(c.to_string().parse().expect("failed to parse cap"));
    } else {
        request_cap = None;
    }

-    let drone_addr: SocketAddr = format!("0.0.0.0:{}", DRONE_PORT).parse().unwrap();
+    let drone_addr = socketaddr!(0, DRONE_PORT);

    let drone = Arc::new(Mutex::new(Drone::new(
        mint_keypair,
        drone_addr,
-        leader.contact_info.tpu,
-        leader.contact_info.rpu,
+        network,
        time_slice,
        request_cap,
    )));
@ -114,36 +125,44 @@ fn main() {
            let drone2 = drone.clone();
            // let client_ip = socket.peer_addr().expect("drone peer_addr").ip();
            let framed = BytesCodec::new().framed(socket);
-            let (_writer, reader) = framed.split();
+            let (writer, reader) = framed.split();

-            let processor = reader
-                .for_each(move |bytes| {
-                    let req: DroneRequest = deserialize(&bytes).or_else(|err| {
-                        use std::io;
-                        Err(io::Error::new(
-                            io::ErrorKind::Other,
-                            format!("deserialize packet in drone: {:?}", err),
-                        ))
-                    })?;
+            let processor = reader.and_then(move |bytes| {
+                let req: DroneRequest = deserialize(&bytes).or_else(|err| {
+                    Err(io::Error::new(
+                        io::ErrorKind::Other,
+                        format!("deserialize packet in drone: {:?}", err),
+                    ))
+                })?;

-                    println!("Airdrop requested...");
-                    // let res = drone2.lock().unwrap().check_rate_limit(client_ip);
-                    let res1 = drone2.lock().unwrap().send_airdrop(req);
-                    match res1 {
-                        Ok(_) => println!("Airdrop sent!"),
-                        Err(_) => println!("Request limit reached for this time slice"),
-                    }
-                    Ok(())
-                })
-                .then(|result| {
-                    println!("Socket closed with result: {:?}", result);
-                    Ok(())
-                });
-            tokio::spawn(processor)
+                println!("Airdrop requested...");
+                // let res = drone2.lock().unwrap().check_rate_limit(client_ip);
+                let res1 = drone2.lock().unwrap().send_airdrop(req);
+                match res1 {
+                    Ok(_) => println!("Airdrop sent!"),
+                    Err(_) => println!("Request limit reached for this time slice"),
+                }
+                let response = res1?;
+                println!("Airdrop tx signature: {:?}", response);
+                let response_vec = serialize(&response).or_else(|err| {
+                    Err(io::Error::new(
+                        io::ErrorKind::Other,
+                        format!("serialize signature in drone: {:?}", err),
+                    ))
+                })?;
+                let response_bytes = Bytes::from(response_vec.clone());
+                Ok(response_bytes)
+            });
+            let server = writer
+                .send_all(processor.or_else(|err| {
+                    Err(io::Error::new(
+                        io::ErrorKind::Other,
+                        format!("Drone response: {:?}", err),
+                    ))
+                }))
+                .then(|_| Ok(()));
+            tokio::spawn(server)
        });
    tokio::run(done);
-}
-fn read_leader(path: &str) -> Config {
-    let file = File::open(path).unwrap_or_else(|_| panic!("file not found: {}", path));
-    serde_json::from_reader(file).unwrap_or_else(|_| panic!("failed to parse {}", path))
+    Ok(())
 }
--- a/src/bin/fullnode-config.rs
+++ b/src/bin/fullnode-config.rs
@ -5,9 +5,9 @@ extern crate serde_json;
 extern crate solana;

 use clap::{App, Arg};
-use solana::crdt::{get_ip_addr, parse_port_or_addr};
+use solana::crdt::FULLNODE_PORT_RANGE;
 use solana::fullnode::Config;
-use solana::nat::get_public_ip_addr;
+use solana::netutil::{get_ip_addr, get_public_ip_addr, parse_port_or_addr};
 use solana::signature::read_pkcs8;
 use std::io;
 use std::net::SocketAddr;
@ -48,13 +48,7 @@ fn main() {
        .get_matches();

    let bind_addr: SocketAddr = {
-        let mut bind_addr = parse_port_or_addr({
-            if let Some(b) = matches.value_of("bind") {
-                Some(b.to_string())
-            } else {
-                None
-            }
-        });
+        let mut bind_addr = parse_port_or_addr(matches.value_of("bind"), FULLNODE_PORT_RANGE.0);
        if matches.is_present("local") {
            let ip = get_ip_addr().unwrap();
            bind_addr.set_ip(ip);
--- a/src/bin/fullnode.rs
+++ b/src/bin/fullnode.rs
@ -1,23 +1,28 @@
 #[macro_use]
 extern crate clap;
 extern crate getopts;
+#[macro_use]
 extern crate log;
 extern crate serde_json;
+#[macro_use]
 extern crate solana;

 use clap::{App, Arg};
 use solana::client::mk_client;
-use solana::crdt::{NodeInfo, TestNode};
+use solana::crdt::Node;
 use solana::drone::DRONE_PORT;
 use solana::fullnode::{Config, Fullnode};
 use solana::logger;
 use solana::metrics::set_panic_hook;
 use solana::service::Service;
 use solana::signature::{Keypair, KeypairUtil};
+use solana::thin_client::poll_gossip_for_leader;
 use solana::wallet::request_airdrop;
 use std::fs::File;
-use std::net::{IpAddr, Ipv4Addr, SocketAddr};
+use std::net::{Ipv4Addr, SocketAddr};
 use std::process::exit;
+use std::thread::sleep;
+use std::time::Duration;

 fn main() -> () {
    logger::setup();
@ -33,12 +38,12 @@ fn main() -> () {
                .help("run with the identity found in FILE"),
        )
        .arg(
-            Arg::with_name("testnet")
-                .short("t")
-                .long("testnet")
+            Arg::with_name("network")
+                .short("n")
+                .long("network")
                .value_name("HOST:PORT")
                .takes_value(true)
-                .help("connect to the network at this gossip entry point"),
+                .help("connect/rendezvous with the network at this gossip entry point"),
        )
        .arg(
            Arg::with_name("ledger")
@ -51,16 +56,12 @@ fn main() -> () {
        )
        .get_matches();

-    let bind_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 8000);
-    let mut keypair = Keypair::new();
-    let mut repl_data = NodeInfo::new_leader_with_pubkey(keypair.pubkey(), &bind_addr);
-    if let Some(i) = matches.value_of("identity") {
+    let (keypair, ncp) = if let Some(i) = matches.value_of("identity") {
        let path = i.to_string();
        if let Ok(file) = File::open(path.clone()) {
            let parse: serde_json::Result<Config> = serde_json::from_reader(file);
            if let Ok(data) = parse {
-                keypair = data.keypair();
-                repl_data = data.node_info;
+                (data.keypair(), data.node_info.contact_info.ncp)
            } else {
                eprintln!("failed to parse {}", path);
                exit(1);
@ -69,50 +70,62 @@ fn main() -> () {
            eprintln!("failed to read {}", path);
            exit(1);
        }
-    }
-
-    let leader_pubkey = keypair.pubkey();
-    let repl_clone = repl_data.clone();
+    } else {
+        (Keypair::new(), socketaddr!(0, 8000))
+    };

    let ledger_path = matches.value_of("ledger").unwrap();

-    let mut node = TestNode::new_with_bind_addr(repl_data, bind_addr);
-    let mut drone_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), DRONE_PORT);
-    let fullnode = if let Some(t) = matches.value_of("testnet") {
-        let testnet_address_string = t.to_string();
-        let testnet_addr: SocketAddr = testnet_address_string.parse().unwrap();
-        drone_addr.set_ip(testnet_addr.ip());
+    // socketaddr that is initial pointer into the network's gossip (ncp)
+    let network = matches
+        .value_of("network")
+        .map(|network| network.parse().expect("failed to parse network address"));

-        Fullnode::new(node, false, ledger_path, keypair, Some(testnet_addr))
-    } else {
-        node.data.leader_id = node.data.id;
+    let node = Node::new_with_external_ip(keypair.pubkey(), &ncp);

-        Fullnode::new(node, true, ledger_path, keypair, None)
+    // save off some stuff for airdrop
+    let node_info = node.info.clone();
+    let pubkey = keypair.pubkey();
+
+    let fullnode = Fullnode::new(node, ledger_path, keypair, network, false);
+
+    // airdrop stuff, probably goes away at some point
+    let leader = match network {
+        Some(network) => {
+            poll_gossip_for_leader(network, None).expect("can't find leader on network")
+        }
+        None => node_info,
    };

-    let mut client = mk_client(&repl_clone);
-    let previous_balance = client.poll_get_balance(&leader_pubkey).unwrap();
-    eprintln!("balance is {}", previous_balance);
+    let mut client = mk_client(&leader);

-    if previous_balance == 0 {
-        eprintln!("requesting airdrop from {}", drone_addr);
-        request_airdrop(&drone_addr, &leader_pubkey, 50).unwrap_or_else(|_| {
-            panic!(
-                "Airdrop failed, is the drone address correct {:?} drone running?",
+    // TODO: maybe have the drone put itself in gossip somewhere instead of hardcoding?
+    let drone_addr = match network {
+        Some(network) => SocketAddr::new(network.ip(), DRONE_PORT),
+        None => SocketAddr::new(ncp.ip(), DRONE_PORT),
+    };
+
+    loop {
+        let balance = client.poll_get_balance(&pubkey).unwrap_or(0);
+        info!("balance is {}", balance);
+
+        if balance >= 50 {
+            info!("good to go!");
+            break;
+        }
+
+        info!("requesting airdrop from {}", drone_addr);
+        loop {
+            if request_airdrop(&drone_addr, &pubkey, 50).is_ok() {
+                break;
+            }
+            info!(
+                "airdrop request, is the drone address correct {:?}, drone running?",
                drone_addr
-            )
-        });
-
-        // Try multiple times to confirm a non-zero balance.  |poll_get_balance| currently times
-        // out after 1 second, and sometimes this is not enough time while the network is
-        // booting
-        let balance_ok = (0..30).any(|i| {
-            let balance = client.poll_get_balance(&leader_pubkey).unwrap();
-            eprintln!("new balance is {} (attempt #{})", balance, i);
-            balance > 0
-        });
-        assert!(balance_ok, "0 balance, airdrop failed?");
+            );
+            sleep(Duration::from_secs(2));
+        }
    }

-    fullnode.join().expect("join");
+    fullnode.join().expect("to never happen");
 }
--- a/src/bin/wallet.rs
+++ b/src/bin/wallet.rs
@ -5,6 +5,7 @@ extern crate bs58;
 extern crate clap;
 extern crate dirs;
 extern crate serde_json;
+#[macro_use]
 extern crate solana;

 use clap::{App, Arg, SubCommand};
@ -14,7 +15,7 @@ use solana::drone::DRONE_PORT;
 use solana::fullnode::Config;
 use solana::logger;
 use solana::signature::{read_keypair, Keypair, KeypairUtil, Pubkey, Signature};
-use solana::thin_client::ThinClient;
+use solana::thin_client::{poll_gossip_for_leader, ThinClient};
 use solana::wallet::request_airdrop;
 use std::error;
 use std::fmt;
@ -63,9 +64,9 @@ struct WalletConfig {

 impl Default for WalletConfig {
    fn default() -> WalletConfig {
-        let default_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 8000);
+        let default_addr = socketaddr!(0, 8000);
        WalletConfig {
-            leader: NodeInfo::new_leader(&default_addr),
+            leader: NodeInfo::new_with_socketaddr(&default_addr),
            id: Keypair::new(),
            drone_addr: default_addr,
            command: WalletCommand::Balance,
@ -92,12 +93,18 @@ fn parse_args() -> Result<WalletConfig, Box<error::Error>> {
                .takes_value(true)
                .help("/path/to/id.json"),
        )
+        .arg(
+            Arg::with_name("timeout")
+                .long("timeout")
+                .value_name("SECONDS")
+                .takes_value(true)
+                .help("Max SECONDS to wait to get necessary gossip from the network"),
+        )
        .subcommand(
            SubCommand::with_name("airdrop")
                .about("Request a batch of tokens")
                .arg(
                    Arg::with_name("tokens")
-                        // .index(1)
                        .long("tokens")
                        .value_name("NUMBER")
                        .takes_value(true)
@ -110,16 +117,14 @@ fn parse_args() -> Result<WalletConfig, Box<error::Error>> {
                .about("Send a payment")
                .arg(
                    Arg::with_name("tokens")
-                        // .index(2)
                        .long("tokens")
                        .value_name("NUMBER")
                        .takes_value(true)
                        .required(true)
-                        .help("the number of tokens to send"),
+                        .help("The number of tokens to send"),
                )
                .arg(
                    Arg::with_name("to")
-                        // .index(1)
                        .long("to")
                        .value_name("PUBKEY")
                        .takes_value(true)
@ -146,8 +151,14 @@ fn parse_args() -> Result<WalletConfig, Box<error::Error>> {
        leader = read_leader(l)?.node_info;
    } else {
        let server_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 8000);
-        leader = NodeInfo::new_leader(&server_addr);
+        leader = NodeInfo::new_with_socketaddr(&server_addr);
    };
+    let timeout: Option<u64>;
+    if let Some(secs) = matches.value_of("timeout") {
+        timeout = Some(secs.to_string().parse().expect("integer"));
+    } else {
+        timeout = None;
+    }

    let mut path = dirs::home_dir().expect("home directory");
    let id_path = if matches.is_present("keypair") {
@ -163,6 +174,8 @@ fn parse_args() -> Result<WalletConfig, Box<error::Error>> {
        )))
    })?;

+    let leader = poll_gossip_for_leader(leader.contact_info.ncp, timeout)?;
+
    let mut drone_addr = leader.contact_info.tpu;
    drone_addr.set_port(DRONE_PORT);

@ -243,7 +256,6 @@ fn process_command(
                }
                Err(error) => {
                    println!("An error occurred: {:?}", error);
-                    Err(error)?;
                }
            }
        }
@ -254,7 +266,7 @@ fn process_command(
                "Requesting airdrop of {:?} tokens from {}",
                tokens, config.drone_addr
            );
-            let previous_balance = client.poll_get_balance(&config.id.pubkey())?;
+            let previous_balance = client.poll_get_balance(&config.id.pubkey()).unwrap_or(0);
            request_airdrop(&config.drone_addr, &config.id.pubkey(), tokens as u64)?;

            // TODO: return airdrop Result from Drone instead of polling the
@ -262,7 +274,10 @@ fn process_command(
            let mut current_balance = previous_balance;
            for _ in 0..20 {
                sleep(Duration::from_millis(500));
-                current_balance = client.poll_get_balance(&config.id.pubkey())?;
+                current_balance = client
+                    .poll_get_balance(&config.id.pubkey())
+                    .unwrap_or(previous_balance);
+
                if previous_balance != current_balance {
                    break;
                }
--- a/src/blob_fetch_stage.rs
+++ b/src/blob_fetch_stage.rs
@ -16,31 +16,26 @@ pub struct BlobFetchStage {

 impl BlobFetchStage {
    pub fn new(
-        socket: UdpSocket,
+        socket: Arc<UdpSocket>,
        exit: Arc<AtomicBool>,
-        blob_recycler: &BlobRecycler,
+        recycler: &BlobRecycler,
    ) -> (Self, BlobReceiver) {
-        Self::new_multi_socket(vec![socket], exit, blob_recycler)
+        Self::new_multi_socket(vec![socket], exit, recycler)
    }
    pub fn new_multi_socket(
-        sockets: Vec<UdpSocket>,
+        sockets: Vec<Arc<UdpSocket>>,
        exit: Arc<AtomicBool>,
-        blob_recycler: &BlobRecycler,
+        recycler: &BlobRecycler,
    ) -> (Self, BlobReceiver) {
-        let (blob_sender, blob_receiver) = channel();
+        let (sender, receiver) = channel();
        let thread_hdls: Vec<_> = sockets
            .into_iter()
            .map(|socket| {
-                streamer::blob_receiver(
-                    exit.clone(),
-                    blob_recycler.clone(),
-                    socket,
-                    blob_sender.clone(),
-                ).expect("blob receiver init")
+                streamer::blob_receiver(socket, exit.clone(), recycler.clone(), sender.clone())
            })
            .collect();

-        (BlobFetchStage { exit, thread_hdls }, blob_receiver)
+        (BlobFetchStage { exit, thread_hdls }, receiver)
    }

    pub fn close(&self) {
--- a/src/broadcast_stage.rs
+++ b/src/broadcast_stage.rs
@ -16,7 +16,7 @@ use std::sync::{Arc, RwLock};
 use std::thread::{self, Builder, JoinHandle};
 use std::time::Duration;
 use streamer::BlobReceiver;
-use window::{self, SharedWindow, WindowIndex, WINDOW_SIZE};
+use window::{self, SharedWindow, WindowIndex, WindowUtil, WINDOW_SIZE};

 fn broadcast(
    node_info: &NodeInfo,
@ -28,7 +28,7 @@ fn broadcast(
    transmit_index: &mut WindowIndex,
    receive_index: &mut u64,
 ) -> Result<()> {
-    let debug_id = node_info.debug_id();
+    let id = node_info.id;
    let timer = Duration::new(1, 0);
    let mut dq = receiver.recv_timeout(timer)?;
    while let Ok(mut nq) = receiver.try_recv() {
@ -42,13 +42,11 @@ fn broadcast(
    // break them up into window-sized chunks to process
    let blobs_chunked = blobs_vec.chunks(WINDOW_SIZE as usize).map(|x| x.to_vec());

-    if log_enabled!(Level::Trace) {
-        trace!("{}", window::print_window(debug_id, window, *receive_index));
-    }
+    trace!("{}", window.read().unwrap().print(&id, *receive_index));

    for mut blobs in blobs_chunked {
        let blobs_len = blobs.len();
-        trace!("{:x}: broadcast blobs.len: {}", debug_id, blobs_len);
+        trace!("{}: broadcast blobs.len: {}", id, blobs_len);

        // Index the blobs
        window::index_blobs(node_info, &blobs, receive_index)
@ -64,29 +62,29 @@ fn broadcast(
                let pos = (ix % WINDOW_SIZE) as usize;
                if let Some(x) = mem::replace(&mut win[pos].data, None) {
                    trace!(
-                        "{:x} popped {} at {}",
-                        debug_id,
+                        "{} popped {} at {}",
+                        id,
                        x.read().unwrap().get_index().unwrap(),
                        pos
                    );
-                    recycler.recycle(x);
+                    recycler.recycle(x, "broadcast-data");
                }
                if let Some(x) = mem::replace(&mut win[pos].coding, None) {
                    trace!(
-                        "{:x} popped {} at {}",
-                        debug_id,
+                        "{} popped {} at {}",
+                        id,
                        x.read().unwrap().get_index().unwrap(),
                        pos
                    );
-                    recycler.recycle(x);
+                    recycler.recycle(x, "broadcast-coding");
                }

-                trace!("{:x} null {}", debug_id, pos);
+                trace!("{} null {}", id, pos);
            }
            while let Some(b) = blobs.pop() {
                let ix = b.read().unwrap().get_index().expect("blob index");
                let pos = (ix % WINDOW_SIZE) as usize;
-                trace!("{:x} caching {} at {}", debug_id, ix, pos);
+                trace!("{} caching {} at {}", id, ix, pos);
                assert!(win[pos].data.is_none());
                win[pos].data = Some(b);
            }
@ -96,7 +94,7 @@ fn broadcast(
        #[cfg(feature = "erasure")]
        {
            erasure::generate_coding(
-                debug_id,
+                &id,
                &mut window.write().unwrap(),
                recycler,
                *receive_index,
--- a/src/budget.rs
+++ b/src/budget.rs
@ -31,7 +31,7 @@ impl Condition {
    }
 }

-/// A data type reprsenting a payment plan.
+/// A data type representing a payment plan.
 #[repr(C)]
 #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
 pub enum Budget {
--- a/src/client.rs
+++ b/src/client.rs
@ -1,11 +1,11 @@
-use crdt::NodeInfo;
-use nat::udp_random_bind;
+use crdt::{NodeInfo, FULLNODE_PORT_RANGE};
+use netutil::bind_in_range;
 use std::time::Duration;
 use thin_client::ThinClient;

 pub fn mk_client(r: &NodeInfo) -> ThinClient {
-    let requests_socket = udp_random_bind(8000, 10000, 5).unwrap();
-    let transactions_socket = udp_random_bind(8000, 10000, 5).unwrap();
+    let (_, requests_socket) = bind_in_range(FULLNODE_PORT_RANGE).unwrap();
+    let (_, transactions_socket) = bind_in_range(FULLNODE_PORT_RANGE).unwrap();

    requests_socket
        .set_read_timeout(Some(Duration::new(1, 0)))
--- a/src/counter.rs
+++ b/src/counter.rs
@ -74,11 +74,12 @@ impl Counter {
        if times % lograte == 0 && times > 0 {
            let lastlog = self.lastlog.load(Ordering::Relaxed);
            info!(
-                "COUNTER:{{\"name\": \"{}\", \"counts\": {}, \"samples\": {},  \"now\": {}}}",
+                "COUNTER:{{\"name\": \"{}\", \"counts\": {}, \"samples\": {},  \"now\": {}, \"events\": {}}}",
                self.name,
-                counts,
+                counts + events,
                times,
                timing::timestamp(),
+                events,
            );
            metrics::submit(
                influxdb::Point::new(&format!("counter-{}", self.name))
--- a/src/crdt.rs
+++ b/src/crdt.rs
--- a/src/drone.rs
+++ b/src/drone.rs
@ -12,11 +12,11 @@ use std::io;
 use std::io::{Error, ErrorKind};
 use std::net::{IpAddr, SocketAddr, UdpSocket};
 use std::time::Duration;
-use thin_client::ThinClient;
+use thin_client::{poll_gossip_for_leader, ThinClient};
 use transaction::Transaction;

 pub const TIME_SLICE: u64 = 60;
-pub const REQUEST_CAP: u64 = 1_000_000;
+pub const REQUEST_CAP: u64 = 500_000_000;
 pub const DRONE_PORT: u16 = 9900;

 #[derive(Serialize, Deserialize, Debug, Clone, Copy)]
@ -31,8 +31,7 @@ pub struct Drone {
    mint_keypair: Keypair,
    ip_cache: Vec<IpAddr>,
    _airdrop_addr: SocketAddr,
-    transactions_addr: SocketAddr,
-    requests_addr: SocketAddr,
+    network_addr: SocketAddr,
    pub time_slice: Duration,
    request_cap: u64,
    pub request_current: u64,
@ -42,8 +41,7 @@ impl Drone {
    pub fn new(
        mint_keypair: Keypair,
        _airdrop_addr: SocketAddr,
-        transactions_addr: SocketAddr,
-        requests_addr: SocketAddr,
+        network_addr: SocketAddr,
        time_input: Option<u64>,
        request_cap_input: Option<u64>,
    ) -> Drone {
@ -59,8 +57,7 @@ impl Drone {
            mint_keypair,
            ip_cache: Vec::new(),
            _airdrop_addr,
-            transactions_addr,
-            requests_addr,
+            network_addr,
            time_slice,
            request_cap,
            request_current: 0,
@ -100,10 +97,13 @@ impl Drone {
        let requests_socket = UdpSocket::bind("0.0.0.0:0").unwrap();
        let transactions_socket = UdpSocket::bind("0.0.0.0:0").unwrap();

+        let leader = poll_gossip_for_leader(self.network_addr, Some(10))
+            .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
+
        let mut client = ThinClient::new(
-            self.requests_addr,
+            leader.contact_info.rpu,
            requests_socket,
-            self.transactions_addr,
+            leader.contact_info.tpu,
            transactions_socket,
        );
        let last_id = client.get_last_id();
@ -141,7 +141,7 @@ impl Drone {
                    )
                    .to_owned(),
            );
-            client.transfer_signed(&tx)
+            client.retry_transfer_signed(&tx, 10)
        } else {
            Err(Error::new(ErrorKind::Other, "token limit reached"))
        }
@ -157,18 +157,15 @@ impl Drop for Drone {
 #[cfg(test)]
 mod tests {
    use bank::Bank;
-    use crdt::{get_ip_addr, TestNode};
+    use crdt::Node;
    use drone::{Drone, DroneRequest, REQUEST_CAP, TIME_SLICE};
    use fullnode::Fullnode;
    use logger;
    use mint::Mint;
-    use service::Service;
+    use netutil::get_ip_addr;
    use signature::{Keypair, KeypairUtil};
    use std::fs::remove_dir_all;
    use std::net::{SocketAddr, UdpSocket};
-    use std::sync::atomic::{AtomicBool, Ordering};
-    use std::sync::Arc;
-    use std::thread::sleep;
    use std::time::Duration;
    use thin_client::ThinClient;

@ -177,16 +174,8 @@ mod tests {
        let keypair = Keypair::new();
        let mut addr: SocketAddr = "0.0.0.0:9900".parse().unwrap();
        addr.set_ip(get_ip_addr().unwrap());
-        let transactions_addr = "0.0.0.0:0".parse().unwrap();
-        let requests_addr = "0.0.0.0:0".parse().unwrap();
-        let mut drone = Drone::new(
-            keypair,
-            addr,
-            transactions_addr,
-            requests_addr,
-            None,
-            Some(3),
-        );
+        let network_addr = "0.0.0.0:0".parse().unwrap();
+        let mut drone = Drone::new(keypair, addr, network_addr, None, Some(3));
        assert!(drone.check_request_limit(1));
        drone.request_current = 3;
        assert!(!drone.check_request_limit(1));
@ -197,9 +186,8 @@ mod tests {
        let keypair = Keypair::new();
        let mut addr: SocketAddr = "0.0.0.0:9900".parse().unwrap();
        addr.set_ip(get_ip_addr().unwrap());
-        let transactions_addr = "0.0.0.0:0".parse().unwrap();
-        let requests_addr = "0.0.0.0:0".parse().unwrap();
-        let mut drone = Drone::new(keypair, addr, transactions_addr, requests_addr, None, None);
+        let network_addr = "0.0.0.0:0".parse().unwrap();
+        let mut drone = Drone::new(keypair, addr, network_addr, None, None);
        drone.request_current = drone.request_current + 256;
        assert_eq!(drone.request_current, 256);
        drone.clear_request_count();
@ -211,9 +199,8 @@ mod tests {
        let keypair = Keypair::new();
        let mut addr: SocketAddr = "0.0.0.0:9900".parse().unwrap();
        addr.set_ip(get_ip_addr().unwrap());
-        let transactions_addr = "0.0.0.0:0".parse().unwrap();
-        let requests_addr = "0.0.0.0:0".parse().unwrap();
-        let mut drone = Drone::new(keypair, addr, transactions_addr, requests_addr, None, None);
+        let network_addr = "0.0.0.0:0".parse().unwrap();
+        let mut drone = Drone::new(keypair, addr, network_addr, None, None);
        let ip = "127.0.0.1".parse().expect("create IpAddr from string");
        assert_eq!(drone.ip_cache.len(), 0);
        drone.add_ip_to_cache(ip);
@ -226,9 +213,8 @@ mod tests {
        let keypair = Keypair::new();
        let mut addr: SocketAddr = "0.0.0.0:9900".parse().unwrap();
        addr.set_ip(get_ip_addr().unwrap());
-        let transactions_addr = "0.0.0.0:0".parse().unwrap();
-        let requests_addr = "0.0.0.0:0".parse().unwrap();
-        let mut drone = Drone::new(keypair, addr, transactions_addr, requests_addr, None, None);
+        let network_addr = "0.0.0.0:0".parse().unwrap();
+        let mut drone = Drone::new(keypair, addr, network_addr, None, None);
        let ip = "127.0.0.1".parse().expect("create IpAddr from string");
        assert_eq!(drone.ip_cache.len(), 0);
        drone.add_ip_to_cache(ip);
@ -243,18 +229,10 @@ mod tests {
        let keypair = Keypair::new();
        let mut addr: SocketAddr = "0.0.0.0:9900".parse().unwrap();
        addr.set_ip(get_ip_addr().unwrap());
-        let transactions_addr = "0.0.0.0:0".parse().unwrap();
-        let requests_addr = "0.0.0.0:0".parse().unwrap();
+        let network_addr = "0.0.0.0:0".parse().unwrap();
        let time_slice: Option<u64> = None;
        let request_cap: Option<u64> = None;
-        let drone = Drone::new(
-            keypair,
-            addr,
-            transactions_addr,
-            requests_addr,
-            time_slice,
-            request_cap,
-        );
+        let drone = Drone::new(keypair, addr, network_addr, time_slice, request_cap);
        assert_eq!(drone.time_slice, Duration::new(TIME_SLICE, 0));
        assert_eq!(drone.request_cap, REQUEST_CAP);
    }
@ -275,36 +253,32 @@ mod tests {

        logger::setup();
        let leader_keypair = Keypair::new();
-        let leader = TestNode::new_localhost_with_pubkey(leader_keypair.pubkey());
+        let leader = Node::new_localhost_with_pubkey(leader_keypair.pubkey());

        let alice = Mint::new(10_000_000);
        let bank = Bank::new(&alice);
        let bob_pubkey = Keypair::new().pubkey();
        let carlos_pubkey = Keypair::new().pubkey();
-        let exit = Arc::new(AtomicBool::new(false));
-        let leader_data = leader.data.clone();
+        let leader_data = leader.info.clone();
        let ledger_path = tmp_ledger_path("send_airdrop");

-        let server = Fullnode::new_leader(
+        let server = Fullnode::new_with_bank(
            leader_keypair,
            bank,
            0,
            &[],
            leader,
-            exit.clone(),
-            &ledger_path,
+            None,
+            Some(&ledger_path),
            false,
        );
-        //TODO: this seems unstable
-        sleep(Duration::from_millis(900));

        let mut addr: SocketAddr = "0.0.0.0:9900".parse().expect("bind to drone socket");
        addr.set_ip(get_ip_addr().expect("drone get_ip_addr"));
        let mut drone = Drone::new(
            alice.keypair(),
            addr,
-            leader_data.contact_info.tpu,
-            leader_data.contact_info.rpu,
+            leader_data.contact_info.ncp,
            None,
            Some(150_000),
        );
@ -327,10 +301,31 @@ mod tests {
        let bob_sig = drone.send_airdrop(bob_req).unwrap();
        assert!(client.poll_for_signature(&bob_sig).is_ok());

+        // restart the leader, drone should find the new one at the same gossip port
+        server.close().unwrap();
+
+        let leader_keypair = Keypair::new();
+        let leader = Node::new_localhost_with_pubkey(leader_keypair.pubkey());
+        let leader_data = leader.info.clone();
+        let server = Fullnode::new(leader, &ledger_path, leader_keypair, None, false);
+
+        let requests_socket = UdpSocket::bind("0.0.0.0:0").expect("drone bind to requests socket");
+        let transactions_socket =
+            UdpSocket::bind("0.0.0.0:0").expect("drone bind to transactions socket");
+
+        let mut client = ThinClient::new(
+            leader_data.contact_info.rpu,
+            requests_socket,
+            leader_data.contact_info.tpu,
+            transactions_socket,
+        );
+
        let carlos_req = DroneRequest::GetAirdrop {
            airdrop_request_amount: 5_000_000,
            client_pubkey: carlos_pubkey,
        };
+
+        // using existing drone, new thin client
        let carlos_sig = drone.send_airdrop(carlos_req).unwrap();
        assert!(client.poll_for_signature(&carlos_sig).is_ok());

@ -342,8 +337,7 @@ mod tests {
        info!("TPS request balance: {:?}", carlos_balance);
        assert_eq!(carlos_balance.unwrap(), TPS_BATCH);

-        exit.store(true, Ordering::Relaxed);
-        server.join().unwrap();
+        server.close().unwrap();
        remove_dir_all(ledger_path).unwrap();
    }
 }
--- a/src/entry.rs
+++ b/src/entry.rs
@ -45,9 +45,6 @@ pub struct Entry {
    ///  2. this Entry can be left out of the bank's entry_id cache for
    ///       purposes of duplicate rejection
    pub has_more: bool,
-
-    /// Erasure requires that Entry be a multiple of 4 bytes in size
-    pad: [u8; 3],
 }

 impl Entry {
@ -65,7 +62,6 @@ impl Entry {
            id,
            transactions,
            has_more,
-            pad: [0, 0, 0],
        };

        let size = serialized_size(&entry).unwrap();
@ -116,7 +112,6 @@ impl Entry {
            id: Hash::default(),
            transactions,
            has_more: false,
-            pad: [0, 0, 0],
        }).unwrap() <= BLOB_DATA_SIZE as u64
    }

@ -142,7 +137,6 @@ impl Entry {
            id: *id,
            transactions: vec![],
            has_more: false,
-            pad: [0, 0, 0],
        }
    }

@ -209,7 +203,6 @@ pub fn next_entry(start_hash: &Hash, num_hashes: u64, transactions: Vec<Transact
        id: next_hash(start_hash, num_hashes, &transactions),
        transactions,
        has_more: false,
-        pad: [0, 0, 0],
    }
 }

@ -255,8 +248,8 @@ mod tests {

        // First, verify entries
        let keypair = Keypair::new();
-        let tx0 = Transaction::new_timestamp(&keypair, Utc::now(), zero);
-        let tx1 = Transaction::new_signature(&keypair, Default::default(), zero);
+        let tx0 = Transaction::new_timestamp(&keypair, keypair.pubkey(), Utc::now(), zero);
+        let tx1 = Transaction::new_signature(&keypair, keypair.pubkey(), Default::default(), zero);
        let mut e0 = Entry::new(&zero, 0, vec![tx0.clone(), tx1.clone()], false);
        assert!(e0.verify(&zero));

@ -278,7 +271,7 @@ mod tests {
        assert_eq!(tick.id, zero);

        let keypair = Keypair::new();
-        let tx0 = Transaction::new_timestamp(&keypair, Utc::now(), zero);
+        let tx0 = Transaction::new_timestamp(&keypair, keypair.pubkey(), Utc::now(), zero);
        let entry0 = next_entry(&zero, 1, vec![tx0.clone()]);
        assert_eq!(entry0.num_hashes, 1);
        assert_eq!(entry0.id, next_hash(&zero, 1, &vec![tx0]));
--- a/src/entry_writer.rs
+++ b/src/entry_writer.rs
@ -101,9 +101,11 @@ pub fn read_entries<R: BufRead>(reader: R) -> impl Iterator<Item = io::Result<En
 #[cfg(test)]
 mod tests {
    use super::*;
+    use bincode::serialize;
    use ledger;
    use mint::Mint;
    use packet::BLOB_DATA_SIZE;
+    use packet::PACKET_DATA_SIZE;
    use signature::{Keypair, KeypairUtil};
    use std::io::Cursor;
    use transaction::Transaction;
@ -117,9 +119,11 @@ mod tests {
        let mut entry_writer = EntryWriter::new(&bank, writer);
        let keypair = Keypair::new();
        let tx = Transaction::new(&mint.keypair(), keypair.pubkey(), 1, mint.last_id());
+        let tx_size = serialize(&tx).unwrap().len();

-        // NOTE: if Entry grows to larger than a transaction, the code below falls over
-        let threshold = (BLOB_DATA_SIZE / 256) - 1; // 256 is transaction size
+        assert!(tx_size <= PACKET_DATA_SIZE);
+        assert!(BLOB_DATA_SIZE >= PACKET_DATA_SIZE);
+        let threshold = (BLOB_DATA_SIZE / tx_size) - 1; // PACKET_DATA_SIZE is transaction size

        // Verify large entries are split up and the first sets has_more.
        let txs = vec![tx.clone(); threshold * 2];
--- a/src/erasure.rs
+++ b/src/erasure.rs
@ -1,5 +1,6 @@
 // Support erasure coding
 use packet::{BlobRecycler, SharedBlob, BLOB_DATA_SIZE, BLOB_HEADER_SIZE};
+use signature::Pubkey;
 use std::cmp;
 use std::mem;
 use std::result;
@ -84,7 +85,7 @@ pub fn generate_coding_blocks(coding: &mut [&mut [u8]], data: &[&[u8]]) -> Resul
    let mut data_arg = Vec::with_capacity(data.len());
    for block in data {
        if block_len != block.len() as i32 {
-            trace!(
+            error!(
                "data block size incorrect {} expected {}",
                block.len(),
                block_len
@ -96,7 +97,7 @@ pub fn generate_coding_blocks(coding: &mut [&mut [u8]], data: &[&[u8]]) -> Resul
    let mut coding_arg = Vec::with_capacity(coding.len());
    for mut block in coding {
        if block_len != block.len() as i32 {
-            trace!(
+            error!(
                "coding block size incorrect {} expected {}",
                block.len(),
                block_len
@ -152,8 +153,8 @@ pub fn decode_blocks(
        }
        data_arg.push(x.as_mut_ptr());
    }
-    unsafe {
-        let ret = jerasure_matrix_decode(
+    let ret = unsafe {
+        jerasure_matrix_decode(
            data.len() as i32,
            coding.len() as i32,
            ERASURE_W,
@ -163,15 +164,15 @@ pub fn decode_blocks(
            data_arg.as_ptr(),
            coding_arg.as_ptr(),
            data[0].len() as i32,
-        );
-        trace!("jerasure_matrix_decode ret: {}", ret);
-        for x in data[erasures[0] as usize][0..8].iter() {
-            trace!("{} ", x)
-        }
-        trace!("");
-        if ret < 0 {
-            return Err(ErasureError::DecodeError);
-        }
+        )
+    };
+    trace!("jerasure_matrix_decode ret: {}", ret);
+    for x in data[erasures[0] as usize][0..8].iter() {
+        trace!("{} ", x)
+    }
+    trace!("");
+    if ret < 0 {
+        return Err(ErasureError::DecodeError);
    }
    Ok(())
 }
@ -214,7 +215,7 @@ pub fn decode_blocks(
 //
 //
 pub fn generate_coding(
-    debug_id: u64,
+    id: &Pubkey,
    window: &mut [WindowSlot],
    recycler: &BlobRecycler,
    receive_index: u64,
@ -234,8 +235,8 @@ pub fn generate_coding(
            break;
        }
        info!(
-            "generate_coding {:x} start: {} end: {} start_idx: {} num_blobs: {}",
-            debug_id, block_start, block_end, start_idx, num_blobs
+            "generate_coding {} start: {} end: {} start_idx: {} num_blobs: {}",
+            id, block_start, block_end, start_idx, num_blobs
        );

        let mut max_data_size = 0;
@ -243,12 +244,12 @@ pub fn generate_coding(
        // find max_data_size, maybe bail if not all the data is here
        for i in block_start..block_end {
            let n = i % window.len();
-            trace!("{:x} window[{}] = {:?}", debug_id, n, window[n].data);
+            trace!("{} window[{}] = {:?}", id, n, window[n].data);

            if let Some(b) = &window[n].data {
                max_data_size = cmp::max(b.read().unwrap().meta.size, max_data_size);
            } else {
-                trace!("{:x} data block is null @ {}", debug_id, n);
+                trace!("{} data block is null @ {}", id, n);
                return Ok(());
            }
        }
@ -256,7 +257,7 @@ pub fn generate_coding(
        // round up to the nearest jerasure alignment
        max_data_size = align!(max_data_size, JERASURE_ALIGN);

-        trace!("{:x} max_data_size: {}", debug_id, max_data_size);
+        trace!("{} max_data_size: {}", id, max_data_size);

        let mut data_blobs = Vec::with_capacity(NUM_DATA);
        for i in block_start..block_end {
@ -299,8 +300,8 @@ pub fn generate_coding(
                let id = data_rl.get_id().unwrap();

                trace!(
-                    "{:x} copying index {} id {:?} from data to coding",
-                    debug_id,
+                    "{} copying index {} id {:?} from data to coding",
+                    id,
                    index,
                    id
                );
@ -324,7 +325,7 @@ pub fn generate_coding(
            .iter()
            .enumerate()
            .map(|(i, l)| {
-                trace!("{:x} i: {} data: {}", debug_id, i, l.data[0]);
+                trace!("{} i: {} data: {}", id, i, l.data[0]);
                &l.data[..max_data_size]
            })
            .collect();
@ -338,15 +339,15 @@ pub fn generate_coding(
            .iter_mut()
            .enumerate()
            .map(|(i, l)| {
-                trace!("{:x} i: {} coding: {}", debug_id, i, l.data[0],);
+                trace!("{} i: {} coding: {}", id, i, l.data[0],);
                &mut l.data_mut()[..max_data_size]
            })
            .collect();

        generate_coding_blocks(coding_ptrs.as_mut_slice(), &data_ptrs)?;
        debug!(
-            "{:x} start_idx: {} data: {}:{} coding: {}:{}",
-            debug_id, start_idx, block_start, block_end, coding_start, block_end
+            "{} start_idx: {} data: {}:{} coding: {}:{}",
+            id, start_idx, block_start, block_end, coding_start, block_end
        );
        block_start = block_end;
    }
@ -358,7 +359,7 @@ pub fn generate_coding(
 //  true if slot is stale (i.e. has the wrong index), old blob is flushed
 //  false if slot has a blob with the right index
 fn is_missing(
-    debug_id: u64,
+    id: &Pubkey,
    idx: u64,
    window_slot: &mut Option<SharedBlob>,
    recycler: &BlobRecycler,
@ -367,24 +368,24 @@ fn is_missing(
    if let Some(blob) = mem::replace(window_slot, None) {
        let blob_idx = blob.read().unwrap().get_index().unwrap();
        if blob_idx == idx {
-            trace!("recover {:x}: idx: {} good {}", debug_id, idx, c_or_d);
+            trace!("recover {}: idx: {} good {}", id, idx, c_or_d);
            // put it back
            mem::replace(window_slot, Some(blob));
            false
        } else {
            trace!(
-                "recover {:x}: idx: {} old {} {}, recycling",
-                debug_id,
+                "recover {}: idx: {} old {} {}, recycling",
+                id,
                idx,
                c_or_d,
                blob_idx,
            );
            // recycle it
-            recycler.recycle(blob);
+            recycler.recycle(blob, "is_missing");
            true
        }
    } else {
-        trace!("recover {:x}: idx: {} None {}", debug_id, idx, c_or_d);
+        trace!("recover {}: idx: {} None {}", id, idx, c_or_d);
        // nothing there
        true
    }
@ -395,7 +396,7 @@ fn is_missing(
 // if a blob is stale, remove it from the window slot
 //  side effect: block will be cleaned of old blobs
 fn find_missing(
-    debug_id: u64,
+    id: &Pubkey,
    block_start_idx: u64,
    block_start: usize,
    window: &mut [WindowSlot],
@ -411,12 +412,11 @@ fn find_missing(
        let idx = (i - block_start) as u64 + block_start_idx;
        let n = i % window.len();

-        if is_missing(debug_id, idx, &mut window[n].data, recycler, "data") {
+        if is_missing(id, idx, &mut window[n].data, recycler, "data") {
            data_missing += 1;
        }

-        if i >= coding_start && is_missing(debug_id, idx, &mut window[n].coding, recycler, "coding")
-        {
+        if i >= coding_start && is_missing(id, idx, &mut window[n].coding, recycler, "coding") {
            coding_missing += 1;
        }
    }
@ -430,7 +430,7 @@ fn find_missing(
 //    any of the blocks, the block is skipped.
 //   Side effect: old blobs in a block are None'd
 pub fn recover(
-    debug_id: u64,
+    id: &Pubkey,
    recycler: &BlobRecycler,
    window: &mut [WindowSlot],
    start_idx: u64,
@ -444,8 +444,8 @@ pub fn recover(
    let coding_start = block_start + NUM_DATA - NUM_CODING;
    let block_end = block_start + NUM_DATA;
    trace!(
-        "recover {:x}: block_start_idx: {} block_start: {} coding_start: {} block_end: {}",
-        debug_id,
+        "recover {}: block_start_idx: {} block_start: {} coding_start: {} block_end: {}",
+        id,
        block_start_idx,
        block_start,
        coding_start,
@ -453,7 +453,7 @@ pub fn recover(
    );

    let (data_missing, coding_missing) =
-        find_missing(debug_id, block_start_idx, block_start, window, recycler);
+        find_missing(id, block_start_idx, block_start, window, recycler);

    // if we're not missing data, or if we have too much missing but have enough coding
    if data_missing == 0 {
@ -463,8 +463,8 @@ pub fn recover(

    if (data_missing + coding_missing) > NUM_CODING {
        trace!(
-            "recover {:x}: start: {} skipping recovery data: {} coding: {}",
-            debug_id,
+            "recover {}: start: {} skipping recovery data: {} coding: {}",
+            id,
            block_start,
            data_missing,
            coding_missing
@ -474,8 +474,8 @@ pub fn recover(
    }

    trace!(
-        "recover {:x}: recovering: data: {} coding: {}",
-        debug_id,
+        "recover {}: recovering: data: {} coding: {}",
+        id,
        data_missing,
        coding_missing
    );
@ -492,7 +492,7 @@ pub fn recover(
        if let Some(b) = window[j].data.clone() {
            if meta.is_none() {
                meta = Some(b.read().unwrap().meta.clone());
-                trace!("recover {:x} meta at {} {:?}", debug_id, j, meta);
+                trace!("recover {} meta at {} {:?}", id, j, meta);
            }
            blobs.push(b);
        } else {
@ -508,6 +508,12 @@ pub fn recover(
        if let Some(b) = window[j].coding.clone() {
            if size.is_none() {
                size = Some(b.read().unwrap().meta.size - BLOB_HEADER_SIZE);
+                trace!(
+                    "{} recover size {} from {}",
+                    id,
+                    size.unwrap(),
+                    i as u64 + block_start_idx
+                );
            }
            blobs.push(b);
        } else {
@ -518,12 +524,13 @@ pub fn recover(
            erasures.push(((i - coding_start) + NUM_DATA) as i32);
        }
    }
+
    // now that we have size (from coding), zero out data blob tails
+    let size = size.unwrap();
    for i in block_start..block_end {
        let j = i % window.len();

        if let Some(b) = &window[j].data {
-            let size = size.unwrap();
            let mut b_wl = b.write().unwrap();
            for i in b_wl.meta.size..size {
                b_wl.data[i] = 0;
@ -533,12 +540,7 @@ pub fn recover(

    // marks end of erasures
    erasures.push(-1);
-    trace!(
-        "erasures[]: {:x} {:?} data_size: {}",
-        debug_id,
-        erasures,
-        size.unwrap(),
-    );
+    trace!("erasures[]: {} {:?} data_size: {}", id, erasures, size,);
    //lock everything for write
    for b in &blobs {
        locks.push(b.write().expect("'locks' arr in pb fn recover"));
@ -549,16 +551,16 @@ pub fn recover(
        let mut data_ptrs: Vec<&mut [u8]> = Vec::with_capacity(NUM_DATA);
        for (i, l) in locks.iter_mut().enumerate() {
            if i < NUM_DATA {
-                trace!("{:x} pushing data: {}", debug_id, i);
-                data_ptrs.push(&mut l.data[..size.unwrap()]);
+                trace!("{} pushing data: {}", id, i);
+                data_ptrs.push(&mut l.data[..size]);
            } else {
-                trace!("{:x} pushing coding: {}", debug_id, i);
-                coding_ptrs.push(&mut l.data_mut()[..size.unwrap()]);
+                trace!("{} pushing coding: {}", id, i);
+                coding_ptrs.push(&mut l.data_mut()[..size]);
            }
        }
        trace!(
-            "{:x} coding_ptrs.len: {} data_ptrs.len {}",
-            debug_id,
+            "{} coding_ptrs.len: {} data_ptrs.len {}",
+            id,
            coding_ptrs.len(),
            data_ptrs.len()
        );
@ -577,29 +579,38 @@ pub fn recover(

        let mut data_size;
        if n < NUM_DATA {
-            data_size = locks[n].get_data_size().unwrap();
-            data_size -= BLOB_HEADER_SIZE as u64;
+            data_size = locks[n].get_data_size().unwrap() as usize;
+            data_size -= BLOB_HEADER_SIZE;
+            if data_size > BLOB_DATA_SIZE {
+                error!("{} corrupt data blob[{}] data_size: {}", id, idx, data_size);
+                corrupt = true;
+            }
        } else {
-            data_size = size.unwrap() as u64;
+            data_size = size;
            idx -= NUM_CODING as u64;
            locks[n].set_index(idx).unwrap();
+
+            if data_size - BLOB_HEADER_SIZE > BLOB_DATA_SIZE {
+                error!(
+                    "{} corrupt coding blob[{}] data_size: {}",
+                    id, idx, data_size
+                );
+                corrupt = true;
+            }
        }

        locks[n].meta = meta.clone().unwrap();
-        locks[n].set_size(data_size as usize);
+        locks[n].set_size(data_size);
        trace!(
-            "{:x} erasures[{}] ({}) size: {:x} data[0]: {}",
-            debug_id,
+            "{} erasures[{}] ({}) size: {} data[0]: {}",
+            id,
            *i,
            idx,
            data_size,
            locks[n].data()[0]
        );
-        if data_size > BLOB_DATA_SIZE as u64 {
-            corrupt = true;
-        }
    }
-    assert!(!corrupt, " {:x} ", debug_id);
+    assert!(!corrupt, " {} ", id);

    Ok(())
 }
@ -609,10 +620,9 @@ mod test {
    use crdt;
    use erasure;
    use logger;
-    use packet::{BlobRecycler, BLOB_HEADER_SIZE, BLOB_SIZE};
+    use packet::{BlobRecycler, BLOB_DATA_SIZE, BLOB_HEADER_SIZE, BLOB_SIZE};
    use rand::{thread_rng, Rng};
-    use signature::Keypair;
-    use signature::KeypairUtil;
+    use signature::{Keypair, KeypairUtil, Pubkey};
    //    use std::sync::{Arc, RwLock};
    use window::{index_blobs, WindowSlot};

@ -724,7 +734,12 @@ mod test {
            let b_ = b.clone();
            let mut w = b.write().unwrap();
            // generate a random length, one more than a multiple of 4 (between 9 and 29)
-            let data_len = (thread_rng().gen_range(2, 8) * 4) + 1;
+            let data_len = if i == 3 {
+                BLOB_DATA_SIZE
+            } else {
+                (thread_rng().gen_range(2, 8) * 4) + 1
+            };
+
            eprintln!("data_len of {} is {}", i, data_len);
            w.set_size(data_len);

@ -746,7 +761,6 @@ mod test {
            "127.0.0.1:1235".parse().unwrap(),
            "127.0.0.1:1236".parse().unwrap(),
            "127.0.0.1:1237".parse().unwrap(),
-            "127.0.0.1:1238".parse().unwrap(),
        );
        assert!(index_blobs(&d, &blobs, &mut (offset as u64)).is_ok());
        for b in blobs {
@ -773,34 +787,34 @@ mod test {
        }
    }

+    fn pollute_recycler(blob_recycler: &BlobRecycler) {
+        let mut blobs = Vec::with_capacity(WINDOW_SIZE * 2);
+        for _ in 0..WINDOW_SIZE * 10 {
+            let blob = blob_recycler.allocate();
+            {
+                let mut b_l = blob.write().unwrap();
+
+                for i in 0..BLOB_SIZE {
+                    b_l.data[i] = thread_rng().gen();
+                }
+                // some of the blobs should previously have been used for coding
+                if thread_rng().gen_bool(erasure::NUM_CODING as f64 / erasure::NUM_DATA as f64) {
+                    b_l.set_coding().unwrap();
+                }
+            }
+            blobs.push(blob);
+        }
+        for blob in blobs {
+            blob_recycler.recycle(blob, "pollute_recycler");
+        }
+    }
+
    #[test]
    pub fn test_window_recover_basic() {
        logger::setup();
        let blob_recycler = BlobRecycler::default();

-        {
-            let mut blobs = Vec::with_capacity(WINDOW_SIZE * 2);
-            for _ in 0..WINDOW_SIZE * 10 {
-                let blob = blob_recycler.allocate();
-
-                {
-                    let mut b_l = blob.write().unwrap();
-
-                    for i in 0..BLOB_SIZE {
-                        b_l.data[i] = thread_rng().gen();
-                    }
-                    // some of the blobs should previously been used for coding
-                    if thread_rng().gen_bool(erasure::NUM_CODING as f64 / erasure::NUM_DATA as f64)
-                    {
-                        b_l.set_coding().unwrap();
-                    }
-                }
-                blobs.push(blob);
-            }
-            for blob in blobs {
-                blob_recycler.recycle(blob);
-            }
-        }
+        pollute_recycler(&blob_recycler);

        // Generate a window
        let offset = 0;
@ -819,9 +833,10 @@ mod test {

        // Generate the coding blocks
        let mut index = (erasure::NUM_DATA + 2) as u64;
+        let id = Pubkey::default();
        assert!(
            erasure::generate_coding(
-                0,
+                &id,
                &mut window,
                &blob_recycler,
                offset as u64,
@ -848,7 +863,7 @@ mod test {
        // Recover it from coding
        assert!(
            erasure::recover(
-                0,
+                &id,
                &blob_recycler,
                &mut window,
                (offset + WINDOW_SIZE) as u64,
@ -887,7 +902,10 @@ mod test {
        let refwindow = window[erase_offset].data.clone();
        window[erase_offset].data = None;

-        blob_recycler.recycle(window[erase_offset].coding.clone().unwrap());
+        blob_recycler.recycle(
+            window[erase_offset].coding.clone().unwrap(),
+            "window_recover_basic",
+        );
        window[erase_offset].coding = None;

        print_window(&window);
@ -895,7 +913,7 @@ mod test {
        // Recover it from coding
        assert!(
            erasure::recover(
-                0,
+                &id,
                &blob_recycler,
                &mut window,
                (offset + WINDOW_SIZE) as u64,
@ -941,7 +959,7 @@ mod test {
        // Recover it from coding
        assert!(
            erasure::recover(
-                0,
+                &id,
                &blob_recycler,
                &mut window,
                (offset + WINDOW_SIZE) as u64,
--- a/src/fetch_stage.rs
+++ b/src/fetch_stage.rs
@ -16,31 +16,27 @@ pub struct FetchStage {

 impl FetchStage {
    pub fn new(
-        socket: UdpSocket,
-        exit: Arc<AtomicBool>,
-        packet_recycler: &PacketRecycler,
-    ) -> (Self, PacketReceiver) {
-        Self::new_multi_socket(vec![socket], exit, packet_recycler)
-    }
-    pub fn new_multi_socket(
        sockets: Vec<UdpSocket>,
        exit: Arc<AtomicBool>,
-        packet_recycler: &PacketRecycler,
+        recycler: &PacketRecycler,
    ) -> (Self, PacketReceiver) {
-        let (packet_sender, packet_receiver) = channel();
+        let tx_sockets = sockets.into_iter().map(Arc::new).collect();
+        Self::new_multi_socket(tx_sockets, exit, recycler)
+    }
+    pub fn new_multi_socket(
+        sockets: Vec<Arc<UdpSocket>>,
+        exit: Arc<AtomicBool>,
+        recycler: &PacketRecycler,
+    ) -> (Self, PacketReceiver) {
+        let (sender, receiver) = channel();
        let thread_hdls: Vec<_> = sockets
            .into_iter()
            .map(|socket| {
-                streamer::receiver(
-                    socket,
-                    exit.clone(),
-                    packet_recycler.clone(),
-                    packet_sender.clone(),
-                )
+                streamer::receiver(socket, exit.clone(), recycler.clone(), sender.clone())
            })
            .collect();

-        (FetchStage { exit, thread_hdls }, packet_receiver)
+        (FetchStage { exit, thread_hdls }, receiver)
    }

    pub fn close(&self) {
--- a/Show More
+++ b/Show More
				`@ -0,0 +1 @@`
				`Scripts that run on the remote testnet nodes`