Compare commits

...

315 Commits
v0.7 ... v0.8.0

Author SHA1 Message Date
77949a4be6 cherry pick readme update 2018-09-13 19:19:48 -07:00
af58940964 Fix missing recycle in recv_from (#1205)
In the error case that i>0 (we have blobs to send)
we break out of the loop and do not push the allocated r
to the v array. We should recycle this blob, otherwise it
will be dropped.
2018-09-13 10:27:24 -07:00
21963b8c82 fix "leak" in Blob::recv_from (#1198)
* fix "leak" in Blob::recv_from

fixes #1199
2018-09-13 10:27:24 -07:00
b52230097e groom Fullnode's new_with_bank() to match new() more 2018-09-12 09:24:42 -07:00
a8fdb8a5a7 use a single BlobRecycler per fullnode 2018-09-11 16:56:54 -07:00
297f859631 Change '>=' back to '>' to fix recycling of blobs/packets (#1192)
Recycler will have a strong ref to the item so it will be at
least 1, >= will always prevent recycling.
2018-09-11 16:52:45 -07:00
5d19b799af Fix snap configuration for netstat daemon (#1190)
- Also increased the frequency at which the stats are sent
- Fixed file permissions for snapcraft.yaml
2018-09-11 14:49:05 -07:00
af3eb5a16c .sh 2018-09-11 11:29:49 -07:00
b313b7f6f9 Revert "move rpc_server to drop() semantics instead of having its own thread"
This reverts commit 40aa0654fa.
2018-09-10 22:48:33 -07:00
016ee36808 remove -x 2018-09-10 21:40:14 -07:00
c3fc98c48f use gossip to find the leader for every airdrop request 2018-09-10 21:29:45 -07:00
40aa0654fa move rpc_server to drop() semantics instead of having its own thread 2018-09-10 20:25:53 -07:00
bace2880d0 Correct spelling 2018-09-10 19:58:21 -07:00
9d80eefb81 Log the number of accounts each 250k txes (#1178) 2018-09-10 17:40:00 -07:00
1c17c6dd2b Report UDP network statistics (#1176)
* Report UDP network statistics

Fixes #1093

* Address review comments

* Address additional review comments

* Fix shellcheck errors
2018-09-10 15:52:08 -07:00
2be0dbddbb Correct spelling 2018-09-10 13:48:43 -07:00
a91b785ba5 move fullnode trace generation into crdt 2018-09-10 13:47:57 -07:00
0ef05de889 Add sleep to prevent spinning thread 2018-09-10 12:50:28 -07:00
a093d5c809 Fix erasure build 2018-09-10 11:40:26 -06:00
fc64e1853c Initialize Window, not SharedWindow
Wrap with Arc<RwLock>> when/if needed, no earlier.
2018-09-10 11:40:26 -06:00
7f669094de Split window into two modules 2018-09-10 11:40:26 -06:00
5025d89c88 Inline window method implementations 2018-09-10 11:40:26 -06:00
2b44c4504a Use WindowUtil for more idiomatic code 2018-09-10 11:40:26 -06:00
d2c9beb843 Add a trait to pretend Window is an object 2018-09-10 11:40:26 -06:00
9e6d3bf532 Correct spelling 2018-09-10 09:29:01 -07:00
a89b611e9e comments (#1165) 2018-09-09 07:07:38 -07:00
ebcac3c2d1 Use a common solana user on all testnet instances 2018-09-08 22:34:26 -07:00
7029e4395c Fix OOM reporting 2018-09-08 18:57:31 -07:00
5afcdcbbe6 More log grooming 2018-09-08 14:16:34 -07:00
3840b4b516 Groom log output 2018-09-08 14:10:18 -07:00
7aeb6d642b Display log file 2018-09-08 13:59:45 -07:00
1d6c4aacae Retry rsync a couple times before failing 2018-09-08 13:59:45 -07:00
9f5c86e60c Install earlyoom at gce instance startup 2018-09-08 13:59:45 -07:00
9f413fd656 Establish net/scripts/... for better scoping 2018-09-08 13:59:45 -07:00
97c3125a78 improve localnet-sanity's robustness (#1160)
* fix poll_gossip_for_leader() loop to actually wait
         for 30 seconds
    * reduce reuseaddr use to only when necessary,
         try to avoid already bound sockets
    * move nat.rs to netutil.rs
    * add gossip tracing to thin_client and bench-tps
2018-09-09 04:50:43 +09:00
a77aca75b2 Add NO_VALIDATOR_SANITY back 2018-09-07 22:37:05 -07:00
96bfd9478b make all the nodes have a pretty seq id (#1159) 2018-09-08 14:18:18 +09:00
e8206cb2d4 Echo the network address before entering a quiet polling loop 2018-09-07 21:20:00 -07:00
c3af0d9d25 Improve client.log 2018-09-07 21:20:00 -07:00
932c994dc9 Use new bench-tps command-line args 2018-09-07 21:20:00 -07:00
c34d911eaf Migrate Budget DSL to use the Account state (#979)
* Migrate Budget DSL to use the Account state instead of global bank data structures.

* Serialize Instruction into Transaction::userdata.
* Store the pending set in the Account::userdata
* Enforce the token balance rules on contract execution. This becomes the entry point for generic contracts.
* This pr will have a performance impact on the bank. The next set of changes will fix this by locking each account during multi threaded execution of all the contracts.
* With this change a contract transaction needs to store its state under an address. That address could be the destination of the tokens, or any random address. For the latter, an extra step would be needed to claim the tokens which isn't implemented by budget_dsl at the moment.
* test tracking issue 1157
2018-09-07 20:18:36 -07:00
ddd1871840 Install libssl1.1 for solanalabs/rust docker image compat 2018-09-07 19:57:41 -07:00
db825788fa Document how to get ssh access into CD testnets 2018-09-07 19:41:13 -07:00
b1b03ec13b Refine docker image tagging to avoid breaking stabilization branches on updates 2018-09-07 18:42:25 -07:00
73a8441add /var/snap is not writable by most users 2018-09-07 17:41:20 -07:00
bf29590f41 WSL needs ReuseAddr in addition to ReusePort (which it doesn't honor) (#1149) 2018-09-08 07:28:22 +09:00
51b27779c9 client changes for TODOs and looping (#1138)
* remove client.sh from snap
* default to ephemeral instead of ~/.config key
* rework CLI for bench-tps
* remove multinode-demo stuff from remote-client.sh
* remove multinode-demo from remote-sanity and localnet-sanity
2018-09-08 07:07:10 +09:00
5169c8d08f Add method to return hash of bank state 2018-09-07 15:38:53 -06:00
0d945e6a92 Groom testnet-sanity logging 2018-09-07 12:45:48 -07:00
1090254ba5 Add datapoints for leader/validator start 2018-09-07 12:45:48 -07:00
e51445d857 🙃 2018-09-07 12:24:34 -07:00
4b47abd3bf Fix --num-nodes argument parsing 2018-09-07 12:20:42 -07:00
71a617b4dc Fix erasure build 2018-09-07 13:18:19 -06:00
a722802c95 Window write lock to read lock 2018-09-07 13:18:19 -06:00
e9f44b6661 window -> window_service 2018-09-07 13:18:19 -06:00
9693de1867 Reposition parameters 2018-09-07 13:18:19 -06:00
f7ea95aed1 Hoist lock, reposition parameters 2018-09-07 13:18:19 -06:00
f07ce59be8 Toggle parameters 2018-09-07 13:18:19 -06:00
da423b6cf0 Hoist read lock 2018-09-07 13:18:19 -06:00
d5f60b68e4 Hoist window write lock 2018-09-07 13:18:19 -06:00
78b3a8f7f9 Hoist repair_window() branches
This probably would have been done if repair_window() was unit-tested.
2018-09-07 13:18:19 -06:00
d77699c126 Do the easy check first
All functions above operate on immutable values, so this shouldn't
change functionality, but no repair_window() tests to be certain.
2018-09-07 13:18:19 -06:00
09ba0dae15 Remove redundant clone() 2018-09-07 13:18:19 -06:00
a5c7575207 Rewrite find_next_missing, call it clear_slots 2018-09-07 13:18:19 -06:00
50f040530b Remove redundant cast 2018-09-07 13:18:19 -06:00
7f99c90539 Simplify using early return and Result::ok() 2018-09-07 13:18:19 -06:00
d8564b725c Don't reference window to get each slot 2018-09-07 13:18:19 -06:00
e4de25442a Hoist write lock
It needed to be passed the lock before, because it contained a
branch where one side didn't require locking. Now that that
defensive programming was hoisted, we can hoist the write lock
as well, leaving a simpler function for unit testing.
2018-09-07 13:18:19 -06:00
3b2ea8fd40 Hoist untested branch in window
If there were unit tests for this function, the author would have
written it this way to make their own life easier.
2018-09-07 13:18:19 -06:00
9a1832ed61 Bump ping timeout 2018-09-07 12:01:43 -07:00
9e45f1f5e2 Doc fixup 2018-09-07 12:01:43 -07:00
ee682d5bc3 Move wallet-sanity.sh out of multinode-demo/ 2018-09-07 12:01:43 -07:00
05decc863f Make set -x more buildkite friendly 2018-09-07 12:01:43 -07:00
506a81e8cc Assume -y 2018-09-07 12:01:43 -07:00
dcb30a8489 Delete leader node first 2018-09-07 12:01:43 -07:00
a2631e89f6 Use consistent style 2018-09-07 12:01:43 -07:00
ab208ddb77 Clean up arg handling 2018-09-07 12:01:43 -07:00
09a48d773a Run bench-tps in a tmux 2018-09-07 12:01:43 -07:00
88298bf321 Add -n option 2018-09-07 12:01:43 -07:00
d252f7f687 Revert "Default to 10 validators"
This reverts commit ed5fbaef06.
2018-09-07 12:01:43 -07:00
533ebc17f2 Install multilog automatically on a CI machine 2018-09-07 11:56:23 -07:00
f4947236dc Keep cargo-target-cache size under 6GB-ish 2018-09-07 11:45:27 -07:00
e088833b81 s/create/start/ 2018-09-06 21:07:11 -07:00
53e16f68d9 Improve error handling 2018-09-06 20:57:05 -07:00
ed5fbaef06 Default to 10 validators 2018-09-06 20:46:49 -07:00
b1bacf12a6 Add some log sections 2018-09-06 20:38:11 -07:00
66ff602659 Rewrite ci/testnet-{deploy,sanity}.sh in terms of net/ primitives 2018-09-06 19:54:39 -07:00
e175c9dea9 Remove ip address hardcode. Fixes #959 2018-09-06 19:54:39 -07:00
5a57d9b5d9 de-y 2018-09-06 19:54:39 -07:00
03e87e4169 Add more metrics 2018-09-06 19:54:39 -07:00
abfff66d53 Retry ssh a couple times before giving up 2018-09-06 19:54:39 -07:00
31dee553d5 Split start/version reporting 2018-09-06 19:54:39 -07:00
9ca6a2d25b Configure boot disk size 2018-09-06 19:54:39 -07:00
a3178c3bc7 Remove unused name tag 2018-09-06 19:54:39 -07:00
aa07bdfbaa Optionally suppress delete confirmation 2018-09-06 19:54:39 -07:00
eaef9be710 Clarify -f 2018-09-06 19:54:39 -07:00
cae345b416 Allow - in prefix 2018-09-06 19:54:39 -07:00
acb1171422 Add -e option 2018-09-06 19:54:39 -07:00
52d8f293b6 Add links to citations
And fix hyphens in quote.
2018-09-06 20:41:05 -06:00
636eb8d058 Add Leslie Lamport quote to README 2018-09-06 20:41:05 -06:00
0fa27f65bb Use the default Pubkey formatter instead of debug_id() 2018-09-06 16:31:47 -06:00
8f94e3f7ae Buffer tokens when switching directions to prevent errors (#1126)
Even if transactions are dropped, accounts will have buffer
of tokens. Should reduce or eliminate AccountNotFound errors seen in the
leader while bench-tps is running.
2018-09-06 14:20:01 -07:00
05460eec0d Open multiple sockets for transaction UDP port (#1128)
* Reuse UDP port and open multiple sockets for transaction address

* Fixed failing crdt tests

* Add tests for reusing UDP ports

* Address review comments

* Updated bench-streamer to use multiple receive sockets

* Fix minimum number of recv sockets for bench-streamer

* Address review comments

Fixes #1132

* Moved bind_to function to nat.rs
2018-09-06 14:13:40 -07:00
072d0b67e4 Send deploy metrics to the testnet-specific database 2018-09-06 08:30:03 -07:00
fdc48d521c use USER instead of whoami (#1134)
* use USER instead of whoami

make gcloud_FigureRemoteUsername robust against unsolicited output
   (that I get on login ;) )

validate --prefix argument

* Update gcloud.sh
2018-09-07 00:18:05 +09:00
6560b0e2cc s/whoami/id -un/ 2018-09-05 14:26:21 -07:00
ec38dba209 GCE leader nodes can now be provisioned with a static IP address 2018-09-05 14:26:21 -07:00
d9e4bce6ad Add drop stats to bench-tps (#1127)
See how many transactions made it through
2018-09-05 11:58:41 -07:00
1fd4343621 Add total count to stat (#1124) 2018-09-05 09:28:18 -07:00
8d87627a49 t 2018-09-05 09:09:50 -07:00
aacf27fb76 Add convenience link to current Snap log files 2018-09-05 09:02:02 -07:00
a51536d107 Add log tail hint 2018-09-05 09:02:02 -07:00
1c874fbc1b Make This is little more hacky 2018-09-05 09:02:02 -07:00
0362169671 Better scope leader and validator setup 2018-09-05 09:02:02 -07:00
e2e569cb43 Set rsync url for local deployments 2018-09-05 09:02:02 -07:00
8c51b47e85 Preserve existing ssh config 2018-09-05 09:02:02 -07:00
017eb10e76 Add file header doc 2018-09-05 09:02:02 -07:00
f50aeb0e58 Always add perf-libs to LD_LIBRARY_PATH 2018-09-05 09:02:02 -07:00
48c19d3100 Enable cargo features to be specified 2018-09-05 09:02:02 -07:00
aaf0a23134 Add Tips section 2018-09-05 09:02:02 -07:00
89db85dbf9 Work around concurrent |gcloud compute ssh| terminal issue 2018-09-05 09:02:02 -07:00
e677cda027 Private IP networks now work, and are the default 2018-09-05 09:02:02 -07:00
db9219ccc8 Improve error monitoring 2018-09-05 09:02:02 -07:00
06fd945f85 Set node config correctly 2018-09-05 09:02:02 -07:00
6ad4a81123 s/_/-/g in filenames 2018-09-05 09:02:02 -07:00
bcaa0fdcb1 net/ can now deploy Snaps 2018-09-05 09:02:02 -07:00
2cb1375217 Run gcloud_PrepInstancesForSsh in parallel 2018-09-05 09:02:02 -07:00
9365a47d42 Employ a startup script 2018-09-05 09:02:02 -07:00
6ffe205447 Add -g option 2018-09-05 09:02:02 -07:00
ec3e62dd58 Add net/ sanity 2018-09-05 09:02:02 -07:00
fa07c49cc9 net/ can now deploy Snaps 2018-09-05 09:02:02 -07:00
449d7042f0 Configure metrics correctly 2018-09-05 09:02:02 -07:00
7e2b65374d gce instance types are now configurable 2018-09-05 09:02:02 -07:00
8e39465700 Drop .sh extension to hide from shellcheck 2018-09-05 09:02:02 -07:00
43b4207101 Run oom-monitor in net/ testnets 2018-09-05 09:02:02 -07:00
ff991b87da Add support for deploying from non-Linux machines 2018-09-05 09:02:02 -07:00
c81c19234f Improve incremental speed of docker cargo builds outside of CI 2018-09-05 09:02:02 -07:00
399caf343c Morph gce_multinode-based scripts into net/ 2018-09-05 09:02:02 -07:00
ffb72136c8 Remove account from balances table after error seen (#1120)
If balance goes to 0, then bank removes the account
from its account table and returns no account error. Thin client
should also update the account to this state or it will
still have the cached balance from the last successful get_balance().
2018-09-04 21:33:19 -07:00
1a615bde2b Update README.md (#1117)
* Update README.md

* Fix spelling

* Improved punctuation
2018-09-04 20:41:11 -07:00
cf2626a1c5 Update instructions to upgrade nightly docker image 2018-09-04 20:56:40 -06:00
68c72d6f34 Fix nightly build 2018-09-04 20:56:40 -06:00
65f78905cd Install cargo-cov on latest nightly 2018-09-04 20:56:40 -06:00
70a8ae4612 Fixed private IP variable in gcloud script (#1119) 2018-09-04 16:24:19 -07:00
d82ec2634c Fix is_leader boolean (#1115)
A node is the leader if the address is none
2018-09-04 13:38:24 -07:00
b4a7a18334 Update README.md 2018-09-04 13:29:00 -07:00
c44c5f0b09 take into account size of an Entry (#1116) 2018-09-05 05:07:58 +09:00
226d3b9471 Trace recycle() calls (#968)
* trace recycle() calls fixes #810
2018-09-05 05:07:02 +09:00
2752bde683 Print to indicate what drone is doing while waiting for gossip 2018-09-04 13:45:08 -06:00
b8816d722c Fix Block::to_blobs() benchmark
16% speedup, w00t!

name                                control  ns/iter  variable  ns/iter  diff ns/iter   diff %  speedup
bench_block_to_blobs_to_block       29,897            25,807                   -4,090  -13.68%   x 1.16
2018-09-04 07:50:23 -10:00
2aa72cc72e Return a Vec from to_blobs() instead of using a mut parameter 2018-09-04 07:50:23 -10:00
8cc030ef84 Use Vec instead of VecDeque for SharedBlobs 2018-09-04 07:50:23 -10:00
9a9f89293a Better error handling messages for airdrops 2018-09-04 06:46:43 -10:00
501deeef56 accounts should never be negative (#1083) 2018-09-04 06:43:18 -10:00
05f921d544 Don't call println in the test suite 2018-09-04 06:01:32 -10:00
ab7a2960b1 Don't use product name in solana library 2018-09-04 06:01:32 -10:00
4e2deaa33b Less mut 2018-09-04 06:01:32 -10:00
d5ef18337c Remove redundant return value
And don't log the same error twice.
2018-09-04 06:01:32 -10:00
d18ea501b7 Minimize unsafe code 2018-09-04 06:01:32 -10:00
c9a1ac9b8c Don't propagate errors we'll never handle 2018-09-04 06:01:32 -10:00
c2a4cb544e Borrow, don't clone entries 2018-09-04 06:01:32 -10:00
3ab12076e8 Convert voting functions to methods
More idiomatic Rust.
2018-09-04 05:53:58 -10:00
6a383c45fc Update sendTransaction example to reflect new array size 2018-09-04 05:44:10 -10:00
7cc27e7bd1 Doc requestAirdrop rpc method 2018-09-04 05:44:10 -10:00
0464087327 Add api definitions 2018-09-04 05:44:10 -10:00
c193c7de12 Add JSON-RPC API Documentation 2018-09-04 05:44:10 -10:00
61abee204f don't check for snap mode in common.sh, is only relevant to snap daemons (#1113)
snap mode is for daemons, remove it from client (i.e. common.sh)

supply leader info to client via snap
2018-09-04 14:31:54 +09:00
a99dbb2a0c set -x in client.sh 2018-09-04 11:55:04 +09:00
e834c76b40 --count => --num-nodes 2018-09-04 07:07:25 +09:00
7b3c7f148b supply leader and leader_address 2018-09-02 02:27:05 +09:00
fb4b33b81b make the repair_backoff test more robust (#1095)
* make the repair_backoff test more robust

* fix names and magic numbers
2018-08-31 12:40:56 -10:00
25d7dc7b96 fixups 2018-09-01 04:38:18 +09:00
d1f1cbe88f leader-address=>leader-ip 2018-09-01 04:38:18 +09:00
a4e7b6e90c more fixups for client.sh changes 2018-09-01 03:33:21 +09:00
fbc7c9c431 fix client_start to deal with new client.sh 2018-09-01 03:23:05 +09:00
8b248dcf09 specify port 2018-09-01 02:56:24 +09:00
4938aad939 fixups 2018-09-01 02:21:46 +09:00
7e882dfe62 inform all snaps where the network is 2018-09-01 02:21:46 +09:00
5c8cb96f88 rebase fixup 2018-08-31 23:21:07 +09:00
9d1eb4f9ea remove 'localhost' leader (redundant, un-dig-friendly) 2018-08-31 23:21:07 +09:00
210a4d0640 fixup 2018-08-31 23:21:07 +09:00
176e806d94 rework of network rendezvous
* rename NodeInfo field of Node from "data" to "info"
      (touches a lot of files)

  * update client to use gossip to find leader, a la drone

  * rework multinode scripts
      * move more stuff into rust
      * added usage to all
      * no more rsync unless you're a validator (TODO: whack that, too)
  * fullnode doesn't bail if drone isn't up yet, just keeps trying
  * drone doesn't bail if network isn't up yet, just keeps trying
2018-08-31 23:21:07 +09:00
eb4e5a7bd0 fixups 2018-08-31 23:21:07 +09:00
ba27596076 fixups 2018-08-31 23:21:07 +09:00
63e44dcc35 continue rendezvous refactor for gossip and repair
* remove trailing whitespace in ci/audit.sh

  * code review fixups
     * rename GOSSIP_PORT_RANGE => SOLANA_PORT_RANGE
     * remove out-of-date TODO in localnet-sanity.sh

  * remove features=test and code that was using it (localhost prohibitions in
      crdt) added TODO in crdt.rs, maybe we should boot localhost in production
      networks?

  * boot tvu_window from NodeInfo: instead, send repair requests from the repair
      socket (to gossip on peer) and answer repair requests via the sockaddr
      from the repair request

  * remove various unused pub functions

  * banish SocketAddr parse().unwrap() to a macro that can also accept simpler stuff
2018-08-31 23:21:07 +09:00
c0ba676658 fixup 2018-08-31 23:21:07 +09:00
1af4cee63b fix #1079
* move gossip/NCP off assuming anything about its address
  * use a single socket to send and receive gossip
  * remove --addr/-a from CLIs
  * rearrange networking utility code
  * use Arc<UdpSocket> to share the Sync-safe UdpSocket among threads
  * rename TestNode to Node

TODO:

  * re-enable 127.0.0.1 as a valid address in crdt
  * change repair request/response to a similar, single socket
  * pick cloned sockets or Arc<UdpSocket> for all these (rpu uses tryclone())
  * update contact_info with network truthiness instead of what the node
      says?
2018-08-31 23:21:07 +09:00
cb52a335bd re-enable localnet-sanity 2018-08-31 23:21:07 +09:00
e308a4279e Update RPC requestAirdrop endpoint to return airdrop tx signature 2018-08-28 18:27:41 -06:00
513a934ff6 Update request_airdrop utility function to pass along airdrop tx signature 2018-08-28 18:27:41 -06:00
77d820c842 Update drone module to return airdrop tx signature 2018-08-28 18:27:41 -06:00
30cbe7c6a9 Update jsonrpc crate version 2018-08-28 18:27:24 -06:00
18ef643dc7 Keep locals local 2018-08-28 08:11:44 -07:00
73a0bf8d30 Avoid unbounded /var/tmp growth 2018-08-28 08:11:44 -07:00
9d53208d68 Use gcloud_DeleteInstances 2018-08-28 08:11:44 -07:00
d26f135159 Find metrics-write-datapoint.sh again 2018-08-27 22:41:58 -07:00
c8e3ce26a9 Start of scripts/gcloud.sh 2018-08-27 22:35:14 -07:00
f88970a964 source oom-score-adj.sh from validator.sh 2018-08-28 10:01:41 +09:00
51d911e3f4 Update testnet-sanity.sh 2018-08-27 15:44:10 -07:00
bd5c6158ae Move some common scripts from multinode-demo/ to scripts/ 2018-08-27 13:52:38 -07:00
cd0db7842c Remove unused _config.yml 2018-08-27 13:52:38 -07:00
31d1087103 Documentation 2018-08-27 13:52:38 -07:00
0efd64df6f no need for sudo, move ledger copy out of SNAP_DATA 2018-08-28 05:42:05 +09:00
28bdf346f6 clean up after ledger sanity 2018-08-28 05:42:05 +09:00
48762834d9 Randomize repair requests (#1059)
* randomize packet repair requests

* exponential random repair requests

* use gen_range to get a uniform distribution
2018-08-27 07:05:48 -07:00
8d0d429acd update 2018-08-26 23:34:25 -07:00
e5408368f7 fmt 2018-08-26 22:35:26 -07:00
61492fd27e exit if no leader 2018-08-26 22:35:26 -07:00
bbce08a67b bench needs to discover leader as well 2018-08-26 22:35:26 -07:00
a002148098 retry transfer and poll 2018-08-26 16:10:46 -07:00
90ae662e4d Fix packet header offset
And update transaction offsets to use the same approach as packet.rs.
Maybe this should be serialized_size(), but thanks to this
GenericArray update, those values are the same.
2018-08-26 14:27:19 -06:00
60d8f5489f Update transaction layout offsets
24 less bytes in minimal transactions. 10% TPS boost?
2018-08-26 14:27:19 -06:00
59dd8b650d Update generic-array requirement from 0.11.1 to 0.12.0
Updates the requirements on [generic-array](https://github.com/fizyk20/generic-array) to permit the latest version.
- [Release notes](https://github.com/fizyk20/generic-array/releases)
- [Changelog](https://github.com/fizyk20/generic-array/blob/master/CHANGELOG.md)
- [Commits](https://github.com/fizyk20/generic-array/commits)

Signed-off-by: dependabot[bot] <support@dependabot.com>
2018-08-26 14:27:19 -06:00
738247ad44 advertise valid gossip address in drone and wallet (#1066)
* advertize valid gossip address in drone and wallet

get rid of asserts

check for valid ip address

check for valid address

ip address

* tests

* cleanup

* cleanup

* print error

* bump

* disable tests

* disable nightly
2018-08-26 11:36:27 -07:00
5b0bb7e607 Skip invalid nodes for finality (#1068)
* skip invalid nodes for finality

* check valid last_ids only

* fixup!

* fixup!
2018-08-25 23:12:41 -07:00
f7c0d30167 Disallow localhost in deployment (#1064)
* disallow localhost in deployment

* tests

* fmt

* integration tests do not have a flag to check

* fmt
2018-08-25 21:09:18 -07:00
8e98c7c9d6 fix purge test 2018-08-25 19:56:09 -07:00
50661e7b8d Added poll_balance_with_timeout method (#1062)
* Added poll_balance_with_timeout method

- updated bench-tps, fullnode and wallet to use this method instead
  of repeatedly calling poll_get_balance()

* Address review comments

- Revert some changes to use wrapper poll_get_balance()

* Reverting bench-tps to use poll_get_balance

- The original code is checking if the balance has been updated,
  instead of just retrieving the balance. The logic is different
  than poll_balance_with_timeout()

* Reverting wallet to use poll_get_balance

- The break condition in the loop is different than poll_balance_with_timeout().
  It's checking if the balance has been updated.
2018-08-25 18:24:25 -07:00
ad159e0906 Fix crash in fullnode when poll_get_balance() returns error (#1058) 2018-08-25 15:25:13 -07:00
d3fac8a06f Dynamically bind to available UDP ports in Fullnode (#920)
* Dynamically bind to available UDP ports in Fullnode

* Added tests for dynamic port binding

- Also removed hard coding of port range from CRDT
2018-08-25 10:24:16 -07:00
c641ba1006 Up network buffers to 64MB max (#1057)
500ms of data at 1Gbps = 125MB/2 ≈ 64MB
Seems to help tx rate in GCP network tests.
2018-08-24 18:17:48 -07:00
de379ed915 Fix sig verify counters to be unique and tweak perf counters (#1056)
print events and add current events to old value to report
2018-08-24 16:05:32 -07:00
d4554c6b78 RFC Branches, Channels, and Tags 2018-08-23 21:28:05 -07:00
6fc21a4223 Don't hang in transaction_count (#1052)
The situation is that there can be bad entries in
the bench-tps CRDT table until they get purged later. Threads however
are created for those bad entries and then will hang on trying
to get the transaction_count from those bad addresses and never end.
2018-08-23 20:57:13 -07:00
71319978df Up drone request amount (#1051)
Multiple clients will request 500k each so up this to support them.
2018-08-23 15:30:35 -07:00
6147e54686 Cap repair requests timeout (#958) 2018-08-23 15:30:21 -07:00
0c8eec2563 Cleanup Fullnode construction
leader_id was already set by Fullnode constructor. And cleanup the
rest of that code while in the neighborhood.

Thanks @CriesofCarrots!
2018-08-23 13:42:54 -07:00
4ab58f069a Add back JsonRpcService changes 2018-08-23 13:42:54 -07:00
85f96d926a Pacify clippy 2018-08-23 13:42:54 -07:00
816de4f8ec Hoist shared code between leaders and validators 2018-08-23 13:42:54 -07:00
42229a1105 Hoist thread_hdls 2018-08-23 13:42:54 -07:00
d8820053af Inline create_leader_threads and create_validator_threads 2018-08-23 13:42:54 -07:00
731f8512c6 Hoist Arc<Bank> 2018-08-23 13:42:54 -07:00
a133784706 Rename mode-specific constructors and return only thread handles 2018-08-23 13:42:54 -07:00
be58fdf1bb Less constructors 2018-08-23 13:42:54 -07:00
57daeb35d2 Drop all references to new_leader and new_validator 2018-08-23 13:42:54 -07:00
9c5e69bf3d Don't offer two ways to specify a leader 2018-08-23 13:42:54 -07:00
cfac127e4c Extract lower-level constructor
Passing in the bank is useful for unit-tests since Fullnode doesn't
store it in a member variable.
2018-08-23 13:42:54 -07:00
fda4523cbf Fix broken doc 2018-08-23 13:42:54 -07:00
cabe80b129 Increment counter by number of packets received (#1049)
So that we can see the total packets/s
2018-08-23 12:32:50 -07:00
d4c41219f9 Improve gossip use for drone and wallet
- Add utility function
  - Add thread sleep
  - Enable configurable timeout for gossip poll
2018-08-23 13:08:59 -06:00
4fdd9fbfca Wallet: use gossip to identify leader's port config 2018-08-23 13:08:59 -06:00
bdf5ac9c1a Drone: use gossip to identify leader's port config 2018-08-23 13:08:59 -06:00
f1785c76a4 Rework counter increment outside apply_debits loop (#1046)
Reduces prints/atomics work inside the process_transactions loop
2018-08-23 09:42:59 -07:00
2de8fe9c5f Pass bank to rpc as reference 2018-08-23 09:06:17 -06:00
d910ed68a3 Use balance to verify requestAirdrop success 2018-08-23 09:06:17 -06:00
f7f7ecd4c6 Add json-rpc requestAirdrop endpoint 2018-08-23 09:06:17 -06:00
a9c3a28a3b Add json-rpc sendTransaction endpoint 2018-08-23 09:06:17 -06:00
96787ff4ac Use builtin sum 2018-08-22 16:24:19 -06:00
c3ed4d28de Change average TPS to max average tps seen for any node and...
add script to collect perf stats
2018-08-22 14:55:04 -07:00
f1e35c3bc6 GCE script change to use GCE private network for multinode tests (#1042)
- Also the user can specify the zone where the nodes should be created
2018-08-22 13:21:33 -07:00
db3fb3a27c Boot criterion (#1032)
* Revert benchmarks back to libtest

Criterion has too many dependencies, its execution is slower, and
we didn't see the kind of precision we had hoped for to use it to
block CI builds.

* Ignore benchmarks that take more than a few milliseconds per iteration

* Revert "Ignore benchmarks that take more than a few milliseconds per iteration"

This reverts commit b87cdf6ef4.

* Don't run benchmarks in CI

They are already built in the nightly build. Executing them in CI
doesn't add much value until the results are precise enough to act
on.
2018-08-22 08:57:07 -06:00
8282442956 fixes #927 2018-08-22 17:47:59 +09:00
a355d9f46c Add error catch for rpc server builder 2018-08-21 14:04:52 -06:00
be4824c955 Add custom panic hook for RPC port bind 2018-08-21 14:04:52 -06:00
86c1d97c13 Fix validator rpc addr to match leader 2018-08-20 22:35:06 -07:00
0b48aea937 echo commands, use PID (good form) 2018-08-21 11:41:00 +09:00
cdec0cead2 files have to appear in the snap 2018-08-21 11:41:00 +09:00
831709ce7e fixups 2018-08-21 10:36:03 +09:00
b7b8a31532 make a copy of the ledger for sanity check
we can't verify a live ledger, unfortunately, fixes #985
2018-08-21 10:36:03 +09:00
15406545d8 Document how to adjust the number of clients or validators on the testnet 2018-08-20 18:35:01 -07:00
5aced8224f Revert "make a copy of the ledger for sanity check"
This reverts commit af20a43b77.
2018-08-21 10:34:52 +09:00
af20a43b77 make a copy of the ledger for sanity check
we can't verify a live ledger, unfortunately, fixes #985
2018-08-21 09:45:52 +09:00
39c3280860 Don't block on large network test 2018-08-20 16:48:37 -06:00
2d35345c50 Boot unused creates 2018-08-20 16:48:37 -06:00
a02910be32 Remove pubkey from getBalance response 2018-08-20 15:02:48 -07:00
b9ec97a30b Add counter for bank transaction errors (#1015) 2018-08-20 14:56:01 -07:00
2e89999d88 # This is a combination of 4 commits.
# This is the 1st commit message:

Fix tesetment readme

# This is the commit message #2:

updte

# This is the commit message #3:

typo

# This is the commit message #4:

cleanup
2018-08-20 13:49:56 -07:00
24b0031925 Reduce number of nodes in multinode test (#1003) 2018-08-20 13:40:42 -07:00
9eeaf2d502 Bind RPC port on all interfaces 2018-08-20 12:45:50 -07:00
c9e6fb36c3 Avoid unnecessary cargo rebuilds in non-perf configuration 2018-08-20 12:03:44 -07:00
8de317113c clippy: remove identity conversion 2018-08-20 10:55:55 -07:00
a1ec549630 Pin nightly rust for more controlled updating 2018-08-20 10:55:55 -07:00
ecddff98f5 Add --nopull argument 2018-08-20 10:55:55 -07:00
10066d67bf Add llvm deb repository 2018-08-19 09:01:36 -07:00
a07f7435c6 \ 2018-08-19 08:49:29 -07:00
d3523ebbe5 Nightly image now derives from stable image 2018-08-19 08:47:59 -07:00
133ddb11ff typo in README 2018-08-18 18:24:42 -07:00
1bf15ae907 Temporarily disable cargo audit CI failure 2018-08-18 12:29:49 -06:00
f73f3941cd Revert ill-advised jsonrpc marker, and handle jsonrpc server close 2018-08-18 12:29:49 -06:00
d69d79612b Simplify Rpc request processing 2018-08-18 12:29:49 -06:00
64ea5126e0 Fix early return for invalid parameter 2018-08-18 12:29:49 -06:00
9df3aa50d5 Remove unnecessary solana_ prefixes 2018-08-18 12:29:49 -06:00
cab75b7829 Handle potential panics 2018-08-18 12:29:49 -06:00
d9fac86015 Use jsonrpc git repo, allowing removal of Default bound for Metadata 2018-08-18 12:29:49 -06:00
1eb8724a89 Disable Rpc module for other tests to prevent port conflicts 2018-08-18 12:29:49 -06:00
c6662a4512 Implement Rpc in Fullnode 2018-08-18 12:29:49 -06:00
d3c09b4e96 Update jsonrpc dependency syntax 2018-08-18 12:29:49 -06:00
124f6e83d2 Rpc get last id endpoint 2018-08-18 12:29:49 -06:00
569ff73b39 Rpc tests 2018-08-18 12:29:49 -06:00
fc1dbddd93 Implement json-rpc functionality 2018-08-18 12:29:49 -06:00
3ae867bdd6 fixups 2018-08-18 02:22:52 -07:00
bc5f29150b fix erasure, remove Entry "pad"
* fixes #997
 * Entry pad is no longer required since erasure coding aligns data length
2018-08-18 02:22:52 -07:00
46016b8c7e crashes generate_coding() 2018-08-18 02:22:52 -07:00
5dbecd6b6b add logging, more conservative reset 2018-08-18 02:22:52 -07:00
877920e61b Compute snap channel using ci/channel-info.sh 2018-08-17 23:15:48 -07:00
3d1e908dad Add script to fetch latest channel info 2018-08-17 23:15:48 -07:00
6880c2bef0 Exclude ci/semver_bash/; don't want to diverge from upstream 2018-08-17 23:15:48 -07:00
78872ffb4b Vendor https://github.com/cloudflare/semver_bash/tree/c1133faf0e 2018-08-17 23:15:48 -07:00
229d825fe0 Fix master-perf basename 2018-08-17 21:59:36 -07:00
edc5fc098e Make SNAP_CHANNEL more visible in build log 2018-08-17 21:39:54 -07:00
bbe815468d Add instructions on how to run the demo against testnet.solana.com and watch it on the dashboard 2018-08-17 21:26:06 -07:00
82e7725a42 Invert logic 2018-08-17 21:16:35 -07:00
dc61cf1c8d Keep v0.7 snap off the edge channel 2018-08-17 21:12:10 -07:00
aba63e2c6c Log expansion directive must be on its own line 2018-08-17 20:58:14 -07:00
c2ddd056e2 Add option to skip ledger verification 2018-08-17 20:41:30 -07:00
c9508e84f2 0.8.0 2018-08-17 17:56:35 -07:00
f6f0900506 Large network test to not poll validator for sigs (#998)
- The finality is already reached. The test will check the signature
  in validators once, instead of polling. This will help speed up the test.
2018-08-17 14:38:19 -07:00
7aeef27b99 not quite banishing build.rs, but better 2018-08-16 22:33:31 -07:00
98d0ef6df5 Add some wget retries 2018-08-16 20:22:49 -07:00
208a7f16cb Fix bench-tps nokey error 2018-08-16 19:38:26 -06:00
16cf31c3a3 fix #990 2018-08-16 15:52:30 -07:00
2b48daaeba accept multiple expected outputs 2018-08-16 14:44:51 -07:00
79d24ee227 fixed test according to @rob-solana 2018-08-16 14:44:51 -07:00
a284030ecc Account type with state
comments

fixups!

fixups!

fixups for a real Result<> from get_balance()

on 2nd thought, be more rigorous

Merge branch 'rob-solana-accounts_with_state' into accounts_with_state

update

review comments

comments

get rid of option
2018-08-16 14:44:51 -07:00
132 changed files with 7221 additions and 4222 deletions

View File

@ -41,5 +41,5 @@ else
point="job_stats,$point_tags $point_fields"
multinode-demo/metrics_write_datapoint.sh "$point" || true
scripts/metrics-write-datapoint.sh "$point" || true
fi

View File

@ -1,13 +1,27 @@
#!/bin/bash -e
[[ -n "$CARGO_TARGET_CACHE_NAME" ]] || exit 0
# Ensure the pattern "+++ ..." never occurs when |set -x| is set, as buildkite
# interprets this as the start of a log group.
# Ref: https://buildkite.com/docs/pipelines/managing-log-output
export PS4="++"
#
# Restore target/ from the previous CI build on this machine
#
(
[[ -n "$CARGO_TARGET_CACHE_NAME" ]] || (
d=$HOME/cargo-target-cache/"$CARGO_TARGET_CACHE_NAME"
if [[ -d $d ]]; then
du -hs "$d"
read -r cacheSizeInGB _ < <(du -s --block-size=1000000000 "$d")
if [[ $cacheSizeInGB -gt 5 ]]; then
echo "$d has gotten too large, removing it"
rm -rf "$d"
fi
fi
mkdir -p "$d"/target
set -x
rsync -a --delete --link-dest="$d" "$d"/target .
)

View File

@ -1,7 +1,7 @@
[package]
name = "solana"
description = "Blockchain, Rebuilt for Scale"
version = "0.7.1"
version = "0.8.0"
documentation = "https://docs.rs/solana"
homepage = "http://solana.com/"
readme = "README.md"
@ -61,24 +61,28 @@ unstable = []
ipv6 = []
cuda = []
erasure = []
test = []
[dependencies]
atty = "0.2"
bincode = "1.0.0"
bs58 = "0.2.0"
byteorder = "1.2.1"
bytes = "0.4"
chrono = { version = "0.4.0", features = ["serde"] }
clap = "2.31"
dirs = "1.0.2"
env_logger = "0.5.12"
futures = "0.1.21"
generic-array = { version = "0.11.1", default-features = false, features = ["serde"] }
generic-array = { version = "0.12.0", default-features = false, features = ["serde"] }
getopts = "0.2"
influx_db_client = "0.3.4"
jsonrpc-core = { git = "https://github.com/paritytech/jsonrpc", rev = "4b6060b" }
jsonrpc-http-server = { git = "https://github.com/paritytech/jsonrpc", rev = "4b6060b" }
jsonrpc-macros = { git = "https://github.com/paritytech/jsonrpc", rev = "4b6060b" }
itertools = "0.7.8"
libc = "0.2.1"
log = "0.4.2"
matches = "0.1.6"
nix = "0.11.0"
pnet_datalink = "0.21.0"
rand = "0.5.1"
rayon = "1.0.0"
@ -88,32 +92,23 @@ sha2 = "0.7.0"
serde = "1.0.27"
serde_derive = "1.0.27"
serde_json = "1.0.10"
socket2 = "0.3.8"
sys-info = "0.5.6"
tokio = "0.1"
tokio-codec = "0.1"
tokio-core = "0.1.17"
tokio-io = "0.1"
untrusted = "0.6.2"
[dev-dependencies]
criterion = "0.2"
[[bench]]
name = "bank"
harness = false
[[bench]]
name = "banking_stage"
harness = false
[[bench]]
name = "ledger"
harness = false
[[bench]]
name = "signature"
harness = false
[[bench]]
name = "sigverify"
harness = false

View File

@ -17,7 +17,11 @@ All claims, content, designs, algorithms, estimates, roadmaps, specifications, a
Introduction
===
It's possible for a centralized database to process 710,000 transactions per second on a standard gigabit network if the transactions are, on average, no more than 176 bytes. A centralized database can also replicate itself and maintain high availability without significantly compromising that transaction rate using the distributed system technique known as Optimistic Concurrency Control [H.T.Kung, J.T.Robinson (1981)]. At Solana, we're demonstrating that these same theoretical limits apply just as well to blockchain on an adversarial network. The key ingredient? Finding a way to share time when nodes can't trust one-another. Once nodes can trust time, suddenly ~40 years of distributed systems research becomes applicable to blockchain! Furthermore, and much to our surprise, it can implemented using a mechanism that has existed in Bitcoin since day one. The Bitcoin feature is called nLocktime and it can be used to postdate transactions using block height instead of a timestamp. As a Bitcoin client, you'd use block height instead of a timestamp if you don't trust the network. Block height turns out to be an instance of what's being called a Verifiable Delay Function in cryptography circles. It's a cryptographically secure way to say time has passed. In Solana, we use a far more granular verifiable delay function, a SHA 256 hash chain, to checkpoint the ledger and coordinate consensus. With it, we implement Optimistic Concurrency Control and are now well in route towards that theoretical limit of 710,000 transactions per second.
It's possible for a centralized database to process 710,000 transactions per second on a standard gigabit network if the transactions are, on average, no more than 176 bytes. A centralized database can also replicate itself and maintain high availability without significantly compromising that transaction rate using the distributed system technique known as Optimistic Concurrency Control [\[H.T.Kung, J.T.Robinson (1981)\]](http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.65.4735). At Solana, we're demonstrating that these same theoretical limits apply just as well to blockchain on an adversarial network. The key ingredient? Finding a way to share time when nodes can't trust one-another. Once nodes can trust time, suddenly ~40 years of distributed systems research becomes applicable to blockchain!
> Perhaps the most striking difference between algorithms obtained by our method and ones based upon timeout is that using timeout produces a traditional distributed algorithm in which the processes operate asynchronously, while our method produces a globally synchronous one in which every process does the same thing at (approximately) the same time. Our method seems to contradict the whole purpose of distributed processing, which is to permit different processes to operate independently and perform different functions. However, if a distributed system is really a single system, then the processes must be synchronized in some way. Conceptually, the easiest way to synchronize processes is to get them all to do the same thing at the same time. Therefore, our method is used to implement a kernel that performs the necessary synchronization--for example, making sure that two different processes do not try to modify a file at the same time. Processes might spend only a small fraction of their time executing the synchronizing kernel; the rest of the time, they can operate independently--e.g., accessing different files. This is an approach we have advocated even when fault-tolerance is not required. The method's basic simplicity makes it easier to understand the precise properties of a system, which is crucial if one is to know just how fault-tolerant the system is. [\[L.Lamport (1984)\]](http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.71.1078)
Furthermore, and much to our surprise, it can be implemented using a mechanism that has existed in Bitcoin since day one. The Bitcoin feature is called nLocktime and it can be used to postdate transactions using block height instead of a timestamp. As a Bitcoin client, you'd use block height instead of a timestamp if you don't trust the network. Block height turns out to be an instance of what's being called a Verifiable Delay Function in cryptography circles. It's a cryptographically secure way to say time has passed. In Solana, we use a far more granular verifiable delay function, a SHA-256 hash chain, to checkpoint the ledger and coordinate consensus. With it, we implement Optimistic Concurrency Control and are now well en route towards that theoretical limit of 710,000 transactions per second.
Testnet Demos
@ -58,7 +62,7 @@ your odds of success if you check out the
before proceeding:
```bash
$ git checkout v0.7.0-beta
$ git checkout v0.8.0
```
Configuration Setup
@ -92,45 +96,47 @@ Before you start a fullnode, make sure you know the IP address of the machine yo
want to be the leader for the demo, and make sure that udp ports 8000-10000 are
open on all the machines you want to test with.
Now start the server:
Now start the server in a separate shell:
```bash
$ ./multinode-demo/leader.sh
```
Wait a few seconds for the server to initialize. It will print "Ready." when it's ready to
Wait a few seconds for the server to initialize. It will print "leader ready..." when it's ready to
receive transactions. The leader will request some tokens from the drone if it doesn't have any.
The drone does not need to be running for subsequent leader starts.
Multinode Testnet
---
To run a multinode testnet, after starting a leader node, spin up some validator nodes:
To run a multinode testnet, after starting a leader node, spin up some validator nodes in
separate shells:
```bash
$ ./multinode-demo/validator.sh ubuntu@10.0.1.51:~/solana 10.0.1.51
$ ./multinode-demo/validator.sh
```
To run a performance-enhanced leader or validator (on Linux),
[CUDA 9.2](https://developer.nvidia.com/cuda-downloads) must be installed on
your system:
```bash
$ ./fetch-perf-libs.sh
$ SOLANA_CUDA=1 ./multinode-demo/leader.sh
$ SOLANA_CUDA=1 ./multinode-demo/validator.sh ubuntu@10.0.1.51:~/solana 10.0.1.51
$ SOLANA_CUDA=1 ./multinode-demo/validator.sh
```
Testnet Client Demo
---
Now that your singlenode or multinode testnet is up and running, in a separate shell, let's send it some transactions! Note we pass in
the JSON configuration file here, not the genesis ledger.
Now that your singlenode or multinode testnet is up and running let's send it
some transactions!
In a separate shell start the client:
```bash
$ ./multinode-demo/client.sh ubuntu@10.0.1.51:~/solana 2 #The leader machine and the total number of nodes in the network
$ ./multinode-demo/client.sh # runs against localhost by default
```
What just happened? The client demo spins up several threads to send 500,000 transactions
@ -142,21 +148,35 @@ demo completes after it has convinced itself the testnet won't process any addit
transactions. You should see several TPS measurements printed to the screen. In the
multinode variation, you'll see TPS measurements for each validator node as well.
Public Testnet
--------------
In this example the client connects to our public testnet. To run validators on the testnet you would need to open udp ports `8000-10000`.
```bash
$ ./multinode-demo/client.sh --network $(dig +short testnet.solana.com):8001 --identity config-private/client-id.json --duration 60
```
You can observe the effects of your client's transactions on our [dashboard](https://metrics.solana.com:3000/d/testnet/testnet-hud?orgId=2&from=now-30m&to=now&refresh=5s&var-testnet=testnet)
Linux Snap
---
A Linux [Snap](https://snapcraft.io/) is available, which can be used to
easily get Solana running on supported Linux systems without building anything
from source. The `edge` Snap channel is updated daily with the latest
development from the `master` branch. To install:
```bash
$ sudo snap install solana --edge --devmode
```
(`--devmode` flag is required only for `solana.fullnode-cuda`)
Once installed the usual Solana programs will be available as `solana.*` instead
of `solana-*`. For example, `solana.fullnode` instead of `solana-fullnode`.
Update to the latest version at any time with:
```bash
$ snap info solana
$ sudo snap refresh solana --devmode
@ -176,6 +196,7 @@ contains the latest log, and the files `*.s` (if present) contain older rotated
logs.
Disable the daemon at any time by running:
```bash
$ sudo snap set solana mode=
```
@ -184,11 +205,13 @@ Runtime configuration files for the daemon can be found in
`/var/snap/solana/current/config`.
#### Leader daemon
```bash
$ sudo snap set solana mode=leader
```
If CUDA is available:
```bash
$ sudo snap set solana mode=leader enable-cuda=1
```
@ -211,26 +234,31 @@ to port tcp:873, tcp:9900 and the port range udp:8000-udp:10000**
To run both the Leader and Drone:
```bash
$ sudo snap set solana mode=leader+drone
```
#### Validator daemon
```bash
$ sudo snap set solana mode=validator
```
If CUDA is available:
```bash
$ sudo snap set solana mode=validator enable-cuda=1
```
By default the validator will connect to **testnet.solana.com**, override
the leader IP address by running:
```bash
$ sudo snap set solana mode=validator leader-address=127.0.0.1 #<-- change IP address
```
It's assumed that the leader will be running `rsync` configured as described in
the previous **Leader daemon** section.
@ -254,9 +282,10 @@ If your rustc version is lower than 1.26.1, please update it:
$ rustup update
```
On Linux systems you may need to install libssl-dev and pkg-config. On Ubuntu:
On Linux systems you may need to install libssl-dev, pkg-config, zlib1g-dev, etc. On Ubuntu:
```bash
$ sudo apt-get install libssl-dev pkg-config
$ sudo apt-get install libssl-dev pkg-config zlib1g-dev
```
Download the source code:
@ -276,6 +305,7 @@ $ cargo test
```
To emulate all the tests that will run on a Pull Request, run:
```bash
$ ./ci/run-local.sh
```
@ -284,17 +314,21 @@ Debugging
---
There are some useful debug messages in the code, you can enable them on a per-module and per-level
basis with the normal RUST\_LOG environment variable. Run the fullnode with this syntax:
basis. Before running a leader or validator set the normal RUST\_LOG environment variable.
For example, to enable info everywhere and debug only in the solana::banking_stage module:
```bash
$ RUST_LOG=solana::streamer=debug,solana::server=info cat genesis.log | ./target/release/solana-fullnode > transactions0.log
$ export RUST_LOG=info,solana::banking_stage=debug
```
to see the debug and info sections for streamer and server respectively. Generally
we are using debug for infrequent debug messages, trace for potentially frequent messages and
info for performance-related logging.
Attaching to a running process with gdb:
Generally we are using debug for infrequent debug messages, trace for potentially frequent
messages and info for performance-related logging.
```
You can also attach to a running process with GDB. The leader's process is named
_solana-fullnode_:
```bash
$ sudo gdb
attach <PID>
set logging on
@ -318,6 +352,11 @@ Run the benchmarks:
$ cargo +nightly bench --features="unstable"
```
Release Process
---
The release process for this project is described [here](rfcs/rfc-005-branches-tags-and-channels.md).
Code coverage
---

View File

@ -1 +0,0 @@
theme: jekyll-theme-slate

View File

@ -1,18 +1,19 @@
#[macro_use]
extern crate criterion;
#![feature(test)]
extern crate bincode;
extern crate rayon;
extern crate solana;
extern crate test;
use bincode::serialize;
use criterion::{Bencher, Criterion};
use rayon::prelude::*;
use solana::bank::*;
use solana::hash::hash;
use solana::mint::Mint;
use solana::signature::{Keypair, KeypairUtil};
use solana::transaction::Transaction;
use test::Bencher;
#[bench]
fn bench_process_transaction(bencher: &mut Bencher) {
let mint = Mint::new(100_000_000);
let bank = Bank::new(&mint);
@ -39,28 +40,10 @@ fn bench_process_transaction(bencher: &mut Bencher) {
})
.collect();
bencher.iter_with_setup(
|| {
// Since benchmarker runs this multiple times, we need to clear the signatures.
bank.clear_signatures();
transactions.clone()
},
|transactions| {
let results = bank.process_transactions(transactions);
assert!(results.iter().all(Result::is_ok));
},
)
bencher.iter(|| {
// Since benchmarker runs this multiple times, we need to clear the signatures.
bank.clear_signatures();
let results = bank.process_transactions(transactions.clone());
assert!(results.iter().all(Result::is_ok));
})
}
fn bench(criterion: &mut Criterion) {
criterion.bench_function("bench_process_transaction", |bencher| {
bench_process_transaction(bencher);
});
}
criterion_group!(
name = benches;
config = Criterion::default().sample_size(2);
targets = bench
);
criterion_main!(benches);

View File

@ -1,10 +1,9 @@
#![feature(test)]
extern crate bincode;
#[macro_use]
extern crate criterion;
extern crate rayon;
extern crate solana;
extern crate test;
use criterion::{Bencher, Criterion};
use rayon::prelude::*;
use solana::bank::Bank;
use solana::banking_stage::BankingStage;
@ -16,6 +15,7 @@ use solana::transaction::Transaction;
use std::iter;
use std::sync::mpsc::{channel, Receiver};
use std::sync::Arc;
use test::Bencher;
// use self::test::Bencher;
// use bank::{Bank, MAX_ENTRY_IDS};
@ -95,6 +95,7 @@ fn check_txs(receiver: &Receiver<Signal>, ref_tx_count: usize) {
assert_eq!(total, ref_tx_count);
}
#[bench]
fn bench_banking_stage_multi_accounts(bencher: &mut Bencher) {
let tx = 10_000_usize;
let mint_total = 1_000_000_000_000;
@ -145,7 +146,6 @@ fn bench_banking_stage_multi_accounts(bencher: &mut Bencher) {
})
.collect();
let verified_setup_len = verified_setup.len();
verified_sender.send(verified_setup).unwrap();
BankingStage::process_packets(&bank, &verified_receiver, &signal_sender, &packet_recycler)
.unwrap();
@ -160,7 +160,6 @@ fn bench_banking_stage_multi_accounts(bencher: &mut Bencher) {
})
.collect();
let verified_len = verified.len();
verified_sender.send(verified).unwrap();
BankingStage::process_packets(&bank, &verified_receiver, &signal_sender, &packet_recycler)
.unwrap();
@ -169,6 +168,7 @@ fn bench_banking_stage_multi_accounts(bencher: &mut Bencher) {
});
}
#[bench]
fn bench_banking_stage_single_from(bencher: &mut Bencher) {
let tx = 10_000_usize;
let mint = Mint::new(1_000_000_000_000);
@ -203,7 +203,6 @@ fn bench_banking_stage_single_from(bencher: &mut Bencher) {
(x, iter::repeat(1).take(len).collect())
})
.collect();
let verified_len = verified.len();
verified_sender.send(verified).unwrap();
BankingStage::process_packets(&bank, &verified_receiver, &signal_sender, &packet_recycler)
.unwrap();
@ -211,19 +210,3 @@ fn bench_banking_stage_single_from(bencher: &mut Bencher) {
check_txs(&signal_receiver, tx);
});
}
fn bench(criterion: &mut Criterion) {
criterion.bench_function("bench_banking_stage_multi_accounts", |bencher| {
bench_banking_stage_multi_accounts(bencher);
});
criterion.bench_function("bench_process_stage_single_from", |bencher| {
bench_banking_stage_single_from(bencher);
});
}
criterion_group!(
name = benches;
config = Criterion::default().sample_size(2);
targets = bench
);
criterion_main!(benches);

View File

@ -1,15 +1,15 @@
#[macro_use]
extern crate criterion;
#![feature(test)]
extern crate solana;
extern crate test;
use criterion::{Bencher, Criterion};
use solana::hash::{hash, Hash};
use solana::ledger::{next_entries, reconstruct_entries_from_blobs, Block};
use solana::packet::BlobRecycler;
use solana::signature::{Keypair, KeypairUtil};
use solana::transaction::Transaction;
use std::collections::VecDeque;
use test::Bencher;
#[bench]
fn bench_block_to_blobs_to_block(bencher: &mut Bencher) {
let zero = Hash::default();
let one = hash(&zero.as_ref());
@ -20,21 +20,7 @@ fn bench_block_to_blobs_to_block(bencher: &mut Bencher) {
let blob_recycler = BlobRecycler::default();
bencher.iter(|| {
let mut blob_q = VecDeque::new();
entries.to_blobs(&blob_recycler, &mut blob_q);
assert_eq!(reconstruct_entries_from_blobs(blob_q).unwrap(), entries);
let blobs = entries.to_blobs(&blob_recycler);
assert_eq!(reconstruct_entries_from_blobs(blobs).unwrap(), entries);
});
}
fn bench(criterion: &mut Criterion) {
criterion.bench_function("bench_block_to_blobs_to_block", |bencher| {
bench_block_to_blobs_to_block(bencher);
});
}
criterion_group!(
name = benches;
config = Criterion::default().sample_size(2);
targets = bench
);
criterion_main!(benches);

View File

@ -1,24 +1,12 @@
#[macro_use]
extern crate criterion;
#![feature(test)]
extern crate solana;
extern crate test;
use criterion::{Bencher, Criterion};
use solana::signature::GenKeys;
use test::Bencher;
#[bench]
fn bench_gen_keys(b: &mut Bencher) {
let mut rnd = GenKeys::new([0u8; 32]);
b.iter(|| rnd.gen_n_keypairs(1000));
}
fn bench(criterion: &mut Criterion) {
criterion.bench_function("bench_gen_keys", |bencher| {
bench_gen_keys(bencher);
});
}
criterion_group!(
name = benches;
config = Criterion::default().sample_size(2);
targets = bench
);
criterion_main!(benches);

View File

@ -1,14 +1,15 @@
#[macro_use]
extern crate criterion;
#![feature(test)]
extern crate bincode;
extern crate rayon;
extern crate solana;
extern crate test;
use criterion::{Bencher, Criterion};
use solana::packet::{to_packets, PacketRecycler};
use solana::sigverify;
use solana::transaction::test_tx;
use test::Bencher;
#[bench]
fn bench_sigverify(bencher: &mut Bencher) {
let tx = test_tx();
@ -21,16 +22,3 @@ fn bench_sigverify(bencher: &mut Bencher) {
let _ans = sigverify::ed25519_verify(&batches);
})
}
fn bench(criterion: &mut Criterion) {
criterion.bench_function("bench_sigverify", |bencher| {
bench_sigverify(bencher);
});
}
criterion_group!(
name = benches;
config = Criterion::default().sample_size(2);
targets = bench
);
criterion_main!(benches);

View File

@ -1,15 +1,33 @@
use std::env;
use std::fs;
fn main() {
println!("cargo:rustc-link-search=native=.");
if !env::var("CARGO_FEATURE_CUDA").is_err() {
println!("cargo:rerun-if-changed=target/perf-libs");
println!("cargo:rerun-if-changed=build.rs");
// Ensure target/perf-libs/ exists. It's been observed that
// a cargo:rerun-if-changed= directive with a non-existent
// directory triggers a rebuild on every |cargo build| invocation
fs::create_dir("target/perf-libs").unwrap_or_else(|err| {
if err.kind() != std::io::ErrorKind::AlreadyExists {
panic!("Unable to create target/perf-libs: {:?}", err);
}
});
let cuda = !env::var("CARGO_FEATURE_CUDA").is_err();
let erasure = !env::var("CARGO_FEATURE_ERASURE").is_err();
if cuda || erasure {
println!("cargo:rustc-link-search=native=target/perf-libs");
}
if cuda {
println!("cargo:rustc-link-lib=static=cuda_verify_ed25519");
println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64");
println!("cargo:rustc-link-lib=dylib=cudart");
println!("cargo:rustc-link-lib=dylib=cuda");
println!("cargo:rustc-link-lib=dylib=cudadevrt");
}
if !env::var("CARGO_FEATURE_ERASURE").is_err() {
if erasure {
println!("cargo:rustc-link-lib=dylib=Jerasure");
println!("cargo:rustc-link-lib=dylib=gf_complete");
}

View File

@ -16,10 +16,10 @@ _() {
maybe_cargo_install() {
for cmd in "$@"; do
set +e
set +e
cargo "$cmd" --help > /dev/null 2>&1
declare exitcode=$?
set -e
set -e
if [[ $exitcode -eq 101 ]]; then
_ cargo install cargo-"$cmd"
fi
@ -29,4 +29,4 @@ maybe_cargo_install() {
maybe_cargo_install audit tree
_ cargo tree
_ cargo audit
_ cargo audit || true

View File

@ -1,18 +1,18 @@
steps:
- command: "ci/docker-run.sh solanalabs/rust ci/test-stable.sh"
- command: "ci/docker-run.sh solanalabs/rust:1.28.0 ci/test-stable.sh"
name: "stable [public]"
env:
CARGO_TARGET_CACHE_NAME: "stable"
timeout_in_minutes: 30
- command: "ci/docker-run.sh solanalabs/rust ci/test-bench.sh"
name: "bench [public]"
env:
CARGO_TARGET_CACHE_NAME: "stable"
timeout_in_minutes: 30
# - command: "ci/docker-run.sh solanalabs/rust-nightly ci/test-bench.sh"
# name: "bench [public]"
# env:
# CARGO_TARGET_CACHE_NAME: "nightly"
# timeout_in_minutes: 30
- command: "ci/shellcheck.sh"
name: "shellcheck [public]"
timeout_in_minutes: 20
- command: "ci/docker-run.sh solanalabs/rust-nightly ci/test-nightly.sh"
- command: "ci/docker-run.sh solanalabs/rust-nightly:2018-09-03 ci/test-nightly.sh || true"
name: "nightly [public]"
env:
CARGO_TARGET_CACHE_NAME: "nightly"
@ -24,8 +24,8 @@ steps:
timeout_in_minutes: 20
agents:
- "queue=cuda"
- command: "ci/test-large-network.sh"
name: "large-network [public]"
- command: "ci/test-large-network.sh || true"
name: "large-network [public] [ignored]"
env:
CARGO_TARGET_CACHE_NAME: "stable"
timeout_in_minutes: 20

91
ci/channel-info.sh Executable file
View File

@ -0,0 +1,91 @@
#!/bin/bash
#
# Computes the current branch names of the edge, beta and stable
# channels, as well as the latest tagged release for beta and stable.
#
# stdout of this script may be eval-ed
#

here="$(dirname "$0")"

# shellcheck source=ci/semver_bash/semver.sh
source "$here"/semver_bash/semver.sh

remote=https://github.com/solana-labs/solana.git

# Fetch all vX.Y.Z tags
#
# NOTE: pre-release tags are explicitly ignored
#
# `cut -c52-` strips the "<object name><TAB>refs/tags/" prefix from each line
# (assumes 40-character object names) and the trailing `cut -c2-` strips the
# leading "v", leaving bare X.Y.Z strings.  Both dots in the pattern are
# escaped so that only a literal "." separates the version components.
#
# shellcheck disable=SC2207
tags=( \
  $(git ls-remote --tags $remote \
    | cut -c52- \
    | grep '^v[[:digit:]][[:digit:]]*\.[[:digit:]][[:digit:]]*\.[[:digit:]][[:digit:]]*$' \
    | cut -c2- \
  ) \
)

# Fetch all the vX.Y branches
#
# Same approach as for tags; the prefix is one character longer here
# ("refs/heads/" instead of "refs/tags/"), hence `cut -c53-`.
#
# shellcheck disable=SC2207
heads=( \
  $(git ls-remote --heads $remote \
    | cut -c53- \
    | grep '^v[[:digit:]][[:digit:]]*\.[[:digit:]][[:digit:]]*$' \
    | cut -c2- \
  ) \
)

# Figure the beta channel by looking for the largest vX.Y branch
# (".0" is appended so the X.Y branch names can be handed to semverLT as
# full X.Y.Z versions)
beta=
for head in "${heads[@]}"; do
  if [[ -n $beta ]]; then
    if semverLT "$head.0" "$beta.0"; then
      continue
    fi
  fi
  beta=$head
done

# Figure the stable channel by looking for the second largest vX.Y branch
stable=
for head in "${heads[@]}"; do
  if [[ $head = "$beta" ]]; then
    continue
  fi
  if [[ -n $stable ]]; then
    if semverLT "$head.0" "$stable.0"; then
      continue
    fi
  fi
  stable=$head
done

# Figure the latest tag on each channel.
#
# Start from empty values so that a beta_tag/stable_tag inherited from the
# caller's environment can never leak into the output.
beta_tag=
stable_tag=
for tag in "${tags[@]}"; do
  # Match on "X.Y." (quoted, so it is taken literally) rather than the bare
  # "X.Y" prefix; otherwise branch 0.8 would also claim tags such as 0.80.1.
  if [[ -n $beta && $tag = "$beta".* ]]; then
    if [[ -n $beta_tag ]]; then
      if semverLT "$tag" "$beta_tag"; then
        continue
      fi
    fi
    beta_tag=$tag
  fi

  if [[ -n $stable && $tag = "$stable".* ]]; then
    if [[ -n $stable_tag ]]; then
      if semverLT "$tag" "$stable_tag"; then
        continue
      fi
    fi
    stable_tag=$tag
  fi
done

# The ${var:+v$var} expansions emit an empty value (instead of a bare "v")
# when no matching branch or tag was found.
echo EDGE_CHANNEL=master
echo BETA_CHANNEL="${beta:+v$beta}"
echo STABLE_CHANNEL="${stable:+v$stable}"
echo BETA_CHANNEL_LATEST_TAG="${beta_tag:+v$beta_tag}"
echo STABLE_CHANNEL_LATEST_TAG="${stable_tag:+v$stable_tag}"

exit 0

View File

@ -1,22 +1,30 @@
#!/bin/bash -e
usage() {
echo "Usage: $0 [docker image name] [command]"
echo "Usage: $0 [--nopull] [docker image name] [command]"
echo
echo Runs command in the specified docker image with
echo a CI-appropriate environment
echo a CI-appropriate environment.
echo
echo "--nopull Skip the dockerhub image update"
echo
}
cd "$(dirname "$0")/.."
NOPULL=false
if [[ $1 = --nopull ]]; then
NOPULL=true
shift
fi
IMAGE="$1"
if [[ -z "$IMAGE" ]]; then
echo Error: image not defined
exit 1
fi
docker pull "$IMAGE"
$NOPULL || docker pull "$IMAGE"
shift
ARGS=(
@ -26,9 +34,14 @@ ARGS=(
)
if [[ -n $CI ]]; then
# Share the real ~/.cargo between docker containers in CI for speed
ARGS+=(--volume "$HOME:/home")
ARGS+=(--env "CARGO_HOME=/home/.cargo")
else
# Avoid sharing ~/.cargo when building locally to avoid a mixed macOS/Linux
# ~/.cargo
ARGS+=(--volume "$PWD:/home")
fi
ARGS+=(--env "CARGO_HOME=/home/.cargo")
# kcov tries to set the personality of the binary which docker
# doesn't allow by default.

View File

@ -1,9 +1,10 @@
FROM rustlang/rust:nightly
FROM solanalabs/rust
ARG date
RUN rustup component add clippy-preview --toolchain=nightly && \
echo deb http://ftp.debian.org/debian stretch-backports main >> /etc/apt/sources.list && \
apt update && \
apt install -y \
llvm-6.0 \
&& \
rm -rf /var/lib/apt/lists/*
RUN set -x && \
rustup install nightly-$date && \
rustup default nightly-$date && \
rustup component add clippy-preview --toolchain=nightly-$date && \
rustc --version && \
cargo --version && \
cargo +nightly-$date install cargo-cov

View File

@ -1,6 +1,36 @@
Docker image containing rust nightly and some preinstalled crates used in CI.
This image may be manually updated by running `./build.sh` if you are a member
This image may be manually updated by running `CI=true ./build.sh` if you are a member
of the [Solana Labs](https://hub.docker.com/u/solanalabs/) Docker Hub
organization, but it is also automatically updated periodically by
[this automation](https://buildkite.com/solana-labs/solana-ci-docker-rust-nightly).
## Moving to a newer nightly
We pin the version of nightly (see the `ARG date` line in `Dockerfile` and the
`solanalabs/rust-nightly:YYYY-MM-DD` image tag in `ci/buildkite.yml`)
to avoid the build breaking at unexpected times, as occasionally nightly will
introduce breaking changes.
To update the pinned version:
1. Run `ci/docker-rust-nightly/build.sh` to rebuild the nightly image locally,
or potentially `ci/docker-rust-nightly/build.sh YYYY-MM-DD` if there's a
specific YYYY-MM-DD that is desired (default is today's build).
1. Run `SOLANA_DOCKER_RUN_NOSETUID=1 ci/docker-run.sh --nopull solanalabs/rust-nightly:YYYY-MM-DD ci/test-nightly.sh`
to confirm the new nightly image builds. Fix any issues as needed
1. Run `docker login` to enable pushing images to Docker Hub, if you're authorized.
1. Run `CI=true ci/docker-rust-nightly/build.sh YYYY-MM-DD` to push the new nightly image to Docker Hub.
1. Modify the `solanalabs/rust-nightly:YYYY-MM-DD` reference in `ci/buildkite.yml` from the previous to
new *YYYY-MM-DD* value, send a PR with this change and any codebase adjustments needed.
## Troubleshooting
### Resource is denied
When running `CI=true ci/docker-rust-nightly/build.sh`, you see:
```
denied: requested access to the resource is denied
```
Run `docker login` to enable pushing images to Docker Hub. Contact @mvines or @garious
to get write access.

View File

@ -2,5 +2,12 @@
cd "$(dirname "$0")"
docker build -t solanalabs/rust-nightly .
docker push solanalabs/rust-nightly
nightlyDate=${1:-$(date +%Y-%m-%d)}
docker build -t solanalabs/rust-nightly:"$nightlyDate" --build-arg date="$nightlyDate" .
maybeEcho=
if [[ -z $CI ]]; then
echo "Not CI, skipping |docker push|"
maybeEcho="echo"
fi
$maybeEcho docker push solanalabs/rust-nightly:"$nightlyDate"

View File

@ -1,15 +1,23 @@
# Note: when the rust version (1.28) is changed also modify
# ci/buildkite.yml to pick up the new image tag
FROM rust:1.28
RUN apt update && \
RUN set -x && \
apt update && \
apt-get install apt-transport-https && \
echo deb https://apt.buildkite.com/buildkite-agent stable main > /etc/apt/sources.list.d/buildkite-agent.list && \
echo deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-6.0 main > /etc/apt/sources.list.d/llvm.list && \
apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 32A37959C2FA5C3C99EFBC32A79206696452D198 && \
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
apt update && \
apt install -y \
buildkite-agent \
cmake \
llvm-6.0 \
rsync \
sudo \
cmake \
&& \
rustup component add rustfmt-preview && \
rm -rf /var/lib/apt/lists/*
rm -rf /var/lib/apt/lists/* && \
rustc --version && \
cargo --version

View File

@ -3,4 +3,9 @@
cd "$(dirname "$0")"
docker build -t solanalabs/rust .
read -r rustc version _ < <(docker run solanalabs/rust rustc --version)
[[ $rustc = rustc ]]
docker tag solanalabs/rust:latest solanalabs/rust:"$version"
docker push solanalabs/rust

View File

@ -40,10 +40,10 @@ echo --- Remove unused docker networks
docker network prune -f
)
echo "--- Delete /tmp files older than 1 day owned by $(whoami)"
echo "--- Delete /tmp files older than 1 day owned by $(id -un)"
(
set -x
find /tmp -maxdepth 1 -user "$(whoami)" -mtime +1 -print0 | xargs -0 rm -rf
find /tmp -maxdepth 1 -user "$(id -un)" -mtime +1 -print0 | xargs -0 rm -rf
)
echo --- Deleting stale buildkite agent build directories

View File

@ -1,32 +0,0 @@
#!/bin/bash -x
#
# Install EarlyOOM
#
[[ $(uname) = Linux ]] || exit 1
# 64 - enable signalling of processes (term, kill, oom-kill)
# TODO: This setting will not persist across reboots
sysrq=$(( $(cat /proc/sys/kernel/sysrq) | 64 ))
sudo sysctl -w kernel.sysrq=$sysrq
if command -v earlyoom; then
sudo systemctl status earlyoom
exit 0
fi
wget http://ftp.us.debian.org/debian/pool/main/e/earlyoom/earlyoom_1.1-2_amd64.deb
sudo apt install --quiet --yes ./earlyoom_1.1-2_amd64.deb
cat > earlyoom <<OOM
# use the kernel OOM killer, trigger at 20% available RAM,
EARLYOOM_ARGS="-k -m 20"
OOM
sudo cp earlyoom /etc/default/
rm earlyoom
sudo systemctl stop earlyoom
sudo systemctl enable earlyoom
sudo systemctl start earlyoom
exit 0

View File

@ -6,9 +6,9 @@
cd "$(dirname "$0")"/..
source ci/upload_ci_artifact.sh
source multinode-demo/common.sh
source scripts/configure-metrics.sh
./multinode-demo/setup.sh
multinode-demo/setup.sh
backgroundCommands="drone leader validator validator-x"
pids=()
@ -16,7 +16,7 @@ pids=()
for cmd in $backgroundCommands; do
echo "--- Start $cmd"
rm -f log-"$cmd".txt
./multinode-demo/"$cmd".sh > log-"$cmd".txt 2>&1 &
multinode-demo/"$cmd".sh > log-"$cmd".txt 2>&1 &
declare pid=$!
pids+=("$pid")
echo "pid: $pid"
@ -64,21 +64,28 @@ flag_error() {
echo "--- Wallet sanity"
(
set -x
multinode-demo/test/wallet-sanity.sh
scripts/wallet-sanity.sh
) || flag_error
echo "--- Node count"
(
source multinode-demo/common.sh
set -x
./multinode-demo/client.sh "$PWD" 3 -c --addr 127.0.0.1
client_id=/tmp/client-id.json-$$
$solana_keygen -o $client_id
$solana_bench_tps --identity $client_id --num-nodes 3 --converge-only
rm -rf $client_id
) || flag_error
killBackgroundCommands
echo "--- Ledger verification"
(
source multinode-demo/common.sh
set -x
$solana_ledger_tool --ledger "$SOLANA_CONFIG_DIR"/ledger verify
cp -R "$SOLANA_CONFIG_DIR"/ledger /tmp/ledger-$$
$solana_ledger_tool --ledger /tmp/ledger-$$ verify
rm -rf /tmp/ledger-$$
) || flag_error
echo +++

26
ci/semver_bash/LICENSE Normal file
View File

@ -0,0 +1,26 @@
Copyright (c) 2013, Ray Bejjani
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The views and conclusions contained in the software and documentation are those
of the authors and should not be interpreted as representing official policies,
either expressed or implied, of the FreeBSD Project.

31
ci/semver_bash/README.md Normal file
View File

@ -0,0 +1,31 @@
semver_bash is a bash parser for semantic versioning
====================================================
[Semantic Versioning](http://semver.org/) is a set of guidelines that help keep
version and version management sane. This is a bash based parser to help manage
a project's versions. Use it from a Makefile or any scripts you use in your
project.
Usage
-----
semver_bash can be used from the command line as:
$ ./semver.sh "3.2.1" "3.2.1-alpha"
3.2.1 -> M: 3 m:2 p:1 s:
3.2.1-alpha -> M: 3 m:2 p:1 s:-alpha
3.2.1 == 3.2.1-alpha -> 1.
3.2.1 < 3.2.1-alpha -> 1.
3.2.1 > 3.2.1-alpha -> 0.
Alternatively, you can source it from within a script:
. ./semver.sh
local MAJOR=0
local MINOR=0
local PATCH=0
local SPECIAL=""
semverParseInto "1.2.3" MAJOR MINOR PATCH SPECIAL
semverParseInto "3.2.1" MAJOR MINOR PATCH SPECIAL

130
ci/semver_bash/semver.sh Executable file
View File

@ -0,0 +1,130 @@
#!/usr/bin/env bash
#
# NOTE: this file uses bash-only features (`function`, `local`, `[[ ]]`,
# `printf -v`), so it has to be interpreted by bash rather than a POSIX sh.

# semverParseInto VERSION MAJOR_VAR MINOR_VAR PATCH_VAR SPECIAL_VAR
#
# Splits VERSION into its components and stores each of them in the variable
# whose *name* is passed as the corresponding argument.
#
#   VERSION      version string, e.g. "3.2.1-alpha" or "R1.3.2"; a leading run of
#                non-digit characters is consumed by the expression and dropped
#   MAJOR_VAR    name of the variable receiving the first number
#   MINOR_VAR    name of the variable receiving the second number
#   PATCH_VAR    name of the variable receiving the third number
#   SPECIAL_VAR  name of the variable receiving the letters/digits/dashes that
#                directly follow the patch number ("" when there are none)
#
# sed only replaces the part of VERSION that the expression matches; any text
# outside the match is left in the stored value.
#
# The output variable names must not be "RE", which is used locally here.
function semverParseInto() {
    local RE='[^0-9]*\([0-9]*\)[.]\([0-9]*\)[.]\([0-9]*\)\([0-9A-Za-z-]*\)'

    # Values are stored with `printf -v` instead of `eval` so that the version
    # string is treated purely as data and is never re-parsed as shell code.
    #MAJOR
    printf -v "$2" '%s' "$(printf '%s\n' "$1" | sed -e "s#$RE#\1#")"
    #MINOR
    printf -v "$3" '%s' "$(printf '%s\n' "$1" | sed -e "s#$RE#\2#")"
    #PATCH
    printf -v "$4" '%s' "$(printf '%s\n' "$1" | sed -e "s#$RE#\3#")"
    #SPECIAL
    printf -v "$5" '%s' "$(printf '%s\n' "$1" | sed -e "s#$RE#\4#")"
}
# semverEQ A B
#
# Returns 0 when versions A and B have the same major, minor and patch numbers
# and the same special suffix; returns 1 as soon as any of them differs.
function semverEQ() {
    local a_major=0 a_minor=0 a_patch=0 a_special=0
    local b_major=0 b_minor=0 b_patch=0 b_special=0

    semverParseInto $1 a_major a_minor a_patch a_special
    semverParseInto $2 b_major b_minor b_patch b_special

    # The checks run in major, minor, patch, suffix order and stop at the
    # first mismatch.
    if [ $a_major -ne $b_major ] ||
        [ $a_minor -ne $b_minor ] ||
        [ $a_patch -ne $b_patch ] ||
        [[ "_$a_special" != "_$b_special" ]]; then
        return 1
    fi

    return 0
}
# semverLT A B
#
# Returns 0 when version A sorts strictly before version B, 1 otherwise.
#
# Ordering rules:
#   1. major, minor and patch are compared numerically, in that order; the
#      first component that differs decides the result;
#   2. when all three numbers are equal, a version WITH a special suffix sorts
#      before the same version WITHOUT one (e.g. 1.3.2a < 1.3.2);
#   3. two suffixes are compared as strings with the `<` operator of `[[ ]]`.
function semverLT() {
    local MAJOR_A=0
    local MINOR_A=0
    local PATCH_A=0
    local SPECIAL_A=0

    local MAJOR_B=0
    local MINOR_B=0
    local PATCH_B=0
    local SPECIAL_B=0

    semverParseInto "$1" MAJOR_A MINOR_A PATCH_A SPECIAL_A
    semverParseInto "$2" MAJOR_B MINOR_B PATCH_B SPECIAL_B

    # Numeric precedence. Returning 1 as soon as A is numerically *greater* is
    # essential: without it a newer version would fall through to the suffix
    # rules below and e.g. "2.0.0-rc1" would be reported as lower than "1.0.0".
    if [ "$MAJOR_A" -lt "$MAJOR_B" ]; then
        return 0
    fi
    if [ "$MAJOR_A" -gt "$MAJOR_B" ]; then
        return 1
    fi

    if [ "$MINOR_A" -lt "$MINOR_B" ]; then
        return 0
    fi
    if [ "$MINOR_A" -gt "$MINOR_B" ]; then
        return 1
    fi

    if [ "$PATCH_A" -lt "$PATCH_B" ]; then
        return 0
    fi
    if [ "$PATCH_A" -gt "$PATCH_B" ]; then
        return 1
    fi

    # All three numbers are equal from here on; only the suffix can decide.

    # A has no suffix: it is either identical to B or sorts after it.
    if [[ "_$SPECIAL_A" == "_" ]]; then
        return 1
    fi

    # A has a suffix and B has none: A sorts first.
    if [[ "_$SPECIAL_B" == "_" ]]; then
        return 0
    fi

    # Both have a suffix: plain string comparison.
    if [[ "_$SPECIAL_A" < "_$SPECIAL_B" ]]; then
        return 0
    fi

    return 1
}
# semverGT A B
#
# Returns 0 when version A is neither equal to nor lower than version B, and 1
# otherwise. Both semverEQ and semverLT are always evaluated.
function semverGT() {
    semverEQ $1 $2
    local eq_status=$?

    semverLT $1 $2
    local lt_status=$?

    # If either relation holds (status 0), A cannot be greater than B.
    if [ $eq_status -eq 0 ] || [ $lt_status -eq 0 ]; then
        return 1
    fi

    return 0
}
# Command-line entry point: runs only when this file is executed under the name
# "semver.sh" (not when it is sourced from a script with a different name).
# Prints the parsed components of both arguments, then the exit status of each
# comparison function; a status of 0 means the relation holds.
if [ "___$(basename $0)" == "___semver.sh" ]; then
    MAJOR=0
    MINOR=0
    PATCH=0
    SPECIAL=""

    # Show how each of the two versions is split up.
    for version in "$1" "$2"; do
        semverParseInto $version MAJOR MINOR PATCH SPECIAL
        echo "$version -> M: $MAJOR m:$MINOR p:$PATCH s:$SPECIAL"
    done

    # Equality
    semverEQ $1 $2
    echo "$1 == $2 -> $?."

    # Strictly lower
    semverLT $1 $2
    echo "$1 < $2 -> $?."

    # Strictly greater
    semverGT $1 $2
    echo "$1 > $2 -> $?."
fi

151
ci/semver_bash/semver_test.sh Executable file
View File

@ -0,0 +1,151 @@
#!/usr/bin/env bash
. ./semver.sh
# semverTestCompare LEFT RIGHT EXPECT_EQ EXPECT_LT EXPECT_GT
#
# Runs semverEQ, semverLT and semverGT on the pair (LEFT, RIGHT) and prints the
# exit status of each next to the expected status. Nothing is asserted; the
# output is meant to be read by a person.
semverTestCompare() {
    semverEQ $1 $2
    echo "$1 == $2 -> $?. Expect $3."

    semverLT $1 $2
    echo "$1 < $2 -> $?. Expect $4."

    semverGT $1 $2
    echo "$1 > $2 -> $?. Expect $5."
}

# Prints the parse results and the comparison results for a fixed set of sample
# versions, each followed by the expected value.
semverTest() {
    # Sample versions: A is the baseline, the others differ from it in exactly
    # the component named in the section headings printed below.
    local A=R1.3.2
    local B=R2.3.2
    local C=R1.4.2
    local D=R1.3.3
    local E=R1.3.2a
    local F=R1.3.2b
    local G=R1.2.3

    local MAJOR=0
    local MINOR=0
    local PATCH=0
    local SPECIAL=""

    semverParseInto $A MAJOR MINOR PATCH SPECIAL
    echo "$A -> M:$MAJOR m:$MINOR p:$PATCH s:$SPECIAL. Expect M:1 m:3 p:2 s:"
    semverParseInto $E MAJOR MINOR PATCH SPECIAL
    echo "$E -> M:$MAJOR m:$MINOR p:$PATCH s:$SPECIAL. Expect M:1 m:3 p:2 s:a"

    echo "Equality comparisions"
    semverTestCompare $A $A 0 1 1

    echo "Major number comparisions"
    semverTestCompare $A $B 1 0 1
    semverTestCompare $B $A 1 1 0

    echo "Minor number comparisions"
    semverTestCompare $A $C 1 0 1
    semverTestCompare $C $A 1 1 0

    echo "patch number comparisions"
    semverTestCompare $A $D 1 0 1
    semverTestCompare $D $A 1 1 0

    echo "special section vs no special comparisions"
    semverTestCompare $A $E 1 1 0
    semverTestCompare $E $A 1 0 1

    echo "special section vs special comparisions"
    semverTestCompare $E $F 1 0 1
    semverTestCompare $F $E 1 1 0

    echo "Minor and patch number comparisons"
    semverTestCompare $A $G 1 1 0
    semverTestCompare $G $A 1 0 1
}
semverTest

View File

@ -6,6 +6,7 @@ cd "$(dirname "$0")/.."
set -x
find . -name "*.sh" \
-not -regex ".*/ci/semver_bash/.*" \
-not -regex ".*/.cargo/.*" \
-not -regex ".*/node_modules/.*" \
-not -regex ".*/target/.*" \

View File

@ -7,16 +7,21 @@ if [[ -z $BUILDKITE_BRANCH ]] || ./ci/is-pr.sh; then
DRYRUN="echo"
fi
# BUILDKITE_TAG is the normal environment variable set by Buildkite. However
# when this script is run from a triggered pipeline, TRIGGERED_BUILDKITE_TAG is
# used instead of BUILDKITE_TAG (due to Buildkite limitations that prevents
# BUILDKITE_TAG from propagating through to triggered pipelines)
if [[ -z "$BUILDKITE_TAG" && -z "$TRIGGERED_BUILDKITE_TAG" ]]; then
eval "$(ci/channel-info.sh)"
if [[ $BUILDKITE_BRANCH = "$STABLE_CHANNEL" ]]; then
SNAP_CHANNEL=stable
elif [[ $BUILDKITE_BRANCH = "$EDGE_CHANNEL" ]]; then
SNAP_CHANNEL=edge
else
elif [[ $BUILDKITE_BRANCH = "$BETA_CHANNEL" ]]; then
SNAP_CHANNEL=beta
fi
if [[ -z $SNAP_CHANNEL ]]; then
echo Unable to determine channel to publish into, exiting.
exit 0
fi
if [[ -z $DRYRUN ]]; then
[[ -n $SNAPCRAFT_CREDENTIALS_KEY ]] || {
echo SNAPCRAFT_CREDENTIALS_KEY not defined
@ -39,15 +44,18 @@ set -x
echo --- checking for multilog
if [[ ! -x /usr/bin/multilog ]]; then
echo "multilog not found, install with: sudo apt-get install -y daemontools"
exit 1
if [[ -z $CI ]]; then
echo "multilog not found, install with: sudo apt-get install -y daemontools"
exit 1
fi
sudo apt-get install -y daemontools
fi
echo --- build
echo --- build: $SNAP_CHANNEL channel
snapcraft
source ci/upload_ci_artifact.sh
upload_ci_artifact solana_*.snap
echo --- publish
echo --- publish: $SNAP_CHANNEL channel
$DRYRUN snapcraft push solana_*.snap --release $SNAP_CHANNEL

View File

@ -2,7 +2,7 @@
cd "$(dirname "$0")/.."
ci/version-check.sh stable
ci/version-check.sh nightly
export RUST_BACKTRACE=1
_() {
@ -10,4 +10,4 @@ _() {
"$@"
}
_ cargo bench --verbose
_ cargo bench --features=unstable --verbose

View File

@ -12,7 +12,7 @@ fi
export RUST_BACKTRACE=1
./fetch-perf-libs.sh
export LD_LIBRARY_PATH+=:$PWD
export LD_LIBRARY_PATH=$PWD/target/perf-libs:$LD_LIBRARY_PATH
export RUST_LOG=multinode=info

View File

@ -11,8 +11,11 @@ _() {
}
_ cargo build --verbose --features unstable
_ cargo test --verbose --features unstable
_ cargo clippy -- --deny=warnings
_ cargo test --verbose --features=unstable
# TODO: Re-enable warnings-as-errors after clippy offers a way to not warn on unscoped lint names.
#_ cargo clippy -- --deny=warnings
_ cargo clippy
exit 0
@ -28,4 +31,3 @@ if [[ -z "$CODECOV_TOKEN" ]]; then
else
bash <(curl -s https://codecov.io/bash) -x 'llvm-cov-6.0 gcov'
fi

View File

@ -11,7 +11,7 @@ fi
export RUST_BACKTRACE=1
./fetch-perf-libs.sh
export LD_LIBRARY_PATH=$PWD:/usr/local/cuda/lib64
export LD_LIBRARY_PATH=$PWD/target/perf-libs:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
export PATH=$PATH:/usr/local/cuda/bin
_() {

View File

@ -22,4 +22,4 @@ echo --- ci/localnet-sanity.sh
USE_INSTALL=1 ci/localnet-sanity.sh
)
_ ci/audit.sh
_ ci/audit.sh || true

View File

@ -1,471 +1,113 @@
#!/bin/bash -e
#
# Deploys the Solana software running on the testnet full nodes
#
# This script must be run by a user/machine that has successfully authenticated
# with GCP and has sufficient permission.
#
here=$(dirname "$0")
metrics_write_datapoint="$here"/../multinode-demo/metrics_write_datapoint.sh
# TODO: Switch over to rolling updates
ROLLING_UPDATE=false
#ROLLING_UPDATE=true
cd "$(dirname "$0")"/..
if [[ -z $SOLANA_METRICS_CONFIG ]]; then
echo Error: SOLANA_METRICS_CONFIG environment variable is unset
exit 1
fi
zone=
leaderAddress=
clientNodeCount=0
validatorNodeCount=10
publicNetwork=false
snapChannel=edge
delete=false
# Default to edge channel. To select the beta channel:
# export SOLANA_SNAP_CHANNEL=beta
if [[ -z $SOLANA_SNAP_CHANNEL ]]; then
SOLANA_SNAP_CHANNEL=edge
fi
usage() {
exitcode=0
if [[ -n "$1" ]]; then
exitcode=1
echo "Error: $*"
fi
cat <<EOF
usage: $0 [name] [zone] [options...]
# Select default network URL based on SOLANA_SNAP_CHANNEL if SOLANA_NET_ENTRYPOINT is
# unspecified
if [[ -z $SOLANA_NET_ENTRYPOINT ]]; then
case $SOLANA_SNAP_CHANNEL in
edge)
SOLANA_NET_ENTRYPOINT=master.testnet.solana.com
unset SOLANA_NET_NAME
Deploys a CD testnet
name - name of the network
zone - GCE to deploy the network into
options:
-s edge|beta|stable - Deploy the specified Snap release channel
(default: $snapChannel)
-n [number] - Number of validator nodes (default: $validatorNodeCount)
-c [number] - Number of client nodes (default: $clientNodeCount)
-P - Use public network IP addresses (default: $publicNetwork)
-a [address] - Set the leader node's external IP address to this GCE address
-d - Delete the network
Note: the SOLANA_METRICS_CONFIG environment variable is used to configure
metrics
EOF
exit $exitcode
}
netName=$1
zone=$2
[[ -n $netName ]] || usage
[[ -n $zone ]] || usage "Zone not specified"
shift 2
while getopts "h?p:Pn:c:s:a:d" opt; do
case $opt in
h | \?)
usage
;;
beta)
SOLANA_NET_ENTRYPOINT=testnet.solana.com
unset SOLANA_NET_NAME
P)
publicNetwork=true
;;
n)
validatorNodeCount=$OPTARG
;;
c)
clientNodeCount=$OPTARG
;;
s)
case $OPTARG in
edge|beta|stable)
snapChannel=$OPTARG
;;
*)
usage "Invalid snap channel: $OPTARG"
;;
esac
;;
a)
leaderAddress=$OPTARG
;;
d)
delete=true
;;
*)
echo Error: Unknown SOLANA_SNAP_CHANNEL=$SOLANA_SNAP_CHANNEL
exit 1
usage "Error: unhandled option: $opt"
;;
esac
done
gce_create_args=(
-a "$leaderAddress"
-c "$clientNodeCount"
-n "$validatorNodeCount"
-g
-p "$netName"
-z "$zone"
)
if $publicNetwork; then
gce_create_args+=(-P)
fi
if [[ -z $SOLANA_NET_NAME ]]; then
SOLANA_NET_NAME=${SOLANA_NET_ENTRYPOINT//./-}
set -x
echo --- gce.sh delete
time net/gce.sh delete -p "$netName"
if $delete; then
exit 0
fi
: ${SOLANA_NET_NAME:?$SOLANA_NET_ENTRYPOINT}
netBasename=${SOLANA_NET_NAME/-*/}
if [[ $netBasename != testnet ]]; then
netBasename="testnet-$netBasename"
fi
echo --- gce.sh create
time net/gce.sh create "${gce_create_args[@]}"
net/init-metrics.sh -e
# Figure installation command
SNAP_INSTALL_CMD="\
for i in {1..3}; do \
sudo snap install solana --$SOLANA_SNAP_CHANNEL --devmode && break;
sleep 1; \
done \
"
LOCAL_SNAP=$1
if [[ -n $LOCAL_SNAP ]]; then
if [[ ! -f $LOCAL_SNAP ]]; then
echo "Error: $LOCAL_SNAP is not a file"
exit 1
fi
SNAP_INSTALL_CMD="sudo snap install ~/solana_local.snap --devmode --dangerous"
fi
SNAP_INSTALL_CMD="sudo snap remove solana; $SNAP_INSTALL_CMD"
EARLYOOM_INSTALL_CMD="\
wget -O install-earlyoom.sh https://raw.githubusercontent.com/solana-labs/solana/master/ci/install-earlyoom.sh; \
bash install-earlyoom.sh \
"
SNAP_INSTALL_CMD="$EARLYOOM_INSTALL_CMD; $SNAP_INSTALL_CMD"
# `export SKIP_INSTALL=1` to reset the network without reinstalling the snap
if [[ -n $SKIP_INSTALL ]]; then
SNAP_INSTALL_CMD="echo Install skipped"
fi
echo "+++ Configuration for $netBasename"
publicUrl="$SOLANA_NET_ENTRYPOINT"
if [[ $publicUrl = testnet.solana.com ]]; then
publicIp="" # Use default value
else
publicIp=$(dig +short $publicUrl | head -n1)
fi
echo "Network name: $SOLANA_NET_NAME"
echo "Network entry point URL: $publicUrl ($publicIp)"
echo "Snap channel: $SOLANA_SNAP_CHANNEL"
echo "Install command: $SNAP_INSTALL_CMD"
echo "Setup args: $SOLANA_SETUP_ARGS"
[[ -z $LOCAL_SNAP ]] || echo "Local snap: $LOCAL_SNAP"
vmlist=() # Each array element is formatted as "class:vmName:vmZone:vmPublicIp"
# vm_exec vmName vmZone vmPublicIp message cmd
#
# Prints a "--- ..." log header built from the message, VM name, zone and IP,
# then runs cmd on the VM over ssh as the testnet-deploy user. ssh runs in
# batch mode with host key checking turned off and no known_hosts file.
vm_exec() {
    local vmName=$1 vmZone=$2 vmPublicIp=$3
    local message=$4
    local cmd=$5

    echo "--- $message $vmName in zone $vmZone ($vmPublicIp)"
    ssh \
        -o BatchMode=yes \
        -o StrictHostKeyChecking=no \
        -o UserKnownHostsFile=/dev/null \
        testnet-deploy@"$vmPublicIp" "$cmd"
}
#
# vm_foreach [cmd] [extra args to cmd]
# where
# cmd - the command to execute on each VM
# The command will receive five fixed arguments, followed by any
# additional arguments supplied to vm_foreach:
# vmName - GCP name of the VM
# vmZone - The GCP zone the VM is located in
# vmPublicIp - The public IP address of this VM
# vmClass - The 'class' of this VM
# count - Monotonically increasing count for each
# invocation of cmd, starting at 1
# ... - Extra args to cmd..
#
# The VMs are taken from the global `vmlist` array, whose elements have the form
# "class:vmName:vmZone:vmPublicIp".
#
vm_foreach() {
declare cmd=$1
shift
declare count=1
for info in "${vmlist[@]}"; do
declare vmClass vmName vmZone vmPublicIp
# Split the colon-separated vmlist entry into its four fields.
IFS=: read -r vmClass vmName vmZone vmPublicIp < <(echo "$info")
# cmd is run through eval, so it and every argument are re-parsed by the shell.
eval "$cmd" "$vmName" "$vmZone" "$vmPublicIp" "$vmClass" "$count" "$@"
count=$((count + 1))
done
}
#
# vm_foreach_in_class [class] [cmd]
# where
#   class - the desired VM class to operate on
#   cmd   - the command to execute on each VM in the desired class.
#           The command will receive four arguments:
#             vmName     - GCP name of the VM
#             vmZone     - The GCP zone the VM is located in
#             vmPublicIp - The public IP address of this VM
#             count      - Monotonically increasing count for each VM visited
#                          by vm_foreach (of any class), starting at 1
#

# Helper invoked by vm_foreach for every VM: forwards to cmd only when the VM's
# class matches the requested class, and does nothing (status 0) otherwise.
_run_cmd_if_class() {
    local vmName=$1 vmZone=$2 vmPublicIp=$3
    local vmClass=$4
    local count=$5
    local class=$6
    local cmd=$7

    if [[ $class != "$vmClass" ]]; then
        return 0
    fi

    eval "$cmd" "$vmName" "$vmZone" "$vmPublicIp" "$count"
}

vm_foreach_in_class() {
    local class=$1
    local cmd=$2

    vm_foreach _run_cmd_if_class "$class" "$cmd"
}
#
# findVms [class] [filter]
#
# Appends every RUNNING VM that matches the gcloud filter to the global `vmlist`
# array as "class:vmName:vmZone:vmPublicIp". VMs in any other state are reported
# with a warning and left out.
findVms() {
    local class="$1"
    local filter="$2"

    # First listing: default gcloud output, printed as-is for the log.
    gcloud compute instances list --filter="$filter"

    # Second listing: machine-readable fields, one VM per line.
    while read -r vmName vmZone vmPublicIp status; do
        if [[ $status = RUNNING ]]; then
            vmlist+=("$class:$vmName:$vmZone:$vmPublicIp")
        else
            echo "Warning: $vmName is not RUNNING, ignoring it."
        fi
    done < <(
        gcloud compute instances list \
            --filter="$filter" \
            --format 'value(name,zone,networkInterfaces[0].accessConfigs[0].natIP,status)'
    )
}
# wait_for_pids [description...]
#
# Waits for every background job listed in the global `pids` array, in order.
# Each job's output is expected in "log-<pid>.txt"; it is printed once the job
# has finished. A successful job's log file is then removed; the first failing
# job terminates the whole script with status 1 (its log file is kept).
wait_for_pids() {
    echo "--- Waiting for $*"

    for pid in "${pids[@]}"; do
        if wait "$pid"; then
            cat "log-$pid.txt"
            rm "log-$pid.txt"
        else
            cat "log-$pid.txt"
            echo ^^^ +++
            exit 1
        fi
    done
}
# delete_unreachable_validators vmName vmZone vmPublicIp
#
# Starts a background job that runs `uptime` on the VM through vm_exec and, if
# that fails, deletes the instance with gcloud. The job's output is collected in
# a log file and its pid is appended to the global `pids` array, so that
# wait_for_pids can later wait for it and print the log.
delete_unreachable_validators() {
declare vmName=$1
declare vmZone=$2
declare vmPublicIp=$3
# Create the log file up front so that the rename below always has a file to move.
touch "log-$vmName.txt"
(
# Resetting SECONDS makes the elapsed time printed below relative to this point.
SECONDS=0
if ! vm_exec "$vmName" "$vmZone" "$vmPublicIp" "Checking $vmName" uptime; then
echo "^^^ +++"
# Validators are managed by a Compute Engine Instance Group, so deleting
# one will just cause a new one to be spawned.
echo "Warning: $vmName is unreachable, deleting it"
gcloud compute instances delete "$vmName" --zone "$vmZone"
fi
echo "validator checked in ${SECONDS} seconds"
) >> "log-$vmName.txt" 2>&1 &
declare pid=$!
# Rename log file so it can be discovered later by $pid
mv "log-$vmName.txt" "log-$pid.txt"
pids+=("$pid")
}
echo "Validator nodes (unverified):"
findVms validator "name~^$SOLANA_NET_NAME-validator-"
pids=()
vm_foreach_in_class validator delete_unreachable_validators
wait_for_pids validator sanity check
vmlist=()
echo "Leader node:"
findVms leader "name=$SOLANA_NET_NAME"
[[ ${#vmlist[@]} = 1 ]] || {
echo "Unable to find $SOLANA_NET_NAME"
exit 1
}
echo "Client node(s):"
findVms client "name~^$SOLANA_NET_NAME-client"
echo "Validator nodes:"
findVms validator "name~^$SOLANA_NET_NAME-validator-"
fullnode_count=0
# Adds one to the global fullnode_count. Any arguments are ignored, which lets
# it be used as a per-VM callback of vm_foreach_in_class.
inc_fullnode_count() {
    : $((fullnode_count += 1))
}
vm_foreach_in_class leader inc_fullnode_count
vm_foreach_in_class validator inc_fullnode_count
# Add "network stopping" datapoint
$metrics_write_datapoint "testnet-deploy,name=$netBasename stop=1"
# client_start vmName vmZone vmPublicIp count
#
# Starts the bench-tps client on one client VM through vm_exec. The remote
# script:
#   - prints the snap information and configuration;
#   - sets threadCount to the result of nproc, capped at 4;
#   - kills any existing tmux session named "solana" and starts a new detached
#     one that removes /tmp/solana.log and then runs solana.bench-tps in an
#     endless loop, appending its output to /tmp/solana.log;
#   - each time bench-tps exits, posts a "clientexit=1" datapoint with curl and
#     logs an error line before looping again;
#   - finally waits two seconds and prints the tmux pane and the log tail.
#
# The whole script is one double-quoted string: unescaped references such as
# $SOLANA_NET_ENTRYPOINT, $fullnode_count, $netBasename and ${INFLUX_*} are
# expanded on the deploying machine before the string is sent, while escaped
# ones (\$threadCount, \$(nproc)) are left for the VM's shell to evaluate.
client_start() {
declare vmName=$1
declare vmZone=$2
declare vmPublicIp=$3
declare count=$4
vm_exec "$vmName" "$vmZone" "$vmPublicIp" \
"Starting client $count:" \
"\
set -x;
snap info solana; \
sudo snap get solana; \
threadCount=\$(nproc); \
if [[ \$threadCount -gt 4 ]]; then threadCount=4; fi; \
tmux kill-session -t solana; \
tmux new -s solana -d \" \
set -x; \
sudo rm /tmp/solana.log; \
while : ; do \
/snap/bin/solana.bench-tps $SOLANA_NET_ENTRYPOINT $fullnode_count --loop -s 600 --sustained -t \$threadCount 2>&1 | tee -a /tmp/solana.log; \
echo 'https://metrics.solana.com:8086/write?db=${INFLUX_DATABASE}&u=${INFLUX_USERNAME}&p=${INFLUX_PASSWORD}' \
| xargs curl --max-time 5 -XPOST --data-binary 'testnet-deploy,name=$netBasename clientexit=1'; \
echo Error: bench-tps should never exit | tee -a /tmp/solana.log; \
done; \
bash \
\"; \
sleep 2; \
tmux capture-pane -t solana -p -S -100; \
tail /tmp/solana.log; \
"
}
# client_stop vmName vmZone vmPublicIp count
#
# Starts a background job that stops the client on one VM: it lists the tmux
# sessions, prints the "solana" pane, kills that session, runs
# $SNAP_INSTALL_CMD and then applies the metrics-config, rust-log and
# default-metrics-rate snap settings. The job's output is collected in a log
# file and its pid is appended to the global `pids` array for wait_for_pids.
#
# $SNAP_INSTALL_CMD, $SOLANA_METRICS_CONFIG, $RUST_LOG and
# $SOLANA_DEFAULT_METRICS_RATE are expanded on the deploying machine, because
# the remote script is a double-quoted string.
client_stop() {
declare vmName=$1
declare vmZone=$2
declare vmPublicIp=$3
declare count=$4
# Create the log file up front so that the rename below always has a file to move.
touch "log-$vmName.txt"
(
# Resetting SECONDS makes the elapsed time printed below relative to this point.
SECONDS=0
vm_exec "$vmName" "$vmZone" "$vmPublicIp" \
"Stopping client $vmName ($count):" \
"\
set -x;
tmux list-sessions; \
tmux capture-pane -t solana -p; \
tmux kill-session -t solana; \
$SNAP_INSTALL_CMD; \
sudo snap set solana metrics-config=$SOLANA_METRICS_CONFIG \
rust-log=$RUST_LOG \
default-metrics-rate=$SOLANA_DEFAULT_METRICS_RATE \
; \
"
echo "Client stopped in ${SECONDS} seconds"
) >> "log-$vmName.txt" 2>&1 &
declare pid=$!
# Rename log file so it can be discovered later by $pid
mv "log-$vmName.txt" "log-$pid.txt"
pids+=("$pid")
}
# fullnode_start class vmName vmZone vmPublicIp count
#
# Starts a background job that (re)installs and configures the solana snap on
# one full node. `class` selects the configuration:
#   leader    - mode=leader+drone, plus enable-cuda=1 when SOLANA_CUDA is set
#   otherwise - mode=validator with leader-address set to the global $publicIp
# Both variants also receive the rust-log, default-metrics-rate, metrics-config
# and setup-args settings.
#
# The remote script writes a marker line to syslog, runs $SNAP_INSTALL_CMD,
# applies the configuration with `snap set`, prints the snap information and
# finally prints the syslog content starting at the marker. The job's output is
# collected in a log file and its pid is appended to the global `pids` array for
# wait_for_pids.
fullnode_start() {
declare class=$1
declare vmName=$2
declare vmZone=$3
declare vmPublicIp=$4
declare count=$5
# Create the log file up front so that the rename below always has a file to move.
touch "log-$vmName.txt"
(
# Resetting SECONDS makes the elapsed time printed below relative to this point.
SECONDS=0
commonNodeConfig="\
rust-log=$RUST_LOG \
default-metrics-rate=$SOLANA_DEFAULT_METRICS_RATE \
metrics-config=$SOLANA_METRICS_CONFIG \
setup-args=$SOLANA_SETUP_ARGS \
"
if [[ $class = leader ]]; then
nodeConfig="mode=leader+drone $commonNodeConfig"
if [[ -n $SOLANA_CUDA ]]; then
nodeConfig="$nodeConfig enable-cuda=1"
fi
else
nodeConfig="mode=validator leader-address=$publicIp $commonNodeConfig"
fi
# NOTE(review): the remote script is a double-quoted string, so $(date) and
# $RANDOM in the logmarker line appear to be expanded on the deploying machine
# rather than on the VM; only the \$-escaped references are evaluated remotely.
vm_exec "$vmName" "$vmZone" "$vmPublicIp" "Starting $class $count:" \
"\
set -ex; \
logmarker='solana deploy $(date)/$RANDOM'; \
logger \"\$logmarker\"; \
$SNAP_INSTALL_CMD; \
sudo snap set solana $nodeConfig; \
snap info solana; \
sudo snap get solana; \
echo Slight delay to get more syslog output; \
sleep 2; \
sudo grep -Pzo \"\$logmarker(.|\\n)*\" /var/log/syslog \
"
echo "Succeeded in ${SECONDS} seconds"
) >> "log-$vmName.txt" 2>&1 &
declare pid=$!
# Rename log file so it can be discovered later by $pid
mv "log-$vmName.txt" "log-$pid.txt"
pids+=("$pid")
}
# leader_start vmName vmZone vmPublicIp count
#
# Per-VM callback that starts a leader: forwards all of its arguments to
# fullnode_start with the class fixed to "leader".
leader_start() {
fullnode_start leader "$@"
}
# validator_start vmName vmZone vmPublicIp count
#
# Per-VM callback that starts a validator: forwards all of its arguments to
# fullnode_start with the class fixed to "validator".
validator_start() {
fullnode_start validator "$@"
}
# fullnode_stop vmName vmZone vmPublicIp count
#
# Starts a background job that shuts down the full node on one VM by setting the
# solana snap's `mode` to an empty value (only when the snap is installed). The
# job's output is collected in a log file and its pid is appended to the global
# `pids` array for wait_for_pids. The `count` argument is accepted but not used.
fullnode_stop() {
declare vmName=$1
declare vmZone=$2
declare vmPublicIp=$3
declare count=$4
# Create the log file up front so that the rename below always has a file to move.
touch "log-$vmName.txt"
(
# Resetting SECONDS makes the elapsed time printed below relative to this point.
SECONDS=0
# Try to ping the machine first. When a machine (validator) is restarted,
# there can be a delay between when the instance is reported as RUNNING and when
# it's reachable over the network
# The ping is retried until it succeeds, for at most 30 seconds in total.
timeout 30s bash -c "set -o pipefail; until ping -c 3 $vmPublicIp | tr - _; do echo .; done"
vm_exec "$vmName" "$vmZone" "$vmPublicIp" "Shutting down" "\
if snap list solana; then \
sudo snap set solana mode=; \
fi"
echo "Succeeded in ${SECONDS} seconds"
) >> "log-$vmName.txt" 2>&1 &
declare pid=$!
# Rename log file so it can be discovered later by $pid
mv "log-$vmName.txt" "log-$pid.txt"
pids+=("$pid")
}
# When a local snap file was given, copy it to every VM in `vmlist` (all
# classes), one VM after the other, before any node is stopped or started.
if [[ -n $LOCAL_SNAP ]]; then
echo "--- Transferring $LOCAL_SNAP to node(s)"
# transfer_local_snap vmName vmZone vmPublicIp vmClass count
#
# Per-VM callback for vm_foreach: copies $LOCAL_SNAP with scp to the remote path
# "solana_local.snap" of the testnet-deploy user and prints how long the copy
# took. The `vmClass` argument is accepted but not used.
transfer_local_snap() {
declare vmName=$1
declare vmZone=$2
declare vmPublicIp=$3
declare vmClass=$4
declare count=$5
echo "--- $vmName in zone $vmZone ($count)"
# Resetting SECONDS makes the elapsed time printed below relative to this point.
SECONDS=0
scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
"$LOCAL_SNAP" testnet-deploy@"$vmPublicIp":solana_local.snap
echo "Succeeded in ${SECONDS} seconds"
}
vm_foreach transfer_local_snap
fi
echo "--- Stopping client node(s)"
pids=()
vm_foreach_in_class client client_stop
client_stop_pids=("${pids[@]}")
if ! $ROLLING_UPDATE; then
pids=()
echo "--- Shutting down all full nodes"
vm_foreach_in_class leader fullnode_stop
vm_foreach_in_class validator fullnode_stop
wait_for_pids fullnode shutdown
fi
pids=()
echo --- Starting leader node
vm_foreach_in_class leader leader_start
wait_for_pids leader
pids=()
echo --- Starting validator nodes
vm_foreach_in_class validator validator_start
wait_for_pids validators
echo "--- $publicUrl sanity test"
if [[ -z $CI ]]; then
# TODO: ssh into a node and run testnet-sanity.sh there. It's not safe to
# assume the correct Snap is installed on the current non-CI machine
echo Skipped for non-CI deploy
snapVersion=unknown
else
(
set -x
USE_SNAP=1 ci/testnet-sanity.sh $publicUrl $fullnode_count
)
IFS=\ read -r _ snapVersion _ < <(snap info solana | grep "^installed:")
snapVersion=${snapVersion/0+git./}
fi
pids=("${client_stop_pids[@]}")
wait_for_pids client shutdown
vm_foreach_in_class client client_start
# Add "network started" datapoint
$metrics_write_datapoint "testnet-deploy,name=$netBasename start=1,version=\"$snapVersion\""
echo --- net.sh start
time net/net.sh start -s "$snapChannel"
exit 0

View File

@ -1,66 +1,36 @@
#!/bin/bash -e
#
# Perform a quick sanity test on the specific testnet
#
cd "$(dirname "$0")/.."
source multinode-demo/common.sh
NET_URL=$1
if [[ -z $NET_URL ]]; then
NET_URL=testnet.solana.com
fi
EXPECTED_NODE_COUNT=$2
if [[ -z $EXPECTED_NODE_COUNT ]]; then
EXPECTED_NODE_COUNT=50
fi
echo "--- $NET_URL: verify ledger"
if [[ -d /var/snap/solana/current/config/ledger ]]; then
# Note: here we assume this script is actually running on the leader node...
sudo solana.ledger-tool --ledger /var/snap/solana/current/config/ledger verify
else
echo "^^^ +++"
echo "Ledger verify skipped"
fi
echo "--- $NET_URL: wallet sanity"
(
set -x
multinode-demo/test/wallet-sanity.sh $NET_URL
)
echo "--- $NET_URL: node count"
if [[ -n "$USE_SNAP" ]]; then
# TODO: Merge client.sh functionality into solana-bench-tps proper and
# remove this USE_SNAP case
cmd=$solana_bench_tps
else
cmd=multinode-demo/client.sh
fi
(
set -x
$cmd $NET_URL $EXPECTED_NODE_COUNT -c
)
echo "--- $NET_URL: validator sanity"
if [[ -z $NO_VALIDATOR_SANITY ]]; then
(
./multinode-demo/setup.sh -t validator
set -e pipefail
timeout 10s ./multinode-demo/validator.sh "$NET_URL" 2>&1 | tee validator.log
)
wc -l validator.log
if grep -C100 panic validator.log; then
echo "^^^ +++ Panic observed"
exit 1
else
echo "Validator log looks ok"
usage() {
exitcode=0
if [[ -n "$1" ]]; then
exitcode=1
echo "Error: $*"
fi
else
echo "^^^ +++ Validator sanity disabled (NO_VALIDATOR_SANITY defined)"
fi
cat <<EOF
usage: $0 [name]
Sanity check a CD testnet
name - name of the network
Note: the SOLANA_METRICS_CONFIG environment variable is used to configure
metrics
EOF
exit $exitcode
}
netName=$1
[[ -n $netName ]] || usage ""
set -x
echo --- gce.sh config
net/gce.sh config -p "$netName"
net/init-metrics.sh -e
echo --- net.sh sanity
net/net.sh sanity \
${NO_LEDGER_VERIFY:+-o noLedgerVerify} \
${NO_VALIDATOR_SANITY:+-o noValidatorSanity} \
exit 0

178
doc/json-rpc.md Normal file
View File

@ -0,0 +1,178 @@
Solana JSON RPC API
===
Solana nodes accept HTTP requests using the [JSON-RPC 2.0](https://www.jsonrpc.org/specification) specification.
To interact with a Solana node inside a JavaScript application, use the [solana-web3.js](https://github.com/solana-labs/solana-web3.js) library, which gives a convenient interface for the RPC methods.
RPC Endpoint
---
**Default port:** 8899
e.g. http://localhost:8899, http://192.168.1.88:8899
Methods
---
* [confirmTransaction](#confirmtransaction)
* [getAddress](#getaddress)
* [getBalance](#getbalance)
* [getLastId](#getlastid)
* [getTransactionCount](#gettransactioncount)
* [requestAirdrop](#requestairdrop)
* [sendTransaction](#sendtransaction)
Request Formatting
---
To make a JSON-RPC request, send an HTTP POST request with a `Content-Type: application/json` header. The JSON request data should contain 4 fields:
* `jsonrpc`, set to `"2.0"`
* `id`, a unique client-generated identifying integer
* `method`, a string containing the method to be invoked
* `params`, a JSON array of ordered parameter values
Example using curl:
```bash
curl -X POST -H "Content-Type: application/json" -d '{"jsonrpc":"2.0", "id":1, "method":"getBalance", "params":["83astBRguLMdt2h5U1Tpdq5tjFoJ6noeGwaY3mDLVcri"]}' 192.168.1.88:8899
```
The response output will be a JSON object with the following fields:
* `jsonrpc`, matching the request specification
* `id`, matching the request identifier
* `result`, requested data or success confirmation
Requests can be sent in batches by sending an array of JSON-RPC request objects as the data for a single POST.
Definitions
---
* Hash: A SHA-256 hash of a chunk of data.
* Pubkey: The public key of an Ed25519 key-pair.
* Signature: An Ed25519 signature of a chunk of data.
* Transaction: A Solana instruction signed by a client key-pair.
JSON RPC API Reference
---
### confirmTransaction
Returns the confirmation status of a Transaction
##### Parameters:
* `string` - Signature of Transaction to confirm, as base-58 encoded string
##### Results:
* `boolean` - Transaction status, true if Transaction is confirmed
##### Example:
```bash
// Request
curl -X POST -H "Content-Type: application/json" -d '{"jsonrpc":"2.0", "id":1, "method":"confirmTransaction", "params":["5VERv8NMvzbJMEkV8xnrLkEaWRtSz9CosKDYjCJjBRnbJLgp8uirBgmQpjKhoR4tjF3ZpRzrFmBV6UjKdiSZkQUW"]}' http://localhost:8899
// Result
{"jsonrpc":"2.0","result":true,"id":1}
```
---
### getBalance
Returns the balance of the account of the provided Pubkey
##### Parameters:
* `string` - Pubkey of account to query, as base-58 encoded string
##### Results:
* `integer` - quantity, as a signed 64-bit integer
##### Example:
```bash
// Request
curl -X POST -H "Content-Type: application/json" -d '{"jsonrpc":"2.0", "id":1, "method":"getBalance", "params":["83astBRguLMdt2h5U1Tpdq5tjFoJ6noeGwaY3mDLVcri"]}' http://localhost:8899
// Result
{"jsonrpc":"2.0","result":0,"id":1}
```
---
### getLastId
Returns the last entry ID from the ledger
##### Parameters:
None
##### Results:
* `string` - the ID of last entry, a Hash as base-58 encoded string
##### Example:
```bash
// Request
curl -X POST -H "Content-Type: application/json" -d '{"jsonrpc":"2.0","id":1, "method":"getLastId"}' http://localhost:8899
// Result
{"jsonrpc":"2.0","result":"GH7ome3EiwEr7tu9JuTh2dpYWBJK3z69Xm1ZE3MEE6JC","id":1}
```
---
### getTransactionCount
Returns the current Transaction count from the ledger
##### Parameters:
None
##### Results:
* `integer` - count, as unsigned 64-bit integer
##### Example:
```bash
// Request
curl -X POST -H "Content-Type: application/json" -d '{"jsonrpc":"2.0","id":1, "method":"getTransactionCount"}' http://localhost:8899
// Result
{"jsonrpc":"2.0","result":268,"id":1}
```
---
### requestAirdrop
Requests an airdrop of tokens to a Pubkey
##### Parameters:
* `string` - Pubkey of account to receive tokens, as base-58 encoded string
* `integer` - token quantity, as a signed 64-bit integer
##### Results:
* `string` - Transaction Signature of airdrop, as base-58 encoded string
##### Example:
```bash
// Request
curl -X POST -H "Content-Type: application/json" -d '{"jsonrpc":"2.0","id":1, "method":"requestAirdrop", "params":["83astBRguLMdt2h5U1Tpdq5tjFoJ6noeGwaY3mDLVcri", 50]}' http://localhost:8899
// Result
{"jsonrpc":"2.0","result":"5VERv8NMvzbJMEkV8xnrLkEaWRtSz9CosKDYjCJjBRnbJLgp8uirBgmQpjKhoR4tjF3ZpRzrFmBV6UjKdiSZkQUW","id":1}
```
---
### sendTransaction
Creates a new transaction
##### Parameters:
* `array` - array of octets containing a fully-signed Transaction
##### Results:
* `string` - Transaction Signature, as base-58 encoded string
##### Example:
```bash
// Request
curl -X POST -H "Content-Type: application/json" -d '{"jsonrpc":"2.0","id":1, "method":"sendTransaction", "params":[[61, 98, 55, 49, 15, 187, 41, 215, 176, 49, 234, 229, 228, 77, 129, 221, 239, 88, 145, 227, 81, 158, 223, 123, 14, 229, 235, 247, 191, 115, 199, 71, 121, 17, 32, 67, 63, 209, 239, 160, 161, 2, 94, 105, 48, 159, 235, 235, 93, 98, 172, 97, 63, 197, 160, 164, 192, 20, 92, 111, 57, 145, 251, 6, 40, 240, 124, 194, 149, 155, 16, 138, 31, 113, 119, 101, 212, 128, 103, 78, 191, 80, 182, 234, 216, 21, 121, 243, 35, 100, 122, 68, 47, 57, 13, 39, 0, 0, 0, 0, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 0, 0, 0, 0, 0, 40, 240, 124, 194, 149, 155, 16, 138, 31, 113, 119, 101, 212, 128, 103, 78, 191, 80, 182, 234, 216, 21, 121, 243, 35, 100, 122, 68, 47, 57, 11, 12, 106, 49, 74, 226, 201, 16, 161, 192, 28, 84, 124, 97, 190, 201, 171, 186, 6, 18, 70, 142, 89, 185, 176, 154, 115, 61, 26, 163, 77, 1, 88, 98, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]}' http://localhost:8899
// Result
{"jsonrpc":"2.0","result":"2EBVM6cB8vAAD93Ktr6Vd8p67XPbQzCJX47MpReuiCXJAtcjaxpvWpcg9Ege1Nr5Tk3a2GFrByT7WPBjdsTycY9b","id":1}
```
---

View File

@ -33,3 +33,12 @@ variable `RAYON_NUM_THREADS=<xx>`
## How can I test a change on the testnet?
Currently, a merged PR is the only way to test a change on the testnet.
## Adjusting the number of clients or validators on the testnet
1. Go to the [GCP Instance Group](https://console.cloud.google.com/compute/instanceGroups/list?project=principal-lane-200702) tab
2. Find the client or validator instance group you'd like to adjust
3. Edit it (pencil icon), change the "Number of instances", then click "Save" button
4. Refresh until the change to number of instances has been executed
5. Click the "New Build" button on the [testnet-deploy](https://buildkite.com/solana-labs/testnet-deploy/)
buildkite job to initiate a redeploy of the network with the updated instance count.

View File

@ -10,28 +10,30 @@ if [[ $(uname -m) != x86_64 ]]; then
exit 1
fi
mkdir -p target/perf-libs
(
set -x
curl -o solana-perf.tgz \
https://solana-perf.s3.amazonaws.com/master/x86_64-unknown-linux-gnu/solana-perf.tgz
tar zxvf solana-perf.tgz
)
cd target/perf-libs
(
set -x
curl https://solana-perf.s3.amazonaws.com/master/x86_64-unknown-linux-gnu/solana-perf.tgz | tar zxvf -
)
if [[ -r /usr/local/cuda/version.txt && -r cuda-version.txt ]]; then
if ! diff /usr/local/cuda/version.txt cuda-version.txt > /dev/null; then
if [[ -r /usr/local/cuda/version.txt && -r cuda-version.txt ]]; then
if ! diff /usr/local/cuda/version.txt cuda-version.txt > /dev/null; then
echo ==============================================
echo Warning: possible CUDA version mismatch
echo
echo "Expected version: $(cat cuda-version.txt)"
echo "Detected version: $(cat /usr/local/cuda/version.txt)"
echo ==============================================
fi
else
echo ==============================================
echo Warning: possible CUDA version mismatch
echo
echo "Expected version: $(cat cuda-version.txt)"
echo "Detected version: $(cat /usr/local/cuda/version.txt)"
echo Warning: unable to validate CUDA version
echo ==============================================
fi
else
echo ==============================================
echo Warning: unable to validate CUDA version
echo ==============================================
fi
echo "Downloaded solana-perf version: $(cat solana-perf-HEAD.txt)"
echo "Downloaded solana-perf version: $(cat solana-perf-HEAD.txt)"
)
exit 0

View File

@ -1,66 +1,25 @@
#!/bin/bash -e
#
USAGE=" usage: $0 [leader_url] [num_nodes] [--loop] [extra args]
leader_url URL to the leader (defaults to ..)
num_nodes Minimum number of nodes to look for while converging
--loop Add this flag to cause the program to loop infinitely
\"extra args\" Any additional arguments are pass along to solana-bench-tps
"
here=$(dirname "$0")
# shellcheck source=multinode-demo/common.sh
source "$here"/common.sh
leader=$1
if [[ -n $leader ]]; then
if [[ $leader == "-h" || $leader == "--help" ]]; then
echo "$USAGE"
exit 0
usage() {
if [[ -n $1 ]]; then
echo "$*"
echo
fi
shift
echo "usage: $0 [extra args]"
echo
echo " Run bench-tps "
echo
echo " extra args: additional arguments are pass along to solana-bench-tps"
echo
exit 1
}
if [[ -z $1 ]]; then # default behavior
$solana_bench_tps --identity config-private/client-id.json --network 127.0.0.1:8001 --duration 90
else
if [[ -d "$SNAP" ]]; then
leader=testnet.solana.com # Default to testnet when running as a Snap
else
leader=$here/.. # Default to local solana repo
fi
$solana_bench_tps "$@"
fi
count=$1
if [[ -n $count ]]; then
shift
else
count=1
fi
loop=
if [[ $1 = --loop ]]; then
loop=1
shift
fi
rsync_leader_url=$(rsync_url "$leader")
(
set -x
mkdir -p "$SOLANA_CONFIG_CLIENT_DIR"
$rsync -vPz "$rsync_leader_url"/config/leader.json "$SOLANA_CONFIG_CLIENT_DIR"/
client_json="$SOLANA_CONFIG_CLIENT_DIR"/client.json
[[ -r $client_json ]] || $solana_keygen -o "$client_json"
)
iteration=0
set -x
while true; do
$solana_bench_tps \
-n "$count" \
-l "$SOLANA_CONFIG_CLIENT_DIR"/leader.json \
-k "$SOLANA_CONFIG_CLIENT_DIR"/client.json \
"$@"
[[ -n $loop ]] || exit 0
iteration=$((iteration + 1))
echo ------------------------------------------------------------------------
echo "Iteration: $iteration"
echo ------------------------------------------------------------------------
done

View File

@ -1,7 +1,11 @@
# |source| this file
#
# Disable complaints about unused variables in this file:
# Common utilities shared by other scripts in this directory
#
# The following directive disables complaints about unused variables in this
# file:
# shellcheck disable=2034
#
rsync=rsync
leader_logger="cat"
@ -41,12 +45,6 @@ if [[ -d $SNAP ]]; then # Running inside a Linux Snap?
# 0700
mkdir -p "$SNAP_DATA"/{drone,leader,validator}
SOLANA_METRICS_CONFIG="$(snapctl get metrics-config)"
SOLANA_DEFAULT_METRICS_RATE="$(snapctl get default-metrics-rate)"
export SOLANA_DEFAULT_METRICS_RATE
SOLANA_CUDA="$(snapctl get enable-cuda)"
RUST_LOG="$(snapctl get rust-log)"
elif [[ -n $USE_SNAP ]]; then # Use the Linux Snap binaries
solana_program() {
declare program="$1"
@ -80,7 +78,7 @@ else
fi
# Locate perf libs downloaded by |./fetch-perf-libs.sh|
LD_LIBRARY_PATH=$(cd "$here" && dirname "$PWD"):$LD_LIBRARY_PATH
LD_LIBRARY_PATH=$(cd "$here" && dirname "$PWD"/target/perf-libs):$LD_LIBRARY_PATH
export LD_LIBRARY_PATH
fi
fi
@ -98,50 +96,8 @@ solana_ledger_tool=$(solana_program ledger-tool)
export RUST_LOG=${RUST_LOG:-solana=info} # if RUST_LOG is unset, default to info
export RUST_BACKTRACE=1
# The SOLANA_METRICS_CONFIG environment variable is formatted as a
# comma-delimited list of parameters. All parameters are optional.
#
# Example:
# export SOLANA_METRICS_CONFIG="host=<metrics host>,db=<database name>,u=<username>,p=<password>"
#
configure_metrics() {
[[ -n $SOLANA_METRICS_CONFIG ]] || return 0
declare metrics_params
IFS=',' read -r -a metrics_params <<< "$SOLANA_METRICS_CONFIG"
for param in "${metrics_params[@]}"; do
IFS='=' read -r -a pair <<< "$param"
if [[ ${#pair[@]} != 2 ]]; then
echo Error: invalid metrics parameter: "$param" >&2
else
declare name="${pair[0]}"
declare value="${pair[1]}"
case "$name" in
host)
export INFLUX_HOST="$value"
echo INFLUX_HOST="$INFLUX_HOST" >&2
;;
db)
export INFLUX_DATABASE="$value"
echo INFLUX_DATABASE="$INFLUX_DATABASE" >&2
;;
u)
export INFLUX_USERNAME="$value"
echo INFLUX_USERNAME="$INFLUX_USERNAME" >&2
;;
p)
export INFLUX_PASSWORD="$value"
echo INFLUX_PASSWORD="********" >&2
;;
*)
echo Error: Unknown metrics parameter name: "$name" >&2
;;
esac
fi
done
}
configure_metrics
# shellcheck source=scripts/configure-metrics.sh
source "$(dirname "${BASH_SOURCE[0]}")"/../scripts/configure-metrics.sh
tune_networking() {
# Skip in CI
@ -154,7 +110,7 @@ tune_networking() {
# test the existence of the sysctls before trying to set them
# go ahead and return true and don't exit if these calls fail
sysctl net.core.rmem_max 2>/dev/null 1>/dev/null &&
sudo sysctl -w net.core.rmem_max=26214400 1>/dev/null 2>/dev/null
sudo sysctl -w net.core.rmem_max=67108864 1>/dev/null 2>/dev/null
sysctl net.core.rmem_default 2>/dev/null 1>/dev/null &&
sudo sysctl -w net.core.rmem_default=26214400 1>/dev/null 2>/dev/null
@ -173,20 +129,6 @@ tune_networking() {
fi
}
# Best-effort adjustment of a process's OOM-killer score.
#
#   $1 - pid of the target process
#   $2 - score to write to /proc/<pid>/oom_score_adj
#
# Does nothing when `uname` reports something other than Linux.  A failed
# write is tolerated; the value is read back and a message is printed when it
# does not match the requested score.
oom_score_adj() {
  declare targetPid=$1
  declare wantedScore=$2
  declare procFile="/proc/$targetPid/oom_score_adj"

  [[ $(uname) == Linux ]] || return 0

  echo "$wantedScore" > "$procFile" || true

  declare actualScore
  actualScore=$(cat "$procFile" || true)
  [[ $wantedScore == "$actualScore" ]] ||
    echo "Failed to set oom_score_adj to $wantedScore for pid $targetPid (current score: $actualScore)"
}
SOLANA_CONFIG_DIR=${SNAP_DATA:-$PWD}/config
SOLANA_CONFIG_PRIVATE_DIR=${SNAP_DATA:-$PWD}/config-private
@ -211,3 +153,50 @@ rsync_url() { # adds the 'rsync://` prefix to URLs that need it
# Default to rsync:// URL
echo "rsync://$url"
}
# Called from drone, validator, client.
#
# Works out which leader to talk to and prints three space-separated fields
# on stdout: "<leader> <leader_address> <shift>", where <shift> is the number
# of positional arguments that were consumed.
#
#   $1 - (optional) leader host / rsync path
#   $2 - (optional) explicit leader network address
#
# Inside a Snap no arguments are accepted; the leader IP comes from
# `snapctl get leader-ip`, falling back to resolving testnet.solana.com.
find_leader() {
  declare leader leader_address
  declare shift=0

  if [[ -d $SNAP ]]; then
    [[ -z $1 ]] || usage "Error: unexpected parameter: $1"

    # Select leader from the Snap configuration
    leader_ip=$(snapctl get leader-ip)
    if [[ -z $leader_ip ]]; then
      leader=testnet.solana.com
      leader_ip=$(dig +short "${leader%:*}" | head -n1)
      [[ -n $leader_ip ]] || usage "Error: unable to resolve IP address for $leader"
    fi
    leader=$leader_ip
    leader_address=$leader_ip:8001
  elif [[ -z $1 ]]; then
    # No arguments: everything is local
    leader=${here}/..             # Default to local tree for rsync
    leader_address=127.0.0.1:8001 # Default to local leader
  elif [[ -z $2 ]]; then
    # Only the leader was given: resolve its address via DNS
    leader=$1

    declare leader_ip
    leader_ip=$(dig +short "${leader%:*}" | head -n1)
    [[ -n $leader_ip ]] || usage "Error: unable to resolve IP address for $leader"

    leader_address=$leader_ip:8001
    shift=1
  else
    # Both leader and address were given explicitly
    leader=$1
    leader_address=$2
    shift=2
  fi

  echo "$leader" "$leader_address" "$shift"
}

View File

@ -1,28 +1,26 @@
#!/bin/bash
#
# usage: $0 <rsync network path to solana repo on leader machine>
# Starts an instance of solana-drone
#
here=$(dirname "$0")
# shellcheck source=multinode-demo/common.sh
source "$here"/common.sh
SOLANA_CONFIG_DIR="$SOLANA_CONFIG_DIR"-drone
if [[ -d "$SNAP" ]]; then
# Exit if mode is not yet configured
# (typically the case after the Snap is first installed)
[[ -n "$(snapctl get mode)" ]] || exit 0
# Select leader from the Snap configuration
leader_address="$(snapctl get leader-address)"
if [[ -z "$leader_address" ]]; then
# Assume drone is running on the same node as the leader by default
leader_address="localhost"
usage() {
if [[ -n $1 ]]; then
echo "$*"
echo
fi
leader="$leader_address"
else
leader=${1:-${here}/..} # Default to local tree for data
fi
echo "usage: $0 [network entry point]"
echo
echo " Run an airdrop drone for the specified network"
echo
exit 1
}
read -r _ leader_address shift < <(find_leader "${@:1:1}")
shift "$shift"
[[ -f "$SOLANA_CONFIG_PRIVATE_DIR"/mint.json ]] || {
echo "$SOLANA_CONFIG_PRIVATE_DIR/mint.json not found, create it by running:"
@ -31,16 +29,12 @@ fi
exit 1
}
rsync_leader_url=$(rsync_url "$leader")
set -ex
mkdir -p "$SOLANA_CONFIG_DIR"
$rsync -vPz "$rsync_leader_url"/config/leader.json "$SOLANA_CONFIG_DIR"/
trap 'kill "$pid" && wait "$pid"' INT TERM
$solana_drone \
-l "$SOLANA_CONFIG_DIR"/leader.json -k "$SOLANA_CONFIG_PRIVATE_DIR"/mint.json \
--keypair "$SOLANA_CONFIG_PRIVATE_DIR"/mint.json \
--network "$leader_address" \
> >($drone_logger) 2>&1 &
pid=$!
oom_score_adj "$pid" 1000
wait "$pid"

View File

@ -1,80 +0,0 @@
#!/bin/bash
command=$1
prefix=
num_nodes=
out_file=
image_name="ubuntu-16-04-cuda-9-2-new"
shift
usage() {
exitcode=0
if [[ -n "$1" ]]; then
exitcode=1
echo "Error: $*"
fi
cat <<EOF
usage: $0 <create|delete> <-p prefix> <-n num_nodes> <-o file> [-i image-name]
Manage a GCE multinode network
create|delete - Create or delete the network
-p prefix - A common prefix for node names, to avoid collision
-n num_nodes - Number of nodes
-o out_file - Used for create option. Outputs an array of IP addresses
of new nodes to the file
-i image_name - Existing image on GCE (default $image_name)
EOF
exit $exitcode
}
while getopts "h?p:i:n:o:" opt; do
case $opt in
h | \?)
usage
;;
p)
prefix=$OPTARG
;;
i)
image_name=$OPTARG
;;
o)
out_file=$OPTARG
;;
n)
num_nodes=$OPTARG
;;
*)
usage "Error: unhandled option: $opt"
;;
esac
done
set -e
[[ -n $command ]] || usage "Need a command (create|delete)"
[[ -n $prefix ]] || usage "Need a prefix for GCE instance names"
[[ -n $num_nodes ]] || usage "Need number of nodes"
nodes=()
for i in $(seq 1 "$num_nodes"); do
nodes+=("$prefix$i")
done
if [[ $command == "create" ]]; then
[[ -n $out_file ]] || usage "Need an outfile to store IP Addresses"
ip_addr_list=$(gcloud beta compute instances create "${nodes[@]}" --zone=us-west1-b --tags=testnet \
--image="$image_name" | awk '/RUNNING/ {print $5}')
echo "ip_addr_array=($ip_addr_list)" >"$out_file"
elif [[ $command == "delete" ]]; then
gcloud beta compute instances delete "${nodes[@]}"
else
usage "Unknown command: $command"
fi

View File

@ -1,9 +1,15 @@
#!/bin/bash
#
# Starts a leader node
#
here=$(dirname "$0")
# shellcheck source=multinode-demo/common.sh
source "$here"/common.sh
# shellcheck source=scripts/oom-score-adj.sh
source "$here"/../scripts/oom-score-adj.sh
if [[ -d "$SNAP" ]]; then
# Exit if mode is not yet configured
# (typically the case after the Snap is first installed)

View File

@ -1,14 +0,0 @@
#!/bin/bash -e
[[ -n $FORCE ]] || exit
chmod 600 ~/.ssh/authorized_keys ~/.ssh/id_rsa
PATH="$HOME"/.cargo/bin:"$PATH"
./fetch-perf-libs.sh
# Run setup
USE_INSTALL=1 ./multinode-demo/setup.sh -p
USE_INSTALL=1 ./multinode-demo/drone.sh >drone.log 2>&1 &
USE_INSTALL=1 SOLANA_CUDA=1 ./multinode-demo/leader.sh >leader.log 2>&1 &

View File

@ -1,185 +0,0 @@
#!/bin/bash
command=$1
ip_addr_file=
remote_user=
ssh_keys=
shift
usage() {
exitcode=0
if [[ -n "$1" ]]; then
exitcode=1
echo "Error: $*"
fi
cat <<EOF
usage: $0 <start|stop> <-f IP Addr Array file> <-u username> [-k ssh-keys]
Manage a GCE multinode network
start|stop - Create or delete the network
-f file - A bash script that exports an array of IP addresses, ip_addr_array.
Elements of the array are public IP address of remote nodes.
-u username - The username for logging into remote nodes.
-k ssh-keys - Path to public/private key pair that remote nodes can use to perform
rsync and ssh among themselves. Must contain pub, and priv keys.
EOF
exit $exitcode
}
while getopts "h?f:u:k:" opt; do
case $opt in
h | \?)
usage
;;
f)
ip_addr_file=$OPTARG
;;
u)
remote_user=$OPTARG
;;
k)
ssh_keys=$OPTARG
;;
*)
usage "Error: unhandled option: $opt"
;;
esac
done
set -e
# Sample IP Address array file contents
# ip_addr_array=(192.168.1.1 192.168.1.5 192.168.2.2)
[[ -n $command ]] || usage "Need a command (start|stop)"
[[ -n $ip_addr_file ]] || usage "Need a file with IP address array"
[[ -n $remote_user ]] || usage "Need the username for remote nodes"
ip_addr_array=()
# Get IP address array
# shellcheck source=/dev/null
source "$ip_addr_file"
build_project() {
echo "Build started at $(date)"
SECONDS=0
# Build and install locally
PATH="$HOME"/.cargo/bin:"$PATH"
cargo install --force
echo "Build took $SECONDS seconds"
}
common_start_setup() {
ip_addr=$1
# Killing sshguard for now. TODO: Find a better solution
# sshguard is blacklisting IP address after ssh-keyscan and ssh login attempts
ssh "$remote_user@$ip_addr" " \
set -ex; \
sudo service sshguard stop; \
sudo apt-get --assume-yes install rsync libssl-dev; \
mkdir -p ~/.ssh ~/solana ~/.cargo/bin; \
" >log/"$ip_addr".log
# If provided, deploy SSH keys
if [[ -n $ssh_keys ]]; then
{
rsync -vPrz "$ssh_keys"/id_rsa "$remote_user@$ip_addr":~/.ssh/
rsync -vPrz "$ssh_keys"/id_rsa.pub "$remote_user@$ip_addr":~/.ssh/
rsync -vPrz "$ssh_keys"/id_rsa.pub "$remote_user@$ip_addr":~/.ssh/authorized_keys
rsync -vPrz ./multinode-demo "$remote_user@$ip_addr":~/solana/
} >>log/"$ip_addr".log
fi
}
start_leader() {
common_start_setup "$1"
{
rsync -vPrz ~/.cargo/bin/solana* "$remote_user@$ip_addr":~/.cargo/bin/
rsync -vPrz ./fetch-perf-libs.sh "$remote_user@$ip_addr":~/solana/
ssh -n -f "$remote_user@$ip_addr" 'cd solana; FORCE=1 ./multinode-demo/remote_leader.sh'
} >>log/"$1".log
leader_ip=$1
leader_time=$SECONDS
SECONDS=0
}
start_validator() {
common_start_setup "$1"
ssh -n -f "$remote_user@$ip_addr" "cd solana; FORCE=1 ./multinode-demo/remote_validator.sh $leader_ip" >>log/"$1".log
}
# Deploys the network onto every address in ip_addr_array.
#
# The first address becomes the leader and is started synchronously; every
# other address is started as a validator in the background.  Prints timing
# information once all background jobs have finished.
start_all_nodes() {
  echo "Deployment started at $(date)"
  SECONDS=0
  count=0
  leader_ip=
  leader_time=

  mkdir -p log

  for ip_addr in "${ip_addr_array[@]}"; do
    if ((!count)); then
      # Start the leader on the first node
      echo "Leader node $ip_addr, killing previous instance and restarting"
      start_leader "$ip_addr"
    else
      # Start validator on all other nodes
      echo "Validator[$count] node $ip_addr, killing previous instance and restarting"
      start_validator "$ip_addr" &
      # TBD: Remove the sleep or reduce time once GCP login quota is increased
      sleep 2
    fi

    ((count = count + 1))
  done

  # Block until every backgrounded start_validator has returned
  wait

  ((validator_count = count - 1))

  echo "Deployment finished at $(date)"
  echo "Leader deployment took $leader_time seconds"
  echo "$validator_count Validator deployment took $SECONDS seconds"
}
# Kills the Solana processes on every address in ip_addr_array and reports
# how long that took.
#
# For each node the local known_hosts entry is refreshed first (old key
# removed, current key scanned) before the remote kill is issued over ssh.
stop_all_nodes() {
  SECONDS=0
  local count=0

  # log/ is otherwise only created by start_all_nodes, which runs after this
  # function (or not at all for the "stop" command).  Without it the redirect
  # below fails and, under `set -e`, aborts the script.
  mkdir -p log

  for ip_addr in "${ip_addr_array[@]}"; do
    ssh-keygen -R "$ip_addr" >log/local.log
    ssh-keyscan "$ip_addr" >>~/.ssh/known_hosts 2>/dev/null
    echo "Stopping node[$count] $ip_addr. Remote user $remote_user"

    ssh -n -f "$remote_user@$ip_addr" " \
      set -ex; \
      sudo service sshguard stop; \
      pkill -9 solana-; \
      pkill -9 validator; \
      pkill -9 leader; \
    "
    sleep 2
    # Report with the same index used in the "Stopping" message, then advance
    echo "Stopped node[$count] $ip_addr"
    ((count = count + 1))
  done
  echo "Stopping $count nodes took $SECONDS seconds"
}
if [[ $command == "start" ]]; then
build_project
stop_all_nodes
start_all_nodes
elif [[ $command == "stop" ]]; then
stop_all_nodes
else
usage "Unknown command: $command"
fi

View File

@ -1,17 +0,0 @@
#!/bin/bash -e
[[ -n $FORCE ]] || exit
chmod 600 ~/.ssh/authorized_keys ~/.ssh/id_rsa
PATH="$HOME"/.cargo/bin:"$PATH"
touch ~/.ssh/known_hosts
ssh-keygen -R "$1" 2>/dev/null
ssh-keyscan "$1" >>~/.ssh/known_hosts 2>/dev/null
rsync -vPrz "$1":~/.cargo/bin/solana* ~/.cargo/bin/
# Run setup
USE_INSTALL=1 ./multinode-demo/setup.sh -p
USE_INSTALL=1 ./multinode-demo/validator.sh "$1":~/solana "$1" >validator.log 2>&1

View File

@ -1,4 +1,7 @@
#!/bin/bash
#
# Creates a fullnode configuration
#
here=$(dirname "$0")
# shellcheck source=multinode-demo/common.sh
@ -31,6 +34,7 @@ ip_address_arg=-l
num_tokens=1000000000
node_type_leader=true
node_type_validator=true
node_type_client=true
while getopts "h?n:lpt:" opt; do
case $opt in
h|\?)
@ -52,10 +56,17 @@ while getopts "h?n:lpt:" opt; do
leader)
node_type_leader=true
node_type_validator=false
node_type_client=false
;;
validator)
node_type_leader=false
node_type_validator=true
node_type_client=false
;;
client)
node_type_leader=false
node_type_validator=false
node_type_client=true
;;
*)
usage "Error: unknown node type: $node_type"
@ -69,25 +80,27 @@ while getopts "h?n:lpt:" opt; do
done
leader_address_args=("$ip_address_arg")
validator_address_args=("$ip_address_arg" -b 9000)
leader_id_path="$SOLANA_CONFIG_PRIVATE_DIR"/leader-id.json
validator_id_path="$SOLANA_CONFIG_PRIVATE_DIR"/validator-id.json
mint_path="$SOLANA_CONFIG_PRIVATE_DIR"/mint.json
set -e
for i in "$SOLANA_CONFIG_DIR" "$SOLANA_CONFIG_PRIVATE_DIR" "$SOLANA_CONFIG_VALIDATOR_DIR"; do
for i in "$SOLANA_CONFIG_DIR" "$SOLANA_CONFIG_VALIDATOR_DIR" "$SOLANA_CONFIG_PRIVATE_DIR"; do
echo "Cleaning $i"
rm -rvf "$i"
mkdir -p "$i"
done
$solana_keygen -o "$leader_id_path"
$solana_keygen -o "$validator_id_path"
if $node_type_client; then
client_id_path="$SOLANA_CONFIG_PRIVATE_DIR"/client-id.json
$solana_keygen -o "$client_id_path"
ls -lhR "$SOLANA_CONFIG_PRIVATE_DIR"/
fi
if $node_type_leader; then
leader_address_args=("$ip_address_arg")
leader_id_path="$SOLANA_CONFIG_PRIVATE_DIR"/leader-id.json
mint_path="$SOLANA_CONFIG_PRIVATE_DIR"/mint.json
$solana_keygen -o "$leader_id_path"
echo "Creating $mint_path with $num_tokens tokens"
$solana_keygen -o "$mint_path"
@ -96,15 +109,20 @@ if $node_type_leader; then
echo "Creating $SOLANA_CONFIG_DIR/leader.json"
$solana_fullnode_config --keypair="$leader_id_path" "${leader_address_args[@]}" > "$SOLANA_CONFIG_DIR"/leader.json
ls -lhR "$SOLANA_CONFIG_DIR"/
ls -lhR "$SOLANA_CONFIG_PRIVATE_DIR"/
fi
if $node_type_validator; then
validator_address_args=("$ip_address_arg" -b 9000)
validator_id_path="$SOLANA_CONFIG_PRIVATE_DIR"/validator-id.json
$solana_keygen -o "$validator_id_path"
echo "Creating $SOLANA_CONFIG_VALIDATOR_DIR/validator.json"
$solana_fullnode_config --keypair="$validator_id_path" "${validator_address_args[@]}" > "$SOLANA_CONFIG_VALIDATOR_DIR"/validator.json
fi
ls -lhR "$SOLANA_CONFIG_DIR"/
if $node_type_leader; then
ls -lhR "$SOLANA_CONFIG_PRIVATE_DIR"
ls -lhR "$SOLANA_CONFIG_VALIDATOR_DIR"/
fi

View File

@ -1,4 +1,8 @@
#!/bin/bash
#
# Start a dynamically-configured validator node
#
here=$(dirname "$0")
exec "$here"/validator.sh -x "$@"

View File

@ -1,16 +1,31 @@
#!/bin/bash
#
# Start a validator node
#
here=$(dirname "$0")
# shellcheck source=multinode-demo/common.sh
source "$here"/common.sh
# shellcheck source=scripts/oom-score-adj.sh
source "$here"/../scripts/oom-score-adj.sh
if [[ -d "$SNAP" ]]; then
# Exit if mode is not yet configured
# (typically the case after the Snap is first installed)
[[ -n "$(snapctl get mode)" ]] || exit 0
fi
usage() {
if [[ -n $1 ]]; then
echo "$*"
echo
fi
echo "usage: $0 [-x] [rsync network path to solana repo on leader machine] [network ip address of leader]"
echo ""
echo " -x: runs a new, dynamically-configured validator"
echo "usage: $0 [-x] [rsync network path to leader] [network entry point]"
echo
echo " Start a validator on the specified network"
echo
echo " -x: runs a new, dynamically-configured validator"
echo
exit 1
}
@ -29,34 +44,8 @@ if [[ -n $3 ]]; then
usage
fi
if [[ -d $SNAP ]]; then
# Exit if mode is not yet configured
# (typically the case after the Snap is first installed)
[[ -n $(snapctl get mode) ]] || exit 0
# Select leader from the Snap configuration
leader_address=$(snapctl get leader-address)
if [[ -z $leader_address ]]; then
# Assume public testnet by default
leader_address=35.227.93.37 # testnet.solana.com
fi
leader=$leader_address
else
if [[ -z $1 ]]; then
leader=${1:-${here}/..} # Default to local tree for data
leader_address=${2:-127.0.0.1} # Default to local leader
elif [[ -z $2 ]]; then
leader=$1
leader_address=$(dig +short "${leader%:*}" | head -n1)
if [[ -z $leader_address ]]; then
usage "Error: unable to resolve IP address for $leader"
fi
else
leader=$1
leader_address=$2
fi
fi
leader_port=8001
read -r leader leader_address shift < <(find_leader "${@:1:2}")
shift "$shift"
if [[ -n $SOLANA_CUDA ]]; then
program=$solana_fullnode_cuda
@ -103,7 +92,7 @@ $rsync -vPr "$rsync_leader_url"/config/ "$SOLANA_LEADER_CONFIG_DIR"
trap 'kill "$pid" && wait "$pid"' INT TERM
$program \
--identity "$validator_json_path" \
--testnet "$leader_address:$leader_port" \
--network "$leader_address" \
--ledger "$SOLANA_LEADER_CONFIG_DIR"/ledger \
> >($validator_logger) 2>&1 &
pid=$!

View File

@ -1,5 +1,7 @@
#!/bin/bash
#
# Runs solana-wallet against the specified network
#
# usage: $0 <rsync network path to solana repo on leader machine>"
#
@ -7,6 +9,9 @@ here=$(dirname "$0")
# shellcheck source=multinode-demo/common.sh
source "$here"/common.sh
# shellcheck source=scripts/oom-score-adj.sh
source "$here"/../scripts/oom-score-adj.sh
# if $1 isn't host:path, something.com, or a valid local path
if [[ ${1%:} != "$1" || "$1" =~ [^.]\.[^.] || -d $1 ]]; then
leader=$1 # interpret
@ -42,4 +47,4 @@ fi
# shellcheck disable=SC2086 # $solana_wallet should not be quoted
exec $solana_wallet \
-l "$SOLANA_CONFIG_CLIENT_DIR"/leader.json -k "$client_id_path" "$@"
-l "$SOLANA_CONFIG_CLIENT_DIR"/leader.json -k "$client_id_path" --timeout 10 "$@"

2
net/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/config/
/log/

66
net/README.md Normal file
View File

@ -0,0 +1,66 @@
# Network Management
This directory contains scripts useful for working with a test network. It's
intended to be both dev and CD friendly.
### User Account Prerequisites
Log in to GCP with:
```bash
$ gcloud auth login
```
Also ensure that `$(whoami)` is the name of an InfluxDB user account with enough
access to create a new database.
## Quick Start
```bash
$ cd net/
$ ./gce.sh create -n 5 -c 1 #<-- Create a GCE testnet with 5 validators, 1 client (billing starts here)
$ ./init-metrics.sh $(whoami) #<-- Configure a metrics database for the testnet
$ ./net.sh start #<-- Deploy the network from the local workspace
$ ./ssh.sh #<-- Details on how to ssh into any testnet node
$ ./gce.sh delete #<-- Dispose of the network (billing stops here)
```
## Tips
### Running the network over public IP addresses
By default, private IP addresses are used with all instances in the same
availability zone to avoid GCE network egress charges. However, to run the
network over public IP addresses:
```bash
$ ./gce.sh create -P ...
```
### Deploying a Snap-based network
To deploy the latest pre-built `edge` channel Snap (i.e., the latest from the
`master` branch), run the following once the testnet has been created:
```bash
$ ./net.sh start -s edge
```
### Enabling CUDA
First ensure the network instances are created with GPU enabled:
```bash
$ ./gce.sh create -g ...
```
If deploying a Snap-based network nothing further is required, as GPU presence
is detected at runtime and the CUDA build is auto selected.
If deploying a locally-built network, first run `./fetch-perf-libs.sh` then
ensure the `cuda` feature is specified at network start:
```bash
$ ./net.sh start -f "cuda,erasure"
```
### How to interact with a CD testnet deployed by ci/testnet-deploy.sh
Taking **master-testnet-solana-com** as an example, configure your workspace for
the testnet using:
```
$ ./gce.sh config -p master-testnet-solana-com
$ ./ssh.sh # <-- Details on how to ssh into any testnet node
```

58
net/common.sh Normal file
View File

@ -0,0 +1,58 @@
# |source| this file
#
# Common utilities shared by other scripts in this directory
#
# The following directive disables complaints about unused variables in this
# file:
# shellcheck disable=2034
#
netDir=$(
cd "$(dirname "${BASH_SOURCE[0]}")" || exit
echo "$PWD"
)
netConfigDir="$netDir"/config
netLogDir="$netDir"/log
mkdir -p "$netConfigDir" "$netLogDir"
# shellcheck source=scripts/configure-metrics.sh
source "$(dirname "${BASH_SOURCE[0]}")"/../scripts/configure-metrics.sh
configFile="$netConfigDir/config"
entrypointIp=
publicNetwork=
leaderIp=
netBasename=
sshPrivateKey=
clientIpList=()
sshOptions=()
validatorIpList=()
# Populates the global sshOptions array with the arguments used for every
# ssh invocation against a testnet node.  Reads $sshPrivateKey, so it must be
# set before this is called.
buildSshOptions() {
  sshOptions=()

  declare setting
  for setting in \
    "BatchMode=yes" \
    "StrictHostKeyChecking=no" \
    "UserKnownHostsFile=/dev/null" \
    "User=solana" \
    "IdentityFile=$sshPrivateKey" \
    "LogLevel=ERROR" \
  ; do
    sshOptions+=(-o "$setting")
  done

  # Use /dev/null as the ssh configuration file
  sshOptions+=(-F /dev/null)
}
# Sources $configFile and verifies that it defined everything the other
# scripts rely on, reporting the first problem through usage().  On success
# the ssh options are rebuilt and metrics are configured.
loadConfigFile() {
  if [[ ! -r $configFile ]]; then
    usage "Config file unreadable: $configFile"
  fi

  # shellcheck source=/dev/null
  source "$configFile"

  # Every one of these must be non-empty after sourcing the config file
  declare requiredVar
  for requiredVar in entrypointIp publicNetwork leaderIp netBasename sshPrivateKey; do
    [[ -n ${!requiredVar} ]] || usage "Config file invalid, $requiredVar unspecified: $configFile"
  done
  [[ ${#validatorIpList[@]} -gt 0 ]] || usage "Config file invalid, validatorIpList unspecified: $configFile"

  buildSshOptions
  configureMetrics
}

336
net/gce.sh Executable file
View File

@ -0,0 +1,336 @@
#!/bin/bash -e
here=$(dirname "$0")
# shellcheck source=net/scripts/gcloud.sh
source "$here"/scripts/gcloud.sh
# shellcheck source=net/common.sh
source "$here"/common.sh
prefix=testnet-dev-${USER//[^A-Za-z0-9]/}
validatorNodeCount=5
clientNodeCount=1
leaderBootDiskSize=1TB
leaderMachineType=n1-standard-16
leaderAccelerator=
validatorMachineType=n1-standard-4
validatorBootDiskSize=$leaderBootDiskSize
validatorAccelerator=
clientMachineType=n1-standard-16
clientBootDiskSize=40GB
clientAccelerator=
imageName="ubuntu-16-04-cuda-9-2-new"
publicNetwork=false
zone="us-west1-b"
leaderAddress=
# Prints the help text for this script and exits.
#
# When called with arguments they are printed first as "Error: <args>" and the
# exit status is 1; when called without arguments the exit status is 0.
# The help text interpolates the current values of $prefix,
# $validatorNodeCount, $clientNodeCount, $publicNetwork, $zone and $imageName
# as the displayed defaults.
usage() {
exitcode=0
if [[ -n "$1" ]]; then
exitcode=1
echo "Error: $*"
fi
cat <<EOF
usage: $0 [create|config|delete] [common options] [command-specific options]
Configure a GCE-based testnet
create - create a new testnet (implies 'config')
config - configure the testnet and write a config file describing it
delete - delete the testnet
common options:
-p [prefix] - Optional common prefix for instance names to avoid
collisions (default: $prefix)
create-specific options:
-n [number] - Number of validator nodes (default: $validatorNodeCount)
-c [number] - Number of client nodes (default: $clientNodeCount)
-P - Use public network IP addresses (default: $publicNetwork)
-z [zone] - GCP Zone for the nodes (default: $zone)
-i [imageName] - Existing image on GCE (default: $imageName)
-g - Enable GPU
-a [address] - Set the leader node's external IP address to this GCE address
config-specific options:
none
delete-specific options:
none
EOF
exit $exitcode
}
# Command-line parsing: the first argument is the command, the remaining
# arguments are options that override the defaults assigned above.
command=$1
[[ -n $command ]] || usage
shift
[[ $command = create || $command = config || $command = delete ]] || usage "Invalid command: $command"

while getopts "h?p:Pi:n:c:z:ga:" opt; do
  case $opt in
  h | \?)
    usage
    ;;
  p)
    # The check strips everything except letters, digits and '-', so hyphens
    # are accepted too; the message says so.
    [[ ${OPTARG//[^A-Za-z0-9-]/} == "$OPTARG" ]] || usage "Invalid prefix: \"$OPTARG\", alphanumeric and hyphen only"
    prefix=$OPTARG
    ;;
  P)
    publicNetwork=true
    ;;
  i)
    imageName=$OPTARG
    ;;
  n)
    validatorNodeCount=$OPTARG
    ;;
  c)
    clientNodeCount=$OPTARG
    ;;
  z)
    zone=$OPTARG
    ;;
  g)
    # Only the leader accelerator is set by this flag
    leaderAccelerator="count=4,type=nvidia-tesla-k80"
    ;;
  a)
    leaderAddress=$OPTARG
    ;;
  *)
    # usage() already prefixes its message with "Error: "
    usage "unhandled option: $opt"
    ;;
  esac
done
shift $((OPTIND - 1))

# No positional arguments are accepted after the options
[[ -z $1 ]] || usage "Unexpected argument: $1"

# The ssh key pair for this testnet lives next to the config file
sshPrivateKey="$netConfigDir/id_$prefix"
# Locates the leader, validator and client instances whose names start with
# $prefix, appends a description of them to $configFile, fetches the ssh
# private key from the leader, and waits for each instance to finish its
# startup script.
#
# Exits with status 1 when the leader or the validators cannot be found.
# Client instances are optional.
prepareInstancesAndWriteConfigFile() {
  $metricsWriteDatapoint "testnet-deploy net-config-begin=1"

  cat >> "$configFile" <<EOF
# autogenerated at $(date)
netBasename=$prefix
publicNetwork=$publicNetwork
sshPrivateKey=$sshPrivateKey
EOF

  buildSshOptions

  # Callback for gcloud_ForEachInstance: appends the instance's public IP to
  # the array named by $6 in the config file.  For the leader it also records
  # the entrypoint IP (public or private, depending on $publicNetwork).
  # NOTE(review): positional layout ($1 name, $3 public IP, $4 private IP,
  # $6 extra argument) is dictated by gcloud_ForEachInstance, which is defined
  # outside this file -- confirm there.
  recordInstanceIp() {
    declare name="$1"
    declare publicIp="$3"
    declare privateIp="$4"
    declare arrayName="$6"

    echo "$arrayName+=($publicIp) # $name" >> "$configFile"
    if [[ $arrayName = "leaderIp" ]]; then
      if $publicNetwork; then
        echo "entrypointIp=$publicIp" >> "$configFile"
      else
        echo "entrypointIp=$privateIp" >> "$configFile"
      fi
    fi
  }

  # Callback for gcloud_ForEachInstance: polls over ssh (up to 30 attempts,
  # 2 seconds apart) for the marker file the startup script creates when done.
  waitForStartupComplete() {
    declare name="$1"
    declare publicIp="$3"

    echo "Waiting for $name to finish booting..."
    (
      for i in $(seq 1 30); do
        if (set -x; ssh "${sshOptions[@]}" "$publicIp" "test -f /.gce-startup-complete"); then
          break
        fi
        sleep 2
        echo "Retry $i..."
      done
    )
  }

  echo "Looking for leader instance..."
  gcloud_FindInstances "name=$prefix-leader" show
  [[ ${#instances[@]} -eq 1 ]] || {
    echo "Unable to find leader"
    exit 1
  }

  (
    # Remove both halves of any stale key pair before fetching a fresh copy
    rm -rf "$sshPrivateKey"{,.pub}

    declare leaderName
    declare leaderZone
    declare leaderIp
    IFS=: read -r leaderName leaderZone leaderIp _ < <(echo "${instances[0]}")

    # Announce the fetch only once leaderName has actually been parsed
    echo "Fetching $sshPrivateKey from $leaderName"

    set -x

    # Try to ping the machine first.  There can be a delay between when the
    # instance is reported as RUNNING and when it's reachable over the network
    timeout 30s bash -c "set -o pipefail; until ping -c 3 $leaderIp | tr - _; do echo .; done"

    # Try to scp in a couple times, sshd may not yet be up even though the
    # machine can be pinged...
    set -o pipefail
    for i in $(seq 1 10); do
      if gcloud compute scp --zone "$leaderZone" \
          "$leaderName:/solana-id_ecdsa" "$sshPrivateKey"; then
        break
      fi
      sleep 1
      echo "Retry $i..."
    done

    chmod 400 "$sshPrivateKey"
  )

  echo "leaderIp=()" >> "$configFile"
  gcloud_ForEachInstance recordInstanceIp leaderIp
  gcloud_ForEachInstance waitForStartupComplete

  echo "Looking for validator instances..."
  gcloud_FindInstances "name~^$prefix-validator" show
  [[ ${#instances[@]} -gt 0 ]] || {
    echo "Unable to find validators"
    exit 1
  }
  echo "validatorIpList=()" >> "$configFile"
  gcloud_ForEachInstance recordInstanceIp validatorIpList
  gcloud_ForEachInstance waitForStartupComplete

  # Clients are optional: an empty clientIpList is always written, and only
  # filled in when matching instances exist.
  echo "clientIpList=()" >> "$configFile"
  echo "Looking for client instances..."
  gcloud_FindInstances "name~^$prefix-client" show
  [[ ${#instances[@]} -eq 0 ]] || {
    gcloud_ForEachInstance recordInstanceIp clientIpList
    gcloud_ForEachInstance waitForStartupComplete
  }

  echo "Wrote $configFile"
  $metricsWriteDatapoint "testnet-deploy net-config-complete=1"
}
# Dispatch on the command given as the first command-line argument.
case $command in
delete)
# Deletes every instance whose name starts with $prefix and removes the
# local config file.
$metricsWriteDatapoint "testnet-deploy net-delete-begin=1"
# Delete the leader node first to prevent unusual metrics on the dashboard
# during shutdown.
# TODO: It would be better to fully cut-off metrics reporting before any
# instances are deleted.
for filter in "^$prefix-leader" "^$prefix-"; do
gcloud_FindInstances "name~$filter"
if [[ ${#instances[@]} -eq 0 ]]; then
echo "No instances found matching '$filter'"
else
gcloud_DeleteInstances true
fi
done
rm -f "$configFile"
$metricsWriteDatapoint "testnet-deploy net-delete-complete=1"
;;
create)
# Generates a fresh ssh key pair and a startup script, creates the leader,
# validator and (optionally) client instances, then writes the config file.
[[ -n $validatorNodeCount ]] || usage "Need number of nodes"
$metricsWriteDatapoint "testnet-deploy net-create-begin=1"
rm -rf "$sshPrivateKey"{,.pub}
ssh-keygen -t ecdsa -N '' -f "$sshPrivateKey"
# Prints a banner summarising machine types, node counts and GPU settings.
# Used both for local output and for the final /etc/motd on each instance.
printNetworkInfo() {
cat <<EOF
========================================================================================
Network composition:
Leader = $leaderMachineType (GPU=${leaderAccelerator:-none})
Validators = $validatorNodeCount x $validatorMachineType (GPU=${validatorAccelerator:-none})
Client(s) = $clientNodeCount x $clientMachineType (GPU=${clientAccelerator:-none})
========================================================================================
EOF
}
printNetworkInfo
# The startup script is generated locally.  Because the heredoc delimiter is
# unquoted, $(...) and $var are expanded now, at generation time; only the
# escaped \$(id -un) is left for the instance to evaluate.
declare startupScript="$netConfigDir"/gce-startup-script.sh
cat > "$startupScript" <<EOF
#!/bin/bash -ex
# autogenerated at $(date)
cat > /etc/motd <<EOM
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
This instance has not been fully configured.
See "startup-script" log messages in /var/log/syslog for status:
$ sudo cat /var/log/syslog | grep startup-script
To block until setup is complete, run:
$ until [[ -f /.gce-startup-complete ]]; do sleep 1; done
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
EOM
# Place the generated private key at /solana-id_ecdsa so it's retrievable by anybody
# who is able to log into this machine
cat > /solana-id_ecdsa <<EOK
$(cat "$sshPrivateKey")
EOK
cat > /solana-id_ecdsa.pub <<EOK
$(cat "$sshPrivateKey.pub")
EOK
chmod 444 /solana-id_ecdsa
USER=\$(id -un)
$(
cd "$here"/scripts/
cat \
disable-background-upgrades.sh \
create-solana-user.sh \
install-earlyoom.sh \
install-rsync.sh \
install-libssl-compatability.sh \
)
cat > /etc/motd <<EOM
$(printNetworkInfo)
EOM
touch /.gce-startup-complete
EOF
# One leader; only the leader receives the optional reserved address.
gcloud_CreateInstances "$prefix-leader" 1 "$zone" \
"$imageName" "$leaderMachineType" "$leaderBootDiskSize" "$leaderAccelerator" \
"$startupScript" "$leaderAddress"
gcloud_CreateInstances "$prefix-validator" "$validatorNodeCount" "$zone" \
"$imageName" "$validatorMachineType" "$validatorBootDiskSize" "$validatorAccelerator" \
"$startupScript" ""
# Client instances are only created when at least one was requested.
if [[ $clientNodeCount -gt 0 ]]; then
gcloud_CreateInstances "$prefix-client" "$clientNodeCount" "$zone" \
"$imageName" "$clientMachineType" "$clientBootDiskSize" "$clientAccelerator" \
"$startupScript" ""
fi
$metricsWriteDatapoint "testnet-deploy net-create-complete=1"
prepareInstancesAndWriteConfigFile
;;
config)
# Re-discovers already-existing instances and writes the config file.
prepareInstancesAndWriteConfigFile
;;
*)
usage "Unknown command: $command"
esac

80
net/init-metrics.sh Executable file
View File

@ -0,0 +1,80 @@
#!/bin/bash -e
here=$(dirname "$0")
# shellcheck source=net/common.sh
source "$here"/common.sh
# Print the help text and terminate the script.
#
# With no argument the exit status is 0.  When an argument is supplied it is
# treated as an error message: it is echoed with an "Error: " prefix ahead of
# the help text and the exit status becomes 1.
usage() {
  exitcode=0
  [[ -z "$1" ]] || {
    exitcode=1
    echo "Error: $*"
  }
  cat <<EOF
usage: $0 [-e] [-d] [username]
Creates a testnet dev metrics database
username InfluxDB user with access to create a new database
-d Delete the database instead of creating it
-e Assume database already exists and SOLANA_METRICS_CONFIG is
defined in the environment already
EOF
  exit "$exitcode"
}
# Load the testnet configuration.  loadConfigFile comes from the sourced
# common.sh; presumably it defines $netBasename and $configFile, which are
# used further down -- TODO confirm against common.sh.
loadConfigFile
# Option state: -e flips useEnv, -d flips delete.
useEnv=false
delete=false
while getopts "hde" opt; do
case $opt in
h|\?)
# -h, or an option getopts did not recognize: show the help text.  usage
# itself exits, so the |exit 0| below is only a safety net.
usage
exit 0
;;
d)
delete=true
;;
e)
useEnv=true
;;
*)
usage "Error: unhandled option: $opt"
;;
esac
done
# Discard the parsed options so that $1 is the first positional argument
shift $((OPTIND - 1))
# Either trust SOLANA_METRICS_CONFIG from the environment (-e) or (re)create the
# per-testnet InfluxDB database, then persist the metrics configuration into
# the testnet config file.
if $useEnv; then
  # usage already prefixes its message with "Error: "
  [[ -n $SOLANA_METRICS_CONFIG ]] ||
    usage "SOLANA_METRICS_CONFIG is not defined in the environment"
else
  username=$1
  [[ -n "$username" ]] || usage "username not specified"

  # -s: do not echo the password while it is typed
  read -rs -p "InfluxDB password for $username: " password
  [[ -n $password ]] || { echo "Password not specified"; exit 1; }
  echo

  # Run one InfluxQL statement against the metrics server.
  #
  # The credentials are sent with HTTP basic authentication rather than being
  # spliced into the URL query string: a password containing characters such as
  # '&', '#', '%' or a space would otherwise corrupt the request.
  query() {
    echo "$*"
    curl -XPOST \
      --user "${username}:${password}" \
      "https://metrics.solana.com:8086/query" \
      --data-urlencode "q=$*"
  }

  # Always start from a clean slate; with -d the job is done after the drop.
  query "DROP DATABASE \"$netBasename\""
  ! $delete || exit 0
  query "CREATE DATABASE \"$netBasename\""
  query "ALTER RETENTION POLICY autogen ON \"$netBasename\" DURATION 7d"
  query "GRANT READ ON \"$netBasename\" TO \"ro\""
  query "GRANT WRITE ON \"$netBasename\" TO \"scratch_writer\""

  SOLANA_METRICS_CONFIG="db=$netBasename,u=scratch_writer,p=topsecret"
fi

# Append the export so that later scripts loading the config file pick it up
echo "export SOLANA_METRICS_CONFIG=\"$SOLANA_METRICS_CONFIG\"" >> "$configFile"
exit 0

352
net/net.sh Executable file
View File

@ -0,0 +1,352 @@
#!/bin/bash -e
here=$(dirname "$0")
SOLANA_ROOT="$(cd "$here"/..; pwd)"
# shellcheck source=net/common.sh
source "$here"/common.sh
# Print the help text and terminate the script.
#
# With no argument the exit status is 0.  When an argument is supplied it is
# echoed as "Error: <message>" ahead of the help text and the exit status
# becomes 1.
usage() {
  exitcode=0
  if [[ -n "$1" ]]; then
    exitcode=1
    echo "Error: $*"
  fi
  cat <<EOF
usage: $0 [start|stop|restart|sanity] [command-specific options]
Operate a configured testnet
start - Start the network
sanity - Sanity check the network
stop - Stop the network
restart - Shortcut for stop then start
start-specific options:
-S [snapFilename] - Deploy the specified Snap file
-s edge|beta|stable - Deploy the latest Snap on the specified Snap release channel
-f [cargoFeatures] - List of |cargo --features=| to activate
(ignored if -s or -S is specified)
Note: if RUST_LOG is set in the environment it will be propagated into the
network nodes.
sanity/start-specific options:
-o noLedgerVerify - Skip ledger verification
-o noValidatorSanity - Skip validator sanity
stop-specific options:
none
EOF
  exit $exitcode
}
# Defaults for the command-specific options parsed below
snapChannel=
snapFilename=
deployMethod=local
sanityExtraArgs=
cargoFeatures=
# The first positional argument is the command; it is mandatory
command=$1
[[ -n $command ]] || usage
shift
while getopts "h?S:s:o:f:" opt; do
case $opt in
h | \?)
usage
;;
S)
# Deploy a Snap file that already exists locally
snapFilename=$OPTARG
[[ -f $snapFilename ]] || usage "Snap not readable: $snapFilename"
deployMethod=snap
;;
s)
# Deploy from a Snap release channel; start() downloads the Snap later
case $OPTARG in
edge|beta|stable)
snapChannel=$OPTARG
deployMethod=snap
;;
*)
usage "Invalid snap channel: $OPTARG"
;;
esac
;;
f)
cargoFeatures=$OPTARG
;;
o)
# Recognized -o values are accumulated and forwarded verbatim to
# remote-sanity.sh by sanity()
case $OPTARG in
noLedgerVerify|noValidatorSanity)
sanityExtraArgs="$sanityExtraArgs -o $OPTARG"
;;
*)
echo "Error: unknown option: $OPTARG"
exit 1
;;
esac
;;
*)
usage "Error: unhandled option: $opt"
;;
esac
done
# loadConfigFile comes from the sourced common.sh; presumably it populates
# leaderIp, validatorIpList, clientIpList, etc. -- TODO confirm
loadConfigFile
# Every validator plus the single leader
expectedNodeCount=$((${#validatorIpList[@]} + 1))
# Build the release binaries from the local source tree into $SOLANA_ROOT/farf.
#
# On anything other than Linux the cargo invocation is wrapped in
# ci/docker-run.sh with the solanalabs/rust image.  The elapsed time is
# reported through bash's SECONDS counter.
build() {
  declare -a dockerWrapper=()
  [[ $(uname) = Linux ]] || dockerWrapper=(ci/docker-run.sh solanalabs/rust)

  SECONDS=0
  (
    cd "$SOLANA_ROOT"
    echo "--- Build started at $(date)"

    set -x
    rm -rf farf
    "${dockerWrapper[@]}" cargo install --features="$cargoFeatures" --root farf
  )
  echo "Build took $SECONDS seconds"
}
# Prepare a remote node: create ~/solana and ~/.cargo/bin on it, then rsync the
# helper scripts and directories every node type needs into ~/solana/.
#
# $1 - IP address of the node
startCommon() {
  declare ipAddress=$1
  declare -a payload=(
    "$SOLANA_ROOT"/fetch-perf-libs.sh
    "$SOLANA_ROOT"/scripts
    "$SOLANA_ROOT"/net
    "$SOLANA_ROOT"/multinode-demo
  )

  [[ -d "$SOLANA_ROOT" ]]
  ssh "${sshOptions[@]}" "$ipAddress" "mkdir -p ~/solana ~/.cargo/bin"
  rsync -vPrc -e "ssh ${sshOptions[*]}" "${payload[@]}" "$ipAddress:~/solana/"
}
# Deploy the software to the leader node and start it.
#
# $1 - IP address of the leader
# $2 - local file that receives the deployment log
#
# On any failure the log is dumped to stdout and the script exits with 1.
startLeader() {
  declare ipAddress=$1
  declare logFile="$2"
  echo "--- Starting leader: $ipAddress"
  echo "start log: $logFile"

  # Deploy local binaries to leader. Validators and clients later fetch the
  # binaries from the leader.
  #
  # The subshell below is the left-hand side of a || list, so bash ignores
  # errexit inside it; every step that must abort the deployment therefore
  # needs its own explicit |exit 1|.
  (
    set -x
    startCommon "$ipAddress" || exit 1
    case $deployMethod in
    snap)
      rsync -vPrc -e "ssh ${sshOptions[*]}" "$snapFilename" "$ipAddress:~/solana/solana.snap" || exit 1
      ;;
    local)
      rsync -vPrc -e "ssh ${sshOptions[*]}" "$SOLANA_ROOT"/farf/bin/* "$ipAddress:~/.cargo/bin/" || exit 1
      ;;
    *)
      usage "Internal error: invalid deployMethod: $deployMethod"
      ;;
    esac

    ssh "${sshOptions[@]}" -n "$ipAddress" \
      "./solana/net/remote/remote-node.sh $deployMethod leader $publicNetwork $entrypointIp $expectedNodeCount \"$RUST_LOG\""
  ) >> "$logFile" 2>&1 || {
    cat "$logFile"
    echo "^^^ +++"
    exit 1
  }
}
# Start deploying one validator in the background.
#
# $1 - IP address of the validator
#
# The deployment runs asynchronously; its PID is appended to the global |pids|
# array so the caller can wait for it.  The log is written to
# $netLogDir/validator-<ip>.log and a validator-<pid>.log symlink is created so
# the caller can find the log from the PID alone.
startValidator() {
  declare ipAddress=$1
  declare logFile="$netLogDir/validator-$ipAddress.log"

  # Report the validator being started (not the leader)
  echo "--- Starting validator: $ipAddress"
  echo "start log: $logFile"
  (
    set -x
    startCommon "$ipAddress"
    ssh "${sshOptions[@]}" -n "$ipAddress" \
      "./solana/net/remote/remote-node.sh $deployMethod validator $publicNetwork $entrypointIp $expectedNodeCount \"$RUST_LOG\""
  ) >> "$logFile" 2>&1 &
  declare pid=$!
  ln -sfT "validator-$ipAddress.log" "$netLogDir/validator-$pid.log"
  pids+=("$pid")
}
# Deploy the software to a client node and start the bench client on it.
#
# $1 - IP address of the client
# $2 - local file that receives the deployment log
#
# On failure the log is dumped to stdout and the script exits with 1.
startClient() {
  declare ipAddress=$1
  declare logFile="$2"
  echo "--- Starting client: $ipAddress"
  echo "start log: $logFile"

  # The subshell is the left-hand side of a || list, so bash ignores errexit
  # inside it; abort explicitly when the common setup fails instead of going on
  # to launch the client on a half-prepared node (same guard as startLeader).
  (
    set -x
    startCommon "$ipAddress" || exit 1
    ssh "${sshOptions[@]}" -f "$ipAddress" \
      "./solana/net/remote/remote-client.sh $deployMethod $entrypointIp $expectedNodeCount \"$RUST_LOG\""
  ) >> "$logFile" 2>&1 || {
    cat "$logFile"
    echo "^^^ +++"
    exit 1
  }
}
# Run remote-sanity.sh on the leader node, forwarding any -o options collected
# during option parsing.
#
# Begin and complete datapoints are both written, even when the check fails;
# a failed check then terminates the script with exit status 1.
sanity() {
  # Function-local copy; not referenced again inside this function
  declare expectedNodeCount=$((${#validatorIpList[@]} + 1))
  declare sanityStatus=0

  echo "--- Sanity"
  $metricsWriteDatapoint "testnet-deploy net-sanity-begin=1"
  (
    set -x
    # shellcheck disable=SC2029 # remote-client.sh args are expanded on client side intentionally
    ssh "${sshOptions[@]}" "$leaderIp" \
      "./solana/net/remote/remote-sanity.sh $sanityExtraArgs"
  ) || sanityStatus=$?
  $metricsWriteDatapoint "testnet-deploy net-sanity-complete=1"

  if [[ $sanityStatus -ne 0 ]]; then
    exit 1
  fi
}
# Deploy and start the whole network: obtain the software (download a Snap or
# build locally), start the leader, start all validators in parallel, run the
# sanity check, start the clients, and finally print a timing summary.
start() {
# Step 1: obtain the software to deploy
case $deployMethod in
snap)
# A release channel was requested (-s): download the Snap into $SOLANA_ROOT.
# With -S, snapFilename already points at a local file and nothing is done.
if [[ -n $snapChannel ]]; then
rm -f "$SOLANA_ROOT"/solana_*.snap
if [[ $(uname) != Linux ]]; then
(
set -x
SOLANA_DOCKER_RUN_NOSETUID=1 "$SOLANA_ROOT"/ci/docker-run.sh ubuntu:18.04 bash -c "
set -ex;
apt-get -qq update;
apt-get -qq -y install snapd;
snap download --channel=$snapChannel solana;
"
)
else
(
cd "$SOLANA_ROOT"
snap download --channel="$snapChannel" solana
)
fi
# The glob is expected to match the single file that was just downloaded
snapFilename="$(echo "$SOLANA_ROOT"/solana_*.snap)"
[[ -r $snapFilename ]] || {
echo "Error: Snap not readable: $snapFilename"
exit 1
}
fi
;;
local)
build
;;
*)
usage "Internal error: invalid deployMethod: $deployMethod"
;;
esac
echo "Deployment started at $(date)"
$metricsWriteDatapoint "testnet-deploy net-start-begin=1"
# Step 2: leader.  SECONDS is bash's elapsed-time counter; resetting it to 0
# before each phase lets the phase duration be read back afterwards.
SECONDS=0
declare leaderDeployTime=
startLeader "$leaderIp" "$netLogDir/leader-$leaderIp.log"
leaderDeployTime=$SECONDS
$metricsWriteDatapoint "testnet-deploy net-leader-started=1"
# Step 3: validators, all started in the background; startValidator appends
# each background PID to |pids|
SECONDS=0
pids=()
for ipAddress in "${validatorIpList[@]}"; do
startValidator "$ipAddress"
done
# Wait for every validator deployment; on the first failure dump its log
# (found through the validator-<pid>.log symlink) and abort
for pid in "${pids[@]}"; do
declare ok=true
wait "$pid" || ok=false
if ! $ok; then
cat "$netLogDir/validator-$pid.log"
echo ^^^ +++
exit 1
fi
done
$metricsWriteDatapoint "testnet-deploy net-validators-started=1"
validatorDeployTime=$SECONDS
# Step 4: sanity check the network before any client is started
sanity
# Step 5: clients, started one after the other
SECONDS=0
for ipAddress in "${clientIpList[@]}"; do
startClient "$ipAddress" "$netLogDir/client-$ipAddress.log"
done
clientDeployTime=$SECONDS
$metricsWriteDatapoint "testnet-deploy net-start-complete=1"
# For Snap deployments report the version that ended up installed on the leader
if [[ $deployMethod = "snap" ]]; then
declare networkVersion=unknown
# Second whitespace-separated field of the "installed:" line of |snap info|
IFS=\ read -r _ networkVersion _ < <(
ssh "${sshOptions[@]}" "$leaderIp" \
"snap info solana | grep \"^installed:\""
)
# Strip the first "0+git." occurrence from the version string
networkVersion=${networkVersion/0+git./}
$metricsWriteDatapoint "testnet-deploy version=\"$networkVersion\""
fi
echo
echo "+++ Deployment Successful"
echo "Leader deployment took $leaderDeployTime seconds"
echo "Validator deployment (${#validatorIpList[@]} instances) took $validatorDeployTime seconds"
echo "Client deployment (${#clientIpList[@]} instances) took $clientDeployTime seconds"
echo "Network start logs in $netLogDir:"
ls -l "$netLogDir"
}
# Stop everything Solana-related on one node: remove the solana Snap when it is
# installed, kill any tmux session, and SIGKILL the helper processes.
#
# $1 - IP address of the node
#
# This is best effort: a failure of the remote command is ignored.
stopNode() {
  declare ipAddress=$1
  # Script executed on the remote side; \$pattern is escaped so that it is
  # expanded remotely rather than locally
  declare remoteScript="
set -x
if snap list solana; then
sudo snap set solana mode=
sudo snap remove solana
fi
! tmux list-sessions || tmux kill-session
for pattern in solana- remote- oom-monitor net-stats; do
pkill -9 \$pattern
done
"

  echo "--- Stopping node: $ipAddress"
  (
    set -x
    ssh "${sshOptions[@]}" "$ipAddress" "$remoteScript"
  ) || true
}
# Stop the leader first, then every validator and client, and report how long
# the whole shutdown took.
stop() {
  declare node

  SECONDS=0
  $metricsWriteDatapoint "testnet-deploy net-stop-begin=1"
  for node in "$leaderIp" "${validatorIpList[@]}" "${clientIpList[@]}"; do
    stopNode "$node"
  done
  $metricsWriteDatapoint "testnet-deploy net-stop-complete=1"
  echo "Stopping nodes took $SECONDS seconds"
}
# Dispatch to the handler for the requested command.  Reaching the final branch
# means the command is not one of the four known ones.
if [[ $command = restart ]]; then
  stop
  start
elif [[ $command = start ]]; then
  start
elif [[ $command = sanity ]]; then
  sanity
elif [[ $command = stop ]]; then
  stop
else
  echo "Internal error: Unknown command: $command"
  exit 1
fi

1
net/remote/README.md Normal file
View File

@ -0,0 +1 @@
Scripts that run on the remote testnet nodes

83
net/remote/remote-client.sh Executable file
View File

@ -0,0 +1,83 @@
#!/bin/bash -e
#
# Starts the bench-tps client on this machine inside a detached tmux session.
#
# Arguments:
#   $1 deployMethod - "snap" or "local"
#   $2 entrypointIp - IP address the binaries are fetched from and the client
#                     connects to (port 8001)
#   $3 numNodes     - value passed to bench-tps as --num-nodes
#   $4 RUST_LOG     - optional log filter, defaults to solana=info
#
# The script lives in net/remote/, so two levels up is the directory that
# holds net/ and scripts/
cd "$(dirname "$0")"/../..
# Start a fresh client.log that records how the script was invoked
echo "$(date) | $0 $*" > client.log
deployMethod="$1"
entrypointIp="$2"
numNodes="$3"
RUST_LOG="$4"
export RUST_LOG=${RUST_LOG:-solana=info} # if RUST_LOG is unset, default to info
# Report a missing mandatory argument and abort
missing() {
echo "Error: $1 not specified"
exit 1
}
[[ -n $deployMethod ]] || missing deployMethod
[[ -n $entrypointIp ]] || missing entrypointIp
[[ -n $numNodes ]] || missing numNodes
source net/common.sh
loadConfigFile
# Use one thread per CPU, capped at 4
threadCount=$(nproc)
if [[ $threadCount -gt 4 ]]; then
threadCount=4
fi
# Fetch the software from the entrypoint node and pick the matching program names
case $deployMethod in
snap)
net/scripts/rsync-retry.sh -vPrc "$entrypointIp:~/solana/solana.snap" .
sudo snap install solana.snap --devmode --dangerous
solana_bench_tps=/snap/bin/solana.bench-tps
solana_keygen=/snap/bin/solana.keygen
;;
local)
PATH="$HOME"/.cargo/bin:"$PATH"
export USE_INSTALL=1
export SOLANA_DEFAULT_METRICS_RATE=1
net/scripts/rsync-retry.sh -vPrc "$entrypointIp:~/.cargo/bin/solana*" ~/.cargo/bin/
solana_bench_tps=solana-bench-tps
solana_keygen=solana-keygen
;;
*)
echo "Unknown deployment method: $deployMethod"
exit 1
esac
# Background monitors, each logging to its own file
scripts/oom-monitor.sh > oom-monitor.log 2>&1 &
scripts/net-stats.sh > net-stats.log 2>&1 &
# Kill a tmux session left over from a previous run, if there is one
! tmux list-sessions || tmux kill-session
clientCommand="\
$solana_bench_tps \
--network $entrypointIp:8001 \
--identity client.json \
--num-nodes $numNodes \
--duration 600 \
--sustained \
--threads $threadCount \
"
keygenCommand="$solana_keygen -o client.json"
# The tmux command string is expanded here, before tmux runs it; only \$(date)
# is escaped so that it is evaluated on every loop iteration.  The session
# creates client.json when it is missing and then restarts the client forever.
tmux new -s solana-bench-tps -d "
[[ -r client.json ]] || {
echo '$ $keygenCommand' | tee -a client.log
$keygenCommand >> client.log 2>&1
}
while true; do
echo === Client start: \$(date) | tee -a client.log
$metricsWriteDatapoint 'testnet-deploy client-begin=1'
echo '$ $clientCommand' | tee -a client.log
$clientCommand >> client.log 2>&1
$metricsWriteDatapoint 'testnet-deploy client-complete=1'
done
"
# Give the session a moment to start, then show the beginning of its output
sleep 1
tmux capture-pane -t solana-bench-tps -p -S -100

113
net/remote/remote-node.sh Executable file
View File

@ -0,0 +1,113 @@
#!/bin/bash -e
#
# Installs and starts a leader or validator on this machine.
#
# Arguments:
#   $1 deployMethod  - "snap" or "local"
#   $2 nodeType      - "leader" or "validator"
#   $3 publicNetwork - "true" selects setup option -p, anything else -l
#   $4 entrypointIp  - IP address of the node the software is fetched from
#   $5 numNodes      - recorded in deployConfig for remote-sanity.sh
#   $6 RUST_LOG      - optional log filter handed to the node
#
# The script lives in net/remote/, so two levels up is the directory that
# holds net/, scripts/ and multinode-demo/
cd "$(dirname "$0")"/../..
deployMethod="$1"
nodeType="$2"
publicNetwork="$3"
entrypointIp="$4"
numNodes="$5"
RUST_LOG="$6"
# Report a missing mandatory argument and abort
missing() {
echo "Error: $1 not specified"
exit 1
}
[[ -n $deployMethod ]] || missing deployMethod
[[ -n $nodeType ]] || missing nodeType
[[ -n $publicNetwork ]] || missing publicNetwork
[[ -n $entrypointIp ]] || missing entrypointIp
[[ -n $numNodes ]] || missing numNodes
# Persist the deployment parameters; remote-sanity.sh sources this file
cat > deployConfig <<EOF
deployMethod="$deployMethod"
entrypointIp="$entrypointIp"
numNodes="$numNodes"
EOF
source net/common.sh
loadConfigFile
if [[ $publicNetwork = true ]]; then
setupArgs="-p"
else
setupArgs="-l"
fi
case $deployMethod in
snap)
SECONDS=0
# The leader already received solana.snap from the deploying machine; every
# other node copies it from the entrypoint
[[ $nodeType = leader ]] ||
net/scripts/rsync-retry.sh -vPrc "$entrypointIp:~/solana/solana.snap" .
sudo snap install solana.snap --devmode --dangerous
# Snap configuration shared by both node types
commonNodeConfig="\
leader-ip=$entrypointIp \
default-metrics-rate=1 \
metrics-config=$SOLANA_METRICS_CONFIG \
rust-log=$RUST_LOG \
setup-args=$setupArgs \
"
# Enable CUDA when an NVIDIA device node is present
if [[ -e /dev/nvidia0 ]]; then
commonNodeConfig="$commonNodeConfig enable-cuda=1"
fi
# Pick the Snap mode and link the Snap's log files into the current directory
if [[ $nodeType = leader ]]; then
nodeConfig="mode=leader+drone $commonNodeConfig"
ln -sf -T /var/snap/solana/current/leader/current leader.log
ln -sf -T /var/snap/solana/current/drone/current drone.log
else
nodeConfig="mode=validator $commonNodeConfig"
ln -sf -T /var/snap/solana/current/validator/current validator.log
fi
# Write a unique marker into syslog so that everything logged after this point
# can be extracted again below
logmarker="solana deploy $(date)/$RANDOM"
logger "$logmarker"
# shellcheck disable=SC2086 # Don't want to double quote "$nodeConfig"
sudo snap set solana $nodeConfig
snap info solana
sudo snap get solana
echo Slight delay to get more syslog output
sleep 2
# Print syslog from the marker to the end of the file
sudo grep -Pzo "$logmarker(.|\\n)*" /var/log/syslog
echo "Succeeded in ${SECONDS} seconds"
;;
local)
PATH="$HOME"/.cargo/bin:"$PATH"
export USE_INSTALL=1
export RUST_LOG
export SOLANA_DEFAULT_METRICS_RATE=1
./fetch-perf-libs.sh
export LD_LIBRARY_PATH="$PWD/target/perf-libs:$LD_LIBRARY_PATH"
# Background monitors, each logging to its own file
scripts/oom-monitor.sh > oom-monitor.log 2>&1 &
scripts/net-stats.sh > net-stats.log 2>&1 &
case $nodeType in
leader)
./multinode-demo/setup.sh -t leader $setupArgs
./multinode-demo/drone.sh > drone.log 2>&1 &
./multinode-demo/leader.sh > leader.log 2>&1 &
;;
validator)
# Validators copy the binaries from the entrypoint node
net/scripts/rsync-retry.sh -vPrc "$entrypointIp:~/.cargo/bin/solana*" ~/.cargo/bin/
./multinode-demo/setup.sh -t validator $setupArgs
./multinode-demo/validator.sh "$entrypointIp":~/solana "$entrypointIp:8001" >validator.log 2>&1 &
;;
*)
echo "Error: unknown node type: $nodeType"
exit 1
;;
esac
;;
*)
echo "Unknown deployment method: $deployMethod"
exit 1
esac

138
net/remote/remote-sanity.sh Executable file
View File

@ -0,0 +1,138 @@
#!/bin/bash -e
#
# This script is to be run on the leader node
#
# It runs four checks in order: wallet sanity, node count convergence, ledger
# verification and validator sanity.  The last two can be disabled with
# |-o noLedgerVerify| and |-o noValidatorSanity|.
#
cd "$(dirname "$0")"/../..
# Expected to be provided by the deployConfig file sourced below
deployMethod=
entrypointIp=
numNodes=
[[ -r deployConfig ]] || {
echo deployConfig missing
exit 1
}
# shellcheck source=/dev/null # deployConfig is written by remote-node.sh
source deployConfig
# Report a missing mandatory setting and abort
missing() {
echo "Error: $1 not specified"
exit 1
}
[[ -n $deployMethod ]] || missing deployMethod
[[ -n $entrypointIp ]] || missing entrypointIp
[[ -n $numNodes ]] || missing numNodes
ledgerVerify=true
validatorSanity=true
# Consume leading "-o <name>" pairs
while [[ $1 = -o ]]; do
opt="$2"
shift 2
case $opt in
noLedgerVerify)
ledgerVerify=false
;;
noValidatorSanity)
validatorSanity=false
;;
*)
echo "Error: unknown option: $opt"
exit 1
;;
esac
done
source net/common.sh
loadConfigFile
# Select program names and file locations for the deployment method in use
case $deployMethod in
snap)
PATH="/snap/bin:$PATH"
export USE_SNAP=1
entrypointRsyncUrl="$entrypointIp"
solana_bench_tps=solana.bench-tps
solana_ledger_tool=solana.ledger-tool
solana_keygen=solana.keygen
ledger=/var/snap/solana/current/config/ledger
client_id=~/snap/solana/current/config/client-id.json
;;
local)
PATH="$HOME"/.cargo/bin:"$PATH"
export USE_INSTALL=1
entrypointRsyncUrl="$entrypointIp:~/solana"
solana_bench_tps=solana-bench-tps
solana_ledger_tool=solana-ledger-tool
solana_keygen=solana-keygen
ledger=config/ledger
client_id=config/client-id.json
;;
*)
echo "Unknown deployment method: $deployMethod"
exit 1
esac
echo "--- $entrypointIp: wallet sanity"
(
set -x
scripts/wallet-sanity.sh "$entrypointRsyncUrl"
)
echo "+++ $entrypointIp: node count ($numNodes expected)"
(
set -x
# Generate a client identity, then run bench-tps in --converge-only mode
# against the expected node count
$solana_keygen -o "$client_id"
$solana_bench_tps --network "$entrypointIp:8001" --identity "$client_id" --num-nodes "$numNodes" --converge-only
)
echo "--- $entrypointIp: verify ledger"
if $ledgerVerify; then
if [[ -d $ledger ]]; then
(
set -x
# Verify a copy of the ledger rather than the ledger directory itself
rm -rf /var/tmp/ledger-verify
du -hs "$ledger"
time cp -r "$ledger" /var/tmp/ledger-verify
time $solana_ledger_tool --ledger /var/tmp/ledger-verify verify
)
else
echo "^^^ +++"
echo "Ledger verify skipped: directory does not exist: $ledger"
fi
else
echo "^^^ +++"
echo "Note: ledger verify disabled"
fi
echo "--- $entrypointIp: validator sanity"
if $validatorSanity; then
# Run a validator for 10 seconds and inspect its log.
# NOTE(review): this subshell is the left-hand side of a || list, so bash
# ignores errexit inside it even though |set -e| is given; a failing setup.sh
# does not stop the pipeline that follows -- confirm this is intended.
(
set -ex -o pipefail
./multinode-demo/setup.sh -t validator
timeout 10s ./multinode-demo/validator.sh "$entrypointRsyncUrl" "$entrypointIp:8001" 2>&1 | tee validator.log
) || {
exitcode=$?
# 124 is timeout's "command timed out" status, the expected outcome here
[[ $exitcode -eq 124 ]] || exit $exitcode
}
wc -l validator.log
if grep -C100 panic validator.log; then
echo "^^^ +++"
echo "Panic observed"
exit 1
else
echo "Validator log looks ok"
fi
else
echo "^^^ +++"
echo "Note: validator sanity disabled"
fi
echo --- Pass

View File

@ -0,0 +1,27 @@
#!/bin/bash -ex
#
# Creates the |solana| user with passwordless sudo and installs the shared ssh
# key pair from /solana-id_ecdsa{,.pub} into its ~/.ssh directory
#
# Must run as root on Linux
[[ $(uname) = Linux ]] || exit 1
[[ $USER = root ]] || exit 1
adduser solana --gecos "" --disabled-password --quiet
adduser solana sudo
echo "solana ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
id solana
# The key pair must already be in place
[[ -r /solana-id_ecdsa ]] || exit 1
[[ -r /solana-id_ecdsa.pub ]] || exit 1
# As the solana user: authorize the public key for logins, install the private
# key (umask 377 makes the files created afterwards owner-read-only), and write
# an ssh client config that uses the key non-interactively without host key
# checking
sudo -u solana bash -c "
mkdir -p /home/solana/.ssh/
cd /home/solana/.ssh/
cp /solana-id_ecdsa.pub authorized_keys
umask 377
cp /solana-id_ecdsa id_ecdsa
echo \"
Host *
BatchMode yes
IdentityFile ~/.ssh/id_ecdsa
StrictHostKeyChecking no
\" > config
"

View File

@ -0,0 +1,20 @@
#!/bin/bash -ex
#
# Stops the automatic apt upgrade machinery so that it cannot hold the dpkg
# lock while |apt-get| is needed.
#
# TODO: Deleting system files is a blunt instrument; a gentler way of
#       achieving the same result would be welcome.

# Must run as root on Linux
if [[ $(uname) != Linux || $USER != root ]]; then
  exit 1
fi

# Remove the upgrade programs, then kill any instance that is already running
rm -rf /usr/lib/apt/apt.systemd.daily /usr/bin/unattended-upgrade
for upgradeProcess in apt.systemd.daily unattended-upgrade; do
  killall "$upgradeProcess" || true
done

# Block until nothing holds the dpkg lock any more
until ! fuser /var/lib/dpkg/lock; do
  echo Waiting for lock release...
  sleep 1
done

187
net/scripts/gcloud.sh Normal file
View File

@ -0,0 +1,187 @@
# |source| this file
#
# Utilities for working with gcloud
#
#
# gcloud_FindInstances [filter] [options]
#
# Find instances matching the specified pattern.
#
# For each matching instance, an entry in the `instances` array will be added with the
# following information about the instance:
# "name:zone:public IP:private IP"
#
# filter - The instances to filter on
# options - If set to the string "show", the list of instances will be echoed
# to stdout
#
# examples:
# $ gcloud_FindInstances "name=exact-machine-name"
# $ gcloud_FindInstances "name~^all-machines-with-a-common-machine-prefix"
#
gcloud_FindInstances() {
  # $1 is the gcloud --filter expression; $2 = "show" echoes each match
  declare filter="$1"
  declare options="$2"
  declare name zone publicIp privateIp status

  # The result is handed back through the global |instances| array
  instances=()
  while read -r name zone publicIp privateIp status; do
    if [[ $status = RUNNING ]]; then
      [[ $options != show ]] ||
        printf "%-30s | %-16s publicIp=%-16s privateIp=%s\n" "$name" "$zone" "$publicIp" "$privateIp"
      instances+=("$name:$zone:$publicIp:$privateIp")
    else
      # Only RUNNING instances are recorded
      echo "Warning: $name is not RUNNING, ignoring it."
    fi
  done < <(gcloud compute instances list \
             --filter="$filter" \
             --format 'value(name,zone,networkInterfaces[0].accessConfigs[0].natIP,networkInterfaces[0].networkIP,status)')
}
#
# gcloud_ForEachInstance [cmd] [extra args to cmd]
#
# Runs a command once for every entry of the `instances` array.
#
# cmd - The command to run for each instance.  It is invoked with the
#       following arguments, followed by any additional arguments that were
#       supplied to gcloud_ForEachInstance:
#         name      - name of the instance
#         zone      - zone the instance is located in
#         publicIp  - the public IP address of the instance
#         privateIp - the private IP address of the instance
#         count     - monotonically increasing counter, one per invocation
#                     of cmd, starting at 1
#         ...       - the extra args
#
gcloud_ForEachInstance() {
  declare cmd="$1"
  shift
  [[ -n $cmd ]] || { echo gcloud_ForEachInstance: cmd not specified; exit 1; }

  declare count=1
  for info in "${instances[@]}"; do
    # Split the "name:zone:publicIp:privateIp" record into its fields
    declare name zone publicIp privateIp
    IFS=: read -r name zone publicIp privateIp <<<"$info"

    eval "$cmd" "$name" "$zone" "$publicIp" "$privateIp" "$count" "$@"
    (( count += 1 ))
  done
}
#
# gcloud_CreateInstances [namePrefix] [numNodes] [zone] [imageName]
#                        [machineType] [bootDiskSize] [accelerator]
#                        [startupScript] [address]
#
# Creates one or more identical instances.
#
# namePrefix    - unique string to prefix all the instance names with
# numNodes      - number of instances to create
# zone          - zone to create the instances in
# imageName     - Disk image for the instances
# machineType   - GCE machine type
# bootDiskSize  - Optional size of the boot disk
# accelerator   - Optional accelerator to attach to the instance(s), see
#                 eg, request 4 K80 GPUs with "count=4,type=nvidia-tesla-k80"
# startupScript - Optional startup script to execute when the instance boots
# address       - Optional name of the GCE static IP address to attach to the
#                 instance. Requires that |numNodes| = 1 and that addressName
#                 has been provisioned in the GCE region that is hosting |zone|
#
# A single instance is named exactly |namePrefix|; multiple instances are named
# |namePrefix| followed by a zero-padded sequence number.
#
# Tip: use gcloud_FindInstances to locate the instances once this function
# returns
gcloud_CreateInstances() {
  declare namePrefix="$1"
  declare numNodes="$2"
  declare zone="$3"
  declare imageName="$4"
  declare machineType="$5"
  declare optionalBootDiskSize="$6"
  declare optionalAccelerator="$7"
  declare optionalStartupScript="$8"
  declare optionalAddress="$9"

  declare -a nodes=()
  if [[ $numNodes = 1 ]]; then
    nodes=("$namePrefix")
  else
    # Read one name per line of seq output.  Reading them through an unquoted
    # here-string only works on bash versions that word-split here-strings
    # (before 4.4); newer versions would deliver just the first name.
    declare node
    while IFS= read -r node; do
      nodes+=("$node")
    done < <(seq -f "${namePrefix}%0${#numNodes}g" 1 "$numNodes")
  fi

  declare -a args
  args=(
    "--zone=$zone"
    "--tags=testnet"
    "--image=$imageName"
    "--machine-type=$machineType"
  )
  if [[ -n $optionalBootDiskSize ]]; then
    args+=(
      "--boot-disk-size=$optionalBootDiskSize"
    )
  fi
  if [[ -n $optionalAccelerator ]]; then
    args+=(
      "--accelerator=$optionalAccelerator"
      --maintenance-policy TERMINATE
      --restart-on-failure
    )
  fi
  if [[ -n $optionalStartupScript ]]; then
    args+=(
      --metadata-from-file "startup-script=$optionalStartupScript"
    )
  fi
  if [[ -n $optionalAddress ]]; then
    [[ $numNodes = 1 ]] || {
      echo "Error: address may not be supplied when provisioning multiple nodes: $optionalAddress"
      exit 1
    }
    args+=(
      "--address=$optionalAddress"
    )
  fi

  (
    set -x
    gcloud beta compute instances create "${nodes[@]}" "${args[@]}"
  )
}
#
# gcloud_DeleteInstances [yes]
#
# Deletes all the instances listed in the `instances` array
#
# If yes = "true", skip the delete confirmation
#
gcloud_DeleteInstances() {
  declare -a maybeQuiet=()
  if [[ $1 = true ]]; then
    maybeQuiet=(--quiet)
  fi

  # Test the number of array entries (not the length of the first entry)
  if [[ ${#instances[@]} -eq 0 ]]; then
    echo No instances to delete
    return
  fi

  # Each entry is "name:zone:publicIp:privateIp"; keep only the name
  declare names=("${instances[@]/:*/}")

  # Assume all instances are in the same zone
  # TODO: One day this assumption will be invalid
  declare zone
  IFS=: read -r _ zone _ < <(echo "${instances[0]}")

  (
    set -x
    gcloud beta compute instances delete --zone "$zone" "${maybeQuiet[@]}" "${names[@]}"
  )
}

30
net/scripts/install-earlyoom.sh Executable file
View File

@ -0,0 +1,30 @@
#!/bin/bash -ex
#
# Install EarlyOOM
#
# Must run as root on Linux
[[ $(uname) = Linux ]] || exit 1
[[ $USER = root ]] || exit 1
# 64 - enable signalling of processes (term, kill, oom-kill)
# The bit is OR-ed into the current value so the other sysrq bits are kept
# TODO: This setting will not persist across reboots
sysctl -w kernel.sysrq=$(( $(cat /proc/sys/kernel/sysrq) | 64 ))
if command -v earlyoom; then
# Already installed: only report the service status
systemctl status earlyoom
else
wget http://ftp.us.debian.org/debian/pool/main/e/earlyoom/earlyoom_1.1-2_amd64.deb
apt install --quiet --yes ./earlyoom_1.1-2_amd64.deb
# Write the service defaults to a scratch file, install it as
# /etc/default/earlyoom, then restart the service with the new settings
cat > earlyoom <<OOM
# use the kernel OOM killer, trigger at 20% available RAM,
EARLYOOM_ARGS="-k -m 20"
OOM
cp earlyoom /etc/default/
rm earlyoom
systemctl stop earlyoom
systemctl enable earlyoom
systemctl start earlyoom
fi

View File

@ -0,0 +1,18 @@
#!/bin/bash -ex
#
# Installs the libssl packages needed to run binaries that were built elsewhere
#

# Must run as root on Linux
if [[ $(uname) != Linux || $USER != root ]]; then
  exit 1
fi

# libssl-dev: compatibility with binaries built on an Ubuntu machine...
apt-get update
apt-get --assume-yes install libssl-dev

# libssl1.1: compatibility with binaries built in the solanalabs/rust docker
# image
#
# cc: https://github.com/solana-labs/solana/issues/1090
# cc: https://packages.ubuntu.com/bionic/amd64/libssl1.1/download
libssl11Deb=libssl1.1_1.1.0g-2ubuntu4.1_amd64.deb
wget "http://security.ubuntu.com/ubuntu/pool/main/o/openssl/$libssl11Deb"
dpkg -i "$libssl11Deb"
rm "$libssl11Deb"

19
net/scripts/install-rsync.sh Executable file
View File

@ -0,0 +1,19 @@
#!/bin/bash -ex
#
# Rsync setup for Snap builds
#
# Installs rsync and enables the rsync service with a read-only [config]
# module that exposes the Snap's config directory to any host
#
# Must run as root on Linux
[[ $(uname) = Linux ]] || exit 1
[[ $USER = root ]] || exit 1
apt-get --assume-yes install rsync
cat > /etc/rsyncd.conf <<-EOF
[config]
path = /var/snap/solana/current/config
hosts allow = *
read only = true
EOF
systemctl enable rsync
systemctl start rsync

12
net/scripts/rsync-retry.sh Executable file
View File

@ -0,0 +1,12 @@
#!/bin/bash
#
# rsync wrapper that retries a few times on failure
#
# All arguments are passed to rsync unchanged.  Exits 0 as soon as one attempt
# succeeds and 1 when every attempt has failed.
#

for i in $(seq 1 5); do
  (
    set -x
    rsync "$@"
  ) && exit 0
  echo Retry "$i"...
done

# Without an explicit status the script would end with the exit status of the
# last |echo| and report success although nothing was transferred
exit 1

69
net/ssh.sh Executable file
View File

@ -0,0 +1,69 @@
#!/bin/bash
here=$(dirname "$0")
# shellcheck source=net/common.sh
source "$here"/common.sh
# Print the help text and terminate the script.
#
# Without an argument the exit status is 0; with one, the argument is echoed as
# "Error: <message>" ahead of the help text and the exit status is 1.
usage() {
  if [[ -z "$1" ]]; then
    exitcode=0
  else
    exitcode=1
    echo "Error: $*"
  fi
  cat <<EOF
usage: $0 [ipAddress] [extra ssh arguments]
ssh into a node
ipAddress - IP address of the desired node.
If ipAddress is unspecified, a list of available nodes will be displayed.
EOF
  exit "$exitcode"
}
# Only -h (and unrecognized options) are handled: both print the help text
while getopts "h?" opt; do
case $opt in
h | \?)
usage
;;
*)
usage "Error: unhandled option: $opt"
;;
esac
done
# loadConfigFile comes from the sourced common.sh; presumably it defines
# sshOptions, leaderIp, validatorIpList and clientIpList -- TODO confirm
loadConfigFile
# The first argument is the node to connect to; everything after it is handed
# to ssh unchanged
ipAddress=$1
shift
if [[ -n "$ipAddress" ]]; then
set -x
# exec replaces this script with ssh, so nothing below runs in this case
exec ssh "${sshOptions[@]}" "$ipAddress" "$@"
fi
# Print one line describing how to ssh into a node and how to follow its log.
#
# $1 - node type, used as the log file name (solana/<nodeType>.log)
# $2 - IP address of the node
printNode() {
  declare nodeType=$1
  declare ip=$2
  # All variable text is passed as printf arguments, never as part of the
  # format string, so a '%' or '\' in the script path cannot be misinterpreted
  printf " %-25s | For logs run: %s %s tail -f solana/%s.log\n" "$0 $ip" "$0" "$ip" "$nodeType"
}
# No node was specified: list every known node together with the command to
# reach it

echo Leader:
printNode leader "$leaderIp"

echo
echo Validators:
for validatorIp in "${validatorIpList[@]}"; do
  printNode validator "$validatorIp"
done

echo
echo Clients:
if (( ${#clientIpList[@]} > 0 )); then
  for clientIp in "${clientIpList[@]}"; do
    printNode client "$clientIp"
  done
else
  echo " None"
fi

exit 0

View File

@ -4,7 +4,7 @@ The goal of this RFC is to define a set of constraints for APIs and runtime such
## Version
version 0.1
version 0.2
## Toolchain Stack
@ -37,154 +37,175 @@ version 0.1
In Figure 1 an untrusted client, creates a program in the front-end language of her choice, (like C/C++/Rust/Lua), and compiles it with LLVM to a position independent shared object ELF, targeting BPF bytecode. Solana will safely load and execute the ELF.
## Bytecode
Our bytecode is based on Berkeley Packet Filter. The requirements for BPF overlap almost exactly with the requirements we have:
1. Deterministic amount of time to execute the code
2. Bytecode that is portable between machine instruction sets
3. Verified memory accesses
4. Fast to load the object, verify the bytecode and JIT to local machine instruction set
For 1, that means that loops are unrolled, and for any jumps back we can guard them with a check against the number of instructions that have been executed at this point. If the limit is reached, the program yields its execution. This involves saving the stack and current instruction index.
For 2, the BPF bytecode already easily maps to x86-64, arm64 and other instruction sets.
For 3, every load and store that is relative can be checked to be within the expected memory that is passed into the ELF. Dynamic load and stores can do a runtime check against available memory, these will be slow and should be avoided.
For 4, Fully linked PIC ELF with just a single RX segment. Effectively we are linking a shared object with `-fpic -target bpf` and with a linker script to collect everything into a single RX segment. Writable globals are not supported.
### Address Checks
The interface to the module takes a `&mut Vec<Vec<u8>>` in rust, or a `int sz, void* data[sz], int szs[sz]` in `C`. Given the module's bytecode, for each method, we need to analyze the bounds on load and stores into each buffer the module uses. This check needs to be done `on chain`, and after those bounds are computed we can verify that the user supplied array of buffers will not cause a memory fault. For load and stores that we cannot analyze, we can replace with a `safe_load` and `safe_store` instruction that will check the table for access.
## Loader
The loader is our first smart contract. The job of this contract is to load the actual program with its own instance data. The loader will verify the bytecode and that the object implements the expected entry points.
Since there is only one RX segment, the context for the contract instance is passed into each entry point as well as the event data for that entry point.
A client will create a transaction to create a new loader instance:
`Solana_NewLoader(Loader Instance PubKey, proof of key ownership, space I need for my elf)`
A client will then do a bunch of transactions to load its elf into the loader instance they created:
`Loader_UploadElf(Loader Instance PubKey, proof of key ownership, pos start, pos end, data)`
At this point the client can create a new instance of the module with its own instance address:
`Loader_NewInstance(Loader Instance PubKey, proof of key ownership, Instance PubKey, proof of key ownership)`
Once the instance has been created, the client may need to upload more user data to solana to configure this instance:
`Instance_UploadModuleData(Instance PubKey, proof of key ownership, pos start, pos end, data)`
Now clients can `start` the instance:
`Instance_Start(Instance PubKey, proof of key ownership)`
## Runtime
Our goal with the runtime is to have a general purpose execution environment that is highly parallelizable and doesn't require dynamic resource management. We want to execute as many contracts as we can in parallel, and have them pass or fail without a destructive state change.
### State and Entry Point
State is addressed by an account which is at the moment simply the PubKey. Our goal is to eliminate dynamic memory allocation in the smart contract itself, so the contract is a function that takes a mapping of [(PubKey,State)] and returns [(PubKey, State')]. The output of keys is a subset of the input. Three basic kinds of state exist:
* Instance State
* Participant State
* Caller State
There isn't any difference in how each is implemented, but conceptually Participant State is memory that is allocated for each participant in the contract. Instance State is memory that is allocated for the contract itself, and Caller State is memory that the transactions caller has allocated.
The goal with the runtime is to have a general purpose execution environment that is highly parallelizable and doesn't require dynamic resource management. The goal is to execute as many contracts as possible in parallel, and have them pass or fail without a destructive state change.
### Call
### State
State is addressed by an account which is at the moment simply the Pubkey. Our goal is to eliminate memory allocation from within the smart contract itself. Thus the client of the contract provides all the state that is necessary for the contract to execute in the transaction itself. The runtime interacts with the contract through a state transition function, which takes a mapping of [(Pubkey,State)] and returns [(Pubkey, State')]. The State is an opaque type to the runtime, a `Vec<u8>`, the contents of which the contract has full control over.
### Call Structure
```
void call(
const struct instance_data *data,
const uint8_t kind[], //instance|participant|caller|read|write
const uint8_t *keys[],
uint8_t *data[],
int num,
uint8_t dirty[], //dirty memory bits
uint8_t *userdata, //current transaction data
);
/// Call definition
/// Signed portion
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct CallData {
/// Each Pubkey in this vector is mapped to a corresponding `Page` that is loaded for contract execution
/// In a simple pay transaction `key[0]` is the token owner's key and `key[1]` is the recipient's key.
pub keys: Vec<Pubkey>,
/// The Pubkeys that are required to have a proof. The proofs are a `Vec<Signature> which encoded along side this data structure
/// Each Signature signs the `required_proofs` vector as well as the `keys` vectors. The transaction is valid if and only if all
/// the required signatures are present and the public key vector is unchanged between signatures.
pub required_proofs: Vec<u8>,
/// PoH data
/// last PoH hash observed by the sender
pub last_id: Hash,
/// Program
/// The address of the program we want to call. ContractId is just a Pubkey that is the address of the loaded code that will execute this Call.
pub contract_id: ContractId,
/// OS scheduling fee
pub fee: i64,
/// struct version to prevent duplicate spends
/// Calls with a version <= Page.version are rejected
pub version: u64,
/// method to call in the contract
pub method: u8,
/// userdata in bytes
pub userdata: Vec<u8>,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct Call {
/// Signatures and Keys
/// (signature, key index)
/// This vector contains a tuple of signatures, and the key index the signature is for
/// proofs[0] is always key[0]
pub proofs: Vec<Signature>,
pub data: CallData,
}
```
To call this operation, the transaction that is destined to the contract instance specifies what keyed state it should present to the `call` function. To allocate the state memory or a call context, the client has to first call a function on the contract with the designed address that will own the state.
At its core, this is just a set of Pubkeys and Signatures with a bit of metadata. The contract Pubkey routes this transaction into that contract's entry point. `version` is used for dropping retransmitted requests.
At its core, this is a system call that requires cryptographic proof of ownership of memory regions instead of an OS that checks page tables for access rights.
* `Instance_AllocateContext(Instance PubKey, My PubKey, Proof of key ownership)`
Any transaction can then call `call` on the contract with a set of keys. It's up to the contract itself to manage ownership:
* `Instance_Call(Instance PubKey, [Context PubKeys], proofs of ownership, userdata...)`
Contracts should be able to read any state that is part of solana, but only write to state that the contract allocated.
#### Caller State
Caller `state` is memory allocated for the `call` that belongs to the public key that is issuing the `call`. This is the caller's context.
#### Instance State
Instance `state` is memory that belongs to this contract instance. We may also need module-wide `state` as well.
#### Participant State
Participant `state` is any other memory. In some cases it may make sense to have these allocated as part of the call by the caller.
### Reduce
Some operations on the contract will require iteration over all the keys. To make this parallelizable the iteration is broken up into reduce calls which are combined.
```
void reduce_m(
const struct instance_data *data,
const uint8_t *keys[],
const uint8_t *data[],
int num,
uint8_t *reduce_data,
);
void reduce_r(
const struct instance_data *data,
const uint8_t *reduce_data[],
int num,
uint8_t *reduce_data,
);
```
Contracts should be able to read any state that is part of runtime, but only write to state that the contract allocated.
### Execution
Transactions are batched and processed in parallel at each stage.
```
+-----------+ +--------------+ +-----------+ +---------------+
| sigverify |-+->| debit commit |---+->| execution |-+->| memory commit |
+-----------+ | +--------------+ | +-----------+ | +---------------+
| | |
| +---------------+ | | +--------------+
|->| memory verify |->+ +->| debit undo |
+---------------+ | +--------------+
|
| +---------------+
+->| credit commit |
+---------------+
Calls batched and processed in a pipeline
```
The `debit verify` stage is very similar to `memory verify`. Proof of key ownership is used to check if the caller's key has some state allocated with the contract, then the memory is loaded and executed. After the execution stage, the dirty pages are written back by the contract. Because we know all the memory accesses during execution, we can batch transactions that do not interfere with each other. We can also apply the `debit undo` and `credit commit` stages of the transaction. `debit undo` is run in case of an exception during contract execution; only transfers may be reversed, fees are committed to solana.
+-----------+ +-------------+ +--------------+ +--------------------+
| sigverify |--->| lock memory |--->| validate fee |--->| allocate new pages |--->
+-----------+ +-------------+ +--------------+ +--------------------+
+------------+ +---------+ +--------------+ +-=------------+
--->| load pages |--->| execute |--->|unlock memory |--->| commit pages |
+------------+ +---------+ +--------------+ +--------------+
### GPU execution
```
A single contract can read and write to separate key pairs without interference. These separate calls to the same contract can execute on the same GPU thread over different memory using different SIMD lanes.
At the `execute` stage, the loaded pages have no data dependencies, so all the contracts can be executed in parallel.
## Memory Management
```
pub struct Page {
/// key that indexes this page
/// prove ownership of this key to spend from this Page
owner: Pubkey,
/// contract that owns this page
/// contract can write to the data that is in `memory` vector
contract: Pubkey,
/// balance that belongs to owner
balance: u64,
/// version of the structure, public for testing
version: u64,
/// hash of the page data
memhash: Hash,
/// The following could be in a separate structure
memory: Vec<u8>,
}
```
The guarantee that runtime enforces:
1. The contract code is the only code that will modify the contents of `memory`
2. Total balances on all the pages is equal before and after execution of a call
3. Balances of each of the pages not owned by the contract must be equal to or greater after the call than before the call.
## Entry Point
Execution of the contract involves mapping the contract's public key to an entry point which takes a pointer to the transaction, and an array of loaded pages.
```
// Find the method
match (tx.contract, tx.method) {
// system interface
// everyone has the same reallocate
(_, 0) => system_0_realloc(&tx, &mut call_pages),
(_, 1) => system_1_assign(&tx, &mut call_pages),
// contract methods
(DEFAULT_CONTRACT, 128) => default_contract_128_move_funds(&tx, &mut call_pages),
(contract, method) => //...
```
The first 128 methods (0 to 127) are reserved for the system interface, which implements allocation and assignment of memory. The rest, including the contract for moving funds, are implemented by the contract itself.
## System Interface
```
/// SYSTEM interface, same for every contract, methods 0 to 127
/// method 0
/// reallocate
/// spend the funds from the call to the first recipient's
pub fn system_0_realloc(call: &Call, pages: &mut Vec<Page>) {
if call.contract == DEFAULT_CONTRACT {
let size: u64 = deserialize(&call.userdata).unwrap();
pages[0].memory.resize(size as usize, 0u8);
}
}
/// method 1
/// assign
/// assign the page to a contract
pub fn system_1_assign(call: &Call, pages: &mut Vec<Page>) {
let contract = deserialize(&call.userdata).unwrap();
if call.contract == DEFAULT_CONTRACT {
pages[0].contract = contract;
//zero out the memory in pages[0].memory
//Contracts need to own the state of that data otherwise a user could fabricate the state and
//manipulate the contract
pages[0].memory.clear();
}
}
```
The first method resizes the memory that is associated with the caller's page. The second system call assigns the page to the contract. Both methods check if the current contract is 0, otherwise the method does nothing and the caller spent their fees.
This ensures that when memory is assigned to the contract the initial state of all the bytes is 0, and the contract itself is the only thing that can modify that state.
## Simplest contract
```
/// DEFAULT_CONTRACT interface
/// All contracts start with 128
/// method 128
/// move_funds
/// spend the funds from the call to the first recipient's
pub fn default_contract_128_move_funds(call: &Call, pages: &mut Vec<Page>) {
let amount: u64 = deserialize(&call.userdata).unwrap();
if pages[0].balance >= amount {
pages[0].balance -= amount;
pages[1].balance += amount;
}
}
```
This simply moves the amount from page[0], which is the caller's page, to page[1], which is the recipient's page.
## Notes
1. There is no dynamic memory allocation.
2. Persistant Memory is allocated to a Key with ownership
2. Persistent Memory is allocated to a Key with ownership
3. Contracts can `call` to update key owned state
4. Contracts can `reduce` over the memory to aggregate state
5. `call` is just a *syscall* that does a cryptographic check of memory owndershp
4. `call` is just a *syscall* that does a cryptographic check of memory ownership
5. Kernel guarantees that when memory is assigned to the contract its state is 0
6. Kernel guarantees that contract is the only thing that can modify memory that its assigned to
7. Kernel guarantees that the contract can only spend tokens that are in pages that are assigned to it
8. Kernel guarantees the balances belonging to pages are balanced before and after the call

View File

@ -0,0 +1,59 @@
```
========================= master branch (edge channel) =======================>
\ \ \
\___v0.7.0 tag \ \
\ \ v0.9.0 tag__\
\ v0.8.0 tag__\ \
v0.7.1 tag__\ \ v0.9 branch (beta channel)
\___v0.7.2 tag \___v0.8.1 tag
\ \
\ \
v0.7 branch v0.8 branch (stable channel)
```
## Branches and Tags
### master branch
All new development occurs on the `master` branch.
Bug fixes that affect a `vX.Y` branch are first made on `master`. This is to
allow a fix some soak time on `master` before it is applied to one or more
stabilization branches.
Merging to `master` first also helps ensure that fixes applied to one release
are present for future releases. (Sometimes the joy of landing a critical
release blocker in a branch causes you to forget to propagate back to
`master`!)
Once the bug fix lands on `master` it is cherry-picked into the `vX.Y` branch
and potentially the `vX.Y-1` branch. The exception to this rule is when a bug
fix for `vX.Y` doesn't apply to `master` or `vX.Y-1`.
Immediately after a new stabilization branch is forged, the `Cargo.toml` minor
version (*Y*) in the `master` branch is incremented by the release engineer.
Incrementing the major version of the `master` branch is outside the scope of
this document.
### v*X.Y* stabilization branches
These are stabilization branches for a given milestone. They are created off
the `master` branch as late as possible prior to the milestone release.
### v*X.Y.Z* release tag
The release tags are created as desired by the owner of the given stabilization
branch, and cause that *X.Y.Z* release to be shipped to https://crates.io,
https://snapcraft.io/, and elsewhere.
Immediately after a new v*X.Y.Z* branch tag has been created, the `Cargo.toml`
patch version number (*Z*) of the stabilization branch is incremented by the
release engineer.
## Channels
Channels are used by end-users (humans and bots) to consume the branches
described in the previous section, so they may automatically update to the most
recent version matching their desired stability.
There are three release channels that map to branches as follows:
* edge - tracks the `master` branch, least stable.
* beta - tracks the largest (and latest) `vX.Y` stabilization branch, more stable.
* stable - tracks the second largest `vX.Y` stabilization branch, most stable.

View File

@ -0,0 +1,51 @@
# |source| this file
#
# The SOLANA_METRICS_CONFIG environment variable is formatted as a
# comma-delimited list of parameters. All parameters are optional.
#
# Example:
# export SOLANA_METRICS_CONFIG="host=<metrics host>,db=<database name>,u=<username>,p=<password>"
#
# The following directive disable complaints about unused variables in this
# file:
# shellcheck disable=2034
#
metricsWriteDatapoint="$(dirname "${BASH_SOURCE[0]}")"/metrics-write-datapoint.sh
# Parses the comma-delimited SOLANA_METRICS_CONFIG variable and exports the
# matching INFLUX_* environment variables.
#
# Recognized parameter names:
#   host -> INFLUX_HOST
#   db   -> INFLUX_DATABASE
#   u    -> INFLUX_USERNAME
#   p    -> INFLUX_PASSWORD (echoed masked)
#
# Does nothing when SOLANA_METRICS_CONFIG is empty or unset.  Malformed or
# unknown parameters are reported on stderr and skipped; they do not abort the
# remaining parameters.
configureMetrics() {
  [[ -n $SOLANA_METRICS_CONFIG ]] || return 0

  # Keep every helper variable local so sourcing this file does not leak
  # loop state into the caller's namespace.
  declare metricsParams param name value
  IFS=',' read -r -a metricsParams <<< "$SOLANA_METRICS_CONFIG"
  for param in "${metricsParams[@]}"; do
    # Split on the FIRST '=' only, so that values which themselves contain
    # '=' (passwords, URLs with query strings) are accepted intact.
    name=${param%%=*}
    value=${param#*=}
    if [[ $param != *=* || -z $value ]]; then
      echo Error: invalid metrics parameter: "$param" >&2
      continue
    fi

    case "$name" in
    host)
      export INFLUX_HOST="$value"
      echo INFLUX_HOST="$INFLUX_HOST" >&2
      ;;
    db)
      export INFLUX_DATABASE="$value"
      echo INFLUX_DATABASE="$INFLUX_DATABASE" >&2
      ;;
    u)
      export INFLUX_USERNAME="$value"
      echo INFLUX_USERNAME="$INFLUX_USERNAME" >&2
      ;;
    p)
      export INFLUX_PASSWORD="$value"
      # Never print the real password.
      echo INFLUX_PASSWORD="********" >&2
      ;;
    *)
      echo Error: Unknown metrics parameter name: "$name" >&2
      ;;
    esac
  done
}
configureMetrics

View File

@ -1,4 +1,7 @@
#!/bin/bash -e
#
# Send a metrics datapoint
#
point=$1
if [[ -z $point ]]; then

49
scripts/net-stats.sh Executable file
View File

@ -0,0 +1,49 @@
#!/bin/bash -e
#
# Reports network statistics
#
[[ $(uname) == Linux ]] || exit 0
cd "$(dirname "$0")"
# shellcheck source=scripts/configure-metrics.sh
source configure-metrics.sh
packets_received=0
packets_received_diff=0
receive_errors=0
receive_errors_diff=0
rcvbuf_errors=0
rcvbuf_errors_diff=0
# Takes one `netstat -suna` snapshot and refreshes the three global counters
# (packets_received, receive_errors, rcvbuf_errors) together with their
# *_diff companions, which hold the change since the previous call.
# A counter that is absent from the snapshot is treated as 0.
update_netstat() {
  declare snapshot
  declare current

  snapshot=$(netstat -suna)

  current=$(awk 'BEGIN {tmp_var = 0} /packets received/ {tmp_var = $1} END { print tmp_var }' <<< "$snapshot")
  packets_received_diff=$((current - packets_received))
  packets_received="$current"

  current=$(awk 'BEGIN {tmp_var = 0} /packet receive errors/ {tmp_var = $1} END { print tmp_var }' <<< "$snapshot")
  receive_errors_diff=$((current - receive_errors))
  receive_errors="$current"

  # This counter is matched by label; its value is read from the second field.
  current=$(awk 'BEGIN {tmp_var = 0} /RcvbufErrors/ {tmp_var = $2} END { print tmp_var }' <<< "$snapshot")
  rcvbuf_errors_diff=$((current - rcvbuf_errors))
  rcvbuf_errors="$current"
}
# Prime the counters once before reporting.  They start at 0, so this first
# call leaves each *_diff equal to the full running total; the loop below
# therefore only ever reports changes relative to this baseline.
update_netstat
# Report the per-interval deltas forever, roughly once per second.
while true; do
update_netstat
report="packets_received=$packets_received_diff,receive_errors=$receive_errors_diff,rcvbuf_errors=$rcvbuf_errors_diff"
echo "$report"
# Datapoint format: "<measurement>,<tag>=<value> <field>=<value>,..."
./metrics-write-datapoint.sh "net-stats,hostname=$HOSTNAME $report"
sleep 1
done
# NOTE(review): the loop above has no break, so this looks reachable only if
# the loop is ever changed to terminate — confirm the intent.
exit 1

View File

@ -3,19 +3,21 @@
# Reports Linux OOM Killer activity
#
here=$(dirname "$0")
# shellcheck source=multinode-demo/common.sh
source "$here"/common.sh
cd "$(dirname "$0")"
if [[ $(uname) != Linux ]]; then
exit 0
fi
# shellcheck source=scripts/oom-score-adj.sh
source oom-score-adj.sh
# shellcheck source=scripts/configure-metrics.sh
source configure-metrics.sh
[[ $(uname) = Linux ]] || exit 0
syslog=/var/log/syslog
if [[ ! -r $syslog ]]; then
[[ -r $syslog ]] || {
echo Unable to read $syslog
exit 0
fi
exit 1
}
# Adjust OOM score to reduce the chance that this script will be killed
# during an Out of Memory event since the purpose of this script is to
@ -24,9 +26,10 @@ oom_score_adj "self" -500
while read -r victim; do
echo "Out of memory event detected, $victim killed"
"$here"/metrics_write_datapoint.sh "oom-killer,victim=$victim killed=1"
./metrics-write-datapoint.sh "oom-killer,victim=$victim,hostname=$HOSTNAME killed=1"
done < <( \
tail --follow=name --retry -n0 $syslog \
| sed --unbuffered -n 's/^.* Out of memory: Kill process [1-9][0-9]* (\([^)]*\)) .*/\1/p' \
)
exit 1

20
scripts/oom-score-adj.sh Normal file
View File

@ -0,0 +1,20 @@
# |source| this file
#
# Adjusts the OOM score for the specified process. Linux only
#
# usage: oom_score_adj [pid] [score]
#
# Writes `score` into /proc/<pid>/oom_score_adj and reads it back to verify.
#   $1 - pid (anything valid under /proc, e.g. a number or "self")
#   $2 - desired score
# A silent no-op on anything other than Linux.  A failed write or read is
# tolerated; the mismatch is reported on stdout instead of failing the caller.
oom_score_adj() {
  declare pid=$1
  declare score=$2

  [[ $(uname) == Linux ]] || return 0

  declare procFile="/proc/$pid/oom_score_adj"
  echo "$score" > "$procFile" || true

  declare observed
  observed=$(cat "$procFile" || true)
  if [[ "$observed" != "$score" ]]; then
    echo "Failed to set oom_score_adj to $score for pid $pid (current score: $observed)"
  fi
}

70
scripts/perf-stats.py Executable file
View File

@ -0,0 +1,70 @@
#!/usr/bin/env python3
# Summarizes counter throughput from a log file.
#
# Usage: perf-stats.py <input file>
#
# Every input line containing "COUNTER" is expected to carry a JSON object
# (starting at the first "{") with the keys 'name', 'now' and 'counts'.  For
# each distinct 'name' the script derives a rate between consecutive samples
# and prints max/min/median/average/80th-percentile figures plus the top rates.
#
# NOTE(review): leading indentation was lost in this rendering of the file, so
# the nesting described in the comments below is inferred — confirm against
# the checked-in script.
import json
import sys
# Per-counter accumulator, keyed by the JSON 'name' field.
stages_data = {}
if len(sys.argv) != 2:
print("USAGE: {} <input file>".format(sys.argv[0]))
sys.exit(1)
with open(sys.argv[1]) as fh:
for line in fh.readlines():
if "COUNTER" in line:
# Everything from the first "{" to end of line is parsed as JSON.
json_part = line[line.find("{"):]
x = json.loads(json_part)
counter = x['name']
if not (counter in stages_data):
# First sighting of this counter: baseline timestamps at this sample and
# start the running count at 0.
stages_data[counter] = {'first_ts': x['now'], 'last_ts': x['now'], 'last_count': 0,
'data': [], 'max_speed': 0, 'min_speed': 9999999999.0,
'count': 0,
'max_speed_ts': 0, 'min_speed_ts': 0}
stages_data[counter]['count'] += 1
count_since_last = x['counts'] - stages_data[counter]['last_count']
time_since_last = float(x['now'] - stages_data[counter]['last_ts'])
# Samples no more than 1 time unit apart are skipped (this also avoids a
# division by zero for the first sample of a counter).
if time_since_last > 1:
# presumably 'now' is in milliseconds, making this a per-second rate — TODO confirm
speed = 1000.0 * (count_since_last / time_since_last)
stages_data[counter]['data'].append(speed)
if speed > stages_data[counter]['max_speed']:
stages_data[counter]['max_speed'] = speed
stages_data[counter]['max_speed_ts'] = x['now']
if speed < stages_data[counter]['min_speed']:
stages_data[counter]['min_speed'] = speed
stages_data[counter]['min_speed_ts'] = x['now']
stages_data[counter]['last_ts'] = x['now']
stages_data[counter]['last_count'] = x['counts']
for stage in stages_data.keys():
# Sort ascending so positional lookups below act as percentiles.
stages_data[stage]['data'].sort()
#mean_index = stages_data[stage]['count'] / 2
mean = 0
average = 0
eightieth = 0
data_len = len(stages_data[stage]['data'])
# Despite the name, "mean" is the middle element of the sorted rates, i.e.
# the median (upper-middle for an even count); "average" is the arithmetic mean.
mean_index = int(data_len / 2)
eightieth_index = int(data_len * 0.8)
#print("mean idx: {} data.len: {}".format(mean_index, data_len))
if data_len > 0:
mean = stages_data[stage]['data'][mean_index]
average = float(sum(stages_data[stage]['data'])) / data_len
eightieth = stages_data[stage]['data'][eightieth_index]
print("stage: {} max: {:,.2f} min: {:.2f} count: {} total: {} mean: {:,.2f} average: {:,.2f} 80%: {:,.2f}".format(stage,
stages_data[stage]['max_speed'],
stages_data[stage]['min_speed'],
stages_data[stage]['count'],
stages_data[stage]['last_count'],
mean, average, eightieth))
# Print up to `num` of the largest rates, walking backwards from the end of
# the sorted list and stopping early once the next rate is below the average.
num = 5
idx = -1
if data_len >= num:
print(" top {}: ".format(num), end='')
for x in range(0, num):
print("{:,.2f} ".format(stages_data[stage]['data'][idx]), end='')
idx -= 1
# NOTE(review): idx is decremented before this lookup, so after the last
# print it is -(num + 1); with exactly `num` samples and no earlier break
# (e.g. all rates equal) this looks like it would raise IndexError — confirm.
if stages_data[stage]['data'][idx] < average:
break
print("")
print(" max_ts: {} min_ts: {}".format(stages_data[stage]['max_speed_ts'], stages_data[stage]['min_speed_ts']))
print("\n")

22
scripts/snap-config-to-env.sh Executable file
View File

@ -0,0 +1,22 @@
#!/bin/bash
#
# Snap daemons have no access to the environment so |snap set solana ...| is
# used to set runtime configuration.
#
# This script exports the snap runtime configuration options back as
# environment variables before invoking the specified program
#
# usage: snap-config-to-env.sh <program> [args...]
#
# Outside of a snap ($SNAP is not a directory) the environment is left
# untouched and the program is simply exec'ed.
#
if [[ -d $SNAP ]]; then # Running inside a Linux Snap?
# Snap option name -> environment variable:
#   rust-log             -> RUST_LOG
#   enable-cuda          -> SOLANA_CUDA
#   default-metrics-rate -> SOLANA_DEFAULT_METRICS_RATE
#   metrics-config       -> SOLANA_METRICS_CONFIG
RUST_LOG="$(snapctl get rust-log)"
SOLANA_CUDA="$(snapctl get enable-cuda)"
SOLANA_DEFAULT_METRICS_RATE="$(snapctl get default-metrics-rate)"
SOLANA_METRICS_CONFIG="$(snapctl get metrics-config)"
# Assigned first and exported afterwards (presumably to keep shellcheck's
# "declare and assign separately" check quiet — confirm).
export RUST_LOG
export SOLANA_CUDA
export SOLANA_DEFAULT_METRICS_RATE
export SOLANA_METRICS_CONFIG
fi
# Replace this shell with the requested program; it inherits the variables
# exported above.
exec "$@"

View File

@ -3,15 +3,14 @@
# Wallet sanity test
#
here=$(dirname "$0")
cd "$here"
cd "$(dirname "$0")"/..
if [[ -n "$USE_SNAP" ]]; then
# TODO: Merge wallet.sh functionality into solana-wallet proper and
# remove this USE_SNAP case
wallet="solana.wallet $1"
else
wallet="../wallet.sh $1"
wallet="multinode-demo/wallet.sh $1"
fi
# Tokens transferred to this address are lost forever...
@ -35,7 +34,7 @@ pay_and_confirm() {
$wallet reset
$wallet address
check_balance_output "Your balance is: 0"
check_balance_output "No account found" "Your balance is: 0"
$wallet airdrop --tokens 60
check_balance_output "Your balance is: 60"
$wallet airdrop --tokens 40

View File

@ -5,6 +5,7 @@ snapctl stop --disable solana.daemon-drone
snapctl stop --disable solana.daemon-leader
snapctl stop --disable solana.daemon-validator
snapctl stop --disable solana.daemon-oom-monitor
snapctl stop --disable solana.daemon-net-stats
mode="$(snapctl get mode)"
if [[ -z "$mode" ]]; then
@ -18,16 +19,16 @@ setup_args="$(snapctl get setup-args)"
case $mode in
leader+drone)
"$SNAP"/bin/setup.sh -t leader $num_tokens -p $setup_args
"$SNAP"/multinode-demo/setup.sh -t leader $num_tokens -p $setup_args
snapctl start --enable solana.daemon-drone
snapctl start --enable solana.daemon-leader
;;
leader)
"$SNAP"/bin/setup.sh -t leader $num_tokens -p $setup_args
"$SNAP"/multinode-demo/setup.sh -t leader $num_tokens -p $setup_args
snapctl start --enable solana.daemon-leader
;;
validator)
"$SNAP"/bin/setup.sh -t validator -p $setup_args
"$SNAP"/multinode-demo/setup.sh -t validator -p $setup_args
snapctl start --enable solana.daemon-validator
;;
*)
@ -37,3 +38,4 @@ validator)
esac
snapctl start --enable solana.daemon-oom-monitor
snapctl start --enable solana.daemon-net-stats

View File

@ -49,41 +49,44 @@ apps:
plugs:
- home
bench-tps:
# TODO: Merge client.sh functionality into solana-bench-tps proper
command: client.sh
#command: solana-bench-tps
command: solana-bench-tps
plugs:
- network
- network-bind
- home
wallet:
# TODO: Merge wallet.sh functionality into solana-wallet proper
command: wallet.sh
command: multinode-demo/wallet.sh
#command: solana-wallet
plugs:
- network
- home
daemon-validator:
daemon: simple
command: validator.sh
command: scripts/snap-config-to-env.sh $SNAP/multinode-demo/validator.sh
plugs:
- network
- network-bind
daemon-leader:
daemon: simple
command: leader.sh
command: scripts/snap-config-to-env.sh $SNAP/multinode-demo/leader.sh
plugs:
- network
- network-bind
daemon-drone:
daemon: simple
command: drone.sh
command: scripts/snap-config-to-env.sh $SNAP/multinode-demo/drone.sh
plugs:
- network
- network-bind
daemon-oom-monitor:
daemon: simple
command: oom_monitor.sh
command: scripts/snap-config-to-env.sh $SNAP/scripts/oom-monitor.sh
plugs:
- network
daemon-net-stats:
daemon: simple
command: scripts/snap-config-to-env.sh $SNAP/scripts/net-stats.sh
plugs:
- network
@ -92,6 +95,8 @@ parts:
plugin: nil
prime:
- bin
- multinode-demo
- scripts
- usr/lib
override-build: |
# Install CUDA 9.2 runtime
@ -108,20 +113,25 @@ parts:
rm -rf $SNAPCRAFT_PART_INSTALL/bin/*
mv $SNAPCRAFT_PART_INSTALL/solana-fullnode $SNAPCRAFT_PART_INSTALL/bin/solana-fullnode-cuda
mkdir -p $SNAPCRAFT_PART_INSTALL/usr/lib/
cp -f libJerasure.so $SNAPCRAFT_PART_INSTALL/usr/lib/libJerasure.so.2
cp -f libgf_complete.so $SNAPCRAFT_PART_INSTALL/usr/lib/libgf_complete.so.1
cp -f target/perf-libs/libJerasure.so $SNAPCRAFT_PART_INSTALL/usr/lib/libJerasure.so.2
cp -f target/perf-libs/libgf_complete.so $SNAPCRAFT_PART_INSTALL/usr/lib/libgf_complete.so.1
# Build/install all other programs
cargo install --root $SNAPCRAFT_PART_INSTALL --bins
# Install multinode scripts
mkdir -p $SNAPCRAFT_PART_INSTALL/bin
cp -av multinode-demo/* $SNAPCRAFT_PART_INSTALL/bin/
# Install multinode-demo/
mkdir -p $SNAPCRAFT_PART_INSTALL/multinode-demo/
cp -av multinode-demo/* $SNAPCRAFT_PART_INSTALL/multinode-demo/
# TODO: build curl,rsync/multilog from source instead of sneaking it in from the host
# system...
# Install scripts/
mkdir -p $SNAPCRAFT_PART_INSTALL/scripts/
cp -av scripts/* $SNAPCRAFT_PART_INSTALL/scripts/
# TODO: build curl,dig,rsync/multilog from source instead of sneaking it
# in from the host system...
set -x
mkdir -p $SNAPCRAFT_PART_INSTALL/bin
cp -av /usr/bin/curl $SNAPCRAFT_PART_INSTALL/bin/
cp -av /usr/bin/dig $SNAPCRAFT_PART_INSTALL/bin/
cp -av /usr/bin/multilog $SNAPCRAFT_PART_INSTALL/bin/
cp -av /usr/bin/rsync $SNAPCRAFT_PART_INSTALL/bin/

571
src/bank.rs Executable file → Normal file
View File

@ -1,14 +1,13 @@
//! The `bank` module tracks client balances and the progress of smart
//! The `bank` module tracks client accounts and the progress of smart
//! contracts. It offers a high-level API that signs transactions
//! on behalf of the caller, and a low-level API for when they have
//! already been signed and verified.
extern crate libc;
use bincode::{deserialize, serialize};
use chrono::prelude::*;
use counter::Counter;
use entry::Entry;
use hash::Hash;
use hash::{hash, Hash};
use itertools::Itertools;
use ledger::Block;
use log::Level;
@ -17,7 +16,7 @@ use payment_plan::{Payment, PaymentPlan, Witness};
use signature::{Keypair, Pubkey, Signature};
use std;
use std::collections::hash_map::Entry::Occupied;
use std::collections::{HashMap, HashSet, VecDeque};
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
use std::result;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::RwLock;
@ -37,7 +36,7 @@ pub const MAX_ENTRY_IDS: usize = 1024 * 16;
pub const VERIFY_BLOCK_SIZE: usize = 16;
/// Reasons a transaction might be rejected.
#[derive(Debug, PartialEq, Eq)]
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum BankError {
/// Attempt to debit from `Pubkey`, but no found no record of a prior credit.
AccountNotFound(Pubkey),
@ -62,19 +61,32 @@ pub enum BankError {
/// Proof of History verification failed.
LedgerVerificationFailed,
/// Contract's transaction token balance does not equal the balance after the transaction
UnbalancedTransaction(Signature),
/// Contract location Pubkey already contains userdata
ContractAlreadyPending(Pubkey),
}
pub type Result<T> = result::Result<T, BankError>;
/// An Account with userdata that is stored on chain
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct Account {
/// tokens in the account
pub tokens: i64,
/// user data
/// A transaction can write to its userdata
pub userdata: Vec<u8>,
}
/// Tallies of `AccountNotFound` failures, filled in by `load_account` when the
/// account for `tx.keys[0]` is missing. All counters start at zero via
/// `Default`.
#[derive(Default)]
struct ErrorCounters {
/// Incremented when the account is missing and this bank is not the leader.
account_not_found_validator: usize,
/// Incremented when the account is missing and this bank is the leader.
account_not_found_leader: usize,
/// Incremented, in addition to one of the counters above, when the failing
/// transaction's instruction is `Instruction::NewVote`.
account_not_found_vote: usize,
}
/// The state of all accounts and contracts after processing its entries.
pub struct Bank {
/// A map of account public keys to the balance in that account.
balances: RwLock<HashMap<Pubkey, i64>>,
/// A map of smart contract transaction signatures to what remains of its payment
/// plan. Each transaction that targets the plan should cause it to be reduced.
/// Once it cannot be reduced, final payments are made and it is discarded.
pending: RwLock<HashMap<Signature, Plan>>,
accounts: RwLock<HashMap<Pubkey, Account>>,
/// A FIFO queue of `last_id` items, where each item is a set of signatures
/// that have been processed using that `last_id`. Rejected `last_id`
@ -100,8 +112,7 @@ pub struct Bank {
impl Default for Bank {
fn default() -> Self {
Bank {
balances: RwLock::new(HashMap::new()),
pending: RwLock::new(HashMap::new()),
accounts: RwLock::new(HashMap::new()),
last_ids: RwLock::new(VecDeque::new()),
last_ids_sigs: RwLock::new(HashMap::new()),
transaction_count: AtomicUsize::new(0),
@ -121,7 +132,11 @@ impl Bank {
/// Create an Bank using a deposit.
pub fn new_from_deposit(deposit: &Payment) -> Self {
let bank = Self::default();
bank.apply_payment(deposit, &mut bank.balances.write().unwrap());
{
let mut accounts = bank.accounts.write().unwrap();
let account = accounts.entry(deposit.to).or_insert_with(Account::default);
Self::apply_payment(deposit, account);
}
bank
}
@ -136,9 +151,10 @@ impl Bank {
bank
}
/// Commit funds to the `payment.to` party.
fn apply_payment(&self, payment: &Payment, balances: &mut HashMap<Pubkey, i64>) {
*balances.entry(payment.to).or_insert(0) += payment.tokens;
/// Commit funds to the given account
fn apply_payment(payment: &Payment, account: &mut Account) {
trace!("apply payments {}", payment.tokens);
account.tokens += payment.tokens;
}
/// Return the last entry ID registered.
@ -160,23 +176,6 @@ impl Bank {
Ok(())
}
/// Forget the given `signature` because its transaction was rejected.
fn forget_signature(signatures: &mut HashSet<Signature>, signature: &Signature) {
signatures.remove(signature);
}
/// Forget the given `signature` with `last_id` because the transaction was rejected.
fn forget_signature_with_last_id(&self, signature: &Signature, last_id: &Hash) {
if let Some(entry) = self
.last_ids_sigs
.write()
.expect("'last_ids' read lock in forget_signature_with_last_id")
.get_mut(last_id)
{
Self::forget_signature(&mut entry.0, signature);
}
}
/// Forget all signatures. Useful for benchmarking.
pub fn clear_signatures(&self) {
for (_, sigs) in self.last_ids_sigs.write().unwrap().iter_mut() {
@ -233,121 +232,236 @@ impl Bank {
last_ids.push_back(*last_id);
}
/// Deduct tokens from the 'from' address the account has sufficient
/// funds and isn't a duplicate.
fn apply_debits(&self, tx: &Transaction, bals: &mut HashMap<Pubkey, i64>) -> Result<()> {
let mut purge = false;
/// Deduct tokens from the source account if it has sufficient funds and the contract isn't
/// pending
fn apply_debits_to_budget_payment_plan(
tx: &Transaction,
accounts: &mut [Account],
instruction: &Instruction,
) -> Result<()> {
{
let option = bals.get_mut(&tx.from);
if option.is_none() {
// TODO: this is gnarly because the counters are static atomics
if !self.is_leader {
inc_new_counter_info!("bank-appy_debits-account_not_found-validator", 1);
} else if let Instruction::NewVote(_) = &tx.instruction {
inc_new_counter_info!("bank-appy_debits-vote_account_not_found", 1);
} else {
inc_new_counter_info!("bank-appy_debits-generic_account_not_found", 1);
}
return Err(BankError::AccountNotFound(tx.from));
}
let bal = option.unwrap();
self.reserve_signature_with_last_id(&tx.signature, &tx.last_id)?;
if let Instruction::NewContract(contract) = &tx.instruction {
let tokens = if !accounts[0].userdata.is_empty() {
0
} else {
accounts[0].tokens
};
if let Instruction::NewContract(contract) = &instruction {
if contract.tokens < 0 {
return Err(BankError::NegativeTokens);
}
if *bal < contract.tokens {
self.forget_signature_with_last_id(&tx.signature, &tx.last_id);
return Err(BankError::InsufficientFunds(tx.from));
} else if *bal == contract.tokens {
purge = true;
if tokens < contract.tokens {
return Err(BankError::InsufficientFunds(tx.keys[0]));
} else {
*bal -= contract.tokens;
let bal = &mut accounts[0];
bal.tokens -= contract.tokens;
}
};
}
if purge {
bals.remove(&tx.from);
}
Ok(())
}
/// Apply only a transaction's credits.
/// Note: It is safe to apply credits from multiple transactions in parallel.
fn apply_credits(&self, tx: &Transaction, balances: &mut HashMap<Pubkey, i64>) {
match &tx.instruction {
fn apply_credits_to_budget_payment_plan(
tx: &Transaction,
accounts: &mut [Account],
instruction: &Instruction,
) -> Result<()> {
match instruction {
Instruction::NewContract(contract) => {
let plan = contract.plan.clone();
if let Some(payment) = plan.final_payment() {
self.apply_payment(&payment, balances);
Self::apply_payment(&payment, &mut accounts[1]);
Ok(())
} else if !accounts[1].userdata.is_empty() {
Err(BankError::ContractAlreadyPending(tx.keys[1]))
} else {
let mut pending = self
.pending
.write()
.expect("'pending' write lock in apply_credits");
let mut pending = HashMap::new();
pending.insert(tx.signature, plan);
//TODO this is a temporary on demand allocation
//until system contract requires explicit allocation of memory
accounts[1].userdata = serialize(&pending).unwrap();
accounts[1].tokens += contract.tokens;
Ok(())
}
}
Instruction::ApplyTimestamp(dt) => {
let _ = self.apply_timestamp(tx.from, *dt);
Self::apply_timestamp(tx.keys[0], *dt, &mut accounts[1]);
Ok(())
}
Instruction::ApplySignature(signature) => {
let _ = self.apply_signature(tx.from, *signature);
Self::apply_signature(tx.keys[0], *signature, accounts);
Ok(())
}
Instruction::NewVote(_vote) => {
trace!("GOT VOTE! last_id={:?}", &tx.last_id.as_ref()[..8]);
// TODO: record the vote in the stake table...
trace!("GOT VOTE! last_id={}", tx.last_id);
Ok(())
}
}
}
/// Budget DSL contract interface
/// * tx - the transaction
/// * accounts[0] - The source of the tokens
/// * accounts[1] - The contract context. Once the contract has been completed, the tokens can
/// be spent from this account .
pub fn process_transaction_of_budget_instruction(
tx: &Transaction,
accounts: &mut [Account],
) -> Result<()> {
let instruction = tx.instruction();
Self::apply_debits_to_budget_payment_plan(tx, accounts, &instruction)?;
Self::apply_credits_to_budget_payment_plan(tx, accounts, &instruction)
}
//TODO the contract needs to provide a "get_balance" introspection call of the userdata
/// Returns the spendable balance of `account` under the budget contract.
///
/// The account's userdata is decoded as a map of pending payment plans. If it
/// decodes and the map is non-empty, the balance is reported as 0. In every
/// other case (empty map, or userdata that does not decode) the account's
/// token count is returned.
pub fn get_balance_of_budget_payment_plan(account: &Account) -> i64 {
    let has_pending_plans = deserialize(&account.userdata)
        .map(|pending: HashMap<Signature, Plan>| !pending.is_empty())
        .unwrap_or(false);

    if has_pending_plans {
        0
    } else {
        account.tokens
    }
}
/// Process a Transaction. If it contains a payment plan that requires a witness
/// to progress, the payment plan will be stored in the bank.
pub fn process_transaction(&self, tx: &Transaction) -> Result<()> {
let bals = &mut self.balances.write().unwrap();
self.apply_debits(tx, bals)?;
self.apply_credits(tx, bals);
self.transaction_count.fetch_add(1, Ordering::Relaxed);
Ok(())
match self.process_transactions(vec![tx.clone()])[0] {
Err(ref e) => {
info!("process_transaction error: {:?}", e);
Err((*e).clone())
}
Ok(_) => Ok(()),
}
}
/// Loads working copies of every account referenced by `tx`.
///
/// * `tx` - the transaction whose `keys` select the accounts to load
/// * `accounts` - the account table to copy from
/// * `error_counters` - incremented when the account for `tx.keys[0]` is missing
///
/// Returns one `Account` per entry of `tx.keys`, in the same order. Keys that are not
/// present in `accounts` yield `Account::default()`. `tx.fee` has already been
/// subtracted from the copy at index 0.
///
/// # Errors
/// * `BankError::AccountNotFound` if `tx.keys[0]` is not in `accounts`
/// * `BankError::InsufficientFunds` if that account holds fewer tokens than `tx.fee`
/// * whatever `reserve_signature_with_last_id` returns if the signature cannot be reserved
fn load_account(
    &self,
    tx: &Transaction,
    accounts: &HashMap<Pubkey, Account>,
    error_counters: &mut ErrorCounters,
) -> Result<Vec<Account>> {
    // A single lookup of the first key serves both the existence and the fee check.
    let payer = match accounts.get(&tx.keys[0]) {
        Some(account) => account,
        None => {
            if !self.is_leader {
                error_counters.account_not_found_validator += 1;
            } else {
                error_counters.account_not_found_leader += 1;
            }
            if let Instruction::NewVote(_vote) = tx.instruction() {
                error_counters.account_not_found_vote += 1;
            }
            return Err(BankError::AccountNotFound(*tx.from()));
        }
    };
    if payer.tokens < tx.fee {
        return Err(BankError::InsufficientFunds(*tx.from()));
    }
    // There is no way to predict what contract will execute without an error
    // If a fee can pay for execution then the contract will be scheduled
    //
    // Reserve the signature before copying anything, so a rejected transaction
    // does not pay for cloning its accounts.
    self.reserve_signature_with_last_id(&tx.signature, &tx.last_id)?;
    // Copy all the accounts
    let mut called_accounts: Vec<Account> = tx
        .keys
        .iter()
        .map(|key| accounts.get(key).cloned().unwrap_or_default())
        .collect();
    called_accounts[0].tokens -= tx.fee;
    Ok(called_accounts)
}
/// Runs `load_account` for every transaction in `txs`.
///
/// Returns one result per transaction, in the same order as `txs`; a failure for one
/// transaction does not stop the remaining ones from being loaded. `error_counters`
/// is shared across all of the calls.
fn load_accounts(
    &self,
    txs: &[Transaction],
    accounts: &HashMap<Pubkey, Account>,
    error_counters: &mut ErrorCounters,
) -> Vec<Result<Vec<Account>>> {
    let mut loaded = Vec::with_capacity(txs.len());
    for tx in txs {
        loaded.push(self.load_account(tx, accounts, error_counters));
    }
    loaded
}
/// Executes `tx` against its loaded `accounts` and verifies the outcome.
///
/// Returns `Ok(tx)` when the contract succeeded and the token total is unchanged.
///
/// # Errors
/// * `BankError::UnbalancedTransaction` if the sum of tokens over `accounts` differs
///   before and after execution; this takes precedence over a contract error
/// * the contract's own error otherwise
pub fn execute_transaction(tx: Transaction, accounts: &mut [Account]) -> Result<Transaction> {
    let tokens_before: i64 = accounts.iter().map(|account| account.tokens).sum();

    // TODO next steps is to add hooks to call arbitrary contracts here
    // Call the contract method
    // It's up to the contract to implement its own rules on moving funds
    let outcome = Self::process_transaction_of_budget_instruction(&tx, accounts);

    // Verify the transaction
    // TODO, At the moment there is only 1 contract, so 1-3 are not checked
    // 1. For accounts assigned to the contract, the total sum of all the tokens in these accounts cannot increase.
    // 2. For accounts unassigned to the contract, the individual balance of each accounts cannot decrease.
    // 3. For accounts unassigned to the contract, the userdata cannot change.
    // 4. The total sum of all the tokens in all the pages cannot change.
    let tokens_after: i64 = accounts.iter().map(|account| account.tokens).sum();
    if tokens_before != tokens_after {
        return Err(BankError::UnbalancedTransaction(tx.signature));
    }
    // Hand the transaction back on success; pass a contract error through untouched.
    outcome.map(|_| tx)
}
/// Writes the working copies of successfully executed transactions back to `accounts`.
///
/// * `res` - per-transaction execution results
/// * `loaded` - per-transaction account copies, pairwise aligned with `res`
/// * `accounts` - the account table to update
///
/// A pair is only stored when both its load and its execution succeeded. For each
/// such pair the transaction's keys are matched positionally with the copies: a copy
/// holding zero tokens removes the key from `accounts`, any other copy replaces the
/// stored account.
pub fn store_accounts(
    res: &[Result<Transaction>],
    loaded: &[Result<Vec<Account>>],
    accounts: &mut HashMap<Pubkey, Account>,
) {
    for (racc, rtx) in loaded.iter().zip(res.iter()) {
        if let (Ok(acc), Ok(tx)) = (racc, rtx) {
            for (key, account) in tx.keys.iter().zip(acc.iter()) {
                //purge if 0
                if account.tokens == 0 {
                    accounts.remove(key);
                } else {
                    // `insert` overwrites any existing entry, so no default entry or
                    // follow-up lookup is needed.
                    accounts.insert(*key, account.clone());
                }
            }
        }
    }
}
/// Process a batch of transactions.
#[must_use]
pub fn process_transactions(&self, txs: Vec<Transaction>) -> Vec<Result<Transaction>> {
let bals = &mut self.balances.write().unwrap();
debug!("processing Transactions {}", txs.len());
// TODO right now a single write lock is held for the duration of processing all the
// transactions
// To break this lock each account needs to be locked to prevent concurrent access
let mut accounts = self.accounts.write().unwrap();
let txs_len = txs.len();
let mut error_counters = ErrorCounters::default();
let now = Instant::now();
let results: Vec<_> = txs
.into_iter()
.map(|tx| self.apply_debits(&tx, bals).map(|_| tx))
.collect(); // Calling collect() here forces all debits to complete before moving on.
let debits = now.elapsed();
let mut loaded_accounts = self.load_accounts(&txs, &mut accounts, &mut error_counters);
let load_elapsed = now.elapsed();
let now = Instant::now();
let res: Vec<_> = results
.into_iter()
.map(|result| {
result.map(|tx| {
self.apply_credits(&tx, bals);
tx
})
let res: Vec<Result<Transaction>> = loaded_accounts
.iter_mut()
.zip(txs.into_iter())
.map(|(acc, tx)| match acc {
Err(e) => Err(e.clone()),
Ok(ref mut accounts) => Self::execute_transaction(tx, accounts),
})
.collect();
let execution_elapsed = now.elapsed();
let now = Instant::now();
Self::store_accounts(&res, &loaded_accounts, &mut accounts);
let write_elapsed = now.elapsed();
debug!(
"debits: {} us credits: {:?} us tx: {}",
duration_as_us(&debits),
duration_as_us(&now.elapsed()),
"load: {} us execution: {} us write: {} us tx: {}",
duration_as_us(&load_elapsed),
duration_as_us(&execution_elapsed),
duration_as_us(&write_elapsed),
txs_len
);
let mut tx_count = 0;
let mut err_count = 0;
for r in &res {
@ -355,13 +469,34 @@ impl Bank {
tx_count += 1;
} else {
if err_count == 0 {
info!("tx error: {:?}", r);
trace!("tx error: {:?}", r);
}
err_count += 1;
}
}
if err_count > 0 {
info!("{} errors of {} txs", err_count, err_count + tx_count);
if !self.is_leader {
inc_new_counter_info!("bank-process_transactions_err-validator", err_count);
inc_new_counter_info!(
"bank-appy_debits-account_not_found-validator",
error_counters.account_not_found_validator
);
} else {
inc_new_counter_info!("bank-process_transactions_err-leader", err_count);
inc_new_counter_info!(
"bank-appy_debits-account_not_found-leader",
error_counters.account_not_found_leader
);
inc_new_counter_info!(
"bank-appy_debits-vote_account_not_found",
error_counters.account_not_found_vote
);
}
}
let cur_tx_count = self.transaction_count.load(Ordering::Relaxed);
if ((cur_tx_count + tx_count) & !(262144 - 1)) > cur_tx_count & !(262144 - 1) {
info!("accounts.len: {}", accounts.len());
}
self.transaction_count
.fetch_add(tx_count, Ordering::Relaxed);
@ -406,13 +541,11 @@ impl Bank {
}
/// Process an ordered list of entries.
pub fn process_entries(&self, entries: Vec<Entry>) -> Result<u64> {
let mut entry_count = 0;
pub fn process_entries(&self, entries: Vec<Entry>) -> Result<()> {
for entry in entries {
entry_count += 1;
self.process_entry(entry)?;
}
Ok(entry_count)
Ok(())
}
/// Append entry blocks to the ledger, verifying them along the way.
@ -461,13 +594,18 @@ impl Bank {
.expect("invalid ledger: need at least 2 entries");
{
let tx = &entry1.transactions[0];
let deposit = if let Instruction::NewContract(contract) = &tx.instruction {
let instruction = tx.instruction();
let deposit = if let Instruction::NewContract(contract) = instruction {
contract.plan.final_payment()
} else {
None
}.expect("invalid ledger, needs to start with a contract");
self.apply_payment(&deposit, &mut self.balances.write().unwrap());
{
let mut accounts = self.accounts.write().unwrap();
let entry = accounts.entry(tx.keys[0]).or_insert_with(Account::default);
Self::apply_payment(&deposit, entry);
trace!("applied genesis payment {:?} {:?}", deposit, entry);
}
}
self.register_entry_id(&entry0.id);
self.register_entry_id(&entry1.id);
@ -489,39 +627,40 @@ impl Bank {
/// Process a Witness Signature. Any payment plans waiting on this signature
/// will progress one step.
fn apply_signature(&self, from: Pubkey, signature: Signature) -> Result<()> {
if let Occupied(mut e) = self
.pending
.write()
.expect("write() in apply_signature")
.entry(signature)
{
fn apply_signature(from: Pubkey, signature: Signature, account: &mut [Account]) {
let mut pending: HashMap<Signature, Plan> =
deserialize(&account[1].userdata).unwrap_or(HashMap::new());
if let Occupied(mut e) = pending.entry(signature) {
e.get_mut().apply_witness(&Witness::Signature, &from);
if let Some(payment) = e.get().final_payment() {
self.apply_payment(&payment, &mut self.balances.write().unwrap());
//move the tokens back to the from account
account[0].tokens += payment.tokens;
account[1].tokens -= payment.tokens;
e.remove_entry();
}
};
Ok(())
//TODO this allocation needs to be changed once the runtime only allows for explicitly
//allocated memory
account[1].userdata = if pending.is_empty() {
vec![]
} else {
serialize(&pending).unwrap()
};
}
/// Process a Witness Timestamp. Any payment plans waiting on this timestamp
/// will progress one step.
fn apply_timestamp(&self, from: Pubkey, dt: DateTime<Utc>) -> Result<()> {
fn apply_timestamp(from: Pubkey, dt: DateTime<Utc>, account: &mut Account) {
let mut pending: HashMap<Signature, Plan> =
deserialize(&account.userdata).unwrap_or(HashMap::new());
// Check to see if any timelocked transactions can be completed.
let mut completed = vec![];
// Hold 'pending' write lock until the end of this function. Otherwise another thread can
// double-spend if it enters before the modified plan is removed from 'pending'.
let mut pending = self
.pending
.write()
.expect("'pending' write lock in apply_timestamp");
for (key, plan) in pending.iter_mut() {
plan.apply_witness(&Witness::Timestamp(dt), &from);
if let Some(payment) = plan.final_payment() {
self.apply_payment(&payment, &mut self.balances.write().unwrap());
if let Some(_payment) = plan.final_payment() {
completed.push(key.clone());
}
}
@ -529,8 +668,13 @@ impl Bank {
for key in completed {
pending.remove(&key);
}
Ok(())
//TODO this allocation needs to be changed once the runtime only allows for explicitly
//allocated memory
account.userdata = if pending.is_empty() {
vec![]
} else {
serialize(&pending).unwrap()
};
}
/// Create, sign, and process a Transaction from `keypair` to `to` of
@ -564,11 +708,17 @@ impl Bank {
}
pub fn get_balance(&self, pubkey: &Pubkey) -> i64 {
let bals = self
.balances
self.get_account(pubkey)
.map(|x| Self::get_balance_of_budget_payment_plan(&x))
.unwrap_or(0)
}
pub fn get_account(&self, pubkey: &Pubkey) -> Option<Account> {
let accounts = self
.accounts
.read()
.expect("'balances' read lock in get_balance");
bals.get(pubkey).cloned().unwrap_or(0)
.expect("'accounts' read lock in get_balance");
accounts.get(pubkey).cloned()
}
pub fn transaction_count(&self) -> usize {
@ -588,6 +738,16 @@ impl Bank {
false
}
/// Hash the `accounts` HashMap. This represents a validator's interpretation
/// of the ledger up to the `last_id`, to be sent back to the leader when voting.
pub fn hash_internal_state(&self) -> Hash {
let mut ordered_accounts = BTreeMap::new();
for (pubkey, account) in self.accounts.read().unwrap().iter() {
ordered_accounts.insert(*pubkey, account.clone());
}
hash(&serialize(&ordered_accounts).unwrap())
}
pub fn finality(&self) -> usize {
self.finality_time.load(Ordering::Relaxed)
}
@ -607,11 +767,18 @@ mod tests {
use hash::hash;
use ledger;
use packet::BLOB_DATA_SIZE;
use signature::KeypairUtil;
use signature::{GenKeys, KeypairUtil};
use std;
use std::io::{BufReader, Cursor, Seek, SeekFrom};
use std::mem::size_of;
/// A bank built from a mint reports the mint's full token supply as its balance.
#[test]
fn test_bank_new() {
    let starting_tokens = 10_000;
    let mint = Mint::new(starting_tokens);
    let bank = Bank::new(&mint);
    let mint_balance = bank.get_balance(&mint.pubkey());
    assert_eq!(mint_balance, starting_tokens);
}
#[test]
fn test_two_payments_to_one_party() {
let mint = Mint::new(10_000);
@ -641,6 +808,23 @@ mod tests {
assert_eq!(bank.transaction_count(), 0);
}
// TODO: This test verifies potentially undesirable behavior
// See github issue 1157 (https://github.com/solana-labs/solana/issues/1157)
#[test]
fn test_detect_failed_duplicate_transactions_issue_1157() {
    let mint = Mint::new(1);
    let bank = Bank::new(&mint);

    // A transfer of -1 tokens from the mint to itself.
    let bad_tx = Transaction::new(&mint.keypair(), mint.keypair().pubkey(), -1, mint.last_id());
    let signature = bad_tx.signature;

    // The signature is unknown before the transaction is submitted.
    assert!(!bank.has_signature(&signature));

    let outcome = bank.process_transaction(&bad_tx);
    assert_eq!(outcome, Err(BankError::NegativeTokens));

    // The signature stays recorded even though the transaction failed.
    assert!(bank.has_signature(&signature));
}
#[test]
fn test_account_not_found() {
let mint = Mint::new(1);
@ -661,6 +845,7 @@ mod tests {
bank.transfer(1_000, &mint.keypair(), pubkey, mint.last_id())
.unwrap();
assert_eq!(bank.transaction_count(), 1);
assert_eq!(bank.get_balance(&pubkey), 1_000);
assert_eq!(
bank.transfer(10_001, &mint.keypair(), pubkey, mint.last_id()),
Err(BankError::InsufficientFunds(mint.pubkey()))
@ -684,39 +869,46 @@ mod tests {
#[test]
fn test_transfer_on_date() {
let mint = Mint::new(1);
let mint = Mint::new(2);
let bank = Bank::new(&mint);
let pubkey = Keypair::new().pubkey();
let dt = Utc::now();
bank.transfer_on_date(1, &mint.keypair(), pubkey, dt, mint.last_id())
.unwrap();
// Mint's balance will be zero because all funds are locked up.
assert_eq!(bank.get_balance(&mint.pubkey()), 0);
// Mint's balance will be 1 because 1 of the tokens is locked up
assert_eq!(bank.get_balance(&mint.pubkey()), 1);
// tx count is 1, because debits were applied.
assert_eq!(bank.transaction_count(), 1);
// pubkey's balance will be None because the funds have not been
// pubkey's balance will be 0 because the funds have not been
// sent.
assert_eq!(bank.get_balance(&pubkey), 0);
// Now, acknowledge the time in the condition occurred and
// that pubkey's funds are now available.
bank.apply_timestamp(mint.pubkey(), dt).unwrap();
let tx = Transaction::new_timestamp(&mint.keypair(), pubkey, dt, bank.last_id());
let res = bank.process_transaction(&tx);
assert!(res.is_ok());
assert_eq!(bank.get_balance(&pubkey), 1);
// tx count is still 1, because we chose not to count timestamp transactions
// tx count.
assert_eq!(bank.transaction_count(), 1);
// tx count is 2
assert_eq!(bank.transaction_count(), 2);
bank.apply_timestamp(mint.pubkey(), dt).unwrap(); // <-- Attack! Attempt to process completed transaction.
assert_ne!(bank.get_balance(&pubkey), 2);
// try to replay the timestamp contract
bank.register_entry_id(&hash(bank.last_id().as_ref()));
let tx = Transaction::new_timestamp(&mint.keypair(), pubkey, dt, bank.last_id());
let res = bank.process_transaction(&tx);
assert!(res.is_ok());
assert_eq!(bank.get_balance(&pubkey), 1);
}
#[test]
fn test_cancel_transfer() {
let mint = Mint::new(1);
// mint needs to have a balance to modify the external contract
let mint = Mint::new(2);
let bank = Bank::new(&mint);
let pubkey = Keypair::new().pubkey();
let dt = Utc::now();
@ -727,23 +919,31 @@ mod tests {
// Assert the debit counts as a transaction.
assert_eq!(bank.transaction_count(), 1);
// Mint's balance will be zero because all funds are locked up.
assert_eq!(bank.get_balance(&mint.pubkey()), 0);
// pubkey's balance will be None because the funds have not been
// sent.
assert_eq!(bank.get_balance(&pubkey), 0);
// Now, cancel the trancaction. Mint gets her funds back, pubkey never sees them.
bank.apply_signature(mint.pubkey(), signature).unwrap();
// Mint's balance will be 1 because 1 of the tokens is locked up.
assert_eq!(bank.get_balance(&mint.pubkey()), 1);
// pubkey's balance will be 0 because the funds are locked up
assert_eq!(bank.get_balance(&pubkey), 0);
// Assert cancel doesn't cause count to go backward.
assert_eq!(bank.transaction_count(), 1);
// Now, cancel the transaction. Mint gets her funds back, pubkey never sees them.
let tx = Transaction::new_signature(&mint.keypair(), pubkey, signature, bank.last_id());
let res = bank.process_transaction(&tx);
assert!(res.is_ok());
assert_eq!(bank.get_balance(&pubkey), 0);
assert_eq!(bank.get_balance(&mint.pubkey()), 2);
bank.apply_signature(mint.pubkey(), signature).unwrap(); // <-- Attack! Attempt to cancel completed transaction.
assert_ne!(bank.get_balance(&mint.pubkey()), 2);
// Assert cancel counts as a tx
assert_eq!(bank.transaction_count(), 2);
// try to replay the signature contract
bank.register_entry_id(&hash(bank.last_id().as_ref()));
let tx = Transaction::new_signature(&mint.keypair(), pubkey, signature, bank.last_id());
let res = bank.process_transaction(&tx); //<-- attack! try to get budget dsl to pay out with another signature
assert!(res.is_ok());
// balance is still 2 for the mint
assert_eq!(bank.get_balance(&mint.pubkey()), 2);
// balance is still 0 for the contract
assert_eq!(bank.get_balance(&pubkey), 0);
}
#[test]
@ -762,13 +962,13 @@ mod tests {
}
#[test]
fn test_forget_signature() {
fn test_clear_signatures() {
let mint = Mint::new(1);
let bank = Bank::new(&mint);
let signature = Signature::default();
bank.reserve_signature_with_last_id(&signature, &mint.last_id())
.unwrap();
bank.forget_signature_with_last_id(&signature, &mint.last_id());
bank.clear_signatures();
assert!(
bank.reserve_signature_with_last_id(&signature, &mint.last_id())
.is_ok()
@ -881,6 +1081,19 @@ mod tests {
entries.into_iter()
}
/// Builds entries holding one single-token transfer from the mint to each keypair.
///
/// Every transfer uses the mint's `last_id`, and the entries are produced by
/// `ledger::next_entries` starting from that same id.
fn create_sample_block_with_next_entries_using_keypairs(
    mint: &Mint,
    keypairs: &[Keypair],
) -> impl Iterator<Item = Entry> {
    let last_id = mint.last_id();
    let mut transfers = Vec::with_capacity(keypairs.len());
    for recipient in keypairs {
        transfers.push(Transaction::new(
            &mint.keypair(),
            recipient.pubkey(),
            1,
            last_id,
        ));
    }
    ledger::next_entries(&last_id, 0, transfers).into_iter()
}
fn create_sample_block(mint: &Mint, length: usize) -> impl Iterator<Item = Entry> {
let mut entries = Vec::with_capacity(length);
let mut hash = mint.last_id();
@ -910,6 +1123,15 @@ mod tests {
(genesis.into_iter().chain(block), mint.pubkey())
}
/// Builds a sample ledger: the mint's genesis entries followed by a block that
/// transfers one token from the mint to each of `keypairs`.
fn create_sample_ledger_with_mint_and_keypairs(
    mint: &Mint,
    keypairs: &[Keypair],
) -> impl Iterator<Item = Entry> {
    mint.create_entries()
        .into_iter()
        .chain(create_sample_block_with_next_entries_using_keypairs(
            mint, keypairs,
        ))
}
#[test]
fn test_process_ledger() {
let (ledger, pubkey) = create_sample_ledger(1);
@ -997,11 +1219,38 @@ mod tests {
assert!(!validator_bank.is_leader);
}
/// Two banks that process the same ledger and the same transfers hash to the same
/// internal state, and a transfer applied to only one of them changes its hash.
#[test]
fn test_hash_internal_state() {
    let mint = Mint::new(2_000);
    let mut key_source = GenKeys::new([0u8; 32]);
    let keypairs = key_source.gen_n_keypairs(5);

    // Two identical ledgers, replayed into two independent banks.
    let ledger0 = create_sample_ledger_with_mint_and_keypairs(&mint, &keypairs);
    let ledger1 = create_sample_ledger_with_mint_and_keypairs(&mint, &keypairs);

    let bank0 = Bank::default();
    bank0.process_ledger(ledger0).unwrap();
    let bank1 = Bank::default();
    bank1.process_ledger(ledger1).unwrap();

    let initial_state = bank0.hash_internal_state();
    assert_eq!(bank1.hash_internal_state(), initial_state);

    let recipient = keypairs[0].pubkey();
    let pay_recipient = |bank: &Bank| {
        bank.transfer(1_000, &mint.keypair(), recipient, mint.last_id())
            .unwrap();
    };

    // Only bank0 has seen the transfer: its hash moves away from the shared state.
    pay_recipient(&bank0);
    assert_ne!(bank0.hash_internal_state(), initial_state);

    // Once bank1 applies the same transfer, the two hashes agree again.
    pay_recipient(&bank1);
    assert_eq!(bank0.hash_internal_state(), bank1.hash_internal_state());
}
/// A default bank reports `usize::MAX` as its finality until `set_finality` is called,
/// after which it reports the value that was set.
#[test]
fn test_finality() {
    let bank = Bank::default();
    assert_eq!(bank.finality(), std::usize::MAX);

    let updated = 90;
    bank.set_finality(updated);
    assert_eq!(bank.finality(), updated);
}
}

View File

@ -114,7 +114,7 @@ impl BankingStage {
signal_sender.send(Signal::Transactions(transactions))?;
debug!("done process_transactions");
packet_recycler.recycle(msgs);
packet_recycler.recycle(msgs, "process_transactions");
}
let total_time_s = timing::duration_as_s(&proc_start.elapsed());
let total_time_ms = timing::duration_as_ms(&proc_start.elapsed());

View File

@ -1,9 +1,13 @@
extern crate clap;
extern crate solana;
use clap::{App, Arg};
use solana::netutil::bind_to;
use solana::packet::{Packet, PacketRecycler, BLOB_SIZE, PACKET_DATA_SIZE};
use solana::result::Result;
use solana::streamer::{receiver, PacketReceiver};
use std::net::{SocketAddr, UdpSocket};
use std::cmp::max;
use std::net::{IpAddr, Ipv4Addr, SocketAddr, UdpSocket};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::mpsc::channel;
use std::sync::Arc;
@ -49,28 +53,62 @@ fn sink(
let timer = Duration::new(1, 0);
if let Ok(msgs) = r.recv_timeout(timer) {
rvs.fetch_add(msgs.read().unwrap().packets.len(), Ordering::Relaxed);
recycler.recycle(msgs);
recycler.recycle(msgs, "sink");
}
})
}
fn main() -> Result<()> {
let read = UdpSocket::bind("127.0.0.1:0")?;
read.set_read_timeout(Some(Duration::new(1, 0)))?;
let mut num_sockets = 1usize;
let matches = App::new("solana-bench-streamer")
.arg(
Arg::with_name("num-recv-sockets")
.long("num-recv-sockets")
.value_name("NUM")
.takes_value(true)
.help("Use NUM receive sockets"),
)
.get_matches();
if let Some(n) = matches.value_of("num-recv-sockets") {
num_sockets = max(num_sockets, n.to_string().parse().expect("integer"));
}
let mut port = 0;
let mut addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 0);
let addr = read.local_addr()?;
let exit = Arc::new(AtomicBool::new(false));
let pack_recycler = PacketRecycler::default();
let (s_reader, r_reader) = channel();
let t_reader = receiver(read, exit.clone(), pack_recycler.clone(), s_reader);
let mut read_channels = Vec::new();
let mut read_threads = Vec::new();
for _ in 0..num_sockets {
let read = bind_to(port, false).unwrap();
read.set_read_timeout(Some(Duration::new(1, 0))).unwrap();
addr = read.local_addr().unwrap();
port = addr.port();
let (s_reader, r_reader) = channel();
read_channels.push(r_reader);
read_threads.push(receiver(
Arc::new(read),
exit.clone(),
pack_recycler.clone(),
s_reader,
));
}
let t_producer1 = producer(&addr, &pack_recycler, exit.clone());
let t_producer2 = producer(&addr, &pack_recycler, exit.clone());
let t_producer3 = producer(&addr, &pack_recycler, exit.clone());
let rvs = Arc::new(AtomicUsize::new(0));
let t_sink = sink(pack_recycler.clone(), exit.clone(), rvs.clone(), r_reader);
let sink_threads: Vec<_> = read_channels
.into_iter()
.map(|r_reader| sink(pack_recycler.clone(), exit.clone(), rvs.clone(), r_reader))
.collect();
let start = SystemTime::now();
let start_val = rvs.load(Ordering::Relaxed);
sleep(Duration::new(5, 0));
@ -81,10 +119,14 @@ fn main() -> Result<()> {
let fcount = (end_val - start_val) as f64;
println!("performance: {:?}", fcount / ftime);
exit.store(true, Ordering::Relaxed);
t_reader.join()?;
for t_reader in read_threads {
t_reader.join()?;
}
t_producer1.join()?;
t_producer2.join()?;
t_producer3.join()?;
t_sink.join()?;
for t_sink in sink_threads {
t_sink.join()?;
}
Ok(())
}

View File

@ -4,6 +4,7 @@ extern crate clap;
extern crate influx_db_client;
extern crate rayon;
extern crate serde_json;
#[macro_use]
extern crate solana;
use clap::{App, Arg};
@ -12,24 +13,22 @@ use rayon::prelude::*;
use solana::client::mk_client;
use solana::crdt::{Crdt, NodeInfo};
use solana::drone::DRONE_PORT;
use solana::fullnode::Config;
use solana::hash::Hash;
use solana::logger;
use solana::metrics;
use solana::nat::{get_public_ip_addr, udp_random_bind};
use solana::ncp::Ncp;
use solana::packet::BlobRecycler;
use solana::service::Service;
use solana::signature::{read_keypair, GenKeys, Keypair, KeypairUtil};
use solana::thin_client::ThinClient;
use solana::thin_client::{poll_gossip_for_leader, ThinClient};
use solana::timing::{duration_as_ms, duration_as_s};
use solana::transaction::Transaction;
use solana::wallet::request_airdrop;
use solana::window::default_window;
use std::collections::VecDeque;
use std::fs::File;
use std::net::{IpAddr, Ipv4Addr, SocketAddr, UdpSocket};
use std::net::SocketAddr;
use std::process::exit;
use std::sync::atomic::{AtomicBool, AtomicIsize, Ordering};
use std::sync::atomic::{AtomicBool, AtomicIsize, AtomicUsize, Ordering};
use std::sync::{Arc, RwLock};
use std::thread::sleep;
use std::thread::Builder;
@ -143,7 +142,13 @@ fn send_barrier_transaction(barrier_client: &mut ThinClient, last_id: &mut Hash,
);
// Sanity check that the client balance is still 1
let balance = barrier_client.poll_get_balance(&id.pubkey()).unwrap_or(-1);
let balance = barrier_client
.poll_balance_with_timeout(
&id.pubkey(),
&Duration::from_millis(100),
&Duration::from_secs(10),
)
.expect("Failed to get balance");
if balance != 1 {
panic!("Expected an account balance of 1 (balance: {}", balance);
}
@ -228,6 +233,7 @@ fn do_tx_transfers(
shared_txs: &Arc<RwLock<VecDeque<Vec<Transaction>>>>,
leader: &NodeInfo,
shared_tx_thread_count: &Arc<AtomicIsize>,
total_tx_sent_count: &Arc<AtomicUsize>,
) {
let client = mk_client(&leader);
loop {
@ -249,6 +255,7 @@ fn do_tx_transfers(
client.transfer_signed(&tx).unwrap();
}
shared_tx_thread_count.fetch_add(-1, Ordering::Relaxed);
total_tx_sent_count.fetch_add(tx_len, Ordering::Relaxed);
println!(
"Tx send done. {} ms {} tps",
duration_as_ms(&transfer_start.elapsed()),
@ -275,33 +282,48 @@ fn airdrop_tokens(client: &mut ThinClient, leader: &NodeInfo, id: &Keypair, tx_c
let mut drone_addr = leader.contact_info.tpu;
drone_addr.set_port(DRONE_PORT);
let starting_balance = client.poll_get_balance(&id.pubkey()).unwrap();
let starting_balance = client.poll_get_balance(&id.pubkey()).unwrap_or(0);
metrics_submit_token_balance(starting_balance);
println!("starting balance {}", starting_balance);
if starting_balance < tx_count {
let airdrop_amount = tx_count - starting_balance;
println!(
"Airdropping {:?} tokens from {}",
airdrop_amount, drone_addr
"Airdropping {:?} tokens from {} for {}",
airdrop_amount,
drone_addr,
id.pubkey(),
);
let previous_balance = starting_balance;
request_airdrop(&drone_addr, &id.pubkey(), airdrop_amount as u64).unwrap();
if let Err(e) = request_airdrop(&drone_addr, &id.pubkey(), airdrop_amount as u64) {
panic!(
"Error requesting airdrop: {:?} to addr: {:?} amount: {}",
e, drone_addr, airdrop_amount
);
}
// TODO: return airdrop Result from Drone instead of polling the
// network
let mut current_balance = previous_balance;
let mut current_balance = starting_balance;
for _ in 0..20 {
sleep(Duration::from_millis(500));
current_balance = client.poll_get_balance(&id.pubkey()).unwrap();
current_balance = client.poll_get_balance(&id.pubkey()).unwrap_or_else(|e| {
println!("airdrop error {}", e);
starting_balance
});
if starting_balance != current_balance {
break;
}
println!(".");
println!("current balance {}...", current_balance);
}
metrics_submit_token_balance(current_balance);
if current_balance - starting_balance != airdrop_amount {
println!("Airdrop failed!");
println!(
"Airdrop failed! {} {} {}",
id.pubkey(),
current_balance,
starting_balance
);
exit(1);
}
}
@ -311,10 +333,11 @@ fn compute_and_report_stats(
maxes: &Arc<RwLock<Vec<(SocketAddr, NodeStats)>>>,
sample_period: u64,
tx_send_elapsed: &Duration,
total_tx_send_count: usize,
) {
// Compute/report stats
let mut max_of_maxes = 0.0;
let mut total_txs = 0;
let mut max_tx_count = 0;
let mut nodes_with_zero_tps = 0;
let mut total_maxes = 0.0;
println!(" Node address | Max TPS | Total Transactions");
@ -342,7 +365,9 @@ fn compute_and_report_stats(
if stats.tps > max_of_maxes {
max_of_maxes = stats.tps;
}
total_txs += stats.tx;
if stats.tx > max_tx_count {
max_tx_count = stats.tx;
}
}
if total_maxes > 0.0 {
@ -355,166 +380,155 @@ fn compute_and_report_stats(
}
println!(
"\nHighest TPS: {:.2} sampling period {}s total transactions: {} clients: {}",
"\nHighest TPS: {:.2} sampling period {}s max transactions: {} clients: {} drop rate: {:.2}",
max_of_maxes,
sample_period,
total_txs,
maxes.read().unwrap().len()
max_tx_count,
maxes.read().unwrap().len(),
(total_tx_send_count as u64 - max_tx_count) as f64 / total_tx_send_count as f64,
);
println!(
"\tAverage TPS: {}",
total_txs as f32 / duration_as_s(tx_send_elapsed)
max_tx_count as f32 / duration_as_s(tx_send_elapsed)
);
}
// First transfer 3/4 of the tokens to the dest accounts
// then ping-pong 1/4 of the tokens back to the other account
// this leaves 1/4 token buffer in each account
//
// Returns true when `i` is a multiple of a quarter of `num_tokens_per_account` and has
// reached at least three quarters of it.
// Panics with a remainder-by-zero when `num_tokens_per_account / 4` is 0.
fn should_switch_directions(num_tokens_per_account: i64, i: i64) -> bool {
    let quarter = num_tokens_per_account / 4;
    // Only multiples of a quarter are candidates for a direction switch.
    if i % quarter != 0 {
        return false;
    }
    i >= (3 * num_tokens_per_account) / 4
}
fn main() {
logger::setup();
metrics::set_panic_hook("bench-tps");
let mut threads = 4usize;
let mut num_nodes = 1usize;
let mut time_sec = 90;
let mut sustained = false;
let mut tx_count = 500_000;
let matches = App::new("solana-bench-tps")
.version(crate_version!())
.arg(
Arg::with_name("leader")
.short("l")
.long("leader")
.value_name("PATH")
.takes_value(true)
.help("/path/to/leader.json"),
)
.arg(
Arg::with_name("keypair")
.short("k")
.long("keypair")
.value_name("PATH")
.takes_value(true)
.default_value("~/.config/solana/id.json")
.help("/path/to/id.json"),
)
.arg(
Arg::with_name("num_nodes")
Arg::with_name("network")
.short("n")
.long("nodes")
.value_name("NUMBER")
.long("network")
.value_name("HOST:PORT")
.takes_value(true)
.help("number of nodes to converge to"),
.help("rendezvous with the network at this gossip entry point, defaults to 127.0.0.1:8001"),
)
.arg(
Arg::with_name("identity")
.short("i")
.long("identity")
.value_name("PATH")
.takes_value(true)
.required(true)
.help("file containing a client identity (keypair)"),
)
.arg(
Arg::with_name("num-nodes")
.short("N")
.long("num-nodes")
.value_name("NUM")
.takes_value(true)
.help("wait for NUM nodes to converge"),
)
.arg(
Arg::with_name("threads")
.short("t")
.long("threads")
.value_name("NUMBER")
.value_name("NUM")
.takes_value(true)
.help("number of threads"),
)
.arg(
Arg::with_name("seconds")
.short("s")
.long("sec")
.value_name("NUMBER")
Arg::with_name("duration")
.long("duration")
.value_name("SECS")
.takes_value(true)
.help("send transactions for this many seconds"),
.help("run benchmark for SECS seconds then exit, default is forever"),
)
.arg(
Arg::with_name("converge_only")
.short("c")
Arg::with_name("converge-only")
.long("converge-only")
.help("exit immediately after converging"),
)
.arg(
Arg::with_name("addr")
.short("a")
.long("addr")
.value_name("IPADDR")
.takes_value(true)
.help("address to advertise to the network"),
)
.arg(
Arg::with_name("sustained")
.long("sustained")
.help("Use sustained performance mode vs. peak mode. This overlaps the tx generation with transfers."),
.help("use sustained performance mode vs. peak mode. This overlaps the tx generation with transfers."),
)
.arg(
Arg::with_name("tx_count")
.long("tx_count")
.value_name("NUMBER")
.value_name("NUM")
.takes_value(true)
.help("number of transactions to send in a single batch")
.help("number of transactions to send per batch")
)
.get_matches();
let leader: NodeInfo;
if let Some(l) = matches.value_of("leader") {
leader = read_leader(l).node_info;
} else {
let server_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 8000);
leader = NodeInfo::new_leader(&server_addr);
};
let id = read_keypair(matches.value_of("keypair").unwrap()).expect("client keypair");
if let Some(t) = matches.value_of("threads") {
threads = t.to_string().parse().expect("integer");
}
if let Some(n) = matches.value_of("num_nodes") {
num_nodes = n.to_string().parse().expect("integer");
}
if let Some(s) = matches.value_of("seconds") {
time_sec = s.to_string().parse().expect("integer");
}
let addr = if let Some(s) = matches.value_of("addr") {
s.to_string().parse().unwrap_or_else(|e| {
eprintln!("failed to parse {} as IP address error: {:?}", s, e);
exit(1);
let network = if let Some(addr) = matches.value_of("network") {
addr.parse().unwrap_or_else(|e| {
eprintln!("failed to parse network: {}", e);
exit(1)
})
} else {
get_public_ip_addr().unwrap_or_else(|e| {
eprintln!("failed to get public IP, try --addr? error: {:?}", e);
exit(1);
})
socketaddr!("127.0.0.1:8001")
};
if let Some(s) = matches.value_of("tx_count") {
tx_count = s.to_string().parse().expect("integer");
}
let id =
read_keypair(matches.value_of("identity").unwrap()).expect("can't read client identity");
if matches.is_present("sustained") {
sustained = true;
}
let threads = if let Some(t) = matches.value_of("threads") {
t.to_string().parse().expect("can't parse threads")
} else {
4usize
};
let num_nodes = if let Some(n) = matches.value_of("num-nodes") {
n.to_string().parse().expect("can't parse num-nodes")
} else {
1usize
};
let duration = if let Some(s) = matches.value_of("duration") {
Duration::new(s.to_string().parse().expect("can't parse duration"), 0)
} else {
Duration::new(std::u64::MAX, 0)
};
let tx_count = if let Some(s) = matches.value_of("tx_count") {
s.to_string().parse().expect("can't parse tx_count")
} else {
500_000
};
let sustained = matches.is_present("sustained");
println!("Looking for leader at {:?}", network);
let leader = poll_gossip_for_leader(network, None).expect("unable to find leader on network");
let exit_signal = Arc::new(AtomicBool::new(false));
let mut c_threads = vec![];
let validators = converge(&leader, &exit_signal, num_nodes, &mut c_threads, addr);
let (nodes, leader) = converge(&leader, &exit_signal, num_nodes, &mut c_threads);
println!(" Node address | Node identifier");
println!("----------------------+------------------");
for node in &validators {
println!(
" {:20} | {:16x}",
node.contact_info.tpu.to_string(),
node.debug_id()
);
}
println!("Nodes: {}", validators.len());
if validators.len() < num_nodes {
if nodes.len() < num_nodes {
println!(
"Error: Insufficient nodes discovered. Expecting {} or more",
num_nodes
);
exit(1);
}
if leader.is_none() {
println!("no leader");
exit(1);
}
if matches.is_present("converge_only") {
if matches.is_present("converge-only") {
return;
}
let leader = leader.unwrap();
println!("leader is at {} {}", leader.contact_info.rpu, leader.id);
let mut client = mk_client(&leader);
let mut barrier_client = mk_client(&leader);
@ -527,7 +541,20 @@ fn main() {
let barrier_id = rnd.gen_n_keypairs(1).pop().unwrap();
println!("Get tokens...");
airdrop_tokens(&mut client, &leader, &id, tx_count);
let num_tokens_per_account = 20;
// Sample the first keypair, see if it has tokens, if so then resume
// to avoid token loss
let keypair0_balance = client.poll_get_balance(&keypairs[0].pubkey()).unwrap_or(0);
if num_tokens_per_account > keypair0_balance {
airdrop_tokens(
&mut client,
&leader,
&id,
(num_tokens_per_account - keypair0_balance) * tx_count,
);
}
airdrop_tokens(&mut barrier_client, &leader, &barrier_id, 1);
println!("Get last ID...");
@ -542,7 +569,7 @@ fn main() {
let maxes = Arc::new(RwLock::new(Vec::new()));
let sample_period = 1; // in seconds
println!("Sampling TPS every {} second...", sample_period);
let v_threads: Vec<_> = validators
let v_threads: Vec<_> = nodes
.into_iter()
.map(|v| {
let exit_signal = exit_signal.clone();
@ -560,6 +587,7 @@ fn main() {
Arc::new(RwLock::new(VecDeque::new()));
let shared_tx_active_thread_count = Arc::new(AtomicIsize::new(0));
let total_tx_sent_count = Arc::new(AtomicUsize::new(0));
let s_threads: Vec<_> = (0..threads)
.map(|_| {
@ -567,6 +595,7 @@ fn main() {
let shared_txs = shared_txs.clone();
let leader = leader.clone();
let shared_tx_active_thread_count = shared_tx_active_thread_count.clone();
let total_tx_sent_count = total_tx_sent_count.clone();
Builder::new()
.name("solana-client-sender".to_string())
.spawn(move || {
@ -575,6 +604,7 @@ fn main() {
&shared_txs,
&leader,
&shared_tx_active_thread_count,
&total_tx_sent_count,
);
})
.unwrap()
@ -582,10 +612,10 @@ fn main() {
.collect();
// generate and send transactions for the specified duration
let time = Duration::new(time_sec, 0);
let now = Instant::now();
let start = Instant::now();
let mut reclaim_tokens_back_to_source_account = false;
while now.elapsed() < time || reclaim_tokens_back_to_source_account {
let mut i = keypair0_balance;
while start.elapsed() < duration {
let balance = client.poll_get_balance(&id.pubkey()).unwrap_or(-1);
metrics_submit_token_balance(balance);
@ -600,8 +630,6 @@ fn main() {
threads,
reclaim_tokens_back_to_source_account,
);
reclaim_tokens_back_to_source_account = !reclaim_tokens_back_to_source_account;
// In sustained mode overlap the transfers with generation
// this has higher average performance but lower peak performance
// in tested environments.
@ -614,6 +642,11 @@ fn main() {
// transactions sent by `generate_txs()` so instead send and confirm a single transaction
// to validate the network is still functional.
send_barrier_transaction(&mut barrier_client, &mut last_id, &barrier_id);
i += 1;
if should_switch_directions(num_tokens_per_account, i) {
reclaim_tokens_back_to_source_account = !reclaim_tokens_back_to_source_account;
}
}
// Stop the sampling threads so it will collect the stats
@ -637,7 +670,12 @@ fn main() {
let balance = client.poll_get_balance(&id.pubkey()).unwrap_or(-1);
metrics_submit_token_balance(balance);
compute_and_report_stats(&maxes, sample_period, &now.elapsed());
compute_and_report_stats(
&maxes,
sample_period,
&start.elapsed(),
total_tx_sent_count.load(Ordering::Relaxed),
);
// join the crdt client threads
for t in c_threads {
@ -645,71 +683,78 @@ fn main() {
}
}
/// Creates the `NodeInfo` and gossip socket for a spy node advertised at `addr`.
///
/// A UDP socket is obtained from `udp_random_bind(8000, 10000, 5)` (presumably a port
/// range plus a retry count -- confirm against the helper). The gossip address is
/// `addr` paired with the port that socket was actually bound to. The node gets a
/// freshly generated public key, and every address other than gossip is set to the
/// parsed placeholder `0.0.0.0:0`.
///
/// # Panics
///
/// Panics if the socket cannot be bound, if its local address cannot be read, or if
/// `addr` is an unspecified or multicast IP.
fn spy_node(addr: IpAddr) -> (NodeInfo, UdpSocket) {
    let socket = udp_random_bind(8000, 10000, 5).unwrap();
    let bound_port = socket.local_addr().unwrap().port();
    let advertised = SocketAddr::new(addr, bound_port);
    let identity = Keypair::new().pubkey();
    // Placeholder used for every non-gossip address slot.
    let placeholder = "0.0.0.0:0".parse().unwrap();
    // The advertised gossip address must be a concrete, non-multicast IP.
    assert!(!advertised.ip().is_unspecified());
    assert!(!advertised.ip().is_multicast());
    (
        NodeInfo::new(
            identity,
            advertised,
            placeholder,
            placeholder,
            placeholder,
            placeholder,
        ),
        socket,
    )
}
fn converge(
leader: &NodeInfo,
exit_signal: &Arc<AtomicBool>,
num_nodes: usize,
threads: &mut Vec<JoinHandle<()>>,
addr: IpAddr,
) -> Vec<NodeInfo> {
) -> (Vec<NodeInfo>, Option<NodeInfo>) {
//lets spy on the network
let (spy, spy_gossip) = spy_node(addr);
let mut spy_crdt = Crdt::new(spy).expect("Crdt::new");
let (node, gossip_socket) = Crdt::spy_node();
let mut spy_crdt = Crdt::new(node).expect("Crdt::new");
spy_crdt.insert(&leader);
spy_crdt.set_leader(leader.id);
let spy_ref = Arc::new(RwLock::new(spy_crdt));
let window = default_window();
let gossip_send_socket = udp_random_bind(8000, 10000, 5).unwrap();
let window = Arc::new(RwLock::new(default_window()));
let ncp = Ncp::new(
&spy_ref,
window.clone(),
window,
BlobRecycler::default(),
None,
spy_gossip,
gossip_send_socket,
gossip_socket,
exit_signal.clone(),
).expect("DataReplicator::new");
);
let mut v: Vec<NodeInfo> = vec![];
//wait for the network to converge, 30 seconds should be plenty
// wait for the network to converge, 30 seconds should be plenty
for _ in 0..30 {
v = spy_ref
.read()
.unwrap()
.table
.values()
.into_iter()
.filter(|x| Crdt::is_valid_address(x.contact_info.rpu))
.cloned()
.collect();
if v.len() >= num_nodes {
println!("CONVERGED!");
break;
} else {
println!(
"{} node(s) discovered (looking for {} or more)",
v.len(),
num_nodes
);
{
let spy_ref = spy_ref.read().unwrap();
println!("{}", spy_ref.node_info_trace());
if spy_ref.leader_data().is_some() {
v = spy_ref
.table
.values()
.filter(|x| Crdt::is_valid_address(&x.contact_info.rpu))
.cloned()
.collect();
if v.len() >= num_nodes {
println!("CONVERGED!");
break;
} else {
println!(
"{} node(s) discovered (looking for {} or more)",
v.len(),
num_nodes
);
}
}
}
sleep(Duration::new(1, 0));
}
threads.extend(ncp.thread_hdls().into_iter());
v
let leader = spy_ref.read().unwrap().leader_data().cloned();
(v, leader)
}
fn read_leader(path: &str) -> Config {
let file = File::open(path).unwrap_or_else(|_| panic!("file not found: {}", path));
serde_json::from_reader(file).unwrap_or_else(|_| panic!("failed to parse {}", path))
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `should_switch_directions` with 20 tokens per account over a sample
    /// of iteration counts. Of the sampled iterations only 15, 20 and 100 are expected
    /// to report a direction switch.
    #[test]
    fn test_switch_directions() {
        // Start of the run: no switch.
        assert!(!should_switch_directions(20, 0));
        assert!(!should_switch_directions(20, 1));
        assert!(!should_switch_directions(20, 14));
        // First sampled switch point and its neighbours.
        assert!(should_switch_directions(20, 15));
        assert!(!should_switch_directions(20, 16));
        assert!(!should_switch_directions(20, 19));
        // Second sampled switch point and its neighbour.
        assert!(should_switch_directions(20, 20));
        assert!(!should_switch_directions(20, 21));
        // Much later in the run.
        assert!(!should_switch_directions(20, 99));
        assert!(should_switch_directions(20, 100));
        assert!(!should_switch_directions(20, 101));
    }
}

View File

@ -1,40 +1,53 @@
extern crate bincode;
extern crate bytes;
#[macro_use]
extern crate clap;
extern crate log;
extern crate serde_json;
extern crate solana;
extern crate tokio;
extern crate tokio_codec;
extern crate tokio_io;
use bincode::deserialize;
use bincode::{deserialize, serialize};
use bytes::Bytes;
use clap::{App, Arg};
use solana::crdt::NodeInfo;
use solana::drone::{Drone, DroneRequest, DRONE_PORT};
use solana::fullnode::Config;
use solana::logger;
use solana::metrics::set_panic_hook;
use solana::signature::read_keypair;
use std::fs::File;
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
use std::error;
use std::io;
use std::net::{Ipv4Addr, SocketAddr};
use std::process::exit;
use std::sync::{Arc, Mutex};
use std::thread;
use tokio::net::TcpListener;
use tokio::prelude::*;
use tokio_codec::{BytesCodec, Decoder};
fn main() {
/// Shorthand for constructing a `SocketAddr`.
///
/// * `socketaddr!(ip, port)` converts `ip` with `Ipv4Addr::from` and pairs the result
///   with `port`.
/// * `socketaddr!(text)` parses `text` as a `SocketAddr` and panics if the parse fails.
///
/// `SocketAddr` and `Ipv4Addr` must be in scope wherever the macro is invoked.
macro_rules! socketaddr {
    ($ip:expr, $port:expr) => {{
        let ip = Ipv4Addr::from($ip);
        SocketAddr::from((ip, $port))
    }};
    ($str:expr) => {
        $str.parse::<SocketAddr>().unwrap()
    };
}
fn main() -> Result<(), Box<error::Error>> {
logger::setup();
set_panic_hook("drone");
let matches = App::new("drone")
.version(crate_version!())
.arg(
Arg::with_name("leader")
.short("l")
.long("leader")
.value_name("PATH")
Arg::with_name("network")
.short("n")
.long("network")
.value_name("HOST:PORT")
.takes_value(true)
.help("/path/to/leader.json"),
.required(true)
.help("rendezvous with the network at this gossip entry point"),
)
.arg(
Arg::with_name("keypair")
@ -43,57 +56,55 @@ fn main() {
.value_name("PATH")
.takes_value(true)
.required(true)
.help("/path/to/mint.json"),
.help("File to read the client's keypair from"),
)
.arg(
Arg::with_name("time")
.short("t")
.long("time")
Arg::with_name("slice")
.long("slice")
.value_name("SECONDS")
.takes_value(true)
.help("time slice over which to limit requests to drone"),
.help("Time slice over which to limit requests to drone"),
)
.arg(
Arg::with_name("cap")
.short("c")
.long("cap")
.value_name("NUMBER")
.takes_value(true)
.help("request limit for time slice"),
.help("Request limit for time slice"),
)
.get_matches();
let leader: NodeInfo;
if let Some(l) = matches.value_of("leader") {
leader = read_leader(l).node_info;
} else {
let server_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 8000);
leader = NodeInfo::new_leader(&server_addr);
};
let network = matches
.value_of("network")
.unwrap()
.parse()
.unwrap_or_else(|e| {
eprintln!("failed to parse network: {}", e);
exit(1)
});
let mint_keypair =
read_keypair(matches.value_of("keypair").expect("keypair")).expect("client keypair");
read_keypair(matches.value_of("keypair").unwrap()).expect("failed to read client keypair");
let time_slice: Option<u64>;
if let Some(t) = matches.value_of("time") {
time_slice = Some(t.to_string().parse().expect("integer"));
if let Some(secs) = matches.value_of("slice") {
time_slice = Some(secs.to_string().parse().expect("failed to parse slice"));
} else {
time_slice = None;
}
let request_cap: Option<u64>;
if let Some(c) = matches.value_of("cap") {
request_cap = Some(c.to_string().parse().expect("integer"));
request_cap = Some(c.to_string().parse().expect("failed to parse cap"));
} else {
request_cap = None;
}
let drone_addr: SocketAddr = format!("0.0.0.0:{}", DRONE_PORT).parse().unwrap();
let drone_addr = socketaddr!(0, DRONE_PORT);
let drone = Arc::new(Mutex::new(Drone::new(
mint_keypair,
drone_addr,
leader.contact_info.tpu,
leader.contact_info.rpu,
network,
time_slice,
request_cap,
)));
@ -114,36 +125,44 @@ fn main() {
let drone2 = drone.clone();
// let client_ip = socket.peer_addr().expect("drone peer_addr").ip();
let framed = BytesCodec::new().framed(socket);
let (_writer, reader) = framed.split();
let (writer, reader) = framed.split();
let processor = reader
.for_each(move |bytes| {
let req: DroneRequest = deserialize(&bytes).or_else(|err| {
use std::io;
Err(io::Error::new(
io::ErrorKind::Other,
format!("deserialize packet in drone: {:?}", err),
))
})?;
let processor = reader.and_then(move |bytes| {
let req: DroneRequest = deserialize(&bytes).or_else(|err| {
Err(io::Error::new(
io::ErrorKind::Other,
format!("deserialize packet in drone: {:?}", err),
))
})?;
println!("Airdrop requested...");
// let res = drone2.lock().unwrap().check_rate_limit(client_ip);
let res1 = drone2.lock().unwrap().send_airdrop(req);
match res1 {
Ok(_) => println!("Airdrop sent!"),
Err(_) => println!("Request limit reached for this time slice"),
}
Ok(())
})
.then(|result| {
println!("Socket closed with result: {:?}", result);
Ok(())
});
tokio::spawn(processor)
println!("Airdrop requested...");
// let res = drone2.lock().unwrap().check_rate_limit(client_ip);
let res1 = drone2.lock().unwrap().send_airdrop(req);
match res1 {
Ok(_) => println!("Airdrop sent!"),
Err(_) => println!("Request limit reached for this time slice"),
}
let response = res1?;
println!("Airdrop tx signature: {:?}", response);
let response_vec = serialize(&response).or_else(|err| {
Err(io::Error::new(
io::ErrorKind::Other,
format!("serialize signature in drone: {:?}", err),
))
})?;
let response_bytes = Bytes::from(response_vec.clone());
Ok(response_bytes)
});
let server = writer
.send_all(processor.or_else(|err| {
Err(io::Error::new(
io::ErrorKind::Other,
format!("Drone response: {:?}", err),
))
}))
.then(|_| Ok(()));
tokio::spawn(server)
});
tokio::run(done);
}
fn read_leader(path: &str) -> Config {
let file = File::open(path).unwrap_or_else(|_| panic!("file not found: {}", path));
serde_json::from_reader(file).unwrap_or_else(|_| panic!("failed to parse {}", path))
Ok(())
}

View File

@ -5,9 +5,9 @@ extern crate serde_json;
extern crate solana;
use clap::{App, Arg};
use solana::crdt::{get_ip_addr, parse_port_or_addr};
use solana::crdt::FULLNODE_PORT_RANGE;
use solana::fullnode::Config;
use solana::nat::get_public_ip_addr;
use solana::netutil::{get_ip_addr, get_public_ip_addr, parse_port_or_addr};
use solana::signature::read_pkcs8;
use std::io;
use std::net::SocketAddr;
@ -48,13 +48,7 @@ fn main() {
.get_matches();
let bind_addr: SocketAddr = {
let mut bind_addr = parse_port_or_addr({
if let Some(b) = matches.value_of("bind") {
Some(b.to_string())
} else {
None
}
});
let mut bind_addr = parse_port_or_addr(matches.value_of("bind"), FULLNODE_PORT_RANGE.0);
if matches.is_present("local") {
let ip = get_ip_addr().unwrap();
bind_addr.set_ip(ip);

View File

@ -1,23 +1,28 @@
#[macro_use]
extern crate clap;
extern crate getopts;
#[macro_use]
extern crate log;
extern crate serde_json;
#[macro_use]
extern crate solana;
use clap::{App, Arg};
use solana::client::mk_client;
use solana::crdt::{NodeInfo, TestNode};
use solana::crdt::Node;
use solana::drone::DRONE_PORT;
use solana::fullnode::{Config, Fullnode};
use solana::logger;
use solana::metrics::set_panic_hook;
use solana::service::Service;
use solana::signature::{Keypair, KeypairUtil};
use solana::thin_client::poll_gossip_for_leader;
use solana::wallet::request_airdrop;
use std::fs::File;
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
use std::net::{Ipv4Addr, SocketAddr};
use std::process::exit;
use std::thread::sleep;
use std::time::Duration;
fn main() -> () {
logger::setup();
@ -33,12 +38,12 @@ fn main() -> () {
.help("run with the identity found in FILE"),
)
.arg(
Arg::with_name("testnet")
.short("t")
.long("testnet")
Arg::with_name("network")
.short("n")
.long("network")
.value_name("HOST:PORT")
.takes_value(true)
.help("connect to the network at this gossip entry point"),
.help("connect/rendezvous with the network at this gossip entry point"),
)
.arg(
Arg::with_name("ledger")
@ -51,16 +56,12 @@ fn main() -> () {
)
.get_matches();
let bind_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 8000);
let mut keypair = Keypair::new();
let mut repl_data = NodeInfo::new_leader_with_pubkey(keypair.pubkey(), &bind_addr);
if let Some(i) = matches.value_of("identity") {
let (keypair, ncp) = if let Some(i) = matches.value_of("identity") {
let path = i.to_string();
if let Ok(file) = File::open(path.clone()) {
let parse: serde_json::Result<Config> = serde_json::from_reader(file);
if let Ok(data) = parse {
keypair = data.keypair();
repl_data = data.node_info;
(data.keypair(), data.node_info.contact_info.ncp)
} else {
eprintln!("failed to parse {}", path);
exit(1);
@ -69,50 +70,62 @@ fn main() -> () {
eprintln!("failed to read {}", path);
exit(1);
}
}
let leader_pubkey = keypair.pubkey();
let repl_clone = repl_data.clone();
} else {
(Keypair::new(), socketaddr!(0, 8000))
};
let ledger_path = matches.value_of("ledger").unwrap();
let mut node = TestNode::new_with_bind_addr(repl_data, bind_addr);
let mut drone_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), DRONE_PORT);
let fullnode = if let Some(t) = matches.value_of("testnet") {
let testnet_address_string = t.to_string();
let testnet_addr: SocketAddr = testnet_address_string.parse().unwrap();
drone_addr.set_ip(testnet_addr.ip());
// socketaddr that is initial pointer into the network's gossip (ncp)
let network = matches
.value_of("network")
.map(|network| network.parse().expect("failed to parse network address"));
Fullnode::new(node, false, ledger_path, keypair, Some(testnet_addr))
} else {
node.data.leader_id = node.data.id;
let node = Node::new_with_external_ip(keypair.pubkey(), &ncp);
Fullnode::new(node, true, ledger_path, keypair, None)
// save off some stuff for airdrop
let node_info = node.info.clone();
let pubkey = keypair.pubkey();
let fullnode = Fullnode::new(node, ledger_path, keypair, network, false);
// airdrop stuff, probably goes away at some point
let leader = match network {
Some(network) => {
poll_gossip_for_leader(network, None).expect("can't find leader on network")
}
None => node_info,
};
let mut client = mk_client(&repl_clone);
let previous_balance = client.poll_get_balance(&leader_pubkey).unwrap();
eprintln!("balance is {}", previous_balance);
let mut client = mk_client(&leader);
if previous_balance == 0 {
eprintln!("requesting airdrop from {}", drone_addr);
request_airdrop(&drone_addr, &leader_pubkey, 50).unwrap_or_else(|_| {
panic!(
"Airdrop failed, is the drone address correct {:?} drone running?",
// TODO: maybe have the drone put itself in gossip somewhere instead of hardcoding?
let drone_addr = match network {
Some(network) => SocketAddr::new(network.ip(), DRONE_PORT),
None => SocketAddr::new(ncp.ip(), DRONE_PORT),
};
loop {
let balance = client.poll_get_balance(&pubkey).unwrap_or(0);
info!("balance is {}", balance);
if balance >= 50 {
info!("good to go!");
break;
}
info!("requesting airdrop from {}", drone_addr);
loop {
if request_airdrop(&drone_addr, &pubkey, 50).is_ok() {
break;
}
info!(
"airdrop request, is the drone address correct {:?}, drone running?",
drone_addr
)
});
// Try multiple times to confirm a non-zero balance. |poll_get_balance| currently times
// out after 1 second, and sometimes this is not enough time while the network is
// booting
let balance_ok = (0..30).any(|i| {
let balance = client.poll_get_balance(&leader_pubkey).unwrap();
eprintln!("new balance is {} (attempt #{})", balance, i);
balance > 0
});
assert!(balance_ok, "0 balance, airdrop failed?");
);
sleep(Duration::from_secs(2));
}
}
fullnode.join().expect("join");
fullnode.join().expect("to never happen");
}

View File

@ -5,6 +5,7 @@ extern crate bs58;
extern crate clap;
extern crate dirs;
extern crate serde_json;
#[macro_use]
extern crate solana;
use clap::{App, Arg, SubCommand};
@ -14,7 +15,7 @@ use solana::drone::DRONE_PORT;
use solana::fullnode::Config;
use solana::logger;
use solana::signature::{read_keypair, Keypair, KeypairUtil, Pubkey, Signature};
use solana::thin_client::ThinClient;
use solana::thin_client::{poll_gossip_for_leader, ThinClient};
use solana::wallet::request_airdrop;
use std::error;
use std::fmt;
@ -63,9 +64,9 @@ struct WalletConfig {
impl Default for WalletConfig {
fn default() -> WalletConfig {
let default_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 8000);
let default_addr = socketaddr!(0, 8000);
WalletConfig {
leader: NodeInfo::new_leader(&default_addr),
leader: NodeInfo::new_with_socketaddr(&default_addr),
id: Keypair::new(),
drone_addr: default_addr,
command: WalletCommand::Balance,
@ -92,12 +93,18 @@ fn parse_args() -> Result<WalletConfig, Box<error::Error>> {
.takes_value(true)
.help("/path/to/id.json"),
)
.arg(
Arg::with_name("timeout")
.long("timeout")
.value_name("SECONDS")
.takes_value(true)
.help("Max SECONDS to wait to get necessary gossip from the network"),
)
.subcommand(
SubCommand::with_name("airdrop")
.about("Request a batch of tokens")
.arg(
Arg::with_name("tokens")
// .index(1)
.long("tokens")
.value_name("NUMBER")
.takes_value(true)
@ -110,16 +117,14 @@ fn parse_args() -> Result<WalletConfig, Box<error::Error>> {
.about("Send a payment")
.arg(
Arg::with_name("tokens")
// .index(2)
.long("tokens")
.value_name("NUMBER")
.takes_value(true)
.required(true)
.help("the number of tokens to send"),
.help("The number of tokens to send"),
)
.arg(
Arg::with_name("to")
// .index(1)
.long("to")
.value_name("PUBKEY")
.takes_value(true)
@ -146,8 +151,14 @@ fn parse_args() -> Result<WalletConfig, Box<error::Error>> {
leader = read_leader(l)?.node_info;
} else {
let server_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 8000);
leader = NodeInfo::new_leader(&server_addr);
leader = NodeInfo::new_with_socketaddr(&server_addr);
};
let timeout: Option<u64>;
if let Some(secs) = matches.value_of("timeout") {
timeout = Some(secs.to_string().parse().expect("integer"));
} else {
timeout = None;
}
let mut path = dirs::home_dir().expect("home directory");
let id_path = if matches.is_present("keypair") {
@ -163,6 +174,8 @@ fn parse_args() -> Result<WalletConfig, Box<error::Error>> {
)))
})?;
let leader = poll_gossip_for_leader(leader.contact_info.ncp, timeout)?;
let mut drone_addr = leader.contact_info.tpu;
drone_addr.set_port(DRONE_PORT);
@ -243,7 +256,6 @@ fn process_command(
}
Err(error) => {
println!("An error occurred: {:?}", error);
Err(error)?;
}
}
}
@ -254,7 +266,7 @@ fn process_command(
"Requesting airdrop of {:?} tokens from {}",
tokens, config.drone_addr
);
let previous_balance = client.poll_get_balance(&config.id.pubkey())?;
let previous_balance = client.poll_get_balance(&config.id.pubkey()).unwrap_or(0);
request_airdrop(&config.drone_addr, &config.id.pubkey(), tokens as u64)?;
// TODO: return airdrop Result from Drone instead of polling the
@ -262,7 +274,10 @@ fn process_command(
let mut current_balance = previous_balance;
for _ in 0..20 {
sleep(Duration::from_millis(500));
current_balance = client.poll_get_balance(&config.id.pubkey())?;
current_balance = client
.poll_get_balance(&config.id.pubkey())
.unwrap_or(previous_balance);
if previous_balance != current_balance {
break;
}

View File

@ -16,31 +16,26 @@ pub struct BlobFetchStage {
impl BlobFetchStage {
pub fn new(
socket: UdpSocket,
socket: Arc<UdpSocket>,
exit: Arc<AtomicBool>,
blob_recycler: &BlobRecycler,
recycler: &BlobRecycler,
) -> (Self, BlobReceiver) {
Self::new_multi_socket(vec![socket], exit, blob_recycler)
Self::new_multi_socket(vec![socket], exit, recycler)
}
pub fn new_multi_socket(
sockets: Vec<UdpSocket>,
sockets: Vec<Arc<UdpSocket>>,
exit: Arc<AtomicBool>,
blob_recycler: &BlobRecycler,
recycler: &BlobRecycler,
) -> (Self, BlobReceiver) {
let (blob_sender, blob_receiver) = channel();
let (sender, receiver) = channel();
let thread_hdls: Vec<_> = sockets
.into_iter()
.map(|socket| {
streamer::blob_receiver(
exit.clone(),
blob_recycler.clone(),
socket,
blob_sender.clone(),
).expect("blob receiver init")
streamer::blob_receiver(socket, exit.clone(), recycler.clone(), sender.clone())
})
.collect();
(BlobFetchStage { exit, thread_hdls }, blob_receiver)
(BlobFetchStage { exit, thread_hdls }, receiver)
}
pub fn close(&self) {

View File

@ -16,7 +16,7 @@ use std::sync::{Arc, RwLock};
use std::thread::{self, Builder, JoinHandle};
use std::time::Duration;
use streamer::BlobReceiver;
use window::{self, SharedWindow, WindowIndex, WINDOW_SIZE};
use window::{self, SharedWindow, WindowIndex, WindowUtil, WINDOW_SIZE};
fn broadcast(
node_info: &NodeInfo,
@ -28,7 +28,7 @@ fn broadcast(
transmit_index: &mut WindowIndex,
receive_index: &mut u64,
) -> Result<()> {
let debug_id = node_info.debug_id();
let id = node_info.id;
let timer = Duration::new(1, 0);
let mut dq = receiver.recv_timeout(timer)?;
while let Ok(mut nq) = receiver.try_recv() {
@ -42,13 +42,11 @@ fn broadcast(
// break them up into window-sized chunks to process
let blobs_chunked = blobs_vec.chunks(WINDOW_SIZE as usize).map(|x| x.to_vec());
if log_enabled!(Level::Trace) {
trace!("{}", window::print_window(debug_id, window, *receive_index));
}
trace!("{}", window.read().unwrap().print(&id, *receive_index));
for mut blobs in blobs_chunked {
let blobs_len = blobs.len();
trace!("{:x}: broadcast blobs.len: {}", debug_id, blobs_len);
trace!("{}: broadcast blobs.len: {}", id, blobs_len);
// Index the blobs
window::index_blobs(node_info, &blobs, receive_index)
@ -64,29 +62,29 @@ fn broadcast(
let pos = (ix % WINDOW_SIZE) as usize;
if let Some(x) = mem::replace(&mut win[pos].data, None) {
trace!(
"{:x} popped {} at {}",
debug_id,
"{} popped {} at {}",
id,
x.read().unwrap().get_index().unwrap(),
pos
);
recycler.recycle(x);
recycler.recycle(x, "broadcast-data");
}
if let Some(x) = mem::replace(&mut win[pos].coding, None) {
trace!(
"{:x} popped {} at {}",
debug_id,
"{} popped {} at {}",
id,
x.read().unwrap().get_index().unwrap(),
pos
);
recycler.recycle(x);
recycler.recycle(x, "broadcast-coding");
}
trace!("{:x} null {}", debug_id, pos);
trace!("{} null {}", id, pos);
}
while let Some(b) = blobs.pop() {
let ix = b.read().unwrap().get_index().expect("blob index");
let pos = (ix % WINDOW_SIZE) as usize;
trace!("{:x} caching {} at {}", debug_id, ix, pos);
trace!("{} caching {} at {}", id, ix, pos);
assert!(win[pos].data.is_none());
win[pos].data = Some(b);
}
@ -96,7 +94,7 @@ fn broadcast(
#[cfg(feature = "erasure")]
{
erasure::generate_coding(
debug_id,
&id,
&mut window.write().unwrap(),
recycler,
*receive_index,

View File

@ -31,7 +31,7 @@ impl Condition {
}
}
/// A data type reprsenting a payment plan.
/// A data type representing a payment plan.
#[repr(C)]
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub enum Budget {

View File

@ -1,11 +1,11 @@
use crdt::NodeInfo;
use nat::udp_random_bind;
use crdt::{NodeInfo, FULLNODE_PORT_RANGE};
use netutil::bind_in_range;
use std::time::Duration;
use thin_client::ThinClient;
pub fn mk_client(r: &NodeInfo) -> ThinClient {
let requests_socket = udp_random_bind(8000, 10000, 5).unwrap();
let transactions_socket = udp_random_bind(8000, 10000, 5).unwrap();
let (_, requests_socket) = bind_in_range(FULLNODE_PORT_RANGE).unwrap();
let (_, transactions_socket) = bind_in_range(FULLNODE_PORT_RANGE).unwrap();
requests_socket
.set_read_timeout(Some(Duration::new(1, 0)))

View File

@ -74,11 +74,12 @@ impl Counter {
if times % lograte == 0 && times > 0 {
let lastlog = self.lastlog.load(Ordering::Relaxed);
info!(
"COUNTER:{{\"name\": \"{}\", \"counts\": {}, \"samples\": {}, \"now\": {}}}",
"COUNTER:{{\"name\": \"{}\", \"counts\": {}, \"samples\": {}, \"now\": {}, \"events\": {}}}",
self.name,
counts,
counts + events,
times,
timing::timestamp(),
events,
);
metrics::submit(
influxdb::Point::new(&format!("counter-{}", self.name))

File diff suppressed because it is too large Load Diff

View File

@ -12,11 +12,11 @@ use std::io;
use std::io::{Error, ErrorKind};
use std::net::{IpAddr, SocketAddr, UdpSocket};
use std::time::Duration;
use thin_client::ThinClient;
use thin_client::{poll_gossip_for_leader, ThinClient};
use transaction::Transaction;
pub const TIME_SLICE: u64 = 60;
pub const REQUEST_CAP: u64 = 1_000_000;
pub const REQUEST_CAP: u64 = 500_000_000;
pub const DRONE_PORT: u16 = 9900;
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
@ -31,8 +31,7 @@ pub struct Drone {
mint_keypair: Keypair,
ip_cache: Vec<IpAddr>,
_airdrop_addr: SocketAddr,
transactions_addr: SocketAddr,
requests_addr: SocketAddr,
network_addr: SocketAddr,
pub time_slice: Duration,
request_cap: u64,
pub request_current: u64,
@ -42,8 +41,7 @@ impl Drone {
pub fn new(
mint_keypair: Keypair,
_airdrop_addr: SocketAddr,
transactions_addr: SocketAddr,
requests_addr: SocketAddr,
network_addr: SocketAddr,
time_input: Option<u64>,
request_cap_input: Option<u64>,
) -> Drone {
@ -59,8 +57,7 @@ impl Drone {
mint_keypair,
ip_cache: Vec::new(),
_airdrop_addr,
transactions_addr,
requests_addr,
network_addr,
time_slice,
request_cap,
request_current: 0,
@ -100,10 +97,13 @@ impl Drone {
let requests_socket = UdpSocket::bind("0.0.0.0:0").unwrap();
let transactions_socket = UdpSocket::bind("0.0.0.0:0").unwrap();
let leader = poll_gossip_for_leader(self.network_addr, Some(10))
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
let mut client = ThinClient::new(
self.requests_addr,
leader.contact_info.rpu,
requests_socket,
self.transactions_addr,
leader.contact_info.tpu,
transactions_socket,
);
let last_id = client.get_last_id();
@ -141,7 +141,7 @@ impl Drone {
)
.to_owned(),
);
client.transfer_signed(&tx)
client.retry_transfer_signed(&tx, 10)
} else {
Err(Error::new(ErrorKind::Other, "token limit reached"))
}
@ -157,18 +157,15 @@ impl Drop for Drone {
#[cfg(test)]
mod tests {
use bank::Bank;
use crdt::{get_ip_addr, TestNode};
use crdt::Node;
use drone::{Drone, DroneRequest, REQUEST_CAP, TIME_SLICE};
use fullnode::Fullnode;
use logger;
use mint::Mint;
use service::Service;
use netutil::get_ip_addr;
use signature::{Keypair, KeypairUtil};
use std::fs::remove_dir_all;
use std::net::{SocketAddr, UdpSocket};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::thread::sleep;
use std::time::Duration;
use thin_client::ThinClient;
@ -177,16 +174,8 @@ mod tests {
let keypair = Keypair::new();
let mut addr: SocketAddr = "0.0.0.0:9900".parse().unwrap();
addr.set_ip(get_ip_addr().unwrap());
let transactions_addr = "0.0.0.0:0".parse().unwrap();
let requests_addr = "0.0.0.0:0".parse().unwrap();
let mut drone = Drone::new(
keypair,
addr,
transactions_addr,
requests_addr,
None,
Some(3),
);
let network_addr = "0.0.0.0:0".parse().unwrap();
let mut drone = Drone::new(keypair, addr, network_addr, None, Some(3));
assert!(drone.check_request_limit(1));
drone.request_current = 3;
assert!(!drone.check_request_limit(1));
@ -197,9 +186,8 @@ mod tests {
let keypair = Keypair::new();
let mut addr: SocketAddr = "0.0.0.0:9900".parse().unwrap();
addr.set_ip(get_ip_addr().unwrap());
let transactions_addr = "0.0.0.0:0".parse().unwrap();
let requests_addr = "0.0.0.0:0".parse().unwrap();
let mut drone = Drone::new(keypair, addr, transactions_addr, requests_addr, None, None);
let network_addr = "0.0.0.0:0".parse().unwrap();
let mut drone = Drone::new(keypair, addr, network_addr, None, None);
drone.request_current = drone.request_current + 256;
assert_eq!(drone.request_current, 256);
drone.clear_request_count();
@ -211,9 +199,8 @@ mod tests {
let keypair = Keypair::new();
let mut addr: SocketAddr = "0.0.0.0:9900".parse().unwrap();
addr.set_ip(get_ip_addr().unwrap());
let transactions_addr = "0.0.0.0:0".parse().unwrap();
let requests_addr = "0.0.0.0:0".parse().unwrap();
let mut drone = Drone::new(keypair, addr, transactions_addr, requests_addr, None, None);
let network_addr = "0.0.0.0:0".parse().unwrap();
let mut drone = Drone::new(keypair, addr, network_addr, None, None);
let ip = "127.0.0.1".parse().expect("create IpAddr from string");
assert_eq!(drone.ip_cache.len(), 0);
drone.add_ip_to_cache(ip);
@ -226,9 +213,8 @@ mod tests {
let keypair = Keypair::new();
let mut addr: SocketAddr = "0.0.0.0:9900".parse().unwrap();
addr.set_ip(get_ip_addr().unwrap());
let transactions_addr = "0.0.0.0:0".parse().unwrap();
let requests_addr = "0.0.0.0:0".parse().unwrap();
let mut drone = Drone::new(keypair, addr, transactions_addr, requests_addr, None, None);
let network_addr = "0.0.0.0:0".parse().unwrap();
let mut drone = Drone::new(keypair, addr, network_addr, None, None);
let ip = "127.0.0.1".parse().expect("create IpAddr from string");
assert_eq!(drone.ip_cache.len(), 0);
drone.add_ip_to_cache(ip);
@ -243,18 +229,10 @@ mod tests {
let keypair = Keypair::new();
let mut addr: SocketAddr = "0.0.0.0:9900".parse().unwrap();
addr.set_ip(get_ip_addr().unwrap());
let transactions_addr = "0.0.0.0:0".parse().unwrap();
let requests_addr = "0.0.0.0:0".parse().unwrap();
let network_addr = "0.0.0.0:0".parse().unwrap();
let time_slice: Option<u64> = None;
let request_cap: Option<u64> = None;
let drone = Drone::new(
keypair,
addr,
transactions_addr,
requests_addr,
time_slice,
request_cap,
);
let drone = Drone::new(keypair, addr, network_addr, time_slice, request_cap);
assert_eq!(drone.time_slice, Duration::new(TIME_SLICE, 0));
assert_eq!(drone.request_cap, REQUEST_CAP);
}
@ -275,36 +253,32 @@ mod tests {
logger::setup();
let leader_keypair = Keypair::new();
let leader = TestNode::new_localhost_with_pubkey(leader_keypair.pubkey());
let leader = Node::new_localhost_with_pubkey(leader_keypair.pubkey());
let alice = Mint::new(10_000_000);
let bank = Bank::new(&alice);
let bob_pubkey = Keypair::new().pubkey();
let carlos_pubkey = Keypair::new().pubkey();
let exit = Arc::new(AtomicBool::new(false));
let leader_data = leader.data.clone();
let leader_data = leader.info.clone();
let ledger_path = tmp_ledger_path("send_airdrop");
let server = Fullnode::new_leader(
let server = Fullnode::new_with_bank(
leader_keypair,
bank,
0,
&[],
leader,
exit.clone(),
&ledger_path,
None,
Some(&ledger_path),
false,
);
//TODO: this seems unstable
sleep(Duration::from_millis(900));
let mut addr: SocketAddr = "0.0.0.0:9900".parse().expect("bind to drone socket");
addr.set_ip(get_ip_addr().expect("drone get_ip_addr"));
let mut drone = Drone::new(
alice.keypair(),
addr,
leader_data.contact_info.tpu,
leader_data.contact_info.rpu,
leader_data.contact_info.ncp,
None,
Some(150_000),
);
@ -327,10 +301,31 @@ mod tests {
let bob_sig = drone.send_airdrop(bob_req).unwrap();
assert!(client.poll_for_signature(&bob_sig).is_ok());
// restart the leader, drone should find the new one at the same gossip port
server.close().unwrap();
let leader_keypair = Keypair::new();
let leader = Node::new_localhost_with_pubkey(leader_keypair.pubkey());
let leader_data = leader.info.clone();
let server = Fullnode::new(leader, &ledger_path, leader_keypair, None, false);
let requests_socket = UdpSocket::bind("0.0.0.0:0").expect("drone bind to requests socket");
let transactions_socket =
UdpSocket::bind("0.0.0.0:0").expect("drone bind to transactions socket");
let mut client = ThinClient::new(
leader_data.contact_info.rpu,
requests_socket,
leader_data.contact_info.tpu,
transactions_socket,
);
let carlos_req = DroneRequest::GetAirdrop {
airdrop_request_amount: 5_000_000,
client_pubkey: carlos_pubkey,
};
// using existing drone, new thin client
let carlos_sig = drone.send_airdrop(carlos_req).unwrap();
assert!(client.poll_for_signature(&carlos_sig).is_ok());
@ -342,8 +337,7 @@ mod tests {
info!("TPS request balance: {:?}", carlos_balance);
assert_eq!(carlos_balance.unwrap(), TPS_BATCH);
exit.store(true, Ordering::Relaxed);
server.join().unwrap();
server.close().unwrap();
remove_dir_all(ledger_path).unwrap();
}
}

View File

@ -45,9 +45,6 @@ pub struct Entry {
/// 2. this Entry can be left out of the bank's entry_id cache for
/// purposes of duplicate rejection
pub has_more: bool,
/// Erasure requires that Entry be a multiple of 4 bytes in size
pad: [u8; 3],
}
impl Entry {
@ -65,7 +62,6 @@ impl Entry {
id,
transactions,
has_more,
pad: [0, 0, 0],
};
let size = serialized_size(&entry).unwrap();
@ -116,7 +112,6 @@ impl Entry {
id: Hash::default(),
transactions,
has_more: false,
pad: [0, 0, 0],
}).unwrap() <= BLOB_DATA_SIZE as u64
}
@ -142,7 +137,6 @@ impl Entry {
id: *id,
transactions: vec![],
has_more: false,
pad: [0, 0, 0],
}
}
@ -209,7 +203,6 @@ pub fn next_entry(start_hash: &Hash, num_hashes: u64, transactions: Vec<Transact
id: next_hash(start_hash, num_hashes, &transactions),
transactions,
has_more: false,
pad: [0, 0, 0],
}
}
@ -255,8 +248,8 @@ mod tests {
// First, verify entries
let keypair = Keypair::new();
let tx0 = Transaction::new_timestamp(&keypair, Utc::now(), zero);
let tx1 = Transaction::new_signature(&keypair, Default::default(), zero);
let tx0 = Transaction::new_timestamp(&keypair, keypair.pubkey(), Utc::now(), zero);
let tx1 = Transaction::new_signature(&keypair, keypair.pubkey(), Default::default(), zero);
let mut e0 = Entry::new(&zero, 0, vec![tx0.clone(), tx1.clone()], false);
assert!(e0.verify(&zero));
@ -278,7 +271,7 @@ mod tests {
assert_eq!(tick.id, zero);
let keypair = Keypair::new();
let tx0 = Transaction::new_timestamp(&keypair, Utc::now(), zero);
let tx0 = Transaction::new_timestamp(&keypair, keypair.pubkey(), Utc::now(), zero);
let entry0 = next_entry(&zero, 1, vec![tx0.clone()]);
assert_eq!(entry0.num_hashes, 1);
assert_eq!(entry0.id, next_hash(&zero, 1, &vec![tx0]));

View File

@ -101,9 +101,11 @@ pub fn read_entries<R: BufRead>(reader: R) -> impl Iterator<Item = io::Result<En
#[cfg(test)]
mod tests {
use super::*;
use bincode::serialize;
use ledger;
use mint::Mint;
use packet::BLOB_DATA_SIZE;
use packet::PACKET_DATA_SIZE;
use signature::{Keypair, KeypairUtil};
use std::io::Cursor;
use transaction::Transaction;
@ -117,9 +119,11 @@ mod tests {
let mut entry_writer = EntryWriter::new(&bank, writer);
let keypair = Keypair::new();
let tx = Transaction::new(&mint.keypair(), keypair.pubkey(), 1, mint.last_id());
let tx_size = serialize(&tx).unwrap().len();
// NOTE: if Entry grows to larger than a transaction, the code below falls over
let threshold = (BLOB_DATA_SIZE / 256) - 1; // 256 is transaction size
assert!(tx_size <= PACKET_DATA_SIZE);
assert!(BLOB_DATA_SIZE >= PACKET_DATA_SIZE);
let threshold = (BLOB_DATA_SIZE / tx_size) - 1; // PACKET_DATA_SIZE is transaction size
// Verify large entries are split up and the first sets has_more.
let txs = vec![tx.clone(); threshold * 2];

View File

@ -1,5 +1,6 @@
// Support erasure coding
use packet::{BlobRecycler, SharedBlob, BLOB_DATA_SIZE, BLOB_HEADER_SIZE};
use signature::Pubkey;
use std::cmp;
use std::mem;
use std::result;
@ -84,7 +85,7 @@ pub fn generate_coding_blocks(coding: &mut [&mut [u8]], data: &[&[u8]]) -> Resul
let mut data_arg = Vec::with_capacity(data.len());
for block in data {
if block_len != block.len() as i32 {
trace!(
error!(
"data block size incorrect {} expected {}",
block.len(),
block_len
@ -96,7 +97,7 @@ pub fn generate_coding_blocks(coding: &mut [&mut [u8]], data: &[&[u8]]) -> Resul
let mut coding_arg = Vec::with_capacity(coding.len());
for mut block in coding {
if block_len != block.len() as i32 {
trace!(
error!(
"coding block size incorrect {} expected {}",
block.len(),
block_len
@ -152,8 +153,8 @@ pub fn decode_blocks(
}
data_arg.push(x.as_mut_ptr());
}
unsafe {
let ret = jerasure_matrix_decode(
let ret = unsafe {
jerasure_matrix_decode(
data.len() as i32,
coding.len() as i32,
ERASURE_W,
@ -163,15 +164,15 @@ pub fn decode_blocks(
data_arg.as_ptr(),
coding_arg.as_ptr(),
data[0].len() as i32,
);
trace!("jerasure_matrix_decode ret: {}", ret);
for x in data[erasures[0] as usize][0..8].iter() {
trace!("{} ", x)
}
trace!("");
if ret < 0 {
return Err(ErasureError::DecodeError);
}
)
};
trace!("jerasure_matrix_decode ret: {}", ret);
for x in data[erasures[0] as usize][0..8].iter() {
trace!("{} ", x)
}
trace!("");
if ret < 0 {
return Err(ErasureError::DecodeError);
}
Ok(())
}
@ -214,7 +215,7 @@ pub fn decode_blocks(
//
//
pub fn generate_coding(
debug_id: u64,
id: &Pubkey,
window: &mut [WindowSlot],
recycler: &BlobRecycler,
receive_index: u64,
@ -234,8 +235,8 @@ pub fn generate_coding(
break;
}
info!(
"generate_coding {:x} start: {} end: {} start_idx: {} num_blobs: {}",
debug_id, block_start, block_end, start_idx, num_blobs
"generate_coding {} start: {} end: {} start_idx: {} num_blobs: {}",
id, block_start, block_end, start_idx, num_blobs
);
let mut max_data_size = 0;
@ -243,12 +244,12 @@ pub fn generate_coding(
// find max_data_size, maybe bail if not all the data is here
for i in block_start..block_end {
let n = i % window.len();
trace!("{:x} window[{}] = {:?}", debug_id, n, window[n].data);
trace!("{} window[{}] = {:?}", id, n, window[n].data);
if let Some(b) = &window[n].data {
max_data_size = cmp::max(b.read().unwrap().meta.size, max_data_size);
} else {
trace!("{:x} data block is null @ {}", debug_id, n);
trace!("{} data block is null @ {}", id, n);
return Ok(());
}
}
@ -256,7 +257,7 @@ pub fn generate_coding(
// round up to the nearest jerasure alignment
max_data_size = align!(max_data_size, JERASURE_ALIGN);
trace!("{:x} max_data_size: {}", debug_id, max_data_size);
trace!("{} max_data_size: {}", id, max_data_size);
let mut data_blobs = Vec::with_capacity(NUM_DATA);
for i in block_start..block_end {
@ -299,8 +300,8 @@ pub fn generate_coding(
let id = data_rl.get_id().unwrap();
trace!(
"{:x} copying index {} id {:?} from data to coding",
debug_id,
"{} copying index {} id {:?} from data to coding",
id,
index,
id
);
@ -324,7 +325,7 @@ pub fn generate_coding(
.iter()
.enumerate()
.map(|(i, l)| {
trace!("{:x} i: {} data: {}", debug_id, i, l.data[0]);
trace!("{} i: {} data: {}", id, i, l.data[0]);
&l.data[..max_data_size]
})
.collect();
@ -338,15 +339,15 @@ pub fn generate_coding(
.iter_mut()
.enumerate()
.map(|(i, l)| {
trace!("{:x} i: {} coding: {}", debug_id, i, l.data[0],);
trace!("{} i: {} coding: {}", id, i, l.data[0],);
&mut l.data_mut()[..max_data_size]
})
.collect();
generate_coding_blocks(coding_ptrs.as_mut_slice(), &data_ptrs)?;
debug!(
"{:x} start_idx: {} data: {}:{} coding: {}:{}",
debug_id, start_idx, block_start, block_end, coding_start, block_end
"{} start_idx: {} data: {}:{} coding: {}:{}",
id, start_idx, block_start, block_end, coding_start, block_end
);
block_start = block_end;
}
@ -358,7 +359,7 @@ pub fn generate_coding(
// true if slot is stale (i.e. has the wrong index), old blob is flushed
// false if slot has a blob with the right index
fn is_missing(
debug_id: u64,
id: &Pubkey,
idx: u64,
window_slot: &mut Option<SharedBlob>,
recycler: &BlobRecycler,
@ -367,24 +368,24 @@ fn is_missing(
if let Some(blob) = mem::replace(window_slot, None) {
let blob_idx = blob.read().unwrap().get_index().unwrap();
if blob_idx == idx {
trace!("recover {:x}: idx: {} good {}", debug_id, idx, c_or_d);
trace!("recover {}: idx: {} good {}", id, idx, c_or_d);
// put it back
mem::replace(window_slot, Some(blob));
false
} else {
trace!(
"recover {:x}: idx: {} old {} {}, recycling",
debug_id,
"recover {}: idx: {} old {} {}, recycling",
id,
idx,
c_or_d,
blob_idx,
);
// recycle it
recycler.recycle(blob);
recycler.recycle(blob, "is_missing");
true
}
} else {
trace!("recover {:x}: idx: {} None {}", debug_id, idx, c_or_d);
trace!("recover {}: idx: {} None {}", id, idx, c_or_d);
// nothing there
true
}
@ -395,7 +396,7 @@ fn is_missing(
// if a blob is stale, remove it from the window slot
// side effect: block will be cleaned of old blobs
fn find_missing(
debug_id: u64,
id: &Pubkey,
block_start_idx: u64,
block_start: usize,
window: &mut [WindowSlot],
@ -411,12 +412,11 @@ fn find_missing(
let idx = (i - block_start) as u64 + block_start_idx;
let n = i % window.len();
if is_missing(debug_id, idx, &mut window[n].data, recycler, "data") {
if is_missing(id, idx, &mut window[n].data, recycler, "data") {
data_missing += 1;
}
if i >= coding_start && is_missing(debug_id, idx, &mut window[n].coding, recycler, "coding")
{
if i >= coding_start && is_missing(id, idx, &mut window[n].coding, recycler, "coding") {
coding_missing += 1;
}
}
@ -430,7 +430,7 @@ fn find_missing(
// any of the blocks, the block is skipped.
// Side effect: old blobs in a block are None'd
pub fn recover(
debug_id: u64,
id: &Pubkey,
recycler: &BlobRecycler,
window: &mut [WindowSlot],
start_idx: u64,
@ -444,8 +444,8 @@ pub fn recover(
let coding_start = block_start + NUM_DATA - NUM_CODING;
let block_end = block_start + NUM_DATA;
trace!(
"recover {:x}: block_start_idx: {} block_start: {} coding_start: {} block_end: {}",
debug_id,
"recover {}: block_start_idx: {} block_start: {} coding_start: {} block_end: {}",
id,
block_start_idx,
block_start,
coding_start,
@ -453,7 +453,7 @@ pub fn recover(
);
let (data_missing, coding_missing) =
find_missing(debug_id, block_start_idx, block_start, window, recycler);
find_missing(id, block_start_idx, block_start, window, recycler);
// if we're not missing data, or if we have too much missing but have enough coding
if data_missing == 0 {
@ -463,8 +463,8 @@ pub fn recover(
if (data_missing + coding_missing) > NUM_CODING {
trace!(
"recover {:x}: start: {} skipping recovery data: {} coding: {}",
debug_id,
"recover {}: start: {} skipping recovery data: {} coding: {}",
id,
block_start,
data_missing,
coding_missing
@ -474,8 +474,8 @@ pub fn recover(
}
trace!(
"recover {:x}: recovering: data: {} coding: {}",
debug_id,
"recover {}: recovering: data: {} coding: {}",
id,
data_missing,
coding_missing
);
@ -492,7 +492,7 @@ pub fn recover(
if let Some(b) = window[j].data.clone() {
if meta.is_none() {
meta = Some(b.read().unwrap().meta.clone());
trace!("recover {:x} meta at {} {:?}", debug_id, j, meta);
trace!("recover {} meta at {} {:?}", id, j, meta);
}
blobs.push(b);
} else {
@ -508,6 +508,12 @@ pub fn recover(
if let Some(b) = window[j].coding.clone() {
if size.is_none() {
size = Some(b.read().unwrap().meta.size - BLOB_HEADER_SIZE);
trace!(
"{} recover size {} from {}",
id,
size.unwrap(),
i as u64 + block_start_idx
);
}
blobs.push(b);
} else {
@ -518,12 +524,13 @@ pub fn recover(
erasures.push(((i - coding_start) + NUM_DATA) as i32);
}
}
// now that we have size (from coding), zero out data blob tails
let size = size.unwrap();
for i in block_start..block_end {
let j = i % window.len();
if let Some(b) = &window[j].data {
let size = size.unwrap();
let mut b_wl = b.write().unwrap();
for i in b_wl.meta.size..size {
b_wl.data[i] = 0;
@ -533,12 +540,7 @@ pub fn recover(
// marks end of erasures
erasures.push(-1);
trace!(
"erasures[]: {:x} {:?} data_size: {}",
debug_id,
erasures,
size.unwrap(),
);
trace!("erasures[]: {} {:?} data_size: {}", id, erasures, size,);
//lock everything for write
for b in &blobs {
locks.push(b.write().expect("'locks' arr in pb fn recover"));
@ -549,16 +551,16 @@ pub fn recover(
let mut data_ptrs: Vec<&mut [u8]> = Vec::with_capacity(NUM_DATA);
for (i, l) in locks.iter_mut().enumerate() {
if i < NUM_DATA {
trace!("{:x} pushing data: {}", debug_id, i);
data_ptrs.push(&mut l.data[..size.unwrap()]);
trace!("{} pushing data: {}", id, i);
data_ptrs.push(&mut l.data[..size]);
} else {
trace!("{:x} pushing coding: {}", debug_id, i);
coding_ptrs.push(&mut l.data_mut()[..size.unwrap()]);
trace!("{} pushing coding: {}", id, i);
coding_ptrs.push(&mut l.data_mut()[..size]);
}
}
trace!(
"{:x} coding_ptrs.len: {} data_ptrs.len {}",
debug_id,
"{} coding_ptrs.len: {} data_ptrs.len {}",
id,
coding_ptrs.len(),
data_ptrs.len()
);
@ -577,29 +579,38 @@ pub fn recover(
let mut data_size;
if n < NUM_DATA {
data_size = locks[n].get_data_size().unwrap();
data_size -= BLOB_HEADER_SIZE as u64;
data_size = locks[n].get_data_size().unwrap() as usize;
data_size -= BLOB_HEADER_SIZE;
if data_size > BLOB_DATA_SIZE {
error!("{} corrupt data blob[{}] data_size: {}", id, idx, data_size);
corrupt = true;
}
} else {
data_size = size.unwrap() as u64;
data_size = size;
idx -= NUM_CODING as u64;
locks[n].set_index(idx).unwrap();
if data_size - BLOB_HEADER_SIZE > BLOB_DATA_SIZE {
error!(
"{} corrupt coding blob[{}] data_size: {}",
id, idx, data_size
);
corrupt = true;
}
}
locks[n].meta = meta.clone().unwrap();
locks[n].set_size(data_size as usize);
locks[n].set_size(data_size);
trace!(
"{:x} erasures[{}] ({}) size: {:x} data[0]: {}",
debug_id,
"{} erasures[{}] ({}) size: {} data[0]: {}",
id,
*i,
idx,
data_size,
locks[n].data()[0]
);
if data_size > BLOB_DATA_SIZE as u64 {
corrupt = true;
}
}
assert!(!corrupt, " {:x} ", debug_id);
assert!(!corrupt, " {} ", id);
Ok(())
}
@ -609,10 +620,9 @@ mod test {
use crdt;
use erasure;
use logger;
use packet::{BlobRecycler, BLOB_HEADER_SIZE, BLOB_SIZE};
use packet::{BlobRecycler, BLOB_DATA_SIZE, BLOB_HEADER_SIZE, BLOB_SIZE};
use rand::{thread_rng, Rng};
use signature::Keypair;
use signature::KeypairUtil;
use signature::{Keypair, KeypairUtil, Pubkey};
// use std::sync::{Arc, RwLock};
use window::{index_blobs, WindowSlot};
@ -724,7 +734,12 @@ mod test {
let b_ = b.clone();
let mut w = b.write().unwrap();
// generate a random length, one more than a multiple of 4, between 9 and 29
let data_len = (thread_rng().gen_range(2, 8) * 4) + 1;
let data_len = if i == 3 {
BLOB_DATA_SIZE
} else {
(thread_rng().gen_range(2, 8) * 4) + 1
};
eprintln!("data_len of {} is {}", i, data_len);
w.set_size(data_len);
@ -746,7 +761,6 @@ mod test {
"127.0.0.1:1235".parse().unwrap(),
"127.0.0.1:1236".parse().unwrap(),
"127.0.0.1:1237".parse().unwrap(),
"127.0.0.1:1238".parse().unwrap(),
);
assert!(index_blobs(&d, &blobs, &mut (offset as u64)).is_ok());
for b in blobs {
@ -773,34 +787,34 @@ mod test {
}
}
/// Test helper: cycles `WINDOW_SIZE * 10` blobs through `blob_recycler`,
/// overwriting the first `BLOB_SIZE` bytes of each with random data and
/// randomly flagging some of them as coding blobs before recycling them all.
///
/// NOTE(review): presumably this is so that later `allocate()` calls in the
/// test receive previously used ("dirty") blobs rather than fresh ones --
/// confirm against `BlobRecycler`'s reuse behavior.
fn pollute_recycler(blob_recycler: &BlobRecycler) {
// Initial capacity is only a hint: the loop below pushes WINDOW_SIZE * 10
// blobs, so the Vec grows past this.
let mut blobs = Vec::with_capacity(WINDOW_SIZE * 2);
for _ in 0..WINDOW_SIZE * 10 {
let blob = blob_recycler.allocate();
// Inner scope so the write guard is dropped before the blob is pushed.
{
let mut b_l = blob.write().unwrap();
for i in 0..BLOB_SIZE {
b_l.data[i] = thread_rng().gen();
}
// some of the blobs should previously have been used for coding
// (flagged with probability NUM_CODING / NUM_DATA)
if thread_rng().gen_bool(erasure::NUM_CODING as f64 / erasure::NUM_DATA as f64) {
b_l.set_coding().unwrap();
}
}
blobs.push(blob);
}
// Hand every blob back to the recycler, tagged with this helper's name.
for blob in blobs {
blob_recycler.recycle(blob, "pollute_recycler");
}
}
#[test]
pub fn test_window_recover_basic() {
logger::setup();
let blob_recycler = BlobRecycler::default();
{
let mut blobs = Vec::with_capacity(WINDOW_SIZE * 2);
for _ in 0..WINDOW_SIZE * 10 {
let blob = blob_recycler.allocate();
{
let mut b_l = blob.write().unwrap();
for i in 0..BLOB_SIZE {
b_l.data[i] = thread_rng().gen();
}
// some of the blobs should previously have been used for coding
if thread_rng().gen_bool(erasure::NUM_CODING as f64 / erasure::NUM_DATA as f64)
{
b_l.set_coding().unwrap();
}
}
blobs.push(blob);
}
for blob in blobs {
blob_recycler.recycle(blob);
}
}
pollute_recycler(&blob_recycler);
// Generate a window
let offset = 0;
@ -819,9 +833,10 @@ mod test {
// Generate the coding blocks
let mut index = (erasure::NUM_DATA + 2) as u64;
let id = Pubkey::default();
assert!(
erasure::generate_coding(
0,
&id,
&mut window,
&blob_recycler,
offset as u64,
@ -848,7 +863,7 @@ mod test {
// Recover it from coding
assert!(
erasure::recover(
0,
&id,
&blob_recycler,
&mut window,
(offset + WINDOW_SIZE) as u64,
@ -887,7 +902,10 @@ mod test {
let refwindow = window[erase_offset].data.clone();
window[erase_offset].data = None;
blob_recycler.recycle(window[erase_offset].coding.clone().unwrap());
blob_recycler.recycle(
window[erase_offset].coding.clone().unwrap(),
"window_recover_basic",
);
window[erase_offset].coding = None;
print_window(&window);
@ -895,7 +913,7 @@ mod test {
// Recover it from coding
assert!(
erasure::recover(
0,
&id,
&blob_recycler,
&mut window,
(offset + WINDOW_SIZE) as u64,
@ -941,7 +959,7 @@ mod test {
// Recover it from coding
assert!(
erasure::recover(
0,
&id,
&blob_recycler,
&mut window,
(offset + WINDOW_SIZE) as u64,

View File

@ -16,31 +16,27 @@ pub struct FetchStage {
impl FetchStage {
pub fn new(
socket: UdpSocket,
exit: Arc<AtomicBool>,
packet_recycler: &PacketRecycler,
) -> (Self, PacketReceiver) {
Self::new_multi_socket(vec![socket], exit, packet_recycler)
}
pub fn new_multi_socket(
sockets: Vec<UdpSocket>,
exit: Arc<AtomicBool>,
packet_recycler: &PacketRecycler,
recycler: &PacketRecycler,
) -> (Self, PacketReceiver) {
let (packet_sender, packet_receiver) = channel();
let tx_sockets = sockets.into_iter().map(Arc::new).collect();
Self::new_multi_socket(tx_sockets, exit, recycler)
}
pub fn new_multi_socket(
sockets: Vec<Arc<UdpSocket>>,
exit: Arc<AtomicBool>,
recycler: &PacketRecycler,
) -> (Self, PacketReceiver) {
let (sender, receiver) = channel();
let thread_hdls: Vec<_> = sockets
.into_iter()
.map(|socket| {
streamer::receiver(
socket,
exit.clone(),
packet_recycler.clone(),
packet_sender.clone(),
)
streamer::receiver(socket, exit.clone(), recycler.clone(), sender.clone())
})
.collect();
(FetchStage { exit, thread_hdls }, packet_receiver)
(FetchStage { exit, thread_hdls }, receiver)
}
pub fn close(&self) {

Some files were not shown because too many files have changed in this diff Show More