swarm/network, swarm/storage: Preserve opentracing contexts (#19022)

(cherry picked from commit 0c10d37606)
lash authored on 2019-02-08 16:57:48 +01:00; committed by Rafael Matias
parent 710775f435
commit 068725c5b0
10 changed files with 123 additions and 108 deletions
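
Reviewer note (not part of the commit message): the change replaces per-call context.Context parameters with state carried by the objects themselves — a Fetcher now captures its context at construction, and SendPriority parks a named span in a per-peer map until the priority queue actually transmits the message, so opentracing contexts survive async boundaries. In rough outline (illustrative fragment; requestFunc/skipCheck stand in for real arguments):

	// before: each call carried its own context, so spans died with the call
	fetcher := NewFetcher(addr, requestFunc, skipCheck)
	fetcher.Request(rctx, 0)

	// after: one context per fetcher, set once at construction
	fetcher := NewFetcher(ctx, addr, requestFunc, skipCheck)
	fetcher.Request(0)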

View File

@@ -52,6 +52,7 @@ type Fetcher struct {
 	requestC      chan uint8 // channel for incoming requests (with the hopCount value in it)
 	searchTimeout time.Duration
 	skipCheck     bool
+	ctx           context.Context
 }
 
 type Request struct {
@@ -109,14 +110,14 @@ func NewFetcherFactory(request RequestFunc, skipCheck bool) *FetcherFactory {
 // contain the peers which are actively requesting this chunk, to make sure we
 // don't request back the chunks from them.
 // The created Fetcher is started and returned.
-func (f *FetcherFactory) New(ctx context.Context, source storage.Address, peersToSkip *sync.Map) storage.NetFetcher {
-	fetcher := NewFetcher(source, f.request, f.skipCheck)
-	go fetcher.run(ctx, peersToSkip)
+func (f *FetcherFactory) New(ctx context.Context, source storage.Address, peers *sync.Map) storage.NetFetcher {
+	fetcher := NewFetcher(ctx, source, f.request, f.skipCheck)
+	go fetcher.run(peers)
 	return fetcher
 }
 
 // NewFetcher creates a new Fetcher for the given chunk address using the given request function.
-func NewFetcher(addr storage.Address, rf RequestFunc, skipCheck bool) *Fetcher {
+func NewFetcher(ctx context.Context, addr storage.Address, rf RequestFunc, skipCheck bool) *Fetcher {
 	return &Fetcher{
 		addr:             addr,
 		protoRequestFunc: rf,
@@ -124,14 +125,15 @@ func NewFetcher(addr storage.Address, rf RequestFunc, skipCheck bool) *Fetcher {
 		requestC:         make(chan uint8),
 		searchTimeout:    defaultSearchTimeout,
 		skipCheck:        skipCheck,
+		ctx:              ctx,
 	}
 }
 
 // Offer is called when an upstream peer offers the chunk via syncing as part of `OfferedHashesMsg` and the node does not have the chunk locally.
-func (f *Fetcher) Offer(ctx context.Context, source *enode.ID) {
+func (f *Fetcher) Offer(source *enode.ID) {
 	// First we need to have this select to make sure that we return if context is done
 	select {
-	case <-ctx.Done():
+	case <-f.ctx.Done():
 		return
 	default:
 	}
@@ -140,15 +142,15 @@ func (f *Fetcher) Offer(ctx context.Context, source *enode.ID) {
 	// push to offerC instead if offerC is available (see number 2 in https://golang.org/ref/spec#Select_statements)
 	select {
 	case f.offerC <- source:
-	case <-ctx.Done():
+	case <-f.ctx.Done():
 	}
 }
 
 // Request is called when an upstream peer request the chunk as part of `RetrieveRequestMsg`, or from a local request through FileStore, and the node does not have the chunk locally.
-func (f *Fetcher) Request(ctx context.Context, hopCount uint8) {
+func (f *Fetcher) Request(hopCount uint8) {
 	// First we need to have this select to make sure that we return if context is done
 	select {
-	case <-ctx.Done():
+	case <-f.ctx.Done():
 		return
 	default:
 	}
@@ -162,13 +164,13 @@ func (f *Fetcher) Request(ctx context.Context, hopCount uint8) {
 	// push to offerC instead if offerC is available (see number 2 in https://golang.org/ref/spec#Select_statements)
 	select {
 	case f.requestC <- hopCount + 1:
-	case <-ctx.Done():
+	case <-f.ctx.Done():
 	}
 }
 
 // start prepares the Fetcher
 // it keeps the Fetcher alive within the lifecycle of the passed context
-func (f *Fetcher) run(ctx context.Context, peers *sync.Map) {
+func (f *Fetcher) run(peers *sync.Map) {
 	var (
 		doRequest bool        // determines if retrieval is initiated in the current iteration
 		wait      *time.Timer // timer for search timeout
@@ -219,7 +221,7 @@ func (f *Fetcher) run(ctx context.Context, peers *sync.Map) {
 			doRequest = requested
 
 		// all Fetcher context closed, can quit
-		case <-ctx.Done():
+		case <-f.ctx.Done():
 			log.Trace("terminate fetcher", "request addr", f.addr)
 			// TODO: send cancellations to all peers left over in peers map (i.e., those we requested from)
 			return
@@ -228,7 +230,7 @@ func (f *Fetcher) run(ctx context.Context, peers *sync.Map) {
 		// need to issue a new request
 		if doRequest {
 			var err error
-			sources, err = f.doRequest(ctx, gone, peers, sources, hopCount)
+			sources, err = f.doRequest(gone, peers, sources, hopCount)
 			if err != nil {
 				log.Info("unable to request", "request addr", f.addr, "err", err)
 			}
@@ -266,7 +268,7 @@ func (f *Fetcher) run(ctx context.Context, peers *sync.Map) {
 // * the peer's address is added to the set of peers to skip
 // * the peer's address is removed from prospective sources, and
 // * a go routine is started that reports on the gone channel if the peer is disconnected (or terminated their streamer)
-func (f *Fetcher) doRequest(ctx context.Context, gone chan *enode.ID, peersToSkip *sync.Map, sources []*enode.ID, hopCount uint8) ([]*enode.ID, error) {
+func (f *Fetcher) doRequest(gone chan *enode.ID, peersToSkip *sync.Map, sources []*enode.ID, hopCount uint8) ([]*enode.ID, error) {
 	var i int
 	var sourceID *enode.ID
 	var quit chan struct{}
@@ -283,7 +285,7 @@ func (f *Fetcher) doRequest(ctx context.Context, gone chan *enode.ID, peersToSki
 	for i = 0; i < len(sources); i++ {
 		req.Source = sources[i]
 		var err error
-		sourceID, quit, err = f.protoRequestFunc(ctx, req)
+		sourceID, quit, err = f.protoRequestFunc(f.ctx, req)
 		if err == nil {
 			// remove the peer from known sources
 			// Note: we can modify the source although we are looping on it, because we break from the loop immediately
@@ -297,7 +299,7 @@ func (f *Fetcher) doRequest(ctx context.Context, gone chan *enode.ID, peersToSki
 	if !foundSource {
 		req.Source = nil
 		var err error
-		sourceID, quit, err = f.protoRequestFunc(ctx, req)
+		sourceID, quit, err = f.protoRequestFunc(f.ctx, req)
 		if err != nil {
 			// if no peers found to request from
 			return sources, err
@@ -314,7 +316,7 @@ func (f *Fetcher) doRequest(ctx context.Context, gone chan *enode.ID, peersToSki
 		select {
 		case <-quit:
 			gone <- sourceID
-		case <-ctx.Done():
+		case <-f.ctx.Done():
 		}
 	}()
 	return sources, nil
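
The fetcher.go hunks all follow from storing the context in the struct. A minimal self-contained sketch of the resulting pattern (illustrative names; only ctx, requestC, and the shape of the select come from the diff):

	package network

	import "context"

	// fetcherSketch mirrors the new shape: the context is captured once at
	// construction, and every blocking select watches f.ctx instead of a
	// per-call context.
	type fetcherSketch struct {
		ctx      context.Context
		requestC chan uint8
	}

	func newFetcherSketch(ctx context.Context) *fetcherSketch {
		return &fetcherSketch{ctx: ctx, requestC: make(chan uint8)}
	}

	func (f *fetcherSketch) Request(hopCount uint8) {
		select {
		case f.requestC <- hopCount + 1:
		case <-f.ctx.Done(): // cancelling the construction-time context aborts the call
		}
	}

One consequence worth noting: cancellation is now controlled solely by whoever constructed the fetcher; callers of Offer and Request can no longer time out an individual call with their own context.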

View File

@@ -69,7 +69,11 @@ func (m *mockRequester) doRequest(ctx context.Context, request *Request) (*enode
 func TestFetcherSingleRequest(t *testing.T) {
 	requester := newMockRequester()
 	addr := make([]byte, 32)
-	fetcher := NewFetcher(addr, requester.doRequest, true)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	fetcher := NewFetcher(ctx, addr, requester.doRequest, true)
 
 	peers := []string{"a", "b", "c", "d"}
 	peersToSkip := &sync.Map{}
@@ -77,13 +81,9 @@ func TestFetcherSingleRequest(t *testing.T) {
 		peersToSkip.Store(p, time.Now())
 	}
 
-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
-
-	go fetcher.run(ctx, peersToSkip)
+	go fetcher.run(peersToSkip)
 
-	rctx := context.Background()
-	fetcher.Request(rctx, 0)
+	fetcher.Request(0)
 
 	select {
 	case request := <-requester.requestC:
@@ -115,20 +115,19 @@ func TestFetcherSingleRequest(t *testing.T) {
 func TestFetcherCancelStopsFetcher(t *testing.T) {
 	requester := newMockRequester()
 	addr := make([]byte, 32)
-	fetcher := NewFetcher(addr, requester.doRequest, true)
-	peersToSkip := &sync.Map{}
 
 	ctx, cancel := context.WithCancel(context.Background())
 
+	fetcher := NewFetcher(ctx, addr, requester.doRequest, true)
+
+	peersToSkip := &sync.Map{}
+
 	// we start the fetcher, and then we immediately cancel the context
-	go fetcher.run(ctx, peersToSkip)
+	go fetcher.run(peersToSkip)
 	cancel()
 
-	rctx, rcancel := context.WithTimeout(ctx, 100*time.Millisecond)
-	defer rcancel()
-
 	// we call Request with an active context
-	fetcher.Request(rctx, 0)
+	fetcher.Request(0)
 
 	// fetcher should not initiate request, we can only check by waiting a bit and making sure no request is happening
 	select {
@@ -140,23 +139,23 @@ func TestFetcherCancelStopsFetcher(t *testing.T) {
 // TestFetchCancelStopsRequest tests that calling a Request function with a cancelled context does not initiate a request
 func TestFetcherCancelStopsRequest(t *testing.T) {
+	t.Skip("since context is now per fetcher, this test is likely redundant")
+
 	requester := newMockRequester(100 * time.Millisecond)
 	addr := make([]byte, 32)
-	fetcher := NewFetcher(addr, requester.doRequest, true)
-	peersToSkip := &sync.Map{}
 
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
 
-	// we start the fetcher with an active context
-	go fetcher.run(ctx, peersToSkip)
+	fetcher := NewFetcher(ctx, addr, requester.doRequest, true)
 
 	rctx, rcancel := context.WithCancel(context.Background())
 	rcancel()
 
+	peersToSkip := &sync.Map{}
+
+	// we start the fetcher with an active context
+	go fetcher.run(peersToSkip)
+
 	// we call Request with a cancelled context
-	fetcher.Request(rctx, 0)
+	fetcher.Request(0)
 
 	// fetcher should not initiate request, we can only check by waiting a bit and making sure no request is happening
 	select {
@@ -166,8 +165,7 @@ func TestFetcherCancelStopsRequest(t *testing.T) {
 	}
 
 	// if there is another Request with active context, there should be a request, because the fetcher itself is not cancelled
-	rctx = context.Background()
-	fetcher.Request(rctx, 0)
+	fetcher.Request(0)
 
 	select {
 	case <-requester.requestC:
@@ -182,19 +180,19 @@ func TestFetcherCancelStopsRequest(t *testing.T) {
 func TestFetcherOfferUsesSource(t *testing.T) {
 	requester := newMockRequester(100 * time.Millisecond)
 	addr := make([]byte, 32)
-	fetcher := NewFetcher(addr, requester.doRequest, true)
-	peersToSkip := &sync.Map{}
 
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
 
-	// start the fetcher
-	go fetcher.run(ctx, peersToSkip)
+	fetcher := NewFetcher(ctx, addr, requester.doRequest, true)
 
-	rctx := context.Background()
+	peersToSkip := &sync.Map{}
+
+	// start the fetcher
+	go fetcher.run(peersToSkip)
+
 	// call the Offer function with the source peer
-	fetcher.Offer(rctx, &sourcePeerID)
+	fetcher.Offer(&sourcePeerID)
 
 	// fetcher should not initiate request
 	select {
@@ -204,8 +202,7 @@ func TestFetcherOfferUsesSource(t *testing.T) {
 	}
 
 	// call Request after the Offer
-	rctx = context.Background()
-	fetcher.Request(rctx, 0)
+	fetcher.Request(0)
 
 	// there should be exactly 1 request coming from fetcher
 	var request *Request
@@ -234,19 +231,19 @@ func TestFetcherOfferUsesSource(t *testing.T) {
 func TestFetcherOfferAfterRequestUsesSourceFromContext(t *testing.T) {
 	requester := newMockRequester(100 * time.Millisecond)
 	addr := make([]byte, 32)
-	fetcher := NewFetcher(addr, requester.doRequest, true)
-	peersToSkip := &sync.Map{}
 
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
 
+	fetcher := NewFetcher(ctx, addr, requester.doRequest, true)
+
+	peersToSkip := &sync.Map{}
+
 	// start the fetcher
-	go fetcher.run(ctx, peersToSkip)
+	go fetcher.run(peersToSkip)
 
 	// call Request first
-	rctx := context.Background()
-	fetcher.Request(rctx, 0)
+	fetcher.Request(0)
 
 	// there should be a request coming from fetcher
 	var request *Request
@@ -260,7 +257,7 @@ func TestFetcherOfferAfterRequestUsesSourceFromContext(t *testing.T) {
 	}
 
 	// after the Request call Offer
-	fetcher.Offer(context.Background(), &sourcePeerID)
+	fetcher.Offer(&sourcePeerID)
 
 	// there should be a request coming from fetcher
 	select {
@@ -283,21 +280,21 @@ func TestFetcherOfferAfterRequestUsesSourceFromContext(t *testing.T) {
 func TestFetcherRetryOnTimeout(t *testing.T) {
 	requester := newMockRequester()
 	addr := make([]byte, 32)
-	fetcher := NewFetcher(addr, requester.doRequest, true)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	fetcher := NewFetcher(ctx, addr, requester.doRequest, true)
 	// set searchTimeOut to low value so the test is quicker
 	fetcher.searchTimeout = 250 * time.Millisecond
 
 	peersToSkip := &sync.Map{}
 
-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
-
 	// start the fetcher
-	go fetcher.run(ctx, peersToSkip)
+	go fetcher.run(peersToSkip)
 
 	// call the fetch function with an active context
-	rctx := context.Background()
-	fetcher.Request(rctx, 0)
+	fetcher.Request(0)
 
 	// after 100ms the first request should be initiated
 	time.Sleep(100 * time.Millisecond)
@@ -339,7 +336,7 @@ func TestFetcherFactory(t *testing.T) {
 	fetcher := fetcherFactory.New(context.Background(), addr, peersToSkip)
 
-	fetcher.Request(context.Background(), 0)
+	fetcher.Request(0)
 
 	// check if the created fetchFunction really starts a fetcher and initiates a request
 	select {
@@ -353,7 +350,11 @@ func TestFetcherFactory(t *testing.T) {
 func TestFetcherRequestQuitRetriesRequest(t *testing.T) {
 	requester := newMockRequester()
 	addr := make([]byte, 32)
-	fetcher := NewFetcher(addr, requester.doRequest, true)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	fetcher := NewFetcher(ctx, addr, requester.doRequest, true)
 
 	// make sure the searchTimeout is long so it is sure the request is not
 	// retried because of timeout
@@ -361,13 +362,9 @@ func TestFetcherRequestQuitRetriesRequest(t *testing.T) {
 	peersToSkip := &sync.Map{}
 
-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
-
-	go fetcher.run(ctx, peersToSkip)
+	go fetcher.run(peersToSkip)
 
-	rctx := context.Background()
-	fetcher.Request(rctx, 0)
+	fetcher.Request(0)
 
 	select {
 	case <-requester.requestC:
@@ -460,17 +457,15 @@ func TestRequestSkipPeerPermanent(t *testing.T) {
 func TestFetcherMaxHopCount(t *testing.T) {
 	requester := newMockRequester()
 	addr := make([]byte, 32)
-	fetcher := NewFetcher(addr, requester.doRequest, true)
 
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
 
+	fetcher := NewFetcher(ctx, addr, requester.doRequest, true)
+
 	peersToSkip := &sync.Map{}
 
-	go fetcher.run(ctx, peersToSkip)
-
-	rctx := context.Background()
-	fetcher.Request(rctx, maxHopCount)
+	go fetcher.run(peersToSkip)
 
 	// if hopCount is already at max no request should be initiated
 	select {
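
Every test update above is the same mechanical rewrite: create the context before NewFetcher, pass it in, and drop the per-call rctx. A condensed version of the new scaffold (a sketch assuming the package's mock helpers shown above):

	func TestFetcherSketch(t *testing.T) {
		requester := newMockRequester()
		addr := make([]byte, 32)

		ctx, cancel := context.WithCancel(context.Background())
		defer cancel()

		fetcher := NewFetcher(ctx, addr, requester.doRequest, true)
		peersToSkip := &sync.Map{}
		go fetcher.run(peersToSkip)

		fetcher.Request(0) // hopCount only; cancellation now comes from ctx
	}

TestFetcherCancelStopsRequest is skipped rather than rewritten because its premise — a cancelled per-request context — no longer exists.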

View File

@@ -144,7 +144,6 @@ func (d *Delivery) handleRetrieveRequestMsg(ctx context.Context, sp *Peer, req *
 	ctx, osp = spancontext.StartSpan(
 		ctx,
 		"retrieve.request")
-	defer osp.Finish()
 
 	s, err := sp.getServer(NewStream(swarmChunkServerStreamName, "", true))
 	if err != nil {
@@ -167,6 +166,7 @@ func (d *Delivery) handleRetrieveRequestMsg(ctx context.Context, sp *Peer, req *
 	}()
 
 	go func() {
+		defer osp.Finish()
 		chunk, err := d.chunkStore.Get(ctx, req.Addr)
 		if err != nil {
 			retrieveChunkFail.Inc(1)
@@ -213,11 +213,12 @@ func (d *Delivery) handleChunkDeliveryMsg(ctx context.Context, sp *Peer, req *Ch
 	ctx, osp = spancontext.StartSpan(
 		ctx,
 		"chunk.delivery")
-	defer osp.Finish()
 
 	processReceivedChunksCount.Inc(1)
 
 	go func() {
+		defer osp.Finish()
+
 		req.peer = sp
 		err := d.chunkStore.Put(ctx, storage.NewChunk(req.Addr, req.SData))
 		if err != nil {
@@ -271,7 +272,7 @@ func (d *Delivery) RequestFromPeers(ctx context.Context, req *network.Request) (
 			Addr:      req.Addr,
 			SkipCheck: req.SkipCheck,
 			HopCount:  req.HopCount,
-		}, Top)
+		}, Top, "request.from.peers")
 		if err != nil {
 			return nil, nil, err
 		}
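
The delivery.go change is about span lifetime: both handlers do the chunk-store work in a goroutine, so a `defer osp.Finish()` at handler scope would close the span before the Get or Put ever runs. Moving the defer into the goroutine keeps the span open for the asynchronous part. A minimal sketch of the corrected lifetime, using the public opentracing-go API rather than the package's spancontext helper (names are hypothetical):

	package delivery

	import (
		"context"

		opentracing "github.com/opentracing/opentracing-go"
	)

	func handleSketch(ctx context.Context, work func(context.Context)) {
		// The span is started synchronously in the handler...
		sp, ctx := opentracing.StartSpanFromContext(ctx, "chunk.delivery")
		go func() {
			// ...but finished only when the asynchronous work is done, so
			// the trace covers the actual chunk store operation.
			defer sp.Finish()
			work(ctx)
		}()
	}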

View File

@@ -300,7 +300,7 @@ func (p *Peer) handleOfferedHashesMsg(ctx context.Context, req *OfferedHashesMs
 			return
 		}
 		log.Trace("sending want batch", "peer", p.ID(), "stream", msg.Stream, "from", msg.From, "to", msg.To)
-		err := p.SendPriority(ctx, msg, c.priority)
+		err := p.SendPriority(ctx, msg, c.priority, "")
 		if err != nil {
 			log.Warn("SendPriority error", "err", err)
 		}
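
This is the call-site half of the SendPriority signature change made in peer.go below: an empty traceId means "no span", so the want-batch send keeps its old untraced behaviour.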

View File

@@ -65,6 +65,7 @@ type Peer struct {
 	// on creating a new client in offered hashes handler.
 	clientParams map[Stream]*clientParams
 	quit         chan struct{}
+	spans        sync.Map
 }
 
 type WrappedPriorityMsg struct {
@@ -82,10 +83,16 @@ func NewPeer(peer *protocols.Peer, streamer *Registry) *Peer {
 		clients:      make(map[Stream]*client),
 		clientParams: make(map[Stream]*clientParams),
 		quit:         make(chan struct{}),
+		spans:        sync.Map{},
 	}
 	ctx, cancel := context.WithCancel(context.Background())
 	go p.pq.Run(ctx, func(i interface{}) {
 		wmsg := i.(WrappedPriorityMsg)
+		defer p.spans.Delete(wmsg.Context)
+		sp, ok := p.spans.Load(wmsg.Context)
+		if ok {
+			defer sp.(opentracing.Span).Finish()
+		}
 		err := p.Send(wmsg.Context, wmsg.Msg)
 		if err != nil {
 			log.Error("Message send error, dropping peer", "peer", p.ID(), "err", err)
@@ -130,7 +137,6 @@ func NewPeer(peer *protocols.Peer, streamer *Registry) *Peer {
 // Deliver sends a storeRequestMsg protocol message to the peer
 // Depending on the `syncing` parameter we send different message types
 func (p *Peer) Deliver(ctx context.Context, chunk storage.Chunk, priority uint8, syncing bool) error {
-	var sp opentracing.Span
 	var msg interface{}
 
 	spanName := "send.chunk.delivery"
@@ -151,18 +157,22 @@ func (p *Peer) Deliver(ctx context.Context, chunk storage.Chunk, priority uint8,
 		}
 		spanName += ".retrieval"
 	}
-	ctx, sp = spancontext.StartSpan(
-		ctx,
-		spanName)
-	defer sp.Finish()
 
-	return p.SendPriority(ctx, msg, priority)
+	return p.SendPriority(ctx, msg, priority, spanName)
 }
 
 // SendPriority sends message to the peer using the outgoing priority queue
-func (p *Peer) SendPriority(ctx context.Context, msg interface{}, priority uint8) error {
+func (p *Peer) SendPriority(ctx context.Context, msg interface{}, priority uint8, traceId string) error {
 	defer metrics.GetOrRegisterResettingTimer(fmt.Sprintf("peer.sendpriority_t.%d", priority), nil).UpdateSince(time.Now())
 	metrics.GetOrRegisterCounter(fmt.Sprintf("peer.sendpriority.%d", priority), nil).Inc(1)
+	if traceId != "" {
+		var sp opentracing.Span
+		ctx, sp = spancontext.StartSpan(
+			ctx,
+			traceId,
+		)
+		p.spans.Store(ctx, sp)
+	}
 	wmsg := WrappedPriorityMsg{
 		Context: ctx,
 		Msg:     msg,
@@ -205,7 +215,7 @@ func (p *Peer) SendOfferedHashes(s *server, f, t uint64) error {
 		Stream: s.stream,
 	}
 	log.Trace("Swarm syncer offer batch", "peer", p.ID(), "stream", s.stream, "len", len(hashes), "from", from, "to", to)
-	return p.SendPriority(ctx, msg, s.priority)
+	return p.SendPriority(ctx, msg, s.priority, "send.offered.hashes")
 }
 
 func (p *Peer) getServer(s Stream) (*server, error) {
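
This is the core of the fix. A span opened in SendPriority cannot be finished there, because the message only enters the priority queue; the actual send happens later, in the pq.Run callback. The span is therefore parked in p.spans, keyed by the wrapped message's context, and finished by the queue consumer after p.Send returns. A standalone sketch of the pattern (hypothetical names; StartSpanFromContext stands in for the package's spancontext.StartSpan):

	package stream

	import (
		"context"
		"sync"

		opentracing "github.com/opentracing/opentracing-go"
	)

	type tracedQueue struct {
		spans sync.Map // context.Context -> opentracing.Span
	}

	// enqueue starts a span for a named send and parks it until delivery.
	func (q *tracedQueue) enqueue(ctx context.Context, traceId string) context.Context {
		if traceId == "" {
			return ctx // untraced send: behave exactly like the old SendPriority
		}
		sp, ctx := opentracing.StartSpanFromContext(ctx, traceId)
		q.spans.Store(ctx, sp)
		return ctx
	}

	// deliver is the consumer side: finish the parked span once the message
	// has actually been sent, then drop it from the map.
	func (q *tracedQueue) deliver(ctx context.Context, send func(context.Context) error) error {
		defer q.spans.Delete(ctx)
		if sp, ok := q.spans.Load(ctx); ok {
			defer sp.(opentracing.Span).Finish()
		}
		return send(ctx)
	}

Using the context value itself as the map key works because StartSpan derives a fresh context per message, so each queued WrappedPriorityMsg carries a unique, comparable key.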

View File

@@ -359,7 +359,7 @@ func (r *Registry) Subscribe(peerId enode.ID, s Stream, h *Range, priority uint8
 	}
 	log.Debug("Subscribe ", "peer", peerId, "stream", s, "history", h)
 
-	return peer.SendPriority(context.TODO(), msg, priority)
+	return peer.SendPriority(context.TODO(), msg, priority, "")
 }
 
 func (r *Registry) Unsubscribe(peerId enode.ID, s Stream) error {
@@ -730,7 +730,8 @@ func (c *client) batchDone(p *Peer, req *OfferedHashesMsg, hashes []byte) error
 		if err != nil {
 			return err
 		}
-		if err := p.SendPriority(context.TODO(), tp, c.priority); err != nil {
+
+		if err := p.SendPriority(context.TODO(), tp, c.priority, ""); err != nil {
 			return err
 		}
 		if c.to > 0 && tp.Takeover.End >= c.to {
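
As with the want-batch send above, Subscribe and batchDone pass "" and stay untraced. The named spans introduced by this commit are "send.chunk.delivery" (with the ".retrieval" suffix for retrieval deliveries), "send.offered.hashes", and "request.from.peers".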