swarm: Chunk refactor (#17659)

Co-authored-by: Janos Guljas <janos@resenje.org>
Co-authored-by: Balint Gabor <balint.g@gmail.com>
Co-authored-by: Anton Evangelatov <anton.evangelatov@gmail.com>
Co-authored-by: Viktor Trón <viktor.tron@gmail.com>
This commit is contained in:
Balint Gabor
2018-09-13 11:42:19 +02:00
committed by GitHub
parent ff3a5d24d2
commit 3ff2f75636
55 changed files with 3193 additions and 1925 deletions

View File

@ -18,181 +18,275 @@ package storage
import (
"context"
"encoding/hex"
"fmt"
"sync"
"sync/atomic"
"time"
"github.com/ethereum/go-ethereum/p2p/discover"
"github.com/ethereum/go-ethereum/swarm/log"
"github.com/ethereum/go-ethereum/swarm/spancontext"
opentracing "github.com/opentracing/opentracing-go"
lru "github.com/hashicorp/golang-lru"
)
var (
// NetStore.Get timeout for get and get retries
// This is the maximum period that the Get will block.
// If it is reached, Get will return ErrChunkNotFound.
netStoreRetryTimeout = 30 * time.Second
// Minimal period between calling get method on NetStore
// on retry. It protects calling get very frequently if
// it returns ErrChunkNotFound very fast.
netStoreMinRetryDelay = 3 * time.Second
// Timeout interval before retrieval is timed out.
// It is used in NetStore.get on waiting for ReqC to be
// closed on a single retrieve request.
searchTimeout = 10 * time.Second
type (
NewNetFetcherFunc func(ctx context.Context, addr Address, peers *sync.Map) NetFetcher
)
// NetStore implements the ChunkStore interface,
// this chunk access layer assumed 2 chunk stores
// local storage eg. LocalStore and network storage eg., NetStore
// access by calling network is blocking with a timeout
type NetFetcher interface {
Request(ctx context.Context)
Offer(ctx context.Context, source *discover.NodeID)
}
// NetStore is an extension of local storage
// it implements the ChunkStore interface
// on request it initiates remote cloud retrieval using a fetcher
// fetchers are unique to a chunk and are stored in fetchers LRU memory cache
// fetchFuncFactory is a factory object to create a fetch function for a specific chunk address
type NetStore struct {
localStore *LocalStore
retrieve func(ctx context.Context, chunk *Chunk) error
mu sync.Mutex
store SyncChunkStore
fetchers *lru.Cache
NewNetFetcherFunc NewNetFetcherFunc
closeC chan struct{}
}
func NewNetStore(localStore *LocalStore, retrieve func(ctx context.Context, chunk *Chunk) error) *NetStore {
return &NetStore{localStore, retrieve}
// NewNetStore creates a new NetStore object using the given local store. newFetchFunc is a
// constructor function that can create a fetch function for a specific chunk address.
func NewNetStore(store SyncChunkStore, nnf NewNetFetcherFunc) (*NetStore, error) {
fetchers, err := lru.New(defaultChunkRequestsCacheCapacity)
if err != nil {
return nil, err
}
return &NetStore{
store: store,
fetchers: fetchers,
NewNetFetcherFunc: nnf,
closeC: make(chan struct{}),
}, nil
}
// Get is the entrypoint for local retrieve requests
// waits for response or times out
//
// Get uses get method to retrieve request, but retries if the
// ErrChunkNotFound is returned by get, until the netStoreRetryTimeout
// is reached.
func (ns *NetStore) Get(ctx context.Context, addr Address) (chunk *Chunk, err error) {
// Put stores a chunk in localstore, and delivers to all requestor peers using the fetcher stored in
// the fetchers cache
func (n *NetStore) Put(ctx context.Context, ch Chunk) error {
n.mu.Lock()
defer n.mu.Unlock()
var sp opentracing.Span
ctx, sp = spancontext.StartSpan(
ctx,
"netstore.get.global")
defer sp.Finish()
timer := time.NewTimer(netStoreRetryTimeout)
defer timer.Stop()
// result and resultC provide results from the goroutine
// where NetStore.get is called.
type result struct {
chunk *Chunk
err error
// put to the chunk to the store, there should be no error
err := n.store.Put(ctx, ch)
if err != nil {
return err
}
resultC := make(chan result)
// quitC ensures that retring goroutine is terminated
// when this function returns.
quitC := make(chan struct{})
defer close(quitC)
// do retries in a goroutine so that the timer can
// force this method to return after the netStoreRetryTimeout.
go func() {
// limiter ensures that NetStore.get is not called more frequently
// then netStoreMinRetryDelay. If NetStore.get takes longer
// then netStoreMinRetryDelay, the next retry call will be
// without a delay.
limiter := time.NewTimer(netStoreMinRetryDelay)
defer limiter.Stop()
for {
chunk, err := ns.get(ctx, addr, 0)
if err != ErrChunkNotFound {
// break retry only if the error is nil
// or other error then ErrChunkNotFound
select {
case <-quitC:
// Maybe NetStore.Get function has returned
// by the timer.C while we were waiting for the
// results. Terminate this goroutine.
case resultC <- result{chunk: chunk, err: err}:
// Send the result to the parrent goroutine.
}
return
}
select {
case <-quitC:
// NetStore.Get function has returned, possibly
// by the timer.C, which makes this goroutine
// not needed.
return
case <-limiter.C:
}
// Reset the limiter for the next iteration.
limiter.Reset(netStoreMinRetryDelay)
log.Debug("NetStore.Get retry chunk", "key", addr)
}
}()
select {
case r := <-resultC:
return r.chunk, r.err
case <-timer.C:
return nil, ErrChunkNotFound
// if chunk is now put in the store, check if there was an active fetcher and call deliver on it
// (this delivers the chunk to requestors via the fetcher)
if f := n.getFetcher(ch.Address()); f != nil {
f.deliver(ctx, ch)
}
return nil
}
// GetWithTimeout makes a single retrieval attempt for a chunk with a explicit timeout parameter
func (ns *NetStore) GetWithTimeout(ctx context.Context, addr Address, timeout time.Duration) (chunk *Chunk, err error) {
return ns.get(ctx, addr, timeout)
// Get retrieves the chunk from the NetStore DPA synchronously.
// It calls NetStore.get, and if the chunk is not in local Storage
// it calls fetch with the request, which blocks until the chunk
// arrived or context is done
func (n *NetStore) Get(rctx context.Context, ref Address) (Chunk, error) {
chunk, fetch, err := n.get(rctx, ref)
if err != nil {
return nil, err
}
if chunk != nil {
return chunk, nil
}
return fetch(rctx)
}
func (ns *NetStore) get(ctx context.Context, addr Address, timeout time.Duration) (chunk *Chunk, err error) {
if timeout == 0 {
timeout = searchTimeout
}
var sp opentracing.Span
ctx, sp = spancontext.StartSpan(
ctx,
"netstore.get")
defer sp.Finish()
if ns.retrieve == nil {
chunk, err = ns.localStore.Get(ctx, addr)
if err == nil {
return chunk, nil
}
if err != ErrFetching {
return nil, err
}
} else {
var created bool
chunk, created = ns.localStore.GetOrCreateRequest(ctx, addr)
if chunk.ReqC == nil {
return chunk, nil
}
if created {
err := ns.retrieve(ctx, chunk)
if err != nil {
// mark chunk request as failed so that we can retry it later
chunk.SetErrored(ErrChunkUnavailable)
return nil, err
}
}
}
t := time.NewTicker(timeout)
defer t.Stop()
select {
case <-t.C:
// mark chunk request as failed so that we can retry
chunk.SetErrored(ErrChunkNotFound)
return nil, ErrChunkNotFound
case <-chunk.ReqC:
}
chunk.SetErrored(nil)
return chunk, nil
func (n *NetStore) BinIndex(po uint8) uint64 {
return n.store.BinIndex(po)
}
// Put is the entrypoint for local store requests coming from storeLoop
func (ns *NetStore) Put(ctx context.Context, chunk *Chunk) {
ns.localStore.Put(ctx, chunk)
func (n *NetStore) Iterator(from uint64, to uint64, po uint8, f func(Address, uint64) bool) error {
return n.store.Iterator(from, to, po, f)
}
// FetchFunc returns nil if the store contains the given address. Otherwise it returns a wait function,
// which returns after the chunk is available or the context is done
func (n *NetStore) FetchFunc(ctx context.Context, ref Address) func(context.Context) error {
chunk, fetch, _ := n.get(ctx, ref)
if chunk != nil {
return nil
}
return func(ctx context.Context) error {
_, err := fetch(ctx)
return err
}
}
// Close chunk store
func (ns *NetStore) Close() {
ns.localStore.Close()
func (n *NetStore) Close() {
close(n.closeC)
n.store.Close()
// TODO: loop through fetchers to cancel them
}
// get attempts at retrieving the chunk from LocalStore
// If it is not found then using getOrCreateFetcher:
// 1. Either there is already a fetcher to retrieve it
// 2. A new fetcher is created and saved in the fetchers cache
// From here on, all Get will hit on this fetcher until the chunk is delivered
// or all fetcher contexts are done.
// It returns a chunk, a fetcher function and an error
// If chunk is nil, the returned fetch function needs to be called with a context to return the chunk.
func (n *NetStore) get(ctx context.Context, ref Address) (Chunk, func(context.Context) (Chunk, error), error) {
n.mu.Lock()
defer n.mu.Unlock()
chunk, err := n.store.Get(ctx, ref)
if err != nil {
if err != ErrChunkNotFound {
log.Debug("Received error from LocalStore other than ErrNotFound", "err", err)
}
// The chunk is not available in the LocalStore, let's get the fetcher for it, or create a new one
// if it doesn't exist yet
f := n.getOrCreateFetcher(ref)
// If the caller needs the chunk, it has to use the returned fetch function to get it
return nil, f.Fetch, nil
}
return chunk, nil, nil
}
// getOrCreateFetcher attempts at retrieving an existing fetchers
// if none exists, creates one and saves it in the fetchers cache
// caller must hold the lock
func (n *NetStore) getOrCreateFetcher(ref Address) *fetcher {
if f := n.getFetcher(ref); f != nil {
return f
}
// no fetcher for the given address, we have to create a new one
key := hex.EncodeToString(ref)
// create the context during which fetching is kept alive
ctx, cancel := context.WithCancel(context.Background())
// destroy is called when all requests finish
destroy := func() {
// remove fetcher from fetchers
n.fetchers.Remove(key)
// stop fetcher by cancelling context called when
// all requests cancelled/timedout or chunk is delivered
cancel()
}
// peers always stores all the peers which have an active request for the chunk. It is shared
// between fetcher and the NewFetchFunc function. It is needed by the NewFetchFunc because
// the peers which requested the chunk should not be requested to deliver it.
peers := &sync.Map{}
fetcher := newFetcher(ref, n.NewNetFetcherFunc(ctx, ref, peers), destroy, peers, n.closeC)
n.fetchers.Add(key, fetcher)
return fetcher
}
// getFetcher retrieves the fetcher for the given address from the fetchers cache if it exists,
// otherwise it returns nil
func (n *NetStore) getFetcher(ref Address) *fetcher {
key := hex.EncodeToString(ref)
f, ok := n.fetchers.Get(key)
if ok {
return f.(*fetcher)
}
return nil
}
// RequestsCacheLen returns the current number of outgoing requests stored in the cache
func (n *NetStore) RequestsCacheLen() int {
return n.fetchers.Len()
}
// One fetcher object is responsible to fetch one chunk for one address, and keep track of all the
// peers who have requested it and did not receive it yet.
type fetcher struct {
addr Address // address of chunk
chunk Chunk // fetcher can set the chunk on the fetcher
deliveredC chan struct{} // chan signalling chunk delivery to requests
cancelledC chan struct{} // chan signalling the fetcher has been cancelled (removed from fetchers in NetStore)
netFetcher NetFetcher // remote fetch function to be called with a request source taken from the context
cancel func() // cleanup function for the remote fetcher to call when all upstream contexts are called
peers *sync.Map // the peers which asked for the chunk
requestCnt int32 // number of requests on this chunk. If all the requests are done (delivered or context is done) the cancel function is called
deliverOnce *sync.Once // guarantees that we only close deliveredC once
}
// newFetcher creates a new fetcher object for the fiven addr. fetch is the function which actually
// does the retrieval (in non-test cases this is coming from the network package). cancel function is
// called either
// 1. when the chunk has been fetched all peers have been either notified or their context has been done
// 2. the chunk has not been fetched but all context from all the requests has been done
// The peers map stores all the peers which have requested chunk.
func newFetcher(addr Address, nf NetFetcher, cancel func(), peers *sync.Map, closeC chan struct{}) *fetcher {
cancelOnce := &sync.Once{} // cancel should only be called once
return &fetcher{
addr: addr,
deliveredC: make(chan struct{}),
deliverOnce: &sync.Once{},
cancelledC: closeC,
netFetcher: nf,
cancel: func() {
cancelOnce.Do(func() {
cancel()
})
},
peers: peers,
}
}
// Fetch fetches the chunk synchronously, it is called by NetStore.Get is the chunk is not available
// locally.
func (f *fetcher) Fetch(rctx context.Context) (Chunk, error) {
atomic.AddInt32(&f.requestCnt, 1)
defer func() {
// if all the requests are done the fetcher can be cancelled
if atomic.AddInt32(&f.requestCnt, -1) == 0 {
f.cancel()
}
}()
// The peer asking for the chunk. Store in the shared peers map, but delete after the request
// has been delivered
peer := rctx.Value("peer")
if peer != nil {
f.peers.Store(peer, time.Now())
defer f.peers.Delete(peer)
}
// If there is a source in the context then it is an offer, otherwise a request
sourceIF := rctx.Value("source")
if sourceIF != nil {
var source *discover.NodeID
id := discover.MustHexID(sourceIF.(string))
source = &id
f.netFetcher.Offer(rctx, source)
} else {
f.netFetcher.Request(rctx)
}
// wait until either the chunk is delivered or the context is done
select {
case <-rctx.Done():
return nil, rctx.Err()
case <-f.deliveredC:
return f.chunk, nil
case <-f.cancelledC:
return nil, fmt.Errorf("fetcher cancelled")
}
}
// deliver is called by NetStore.Put to notify all pending requests
func (f *fetcher) deliver(ctx context.Context, ch Chunk) {
f.deliverOnce.Do(func() {
f.chunk = ch
// closing the deliveredC channel will terminate ongoing requests
close(f.deliveredC)
})
}