swarm/storage: Support for uploading 100gb files (#1395)
* swarm/storage: Fix goroutine leak while uploading large files
* swarm/storage: Fix pyramid chunker to properly wrap level 3 and above
@@ -238,7 +238,7 @@ func TestRandomData(t *testing.T) {
 	// This test can validate files up to a relatively short length, as tree chunker slows down drastically.
 	// Validation of longer files is done by TestLocalStoreAndRetrieve in swarm package.
 	//sizes := []int{1, 60, 83, 179, 253, 1024, 4095, 4096, 4097, 8191, 8192, 8193, 12287, 12288, 12289, 524288, 524288 + 1, 524288 + 4097, 7 * 524288, 7*524288 + 1, 7*524288 + 4097}
-	sizes := []int{1, 60, 83, 179, 253, 1024, 4095, 4097, 8191, 8192, 12288, 12289, 524288}
+	sizes := []int{1, 60, 83, 179, 253, 1024, 4095, 4097, 8191, 8192, 12288, 12289, 524288, 2345678}
 	tester := &chunkerTester{t: t}

 	for _, s := range sizes {
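For context on the size added here: assuming Swarm's usual 4096-byte data chunks and 128 references per tree chunk (defaults that are not part of this diff), 524288 bytes is exactly one full tree chunk, while the new 2345678-byte case splits into 573 data chunks and is the only size in the list big enough to need a second level of tree chunks. A small sketch of that arithmetic:

```go
package main

import "fmt"

func main() {
	const chunkSize = 4096 // assumed default data chunk size
	const branches = 128   // assumed references per tree chunk

	size := 2345678
	n := (size + chunkSize - 1) / chunkSize // data chunks at level 0
	fmt.Println("data chunks:", n)          // 573

	// walk up the tree until a single root reference remains
	for lvl := 1; n > 1; lvl++ {
		n = (n + branches - 1) / branches
		fmt.Printf("level %d tree chunks: %d\n", lvl, n) // 5, then 1
	}
}
```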
@@ -19,6 +19,7 @@ package storage
 import (
 	"context"
 	"fmt"
+	"sync"
 	"sync/atomic"

 	"github.com/ethersphere/swarm/chunk"
@@ -26,6 +27,11 @@ import (
 	"golang.org/x/crypto/sha3"
 )

+const (
+	noOfStorageWorkers = 150 // Since we want 128 data chunks to be processed parallel + few for processing tree chunks
+
+)
+
 type hasherStore struct {
 	// nrChunks is used with atomic functions
 	// it is required to be at the start of the struct to ensure 64bit alignment for ARM, x86-32, and 32-bit MIPS architectures
@@ -34,12 +40,15 @@ type hasherStore struct {
 	store     ChunkStore
 	tag       *chunk.Tag
 	toEncrypt bool
+	doWait    sync.Once
 	hashFunc  SwarmHasher
 	hashSize  int   // content hash size
 	refSize   int64 // reference size (content hash + possibly encryption key)
 	errC      chan error    // global error channel
+	waitC     chan error    // global wait channel
 	doneC     chan struct{} // closed by Close() call to indicate that count is the final number of chunks
 	quitC     chan struct{} // closed to quit unterminated routines
+	workers   chan Chunk    // back pressure for limiting storage workers goroutines
 }

 // NewHasherStore creates a hasherStore object, which implements Putter and Getter interfaces.
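The nrChunks comment kept as context above reflects a documented sync/atomic constraint: 64-bit atomic operations need 64-bit-aligned operands, and on 32-bit platforms Go only guarantees that alignment for the first word of an allocated struct, so the counter has to stay the first field. A minimal illustration with a hypothetical type (not part of the diff):

```go
package main

import "sync/atomic"

// chunkCounter keeps the uint64 as its first field so that the
// 64-bit atomic operations below are safe on ARM, x86-32 and
// 32-bit MIPS, where only the first word of an allocated struct
// is guaranteed to be 64-bit aligned.
type chunkCounter struct {
	n     uint64 // accessed with sync/atomic; must stay first
	label string
}

func main() {
	c := &chunkCounter{label: "chunks"}
	atomic.AddUint64(&c.n, 1)
	_ = atomic.LoadUint64(&c.n)
}
```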
@@ -60,10 +69,11 @@ func NewHasherStore(store ChunkStore, hashFunc SwarmHasher, toEncrypt bool, tag
 		hashSize: hashSize,
 		refSize:  refSize,
 		errC:     make(chan error),
+		waitC:    make(chan error),
 		doneC:    make(chan struct{}),
 		quitC:    make(chan struct{}),
+		workers:  make(chan Chunk, noOfStorageWorkers),
 	}

 	return h
 }
@@ -83,6 +93,11 @@ func (h *hasherStore) Put(ctx context.Context, chunkData ChunkData) (Reference,
 	chunk := h.createChunk(c)
 	h.storeChunk(ctx, chunk)

+	// Start the wait function which will detect completion of put
+	h.doWait.Do(func() {
+		go h.startWait(ctx)
+	})
+
 	return Reference(append(chunk.Address(), encryptionKey...)), nil
 }
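Put runs once per chunk, so the diff leans on sync.Once to launch the waiter goroutine on the first Put only; every later Put skips the Do body. A stripped-down sketch of that start-once idiom (uploader and collect are illustrative names, not the hasherStore API):

```go
package main

import (
	"fmt"
	"sync"
)

type uploader struct {
	startOnce sync.Once
	resultC   chan error
}

// put hands off a chunk (elided here) and makes sure the background
// collector is running; only the first call actually starts it.
func (u *uploader) put() {
	u.startOnce.Do(func() {
		go u.collect()
	})
}

func (u *uploader) collect() {
	// placeholder for the loop that counts stored chunks
	u.resultC <- nil
}

func main() {
	u := &uploader{resultC: make(chan error, 1)}
	u.put()
	u.put() // no second collector goroutine is started
	fmt.Println("result:", <-u.resultC)
}
```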
@@ -121,8 +136,15 @@ func (h *hasherStore) Close() {
 // Wait returns when
 // 1) the Close() function has been called and
 // 2) all the chunks which has been Put has been stored
 // OR
 // 1) if there is error while storing chunk
 func (h *hasherStore) Wait(ctx context.Context) error {
+	defer close(h.quitC)
+	err := <-h.waitC
+	return err
+}
+
+func (h *hasherStore) startWait(ctx context.Context) {
 	var nrStoredChunks uint64 // number of stored chunks
 	var done bool
 	doneC := h.doneC
@@ -130,7 +152,7 @@ func (h *hasherStore) Wait(ctx context.Context) error {
 		select {
 		// if context is done earlier, just return with the error
 		case <-ctx.Done():
-			return ctx.Err()
+			h.waitC <- ctx.Err()
 		// doneC is closed if all chunks have been submitted, from then we just wait until all of them are also stored
 		case <-doneC:
 			done = true
@@ -138,14 +160,15 @@
 		// a chunk has been stored, if err is nil, then successfully, so increase the stored chunk counter
 		case err := <-h.errC:
 			if err != nil {
-				return err
+				h.waitC <- err
 			}
 			nrStoredChunks++
 		}
 		// if all the chunks have been submitted and all of them are stored, then we can return
 		if done {
 			if nrStoredChunks >= atomic.LoadUint64(&h.nrChunks) {
-				return nil
+				h.waitC <- nil
+				break
 			}
 		}
 	}
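Taken together, these hunks change the loop from returning on the first error or cancelled context to forwarding the outcome on waitC and carrying on, while Wait itself (previous hunk) becomes a plain receive on waitC. Below is a simplified, self-contained sketch of that collector pattern under assumed names (not the exact hasherStore fields): workers report on an unbuffered errC, one background goroutine counts them and publishes a single result, and the caller just waits for it.

```go
package main

import (
	"errors"
	"fmt"
	"sync/atomic"
)

type store struct {
	nrChunks uint64        // incremented once per submitted chunk
	errC     chan error    // one send per attempted chunk store
	waitC    chan error    // final outcome, read by Wait
	doneC    chan struct{} // closed when no more chunks will be submitted
}

// startWait keeps receiving from errC so that storing goroutines can
// never block forever on the unbuffered channel, and reports the
// overall result exactly once on waitC.
func (s *store) startWait() {
	var stored uint64
	var done bool
	var firstErr error
	doneC := s.doneC
	for {
		select {
		case <-doneC: // closed by Close(): the chunk count is final
			done = true
			doneC = nil // a nil channel blocks, so this case fires only once
		case err := <-s.errC:
			if err != nil && firstErr == nil {
				firstErr = err
			}
			stored++
		}
		if done && stored >= atomic.LoadUint64(&s.nrChunks) {
			s.waitC <- firstErr
			return
		}
	}
}

func (s *store) storeChunk(fail bool) {
	atomic.AddUint64(&s.nrChunks, 1)
	go func() {
		if fail {
			s.errC <- errors.New("store failed")
			return
		}
		s.errC <- nil
	}()
}

func main() {
	s := &store{errC: make(chan error), waitC: make(chan error), doneC: make(chan struct{})}
	go s.startWait()
	s.storeChunk(false)
	s.storeChunk(true)
	s.storeChunk(false)
	close(s.doneC)                  // Close(): everything has been submitted
	fmt.Println("wait:", <-s.waitC) // Wait(): first error, or nil
}
```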
@@ -242,8 +265,12 @@ func (h *hasherStore) newDataEncryption(key encryption.Key) encryption.Encryptio
 }

 func (h *hasherStore) storeChunk(ctx context.Context, ch Chunk) {
+	h.workers <- ch
 	atomic.AddUint64(&h.nrChunks, 1)
 	go func() {
+		defer func() {
+			<-h.workers
+		}()
 		seen, err := h.store.Put(ctx, chunk.ModePutUpload, ch)
 		h.tag.Inc(chunk.StateStored)
 		if seen {
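The workers channel acts as a counting semaphore: `h.workers <- ch` blocks once noOfStorageWorkers store goroutines are in flight, and the deferred receive frees the slot, so a huge upload no longer launches one goroutine per chunk all at once. A minimal sketch of the same back-pressure idea (the limit of 4 and the names are made up for the example):

```go
package main

import (
	"fmt"
	"sync"
)

const maxWorkers = 4 // stand-in for noOfStorageWorkers

func main() {
	sem := make(chan struct{}, maxWorkers) // buffered channel used as a counting semaphore
	var wg sync.WaitGroup

	for i := 0; i < 20; i++ {
		sem <- struct{}{} // blocks while maxWorkers "stores" are in flight
		wg.Add(1)
		go func(n int) {
			defer wg.Done()
			defer func() { <-sem }() // release the slot when this store finishes
			fmt.Println("storing chunk", n)
		}(i)
	}
	wg.Wait()
}
```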
@@ -42,8 +42,9 @@ func TestHasherStore(t *testing.T) {
 	}

 	for _, tt := range tests {
+
 		chunkStore := NewMapChunkStore()
-		hasherStore := NewHasherStore(chunkStore, MakeHashFunc(DefaultHash), tt.toEncrypt, chunk.NewTag(0, "test-tag", 0))
+		hasherStore := NewHasherStore(chunkStore, MakeHashFunc(DefaultHash), tt.toEncrypt, chunk.NewTag(0, "test-tag", 2))

 		// Put two random chunks into the hasherStore
 		chunkData1 := GenerateRandomChunk(int64(tt.chunkLength)).Data()
@@ -134,7 +134,7 @@ func New(path string, baseKey []byte, o *Options) (db *DB, err error) {
 	if o == nil {
 		// default options
 		o = &Options{
-			Capacity: 5000000,
+			Capacity: defaultCapacity,
 		}
 	}
 	db = &DB{
@@ -204,7 +204,7 @@ func (pc *PyramidChunker) decrementWorkerCount() {

 func (pc *PyramidChunker) Split(ctx context.Context) (k Address, wait func(context.Context) error, err error) {
 	pc.wg.Add(1)
-	pc.prepareChunks(ctx, false)
+	go pc.prepareChunks(ctx, false)

 	// closes internal error channel if all subprocesses in the workgroup finished
 	go func() {
@@ -239,7 +239,7 @@ func (pc *PyramidChunker) Append(ctx context.Context) (k Address, wait func(cont
 	pc.loadTree(ctx)

 	pc.wg.Add(1)
-	pc.prepareChunks(ctx, true)
+	go pc.prepareChunks(ctx, true)

 	// closes internal error channel if all subprocesses in the workgroup finished
 	go func() {
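Note that in both Split and Append the pc.wg.Add(1) stays on the caller's side of the new go statement, with prepareChunks presumably calling Done when it finishes (not shown in these hunks). Counting before launching is what keeps the later wg.Wait from returning before the goroutine has been registered; a tiny sketch of the rule with illustrative names:

```go
package main

import (
	"fmt"
	"sync"
)

func prepareChunks(wg *sync.WaitGroup) {
	defer wg.Done()
	fmt.Println("preparing chunks")
}

func main() {
	var wg sync.WaitGroup

	wg.Add(1)             // count the worker before it starts...
	go prepareChunks(&wg) // ...not inside the goroutine itself

	wg.Wait() // cannot return before Done runs
}
```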
@@ -539,6 +539,15 @@ func (pc *PyramidChunker) buildTree(isAppend bool, ent *TreeEntry, chunkWG *sync
 		if lvlCount >= pc.branches {
 			endLvl = lvl + 1
 			compress = true
+
+			// Move up the chunk level to see if there is any boundary wrapping
+			for uprLvl := endLvl; uprLvl < pc.branches; uprLvl++ {
+				uprLvlCount := int64(len(pc.chunkLevel[uprLvl]))
+				if uprLvlCount >= pc.branches-1 {
+					endLvl = endLvl + 1
+				}
+			}
+
 			break
 		}
 	}
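The "level 3 and above" in the commit message is easiest to see with the default parameters: assuming 4096-byte data chunks and 128 references per tree chunk, every pyramid level multiplies coverage by 128, so level 1 tops out near 512 KB, level 2 near 64 MB, level 3 near 8 GB and level 4 near 1 TB. A 100 GB upload therefore fills and wraps levels 3 and 4, which is the boundary case the new loop walks upward to catch. A short sketch of that capacity calculation (the 4096/128 defaults are assumptions, not part of this diff):

```go
package main

import "fmt"

func main() {
	const chunkSize = 4096.0 // assumed data chunk size in bytes
	const branches = 128.0   // assumed references per tree chunk

	capacity := chunkSize
	for lvl := 1; lvl <= 4; lvl++ {
		capacity *= branches
		fmt.Printf("level %d covers up to %.0f bytes (~%.1f GB)\n",
			lvl, capacity, capacity/(1<<30))
	}
	// level 3 ≈ 8 GB, level 4 ≈ 1 TB, so a 100 GB file needs levels 3 and 4
}
```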