swarm/storage: pyramid chunker re-write (#14382)

This commit is contained in:
Zahoor Mohamed
2017-09-22 01:52:51 +05:30
committed by Felix Lange
parent 3c8656347f
commit d558a595ad
12 changed files with 1022 additions and 247 deletions

View File

@ -20,12 +20,14 @@ import (
"bytes"
"crypto/rand"
"encoding/binary"
"errors"
"fmt"
"io"
"runtime"
"sync"
"testing"
"time"
"github.com/ethereum/go-ethereum/crypto/sha3"
)
/*
@ -43,7 +45,7 @@ type chunkerTester struct {
t test
}
func (self *chunkerTester) Split(chunker Splitter, data io.Reader, size int64, chunkC chan *Chunk, swg *sync.WaitGroup, expectedError error) (key Key) {
func (self *chunkerTester) Split(chunker Splitter, data io.Reader, size int64, chunkC chan *Chunk, swg *sync.WaitGroup, expectedError error) (key Key, err error) {
// reset
self.chunks = make(map[string]*Chunk)
@ -54,13 +56,13 @@ func (self *chunkerTester) Split(chunker Splitter, data io.Reader, size int64, c
quitC := make(chan bool)
timeout := time.After(600 * time.Second)
if chunkC != nil {
go func() {
go func() error {
for {
select {
case <-timeout:
self.t.Fatalf("Join timeout error")
return errors.New(("Split timeout error"))
case <-quitC:
return
return nil
case chunk := <-chunkC:
// self.chunks = append(self.chunks, chunk)
self.chunks[chunk.Key.String()] = chunk
@ -68,22 +70,69 @@ func (self *chunkerTester) Split(chunker Splitter, data io.Reader, size int64, c
chunk.wg.Done()
}
}
}
}()
}
key, err := chunker.Split(data, size, chunkC, swg, nil)
key, err = chunker.Split(data, size, chunkC, swg, nil)
if err != nil && expectedError == nil {
self.t.Fatalf("Split error: %v", err)
} else if expectedError != nil && (err == nil || err.Error() != expectedError.Error()) {
self.t.Fatalf("Not receiving the correct error! Expected %v, received %v", expectedError, err)
err = errors.New(fmt.Sprintf("Split error: %v", err))
}
if chunkC != nil {
if swg != nil {
swg.Wait()
}
close(quitC)
}
return
return key, err
}
func (self *chunkerTester) Append(chunker Splitter, rootKey Key, data io.Reader, chunkC chan *Chunk, swg *sync.WaitGroup, expectedError error) (key Key, err error) {
quitC := make(chan bool)
timeout := time.After(60 * time.Second)
if chunkC != nil {
go func() error {
for {
select {
case <-timeout:
return errors.New(("Append timeout error"))
case <-quitC:
return nil
case chunk := <-chunkC:
if chunk != nil {
stored, success := self.chunks[chunk.Key.String()]
if !success {
// Requesting data
self.chunks[chunk.Key.String()] = chunk
if chunk.wg != nil {
chunk.wg.Done()
}
} else {
// getting data
chunk.SData = stored.SData
chunk.Size = int64(binary.LittleEndian.Uint64(chunk.SData[0:8]))
close(chunk.C)
}
}
}
}
}()
}
key, err = chunker.Append(rootKey, data, chunkC, swg, nil)
if err != nil && expectedError == nil {
err = errors.New(fmt.Sprintf("Append error: %v", err))
}
if chunkC != nil {
if swg != nil {
swg.Wait()
}
close(quitC)
}
return key, err
}
func (self *chunkerTester) Join(chunker Chunker, key Key, c int, chunkC chan *Chunk, quitC chan bool) LazySectionReader {
@ -93,22 +142,20 @@ func (self *chunkerTester) Join(chunker Chunker, key Key, c int, chunkC chan *Ch
timeout := time.After(600 * time.Second)
i := 0
go func() {
go func() error {
for {
select {
case <-timeout:
self.t.Fatalf("Join timeout error")
return errors.New(("Join timeout error"))
case chunk, ok := <-chunkC:
if !ok {
close(quitC)
return
return nil
}
// this just mocks the behaviour of a chunk store retrieval
stored, success := self.chunks[chunk.Key.String()]
if !success {
self.t.Fatalf("not found")
return
return errors.New(("Not found"))
}
chunk.SData = stored.SData
chunk.Size = int64(binary.LittleEndian.Uint64(chunk.SData[0:8]))
@ -136,11 +183,15 @@ func testRandomBrokenData(splitter Splitter, n int, tester *chunkerTester) {
chunkC := make(chan *Chunk, 1000)
swg := &sync.WaitGroup{}
key := tester.Split(splitter, brokendata, int64(n), chunkC, swg, fmt.Errorf("Broken reader"))
expectedError := fmt.Errorf("Broken reader")
key, err := tester.Split(splitter, brokendata, int64(n), chunkC, swg, expectedError)
if err == nil || err.Error() != expectedError.Error() {
tester.t.Fatalf("Not receiving the correct error! Expected %v, received %v", expectedError, err)
}
tester.t.Logf(" Key = %v\n", key)
}
func testRandomData(splitter Splitter, n int, tester *chunkerTester) {
func testRandomData(splitter Splitter, n int, tester *chunkerTester) Key {
if tester.inputs == nil {
tester.inputs = make(map[uint64][]byte)
}
@ -156,7 +207,10 @@ func testRandomData(splitter Splitter, n int, tester *chunkerTester) {
chunkC := make(chan *Chunk, 1000)
swg := &sync.WaitGroup{}
key := tester.Split(splitter, data, int64(n), chunkC, swg, nil)
key, err := tester.Split(splitter, data, int64(n), chunkC, swg, nil)
if err != nil {
tester.t.Fatalf(err.Error())
}
tester.t.Logf(" Key = %v\n", key)
chunkC = make(chan *Chunk, 1000)
@ -176,29 +230,145 @@ func testRandomData(splitter Splitter, n int, tester *chunkerTester) {
}
close(chunkC)
<-quitC
return key
}
func testRandomDataAppend(splitter Splitter, n, m int, tester *chunkerTester) {
if tester.inputs == nil {
tester.inputs = make(map[uint64][]byte)
}
input, found := tester.inputs[uint64(n)]
var data io.Reader
if !found {
data, input = testDataReaderAndSlice(n)
tester.inputs[uint64(n)] = input
} else {
data = io.LimitReader(bytes.NewReader(input), int64(n))
}
chunkC := make(chan *Chunk, 1000)
swg := &sync.WaitGroup{}
key, err := tester.Split(splitter, data, int64(n), chunkC, swg, nil)
if err != nil {
tester.t.Fatalf(err.Error())
}
tester.t.Logf(" Key = %v\n", key)
//create a append data stream
appendInput, found := tester.inputs[uint64(m)]
var appendData io.Reader
if !found {
appendData, appendInput = testDataReaderAndSlice(m)
tester.inputs[uint64(m)] = appendInput
} else {
appendData = io.LimitReader(bytes.NewReader(appendInput), int64(m))
}
chunkC = make(chan *Chunk, 1000)
swg = &sync.WaitGroup{}
newKey, err := tester.Append(splitter, key, appendData, chunkC, swg, nil)
if err != nil {
tester.t.Fatalf(err.Error())
}
tester.t.Logf(" NewKey = %v\n", newKey)
chunkC = make(chan *Chunk, 1000)
quitC := make(chan bool)
chunker := NewTreeChunker(NewChunkerParams())
reader := tester.Join(chunker, newKey, 0, chunkC, quitC)
newOutput := make([]byte, n+m)
r, err := reader.Read(newOutput)
if r != (n + m) {
tester.t.Fatalf("read error read: %v n = %v err = %v\n", r, n, err)
}
newInput := append(input, appendInput...)
if !bytes.Equal(newOutput, newInput) {
tester.t.Fatalf("input and output mismatch\n IN: %v\nOUT: %v\n", newInput, newOutput)
}
close(chunkC)
}
func TestSha3ForCorrectness(t *testing.T) {
tester := &chunkerTester{t: t}
size := 4096
input := make([]byte, size+8)
binary.LittleEndian.PutUint64(input[:8], uint64(size))
io.LimitReader(bytes.NewReader(input[8:]), int64(size))
rawSha3 := sha3.NewKeccak256()
rawSha3.Reset()
rawSha3.Write(input)
rawSha3Output := rawSha3.Sum(nil)
sha3FromMakeFunc := MakeHashFunc(SHA3Hash)()
sha3FromMakeFunc.ResetWithLength(input[:8])
sha3FromMakeFunc.Write(input[8:])
sha3FromMakeFuncOutput := sha3FromMakeFunc.Sum(nil)
if len(rawSha3Output) != len(sha3FromMakeFuncOutput) {
tester.t.Fatalf("Original SHA3 and abstracted Sha3 has different length %v:%v\n", len(rawSha3Output), len(sha3FromMakeFuncOutput))
}
if !bytes.Equal(rawSha3Output, sha3FromMakeFuncOutput) {
tester.t.Fatalf("Original SHA3 and abstracted Sha3 mismatch %v:%v\n", rawSha3Output, sha3FromMakeFuncOutput)
}
}
func TestDataAppend(t *testing.T) {
sizes := []int{1, 1, 1, 4095, 4096, 4097, 1, 1, 1, 123456, 2345678, 2345678}
appendSizes := []int{4095, 4096, 4097, 1, 1, 1, 8191, 8192, 8193, 9000, 3000, 5000}
tester := &chunkerTester{t: t}
chunker := NewPyramidChunker(NewChunkerParams())
for i, s := range sizes {
testRandomDataAppend(chunker, s, appendSizes[i], tester)
}
}
func TestRandomData(t *testing.T) {
// sizes := []int{123456}
sizes := []int{1, 60, 83, 179, 253, 1024, 4095, 4096, 4097, 8191, 8192, 8193, 123456, 2345678}
sizes := []int{1, 60, 83, 179, 253, 1024, 4095, 4096, 4097, 8191, 8192, 8193, 12287, 12288, 12289, 123456, 2345678}
tester := &chunkerTester{t: t}
chunker := NewTreeChunker(NewChunkerParams())
for _, s := range sizes {
testRandomData(chunker, s, tester)
}
pyramid := NewPyramidChunker(NewChunkerParams())
for _, s := range sizes {
testRandomData(pyramid, s, tester)
treeChunkerKey := testRandomData(chunker, s, tester)
pyramidChunkerKey := testRandomData(pyramid, s, tester)
if treeChunkerKey.String() != pyramidChunkerKey.String() {
tester.t.Fatalf("tree chunker and pyramid chunker key mismatch for size %v\n TC: %v\n PC: %v\n", s, treeChunkerKey.String(), pyramidChunkerKey.String())
}
}
cp := NewChunkerParams()
cp.Hash = BMTHash
chunker = NewTreeChunker(cp)
pyramid = NewPyramidChunker(cp)
for _, s := range sizes {
treeChunkerKey := testRandomData(chunker, s, tester)
pyramidChunkerKey := testRandomData(pyramid, s, tester)
if treeChunkerKey.String() != pyramidChunkerKey.String() {
tester.t.Fatalf("tree chunker BMT and pyramid chunker BMT key mismatch for size %v \n TC: %v\n PC: %v\n", s, treeChunkerKey.String(), pyramidChunkerKey.String())
}
}
}
func TestRandomBrokenData(t *testing.T) {
sizes := []int{1, 60, 83, 179, 253, 1024, 4095, 4096, 4097, 8191, 8192, 8193, 123456, 2345678}
sizes := []int{1, 60, 83, 179, 253, 1024, 4095, 4096, 4097, 8191, 8192, 8193, 12287, 12288, 12289, 123456, 2345678}
tester := &chunkerTester{t: t}
chunker := NewTreeChunker(NewChunkerParams())
for _, s := range sizes {
testRandomBrokenData(chunker, s, tester)
t.Logf("done size: %v", s)
}
}
@ -220,45 +390,100 @@ func benchmarkJoin(n int, t *testing.B) {
chunkC := make(chan *Chunk, 1000)
swg := &sync.WaitGroup{}
key := tester.Split(chunker, data, int64(n), chunkC, swg, nil)
// t.StartTimer()
key, err := tester.Split(chunker, data, int64(n), chunkC, swg, nil)
if err != nil {
tester.t.Fatalf(err.Error())
}
chunkC = make(chan *Chunk, 1000)
quitC := make(chan bool)
reader := tester.Join(chunker, key, i, chunkC, quitC)
benchReadAll(reader)
close(chunkC)
<-quitC
// t.StopTimer()
}
stats := new(runtime.MemStats)
runtime.ReadMemStats(stats)
fmt.Println(stats.Sys)
}
func benchmarkSplitTree(n int, t *testing.B) {
func benchmarkSplitTreeSHA3(n int, t *testing.B) {
t.ReportAllocs()
for i := 0; i < t.N; i++ {
chunker := NewTreeChunker(NewChunkerParams())
tester := &chunkerTester{t: t}
data := testDataReader(n)
tester.Split(chunker, data, int64(n), nil, nil, nil)
_, err := tester.Split(chunker, data, int64(n), nil, nil, nil)
if err != nil {
tester.t.Fatalf(err.Error())
}
}
stats := new(runtime.MemStats)
runtime.ReadMemStats(stats)
fmt.Println(stats.Sys)
}
func benchmarkSplitPyramid(n int, t *testing.B) {
func benchmarkSplitTreeBMT(n int, t *testing.B) {
t.ReportAllocs()
for i := 0; i < t.N; i++ {
cp := NewChunkerParams()
cp.Hash = BMTHash
chunker := NewTreeChunker(cp)
tester := &chunkerTester{t: t}
data := testDataReader(n)
_, err := tester.Split(chunker, data, int64(n), nil, nil, nil)
if err != nil {
tester.t.Fatalf(err.Error())
}
}
}
func benchmarkSplitPyramidSHA3(n int, t *testing.B) {
t.ReportAllocs()
for i := 0; i < t.N; i++ {
splitter := NewPyramidChunker(NewChunkerParams())
tester := &chunkerTester{t: t}
data := testDataReader(n)
tester.Split(splitter, data, int64(n), nil, nil, nil)
_, err := tester.Split(splitter, data, int64(n), nil, nil, nil)
if err != nil {
tester.t.Fatalf(err.Error())
}
}
}
func benchmarkSplitPyramidBMT(n int, t *testing.B) {
t.ReportAllocs()
for i := 0; i < t.N; i++ {
cp := NewChunkerParams()
cp.Hash = BMTHash
splitter := NewPyramidChunker(cp)
tester := &chunkerTester{t: t}
data := testDataReader(n)
_, err := tester.Split(splitter, data, int64(n), nil, nil, nil)
if err != nil {
tester.t.Fatalf(err.Error())
}
}
}
func benchmarkAppendPyramid(n, m int, t *testing.B) {
t.ReportAllocs()
for i := 0; i < t.N; i++ {
chunker := NewPyramidChunker(NewChunkerParams())
tester := &chunkerTester{t: t}
data := testDataReader(n)
data1 := testDataReader(m)
chunkC := make(chan *Chunk, 1000)
swg := &sync.WaitGroup{}
key, err := tester.Split(chunker, data, int64(n), chunkC, swg, nil)
if err != nil {
tester.t.Fatalf(err.Error())
}
chunkC = make(chan *Chunk, 1000)
swg = &sync.WaitGroup{}
_, err = tester.Append(chunker, key, data1, chunkC, swg, nil)
if err != nil {
tester.t.Fatalf(err.Error())
}
close(chunkC)
}
stats := new(runtime.MemStats)
runtime.ReadMemStats(stats)
fmt.Println(stats.Sys)
}
func BenchmarkJoin_2(t *testing.B) { benchmarkJoin(100, t) }
@ -269,26 +494,59 @@ func BenchmarkJoin_6(t *testing.B) { benchmarkJoin(1000000, t) }
func BenchmarkJoin_7(t *testing.B) { benchmarkJoin(10000000, t) }
func BenchmarkJoin_8(t *testing.B) { benchmarkJoin(100000000, t) }
func BenchmarkSplitTree_2(t *testing.B) { benchmarkSplitTree(100, t) }
func BenchmarkSplitTree_2h(t *testing.B) { benchmarkSplitTree(500, t) }
func BenchmarkSplitTree_3(t *testing.B) { benchmarkSplitTree(1000, t) }
func BenchmarkSplitTree_3h(t *testing.B) { benchmarkSplitTree(5000, t) }
func BenchmarkSplitTree_4(t *testing.B) { benchmarkSplitTree(10000, t) }
func BenchmarkSplitTree_4h(t *testing.B) { benchmarkSplitTree(50000, t) }
func BenchmarkSplitTree_5(t *testing.B) { benchmarkSplitTree(100000, t) }
func BenchmarkSplitTree_6(t *testing.B) { benchmarkSplitTree(1000000, t) }
func BenchmarkSplitTree_7(t *testing.B) { benchmarkSplitTree(10000000, t) }
func BenchmarkSplitTree_8(t *testing.B) { benchmarkSplitTree(100000000, t) }
func BenchmarkSplitTreeSHA3_2(t *testing.B) { benchmarkSplitTreeSHA3(100, t) }
func BenchmarkSplitTreeSHA3_2h(t *testing.B) { benchmarkSplitTreeSHA3(500, t) }
func BenchmarkSplitTreeSHA3_3(t *testing.B) { benchmarkSplitTreeSHA3(1000, t) }
func BenchmarkSplitTreeSHA3_3h(t *testing.B) { benchmarkSplitTreeSHA3(5000, t) }
func BenchmarkSplitTreeSHA3_4(t *testing.B) { benchmarkSplitTreeSHA3(10000, t) }
func BenchmarkSplitTreeSHA3_4h(t *testing.B) { benchmarkSplitTreeSHA3(50000, t) }
func BenchmarkSplitTreeSHA3_5(t *testing.B) { benchmarkSplitTreeSHA3(100000, t) }
func BenchmarkSplitTreeSHA3_6(t *testing.B) { benchmarkSplitTreeSHA3(1000000, t) }
func BenchmarkSplitTreeSHA3_7(t *testing.B) { benchmarkSplitTreeSHA3(10000000, t) }
func BenchmarkSplitTreeSHA3_8(t *testing.B) { benchmarkSplitTreeSHA3(100000000, t) }
func BenchmarkSplitPyramid_2(t *testing.B) { benchmarkSplitPyramid(100, t) }
func BenchmarkSplitPyramid_2h(t *testing.B) { benchmarkSplitPyramid(500, t) }
func BenchmarkSplitPyramid_3(t *testing.B) { benchmarkSplitPyramid(1000, t) }
func BenchmarkSplitPyramid_3h(t *testing.B) { benchmarkSplitPyramid(5000, t) }
func BenchmarkSplitPyramid_4(t *testing.B) { benchmarkSplitPyramid(10000, t) }
func BenchmarkSplitPyramid_4h(t *testing.B) { benchmarkSplitPyramid(50000, t) }
func BenchmarkSplitPyramid_5(t *testing.B) { benchmarkSplitPyramid(100000, t) }
func BenchmarkSplitPyramid_6(t *testing.B) { benchmarkSplitPyramid(1000000, t) }
func BenchmarkSplitPyramid_7(t *testing.B) { benchmarkSplitPyramid(10000000, t) }
func BenchmarkSplitPyramid_8(t *testing.B) { benchmarkSplitPyramid(100000000, t) }
func BenchmarkSplitTreeBMT_2(t *testing.B) { benchmarkSplitTreeBMT(100, t) }
func BenchmarkSplitTreeBMT_2h(t *testing.B) { benchmarkSplitTreeBMT(500, t) }
func BenchmarkSplitTreeBMT_3(t *testing.B) { benchmarkSplitTreeBMT(1000, t) }
func BenchmarkSplitTreeBMT_3h(t *testing.B) { benchmarkSplitTreeBMT(5000, t) }
func BenchmarkSplitTreeBMT_4(t *testing.B) { benchmarkSplitTreeBMT(10000, t) }
func BenchmarkSplitTreeBMT_4h(t *testing.B) { benchmarkSplitTreeBMT(50000, t) }
func BenchmarkSplitTreeBMT_5(t *testing.B) { benchmarkSplitTreeBMT(100000, t) }
func BenchmarkSplitTreeBMT_6(t *testing.B) { benchmarkSplitTreeBMT(1000000, t) }
func BenchmarkSplitTreeBMT_7(t *testing.B) { benchmarkSplitTreeBMT(10000000, t) }
func BenchmarkSplitTreeBMT_8(t *testing.B) { benchmarkSplitTreeBMT(100000000, t) }
// godep go test -bench ./swarm/storage -cpuprofile cpu.out -memprofile mem.out
func BenchmarkSplitPyramidSHA3_2(t *testing.B) { benchmarkSplitPyramidSHA3(100, t) }
func BenchmarkSplitPyramidSHA3_2h(t *testing.B) { benchmarkSplitPyramidSHA3(500, t) }
func BenchmarkSplitPyramidSHA3_3(t *testing.B) { benchmarkSplitPyramidSHA3(1000, t) }
func BenchmarkSplitPyramidSHA3_3h(t *testing.B) { benchmarkSplitPyramidSHA3(5000, t) }
func BenchmarkSplitPyramidSHA3_4(t *testing.B) { benchmarkSplitPyramidSHA3(10000, t) }
func BenchmarkSplitPyramidSHA3_4h(t *testing.B) { benchmarkSplitPyramidSHA3(50000, t) }
func BenchmarkSplitPyramidSHA3_5(t *testing.B) { benchmarkSplitPyramidSHA3(100000, t) }
func BenchmarkSplitPyramidSHA3_6(t *testing.B) { benchmarkSplitPyramidSHA3(1000000, t) }
func BenchmarkSplitPyramidSHA3_7(t *testing.B) { benchmarkSplitPyramidSHA3(10000000, t) }
func BenchmarkSplitPyramidSHA3_8(t *testing.B) { benchmarkSplitPyramidSHA3(100000000, t) }
func BenchmarkSplitPyramidBMT_2(t *testing.B) { benchmarkSplitPyramidBMT(100, t) }
func BenchmarkSplitPyramidBMT_2h(t *testing.B) { benchmarkSplitPyramidBMT(500, t) }
func BenchmarkSplitPyramidBMT_3(t *testing.B) { benchmarkSplitPyramidBMT(1000, t) }
func BenchmarkSplitPyramidBMT_3h(t *testing.B) { benchmarkSplitPyramidBMT(5000, t) }
func BenchmarkSplitPyramidBMT_4(t *testing.B) { benchmarkSplitPyramidBMT(10000, t) }
func BenchmarkSplitPyramidBMT_4h(t *testing.B) { benchmarkSplitPyramidBMT(50000, t) }
func BenchmarkSplitPyramidBMT_5(t *testing.B) { benchmarkSplitPyramidBMT(100000, t) }
func BenchmarkSplitPyramidBMT_6(t *testing.B) { benchmarkSplitPyramidBMT(1000000, t) }
func BenchmarkSplitPyramidBMT_7(t *testing.B) { benchmarkSplitPyramidBMT(10000000, t) }
func BenchmarkSplitPyramidBMT_8(t *testing.B) { benchmarkSplitPyramidBMT(100000000, t) }
func BenchmarkAppendPyramid_2(t *testing.B) { benchmarkAppendPyramid(100, 1000, t) }
func BenchmarkAppendPyramid_2h(t *testing.B) { benchmarkAppendPyramid(500, 1000, t) }
func BenchmarkAppendPyramid_3(t *testing.B) { benchmarkAppendPyramid(1000, 1000, t) }
func BenchmarkAppendPyramid_4(t *testing.B) { benchmarkAppendPyramid(10000, 1000, t) }
func BenchmarkAppendPyramid_4h(t *testing.B) { benchmarkAppendPyramid(50000, 1000, t) }
func BenchmarkAppendPyramid_5(t *testing.B) { benchmarkAppendPyramid(1000000, 1000, t) }
func BenchmarkAppendPyramid_6(t *testing.B) { benchmarkAppendPyramid(1000000, 1000, t) }
func BenchmarkAppendPyramid_7(t *testing.B) { benchmarkAppendPyramid(10000000, 1000, t) }
func BenchmarkAppendPyramid_8(t *testing.B) { benchmarkAppendPyramid(100000000, 1000, t) }
// go test -timeout 20m -cpu 4 -bench=./swarm/storage -run no
// If you dont add the timeout argument above .. the benchmark will timeout and dump