swarm: codebase split from go-ethereum (#1405)

This commit is contained in:
Rafael Matias
2019-06-03 12:28:18 +02:00
committed by Anton Evangelatov
parent 7a22da98b9
commit b046760db1
1540 changed files with 4654 additions and 129393 deletions

storage/chunker.go (new file, 612 lines)

@@ -0,0 +1,612 @@
// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package storage
import (
"context"
"encoding/binary"
"errors"
"fmt"
"io"
"sync"
"time"
"github.com/ethereum/go-ethereum/metrics"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/log"
"github.com/ethersphere/swarm/spancontext"
opentracing "github.com/opentracing/opentracing-go"
olog "github.com/opentracing/opentracing-go/log"
)
/*
The distributed storage implemented in this package requires fixed-size chunks of content.
Chunker is the interface to a component that is responsible for disassembling and assembling larger data.
TreeChunker implements a Chunker based on a tree structure defined as follows:
1 each node in the tree, including the root and other branching nodes, is stored as a chunk.
2 branching nodes encode data that includes the size of the data slice covered by the node's entire subtree, as well as the hash keys of all its children:
data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1}
3 leaf nodes encode an actual subslice of the input data.
4 if the data size is no more than the maximum chunk size, the data is stored in a single chunk:
key = hash(int64(size) + data)
5 if the data size is more than chunksize*branches^l but no more than chunksize*branches^(l+1), the data vector is split into slices of length chunksize*branches^l (except the last one):
key = hash(int64(size) + key(slice0) + key(slice1) + ...)
The underlying hash function is configurable.
*/
/*
Tree chunker is a concrete implementation of data chunking.
This chunker works in a simple way: it builds a tree out of the document so that each node either represents a chunk of real data or a branching non-leaf node of the tree. In particular, each such non-leaf chunk represents the concatenation of the hashes of its respective children. This scheme simultaneously guarantees data integrity as well as self-addressing. Non-leaf nodes are easy to distinguish, since the size they encode is strictly greater than the maximum data size of a single chunk: they cover a whole subtree.
If all is well, it is possible to implement this by simply composing readers, so that no extra allocation or buffering is necessary for the data splitting and joining. This means that in principle there can be direct IO between memory, file system and network socket (a bzz peer's storage request is read from the socket). In practice there may be a need for several stages of internal buffering.
The hashing itself does use extra copies and allocation, though, since it genuinely needs to.
*/
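// The fanout of the tree follows directly from the ChunkerParams below; a
// minimal sketch (illustrative values, assuming the default 4096-byte chunk
// size and a 32-byte hash):
//
//	chunkSize := int64(4096)
//	hashSize := int64(32)
//	branches := chunkSize / hashSize // 128 child references fit in one chunk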
type ChunkerParams struct {
chunkSize int64
hashSize int64
}
type SplitterParams struct {
ChunkerParams
reader io.Reader
putter Putter
addr Address
}
type TreeSplitterParams struct {
SplitterParams
size int64
}
type JoinerParams struct {
ChunkerParams
addr Address
getter Getter
// TODO: there is a bug, so depth can only be 0 today, see: https://github.com/ethersphere/go-ethereum/issues/344
depth int
ctx context.Context
}
type TreeChunker struct {
ctx context.Context
branches int64
dataSize int64
data io.Reader
// calculated
addr Address
depth int
hashSize int64 // self.hashFunc.New().Size()
chunkSize int64 // hashSize* branches
workerCount int64 // the number of worker routines used
workerLock sync.RWMutex // lock for the worker count
jobC chan *hashJob
wg *sync.WaitGroup
putter Putter
getter Getter
errC chan error
quitC chan bool
}
/*
Join reconstructs the original content based on a root key.
When joining, the caller is handed a LazySectionReader, which is
seekable and implements on-demand fetching of chunks as and where it is read.
New chunks to retrieve come from the getter, which the caller provides.
If an error is encountered during joining, it surfaces as a reader error.
As a result, partial reads from a document are possible even if other parts
are corrupt or lost.
The chunks are not meant to be validated by the chunker when joining. This
is because it is left to the DPA to decide which sources are trusted.
*/
func TreeJoin(ctx context.Context, addr Address, getter Getter, depth int) *LazyChunkReader {
jp := &JoinerParams{
ChunkerParams: ChunkerParams{
chunkSize: chunk.DefaultSize,
hashSize: int64(len(addr)),
},
addr: addr,
getter: getter,
depth: depth,
ctx: ctx,
}
return NewTreeJoiner(jp).Join(ctx)
}
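// A minimal usage sketch (illustrative; getter is any Getter implementation,
// such as a hasherStore, and addr is a previously stored root address):
//
//	reader := TreeJoin(ctx, addr, getter, 0)
//	buf := make([]byte, 4096)
//	n, err := reader.ReadAt(buf, 0) // chunks are fetched on demand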
/*
When splitting, data is given as a SectionReader, and the key is a hashSize-long byte slice (Key); the root hash of the entire content will fill this once processing finishes.
New chunks to store are stored using the putter, which the caller provides.
*/
func TreeSplit(ctx context.Context, data io.Reader, size int64, putter Putter) (k Address, wait func(context.Context) error, err error) {
tsp := &TreeSplitterParams{
SplitterParams: SplitterParams{
ChunkerParams: ChunkerParams{
chunkSize: chunk.DefaultSize,
hashSize: putter.RefSize(),
},
reader: data,
putter: putter,
},
size: size,
}
return NewTreeSplitter(tsp).Split(ctx)
}
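// A minimal usage sketch (illustrative; putter is any Putter implementation,
// such as a hasherStore backed by a ChunkStore):
//
//	addr, wait, err := TreeSplit(ctx, bytes.NewReader(data), int64(len(data)), putter)
//	if err == nil {
//		err = wait(ctx) // blocks until all chunks have been flushed to the store
//	}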
func NewTreeJoiner(params *JoinerParams) *TreeChunker {
tc := &TreeChunker{}
tc.hashSize = params.hashSize
tc.branches = params.chunkSize / params.hashSize
tc.addr = params.addr
tc.getter = params.getter
tc.depth = params.depth
tc.chunkSize = params.chunkSize
tc.workerCount = 0
tc.jobC = make(chan *hashJob, 2*ChunkProcessors)
tc.wg = &sync.WaitGroup{}
tc.errC = make(chan error)
tc.quitC = make(chan bool)
tc.ctx = params.ctx
return tc
}
func NewTreeSplitter(params *TreeSplitterParams) *TreeChunker {
tc := &TreeChunker{}
tc.data = params.reader
tc.dataSize = params.size
tc.hashSize = params.hashSize
tc.branches = params.chunkSize / params.hashSize
tc.addr = params.addr
tc.chunkSize = params.chunkSize
tc.putter = params.putter
tc.workerCount = 0
tc.jobC = make(chan *hashJob, 2*ChunkProcessors)
tc.wg = &sync.WaitGroup{}
tc.errC = make(chan error)
tc.quitC = make(chan bool)
return tc
}
type hashJob struct {
key Address
chunk []byte
size int64
parentWg *sync.WaitGroup
}
func (tc *TreeChunker) incrementWorkerCount() {
tc.workerLock.Lock()
defer tc.workerLock.Unlock()
tc.workerCount += 1
}
func (tc *TreeChunker) getWorkerCount() int64 {
tc.workerLock.RLock()
defer tc.workerLock.RUnlock()
return tc.workerCount
}
func (tc *TreeChunker) decrementWorkerCount() {
tc.workerLock.Lock()
defer tc.workerLock.Unlock()
tc.workerCount -= 1
}
func (tc *TreeChunker) Split(ctx context.Context) (k Address, wait func(context.Context) error, err error) {
if tc.chunkSize <= 0 {
panic("chunker must be initialised")
}
tc.runWorker(ctx)
depth := 0
treeSize := tc.chunkSize
// take the lowest depth such that chunkSize*branches^(depth+1) > size
// (a power series: this finds the order of magnitude of the data size in base branches, i.e. the number of levels of branching in the resulting tree)
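// e.g. with chunkSize = 4096 and branches = 128 (illustrative):
//   size <= 4096       -> depth 0 (a single data chunk)
//   size <= 4096*128   -> depth 1
//   size <= 4096*128^2 -> depth 2, and so on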
for ; treeSize < tc.dataSize; treeSize *= tc.branches {
depth++
}
key := make([]byte, tc.hashSize)
// this waitgroup member is released after the root hash is calculated
tc.wg.Add(1)
//launch actual recursive function passing the waitgroups
go tc.split(ctx, depth, treeSize/tc.branches, key, tc.dataSize, tc.wg)
// closes the internal error channel once all subprocesses in the workgroup have finished
go func() {
// waiting for all threads to finish
tc.wg.Wait()
close(tc.errC)
}()
defer close(tc.quitC)
defer tc.putter.Close()
select {
case err := <-tc.errC:
if err != nil {
return nil, nil, err
}
case <-ctx.Done():
return nil, nil, ctx.Err()
}
return key, tc.putter.Wait, nil
}
func (tc *TreeChunker) split(ctx context.Context, depth int, treeSize int64, addr Address, size int64, parentWg *sync.WaitGroup) {
// descend the tree until the data no longer fits in a smaller subtree
for depth > 0 && size < treeSize {
treeSize /= tc.branches
depth--
}
if depth == 0 {
// leaf nodes -> content chunks
chunkData := make([]byte, size+8)
binary.LittleEndian.PutUint64(chunkData[0:8], uint64(size))
var readBytes int64
for readBytes < size {
n, err := tc.data.Read(chunkData[8+readBytes:])
readBytes += int64(n)
if err != nil && !(err == io.EOF && readBytes == size) {
tc.errC <- err
return
}
}
select {
case tc.jobC <- &hashJob{addr, chunkData, size, parentWg}:
case <-tc.quitC:
}
return
}
// depth > 0
// intermediate chunk containing child nodes hashes
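// branchCnt is ceil(size/treeSize): the number of children of this node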
branchCnt := (size + treeSize - 1) / treeSize
var chunk = make([]byte, branchCnt*tc.hashSize+8)
var pos, i int64
binary.LittleEndian.PutUint64(chunk[0:8], uint64(size))
childrenWg := &sync.WaitGroup{}
var secSize int64
for i < branchCnt {
// the last item can have shorter data
if size-pos < treeSize {
secSize = size - pos
} else {
secSize = treeSize
}
// the hash of that data
subTreeAddress := chunk[8+i*tc.hashSize : 8+(i+1)*tc.hashSize]
childrenWg.Add(1)
tc.split(ctx, depth-1, treeSize/tc.branches, subTreeAddress, secSize, childrenWg)
i++
pos += treeSize
}
// wait for all the children to complete calculating their hashes and copying them onto sections of the chunk
childrenWg.Wait()
worker := tc.getWorkerCount()
if int64(len(tc.jobC)) > worker && worker < ChunkProcessors {
tc.runWorker(ctx)
}
select {
case tc.jobC <- &hashJob{addr, chunk, size, parentWg}:
case <-tc.quitC:
}
}
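// Every chunk produced by split shares the same layout, as written by
// binary.LittleEndian.PutUint64 above:
//
//	bytes 0..7 : uint64, little-endian: size of the data span the chunk covers
//	bytes 8..  : payload: raw data for leaf chunks, or the concatenated child
//	             references (hashSize bytes each) for intermediate chunks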
func (tc *TreeChunker) runWorker(ctx context.Context) {
tc.incrementWorkerCount()
go func() {
defer tc.decrementWorkerCount()
for {
select {
case job, ok := <-tc.jobC:
if !ok {
return
}
h, err := tc.putter.Put(ctx, job.chunk)
if err != nil {
tc.errC <- err
return
}
copy(job.key, h)
job.parentWg.Done()
case <-tc.quitC:
return
}
}
}()
}
// LazyChunkReader implements LazySectionReader
type LazyChunkReader struct {
ctx context.Context
addr Address // root address
chunkData ChunkData
off int64 // offset
chunkSize int64 // inherit from chunker
branches int64 // inherit from chunker
hashSize int64 // inherit from chunker
depth int
getter Getter
}
func (tc *TreeChunker) Join(ctx context.Context) *LazyChunkReader {
return &LazyChunkReader{
addr: tc.addr,
chunkSize: tc.chunkSize,
branches: tc.branches,
hashSize: tc.hashSize,
depth: tc.depth,
getter: tc.getter,
ctx: tc.ctx,
}
}
func (r *LazyChunkReader) Context() context.Context {
return r.ctx
}
// Size is meant to be called on the LazySectionReader
func (r *LazyChunkReader) Size(ctx context.Context, quitC chan bool) (n int64, err error) {
metrics.GetOrRegisterCounter("lazychunkreader.size", nil).Inc(1)
var sp opentracing.Span
var cctx context.Context
cctx, sp = spancontext.StartSpan(
ctx,
"lcr.size")
defer sp.Finish()
log.Debug("lazychunkreader.size", "addr", r.addr)
if r.chunkData == nil {
startTime := time.Now()
chunkData, err := r.getter.Get(cctx, Reference(r.addr))
if err != nil {
metrics.GetOrRegisterResettingTimer("lcr.getter.get.err", nil).UpdateSince(startTime)
return 0, err
}
metrics.GetOrRegisterResettingTimer("lcr.getter.get", nil).UpdateSince(startTime)
r.chunkData = chunkData
}
s := r.chunkData.Size()
log.Debug("lazychunkreader.size", "key", r.addr, "size", s)
return int64(s), nil
}
// ReadAt can be called numerous times.
// Concurrent reads are allowed.
// Size() needs to be called synchronously on the LazyChunkReader first.
func (r *LazyChunkReader) ReadAt(b []byte, off int64) (read int, err error) {
metrics.GetOrRegisterCounter("lazychunkreader.readat", nil).Inc(1)
var sp opentracing.Span
var cctx context.Context
cctx, sp = spancontext.StartSpan(
r.ctx,
"lcr.read")
defer sp.Finish()
defer func() {
sp.LogFields(
olog.Int("off", int(off)),
olog.Int("read", read))
}()
// this is correct, a swarm doc cannot be zero length, so no EOF is expected
if len(b) == 0 {
return 0, nil
}
quitC := make(chan bool)
size, err := r.Size(cctx, quitC)
if err != nil {
log.Debug("lazychunkreader.readat.size", "size", size, "err", err)
return 0, err
}
errC := make(chan error)
var treeSize int64
var depth int
// calculate depth and max treeSize
treeSize = r.chunkSize
for ; treeSize < size; treeSize *= r.branches {
depth++
}
wg := sync.WaitGroup{}
length := int64(len(b))
for d := 0; d < r.depth; d++ {
off *= r.chunkSize
length *= r.chunkSize
}
wg.Add(1)
go r.join(cctx, b, off, off+length, depth, treeSize/r.branches, r.chunkData, &wg, errC, quitC)
go func() {
wg.Wait()
close(errC)
}()
err = <-errC
if err != nil {
log.Debug("lazychunkreader.readat.errc", "err", err)
close(quitC)
return 0, err
}
if off+int64(len(b)) >= size {
log.Debug("lazychunkreader.readat.return at end", "size", size, "off", off)
return int(size - off), io.EOF
}
log.Debug("lazychunkreader.readat.errc", "buff", len(b))
return len(b), nil
}
func (r *LazyChunkReader) join(ctx context.Context, b []byte, off int64, eoff int64, depth int, treeSize int64, chunkData ChunkData, parentWg *sync.WaitGroup, errC chan error, quitC chan bool) {
defer parentWg.Done()
// find appropriate block level
for chunkData.Size() < uint64(treeSize) && depth > r.depth {
treeSize /= r.branches
depth--
}
// leaf chunk found
if depth == r.depth {
extra := 8 + eoff - int64(len(chunkData))
if extra > 0 {
eoff -= extra
}
copy(b, chunkData[8+off:8+eoff])
return // simply give back the chunks reader for content chunks
}
// subtree
start := off / treeSize
end := (eoff + treeSize - 1) / treeSize
// the last non-leaf chunk can be shorter than the default chunk size, so don't read past its end
currentBranches := int64(len(chunkData)-8) / r.hashSize
if end > currentBranches {
end = currentBranches
}
wg := &sync.WaitGroup{}
defer wg.Wait()
for i := start; i < end; i++ {
soff := i * treeSize
roff := soff
seoff := soff + treeSize
if soff < off {
soff = off
}
if seoff > eoff {
seoff = eoff
}
if depth > 1 {
wg.Wait()
}
wg.Add(1)
go func(j int64) {
childAddress := chunkData[8+j*r.hashSize : 8+(j+1)*r.hashSize]
startTime := time.Now()
chunkData, err := r.getter.Get(ctx, Reference(childAddress))
if err != nil {
metrics.GetOrRegisterResettingTimer("lcr.getter.get.err", nil).UpdateSince(startTime)
select {
case errC <- fmt.Errorf("chunk %v-%v not found; key: %s", off, off+treeSize, fmt.Sprintf("%x", childAddress)):
case <-quitC:
}
return
}
metrics.GetOrRegisterResettingTimer("lcr.getter.get", nil).UpdateSince(startTime)
if l := len(chunkData); l < 9 {
select {
case errC <- fmt.Errorf("chunk %v-%v incomplete; key: %s, data length %v", off, off+treeSize, fmt.Sprintf("%x", childAddress), l):
case <-quitC:
}
return
}
if soff < off {
soff = off
}
r.join(ctx, b[soff-off:seoff-off], soff-roff, seoff-roff, depth-1, treeSize/r.branches, chunkData, wg, errC, quitC)
}(i)
} //for
}
// Read keeps a cursor, so it cannot be called concurrently; see ReadAt
func (r *LazyChunkReader) Read(b []byte) (read int, err error) {
log.Trace("lazychunkreader.read", "key", r.addr)
metrics.GetOrRegisterCounter("lazychunkreader.read", nil).Inc(1)
read, err = r.ReadAt(b, r.off)
if err != nil && err != io.EOF {
log.Trace("lazychunkreader.readat", "read", read, "err", err)
metrics.GetOrRegisterCounter("lazychunkreader.read.err", nil).Inc(1)
}
metrics.GetOrRegisterCounter("lazychunkreader.read.bytes", nil).Inc(int64(read))
r.off += int64(read)
return read, err
}
// completely analogous to standard SectionReader implementation
var errWhence = errors.New("Seek: invalid whence")
var errOffset = errors.New("Seek: invalid offset")
func (r *LazyChunkReader) Seek(offset int64, whence int) (int64, error) {
cctx, sp := spancontext.StartSpan(
r.ctx,
"lcr.seek")
defer sp.Finish()
log.Debug("lazychunkreader.seek", "key", r.addr, "offset", offset)
switch whence {
default:
return 0, errWhence
case 0:
offset += 0
case 1:
offset += r.off
case 2:
if r.chunkData == nil { // seeking from the end requires the root chunk for the size; call Size first
_, err := r.Size(cctx, nil)
if err != nil {
return 0, fmt.Errorf("can't get size: %v", err)
}
}
offset += int64(r.chunkData.Size())
}
if offset < 0 {
return 0, errOffset
}
r.off = offset
return offset, nil
}
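// A usage sketch (illustrative): reading the final 100 bytes of a document.
// whence follows the io.Seeker convention (0 = start, 1 = current, 2 = end);
// seeking from the end fetches the root chunk first to learn the total size:
//
//	if _, err := r.Seek(-100, 2); err == nil {
//		n, err := r.Read(buf)
//		// ...
//	}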

storage/chunker_test.go (new file, 470 lines)

@@ -0,0 +1,470 @@
// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package storage
import (
"bytes"
"context"
"encoding/binary"
"fmt"
"io"
"testing"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/testutil"
"golang.org/x/crypto/sha3"
)
/*
Tests TreeChunker by splitting and joining a random byte slice
*/
type test interface {
Fatalf(string, ...interface{})
Logf(string, ...interface{})
}
type chunkerTester struct {
inputs map[uint64][]byte
t test
}
var mockTag = chunk.NewTag(0, "mock-tag", 0)
func newTestHasherStore(store ChunkStore, hash string) *hasherStore {
return NewHasherStore(store, MakeHashFunc(hash), false, chunk.NewTag(0, "test-tag", 0))
}
func testRandomBrokenData(n int, tester *chunkerTester) {
data := testutil.RandomReader(1, n)
brokendata := brokenLimitReader(data, n, n/2)
buf := make([]byte, n)
_, err := brokendata.Read(buf)
if err == nil || err.Error() != "Broken reader" {
tester.t.Fatalf("Broken reader is not broken, hence broken. Returns: %v", err)
}
data = testutil.RandomReader(2, n)
brokendata = brokenLimitReader(data, n, n/2)
putGetter := newTestHasherStore(NewMapChunkStore(), SHA3Hash)
expectedError := fmt.Errorf("Broken reader")
ctx := context.Background()
key, _, err := TreeSplit(ctx, brokendata, int64(n), putGetter)
if err == nil || err.Error() != expectedError.Error() {
tester.t.Fatalf("Not receiving the correct error! Expected %v, received %v", expectedError, err)
}
tester.t.Logf(" Address = %v\n", key)
}
func testRandomData(usePyramid bool, hash string, n int, tester *chunkerTester) Address {
if tester.inputs == nil {
tester.inputs = make(map[uint64][]byte)
}
input, found := tester.inputs[uint64(n)]
var data io.Reader
if !found {
input = testutil.RandomBytes(1, n)
data = bytes.NewReader(input)
tester.inputs[uint64(n)] = input
} else {
data = io.LimitReader(bytes.NewReader(input), int64(n))
}
putGetter := newTestHasherStore(NewMapChunkStore(), hash)
var addr Address
var wait func(context.Context) error
var err error
ctx := context.TODO()
if usePyramid {
addr, wait, err = PyramidSplit(ctx, data, putGetter, putGetter, mockTag)
} else {
addr, wait, err = TreeSplit(ctx, data, int64(n), putGetter)
}
if err != nil {
tester.t.Fatalf(err.Error())
}
tester.t.Logf(" Address = %v\n", addr)
err = wait(ctx)
if err != nil {
tester.t.Fatalf(err.Error())
}
reader := TreeJoin(ctx, addr, putGetter, 0)
output := make([]byte, n)
r, err := reader.Read(output)
if r != n || err != io.EOF {
tester.t.Fatalf("read error read: %v n = %v err = %v\n", r, n, err)
}
if input != nil {
if !bytes.Equal(output, input) {
tester.t.Fatalf("input and output mismatch\n IN: %v\nOUT: %v\n", input, output)
}
}
// testing partial read
for i := 1; i < n; i += 10000 {
readableLength := n - i
r, err := reader.ReadAt(output, int64(i))
if r != readableLength || err != io.EOF {
tester.t.Fatalf("readAt error with offset %v read: %v n = %v err = %v\n", i, r, readableLength, err)
}
if input != nil {
if !bytes.Equal(output[:readableLength], input[i:]) {
tester.t.Fatalf("input and output mismatch\n IN: %v\nOUT: %v\n", input[i:], output[:readableLength])
}
}
}
return addr
}
func TestSha3ForCorrectness(t *testing.T) {
tester := &chunkerTester{t: t}
size := 4096
input := make([]byte, size+8)
binary.LittleEndian.PutUint64(input[:8], uint64(size))
io.LimitReader(bytes.NewReader(input[8:]), int64(size))
rawSha3 := sha3.NewLegacyKeccak256()
rawSha3.Reset()
rawSha3.Write(input)
rawSha3Output := rawSha3.Sum(nil)
sha3FromMakeFunc := MakeHashFunc(SHA3Hash)()
sha3FromMakeFunc.ResetWithLength(input[:8])
sha3FromMakeFunc.Write(input[8:])
sha3FromMakeFuncOutput := sha3FromMakeFunc.Sum(nil)
if len(rawSha3Output) != len(sha3FromMakeFuncOutput) {
tester.t.Fatalf("Original SHA3 and abstracted Sha3 has different length %v:%v\n", len(rawSha3Output), len(sha3FromMakeFuncOutput))
}
if !bytes.Equal(rawSha3Output, sha3FromMakeFuncOutput) {
tester.t.Fatalf("Original SHA3 and abstracted Sha3 mismatch %v:%v\n", rawSha3Output, sha3FromMakeFuncOutput)
}
}
func TestDataAppend(t *testing.T) {
sizes := []int{1, 1, 1, 4095, 4096, 4097, 1, 1, 1, 123456, 2345678, 2345678}
appendSizes := []int{4095, 4096, 4097, 1, 1, 1, 8191, 8192, 8193, 9000, 3000, 5000}
tester := &chunkerTester{t: t}
for i := range sizes {
n := sizes[i]
m := appendSizes[i]
if tester.inputs == nil {
tester.inputs = make(map[uint64][]byte)
}
input, found := tester.inputs[uint64(n)]
var data io.Reader
if !found {
input = testutil.RandomBytes(i, n)
data = bytes.NewReader(input)
tester.inputs[uint64(n)] = input
} else {
data = io.LimitReader(bytes.NewReader(input), int64(n))
}
store := NewMapChunkStore()
putGetter := newTestHasherStore(store, SHA3Hash)
ctx := context.TODO()
addr, wait, err := PyramidSplit(ctx, data, putGetter, putGetter, mockTag)
if err != nil {
tester.t.Fatalf(err.Error())
}
err = wait(ctx)
if err != nil {
tester.t.Fatalf(err.Error())
}
// create an append data stream
appendInput, found := tester.inputs[uint64(m)]
var appendData io.Reader
if !found {
appendInput = testutil.RandomBytes(i, m)
appendData = bytes.NewReader(appendInput)
tester.inputs[uint64(m)] = appendInput
} else {
appendData = io.LimitReader(bytes.NewReader(appendInput), int64(m))
}
putGetter = newTestHasherStore(store, SHA3Hash)
newAddr, wait, err := PyramidAppend(ctx, addr, appendData, putGetter, putGetter, mockTag)
if err != nil {
tester.t.Fatalf(err.Error())
}
err = wait(ctx)
if err != nil {
tester.t.Fatalf(err.Error())
}
reader := TreeJoin(ctx, newAddr, putGetter, 0)
newOutput := make([]byte, n+m)
r, err := reader.Read(newOutput)
if r != (n + m) {
tester.t.Fatalf("read error read: %v n = %v m = %v err = %v\n", r, n, m, err)
}
newInput := append(input, appendInput...)
if !bytes.Equal(newOutput, newInput) {
tester.t.Fatalf("input and output mismatch\n IN: %v\nOUT: %v\n", newInput, newOutput)
}
}
}
func TestRandomData(t *testing.T) {
// This test can validate files up to a relatively short length, as tree chunker slows down drastically.
// Validation of longer files is done by TestLocalStoreAndRetrieve in swarm package.
//sizes := []int{1, 60, 83, 179, 253, 1024, 4095, 4096, 4097, 8191, 8192, 8193, 12287, 12288, 12289, 524288, 524288 + 1, 524288 + 4097, 7 * 524288, 7*524288 + 1, 7*524288 + 4097}
sizes := []int{1, 60, 83, 179, 253, 1024, 4095, 4097, 8191, 8192, 12288, 12289, 524288}
tester := &chunkerTester{t: t}
for _, s := range sizes {
treeChunkerAddress := testRandomData(false, SHA3Hash, s, tester)
pyramidChunkerAddress := testRandomData(true, SHA3Hash, s, tester)
if treeChunkerAddress.String() != pyramidChunkerAddress.String() {
tester.t.Fatalf("tree chunker and pyramid chunker key mismatch for size %v\n TC: %v\n PC: %v\n", s, treeChunkerAddress.String(), pyramidChunkerAddress.String())
}
}
for _, s := range sizes {
treeChunkerAddress := testRandomData(false, BMTHash, s, tester)
pyramidChunkerAddress := testRandomData(true, BMTHash, s, tester)
if treeChunkerAddress.String() != pyramidChunkerAddress.String() {
tester.t.Fatalf("tree chunker and pyramid chunker key mismatch for size %v\n TC: %v\n PC: %v\n", s, treeChunkerAddress.String(), pyramidChunkerAddress.String())
}
}
}
func TestRandomBrokenData(t *testing.T) {
sizes := []int{1, 60, 83, 179, 253, 1024, 4095, 4096, 4097, 8191, 8192, 8193, 12287, 12288, 12289, 123456, 2345678}
tester := &chunkerTester{t: t}
for _, s := range sizes {
testRandomBrokenData(s, tester)
}
}
func benchReadAll(reader LazySectionReader) {
size, _ := reader.Size(context.TODO(), nil)
output := make([]byte, 1000)
for pos := int64(0); pos < size; pos += 1000 {
reader.ReadAt(output, pos)
}
}
func benchmarkSplitJoin(n int, t *testing.B) {
t.ReportAllocs()
for i := 0; i < t.N; i++ {
data := testutil.RandomReader(i, n)
putGetter := newTestHasherStore(NewMapChunkStore(), SHA3Hash)
ctx := context.TODO()
key, wait, err := PyramidSplit(ctx, data, putGetter, putGetter, mockTag)
if err != nil {
t.Fatalf(err.Error())
}
err = wait(ctx)
if err != nil {
t.Fatalf(err.Error())
}
reader := TreeJoin(ctx, key, putGetter, 0)
benchReadAll(reader)
}
}
func benchmarkSplitTreeSHA3(n int, t *testing.B) {
t.ReportAllocs()
for i := 0; i < t.N; i++ {
data := testutil.RandomReader(i, n)
putGetter := newTestHasherStore(&FakeChunkStore{}, SHA3Hash)
ctx := context.Background()
_, wait, err := TreeSplit(ctx, data, int64(n), putGetter)
if err != nil {
t.Fatalf(err.Error())
}
err = wait(ctx)
if err != nil {
t.Fatalf(err.Error())
}
}
}
func benchmarkSplitTreeBMT(n int, t *testing.B) {
t.ReportAllocs()
for i := 0; i < t.N; i++ {
data := testutil.RandomReader(i, n)
putGetter := newTestHasherStore(&FakeChunkStore{}, BMTHash)
ctx := context.Background()
_, wait, err := TreeSplit(ctx, data, int64(n), putGetter)
if err != nil {
t.Fatalf(err.Error())
}
err = wait(ctx)
if err != nil {
t.Fatalf(err.Error())
}
}
}
func benchmarkSplitPyramidBMT(n int, t *testing.B) {
t.ReportAllocs()
for i := 0; i < t.N; i++ {
data := testutil.RandomReader(i, n)
putGetter := newTestHasherStore(&FakeChunkStore{}, BMTHash)
ctx := context.Background()
_, wait, err := PyramidSplit(ctx, data, putGetter, putGetter, mockTag)
if err != nil {
t.Fatalf(err.Error())
}
err = wait(ctx)
if err != nil {
t.Fatalf(err.Error())
}
}
}
func benchmarkSplitPyramidSHA3(n int, t *testing.B) {
t.ReportAllocs()
for i := 0; i < t.N; i++ {
data := testutil.RandomReader(i, n)
putGetter := newTestHasherStore(&FakeChunkStore{}, SHA3Hash)
ctx := context.Background()
_, wait, err := PyramidSplit(ctx, data, putGetter, putGetter, mockTag)
if err != nil {
t.Fatalf(err.Error())
}
err = wait(ctx)
if err != nil {
t.Fatalf(err.Error())
}
}
}
func benchmarkSplitAppendPyramid(n, m int, t *testing.B) {
t.ReportAllocs()
for i := 0; i < t.N; i++ {
data := testutil.RandomReader(i, n)
data1 := testutil.RandomReader(t.N+i, m)
store := NewMapChunkStore()
putGetter := newTestHasherStore(store, SHA3Hash)
ctx := context.Background()
key, wait, err := PyramidSplit(ctx, data, putGetter, putGetter, mockTag)
if err != nil {
t.Fatalf(err.Error())
}
err = wait(ctx)
if err != nil {
t.Fatalf(err.Error())
}
putGetter = newTestHasherStore(store, SHA3Hash)
_, wait, err = PyramidAppend(ctx, key, data1, putGetter, putGetter, mockTag)
if err != nil {
t.Fatalf(err.Error())
}
err = wait(ctx)
if err != nil {
t.Fatalf(err.Error())
}
}
}
func BenchmarkSplitJoin_2(t *testing.B) { benchmarkSplitJoin(100, t) }
func BenchmarkSplitJoin_3(t *testing.B) { benchmarkSplitJoin(1000, t) }
func BenchmarkSplitJoin_4(t *testing.B) { benchmarkSplitJoin(10000, t) }
func BenchmarkSplitJoin_5(t *testing.B) { benchmarkSplitJoin(100000, t) }
func BenchmarkSplitJoin_6(t *testing.B) { benchmarkSplitJoin(1000000, t) }
func BenchmarkSplitJoin_7(t *testing.B) { benchmarkSplitJoin(10000000, t) }
// func BenchmarkSplitJoin_8(t *testing.B) { benchmarkJoin(100000000, t) }
func BenchmarkSplitTreeSHA3_2(t *testing.B) { benchmarkSplitTreeSHA3(100, t) }
func BenchmarkSplitTreeSHA3_2h(t *testing.B) { benchmarkSplitTreeSHA3(500, t) }
func BenchmarkSplitTreeSHA3_3(t *testing.B) { benchmarkSplitTreeSHA3(1000, t) }
func BenchmarkSplitTreeSHA3_3h(t *testing.B) { benchmarkSplitTreeSHA3(5000, t) }
func BenchmarkSplitTreeSHA3_4(t *testing.B) { benchmarkSplitTreeSHA3(10000, t) }
func BenchmarkSplitTreeSHA3_4h(t *testing.B) { benchmarkSplitTreeSHA3(50000, t) }
func BenchmarkSplitTreeSHA3_5(t *testing.B) { benchmarkSplitTreeSHA3(100000, t) }
func BenchmarkSplitTreeSHA3_6(t *testing.B) { benchmarkSplitTreeSHA3(1000000, t) }
func BenchmarkSplitTreeSHA3_7(t *testing.B) { benchmarkSplitTreeSHA3(10000000, t) }
// func BenchmarkSplitTreeSHA3_8(t *testing.B) { benchmarkSplitTreeSHA3(100000000, t) }
func BenchmarkSplitTreeBMT_2(t *testing.B) { benchmarkSplitTreeBMT(100, t) }
func BenchmarkSplitTreeBMT_2h(t *testing.B) { benchmarkSplitTreeBMT(500, t) }
func BenchmarkSplitTreeBMT_3(t *testing.B) { benchmarkSplitTreeBMT(1000, t) }
func BenchmarkSplitTreeBMT_3h(t *testing.B) { benchmarkSplitTreeBMT(5000, t) }
func BenchmarkSplitTreeBMT_4(t *testing.B) { benchmarkSplitTreeBMT(10000, t) }
func BenchmarkSplitTreeBMT_4h(t *testing.B) { benchmarkSplitTreeBMT(50000, t) }
func BenchmarkSplitTreeBMT_5(t *testing.B) { benchmarkSplitTreeBMT(100000, t) }
func BenchmarkSplitTreeBMT_6(t *testing.B) { benchmarkSplitTreeBMT(1000000, t) }
func BenchmarkSplitTreeBMT_7(t *testing.B) { benchmarkSplitTreeBMT(10000000, t) }
// func BenchmarkSplitTreeBMT_8(t *testing.B) { benchmarkSplitTreeBMT(100000000, t) }
func BenchmarkSplitPyramidSHA3_2(t *testing.B) { benchmarkSplitPyramidSHA3(100, t) }
func BenchmarkSplitPyramidSHA3_2h(t *testing.B) { benchmarkSplitPyramidSHA3(500, t) }
func BenchmarkSplitPyramidSHA3_3(t *testing.B) { benchmarkSplitPyramidSHA3(1000, t) }
func BenchmarkSplitPyramidSHA3_3h(t *testing.B) { benchmarkSplitPyramidSHA3(5000, t) }
func BenchmarkSplitPyramidSHA3_4(t *testing.B) { benchmarkSplitPyramidSHA3(10000, t) }
func BenchmarkSplitPyramidSHA3_4h(t *testing.B) { benchmarkSplitPyramidSHA3(50000, t) }
func BenchmarkSplitPyramidSHA3_5(t *testing.B) { benchmarkSplitPyramidSHA3(100000, t) }
func BenchmarkSplitPyramidSHA3_6(t *testing.B) { benchmarkSplitPyramidSHA3(1000000, t) }
func BenchmarkSplitPyramidSHA3_7(t *testing.B) { benchmarkSplitPyramidSHA3(10000000, t) }
// func BenchmarkSplitPyramidSHA3_8(t *testing.B) { benchmarkSplitPyramidSHA3(100000000, t) }
func BenchmarkSplitPyramidBMT_2(t *testing.B) { benchmarkSplitPyramidBMT(100, t) }
func BenchmarkSplitPyramidBMT_2h(t *testing.B) { benchmarkSplitPyramidBMT(500, t) }
func BenchmarkSplitPyramidBMT_3(t *testing.B) { benchmarkSplitPyramidBMT(1000, t) }
func BenchmarkSplitPyramidBMT_3h(t *testing.B) { benchmarkSplitPyramidBMT(5000, t) }
func BenchmarkSplitPyramidBMT_4(t *testing.B) { benchmarkSplitPyramidBMT(10000, t) }
func BenchmarkSplitPyramidBMT_4h(t *testing.B) { benchmarkSplitPyramidBMT(50000, t) }
func BenchmarkSplitPyramidBMT_5(t *testing.B) { benchmarkSplitPyramidBMT(100000, t) }
func BenchmarkSplitPyramidBMT_6(t *testing.B) { benchmarkSplitPyramidBMT(1000000, t) }
func BenchmarkSplitPyramidBMT_7(t *testing.B) { benchmarkSplitPyramidBMT(10000000, t) }
// func BenchmarkSplitPyramidBMT_8(t *testing.B) { benchmarkSplitPyramidBMT(100000000, t) }
func BenchmarkSplitAppendPyramid_2(t *testing.B) { benchmarkSplitAppendPyramid(100, 1000, t) }
func BenchmarkSplitAppendPyramid_2h(t *testing.B) { benchmarkSplitAppendPyramid(500, 1000, t) }
func BenchmarkSplitAppendPyramid_3(t *testing.B) { benchmarkSplitAppendPyramid(1000, 1000, t) }
func BenchmarkSplitAppendPyramid_4(t *testing.B) { benchmarkSplitAppendPyramid(10000, 1000, t) }
func BenchmarkSplitAppendPyramid_4h(t *testing.B) { benchmarkSplitAppendPyramid(50000, 1000, t) }
func BenchmarkSplitAppendPyramid_5(t *testing.B) { benchmarkSplitAppendPyramid(100000, 1000, t) }
func BenchmarkSplitAppendPyramid_6(t *testing.B) { benchmarkSplitAppendPyramid(1000000, 1000, t) }
func BenchmarkSplitAppendPyramid_7(t *testing.B) { benchmarkSplitAppendPyramid(10000000, 1000, t) }
// func BenchmarkAppendPyramid_8(t *testing.B) { benchmarkAppendPyramid(100000000, 1000, t) }
// go test -timeout 20m -cpu 4 -bench=./swarm/storage -run no
// If you don't add the timeout argument above, the benchmark will time out and dump goroutine stacks

storage/common_test.go (new file, 277 lines)

@@ -0,0 +1,277 @@
// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package storage
import (
"bytes"
"context"
"flag"
"fmt"
"io"
"sync"
"testing"
"time"
"github.com/ethereum/go-ethereum/log"
"github.com/ethersphere/swarm/chunk"
"github.com/mattn/go-colorable"
)
var (
loglevel = flag.Int("loglevel", 3, "verbosity of logs")
getTimeout = 30 * time.Second
)
func init() {
flag.Parse()
log.PrintOrigins(true)
log.Root().SetHandler(log.LvlFilterHandler(log.Lvl(*loglevel), log.StreamHandler(colorable.NewColorableStderr(), log.TerminalFormat(true))))
}
type brokenLimitedReader struct {
lr io.Reader
errAt int
off int
size int
}
func brokenLimitReader(data io.Reader, size int, errAt int) *brokenLimitedReader {
return &brokenLimitedReader{
lr: data,
errAt: errAt,
size: size,
}
}
func mputRandomChunks(store ChunkStore, n int) ([]Chunk, error) {
return mput(store, n, GenerateRandomChunk)
}
func mput(store ChunkStore, n int, f func(i int64) Chunk) (hs []Chunk, err error) {
// put to localstore and wait for stored channel
// does not check delivery error state
errc := make(chan error)
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()
for i := int64(0); i < int64(n); i++ {
ch := f(chunk.DefaultSize)
go func() {
_, err := store.Put(ctx, chunk.ModePutUpload, ch)
select {
case errc <- err:
case <-ctx.Done():
}
}()
hs = append(hs, ch)
}
// wait for all chunks to be stored
for i := 0; i < n; i++ {
err := <-errc
if err != nil {
return nil, err
}
}
return hs, nil
}
func mget(store ChunkStore, hs []Address, f func(h Address, chunk Chunk) error) error {
wg := sync.WaitGroup{}
wg.Add(len(hs))
errc := make(chan error)
for _, k := range hs {
go func(h Address) {
defer wg.Done()
// TODO: write timeout with context
ch, err := store.Get(context.TODO(), chunk.ModeGetRequest, h)
if err != nil {
errc <- err
return
}
if f != nil {
err = f(h, ch)
if err != nil {
errc <- err
return
}
}
}(k)
}
go func() {
wg.Wait()
close(errc)
}()
var err error
timeout := 20 * time.Second
select {
case err = <-errc:
case <-time.NewTimer(timeout).C:
err = fmt.Errorf("timed out after %v", timeout)
}
return err
}
func (r *brokenLimitedReader) Read(buf []byte) (int, error) {
if r.off+len(buf) > r.errAt {
return 0, fmt.Errorf("Broken reader")
}
r.off += len(buf)
return r.lr.Read(buf)
}
func testStoreRandom(m ChunkStore, n int, t *testing.T) {
chunks, err := mputRandomChunks(m, n)
if err != nil {
t.Fatalf("expected no error, got %v", err)
}
err = mget(m, chunkAddresses(chunks), nil)
if err != nil {
t.Fatalf("testStore failed: %v", err)
}
}
func testStoreCorrect(m ChunkStore, n int, t *testing.T) {
chunks, err := mputRandomChunks(m, n)
if err != nil {
t.Fatalf("expected no error, got %v", err)
}
f := func(h Address, chunk Chunk) error {
if !bytes.Equal(h, chunk.Address()) {
return fmt.Errorf("key does not match retrieved chunk Address")
}
hasher := MakeHashFunc(DefaultHash)()
data := chunk.Data()
hasher.ResetWithLength(data[:8])
hasher.Write(data[8:])
exp := hasher.Sum(nil)
if !bytes.Equal(h, exp) {
return fmt.Errorf("key is not hash of chunk data")
}
return nil
}
err = mget(m, chunkAddresses(chunks), f)
if err != nil {
t.Fatalf("testStore failed: %v", err)
}
}
func benchmarkStorePut(store ChunkStore, n int, b *testing.B) {
chunks := make([]Chunk, n)
i := 0
f := func(dataSize int64) Chunk {
chunk := GenerateRandomChunk(dataSize)
chunks[i] = chunk
i++
return chunk
}
mput(store, n, f)
f = func(dataSize int64) Chunk {
chunk := chunks[i]
i++
return chunk
}
b.ReportAllocs()
b.ResetTimer()
for j := 0; j < b.N; j++ {
i = 0
mput(store, n, f)
}
}
func benchmarkStoreGet(store ChunkStore, n int, b *testing.B) {
chunks, err := mputRandomChunks(store, n)
if err != nil {
b.Fatalf("expected no error, got %v", err)
}
b.ReportAllocs()
b.ResetTimer()
addrs := chunkAddresses(chunks)
for i := 0; i < b.N; i++ {
err := mget(store, addrs, nil)
if err != nil {
b.Fatalf("mget failed: %v", err)
}
}
}
// MapChunkStore is a very simple ChunkStore implementation to store chunks in a map in memory.
type MapChunkStore struct {
chunks map[string]Chunk
mu sync.RWMutex
}
func NewMapChunkStore() *MapChunkStore {
return &MapChunkStore{
chunks: make(map[string]Chunk),
}
}
func (m *MapChunkStore) Put(_ context.Context, _ chunk.ModePut, ch Chunk) (bool, error) {
m.mu.Lock()
defer m.mu.Unlock()
_, exists := m.chunks[ch.Address().Hex()]
m.chunks[ch.Address().Hex()] = ch
return exists, nil
}
func (m *MapChunkStore) Get(_ context.Context, _ chunk.ModeGet, ref Address) (Chunk, error) {
m.mu.RLock()
defer m.mu.RUnlock()
chunk := m.chunks[ref.Hex()]
if chunk == nil {
return nil, ErrChunkNotFound
}
return chunk, nil
}
// Need to implement Has from SyncChunkStore
func (m *MapChunkStore) Has(ctx context.Context, ref Address) (has bool, err error) {
m.mu.RLock()
defer m.mu.RUnlock()
_, has = m.chunks[ref.Hex()]
return has, nil
}
func (m *MapChunkStore) Set(ctx context.Context, mode chunk.ModeSet, addr chunk.Address) (err error) {
return nil
}
func (m *MapChunkStore) LastPullSubscriptionBinID(bin uint8) (id uint64, err error) {
return 0, nil
}
func (m *MapChunkStore) SubscribePull(ctx context.Context, bin uint8, since, until uint64) (c <-chan chunk.Descriptor, stop func()) {
return nil, nil
}
func (m *MapChunkStore) Close() error {
return nil
}
func chunkAddresses(chunks []Chunk) []Address {
addrs := make([]Address, len(chunks))
for i, ch := range chunks {
addrs[i] = ch.Address()
}
return addrs
}

@@ -0,0 +1,152 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package encryption
import (
"crypto/rand"
"encoding/binary"
"fmt"
"hash"
"sync"
)
const KeyLength = 32
type Key []byte
type Encryption interface {
Encrypt(data []byte) ([]byte, error)
Decrypt(data []byte) ([]byte, error)
}
type encryption struct {
key Key // the encryption key (hashSize bytes long)
keyLen int // length of the key = length of blockcipher block
padding int // encryption will pad the data up to this length if > 0
initCtr uint32 // initial counter used for counter mode blockcipher
hashFunc func() hash.Hash // hasher constructor function
}
// New constructs a new encryptor/decryptor
func New(key Key, padding int, initCtr uint32, hashFunc func() hash.Hash) *encryption {
return &encryption{
key: key,
keyLen: len(key),
padding: padding,
initCtr: initCtr,
hashFunc: hashFunc,
}
}
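// A minimal usage sketch (illustrative; the padding and counter values are
// assumptions, and sha3 is golang.org/x/crypto/sha3):
//
//	key := GenerateRandomKey(KeyLength)
//	enc := New(key, 4096, 0, sha3.NewLegacyKeccak256)
//	ciphertext, _ := enc.Encrypt(plaintext)  // padded to 4096 bytes
//	plainAgain, _ := enc.Decrypt(ciphertext) // first len(plaintext) bytes match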
// Encrypt encrypts the data and does padding if specified
func (e *encryption) Encrypt(data []byte) ([]byte, error) {
length := len(data)
outLength := length
isFixedPadding := e.padding > 0
if isFixedPadding {
if length > e.padding {
return nil, fmt.Errorf("Data length longer than padding, data length %v padding %v", length, e.padding)
}
outLength = e.padding
}
out := make([]byte, outLength)
e.transform(data, out)
return out, nil
}
// Decrypt decrypts the data; if padding was used, the caller must know the original length and truncate accordingly
func (e *encryption) Decrypt(data []byte) ([]byte, error) {
length := len(data)
if e.padding > 0 && length != e.padding {
return nil, fmt.Errorf("Data length different than padding, data length %v padding %v", length, e.padding)
}
out := make([]byte, length)
e.transform(data, out)
return out, nil
}
// transform XORs the input with a hash-derived keystream, segment by segment, writing the result to out
func (e *encryption) transform(in, out []byte) {
inLength := len(in)
wg := sync.WaitGroup{}
wg.Add((inLength-1)/e.keyLen + 1)
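// (inLength-1)/e.keyLen + 1 is ceil(inLength/keyLen): one goroutine per key-length segment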
for i := 0; i < inLength; i += e.keyLen {
l := min(e.keyLen, inLength-i)
// call transformations per segment (asynchronously)
go func(i int, x, y []byte) {
defer wg.Done()
e.Transcrypt(i, x, y)
}(i/e.keyLen, in[i:i+l], out[i:i+l])
}
// pad the rest if out is longer
pad(out[inLength:])
wg.Wait()
}
// Transcrypt is used for segmentwise transformation;
// if in is shorter than out, padding is used
func (e *encryption) Transcrypt(i int, in []byte, out []byte) {
// first hash key with counter (initial counter + i)
hasher := e.hashFunc()
hasher.Write(e.key)
ctrBytes := make([]byte, 4)
binary.LittleEndian.PutUint32(ctrBytes, uint32(i)+e.initCtr)
hasher.Write(ctrBytes)
ctrHash := hasher.Sum(nil)
hasher.Reset()
// second round of hashing for selective disclosure
hasher.Write(ctrHash)
segmentKey := hasher.Sum(nil)
hasher.Reset()
// XOR bytes up to the length of in (out must be at least as long)
inLength := len(in)
for j := 0; j < inLength; j++ {
out[j] = in[j] ^ segmentKey[j]
}
// insert padding if out is longer
pad(out[inLength:])
}
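// The keystream for segment i is therefore H(H(key || le32(initCtr+i))).
// Because the transformation is a plain XOR with this keystream, it is an
// involution: applying it twice restores the input, which is why Encrypt and
// Decrypt share the same transform code path.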
func pad(b []byte) {
l := len(b)
for total := 0; total < l; {
read, _ := rand.Read(b[total:])
total += read
}
}
// GenerateRandomKey generates a random key of length l
func GenerateRandomKey(l int) Key {
key := make([]byte, l)
var total int
for total < l {
read, _ := rand.Read(key[total:])
total += read
}
return key
}
func min(x, y int) int {
if x < y {
return x
}
return y
}

File diff suppressed because one or more lines are too long

storage/error.go (new file, 36 lines)

@@ -0,0 +1,36 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package storage
import "github.com/ethersphere/swarm/chunk"
const (
ErrInit = iota
ErrNotFound
ErrUnauthorized
ErrInvalidValue
ErrDataOverflow
ErrNothingToReturn
ErrInvalidSignature
ErrNotSynced
)
// Errors are the same as the ones in chunk package for backward compatibility.
var (
ErrChunkNotFound = chunk.ErrChunkNotFound
ErrChunkInvalid = chunk.ErrChunkInvalid
)

@@ -0,0 +1,44 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package feed
import "github.com/ethereum/go-ethereum/common/hexutil"
type binarySerializer interface {
binaryPut(serializedData []byte) error
binaryLength() int
binaryGet(serializedData []byte) error
}
// Values interface represents a string key-value store
// useful for building query strings
type Values interface {
Get(key string) string
Set(key, value string)
}
type valueSerializer interface {
FromValues(values Values) error
AppendValues(values Values)
}
// Hex serializes the structure and converts it to a hex string
func Hex(bin binarySerializer) string {
b := make([]byte, bin.binaryLength())
bin.binaryPut(b)
return hexutil.Encode(b)
}

@@ -0,0 +1,98 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package feed
import (
"encoding/json"
"reflect"
"testing"
"github.com/ethereum/go-ethereum/common/hexutil"
)
// KV mocks a key value store
type KV map[string]string
func (kv KV) Get(key string) string {
return kv[key]
}
func (kv KV) Set(key, value string) {
kv[key] = value
}
func compareByteSliceToExpectedHex(t *testing.T, variableName string, actualValue []byte, expectedHex string) {
if hexutil.Encode(actualValue) != expectedHex {
t.Fatalf("%s: Expected %s to be %s, got %s", t.Name(), variableName, expectedHex, hexutil.Encode(actualValue))
}
}
func testBinarySerializerRecovery(t *testing.T, bin binarySerializer, expectedHex string) {
name := reflect.TypeOf(bin).Elem().Name()
serialized := make([]byte, bin.binaryLength())
if err := bin.binaryPut(serialized); err != nil {
t.Fatalf("%s.binaryPut error when trying to serialize structure: %s", name, err)
}
compareByteSliceToExpectedHex(t, name, serialized, expectedHex)
recovered := reflect.New(reflect.TypeOf(bin).Elem()).Interface().(binarySerializer)
if err := recovered.binaryGet(serialized); err != nil {
t.Fatalf("%s.binaryGet error when trying to deserialize structure: %s", name, err)
}
if !reflect.DeepEqual(bin, recovered) {
t.Fatalf("Expected that the recovered %s equals the marshalled %s", name, name)
}
serializedWrongLength := make([]byte, 1)
copy(serializedWrongLength[:], serialized)
if err := recovered.binaryGet(serializedWrongLength); err == nil {
t.Fatalf("Expected %s.binaryGet to fail since data is too small", name)
}
}
func testBinarySerializerLengthCheck(t *testing.T, bin binarySerializer) {
name := reflect.TypeOf(bin).Elem().Name()
// make a slice that is too small to contain the metadata
serialized := make([]byte, bin.binaryLength()-1)
if err := bin.binaryPut(serialized); err == nil {
t.Fatalf("Expected %s.binaryPut to fail, since target slice is too small", name)
}
}
func testValueSerializer(t *testing.T, v valueSerializer, expected KV) {
name := reflect.TypeOf(v).Elem().Name()
kv := make(KV)
v.AppendValues(kv)
if !reflect.DeepEqual(expected, kv) {
expj, _ := json.Marshal(expected)
gotj, _ := json.Marshal(kv)
t.Fatalf("Expected %s.AppendValues to return %s, got %s", name, string(expj), string(gotj))
}
recovered := reflect.New(reflect.TypeOf(v).Elem()).Interface().(valueSerializer)
err := recovered.FromValues(kv)
if err != nil {
t.Fatal(err)
}
if !reflect.DeepEqual(recovered, v) {
t.Fatalf("Expected recovered %s to be the same", name)
}
}

@@ -0,0 +1,48 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package feed
import (
"bytes"
"context"
"time"
"github.com/ethersphere/swarm/storage"
)
const (
hasherCount = 8
feedsHashAlgorithm = storage.SHA3Hash
defaultRetrieveTimeout = 1000 * time.Millisecond
)
// cacheEntry caches the last known update of a specific Swarm feed.
type cacheEntry struct {
Update
*bytes.Reader
lastKey storage.Address
}
// Size implements storage.LazySectionReader
func (r *cacheEntry) Size(ctx context.Context, _ chan bool) (int64, error) {
return int64(len(r.Update.data)), nil
}
// Topic returns the feed's topic
func (r *cacheEntry) Topic() Topic {
return r.Feed.Topic
}

storage/feed/doc.go (new file, 43 lines)

@@ -0,0 +1,43 @@
/*
Package feed defines Swarm Feeds.
Swarm Feeds allow a user to build an update feed about a particular topic
without resorting to ENS on each update.
The update scheme is built on swarm chunks with chunk keys following
a predictable, versionable pattern.
A Feed is tied to a unique identifier that is deterministically generated out of
the chosen topic.
A Feed is defined as the series of updates of a specific user about a particular topic.
Actual data updates are also made in the form of swarm chunks. The keys
of the updates are the hash of a concatenation of properties as follows:
updateAddr = H(Feed, Epoch ID)
where H is the SHA3 hash function,
Feed is the combination of Topic and the user address, and
Epoch ID is a time slot. See the lookup package for more information.
A user looking up the latest update in a Feed only needs to know the Topic
and the other user's address.
The Feed Update data is:
updatedata = Feed|Epoch|data
The full update data that goes in the chunk payload is:
updatedata|sign(updatedata)
Structure Summary:
Request: Feed Update with signature
Update: headers + data
Header: Protocol version and placeholders reserved for future use
ID: Information about how to locate a specific update
Feed: Represents a user's series of publications about a specific Topic
Topic: Item that the updates are about
User: User who updates the Feed
Epoch: time slot where the update is stored
*/
package feed

storage/feed/error.go (new file, 73 lines)

@@ -0,0 +1,73 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package feed
import (
"fmt"
)
const (
ErrInit = iota
ErrNotFound
ErrIO
ErrUnauthorized
ErrInvalidValue
ErrDataOverflow
ErrNothingToReturn
ErrCorruptData
ErrInvalidSignature
ErrNotSynced
ErrPeriodDepth
ErrCnt
)
// Error is the typed error object used for Swarm feeds
type Error struct {
code int
err string
}
// Error implements the error interface
func (e *Error) Error() string {
return e.err
}
// Code returns the error code
// Error codes are enumerated in the error.go file within the feeds package
func (e *Error) Code() int {
return e.code
}
// NewError creates a new Swarm feeds Error object with the specified code and custom error message
func NewError(code int, s string) error {
if code < 0 || code >= ErrCnt {
panic("no such error code!")
}
r := &Error{
err: s,
}
switch code {
case ErrNotFound, ErrIO, ErrUnauthorized, ErrInvalidValue, ErrDataOverflow, ErrNothingToReturn, ErrInvalidSignature, ErrNotSynced, ErrPeriodDepth, ErrCorruptData:
r.code = code
}
return r
}
// NewErrorf is a convenience version of NewError that incorporates printf-style formatting
func NewErrorf(code int, format string, args ...interface{}) error {
return NewError(code, fmt.Sprintf(format, args...))
}

storage/feed/feed.go (new file, 125 lines)

@@ -0,0 +1,125 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package feed
import (
"hash"
"unsafe"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/common/hexutil"
"github.com/ethersphere/swarm/storage"
)
// Feed represents a particular user's stream of updates on a topic
type Feed struct {
Topic Topic `json:"topic"`
User common.Address `json:"user"`
}
// Feed layout:
// Topic: TopicLength bytes
// User:  common.AddressLength bytes
const feedLength = TopicLength + common.AddressLength
// mapKey calculates a unique id for this feed. Used by the cache map in `Handler`
func (f *Feed) mapKey() uint64 {
serializedData := make([]byte, feedLength)
f.binaryPut(serializedData)
hasher := hashPool.Get().(hash.Hash)
defer hashPool.Put(hasher)
hasher.Reset()
hasher.Write(serializedData)
hash := hasher.Sum(nil)
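// reinterpret the first 8 bytes of the hash as a native-endian uint64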
return *(*uint64)(unsafe.Pointer(&hash[0]))
}
// binaryPut serializes this feed instance into the provided slice
func (f *Feed) binaryPut(serializedData []byte) error {
if len(serializedData) != feedLength {
return NewErrorf(ErrInvalidValue, "Incorrect slice size to serialize feed. Expected %d, got %d", feedLength, len(serializedData))
}
var cursor int
copy(serializedData[cursor:cursor+TopicLength], f.Topic[:TopicLength])
cursor += TopicLength
copy(serializedData[cursor:cursor+common.AddressLength], f.User[:])
cursor += common.AddressLength
return nil
}
// binaryLength returns the expected size of this structure when serialized
func (f *Feed) binaryLength() int {
return feedLength
}
// binaryGet restores the current instance from the information contained in the passed slice
func (f *Feed) binaryGet(serializedData []byte) error {
if len(serializedData) != feedLength {
return NewErrorf(ErrInvalidValue, "Incorrect slice size to read feed. Expected %d, got %d", feedLength, len(serializedData))
}
var cursor int
copy(f.Topic[:], serializedData[cursor:cursor+TopicLength])
cursor += TopicLength
copy(f.User[:], serializedData[cursor:cursor+common.AddressLength])
cursor += common.AddressLength
return nil
}
// Hex serializes the feed to a hex string
func (f *Feed) Hex() string {
serializedData := make([]byte, feedLength)
f.binaryPut(serializedData)
return hexutil.Encode(serializedData)
}
// FromValues deserializes this instance from a string key-value store
// useful to parse query strings
func (f *Feed) FromValues(values Values) (err error) {
topic := values.Get("topic")
if topic != "" {
if err := f.Topic.FromHex(values.Get("topic")); err != nil {
return err
}
} else { // see if the user set name and relatedcontent
name := values.Get("name")
relatedContent, _ := hexutil.Decode(values.Get("relatedcontent"))
if len(relatedContent) > 0 {
if len(relatedContent) < storage.AddressLength {
return NewErrorf(ErrInvalidValue, "relatedcontent field must be a hex-encoded byte array exactly %d bytes long", storage.AddressLength)
}
relatedContent = relatedContent[:storage.AddressLength]
}
f.Topic, err = NewTopic(name, relatedContent)
if err != nil {
return err
}
}
f.User = common.HexToAddress(values.Get("user"))
return nil
}
// AppendValues serializes this structure into the provided string key-value store
// useful to build query strings
func (f *Feed) AppendValues(values Values) {
values.Set("topic", f.Topic.Hex())
values.Set("user", f.User.Hex())
}
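
Since Feed is just a (Topic, User) pair with fixed-size serializers, a round trip through its hex and query-string forms is straightforward. A minimal sketch, assuming the package is imported as `feed` and using `url.Values` (which satisfies the `Values` interface used above); the address is illustrative only:

package main

import (
	"fmt"
	"net/url"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethersphere/swarm/storage/feed"
)

func main() {
	topic, _ := feed.NewTopic("world news report, every hour", nil)
	f := feed.Feed{
		Topic: topic,
		User:  common.HexToAddress("0x876a8936a7cd0b79ef0735ad0896c1afe278781c"), // illustrative address
	}
	fmt.Println(f.Hex()) // topic bytes followed by the user address

	// round trip through a query string
	values := url.Values{}
	f.AppendValues(values) // sets "topic" and "user"
	var f2 feed.Feed
	if err := f2.FromValues(values); err == nil {
		fmt.Println(f2.Hex() == f.Hex()) // true
	}
}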

36
storage/feed/feed_test.go Normal file
View File

@ -0,0 +1,36 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package feed
import (
"testing"
)
func getTestFeed() *Feed {
topic, _ := NewTopic("world news report, every hour", nil)
return &Feed{
Topic: topic,
User: newCharlieSigner().Address(),
}
}
func TestFeedSerializerDeserializer(t *testing.T) {
testBinarySerializerRecovery(t, getTestFeed(), "0x776f726c64206e657773207265706f72742c20657665727920686f7572000000876a8936a7cd0b79ef0735ad0896c1afe278781c")
}
func TestFeedSerializerLengthCheck(t *testing.T) {
testBinarySerializerLengthCheck(t, getTestFeed())
}

298
storage/feed/handler.go Normal file
View File

@ -0,0 +1,298 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// Handler is the API for feeds
// It enables creating, updating, syncing and retrieving feed updates and their data
package feed
import (
"bytes"
"context"
"fmt"
"sync"
"sync/atomic"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/storage/feed/lookup"
"github.com/ethersphere/swarm/log"
"github.com/ethersphere/swarm/storage"
)
type Handler struct {
chunkStore *storage.NetStore
HashSize int
cache map[uint64]*cacheEntry
cacheLock sync.RWMutex
}
// HandlerParams passes parameters to the Handler constructor NewHandler
type HandlerParams struct {
}
// hashPool contains a pool of ready hashers
var hashPool sync.Pool
// init initializes the package and hashPool
func init() {
hashPool = sync.Pool{
New: func() interface{} {
return storage.MakeHashFunc(feedsHashAlgorithm)()
},
}
}
// NewHandler creates a new Swarm feeds API
func NewHandler(params *HandlerParams) *Handler {
fh := &Handler{
cache: make(map[uint64]*cacheEntry),
}
for i := 0; i < hasherCount; i++ {
hashfunc := storage.MakeHashFunc(feedsHashAlgorithm)()
if fh.HashSize == 0 {
fh.HashSize = hashfunc.Size()
}
hashPool.Put(hashfunc)
}
return fh
}
// SetStore sets the store backend for the Swarm feeds API
func (h *Handler) SetStore(store *storage.NetStore) {
h.chunkStore = store
}
// Validate is a chunk validation method
// If it looks like a feed update, the chunk address is checked against the userAddr of the update's signature
// It implements the storage.ChunkValidator interface
func (h *Handler) Validate(chunk storage.Chunk) bool {
if len(chunk.Data()) < minimumSignedUpdateLength {
return false
}
// check if it is a properly formatted update chunk with
// valid signature and proof of ownership of the feed it is trying
// to update
// First, deserialize the chunk
var r Request
if err := r.fromChunk(chunk); err != nil {
log.Debug("Invalid feed update chunk", "addr", chunk.Address(), "err", err)
return false
}
// Verify signatures and that the signer actually owns the feed
// If it fails, it means either the signature is not valid, data is corrupted
// or someone is trying to update someone else's feed.
if err := r.Verify(); err != nil {
log.Debug("Invalid feed update signature", "err", err)
return false
}
return true
}
// GetContent retrieves the data payload of the last synced update of the feed
func (h *Handler) GetContent(feed *Feed) (storage.Address, []byte, error) {
if feed == nil {
return nil, nil, NewError(ErrInvalidValue, "feed is nil")
}
feedUpdate := h.get(feed)
if feedUpdate == nil {
return nil, nil, NewError(ErrNotFound, "feed update not cached")
}
return feedUpdate.lastKey, feedUpdate.data, nil
}
// NewRequest prepares a Request structure with all the necessary information to
// just add the desired data and sign it.
// The resulting structure can then be signed and passed to Handler.Update to be verified and sent
func (h *Handler) NewRequest(ctx context.Context, feed *Feed) (request *Request, err error) {
if feed == nil {
return nil, NewError(ErrInvalidValue, "feed cannot be nil")
}
now := TimestampProvider.Now().Time
request = new(Request)
request.Header.Version = ProtocolVersion
query := NewQueryLatest(feed, lookup.NoClue)
feedUpdate, err := h.Lookup(ctx, query)
if err != nil {
if err.(*Error).code != ErrNotFound {
return nil, err
}
// not finding updates means that there is a network error
// or that the feed really does not have updates
}
request.Feed = *feed
// if we already have an update, then find next epoch
if feedUpdate != nil {
request.Epoch = lookup.GetNextEpoch(feedUpdate.Epoch, now)
} else {
request.Epoch = lookup.GetFirstEpoch(now)
}
return request, nil
}
// Lookup retrieves a specific or latest feed update
// Lookup works differently depending on the configuration of `query`
// See the `query` documentation and helper functions:
// `NewQueryLatest` and `NewQuery`
func (h *Handler) Lookup(ctx context.Context, query *Query) (*cacheEntry, error) {
timeLimit := query.TimeLimit
if timeLimit == 0 { // if time limit is set to zero, the user wants to get the latest update
timeLimit = TimestampProvider.Now().Time
}
if query.Hint == lookup.NoClue { // try to use our cache
entry := h.get(&query.Feed)
if entry != nil && entry.Epoch.Time <= timeLimit { // avoid bad hints
query.Hint = entry.Epoch
}
}
// we can't look for anything without a store
if h.chunkStore == nil {
return nil, NewError(ErrInit, "Call Handler.SetStore() before performing lookups")
}
var readCount int32
// Invoke the lookup engine.
// The callback will be called every time the lookup algorithm needs to guess
requestPtr, err := lookup.Lookup(ctx, timeLimit, query.Hint, func(ctx context.Context, epoch lookup.Epoch, now uint64) (interface{}, error) {
atomic.AddInt32(&readCount, 1)
id := ID{
Feed: query.Feed,
Epoch: epoch,
}
ctx, cancel := context.WithTimeout(ctx, defaultRetrieveTimeout)
defer cancel()
ch, err := h.chunkStore.Get(ctx, chunk.ModeGetLookup, id.Addr())
if err != nil {
if err == context.DeadlineExceeded { // chunk not found
return nil, nil
}
return nil, err //something else happened or context was cancelled.
}
var request Request
if err := request.fromChunk(ch); err != nil {
return nil, nil
}
if request.Time <= timeLimit {
return &request, nil
}
return nil, nil
})
if err != nil {
return nil, err
}
log.Info(fmt.Sprintf("Feed lookup finished in %d lookups", readCount))
request, _ := requestPtr.(*Request)
if request == nil {
return nil, NewError(ErrNotFound, "no feed updates found")
}
return h.updateCache(request)
}
// updateCache updates the feed updates cache with the specified content
func (h *Handler) updateCache(request *Request) (*cacheEntry, error) {
updateAddr := request.Addr()
log.Trace("feed cache update", "topic", request.Topic.Hex(), "updateaddr", updateAddr, "epoch time", request.Epoch.Time, "epoch level", request.Epoch.Level)
entry := h.get(&request.Feed)
if entry == nil {
entry = &cacheEntry{}
h.set(&request.Feed, entry)
}
// update our cache entry for this feed
entry.lastKey = updateAddr
entry.Update = request.Update
entry.Reader = bytes.NewReader(entry.data)
return entry, nil
}
// Update publishes a feed update
// Note that a feed update cannot span chunks, so it has a maximum net length of 4096 bytes, including update header data and signature.
// This results in a max payload of `maxUpdateDataLength` (check update.go for more details)
// An error will be returned if the total length of the chunk payload would exceed this limit.
// Update can only check if the caller is trying to overwrite the very last known version, otherwise it just puts the update
// on the network.
func (h *Handler) Update(ctx context.Context, r *Request) (updateAddr storage.Address, err error) {
// we can't update anything without a store
if h.chunkStore == nil {
return nil, NewError(ErrInit, "Call Handler.SetStore() before updating")
}
feedUpdate := h.get(&r.Feed)
if feedUpdate != nil && feedUpdate.Epoch.Equals(r.Epoch) { // This is the only cheap check we can do for sure
return nil, NewError(ErrInvalidValue, "A former update in this epoch is already known to exist")
}
ch, err := r.toChunk() // Serialize the update into a chunk. Fails if data is too big
if err != nil {
return nil, err
}
// send the chunk
h.chunkStore.Put(ctx, chunk.ModePutUpload, ch)
log.Trace("feed update", "updateAddr", r.idAddr, "epoch time", r.Epoch.Time, "epoch level", r.Epoch.Level, "data", ch.Data())
// update our feed updates map cache entry if the new update is newer than the one we have, if we have it.
if feedUpdate != nil && r.Epoch.After(feedUpdate.Epoch) {
feedUpdate.Epoch = r.Epoch
feedUpdate.data = make([]byte, len(r.data))
feedUpdate.lastKey = r.idAddr
copy(feedUpdate.data, r.data)
feedUpdate.Reader = bytes.NewReader(feedUpdate.data)
}
return r.idAddr, nil
}
// Retrieves the feed update cache value for the given feed
func (h *Handler) get(feed *Feed) *cacheEntry {
mapKey := feed.mapKey()
h.cacheLock.RLock()
defer h.cacheLock.RUnlock()
feedUpdate := h.cache[mapKey]
return feedUpdate
}
// Sets the feed update cache value for the given feed
func (h *Handler) set(feed *Feed, feedUpdate *cacheEntry) {
mapKey := feed.mapKey()
h.cacheLock.Lock()
defer h.cacheLock.Unlock()
h.cache[mapKey] = feedUpdate
}
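
Putting the pieces together, a typical publish-then-read cycle against the Handler looks roughly like the sketch below. Construction of the NetStore is elided and `netStore` is assumed to be already configured, `TimestampProvider` is assumed to be set, and imports of the feed, lookup and storage packages are assumed; all other names come from the code above.

func publishAndRead(ctx context.Context, netStore *storage.NetStore, signer feed.Signer, topic feed.Topic) ([]byte, error) {
	h := feed.NewHandler(&feed.HandlerParams{})
	h.SetStore(netStore)

	fd := feed.Feed{Topic: topic, User: signer.Address()}

	// prepare a request for the next free epoch, attach the payload and sign it
	req, err := h.NewRequest(ctx, &fd)
	if err != nil {
		return nil, err
	}
	req.SetData([]byte("hello"))
	if err := req.Sign(signer); err != nil {
		return nil, err
	}
	if _, err := h.Update(ctx, req); err != nil {
		return nil, err
	}

	// find the latest update; a successful lookup also fills the handler's
	// cache, which GetContent then reads from
	if _, err := h.Lookup(ctx, feed.NewQueryLatest(&fd, lookup.NoClue)); err != nil {
		return nil, err
	}
	_, content, err := h.GetContent(&fd)
	return content, err
}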

View File

@ -0,0 +1,505 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package feed
import (
"bytes"
"context"
"flag"
"fmt"
"io/ioutil"
"os"
"testing"
"time"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/log"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/storage"
"github.com/ethersphere/swarm/storage/feed/lookup"
"github.com/ethersphere/swarm/storage/localstore"
)
var (
loglevel = flag.Int("loglevel", 3, "loglevel")
startTime = Timestamp{
Time: uint64(4200),
}
cleanF func()
subtopicName = "føø.bar"
)
func init() {
flag.Parse()
log.Root().SetHandler(log.CallerFileHandler(log.LvlFilterHandler(log.Lvl(*loglevel), log.StreamHandler(os.Stderr, log.TerminalFormat(true)))))
}
// simulated timeProvider
type fakeTimeProvider struct {
currentTime uint64
}
func (f *fakeTimeProvider) Tick() {
f.currentTime++
}
func (f *fakeTimeProvider) Set(time uint64) {
f.currentTime = time
}
func (f *fakeTimeProvider) FastForward(offset uint64) {
f.currentTime += offset
}
func (f *fakeTimeProvider) Now() Timestamp {
return Timestamp{
Time: f.currentTime,
}
}
// make updates and retrieve them based on periods and versions
func TestFeedsHandler(t *testing.T) {
// make fake timeProvider
clock := &fakeTimeProvider{
currentTime: startTime.Time, // clock starts at t=4200
}
// signer containing private key
signer := newAliceSigner()
feedsHandler, datadir, teardownTest, err := setupTest(clock, signer)
if err != nil {
t.Fatal(err)
}
defer teardownTest()
// create a new feed
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
topic, _ := NewTopic("Mess with Swarm feeds code and see what ghost catches you", nil)
fd := Feed{
Topic: topic,
User: signer.Address(),
}
// data for updates:
updates := []string{
"blinky", // t=4200
"pinky", // t=4242
"inky", // t=4284
"clyde", // t=4285
}
request := NewFirstRequest(fd.Topic) // this timestamps the update at t = 4200 (start time)
chunkAddress := make(map[string]storage.Address)
data := []byte(updates[0])
request.SetData(data)
if err := request.Sign(signer); err != nil {
t.Fatal(err)
}
chunkAddress[updates[0]], err = feedsHandler.Update(ctx, request)
if err != nil {
t.Fatal(err)
}
// move the clock ahead 21 seconds
clock.FastForward(21) // t=4221
request, err = feedsHandler.NewRequest(ctx, &request.Feed) // this timestamps the update at t = 4221
if err != nil {
t.Fatal(err)
}
if request.Epoch.Base() != 0 || request.Epoch.Level != lookup.HighestLevel-1 {
t.Fatalf("Suggested epoch BaseTime should be 0 and Epoch level should be %d", lookup.HighestLevel-1)
}
request.Epoch.Level = lookup.HighestLevel // force the level to HighestLevel instead of HighestLevel-1 to make it fail
data = []byte(updates[1])
request.SetData(data)
if err := request.Sign(signer); err != nil {
t.Fatal(err)
}
chunkAddress[updates[1]], err = feedsHandler.Update(ctx, request)
if err == nil {
t.Fatal("Expected update to fail since an update in this epoch already exists")
}
// move the clock ahead 21 seconds
clock.FastForward(21) // t=4242
request, err = feedsHandler.NewRequest(ctx, &request.Feed)
if err != nil {
t.Fatal(err)
}
request.SetData(data)
if err := request.Sign(signer); err != nil {
t.Fatal(err)
}
chunkAddress[updates[1]], err = feedsHandler.Update(ctx, request)
if err != nil {
t.Fatal(err)
}
// move the clock ahead 42 seconds
clock.FastForward(42) // t=4284
request, err = feedsHandler.NewRequest(ctx, &request.Feed)
if err != nil {
t.Fatal(err)
}
data = []byte(updates[2])
request.SetData(data)
if err := request.Sign(signer); err != nil {
t.Fatal(err)
}
chunkAddress[updates[2]], err = feedsHandler.Update(ctx, request)
if err != nil {
t.Fatal(err)
}
// move the clock ahead 1 second
clock.FastForward(1) // t=4285
request, err = feedsHandler.NewRequest(ctx, &request.Feed)
if err != nil {
t.Fatal(err)
}
if request.Epoch.Base() != 0 || request.Epoch.Level != 28 {
t.Fatalf("Expected epoch base time to be %d, got %d. Expected epoch level to be %d, got %d", 0, request.Epoch.Base(), 28, request.Epoch.Level)
}
data = []byte(updates[3])
request.SetData(data)
if err := request.Sign(signer); err != nil {
t.Fatal(err)
}
chunkAddress[updates[3]], err = feedsHandler.Update(ctx, request)
if err != nil {
t.Fatal(err)
}
time.Sleep(time.Second)
feedsHandler.Close()
// check we can retrieve the updates after close
clock.FastForward(2000) // t=6285
feedParams := &HandlerParams{}
feedsHandler2, err := NewTestHandler(datadir, feedParams)
if err != nil {
t.Fatal(err)
}
update2, err := feedsHandler2.Lookup(ctx, NewQueryLatest(&request.Feed, lookup.NoClue))
if err != nil {
t.Fatal(err)
}
// last update should be "clyde"
if !bytes.Equal(update2.data, []byte(updates[len(updates)-1])) {
t.Fatalf("feed update data was %v, expected %v", string(update2.data), updates[len(updates)-1])
}
if update2.Level != 28 {
t.Fatalf("feed update epoch level was %d, expected 28", update2.Level)
}
if update2.Base() != 0 {
t.Fatalf("feed update epoch base time was %d, expected 0", update2.Base())
}
log.Debug("Latest lookup", "epoch base time", update2.Base(), "epoch level", update2.Level, "data", update2.data)
// specific point in time
update, err := feedsHandler2.Lookup(ctx, NewQuery(&request.Feed, 4284, lookup.NoClue))
if err != nil {
t.Fatal(err)
}
// check data
if !bytes.Equal(update.data, []byte(updates[2])) {
t.Fatalf("feed update data (historical) was %v, expected %v", string(update2.data), updates[2])
}
log.Debug("Historical lookup", "epoch base time", update2.Base(), "epoch level", update2.Level, "data", update2.data)
// beyond the first should yield an error
update, err = feedsHandler2.Lookup(ctx, NewQuery(&request.Feed, startTime.Time-1, lookup.NoClue))
if err == nil {
t.Fatalf("expected previous to fail, returned epoch %s data %v", update.Epoch.String(), update.data)
}
}
const Day = 60 * 60 * 24
const Year = Day * 365
const Month = Day * 30
func generateData(x uint64) []byte {
return []byte(fmt.Sprintf("%d", x))
}
func TestSparseUpdates(t *testing.T) {
// make fake timeProvider
timeProvider := &fakeTimeProvider{
currentTime: startTime.Time,
}
// signer containing private key
signer := newAliceSigner()
rh, datadir, teardownTest, err := setupTest(timeProvider, signer)
if err != nil {
t.Fatal(err)
}
defer teardownTest()
defer os.RemoveAll(datadir)
// create a new feed
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
topic, _ := NewTopic("Very slow updates", nil)
fd := Feed{
Topic: topic,
User: signer.Address(),
}
// publish one update every 5 years since Unix 0 until today
today := uint64(1533799046)
var epoch lookup.Epoch
var lastUpdateTime uint64
for T := uint64(0); T < today; T += 5 * Year {
request := NewFirstRequest(fd.Topic)
request.Epoch = lookup.GetNextEpoch(epoch, T)
request.data = generateData(T) // this generates some data that depends on T, so we can check later
if err := request.Sign(signer); err != nil {
t.Fatal(err)
}
if _, err := rh.Update(ctx, request); err != nil {
t.Fatal(err)
}
epoch = request.Epoch
lastUpdateTime = T
}
query := NewQuery(&fd, today, lookup.NoClue)
_, err = rh.Lookup(ctx, query)
if err != nil {
t.Fatal(err)
}
_, content, err := rh.GetContent(&fd)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(generateData(lastUpdateTime), content) {
t.Fatalf("Expected to recover last written value %d, got %s", lastUpdateTime, string(content))
}
// lookup the closest update to 35*Year + 6* Month (~ June 2005):
// it should find the update we put on 35*Year, since we were updating every 5 years.
query.TimeLimit = 35*Year + 6*Month
_, err = rh.Lookup(ctx, query)
if err != nil {
t.Fatal(err)
}
_, content, err = rh.GetContent(&fd)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(generateData(35*Year), content) {
t.Fatalf("Expected to recover %d, got %s", 35*Year, string(content))
}
}
func TestValidator(t *testing.T) {
// make fake timeProvider
timeProvider := &fakeTimeProvider{
currentTime: startTime.Time,
}
// signer containing private key. Alice will be the good girl
signer := newAliceSigner()
// set up sim timeProvider
rh, _, teardownTest, err := setupTest(timeProvider, signer)
if err != nil {
t.Fatal(err)
}
defer teardownTest()
// create new feed
topic, _ := NewTopic(subtopicName, nil)
fd := Feed{
Topic: topic,
User: signer.Address(),
}
mr := NewFirstRequest(fd.Topic)
// chunk with address
data := []byte("foo")
mr.SetData(data)
if err := mr.Sign(signer); err != nil {
t.Fatalf("sign fail: %v", err)
}
chunk, err := mr.toChunk()
if err != nil {
t.Fatal(err)
}
if !rh.Validate(chunk) {
t.Fatal("Chunk validator fail on update chunk")
}
address := chunk.Address()
// mess with the address
address[0] = 11
address[15] = 99
if rh.Validate(storage.NewChunk(address, chunk.Data())) {
t.Fatal("Expected Validate to fail with false chunk address")
}
}
// tests that the content address validator correctly checks the data
// tests that feed update chunks are passed through content address validator
// there is some redundancy in this test as it also tests content addressed chunks,
// which should be evaluated as invalid chunks by this validator
func TestValidatorInStore(t *testing.T) {
// make fake timeProvider
TimestampProvider = &fakeTimeProvider{
currentTime: startTime.Time,
}
// signer containing private key
signer := newAliceSigner()
// set up localstore
datadir, err := ioutil.TempDir("", "storage-testfeedsvalidator")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(datadir)
localstore, err := localstore.New(datadir, make([]byte, 32), nil)
if err != nil {
t.Fatal(err)
}
// set up Swarm feeds handler and add it as a validator to the localstore
fhParams := &HandlerParams{}
fh := NewHandler(fhParams)
store := chunk.NewValidatorStore(localstore, fh)
// create content addressed chunks, one good, one faulty
chunks := storage.GenerateRandomChunks(chunk.DefaultSize, 2)
goodChunk := chunks[0]
badChunk := storage.NewChunk(chunks[1].Address(), goodChunk.Data())
topic, _ := NewTopic("xyzzy", nil)
fd := Feed{
Topic: topic,
User: signer.Address(),
}
// create a feed update chunk with correct public key
id := ID{
Epoch: lookup.Epoch{Time: 42,
Level: 1,
},
Feed: fd,
}
updateAddr := id.Addr()
data := []byte("bar")
r := new(Request)
r.idAddr = updateAddr
r.Update.ID = id
r.data = data
r.Sign(signer)
uglyChunk, err := r.toChunk()
if err != nil {
t.Fatal(err)
}
// put the chunks in the store and check their error status
_, err = store.Put(context.Background(), chunk.ModePutUpload, goodChunk)
if err == nil {
t.Fatal("expected error on good content address chunk with feed update validator only, but got nil")
}
_, err = store.Put(context.Background(), chunk.ModePutUpload, badChunk)
if err == nil {
t.Fatal("expected error on bad content address chunk with feed update validator only, but got nil")
}
_, err = store.Put(context.Background(), chunk.ModePutUpload, uglyChunk)
if err != nil {
t.Fatalf("expected no error on feed update chunk with feed update validator only, but got: %s", err)
}
}
// create rpc and feeds Handler
func setupTest(timeProvider timestampProvider, signer Signer) (fh *TestHandler, datadir string, teardown func(), err error) {
var fsClean func()
var rpcClean func()
cleanF = func() {
if fsClean != nil {
fsClean()
}
if rpcClean != nil {
rpcClean()
}
}
// temp datadir
datadir, err = ioutil.TempDir("", "fh")
if err != nil {
return nil, "", nil, err
}
fsClean = func() {
os.RemoveAll(datadir)
}
TimestampProvider = timeProvider
fhParams := &HandlerParams{}
fh, err = NewTestHandler(datadir, fhParams)
return fh, datadir, cleanF, err
}
func newAliceSigner() *GenericSigner {
privKey, _ := crypto.HexToECDSA("deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef")
return NewGenericSigner(privKey)
}
func newBobSigner() *GenericSigner {
privKey, _ := crypto.HexToECDSA("accedeaccedeaccedeaccedeaccedeaccedeaccedeaccedeaccedeaccedecaca")
return NewGenericSigner(privKey)
}
func newCharlieSigner() *GenericSigner {
privKey, _ := crypto.HexToECDSA("facadefacadefacadefacadefacadefacadefacadefacadefacadefacadefaca")
return NewGenericSigner(privKey)
}

123
storage/feed/id.go Normal file
View File

@ -0,0 +1,123 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package feed
import (
"fmt"
"hash"
"strconv"
"github.com/ethereum/go-ethereum/common"
"github.com/ethersphere/swarm/storage/feed/lookup"
"github.com/ethersphere/swarm/storage"
)
// ID uniquely identifies an update on the network.
type ID struct {
Feed `json:"feed"`
lookup.Epoch `json:"epoch"`
}
// ID layout:
// Feed feedLength bytes
// Epoch EpochLength bytes
const idLength = feedLength + lookup.EpochLength
// Addr calculates the feed update chunk address corresponding to this ID
func (u *ID) Addr() (updateAddr storage.Address) {
serializedData := make([]byte, idLength)
var cursor int
u.Feed.binaryPut(serializedData[cursor : cursor+feedLength])
cursor += feedLength
eid := u.Epoch.ID()
copy(serializedData[cursor:cursor+lookup.EpochLength], eid[:])
hasher := hashPool.Get().(hash.Hash)
defer hashPool.Put(hasher)
hasher.Reset()
hasher.Write(serializedData)
return hasher.Sum(nil)
}
// binaryPut serializes this instance into the provided slice
func (u *ID) binaryPut(serializedData []byte) error {
if len(serializedData) != idLength {
return NewErrorf(ErrInvalidValue, "Incorrect slice size to serialize ID. Expected %d, got %d", idLength, len(serializedData))
}
var cursor int
if err := u.Feed.binaryPut(serializedData[cursor : cursor+feedLength]); err != nil {
return err
}
cursor += feedLength
epochBytes, err := u.Epoch.MarshalBinary()
if err != nil {
return err
}
copy(serializedData[cursor:cursor+lookup.EpochLength], epochBytes[:])
cursor += lookup.EpochLength
return nil
}
// binaryLength returns the expected size of this structure when serialized
func (u *ID) binaryLength() int {
return idLength
}
// binaryGet restores the current instance from the information contained in the passed slice
func (u *ID) binaryGet(serializedData []byte) error {
if len(serializedData) != idLength {
return NewErrorf(ErrInvalidValue, "Incorrect slice size to read ID. Expected %d, got %d", idLength, len(serializedData))
}
var cursor int
if err := u.Feed.binaryGet(serializedData[cursor : cursor+feedLength]); err != nil {
return err
}
cursor += feedLength
if err := u.Epoch.UnmarshalBinary(serializedData[cursor : cursor+lookup.EpochLength]); err != nil {
return err
}
cursor += lookup.EpochLength
return nil
}
// FromValues deserializes this instance from a string key-value store
// useful to parse query strings
func (u *ID) FromValues(values Values) error {
level, _ := strconv.ParseUint(values.Get("level"), 10, 32)
u.Epoch.Level = uint8(level)
u.Epoch.Time, _ = strconv.ParseUint(values.Get("time"), 10, 64)
if u.Feed.User == (common.Address{}) {
return u.Feed.FromValues(values)
}
return nil
}
// AppendValues serializes this structure into the provided string key-value store
// useful to build query strings
func (u *ID) AppendValues(values Values) {
values.Set("level", fmt.Sprintf("%d", u.Epoch.Level))
values.Set("time", fmt.Sprintf("%d", u.Epoch.Time))
u.Feed.AppendValues(values)
}
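
In other words, the chunk address of an update is fully determined by who publishes (the feed) and when (the epoch). A minimal sketch, assuming the feed, lookup and storage packages above; the helper name is hypothetical:

// deriveAddr is a hypothetical helper showing the derivation.
func deriveAddr(f feed.Feed, at lookup.Epoch) storage.Address {
	id := feed.ID{Feed: f, Epoch: at}
	// Addr hashes the serialized feed followed by the epoch ID (base time
	// plus level), so anyone who knows the feed and the epoch can derive
	// the same chunk address without any network lookup.
	return id.Addr()
}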

28
storage/feed/id_test.go Normal file
View File

@ -0,0 +1,28 @@
package feed
import (
"testing"
"github.com/ethersphere/swarm/storage/feed/lookup"
)
func getTestID() *ID {
return &ID{
Feed: *getTestFeed(),
Epoch: lookup.GetFirstEpoch(1000),
}
}
func TestIDAddr(t *testing.T) {
id := getTestID()
updateAddr := id.Addr()
compareByteSliceToExpectedHex(t, "updateAddr", updateAddr, "0x842d0a81987b9755dfeaa5558f5c134c1c0af48b6545005cac7b533d9411453a")
}
func TestIDSerializer(t *testing.T) {
testBinarySerializerRecovery(t, getTestID(), "0x776f726c64206e657773207265706f72742c20657665727920686f7572000000876a8936a7cd0b79ef0735ad0896c1afe278781ce80300000000001f")
}
func TestIDLengthCheck(t *testing.T) {
testBinarySerializerLengthCheck(t, getTestID())
}

View File

@ -0,0 +1,63 @@
package lookup
import "context"
// FluzCapacitorAlgorithm works by narrowing the epoch search area if an update is found
// going back and forth in time
// First, it will attempt to find an update where it should be now if the hint was
// really the last update. If that lookup fails, then the last update must be either the hint itself
// or the epochs right below. If, however, that lookup succeeds, then the update must be
// that one or within the epochs right below.
// see the guide for a more graphical representation
func FluzCapacitorAlgorithm(ctx context.Context, now uint64, hint Epoch, read ReadFunc) (value interface{}, err error) {
var lastFound interface{}
var epoch Epoch
if hint == NoClue {
hint = worstHint
}
t := now
for {
epoch = GetNextEpoch(hint, t)
value, err = read(ctx, epoch, now)
if err != nil {
return nil, err
}
if value != nil {
lastFound = value
if epoch.Level == LowestLevel || epoch.Equals(hint) {
return value, nil
}
hint = epoch
continue
}
if epoch.Base() == hint.Base() {
if lastFound != nil {
return lastFound, nil
}
// we have reached the hint itself
if hint == worstHint {
return nil, nil
}
// check it out
value, err = read(ctx, hint, now)
if err != nil {
return nil, err
}
if value != nil {
return value, nil
}
// bad hint.
t = hint.Base()
hint = worstHint
continue
}
base := epoch.Base()
if base == 0 {
return nil, nil
}
t = base - 1
}
}

View File

@ -0,0 +1,185 @@
package lookup
import (
"context"
"sync/atomic"
"time"
)
type stepFunc func(ctx context.Context, t uint64, hint Epoch) interface{}
// LongEarthLookaheadDelay is the head start R gets before the lookahead path launches
var LongEarthLookaheadDelay = 250 * time.Millisecond
// LongEarthLookbackDelay is the head start R gets before the lookback path launches
var LongEarthLookbackDelay = 250 * time.Millisecond
// LongEarthAlgorithm explores possible lookup paths in parallel, pruning paths as soon
// as a more promising lookup path is found. As a result, this lookup algorithm is an order
// of magnitude faster than the FluzCapacitor algorithm, but at the expense of more exploratory reads.
// This algorithm works as follows. On each step, the next epoch is immediately looked up (R)
// and given a head start, while two parallel "steps" are launched a short time after:
// look ahead (A) is the path the algorithm would take if the R lookup returns a value, whereas
// look back (B) is the path the algorithm would take if the R lookup failed.
// As soon as R actually finishes, the A or B path is pruned, depending on the value of R.
// If A returns earlier than R, then the R and B read operations can be safely canceled, saving time.
// The maximum number of active read operations is calculated as 2^(timeout/headstart).
// If the head start is infinite, this algorithm behaves like FluzCapacitor.
// timeout is the maximum execution time of the passed `read` function.
// The two head starts can be configured by changing LongEarthLookaheadDelay or LongEarthLookbackDelay.
func LongEarthAlgorithm(ctx context.Context, now uint64, hint Epoch, read ReadFunc) (interface{}, error) {
if hint == NoClue {
hint = worstHint
}
var stepCounter int32 // for debugging, stepCounter lets us give an ID to each step instance
errc := make(chan struct{}) // errc will help as an error shortcut signal
var gerr error // in case of error, this variable will be set
var step stepFunc // For efficiency, the algorithm step is defined as a closure
step = func(ctxS context.Context, t uint64, last Epoch) interface{} {
stepID := atomic.AddInt32(&stepCounter, 1) // give an ID to this call instance
trace(stepID, "init: t=%d, last=%s", t, last.String())
var valueA, valueB, valueR interface{}
// initialize the three read contexts
ctxR, cancelR := context.WithCancel(ctxS) // will handle the current read operation
ctxA, cancelA := context.WithCancel(ctxS) // will handle the lookahead path
ctxB, cancelB := context.WithCancel(ctxS) // will handle the lookback path
epoch := GetNextEpoch(last, t) // calculate the epoch to look up in this step instance
// define the lookAhead function, which will follow the path as if R was successful
lookAhead := func() {
valueA = step(ctxA, t, epoch) // launch the next step, recursively.
if valueA != nil { // if this path is successful, we don't need R or B.
cancelB()
cancelR()
}
}
// define the lookBack function, which will follow the path as if R was unsuccessful
lookBack := func() {
if epoch.Base() == last.Base() {
return
}
base := epoch.Base()
if base == 0 {
return
}
valueB = step(ctxB, base-1, last)
}
go func() { //goroutine to read the current epoch (R)
defer cancelR()
var err error
valueR, err = read(ctxR, epoch, now) // read this epoch
if valueR == nil { // if unsuccessful, cancel lookahead, otherwise cancel lookback.
cancelA()
} else {
cancelB()
}
if err != nil && err != context.Canceled {
gerr = err
close(errc)
}
}()
go func() { // goroutine to give a headstart to R and then launch lookahead.
defer cancelA()
// if we are at the lowest level or the epoch to look up equals the last one,
// then we cannot lookahead (can't go lower or repeat the same lookup, this would
// cause an infinite loop)
if epoch.Level == LowestLevel || epoch.Equals(last) {
return
}
// give a head start to R, or launch immediately if R finishes early enough
select {
case <-TimeAfter(LongEarthLookaheadDelay):
lookAhead()
case <-ctxR.Done():
if valueR != nil {
lookAhead() // only look ahead if R was successful
}
case <-ctxA.Done():
}
}()
go func() { // goroutine to give a headstart to R and then launch lookback.
defer cancelB()
// give a head start to R, or launch immediately if R finishes early enough
select {
case <-TimeAfter(LongEarthLookbackDelay):
lookBack()
case <-ctxR.Done():
if valueR == nil {
lookBack() // only look back in case R failed
}
case <-ctxB.Done():
}
}()
<-ctxA.Done()
if valueA != nil {
trace(stepID, "Returning valueA=%v", valueA)
return valueA
}
<-ctxR.Done()
if valueR != nil {
trace(stepID, "Returning valueR=%v", valueR)
return valueR
}
<-ctxB.Done()
trace(stepID, "Returning valueB=%v", valueB)
return valueB
}
var value interface{}
stepCtx, cancel := context.WithCancel(ctx)
go func() { // launch the root step in its own goroutine to allow cancellation
defer cancel()
value = step(stepCtx, now, hint)
}()
// wait for the algorithm to finish, but shortcut in case
// of errors
select {
case <-stepCtx.Done():
case <-errc:
cancel()
return nil, gerr
}
if ctx.Err() != nil {
return nil, ctx.Err()
}
if value != nil || hint == worstHint {
return value, nil
}
// at this point the algorithm did not return a value,
// so we challenge the hint given.
value, err := read(ctx, hint, now)
if err != nil {
return nil, err
}
if value != nil {
return value, nil // hint is valid, return it.
}
// hint is invalid. Invoke the algorithm
// without hint.
now = hint.Base()
if hint.Level == HighestLevel {
now--
}
return LongEarthAlgorithm(ctx, now, NoClue, read)
}
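
A minimal sketch of driving the algorithm directly: the read function below is backed by a plain map from epoch IDs to values, which is hypothetical and stands in for a real chunk store. Per the ReadFunc contract it returns nil (not an error) on a miss, so the algorithm keeps exploring.

func findLatest(ctx context.Context, updates map[EpochID]interface{}, now uint64) (interface{}, error) {
	read := func(ctx context.Context, epoch Epoch, now uint64) (interface{}, error) {
		if v, ok := updates[epoch.ID()]; ok {
			return v, nil // found an update in this epoch
		}
		return nil, nil // miss: keep exploring
	}
	return LongEarthAlgorithm(ctx, now, NoClue, read)
}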

View File

@ -0,0 +1,91 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package lookup
import (
"encoding/binary"
"errors"
"fmt"
)
// Epoch represents a time slot at a particular frequency level
type Epoch struct {
Time uint64 `json:"time"` // Time stores the time at which the update or lookup takes place
Level uint8 `json:"level"` // Level indicates the frequency level as the exponent of a power of 2
}
// EpochID is a unique identifier for an Epoch, based on its level and base time.
type EpochID [8]byte
// EpochLength stores the serialized binary length of an Epoch
const EpochLength = 8
// MaxTime contains the highest possible time value an Epoch can handle:
// the serialized form stores the level in the top byte, leaving 56 bits for time
const MaxTime uint64 = (1 << 56) - 1
// Base returns the base time of the Epoch
func (e *Epoch) Base() uint64 {
return getBaseTime(e.Time, e.Level)
}
// ID returns the unique identifier of this epoch
func (e *Epoch) ID() EpochID {
base := e.Base()
var id EpochID
binary.LittleEndian.PutUint64(id[:], base)
id[7] = e.Level
return id
}
// MarshalBinary implements the encoding.BinaryMarshaler interface
func (e *Epoch) MarshalBinary() (data []byte, err error) {
b := make([]byte, 8)
binary.LittleEndian.PutUint64(b[:], e.Time)
b[7] = e.Level
return b, nil
}
// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface
func (e *Epoch) UnmarshalBinary(data []byte) error {
if len(data) != EpochLength {
return errors.New("Invalid data unmarshalling Epoch")
}
b := make([]byte, 8)
copy(b, data)
e.Level = b[7]
b[7] = 0
e.Time = binary.LittleEndian.Uint64(b)
return nil
}
// After returns true if this epoch occurs later than the other epoch.
// If both epochs have the same time, the one with the lower level is considered later.
func (e *Epoch) After(epoch Epoch) bool {
if e.Time == epoch.Time {
return e.Level < epoch.Level
}
return e.Time >= epoch.Time
}
// Equals compares two epochs and returns true if they refer to the same time period.
func (e *Epoch) Equals(epoch Epoch) bool {
return e.Level == epoch.Level && e.Base() == epoch.Base()
}
// String implements the Stringer interface.
func (e *Epoch) String() string {
return fmt.Sprintf("Epoch{Base: %d, Time:%d, Level:%d}", e.Base(), e.Time, e.Level)
}
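
A worked example of the arithmetic above (function name is hypothetical): at Level 5 an epoch spans 2^5 = 32 seconds, so Base() clears the low 5 bits of Time; the level then rides in the top byte of the 8-byte ID, which is why MaxTime is capped at 2^56-1.

func epochExample() {
	e := Epoch{Time: 4200, Level: 5}
	base := e.Base() // 4200 &^ 31 == 4192: the start of the 32-second slot containing t=4200
	id := e.ID()     // little-endian 4192, with Level (5) stored in byte 7
	_, _ = base, id
}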

View File

@ -0,0 +1,57 @@
package lookup_test
import (
"testing"
"github.com/ethersphere/swarm/storage/feed/lookup"
)
func TestMarshallers(t *testing.T) {
for i := uint64(1); i < lookup.MaxTime; i *= 3 {
e := lookup.Epoch{
Time: i,
Level: uint8(i % 20),
}
b, err := e.MarshalBinary()
if err != nil {
t.Fatal(err)
}
var e2 lookup.Epoch
if err := e2.UnmarshalBinary(b); err != nil {
t.Fatal(err)
}
if e != e2 {
t.Fatal("Expected unmarshalled epoch to be equal to marshalled onet.Fatal(err)")
}
}
}
func TestAfter(t *testing.T) {
a := lookup.Epoch{
Time: 5,
Level: 3,
}
b := lookup.Epoch{
Time: 6,
Level: 3,
}
c := lookup.Epoch{
Time: 6,
Level: 4,
}
if !b.After(a) {
t.Fatal("Expected 'after' to be true, got false")
}
if b.After(b) {
t.Fatal("Expected 'after' to be false when both epochs are identical, got true")
}
if !b.After(c) {
t.Fatal("Expected 'after' to be true when both epochs have the same time but the level is lower in the first one, but got false")
}
}

View File

@ -0,0 +1,136 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
/*
Package lookup defines feed lookup algorithms and provides tools to place updates
so they can be found
*/
package lookup
import (
"context"
"time"
)
const maxuint64 = ^uint64(0)
// LowestLevel establishes the frequency resolution of the lookup algorithm as a power of 2.
const LowestLevel uint8 = 0 // default is 0 (1 second)
// HighestLevel sets the lowest frequency the algorithm will operate at, as a power of 2.
// 31 -> 2^31 seconds is roughly 68 years.
const HighestLevel = 31
// DefaultLevel sets what level will be chosen to search when there is no hint
const DefaultLevel = HighestLevel
// Algorithm is the function signature of a lookup algorithm
type Algorithm func(ctx context.Context, now uint64, hint Epoch, read ReadFunc) (value interface{}, err error)
// Lookup finds the update with the highest timestamp that is smaller or equal than 'now'
// It takes a hint which should be the epoch where the last known update was
// If you don't know in what epoch the last update happened, simply submit lookup.NoClue
// read() will be called on each lookup attempt
// Returns an error only if read() returns an error
// Returns nil if an update was not found
var Lookup Algorithm = LongEarthAlgorithm
// TimeAfter must point to a function that returns a timer
// This is here so that tests can replace it with
// a mock up timer factory to simulate time deterministically
var TimeAfter = time.After
// ReadFunc is a handler called by Lookup each time it attempts to find a value
// It should return <nil> if a value is not found
// It should return <nil> if a value is found, but its timestamp is higher than "now"
// It should only return an error in case the handler wants to stop the
// lookup process entirely.
type ReadFunc func(ctx context.Context, epoch Epoch, now uint64) (interface{}, error)
// NoClue is a hint that can be provided when the Lookup caller does not have
// a clue about where the last update may be
var NoClue = Epoch{}
// getBaseTime returns the epoch base time of the given
// time and level
func getBaseTime(t uint64, level uint8) uint64 {
return t & (maxuint64 << level)
}
// Hint creates a hint based only on the last known update time
func Hint(last uint64) Epoch {
return Epoch{
Time: last,
Level: DefaultLevel,
}
}
// GetNextLevel returns the frequency level a next update should be placed at, provided where
// the last update was and what time it is now.
// This is the first nonzero bit of the XOR of 'last' and 'now', counting from the most significant bit,
// but limited so it never returns a level smaller than last.Level-1
func GetNextLevel(last Epoch, now uint64) uint8 {
// First XOR the last epoch base time with the current clock.
// This will set all the common most significant bits to zero.
mix := (last.Base() ^ now)
// Then, make sure we stop the below loop before one level below the current, by setting
// that level's bit to 1.
// If the next level is lower than the current one, it must be exactly level-1 and not lower.
mix |= (1 << (last.Level - 1))
// if the last update was more than 2^highestLevel seconds ago, choose the highest level
if mix > (maxuint64 >> (64 - HighestLevel - 1)) {
return HighestLevel
}
// set up a mask to scan for nonzero bits, starting at the highest level
mask := uint64(1 << (HighestLevel))
for i := uint8(HighestLevel); i > LowestLevel; i-- {
if mix&mask != 0 { // if we find a nonzero bit, this is the level the next update should be at.
return i
}
mask = mask >> 1 // move our bit one position to the right
}
return 0
}
// GetNextEpoch returns the epoch where the next update should be located
// according to where the previous update was
// and what time it is now.
func GetNextEpoch(last Epoch, now uint64) Epoch {
if last == NoClue {
return GetFirstEpoch(now)
}
level := GetNextLevel(last, now)
return Epoch{
Level: level,
Time: now,
}
}
// GetFirstEpoch returns the epoch where the first update should be located
// based on what time it is now.
func GetFirstEpoch(now uint64) Epoch {
return Epoch{Level: HighestLevel, Time: now}
}
var worstHint = Epoch{Time: 0, Level: 63}
var trace = func(id int32, formatString string, a ...interface{}) {
//fmt.Printf("Step ID #%d "+formatString+"\n", append([]interface{}{id}, a...)...)
}
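
A worked example of the level selection (function name is hypothetical): suppose the last update sits at Epoch{Time: 4200, Level: 5}, whose base is 4192, and it is now t = 4221. Then mix = 4192 ^ 4221 = 29 (binary 11101); forcing bit Level-1 = 4 leaves it unchanged, and the highest set bit is bit 4, so GetNextLevel returns 4 and the next update lands one level below the previous one.

func nextEpochExample() {
	last := Epoch{Time: 4200, Level: 5} // base 4192
	next := GetNextEpoch(last, 4221)
	// next == Epoch{Time: 4221, Level: 4}
	_ = next
}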

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,154 @@
package lookup_test
/*
This file contains components to mock a storage for testing
lookup algorithms and measure the number of reads.
*/
import (
"context"
"fmt"
"sync"
"time"
"github.com/ethersphere/swarm/log"
"github.com/ethersphere/swarm/storage/feed/lookup"
)
// Data is a struct to keep a value to store/retrieve during testing
type Data struct {
Payload uint64
Time uint64
}
// String implements fmt.Stringer
func (d *Data) String() string {
return fmt.Sprintf("%d-%d", d.Payload, d.Time)
}
// DataMap is an internal map to hold the mocked storage
type DataMap map[lookup.EpochID]*Data
// StoreConfig specifies the simulated delays for each type of
// read operation
type StoreConfig struct {
CacheReadTime time.Duration // time it takes to read from the cache
FailedReadTime time.Duration // time it takes to acknowledge a read as failed
SuccessfulReadTime time.Duration // time it takes to fetch data
}
// StoreCounters will track read count metrics
type StoreCounters struct {
reads int
cacheHits int
failed int
successful int
canceled int
maxSimultaneous int
}
// Store simulates a store and keeps track of performance counters
type Store struct {
StoreConfig
StoreCounters
data DataMap
cache DataMap
lock sync.RWMutex
activeReads int
}
// NewStore returns a new mock store ready for use
func NewStore(config *StoreConfig) *Store {
store := &Store{
StoreConfig: *config,
data: make(DataMap),
}
store.Reset()
return store
}
// Reset resets performance counters and clears the cache
func (s *Store) Reset() {
s.cache = make(DataMap)
s.StoreCounters = StoreCounters{}
}
// Put stores a value in the mock store at the given epoch
func (s *Store) Put(epoch lookup.Epoch, value *Data) {
log.Debug("Write: %d-%d, value='%d'\n", epoch.Base(), epoch.Level, value.Payload)
s.data[epoch.ID()] = value
}
// Update runs lookup.GetNextEpoch to place the update in the appropriate epoch
func (s *Store) Update(last lookup.Epoch, now uint64, value *Data) lookup.Epoch {
epoch := lookup.GetNextEpoch(last, now)
s.Put(epoch, value)
return epoch
}
// Get retrieves data at the specified epoch, simulating a delay
func (s *Store) Get(ctx context.Context, epoch lookup.Epoch, now uint64) (value interface{}, err error) {
epochID := epoch.ID()
var operationTime time.Duration
defer func() { // simulate a delay according to what has actually happened
select {
case <-lookup.TimeAfter(operationTime):
case <-ctx.Done():
s.lock.Lock()
s.canceled++
s.lock.Unlock()
value = nil
err = ctx.Err()
}
s.lock.Lock()
s.activeReads--
s.lock.Unlock()
}()
s.lock.Lock()
defer s.lock.Unlock()
s.reads++
s.activeReads++
if s.activeReads > s.maxSimultaneous {
s.maxSimultaneous = s.activeReads
}
// 1.- Simulate a cache read
item := s.cache[epochID]
operationTime += s.CacheReadTime
if item != nil {
s.cacheHits++
if item.Time <= now {
s.successful++
return item, nil
}
return nil, nil
}
// 2.- simulate a full read
item = s.data[epochID]
if item != nil {
operationTime += s.SuccessfulReadTime
s.successful++
s.cache[epochID] = item
if item.Time <= now {
return item, nil
}
} else {
operationTime += s.FailedReadTime
s.failed++
}
return nil, nil
}
// MakeReadFunc returns a read function suitable for the lookup algorithm, mapped
// to this mock storage
func (s *Store) MakeReadFunc() lookup.ReadFunc {
return func(ctx context.Context, epoch lookup.Epoch, now uint64) (interface{}, error) {
return s.Get(ctx, epoch, now)
}
}
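
Tying the mock to the lookup API, a test can seed the store and hand its read function to the algorithm; the function name and delay values below are arbitrary, and imports of the lookup package and time are assumed:

func exampleLookupAgainstMock(ctx context.Context) (interface{}, error) {
	store := NewStore(&StoreConfig{
		CacheReadTime:      1 * time.Millisecond,
		FailedReadTime:     10 * time.Millisecond,
		SuccessfulReadTime: 5 * time.Millisecond,
	})
	// place one update at t=1000, then look it up at t=2000, hinting at its epoch
	epoch := store.Update(lookup.NoClue, 1000, &Data{Payload: 42, Time: 1000})
	return lookup.Lookup(ctx, 2000, epoch, store.MakeReadFunc())
}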

View File

@ -0,0 +1,128 @@
package lookup_test
// This file contains simple time simulation tools for testing
// and measuring time-aware algorithms
import (
"sync"
"time"
)
// Timer tracks information about a simulated timer
type Timer struct {
deadline time.Time
signal chan time.Time
id int
}
// Stopwatch measures simulated execution time and manages simulated timers
type Stopwatch struct {
t time.Time
resolution time.Duration
timers map[int]*Timer
timerCounter int
stopSignal chan struct{}
lock sync.RWMutex
}
// NewStopwatch returns a simulated clock that ticks on `resolution` intervals
func NewStopwatch(resolution time.Duration) *Stopwatch {
s := &Stopwatch{
resolution: resolution,
}
s.Reset()
return s
}
// Reset clears all timers and sets the stopwatch to zero
func (s *Stopwatch) Reset() {
s.t = time.Time{}
s.timers = make(map[int]*Timer)
s.Stop()
}
// Tick advances simulated time by the stopwatch's resolution and triggers
// all due timers
func (s *Stopwatch) Tick() {
s.t = s.t.Add(s.resolution)
s.lock.Lock()
defer s.lock.Unlock()
for id, timer := range s.timers {
if s.t.After(timer.deadline) || s.t.Equal(timer.deadline) {
timer.signal <- s.t
close(timer.signal)
delete(s.timers, id)
}
}
}
// NewTimer returns a new timer that will trigger after `duration` elapses in the
// simulation
func (s *Stopwatch) NewTimer(duration time.Duration) <-chan time.Time {
s.lock.Lock()
defer s.lock.Unlock()
s.timerCounter++
timer := &Timer{
deadline: s.t.Add(duration),
signal: make(chan time.Time, 1),
id: s.timerCounter,
}
s.timers[timer.id] = timer
return timer.signal
}
// TimeAfter returns a simulated timer factory that can replace `time.After`
func (s *Stopwatch) TimeAfter() func(d time.Duration) <-chan time.Time {
return func(d time.Duration) <-chan time.Time {
return s.NewTimer(d)
}
}
// Elapsed returns the time that has passed in the simulation
func (s *Stopwatch) Elapsed() time.Duration {
return s.t.Sub(time.Time{})
}
// Run starts the time simulation
func (s *Stopwatch) Run() {
go func() {
stopSignal := make(chan struct{})
s.lock.Lock()
if s.stopSignal != nil {
close(s.stopSignal)
}
s.stopSignal = stopSignal
s.lock.Unlock()
for {
select {
case <-time.After(1 * time.Millisecond):
s.Tick()
case <-stopSignal:
return
}
}
}()
}
// Stop stops the time simulation
func (s *Stopwatch) Stop() {
s.lock.Lock()
defer s.lock.Unlock()
if s.stopSignal != nil {
close(s.stopSignal)
s.stopSignal = nil
}
}
// Measure runs measuredFunc under the simulated clock and returns the simulated time elapsed
func (s *Stopwatch) Measure(measuredFunc func()) time.Duration {
s.Reset()
s.Run()
defer s.Stop()
measuredFunc()
return s.Elapsed()
}
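
In a test, the stopwatch replaces the real timer factory so lookups run against simulated time. A sketch, where `readFunc` is assumed to be some lookup.ReadFunc under test and a context import is assumed:

func exampleSimulatedLookup(readFunc lookup.ReadFunc, now uint64) time.Duration {
	swatch := NewStopwatch(10 * time.Millisecond)
	lookup.TimeAfter = swatch.TimeAfter() // route the algorithm's timers through the simulation
	return swatch.Measure(func() {
		_, _ = lookup.Lookup(context.Background(), now, lookup.NoClue, readFunc)
	})
}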

78
storage/feed/query.go Normal file
View File

@ -0,0 +1,78 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package feed
import (
"fmt"
"strconv"
"github.com/ethereum/go-ethereum/common"
"github.com/ethersphere/swarm/storage/feed/lookup"
)
// Query is used to specify constraints when performing an update lookup
// TimeLimit indicates an upper bound for the search. Set to 0 for "now"
type Query struct {
Feed
Hint lookup.Epoch
TimeLimit uint64
}
// FromValues deserializes this instance from a string key-value store
// useful to parse query strings
func (q *Query) FromValues(values Values) error {
time, _ := strconv.ParseUint(values.Get("time"), 10, 64)
q.TimeLimit = time
level, _ := strconv.ParseUint(values.Get("hint.level"), 10, 32)
q.Hint.Level = uint8(level)
q.Hint.Time, _ = strconv.ParseUint(values.Get("hint.time"), 10, 64)
if q.Feed.User == (common.Address{}) {
return q.Feed.FromValues(values)
}
return nil
}
// AppendValues serializes this structure into the provided string key-value store
// useful to build query strings
func (q *Query) AppendValues(values Values) {
if q.TimeLimit != 0 {
values.Set("time", fmt.Sprintf("%d", q.TimeLimit))
}
if q.Hint.Level != 0 {
values.Set("hint.level", fmt.Sprintf("%d", q.Hint.Level))
}
if q.Hint.Time != 0 {
values.Set("hint.time", fmt.Sprintf("%d", q.Hint.Time))
}
q.Feed.AppendValues(values)
}
// NewQuery constructs a Query structure to find updates on or before `time`
// if time == 0, the latest update will be looked up
func NewQuery(feed *Feed, time uint64, hint lookup.Epoch) *Query {
return &Query{
TimeLimit: time,
Feed: *feed,
Hint: hint,
}
}
// NewQueryLatest generates lookup parameters that look for the latest update to a feed
func NewQueryLatest(feed *Feed, hint lookup.Epoch) *Query {
return NewQuery(feed, 0, hint)
}
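
As with Feed and ID, a Query round-trips through a string key-value store. A minimal sketch using `url.Values` (which satisfies the Values interface), with a feed `fd` assumed to exist:

q := feed.NewQuery(&fd, 5000, lookup.Epoch{Time: 1000, Level: 31})
values := url.Values{}
q.AppendValues(values)
// values.Encode() -> "hint.level=31&hint.time=1000&time=5000&topic=0x...&user=0x..."
var q2 feed.Query
_ = q2.FromValues(values) // recovers the same query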

View File

@ -0,0 +1,38 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package feed
import (
"testing"
)
func getTestQuery() *Query {
id := getTestID()
return &Query{
TimeLimit: 5000,
Feed: id.Feed,
Hint: id.Epoch,
}
}
func TestQueryValues(t *testing.T) {
var expected = KV{"hint.level": "31", "hint.time": "1000", "time": "5000", "topic": "0x776f726c64206e657773207265706f72742c20657665727920686f7572000000", "user": "0x876A8936A7Cd0b79Ef0735AD0896c1AFe278781c"}
query := getTestQuery()
testValueSerializer(t, query, expected)
}

286
storage/feed/request.go Normal file
View File

@ -0,0 +1,286 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package feed
import (
"bytes"
"encoding/json"
"hash"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/common/hexutil"
"github.com/ethersphere/swarm/storage"
"github.com/ethersphere/swarm/storage/feed/lookup"
)
// Request represents a request to sign or signed feed update message
type Request struct {
Update // actual content that will be put on the chunk, less signature
Signature *Signature
idAddr storage.Address // cached chunk address for the update (not serialized, for internal use)
binaryData []byte // cached serialized data (does not get serialized again!, for efficiency/internal use)
}
// updateRequestJSON represents a JSON-serialized UpdateRequest
type updateRequestJSON struct {
ID
ProtocolVersion uint8 `json:"protocolVersion"`
Data string `json:"data,omitempty"`
Signature string `json:"signature,omitempty"`
}
// Request layout
// Update variable-length bytes
// signature signatureLength bytes
const minimumSignedUpdateLength = minimumUpdateDataLength + signatureLength
// NewFirstRequest returns a ready to sign request to publish a first feed update
func NewFirstRequest(topic Topic) *Request {
request := new(Request)
// get the current time
now := TimestampProvider.Now().Time
request.Epoch = lookup.GetFirstEpoch(now)
request.Feed.Topic = topic
request.Header.Version = ProtocolVersion
return request
}
// SetData stores the payload data the feed update will be updated with
func (r *Request) SetData(data []byte) {
r.data = data
r.Signature = nil
}
// IsUpdate returns true if this request models a signed update; otherwise it is a signature request
func (r *Request) IsUpdate() bool {
return r.Signature != nil
}
// Verify checks that signatures are valid
func (r *Request) Verify() (err error) {
if len(r.data) == 0 {
return NewError(ErrInvalidValue, "Update does not contain data")
}
if r.Signature == nil {
return NewError(ErrInvalidSignature, "Missing signature field")
}
digest, err := r.GetDigest()
if err != nil {
return err
}
// get the address of the signer (which also checks that it's a valid signature)
r.Feed.User, err = getUserAddr(digest, *r.Signature)
if err != nil {
return err
}
// check that the lookup information contained in the chunk matches the updateAddr (chunk search key)
// that was used to retrieve this chunk
// if this validation fails, someone forged a chunk.
if !bytes.Equal(r.idAddr, r.Addr()) {
return NewError(ErrInvalidSignature, "Signature address does not match with update user address")
}
return nil
}
// Sign signs the update message, attaching the signature so it can later be validated
func (r *Request) Sign(signer Signer) error {
r.Feed.User = signer.Address()
r.binaryData = nil //invalidate serialized data
digest, err := r.GetDigest() // computes digest and serializes into .binaryData
if err != nil {
return err
}
signature, err := signer.Sign(digest)
if err != nil {
return err
}
// Although the Signer interface returns the public address of the signer,
// recover it from the signature to see if they match
userAddr, err := getUserAddr(digest, signature)
if err != nil {
return NewError(ErrInvalidSignature, "Error verifying signature")
}
if userAddr != signer.Address() { // sanity check to make sure the Signer is declaring the same address used to sign!
return NewError(ErrInvalidSignature, "Signer address does not match update user address")
}
r.Signature = &signature
r.idAddr = r.Addr()
return nil
}
// GetDigest creates the feed update digest used in signatures
// the serialized payload is cached in .binaryData
func (r *Request) GetDigest() (result common.Hash, err error) {
hasher := hashPool.Get().(hash.Hash)
defer hashPool.Put(hasher)
hasher.Reset()
dataLength := r.Update.binaryLength()
if r.binaryData == nil {
r.binaryData = make([]byte, dataLength+signatureLength)
if err := r.Update.binaryPut(r.binaryData[:dataLength]); err != nil {
return result, err
}
}
hasher.Write(r.binaryData[:dataLength]) //everything except the signature.
return common.BytesToHash(hasher.Sum(nil)), nil
}
// toChunk creates an update chunk from this (signed) request.
func (r *Request) toChunk() (storage.Chunk, error) {
// Check that the update is signed and serialized
// For efficiency, data is serialized during signature and cached in
// the binaryData field when computing the signature digest in .GetDigest()
if r.Signature == nil || r.binaryData == nil {
return nil, NewError(ErrInvalidSignature, "toChunk called without a valid signature or payload data. Call .Sign() first.")
}
updateLength := r.Update.binaryLength()
// signature is the last item in the chunk data
copy(r.binaryData[updateLength:], r.Signature[:])
chunk := storage.NewChunk(r.idAddr, r.binaryData)
return chunk, nil
}
// fromChunk populates this structure from chunk data. It does not verify the signature is valid.
func (r *Request) fromChunk(chunk storage.Chunk) error {
// for update chunk layout see Request definition
chunkdata := chunk.Data()
//deserialize the feed update portion
if err := r.Update.binaryGet(chunkdata[:len(chunkdata)-signatureLength]); err != nil {
return err
}
// Extract the signature
var signature *Signature
cursor := r.Update.binaryLength()
sigdata := chunkdata[cursor : cursor+signatureLength]
if len(sigdata) > 0 {
signature = &Signature{}
copy(signature[:], sigdata)
}
r.Signature = signature
r.idAddr = chunk.Address()
r.binaryData = chunkdata
return nil
}
// FromValues deserializes this instance from a string key-value store
// useful to parse query strings
func (r *Request) FromValues(values Values, data []byte) error {
signatureBytes, err := hexutil.Decode(values.Get("signature"))
if err != nil {
r.Signature = nil
} else {
if len(signatureBytes) != signatureLength {
return NewError(ErrInvalidSignature, "Incorrect signature length")
}
r.Signature = new(Signature)
copy(r.Signature[:], signatureBytes)
}
err = r.Update.FromValues(values, data)
if err != nil {
return err
}
r.idAddr = r.Addr()
return err
}
// AppendValues serializes this structure into the provided string key-value store
// useful to build query strings
func (r *Request) AppendValues(values Values) []byte {
if r.Signature != nil {
values.Set("signature", hexutil.Encode(r.Signature[:]))
}
return r.Update.AppendValues(values)
}
// fromJSON takes an update request JSON and populates an UpdateRequest
func (r *Request) fromJSON(j *updateRequestJSON) error {
r.ID = j.ID
r.Header.Version = j.ProtocolVersion
var err error
if j.Data != "" {
r.data, err = hexutil.Decode(j.Data)
if err != nil {
return NewError(ErrInvalidValue, "Cannot decode data")
}
}
if j.Signature != "" {
sigBytes, err := hexutil.Decode(j.Signature)
if err != nil || len(sigBytes) != signatureLength {
return NewError(ErrInvalidSignature, "Cannot decode signature")
}
r.Signature = new(Signature)
r.idAddr = r.Addr()
copy(r.Signature[:], sigBytes)
}
return nil
}
// UnmarshalJSON takes a JSON structure stored in a byte array and populates the Request object
// Implements json.Unmarshaler interface
func (r *Request) UnmarshalJSON(rawData []byte) error {
var requestJSON updateRequestJSON
if err := json.Unmarshal(rawData, &requestJSON); err != nil {
return err
}
return r.fromJSON(&requestJSON)
}
// MarshalJSON takes an update request and encodes it as a JSON structure into a byte array
// Implements json.Marshaler interface
func (r *Request) MarshalJSON() (rawData []byte, err error) {
var signatureString, dataString string
if r.Signature != nil {
signatureString = hexutil.Encode(r.Signature[:])
}
if r.data != nil {
dataString = hexutil.Encode(r.data)
}
requestJSON := &updateRequestJSON{
ID: r.ID,
ProtocolVersion: r.Header.Version,
Data: dataString,
Signature: signatureString,
}
return json.Marshal(requestJSON)
}

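Putting the pieces of this file together, the intended lifecycle is: create a request, set its payload, sign it, then serialize it (or convert it to a chunk inside the package). A minimal end-to-end sketch, assuming the import paths introduced by this commit and a throwaway key; the topic name and payload are illustrative:

package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/crypto"
	"github.com/ethersphere/swarm/storage/feed"
)

func main() {
	// Throwaway key; any Signer implementation works.
	privKey, err := crypto.GenerateKey()
	if err != nil {
		panic(err)
	}
	signer := feed.NewGenericSigner(privKey)

	// First update of a brand-new feed.
	topic, _ := feed.NewTopic("weather-reports", nil)
	request := feed.NewFirstRequest(topic)
	request.SetData([]byte("sunny, 25C"))

	// Sign computes the digest over the serialized update and attaches the signature.
	if err := request.Sign(signer); err != nil {
		panic(err)
	}
	// Verify recovers the signer from the signature and checks the chunk address.
	if err := request.Verify(); err != nil {
		panic(err)
	}

	// The signed request can now travel over the wire as JSON.
	raw, err := request.MarshalJSON()
	if err != nil {
		panic(err)
	}
	fmt.Printf("signed update: %s\n", raw)
}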
312
storage/feed/request_test.go Normal file
View File

@ -0,0 +1,312 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package feed
import (
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"reflect"
"testing"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethersphere/swarm/storage"
"github.com/ethersphere/swarm/storage/feed/lookup"
)
func areEqualJSON(s1, s2 string) (bool, error) {
//credit for the trick: turtlemonvh https://gist.github.com/turtlemonvh/e4f7404e28387fadb8ad275a99596f67
var o1 interface{}
var o2 interface{}
err := json.Unmarshal([]byte(s1), &o1)
if err != nil {
return false, fmt.Errorf("Error mashalling string 1 :: %s", err.Error())
}
err = json.Unmarshal([]byte(s2), &o2)
if err != nil {
return false, fmt.Errorf("Error mashalling string 2 :: %s", err.Error())
}
return reflect.DeepEqual(o1, o2), nil
}
// TestEncodingDecodingUpdateRequests ensures that requests are serialized properly
// while also checking cryptographically that only the owner of a feed can update it.
func TestEncodingDecodingUpdateRequests(t *testing.T) {
charlie := newCharlieSigner() //Charlie
bob := newBobSigner() //Bob
// Create a feed in our good guy Charlie's name
topic, _ := NewTopic("a good topic name", nil)
firstRequest := NewFirstRequest(topic)
firstRequest.User = charlie.Address()
// We now encode the create message to simulate we send it over the wire
messageRawData, err := firstRequest.MarshalJSON()
if err != nil {
t.Fatalf("Error encoding first feed update request: %s", err)
}
// ... the message arrives and is decoded...
var recoveredFirstRequest Request
if err := recoveredFirstRequest.UnmarshalJSON(messageRawData); err != nil {
t.Fatalf("Error decoding first feed update request: %s", err)
}
// ... but verification should fail because it is not signed!
if err := recoveredFirstRequest.Verify(); err == nil {
t.Fatal("Expected Verify to fail since the message is not signed")
}
// We now assume that the first feed update was created and propagated.
const expectedSignature = "0x7235b27a68372ddebcf78eba48543fa460864b0b0e99cb533fcd3664820e603312d29426dd00fb39628f5299480a69bf6e462838d78de49ce0704c754c9deb2601"
const expectedJSON = `{"feed":{"topic":"0x6120676f6f6420746f706963206e616d65000000000000000000000000000000","user":"0x876a8936a7cd0b79ef0735ad0896c1afe278781c"},"epoch":{"time":1000,"level":1},"protocolVersion":0,"data":"0x5468697320686f75722773207570646174653a20537761726d2039392e3020686173206265656e2072656c656173656421"}`
//Put together an unsigned update request that we will serialize to send it to the signer.
data := []byte("This hour's update: Swarm 99.0 has been released!")
request := &Request{
Update: Update{
ID: ID{
Epoch: lookup.Epoch{
Time: 1000,
Level: 1,
},
Feed: firstRequest.Update.Feed,
},
data: data,
},
}
messageRawData, err = request.MarshalJSON()
if err != nil {
t.Fatalf("Error encoding update request: %s", err)
}
equalJSON, err := areEqualJSON(string(messageRawData), expectedJSON)
if err != nil {
t.Fatalf("Error decoding update request JSON: %s", err)
}
if !equalJSON {
t.Fatalf("Received a different JSON message. Expected %s, got %s", expectedJSON, string(messageRawData))
}
// now the encoded message messageRawData is sent over the wire and arrives to the signer
//Attempt to extract an UpdateRequest out of the encoded message
var recoveredRequest Request
if err := recoveredRequest.UnmarshalJSON(messageRawData); err != nil {
t.Fatalf("Error decoding update request: %s", err)
}
//sign the request and see if it matches our predefined signature above.
if err := recoveredRequest.Sign(charlie); err != nil {
t.Fatalf("Error signing request: %s", err)
}
compareByteSliceToExpectedHex(t, "signature", recoveredRequest.Signature[:], expectedSignature)
// mess with the signature and see what happens. To alter the signature, we briefly decode it as JSON
// to alter the signature field.
var j updateRequestJSON
if err := json.Unmarshal([]byte(expectedJSON), &j); err != nil {
t.Fatal("Error unmarshalling test json, check expectedJSON constant")
}
j.Signature = "Certainly not a signature"
corruptMessage, _ := json.Marshal(j) // encode the message with the bad signature
var corruptRequest Request
if err = corruptRequest.UnmarshalJSON(corruptMessage); err == nil {
t.Fatal("Expected DecodeUpdateRequest to fail when trying to interpret a corrupt message with an invalid signature")
}
// Now imagine Bob wants to create an update of his own about the same feed,
// signing a message with his private key
if err := request.Sign(bob); err != nil {
t.Fatalf("Error signing: %s", err)
}
// Now Bob encodes the message to send it over the wire...
messageRawData, err = request.MarshalJSON()
if err != nil {
t.Fatalf("Error encoding message:%s", err)
}
// ... the message arrives to our Swarm node and it is decoded.
recoveredRequest = Request{}
if err := recoveredRequest.UnmarshalJSON(messageRawData); err != nil {
t.Fatalf("Error decoding message:%s", err)
}
// Before checking what happened with Bob's update, let's see what would happen if we mess
// with the signature big time to see if Verify catches it
savedSignature := *recoveredRequest.Signature // save the signature for later
binary.LittleEndian.PutUint64(recoveredRequest.Signature[5:], 556845463424) // write some random data to break the signature
if err = recoveredRequest.Verify(); err == nil {
t.Fatal("Expected Verify to fail on corrupt signature")
}
// restore Bob's signature from the corruption above
*recoveredRequest.Signature = savedSignature
// Now the signature is not corrupt
if err = recoveredRequest.Verify(); err != nil {
t.Fatal(err)
}
// Reuse object and sign with our friend Charlie's private key
if err := recoveredRequest.Sign(charlie); err != nil {
t.Fatalf("Error signing with the correct private key: %s", err)
}
// And now, Verify should work since this update now belongs to Charlie
if err = recoveredRequest.Verify(); err != nil {
t.Fatalf("Error verifying that Charlie, can sign a reused request object:%s", err)
}
// mess with the lookup key to make sure Verify fails:
recoveredRequest.Time = 77999 // this will alter the lookup key
if err = recoveredRequest.Verify(); err == nil {
t.Fatalf("Expected Verify to fail since the lookup key has been altered")
}
}
func getTestRequest() *Request {
return &Request{
Update: *getTestFeedUpdate(),
}
}
func TestUpdateChunkSerializationErrorChecking(t *testing.T) {
// Test that fromChunk fails if the chunk is too small
var r Request
if err := r.fromChunk(storage.NewChunk(storage.ZeroAddr, make([]byte, minimumUpdateDataLength-1+signatureLength))); err == nil {
t.Fatalf("Expected request.fromChunk to fail when chunkData contains less than %d bytes", minimumUpdateDataLength)
}
r = *getTestRequest()
_, err := r.toChunk()
if err == nil {
t.Fatal("Expected request.toChunk to fail when there is no data")
}
r.data = []byte("Al bien hacer jamás le falta premio") // put some arbitrary length data
_, err = r.toChunk()
if err == nil {
t.Fatal("expected request.toChunk to fail when there is no signature")
}
charlie := newCharlieSigner()
if err := r.Sign(charlie); err != nil {
t.Fatalf("error signing:%s", err)
}
chunk, err := r.toChunk()
if err != nil {
t.Fatalf("error creating update chunk:%s", err)
}
compareByteSliceToExpectedHex(t, "chunk", chunk.Data(), "0x0000000000000000776f726c64206e657773207265706f72742c20657665727920686f7572000000876a8936a7cd0b79ef0735ad0896c1afe278781ce80300000000001f416c206269656e206861636572206a616dc3a173206c652066616c7461207072656d696f9896df5937e64e51a7994479ff3fe0ed790d539b9b3e85e93c0014a8a64374f23603c79d16e99b50a757896d3816d7022ac594ad1415679a9b164afb2e5926d801")
var recovered Request
recovered.fromChunk(chunk)
if !reflect.DeepEqual(recovered, r) {
t.Fatal("Expected recovered feed update request to equal the original one")
}
}
// TestReverse checks that the signature address matches the update signer's address
func TestReverse(t *testing.T) {
epoch := lookup.Epoch{
Time: 7888,
Level: 6,
}
// make fake timeProvider
timeProvider := &fakeTimeProvider{
currentTime: startTime.Time,
}
// signer containing private key
signer := newAliceSigner()
// set up rpc and create feeds handler
_, _, teardownTest, err := setupTest(timeProvider, signer)
if err != nil {
t.Fatal(err)
}
defer teardownTest()
topic, _ := NewTopic("Cervantes quotes", nil)
fd := Feed{
Topic: topic,
User: signer.Address(),
}
data := []byte("Donde una puerta se cierra, otra se abre")
request := new(Request)
request.Feed = fd
request.Epoch = epoch
request.data = data
// generate a chunk key for this request
key := request.Addr()
if err = request.Sign(signer); err != nil {
t.Fatal(err)
}
chunk, err := request.toChunk()
if err != nil {
t.Fatal(err)
}
// check that we can recover the owner account from the update chunk's signature
var checkUpdate Request
if err := checkUpdate.fromChunk(chunk); err != nil {
t.Fatal(err)
}
checkdigest, err := checkUpdate.GetDigest()
if err != nil {
t.Fatal(err)
}
recoveredAddr, err := getUserAddr(checkdigest, *checkUpdate.Signature)
if err != nil {
t.Fatalf("Retrieve address from signature fail: %v", err)
}
originalAddr := crypto.PubkeyToAddress(signer.PrivKey.PublicKey)
// check that the metadata retrieved from the chunk matches what we gave it
if recoveredAddr != originalAddr {
t.Fatalf("addresses dont match: %x != %x", originalAddr, recoveredAddr)
}
if !bytes.Equal(key[:], chunk.Address()[:]) {
t.Fatalf("Expected chunk key '%x', was '%x'", key, chunk.Address())
}
if epoch != checkUpdate.Epoch {
t.Fatalf("Expected epoch to be '%s', was '%s'", epoch.String(), checkUpdate.Epoch.String())
}
if !bytes.Equal(data, checkUpdate.data) {
t.Fatalf("Expected data '%x', was '%x'", data, checkUpdate.data)
}
}

75
storage/feed/sign.go Normal file
View File

@ -0,0 +1,75 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package feed
import (
"crypto/ecdsa"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
)
const signatureLength = 65
// Signature is an alias for a static byte array with the size of a signature
type Signature [signatureLength]byte
// Signer signs feed update payloads
type Signer interface {
Sign(common.Hash) (Signature, error)
Address() common.Address
}
// GenericSigner implements the Signer interface
// It is the vanilla signer that probably should be used in most cases
type GenericSigner struct {
PrivKey *ecdsa.PrivateKey
address common.Address
}
// NewGenericSigner builds a signer that will sign everything with the provided private key
func NewGenericSigner(privKey *ecdsa.PrivateKey) *GenericSigner {
return &GenericSigner{
PrivKey: privKey,
address: crypto.PubkeyToAddress(privKey.PublicKey),
}
}
// Sign signs the supplied data
// It wraps the ethereum crypto.Sign() method
func (s *GenericSigner) Sign(data common.Hash) (signature Signature, err error) {
signaturebytes, err := crypto.Sign(data.Bytes(), s.PrivKey)
if err != nil {
return
}
copy(signature[:], signaturebytes)
return
}
// Address returns the Ethereum address derived from the signer's private key
func (s *GenericSigner) Address() common.Address {
return s.address
}
// getUserAddr extracts the address of the feed update signer
func getUserAddr(digest common.Hash, signature Signature) (common.Address, error) {
pub, err := crypto.SigToPub(digest.Bytes(), signature[:])
if err != nil {
return common.Address{}, err
}
return crypto.PubkeyToAddress(*pub), nil
}

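A minimal sketch of the signer in isolation; the digest below is an arbitrary placeholder, whereas real callers sign the digest produced by Request.GetDigest:

package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/crypto"
	"github.com/ethersphere/swarm/storage/feed"
)

func main() {
	privKey, err := crypto.GenerateKey()
	if err != nil {
		panic(err)
	}
	signer := feed.NewGenericSigner(privKey)

	// Placeholder digest; real callers sign Request.GetDigest() output.
	digest := common.HexToHash("0xdeadbeef")
	sig, err := signer.Sign(digest)
	if err != nil {
		panic(err)
	}
	// A Signature is a fixed 65-byte array (r || s || v).
	fmt.Printf("signer %x produced a %d-byte signature\n", signer.Address(), len(sig))
}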
72
storage/feed/testutil.go Normal file
View File

@ -0,0 +1,72 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package feed
import (
"context"
"path/filepath"
"sync"
"github.com/ethereum/go-ethereum/p2p/enode"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/storage"
"github.com/ethersphere/swarm/storage/localstore"
)
const (
testDbDirName = "feeds"
)
type TestHandler struct {
*Handler
}
func (t *TestHandler) Close() {
t.chunkStore.Close()
}
type mockNetFetcher struct{}
func (m *mockNetFetcher) Request(hopCount uint8) {
}
func (m *mockNetFetcher) Offer(source *enode.ID) {
}
func newFakeNetFetcher(context.Context, storage.Address, *sync.Map) storage.NetFetcher {
return &mockNetFetcher{}
}
// NewTestHandler creates Handler object to be used for testing purposes.
func NewTestHandler(datadir string, params *HandlerParams) (*TestHandler, error) {
path := filepath.Join(datadir, testDbDirName)
fh := NewHandler(params)
db, err := localstore.New(filepath.Join(path, "chunks"), make([]byte, 32), nil)
if err != nil {
return nil, err
}
localStore := chunk.NewValidatorStore(db, storage.NewContentAddressValidator(storage.MakeHashFunc(feedsHashAlgorithm)), fh)
netStore, err := storage.NewNetStore(localStore, nil)
if err != nil {
return nil, err
}
netStore.NewNetFetcherFunc = newFakeNetFetcher
fh.SetStore(netStore)
return &TestHandler{fh}, nil
}

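A hedged sketch of how a test might use this helper; passing a zero-valued HandlerParams mirrors what the feed tests appear to rely on and is an assumption here:

package main

import (
	"io/ioutil"
	"os"

	"github.com/ethersphere/swarm/storage/feed"
)

func main() {
	dir, err := ioutil.TempDir("", "feeds-test")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)

	// Zero-valued HandlerParams is assumed to be acceptable for tests.
	handler, err := feed.NewTestHandler(dir, &feed.HandlerParams{})
	if err != nil {
		panic(err)
	}
	defer handler.Close()
	// handler is now backed by a local store with a fake net fetcher.
}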
62
storage/feed/timestamps.go Normal file
View File

@ -0,0 +1,62 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package feed
import (
"encoding/json"
"time"
)
// TimestampProvider sets the time source of the feeds package
var TimestampProvider timestampProvider = NewDefaultTimestampProvider()
// Timestamp encodes a point in time as a Unix epoch
type Timestamp struct {
Time uint64 `json:"time"` // Unix epoch timestamp, in seconds
}
// timestampProvider interface describes a source of timestamp information
type timestampProvider interface {
Now() Timestamp // returns the current timestamp information
}
// UnmarshalJSON implements the json.Unmarshaler interface
func (t *Timestamp) UnmarshalJSON(data []byte) error {
return json.Unmarshal(data, &t.Time)
}
// MarshalJSON implements the json.Marshaler interface
func (t *Timestamp) MarshalJSON() ([]byte, error) {
return json.Marshal(t.Time)
}
// DefaultTimestampProvider is a TimestampProvider that uses system time
// as time source
type DefaultTimestampProvider struct {
}
// NewDefaultTimestampProvider creates a system clock based timestamp provider
func NewDefaultTimestampProvider() *DefaultTimestampProvider {
return &DefaultTimestampProvider{}
}
// Now returns the current time according to this provider
func (dtp *DefaultTimestampProvider) Now() Timestamp {
return Timestamp{
Time: uint64(time.Now().Unix()),
}
}

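Because TimestampProvider is an exported package variable with an interface type, tests can swap in a deterministic clock. A short sketch; fixedClock is a hypothetical helper, not part of this commit:

package main

import (
	"fmt"

	"github.com/ethersphere/swarm/storage/feed"
)

// fixedClock is a hypothetical deterministic time source for tests.
type fixedClock struct{ t uint64 }

// Now satisfies the package's timestamp provider interface.
func (c fixedClock) Now() feed.Timestamp { return feed.Timestamp{Time: c.t} }

func main() {
	feed.TimestampProvider = fixedClock{t: 1000}
	fmt.Println(feed.TimestampProvider.Now().Time) // always 1000
}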
105
storage/feed/topic.go Normal file
View File

@ -0,0 +1,105 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package feed
import (
"bytes"
"encoding/json"
"fmt"
"github.com/ethereum/go-ethereum/common/bitutil"
"github.com/ethereum/go-ethereum/common/hexutil"
"github.com/ethersphere/swarm/storage"
)
// TopicLength establishes the max length of a topic string
const TopicLength = storage.AddressLength
// Topic represents what a feed is about
type Topic [TopicLength]byte
// ErrTopicTooLong is returned when creating a topic with a name/related content too long
var ErrTopicTooLong = fmt.Errorf("Topic is too long. Max length is %d", TopicLength)
// NewTopic creates a new topic from a provided name and "related content" byte array,
// merging the two together.
// If relatedContent or name are longer than TopicLength, they will be truncated and an error returned
// name can be an empty string
// relatedContent can be nil
func NewTopic(name string, relatedContent []byte) (topic Topic, err error) {
if relatedContent != nil {
contentLength := len(relatedContent)
if contentLength > TopicLength {
contentLength = TopicLength
err = ErrTopicTooLong
}
copy(topic[:], relatedContent[:contentLength])
}
nameBytes := []byte(name)
nameLength := len(nameBytes)
if nameLength > TopicLength {
nameLength = TopicLength
err = ErrTopicTooLong
}
bitutil.XORBytes(topic[:], topic[:], nameBytes[:nameLength])
return topic, err
}
// Hex will return the topic encoded as a hex string
func (t *Topic) Hex() string {
return hexutil.Encode(t[:])
}
// FromHex will parse a hex string into this Topic instance
func (t *Topic) FromHex(hex string) error {
bytes, err := hexutil.Decode(hex)
if err != nil || len(bytes) != len(t) {
return NewErrorf(ErrInvalidValue, "Cannot decode topic")
}
copy(t[:], bytes)
return nil
}
// Name will try to extract the topic name out of the Topic
func (t *Topic) Name(relatedContent []byte) string {
nameBytes := *t
if relatedContent != nil {
contentLength := len(relatedContent)
if contentLength > TopicLength {
contentLength = TopicLength
}
bitutil.XORBytes(nameBytes[:], t[:], relatedContent[:contentLength])
}
z := bytes.IndexByte(nameBytes[:], 0)
if z < 0 {
z = TopicLength
}
return string(nameBytes[:z])
}
// UnmarshalJSON implements the json.Unmarshaler interface
func (t *Topic) UnmarshalJSON(data []byte) error {
var hex string
json.Unmarshal(data, &hex)
return t.FromHex(hex)
}
// MarshalJSON implements the json.Marshaler interface
func (t *Topic) MarshalJSON() ([]byte, error) {
return json.Marshal(t.Hex())
}

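Since a topic is the XOR of the name and the related content, the name can be recovered later by XORing the same related content back out, which is what Name does. A short sketch:

package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/common/hexutil"
	"github.com/ethersphere/swarm/storage/feed"
)

func main() {
	related, _ := hexutil.Decode("0xabcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789")

	// The topic is relatedContent XOR name (both zero-padded to 32 bytes).
	topic, err := feed.NewTopic("test-topic", related)
	if err != nil {
		panic(err)
	}
	fmt.Println(topic.Hex())

	// XORing the related content back out recovers the name.
	fmt.Println(topic.Name(related)) // "test-topic"
}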
50
storage/feed/topic_test.go Normal file
View File

@ -0,0 +1,50 @@
package feed
import (
"testing"
"github.com/ethereum/go-ethereum/common/hexutil"
)
func TestTopic(t *testing.T) {
related, _ := hexutil.Decode("0xabcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789")
topicName := "test-topic"
topic, _ := NewTopic(topicName, related)
hex := topic.Hex()
expectedHex := "0xdfa89c750e3108f9c2aeef0123456789abcdef0123456789abcdef0123456789"
if hex != expectedHex {
t.Fatalf("Expected %s, got %s", expectedHex, hex)
}
var topic2 Topic
topic2.FromHex(hex)
if topic2 != topic {
t.Fatal("Expected recovered topic to be equal to original one")
}
if topic2.Name(related) != topicName {
t.Fatal("Retrieved name does not match")
}
bytes, err := topic2.MarshalJSON()
if err != nil {
t.Fatal(err)
}
expectedJSON := `"0xdfa89c750e3108f9c2aeef0123456789abcdef0123456789abcdef0123456789"`
equal, err := areEqualJSON(expectedJSON, string(bytes))
if err != nil {
t.Fatal(err)
}
if !equal {
t.Fatalf("Expected JSON to be %s, got %s", expectedJSON, string(bytes))
}
err = topic2.UnmarshalJSON(bytes)
if err != nil {
t.Fatal(err)
}
if topic2 != topic {
t.Fatal("Expected recovered topic to be equal to original one")
}
}

134
storage/feed/update.go Normal file
View File

@ -0,0 +1,134 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package feed
import (
"fmt"
"strconv"
"github.com/ethersphere/swarm/chunk"
)
// ProtocolVersion defines the current version of the protocol that will be included in each update message
const ProtocolVersion uint8 = 0
const headerLength = 8
// Header defines an update message header including a protocol version byte
type Header struct {
Version uint8 // Protocol version
Padding [headerLength - 1]uint8 // reserved for future use
}
// Update encapsulates the information sent as part of a feed update
type Update struct {
Header Header //
ID // Feed Update identifying information
data []byte // actual data payload
}
const minimumUpdateDataLength = idLength + headerLength + 1
// MaxUpdateDataLength indicates the maximum payload size for a feed update
const MaxUpdateDataLength = chunk.DefaultSize - signatureLength - idLength - headerLength
// binaryPut serializes the feed update information into the given slice
func (r *Update) binaryPut(serializedData []byte) error {
datalength := len(r.data)
if datalength == 0 {
return NewError(ErrInvalidValue, "a feed update must contain data")
}
if datalength > MaxUpdateDataLength {
return NewErrorf(ErrInvalidValue, "feed update data is too big (length=%d). Max length=%d", datalength, MaxUpdateDataLength)
}
if len(serializedData) != r.binaryLength() {
return NewErrorf(ErrInvalidValue, "slice passed to putBinary must be of exact size. Expected %d bytes", r.binaryLength())
}
var cursor int
// serialize Header
serializedData[cursor] = r.Header.Version
copy(serializedData[cursor+1:headerLength], r.Header.Padding[:headerLength-1])
cursor += headerLength
// serialize ID
if err := r.ID.binaryPut(serializedData[cursor : cursor+idLength]); err != nil {
return err
}
cursor += idLength
// add the data
copy(serializedData[cursor:], r.data)
cursor += datalength
return nil
}
// binaryLength returns the expected number of bytes this structure will take to encode
func (r *Update) binaryLength() int {
return idLength + headerLength + len(r.data)
}
// binaryGet populates this instance from the information contained in the passed byte slice
func (r *Update) binaryGet(serializedData []byte) error {
if len(serializedData) < minimumUpdateDataLength {
return NewErrorf(ErrNothingToReturn, "chunk less than %d bytes cannot be a feed update chunk", minimumUpdateDataLength)
}
dataLength := len(serializedData) - idLength - headerLength
// at this point we can be satisfied that we have the correct data length to read
var cursor int
// deserialize Header
r.Header.Version = serializedData[cursor] // extract the protocol version
copy(r.Header.Padding[:headerLength-1], serializedData[cursor+1:headerLength]) // extract the padding
cursor += headerLength
if err := r.ID.binaryGet(serializedData[cursor : cursor+idLength]); err != nil {
return err
}
cursor += idLength
data := serializedData[cursor : cursor+dataLength]
cursor += dataLength
// now that all checks have passed, copy data into structure
r.data = make([]byte, dataLength)
copy(r.data, data)
return nil
}
// FromValues deserializes this instance from a string key-value store
// useful to parse query strings
func (r *Update) FromValues(values Values, data []byte) error {
r.data = data
version, _ := strconv.ParseUint(values.Get("protocolVersion"), 10, 32)
r.Header.Version = uint8(version)
return r.ID.FromValues(values)
}
// AppendValues serializes this structure into the provided string key-value store
// useful to build query strings
func (r *Update) AppendValues(values Values) []byte {
r.ID.AppendValues(values)
values.Set("protocolVersion", fmt.Sprintf("%d", r.Header.Version))
return r.data
}

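The constants above fix the on-chunk layout: an 8-byte header, the update ID, the payload, and (once signed) a trailing signature. A tiny sketch that just surfaces the resulting payload bound:

package main

import (
	"fmt"

	"github.com/ethersphere/swarm/chunk"
	"github.com/ethersphere/swarm/storage/feed"
)

func main() {
	// MaxUpdateDataLength = chunk.DefaultSize - signatureLength - idLength - headerLength,
	// i.e. whatever is left of a default-size chunk after the fixed-size fields.
	fmt.Printf("default chunk size: %d bytes\n", chunk.DefaultSize)
	fmt.Printf("max feed update payload: %d bytes\n", feed.MaxUpdateDataLength)
}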
50
storage/feed/update_test.go Normal file
View File

@ -0,0 +1,50 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package feed
import (
"testing"
)
func getTestFeedUpdate() *Update {
return &Update{
ID: *getTestID(),
data: []byte("El que lee mucho y anda mucho, ve mucho y sabe mucho"),
}
}
func TestUpdateSerializer(t *testing.T) {
testBinarySerializerRecovery(t, getTestFeedUpdate(), "0x0000000000000000776f726c64206e657773207265706f72742c20657665727920686f7572000000876a8936a7cd0b79ef0735ad0896c1afe278781ce80300000000001f456c20717565206c6565206d7563686f207920616e6461206d7563686f2c207665206d7563686f20792073616265206d7563686f")
}
func TestUpdateLengthCheck(t *testing.T) {
testBinarySerializerLengthCheck(t, getTestFeedUpdate())
// Test fail if update is too big
update := getTestFeedUpdate()
update.data = make([]byte, MaxUpdateDataLength+100)
serialized := make([]byte, update.binaryLength())
if err := update.binaryPut(serialized); err == nil {
t.Fatal("Expected update.binaryPut to fail since update is too big")
}
// test fail if data is empty or nil
update.data = nil
serialized = make([]byte, update.binaryLength())
if err := update.binaryPut(serialized); err == nil {
t.Fatal("Expected update.binaryPut to fail since data is empty")
}
}

163
storage/filestore.go Normal file
View File

@ -0,0 +1,163 @@
// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package storage
import (
"context"
"io"
"sort"
"sync"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/storage/localstore"
)
/*
FileStore provides the client API entrypoints Store and Retrieve to store and retrieve data.
It can store anything that has a byte slice representation, so files or serialised objects etc.
Storage: FileStore calls the Chunker to segment the input datastream of any size to a merkle hashed tree of chunks. The key of the root block is returned to the client.
Retrieval: given the key of the root block, the FileStore retrieves the block chunks and reconstructs the original data and passes it back as a lazy reader. A lazy reader is a reader with on-demand delayed processing, i.e. the chunks needed to reconstruct a large file are only fetched and processed if that particular part of the document is actually read.
As the chunker produces chunks, FileStore dispatches them to its own chunk store
implementation for storage or retrieval.
*/
const (
defaultLDBCapacity = 5000000 // capacity for LevelDB, by default 5*10^6*4096 bytes == 20GB
defaultCacheCapacity = 10000 // capacity for in-memory chunks' cache
defaultChunkRequestsCacheCapacity = 5000000 // capacity for container holding outgoing requests for chunks. should be set to LevelDB capacity
)
type FileStore struct {
ChunkStore
hashFunc SwarmHasher
tags *chunk.Tags
}
type FileStoreParams struct {
Hash string
}
func NewFileStoreParams() *FileStoreParams {
return &FileStoreParams{
Hash: DefaultHash,
}
}
// for testing locally
func NewLocalFileStore(datadir string, basekey []byte, tags *chunk.Tags) (*FileStore, error) {
localStore, err := localstore.New(datadir, basekey, nil)
if err != nil {
return nil, err
}
return NewFileStore(chunk.NewValidatorStore(localStore, NewContentAddressValidator(MakeHashFunc(DefaultHash))), NewFileStoreParams(), tags), nil
}
func NewFileStore(store ChunkStore, params *FileStoreParams, tags *chunk.Tags) *FileStore {
hashFunc := MakeHashFunc(params.Hash)
return &FileStore{
ChunkStore: store,
hashFunc: hashFunc,
tags: tags,
}
}
// Retrieve is a public API. Main entry point for document retrieval directly. Used by the
// FS-aware API and httpaccess
// Chunk retrieval blocks on netStore requests with a timeout, so the reader will
// report an error if retrieval of chunks within the requested range times out.
// It returns a reader with the chunk data and whether the content was encrypted
func (f *FileStore) Retrieve(ctx context.Context, addr Address) (reader *LazyChunkReader, isEncrypted bool) {
isEncrypted = len(addr) > f.hashFunc().Size()
tag, err := f.tags.GetFromContext(ctx)
if err != nil {
tag = chunk.NewTag(0, "ephemeral-retrieval-tag", 0)
}
getter := NewHasherStore(f.ChunkStore, f.hashFunc, isEncrypted, tag)
reader = TreeJoin(ctx, addr, getter, 0)
return
}
// Store is a public API. Main entry point for document storage directly. Used by the
// FS-aware API and httpaccess
func (f *FileStore) Store(ctx context.Context, data io.Reader, size int64, toEncrypt bool) (addr Address, wait func(context.Context) error, err error) {
tag, err := f.tags.GetFromContext(ctx)
if err != nil {
// some parts of the codebase, namely the manifest trie, do not store the context
// of the original request nor the tag with the trie; recalculating the trie hence
// loses the tag UID, so we create an ephemeral tag here for that purpose
tag = chunk.NewTag(0, "", 0)
//return nil, nil, err
}
putter := NewHasherStore(f.ChunkStore, f.hashFunc, toEncrypt, tag)
return PyramidSplit(ctx, data, putter, putter, tag)
}
func (f *FileStore) HashSize() int {
return f.hashFunc().Size()
}
// GetAllReferences is a public API. This endpoint returns all chunk hashes (only) for a given file
func (f *FileStore) GetAllReferences(ctx context.Context, data io.Reader, toEncrypt bool) (addrs AddressCollection, err error) {
tag := chunk.NewTag(0, "ephemeral-tag", 0) //this tag is just a mock ephemeral tag since we don't want to save these results
// create a special kind of putter, which only will store the references
putter := &hashExplorer{
hasherStore: NewHasherStore(f.ChunkStore, f.hashFunc, toEncrypt, tag),
}
// do the actual splitting anyway, no way around it
_, wait, err := PyramidSplit(ctx, data, putter, putter, tag)
if err != nil {
return nil, err
}
// wait for splitting to be complete and all chunks processed
err = wait(ctx)
if err != nil {
return nil, err
}
// collect all references
addrs = NewAddressCollection(0)
for _, ref := range putter.references {
addrs = append(addrs, Address(ref))
}
sort.Sort(addrs)
return addrs, nil
}
// hashExplorer is a special kind of putter which will only store chunk references
type hashExplorer struct {
*hasherStore
references []Reference
lock sync.Mutex
}
// hashExplorer's Put stores the chunk as usual and additionally records the returned reference in `references`
func (he *hashExplorer) Put(ctx context.Context, chunkData ChunkData) (Reference, error) {
// Need to do the actual Put, which returns the references
ref, err := he.hasherStore.Put(ctx, chunkData)
if err != nil {
return nil, err
}
// internally store the reference
he.lock.Lock()
he.references = append(he.references, ref)
he.lock.Unlock()
return ref, nil
}

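A compact sketch of the Store/Retrieve round trip and of GetAllReferences, built on the local-store constructor above; the directory and payload are illustrative:

package main

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"io/ioutil"
	"os"

	"github.com/ethersphere/swarm/chunk"
	"github.com/ethersphere/swarm/storage"
)

func main() {
	dir, err := ioutil.TempDir("", "filestore-demo")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)

	fileStore, err := storage.NewLocalFileStore(dir, make([]byte, 32), chunk.NewTags())
	if err != nil {
		panic(err)
	}

	data := []byte("hello swarm")
	ctx := context.Background()

	// Store splits the stream into chunks; wait blocks until all are stored.
	addr, wait, err := fileStore.Store(ctx, bytes.NewReader(data), int64(len(data)), false)
	if err != nil {
		panic(err)
	}
	if err := wait(ctx); err != nil {
		panic(err)
	}

	// Retrieve returns a lazy reader; ReadAt fetches chunks on demand.
	reader, _ := fileStore.Retrieve(ctx, addr)
	out := make([]byte, len(data))
	if _, err := reader.ReadAt(out, 0); err != nil && err != io.EOF {
		panic(err)
	}
	fmt.Printf("%x -> %q\n", addr, out)

	// GetAllReferences re-splits the stream and reports every chunk address.
	refs, err := fileStore.GetAllReferences(ctx, bytes.NewReader(data), false)
	if err != nil {
		panic(err)
	}
	fmt.Println("chunks:", len(refs))
}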
202
storage/filestore_test.go Normal file
View File

@ -0,0 +1,202 @@
// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package storage
import (
"bytes"
"context"
"io"
"io/ioutil"
"os"
"path/filepath"
"testing"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/storage/localstore"
"github.com/ethersphere/swarm/testutil"
)
const testDataSize = 0x0001000
func TestFileStoreRandom(t *testing.T) {
testFileStoreRandom(false, t)
testFileStoreRandom(true, t)
}
func testFileStoreRandom(toEncrypt bool, t *testing.T) {
dir, err := ioutil.TempDir("", "swarm-storage-")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
localStore, err := localstore.New(dir, make([]byte, 32), nil)
if err != nil {
t.Fatal(err)
}
defer localStore.Close()
fileStore := NewFileStore(localStore, NewFileStoreParams(), chunk.NewTags())
slice := testutil.RandomBytes(1, testDataSize)
ctx := context.TODO()
key, wait, err := fileStore.Store(ctx, bytes.NewReader(slice), testDataSize, toEncrypt)
if err != nil {
t.Fatalf("Store error: %v", err)
}
err = wait(ctx)
if err != nil {
t.Fatalf("Store waitt error: %v", err.Error())
}
resultReader, isEncrypted := fileStore.Retrieve(context.TODO(), key)
if isEncrypted != toEncrypt {
t.Fatalf("isEncrypted expected %v got %v", toEncrypt, isEncrypted)
}
resultSlice := make([]byte, testDataSize)
n, err := resultReader.ReadAt(resultSlice, 0)
if err != io.EOF {
t.Fatalf("Retrieve error: %v", err)
}
if n != testDataSize {
t.Fatalf("Slice size error got %d, expected %d.", n, testDataSize)
}
if !bytes.Equal(slice, resultSlice) {
t.Fatalf("Comparison error.")
}
ioutil.WriteFile(filepath.Join(dir, "slice.bzz.16M"), slice, 0666)
ioutil.WriteFile(filepath.Join(dir, "result.bzz.16M"), resultSlice, 0666)
resultReader, isEncrypted = fileStore.Retrieve(context.TODO(), key)
if isEncrypted != toEncrypt {
t.Fatalf("isEncrypted expected %v got %v", toEncrypt, isEncrypted)
}
for i := range resultSlice {
resultSlice[i] = 0
}
n, err = resultReader.ReadAt(resultSlice, 0)
if err != io.EOF {
t.Fatalf("Retrieve error after removing memStore: %v", err)
}
if n != len(slice) {
t.Fatalf("Slice size error after removing memStore got %d, expected %d.", n, len(slice))
}
if !bytes.Equal(slice, resultSlice) {
t.Fatalf("Comparison error after removing memStore.")
}
}
func TestFileStoreCapacity(t *testing.T) {
testFileStoreCapacity(false, t)
testFileStoreCapacity(true, t)
}
func testFileStoreCapacity(toEncrypt bool, t *testing.T) {
dir, err := ioutil.TempDir("", "swarm-storage-")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
localStore, err := localstore.New(dir, make([]byte, 32), nil)
if err != nil {
t.Fatal(err)
}
defer localStore.Close()
fileStore := NewFileStore(localStore, NewFileStoreParams(), chunk.NewTags())
slice := testutil.RandomBytes(1, testDataSize)
ctx := context.TODO()
key, wait, err := fileStore.Store(ctx, bytes.NewReader(slice), testDataSize, toEncrypt)
if err != nil {
t.Errorf("Store error: %v", err)
}
err = wait(ctx)
if err != nil {
t.Fatalf("Store error: %v", err)
}
resultReader, isEncrypted := fileStore.Retrieve(context.TODO(), key)
if isEncrypted != toEncrypt {
t.Fatalf("isEncrypted expected %v got %v", toEncrypt, isEncrypted)
}
resultSlice := make([]byte, len(slice))
n, err := resultReader.ReadAt(resultSlice, 0)
if err != io.EOF {
t.Fatalf("Retrieve error: %v", err)
}
if n != len(slice) {
t.Fatalf("Slice size error got %d, expected %d.", n, len(slice))
}
if !bytes.Equal(slice, resultSlice) {
t.Fatalf("Comparison error.")
}
resultReader, isEncrypted = fileStore.Retrieve(context.TODO(), key)
if isEncrypted != toEncrypt {
t.Fatalf("isEncrypted expected %v got %v", toEncrypt, isEncrypted)
}
if _, err = resultReader.ReadAt(resultSlice, 0); err == nil {
t.Fatalf("Was able to read %d bytes from an empty memStore.", len(slice))
}
// check how it works with localStore
fileStore.ChunkStore = localStore
// localStore.dbStore.setCapacity(0)
resultReader, isEncrypted = fileStore.Retrieve(context.TODO(), key)
if isEncrypted != toEncrypt {
t.Fatalf("isEncrypted expected %v got %v", toEncrypt, isEncrypted)
}
for i := range resultSlice {
resultSlice[i] = 0
}
n, err = resultReader.ReadAt(resultSlice, 0)
if err != io.EOF {
t.Fatalf("Retrieve error after clearing memStore: %v", err)
}
if n != len(slice) {
t.Fatalf("Slice size error after clearing memStore got %d, expected %d.", n, len(slice))
}
if !bytes.Equal(slice, resultSlice) {
t.Fatalf("Comparison error after clearing memStore.")
}
}
// TestGetAllReferences only tests that GetAllReferences returns an expected
// number of references for a given file
func TestGetAllReferences(t *testing.T) {
dir, err := ioutil.TempDir("", "swarm-storage-")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
localStore, err := localstore.New(dir, make([]byte, 32), nil)
if err != nil {
t.Fatal(err)
}
defer localStore.Close()
fileStore := NewFileStore(localStore, NewFileStoreParams(), chunk.NewTags())
// testRuns[i] and expectedLens[i] are dataSize and expected length respectively
testRuns := []int{1024, 8192, 16000, 30000, 1000000}
expectedLens := []int{1, 3, 5, 9, 248}
for i, r := range testRuns {
slice := testutil.RandomBytes(1, r)
addrs, err := fileStore.GetAllReferences(context.Background(), bytes.NewReader(slice), false)
if err != nil {
t.Fatal(err)
}
if len(addrs) != expectedLens[i] {
t.Fatalf("Expected reference array length to be %d, but is %d", expectedLens[i], len(addrs))
}
}
}

270
storage/hasherstore.go Normal file
View File

@ -0,0 +1,270 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package storage
import (
"context"
"fmt"
"sync/atomic"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/storage/encryption"
"golang.org/x/crypto/sha3"
)
type hasherStore struct {
store ChunkStore
tag *chunk.Tag
toEncrypt bool
hashFunc SwarmHasher
hashSize int // content hash size
refSize int64 // reference size (content hash + possibly encryption key)
errC chan error // global error channel
doneC chan struct{} // closed by Close() call to indicate that count is the final number of chunks
quitC chan struct{} // closed to quit unterminated routines
// nrChunks is used with atomic functions
// it is required to be at the end of the struct to ensure 64bit alignment for arm architecture
// see: https://golang.org/pkg/sync/atomic/#pkg-note-BUG
nrChunks uint64 // number of chunks to store
}
// NewHasherStore creates a hasherStore object, which implements Putter and Getter interfaces.
// With the HasherStore you can put and get chunk data (which is just []byte) into a ChunkStore
// and the hasherStore will take care of encryption/decryption of data if necessary
func NewHasherStore(store ChunkStore, hashFunc SwarmHasher, toEncrypt bool, tag *chunk.Tag) *hasherStore {
hashSize := hashFunc().Size()
refSize := int64(hashSize)
if toEncrypt {
refSize += encryption.KeyLength
}
h := &hasherStore{
store: store,
tag: tag,
toEncrypt: toEncrypt,
hashFunc: hashFunc,
hashSize: hashSize,
refSize: refSize,
errC: make(chan error),
doneC: make(chan struct{}),
quitC: make(chan struct{}),
}
return h
}
// Put stores the chunkData into the ChunkStore of the hasherStore and returns the reference.
// If hasherStore has a chunkEncryption object, the data will be encrypted.
// Asynchronous function, the data will not necessarily be stored when it returns.
func (h *hasherStore) Put(ctx context.Context, chunkData ChunkData) (Reference, error) {
c := chunkData
var encryptionKey encryption.Key
if h.toEncrypt {
var err error
c, encryptionKey, err = h.encryptChunkData(chunkData)
if err != nil {
return nil, err
}
}
chunk := h.createChunk(c)
h.storeChunk(ctx, chunk)
return Reference(append(chunk.Address(), encryptionKey...)), nil
}
// Get returns data of the chunk with the given reference (retrieved from the ChunkStore of hasherStore).
// If the data is encrypted and the reference contains an encryption key, it will be decrypted before
// return.
func (h *hasherStore) Get(ctx context.Context, ref Reference) (ChunkData, error) {
addr, encryptionKey, err := parseReference(ref, h.hashSize)
if err != nil {
return nil, err
}
chunk, err := h.store.Get(ctx, chunk.ModeGetRequest, addr)
if err != nil {
return nil, err
}
chunkData := ChunkData(chunk.Data())
toDecrypt := (encryptionKey != nil)
if toDecrypt {
var err error
chunkData, err = h.decryptChunkData(chunkData, encryptionKey)
if err != nil {
return nil, err
}
}
return chunkData, nil
}
// Close indicates that no more chunks will be put with the hasherStore, so the Wait
// function can return when all the previously put chunks have been stored.
func (h *hasherStore) Close() {
close(h.doneC)
}
// Wait returns when
// 1) the Close() function has been called and
// 2) all the chunks which have been Put have been stored
func (h *hasherStore) Wait(ctx context.Context) error {
defer close(h.quitC)
var nrStoredChunks uint64 // number of stored chunks
var done bool
doneC := h.doneC
for {
select {
// if context is done earlier, just return with the error
case <-ctx.Done():
return ctx.Err()
// doneC is closed if all chunks have been submitted, from then we just wait until all of them are also stored
case <-doneC:
done = true
doneC = nil
// a chunk has been stored, if err is nil, then successfully, so increase the stored chunk counter
case err := <-h.errC:
if err != nil {
return err
}
nrStoredChunks++
}
// if all the chunks have been submitted and all of them are stored, then we can return
if done {
if nrStoredChunks >= atomic.LoadUint64(&h.nrChunks) {
return nil
}
}
}
}
func (h *hasherStore) createHash(chunkData ChunkData) Address {
hasher := h.hashFunc()
hasher.ResetWithLength(chunkData[:8]) // 8 bytes of length
hasher.Write(chunkData[8:]) // minus 8 []byte length
return hasher.Sum(nil)
}
func (h *hasherStore) createChunk(chunkData ChunkData) Chunk {
hash := h.createHash(chunkData)
chunk := NewChunk(hash, chunkData)
return chunk
}
func (h *hasherStore) encryptChunkData(chunkData ChunkData) (ChunkData, encryption.Key, error) {
if len(chunkData) < 8 {
return nil, nil, fmt.Errorf("Invalid ChunkData, min length 8 got %v", len(chunkData))
}
key, encryptedSpan, encryptedData, err := h.encrypt(chunkData)
if err != nil {
return nil, nil, err
}
c := make(ChunkData, len(encryptedSpan)+len(encryptedData))
copy(c[:8], encryptedSpan)
copy(c[8:], encryptedData)
return c, key, nil
}
func (h *hasherStore) decryptChunkData(chunkData ChunkData, encryptionKey encryption.Key) (ChunkData, error) {
if len(chunkData) < 8 {
return nil, fmt.Errorf("Invalid ChunkData, min length 8 got %v", len(chunkData))
}
decryptedSpan, decryptedData, err := h.decrypt(chunkData, encryptionKey)
if err != nil {
return nil, err
}
// removing extra bytes which were just added for padding; the loop below derives
// the payload length from the decrypted span (for intermediate chunks, converting
// the subtree span into the number of reference bytes held by this chunk)
length := ChunkData(decryptedSpan).Size()
for length > chunk.DefaultSize {
length = length + (chunk.DefaultSize - 1)
length = length / chunk.DefaultSize
length *= uint64(h.refSize)
}
c := make(ChunkData, length+8)
copy(c[:8], decryptedSpan)
copy(c[8:], decryptedData[:length])
return c, nil
}
func (h *hasherStore) RefSize() int64 {
return h.refSize
}
func (h *hasherStore) encrypt(chunkData ChunkData) (encryption.Key, []byte, []byte, error) {
key := encryption.GenerateRandomKey(encryption.KeyLength)
encryptedSpan, err := h.newSpanEncryption(key).Encrypt(chunkData[:8])
if err != nil {
return nil, nil, nil, err
}
encryptedData, err := h.newDataEncryption(key).Encrypt(chunkData[8:])
if err != nil {
return nil, nil, nil, err
}
return key, encryptedSpan, encryptedData, nil
}
func (h *hasherStore) decrypt(chunkData ChunkData, key encryption.Key) ([]byte, []byte, error) {
// the underlying encryption is a symmetric XOR with a key-derived pad, so
// Encrypt is its own inverse and is used here to decrypt
decryptedSpan, err := h.newSpanEncryption(key).Encrypt(chunkData[:8])
if err != nil {
return nil, nil, err
}
decryptedData, err := h.newDataEncryption(key).Encrypt(chunkData[8:])
if err != nil {
return nil, nil, err
}
return decryptedSpan, decryptedData, nil
}
func (h *hasherStore) newSpanEncryption(key encryption.Key) encryption.Encryption {
return encryption.New(key, 0, uint32(chunk.DefaultSize/h.refSize), sha3.NewLegacyKeccak256)
}
func (h *hasherStore) newDataEncryption(key encryption.Key) encryption.Encryption {
return encryption.New(key, int(chunk.DefaultSize), 0, sha3.NewLegacyKeccak256)
}
func (h *hasherStore) storeChunk(ctx context.Context, ch Chunk) {
atomic.AddUint64(&h.nrChunks, 1)
go func() {
seen, err := h.store.Put(ctx, chunk.ModePutUpload, ch)
h.tag.Inc(chunk.StateStored)
if seen {
h.tag.Inc(chunk.StateSeen)
}
select {
case h.errC <- err:
case <-h.quitC:
}
}()
}
func parseReference(ref Reference, hashSize int) (Address, encryption.Key, error) {
encryptedRefLength := hashSize + encryption.KeyLength
switch len(ref) {
case AddressLength:
return Address(ref), nil, nil
case encryptedRefLength:
encKeyIdx := len(ref) - encryption.KeyLength
return Address(ref[:encKeyIdx]), encryption.Key(ref[encKeyIdx:]), nil
default:
return nil, nil, fmt.Errorf("Invalid reference length, expected %v or %v got %v", hashSize, encryptedRefLength, len(ref))
}
}

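The intended call pattern is Put any number of times, then Close, then Wait before reading back. A sketch against the in-memory map store that the tests below also use:

package main

import (
	"context"
	"fmt"

	"github.com/ethersphere/swarm/chunk"
	"github.com/ethersphere/swarm/storage"
)

func main() {
	store := storage.NewMapChunkStore()
	tag := chunk.NewTag(0, "demo-tag", 0)

	// toEncrypt=true: references become hash + decryption key.
	hs := storage.NewHasherStore(store, storage.MakeHashFunc(storage.DefaultHash), true, tag)

	ctx := context.Background()
	data := storage.GenerateRandomChunk(1024).Data()

	ref, err := hs.Put(ctx, data)
	if err != nil {
		panic(err)
	}
	hs.Close() // no more Puts will follow
	if err := hs.Wait(ctx); err != nil {
		panic(err) // Wait returns once every submitted chunk is stored
	}

	back, err := hs.Get(ctx, ref)
	if err != nil {
		panic(err)
	}
	fmt.Printf("ref is %d bytes (hash+key), payload round-tripped: %v\n",
		len(ref), len(back) == len(data))
}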
124
storage/hasherstore_test.go Normal file
View File

@ -0,0 +1,124 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package storage
import (
"bytes"
"context"
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/storage/encryption"
)
func TestHasherStore(t *testing.T) {
var tests = []struct {
chunkLength int
toEncrypt bool
}{
{10, false},
{100, false},
{1000, false},
{4096, false},
{10, true},
{100, true},
{1000, true},
{4096, true},
}
for _, tt := range tests {
chunkStore := NewMapChunkStore()
hasherStore := NewHasherStore(chunkStore, MakeHashFunc(DefaultHash), tt.toEncrypt, chunk.NewTag(0, "test-tag", 0))
// Put two random chunks into the hasherStore
chunkData1 := GenerateRandomChunk(int64(tt.chunkLength)).Data()
ctx, cancel := context.WithTimeout(context.Background(), getTimeout)
defer cancel()
key1, err := hasherStore.Put(ctx, chunkData1)
if err != nil {
t.Fatalf("Expected no error got \"%v\"", err)
}
chunkData2 := GenerateRandomChunk(int64(tt.chunkLength)).Data()
key2, err := hasherStore.Put(ctx, chunkData2)
if err != nil {
t.Fatalf("Expected no error got \"%v\"", err)
}
hasherStore.Close()
// Wait until chunks are really stored
err = hasherStore.Wait(ctx)
if err != nil {
t.Fatalf("Expected no error got \"%v\"", err)
}
// Get the first chunk
retrievedChunkData1, err := hasherStore.Get(ctx, key1)
if err != nil {
t.Fatalf("Expected no error, got \"%v\"", err)
}
// Retrieved data should be same as the original
if !bytes.Equal(chunkData1, retrievedChunkData1) {
t.Fatalf("Expected retrieved chunk data %v, got %v", common.Bytes2Hex(chunkData1), common.Bytes2Hex(retrievedChunkData1))
}
// Get the second chunk
retrievedChunkData2, err := hasherStore.Get(ctx, key2)
if err != nil {
t.Fatalf("Expected no error, got \"%v\"", err)
}
// Retrieved data should be same as the original
if !bytes.Equal(chunkData2, retrievedChunkData2) {
t.Fatalf("Expected retrieved chunk data %v, got %v", common.Bytes2Hex(chunkData2), common.Bytes2Hex(retrievedChunkData2))
}
hash1, encryptionKey1, err := parseReference(key1, hasherStore.hashSize)
if err != nil {
t.Fatalf("Expected no error, got \"%v\"", err)
}
if tt.toEncrypt {
if encryptionKey1 == nil {
t.Fatal("Expected non-nil encryption key, got nil")
} else if len(encryptionKey1) != encryption.KeyLength {
t.Fatalf("Expected encryption key length %v, got %v", encryption.KeyLength, len(encryptionKey1))
}
}
if !tt.toEncrypt && encryptionKey1 != nil {
t.Fatalf("Expected nil encryption key, got key with length %v", len(encryptionKey1))
}
// Check if chunk data in store is encrypted or not
chunkInStore, err := chunkStore.Get(ctx, chunk.ModeGetRequest, hash1)
if err != nil {
t.Fatalf("Expected no error got \"%v\"", err)
}
chunkDataInStore := chunkInStore.Data()
if tt.toEncrypt && bytes.Equal(chunkData1, chunkDataInStore) {
t.Fatalf("Chunk expected to be encrypted but it is stored without encryption")
}
if !tt.toEncrypt && !bytes.Equal(chunkData1, chunkDataInStore) {
t.Fatalf("Chunk expected to be not encrypted but stored content is different. Expected %v got %v", common.Bytes2Hex(chunkData1), common.Bytes2Hex(chunkDataInStore))
}
}
}

56
storage/localstore/doc.go Normal file
View File

@ -0,0 +1,56 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
/*
Package localstore provides the disk storage layer for Swarm Chunk persistence.
It uses swarm/shed abstractions on top of the github.com/syndtr/goleveldb
LevelDB implementation.
The main type is DB, which manages the storage by providing methods to
access and add Chunks and to manage their status.
Modes are abstractions that apply specific changes to Chunks. There are three
mode types:
- ModeGet, for Chunk access
- ModePut, for adding Chunks to the database
- ModeSet, for changing Chunk statuses
Every mode type has a corresponding type (Getter, Putter and Setter)
that provides an adequate method to perform the operation; that type
should be injected into localstore consumers instead of the whole DB.
This gives a clearer insight into which operations a consumer performs
on the database.
Getters, Putters and Setters accept different get, put and set modes
to perform different actions. For example, ModeGet has two different
variables, ModeGetRequest and ModeGetSync, and two different Getters
can be constructed with them; they are used when the chunk is requested
or when the chunk is synced, as these two events change the database
differently.
Subscription methods are implemented for the specific purpose of
continuous iteration over Chunks that should be provided to
Push and Pull syncing.
DB implements an internal garbage collector that removes only synced
Chunks from the database, based on their most recent access time.
Internally, DB stores Chunk data and any required information, such as
store and access timestamps, in different shed indexes that can be
iterated on by the garbage collector or subscriptions.
*/
package localstore
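A hedged sketch of how a consumer drives these modes through the DB type defined later in this package (the directory path, baseKey, ctx and ch values are placeholders; error handling elided):
db, _ := localstore.New("/path/to/datadir", baseKey, nil) // baseKey: 32 random bytes
defer db.Close()
_, _ = db.Put(ctx, chunk.ModePutUpload, ch)             // add an uploaded chunk
_ = db.Set(ctx, chunk.ModeSetSync, ch.Address())        // mark it as synced
got, _ := db.Get(ctx, chunk.ModeGetRequest, ch.Address()) // access it, updating the gc index
_ = got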

204
storage/localstore/export.go Normal file
View File

@ -0,0 +1,204 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"archive/tar"
"context"
"encoding/hex"
"fmt"
"io"
"io/ioutil"
"sync"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/log"
"github.com/ethersphere/swarm/shed"
)
const (
// filename in tar archive that holds the information
// about exported data format version
exportVersionFilename = ".swarm-export-version"
// legacy version for previous LDBStore
legacyExportVersion = "1"
// current export format version
currentExportVersion = "2"
)
// Export writes tar-structured data for all chunks in the
// retrieval data index to the writer. It returns the
// number of chunks exported.
func (db *DB) Export(w io.Writer) (count int64, err error) {
tw := tar.NewWriter(w)
defer tw.Close()
if err := tw.WriteHeader(&tar.Header{
Name: exportVersionFilename,
Mode: 0644,
Size: int64(len(currentExportVersion)),
}); err != nil {
return 0, err
}
if _, err := tw.Write([]byte(currentExportVersion)); err != nil {
return 0, err
}
err = db.retrievalDataIndex.Iterate(func(item shed.Item) (stop bool, err error) {
hdr := &tar.Header{
Name: hex.EncodeToString(item.Address),
Mode: 0644,
Size: int64(len(item.Data)),
}
if err := tw.WriteHeader(hdr); err != nil {
return false, err
}
if _, err := tw.Write(item.Data); err != nil {
return false, err
}
count++
return false, nil
}, nil)
return count, err
}
// Import reads tar-structured data from the reader and
// stores chunks in the database. It returns the number of
// chunks imported.
func (db *DB) Import(r io.Reader, legacy bool) (count int64, err error) {
tr := tar.NewReader(r)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
errC := make(chan error)
doneC := make(chan struct{})
tokenPool := make(chan struct{}, 100)
var wg sync.WaitGroup
go func() {
var (
firstFile = true
// if exportVersionFilename file is not present
// assume legacy version
version = legacyExportVersion
)
for {
hdr, err := tr.Next()
if err != nil {
if err == io.EOF {
break
}
select {
case errC <- err:
case <-ctx.Done():
}
}
if firstFile {
firstFile = false
if hdr.Name == exportVersionFilename {
data, err := ioutil.ReadAll(tr)
if err != nil {
select {
case errC <- err:
case <-ctx.Done():
}
}
version = string(data)
continue
}
}
if len(hdr.Name) != 64 {
log.Warn("ignoring non-chunk file", "name", hdr.Name)
continue
}
keybytes, err := hex.DecodeString(hdr.Name)
if err != nil {
log.Warn("ignoring invalid chunk file", "name", hdr.Name, "err", err)
continue
}
data, err := ioutil.ReadAll(tr)
if err != nil {
select {
case errC <- err:
case <-ctx.Done():
}
}
key := chunk.Address(keybytes)
var ch chunk.Chunk
switch version {
case legacyExportVersion:
// LDBStore Export exported chunk data prefixed with the chunk key.
// That is not necessary, as the key is in the chunk filename,
// but backward compatibility needs to be preserved.
ch = chunk.NewChunk(key, data[32:])
case currentExportVersion:
ch = chunk.NewChunk(key, data)
default:
select {
case errC <- fmt.Errorf("unsupported export data version %q", version):
case <-ctx.Done():
}
}
tokenPool <- struct{}{}
wg.Add(1)
go func() {
_, err := db.Put(ctx, chunk.ModePutUpload, ch)
select {
case errC <- err:
case <-ctx.Done():
}
// always release the semaphore token and the wait group,
// regardless of which branch delivered the result
wg.Done()
<-tokenPool
}()
count++
}
wg.Wait()
close(doneC)
}()
// wait for all chunks to be stored
for {
select {
case err := <-errC:
if err != nil {
return count, err
}
case <-ctx.Done():
return count, ctx.Err()
default:
select {
case <-doneC:
return count, nil
default:
}
}
}
}
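A hedged usage sketch of the export/import round trip between two databases (the tar path and the db1/db2 handles are placeholders; error handling elided):
f, _ := os.Create("/tmp/localstore.tar")
exported, _ := db1.Export(f) // version file plus one tar entry per chunk, named by hex address
f.Close()
r, _ := os.Open("/tmp/localstore.tar")
imported, _ := db2.Import(r, false) // false: the archive is not in the legacy (version 1) format
r.Close()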

80
storage/localstore/export_test.go Normal file
View File

@ -0,0 +1,80 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"bytes"
"context"
"testing"
"github.com/ethersphere/swarm/chunk"
)
// TestExportImport constructs two databases, one to put and export
// chunks and another one to import and validate that all chunks are
// imported.
func TestExportImport(t *testing.T) {
db1, cleanup1 := newTestDB(t, nil)
defer cleanup1()
var chunkCount = 100
chunks := make(map[string][]byte, chunkCount)
for i := 0; i < chunkCount; i++ {
ch := generateTestRandomChunk()
_, err := db1.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
chunks[string(ch.Address())] = ch.Data()
}
var buf bytes.Buffer
c, err := db1.Export(&buf)
if err != nil {
t.Fatal(err)
}
wantChunksCount := int64(len(chunks))
if c != wantChunksCount {
t.Errorf("got export count %v, want %v", c, wantChunksCount)
}
db2, cleanup2 := newTestDB(t, nil)
defer cleanup2()
c, err = db2.Import(&buf, false)
if err != nil {
t.Fatal(err)
}
if c != wantChunksCount {
t.Errorf("got import count %v, want %v", c, wantChunksCount)
}
for a, want := range chunks {
addr := chunk.Address([]byte(a))
ch, err := db2.Get(context.Background(), chunk.ModeGetRequest, addr)
if err != nil {
t.Fatal(err)
}
got := ch.Data()
if !bytes.Equal(got, want) {
t.Fatalf("chunk %s: got data %x, want %x", addr.Hex(), got, want)
}
}
}

194
storage/localstore/gc.go Normal file
View File

@ -0,0 +1,194 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"time"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/metrics"
"github.com/ethersphere/swarm/shed"
"github.com/syndtr/goleveldb/leveldb"
)
var (
// gcTargetRatio defines the target number of items
// in garbage collection index that will not be removed
// on garbage collection. The target number of items
// is calculated by gcTarget function. This value must be
// in range (0,1]. For example, with 0.9 value,
// garbage collection will leave 90% of defined capacity
// in database after its run. This prevents frequent
// garbage collection runs.
gcTargetRatio = 0.9
// gcBatchSize limits the number of chunks in a single
// leveldb batch on garbage collection.
gcBatchSize uint64 = 1000
)
// collectGarbageWorker is a long-running function that waits for
// the collectGarbageTrigger channel to signal a garbage collection
// run. A GC run iterates on gcIndex and removes older items
// from retrieval and other indexes.
func (db *DB) collectGarbageWorker() {
defer close(db.collectGarbageWorkerDone)
for {
select {
case <-db.collectGarbageTrigger:
// run a single garbage collection pass; if done
// is false, gcBatchSize was reached and
// another pass is needed
collectedCount, done, err := db.collectGarbage()
if err != nil {
log.Error("localstore collect garbage", "err", err)
}
// check if another gc run is needed
if !done {
db.triggerGarbageCollection()
}
if collectedCount > 0 && testHookCollectGarbage != nil {
testHookCollectGarbage(collectedCount)
}
case <-db.close:
return
}
}
}
// collectGarbage removes chunks from retrieval and other
// indexes if the maximal number of chunks in the database is reached.
// This function returns the number of removed chunks. If done
// is false, another call to this function is needed to collect
// the rest of the garbage, as the batch size limit was reached.
// This function is called in collectGarbageWorker.
func (db *DB) collectGarbage() (collectedCount uint64, done bool, err error) {
metricName := "localstore.gc"
metrics.GetOrRegisterCounter(metricName, nil).Inc(1)
defer totalTimeMetric(metricName, time.Now())
defer func() {
if err != nil {
metrics.GetOrRegisterCounter(metricName+".error", nil).Inc(1)
}
}()
batch := new(leveldb.Batch)
target := db.gcTarget()
// protect database from changing indexes and gcSize
db.batchMu.Lock()
defer db.batchMu.Unlock()
gcSize, err := db.gcSize.Get()
if err != nil {
return 0, true, err
}
metrics.GetOrRegisterGauge(metricName+".gcsize", nil).Update(int64(gcSize))
done = true
err = db.gcIndex.Iterate(func(item shed.Item) (stop bool, err error) {
if gcSize-collectedCount <= target {
return true, nil
}
metrics.GetOrRegisterGauge(metricName+".storets", nil).Update(item.StoreTimestamp)
metrics.GetOrRegisterGauge(metricName+".accessts", nil).Update(item.AccessTimestamp)
// delete from retrieve, pull, gc
db.retrievalDataIndex.DeleteInBatch(batch, item)
db.retrievalAccessIndex.DeleteInBatch(batch, item)
db.pullIndex.DeleteInBatch(batch, item)
db.gcIndex.DeleteInBatch(batch, item)
collectedCount++
if collectedCount >= gcBatchSize {
// batch size limit reached;
// another gc run is needed
done = false
return true, nil
}
return false, nil
}, nil)
if err != nil {
return 0, false, err
}
metrics.GetOrRegisterCounter(metricName+".collected-count", nil).Inc(int64(collectedCount))
db.gcSize.PutInBatch(batch, gcSize-collectedCount)
err = db.shed.WriteBatch(batch)
if err != nil {
metrics.GetOrRegisterCounter(metricName+".writebatch.err", nil).Inc(1)
return 0, false, err
}
return collectedCount, done, nil
}
// gcTarget returns the absolute garbage collection
// target value, calculated from db.capacity and gcTargetRatio.
func (db *DB) gcTarget() (target uint64) {
return uint64(float64(db.capacity) * gcTargetRatio)
}
// triggerGarbageCollection signals collectGarbageWorker
// to call collectGarbage.
func (db *DB) triggerGarbageCollection() {
select {
case db.collectGarbageTrigger <- struct{}{}:
case <-db.close:
default:
}
}
// incGCSizeInBatch changes gcSize field value
// by change which can be negative. This function
// must be called under batchMu lock.
func (db *DB) incGCSizeInBatch(batch *leveldb.Batch, change int64) (err error) {
if change == 0 {
return nil
}
gcSize, err := db.gcSize.Get()
if err != nil {
return err
}
var new uint64
if change > 0 {
new = gcSize + uint64(change)
} else {
// 'change' is an int64 and is negative here;
// a conversion with the correct sign is needed
c := uint64(-change)
if c > gcSize {
// protect against uint64 underflow
return nil
}
new = gcSize - c
}
db.gcSize.PutInBatch(batch, new)
// trigger garbage collection if we reached the capacity
if new >= db.capacity {
db.triggerGarbageCollection()
}
return nil
}
// testHookCollectGarbage is a hook that can provide
// information when a garbage collection run is done
// and how many items it removed.
var testHookCollectGarbage func(collectedCount uint64)
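For a concrete sense of these numbers: with the default capacity of 5,000,000 chunks and a gcTargetRatio of 0.9, gcTarget returns 4,500,000, so once gcSize reaches capacity a collection pass removes roughly 500,000 of the least recently accessed synced chunks, in leveldb batches of at most gcBatchSize (1000) items, re-triggering itself between batches until the target is met.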

363
storage/localstore/gc_test.go Normal file
View File

@ -0,0 +1,363 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"context"
"io/ioutil"
"math/rand"
"os"
"testing"
"time"
"github.com/ethersphere/swarm/chunk"
)
// TestDB_collectGarbageWorker tests garbage collection runs
// by uploading and syncing a number of chunks.
func TestDB_collectGarbageWorker(t *testing.T) {
testDB_collectGarbageWorker(t)
}
// TestDB_collectGarbageWorker_multipleBatches tests garbage
// collection runs by uploading and syncing a number of
// chunks in multiple smaller batches.
func TestDB_collectGarbageWorker_multipleBatches(t *testing.T) {
// lower the maximal number of chunks in a single
// gc batch to ensure multiple batches.
defer func(s uint64) { gcBatchSize = s }(gcBatchSize)
gcBatchSize = 2
testDB_collectGarbageWorker(t)
}
// testDB_collectGarbageWorker is a helper test function to test
// garbage collection runs by uploading and syncing a number of chunks.
func testDB_collectGarbageWorker(t *testing.T) {
t.Helper()
chunkCount := 150
db, cleanupFunc := newTestDB(t, &Options{
Capacity: 100,
})
testHookCollectGarbageChan := make(chan uint64)
defer setTestHookCollectGarbage(func(collectedCount uint64) {
select {
case testHookCollectGarbageChan <- collectedCount:
case <-db.close:
}
})()
defer cleanupFunc()
addrs := make([]chunk.Address, 0)
// upload random chunks
for i := 0; i < chunkCount; i++ {
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
err = db.Set(context.Background(), chunk.ModeSetSync, ch.Address())
if err != nil {
t.Fatal(err)
}
addrs = append(addrs, ch.Address())
}
gcTarget := db.gcTarget()
for {
select {
case <-testHookCollectGarbageChan:
case <-time.After(10 * time.Second):
t.Error("collect garbage timeout")
}
gcSize, err := db.gcSize.Get()
if err != nil {
t.Fatal(err)
}
if gcSize == gcTarget {
break
}
}
t.Run("pull index count", newItemsCountTest(db.pullIndex, int(gcTarget)))
t.Run("gc index count", newItemsCountTest(db.gcIndex, int(gcTarget)))
t.Run("gc size", newIndexGCSizeTest(db))
// the first synced chunk should be removed
t.Run("get the first synced chunk", func(t *testing.T) {
_, err := db.Get(context.Background(), chunk.ModeGetRequest, addrs[0])
if err != chunk.ErrChunkNotFound {
t.Errorf("got error %v, want %v", err, chunk.ErrChunkNotFound)
}
})
// last synced chunk should not be removed
t.Run("get most recent synced chunk", func(t *testing.T) {
_, err := db.Get(context.Background(), chunk.ModeGetRequest, addrs[len(addrs)-1])
if err != nil {
t.Fatal(err)
}
})
}
// TestDB_collectGarbageWorker_withRequests tests garbage
// collection runs by uploading, syncing and
// requesting a number of chunks.
func TestDB_collectGarbageWorker_withRequests(t *testing.T) {
db, cleanupFunc := newTestDB(t, &Options{
Capacity: 100,
})
defer cleanupFunc()
testHookCollectGarbageChan := make(chan uint64)
defer setTestHookCollectGarbage(func(collectedCount uint64) {
testHookCollectGarbageChan <- collectedCount
})()
addrs := make([]chunk.Address, 0)
// upload random chunks just up to the capacity
for i := 0; i < int(db.capacity)-1; i++ {
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
err = db.Set(context.Background(), chunk.ModeSetSync, ch.Address())
if err != nil {
t.Fatal(err)
}
addrs = append(addrs, ch.Address())
}
// set update gc test hook to signal when
// update gc goroutine is done by closing
// testHookUpdateGCChan channel
testHookUpdateGCChan := make(chan struct{})
resetTestHookUpdateGC := setTestHookUpdateGC(func() {
close(testHookUpdateGCChan)
})
// request the oldest synced chunk
// to move it to the end of the gc index
// so that it is not collected
_, err := db.Get(context.Background(), chunk.ModeGetRequest, addrs[0])
if err != nil {
t.Fatal(err)
}
// wait for update gc goroutine to finish for garbage
// collector to be correctly triggered after the last upload
select {
case <-testHookUpdateGCChan:
case <-time.After(10 * time.Second):
t.Fatal("updateGC was not called after getting chunk with ModeGetRequest")
}
// no need to wait for update gc hook anymore
resetTestHookUpdateGC()
// upload and sync another chunk to trigger
// garbage collection
ch := generateTestRandomChunk()
_, err = db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
err = db.Set(context.Background(), chunk.ModeSetSync, ch.Address())
if err != nil {
t.Fatal(err)
}
addrs = append(addrs, ch.Address())
// wait for garbage collection
gcTarget := db.gcTarget()
var totalCollectedCount uint64
for {
select {
case c := <-testHookCollectGarbageChan:
totalCollectedCount += c
case <-time.After(10 * time.Second):
t.Error("collect garbage timeout")
}
gcSize, err := db.gcSize.Get()
if err != nil {
t.Fatal(err)
}
if gcSize == gcTarget {
break
}
}
wantTotalCollectedCount := uint64(len(addrs)) - gcTarget
if totalCollectedCount != wantTotalCollectedCount {
t.Errorf("total collected chunks %v, want %v", totalCollectedCount, wantTotalCollectedCount)
}
t.Run("pull index count", newItemsCountTest(db.pullIndex, int(gcTarget)))
t.Run("gc index count", newItemsCountTest(db.gcIndex, int(gcTarget)))
t.Run("gc size", newIndexGCSizeTest(db))
// requested chunk should not be removed
t.Run("get requested chunk", func(t *testing.T) {
_, err := db.Get(context.Background(), chunk.ModeGetRequest, addrs[0])
if err != nil {
t.Fatal(err)
}
})
// the second synced chunk should be removed
t.Run("get gc-ed chunk", func(t *testing.T) {
_, err := db.Get(context.Background(), chunk.ModeGetRequest, addrs[1])
if err != chunk.ErrChunkNotFound {
t.Errorf("got error %v, want %v", err, chunk.ErrChunkNotFound)
}
})
// last synced chunk should not be removed
t.Run("get most recent synced chunk", func(t *testing.T) {
_, err := db.Get(context.Background(), chunk.ModeGetRequest, addrs[len(addrs)-1])
if err != nil {
t.Fatal(err)
}
})
}
// TestDB_gcSize checks if gcSize has a correct value after
// the database is initialized with existing data.
func TestDB_gcSize(t *testing.T) {
dir, err := ioutil.TempDir("", "localstore-stored-gc-size")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
baseKey := make([]byte, 32)
if _, err := rand.Read(baseKey); err != nil {
t.Fatal(err)
}
db, err := New(dir, baseKey, nil)
if err != nil {
t.Fatal(err)
}
count := 100
for i := 0; i < count; i++ {
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
err = db.Set(context.Background(), chunk.ModeSetSync, ch.Address())
if err != nil {
t.Fatal(err)
}
}
if err := db.Close(); err != nil {
t.Fatal(err)
}
db, err = New(dir, baseKey, nil)
if err != nil {
t.Fatal(err)
}
defer db.Close()
t.Run("gc index size", newIndexGCSizeTest(db))
}
// setTestHookCollectGarbage sets testHookCollectGarbage and
// returns a function that will reset it to the
// value before the change.
func setTestHookCollectGarbage(h func(collectedCount uint64)) (reset func()) {
current := testHookCollectGarbage
reset = func() { testHookCollectGarbage = current }
testHookCollectGarbage = h
return reset
}
// TestSetTestHookCollectGarbage tests if setTestHookCollectGarbage changes
// testHookCollectGarbage function correctly and if its reset function
// resets the original function.
func TestSetTestHookCollectGarbage(t *testing.T) {
// Set the current function after the test finishes.
defer func(h func(collectedCount uint64)) { testHookCollectGarbage = h }(testHookCollectGarbage)
// expected value for the unchanged function
original := 1
// expected value for the changed function
changed := 2
// this variable will be set with two different functions
var got int
// define the original (unchanged) functions
testHookCollectGarbage = func(_ uint64) {
got = original
}
// set got variable
testHookCollectGarbage(0)
// test if got variable is set correctly
if got != original {
t.Errorf("got hook value %v, want %v", got, original)
}
// set the new function
reset := setTestHookCollectGarbage(func(_ uint64) {
got = changed
})
// set got variable
testHookCollectGarbage(0)
// test if got variable is set correctly to changed value
if got != changed {
t.Errorf("got hook value %v, want %v", got, changed)
}
// set the function to the original one
reset()
// set got variable
testHookCollectGarbage(0)
// test if got variable is set correctly to original value
if got != original {
t.Errorf("got hook value %v, want %v", got, original)
}
}

216
storage/localstore/index_test.go Normal file
View File

@ -0,0 +1,216 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"bytes"
"context"
"math/rand"
"testing"
"github.com/ethersphere/swarm/chunk"
)
// TestDB_pullIndex validates the ordering of keys in the pull index.
// The pull index key contains a PO prefix which is calculated from
// the DB base key and the chunk address. This is not an Item field,
// unlike the ones checked in Mode tests.
// This test uploads chunks, sorts them in the expected order and
// validates that the pull index iterator iterates them in the same
// order.
func TestDB_pullIndex(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
chunkCount := 50
chunks := make([]testIndexChunk, chunkCount)
// upload random chunks
for i := 0; i < chunkCount; i++ {
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
chunks[i] = testIndexChunk{
Chunk: ch,
binID: uint64(i),
}
}
testItemsOrder(t, db.pullIndex, chunks, func(i, j int) (less bool) {
poi := chunk.Proximity(db.baseKey, chunks[i].Address())
poj := chunk.Proximity(db.baseKey, chunks[j].Address())
if poi < poj {
return true
}
if poi > poj {
return false
}
if chunks[i].binID < chunks[j].binID {
return true
}
if chunks[i].binID > chunks[j].binID {
return false
}
return bytes.Compare(chunks[i].Address(), chunks[j].Address()) == -1
})
}
// TestDB_gcIndex validates the garbage collection index by uploading
// a number of chunks and performing operations on them using sync,
// access and request modes.
func TestDB_gcIndex(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
chunkCount := 50
chunks := make([]testIndexChunk, chunkCount)
// upload random chunks
for i := 0; i < chunkCount; i++ {
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
chunks[i] = testIndexChunk{
Chunk: ch,
}
}
// check if all chunks are stored
newItemsCountTest(db.pullIndex, chunkCount)(t)
// check that chunks are not collectable for garbage
newItemsCountTest(db.gcIndex, 0)(t)
// set update gc test hook to signal when
// update gc goroutine is done by sending to
// testHookUpdateGCChan channel, which is
// used to wait for indexes change verifications
testHookUpdateGCChan := make(chan struct{})
defer setTestHookUpdateGC(func() {
testHookUpdateGCChan <- struct{}{}
})()
t.Run("request unsynced", func(t *testing.T) {
ch := chunks[1]
_, err := db.Get(context.Background(), chunk.ModeGetRequest, ch.Address())
if err != nil {
t.Fatal(err)
}
// wait for update gc goroutine to be done
<-testHookUpdateGCChan
// the chunk is not synced, so it
// should not be in the garbage collection index
newItemsCountTest(db.gcIndex, 0)(t)
newIndexGCSizeTest(db)(t)
})
t.Run("sync one chunk", func(t *testing.T) {
ch := chunks[0]
err := db.Set(context.Background(), chunk.ModeSetSync, ch.Address())
if err != nil {
t.Fatal(err)
}
// the chunk is synced and should be in gc index
newItemsCountTest(db.gcIndex, 1)(t)
newIndexGCSizeTest(db)(t)
})
t.Run("sync all chunks", func(t *testing.T) {
for i := range chunks {
err := db.Set(context.Background(), chunk.ModeSetSync, chunks[i].Address())
if err != nil {
t.Fatal(err)
}
}
testItemsOrder(t, db.gcIndex, chunks, nil)
newIndexGCSizeTest(db)(t)
})
t.Run("request one chunk", func(t *testing.T) {
i := 6
_, err := db.Get(context.Background(), chunk.ModeGetRequest, chunks[i].Address())
if err != nil {
t.Fatal(err)
}
// wait for update gc goroutine to be done
<-testHookUpdateGCChan
// move the chunk to the end of the expected gc
c := chunks[i]
chunks = append(chunks[:i], chunks[i+1:]...)
chunks = append(chunks, c)
testItemsOrder(t, db.gcIndex, chunks, nil)
newIndexGCSizeTest(db)(t)
})
t.Run("random chunk request", func(t *testing.T) {
rand.Shuffle(len(chunks), func(i, j int) {
chunks[i], chunks[j] = chunks[j], chunks[i]
})
for _, ch := range chunks {
_, err := db.Get(context.Background(), chunk.ModeGetRequest, ch.Address())
if err != nil {
t.Fatal(err)
}
// wait for update gc goroutine to be done
<-testHookUpdateGCChan
}
testItemsOrder(t, db.gcIndex, chunks, nil)
newIndexGCSizeTest(db)(t)
})
t.Run("remove one chunk", func(t *testing.T) {
i := 3
err := db.Set(context.Background(), chunk.ModeSetRemove, chunks[i].Address())
if err != nil {
t.Fatal(err)
}
// remove the chunk from the expected chunks in gc index
chunks = append(chunks[:i], chunks[i+1:]...)
testItemsOrder(t, db.gcIndex, chunks, nil)
newIndexGCSizeTest(db)(t)
})
}

400
storage/localstore/localstore.go Normal file
View File

@ -0,0 +1,400 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"encoding/binary"
"errors"
"sync"
"time"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/metrics"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/shed"
"github.com/ethersphere/swarm/storage/mock"
)
// DB implements chunk.Store.
var _ chunk.Store = &DB{}
var (
// ErrInvalidMode is returned when an unknown Mode
// is provided to the function.
ErrInvalidMode = errors.New("invalid mode")
// ErrAddressLockTimeout is returned when the same chunk
// is updated in parallel and one of the updates
// takes longer than the configured timeout duration.
ErrAddressLockTimeout = errors.New("address lock timeout")
)
var (
// Default value for Capacity DB option.
defaultCapacity uint64 = 5000000
// Limit the number of goroutines created by Getters
// that call the updateGC function. Value 0 sets no limit.
maxParallelUpdateGC = 1000
)
// DB is the local store implementation and holds
// database related objects.
type DB struct {
shed *shed.DB
// schema name of loaded data
schemaName shed.StringField
// retrieval indexes
retrievalDataIndex shed.Index
retrievalAccessIndex shed.Index
// push syncing index
pushIndex shed.Index
// push syncing subscriptions triggers
pushTriggers []chan struct{}
pushTriggersMu sync.RWMutex
// pull syncing index
pullIndex shed.Index
// pull syncing subscriptions triggers per bin
pullTriggers map[uint8][]chan struct{}
pullTriggersMu sync.RWMutex
// binIDs stores the latest chunk serial ID for every
// proximity order bin
binIDs shed.Uint64Vector
// garbage collection index
gcIndex shed.Index
// field that stores the number of items in gc index
gcSize shed.Uint64Field
// garbage collection is triggered when gcSize exceeds
// the capacity value
capacity uint64
// triggers garbage collection event loop
collectGarbageTrigger chan struct{}
// a buffered channel acting as a semaphore
// to limit the maximal number of goroutines
// created by Getters to call updateGC function
updateGCSem chan struct{}
// a wait group to ensure all updateGC goroutines
// are done before closing the database
updateGCWG sync.WaitGroup
baseKey []byte
batchMu sync.Mutex
// this channel is closed when close function is called
// to terminate other goroutines
close chan struct{}
// protect Close method from exiting before
// garbage collection and gc size write workers
// are done
collectGarbageWorkerDone chan struct{}
}
// Options struct holds optional parameters for configuring DB.
type Options struct {
// MockStore is a mock node store that is used to store
// chunk data in a central store. It can be used to reduce
// total storage space requirements in testing large number
// of swarm nodes with chunk data deduplication provided by
// the mock global store.
MockStore *mock.NodeStore
// Capacity is a limit that triggers garbage collection when
// number of items in gcIndex equals or exceeds it.
Capacity uint64
// MetricsPrefix defines a prefix for metrics names.
MetricsPrefix string
}
// New returns a new DB. All fields and indexes are initialized
// and possible conflicts with the schema of an existing database
// are checked. One goroutine for writing batches is created.
func New(path string, baseKey []byte, o *Options) (db *DB, err error) {
if o == nil {
// default options
o = &Options{
Capacity: defaultCapacity,
}
}
db = &DB{
capacity: o.Capacity,
baseKey: baseKey,
// channel collectGarbageTrigger
// needs to be buffered with the size of 1
// to signal another event if it
// is triggered during already running function
collectGarbageTrigger: make(chan struct{}, 1),
close: make(chan struct{}),
collectGarbageWorkerDone: make(chan struct{}),
}
if db.capacity <= 0 {
db.capacity = defaultCapacity
}
if maxParallelUpdateGC > 0 {
db.updateGCSem = make(chan struct{}, maxParallelUpdateGC)
}
db.shed, err = shed.NewDB(path, o.MetricsPrefix)
if err != nil {
return nil, err
}
// Identify current storage schema by arbitrary name.
db.schemaName, err = db.shed.NewStringField("schema-name")
if err != nil {
return nil, err
}
schemaName, err := db.schemaName.Get()
if err != nil {
return nil, err
}
if schemaName == "" {
// initial new localstore run
err := db.schemaName.Put(DbSchemaSanctuary)
if err != nil {
return nil, err
}
}
// Persist gc size.
db.gcSize, err = db.shed.NewUint64Field("gc-size")
if err != nil {
return nil, err
}
// Functions for retrieval data index.
var (
encodeValueFunc func(fields shed.Item) (value []byte, err error)
decodeValueFunc func(keyItem shed.Item, value []byte) (e shed.Item, err error)
)
if o.MockStore != nil {
encodeValueFunc = func(fields shed.Item) (value []byte, err error) {
b := make([]byte, 16)
binary.BigEndian.PutUint64(b[:8], fields.BinID)
binary.BigEndian.PutUint64(b[8:16], uint64(fields.StoreTimestamp))
err = o.MockStore.Put(fields.Address, fields.Data)
if err != nil {
return nil, err
}
return b, nil
}
decodeValueFunc = func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
e.StoreTimestamp = int64(binary.BigEndian.Uint64(value[8:16]))
e.BinID = binary.BigEndian.Uint64(value[:8])
e.Data, err = o.MockStore.Get(keyItem.Address)
return e, err
}
} else {
encodeValueFunc = func(fields shed.Item) (value []byte, err error) {
b := make([]byte, 16)
binary.BigEndian.PutUint64(b[:8], fields.BinID)
binary.BigEndian.PutUint64(b[8:16], uint64(fields.StoreTimestamp))
value = append(b, fields.Data...)
return value, nil
}
decodeValueFunc = func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
e.StoreTimestamp = int64(binary.BigEndian.Uint64(value[8:16]))
e.BinID = binary.BigEndian.Uint64(value[:8])
e.Data = value[16:]
return e, nil
}
}
// Index storing actual chunk address, data and bin id.
db.retrievalDataIndex, err = db.shed.NewIndex("Address->StoreTimestamp|BinID|Data", shed.IndexFuncs{
EncodeKey: func(fields shed.Item) (key []byte, err error) {
return fields.Address, nil
},
DecodeKey: func(key []byte) (e shed.Item, err error) {
e.Address = key
return e, nil
},
EncodeValue: encodeValueFunc,
DecodeValue: decodeValueFunc,
})
if err != nil {
return nil, err
}
// Index storing access timestamp for a particular address.
// It is needed in order to update gc index keys for iteration order.
db.retrievalAccessIndex, err = db.shed.NewIndex("Address->AccessTimestamp", shed.IndexFuncs{
EncodeKey: func(fields shed.Item) (key []byte, err error) {
return fields.Address, nil
},
DecodeKey: func(key []byte) (e shed.Item, err error) {
e.Address = key
return e, nil
},
EncodeValue: func(fields shed.Item) (value []byte, err error) {
b := make([]byte, 8)
binary.BigEndian.PutUint64(b, uint64(fields.AccessTimestamp))
return b, nil
},
DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
e.AccessTimestamp = int64(binary.BigEndian.Uint64(value))
return e, nil
},
})
if err != nil {
return nil, err
}
// pull index allows history and live syncing per po bin
db.pullIndex, err = db.shed.NewIndex("PO|BinID->Hash", shed.IndexFuncs{
EncodeKey: func(fields shed.Item) (key []byte, err error) {
key = make([]byte, 41)
key[0] = db.po(fields.Address)
binary.BigEndian.PutUint64(key[1:9], fields.BinID)
return key, nil
},
DecodeKey: func(key []byte) (e shed.Item, err error) {
e.BinID = binary.BigEndian.Uint64(key[1:9])
return e, nil
},
EncodeValue: func(fields shed.Item) (value []byte, err error) {
return fields.Address, nil
},
DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
e.Address = value
return e, nil
},
})
if err != nil {
return nil, err
}
// create a vector for bin IDs
db.binIDs, err = db.shed.NewUint64Vector("bin-ids")
if err != nil {
return nil, err
}
// create a pull syncing triggers used by SubscribePull function
db.pullTriggers = make(map[uint8][]chan struct{})
// push index contains as yet unsynced chunks
db.pushIndex, err = db.shed.NewIndex("StoreTimestamp|Hash->Tags", shed.IndexFuncs{
EncodeKey: func(fields shed.Item) (key []byte, err error) {
key = make([]byte, 40)
binary.BigEndian.PutUint64(key[:8], uint64(fields.StoreTimestamp))
copy(key[8:], fields.Address[:])
return key, nil
},
DecodeKey: func(key []byte) (e shed.Item, err error) {
e.Address = key[8:]
e.StoreTimestamp = int64(binary.BigEndian.Uint64(key[:8]))
return e, nil
},
EncodeValue: func(fields shed.Item) (value []byte, err error) {
return nil, nil
},
DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
return e, nil
},
})
if err != nil {
return nil, err
}
// create a push syncing triggers used by SubscribePush function
db.pushTriggers = make([]chan struct{}, 0)
// gc index for removable chunks ordered by ascending last access time
db.gcIndex, err = db.shed.NewIndex("AccessTimestamp|BinID|Hash->nil", shed.IndexFuncs{
EncodeKey: func(fields shed.Item) (key []byte, err error) {
b := make([]byte, 16, 16+len(fields.Address))
binary.BigEndian.PutUint64(b[:8], uint64(fields.AccessTimestamp))
binary.BigEndian.PutUint64(b[8:16], fields.BinID)
key = append(b, fields.Address...)
return key, nil
},
DecodeKey: func(key []byte) (e shed.Item, err error) {
e.AccessTimestamp = int64(binary.BigEndian.Uint64(key[:8]))
e.BinID = binary.BigEndian.Uint64(key[8:16])
e.Address = key[16:]
return e, nil
},
EncodeValue: func(fields shed.Item) (value []byte, err error) {
return nil, nil
},
DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
return e, nil
},
})
if err != nil {
return nil, err
}
// start garbage collection worker
go db.collectGarbageWorker()
return db, nil
}
// Close closes the underlying database.
func (db *DB) Close() (err error) {
close(db.close)
db.updateGCWG.Wait()
// wait for gc worker to
// return before closing the shed
select {
case <-db.collectGarbageWorkerDone:
case <-time.After(5 * time.Second):
log.Error("localstore: collect garbage worker did not return after db close")
}
return db.shed.Close()
}
// po computes the proximity order between the address
// and database base key.
func (db *DB) po(addr chunk.Address) (bin uint8) {
return uint8(chunk.Proximity(db.baseKey, addr))
}
// chunkToItem creates new Item with data provided by the Chunk.
func chunkToItem(ch chunk.Chunk) shed.Item {
return shed.Item{
Address: ch.Address(),
Data: ch.Data(),
}
}
// addressToItem creates new Item with a provided address.
func addressToItem(addr chunk.Address) shed.Item {
return shed.Item{
Address: addr,
}
}
// now is a helper function that returns a current unix timestamp
// in UTC timezone.
// It is set in the init function for usage in production, and
// optionally overridden in tests for data validation.
var now func() int64
func init() {
// set the now function
now = func() (t int64) {
return time.Now().UTC().UnixNano()
}
}
// totalTimeMetric logs a message about time between provided start time
// and the time when the function is called and sends a resetting timer metric
// with provided name appended with ".total-time".
func totalTimeMetric(name string, start time.Time) {
totalTime := time.Since(start)
log.Trace(name+" total time", "time", totalTime)
metrics.GetOrRegisterResettingTimer(name+".total-time", nil).Update(totalTime)
}
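A minimal construction sketch for the API above (the directory and capacity are placeholders; in a node, baseKey would be the 32-byte overlay address; error handling elided):
baseKey := make([]byte, 32)
rand.Read(baseKey)
db, _ := localstore.New("/var/swarm/localstore", baseKey, &localstore.Options{
Capacity: 1000000, // gc is triggered when gcIndex holds one million chunks
})
defer db.Close()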

453
storage/localstore/localstore_test.go Normal file
View File

@ -0,0 +1,453 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"bytes"
"context"
"fmt"
"io/ioutil"
"math/rand"
"os"
"runtime"
"sort"
"sync"
"testing"
"time"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/shed"
"github.com/syndtr/goleveldb/leveldb"
)
func init() {
// Some of the tests in localstore package rely on the same ordering of
// items uploaded or accessed compared to the ordering of items in indexes
// that contain StoreTimestamp or AccessTimestamp in keys. In tests
// where the same order is required from the database as the order
// in which chunks are put or accessed, if the StoreTimestamp or
// AccessTimestamp are the same for two or more sequential items,
// their order in the database will be based on the chunk address value,
// in which case the ordering of items/chunks stored in a test slice
// will not be the same. To ensure the same ordering in the database on
// such indexes on Windows systems, an additional short sleep is added to
// the now function.
if runtime.GOOS == "windows" {
setNow(func() int64 {
time.Sleep(time.Microsecond)
return time.Now().UTC().UnixNano()
})
}
}
// TestDB validates if the chunk can be uploaded and
// correctly retrieved.
func TestDB(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
got, err := db.Get(context.Background(), chunk.ModeGetRequest, ch.Address())
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(got.Address(), ch.Address()) {
t.Errorf("got address %x, want %x", got.Address(), ch.Address())
}
if !bytes.Equal(got.Data(), ch.Data()) {
t.Errorf("got data %x, want %x", got.Data(), ch.Data())
}
}
// TestDB_updateGCSem tests maxParallelUpdateGC limit.
// This test temporarily sets the limit to a low number,
// makes the updateGC function execution time longer by
// setting a custom testHookUpdateGC function with a sleep,
// and counts the current and maximal number of goroutines.
func TestDB_updateGCSem(t *testing.T) {
updateGCSleep := time.Second
var count int
var max int
var mu sync.Mutex
defer setTestHookUpdateGC(func() {
mu.Lock()
// add to the count of current goroutines
count++
if count > max {
// set maximal detected numbers of goroutines
max = count
}
mu.Unlock()
// wait for some time to ensure multiple parallel goroutines
time.Sleep(updateGCSleep)
mu.Lock()
count--
mu.Unlock()
})()
defer func(m int) { maxParallelUpdateGC = m }(maxParallelUpdateGC)
maxParallelUpdateGC = 3
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
// get more chunks than maxParallelUpdateGC
// in a time shorter than updateGCSleep
for i := 0; i < 5; i++ {
_, err = db.Get(context.Background(), chunk.ModeGetRequest, ch.Address())
if err != nil {
t.Fatal(err)
}
}
if max != maxParallelUpdateGC {
t.Errorf("got max %v, want %v", max, maxParallelUpdateGC)
}
}
// newTestDB is a helper function that constructs a
// temporary database and returns a cleanup function that must
// be called to remove the data.
func newTestDB(t testing.TB, o *Options) (db *DB, cleanupFunc func()) {
t.Helper()
dir, err := ioutil.TempDir("", "localstore-test")
if err != nil {
t.Fatal(err)
}
cleanupFunc = func() { os.RemoveAll(dir) }
baseKey := make([]byte, 32)
if _, err := rand.Read(baseKey); err != nil {
t.Fatal(err)
}
db, err = New(dir, baseKey, o)
if err != nil {
cleanupFunc()
t.Fatal(err)
}
cleanupFunc = func() {
err := db.Close()
if err != nil {
t.Error(err)
}
os.RemoveAll(dir)
}
return db, cleanupFunc
}
func init() {
// needed for generateTestRandomChunk
rand.Seed(time.Now().UnixNano())
}
// generateTestRandomChunk generates a Chunk that is not
// valid, but it contains a random key and a random value.
// This function is faster than storage.generateTestRandomChunk
// which generates a valid chunk.
// Some tests in this package do not need valid chunks, just
// random data, and their execution time can be decreased
// using this function.
func generateTestRandomChunk() chunk.Chunk {
data := make([]byte, chunk.DefaultSize)
rand.Read(data)
key := make([]byte, 32)
rand.Read(key)
return chunk.NewChunk(key, data)
}
// TestGenerateTestRandomChunk validates that
// generateTestRandomChunk returns random data by comparing
// two generated chunks.
func TestGenerateTestRandomChunk(t *testing.T) {
c1 := generateTestRandomChunk()
c2 := generateTestRandomChunk()
addrLen := len(c1.Address())
if addrLen != 32 {
t.Errorf("first chunk address length %v, want %v", addrLen, 32)
}
dataLen := len(c1.Data())
if dataLen != chunk.DefaultSize {
t.Errorf("first chunk data length %v, want %v", dataLen, chunk.DefaultSize)
}
addrLen = len(c2.Address())
if addrLen != 32 {
t.Errorf("second chunk address length %v, want %v", addrLen, 32)
}
dataLen = len(c2.Data())
if dataLen != chunk.DefaultSize {
t.Errorf("second chunk data length %v, want %v", dataLen, chunk.DefaultSize)
}
if bytes.Equal(c1.Address(), c2.Address()) {
t.Error("fake chunks addresses do not differ")
}
if bytes.Equal(c1.Data(), c2.Data()) {
t.Error("fake chunks data bytes do not differ")
}
}
// newRetrieveIndexesTest returns a test function that validates if the right
// chunk values are in the retrieval indexes.
func newRetrieveIndexesTest(db *DB, chunk chunk.Chunk, storeTimestamp, accessTimestamp int64) func(t *testing.T) {
return func(t *testing.T) {
item, err := db.retrievalDataIndex.Get(addressToItem(chunk.Address()))
if err != nil {
t.Fatal(err)
}
validateItem(t, item, chunk.Address(), chunk.Data(), storeTimestamp, 0)
// access index should not be set
wantErr := leveldb.ErrNotFound
item, err = db.retrievalAccessIndex.Get(addressToItem(chunk.Address()))
if err != wantErr {
t.Errorf("got error %v, want %v", err, wantErr)
}
}
}
// newRetrieveIndexesTestWithAccess returns a test function that validates if the right
// chunk values are in the retrieval indexes when access time must be stored.
func newRetrieveIndexesTestWithAccess(db *DB, ch chunk.Chunk, storeTimestamp, accessTimestamp int64) func(t *testing.T) {
return func(t *testing.T) {
item, err := db.retrievalDataIndex.Get(addressToItem(ch.Address()))
if err != nil {
t.Fatal(err)
}
validateItem(t, item, ch.Address(), ch.Data(), storeTimestamp, 0)
if accessTimestamp > 0 {
item, err = db.retrievalAccessIndex.Get(addressToItem(ch.Address()))
if err != nil {
t.Fatal(err)
}
validateItem(t, item, ch.Address(), nil, 0, accessTimestamp)
}
}
}
// newPullIndexTest returns a test function that validates if the right
// chunk values are in the pull index.
func newPullIndexTest(db *DB, ch chunk.Chunk, binID uint64, wantError error) func(t *testing.T) {
return func(t *testing.T) {
item, err := db.pullIndex.Get(shed.Item{
Address: ch.Address(),
BinID: binID,
})
if err != wantError {
t.Errorf("got error %v, want %v", err, wantError)
}
if err == nil {
validateItem(t, item, ch.Address(), nil, 0, 0)
}
}
}
// newPushIndexTest returns a test function that validates if the right
// chunk values are in the push index.
func newPushIndexTest(db *DB, ch chunk.Chunk, storeTimestamp int64, wantError error) func(t *testing.T) {
return func(t *testing.T) {
item, err := db.pushIndex.Get(shed.Item{
Address: ch.Address(),
StoreTimestamp: storeTimestamp,
})
if err != wantError {
t.Errorf("got error %v, want %v", err, wantError)
}
if err == nil {
validateItem(t, item, ch.Address(), nil, storeTimestamp, 0)
}
}
}
// newGCIndexTest returns a test function that validates if the right
// chunk values are in the gc index.
func newGCIndexTest(db *DB, chunk chunk.Chunk, storeTimestamp, accessTimestamp int64, binID uint64) func(t *testing.T) {
return func(t *testing.T) {
item, err := db.gcIndex.Get(shed.Item{
Address: chunk.Address(),
BinID: binID,
AccessTimestamp: accessTimestamp,
})
if err != nil {
t.Fatal(err)
}
validateItem(t, item, chunk.Address(), nil, 0, accessTimestamp)
}
}
// newItemsCountTest returns a test function that validates if
// an index contains expected number of key/value pairs.
func newItemsCountTest(i shed.Index, want int) func(t *testing.T) {
return func(t *testing.T) {
var c int
err := i.Iterate(func(item shed.Item) (stop bool, err error) {
c++
return
}, nil)
if err != nil {
t.Fatal(err)
}
if c != want {
t.Errorf("got %v items in index, want %v", c, want)
}
}
}
// newIndexGCSizeTest returns a test function that validates if DB.gcSize
// value is the same as the number of items in DB.gcIndex.
func newIndexGCSizeTest(db *DB) func(t *testing.T) {
return func(t *testing.T) {
var want uint64
err := db.gcIndex.Iterate(func(item shed.Item) (stop bool, err error) {
want++
return
}, nil)
if err != nil {
t.Fatal(err)
}
got, err := db.gcSize.Get()
if err != nil {
t.Fatal(err)
}
if got != want {
t.Errorf("got gc size %v, want %v", got, want)
}
}
}
// testIndexChunk embeds chunk.Chunk with additional data that is stored
// in the database. It is used for index value validations.
type testIndexChunk struct {
chunk.Chunk
binID uint64
}
// testItemsOrder tests the order of chunks in the index. If sortFunc is not nil,
// chunks will be sorted with it before validation.
func testItemsOrder(t *testing.T, i shed.Index, chunks []testIndexChunk, sortFunc func(i, j int) (less bool)) {
newItemsCountTest(i, len(chunks))(t)
if sortFunc != nil {
sort.Slice(chunks, sortFunc)
}
var cursor int
err := i.Iterate(func(item shed.Item) (stop bool, err error) {
want := chunks[cursor].Address()
got := item.Address
if !bytes.Equal(got, want) {
return true, fmt.Errorf("got address %x at position %v, want %x", got, cursor, want)
}
cursor++
return false, nil
}, nil)
if err != nil {
t.Fatal(err)
}
}
// validateItem is a helper function that checks Item values.
func validateItem(t *testing.T, item shed.Item, address, data []byte, storeTimestamp, accessTimestamp int64) {
t.Helper()
if !bytes.Equal(item.Address, address) {
t.Errorf("got item address %x, want %x", item.Address, address)
}
if !bytes.Equal(item.Data, data) {
t.Errorf("got item data %x, want %x", item.Data, data)
}
if item.StoreTimestamp != storeTimestamp {
t.Errorf("got item store timestamp %v, want %v", item.StoreTimestamp, storeTimestamp)
}
if item.AccessTimestamp != accessTimestamp {
t.Errorf("got item access timestamp %v, want %v", item.AccessTimestamp, accessTimestamp)
}
}
// setNow replaces now function and
// returns a function that will reset it to the
// value before the change.
func setNow(f func() int64) (reset func()) {
current := now
reset = func() { now = current }
now = f
return reset
}
// TestSetNow tests if setNow function changes now function
// correctly and if its reset function resets the original function.
func TestSetNow(t *testing.T) {
// set the current function after the test finishes
defer func(f func() int64) { now = f }(now)
// expected value for the unchanged function
var original int64 = 1
// expected value for the changed function
var changed int64 = 2
// define the original (unchanged) functions
now = func() int64 {
return original
}
// get the time
got := now()
// test if got variable is set correctly
if got != original {
t.Errorf("got now value %v, want %v", got, original)
}
// set the new function
reset := setNow(func() int64 {
return changed
})
// get the time
got = now()
// test if got variable is set correctly to changed value
if got != changed {
t.Errorf("got hook value %v, want %v", got, changed)
}
// set the function to the original one
reset()
// get the time
got = now()
// test if got variable is set correctly to original value
if got != original {
t.Errorf("got hook value %v, want %v", got, original)
}
}

148
storage/localstore/mode_get.go Normal file
View File

@ -0,0 +1,148 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"context"
"fmt"
"time"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/metrics"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/shed"
"github.com/syndtr/goleveldb/leveldb"
)
// Get returns a chunk from the database. If the chunk is
// not found, chunk.ErrChunkNotFound will be returned.
// All required indexes will be updated as required by the
// given ModeGet. Get is required to implement the chunk.Store
// interface.
func (db *DB) Get(ctx context.Context, mode chunk.ModeGet, addr chunk.Address) (ch chunk.Chunk, err error) {
metricName := fmt.Sprintf("localstore.Get.%s", mode)
metrics.GetOrRegisterCounter(metricName, nil).Inc(1)
defer totalTimeMetric(metricName, time.Now())
defer func() {
if err != nil {
metrics.GetOrRegisterCounter(metricName+".error", nil).Inc(1)
}
}()
out, err := db.get(mode, addr)
if err != nil {
if err == leveldb.ErrNotFound {
return nil, chunk.ErrChunkNotFound
}
return nil, err
}
return chunk.NewChunk(out.Address, out.Data), nil
}
// get returns Item from the retrieval index
// and updates other indexes.
func (db *DB) get(mode chunk.ModeGet, addr chunk.Address) (out shed.Item, err error) {
item := addressToItem(addr)
out, err = db.retrievalDataIndex.Get(item)
if err != nil {
return out, err
}
switch mode {
// update the access timestamp and gc index
case chunk.ModeGetRequest:
if db.updateGCSem != nil {
// wait before creating new goroutines
// if the updateGCSem buffer is full
db.updateGCSem <- struct{}{}
}
db.updateGCWG.Add(1)
go func() {
defer db.updateGCWG.Done()
if db.updateGCSem != nil {
// free a spot in updateGCSem buffer
// for a new goroutine
defer func() { <-db.updateGCSem }()
}
metricName := "localstore.updateGC"
metrics.GetOrRegisterCounter(metricName, nil).Inc(1)
defer totalTimeMetric(metricName, time.Now())
err := db.updateGC(out)
if err != nil {
metrics.GetOrRegisterCounter(metricName+".error", nil).Inc(1)
log.Error("localstore update gc", "err", err)
}
// if gc update hook is defined, call it
if testHookUpdateGC != nil {
testHookUpdateGC()
}
}()
// no updates to indexes
case chunk.ModeGetSync:
case chunk.ModeGetLookup:
default:
return out, ErrInvalidMode
}
return out, nil
}
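// Illustrative sketch, not part of the original file: a typical caller
// retrieves a chunk for a client request with ModeGetRequest so that the
// access timestamp and gc index are updated in the background. The
// function name exampleRetrieve is hypothetical.
func exampleRetrieve(ctx context.Context, db *DB, addr chunk.Address) (chunk.Chunk, error) {
	ch, err := db.Get(ctx, chunk.ModeGetRequest, addr)
	if err == chunk.ErrChunkNotFound {
		// the chunk is not stored locally; the caller is expected
		// to request it from the network
		return nil, err
	}
	return ch, err
}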
// updateGC updates the garbage collection index for
// a single item. The provided item is expected to have
// only the Address and Data fields with non-zero values,
// which is ensured by the get function.
func (db *DB) updateGC(item shed.Item) (err error) {
db.batchMu.Lock()
defer db.batchMu.Unlock()
batch := new(leveldb.Batch)
// update accessTimeStamp in retrieve, gc
i, err := db.retrievalAccessIndex.Get(item)
switch err {
case nil:
item.AccessTimestamp = i.AccessTimestamp
case leveldb.ErrNotFound:
// no chunk accesses
default:
return err
}
if item.AccessTimestamp == 0 {
// chunk is not yet synced
// do not add it to the gc index
return nil
}
// delete current entry from the gc index
db.gcIndex.DeleteInBatch(batch, item)
// update access timestamp
item.AccessTimestamp = now()
// update retrieve access index
db.retrievalAccessIndex.PutInBatch(batch, item)
// add new entry to gc index
db.gcIndex.PutInBatch(batch, item)
return db.shed.WriteBatch(batch)
}
// testHookUpdateGC is a hook that can provide
// information when a garbage collection index is updated.
var testHookUpdateGC func()

View File

@ -0,0 +1,238 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"bytes"
"context"
"testing"
"time"
"github.com/ethersphere/swarm/chunk"
)
// TestModeGetRequest validates ModeGetRequest index values on the provided DB.
func TestModeGetRequest(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
uploadTimestamp := time.Now().UTC().UnixNano()
defer setNow(func() (t int64) {
return uploadTimestamp
})()
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
// set update gc test hook to signal when
// update gc goroutine is done by sending to
// testHookUpdateGCChan channel, which is
// used to wait for garbage collection index
// changes
testHookUpdateGCChan := make(chan struct{})
defer setTestHookUpdateGC(func() {
testHookUpdateGCChan <- struct{}{}
})()
t.Run("get unsynced", func(t *testing.T) {
got, err := db.Get(context.Background(), chunk.ModeGetRequest, ch.Address())
if err != nil {
t.Fatal(err)
}
// wait for update gc goroutine to be done
<-testHookUpdateGCChan
if !bytes.Equal(got.Address(), ch.Address()) {
t.Errorf("got chunk address %x, want %x", got.Address(), ch.Address())
}
if !bytes.Equal(got.Data(), ch.Data()) {
t.Errorf("got chunk data %x, want %x", got.Data(), ch.Data())
}
t.Run("retrieve indexes", newRetrieveIndexesTestWithAccess(db, ch, uploadTimestamp, 0))
t.Run("gc index count", newItemsCountTest(db.gcIndex, 0))
t.Run("gc size", newIndexGCSizeTest(db))
})
// set chunk to synced state
err = db.Set(context.Background(), chunk.ModeSetSync, ch.Address())
if err != nil {
t.Fatal(err)
}
t.Run("first get", func(t *testing.T) {
got, err := db.Get(context.Background(), chunk.ModeGetRequest, ch.Address())
if err != nil {
t.Fatal(err)
}
// wait for update gc goroutine to be done
<-testHookUpdateGCChan
if !bytes.Equal(got.Address(), ch.Address()) {
t.Errorf("got chunk address %x, want %x", got.Address(), ch.Address())
}
if !bytes.Equal(got.Data(), ch.Data()) {
t.Errorf("got chunk data %x, want %x", got.Data(), ch.Data())
}
t.Run("retrieve indexes", newRetrieveIndexesTestWithAccess(db, ch, uploadTimestamp, uploadTimestamp))
t.Run("gc index", newGCIndexTest(db, ch, uploadTimestamp, uploadTimestamp, 1))
t.Run("gc index count", newItemsCountTest(db.gcIndex, 1))
t.Run("gc size", newIndexGCSizeTest(db))
})
t.Run("second get", func(t *testing.T) {
accessTimestamp := time.Now().UTC().UnixNano()
defer setNow(func() (t int64) {
return accessTimestamp
})()
got, err := db.Get(context.Background(), chunk.ModeGetRequest, ch.Address())
if err != nil {
t.Fatal(err)
}
// wait for update gc goroutine to be done
<-testHookUpdateGCChan
if !bytes.Equal(got.Address(), ch.Address()) {
t.Errorf("got chunk address %x, want %x", got.Address(), ch.Address())
}
if !bytes.Equal(got.Data(), ch.Data()) {
t.Errorf("got chunk data %x, want %x", got.Data(), ch.Data())
}
t.Run("retrieve indexes", newRetrieveIndexesTestWithAccess(db, ch, uploadTimestamp, accessTimestamp))
t.Run("gc index", newGCIndexTest(db, ch, uploadTimestamp, accessTimestamp, 1))
t.Run("gc index count", newItemsCountTest(db.gcIndex, 1))
t.Run("gc size", newIndexGCSizeTest(db))
})
}
// TestModeGetSync validates ModeGetSync index values on the provided DB.
func TestModeGetSync(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
uploadTimestamp := time.Now().UTC().UnixNano()
defer setNow(func() (t int64) {
return uploadTimestamp
})()
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
got, err := db.Get(context.Background(), chunk.ModeGetSync, ch.Address())
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(got.Address(), ch.Address()) {
t.Errorf("got chunk address %x, want %x", got.Address(), ch.Address())
}
if !bytes.Equal(got.Data(), ch.Data()) {
t.Errorf("got chunk data %x, want %x", got.Data(), ch.Data())
}
t.Run("retrieve indexes", newRetrieveIndexesTestWithAccess(db, ch, uploadTimestamp, 0))
t.Run("gc index count", newItemsCountTest(db.gcIndex, 0))
t.Run("gc size", newIndexGCSizeTest(db))
}
// setTestHookUpdateGC sets testHookUpdateGC and
// returns a function that will reset it to the
// value before the change.
func setTestHookUpdateGC(h func()) (reset func()) {
current := testHookUpdateGC
reset = func() { testHookUpdateGC = current }
testHookUpdateGC = h
return reset
}
// TestSetTestHookUpdateGC tests if setTestHookUpdateGC changes
// the testHookUpdateGC function correctly and if its reset function
// restores the original function.
func TestSetTestHookUpdateGC(t *testing.T) {
// Restore the original hook function after the test finishes.
defer func(h func()) { testHookUpdateGC = h }(testHookUpdateGC)
// expected value for the unchanged function
original := 1
// expected value for the changed function
changed := 2
// this variable will be set with two different functions
var got int
// define the original (unchanged) functions
testHookUpdateGC = func() {
got = original
}
// set got variable
testHookUpdateGC()
// test if got variable is set correctly
if got != original {
t.Errorf("got hook value %v, want %v", got, original)
}
// set the new function
reset := setTestHookUpdateGC(func() {
got = changed
})
// set got variable
testHookUpdateGC()
// test if got variable is set correctly to changed value
if got != changed {
t.Errorf("got hook value %v, want %v", got, changed)
}
// set the function to the original one
reset()
// set got variable
testHookUpdateGC()
// test if got variable is set correctly to original value
if got != original {
t.Errorf("got hook value %v, want %v", got, original)
}
}

View File

@ -0,0 +1,39 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"context"
"time"
"github.com/ethereum/go-ethereum/metrics"
"github.com/ethersphere/swarm/chunk"
)
// Has returns true if the chunk is stored in database.
func (db *DB) Has(ctx context.Context, addr chunk.Address) (bool, error) {
metricName := "localstore.Has"
metrics.GetOrRegisterCounter(metricName, nil).Inc(1)
defer totalTimeMetric(metricName, time.Now())
has, err := db.retrievalDataIndex.Has(addressToItem(addr))
if err != nil {
metrics.GetOrRegisterCounter(metricName+".error", nil).Inc(1)
}
return has, err
}
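// Illustrative sketch, not part of the original file: Has can be used to
// decide whether a chunk needs to be fetched from the network before
// issuing a retrieve request. The function name exampleNeedsFetch is
// hypothetical.
func exampleNeedsFetch(ctx context.Context, db *DB, addr chunk.Address) (bool, error) {
	has, err := db.Has(ctx, addr)
	if err != nil {
		return false, err
	}
	// fetch only when the chunk is not in the retrieval data index
	return !has, nil
}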

View File

@ -0,0 +1,56 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"context"
"testing"
"github.com/ethersphere/swarm/chunk"
)
// TestHas validates that Hasser is returning true for
// the stored chunk and false for one that is not stored.
func TestHas(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
has, err := db.Has(context.Background(), ch.Address())
if err != nil {
t.Fatal(err)
}
if !has {
t.Error("chunk not found")
}
missingChunk := generateTestRandomChunk()
has, err = db.Has(context.Background(), missingChunk.Address())
if err != nil {
t.Fatal(err)
}
if has {
t.Error("unexpected chunk is found")
}
}

View File

@ -0,0 +1,176 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"context"
"fmt"
"time"
"github.com/ethereum/go-ethereum/metrics"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/shed"
"github.com/syndtr/goleveldb/leveldb"
)
// Put stores the Chunk to database and depending
// on the Putter mode, it updates required indexes.
// Put is required to implement chunk.Store
// interface.
func (db *DB) Put(ctx context.Context, mode chunk.ModePut, ch chunk.Chunk) (exists bool, err error) {
metricName := fmt.Sprintf("localstore.Put.%s", mode)
metrics.GetOrRegisterCounter(metricName, nil).Inc(1)
defer totalTimeMetric(metricName, time.Now())
exists, err = db.put(mode, chunkToItem(ch))
if err != nil {
metrics.GetOrRegisterCounter(metricName+".error", nil).Inc(1)
}
return exists, err
}
// put stores Item to database and updates other
// indexes. It acquires batchMu to protect against
// parallel updates for the same address.
// Item fields Address and Data must not be nil.
func (db *DB) put(mode chunk.ModePut, item shed.Item) (exists bool, err error) {
// protect parallel updates
db.batchMu.Lock()
defer db.batchMu.Unlock()
batch := new(leveldb.Batch)
// variables that provide information for operations
// to be done after write batch function successfully executes
var gcSizeChange int64 // number to add or subtract from gcSize
var triggerPullFeed bool // signal pull feed subscriptions to iterate
var triggerPushFeed bool // signal push feed subscriptions to iterate
switch mode {
case chunk.ModePutRequest:
// put to indexes: retrieve, gc; it does not enter the syncpool
// check if the chunk is already in the database
// as the gc index is updated
i, err := db.retrievalAccessIndex.Get(item)
switch err {
case nil:
exists = true
item.AccessTimestamp = i.AccessTimestamp
case leveldb.ErrNotFound:
exists = false
// no chunk accesses
default:
return false, err
}
i, err = db.retrievalDataIndex.Get(item)
switch err {
case nil:
exists = true
item.StoreTimestamp = i.StoreTimestamp
item.BinID = i.BinID
case leveldb.ErrNotFound:
// the chunk is not yet stored
exists = false
default:
return false, err
}
if item.AccessTimestamp != 0 {
// delete current entry from the gc index
db.gcIndex.DeleteInBatch(batch, item)
gcSizeChange--
}
if item.StoreTimestamp == 0 {
item.StoreTimestamp = now()
}
if item.BinID == 0 {
item.BinID, err = db.binIDs.IncInBatch(batch, uint64(db.po(item.Address)))
if err != nil {
return false, err
}
}
// update access timestamp
item.AccessTimestamp = now()
// update retrieve access index
db.retrievalAccessIndex.PutInBatch(batch, item)
// add new entry to gc index
db.gcIndex.PutInBatch(batch, item)
gcSizeChange++
db.retrievalDataIndex.PutInBatch(batch, item)
case chunk.ModePutUpload:
// put to indexes: retrieve, push, pull
exists, err = db.retrievalDataIndex.Has(item)
if err != nil {
return false, err
}
if !exists {
item.StoreTimestamp = now()
item.BinID, err = db.binIDs.IncInBatch(batch, uint64(db.po(item.Address)))
if err != nil {
return false, err
}
db.retrievalDataIndex.PutInBatch(batch, item)
db.pullIndex.PutInBatch(batch, item)
triggerPullFeed = true
db.pushIndex.PutInBatch(batch, item)
triggerPushFeed = true
}
case chunk.ModePutSync:
// put to indexes: retrieve, pull
exists, err = db.retrievalDataIndex.Has(item)
if err != nil {
return exists, err
}
if !exists {
item.StoreTimestamp = now()
item.BinID, err = db.binIDs.IncInBatch(batch, uint64(db.po(item.Address)))
if err != nil {
return false, err
}
db.retrievalDataIndex.PutInBatch(batch, item)
db.pullIndex.PutInBatch(batch, item)
triggerPullFeed = true
}
default:
return false, ErrInvalidMode
}
err = db.incGCSizeInBatch(batch, gcSizeChange)
if err != nil {
return false, err
}
err = db.shed.WriteBatch(batch)
if err != nil {
return false, err
}
if triggerPullFeed {
db.triggerPullSubscriptions(db.po(item.Address))
}
if triggerPushFeed {
db.triggerPushSubscriptions()
}
return exists, nil
}
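// Illustrative sketch, not part of the original file: storing a chunk
// received from a local upload. The returned exists flag reports whether
// the chunk was already in the retrieval index, in which case the push
// and pull indexes were left untouched. The function name
// examplePutUpload is hypothetical.
func examplePutUpload(ctx context.Context, db *DB, ch chunk.Chunk) (alreadyStored bool, err error) {
	// ModePutUpload populates the retrieval, push and pull indexes
	// and triggers push and pull subscriptions for new chunks
	return db.Put(ctx, chunk.ModePutUpload, ch)
}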

View File

@ -0,0 +1,362 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"bytes"
"context"
"fmt"
"sync"
"testing"
"time"
"github.com/ethersphere/swarm/chunk"
)
// TestModePutRequest validates ModePutRequest index values on the provided DB.
func TestModePutRequest(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
ch := generateTestRandomChunk()
// keep a record of when the chunk was stored
var storeTimestamp int64
t.Run("first put", func(t *testing.T) {
wantTimestamp := time.Now().UTC().UnixNano()
defer setNow(func() (t int64) {
return wantTimestamp
})()
storeTimestamp = wantTimestamp
_, err := db.Put(context.Background(), chunk.ModePutRequest, ch)
if err != nil {
t.Fatal(err)
}
t.Run("retrieve indexes", newRetrieveIndexesTestWithAccess(db, ch, wantTimestamp, wantTimestamp))
t.Run("gc index count", newItemsCountTest(db.gcIndex, 1))
t.Run("gc size", newIndexGCSizeTest(db))
})
t.Run("second put", func(t *testing.T) {
wantTimestamp := time.Now().UTC().UnixNano()
defer setNow(func() (t int64) {
return wantTimestamp
})()
_, err := db.Put(context.Background(), chunk.ModePutRequest, ch)
if err != nil {
t.Fatal(err)
}
t.Run("retrieve indexes", newRetrieveIndexesTestWithAccess(db, ch, storeTimestamp, wantTimestamp))
t.Run("gc index count", newItemsCountTest(db.gcIndex, 1))
t.Run("gc size", newIndexGCSizeTest(db))
})
}
// TestModePutSync validates ModePutSync index values on the provided DB.
func TestModePutSync(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
wantTimestamp := time.Now().UTC().UnixNano()
defer setNow(func() (t int64) {
return wantTimestamp
})()
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutSync, ch)
if err != nil {
t.Fatal(err)
}
t.Run("retrieve indexes", newRetrieveIndexesTest(db, ch, wantTimestamp, 0))
t.Run("pull index", newPullIndexTest(db, ch, 1, nil))
}
// TestModePutUpload validates ModePutUpload index values on the provided DB.
func TestModePutUpload(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
wantTimestamp := time.Now().UTC().UnixNano()
defer setNow(func() (t int64) {
return wantTimestamp
})()
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
t.Run("retrieve indexes", newRetrieveIndexesTest(db, ch, wantTimestamp, 0))
t.Run("pull index", newPullIndexTest(db, ch, 1, nil))
t.Run("push index", newPushIndexTest(db, ch, wantTimestamp, nil))
}
// TestModePutUpload_parallel uploads chunks in parallel
// and validates if all chunks can be retrieved with correct data.
func TestModePutUpload_parallel(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
chunkCount := 1000
workerCount := 100
chunkChan := make(chan chunk.Chunk)
errChan := make(chan error)
doneChan := make(chan struct{})
defer close(doneChan)
// start uploader workers
for i := 0; i < workerCount; i++ {
go func(i int) {
for {
select {
case ch, ok := <-chunkChan:
if !ok {
return
}
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
select {
case errChan <- err:
case <-doneChan:
}
case <-doneChan:
return
}
}
}(i)
}
chunks := make([]chunk.Chunk, 0)
var chunksMu sync.Mutex
// send chunks to workers
go func() {
for i := 0; i < chunkCount; i++ {
chunk := generateTestRandomChunk()
select {
case chunkChan <- chunk:
case <-doneChan:
return
}
chunksMu.Lock()
chunks = append(chunks, chunk)
chunksMu.Unlock()
}
close(chunkChan)
}()
// validate every error from workers
for i := 0; i < chunkCount; i++ {
err := <-errChan
if err != nil {
t.Fatal(err)
}
}
// get every chunk and validate its data
chunksMu.Lock()
defer chunksMu.Unlock()
for _, ch := range chunks {
got, err := db.Get(context.Background(), chunk.ModeGetRequest, ch.Address())
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(got.Data(), ch.Data()) {
t.Fatalf("got chunk %s data %x, want %x", ch.Address().Hex(), got.Data(), ch.Data())
}
}
}
// TestModePut_sameChunk puts the same chunk multiple times
// and validates that all relevant indexes have only one item
// in them.
func TestModePut_sameChunk(t *testing.T) {
ch := generateTestRandomChunk()
for _, tc := range []struct {
name string
mode chunk.ModePut
pullIndex bool
pushIndex bool
}{
{
name: "ModePutRequest",
mode: chunk.ModePutRequest,
pullIndex: false,
pushIndex: false,
},
{
name: "ModePutUpload",
mode: chunk.ModePutUpload,
pullIndex: true,
pushIndex: true,
},
{
name: "ModePutSync",
mode: chunk.ModePutSync,
pullIndex: true,
pushIndex: false,
},
} {
t.Run(tc.name, func(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
for i := 0; i < 10; i++ {
exists, err := db.Put(context.Background(), tc.mode, ch)
if err != nil {
t.Fatal(err)
}
switch exists {
case false:
if i != 0 {
t.Fatal("should not exist only on first Put")
}
case true:
if i == 0 {
t.Fatal("should exist on all cases other than the first one")
}
}
count := func(b bool) (c int) {
if b {
return 1
}
return 0
}
newItemsCountTest(db.retrievalDataIndex, 1)(t)
newItemsCountTest(db.pullIndex, count(tc.pullIndex))(t)
newItemsCountTest(db.pushIndex, count(tc.pushIndex))(t)
}
})
}
}
// BenchmarkPutUpload runs a series of benchmarks that upload
// a specific number of chunks in parallel.
//
// Measurements on MacBook Pro (Retina, 15-inch, Mid 2014)
//
// # go test -benchmem -run=none github.com/ethersphere/swarm/storage/localstore -bench BenchmarkPutUpload -v
//
// goos: darwin
// goarch: amd64
// pkg: github.com/ethersphere/swarm/storage/localstore
// BenchmarkPutUpload/count_100_parallel_1-8 300 5107704 ns/op 2081461 B/op 2374 allocs/op
// BenchmarkPutUpload/count_100_parallel_2-8 300 5411742 ns/op 2081608 B/op 2364 allocs/op
// BenchmarkPutUpload/count_100_parallel_4-8 500 3704964 ns/op 2081696 B/op 2324 allocs/op
// BenchmarkPutUpload/count_100_parallel_8-8 500 2932663 ns/op 2082594 B/op 2295 allocs/op
// BenchmarkPutUpload/count_100_parallel_16-8 500 3117157 ns/op 2085438 B/op 2282 allocs/op
// BenchmarkPutUpload/count_100_parallel_32-8 500 3449122 ns/op 2089721 B/op 2286 allocs/op
// BenchmarkPutUpload/count_1000_parallel_1-8 20 79784470 ns/op 25211240 B/op 23225 allocs/op
// BenchmarkPutUpload/count_1000_parallel_2-8 20 75422164 ns/op 25210730 B/op 23187 allocs/op
// BenchmarkPutUpload/count_1000_parallel_4-8 20 70698378 ns/op 25206522 B/op 22692 allocs/op
// BenchmarkPutUpload/count_1000_parallel_8-8 20 71285528 ns/op 25213436 B/op 22345 allocs/op
// BenchmarkPutUpload/count_1000_parallel_16-8 20 71301826 ns/op 25205040 B/op 22090 allocs/op
// BenchmarkPutUpload/count_1000_parallel_32-8 30 57713506 ns/op 25219781 B/op 21848 allocs/op
// BenchmarkPutUpload/count_10000_parallel_1-8 2 656719345 ns/op 216792908 B/op 248940 allocs/op
// BenchmarkPutUpload/count_10000_parallel_2-8 2 646301962 ns/op 216730800 B/op 248270 allocs/op
// BenchmarkPutUpload/count_10000_parallel_4-8 2 532784228 ns/op 216667080 B/op 241910 allocs/op
// BenchmarkPutUpload/count_10000_parallel_8-8 3 494290188 ns/op 216297749 B/op 236247 allocs/op
// BenchmarkPutUpload/count_10000_parallel_16-8 3 483485315 ns/op 216060384 B/op 231090 allocs/op
// BenchmarkPutUpload/count_10000_parallel_32-8 3 434461294 ns/op 215371280 B/op 224800 allocs/op
// BenchmarkPutUpload/count_100000_parallel_1-8 1 22767894338 ns/op 2331372088 B/op 4049876 allocs/op
// BenchmarkPutUpload/count_100000_parallel_2-8 1 25347872677 ns/op 2344140160 B/op 4106763 allocs/op
// BenchmarkPutUpload/count_100000_parallel_4-8 1 23580460174 ns/op 2338582576 B/op 4027452 allocs/op
// BenchmarkPutUpload/count_100000_parallel_8-8 1 22197559193 ns/op 2321803496 B/op 3877553 allocs/op
// BenchmarkPutUpload/count_100000_parallel_16-8 1 22527046476 ns/op 2327854800 B/op 3885455 allocs/op
// BenchmarkPutUpload/count_100000_parallel_32-8 1 21332243613 ns/op 2299654568 B/op 3697181 allocs/op
// PASS
func BenchmarkPutUpload(b *testing.B) {
for _, count := range []int{
100,
1000,
10000,
100000,
} {
for _, maxParallelUploads := range []int{
1,
2,
4,
8,
16,
32,
} {
name := fmt.Sprintf("count %v parallel %v", count, maxParallelUploads)
b.Run(name, func(b *testing.B) {
for n := 0; n < b.N; n++ {
benchmarkPutUpload(b, nil, count, maxParallelUploads)
}
})
}
}
}
// benchmarkPutUpload runs a benchmark by uploading a specific number
// of chunks with specified max parallel uploads.
func benchmarkPutUpload(b *testing.B, o *Options, count, maxParallelUploads int) {
b.StopTimer()
db, cleanupFunc := newTestDB(b, o)
defer cleanupFunc()
chunks := make([]chunk.Chunk, count)
for i := 0; i < count; i++ {
chunks[i] = generateTestRandomChunk()
}
errs := make(chan error)
b.StartTimer()
go func() {
sem := make(chan struct{}, maxParallelUploads)
for i := 0; i < count; i++ {
sem <- struct{}{}
go func(i int) {
defer func() { <-sem }()
_, err := db.Put(context.Background(), chunk.ModePutUpload, chunks[i])
errs <- err
}(i)
}
}()
for i := 0; i < count; i++ {
err := <-errs
if err != nil {
b.Fatal(err)
}
}
}

View File

@ -0,0 +1,194 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"context"
"fmt"
"time"
"github.com/ethereum/go-ethereum/metrics"
"github.com/ethersphere/swarm/chunk"
"github.com/syndtr/goleveldb/leveldb"
)
// Set updates database indexes for a specific
// chunk represented by the address.
// Set is required to implement chunk.Store
// interface.
func (db *DB) Set(ctx context.Context, mode chunk.ModeSet, addr chunk.Address) (err error) {
metricName := fmt.Sprintf("localstore.Set.%s", mode)
metrics.GetOrRegisterCounter(metricName, nil).Inc(1)
defer totalTimeMetric(metricName, time.Now())
err = db.set(mode, addr)
if err != nil {
metrics.GetOrRegisterCounter(metricName+".error", nil).Inc(1)
}
return err
}
// set updates database indexes for a specific
// chunk represented by the address.
// It acquires batchMu to protect against
// parallel updates for the same address.
func (db *DB) set(mode chunk.ModeSet, addr chunk.Address) (err error) {
// protect parallel updates
db.batchMu.Lock()
defer db.batchMu.Unlock()
batch := new(leveldb.Batch)
// variables that provide information for operations
// to be done after write batch function successfully executes
var gcSizeChange int64 // number to add or subtract from gcSize
var triggerPullFeed bool // signal pull feed subscriptions to iterate
item := addressToItem(addr)
switch mode {
case chunk.ModeSetAccess:
// add to pull, insert to gc
// need to get access timestamp here as it is not
// provided by the access function, and it is not
// a property of a chunk provided to Accessor.Put.
i, err := db.retrievalDataIndex.Get(item)
switch err {
case nil:
item.StoreTimestamp = i.StoreTimestamp
item.BinID = i.BinID
case leveldb.ErrNotFound:
db.pushIndex.DeleteInBatch(batch, item)
item.StoreTimestamp = now()
item.BinID, err = db.binIDs.Inc(uint64(db.po(item.Address)))
if err != nil {
return err
}
default:
return err
}
i, err = db.retrievalAccessIndex.Get(item)
switch err {
case nil:
item.AccessTimestamp = i.AccessTimestamp
db.gcIndex.DeleteInBatch(batch, item)
gcSizeChange--
case leveldb.ErrNotFound:
// the chunk has not been accessed before
default:
return err
}
item.AccessTimestamp = now()
db.retrievalAccessIndex.PutInBatch(batch, item)
db.pullIndex.PutInBatch(batch, item)
triggerPullFeed = true
db.gcIndex.PutInBatch(batch, item)
gcSizeChange++
case chunk.ModeSetSync:
// delete from push, insert to gc
// need to get access timestamp here as it is not
// provided by the access function, and it is not
// a property of a chunk provided to Accessor.Put.
i, err := db.retrievalDataIndex.Get(item)
if err != nil {
if err == leveldb.ErrNotFound {
// chunk is not found,
// no need to update gc index
// just delete from the push index
// if it is there
db.pushIndex.DeleteInBatch(batch, item)
return nil
}
return err
}
item.StoreTimestamp = i.StoreTimestamp
item.BinID = i.BinID
i, err = db.retrievalAccessIndex.Get(item)
switch err {
case nil:
item.AccessTimestamp = i.AccessTimestamp
db.gcIndex.DeleteInBatch(batch, item)
gcSizeChange--
case leveldb.ErrNotFound:
// the chunk has not been accessed before
default:
return err
}
item.AccessTimestamp = now()
db.retrievalAccessIndex.PutInBatch(batch, item)
db.pushIndex.DeleteInBatch(batch, item)
db.gcIndex.PutInBatch(batch, item)
gcSizeChange++
case chunk.ModeSetRemove:
// delete from retrieve, pull, gc
// need to get access timestamp here as it is not
// provided by the access function, and it is not
// a property of a chunk provided to Accessor.Put.
i, err := db.retrievalAccessIndex.Get(item)
switch err {
case nil:
item.AccessTimestamp = i.AccessTimestamp
case leveldb.ErrNotFound:
default:
return err
}
i, err = db.retrievalDataIndex.Get(item)
if err != nil {
return err
}
item.StoreTimestamp = i.StoreTimestamp
item.BinID = i.BinID
db.retrievalDataIndex.DeleteInBatch(batch, item)
db.retrievalAccessIndex.DeleteInBatch(batch, item)
db.pullIndex.DeleteInBatch(batch, item)
db.gcIndex.DeleteInBatch(batch, item)
// a check is needed for decrementing gcSize
// as delete does not report whether the key/value
// pair was actually present
if _, err := db.gcIndex.Get(item); err == nil {
gcSizeChange = -1
}
default:
return ErrInvalidMode
}
err = db.incGCSizeInBatch(batch, gcSizeChange)
if err != nil {
return err
}
err = db.shed.WriteBatch(batch)
if err != nil {
return err
}
if triggerPullFeed {
db.triggerPullSubscriptions(db.po(item.Address))
}
return nil
}
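// Illustrative sketch, not part of the original file: marking a chunk as
// synced after it has been pushed to the network. ModeSetSync removes the
// chunk from the push index and adds it to the gc index, making it
// eligible for garbage collection. The function name exampleMarkSynced
// is hypothetical.
func exampleMarkSynced(ctx context.Context, db *DB, addr chunk.Address) error {
	return db.Set(ctx, chunk.ModeSetSync, addr)
}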

View File

@ -0,0 +1,129 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"context"
"testing"
"time"
"github.com/ethersphere/swarm/chunk"
"github.com/syndtr/goleveldb/leveldb"
)
// TestModeSetAccess validates ModeSetAccess index values on the provided DB.
func TestModeSetAccess(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
ch := generateTestRandomChunk()
wantTimestamp := time.Now().UTC().UnixNano()
defer setNow(func() (t int64) {
return wantTimestamp
})()
err := db.Set(context.Background(), chunk.ModeSetAccess, ch.Address())
if err != nil {
t.Fatal(err)
}
t.Run("pull index", newPullIndexTest(db, ch, 1, nil))
t.Run("pull index count", newItemsCountTest(db.pullIndex, 1))
t.Run("gc index", newGCIndexTest(db, ch, wantTimestamp, wantTimestamp, 1))
t.Run("gc index count", newItemsCountTest(db.gcIndex, 1))
t.Run("gc size", newIndexGCSizeTest(db))
}
// TestModeSetSync validates ModeSetSync index values on the provided DB.
func TestModeSetSync(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
ch := generateTestRandomChunk()
wantTimestamp := time.Now().UTC().UnixNano()
defer setNow(func() (t int64) {
return wantTimestamp
})()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
err = db.Set(context.Background(), chunk.ModeSetSync, ch.Address())
if err != nil {
t.Fatal(err)
}
t.Run("retrieve indexes", newRetrieveIndexesTestWithAccess(db, ch, wantTimestamp, wantTimestamp))
t.Run("push index", newPushIndexTest(db, ch, wantTimestamp, leveldb.ErrNotFound))
t.Run("gc index", newGCIndexTest(db, ch, wantTimestamp, wantTimestamp, 1))
t.Run("gc index count", newItemsCountTest(db.gcIndex, 1))
t.Run("gc size", newIndexGCSizeTest(db))
}
// TestModeSetRemove validates ModeSetRemove index values on the provided DB.
func TestModeSetRemove(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
err = db.Set(context.Background(), chunk.ModeSetRemove, ch.Address())
if err != nil {
t.Fatal(err)
}
t.Run("retrieve indexes", func(t *testing.T) {
wantErr := leveldb.ErrNotFound
_, err := db.retrievalDataIndex.Get(addressToItem(ch.Address()))
if err != wantErr {
t.Errorf("got error %v, want %v", err, wantErr)
}
t.Run("retrieve data index count", newItemsCountTest(db.retrievalDataIndex, 0))
// access index should not be set
_, err = db.retrievalAccessIndex.Get(addressToItem(ch.Address()))
if err != wantErr {
t.Errorf("got error %v, want %v", err, wantErr)
}
t.Run("retrieve access index count", newItemsCountTest(db.retrievalAccessIndex, 0))
})
t.Run("pull index", newPullIndexTest(db, ch, 0, leveldb.ErrNotFound))
t.Run("pull index count", newItemsCountTest(db.pullIndex, 0))
t.Run("gc index count", newItemsCountTest(db.gcIndex, 0))
t.Run("gc size", newIndexGCSizeTest(db))
}

View File

@ -0,0 +1,147 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"context"
"strconv"
"testing"
"github.com/ethersphere/swarm/chunk"
)
// BenchmarkRetrievalIndexes uploads a number of chunks in order to measure
// total time of updating their retrieval indexes by setting them
// to synced state and requesting them.
//
// This benchmark takes significant amount of time.
//
// Measurements on MacBook Pro (Retina, 15-inch, Mid 2014) show
// that two separate indexes perform better.
//
// # go test -benchmem -run=none github.com/ethersphere/swarm/storage/localstore -bench BenchmarkRetrievalIndexes -v
// goos: darwin
// goarch: amd64
// pkg: github.com/ethersphere/swarm/storage/localstore
// BenchmarkRetrievalIndexes/1000-8 20 75556686 ns/op 19033493 B/op 84500 allocs/op
// BenchmarkRetrievalIndexes/10000-8 1 1079084922 ns/op 382792064 B/op 1429644 allocs/op
// BenchmarkRetrievalIndexes/100000-8 1 16891305737 ns/op 2629165304 B/op 12465019 allocs/op
// PASS
func BenchmarkRetrievalIndexes(b *testing.B) {
for _, count := range []int{
1000,
10000,
100000,
} {
b.Run(strconv.Itoa(count)+"-split", func(b *testing.B) {
for n := 0; n < b.N; n++ {
benchmarkRetrievalIndexes(b, nil, count)
}
})
}
}
// benchmarkRetrievalIndexes is used in BenchmarkRetrievalIndexes
// to do benchmarks with a specific number of chunks and different
// database options.
func benchmarkRetrievalIndexes(b *testing.B, o *Options, count int) {
b.StopTimer()
db, cleanupFunc := newTestDB(b, o)
defer cleanupFunc()
addrs := make([]chunk.Address, count)
for i := 0; i < count; i++ {
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
b.Fatal(err)
}
addrs[i] = ch.Address()
}
// set update gc test hook to signal when
// update gc goroutine is done by sending to
// testHookUpdateGCChan channel, which is
// used to wait for gc index updates to be
// included in the benchmark time
testHookUpdateGCChan := make(chan struct{})
defer setTestHookUpdateGC(func() {
testHookUpdateGCChan <- struct{}{}
})()
b.StartTimer()
for i := 0; i < count; i++ {
err := db.Set(context.Background(), chunk.ModeSetSync, addrs[i])
if err != nil {
b.Fatal(err)
}
_, err = db.Get(context.Background(), chunk.ModeGetRequest, addrs[i])
if err != nil {
b.Fatal(err)
}
// wait for update gc goroutine to be done
<-testHookUpdateGCChan
}
}
// BenchmarkUpload compares uploading speed for different
// retrieval indexes and various number of chunks.
//
// Measurements on MacBook Pro (Retina, 15-inch, Mid 2014).
//
// go test -benchmem -run=none github.com/ethersphere/swarm/storage/localstore -bench BenchmarkUpload -v
// goos: darwin
// goarch: amd64
// pkg: github.com/ethersphere/swarm/storage/localstore
// BenchmarkUpload/1000-8 20 59437463 ns/op 25205193 B/op 23208 allocs/op
// BenchmarkUpload/10000-8 2 580646362 ns/op 216532932 B/op 248090 allocs/op
// BenchmarkUpload/100000-8 1 22373390892 ns/op 2323055312 B/op 3995903 allocs/op
// PASS
func BenchmarkUpload(b *testing.B) {
for _, count := range []int{
1000,
10000,
100000,
} {
b.Run(strconv.Itoa(count), func(b *testing.B) {
for n := 0; n < b.N; n++ {
benchmarkUpload(b, nil, count)
}
})
}
}
// benchmarkUpload is used in BenchmarkUpload
// to do benchmarks with a specific number of chunks and different
// database options.
func benchmarkUpload(b *testing.B, o *Options, count int) {
b.StopTimer()
db, cleanupFunc := newTestDB(b, o)
defer cleanupFunc()
chunks := make([]chunk.Chunk, count)
for i := 0; i < count; i++ {
chunk := generateTestRandomChunk()
chunks[i] = chunk
}
b.StartTimer()
for i := 0; i < count; i++ {
_, err := db.Put(context.Background(), chunk.ModePutUpload, chunks[i])
if err != nil {
b.Fatal(err)
}
}
}

View File

@ -0,0 +1,52 @@
package localstore
import (
"github.com/ethersphere/swarm/log"
"github.com/syndtr/goleveldb/leveldb"
"github.com/syndtr/goleveldb/leveldb/opt"
)
// The DB schema we want to use. The actual/current DB schema might differ
// until migrations are run.
const CurrentDbSchema = DbSchemaSanctuary
// There was a time when we had no schema at all.
const DbSchemaNone = ""
// "purity" is the first formal schema of LevelDB we release together with Swarm 0.3.5
const DbSchemaPurity = "purity"
// "halloween" is here because we had a screw in the garbage collector index.
// Because of that we had to rebuild the GC index to get rid of erroneous
// entries and that takes a long time. This schema is used for bookkeeping,
// so rebuild index will run just once.
const DbSchemaHalloween = "halloween"
const DbSchemaSanctuary = "sanctuary"
// IsLegacyDatabase returns true if a legacy database is present in the datadir
func IsLegacyDatabase(datadir string) bool {
var (
legacyDbSchemaKey = []byte{8}
)
db, err := leveldb.OpenFile(datadir, &opt.Options{OpenFilesCacheCapacity: 128})
if err != nil {
log.Error("got an error while trying to open leveldb path", "path", datadir, "err", err)
return false
}
defer db.Close()
data, err := db.Get(legacyDbSchemaKey, nil)
if err != nil {
if err == leveldb.ErrNotFound {
// if nothing is found under the legacy db schema key, this is not a legacy database
return false
}
log.Error("got an unexpected error fetching legacy name from the database", "err", err)
}
log.Trace("checking if database scheme is legacy", "schema name", string(data))
return string(data) == DbSchemaHalloween || string(data) == DbSchemaPurity
}
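// Illustrative sketch, not part of the original file: a caller would
// typically consult IsLegacyDatabase before using a datadir, so that data
// written under the "purity" or "halloween" schemas is migrated rather
// than opened directly with CurrentDbSchema. The function name
// exampleSchemaCheck is hypothetical.
func exampleSchemaCheck(datadir string) (needsMigration bool) {
	if IsLegacyDatabase(datadir) {
		log.Warn("legacy database schema detected, migration required", "path", datadir)
		return true
	}
	return false
}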

View File

@ -0,0 +1,221 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"context"
"errors"
"sync"
"time"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/metrics"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/shed"
"github.com/syndtr/goleveldb/leveldb"
)
// SubscribePull returns a channel that provides chunk addresses and stored times from pull syncing index.
// The pull syncing index can only be subscribed to for a particular proximity order bin. If since
// is not 0, the iteration will start from the since item (the item with binID == since). If until is not 0,
// only chunks stored up to this id will be sent to the channel, and the returned channel will be
// closed. The since-until interval is closed on both sides: [since, until]. The returned stop
// function terminates the current and any further iterations without errors, and also closes the returned channel.
// Make sure to check the second value returned on a channel receive and stop iterating when it
// is false.
func (db *DB) SubscribePull(ctx context.Context, bin uint8, since, until uint64) (c <-chan chunk.Descriptor, stop func()) {
metricName := "localstore.SubscribePull"
metrics.GetOrRegisterCounter(metricName, nil).Inc(1)
chunkDescriptors := make(chan chunk.Descriptor)
trigger := make(chan struct{}, 1)
db.pullTriggersMu.Lock()
if _, ok := db.pullTriggers[bin]; !ok {
db.pullTriggers[bin] = make([]chan struct{}, 0)
}
db.pullTriggers[bin] = append(db.pullTriggers[bin], trigger)
db.pullTriggersMu.Unlock()
// send signal for the initial iteration
trigger <- struct{}{}
stopChan := make(chan struct{})
var stopChanOnce sync.Once
// used to provide information from the iterator to
// stop subscription when until chunk descriptor is reached
var errStopSubscription = errors.New("stop subscription")
go func() {
defer metrics.GetOrRegisterCounter(metricName+".stop", nil).Inc(1)
// close the returned chunk.Descriptor channel at the end to
// signal that the subscription is done
defer close(chunkDescriptors)
// sinceItem is the Item from which the next iteration
// should start. The first iteration starts from the first Item.
var sinceItem *shed.Item
if since > 0 {
sinceItem = &shed.Item{
Address: db.addressInBin(bin),
BinID: since,
}
}
first := true // first iteration flag for SkipStartFromItem
for {
select {
case <-trigger:
// iterate until:
// - last index Item is reached
// - subscription stop is called
// - context is done
metrics.GetOrRegisterCounter(metricName+".iter", nil).Inc(1)
iterStart := time.Now()
var count int
err := db.pullIndex.Iterate(func(item shed.Item) (stop bool, err error) {
select {
case chunkDescriptors <- chunk.Descriptor{
Address: item.Address,
BinID: item.BinID,
}:
count++
// until chunk descriptor is sent
// break the iteration
if until > 0 && item.BinID >= until {
return true, errStopSubscription
}
// set next iteration start item
// when its chunk is successfully sent to channel
sinceItem = &item
return false, nil
case <-stopChan:
// gracefully stop the iteration
// on stop
return true, nil
case <-db.close:
// gracefully stop the iteration
// on database close
return true, nil
case <-ctx.Done():
return true, ctx.Err()
}
}, &shed.IterateOptions{
StartFrom: sinceItem,
// sinceItem was sent as the last Address in the previous
// iterator call, skip it in this one, but not the item with
// the provided since bin id as it should be sent to a channel
SkipStartFromItem: !first,
Prefix: []byte{bin},
})
totalTimeMetric(metricName+".iter", iterStart)
if err != nil {
if err == errStopSubscription {
// stop subscription without any errors
// if until is reached
return
}
metrics.GetOrRegisterCounter(metricName+".iter.error", nil).Inc(1)
log.Error("localstore pull subscription iteration", "bin", bin, "since", since, "until", until, "err", err)
return
}
if count > 0 {
first = false
}
case <-stopChan:
// terminate the subscription
// on stop
return
case <-db.close:
// terminate the subscription
// on database close
return
case <-ctx.Done():
err := ctx.Err()
if err != nil {
log.Error("localstore pull subscription", "bin", bin, "since", since, "until", until, "err", err)
}
return
}
}
}()
stop = func() {
stopChanOnce.Do(func() {
close(stopChan)
})
db.pullTriggersMu.Lock()
defer db.pullTriggersMu.Unlock()
for i, t := range db.pullTriggers[bin] {
if t == trigger {
db.pullTriggers[bin] = append(db.pullTriggers[bin][:i], db.pullTriggers[bin][i+1:]...)
break
}
}
}
return chunkDescriptors, stop
}
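// Illustrative sketch, not part of the original file: a minimal consumer
// that drains descriptors for one bin. The second value returned by the
// channel receive must be checked, as the channel is closed when until is
// reached, stop is called, or the database closes. The function name
// examplePullConsumer is hypothetical.
func examplePullConsumer(ctx context.Context, db *DB, bin uint8, since, until uint64) error {
	ch, stop := db.SubscribePull(ctx, bin, since, until)
	defer stop()
	for {
		select {
		case d, ok := <-ch:
			if !ok {
				// subscription ended: until reached, stop called,
				// or the database was closed
				return nil
			}
			log.Trace("pull sync", "bin", bin, "addr", d.Address, "binID", d.BinID)
		case <-ctx.Done():
			return ctx.Err()
		}
	}
}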
// LastPullSubscriptionBinID returns chunk bin id of the latest Chunk
// in pull syncing index for a provided bin. If there are no chunks in
// that bin, 0 value is returned.
func (db *DB) LastPullSubscriptionBinID(bin uint8) (id uint64, err error) {
metrics.GetOrRegisterCounter("localstore.LastPullSubscriptionBinID", nil).Inc(1)
item, err := db.pullIndex.Last([]byte{bin})
if err != nil {
if err == leveldb.ErrNotFound {
return 0, nil
}
return 0, err
}
return item.BinID, nil
}
// triggerPullSubscriptions is used internally for starting iterations
// on Pull subscriptions for a particular bin. When new item with address
// that is in particular bin for DB's baseKey is added to pull index
// this function should be called.
func (db *DB) triggerPullSubscriptions(bin uint8) {
db.pullTriggersMu.RLock()
triggers, ok := db.pullTriggers[bin]
db.pullTriggersMu.RUnlock()
if !ok {
return
}
for _, t := range triggers {
select {
case t <- struct{}{}:
default:
}
}
}
// addressInBin returns an address that is in a specific
// proximity order bin from database base key.
func (db *DB) addressInBin(bin uint8) (addr chunk.Address) {
addr = append([]byte(nil), db.baseKey...)
b := bin / 8
addr[b] = addr[b] ^ (1 << (7 - bin%8))
return addr
}
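// Worked example (illustrative note): for bin 10, the byte index is
// b = 10/8 = 1 and the mask is 1 << (7 - 10%8) = 1 << 5, so the third
// most significant bit of addr[1] is flipped relative to baseKey. All
// more significant bits are left equal to baseKey, so the proximity
// order of the returned address to baseKey is exactly 10.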

View File

@ -0,0 +1,585 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"bytes"
"context"
"fmt"
"sync"
"testing"
"time"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/shed"
)
// TestDB_SubscribePull_first is a regression test for the first=false (from-1) bug.
// The bug was that `first = false` was not guarded by the condition `if count > 0`. This resulted
// in chunks being missed when the subscription was established before the chunk was actually uploaded.
// For example, if a subscription was established with since=49, meaning that `SubscribePull` should
// return the chunk with BinID=49 via the channel, and the chunk for BinID=49 was uploaded after the
// subscription, then it would have been skipped, while the correct behaviour is to return it via the channel.
func TestDB_SubscribePull_first(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
addrs := make(map[uint8][]chunk.Address)
var addrsMu sync.Mutex
var wantedChunksCount int
// prepopulate database with some chunks
// before the subscription
uploadRandomChunksBin(t, db, addrs, &addrsMu, &wantedChunksCount, 100)
// any bin should do the trick
bin := uint8(1)
chunksInGivenBin := uint64(len(addrs[bin]))
errc := make(chan error)
since := chunksInGivenBin + 1
go func() {
ch, stop := db.SubscribePull(context.TODO(), bin, since, 0)
defer stop()
chnk := <-ch
if chnk.BinID != since {
errc <- fmt.Errorf("expected chunk.BinID to be %v , but got %v", since, chnk.BinID)
} else {
errc <- nil
}
}()
time.Sleep(100 * time.Millisecond)
uploadRandomChunksBin(t, db, addrs, &addrsMu, &wantedChunksCount, 100)
err := <-errc
if err != nil {
t.Fatal(err)
}
}
// TestDB_SubscribePull uploads some chunks before and after
// pull syncing subscription is created and validates if
// all addresses are received in the right order
// for expected proximity order bins.
func TestDB_SubscribePull(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
addrs := make(map[uint8][]chunk.Address)
var addrsMu sync.Mutex
var wantedChunksCount int
// prepopulate database with some chunks
// before the subscription
uploadRandomChunksBin(t, db, addrs, &addrsMu, &wantedChunksCount, 10)
// set a timeout on subscription
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
// collect all errors from validating addresses, even nil ones
// to validate the number of addresses received by the subscription
errChan := make(chan error)
for bin := uint8(0); bin <= uint8(chunk.MaxPO); bin++ {
ch, stop := db.SubscribePull(ctx, bin, 0, 0)
defer stop()
// receive and validate addresses from the subscription
go readPullSubscriptionBin(ctx, db, bin, ch, addrs, &addrsMu, errChan)
}
// upload some chunks just after subscribe
uploadRandomChunksBin(t, db, addrs, &addrsMu, &wantedChunksCount, 5)
time.Sleep(200 * time.Millisecond)
// upload some chunks after some short time
// to ensure that subscription will include them
// in a dynamic environment
uploadRandomChunksBin(t, db, addrs, &addrsMu, &wantedChunksCount, 3)
checkErrChan(ctx, t, errChan, wantedChunksCount)
}
// TestDB_SubscribePull_multiple uploads chunks before and after
// multiple pull syncing subscriptions are created and
// validates if all addresses are received in the right order
// for expected proximity order bins.
func TestDB_SubscribePull_multiple(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
addrs := make(map[uint8][]chunk.Address)
var addrsMu sync.Mutex
var wantedChunksCount int
// prepopulate database with some chunks
// before the subscription
uploadRandomChunksBin(t, db, addrs, &addrsMu, &wantedChunksCount, 10)
// set a timeout on subscription
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
// collect all errors from validating addresses, even nil ones
// to validate the number of addresses received by the subscription
errChan := make(chan error)
subsCount := 10
// start a number of subscriptions
// that all of them will write every address error to errChan
for j := 0; j < subsCount; j++ {
for bin := uint8(0); bin <= uint8(chunk.MaxPO); bin++ {
ch, stop := db.SubscribePull(ctx, bin, 0, 0)
defer stop()
// receive and validate addresses from the subscription
go readPullSubscriptionBin(ctx, db, bin, ch, addrs, &addrsMu, errChan)
}
}
// upload some chunks just after subscribe
uploadRandomChunksBin(t, db, addrs, &addrsMu, &wantedChunksCount, 5)
time.Sleep(200 * time.Millisecond)
// upload some chunks after some short time
// to ensure that subscription will include them
// in a dynamic environment
uploadRandomChunksBin(t, db, addrs, &addrsMu, &wantedChunksCount, 3)
checkErrChan(ctx, t, errChan, wantedChunksCount*subsCount)
}
// TestDB_SubscribePull_since uploads chunks before and after
// pull syncing subscriptions are created with a since argument
// and validates if all expected addresses are received in the
// right order for expected proximity order bins.
func TestDB_SubscribePull_since(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
addrs := make(map[uint8][]chunk.Address)
var addrsMu sync.Mutex
var wantedChunksCount int
binIDCounter := make(map[uint8]uint64)
var binIDCounterMu sync.RWMutex
uploadRandomChunks := func(count int, wanted bool) (first map[uint8]uint64) {
addrsMu.Lock()
defer addrsMu.Unlock()
first = make(map[uint8]uint64)
for i := 0; i < count; i++ {
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
bin := db.po(ch.Address())
binIDCounterMu.RLock()
binIDCounter[bin]++
binIDCounterMu.RUnlock()
if wanted {
if _, ok := addrs[bin]; !ok {
addrs[bin] = make([]chunk.Address, 0)
}
addrs[bin] = append(addrs[bin], ch.Address())
wantedChunksCount++
if _, ok := first[bin]; !ok {
first[bin] = binIDCounter[bin]
}
}
}
return first
}
// prepopulate database with some chunks
// before the subscription
uploadRandomChunks(30, false)
first := uploadRandomChunks(25, true)
// set a timeout on subscription
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
// collect all errors from validating addresses, even nil ones
// to validate the number of addresses received by the subscription
errChan := make(chan error)
for bin := uint8(0); bin <= uint8(chunk.MaxPO); bin++ {
since, ok := first[bin]
if !ok {
continue
}
ch, stop := db.SubscribePull(ctx, bin, since, 0)
defer stop()
// receive and validate addresses from the subscription
go readPullSubscriptionBin(ctx, db, bin, ch, addrs, &addrsMu, errChan)
}
checkErrChan(ctx, t, errChan, wantedChunksCount)
}
// TestDB_SubscribePull_until uploads chunks before and after
// pull syncing subscriptions are created with an until argument
// and validates if all expected addresses are received in the
// right order for expected proximity order bins.
func TestDB_SubscribePull_until(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
addrs := make(map[uint8][]chunk.Address)
var addrsMu sync.Mutex
var wantedChunksCount int
binIDCounter := make(map[uint8]uint64)
var binIDCounterMu sync.RWMutex
uploadRandomChunks := func(count int, wanted bool) (last map[uint8]uint64) {
addrsMu.Lock()
defer addrsMu.Unlock()
last = make(map[uint8]uint64)
for i := 0; i < count; i++ {
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
bin := db.po(ch.Address())
if _, ok := addrs[bin]; !ok {
addrs[bin] = make([]chunk.Address, 0)
}
if wanted {
addrs[bin] = append(addrs[bin], ch.Address())
wantedChunksCount++
}
binIDCounterMu.RLock()
binIDCounter[bin]++
binIDCounterMu.RUnlock()
last[bin] = binIDCounter[bin]
}
return last
}
// prepopulate database with some chunks
// before the subscription
last := uploadRandomChunks(30, true)
uploadRandomChunks(25, false)
// set a timeout on subscription
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
// collect all errors from validating addresses, even nil ones
// to validate the number of addresses received by the subscription
errChan := make(chan error)
for bin := uint8(0); bin <= uint8(chunk.MaxPO); bin++ {
until, ok := last[bin]
if !ok {
continue
}
ch, stop := db.SubscribePull(ctx, bin, 0, until)
defer stop()
// receive and validate addresses from the subscription
go readPullSubscriptionBin(ctx, db, bin, ch, addrs, &addrsMu, errChan)
}
// upload some chunks just after subscribe
uploadRandomChunks(15, false)
checkErrChan(ctx, t, errChan, wantedChunksCount)
}
// TestDB_SubscribePull_sinceAndUntil uploads chunks before and
// after pull syncing subscriptions are created with since
// and until arguments, and validates if all expected addresses
// are received in the right order for expected proximity order bins.
func TestDB_SubscribePull_sinceAndUntil(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
addrs := make(map[uint8][]chunk.Address)
var addrsMu sync.Mutex
var wantedChunksCount int
binIDCounter := make(map[uint8]uint64)
var binIDCounterMu sync.RWMutex
uploadRandomChunks := func(count int, wanted bool) (last map[uint8]uint64) {
addrsMu.Lock()
defer addrsMu.Unlock()
last = make(map[uint8]uint64)
for i := 0; i < count; i++ {
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
bin := db.po(ch.Address())
if _, ok := addrs[bin]; !ok {
addrs[bin] = make([]chunk.Address, 0)
}
if wanted {
addrs[bin] = append(addrs[bin], ch.Address())
wantedChunksCount++
}
binIDCounterMu.RLock()
binIDCounter[bin]++
binIDCounterMu.RUnlock()
last[bin] = binIDCounter[bin]
}
return last
}
// all chunks from upload1 are not expected,
// as upload1 last bin IDs are used as since for subscriptions
upload1 := uploadRandomChunks(100, false)
// all chunks from upload2 are expected,
// as upload2 last bin IDs are used as until for subscriptions
upload2 := uploadRandomChunks(100, true)
// upload some chunks after the wanted ones,
// but before the subscriptions are created
uploadRandomChunks(8, false)
// set a timeout on subscription
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
// collect all errors from validating addresses, even nil ones
// to validate the number of addresses received by the subscription
errChan := make(chan error)
for bin := uint8(0); bin <= uint8(chunk.MaxPO); bin++ {
since, ok := upload1[bin]
if ok {
// start from the next uploaded chunk
since++
}
until, ok := upload2[bin]
if !ok {
// no chunks in this bin were uploaded in upload2,
// skip this bin from testing
continue
}
ch, stop := db.SubscribePull(ctx, bin, since, until)
defer stop()
// receive and validate addresses from the subscription
go readPullSubscriptionBin(ctx, db, bin, ch, addrs, &addrsMu, errChan)
}
// upload some chunks just after subscribe
uploadRandomChunks(15, false)
checkErrChan(ctx, t, errChan, wantedChunksCount)
}
// uploadRandomChunksBin uploads random chunks to database and adds them to
// the map of addresses per bin.
func uploadRandomChunksBin(t *testing.T, db *DB, addrs map[uint8][]chunk.Address, addrsMu *sync.Mutex, wantedChunksCount *int, count int) {
addrsMu.Lock()
defer addrsMu.Unlock()
for i := 0; i < count; i++ {
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
bin := db.po(ch.Address())
if _, ok := addrs[bin]; !ok {
addrs[bin] = make([]chunk.Address, 0)
}
addrs[bin] = append(addrs[bin], ch.Address())
*wantedChunksCount++
}
}
// readPullSubscriptionBin is a helper function that reads all chunk.Descriptors from a channel and
// sends an error to errChan, even a nil one, to count the number of chunk.Descriptors
// returned by the channel.
func readPullSubscriptionBin(ctx context.Context, db *DB, bin uint8, ch <-chan chunk.Descriptor, addrs map[uint8][]chunk.Address, addrsMu *sync.Mutex, errChan chan error) {
var i int // address index
for {
select {
case got, ok := <-ch:
if !ok {
return
}
var err error
addrsMu.Lock()
if i+1 > len(addrs[bin]) {
err = fmt.Errorf("got more chunk addresses %v, then expected %v, for bin %v", i+1, len(addrs[bin]), bin)
} else {
addr := addrs[bin][i]
if !bytes.Equal(got.Address, addr) {
err = fmt.Errorf("got chunk bin id %v in bin %v %v, want %v", i, bin, got.Address.Hex(), addr.Hex())
} else {
// use a separate variable for the lookup error so that the outer err,
// which is sent to errChan below, is not shadowed
want, getErr := db.retrievalDataIndex.Get(shed.Item{
Address: addr,
})
if getErr != nil {
err = fmt.Errorf("got chunk (bin id %v in bin %v) from retrieval index %s: %v", i, bin, addr.Hex(), getErr)
} else if got.BinID != want.BinID {
err = fmt.Errorf("got chunk bin id %v in bin %v: %v, want %v", i, bin, got.BinID, want.BinID)
}
}
}
addrsMu.Unlock()
i++
// send one and only one error per received address
select {
case errChan <- err:
case <-ctx.Done():
return
}
case <-ctx.Done():
return
}
}
}
// checkErrChan expects the number of wantedChunksCount errors from errChan
// and calls t.Error for the ones that are not nil.
func checkErrChan(ctx context.Context, t *testing.T, errChan chan error, wantedChunksCount int) {
t.Helper()
for i := 0; i < wantedChunksCount; i++ {
select {
case err := <-errChan:
if err != nil {
t.Error(err)
}
case <-ctx.Done():
t.Fatal(ctx.Err())
}
}
}
// TestDB_LastPullSubscriptionBinID validates that LastPullSubscriptionBinID
// returns the last bin id for proximity order bins by
// doing a few rounds of chunk uploads.
func TestDB_LastPullSubscriptionBinID(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
addrs := make(map[uint8][]chunk.Address)
binIDCounter := make(map[uint8]uint64)
var binIDCounterMu sync.RWMutex
last := make(map[uint8]uint64)
// do a few rounds of uploads and check if
// last pull subscription chunk is correct
for _, count := range []int{1, 3, 10, 11, 100, 120} {
// upload
for i := 0; i < count; i++ {
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
bin := db.po(ch.Address())
if _, ok := addrs[bin]; !ok {
addrs[bin] = make([]chunk.Address, 0)
}
addrs[bin] = append(addrs[bin], ch.Address())
binIDCounterMu.RLock()
binIDCounter[bin]++
binIDCounterMu.RUnlock()
last[bin] = binIDCounter[bin]
}
// check
for bin := uint8(0); bin <= uint8(chunk.MaxPO); bin++ {
want, ok := last[bin]
got, err := db.LastPullSubscriptionBinID(bin)
if ok {
if err != nil {
t.Errorf("got unexpected error value %v", err)
}
}
if got != want {
t.Errorf("got last bin id %v, want %v", got, want)
}
}
}
}
// TestAddressInBin validates that function addressInBin
// returns a valid address for every proximity order bin.
func TestAddressInBin(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
for po := uint8(0); po < chunk.MaxPO; po++ {
addr := db.addressInBin(po)
got := db.po(addr)
if got != po {
t.Errorf("got po %v, want %v", got, po)
}
}
}

View File

@ -0,0 +1,160 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"context"
"sync"
"time"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/metrics"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/shed"
)
// SubscribePush returns a channel that provides storage chunks with ordering from push syncing index.
// The returned stop function will terminate current and further iterations, and will close
// the returned channel without any errors. Make sure to check the second value
// returned on channel receive and stop iterating when it is false.
func (db *DB) SubscribePush(ctx context.Context) (c <-chan chunk.Chunk, stop func()) {
metricName := "localstore.SubscribePush"
metrics.GetOrRegisterCounter(metricName, nil).Inc(1)
chunks := make(chan chunk.Chunk)
trigger := make(chan struct{}, 1)
db.pushTriggersMu.Lock()
db.pushTriggers = append(db.pushTriggers, trigger)
db.pushTriggersMu.Unlock()
// send signal for the initial iteration
trigger <- struct{}{}
stopChan := make(chan struct{})
var stopChanOnce sync.Once
go func() {
defer metrics.GetOrRegisterCounter(metricName+".done", nil).Inc(1)
// close the returned chunks channel at the end to
// signal that the subscription is done
defer close(chunks)
// sinceItem is the Item from which the next iteration
// should start. The first iteration starts from the first Item.
var sinceItem *shed.Item
for {
select {
case <-trigger:
// iterate until:
// - last index Item is reached
// - subscription stop is called
// - context is done
metrics.GetOrRegisterCounter(metricName+".iter", nil).Inc(1)
iterStart := time.Now()
var count int
err := db.pushIndex.Iterate(func(item shed.Item) (stop bool, err error) {
// get chunk data
dataItem, err := db.retrievalDataIndex.Get(item)
if err != nil {
return true, err
}
select {
case chunks <- chunk.NewChunk(dataItem.Address, dataItem.Data):
count++
// set next iteration start item
// when its chunk is successfully sent to channel
sinceItem = &item
return false, nil
case <-stopChan:
// gracefully stop the iteration
// on stop
return true, nil
case <-db.close:
// gracefully stop the iteration
// on database close
return true, nil
case <-ctx.Done():
return true, ctx.Err()
}
}, &shed.IterateOptions{
StartFrom: sinceItem,
// sinceItem was sent as the last Address in the previous
// iterator call, skip it in this one
SkipStartFromItem: true,
})
totalTimeMetric(metricName+".iter", iterStart)
if err != nil {
metrics.GetOrRegisterCounter(metricName+".iter.error", nil).Inc(1)
log.Error("localstore push subscription iteration", "err", err)
return
}
case <-stopChan:
// terminate the subscription
// on stop
return
case <-db.close:
// terminate the subscription
// on database close
return
case <-ctx.Done():
err := ctx.Err()
if err != nil {
log.Error("localstore push subscription", "err", err)
}
return
}
}
}()
stop = func() {
stopChanOnce.Do(func() {
close(stopChan)
})
db.pushTriggersMu.Lock()
defer db.pushTriggersMu.Unlock()
for i, t := range db.pushTriggers {
if t == trigger {
db.pushTriggers = append(db.pushTriggers[:i], db.pushTriggers[i+1:]...)
break
}
}
}
return chunks, stop
}
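A minimal consumer sketch (not part of this commit), showing the contract the doc comment above describes: treat a closed channel as the end of the subscription and always call stop to release the trigger.
// consumePush drains a push subscription; a hedged usage sketch
func consumePush(ctx context.Context, db *DB) {
	chunks, stop := db.SubscribePush(ctx)
	// releasing the subscription removes its trigger from db.pushTriggers
	defer stop()
	for {
		select {
		case ch, ok := <-chunks:
			if !ok {
				// channel is closed on stop, database close or context cancellation
				return
			}
			log.Debug("push sync chunk", "addr", ch.Address())
		case <-ctx.Done():
			return
		}
	}
}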
// triggerPushSubscriptions is used internally for starting iterations
// on Push subscriptions. Whenever new item is added to the push index,
// this function should be called.
func (db *DB) triggerPushSubscriptions() {
db.pushTriggersMu.RLock()
triggers := db.pushTriggers
db.pushTriggersMu.RUnlock()
for _, t := range triggers {
select {
case t <- struct{}{}:
default:
}
}
}
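The select with an empty default above makes the trigger send non-blocking: each trigger channel is buffered with capacity 1, so any number of notifications issued while an iteration is in flight collapse into a single pending wake-up. A minimal, self-contained sketch of that pattern:
// notifyOnce queues at most one pending wake-up on a capacity-1 trigger
// channel; calls made while a wake-up is already pending are dropped
func notifyOnce(trigger chan struct{}) {
	select {
	case trigger <- struct{}{}:
		// wake-up queued for the next iteration
	default:
		// a wake-up is already pending; coalesce
	}
}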

View File

@ -0,0 +1,206 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"bytes"
"context"
"fmt"
"sync"
"testing"
"time"
"github.com/ethersphere/swarm/chunk"
)
// TestDB_SubscribePush uploads some chunks before and after
// push syncing subscription is created and validates if
// all addresses are received in the right order.
func TestDB_SubscribePush(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
chunks := make([]chunk.Chunk, 0)
var chunksMu sync.Mutex
uploadRandomChunks := func(count int) {
chunksMu.Lock()
defer chunksMu.Unlock()
for i := 0; i < count; i++ {
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
chunks = append(chunks, ch)
}
}
// prepopulate database with some chunks
// before the subscription
uploadRandomChunks(10)
// set a timeout on subscription
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
// collect all errors from validating addresses, even nil ones
// to validate the number of addresses received by the subscription
errChan := make(chan error)
ch, stop := db.SubscribePush(ctx)
defer stop()
// receive and validate addresses from the subscription
go func() {
var i int // address index
for {
select {
case got, ok := <-ch:
if !ok {
return
}
chunksMu.Lock()
want := chunks[i]
chunksMu.Unlock()
var err error
if !bytes.Equal(got.Data(), want.Data()) {
err = fmt.Errorf("got chunk %v data %x, want %x", i, got.Data(), want.Data())
}
if !bytes.Equal(got.Address(), want.Address()) {
err = fmt.Errorf("got chunk %v address %s, want %s", i, got.Address().Hex(), want.Address().Hex())
}
i++
// send one and only one error per received address
select {
case errChan <- err:
case <-ctx.Done():
return
}
case <-ctx.Done():
return
}
}
}()
// upload some chunks just after subscribe
uploadRandomChunks(5)
time.Sleep(200 * time.Millisecond)
// upload some chunks after some short time
// to ensure that subscription will include them
// in a dynamic environment
uploadRandomChunks(3)
checkErrChan(ctx, t, errChan, len(chunks))
}
// TestDB_SubscribePush_multiple uploads chunks before and after
// multiple push syncing subscriptions are created and
// validates if all addresses are received in the right order.
func TestDB_SubscribePush_multiple(t *testing.T) {
db, cleanupFunc := newTestDB(t, nil)
defer cleanupFunc()
addrs := make([]chunk.Address, 0)
var addrsMu sync.Mutex
uploadRandomChunks := func(count int) {
addrsMu.Lock()
defer addrsMu.Unlock()
for i := 0; i < count; i++ {
ch := generateTestRandomChunk()
_, err := db.Put(context.Background(), chunk.ModePutUpload, ch)
if err != nil {
t.Fatal(err)
}
addrs = append(addrs, ch.Address())
}
}
// prepopulate database with some chunks
// before the subscription
uploadRandomChunks(10)
// set a timeout on subscription
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
// collect all errors from validating addresses, even nil ones
// to validate the number of addresses received by the subscription
errChan := make(chan error)
subsCount := 10
// start a number of subscriptions
// so that all of them write an error for every address to errChan
for j := 0; j < subsCount; j++ {
ch, stop := db.SubscribePush(ctx)
defer stop()
// receive and validate addresses from the subscription
go func(j int) {
var i int // address index
for {
select {
case got, ok := <-ch:
if !ok {
return
}
addrsMu.Lock()
want := addrs[i]
addrsMu.Unlock()
var err error
if !bytes.Equal(got.Address(), want) {
err = fmt.Errorf("got chunk %v address on subscription %v %s, want %s", i, j, got, want)
}
i++
// send one and only one error per received address
select {
case errChan <- err:
case <-ctx.Done():
return
}
case <-ctx.Done():
return
}
}
}(j)
}
// upload some chunks just after subscribe
uploadRandomChunks(5)
time.Sleep(200 * time.Millisecond)
// upload some chunks after some short time
// to ensure that subscription will include them
// in a dynamic environment
uploadRandomChunks(3)
// number of addresses received by all subscriptions
wantedChunksCount := len(addrs) * subsCount
checkErrChan(ctx, t, errChan, wantedChunksCount)
}

476
storage/mock/db/db.go Normal file
View File

@ -0,0 +1,476 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// Package db implements a mock store that keeps all chunk data in a LevelDB database.
package db
import (
"archive/tar"
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"io/ioutil"
"sync"
"time"
"github.com/syndtr/goleveldb/leveldb"
"github.com/syndtr/goleveldb/leveldb/util"
"github.com/ethereum/go-ethereum/common"
"github.com/ethersphere/swarm/storage/mock"
)
// GlobalStore contains the LevelDB database that is storing
// chunk data for all swarm nodes.
// Closing the GlobalStore with the Close method is required to
// release resources used by the database.
type GlobalStore struct {
db *leveldb.DB
// protects nodes and keys indexes
// in Put and Delete methods
nodesLocks sync.Map
keysLocks sync.Map
}
// NewGlobalStore creates a new instance of GlobalStore.
func NewGlobalStore(path string) (s *GlobalStore, err error) {
db, err := leveldb.OpenFile(path, nil)
if err != nil {
return nil, err
}
return &GlobalStore{
db: db,
}, nil
}
// Close releases the resources used by the underlying LevelDB.
func (s *GlobalStore) Close() error {
return s.db.Close()
}
// NewNodeStore returns a new instance of NodeStore that retrieves and stores
// chunk data only for a node with address addr.
func (s *GlobalStore) NewNodeStore(addr common.Address) *mock.NodeStore {
return mock.NewNodeStore(addr, s)
}
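A minimal usage sketch for the store; the database path, node address and key are illustrative values only:
// exampleGlobalStore shows the intended lifecycle: open, put, get, close
func exampleGlobalStore() error {
	s, err := NewGlobalStore("/tmp/swarm-mock-db") // assumed path
	if err != nil {
		return err
	}
	defer s.Close() // required to release LevelDB resources

	addr := common.HexToAddress("0x1234567890123456789012345678901234567890")
	key := []byte("chunk-key")
	if err := s.Put(addr, key, []byte("chunk data")); err != nil {
		return err
	}
	_, err = s.Get(addr, key)
	return err
}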
// Get returns chunk data if the chunk with key exists for node
// on address addr.
func (s *GlobalStore) Get(addr common.Address, key []byte) (data []byte, err error) {
has, err := s.db.Has(indexForHashesPerNode(addr, key), nil)
if err != nil {
return nil, mock.ErrNotFound
}
if !has {
return nil, mock.ErrNotFound
}
data, err = s.db.Get(indexDataKey(key), nil)
if err == leveldb.ErrNotFound {
err = mock.ErrNotFound
}
return
}
// Put saves the chunk data for node with address addr.
func (s *GlobalStore) Put(addr common.Address, key []byte, data []byte) error {
unlock, err := s.lock(addr, key)
if err != nil {
return err
}
defer unlock()
batch := new(leveldb.Batch)
batch.Put(indexForHashesPerNode(addr, key), nil)
batch.Put(indexForNodesWithHash(key, addr), nil)
batch.Put(indexForNodes(addr), nil)
batch.Put(indexForHashes(key), nil)
batch.Put(indexDataKey(key), data)
return s.db.Write(batch, nil)
}
// Delete removes the chunk reference to node with address addr.
func (s *GlobalStore) Delete(addr common.Address, key []byte) error {
unlock, err := s.lock(addr, key)
if err != nil {
return err
}
defer unlock()
batch := new(leveldb.Batch)
batch.Delete(indexForHashesPerNode(addr, key))
batch.Delete(indexForNodesWithHash(key, addr))
// check if this node contains any other keys and, if not,
// remove it from the nodes index (and likewise remove the
// key from the hashes index when no node holds it anymore);
// a Get on the bare prefix cannot answer this, so iterate over
// the prefix, skipping the entry deleted in this batch
hasOtherWithPrefix := func(prefix, skip []byte) bool {
it := s.db.NewIterator(util.BytesPrefix(prefix), nil)
defer it.Release()
for it.Next() {
if !bytes.Equal(it.Key(), skip) {
return true
}
}
return false
}
if !hasOtherWithPrefix(indexForHashesPerNodePrefix(addr), indexForHashesPerNode(addr, key)) {
batch.Delete(indexForNodes(addr))
}
if !hasOtherWithPrefix(indexForNodesWithHashPrefix(key), indexForNodesWithHash(key, addr)) {
batch.Delete(indexForHashes(key))
}
return s.db.Write(batch, nil)
}
// HasKey returns whether a node with addr contains the key.
func (s *GlobalStore) HasKey(addr common.Address, key []byte) bool {
has, err := s.db.Has(indexForHashesPerNode(addr, key), nil)
if err != nil {
has = false
}
return has
}
// Keys returns a paginated list of keys on all nodes.
func (s *GlobalStore) Keys(startKey []byte, limit int) (keys mock.Keys, err error) {
return s.keys(nil, startKey, limit)
}
// Nodes returns a paginated list of all known nodes.
func (s *GlobalStore) Nodes(startAddr *common.Address, limit int) (nodes mock.Nodes, err error) {
return s.nodes(nil, startAddr, limit)
}
// NodeKeys returns a paginated list of keys on a node with provided address.
func (s *GlobalStore) NodeKeys(addr common.Address, startKey []byte, limit int) (keys mock.Keys, err error) {
return s.keys(&addr, startKey, limit)
}
// KeyNodes returns a paginated list of nodes that contain a particular key.
func (s *GlobalStore) KeyNodes(key []byte, startAddr *common.Address, limit int) (nodes mock.Nodes, err error) {
return s.nodes(key, startAddr, limit)
}
// keys returns a paginated list of keys. If addr is not nil, only keys on that
// node will be returned.
func (s *GlobalStore) keys(addr *common.Address, startKey []byte, limit int) (keys mock.Keys, err error) {
iter := s.db.NewIterator(nil, nil)
defer iter.Release()
if limit <= 0 {
limit = mock.DefaultLimit
}
prefix := []byte{indexForHashesPrefix}
if addr != nil {
prefix = indexForHashesPerNodePrefix(*addr)
}
if startKey != nil {
if addr != nil {
startKey = indexForHashesPerNode(*addr, startKey)
} else {
startKey = indexForHashes(startKey)
}
} else {
startKey = prefix
}
ok := iter.Seek(startKey)
if !ok {
return keys, iter.Error()
}
for ; ok; ok = iter.Next() {
k := iter.Key()
if !bytes.HasPrefix(k, prefix) {
break
}
key := append([]byte(nil), bytes.TrimPrefix(k, prefix)...)
if len(keys.Keys) >= limit {
keys.Next = key
break
}
keys.Keys = append(keys.Keys, key)
}
return keys, iter.Error()
}
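The Next field drives pagination: when a page fills up, the first key that did not fit is returned so that the caller can pass it back as startKey. A sketch of draining the whole listing:
// allKeys walks the paginated Keys listing to the end; a usage sketch
func allKeys(s *GlobalStore) (all [][]byte, err error) {
	var start []byte
	for {
		page, err := s.Keys(start, mock.DefaultLimit)
		if err != nil {
			return nil, err
		}
		all = append(all, page.Keys...)
		if len(page.Next) == 0 {
			return all, nil
		}
		start = page.Next
	}
}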
// nodes returns a paginated list of node addresses. If key is not nil,
// only nodes that contain that key will be returned.
func (s *GlobalStore) nodes(key []byte, startAddr *common.Address, limit int) (nodes mock.Nodes, err error) {
iter := s.db.NewIterator(nil, nil)
defer iter.Release()
if limit <= 0 {
limit = mock.DefaultLimit
}
prefix := []byte{indexForNodesPrefix}
if key != nil {
prefix = indexForNodesWithHashPrefix(key)
}
startKey := prefix
if startAddr != nil {
if key != nil {
startKey = indexForNodesWithHash(key, *startAddr)
} else {
startKey = indexForNodes(*startAddr)
}
}
ok := iter.Seek(startKey)
if !ok {
return nodes, iter.Error()
}
for ; ok; ok = iter.Next() {
k := iter.Key()
if !bytes.HasPrefix(k, prefix) {
break
}
addr := common.BytesToAddress(append([]byte(nil), bytes.TrimPrefix(k, prefix)...))
if len(nodes.Addrs) >= limit {
nodes.Next = &addr
break
}
nodes.Addrs = append(nodes.Addrs, addr)
}
return nodes, iter.Error()
}
// Import reads tar archive from a reader that contains exported chunk data.
// It returns the number of chunks imported and an error.
func (s *GlobalStore) Import(r io.Reader) (n int, err error) {
tr := tar.NewReader(r)
for {
hdr, err := tr.Next()
if err != nil {
if err == io.EOF {
break
}
return n, err
}
data, err := ioutil.ReadAll(tr)
if err != nil {
return n, err
}
var c mock.ExportedChunk
if err = json.Unmarshal(data, &c); err != nil {
return n, err
}
key := common.Hex2Bytes(hdr.Name)
batch := new(leveldb.Batch)
for _, addr := range c.Addrs {
batch.Put(indexForHashesPerNode(addr, key), nil)
batch.Put(indexForNodesWithHash(key, addr), nil)
batch.Put(indexForNodes(addr), nil)
}
batch.Put(indexForHashes(key), nil)
batch.Put(indexDataKey(key), c.Data)
if err = s.db.Write(batch, nil); err != nil {
return n, err
}
n++
}
return n, err
}
// Export writes to a writer a tar archive with all chunk data from
// the store. It returns the number of chunks exported and an error.
func (s *GlobalStore) Export(w io.Writer) (n int, err error) {
tw := tar.NewWriter(w)
defer tw.Close()
buf := bytes.NewBuffer(make([]byte, 0, 1024))
encoder := json.NewEncoder(buf)
snap, err := s.db.GetSnapshot()
if err != nil {
return 0, err
}
iter := snap.NewIterator(util.BytesPrefix([]byte{indexForHashesByNodePrefix}), nil)
defer iter.Release()
var currentKey string
var addrs []common.Address
saveChunk := func() error {
hexKey := currentKey
data, err := snap.Get(indexDataKey(common.Hex2Bytes(hexKey)), nil)
if err != nil {
return fmt.Errorf("get data %s: %v", hexKey, err)
}
buf.Reset()
if err = encoder.Encode(mock.ExportedChunk{
Addrs: addrs,
Data: data,
}); err != nil {
return err
}
d := buf.Bytes()
hdr := &tar.Header{
Name: hexKey,
Mode: 0644,
Size: int64(len(d)),
}
if err := tw.WriteHeader(hdr); err != nil {
return err
}
if _, err := tw.Write(d); err != nil {
return err
}
n++
return nil
}
for iter.Next() {
k := bytes.TrimPrefix(iter.Key(), []byte{indexForHashesByNodePrefix})
i := bytes.Index(k, []byte{keyTermByte})
if i < 0 {
continue
}
hexKey := string(k[:i])
if currentKey == "" {
currentKey = hexKey
}
if hexKey != currentKey {
if err = saveChunk(); err != nil {
return n, err
}
addrs = addrs[:0]
}
currentKey = hexKey
addrs = append(addrs, common.BytesToAddress(k[i+1:]))
}
if len(addrs) > 0 {
if err = saveChunk(); err != nil {
return n, err
}
}
return n, iter.Error()
}
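Export and Import are symmetric over the same tar layout (one JSON-encoded mock.ExportedChunk per chunk, named by its hex key), so copying all data between two stores is a matter of piping one into the other; a minimal sketch with an in-memory buffer:
// migrate copies all chunk data from src to dst via the tar format
func migrate(src, dst *GlobalStore) error {
	var buf bytes.Buffer
	if _, err := src.Export(&buf); err != nil {
		return err
	}
	_, err := dst.Import(&buf)
	return err
}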
var (
// maximal time for lock to wait until it returns error
lockTimeout = 3 * time.Second
// duration between two lock checks.
lockCheckDelay = 30 * time.Microsecond
// error returned by lock method when lock timeout is reached
errLockTimeout = errors.New("lock timeout")
)
// lock protects parallel writes in Put and Delete methods for both
// node with provided address and for data with provided key.
func (s *GlobalStore) lock(addr common.Address, key []byte) (unlock func(), err error) {
start := time.Now()
nodeLockKey := addr.Hex()
for {
_, loaded := s.nodesLocks.LoadOrStore(nodeLockKey, struct{}{})
if !loaded {
break
}
time.Sleep(lockCheckDelay)
if time.Since(start) > lockTimeout {
return nil, errLockTimeout
}
}
start = time.Now()
keyLockKey := common.Bytes2Hex(key)
for {
_, loaded := s.keysLocks.LoadOrStore(keyLockKey, struct{}{})
if !loaded {
break
}
time.Sleep(lockCheckDelay)
if time.Since(start) > lockTimeout {
return nil, errLockTimeout
}
}
return func() {
s.nodesLocks.Delete(nodeLockKey)
s.keysLocks.Delete(keyLockKey)
}, nil
}
const (
// prefixes for different indexes
indexDataPrefix = 0
indexForNodesWithHashesPrefix = 1
indexForHashesByNodePrefix = 2
indexForNodesPrefix = 3
indexForHashesPrefix = 4
// keyTermByte splits keys and node addresses
// in database keys
keyTermByte = 0xff
)
// indexForHashesPerNode constructs a database key to store keys used in
// NodeKeys method.
func indexForHashesPerNode(addr common.Address, key []byte) []byte {
return append(indexForHashesPerNodePrefix(addr), key...)
}
// indexForHashesPerNodePrefix returns a prefix containing a node address used in
// NodeKeys method. Node address is hex encoded to be able to use keyTermByte
// for splitting node address and key.
func indexForHashesPerNodePrefix(addr common.Address) []byte {
return append([]byte{indexForNodesWithHashesPrefix}, append([]byte(addr.Hex()), keyTermByte)...)
}
// indexForNodesWithHash constructs a database key to store keys used in
// KeyNodes method.
func indexForNodesWithHash(key []byte, addr common.Address) []byte {
return append(indexForNodesWithHashPrefix(key), addr[:]...)
}
// indexForNodesWithHashPrefix returns a prefix containing a key used in
// KeyNodes method. Key is hex encoded to be able to use keyTermByte
// for splitting key and node address.
func indexForNodesWithHashPrefix(key []byte) []byte {
return append([]byte{indexForHashesByNodePrefix}, append([]byte(common.Bytes2Hex(key)), keyTermByte)...)
}
// indexForNodes constructs a database key to store keys used in
// Nodes method.
func indexForNodes(addr common.Address) []byte {
return append([]byte{indexForNodesPrefix}, addr[:]...)
}
// indexForHashes constructs a database key to store keys used in
// Keys method.
func indexForHashes(key []byte) []byte {
return append([]byte{indexForHashesPrefix}, key...)
}
// indexDataKey constructs a database key for key/data storage.
func indexDataKey(key []byte) []byte {
return append([]byte{indexDataPrefix}, key...)
}
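Because the hex-encoded components never contain the 0xff byte, keyTermByte is a safe separator between them. A small sketch (illustrative values) of the layout produced by indexForNodesWithHash:
// exampleKeyLayout prints the byte layout of a KeyNodes index entry:
// one prefix byte, the hex of the key, the 0xff terminator, then the
// raw 20-byte node address
func exampleKeyLayout() {
	addr := common.HexToAddress("0x0000000000000000000000000000000000000001")
	key := []byte{0xab, 0xcd}
	fmt.Printf("% x\n", indexForNodesWithHash(key, addr))
}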

View File

@ -0,0 +1,75 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package db
import (
"io/ioutil"
"os"
"testing"
"github.com/ethersphere/swarm/storage/mock/test"
)
// TestDBStore is running a test.MockStore tests
// using test.MockStore function.
func TestDBStore(t *testing.T) {
store, cleanup := newTestStore(t)
defer cleanup()
test.MockStore(t, store, 100)
}
// TestDBStoreListings is running test.MockStoreListings tests.
func TestDBStoreListings(t *testing.T) {
store, cleanup := newTestStore(t)
defer cleanup()
test.MockStoreListings(t, store, 1000)
}
// TestImportExport is running a test.ImportExport tests
// using test.MockStore function.
func TestImportExport(t *testing.T) {
store1, cleanup := newTestStore(t)
defer cleanup()
store2, cleanup := newTestStore(t)
defer cleanup()
test.ImportExport(t, store1, store2, 100)
}
// newTestStore creates a temporary GlobalStore
// that will be closed and data deleted when
// calling returned cleanup function.
func newTestStore(t *testing.T) (s *GlobalStore, cleanup func()) {
dir, err := ioutil.TempDir("", "swarm-mock-db-")
if err != nil {
t.Fatal(err)
}
s, err = NewGlobalStore(dir)
if err != nil {
os.RemoveAll(dir)
t.Fatal(err)
}
return s, func() {
s.Close()
os.RemoveAll(dir)
}
}

View File

@ -0,0 +1,257 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package explorer
import (
"bytes"
"encoding/json"
"io"
"net/http"
"net/url"
"strconv"
"strings"
"github.com/ethereum/go-ethereum/common"
"github.com/ethersphere/swarm/log"
"github.com/ethersphere/swarm/storage/mock"
"github.com/rs/cors"
)
const jsonContentType = "application/json; charset=utf-8"
// NewHandler constructs an http.Handler with router
// that serves requests required by the chunk explorer.
//
// /api/has-key/{node}/{key}
// /api/keys?start={key}&node={node}&limit={int[0..1000]}
// /api/nodes?start={node}&key={key}&limit={int[0..1000]}
//
// Data from global store will be served and appropriate
// CORS headers will be sent if allowed origins are provided.
func NewHandler(store mock.GlobalStorer, corsOrigins []string) (handler http.Handler) {
mux := http.NewServeMux()
mux.Handle("/api/has-key/", newHasKeyHandler(store))
mux.Handle("/api/keys", newKeysHandler(store))
mux.Handle("/api/nodes", newNodesHandler(store))
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
jsonStatusResponse(w, http.StatusNotFound)
})
handler = noCacheHandler(mux)
if corsOrigins != nil {
handler = cors.New(cors.Options{
AllowedOrigins: corsOrigins,
AllowedMethods: []string{"GET"},
MaxAge: 600,
}).Handler(handler)
}
return handler
}
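A minimal serving sketch; the listen address and the allowed origin are illustrative:
// serveExplorer wires a global store into an HTTP server; a usage sketch
func serveExplorer(store mock.GlobalStorer) error {
	handler := NewHandler(store, []string{"http://localhost:8080"})
	return http.ListenAndServe("127.0.0.1:8500", handler)
}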
// newHasKeyHandler returns a new handler that serves
// requests for the HasKey global store method.
// It responds with a StatusResponse: status code 200
// if the chunk is found, 404 if it is not.
func newHasKeyHandler(store mock.GlobalStorer) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
addr, key, ok := parseHasKeyPath(r.URL.Path)
if !ok {
jsonStatusResponse(w, http.StatusNotFound)
return
}
found := store.HasKey(addr, key)
if !found {
jsonStatusResponse(w, http.StatusNotFound)
return
}
jsonStatusResponse(w, http.StatusOK)
}
}
// KeysResponse is a JSON-encoded response for global store
// Keys and NodeKeys methods.
type KeysResponse struct {
Keys []string `json:"keys"`
Next string `json:"next,omitempty"`
}
// newKeysHandler returns a new handler that serves
// requests for the Keys global store method.
// HTTP response body will be JSON-encoded KeysResponse.
func newKeysHandler(store mock.GlobalStorer) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
q := r.URL.Query()
node := q.Get("node")
start, limit := listingPage(q)
var keys mock.Keys
if node == "" {
var err error
keys, err = store.Keys(common.Hex2Bytes(start), limit)
if err != nil {
log.Error("chunk explorer: keys handler: get keys", "start", start, "err", err)
jsonStatusResponse(w, http.StatusInternalServerError)
return
}
} else {
var err error
keys, err = store.NodeKeys(common.HexToAddress(node), common.Hex2Bytes(start), limit)
if err != nil {
log.Error("chunk explorer: keys handler: get node keys", "node", node, "start", start, "err", err)
jsonStatusResponse(w, http.StatusInternalServerError)
return
}
}
ks := make([]string, len(keys.Keys))
for i, k := range keys.Keys {
ks[i] = common.Bytes2Hex(k)
}
data, err := json.Marshal(KeysResponse{
Keys: ks,
Next: common.Bytes2Hex(keys.Next),
})
if err != nil {
log.Error("chunk explorer: keys handler: json marshal", "err", err)
jsonStatusResponse(w, http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", jsonContentType)
_, err = io.Copy(w, bytes.NewReader(data))
if err != nil {
log.Error("chunk explorer: keys handler: write response", "err", err)
}
}
}
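For reference, a tiny sketch (illustrative hex values) of the JSON body shape this handler produces for a paginated listing:
// exampleKeysBody marshals a sample paginated response;
// the result is {"keys":["00aa","00ab"],"next":"00ac"}
func exampleKeysBody() ([]byte, error) {
	return json.Marshal(KeysResponse{
		Keys: []string{"00aa", "00ab"},
		Next: "00ac",
	})
}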
// NodesResponse is a JSON-encoded response for global store
// Nodes and KeyNodes methods.
type NodesResponse struct {
Nodes []string `json:"nodes"`
Next string `json:"next,omitempty"`
}
// newNodesHandler returns a new handler that serves
// requests for Nodes global store method.
// HTTP response body will be JSON-encoded NodesResponse.
func newNodesHandler(store mock.GlobalStorer) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
q := r.URL.Query()
key := q.Get("key")
var start *common.Address
queryStart, limit := listingPage(q)
if queryStart != "" {
s := common.HexToAddress(queryStart)
start = &s
}
var nodes mock.Nodes
if key == "" {
var err error
nodes, err = store.Nodes(start, limit)
if err != nil {
log.Error("chunk explorer: nodes handler: get nodes", "start", queryStart, "err", err)
jsonStatusResponse(w, http.StatusInternalServerError)
return
}
} else {
var err error
nodes, err = store.KeyNodes(common.Hex2Bytes(key), start, limit)
if err != nil {
log.Error("chunk explorer: nodes handler: get key nodes", "key", key, "start", queryStart, "err", err)
jsonStatusResponse(w, http.StatusInternalServerError)
return
}
}
ns := make([]string, len(nodes.Addrs))
for i, n := range nodes.Addrs {
ns[i] = n.Hex()
}
var next string
if nodes.Next != nil {
next = nodes.Next.Hex()
}
data, err := json.Marshal(NodesResponse{
Nodes: ns,
Next: next,
})
if err != nil {
log.Error("chunk explorer: nodes handler", "err", err)
jsonStatusResponse(w, http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", jsonContentType)
_, err = io.Copy(w, bytes.NewReader(data))
if err != nil {
log.Error("chunk explorer: nodes handler: write response", "err", err)
}
}
}
// parseHasKeyPath extracts address and key from HTTP request
// path for HasKey route: /api/has-key/{node}/{key}.
// If ok is false, the provided path is not matched.
func parseHasKeyPath(p string) (addr common.Address, key []byte, ok bool) {
p = strings.TrimPrefix(p, "/api/has-key/")
parts := strings.SplitN(p, "/", 2)
if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
return addr, nil, false
}
addr = common.HexToAddress(parts[0])
key = common.Hex2Bytes(parts[1])
return addr, key, true
}
// listingPage returns start value and listing limit
// from url query values.
func listingPage(q url.Values) (start string, limit int) {
// if limit is not a valid integer (or blank string),
// ignore the error and use the returned 0 value
limit, _ = strconv.Atoi(q.Get("limit"))
return q.Get("start"), limit
}
// StatusResponse is a standardized JSON-encoded response
// that contains information about HTTP response code
// for easier status identification.
type StatusResponse struct {
Message string `json:"message"`
Code int `json:"code"`
}
// jsonStatusResponse writes to the response writer
// JSON-encoded StatusResponse based on the provided status code.
func jsonStatusResponse(w http.ResponseWriter, code int) {
w.Header().Set("Content-Type", jsonContentType)
w.WriteHeader(code)
err := json.NewEncoder(w).Encode(StatusResponse{
Message: http.StatusText(code),
Code: code,
})
if err != nil {
log.Error("chunk explorer: json status response", "err", err)
}
}
// noCacheHandler sets required HTTP headers to prevent
// response caching at the client side.
func noCacheHandler(h http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Cache-Control", "no-cache, no-store, must-revalidate")
w.Header().Set("Pragma", "no-cache")
w.Header().Set("Expires", "0")
h.ServeHTTP(w, r)
})
}

View File

@ -0,0 +1,471 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package explorer
import (
"encoding/binary"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"net/http/httptest"
"net/url"
"os"
"sort"
"strconv"
"strings"
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethersphere/swarm/storage/mock"
"github.com/ethersphere/swarm/storage/mock/db"
"github.com/ethersphere/swarm/storage/mock/mem"
)
// TestHandler_memGlobalStore runs a set of tests
// to validate handler with mem global store.
func TestHandler_memGlobalStore(t *testing.T) {
t.Parallel()
globalStore := mem.NewGlobalStore()
testHandler(t, globalStore)
}
// TestHandler_dbGlobalStore runs a set of tests
// to validate handler with database global store.
func TestHandler_dbGlobalStore(t *testing.T) {
t.Parallel()
dir, err := ioutil.TempDir("", "swarm-mock-explorer-db-")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
globalStore, err := db.NewGlobalStore(dir)
if err != nil {
t.Fatal(err)
}
defer globalStore.Close()
testHandler(t, globalStore)
}
// testHandler stores data distributed by node addresses
// and validates if this data is correctly retrievable
// by using the http.Handler returned by NewHandler function.
// This test covers all HTTP routes and various get parameters
// on them to check paginated results.
func testHandler(t *testing.T, globalStore mock.GlobalStorer) {
const (
nodeCount = 350
keyCount = 250
keysOnNodeCount = 150
)
// keys for every node
nodeKeys := make(map[string][]string)
// a node address that is not present in global store
invalidAddr := "0x7b8b72938c254cf002c4e1e714d27e022be88d93"
// a key that is not present in global store
invalidKey := "f9824192fb515cfb"
for i := 1; i <= nodeCount; i++ {
b := make([]byte, 8)
binary.BigEndian.PutUint64(b, uint64(i))
addr := common.BytesToAddress(b).Hex()
nodeKeys[addr] = make([]string, 0)
}
for i := 1; i <= keyCount; i++ {
b := make([]byte, 8)
binary.BigEndian.PutUint64(b, uint64(i))
key := common.Bytes2Hex(b)
var c int
for addr := range nodeKeys {
nodeKeys[addr] = append(nodeKeys[addr], key)
c++
if c >= keysOnNodeCount {
break
}
}
}
// sort keys for every node as they are expected to be
// sorted in HTTP responses
for _, keys := range nodeKeys {
sort.Strings(keys)
}
// nodes for every key
keyNodes := make(map[string][]string)
// construct a reverse mapping of nodes for every key
for addr, keys := range nodeKeys {
for _, key := range keys {
keyNodes[key] = append(keyNodes[key], addr)
}
}
// sort node addresses with a case-insensitive sort,
// as hex letters in node addresses are mixed-case
for _, addrs := range keyNodes {
sortCaseInsensitive(addrs)
}
// find a key that is not stored at the address
var (
unmatchedAddr string
unmatchedKey string
)
for addr, keys := range nodeKeys {
for key := range keyNodes {
var found bool
for _, k := range keys {
if k == key {
found = true
break
}
}
if !found {
unmatchedAddr = addr
unmatchedKey = key
}
break
}
if unmatchedAddr != "" {
break
}
}
// check if unmatched key/address pair is found
if unmatchedAddr == "" || unmatchedKey == "" {
t.Fatalf("could not find a key that is not associated with a node")
}
// store the data
for addr, keys := range nodeKeys {
for _, key := range keys {
err := globalStore.Put(common.HexToAddress(addr), common.Hex2Bytes(key), []byte("data"))
if err != nil {
t.Fatal(err)
}
}
}
handler := NewHandler(globalStore, nil)
// this subtest confirms that every uploaded key is reported as present and that invalid keys are not
t.Run("has key", func(t *testing.T) {
for addr, keys := range nodeKeys {
for _, key := range keys {
testStatusResponse(t, handler, "/api/has-key/"+addr+"/"+key, http.StatusOK)
testStatusResponse(t, handler, "/api/has-key/"+invalidAddr+"/"+key, http.StatusNotFound)
}
testStatusResponse(t, handler, "/api/has-key/"+addr+"/"+invalidKey, http.StatusNotFound)
}
testStatusResponse(t, handler, "/api/has-key/"+invalidAddr+"/"+invalidKey, http.StatusNotFound)
testStatusResponse(t, handler, "/api/has-key/"+unmatchedAddr+"/"+unmatchedKey, http.StatusNotFound)
})
// this subtest confirms that all keys are listed in the correct order with expected pagination
t.Run("keys", func(t *testing.T) {
var allKeys []string
for key := range keyNodes {
allKeys = append(allKeys, key)
}
sort.Strings(allKeys)
t.Run("limit 0", testKeys(handler, allKeys, 0, ""))
t.Run("limit default", testKeys(handler, allKeys, mock.DefaultLimit, ""))
t.Run("limit 2x default", testKeys(handler, allKeys, 2*mock.DefaultLimit, ""))
t.Run("limit 0.5x default", testKeys(handler, allKeys, mock.DefaultLimit/2, ""))
t.Run("limit max", testKeys(handler, allKeys, mock.MaxLimit, ""))
t.Run("limit 2x max", testKeys(handler, allKeys, 2*mock.MaxLimit, ""))
t.Run("limit negative", testKeys(handler, allKeys, -10, ""))
})
// this subtest confirms that all keys are listed for every node in the correct order,
// and that for one node different pagination options are correct
t.Run("node keys", func(t *testing.T) {
var limitCheckAddr string
for addr, keys := range nodeKeys {
testKeys(handler, keys, 0, addr)(t)
if limitCheckAddr == "" {
limitCheckAddr = addr
}
}
testKeys(handler, nil, 0, invalidAddr)(t)
limitCheckKeys := nodeKeys[limitCheckAddr]
t.Run("limit 0", testKeys(handler, limitCheckKeys, 0, limitCheckAddr))
t.Run("limit default", testKeys(handler, limitCheckKeys, mock.DefaultLimit, limitCheckAddr))
t.Run("limit 2x default", testKeys(handler, limitCheckKeys, 2*mock.DefaultLimit, limitCheckAddr))
t.Run("limit 0.5x default", testKeys(handler, limitCheckKeys, mock.DefaultLimit/2, limitCheckAddr))
t.Run("limit max", testKeys(handler, limitCheckKeys, mock.MaxLimit, limitCheckAddr))
t.Run("limit 2x max", testKeys(handler, limitCheckKeys, 2*mock.MaxLimit, limitCheckAddr))
t.Run("limit negative", testKeys(handler, limitCheckKeys, -10, limitCheckAddr))
})
// this subtest confirms that all nodes are listed in the correct order with expected pagination
t.Run("nodes", func(t *testing.T) {
var allNodes []string
for addr := range nodeKeys {
allNodes = append(allNodes, addr)
}
sortCaseInsensitive(allNodes)
t.Run("limit 0", testNodes(handler, allNodes, 0, ""))
t.Run("limit default", testNodes(handler, allNodes, mock.DefaultLimit, ""))
t.Run("limit 2x default", testNodes(handler, allNodes, 2*mock.DefaultLimit, ""))
t.Run("limit 0.5x default", testNodes(handler, allNodes, mock.DefaultLimit/2, ""))
t.Run("limit max", testNodes(handler, allNodes, mock.MaxLimit, ""))
t.Run("limit 2x max", testNodes(handler, allNodes, 2*mock.MaxLimit, ""))
t.Run("limit negative", testNodes(handler, allNodes, -10, ""))
})
// this subtest confirms that all nodes that contain a particular key are listed in the correct order,
// and that for one key different node pagination options are correct
t.Run("key nodes", func(t *testing.T) {
var limitCheckKey string
for key, addrs := range keyNodes {
testNodes(handler, addrs, 0, key)(t)
if limitCheckKey == "" {
limitCheckKey = key
}
}
testNodes(handler, nil, 0, invalidKey)(t)
limitCheckKeys := keyNodes[limitCheckKey]
t.Run("limit 0", testNodes(handler, limitCheckKeys, 0, limitCheckKey))
t.Run("limit default", testNodes(handler, limitCheckKeys, mock.DefaultLimit, limitCheckKey))
t.Run("limit 2x default", testNodes(handler, limitCheckKeys, 2*mock.DefaultLimit, limitCheckKey))
t.Run("limit 0.5x default", testNodes(handler, limitCheckKeys, mock.DefaultLimit/2, limitCheckKey))
t.Run("limit max", testNodes(handler, limitCheckKeys, mock.MaxLimit, limitCheckKey))
t.Run("limit 2x max", testNodes(handler, limitCheckKeys, 2*mock.MaxLimit, limitCheckKey))
t.Run("limit negative", testNodes(handler, limitCheckKeys, -10, limitCheckKey))
})
}
// testKeys returns a test function that validates wantKeys against a series of /api/keys
// HTTP responses with provided limit and node options.
func testKeys(handler http.Handler, wantKeys []string, limit int, node string) func(t *testing.T) {
return func(t *testing.T) {
t.Helper()
wantLimit := limit
if wantLimit <= 0 {
wantLimit = mock.DefaultLimit
}
if wantLimit > mock.MaxLimit {
wantLimit = mock.MaxLimit
}
wantKeysLen := len(wantKeys)
var i int
var startKey string
for {
var wantNext string
start := i * wantLimit
end := (i + 1) * wantLimit
if end < wantKeysLen {
wantNext = wantKeys[end]
} else {
end = wantKeysLen
}
testKeysResponse(t, handler, node, startKey, limit, KeysResponse{
Keys: wantKeys[start:end],
Next: wantNext,
})
if wantNext == "" {
break
}
startKey = wantNext
i++
}
}
}
// testNodes returns a test function that validates wantAddrs against a series of /api/nodes
// HTTP responses with provided limit and key options.
func testNodes(handler http.Handler, wantAddrs []string, limit int, key string) func(t *testing.T) {
return func(t *testing.T) {
t.Helper()
wantLimit := limit
if wantLimit <= 0 {
wantLimit = mock.DefaultLimit
}
if wantLimit > mock.MaxLimit {
wantLimit = mock.MaxLimit
}
wantAddrsLen := len(wantAddrs)
var i int
var startKey string
for {
var wantNext string
start := i * wantLimit
end := (i + 1) * wantLimit
if end < wantAddrsLen {
wantNext = wantAddrs[end]
} else {
end = wantAddrsLen
}
testNodesResponse(t, handler, key, startKey, limit, NodesResponse{
Nodes: wantAddrs[start:end],
Next: wantNext,
})
if wantNext == "" {
break
}
startKey = wantNext
i++
}
}
}
// testStatusResponse validates a response made on url if it matches
// the expected StatusResponse.
func testStatusResponse(t *testing.T, handler http.Handler, url string, code int) {
t.Helper()
resp := httpGet(t, handler, url)
if resp.StatusCode != code {
t.Errorf("got status code %v, want %v", resp.StatusCode, code)
}
if got := resp.Header.Get("Content-Type"); got != jsonContentType {
t.Errorf("got Content-Type header %q, want %q", got, jsonContentType)
}
var r StatusResponse
if err := json.NewDecoder(resp.Body).Decode(&r); err != nil {
t.Fatal(err)
}
if r.Code != code {
t.Errorf("got response code %v, want %v", r.Code, code)
}
if r.Message != http.StatusText(code) {
t.Errorf("got response message %q, want %q", r.Message, http.StatusText(code))
}
}
// testKeysResponse validates response returned from handler on /api/keys
// with node, start and limit options against KeysResponse.
func testKeysResponse(t *testing.T, handler http.Handler, node, start string, limit int, want KeysResponse) {
t.Helper()
u, err := url.Parse("/api/keys")
if err != nil {
t.Fatal(err)
}
q := u.Query()
if node != "" {
q.Set("node", node)
}
if start != "" {
q.Set("start", start)
}
if limit != 0 {
q.Set("limit", strconv.Itoa(limit))
}
u.RawQuery = q.Encode()
resp := httpGet(t, handler, u.String())
if resp.StatusCode != http.StatusOK {
t.Errorf("got status code %v, want %v", resp.StatusCode, http.StatusOK)
}
if got := resp.Header.Get("Content-Type"); got != jsonContentType {
t.Errorf("got Content-Type header %q, want %q", got, jsonContentType)
}
var r KeysResponse
if err := json.NewDecoder(resp.Body).Decode(&r); err != nil {
t.Fatal(err)
}
if fmt.Sprint(r.Keys) != fmt.Sprint(want.Keys) {
t.Errorf("got keys %v, want %v", r.Keys, want.Keys)
}
if r.Next != want.Next {
t.Errorf("got next %s, want %s", r.Next, want.Next)
}
}
// testNodesResponse validates response returned from handler on /api/nodes
// with key, start and limit options against NodesResponse.
func testNodesResponse(t *testing.T, handler http.Handler, key, start string, limit int, want NodesResponse) {
t.Helper()
u, err := url.Parse("/api/nodes")
if err != nil {
t.Fatal(err)
}
q := u.Query()
if key != "" {
q.Set("key", key)
}
if start != "" {
q.Set("start", start)
}
if limit != 0 {
q.Set("limit", strconv.Itoa(limit))
}
u.RawQuery = q.Encode()
resp := httpGet(t, handler, u.String())
if resp.StatusCode != http.StatusOK {
t.Errorf("got status code %v, want %v", resp.StatusCode, http.StatusOK)
}
if got := resp.Header.Get("Content-Type"); got != jsonContentType {
t.Errorf("got Content-Type header %q, want %q", got, jsonContentType)
}
var r NodesResponse
if err := json.NewDecoder(resp.Body).Decode(&r); err != nil {
t.Fatal(err)
}
if fmt.Sprint(r.Nodes) != fmt.Sprint(want.Nodes) {
t.Errorf("got nodes %v, want %v", r.Nodes, want.Nodes)
}
if r.Next != want.Next {
t.Errorf("got next %s, want %s", r.Next, want.Next)
}
}
// httpGet uses httptest recorder to provide a response on handler's url.
func httpGet(t *testing.T, handler http.Handler, url string) (r *http.Response) {
t.Helper()
req, err := http.NewRequest(http.MethodGet, url, nil)
if err != nil {
t.Fatal(err)
}
w := httptest.NewRecorder()
handler.ServeHTTP(w, req)
return w.Result()
}
// sortCaseInsensitive performs a case insensitive sort on a string slice.
func sortCaseInsensitive(s []string) {
sort.Slice(s, func(i, j int) bool {
return strings.ToLower(s[i]) < strings.ToLower(s[j])
})
}

View File

@ -0,0 +1,163 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package explorer
import (
"fmt"
"net/http"
"net/http/httptest"
"testing"
"github.com/ethersphere/swarm/storage/mock/mem"
)
// TestHandler_CORSOrigin validates that the correct Access-Control-Allow-Origin
// header is served with various allowed origin settings.
func TestHandler_CORSOrigin(t *testing.T) {
notAllowedOrigin := "http://not-allowed-origin.com/"
for _, tc := range []struct {
name string
origins []string
}{
{
name: "no origin",
origins: nil,
},
{
name: "single origin",
origins: []string{"http://localhost/"},
},
{
name: "multiple origins",
origins: []string{"http://localhost/", "http://ethereum.org/"},
},
} {
t.Run(tc.name, func(t *testing.T) {
handler := NewHandler(mem.NewGlobalStore(), tc.origins)
origins := tc.origins
if origins == nil {
// handle the "no origin" test case
origins = []string{""}
}
for _, origin := range origins {
t.Run(fmt.Sprintf("get %q", origin), newTestCORSOrigin(handler, origin, origin))
t.Run(fmt.Sprintf("preflight %q", origin), newTestCORSPreflight(handler, origin, origin))
}
t.Run(fmt.Sprintf("get %q", notAllowedOrigin), newTestCORSOrigin(handler, notAllowedOrigin, ""))
t.Run(fmt.Sprintf("preflight %q", notAllowedOrigin), newTestCORSPreflight(handler, notAllowedOrigin, ""))
})
}
t.Run("wildcard", func(t *testing.T) {
handler := NewHandler(mem.NewGlobalStore(), []string{"*"})
for _, origin := range []string{
"http://example.com/",
"http://ethereum.org",
"http://localhost",
} {
t.Run(fmt.Sprintf("get %q", origin), newTestCORSOrigin(handler, origin, origin))
t.Run(fmt.Sprintf("preflight %q", origin), newTestCORSPreflight(handler, origin, origin))
}
})
}
// newTestCORSOrigin returns a test function that validates if wantOrigin CORS header is
// served by the handler for a GET request.
func newTestCORSOrigin(handler http.Handler, origin, wantOrigin string) func(t *testing.T) {
return func(t *testing.T) {
req, err := http.NewRequest(http.MethodGet, "/", nil)
if err != nil {
t.Fatal(err)
}
req.Header.Set("Origin", origin)
w := httptest.NewRecorder()
handler.ServeHTTP(w, req)
resp := w.Result()
header := resp.Header.Get("Access-Control-Allow-Origin")
if header != wantOrigin {
t.Errorf("got Access-Control-Allow-Origin header %q, want %q", header, wantOrigin)
}
}
}
// newTestCORSPreflight returns a test function that validates if wantOrigin CORS header is
// served by the handler for an OPTIONS CORS preflight request.
func newTestCORSPreflight(handler http.Handler, origin, wantOrigin string) func(t *testing.T) {
return func(t *testing.T) {
req, err := http.NewRequest(http.MethodOptions, "/", nil)
if err != nil {
t.Fatal(err)
}
req.Header.Set("Origin", origin)
req.Header.Set("Access-Control-Request-Method", "GET")
w := httptest.NewRecorder()
handler.ServeHTTP(w, req)
resp := w.Result()
header := resp.Header.Get("Access-Control-Allow-Origin")
if header != wantOrigin {
t.Errorf("got Access-Control-Allow-Origin header %q, want %q", header, wantOrigin)
}
}
}
// TestHandler_noCacheHeaders validates that the no-cache headers are served.
func TestHandler_noCacheHeaders(t *testing.T) {
handler := NewHandler(mem.NewGlobalStore(), nil)
for _, tc := range []struct {
url string
}{
{
url: "/",
},
{
url: "/api/nodes",
},
{
url: "/api/keys",
},
} {
req, err := http.NewRequest(http.MethodGet, tc.url, nil)
if err != nil {
t.Fatal(err)
}
w := httptest.NewRecorder()
handler.ServeHTTP(w, req)
resp := w.Result()
for header, want := range map[string]string{
"Cache-Control": "no-cache, no-store, must-revalidate",
"Pragma": "no-cache",
"Expires": "0",
} {
got := resp.Header.Get(header)
if got != want {
t.Errorf("got %q header %q for url %q, want %q", header, tc.url, got, want)
}
}
}
}

View File

@ -0,0 +1,176 @@
swagger: '2.0'
info:
title: Swarm Global Store API
version: 0.1.0
tags:
- name: Has Key
description: Checks if a Key is stored on a Node
- name: Keys
description: Lists Keys
- name: Nodes
description: Lists Node addresses
paths:
'/api/has-key/{node}/{key}':
get:
tags:
- Has Key
summary: Checks if a Key is stored on a Node
operationId: hasKey
produces:
- application/json
parameters:
- name: node
in: path
required: true
type: string
format: hex-encoded
description: Node address.
- name: key
in: path
required: true
type: string
format: hex-encoded
description: Key.
responses:
'200':
description: Key is stored on Node
schema:
$ref: '#/definitions/Status'
'404':
description: Key is not stored on Node
schema:
$ref: '#/definitions/Status'
'500':
description: Internal Server Error
schema:
$ref: '#/definitions/Status'
'/api/keys':
get:
tags:
- Keys
summary: Lists Keys
operationId: keys
produces:
- application/json
parameters:
- name: start
in: query
required: false
type: string
format: hex-encoded Key
description: A Key as the starting point for the returned list. It is usually a value from the returned "next" field in the Keys response.
- name: limit
in: query
required: false
type: integer
default: 100
minimum: 1
maximum: 1000
description: Limits the number of Keys returned in one response.
- name: node
in: query
required: false
type: string
format: hex-encoded Node address
description: If this parameter is provided, only Keys that are stored on this Node be returned in the response. If not, all known Keys will be returned.
responses:
'200':
description: List of Keys
schema:
$ref: '#/definitions/Keys'
'500':
description: Internal Server Error
schema:
$ref: '#/definitions/Status'
'/api/nodes':
get:
tags:
- Nodes
summary: Lists Node addresses
operationId: nodes
produces:
- application/json
parameters:
- name: start
in: query
required: false
type: string
format: hex-encoded Node address
description: A Node address as the starting point for the returned list. It is usually a value from the returned "next" field in the Nodes response.
- name: limit
in: query
required: false
type: integer
default: 100
minimum: 1
maximum: 1000
description: Limits the number of Node addresses returned in one response.
- name: key
in: query
required: false
type: string
format: hex-encoded Key
description: If this parameter is provided, only addresses of Nodes that store this Key will be returned in the response. If not, all known Node addresses will be returned.
responses:
'200':
description: List of Node addresses
schema:
$ref: '#/definitions/Nodes'
'500':
description: Internal Server Error
schema:
$ref: '#/definitions/Status'
definitions:
Status:
type: object
properties:
message:
type: string
description: HTTP Status Code name.
code:
type: integer
description: HTTP Status Code.
Keys:
type: object
properties:
keys:
type: array
description: A list of Keys.
items:
type: string
format: hex-encoded Key
next:
type: string
format: hex-encoded Key
description: If present, the next Key in the listing. It can be passed as the "start" query parameter to continue the listing. If not present, the end of the listing has been reached.
Nodes:
type: object
properties:
nodes:
type: array
description: A list of Node addresses.
items:
type: string
format: hex-encoded Node address
next:
type: string
format: hex-encoded Node address
description: If present, the next Node address in the listing. It can be passed as the "start" query parameter to continue the listing. If not present, the end of the listing has been reached.
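
All three listing endpoints above share the same cursor-style pagination: pass the previous response's "next" value as "start" until "next" is absent. The following is a minimal Go client sketch of that loop; the base URL is a placeholder and the response struct mirrors the Keys definition above, with error handling kept minimal.

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
)

type keysResponse struct {
	Keys []string `json:"keys"`
	Next string   `json:"next"`
}

// listAllKeys walks the paginated /api/keys endpoint until the
// "next" cursor is no longer present in the response.
func listAllKeys(base string) ([]string, error) {
	var all []string
	start := ""
	for {
		q := url.Values{"limit": {"1000"}}
		if start != "" {
			q.Set("start", start)
		}
		resp, err := http.Get(base + "/api/keys?" + q.Encode())
		if err != nil {
			return nil, err
		}
		var page keysResponse
		err = json.NewDecoder(resp.Body).Decode(&page)
		resp.Body.Close()
		if err != nil {
			return nil, err
		}
		all = append(all, page.Keys...)
		if page.Next == "" {
			return all, nil
		}
		start = page.Next // continue from the returned cursor
	}
}

func main() {
	keys, err := listAllKeys("http://localhost:3033") // placeholder address
	if err != nil {
		panic(err)
	}
	fmt.Println(len(keys), "keys")
}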

385
storage/mock/mem/mem.go Normal file

@ -0,0 +1,385 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// Package mem implements a mock store that keeps all chunk data in memory.
// While it can be used for testing on smaller scales, the main purpose of this
// package is to provide the simplest reference implementation of a mock store.
package mem
import (
"archive/tar"
"bytes"
"encoding/json"
"io"
"io/ioutil"
"sort"
"sync"
"github.com/ethereum/go-ethereum/common"
"github.com/ethersphere/swarm/storage/mock"
)
// GlobalStore stores all chunk data and also key and node address relations.
// It implements mock.GlobalStore interface.
type GlobalStore struct {
// holds a slice of keys per node
nodeKeys map[common.Address][][]byte
// holds which key is stored on which nodes
keyNodes map[string][]common.Address
// all node addresses
nodes []common.Address
// all keys
keys [][]byte
// all keys data
data map[string][]byte
mu sync.RWMutex
}
// NewGlobalStore creates a new instance of GlobalStore.
func NewGlobalStore() *GlobalStore {
return &GlobalStore{
nodeKeys: make(map[common.Address][][]byte),
keyNodes: make(map[string][]common.Address),
nodes: make([]common.Address, 0),
keys: make([][]byte, 0),
data: make(map[string][]byte),
}
}
// NewNodeStore returns a new instance of NodeStore that retrieves and stores
// chunk data only for a node with address addr.
func (s *GlobalStore) NewNodeStore(addr common.Address) *mock.NodeStore {
return mock.NewNodeStore(addr, s)
}
// Get returns chunk data if the chunk with key exists for node
// on address addr.
func (s *GlobalStore) Get(addr common.Address, key []byte) (data []byte, err error) {
s.mu.RLock()
defer s.mu.RUnlock()
if _, has := s.nodeKeyIndex(addr, key); !has {
return nil, mock.ErrNotFound
}
data, ok := s.data[string(key)]
if !ok {
return nil, mock.ErrNotFound
}
return data, nil
}
// Put saves the chunk data for node with address addr.
func (s *GlobalStore) Put(addr common.Address, key []byte, data []byte) error {
s.mu.Lock()
defer s.mu.Unlock()
if i, found := s.nodeKeyIndex(addr, key); !found {
// the key is new for this node: grow the slice by one element
// and insert the key at index i to keep the node's keys sorted
s.nodeKeys[addr] = append(s.nodeKeys[addr], nil)
copy(s.nodeKeys[addr][i+1:], s.nodeKeys[addr][i:])
s.nodeKeys[addr][i] = key
}
if i, found := s.keyNodeIndex(key, addr); !found {
// record that this node stores the key, keeping addresses sorted
k := string(key)
s.keyNodes[k] = append(s.keyNodes[k], addr)
copy(s.keyNodes[k][i+1:], s.keyNodes[k][i:])
s.keyNodes[k][i] = addr
}
if i, found := s.nodeIndex(addr); !found {
// track a previously unseen node address, keeping nodes sorted
s.nodes = append(s.nodes, addr)
copy(s.nodes[i+1:], s.nodes[i:])
s.nodes[i] = addr
}
if i, found := s.keyIndex(key); !found {
// track a previously unseen key, keeping keys sorted
s.keys = append(s.keys, nil)
copy(s.keys[i+1:], s.keys[i:])
s.keys[i] = key
}
s.data[string(key)] = data
return nil
}
// Delete removes the chunk data for node with address addr.
func (s *GlobalStore) Delete(addr common.Address, key []byte) error {
s.mu.Lock()
defer s.mu.Unlock()
if i, has := s.nodeKeyIndex(addr, key); has {
s.nodeKeys[addr] = append(s.nodeKeys[addr][:i], s.nodeKeys[addr][i+1:]...)
}
k := string(key)
if i, on := s.keyNodeIndex(key, addr); on {
s.keyNodes[k] = append(s.keyNodes[k][:i], s.keyNodes[k][i+1:]...)
}
if len(s.nodeKeys[addr]) == 0 {
if i, found := s.nodeIndex(addr); found {
s.nodes = append(s.nodes[:i], s.nodes[i+1:]...)
}
}
if len(s.keyNodes[k]) == 0 {
if i, found := s.keyIndex(key); found {
s.keys = append(s.keys[:i], s.keys[i+1:]...)
}
}
return nil
}
// HasKey returns whether a node with addr contains the key.
func (s *GlobalStore) HasKey(addr common.Address, key []byte) (yes bool) {
s.mu.RLock()
defer s.mu.RUnlock()
_, yes = s.nodeKeyIndex(addr, key)
return yes
}
// keyIndex returns the index of a key in keys slice.
func (s *GlobalStore) keyIndex(key []byte) (index int, found bool) {
l := len(s.keys)
index = sort.Search(l, func(i int) bool {
return bytes.Compare(s.keys[i], key) >= 0
})
found = index < l && bytes.Equal(s.keys[index], key)
return index, found
}
// nodeIndex returns the index of a node address in nodes slice.
func (s *GlobalStore) nodeIndex(addr common.Address) (index int, found bool) {
l := len(s.nodes)
index = sort.Search(l, func(i int) bool {
return bytes.Compare(s.nodes[i][:], addr[:]) >= 0
})
found = index < l && bytes.Equal(s.nodes[index][:], addr[:])
return index, found
}
// nodeKeyIndex returns the index of a key in nodeKeys slice.
func (s *GlobalStore) nodeKeyIndex(addr common.Address, key []byte) (index int, found bool) {
l := len(s.nodeKeys[addr])
index = sort.Search(l, func(i int) bool {
return bytes.Compare(s.nodeKeys[addr][i], key) >= 0
})
found = index < l && bytes.Equal(s.nodeKeys[addr][index], key)
return index, found
}
// keyNodeIndex returns the index of a node address in keyNodes slice.
func (s *GlobalStore) keyNodeIndex(key []byte, addr common.Address) (index int, found bool) {
k := string(key)
l := len(s.keyNodes[k])
index = sort.Search(l, func(i int) bool {
return bytes.Compare(s.keyNodes[k][i][:], addr[:]) >= 0
})
found = index < l && s.keyNodes[k][index] == addr
return index, found
}
// Keys returns a paginated list of keys on all nodes.
func (s *GlobalStore) Keys(startKey []byte, limit int) (keys mock.Keys, err error) {
s.mu.RLock()
defer s.mu.RUnlock()
var i int
if startKey != nil {
i, _ = s.keyIndex(startKey)
}
total := len(s.keys)
max := maxIndex(i, limit, total)
keys.Keys = make([][]byte, 0, max-i)
for ; i < max; i++ {
keys.Keys = append(keys.Keys, append([]byte(nil), s.keys[i]...))
}
if total > max {
keys.Next = s.keys[max]
}
return keys, nil
}
// Nodes returns a paginated list of all known nodes.
func (s *GlobalStore) Nodes(startAddr *common.Address, limit int) (nodes mock.Nodes, err error) {
s.mu.RLock()
defer s.mu.RUnlock()
var i int
if startAddr != nil {
i, _ = s.nodeIndex(*startAddr)
}
total := len(s.nodes)
max := maxIndex(i, limit, total)
nodes.Addrs = make([]common.Address, 0, max-i)
for ; i < max; i++ {
nodes.Addrs = append(nodes.Addrs, s.nodes[i])
}
if total > max {
nodes.Next = &s.nodes[max]
}
return nodes, nil
}
// NodeKeys returns a paginated list of keys on a node with provided address.
func (s *GlobalStore) NodeKeys(addr common.Address, startKey []byte, limit int) (keys mock.Keys, err error) {
s.mu.RLock()
defer s.mu.RUnlock()
var i int
if startKey != nil {
i, _ = s.nodeKeyIndex(addr, startKey)
}
total := len(s.nodeKeys[addr])
max := maxIndex(i, limit, total)
keys.Keys = make([][]byte, 0, max-i)
for ; i < max; i++ {
keys.Keys = append(keys.Keys, append([]byte(nil), s.nodeKeys[addr][i]...))
}
if total > max {
keys.Next = s.nodeKeys[addr][max]
}
return keys, nil
}
// KeyNodes returns a paginated list of nodes that contain a particular key.
func (s *GlobalStore) KeyNodes(key []byte, startAddr *common.Address, limit int) (nodes mock.Nodes, err error) {
s.mu.RLock()
defer s.mu.RUnlock()
var i int
if startAddr != nil {
i, _ = s.keyNodeIndex(key, *startAddr)
}
total := len(s.keyNodes[string(key)])
max := maxIndex(i, limit, total)
nodes.Addrs = make([]common.Address, 0, max-i)
for ; i < max; i++ {
nodes.Addrs = append(nodes.Addrs, s.keyNodes[string(key)][i])
}
if total > max {
nodes.Next = &s.keyNodes[string(key)][max]
}
return nodes, nil
}
// maxIndex returns the end index for one page listing
// based on the start index, limit and total number of elements.
func maxIndex(start, limit, total int) (max int) {
if limit <= 0 {
limit = mock.DefaultLimit
}
if limit > mock.MaxLimit {
limit = mock.MaxLimit
}
max = total
if start+limit < max {
max = start + limit
}
return max
}
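
Since maxIndex drives the paging of all four listing methods above, a quick table-driven sketch pins down its behavior; this is a hypothetical test, not part of the commit, and the expected values follow from mock.DefaultLimit = 100 and mock.MaxLimit = 1000 as defined in the mock package.

package mem

import "testing"

func TestMaxIndex(t *testing.T) {
	for _, tc := range []struct{ start, limit, total, want int }{
		{0, 0, 250, 100},    // limit <= 0 falls back to mock.DefaultLimit
		{200, 0, 250, 250},  // the last page is truncated at total
		{0, 5000, 250, 250}, // limit above mock.MaxLimit is capped to 1000 first
	} {
		if got := maxIndex(tc.start, tc.limit, tc.total); got != tc.want {
			t.Errorf("maxIndex(%d, %d, %d) = %d, want %d", tc.start, tc.limit, tc.total, got, tc.want)
		}
	}
}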
// Import reads a tar archive from a reader that contains exported chunk data.
// It returns the number of chunks imported and an error.
func (s *GlobalStore) Import(r io.Reader) (n int, err error) {
s.mu.Lock()
defer s.mu.Unlock()
tr := tar.NewReader(r)
for {
hdr, err := tr.Next()
if err != nil {
if err == io.EOF {
break
}
return n, err
}
data, err := ioutil.ReadAll(tr)
if err != nil {
return n, err
}
var c mock.ExportedChunk
if err = json.Unmarshal(data, &c); err != nil {
return n, err
}
key := common.Hex2Bytes(hdr.Name)
s.keyNodes[string(key)] = c.Addrs
for _, addr := range c.Addrs {
if i, has := s.nodeKeyIndex(addr, key); !has {
s.nodeKeys[addr] = append(s.nodeKeys[addr], nil)
copy(s.nodeKeys[addr][i+1:], s.nodeKeys[addr][i:])
s.nodeKeys[addr][i] = key
}
if i, found := s.nodeIndex(addr); !found {
s.nodes = append(s.nodes, addr)
copy(s.nodes[i+1:], s.nodes[i:])
s.nodes[i] = addr
}
}
if i, found := s.keyIndex(key); !found {
s.keys = append(s.keys, nil)
copy(s.keys[i+1:], s.keys[i:])
s.keys[i] = key
}
s.data[string(key)] = c.Data
n++
}
return n, err
}
// Export writes to a writer a tar archive with all chunk data from
// the store. It returns the number of chunks exported and an error.
func (s *GlobalStore) Export(w io.Writer) (n int, err error) {
s.mu.RLock()
defer s.mu.RUnlock()
tw := tar.NewWriter(w)
defer tw.Close()
buf := bytes.NewBuffer(make([]byte, 0, 1024))
encoder := json.NewEncoder(buf)
for key, addrs := range s.keyNodes {
buf.Reset()
if err = encoder.Encode(mock.ExportedChunk{
Addrs: addrs,
Data: s.data[key],
}); err != nil {
return n, err
}
data := buf.Bytes()
hdr := &tar.Header{
Name: common.Bytes2Hex([]byte(key)),
Mode: 0644,
Size: int64(len(data)),
}
if err := tw.WriteHeader(hdr); err != nil {
return n, err
}
if _, err := tw.Write(data); err != nil {
return n, err
}
n++
}
return n, err
}
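
Export and Import are symmetric over the tar format described in the mock package, so a round trip through a file works as below. This is a minimal sketch, assuming a placeholder path and omitting the Put calls that would populate the source store.

package main

import (
	"fmt"
	"os"

	"github.com/ethersphere/swarm/storage/mock/mem"
)

func main() {
	src := mem.NewGlobalStore()
	// ... populate src with Put calls ...

	f, err := os.Create("store.tar") // placeholder path
	if err != nil {
		panic(err)
	}
	n, err := src.Export(f)
	f.Close()
	fmt.Println("exported", n, "chunks", err)

	f, err = os.Open("store.tar")
	if err != nil {
		panic(err)
	}
	dst := mem.NewGlobalStore()
	n, err = dst.Import(f)
	f.Close()
	fmt.Println("imported", n, "chunks", err)
}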


@ -0,0 +1,42 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package mem
import (
"testing"
"github.com/ethersphere/swarm/storage/mock/test"
)
// TestGlobalStore runs tests for a GlobalStore
// using the test.MockStore function.
func TestGlobalStore(t *testing.T) {
test.MockStore(t, NewGlobalStore(), 100)
}
// TestGlobalStoreListings runs tests for a GlobalStore
// using the test.MockStoreListings function.
func TestGlobalStoreListings(t *testing.T) {
test.MockStoreListings(t, NewGlobalStore(), 1000)
}
// TestImportExport runs tests for importing and
// exporting data between two GlobalStores
// using test.ImportExport function.
func TestImportExport(t *testing.T) {
test.ImportExport(t, NewGlobalStore(), NewGlobalStore(), 100)
}

142
storage/mock/mock.go Normal file

@ -0,0 +1,142 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// Package mock defines types that are used by different implementations
// of mock storages.
//
// Implementations of mock storages are located in directories
// under this package:
//
// - db - LevelDB backend
// - mem - in memory map backend
// - rpc - RPC client that can connect to other backends
//
// Mock storages can implement Importer and Exporter interfaces
// for importing and exporting all chunk data that they contain.
// The exported file is a tar archive with all files named by
// hexadecimal representations of chunk keys and with content
// with JSON-encoded ExportedChunk structure. Exported format
// should be preserved across all mock store implementations.
package mock
import (
"errors"
"io"
"github.com/ethereum/go-ethereum/common"
)
const (
// DefaultLimit should be used as the default limit for
// Keys, Nodes, NodeKeys and KeyNodes GlobalStorer
// method implementations.
DefaultLimit = 100
// MaxLimit should be used as the maximal returned number
// of items for Keys, Nodes, NodeKeys and KeyNodes GlobalStorer
// method implementations, regardless of the provided limit.
MaxLimit = 1000
)
// ErrNotFound indicates that the chunk is not found.
var ErrNotFound = errors.New("not found")
// NodeStore holds the node address and a reference to the GlobalStore
// in order to access and store chunk data only for one node.
type NodeStore struct {
store GlobalStorer
addr common.Address
}
// NewNodeStore creates a new instance of NodeStore that keeps
// chunk data using GlobalStorer with a provided address.
func NewNodeStore(addr common.Address, store GlobalStorer) *NodeStore {
return &NodeStore{
store: store,
addr: addr,
}
}
// Get returns chunk data for a key for a node that has the address
// provided on NodeStore initialization.
func (n *NodeStore) Get(key []byte) (data []byte, err error) {
return n.store.Get(n.addr, key)
}
// Put saves chunk data for a key for a node that has the address
// provided on NodeStore initialization.
func (n *NodeStore) Put(key []byte, data []byte) error {
return n.store.Put(n.addr, key, data)
}
// Delete removes chunk data for a key for a node that has the address
// provided on NodeStore initialization.
func (n *NodeStore) Delete(key []byte) error {
return n.store.Delete(n.addr, key)
}
// Keys returns a paginated list of keys for a node that has the address
// provided on NodeStore initialization.
func (n *NodeStore) Keys(startKey []byte, limit int) (keys Keys, err error) {
return n.store.NodeKeys(n.addr, startKey, limit)
}
// GlobalStorer defines methods for a mock store
// that stores chunk data for all swarm nodes.
// It is used in tests to construct mock NodeStores
// for swarm nodes and to track and validate chunks.
type GlobalStorer interface {
Get(addr common.Address, key []byte) (data []byte, err error)
Put(addr common.Address, key []byte, data []byte) error
Delete(addr common.Address, key []byte) error
HasKey(addr common.Address, key []byte) bool
Keys(startKey []byte, limit int) (keys Keys, err error)
Nodes(startAddr *common.Address, limit int) (nodes Nodes, err error)
NodeKeys(addr common.Address, startKey []byte, limit int) (keys Keys, err error)
KeyNodes(key []byte, startAddr *common.Address, limit int) (nodes Nodes, err error)
// NewNodeStore creates an instance of NodeStore
// to be used by a single swarm node with
// address addr.
NewNodeStore(addr common.Address) *NodeStore
}
// Keys is the result returned by the Keys and NodeKeys GlobalStorer methods.
type Keys struct {
Keys [][]byte
Next []byte
}
// Nodes is the result returned by the Nodes and KeyNodes GlobalStorer methods.
type Nodes struct {
Addrs []common.Address
Next *common.Address
}
// Importer defines the method for importing mock store data
// from an exported tar archive.
type Importer interface {
Import(r io.Reader) (n int, err error)
}
// Exporter defines the method for exporting mock store data
// to a tar archive.
type Exporter interface {
Export(w io.Writer) (n int, err error)
}
// ExportedChunk is the structure that is saved in tar archive for
// each chunk as JSON-encoded bytes.
type ExportedChunk struct {
Data []byte `json:"d"`
Addrs []common.Address `json:"a"`
}
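
Taken together, the types above let a test hand each simulated node its own view of one shared store. A minimal sketch of that wiring follows, using the mem implementation from this commit; the node address is an arbitrary example value.

package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethersphere/swarm/storage/mock/mem"
)

func main() {
	global := mem.NewGlobalStore()
	addr := common.HexToAddress("0x01") // arbitrary example node address
	node := global.NewNodeStore(addr)

	key := []byte("chunk-key")
	if err := node.Put(key, []byte("chunk-data")); err != nil {
		panic(err)
	}
	// the chunk is retrievable through the node's view and is also
	// tracked by the global store for that node address
	data, err := node.Get(key)
	fmt.Println(string(data), err)        // chunk-data <nil>
	fmt.Println(global.HasKey(addr, key)) // true
}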

114
storage/mock/rpc/rpc.go Normal file

@ -0,0 +1,114 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// Package rpc implements an RPC client that connects to a centralized mock store.
// The centralized mock store can be any other mock store implementation that is
// registered to an Ethereum RPC server under the mockStore name. The methods that
// mock.GlobalStore defines are the same ones used over RPC. Example:
//
// server := rpc.NewServer()
// server.RegisterName("mockStore", mem.NewGlobalStore())
package rpc
import (
"fmt"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/rpc"
"github.com/ethersphere/swarm/log"
"github.com/ethersphere/swarm/storage/mock"
)
// GlobalStore is an RPC client that connects to a centralized mock store.
// Closing a GlobalStore instance is required to release the RPC client resources.
type GlobalStore struct {
client *rpc.Client
}
// NewGlobalStore creates a new instance of GlobalStore.
func NewGlobalStore(client *rpc.Client) *GlobalStore {
return &GlobalStore{
client: client,
}
}
// Close closes RPC client.
func (s *GlobalStore) Close() error {
s.client.Close()
return nil
}
// NewNodeStore returns a new instance of NodeStore that retrieves and stores
// chunk data only for a node with address addr.
func (s *GlobalStore) NewNodeStore(addr common.Address) *mock.NodeStore {
return mock.NewNodeStore(addr, s)
}
// Get calls the Get method on the RPC server.
func (s *GlobalStore) Get(addr common.Address, key []byte) (data []byte, err error) {
err = s.client.Call(&data, "mockStore_get", addr, key)
if err != nil && err.Error() == "not found" {
// pass the mock package error value instead of an rpc error
return data, mock.ErrNotFound
}
return data, err
}
// Put calls the Put method on the RPC server.
func (s *GlobalStore) Put(addr common.Address, key []byte, data []byte) error {
err := s.client.Call(nil, "mockStore_put", addr, key, data)
return err
}
// Delete calls the Delete method on the RPC server.
func (s *GlobalStore) Delete(addr common.Address, key []byte) error {
err := s.client.Call(nil, "mockStore_delete", addr, key)
return err
}
// HasKey calls the HasKey method on the RPC server.
func (s *GlobalStore) HasKey(addr common.Address, key []byte) bool {
var has bool
if err := s.client.Call(&has, "mockStore_hasKey", addr, key); err != nil {
log.Error(fmt.Sprintf("mock store HasKey: addr %s, key %064x: %v", addr, key, err))
return false
}
return has
}
// Keys returns a paginated list of keys on all nodes.
func (s *GlobalStore) Keys(startKey []byte, limit int) (keys mock.Keys, err error) {
err = s.client.Call(&keys, "mockStore_keys", startKey, limit)
return keys, err
}
// Nodes returns a paginated list of all known nodes.
func (s *GlobalStore) Nodes(startAddr *common.Address, limit int) (nodes mock.Nodes, err error) {
err = s.client.Call(&nodes, "mockStore_nodes", startAddr, limit)
return nodes, err
}
// NodeKeys returns a paginated list of keys on a node with provided address.
func (s *GlobalStore) NodeKeys(addr common.Address, startKey []byte, limit int) (keys mock.Keys, err error) {
err = s.client.Call(&keys, "mockStore_nodeKeys", addr, startKey, limit)
return keys, err
}
// KeyNodes returns a paginated list of nodes that contain a particular key.
func (s *GlobalStore) KeyNodes(key []byte, startAddr *common.Address, limit int) (nodes mock.Nodes, err error) {
err = s.client.Call(&nodes, "mockStore_keyNodes", key, startAddr, limit)
return nodes, err
}
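
The client side of the package comment's server example would look roughly as follows. This is a hedged sketch: the endpoint URL is a placeholder, and any transport accepted by go-ethereum's rpc.Dial would do (the in-process transport used by the tests below is another option).

package main

import (
	"fmt"

	gethrpc "github.com/ethereum/go-ethereum/rpc"
	mockrpc "github.com/ethersphere/swarm/storage/mock/rpc"
)

func main() {
	client, err := gethrpc.Dial("ws://127.0.0.1:8546") // placeholder endpoint
	if err != nil {
		panic(err)
	}
	store := mockrpc.NewGlobalStore(client)
	defer store.Close() // releases the underlying RPC client

	// list up to 10 known node addresses from the centralized store
	nodes, err := store.Nodes(nil, 10)
	fmt.Println(nodes.Addrs, err)
}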


@ -0,0 +1,64 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package rpc
import (
"testing"
"github.com/ethereum/go-ethereum/rpc"
"github.com/ethersphere/swarm/storage/mock/mem"
"github.com/ethersphere/swarm/storage/mock/test"
)
// TestRPCStore runs tests for a GlobalStore
// using the test.MockStore function.
func TestRPCStore(t *testing.T) {
store, cleanup := newTestStore(t)
defer cleanup()
test.MockStore(t, store, 30)
}
// TestRPCStoreListings runs tests for a GlobalStore
// using the test.MockStoreListings function.
func TestRPCStoreListings(t *testing.T) {
store, cleanup := newTestStore(t)
defer cleanup()
test.MockStoreListings(t, store, 1000)
}
// newTestStore creates a temporary GlobalStore
// that will be closed when the returned cleanup function
// is called.
func newTestStore(t *testing.T) (s *GlobalStore, cleanup func()) {
t.Helper()
serverStore := mem.NewGlobalStore()
server := rpc.NewServer()
if err := server.RegisterName("mockStore", serverStore); err != nil {
t.Fatal(err)
}
store := NewGlobalStore(rpc.DialInProc(server))
return store, func() {
if err := store.Close(); err != nil {
t.Error(err)
}
}
}

362
storage/mock/test/test.go Normal file

@ -0,0 +1,362 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// Package test provides functions that are used for testing
// GlobalStorer implementations.
package test
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"strconv"
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethersphere/swarm/storage"
"github.com/ethersphere/swarm/storage/mock"
)
// MockStore creates NodeStore instances from provided GlobalStorer,
// each one with a unique address, stores different chunks on them
// and checks if they are retrievable or not on all nodes.
// Attribute n defines the number of NodeStores that will be created.
func MockStore(t *testing.T, globalStore mock.GlobalStorer, n int) {
t.Run("GlobalStore", func(t *testing.T) {
addrs := make([]common.Address, n)
for i := 0; i < n; i++ {
addrs[i] = common.HexToAddress(strconv.FormatInt(int64(i)+1, 16))
}
for i, addr := range addrs {
chunkAddr := storage.Address(append(addr[:], []byte(strconv.FormatInt(int64(i)+1, 16))...))
data := []byte(strconv.FormatInt(int64(i)+1, 16))
data = append(data, make([]byte, 4096-len(data))...)
globalStore.Put(addr, chunkAddr, data)
for _, cAddr := range addrs {
cData, err := globalStore.Get(cAddr, chunkAddr)
if cAddr == addr {
if err != nil {
t.Fatalf("get data from store %s key %s: %v", cAddr.Hex(), chunkAddr.Hex(), err)
}
if !bytes.Equal(data, cData) {
t.Fatalf("data on store %s: expected %x, got %x", cAddr.Hex(), data, cData)
}
if !globalStore.HasKey(cAddr, chunkAddr) {
t.Fatalf("expected key %s on global store for node %s, but it was not found", chunkAddr.Hex(), cAddr.Hex())
}
} else {
if err != mock.ErrNotFound {
t.Fatalf("expected error from store %s: %v, got %v", cAddr.Hex(), mock.ErrNotFound, err)
}
if len(cData) > 0 {
t.Fatalf("data on store %s: expected nil, got %x", cAddr.Hex(), cData)
}
if globalStore.HasKey(cAddr, chunkAddr) {
t.Fatalf("not expected key %s on global store for node %s, but it was found", chunkAddr.Hex(), cAddr.Hex())
}
}
}
}
t.Run("delete", func(t *testing.T) {
chunkAddr := storage.Address([]byte("1234567890abcd"))
for _, addr := range addrs {
err := globalStore.Put(addr, chunkAddr, []byte("data"))
if err != nil {
t.Fatalf("put data to store %s key %s: %v", addr.Hex(), chunkAddr.Hex(), err)
}
}
firstNodeAddr := addrs[0]
if err := globalStore.Delete(firstNodeAddr, chunkAddr); err != nil {
t.Fatalf("delete from store %s key %s: %v", firstNodeAddr.Hex(), chunkAddr.Hex(), err)
}
for i, addr := range addrs {
_, err := globalStore.Get(addr, chunkAddr)
if i == 0 {
if err != mock.ErrNotFound {
t.Errorf("get data from store %s key %s: expected mock.ErrNotFound error, got %v", addr.Hex(), chunkAddr.Hex(), err)
}
} else {
if err != nil {
t.Errorf("get data from store %s key %s: %v", addr.Hex(), chunkAddr.Hex(), err)
}
}
}
})
})
t.Run("NodeStore", func(t *testing.T) {
nodes := make(map[common.Address]*mock.NodeStore)
for i := 0; i < n; i++ {
addr := common.HexToAddress(strconv.FormatInt(int64(i)+1, 16))
nodes[addr] = globalStore.NewNodeStore(addr)
}
i := 0
for addr, store := range nodes {
i++
chunkAddr := storage.Address(append(addr[:], []byte(fmt.Sprintf("%x", i))...))
data := []byte(strconv.FormatInt(int64(i)+1, 16))
data = append(data, make([]byte, 4096-len(data))...)
store.Put(chunkAddr, data)
for cAddr, cStore := range nodes {
cData, err := cStore.Get(chunkAddr)
if cAddr == addr {
if err != nil {
t.Fatalf("get data from store %s key %s: %v", cAddr.Hex(), chunkAddr.Hex(), err)
}
if !bytes.Equal(data, cData) {
t.Fatalf("data on store %s: expected %x, got %x", cAddr.Hex(), data, cData)
}
if !globalStore.HasKey(cAddr, chunkAddr) {
t.Fatalf("expected key %s on global store for node %s, but it was not found", chunkAddr.Hex(), cAddr.Hex())
}
} else {
if err != mock.ErrNotFound {
t.Fatalf("expected error from store %s: %v, got %v", cAddr.Hex(), mock.ErrNotFound, err)
}
if len(cData) > 0 {
t.Fatalf("data on store %s: expected nil, got %x", cAddr.Hex(), cData)
}
if globalStore.HasKey(cAddr, chunkAddr) {
t.Fatalf("not expected key %s on global store for node %s, but it was found", chunkAddr.Hex(), cAddr.Hex())
}
}
}
}
t.Run("delete", func(t *testing.T) {
chunkAddr := storage.Address([]byte("1234567890abcd"))
var chosenStore *mock.NodeStore
for addr, store := range nodes {
if chosenStore == nil {
chosenStore = store
}
err := store.Put(chunkAddr, []byte("data"))
if err != nil {
t.Fatalf("put data to store %s key %s: %v", addr.Hex(), chunkAddr.Hex(), err)
}
}
if err := chosenStore.Delete(chunkAddr); err != nil {
t.Fatalf("delete key %s: %v", chunkAddr.Hex(), err)
}
for addr, store := range nodes {
_, err := store.Get(chunkAddr)
if store == chosenStore {
if err != mock.ErrNotFound {
t.Errorf("get data from store %s key %s: expected mock.ErrNotFound error, got %v", addr.Hex(), chunkAddr.Hex(), err)
}
} else {
if err != nil {
t.Errorf("get data from store %s key %s: %v", addr.Hex(), chunkAddr.Hex(), err)
}
}
}
})
})
}
// MockStoreListings tests the global store methods Keys, Nodes, NodeKeys and KeyNodes.
// It uses the provided global store to put chunks for n node addresses
// and validates that the methods return the right responses.
func MockStoreListings(t *testing.T, globalStore mock.GlobalStorer, n int) {
addrs := make([]common.Address, n)
for i := 0; i < n; i++ {
addrs[i] = common.HexToAddress(strconv.FormatInt(int64(i)+1, 16))
}
type chunk struct {
key []byte
data []byte
}
const chunksPerNode = 5
keys := make([][]byte, n*chunksPerNode)
for i := 0; i < n*chunksPerNode; i++ {
b := make([]byte, 8)
binary.BigEndian.PutUint64(b, uint64(i))
keys[i] = b
}
// keep track of keys on every node
nodeKeys := make(map[common.Address][][]byte)
// keep track of nodes that store particular key
keyNodes := make(map[string][]common.Address)
for i := 0; i < chunksPerNode; i++ {
// put chunks for every address
for j := 0; j < n; j++ {
addr := addrs[j]
key := keys[(i*n)+j]
err := globalStore.Put(addr, key, []byte("data"))
if err != nil {
t.Fatal(err)
}
nodeKeys[addr] = append(nodeKeys[addr], key)
keyNodes[string(key)] = append(keyNodes[string(key)], addr)
}
// test Keys method
var startKey []byte
var gotKeys [][]byte
for {
keys, err := globalStore.Keys(startKey, 0)
if err != nil {
t.Fatal(err)
}
gotKeys = append(gotKeys, keys.Keys...)
if keys.Next == nil {
break
}
startKey = keys.Next
}
wantKeys := keys[:(i+1)*n]
if fmt.Sprint(gotKeys) != fmt.Sprint(wantKeys) {
t.Fatalf("got #%v keys %v, want %v", i+1, gotKeys, wantKeys)
}
// test Nodes method
var startNode *common.Address
var gotNodes []common.Address
for {
nodes, err := globalStore.Nodes(startNode, 0)
if err != nil {
t.Fatal(err)
}
gotNodes = append(gotNodes, nodes.Addrs...)
if nodes.Next == nil {
break
}
startNode = nodes.Next
}
wantNodes := addrs
if fmt.Sprint(gotNodes) != fmt.Sprint(wantNodes) {
t.Fatalf("got #%v nodes %v, want %v", i+1, gotNodes, wantNodes)
}
// test NodeKeys method
for addr, wantKeys := range nodeKeys {
var startKey []byte
var gotKeys [][]byte
for {
keys, err := globalStore.NodeKeys(addr, startKey, 0)
if err != nil {
t.Fatal(err)
}
gotKeys = append(gotKeys, keys.Keys...)
if keys.Next == nil {
break
}
startKey = keys.Next
}
if fmt.Sprint(gotKeys) != fmt.Sprint(wantKeys) {
t.Fatalf("got #%v %s node keys %v, want %v", i+1, addr.Hex(), gotKeys, wantKeys)
}
}
// test KeyNodes method
for key, wantNodes := range keyNodes {
var startNode *common.Address
var gotNodes []common.Address
for {
nodes, err := globalStore.KeyNodes([]byte(key), startNode, 0)
if err != nil {
t.Fatal(err)
}
gotNodes = append(gotNodes, nodes.Addrs...)
if nodes.Next == nil {
break
}
startNode = nodes.Next
}
if fmt.Sprint(gotNodes) != fmt.Sprint(wantNodes) {
t.Fatalf("got #%v %x key nodes %v, want %v", i+1, []byte(key), gotNodes, wantNodes)
}
}
}
}
// ImportExport saves chunks to the outStore, exports them to a tar archive,
// imports the tar archive into the inStore and checks that all chunks were imported correctly.
func ImportExport(t *testing.T, outStore, inStore mock.GlobalStorer, n int) {
exporter, ok := outStore.(mock.Exporter)
if !ok {
t.Fatal("outStore does not implement mock.Exporter")
}
importer, ok := inStore.(mock.Importer)
if !ok {
t.Fatal("inStore does not implement mock.Importer")
}
addrs := make([]common.Address, n)
for i := 0; i < n; i++ {
addrs[i] = common.HexToAddress(strconv.FormatInt(int64(i)+1, 16))
}
for i, addr := range addrs {
chunkAddr := storage.Address(append(addr[:], []byte(strconv.FormatInt(int64(i)+1, 16))...))
data := []byte(strconv.FormatInt(int64(i)+1, 16))
data = append(data, make([]byte, 4096-len(data))...)
outStore.Put(addr, chunkAddr, data)
}
r, w := io.Pipe()
defer r.Close()
exportErrChan := make(chan error)
go func() {
defer w.Close()
_, err := exporter.Export(w)
exportErrChan <- err
}()
if _, err := importer.Import(r); err != nil {
t.Fatalf("import: %v", err)
}
if err := <-exportErrChan; err != nil {
t.Fatalf("export: %v", err)
}
for i, addr := range addrs {
chunkAddr := storage.Address(append(addr[:], []byte(strconv.FormatInt(int64(i)+1, 16))...))
data := []byte(strconv.FormatInt(int64(i)+1, 16))
data = append(data, make([]byte, 4096-len(data))...)
for _, cAddr := range addrs {
cData, err := inStore.Get(cAddr, chunkAddr)
if cAddr == addr {
if err != nil {
t.Fatalf("get data from store %s key %s: %v", cAddr.Hex(), chunkAddr.Hex(), err)
}
if !bytes.Equal(data, cData) {
t.Fatalf("data on store %s: expected %x, got %x", cAddr.Hex(), data, cData)
}
if !inStore.HasKey(cAddr, chunkAddr) {
t.Fatalf("expected key %s on global store for node %s, but it was not found", chunkAddr.Hex(), cAddr.Hex())
}
} else {
if err != mock.ErrNotFound {
t.Fatalf("expected error from store %s: %v, got %v", cAddr.Hex(), mock.ErrNotFound, err)
}
if len(cData) > 0 {
t.Fatalf("data on store %s: expected nil, got %x", cAddr.Hex(), cData)
}
if inStore.HasKey(cAddr, chunkAddr) {
t.Fatalf("not expected key %s on global store for node %s, but it was found", chunkAddr.Hex(), cAddr.Hex())
}
}
}
}
}

335
storage/netstore.go Normal file

@ -0,0 +1,335 @@
// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package storage
import (
"context"
"encoding/hex"
"fmt"
"sync"
"sync/atomic"
"time"
"github.com/ethereum/go-ethereum/p2p/enode"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/log"
"github.com/ethersphere/swarm/spancontext"
"github.com/opentracing/opentracing-go"
olog "github.com/opentracing/opentracing-go/log"
"github.com/syndtr/goleveldb/leveldb"
lru "github.com/hashicorp/golang-lru"
)
type (
NewNetFetcherFunc func(ctx context.Context, addr Address, peers *sync.Map) NetFetcher
)
type NetFetcher interface {
Request(hopCount uint8)
Offer(source *enode.ID)
}
// NetStore is an extension of local storage.
// It implements the ChunkStore interface:
// on request it initiates remote cloud retrieval using a fetcher.
// Fetchers are unique to a chunk and are stored in the fetchers LRU memory cache.
// NewNetFetcherFunc is a factory object to create a fetch function for a specific chunk address.
type NetStore struct {
chunk.Store
mu sync.Mutex
fetchers *lru.Cache
NewNetFetcherFunc NewNetFetcherFunc
closeC chan struct{}
}
var fetcherTimeout = 2 * time.Minute // timeout to cancel the fetcher even if requests are coming in
// NewNetStore creates a new NetStore object using the given local store. nnf is a
// constructor function that can create a fetch function for a specific chunk address.
func NewNetStore(store chunk.Store, nnf NewNetFetcherFunc) (*NetStore, error) {
fetchers, err := lru.New(defaultChunkRequestsCacheCapacity)
if err != nil {
return nil, err
}
return &NetStore{
Store: store,
fetchers: fetchers,
NewNetFetcherFunc: nnf,
closeC: make(chan struct{}),
}, nil
}
// Put stores a chunk in localstore, and delivers to all requestor peers using the fetcher stored in
// the fetchers cache
func (n *NetStore) Put(ctx context.Context, mode chunk.ModePut, ch Chunk) (bool, error) {
n.mu.Lock()
defer n.mu.Unlock()
// put the chunk to the store; there should be no error
exists, err := n.Store.Put(ctx, mode, ch)
if err != nil {
return exists, err
}
// if chunk is now put in the store, check if there was an active fetcher and call deliver on it
// (this delivers the chunk to requestors via the fetcher)
log.Trace("n.getFetcher", "ref", ch.Address())
if f := n.getFetcher(ch.Address()); f != nil {
log.Trace("n.getFetcher deliver", "ref", ch.Address())
f.deliver(ctx, ch)
}
return exists, nil
}
// Get retrieves the chunk from the NetStore DPA synchronously.
// It calls NetStore.get, and if the chunk is not in local Storage
// it calls fetch with the request, which blocks until the chunk
// arrives or the context is done
func (n *NetStore) Get(rctx context.Context, mode chunk.ModeGet, ref Address) (Chunk, error) {
chunk, fetch, err := n.get(rctx, mode, ref)
if err != nil {
return nil, err
}
if chunk != nil {
// this is not measuring how long it takes to get the chunk for the localstore, but
// rather just adding a span for clarity when inspecting traces in Jaeger, in order
// to make it easier to reason which is the node that actually delivered a chunk.
_, sp := spancontext.StartSpan(
rctx,
"localstore.get")
defer sp.Finish()
return chunk, nil
}
return fetch(rctx)
}
// FetchFunc returns nil if the store contains the given address. Otherwise it returns a wait function,
// which returns after the chunk is available or the context is done
func (n *NetStore) FetchFunc(ctx context.Context, ref Address) func(context.Context) error {
chunk, fetch, _ := n.get(ctx, chunk.ModeGetRequest, ref)
if chunk != nil {
return nil
}
return func(ctx context.Context) error {
_, err := fetch(ctx)
return err
}
}
// Close chunk store
func (n *NetStore) Close() (err error) {
close(n.closeC)
wg := sync.WaitGroup{}
for _, key := range n.fetchers.Keys() {
if f, ok := n.fetchers.Get(key); ok {
if fetch, ok := f.(*fetcher); ok {
wg.Add(1)
go func(fetch *fetcher) {
defer wg.Done()
fetch.cancel()
select {
case <-fetch.deliveredC:
case <-fetch.cancelledC:
}
}(fetch)
}
}
}
wg.Wait()
return n.Store.Close()
}
// get attempts to retrieve the chunk from the LocalStore.
// If it is not found then using getOrCreateFetcher:
// 1. Either there is already a fetcher to retrieve it
// 2. A new fetcher is created and saved in the fetchers cache
// From here on, all Gets will hit this fetcher until the chunk is delivered
// or all fetcher contexts are done.
// It returns a chunk, a fetcher function and an error
// If chunk is nil, the returned fetch function needs to be called with a context to return the chunk.
func (n *NetStore) get(ctx context.Context, mode chunk.ModeGet, ref Address) (Chunk, func(context.Context) (Chunk, error), error) {
n.mu.Lock()
defer n.mu.Unlock()
chunk, err := n.Store.Get(ctx, mode, ref)
if err != nil {
// TODO: Fix comparison - we should be comparing against leveldb.ErrNotFound, this error should be wrapped.
if err != ErrChunkNotFound && err != leveldb.ErrNotFound {
log.Debug("Received error from LocalStore other than ErrNotFound", "err", err)
}
// The chunk is not available in the LocalStore, let's get the fetcher for it, or create a new one
// if it doesn't exist yet
f := n.getOrCreateFetcher(ctx, ref)
// If the caller needs the chunk, it has to use the returned fetch function to get it
return nil, f.Fetch, nil
}
return chunk, nil, nil
}
// getOrCreateFetcher attempts to retrieve an existing fetcher;
// if none exists, it creates one and saves it in the fetchers cache.
// The caller must hold the lock.
func (n *NetStore) getOrCreateFetcher(ctx context.Context, ref Address) *fetcher {
if f := n.getFetcher(ref); f != nil {
return f
}
// no fetcher for the given address, we have to create a new one
key := hex.EncodeToString(ref)
// create the context during which fetching is kept alive
cctx, cancel := context.WithTimeout(ctx, fetcherTimeout)
// destroy is called when all requests finish
destroy := func() {
// remove fetcher from fetchers
n.fetchers.Remove(key)
// stop the fetcher by cancelling its context; called when
// all requests are cancelled/timed out or the chunk is delivered
cancel()
}
// peers always stores all the peers which have an active request for the chunk. It is shared
// between the fetcher and the NewNetFetcherFunc function. It is needed by the NewNetFetcherFunc because
// the peers which requested the chunk should not be asked to deliver it.
peers := &sync.Map{}
cctx, sp := spancontext.StartSpan(
cctx,
"netstore.fetcher",
)
sp.LogFields(olog.String("ref", ref.String()))
fetcher := newFetcher(sp, ref, n.NewNetFetcherFunc(cctx, ref, peers), destroy, peers, n.closeC)
n.fetchers.Add(key, fetcher)
return fetcher
}
// getFetcher retrieves the fetcher for the given address from the fetchers cache if it exists,
// otherwise it returns nil
func (n *NetStore) getFetcher(ref Address) *fetcher {
key := hex.EncodeToString(ref)
f, ok := n.fetchers.Get(key)
if ok {
return f.(*fetcher)
}
return nil
}
// RequestsCacheLen returns the current number of outgoing requests stored in the cache
func (n *NetStore) RequestsCacheLen() int {
return n.fetchers.Len()
}
// One fetcher object is responsible to fetch one chunk for one address, and keep track of all the
// peers who have requested it and did not receive it yet.
type fetcher struct {
addr Address // address of chunk
chunk Chunk // fetcher can set the chunk on the fetcher
deliveredC chan struct{} // chan signalling chunk delivery to requests
cancelledC chan struct{} // chan signalling the fetcher has been cancelled (removed from fetchers in NetStore)
netFetcher NetFetcher // remote fetch function to be called with a request source taken from the context
cancel func() // cleanup function for the remote fetcher to call when all upstream contexts are done
peers *sync.Map // the peers which asked for the chunk
requestCnt int32 // number of requests on this chunk. If all the requests are done (delivered or context is done) the cancel function is called
deliverOnce *sync.Once // guarantees that we only close deliveredC once
span opentracing.Span // measure retrieve time per chunk
}
// newFetcher creates a new fetcher object for the given addr. nf is the NetFetcher which actually
// does the retrieval (in non-test cases this is coming from the network package). The cancel function is
// called either
// 1. when the chunk has been fetched and all peers have been either notified or their context is done
// 2. when the chunk has not been fetched but all contexts from all the requests are done
// The peers map stores all the peers which have requested chunk.
func newFetcher(span opentracing.Span, addr Address, nf NetFetcher, cancel func(), peers *sync.Map, closeC chan struct{}) *fetcher {
cancelOnce := &sync.Once{} // cancel should only be called once
return &fetcher{
addr: addr,
deliveredC: make(chan struct{}),
deliverOnce: &sync.Once{},
cancelledC: closeC,
netFetcher: nf,
cancel: func() {
cancelOnce.Do(func() {
cancel()
})
},
peers: peers,
span: span,
}
}
// Fetch fetches the chunk synchronously; it is called by NetStore.Get if the chunk is not available
// locally.
func (f *fetcher) Fetch(rctx context.Context) (Chunk, error) {
atomic.AddInt32(&f.requestCnt, 1)
defer func() {
// if all the requests are done the fetcher can be cancelled
if atomic.AddInt32(&f.requestCnt, -1) == 0 {
f.cancel()
}
f.span.Finish()
}()
// The peer asking for the chunk. Store in the shared peers map, but delete after the request
// has been delivered
peer := rctx.Value("peer")
if peer != nil {
f.peers.Store(peer, time.Now())
defer f.peers.Delete(peer)
}
// If there is a source in the context then it is an offer, otherwise a request
sourceIF := rctx.Value("source")
hopCount, _ := rctx.Value("hopcount").(uint8)
if sourceIF != nil {
var source enode.ID
if err := source.UnmarshalText([]byte(sourceIF.(string))); err != nil {
return nil, err
}
f.netFetcher.Offer(&source)
} else {
f.netFetcher.Request(hopCount)
}
// wait until either the chunk is delivered or the context is done
select {
case <-rctx.Done():
return nil, rctx.Err()
case <-f.deliveredC:
return f.chunk, nil
case <-f.cancelledC:
return nil, fmt.Errorf("fetcher cancelled")
}
}
// deliver is called by NetStore.Put to notify all pending requests
func (f *fetcher) deliver(ctx context.Context, ch Chunk) {
f.deliverOnce.Do(func() {
f.chunk = ch
// closing the deliveredC channel will terminate ongoing requests
close(f.deliveredC)
log.Trace("n.getFetcher close deliveredC", "ref", ch.Address())
})
}
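
Fetch reads the untyped context keys "peer", "source" and "hopcount", matching the usage in the tests below. A minimal sketch of an offer-style call under those assumptions follows; the helper name is hypothetical and not part of the commit.

package example

import (
	"context"

	"github.com/ethereum/go-ethereum/p2p/enode"
	"github.com/ethersphere/swarm/chunk"
	"github.com/ethersphere/swarm/storage"
)

// getAsOffer is a hypothetical helper: because a "source" value is
// present in the context, fetcher.Fetch calls NetFetcher.Offer with
// that node ID instead of issuing a request with a hop count.
func getAsOffer(n *storage.NetStore, ref storage.Address, source enode.ID) (storage.Chunk, error) {
	ctx := context.WithValue(context.Background(), "source", source.String())
	return n.Get(ctx, chunk.ModeGetRequest, ref)
}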

702
storage/netstore_test.go Normal file

@ -0,0 +1,702 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package storage
import (
"bytes"
"context"
"crypto/rand"
"errors"
"fmt"
"io/ioutil"
"os"
"sync"
"testing"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/p2p/enode"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/storage/localstore"
)
var sourcePeerID = enode.HexID("99d8594b52298567d2ca3f4c441a5ba0140ee9245e26460d01102a52773c73b9")
type mockNetFetcher struct {
peers *sync.Map
sources []*enode.ID
peersPerRequest [][]Address
requestCalled bool
offerCalled bool
quit <-chan struct{}
ctx context.Context
hopCounts []uint8
mu sync.Mutex
}
func (m *mockNetFetcher) Offer(source *enode.ID) {
m.offerCalled = true
m.sources = append(m.sources, source)
}
func (m *mockNetFetcher) Request(hopCount uint8) {
m.mu.Lock()
defer m.mu.Unlock()
m.requestCalled = true
var peers []Address
m.peers.Range(func(key interface{}, _ interface{}) bool {
peers = append(peers, common.FromHex(key.(string)))
return true
})
m.peersPerRequest = append(m.peersPerRequest, peers)
m.hopCounts = append(m.hopCounts, hopCount)
}
type mockNetFetchFuncFactory struct {
fetcher *mockNetFetcher
}
func (m *mockNetFetchFuncFactory) newMockNetFetcher(ctx context.Context, _ Address, peers *sync.Map) NetFetcher {
m.fetcher.peers = peers
m.fetcher.quit = ctx.Done()
m.fetcher.ctx = ctx
return m.fetcher
}
func newTestNetStore(t *testing.T) (netStore *NetStore, fetcher *mockNetFetcher, cleanup func()) {
t.Helper()
dir, err := ioutil.TempDir("", "swarm-storage-")
if err != nil {
t.Fatal(err)
}
localStore, err := localstore.New(dir, make([]byte, 32), nil)
if err != nil {
os.RemoveAll(dir)
t.Fatal(err)
}
cleanup = func() {
localStore.Close()
os.RemoveAll(dir)
}
fetcher = new(mockNetFetcher)
mockNetFetchFuncFactory := &mockNetFetchFuncFactory{
fetcher: fetcher,
}
netStore, err = NewNetStore(localStore, mockNetFetchFuncFactory.newMockNetFetcher)
if err != nil {
cleanup()
t.Fatal(err)
}
return netStore, fetcher, cleanup
}
// TestNetStoreGetAndPut tests calling NetStore.Get which is blocked until the same chunk is Put.
// After the Put there should be no active fetchers, and the context created for the fetcher should
// be cancelled.
func TestNetStoreGetAndPut(t *testing.T) {
netStore, fetcher, cleanup := newTestNetStore(t)
defer cleanup()
ch := GenerateRandomChunk(chunk.DefaultSize)
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
defer cancel()
c := make(chan struct{}) // this channel ensures that the goroutine with the Put does not run earlier than the Get
putErrC := make(chan error)
go func() {
<-c // wait for the Get to be called
time.Sleep(200 * time.Millisecond) // and a little more so it is surely called
// check if netStore created a fetcher in the Get call for the unavailable chunk
if netStore.fetchers.Len() != 1 || netStore.getFetcher(ch.Address()) == nil {
putErrC <- errors.New("Expected netStore to use a fetcher for the Get call")
return
}
_, err := netStore.Put(ctx, chunk.ModePutRequest, ch)
if err != nil {
putErrC <- fmt.Errorf("Expected no err got %v", err)
return
}
putErrC <- nil
}()
close(c)
recChunk, err := netStore.Get(ctx, chunk.ModeGetRequest, ch.Address()) // this is blocked until the Put above is done
if err != nil {
t.Fatalf("Expected no err got %v", err)
}
if err := <-putErrC; err != nil {
t.Fatal(err)
}
// the retrieved chunk should be the same as what we Put
if !bytes.Equal(recChunk.Address(), ch.Address()) || !bytes.Equal(recChunk.Data(), ch.Data()) {
t.Fatalf("Different chunk received than what was put")
}
// the chunk is already available locally, so there should be no active fetchers waiting for it
if netStore.fetchers.Len() != 0 {
t.Fatal("Expected netStore to remove the fetcher after delivery")
}
// A fetcher was created when the Get was called (and the chunk was not available). The chunk
// was delivered with the Put call, so the fetcher should be cancelled now.
select {
case <-fetcher.ctx.Done():
default:
t.Fatal("Expected fetcher context to be cancelled")
}
}
// TestNetStoreGetAfterPut tests calling NetStore.Put and then NetStore.Get.
// After the Put the chunk is available locally, so the Get can just retrieve it from LocalStore,
// there is no need to create fetchers.
func TestNetStoreGetAfterPut(t *testing.T) {
netStore, fetcher, cleanup := newTestNetStore(t)
defer cleanup()
ch := GenerateRandomChunk(chunk.DefaultSize)
ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
defer cancel()
// First we Put the chunk, so the chunk will be available locally
_, err := netStore.Put(ctx, chunk.ModePutRequest, ch)
if err != nil {
t.Fatalf("Expected no err got %v", err)
}
// Get should retrieve the chunk from LocalStore, without creating fetcher
recChunk, err := netStore.Get(ctx, chunk.ModeGetRequest, ch.Address())
if err != nil {
t.Fatalf("Expected no err got %v", err)
}
// the retrieved chunk should be the same as what we Put
if !bytes.Equal(recChunk.Address(), ch.Address()) || !bytes.Equal(recChunk.Data(), ch.Data()) {
t.Fatalf("Different chunk received than what was put")
}
// no fetcher offer or request should be created for a locally available chunk
if fetcher.offerCalled || fetcher.requestCalled {
t.Fatal("NetFetcher.offerCalled or requestCalled not expected to be called")
}
// no fetchers should be created for a locally available chunk
if netStore.fetchers.Len() != 0 {
t.Fatal("Expected netStore to not have fetcher")
}
}
// TestNetStoreGetTimeout tests a Get call for an unavailable chunk and waits for timeout
func TestNetStoreGetTimeout(t *testing.T) {
netStore, fetcher, cleanup := newTestNetStore(t)
defer cleanup()
ch := GenerateRandomChunk(chunk.DefaultSize)
ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
defer cancel()
c := make(chan struct{}) // this channel ensures that the goroutine does not run earlier than the Get
fetcherErrC := make(chan error)
go func() {
<-c // wait for the Get to be called
time.Sleep(200 * time.Millisecond) // and a little more so it is surely called
// check if netStore created a fetcher in the Get call for the unavailable chunk
if netStore.fetchers.Len() != 1 || netStore.getFetcher(ch.Address()) == nil {
fetcherErrC <- errors.New("Expected netStore to use a fetcher for the Get call")
return
}
fetcherErrC <- nil
}()
close(c)
// We call Get on this chunk, which is not in LocalStore. We don't Put it at all, so there will
// be a timeout
_, err := netStore.Get(ctx, chunk.ModeGetRequest, ch.Address())
// Check if the timeout happened
if err != context.DeadlineExceeded {
t.Fatalf("Expected context.DeadLineExceeded err got %v", err)
}
if err := <-fetcherErrC; err != nil {
t.Fatal(err)
}
// A fetcher was created, check if it has been removed after timeout
if netStore.fetchers.Len() != 0 {
t.Fatal("Expected netStore to remove the fetcher after timeout")
}
// Check if the fetcher context has been cancelled after the timeout
select {
case <-fetcher.ctx.Done():
default:
t.Fatal("Expected fetcher context to be cancelled")
}
}
// TestNetStoreGetCancel tests a Get call for an unavailable chunk, then cancels the context and checks
// the errors
func TestNetStoreGetCancel(t *testing.T) {
netStore, fetcher, cleanup := newTestNetStore(t)
defer cleanup()
ch := GenerateRandomChunk(chunk.DefaultSize)
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
c := make(chan struct{}) // this channel ensures that the goroutine with the cancel does not run earlier than the Get
fetcherErrC := make(chan error, 1)
go func() {
<-c // wait for the Get to be called
time.Sleep(200 * time.Millisecond) // and a little more so it is surely called
// check if netStore created a fetcher in the Get call for the unavailable chunk
if netStore.fetchers.Len() != 1 || netStore.getFetcher(ch.Address()) == nil {
fetcherErrC <- errors.New("Expected netStore to use a fetcher for the Get call")
return
}
fetcherErrC <- nil
cancel()
}()
close(c)
// We call Get with an unavailable chunk, so it will create a fetcher and wait for delivery
_, err := netStore.Get(ctx, chunk.ModeGetRequest, ch.Address())
if err := <-fetcherErrC; err != nil {
t.Fatal(err)
}
// After the context is cancelled above Get should return with an error
if err != context.Canceled {
t.Fatalf("Expected context.Canceled err got %v", err)
}
// A fetcher was created, check if it has been removed after cancel
if netStore.fetchers.Len() != 0 {
t.Fatal("Expected netStore to remove the fetcher after cancel")
}
// Check if the fetcher context has been cancelled after the request context cancel
select {
case <-fetcher.ctx.Done():
default:
t.Fatal("Expected fetcher context to be cancelled")
}
}
// TestNetStoreMultipleGetAndPut tests four Get calls for the same unavailable chunk. The chunk is
// delivered with a Put; we have to make sure all Get calls return and that they use a single fetcher
// for the chunk retrieval
func TestNetStoreMultipleGetAndPut(t *testing.T) {
netStore, fetcher, cleanup := newTestNetStore(t)
defer cleanup()
ch := GenerateRandomChunk(chunk.DefaultSize)
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
defer cancel()
putErrC := make(chan error)
go func() {
// sleep to make sure Put is called after all the Get calls
time.Sleep(500 * time.Millisecond)
// check if netStore created exactly one fetcher for all Get calls
if netStore.fetchers.Len() != 1 {
putErrC <- errors.New("Expected netStore to use one fetcher for all Get calls")
return
}
_, err := netStore.Put(ctx, chunk.ModePutRequest, ch)
if err != nil {
putErrC <- fmt.Errorf("Expected no err got %v", err)
return
}
putErrC <- nil
}()
count := 4
// call Get 4 times for the same unavailable chunk. The calls will be blocked until the Put above.
errC := make(chan error)
for i := 0; i < count; i++ {
go func() {
recChunk, err := netStore.Get(ctx, chunk.ModeGetRequest, ch.Address())
if err != nil {
errC <- fmt.Errorf("Expected no err got %v", err)
}
if !bytes.Equal(recChunk.Address(), ch.Address()) || !bytes.Equal(recChunk.Data(), ch.Data()) {
errC <- errors.New("Different chunk received than what was put")
}
errC <- nil
}()
}
if err := <-putErrC; err != nil {
t.Fatal(err)
}
timeout := time.After(1 * time.Second)
// The Get calls should return after Put, so no timeout expected
for i := 0; i < count; i++ {
select {
case err := <-errC:
if err != nil {
t.Fatal(err)
}
case <-timeout:
t.Fatalf("Timeout waiting for Get calls to return")
}
}
// A fetcher was created, check if it has been removed after delivery
if netStore.fetchers.Len() != 0 {
t.Fatal("Expected netStore to remove the fetcher after delivery")
}
// Check if the fetcher context has been cancelled after delivery
select {
case <-fetcher.ctx.Done():
default:
t.Fatal("Expected fetcher context to be cancelled")
}
}
// TestNetStoreFetchFuncTimeout tests a FetchFunc call for an unavailable chunk and waits for timeout
func TestNetStoreFetchFuncTimeout(t *testing.T) {
netStore, fetcher, cleanup := newTestNetStore(t)
defer cleanup()
ch := GenerateRandomChunk(chunk.DefaultSize)
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
defer cancel()
// FetchFunc is called for an unavailable chunk, so the returned wait function should not be nil
wait := netStore.FetchFunc(ctx, ch.Address())
if wait == nil {
t.Fatal("Expected wait function to be not nil")
}
// There should be an active fetcher for the chunk after the FetchFunc call
if netStore.fetchers.Len() != 1 || netStore.getFetcher(ch.Address()) == nil {
t.Fatalf("Expected netStore to have one fetcher for the requested chunk")
}
// wait function should timeout because we don't deliver the chunk with a Put
err := wait(ctx)
if err != context.DeadlineExceeded {
t.Fatalf("Expected context.DeadLineExceeded err got %v", err)
}
// the fetcher should be removed after timeout
if netStore.fetchers.Len() != 0 {
t.Fatal("Expected netStore to remove the fetcher after timeout")
}
// the fetcher context should be cancelled after timeout
select {
case <-fetcher.ctx.Done():
default:
t.Fatal("Expected fetcher context to be cancelled")
}
}
// TestNetStoreFetchFuncAfterPut tests that the FetchFunc should return nil for a locally available chunk
func TestNetStoreFetchFuncAfterPut(t *testing.T) {
netStore, _, cleanup := newTestNetStore(t)
defer cleanup()
ch := GenerateRandomChunk(chunk.DefaultSize)
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
defer cancel()
// We deliver the created chunk with a Put
_, err := netStore.Put(ctx, chunk.ModePutRequest, ch)
if err != nil {
t.Fatalf("Expected no err got %v", err)
}
// FetchFunc should return nil, because the chunk is available locally, no need to fetch it
wait := netStore.FetchFunc(ctx, ch.Address())
if wait != nil {
t.Fatal("Expected wait to be nil")
}
// No fetchers should be created at all
if netStore.fetchers.Len() != 0 {
t.Fatal("Expected netStore to not have fetcher")
}
}
// TestNetStoreGetCallsRequest tests if Get creates a request on the NetFetcher for an unavailable chunk
func TestNetStoreGetCallsRequest(t *testing.T) {
netStore, fetcher, cleanup := newTestNetStore(t)
defer cleanup()
ch := GenerateRandomChunk(chunk.DefaultSize)
ctx := context.WithValue(context.Background(), "hopcount", uint8(5))
ctx, cancel := context.WithTimeout(ctx, 200*time.Millisecond)
defer cancel()
// We call Get for an unavailable chunk; it will time out because the chunk is not delivered
_, err := netStore.Get(ctx, chunk.ModeGetRequest, ch.Address())
if err != context.DeadlineExceeded {
t.Fatalf("Expected context.DeadlineExceeded err got %v", err)
}
// NetStore should call NetFetcher.Request and wait for the chunk
if !fetcher.requestCalled {
t.Fatal("Expected NetFetcher.Request to be called")
}
if fetcher.hopCounts[0] != 5 {
t.Fatalf("Expected NetFetcher.Request be called with hopCount 5, got %v", fetcher.hopCounts[0])
}
}
// TestNetStoreGetCallsOffer tests if Get calls NetFetcher.Offer for an unavailable chunk
// when a source peer is provided in the context.
func TestNetStoreGetCallsOffer(t *testing.T) {
netStore, fetcher, cleanup := newTestNetStore(t)
defer cleanup()
ch := GenerateRandomChunk(chunk.DefaultSize)
// If a source peer is added to the context, NetStore will handle it as an offer
ctx := context.WithValue(context.Background(), "source", sourcePeerID.String())
ctx, cancel := context.WithTimeout(ctx, 200*time.Millisecond)
defer cancel()
// We call Get for an unavailable chunk; it will time out because the chunk is not delivered
_, err := netStore.Get(ctx, chunk.ModeGetRequest, ch.Address())
if err != context.DeadlineExceeded {
t.Fatalf("Expect error %v got %v", context.DeadlineExceeded, err)
}
// NetStore should call NetFetcher.Offer with the source peer
if !fetcher.offerCalled {
t.Fatal("Expected NetFetcher.Request to be called")
}
if len(fetcher.sources) != 1 {
t.Fatalf("Expected fetcher sources length 1 got %v", len(fetcher.sources))
}
if fetcher.sources[0].String() != sourcePeerID.String() {
t.Fatalf("Expected fetcher source %v got %v", sourcePeerID, fetcher.sources[0])
}
}
// TestNetStoreFetcherCountPeers tests multiple NetStore.Get calls with a peer in the context.
// There is no Put call, so the Get calls time out
func TestNetStoreFetcherCountPeers(t *testing.T) {
netStore, fetcher, cleanup := newTestNetStore(t)
defer cleanup()
addr := randomAddr()
peers := []string{randomAddr().Hex(), randomAddr().Hex(), randomAddr().Hex()}
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
defer cancel()
errC := make(chan error)
nrGets := 3
// Call Get 3 times with a peer in context
for i := 0; i < nrGets; i++ {
peer := peers[i]
go func() {
ctx := context.WithValue(ctx, "peer", peer)
_, err := netStore.Get(ctx, chunk.ModeGetRequest, addr)
errC <- err
}()
}
// All 3 Get calls should time out
for i := 0; i < nrGets; i++ {
err := <-errC
if err != context.DeadlineExceeded {
t.Fatalf("Expected \"%v\" error got \"%v\"", context.DeadlineExceeded, err)
}
}
// fetcher should be closed after timeout
select {
case <-fetcher.quit:
case <-time.After(3 * time.Second):
t.Fatalf("mockNetFetcher not closed after timeout")
}
// All 3 peers should be given to NetFetcher after the 3 Get calls
if len(fetcher.peersPerRequest) != nrGets {
t.Fatalf("Expected 3 got %v", len(fetcher.peersPerRequest))
}
for i, peers := range fetcher.peersPerRequest {
if len(peers) < i+1 {
t.Fatalf("Expected at least %v got %v", i+1, len(peers))
}
}
}
// TestNetStoreFetchFuncCalledMultipleTimes calls the wait function given by FetchFunc three times,
// and checks that there is still exactly one fetcher for one chunk. After the chunk is delivered, it
// checks that the fetcher is closed.
func TestNetStoreFetchFuncCalledMultipleTimes(t *testing.T) {
netStore, fetcher, cleanup := newTestNetStore(t)
defer cleanup()
ch := GenerateRandomChunk(chunk.DefaultSize)
ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
defer cancel()
// FetchFunc should return a non-nil wait function, because the chunk is not available
wait := netStore.FetchFunc(ctx, ch.Address())
if wait == nil {
t.Fatal("Expected wait function to be not nil")
}
// There should be exactly one fetcher for the chunk
if netStore.fetchers.Len() != 1 || netStore.getFetcher(ch.Address()) == nil {
t.Fatalf("Expected netStore to have one fetcher for the requested chunk")
}
// Call wait three times in parallel
count := 3
errC := make(chan error)
for i := 0; i < count; i++ {
go func() {
errC <- wait(ctx)
}()
}
// sleep a little so the wait functions above get called
time.Sleep(100 * time.Millisecond)
// there should still be only one fetcher, because all wait calls are for the same chunk
if netStore.fetchers.Len() != 1 || netStore.getFetcher(ch.Address()) == nil {
t.Fatal("Expected netStore to have one fetcher for the requested chunk")
}
// Deliver the chunk with a Put
_, err := netStore.Put(ctx, chunk.ModePutRequest, ch)
if err != nil {
t.Fatalf("Expected no err got %v", err)
}
// wait until all wait calls return (because the chunk is delivered)
for i := 0; i < count; i++ {
err := <-errC
if err != nil {
t.Fatal(err)
}
}
// There should be no more fetchers for the delivered chunk
if netStore.fetchers.Len() != 0 {
t.Fatal("Expected netStore to remove the fetcher after delivery")
}
// The context for the fetcher should be cancelled after delivery
select {
case <-fetcher.ctx.Done():
default:
t.Fatal("Expected fetcher context to be cancelled")
}
}
// TestNetStoreFetcherLifeCycleWithTimeout is similar to TestNetStoreFetchFuncCalledMultipleTimes,
// the only difference is that we don't deliver the chunk, just wait for timeout
func TestNetStoreFetcherLifeCycleWithTimeout(t *testing.T) {
netStore, fetcher, cleanup := newTestNetStore(t)
defer cleanup()
ch := GenerateRandomChunk(chunk.DefaultSize)
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
defer cancel()
// FetchFunc should return a non-nil wait function, because the chunk is not available
wait := netStore.FetchFunc(ctx, ch.Address())
if wait == nil {
t.Fatal("Expected wait function to be not nil")
}
// There should be exactly one fetcher for the chunk
if netStore.fetchers.Len() != 1 || netStore.getFetcher(ch.Address()) == nil {
t.Fatalf("Expected netStore to have one fetcher for the requested chunk")
}
// Call wait three times in parallel
count := 3
errC := make(chan error)
for i := 0; i < count; i++ {
go func() {
rctx, rcancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer rcancel()
err := wait(rctx)
if err != context.DeadlineExceeded {
errC <- fmt.Errorf("Expected err %v got %v", context.DeadlineExceeded, err)
return
}
errC <- nil
}()
}
// wait until all wait calls time out
for i := 0; i < count; i++ {
err := <-errC
if err != nil {
t.Fatal(err)
}
}
// There should be no more fetchers after timeout
if netStore.fetchers.Len() != 0 {
t.Fatal("Expected netStore to remove the fetcher after delivery")
}
// The context for the fetcher should be cancelled after timeout
select {
case <-fetcher.ctx.Done():
default:
t.Fatal("Expected fetcher context to be cancelled")
}
}
func randomAddr() Address {
addr := make([]byte, 32)
rand.Read(addr)
return Address(addr)
}

697
storage/pyramid.go Normal file
View File

@ -0,0 +1,697 @@
// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package storage
import (
"context"
"encoding/binary"
"errors"
"io"
"io/ioutil"
"sync"
"time"
"github.com/ethersphere/swarm/chunk"
"github.com/ethersphere/swarm/log"
)
/*
The main idea of the pyramid chunker is to process the input data without knowing the entire size a priori.
To achieve this, the chunker tree is built from the ground up until the data is exhausted.
This opens up new avenues, such as easy appends and other kinds of modifications to the tree, thereby avoiding
duplication of data chunks.
Below is an example of a two-level chunk tree. The leaf chunks are called data chunks and all the chunks above
them are called tree chunks. The tree chunk level directly above the data chunks is level 0, and so on, up to
the root tree chunk.
                                            T10                                        <- Tree chunk lvl1
                                             |
                   __________________________|_____________________________
                  /                  |                   |                  \
                 /                   |                   \                   \
             __T00__             ___T01__            ___T02__            ___T03__      <- Tree chunks lvl 0
            / /     \           / /      \          / /      \          / /      \
           / /       \         / /        \        / /        \        / /        \
          D1 D2 ... D128     D1 D2 ... D128     D1 D2 ... D128     D1 D2 ... D128      <- Data Chunks
The split function continuously reads the data, creates data chunks and sends them to storage.
When a certain number of data chunks are created (defaultBranches), a signal is sent to create a tree
entry. When the level 0 tree entries reach a certain threshold (defaultBranches), another signal
is sent to a tree entry one level up, and so on, until the data is exhausted AND only one
tree entry is present at some level. The key of that tree entry is given out as the rootAddress of the file.
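As a worked example (assuming the default 4096-byte chunks and a 32-byte hash, i.e. 128 branches):
one full level 0 tree chunk covers 128 data chunks, or 512 KiB of content, and the two-level tree
pictured above covers up to 128*128 = 16384 data chunks, roughly 64 MiB.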
*/
var (
errLoadingTreeRootChunk = errors.New("LoadTree Error: Could not load root chunk")
errLoadingTreeChunk = errors.New("LoadTree Error: Could not load chunk")
)
const (
ChunkProcessors = 8
splitTimeout = time.Minute * 5
)
type PyramidSplitterParams struct {
SplitterParams
getter Getter
}
func NewPyramidSplitterParams(addr Address, reader io.Reader, putter Putter, getter Getter, chunkSize int64) *PyramidSplitterParams {
hashSize := putter.RefSize()
return &PyramidSplitterParams{
SplitterParams: SplitterParams{
ChunkerParams: ChunkerParams{
chunkSize: chunkSize,
hashSize: hashSize,
},
reader: reader,
putter: putter,
addr: addr,
},
getter: getter,
}
}
/*
When splitting, data is given as a SectionReader, and the key is a hashSize-long byte slice (Address); the root hash of the entire content will fill this once processing finishes.
New chunks to store are stored using the putter which the caller provides.
*/
func PyramidSplit(ctx context.Context, reader io.Reader, putter Putter, getter Getter, tag *chunk.Tag) (Address, func(context.Context) error, error) {
return NewPyramidSplitter(NewPyramidSplitterParams(nil, reader, putter, getter, chunk.DefaultSize), tag).Split(ctx)
}
func PyramidAppend(ctx context.Context, addr Address, reader io.Reader, putter Putter, getter Getter, tag *chunk.Tag) (Address, func(context.Context) error, error) {
return NewPyramidSplitter(NewPyramidSplitterParams(addr, reader, putter, getter, chunk.DefaultSize), tag).Append(ctx)
}
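// A minimal usage sketch (illustration only, not part of the original file): splitting an
// in-memory buffer and waiting until every chunk is stored. The ctx, putter, getter and tag
// values are assumed to be supplied by the caller (the putter/getter would typically be a
// hasherStore-backed implementation), and `more` stands for further bytes to append.
//
//	data := make([]byte, 10*chunk.DefaultSize)
//	rand.Read(data)
//	key, wait, err := PyramidSplit(ctx, bytes.NewReader(data), putter, getter, tag)
//	if err != nil {
//		return err
//	}
//	if err := wait(ctx); err != nil { // blocks until the store confirms all chunks
//		return err
//	}
//	// appending later reuses the existing tree instead of re-chunking everything:
//	key2, wait2, err := PyramidAppend(ctx, key, bytes.NewReader(more), putter, getter, tag)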
// TreeEntry is an entry to create a tree node
type TreeEntry struct {
level int
branchCount int64
subtreeSize uint64
chunk []byte
key []byte
index int // used in append to indicate the index of existing tree entry
updatePending bool // indicates if the entry is loaded from existing tree
}
func NewTreeEntry(pyramid *PyramidChunker) *TreeEntry {
return &TreeEntry{
level: 0,
branchCount: 0,
subtreeSize: 0,
chunk: make([]byte, pyramid.chunkSize+8),
key: make([]byte, pyramid.hashSize),
index: 0,
updatePending: false,
}
}
// chunkJob is used by the hash processor to create a data/tree chunk and send it to storage
type chunkJob struct {
key Address
chunk []byte
parentWg *sync.WaitGroup
}
type PyramidChunker struct {
chunkSize int64
hashSize int64
branches int64
reader io.Reader
putter Putter
getter Getter
key Address
tag *chunk.Tag
workerCount int64
workerLock sync.RWMutex
jobC chan *chunkJob
wg *sync.WaitGroup
errC chan error
quitC chan bool
rootAddress []byte
chunkLevel [][]*TreeEntry
}
func NewPyramidSplitter(params *PyramidSplitterParams, tag *chunk.Tag) (pc *PyramidChunker) {
pc = &PyramidChunker{}
pc.reader = params.reader
pc.hashSize = params.hashSize
pc.branches = params.chunkSize / pc.hashSize
pc.chunkSize = pc.hashSize * pc.branches
pc.putter = params.putter
pc.getter = params.getter
pc.key = params.addr
pc.tag = tag
pc.workerCount = 0
pc.jobC = make(chan *chunkJob, 2*ChunkProcessors)
pc.wg = &sync.WaitGroup{}
pc.errC = make(chan error)
pc.quitC = make(chan bool)
pc.rootAddress = make([]byte, pc.hashSize)
pc.chunkLevel = make([][]*TreeEntry, pc.branches)
return
}
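// A note on the arithmetic above (assuming the defaults): with params.chunkSize =
// chunk.DefaultSize = 4096 and a 32-byte hash, branches = 4096/32 = 128, which matches
// the D1..D128 fan-out in the diagram at the top of this file. Recomputing chunkSize as
// hashSize*branches also rounds a chunk size that is not a multiple of hashSize down to one.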
func (pc *PyramidChunker) Join(addr Address, getter Getter, depth int) LazySectionReader {
return &LazyChunkReader{
addr: addr,
depth: depth,
chunkSize: pc.chunkSize,
branches: pc.branches,
hashSize: pc.hashSize,
getter: getter,
}
}
func (pc *PyramidChunker) incrementWorkerCount() {
pc.workerLock.Lock()
defer pc.workerLock.Unlock()
pc.workerCount += 1
}
func (pc *PyramidChunker) getWorkerCount() int64 {
pc.workerLock.Lock()
defer pc.workerLock.Unlock()
return pc.workerCount
}
func (pc *PyramidChunker) decrementWorkerCount() {
pc.workerLock.Lock()
defer pc.workerLock.Unlock()
pc.workerCount -= 1
}
func (pc *PyramidChunker) Split(ctx context.Context) (k Address, wait func(context.Context) error, err error) {
pc.wg.Add(1)
pc.prepareChunks(ctx, false)
// closes the internal error channel once all subprocesses in the workgroup have finished
go func() {
// waiting for all chunks to finish
pc.wg.Wait()
// We close errC here because it is passed down to the (up to 8) parallel processor routines underneath;
// if an error happens in one of them, that particular routine raises the error.
// Once they all complete successfully, control comes back and we can safely close it here.
close(pc.errC)
}()
defer close(pc.quitC)
defer pc.putter.Close()
select {
case err := <-pc.errC:
if err != nil {
return nil, nil, err
}
case <-ctx.Done():
_ = pc.putter.Wait(ctx) //???
return nil, nil, ctx.Err()
}
return pc.rootAddress, pc.putter.Wait, nil
}
func (pc *PyramidChunker) Append(ctx context.Context) (k Address, wait func(context.Context) error, err error) {
// Load the rightmost unfinished tree chunks in every level
pc.loadTree(ctx)
pc.wg.Add(1)
pc.prepareChunks(ctx, true)
// closes the internal error channel once all subprocesses in the workgroup have finished
go func() {
// waiting for all chunks to finish
pc.wg.Wait()
close(pc.errC)
}()
defer close(pc.quitC)
defer pc.putter.Close()
select {
case err := <-pc.errC:
if err != nil {
return nil, nil, err
}
case <-time.NewTimer(splitTimeout).C:
}
return pc.rootAddress, pc.putter.Wait, nil
}
func (pc *PyramidChunker) processor(ctx context.Context, id int64) {
defer pc.decrementWorkerCount()
for {
select {
case job, ok := <-pc.jobC:
if !ok {
return
}
pc.processChunk(ctx, id, job)
pc.tag.Inc(chunk.StateSplit)
case <-pc.quitC:
return
}
}
}
func (pc *PyramidChunker) processChunk(ctx context.Context, id int64, job *chunkJob) {
ref, err := pc.putter.Put(ctx, job.chunk)
if err != nil {
select {
case pc.errC <- err:
case <-pc.quitC:
}
}
// report hash of this chunk one level up (the key corresponds to the proper subslice of the parent chunk)
copy(job.key, ref)
// signal the parent that this chunk has been processed
job.parentWg.Done()
}
func (pc *PyramidChunker) loadTree(ctx context.Context) error {
// Get the root chunk to get the total size
chunkData, err := pc.getter.Get(ctx, Reference(pc.key))
if err != nil {
return errLoadingTreeRootChunk
}
chunkSize := int64(chunkData.Size())
log.Trace("pyramid.chunker: root chunk", "chunk.Size", chunkSize, "pc.chunkSize", pc.chunkSize)
// if the data size is not more than one chunk, add a parent with the update pending
if chunkSize <= pc.chunkSize {
newEntry := &TreeEntry{
level: 0,
branchCount: 1,
subtreeSize: uint64(chunkSize),
chunk: make([]byte, pc.chunkSize+8),
key: make([]byte, pc.hashSize),
index: 0,
updatePending: true,
}
copy(newEntry.chunk[8:], pc.key)
pc.chunkLevel[0] = append(pc.chunkLevel[0], newEntry)
return nil
}
var treeSize int64
var depth int
treeSize = pc.chunkSize
for ; treeSize < chunkSize; treeSize *= pc.branches {
depth++
}
log.Trace("pyramid.chunker", "depth", depth)
// Add the root chunk entry
branchCount := int64(len(chunkData)-8) / pc.hashSize
newEntry := &TreeEntry{
level: depth - 1,
branchCount: branchCount,
subtreeSize: uint64(chunkSize),
chunk: chunkData,
key: pc.key,
index: 0,
updatePending: true,
}
pc.chunkLevel[depth-1] = append(pc.chunkLevel[depth-1], newEntry)
// Add the rest of the tree
for lvl := depth - 1; lvl >= 1; lvl-- {
//TODO(jmozah): instead of loading finished branches and then trim in the end,
//avoid loading them in the first place
for _, ent := range pc.chunkLevel[lvl] {
branchCount = int64(len(ent.chunk)-8) / pc.hashSize
for i := int64(0); i < branchCount; i++ {
key := ent.chunk[8+(i*pc.hashSize) : 8+((i+1)*pc.hashSize)]
newChunkData, err := pc.getter.Get(ctx, Reference(key))
if err != nil {
return errLoadingTreeChunk
}
newChunkSize := newChunkData.Size()
newBranchCount := int64(len(newChunkData)-8) / pc.hashSize
newEntry := &TreeEntry{
level: lvl - 1,
branchCount: newBranchCount,
subtreeSize: newChunkSize,
chunk: newChunkData,
key: key,
index: 0,
updatePending: true,
}
pc.chunkLevel[lvl-1] = append(pc.chunkLevel[lvl-1], newEntry)
}
// We need to keep only the rightmost unfinished branch, so trim all finished branches
if int64(len(pc.chunkLevel[lvl-1])) >= pc.branches {
pc.chunkLevel[lvl-1] = nil
}
}
}
return nil
}
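// Worked example for the depth loop in loadTree above (assuming 4096-byte chunks and
// 128 branches): for 1 MiB of content, treeSize grows 4096 -> 524288 -> 67108864, so
// the loop runs twice, depth = 2, and the root entry sits at level depth-1 = 1 —
// exactly the two-level shape pictured at the top of this file.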
func (pc *PyramidChunker) prepareChunks(ctx context.Context, isAppend bool) {
defer pc.wg.Done()
chunkWG := &sync.WaitGroup{}
pc.incrementWorkerCount()
go pc.processor(ctx, pc.workerCount)
parent := NewTreeEntry(pc)
var unfinishedChunkData ChunkData
var unfinishedChunkSize uint64
if isAppend && len(pc.chunkLevel[0]) != 0 {
lastIndex := len(pc.chunkLevel[0]) - 1
ent := pc.chunkLevel[0][lastIndex]
if ent.branchCount < pc.branches {
parent = &TreeEntry{
level: 0,
branchCount: ent.branchCount,
subtreeSize: ent.subtreeSize,
chunk: ent.chunk,
key: ent.key,
index: lastIndex,
updatePending: true,
}
lastBranch := parent.branchCount - 1
lastAddress := parent.chunk[8+lastBranch*pc.hashSize : 8+(lastBranch+1)*pc.hashSize]
var err error
unfinishedChunkData, err = pc.getter.Get(ctx, lastAddress)
if err != nil {
pc.errC <- err
}
unfinishedChunkSize = unfinishedChunkData.Size()
if unfinishedChunkSize < uint64(pc.chunkSize) {
parent.subtreeSize = parent.subtreeSize - unfinishedChunkSize
parent.branchCount = parent.branchCount - 1
} else {
unfinishedChunkData = nil
}
}
}
for index := 0; ; index++ {
var err error
chunkData := make([]byte, pc.chunkSize+8)
var readBytes int
if unfinishedChunkData != nil {
copy(chunkData, unfinishedChunkData)
readBytes += int(unfinishedChunkSize)
unfinishedChunkData = nil
log.Trace("pyramid.chunker: found unfinished chunk", "readBytes", readBytes)
}
var res []byte
res, err = ioutil.ReadAll(io.LimitReader(pc.reader, int64(len(chunkData)-(8+readBytes))))
// hack for ioutil.ReadAll:
// a successful call to ioutil.ReadAll returns err == nil, not err == EOF, whereas we
// want to propagate the io.EOF error
if len(res) == 0 && err == nil {
err = io.EOF
}
copy(chunkData[8+readBytes:], res)
readBytes += len(res)
log.Trace("pyramid.chunker: copied all data", "readBytes", readBytes)
if err != nil {
if err == io.EOF || err == io.ErrUnexpectedEOF {
pc.cleanChunkLevels()
// Check if we are appending or the chunk is the only one.
if parent.branchCount == 1 && (pc.depth() == 0 || isAppend) {
// Data is exactly one chunk: pick the last chunk key as root
chunkWG.Wait()
lastChunksAddress := parent.chunk[8 : 8+pc.hashSize]
copy(pc.rootAddress, lastChunksAddress)
break
}
} else {
close(pc.quitC)
break
}
}
// Data ended on a chunk boundary, just signal to start building the tree
if readBytes == 0 {
pc.buildTree(isAppend, parent, chunkWG, true, nil)
break
} else {
pkey := pc.enqueueDataChunk(chunkData, uint64(readBytes), parent, chunkWG)
// update tree related parent data structures
parent.subtreeSize += uint64(readBytes)
parent.branchCount++
// Data got exhausted... signal to send any parent tree related chunks
if int64(readBytes) < pc.chunkSize {
pc.cleanChunkLevels()
// only one data chunk, so don't add any parent chunk
if parent.branchCount <= 1 {
chunkWG.Wait()
if isAppend || pc.depth() == 0 {
// No need to build the tree if the depth is 0
// or we are appending.
// Just use the last key.
copy(pc.rootAddress, pkey)
} else {
// We need to build the tree and provide the lonely
// chunk key to replace the last tree chunk key.
pc.buildTree(isAppend, parent, chunkWG, true, pkey)
}
break
}
pc.buildTree(isAppend, parent, chunkWG, true, nil)
break
}
if parent.branchCount == pc.branches {
pc.buildTree(isAppend, parent, chunkWG, false, nil)
parent = NewTreeEntry(pc)
}
}
workers := pc.getWorkerCount()
if int64(len(pc.jobC)) > workers && workers < ChunkProcessors {
pc.incrementWorkerCount()
go pc.processor(ctx, pc.workerCount)
}
}
}
func (pc *PyramidChunker) buildTree(isAppend bool, ent *TreeEntry, chunkWG *sync.WaitGroup, last bool, lonelyChunkKey []byte) {
chunkWG.Wait()
pc.enqueueTreeChunk(ent, chunkWG, last)
compress := false
endLvl := pc.branches
for lvl := int64(0); lvl < pc.branches; lvl++ {
lvlCount := int64(len(pc.chunkLevel[lvl]))
if lvlCount >= pc.branches {
endLvl = lvl + 1
compress = true
break
}
}
if !compress && !last {
return
}
// Wait for all the keys to be processed before compressing the tree
chunkWG.Wait()
for lvl := int64(ent.level); lvl < endLvl; lvl++ {
lvlCount := int64(len(pc.chunkLevel[lvl]))
if lvlCount == 1 && last {
copy(pc.rootAddress, pc.chunkLevel[lvl][0].key)
return
}
for startCount := int64(0); startCount < lvlCount; startCount += pc.branches {
endCount := startCount + pc.branches
if endCount > lvlCount {
endCount = lvlCount
}
var nextLvlCount int64
var tempEntry *TreeEntry
if len(pc.chunkLevel[lvl+1]) > 0 {
nextLvlCount = int64(len(pc.chunkLevel[lvl+1]) - 1)
tempEntry = pc.chunkLevel[lvl+1][nextLvlCount]
}
if isAppend && tempEntry != nil && tempEntry.updatePending {
updateEntry := &TreeEntry{
level: int(lvl + 1),
branchCount: 0,
subtreeSize: 0,
chunk: make([]byte, pc.chunkSize+8),
key: make([]byte, pc.hashSize),
index: int(nextLvlCount),
updatePending: true,
}
for index := int64(0); index < lvlCount; index++ {
updateEntry.branchCount++
updateEntry.subtreeSize += pc.chunkLevel[lvl][index].subtreeSize
copy(updateEntry.chunk[8+(index*pc.hashSize):8+((index+1)*pc.hashSize)], pc.chunkLevel[lvl][index].key[:pc.hashSize])
}
pc.enqueueTreeChunk(updateEntry, chunkWG, last)
} else {
noOfBranches := endCount - startCount
newEntry := &TreeEntry{
level: int(lvl + 1),
branchCount: noOfBranches,
subtreeSize: 0,
chunk: make([]byte, (noOfBranches*pc.hashSize)+8),
key: make([]byte, pc.hashSize),
index: int(nextLvlCount),
updatePending: false,
}
index := int64(0)
for i := startCount; i < endCount; i++ {
entry := pc.chunkLevel[lvl][i]
newEntry.subtreeSize += entry.subtreeSize
copy(newEntry.chunk[8+(index*pc.hashSize):8+((index+1)*pc.hashSize)], entry.key[:pc.hashSize])
index++
}
// The lonely chunk key is the key of the last chunk that is alone on the last branch.
// In this case, ignore its tree chunk key and replace it with the lonely chunk key.
if lonelyChunkKey != nil {
// Overwrite the last tree chunk key with the lonely data chunk key.
copy(newEntry.chunk[int64(len(newEntry.chunk))-pc.hashSize:], lonelyChunkKey[:pc.hashSize])
}
pc.enqueueTreeChunk(newEntry, chunkWG, last)
}
}
if !isAppend {
chunkWG.Wait()
if compress {
pc.chunkLevel[lvl] = nil
}
}
}
}
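// Sketch of the compression trigger in buildTree above: with 128 branches, once any
// level accumulates 128 tree entries they are summarized into a single entry one level
// up (and so on upward); `last` additionally forces a final sweep so that a lone
// remaining entry's key can become the rootAddress.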
func (pc *PyramidChunker) enqueueTreeChunk(ent *TreeEntry, chunkWG *sync.WaitGroup, last bool) {
if ent != nil && ent.branchCount > 0 {
// wait for the data chunks to finish before processing the tree chunk
if last {
chunkWG.Wait()
}
binary.LittleEndian.PutUint64(ent.chunk[:8], ent.subtreeSize)
ent.key = make([]byte, pc.hashSize)
chunkWG.Add(1)
select {
case pc.jobC <- &chunkJob{ent.key, ent.chunk[:ent.branchCount*pc.hashSize+8], chunkWG}:
case <-pc.quitC:
}
// Update or append based on whether it is a new entry or being reused
if ent.updatePending {
chunkWG.Wait()
pc.chunkLevel[ent.level][ent.index] = ent
} else {
pc.chunkLevel[ent.level] = append(pc.chunkLevel[ent.level], ent)
}
}
}
func (pc *PyramidChunker) enqueueDataChunk(chunkData []byte, size uint64, parent *TreeEntry, chunkWG *sync.WaitGroup) Address {
binary.LittleEndian.PutUint64(chunkData[:8], size)
pkey := parent.chunk[8+parent.branchCount*pc.hashSize : 8+(parent.branchCount+1)*pc.hashSize]
chunkWG.Add(1)
select {
case pc.jobC <- &chunkJob{pkey, chunkData[:size+8], chunkWG}:
case <-pc.quitC:
}
return pkey
}
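// Layout sketch for the chunks built above: every chunk, data or tree, carries an
// 8-byte little-endian size prefix followed by the payload (hash keys for tree chunks,
// raw bytes for data chunks). For a data chunk the prefix is the payload length; a tree
// chunk stores the size of the subtree it covers instead. Building a data chunk by hand
// would look like this (`payload` is a placeholder):
//
//	buf := make([]byte, 8+len(payload))
//	binary.LittleEndian.PutUint64(buf[:8], uint64(len(payload)))
//	copy(buf[8:], payload)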
// depth returns the number of chunk levels.
// It is used to detect if there is only one data chunk
// left for the last branch.
func (pc *PyramidChunker) depth() (d int) {
for _, l := range pc.chunkLevel {
if l == nil {
return
}
d++
}
return
}
// cleanChunkLevels removes gaps (nil levels) between chunk levels
// that are not nil.
func (pc *PyramidChunker) cleanChunkLevels() {
for i, l := range pc.chunkLevel {
if l == nil {
pc.chunkLevel = append(pc.chunkLevel[:i], append(pc.chunkLevel[i+1:], nil)...)
}
}
}
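// Sketch of cleanChunkLevels with illustrative values: given pc.chunkLevel equal to
// [L0, nil, L2, nil], the nil gap is removed and pushed to the end, yielding
// [L0, L2, nil, nil], so that depth() above counts only the consecutive filled levels.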

41
storage/swarmhasher.go Normal file
View File

@ -0,0 +1,41 @@
// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package storage
import (
"hash"
)
const (
BMTHash = "BMT"
SHA3Hash = "SHA3" // http://golang.org/pkg/hash/#Hash
DefaultHash = BMTHash
)
type SwarmHash interface {
hash.Hash
ResetWithLength([]byte)
}
type HashWithLength struct {
hash.Hash
}
func (h *HashWithLength) ResetWithLength(length []byte) {
h.Reset()
h.Write(length)
}
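// Usage sketch (assuming MakeHashFunc and DefaultHash from types.go in this package):
// a SwarmHash is reset with the 8-byte little-endian span before the payload is
// written, so the content length is folded into the resulting address.
//
//	h := MakeHashFunc(DefaultHash)()
//	h.ResetWithLength(sdata[:8]) // sdata = 8-byte size prefix + payload
//	h.Write(sdata[8:])
//	addr := h.Sum(nil)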

250
storage/types.go Normal file
View File

@ -0,0 +1,250 @@
// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package storage
import (
"bytes"
"context"
"crypto"
"crypto/rand"
"encoding/binary"
"io"
"github.com/ethersphere/swarm/bmt"
"github.com/ethersphere/swarm/chunk"
"golang.org/x/crypto/sha3"
)
// MaxPO is the same as chunk.MaxPO for backward compatibility.
const MaxPO = chunk.MaxPO
// AddressLength is the same as chunk.AddressLength for backward compatibility.
const AddressLength = chunk.AddressLength
type SwarmHasher func() SwarmHash
// Address is an alias for chunk.Address for backward compatibility.
type Address = chunk.Address
// Proximity is the same as chunk.Proximity for backward compatibility.
var Proximity = chunk.Proximity
// ZeroAddr is the same as chunk.ZeroAddr for backward compatibility.
var ZeroAddr = chunk.ZeroAddr
func MakeHashFunc(hash string) SwarmHasher {
switch hash {
case "SHA256":
return func() SwarmHash { return &HashWithLength{crypto.SHA256.New()} }
case "SHA3":
return func() SwarmHash { return &HashWithLength{sha3.NewLegacyKeccak256()} }
case "BMT":
return func() SwarmHash {
hasher := sha3.NewLegacyKeccak256
hasherSize := hasher().Size()
segmentCount := chunk.DefaultSize / hasherSize
pool := bmt.NewTreePool(hasher, segmentCount, bmt.PoolSize)
return bmt.New(pool)
}
}
return nil
}
type AddressCollection []Address
func NewAddressCollection(l int) AddressCollection {
return make(AddressCollection, l)
}
func (c AddressCollection) Len() int {
return len(c)
}
func (c AddressCollection) Less(i, j int) bool {
return bytes.Compare(c[i], c[j]) == -1
}
func (c AddressCollection) Swap(i, j int) {
c[i], c[j] = c[j], c[i]
}
// Chunk is an alias for chunk.Chunk for backward compatibility.
type Chunk = chunk.Chunk
// NewChunk is the same as chunk.NewChunk for backward compatibility.
var NewChunk = chunk.NewChunk
func GenerateRandomChunk(dataSize int64) Chunk {
hasher := MakeHashFunc(DefaultHash)()
sdata := make([]byte, dataSize+8)
rand.Read(sdata[8:])
binary.LittleEndian.PutUint64(sdata[:8], uint64(dataSize))
hasher.ResetWithLength(sdata[:8])
hasher.Write(sdata[8:])
return NewChunk(hasher.Sum(nil), sdata)
}
func GenerateRandomChunks(dataSize int64, count int) (chunks []Chunk) {
for i := 0; i < count; i++ {
ch := GenerateRandomChunk(dataSize)
chunks = append(chunks, ch)
}
return chunks
}
// LazySectionReader supports lazily evaluated Size, plus Seek, Read and ReadAt
type LazySectionReader interface {
Context() context.Context
Size(context.Context, chan bool) (int64, error)
io.Seeker
io.Reader
io.ReaderAt
}
type LazyTestSectionReader struct {
*io.SectionReader
}
func (r *LazyTestSectionReader) Size(context.Context, chan bool) (int64, error) {
return r.SectionReader.Size(), nil
}
func (r *LazyTestSectionReader) Context() context.Context {
return context.TODO()
}
type StoreParams struct {
Hash SwarmHasher `toml:"-"`
DbCapacity uint64
CacheCapacity uint
BaseKey []byte
}
func NewDefaultStoreParams() *StoreParams {
return NewStoreParams(defaultLDBCapacity, defaultCacheCapacity, nil, nil)
}
func NewStoreParams(ldbCap uint64, cacheCap uint, hash SwarmHasher, basekey []byte) *StoreParams {
if basekey == nil {
basekey = make([]byte, 32)
}
if hash == nil {
hash = MakeHashFunc(DefaultHash)
}
return &StoreParams{
Hash: hash,
DbCapacity: ldbCap,
CacheCapacity: cacheCap,
BaseKey: basekey,
}
}
type ChunkData []byte
type Reference []byte
// Putter is responsible for storing data and creating a reference for it
type Putter interface {
Put(context.Context, ChunkData) (Reference, error)
// RefSize returns the length of the Reference created by this Putter
RefSize() int64
// Close is to indicate that no more chunk data will be Put on this Putter
Close()
// Wait returns once all data has been stored and Close() has been called.
Wait(context.Context) error
}
// Getter is an interface to retrieve a chunk's data by its reference
type Getter interface {
Get(context.Context, Reference) (ChunkData, error)
}
// NOTE: this returns invalid data if chunk is encrypted
func (c ChunkData) Size() uint64 {
return binary.LittleEndian.Uint64(c[:8])
}
type ChunkValidator = chunk.Validator
// ContentAddressValidator provides a method to validate the content address of chunks.
// It holds the corresponding hasher used to create the address.
type ContentAddressValidator struct {
Hasher SwarmHasher
}
// NewContentAddressValidator creates a new ContentAddressValidator with the given hasher
func NewContentAddressValidator(hasher SwarmHasher) *ContentAddressValidator {
return &ContentAddressValidator{
Hasher: hasher,
}
}
// Validate checks that the chunk's address is a valid content address for its data
func (v *ContentAddressValidator) Validate(ch Chunk) bool {
data := ch.Data()
if l := len(data); l < 9 || l > chunk.DefaultSize+8 {
// log.Error("invalid chunk size", "chunk", addr.Hex(), "size", l)
return false
}
hasher := v.Hasher()
hasher.ResetWithLength(data[:8])
hasher.Write(data[8:])
hash := hasher.Sum(nil)
return bytes.Equal(hash, ch.Address())
}
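// Example sketch: a chunk produced by GenerateRandomChunk above is expected to pass
// validation, since its address was computed with the same DefaultHash.
//
//	v := NewContentAddressValidator(MakeHashFunc(DefaultHash))
//	ch := GenerateRandomChunk(chunk.DefaultSize)
//	ok := v.Validate(ch) // true for a well-formed content-addressed chunk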
type ChunkStore = chunk.Store
// FakeChunkStore doesn't store anything, it just implements the ChunkStore interface.
// It can be injected into a hasherStore if you don't want to actually store data, just do
// the hashing
type FakeChunkStore struct {
}
// Put doesn't store anything; it is just here to implement ChunkStore
func (f *FakeChunkStore) Put(_ context.Context, _ chunk.ModePut, ch Chunk) (bool, error) {
return false, nil
}
// Has doesn't do anything; it is just here to implement ChunkStore
func (f *FakeChunkStore) Has(_ context.Context, ref Address) (bool, error) {
panic("FakeChunkStore doesn't support Has")
}
// Get doesn't retrieve anything; it is just here to implement ChunkStore
func (f *FakeChunkStore) Get(_ context.Context, _ chunk.ModeGet, ref Address) (Chunk, error) {
panic("FakeChunkStore doesn't support Get")
}
func (f *FakeChunkStore) Set(ctx context.Context, mode chunk.ModeSet, addr chunk.Address) (err error) {
panic("FakeChunkStore doesn't support Set")
}
func (f *FakeChunkStore) LastPullSubscriptionBinID(bin uint8) (id uint64, err error) {
panic("FakeChunkStore doesn't support LastPullSubscriptionBinID")
}
func (f *FakeChunkStore) SubscribePull(ctx context.Context, bin uint8, since, until uint64) (c <-chan chunk.Descriptor, stop func()) {
panic("FakeChunkStore doesn't support SubscribePull")
}
// Close doesn't do anything; it is just here to implement ChunkStore
func (f *FakeChunkStore) Close() error {
return nil
}