swarm: plan bee for content storage and distribution on web3
This change imports the Swarm protocol codebase. Compared to the 'swarm' branch, a few mostly cosmetic changes had to be made: * The various redundant log message prefixes are gone. * All files now have LGPLv3 license headers. * Minor code changes were needed to please go vet and make the tests pass on Windows. * Further changes were required to adapt to the go-ethereum develop branch and its new Go APIs. Some code has not (yet) been brought over: * swarm/cmd/bzzhash: will reappear as cmd/bzzhash later * swarm/cmd/bzzup.sh: will be reimplemented in cmd/bzzup * swarm/cmd/makegenesis: will reappear somehow * swarm/examples/album: will move to a separate repository * swarm/examples/filemanager: ditto * swarm/examples/files: will not be merged * swarm/test/*: will not be merged * swarm/services/swear: will reappear as contracts/swear when needed
This commit is contained in:
239
swarm/storage/dpa.go
Normal file
239
swarm/storage/dpa.go
Normal file
@ -0,0 +1,239 @@
|
||||
// Copyright 2016 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package storage
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ethereum/go-ethereum/logger"
|
||||
"github.com/ethereum/go-ethereum/logger/glog"
|
||||
)
|
||||
|
||||
/*
|
||||
DPA provides the client API entrypoints Store and Retrieve to store and retrieve
|
||||
It can store anything that has a byte slice representation, so files or serialised objects etc.
|
||||
|
||||
Storage: DPA calls the Chunker to segment the input datastream of any size to a merkle hashed tree of chunks. The key of the root block is returned to the client.
|
||||
|
||||
Retrieval: given the key of the root block, the DPA retrieves the block chunks and reconstructs the original data and passes it back as a lazy reader. A lazy reader is a reader with on-demand delayed processing, i.e. the chunks needed to reconstruct a large file are only fetched and processed if that particular part of the document is actually read.
|
||||
|
||||
As the chunker produces chunks, DPA dispatches them to its own chunk store
|
||||
implementation for storage or retrieval.
|
||||
*/
|
||||
|
||||
const (
|
||||
storeChanCapacity = 100
|
||||
retrieveChanCapacity = 100
|
||||
singletonSwarmDbCapacity = 50000
|
||||
singletonSwarmCacheCapacity = 500
|
||||
maxStoreProcesses = 8
|
||||
maxRetrieveProcesses = 8
|
||||
)
|
||||
|
||||
var (
|
||||
notFound = errors.New("not found")
|
||||
)
|
||||
|
||||
type DPA struct {
|
||||
ChunkStore
|
||||
storeC chan *Chunk
|
||||
retrieveC chan *Chunk
|
||||
Chunker Chunker
|
||||
|
||||
lock sync.Mutex
|
||||
running bool
|
||||
wg *sync.WaitGroup
|
||||
quitC chan bool
|
||||
}
|
||||
|
||||
// for testing locally
|
||||
func NewLocalDPA(datadir string) (*DPA, error) {
|
||||
|
||||
hash := MakeHashFunc("SHA256")
|
||||
|
||||
dbStore, err := NewDbStore(datadir, hash, singletonSwarmDbCapacity, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return NewDPA(&LocalStore{
|
||||
NewMemStore(dbStore, singletonSwarmCacheCapacity),
|
||||
dbStore,
|
||||
}, NewChunkerParams()), nil
|
||||
}
|
||||
|
||||
func NewDPA(store ChunkStore, params *ChunkerParams) *DPA {
|
||||
chunker := NewTreeChunker(params)
|
||||
return &DPA{
|
||||
Chunker: chunker,
|
||||
ChunkStore: store,
|
||||
}
|
||||
}
|
||||
|
||||
// Public API. Main entry point for document retrieval directly. Used by the
|
||||
// FS-aware API and httpaccess
|
||||
// Chunk retrieval blocks on netStore requests with a timeout so reader will
|
||||
// report error if retrieval of chunks within requested range time out.
|
||||
func (self *DPA) Retrieve(key Key) LazySectionReader {
|
||||
return self.Chunker.Join(key, self.retrieveC)
|
||||
}
|
||||
|
||||
// Public API. Main entry point for document storage directly. Used by the
|
||||
// FS-aware API and httpaccess
|
||||
func (self *DPA) Store(data io.Reader, size int64, swg *sync.WaitGroup, wwg *sync.WaitGroup) (key Key, err error) {
|
||||
return self.Chunker.Split(data, size, self.storeC, swg, wwg)
|
||||
}
|
||||
|
||||
func (self *DPA) Start() {
|
||||
self.lock.Lock()
|
||||
defer self.lock.Unlock()
|
||||
if self.running {
|
||||
return
|
||||
}
|
||||
self.running = true
|
||||
self.retrieveC = make(chan *Chunk, retrieveChanCapacity)
|
||||
self.storeC = make(chan *Chunk, storeChanCapacity)
|
||||
self.quitC = make(chan bool)
|
||||
self.storeLoop()
|
||||
self.retrieveLoop()
|
||||
}
|
||||
|
||||
func (self *DPA) Stop() {
|
||||
self.lock.Lock()
|
||||
defer self.lock.Unlock()
|
||||
if !self.running {
|
||||
return
|
||||
}
|
||||
self.running = false
|
||||
close(self.quitC)
|
||||
}
|
||||
|
||||
// retrieveLoop dispatches the parallel chunk retrieval requests received on the
|
||||
// retrieve channel to its ChunkStore (NetStore or LocalStore)
|
||||
func (self *DPA) retrieveLoop() {
|
||||
for i := 0; i < maxRetrieveProcesses; i++ {
|
||||
go self.retrieveWorker()
|
||||
}
|
||||
glog.V(logger.Detail).Infof("dpa: retrieve loop spawning %v workers", maxRetrieveProcesses)
|
||||
}
|
||||
|
||||
func (self *DPA) retrieveWorker() {
|
||||
for chunk := range self.retrieveC {
|
||||
glog.V(logger.Detail).Infof("dpa: retrieve loop : chunk %v", chunk.Key.Log())
|
||||
storedChunk, err := self.Get(chunk.Key)
|
||||
if err == notFound {
|
||||
glog.V(logger.Detail).Infof("chunk %v not found", chunk.Key.Log())
|
||||
} else if err != nil {
|
||||
glog.V(logger.Detail).Infof("error retrieving chunk %v: %v", chunk.Key.Log(), err)
|
||||
} else {
|
||||
chunk.SData = storedChunk.SData
|
||||
chunk.Size = storedChunk.Size
|
||||
}
|
||||
close(chunk.C)
|
||||
|
||||
select {
|
||||
case <-self.quitC:
|
||||
return
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// storeLoop dispatches the parallel chunk store request processors
|
||||
// received on the store channel to its ChunkStore (NetStore or LocalStore)
|
||||
func (self *DPA) storeLoop() {
|
||||
for i := 0; i < maxStoreProcesses; i++ {
|
||||
go self.storeWorker()
|
||||
}
|
||||
glog.V(logger.Detail).Infof("dpa: store spawning %v workers", maxStoreProcesses)
|
||||
}
|
||||
|
||||
func (self *DPA) storeWorker() {
|
||||
|
||||
for chunk := range self.storeC {
|
||||
self.Put(chunk)
|
||||
if chunk.wg != nil {
|
||||
glog.V(logger.Detail).Infof("dpa: store processor %v", chunk.Key.Log())
|
||||
chunk.wg.Done()
|
||||
|
||||
}
|
||||
select {
|
||||
case <-self.quitC:
|
||||
return
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// DpaChunkStore implements the ChunkStore interface,
|
||||
// this chunk access layer assumed 2 chunk stores
|
||||
// local storage eg. LocalStore and network storage eg., NetStore
|
||||
// access by calling network is blocking with a timeout
|
||||
|
||||
type dpaChunkStore struct {
|
||||
n int
|
||||
localStore ChunkStore
|
||||
netStore ChunkStore
|
||||
}
|
||||
|
||||
func NewDpaChunkStore(localStore, netStore ChunkStore) *dpaChunkStore {
|
||||
return &dpaChunkStore{0, localStore, netStore}
|
||||
}
|
||||
|
||||
// Get is the entrypoint for local retrieve requests
|
||||
// waits for response or times out
|
||||
func (self *dpaChunkStore) Get(key Key) (chunk *Chunk, err error) {
|
||||
chunk, err = self.netStore.Get(key)
|
||||
// timeout := time.Now().Add(searchTimeout)
|
||||
if chunk.SData != nil {
|
||||
glog.V(logger.Detail).Infof("DPA.Get: %v found locally, %d bytes", key.Log(), len(chunk.SData))
|
||||
return
|
||||
}
|
||||
// TODO: use self.timer time.Timer and reset with defer disableTimer
|
||||
timer := time.After(searchTimeout)
|
||||
select {
|
||||
case <-timer:
|
||||
glog.V(logger.Detail).Infof("DPA.Get: %v request time out ", key.Log())
|
||||
err = notFound
|
||||
case <-chunk.Req.C:
|
||||
glog.V(logger.Detail).Infof("DPA.Get: %v retrieved, %d bytes (%p)", key.Log(), len(chunk.SData), chunk)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Put is the entrypoint for local store requests coming from storeLoop
|
||||
func (self *dpaChunkStore) Put(entry *Chunk) {
|
||||
chunk, err := self.localStore.Get(entry.Key)
|
||||
if err != nil {
|
||||
glog.V(logger.Detail).Infof("DPA.Put: %v new chunk. call netStore.Put", entry.Key.Log())
|
||||
chunk = entry
|
||||
} else if chunk.SData == nil {
|
||||
glog.V(logger.Detail).Infof("DPA.Put: %v request entry found", entry.Key.Log())
|
||||
chunk.SData = entry.SData
|
||||
chunk.Size = entry.Size
|
||||
} else {
|
||||
glog.V(logger.Detail).Infof("DPA.Put: %v chunk already known", entry.Key.Log())
|
||||
return
|
||||
}
|
||||
// from this point on the storage logic is the same with network storage requests
|
||||
glog.V(logger.Detail).Infof("DPA.Put %v: %v", self.n, chunk.Key.Log())
|
||||
self.n++
|
||||
self.netStore.Put(chunk)
|
||||
}
|
Reference in New Issue
Block a user