swarm: network rewrite merge
@@ -20,8 +20,11 @@ import (
     "encoding/binary"
     "errors"
     "io"
+    "io/ioutil"
     "sync"
     "time"
+
+    "github.com/ethereum/go-ethereum/swarm/log"
 )

 /*
@@ -62,9 +65,8 @@ var (
 )

 const (
-    ChunkProcessors       = 8
-    DefaultBranches int64 = 128
-    splitTimeout          = time.Minute * 5
+    ChunkProcessors = 8
+    splitTimeout    = time.Minute * 5
 )

 const (
@@ -72,18 +74,39 @@ const (
     TreeChunk = 1
 )

-type ChunkerParams struct {
-    Branches int64
-    Hash     string
+type PyramidSplitterParams struct {
+    SplitterParams
+    getter Getter
 }

-func NewChunkerParams() *ChunkerParams {
-    return &ChunkerParams{
-        Branches: DefaultBranches,
-        Hash:     SHA3Hash,
+func NewPyramidSplitterParams(addr Address, reader io.Reader, putter Putter, getter Getter, chunkSize int64) *PyramidSplitterParams {
+    hashSize := putter.RefSize()
+    return &PyramidSplitterParams{
+        SplitterParams: SplitterParams{
+            ChunkerParams: ChunkerParams{
+                chunkSize: chunkSize,
+                hashSize:  hashSize,
+            },
+            reader: reader,
+            putter: putter,
+            addr:   addr,
+        },
+        getter: getter,
     }
 }

 /*
 When splitting, data is given as a SectionReader, and the key is a hashSize-long byte slice (Key); the root hash of the entire content will fill this once processing finishes.
 New chunks to store are stored using the putter which the caller provides.
 */
+func PyramidSplit(reader io.Reader, putter Putter, getter Getter) (Address, func(), error) {
+    return NewPyramidSplitter(NewPyramidSplitterParams(nil, reader, putter, getter, DefaultChunkSize)).Split()
+}
+
+func PyramidAppend(addr Address, reader io.Reader, putter Putter, getter Getter) (Address, func(), error) {
+    return NewPyramidSplitter(NewPyramidSplitterParams(addr, reader, putter, getter, DefaultChunkSize)).Append()
+}
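PyramidSplit and PyramidAppend are the new public entry points: each returns the root address, a wait function that blocks until storage completes, and an error. A minimal caller sketch (our helper name, not part of the diff), assuming a single value hs that satisfies both the package's Putter and Getter interfaces:

// Sketch only, in package storage: storeStream is our name; hs is assumed
// to implement both Putter and Getter.
func storeStream(data io.Reader, hs interface {
    Putter
    Getter
}) (Address, error) {
    addr, wait, err := PyramidSplit(data, hs, hs)
    if err != nil {
        return nil, err
    }
    wait() // block until all enqueued chunks have been stored
    return addr, nil
}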

 // Entry to create a tree node
 type TreeEntry struct {
     level         int
@@ -109,264 +132,250 @@ func NewTreeEntry(pyramid *PyramidChunker) *TreeEntry {

 // Used by the hash processor to create a data/tree chunk and send to storage
 type chunkJob struct {
-    key       Key
-    chunk     []byte
-    size      int64
-    parentWg  *sync.WaitGroup
-    chunkType int // used to identify the tree related chunks for debugging
-    chunkLvl  int // leaf-1 is level 0 and goes upwards until it reaches root
+    key      Address
+    chunk    []byte
+    parentWg *sync.WaitGroup
 }

 type PyramidChunker struct {
-    hashFunc    SwarmHasher
     chunkSize   int64
     hashSize    int64
     branches    int64
+    reader      io.Reader
+    putter      Putter
+    getter      Getter
+    key         Address
     workerCount int64
     workerLock  sync.RWMutex
+    jobC        chan *chunkJob
+    wg          *sync.WaitGroup
+    errC        chan error
+    quitC       chan bool
+    rootKey     []byte
+    chunkLevel  [][]*TreeEntry
 }

-func NewPyramidChunker(params *ChunkerParams) (self *PyramidChunker) {
-    self = &PyramidChunker{}
-    self.hashFunc = MakeHashFunc(params.Hash)
-    self.branches = params.Branches
-    self.hashSize = int64(self.hashFunc().Size())
-    self.chunkSize = self.hashSize * self.branches
-    self.workerCount = 0
+func NewPyramidSplitter(params *PyramidSplitterParams) (pc *PyramidChunker) {
+    pc = &PyramidChunker{}
+    pc.reader = params.reader
+    pc.hashSize = params.hashSize
+    pc.branches = params.chunkSize / pc.hashSize
+    pc.chunkSize = pc.hashSize * pc.branches
+    pc.putter = params.putter
+    pc.getter = params.getter
+    pc.key = params.addr
+    pc.workerCount = 0
+    pc.jobC = make(chan *chunkJob, 2*ChunkProcessors)
+    pc.wg = &sync.WaitGroup{}
+    pc.errC = make(chan error)
+    pc.quitC = make(chan bool)
+    pc.rootKey = make([]byte, pc.hashSize)
+    pc.chunkLevel = make([][]*TreeEntry, pc.branches)
     return
 }

-func (self *PyramidChunker) Join(key Key, chunkC chan *Chunk) LazySectionReader {
+func (pc *PyramidChunker) Join(addr Address, getter Getter, depth int) LazySectionReader {
     return &LazyChunkReader{
-        key:       key,
-        chunkC:    chunkC,
-        chunkSize: self.chunkSize,
-        branches:  self.branches,
-        hashSize:  self.hashSize,
+        key:       addr,
+        depth:     depth,
+        chunkSize: pc.chunkSize,
+        branches:  pc.branches,
+        hashSize:  pc.hashSize,
+        getter:    getter,
     }
 }

-func (self *PyramidChunker) incrementWorkerCount() {
-    self.workerLock.Lock()
-    defer self.workerLock.Unlock()
-    self.workerCount += 1
+func (pc *PyramidChunker) incrementWorkerCount() {
+    pc.workerLock.Lock()
+    defer pc.workerLock.Unlock()
+    pc.workerCount += 1
 }

-func (self *PyramidChunker) getWorkerCount() int64 {
-    self.workerLock.Lock()
-    defer self.workerLock.Unlock()
-    return self.workerCount
+func (pc *PyramidChunker) getWorkerCount() int64 {
+    pc.workerLock.Lock()
+    defer pc.workerLock.Unlock()
+    return pc.workerCount
 }

-func (self *PyramidChunker) decrementWorkerCount() {
-    self.workerLock.Lock()
-    defer self.workerLock.Unlock()
-    self.workerCount -= 1
+func (pc *PyramidChunker) decrementWorkerCount() {
+    pc.workerLock.Lock()
+    defer pc.workerLock.Unlock()
+    pc.workerCount -= 1
 }

-func (self *PyramidChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, storageWG, processorWG *sync.WaitGroup) (Key, error) {
-    jobC := make(chan *chunkJob, 2*ChunkProcessors)
-    wg := &sync.WaitGroup{}
-    errC := make(chan error)
-    quitC := make(chan bool)
-    rootKey := make([]byte, self.hashSize)
-    chunkLevel := make([][]*TreeEntry, self.branches)
+func (pc *PyramidChunker) Split() (k Address, wait func(), err error) {
+    log.Debug("pyramid.chunker: Split()")

-    wg.Add(1)
-    go self.prepareChunks(false, chunkLevel, data, rootKey, quitC, wg, jobC, processorWG, chunkC, errC, storageWG)
+    pc.wg.Add(1)
+    pc.prepareChunks(false)

     // closes internal error channel if all subprocesses in the workgroup finished
     go func() {

         // waiting for all chunks to finish
-        wg.Wait()
+        pc.wg.Wait()

-        // if storage waitgroup is non-nil, we wait for storage to finish too
-        if storageWG != nil {
-            storageWG.Wait()
-        }
         // We close errC here because this is passed down to 8 parallel routines underneath.
         // if an error happens in one of them.. that particular routine raises error...
         // once they all complete successfully, the control comes back and we can safely close this here.
-        close(errC)
+        close(pc.errC)
     }()

-    defer close(quitC)
+    defer close(pc.quitC)
+    defer pc.putter.Close()

     select {
-    case err := <-errC:
+    case err := <-pc.errC:
         if err != nil {
-            return nil, err
+            return nil, nil, err
         }
     case <-time.NewTimer(splitTimeout).C:
     }
-    return rootKey, nil
+    return pc.rootKey, pc.putter.Wait, nil

 }

-func (self *PyramidChunker) Append(key Key, data io.Reader, chunkC chan *Chunk, storageWG, processorWG *sync.WaitGroup) (Key, error) {
-    quitC := make(chan bool)
-    rootKey := make([]byte, self.hashSize)
-    chunkLevel := make([][]*TreeEntry, self.branches)
-
+func (pc *PyramidChunker) Append() (k Address, wait func(), err error) {
+    log.Debug("pyramid.chunker: Append()")
     // Load the right most unfinished tree chunks in every level
-    self.loadTree(chunkLevel, key, chunkC, quitC)
+    pc.loadTree()

-    jobC := make(chan *chunkJob, 2*ChunkProcessors)
-    wg := &sync.WaitGroup{}
-    errC := make(chan error)
-
-    wg.Add(1)
-    go self.prepareChunks(true, chunkLevel, data, rootKey, quitC, wg, jobC, processorWG, chunkC, errC, storageWG)
+    pc.wg.Add(1)
+    pc.prepareChunks(true)

     // closes internal error channel if all subprocesses in the workgroup finished
     go func() {

         // waiting for all chunks to finish
-        wg.Wait()
+        pc.wg.Wait()

-        // if storage waitgroup is non-nil, we wait for storage to finish too
-        if storageWG != nil {
-            storageWG.Wait()
-        }
-        close(errC)
+        close(pc.errC)
     }()

-    defer close(quitC)
+    defer close(pc.quitC)
+    defer pc.putter.Close()

     select {
-    case err := <-errC:
+    case err := <-pc.errC:
         if err != nil {
-            return nil, err
+            return nil, nil, err
         }
     case <-time.NewTimer(splitTimeout).C:
     }
-    return rootKey, nil

+    return pc.rootKey, pc.putter.Wait, nil

 }

-func (self *PyramidChunker) processor(id int64, jobC chan *chunkJob, chunkC chan *Chunk, errC chan error, quitC chan bool, swg, wwg *sync.WaitGroup) {
-    defer self.decrementWorkerCount()
-
-    hasher := self.hashFunc()
-    if wwg != nil {
-        defer wwg.Done()
-    }
+func (pc *PyramidChunker) processor(id int64) {
+    defer pc.decrementWorkerCount()
     for {
         select {

-        case job, ok := <-jobC:
+        case job, ok := <-pc.jobC:
             if !ok {
                 return
             }
-            self.processChunk(id, hasher, job, chunkC, swg)
-        case <-quitC:
+            pc.processChunk(id, job)
+        case <-pc.quitC:
             return
         }
     }
 }

-func (self *PyramidChunker) processChunk(id int64, hasher SwarmHash, job *chunkJob, chunkC chan *Chunk, swg *sync.WaitGroup) {
-    hasher.ResetWithLength(job.chunk[:8]) // 8 bytes of length
-    hasher.Write(job.chunk[8:])           // minus 8 []byte length
-    h := hasher.Sum(nil)
+func (pc *PyramidChunker) processChunk(id int64, job *chunkJob) {
+    log.Debug("pyramid.chunker: processChunk()", "id", id)

-    newChunk := &Chunk{
-        Key:   h,
-        SData: job.chunk,
-        Size:  job.size,
-        wg:    swg,
+    ref, err := pc.putter.Put(job.chunk)
+    if err != nil {
+        pc.errC <- err
     }

     // report hash of this chunk one level up (keys correspond to the proper subslice of the parent chunk)
-    copy(job.key, h)
+    copy(job.key, ref)

-    // send off new chunk to storage
-    if chunkC != nil {
-        if swg != nil {
-            swg.Add(1)
-        }
-    }
     job.parentWg.Done()
-
-    if chunkC != nil {
-        chunkC <- newChunk
-    }
 }
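processChunk hands the whole buffer to the putter, which hashes it into the reference that gets copied back into the parent's chunk. The layout every chunk shares is an 8-byte little-endian subtree size followed by the payload: raw data for a data chunk, concatenated child references for a tree chunk. A small sketch of that framing (the helper name is ours, not in the diff):

// Sketch of the chunk layout the chunker assembles: 8-byte little-endian
// subtree size, then the concatenated child keys for a tree chunk.
func packTreeChunk(subtreeSize uint64, childKeys [][]byte, hashSize int64) []byte {
    buf := make([]byte, 8+int64(len(childKeys))*hashSize)
    binary.LittleEndian.PutUint64(buf[:8], subtreeSize)
    for i, k := range childKeys {
        // child i lands right after the size prefix, one hashSize slot each
        copy(buf[8+int64(i)*hashSize:8+int64(i+1)*hashSize], k)
    }
    return buf
}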

-func (self *PyramidChunker) loadTree(chunkLevel [][]*TreeEntry, key Key, chunkC chan *Chunk, quitC chan bool) error {
+func (pc *PyramidChunker) loadTree() error {
+    log.Debug("pyramid.chunker: loadTree()")
     // Get the root chunk to get the total size
-    chunk := retrieve(key, chunkC, quitC)
-    if chunk == nil {
+    chunkData, err := pc.getter.Get(Reference(pc.key))
+    if err != nil {
         return errLoadingTreeRootChunk
     }
+    chunkSize := chunkData.Size()
+    log.Trace("pyramid.chunker: root chunk", "chunk.Size", chunkSize, "pc.chunkSize", pc.chunkSize)

     //if data size is less than a chunk... add a parent with update as pending
-    if chunk.Size <= self.chunkSize {
+    if chunkSize <= pc.chunkSize {
         newEntry := &TreeEntry{
             level:         0,
             branchCount:   1,
-            subtreeSize:   uint64(chunk.Size),
-            chunk:         make([]byte, self.chunkSize+8),
-            key:           make([]byte, self.hashSize),
+            subtreeSize:   uint64(chunkSize),
+            chunk:         make([]byte, pc.chunkSize+8),
+            key:           make([]byte, pc.hashSize),
             index:         0,
             updatePending: true,
         }
-        copy(newEntry.chunk[8:], chunk.Key)
-        chunkLevel[0] = append(chunkLevel[0], newEntry)
+        copy(newEntry.chunk[8:], pc.key)
+        pc.chunkLevel[0] = append(pc.chunkLevel[0], newEntry)
         return nil
     }

     var treeSize int64
     var depth int
-    treeSize = self.chunkSize
-    for ; treeSize < chunk.Size; treeSize *= self.branches {
+    treeSize = pc.chunkSize
+    for ; treeSize < chunkSize; treeSize *= pc.branches {
         depth++
     }
+    log.Trace("pyramid.chunker", "depth", depth)

     // Add the root chunk entry
-    branchCount := int64(len(chunk.SData)-8) / self.hashSize
+    branchCount := int64(len(chunkData)-8) / pc.hashSize
     newEntry := &TreeEntry{
         level:         depth - 1,
         branchCount:   branchCount,
-        subtreeSize:   uint64(chunk.Size),
-        chunk:         chunk.SData,
-        key:           key,
+        subtreeSize:   uint64(chunkSize),
+        chunk:         chunkData,
+        key:           pc.key,
         index:         0,
         updatePending: true,
     }
-    chunkLevel[depth-1] = append(chunkLevel[depth-1], newEntry)
+    pc.chunkLevel[depth-1] = append(pc.chunkLevel[depth-1], newEntry)

     // Add the rest of the tree
     for lvl := depth - 1; lvl >= 1; lvl-- {

         //TODO(jmozah): instead of loading finished branches and then trim in the end,
         //avoid loading them in the first place
-        for _, ent := range chunkLevel[lvl] {
-            branchCount = int64(len(ent.chunk)-8) / self.hashSize
+        for _, ent := range pc.chunkLevel[lvl] {
+            branchCount = int64(len(ent.chunk)-8) / pc.hashSize
             for i := int64(0); i < branchCount; i++ {
-                key := ent.chunk[8+(i*self.hashSize) : 8+((i+1)*self.hashSize)]
-                newChunk := retrieve(key, chunkC, quitC)
-                if newChunk == nil {
+                key := ent.chunk[8+(i*pc.hashSize) : 8+((i+1)*pc.hashSize)]
+                newChunkData, err := pc.getter.Get(Reference(key))
+                if err != nil {
                     return errLoadingTreeChunk
                 }
-                bewBranchCount := int64(len(newChunk.SData)-8) / self.hashSize
+                newChunkSize := newChunkData.Size()
+                bewBranchCount := int64(len(newChunkData)-8) / pc.hashSize
                 newEntry := &TreeEntry{
                     level:         lvl - 1,
                     branchCount:   bewBranchCount,
-                    subtreeSize:   uint64(newChunk.Size),
-                    chunk:         newChunk.SData,
+                    subtreeSize:   uint64(newChunkSize),
+                    chunk:         newChunkData,
                     key:           key,
                     index:         0,
                     updatePending: true,
                 }
-                chunkLevel[lvl-1] = append(chunkLevel[lvl-1], newEntry)
+                pc.chunkLevel[lvl-1] = append(pc.chunkLevel[lvl-1], newEntry)

             }

             // We need to get only the right most unfinished branch.. so trim all finished branches
-            if int64(len(chunkLevel[lvl-1])) >= self.branches {
-                chunkLevel[lvl-1] = nil
+            if int64(len(pc.chunkLevel[lvl-1])) >= pc.branches {
+                pc.chunkLevel[lvl-1] = nil
             }
         }
     }
@@ -374,29 +383,25 @@ func (self *PyramidChunker) loadTree(chunkLevel [][]*TreeEntry, key Key, chunkC
     return nil
 }
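The depth loop above grows capacity geometrically: a tree of depth d spans chunkSize * branches^d bytes. A standalone restatement with a worked example (our helper name and numbers, assuming the usual swarm defaults of 4096-byte chunks and 128 branches):

// With chunkSize = 4096 and branches = 128, a 1 MiB file yields depth 2:
// 4096 < 1<<20 and 4096*128 = 524288 < 1<<20, but 4096*128*128 >= 1<<20.
func depthFor(size, chunkSize, branches int64) (depth int) {
    for treeSize := chunkSize; treeSize < size; treeSize *= branches {
        depth++
    }
    return depth
}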

-func (self *PyramidChunker) prepareChunks(isAppend bool, chunkLevel [][]*TreeEntry, data io.Reader, rootKey []byte, quitC chan bool, wg *sync.WaitGroup, jobC chan *chunkJob, processorWG *sync.WaitGroup, chunkC chan *Chunk, errC chan error, storageWG *sync.WaitGroup) {
-    defer wg.Done()
+func (pc *PyramidChunker) prepareChunks(isAppend bool) {
+    log.Debug("pyramid.chunker: prepareChunks", "isAppend", isAppend)
+    defer pc.wg.Done()

     chunkWG := &sync.WaitGroup{}
-    totalDataSize := 0

-    // processorWG keeps track of workers spawned for hashing chunks
-    if processorWG != nil {
-        processorWG.Add(1)
-    }
+    pc.incrementWorkerCount()

-    self.incrementWorkerCount()
-    go self.processor(self.workerCount, jobC, chunkC, errC, quitC, storageWG, processorWG)
+    go pc.processor(pc.workerCount)

-    parent := NewTreeEntry(self)
-    var unFinishedChunk *Chunk
+    parent := NewTreeEntry(pc)
+    var unfinishedChunkData ChunkData
+    var unfinishedChunkSize int64

-    if isAppend && len(chunkLevel[0]) != 0 {
+    if isAppend && len(pc.chunkLevel[0]) != 0 {
+        lastIndex := len(pc.chunkLevel[0]) - 1
+        ent := pc.chunkLevel[0][lastIndex]

-        lastIndex := len(chunkLevel[0]) - 1
-        ent := chunkLevel[0][lastIndex]
-
-        if ent.branchCount < self.branches {
+        if ent.branchCount < pc.branches {
             parent = &TreeEntry{
                 level:       0,
                 branchCount: ent.branchCount,
@@ -408,104 +413,132 @@ func (self *PyramidChunker) prepareChunks(isAppend bool, chunkLevel [][]*TreeEnt
             }

             lastBranch := parent.branchCount - 1
-            lastKey := parent.chunk[8+lastBranch*self.hashSize : 8+(lastBranch+1)*self.hashSize]
+            lastKey := parent.chunk[8+lastBranch*pc.hashSize : 8+(lastBranch+1)*pc.hashSize]

-            unFinishedChunk = retrieve(lastKey, chunkC, quitC)
-            if unFinishedChunk.Size < self.chunkSize {
-
-                parent.subtreeSize = parent.subtreeSize - uint64(unFinishedChunk.Size)
+            var err error
+            unfinishedChunkData, err = pc.getter.Get(lastKey)
+            if err != nil {
+                pc.errC <- err
+            }
+            unfinishedChunkSize = unfinishedChunkData.Size()
+            if unfinishedChunkSize < pc.chunkSize {
+                parent.subtreeSize = parent.subtreeSize - uint64(unfinishedChunkSize)
                 parent.branchCount = parent.branchCount - 1
             } else {
-                unFinishedChunk = nil
+                unfinishedChunkData = nil
             }
         }
     }

     for index := 0; ; index++ {
-        var n int
         var err error
-        chunkData := make([]byte, self.chunkSize+8)
-        if unFinishedChunk != nil {
-            copy(chunkData, unFinishedChunk.SData)
-            n, err = data.Read(chunkData[8+unFinishedChunk.Size:])
-            n += int(unFinishedChunk.Size)
-            unFinishedChunk = nil
-        } else {
-            n, err = data.Read(chunkData[8:])
+        chunkData := make([]byte, pc.chunkSize+8)
+
+        var readBytes int
+
+        if unfinishedChunkData != nil {
+            copy(chunkData, unfinishedChunkData)
+            readBytes += int(unfinishedChunkSize)
+            unfinishedChunkData = nil
+            log.Trace("pyramid.chunker: found unfinished chunk", "readBytes", readBytes)
         }

-        totalDataSize += n
+        var res []byte
+        res, err = ioutil.ReadAll(io.LimitReader(pc.reader, int64(len(chunkData)-(8+readBytes))))
+
+        // hack for ioutil.ReadAll:
+        // a successful call to ioutil.ReadAll returns err == nil, not err == EOF, whereas we
+        // want to propagate the io.EOF error
+        if len(res) == 0 && err == nil {
+            err = io.EOF
+        }
+        copy(chunkData[8+readBytes:], res)
+
+        readBytes += len(res)
+        log.Trace("pyramid.chunker: copied all data", "readBytes", readBytes)
+
         if err != nil {
             if err == io.EOF || err == io.ErrUnexpectedEOF {
-                if parent.branchCount == 1 {
+
+                pc.cleanChunkLevels()
+
+                // Check if we are appending or the chunk is the only one.
+                if parent.branchCount == 1 && (pc.depth() == 0 || isAppend) {
                     // Data is exactly one chunk.. pick the last chunk key as root
                     chunkWG.Wait()
-                    lastChunksKey := parent.chunk[8 : 8+self.hashSize]
-                    copy(rootKey, lastChunksKey)
+                    lastChunksKey := parent.chunk[8 : 8+pc.hashSize]
+                    copy(pc.rootKey, lastChunksKey)
                     break
                 }
             } else {
-                close(quitC)
+                close(pc.quitC)
                 break
             }
         }

         // Data ended in chunk boundary.. just signal to start building tree
-        if n == 0 {
-            self.buildTree(isAppend, chunkLevel, parent, chunkWG, jobC, quitC, true, rootKey)
+        if readBytes == 0 {
+            pc.buildTree(isAppend, parent, chunkWG, true, nil)
             break
         } else {
-
-            pkey := self.enqueueDataChunk(chunkData, uint64(n), parent, chunkWG, jobC, quitC)
+            pkey := pc.enqueueDataChunk(chunkData, uint64(readBytes), parent, chunkWG)

             // update tree related parent data structures
-            parent.subtreeSize += uint64(n)
+            parent.subtreeSize += uint64(readBytes)
             parent.branchCount++

             // Data got exhausted... signal to send any parent tree related chunks
-            if int64(n) < self.chunkSize {
+            if int64(readBytes) < pc.chunkSize {
+
+                pc.cleanChunkLevels()

                 // only one data chunk .. so don't add any parent chunk
                 if parent.branchCount <= 1 {
                     chunkWG.Wait()
-                    copy(rootKey, pkey)
+
+                    if isAppend || pc.depth() == 0 {
+                        // No need to build the tree if the depth is 0
+                        // or we are appending.
+                        // Just use the last key.
+                        copy(pc.rootKey, pkey)
+                    } else {
+                        // We need to build the tree and provide the lonely
+                        // chunk key to replace the last tree chunk key.
+                        pc.buildTree(isAppend, parent, chunkWG, true, pkey)
+                    }
                     break
                 }

-                self.buildTree(isAppend, chunkLevel, parent, chunkWG, jobC, quitC, true, rootKey)
+                pc.buildTree(isAppend, parent, chunkWG, true, nil)
                 break
             }

-            if parent.branchCount == self.branches {
-                self.buildTree(isAppend, chunkLevel, parent, chunkWG, jobC, quitC, false, rootKey)
-                parent = NewTreeEntry(self)
+            if parent.branchCount == pc.branches {
+                pc.buildTree(isAppend, parent, chunkWG, false, nil)
+                parent = NewTreeEntry(pc)
             }

         }

-        workers := self.getWorkerCount()
-        if int64(len(jobC)) > workers && workers < ChunkProcessors {
-            if processorWG != nil {
-                processorWG.Add(1)
-            }
-            self.incrementWorkerCount()
-            go self.processor(self.workerCount, jobC, chunkC, errC, quitC, storageWG, processorWG)
+        workers := pc.getWorkerCount()
+        if int64(len(pc.jobC)) > workers && workers < ChunkProcessors {
+            pc.incrementWorkerCount()
+            go pc.processor(pc.workerCount)
        }

     }

 }
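prepareChunks scales the hashing pool on demand: whenever the backlog in jobC exceeds the number of live workers and the ChunkProcessors cap (8) has not been reached, it starts another processor goroutine. The same pattern in a self-contained form (types and names are ours, not from the diff):

// Standalone sketch of the demand-driven worker pool used above: spawn
// another worker only when the job backlog exceeds the live worker count,
// up to a fixed limit.
type pool struct {
    mu      sync.Mutex
    workers int64
    jobs    chan func()
}

func (p *pool) maybeSpawn(limit int64) {
    p.mu.Lock()
    defer p.mu.Unlock()
    if int64(len(p.jobs)) > p.workers && p.workers < limit {
        p.workers++
        go func() {
            defer func() { p.mu.Lock(); p.workers--; p.mu.Unlock() }()
            for job := range p.jobs {
                job() // drain jobs until the channel closes
            }
        }()
    }
}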

-func (self *PyramidChunker) buildTree(isAppend bool, chunkLevel [][]*TreeEntry, ent *TreeEntry, chunkWG *sync.WaitGroup, jobC chan *chunkJob, quitC chan bool, last bool, rootKey []byte) {
+func (pc *PyramidChunker) buildTree(isAppend bool, ent *TreeEntry, chunkWG *sync.WaitGroup, last bool, lonelyChunkKey []byte) {
     chunkWG.Wait()
-    self.enqueueTreeChunk(chunkLevel, ent, chunkWG, jobC, quitC, last)
+    pc.enqueueTreeChunk(ent, chunkWG, last)

     compress := false
-    endLvl := self.branches
-    for lvl := int64(0); lvl < self.branches; lvl++ {
-        lvlCount := int64(len(chunkLevel[lvl]))
-        if lvlCount >= self.branches {
+    endLvl := pc.branches
+    for lvl := int64(0); lvl < pc.branches; lvl++ {
+        lvlCount := int64(len(pc.chunkLevel[lvl]))
+        if lvlCount >= pc.branches {
             endLvl = lvl + 1
             compress = true
             break
@@ -521,42 +554,42 @@ func (self *PyramidChunker) buildTree(isAppend bool, chunkLevel [][]*TreeEntry,

     for lvl := int64(ent.level); lvl < endLvl; lvl++ {

-        lvlCount := int64(len(chunkLevel[lvl]))
+        lvlCount := int64(len(pc.chunkLevel[lvl]))
         if lvlCount == 1 && last {
-            copy(rootKey, chunkLevel[lvl][0].key)
+            copy(pc.rootKey, pc.chunkLevel[lvl][0].key)
             return
         }

-        for startCount := int64(0); startCount < lvlCount; startCount += self.branches {
+        for startCount := int64(0); startCount < lvlCount; startCount += pc.branches {

-            endCount := startCount + self.branches
+            endCount := startCount + pc.branches
             if endCount > lvlCount {
                 endCount = lvlCount
             }

             var nextLvlCount int64
             var tempEntry *TreeEntry
-            if len(chunkLevel[lvl+1]) > 0 {
-                nextLvlCount = int64(len(chunkLevel[lvl+1]) - 1)
-                tempEntry = chunkLevel[lvl+1][nextLvlCount]
+            if len(pc.chunkLevel[lvl+1]) > 0 {
+                nextLvlCount = int64(len(pc.chunkLevel[lvl+1]) - 1)
+                tempEntry = pc.chunkLevel[lvl+1][nextLvlCount]
             }
             if isAppend && tempEntry != nil && tempEntry.updatePending {
                 updateEntry := &TreeEntry{
                     level:         int(lvl + 1),
                     branchCount:   0,
                     subtreeSize:   0,
-                    chunk:         make([]byte, self.chunkSize+8),
-                    key:           make([]byte, self.hashSize),
+                    chunk:         make([]byte, pc.chunkSize+8),
+                    key:           make([]byte, pc.hashSize),
                     index:         int(nextLvlCount),
                     updatePending: true,
                 }
                 for index := int64(0); index < lvlCount; index++ {
                     updateEntry.branchCount++
-                    updateEntry.subtreeSize += chunkLevel[lvl][index].subtreeSize
-                    copy(updateEntry.chunk[8+(index*self.hashSize):8+((index+1)*self.hashSize)], chunkLevel[lvl][index].key[:self.hashSize])
+                    updateEntry.subtreeSize += pc.chunkLevel[lvl][index].subtreeSize
+                    copy(updateEntry.chunk[8+(index*pc.hashSize):8+((index+1)*pc.hashSize)], pc.chunkLevel[lvl][index].key[:pc.hashSize])
                 }

-                self.enqueueTreeChunk(chunkLevel, updateEntry, chunkWG, jobC, quitC, last)
+                pc.enqueueTreeChunk(updateEntry, chunkWG, last)

             } else {

@@ -565,21 +598,27 @@ func (self *PyramidChunker) buildTree(isAppend bool, chunkLevel [][]*TreeEntry,
                     level:         int(lvl + 1),
                     branchCount:   noOfBranches,
                     subtreeSize:   0,
-                    chunk:         make([]byte, (noOfBranches*self.hashSize)+8),
-                    key:           make([]byte, self.hashSize),
+                    chunk:         make([]byte, (noOfBranches*pc.hashSize)+8),
+                    key:           make([]byte, pc.hashSize),
                     index:         int(nextLvlCount),
                     updatePending: false,
                 }

                 index := int64(0)
                 for i := startCount; i < endCount; i++ {
-                    entry := chunkLevel[lvl][i]
+                    entry := pc.chunkLevel[lvl][i]
                     newEntry.subtreeSize += entry.subtreeSize
-                    copy(newEntry.chunk[8+(index*self.hashSize):8+((index+1)*self.hashSize)], entry.key[:self.hashSize])
+                    copy(newEntry.chunk[8+(index*pc.hashSize):8+((index+1)*pc.hashSize)], entry.key[:pc.hashSize])
                     index++
                 }
+                // Lonely chunk key is the key of the last chunk that is the only one on the last branch.
+                // In this case, ignore its tree chunk key and replace it with the lonely chunk key.
+                if lonelyChunkKey != nil {
+                    // Overwrite the last tree chunk key with the lonely data chunk key.
+                    copy(newEntry.chunk[int64(len(newEntry.chunk))-pc.hashSize:], lonelyChunkKey[:pc.hashSize])
+                }

-                self.enqueueTreeChunk(chunkLevel, newEntry, chunkWG, jobC, quitC, last)
+                pc.enqueueTreeChunk(newEntry, chunkWG, last)

             }

@@ -588,15 +627,15 @@ func (self *PyramidChunker) buildTree(isAppend bool, chunkLevel [][]*TreeEntry,
         if !isAppend {
             chunkWG.Wait()
             if compress {
-                chunkLevel[lvl] = nil
+                pc.chunkLevel[lvl] = nil
             }
         }
     }

 }

-func (self *PyramidChunker) enqueueTreeChunk(chunkLevel [][]*TreeEntry, ent *TreeEntry, chunkWG *sync.WaitGroup, jobC chan *chunkJob, quitC chan bool, last bool) {
-    if ent != nil {
+func (pc *PyramidChunker) enqueueTreeChunk(ent *TreeEntry, chunkWG *sync.WaitGroup, last bool) {
+    if ent != nil && ent.branchCount > 0 {

         // wait for data chunks to get over before processing the tree chunk
         if last {
@@ -604,34 +643,57 @@ func (self *PyramidChunker) enqueueTreeChunk(chunkLevel [][]*TreeEntry, ent *Tre
         }

         binary.LittleEndian.PutUint64(ent.chunk[:8], ent.subtreeSize)
-        ent.key = make([]byte, self.hashSize)
+        ent.key = make([]byte, pc.hashSize)
         chunkWG.Add(1)
         select {
-        case jobC <- &chunkJob{ent.key, ent.chunk[:ent.branchCount*self.hashSize+8], int64(ent.subtreeSize), chunkWG, TreeChunk, 0}:
-        case <-quitC:
+        case pc.jobC <- &chunkJob{ent.key, ent.chunk[:ent.branchCount*pc.hashSize+8], chunkWG}:
+        case <-pc.quitC:
         }

         // Update or append based on whether it is a new entry or being reused
         if ent.updatePending {
             chunkWG.Wait()
-            chunkLevel[ent.level][ent.index] = ent
+            pc.chunkLevel[ent.level][ent.index] = ent
         } else {
-            chunkLevel[ent.level] = append(chunkLevel[ent.level], ent)
+            pc.chunkLevel[ent.level] = append(pc.chunkLevel[ent.level], ent)
         }

     }
 }

-func (self *PyramidChunker) enqueueDataChunk(chunkData []byte, size uint64, parent *TreeEntry, chunkWG *sync.WaitGroup, jobC chan *chunkJob, quitC chan bool) Key {
+func (pc *PyramidChunker) enqueueDataChunk(chunkData []byte, size uint64, parent *TreeEntry, chunkWG *sync.WaitGroup) Address {
     binary.LittleEndian.PutUint64(chunkData[:8], size)
-    pkey := parent.chunk[8+parent.branchCount*self.hashSize : 8+(parent.branchCount+1)*self.hashSize]
+    pkey := parent.chunk[8+parent.branchCount*pc.hashSize : 8+(parent.branchCount+1)*pc.hashSize]

     chunkWG.Add(1)
     select {
-    case jobC <- &chunkJob{pkey, chunkData[:size+8], int64(size), chunkWG, DataChunk, -1}:
-    case <-quitC:
+    case pc.jobC <- &chunkJob{pkey, chunkData[:size+8], chunkWG}:
+    case <-pc.quitC:
     }

     return pkey

 }
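Note how enqueueDataChunk returns pkey, a subslice of the parent's own buffer: the worker later copies the chunk's hash into that window, so the parent chunk is assembled in place. The offset arithmetic, restated on its own (helper name is ours):

// Illustration of the pkey window arithmetic: child i of a parent occupies
// bytes [8+i*hashSize, 8+(i+1)*hashSize) of the parent chunk, i.e. one
// hashSize slot after the 8-byte size prefix. With hashSize = 32, child 0
// lives at bytes 8..40, child 1 at 40..72, and so on.
func childKeySlot(parentChunk []byte, i, hashSize int64) []byte {
    return parentChunk[8+i*hashSize : 8+(i+1)*hashSize]
}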

+// depth returns the number of chunk levels.
+// It is used to detect if there is only one data chunk
+// left for the last branch.
+func (pc *PyramidChunker) depth() (d int) {
+    for _, l := range pc.chunkLevel {
+        if l == nil {
+            return
+        }
+        d++
+    }
+    return
+}
+
+// cleanChunkLevels removes gaps (nil levels) between chunk levels
+// that are not nil.
+func (pc *PyramidChunker) cleanChunkLevels() {
+    for i, l := range pc.chunkLevel {
+        if l == nil {
+            pc.chunkLevel = append(pc.chunkLevel[:i], append(pc.chunkLevel[i+1:], nil)...)
+        }
+    }
+}
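cleanChunkLevels closes each nil gap by splicing the level out and re-appending a nil at the tail, so the slice keeps its length while the non-nil levels become contiguous. The same splice demonstrated on a simpler element type (our demonstration, not part of the diff):

// compact([][]int{{1}, nil, {2}, nil}) yields [][]int{{1}, {2}, nil, nil}:
// nil gaps move to the tail and the order of non-nil levels is preserved.
func compact(levels [][]int) [][]int {
    for i, l := range levels {
        if l == nil {
            levels = append(levels[:i], append(levels[i+1:], nil)...)
        }
    }
    return levels
}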