* brokenLimitedReader gives an error after half the size * TestRandomBrokenData tests the chunker with a broken reader * add blocking quitC (instead of errC) and use errC only for errors * don't close chunkC in the tester's Split * use quitC to quit the chunk storage loop
		
			
				
	
	
		
			202 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			202 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright 2016 The go-ethereum Authors
 | |
| // This file is part of the go-ethereum library.
 | |
| //
 | |
| // The go-ethereum library is free software: you can redistribute it and/or modify
 | |
| // it under the terms of the GNU Lesser General Public License as published by
 | |
| // the Free Software Foundation, either version 3 of the License, or
 | |
| // (at your option) any later version.
 | |
| //
 | |
| // The go-ethereum library is distributed in the hope that it will be useful,
 | |
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 | |
| // GNU Lesser General Public License for more details.
 | |
| //
 | |
| // You should have received a copy of the GNU Lesser General Public License
 | |
| // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 | |
| 
 | |
| package storage
 | |
| 
 | |
| import (
 | |
| 	"encoding/binary"
 | |
| 	"fmt"
 | |
| 	"io"
 | |
| 	"math"
 | |
| 	"strings"
 | |
| 	"sync"
 | |
| 
 | |
| 	"github.com/ethereum/go-ethereum/common"
 | |
| )
 | |
| 
 | |
// processors is the number of worker goroutines Split starts to hash chunks;
// the task queue feeding them is buffered to twice this value.
const processors = 8
 | |
| 
 | |
| type Tree struct {
 | |
| 	Chunks int64
 | |
| 	Levels []map[int64]*Node
 | |
| 	Lock   sync.RWMutex
 | |
| }
 | |
| 
 | |
| type Node struct {
 | |
| 	Pending  int64
 | |
| 	Size     uint64
 | |
| 	Children []common.Hash
 | |
| 	Last     bool
 | |
| }
 | |
| 
 | |
| func (self *Node) String() string {
 | |
| 	var children []string
 | |
| 	for _, node := range self.Children {
 | |
| 		children = append(children, node.Hex())
 | |
| 	}
 | |
| 	return fmt.Sprintf("pending: %v, size: %v, last :%v, children: %v", self.Pending, self.Size, self.Last, strings.Join(children, ", "))
 | |
| }
 | |
| 
 | |
// Task describes one data chunk handed from Split to a processor goroutine.
type Task struct {
	// Index is the position of the chunk within the input, starting at 0.
	Index int64

	// Size is the number of payload bytes read for this chunk.
	Size uint64

	// Data is the binary blob of the chunk: an 8-byte little-endian length
	// prefix followed by the payload.
	Data []byte

	// Last marks the final chunk of the input.
	Last bool
}
 | |
| 
 | |
| type PyramidChunker struct {
 | |
| 	hashFunc    Hasher
 | |
| 	chunkSize   int64
 | |
| 	hashSize    int64
 | |
| 	branches    int64
 | |
| 	workerCount int
 | |
| }
 | |
| 
 | |
| func NewPyramidChunker(params *ChunkerParams) (self *PyramidChunker) {
 | |
| 	self = &PyramidChunker{}
 | |
| 	self.hashFunc = MakeHashFunc(params.Hash)
 | |
| 	self.branches = params.Branches
 | |
| 	self.hashSize = int64(self.hashFunc().Size())
 | |
| 	self.chunkSize = self.hashSize * self.branches
 | |
| 	self.workerCount = 1
 | |
| 	return
 | |
| }
 | |
| 
 | |
| func (self *PyramidChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {
 | |
| 
 | |
| 	chunks := (size + self.chunkSize - 1) / self.chunkSize
 | |
| 	depth := int(math.Ceil(math.Log(float64(chunks))/math.Log(float64(self.branches)))) + 1
 | |
| 
 | |
| 	results := Tree{
 | |
| 		Chunks: chunks,
 | |
| 		Levels: make([]map[int64]*Node, depth),
 | |
| 	}
 | |
| 	for i := 0; i < depth; i++ {
 | |
| 		results.Levels[i] = make(map[int64]*Node)
 | |
| 	}
 | |
| 	// Create a pool of workers to crunch through the file
 | |
| 	tasks := make(chan *Task, 2*processors)
 | |
| 	pend := new(sync.WaitGroup)
 | |
| 	abortC := make(chan bool)
 | |
| 	for i := 0; i < processors; i++ {
 | |
| 		pend.Add(1)
 | |
| 		go self.processor(pend, swg, tasks, chunkC, &results)
 | |
| 	}
 | |
| 	// Feed the chunks into the task pool
 | |
| 	read := 0
 | |
| 	for index := 0; ; index++ {
 | |
| 		buffer := make([]byte, self.chunkSize+8)
 | |
| 		n, err := data.Read(buffer[8:])
 | |
| 		read += n
 | |
| 		last := int64(read) == size || err == io.ErrUnexpectedEOF || err == io.EOF
 | |
| 		if err != nil && !last {
 | |
| 			close(abortC)
 | |
| 			break
 | |
| 		}
 | |
| 		binary.LittleEndian.PutUint64(buffer[:8], uint64(n))
 | |
| 		pend.Add(1)
 | |
| 		select {
 | |
| 		case tasks <- &Task{Index: int64(index), Size: uint64(n), Data: buffer[:n+8], Last: last}:
 | |
| 		case <-abortC:
 | |
| 			return nil, err
 | |
| 		}
 | |
| 		if last {
 | |
| 			break
 | |
| 		}
 | |
| 	}
 | |
| 	// Wait for the workers and return
 | |
| 	close(tasks)
 | |
| 	pend.Wait()
 | |
| 
 | |
| 	key := results.Levels[0][0].Children[0][:]
 | |
| 	return key, nil
 | |
| }
 | |
| 
 | |
| func (self *PyramidChunker) processor(pend, swg *sync.WaitGroup, tasks chan *Task, chunkC chan *Chunk, results *Tree) {
 | |
| 	defer pend.Done()
 | |
| 
 | |
| 	// Start processing leaf chunks ad infinitum
 | |
| 	hasher := self.hashFunc()
 | |
| 	for task := range tasks {
 | |
| 		depth, pow := len(results.Levels)-1, self.branches
 | |
| 		size := task.Size
 | |
| 		data := task.Data
 | |
| 		var node *Node
 | |
| 		for depth >= 0 {
 | |
| 			// New chunk received, reset the hasher and start processing
 | |
| 			hasher.Reset()
 | |
| 			if node == nil { // Leaf node, hash the data chunk
 | |
| 				hasher.Write(task.Data)
 | |
| 			} else { // Internal node, hash the children
 | |
| 				size = node.Size
 | |
| 				data = make([]byte, hasher.Size()*len(node.Children)+8)
 | |
| 				binary.LittleEndian.PutUint64(data[:8], size)
 | |
| 
 | |
| 				hasher.Write(data[:8])
 | |
| 				for i, hash := range node.Children {
 | |
| 					copy(data[i*hasher.Size()+8:], hash[:])
 | |
| 					hasher.Write(hash[:])
 | |
| 				}
 | |
| 			}
 | |
| 			hash := hasher.Sum(nil)
 | |
| 			last := task.Last || (node != nil) && node.Last
 | |
| 			// Insert the subresult into the memoization tree
 | |
| 			results.Lock.Lock()
 | |
| 			if node = results.Levels[depth][task.Index/pow]; node == nil {
 | |
| 				// Figure out the pending tasks
 | |
| 				pending := self.branches
 | |
| 				if task.Index/pow == results.Chunks/pow {
 | |
| 					pending = (results.Chunks + pow/self.branches - 1) / (pow / self.branches) % self.branches
 | |
| 				}
 | |
| 				node = &Node{pending, 0, make([]common.Hash, pending), last}
 | |
| 				results.Levels[depth][task.Index/pow] = node
 | |
| 			}
 | |
| 			node.Pending--
 | |
| 			i := task.Index / (pow / self.branches) % self.branches
 | |
| 			if last {
 | |
| 				node.Last = true
 | |
| 			}
 | |
| 			copy(node.Children[i][:], hash)
 | |
| 			node.Size += size
 | |
| 			left := node.Pending
 | |
| 			if chunkC != nil {
 | |
| 				if swg != nil {
 | |
| 					swg.Add(1)
 | |
| 				}
 | |
| 				select {
 | |
| 				case chunkC <- &Chunk{Key: hash, SData: data, wg: swg}:
 | |
| 					// case <- self.quitC
 | |
| 				}
 | |
| 			}
 | |
| 			if depth+1 < len(results.Levels) {
 | |
| 				delete(results.Levels[depth+1], task.Index/(pow/self.branches))
 | |
| 			}
 | |
| 
 | |
| 			results.Lock.Unlock()
 | |
| 			// If there's more work to be done, leave for others
 | |
| 			if left > 0 {
 | |
| 				break
 | |
| 			}
 | |
| 			// We're the last ones in this batch, merge the children together
 | |
| 			depth--
 | |
| 			pow *= self.branches
 | |
| 		}
 | |
| 		pend.Done()
 | |
| 	}
 | |
| }
 |