// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package storage

import (
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"strings"
	"sync"

	"github.com/ethereum/go-ethereum/common"
)

const (
	processors = 8
)

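// Tree is the memoization structure shared by the processor goroutines. It
// holds one map per level of the chunk tree, keyed by the node's index within
// that level, plus the total number of data chunks being split.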
type Tree struct {
	Chunks int64
	Levels []map[int64]*Node
	Lock   sync.RWMutex
}

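// Node is a partially assembled node of the chunk tree. Pending counts the
// child hashes still missing, Size accumulates the data bytes covered by the
// subtree and Last marks the subtree containing the final chunk of the input.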
type Node struct {
	Pending  int64
	Size     uint64
	Children []common.Hash
	Last     bool
}

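// String implements fmt.Stringer, listing the node's counters and its hex
// encoded child hashes for debugging.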
func (self *Node) String() string {
	var children []string
	for _, node := range self.Children {
		children = append(children, node.Hex())
	}
	return fmt.Sprintf("pending: %v, size: %v, last: %v, children: %v", self.Pending, self.Size, self.Last, strings.Join(children, ", "))
}

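// Task is a unit of work handed to the processor pool: a single data chunk
// read from the input, with its payload prefixed by an 8 byte length field.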
type Task struct {
	Index int64 // Index of the chunk being processed
	Size  uint64
	Data  []byte // Binary blob of the chunk
	Last  bool
}

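// PyramidChunker splits data into chunks and assembles the chunk tree bottom
// up: leaf chunks are hashed as they are read and their hashes are combined
// into intermediate chunks level by level until a single root key remains.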
type PyramidChunker struct {
	hashFunc    Hasher
	chunkSize   int64
	hashSize    int64
	branches    int64
	workerCount int
}

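// NewPyramidChunker creates a pyramid chunker from the given parameters,
// deriving the chunk size from the hash size and the branching factor.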
func NewPyramidChunker(params *ChunkerParams) (self *PyramidChunker) {
	self = &PyramidChunker{}
	self.hashFunc = MakeHashFunc(params.Hash)
	self.branches = params.Branches
	self.hashSize = int64(self.hashFunc().Size())
	self.chunkSize = self.hashSize * self.branches
	self.workerCount = 1
	return
}

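// Split reads the input into chunks of chunkSize bytes, hands them to a pool
// of processor goroutines and returns the key (root hash) of the resulting
// chunk tree. Generated chunks are pushed to chunkC if it is not nil, adding
// to swg for each one when it is provided.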
func (self *PyramidChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {

	chunks := (size + self.chunkSize - 1) / self.chunkSize
	depth := int(math.Ceil(math.Log(float64(chunks))/math.Log(float64(self.branches)))) + 1

	results := Tree{
		Chunks: chunks,
		Levels: make([]map[int64]*Node, depth),
	}
	for i := 0; i < depth; i++ {
		results.Levels[i] = make(map[int64]*Node)
	}
	// Create a pool of workers to crunch through the file
	tasks := make(chan *Task, 2*processors)
	pend := new(sync.WaitGroup)
	abortC := make(chan bool)
	for i := 0; i < processors; i++ {
		pend.Add(1)
		go self.processor(pend, swg, tasks, chunkC, &results)
	}
	// Feed the chunks into the task pool
	read := 0
	for index := 0; ; index++ {
		buffer := make([]byte, self.chunkSize+8) // reserve 8 bytes for the length prefix
		n, err := data.Read(buffer[8:])
		read += n
		last := int64(read) == size || err == io.ErrUnexpectedEOF || err == io.EOF
		if err != nil && !last {
			close(abortC)
			break
		}
		// Encode the payload length into the prefix and dispatch the task
		binary.LittleEndian.PutUint64(buffer[:8], uint64(n))
		pend.Add(1)
		select {
		case tasks <- &Task{Index: int64(index), Size: uint64(n), Data: buffer[:n+8], Last: last}:
		case <-abortC:
			return nil, err
		}
		if last {
			break
		}
	}
	// Wait for the workers and return
	close(tasks)
	pend.Wait()

	// The key of the content is the hash stored in the top level node's first child
	key := results.Levels[0][0].Children[0][:]
	return key, nil
}

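// processor consumes tasks from the task channel, hashes each data chunk and
// propagates the hash up the tree: whenever a node's last missing child
// arrives, the node itself is hashed and inserted one level higher.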
func (self *PyramidChunker) processor(pend, swg *sync.WaitGroup, tasks chan *Task, chunkC chan *Chunk, results *Tree) {
	defer pend.Done()

	// Start processing leaf chunks ad infinitum
	hasher := self.hashFunc()
	for task := range tasks {
		// Start at the leaf level; pow is the number of data chunks covered by
		// one node at the current level
		depth, pow := len(results.Levels)-1, self.branches
		size := task.Size
		data := task.Data
		var node *Node
		for depth >= 0 {
			// New chunk received, reset the hasher and start processing
			hasher.Reset()
			if node == nil { // Leaf node, hash the data chunk
				hasher.Write(task.Data)
			} else { // Internal node, hash the children
				size = node.Size
				data = make([]byte, hasher.Size()*len(node.Children)+8)
				binary.LittleEndian.PutUint64(data[:8], size)

				hasher.Write(data[:8])
				for i, hash := range node.Children {
					copy(data[i*hasher.Size()+8:], hash[:])
					hasher.Write(hash[:])
				}
			}
			hash := hasher.Sum(nil)
			last := task.Last || (node != nil) && node.Last
			// Insert the subresult into the memoization tree
			results.Lock.Lock()
			if node = results.Levels[depth][task.Index/pow]; node == nil {
				// Figure out the pending tasks
				pending := self.branches
				if task.Index/pow == results.Chunks/pow {
					pending = (results.Chunks + pow/self.branches - 1) / (pow / self.branches) % self.branches
				}
				node = &Node{pending, 0, make([]common.Hash, pending), last}
				results.Levels[depth][task.Index/pow] = node
			}
			node.Pending--
			i := task.Index / (pow / self.branches) % self.branches // child slot of this hash within the node
			if last {
				node.Last = true
			}
			copy(node.Children[i][:], hash)
			node.Size += size
			left := node.Pending
			if chunkC != nil {
				if swg != nil {
					swg.Add(1)
				}
				select {
				case chunkC <- &Chunk{Key: hash, SData: data, wg: swg}:
					// case <- self.quitC
				}
			}
			if depth+1 < len(results.Levels) {
				delete(results.Levels[depth+1], task.Index/(pow/self.branches))
			}

			results.Lock.Unlock()
			// If there's more work to be done, leave for others
			if left > 0 {
				break
			}
			// We're the last ones in this batch, merge the children together
			depth--
			pow *= self.branches
		}
		// Mark the task scheduled by Split as done
		pend.Done()
	}
}