core, eth: clean up bloom filtering, add some tests

2017-08-29 14:13:11 +03:00
parent 4ea4d2dc34
commit f585f9eee8
26 changed files with 1650 additions and 1265 deletions
--- a/core/bloombits/doc.go
+++ b/core/bloombits/doc.go
@ -0,0 +1,18 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+// Package bloombits implements bloom filtering on batches of data.
+package bloombits
--- a/core/bloombits/fetcher_test.go
+++ b/core/bloombits/fetcher_test.go
@ -1,101 +0,0 @@
-// Copyright 2017 The go-ethereum Authors
-// This file is part of the go-ethereum library.
-//
-// The go-ethereum library is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Lesser General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// The go-ethereum library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public License
-// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
-package bloombits
-
-import (
-	"bytes"
-	"encoding/binary"
-	"math/rand"
-	"sync"
-	"sync/atomic"
-	"testing"
-	"time"
-)
-
-const testFetcherReqCount = 5000
-
-func fetcherTestVector(b uint, s uint64) []byte {
-	r := make([]byte, 10)
-	binary.BigEndian.PutUint16(r[0:2], uint16(b))
-	binary.BigEndian.PutUint64(r[2:10], s)
-	return r
-}
-
-func TestFetcher(t *testing.T) {
-	testFetcher(t, 1)
-}
-
-func TestFetcherMultipleReaders(t *testing.T) {
-	testFetcher(t, 10)
-}
-
-func testFetcher(t *testing.T, cnt int) {
-	f := &fetcher{
-		requestMap: make(map[uint64]fetchRequest),
-	}
-	distCh := make(chan distRequest, channelCap)
-	stop := make(chan struct{})
-	var reqCount uint32
-
-	for i := 0; i < 10; i++ {
-		go func() {
-			for {
-				req, ok := <-distCh
-				if !ok {
-					return
-				}
-				time.Sleep(time.Duration(rand.Intn(100000)))
-				atomic.AddUint32(&reqCount, 1)
-				f.deliver([]uint64{req.sectionIndex}, [][]byte{fetcherTestVector(req.bloomIndex, req.sectionIndex)})
-			}
-		}()
-	}
-
-	var wg, wg2 sync.WaitGroup
-	for cc := 0; cc < cnt; cc++ {
-		wg.Add(1)
-		in := make(chan uint64, channelCap)
-		out := f.fetch(in, distCh, stop, &wg2)
-
-		time.Sleep(time.Millisecond * 10 * time.Duration(cc))
-		go func() {
-			for i := uint64(0); i < testFetcherReqCount; i++ {
-				in <- i
-			}
-		}()
-
-		go func() {
-			for i := uint64(0); i < testFetcherReqCount; i++ {
-				bv := <-out
-				if !bytes.Equal(bv, fetcherTestVector(0, i)) {
-					if len(bv) != 10 {
-						t.Errorf("Vector #%d length is %d, expected 10", i, len(bv))
-					} else {
-						j := binary.BigEndian.Uint64(bv[2:10])
-						t.Errorf("Expected vector #%d, fetched #%d", i, j)
-					}
-				}
-			}
-			wg.Done()
-		}()
-	}
-
-	wg.Wait()
-	close(stop)
-	if reqCount != testFetcherReqCount {
-		t.Errorf("Request count mismatch: expected %v, got %v", testFetcherReqCount, reqCount)
-	}
-}
--- a/core/bloombits/generator.go
+++ b/core/bloombits/generator.go
@ -0,0 +1,84 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package bloombits
+
+import (
+	"errors"
+
+	"github.com/ethereum/go-ethereum/core/types"
+)
+
+// errSectionOutOfBounds is returned if the user tried to add more bloom filters
+// to the batch than there is space available, or tried to retrieve a bitset
+// with an out-of-range index.
+var errSectionOutOfBounds = errors.New("section out of bounds")
+
+// Generator takes a number of bloom filters and generates the rotated bloom bits
+// to be used for batched filtering.
+//
+// Blooms are added one at a time through AddBloom; the per-bit vectors can only
+// be read back through Bitset once exactly `sections` blooms have been added.
+type Generator struct {
+	blooms   [types.BloomBitLength][]byte // Rotated blooms for per-bit matching
+	sections uint                         // Number of sections to batch together
+	nextBit  uint                         // Next bit to set when adding a bloom
+}
+
+// NewGenerator allocates a rotated bloom generator with room for the given
+// number of sections. Every section occupies a single bit in each of the per-bit
+// vectors, so the section count has to be a multiple of 8; an error is returned
+// otherwise.
+func NewGenerator(sections uint) (*Generator, error) {
+	if sections%8 != 0 {
+		return nil, errors.New("section count not multiple of 8")
+	}
+	gen := &Generator{sections: sections}
+
+	// One vector per bloom bit, each holding one bit per section
+	size := sections / 8
+	for bit := range gen.blooms {
+		gen.blooms[bit] = make([]byte, size)
+	}
+	return gen, nil
+}
+
+// AddBloom takes a single bloom filter and sets the corresponding bit column
+// in memory accordingly.
+func (b *Generator) AddBloom(bloom types.Bloom) error {
+	// Make sure we're not adding more bloom filters than our capacity
+	if b.nextBit >= b.sections {
+		return errSectionOutOfBounds
+	}
+	// Rotate the bloom and insert into our collection
+	byteMask := b.nextBit / 8
+	bitMask := byte(1) << byte(7-b.nextBit%8)
+
+	for i := 0; i < types.BloomBitLength; i++ {
+		bloomByteMask := types.BloomByteLength - 1 - i/8
+		bloomBitMask := byte(1) << byte(i%8)
+
+		if (bloom[bloomByteMask] & bloomBitMask) != 0 {
+			b.blooms[i][byteMask] |= bitMask
+		}
+	}
+	b.nextBit++
+
+	return nil
+}
+
+// Bitset returns the bit vector belonging to the given bloom bit index after all
+// blooms have been added.
+//
+// The index addresses one of the types.BloomBitLength bits of a bloom filter,
+// not a section, so it is validated against the bloom length rather than the
+// section count. An error is returned if the generator has not been completely
+// filled yet, or if idx does not refer to a valid bloom bit.
+func (b *Generator) Bitset(idx uint) ([]byte, error) {
+	if b.nextBit != b.sections {
+		return nil, errors.New("bloom not fully generated yet")
+	}
+	// b.blooms has exactly types.BloomBitLength entries; checking against the
+	// section count would either panic or reject valid bits whenever the two
+	// numbers differ
+	if idx >= types.BloomBitLength {
+		return nil, errSectionOutOfBounds
+	}
+	return b.blooms[idx], nil
+}
--- a/core/bloombits/generator_test.go
+++ b/core/bloombits/generator_test.go
@ -0,0 +1,60 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package bloombits
+
+import (
+	"bytes"
+	"math/rand"
+	"testing"
+
+	"github.com/ethereum/go-ethereum/core/types"
+)
+
+// Tests that batched bloom bits are correctly rotated from the input bloom
+// filters. The batch holds exactly types.BloomBitLength sections, so both the
+// input and the expected output are square bit matrices.
+func TestGenerator(t *testing.T) {
+	// Generate the input and the rotated output
+	var input, output [types.BloomBitLength][types.BloomByteLength]byte
+
+	for i := 0; i < types.BloomBitLength; i++ {
+		for j := 0; j < types.BloomBitLength; j++ {
+			bit := byte(rand.Int() % 2)
+
+			// Bit j (counted from the most significant end) of input bloom i is
+			// expected as the bit of section i in the vector of the mirrored
+			// bloom bit index
+			input[i][j/8] |= bit << byte(7-j%8)
+			output[types.BloomBitLength-1-j][i/8] |= bit << byte(7-i%8)
+		}
+	}
+	// Crunch the input through the generator and verify the result
+	gen, err := NewGenerator(types.BloomBitLength)
+	if err != nil {
+		t.Fatalf("failed to create bloombit generator: %v", err)
+	}
+	for i, bloom := range input {
+		if err := gen.AddBloom(bloom); err != nil {
+			t.Fatalf("bloom %d: failed to add: %v", i, err)
+		}
+	}
+	// Compare every per-bit vector against the independently rotated matrix
+	for i, want := range output {
+		have, err := gen.Bitset(uint(i))
+		if err != nil {
+			t.Fatalf("output %d: failed to retrieve bits: %v", i, err)
+		}
+		if !bytes.Equal(have, want[:]) {
+			t.Errorf("output %d: bit vector mismatch have %x, want %x", i, have, want)
+		}
+	}
+}
--- a/core/bloombits/matcher.go
+++ b/core/bloombits/matcher.go
--- a/core/bloombits/matcher_test.go
+++ b/core/bloombits/matcher_test.go
@ -13,6 +13,7 @@
 //
 // You should have received a copy of the GNU Lesser General Public License
 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
 package bloombits

 import (
@ -20,151 +21,45 @@ import (
 	"sync/atomic"
 	"testing"
 	"time"
-
-	"github.com/ethereum/go-ethereum/core/types"
 )

 const testSectionSize = 4096

-func matcherTestVector(b uint, s uint64) []byte {
-	r := make([]byte, testSectionSize/8)
-	for i, _ := range r {
-		var bb byte
-		for bit := 0; bit < 8; bit++ {
-			blockIdx := s*testSectionSize + uint64(i*8+bit)
-			bb += bb
-			if (blockIdx % uint64(b)) == 0 {
-				bb++
-			}
-		}
-		r[i] = bb
-	}
-	return r
+// Tests the matcher pipeline on a single continuous workflow without interrupts.
+func TestMatcherContinuous(t *testing.T) {
+	testMatcherDiffBatches(t, [][]bloomIndexes{{{10, 20, 30}}}, 100000, false, 75)
+	testMatcherDiffBatches(t, [][]bloomIndexes{{{32, 3125, 100}}, {{40, 50, 10}}}, 100000, false, 81)
+	testMatcherDiffBatches(t, [][]bloomIndexes{{{4, 8, 11}, {7, 8, 17}}, {{9, 9, 12}, {15, 20, 13}}, {{18, 15, 15}, {12, 10, 4}}}, 10000, false, 36)
 }

-func expMatch1(idxs types.BloomIndexList, i uint64) bool {
-	for _, ii := range idxs {
-		if (i % uint64(ii)) != 0 {
-			return false
-		}
-	}
-	return true
+// Tests the matcher pipeline on a constantly interrupted and resumed work pattern
+// with the aim of ensuring data items are requested only once.
+func TestMatcherIntermittent(t *testing.T) {
+	testMatcherDiffBatches(t, [][]bloomIndexes{{{10, 20, 30}}}, 100000, true, 75)
+	testMatcherDiffBatches(t, [][]bloomIndexes{{{32, 3125, 100}}, {{40, 50, 10}}}, 100000, true, 81)
+	testMatcherDiffBatches(t, [][]bloomIndexes{{{4, 8, 11}, {7, 8, 17}}, {{9, 9, 12}, {15, 20, 13}}, {{18, 15, 15}, {12, 10, 4}}}, 10000, true, 36)
 }

-func expMatch2(idxs []types.BloomIndexList, i uint64) bool {
-	for _, ii := range idxs {
-		if expMatch1(ii, i) {
-			return true
-		}
-	}
-	return false
-}
-
-func expMatch3(idxs [][]types.BloomIndexList, i uint64) bool {
-	for _, ii := range idxs {
-		if !expMatch2(ii, i) {
-			return false
-		}
-	}
-	return true
-}
-
-func testServeMatcher(m *Matcher, stop chan struct{}, cnt *uint32, maxRequestLen int) {
-	// serve matcher with test vectors
+// Tests the matcher pipeline on random input to hopefully catch anomalies.
+func TestMatcherRandom(t *testing.T) {
 	for i := 0; i < 10; i++ {
-		go func() {
-			for {
-				select {
-				case <-stop:
-					return
-				default:
-				}
-				b, ok := m.AllocSectionQueue()
-				if !ok {
-					return
-				}
-				if m.SectionCount(b) < maxRequestLen {
-					time.Sleep(time.Microsecond * 100)
-				}
-				s := m.FetchSections(b, maxRequestLen)
-				res := make([][]byte, len(s))
-				for i, ss := range s {
-					res[i] = matcherTestVector(b, ss)
-					atomic.AddUint32(cnt, 1)
-				}
-				m.Deliver(b, s, res)
-			}
-		}()
+		testMatcherBothModes(t, makeRandomIndexes([]int{1}, 50), 10000, 0)
+		testMatcherBothModes(t, makeRandomIndexes([]int{3}, 50), 10000, 0)
+		testMatcherBothModes(t, makeRandomIndexes([]int{2, 2, 2}, 20), 10000, 0)
+		testMatcherBothModes(t, makeRandomIndexes([]int{5, 5, 5}, 50), 10000, 0)
+		testMatcherBothModes(t, makeRandomIndexes([]int{4, 4, 4}, 20), 10000, 0)
 	}
 }

-func testMatcher(t *testing.T, idxs [][]types.BloomIndexList, cnt uint64, stopOnMatches bool, expCount uint32) uint32 {
-	count1 := testMatcherWithReqCount(t, idxs, cnt, stopOnMatches, expCount, 1)
-	count16 := testMatcherWithReqCount(t, idxs, cnt, stopOnMatches, expCount, 16)
-	if count1 != count16 {
-		t.Errorf("Error matching idxs = %v  count = %v  stopOnMatches = %v: request count mismatch, %v with maxReqCount = 1 vs. %v with maxReqCount = 16", idxs, cnt, stopOnMatches, count1, count16)
-	}
-	return count1
-}
-
-func testMatcherWithReqCount(t *testing.T, idxs [][]types.BloomIndexList, cnt uint64, stopOnMatches bool, expCount uint32, maxReqCount int) uint32 {
-	m := NewMatcher(testSectionSize, nil, nil)
-
-	for _, idxss := range idxs {
-		for _, idxs := range idxss {
-			for _, idx := range idxs {
-				m.newFetcher(idx)
-			}
-		}
-	}
-
-	m.addresses = idxs[0]
-	m.topics = idxs[1:]
-	var reqCount uint32
-
-	stop := make(chan struct{})
-	chn := m.Start(0, cnt-1)
-	testServeMatcher(m, stop, &reqCount, maxReqCount)
-
-	for i := uint64(0); i < cnt; i++ {
-		if expMatch3(idxs, i) {
-			match, ok := <-chn
-			if !ok {
-				t.Errorf("Error matching idxs = %v  count = %v  stopOnMatches = %v: expected #%v, results channel closed", idxs, cnt, stopOnMatches, i)
-				return 0
-			}
-			if match != i {
-				t.Errorf("Error matching idxs = %v  count = %v  stopOnMatches = %v: expected #%v, got #%v", idxs, cnt, stopOnMatches, i, match)
-			}
-			if stopOnMatches {
-				m.Stop()
-				close(stop)
-				stop = make(chan struct{})
-				chn = m.Start(i+1, cnt-1)
-				testServeMatcher(m, stop, &reqCount, maxReqCount)
-			}
-		}
-	}
-	match, ok := <-chn
-	if ok {
-		t.Errorf("Error matching idxs = %v  count = %v  stopOnMatches = %v: expected closed channel, got #%v", idxs, cnt, stopOnMatches, match)
-	}
-	m.Stop()
-	close(stop)
-
-	if expCount != 0 && expCount != reqCount {
-		t.Errorf("Error matching idxs = %v  count = %v  stopOnMatches = %v: request count mismatch, expected #%v, got #%v", idxs, cnt, stopOnMatches, expCount, reqCount)
-	}
-
-	return reqCount
-}
-
-func testRandomIdxs(l []int, max int) [][]types.BloomIndexList {
-	res := make([][]types.BloomIndexList, len(l))
-	for i, ll := range l {
-		res[i] = make([]types.BloomIndexList, ll)
-		for j, _ := range res[i] {
-			for k, _ := range res[i][j] {
+// makeRandomIndexes generates a random filter system, composed of multiple filter
+// criteria, each having one bloom list component for the address and arbitrarily
+// many topic bloom list components.
+func makeRandomIndexes(lengths []int, max int) [][]bloomIndexes {
+	res := make([][]bloomIndexes, len(lengths))
+	for i, topics := range lengths {
+		res[i] = make([]bloomIndexes, topics)
+		for j := 0; j < topics; j++ {
+			for k := 0; k < len(res[i][j]); k++ {
 				res[i][j][k] = uint(rand.Intn(max-1) + 2)
 			}
 		}
@ -172,25 +67,173 @@ func testRandomIdxs(l []int, max int) [][]types.BloomIndexList {
 	return res
 }

-func TestMatcher(t *testing.T) {
-	testMatcher(t, [][]types.BloomIndexList{{{10, 20, 30}}}, 100000, false, 75)
-	testMatcher(t, [][]types.BloomIndexList{{{32, 3125, 100}}, {{40, 50, 10}}}, 100000, false, 81)
-	testMatcher(t, [][]types.BloomIndexList{{{4, 8, 11}, {7, 8, 17}}, {{9, 9, 12}, {15, 20, 13}}, {{18, 15, 15}, {12, 10, 4}}}, 10000, false, 36)
-}
+// testMatcherDiffBatches runs the given matches test in single-delivery and also
+// in batched delivery mode, verifying that all kinds of deliveries are handled
+// correctly.
+func testMatcherDiffBatches(t *testing.T, filter [][]bloomIndexes, blocks uint64, intermittent bool, retrievals uint32) {
+	singleton := testMatcher(t, filter, blocks, intermittent, retrievals, 1)
+	batched := testMatcher(t, filter, blocks, intermittent, retrievals, 16)

-func TestMatcherStopOnMatches(t *testing.T) {
-	testMatcher(t, [][]types.BloomIndexList{{{10, 20, 30}}}, 100000, true, 75)
-	testMatcher(t, [][]types.BloomIndexList{{{4, 8, 11}, {7, 8, 17}}, {{9, 9, 12}, {15, 20, 13}}, {{18, 15, 15}, {12, 10, 4}}}, 10000, true, 36)
-}
-
-func TestMatcherRandom(t *testing.T) {
-	for i := 0; i < 20; i++ {
-		testMatcher(t, testRandomIdxs([]int{1}, 50), 100000, false, 0)
-		testMatcher(t, testRandomIdxs([]int{3}, 50), 100000, false, 0)
-		testMatcher(t, testRandomIdxs([]int{2, 2, 2}, 20), 100000, false, 0)
-		testMatcher(t, testRandomIdxs([]int{5, 5, 5}, 50), 100000, false, 0)
-		idxs := testRandomIdxs([]int{2, 2, 2}, 20)
-		reqCount := testMatcher(t, idxs, 10000, false, 0)
-		testMatcher(t, idxs, 10000, true, reqCount)
+	if singleton != batched {
+		t.Errorf("filter = %v blocks = %v intermittent = %v: request count mismatch, %v in signleton vs. %v in batched mode", filter, blocks, intermittent, singleton, batched)
 	}
 }
+
+// testMatcherBothModes runs the given matcher test in both continuous as well as
+// in intermittent mode, verifying that the request counts match each other.
+func testMatcherBothModes(t *testing.T, filter [][]bloomIndexes, blocks uint64, retrievals uint32) {
+	continuous := testMatcher(t, filter, blocks, false, retrievals, 16)
+	intermittent := testMatcher(t, filter, blocks, true, retrievals, 16)
+
+	if continuous != intermittent {
+		t.Errorf("filter = %v blocks = %v: request count mismatch, %v in continuous vs. %v in intermittent mode", filter, blocks, continuous, intermittent)
+	}
+}
+
+// testMatcher is a generic tester to run the given matcher test and return the
+// number of requests made for cross validation between different modes.
+//
+// The first element of filter is installed as the address criteria and the rest
+// as the topic criteria. If intermittent is set, the session is torn down and
+// restarted after every match. A non-zero retrievals value is compared against
+// the number of retrievals actually served, and maxReqCount is the batch size
+// handed to the retriever goroutines.
+func testMatcher(t *testing.T, filter [][]bloomIndexes, blocks uint64, intermittent bool, retrievals uint32, maxReqCount int) uint32 {
+	// Create a new matcher and simulate our explicit random bitsets
+	matcher := NewMatcher(testSectionSize, nil, nil)
+
+	matcher.addresses = filter[0]
+	matcher.topics = filter[1:]
+
+	for _, rule := range filter {
+		for _, topic := range rule {
+			for _, bit := range topic {
+				matcher.addScheduler(bit)
+			}
+		}
+	}
+	// Track the number of retrieval requests made
+	var requested uint32
+
+	// Start the matching session for the filter and the retriever goroutines
+	quit := make(chan struct{})
+	matches := make(chan uint64, 16)
+
+	session, err := matcher.Start(0, blocks-1, matches)
+	if err != nil {
+		t.Fatalf("failed to start matcher session: %v", err)
+	}
+	startRetrievers(session, quit, &requested, maxReqCount)
+
+	// Iterate over all the blocks and verify that the pipeline produces the correct matches
+	for i := uint64(0); i < blocks; i++ {
+		if expMatch3(filter, i) {
+			match, ok := <-matches
+			if !ok {
+				t.Errorf("filter = %v  blocks = %v  intermittent = %v: expected #%v, results channel closed", filter, blocks, intermittent, i)
+				return 0
+			}
+			if match != i {
+				t.Errorf("filter = %v  blocks = %v  intermittent = %v: expected #%v, got #%v", filter, blocks, intermittent, i, match)
+			}
+			// If we're testing intermittent mode, abort and restart the pipeline
+			if intermittent {
+				session.Close(time.Second)
+				close(quit)
+
+				quit = make(chan struct{})
+				matches = make(chan uint64, 16)
+
+				session, err = matcher.Start(i+1, blocks-1, matches)
+				if err != nil {
+					t.Fatalf("failed to start matcher session: %v", err)
+				}
+				startRetrievers(session, quit, &requested, maxReqCount)
+			}
+		}
+	}
+	// Ensure the result channel is torn down after the last block
+	match, ok := <-matches
+	if ok {
+		t.Errorf("filter = %v  blocks = %v  intermittent = %v: expected closed channel, got #%v", filter, blocks, intermittent, match)
+	}
+	// Clean up the session and ensure we match the expected retrieval count
+	session.Close(time.Second)
+	close(quit)
+
+	// The retrievers update the counter atomically and may still be winding
+	// down, so read it atomically too
+	served := atomic.LoadUint32(&requested)
+	if retrievals != 0 && served != retrievals {
+		t.Errorf("filter = %v  blocks = %v  intermittent = %v: request count mismatch, have #%v, want #%v", filter, blocks, intermittent, served, retrievals)
+	}
+	return served
+}
+
+// startRetrievers starts a batch of goroutines listening for section requests
+// and serving them.
+//
+// Ten multiplexer/server pairs are launched, all sharing a single unbuffered
+// requests channel. The servers terminate when quit is closed; every bitset
+// they generate is counted in retrievals, and batch is passed on to
+// session.Multiplex.
+func startRetrievers(session *MatcherSession, quit chan struct{}, retrievals *uint32, batch int) {
+	requests := make(chan chan *Retrieval)
+
+	for i := 0; i < 10; i++ {
+		// Start a multiplexer to test multiple threaded execution
+		// NOTE(review): presumably Multiplex pushes a per-task channel into
+		// requests and exchanges the task over it - confirm against matcher.go
+		go session.Multiplex(batch, 100*time.Microsecond, requests)
+
+		// Start a service to match the above multiplexer
+		go func() {
+			for {
+				// Wait for a service request or a shutdown
+				select {
+				case <-quit:
+					return
+
+				case request := <-requests:
+					// Pull the task from the request channel, fill in the
+					// bitsets and hand the very same task back on that channel
+					task := <-request
+
+					task.Bitsets = make([][]byte, len(task.Sections))
+					for i, section := range task.Sections {
+						if rand.Int()%4 != 0 { // Handle occasional missing deliveries
+							task.Bitsets[i] = generateBitset(task.Bit, section)
+							atomic.AddUint32(retrievals, 1)
+						}
+					}
+					request <- task
+				}
+			}
+		}()
+	}
+}
+
+// generateBitset generates the rotated bitset for the given bloom bit and section
+// numbers.
+func generateBitset(bit uint, section uint64) []byte {
+	bitset := make([]byte, testSectionSize/8)
+	for i := 0; i < len(bitset); i++ {
+		for b := 0; b < 8; b++ {
+			blockIdx := section*testSectionSize + uint64(i*8+b)
+			bitset[i] += bitset[i]
+			if (blockIdx % uint64(bit)) == 0 {
+				bitset[i]++
+			}
+		}
+	}
+	return bitset
+}
+
+// expMatch1 reports whether block number i is divisible by every bloom bit
+// index of the given list, i.e. whether all of its test bits are set.
+func expMatch1(filter bloomIndexes, i uint64) bool {
+	for _, bit := range filter {
+		if i%uint64(bit) != 0 {
+			return false
+		}
+	}
+	return true
+}
+
+// expMatch2 reports whether block number i is matched by at least one of the
+// alternative bloom index lists.
+func expMatch2(filter []bloomIndexes, i uint64) bool {
+	for _, alternative := range filter {
+		if expMatch1(alternative, i) {
+			return true
+		}
+	}
+	return false
+}
+
+// expMatch3 reports whether block number i is matched by every criteria of the
+// filter, each criteria being a set of alternative bloom index lists.
+func expMatch3(filter [][]bloomIndexes, i uint64) bool {
+	for _, criteria := range filter {
+		if !expMatch2(criteria, i) {
+			return false
+		}
+	}
+	return true
+}
--- a/core/bloombits/scheduler.go
+++ b/core/bloombits/scheduler.go
@ -0,0 +1,181 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package bloombits
+
+import (
+	"sync"
+)
+
+// request represents a bloom retrieval task to prioritize and pull from the local
+// database or remotely from the network.
+type request struct {
+	section uint64 // Section index to retrieve the bit-vector from
+	bit     uint   // Bit index within the section to retrieve the vector of
+}
+
+// response represents the state of a requested bit-vector through a scheduler.
+// The done channel is closed by the scheduler's deliver method right after the
+// cached bits have been filled in.
+type response struct {
+	cached []byte        // Cached bits to dedup multiple requests
+	done   chan struct{} // Channel to allow waiting for completion
+}
+
+// scheduler handles the scheduling of bloom-filter retrieval operations for
+// entire section-batches belonging to a single bloom bit. Besides scheduling the
+// retrieval operations, this struct also deduplicates the requests and caches
+// the results to minimize network/database overhead even in complex filtering
+// scenarios.
+type scheduler struct {
+	bit       uint                 // Index of the bit in the bloom filter this scheduler is responsible for
+	responses map[uint64]*response // Currently pending retrieval requests or already cached responses
+	lock      sync.Mutex           // Lock protecting the responses from concurrent access
+}
+
+// newScheduler returns an empty retrieval scheduler that is responsible for the
+// bloom bit with the given index.
+func newScheduler(idx uint) *scheduler {
+	s := new(scheduler)
+	s.bit = idx
+	s.responses = make(map[uint64]*response)
+	return s
+}
+
+// run spins up a retrieval pipeline that reads section indexes from sections
+// and pushes the corresponding bit-vectors, in the same order, into done. The
+// same scheduler may be run multiple times concurrently, in which case identical
+// retrieval tasks are deduplicated. Both pipeline goroutines are registered with
+// the supplied wait group.
+func (s *scheduler) run(sections chan uint64, dist chan *request, done chan []byte, quit chan struct{}, wg *sync.WaitGroup) {
+	// The channel linking the request and delivery stages has the capacity of
+	// the distribution channel, as that one will block the pipeline anyway
+	forward := make(chan uint64, cap(dist))
+
+	// Launch the two stages: user -> distributor and distributor -> user
+	wg.Add(2)
+	go s.scheduleRequests(sections, dist, forward, quit, wg)
+	go s.scheduleDeliveries(forward, done, quit, wg)
+}
+
+// reset drops every response that was requested but never delivered, keeping
+// only the cached ones. This is required before a restart, to ensure that no
+// stale pending entry from a previous run can cause a lockup.
+func (s *scheduler) reset() {
+	s.lock.Lock()
+	defer s.lock.Unlock()
+
+	for idx, pending := range s.responses {
+		if pending.cached != nil {
+			continue
+		}
+		delete(s.responses, idx)
+	}
+}
+
+// scheduleRequests reads section retrieval requests from the input channel,
+// deduplicates the stream and pushes unique retrieval tasks into the distribution
+// channel for a database or network layer to honour.
+//
+// Every section read, unique or not, is also forwarded on pend so that the
+// delivery stage waits for it. The goroutine stops when quit is closed or when
+// reqs is closed, closing pend and signalling wg on its way out.
+func (s *scheduler) scheduleRequests(reqs chan uint64, dist chan *request, pend chan uint64, quit chan struct{}, wg *sync.WaitGroup) {
+	// Clean up the goroutine and pipeline when done
+	defer wg.Done()
+	defer close(pend)
+
+	// Keep reading and scheduling section requests
+	for {
+		select {
+		case <-quit:
+			return
+
+		case section, ok := <-reqs:
+			// New section retrieval requested
+			if !ok {
+				return
+			}
+			// Deduplicate retrieval requests
+			unique := false
+
+			// The first requester of a section registers the pending response
+			// that all later requesters will wait on
+			s.lock.Lock()
+			if s.responses[section] == nil {
+				s.responses[section] = &response{
+					done: make(chan struct{}),
+				}
+				unique = true
+			}
+			s.lock.Unlock()
+
+			// Schedule the section for retrieval and notify the deliverer to expect this section
+			if unique {
+				select {
+				case <-quit:
+					return
+				case dist <- &request{bit: s.bit, section: section}:
+				}
+			}
+			select {
+			case <-quit:
+				return
+			case pend <- section:
+			}
+		}
+	}
+}
+
+// scheduleDeliveries reads section acceptance notifications and waits for them
+// to be delivered, pushing them into the output data buffer.
+//
+// Sections are processed strictly in the order they arrive on pend. The
+// goroutine stops when quit is closed or when pend is closed, closing done and
+// signalling wg on its way out.
+func (s *scheduler) scheduleDeliveries(pend chan uint64, done chan []byte, quit chan struct{}, wg *sync.WaitGroup) {
+	// Clean up the goroutine and pipeline when done
+	defer wg.Done()
+	defer close(done)
+
+	// Keep reading notifications and scheduling deliveries
+	for {
+		select {
+		case <-quit:
+			return
+
+		case idx, ok := <-pend:
+			// New section retrieval pending
+			if !ok {
+				return
+			}
+			// Wait until the request is honoured
+			// NOTE(review): assumes a response is registered for every pending
+			// section; a nil res would panic below - confirm reset is never
+			// called while a run is still active
+			s.lock.Lock()
+			res := s.responses[idx]
+			s.lock.Unlock()
+
+			select {
+			case <-quit:
+				return
+			case <-res.done:
+			}
+			// Deliver the result
+			select {
+			case <-quit:
+				return
+			case done <- res.cached:
+			}
+		}
+	}
+}
+
+// deliver is called by the request distributor when a reply to a request arrives.
+//
+// sections and data are parallel slices: data[i] holds the bit-vector of
+// sections[i]. Sections that were never requested and sections that have
+// already been delivered are silently skipped.
+func (s *scheduler) deliver(sections []uint64, data [][]byte) {
+	s.lock.Lock()
+	defer s.lock.Unlock()
+
+	for i, section := range sections {
+		res := s.responses[section]
+		if res == nil {
+			continue // Nobody requested this section
+		}
+		// Detect double deliveries through the done channel itself instead of
+		// the cached bits, otherwise a nil bit-vector delivered twice would
+		// close the channel a second time and panic
+		select {
+		case <-res.done:
+			continue
+		default:
+		}
+		res.cached = data[i]
+		close(res.done)
+	}
+}
--- a/core/bloombits/scheduler_test.go
+++ b/core/bloombits/scheduler_test.go
@ -0,0 +1,105 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package bloombits
+
+import (
+	"bytes"
+	"math/big"
+	"math/rand"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// Tests that the scheduler can deduplicate and forward retrieval requests to
+// underlying fetchers and serve responses back, irrespective of the concurrency
+// of the requesting clients or serving data fetchers.
+func TestSchedulerSingleClientSingleFetcher(t *testing.T) {
+	testScheduler(t, 1, 1, 5000)
+}
+
+// Same as above, with a single client served by multiple fetchers.
+func TestSchedulerSingleClientMultiFetcher(t *testing.T) {
+	testScheduler(t, 1, 10, 5000)
+}
+
+// Same as above, with multiple clients served by a single fetcher.
+func TestSchedulerMultiClientSingleFetcher(t *testing.T) {
+	testScheduler(t, 10, 1, 5000)
+}
+
+// Same as above, with multiple clients served by multiple fetchers.
+func TestSchedulerMultiClientMultiFetcher(t *testing.T) {
+	testScheduler(t, 10, 10, 5000)
+}
+
+// testScheduler runs the given number of concurrent clients against a single
+// scheduler, each requesting the sections [0, requests) in order, while the
+// given number of fetcher goroutines serve the retrieval tasks. It verifies that
+// every client receives the expected content in request order and that each
+// section was fetched exactly once in total.
+func testScheduler(t *testing.T, clients int, fetchers int, requests int) {
+	f := newScheduler(0)
+
+	// Create a batch of handler goroutines that respond to bloom bit requests and
+	// deliver them to the scheduler.
+	var fetchPend sync.WaitGroup
+	fetchPend.Add(fetchers)
+	defer fetchPend.Wait()
+
+	// Deferred calls run in reverse order: the channel is closed first, after
+	// which the fetchers are waited on
+	fetch := make(chan *request, 16)
+	defer close(fetch)
+
+	var delivered uint32
+	for i := 0; i < fetchers; i++ {
+		go func() {
+			defer fetchPend.Done()
+
+			for req := range fetch {
+				time.Sleep(time.Duration(rand.Intn(int(100 * time.Microsecond))))
+				atomic.AddUint32(&delivered, 1)
+
+				f.deliver([]uint64{
+					req.section + uint64(requests), // Non-requested data (ensure it doesn't go out of bounds)
+					req.section,                    // Requested data
+					req.section,                    // Duplicated data (ensure it doesn't double close anything)
+				}, [][]byte{
+					[]byte{},
+					new(big.Int).SetUint64(req.section).Bytes(),
+					new(big.Int).SetUint64(req.section).Bytes(),
+				})
+			}
+		}()
+	}
+	// Start a batch of goroutines to concurrently run scheduling tasks
+	quit := make(chan struct{})
+
+	var pend sync.WaitGroup
+	pend.Add(clients)
+
+	for i := 0; i < clients; i++ {
+		go func() {
+			defer pend.Done()
+
+			in := make(chan uint64, 16)
+			out := make(chan []byte, 16)
+
+			// The pipeline goroutines register themselves on the same wait
+			// group, so pend.Wait below also covers their termination
+			f.run(in, fetch, out, quit, &pend)
+
+			// Feed the requests from a separate goroutine so the results can be
+			// consumed concurrently
+			go func() {
+				for j := 0; j < requests; j++ {
+					in <- uint64(j)
+				}
+				close(in)
+			}()
+
+			for j := 0; j < requests; j++ {
+				bits := <-out
+				if want := new(big.Int).SetUint64(uint64(j)).Bytes(); !bytes.Equal(bits, want) {
+					t.Errorf("vector %d: delivered content mismatch: have %x, want %x", j, bits, want)
+				}
+			}
+		}()
+	}
+	pend.Wait()
+
+	// Every section must have been fetched once, however many clients asked for it
+	if have := atomic.LoadUint32(&delivered); int(have) != requests {
+		t.Errorf("request count mismatch: have %v, want %v", have, requests)
+	}
+}
--- a/core/bloombits/utils.go
+++ b/core/bloombits/utils.go
@ -1,63 +0,0 @@
-// Copyright 2017 The go-ethereum Authors
-// This file is part of the go-ethereum library.
-//
-// The go-ethereum library is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Lesser General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// The go-ethereum library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public License
-// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
-package bloombits
-
-import (
-	"github.com/ethereum/go-ethereum/core/types"
-)
-
-const BloomLength = 2048
-
-// BloomBitsCreator takes SectionSize number of header bloom filters and calculates the bloomBits vectors of the section
-type BloomBitsCreator struct {
-	blooms              [BloomLength][]byte
-	sectionSize, bitIndex uint64
-}
-
-func NewBloomBitsCreator(sectionSize uint64) *BloomBitsCreator {
-	b := &BloomBitsCreator{sectionSize: sectionSize}
-	for i, _ := range b.blooms {
-		b.blooms[i] = make([]byte, sectionSize/8)
-	}
-	return b
-}
-
-// AddHeaderBloom takes a single bloom filter and sets the corresponding bit column in memory accordingly
-func (b *BloomBitsCreator) AddHeaderBloom(bloom types.Bloom) {
-	if b.bitIndex >= b.sectionSize {
-		panic("too many header blooms added")
-	}
-
-	byteIdx := b.bitIndex / 8
-	bitMask := byte(1) << byte(7-b.bitIndex%8)
-	for bloomBitIdx, _ := range b.blooms {
-		bloomByteIdx := BloomLength/8 - 1 - bloomBitIdx/8
-		bloomBitMask := byte(1) << byte(bloomBitIdx%8)
-		if (bloom[bloomByteIdx] & bloomBitMask) != 0 {
-			b.blooms[bloomBitIdx][byteIdx] |= bitMask
-		}
-	}
-	b.bitIndex++
-}
-
-// GetBitVector returns the bit vector belonging to the given bit index after header blooms have been added
-func (b *BloomBitsCreator) GetBitVector(idx uint) []byte {
-	if b.bitIndex != b.sectionSize {
-		panic("not enough header blooms added")
-	}
-
-	return b.blooms[idx][:]
-}