core/rawdb: fix datarace in freezer (#22728)

The Append / truncate operations were racy. When a datafile reaches 2GB, a new file is needed. For this operation, we require a writelock, which is not needed in the 99.99% of cases where the data fits in the current head-file.

This transition from readlock to writelock was incorrect: as the readlock was released, a truncate operation could slip in between and truncate the data. This would have been fine; however, the Append operation continued writing as if no truncation had occurred, e.g. writing item 5 where item 0 should reside.

This PR changes the behaviour so that when Append runs into the situation that a new file is needed, it aborts and retries, this time with a writelock.

The outcome of the situation described above, running on this PR, would instead be that the Append operation exits with a failure.
This commit is contained in:
Martin Holst Swende
2021-04-26 18:19:07 +02:00
committed by GitHub
parent 34f3c9539b
commit 9b99e3dfe0
2 changed files with 117 additions and 41 deletions

View File

@ -18,10 +18,13 @@ package rawdb
import (
"bytes"
"encoding/binary"
"fmt"
"io/ioutil"
"math/rand"
"os"
"path/filepath"
"sync"
"testing"
"time"
@ -637,6 +640,55 @@ func TestOffset(t *testing.T) {
// 1. have data files d0, d1, d2, d3
// 2. remove d2,d3
//
// However, all 'normal' failure modes arising due to failing to sync() or save a file should be
// handled already, and the case described above can only (?) happen if an external process/user
// deletes files from the filesystem.
// However, all 'normal' failure modes arising due to failing to sync() or save a file
// should be handled already, and the case described above can only (?) happen if an
// external process/user deletes files from the filesystem.
// TestAppendTruncateParallel runs an Append and a truncate against the same table
// concurrently, many times over, to check whether the two operations race.
//
// It is kept here instead of among the regular fuzzers in tests/fuzzers because its
// outcome hinges on goroutine timing rather than on 'clever' input -- it is not
// deterministic.
func TestAppendTruncateParallel(t *testing.T) {
	tmpDir, err := ioutil.TempDir("", "freezer")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(tmpDir)

	table, err := newCustomTable(tmpDir, "tmp", metrics.NilMeter{}, metrics.NilMeter{}, metrics.NilGauge{}, 8, true)
	if err != nil {
		t.Fatal(err)
	}

	// encode returns the 8-byte little-endian representation of mark.
	encode := func(mark uint64) []byte {
		var buf [8]byte
		binary.LittleEndian.PutUint64(buf[:], mark)
		return buf[:]
	}

	for round := 0; round < 5000; round++ {
		// Start every round from an empty table holding only item 0.
		table.truncate(0)
		want := encode(0)
		table.Append(0, want)
		next := encode(1)

		// Race a truncate against the append of item 1. Either call may fail
		// depending on the interleaving, so their results are deliberately ignored.
		var wg sync.WaitGroup
		wg.Add(2)
		go func() {
			defer wg.Done()
			table.truncate(0)
		}()
		go func() {
			defer wg.Done()
			table.Append(1, next)
		}()
		wg.Wait()

		// Item 0 may or may not be retrievable after the race; when it is, it
		// must still hold the bytes that were written for item 0.
		if have, err := table.Retrieve(0); err == nil && !bytes.Equal(have, want) {
			t.Fatalf("have %x want %x", have, want)
		}
	}
}