Provoke unreferenced packs using fewer goroutines

TestParallelSaveWithDuplication has been reworked to provoke
unreferenced packs using fewer goroutines than before and to create
only one bytes.Reader per blob. This reduces memory usage
significantly.

The following actions have been taken to keep the chance of provoking
unreferenced packs due to #358 high:
 * Interweaved processing of subsequent chunks
 * Delaying each goroutine by a few pseudo-randomly chosen nanoseconds
   (depending on the platform, this will most probably only make the OS
   yield execution to another thread): together with the interweaved
   processing of subsequent chunks, this ensures a minimal delay
   between the processing of (some) duplicated chunks
 * Repeating the test 5 times with different seeds

On my test machine, the modified test provoked unreferenced packs 60
times in a row.
This commit is contained in:
Philipp Serr 2015-12-09 21:09:49 +01:00
parent 6a548336ec
commit 3d7f72311a

View file

@ -4,8 +4,8 @@ import (
"bytes" "bytes"
"crypto/sha256" "crypto/sha256"
"io" "io"
"math"
"testing" "testing"
"time"
"github.com/restic/chunker" "github.com/restic/chunker"
"github.com/restic/restic" "github.com/restic/restic"
@ -242,47 +242,48 @@ func BenchmarkLoadTree(t *testing.B) {
} }
} }
// Saves several identical chunks concurrently and later check that there are no // Saves several identical chunks concurrently and later checks that there are no
// unreferenced packs in the repository. See also #292 and #358. // unreferenced packs in the repository. See also #292 and #358.
// The combination of high duplication and high concurrency should provoke any func TestParallelSaveWithDuplication(t *testing.T) {
// issues leading to unreferenced packs. for seed := 0; seed < 5; seed++ {
func TestParallelSaveWithHighDuplication(t *testing.T) { testParallelSaveWithDuplication(t, seed)
}
}
func testParallelSaveWithDuplication(t *testing.T, seed int) {
repo := SetupRepo() repo := SetupRepo()
defer TeardownRepo(repo) defer TeardownRepo(repo)
// For every seed a pseudo-random 32Mb blob is generated and split into dataSizeMb := 92
// chunks. During the test all chunks of all blobs are processed in parallel duplication := 7
// goroutines. To increase duplication, each chunk is processed
// <duplication> times. Concurrency can be limited by changing <maxParallel>.
// Note: seeds 5, 3, 66, 4, 12 produce the most chunks (descending)
seeds := []int{5, 3, 66, 4, 12}
maxParallel := math.MaxInt32
duplication := 15
arch := restic.NewArchiver(repo) arch := restic.NewArchiver(repo)
data := getRandomData(seeds) data, chunks := getRandomData(seed, dataSizeMb*1024*1024)
reader := bytes.NewReader(data)
barrier := make(chan struct{}, maxParallel)
errChannels := [](<-chan error){} errChannels := [](<-chan error){}
for _, d := range data { // interweaved processing of subsequent chunks
for _, c := range d.chunks { maxParallel := 2*duplication - 1
for dupIdx := 0; dupIdx < duplication; dupIdx++ { barrier := make(chan struct{}, maxParallel)
errChan := make(chan error)
errChannels = append(errChannels, errChan)
go func(buf *[]byte, c *chunker.Chunk, errChan chan<- error) { for _, c := range chunks {
barrier <- struct{}{} for dupIdx := 0; dupIdx < duplication; dupIdx++ {
errChan := make(chan error)
errChannels = append(errChannels, errChan)
hash := c.Digest go func(reader *bytes.Reader, c *chunker.Chunk, errChan chan<- error) {
id := backend.ID{} barrier <- struct{}{}
copy(id[:], hash)
err := arch.Save(pack.Data, id, c.Length, c.Reader(bytes.NewReader(*buf))) hash := c.Digest
<-barrier id := backend.ID{}
errChan <- err copy(id[:], hash)
}(&d.buf, c, errChan)
} time.Sleep(time.Duration(hash[0]))
err := arch.Save(pack.Data, id, c.Length, c.Reader(reader))
<-barrier
errChan <- err
}(reader, c, errChan)
} }
} }
@ -297,34 +298,20 @@ func TestParallelSaveWithHighDuplication(t *testing.T) {
assertNoUnreferencedPacks(t, chkr) assertNoUnreferencedPacks(t, chkr)
} }
func getRandomData(seeds []int) []*chunkedData { func getRandomData(seed int, size int) ([]byte, []*chunker.Chunk) {
chunks := []*chunkedData{} buf := Random(seed, size)
sem := make(chan struct{}, len(seeds)) chunks := []*chunker.Chunk{}
chunker := chunker.New(bytes.NewReader(buf), testPol, sha256.New())
for seed := range seeds { for {
c := &chunkedData{} c, err := chunker.Next()
if err == io.EOF {
break
}
chunks = append(chunks, c) chunks = append(chunks, c)
go func(seed int, data *chunkedData) {
data.buf = Random(seed, 32*1024*1024)
chunker := chunker.New(bytes.NewReader(data.buf), testPol, sha256.New())
for {
c, err := chunker.Next()
if err == io.EOF {
break
}
data.chunks = append(data.chunks, c)
}
sem <- struct{}{}
}(seed, c)
} }
for i := 0; i < len(seeds); i++ { return buf, chunks
<-sem
}
return chunks
} }
func createAndInitChecker(t *testing.T, repo *repository.Repository) *checker.Checker { func createAndInitChecker(t *testing.T, repo *repository.Repository) *checker.Checker {