Fix pilorama sorted listing #1330
3 changed files with 92 additions and 8 deletions
|
@ -1161,6 +1161,7 @@ func (t *boltForest) fillSortedChildren(b *bbolt.Bucket, nodeIDs MultiNode, h *f
|
||||||
lastFilename = nil
|
lastFilename = nil
|
||||||
nodes = nil
|
nodes = nil
|
||||||
length = actualLength + 1
|
length = actualLength + 1
|
||||||
|
count = 0
|
||||||
c.Seek(append(prefix, byte(length), byte(length>>8)))
|
c.Seek(append(prefix, byte(length), byte(length>>8)))
|
||||||
c.Prev() // c.Next() will be performed by for loop
|
c.Prev() // c.Next() will be performed by for loop
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,11 +1,13 @@
|
||||||
package pilorama
|
package pilorama
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"crypto/rand"
|
"crypto/rand"
|
||||||
"fmt"
|
"fmt"
|
||||||
mrand "math/rand"
|
mrand "math/rand"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"slices"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
@ -232,6 +234,65 @@ func BenchmarkForestSortedIteration(b *testing.B) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The issue which we call "BugWithSkip" is easiest to understand when filenames are
|
||||||
|
// monotonically increasing numbers. We want the list of sorted filenames to have different length interleaved.
|
||||||
|
// The bug happens when we switch between length during listing.
|
||||||
|
// Thus this test contains numbers from 1 to 2000 and batch size of size 10.
|
||||||
|
func TestForest_TreeSortedIterationBugWithSkip(t *testing.T) {
|
||||||
|
for i := range providers {
|
||||||
|
t.Run(providers[i].name, func(t *testing.T) {
|
||||||
|
testForestTreeSortedIterationBugWithSkip(t, providers[i].construct(t))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func testForestTreeSortedIterationBugWithSkip(t *testing.T, s ForestStorage) {
|
||||||
|
defer func() { require.NoError(t, s.Close()) }()
|
||||||
|
|
||||||
|
cid := cidtest.ID()
|
||||||
|
d := CIDDescriptor{cid, 0, 1}
|
||||||
|
treeID := "version"
|
||||||
|
treeAdd := func(t *testing.T, ts int, filename string) {
|
||||||
|
_, err := s.TreeMove(context.Background(), d, treeID, &Move{
|
||||||
|
Child: RootID + uint64(ts),
|
||||||
|
Parent: RootID,
|
||||||
|
Meta: Meta{
|
||||||
|
Time: Timestamp(ts),
|
||||||
|
Items: []KeyValue{
|
||||||
|
{Key: AttributeFilename, Value: []byte(filename)},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
const count = 2000
|
||||||
|
treeAdd(t, 1, "")
|
||||||
|
for i := 1; i < count; i++ {
|
||||||
|
treeAdd(t, i+1, strconv.Itoa(i+1))
|
||||||
|
}
|
||||||
|
|
||||||
|
var result []MultiNodeInfo
|
||||||
|
treeAppend := func(t *testing.T, last *string, count int) *string {
|
||||||
|
res, cursor, err := s.TreeSortedByFilename(context.Background(), d.CID, treeID, MultiNode{RootID}, last, count)
|
||||||
|
require.NoError(t, err)
|
||||||
|
result = append(result, res...)
|
||||||
|
return cursor
|
||||||
|
}
|
||||||
|
|
||||||
|
const batchSize = 10
|
||||||
|
last := treeAppend(t, nil, batchSize)
|
||||||
|
for i := 1; i < count/batchSize; i++ {
|
||||||
|
last = treeAppend(t, last, batchSize)
|
||||||
|
}
|
||||||
|
require.Len(t, result, count)
|
||||||
|
require.True(t, slices.IsSortedFunc(result, func(a, b MultiNodeInfo) int {
|
||||||
|
filenameA := findAttr(a.Meta, AttributeFilename)
|
||||||
|
filenameB := findAttr(b.Meta, AttributeFilename)
|
||||||
|
return bytes.Compare(filenameA, filenameB)
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
func TestForest_TreeSortedIteration(t *testing.T) {
|
func TestForest_TreeSortedIteration(t *testing.T) {
|
||||||
for i := range providers {
|
for i := range providers {
|
||||||
t.Run(providers[i].name, func(t *testing.T) {
|
t.Run(providers[i].name, func(t *testing.T) {
|
||||||
|
|
|
@ -2,6 +2,8 @@ package pilorama
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"container/heap"
|
"container/heap"
|
||||||
|
"slices"
|
||||||
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
type heapInfo struct {
|
type heapInfo struct {
|
||||||
|
@ -28,9 +30,10 @@ func (h *filenameHeap) Pop() any {
|
||||||
|
|
||||||
// fixedHeap maintains a fixed number of smallest elements started at some point.
|
// fixedHeap maintains a fixed number of smallest elements started at some point.
|
||||||
type fixedHeap struct {
|
type fixedHeap struct {
|
||||||
start *string
|
start *string
|
||||||
count int
|
sorted bool
|
||||||
h *filenameHeap
|
count int
|
||||||
|
h *filenameHeap
|
||||||
}
|
}
|
||||||
|
|
||||||
func newHeap(start *string, count int) *fixedHeap {
|
func newHeap(start *string, count int) *fixedHeap {
|
||||||
|
@ -44,20 +47,39 @@ func newHeap(start *string, count int) *fixedHeap {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const amortizationMultiplier = 5
|
||||||
|
|
||||||
func (h *fixedHeap) push(id MultiNode, filename string) bool {
|
func (h *fixedHeap) push(id MultiNode, filename string) bool {
|
||||||
if h.start != nil && filename <= *h.start {
|
if h.start != nil && filename <= *h.start {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
heap.Push(h.h, heapInfo{id: id, filename: filename})
|
|
||||||
if h.h.Len() > h.count {
|
*h.h = append(*h.h, heapInfo{id: id, filename: filename})
|
||||||
heap.Remove(h.h, h.h.Len()-1)
|
h.sorted = false
|
||||||
|
|
||||||
|
if h.h.Len() > h.count*amortizationMultiplier {
|
||||||
|
slices.SortFunc(*h.h, func(a, b heapInfo) int {
|
||||||
|
return strings.Compare(a.filename, b.filename)
|
||||||
|
})
|
||||||
|
*h.h = (*h.h)[:h.count]
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *fixedHeap) pop() (heapInfo, bool) {
|
func (h *fixedHeap) pop() (heapInfo, bool) {
|
||||||
if h.h.Len() != 0 {
|
if !h.sorted {
|
||||||
return heap.Pop(h.h).(heapInfo), true
|
slices.SortFunc(*h.h, func(a, b heapInfo) int {
|
||||||
|
return strings.Compare(a.filename, b.filename)
|
||||||
|
})
|
||||||
|
if len(*h.h) > h.count {
|
||||||
|
*h.h = (*h.h)[:h.count]
|
||||||
|
}
|
||||||
|
h.sorted = true
|
||||||
|
}
|
||||||
|
if len(*h.h) != 0 {
|
||||||
|
info := (*h.h)[0]
|
||||||
|
*h.h = (*h.h)[1:]
|
||||||
|
return info, true
|
||||||
}
|
}
|
||||||
return heapInfo{}, false
|
return heapInfo{}, false
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue