package pilorama

import (
	"container/heap"
	"slices"
	"strings"
)

// heapInfo is a single entry stored in a fixedHeap: a filename together with
// the nodes that were pushed under it.
type heapInfo struct {
	id       MultiNode
	filename string
}

// filenameHeap is a slice of entries ordered by filename. It implements
// heap.Interface; fixedHeap itself only appends to it, sorts it and slices it.
type filenameHeap []heapInfo

// Len returns the number of stored entries.
func (h filenameHeap) Len() int { return len(h) }

// Less reports whether the filename of entry i sorts before that of entry j.
func (h filenameHeap) Less(i, j int) bool { return h[i].filename < h[j].filename }

// Swap exchanges entries i and j.
func (h filenameHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }

// Push appends x, which must be a heapInfo, to the end of the slice.
func (h *filenameHeap) Push(x any) {
	*h = append(*h, x.(heapInfo))
}

// Pop removes and returns the last entry of the slice.
func (h *filenameHeap) Pop() any {
	old := *h
	n := len(old)
	x := old[n-1]
	*h = old[0 : n-1]
	return x
}

// fixedHeap maintains a fixed number of smallest elements started at some point.
//
// Entries are appended to an unsorted buffer. The buffer is sorted by filename
// and cut down to count entries either when it grows past
// count*amortizationMultiplier (in push) or lazily on the first pop after a
// push. The sorted flag records whether the buffer is currently sorted and
// already cut down, so that pop can skip the work.
type fixedHeap struct {
	start  *Cursor
	sorted bool
	count  int
	h      *filenameHeap
}

// newHeap returns an empty fixedHeap that keeps at most count entries.
// When start is non-nil, push filters entries against it.
func newHeap(start *Cursor, count int) *fixedHeap {
	h := new(filenameHeap)
	heap.Init(h)

	return &fixedHeap{
		start: start,
		count: count,
		h:     h,
	}
}

// amortizationMultiplier controls how far the buffer may grow beyond count
// before push sorts and truncates it.
const amortizationMultiplier = 5

// keepCount returns the number of entries to retain. A negative count is
// treated as zero so that truncation never slices with a negative bound.
func (h *fixedHeap) keepCount() int {
	return max(h.count, 0)
}

// compact sorts the buffer by filename, drops everything beyond keepCount and
// marks the heap as sorted.
func (h *fixedHeap) compact() {
	entries := *h.h
	slices.SortFunc(entries, func(a, b heapInfo) int {
		return strings.Compare(a.filename, b.filename)
	})

	if keep := h.keepCount(); len(entries) > keep {
		// Zero the discarded tail so the backing array does not keep the
		// dropped node slices and filenames reachable.
		clear(entries[keep:])
		entries = entries[:keep]
	}

	*h.h = entries
	h.sorted = true
}

// push offers an entry to the heap and reports whether it was accepted.
//
// With a non-nil start cursor, entries whose filename sorts before the cursor
// filename are rejected. For an entry with exactly the cursor filename only the
// nodes following the cursor node are kept; the entry is rejected when the
// cursor node is absent from id or is its last element.
//
// An accepted entry may still be discarded by a later compaction if it is not
// among the count smallest filenames.
func (h *fixedHeap) push(id MultiNode, filename string) bool {
	if h.start != nil {
		startName := h.start.GetFilename()
		switch {
		case filename < startName:
			return false
		case filename == startName:
			// A tree may have a lot of nodes with the same filename but different versions so that
			// len(nodes) > batch_size. The cut nodes should be pushed into the result on repeated call
			// with the same filename.
			pos := slices.Index(id, h.start.GetNode())
			if pos == -1 || pos+1 >= len(id) {
				return false
			}
			id = id[pos+1:]
		}
	}

	*h.h = append(*h.h, heapInfo{id: id, filename: filename})
	h.sorted = false

	if h.h.Len() > h.keepCount()*amortizationMultiplier {
		// compact leaves the buffer sorted and records that in h.sorted, so a
		// pop right after this push does not sort again.
		h.compact()
	}
	return true
}

// pop removes and returns the entry with the smallest filename. The second
// result is false when the heap is empty.
func (h *fixedHeap) pop() (heapInfo, bool) {
	if !h.sorted {
		h.compact()
	}

	entries := *h.h
	if len(entries) == 0 {
		return heapInfo{}, false
	}

	info := entries[0]
	// The slot is never reused once the slice start moves past it; zero it so
	// the backing array does not pin the returned entry's data.
	entries[0] = heapInfo{}
	*h.h = entries[1:]
	return info, true
}