Simplify SortByValue/Weight a bit

Get rid of unneeded types.
This commit is contained in:
Evgenii Stratonikov 2020-09-11 13:34:16 +03:00 committed by Alex Vanin
parent f52ea8fb21
commit dddcfc8fc5
2 changed files with 101 additions and 149 deletions

220
hrw.go
View file

@ -13,20 +13,13 @@ import (
) )
type ( type (
swapper func(i, j int)
// Hasher interface used by SortSliceByValue // Hasher interface used by SortSliceByValue
Hasher interface{ Hash() uint64 } Hasher interface{ Hash() uint64 }
hashed struct { sorter struct {
length int l int
sorted []uint64 less func(i, j int) bool
distance []uint64 swap func(i, j int)
}
weighted struct {
h hashed
normal []float64 // normalized input weights
} }
) )
@ -36,6 +29,10 @@ const (
NormalizedMinWeight = 0.0 NormalizedMinWeight = 0.0
) )
func (s *sorter) Len() int { return s.l }
func (s *sorter) Less(i, j int) bool { return s.less(i, j) }
func (s *sorter) Swap(i, j int) { s.swap(i, j) }
func distance(x uint64, y uint64) uint64 { func distance(x uint64, y uint64) uint64 {
acc := x ^ y acc := x ^ y
// here used mmh3 64 bit finalizer // here used mmh3 64 bit finalizer
@ -48,23 +45,6 @@ func distance(x uint64, y uint64) uint64 {
return acc return acc
} }
func (h hashed) Len() int { return h.length }
func (h hashed) Less(i, j int) bool { return h.distance[i] < h.distance[j] }
func (h hashed) Swap(i, j int) {
h.sorted[i], h.sorted[j] = h.sorted[j], h.sorted[i]
h.distance[i], h.distance[j] = h.distance[j], h.distance[i]
}
func (w weighted) Len() int { return w.h.length }
func (w weighted) Less(i, j int) bool {
// `maxUint64 - distance` makes the shorter distance more valuable
// it is necessary for operation with normalized values
wi := float64(^uint64(0)-w.h.distance[i]) * w.normal[i]
wj := float64(^uint64(0)-w.h.distance[j]) * w.normal[j]
return wi > wj // higher distance must be placed lower to be first
}
func (w weighted) Swap(i, j int) { w.normal[i], w.normal[j] = w.normal[j], w.normal[i]; w.h.Swap(i, j) }
// Hash uses murmur3 hash to return uint64 // Hash uses murmur3 hash to return uint64
func Hash(key []byte) uint64 { func Hash(key []byte) uint64 {
return murmur3.Sum64(key) return murmur3.Sum64(key)
@ -72,59 +52,26 @@ func Hash(key []byte) uint64 {
// Sort receive nodes and hash, and sort it by distance // Sort receive nodes and hash, and sort it by distance
func Sort(nodes []uint64, hash uint64) []uint64 { func Sort(nodes []uint64, hash uint64) []uint64 {
var ( l := len(nodes)
l = len(nodes) sorted := make([]uint64, l)
h = hashed{ dist := make([]uint64, l)
length: l,
sorted: make([]uint64, 0, l),
distance: make([]uint64, 0, l),
}
)
for i := range nodes { for i := range nodes {
h.sorted = append(h.sorted, uint64(i)) sorted[i] = uint64(i)
h.distance = append(h.distance, distance(nodes[i], hash)) dist[i] = distance(nodes[i], hash)
} }
sort.Sort(h) sort.Slice(sorted, func(i, j int) bool {
return h.sorted return dist[sorted[i]] < dist[sorted[j]]
})
return sorted
} }
// SortByWeight receive nodes, weights and hash, and sort it by distance * weight // SortByWeight receive nodes, weights and hash, and sort it by distance * weight
func SortByWeight(nodes []uint64, weights []float64, hash uint64) []uint64 { func SortByWeight(nodes []uint64, weights []float64, hash uint64) []uint64 {
// check if numbers of weights and nodes are equal result := make([]uint64, len(nodes))
uniform := true copy(nodes, result)
for i := range weights { sortByWeight(len(nodes), false, nodes, weights, hash, reflect.Swapper(result))
// check if all nodes have the same distance return result
if weights[i] != weights[0] {
uniform = false
break
}
}
l := len(nodes)
w := weighted{
h: hashed{
length: l,
sorted: make([]uint64, 0, l),
distance: make([]uint64, 0, l),
},
normal: make([]float64, l),
}
// if all nodes have the same distance then sort uniformly
if uniform || len(weights) != l {
return Sort(nodes, hash)
}
for i := range nodes {
w.h.sorted = append(w.h.sorted, uint64(i))
w.h.distance = append(w.h.distance, distance(nodes[i], hash))
}
copy(w.normal, weights)
sort.Sort(w)
return w.h.sorted
} }
// SortSliceByValue received []T and hash to sort by value-distance // SortSliceByValue received []T and hash to sort by value-distance
@ -132,8 +79,7 @@ func SortSliceByValue(slice interface{}, hash uint64) {
rule := prepareRule(slice) rule := prepareRule(slice)
if rule != nil { if rule != nil {
swap := reflect.Swapper(slice) swap := reflect.Swapper(slice)
rule = Sort(rule, hash) sortByDistance(len(rule), false, rule, hash, swap)
sortByRuleInverse(swap, uint64(len(rule)), rule)
} }
} }
@ -142,60 +88,22 @@ func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) {
rule := prepareRule(slice) rule := prepareRule(slice)
if rule != nil { if rule != nil {
swap := reflect.Swapper(slice) swap := reflect.Swapper(slice)
rule = SortByWeight(rule, weights, hash) sortByWeight(reflect.ValueOf(slice).Len(), false, rule, weights, hash, swap)
sortByRuleInverse(swap, uint64(len(rule)), rule)
} }
} }
// SortSliceByIndex received []T and hash to sort by index-distance // SortSliceByIndex received []T and hash to sort by index-distance
func SortSliceByIndex(slice interface{}, hash uint64) { func SortSliceByIndex(slice interface{}, hash uint64) {
length := uint64(reflect.ValueOf(slice).Len()) length := reflect.ValueOf(slice).Len()
swap := reflect.Swapper(slice) swap := reflect.Swapper(slice)
rule := make([]uint64, 0, length) sortByDistance(length, true, nil, hash, swap)
for i := uint64(0); i < length; i++ {
rule = append(rule, i)
}
rule = Sort(rule, hash)
sortByRuleInverse(swap, length, rule)
} }
// SortSliceByWeightIndex received []T, weights and hash to sort by index-distance * weights // SortSliceByWeightIndex received []T, weights and hash to sort by index-distance * weights
func SortSliceByWeightIndex(slice interface{}, weights []float64, hash uint64) { func SortSliceByWeightIndex(slice interface{}, weights []float64, hash uint64) {
length := uint64(reflect.ValueOf(slice).Len()) length := reflect.ValueOf(slice).Len()
swap := reflect.Swapper(slice) swap := reflect.Swapper(slice)
rule := make([]uint64, 0, length) sortByWeight(length, true, nil, weights, hash, swap)
for i := uint64(0); i < length; i++ {
rule = append(rule, i)
}
rule = SortByWeight(rule, weights, hash)
sortByRuleInverse(swap, length, rule)
}
func sortByRuleDirect(swap swapper, length uint64, rule []uint64) {
done := make([]bool, length)
for i := uint64(0); i < length; i++ {
if done[i] {
continue
}
for j := rule[i]; !done[rule[j]]; j = rule[j] {
swap(int(i), int(j))
done[j] = true
}
}
}
func sortByRuleInverse(swap swapper, length uint64, rule []uint64) {
done := make([]bool, length)
for i := uint64(0); i < length; i++ {
if done[i] {
continue
}
for j := i; !done[rule[j]]; j = rule[j] {
swap(int(j), int(rule[j]))
done[j] = true
}
}
} }
func prepareRule(slice interface{}) []uint64 { func prepareRule(slice interface{}) []uint64 {
@ -300,3 +208,77 @@ func ValidateWeights(weights []float64) error {
} }
return nil return nil
} }
func newSorter(l int, byIndex bool, nodes []uint64, h uint64,
swap func(i, j int)) (*sorter, []int, []uint64) {
ind := make([]int, l)
dist := make([]uint64, l)
for i := 0; i < l; i++ {
ind[i] = i
dist[i] = getDistance(byIndex, i, nodes, h)
}
return &sorter{
l: l,
swap: func(i, j int) {
swap(i, j)
ind[i], ind[j] = ind[j], ind[i]
},
}, ind, dist
}
// sortByWeight sorts nodes by weight using provided swapper.
// nodes contains hrw hashes. If it is nil, indices are used.
func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash uint64, swap func(i, j int)) {
// if all nodes have the same distance then sort uniformly
if allSameF64(weights) {
sortByDistance(l, byIndex, nodes, hash, swap)
return
}
s, ind, dist := newSorter(l, byIndex, nodes, hash, swap)
s.less = func(i, j int) bool {
ii, jj := ind[i], ind[j]
// `maxUint64 - distance` makes the shorter distance more valuable
// it is necessary for operation with normalized values
wi := float64(^uint64(0)-dist[ii]) * weights[ii]
wj := float64(^uint64(0)-dist[jj]) * weights[jj]
return wi > wj // higher distance must be placed lower to be first
}
sort.Sort(s)
}
// sortByDistance sorts nodes by hrw distance using provided swapper.
// nodes contains hrw hashes. If it is nil, indices are used.
func sortByDistance(l int, byIndex bool, nodes []uint64, hash uint64, swap func(i, j int)) {
s, ind, dist := newSorter(l, byIndex, nodes, hash, swap)
s.less = func(i, j int) bool {
return dist[ind[i]] < dist[ind[j]]
}
sort.Sort(s)
}
// getDistance return distance from nodes[i] to h.
// If byIndex is true, nodes index is used.
// Else if nodes[i] != nil, distance is calculated from this value.
// Otherwise, and hash from node index is taken.
func getDistance(byIndex bool, i int, nodes []uint64, h uint64) uint64 {
if nodes != nil {
return distance(nodes[i], h)
} else if byIndex {
return distance(uint64(i), h)
} else {
buf := make([]byte, 8)
binary.LittleEndian.PutUint64(buf, uint64(i))
return distance(Hash(buf), h)
}
}
func allSameF64(fs []float64) bool {
for i := range fs {
if fs[i] != fs[0] {
return false
}
}
return true
}

View file

@ -101,36 +101,6 @@ func TestSortSliceByValue(t *testing.T) {
require.Equal(t, expect, actual) require.Equal(t, expect, actual)
} }
func TestSortByRule(t *testing.T) {
t.Run("direct", func(t *testing.T) {
// 0 1 2 3 4 5
actual := []string{"a", "b", "c", "d", "e", "f"}
// 4 2 0 5 3 1
expect := []string{"c", "f", "b", "e", "a", "d"}
rule := []uint64{4, 2, 0, 5, 3, 1}
sortByRuleDirect(
func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
6, rule)
require.Equal(t, expect, actual)
})
t.Run("inverse", func(t *testing.T) {
// 0 1 2 3 4 5
actual := []string{"a", "b", "c", "d", "e", "f"}
// 4 2 0 5 3 1
expect := []string{"e", "c", "a", "f", "d", "b"}
rule := []uint64{4, 2, 0, 5, 3, 1}
sortByRuleInverse(
func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
6, rule)
require.Equal(t, expect, actual)
})
}
func TestSortSliceByValueFail(t *testing.T) { func TestSortSliceByValueFail(t *testing.T) {
t.Run("empty slice", func(t *testing.T) { t.Run("empty slice", func(t *testing.T) {
var ( var (