hrw/hrw.go

364 lines
9.5 KiB
Go
Raw Normal View History

2019-01-29 22:58:30 +00:00
// Package hrw implements Rendezvous hashing.
// http://en.wikipedia.org/wiki/Rendezvous_hashing.
package hrw
import (
"encoding/binary"
"errors"
"math"
2019-01-29 22:58:30 +00:00
"reflect"
"sort"
[#8] go.mod: Use faster murmur3 lib Specifically, this line became possible, because of noescape annotations for assembly. ``` ./hrw.go:307:14: make([]byte, 8) does not escape ``` ``` goos: linux goarch: amd64 pkg: git.frostfs.info/TrueCloudLab/hrw cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz │ 1 │ 2 │ │ sec/op │ sec/op vs base │ SortHashersByValue_Typed_fnv_10-8 580.1n ± 1% 368.5n ± 2% -36.47% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 4.215µ ± 2% 2.411µ ± 4% -42.79% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 39.40µ ± 1% 22.19µ ± 2% -43.68% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 599.6n ± 2% 364.3n ± 2% -39.25% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 4.337µ ± 5% 2.541µ ± 3% -41.41% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 4.344µ ± 3% 2.483µ ± 1% -42.84% (p=0.000 n=10) geomean 4.400µ 1.888µ -41.13% ¹ ¹ benchmark set differs from baseline; geomeans may not be comparable │ 1 │ 2 │ │ B/op │ B/op vs base │ SortHashersByValue_Typed_fnv_10-8 472.0 ± 0% 312.0 ± 0% -33.90% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 3.461Ki ± 0% 1.898Ki ± 0% -45.15% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 31.77Ki ± 0% 16.15Ki ± 0% -49.18% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 472.0 ± 0% 312.0 ± 0% -33.90% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 3.461Ki ± 0% 1.898Ki ± 0% -45.15% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 3.461Ki ± 0% 1.898Ki ± 0% -45.15% (p=0.000 n=10) geomean 3.070Ki 1.474Ki -42.37% ¹ ¹ benchmark set differs from baseline; geomeans may not be comparable │ 1 │ 2 │ │ allocs/op │ allocs/op vs base │ SortHashersByValue_Typed_fnv_10-8 16.000 ± 0% 6.000 ± 0% -62.50% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 106.000 ± 0% 6.000 ± 0% -94.34% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 1006.000 ± 0% 6.000 ± 0% -99.40% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 16.000 ± 0% 6.000 ± 0% -62.50% (p=0.000 n=10) 
SortHashersByWeightValueTyped_fnv_100-8 106.000 ± 0% 6.000 ± 0% -94.34% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 106.000 ± 0% 6.000 ± 0% -94.34% (p=0.000 n=10) geomean 113.0 6.000 -92.69% ¹ ¹ benchmark set differs from baseline; geomeans may not be comparable ``` Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-01 16:41:22 +00:00
"github.com/twmb/murmur3"
2019-01-29 22:58:30 +00:00
)
type (
	// Hasher is implemented by values that can report their own 64-bit hash.
	// It is the element constraint of the generic SortHasherSlice* helpers
	// and the fallback used by prepareRule for element types that have no
	// built-in hashing.
	Hasher interface{ Hash() uint64 }

	// sorter adapts a length and a pair of closures to sort.Interface.
	sorter struct {
		// l is the number of elements, reported by Len.
		l int
		// less reports whether element i must be ordered before element j.
		less func(i, j int) bool
		// swap exchanges elements i and j.
		swap func(i, j int)
	}
)
// Bounds of the closed interval a normalized weight has to fall into.
const (
	// NormalizedMinWeight is the smallest valid normalized weight.
	NormalizedMinWeight = 0.0
	// NormalizedMaxWeight is the largest valid normalized weight.
	NormalizedMaxWeight = 1.0
)
// Len reports the number of elements being sorted.
func (s *sorter) Len() int {
	return s.l
}
// Less delegates the ordering decision to the less closure.
func (s *sorter) Less(i, j int) bool {
	return s.less(i, j)
}
// Swap delegates the exchange of elements i and j to the swap closure.
func (s *sorter) Swap(i, j int) {
	s.swap(i, j)
}
// distance returns the hrw distance between x and y.
//
// The two values are XORed and the result is scrambled with the 64-bit
// finalizer of MurmurHash3:
// https://github.com/aappleby/smhasher/blob/61a0530f28277f2e850bfc39600ce61d02b518de/src/MurmurHash3.cpp#L81
//
// The result is symmetric in x and y, and it is 0 when x == y.
func distance(x uint64, y uint64) uint64 {
	acc := x ^ y

	acc ^= acc >> 33
	acc *= 0xff51afd7ed558ccd
	acc ^= acc >> 33
	acc *= 0xc4ceb9fe1a85ec53
	acc ^= acc >> 33

	return acc
}
// Hash returns the 64-bit murmur3 sum of key.
func Hash(key []byte) uint64 {
	sum := murmur3.Sum64(key)
	return sum
}
// Sort receive nodes and hash, and sort it by distance
func Sort(nodes []uint64, hash uint64) []uint64 {
l := len(nodes)
sorted := make([]uint64, l)
dist := make([]uint64, l)
for i := range nodes {
sorted[i] = uint64(i)
dist[i] = distance(nodes[i], hash)
2019-01-29 22:58:30 +00:00
}
sort.Slice(sorted, func(i, j int) bool {
return dist[sorted[i]] < dist[sorted[j]]
})
return sorted
2019-01-29 22:58:30 +00:00
}
// SortByWeight returns the values of nodes ordered by weighted hrw distance to
// hash, most preferable first.
//
// nodes is left untouched; the result is a freshly allocated slice of the same
// length. The ordering is delegated to sortByWeight, which falls back to plain
// distance ordering when all weights are equal and otherwise reads weights[i]
// for every node, so in that case weights must hold at least len(nodes)
// entries.
func SortByWeight(nodes []uint64, weights []float64, hash uint64) []uint64 {
	result := make([]uint64, len(nodes))
	// copy takes (dst, src): result has to be the destination, otherwise the
	// caller's nodes are overwritten with zeroes and the result stays empty.
	copy(result, nodes)
	sortByWeight(len(nodes), false, nodes, weights, hash, reflect.Swapper(result))
	return result
}
// SortSliceByValue sorts slice in place by the hrw distance between the hash
// of every element and hash, closest first.
//
// Element hashes come from prepareRule, which panics when slice is not a slice
// or its elements cannot be hashed. An empty slice is left as is.
func SortSliceByValue(slice interface{}, hash uint64) {
	hashes := prepareRule(slice)
	if hashes == nil {
		return
	}

	sortByDistance(len(hashes), false, hashes, hash, reflect.Swapper(slice))
}
// SortHasherSliceByValue receives []Hasher and hash to sort by value-distance.
func SortHasherSliceByValue[T Hasher](slice []T, hash uint64) {
rule := prepareHasherRule(slice)
if rule != nil {
[#8] hrw: Inline swap() when slice is known ``` goos: linux goarch: amd64 pkg: git.frostfs.info/TrueCloudLab/hrw cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz │ 2 │ 3 │ │ sec/op │ sec/op vs base │ SortHashersByValue_Typed_fnv_10-8 368.5n ± 2% 336.3n ± 3% -8.75% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 2.411µ ± 4% 2.424µ ± 3% ~ (p=0.853 n=10) SortHashersByValue_Typed_fnv_1000-8 22.19µ ± 2% 22.35µ ± 1% ~ (p=0.247 n=10) SortHashersByWeightValueTyped_fnv_10-8 364.3n ± 2% 346.6n ± 3% -4.86% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 2.541µ ± 3% 2.637µ ± 6% ~ (p=0.055 n=10) SortHashersByWeightValueTyped_fnv_1000-8 2.483µ ± 1% 2.609µ ± 4% +5.07% (p=0.003 n=10) geomean 1.888µ 1.875µ -0.71% │ 2 │ 3 │ │ B/op │ B/op vs base │ SortHashersByValue_Typed_fnv_10-8 312.0 ± 0% 296.0 ± 0% -5.13% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 1.898Ki ± 0% 1.883Ki ± 0% -0.82% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 16.15Ki ± 0% 16.13Ki ± 0% -0.10% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 312.0 ± 0% 296.0 ± 0% -5.13% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 1.898Ki ± 0% 1.883Ki ± 0% -0.82% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 1.898Ki ± 0% 1.883Ki ± 0% -0.82% (p=0.000 n=10) geomean 1.474Ki 1.442Ki -2.16% │ 2 │ 3 │ │ allocs/op │ allocs/op vs base │ SortHashersByValue_Typed_fnv_10-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) geomean 6.000 5.000 -16.67% ``` Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-01 17:22:47 +00:00
sortHasherByDistance(slice, false, rule, hash)
}
}
// SortSliceByWeightValue sorts slice in place by the weighted hrw distance
// between the hash of every element and hash.
//
// Element hashes come from prepareRule, which panics when slice is not a slice
// or its elements cannot be hashed. An empty slice is left as is.
func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) {
	hashes := prepareRule(slice)
	if hashes == nil {
		return
	}

	swapper := reflect.Swapper(slice)
	count := reflect.ValueOf(slice).Len()
	sortByWeight(count, false, hashes, weights, hash, swapper)
}
// SortHasherSliceByWeightValue receives []Hasher, weights and hash to sort by value-distance * weights.
func SortHasherSliceByWeightValue[T Hasher](slice []T, weights []float64, hash uint64) {
rule := prepareHasherRule(slice)
if rule != nil {
[#8] hrw: Inline swap() when slice is known ``` goos: linux goarch: amd64 pkg: git.frostfs.info/TrueCloudLab/hrw cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz │ 2 │ 3 │ │ sec/op │ sec/op vs base │ SortHashersByValue_Typed_fnv_10-8 368.5n ± 2% 336.3n ± 3% -8.75% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 2.411µ ± 4% 2.424µ ± 3% ~ (p=0.853 n=10) SortHashersByValue_Typed_fnv_1000-8 22.19µ ± 2% 22.35µ ± 1% ~ (p=0.247 n=10) SortHashersByWeightValueTyped_fnv_10-8 364.3n ± 2% 346.6n ± 3% -4.86% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 2.541µ ± 3% 2.637µ ± 6% ~ (p=0.055 n=10) SortHashersByWeightValueTyped_fnv_1000-8 2.483µ ± 1% 2.609µ ± 4% +5.07% (p=0.003 n=10) geomean 1.888µ 1.875µ -0.71% │ 2 │ 3 │ │ B/op │ B/op vs base │ SortHashersByValue_Typed_fnv_10-8 312.0 ± 0% 296.0 ± 0% -5.13% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 1.898Ki ± 0% 1.883Ki ± 0% -0.82% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 16.15Ki ± 0% 16.13Ki ± 0% -0.10% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 312.0 ± 0% 296.0 ± 0% -5.13% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 1.898Ki ± 0% 1.883Ki ± 0% -0.82% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 1.898Ki ± 0% 1.883Ki ± 0% -0.82% (p=0.000 n=10) geomean 1.474Ki 1.442Ki -2.16% │ 2 │ 3 │ │ allocs/op │ allocs/op vs base │ SortHashersByValue_Typed_fnv_10-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) geomean 6.000 5.000 -16.67% ``` Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-01 17:22:47 +00:00
sortHasherByWeight(slice, false, rule, weights, hash)
}
}
// sortHasherByDistance reorders slice in place by ascending hrw distance to
// hash. It mirrors sortByDistance, but exchanges the elements of slice
// directly instead of calling a swapper.
//
// The distance of element i is getDistance(byIndex, i, nodes, hash).
func sortHasherByDistance[T Hasher](slice []T, byIndex bool, nodes []uint64, hash uint64) {
	n := len(slice)

	distances := make([]uint64, n)
	for idx := 0; idx < n; idx++ {
		distances[idx] = getDistance(byIndex, idx, nodes, hash)
	}

	// Every exchange is applied to both slices so they stay aligned.
	exchange := func(a, b int) {
		slice[a], slice[b] = slice[b], slice[a]
		distances[a], distances[b] = distances[b], distances[a]
	}
	closer := func(a, b int) bool {
		return distances[a] < distances[b]
	}

	sort.Sort(&sorter{l: n, less: closer, swap: exchange})
}
// sortHasherByWeight is similar to sortByWeight but accepts slice directly.
func sortHasherByWeight[T Hasher](slice []T, byIndex bool, nodes []uint64, weights []float64, hash uint64) {
// if all nodes have the same distance then sort uniformly
if allSameF64(weights) {
sortHasherByDistance(slice, byIndex, nodes, hash)
return
}
dist := make([]float64, len(slice))
for i := range dist {
d := getDistance(byIndex, i, nodes, hash)
// `maxUint64 - distance` makes the shorter distance more valuable
// it is necessary for operation with normalized values
dist[i] = float64(^uint64(0)-d) * weights[i]
}
s := &sorter{
l: len(slice),
swap: func(i, j int) {
slice[i], slice[j] = slice[j], slice[i]
[#8] hrw: Inline swap() when slice is known ``` goos: linux goarch: amd64 pkg: git.frostfs.info/TrueCloudLab/hrw cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz │ 2 │ 3 │ │ sec/op │ sec/op vs base │ SortHashersByValue_Typed_fnv_10-8 368.5n ± 2% 336.3n ± 3% -8.75% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 2.411µ ± 4% 2.424µ ± 3% ~ (p=0.853 n=10) SortHashersByValue_Typed_fnv_1000-8 22.19µ ± 2% 22.35µ ± 1% ~ (p=0.247 n=10) SortHashersByWeightValueTyped_fnv_10-8 364.3n ± 2% 346.6n ± 3% -4.86% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 2.541µ ± 3% 2.637µ ± 6% ~ (p=0.055 n=10) SortHashersByWeightValueTyped_fnv_1000-8 2.483µ ± 1% 2.609µ ± 4% +5.07% (p=0.003 n=10) geomean 1.888µ 1.875µ -0.71% │ 2 │ 3 │ │ B/op │ B/op vs base │ SortHashersByValue_Typed_fnv_10-8 312.0 ± 0% 296.0 ± 0% -5.13% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 1.898Ki ± 0% 1.883Ki ± 0% -0.82% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 16.15Ki ± 0% 16.13Ki ± 0% -0.10% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 312.0 ± 0% 296.0 ± 0% -5.13% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 1.898Ki ± 0% 1.883Ki ± 0% -0.82% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 1.898Ki ± 0% 1.883Ki ± 0% -0.82% (p=0.000 n=10) geomean 1.474Ki 1.442Ki -2.16% │ 2 │ 3 │ │ allocs/op │ allocs/op vs base │ SortHashersByValue_Typed_fnv_10-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) geomean 6.000 5.000 -16.67% ``` Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-01 17:22:47 +00:00
dist[i], dist[j] = dist[j], dist[i]
},
less: func(i, j int) bool {
return dist[i] > dist[j] // higher distance must be placed lower to be first
},
}
[#8] hrw: Inline swap() when slice is known ``` goos: linux goarch: amd64 pkg: git.frostfs.info/TrueCloudLab/hrw cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz │ 2 │ 3 │ │ sec/op │ sec/op vs base │ SortHashersByValue_Typed_fnv_10-8 368.5n ± 2% 336.3n ± 3% -8.75% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 2.411µ ± 4% 2.424µ ± 3% ~ (p=0.853 n=10) SortHashersByValue_Typed_fnv_1000-8 22.19µ ± 2% 22.35µ ± 1% ~ (p=0.247 n=10) SortHashersByWeightValueTyped_fnv_10-8 364.3n ± 2% 346.6n ± 3% -4.86% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 2.541µ ± 3% 2.637µ ± 6% ~ (p=0.055 n=10) SortHashersByWeightValueTyped_fnv_1000-8 2.483µ ± 1% 2.609µ ± 4% +5.07% (p=0.003 n=10) geomean 1.888µ 1.875µ -0.71% │ 2 │ 3 │ │ B/op │ B/op vs base │ SortHashersByValue_Typed_fnv_10-8 312.0 ± 0% 296.0 ± 0% -5.13% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 1.898Ki ± 0% 1.883Ki ± 0% -0.82% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 16.15Ki ± 0% 16.13Ki ± 0% -0.10% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 312.0 ± 0% 296.0 ± 0% -5.13% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 1.898Ki ± 0% 1.883Ki ± 0% -0.82% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 1.898Ki ± 0% 1.883Ki ± 0% -0.82% (p=0.000 n=10) geomean 1.474Ki 1.442Ki -2.16% │ 2 │ 3 │ │ allocs/op │ allocs/op vs base │ SortHashersByValue_Typed_fnv_10-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 6.000 ± 0% 5.000 ± 0% -16.67% (p=0.000 n=10) geomean 6.000 5.000 -16.67% ``` Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-01 17:22:47 +00:00
sort.Sort(s)
}
// SortSliceByIndex sorts slice in place by the hrw distance between the index
// of every element and hash; element values play no role in the ordering.
func SortSliceByIndex(slice interface{}, hash uint64) {
	sortByDistance(reflect.ValueOf(slice).Len(), true, nil, hash, reflect.Swapper(slice))
}
// SortSliceByWeightIndex sorts slice in place by the weighted hrw distance
// between the index of every element and hash; element values play no role in
// the ordering.
func SortSliceByWeightIndex(slice interface{}, weights []float64, hash uint64) {
	sortByWeight(reflect.ValueOf(slice).Len(), true, nil, weights, hash, reflect.Swapper(slice))
}
func prepareRule(slice interface{}) []uint64 {
2019-01-29 22:58:30 +00:00
t := reflect.TypeOf(slice)
if t.Kind() != reflect.Slice {
panic("HRW sort expects slice, got " + t.Kind().String())
2019-01-29 22:58:30 +00:00
}
var (
val = reflect.ValueOf(slice)
length = val.Len()
rule = make([]uint64, 0, length)
)
if length == 0 {
return nil
2019-01-29 22:58:30 +00:00
}
2019-04-12 11:19:18 +00:00
switch slice := slice.(type) {
case []int:
var key = make([]byte, 16)
2019-01-29 22:58:30 +00:00
for i := 0; i < length; i++ {
binary.BigEndian.PutUint64(key, uint64(slice[i]))
rule = append(rule, Hash(key))
2019-01-29 22:58:30 +00:00
}
2019-04-12 11:19:18 +00:00
case []uint:
var key = make([]byte, 16)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint64(key, uint64(slice[i]))
rule = append(rule, Hash(key))
}
2019-04-12 11:19:18 +00:00
case []int8:
for i := 0; i < length; i++ {
key := byte(slice[i])
rule = append(rule, Hash([]byte{key}))
}
2019-04-12 11:19:18 +00:00
case []uint8:
for i := 0; i < length; i++ {
key := slice[i]
rule = append(rule, Hash([]byte{key}))
}
2019-04-12 11:19:18 +00:00
case []int16:
var key = make([]byte, 8)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint16(key, uint16(slice[i]))
rule = append(rule, Hash(key))
}
2019-04-12 11:19:18 +00:00
case []uint16:
var key = make([]byte, 8)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint16(key, slice[i])
rule = append(rule, Hash(key))
}
2019-04-12 11:19:18 +00:00
case []int32:
2019-02-01 09:57:05 +00:00
var key = make([]byte, 16)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint32(key, uint32(slice[i]))
rule = append(rule, Hash(key))
2019-02-01 09:57:05 +00:00
}
2019-04-12 11:19:18 +00:00
case []uint32:
var key = make([]byte, 16)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint32(key, slice[i])
rule = append(rule, Hash(key))
}
2019-04-12 11:19:18 +00:00
case []int64:
var key = make([]byte, 32)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint64(key, uint64(slice[i]))
rule = append(rule, Hash(key))
}
2019-04-12 11:19:18 +00:00
case []uint64:
var key = make([]byte, 32)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint64(key, slice[i])
rule = append(rule, Hash(key))
}
2019-04-12 11:19:18 +00:00
case []string:
2019-01-29 22:58:30 +00:00
for i := 0; i < length; i++ {
rule = append(rule, Hash([]byte(slice[i])))
2019-01-29 22:58:30 +00:00
}
2019-04-12 11:19:18 +00:00
default:
if _, ok := val.Index(0).Interface().(Hasher); !ok {
panic("slice elements must implement hrw.Hasher")
2019-04-12 11:19:18 +00:00
}
2019-01-29 22:58:30 +00:00
for i := 0; i < length; i++ {
h := val.Index(i).Interface().(Hasher)
rule = append(rule, h.Hash())
2019-01-29 22:58:30 +00:00
}
}
return rule
}
// prepareHasherRule returns the Hash() of every element of hashers, in element
// order. It returns nil for an empty slice.
func prepareHasherRule[T Hasher](hashers []T) []uint64 {
	if len(hashers) == 0 {
		return nil
	}

	hashes := make([]uint64, 0, len(hashers))
	for _, h := range hashers {
		hashes = append(hashes, h.Hash())
	}

	return hashes
}
// ValidateWeights checks that every weight lies within
// [NormalizedMinWeight, NormalizedMaxWeight].
//
// It returns an error for the first weight that is NaN or out of range, and
// nil otherwise (an empty slice is valid).
func ValidateWeights(weights []float64) error {
	for _, w := range weights {
		switch {
		case math.IsNaN(w), w > NormalizedMaxWeight, w < NormalizedMinWeight:
			return errors.New("weights are not normalized")
		}
	}

	return nil
}
// sortByWeight sorts nodes by weight using provided swapper.
// nodes contains hrw hashes. If it is nil, indices are used.
func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash uint64, swap func(i, j int)) {
// if all nodes have the same distance then sort uniformly
if allSameF64(weights) {
sortByDistance(l, byIndex, nodes, hash, swap)
return
}
[#8] hrw: Do not create index slice for sorter `ind` is only needed to index dist or weights, swap them directly. ``` goos: linux goarch: amd64 pkg: git.frostfs.info/TrueCloudLab/hrw cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz │ 0 │ 1 │ │ sec/op │ sec/op vs base │ SortHashersByValue_Typed_fnv_10-8 596.2n ± 4% 580.1n ± 1% -2.72% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 4.453µ ± 2% 4.215µ ± 2% -5.35% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 41.58µ ± 4% 39.40µ ± 1% -5.23% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 624.5n ± 2% 599.6n ± 2% -3.99% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 4.593µ ± 2% 4.337µ ± 5% -5.56% (p=0.003 n=10) SortHashersByWeightValueTyped_fnv_1000-8 4.896µ ± 8% 4.344µ ± 3% -11.27% (p=0.000 n=10) geomean 4.668µ 4.400µ -5.75% │ 0 │ 1 │ │ B/op │ B/op vs base │ SortHashersByValue_Typed_fnv_10-8 584.0 ± 0% 472.0 ± 0% -19.18% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 4.367Ki ± 0% 3.461Ki ± 0% -20.75% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 39.80Ki ± 0% 31.77Ki ± 0% -20.18% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 600.0 ± 0% 472.0 ± 0% -21.33% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 4.383Ki ± 0% 3.461Ki ± 0% -21.03% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 4.383Ki ± 0% 3.461Ki ± 0% -21.03% (p=0.000 n=10) geomean 3.742Ki 3.070Ki -17.96% │ 0 │ 1 │ │ allocs/op │ allocs/op vs base │ SortHashersByValue_Typed_fnv_10-8 17.00 ± 0% 16.00 ± 0% -5.88% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 107.0 ± 0% 106.0 ± 0% -0.93% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 1.007k ± 0% 1.006k ± 0% -0.10% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 17.00 ± 0% 16.00 ± 0% -5.88% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 107.0 ± 0% 106.0 ± 0% -0.93% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 107.0 ± 0% 106.0 ± 0% -0.93% (p=0.000 n=10) geomean 115.3 113.0 -1.94% ``` Signed-off-by: Evgenii Stratonikov 
<e.stratonikov@yadro.com>
2023-06-01 16:34:37 +00:00
dist := make([]float64, l)
for i := 0; i < l; i++ {
d := getDistance(byIndex, i, nodes, hash)
// `maxUint64 - distance` makes the shorter distance more valuable
// it is necessary for operation with normalized values
[#8] hrw: Do not create index slice for sorter `ind` is only needed to index dist or weights, swap them directly. ``` goos: linux goarch: amd64 pkg: git.frostfs.info/TrueCloudLab/hrw cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz │ 0 │ 1 │ │ sec/op │ sec/op vs base │ SortHashersByValue_Typed_fnv_10-8 596.2n ± 4% 580.1n ± 1% -2.72% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 4.453µ ± 2% 4.215µ ± 2% -5.35% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 41.58µ ± 4% 39.40µ ± 1% -5.23% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 624.5n ± 2% 599.6n ± 2% -3.99% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 4.593µ ± 2% 4.337µ ± 5% -5.56% (p=0.003 n=10) SortHashersByWeightValueTyped_fnv_1000-8 4.896µ ± 8% 4.344µ ± 3% -11.27% (p=0.000 n=10) geomean 4.668µ 4.400µ -5.75% │ 0 │ 1 │ │ B/op │ B/op vs base │ SortHashersByValue_Typed_fnv_10-8 584.0 ± 0% 472.0 ± 0% -19.18% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 4.367Ki ± 0% 3.461Ki ± 0% -20.75% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 39.80Ki ± 0% 31.77Ki ± 0% -20.18% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 600.0 ± 0% 472.0 ± 0% -21.33% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 4.383Ki ± 0% 3.461Ki ± 0% -21.03% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 4.383Ki ± 0% 3.461Ki ± 0% -21.03% (p=0.000 n=10) geomean 3.742Ki 3.070Ki -17.96% │ 0 │ 1 │ │ allocs/op │ allocs/op vs base │ SortHashersByValue_Typed_fnv_10-8 17.00 ± 0% 16.00 ± 0% -5.88% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 107.0 ± 0% 106.0 ± 0% -0.93% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 1.007k ± 0% 1.006k ± 0% -0.10% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 17.00 ± 0% 16.00 ± 0% -5.88% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 107.0 ± 0% 106.0 ± 0% -0.93% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 107.0 ± 0% 106.0 ± 0% -0.93% (p=0.000 n=10) geomean 115.3 113.0 -1.94% ``` Signed-off-by: Evgenii Stratonikov 
<e.stratonikov@yadro.com>
2023-06-01 16:34:37 +00:00
dist[i] = float64(^uint64(0)-d) * weights[i]
}
s := &sorter{
l: l,
swap: func(i, j int) {
swap(i, j)
dist[i], dist[j] = dist[j], dist[i]
},
less: func(i, j int) bool {
return dist[i] > dist[j] // higher distance must be placed lower to be first
},
}
sort.Sort(s)
}
// sortByDistance sorts nodes by hrw distance using provided swapper.
// nodes contains hrw hashes. If it is nil, indices are used.
func sortByDistance(l int, byIndex bool, nodes []uint64, hash uint64, swap func(i, j int)) {
[#8] hrw: Do not create index slice for sorter `ind` is only needed to index dist or weights, swap them directly. ``` goos: linux goarch: amd64 pkg: git.frostfs.info/TrueCloudLab/hrw cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz │ 0 │ 1 │ │ sec/op │ sec/op vs base │ SortHashersByValue_Typed_fnv_10-8 596.2n ± 4% 580.1n ± 1% -2.72% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 4.453µ ± 2% 4.215µ ± 2% -5.35% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 41.58µ ± 4% 39.40µ ± 1% -5.23% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 624.5n ± 2% 599.6n ± 2% -3.99% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 4.593µ ± 2% 4.337µ ± 5% -5.56% (p=0.003 n=10) SortHashersByWeightValueTyped_fnv_1000-8 4.896µ ± 8% 4.344µ ± 3% -11.27% (p=0.000 n=10) geomean 4.668µ 4.400µ -5.75% │ 0 │ 1 │ │ B/op │ B/op vs base │ SortHashersByValue_Typed_fnv_10-8 584.0 ± 0% 472.0 ± 0% -19.18% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 4.367Ki ± 0% 3.461Ki ± 0% -20.75% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 39.80Ki ± 0% 31.77Ki ± 0% -20.18% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 600.0 ± 0% 472.0 ± 0% -21.33% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 4.383Ki ± 0% 3.461Ki ± 0% -21.03% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 4.383Ki ± 0% 3.461Ki ± 0% -21.03% (p=0.000 n=10) geomean 3.742Ki 3.070Ki -17.96% │ 0 │ 1 │ │ allocs/op │ allocs/op vs base │ SortHashersByValue_Typed_fnv_10-8 17.00 ± 0% 16.00 ± 0% -5.88% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 107.0 ± 0% 106.0 ± 0% -0.93% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 1.007k ± 0% 1.006k ± 0% -0.10% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 17.00 ± 0% 16.00 ± 0% -5.88% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 107.0 ± 0% 106.0 ± 0% -0.93% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 107.0 ± 0% 106.0 ± 0% -0.93% (p=0.000 n=10) geomean 115.3 113.0 -1.94% ``` Signed-off-by: Evgenii Stratonikov 
<e.stratonikov@yadro.com>
2023-06-01 16:34:37 +00:00
dist := make([]uint64, l)
for i := 0; i < l; i++ {
dist[i] = getDistance(byIndex, i, nodes, hash)
}
s := &sorter{
l: l,
swap: func(i, j int) {
swap(i, j)
dist[i], dist[j] = dist[j], dist[i]
},
less: func(i, j int) bool {
return dist[i] < dist[j]
},
}
sort.Sort(s)
}
// getDistance returns the hrw distance between element i and h.
//
// The value compared against h is chosen as follows:
//   - nodes[i], when nodes is non-nil (byIndex is ignored in this case);
//   - the index i itself, when nodes is nil and byIndex is true;
//   - Hash of the little-endian 8-byte encoding of i otherwise.
func getDistance(byIndex bool, i int, nodes []uint64, h uint64) uint64 {
	switch {
	case nodes != nil:
		return distance(nodes[i], h)
	case byIndex:
		return distance(uint64(i), h)
	}

	encoded := make([]byte, 8)
	binary.LittleEndian.PutUint64(encoded, uint64(i))

	return distance(Hash(encoded), h)
}
// allSameF64 reports whether every element of fs equals the first one.
// An empty slice yields true. Elements are compared with ==, so a slice that
// contains NaN yields false.
func allSameF64(fs []float64) bool {
	for _, f := range fs {
		if f == fs[0] {
			continue
		}
		return false
	}

	return true
}