forked from TrueCloudLab/hrw
Evgenii Stratonikov
266da7c69a
``` goos: linux goarch: amd64 pkg: git.frostfs.info/TrueCloudLab/hrw cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz │ 4 │ 5 │ │ sec/op │ sec/op vs base │ SortHashersByValue_Typed_fnv_10-8 309.2n ± 2% 294.4n ± 1% -4.75% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 2.306µ ± 1% 2.549µ ± 1% +10.54% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 21.73µ ± 1% 24.80µ ± 3% +14.14% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 347.1n ± 1% 334.8n ± 2% -3.56% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 2.668µ ± 1% 2.954µ ± 3% +10.72% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 2.673µ ± 1% 2.957µ ± 4% +10.63% (p=0.000 n=10) geomean 1.836µ 1.947µ +6.01% │ 4 │ 5 │ │ B/op │ B/op vs base │ SortHashersByValue_Typed_fnv_10-8 216.0 ± 0% 144.0 ± 0% -33.33% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 1032.0 ± 0% 960.0 ± 0% -6.98% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 8.133Ki ± 0% 8.062Ki ± 0% -0.86% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 216.0 ± 0% 144.0 ± 0% -33.33% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 1032.0 ± 0% 960.0 ± 0% -6.98% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 1032.0 ± 0% 960.0 ± 0% -6.98% (p=0.000 n=10) geomean 867.8 730.1 -15.87% │ 4 │ 5 │ │ allocs/op │ allocs/op vs base │ SortHashersByValue_Typed_fnv_10-8 4.000 ± 0% 2.000 ± 0% -50.00% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 4.000 ± 0% 2.000 ± 0% -50.00% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 4.000 ± 0% 2.000 ± 0% -50.00% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 4.000 ± 0% 2.000 ± 0% -50.00% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 4.000 ± 0% 2.000 ± 0% -50.00% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 4.000 ± 0% 2.000 ± 0% -50.00% (p=0.000 n=10) geomean 4.000 2.000 -50.00% ``` Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
356 lines
9.1 KiB
Go
356 lines
9.1 KiB
Go
// Package hrw implements Rendezvous hashing.
|
|
// http://en.wikipedia.org/wiki/Rendezvous_hashing.
|
|
package hrw
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"errors"
|
|
"math"
|
|
"reflect"
|
|
"sort"
|
|
|
|
"github.com/twmb/murmur3"
|
|
)
|
|
|
|
// Hasher is implemented by values that can report their own 64-bit hash.
// Slices of such values can be ordered by SortSliceByValue and by the
// SortHasherSlice* functions.
type Hasher interface {
	Hash() uint64
}

// sorter adapts an element count and a pair of callbacks to sort.Interface.
type sorter struct {
	// l is the number of elements being sorted.
	l int
	// less reports whether element i must be placed before element j.
	less func(i, j int) bool
	// swap exchanges elements i and j.
	swap func(i, j int)
}

// hasherSorter sorts slice in place using the parallel keys in dist:
// dist[i] is the sort key of slice[i], and both are swapped together.
type hasherSorter[T Hasher, N ~uint64 | ~float64] struct {
	// slice holds the elements being reordered.
	slice []T
	// dist holds one sort key per element of slice.
	dist []N
	// asc selects ascending key order when true, descending otherwise.
	asc bool
}
|
|
|
|
// Bounds of the closed interval that normalized weights must fall into.
const (
	// NormalizedMinWeight is the smallest valid normalized weight.
	NormalizedMinWeight = 0.0
	// NormalizedMaxWeight is the largest valid normalized weight.
	NormalizedMaxWeight = 1.0
)
|
|
|
|
// Len returns the element count stored in the sorter (part of sort.Interface).
func (s *sorter) Len() int { return s.l }
|
|
// Less forwards the comparison of elements i and j to the less callback (part of sort.Interface).
func (s *sorter) Less(i, j int) bool { return s.less(i, j) }
|
|
// Swap forwards the exchange of elements i and j to the swap callback (part of sort.Interface).
func (s *sorter) Swap(i, j int) { s.swap(i, j) }
|
|
|
|
// Len returns the number of elements in the wrapped slice (part of sort.Interface).
func (s *hasherSorter[T, N]) Len() int { return len(s.slice) }
|
|
func (s *hasherSorter[T, N]) Less(i, j int) bool {
|
|
if s.asc {
|
|
return s.dist[i] < s.dist[j]
|
|
}
|
|
return s.dist[i] > s.dist[j]
|
|
}
|
|
func (s *hasherSorter[T, N]) Swap(i, j int) {
|
|
s.slice[i], s.slice[j] = s.slice[j], s.slice[i]
|
|
s.dist[i], s.dist[j] = s.dist[j], s.dist[i]
|
|
}
|
|
|
|
// distance mixes x XOR y with the 64-bit MurmurHash3 finalizer and returns
// the result. The finalizer is taken from
// https://github.com/aappleby/smhasher/blob/61a0530f28277f2e850bfc39600ce61d02b518de/src/MurmurHash3.cpp#L81
func distance(x uint64, y uint64) uint64 {
	const (
		firstMul  = 0xff51afd7ed558ccd
		secondMul = 0xc4ceb9fe1a85ec53
	)

	h := x ^ y
	h = (h ^ (h >> 33)) * firstMul
	h = (h ^ (h >> 33)) * secondMul
	return h ^ (h >> 33)
}
|
|
|
|
// Hash uses murmur3 hash to return uint64
|
|
func Hash(key []byte) uint64 {
|
|
return murmur3.Sum64(key)
|
|
}
|
|
|
|
// Sort receive nodes and hash, and sort it by distance
|
|
func Sort(nodes []uint64, hash uint64) []uint64 {
|
|
l := len(nodes)
|
|
sorted := make([]uint64, l)
|
|
dist := make([]uint64, l)
|
|
for i := range nodes {
|
|
sorted[i] = uint64(i)
|
|
dist[i] = distance(nodes[i], hash)
|
|
}
|
|
|
|
sort.Slice(sorted, func(i, j int) bool {
|
|
return dist[sorted[i]] < dist[sorted[j]]
|
|
})
|
|
return sorted
|
|
}
|
|
|
|
// SortByWeight receive nodes, weights and hash, and sort it by distance * weight
|
|
func SortByWeight(nodes []uint64, weights []float64, hash uint64) []uint64 {
|
|
result := make([]uint64, len(nodes))
|
|
copy(nodes, result)
|
|
sortByWeight(len(nodes), false, nodes, weights, hash, reflect.Swapper(result))
|
|
return result
|
|
}
|
|
|
|
// SortSliceByValue received []T and hash to sort by value-distance
|
|
func SortSliceByValue(slice interface{}, hash uint64) {
|
|
rule := prepareRule(slice)
|
|
if rule != nil {
|
|
swap := reflect.Swapper(slice)
|
|
sortByDistance(len(rule), false, rule, hash, swap)
|
|
}
|
|
}
|
|
|
|
// SortHasherSliceByValue receives []Hasher and hash to sort by value-distance.
|
|
func SortHasherSliceByValue[T Hasher](slice []T, hash uint64) {
|
|
if len(slice) == 0 {
|
|
return
|
|
}
|
|
|
|
dist := make([]uint64, len(slice))
|
|
for i := range dist {
|
|
dist[i] = distance(slice[i].Hash(), hash)
|
|
}
|
|
sortHasherByDistance(slice, false, dist)
|
|
}
|
|
|
|
// SortSliceByWeightValue received []T, weights and hash to sort by value-distance * weights
|
|
func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) {
|
|
rule := prepareRule(slice)
|
|
if rule != nil {
|
|
swap := reflect.Swapper(slice)
|
|
sortByWeight(reflect.ValueOf(slice).Len(), false, rule, weights, hash, swap)
|
|
}
|
|
}
|
|
|
|
// SortHasherSliceByWeightValue receives []Hasher, weights and hash to sort by value-distance * weights.
|
|
func SortHasherSliceByWeightValue[T Hasher](slice []T, weights []float64, hash uint64) {
|
|
if len(slice) == 0 {
|
|
return
|
|
}
|
|
|
|
if allSameF64(weights) {
|
|
dist := make([]uint64, len(slice))
|
|
for i := range dist {
|
|
dist[i] = distance(slice[i].Hash(), hash)
|
|
}
|
|
sortHasherByDistance(slice, false, dist)
|
|
return
|
|
}
|
|
|
|
dist := make([]float64, len(slice))
|
|
for i := range dist {
|
|
d := distance(slice[i].Hash(), hash)
|
|
// `maxUint64 - distance` makes the shorter distance more valuable
|
|
// it is necessary for operation with normalized values
|
|
dist[i] = float64(^uint64(0)-d) * weights[i]
|
|
}
|
|
|
|
sort.Sort(&hasherSorter[T, float64]{
|
|
slice: slice,
|
|
dist: dist,
|
|
asc: false,
|
|
})
|
|
}
|
|
|
|
// sortHasherByDistance is similar to sortByDistance but accepts slice directly.
|
|
func sortHasherByDistance[T Hasher](slice []T, byIndex bool, dist []uint64) {
|
|
sort.Sort(&hasherSorter[T, uint64]{
|
|
slice: slice,
|
|
dist: dist,
|
|
asc: true,
|
|
})
|
|
}
|
|
|
|
// SortSliceByIndex received []T and hash to sort by index-distance
|
|
func SortSliceByIndex(slice interface{}, hash uint64) {
|
|
length := reflect.ValueOf(slice).Len()
|
|
swap := reflect.Swapper(slice)
|
|
sortByDistance(length, true, nil, hash, swap)
|
|
}
|
|
|
|
// SortSliceByWeightIndex received []T, weights and hash to sort by index-distance * weights
|
|
func SortSliceByWeightIndex(slice interface{}, weights []float64, hash uint64) {
|
|
length := reflect.ValueOf(slice).Len()
|
|
swap := reflect.Swapper(slice)
|
|
sortByWeight(length, true, nil, weights, hash, swap)
|
|
}
|
|
|
|
func prepareRule(slice interface{}) []uint64 {
|
|
t := reflect.TypeOf(slice)
|
|
if t.Kind() != reflect.Slice {
|
|
panic("HRW sort expects slice, got " + t.Kind().String())
|
|
}
|
|
|
|
var (
|
|
val = reflect.ValueOf(slice)
|
|
length = val.Len()
|
|
rule = make([]uint64, 0, length)
|
|
)
|
|
|
|
if length == 0 {
|
|
return nil
|
|
}
|
|
|
|
switch slice := slice.(type) {
|
|
case []int:
|
|
var key = make([]byte, 16)
|
|
for i := 0; i < length; i++ {
|
|
binary.BigEndian.PutUint64(key, uint64(slice[i]))
|
|
rule = append(rule, Hash(key))
|
|
}
|
|
case []uint:
|
|
var key = make([]byte, 16)
|
|
for i := 0; i < length; i++ {
|
|
binary.BigEndian.PutUint64(key, uint64(slice[i]))
|
|
rule = append(rule, Hash(key))
|
|
}
|
|
case []int8:
|
|
for i := 0; i < length; i++ {
|
|
key := byte(slice[i])
|
|
rule = append(rule, Hash([]byte{key}))
|
|
}
|
|
case []uint8:
|
|
for i := 0; i < length; i++ {
|
|
key := slice[i]
|
|
rule = append(rule, Hash([]byte{key}))
|
|
}
|
|
case []int16:
|
|
var key = make([]byte, 8)
|
|
for i := 0; i < length; i++ {
|
|
binary.BigEndian.PutUint16(key, uint16(slice[i]))
|
|
rule = append(rule, Hash(key))
|
|
}
|
|
case []uint16:
|
|
var key = make([]byte, 8)
|
|
for i := 0; i < length; i++ {
|
|
binary.BigEndian.PutUint16(key, slice[i])
|
|
rule = append(rule, Hash(key))
|
|
}
|
|
case []int32:
|
|
var key = make([]byte, 16)
|
|
for i := 0; i < length; i++ {
|
|
binary.BigEndian.PutUint32(key, uint32(slice[i]))
|
|
rule = append(rule, Hash(key))
|
|
}
|
|
case []uint32:
|
|
var key = make([]byte, 16)
|
|
for i := 0; i < length; i++ {
|
|
binary.BigEndian.PutUint32(key, slice[i])
|
|
rule = append(rule, Hash(key))
|
|
}
|
|
case []int64:
|
|
var key = make([]byte, 32)
|
|
for i := 0; i < length; i++ {
|
|
binary.BigEndian.PutUint64(key, uint64(slice[i]))
|
|
rule = append(rule, Hash(key))
|
|
}
|
|
case []uint64:
|
|
var key = make([]byte, 32)
|
|
for i := 0; i < length; i++ {
|
|
binary.BigEndian.PutUint64(key, slice[i])
|
|
rule = append(rule, Hash(key))
|
|
}
|
|
case []string:
|
|
for i := 0; i < length; i++ {
|
|
rule = append(rule, Hash([]byte(slice[i])))
|
|
}
|
|
|
|
default:
|
|
if _, ok := val.Index(0).Interface().(Hasher); !ok {
|
|
panic("slice elements must implement hrw.Hasher")
|
|
}
|
|
|
|
for i := 0; i < length; i++ {
|
|
h := val.Index(i).Interface().(Hasher)
|
|
rule = append(rule, h.Hash())
|
|
}
|
|
}
|
|
return rule
|
|
}
|
|
|
|
// ValidateWeights checks if weights are normalized between 0.0 and 1.0
|
|
func ValidateWeights(weights []float64) error {
|
|
for i := range weights {
|
|
if math.IsNaN(weights[i]) || weights[i] > NormalizedMaxWeight || weights[i] < NormalizedMinWeight {
|
|
return errors.New("weights are not normalized")
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// sortByWeight sorts nodes by weight using provided swapper.
|
|
// nodes contains hrw hashes. If it is nil, indices are used.
|
|
func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash uint64, swap func(i, j int)) {
|
|
// if all nodes have the same distance then sort uniformly
|
|
if allSameF64(weights) {
|
|
sortByDistance(l, byIndex, nodes, hash, swap)
|
|
return
|
|
}
|
|
|
|
dist := make([]float64, l)
|
|
for i := 0; i < l; i++ {
|
|
d := getDistance(byIndex, i, nodes, hash)
|
|
// `maxUint64 - distance` makes the shorter distance more valuable
|
|
// it is necessary for operation with normalized values
|
|
dist[i] = float64(^uint64(0)-d) * weights[i]
|
|
}
|
|
|
|
s := &sorter{
|
|
l: l,
|
|
swap: func(i, j int) {
|
|
swap(i, j)
|
|
dist[i], dist[j] = dist[j], dist[i]
|
|
},
|
|
less: func(i, j int) bool {
|
|
return dist[i] > dist[j] // higher distance must be placed lower to be first
|
|
},
|
|
}
|
|
sort.Sort(s)
|
|
}
|
|
|
|
// sortByDistance sorts nodes by hrw distance using provided swapper.
|
|
// nodes contains hrw hashes. If it is nil, indices are used.
|
|
func sortByDistance(l int, byIndex bool, nodes []uint64, hash uint64, swap func(i, j int)) {
|
|
dist := make([]uint64, l)
|
|
for i := 0; i < l; i++ {
|
|
dist[i] = getDistance(byIndex, i, nodes, hash)
|
|
}
|
|
|
|
s := &sorter{
|
|
l: l,
|
|
swap: func(i, j int) {
|
|
swap(i, j)
|
|
dist[i], dist[j] = dist[j], dist[i]
|
|
},
|
|
less: func(i, j int) bool {
|
|
return dist[i] < dist[j]
|
|
},
|
|
}
|
|
sort.Sort(s)
|
|
}
|
|
|
|
// getDistance return distance from nodes[i] to h.
|
|
// If byIndex is true, nodes index is used.
|
|
// Else if nodes[i] != nil, distance is calculated from this value.
|
|
// Otherwise, and hash from node index is taken.
|
|
func getDistance(byIndex bool, i int, nodes []uint64, h uint64) uint64 {
|
|
if nodes != nil {
|
|
return distance(nodes[i], h)
|
|
} else if byIndex {
|
|
return distance(uint64(i), h)
|
|
} else {
|
|
buf := make([]byte, 8)
|
|
binary.LittleEndian.PutUint64(buf, uint64(i))
|
|
return distance(Hash(buf), h)
|
|
}
|
|
}
|
|
|
|
// allSameF64 reports whether every element of fs compares equal to the first
// one. It returns true for an empty slice. Because NaN never compares equal,
// a slice whose first element is NaN yields false.
func allSameF64(fs []float64) bool {
	if len(fs) == 0 {
		return true
	}

	first := fs[0]
	for _, f := range fs {
		if f != first {
			return false
		}
	}
	return true
}
|