forked from TrueCloudLab/hrw
Compare commits
10 commits
Author | SHA1 | Date | |
---|---|---|---|
3a8489bfe7 | |||
1b7ec474c9 | |||
78c3f718b1 | |||
16a7740ccd | |||
2ac89c82b6 | |||
266da7c69a | |||
c52f74d8e1 | |||
895ecf150f | |||
213c105ac1 | |||
c175ef4099 |
6 changed files with 104 additions and 65 deletions
3
LICENSE
3
LICENSE
|
@ -1,6 +1,7 @@
|
||||||
MIT License
|
MIT License
|
||||||
|
|
||||||
Copyright (c) 2019 NSPCC
|
Copyright (c) 2023-2024 TrueCloudLab
|
||||||
|
Copyright (c) 2019-2023 NSPCC
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
|
|
@ -1,15 +1,10 @@
|
||||||
# Golang HRW implementation
|
# Golang HRW implementation
|
||||||
|
|
||||||
[![Build Status](https://travis-ci.org/nspcc-dev/hrw.svg?branch=master)](https://travis-ci.org/nspcc-dev/hrw)
|
|
||||||
[![codecov](https://codecov.io/gh/nspcc-dev/hrw/badge.svg)](https://codecov.io/gh/nspcc-dev/hrw)
|
|
||||||
[![Report](https://goreportcard.com/badge/github.com/nspcc-dev/hrw)](https://goreportcard.com/report/github.com/nspcc-dev/hrw)
|
|
||||||
[![GitHub release](https://img.shields.io/github/release/nspcc-dev/hrw.svg)](https://github.com/nspcc-dev/hrw)
|
|
||||||
|
|
||||||
[Rendezvous or highest random weight](https://en.wikipedia.org/wiki/Rendezvous_hashing) (HRW) hashing is an algorithm that allows clients to achieve distributed agreement on a set of k options out of a possible set of n options. A typical application is when clients need to agree on which sites (or proxies) objects are assigned to. When k is 1, it subsumes the goals of consistent hashing, using an entirely different method.
|
[Rendezvous or highest random weight](https://en.wikipedia.org/wiki/Rendezvous_hashing) (HRW) hashing is an algorithm that allows clients to achieve distributed agreement on a set of k options out of a possible set of n options. A typical application is when clients need to agree on which sites (or proxies) objects are assigned to. When k is 1, it subsumes the goals of consistent hashing, using an entirely different method.
|
||||||
|
|
||||||
## Install
|
## Install
|
||||||
|
|
||||||
`go get github.com/nspcc-dev/hrw`
|
`go get git.frostfs.info/TrueCloudLab/hrw`
|
||||||
|
|
||||||
## Benchmark:
|
## Benchmark:
|
||||||
|
|
||||||
|
|
2
go.mod
2
go.mod
|
@ -3,8 +3,8 @@ module git.frostfs.info/TrueCloudLab/hrw
|
||||||
go 1.18
|
go 1.18
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/spaolacci/murmur3 v1.1.0
|
|
||||||
github.com/stretchr/testify v1.3.0
|
github.com/stretchr/testify v1.3.0
|
||||||
|
github.com/twmb/murmur3 v1.1.8
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
|
4
go.sum
4
go.sum
|
@ -2,8 +2,8 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8
|
||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
|
|
||||||
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
|
|
||||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
|
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
|
||||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||||
|
github.com/twmb/murmur3 v1.1.8 h1:8Yt9taO/WN3l08xErzjeschgZU2QSrwm1kclYq+0aRg=
|
||||||
|
github.com/twmb/murmur3 v1.1.8/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ=
|
||||||
|
|
143
hrw.go
143
hrw.go
|
@ -9,7 +9,7 @@ import (
|
||||||
"reflect"
|
"reflect"
|
||||||
"sort"
|
"sort"
|
||||||
|
|
||||||
"github.com/spaolacci/murmur3"
|
"github.com/twmb/murmur3"
|
||||||
)
|
)
|
||||||
|
|
||||||
type (
|
type (
|
||||||
|
@ -21,6 +21,12 @@ type (
|
||||||
less func(i, j int) bool
|
less func(i, j int) bool
|
||||||
swap func(i, j int)
|
swap func(i, j int)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
hasherSorter[T Hasher, N interface{ ~uint64 | ~float64 }] struct {
|
||||||
|
slice []T
|
||||||
|
dist []N
|
||||||
|
asc bool
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
// Boundaries of valid normalized weights
|
// Boundaries of valid normalized weights
|
||||||
|
@ -33,6 +39,18 @@ func (s *sorter) Len() int { return s.l }
|
||||||
func (s *sorter) Less(i, j int) bool { return s.less(i, j) }
|
func (s *sorter) Less(i, j int) bool { return s.less(i, j) }
|
||||||
func (s *sorter) Swap(i, j int) { s.swap(i, j) }
|
func (s *sorter) Swap(i, j int) { s.swap(i, j) }
|
||||||
|
|
||||||
|
func (s *hasherSorter[T, N]) Len() int { return len(s.slice) }
|
||||||
|
func (s *hasherSorter[T, N]) Less(i, j int) bool {
|
||||||
|
if s.asc {
|
||||||
|
return s.dist[i] < s.dist[j]
|
||||||
|
}
|
||||||
|
return s.dist[i] > s.dist[j]
|
||||||
|
}
|
||||||
|
func (s *hasherSorter[T, N]) Swap(i, j int) {
|
||||||
|
s.slice[i], s.slice[j] = s.slice[j], s.slice[i]
|
||||||
|
s.dist[i], s.dist[j] = s.dist[j], s.dist[i]
|
||||||
|
}
|
||||||
|
|
||||||
func distance(x uint64, y uint64) uint64 {
|
func distance(x uint64, y uint64) uint64 {
|
||||||
acc := x ^ y
|
acc := x ^ y
|
||||||
// the mmh3 64-bit finalizer is used here
|
// the mmh3 64-bit finalizer is used here
|
||||||
|
@ -50,6 +68,11 @@ func Hash(key []byte) uint64 {
|
||||||
return murmur3.Sum64(key)
|
return murmur3.Sum64(key)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// StringHash returns the 64-bit murmur3 hash of the given string
|
||||||
|
func StringHash(key string) uint64 {
|
||||||
|
return murmur3.StringSum64(key)
|
||||||
|
}
|
||||||
|
|
||||||
// Sort receives nodes and a hash, and sorts the nodes by distance
|
// Sort receives nodes and a hash, and sorts the nodes by distance
|
||||||
func Sort(nodes []uint64, hash uint64) []uint64 {
|
func Sort(nodes []uint64, hash uint64) []uint64 {
|
||||||
l := len(nodes)
|
l := len(nodes)
|
||||||
|
@ -85,13 +108,15 @@ func SortSliceByValue(slice interface{}, hash uint64) {
|
||||||
|
|
||||||
// SortHasherSliceByValue receives []Hasher and hash to sort by value-distance.
|
// SortHasherSliceByValue receives []Hasher and hash to sort by value-distance.
|
||||||
func SortHasherSliceByValue[T Hasher](slice []T, hash uint64) {
|
func SortHasherSliceByValue[T Hasher](slice []T, hash uint64) {
|
||||||
rule := prepareHasherRule(slice)
|
if len(slice) == 0 {
|
||||||
if rule != nil {
|
return
|
||||||
swap := func(i, j int) {
|
|
||||||
slice[i], slice[j] = slice[j], slice[i]
|
|
||||||
}
|
}
|
||||||
sortByDistance(len(rule), false, rule, hash, swap)
|
|
||||||
|
dist := make([]uint64, len(slice))
|
||||||
|
for i := range dist {
|
||||||
|
dist[i] = distance(slice[i].Hash(), hash)
|
||||||
}
|
}
|
||||||
|
sortHasherByDistance(slice, false, dist)
|
||||||
}
|
}
|
||||||
|
|
||||||
// SortSliceByWeightValue receives []T, weights and hash to sort by value-distance * weights
|
// SortSliceByWeightValue receives []T, weights and hash to sort by value-distance * weights
|
||||||
|
@ -105,13 +130,41 @@ func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) {
|
||||||
|
|
||||||
// SortHasherSliceByWeightValue receives []Hasher, weights and hash to sort by value-distance * weights.
|
// SortHasherSliceByWeightValue receives []Hasher, weights and hash to sort by value-distance * weights.
|
||||||
func SortHasherSliceByWeightValue[T Hasher](slice []T, weights []float64, hash uint64) {
|
func SortHasherSliceByWeightValue[T Hasher](slice []T, weights []float64, hash uint64) {
|
||||||
rule := prepareHasherRule(slice)
|
if len(slice) == 0 {
|
||||||
if rule != nil {
|
return
|
||||||
swap := func(i, j int) {
|
|
||||||
slice[i], slice[j] = slice[j], slice[i]
|
|
||||||
}
|
}
|
||||||
sortByWeight(len(slice), false, rule, weights, hash, swap)
|
|
||||||
|
if allSameF64(weights) {
|
||||||
|
dist := make([]uint64, len(slice))
|
||||||
|
for i := range dist {
|
||||||
|
dist[i] = distance(slice[i].Hash(), hash)
|
||||||
}
|
}
|
||||||
|
sortHasherByDistance(slice, false, dist)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
dist := make([]float64, len(slice))
|
||||||
|
for i := range dist {
|
||||||
|
d := distance(slice[i].Hash(), hash)
|
||||||
|
// `maxUint64 - distance` makes the shorter distance more valuable
|
||||||
|
// it is necessary for operation with normalized values
|
||||||
|
dist[i] = float64(^uint64(0)-d) * weights[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.Sort(&hasherSorter[T, float64]{
|
||||||
|
slice: slice,
|
||||||
|
dist: dist,
|
||||||
|
asc: false,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// sortHasherByDistance is similar to sortByDistance but accepts slice directly.
|
||||||
|
func sortHasherByDistance[T Hasher](slice []T, byIndex bool, dist []uint64) {
|
||||||
|
sort.Sort(&hasherSorter[T, uint64]{
|
||||||
|
slice: slice,
|
||||||
|
dist: dist,
|
||||||
|
asc: true,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// SortSliceByIndex receives []T and hash to sort by index-distance
|
// SortSliceByIndex receives []T and hash to sort by index-distance
|
||||||
|
@ -221,18 +274,6 @@ func prepareRule(slice interface{}) []uint64 {
|
||||||
return rule
|
return rule
|
||||||
}
|
}
|
||||||
|
|
||||||
func prepareHasherRule[T Hasher](hashers []T) []uint64 {
|
|
||||||
length := len(hashers)
|
|
||||||
if length == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
result := make([]uint64, length)
|
|
||||||
for i := 0; i < length; i++ {
|
|
||||||
result[i] = hashers[i].Hash()
|
|
||||||
}
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
||||||
// ValidateWeights checks if weights are normalized between 0.0 and 1.0
|
// ValidateWeights checks if weights are normalized between 0.0 and 1.0
|
||||||
func ValidateWeights(weights []float64) error {
|
func ValidateWeights(weights []float64) error {
|
||||||
for i := range weights {
|
for i := range weights {
|
||||||
|
@ -243,24 +284,6 @@ func ValidateWeights(weights []float64) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func newSorter(l int, byIndex bool, nodes []uint64, h uint64,
|
|
||||||
swap func(i, j int)) (*sorter, []int, []uint64) {
|
|
||||||
ind := make([]int, l)
|
|
||||||
dist := make([]uint64, l)
|
|
||||||
for i := 0; i < l; i++ {
|
|
||||||
ind[i] = i
|
|
||||||
dist[i] = getDistance(byIndex, i, nodes, h)
|
|
||||||
}
|
|
||||||
|
|
||||||
return &sorter{
|
|
||||||
l: l,
|
|
||||||
swap: func(i, j int) {
|
|
||||||
swap(i, j)
|
|
||||||
ind[i], ind[j] = ind[j], ind[i]
|
|
||||||
},
|
|
||||||
}, ind, dist
|
|
||||||
}
|
|
||||||
|
|
||||||
// sortByWeight sorts nodes by weight using provided swapper.
|
// sortByWeight sorts nodes by weight using provided swapper.
|
||||||
// nodes contains hrw hashes. If it is nil, indices are used.
|
// nodes contains hrw hashes. If it is nil, indices are used.
|
||||||
func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash uint64, swap func(i, j int)) {
|
func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash uint64, swap func(i, j int)) {
|
||||||
|
@ -270,14 +293,23 @@ func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash u
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
s, ind, dist := newSorter(l, byIndex, nodes, hash, swap)
|
dist := make([]float64, l)
|
||||||
s.less = func(i, j int) bool {
|
for i := 0; i < l; i++ {
|
||||||
ii, jj := ind[i], ind[j]
|
d := getDistance(byIndex, i, nodes, hash)
|
||||||
// `maxUint64 - distance` makes the shorter distance more valuable
|
// `maxUint64 - distance` makes the shorter distance more valuable
|
||||||
// it is necessary for operation with normalized values
|
// it is necessary for operation with normalized values
|
||||||
wi := float64(^uint64(0)-dist[ii]) * weights[ii]
|
dist[i] = float64(^uint64(0)-d) * weights[i]
|
||||||
wj := float64(^uint64(0)-dist[jj]) * weights[jj]
|
}
|
||||||
return wi > wj // higher distance must be placed lower to be first
|
|
||||||
|
s := &sorter{
|
||||||
|
l: l,
|
||||||
|
swap: func(i, j int) {
|
||||||
|
swap(i, j)
|
||||||
|
dist[i], dist[j] = dist[j], dist[i]
|
||||||
|
},
|
||||||
|
less: func(i, j int) bool {
|
||||||
|
return dist[i] > dist[j] // higher distance must be placed lower to be first
|
||||||
|
},
|
||||||
}
|
}
|
||||||
sort.Sort(s)
|
sort.Sort(s)
|
||||||
}
|
}
|
||||||
|
@ -285,9 +317,20 @@ func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash u
|
||||||
// sortByDistance sorts nodes by hrw distance using provided swapper.
|
// sortByDistance sorts nodes by hrw distance using provided swapper.
|
||||||
// nodes contains hrw hashes. If it is nil, indices are used.
|
// nodes contains hrw hashes. If it is nil, indices are used.
|
||||||
func sortByDistance(l int, byIndex bool, nodes []uint64, hash uint64, swap func(i, j int)) {
|
func sortByDistance(l int, byIndex bool, nodes []uint64, hash uint64, swap func(i, j int)) {
|
||||||
s, ind, dist := newSorter(l, byIndex, nodes, hash, swap)
|
dist := make([]uint64, l)
|
||||||
s.less = func(i, j int) bool {
|
for i := 0; i < l; i++ {
|
||||||
return dist[ind[i]] < dist[ind[j]]
|
dist[i] = getDistance(byIndex, i, nodes, hash)
|
||||||
|
}
|
||||||
|
|
||||||
|
s := &sorter{
|
||||||
|
l: l,
|
||||||
|
swap: func(i, j int) {
|
||||||
|
swap(i, j)
|
||||||
|
dist[i], dist[j] = dist[j], dist[i]
|
||||||
|
},
|
||||||
|
less: func(i, j int) bool {
|
||||||
|
return dist[i] < dist[j]
|
||||||
|
},
|
||||||
}
|
}
|
||||||
sort.Sort(s)
|
sort.Sort(s)
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,7 +61,7 @@ func Example() {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h hashString) Hash() uint64 {
|
func (h hashString) Hash() uint64 {
|
||||||
return Hash([]byte(h))
|
return StringHash(string(h))
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestSortSliceByIndex(t *testing.T) {
|
func TestSortSliceByIndex(t *testing.T) {
|
||||||
|
@ -737,7 +737,7 @@ func BenchmarkSortHashersByWeightValueReflection_fnv_100(b *testing.B) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkSortHashersByWeightValueReflection_fnv_1000(b *testing.B) {
|
func BenchmarkSortHashersByWeightValueReflection_fnv_1000(b *testing.B) {
|
||||||
benchmarkSortHashersByWeightValueRelection(b, 100, Hash(testKey))
|
benchmarkSortHashersByWeightValueRelection(b, 1000, Hash(testKey))
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkSortHashersByWeightValueTyped_fnv_10(b *testing.B) {
|
func BenchmarkSortHashersByWeightValueTyped_fnv_10(b *testing.B) {
|
||||||
|
@ -749,7 +749,7 @@ func BenchmarkSortHashersByWeightValueTyped_fnv_100(b *testing.B) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkSortHashersByWeightValueTyped_fnv_1000(b *testing.B) {
|
func BenchmarkSortHashersByWeightValueTyped_fnv_1000(b *testing.B) {
|
||||||
benchmarkSortHashersByWeightValueTyped(b, 100, Hash(testKey))
|
benchmarkSortHashersByWeightValueTyped(b, 1000, Hash(testKey))
|
||||||
}
|
}
|
||||||
|
|
||||||
func benchmarkSort(b *testing.B, n int, hash uint64) uint64 {
|
func benchmarkSort(b *testing.B, n int, hash uint64) uint64 {
|
||||||
|
|
Loading…
Reference in a new issue