Compare commits
No commits in common. "master" and "master" have entirely different histories.
6 changed files with 64 additions and 103 deletions
3
LICENSE
3
LICENSE
|
@ -1,7 +1,6 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2023-2024 TrueCloudLab
|
||||
Copyright (c) 2019-2023 NSPCC
|
||||
Copyright (c) 2019 NSPCC
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
|
|
@ -1,10 +1,15 @@
|
|||
# Golang HRW implementation
|
||||
|
||||
[![Build Status](https://travis-ci.org/nspcc-dev/hrw.svg?branch=master)](https://travis-ci.org/nspcc-dev/hrw)
|
||||
[![codecov](https://codecov.io/gh/nspcc-dev/hrw/badge.svg)](https://codecov.io/gh/nspcc-dev/hrw)
|
||||
[![Report](https://goreportcard.com/badge/github.com/nspcc-dev/hrw)](https://goreportcard.com/report/github.com/nspcc-dev/hrw)
|
||||
[![GitHub release](https://img.shields.io/github/release/nspcc-dev/hrw.svg)](https://github.com/nspcc-dev/hrw)
|
||||
|
||||
[Rendezvous or highest random weight](https://en.wikipedia.org/wiki/Rendezvous_hashing) (HRW) hashing is an algorithm that allows clients to achieve distributed agreement on a set of k options out of a possible set of n options. A typical application is when clients need to agree on which sites (or proxies) objects are assigned to. When k is 1, it subsumes the goals of consistent hashing, using an entirely different method.
|
||||
|
||||
## Install
|
||||
|
||||
`go get git.frostfs.info/TrueCloudLab/hrw`
|
||||
`go get github.com/nspcc-dev/hrw`
|
||||
|
||||
## Benchmark:
|
||||
|
||||
|
|
2
go.mod
2
go.mod
|
@ -3,8 +3,8 @@ module git.frostfs.info/TrueCloudLab/hrw
|
|||
go 1.18
|
||||
|
||||
require (
|
||||
github.com/spaolacci/murmur3 v1.1.0
|
||||
github.com/stretchr/testify v1.3.0
|
||||
github.com/twmb/murmur3 v1.1.8
|
||||
)
|
||||
|
||||
require (
|
||||
|
|
4
go.sum
4
go.sum
|
@ -2,8 +2,8 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8
|
|||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
|
||||
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/twmb/murmur3 v1.1.8 h1:8Yt9taO/WN3l08xErzjeschgZU2QSrwm1kclYq+0aRg=
|
||||
github.com/twmb/murmur3 v1.1.8/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ=
|
||||
|
|
145
hrw.go
145
hrw.go
|
@ -9,7 +9,7 @@ import (
|
|||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/twmb/murmur3"
|
||||
"github.com/spaolacci/murmur3"
|
||||
)
|
||||
|
||||
type (
|
||||
|
@ -21,12 +21,6 @@ type (
|
|||
less func(i, j int) bool
|
||||
swap func(i, j int)
|
||||
}
|
||||
|
||||
hasherSorter[T Hasher, N interface{ ~uint64 | ~float64 }] struct {
|
||||
slice []T
|
||||
dist []N
|
||||
asc bool
|
||||
}
|
||||
)
|
||||
|
||||
// Boundaries of valid normalized weights
|
||||
|
@ -39,18 +33,6 @@ func (s *sorter) Len() int { return s.l }
|
|||
func (s *sorter) Less(i, j int) bool { return s.less(i, j) }
|
||||
func (s *sorter) Swap(i, j int) { s.swap(i, j) }
|
||||
|
||||
func (s *hasherSorter[T, N]) Len() int { return len(s.slice) }
|
||||
func (s *hasherSorter[T, N]) Less(i, j int) bool {
|
||||
if s.asc {
|
||||
return s.dist[i] < s.dist[j]
|
||||
}
|
||||
return s.dist[i] > s.dist[j]
|
||||
}
|
||||
func (s *hasherSorter[T, N]) Swap(i, j int) {
|
||||
s.slice[i], s.slice[j] = s.slice[j], s.slice[i]
|
||||
s.dist[i], s.dist[j] = s.dist[j], s.dist[i]
|
||||
}
|
||||
|
||||
func distance(x uint64, y uint64) uint64 {
|
||||
acc := x ^ y
|
||||
// here used mmh3 64 bit finalizer
|
||||
|
@ -68,11 +50,6 @@ func Hash(key []byte) uint64 {
|
|||
return murmur3.Sum64(key)
|
||||
}
|
||||
|
||||
// StringHash uses murmur3 hash to return uint64
|
||||
func StringHash(key string) uint64 {
|
||||
return murmur3.StringSum64(key)
|
||||
}
|
||||
|
||||
// Sort receive nodes and hash, and sort it by distance
|
||||
func Sort(nodes []uint64, hash uint64) []uint64 {
|
||||
l := len(nodes)
|
||||
|
@ -108,15 +85,13 @@ func SortSliceByValue(slice interface{}, hash uint64) {
|
|||
|
||||
// SortHasherSliceByValue receives []Hasher and hash to sort by value-distance.
|
||||
func SortHasherSliceByValue[T Hasher](slice []T, hash uint64) {
|
||||
if len(slice) == 0 {
|
||||
return
|
||||
rule := prepareHasherRule(slice)
|
||||
if rule != nil {
|
||||
swap := func(i, j int) {
|
||||
slice[i], slice[j] = slice[j], slice[i]
|
||||
}
|
||||
sortByDistance(len(rule), false, rule, hash, swap)
|
||||
}
|
||||
|
||||
dist := make([]uint64, len(slice))
|
||||
for i := range dist {
|
||||
dist[i] = distance(slice[i].Hash(), hash)
|
||||
}
|
||||
sortHasherByDistance(slice, false, dist)
|
||||
}
|
||||
|
||||
// SortSliceByWeightValue received []T, weights and hash to sort by value-distance * weights
|
||||
|
@ -130,41 +105,13 @@ func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) {
|
|||
|
||||
// SortHasherSliceByWeightValue receives []Hasher, weights and hash to sort by value-distance * weights.
|
||||
func SortHasherSliceByWeightValue[T Hasher](slice []T, weights []float64, hash uint64) {
|
||||
if len(slice) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
if allSameF64(weights) {
|
||||
dist := make([]uint64, len(slice))
|
||||
for i := range dist {
|
||||
dist[i] = distance(slice[i].Hash(), hash)
|
||||
rule := prepareHasherRule(slice)
|
||||
if rule != nil {
|
||||
swap := func(i, j int) {
|
||||
slice[i], slice[j] = slice[j], slice[i]
|
||||
}
|
||||
sortHasherByDistance(slice, false, dist)
|
||||
return
|
||||
sortByWeight(len(slice), false, rule, weights, hash, swap)
|
||||
}
|
||||
|
||||
dist := make([]float64, len(slice))
|
||||
for i := range dist {
|
||||
d := distance(slice[i].Hash(), hash)
|
||||
// `maxUint64 - distance` makes the shorter distance more valuable
|
||||
// it is necessary for operation with normalized values
|
||||
dist[i] = float64(^uint64(0)-d) * weights[i]
|
||||
}
|
||||
|
||||
sort.Sort(&hasherSorter[T, float64]{
|
||||
slice: slice,
|
||||
dist: dist,
|
||||
asc: false,
|
||||
})
|
||||
}
|
||||
|
||||
// sortHasherByDistance is similar to sortByDistance but accepts slice directly.
|
||||
func sortHasherByDistance[T Hasher](slice []T, byIndex bool, dist []uint64) {
|
||||
sort.Sort(&hasherSorter[T, uint64]{
|
||||
slice: slice,
|
||||
dist: dist,
|
||||
asc: true,
|
||||
})
|
||||
}
|
||||
|
||||
// SortSliceByIndex received []T and hash to sort by index-distance
|
||||
|
@ -274,6 +221,18 @@ func prepareRule(slice interface{}) []uint64 {
|
|||
return rule
|
||||
}
|
||||
|
||||
func prepareHasherRule[T Hasher](hashers []T) []uint64 {
|
||||
length := len(hashers)
|
||||
if length == 0 {
|
||||
return nil
|
||||
}
|
||||
result := make([]uint64, length)
|
||||
for i := 0; i < length; i++ {
|
||||
result[i] = hashers[i].Hash()
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// ValidateWeights checks if weights are normalized between 0.0 and 1.0
|
||||
func ValidateWeights(weights []float64) error {
|
||||
for i := range weights {
|
||||
|
@ -284,6 +243,24 @@ func ValidateWeights(weights []float64) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func newSorter(l int, byIndex bool, nodes []uint64, h uint64,
|
||||
swap func(i, j int)) (*sorter, []int, []uint64) {
|
||||
ind := make([]int, l)
|
||||
dist := make([]uint64, l)
|
||||
for i := 0; i < l; i++ {
|
||||
ind[i] = i
|
||||
dist[i] = getDistance(byIndex, i, nodes, h)
|
||||
}
|
||||
|
||||
return &sorter{
|
||||
l: l,
|
||||
swap: func(i, j int) {
|
||||
swap(i, j)
|
||||
ind[i], ind[j] = ind[j], ind[i]
|
||||
},
|
||||
}, ind, dist
|
||||
}
|
||||
|
||||
// sortByWeight sorts nodes by weight using provided swapper.
|
||||
// nodes contains hrw hashes. If it is nil, indices are used.
|
||||
func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash uint64, swap func(i, j int)) {
|
||||
|
@ -293,23 +270,14 @@ func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash u
|
|||
return
|
||||
}
|
||||
|
||||
dist := make([]float64, l)
|
||||
for i := 0; i < l; i++ {
|
||||
d := getDistance(byIndex, i, nodes, hash)
|
||||
s, ind, dist := newSorter(l, byIndex, nodes, hash, swap)
|
||||
s.less = func(i, j int) bool {
|
||||
ii, jj := ind[i], ind[j]
|
||||
// `maxUint64 - distance` makes the shorter distance more valuable
|
||||
// it is necessary for operation with normalized values
|
||||
dist[i] = float64(^uint64(0)-d) * weights[i]
|
||||
}
|
||||
|
||||
s := &sorter{
|
||||
l: l,
|
||||
swap: func(i, j int) {
|
||||
swap(i, j)
|
||||
dist[i], dist[j] = dist[j], dist[i]
|
||||
},
|
||||
less: func(i, j int) bool {
|
||||
return dist[i] > dist[j] // higher distance must be placed lower to be first
|
||||
},
|
||||
wi := float64(^uint64(0)-dist[ii]) * weights[ii]
|
||||
wj := float64(^uint64(0)-dist[jj]) * weights[jj]
|
||||
return wi > wj // higher distance must be placed lower to be first
|
||||
}
|
||||
sort.Sort(s)
|
||||
}
|
||||
|
@ -317,20 +285,9 @@ func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash u
|
|||
// sortByDistance sorts nodes by hrw distance using provided swapper.
|
||||
// nodes contains hrw hashes. If it is nil, indices are used.
|
||||
func sortByDistance(l int, byIndex bool, nodes []uint64, hash uint64, swap func(i, j int)) {
|
||||
dist := make([]uint64, l)
|
||||
for i := 0; i < l; i++ {
|
||||
dist[i] = getDistance(byIndex, i, nodes, hash)
|
||||
}
|
||||
|
||||
s := &sorter{
|
||||
l: l,
|
||||
swap: func(i, j int) {
|
||||
swap(i, j)
|
||||
dist[i], dist[j] = dist[j], dist[i]
|
||||
},
|
||||
less: func(i, j int) bool {
|
||||
return dist[i] < dist[j]
|
||||
},
|
||||
s, ind, dist := newSorter(l, byIndex, nodes, hash, swap)
|
||||
s.less = func(i, j int) bool {
|
||||
return dist[ind[i]] < dist[ind[j]]
|
||||
}
|
||||
sort.Sort(s)
|
||||
}
|
||||
|
|
|
@ -61,7 +61,7 @@ func Example() {
|
|||
}
|
||||
|
||||
func (h hashString) Hash() uint64 {
|
||||
return StringHash(string(h))
|
||||
return Hash([]byte(h))
|
||||
}
|
||||
|
||||
func TestSortSliceByIndex(t *testing.T) {
|
||||
|
@ -737,7 +737,7 @@ func BenchmarkSortHashersByWeightValueReflection_fnv_100(b *testing.B) {
|
|||
}
|
||||
|
||||
func BenchmarkSortHashersByWeightValueReflection_fnv_1000(b *testing.B) {
|
||||
benchmarkSortHashersByWeightValueRelection(b, 1000, Hash(testKey))
|
||||
benchmarkSortHashersByWeightValueRelection(b, 100, Hash(testKey))
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByWeightValueTyped_fnv_10(b *testing.B) {
|
||||
|
@ -749,7 +749,7 @@ func BenchmarkSortHashersByWeightValueTyped_fnv_100(b *testing.B) {
|
|||
}
|
||||
|
||||
func BenchmarkSortHashersByWeightValueTyped_fnv_1000(b *testing.B) {
|
||||
benchmarkSortHashersByWeightValueTyped(b, 1000, Hash(testKey))
|
||||
benchmarkSortHashersByWeightValueTyped(b, 100, Hash(testKey))
|
||||
}
|
||||
|
||||
func benchmarkSort(b *testing.B, n int, hash uint64) uint64 {
|
||||
|
|
Loading…
Reference in a new issue