Compare commits

..

No commits in common. "master" and "master" have entirely different histories.

6 changed files with 64 additions and 103 deletions

View file

@ -1,7 +1,6 @@
MIT License
Copyright (c) 2023-2024 TrueCloudLab
Copyright (c) 2019-2023 NSPCC
Copyright (c) 2019 NSPCC
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View file

@ -1,10 +1,15 @@
# Golang HRW implementation
[![Build Status](https://travis-ci.org/nspcc-dev/hrw.svg?branch=master)](https://travis-ci.org/nspcc-dev/hrw)
[![codecov](https://codecov.io/gh/nspcc-dev/hrw/badge.svg)](https://codecov.io/gh/nspcc-dev/hrw)
[![Report](https://goreportcard.com/badge/github.com/nspcc-dev/hrw)](https://goreportcard.com/report/github.com/nspcc-dev/hrw)
[![GitHub release](https://img.shields.io/github/release/nspcc-dev/hrw.svg)](https://github.com/nspcc-dev/hrw)
[Rendezvous or highest random weight](https://en.wikipedia.org/wiki/Rendezvous_hashing) (HRW) hashing is an algorithm that allows clients to achieve distributed agreement on a set of k options out of a possible set of n options. A typical application is when clients need to agree on which sites (or proxies) objects are assigned to. When k is 1, it subsumes the goals of consistent hashing, using an entirely different method.
## Install
`go get git.frostfs.info/TrueCloudLab/hrw`
`go get github.com/nspcc-dev/hrw`
## Benchmark:

2
go.mod
View file

@ -3,8 +3,8 @@ module git.frostfs.info/TrueCloudLab/hrw
go 1.18
require (
github.com/spaolacci/murmur3 v1.1.0
github.com/stretchr/testify v1.3.0
github.com/twmb/murmur3 v1.1.8
)
require (

4
go.sum
View file

@ -2,8 +2,8 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/twmb/murmur3 v1.1.8 h1:8Yt9taO/WN3l08xErzjeschgZU2QSrwm1kclYq+0aRg=
github.com/twmb/murmur3 v1.1.8/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ=

145
hrw.go
View file

@ -9,7 +9,7 @@ import (
"reflect"
"sort"
"github.com/twmb/murmur3"
"github.com/spaolacci/murmur3"
)
type (
@ -21,12 +21,6 @@ type (
less func(i, j int) bool
swap func(i, j int)
}
hasherSorter[T Hasher, N interface{ ~uint64 | ~float64 }] struct {
slice []T
dist []N
asc bool
}
)
// Boundaries of valid normalized weights
@ -39,18 +33,6 @@ func (s *sorter) Len() int { return s.l }
func (s *sorter) Less(i, j int) bool { return s.less(i, j) }
func (s *sorter) Swap(i, j int) { s.swap(i, j) }
func (s *hasherSorter[T, N]) Len() int { return len(s.slice) }
func (s *hasherSorter[T, N]) Less(i, j int) bool {
if s.asc {
return s.dist[i] < s.dist[j]
}
return s.dist[i] > s.dist[j]
}
func (s *hasherSorter[T, N]) Swap(i, j int) {
s.slice[i], s.slice[j] = s.slice[j], s.slice[i]
s.dist[i], s.dist[j] = s.dist[j], s.dist[i]
}
func distance(x uint64, y uint64) uint64 {
acc := x ^ y
// here used mmh3 64 bit finalizer
@ -68,11 +50,6 @@ func Hash(key []byte) uint64 {
return murmur3.Sum64(key)
}
// StringHash uses murmur3 hash to return uint64
func StringHash(key string) uint64 {
return murmur3.StringSum64(key)
}
// Sort receive nodes and hash, and sort it by distance
func Sort(nodes []uint64, hash uint64) []uint64 {
l := len(nodes)
@ -108,15 +85,13 @@ func SortSliceByValue(slice interface{}, hash uint64) {
// SortHasherSliceByValue receives []Hasher and hash to sort by value-distance.
func SortHasherSliceByValue[T Hasher](slice []T, hash uint64) {
if len(slice) == 0 {
return
rule := prepareHasherRule(slice)
if rule != nil {
swap := func(i, j int) {
slice[i], slice[j] = slice[j], slice[i]
}
sortByDistance(len(rule), false, rule, hash, swap)
}
dist := make([]uint64, len(slice))
for i := range dist {
dist[i] = distance(slice[i].Hash(), hash)
}
sortHasherByDistance(slice, false, dist)
}
// SortSliceByWeightValue received []T, weights and hash to sort by value-distance * weights
@ -130,41 +105,13 @@ func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) {
// SortHasherSliceByWeightValue receives []Hasher, weights and hash to sort by value-distance * weights.
func SortHasherSliceByWeightValue[T Hasher](slice []T, weights []float64, hash uint64) {
if len(slice) == 0 {
return
}
if allSameF64(weights) {
dist := make([]uint64, len(slice))
for i := range dist {
dist[i] = distance(slice[i].Hash(), hash)
rule := prepareHasherRule(slice)
if rule != nil {
swap := func(i, j int) {
slice[i], slice[j] = slice[j], slice[i]
}
sortHasherByDistance(slice, false, dist)
return
sortByWeight(len(slice), false, rule, weights, hash, swap)
}
dist := make([]float64, len(slice))
for i := range dist {
d := distance(slice[i].Hash(), hash)
// `maxUint64 - distance` makes the shorter distance more valuable
// it is necessary for operation with normalized values
dist[i] = float64(^uint64(0)-d) * weights[i]
}
sort.Sort(&hasherSorter[T, float64]{
slice: slice,
dist: dist,
asc: false,
})
}
// sortHasherByDistance is similar to sortByDistance but accepts slice directly.
func sortHasherByDistance[T Hasher](slice []T, byIndex bool, dist []uint64) {
sort.Sort(&hasherSorter[T, uint64]{
slice: slice,
dist: dist,
asc: true,
})
}
// SortSliceByIndex received []T and hash to sort by index-distance
@ -274,6 +221,18 @@ func prepareRule(slice interface{}) []uint64 {
return rule
}
func prepareHasherRule[T Hasher](hashers []T) []uint64 {
length := len(hashers)
if length == 0 {
return nil
}
result := make([]uint64, length)
for i := 0; i < length; i++ {
result[i] = hashers[i].Hash()
}
return result
}
// ValidateWeights checks if weights are normalized between 0.0 and 1.0
func ValidateWeights(weights []float64) error {
for i := range weights {
@ -284,6 +243,24 @@ func ValidateWeights(weights []float64) error {
return nil
}
func newSorter(l int, byIndex bool, nodes []uint64, h uint64,
swap func(i, j int)) (*sorter, []int, []uint64) {
ind := make([]int, l)
dist := make([]uint64, l)
for i := 0; i < l; i++ {
ind[i] = i
dist[i] = getDistance(byIndex, i, nodes, h)
}
return &sorter{
l: l,
swap: func(i, j int) {
swap(i, j)
ind[i], ind[j] = ind[j], ind[i]
},
}, ind, dist
}
// sortByWeight sorts nodes by weight using provided swapper.
// nodes contains hrw hashes. If it is nil, indices are used.
func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash uint64, swap func(i, j int)) {
@ -293,23 +270,14 @@ func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash u
return
}
dist := make([]float64, l)
for i := 0; i < l; i++ {
d := getDistance(byIndex, i, nodes, hash)
s, ind, dist := newSorter(l, byIndex, nodes, hash, swap)
s.less = func(i, j int) bool {
ii, jj := ind[i], ind[j]
// `maxUint64 - distance` makes the shorter distance more valuable
// it is necessary for operation with normalized values
dist[i] = float64(^uint64(0)-d) * weights[i]
}
s := &sorter{
l: l,
swap: func(i, j int) {
swap(i, j)
dist[i], dist[j] = dist[j], dist[i]
},
less: func(i, j int) bool {
return dist[i] > dist[j] // higher distance must be placed lower to be first
},
wi := float64(^uint64(0)-dist[ii]) * weights[ii]
wj := float64(^uint64(0)-dist[jj]) * weights[jj]
return wi > wj // higher distance must be placed lower to be first
}
sort.Sort(s)
}
@ -317,20 +285,9 @@ func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash u
// sortByDistance sorts nodes by hrw distance using provided swapper.
// nodes contains hrw hashes. If it is nil, indices are used.
func sortByDistance(l int, byIndex bool, nodes []uint64, hash uint64, swap func(i, j int)) {
dist := make([]uint64, l)
for i := 0; i < l; i++ {
dist[i] = getDistance(byIndex, i, nodes, hash)
}
s := &sorter{
l: l,
swap: func(i, j int) {
swap(i, j)
dist[i], dist[j] = dist[j], dist[i]
},
less: func(i, j int) bool {
return dist[i] < dist[j]
},
s, ind, dist := newSorter(l, byIndex, nodes, hash, swap)
s.less = func(i, j int) bool {
return dist[ind[i]] < dist[ind[j]]
}
sort.Sort(s)
}

View file

@ -61,7 +61,7 @@ func Example() {
}
func (h hashString) Hash() uint64 {
return StringHash(string(h))
return Hash([]byte(h))
}
func TestSortSliceByIndex(t *testing.T) {
@ -737,7 +737,7 @@ func BenchmarkSortHashersByWeightValueReflection_fnv_100(b *testing.B) {
}
func BenchmarkSortHashersByWeightValueReflection_fnv_1000(b *testing.B) {
benchmarkSortHashersByWeightValueRelection(b, 1000, Hash(testKey))
benchmarkSortHashersByWeightValueRelection(b, 100, Hash(testKey))
}
func BenchmarkSortHashersByWeightValueTyped_fnv_10(b *testing.B) {
@ -749,7 +749,7 @@ func BenchmarkSortHashersByWeightValueTyped_fnv_100(b *testing.B) {
}
func BenchmarkSortHashersByWeightValueTyped_fnv_1000(b *testing.B) {
benchmarkSortHashersByWeightValueTyped(b, 1000, Hash(testKey))
benchmarkSortHashersByWeightValueTyped(b, 100, Hash(testKey))
}
func benchmarkSort(b *testing.B, n int, hash uint64) uint64 {