Compare commits

...

No commits in common. "empty" and "master" have entirely different histories.

9 changed files with 1441 additions and 2 deletions

.gitignore vendored Normal file (+3)

@@ -0,0 +1,3 @@
.idea
.vscode
*.out
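These entries keep IDE metadata (`.idea`, `.vscode`) and `*.out` build artifacts out of version control.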

.gitlint Normal file (+10)

@@ -0,0 +1,10 @@
[general]
fail-without-commits=true
contrib=CC1

[title-match-regex]
regex=^\[\#[0-9]+\]\s

[ignore-by-title]
regex=^Release(.*)
ignore=title-match-regex
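With this configuration, commit titles must look like `[#123] Add weight validation` (the `title-match-regex` rule), titles starting with `Release` are exempt from that check, and gitlint fails if the linted range contains no commits.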

.pre-commit-config.yaml Normal file (+30)

@@ -0,0 +1,30 @@
ci:
  autofix_prs: false

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    hooks:
      - id: check-added-large-files
      - id: check-case-conflict
      - id: check-executables-have-shebangs
      - id: check-shebang-scripts-are-executable
      - id: check-merge-conflict
      - id: check-json
      - id: check-xml
      - id: check-yaml
      - id: trailing-whitespace
        args: [--markdown-linebreak-ext=md]
      - id: end-of-file-fixer
        exclude: ".key$"

  - repo: https://github.com/golangci/golangci-lint
    rev: v1.51.2
    hooks:
      - id: golangci-lint

  - repo: https://github.com/jorisroovers/gitlint
    rev: v0.18.0
    hooks:
      - id: gitlint
        stages: [commit-msg]
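To run these checks locally, install the hooks with `pre-commit install` and, because gitlint is bound to the `commit-msg` stage, `pre-commit install --hook-type commit-msg`; `pre-commit run --all-files` lints the whole tree on demand.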

LICENSE Normal file (+22)

@@ -0,0 +1,22 @@
MIT License
Copyright (c) 2023-2024 TrueCloudLab
Copyright (c) 2019-2023 NSPCC
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

README.md

@@ -1,3 +1,87 @@
# WIP area: this repo is just a fork!

Useful things may be published only in [other branches](../../../branches)

# Golang HRW implementation

[Rendezvous or highest random weight](https://en.wikipedia.org/wiki/Rendezvous_hashing) (HRW) hashing is an algorithm that allows clients to achieve distributed agreement on a set of k options out of a possible set of n options. A typical application is when clients need to agree on which sites (or proxies) objects are assigned to. When k is 1, it subsumes the goals of consistent hashing, using an entirely different method.
## Install
`go get git.frostfs.info/TrueCloudLab/hrw`
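Go 1.18 or newer is required (`go 1.18` in `go.mod`), since the typed `SortHasherSliceByValue`/`SortHasherSliceByWeightValue` helpers use generics.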
## Benchmark:
```
BenchmarkSort_fnv_10-8 4812801 240.9 ns/op 216 B/op 4 allocs/op
BenchmarkSort_fnv_100-8 434767 2600 ns/op 1848 B/op 4 allocs/op
BenchmarkSort_fnv_1000-8 20428 66116 ns/op 16440 B/op 4 allocs/op
BenchmarkSortByIndex_fnv_10-8 2505410 486.5 ns/op 352 B/op 7 allocs/op
BenchmarkSortByIndex_fnv_100-8 254556 4697 ns/op 1984 B/op 7 allocs/op
BenchmarkSortByIndex_fnv_1000-8 13581 88334 ns/op 16576 B/op 7 allocs/op
BenchmarkSortByValue_fnv_10-8 1761030 682.1 ns/op 592 B/op 18 allocs/op
BenchmarkSortByValue_fnv_100-8 258838 4675 ns/op 4480 B/op 108 allocs/op
BenchmarkSortByValue_fnv_1000-8 27027 44649 ns/op 40768 B/op 1008 allocs/op
BenchmarkSortHashersByValue_Reflection_fnv_10-8 1013560 1249 ns/op 768 B/op 29 allocs/op
BenchmarkSortHashersByValue_Reflection_fnv_100-8 106029 11414 ns/op 6096 B/op 209 allocs/op
BenchmarkSortHashersByValue_Reflection_fnv_1000-8 10000 108977 ns/op 56784 B/op 2009 allocs/op
BenchmarkSortHashersByValue_Typed_fnv_10-8 1577814 700.3 ns/op 584 B/op 17 allocs/op
BenchmarkSortHashersByValue_Typed_fnv_100-8 215938 5024 ns/op 4472 B/op 107 allocs/op
BenchmarkSortHashersByValue_Typed_fnv_1000-8 24447 46889 ns/op 40760 B/op 1007 allocs/op
BenchmarkSortByWeight_fnv_10-8 2924833 370.6 ns/op 448 B/op 8 allocs/op
BenchmarkSortByWeight_fnv_100-8 816069 1516 ns/op 2896 B/op 8 allocs/op
BenchmarkSortByWeight_fnv_1000-8 80391 17478 ns/op 24784 B/op 8 allocs/op
BenchmarkSortByWeightIndex_fnv_10-8 1945612 550.3 ns/op 368 B/op 7 allocs/op
BenchmarkSortByWeightIndex_fnv_100-8 140473 8084 ns/op 2000 B/op 7 allocs/op
BenchmarkSortByWeightIndex_fnv_1000-8 5518 200949 ns/op 16592 B/op 7 allocs/op
BenchmarkSortByWeightValue_fnv_10-8 1305580 909.8 ns/op 608 B/op 18 allocs/op
BenchmarkSortByWeightValue_fnv_100-8 165410 6796 ns/op 4496 B/op 108 allocs/op
BenchmarkSortByWeightValue_fnv_1000-8 17922 78555 ns/op 40784 B/op 1008 allocs/op
BenchmarkSortHashersByWeightValueReflection_fnv_10-8 454976 2229 ns/op 784 B/op 29 allocs/op
BenchmarkSortHashersByWeightValueReflection_fnv_100-8 76264 15332 ns/op 6112 B/op 209 allocs/op
BenchmarkSortHashersByWeightValueReflection_fnv_1000-8 80288 13192 ns/op 6112 B/op 209 allocs/op
BenchmarkSortHashersByWeightValueTyped_fnv_10-8 1433113 901.4 ns/op 600 B/op 17 allocs/op
BenchmarkSortHashersByWeightValueTyped_fnv_100-8 188626 5896 ns/op 4488 B/op 107 allocs/op
BenchmarkSortHashersByWeightValueTyped_fnv_1000-8 178131 6518 ns/op 4488 B/op 107 allocs/op
```
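The numbers above can be regenerated with something like `go test -bench . -benchmem`; absolute timings vary by machine, but the comparison between the reflection-based and typed (generic) sorters is the interesting part. Note that despite the `_fnv` suffix in the benchmark names, the hash used is murmur3 (see `Hash` in `hrw.go`).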
## Example
```go
package main
import (
"fmt"
"git.frostfs.info/TrueCloudLab/hrw"
)
func main() {
// given a set of servers
servers := []string{
"one.example.com",
"two.example.com",
"three.example.com",
"four.example.com",
"five.example.com",
"six.example.com",
}
// HRW can consistently select a uniformly-distributed set of servers for
// any given key
var (
key = []byte("/examples/object-key")
h = hrw.Hash(key)
)
hrw.SortSliceByValue(servers, h)
for id := range servers {
fmt.Printf("trying GET %s%s\n", servers[id], key)
}
// Output:
// trying GET three.example.com/examples/object-key
// trying GET two.example.com/examples/object-key
// trying GET five.example.com/examples/object-key
// trying GET six.example.com/examples/object-key
// trying GET one.example.com/examples/object-key
// trying GET four.example.com/examples/object-key
}
```
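Because the ranking depends only on the key and the item values, every client that hashes the same key arrives at the same order without any coordination. Picking k replicas rather than a full ordering is then just a prefix of the sorted slice; here is a minimal sketch of that (server names and `k` are illustrative):

```go
package main

import (
	"fmt"

	"git.frostfs.info/TrueCloudLab/hrw"
)

func main() {
	// candidate servers and the object key to place (illustrative values)
	servers := []string{
		"one.example.com",
		"two.example.com",
		"three.example.com",
		"four.example.com",
	}
	key := []byte("/examples/object-key")

	// rank every server for this key, then keep the first k as replica targets
	hrw.SortSliceByValue(servers, hrw.Hash(key))

	const k = 2
	fmt.Println("replicas:", servers[:k])
}
```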

go.mod Normal file (+13)

@@ -0,0 +1,13 @@
module git.frostfs.info/TrueCloudLab/hrw
go 1.18
require (
github.com/stretchr/testify v1.3.0
github.com/twmb/murmur3 v1.1.8
)
require (
github.com/davecgh/go-spew v1.1.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
)

go.sum Normal file (+9)

@@ -0,0 +1,9 @@
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/twmb/murmur3 v1.1.8 h1:8Yt9taO/WN3l08xErzjeschgZU2QSrwm1kclYq+0aRg=
github.com/twmb/murmur3 v1.1.8/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ=

hrw.go Normal file (+361)

@@ -0,0 +1,361 @@
// Package hrw implements Rendezvous hashing.
// http://en.wikipedia.org/wiki/Rendezvous_hashing.
package hrw
import (
"encoding/binary"
"errors"
"math"
"reflect"
"sort"
"github.com/twmb/murmur3"
)
type (
// Hasher is the interface used by SortSliceByValue and the SortHasherSlice* functions
Hasher interface{ Hash() uint64 }
sorter struct {
l int
less func(i, j int) bool
swap func(i, j int)
}
hasherSorter[T Hasher, N interface{ ~uint64 | ~float64 }] struct {
slice []T
dist []N
asc bool
}
)
// Boundaries of valid normalized weights
const (
NormalizedMaxWeight = 1.0
NormalizedMinWeight = 0.0
)
func (s *sorter) Len() int { return s.l }
func (s *sorter) Less(i, j int) bool { return s.less(i, j) }
func (s *sorter) Swap(i, j int) { s.swap(i, j) }
func (s *hasherSorter[T, N]) Len() int { return len(s.slice) }
func (s *hasherSorter[T, N]) Less(i, j int) bool {
if s.asc {
return s.dist[i] < s.dist[j]
}
return s.dist[i] > s.dist[j]
}
func (s *hasherSorter[T, N]) Swap(i, j int) {
s.slice[i], s.slice[j] = s.slice[j], s.slice[i]
s.dist[i], s.dist[j] = s.dist[j], s.dist[i]
}
func distance(x uint64, y uint64) uint64 {
acc := x ^ y
// here used mmh3 64 bit finalizer
// https://github.com/aappleby/smhasher/blob/61a0530f28277f2e850bfc39600ce61d02b518de/src/MurmurHash3.cpp#L81
acc ^= acc >> 33
acc = acc * 0xff51afd7ed558ccd
acc ^= acc >> 33
acc = acc * 0xc4ceb9fe1a85ec53
acc ^= acc >> 33
return acc
}
// Hash returns the 64-bit murmur3 hash of key
func Hash(key []byte) uint64 {
return murmur3.Sum64(key)
}
// StringHash returns the 64-bit murmur3 hash of key
func StringHash(key string) uint64 {
return murmur3.StringSum64(key)
}
// Sort receives node hashes and a hash, and returns the node indices sorted by distance
func Sort(nodes []uint64, hash uint64) []uint64 {
l := len(nodes)
sorted := make([]uint64, l)
dist := make([]uint64, l)
for i := range nodes {
sorted[i] = uint64(i)
dist[i] = distance(nodes[i], hash)
}
sort.Slice(sorted, func(i, j int) bool {
return dist[sorted[i]] < dist[sorted[j]]
})
return sorted
}
// SortByWeight receives node hashes, weights and a hash, and returns a copy of nodes sorted by distance * weight
func SortByWeight(nodes []uint64, weights []float64, hash uint64) []uint64 {
result := make([]uint64, len(nodes))
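// sort a copy so the caller's nodes slice is left untouched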
copy(result, nodes)
sortByWeight(len(nodes), false, nodes, weights, hash, reflect.Swapper(result))
return result
}
// SortSliceByValue receives a slice and a hash, and sorts the slice in place by value-distance
func SortSliceByValue(slice interface{}, hash uint64) {
rule := prepareRule(slice)
if rule != nil {
swap := reflect.Swapper(slice)
sortByDistance(len(rule), false, rule, hash, swap)
}
}
// SortHasherSliceByValue receives a slice of Hashers and a hash, and sorts the slice in place by value-distance.
func SortHasherSliceByValue[T Hasher](slice []T, hash uint64) {
if len(slice) == 0 {
return
}
dist := make([]uint64, len(slice))
for i := range dist {
dist[i] = distance(slice[i].Hash(), hash)
}
sortHasherByDistance(slice, false, dist)
}
// SortSliceByWeightValue receives a slice, weights and a hash, and sorts the slice in place by value-distance * weights
func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) {
rule := prepareRule(slice)
if rule != nil {
swap := reflect.Swapper(slice)
sortByWeight(reflect.ValueOf(slice).Len(), false, rule, weights, hash, swap)
}
}
// SortHasherSliceByWeightValue receives a slice of Hashers, weights and a hash, and sorts the slice in place by value-distance * weights.
func SortHasherSliceByWeightValue[T Hasher](slice []T, weights []float64, hash uint64) {
if len(slice) == 0 {
return
}
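// all weights are equal: weighting cannot change the order, fall back to the plain distance sort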
if allSameF64(weights) {
dist := make([]uint64, len(slice))
for i := range dist {
dist[i] = distance(slice[i].Hash(), hash)
}
sortHasherByDistance(slice, false, dist)
return
}
dist := make([]float64, len(slice))
for i := range dist {
d := distance(slice[i].Hash(), hash)
// `maxUint64 - distance` inverts the scale so that shorter distances produce larger values,
// which is what multiplication by normalized weights expects
dist[i] = float64(^uint64(0)-d) * weights[i]
}
sort.Sort(&hasherSorter[T, float64]{
slice: slice,
dist: dist,
asc: false,
})
}
// sortHasherByDistance is similar to sortByDistance but accepts slice directly.
func sortHasherByDistance[T Hasher](slice []T, byIndex bool, dist []uint64) {
sort.Sort(&hasherSorter[T, uint64]{
slice: slice,
dist: dist,
asc: true,
})
}
// SortSliceByIndex receives a slice and a hash, and sorts the slice in place by index-distance
func SortSliceByIndex(slice interface{}, hash uint64) {
length := reflect.ValueOf(slice).Len()
swap := reflect.Swapper(slice)
sortByDistance(length, true, nil, hash, swap)
}
// SortSliceByWeightIndex receives a slice, weights and a hash, and sorts the slice in place by index-distance * weights
func SortSliceByWeightIndex(slice interface{}, weights []float64, hash uint64) {
length := reflect.ValueOf(slice).Len()
swap := reflect.Swapper(slice)
sortByWeight(length, true, nil, weights, hash, swap)
}
func prepareRule(slice interface{}) []uint64 {
t := reflect.TypeOf(slice)
if t.Kind() != reflect.Slice {
panic("HRW sort expects slice, got " + t.Kind().String())
}
var (
val = reflect.ValueOf(slice)
length = val.Len()
rule = make([]uint64, 0, length)
)
if length == 0 {
return nil
}
switch slice := slice.(type) {
case []int:
var key = make([]byte, 16)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint64(key, uint64(slice[i]))
rule = append(rule, Hash(key))
}
case []uint:
var key = make([]byte, 16)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint64(key, uint64(slice[i]))
rule = append(rule, Hash(key))
}
case []int8:
for i := 0; i < length; i++ {
key := byte(slice[i])
rule = append(rule, Hash([]byte{key}))
}
case []uint8:
for i := 0; i < length; i++ {
key := slice[i]
rule = append(rule, Hash([]byte{key}))
}
case []int16:
var key = make([]byte, 8)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint16(key, uint16(slice[i]))
rule = append(rule, Hash(key))
}
case []uint16:
var key = make([]byte, 8)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint16(key, slice[i])
rule = append(rule, Hash(key))
}
case []int32:
var key = make([]byte, 16)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint32(key, uint32(slice[i]))
rule = append(rule, Hash(key))
}
case []uint32:
var key = make([]byte, 16)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint32(key, slice[i])
rule = append(rule, Hash(key))
}
case []int64:
var key = make([]byte, 32)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint64(key, uint64(slice[i]))
rule = append(rule, Hash(key))
}
case []uint64:
var key = make([]byte, 32)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint64(key, slice[i])
rule = append(rule, Hash(key))
}
case []string:
for i := 0; i < length; i++ {
rule = append(rule, Hash([]byte(slice[i])))
}
default:
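// not one of the known primitive slice types: require elements to implement hrw.Hasher and hash via reflection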
if _, ok := val.Index(0).Interface().(Hasher); !ok {
panic("slice elements must implement hrw.Hasher")
}
for i := 0; i < length; i++ {
h := val.Index(i).Interface().(Hasher)
rule = append(rule, h.Hash())
}
}
return rule
}
// ValidateWeights checks if weights are normalized between 0.0 and 1.0
func ValidateWeights(weights []float64) error {
for i := range weights {
if math.IsNaN(weights[i]) || weights[i] > NormalizedMaxWeight || weights[i] < NormalizedMinWeight {
return errors.New("weights are not normalized")
}
}
return nil
}
// sortByWeight sorts nodes by weight using provided swapper.
// nodes contains hrw hashes. If it is nil, indices are used.
func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash uint64, swap func(i, j int)) {
// if all weights are equal, weighting cannot change the order: sort by plain distance
if allSameF64(weights) {
sortByDistance(l, byIndex, nodes, hash, swap)
return
}
dist := make([]float64, l)
for i := 0; i < l; i++ {
d := getDistance(byIndex, i, nodes, hash)
// `maxUint64 - distance` inverts the scale so that shorter distances produce larger values,
// which is what multiplication by normalized weights expects
dist[i] = float64(^uint64(0)-d) * weights[i]
}
s := &sorter{
l: l,
swap: func(i, j int) {
swap(i, j)
dist[i], dist[j] = dist[j], dist[i]
},
less: func(i, j int) bool {
return dist[i] > dist[j] // higher distance must be placed lower to be first
},
}
sort.Sort(s)
}
// sortByDistance sorts nodes by hrw distance using provided swapper.
// nodes contains hrw hashes. If it is nil, indices are used.
func sortByDistance(l int, byIndex bool, nodes []uint64, hash uint64, swap func(i, j int)) {
dist := make([]uint64, l)
for i := 0; i < l; i++ {
dist[i] = getDistance(byIndex, i, nodes, hash)
}
s := &sorter{
l: l,
swap: func(i, j int) {
swap(i, j)
dist[i], dist[j] = dist[j], dist[i]
},
less: func(i, j int) bool {
return dist[i] < dist[j]
},
}
sort.Sort(s)
}
// getDistance returns the distance from nodes[i] to h.
// If nodes is not nil, the distance is computed from nodes[i].
// Otherwise, if byIndex is true, the node index itself is used,
// and if byIndex is false, the hash of the node index is used.
func getDistance(byIndex bool, i int, nodes []uint64, h uint64) uint64 {
if nodes != nil {
return distance(nodes[i], h)
} else if byIndex {
return distance(uint64(i), h)
} else {
buf := make([]byte, 8)
binary.LittleEndian.PutUint64(buf, uint64(i))
return distance(Hash(buf), h)
}
}
func allSameF64(fs []float64) bool {
for i := range fs {
if fs[i] != fs[0] {
return false
}
}
return true
}

hrw_test.go Normal file (+907)

@@ -0,0 +1,907 @@
package hrw
import (
"encoding/binary"
"fmt"
"math"
"math/rand"
"strconv"
"testing"
"github.com/stretchr/testify/require"
)
type (
hashString string
unknown byte
slices struct {
actual interface{}
expect interface{}
}
Uint32Slice []uint32
)
var testKey = []byte("0xff51afd7ed558ccd")
func (p Uint32Slice) Len() int { return len(p) }
func (p Uint32Slice) Less(i, j int) bool { return p[i] < p[j] }
func (p Uint32Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
func Example() {
// given a set of servers
servers := []string{
"one.example.com",
"two.example.com",
"three.example.com",
"four.example.com",
"five.example.com",
"six.example.com",
}
// HRW can consistently select a uniformly-distributed set of servers for
// any given key
var (
key = []byte("/examples/object-key")
h = Hash(key)
)
SortSliceByValue(servers, h)
for id := range servers {
fmt.Printf("trying GET %s%s\n", servers[id], key)
}
// Output:
// trying GET three.example.com/examples/object-key
// trying GET two.example.com/examples/object-key
// trying GET five.example.com/examples/object-key
// trying GET six.example.com/examples/object-key
// trying GET one.example.com/examples/object-key
// trying GET four.example.com/examples/object-key
}
func (h hashString) Hash() uint64 {
return StringHash(string(h))
}
func TestSortSliceByIndex(t *testing.T) {
actual := []string{"a", "b", "c", "d", "e", "f"}
expect := []string{"e", "a", "c", "f", "d", "b"}
hash := Hash(testKey)
SortSliceByIndex(actual, hash)
require.Equal(t, expect, actual)
}
func TestValidateWeights(t *testing.T) {
weights := []float64{10, 10, 10, 2, 2, 2}
err := ValidateWeights(weights)
require.Error(t, err)
weights = []float64{math.NaN(), 1, 1, 0.2, 0.2, 0.2}
err = ValidateWeights(weights)
require.Error(t, err)
weights = []float64{1, 1, 1, 0.2, 0.2, 0.2}
err = ValidateWeights(weights)
require.NoError(t, err)
}
func TestSortSliceByWeightIndex(t *testing.T) {
actual := []string{"a", "b", "c", "d", "e", "f"}
weights := []float64{1, 1, 1, 0.2, 0.2, 0.2}
expect := []string{"a", "c", "b", "e", "f", "d"}
hash := Hash(testKey)
SortSliceByWeightIndex(actual, weights, hash)
require.Equal(t, expect, actual)
}
func TestSortSliceByValue(t *testing.T) {
actual := []string{"a", "b", "c", "d", "e", "f"}
expect := []string{"d", "f", "c", "b", "a", "e"}
hash := Hash(testKey)
SortSliceByValue(actual, hash)
require.Equal(t, expect, actual)
}
func TestSortSliceByValueFail(t *testing.T) {
t.Run("empty slice", func(t *testing.T) {
var (
actual []int
hash = Hash(testKey)
)
require.NotPanics(t, func() { SortSliceByValue(actual, hash) })
})
t.Run("must be slice", func(t *testing.T) {
actual := 10
hash := Hash(testKey)
require.Panics(t, func() { SortSliceByValue(actual, hash) })
})
t.Run("must 'fail' for unknown type", func(t *testing.T) {
actual := []unknown{1, 2, 3, 4, 5}
hash := Hash(testKey)
require.Panics(t, func() { SortSliceByValue(actual, hash) })
})
}
func TestSortSliceByValueHasher(t *testing.T) {
actual := []hashString{"a", "b", "c", "d", "e", "f"}
expect := []hashString{"d", "f", "c", "b", "a", "e"}
hash := Hash(testKey)
SortSliceByValue(actual, hash)
require.Equal(t, expect, actual)
}
func TestSortHasherSliceByValue(t *testing.T) {
actual := []hashString{"a", "b", "c", "d", "e", "f"}
expect := []hashString{"d", "f", "c", "b", "a", "e"}
hash := Hash(testKey)
SortHasherSliceByValue(actual, hash)
require.EqualValues(t, expect, actual)
}
func TestSortHasherSliceByWeightValue(t *testing.T) {
actual := []hashString{"a", "b", "c", "d", "e", "f"}
weights := []float64{1.0, 1.0, 1.0, 1.0, 1.0, 1.0}
expect := []hashString{"d", "f", "c", "b", "a", "e"}
hash := Hash(testKey)
SortHasherSliceByWeightValue(actual, weights, hash)
require.EqualValues(t, expect, actual)
}
func TestSortSliceByValueIntSlice(t *testing.T) {
cases := []slices{
{
actual: []int{0, 1, 2, 3, 4, 5},
expect: []int{2, 0, 5, 3, 1, 4},
},
{
actual: []uint{0, 1, 2, 3, 4, 5},
expect: []uint{2, 0, 5, 3, 1, 4},
},
{
actual: []int8{0, 1, 2, 3, 4, 5},
expect: []int8{5, 2, 1, 4, 0, 3},
},
{
actual: []uint8{0, 1, 2, 3, 4, 5},
expect: []uint8{5, 2, 1, 4, 0, 3},
},
{
actual: []int16{0, 1, 2, 3, 4, 5},
expect: []int16{1, 0, 3, 2, 4, 5},
},
{
actual: []uint16{0, 1, 2, 3, 4, 5},
expect: []uint16{1, 0, 3, 2, 4, 5},
},
{
actual: []int32{0, 1, 2, 3, 4, 5},
expect: []int32{5, 1, 2, 0, 3, 4},
},
{
actual: []uint32{0, 1, 2, 3, 4, 5},
expect: []uint32{5, 1, 2, 0, 3, 4},
},
{
actual: []int64{0, 1, 2, 3, 4, 5},
expect: []int64{5, 3, 0, 1, 4, 2},
},
{
actual: []uint64{0, 1, 2, 3, 4, 5},
expect: []uint64{5, 3, 0, 1, 4, 2},
},
}
hash := Hash(testKey)
for _, tc := range cases {
SortSliceByValue(tc.actual, hash)
require.Equal(t, tc.expect, tc.actual)
}
}
func TestSort(t *testing.T) {
nodes := []uint64{1, 2, 3, 4, 5}
hash := Hash(testKey)
actual := Sort(nodes, hash)
expected := []uint64{3, 1, 4, 2, 0}
require.Equal(t, expected, actual)
}
func TestDistribution(t *testing.T) {
const (
size = 10
keys = 100000
percent = 0.03
)
// We use the χ2 test to measure how close the observed distribution is to the uniform one.
// χ2 = Σ((n-N)**2/N)
// https://www.medcalc.org/manual/chi-square-table.php p=0.1
var chiTable = map[int]float64{9: 14.68, 99: 117.407}
t.Run("sort", func(t *testing.T) {
var (
i uint64
nodes [size]uint64
counts = make(map[uint64]uint64, size)
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
nodes[i] = i
}
for i = 0; i < keys; i++ {
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
counts[Sort(nodes[:], hash)[0]]++
}
var chi2 float64
mean := float64(keys) / float64(size)
delta := mean * percent
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByIndex", func(t *testing.T) {
var (
i uint64
a, b [size]uint64
counts = make(map[uint64]int, size)
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = i
}
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByIndex(b[:], hash)
counts[b[0]]++
}
var chi2 float64
mean := float64(keys) / float64(size)
delta := mean * percent
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByValue", func(t *testing.T) {
var (
i uint64
a, b [size]int
counts = make(map[int]int, size)
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = int(i)
}
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByValue(b[:], hash)
counts[b[0]]++
}
var chi2 float64
mean := float64(keys) / float64(size)
delta := mean * percent
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByStringValue", func(t *testing.T) {
var (
i uint64
a, b [size]string
counts = make(map[string]int, size)
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = strconv.FormatUint(i, 10)
}
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByValue(b[:], hash)
counts[b[0]]++
}
var chi2 float64
mean := float64(keys) / float64(size)
delta := mean * percent
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByInt32Value", func(t *testing.T) {
var (
i uint64
a, b [size]int32
counts = make(map[int32]int, size)
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = int32(i)
}
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByValue(b[:], hash)
counts[b[0]]++
}
var chi2 float64
mean := float64(keys) / float64(size)
delta := mean * percent
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByWeightValue", func(t *testing.T) {
var (
i uint64
a, b, result [size]int
w [size]float64
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = int(i)
w[i] = float64(size-i) / float64(size)
}
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByWeightValue(b[:], w[:], hash)
result[b[0]]++
}
for i := 0; i < size-1; i++ {
require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]),
"result array %v must be corresponded to weights %v", result, w)
}
})
t.Run("sortByWeightValueShuffledWeight", func(t *testing.T) {
var (
i uint64
a, b, result [size]int
w [size]float64
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = int(i)
w[i] = float64(size-i) / float64(size)
}
rand.Shuffle(size, func(i, j int) {
w[i], w[j] = w[j], w[i]
})
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByWeightValue(b[:], w[:], hash)
result[b[0]]++
}
for i := 0; i < size-1; i++ {
require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]),
"result array %v must be corresponded to weights %v", result, w)
}
})
t.Run("sortByWeightValueEmptyWeight", func(t *testing.T) {
var (
i uint64
a, b [size]int
w [size]float64
counts = make(map[int]int, size)
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = int(i)
}
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByWeightValue(b[:], w[:], hash)
counts[b[0]]++
}
var chi2 float64
mean := float64(keys) / float64(size)
delta := mean * percent
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByWeightValueUniformWeight", func(t *testing.T) {
var (
i uint64
a, b [size]int
w [size]float64
counts = make(map[int]int, size)
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = int(i)
w[i] = 0.5
}
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByWeightValue(b[:], w[:], hash)
counts[b[0]]++
}
var chi2 float64
mean := float64(keys) / float64(size)
delta := mean * percent
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByWeightValueAbsoluteW", func(t *testing.T) {
const keys = 1
var (
i uint64
a, b [size]int
w [size]float64
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = int(i)
}
w[size-1] = 1
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByWeightValue(b[:], w[:], hash)
require.True(t, b[0] == a[size-1],
"expected last value of %v to be the first with highest distance", a)
}
})
t.Run("sortByWeightValueNormalizedWeight", func(t *testing.T) {
var (
i uint64
a, b, result [size]uint64
w, normalizedW [size]float64
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = i
w[int(i)] = 10
}
w[0] = 100
// Here let's use logarithm normalization
for i = 0; i < size; i++ {
normalizedW[i] = math.Log2(w[i]) / math.Log2(w[0])
}
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByWeightValue(b[:], normalizedW[:], hash)
for j := range b {
result[b[j]] += uint64(len(b) - j)
}
}
cutResult := result[1:]
var total uint64
for i := range cutResult {
total += cutResult[i]
}
var chi2 float64
mean := float64(total) / float64(len(cutResult))
delta := mean * percent
for node, count := range cutResult {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("hash collision", func(t *testing.T) {
var (
i uint64
counts = make(map[uint64]uint64)
key = make([]byte, 16)
)
for i = 0; i < keys; i++ {
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
counts[hash]++
}
for node, count := range counts {
if count > 1 {
t.Errorf("Node %d received %d keys", node, count)
}
}
})
}
func BenchmarkSort_fnv_10(b *testing.B) {
hash := Hash(testKey)
_ = benchmarkSort(b, 10, hash)
}
func BenchmarkSort_fnv_100(b *testing.B) {
hash := Hash(testKey)
_ = benchmarkSort(b, 100, hash)
}
func BenchmarkSort_fnv_1000(b *testing.B) {
hash := Hash(testKey)
_ = benchmarkSort(b, 1000, hash)
}
func BenchmarkSortByIndex_fnv_10(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByIndex(b, 10, hash)
}
func BenchmarkSortByIndex_fnv_100(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByIndex(b, 100, hash)
}
func BenchmarkSortByIndex_fnv_1000(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByIndex(b, 1000, hash)
}
func BenchmarkSortByValue_fnv_10(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByValue(b, 10, hash)
}
func BenchmarkSortByValue_fnv_100(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByValue(b, 100, hash)
}
func BenchmarkSortByValue_fnv_1000(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByValue(b, 1000, hash)
}
func BenchmarkSortHashersByValue_Reflection_fnv_10(b *testing.B) {
hash := Hash(testKey)
benchmarkSortHashersByValueReflection(b, 10, hash)
}
func BenchmarkSortHashersByValue_Reflection_fnv_100(b *testing.B) {
hash := Hash(testKey)
benchmarkSortHashersByValueReflection(b, 100, hash)
}
func BenchmarkSortHashersByValue_Reflection_fnv_1000(b *testing.B) {
hash := Hash(testKey)
benchmarkSortHashersByValueReflection(b, 1000, hash)
}
func BenchmarkSortHashersByValue_Typed_fnv_10(b *testing.B) {
hash := Hash(testKey)
benchmarkSortHashersByValueTyped(b, 10, hash)
}
func BenchmarkSortHashersByValue_Typed_fnv_100(b *testing.B) {
hash := Hash(testKey)
benchmarkSortHashersByValueTyped(b, 100, hash)
}
func BenchmarkSortHashersByValue_Typed_fnv_1000(b *testing.B) {
hash := Hash(testKey)
benchmarkSortHashersByValueTyped(b, 1000, hash)
}
func BenchmarkSortByWeight_fnv_10(b *testing.B) {
hash := Hash(testKey)
_ = benchmarkSortByWeight(b, 10, hash)
}
func BenchmarkSortByWeight_fnv_100(b *testing.B) {
hash := Hash(testKey)
_ = benchmarkSortByWeight(b, 100, hash)
}
func BenchmarkSortByWeight_fnv_1000(b *testing.B) {
hash := Hash(testKey)
_ = benchmarkSortByWeight(b, 1000, hash)
}
func BenchmarkSortByWeightIndex_fnv_10(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByWeightIndex(b, 10, hash)
}
func BenchmarkSortByWeightIndex_fnv_100(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByWeightIndex(b, 100, hash)
}
func BenchmarkSortByWeightIndex_fnv_1000(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByWeightIndex(b, 1000, hash)
}
func BenchmarkSortByWeightValue_fnv_10(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByWeightValue(b, 10, hash)
}
func BenchmarkSortByWeightValue_fnv_100(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByWeightValue(b, 100, hash)
}
func BenchmarkSortByWeightValue_fnv_1000(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByWeightValue(b, 1000, hash)
}
func BenchmarkSortHashersByWeightValueReflection_fnv_10(b *testing.B) {
benchmarkSortHashersByWeightValueRelection(b, 10, Hash(testKey))
}
func BenchmarkSortHashersByWeightValueReflection_fnv_100(b *testing.B) {
benchmarkSortHashersByWeightValueRelection(b, 100, Hash(testKey))
}
func BenchmarkSortHashersByWeightValueReflection_fnv_1000(b *testing.B) {
benchmarkSortHashersByWeightValueRelection(b, 1000, Hash(testKey))
}
func BenchmarkSortHashersByWeightValueTyped_fnv_10(b *testing.B) {
benchmarkSortHashersByWeightValueTyped(b, 10, Hash(testKey))
}
func BenchmarkSortHashersByWeightValueTyped_fnv_100(b *testing.B) {
benchmarkSortHashersByWeightValueTyped(b, 100, Hash(testKey))
}
func BenchmarkSortHashersByWeightValueTyped_fnv_1000(b *testing.B) {
benchmarkSortHashersByWeightValueTyped(b, 1000, Hash(testKey))
}
func benchmarkSort(b *testing.B, n int, hash uint64) uint64 {
servers := make([]uint64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
servers[i] = i
}
b.ResetTimer()
b.ReportAllocs()
var x uint64
for i := 0; i < b.N; i++ {
x += Sort(servers, hash)[0]
}
return x
}
func benchmarkSortByIndex(b *testing.B, n int, hash uint64) {
servers := make([]uint64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
servers[i] = i
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortSliceByIndex(servers, hash)
}
}
func benchmarkSortByValue(b *testing.B, n int, hash uint64) {
servers := make([]string, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
servers[i] = "localhost:" + strconv.FormatUint(60000-i, 10)
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortSliceByValue(servers, hash)
}
}
func benchmarkSortByWeight(b *testing.B, n int, hash uint64) uint64 {
servers := make([]uint64, n)
weights := make([]float64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
weights[i] = float64(uint64(n)-i) / float64(n)
servers[i] = i
}
b.ResetTimer()
b.ReportAllocs()
var x uint64
for i := 0; i < b.N; i++ {
x += SortByWeight(servers, weights, hash)[0]
}
return x
}
func benchmarkSortByWeightIndex(b *testing.B, n int, hash uint64) {
servers := make([]uint64, n)
weights := make([]float64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
weights[i] = float64(uint64(n)-i) / float64(n)
servers[i] = i
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortSliceByWeightIndex(servers, weights, hash)
}
}
func benchmarkSortByWeightValue(b *testing.B, n int, hash uint64) {
servers := make([]string, n)
weights := make([]float64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
weights[i] = float64(uint64(n)-i) / float64(n)
servers[i] = "localhost:" + strconv.FormatUint(60000-i, 10)
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortSliceByWeightValue(servers, weights, hash)
}
}
func benchmarkSortHashersByWeightValueRelection(b *testing.B, n int, hash uint64) {
servers := make([]hashString, n)
weights := make([]float64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
weights[i] = float64(uint64(n)-i) / float64(n)
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortSliceByWeightValue(servers, weights, hash)
}
}
func benchmarkSortHashersByWeightValueTyped(b *testing.B, n int, hash uint64) {
servers := make([]hashString, n)
weights := make([]float64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
weights[i] = float64(uint64(n)-i) / float64(n)
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortHasherSliceByWeightValue(servers, weights, hash)
}
}
func benchmarkSortHashersByValueReflection(b *testing.B, n int, hash uint64) {
servers := make([]hashString, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortSliceByValue(servers, hash)
}
}
func benchmarkSortHashersByValueTyped(b *testing.B, n int, hash uint64) {
servers := make([]hashString, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortHasherSliceByValue(servers, hash)
}
}