forked from TrueCloudLab/hrw
Compare commits
24 commits
feature/ge
...
master
Author | SHA1 | Date | |
---|---|---|---|
78c3f718b1 | |||
16a7740ccd | |||
2ac89c82b6 | |||
266da7c69a | |||
c52f74d8e1 | |||
895ecf150f | |||
213c105ac1 | |||
c175ef4099 | |||
2c085708de | |||
15b3800347 | |||
2e205cf1ca | |||
ebca2848ad | |||
0ad932400c | |||
08e14caaf3 | |||
7e33833933 | |||
79b208bebf | |||
5671632658 | |||
9e9fc653e5 | |||
997b540432 | |||
22b833d972 | |||
fad35bbd3b | |||
dddcfc8fc5 | |||
|
f52ea8fb21 | ||
|
aa230933d1 |
9 changed files with 568 additions and 355 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -1,3 +1,3 @@
|
|||
.idea
|
||||
.vscode
|
||||
*.out
|
||||
*.out
|
||||
|
|
10
.gitlint
Normal file
10
.gitlint
Normal file
|
@ -0,0 +1,10 @@
|
|||
[general]
|
||||
fail-without-commits=true
|
||||
contrib=CC1
|
||||
|
||||
[title-match-regex]
|
||||
regex=^\[\#[0-9]+\]\s
|
||||
|
||||
[ignore-by-title]
|
||||
regex=^Release(.*)
|
||||
ignore=title-match-regex
|
30
.pre-commit-config.yaml
Normal file
30
.pre-commit-config.yaml
Normal file
|
@ -0,0 +1,30 @@
|
|||
ci:
|
||||
autofix_prs: false
|
||||
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.4.0
|
||||
hooks:
|
||||
- id: check-added-large-files
|
||||
- id: check-case-conflict
|
||||
- id: check-executables-have-shebangs
|
||||
- id: check-shebang-scripts-are-executable
|
||||
- id: check-merge-conflict
|
||||
- id: check-json
|
||||
- id: check-xml
|
||||
- id: check-yaml
|
||||
- id: trailing-whitespace
|
||||
args: [--markdown-linebreak-ext=md]
|
||||
- id: end-of-file-fixer
|
||||
exclude: ".key$"
|
||||
|
||||
- repo: https://github.com/golangci/golangci-lint
|
||||
rev: v1.51.2
|
||||
hooks:
|
||||
- id: golangci-lint
|
||||
|
||||
- repo: https://github.com/jorisroovers/gitlint
|
||||
rev: v0.18.0
|
||||
hooks:
|
||||
- id: gitlint
|
||||
stages: [commit-msg]
|
17
.travis.yml
17
.travis.yml
|
@ -1,17 +0,0 @@
|
|||
language: go
|
||||
go:
|
||||
- 1.11.x
|
||||
- 1.12.x
|
||||
env:
|
||||
- GO111MODULE=on
|
||||
install:
|
||||
- go get -v golang.org/x/lint/golint
|
||||
- go mod tidy -v
|
||||
script:
|
||||
- golint -set_exit_status ./...
|
||||
- go test -race -coverprofile=coverage.txt -covermode=atomic ./...
|
||||
after_success:
|
||||
- bash <(curl -s https://codecov.io/bash)
|
||||
matrix:
|
||||
allow_failures:
|
||||
- go: tip
|
71
README.md
71
README.md
|
@ -1,39 +1,50 @@
|
|||
# Golang HRW implementation
|
||||
|
||||
[![Build Status](https://travis-ci.org/nspcc-dev/hrw.svg?branch=master)](https://travis-ci.org/nspcc-dev/hrw)
|
||||
[![codecov](https://codecov.io/gh/nspcc-dev/hrw/badge.svg)](https://codecov.io/gh/nspcc-dev/hrw)
|
||||
[![Report](https://goreportcard.com/badge/github.com/nspcc-dev/hrw)](https://goreportcard.com/report/github.com/nspcc-dev/hrw)
|
||||
[![GitHub release](https://img.shields.io/github/release/nspcc-dev/hrw.svg)](https://github.com/nspcc-dev/hrw)
|
||||
[![Build Status](https://travis-ci.org/TrueCloudLab/hrw.svg?branch=master)](https://travis-ci.org/TrueCloudLab/hrw)
|
||||
[![codecov](https://codecov.io/gh/TrueCloudLab/hrw/badge.svg)](https://codecov.io/gh/TrueCloudLab/hrw)
|
||||
[![Report](https://goreportcard.com/badge/github.com/TrueCloudLab/hrw)](https://goreportcard.com/report/github.com/TrueCloudLab/hrw)
|
||||
[![GitHub release](https://img.shields.io/github/v/release/TrueCloudLab/hrw.svg)](https://git.frostfs.info/TrueCloudLab/hrw)
|
||||
|
||||
[Rendezvous or highest random weight](https://en.wikipedia.org/wiki/Rendezvous_hashing) (HRW) hashing is an algorithm that allows clients to achieve distributed agreement on a set of k options out of a possible set of n options. A typical application is when clients need to agree on which sites (or proxies) objects are assigned to. When k is 1, it subsumes the goals of consistent hashing, using an entirely different method.
|
||||
|
||||
## Install
|
||||
|
||||
`go get github.com/nspcc-dev/hrw`
|
||||
`go get git.frostfs.info/TrueCloudLab/hrw`
|
||||
|
||||
## Benchmark:
|
||||
|
||||
```
|
||||
BenchmarkSort_fnv_10-8 5000000 354 ns/op 224 B/op 3 allocs/op
|
||||
BenchmarkSort_fnv_100-8 300000 5103 ns/op 1856 B/op 3 allocs/op
|
||||
BenchmarkSort_fnv_1000-8 10000 115874 ns/op 16448 B/op 3 allocs/op
|
||||
BenchmarkSortByIndex_fnv_10-8 3000000 562 ns/op 384 B/op 7 allocs/op
|
||||
BenchmarkSortByIndex_fnv_100-8 200000 5819 ns/op 2928 B/op 7 allocs/op
|
||||
BenchmarkSortByIndex_fnv_1000-8 10000 125859 ns/op 25728 B/op 7 allocs/op
|
||||
BenchmarkSortByValue_fnv_10-8 2000000 1056 ns/op 544 B/op 17 allocs/op
|
||||
BenchmarkSortByValue_fnv_100-8 200000 9593 ns/op 4528 B/op 107 allocs/op
|
||||
BenchmarkSortByValue_fnv_1000-8 10000 109272 ns/op 41728 B/op 1007 allocs/op
|
||||
|
||||
BenchmarkSortByWeight_fnv_10-8 3000000 500 ns/op 320 B/op 4 allocs/op
|
||||
BenchmarkSortByWeight_fnv_100-8 200000 8257 ns/op 2768 B/op 4 allocs/op
|
||||
BenchmarkSortByWeight_fnv_1000-8 10000 197938 ns/op 24656 B/op 4 allocs/op
|
||||
BenchmarkSortByWeightIndex_fnv_10-8 2000000 760 ns/op 480 B/op 8 allocs/op
|
||||
BenchmarkSortByWeightIndex_fnv_100-8 200000 9191 ns/op 3840 B/op 8 allocs/op
|
||||
BenchmarkSortByWeightIndex_fnv_1000-8 10000 208204 ns/op 33936 B/op 8 allocs/op
|
||||
BenchmarkSortByWeightValue_fnv_10-8 1000000 1095 ns/op 640 B/op 18 allocs/op
|
||||
BenchmarkSortByWeightValue_fnv_100-8 200000 12291 ns/op 5440 B/op 108 allocs/op
|
||||
BenchmarkSortByWeightValue_fnv_1000-8 10000 145125 ns/op 49936 B/op 1008 allocs/op
|
||||
BenchmarkSort_fnv_10-8 4812801 240.9 ns/op 216 B/op 4 allocs/op
|
||||
BenchmarkSort_fnv_100-8 434767 2600 ns/op 1848 B/op 4 allocs/op
|
||||
BenchmarkSort_fnv_1000-8 20428 66116 ns/op 16440 B/op 4 allocs/op
|
||||
BenchmarkSortByIndex_fnv_10-8 2505410 486.5 ns/op 352 B/op 7 allocs/op
|
||||
BenchmarkSortByIndex_fnv_100-8 254556 4697 ns/op 1984 B/op 7 allocs/op
|
||||
BenchmarkSortByIndex_fnv_1000-8 13581 88334 ns/op 16576 B/op 7 allocs/op
|
||||
BenchmarkSortByValue_fnv_10-8 1761030 682.1 ns/op 592 B/op 18 allocs/op
|
||||
BenchmarkSortByValue_fnv_100-8 258838 4675 ns/op 4480 B/op 108 allocs/op
|
||||
BenchmarkSortByValue_fnv_1000-8 27027 44649 ns/op 40768 B/op 1008 allocs/op
|
||||
BenchmarkSortHashersByValue_Reflection_fnv_10-8 1013560 1249 ns/op 768 B/op 29 allocs/op
|
||||
BenchmarkSortHashersByValue_Reflection_fnv_100-8 106029 11414 ns/op 6096 B/op 209 allocs/op
|
||||
BenchmarkSortHashersByValue_Reflection_fnv_1000-8 10000 108977 ns/op 56784 B/op 2009 allocs/op
|
||||
BenchmarkSortHashersByValue_Typed_fnv_10-8 1577814 700.3 ns/op 584 B/op 17 allocs/op
|
||||
BenchmarkSortHashersByValue_Typed_fnv_100-8 215938 5024 ns/op 4472 B/op 107 allocs/op
|
||||
BenchmarkSortHashersByValue_Typed_fnv_1000-8 24447 46889 ns/op 40760 B/op 1007 allocs/op
|
||||
|
||||
BenchmarkSortByWeight_fnv_10-8 2924833 370.6 ns/op 448 B/op 8 allocs/op
|
||||
BenchmarkSortByWeight_fnv_100-8 816069 1516 ns/op 2896 B/op 8 allocs/op
|
||||
BenchmarkSortByWeight_fnv_1000-8 80391 17478 ns/op 24784 B/op 8 allocs/op
|
||||
BenchmarkSortByWeightIndex_fnv_10-8 1945612 550.3 ns/op 368 B/op 7 allocs/op
|
||||
BenchmarkSortByWeightIndex_fnv_100-8 140473 8084 ns/op 2000 B/op 7 allocs/op
|
||||
BenchmarkSortByWeightIndex_fnv_1000-8 5518 200949 ns/op 16592 B/op 7 allocs/op
|
||||
BenchmarkSortByWeightValue_fnv_10-8 1305580 909.8 ns/op 608 B/op 18 allocs/op
|
||||
BenchmarkSortByWeightValue_fnv_100-8 165410 6796 ns/op 4496 B/op 108 allocs/op
|
||||
BenchmarkSortByWeightValue_fnv_1000-8 17922 78555 ns/op 40784 B/op 1008 allocs/op
|
||||
BenchmarkSortHashersByWeightValueReflection_fnv_10-8 454976 2229 ns/op 784 B/op 29 allocs/op
|
||||
BenchmarkSortHashersByWeightValueReflection_fnv_100-8 76264 15332 ns/op 6112 B/op 209 allocs/op
|
||||
BenchmarkSortHashersByWeightValueReflection_fnv_1000-8 80288 13192 ns/op 6112 B/op 209 allocs/op
|
||||
BenchmarkSortHashersByWeightValueTyped_fnv_10-8 1433113 901.4 ns/op 600 B/op 17 allocs/op
|
||||
BenchmarkSortHashersByWeightValueTyped_fnv_100-8 188626 5896 ns/op 4488 B/op 107 allocs/op
|
||||
BenchmarkSortHashersByWeightValueTyped_fnv_1000-8 178131 6518 ns/op 4488 B/op 107 allocs/op
|
||||
```
|
||||
|
||||
## Example
|
||||
|
@ -43,8 +54,8 @@ package main
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/nspcc-dev/hrw"
|
||||
|
||||
"git.frostfs.info/TrueCloudLab/hrw"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
@ -71,11 +82,11 @@ func main() {
|
|||
}
|
||||
|
||||
// Output:
|
||||
// trying GET four.example.com/examples/object-key
|
||||
// trying GET three.example.com/examples/object-key
|
||||
// trying GET one.example.com/examples/object-key
|
||||
// trying GET two.example.com/examples/object-key
|
||||
// trying GET six.example.com/examples/object-key
|
||||
// trying GET five.example.com/examples/object-key
|
||||
// trying GET six.example.com/examples/object-key
|
||||
// trying GET one.example.com/examples/object-key
|
||||
// trying GET four.example.com/examples/object-key
|
||||
}
|
||||
```
|
||||
```
|
||||
|
|
14
go.mod
14
go.mod
|
@ -1,3 +1,13 @@
|
|||
module github.com/nspcc-dev/hrw
|
||||
module git.frostfs.info/TrueCloudLab/hrw
|
||||
|
||||
require github.com/spaolacci/murmur3 v1.1.0
|
||||
go 1.18
|
||||
|
||||
require (
|
||||
github.com/stretchr/testify v1.3.0
|
||||
github.com/twmb/murmur3 v1.1.8
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/davecgh/go-spew v1.1.0 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
)
|
||||
|
|
11
go.sum
11
go.sum
|
@ -1,2 +1,9 @@
|
|||
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
|
||||
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
|
||||
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/twmb/murmur3 v1.1.8 h1:8Yt9taO/WN3l08xErzjeschgZU2QSrwm1kclYq+0aRg=
|
||||
github.com/twmb/murmur3 v1.1.8/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ=
|
||||
|
|
340
hrw.go
340
hrw.go
|
@ -4,31 +4,54 @@ package hrw
|
|||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"math"
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/spaolacci/murmur3"
|
||||
"github.com/twmb/murmur3"
|
||||
)
|
||||
|
||||
type (
|
||||
swapper func(i, j int)
|
||||
|
||||
// Hasher interface used by SortSliceByValue
|
||||
Hasher interface{ Hash() uint64 }
|
||||
|
||||
hashed struct {
|
||||
length int
|
||||
sorted []uint64
|
||||
weight []uint64
|
||||
sorter struct {
|
||||
l int
|
||||
less func(i, j int) bool
|
||||
swap func(i, j int)
|
||||
}
|
||||
|
||||
weighted struct {
|
||||
h hashed
|
||||
normal []float64 // normalized input weights
|
||||
hasherSorter[T Hasher, N interface{ ~uint64 | ~float64 }] struct {
|
||||
slice []T
|
||||
dist []N
|
||||
asc bool
|
||||
}
|
||||
)
|
||||
|
||||
func weight(x uint64, y uint64) uint64 {
|
||||
// Boundaries of valid normalized weights
|
||||
const (
|
||||
NormalizedMaxWeight = 1.0
|
||||
NormalizedMinWeight = 0.0
|
||||
)
|
||||
|
||||
func (s *sorter) Len() int { return s.l }
|
||||
func (s *sorter) Less(i, j int) bool { return s.less(i, j) }
|
||||
func (s *sorter) Swap(i, j int) { s.swap(i, j) }
|
||||
|
||||
func (s *hasherSorter[T, N]) Len() int { return len(s.slice) }
|
||||
func (s *hasherSorter[T, N]) Less(i, j int) bool {
|
||||
if s.asc {
|
||||
return s.dist[i] < s.dist[j]
|
||||
}
|
||||
return s.dist[i] > s.dist[j]
|
||||
}
|
||||
func (s *hasherSorter[T, N]) Swap(i, j int) {
|
||||
s.slice[i], s.slice[j] = s.slice[j], s.slice[i]
|
||||
s.dist[i], s.dist[j] = s.dist[j], s.dist[i]
|
||||
}
|
||||
|
||||
func distance(x uint64, y uint64) uint64 {
|
||||
acc := x ^ y
|
||||
// here used mmh3 64 bit finalizer
|
||||
// https://github.com/aappleby/smhasher/blob/61a0530f28277f2e850bfc39600ce61d02b518de/src/MurmurHash3.cpp#L81
|
||||
|
@ -40,161 +63,128 @@ func weight(x uint64, y uint64) uint64 {
|
|||
return acc
|
||||
}
|
||||
|
||||
func (h hashed) Len() int { return h.length }
|
||||
func (h hashed) Less(i, j int) bool { return h.weight[i] < h.weight[j] }
|
||||
func (h hashed) Swap(i, j int) {
|
||||
h.sorted[i], h.sorted[j] = h.sorted[j], h.sorted[i]
|
||||
h.weight[i], h.weight[j] = h.weight[j], h.weight[i]
|
||||
}
|
||||
|
||||
func (w weighted) Len() int { return w.h.length }
|
||||
func (w weighted) Less(i, j int) bool {
|
||||
// `maxUint64 - weight` makes least weight most valuable
|
||||
// it is necessary for operation with normalized values
|
||||
wi := float64(^uint64(0)-w.h.weight[i]) * w.normal[i]
|
||||
wj := float64(^uint64(0)-w.h.weight[j]) * w.normal[j]
|
||||
return wi > wj // higher weight must be placed lower to be first
|
||||
}
|
||||
func (w weighted) Swap(i, j int) { w.normal[i], w.normal[j] = w.normal[j], w.normal[i]; w.h.Swap(i, j) }
|
||||
|
||||
// Hash uses murmur3 hash to return uint64
|
||||
func Hash(key []byte) uint64 {
|
||||
return murmur3.Sum64(key)
|
||||
}
|
||||
|
||||
// Sort receive nodes and hash, and sort it by weight
|
||||
// StringHash uses murmur3 hash to return uint64
|
||||
func StringHash(key string) uint64 {
|
||||
return murmur3.StringSum64(key)
|
||||
}
|
||||
|
||||
// Sort receive nodes and hash, and sort it by distance
|
||||
func Sort(nodes []uint64, hash uint64) []uint64 {
|
||||
var (
|
||||
l = len(nodes)
|
||||
h = hashed{
|
||||
length: l,
|
||||
sorted: make([]uint64, 0, l),
|
||||
weight: make([]uint64, 0, l),
|
||||
}
|
||||
)
|
||||
|
||||
for i, node := range nodes {
|
||||
h.sorted = append(h.sorted, uint64(i))
|
||||
h.weight = append(h.weight, weight(node, hash))
|
||||
l := len(nodes)
|
||||
sorted := make([]uint64, l)
|
||||
dist := make([]uint64, l)
|
||||
for i := range nodes {
|
||||
sorted[i] = uint64(i)
|
||||
dist[i] = distance(nodes[i], hash)
|
||||
}
|
||||
|
||||
sort.Sort(h)
|
||||
return h.sorted
|
||||
sort.Slice(sorted, func(i, j int) bool {
|
||||
return dist[sorted[i]] < dist[sorted[j]]
|
||||
})
|
||||
return sorted
|
||||
}
|
||||
|
||||
// SortByWeight receive nodes and hash, and sort it by weight
|
||||
func SortByWeight(nodes []uint64, weights []uint64, hash uint64) []uint64 {
|
||||
var (
|
||||
maxWeight uint64
|
||||
|
||||
l = len(nodes)
|
||||
w = weighted{
|
||||
h: hashed{
|
||||
length: l,
|
||||
sorted: make([]uint64, 0, l),
|
||||
weight: make([]uint64, 0, l),
|
||||
},
|
||||
normal: make([]float64, 0, l),
|
||||
}
|
||||
)
|
||||
|
||||
// finding max weight to perform normalization
|
||||
for i := range weights {
|
||||
if maxWeight < weights[i] {
|
||||
maxWeight = weights[i]
|
||||
}
|
||||
}
|
||||
|
||||
// if all nodes have 0-weights or weights are incorrect then sort uniformly
|
||||
if maxWeight == 0 || l != len(nodes) {
|
||||
return Sort(nodes, hash)
|
||||
}
|
||||
|
||||
fMaxWeight := float64(maxWeight)
|
||||
for i, node := range nodes {
|
||||
w.h.sorted = append(w.h.sorted, uint64(i))
|
||||
w.h.weight = append(w.h.weight, weight(node, hash))
|
||||
w.normal = append(w.normal, float64(weights[i])/fMaxWeight)
|
||||
}
|
||||
sort.Sort(w)
|
||||
return w.h.sorted
|
||||
// SortByWeight receive nodes, weights and hash, and sort it by distance * weight
|
||||
func SortByWeight(nodes []uint64, weights []float64, hash uint64) []uint64 {
|
||||
result := make([]uint64, len(nodes))
|
||||
copy(nodes, result)
|
||||
sortByWeight(len(nodes), false, nodes, weights, hash, reflect.Swapper(result))
|
||||
return result
|
||||
}
|
||||
|
||||
// SortSliceByValue received []T and hash to sort by value-weight
|
||||
// SortSliceByValue received []T and hash to sort by value-distance
|
||||
func SortSliceByValue(slice interface{}, hash uint64) {
|
||||
rule := prepareRule(slice)
|
||||
if rule != nil {
|
||||
swap := reflect.Swapper(slice)
|
||||
rule = Sort(rule, hash)
|
||||
sortByRuleInverse(swap, uint64(len(rule)), rule)
|
||||
sortByDistance(len(rule), false, rule, hash, swap)
|
||||
}
|
||||
}
|
||||
|
||||
// SortSliceByWeightValue received []T, weights and hash to sort by value-weight
|
||||
func SortSliceByWeightValue(slice interface{}, weight []uint64, hash uint64) {
|
||||
// SortHasherSliceByValue receives []Hasher and hash to sort by value-distance.
|
||||
func SortHasherSliceByValue[T Hasher](slice []T, hash uint64) {
|
||||
if len(slice) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
dist := make([]uint64, len(slice))
|
||||
for i := range dist {
|
||||
dist[i] = distance(slice[i].Hash(), hash)
|
||||
}
|
||||
sortHasherByDistance(slice, false, dist)
|
||||
}
|
||||
|
||||
// SortSliceByWeightValue received []T, weights and hash to sort by value-distance * weights
|
||||
func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) {
|
||||
rule := prepareRule(slice)
|
||||
if rule != nil {
|
||||
swap := reflect.Swapper(slice)
|
||||
rule = SortByWeight(rule, weight, hash)
|
||||
sortByRuleInverse(swap, uint64(len(rule)), rule)
|
||||
sortByWeight(reflect.ValueOf(slice).Len(), false, rule, weights, hash, swap)
|
||||
}
|
||||
}
|
||||
|
||||
// SortSliceByIndex received []T and hash to sort by index-weight
|
||||
// SortHasherSliceByWeightValue receives []Hasher, weights and hash to sort by value-distance * weights.
|
||||
func SortHasherSliceByWeightValue[T Hasher](slice []T, weights []float64, hash uint64) {
|
||||
if len(slice) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
if allSameF64(weights) {
|
||||
dist := make([]uint64, len(slice))
|
||||
for i := range dist {
|
||||
dist[i] = distance(slice[i].Hash(), hash)
|
||||
}
|
||||
sortHasherByDistance(slice, false, dist)
|
||||
return
|
||||
}
|
||||
|
||||
dist := make([]float64, len(slice))
|
||||
for i := range dist {
|
||||
d := distance(slice[i].Hash(), hash)
|
||||
// `maxUint64 - distance` makes the shorter distance more valuable
|
||||
// it is necessary for operation with normalized values
|
||||
dist[i] = float64(^uint64(0)-d) * weights[i]
|
||||
}
|
||||
|
||||
sort.Sort(&hasherSorter[T, float64]{
|
||||
slice: slice,
|
||||
dist: dist,
|
||||
asc: false,
|
||||
})
|
||||
}
|
||||
|
||||
// sortHasherByDistance is similar to sortByDistance but accepts slice directly.
|
||||
func sortHasherByDistance[T Hasher](slice []T, byIndex bool, dist []uint64) {
|
||||
sort.Sort(&hasherSorter[T, uint64]{
|
||||
slice: slice,
|
||||
dist: dist,
|
||||
asc: true,
|
||||
})
|
||||
}
|
||||
|
||||
// SortSliceByIndex received []T and hash to sort by index-distance
|
||||
func SortSliceByIndex(slice interface{}, hash uint64) {
|
||||
length := uint64(reflect.ValueOf(slice).Len())
|
||||
length := reflect.ValueOf(slice).Len()
|
||||
swap := reflect.Swapper(slice)
|
||||
rule := make([]uint64, 0, length)
|
||||
for i := uint64(0); i < length; i++ {
|
||||
rule = append(rule, i)
|
||||
}
|
||||
rule = Sort(rule, hash)
|
||||
sortByRuleInverse(swap, length, rule)
|
||||
sortByDistance(length, true, nil, hash, swap)
|
||||
}
|
||||
|
||||
// SortSliceByWeightIndex received []T, weights and hash to sort by index-weight
|
||||
func SortSliceByWeightIndex(slice interface{}, weight []uint64, hash uint64) {
|
||||
length := uint64(reflect.ValueOf(slice).Len())
|
||||
// SortSliceByWeightIndex received []T, weights and hash to sort by index-distance * weights
|
||||
func SortSliceByWeightIndex(slice interface{}, weights []float64, hash uint64) {
|
||||
length := reflect.ValueOf(slice).Len()
|
||||
swap := reflect.Swapper(slice)
|
||||
rule := make([]uint64, 0, length)
|
||||
for i := uint64(0); i < length; i++ {
|
||||
rule = append(rule, i)
|
||||
}
|
||||
rule = SortByWeight(rule, weight, hash)
|
||||
sortByRuleInverse(swap, length, rule)
|
||||
}
|
||||
|
||||
func sortByRuleDirect(swap swapper, length uint64, rule []uint64) {
|
||||
done := make([]bool, length)
|
||||
for i := uint64(0); i < length; i++ {
|
||||
if done[i] {
|
||||
continue
|
||||
}
|
||||
for j := rule[i]; !done[rule[j]]; j = rule[j] {
|
||||
swap(int(i), int(j))
|
||||
done[j] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func sortByRuleInverse(swap swapper, length uint64, rule []uint64) {
|
||||
done := make([]bool, length)
|
||||
for i := uint64(0); i < length; i++ {
|
||||
if done[i] {
|
||||
continue
|
||||
}
|
||||
|
||||
for j := i; !done[rule[j]]; j = rule[j] {
|
||||
swap(int(j), int(rule[j]))
|
||||
done[j] = true
|
||||
}
|
||||
}
|
||||
sortByWeight(length, true, nil, weights, hash, swap)
|
||||
}
|
||||
|
||||
func prepareRule(slice interface{}) []uint64 {
|
||||
t := reflect.TypeOf(slice)
|
||||
if t.Kind() != reflect.Slice {
|
||||
return nil
|
||||
panic("HRW sort expects slice, got " + t.Kind().String())
|
||||
}
|
||||
|
||||
var (
|
||||
|
@ -273,7 +263,7 @@ func prepareRule(slice interface{}) []uint64 {
|
|||
|
||||
default:
|
||||
if _, ok := val.Index(0).Interface().(Hasher); !ok {
|
||||
return nil
|
||||
panic("slice elements must implement hrw.Hasher")
|
||||
}
|
||||
|
||||
for i := 0; i < length; i++ {
|
||||
|
@ -283,3 +273,89 @@ func prepareRule(slice interface{}) []uint64 {
|
|||
}
|
||||
return rule
|
||||
}
|
||||
|
||||
// ValidateWeights checks if weights are normalized between 0.0 and 1.0
|
||||
func ValidateWeights(weights []float64) error {
|
||||
for i := range weights {
|
||||
if math.IsNaN(weights[i]) || weights[i] > NormalizedMaxWeight || weights[i] < NormalizedMinWeight {
|
||||
return errors.New("weights are not normalized")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// sortByWeight sorts nodes by weight using provided swapper.
|
||||
// nodes contains hrw hashes. If it is nil, indices are used.
|
||||
func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash uint64, swap func(i, j int)) {
|
||||
// if all nodes have the same distance then sort uniformly
|
||||
if allSameF64(weights) {
|
||||
sortByDistance(l, byIndex, nodes, hash, swap)
|
||||
return
|
||||
}
|
||||
|
||||
dist := make([]float64, l)
|
||||
for i := 0; i < l; i++ {
|
||||
d := getDistance(byIndex, i, nodes, hash)
|
||||
// `maxUint64 - distance` makes the shorter distance more valuable
|
||||
// it is necessary for operation with normalized values
|
||||
dist[i] = float64(^uint64(0)-d) * weights[i]
|
||||
}
|
||||
|
||||
s := &sorter{
|
||||
l: l,
|
||||
swap: func(i, j int) {
|
||||
swap(i, j)
|
||||
dist[i], dist[j] = dist[j], dist[i]
|
||||
},
|
||||
less: func(i, j int) bool {
|
||||
return dist[i] > dist[j] // higher distance must be placed lower to be first
|
||||
},
|
||||
}
|
||||
sort.Sort(s)
|
||||
}
|
||||
|
||||
// sortByDistance sorts nodes by hrw distance using provided swapper.
|
||||
// nodes contains hrw hashes. If it is nil, indices are used.
|
||||
func sortByDistance(l int, byIndex bool, nodes []uint64, hash uint64, swap func(i, j int)) {
|
||||
dist := make([]uint64, l)
|
||||
for i := 0; i < l; i++ {
|
||||
dist[i] = getDistance(byIndex, i, nodes, hash)
|
||||
}
|
||||
|
||||
s := &sorter{
|
||||
l: l,
|
||||
swap: func(i, j int) {
|
||||
swap(i, j)
|
||||
dist[i], dist[j] = dist[j], dist[i]
|
||||
},
|
||||
less: func(i, j int) bool {
|
||||
return dist[i] < dist[j]
|
||||
},
|
||||
}
|
||||
sort.Sort(s)
|
||||
}
|
||||
|
||||
// getDistance return distance from nodes[i] to h.
|
||||
// If byIndex is true, nodes index is used.
|
||||
// Else if nodes[i] != nil, distance is calculated from this value.
|
||||
// Otherwise, and hash from node index is taken.
|
||||
func getDistance(byIndex bool, i int, nodes []uint64, h uint64) uint64 {
|
||||
if nodes != nil {
|
||||
return distance(nodes[i], h)
|
||||
} else if byIndex {
|
||||
return distance(uint64(i), h)
|
||||
} else {
|
||||
buf := make([]byte, 8)
|
||||
binary.LittleEndian.PutUint64(buf, uint64(i))
|
||||
return distance(Hash(buf), h)
|
||||
}
|
||||
}
|
||||
|
||||
func allSameF64(fs []float64) bool {
|
||||
for i := range fs {
|
||||
if fs[i] != fs[0] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
|
428
hrw_test.go
428
hrw_test.go
|
@ -5,9 +5,10 @@ import (
|
|||
"fmt"
|
||||
"math"
|
||||
"math/rand"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
type (
|
||||
|
@ -60,7 +61,7 @@ func Example() {
|
|||
}
|
||||
|
||||
func (h hashString) Hash() uint64 {
|
||||
return Hash([]byte(h))
|
||||
return StringHash(string(h))
|
||||
}
|
||||
|
||||
func TestSortSliceByIndex(t *testing.T) {
|
||||
|
@ -68,20 +69,28 @@ func TestSortSliceByIndex(t *testing.T) {
|
|||
expect := []string{"e", "a", "c", "f", "d", "b"}
|
||||
hash := Hash(testKey)
|
||||
SortSliceByIndex(actual, hash)
|
||||
if !reflect.DeepEqual(actual, expect) {
|
||||
t.Errorf("Was %#v, but expected %#v", actual, expect)
|
||||
}
|
||||
require.Equal(t, expect, actual)
|
||||
}
|
||||
|
||||
func TestValidateWeights(t *testing.T) {
|
||||
weights := []float64{10, 10, 10, 2, 2, 2}
|
||||
err := ValidateWeights(weights)
|
||||
require.Error(t, err)
|
||||
weights = []float64{math.NaN(), 1, 1, 0.2, 0.2, 0.2}
|
||||
err = ValidateWeights(weights)
|
||||
require.Error(t, err)
|
||||
weights = []float64{1, 1, 1, 0.2, 0.2, 0.2}
|
||||
err = ValidateWeights(weights)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestSortSliceByWeightIndex(t *testing.T) {
|
||||
actual := []string{"a", "b", "c", "d", "e", "f"}
|
||||
weights := []uint64{10, 10, 10, 2, 2, 2}
|
||||
weights := []float64{1, 1, 1, 0.2, 0.2, 0.2}
|
||||
expect := []string{"a", "c", "b", "e", "f", "d"}
|
||||
hash := Hash(testKey)
|
||||
SortSliceByWeightIndex(actual, weights, hash)
|
||||
if !reflect.DeepEqual(actual, expect) {
|
||||
t.Errorf("Was %#v, but expected %#v", actual, expect)
|
||||
}
|
||||
require.Equal(t, expect, actual)
|
||||
}
|
||||
|
||||
func TestSortSliceByValue(t *testing.T) {
|
||||
|
@ -89,43 +98,7 @@ func TestSortSliceByValue(t *testing.T) {
|
|||
expect := []string{"d", "f", "c", "b", "a", "e"}
|
||||
hash := Hash(testKey)
|
||||
SortSliceByValue(actual, hash)
|
||||
if !reflect.DeepEqual(actual, expect) {
|
||||
t.Errorf("Was %#v, but expected %#v", actual, expect)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSortByRule(t *testing.T) {
|
||||
t.Run("direct", func(t *testing.T) {
|
||||
// 0 1 2 3 4 5
|
||||
actual := []string{"a", "b", "c", "d", "e", "f"}
|
||||
// 4 2 0 5 3 1
|
||||
expect := []string{"c", "f", "b", "e", "a", "d"}
|
||||
rule := []uint64{4, 2, 0, 5, 3, 1}
|
||||
|
||||
sortByRuleDirect(
|
||||
func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
|
||||
6, rule)
|
||||
|
||||
if !reflect.DeepEqual(actual, expect) {
|
||||
t.Errorf("Was %#v, but expected %#v", actual, expect)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("inverse", func(t *testing.T) {
|
||||
// 0 1 2 3 4 5
|
||||
actual := []string{"a", "b", "c", "d", "e", "f"}
|
||||
// 4 2 0 5 3 1
|
||||
expect := []string{"e", "c", "a", "f", "d", "b"}
|
||||
rule := []uint64{4, 2, 0, 5, 3, 1}
|
||||
|
||||
sortByRuleInverse(
|
||||
func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
|
||||
6, rule)
|
||||
|
||||
if !reflect.DeepEqual(actual, expect) {
|
||||
t.Errorf("Was %#v, but expected %#v", actual, expect)
|
||||
}
|
||||
})
|
||||
require.Equal(t, expect, actual)
|
||||
}
|
||||
|
||||
func TestSortSliceByValueFail(t *testing.T) {
|
||||
|
@ -134,23 +107,19 @@ func TestSortSliceByValueFail(t *testing.T) {
|
|||
actual []int
|
||||
hash = Hash(testKey)
|
||||
)
|
||||
SortSliceByValue(actual, hash)
|
||||
require.NotPanics(t, func() { SortSliceByValue(actual, hash) })
|
||||
})
|
||||
|
||||
t.Run("must be slice", func(t *testing.T) {
|
||||
actual := 10
|
||||
hash := Hash(testKey)
|
||||
SortSliceByValue(actual, hash)
|
||||
require.Panics(t, func() { SortSliceByValue(actual, hash) })
|
||||
})
|
||||
|
||||
t.Run("must 'fail' for unknown type", func(t *testing.T) {
|
||||
actual := []unknown{1, 2, 3, 4, 5}
|
||||
expect := []unknown{1, 2, 3, 4, 5}
|
||||
hash := Hash(testKey)
|
||||
SortSliceByValue(actual, hash)
|
||||
if !reflect.DeepEqual(actual, expect) {
|
||||
t.Errorf("Was %#v, but expected %#v", actual, expect)
|
||||
}
|
||||
require.Panics(t, func() { SortSliceByValue(actual, hash) })
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -159,9 +128,24 @@ func TestSortSliceByValueHasher(t *testing.T) {
|
|||
expect := []hashString{"d", "f", "c", "b", "a", "e"}
|
||||
hash := Hash(testKey)
|
||||
SortSliceByValue(actual, hash)
|
||||
if !reflect.DeepEqual(actual, expect) {
|
||||
t.Errorf("Was %#v, but expected %#v", actual, expect)
|
||||
}
|
||||
require.Equal(t, expect, actual)
|
||||
}
|
||||
|
||||
func TestSortHasherSliceByValue(t *testing.T) {
|
||||
actual := []hashString{"a", "b", "c", "d", "e", "f"}
|
||||
expect := []hashString{"d", "f", "c", "b", "a", "e"}
|
||||
hash := Hash(testKey)
|
||||
SortHasherSliceByValue(actual, hash)
|
||||
require.EqualValues(t, expect, actual)
|
||||
}
|
||||
|
||||
func TestSortHasherSliceByWeightValue(t *testing.T) {
|
||||
actual := []hashString{"a", "b", "c", "d", "e", "f"}
|
||||
weights := []float64{1.0, 1.0, 1.0, 1.0, 1.0, 1.0}
|
||||
expect := []hashString{"d", "f", "c", "b", "a", "e"}
|
||||
hash := Hash(testKey)
|
||||
SortHasherSliceByWeightValue(actual, weights, hash)
|
||||
require.EqualValues(t, expect, actual)
|
||||
}
|
||||
|
||||
func TestSortSliceByValueIntSlice(t *testing.T) {
|
||||
|
@ -206,11 +190,6 @@ func TestSortSliceByValueIntSlice(t *testing.T) {
|
|||
expect: []uint32{5, 1, 2, 0, 3, 4},
|
||||
},
|
||||
|
||||
{
|
||||
actual: Uint32Slice{0, 1, 2, 3, 4, 5},
|
||||
expect: Uint32Slice{0, 1, 2, 3, 4, 5},
|
||||
},
|
||||
|
||||
{
|
||||
actual: []int64{0, 1, 2, 3, 4, 5},
|
||||
expect: []int64{5, 3, 0, 1, 4, 2},
|
||||
|
@ -225,9 +204,7 @@ func TestSortSliceByValueIntSlice(t *testing.T) {
|
|||
|
||||
for _, tc := range cases {
|
||||
SortSliceByValue(tc.actual, hash)
|
||||
if !reflect.DeepEqual(tc.actual, tc.expect) {
|
||||
t.Errorf("Was %#v, but expected %#v", tc.actual, tc.expect)
|
||||
}
|
||||
require.Equal(t, tc.expect, tc.actual)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -236,9 +213,7 @@ func TestSort(t *testing.T) {
|
|||
hash := Hash(testKey)
|
||||
actual := Sort(nodes, hash)
|
||||
expected := []uint64{3, 1, 4, 2, 0}
|
||||
if !reflect.DeepEqual(actual, expected) {
|
||||
t.Errorf("Was %#v, but expected %#v", actual, expected)
|
||||
}
|
||||
require.Equal(t, expected, actual)
|
||||
}
|
||||
|
||||
func TestDistribution(t *testing.T) {
|
||||
|
@ -276,18 +251,11 @@ func TestDistribution(t *testing.T) {
|
|||
for node, count := range counts {
|
||||
d := mean - float64(count)
|
||||
chi2 += math.Pow(float64(count)-mean, 2) / mean
|
||||
if d > delta || (0-d) > delta {
|
||||
t.Errorf(
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)",
|
||||
node, count, mean, delta,
|
||||
)
|
||||
}
|
||||
}
|
||||
if chi2 > chiTable[size-1] {
|
||||
t.Errorf(
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
|
||||
chi2, chiTable[size-1])
|
||||
require.True(t, d < delta && (0-d) < delta,
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
|
||||
}
|
||||
require.True(t, chi2 < chiTable[size-1],
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
|
||||
})
|
||||
|
||||
t.Run("sortByIndex", func(t *testing.T) {
|
||||
|
@ -317,18 +285,11 @@ func TestDistribution(t *testing.T) {
|
|||
for node, count := range counts {
|
||||
d := mean - float64(count)
|
||||
chi2 += math.Pow(float64(count)-mean, 2) / mean
|
||||
if d > delta || (0-d) > delta {
|
||||
t.Errorf(
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)",
|
||||
node, count, mean, delta,
|
||||
)
|
||||
}
|
||||
}
|
||||
if chi2 > chiTable[size-1] {
|
||||
t.Errorf(
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
|
||||
chi2, chiTable[size-1])
|
||||
require.True(t, d < delta && (0-d) < delta,
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
|
||||
}
|
||||
require.True(t, chi2 < chiTable[size-1],
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
|
||||
})
|
||||
|
||||
t.Run("sortByValue", func(t *testing.T) {
|
||||
|
@ -357,18 +318,11 @@ func TestDistribution(t *testing.T) {
|
|||
for node, count := range counts {
|
||||
d := mean - float64(count)
|
||||
chi2 += math.Pow(float64(count)-mean, 2) / mean
|
||||
if d > delta || (0-d) > delta {
|
||||
t.Errorf(
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)",
|
||||
node, count, mean, delta,
|
||||
)
|
||||
}
|
||||
}
|
||||
if chi2 > chiTable[size-1] {
|
||||
t.Errorf(
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
|
||||
chi2, chiTable[size-1])
|
||||
require.True(t, d < delta && (0-d) < delta,
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
|
||||
}
|
||||
require.True(t, chi2 < chiTable[size-1],
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
|
||||
})
|
||||
|
||||
t.Run("sortByStringValue", func(t *testing.T) {
|
||||
|
@ -397,18 +351,11 @@ func TestDistribution(t *testing.T) {
|
|||
for node, count := range counts {
|
||||
d := mean - float64(count)
|
||||
chi2 += math.Pow(float64(count)-mean, 2) / mean
|
||||
if d > delta || (0-d) > delta {
|
||||
t.Errorf(
|
||||
"Node %s received %d keys, expected %.0f (+/- %.2f)",
|
||||
node, count, mean, delta,
|
||||
)
|
||||
}
|
||||
}
|
||||
if chi2 > chiTable[size-1] {
|
||||
t.Errorf(
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
|
||||
chi2, chiTable[size-1])
|
||||
require.True(t, d < delta && (0-d) < delta,
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
|
||||
}
|
||||
require.True(t, chi2 < chiTable[size-1],
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
|
||||
})
|
||||
|
||||
t.Run("sortByInt32Value", func(t *testing.T) {
|
||||
|
@ -437,31 +384,24 @@ func TestDistribution(t *testing.T) {
|
|||
for node, count := range counts {
|
||||
d := mean - float64(count)
|
||||
chi2 += math.Pow(float64(count)-mean, 2) / mean
|
||||
if d > delta || (0-d) > delta {
|
||||
t.Errorf(
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)",
|
||||
node, count, mean, delta,
|
||||
)
|
||||
}
|
||||
}
|
||||
if chi2 > chiTable[size-1] {
|
||||
t.Errorf(
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
|
||||
chi2, chiTable[size-1])
|
||||
require.True(t, d < delta && (0-d) < delta,
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
|
||||
}
|
||||
require.True(t, chi2 < chiTable[size-1],
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
|
||||
})
|
||||
|
||||
t.Run("sortByWeightValue", func(t *testing.T) {
|
||||
var (
|
||||
i uint64
|
||||
a, b, result [size]int
|
||||
w [size]uint64
|
||||
w [size]float64
|
||||
key = make([]byte, 16)
|
||||
)
|
||||
|
||||
for i = 0; i < size; i++ {
|
||||
a[i] = int(i)
|
||||
w[i] = size - i
|
||||
w[i] = float64(size-i) / float64(size)
|
||||
}
|
||||
for i = 0; i < keys; i++ {
|
||||
copy(b[:], a[:])
|
||||
|
@ -470,24 +410,24 @@ func TestDistribution(t *testing.T) {
|
|||
SortSliceByWeightValue(b[:], w[:], hash)
|
||||
result[b[0]]++
|
||||
}
|
||||
|
||||
for i := 0; i < size-1; i++ {
|
||||
if bool(w[i] > w[i+1]) != bool(result[i] > result[i+1]) {
|
||||
t.Fatalf("result array %v must be corresponded to weights %v", result, w)
|
||||
}
|
||||
require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]),
|
||||
"result array %v must be corresponded to weights %v", result, w)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("sortByWeightValueShuffledW", func(t *testing.T) {
|
||||
t.Run("sortByWeightValueShuffledWeight", func(t *testing.T) {
|
||||
var (
|
||||
i uint64
|
||||
a, b, result [size]int
|
||||
w [size]uint64
|
||||
w [size]float64
|
||||
key = make([]byte, 16)
|
||||
)
|
||||
|
||||
for i = 0; i < size; i++ {
|
||||
a[i] = int(i)
|
||||
w[i] = size - i
|
||||
w[i] = float64(size-i) / float64(size)
|
||||
}
|
||||
|
||||
rand.Shuffle(size, func(i, j int) {
|
||||
|
@ -501,17 +441,16 @@ func TestDistribution(t *testing.T) {
|
|||
result[b[0]]++
|
||||
}
|
||||
for i := 0; i < size-1; i++ {
|
||||
if bool(w[i] > w[i+1]) != bool(result[i] > result[i+1]) {
|
||||
t.Fatalf("result array %v must be corresponded to weights %v", result, w)
|
||||
}
|
||||
require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]),
|
||||
"result array %v must be corresponded to weights %v", result, w)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("sortByWeightValueEmptyW", func(t *testing.T) {
|
||||
t.Run("sortByWeightValueEmptyWeight", func(t *testing.T) {
|
||||
var (
|
||||
i uint64
|
||||
a, b [size]int
|
||||
w [size]uint64
|
||||
w [size]float64
|
||||
counts = make(map[int]int, size)
|
||||
key = make([]byte, 16)
|
||||
)
|
||||
|
@ -534,32 +473,25 @@ func TestDistribution(t *testing.T) {
|
|||
for node, count := range counts {
|
||||
d := mean - float64(count)
|
||||
chi2 += math.Pow(float64(count)-mean, 2) / mean
|
||||
if d > delta || (0-d) > delta {
|
||||
t.Errorf(
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)",
|
||||
node, count, mean, delta,
|
||||
)
|
||||
}
|
||||
}
|
||||
if chi2 > chiTable[size-1] {
|
||||
t.Errorf(
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
|
||||
chi2, chiTable[size-1])
|
||||
require.True(t, d < delta && (0-d) < delta,
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
|
||||
}
|
||||
require.True(t, chi2 < chiTable[size-1],
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
|
||||
})
|
||||
|
||||
t.Run("sortByWeightValueUniformW", func(t *testing.T) {
|
||||
t.Run("sortByWeightValueUniformWeight", func(t *testing.T) {
|
||||
var (
|
||||
i uint64
|
||||
a, b [size]int
|
||||
w [size]uint64
|
||||
w [size]float64
|
||||
counts = make(map[int]int, size)
|
||||
key = make([]byte, 16)
|
||||
)
|
||||
|
||||
for i = 0; i < size; i++ {
|
||||
a[i] = int(i)
|
||||
w[i] = 10
|
||||
w[i] = 0.5
|
||||
}
|
||||
|
||||
for i = 0; i < keys; i++ {
|
||||
|
@ -576,45 +508,85 @@ func TestDistribution(t *testing.T) {
|
|||
for node, count := range counts {
|
||||
d := mean - float64(count)
|
||||
chi2 += math.Pow(float64(count)-mean, 2) / mean
|
||||
if d > delta || (0-d) > delta {
|
||||
t.Errorf(
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)",
|
||||
node, count, mean, delta,
|
||||
)
|
||||
}
|
||||
}
|
||||
if chi2 > chiTable[size-1] {
|
||||
t.Errorf(
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
|
||||
chi2, chiTable[size-1])
|
||||
require.True(t, d < delta && (0-d) < delta,
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
|
||||
}
|
||||
require.True(t, chi2 < chiTable[size-1],
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
|
||||
})
|
||||
|
||||
t.Run("sortByWeightValueAbsoluteW", func(t *testing.T) {
|
||||
const keys = 1
|
||||
var (
|
||||
i uint64
|
||||
a, b [size]int
|
||||
w [size]uint64
|
||||
w [size]float64
|
||||
key = make([]byte, 16)
|
||||
)
|
||||
|
||||
for i = 0; i < size; i++ {
|
||||
a[i] = int(i)
|
||||
}
|
||||
w[size-1] = 10
|
||||
w[size-1] = 1
|
||||
|
||||
for i = 0; i < keys; i++ {
|
||||
copy(b[:], a[:])
|
||||
binary.BigEndian.PutUint64(key, i+size)
|
||||
hash := Hash(key)
|
||||
SortSliceByWeightValue(b[:], w[:], hash)
|
||||
if b[0] != a[size-1] {
|
||||
t.Fatalf("expected last value of %v to be the first with highest weight", a)
|
||||
}
|
||||
require.True(t, b[0] == a[size-1],
|
||||
"expected last value of %v to be the first with highest distance", a)
|
||||
}
|
||||
|
||||
})
|
||||
|
||||
t.Run("sortByWeightValueNormalizedWeight", func(t *testing.T) {
|
||||
var (
|
||||
i uint64
|
||||
a, b, result [size]uint64
|
||||
w, normalizedW [size]float64
|
||||
key = make([]byte, 16)
|
||||
)
|
||||
|
||||
for i = 0; i < size; i++ {
|
||||
a[i] = i
|
||||
w[int(i)] = 10
|
||||
}
|
||||
w[0] = 100
|
||||
|
||||
// Here let's use logarithm normalization
|
||||
for i = 0; i < size; i++ {
|
||||
normalizedW[i] = math.Log2(w[i]) / math.Log2(w[0])
|
||||
}
|
||||
|
||||
for i = 0; i < keys; i++ {
|
||||
copy(b[:], a[:])
|
||||
binary.BigEndian.PutUint64(key, i+size)
|
||||
hash := Hash(key)
|
||||
SortSliceByWeightValue(b[:], normalizedW[:], hash)
|
||||
for j := range b {
|
||||
result[b[j]] += uint64(len(b) - j)
|
||||
}
|
||||
}
|
||||
cutResult := result[1:]
|
||||
var total uint64
|
||||
for i := range cutResult {
|
||||
total += cutResult[i]
|
||||
}
|
||||
|
||||
var chi2 float64
|
||||
mean := float64(total) / float64(len(cutResult))
|
||||
delta := mean * percent
|
||||
for node, count := range cutResult {
|
||||
d := mean - float64(count)
|
||||
chi2 += math.Pow(float64(count)-mean, 2) / mean
|
||||
require.True(t, d < delta && (0-d) < delta,
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
|
||||
}
|
||||
require.True(t, chi2 < chiTable[size-1],
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
|
||||
})
|
||||
|
||||
t.Run("hash collision", func(t *testing.T) {
|
||||
var (
|
||||
i uint64
|
||||
|
@ -681,6 +653,36 @@ func BenchmarkSortByValue_fnv_1000(b *testing.B) {
|
|||
benchmarkSortByValue(b, 1000, hash)
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByValue_Reflection_fnv_10(b *testing.B) {
|
||||
hash := Hash(testKey)
|
||||
benchmarkSortHashersByValueReflection(b, 10, hash)
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByValue_Reflection_fnv_100(b *testing.B) {
|
||||
hash := Hash(testKey)
|
||||
benchmarkSortHashersByValueReflection(b, 100, hash)
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByValue_Reflection_fnv_1000(b *testing.B) {
|
||||
hash := Hash(testKey)
|
||||
benchmarkSortHashersByValueReflection(b, 1000, hash)
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByValue_Typed_fnv_10(b *testing.B) {
|
||||
hash := Hash(testKey)
|
||||
benchmarkSortHashersByValueTyped(b, 10, hash)
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByValue_Typed_fnv_100(b *testing.B) {
|
||||
hash := Hash(testKey)
|
||||
benchmarkSortHashersByValueTyped(b, 100, hash)
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByValue_Typed_fnv_1000(b *testing.B) {
|
||||
hash := Hash(testKey)
|
||||
benchmarkSortHashersByValueTyped(b, 1000, hash)
|
||||
}
|
||||
|
||||
func BenchmarkSortByWeight_fnv_10(b *testing.B) {
|
||||
hash := Hash(testKey)
|
||||
_ = benchmarkSortByWeight(b, 10, hash)
|
||||
|
@ -726,6 +728,30 @@ func BenchmarkSortByWeightValue_fnv_1000(b *testing.B) {
|
|||
benchmarkSortByWeightValue(b, 1000, hash)
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByWeightValueReflection_fnv_10(b *testing.B) {
|
||||
benchmarkSortHashersByWeightValueRelection(b, 10, Hash(testKey))
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByWeightValueReflection_fnv_100(b *testing.B) {
|
||||
benchmarkSortHashersByWeightValueRelection(b, 100, Hash(testKey))
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByWeightValueReflection_fnv_1000(b *testing.B) {
|
||||
benchmarkSortHashersByWeightValueRelection(b, 1000, Hash(testKey))
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByWeightValueTyped_fnv_10(b *testing.B) {
|
||||
benchmarkSortHashersByWeightValueTyped(b, 10, Hash(testKey))
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByWeightValueTyped_fnv_100(b *testing.B) {
|
||||
benchmarkSortHashersByWeightValueTyped(b, 100, Hash(testKey))
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByWeightValueTyped_fnv_1000(b *testing.B) {
|
||||
benchmarkSortHashersByWeightValueTyped(b, 1000, Hash(testKey))
|
||||
}
|
||||
|
||||
func benchmarkSort(b *testing.B, n int, hash uint64) uint64 {
|
||||
servers := make([]uint64, n)
|
||||
for i := uint64(0); i < uint64(len(servers)); i++ {
|
||||
|
@ -772,9 +798,9 @@ func benchmarkSortByValue(b *testing.B, n int, hash uint64) {
|
|||
|
||||
func benchmarkSortByWeight(b *testing.B, n int, hash uint64) uint64 {
|
||||
servers := make([]uint64, n)
|
||||
weights := make([]uint64, n)
|
||||
weights := make([]float64, n)
|
||||
for i := uint64(0); i < uint64(len(servers)); i++ {
|
||||
weights[i] = uint64(n) - i
|
||||
weights[i] = float64(uint64(n)-i) / float64(n)
|
||||
servers[i] = i
|
||||
}
|
||||
|
||||
|
@ -790,9 +816,9 @@ func benchmarkSortByWeight(b *testing.B, n int, hash uint64) uint64 {
|
|||
|
||||
func benchmarkSortByWeightIndex(b *testing.B, n int, hash uint64) {
|
||||
servers := make([]uint64, n)
|
||||
weights := make([]uint64, n)
|
||||
weights := make([]float64, n)
|
||||
for i := uint64(0); i < uint64(len(servers)); i++ {
|
||||
weights[i] = uint64(n) - i
|
||||
weights[i] = float64(uint64(n)-i) / float64(n)
|
||||
servers[i] = i
|
||||
}
|
||||
|
||||
|
@ -806,9 +832,9 @@ func benchmarkSortByWeightIndex(b *testing.B, n int, hash uint64) {
|
|||
|
||||
func benchmarkSortByWeightValue(b *testing.B, n int, hash uint64) {
|
||||
servers := make([]string, n)
|
||||
weights := make([]uint64, n)
|
||||
weights := make([]float64, n)
|
||||
for i := uint64(0); i < uint64(len(servers)); i++ {
|
||||
weights[i] = uint64(n) - i
|
||||
weights[i] = float64(uint64(n)-i) / float64(n)
|
||||
servers[i] = "localhost:" + strconv.FormatUint(60000-i, 10)
|
||||
}
|
||||
|
||||
|
@ -819,3 +845,63 @@ func benchmarkSortByWeightValue(b *testing.B, n int, hash uint64) {
|
|||
SortSliceByWeightValue(servers, weights, hash)
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkSortHashersByWeightValueRelection(b *testing.B, n int, hash uint64) {
|
||||
servers := make([]hashString, n)
|
||||
weights := make([]float64, n)
|
||||
for i := uint64(0); i < uint64(len(servers)); i++ {
|
||||
weights[i] = float64(uint64(n)-i) / float64(n)
|
||||
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
SortSliceByWeightValue(servers, weights, hash)
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkSortHashersByWeightValueTyped(b *testing.B, n int, hash uint64) {
|
||||
servers := make([]hashString, n)
|
||||
weights := make([]float64, n)
|
||||
for i := uint64(0); i < uint64(len(servers)); i++ {
|
||||
weights[i] = float64(uint64(n)-i) / float64(n)
|
||||
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
SortHasherSliceByWeightValue(servers, weights, hash)
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkSortHashersByValueReflection(b *testing.B, n int, hash uint64) {
|
||||
servers := make([]hashString, n)
|
||||
for i := uint64(0); i < uint64(len(servers)); i++ {
|
||||
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
SortSliceByValue(servers, hash)
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkSortHashersByValueTyped(b *testing.B, n int, hash uint64) {
|
||||
servers := make([]hashString, n)
|
||||
for i := uint64(0); i < uint64(len(servers)); i++ {
|
||||
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
SortHasherSliceByValue(servers, hash)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue