Compare commits
1 commit
master
...
feature/ge
Author | SHA1 | Date | |
---|---|---|---|
|
6ea18e48fe |
5 changed files with 221 additions and 207 deletions
43
README.md
43
README.md
|
@ -14,26 +14,25 @@
|
|||
## Benchmark:
|
||||
|
||||
```
|
||||
BenchmarkSort_fnv_10-8 5000000 354 ns/op 224 B/op 3 allocs/op
|
||||
BenchmarkSort_fnv_100-8 300000 5103 ns/op 1856 B/op 3 allocs/op
|
||||
BenchmarkSort_fnv_1000-8 10000 115874 ns/op 16448 B/op 3 allocs/op
|
||||
BenchmarkSortByIndex_fnv_10-8 3000000 562 ns/op 384 B/op 7 allocs/op
|
||||
BenchmarkSortByIndex_fnv_100-8 200000 5819 ns/op 2928 B/op 7 allocs/op
|
||||
BenchmarkSortByIndex_fnv_1000-8 10000 125859 ns/op 25728 B/op 7 allocs/op
|
||||
BenchmarkSortByValue_fnv_10-8 2000000 1056 ns/op 544 B/op 17 allocs/op
|
||||
BenchmarkSortByValue_fnv_100-8 200000 9593 ns/op 4528 B/op 107 allocs/op
|
||||
BenchmarkSortByValue_fnv_1000-8 10000 109272 ns/op 41728 B/op 1007 allocs/op
|
||||
|
||||
BenchmarkSortByWeight_fnv_10-8 3000000 500 ns/op 320 B/op 4 allocs/op
|
||||
BenchmarkSortByWeight_fnv_100-8 200000 8257 ns/op 2768 B/op 4 allocs/op
|
||||
BenchmarkSortByWeight_fnv_1000-8 10000 197938 ns/op 24656 B/op 4 allocs/op
|
||||
BenchmarkSortByWeightIndex_fnv_10-8 2000000 760 ns/op 480 B/op 8 allocs/op
|
||||
BenchmarkSortByWeightIndex_fnv_100-8 200000 9191 ns/op 3840 B/op 8 allocs/op
|
||||
BenchmarkSortByWeightIndex_fnv_1000-8 10000 208204 ns/op 33936 B/op 8 allocs/op
|
||||
BenchmarkSortByWeightValue_fnv_10-8 1000000 1095 ns/op 640 B/op 18 allocs/op
|
||||
BenchmarkSortByWeightValue_fnv_100-8 200000 12291 ns/op 5440 B/op 108 allocs/op
|
||||
BenchmarkSortByWeightValue_fnv_1000-8 10000 145125 ns/op 49936 B/op 1008 allocs/op
|
||||
BenchmarkSort_fnv_10-8 5000000 365 ns/op 224 B/op 3 allocs/op
|
||||
BenchmarkSort_fnv_100-8 300000 5261 ns/op 1856 B/op 3 allocs/op
|
||||
BenchmarkSort_fnv_1000-8 10000 119462 ns/op 16448 B/op 3 allocs/op
|
||||
BenchmarkSortByIndex_fnv_10-8 3000000 546 ns/op 384 B/op 7 allocs/op
|
||||
BenchmarkSortByIndex_fnv_100-8 200000 5965 ns/op 2928 B/op 7 allocs/op
|
||||
BenchmarkSortByIndex_fnv_1000-8 10000 127732 ns/op 25728 B/op 7 allocs/op
|
||||
BenchmarkSortByValue_fnv_10-8 2000000 962 ns/op 544 B/op 17 allocs/op
|
||||
BenchmarkSortByValue_fnv_100-8 200000 9604 ns/op 4528 B/op 107 allocs/op
|
||||
BenchmarkSortByValue_fnv_1000-8 10000 111741 ns/op 41728 B/op 1007 allocs/op
|
||||
|
||||
BenchmarkSortByWeight_fnv_10-8 3000000 501 ns/op 320 B/op 4 allocs/op
|
||||
BenchmarkSortByWeight_fnv_100-8 200000 8495 ns/op 2768 B/op 4 allocs/op
|
||||
BenchmarkSortByWeight_fnv_1000-8 10000 197880 ns/op 24656 B/op 4 allocs/op
|
||||
BenchmarkSortByWeightIndex_fnv_10-8 2000000 702 ns/op 480 B/op 8 allocs/op
|
||||
BenchmarkSortByWeightIndex_fnv_100-8 200000 9338 ns/op 3840 B/op 8 allocs/op
|
||||
BenchmarkSortByWeightIndex_fnv_1000-8 10000 204669 ns/op 33936 B/op 8 allocs/op
|
||||
BenchmarkSortByWeightValue_fnv_10-8 1000000 1083 ns/op 640 B/op 18 allocs/op
|
||||
BenchmarkSortByWeightValue_fnv_100-8 200000 11444 ns/op 5440 B/op 108 allocs/op
|
||||
BenchmarkSortByWeightValue_fnv_1000-8 10000 148471 ns/op 49936 B/op 1008 allocs/op
|
||||
```
|
||||
|
||||
## Example
|
||||
|
@ -71,11 +70,11 @@ func main() {
|
|||
}
|
||||
|
||||
// Output:
|
||||
// trying GET four.example.com/examples/object-key
|
||||
// trying GET three.example.com/examples/object-key
|
||||
// trying GET one.example.com/examples/object-key
|
||||
// trying GET two.example.com/examples/object-key
|
||||
// trying GET six.example.com/examples/object-key
|
||||
// trying GET five.example.com/examples/object-key
|
||||
// trying GET six.example.com/examples/object-key
|
||||
// trying GET one.example.com/examples/object-key
|
||||
// trying GET four.example.com/examples/object-key
|
||||
}
|
||||
```
|
5
go.mod
5
go.mod
|
@ -1,3 +1,6 @@
|
|||
module github.com/nspcc-dev/hrw
|
||||
|
||||
require github.com/spaolacci/murmur3 v1.1.0
|
||||
require (
|
||||
github.com/spaolacci/murmur3 v1.1.0
|
||||
github.com/stretchr/testify v1.3.0
|
||||
)
|
||||
|
|
7
go.sum
7
go.sum
|
@ -1,2 +1,9 @@
|
|||
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
|
||||
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
|
|
114
hrw.go
114
hrw.go
|
@ -4,6 +4,7 @@ package hrw
|
|||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
|
@ -17,9 +18,9 @@ type (
|
|||
Hasher interface{ Hash() uint64 }
|
||||
|
||||
hashed struct {
|
||||
length int
|
||||
sorted []uint64
|
||||
weight []uint64
|
||||
length int
|
||||
sorted []uint64
|
||||
distance []uint64
|
||||
}
|
||||
|
||||
weighted struct {
|
||||
|
@ -28,7 +29,13 @@ type (
|
|||
}
|
||||
)
|
||||
|
||||
func weight(x uint64, y uint64) uint64 {
|
||||
// Boundaries of valid normalized weights
|
||||
const (
|
||||
NormalizedMaxWeight = 1.0
|
||||
NormalizedMinWeight = 0.0
|
||||
)
|
||||
|
||||
func distance(x uint64, y uint64) uint64 {
|
||||
acc := x ^ y
|
||||
// here used mmh3 64 bit finalizer
|
||||
// https://github.com/aappleby/smhasher/blob/61a0530f28277f2e850bfc39600ce61d02b518de/src/MurmurHash3.cpp#L81
|
||||
|
@ -41,19 +48,19 @@ func weight(x uint64, y uint64) uint64 {
|
|||
}
|
||||
|
||||
func (h hashed) Len() int { return h.length }
|
||||
func (h hashed) Less(i, j int) bool { return h.weight[i] < h.weight[j] }
|
||||
func (h hashed) Less(i, j int) bool { return h.distance[i] < h.distance[j] }
|
||||
func (h hashed) Swap(i, j int) {
|
||||
h.sorted[i], h.sorted[j] = h.sorted[j], h.sorted[i]
|
||||
h.weight[i], h.weight[j] = h.weight[j], h.weight[i]
|
||||
h.distance[i], h.distance[j] = h.distance[j], h.distance[i]
|
||||
}
|
||||
|
||||
func (w weighted) Len() int { return w.h.length }
|
||||
func (w weighted) Less(i, j int) bool {
|
||||
// `maxUint64 - weight` makes least weight most valuable
|
||||
// `maxUint64 - distance` makes the shorter distance more valuable
|
||||
// it is necessary for operation with normalized values
|
||||
wi := float64(^uint64(0)-w.h.weight[i]) * w.normal[i]
|
||||
wj := float64(^uint64(0)-w.h.weight[j]) * w.normal[j]
|
||||
return wi > wj // higher weight must be placed lower to be first
|
||||
wi := float64(^uint64(0)-w.h.distance[i]) * w.normal[i]
|
||||
wj := float64(^uint64(0)-w.h.distance[j]) * w.normal[j]
|
||||
return wi > wj // higher distance must be placed lower to be first
|
||||
}
|
||||
func (w weighted) Swap(i, j int) { w.normal[i], w.normal[j] = w.normal[j], w.normal[i]; w.h.Swap(i, j) }
|
||||
|
||||
|
@ -62,65 +69,64 @@ func Hash(key []byte) uint64 {
|
|||
return murmur3.Sum64(key)
|
||||
}
|
||||
|
||||
// Sort receive nodes and hash, and sort it by weight
|
||||
// Sort receives nodes and a hash, and returns the node indices sorted by distance
|
||||
func Sort(nodes []uint64, hash uint64) []uint64 {
|
||||
var (
|
||||
l = len(nodes)
|
||||
h = hashed{
|
||||
length: l,
|
||||
sorted: make([]uint64, 0, l),
|
||||
weight: make([]uint64, 0, l),
|
||||
length: l,
|
||||
sorted: make([]uint64, 0, l),
|
||||
distance: make([]uint64, 0, l),
|
||||
}
|
||||
)
|
||||
|
||||
for i, node := range nodes {
|
||||
for i := range nodes {
|
||||
h.sorted = append(h.sorted, uint64(i))
|
||||
h.weight = append(h.weight, weight(node, hash))
|
||||
h.distance = append(h.distance, distance(nodes[i], hash))
|
||||
}
|
||||
|
||||
sort.Sort(h)
|
||||
return h.sorted
|
||||
}
|
||||
|
||||
// SortByWeight receive nodes and hash, and sort it by weight
|
||||
func SortByWeight(nodes []uint64, weights []uint64, hash uint64) []uint64 {
|
||||
var (
|
||||
maxWeight uint64
|
||||
|
||||
l = len(nodes)
|
||||
w = weighted{
|
||||
h: hashed{
|
||||
length: l,
|
||||
sorted: make([]uint64, 0, l),
|
||||
weight: make([]uint64, 0, l),
|
||||
},
|
||||
normal: make([]float64, 0, l),
|
||||
}
|
||||
)
|
||||
|
||||
// finding max weight to perform normalization
|
||||
// SortByWeight receives nodes, weights and a hash, and sorts the nodes by distance * weight
|
||||
func SortByWeight(nodes []uint64, weights []float64, hash uint64) []uint64 {
|
||||
// check if all weights are equal
|
||||
uniform := true
|
||||
for i := range weights {
|
||||
if maxWeight < weights[i] {
|
||||
maxWeight = weights[i]
|
||||
// check if all nodes have the same weight
|
||||
if weights[i] != weights[0] {
|
||||
uniform = false
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// if all nodes have 0-weights or weights are incorrect then sort uniformly
|
||||
if maxWeight == 0 || l != len(nodes) {
|
||||
l := len(nodes)
|
||||
w := weighted{
|
||||
h: hashed{
|
||||
length: l,
|
||||
sorted: make([]uint64, 0, l),
|
||||
distance: make([]uint64, 0, l),
|
||||
},
|
||||
normal: make([]float64, l),
|
||||
}
|
||||
|
||||
// if all nodes have the same weight, or the numbers of weights and nodes differ, then sort uniformly
|
||||
if uniform || len(weights) != l {
|
||||
return Sort(nodes, hash)
|
||||
}
|
||||
|
||||
fMaxWeight := float64(maxWeight)
|
||||
for i, node := range nodes {
|
||||
for i := range nodes {
|
||||
w.h.sorted = append(w.h.sorted, uint64(i))
|
||||
w.h.weight = append(w.h.weight, weight(node, hash))
|
||||
w.normal = append(w.normal, float64(weights[i])/fMaxWeight)
|
||||
w.h.distance = append(w.h.distance, distance(nodes[i], hash))
|
||||
}
|
||||
copy(w.normal, weights)
|
||||
|
||||
sort.Sort(w)
|
||||
return w.h.sorted
|
||||
}
|
||||
|
||||
// SortSliceByValue received []T and hash to sort by value-weight
|
||||
// SortSliceByValue received []T and hash to sort by value-distance
|
||||
func SortSliceByValue(slice interface{}, hash uint64) {
|
||||
rule := prepareRule(slice)
|
||||
if rule != nil {
|
||||
|
@ -130,17 +136,17 @@ func SortSliceByValue(slice interface{}, hash uint64) {
|
|||
}
|
||||
}
|
||||
|
||||
// SortSliceByWeightValue received []T, weights and hash to sort by value-weight
|
||||
func SortSliceByWeightValue(slice interface{}, weight []uint64, hash uint64) {
|
||||
// SortSliceByWeightValue received []T, weights and hash to sort by value-distance * weights
|
||||
func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) {
|
||||
rule := prepareRule(slice)
|
||||
if rule != nil {
|
||||
swap := reflect.Swapper(slice)
|
||||
rule = SortByWeight(rule, weight, hash)
|
||||
rule = SortByWeight(rule, weights, hash)
|
||||
sortByRuleInverse(swap, uint64(len(rule)), rule)
|
||||
}
|
||||
}
|
||||
|
||||
// SortSliceByIndex received []T and hash to sort by index-weight
|
||||
// SortSliceByIndex received []T and hash to sort by index-distance
|
||||
func SortSliceByIndex(slice interface{}, hash uint64) {
|
||||
length := uint64(reflect.ValueOf(slice).Len())
|
||||
swap := reflect.Swapper(slice)
|
||||
|
@ -152,15 +158,15 @@ func SortSliceByIndex(slice interface{}, hash uint64) {
|
|||
sortByRuleInverse(swap, length, rule)
|
||||
}
|
||||
|
||||
// SortSliceByWeightIndex received []T, weights and hash to sort by index-weight
|
||||
func SortSliceByWeightIndex(slice interface{}, weight []uint64, hash uint64) {
|
||||
// SortSliceByWeightIndex received []T, weights and hash to sort by index-distance * weights
|
||||
func SortSliceByWeightIndex(slice interface{}, weights []float64, hash uint64) {
|
||||
length := uint64(reflect.ValueOf(slice).Len())
|
||||
swap := reflect.Swapper(slice)
|
||||
rule := make([]uint64, 0, length)
|
||||
for i := uint64(0); i < length; i++ {
|
||||
rule = append(rule, i)
|
||||
}
|
||||
rule = SortByWeight(rule, weight, hash)
|
||||
rule = SortByWeight(rule, weights, hash)
|
||||
sortByRuleInverse(swap, length, rule)
|
||||
}
|
||||
|
||||
|
@ -283,3 +289,13 @@ func prepareRule(slice interface{}) []uint64 {
|
|||
}
|
||||
return rule
|
||||
}
|
||||
|
||||
// ValidateWeights checks if weights are normalized between 0.0 and 1.0
|
||||
func ValidateWeights(weights []float64) error {
|
||||
for i := range weights {
|
||||
if weights[i] > NormalizedMaxWeight || weights[i] < NormalizedMinWeight {
|
||||
return errors.New("weights are not normalized")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
259
hrw_test.go
259
hrw_test.go
|
@ -5,9 +5,10 @@ import (
|
|||
"fmt"
|
||||
"math"
|
||||
"math/rand"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
type (
|
||||
|
@ -68,20 +69,25 @@ func TestSortSliceByIndex(t *testing.T) {
|
|||
expect := []string{"e", "a", "c", "f", "d", "b"}
|
||||
hash := Hash(testKey)
|
||||
SortSliceByIndex(actual, hash)
|
||||
if !reflect.DeepEqual(actual, expect) {
|
||||
t.Errorf("Was %#v, but expected %#v", actual, expect)
|
||||
}
|
||||
require.Equal(t, expect, actual)
|
||||
}
|
||||
|
||||
func TestValidateWeights(t *testing.T) {
|
||||
weights := []float64{10, 10, 10, 2, 2, 2}
|
||||
err := ValidateWeights(weights)
|
||||
require.Error(t, err)
|
||||
weights = []float64{1, 1, 1, 0.2, 0.2, 0.2}
|
||||
err = ValidateWeights(weights)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestSortSliceByWeightIndex(t *testing.T) {
|
||||
actual := []string{"a", "b", "c", "d", "e", "f"}
|
||||
weights := []uint64{10, 10, 10, 2, 2, 2}
|
||||
weights := []float64{1, 1, 1, 0.2, 0.2, 0.2}
|
||||
expect := []string{"a", "c", "b", "e", "f", "d"}
|
||||
hash := Hash(testKey)
|
||||
SortSliceByWeightIndex(actual, weights, hash)
|
||||
if !reflect.DeepEqual(actual, expect) {
|
||||
t.Errorf("Was %#v, but expected %#v", actual, expect)
|
||||
}
|
||||
require.Equal(t, expect, actual)
|
||||
}
|
||||
|
||||
func TestSortSliceByValue(t *testing.T) {
|
||||
|
@ -89,9 +95,7 @@ func TestSortSliceByValue(t *testing.T) {
|
|||
expect := []string{"d", "f", "c", "b", "a", "e"}
|
||||
hash := Hash(testKey)
|
||||
SortSliceByValue(actual, hash)
|
||||
if !reflect.DeepEqual(actual, expect) {
|
||||
t.Errorf("Was %#v, but expected %#v", actual, expect)
|
||||
}
|
||||
require.Equal(t, expect, actual)
|
||||
}
|
||||
|
||||
func TestSortByRule(t *testing.T) {
|
||||
|
@ -106,9 +110,7 @@ func TestSortByRule(t *testing.T) {
|
|||
func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
|
||||
6, rule)
|
||||
|
||||
if !reflect.DeepEqual(actual, expect) {
|
||||
t.Errorf("Was %#v, but expected %#v", actual, expect)
|
||||
}
|
||||
require.Equal(t, expect, actual)
|
||||
})
|
||||
|
||||
t.Run("inverse", func(t *testing.T) {
|
||||
|
@ -122,9 +124,7 @@ func TestSortByRule(t *testing.T) {
|
|||
func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
|
||||
6, rule)
|
||||
|
||||
if !reflect.DeepEqual(actual, expect) {
|
||||
t.Errorf("Was %#v, but expected %#v", actual, expect)
|
||||
}
|
||||
require.Equal(t, expect, actual)
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -134,13 +134,13 @@ func TestSortSliceByValueFail(t *testing.T) {
|
|||
actual []int
|
||||
hash = Hash(testKey)
|
||||
)
|
||||
SortSliceByValue(actual, hash)
|
||||
require.NotPanics(t, func() { SortSliceByValue(actual, hash) })
|
||||
})
|
||||
|
||||
t.Run("must be slice", func(t *testing.T) {
|
||||
actual := 10
|
||||
hash := Hash(testKey)
|
||||
SortSliceByValue(actual, hash)
|
||||
require.NotPanics(t, func() { SortSliceByValue(actual, hash) })
|
||||
})
|
||||
|
||||
t.Run("must 'fail' for unknown type", func(t *testing.T) {
|
||||
|
@ -148,9 +148,7 @@ func TestSortSliceByValueFail(t *testing.T) {
|
|||
expect := []unknown{1, 2, 3, 4, 5}
|
||||
hash := Hash(testKey)
|
||||
SortSliceByValue(actual, hash)
|
||||
if !reflect.DeepEqual(actual, expect) {
|
||||
t.Errorf("Was %#v, but expected %#v", actual, expect)
|
||||
}
|
||||
require.Equal(t, expect, actual)
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -159,9 +157,7 @@ func TestSortSliceByValueHasher(t *testing.T) {
|
|||
expect := []hashString{"d", "f", "c", "b", "a", "e"}
|
||||
hash := Hash(testKey)
|
||||
SortSliceByValue(actual, hash)
|
||||
if !reflect.DeepEqual(actual, expect) {
|
||||
t.Errorf("Was %#v, but expected %#v", actual, expect)
|
||||
}
|
||||
require.Equal(t, expect, actual)
|
||||
}
|
||||
|
||||
func TestSortSliceByValueIntSlice(t *testing.T) {
|
||||
|
@ -225,9 +221,7 @@ func TestSortSliceByValueIntSlice(t *testing.T) {
|
|||
|
||||
for _, tc := range cases {
|
||||
SortSliceByValue(tc.actual, hash)
|
||||
if !reflect.DeepEqual(tc.actual, tc.expect) {
|
||||
t.Errorf("Was %#v, but expected %#v", tc.actual, tc.expect)
|
||||
}
|
||||
require.Equal(t, tc.expect, tc.actual)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -236,9 +230,7 @@ func TestSort(t *testing.T) {
|
|||
hash := Hash(testKey)
|
||||
actual := Sort(nodes, hash)
|
||||
expected := []uint64{3, 1, 4, 2, 0}
|
||||
if !reflect.DeepEqual(actual, expected) {
|
||||
t.Errorf("Was %#v, but expected %#v", actual, expected)
|
||||
}
|
||||
require.Equal(t, expected, actual)
|
||||
}
|
||||
|
||||
func TestDistribution(t *testing.T) {
|
||||
|
@ -276,18 +268,11 @@ func TestDistribution(t *testing.T) {
|
|||
for node, count := range counts {
|
||||
d := mean - float64(count)
|
||||
chi2 += math.Pow(float64(count)-mean, 2) / mean
|
||||
if d > delta || (0-d) > delta {
|
||||
t.Errorf(
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)",
|
||||
node, count, mean, delta,
|
||||
)
|
||||
}
|
||||
}
|
||||
if chi2 > chiTable[size-1] {
|
||||
t.Errorf(
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
|
||||
chi2, chiTable[size-1])
|
||||
require.True(t, d < delta && (0-d) < delta,
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
|
||||
}
|
||||
require.True(t, chi2 < chiTable[size-1],
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
|
||||
})
|
||||
|
||||
t.Run("sortByIndex", func(t *testing.T) {
|
||||
|
@ -317,18 +302,11 @@ func TestDistribution(t *testing.T) {
|
|||
for node, count := range counts {
|
||||
d := mean - float64(count)
|
||||
chi2 += math.Pow(float64(count)-mean, 2) / mean
|
||||
if d > delta || (0-d) > delta {
|
||||
t.Errorf(
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)",
|
||||
node, count, mean, delta,
|
||||
)
|
||||
}
|
||||
}
|
||||
if chi2 > chiTable[size-1] {
|
||||
t.Errorf(
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
|
||||
chi2, chiTable[size-1])
|
||||
require.True(t, d < delta && (0-d) < delta,
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
|
||||
}
|
||||
require.True(t, chi2 < chiTable[size-1],
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
|
||||
})
|
||||
|
||||
t.Run("sortByValue", func(t *testing.T) {
|
||||
|
@ -357,18 +335,11 @@ func TestDistribution(t *testing.T) {
|
|||
for node, count := range counts {
|
||||
d := mean - float64(count)
|
||||
chi2 += math.Pow(float64(count)-mean, 2) / mean
|
||||
if d > delta || (0-d) > delta {
|
||||
t.Errorf(
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)",
|
||||
node, count, mean, delta,
|
||||
)
|
||||
}
|
||||
}
|
||||
if chi2 > chiTable[size-1] {
|
||||
t.Errorf(
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
|
||||
chi2, chiTable[size-1])
|
||||
require.True(t, d < delta && (0-d) < delta,
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
|
||||
}
|
||||
require.True(t, chi2 < chiTable[size-1],
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
|
||||
})
|
||||
|
||||
t.Run("sortByStringValue", func(t *testing.T) {
|
||||
|
@ -397,18 +368,11 @@ func TestDistribution(t *testing.T) {
|
|||
for node, count := range counts {
|
||||
d := mean - float64(count)
|
||||
chi2 += math.Pow(float64(count)-mean, 2) / mean
|
||||
if d > delta || (0-d) > delta {
|
||||
t.Errorf(
|
||||
"Node %s received %d keys, expected %.0f (+/- %.2f)",
|
||||
node, count, mean, delta,
|
||||
)
|
||||
}
|
||||
}
|
||||
if chi2 > chiTable[size-1] {
|
||||
t.Errorf(
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
|
||||
chi2, chiTable[size-1])
|
||||
require.True(t, d < delta && (0-d) < delta,
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
|
||||
}
|
||||
require.True(t, chi2 < chiTable[size-1],
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
|
||||
})
|
||||
|
||||
t.Run("sortByInt32Value", func(t *testing.T) {
|
||||
|
@ -437,31 +401,24 @@ func TestDistribution(t *testing.T) {
|
|||
for node, count := range counts {
|
||||
d := mean - float64(count)
|
||||
chi2 += math.Pow(float64(count)-mean, 2) / mean
|
||||
if d > delta || (0-d) > delta {
|
||||
t.Errorf(
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)",
|
||||
node, count, mean, delta,
|
||||
)
|
||||
}
|
||||
}
|
||||
if chi2 > chiTable[size-1] {
|
||||
t.Errorf(
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
|
||||
chi2, chiTable[size-1])
|
||||
require.True(t, d < delta && (0-d) < delta,
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
|
||||
}
|
||||
require.True(t, chi2 < chiTable[size-1],
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
|
||||
})
|
||||
|
||||
t.Run("sortByWeightValue", func(t *testing.T) {
|
||||
var (
|
||||
i uint64
|
||||
a, b, result [size]int
|
||||
w [size]uint64
|
||||
w [size]float64
|
||||
key = make([]byte, 16)
|
||||
)
|
||||
|
||||
for i = 0; i < size; i++ {
|
||||
a[i] = int(i)
|
||||
w[i] = size - i
|
||||
w[i] = float64(size-i) / float64(size)
|
||||
}
|
||||
for i = 0; i < keys; i++ {
|
||||
copy(b[:], a[:])
|
||||
|
@ -470,24 +427,24 @@ func TestDistribution(t *testing.T) {
|
|||
SortSliceByWeightValue(b[:], w[:], hash)
|
||||
result[b[0]]++
|
||||
}
|
||||
|
||||
for i := 0; i < size-1; i++ {
|
||||
if bool(w[i] > w[i+1]) != bool(result[i] > result[i+1]) {
|
||||
t.Fatalf("result array %v must be corresponded to weights %v", result, w)
|
||||
}
|
||||
require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]),
|
||||
"result array %v must be corresponded to weights %v", result, w)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("sortByWeightValueShuffledW", func(t *testing.T) {
|
||||
t.Run("sortByWeightValueShuffledWeight", func(t *testing.T) {
|
||||
var (
|
||||
i uint64
|
||||
a, b, result [size]int
|
||||
w [size]uint64
|
||||
w [size]float64
|
||||
key = make([]byte, 16)
|
||||
)
|
||||
|
||||
for i = 0; i < size; i++ {
|
||||
a[i] = int(i)
|
||||
w[i] = size - i
|
||||
w[i] = float64(size-i) / float64(size)
|
||||
}
|
||||
|
||||
rand.Shuffle(size, func(i, j int) {
|
||||
|
@ -501,17 +458,16 @@ func TestDistribution(t *testing.T) {
|
|||
result[b[0]]++
|
||||
}
|
||||
for i := 0; i < size-1; i++ {
|
||||
if bool(w[i] > w[i+1]) != bool(result[i] > result[i+1]) {
|
||||
t.Fatalf("result array %v must be corresponded to weights %v", result, w)
|
||||
}
|
||||
require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]),
|
||||
"result array %v must be corresponded to weights %v", result, w)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("sortByWeightValueEmptyW", func(t *testing.T) {
|
||||
t.Run("sortByWeightValueEmptyWeight", func(t *testing.T) {
|
||||
var (
|
||||
i uint64
|
||||
a, b [size]int
|
||||
w [size]uint64
|
||||
w [size]float64
|
||||
counts = make(map[int]int, size)
|
||||
key = make([]byte, 16)
|
||||
)
|
||||
|
@ -534,32 +490,25 @@ func TestDistribution(t *testing.T) {
|
|||
for node, count := range counts {
|
||||
d := mean - float64(count)
|
||||
chi2 += math.Pow(float64(count)-mean, 2) / mean
|
||||
if d > delta || (0-d) > delta {
|
||||
t.Errorf(
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)",
|
||||
node, count, mean, delta,
|
||||
)
|
||||
}
|
||||
}
|
||||
if chi2 > chiTable[size-1] {
|
||||
t.Errorf(
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
|
||||
chi2, chiTable[size-1])
|
||||
require.True(t, d < delta && (0-d) < delta,
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
|
||||
}
|
||||
require.True(t, chi2 < chiTable[size-1],
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
|
||||
})
|
||||
|
||||
t.Run("sortByWeightValueUniformW", func(t *testing.T) {
|
||||
t.Run("sortByWeightValueUniformWeight", func(t *testing.T) {
|
||||
var (
|
||||
i uint64
|
||||
a, b [size]int
|
||||
w [size]uint64
|
||||
w [size]float64
|
||||
counts = make(map[int]int, size)
|
||||
key = make([]byte, 16)
|
||||
)
|
||||
|
||||
for i = 0; i < size; i++ {
|
||||
a[i] = int(i)
|
||||
w[i] = 10
|
||||
w[i] = 0.5
|
||||
}
|
||||
|
||||
for i = 0; i < keys; i++ {
|
||||
|
@ -576,45 +525,85 @@ func TestDistribution(t *testing.T) {
|
|||
for node, count := range counts {
|
||||
d := mean - float64(count)
|
||||
chi2 += math.Pow(float64(count)-mean, 2) / mean
|
||||
if d > delta || (0-d) > delta {
|
||||
t.Errorf(
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)",
|
||||
node, count, mean, delta,
|
||||
)
|
||||
}
|
||||
}
|
||||
if chi2 > chiTable[size-1] {
|
||||
t.Errorf(
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
|
||||
chi2, chiTable[size-1])
|
||||
require.True(t, d < delta && (0-d) < delta,
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
|
||||
}
|
||||
require.True(t, chi2 < chiTable[size-1],
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
|
||||
})
|
||||
|
||||
t.Run("sortByWeightValueAbsoluteW", func(t *testing.T) {
|
||||
const keys = 1
|
||||
var (
|
||||
i uint64
|
||||
a, b [size]int
|
||||
w [size]uint64
|
||||
w [size]float64
|
||||
key = make([]byte, 16)
|
||||
)
|
||||
|
||||
for i = 0; i < size; i++ {
|
||||
a[i] = int(i)
|
||||
}
|
||||
w[size-1] = 10
|
||||
w[size-1] = 1
|
||||
|
||||
for i = 0; i < keys; i++ {
|
||||
copy(b[:], a[:])
|
||||
binary.BigEndian.PutUint64(key, i+size)
|
||||
hash := Hash(key)
|
||||
SortSliceByWeightValue(b[:], w[:], hash)
|
||||
if b[0] != a[size-1] {
|
||||
t.Fatalf("expected last value of %v to be the first with highest weight", a)
|
||||
}
|
||||
require.True(t, b[0] == a[size-1],
|
||||
"expected last value of %v to be the first with highest distance", a)
|
||||
}
|
||||
|
||||
})
|
||||
|
||||
t.Run("sortByWeightValueNormalizedWeight", func(t *testing.T) {
|
||||
var (
|
||||
i uint64
|
||||
a, b, result [size]uint64
|
||||
w, normalizedW [size]float64
|
||||
key = make([]byte, 16)
|
||||
)
|
||||
|
||||
for i = 0; i < size; i++ {
|
||||
a[i] = i
|
||||
w[int(i)] = 10
|
||||
}
|
||||
w[0] = 100
|
||||
|
||||
// Here let's use logarithm normalization
|
||||
for i = 0; i < size; i++ {
|
||||
normalizedW[i] = math.Log2(w[i]) / math.Log2(w[0])
|
||||
}
|
||||
|
||||
for i = 0; i < keys; i++ {
|
||||
copy(b[:], a[:])
|
||||
binary.BigEndian.PutUint64(key, i+size)
|
||||
hash := Hash(key)
|
||||
SortSliceByWeightValue(b[:], normalizedW[:], hash)
|
||||
for j := range b {
|
||||
result[b[j]] += uint64(len(b) - j)
|
||||
}
|
||||
}
|
||||
cutResult := result[1:]
|
||||
var total uint64
|
||||
for i := range cutResult {
|
||||
total += cutResult[i]
|
||||
}
|
||||
|
||||
var chi2 float64
|
||||
mean := float64(total) / float64(len(cutResult))
|
||||
delta := mean * percent
|
||||
for node, count := range cutResult {
|
||||
d := mean - float64(count)
|
||||
chi2 += math.Pow(float64(count)-mean, 2) / mean
|
||||
require.True(t, d < delta && (0-d) < delta,
|
||||
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
|
||||
}
|
||||
require.True(t, chi2 < chiTable[size-1],
|
||||
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
|
||||
})
|
||||
|
||||
t.Run("hash collision", func(t *testing.T) {
|
||||
var (
|
||||
i uint64
|
||||
|
@ -772,9 +761,9 @@ func benchmarkSortByValue(b *testing.B, n int, hash uint64) {
|
|||
|
||||
func benchmarkSortByWeight(b *testing.B, n int, hash uint64) uint64 {
|
||||
servers := make([]uint64, n)
|
||||
weights := make([]uint64, n)
|
||||
weights := make([]float64, n)
|
||||
for i := uint64(0); i < uint64(len(servers)); i++ {
|
||||
weights[i] = uint64(n) - i
|
||||
weights[i] = float64(uint64(n)-i) / float64(n)
|
||||
servers[i] = i
|
||||
}
|
||||
|
||||
|
@ -790,9 +779,9 @@ func benchmarkSortByWeight(b *testing.B, n int, hash uint64) uint64 {
|
|||
|
||||
func benchmarkSortByWeightIndex(b *testing.B, n int, hash uint64) {
|
||||
servers := make([]uint64, n)
|
||||
weights := make([]uint64, n)
|
||||
weights := make([]float64, n)
|
||||
for i := uint64(0); i < uint64(len(servers)); i++ {
|
||||
weights[i] = uint64(n) - i
|
||||
weights[i] = float64(uint64(n)-i) / float64(n)
|
||||
servers[i] = i
|
||||
}
|
||||
|
||||
|
@ -806,9 +795,9 @@ func benchmarkSortByWeightIndex(b *testing.B, n int, hash uint64) {
|
|||
|
||||
func benchmarkSortByWeightValue(b *testing.B, n int, hash uint64) {
|
||||
servers := make([]string, n)
|
||||
weights := make([]uint64, n)
|
||||
weights := make([]float64, n)
|
||||
for i := uint64(0); i < uint64(len(servers)); i++ {
|
||||
weights[i] = uint64(n) - i
|
||||
weights[i] = float64(uint64(n)-i) / float64(n)
|
||||
servers[i] = "localhost:" + strconv.FormatUint(60000-i, 10)
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue