Move normalization routine out of hrw library
The HRW library supports weighted sorting. Weights must be normalized before they are applied, and since different criteria may call for different kinds of normalization, there is no point in performing a simple normalization inside this library. Instead, pass a slice of already normalized weights to the `SortByWeight` functions. This commit proposes to:
- remove the normalization routine from the `SortByWeight` function;
- add a `ValidateWeights` function that checks whether weights are normalized;
- rename `weight` to `distance` to avoid confusion between the hash distance and the actual weights;
- use the testify library in the tests.
parent 58a8ce4e47
commit 6ea18e48fe

5 changed files with 221 additions and 207 deletions
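A minimal caller-side sketch of the resulting API (not part of the diff; the server names, the raw weights and the max-weight normalization are illustrative — any scheme that maps weights into [0.0, 1.0] works):

```go
package main

import (
    "fmt"

    "github.com/nspcc-dev/hrw"
)

func main() {
    servers := []string{"one.example.com", "two.example.com", "three.example.com"}
    raw := []float64{10, 2, 5} // arbitrary capacities, not yet normalized

    // Normalization is now the caller's responsibility; a simple
    // max-weight normalization is used here as an example.
    max := raw[0]
    for _, v := range raw {
        if v > max {
            max = v
        }
    }
    weights := make([]float64, len(raw))
    for i := range raw {
        weights[i] = raw[i] / max
    }

    // ValidateWeights returns an error if any weight is outside [0.0, 1.0].
    if err := hrw.ValidateWeights(weights); err != nil {
        panic(err)
    }

    hash := hrw.Hash([]byte("object-key"))
    hrw.SortSliceByWeightValue(servers, weights, hash)
    fmt.Println(servers) // servers in weighted HRW order for this key
}
```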
README.md (43 changed lines)

@@ -14,26 +14,25 @@
## Benchmark:

```
BenchmarkSort_fnv_10-8 5000000 354 ns/op 224 B/op 3 allocs/op
BenchmarkSort_fnv_100-8 300000 5103 ns/op 1856 B/op 3 allocs/op
BenchmarkSort_fnv_1000-8 10000 115874 ns/op 16448 B/op 3 allocs/op
BenchmarkSortByIndex_fnv_10-8 3000000 562 ns/op 384 B/op 7 allocs/op
BenchmarkSortByIndex_fnv_100-8 200000 5819 ns/op 2928 B/op 7 allocs/op
BenchmarkSortByIndex_fnv_1000-8 10000 125859 ns/op 25728 B/op 7 allocs/op
BenchmarkSortByValue_fnv_10-8 2000000 1056 ns/op 544 B/op 17 allocs/op
BenchmarkSortByValue_fnv_100-8 200000 9593 ns/op 4528 B/op 107 allocs/op
BenchmarkSortByValue_fnv_1000-8 10000 109272 ns/op 41728 B/op 1007 allocs/op

BenchmarkSortByWeight_fnv_10-8 3000000 500 ns/op 320 B/op 4 allocs/op
BenchmarkSortByWeight_fnv_100-8 200000 8257 ns/op 2768 B/op 4 allocs/op
BenchmarkSortByWeight_fnv_1000-8 10000 197938 ns/op 24656 B/op 4 allocs/op
BenchmarkSortByWeightIndex_fnv_10-8 2000000 760 ns/op 480 B/op 8 allocs/op
BenchmarkSortByWeightIndex_fnv_100-8 200000 9191 ns/op 3840 B/op 8 allocs/op
BenchmarkSortByWeightIndex_fnv_1000-8 10000 208204 ns/op 33936 B/op 8 allocs/op
BenchmarkSortByWeightValue_fnv_10-8 1000000 1095 ns/op 640 B/op 18 allocs/op
BenchmarkSortByWeightValue_fnv_100-8 200000 12291 ns/op 5440 B/op 108 allocs/op
BenchmarkSortByWeightValue_fnv_1000-8 10000 145125 ns/op 49936 B/op 1008 allocs/op
BenchmarkSort_fnv_10-8 5000000 365 ns/op 224 B/op 3 allocs/op
BenchmarkSort_fnv_100-8 300000 5261 ns/op 1856 B/op 3 allocs/op
BenchmarkSort_fnv_1000-8 10000 119462 ns/op 16448 B/op 3 allocs/op
BenchmarkSortByIndex_fnv_10-8 3000000 546 ns/op 384 B/op 7 allocs/op
BenchmarkSortByIndex_fnv_100-8 200000 5965 ns/op 2928 B/op 7 allocs/op
BenchmarkSortByIndex_fnv_1000-8 10000 127732 ns/op 25728 B/op 7 allocs/op
BenchmarkSortByValue_fnv_10-8 2000000 962 ns/op 544 B/op 17 allocs/op
BenchmarkSortByValue_fnv_100-8 200000 9604 ns/op 4528 B/op 107 allocs/op
BenchmarkSortByValue_fnv_1000-8 10000 111741 ns/op 41728 B/op 1007 allocs/op

BenchmarkSortByWeight_fnv_10-8 3000000 501 ns/op 320 B/op 4 allocs/op
BenchmarkSortByWeight_fnv_100-8 200000 8495 ns/op 2768 B/op 4 allocs/op
BenchmarkSortByWeight_fnv_1000-8 10000 197880 ns/op 24656 B/op 4 allocs/op
BenchmarkSortByWeightIndex_fnv_10-8 2000000 702 ns/op 480 B/op 8 allocs/op
BenchmarkSortByWeightIndex_fnv_100-8 200000 9338 ns/op 3840 B/op 8 allocs/op
BenchmarkSortByWeightIndex_fnv_1000-8 10000 204669 ns/op 33936 B/op 8 allocs/op
BenchmarkSortByWeightValue_fnv_10-8 1000000 1083 ns/op 640 B/op 18 allocs/op
BenchmarkSortByWeightValue_fnv_100-8 200000 11444 ns/op 5440 B/op 108 allocs/op
BenchmarkSortByWeightValue_fnv_1000-8 10000 148471 ns/op 49936 B/op 1008 allocs/op
```

## Example

@@ -71,11 +70,11 @@ func main() {
}

// Output:
// trying GET four.example.com/examples/object-key
// trying GET three.example.com/examples/object-key
// trying GET one.example.com/examples/object-key
// trying GET two.example.com/examples/object-key
// trying GET six.example.com/examples/object-key
// trying GET five.example.com/examples/object-key
// trying GET six.example.com/examples/object-key
// trying GET one.example.com/examples/object-key
// trying GET four.example.com/examples/object-key
}
```
go.mod (5 changed lines)

@@ -1,3 +1,6 @@
module github.com/nspcc-dev/hrw

require github.com/spaolacci/murmur3 v1.1.0
require (
    github.com/spaolacci/murmur3 v1.1.0
    github.com/stretchr/testify v1.3.0
)
go.sum (7 changed lines)

@@ -1,2 +1,9 @@
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
hrw.go (100 changed lines)

@@ -4,6 +4,7 @@ package hrw
import (
    "encoding/binary"
    "errors"
    "reflect"
    "sort"

@@ -19,7 +20,7 @@ type (
    hashed struct {
        length int
        sorted []uint64
        weight []uint64
        distance []uint64
    }

    weighted struct {

@@ -28,7 +29,13 @@
    }
)

func weight(x uint64, y uint64) uint64 {
// Boundaries of valid normalized weights
const (
    NormalizedMaxWeight = 1.0
    NormalizedMinWeight = 0.0
)

func distance(x uint64, y uint64) uint64 {
    acc := x ^ y
    // here used mmh3 64 bit finalizer
    // https://github.com/aappleby/smhasher/blob/61a0530f28277f2e850bfc39600ce61d02b518de/src/MurmurHash3.cpp#L81

@@ -41,19 +48,19 @@ func weight(x uint64, y uint64) uint64 {
}

func (h hashed) Len() int { return h.length }
func (h hashed) Less(i, j int) bool { return h.weight[i] < h.weight[j] }
func (h hashed) Less(i, j int) bool { return h.distance[i] < h.distance[j] }
func (h hashed) Swap(i, j int) {
    h.sorted[i], h.sorted[j] = h.sorted[j], h.sorted[i]
    h.weight[i], h.weight[j] = h.weight[j], h.weight[i]
    h.distance[i], h.distance[j] = h.distance[j], h.distance[i]
}

func (w weighted) Len() int { return w.h.length }
func (w weighted) Less(i, j int) bool {
    // `maxUint64 - weight` makes least weight most valuable
    // `maxUint64 - distance` makes the shorter distance more valuable
    // it is necessary for operation with normalized values
    wi := float64(^uint64(0)-w.h.weight[i]) * w.normal[i]
    wj := float64(^uint64(0)-w.h.weight[j]) * w.normal[j]
    return wi > wj // higher weight must be placed lower to be first
    wi := float64(^uint64(0)-w.h.distance[i]) * w.normal[i]
    wj := float64(^uint64(0)-w.h.distance[j]) * w.normal[j]
    return wi > wj // higher distance must be placed lower to be first
}
func (w weighted) Swap(i, j int) { w.normal[i], w.normal[j] = w.normal[j], w.normal[i]; w.h.Swap(i, j) }

@@ -62,65 +69,64 @@ func Hash(key []byte) uint64 {
    return murmur3.Sum64(key)
}

// Sort receive nodes and hash, and sort it by weight
// Sort receive nodes and hash, and sort it by distance
func Sort(nodes []uint64, hash uint64) []uint64 {
    var (
        l = len(nodes)
        h = hashed{
            length: l,
            sorted: make([]uint64, 0, l),
            weight: make([]uint64, 0, l),
            distance: make([]uint64, 0, l),
        }
    )

    for i, node := range nodes {
    for i := range nodes {
        h.sorted = append(h.sorted, uint64(i))
        h.weight = append(h.weight, weight(node, hash))
        h.distance = append(h.distance, distance(nodes[i], hash))
    }

    sort.Sort(h)
    return h.sorted
}

// SortByWeight receive nodes and hash, and sort it by weight
func SortByWeight(nodes []uint64, weights []uint64, hash uint64) []uint64 {
    var (
        maxWeight uint64
// SortByWeight receive nodes, weights and hash, and sort it by distance * weight
func SortByWeight(nodes []uint64, weights []float64, hash uint64) []uint64 {
    // check if numbers of weights and nodes are equal
    uniform := true
    for i := range weights {
        // check if all nodes have the same distance
        if weights[i] != weights[0] {
            uniform = false
            break
        }
    }

        l = len(nodes)
        w = weighted{
    l := len(nodes)
    w := weighted{
        h: hashed{
            length: l,
            sorted: make([]uint64, 0, l),
            weight: make([]uint64, 0, l),
            distance: make([]uint64, 0, l),
        },
        normal: make([]float64, 0, l),
    }
    )

    // finding max weight to perform normalization
    for i := range weights {
        if maxWeight < weights[i] {
            maxWeight = weights[i]
        }
        normal: make([]float64, l),
    }

    // if all nodes have 0-weights or weights are incorrect then sort uniformly
    if maxWeight == 0 || l != len(nodes) {
    // if all nodes have the same distance then sort uniformly
    if uniform || len(weights) != l {
        return Sort(nodes, hash)
    }

    fMaxWeight := float64(maxWeight)
    for i, node := range nodes {
    for i := range nodes {
        w.h.sorted = append(w.h.sorted, uint64(i))
        w.h.weight = append(w.h.weight, weight(node, hash))
        w.normal = append(w.normal, float64(weights[i])/fMaxWeight)
        w.h.distance = append(w.h.distance, distance(nodes[i], hash))
    }
    copy(w.normal, weights)

    sort.Sort(w)
    return w.h.sorted
}

// SortSliceByValue received []T and hash to sort by value-weight
// SortSliceByValue received []T and hash to sort by value-distance
func SortSliceByValue(slice interface{}, hash uint64) {
    rule := prepareRule(slice)
    if rule != nil {

@@ -130,17 +136,17 @@ func SortSliceByValue(slice interface{}, hash uint64) {
    }
}

// SortSliceByWeightValue received []T, weights and hash to sort by value-weight
func SortSliceByWeightValue(slice interface{}, weight []uint64, hash uint64) {
// SortSliceByWeightValue received []T, weights and hash to sort by value-distance * weights
func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) {
    rule := prepareRule(slice)
    if rule != nil {
        swap := reflect.Swapper(slice)
        rule = SortByWeight(rule, weight, hash)
        rule = SortByWeight(rule, weights, hash)
        sortByRuleInverse(swap, uint64(len(rule)), rule)
    }
}

// SortSliceByIndex received []T and hash to sort by index-weight
// SortSliceByIndex received []T and hash to sort by index-distance
func SortSliceByIndex(slice interface{}, hash uint64) {
    length := uint64(reflect.ValueOf(slice).Len())
    swap := reflect.Swapper(slice)

@@ -152,15 +158,15 @@ func SortSliceByIndex(slice interface{}, hash uint64) {
    sortByRuleInverse(swap, length, rule)
}

// SortSliceByWeightIndex received []T, weights and hash to sort by index-weight
func SortSliceByWeightIndex(slice interface{}, weight []uint64, hash uint64) {
// SortSliceByWeightIndex received []T, weights and hash to sort by index-distance * weights
func SortSliceByWeightIndex(slice interface{}, weights []float64, hash uint64) {
    length := uint64(reflect.ValueOf(slice).Len())
    swap := reflect.Swapper(slice)
    rule := make([]uint64, 0, length)
    for i := uint64(0); i < length; i++ {
        rule = append(rule, i)
    }
    rule = SortByWeight(rule, weight, hash)
    rule = SortByWeight(rule, weights, hash)
    sortByRuleInverse(swap, length, rule)
}

@@ -283,3 +289,13 @@ func prepareRule(slice interface{}) []uint64 {
    }
    return rule
}

// ValidateWeights checks if weights are normalized between 0.0 and 1.0
func ValidateWeights(weights []float64) error {
    for i := range weights {
        if weights[i] > NormalizedMaxWeight || weights[i] < NormalizedMinWeight {
            return errors.New("weights are not normalized")
        }
    }
    return nil
}
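For intuition, a toy illustration (not part of the diff) of what the `weighted.Less` comparison above computes: the hash distance is inverted with `^uint64(0) - distance`, so a smaller distance yields a larger score, and the score is then scaled by the caller-supplied normalized weight; the node with the larger product is placed first. The distances and weights below are hypothetical.

```go
package main

import "fmt"

// score mirrors the expression used in weighted.Less: a smaller hash
// distance and a larger normalized weight both increase the score.
func score(distance uint64, normalizedWeight float64) float64 {
    return float64(^uint64(0)-distance) * normalizedWeight
}

func main() {
    // Two hypothetical nodes: A is closer to the key by hash distance,
    // but B carries a higher normalized weight.
    scoreA := score(1<<60, 0.5)
    scoreB := score(3<<60, 1.0)
    fmt.Println(scoreB > scoreA) // true: B wins despite the larger distance
}
```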
hrw_test.go (257 changed lines)

@@ -5,9 +5,10 @@ import (
    "fmt"
    "math"
    "math/rand"
    "reflect"
    "strconv"
    "testing"

    "github.com/stretchr/testify/require"
)

type (

@@ -68,20 +69,25 @@ func TestSortSliceByIndex(t *testing.T) {
    expect := []string{"e", "a", "c", "f", "d", "b"}
    hash := Hash(testKey)
    SortSliceByIndex(actual, hash)
    if !reflect.DeepEqual(actual, expect) {
        t.Errorf("Was %#v, but expected %#v", actual, expect)
    require.Equal(t, expect, actual)
}

func TestValidateWeights(t *testing.T) {
    weights := []float64{10, 10, 10, 2, 2, 2}
    err := ValidateWeights(weights)
    require.Error(t, err)
    weights = []float64{1, 1, 1, 0.2, 0.2, 0.2}
    err = ValidateWeights(weights)
    require.NoError(t, err)
}

func TestSortSliceByWeightIndex(t *testing.T) {
    actual := []string{"a", "b", "c", "d", "e", "f"}
    weights := []uint64{10, 10, 10, 2, 2, 2}
    weights := []float64{1, 1, 1, 0.2, 0.2, 0.2}
    expect := []string{"a", "c", "b", "e", "f", "d"}
    hash := Hash(testKey)
    SortSliceByWeightIndex(actual, weights, hash)
    if !reflect.DeepEqual(actual, expect) {
        t.Errorf("Was %#v, but expected %#v", actual, expect)
    }
    require.Equal(t, expect, actual)
}

func TestSortSliceByValue(t *testing.T) {

@@ -89,9 +95,7 @@ func TestSortSliceByValue(t *testing.T) {
    expect := []string{"d", "f", "c", "b", "a", "e"}
    hash := Hash(testKey)
    SortSliceByValue(actual, hash)
    if !reflect.DeepEqual(actual, expect) {
        t.Errorf("Was %#v, but expected %#v", actual, expect)
    }
    require.Equal(t, expect, actual)
}

func TestSortByRule(t *testing.T) {

@@ -106,9 +110,7 @@ func TestSortByRule(t *testing.T) {
        func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
        6, rule)

    if !reflect.DeepEqual(actual, expect) {
        t.Errorf("Was %#v, but expected %#v", actual, expect)
    }
    require.Equal(t, expect, actual)
    })

    t.Run("inverse", func(t *testing.T) {

@@ -122,9 +124,7 @@ func TestSortByRule(t *testing.T) {
        func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
        6, rule)

    if !reflect.DeepEqual(actual, expect) {
        t.Errorf("Was %#v, but expected %#v", actual, expect)
    }
    require.Equal(t, expect, actual)
    })
}

@@ -134,13 +134,13 @@ func TestSortSliceByValueFail(t *testing.T) {
        actual []int
        hash = Hash(testKey)
    )
    SortSliceByValue(actual, hash)
    require.NotPanics(t, func() { SortSliceByValue(actual, hash) })
    })

    t.Run("must be slice", func(t *testing.T) {
        actual := 10
        hash := Hash(testKey)
        SortSliceByValue(actual, hash)
        require.NotPanics(t, func() { SortSliceByValue(actual, hash) })
    })

    t.Run("must 'fail' for unknown type", func(t *testing.T) {

@@ -148,9 +148,7 @@ func TestSortSliceByValueFail(t *testing.T) {
        expect := []unknown{1, 2, 3, 4, 5}
        hash := Hash(testKey)
        SortSliceByValue(actual, hash)
        if !reflect.DeepEqual(actual, expect) {
            t.Errorf("Was %#v, but expected %#v", actual, expect)
        }
        require.Equal(t, expect, actual)
    })
}

@@ -159,9 +157,7 @@ func TestSortSliceByValueHasher(t *testing.T) {
    expect := []hashString{"d", "f", "c", "b", "a", "e"}
    hash := Hash(testKey)
    SortSliceByValue(actual, hash)
    if !reflect.DeepEqual(actual, expect) {
        t.Errorf("Was %#v, but expected %#v", actual, expect)
    }
    require.Equal(t, expect, actual)
}

func TestSortSliceByValueIntSlice(t *testing.T) {

@@ -225,9 +221,7 @@ func TestSortSliceByValueIntSlice(t *testing.T) {

    for _, tc := range cases {
        SortSliceByValue(tc.actual, hash)
        if !reflect.DeepEqual(tc.actual, tc.expect) {
            t.Errorf("Was %#v, but expected %#v", tc.actual, tc.expect)
        }
        require.Equal(t, tc.expect, tc.actual)
    }
}

@@ -236,9 +230,7 @@ func TestSort(t *testing.T) {
    hash := Hash(testKey)
    actual := Sort(nodes, hash)
    expected := []uint64{3, 1, 4, 2, 0}
    if !reflect.DeepEqual(actual, expected) {
        t.Errorf("Was %#v, but expected %#v", actual, expected)
    }
    require.Equal(t, expected, actual)
}

func TestDistribution(t *testing.T) {

@@ -276,18 +268,11 @@ func TestDistribution(t *testing.T) {
        for node, count := range counts {
            d := mean - float64(count)
            chi2 += math.Pow(float64(count)-mean, 2) / mean
            if d > delta || (0-d) > delta {
                t.Errorf(
                    "Node %d received %d keys, expected %.0f (+/- %.2f)",
                    node, count, mean, delta,
                )
            }
        }
        if chi2 > chiTable[size-1] {
            t.Errorf(
                "Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
                chi2, chiTable[size-1])
            require.True(t, d < delta && (0-d) < delta,
                "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
        }
        require.True(t, chi2 < chiTable[size-1],
            "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
    })

    t.Run("sortByIndex", func(t *testing.T) {

@@ -317,18 +302,11 @@
        for node, count := range counts {
            d := mean - float64(count)
            chi2 += math.Pow(float64(count)-mean, 2) / mean
            if d > delta || (0-d) > delta {
                t.Errorf(
                    "Node %d received %d keys, expected %.0f (+/- %.2f)",
                    node, count, mean, delta,
                )
            }
        }
        if chi2 > chiTable[size-1] {
            t.Errorf(
                "Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
                chi2, chiTable[size-1])
            require.True(t, d < delta && (0-d) < delta,
                "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
        }
        require.True(t, chi2 < chiTable[size-1],
            "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
    })

    t.Run("sortByValue", func(t *testing.T) {

@@ -357,18 +335,11 @@
        for node, count := range counts {
            d := mean - float64(count)
            chi2 += math.Pow(float64(count)-mean, 2) / mean
            if d > delta || (0-d) > delta {
                t.Errorf(
                    "Node %d received %d keys, expected %.0f (+/- %.2f)",
                    node, count, mean, delta,
                )
            }
        }
        if chi2 > chiTable[size-1] {
            t.Errorf(
                "Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
                chi2, chiTable[size-1])
            require.True(t, d < delta && (0-d) < delta,
                "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
        }
        require.True(t, chi2 < chiTable[size-1],
            "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
    })

    t.Run("sortByStringValue", func(t *testing.T) {

@@ -397,18 +368,11 @@
        for node, count := range counts {
            d := mean - float64(count)
            chi2 += math.Pow(float64(count)-mean, 2) / mean
            if d > delta || (0-d) > delta {
                t.Errorf(
                    "Node %s received %d keys, expected %.0f (+/- %.2f)",
                    node, count, mean, delta,
                )
            }
        }
        if chi2 > chiTable[size-1] {
            t.Errorf(
                "Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
                chi2, chiTable[size-1])
            require.True(t, d < delta && (0-d) < delta,
                "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
        }
        require.True(t, chi2 < chiTable[size-1],
            "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
    })

    t.Run("sortByInt32Value", func(t *testing.T) {

@@ -437,31 +401,24 @@
        for node, count := range counts {
            d := mean - float64(count)
            chi2 += math.Pow(float64(count)-mean, 2) / mean
            if d > delta || (0-d) > delta {
                t.Errorf(
                    "Node %d received %d keys, expected %.0f (+/- %.2f)",
                    node, count, mean, delta,
                )
            }
        }
        if chi2 > chiTable[size-1] {
            t.Errorf(
                "Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
                chi2, chiTable[size-1])
            require.True(t, d < delta && (0-d) < delta,
                "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
        }
        require.True(t, chi2 < chiTable[size-1],
            "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
    })

    t.Run("sortByWeightValue", func(t *testing.T) {
        var (
            i uint64
            a, b, result [size]int
            w [size]uint64
            w [size]float64
            key = make([]byte, 16)
        )

        for i = 0; i < size; i++ {
            a[i] = int(i)
            w[i] = size - i
            w[i] = float64(size-i) / float64(size)
        }
        for i = 0; i < keys; i++ {
            copy(b[:], a[:])

@@ -470,24 +427,24 @@
            SortSliceByWeightValue(b[:], w[:], hash)
            result[b[0]]++
        }

        for i := 0; i < size-1; i++ {
            if bool(w[i] > w[i+1]) != bool(result[i] > result[i+1]) {
                t.Fatalf("result array %v must be corresponded to weights %v", result, w)
            }
            require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]),
                "result array %v must be corresponded to weights %v", result, w)
        }
    })

    t.Run("sortByWeightValueShuffledW", func(t *testing.T) {
    t.Run("sortByWeightValueShuffledWeight", func(t *testing.T) {
        var (
            i uint64
            a, b, result [size]int
            w [size]uint64
            w [size]float64
            key = make([]byte, 16)
        )

        for i = 0; i < size; i++ {
            a[i] = int(i)
            w[i] = size - i
            w[i] = float64(size-i) / float64(size)
        }

        rand.Shuffle(size, func(i, j int) {

@@ -501,17 +458,16 @@
            result[b[0]]++
        }
        for i := 0; i < size-1; i++ {
            if bool(w[i] > w[i+1]) != bool(result[i] > result[i+1]) {
                t.Fatalf("result array %v must be corresponded to weights %v", result, w)
            }
            require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]),
                "result array %v must be corresponded to weights %v", result, w)
        }
    })

    t.Run("sortByWeightValueEmptyW", func(t *testing.T) {
    t.Run("sortByWeightValueEmptyWeight", func(t *testing.T) {
        var (
            i uint64
            a, b [size]int
            w [size]uint64
            w [size]float64
            counts = make(map[int]int, size)
            key = make([]byte, 16)
        )

@@ -534,32 +490,25 @@
        for node, count := range counts {
            d := mean - float64(count)
            chi2 += math.Pow(float64(count)-mean, 2) / mean
            if d > delta || (0-d) > delta {
                t.Errorf(
                    "Node %d received %d keys, expected %.0f (+/- %.2f)",
                    node, count, mean, delta,
                )
            }
        }
        if chi2 > chiTable[size-1] {
            t.Errorf(
                "Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
                chi2, chiTable[size-1])
            require.True(t, d < delta && (0-d) < delta,
                "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
        }
        require.True(t, chi2 < chiTable[size-1],
            "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
    })

    t.Run("sortByWeightValueUniformW", func(t *testing.T) {
    t.Run("sortByWeightValueUniformWeight", func(t *testing.T) {
        var (
            i uint64
            a, b [size]int
            w [size]uint64
            w [size]float64
            counts = make(map[int]int, size)
            key = make([]byte, 16)
        )

        for i = 0; i < size; i++ {
            a[i] = int(i)
            w[i] = 10
            w[i] = 0.5
        }

        for i = 0; i < keys; i++ {

@@ -576,45 +525,85 @@
        for node, count := range counts {
            d := mean - float64(count)
            chi2 += math.Pow(float64(count)-mean, 2) / mean
            if d > delta || (0-d) > delta {
                t.Errorf(
                    "Node %d received %d keys, expected %.0f (+/- %.2f)",
                    node, count, mean, delta,
                )
            }
        }
        if chi2 > chiTable[size-1] {
            t.Errorf(
                "Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
                chi2, chiTable[size-1])
            require.True(t, d < delta && (0-d) < delta,
                "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
        }
        require.True(t, chi2 < chiTable[size-1],
            "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
    })

    t.Run("sortByWeightValueAbsoluteW", func(t *testing.T) {
        const keys = 1
        var (
            i uint64
            a, b [size]int
            w [size]uint64
            w [size]float64
            key = make([]byte, 16)
        )

        for i = 0; i < size; i++ {
            a[i] = int(i)
        }
        w[size-1] = 10
        w[size-1] = 1

        for i = 0; i < keys; i++ {
            copy(b[:], a[:])
            binary.BigEndian.PutUint64(key, i+size)
            hash := Hash(key)
            SortSliceByWeightValue(b[:], w[:], hash)
            if b[0] != a[size-1] {
                t.Fatalf("expected last value of %v to be the first with highest weight", a)
            }
            require.True(t, b[0] == a[size-1],
                "expected last value of %v to be the first with highest distance", a)
        }

    })

    t.Run("sortByWeightValueNormalizedWeight", func(t *testing.T) {
        var (
            i uint64
            a, b, result [size]uint64
            w, normalizedW [size]float64
            key = make([]byte, 16)
        )

        for i = 0; i < size; i++ {
            a[i] = i
            w[int(i)] = 10
        }
        w[0] = 100

        // Here let's use logarithm normalization
        for i = 0; i < size; i++ {
            normalizedW[i] = math.Log2(w[i]) / math.Log2(w[0])
        }

        for i = 0; i < keys; i++ {
            copy(b[:], a[:])
            binary.BigEndian.PutUint64(key, i+size)
            hash := Hash(key)
            SortSliceByWeightValue(b[:], normalizedW[:], hash)
            for j := range b {
                result[b[j]] += uint64(len(b) - j)
            }
        }
        cutResult := result[1:]
        var total uint64
        for i := range cutResult {
            total += cutResult[i]
        }

        var chi2 float64
        mean := float64(total) / float64(len(cutResult))
        delta := mean * percent
        for node, count := range cutResult {
            d := mean - float64(count)
            chi2 += math.Pow(float64(count)-mean, 2) / mean
            require.True(t, d < delta && (0-d) < delta,
                "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
        }
        require.True(t, chi2 < chiTable[size-1],
            "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
    })

    t.Run("hash collision", func(t *testing.T) {
        var (
            i uint64

@@ -772,9 +761,9 @@ func benchmarkSortByValue(b *testing.B, n int, hash uint64) {

func benchmarkSortByWeight(b *testing.B, n int, hash uint64) uint64 {
    servers := make([]uint64, n)
    weights := make([]uint64, n)
    weights := make([]float64, n)
    for i := uint64(0); i < uint64(len(servers)); i++ {
        weights[i] = uint64(n) - i
        weights[i] = float64(uint64(n)-i) / float64(n)
        servers[i] = i
    }

@@ -790,9 +779,9 @@ func benchmarkSortByWeight(b *testing.B, n int, hash uint64) uint64 {

func benchmarkSortByWeightIndex(b *testing.B, n int, hash uint64) {
    servers := make([]uint64, n)
    weights := make([]uint64, n)
    weights := make([]float64, n)
    for i := uint64(0); i < uint64(len(servers)); i++ {
        weights[i] = uint64(n) - i
        weights[i] = float64(uint64(n)-i) / float64(n)
        servers[i] = i
    }

@@ -806,9 +795,9 @@ func benchmarkSortByWeightIndex(b *testing.B, n int, hash uint64) {

func benchmarkSortByWeightValue(b *testing.B, n int, hash uint64) {
    servers := make([]string, n)
    weights := make([]uint64, n)
    weights := make([]float64, n)
    for i := uint64(0); i < uint64(len(servers)); i++ {
        weights[i] = uint64(n) - i
        weights[i] = float64(uint64(n)-i) / float64(n)
        servers[i] = "localhost:" + strconv.FormatUint(60000-i, 10)
    }