Move normalization routine out of hrw library (#6)

The HRW library supports weighted sorting. Weights must be normalized
before they are applied. Since different criteria may call for different
kinds of normalization, there is no point in performing a simple
normalization inside this library. Instead, pass a slice of already
normalized weights to the `SortByWeight` functions.

This commit proposes to:
- remove the normalization routine from the `SortByWeight` function;
- add a `ValidateWeights` function to check whether weights are normalized;
- rename `weight` -> `distance` to avoid naming confusion between
  hash distance and actual weights;
- use the testify library in the tests.
This commit is contained in:
Alex Vanin 2019-07-05 09:49:24 +03:00 committed by GitHub
parent 58a8ce4e47
commit aa230933d1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 221 additions and 207 deletions

View file

@ -14,26 +14,25 @@
## Benchmark: ## Benchmark:
``` ```
BenchmarkSort_fnv_10-8 5000000 354 ns/op 224 B/op 3 allocs/op BenchmarkSort_fnv_10-8 5000000 365 ns/op 224 B/op 3 allocs/op
BenchmarkSort_fnv_100-8 300000 5103 ns/op 1856 B/op 3 allocs/op BenchmarkSort_fnv_100-8 300000 5261 ns/op 1856 B/op 3 allocs/op
BenchmarkSort_fnv_1000-8 10000 115874 ns/op 16448 B/op 3 allocs/op BenchmarkSort_fnv_1000-8 10000 119462 ns/op 16448 B/op 3 allocs/op
BenchmarkSortByIndex_fnv_10-8 3000000 562 ns/op 384 B/op 7 allocs/op BenchmarkSortByIndex_fnv_10-8 3000000 546 ns/op 384 B/op 7 allocs/op
BenchmarkSortByIndex_fnv_100-8 200000 5819 ns/op 2928 B/op 7 allocs/op BenchmarkSortByIndex_fnv_100-8 200000 5965 ns/op 2928 B/op 7 allocs/op
BenchmarkSortByIndex_fnv_1000-8 10000 125859 ns/op 25728 B/op 7 allocs/op BenchmarkSortByIndex_fnv_1000-8 10000 127732 ns/op 25728 B/op 7 allocs/op
BenchmarkSortByValue_fnv_10-8 2000000 1056 ns/op 544 B/op 17 allocs/op BenchmarkSortByValue_fnv_10-8 2000000 962 ns/op 544 B/op 17 allocs/op
BenchmarkSortByValue_fnv_100-8 200000 9593 ns/op 4528 B/op 107 allocs/op BenchmarkSortByValue_fnv_100-8 200000 9604 ns/op 4528 B/op 107 allocs/op
BenchmarkSortByValue_fnv_1000-8 10000 109272 ns/op 41728 B/op 1007 allocs/op BenchmarkSortByValue_fnv_1000-8 10000 111741 ns/op 41728 B/op 1007 allocs/op
BenchmarkSortByWeight_fnv_10-8 3000000 500 ns/op 320 B/op 4 allocs/op
BenchmarkSortByWeight_fnv_100-8 200000 8257 ns/op 2768 B/op 4 allocs/op
BenchmarkSortByWeight_fnv_1000-8 10000 197938 ns/op 24656 B/op 4 allocs/op
BenchmarkSortByWeightIndex_fnv_10-8 2000000 760 ns/op 480 B/op 8 allocs/op
BenchmarkSortByWeightIndex_fnv_100-8 200000 9191 ns/op 3840 B/op 8 allocs/op
BenchmarkSortByWeightIndex_fnv_1000-8 10000 208204 ns/op 33936 B/op 8 allocs/op
BenchmarkSortByWeightValue_fnv_10-8 1000000 1095 ns/op 640 B/op 18 allocs/op
BenchmarkSortByWeightValue_fnv_100-8 200000 12291 ns/op 5440 B/op 108 allocs/op
BenchmarkSortByWeightValue_fnv_1000-8 10000 145125 ns/op 49936 B/op 1008 allocs/op
BenchmarkSortByWeight_fnv_10-8 3000000 501 ns/op 320 B/op 4 allocs/op
BenchmarkSortByWeight_fnv_100-8 200000 8495 ns/op 2768 B/op 4 allocs/op
BenchmarkSortByWeight_fnv_1000-8 10000 197880 ns/op 24656 B/op 4 allocs/op
BenchmarkSortByWeightIndex_fnv_10-8 2000000 702 ns/op 480 B/op 8 allocs/op
BenchmarkSortByWeightIndex_fnv_100-8 200000 9338 ns/op 3840 B/op 8 allocs/op
BenchmarkSortByWeightIndex_fnv_1000-8 10000 204669 ns/op 33936 B/op 8 allocs/op
BenchmarkSortByWeightValue_fnv_10-8 1000000 1083 ns/op 640 B/op 18 allocs/op
BenchmarkSortByWeightValue_fnv_100-8 200000 11444 ns/op 5440 B/op 108 allocs/op
BenchmarkSortByWeightValue_fnv_1000-8 10000 148471 ns/op 49936 B/op 1008 allocs/op
``` ```
## Example ## Example
@ -71,11 +70,11 @@ func main() {
} }
// Output: // Output:
// trying GET four.example.com/examples/object-key
// trying GET three.example.com/examples/object-key // trying GET three.example.com/examples/object-key
// trying GET one.example.com/examples/object-key
// trying GET two.example.com/examples/object-key // trying GET two.example.com/examples/object-key
// trying GET six.example.com/examples/object-key
// trying GET five.example.com/examples/object-key // trying GET five.example.com/examples/object-key
// trying GET six.example.com/examples/object-key
// trying GET one.example.com/examples/object-key
// trying GET four.example.com/examples/object-key
} }
``` ```

5
go.mod
View file

@ -1,3 +1,6 @@
module github.com/nspcc-dev/hrw module github.com/nspcc-dev/hrw
require github.com/spaolacci/murmur3 v1.1.0 require (
github.com/spaolacci/murmur3 v1.1.0
github.com/stretchr/testify v1.3.0
)

7
go.sum
View file

@ -1,2 +1,9 @@
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=

100
hrw.go
View file

@ -4,6 +4,7 @@ package hrw
import ( import (
"encoding/binary" "encoding/binary"
"errors"
"reflect" "reflect"
"sort" "sort"
@ -19,7 +20,7 @@ type (
hashed struct { hashed struct {
length int length int
sorted []uint64 sorted []uint64
weight []uint64 distance []uint64
} }
weighted struct { weighted struct {
@ -28,7 +29,13 @@ type (
} }
) )
func weight(x uint64, y uint64) uint64 { // Boundaries of valid normalized weights
const (
NormalizedMaxWeight = 1.0
NormalizedMinWeight = 0.0
)
func distance(x uint64, y uint64) uint64 {
acc := x ^ y acc := x ^ y
// here used mmh3 64 bit finalizer // here used mmh3 64 bit finalizer
// https://github.com/aappleby/smhasher/blob/61a0530f28277f2e850bfc39600ce61d02b518de/src/MurmurHash3.cpp#L81 // https://github.com/aappleby/smhasher/blob/61a0530f28277f2e850bfc39600ce61d02b518de/src/MurmurHash3.cpp#L81
@ -41,19 +48,19 @@ func weight(x uint64, y uint64) uint64 {
} }
func (h hashed) Len() int { return h.length } func (h hashed) Len() int { return h.length }
func (h hashed) Less(i, j int) bool { return h.weight[i] < h.weight[j] } func (h hashed) Less(i, j int) bool { return h.distance[i] < h.distance[j] }
func (h hashed) Swap(i, j int) { func (h hashed) Swap(i, j int) {
h.sorted[i], h.sorted[j] = h.sorted[j], h.sorted[i] h.sorted[i], h.sorted[j] = h.sorted[j], h.sorted[i]
h.weight[i], h.weight[j] = h.weight[j], h.weight[i] h.distance[i], h.distance[j] = h.distance[j], h.distance[i]
} }
func (w weighted) Len() int { return w.h.length } func (w weighted) Len() int { return w.h.length }
func (w weighted) Less(i, j int) bool { func (w weighted) Less(i, j int) bool {
// `maxUint64 - weight` makes least weight most valuable // `maxUint64 - distance` makes the shorter distance more valuable
// it is necessary for operation with normalized values // it is necessary for operation with normalized values
wi := float64(^uint64(0)-w.h.weight[i]) * w.normal[i] wi := float64(^uint64(0)-w.h.distance[i]) * w.normal[i]
wj := float64(^uint64(0)-w.h.weight[j]) * w.normal[j] wj := float64(^uint64(0)-w.h.distance[j]) * w.normal[j]
return wi > wj // higher weight must be placed lower to be first return wi > wj // higher distance must be placed lower to be first
} }
func (w weighted) Swap(i, j int) { w.normal[i], w.normal[j] = w.normal[j], w.normal[i]; w.h.Swap(i, j) } func (w weighted) Swap(i, j int) { w.normal[i], w.normal[j] = w.normal[j], w.normal[i]; w.h.Swap(i, j) }
@ -62,65 +69,64 @@ func Hash(key []byte) uint64 {
return murmur3.Sum64(key) return murmur3.Sum64(key)
} }
// Sort receive nodes and hash, and sort it by weight // Sort receive nodes and hash, and sort it by distance
func Sort(nodes []uint64, hash uint64) []uint64 { func Sort(nodes []uint64, hash uint64) []uint64 {
var ( var (
l = len(nodes) l = len(nodes)
h = hashed{ h = hashed{
length: l, length: l,
sorted: make([]uint64, 0, l), sorted: make([]uint64, 0, l),
weight: make([]uint64, 0, l), distance: make([]uint64, 0, l),
} }
) )
for i, node := range nodes { for i := range nodes {
h.sorted = append(h.sorted, uint64(i)) h.sorted = append(h.sorted, uint64(i))
h.weight = append(h.weight, weight(node, hash)) h.distance = append(h.distance, distance(nodes[i], hash))
} }
sort.Sort(h) sort.Sort(h)
return h.sorted return h.sorted
} }
// SortByWeight receive nodes and hash, and sort it by weight // SortByWeight receive nodes, weights and hash, and sort it by distance * weight
func SortByWeight(nodes []uint64, weights []uint64, hash uint64) []uint64 { func SortByWeight(nodes []uint64, weights []float64, hash uint64) []uint64 {
var ( // check if numbers of weights and nodes are equal
maxWeight uint64 uniform := true
for i := range weights {
// check if all nodes have the same distance
if weights[i] != weights[0] {
uniform = false
break
}
}
l = len(nodes) l := len(nodes)
w = weighted{ w := weighted{
h: hashed{ h: hashed{
length: l, length: l,
sorted: make([]uint64, 0, l), sorted: make([]uint64, 0, l),
weight: make([]uint64, 0, l), distance: make([]uint64, 0, l),
}, },
normal: make([]float64, 0, l), normal: make([]float64, l),
}
)
// finding max weight to perform normalization
for i := range weights {
if maxWeight < weights[i] {
maxWeight = weights[i]
}
} }
// if all nodes have 0-weights or weights are incorrect then sort uniformly // if all nodes have the same distance then sort uniformly
if maxWeight == 0 || l != len(nodes) { if uniform || len(weights) != l {
return Sort(nodes, hash) return Sort(nodes, hash)
} }
fMaxWeight := float64(maxWeight) for i := range nodes {
for i, node := range nodes {
w.h.sorted = append(w.h.sorted, uint64(i)) w.h.sorted = append(w.h.sorted, uint64(i))
w.h.weight = append(w.h.weight, weight(node, hash)) w.h.distance = append(w.h.distance, distance(nodes[i], hash))
w.normal = append(w.normal, float64(weights[i])/fMaxWeight)
} }
copy(w.normal, weights)
sort.Sort(w) sort.Sort(w)
return w.h.sorted return w.h.sorted
} }
// SortSliceByValue received []T and hash to sort by value-weight // SortSliceByValue received []T and hash to sort by value-distance
func SortSliceByValue(slice interface{}, hash uint64) { func SortSliceByValue(slice interface{}, hash uint64) {
rule := prepareRule(slice) rule := prepareRule(slice)
if rule != nil { if rule != nil {
@ -130,17 +136,17 @@ func SortSliceByValue(slice interface{}, hash uint64) {
} }
} }
// SortSliceByWeightValue received []T, weights and hash to sort by value-weight // SortSliceByWeightValue received []T, weights and hash to sort by value-distance * weights
func SortSliceByWeightValue(slice interface{}, weight []uint64, hash uint64) { func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) {
rule := prepareRule(slice) rule := prepareRule(slice)
if rule != nil { if rule != nil {
swap := reflect.Swapper(slice) swap := reflect.Swapper(slice)
rule = SortByWeight(rule, weight, hash) rule = SortByWeight(rule, weights, hash)
sortByRuleInverse(swap, uint64(len(rule)), rule) sortByRuleInverse(swap, uint64(len(rule)), rule)
} }
} }
// SortSliceByIndex received []T and hash to sort by index-weight // SortSliceByIndex received []T and hash to sort by index-distance
func SortSliceByIndex(slice interface{}, hash uint64) { func SortSliceByIndex(slice interface{}, hash uint64) {
length := uint64(reflect.ValueOf(slice).Len()) length := uint64(reflect.ValueOf(slice).Len())
swap := reflect.Swapper(slice) swap := reflect.Swapper(slice)
@ -152,15 +158,15 @@ func SortSliceByIndex(slice interface{}, hash uint64) {
sortByRuleInverse(swap, length, rule) sortByRuleInverse(swap, length, rule)
} }
// SortSliceByWeightIndex received []T, weights and hash to sort by index-weight // SortSliceByWeightIndex received []T, weights and hash to sort by index-distance * weights
func SortSliceByWeightIndex(slice interface{}, weight []uint64, hash uint64) { func SortSliceByWeightIndex(slice interface{}, weights []float64, hash uint64) {
length := uint64(reflect.ValueOf(slice).Len()) length := uint64(reflect.ValueOf(slice).Len())
swap := reflect.Swapper(slice) swap := reflect.Swapper(slice)
rule := make([]uint64, 0, length) rule := make([]uint64, 0, length)
for i := uint64(0); i < length; i++ { for i := uint64(0); i < length; i++ {
rule = append(rule, i) rule = append(rule, i)
} }
rule = SortByWeight(rule, weight, hash) rule = SortByWeight(rule, weights, hash)
sortByRuleInverse(swap, length, rule) sortByRuleInverse(swap, length, rule)
} }
@ -283,3 +289,13 @@ func prepareRule(slice interface{}) []uint64 {
} }
return rule return rule
} }
// ValidateWeights checks if weights are normalized between 0.0 and 1.0
func ValidateWeights(weights []float64) error {
for i := range weights {
if weights[i] > NormalizedMaxWeight || weights[i] < NormalizedMinWeight {
return errors.New("weights are not normalized")
}
}
return nil
}

View file

@ -5,9 +5,10 @@ import (
"fmt" "fmt"
"math" "math"
"math/rand" "math/rand"
"reflect"
"strconv" "strconv"
"testing" "testing"
"github.com/stretchr/testify/require"
) )
type ( type (
@ -68,20 +69,25 @@ func TestSortSliceByIndex(t *testing.T) {
expect := []string{"e", "a", "c", "f", "d", "b"} expect := []string{"e", "a", "c", "f", "d", "b"}
hash := Hash(testKey) hash := Hash(testKey)
SortSliceByIndex(actual, hash) SortSliceByIndex(actual, hash)
if !reflect.DeepEqual(actual, expect) { require.Equal(t, expect, actual)
t.Errorf("Was %#v, but expected %#v", actual, expect) }
}
func TestValidateWeights(t *testing.T) {
weights := []float64{10, 10, 10, 2, 2, 2}
err := ValidateWeights(weights)
require.Error(t, err)
weights = []float64{1, 1, 1, 0.2, 0.2, 0.2}
err = ValidateWeights(weights)
require.NoError(t, err)
} }
func TestSortSliceByWeightIndex(t *testing.T) { func TestSortSliceByWeightIndex(t *testing.T) {
actual := []string{"a", "b", "c", "d", "e", "f"} actual := []string{"a", "b", "c", "d", "e", "f"}
weights := []uint64{10, 10, 10, 2, 2, 2} weights := []float64{1, 1, 1, 0.2, 0.2, 0.2}
expect := []string{"a", "c", "b", "e", "f", "d"} expect := []string{"a", "c", "b", "e", "f", "d"}
hash := Hash(testKey) hash := Hash(testKey)
SortSliceByWeightIndex(actual, weights, hash) SortSliceByWeightIndex(actual, weights, hash)
if !reflect.DeepEqual(actual, expect) { require.Equal(t, expect, actual)
t.Errorf("Was %#v, but expected %#v", actual, expect)
}
} }
func TestSortSliceByValue(t *testing.T) { func TestSortSliceByValue(t *testing.T) {
@ -89,9 +95,7 @@ func TestSortSliceByValue(t *testing.T) {
expect := []string{"d", "f", "c", "b", "a", "e"} expect := []string{"d", "f", "c", "b", "a", "e"}
hash := Hash(testKey) hash := Hash(testKey)
SortSliceByValue(actual, hash) SortSliceByValue(actual, hash)
if !reflect.DeepEqual(actual, expect) { require.Equal(t, expect, actual)
t.Errorf("Was %#v, but expected %#v", actual, expect)
}
} }
func TestSortByRule(t *testing.T) { func TestSortByRule(t *testing.T) {
@ -106,9 +110,7 @@ func TestSortByRule(t *testing.T) {
func(i, j int) { actual[i], actual[j] = actual[j], actual[i] }, func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
6, rule) 6, rule)
if !reflect.DeepEqual(actual, expect) { require.Equal(t, expect, actual)
t.Errorf("Was %#v, but expected %#v", actual, expect)
}
}) })
t.Run("inverse", func(t *testing.T) { t.Run("inverse", func(t *testing.T) {
@ -122,9 +124,7 @@ func TestSortByRule(t *testing.T) {
func(i, j int) { actual[i], actual[j] = actual[j], actual[i] }, func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
6, rule) 6, rule)
if !reflect.DeepEqual(actual, expect) { require.Equal(t, expect, actual)
t.Errorf("Was %#v, but expected %#v", actual, expect)
}
}) })
} }
@ -134,13 +134,13 @@ func TestSortSliceByValueFail(t *testing.T) {
actual []int actual []int
hash = Hash(testKey) hash = Hash(testKey)
) )
SortSliceByValue(actual, hash) require.NotPanics(t, func() { SortSliceByValue(actual, hash) })
}) })
t.Run("must be slice", func(t *testing.T) { t.Run("must be slice", func(t *testing.T) {
actual := 10 actual := 10
hash := Hash(testKey) hash := Hash(testKey)
SortSliceByValue(actual, hash) require.NotPanics(t, func() { SortSliceByValue(actual, hash) })
}) })
t.Run("must 'fail' for unknown type", func(t *testing.T) { t.Run("must 'fail' for unknown type", func(t *testing.T) {
@ -148,9 +148,7 @@ func TestSortSliceByValueFail(t *testing.T) {
expect := []unknown{1, 2, 3, 4, 5} expect := []unknown{1, 2, 3, 4, 5}
hash := Hash(testKey) hash := Hash(testKey)
SortSliceByValue(actual, hash) SortSliceByValue(actual, hash)
if !reflect.DeepEqual(actual, expect) { require.Equal(t, expect, actual)
t.Errorf("Was %#v, but expected %#v", actual, expect)
}
}) })
} }
@ -159,9 +157,7 @@ func TestSortSliceByValueHasher(t *testing.T) {
expect := []hashString{"d", "f", "c", "b", "a", "e"} expect := []hashString{"d", "f", "c", "b", "a", "e"}
hash := Hash(testKey) hash := Hash(testKey)
SortSliceByValue(actual, hash) SortSliceByValue(actual, hash)
if !reflect.DeepEqual(actual, expect) { require.Equal(t, expect, actual)
t.Errorf("Was %#v, but expected %#v", actual, expect)
}
} }
func TestSortSliceByValueIntSlice(t *testing.T) { func TestSortSliceByValueIntSlice(t *testing.T) {
@ -225,9 +221,7 @@ func TestSortSliceByValueIntSlice(t *testing.T) {
for _, tc := range cases { for _, tc := range cases {
SortSliceByValue(tc.actual, hash) SortSliceByValue(tc.actual, hash)
if !reflect.DeepEqual(tc.actual, tc.expect) { require.Equal(t, tc.expect, tc.actual)
t.Errorf("Was %#v, but expected %#v", tc.actual, tc.expect)
}
} }
} }
@ -236,9 +230,7 @@ func TestSort(t *testing.T) {
hash := Hash(testKey) hash := Hash(testKey)
actual := Sort(nodes, hash) actual := Sort(nodes, hash)
expected := []uint64{3, 1, 4, 2, 0} expected := []uint64{3, 1, 4, 2, 0}
if !reflect.DeepEqual(actual, expected) { require.Equal(t, expected, actual)
t.Errorf("Was %#v, but expected %#v", actual, expected)
}
} }
func TestDistribution(t *testing.T) { func TestDistribution(t *testing.T) {
@ -276,18 +268,11 @@ func TestDistribution(t *testing.T) {
for node, count := range counts { for node, count := range counts {
d := mean - float64(count) d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean chi2 += math.Pow(float64(count)-mean, 2) / mean
if d > delta || (0-d) > delta { require.True(t, d < delta && (0-d) < delta,
t.Errorf( "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
"Node %d received %d keys, expected %.0f (+/- %.2f)",
node, count, mean, delta,
)
}
}
if chi2 > chiTable[size-1] {
t.Errorf(
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
chi2, chiTable[size-1])
} }
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
}) })
t.Run("sortByIndex", func(t *testing.T) { t.Run("sortByIndex", func(t *testing.T) {
@ -317,18 +302,11 @@ func TestDistribution(t *testing.T) {
for node, count := range counts { for node, count := range counts {
d := mean - float64(count) d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean chi2 += math.Pow(float64(count)-mean, 2) / mean
if d > delta || (0-d) > delta { require.True(t, d < delta && (0-d) < delta,
t.Errorf( "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
"Node %d received %d keys, expected %.0f (+/- %.2f)",
node, count, mean, delta,
)
}
}
if chi2 > chiTable[size-1] {
t.Errorf(
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
chi2, chiTable[size-1])
} }
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
}) })
t.Run("sortByValue", func(t *testing.T) { t.Run("sortByValue", func(t *testing.T) {
@ -357,18 +335,11 @@ func TestDistribution(t *testing.T) {
for node, count := range counts { for node, count := range counts {
d := mean - float64(count) d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean chi2 += math.Pow(float64(count)-mean, 2) / mean
if d > delta || (0-d) > delta { require.True(t, d < delta && (0-d) < delta,
t.Errorf( "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
"Node %d received %d keys, expected %.0f (+/- %.2f)",
node, count, mean, delta,
)
}
}
if chi2 > chiTable[size-1] {
t.Errorf(
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
chi2, chiTable[size-1])
} }
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
}) })
t.Run("sortByStringValue", func(t *testing.T) { t.Run("sortByStringValue", func(t *testing.T) {
@ -397,18 +368,11 @@ func TestDistribution(t *testing.T) {
for node, count := range counts { for node, count := range counts {
d := mean - float64(count) d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean chi2 += math.Pow(float64(count)-mean, 2) / mean
if d > delta || (0-d) > delta { require.True(t, d < delta && (0-d) < delta,
t.Errorf( "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
"Node %s received %d keys, expected %.0f (+/- %.2f)",
node, count, mean, delta,
)
}
}
if chi2 > chiTable[size-1] {
t.Errorf(
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
chi2, chiTable[size-1])
} }
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
}) })
t.Run("sortByInt32Value", func(t *testing.T) { t.Run("sortByInt32Value", func(t *testing.T) {
@ -437,31 +401,24 @@ func TestDistribution(t *testing.T) {
for node, count := range counts { for node, count := range counts {
d := mean - float64(count) d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean chi2 += math.Pow(float64(count)-mean, 2) / mean
if d > delta || (0-d) > delta { require.True(t, d < delta && (0-d) < delta,
t.Errorf( "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
"Node %d received %d keys, expected %.0f (+/- %.2f)",
node, count, mean, delta,
)
}
}
if chi2 > chiTable[size-1] {
t.Errorf(
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
chi2, chiTable[size-1])
} }
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
}) })
t.Run("sortByWeightValue", func(t *testing.T) { t.Run("sortByWeightValue", func(t *testing.T) {
var ( var (
i uint64 i uint64
a, b, result [size]int a, b, result [size]int
w [size]uint64 w [size]float64
key = make([]byte, 16) key = make([]byte, 16)
) )
for i = 0; i < size; i++ { for i = 0; i < size; i++ {
a[i] = int(i) a[i] = int(i)
w[i] = size - i w[i] = float64(size-i) / float64(size)
} }
for i = 0; i < keys; i++ { for i = 0; i < keys; i++ {
copy(b[:], a[:]) copy(b[:], a[:])
@ -470,24 +427,24 @@ func TestDistribution(t *testing.T) {
SortSliceByWeightValue(b[:], w[:], hash) SortSliceByWeightValue(b[:], w[:], hash)
result[b[0]]++ result[b[0]]++
} }
for i := 0; i < size-1; i++ { for i := 0; i < size-1; i++ {
if bool(w[i] > w[i+1]) != bool(result[i] > result[i+1]) { require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]),
t.Fatalf("result array %v must be corresponded to weights %v", result, w) "result array %v must be corresponded to weights %v", result, w)
}
} }
}) })
t.Run("sortByWeightValueShuffledW", func(t *testing.T) { t.Run("sortByWeightValueShuffledWeight", func(t *testing.T) {
var ( var (
i uint64 i uint64
a, b, result [size]int a, b, result [size]int
w [size]uint64 w [size]float64
key = make([]byte, 16) key = make([]byte, 16)
) )
for i = 0; i < size; i++ { for i = 0; i < size; i++ {
a[i] = int(i) a[i] = int(i)
w[i] = size - i w[i] = float64(size-i) / float64(size)
} }
rand.Shuffle(size, func(i, j int) { rand.Shuffle(size, func(i, j int) {
@ -501,17 +458,16 @@ func TestDistribution(t *testing.T) {
result[b[0]]++ result[b[0]]++
} }
for i := 0; i < size-1; i++ { for i := 0; i < size-1; i++ {
if bool(w[i] > w[i+1]) != bool(result[i] > result[i+1]) { require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]),
t.Fatalf("result array %v must be corresponded to weights %v", result, w) "result array %v must be corresponded to weights %v", result, w)
}
} }
}) })
t.Run("sortByWeightValueEmptyW", func(t *testing.T) { t.Run("sortByWeightValueEmptyWeight", func(t *testing.T) {
var ( var (
i uint64 i uint64
a, b [size]int a, b [size]int
w [size]uint64 w [size]float64
counts = make(map[int]int, size) counts = make(map[int]int, size)
key = make([]byte, 16) key = make([]byte, 16)
) )
@ -534,32 +490,25 @@ func TestDistribution(t *testing.T) {
for node, count := range counts { for node, count := range counts {
d := mean - float64(count) d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean chi2 += math.Pow(float64(count)-mean, 2) / mean
if d > delta || (0-d) > delta { require.True(t, d < delta && (0-d) < delta,
t.Errorf( "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
"Node %d received %d keys, expected %.0f (+/- %.2f)",
node, count, mean, delta,
)
}
}
if chi2 > chiTable[size-1] {
t.Errorf(
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
chi2, chiTable[size-1])
} }
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
}) })
t.Run("sortByWeightValueUniformW", func(t *testing.T) { t.Run("sortByWeightValueUniformWeight", func(t *testing.T) {
var ( var (
i uint64 i uint64
a, b [size]int a, b [size]int
w [size]uint64 w [size]float64
counts = make(map[int]int, size) counts = make(map[int]int, size)
key = make([]byte, 16) key = make([]byte, 16)
) )
for i = 0; i < size; i++ { for i = 0; i < size; i++ {
a[i] = int(i) a[i] = int(i)
w[i] = 10 w[i] = 0.5
} }
for i = 0; i < keys; i++ { for i = 0; i < keys; i++ {
@ -576,45 +525,85 @@ func TestDistribution(t *testing.T) {
for node, count := range counts { for node, count := range counts {
d := mean - float64(count) d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean chi2 += math.Pow(float64(count)-mean, 2) / mean
if d > delta || (0-d) > delta { require.True(t, d < delta && (0-d) < delta,
t.Errorf( "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
"Node %d received %d keys, expected %.0f (+/- %.2f)",
node, count, mean, delta,
)
}
}
if chi2 > chiTable[size-1] {
t.Errorf(
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
chi2, chiTable[size-1])
} }
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
}) })
t.Run("sortByWeightValueAbsoluteW", func(t *testing.T) { t.Run("sortByWeightValueAbsoluteW", func(t *testing.T) {
const keys = 1
var ( var (
i uint64 i uint64
a, b [size]int a, b [size]int
w [size]uint64 w [size]float64
key = make([]byte, 16) key = make([]byte, 16)
) )
for i = 0; i < size; i++ { for i = 0; i < size; i++ {
a[i] = int(i) a[i] = int(i)
} }
w[size-1] = 10 w[size-1] = 1
for i = 0; i < keys; i++ { for i = 0; i < keys; i++ {
copy(b[:], a[:]) copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size) binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key) hash := Hash(key)
SortSliceByWeightValue(b[:], w[:], hash) SortSliceByWeightValue(b[:], w[:], hash)
if b[0] != a[size-1] { require.True(t, b[0] == a[size-1],
t.Fatalf("expected last value of %v to be the first with highest weight", a) "expected last value of %v to be the first with highest distance", a)
}
} }
}) })
t.Run("sortByWeightValueNormalizedWeight", func(t *testing.T) {
var (
i uint64
a, b, result [size]uint64
w, normalizedW [size]float64
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = i
w[int(i)] = 10
}
w[0] = 100
// Here let's use logarithm normalization
for i = 0; i < size; i++ {
normalizedW[i] = math.Log2(w[i]) / math.Log2(w[0])
}
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByWeightValue(b[:], normalizedW[:], hash)
for j := range b {
result[b[j]] += uint64(len(b) - j)
}
}
cutResult := result[1:]
var total uint64
for i := range cutResult {
total += cutResult[i]
}
var chi2 float64
mean := float64(total) / float64(len(cutResult))
delta := mean * percent
for node, count := range cutResult {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("hash collision", func(t *testing.T) { t.Run("hash collision", func(t *testing.T) {
var ( var (
i uint64 i uint64
@ -772,9 +761,9 @@ func benchmarkSortByValue(b *testing.B, n int, hash uint64) {
func benchmarkSortByWeight(b *testing.B, n int, hash uint64) uint64 { func benchmarkSortByWeight(b *testing.B, n int, hash uint64) uint64 {
servers := make([]uint64, n) servers := make([]uint64, n)
weights := make([]uint64, n) weights := make([]float64, n)
for i := uint64(0); i < uint64(len(servers)); i++ { for i := uint64(0); i < uint64(len(servers)); i++ {
weights[i] = uint64(n) - i weights[i] = float64(uint64(n)-i) / float64(n)
servers[i] = i servers[i] = i
} }
@ -790,9 +779,9 @@ func benchmarkSortByWeight(b *testing.B, n int, hash uint64) uint64 {
func benchmarkSortByWeightIndex(b *testing.B, n int, hash uint64) { func benchmarkSortByWeightIndex(b *testing.B, n int, hash uint64) {
servers := make([]uint64, n) servers := make([]uint64, n)
weights := make([]uint64, n) weights := make([]float64, n)
for i := uint64(0); i < uint64(len(servers)); i++ { for i := uint64(0); i < uint64(len(servers)); i++ {
weights[i] = uint64(n) - i weights[i] = float64(uint64(n)-i) / float64(n)
servers[i] = i servers[i] = i
} }
@ -806,9 +795,9 @@ func benchmarkSortByWeightIndex(b *testing.B, n int, hash uint64) {
func benchmarkSortByWeightValue(b *testing.B, n int, hash uint64) { func benchmarkSortByWeightValue(b *testing.B, n int, hash uint64) {
servers := make([]string, n) servers := make([]string, n)
weights := make([]uint64, n) weights := make([]float64, n)
for i := uint64(0); i < uint64(len(servers)); i++ { for i := uint64(0); i < uint64(len(servers)); i++ {
weights[i] = uint64(n) - i weights[i] = float64(uint64(n)-i) / float64(n)
servers[i] = "localhost:" + strconv.FormatUint(60000-i, 10) servers[i] = "localhost:" + strconv.FormatUint(60000-i, 10)
} }