From aa230933d1ac74d49b09e748c90535e615fbaeac Mon Sep 17 00:00:00 2001 From: Alex Vanin Date: Fri, 5 Jul 2019 09:49:24 +0300 Subject: [PATCH] Move normalization routine out of hrw library (#6) HRW library supports weighted sorting. Weights must be normalized before applying. Since there could be different types of normalization for multiple criteria, there is no point to perform simple normalization in this library. Pass a slice of normalized weights to the `SortByWeight` functions. This commit proposes to: - remove normalization routine from `SortByWeight` function; - add `ValidateWeights` function to check if weights are normalized; - rename `weight` -> `distance` to avoid naming confusion between hash distance and actual weights; - use testify lib in the tests; --- README.md | 43 +++++---- go.mod | 5 +- go.sum | 7 ++ hrw.go | 114 +++++++++++++---------- hrw_test.go | 259 +++++++++++++++++++++++++--------------------------- 5 files changed, 221 insertions(+), 207 deletions(-) diff --git a/README.md b/README.md index bec9cbd..e141fe4 100644 --- a/README.md +++ b/README.md @@ -14,26 +14,25 @@ ## Benchmark: ``` -BenchmarkSort_fnv_10-8 5000000 354 ns/op 224 B/op 3 allocs/op -BenchmarkSort_fnv_100-8 300000 5103 ns/op 1856 B/op 3 allocs/op -BenchmarkSort_fnv_1000-8 10000 115874 ns/op 16448 B/op 3 allocs/op -BenchmarkSortByIndex_fnv_10-8 3000000 562 ns/op 384 B/op 7 allocs/op -BenchmarkSortByIndex_fnv_100-8 200000 5819 ns/op 2928 B/op 7 allocs/op -BenchmarkSortByIndex_fnv_1000-8 10000 125859 ns/op 25728 B/op 7 allocs/op -BenchmarkSortByValue_fnv_10-8 2000000 1056 ns/op 544 B/op 17 allocs/op -BenchmarkSortByValue_fnv_100-8 200000 9593 ns/op 4528 B/op 107 allocs/op -BenchmarkSortByValue_fnv_1000-8 10000 109272 ns/op 41728 B/op 1007 allocs/op - -BenchmarkSortByWeight_fnv_10-8 3000000 500 ns/op 320 B/op 4 allocs/op -BenchmarkSortByWeight_fnv_100-8 200000 8257 ns/op 2768 B/op 4 allocs/op -BenchmarkSortByWeight_fnv_1000-8 10000 197938 ns/op 24656 B/op 4 allocs/op -BenchmarkSortByWeightIndex_fnv_10-8 2000000 760 ns/op 480 B/op 8 allocs/op -BenchmarkSortByWeightIndex_fnv_100-8 200000 9191 ns/op 3840 B/op 8 allocs/op -BenchmarkSortByWeightIndex_fnv_1000-8 10000 208204 ns/op 33936 B/op 8 allocs/op -BenchmarkSortByWeightValue_fnv_10-8 1000000 1095 ns/op 640 B/op 18 allocs/op -BenchmarkSortByWeightValue_fnv_100-8 200000 12291 ns/op 5440 B/op 108 allocs/op -BenchmarkSortByWeightValue_fnv_1000-8 10000 145125 ns/op 49936 B/op 1008 allocs/op +BenchmarkSort_fnv_10-8 5000000 365 ns/op 224 B/op 3 allocs/op +BenchmarkSort_fnv_100-8 300000 5261 ns/op 1856 B/op 3 allocs/op +BenchmarkSort_fnv_1000-8 10000 119462 ns/op 16448 B/op 3 allocs/op +BenchmarkSortByIndex_fnv_10-8 3000000 546 ns/op 384 B/op 7 allocs/op +BenchmarkSortByIndex_fnv_100-8 200000 5965 ns/op 2928 B/op 7 allocs/op +BenchmarkSortByIndex_fnv_1000-8 10000 127732 ns/op 25728 B/op 7 allocs/op +BenchmarkSortByValue_fnv_10-8 2000000 962 ns/op 544 B/op 17 allocs/op +BenchmarkSortByValue_fnv_100-8 200000 9604 ns/op 4528 B/op 107 allocs/op +BenchmarkSortByValue_fnv_1000-8 10000 111741 ns/op 41728 B/op 1007 allocs/op +BenchmarkSortByWeight_fnv_10-8 3000000 501 ns/op 320 B/op 4 allocs/op +BenchmarkSortByWeight_fnv_100-8 200000 8495 ns/op 2768 B/op 4 allocs/op +BenchmarkSortByWeight_fnv_1000-8 10000 197880 ns/op 24656 B/op 4 allocs/op +BenchmarkSortByWeightIndex_fnv_10-8 2000000 702 ns/op 480 B/op 8 allocs/op +BenchmarkSortByWeightIndex_fnv_100-8 200000 9338 ns/op 3840 B/op 8 allocs/op +BenchmarkSortByWeightIndex_fnv_1000-8 10000 204669 ns/op 33936 B/op 8 allocs/op +BenchmarkSortByWeightValue_fnv_10-8 1000000 1083 ns/op 640 B/op 18 allocs/op +BenchmarkSortByWeightValue_fnv_100-8 200000 11444 ns/op 5440 B/op 108 allocs/op +BenchmarkSortByWeightValue_fnv_1000-8 10000 148471 ns/op 49936 B/op 1008 allocs/op ``` ## Example @@ -71,11 +70,11 @@ func main() { } // Output: - // trying GET four.example.com/examples/object-key // trying GET three.example.com/examples/object-key - // trying GET one.example.com/examples/object-key // trying GET two.example.com/examples/object-key - // trying GET six.example.com/examples/object-key // trying GET five.example.com/examples/object-key + // trying GET six.example.com/examples/object-key + // trying GET one.example.com/examples/object-key + // trying GET four.example.com/examples/object-key } ``` \ No newline at end of file diff --git a/go.mod b/go.mod index f2aab65..d3a600f 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,6 @@ module github.com/nspcc-dev/hrw -require github.com/spaolacci/murmur3 v1.1.0 +require ( + github.com/spaolacci/murmur3 v1.1.0 + github.com/stretchr/testify v1.3.0 +) diff --git a/go.sum b/go.sum index c14ec85..d6c0f7c 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,9 @@ +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= diff --git a/hrw.go b/hrw.go index 1d3c4c1..f946dda 100644 --- a/hrw.go +++ b/hrw.go @@ -4,6 +4,7 @@ package hrw import ( "encoding/binary" + "errors" "reflect" "sort" @@ -17,9 +18,9 @@ type ( Hasher interface{ Hash() uint64 } hashed struct { - length int - sorted []uint64 - weight []uint64 + length int + sorted []uint64 + distance []uint64 } weighted struct { @@ -28,7 +29,13 @@ type ( } ) -func weight(x uint64, y uint64) uint64 { +// Boundaries of valid normalized weights +const ( + NormalizedMaxWeight = 1.0 + NormalizedMinWeight = 0.0 +) + +func distance(x uint64, y uint64) uint64 { acc := x ^ y // here used mmh3 64 bit finalizer // https://github.com/aappleby/smhasher/blob/61a0530f28277f2e850bfc39600ce61d02b518de/src/MurmurHash3.cpp#L81 @@ -41,19 +48,19 @@ func weight(x uint64, y uint64) uint64 { } func (h hashed) Len() int { return h.length } -func (h hashed) Less(i, j int) bool { return h.weight[i] < h.weight[j] } +func (h hashed) Less(i, j int) bool { return h.distance[i] < h.distance[j] } func (h hashed) Swap(i, j int) { h.sorted[i], h.sorted[j] = h.sorted[j], h.sorted[i] - h.weight[i], h.weight[j] = h.weight[j], h.weight[i] + h.distance[i], h.distance[j] = h.distance[j], h.distance[i] } func (w weighted) Len() int { return w.h.length } func (w weighted) Less(i, j int) bool { - // `maxUint64 - weight` makes least weight most valuable + // `maxUint64 - distance` makes the shorter distance more valuable // it is necessary for operation with normalized values - wi := float64(^uint64(0)-w.h.weight[i]) * w.normal[i] - wj := float64(^uint64(0)-w.h.weight[j]) * w.normal[j] - return wi > wj // higher weight must be placed lower to be first + wi := float64(^uint64(0)-w.h.distance[i]) * w.normal[i] + wj := float64(^uint64(0)-w.h.distance[j]) * w.normal[j] + return wi > wj // higher distance must be placed lower to be first } func (w weighted) Swap(i, j int) { w.normal[i], w.normal[j] = w.normal[j], w.normal[i]; w.h.Swap(i, j) } @@ -62,65 +69,64 @@ func Hash(key []byte) uint64 { return murmur3.Sum64(key) } -// Sort receive nodes and hash, and sort it by weight +// Sort receive nodes and hash, and sort it by distance func Sort(nodes []uint64, hash uint64) []uint64 { var ( l = len(nodes) h = hashed{ - length: l, - sorted: make([]uint64, 0, l), - weight: make([]uint64, 0, l), + length: l, + sorted: make([]uint64, 0, l), + distance: make([]uint64, 0, l), } ) - for i, node := range nodes { + for i := range nodes { h.sorted = append(h.sorted, uint64(i)) - h.weight = append(h.weight, weight(node, hash)) + h.distance = append(h.distance, distance(nodes[i], hash)) } sort.Sort(h) return h.sorted } -// SortByWeight receive nodes and hash, and sort it by weight -func SortByWeight(nodes []uint64, weights []uint64, hash uint64) []uint64 { - var ( - maxWeight uint64 - - l = len(nodes) - w = weighted{ - h: hashed{ - length: l, - sorted: make([]uint64, 0, l), - weight: make([]uint64, 0, l), - }, - normal: make([]float64, 0, l), - } - ) - - // finding max weight to perform normalization +// SortByWeight receive nodes, weights and hash, and sort it by distance * weight +func SortByWeight(nodes []uint64, weights []float64, hash uint64) []uint64 { + // check if numbers of weights and nodes are equal + uniform := true for i := range weights { - if maxWeight < weights[i] { - maxWeight = weights[i] + // check if all nodes have the same distance + if weights[i] != weights[0] { + uniform = false + break } } - // if all nodes have 0-weights or weights are incorrect then sort uniformly - if maxWeight == 0 || l != len(nodes) { + l := len(nodes) + w := weighted{ + h: hashed{ + length: l, + sorted: make([]uint64, 0, l), + distance: make([]uint64, 0, l), + }, + normal: make([]float64, l), + } + + // if all nodes have the same distance then sort uniformly + if uniform || len(weights) != l { return Sort(nodes, hash) } - fMaxWeight := float64(maxWeight) - for i, node := range nodes { + for i := range nodes { w.h.sorted = append(w.h.sorted, uint64(i)) - w.h.weight = append(w.h.weight, weight(node, hash)) - w.normal = append(w.normal, float64(weights[i])/fMaxWeight) + w.h.distance = append(w.h.distance, distance(nodes[i], hash)) } + copy(w.normal, weights) + sort.Sort(w) return w.h.sorted } -// SortSliceByValue received []T and hash to sort by value-weight +// SortSliceByValue received []T and hash to sort by value-distance func SortSliceByValue(slice interface{}, hash uint64) { rule := prepareRule(slice) if rule != nil { @@ -130,17 +136,17 @@ func SortSliceByValue(slice interface{}, hash uint64) { } } -// SortSliceByWeightValue received []T, weights and hash to sort by value-weight -func SortSliceByWeightValue(slice interface{}, weight []uint64, hash uint64) { +// SortSliceByWeightValue received []T, weights and hash to sort by value-distance * weights +func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) { rule := prepareRule(slice) if rule != nil { swap := reflect.Swapper(slice) - rule = SortByWeight(rule, weight, hash) + rule = SortByWeight(rule, weights, hash) sortByRuleInverse(swap, uint64(len(rule)), rule) } } -// SortSliceByIndex received []T and hash to sort by index-weight +// SortSliceByIndex received []T and hash to sort by index-distance func SortSliceByIndex(slice interface{}, hash uint64) { length := uint64(reflect.ValueOf(slice).Len()) swap := reflect.Swapper(slice) @@ -152,15 +158,15 @@ func SortSliceByIndex(slice interface{}, hash uint64) { sortByRuleInverse(swap, length, rule) } -// SortSliceByWeightIndex received []T, weights and hash to sort by index-weight -func SortSliceByWeightIndex(slice interface{}, weight []uint64, hash uint64) { +// SortSliceByWeightIndex received []T, weights and hash to sort by index-distance * weights +func SortSliceByWeightIndex(slice interface{}, weights []float64, hash uint64) { length := uint64(reflect.ValueOf(slice).Len()) swap := reflect.Swapper(slice) rule := make([]uint64, 0, length) for i := uint64(0); i < length; i++ { rule = append(rule, i) } - rule = SortByWeight(rule, weight, hash) + rule = SortByWeight(rule, weights, hash) sortByRuleInverse(swap, length, rule) } @@ -283,3 +289,13 @@ func prepareRule(slice interface{}) []uint64 { } return rule } + +// ValidateWeights checks if weights are normalized between 0.0 and 1.0 +func ValidateWeights(weights []float64) error { + for i := range weights { + if weights[i] > NormalizedMaxWeight || weights[i] < NormalizedMinWeight { + return errors.New("weights are not normalized") + } + } + return nil +} diff --git a/hrw_test.go b/hrw_test.go index 3703a55..389d88d 100644 --- a/hrw_test.go +++ b/hrw_test.go @@ -5,9 +5,10 @@ import ( "fmt" "math" "math/rand" - "reflect" "strconv" "testing" + + "github.com/stretchr/testify/require" ) type ( @@ -68,20 +69,25 @@ func TestSortSliceByIndex(t *testing.T) { expect := []string{"e", "a", "c", "f", "d", "b"} hash := Hash(testKey) SortSliceByIndex(actual, hash) - if !reflect.DeepEqual(actual, expect) { - t.Errorf("Was %#v, but expected %#v", actual, expect) - } + require.Equal(t, expect, actual) +} + +func TestValidateWeights(t *testing.T) { + weights := []float64{10, 10, 10, 2, 2, 2} + err := ValidateWeights(weights) + require.Error(t, err) + weights = []float64{1, 1, 1, 0.2, 0.2, 0.2} + err = ValidateWeights(weights) + require.NoError(t, err) } func TestSortSliceByWeightIndex(t *testing.T) { actual := []string{"a", "b", "c", "d", "e", "f"} - weights := []uint64{10, 10, 10, 2, 2, 2} + weights := []float64{1, 1, 1, 0.2, 0.2, 0.2} expect := []string{"a", "c", "b", "e", "f", "d"} hash := Hash(testKey) SortSliceByWeightIndex(actual, weights, hash) - if !reflect.DeepEqual(actual, expect) { - t.Errorf("Was %#v, but expected %#v", actual, expect) - } + require.Equal(t, expect, actual) } func TestSortSliceByValue(t *testing.T) { @@ -89,9 +95,7 @@ func TestSortSliceByValue(t *testing.T) { expect := []string{"d", "f", "c", "b", "a", "e"} hash := Hash(testKey) SortSliceByValue(actual, hash) - if !reflect.DeepEqual(actual, expect) { - t.Errorf("Was %#v, but expected %#v", actual, expect) - } + require.Equal(t, expect, actual) } func TestSortByRule(t *testing.T) { @@ -106,9 +110,7 @@ func TestSortByRule(t *testing.T) { func(i, j int) { actual[i], actual[j] = actual[j], actual[i] }, 6, rule) - if !reflect.DeepEqual(actual, expect) { - t.Errorf("Was %#v, but expected %#v", actual, expect) - } + require.Equal(t, expect, actual) }) t.Run("inverse", func(t *testing.T) { @@ -122,9 +124,7 @@ func TestSortByRule(t *testing.T) { func(i, j int) { actual[i], actual[j] = actual[j], actual[i] }, 6, rule) - if !reflect.DeepEqual(actual, expect) { - t.Errorf("Was %#v, but expected %#v", actual, expect) - } + require.Equal(t, expect, actual) }) } @@ -134,13 +134,13 @@ func TestSortSliceByValueFail(t *testing.T) { actual []int hash = Hash(testKey) ) - SortSliceByValue(actual, hash) + require.NotPanics(t, func() { SortSliceByValue(actual, hash) }) }) t.Run("must be slice", func(t *testing.T) { actual := 10 hash := Hash(testKey) - SortSliceByValue(actual, hash) + require.NotPanics(t, func() { SortSliceByValue(actual, hash) }) }) t.Run("must 'fail' for unknown type", func(t *testing.T) { @@ -148,9 +148,7 @@ func TestSortSliceByValueFail(t *testing.T) { expect := []unknown{1, 2, 3, 4, 5} hash := Hash(testKey) SortSliceByValue(actual, hash) - if !reflect.DeepEqual(actual, expect) { - t.Errorf("Was %#v, but expected %#v", actual, expect) - } + require.Equal(t, expect, actual) }) } @@ -159,9 +157,7 @@ func TestSortSliceByValueHasher(t *testing.T) { expect := []hashString{"d", "f", "c", "b", "a", "e"} hash := Hash(testKey) SortSliceByValue(actual, hash) - if !reflect.DeepEqual(actual, expect) { - t.Errorf("Was %#v, but expected %#v", actual, expect) - } + require.Equal(t, expect, actual) } func TestSortSliceByValueIntSlice(t *testing.T) { @@ -225,9 +221,7 @@ func TestSortSliceByValueIntSlice(t *testing.T) { for _, tc := range cases { SortSliceByValue(tc.actual, hash) - if !reflect.DeepEqual(tc.actual, tc.expect) { - t.Errorf("Was %#v, but expected %#v", tc.actual, tc.expect) - } + require.Equal(t, tc.expect, tc.actual) } } @@ -236,9 +230,7 @@ func TestSort(t *testing.T) { hash := Hash(testKey) actual := Sort(nodes, hash) expected := []uint64{3, 1, 4, 2, 0} - if !reflect.DeepEqual(actual, expected) { - t.Errorf("Was %#v, but expected %#v", actual, expected) - } + require.Equal(t, expected, actual) } func TestDistribution(t *testing.T) { @@ -276,18 +268,11 @@ func TestDistribution(t *testing.T) { for node, count := range counts { d := mean - float64(count) chi2 += math.Pow(float64(count)-mean, 2) / mean - if d > delta || (0-d) > delta { - t.Errorf( - "Node %d received %d keys, expected %.0f (+/- %.2f)", - node, count, mean, delta, - ) - } - } - if chi2 > chiTable[size-1] { - t.Errorf( - "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", - chi2, chiTable[size-1]) + require.True(t, d < delta && (0-d) < delta, + "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta) } + require.True(t, chi2 < chiTable[size-1], + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1]) }) t.Run("sortByIndex", func(t *testing.T) { @@ -317,18 +302,11 @@ func TestDistribution(t *testing.T) { for node, count := range counts { d := mean - float64(count) chi2 += math.Pow(float64(count)-mean, 2) / mean - if d > delta || (0-d) > delta { - t.Errorf( - "Node %d received %d keys, expected %.0f (+/- %.2f)", - node, count, mean, delta, - ) - } - } - if chi2 > chiTable[size-1] { - t.Errorf( - "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", - chi2, chiTable[size-1]) + require.True(t, d < delta && (0-d) < delta, + "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta) } + require.True(t, chi2 < chiTable[size-1], + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1]) }) t.Run("sortByValue", func(t *testing.T) { @@ -357,18 +335,11 @@ func TestDistribution(t *testing.T) { for node, count := range counts { d := mean - float64(count) chi2 += math.Pow(float64(count)-mean, 2) / mean - if d > delta || (0-d) > delta { - t.Errorf( - "Node %d received %d keys, expected %.0f (+/- %.2f)", - node, count, mean, delta, - ) - } - } - if chi2 > chiTable[size-1] { - t.Errorf( - "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", - chi2, chiTable[size-1]) + require.True(t, d < delta && (0-d) < delta, + "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta) } + require.True(t, chi2 < chiTable[size-1], + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1]) }) t.Run("sortByStringValue", func(t *testing.T) { @@ -397,18 +368,11 @@ func TestDistribution(t *testing.T) { for node, count := range counts { d := mean - float64(count) chi2 += math.Pow(float64(count)-mean, 2) / mean - if d > delta || (0-d) > delta { - t.Errorf( - "Node %s received %d keys, expected %.0f (+/- %.2f)", - node, count, mean, delta, - ) - } - } - if chi2 > chiTable[size-1] { - t.Errorf( - "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", - chi2, chiTable[size-1]) + require.True(t, d < delta && (0-d) < delta, + "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta) } + require.True(t, chi2 < chiTable[size-1], + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1]) }) t.Run("sortByInt32Value", func(t *testing.T) { @@ -437,31 +401,24 @@ func TestDistribution(t *testing.T) { for node, count := range counts { d := mean - float64(count) chi2 += math.Pow(float64(count)-mean, 2) / mean - if d > delta || (0-d) > delta { - t.Errorf( - "Node %d received %d keys, expected %.0f (+/- %.2f)", - node, count, mean, delta, - ) - } - } - if chi2 > chiTable[size-1] { - t.Errorf( - "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", - chi2, chiTable[size-1]) + require.True(t, d < delta && (0-d) < delta, + "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta) } + require.True(t, chi2 < chiTable[size-1], + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1]) }) t.Run("sortByWeightValue", func(t *testing.T) { var ( i uint64 a, b, result [size]int - w [size]uint64 + w [size]float64 key = make([]byte, 16) ) for i = 0; i < size; i++ { a[i] = int(i) - w[i] = size - i + w[i] = float64(size-i) / float64(size) } for i = 0; i < keys; i++ { copy(b[:], a[:]) @@ -470,24 +427,24 @@ func TestDistribution(t *testing.T) { SortSliceByWeightValue(b[:], w[:], hash) result[b[0]]++ } + for i := 0; i < size-1; i++ { - if bool(w[i] > w[i+1]) != bool(result[i] > result[i+1]) { - t.Fatalf("result array %v must be corresponded to weights %v", result, w) - } + require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]), + "result array %v must be corresponded to weights %v", result, w) } }) - t.Run("sortByWeightValueShuffledW", func(t *testing.T) { + t.Run("sortByWeightValueShuffledWeight", func(t *testing.T) { var ( i uint64 a, b, result [size]int - w [size]uint64 + w [size]float64 key = make([]byte, 16) ) for i = 0; i < size; i++ { a[i] = int(i) - w[i] = size - i + w[i] = float64(size-i) / float64(size) } rand.Shuffle(size, func(i, j int) { @@ -501,17 +458,16 @@ func TestDistribution(t *testing.T) { result[b[0]]++ } for i := 0; i < size-1; i++ { - if bool(w[i] > w[i+1]) != bool(result[i] > result[i+1]) { - t.Fatalf("result array %v must be corresponded to weights %v", result, w) - } + require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]), + "result array %v must be corresponded to weights %v", result, w) } }) - t.Run("sortByWeightValueEmptyW", func(t *testing.T) { + t.Run("sortByWeightValueEmptyWeight", func(t *testing.T) { var ( i uint64 a, b [size]int - w [size]uint64 + w [size]float64 counts = make(map[int]int, size) key = make([]byte, 16) ) @@ -534,32 +490,25 @@ func TestDistribution(t *testing.T) { for node, count := range counts { d := mean - float64(count) chi2 += math.Pow(float64(count)-mean, 2) / mean - if d > delta || (0-d) > delta { - t.Errorf( - "Node %d received %d keys, expected %.0f (+/- %.2f)", - node, count, mean, delta, - ) - } - } - if chi2 > chiTable[size-1] { - t.Errorf( - "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", - chi2, chiTable[size-1]) + require.True(t, d < delta && (0-d) < delta, + "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta) } + require.True(t, chi2 < chiTable[size-1], + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1]) }) - t.Run("sortByWeightValueUniformW", func(t *testing.T) { + t.Run("sortByWeightValueUniformWeight", func(t *testing.T) { var ( i uint64 a, b [size]int - w [size]uint64 + w [size]float64 counts = make(map[int]int, size) key = make([]byte, 16) ) for i = 0; i < size; i++ { a[i] = int(i) - w[i] = 10 + w[i] = 0.5 } for i = 0; i < keys; i++ { @@ -576,45 +525,85 @@ func TestDistribution(t *testing.T) { for node, count := range counts { d := mean - float64(count) chi2 += math.Pow(float64(count)-mean, 2) / mean - if d > delta || (0-d) > delta { - t.Errorf( - "Node %d received %d keys, expected %.0f (+/- %.2f)", - node, count, mean, delta, - ) - } - } - if chi2 > chiTable[size-1] { - t.Errorf( - "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", - chi2, chiTable[size-1]) + require.True(t, d < delta && (0-d) < delta, + "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta) } + require.True(t, chi2 < chiTable[size-1], + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1]) }) t.Run("sortByWeightValueAbsoluteW", func(t *testing.T) { + const keys = 1 var ( i uint64 a, b [size]int - w [size]uint64 + w [size]float64 key = make([]byte, 16) ) for i = 0; i < size; i++ { a[i] = int(i) } - w[size-1] = 10 + w[size-1] = 1 for i = 0; i < keys; i++ { copy(b[:], a[:]) binary.BigEndian.PutUint64(key, i+size) hash := Hash(key) SortSliceByWeightValue(b[:], w[:], hash) - if b[0] != a[size-1] { - t.Fatalf("expected last value of %v to be the first with highest weight", a) - } + require.True(t, b[0] == a[size-1], + "expected last value of %v to be the first with highest distance", a) } }) + t.Run("sortByWeightValueNormalizedWeight", func(t *testing.T) { + var ( + i uint64 + a, b, result [size]uint64 + w, normalizedW [size]float64 + key = make([]byte, 16) + ) + + for i = 0; i < size; i++ { + a[i] = i + w[int(i)] = 10 + } + w[0] = 100 + + // Here let's use logarithm normalization + for i = 0; i < size; i++ { + normalizedW[i] = math.Log2(w[i]) / math.Log2(w[0]) + } + + for i = 0; i < keys; i++ { + copy(b[:], a[:]) + binary.BigEndian.PutUint64(key, i+size) + hash := Hash(key) + SortSliceByWeightValue(b[:], normalizedW[:], hash) + for j := range b { + result[b[j]] += uint64(len(b) - j) + } + } + cutResult := result[1:] + var total uint64 + for i := range cutResult { + total += cutResult[i] + } + + var chi2 float64 + mean := float64(total) / float64(len(cutResult)) + delta := mean * percent + for node, count := range cutResult { + d := mean - float64(count) + chi2 += math.Pow(float64(count)-mean, 2) / mean + require.True(t, d < delta && (0-d) < delta, + "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta) + } + require.True(t, chi2 < chiTable[size-1], + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1]) + }) + t.Run("hash collision", func(t *testing.T) { var ( i uint64 @@ -772,9 +761,9 @@ func benchmarkSortByValue(b *testing.B, n int, hash uint64) { func benchmarkSortByWeight(b *testing.B, n int, hash uint64) uint64 { servers := make([]uint64, n) - weights := make([]uint64, n) + weights := make([]float64, n) for i := uint64(0); i < uint64(len(servers)); i++ { - weights[i] = uint64(n) - i + weights[i] = float64(uint64(n)-i) / float64(n) servers[i] = i } @@ -790,9 +779,9 @@ func benchmarkSortByWeight(b *testing.B, n int, hash uint64) uint64 { func benchmarkSortByWeightIndex(b *testing.B, n int, hash uint64) { servers := make([]uint64, n) - weights := make([]uint64, n) + weights := make([]float64, n) for i := uint64(0); i < uint64(len(servers)); i++ { - weights[i] = uint64(n) - i + weights[i] = float64(uint64(n)-i) / float64(n) servers[i] = i } @@ -806,9 +795,9 @@ func benchmarkSortByWeightIndex(b *testing.B, n int, hash uint64) { func benchmarkSortByWeightValue(b *testing.B, n int, hash uint64) { servers := make([]string, n) - weights := make([]uint64, n) + weights := make([]float64, n) for i := uint64(0); i < uint64(len(servers)); i++ { - weights[i] = uint64(n) - i + weights[i] = float64(uint64(n)-i) / float64(n) servers[i] = "localhost:" + strconv.FormatUint(60000-i, 10) }