diff --git a/README.md b/README.md index bec9cbd..e141fe4 100644 --- a/README.md +++ b/README.md @@ -14,26 +14,25 @@ ## Benchmark: ``` -BenchmarkSort_fnv_10-8 5000000 354 ns/op 224 B/op 3 allocs/op -BenchmarkSort_fnv_100-8 300000 5103 ns/op 1856 B/op 3 allocs/op -BenchmarkSort_fnv_1000-8 10000 115874 ns/op 16448 B/op 3 allocs/op -BenchmarkSortByIndex_fnv_10-8 3000000 562 ns/op 384 B/op 7 allocs/op -BenchmarkSortByIndex_fnv_100-8 200000 5819 ns/op 2928 B/op 7 allocs/op -BenchmarkSortByIndex_fnv_1000-8 10000 125859 ns/op 25728 B/op 7 allocs/op -BenchmarkSortByValue_fnv_10-8 2000000 1056 ns/op 544 B/op 17 allocs/op -BenchmarkSortByValue_fnv_100-8 200000 9593 ns/op 4528 B/op 107 allocs/op -BenchmarkSortByValue_fnv_1000-8 10000 109272 ns/op 41728 B/op 1007 allocs/op - -BenchmarkSortByWeight_fnv_10-8 3000000 500 ns/op 320 B/op 4 allocs/op -BenchmarkSortByWeight_fnv_100-8 200000 8257 ns/op 2768 B/op 4 allocs/op -BenchmarkSortByWeight_fnv_1000-8 10000 197938 ns/op 24656 B/op 4 allocs/op -BenchmarkSortByWeightIndex_fnv_10-8 2000000 760 ns/op 480 B/op 8 allocs/op -BenchmarkSortByWeightIndex_fnv_100-8 200000 9191 ns/op 3840 B/op 8 allocs/op -BenchmarkSortByWeightIndex_fnv_1000-8 10000 208204 ns/op 33936 B/op 8 allocs/op -BenchmarkSortByWeightValue_fnv_10-8 1000000 1095 ns/op 640 B/op 18 allocs/op -BenchmarkSortByWeightValue_fnv_100-8 200000 12291 ns/op 5440 B/op 108 allocs/op -BenchmarkSortByWeightValue_fnv_1000-8 10000 145125 ns/op 49936 B/op 1008 allocs/op +BenchmarkSort_fnv_10-8 5000000 365 ns/op 224 B/op 3 allocs/op +BenchmarkSort_fnv_100-8 300000 5261 ns/op 1856 B/op 3 allocs/op +BenchmarkSort_fnv_1000-8 10000 119462 ns/op 16448 B/op 3 allocs/op +BenchmarkSortByIndex_fnv_10-8 3000000 546 ns/op 384 B/op 7 allocs/op +BenchmarkSortByIndex_fnv_100-8 200000 5965 ns/op 2928 B/op 7 allocs/op +BenchmarkSortByIndex_fnv_1000-8 10000 127732 ns/op 25728 B/op 7 allocs/op +BenchmarkSortByValue_fnv_10-8 2000000 962 ns/op 544 B/op 17 allocs/op +BenchmarkSortByValue_fnv_100-8 200000 9604 ns/op 4528 B/op 107 allocs/op +BenchmarkSortByValue_fnv_1000-8 10000 111741 ns/op 41728 B/op 1007 allocs/op +BenchmarkSortByWeight_fnv_10-8 3000000 501 ns/op 320 B/op 4 allocs/op +BenchmarkSortByWeight_fnv_100-8 200000 8495 ns/op 2768 B/op 4 allocs/op +BenchmarkSortByWeight_fnv_1000-8 10000 197880 ns/op 24656 B/op 4 allocs/op +BenchmarkSortByWeightIndex_fnv_10-8 2000000 702 ns/op 480 B/op 8 allocs/op +BenchmarkSortByWeightIndex_fnv_100-8 200000 9338 ns/op 3840 B/op 8 allocs/op +BenchmarkSortByWeightIndex_fnv_1000-8 10000 204669 ns/op 33936 B/op 8 allocs/op +BenchmarkSortByWeightValue_fnv_10-8 1000000 1083 ns/op 640 B/op 18 allocs/op +BenchmarkSortByWeightValue_fnv_100-8 200000 11444 ns/op 5440 B/op 108 allocs/op +BenchmarkSortByWeightValue_fnv_1000-8 10000 148471 ns/op 49936 B/op 1008 allocs/op ``` ## Example @@ -71,11 +70,11 @@ func main() { } // Output: - // trying GET four.example.com/examples/object-key // trying GET three.example.com/examples/object-key - // trying GET one.example.com/examples/object-key // trying GET two.example.com/examples/object-key - // trying GET six.example.com/examples/object-key // trying GET five.example.com/examples/object-key + // trying GET six.example.com/examples/object-key + // trying GET one.example.com/examples/object-key + // trying GET four.example.com/examples/object-key } ``` \ No newline at end of file diff --git a/go.mod b/go.mod index f2aab65..d3a600f 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,6 @@ module github.com/nspcc-dev/hrw -require github.com/spaolacci/murmur3 v1.1.0 +require ( + github.com/spaolacci/murmur3 v1.1.0 + github.com/stretchr/testify v1.3.0 +) diff --git a/go.sum b/go.sum index c14ec85..d6c0f7c 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,9 @@ +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= diff --git a/hrw.go b/hrw.go index 1d3c4c1..f946dda 100644 --- a/hrw.go +++ b/hrw.go @@ -4,6 +4,7 @@ package hrw import ( "encoding/binary" + "errors" "reflect" "sort" @@ -17,9 +18,9 @@ type ( Hasher interface{ Hash() uint64 } hashed struct { - length int - sorted []uint64 - weight []uint64 + length int + sorted []uint64 + distance []uint64 } weighted struct { @@ -28,7 +29,13 @@ type ( } ) -func weight(x uint64, y uint64) uint64 { +// Boundaries of valid normalized weights +const ( + NormalizedMaxWeight = 1.0 + NormalizedMinWeight = 0.0 +) + +func distance(x uint64, y uint64) uint64 { acc := x ^ y // here used mmh3 64 bit finalizer // https://github.com/aappleby/smhasher/blob/61a0530f28277f2e850bfc39600ce61d02b518de/src/MurmurHash3.cpp#L81 @@ -41,19 +48,19 @@ func weight(x uint64, y uint64) uint64 { } func (h hashed) Len() int { return h.length } -func (h hashed) Less(i, j int) bool { return h.weight[i] < h.weight[j] } +func (h hashed) Less(i, j int) bool { return h.distance[i] < h.distance[j] } func (h hashed) Swap(i, j int) { h.sorted[i], h.sorted[j] = h.sorted[j], h.sorted[i] - h.weight[i], h.weight[j] = h.weight[j], h.weight[i] + h.distance[i], h.distance[j] = h.distance[j], h.distance[i] } func (w weighted) Len() int { return w.h.length } func (w weighted) Less(i, j int) bool { - // `maxUint64 - weight` makes least weight most valuable + // `maxUint64 - distance` makes the shorter distance more valuable // it is necessary for operation with normalized values - wi := float64(^uint64(0)-w.h.weight[i]) * w.normal[i] - wj := float64(^uint64(0)-w.h.weight[j]) * w.normal[j] - return wi > wj // higher weight must be placed lower to be first + wi := float64(^uint64(0)-w.h.distance[i]) * w.normal[i] + wj := float64(^uint64(0)-w.h.distance[j]) * w.normal[j] + return wi > wj // higher distance must be placed lower to be first } func (w weighted) Swap(i, j int) { w.normal[i], w.normal[j] = w.normal[j], w.normal[i]; w.h.Swap(i, j) } @@ -62,65 +69,64 @@ func Hash(key []byte) uint64 { return murmur3.Sum64(key) } -// Sort receive nodes and hash, and sort it by weight +// Sort receive nodes and hash, and sort it by distance func Sort(nodes []uint64, hash uint64) []uint64 { var ( l = len(nodes) h = hashed{ - length: l, - sorted: make([]uint64, 0, l), - weight: make([]uint64, 0, l), + length: l, + sorted: make([]uint64, 0, l), + distance: make([]uint64, 0, l), } ) - for i, node := range nodes { + for i := range nodes { h.sorted = append(h.sorted, uint64(i)) - h.weight = append(h.weight, weight(node, hash)) + h.distance = append(h.distance, distance(nodes[i], hash)) } sort.Sort(h) return h.sorted } -// SortByWeight receive nodes and hash, and sort it by weight -func SortByWeight(nodes []uint64, weights []uint64, hash uint64) []uint64 { - var ( - maxWeight uint64 - - l = len(nodes) - w = weighted{ - h: hashed{ - length: l, - sorted: make([]uint64, 0, l), - weight: make([]uint64, 0, l), - }, - normal: make([]float64, 0, l), - } - ) - - // finding max weight to perform normalization +// SortByWeight receive nodes, weights and hash, and sort it by distance * weight +func SortByWeight(nodes []uint64, weights []float64, hash uint64) []uint64 { + // check if numbers of weights and nodes are equal + uniform := true for i := range weights { - if maxWeight < weights[i] { - maxWeight = weights[i] + // check if all nodes have the same distance + if weights[i] != weights[0] { + uniform = false + break } } - // if all nodes have 0-weights or weights are incorrect then sort uniformly - if maxWeight == 0 || l != len(nodes) { + l := len(nodes) + w := weighted{ + h: hashed{ + length: l, + sorted: make([]uint64, 0, l), + distance: make([]uint64, 0, l), + }, + normal: make([]float64, l), + } + + // if all nodes have the same distance then sort uniformly + if uniform || len(weights) != l { return Sort(nodes, hash) } - fMaxWeight := float64(maxWeight) - for i, node := range nodes { + for i := range nodes { w.h.sorted = append(w.h.sorted, uint64(i)) - w.h.weight = append(w.h.weight, weight(node, hash)) - w.normal = append(w.normal, float64(weights[i])/fMaxWeight) + w.h.distance = append(w.h.distance, distance(nodes[i], hash)) } + copy(w.normal, weights) + sort.Sort(w) return w.h.sorted } -// SortSliceByValue received []T and hash to sort by value-weight +// SortSliceByValue received []T and hash to sort by value-distance func SortSliceByValue(slice interface{}, hash uint64) { rule := prepareRule(slice) if rule != nil { @@ -130,17 +136,17 @@ func SortSliceByValue(slice interface{}, hash uint64) { } } -// SortSliceByWeightValue received []T, weights and hash to sort by value-weight -func SortSliceByWeightValue(slice interface{}, weight []uint64, hash uint64) { +// SortSliceByWeightValue received []T, weights and hash to sort by value-distance * weights +func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) { rule := prepareRule(slice) if rule != nil { swap := reflect.Swapper(slice) - rule = SortByWeight(rule, weight, hash) + rule = SortByWeight(rule, weights, hash) sortByRuleInverse(swap, uint64(len(rule)), rule) } } -// SortSliceByIndex received []T and hash to sort by index-weight +// SortSliceByIndex received []T and hash to sort by index-distance func SortSliceByIndex(slice interface{}, hash uint64) { length := uint64(reflect.ValueOf(slice).Len()) swap := reflect.Swapper(slice) @@ -152,15 +158,15 @@ func SortSliceByIndex(slice interface{}, hash uint64) { sortByRuleInverse(swap, length, rule) } -// SortSliceByWeightIndex received []T, weights and hash to sort by index-weight -func SortSliceByWeightIndex(slice interface{}, weight []uint64, hash uint64) { +// SortSliceByWeightIndex received []T, weights and hash to sort by index-distance * weights +func SortSliceByWeightIndex(slice interface{}, weights []float64, hash uint64) { length := uint64(reflect.ValueOf(slice).Len()) swap := reflect.Swapper(slice) rule := make([]uint64, 0, length) for i := uint64(0); i < length; i++ { rule = append(rule, i) } - rule = SortByWeight(rule, weight, hash) + rule = SortByWeight(rule, weights, hash) sortByRuleInverse(swap, length, rule) } @@ -283,3 +289,13 @@ func prepareRule(slice interface{}) []uint64 { } return rule } + +// ValidateWeights checks if weights are normalized between 0.0 and 1.0 +func ValidateWeights(weights []float64) error { + for i := range weights { + if weights[i] > NormalizedMaxWeight || weights[i] < NormalizedMinWeight { + return errors.New("weights are not normalized") + } + } + return nil +} diff --git a/hrw_test.go b/hrw_test.go index 3703a55..389d88d 100644 --- a/hrw_test.go +++ b/hrw_test.go @@ -5,9 +5,10 @@ import ( "fmt" "math" "math/rand" - "reflect" "strconv" "testing" + + "github.com/stretchr/testify/require" ) type ( @@ -68,20 +69,25 @@ func TestSortSliceByIndex(t *testing.T) { expect := []string{"e", "a", "c", "f", "d", "b"} hash := Hash(testKey) SortSliceByIndex(actual, hash) - if !reflect.DeepEqual(actual, expect) { - t.Errorf("Was %#v, but expected %#v", actual, expect) - } + require.Equal(t, expect, actual) +} + +func TestValidateWeights(t *testing.T) { + weights := []float64{10, 10, 10, 2, 2, 2} + err := ValidateWeights(weights) + require.Error(t, err) + weights = []float64{1, 1, 1, 0.2, 0.2, 0.2} + err = ValidateWeights(weights) + require.NoError(t, err) } func TestSortSliceByWeightIndex(t *testing.T) { actual := []string{"a", "b", "c", "d", "e", "f"} - weights := []uint64{10, 10, 10, 2, 2, 2} + weights := []float64{1, 1, 1, 0.2, 0.2, 0.2} expect := []string{"a", "c", "b", "e", "f", "d"} hash := Hash(testKey) SortSliceByWeightIndex(actual, weights, hash) - if !reflect.DeepEqual(actual, expect) { - t.Errorf("Was %#v, but expected %#v", actual, expect) - } + require.Equal(t, expect, actual) } func TestSortSliceByValue(t *testing.T) { @@ -89,9 +95,7 @@ func TestSortSliceByValue(t *testing.T) { expect := []string{"d", "f", "c", "b", "a", "e"} hash := Hash(testKey) SortSliceByValue(actual, hash) - if !reflect.DeepEqual(actual, expect) { - t.Errorf("Was %#v, but expected %#v", actual, expect) - } + require.Equal(t, expect, actual) } func TestSortByRule(t *testing.T) { @@ -106,9 +110,7 @@ func TestSortByRule(t *testing.T) { func(i, j int) { actual[i], actual[j] = actual[j], actual[i] }, 6, rule) - if !reflect.DeepEqual(actual, expect) { - t.Errorf("Was %#v, but expected %#v", actual, expect) - } + require.Equal(t, expect, actual) }) t.Run("inverse", func(t *testing.T) { @@ -122,9 +124,7 @@ func TestSortByRule(t *testing.T) { func(i, j int) { actual[i], actual[j] = actual[j], actual[i] }, 6, rule) - if !reflect.DeepEqual(actual, expect) { - t.Errorf("Was %#v, but expected %#v", actual, expect) - } + require.Equal(t, expect, actual) }) } @@ -134,13 +134,13 @@ func TestSortSliceByValueFail(t *testing.T) { actual []int hash = Hash(testKey) ) - SortSliceByValue(actual, hash) + require.NotPanics(t, func() { SortSliceByValue(actual, hash) }) }) t.Run("must be slice", func(t *testing.T) { actual := 10 hash := Hash(testKey) - SortSliceByValue(actual, hash) + require.NotPanics(t, func() { SortSliceByValue(actual, hash) }) }) t.Run("must 'fail' for unknown type", func(t *testing.T) { @@ -148,9 +148,7 @@ func TestSortSliceByValueFail(t *testing.T) { expect := []unknown{1, 2, 3, 4, 5} hash := Hash(testKey) SortSliceByValue(actual, hash) - if !reflect.DeepEqual(actual, expect) { - t.Errorf("Was %#v, but expected %#v", actual, expect) - } + require.Equal(t, expect, actual) }) } @@ -159,9 +157,7 @@ func TestSortSliceByValueHasher(t *testing.T) { expect := []hashString{"d", "f", "c", "b", "a", "e"} hash := Hash(testKey) SortSliceByValue(actual, hash) - if !reflect.DeepEqual(actual, expect) { - t.Errorf("Was %#v, but expected %#v", actual, expect) - } + require.Equal(t, expect, actual) } func TestSortSliceByValueIntSlice(t *testing.T) { @@ -225,9 +221,7 @@ func TestSortSliceByValueIntSlice(t *testing.T) { for _, tc := range cases { SortSliceByValue(tc.actual, hash) - if !reflect.DeepEqual(tc.actual, tc.expect) { - t.Errorf("Was %#v, but expected %#v", tc.actual, tc.expect) - } + require.Equal(t, tc.expect, tc.actual) } } @@ -236,9 +230,7 @@ func TestSort(t *testing.T) { hash := Hash(testKey) actual := Sort(nodes, hash) expected := []uint64{3, 1, 4, 2, 0} - if !reflect.DeepEqual(actual, expected) { - t.Errorf("Was %#v, but expected %#v", actual, expected) - } + require.Equal(t, expected, actual) } func TestDistribution(t *testing.T) { @@ -276,18 +268,11 @@ func TestDistribution(t *testing.T) { for node, count := range counts { d := mean - float64(count) chi2 += math.Pow(float64(count)-mean, 2) / mean - if d > delta || (0-d) > delta { - t.Errorf( - "Node %d received %d keys, expected %.0f (+/- %.2f)", - node, count, mean, delta, - ) - } - } - if chi2 > chiTable[size-1] { - t.Errorf( - "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", - chi2, chiTable[size-1]) + require.True(t, d < delta && (0-d) < delta, + "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta) } + require.True(t, chi2 < chiTable[size-1], + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1]) }) t.Run("sortByIndex", func(t *testing.T) { @@ -317,18 +302,11 @@ func TestDistribution(t *testing.T) { for node, count := range counts { d := mean - float64(count) chi2 += math.Pow(float64(count)-mean, 2) / mean - if d > delta || (0-d) > delta { - t.Errorf( - "Node %d received %d keys, expected %.0f (+/- %.2f)", - node, count, mean, delta, - ) - } - } - if chi2 > chiTable[size-1] { - t.Errorf( - "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", - chi2, chiTable[size-1]) + require.True(t, d < delta && (0-d) < delta, + "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta) } + require.True(t, chi2 < chiTable[size-1], + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1]) }) t.Run("sortByValue", func(t *testing.T) { @@ -357,18 +335,11 @@ func TestDistribution(t *testing.T) { for node, count := range counts { d := mean - float64(count) chi2 += math.Pow(float64(count)-mean, 2) / mean - if d > delta || (0-d) > delta { - t.Errorf( - "Node %d received %d keys, expected %.0f (+/- %.2f)", - node, count, mean, delta, - ) - } - } - if chi2 > chiTable[size-1] { - t.Errorf( - "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", - chi2, chiTable[size-1]) + require.True(t, d < delta && (0-d) < delta, + "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta) } + require.True(t, chi2 < chiTable[size-1], + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1]) }) t.Run("sortByStringValue", func(t *testing.T) { @@ -397,18 +368,11 @@ func TestDistribution(t *testing.T) { for node, count := range counts { d := mean - float64(count) chi2 += math.Pow(float64(count)-mean, 2) / mean - if d > delta || (0-d) > delta { - t.Errorf( - "Node %s received %d keys, expected %.0f (+/- %.2f)", - node, count, mean, delta, - ) - } - } - if chi2 > chiTable[size-1] { - t.Errorf( - "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", - chi2, chiTable[size-1]) + require.True(t, d < delta && (0-d) < delta, + "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta) } + require.True(t, chi2 < chiTable[size-1], + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1]) }) t.Run("sortByInt32Value", func(t *testing.T) { @@ -437,31 +401,24 @@ func TestDistribution(t *testing.T) { for node, count := range counts { d := mean - float64(count) chi2 += math.Pow(float64(count)-mean, 2) / mean - if d > delta || (0-d) > delta { - t.Errorf( - "Node %d received %d keys, expected %.0f (+/- %.2f)", - node, count, mean, delta, - ) - } - } - if chi2 > chiTable[size-1] { - t.Errorf( - "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", - chi2, chiTable[size-1]) + require.True(t, d < delta && (0-d) < delta, + "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta) } + require.True(t, chi2 < chiTable[size-1], + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1]) }) t.Run("sortByWeightValue", func(t *testing.T) { var ( i uint64 a, b, result [size]int - w [size]uint64 + w [size]float64 key = make([]byte, 16) ) for i = 0; i < size; i++ { a[i] = int(i) - w[i] = size - i + w[i] = float64(size-i) / float64(size) } for i = 0; i < keys; i++ { copy(b[:], a[:]) @@ -470,24 +427,24 @@ func TestDistribution(t *testing.T) { SortSliceByWeightValue(b[:], w[:], hash) result[b[0]]++ } + for i := 0; i < size-1; i++ { - if bool(w[i] > w[i+1]) != bool(result[i] > result[i+1]) { - t.Fatalf("result array %v must be corresponded to weights %v", result, w) - } + require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]), + "result array %v must be corresponded to weights %v", result, w) } }) - t.Run("sortByWeightValueShuffledW", func(t *testing.T) { + t.Run("sortByWeightValueShuffledWeight", func(t *testing.T) { var ( i uint64 a, b, result [size]int - w [size]uint64 + w [size]float64 key = make([]byte, 16) ) for i = 0; i < size; i++ { a[i] = int(i) - w[i] = size - i + w[i] = float64(size-i) / float64(size) } rand.Shuffle(size, func(i, j int) { @@ -501,17 +458,16 @@ func TestDistribution(t *testing.T) { result[b[0]]++ } for i := 0; i < size-1; i++ { - if bool(w[i] > w[i+1]) != bool(result[i] > result[i+1]) { - t.Fatalf("result array %v must be corresponded to weights %v", result, w) - } + require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]), + "result array %v must be corresponded to weights %v", result, w) } }) - t.Run("sortByWeightValueEmptyW", func(t *testing.T) { + t.Run("sortByWeightValueEmptyWeight", func(t *testing.T) { var ( i uint64 a, b [size]int - w [size]uint64 + w [size]float64 counts = make(map[int]int, size) key = make([]byte, 16) ) @@ -534,32 +490,25 @@ func TestDistribution(t *testing.T) { for node, count := range counts { d := mean - float64(count) chi2 += math.Pow(float64(count)-mean, 2) / mean - if d > delta || (0-d) > delta { - t.Errorf( - "Node %d received %d keys, expected %.0f (+/- %.2f)", - node, count, mean, delta, - ) - } - } - if chi2 > chiTable[size-1] { - t.Errorf( - "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", - chi2, chiTable[size-1]) + require.True(t, d < delta && (0-d) < delta, + "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta) } + require.True(t, chi2 < chiTable[size-1], + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1]) }) - t.Run("sortByWeightValueUniformW", func(t *testing.T) { + t.Run("sortByWeightValueUniformWeight", func(t *testing.T) { var ( i uint64 a, b [size]int - w [size]uint64 + w [size]float64 counts = make(map[int]int, size) key = make([]byte, 16) ) for i = 0; i < size; i++ { a[i] = int(i) - w[i] = 10 + w[i] = 0.5 } for i = 0; i < keys; i++ { @@ -576,45 +525,85 @@ func TestDistribution(t *testing.T) { for node, count := range counts { d := mean - float64(count) chi2 += math.Pow(float64(count)-mean, 2) / mean - if d > delta || (0-d) > delta { - t.Errorf( - "Node %d received %d keys, expected %.0f (+/- %.2f)", - node, count, mean, delta, - ) - } - } - if chi2 > chiTable[size-1] { - t.Errorf( - "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", - chi2, chiTable[size-1]) + require.True(t, d < delta && (0-d) < delta, + "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta) } + require.True(t, chi2 < chiTable[size-1], + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1]) }) t.Run("sortByWeightValueAbsoluteW", func(t *testing.T) { + const keys = 1 var ( i uint64 a, b [size]int - w [size]uint64 + w [size]float64 key = make([]byte, 16) ) for i = 0; i < size; i++ { a[i] = int(i) } - w[size-1] = 10 + w[size-1] = 1 for i = 0; i < keys; i++ { copy(b[:], a[:]) binary.BigEndian.PutUint64(key, i+size) hash := Hash(key) SortSliceByWeightValue(b[:], w[:], hash) - if b[0] != a[size-1] { - t.Fatalf("expected last value of %v to be the first with highest weight", a) - } + require.True(t, b[0] == a[size-1], + "expected last value of %v to be the first with highest distance", a) } }) + t.Run("sortByWeightValueNormalizedWeight", func(t *testing.T) { + var ( + i uint64 + a, b, result [size]uint64 + w, normalizedW [size]float64 + key = make([]byte, 16) + ) + + for i = 0; i < size; i++ { + a[i] = i + w[int(i)] = 10 + } + w[0] = 100 + + // Here let's use logarithm normalization + for i = 0; i < size; i++ { + normalizedW[i] = math.Log2(w[i]) / math.Log2(w[0]) + } + + for i = 0; i < keys; i++ { + copy(b[:], a[:]) + binary.BigEndian.PutUint64(key, i+size) + hash := Hash(key) + SortSliceByWeightValue(b[:], normalizedW[:], hash) + for j := range b { + result[b[j]] += uint64(len(b) - j) + } + } + cutResult := result[1:] + var total uint64 + for i := range cutResult { + total += cutResult[i] + } + + var chi2 float64 + mean := float64(total) / float64(len(cutResult)) + delta := mean * percent + for node, count := range cutResult { + d := mean - float64(count) + chi2 += math.Pow(float64(count)-mean, 2) / mean + require.True(t, d < delta && (0-d) < delta, + "Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta) + } + require.True(t, chi2 < chiTable[size-1], + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1]) + }) + t.Run("hash collision", func(t *testing.T) { var ( i uint64 @@ -772,9 +761,9 @@ func benchmarkSortByValue(b *testing.B, n int, hash uint64) { func benchmarkSortByWeight(b *testing.B, n int, hash uint64) uint64 { servers := make([]uint64, n) - weights := make([]uint64, n) + weights := make([]float64, n) for i := uint64(0); i < uint64(len(servers)); i++ { - weights[i] = uint64(n) - i + weights[i] = float64(uint64(n)-i) / float64(n) servers[i] = i } @@ -790,9 +779,9 @@ func benchmarkSortByWeight(b *testing.B, n int, hash uint64) uint64 { func benchmarkSortByWeightIndex(b *testing.B, n int, hash uint64) { servers := make([]uint64, n) - weights := make([]uint64, n) + weights := make([]float64, n) for i := uint64(0); i < uint64(len(servers)); i++ { - weights[i] = uint64(n) - i + weights[i] = float64(uint64(n)-i) / float64(n) servers[i] = i } @@ -806,9 +795,9 @@ func benchmarkSortByWeightIndex(b *testing.B, n int, hash uint64) { func benchmarkSortByWeightValue(b *testing.B, n int, hash uint64) { servers := make([]string, n) - weights := make([]uint64, n) + weights := make([]float64, n) for i := uint64(0); i < uint64(len(servers)); i++ { - weights[i] = uint64(n) - i + weights[i] = float64(uint64(n)-i) / float64(n) servers[i] = "localhost:" + strconv.FormatUint(60000-i, 10) }