hrw/hrw_test.go
alexvanin 6ea18e48fe Move normalization routine out of hrw library
HRW library supports weighted sorting. Weights must be normalized
before applying. Since there could be different types of normalization
for multiple criteria, there is no point to perform simple
normalization in this library. Pass a slice of normalized weights
to the `SortByWeight` functions.

This commit proposes to:
- remove normalization routine from `SortByWeight` function;
- add `ValidateWeights` function to check if weights are normalized;
- rename `weight` -> `distance` to avoid naming confusion between
  hash distance and actual weights;
- use testify lib in the tests;
2019-07-05 09:46:01 +03:00

810 lines
19 KiB
Go

package hrw
import (
"encoding/binary"
"fmt"
"math"
"math/rand"
"strconv"
"testing"
"github.com/stretchr/testify/require"
)
type (
hashString string
unknown byte
slices struct {
actual interface{}
expect interface{}
}
Uint32Slice []uint32
)
var testKey = []byte("0xff51afd7ed558ccd")
func (p Uint32Slice) Len() int { return len(p) }
func (p Uint32Slice) Less(i, j int) bool { return p[i] < p[j] }
func (p Uint32Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
func Example() {
// given a set of servers
servers := []string{
"one.example.com",
"two.example.com",
"three.example.com",
"four.example.com",
"five.example.com",
"six.example.com",
}
// HRW can consistently select a uniformly-distributed set of servers for
// any given key
var (
key = []byte("/examples/object-key")
h = Hash(key)
)
SortSliceByValue(servers, h)
for id := range servers {
fmt.Printf("trying GET %s%s\n", servers[id], key)
}
// Output:
// trying GET three.example.com/examples/object-key
// trying GET two.example.com/examples/object-key
// trying GET five.example.com/examples/object-key
// trying GET six.example.com/examples/object-key
// trying GET one.example.com/examples/object-key
// trying GET four.example.com/examples/object-key
}
func (h hashString) Hash() uint64 {
return Hash([]byte(h))
}
func TestSortSliceByIndex(t *testing.T) {
actual := []string{"a", "b", "c", "d", "e", "f"}
expect := []string{"e", "a", "c", "f", "d", "b"}
hash := Hash(testKey)
SortSliceByIndex(actual, hash)
require.Equal(t, expect, actual)
}
func TestValidateWeights(t *testing.T) {
weights := []float64{10, 10, 10, 2, 2, 2}
err := ValidateWeights(weights)
require.Error(t, err)
weights = []float64{1, 1, 1, 0.2, 0.2, 0.2}
err = ValidateWeights(weights)
require.NoError(t, err)
}
func TestSortSliceByWeightIndex(t *testing.T) {
actual := []string{"a", "b", "c", "d", "e", "f"}
weights := []float64{1, 1, 1, 0.2, 0.2, 0.2}
expect := []string{"a", "c", "b", "e", "f", "d"}
hash := Hash(testKey)
SortSliceByWeightIndex(actual, weights, hash)
require.Equal(t, expect, actual)
}
func TestSortSliceByValue(t *testing.T) {
actual := []string{"a", "b", "c", "d", "e", "f"}
expect := []string{"d", "f", "c", "b", "a", "e"}
hash := Hash(testKey)
SortSliceByValue(actual, hash)
require.Equal(t, expect, actual)
}
func TestSortByRule(t *testing.T) {
t.Run("direct", func(t *testing.T) {
// 0 1 2 3 4 5
actual := []string{"a", "b", "c", "d", "e", "f"}
// 4 2 0 5 3 1
expect := []string{"c", "f", "b", "e", "a", "d"}
rule := []uint64{4, 2, 0, 5, 3, 1}
sortByRuleDirect(
func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
6, rule)
require.Equal(t, expect, actual)
})
t.Run("inverse", func(t *testing.T) {
// 0 1 2 3 4 5
actual := []string{"a", "b", "c", "d", "e", "f"}
// 4 2 0 5 3 1
expect := []string{"e", "c", "a", "f", "d", "b"}
rule := []uint64{4, 2, 0, 5, 3, 1}
sortByRuleInverse(
func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
6, rule)
require.Equal(t, expect, actual)
})
}
func TestSortSliceByValueFail(t *testing.T) {
t.Run("empty slice", func(t *testing.T) {
var (
actual []int
hash = Hash(testKey)
)
require.NotPanics(t, func() { SortSliceByValue(actual, hash) })
})
t.Run("must be slice", func(t *testing.T) {
actual := 10
hash := Hash(testKey)
require.NotPanics(t, func() { SortSliceByValue(actual, hash) })
})
t.Run("must 'fail' for unknown type", func(t *testing.T) {
actual := []unknown{1, 2, 3, 4, 5}
expect := []unknown{1, 2, 3, 4, 5}
hash := Hash(testKey)
SortSliceByValue(actual, hash)
require.Equal(t, expect, actual)
})
}
func TestSortSliceByValueHasher(t *testing.T) {
actual := []hashString{"a", "b", "c", "d", "e", "f"}
expect := []hashString{"d", "f", "c", "b", "a", "e"}
hash := Hash(testKey)
SortSliceByValue(actual, hash)
require.Equal(t, expect, actual)
}
func TestSortSliceByValueIntSlice(t *testing.T) {
cases := []slices{
{
actual: []int{0, 1, 2, 3, 4, 5},
expect: []int{2, 0, 5, 3, 1, 4},
},
{
actual: []uint{0, 1, 2, 3, 4, 5},
expect: []uint{2, 0, 5, 3, 1, 4},
},
{
actual: []int8{0, 1, 2, 3, 4, 5},
expect: []int8{5, 2, 1, 4, 0, 3},
},
{
actual: []uint8{0, 1, 2, 3, 4, 5},
expect: []uint8{5, 2, 1, 4, 0, 3},
},
{
actual: []int16{0, 1, 2, 3, 4, 5},
expect: []int16{1, 0, 3, 2, 4, 5},
},
{
actual: []uint16{0, 1, 2, 3, 4, 5},
expect: []uint16{1, 0, 3, 2, 4, 5},
},
{
actual: []int32{0, 1, 2, 3, 4, 5},
expect: []int32{5, 1, 2, 0, 3, 4},
},
{
actual: []uint32{0, 1, 2, 3, 4, 5},
expect: []uint32{5, 1, 2, 0, 3, 4},
},
{
actual: Uint32Slice{0, 1, 2, 3, 4, 5},
expect: Uint32Slice{0, 1, 2, 3, 4, 5},
},
{
actual: []int64{0, 1, 2, 3, 4, 5},
expect: []int64{5, 3, 0, 1, 4, 2},
},
{
actual: []uint64{0, 1, 2, 3, 4, 5},
expect: []uint64{5, 3, 0, 1, 4, 2},
},
}
hash := Hash(testKey)
for _, tc := range cases {
SortSliceByValue(tc.actual, hash)
require.Equal(t, tc.expect, tc.actual)
}
}
func TestSort(t *testing.T) {
nodes := []uint64{1, 2, 3, 4, 5}
hash := Hash(testKey)
actual := Sort(nodes, hash)
expected := []uint64{3, 1, 4, 2, 0}
require.Equal(t, expected, actual)
}
func TestDistribution(t *testing.T) {
const (
size = 10
keys = 100000
percent = 0.03
)
// We use χ2 method to determine similarity of distribution with uniform distribution.
// χ2 = Σ((n-N)**2/N)
// https://www.medcalc.org/manual/chi-square-table.php p=0.1
var chiTable = map[int]float64{9: 14.68, 99: 117.407}
t.Run("sort", func(t *testing.T) {
var (
i uint64
nodes [size]uint64
counts = make(map[uint64]uint64, size)
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
nodes[i] = i
}
for i = 0; i < keys; i++ {
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
counts[Sort(nodes[:], hash)[0]]++
}
var chi2 float64
mean := float64(keys) / float64(size)
delta := mean * percent
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByIndex", func(t *testing.T) {
var (
i uint64
a, b [size]uint64
counts = make(map[uint64]int, size)
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = i
}
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByIndex(b[:], hash)
counts[b[0]]++
}
var chi2 float64
mean := float64(keys) / float64(size)
delta := mean * percent
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByValue", func(t *testing.T) {
var (
i uint64
a, b [size]int
counts = make(map[int]int, size)
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = int(i)
}
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByValue(b[:], hash)
counts[b[0]]++
}
var chi2 float64
mean := float64(keys) / float64(size)
delta := mean * percent
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByStringValue", func(t *testing.T) {
var (
i uint64
a, b [size]string
counts = make(map[string]int, size)
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = strconv.FormatUint(i, 10)
}
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByValue(b[:], hash)
counts[b[0]]++
}
var chi2 float64
mean := float64(keys) / float64(size)
delta := mean * percent
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByInt32Value", func(t *testing.T) {
var (
i uint64
a, b [size]int32
counts = make(map[int32]int, size)
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = int32(i)
}
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByValue(b[:], hash)
counts[b[0]]++
}
var chi2 float64
mean := float64(keys) / float64(size)
delta := mean * percent
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByWeightValue", func(t *testing.T) {
var (
i uint64
a, b, result [size]int
w [size]float64
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = int(i)
w[i] = float64(size-i) / float64(size)
}
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByWeightValue(b[:], w[:], hash)
result[b[0]]++
}
for i := 0; i < size-1; i++ {
require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]),
"result array %v must be corresponded to weights %v", result, w)
}
})
t.Run("sortByWeightValueShuffledWeight", func(t *testing.T) {
var (
i uint64
a, b, result [size]int
w [size]float64
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = int(i)
w[i] = float64(size-i) / float64(size)
}
rand.Shuffle(size, func(i, j int) {
w[i], w[j] = w[j], w[i]
})
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByWeightValue(b[:], w[:], hash)
result[b[0]]++
}
for i := 0; i < size-1; i++ {
require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]),
"result array %v must be corresponded to weights %v", result, w)
}
})
t.Run("sortByWeightValueEmptyWeight", func(t *testing.T) {
var (
i uint64
a, b [size]int
w [size]float64
counts = make(map[int]int, size)
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = int(i)
}
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByWeightValue(b[:], w[:], hash)
counts[b[0]]++
}
var chi2 float64
mean := float64(keys) / float64(size)
delta := mean * percent
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByWeightValueUniformWeight", func(t *testing.T) {
var (
i uint64
a, b [size]int
w [size]float64
counts = make(map[int]int, size)
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = int(i)
w[i] = 0.5
}
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByWeightValue(b[:], w[:], hash)
counts[b[0]]++
}
var chi2 float64
mean := float64(keys) / float64(size)
delta := mean * percent
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByWeightValueAbsoluteW", func(t *testing.T) {
const keys = 1
var (
i uint64
a, b [size]int
w [size]float64
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = int(i)
}
w[size-1] = 1
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByWeightValue(b[:], w[:], hash)
require.True(t, b[0] == a[size-1],
"expected last value of %v to be the first with highest distance", a)
}
})
t.Run("sortByWeightValueNormalizedWeight", func(t *testing.T) {
var (
i uint64
a, b, result [size]uint64
w, normalizedW [size]float64
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = i
w[int(i)] = 10
}
w[0] = 100
// Here let's use logarithm normalization
for i = 0; i < size; i++ {
normalizedW[i] = math.Log2(w[i]) / math.Log2(w[0])
}
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByWeightValue(b[:], normalizedW[:], hash)
for j := range b {
result[b[j]] += uint64(len(b) - j)
}
}
cutResult := result[1:]
var total uint64
for i := range cutResult {
total += cutResult[i]
}
var chi2 float64
mean := float64(total) / float64(len(cutResult))
delta := mean * percent
for node, count := range cutResult {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("hash collision", func(t *testing.T) {
var (
i uint64
counts = make(map[uint64]uint64)
key = make([]byte, 16)
)
for i = 0; i < keys; i++ {
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
counts[hash]++
}
for node, count := range counts {
if count > 1 {
t.Errorf("Node %d received %d keys", node, count)
}
}
})
}
func BenchmarkSort_fnv_10(b *testing.B) {
hash := Hash(testKey)
_ = benchmarkSort(b, 10, hash)
}
func BenchmarkSort_fnv_100(b *testing.B) {
hash := Hash(testKey)
_ = benchmarkSort(b, 100, hash)
}
func BenchmarkSort_fnv_1000(b *testing.B) {
hash := Hash(testKey)
_ = benchmarkSort(b, 1000, hash)
}
func BenchmarkSortByIndex_fnv_10(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByIndex(b, 10, hash)
}
func BenchmarkSortByIndex_fnv_100(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByIndex(b, 100, hash)
}
func BenchmarkSortByIndex_fnv_1000(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByIndex(b, 1000, hash)
}
func BenchmarkSortByValue_fnv_10(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByValue(b, 10, hash)
}
func BenchmarkSortByValue_fnv_100(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByValue(b, 100, hash)
}
func BenchmarkSortByValue_fnv_1000(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByValue(b, 1000, hash)
}
func BenchmarkSortByWeight_fnv_10(b *testing.B) {
hash := Hash(testKey)
_ = benchmarkSortByWeight(b, 10, hash)
}
func BenchmarkSortByWeight_fnv_100(b *testing.B) {
hash := Hash(testKey)
_ = benchmarkSortByWeight(b, 100, hash)
}
func BenchmarkSortByWeight_fnv_1000(b *testing.B) {
hash := Hash(testKey)
_ = benchmarkSortByWeight(b, 1000, hash)
}
func BenchmarkSortByWeightIndex_fnv_10(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByWeightIndex(b, 10, hash)
}
func BenchmarkSortByWeightIndex_fnv_100(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByWeightIndex(b, 100, hash)
}
func BenchmarkSortByWeightIndex_fnv_1000(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByWeightIndex(b, 1000, hash)
}
func BenchmarkSortByWeightValue_fnv_10(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByWeightValue(b, 10, hash)
}
func BenchmarkSortByWeightValue_fnv_100(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByWeightValue(b, 100, hash)
}
func BenchmarkSortByWeightValue_fnv_1000(b *testing.B) {
hash := Hash(testKey)
benchmarkSortByWeightValue(b, 1000, hash)
}
func benchmarkSort(b *testing.B, n int, hash uint64) uint64 {
servers := make([]uint64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
servers[i] = i
}
b.ResetTimer()
b.ReportAllocs()
var x uint64
for i := 0; i < b.N; i++ {
x += Sort(servers, hash)[0]
}
return x
}
func benchmarkSortByIndex(b *testing.B, n int, hash uint64) {
servers := make([]uint64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
servers[i] = i
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortSliceByIndex(servers, hash)
}
}
func benchmarkSortByValue(b *testing.B, n int, hash uint64) {
servers := make([]string, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
servers[i] = "localhost:" + strconv.FormatUint(60000-i, 10)
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortSliceByValue(servers, hash)
}
}
func benchmarkSortByWeight(b *testing.B, n int, hash uint64) uint64 {
servers := make([]uint64, n)
weights := make([]float64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
weights[i] = float64(uint64(n)-i) / float64(n)
servers[i] = i
}
b.ResetTimer()
b.ReportAllocs()
var x uint64
for i := 0; i < b.N; i++ {
x += SortByWeight(servers, weights, hash)[0]
}
return x
}
func benchmarkSortByWeightIndex(b *testing.B, n int, hash uint64) {
servers := make([]uint64, n)
weights := make([]float64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
weights[i] = float64(uint64(n)-i) / float64(n)
servers[i] = i
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortSliceByWeightIndex(servers, weights, hash)
}
}
func benchmarkSortByWeightValue(b *testing.B, n int, hash uint64) {
servers := make([]string, n)
weights := make([]float64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
weights[i] = float64(uint64(n)-i) / float64(n)
servers[i] = "localhost:" + strconv.FormatUint(60000-i, 10)
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortSliceByWeightValue(servers, weights, hash)
}
}