package hrw

import (
	"encoding/binary"
	"fmt"
	"math"
	"math/rand"
	"strconv"
	"testing"

	"github.com/stretchr/testify/require"
)

type (
	// hashString has a Hash method, exercised by TestSortSliceByValueHasher.
	hashString string
	// unknown is a type SortSliceByValue has no handling for; see TestSortSliceByValueFail.
	unknown byte

	// slices pairs an input slice with its expected order after sorting.
	slices struct {
		actual interface{}
		expect interface{}
	}

	Uint32Slice []uint32
)

// testKey is the fixed key used throughout the tests and benchmarks.
var testKey = []byte("0xff51afd7ed558ccd")

func (p Uint32Slice) Len() int           { return len(p) }
func (p Uint32Slice) Less(i, j int) bool { return p[i] < p[j] }
func (p Uint32Slice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }

func Example() {
	// given a set of servers
	servers := []string{
		"one.example.com",
		"two.example.com",
		"three.example.com",
		"four.example.com",
		"five.example.com",
		"six.example.com",
	}

	// HRW can consistently select a uniformly-distributed set of servers for
	// any given key
	var (
		key = []byte("/examples/object-key")
		h   = Hash(key)
	)

	SortSliceByValue(servers, h)
	for id := range servers {
		fmt.Printf("trying GET %s%s\n", servers[id], key)
	}

	// Output:
	// trying GET three.example.com/examples/object-key
	// trying GET two.example.com/examples/object-key
	// trying GET five.example.com/examples/object-key
	// trying GET six.example.com/examples/object-key
	// trying GET one.example.com/examples/object-key
	// trying GET four.example.com/examples/object-key
}

func (h hashString) Hash() uint64 {
	return Hash([]byte(h))
}

func TestSortSliceByIndex(t *testing.T) {
	actual := []string{"a", "b", "c", "d", "e", "f"}
	expect := []string{"e", "a", "c", "f", "d", "b"}
	hash := Hash(testKey)
	SortSliceByIndex(actual, hash)
	require.Equal(t, expect, actual)
}

func TestValidateWeights(t *testing.T) {
	weights := []float64{10, 10, 10, 2, 2, 2}
	err := ValidateWeights(weights)
	require.Error(t, err)

	weights = []float64{1, 1, 1, 0.2, 0.2, 0.2}
	err = ValidateWeights(weights)
	require.NoError(t, err)
}

func TestSortSliceByWeightIndex(t *testing.T) {
	actual := []string{"a", "b", "c", "d", "e", "f"}
	weights := []float64{1, 1, 1, 0.2, 0.2, 0.2}
	expect := []string{"a", "c", "b", "e", "f", "d"}
	hash := Hash(testKey)
	SortSliceByWeightIndex(actual, weights, hash)
	require.Equal(t, expect, actual)
}

func TestSortSliceByValue(t *testing.T) {
	actual := []string{"a", "b", "c", "d", "e", "f"}
	expect := []string{"d", "f", "c", "b", "a", "e"}
	hash := Hash(testKey)
	SortSliceByValue(actual, hash)
	require.Equal(t, expect, actual)
}

func TestSortByRule(t *testing.T) {
	t.Run("direct", func(t *testing.T) {
		//                  0    1    2    3    4    5
		actual := []string{"a", "b", "c", "d", "e", "f"}
		//                  4    2    0    5    3    1
		expect := []string{"c", "f", "b", "e", "a", "d"}
		rule := []uint64{4, 2, 0, 5, 3, 1}

		sortByRuleDirect(
			func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
			6, rule)

		require.Equal(t, expect, actual)
	})

	t.Run("inverse", func(t *testing.T) {
		//                  0    1    2    3    4    5
		actual := []string{"a", "b", "c", "d", "e", "f"}
		//                  4    2    0    5    3    1
		expect := []string{"e", "c", "a", "f", "d", "b"}
		rule := []uint64{4, 2, 0, 5, 3, 1}

		sortByRuleInverse(
			func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
			6, rule)

		require.Equal(t, expect, actual)
	})
}

func TestSortSliceByValueFail(t *testing.T) {
	t.Run("empty slice", func(t *testing.T) {
		var (
			actual []int
			hash   = Hash(testKey)
		)
		require.NotPanics(t, func() { SortSliceByValue(actual, hash) })
	})

	t.Run("must be slice", func(t *testing.T) {
		actual := 10
		hash := Hash(testKey)
		require.NotPanics(t, func() { SortSliceByValue(actual, hash) })
	})

	t.Run("must 'fail' for unknown type", func(t *testing.T) {
		actual := []unknown{1, 2, 3, 4, 5}
		expect := []unknown{1, 2, 3, 4, 5}
		hash := Hash(testKey)
		SortSliceByValue(actual, hash)
		require.Equal(t, expect, actual)
	})
}
func TestSortSliceByValueHasher(t *testing.T) {
	actual := []hashString{"a", "b", "c", "d", "e", "f"}
	expect := []hashString{"d", "f", "c", "b", "a", "e"}
	hash := Hash(testKey)
	SortSliceByValue(actual, hash)
	require.Equal(t, expect, actual)
}

func TestSortSliceByValueIntSlice(t *testing.T) {
	cases := []slices{
		{
			actual: []int{0, 1, 2, 3, 4, 5},
			expect: []int{2, 0, 5, 3, 1, 4},
		},
		{
			actual: []uint{0, 1, 2, 3, 4, 5},
			expect: []uint{2, 0, 5, 3, 1, 4},
		},
		{
			actual: []int8{0, 1, 2, 3, 4, 5},
			expect: []int8{5, 2, 1, 4, 0, 3},
		},
		{
			actual: []uint8{0, 1, 2, 3, 4, 5},
			expect: []uint8{5, 2, 1, 4, 0, 3},
		},
		{
			actual: []int16{0, 1, 2, 3, 4, 5},
			expect: []int16{1, 0, 3, 2, 4, 5},
		},
		{
			actual: []uint16{0, 1, 2, 3, 4, 5},
			expect: []uint16{1, 0, 3, 2, 4, 5},
		},
		{
			actual: []int32{0, 1, 2, 3, 4, 5},
			expect: []int32{5, 1, 2, 0, 3, 4},
		},
		{
			actual: []uint32{0, 1, 2, 3, 4, 5},
			expect: []uint32{5, 1, 2, 0, 3, 4},
		},
		{
			actual: Uint32Slice{0, 1, 2, 3, 4, 5},
			expect: Uint32Slice{0, 1, 2, 3, 4, 5},
		},
		{
			actual: []int64{0, 1, 2, 3, 4, 5},
			expect: []int64{5, 3, 0, 1, 4, 2},
		},
		{
			actual: []uint64{0, 1, 2, 3, 4, 5},
			expect: []uint64{5, 3, 0, 1, 4, 2},
		},
	}

	hash := Hash(testKey)
	for _, tc := range cases {
		SortSliceByValue(tc.actual, hash)
		require.Equal(t, tc.expect, tc.actual)
	}
}

func TestSort(t *testing.T) {
	nodes := []uint64{1, 2, 3, 4, 5}
	hash := Hash(testKey)
	actual := Sort(nodes, hash)
	expected := []uint64{3, 1, 4, 2, 0}
	require.Equal(t, expected, actual)
}

func TestDistribution(t *testing.T) {
	const (
		size    = 10
		keys    = 100000
		percent = 0.03
	)
	// We use the χ² test to check how close the resulting distribution is to
	// the uniform one:
	//   χ² = Σ((n-N)²/N)
	// Critical values are from https://www.medcalc.org/manual/chi-square-table.php
	// for p=0.1.
	var chiTable = map[int]float64{9: 14.68, 99: 117.407}

	t.Run("sort", func(t *testing.T) {
		var (
			i      uint64
			nodes  [size]uint64
			counts = make(map[uint64]uint64, size)
			key    = make([]byte, 16)
		)

		for i = 0; i < size; i++ {
			nodes[i] = i
		}

		for i = 0; i < keys; i++ {
			binary.BigEndian.PutUint64(key, i+size)
			hash := Hash(key)
			counts[Sort(nodes[:], hash)[0]]++
		}

		var chi2 float64
		mean := float64(keys) / float64(size)
		delta := mean * percent
		for node, count := range counts {
			d := mean - float64(count)
			chi2 += math.Pow(float64(count)-mean, 2) / mean
			require.True(t, d < delta && (0-d) < delta,
				"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
		}
		require.True(t, chi2 < chiTable[size-1],
			"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
	})

	t.Run("sortByIndex", func(t *testing.T) {
		var (
			i      uint64
			a, b   [size]uint64
			counts = make(map[uint64]int, size)
			key    = make([]byte, 16)
		)

		for i = 0; i < size; i++ {
			a[i] = i
		}

		for i = 0; i < keys; i++ {
			copy(b[:], a[:])
			binary.BigEndian.PutUint64(key, i+size)
			hash := Hash(key)
			SortSliceByIndex(b[:], hash)
			counts[b[0]]++
		}

		var chi2 float64
		mean := float64(keys) / float64(size)
		delta := mean * percent
		for node, count := range counts {
			d := mean - float64(count)
			chi2 += math.Pow(float64(count)-mean, 2) / mean
			require.True(t, d < delta && (0-d) < delta,
				"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
		}
		require.True(t, chi2 < chiTable[size-1],
			"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
	})

	t.Run("sortByValue", func(t *testing.T) {
		var (
			i      uint64
			a, b   [size]int
			counts = make(map[int]int, size)
			key    = make([]byte, 16)
		)

		for i = 0; i < size; i++ {
			a[i] = int(i)
		}

		for i = 0; i < keys; i++ {
			copy(b[:], a[:])
			binary.BigEndian.PutUint64(key, i+size)
			hash := Hash(key)
			SortSliceByValue(b[:], hash)
			counts[b[0]]++
		}

		var chi2 float64
		mean := float64(keys) / float64(size)
		delta := mean * percent
		for node, count := range counts {
			d := mean - float64(count)
			chi2 += math.Pow(float64(count)-mean, 2) / mean
			require.True(t, d < delta && (0-d) < delta,
				"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
		}
		require.True(t, chi2 < chiTable[size-1],
			"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
	})

	t.Run("sortByStringValue", func(t *testing.T) {
		var (
			i      uint64
			a, b   [size]string
			counts = make(map[string]int, size)
			key    = make([]byte, 16)
		)

		for i = 0; i < size; i++ {
			a[i] = strconv.FormatUint(i, 10)
		}

		for i = 0; i < keys; i++ {
			copy(b[:], a[:])
			binary.BigEndian.PutUint64(key, i+size)
			hash := Hash(key)
			SortSliceByValue(b[:], hash)
			counts[b[0]]++
		}

		var chi2 float64
		mean := float64(keys) / float64(size)
		delta := mean * percent
		for node, count := range counts {
			d := mean - float64(count)
			chi2 += math.Pow(float64(count)-mean, 2) / mean
			require.True(t, d < delta && (0-d) < delta,
				"Node %s received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
		}
		require.True(t, chi2 < chiTable[size-1],
			"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
	})

	t.Run("sortByInt32Value", func(t *testing.T) {
		var (
			i      uint64
			a, b   [size]int32
			counts = make(map[int32]int, size)
			key    = make([]byte, 16)
		)

		for i = 0; i < size; i++ {
			a[i] = int32(i)
		}

		for i = 0; i < keys; i++ {
			copy(b[:], a[:])
			binary.BigEndian.PutUint64(key, i+size)
			hash := Hash(key)
			SortSliceByValue(b[:], hash)
			counts[b[0]]++
		}

		var chi2 float64
		mean := float64(keys) / float64(size)
		delta := mean * percent
		for node, count := range counts {
			d := mean - float64(count)
			chi2 += math.Pow(float64(count)-mean, 2) / mean
			require.True(t, d < delta && (0-d) < delta,
				"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
		}
		require.True(t, chi2 < chiTable[size-1],
			"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
	})

	t.Run("sortByWeightValue", func(t *testing.T) {
		var (
			i            uint64
			a, b, result [size]int
			w            [size]float64
			key          = make([]byte, 16)
		)

		for i = 0; i < size; i++ {
			a[i] = int(i)
			w[i] = float64(size-i) / float64(size)
		}

		for i = 0; i < keys; i++ {
			copy(b[:], a[:])
			binary.BigEndian.PutUint64(key, i+size)
			hash := Hash(key)
			SortSliceByWeightValue(b[:], w[:], hash)
			result[b[0]]++
		}

		for i := 0; i < size-1; i++ {
			require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]),
				"result array %v must correspond to weights %v", result, w)
		}
	})

	t.Run("sortByWeightValueShuffledWeight", func(t *testing.T) {
		var (
			i            uint64
			a, b, result [size]int
			w            [size]float64
			key          = make([]byte, 16)
		)

		for i = 0; i < size; i++ {
			a[i] = int(i)
			w[i] = float64(size-i) / float64(size)
		}
		rand.Shuffle(size, func(i, j int) { w[i], w[j] = w[j], w[i] })

		for i = 0; i < keys; i++ {
			copy(b[:], a[:])
			binary.BigEndian.PutUint64(key, i+size)
			hash := Hash(key)
			SortSliceByWeightValue(b[:], w[:], hash)
			result[b[0]]++
		}

		for i := 0; i < size-1; i++ {
			require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]),
				"result array %v must correspond to weights %v", result, w)
		}
	})

	t.Run("sortByWeightValueEmptyWeight", func(t *testing.T) {
		var (
			i      uint64
			a, b   [size]int
			w      [size]float64
			counts = make(map[int]int, size)
			key    = make([]byte, 16)
		)

		for i = 0; i < size; i++ {
			a[i] = int(i)
		}

		for i = 0; i < keys; i++ {
			copy(b[:], a[:])
			binary.BigEndian.PutUint64(key, i+size)
			hash := Hash(key)
			SortSliceByWeightValue(b[:], w[:], hash)
			counts[b[0]]++
		}

		var chi2 float64
		mean := float64(keys) / float64(size)
		delta := mean * percent
		for node, count := range counts {
			d := mean - float64(count)
			chi2 += math.Pow(float64(count)-mean, 2) / mean
			require.True(t, d < delta && (0-d) < delta,
				"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
		}
		require.True(t, chi2 < chiTable[size-1],
			"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
	})

	t.Run("sortByWeightValueUniformWeight", func(t *testing.T) {
		var (
			i      uint64
			a, b   [size]int
			w      [size]float64
			counts = make(map[int]int, size)
			key    = make([]byte, 16)
		)

		for i = 0; i < size; i++ {
			a[i] = int(i)
			w[i] = 0.5
		}

		for i = 0; i < keys; i++ {
			copy(b[:], a[:])
			binary.BigEndian.PutUint64(key, i+size)
			hash := Hash(key)
			SortSliceByWeightValue(b[:], w[:], hash)
			counts[b[0]]++
		}

		var chi2 float64
		mean := float64(keys) / float64(size)
		delta := mean * percent
		for node, count := range counts {
			d := mean - float64(count)
			chi2 += math.Pow(float64(count)-mean, 2) / mean
			require.True(t, d < delta && (0-d) < delta,
				"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
		}
		require.True(t, chi2 < chiTable[size-1],
			"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
	})

	t.Run("sortByWeightValueAbsoluteW", func(t *testing.T) {
		const keys = 1
		var (
			i    uint64
			a, b [size]int
			w    [size]float64
			key  = make([]byte, 16)
		)

		for i = 0; i < size; i++ {
			a[i] = int(i)
		}
		w[size-1] = 1

		for i = 0; i < keys; i++ {
			copy(b[:], a[:])
			binary.BigEndian.PutUint64(key, i+size)
			hash := Hash(key)
			SortSliceByWeightValue(b[:], w[:], hash)
			require.True(t, b[0] == a[size-1],
				"expected the last element of %v (the only one with non-zero weight) to be first", a)
		}
	})

	t.Run("sortByWeightValueNormalizedWeight", func(t *testing.T) {
		var (
			i              uint64
			a, b, result   [size]uint64
			w, normalizedW [size]float64
			key            = make([]byte, 16)
		)

		for i = 0; i < size; i++ {
			a[i] = i
			w[int(i)] = 10
		}
		w[0] = 100

		// Use logarithmic normalization here.
		for i = 0; i < size; i++ {
			normalizedW[i] = math.Log2(w[i]) / math.Log2(w[0])
		}

		for i = 0; i < keys; i++ {
			copy(b[:], a[:])
			binary.BigEndian.PutUint64(key, i+size)
			hash := Hash(key)
			SortSliceByWeightValue(b[:], normalizedW[:], hash)
			for j := range b {
				result[b[j]] += uint64(len(b) - j)
			}
		}

		// Node 0 has a much larger weight, so exclude it and check that the
		// remaining nodes are close to uniform.
		cutResult := result[1:]
		var total uint64
		for i := range cutResult {
			total += cutResult[i]
		}

		var chi2 float64
		mean := float64(total) / float64(len(cutResult))
		delta := mean * percent
		for node, count := range cutResult {
			d := mean - float64(count)
			chi2 += math.Pow(float64(count)-mean, 2) / mean
			require.True(t, d < delta && (0-d) < delta,
				"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
		}
		require.True(t, chi2 < chiTable[size-1],
			"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
	})

	t.Run("hash collision", func(t *testing.T) {
		var (
			i      uint64
			counts = make(map[uint64]uint64)
			key    = make([]byte, 16)
		)

		for i = 0; i < keys; i++ {
			binary.BigEndian.PutUint64(key, i+size)
			hash := Hash(key)
			counts[hash]++
		}

		for node, count := range counts {
			if count > 1 {
				t.Errorf("Node %d received %d keys", node, count)
			}
		}
	})
}

func BenchmarkSort_fnv_10(b *testing.B) {
	hash := Hash(testKey)
	_ = benchmarkSort(b, 10, hash)
}

func BenchmarkSort_fnv_100(b *testing.B) {
	hash := Hash(testKey)
	_ = benchmarkSort(b, 100, hash)
}

func BenchmarkSort_fnv_1000(b *testing.B) {
	hash := Hash(testKey)
	_ = benchmarkSort(b, 1000, hash)
}

func BenchmarkSortByIndex_fnv_10(b *testing.B) {
	hash := Hash(testKey)
	benchmarkSortByIndex(b, 10, hash)
}

func BenchmarkSortByIndex_fnv_100(b *testing.B) {
	hash := Hash(testKey)
	benchmarkSortByIndex(b, 100, hash)
}

func BenchmarkSortByIndex_fnv_1000(b *testing.B) {
	hash := Hash(testKey)
	benchmarkSortByIndex(b, 1000, hash)
}

func BenchmarkSortByValue_fnv_10(b *testing.B) {
	hash := Hash(testKey)
	benchmarkSortByValue(b, 10, hash)
}

func BenchmarkSortByValue_fnv_100(b *testing.B) {
	hash := Hash(testKey)
	benchmarkSortByValue(b, 100, hash)
}

func BenchmarkSortByValue_fnv_1000(b *testing.B) {
	hash := Hash(testKey)
	benchmarkSortByValue(b, 1000, hash)
}

func BenchmarkSortByWeight_fnv_10(b *testing.B) {
	hash := Hash(testKey)
	_ = benchmarkSortByWeight(b, 10, hash)
}

func BenchmarkSortByWeight_fnv_100(b *testing.B) {
	hash := Hash(testKey)
	_ = benchmarkSortByWeight(b, 100, hash)
}

func BenchmarkSortByWeight_fnv_1000(b *testing.B) {
	hash := Hash(testKey)
	_ = benchmarkSortByWeight(b, 1000, hash)
}

func BenchmarkSortByWeightIndex_fnv_10(b *testing.B) {
	hash := Hash(testKey)
	benchmarkSortByWeightIndex(b, 10, hash)
}

func BenchmarkSortByWeightIndex_fnv_100(b *testing.B) {
	hash := Hash(testKey)
	benchmarkSortByWeightIndex(b, 100, hash)
}

func BenchmarkSortByWeightIndex_fnv_1000(b *testing.B) {
	hash := Hash(testKey)
	benchmarkSortByWeightIndex(b, 1000, hash)
}

func BenchmarkSortByWeightValue_fnv_10(b *testing.B) {
	hash := Hash(testKey)
	benchmarkSortByWeightValue(b, 10, hash)
}

func BenchmarkSortByWeightValue_fnv_100(b *testing.B) {
	hash := Hash(testKey)
	benchmarkSortByWeightValue(b, 100, hash)
}

func BenchmarkSortByWeightValue_fnv_1000(b *testing.B) {
	hash := Hash(testKey)
	benchmarkSortByWeightValue(b, 1000, hash)
}

// benchmarkSort accumulates and returns the first selected node so the
// compiler cannot optimize the Sort call away.
func benchmarkSort(b *testing.B, n int, hash uint64) uint64 {
	servers := make([]uint64, n)
	for i := uint64(0); i < uint64(len(servers)); i++ {
		servers[i] = i
	}

	b.ResetTimer()
	b.ReportAllocs()
	var x uint64
	for i := 0; i < b.N; i++ {
		x += Sort(servers, hash)[0]
	}
	return x
}

func benchmarkSortByIndex(b *testing.B, n int, hash uint64) {
	servers := make([]uint64, n)
	for i := uint64(0); i < uint64(len(servers)); i++ {
		servers[i] = i
	}

	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		SortSliceByIndex(servers, hash)
	}
}

func benchmarkSortByValue(b *testing.B, n int, hash uint64) {
	servers := make([]string, n)
	for i := uint64(0); i < uint64(len(servers)); i++ {
		servers[i] = "localhost:" + strconv.FormatUint(60000-i, 10)
	}

	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		SortSliceByValue(servers, hash)
	}
}

// benchmarkSortByWeight accumulates and returns the first selected node so the
// compiler cannot optimize the SortByWeight call away.
func benchmarkSortByWeight(b *testing.B, n int, hash uint64) uint64 {
	servers := make([]uint64, n)
	weights := make([]float64, n)
	for i := uint64(0); i < uint64(len(servers)); i++ {
		weights[i] = float64(uint64(n)-i) / float64(n)
		servers[i] = i
	}

	b.ResetTimer()
	b.ReportAllocs()
	var x uint64
	for i := 0; i < b.N; i++ {
		x += SortByWeight(servers, weights, hash)[0]
	}
	return x
}

func benchmarkSortByWeightIndex(b *testing.B, n int, hash uint64) {
	servers := make([]uint64, n)
	weights := make([]float64, n)
	for i := uint64(0); i < uint64(len(servers)); i++ {
		weights[i] = float64(uint64(n)-i) / float64(n)
		servers[i] = i
	}

	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		SortSliceByWeightIndex(servers, weights, hash)
	}
}

func benchmarkSortByWeightValue(b *testing.B, n int, hash uint64) {
	servers := make([]string, n)
	weights := make([]float64, n)
	for i := uint64(0); i < uint64(len(servers)); i++ {
		weights[i] = float64(uint64(n)-i) / float64(n)
		servers[i] = "localhost:" + strconv.FormatUint(60000-i, 10)
	}

	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		SortSliceByWeightValue(servers, weights, hash)
	}
}
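
// ExampleSortByWeight is an illustrative sketch of the weighted API: the node
// IDs and weights below are arbitrary, and no Output comment is given because
// the exact order depends on the hash values. SortByWeight's signature is the
// one used in benchmarkSortByWeight above.
func ExampleSortByWeight() {
	// Four nodes identified by small integers; a higher weight makes a node
	// more likely to be ranked first for any given key (see the
	// sortByWeightValue distribution test).
	nodes := []uint64{0, 1, 2, 3}
	weights := []float64{1, 1, 0.5, 0.25}

	key := []byte("/examples/weighted-object-key")
	hash := Hash(key)

	fmt.Println(SortByWeight(nodes, weights, hash))
}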