From e5d4b478811c6fd76fb10e2d2605386a002d6d9f Mon Sep 17 00:00:00 2001 From: Evgeniy Kulikov Date: Thu, 31 Jan 2019 12:54:02 +0300 Subject: [PATCH] Work around Hashing and tests (#2) * Work around Hashing and tests * fixes * add TravisCI * fix lint * so slow * add badges * work on README * Improved tests and get rid of fnv (#3) --- .travis.yml | 16 +++ README.md | 57 +++++++- go.mod | 2 +- go.sum | 4 +- hrw.go | 65 +++++---- hrw_test.go | 393 +++++++++++++++++++++++++++++++++++----------------- 6 files changed, 376 insertions(+), 161 deletions(-) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..8ea4227 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,16 @@ +language: go +go: + - 1.11.x +env: + - GO111MODULE=on +install: + - go get -v golang.org/x/lint/golint + - go mod tidy -v +script: + - golint -set_exit_status ./... + - go test -race -coverprofile=coverage.txt -covermode=atomic ./... +after_success: + - bash <(curl -s https://codecov.io/bash) +matrix: + allow_failures: + - go: tip \ No newline at end of file diff --git a/README.md b/README.md index 6e1046b..9329da3 100644 --- a/README.md +++ b/README.md @@ -1 +1,56 @@ -# Golang simple HRW implementation +# Golang HRW implementation + +[![Build Status](https://travis-ci.org/im-kulikov/hrw.svg?branch=master)](https://travis-ci.org/im-kulikov/hrw) +[![codecov](https://codecov.io/gh/im-kulikov/hrw/badge.svg)](https://codecov.io/gh/im-kulikov/hrw) +[![Report](https://goreportcard.com/badge/github.com/im-kulikov/hrw)](https://goreportcard.com/report/github.com/im-kulikov/hrw) +[![GitHub release](https://img.shields.io/github/release/im-kulikov/hrw.svg)](https://github.com/im-kulikov/hrw) + +[Rendezvous or highest random weight](https://en.wikipedia.org/wiki/Rendezvous_hashing) (HRW) hashing is an algorithm that allows clients to achieve distributed agreement on a set of k options out of a possible set of n options. A typical application is when clients need to agree on which sites (or proxies) objects are assigned to. When k is 1, it subsumes the goals of consistent hashing, using an entirely different method. + +## Install + +`go get github.com/im-kulikov/hrw` + +## Example + +```go +package main + +import ( + "fmt" + + "github.com/im-kulikov/hrw" +) + +func main() { + // given a set of servers + servers := []string{ + "one.example.com", + "two.example.com", + "three.example.com", + "four.example.com", + "five.example.com", + "six.example.com", + } + + // HRW can consistently select a uniformly-distributed set of servers for + // any given key + var ( + key = []byte("/examples/object-key") + h = hrw.Hash(key) + ) + + hrw.SortSliceByValue(servers, h) + for id := range servers { + fmt.Printf("trying GET %s%s\n", servers[id], key) + } + + // Output: + // trying GET four.example.com/examples/object-key + // trying GET three.example.com/examples/object-key + // trying GET one.example.com/examples/object-key + // trying GET two.example.com/examples/object-key + // trying GET six.example.com/examples/object-key + // trying GET five.example.com/examples/object-key +} +``` \ No newline at end of file diff --git a/go.mod b/go.mod index 86e5730..81c57be 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,3 @@ module github.com/im-kulikov/hrw -require github.com/reusee/mmh3 v0.0.0-20140820141314-64b85163255b +require github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 diff --git a/go.sum b/go.sum index 9d1de9d..29ee515 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,2 @@ -github.com/reusee/mmh3 v0.0.0-20140820141314-64b85163255b h1:GQkEnyBFqzQXb3RFqGt5z2QcBZJVQxgzXKF/sPCFh7w= -github.com/reusee/mmh3 v0.0.0-20140820141314-64b85163255b/go.mod h1:ADBBIMrt68BC/v967NyoiPZMwPVq44r8QJ5oRyXJHJs= +github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ= +github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= diff --git a/hrw.go b/hrw.go index 786b0a7..05744ef 100644 --- a/hrw.go +++ b/hrw.go @@ -3,16 +3,17 @@ package hrw import ( - "errors" - "hash/fnv" + "encoding/binary" "reflect" "sort" - "strconv" + + "github.com/spaolacci/murmur3" ) type ( swapper func(i, j int) + // Hasher interface used by SortSliceByValue Hasher interface{ Hash() uint64 } hashed struct { @@ -38,6 +39,12 @@ func (h hashed) Len() int { return h.length } func (h hashed) Less(i, j int) bool { return h.weight[h.sorted[i]] < h.weight[h.sorted[j]] } func (h hashed) Swap(i, j int) { h.sorted[i], h.sorted[j] = h.sorted[j], h.sorted[i] } +// Hash uses murmur3 hash to return uint64 +func Hash(key []byte) uint64 { + return murmur3.Sum64(key) +} + +// SortByWeight receive nodes and hash, and sort it by weight func SortByWeight(nodes []uint64, hash uint64) []uint64 { var ( l = len(nodes) @@ -57,10 +64,11 @@ func SortByWeight(nodes []uint64, hash uint64) []uint64 { return h.sorted } -func SortSliceByValue(slice interface{}, hash uint64) error { +// SortSliceByValue received []T and hash to sort by value-weight +func SortSliceByValue(slice interface{}, hash uint64) { t := reflect.TypeOf(slice) if t.Kind() != reflect.Slice { - return errors.New("must be slice") + return } var ( @@ -71,29 +79,24 @@ func SortSliceByValue(slice interface{}, hash uint64) error { ) if length == 0 { - return nil + return } switch slice := slice.(type) { case []int: - hasher := fnv.New64() + var key = make([]byte, 16) for i := 0; i < length; i++ { - hasher.Reset() - // error always nil - _, _ = hasher.Write([]byte(strconv.Itoa(slice[i]))) - rule = append(rule, weight(hash, hasher.Sum64())) + binary.BigEndian.PutUint64(key, uint64(slice[i])) + rule = append(rule, weight(Hash(key), hash)) } case []string: - hasher := fnv.New64() for i := 0; i < length; i++ { - hasher.Reset() - // error always nil - _, _ = hasher.Write([]byte(slice[i])) - rule = append(rule, weight(hash, hasher.Sum64())) + rule = append(rule, weight(hash, + Hash([]byte(slice[i])))) } default: if _, ok := val.Index(0).Interface().(Hasher); !ok { - return errors.New("unknown type") + return } for i := 0; i < length; i++ { @@ -103,36 +106,44 @@ func SortSliceByValue(slice interface{}, hash uint64) error { } rule = SortByWeight(rule, hash) - sortByRule(swap, uint64(length), rule) - - return nil + sortByRuleInverse(swap, uint64(length), rule) } +// SortSliceByIndex received []T and hash to sort by index-weight func SortSliceByIndex(slice interface{}, hash uint64) { length := uint64(reflect.ValueOf(slice).Len()) swap := reflect.Swapper(slice) - rule := make([]uint64, 0, length) for i := uint64(0); i < length; i++ { rule = append(rule, i) } - rule = SortByWeight(rule, hash) - sortByRule(swap, length, rule) + sortByRuleInverse(swap, length, rule) } -func sortByRule(swap swapper, length uint64, rule []uint64) { +func sortByRuleDirect(swap swapper, length uint64, rule []uint64) { done := make([]bool, length) for i := uint64(0); i < length; i++ { if done[i] { continue } - - done[i] = true - for j := rule[i]; !done[rule[j]]; j = rule[j] { swap(int(i), int(j)) done[j] = true } } } + +func sortByRuleInverse(swap swapper, length uint64, rule []uint64) { + done := make([]bool, length) + for i := uint64(0); i < length; i++ { + if done[i] { + continue + } + + for j := i; !done[rule[j]]; j = rule[j] { + swap(int(j), int(rule[j])) + done[j] = true + } + } +} diff --git a/hrw_test.go b/hrw_test.go index 2da9cf1..2a8e00d 100644 --- a/hrw_test.go +++ b/hrw_test.go @@ -3,18 +3,15 @@ package hrw import ( "encoding/binary" "fmt" - "hash/fnv" "math" "reflect" "strconv" "testing" - - "github.com/reusee/mmh3" ) type hashString string -var testKey = []byte("Golang simple HRW implementation") +var testKey = []byte("0xff51afd7ed558ccd") func Example() { // given a set of servers @@ -31,64 +28,30 @@ func Example() { // any given key var ( key = []byte("/examples/object-key") - h = hash(key) - err = SortSliceByValue(servers, h) + h = Hash(key) ) - if err != nil { - panic(err) - } - + SortSliceByValue(servers, h) for id := range servers { fmt.Printf("trying GET %s%s\n", servers[id], key) } // Output: - // trying GET six.example.com/examples/object-key - // trying GET one.example.com/examples/object-key - // trying GET three.example.com/examples/object-key // trying GET four.example.com/examples/object-key - // trying GET five.example.com/examples/object-key + // trying GET three.example.com/examples/object-key + // trying GET one.example.com/examples/object-key // trying GET two.example.com/examples/object-key + // trying GET six.example.com/examples/object-key + // trying GET five.example.com/examples/object-key } func (h hashString) Hash() uint64 { - hs := fnv.New64() - // error always nil - _, _ = hs.Write([]byte(h)) - return hs.Sum64() >> 1 -} - -func hash(key []byte) uint64 { - h := fnv.New64() - // error always nil - _, _ = h.Write(key) - return (h.Sum64() >> 1) ^ math.MaxUint64 -} - -func mur3hash(key []byte) uint64 { - h := mmh3.New128() - // error always nil - _, _ = h.Write(key) - - var ( - data = h.Sum(nil) - length = len(data) - result uint64 - ) - - for i := 0; i < length; i++ { - result += uint64(data[i]) << uint64(length-i) - } - - return result + return Hash([]byte(h)) } func TestSortSliceByIndex(t *testing.T) { actual := []string{"a", "b", "c", "d", "e", "f"} - expect := []string{"e", "a", "c", "d", "b", "f"} - - hash := hash(testKey) - + expect := []string{"e", "a", "c", "f", "d", "b"} + hash := Hash(testKey) SortSliceByIndex(actual, hash) if !reflect.DeepEqual(actual, expect) { t.Errorf("Was %#v, but expected %#v", actual, expect) @@ -97,60 +60,79 @@ func TestSortSliceByIndex(t *testing.T) { func TestSortSliceByValue(t *testing.T) { actual := []string{"a", "b", "c", "d", "e", "f"} - expect := []string{"e", "b", "c", "d", "f", "a"} - - hash := hash(testKey) - - if err := SortSliceByValue(actual, hash); err != nil { - t.Fatal(err) - } - + expect := []string{"d", "b", "a", "f", "c", "e"} + hash := Hash(testKey) + SortSliceByValue(actual, hash) if !reflect.DeepEqual(actual, expect) { t.Errorf("Was %#v, but expected %#v", actual, expect) } } +func TestSortByRule(t *testing.T) { + t.Run("direct", func(t *testing.T) { + // 0 1 2 3 4 5 + actual := []string{"a", "b", "c", "d", "e", "f"} + // 4 2 0 5 3 1 + expect := []string{"c", "f", "b", "e", "a", "d"} + rule := []uint64{4, 2, 0, 5, 3, 1} + + sortByRuleDirect( + func(i, j int) { actual[i], actual[j] = actual[j], actual[i] }, + 6, rule) + + if !reflect.DeepEqual(actual, expect) { + t.Errorf("Was %#v, but expected %#v", actual, expect) + } + }) + + t.Run("inverse", func(t *testing.T) { + // 0 1 2 3 4 5 + actual := []string{"a", "b", "c", "d", "e", "f"} + // 4 2 0 5 3 1 + expect := []string{"e", "c", "a", "f", "d", "b"} + rule := []uint64{4, 2, 0, 5, 3, 1} + + sortByRuleInverse( + func(i, j int) { actual[i], actual[j] = actual[j], actual[i] }, + 6, rule) + + if !reflect.DeepEqual(actual, expect) { + t.Errorf("Was %#v, but expected %#v", actual, expect) + } + }) +} + func TestSortSliceByValueFail(t *testing.T) { t.Run("empty slice", func(t *testing.T) { - actual := make([]int, 0) - hash := hash(testKey) - - if err := SortSliceByValue(actual, hash); err != nil { - t.Fatal(err) - } - + var ( + actual []int + hash = Hash(testKey) + ) + SortSliceByValue(actual, hash) }) t.Run("must be slice", func(t *testing.T) { actual := 10 - hash := hash(testKey) - - if err := SortSliceByValue(actual, hash); err == nil { - t.Fatal("must fail for bad type") - } - + hash := Hash(testKey) + SortSliceByValue(actual, hash) }) - t.Run("must fail for unknown type", func(t *testing.T) { + t.Run("must 'fail' for unknown type", func(t *testing.T) { actual := []byte{1, 2, 3, 4, 5} - hash := hash(testKey) - - if err := SortSliceByValue(actual, hash); err == nil { - t.Fatal("must fail for bad type") + expect := []byte{1, 2, 3, 4, 5} + hash := Hash(testKey) + SortSliceByValue(actual, hash) + if !reflect.DeepEqual(actual, expect) { + t.Errorf("Was %#v, but expected %#v", actual, expect) } }) } func TestSortSliceByValueHasher(t *testing.T) { actual := []hashString{"a", "b", "c", "d", "e", "f"} - expect := []hashString{"e", "d", "c", "a", "b", "f"} - - hash := hash(testKey) - - if err := SortSliceByValue(actual, hash); err != nil { - t.Fatal(err) - } - + expect := []hashString{"d", "b", "a", "f", "c", "e"} + hash := Hash(testKey) + SortSliceByValue(actual, hash) if !reflect.DeepEqual(actual, expect) { t.Errorf("Was %#v, but expected %#v", actual, expect) } @@ -158,14 +140,9 @@ func TestSortSliceByValueHasher(t *testing.T) { func TestSortSliceByValueIntSlice(t *testing.T) { actual := []int{0, 1, 2, 3, 4, 5} - expect := []int{2, 0, 5, 3, 4, 1} - - hash := hash(testKey) - - if err := SortSliceByValue(actual, hash); err != nil { - t.Fatal(err) - } - + expect := []int{2, 3, 1, 4, 0, 5} + hash := Hash(testKey) + SortSliceByValue(actual, hash) if !reflect.DeepEqual(actual, expect) { t.Errorf("Was %#v, but expected %#v", actual, expect) } @@ -173,90 +150,248 @@ func TestSortSliceByValueIntSlice(t *testing.T) { func TestSortByWeight(t *testing.T) { nodes := []uint64{1, 2, 3, 4, 5} - hash := mur3hash(testKey) - + hash := Hash(testKey) actual := SortByWeight(nodes, hash) - expected := []uint64{0, 1, 4, 2, 3} + expected := []uint64{3, 1, 4, 2, 0} if !reflect.DeepEqual(actual, expected) { t.Errorf("Was %#v, but expected %#v", actual, expected) } } func TestUniformDistribution(t *testing.T) { - var ( - i uint64 - size = uint64(10) - nodes = make([]uint64, 0, size) - counts = make(map[uint64]uint64) - key = make([]byte, 16) - keys = uint64(10000000) + const ( + size = 10 + keys = 100000 + percent = 0.03 ) + // We use χ2 method to determine similarity of distribution with uniform distribution. + // χ2 = Σ((n-N)**2/N) + // https://www.medcalc.org/manual/chi-square-table.php p=0.1 + var chiTable = map[int]float64{9: 14.68, 99: 117.407} - for i = 0; i < size; i++ { - nodes = append(nodes, i) - } + t.Run("sortByWeight", func(t *testing.T) { + var ( + i uint64 + nodes [size]uint64 + counts = make(map[uint64]uint64, size) + key = make([]byte, 16) + ) - for i = 0; i < keys; i++ { - binary.BigEndian.PutUint64(key, i) - hash := hash(key) - counts[SortByWeight(nodes, hash)[0]]++ - } - - mean := float64(keys) / float64(len(nodes)) - delta := mean * 0.01 // 1 % - for node, count := range counts { - d := mean - float64(count) - if d > delta || (0-d) > delta { - t.Errorf( - "Node %d received %d keys, expected %v (+/- %v)", - node, count, mean, delta, - ) + for i = 0; i < size; i++ { + nodes[i] = i } - } + + for i = 0; i < keys; i++ { + binary.BigEndian.PutUint64(key, i) + hash := Hash(key) + counts[SortByWeight(nodes[:], hash)[0]]++ + } + + var chi2 float64 + mean := float64(keys) / float64(size) + delta := mean * percent + for node, count := range counts { + d := mean - float64(count) + chi2 += math.Pow(float64(count)-mean, 2) / mean + if d > delta || (0-d) > delta { + t.Errorf( + "Node %d received %d keys, expected %.0f (+/- %.2f)", + node, count, mean, delta, + ) + } + } + if chi2 > chiTable[size-1] { + t.Errorf( + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", + chi2, chiTable[size-1]) + } + }) + + t.Run("sortByIndex", func(t *testing.T) { + var ( + i uint64 + a, b [size]uint64 + counts = make(map[uint64]int, size) + key = make([]byte, 16) + ) + + for i = 0; i < size; i++ { + a[i] = i + } + + for i = 0; i < keys; i++ { + copy(b[:], a[:]) + + binary.BigEndian.PutUint64(key, i) + hash := Hash(key) + SortSliceByIndex(b[:], hash) + counts[b[0]]++ + } + + var chi2 float64 + mean := float64(keys) / float64(size) + delta := mean * percent + for node, count := range counts { + d := mean - float64(count) + chi2 += math.Pow(float64(count)-mean, 2) / mean + if d > delta || (0-d) > delta { + t.Errorf( + "Node %d received %d keys, expected %.0f (+/- %.2f)", + node, count, mean, delta, + ) + } + } + if chi2 > chiTable[size-1] { + t.Errorf( + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", + chi2, chiTable[size-1]) + } + }) + + t.Run("sortByValue", func(t *testing.T) { + var ( + i uint64 + a, b [size]int + counts = make(map[int]int, size) + key = make([]byte, 16) + ) + + for i = 0; i < size; i++ { + a[i] = int(i) + } + + for i = 0; i < keys; i++ { + copy(b[:], a[:]) + binary.BigEndian.PutUint64(key, i) + hash := Hash(key) + SortSliceByValue(b[:], hash) + counts[b[0]]++ + } + + var chi2 float64 + mean := float64(keys) / float64(size) + delta := mean * percent + for node, count := range counts { + d := mean - float64(count) + chi2 += math.Pow(float64(count)-mean, 2) / mean + if d > delta || (0-d) > delta { + t.Errorf( + "Node %d received %d keys, expected %.0f (+/- %.2f)", + node, count, mean, delta, + ) + } + } + if chi2 > chiTable[size-1] { + t.Errorf( + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", + chi2, chiTable[size-1]) + } + }) + + t.Run("sortByStringValue", func(t *testing.T) { + var ( + i uint64 + a, b [size]string + counts = make(map[string]int, size) + key = make([]byte, 16) + ) + + for i = 0; i < size; i++ { + a[i] = strconv.FormatUint(i, 10) + } + + for i = 0; i < keys; i++ { + copy(b[:], a[:]) + binary.BigEndian.PutUint64(key, i) + hash := Hash(key) + SortSliceByValue(b[:], hash) + counts[b[0]]++ + } + + var chi2 float64 + mean := float64(keys) / float64(size) + delta := mean * percent + for node, count := range counts { + d := mean - float64(count) + chi2 += math.Pow(float64(count)-mean, 2) / mean + if d > delta || (0-d) > delta { + t.Errorf( + "Node %s received %d keys, expected %.0f (+/- %.2f)", + node, count, mean, delta, + ) + } + } + if chi2 > chiTable[size-1] { + t.Errorf( + "Chi2 condition for .9 is not met (expected %.2f <= %.2f)", + chi2, chiTable[size-1]) + } + + }) + + t.Run("hash collision", func(t *testing.T) { + var ( + i uint64 + counts = make(map[uint64]uint64) + key = make([]byte, 16) + ) + + for i = 0; i < keys; i++ { + binary.BigEndian.PutUint64(key, i) + hash := Hash(key) + counts[hash]++ + } + + for node, count := range counts { + if count > 1 { + t.Errorf("Node %d received %d keys", node, count) + } + } + }) } func BenchmarkSortByWeight_fnv_10(b *testing.B) { - hash := hash(testKey) + hash := Hash(testKey) _ = benchmarkSortByWeight(b, 10, hash) } func BenchmarkSortByWeight_fnv_100(b *testing.B) { - hash := hash(testKey) + hash := Hash(testKey) _ = benchmarkSortByWeight(b, 100, hash) } func BenchmarkSortByWeight_fnv_1000(b *testing.B) { - hash := hash(testKey) + hash := Hash(testKey) _ = benchmarkSortByWeight(b, 1000, hash) } func BenchmarkSortByIndex_fnv_10(b *testing.B) { - hash := hash(testKey) + hash := Hash(testKey) benchmarkSortByIndex(b, 10, hash) } func BenchmarkSortByIndex_fnv_100(b *testing.B) { - hash := hash(testKey) + hash := Hash(testKey) benchmarkSortByIndex(b, 100, hash) } func BenchmarkSortByIndex_fnv_1000(b *testing.B) { - hash := hash(testKey) + hash := Hash(testKey) benchmarkSortByIndex(b, 1000, hash) } func BenchmarkSortByValue_fnv_10(b *testing.B) { - hash := hash(testKey) + hash := Hash(testKey) benchmarkSortByValue(b, 10, hash) } func BenchmarkSortByValue_fnv_100(b *testing.B) { - hash := hash(testKey) + hash := Hash(testKey) benchmarkSortByValue(b, 100, hash) } func BenchmarkSortByValue_fnv_1000(b *testing.B) { - hash := hash(testKey) + hash := Hash(testKey) benchmarkSortByValue(b, 1000, hash) } @@ -300,8 +435,6 @@ func benchmarkSortByValue(b *testing.B, n int, hash uint64) { b.ReportAllocs() for i := 0; i < b.N; i++ { - if err := SortSliceByValue(servers, hash); err != nil { - b.Fatal(err) - } + SortSliceByValue(servers, hash) } }