diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ac07c98 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.idea +.vscode +*.out \ No newline at end of file diff --git a/README.md b/README.md index c2ab52a..6e1046b 100644 --- a/README.md +++ b/README.md @@ -1,2 +1 @@ -# hrw -Golang simple HRW implementation +# Golang simple HRW implementation diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..86e5730 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module github.com/im-kulikov/hrw + +require github.com/reusee/mmh3 v0.0.0-20140820141314-64b85163255b diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..9d1de9d --- /dev/null +++ b/go.sum @@ -0,0 +1,2 @@ +github.com/reusee/mmh3 v0.0.0-20140820141314-64b85163255b h1:GQkEnyBFqzQXb3RFqGt5z2QcBZJVQxgzXKF/sPCFh7w= +github.com/reusee/mmh3 v0.0.0-20140820141314-64b85163255b/go.mod h1:ADBBIMrt68BC/v967NyoiPZMwPVq44r8QJ5oRyXJHJs= diff --git a/hrw.go b/hrw.go new file mode 100644 index 0000000..550b8b5 --- /dev/null +++ b/hrw.go @@ -0,0 +1,138 @@ +// Package hrw implements Rendezvous hashing. +// http://en.wikipedia.org/wiki/Rendezvous_hashing. +package hrw + +import ( + "errors" + "hash/fnv" + "reflect" + "sort" + "strconv" +) + +type ( + swapper func(i, j int) + + Hasher interface{ Hash() uint64 } + + hashed struct { + length int + sorted []uint64 + weight []uint64 + } +) + +const m64 = 18446744073709551615 // modulus (2**64-1) + +func weight(x uint64, y uint64) uint64 { + acc := x ^ y + acc ^= acc >> 33 + acc = (acc * 0xff51afd7ed558ccd) % m64 + acc ^= acc >> 33 + acc = (acc * 0xc4ceb9fe1a85ec53) % m64 + acc ^= acc >> 33 + return acc +} + +func (h hashed) Len() int { return h.length } +func (h hashed) Less(i, j int) bool { return h.weight[h.sorted[i]] < h.weight[h.sorted[j]] } +func (h hashed) Swap(i, j int) { h.sorted[i], h.sorted[j] = h.sorted[j], h.sorted[i] } + +func SortByWeight(nodes []uint64, hash uint64) []uint64 { + var ( + l = len(nodes) + h = hashed{ + length: l, + sorted: make([]uint64, 0, l), + weight: make([]uint64, 0, l), + } + ) + + for i, node := range nodes { + h.sorted = append(h.sorted, uint64(i)) + h.weight = append(h.weight, weight(node, hash)) + } + + sort.Sort(h) + return h.sorted +} + +func SortSliceByValue(slice interface{}, hash uint64) error { + t := reflect.TypeOf(slice) + if t.Kind() != reflect.Slice { + return errors.New("must be slice") + } + + var ( + val = reflect.ValueOf(slice) + swap = reflect.Swapper(slice) + length = val.Len() + rule = make([]uint64, 0, length) + ) + + if length == 0 { + return nil + } + + switch slice := slice.(type) { + case []int: + hasher := fnv.New64() + for i := 0; i < length; i++ { + hasher.Reset() + // error always nil + _, _ = hasher.Write([]byte(strconv.Itoa(slice[i]))) + rule = append(rule, weight(hash, hasher.Sum64())) + } + case []string: + hasher := fnv.New64() + for i := 0; i < length; i++ { + hasher.Reset() + // error always nil + _, _ = hasher.Write([]byte(slice[i])) + rule = append(rule, weight(hash, hasher.Sum64())) + } + default: + if _, ok := val.Index(0).Interface().(Hasher); !ok { + return errors.New("unknown type") + } + + for i := 0; i < length; i++ { + h := val.Index(i).Interface().(Hasher) + rule = append(rule, weight(hash, h.Hash())) + } + } + + rule = SortByWeight(rule, hash) + sortByRule(swap, uint64(length), rule) + + return nil +} + +func SortSliceByIndex(slice interface{}, hash uint64) { + length := uint64(reflect.ValueOf(slice).Len()) + swap := reflect.Swapper(slice) + + rule := make([]uint64, 0, length) + for i := uint64(0); i < length; i++ { + rule = append(rule, i) + } + + rule = SortByWeight(rule, hash) + sortByRule(swap, length, rule) +} + +func sortByRule(swap swapper, length uint64, rule []uint64) { + done := make([]bool, length) + for i := uint64(0); i < length; i++ { + if done[i] { + continue + } + + done[i] = true + + for j := rule[i]; !done[rule[j]]; j = rule[j] { + swap(int(i), int(j)) + done[j] = true + } + } +} diff --git a/hrw.test b/hrw.test new file mode 100755 index 0000000..42f2f30 Binary files /dev/null and b/hrw.test differ diff --git a/hrw_test.go b/hrw_test.go new file mode 100644 index 0000000..48c08e4 --- /dev/null +++ b/hrw_test.go @@ -0,0 +1,306 @@ +package hrw + +import ( + "encoding/binary" + "fmt" + "hash/fnv" + "reflect" + "strconv" + "testing" + + "github.com/reusee/mmh3" +) + +type hashString string + +var testKey = []byte("Golang simple HRW implementation") + +func Example() { + // given a set of servers + servers := []string{ + "one.example.com", + "two.example.com", + "three.example.com", + "four.example.com", + "five.example.com", + "six.example.com", + } + + // HRW can consistently select a uniformly-distributed set of servers for + // any given key + var ( + key = []byte("/examples/object-key") + h = hash(key) + err = SortSliceByValue(servers, h) + ) + + if err != nil { + panic(err) + } + + for id := range servers { + fmt.Printf("trying GET %s%s\n", servers[id], key) + } + + // Output: + // trying GET six.example.com/examples/object-key + // trying GET one.example.com/examples/object-key + // trying GET three.example.com/examples/object-key + // trying GET four.example.com/examples/object-key + // trying GET five.example.com/examples/object-key + // trying GET two.example.com/examples/object-key +} +func (h hashString) Hash() uint64 { + hs := fnv.New64() + // error always nil + _, _ = hs.Write([]byte(h)) + return (hs.Sum64() >> 1) % m64 +} + +func hash(key []byte) uint64 { + h := fnv.New64() + // error always nil + _, _ = h.Write(key) + return (h.Sum64() >> 1) ^ m64 +} + +func mur3hash(key []byte) uint64 { + h := mmh3.New128() + // error always nil + _, _ = h.Write(key) + + var ( + data = h.Sum(nil) + length = len(data) + result uint64 + ) + + for i := 0; i < length; i++ { + result += uint64(data[i]) << uint64(length-i) + } + + return result +} + +func TestSortSliceByIndex(t *testing.T) { + actual := []string{"a", "b", "c", "d", "e", "f"} + expect := []string{"e", "a", "c", "d", "b", "f"} + + hash := hash(testKey) + + SortSliceByIndex(actual, hash) + if !reflect.DeepEqual(actual, expect) { + t.Errorf("Was %#v, but expected %#v", actual, expect) + } +} + +func TestSortSliceByValue(t *testing.T) { + actual := []string{"a", "b", "c", "d", "e", "f"} + expect := []string{"e", "b", "c", "d", "f", "a"} + + hash := hash(testKey) + + if err := SortSliceByValue(actual, hash); err != nil { + t.Fatal(err) + } + + if !reflect.DeepEqual(actual, expect) { + t.Errorf("Was %#v, but expected %#v", actual, expect) + } +} + +func TestSortSliceByValueFail(t *testing.T) { + t.Run("empty slice", func(t *testing.T) { + actual := make([]int, 0) + hash := hash(testKey) + + if err := SortSliceByValue(actual, hash); err != nil { + t.Fatal(err) + } + + }) + + t.Run("must be slice", func(t *testing.T) { + actual := 10 + hash := hash(testKey) + + if err := SortSliceByValue(actual, hash); err == nil { + t.Fatal("must fail for bad type") + } + + }) + + t.Run("must fail for unknown type", func(t *testing.T) { + actual := []byte{1, 2, 3, 4, 5} + hash := hash(testKey) + + if err := SortSliceByValue(actual, hash); err == nil { + t.Fatal("must fail for bad type") + } + }) +} + +func TestSortSliceByValueHasher(t *testing.T) { + actual := []hashString{"a", "b", "c", "d", "e", "f"} + expect := []hashString{"e", "d", "c", "a", "b", "f"} + + hash := hash(testKey) + + if err := SortSliceByValue(actual, hash); err != nil { + t.Fatal(err) + } + + if !reflect.DeepEqual(actual, expect) { + t.Errorf("Was %#v, but expected %#v", actual, expect) + } +} + +func TestSortSliceByValueIntSlice(t *testing.T) { + actual := []int{0, 1, 2, 3, 4, 5} + expect := []int{2, 0, 5, 3, 4, 1} + + hash := hash(testKey) + + if err := SortSliceByValue(actual, hash); err != nil { + t.Fatal(err) + } + + if !reflect.DeepEqual(actual, expect) { + t.Errorf("Was %#v, but expected %#v", actual, expect) + } +} + +func TestSortByWeight(t *testing.T) { + nodes := []uint64{1, 2, 3, 4, 5} + hash := mur3hash(testKey) + + actual := SortByWeight(nodes, hash) + expected := []uint64{0, 1, 4, 2, 3} + if !reflect.DeepEqual(actual, expected) { + t.Errorf("Was %#v, but expected %#v", actual, expected) + } +} + +func TestUniformDistribution(t *testing.T) { + var ( + i uint64 + size = uint64(4) + nodes = make([]uint64, 0, size) + counts = make(map[uint64]uint64) + key = make([]byte, 16) + keys = uint64(10000000) + ) + + for i = 0; i < size; i++ { + nodes = append(nodes, i) + } + + for i = 0; i < keys; i++ { + binary.BigEndian.PutUint64(key, i) + hash := hash(key) + counts[SortByWeight(nodes, hash)[0]]++ + } + + mean := float64(keys) / float64(len(nodes)) + delta := mean * 0.002 // 0.2% + for node, count := range counts { + d := mean - float64(count) + if d > delta || (0-d) > delta { + t.Errorf( + "Node %d received %d keys, expected %v (+/- %v)", + node, count, mean, delta, + ) + } + } +} + +func BenchmarkSortByWeight_fnv_10(b *testing.B) { + hash := hash(testKey) + _ = benchmarkSortByWeight(b, 10, hash) +} + +func BenchmarkSortByWeight_fnv_100(b *testing.B) { + hash := hash(testKey) + _ = benchmarkSortByWeight(b, 100, hash) +} + +func BenchmarkSortByWeight_fnv_1000(b *testing.B) { + hash := hash(testKey) + _ = benchmarkSortByWeight(b, 1000, hash) +} + +func BenchmarkSortByIndex_fnv_10(b *testing.B) { + hash := hash(testKey) + benchmarkSortByIndex(b, 10, hash) +} + +func BenchmarkSortByIndex_fnv_100(b *testing.B) { + hash := hash(testKey) + benchmarkSortByIndex(b, 100, hash) +} + +func BenchmarkSortByIndex_fnv_1000(b *testing.B) { + hash := hash(testKey) + benchmarkSortByIndex(b, 1000, hash) +} + +func BenchmarkSortByValue_fnv_10(b *testing.B) { + hash := hash(testKey) + benchmarkSortByValue(b, 10, hash) +} + +func BenchmarkSortByValue_fnv_100(b *testing.B) { + hash := hash(testKey) + benchmarkSortByValue(b, 100, hash) +} + +func BenchmarkSortByValue_fnv_1000(b *testing.B) { + hash := hash(testKey) + benchmarkSortByValue(b, 1000, hash) +} + +func benchmarkSortByWeight(b *testing.B, n int, hash uint64) uint64 { + servers := make([]uint64, n) + for i := uint64(0); i < uint64(len(servers)); i++ { + servers[i] = i + } + + b.ResetTimer() + b.ReportAllocs() + + var x uint64 + for i := 0; i < b.N; i++ { + x += SortByWeight(servers, hash)[0] + } + return x +} + +func benchmarkSortByIndex(b *testing.B, n int, hash uint64) { + servers := make([]uint64, n) + for i := uint64(0); i < uint64(len(servers)); i++ { + servers[i] = i + } + + b.ResetTimer() + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + SortSliceByIndex(servers, hash) + } +} + +func benchmarkSortByValue(b *testing.B, n int, hash uint64) { + servers := make([]string, n) + for i := uint64(0); i < uint64(len(servers)); i++ { + servers[i] = "localhost:" + strconv.FormatUint(60000-i, 10) + } + + b.ResetTimer() + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + if err := SortSliceByValue(servers, hash); err != nil { + b.Fatal(err) + } + } +}