forked from TrueCloudLab/hrw
Compare commits
16 commits
Author | SHA1 | Date | |
---|---|---|---|
3a8489bfe7 | |||
1b7ec474c9 | |||
78c3f718b1 | |||
16a7740ccd | |||
2ac89c82b6 | |||
266da7c69a | |||
c52f74d8e1 | |||
895ecf150f | |||
213c105ac1 | |||
c175ef4099 | |||
2c085708de | |||
15b3800347 | |||
2e205cf1ca | |||
ebca2848ad | |||
0ad932400c | |||
08e14caaf3 |
11 changed files with 149 additions and 88 deletions
1
.github/CODEOWNERS
vendored
1
.github/CODEOWNERS
vendored
|
@ -1 +0,0 @@
|
|||
* @TrueCloudLab/dev
|
10
.gitlint
Normal file
10
.gitlint
Normal file
|
@ -0,0 +1,10 @@
|
|||
[general]
|
||||
fail-without-commits=true
|
||||
contrib=CC1
|
||||
|
||||
[title-match-regex]
|
||||
regex=^\[\#[0-9]+\]\s
|
||||
|
||||
[ignore-by-title]
|
||||
regex=^Release(.*)
|
||||
ignore=title-match-regex
|
30
.pre-commit-config.yaml
Normal file
30
.pre-commit-config.yaml
Normal file
|
@ -0,0 +1,30 @@
|
|||
ci:
|
||||
autofix_prs: false
|
||||
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.4.0
|
||||
hooks:
|
||||
- id: check-added-large-files
|
||||
- id: check-case-conflict
|
||||
- id: check-executables-have-shebangs
|
||||
- id: check-shebang-scripts-are-executable
|
||||
- id: check-merge-conflict
|
||||
- id: check-json
|
||||
- id: check-xml
|
||||
- id: check-yaml
|
||||
- id: trailing-whitespace
|
||||
args: [--markdown-linebreak-ext=md]
|
||||
- id: end-of-file-fixer
|
||||
exclude: ".key$"
|
||||
|
||||
- repo: https://github.com/golangci/golangci-lint
|
||||
rev: v1.51.2
|
||||
hooks:
|
||||
- id: golangci-lint
|
||||
|
||||
- repo: https://github.com/jorisroovers/gitlint
|
||||
rev: v0.18.0
|
||||
hooks:
|
||||
- id: gitlint
|
||||
stages: [commit-msg]
|
17
.travis.yml
17
.travis.yml
|
@ -1,17 +0,0 @@
|
|||
language: go
|
||||
go:
|
||||
- 1.11.x
|
||||
- 1.12.x
|
||||
env:
|
||||
- GO111MODULE=on
|
||||
install:
|
||||
- go get -v golang.org/x/lint/golint
|
||||
- go mod tidy -v
|
||||
script:
|
||||
- golint -set_exit_status ./...
|
||||
- go test -race -coverprofile=coverage.txt -covermode=atomic ./...
|
||||
after_success:
|
||||
- bash <(curl -s https://codecov.io/bash)
|
||||
matrix:
|
||||
allow_failures:
|
||||
- go: tip
|
3
LICENSE
3
LICENSE
|
@ -1,6 +1,7 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2019 NSPCC
|
||||
Copyright (c) 2023-2024 TrueCloudLab
|
||||
Copyright (c) 2019-2023 NSPCC
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
|
|
@ -1,15 +1,10 @@
|
|||
# Golang HRW implementation
|
||||
|
||||
[![Build Status](https://travis-ci.org/nspcc-dev/hrw.svg?branch=master)](https://travis-ci.org/nspcc-dev/hrw)
|
||||
[![codecov](https://codecov.io/gh/nspcc-dev/hrw/badge.svg)](https://codecov.io/gh/nspcc-dev/hrw)
|
||||
[![Report](https://goreportcard.com/badge/github.com/nspcc-dev/hrw)](https://goreportcard.com/report/github.com/nspcc-dev/hrw)
|
||||
[![GitHub release](https://img.shields.io/github/release/nspcc-dev/hrw.svg)](https://github.com/nspcc-dev/hrw)
|
||||
|
||||
[Rendezvous or highest random weight](https://en.wikipedia.org/wiki/Rendezvous_hashing) (HRW) hashing is an algorithm that allows clients to achieve distributed agreement on a set of k options out of a possible set of n options. A typical application is when clients need to agree on which sites (or proxies) objects are assigned to. When k is 1, it subsumes the goals of consistent hashing, using an entirely different method.
|
||||
|
||||
## Install
|
||||
|
||||
`go get github.com/nspcc-dev/hrw`
|
||||
`go get git.frostfs.info/TrueCloudLab/hrw`
|
||||
|
||||
## Benchmark:
|
||||
|
||||
|
@ -55,7 +50,7 @@ package main
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/TrueCloudLab/hrw"
|
||||
"git.frostfs.info/TrueCloudLab/hrw"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
|
4
go.mod
4
go.mod
|
@ -1,10 +1,10 @@
|
|||
module github.com/TrueCloudLab/hrw
|
||||
module git.frostfs.info/TrueCloudLab/hrw
|
||||
|
||||
go 1.18
|
||||
|
||||
require (
|
||||
github.com/spaolacci/murmur3 v1.1.0
|
||||
github.com/stretchr/testify v1.3.0
|
||||
github.com/twmb/murmur3 v1.1.8
|
||||
)
|
||||
|
||||
require (
|
||||
|
|
4
go.sum
4
go.sum
|
@ -2,8 +2,8 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8
|
|||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
|
||||
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/twmb/murmur3 v1.1.8 h1:8Yt9taO/WN3l08xErzjeschgZU2QSrwm1kclYq+0aRg=
|
||||
github.com/twmb/murmur3 v1.1.8/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ=
|
||||
|
|
143
hrw.go
143
hrw.go
|
@ -9,7 +9,7 @@ import (
|
|||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/spaolacci/murmur3"
|
||||
"github.com/twmb/murmur3"
|
||||
)
|
||||
|
||||
type (
|
||||
|
@ -21,6 +21,12 @@ type (
|
|||
less func(i, j int) bool
|
||||
swap func(i, j int)
|
||||
}
|
||||
|
||||
// hasherSorter implements sort.Interface over slice, ordering elements by the
// parallel dist values: ascending when asc is true, descending otherwise.
hasherSorter[T Hasher, N interface{ ~uint64 | ~float64 }] struct {
|
||||
slice []T
|
||||
dist []N
|
||||
asc bool
|
||||
}
|
||||
)
|
||||
|
||||
// Boundaries of valid normalized weights
|
||||
|
@ -33,6 +39,18 @@ func (s *sorter) Len() int { return s.l }
|
|||
func (s *sorter) Less(i, j int) bool { return s.less(i, j) }
|
||||
func (s *sorter) Swap(i, j int) { s.swap(i, j) }
|
||||
|
||||
// Len returns the number of elements in s.slice.
func (s *hasherSorter[T, N]) Len() int { return len(s.slice) }
|
||||
// Less reports whether position i must sort before position j: it compares
// dist[i] < dist[j] when s.asc is set and dist[i] > dist[j] otherwise.
func (s *hasherSorter[T, N]) Less(i, j int) bool {
|
||||
if s.asc {
|
||||
return s.dist[i] < s.dist[j]
|
||||
}
|
||||
return s.dist[i] > s.dist[j]
|
||||
}
|
||||
// Swap exchanges positions i and j in both slice and dist, so every element
// stays paired with its own dist value.
func (s *hasherSorter[T, N]) Swap(i, j int) {
|
||||
s.slice[i], s.slice[j] = s.slice[j], s.slice[i]
|
||||
s.dist[i], s.dist[j] = s.dist[j], s.dist[i]
|
||||
}
|
||||
|
||||
func distance(x uint64, y uint64) uint64 {
|
||||
acc := x ^ y
|
||||
// the mmh3 64-bit finalizer is applied here
|
||||
|
@ -50,6 +68,11 @@ func Hash(key []byte) uint64 {
|
|||
return murmur3.Sum64(key)
|
||||
}
|
||||
|
||||
// StringHash returns the 64-bit murmur3 hash of key, computed with murmur3.StringSum64.
|
||||
func StringHash(key string) uint64 {
|
||||
return murmur3.StringSum64(key)
|
||||
}
|
||||
|
||||
// Sort receives nodes and a hash, and sorts the nodes by distance
|
||||
func Sort(nodes []uint64, hash uint64) []uint64 {
|
||||
l := len(nodes)
|
||||
|
@ -85,13 +108,15 @@ func SortSliceByValue(slice interface{}, hash uint64) {
|
|||
|
||||
// SortHasherSliceByValue receives []Hasher and hash to sort by value-distance.
|
||||
func SortHasherSliceByValue[T Hasher](slice []T, hash uint64) {
|
||||
rule := prepareHasherRule(slice)
|
||||
if rule != nil {
|
||||
swap := func(i, j int) {
|
||||
slice[i], slice[j] = slice[j], slice[i]
|
||||
if len(slice) == 0 {
|
||||
return
|
||||
}
|
||||
sortByDistance(len(rule), false, rule, hash, swap)
|
||||
|
||||
dist := make([]uint64, len(slice))
|
||||
for i := range dist {
|
||||
dist[i] = distance(slice[i].Hash(), hash)
|
||||
}
|
||||
sortHasherByDistance(slice, false, dist)
|
||||
}
|
||||
|
||||
// SortSliceByWeightValue receives []T, weights and hash to sort by value-distance * weights
|
||||
|
@ -105,13 +130,41 @@ func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) {
|
|||
|
||||
// SortHasherSliceByWeightValue receives []Hasher, weights and hash to sort by value-distance * weights.
|
||||
func SortHasherSliceByWeightValue[T Hasher](slice []T, weights []float64, hash uint64) {
|
||||
rule := prepareHasherRule(slice)
|
||||
if rule != nil {
|
||||
swap := func(i, j int) {
|
||||
slice[i], slice[j] = slice[j], slice[i]
|
||||
if len(slice) == 0 {
|
||||
return
|
||||
}
|
||||
sortByWeight(len(slice), false, rule, weights, hash, swap)
|
||||
|
||||
if allSameF64(weights) {
|
||||
dist := make([]uint64, len(slice))
|
||||
for i := range dist {
|
||||
dist[i] = distance(slice[i].Hash(), hash)
|
||||
}
|
||||
sortHasherByDistance(slice, false, dist)
|
||||
return
|
||||
}
|
||||
|
||||
dist := make([]float64, len(slice))
|
||||
for i := range dist {
|
||||
d := distance(slice[i].Hash(), hash)
|
||||
// `maxUint64 - distance` makes the shorter distance more valuable
|
||||
// it is necessary for operation with normalized values
|
||||
dist[i] = float64(^uint64(0)-d) * weights[i]
|
||||
}
|
||||
|
||||
sort.Sort(&hasherSorter[T, float64]{
|
||||
slice: slice,
|
||||
dist: dist,
|
||||
asc: false,
|
||||
})
|
||||
}
|
||||
|
||||
// sortHasherByDistance is similar to sortByDistance but accepts slice directly.
// It sorts slice in ascending order of dist and permutes dist in step with it.
// The byIndex parameter is not read by this function.
|
||||
func sortHasherByDistance[T Hasher](slice []T, byIndex bool, dist []uint64) {
|
||||
sort.Sort(&hasherSorter[T, uint64]{
|
||||
slice: slice,
|
||||
dist: dist,
|
||||
asc: true,
|
||||
})
|
||||
}
|
||||
|
||||
// SortSliceByIndex receives []T and hash to sort by index-distance
|
||||
|
@ -221,18 +274,6 @@ func prepareRule(slice interface{}) []uint64 {
|
|||
return rule
|
||||
}
|
||||
|
||||
// prepareHasherRule returns the Hash() value of every element of hashers, in
// the same order, or nil when hashers is empty.
func prepareHasherRule[T Hasher](hashers []T) []uint64 {
|
||||
length := len(hashers)
|
||||
if length == 0 {
|
||||
return nil
|
||||
}
|
||||
result := make([]uint64, length)
|
||||
for i := 0; i < length; i++ {
|
||||
result[i] = hashers[i].Hash()
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// ValidateWeights checks if weights are normalized between 0.0 and 1.0
|
||||
func ValidateWeights(weights []float64) error {
|
||||
for i := range weights {
|
||||
|
@ -243,24 +284,6 @@ func ValidateWeights(weights []float64) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// newSorter builds a sorter of length l together with two helper slices:
// ind, initialised to the identity permutation 0..l-1, and dist, filled with
// getDistance(byIndex, i, nodes, h) for each position i.
// The sorter's swap calls the supplied swap and then swaps the matching ind
// entries; dist itself is never permuted. The less field is left unset here.
func newSorter(l int, byIndex bool, nodes []uint64, h uint64,
|
||||
swap func(i, j int)) (*sorter, []int, []uint64) {
|
||||
ind := make([]int, l)
|
||||
dist := make([]uint64, l)
|
||||
for i := 0; i < l; i++ {
|
||||
ind[i] = i
|
||||
dist[i] = getDistance(byIndex, i, nodes, h)
|
||||
}
|
||||
|
||||
return &sorter{
|
||||
l: l,
|
||||
swap: func(i, j int) {
|
||||
swap(i, j)
|
||||
ind[i], ind[j] = ind[j], ind[i]
|
||||
},
|
||||
}, ind, dist
|
||||
}
|
||||
|
||||
// sortByWeight sorts nodes by weight using provided swapper.
|
||||
// nodes contains hrw hashes. If it is nil, indices are used.
|
||||
func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash uint64, swap func(i, j int)) {
|
||||
|
@ -270,14 +293,23 @@ func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash u
|
|||
return
|
||||
}
|
||||
|
||||
s, ind, dist := newSorter(l, byIndex, nodes, hash, swap)
|
||||
s.less = func(i, j int) bool {
|
||||
ii, jj := ind[i], ind[j]
|
||||
dist := make([]float64, l)
|
||||
for i := 0; i < l; i++ {
|
||||
d := getDistance(byIndex, i, nodes, hash)
|
||||
// `maxUint64 - distance` makes the shorter distance more valuable
|
||||
// it is necessary for operation with normalized values
|
||||
wi := float64(^uint64(0)-dist[ii]) * weights[ii]
|
||||
wj := float64(^uint64(0)-dist[jj]) * weights[jj]
|
||||
return wi > wj // higher distance must be placed lower to be first
|
||||
dist[i] = float64(^uint64(0)-d) * weights[i]
|
||||
}
|
||||
|
||||
s := &sorter{
|
||||
l: l,
|
||||
swap: func(i, j int) {
|
||||
swap(i, j)
|
||||
dist[i], dist[j] = dist[j], dist[i]
|
||||
},
|
||||
less: func(i, j int) bool {
|
||||
return dist[i] > dist[j] // higher distance must be placed lower to be first
|
||||
},
|
||||
}
|
||||
sort.Sort(s)
|
||||
}
|
||||
|
@ -285,9 +317,20 @@ func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash u
|
|||
// sortByDistance sorts nodes by hrw distance using provided swapper.
|
||||
// nodes contains hrw hashes. If it is nil, indices are used.
|
||||
func sortByDistance(l int, byIndex bool, nodes []uint64, hash uint64, swap func(i, j int)) {
|
||||
s, ind, dist := newSorter(l, byIndex, nodes, hash, swap)
|
||||
s.less = func(i, j int) bool {
|
||||
return dist[ind[i]] < dist[ind[j]]
|
||||
dist := make([]uint64, l)
|
||||
for i := 0; i < l; i++ {
|
||||
dist[i] = getDistance(byIndex, i, nodes, hash)
|
||||
}
|
||||
|
||||
s := &sorter{
|
||||
l: l,
|
||||
swap: func(i, j int) {
|
||||
swap(i, j)
|
||||
dist[i], dist[j] = dist[j], dist[i]
|
||||
},
|
||||
less: func(i, j int) bool {
|
||||
return dist[i] < dist[j]
|
||||
},
|
||||
}
|
||||
sort.Sort(s)
|
||||
}
|
||||
|
|
|
@ -61,7 +61,7 @@ func Example() {
|
|||
}
|
||||
|
||||
func (h hashString) Hash() uint64 {
|
||||
return Hash([]byte(h))
|
||||
return StringHash(string(h))
|
||||
}
|
||||
|
||||
func TestSortSliceByIndex(t *testing.T) {
|
||||
|
@ -737,7 +737,7 @@ func BenchmarkSortHashersByWeightValueReflection_fnv_100(b *testing.B) {
|
|||
}
|
||||
|
||||
func BenchmarkSortHashersByWeightValueReflection_fnv_1000(b *testing.B) {
|
||||
benchmarkSortHashersByWeightValueRelection(b, 100, Hash(testKey))
|
||||
benchmarkSortHashersByWeightValueRelection(b, 1000, Hash(testKey))
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByWeightValueTyped_fnv_10(b *testing.B) {
|
||||
|
@ -749,7 +749,7 @@ func BenchmarkSortHashersByWeightValueTyped_fnv_100(b *testing.B) {
|
|||
}
|
||||
|
||||
func BenchmarkSortHashersByWeightValueTyped_fnv_1000(b *testing.B) {
|
||||
benchmarkSortHashersByWeightValueTyped(b, 100, Hash(testKey))
|
||||
benchmarkSortHashersByWeightValueTyped(b, 1000, Hash(testKey))
|
||||
}
|
||||
|
||||
func benchmarkSort(b *testing.B, n int, hash uint64) uint64 {
|
||||
|
|
Loading…
Reference in a new issue