forked from TrueCloudLab/hrw
Compare commits
23 commits
feature/ge
...
master
Author | SHA1 | Date | |
---|---|---|---|
16a7740ccd | |||
2ac89c82b6 | |||
266da7c69a | |||
c52f74d8e1 | |||
895ecf150f | |||
213c105ac1 | |||
c175ef4099 | |||
2c085708de | |||
15b3800347 | |||
2e205cf1ca | |||
ebca2848ad | |||
0ad932400c | |||
08e14caaf3 | |||
7e33833933 | |||
79b208bebf | |||
5671632658 | |||
9e9fc653e5 | |||
997b540432 | |||
22b833d972 | |||
fad35bbd3b | |||
dddcfc8fc5 | |||
|
f52ea8fb21 | ||
|
aa230933d1 |
9 changed files with 403 additions and 204 deletions
10
.gitlint
Normal file
10
.gitlint
Normal file
|
@ -0,0 +1,10 @@
|
|||
[general]
|
||||
fail-without-commits=true
|
||||
contrib=CC1
|
||||
|
||||
[title-match-regex]
|
||||
regex=^\[\#[0-9]+\]\s
|
||||
|
||||
[ignore-by-title]
|
||||
regex=^Release(.*)
|
||||
ignore=title-match-regex
|
30
.pre-commit-config.yaml
Normal file
30
.pre-commit-config.yaml
Normal file
|
@ -0,0 +1,30 @@
|
|||
ci:
|
||||
autofix_prs: false
|
||||
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.4.0
|
||||
hooks:
|
||||
- id: check-added-large-files
|
||||
- id: check-case-conflict
|
||||
- id: check-executables-have-shebangs
|
||||
- id: check-shebang-scripts-are-executable
|
||||
- id: check-merge-conflict
|
||||
- id: check-json
|
||||
- id: check-xml
|
||||
- id: check-yaml
|
||||
- id: trailing-whitespace
|
||||
args: [--markdown-linebreak-ext=md]
|
||||
- id: end-of-file-fixer
|
||||
exclude: ".key$"
|
||||
|
||||
- repo: https://github.com/golangci/golangci-lint
|
||||
rev: v1.51.2
|
||||
hooks:
|
||||
- id: golangci-lint
|
||||
|
||||
- repo: https://github.com/jorisroovers/gitlint
|
||||
rev: v0.18.0
|
||||
hooks:
|
||||
- id: gitlint
|
||||
stages: [commit-msg]
|
17
.travis.yml
17
.travis.yml
|
@ -1,17 +0,0 @@
|
|||
language: go
|
||||
go:
|
||||
- 1.11.x
|
||||
- 1.12.x
|
||||
env:
|
||||
- GO111MODULE=on
|
||||
install:
|
||||
- go get -v golang.org/x/lint/golint
|
||||
- go mod tidy -v
|
||||
script:
|
||||
- golint -set_exit_status ./...
|
||||
- go test -race -coverprofile=coverage.txt -covermode=atomic ./...
|
||||
after_success:
|
||||
- bash <(curl -s https://codecov.io/bash)
|
||||
matrix:
|
||||
allow_failures:
|
||||
- go: tip
|
50
README.md
50
README.md
|
@ -14,25 +14,37 @@
|
|||
## Benchmark:
|
||||
|
||||
```
|
||||
BenchmarkSort_fnv_10-8 5000000 365 ns/op 224 B/op 3 allocs/op
|
||||
BenchmarkSort_fnv_100-8 300000 5261 ns/op 1856 B/op 3 allocs/op
|
||||
BenchmarkSort_fnv_1000-8 10000 119462 ns/op 16448 B/op 3 allocs/op
|
||||
BenchmarkSortByIndex_fnv_10-8 3000000 546 ns/op 384 B/op 7 allocs/op
|
||||
BenchmarkSortByIndex_fnv_100-8 200000 5965 ns/op 2928 B/op 7 allocs/op
|
||||
BenchmarkSortByIndex_fnv_1000-8 10000 127732 ns/op 25728 B/op 7 allocs/op
|
||||
BenchmarkSortByValue_fnv_10-8 2000000 962 ns/op 544 B/op 17 allocs/op
|
||||
BenchmarkSortByValue_fnv_100-8 200000 9604 ns/op 4528 B/op 107 allocs/op
|
||||
BenchmarkSortByValue_fnv_1000-8 10000 111741 ns/op 41728 B/op 1007 allocs/op
|
||||
BenchmarkSort_fnv_10-8 4812801 240.9 ns/op 216 B/op 4 allocs/op
|
||||
BenchmarkSort_fnv_100-8 434767 2600 ns/op 1848 B/op 4 allocs/op
|
||||
BenchmarkSort_fnv_1000-8 20428 66116 ns/op 16440 B/op 4 allocs/op
|
||||
BenchmarkSortByIndex_fnv_10-8 2505410 486.5 ns/op 352 B/op 7 allocs/op
|
||||
BenchmarkSortByIndex_fnv_100-8 254556 4697 ns/op 1984 B/op 7 allocs/op
|
||||
BenchmarkSortByIndex_fnv_1000-8 13581 88334 ns/op 16576 B/op 7 allocs/op
|
||||
BenchmarkSortByValue_fnv_10-8 1761030 682.1 ns/op 592 B/op 18 allocs/op
|
||||
BenchmarkSortByValue_fnv_100-8 258838 4675 ns/op 4480 B/op 108 allocs/op
|
||||
BenchmarkSortByValue_fnv_1000-8 27027 44649 ns/op 40768 B/op 1008 allocs/op
|
||||
BenchmarkSortHashersByValue_Reflection_fnv_10-8 1013560 1249 ns/op 768 B/op 29 allocs/op
|
||||
BenchmarkSortHashersByValue_Reflection_fnv_100-8 106029 11414 ns/op 6096 B/op 209 allocs/op
|
||||
BenchmarkSortHashersByValue_Reflection_fnv_1000-8 10000 108977 ns/op 56784 B/op 2009 allocs/op
|
||||
BenchmarkSortHashersByValue_Typed_fnv_10-8 1577814 700.3 ns/op 584 B/op 17 allocs/op
|
||||
BenchmarkSortHashersByValue_Typed_fnv_100-8 215938 5024 ns/op 4472 B/op 107 allocs/op
|
||||
BenchmarkSortHashersByValue_Typed_fnv_1000-8 24447 46889 ns/op 40760 B/op 1007 allocs/op
|
||||
|
||||
BenchmarkSortByWeight_fnv_10-8 3000000 501 ns/op 320 B/op 4 allocs/op
|
||||
BenchmarkSortByWeight_fnv_100-8 200000 8495 ns/op 2768 B/op 4 allocs/op
|
||||
BenchmarkSortByWeight_fnv_1000-8 10000 197880 ns/op 24656 B/op 4 allocs/op
|
||||
BenchmarkSortByWeightIndex_fnv_10-8 2000000 702 ns/op 480 B/op 8 allocs/op
|
||||
BenchmarkSortByWeightIndex_fnv_100-8 200000 9338 ns/op 3840 B/op 8 allocs/op
|
||||
BenchmarkSortByWeightIndex_fnv_1000-8 10000 204669 ns/op 33936 B/op 8 allocs/op
|
||||
BenchmarkSortByWeightValue_fnv_10-8 1000000 1083 ns/op 640 B/op 18 allocs/op
|
||||
BenchmarkSortByWeightValue_fnv_100-8 200000 11444 ns/op 5440 B/op 108 allocs/op
|
||||
BenchmarkSortByWeightValue_fnv_1000-8 10000 148471 ns/op 49936 B/op 1008 allocs/op
|
||||
BenchmarkSortByWeight_fnv_10-8 2924833 370.6 ns/op 448 B/op 8 allocs/op
|
||||
BenchmarkSortByWeight_fnv_100-8 816069 1516 ns/op 2896 B/op 8 allocs/op
|
||||
BenchmarkSortByWeight_fnv_1000-8 80391 17478 ns/op 24784 B/op 8 allocs/op
|
||||
BenchmarkSortByWeightIndex_fnv_10-8 1945612 550.3 ns/op 368 B/op 7 allocs/op
|
||||
BenchmarkSortByWeightIndex_fnv_100-8 140473 8084 ns/op 2000 B/op 7 allocs/op
|
||||
BenchmarkSortByWeightIndex_fnv_1000-8 5518 200949 ns/op 16592 B/op 7 allocs/op
|
||||
BenchmarkSortByWeightValue_fnv_10-8 1305580 909.8 ns/op 608 B/op 18 allocs/op
|
||||
BenchmarkSortByWeightValue_fnv_100-8 165410 6796 ns/op 4496 B/op 108 allocs/op
|
||||
BenchmarkSortByWeightValue_fnv_1000-8 17922 78555 ns/op 40784 B/op 1008 allocs/op
|
||||
BenchmarkSortHashersByWeightValueReflection_fnv_10-8 454976 2229 ns/op 784 B/op 29 allocs/op
|
||||
BenchmarkSortHashersByWeightValueReflection_fnv_100-8 76264 15332 ns/op 6112 B/op 209 allocs/op
|
||||
BenchmarkSortHashersByWeightValueReflection_fnv_1000-8 80288 13192 ns/op 6112 B/op 209 allocs/op
|
||||
BenchmarkSortHashersByWeightValueTyped_fnv_10-8 1433113 901.4 ns/op 600 B/op 17 allocs/op
|
||||
BenchmarkSortHashersByWeightValueTyped_fnv_100-8 188626 5896 ns/op 4488 B/op 107 allocs/op
|
||||
BenchmarkSortHashersByWeightValueTyped_fnv_1000-8 178131 6518 ns/op 4488 B/op 107 allocs/op
|
||||
```
|
||||
|
||||
## Example
|
||||
|
@ -43,7 +55,7 @@ package main
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/nspcc-dev/hrw"
|
||||
"git.frostfs.info/TrueCloudLab/hrw"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
|
11
go.mod
11
go.mod
|
@ -1,6 +1,13 @@
|
|||
module github.com/nspcc-dev/hrw
|
||||
module git.frostfs.info/TrueCloudLab/hrw
|
||||
|
||||
go 1.18
|
||||
|
||||
require (
|
||||
github.com/spaolacci/murmur3 v1.1.0
|
||||
github.com/stretchr/testify v1.3.0
|
||||
github.com/twmb/murmur3 v1.1.8
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/davecgh/go-spew v1.1.0 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
)
|
||||
|
|
4
go.sum
4
go.sum
|
@ -2,8 +2,8 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8
|
|||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
|
||||
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/twmb/murmur3 v1.1.8 h1:8Yt9taO/WN3l08xErzjeschgZU2QSrwm1kclYq+0aRg=
|
||||
github.com/twmb/murmur3 v1.1.8/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ=
|
||||
|
|
302
hrw.go
302
hrw.go
|
@ -5,27 +5,27 @@ package hrw
|
|||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"math"
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/spaolacci/murmur3"
|
||||
"github.com/twmb/murmur3"
|
||||
)
|
||||
|
||||
type (
|
||||
swapper func(i, j int)
|
||||
|
||||
// Hasher interface used by SortSliceByValue
|
||||
Hasher interface{ Hash() uint64 }
|
||||
|
||||
hashed struct {
|
||||
length int
|
||||
sorted []uint64
|
||||
distance []uint64
|
||||
sorter struct {
|
||||
l int
|
||||
less func(i, j int) bool
|
||||
swap func(i, j int)
|
||||
}
|
||||
|
||||
weighted struct {
|
||||
h hashed
|
||||
normal []float64 // normalized input weights
|
||||
hasherSorter[T Hasher, N interface{ ~uint64 | ~float64 }] struct {
|
||||
slice []T
|
||||
dist []N
|
||||
asc bool
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -35,6 +35,22 @@ const (
|
|||
NormalizedMinWeight = 0.0
|
||||
)
|
||||
|
||||
func (s *sorter) Len() int { return s.l }
|
||||
func (s *sorter) Less(i, j int) bool { return s.less(i, j) }
|
||||
func (s *sorter) Swap(i, j int) { s.swap(i, j) }
|
||||
|
||||
func (s *hasherSorter[T, N]) Len() int { return len(s.slice) }
|
||||
func (s *hasherSorter[T, N]) Less(i, j int) bool {
|
||||
if s.asc {
|
||||
return s.dist[i] < s.dist[j]
|
||||
}
|
||||
return s.dist[i] > s.dist[j]
|
||||
}
|
||||
func (s *hasherSorter[T, N]) Swap(i, j int) {
|
||||
s.slice[i], s.slice[j] = s.slice[j], s.slice[i]
|
||||
s.dist[i], s.dist[j] = s.dist[j], s.dist[i]
|
||||
}
|
||||
|
||||
func distance(x uint64, y uint64) uint64 {
|
||||
acc := x ^ y
|
||||
// the mmh3 64-bit finalizer is used here
|
||||
|
@ -47,83 +63,38 @@ func distance(x uint64, y uint64) uint64 {
|
|||
return acc
|
||||
}
|
||||
|
||||
func (h hashed) Len() int { return h.length }
|
||||
func (h hashed) Less(i, j int) bool { return h.distance[i] < h.distance[j] }
|
||||
func (h hashed) Swap(i, j int) {
|
||||
h.sorted[i], h.sorted[j] = h.sorted[j], h.sorted[i]
|
||||
h.distance[i], h.distance[j] = h.distance[j], h.distance[i]
|
||||
}
|
||||
|
||||
func (w weighted) Len() int { return w.h.length }
|
||||
func (w weighted) Less(i, j int) bool {
|
||||
// `maxUint64 - distance` makes the shorter distance more valuable
|
||||
// it is necessary for operation with normalized values
|
||||
wi := float64(^uint64(0)-w.h.distance[i]) * w.normal[i]
|
||||
wj := float64(^uint64(0)-w.h.distance[j]) * w.normal[j]
|
||||
return wi > wj // higher distance must be placed lower to be first
|
||||
}
|
||||
func (w weighted) Swap(i, j int) { w.normal[i], w.normal[j] = w.normal[j], w.normal[i]; w.h.Swap(i, j) }
|
||||
|
||||
// Hash uses murmur3 hash to return uint64
|
||||
func Hash(key []byte) uint64 {
|
||||
return murmur3.Sum64(key)
|
||||
}
|
||||
|
||||
// StringHash uses murmur3 hash to return uint64
|
||||
func StringHash(key string) uint64 {
|
||||
return murmur3.StringSum64(key)
|
||||
}
|
||||
|
||||
// Sort receives nodes and hash, and returns the node indices sorted by distance
|
||||
func Sort(nodes []uint64, hash uint64) []uint64 {
|
||||
var (
|
||||
l = len(nodes)
|
||||
h = hashed{
|
||||
length: l,
|
||||
sorted: make([]uint64, 0, l),
|
||||
distance: make([]uint64, 0, l),
|
||||
}
|
||||
)
|
||||
|
||||
l := len(nodes)
|
||||
sorted := make([]uint64, l)
|
||||
dist := make([]uint64, l)
|
||||
for i := range nodes {
|
||||
h.sorted = append(h.sorted, uint64(i))
|
||||
h.distance = append(h.distance, distance(nodes[i], hash))
|
||||
sorted[i] = uint64(i)
|
||||
dist[i] = distance(nodes[i], hash)
|
||||
}
|
||||
|
||||
sort.Sort(h)
|
||||
return h.sorted
|
||||
sort.Slice(sorted, func(i, j int) bool {
|
||||
return dist[sorted[i]] < dist[sorted[j]]
|
||||
})
|
||||
return sorted
|
||||
}
|
||||
|
||||
// SortByWeight receives nodes, weights and hash, and sorts them by distance * weight
|
||||
func SortByWeight(nodes []uint64, weights []float64, hash uint64) []uint64 {
|
||||
// check if numbers of weights and nodes are equal
|
||||
uniform := true
|
||||
for i := range weights {
|
||||
// check if all nodes have the same distance
|
||||
if weights[i] != weights[0] {
|
||||
uniform = false
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
l := len(nodes)
|
||||
w := weighted{
|
||||
h: hashed{
|
||||
length: l,
|
||||
sorted: make([]uint64, 0, l),
|
||||
distance: make([]uint64, 0, l),
|
||||
},
|
||||
normal: make([]float64, l),
|
||||
}
|
||||
|
||||
// if all nodes have the same distance then sort uniformly
|
||||
if uniform || len(weights) != l {
|
||||
return Sort(nodes, hash)
|
||||
}
|
||||
|
||||
for i := range nodes {
|
||||
w.h.sorted = append(w.h.sorted, uint64(i))
|
||||
w.h.distance = append(w.h.distance, distance(nodes[i], hash))
|
||||
}
|
||||
copy(w.normal, weights)
|
||||
|
||||
sort.Sort(w)
|
||||
return w.h.sorted
|
||||
result := make([]uint64, len(nodes))
|
||||
copy(nodes, result)
|
||||
sortByWeight(len(nodes), false, nodes, weights, hash, reflect.Swapper(result))
|
||||
return result
|
||||
}
|
||||
|
||||
// SortSliceByValue receives []T and hash, and sorts the slice by value-distance
|
||||
|
@ -131,76 +102,89 @@ func SortSliceByValue(slice interface{}, hash uint64) {
|
|||
rule := prepareRule(slice)
|
||||
if rule != nil {
|
||||
swap := reflect.Swapper(slice)
|
||||
rule = Sort(rule, hash)
|
||||
sortByRuleInverse(swap, uint64(len(rule)), rule)
|
||||
sortByDistance(len(rule), false, rule, hash, swap)
|
||||
}
|
||||
}
|
||||
|
||||
// SortHasherSliceByValue receives []Hasher and hash to sort by value-distance.
|
||||
func SortHasherSliceByValue[T Hasher](slice []T, hash uint64) {
|
||||
if len(slice) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
dist := make([]uint64, len(slice))
|
||||
for i := range dist {
|
||||
dist[i] = distance(slice[i].Hash(), hash)
|
||||
}
|
||||
sortHasherByDistance(slice, false, dist)
|
||||
}
|
||||
|
||||
// SortSliceByWeightValue receives []T, weights and hash, and sorts the slice by value-distance * weights
|
||||
func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) {
|
||||
rule := prepareRule(slice)
|
||||
if rule != nil {
|
||||
swap := reflect.Swapper(slice)
|
||||
rule = SortByWeight(rule, weights, hash)
|
||||
sortByRuleInverse(swap, uint64(len(rule)), rule)
|
||||
sortByWeight(reflect.ValueOf(slice).Len(), false, rule, weights, hash, swap)
|
||||
}
|
||||
}
|
||||
|
||||
// SortHasherSliceByWeightValue receives []Hasher, weights and hash to sort by value-distance * weights.
|
||||
func SortHasherSliceByWeightValue[T Hasher](slice []T, weights []float64, hash uint64) {
|
||||
if len(slice) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
if allSameF64(weights) {
|
||||
dist := make([]uint64, len(slice))
|
||||
for i := range dist {
|
||||
dist[i] = distance(slice[i].Hash(), hash)
|
||||
}
|
||||
sortHasherByDistance(slice, false, dist)
|
||||
return
|
||||
}
|
||||
|
||||
dist := make([]float64, len(slice))
|
||||
for i := range dist {
|
||||
d := distance(slice[i].Hash(), hash)
|
||||
// `maxUint64 - distance` makes the shorter distance more valuable
|
||||
// it is necessary for operation with normalized values
|
||||
dist[i] = float64(^uint64(0)-d) * weights[i]
|
||||
}
|
||||
|
||||
sort.Sort(&hasherSorter[T, float64]{
|
||||
slice: slice,
|
||||
dist: dist,
|
||||
asc: false,
|
||||
})
|
||||
}
|
||||
|
||||
// sortHasherByDistance is similar to sortByDistance but accepts slice directly.
|
||||
func sortHasherByDistance[T Hasher](slice []T, byIndex bool, dist []uint64) {
|
||||
sort.Sort(&hasherSorter[T, uint64]{
|
||||
slice: slice,
|
||||
dist: dist,
|
||||
asc: true,
|
||||
})
|
||||
}
|
||||
|
||||
// SortSliceByIndex receives []T and hash, and sorts the slice by index-distance
|
||||
func SortSliceByIndex(slice interface{}, hash uint64) {
|
||||
length := uint64(reflect.ValueOf(slice).Len())
|
||||
length := reflect.ValueOf(slice).Len()
|
||||
swap := reflect.Swapper(slice)
|
||||
rule := make([]uint64, 0, length)
|
||||
for i := uint64(0); i < length; i++ {
|
||||
rule = append(rule, i)
|
||||
}
|
||||
rule = Sort(rule, hash)
|
||||
sortByRuleInverse(swap, length, rule)
|
||||
sortByDistance(length, true, nil, hash, swap)
|
||||
}
|
||||
|
||||
// SortSliceByWeightIndex receives []T, weights and hash, and sorts the slice by index-distance * weights
|
||||
func SortSliceByWeightIndex(slice interface{}, weights []float64, hash uint64) {
|
||||
length := uint64(reflect.ValueOf(slice).Len())
|
||||
length := reflect.ValueOf(slice).Len()
|
||||
swap := reflect.Swapper(slice)
|
||||
rule := make([]uint64, 0, length)
|
||||
for i := uint64(0); i < length; i++ {
|
||||
rule = append(rule, i)
|
||||
}
|
||||
rule = SortByWeight(rule, weights, hash)
|
||||
sortByRuleInverse(swap, length, rule)
|
||||
}
|
||||
|
||||
func sortByRuleDirect(swap swapper, length uint64, rule []uint64) {
|
||||
done := make([]bool, length)
|
||||
for i := uint64(0); i < length; i++ {
|
||||
if done[i] {
|
||||
continue
|
||||
}
|
||||
for j := rule[i]; !done[rule[j]]; j = rule[j] {
|
||||
swap(int(i), int(j))
|
||||
done[j] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func sortByRuleInverse(swap swapper, length uint64, rule []uint64) {
|
||||
done := make([]bool, length)
|
||||
for i := uint64(0); i < length; i++ {
|
||||
if done[i] {
|
||||
continue
|
||||
}
|
||||
|
||||
for j := i; !done[rule[j]]; j = rule[j] {
|
||||
swap(int(j), int(rule[j]))
|
||||
done[j] = true
|
||||
}
|
||||
}
|
||||
sortByWeight(length, true, nil, weights, hash, swap)
|
||||
}
|
||||
|
||||
func prepareRule(slice interface{}) []uint64 {
|
||||
t := reflect.TypeOf(slice)
|
||||
if t.Kind() != reflect.Slice {
|
||||
return nil
|
||||
panic("HRW sort expects slice, got " + t.Kind().String())
|
||||
}
|
||||
|
||||
var (
|
||||
|
@ -279,7 +263,7 @@ func prepareRule(slice interface{}) []uint64 {
|
|||
|
||||
default:
|
||||
if _, ok := val.Index(0).Interface().(Hasher); !ok {
|
||||
return nil
|
||||
panic("slice elements must implement hrw.Hasher")
|
||||
}
|
||||
|
||||
for i := 0; i < length; i++ {
|
||||
|
@ -293,9 +277,85 @@ func prepareRule(slice interface{}) []uint64 {
|
|||
// ValidateWeights checks if weights are normalized between 0.0 and 1.0
|
||||
func ValidateWeights(weights []float64) error {
|
||||
for i := range weights {
|
||||
if weights[i] > NormalizedMaxWeight || weights[i] < NormalizedMinWeight {
|
||||
if math.IsNaN(weights[i]) || weights[i] > NormalizedMaxWeight || weights[i] < NormalizedMinWeight {
|
||||
return errors.New("weights are not normalized")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// sortByWeight sorts nodes by weight using provided swapper.
|
||||
// nodes contains hrw hashes. If it is nil, indices are used.
|
||||
func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash uint64, swap func(i, j int)) {
|
||||
// if all weights are the same then sort uniformly by distance
|
||||
if allSameF64(weights) {
|
||||
sortByDistance(l, byIndex, nodes, hash, swap)
|
||||
return
|
||||
}
|
||||
|
||||
dist := make([]float64, l)
|
||||
for i := 0; i < l; i++ {
|
||||
d := getDistance(byIndex, i, nodes, hash)
|
||||
// `maxUint64 - distance` makes the shorter distance more valuable
|
||||
// it is necessary for operation with normalized values
|
||||
dist[i] = float64(^uint64(0)-d) * weights[i]
|
||||
}
|
||||
|
||||
s := &sorter{
|
||||
l: l,
|
||||
swap: func(i, j int) {
|
||||
swap(i, j)
|
||||
dist[i], dist[j] = dist[j], dist[i]
|
||||
},
|
||||
less: func(i, j int) bool {
|
||||
return dist[i] > dist[j] // higher distance must be placed lower to be first
|
||||
},
|
||||
}
|
||||
sort.Sort(s)
|
||||
}
|
||||
|
||||
// sortByDistance sorts nodes by hrw distance using provided swapper.
|
||||
// nodes contains hrw hashes. If it is nil, indices are used.
|
||||
func sortByDistance(l int, byIndex bool, nodes []uint64, hash uint64, swap func(i, j int)) {
|
||||
dist := make([]uint64, l)
|
||||
for i := 0; i < l; i++ {
|
||||
dist[i] = getDistance(byIndex, i, nodes, hash)
|
||||
}
|
||||
|
||||
s := &sorter{
|
||||
l: l,
|
||||
swap: func(i, j int) {
|
||||
swap(i, j)
|
||||
dist[i], dist[j] = dist[j], dist[i]
|
||||
},
|
||||
less: func(i, j int) bool {
|
||||
return dist[i] < dist[j]
|
||||
},
|
||||
}
|
||||
sort.Sort(s)
|
||||
}
|
||||
|
||||
// getDistance returns the distance from the i-th node to h.
|
||||
// If nodes is not nil, the distance is calculated from nodes[i].
|
||||
// Otherwise, if byIndex is true, the index i itself is used as the node value.
|
||||
// Otherwise, the hash of the little-endian encoding of index i is used.
|
||||
func getDistance(byIndex bool, i int, nodes []uint64, h uint64) uint64 {
|
||||
if nodes != nil {
|
||||
return distance(nodes[i], h)
|
||||
} else if byIndex {
|
||||
return distance(uint64(i), h)
|
||||
} else {
|
||||
buf := make([]byte, 8)
|
||||
binary.LittleEndian.PutUint64(buf, uint64(i))
|
||||
return distance(Hash(buf), h)
|
||||
}
|
||||
}
|
||||
|
||||
func allSameF64(fs []float64) bool {
|
||||
for i := range fs {
|
||||
if fs[i] != fs[0] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
|
177
hrw_test.go
177
hrw_test.go
|
@ -61,7 +61,7 @@ func Example() {
|
|||
}
|
||||
|
||||
func (h hashString) Hash() uint64 {
|
||||
return Hash([]byte(h))
|
||||
return StringHash(string(h))
|
||||
}
|
||||
|
||||
func TestSortSliceByIndex(t *testing.T) {
|
||||
|
@ -76,6 +76,9 @@ func TestValidateWeights(t *testing.T) {
|
|||
weights := []float64{10, 10, 10, 2, 2, 2}
|
||||
err := ValidateWeights(weights)
|
||||
require.Error(t, err)
|
||||
weights = []float64{math.NaN(), 1, 1, 0.2, 0.2, 0.2}
|
||||
err = ValidateWeights(weights)
|
||||
require.Error(t, err)
|
||||
weights = []float64{1, 1, 1, 0.2, 0.2, 0.2}
|
||||
err = ValidateWeights(weights)
|
||||
require.NoError(t, err)
|
||||
|
@ -98,36 +101,6 @@ func TestSortSliceByValue(t *testing.T) {
|
|||
require.Equal(t, expect, actual)
|
||||
}
|
||||
|
||||
func TestSortByRule(t *testing.T) {
|
||||
t.Run("direct", func(t *testing.T) {
|
||||
// 0 1 2 3 4 5
|
||||
actual := []string{"a", "b", "c", "d", "e", "f"}
|
||||
// 4 2 0 5 3 1
|
||||
expect := []string{"c", "f", "b", "e", "a", "d"}
|
||||
rule := []uint64{4, 2, 0, 5, 3, 1}
|
||||
|
||||
sortByRuleDirect(
|
||||
func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
|
||||
6, rule)
|
||||
|
||||
require.Equal(t, expect, actual)
|
||||
})
|
||||
|
||||
t.Run("inverse", func(t *testing.T) {
|
||||
// 0 1 2 3 4 5
|
||||
actual := []string{"a", "b", "c", "d", "e", "f"}
|
||||
// 4 2 0 5 3 1
|
||||
expect := []string{"e", "c", "a", "f", "d", "b"}
|
||||
rule := []uint64{4, 2, 0, 5, 3, 1}
|
||||
|
||||
sortByRuleInverse(
|
||||
func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
|
||||
6, rule)
|
||||
|
||||
require.Equal(t, expect, actual)
|
||||
})
|
||||
}
|
||||
|
||||
func TestSortSliceByValueFail(t *testing.T) {
|
||||
t.Run("empty slice", func(t *testing.T) {
|
||||
var (
|
||||
|
@ -140,15 +113,13 @@ func TestSortSliceByValueFail(t *testing.T) {
|
|||
t.Run("must be slice", func(t *testing.T) {
|
||||
actual := 10
|
||||
hash := Hash(testKey)
|
||||
require.NotPanics(t, func() { SortSliceByValue(actual, hash) })
|
||||
require.Panics(t, func() { SortSliceByValue(actual, hash) })
|
||||
})
|
||||
|
||||
t.Run("must 'fail' for unknown type", func(t *testing.T) {
|
||||
actual := []unknown{1, 2, 3, 4, 5}
|
||||
expect := []unknown{1, 2, 3, 4, 5}
|
||||
hash := Hash(testKey)
|
||||
SortSliceByValue(actual, hash)
|
||||
require.Equal(t, expect, actual)
|
||||
require.Panics(t, func() { SortSliceByValue(actual, hash) })
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -160,6 +131,23 @@ func TestSortSliceByValueHasher(t *testing.T) {
|
|||
require.Equal(t, expect, actual)
|
||||
}
|
||||
|
||||
func TestSortHasherSliceByValue(t *testing.T) {
|
||||
actual := []hashString{"a", "b", "c", "d", "e", "f"}
|
||||
expect := []hashString{"d", "f", "c", "b", "a", "e"}
|
||||
hash := Hash(testKey)
|
||||
SortHasherSliceByValue(actual, hash)
|
||||
require.EqualValues(t, expect, actual)
|
||||
}
|
||||
|
||||
func TestSortHasherSliceByWeightValue(t *testing.T) {
|
||||
actual := []hashString{"a", "b", "c", "d", "e", "f"}
|
||||
weights := []float64{1.0, 1.0, 1.0, 1.0, 1.0, 1.0}
|
||||
expect := []hashString{"d", "f", "c", "b", "a", "e"}
|
||||
hash := Hash(testKey)
|
||||
SortHasherSliceByWeightValue(actual, weights, hash)
|
||||
require.EqualValues(t, expect, actual)
|
||||
}
|
||||
|
||||
func TestSortSliceByValueIntSlice(t *testing.T) {
|
||||
cases := []slices{
|
||||
{
|
||||
|
@ -202,11 +190,6 @@ func TestSortSliceByValueIntSlice(t *testing.T) {
|
|||
expect: []uint32{5, 1, 2, 0, 3, 4},
|
||||
},
|
||||
|
||||
{
|
||||
actual: Uint32Slice{0, 1, 2, 3, 4, 5},
|
||||
expect: Uint32Slice{0, 1, 2, 3, 4, 5},
|
||||
},
|
||||
|
||||
{
|
||||
actual: []int64{0, 1, 2, 3, 4, 5},
|
||||
expect: []int64{5, 3, 0, 1, 4, 2},
|
||||
|
@ -670,6 +653,36 @@ func BenchmarkSortByValue_fnv_1000(b *testing.B) {
|
|||
benchmarkSortByValue(b, 1000, hash)
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByValue_Reflection_fnv_10(b *testing.B) {
|
||||
hash := Hash(testKey)
|
||||
benchmarkSortHashersByValueReflection(b, 10, hash)
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByValue_Reflection_fnv_100(b *testing.B) {
|
||||
hash := Hash(testKey)
|
||||
benchmarkSortHashersByValueReflection(b, 100, hash)
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByValue_Reflection_fnv_1000(b *testing.B) {
|
||||
hash := Hash(testKey)
|
||||
benchmarkSortHashersByValueReflection(b, 1000, hash)
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByValue_Typed_fnv_10(b *testing.B) {
|
||||
hash := Hash(testKey)
|
||||
benchmarkSortHashersByValueTyped(b, 10, hash)
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByValue_Typed_fnv_100(b *testing.B) {
|
||||
hash := Hash(testKey)
|
||||
benchmarkSortHashersByValueTyped(b, 100, hash)
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByValue_Typed_fnv_1000(b *testing.B) {
|
||||
hash := Hash(testKey)
|
||||
benchmarkSortHashersByValueTyped(b, 1000, hash)
|
||||
}
|
||||
|
||||
func BenchmarkSortByWeight_fnv_10(b *testing.B) {
|
||||
hash := Hash(testKey)
|
||||
_ = benchmarkSortByWeight(b, 10, hash)
|
||||
|
@ -715,6 +728,30 @@ func BenchmarkSortByWeightValue_fnv_1000(b *testing.B) {
|
|||
benchmarkSortByWeightValue(b, 1000, hash)
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByWeightValueReflection_fnv_10(b *testing.B) {
|
||||
benchmarkSortHashersByWeightValueRelection(b, 10, Hash(testKey))
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByWeightValueReflection_fnv_100(b *testing.B) {
|
||||
benchmarkSortHashersByWeightValueRelection(b, 100, Hash(testKey))
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByWeightValueReflection_fnv_1000(b *testing.B) {
|
||||
benchmarkSortHashersByWeightValueRelection(b, 1000, Hash(testKey))
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByWeightValueTyped_fnv_10(b *testing.B) {
|
||||
benchmarkSortHashersByWeightValueTyped(b, 10, Hash(testKey))
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByWeightValueTyped_fnv_100(b *testing.B) {
|
||||
benchmarkSortHashersByWeightValueTyped(b, 100, Hash(testKey))
|
||||
}
|
||||
|
||||
func BenchmarkSortHashersByWeightValueTyped_fnv_1000(b *testing.B) {
|
||||
benchmarkSortHashersByWeightValueTyped(b, 1000, Hash(testKey))
|
||||
}
|
||||
|
||||
func benchmarkSort(b *testing.B, n int, hash uint64) uint64 {
|
||||
servers := make([]uint64, n)
|
||||
for i := uint64(0); i < uint64(len(servers)); i++ {
|
||||
|
@ -808,3 +845,63 @@ func benchmarkSortByWeightValue(b *testing.B, n int, hash uint64) {
|
|||
SortSliceByWeightValue(servers, weights, hash)
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkSortHashersByWeightValueRelection(b *testing.B, n int, hash uint64) {
|
||||
servers := make([]hashString, n)
|
||||
weights := make([]float64, n)
|
||||
for i := uint64(0); i < uint64(len(servers)); i++ {
|
||||
weights[i] = float64(uint64(n)-i) / float64(n)
|
||||
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
SortSliceByWeightValue(servers, weights, hash)
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkSortHashersByWeightValueTyped(b *testing.B, n int, hash uint64) {
|
||||
servers := make([]hashString, n)
|
||||
weights := make([]float64, n)
|
||||
for i := uint64(0); i < uint64(len(servers)); i++ {
|
||||
weights[i] = float64(uint64(n)-i) / float64(n)
|
||||
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
SortHasherSliceByWeightValue(servers, weights, hash)
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkSortHashersByValueReflection(b *testing.B, n int, hash uint64) {
|
||||
servers := make([]hashString, n)
|
||||
for i := uint64(0); i < uint64(len(servers)); i++ {
|
||||
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
SortSliceByValue(servers, hash)
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkSortHashersByValueTyped(b *testing.B, n int, hash uint64) {
|
||||
servers := make([]hashString, n)
|
||||
for i := uint64(0); i < uint64(len(servers)); i++ {
|
||||
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
SortHasherSliceByValue(servers, hash)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue