// Forked from TrueCloudLab/hrw (284 lines, 7.4 KiB, Go).

// Package hrw implements Rendezvous hashing.
// See https://en.wikipedia.org/wiki/Rendezvous_hashing.
package hrw
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"errors"
|
|
"math"
|
|
"reflect"
|
|
"sort"
|
|
|
|
"github.com/spaolacci/murmur3"
|
|
)
|
|
|
|
// Hasher is implemented by slice elements that supply their own 64-bit hash.
// prepareRule falls back to it for element types it has no built-in case for.
type Hasher interface {
	Hash() uint64
}

// sorter adapts a length plus comparison and swap callbacks to sort.Interface.
type sorter struct {
	l    int                 // number of elements being sorted
	less func(i, j int) bool // ordering predicate over positions i and j
	swap func(i, j int)      // exchanges the elements at positions i and j
}
|
|
|
|
// Bounds of the normalized weight range accepted by ValidateWeights.
const (
	// NormalizedMaxWeight is the largest valid normalized weight.
	NormalizedMaxWeight = 1.0
	// NormalizedMinWeight is the smallest valid normalized weight.
	NormalizedMinWeight = 0.0
)
|
|
|
|
// Len returns the number of elements covered by the sorter (sort.Interface).
func (s *sorter) Len() int {
	return s.l
}
|
|
// Less delegates the comparison of positions i and j to the less callback
// (sort.Interface).
func (s *sorter) Less(i, j int) bool {
	return s.less(i, j)
}
|
|
// Swap delegates the exchange of positions i and j to the swap callback
// (sort.Interface).
func (s *sorter) Swap(i, j int) {
	s.swap(i, j)
}
|
|
|
|
// distance returns the hrw distance between x and y: the XOR of both values
// passed through the MurmurHash3 64-bit finalizer.
// https://github.com/aappleby/smhasher/blob/61a0530f28277f2e850bfc39600ce61d02b518de/src/MurmurHash3.cpp#L81
func distance(x uint64, y uint64) uint64 {
	mixed := x ^ y
	// Two xor-shift/multiply rounds followed by a final xor-shift.
	for _, mul := range [...]uint64{0xff51afd7ed558ccd, 0xc4ceb9fe1a85ec53} {
		mixed ^= mixed >> 33
		mixed *= mul
	}
	return mixed ^ (mixed >> 33)
}
|
|
|
|
// Hash uses murmur3 hash to return uint64
|
|
func Hash(key []byte) uint64 {
|
|
return murmur3.Sum64(key)
|
|
}
|
|
|
|
// Sort receive nodes and hash, and sort it by distance
|
|
func Sort(nodes []uint64, hash uint64) []uint64 {
|
|
l := len(nodes)
|
|
sorted := make([]uint64, l)
|
|
dist := make([]uint64, l)
|
|
for i := range nodes {
|
|
sorted[i] = uint64(i)
|
|
dist[i] = distance(nodes[i], hash)
|
|
}
|
|
|
|
sort.Slice(sorted, func(i, j int) bool {
|
|
return dist[sorted[i]] < dist[sorted[j]]
|
|
})
|
|
return sorted
|
|
}
|
|
|
|
// SortByWeight receive nodes, weights and hash, and sort it by distance * weight
|
|
func SortByWeight(nodes []uint64, weights []float64, hash uint64) []uint64 {
|
|
result := make([]uint64, len(nodes))
|
|
copy(nodes, result)
|
|
sortByWeight(len(nodes), false, nodes, weights, hash, reflect.Swapper(result))
|
|
return result
|
|
}
|
|
|
|
// SortSliceByValue received []T and hash to sort by value-distance
|
|
func SortSliceByValue(slice interface{}, hash uint64) {
|
|
rule := prepareRule(slice)
|
|
if rule != nil {
|
|
swap := reflect.Swapper(slice)
|
|
sortByDistance(len(rule), false, rule, hash, swap)
|
|
}
|
|
}
|
|
|
|
// SortSliceByWeightValue received []T, weights and hash to sort by value-distance * weights
|
|
func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) {
|
|
rule := prepareRule(slice)
|
|
if rule != nil {
|
|
swap := reflect.Swapper(slice)
|
|
sortByWeight(reflect.ValueOf(slice).Len(), false, rule, weights, hash, swap)
|
|
}
|
|
}
|
|
|
|
// SortSliceByIndex received []T and hash to sort by index-distance
|
|
func SortSliceByIndex(slice interface{}, hash uint64) {
|
|
length := reflect.ValueOf(slice).Len()
|
|
swap := reflect.Swapper(slice)
|
|
sortByDistance(length, true, nil, hash, swap)
|
|
}
|
|
|
|
// SortSliceByWeightIndex received []T, weights and hash to sort by index-distance * weights
|
|
func SortSliceByWeightIndex(slice interface{}, weights []float64, hash uint64) {
|
|
length := reflect.ValueOf(slice).Len()
|
|
swap := reflect.Swapper(slice)
|
|
sortByWeight(length, true, nil, weights, hash, swap)
|
|
}
|
|
|
|
func prepareRule(slice interface{}) []uint64 {
|
|
t := reflect.TypeOf(slice)
|
|
if t.Kind() != reflect.Slice {
|
|
return nil
|
|
}
|
|
|
|
var (
|
|
val = reflect.ValueOf(slice)
|
|
length = val.Len()
|
|
rule = make([]uint64, 0, length)
|
|
)
|
|
|
|
if length == 0 {
|
|
return nil
|
|
}
|
|
|
|
switch slice := slice.(type) {
|
|
case []int:
|
|
var key = make([]byte, 16)
|
|
for i := 0; i < length; i++ {
|
|
binary.BigEndian.PutUint64(key, uint64(slice[i]))
|
|
rule = append(rule, Hash(key))
|
|
}
|
|
case []uint:
|
|
var key = make([]byte, 16)
|
|
for i := 0; i < length; i++ {
|
|
binary.BigEndian.PutUint64(key, uint64(slice[i]))
|
|
rule = append(rule, Hash(key))
|
|
}
|
|
case []int8:
|
|
for i := 0; i < length; i++ {
|
|
key := byte(slice[i])
|
|
rule = append(rule, Hash([]byte{key}))
|
|
}
|
|
case []uint8:
|
|
for i := 0; i < length; i++ {
|
|
key := slice[i]
|
|
rule = append(rule, Hash([]byte{key}))
|
|
}
|
|
case []int16:
|
|
var key = make([]byte, 8)
|
|
for i := 0; i < length; i++ {
|
|
binary.BigEndian.PutUint16(key, uint16(slice[i]))
|
|
rule = append(rule, Hash(key))
|
|
}
|
|
case []uint16:
|
|
var key = make([]byte, 8)
|
|
for i := 0; i < length; i++ {
|
|
binary.BigEndian.PutUint16(key, slice[i])
|
|
rule = append(rule, Hash(key))
|
|
}
|
|
case []int32:
|
|
var key = make([]byte, 16)
|
|
for i := 0; i < length; i++ {
|
|
binary.BigEndian.PutUint32(key, uint32(slice[i]))
|
|
rule = append(rule, Hash(key))
|
|
}
|
|
case []uint32:
|
|
var key = make([]byte, 16)
|
|
for i := 0; i < length; i++ {
|
|
binary.BigEndian.PutUint32(key, slice[i])
|
|
rule = append(rule, Hash(key))
|
|
}
|
|
case []int64:
|
|
var key = make([]byte, 32)
|
|
for i := 0; i < length; i++ {
|
|
binary.BigEndian.PutUint64(key, uint64(slice[i]))
|
|
rule = append(rule, Hash(key))
|
|
}
|
|
case []uint64:
|
|
var key = make([]byte, 32)
|
|
for i := 0; i < length; i++ {
|
|
binary.BigEndian.PutUint64(key, slice[i])
|
|
rule = append(rule, Hash(key))
|
|
}
|
|
case []string:
|
|
for i := 0; i < length; i++ {
|
|
rule = append(rule, Hash([]byte(slice[i])))
|
|
}
|
|
|
|
default:
|
|
if _, ok := val.Index(0).Interface().(Hasher); !ok {
|
|
return nil
|
|
}
|
|
|
|
for i := 0; i < length; i++ {
|
|
h := val.Index(i).Interface().(Hasher)
|
|
rule = append(rule, h.Hash())
|
|
}
|
|
}
|
|
return rule
|
|
}
|
|
|
|
// ValidateWeights checks if weights are normalized between 0.0 and 1.0
|
|
func ValidateWeights(weights []float64) error {
|
|
for i := range weights {
|
|
if math.IsNaN(weights[i]) || weights[i] > NormalizedMaxWeight || weights[i] < NormalizedMinWeight {
|
|
return errors.New("weights are not normalized")
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func newSorter(l int, byIndex bool, nodes []uint64, h uint64,
|
|
swap func(i, j int)) (*sorter, []int, []uint64) {
|
|
ind := make([]int, l)
|
|
dist := make([]uint64, l)
|
|
for i := 0; i < l; i++ {
|
|
ind[i] = i
|
|
dist[i] = getDistance(byIndex, i, nodes, h)
|
|
}
|
|
|
|
return &sorter{
|
|
l: l,
|
|
swap: func(i, j int) {
|
|
swap(i, j)
|
|
ind[i], ind[j] = ind[j], ind[i]
|
|
},
|
|
}, ind, dist
|
|
}
|
|
|
|
// sortByWeight sorts nodes by weight using provided swapper.
|
|
// nodes contains hrw hashes. If it is nil, indices are used.
|
|
func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash uint64, swap func(i, j int)) {
|
|
// if all nodes have the same distance then sort uniformly
|
|
if allSameF64(weights) {
|
|
sortByDistance(l, byIndex, nodes, hash, swap)
|
|
return
|
|
}
|
|
|
|
s, ind, dist := newSorter(l, byIndex, nodes, hash, swap)
|
|
s.less = func(i, j int) bool {
|
|
ii, jj := ind[i], ind[j]
|
|
// `maxUint64 - distance` makes the shorter distance more valuable
|
|
// it is necessary for operation with normalized values
|
|
wi := float64(^uint64(0)-dist[ii]) * weights[ii]
|
|
wj := float64(^uint64(0)-dist[jj]) * weights[jj]
|
|
return wi > wj // higher distance must be placed lower to be first
|
|
}
|
|
sort.Sort(s)
|
|
}
|
|
|
|
// sortByDistance sorts nodes by hrw distance using provided swapper.
|
|
// nodes contains hrw hashes. If it is nil, indices are used.
|
|
func sortByDistance(l int, byIndex bool, nodes []uint64, hash uint64, swap func(i, j int)) {
|
|
s, ind, dist := newSorter(l, byIndex, nodes, hash, swap)
|
|
s.less = func(i, j int) bool {
|
|
return dist[ind[i]] < dist[ind[j]]
|
|
}
|
|
sort.Sort(s)
|
|
}
|
|
|
|
// getDistance return distance from nodes[i] to h.
|
|
// If byIndex is true, nodes index is used.
|
|
// Else if nodes[i] != nil, distance is calculated from this value.
|
|
// Otherwise, and hash from node index is taken.
|
|
func getDistance(byIndex bool, i int, nodes []uint64, h uint64) uint64 {
|
|
if nodes != nil {
|
|
return distance(nodes[i], h)
|
|
} else if byIndex {
|
|
return distance(uint64(i), h)
|
|
} else {
|
|
buf := make([]byte, 8)
|
|
binary.LittleEndian.PutUint64(buf, uint64(i))
|
|
return distance(Hash(buf), h)
|
|
}
|
|
}
|
|
|
|
// allSameF64 reports whether every element of fs compares equal to fs[0].
//
// An empty slice yields true. Each element, including the first, is compared
// with !=, so a slice containing NaN always yields false.
func allSameF64(fs []float64) bool {
	for _, f := range fs {
		if f != fs[0] {
			return false
		}
	}
	return true
}
|