hrw/hrw.go

303 lines
7.4 KiB
Go
Raw Permalink Normal View History

2019-01-29 22:58:30 +00:00
// Package hrw implements Rendezvous hashing.
// http://en.wikipedia.org/wiki/Rendezvous_hashing.
package hrw
import (
"encoding/binary"
"errors"
"math"
2019-01-29 22:58:30 +00:00
"reflect"
"sort"
"github.com/spaolacci/murmur3"
2019-01-29 22:58:30 +00:00
)
type (
swapper func(i, j int)
// Hasher interface used by SortSliceByValue
2019-01-29 22:58:30 +00:00
Hasher interface{ Hash() uint64 }
hashed struct {
length int
sorted []uint64
distance []uint64
2019-01-29 22:58:30 +00:00
}
weighted struct {
h hashed
normal []float64 // normalized input weights
}
2019-01-29 22:58:30 +00:00
)
// Boundaries of valid normalized weights
const (
NormalizedMaxWeight = 1.0
NormalizedMinWeight = 0.0
)
func distance(x uint64, y uint64) uint64 {
2019-01-29 22:58:30 +00:00
acc := x ^ y
// here used mmh3 64 bit finalizer
// https://github.com/aappleby/smhasher/blob/61a0530f28277f2e850bfc39600ce61d02b518de/src/MurmurHash3.cpp#L81
2019-01-29 22:58:30 +00:00
acc ^= acc >> 33
acc = acc * 0xff51afd7ed558ccd
2019-01-29 22:58:30 +00:00
acc ^= acc >> 33
acc = acc * 0xc4ceb9fe1a85ec53
2019-01-29 22:58:30 +00:00
acc ^= acc >> 33
return acc
}
func (h hashed) Len() int { return h.length }
func (h hashed) Less(i, j int) bool { return h.distance[i] < h.distance[j] }
func (h hashed) Swap(i, j int) {
h.sorted[i], h.sorted[j] = h.sorted[j], h.sorted[i]
h.distance[i], h.distance[j] = h.distance[j], h.distance[i]
}
func (w weighted) Len() int { return w.h.length }
func (w weighted) Less(i, j int) bool {
// `maxUint64 - distance` makes the shorter distance more valuable
// it is necessary for operation with normalized values
wi := float64(^uint64(0)-w.h.distance[i]) * w.normal[i]
wj := float64(^uint64(0)-w.h.distance[j]) * w.normal[j]
return wi > wj // higher distance must be placed lower to be first
}
func (w weighted) Swap(i, j int) { w.normal[i], w.normal[j] = w.normal[j], w.normal[i]; w.h.Swap(i, j) }
2019-01-29 22:58:30 +00:00
// Hash uses murmur3 hash to return uint64
func Hash(key []byte) uint64 {
return murmur3.Sum64(key)
}
// Sort receive nodes and hash, and sort it by distance
func Sort(nodes []uint64, hash uint64) []uint64 {
2019-01-29 22:58:30 +00:00
var (
l = len(nodes)
h = hashed{
length: l,
sorted: make([]uint64, 0, l),
distance: make([]uint64, 0, l),
2019-01-29 22:58:30 +00:00
}
)
for i := range nodes {
2019-01-29 22:58:30 +00:00
h.sorted = append(h.sorted, uint64(i))
h.distance = append(h.distance, distance(nodes[i], hash))
2019-01-29 22:58:30 +00:00
}
sort.Sort(h)
return h.sorted
}
// SortByWeight receive nodes, weights and hash, and sort it by distance * weight
func SortByWeight(nodes []uint64, weights []float64, hash uint64) []uint64 {
// check if numbers of weights and nodes are equal
uniform := true
for i := range weights {
// check if all nodes have the same distance
if weights[i] != weights[0] {
uniform = false
break
}
}
l := len(nodes)
w := weighted{
h: hashed{
length: l,
sorted: make([]uint64, 0, l),
distance: make([]uint64, 0, l),
},
normal: make([]float64, l),
}
// if all nodes have the same distance then sort uniformly
if uniform || len(weights) != l {
return Sort(nodes, hash)
}
for i := range nodes {
w.h.sorted = append(w.h.sorted, uint64(i))
w.h.distance = append(w.h.distance, distance(nodes[i], hash))
}
copy(w.normal, weights)
sort.Sort(w)
return w.h.sorted
}
// SortSliceByValue received []T and hash to sort by value-distance
func SortSliceByValue(slice interface{}, hash uint64) {
rule := prepareRule(slice)
if rule != nil {
swap := reflect.Swapper(slice)
rule = Sort(rule, hash)
sortByRuleInverse(swap, uint64(len(rule)), rule)
}
}
// SortSliceByWeightValue received []T, weights and hash to sort by value-distance * weights
func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) {
rule := prepareRule(slice)
if rule != nil {
swap := reflect.Swapper(slice)
rule = SortByWeight(rule, weights, hash)
sortByRuleInverse(swap, uint64(len(rule)), rule)
}
}
// SortSliceByIndex received []T and hash to sort by index-distance
func SortSliceByIndex(slice interface{}, hash uint64) {
length := uint64(reflect.ValueOf(slice).Len())
swap := reflect.Swapper(slice)
rule := make([]uint64, 0, length)
for i := uint64(0); i < length; i++ {
rule = append(rule, i)
}
rule = Sort(rule, hash)
sortByRuleInverse(swap, length, rule)
}
// SortSliceByWeightIndex received []T, weights and hash to sort by index-distance * weights
func SortSliceByWeightIndex(slice interface{}, weights []float64, hash uint64) {
length := uint64(reflect.ValueOf(slice).Len())
swap := reflect.Swapper(slice)
rule := make([]uint64, 0, length)
for i := uint64(0); i < length; i++ {
rule = append(rule, i)
}
rule = SortByWeight(rule, weights, hash)
sortByRuleInverse(swap, length, rule)
}
func sortByRuleDirect(swap swapper, length uint64, rule []uint64) {
done := make([]bool, length)
for i := uint64(0); i < length; i++ {
if done[i] {
continue
}
for j := rule[i]; !done[rule[j]]; j = rule[j] {
swap(int(i), int(j))
done[j] = true
}
}
}
func sortByRuleInverse(swap swapper, length uint64, rule []uint64) {
done := make([]bool, length)
for i := uint64(0); i < length; i++ {
if done[i] {
continue
}
for j := i; !done[rule[j]]; j = rule[j] {
swap(int(j), int(rule[j]))
done[j] = true
}
}
}
func prepareRule(slice interface{}) []uint64 {
2019-01-29 22:58:30 +00:00
t := reflect.TypeOf(slice)
if t.Kind() != reflect.Slice {
return nil
2019-01-29 22:58:30 +00:00
}
var (
val = reflect.ValueOf(slice)
length = val.Len()
rule = make([]uint64, 0, length)
)
if length == 0 {
return nil
2019-01-29 22:58:30 +00:00
}
2019-04-12 11:19:18 +00:00
switch slice := slice.(type) {
case []int:
var key = make([]byte, 16)
2019-01-29 22:58:30 +00:00
for i := 0; i < length; i++ {
binary.BigEndian.PutUint64(key, uint64(slice[i]))
rule = append(rule, Hash(key))
2019-01-29 22:58:30 +00:00
}
2019-04-12 11:19:18 +00:00
case []uint:
var key = make([]byte, 16)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint64(key, uint64(slice[i]))
rule = append(rule, Hash(key))
}
2019-04-12 11:19:18 +00:00
case []int8:
for i := 0; i < length; i++ {
key := byte(slice[i])
rule = append(rule, Hash([]byte{key}))
}
2019-04-12 11:19:18 +00:00
case []uint8:
for i := 0; i < length; i++ {
key := slice[i]
rule = append(rule, Hash([]byte{key}))
}
2019-04-12 11:19:18 +00:00
case []int16:
var key = make([]byte, 8)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint16(key, uint16(slice[i]))
rule = append(rule, Hash(key))
}
2019-04-12 11:19:18 +00:00
case []uint16:
var key = make([]byte, 8)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint16(key, slice[i])
rule = append(rule, Hash(key))
}
2019-04-12 11:19:18 +00:00
case []int32:
2019-02-01 09:57:05 +00:00
var key = make([]byte, 16)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint32(key, uint32(slice[i]))
rule = append(rule, Hash(key))
2019-02-01 09:57:05 +00:00
}
2019-04-12 11:19:18 +00:00
case []uint32:
var key = make([]byte, 16)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint32(key, slice[i])
rule = append(rule, Hash(key))
}
2019-04-12 11:19:18 +00:00
case []int64:
var key = make([]byte, 32)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint64(key, uint64(slice[i]))
rule = append(rule, Hash(key))
}
2019-04-12 11:19:18 +00:00
case []uint64:
var key = make([]byte, 32)
for i := 0; i < length; i++ {
binary.BigEndian.PutUint64(key, slice[i])
rule = append(rule, Hash(key))
}
2019-04-12 11:19:18 +00:00
case []string:
2019-01-29 22:58:30 +00:00
for i := 0; i < length; i++ {
rule = append(rule, Hash([]byte(slice[i])))
2019-01-29 22:58:30 +00:00
}
2019-04-12 11:19:18 +00:00
default:
if _, ok := val.Index(0).Interface().(Hasher); !ok {
return nil
2019-04-12 11:19:18 +00:00
}
2019-01-29 22:58:30 +00:00
for i := 0; i < length; i++ {
h := val.Index(i).Interface().(Hasher)
rule = append(rule, h.Hash())
2019-01-29 22:58:30 +00:00
}
}
return rule
}
// ValidateWeights checks if weights are normalized between 0.0 and 1.0
func ValidateWeights(weights []float64) error {
for i := range weights {
if math.IsNaN(weights[i]) || weights[i] > NormalizedMaxWeight || weights[i] < NormalizedMinWeight {
return errors.New("weights are not normalized")
}
}
return nil
}