// Package hrw implements Rendezvous hashing. // http://en.wikipedia.org/wiki/Rendezvous_hashing. package hrw import ( "encoding/binary" "errors" "math" "reflect" "sort" "github.com/twmb/murmur3" ) type ( // Hasher interface used by SortSliceByValue Hasher interface{ Hash() uint64 } sorter struct { l int less func(i, j int) bool swap func(i, j int) } hasherSorter[T Hasher, N interface{ ~uint64 | ~float64 }] struct { slice []T dist []N asc bool } ) // Boundaries of valid normalized weights const ( NormalizedMaxWeight = 1.0 NormalizedMinWeight = 0.0 ) func (s *sorter) Len() int { return s.l } func (s *sorter) Less(i, j int) bool { return s.less(i, j) } func (s *sorter) Swap(i, j int) { s.swap(i, j) } func (s *hasherSorter[T, N]) Len() int { return len(s.slice) } func (s *hasherSorter[T, N]) Less(i, j int) bool { if s.asc { return s.dist[i] < s.dist[j] } return s.dist[i] > s.dist[j] } func (s *hasherSorter[T, N]) Swap(i, j int) { s.slice[i], s.slice[j] = s.slice[j], s.slice[i] s.dist[i], s.dist[j] = s.dist[j], s.dist[i] } func distance(x uint64, y uint64) uint64 { acc := x ^ y // here used mmh3 64 bit finalizer // https://github.com/aappleby/smhasher/blob/61a0530f28277f2e850bfc39600ce61d02b518de/src/MurmurHash3.cpp#L81 acc ^= acc >> 33 acc = acc * 0xff51afd7ed558ccd acc ^= acc >> 33 acc = acc * 0xc4ceb9fe1a85ec53 acc ^= acc >> 33 return acc } // Hash uses murmur3 hash to return uint64 func Hash(key []byte) uint64 { return murmur3.Sum64(key) } // StringHash uses murmur3 hash to return uint64 func StringHash(key string) uint64 { return murmur3.StringSum64(key) } // Sort receive nodes and hash, and sort it by distance func Sort(nodes []uint64, hash uint64) []uint64 { l := len(nodes) sorted := make([]uint64, l) dist := make([]uint64, l) for i := range nodes { sorted[i] = uint64(i) dist[i] = distance(nodes[i], hash) } sort.Slice(sorted, func(i, j int) bool { return dist[sorted[i]] < dist[sorted[j]] }) return sorted } // SortByWeight receive nodes, weights and hash, and sort it by distance * weight func SortByWeight(nodes []uint64, weights []float64, hash uint64) []uint64 { result := make([]uint64, len(nodes)) copy(nodes, result) sortByWeight(len(nodes), false, nodes, weights, hash, reflect.Swapper(result)) return result } // SortSliceByValue received []T and hash to sort by value-distance func SortSliceByValue(slice interface{}, hash uint64) { rule := prepareRule(slice) if rule != nil { swap := reflect.Swapper(slice) sortByDistance(len(rule), false, rule, hash, swap) } } // SortHasherSliceByValue receives []Hasher and hash to sort by value-distance. func SortHasherSliceByValue[T Hasher](slice []T, hash uint64) { if len(slice) == 0 { return } dist := make([]uint64, len(slice)) for i := range dist { dist[i] = distance(slice[i].Hash(), hash) } sortHasherByDistance(slice, false, dist) } // SortSliceByWeightValue received []T, weights and hash to sort by value-distance * weights func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) { rule := prepareRule(slice) if rule != nil { swap := reflect.Swapper(slice) sortByWeight(reflect.ValueOf(slice).Len(), false, rule, weights, hash, swap) } } // SortHasherSliceByWeightValue receives []Hasher, weights and hash to sort by value-distance * weights. func SortHasherSliceByWeightValue[T Hasher](slice []T, weights []float64, hash uint64) { if len(slice) == 0 { return } if allSameF64(weights) { dist := make([]uint64, len(slice)) for i := range dist { dist[i] = distance(slice[i].Hash(), hash) } sortHasherByDistance(slice, false, dist) return } dist := make([]float64, len(slice)) for i := range dist { d := distance(slice[i].Hash(), hash) // `maxUint64 - distance` makes the shorter distance more valuable // it is necessary for operation with normalized values dist[i] = float64(^uint64(0)-d) * weights[i] } sort.Sort(&hasherSorter[T, float64]{ slice: slice, dist: dist, asc: false, }) } // sortHasherByDistance is similar to sortByDistance but accepts slice directly. func sortHasherByDistance[T Hasher](slice []T, byIndex bool, dist []uint64) { sort.Sort(&hasherSorter[T, uint64]{ slice: slice, dist: dist, asc: true, }) } // SortSliceByIndex received []T and hash to sort by index-distance func SortSliceByIndex(slice interface{}, hash uint64) { length := reflect.ValueOf(slice).Len() swap := reflect.Swapper(slice) sortByDistance(length, true, nil, hash, swap) } // SortSliceByWeightIndex received []T, weights and hash to sort by index-distance * weights func SortSliceByWeightIndex(slice interface{}, weights []float64, hash uint64) { length := reflect.ValueOf(slice).Len() swap := reflect.Swapper(slice) sortByWeight(length, true, nil, weights, hash, swap) } func prepareRule(slice interface{}) []uint64 { t := reflect.TypeOf(slice) if t.Kind() != reflect.Slice { panic("HRW sort expects slice, got " + t.Kind().String()) } var ( val = reflect.ValueOf(slice) length = val.Len() rule = make([]uint64, 0, length) ) if length == 0 { return nil } switch slice := slice.(type) { case []int: var key = make([]byte, 16) for i := 0; i < length; i++ { binary.BigEndian.PutUint64(key, uint64(slice[i])) rule = append(rule, Hash(key)) } case []uint: var key = make([]byte, 16) for i := 0; i < length; i++ { binary.BigEndian.PutUint64(key, uint64(slice[i])) rule = append(rule, Hash(key)) } case []int8: for i := 0; i < length; i++ { key := byte(slice[i]) rule = append(rule, Hash([]byte{key})) } case []uint8: for i := 0; i < length; i++ { key := slice[i] rule = append(rule, Hash([]byte{key})) } case []int16: var key = make([]byte, 8) for i := 0; i < length; i++ { binary.BigEndian.PutUint16(key, uint16(slice[i])) rule = append(rule, Hash(key)) } case []uint16: var key = make([]byte, 8) for i := 0; i < length; i++ { binary.BigEndian.PutUint16(key, slice[i]) rule = append(rule, Hash(key)) } case []int32: var key = make([]byte, 16) for i := 0; i < length; i++ { binary.BigEndian.PutUint32(key, uint32(slice[i])) rule = append(rule, Hash(key)) } case []uint32: var key = make([]byte, 16) for i := 0; i < length; i++ { binary.BigEndian.PutUint32(key, slice[i]) rule = append(rule, Hash(key)) } case []int64: var key = make([]byte, 32) for i := 0; i < length; i++ { binary.BigEndian.PutUint64(key, uint64(slice[i])) rule = append(rule, Hash(key)) } case []uint64: var key = make([]byte, 32) for i := 0; i < length; i++ { binary.BigEndian.PutUint64(key, slice[i]) rule = append(rule, Hash(key)) } case []string: for i := 0; i < length; i++ { rule = append(rule, Hash([]byte(slice[i]))) } default: if _, ok := val.Index(0).Interface().(Hasher); !ok { panic("slice elements must implement hrw.Hasher") } for i := 0; i < length; i++ { h := val.Index(i).Interface().(Hasher) rule = append(rule, h.Hash()) } } return rule } // ValidateWeights checks if weights are normalized between 0.0 and 1.0 func ValidateWeights(weights []float64) error { for i := range weights { if math.IsNaN(weights[i]) || weights[i] > NormalizedMaxWeight || weights[i] < NormalizedMinWeight { return errors.New("weights are not normalized") } } return nil } // sortByWeight sorts nodes by weight using provided swapper. // nodes contains hrw hashes. If it is nil, indices are used. func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash uint64, swap func(i, j int)) { // if all nodes have the same distance then sort uniformly if allSameF64(weights) { sortByDistance(l, byIndex, nodes, hash, swap) return } dist := make([]float64, l) for i := 0; i < l; i++ { d := getDistance(byIndex, i, nodes, hash) // `maxUint64 - distance` makes the shorter distance more valuable // it is necessary for operation with normalized values dist[i] = float64(^uint64(0)-d) * weights[i] } s := &sorter{ l: l, swap: func(i, j int) { swap(i, j) dist[i], dist[j] = dist[j], dist[i] }, less: func(i, j int) bool { return dist[i] > dist[j] // higher distance must be placed lower to be first }, } sort.Sort(s) } // sortByDistance sorts nodes by hrw distance using provided swapper. // nodes contains hrw hashes. If it is nil, indices are used. func sortByDistance(l int, byIndex bool, nodes []uint64, hash uint64, swap func(i, j int)) { dist := make([]uint64, l) for i := 0; i < l; i++ { dist[i] = getDistance(byIndex, i, nodes, hash) } s := &sorter{ l: l, swap: func(i, j int) { swap(i, j) dist[i], dist[j] = dist[j], dist[i] }, less: func(i, j int) bool { return dist[i] < dist[j] }, } sort.Sort(s) } // getDistance return distance from nodes[i] to h. // If byIndex is true, nodes index is used. // Else if nodes[i] != nil, distance is calculated from this value. // Otherwise, and hash from node index is taken. func getDistance(byIndex bool, i int, nodes []uint64, h uint64) uint64 { if nodes != nil { return distance(nodes[i], h) } else if byIndex { return distance(uint64(i), h) } else { buf := make([]byte, 8) binary.LittleEndian.PutUint64(buf, uint64(i)) return distance(Hash(buf), h) } } func allSameF64(fs []float64) bool { for i := range fs { if fs[i] != fs[0] { return false } } return true }