[#8] hrw: Do not allocate 2 slices for sort

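Currently we allocate a `rule` slice holding the hashes and then build `dist`
from it, so every sort costs two slice allocations. In this commit we compute
`dist` directly from the hashers, which removes the intermediate `rule` slice.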

```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/hrw
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                         │      3      │                 4                  │
                                         │   sec/op    │   sec/op     vs base               │
SortHashersByValue_Typed_fnv_10-8          336.3n ± 3%   309.2n ± 2%  -8.06% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         2.424µ ± 3%   2.306µ ± 1%  -4.87% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        22.35µ ± 1%   21.73µ ± 1%  -2.75% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     346.6n ± 3%   347.1n ± 1%       ~ (p=0.631 n=10)
SortHashersByWeightValueTyped_fnv_100-8    2.637µ ± 6%   2.668µ ± 1%       ~ (p=0.481 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   2.609µ ± 4%   2.673µ ± 1%  +2.43% (p=0.000 n=10)
geomean                                    1.875µ        1.836µ       -2.06%

                                         │       3       │                  4                   │
                                         │     B/op      │     B/op      vs base                │
SortHashersByValue_Typed_fnv_10-8             296.0 ± 0%     216.0 ± 0%  -27.03% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8          1.883Ki ± 0%   1.008Ki ± 0%  -46.47% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        16.133Ki ± 0%   8.133Ki ± 0%  -49.59% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8        296.0 ± 0%     216.0 ± 0%  -27.03% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8     1.883Ki ± 0%   1.008Ki ± 0%  -46.47% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8    1.883Ki ± 0%   1.008Ki ± 0%  -46.47% (p=0.000 n=10)
geomean                                     1.442Ki          867.8       -41.24%

                                         │     3      │                 4                  │
                                         │ allocs/op  │ allocs/op   vs base                │
SortHashersByValue_Typed_fnv_10-8          5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
geomean                                    5.000        4.000       -20.00%
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
This commit is contained in:
Evgenii Stratonikov 2023-06-01 20:33:15 +03:00
parent 895ecf150f
commit c52f74d8e1

75
hrw.go
View file

@ -85,10 +85,15 @@ func SortSliceByValue(slice interface{}, hash uint64) {
// SortHasherSliceByValue receives []Hasher and hash to sort by value-distance. // SortHasherSliceByValue receives []Hasher and hash to sort by value-distance.
func SortHasherSliceByValue[T Hasher](slice []T, hash uint64) { func SortHasherSliceByValue[T Hasher](slice []T, hash uint64) {
rule := prepareHasherRule(slice) if len(slice) == 0 {
if rule != nil { return
sortHasherByDistance(slice, false, rule, hash)
} }
dist := make([]uint64, len(slice))
for i := range dist {
dist[i] = distance(slice[i].Hash(), hash)
}
sortHasherByDistance(slice, false, dist)
} }
// SortSliceByWeightValue received []T, weights and hash to sort by value-distance * weights // SortSliceByWeightValue received []T, weights and hash to sort by value-distance * weights
@ -102,49 +107,28 @@ func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) {
// SortHasherSliceByWeightValue receives []Hasher, weights and hash to sort by value-distance * weights. // SortHasherSliceByWeightValue receives []Hasher, weights and hash to sort by value-distance * weights.
func SortHasherSliceByWeightValue[T Hasher](slice []T, weights []float64, hash uint64) { func SortHasherSliceByWeightValue[T Hasher](slice []T, weights []float64, hash uint64) {
rule := prepareHasherRule(slice) if len(slice) == 0 {
if rule != nil { return
sortHasherByWeight(slice, false, rule, weights, hash)
} }
}
// sortHasherByDistance is similar to sortByDistance but accepts slice directly. if allSameF64(weights) {
func sortHasherByDistance[T Hasher](slice []T, byIndex bool, nodes []uint64, hash uint64) {
dist := make([]uint64, len(slice)) dist := make([]uint64, len(slice))
for i := range dist { for i := range dist {
dist[i] = getDistance(byIndex, i, nodes, hash) dist[i] = distance(slice[i].Hash(), hash)
} }
sortHasherByDistance(slice, false, dist)
s := &sorter{
l: len(slice),
swap: func(i, j int) {
slice[i], slice[j] = slice[j], slice[i]
dist[i], dist[j] = dist[j], dist[i]
},
less: func(i, j int) bool {
return dist[i] < dist[j]
},
}
sort.Sort(s)
}
// sortHasherByWeight is similar to sortByWeight but accepts slice directly.
func sortHasherByWeight[T Hasher](slice []T, byIndex bool, nodes []uint64, weights []float64, hash uint64) {
// if all nodes have the same distance then sort uniformly
if allSameF64(weights) {
sortHasherByDistance(slice, byIndex, nodes, hash)
return return
} }
dist := make([]float64, len(slice)) dist := make([]float64, len(slice))
for i := range dist { for i := range dist {
d := getDistance(byIndex, i, nodes, hash) d := distance(slice[i].Hash(), hash)
// `maxUint64 - distance` makes the shorter distance more valuable // `maxUint64 - distance` makes the shorter distance more valuable
// it is necessary for operation with normalized values // it is necessary for operation with normalized values
dist[i] = float64(^uint64(0)-d) * weights[i] dist[i] = float64(^uint64(0)-d) * weights[i]
} }
s := &sorter{ sort.Sort(&sorter{
l: len(slice), l: len(slice),
swap: func(i, j int) { swap: func(i, j int) {
slice[i], slice[j] = slice[j], slice[i] slice[i], slice[j] = slice[j], slice[i]
@ -153,8 +137,21 @@ func sortHasherByWeight[T Hasher](slice []T, byIndex bool, nodes []uint64, weigh
less: func(i, j int) bool { less: func(i, j int) bool {
return dist[i] > dist[j] // higher distance must be placed lower to be first return dist[i] > dist[j] // higher distance must be placed lower to be first
}, },
} })
sort.Sort(s) }
// sortHasherByDistance is similar to sortByDistance but accepts slice directly.
func sortHasherByDistance[T Hasher](slice []T, byIndex bool, dist []uint64) {
sort.Sort(&sorter{
l: len(slice),
swap: func(i, j int) {
slice[i], slice[j] = slice[j], slice[i]
dist[i], dist[j] = dist[j], dist[i]
},
less: func(i, j int) bool {
return dist[i] < dist[j]
},
})
} }
// SortSliceByIndex received []T and hash to sort by index-distance // SortSliceByIndex received []T and hash to sort by index-distance
@ -264,18 +261,6 @@ func prepareRule(slice interface{}) []uint64 {
return rule return rule
} }
func prepareHasherRule[T Hasher](hashers []T) []uint64 {
length := len(hashers)
if length == 0 {
return nil
}
result := make([]uint64, length)
for i := 0; i < length; i++ {
result[i] = hashers[i].Hash()
}
return result
}
// ValidateWeights checks if weights are normalized between 0.0 and 1.0 // ValidateWeights checks if weights are normalized between 0.0 and 1.0
func ValidateWeights(weights []float64) error { func ValidateWeights(weights []float64) error {
for i := range weights { for i := range weights {