From c52f74d8e10cb8c54021a4d98d5b11071c3fda84 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Thu, 1 Jun 2023 20:33:15 +0300 Subject: [PATCH] [#8] hrw: Do not allocate 2 slices for sort MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we allocate `rule` and then create `dist` which depends on it. In this commit we create `dist` directly. ``` goos: linux goarch: amd64 pkg: git.frostfs.info/TrueCloudLab/hrw cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz │ 3 │ 4 │ │ sec/op │ sec/op vs base │ SortHashersByValue_Typed_fnv_10-8 336.3n ± 3% 309.2n ± 2% -8.06% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 2.424µ ± 3% 2.306µ ± 1% -4.87% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 22.35µ ± 1% 21.73µ ± 1% -2.75% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 346.6n ± 3% 347.1n ± 1% ~ (p=0.631 n=10) SortHashersByWeightValueTyped_fnv_100-8 2.637µ ± 6% 2.668µ ± 1% ~ (p=0.481 n=10) SortHashersByWeightValueTyped_fnv_1000-8 2.609µ ± 4% 2.673µ ± 1% +2.43% (p=0.000 n=10) geomean 1.875µ 1.836µ -2.06% │ 3 │ 4 │ │ B/op │ B/op vs base │ SortHashersByValue_Typed_fnv_10-8 296.0 ± 0% 216.0 ± 0% -27.03% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 1.883Ki ± 0% 1.008Ki ± 0% -46.47% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 16.133Ki ± 0% 8.133Ki ± 0% -49.59% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 296.0 ± 0% 216.0 ± 0% -27.03% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 1.883Ki ± 0% 1.008Ki ± 0% -46.47% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 1.883Ki ± 0% 1.008Ki ± 0% -46.47% (p=0.000 n=10) geomean 1.442Ki 867.8 -41.24% │ 3 │ 4 │ │ allocs/op │ allocs/op vs base │ SortHashersByValue_Typed_fnv_10-8 5.000 ± 0% 4.000 ± 0% -20.00% (p=0.000 n=10) SortHashersByValue_Typed_fnv_100-8 5.000 ± 0% 4.000 ± 0% -20.00% (p=0.000 n=10) SortHashersByValue_Typed_fnv_1000-8 5.000 ± 0% 4.000 ± 0% -20.00% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_10-8 5.000 ± 0% 4.000 ± 0% -20.00% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_100-8 5.000 ± 0% 4.000 ± 0% -20.00% (p=0.000 n=10) SortHashersByWeightValueTyped_fnv_1000-8 5.000 ± 0% 4.000 ± 0% -20.00% (p=0.000 n=10) geomean 5.000 4.000 -20.00% ``` Signed-off-by: Evgenii Stratonikov --- hrw.go | 79 ++++++++++++++++++++++++---------------------------------- 1 file changed, 32 insertions(+), 47 deletions(-) diff --git a/hrw.go b/hrw.go index 2effa3a..c874917 100644 --- a/hrw.go +++ b/hrw.go @@ -85,10 +85,15 @@ func SortSliceByValue(slice interface{}, hash uint64) { // SortHasherSliceByValue receives []Hasher and hash to sort by value-distance. func SortHasherSliceByValue[T Hasher](slice []T, hash uint64) { - rule := prepareHasherRule(slice) - if rule != nil { - sortHasherByDistance(slice, false, rule, hash) + if len(slice) == 0 { + return } + + dist := make([]uint64, len(slice)) + for i := range dist { + dist[i] = distance(slice[i].Hash(), hash) + } + sortHasherByDistance(slice, false, dist) } // SortSliceByWeightValue received []T, weights and hash to sort by value-distance * weights @@ -102,49 +107,28 @@ func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) { // SortHasherSliceByWeightValue receives []Hasher, weights and hash to sort by value-distance * weights. func SortHasherSliceByWeightValue[T Hasher](slice []T, weights []float64, hash uint64) { - rule := prepareHasherRule(slice) - if rule != nil { - sortHasherByWeight(slice, false, rule, weights, hash) - } -} - -// sortHasherByDistance is similar to sortByDistance but accepts slice directly. -func sortHasherByDistance[T Hasher](slice []T, byIndex bool, nodes []uint64, hash uint64) { - dist := make([]uint64, len(slice)) - for i := range dist { - dist[i] = getDistance(byIndex, i, nodes, hash) + if len(slice) == 0 { + return } - s := &sorter{ - l: len(slice), - swap: func(i, j int) { - slice[i], slice[j] = slice[j], slice[i] - dist[i], dist[j] = dist[j], dist[i] - }, - less: func(i, j int) bool { - return dist[i] < dist[j] - }, - } - sort.Sort(s) -} - -// sortHasherByWeight is similar to sortByWeight but accepts slice directly. -func sortHasherByWeight[T Hasher](slice []T, byIndex bool, nodes []uint64, weights []float64, hash uint64) { - // if all nodes have the same distance then sort uniformly if allSameF64(weights) { - sortHasherByDistance(slice, byIndex, nodes, hash) + dist := make([]uint64, len(slice)) + for i := range dist { + dist[i] = distance(slice[i].Hash(), hash) + } + sortHasherByDistance(slice, false, dist) return } dist := make([]float64, len(slice)) for i := range dist { - d := getDistance(byIndex, i, nodes, hash) + d := distance(slice[i].Hash(), hash) // `maxUint64 - distance` makes the shorter distance more valuable // it is necessary for operation with normalized values dist[i] = float64(^uint64(0)-d) * weights[i] } - s := &sorter{ + sort.Sort(&sorter{ l: len(slice), swap: func(i, j int) { slice[i], slice[j] = slice[j], slice[i] @@ -153,8 +137,21 @@ func sortHasherByWeight[T Hasher](slice []T, byIndex bool, nodes []uint64, weigh less: func(i, j int) bool { return dist[i] > dist[j] // higher distance must be placed lower to be first }, - } - sort.Sort(s) + }) +} + +// sortHasherByDistance is similar to sortByDistance but accepts slice directly. +func sortHasherByDistance[T Hasher](slice []T, byIndex bool, dist []uint64) { + sort.Sort(&sorter{ + l: len(slice), + swap: func(i, j int) { + slice[i], slice[j] = slice[j], slice[i] + dist[i], dist[j] = dist[j], dist[i] + }, + less: func(i, j int) bool { + return dist[i] < dist[j] + }, + }) } // SortSliceByIndex received []T and hash to sort by index-distance @@ -264,18 +261,6 @@ func prepareRule(slice interface{}) []uint64 { return rule } -func prepareHasherRule[T Hasher](hashers []T) []uint64 { - length := len(hashers) - if length == 0 { - return nil - } - result := make([]uint64, length) - for i := 0; i < length; i++ { - result[i] = hashers[i].Hash() - } - return result -} - // ValidateWeights checks if weights are normalized between 0.0 and 1.0 func ValidateWeights(weights []float64) error { for i := range weights {