Compare commits

..

26 commits

Author SHA1 Message Date
3a8489bfe7 [#13] Restore deleted copyright notice
Signed-off-by: Vitaliy Potyarkin <v.potyarkin@yadro.com>
2024-11-08 15:33:45 +03:00
1b7ec474c9 [#9] *: Remove nspcc mentions
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-08-29 13:53:15 +03:00
78c3f718b1 [#9] doc: Remove nspcc mentions from README
Signed-off-by: Airat Arifullin a.arifullin@yadro.com
2023-06-13 10:55:55 +03:00
16a7740ccd [#8] hrw: Introduce StringHash() for hashing strings directly
```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/hrw
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                         │      6      │                  7                   │
                                         │   sec/op    │    sec/op     vs base                │
SortHashersByValue_Typed_fnv_10-8          248.8n ± 1%   166.9n ±  9%  -32.93% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         2.195µ ± 6%   1.297µ ±  6%  -40.93% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        22.47µ ± 4%   12.42µ ± 10%  -44.73% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     301.7n ± 6%   180.8n ±  4%  -40.09% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    2.526µ ± 1%   1.378µ ±  5%  -45.47% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   24.37µ ± 2%   12.80µ ±  4%  -47.48% (p=0.000 n=10)
geomean                                    2.472µ        1.430µ        -42.13%

                                         │      6       │                   7                   │
                                         │     B/op     │     B/op      vs base                 │
SortHashersByValue_Typed_fnv_10-8            144.0 ± 0%     144.0 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByValue_Typed_fnv_100-8           960.0 ± 0%     960.0 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByValue_Typed_fnv_1000-8        8.062Ki ± 0%   8.062Ki ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByWeightValueTyped_fnv_10-8       144.0 ± 0%     144.0 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByWeightValueTyped_fnv_100-8      960.0 ± 0%     960.0 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByWeightValueTyped_fnv_1000-8   8.062Ki ± 0%   8.062Ki ± 0%       ~ (p=1.000 n=10) ¹
geomean                                    1.021Ki        1.021Ki       +0.00%
¹ all samples are equal

                                         │     6      │                  7                  │
                                         │ allocs/op  │ allocs/op   vs base                 │
SortHashersByValue_Typed_fnv_10-8          2.000 ± 0%   2.000 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByValue_Typed_fnv_100-8         2.000 ± 0%   2.000 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByValue_Typed_fnv_1000-8        2.000 ± 0%   2.000 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByWeightValueTyped_fnv_10-8     2.000 ± 0%   2.000 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByWeightValueTyped_fnv_100-8    2.000 ± 0%   2.000 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByWeightValueTyped_fnv_1000-8   2.000 ± 0%   2.000 ± 0%       ~ (p=1.000 n=10) ¹
geomean                                    2.000        2.000       +0.00%
¹ all samples are equal
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-02 10:56:24 +03:00
2ac89c82b6 [#8] hrw/test: Fix typo in benchmarks
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-02 10:56:24 +03:00
266da7c69a [#8] hrw: Do not allocate for swap()/less() helpers
```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/hrw
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                         │      4      │                  5                  │
                                         │   sec/op    │   sec/op     vs base                │
SortHashersByValue_Typed_fnv_10-8          309.2n ± 2%   294.4n ± 1%   -4.75% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         2.306µ ± 1%   2.549µ ± 1%  +10.54% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        21.73µ ± 1%   24.80µ ± 3%  +14.14% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     347.1n ± 1%   334.8n ± 2%   -3.56% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    2.668µ ± 1%   2.954µ ± 3%  +10.72% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   2.673µ ± 1%   2.957µ ± 4%  +10.63% (p=0.000 n=10)
geomean                                    1.836µ        1.947µ        +6.01%

                                         │      4       │                  5                   │
                                         │     B/op     │     B/op      vs base                │
SortHashersByValue_Typed_fnv_10-8            216.0 ± 0%     144.0 ± 0%  -33.33% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8          1032.0 ± 0%     960.0 ± 0%   -6.98% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        8.133Ki ± 0%   8.062Ki ± 0%   -0.86% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8       216.0 ± 0%     144.0 ± 0%  -33.33% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8     1032.0 ± 0%     960.0 ± 0%   -6.98% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8    1032.0 ± 0%     960.0 ± 0%   -6.98% (p=0.000 n=10)
geomean                                      867.8          730.1       -15.87%

                                         │     4      │                 5                  │
                                         │ allocs/op  │ allocs/op   vs base                │
SortHashersByValue_Typed_fnv_10-8          4.000 ± 0%   2.000 ± 0%  -50.00% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         4.000 ± 0%   2.000 ± 0%  -50.00% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        4.000 ± 0%   2.000 ± 0%  -50.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     4.000 ± 0%   2.000 ± 0%  -50.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    4.000 ± 0%   2.000 ± 0%  -50.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   4.000 ± 0%   2.000 ± 0%  -50.00% (p=0.000 n=10)
geomean                                    4.000        2.000       -50.00%
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-02 10:56:23 +03:00
c52f74d8e1 [#8] hrw: Do not allocate 2 slices for sort
Currently we allocate `rule` and then create `dist` which depends on it.
In this commit we create `dist` directly.

```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/hrw
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                         │      3      │                 4                  │
                                         │   sec/op    │   sec/op     vs base               │
SortHashersByValue_Typed_fnv_10-8          336.3n ± 3%   309.2n ± 2%  -8.06% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         2.424µ ± 3%   2.306µ ± 1%  -4.87% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        22.35µ ± 1%   21.73µ ± 1%  -2.75% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     346.6n ± 3%   347.1n ± 1%       ~ (p=0.631 n=10)
SortHashersByWeightValueTyped_fnv_100-8    2.637µ ± 6%   2.668µ ± 1%       ~ (p=0.481 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   2.609µ ± 4%   2.673µ ± 1%  +2.43% (p=0.000 n=10)
geomean                                    1.875µ        1.836µ       -2.06%

                                         │       3       │                  4                   │
                                         │     B/op      │     B/op      vs base                │
SortHashersByValue_Typed_fnv_10-8             296.0 ± 0%     216.0 ± 0%  -27.03% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8          1.883Ki ± 0%   1.008Ki ± 0%  -46.47% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        16.133Ki ± 0%   8.133Ki ± 0%  -49.59% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8        296.0 ± 0%     216.0 ± 0%  -27.03% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8     1.883Ki ± 0%   1.008Ki ± 0%  -46.47% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8    1.883Ki ± 0%   1.008Ki ± 0%  -46.47% (p=0.000 n=10)
geomean                                     1.442Ki          867.8       -41.24%

                                         │     3      │                 4                  │
                                         │ allocs/op  │ allocs/op   vs base                │
SortHashersByValue_Typed_fnv_10-8          5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
geomean                                    5.000        4.000       -20.00%
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-02 10:53:44 +03:00
895ecf150f [#8] hrw: Inline swap() when slice is known
```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/hrw
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                         │      2      │                 3                  │
                                         │   sec/op    │   sec/op     vs base               │
SortHashersByValue_Typed_fnv_10-8          368.5n ± 2%   336.3n ± 3%  -8.75% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         2.411µ ± 4%   2.424µ ± 3%       ~ (p=0.853 n=10)
SortHashersByValue_Typed_fnv_1000-8        22.19µ ± 2%   22.35µ ± 1%       ~ (p=0.247 n=10)
SortHashersByWeightValueTyped_fnv_10-8     364.3n ± 2%   346.6n ± 3%  -4.86% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    2.541µ ± 3%   2.637µ ± 6%       ~ (p=0.055 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   2.483µ ± 1%   2.609µ ± 4%  +5.07% (p=0.003 n=10)
geomean                                    1.888µ        1.875µ       -0.71%

                                         │      2       │                  3                  │
                                         │     B/op     │     B/op      vs base               │
SortHashersByValue_Typed_fnv_10-8            312.0 ± 0%     296.0 ± 0%  -5.13% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         1.898Ki ± 0%   1.883Ki ± 0%  -0.82% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        16.15Ki ± 0%   16.13Ki ± 0%  -0.10% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8       312.0 ± 0%     296.0 ± 0%  -5.13% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    1.898Ki ± 0%   1.883Ki ± 0%  -0.82% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   1.898Ki ± 0%   1.883Ki ± 0%  -0.82% (p=0.000 n=10)
geomean                                    1.474Ki        1.442Ki       -2.16%

                                         │     2      │                 3                  │
                                         │ allocs/op  │ allocs/op   vs base                │
SortHashersByValue_Typed_fnv_10-8          6.000 ± 0%   5.000 ± 0%  -16.67% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         6.000 ± 0%   5.000 ± 0%  -16.67% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        6.000 ± 0%   5.000 ± 0%  -16.67% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     6.000 ± 0%   5.000 ± 0%  -16.67% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    6.000 ± 0%   5.000 ± 0%  -16.67% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   6.000 ± 0%   5.000 ± 0%  -16.67% (p=0.000 n=10)
geomean                                    6.000        5.000       -16.67%
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-01 21:41:16 +03:00
213c105ac1 [#8] go.mod: Use faster murmur3 lib
Specifically, this line became possible, because of noescape annotations
for assembly.
```
./hrw.go:307:14: make([]byte, 8) does not escape
```

```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/hrw
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                              │      1       │                   2                   │
                                              │    sec/op    │   sec/op     vs base                  │
SortHashersByValue_Typed_fnv_10-8               580.1n ±  1%   368.5n ± 2%  -36.47% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8              4.215µ ±  2%   2.411µ ± 4%  -42.79% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8             39.40µ ±  1%   22.19µ ± 2%  -43.68% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8          599.6n ±  2%   364.3n ± 2%  -39.25% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8         4.337µ ±  5%   2.541µ ± 3%  -41.41% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8        4.344µ ±  3%   2.483µ ± 1%  -42.84% (p=0.000 n=10)
geomean                                         4.400µ         1.888µ       -41.13%                ¹
¹ benchmark set differs from baseline; geomeans may not be comparable

                                              │      1       │                   2                    │
                                              │     B/op     │     B/op      vs base                  │
SortHashersByValue_Typed_fnv_10-8                 472.0 ± 0%     312.0 ± 0%  -33.90% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8              3.461Ki ± 0%   1.898Ki ± 0%  -45.15% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8             31.77Ki ± 0%   16.15Ki ± 0%  -49.18% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8            472.0 ± 0%     312.0 ± 0%  -33.90% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8         3.461Ki ± 0%   1.898Ki ± 0%  -45.15% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8        3.461Ki ± 0%   1.898Ki ± 0%  -45.15% (p=0.000 n=10)
geomean                                         3.070Ki        1.474Ki       -42.37%                ¹
¹ benchmark set differs from baseline; geomeans may not be comparable

                                              │       1       │                  2                   │
                                              │   allocs/op   │ allocs/op   vs base                  │
SortHashersByValue_Typed_fnv_10-8                 16.000 ± 0%   6.000 ± 0%  -62.50% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8               106.000 ± 0%   6.000 ± 0%  -94.34% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8             1006.000 ± 0%   6.000 ± 0%  -99.40% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8            16.000 ± 0%   6.000 ± 0%  -62.50% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8          106.000 ± 0%   6.000 ± 0%  -94.34% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8         106.000 ± 0%   6.000 ± 0%  -94.34% (p=0.000 n=10)
geomean                                            113.0        6.000       -92.69%                ¹
¹ benchmark set differs from baseline; geomeans may not be comparable
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-01 21:41:00 +03:00
c175ef4099 [#8] hrw: Do not create index slice for sorter
`ind` is only needed to index dist or weights, swap them directly.

```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/hrw
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                              │      0      │                  1                   │
                                              │   sec/op    │    sec/op     vs base                │
SortHashersByValue_Typed_fnv_10-8               596.2n ± 4%   580.1n ±  1%   -2.72% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8              4.453µ ± 2%   4.215µ ±  2%   -5.35% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8             41.58µ ± 4%   39.40µ ±  1%   -5.23% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8          624.5n ± 2%   599.6n ±  2%   -3.99% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8         4.593µ ± 2%   4.337µ ±  5%   -5.56% (p=0.003 n=10)
SortHashersByWeightValueTyped_fnv_1000-8        4.896µ ± 8%   4.344µ ±  3%  -11.27% (p=0.000 n=10)
geomean                                         4.668µ        4.400µ         -5.75%

                                              │      0       │                  1                   │
                                              │     B/op     │     B/op      vs base                │
SortHashersByValue_Typed_fnv_10-8                 584.0 ± 0%     472.0 ± 0%  -19.18% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8              4.367Ki ± 0%   3.461Ki ± 0%  -20.75% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8             39.80Ki ± 0%   31.77Ki ± 0%  -20.18% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8            600.0 ± 0%     472.0 ± 0%  -21.33% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8         4.383Ki ± 0%   3.461Ki ± 0%  -21.03% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8        4.383Ki ± 0%   3.461Ki ± 0%  -21.03% (p=0.000 n=10)
geomean                                         3.742Ki        3.070Ki       -17.96%

                                              │      0      │                 1                  │
                                              │  allocs/op  │  allocs/op   vs base               │
SortHashersByValue_Typed_fnv_10-8                17.00 ± 0%    16.00 ± 0%  -5.88% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8               107.0 ± 0%    106.0 ± 0%  -0.93% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8             1.007k ± 0%   1.006k ± 0%  -0.10% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8           17.00 ± 0%    16.00 ± 0%  -5.88% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8          107.0 ± 0%    106.0 ± 0%  -0.93% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8         107.0 ± 0%    106.0 ± 0%  -0.93% (p=0.000 n=10)
geomean                                          115.3         113.0       -1.94%
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-01 21:40:46 +03:00
2c085708de [#6] .github: Remove CODEOWNERS
Not supported by Gitea/Forgejo.
See https://github.com/go-gitea/gitea/issues/10161 .

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-03-09 22:50:15 +03:00
15b3800347 [#6] Remove .travis.yml
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-03-09 22:50:15 +03:00
2e205cf1ca [#6] pre-commit: Add gitlint hook
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-03-09 22:50:15 +03:00
ebca2848ad [#6] pre-commit: Add golangci-lint hook
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-03-09 22:50:15 +03:00
0ad932400c [#6] pre-commit: Add initial configuration
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-03-09 22:50:14 +03:00
08e14caaf3 Rename package name
Due to source code relocation from GitHub.

Signed-off-by: Alex Vanin <a.vanin@yadro.com>
2023-03-07 14:00:00 +03:00
7e33833933 [#4] .github: Add CODEOWNERS
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-02-27 17:25:20 +03:00
79b208bebf [#3] hrw: Update README
Update benchmark results

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
2023-02-27 14:18:58 +03:00
5671632658 [#2] hrw: Add typed methods for hashers
Add generic methods for sort-methods used in node

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
2023-02-27 14:18:58 +03:00
9e9fc653e5 [#2] hrw: Go version update
Go version updated 1.14 -> 1.18

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
2023-02-27 14:18:58 +03:00
997b540432 Move from nspcc-dev to TrueCloudLab
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2022-12-12 18:02:07 +03:00
22b833d972 Panic if provided value is not a slice
Signed-off-by: Evgenii Stratonikov <stratonikov@runbox.com>
2021-12-28 14:37:17 +03:00
fad35bbd3b Specify go.mod version 2020-09-11 18:30:18 +03:00
dddcfc8fc5 Simplify SortByValue/Weight a bit
Get rid of unneeded types.
2020-09-11 18:30:18 +03:00
Alex Vanin
f52ea8fb21
Handle NaN values in weight validation function (#7)
NaN values can pass the validation since NaN values don't work
with float arithmetic and comparisons are always true. Now validation
function has check for NaN values.
2019-08-01 12:16:03 +03:00
Alex Vanin
aa230933d1
Move normalization routine out of hrw library (#6)
HRW library supports weighted sorting. Weights must be normalized
before applying. Since there could be different types of normalization
for multiple criteria, there is no point to perform simple
normalization in this library. Pass a slice of normalized weights
to the `SortByWeight` functions.

This commit proposes to:
- remove normalization routine from `SortByWeight` function;
- add `ValidateWeights` function to check if weights are normalized;
- rename `weight` -> `distance` to avoid naming confusion between
  hash distance and actual weights;
- use testify lib in the tests;
2019-07-05 09:49:24 +03:00
10 changed files with 406 additions and 211 deletions

10
.gitlint Normal file
View file

@ -0,0 +1,10 @@
[general]
fail-without-commits=true
contrib=CC1
[title-match-regex]
regex=^\[\#[0-9]+\]\s
[ignore-by-title]
regex=^Release(.*)
ignore=title-match-regex

30
.pre-commit-config.yaml Normal file
View file

@ -0,0 +1,30 @@
ci:
autofix_prs: false
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
- id: check-added-large-files
- id: check-case-conflict
- id: check-executables-have-shebangs
- id: check-shebang-scripts-are-executable
- id: check-merge-conflict
- id: check-json
- id: check-xml
- id: check-yaml
- id: trailing-whitespace
args: [--markdown-linebreak-ext=md]
- id: end-of-file-fixer
exclude: ".key$"
- repo: https://github.com/golangci/golangci-lint
rev: v1.51.2
hooks:
- id: golangci-lint
- repo: https://github.com/jorisroovers/gitlint
rev: v0.18.0
hooks:
- id: gitlint
stages: [commit-msg]

View file

@ -1,17 +0,0 @@
language: go
go:
- 1.11.x
- 1.12.x
env:
- GO111MODULE=on
install:
- go get -v golang.org/x/lint/golint
- go mod tidy -v
script:
- golint -set_exit_status ./...
- go test -race -coverprofile=coverage.txt -covermode=atomic ./...
after_success:
- bash <(curl -s https://codecov.io/bash)
matrix:
allow_failures:
- go: tip

View file

@ -1,6 +1,7 @@
MIT License MIT License
Copyright (c) 2019 NSPCC Copyright (c) 2023-2024 TrueCloudLab
Copyright (c) 2019-2023 NSPCC
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal of this software and associated documentation files (the "Software"), to deal

View file

@ -1,38 +1,45 @@
# Golang HRW implementation # Golang HRW implementation
[![Build Status](https://travis-ci.org/nspcc-dev/hrw.svg?branch=master)](https://travis-ci.org/nspcc-dev/hrw)
[![codecov](https://codecov.io/gh/nspcc-dev/hrw/badge.svg)](https://codecov.io/gh/nspcc-dev/hrw)
[![Report](https://goreportcard.com/badge/github.com/nspcc-dev/hrw)](https://goreportcard.com/report/github.com/nspcc-dev/hrw)
[![GitHub release](https://img.shields.io/github/release/nspcc-dev/hrw.svg)](https://github.com/nspcc-dev/hrw)
[Rendezvous or highest random weight](https://en.wikipedia.org/wiki/Rendezvous_hashing) (HRW) hashing is an algorithm that allows clients to achieve distributed agreement on a set of k options out of a possible set of n options. A typical application is when clients need to agree on which sites (or proxies) objects are assigned to. When k is 1, it subsumes the goals of consistent hashing, using an entirely different method. [Rendezvous or highest random weight](https://en.wikipedia.org/wiki/Rendezvous_hashing) (HRW) hashing is an algorithm that allows clients to achieve distributed agreement on a set of k options out of a possible set of n options. A typical application is when clients need to agree on which sites (or proxies) objects are assigned to. When k is 1, it subsumes the goals of consistent hashing, using an entirely different method.
## Install ## Install
`go get github.com/nspcc-dev/hrw` `go get git.frostfs.info/TrueCloudLab/hrw`
## Benchmark: ## Benchmark:
``` ```
BenchmarkSort_fnv_10-8 5000000 365 ns/op 224 B/op 3 allocs/op BenchmarkSort_fnv_10-8 4812801 240.9 ns/op 216 B/op 4 allocs/op
BenchmarkSort_fnv_100-8 300000 5261 ns/op 1856 B/op 3 allocs/op BenchmarkSort_fnv_100-8 434767 2600 ns/op 1848 B/op 4 allocs/op
BenchmarkSort_fnv_1000-8 10000 119462 ns/op 16448 B/op 3 allocs/op BenchmarkSort_fnv_1000-8 20428 66116 ns/op 16440 B/op 4 allocs/op
BenchmarkSortByIndex_fnv_10-8 3000000 546 ns/op 384 B/op 7 allocs/op BenchmarkSortByIndex_fnv_10-8 2505410 486.5 ns/op 352 B/op 7 allocs/op
BenchmarkSortByIndex_fnv_100-8 200000 5965 ns/op 2928 B/op 7 allocs/op BenchmarkSortByIndex_fnv_100-8 254556 4697 ns/op 1984 B/op 7 allocs/op
BenchmarkSortByIndex_fnv_1000-8 10000 127732 ns/op 25728 B/op 7 allocs/op BenchmarkSortByIndex_fnv_1000-8 13581 88334 ns/op 16576 B/op 7 allocs/op
BenchmarkSortByValue_fnv_10-8 2000000 962 ns/op 544 B/op 17 allocs/op BenchmarkSortByValue_fnv_10-8 1761030 682.1 ns/op 592 B/op 18 allocs/op
BenchmarkSortByValue_fnv_100-8 200000 9604 ns/op 4528 B/op 107 allocs/op BenchmarkSortByValue_fnv_100-8 258838 4675 ns/op 4480 B/op 108 allocs/op
BenchmarkSortByValue_fnv_1000-8 10000 111741 ns/op 41728 B/op 1007 allocs/op BenchmarkSortByValue_fnv_1000-8 27027 44649 ns/op 40768 B/op 1008 allocs/op
BenchmarkSortHashersByValue_Reflection_fnv_10-8 1013560 1249 ns/op 768 B/op 29 allocs/op
BenchmarkSortHashersByValue_Reflection_fnv_100-8 106029 11414 ns/op 6096 B/op 209 allocs/op
BenchmarkSortHashersByValue_Reflection_fnv_1000-8 10000 108977 ns/op 56784 B/op 2009 allocs/op
BenchmarkSortHashersByValue_Typed_fnv_10-8 1577814 700.3 ns/op 584 B/op 17 allocs/op
BenchmarkSortHashersByValue_Typed_fnv_100-8 215938 5024 ns/op 4472 B/op 107 allocs/op
BenchmarkSortHashersByValue_Typed_fnv_1000-8 24447 46889 ns/op 40760 B/op 1007 allocs/op
BenchmarkSortByWeight_fnv_10-8 3000000 501 ns/op 320 B/op 4 allocs/op BenchmarkSortByWeight_fnv_10-8 2924833 370.6 ns/op 448 B/op 8 allocs/op
BenchmarkSortByWeight_fnv_100-8 200000 8495 ns/op 2768 B/op 4 allocs/op BenchmarkSortByWeight_fnv_100-8 816069 1516 ns/op 2896 B/op 8 allocs/op
BenchmarkSortByWeight_fnv_1000-8 10000 197880 ns/op 24656 B/op 4 allocs/op BenchmarkSortByWeight_fnv_1000-8 80391 17478 ns/op 24784 B/op 8 allocs/op
BenchmarkSortByWeightIndex_fnv_10-8 2000000 702 ns/op 480 B/op 8 allocs/op BenchmarkSortByWeightIndex_fnv_10-8 1945612 550.3 ns/op 368 B/op 7 allocs/op
BenchmarkSortByWeightIndex_fnv_100-8 200000 9338 ns/op 3840 B/op 8 allocs/op BenchmarkSortByWeightIndex_fnv_100-8 140473 8084 ns/op 2000 B/op 7 allocs/op
BenchmarkSortByWeightIndex_fnv_1000-8 10000 204669 ns/op 33936 B/op 8 allocs/op BenchmarkSortByWeightIndex_fnv_1000-8 5518 200949 ns/op 16592 B/op 7 allocs/op
BenchmarkSortByWeightValue_fnv_10-8 1000000 1083 ns/op 640 B/op 18 allocs/op BenchmarkSortByWeightValue_fnv_10-8 1305580 909.8 ns/op 608 B/op 18 allocs/op
BenchmarkSortByWeightValue_fnv_100-8 200000 11444 ns/op 5440 B/op 108 allocs/op BenchmarkSortByWeightValue_fnv_100-8 165410 6796 ns/op 4496 B/op 108 allocs/op
BenchmarkSortByWeightValue_fnv_1000-8 10000 148471 ns/op 49936 B/op 1008 allocs/op BenchmarkSortByWeightValue_fnv_1000-8 17922 78555 ns/op 40784 B/op 1008 allocs/op
BenchmarkSortHashersByWeightValueReflection_fnv_10-8 454976 2229 ns/op 784 B/op 29 allocs/op
BenchmarkSortHashersByWeightValueReflection_fnv_100-8 76264 15332 ns/op 6112 B/op 209 allocs/op
BenchmarkSortHashersByWeightValueReflection_fnv_1000-8 80288 13192 ns/op 6112 B/op 209 allocs/op
BenchmarkSortHashersByWeightValueTyped_fnv_10-8 1433113 901.4 ns/op 600 B/op 17 allocs/op
BenchmarkSortHashersByWeightValueTyped_fnv_100-8 188626 5896 ns/op 4488 B/op 107 allocs/op
BenchmarkSortHashersByWeightValueTyped_fnv_1000-8 178131 6518 ns/op 4488 B/op 107 allocs/op
``` ```
## Example ## Example
@ -43,7 +50,7 @@ package main
import ( import (
"fmt" "fmt"
"github.com/nspcc-dev/hrw" "git.frostfs.info/TrueCloudLab/hrw"
) )
func main() { func main() {

11
go.mod
View file

@ -1,6 +1,13 @@
module github.com/nspcc-dev/hrw module git.frostfs.info/TrueCloudLab/hrw
go 1.18
require ( require (
github.com/spaolacci/murmur3 v1.1.0
github.com/stretchr/testify v1.3.0 github.com/stretchr/testify v1.3.0
github.com/twmb/murmur3 v1.1.8
)
require (
github.com/davecgh/go-spew v1.1.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
) )

4
go.sum
View file

@ -2,8 +2,8 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/twmb/murmur3 v1.1.8 h1:8Yt9taO/WN3l08xErzjeschgZU2QSrwm1kclYq+0aRg=
github.com/twmb/murmur3 v1.1.8/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ=

302
hrw.go
View file

@ -5,27 +5,27 @@ package hrw
import ( import (
"encoding/binary" "encoding/binary"
"errors" "errors"
"math"
"reflect" "reflect"
"sort" "sort"
"github.com/spaolacci/murmur3" "github.com/twmb/murmur3"
) )
type ( type (
swapper func(i, j int)
// Hasher interface used by SortSliceByValue // Hasher interface used by SortSliceByValue
Hasher interface{ Hash() uint64 } Hasher interface{ Hash() uint64 }
hashed struct { sorter struct {
length int l int
sorted []uint64 less func(i, j int) bool
distance []uint64 swap func(i, j int)
} }
weighted struct { hasherSorter[T Hasher, N interface{ ~uint64 | ~float64 }] struct {
h hashed slice []T
normal []float64 // normalized input weights dist []N
asc bool
} }
) )
@ -35,6 +35,22 @@ const (
NormalizedMinWeight = 0.0 NormalizedMinWeight = 0.0
) )
func (s *sorter) Len() int { return s.l }
func (s *sorter) Less(i, j int) bool { return s.less(i, j) }
func (s *sorter) Swap(i, j int) { s.swap(i, j) }
func (s *hasherSorter[T, N]) Len() int { return len(s.slice) }
func (s *hasherSorter[T, N]) Less(i, j int) bool {
if s.asc {
return s.dist[i] < s.dist[j]
}
return s.dist[i] > s.dist[j]
}
func (s *hasherSorter[T, N]) Swap(i, j int) {
s.slice[i], s.slice[j] = s.slice[j], s.slice[i]
s.dist[i], s.dist[j] = s.dist[j], s.dist[i]
}
func distance(x uint64, y uint64) uint64 { func distance(x uint64, y uint64) uint64 {
acc := x ^ y acc := x ^ y
// here used mmh3 64 bit finalizer // here used mmh3 64 bit finalizer
@ -47,83 +63,38 @@ func distance(x uint64, y uint64) uint64 {
return acc return acc
} }
func (h hashed) Len() int { return h.length }
func (h hashed) Less(i, j int) bool { return h.distance[i] < h.distance[j] }
func (h hashed) Swap(i, j int) {
h.sorted[i], h.sorted[j] = h.sorted[j], h.sorted[i]
h.distance[i], h.distance[j] = h.distance[j], h.distance[i]
}
func (w weighted) Len() int { return w.h.length }
func (w weighted) Less(i, j int) bool {
// `maxUint64 - distance` makes the shorter distance more valuable
// it is necessary for operation with normalized values
wi := float64(^uint64(0)-w.h.distance[i]) * w.normal[i]
wj := float64(^uint64(0)-w.h.distance[j]) * w.normal[j]
return wi > wj // higher distance must be placed lower to be first
}
func (w weighted) Swap(i, j int) { w.normal[i], w.normal[j] = w.normal[j], w.normal[i]; w.h.Swap(i, j) }
// Hash uses murmur3 hash to return uint64 // Hash uses murmur3 hash to return uint64
func Hash(key []byte) uint64 { func Hash(key []byte) uint64 {
return murmur3.Sum64(key) return murmur3.Sum64(key)
} }
// StringHash uses murmur3 hash to return uint64
func StringHash(key string) uint64 {
return murmur3.StringSum64(key)
}
// Sort receive nodes and hash, and sort it by distance // Sort receive nodes and hash, and sort it by distance
func Sort(nodes []uint64, hash uint64) []uint64 { func Sort(nodes []uint64, hash uint64) []uint64 {
var ( l := len(nodes)
l = len(nodes) sorted := make([]uint64, l)
h = hashed{ dist := make([]uint64, l)
length: l,
sorted: make([]uint64, 0, l),
distance: make([]uint64, 0, l),
}
)
for i := range nodes { for i := range nodes {
h.sorted = append(h.sorted, uint64(i)) sorted[i] = uint64(i)
h.distance = append(h.distance, distance(nodes[i], hash)) dist[i] = distance(nodes[i], hash)
} }
sort.Sort(h) sort.Slice(sorted, func(i, j int) bool {
return h.sorted return dist[sorted[i]] < dist[sorted[j]]
})
return sorted
} }
// SortByWeight receive nodes, weights and hash, and sort it by distance * weight // SortByWeight receive nodes, weights and hash, and sort it by distance * weight
func SortByWeight(nodes []uint64, weights []float64, hash uint64) []uint64 { func SortByWeight(nodes []uint64, weights []float64, hash uint64) []uint64 {
// check if numbers of weights and nodes are equal result := make([]uint64, len(nodes))
uniform := true copy(nodes, result)
for i := range weights { sortByWeight(len(nodes), false, nodes, weights, hash, reflect.Swapper(result))
// check if all nodes have the same distance return result
if weights[i] != weights[0] {
uniform = false
break
}
}
l := len(nodes)
w := weighted{
h: hashed{
length: l,
sorted: make([]uint64, 0, l),
distance: make([]uint64, 0, l),
},
normal: make([]float64, l),
}
// if all nodes have the same distance then sort uniformly
if uniform || len(weights) != l {
return Sort(nodes, hash)
}
for i := range nodes {
w.h.sorted = append(w.h.sorted, uint64(i))
w.h.distance = append(w.h.distance, distance(nodes[i], hash))
}
copy(w.normal, weights)
sort.Sort(w)
return w.h.sorted
} }
// SortSliceByValue received []T and hash to sort by value-distance // SortSliceByValue received []T and hash to sort by value-distance
@ -131,76 +102,89 @@ func SortSliceByValue(slice interface{}, hash uint64) {
rule := prepareRule(slice) rule := prepareRule(slice)
if rule != nil { if rule != nil {
swap := reflect.Swapper(slice) swap := reflect.Swapper(slice)
rule = Sort(rule, hash) sortByDistance(len(rule), false, rule, hash, swap)
sortByRuleInverse(swap, uint64(len(rule)), rule)
} }
} }
// SortHasherSliceByValue receives []Hasher and hash to sort by value-distance.
func SortHasherSliceByValue[T Hasher](slice []T, hash uint64) {
if len(slice) == 0 {
return
}
dist := make([]uint64, len(slice))
for i := range dist {
dist[i] = distance(slice[i].Hash(), hash)
}
sortHasherByDistance(slice, false, dist)
}
// SortSliceByWeightValue received []T, weights and hash to sort by value-distance * weights // SortSliceByWeightValue received []T, weights and hash to sort by value-distance * weights
func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) { func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) {
rule := prepareRule(slice) rule := prepareRule(slice)
if rule != nil { if rule != nil {
swap := reflect.Swapper(slice) swap := reflect.Swapper(slice)
rule = SortByWeight(rule, weights, hash) sortByWeight(reflect.ValueOf(slice).Len(), false, rule, weights, hash, swap)
sortByRuleInverse(swap, uint64(len(rule)), rule)
} }
} }
// SortHasherSliceByWeightValue receives []Hasher, weights and hash to sort by value-distance * weights.
func SortHasherSliceByWeightValue[T Hasher](slice []T, weights []float64, hash uint64) {
if len(slice) == 0 {
return
}
if allSameF64(weights) {
dist := make([]uint64, len(slice))
for i := range dist {
dist[i] = distance(slice[i].Hash(), hash)
}
sortHasherByDistance(slice, false, dist)
return
}
dist := make([]float64, len(slice))
for i := range dist {
d := distance(slice[i].Hash(), hash)
// `maxUint64 - distance` makes the shorter distance more valuable
// it is necessary for operation with normalized values
dist[i] = float64(^uint64(0)-d) * weights[i]
}
sort.Sort(&hasherSorter[T, float64]{
slice: slice,
dist: dist,
asc: false,
})
}
// sortHasherByDistance is similar to sortByDistance but accepts slice directly.
func sortHasherByDistance[T Hasher](slice []T, byIndex bool, dist []uint64) {
sort.Sort(&hasherSorter[T, uint64]{
slice: slice,
dist: dist,
asc: true,
})
}
// SortSliceByIndex received []T and hash to sort by index-distance // SortSliceByIndex received []T and hash to sort by index-distance
func SortSliceByIndex(slice interface{}, hash uint64) { func SortSliceByIndex(slice interface{}, hash uint64) {
length := uint64(reflect.ValueOf(slice).Len()) length := reflect.ValueOf(slice).Len()
swap := reflect.Swapper(slice) swap := reflect.Swapper(slice)
rule := make([]uint64, 0, length) sortByDistance(length, true, nil, hash, swap)
for i := uint64(0); i < length; i++ {
rule = append(rule, i)
}
rule = Sort(rule, hash)
sortByRuleInverse(swap, length, rule)
} }
// SortSliceByWeightIndex received []T, weights and hash to sort by index-distance * weights // SortSliceByWeightIndex received []T, weights and hash to sort by index-distance * weights
func SortSliceByWeightIndex(slice interface{}, weights []float64, hash uint64) { func SortSliceByWeightIndex(slice interface{}, weights []float64, hash uint64) {
length := uint64(reflect.ValueOf(slice).Len()) length := reflect.ValueOf(slice).Len()
swap := reflect.Swapper(slice) swap := reflect.Swapper(slice)
rule := make([]uint64, 0, length) sortByWeight(length, true, nil, weights, hash, swap)
for i := uint64(0); i < length; i++ {
rule = append(rule, i)
}
rule = SortByWeight(rule, weights, hash)
sortByRuleInverse(swap, length, rule)
}
func sortByRuleDirect(swap swapper, length uint64, rule []uint64) {
done := make([]bool, length)
for i := uint64(0); i < length; i++ {
if done[i] {
continue
}
for j := rule[i]; !done[rule[j]]; j = rule[j] {
swap(int(i), int(j))
done[j] = true
}
}
}
func sortByRuleInverse(swap swapper, length uint64, rule []uint64) {
done := make([]bool, length)
for i := uint64(0); i < length; i++ {
if done[i] {
continue
}
for j := i; !done[rule[j]]; j = rule[j] {
swap(int(j), int(rule[j]))
done[j] = true
}
}
} }
func prepareRule(slice interface{}) []uint64 { func prepareRule(slice interface{}) []uint64 {
t := reflect.TypeOf(slice) t := reflect.TypeOf(slice)
if t.Kind() != reflect.Slice { if t.Kind() != reflect.Slice {
return nil panic("HRW sort expects slice, got " + t.Kind().String())
} }
var ( var (
@ -279,7 +263,7 @@ func prepareRule(slice interface{}) []uint64 {
default: default:
if _, ok := val.Index(0).Interface().(Hasher); !ok { if _, ok := val.Index(0).Interface().(Hasher); !ok {
return nil panic("slice elements must implement hrw.Hasher")
} }
for i := 0; i < length; i++ { for i := 0; i < length; i++ {
@ -293,9 +277,85 @@ func prepareRule(slice interface{}) []uint64 {
// ValidateWeights checks if weights are normalized between 0.0 and 1.0 // ValidateWeights checks if weights are normalized between 0.0 and 1.0
func ValidateWeights(weights []float64) error { func ValidateWeights(weights []float64) error {
for i := range weights { for i := range weights {
if weights[i] > NormalizedMaxWeight || weights[i] < NormalizedMinWeight { if math.IsNaN(weights[i]) || weights[i] > NormalizedMaxWeight || weights[i] < NormalizedMinWeight {
return errors.New("weights are not normalized") return errors.New("weights are not normalized")
} }
} }
return nil return nil
} }
// sortByWeight sorts nodes by weight using provided swapper.
// nodes contains hrw hashes. If it is nil, indices are used.
func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash uint64, swap func(i, j int)) {
// if all nodes have the same distance then sort uniformly
if allSameF64(weights) {
sortByDistance(l, byIndex, nodes, hash, swap)
return
}
dist := make([]float64, l)
for i := 0; i < l; i++ {
d := getDistance(byIndex, i, nodes, hash)
// `maxUint64 - distance` makes the shorter distance more valuable
// it is necessary for operation with normalized values
dist[i] = float64(^uint64(0)-d) * weights[i]
}
s := &sorter{
l: l,
swap: func(i, j int) {
swap(i, j)
dist[i], dist[j] = dist[j], dist[i]
},
less: func(i, j int) bool {
return dist[i] > dist[j] // higher distance must be placed lower to be first
},
}
sort.Sort(s)
}
// sortByDistance sorts nodes by hrw distance using provided swapper.
// nodes contains hrw hashes. If it is nil, indices are used.
func sortByDistance(l int, byIndex bool, nodes []uint64, hash uint64, swap func(i, j int)) {
dist := make([]uint64, l)
for i := 0; i < l; i++ {
dist[i] = getDistance(byIndex, i, nodes, hash)
}
s := &sorter{
l: l,
swap: func(i, j int) {
swap(i, j)
dist[i], dist[j] = dist[j], dist[i]
},
less: func(i, j int) bool {
return dist[i] < dist[j]
},
}
sort.Sort(s)
}
// getDistance return distance from nodes[i] to h.
// If byIndex is true, nodes index is used.
// Else if nodes[i] != nil, distance is calculated from this value.
// Otherwise, and hash from node index is taken.
func getDistance(byIndex bool, i int, nodes []uint64, h uint64) uint64 {
if nodes != nil {
return distance(nodes[i], h)
} else if byIndex {
return distance(uint64(i), h)
} else {
buf := make([]byte, 8)
binary.LittleEndian.PutUint64(buf, uint64(i))
return distance(Hash(buf), h)
}
}
func allSameF64(fs []float64) bool {
for i := range fs {
if fs[i] != fs[0] {
return false
}
}
return true
}

View file

@ -61,7 +61,7 @@ func Example() {
} }
func (h hashString) Hash() uint64 { func (h hashString) Hash() uint64 {
return Hash([]byte(h)) return StringHash(string(h))
} }
func TestSortSliceByIndex(t *testing.T) { func TestSortSliceByIndex(t *testing.T) {
@ -76,6 +76,9 @@ func TestValidateWeights(t *testing.T) {
weights := []float64{10, 10, 10, 2, 2, 2} weights := []float64{10, 10, 10, 2, 2, 2}
err := ValidateWeights(weights) err := ValidateWeights(weights)
require.Error(t, err) require.Error(t, err)
weights = []float64{math.NaN(), 1, 1, 0.2, 0.2, 0.2}
err = ValidateWeights(weights)
require.Error(t, err)
weights = []float64{1, 1, 1, 0.2, 0.2, 0.2} weights = []float64{1, 1, 1, 0.2, 0.2, 0.2}
err = ValidateWeights(weights) err = ValidateWeights(weights)
require.NoError(t, err) require.NoError(t, err)
@ -98,36 +101,6 @@ func TestSortSliceByValue(t *testing.T) {
require.Equal(t, expect, actual) require.Equal(t, expect, actual)
} }
func TestSortByRule(t *testing.T) {
t.Run("direct", func(t *testing.T) {
// 0 1 2 3 4 5
actual := []string{"a", "b", "c", "d", "e", "f"}
// 4 2 0 5 3 1
expect := []string{"c", "f", "b", "e", "a", "d"}
rule := []uint64{4, 2, 0, 5, 3, 1}
sortByRuleDirect(
func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
6, rule)
require.Equal(t, expect, actual)
})
t.Run("inverse", func(t *testing.T) {
// 0 1 2 3 4 5
actual := []string{"a", "b", "c", "d", "e", "f"}
// 4 2 0 5 3 1
expect := []string{"e", "c", "a", "f", "d", "b"}
rule := []uint64{4, 2, 0, 5, 3, 1}
sortByRuleInverse(
func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
6, rule)
require.Equal(t, expect, actual)
})
}
func TestSortSliceByValueFail(t *testing.T) { func TestSortSliceByValueFail(t *testing.T) {
t.Run("empty slice", func(t *testing.T) { t.Run("empty slice", func(t *testing.T) {
var ( var (
@ -140,15 +113,13 @@ func TestSortSliceByValueFail(t *testing.T) {
t.Run("must be slice", func(t *testing.T) { t.Run("must be slice", func(t *testing.T) {
actual := 10 actual := 10
hash := Hash(testKey) hash := Hash(testKey)
require.NotPanics(t, func() { SortSliceByValue(actual, hash) }) require.Panics(t, func() { SortSliceByValue(actual, hash) })
}) })
t.Run("must 'fail' for unknown type", func(t *testing.T) { t.Run("must 'fail' for unknown type", func(t *testing.T) {
actual := []unknown{1, 2, 3, 4, 5} actual := []unknown{1, 2, 3, 4, 5}
expect := []unknown{1, 2, 3, 4, 5}
hash := Hash(testKey) hash := Hash(testKey)
SortSliceByValue(actual, hash) require.Panics(t, func() { SortSliceByValue(actual, hash) })
require.Equal(t, expect, actual)
}) })
} }
@ -160,6 +131,23 @@ func TestSortSliceByValueHasher(t *testing.T) {
require.Equal(t, expect, actual) require.Equal(t, expect, actual)
} }
func TestSortHasherSliceByValue(t *testing.T) {
actual := []hashString{"a", "b", "c", "d", "e", "f"}
expect := []hashString{"d", "f", "c", "b", "a", "e"}
hash := Hash(testKey)
SortHasherSliceByValue(actual, hash)
require.EqualValues(t, expect, actual)
}
func TestSortHasherSliceByWeightValue(t *testing.T) {
actual := []hashString{"a", "b", "c", "d", "e", "f"}
weights := []float64{1.0, 1.0, 1.0, 1.0, 1.0, 1.0}
expect := []hashString{"d", "f", "c", "b", "a", "e"}
hash := Hash(testKey)
SortHasherSliceByWeightValue(actual, weights, hash)
require.EqualValues(t, expect, actual)
}
func TestSortSliceByValueIntSlice(t *testing.T) { func TestSortSliceByValueIntSlice(t *testing.T) {
cases := []slices{ cases := []slices{
{ {
@ -202,11 +190,6 @@ func TestSortSliceByValueIntSlice(t *testing.T) {
expect: []uint32{5, 1, 2, 0, 3, 4}, expect: []uint32{5, 1, 2, 0, 3, 4},
}, },
{
actual: Uint32Slice{0, 1, 2, 3, 4, 5},
expect: Uint32Slice{0, 1, 2, 3, 4, 5},
},
{ {
actual: []int64{0, 1, 2, 3, 4, 5}, actual: []int64{0, 1, 2, 3, 4, 5},
expect: []int64{5, 3, 0, 1, 4, 2}, expect: []int64{5, 3, 0, 1, 4, 2},
@ -670,6 +653,36 @@ func BenchmarkSortByValue_fnv_1000(b *testing.B) {
benchmarkSortByValue(b, 1000, hash) benchmarkSortByValue(b, 1000, hash)
} }
func BenchmarkSortHashersByValue_Reflection_fnv_10(b *testing.B) {
hash := Hash(testKey)
benchmarkSortHashersByValueReflection(b, 10, hash)
}
func BenchmarkSortHashersByValue_Reflection_fnv_100(b *testing.B) {
hash := Hash(testKey)
benchmarkSortHashersByValueReflection(b, 100, hash)
}
func BenchmarkSortHashersByValue_Reflection_fnv_1000(b *testing.B) {
hash := Hash(testKey)
benchmarkSortHashersByValueReflection(b, 1000, hash)
}
func BenchmarkSortHashersByValue_Typed_fnv_10(b *testing.B) {
hash := Hash(testKey)
benchmarkSortHashersByValueTyped(b, 10, hash)
}
func BenchmarkSortHashersByValue_Typed_fnv_100(b *testing.B) {
hash := Hash(testKey)
benchmarkSortHashersByValueTyped(b, 100, hash)
}
func BenchmarkSortHashersByValue_Typed_fnv_1000(b *testing.B) {
hash := Hash(testKey)
benchmarkSortHashersByValueTyped(b, 1000, hash)
}
func BenchmarkSortByWeight_fnv_10(b *testing.B) { func BenchmarkSortByWeight_fnv_10(b *testing.B) {
hash := Hash(testKey) hash := Hash(testKey)
_ = benchmarkSortByWeight(b, 10, hash) _ = benchmarkSortByWeight(b, 10, hash)
@ -715,6 +728,30 @@ func BenchmarkSortByWeightValue_fnv_1000(b *testing.B) {
benchmarkSortByWeightValue(b, 1000, hash) benchmarkSortByWeightValue(b, 1000, hash)
} }
func BenchmarkSortHashersByWeightValueReflection_fnv_10(b *testing.B) {
benchmarkSortHashersByWeightValueRelection(b, 10, Hash(testKey))
}
func BenchmarkSortHashersByWeightValueReflection_fnv_100(b *testing.B) {
benchmarkSortHashersByWeightValueRelection(b, 100, Hash(testKey))
}
func BenchmarkSortHashersByWeightValueReflection_fnv_1000(b *testing.B) {
benchmarkSortHashersByWeightValueRelection(b, 1000, Hash(testKey))
}
func BenchmarkSortHashersByWeightValueTyped_fnv_10(b *testing.B) {
benchmarkSortHashersByWeightValueTyped(b, 10, Hash(testKey))
}
func BenchmarkSortHashersByWeightValueTyped_fnv_100(b *testing.B) {
benchmarkSortHashersByWeightValueTyped(b, 100, Hash(testKey))
}
func BenchmarkSortHashersByWeightValueTyped_fnv_1000(b *testing.B) {
benchmarkSortHashersByWeightValueTyped(b, 1000, Hash(testKey))
}
func benchmarkSort(b *testing.B, n int, hash uint64) uint64 { func benchmarkSort(b *testing.B, n int, hash uint64) uint64 {
servers := make([]uint64, n) servers := make([]uint64, n)
for i := uint64(0); i < uint64(len(servers)); i++ { for i := uint64(0); i < uint64(len(servers)); i++ {
@ -808,3 +845,63 @@ func benchmarkSortByWeightValue(b *testing.B, n int, hash uint64) {
SortSliceByWeightValue(servers, weights, hash) SortSliceByWeightValue(servers, weights, hash)
} }
} }
func benchmarkSortHashersByWeightValueRelection(b *testing.B, n int, hash uint64) {
servers := make([]hashString, n)
weights := make([]float64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
weights[i] = float64(uint64(n)-i) / float64(n)
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortSliceByWeightValue(servers, weights, hash)
}
}
func benchmarkSortHashersByWeightValueTyped(b *testing.B, n int, hash uint64) {
servers := make([]hashString, n)
weights := make([]float64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
weights[i] = float64(uint64(n)-i) / float64(n)
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortHasherSliceByWeightValue(servers, weights, hash)
}
}
func benchmarkSortHashersByValueReflection(b *testing.B, n int, hash uint64) {
servers := make([]hashString, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortSliceByValue(servers, hash)
}
}
func benchmarkSortHashersByValueTyped(b *testing.B, n int, hash uint64) {
servers := make([]hashString, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortHasherSliceByValue(servers, hash)
}
}