Compare commits

...

23 commits

Author SHA1 Message Date
16a7740ccd [#8] hrw: Introduce StringHash() for hashing strings directly
```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/hrw
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                         │      6      │                  7                   │
                                         │   sec/op    │    sec/op     vs base                │
SortHashersByValue_Typed_fnv_10-8          248.8n ± 1%   166.9n ±  9%  -32.93% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         2.195µ ± 6%   1.297µ ±  6%  -40.93% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        22.47µ ± 4%   12.42µ ± 10%  -44.73% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     301.7n ± 6%   180.8n ±  4%  -40.09% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    2.526µ ± 1%   1.378µ ±  5%  -45.47% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   24.37µ ± 2%   12.80µ ±  4%  -47.48% (p=0.000 n=10)
geomean                                    2.472µ        1.430µ        -42.13%

                                         │      6       │                   7                   │
                                         │     B/op     │     B/op      vs base                 │
SortHashersByValue_Typed_fnv_10-8            144.0 ± 0%     144.0 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByValue_Typed_fnv_100-8           960.0 ± 0%     960.0 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByValue_Typed_fnv_1000-8        8.062Ki ± 0%   8.062Ki ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByWeightValueTyped_fnv_10-8       144.0 ± 0%     144.0 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByWeightValueTyped_fnv_100-8      960.0 ± 0%     960.0 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByWeightValueTyped_fnv_1000-8   8.062Ki ± 0%   8.062Ki ± 0%       ~ (p=1.000 n=10) ¹
geomean                                    1.021Ki        1.021Ki       +0.00%
¹ all samples are equal

                                         │     6      │                  7                  │
                                         │ allocs/op  │ allocs/op   vs base                 │
SortHashersByValue_Typed_fnv_10-8          2.000 ± 0%   2.000 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByValue_Typed_fnv_100-8         2.000 ± 0%   2.000 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByValue_Typed_fnv_1000-8        2.000 ± 0%   2.000 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByWeightValueTyped_fnv_10-8     2.000 ± 0%   2.000 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByWeightValueTyped_fnv_100-8    2.000 ± 0%   2.000 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByWeightValueTyped_fnv_1000-8   2.000 ± 0%   2.000 ± 0%       ~ (p=1.000 n=10) ¹
geomean                                    2.000        2.000       +0.00%
¹ all samples are equal
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-02 10:56:24 +03:00
2ac89c82b6 [#8] hrw/test: Fix typo in benchmarks
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-02 10:56:24 +03:00
266da7c69a [#8] hrw: Do not allocate for swap()/less() helpers
```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/hrw
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                         │      4      │                  5                  │
                                         │   sec/op    │   sec/op     vs base                │
SortHashersByValue_Typed_fnv_10-8          309.2n ± 2%   294.4n ± 1%   -4.75% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         2.306µ ± 1%   2.549µ ± 1%  +10.54% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        21.73µ ± 1%   24.80µ ± 3%  +14.14% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     347.1n ± 1%   334.8n ± 2%   -3.56% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    2.668µ ± 1%   2.954µ ± 3%  +10.72% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   2.673µ ± 1%   2.957µ ± 4%  +10.63% (p=0.000 n=10)
geomean                                    1.836µ        1.947µ        +6.01%

                                         │      4       │                  5                   │
                                         │     B/op     │     B/op      vs base                │
SortHashersByValue_Typed_fnv_10-8            216.0 ± 0%     144.0 ± 0%  -33.33% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8          1032.0 ± 0%     960.0 ± 0%   -6.98% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        8.133Ki ± 0%   8.062Ki ± 0%   -0.86% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8       216.0 ± 0%     144.0 ± 0%  -33.33% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8     1032.0 ± 0%     960.0 ± 0%   -6.98% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8    1032.0 ± 0%     960.0 ± 0%   -6.98% (p=0.000 n=10)
geomean                                      867.8          730.1       -15.87%

                                         │     4      │                 5                  │
                                         │ allocs/op  │ allocs/op   vs base                │
SortHashersByValue_Typed_fnv_10-8          4.000 ± 0%   2.000 ± 0%  -50.00% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         4.000 ± 0%   2.000 ± 0%  -50.00% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        4.000 ± 0%   2.000 ± 0%  -50.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     4.000 ± 0%   2.000 ± 0%  -50.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    4.000 ± 0%   2.000 ± 0%  -50.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   4.000 ± 0%   2.000 ± 0%  -50.00% (p=0.000 n=10)
geomean                                    4.000        2.000       -50.00%
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-02 10:56:23 +03:00
c52f74d8e1 [#8] hrw: Do not allocate 2 slices for sort
Currently we allocate `rule` and then create `dist` which depends on it.
In this commit we create `dist` directly.

```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/hrw
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                         │      3      │                 4                  │
                                         │   sec/op    │   sec/op     vs base               │
SortHashersByValue_Typed_fnv_10-8          336.3n ± 3%   309.2n ± 2%  -8.06% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         2.424µ ± 3%   2.306µ ± 1%  -4.87% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        22.35µ ± 1%   21.73µ ± 1%  -2.75% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     346.6n ± 3%   347.1n ± 1%       ~ (p=0.631 n=10)
SortHashersByWeightValueTyped_fnv_100-8    2.637µ ± 6%   2.668µ ± 1%       ~ (p=0.481 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   2.609µ ± 4%   2.673µ ± 1%  +2.43% (p=0.000 n=10)
geomean                                    1.875µ        1.836µ       -2.06%

                                         │       3       │                  4                   │
                                         │     B/op      │     B/op      vs base                │
SortHashersByValue_Typed_fnv_10-8             296.0 ± 0%     216.0 ± 0%  -27.03% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8          1.883Ki ± 0%   1.008Ki ± 0%  -46.47% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        16.133Ki ± 0%   8.133Ki ± 0%  -49.59% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8        296.0 ± 0%     216.0 ± 0%  -27.03% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8     1.883Ki ± 0%   1.008Ki ± 0%  -46.47% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8    1.883Ki ± 0%   1.008Ki ± 0%  -46.47% (p=0.000 n=10)
geomean                                     1.442Ki          867.8       -41.24%

                                         │     3      │                 4                  │
                                         │ allocs/op  │ allocs/op   vs base                │
SortHashersByValue_Typed_fnv_10-8          5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
geomean                                    5.000        4.000       -20.00%
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-02 10:53:44 +03:00
895ecf150f [#8] hrw: Inline swap() when slice is known
```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/hrw
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                         │      2      │                 3                  │
                                         │   sec/op    │   sec/op     vs base               │
SortHashersByValue_Typed_fnv_10-8          368.5n ± 2%   336.3n ± 3%  -8.75% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         2.411µ ± 4%   2.424µ ± 3%       ~ (p=0.853 n=10)
SortHashersByValue_Typed_fnv_1000-8        22.19µ ± 2%   22.35µ ± 1%       ~ (p=0.247 n=10)
SortHashersByWeightValueTyped_fnv_10-8     364.3n ± 2%   346.6n ± 3%  -4.86% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    2.541µ ± 3%   2.637µ ± 6%       ~ (p=0.055 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   2.483µ ± 1%   2.609µ ± 4%  +5.07% (p=0.003 n=10)
geomean                                    1.888µ        1.875µ       -0.71%

                                         │      2       │                  3                  │
                                         │     B/op     │     B/op      vs base               │
SortHashersByValue_Typed_fnv_10-8            312.0 ± 0%     296.0 ± 0%  -5.13% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         1.898Ki ± 0%   1.883Ki ± 0%  -0.82% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        16.15Ki ± 0%   16.13Ki ± 0%  -0.10% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8       312.0 ± 0%     296.0 ± 0%  -5.13% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    1.898Ki ± 0%   1.883Ki ± 0%  -0.82% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   1.898Ki ± 0%   1.883Ki ± 0%  -0.82% (p=0.000 n=10)
geomean                                    1.474Ki        1.442Ki       -2.16%

                                         │     2      │                 3                  │
                                         │ allocs/op  │ allocs/op   vs base                │
SortHashersByValue_Typed_fnv_10-8          6.000 ± 0%   5.000 ± 0%  -16.67% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         6.000 ± 0%   5.000 ± 0%  -16.67% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        6.000 ± 0%   5.000 ± 0%  -16.67% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     6.000 ± 0%   5.000 ± 0%  -16.67% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    6.000 ± 0%   5.000 ± 0%  -16.67% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   6.000 ± 0%   5.000 ± 0%  -16.67% (p=0.000 n=10)
geomean                                    6.000        5.000       -16.67%
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-01 21:41:16 +03:00
213c105ac1 [#8] go.mod: Use faster murmur3 lib
Specifically, this line became possible, because of noescape annotations
for assembly.
```
./hrw.go:307:14: make([]byte, 8) does not escape
```

```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/hrw
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                              │      1       │                   2                   │
                                              │    sec/op    │   sec/op     vs base                  │
SortHashersByValue_Typed_fnv_10-8               580.1n ±  1%   368.5n ± 2%  -36.47% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8              4.215µ ±  2%   2.411µ ± 4%  -42.79% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8             39.40µ ±  1%   22.19µ ± 2%  -43.68% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8          599.6n ±  2%   364.3n ± 2%  -39.25% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8         4.337µ ±  5%   2.541µ ± 3%  -41.41% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8        4.344µ ±  3%   2.483µ ± 1%  -42.84% (p=0.000 n=10)
geomean                                         4.400µ         1.888µ       -41.13%                ¹
¹ benchmark set differs from baseline; geomeans may not be comparable

                                              │      1       │                   2                    │
                                              │     B/op     │     B/op      vs base                  │
SortHashersByValue_Typed_fnv_10-8                 472.0 ± 0%     312.0 ± 0%  -33.90% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8              3.461Ki ± 0%   1.898Ki ± 0%  -45.15% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8             31.77Ki ± 0%   16.15Ki ± 0%  -49.18% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8            472.0 ± 0%     312.0 ± 0%  -33.90% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8         3.461Ki ± 0%   1.898Ki ± 0%  -45.15% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8        3.461Ki ± 0%   1.898Ki ± 0%  -45.15% (p=0.000 n=10)
geomean                                         3.070Ki        1.474Ki       -42.37%                ¹
¹ benchmark set differs from baseline; geomeans may not be comparable

                                              │       1       │                  2                   │
                                              │   allocs/op   │ allocs/op   vs base                  │
SortHashersByValue_Typed_fnv_10-8                 16.000 ± 0%   6.000 ± 0%  -62.50% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8               106.000 ± 0%   6.000 ± 0%  -94.34% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8             1006.000 ± 0%   6.000 ± 0%  -99.40% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8            16.000 ± 0%   6.000 ± 0%  -62.50% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8          106.000 ± 0%   6.000 ± 0%  -94.34% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8         106.000 ± 0%   6.000 ± 0%  -94.34% (p=0.000 n=10)
geomean                                            113.0        6.000       -92.69%                ¹
¹ benchmark set differs from baseline; geomeans may not be comparable
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-01 21:41:00 +03:00
c175ef4099 [#8] hrw: Do not create index slice for sorter
`ind` is only needed to index dist or weights, swap them directly.

```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/hrw
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                              │      0      │                  1                   │
                                              │   sec/op    │    sec/op     vs base                │
SortHashersByValue_Typed_fnv_10-8               596.2n ± 4%   580.1n ±  1%   -2.72% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8              4.453µ ± 2%   4.215µ ±  2%   -5.35% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8             41.58µ ± 4%   39.40µ ±  1%   -5.23% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8          624.5n ± 2%   599.6n ±  2%   -3.99% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8         4.593µ ± 2%   4.337µ ±  5%   -5.56% (p=0.003 n=10)
SortHashersByWeightValueTyped_fnv_1000-8        4.896µ ± 8%   4.344µ ±  3%  -11.27% (p=0.000 n=10)
geomean                                         4.668µ        4.400µ         -5.75%

                                              │      0       │                  1                   │
                                              │     B/op     │     B/op      vs base                │
SortHashersByValue_Typed_fnv_10-8                 584.0 ± 0%     472.0 ± 0%  -19.18% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8              4.367Ki ± 0%   3.461Ki ± 0%  -20.75% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8             39.80Ki ± 0%   31.77Ki ± 0%  -20.18% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8            600.0 ± 0%     472.0 ± 0%  -21.33% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8         4.383Ki ± 0%   3.461Ki ± 0%  -21.03% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8        4.383Ki ± 0%   3.461Ki ± 0%  -21.03% (p=0.000 n=10)
geomean                                         3.742Ki        3.070Ki       -17.96%

                                              │      0      │                 1                  │
                                              │  allocs/op  │  allocs/op   vs base               │
SortHashersByValue_Typed_fnv_10-8                17.00 ± 0%    16.00 ± 0%  -5.88% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8               107.0 ± 0%    106.0 ± 0%  -0.93% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8             1.007k ± 0%   1.006k ± 0%  -0.10% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8           17.00 ± 0%    16.00 ± 0%  -5.88% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8          107.0 ± 0%    106.0 ± 0%  -0.93% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8         107.0 ± 0%    106.0 ± 0%  -0.93% (p=0.000 n=10)
geomean                                          115.3         113.0       -1.94%
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-01 21:40:46 +03:00
2c085708de [#6] .github: Remove CODEOWNERS
Not supported by Gitea/Forgejo.
See https://github.com/go-gitea/gitea/issues/10161 .

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-03-09 22:50:15 +03:00
15b3800347 [#6] Remove .travis.yml
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-03-09 22:50:15 +03:00
2e205cf1ca [#6] pre-commit: Add gitlint hook
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-03-09 22:50:15 +03:00
ebca2848ad [#6] pre-commit: Add golangci-lint hook
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-03-09 22:50:15 +03:00
0ad932400c [#6] pre-commit: Add initial configuration
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-03-09 22:50:14 +03:00
08e14caaf3 Rename package name
Due to source code relocation from GitHub.

Signed-off-by: Alex Vanin <a.vanin@yadro.com>
2023-03-07 14:00:00 +03:00
7e33833933 [#4] .github: Add CODEOWNERS
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-02-27 17:25:20 +03:00
79b208bebf [#3] hrw: Update README
Update benchmark results

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
2023-02-27 14:18:58 +03:00
5671632658 [#2] hrw: Add typed methods for hashers
Add generic methods for sort-methods used in node

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
2023-02-27 14:18:58 +03:00
9e9fc653e5 [#2] hrw: Go version update
Go version updated 1.14 -> 1.18

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
2023-02-27 14:18:58 +03:00
997b540432 Move from nspcc-dev to TrueCloudLab
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2022-12-12 18:02:07 +03:00
22b833d972 Panic if provided value is not a slice
Signed-off-by: Evgenii Stratonikov <stratonikov@runbox.com>
2021-12-28 14:37:17 +03:00
fad35bbd3b Specify go.mod version 2020-09-11 18:30:18 +03:00
dddcfc8fc5 Simplify SortByValue/Weight a bit
Get rid of unneeded types.
2020-09-11 18:30:18 +03:00
Alex Vanin
f52ea8fb21
Handle NaN values in weight validation function (#7)
NaN values can pass the validation since NaN values don't work
with float arithmetic and comparisons are always true. Now validation
function has check for NaN values.
2019-08-01 12:16:03 +03:00
Alex Vanin
aa230933d1
Move normalization routine out of hrw library (#6)
HRW library supports weighted sorting. Weights must be normalized
before applying. Since there could be different types of normalization
for multiple criteria, there is no point to perform simple
normalization in this library. Pass a slice of normalized weights
to the `SortByWeight` functions.

This commit proposes to:
- remove normalization routine from `SortByWeight` function;
- add `ValidateWeights` function to check if weights are normalized;
- rename `weight` -> `distance` to avoid naming confusion between
  hash distance and actual weights;
- use testify lib in the tests;
2019-07-05 09:49:24 +03:00
9 changed files with 563 additions and 350 deletions

10
.gitlint Normal file
View file

@ -0,0 +1,10 @@
[general]
fail-without-commits=true
contrib=CC1
[title-match-regex]
regex=^\[\#[0-9]+\]\s
[ignore-by-title]
regex=^Release(.*)
ignore=title-match-regex

30
.pre-commit-config.yaml Normal file
View file

@ -0,0 +1,30 @@
ci:
autofix_prs: false
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
- id: check-added-large-files
- id: check-case-conflict
- id: check-executables-have-shebangs
- id: check-shebang-scripts-are-executable
- id: check-merge-conflict
- id: check-json
- id: check-xml
- id: check-yaml
- id: trailing-whitespace
args: [--markdown-linebreak-ext=md]
- id: end-of-file-fixer
exclude: ".key$"
- repo: https://github.com/golangci/golangci-lint
rev: v1.51.2
hooks:
- id: golangci-lint
- repo: https://github.com/jorisroovers/gitlint
rev: v0.18.0
hooks:
- id: gitlint
stages: [commit-msg]

View file

@ -1,17 +0,0 @@
language: go
go:
- 1.11.x
- 1.12.x
env:
- GO111MODULE=on
install:
- go get -v golang.org/x/lint/golint
- go mod tidy -v
script:
- golint -set_exit_status ./...
- go test -race -coverprofile=coverage.txt -covermode=atomic ./...
after_success:
- bash <(curl -s https://codecov.io/bash)
matrix:
allow_failures:
- go: tip

View file

@ -14,26 +14,37 @@
## Benchmark:
```
BenchmarkSort_fnv_10-8 5000000 354 ns/op 224 B/op 3 allocs/op
BenchmarkSort_fnv_100-8 300000 5103 ns/op 1856 B/op 3 allocs/op
BenchmarkSort_fnv_1000-8 10000 115874 ns/op 16448 B/op 3 allocs/op
BenchmarkSortByIndex_fnv_10-8 3000000 562 ns/op 384 B/op 7 allocs/op
BenchmarkSortByIndex_fnv_100-8 200000 5819 ns/op 2928 B/op 7 allocs/op
BenchmarkSortByIndex_fnv_1000-8 10000 125859 ns/op 25728 B/op 7 allocs/op
BenchmarkSortByValue_fnv_10-8 2000000 1056 ns/op 544 B/op 17 allocs/op
BenchmarkSortByValue_fnv_100-8 200000 9593 ns/op 4528 B/op 107 allocs/op
BenchmarkSortByValue_fnv_1000-8 10000 109272 ns/op 41728 B/op 1007 allocs/op
BenchmarkSortByWeight_fnv_10-8 3000000 500 ns/op 320 B/op 4 allocs/op
BenchmarkSortByWeight_fnv_100-8 200000 8257 ns/op 2768 B/op 4 allocs/op
BenchmarkSortByWeight_fnv_1000-8 10000 197938 ns/op 24656 B/op 4 allocs/op
BenchmarkSortByWeightIndex_fnv_10-8 2000000 760 ns/op 480 B/op 8 allocs/op
BenchmarkSortByWeightIndex_fnv_100-8 200000 9191 ns/op 3840 B/op 8 allocs/op
BenchmarkSortByWeightIndex_fnv_1000-8 10000 208204 ns/op 33936 B/op 8 allocs/op
BenchmarkSortByWeightValue_fnv_10-8 1000000 1095 ns/op 640 B/op 18 allocs/op
BenchmarkSortByWeightValue_fnv_100-8 200000 12291 ns/op 5440 B/op 108 allocs/op
BenchmarkSortByWeightValue_fnv_1000-8 10000 145125 ns/op 49936 B/op 1008 allocs/op
BenchmarkSort_fnv_10-8 4812801 240.9 ns/op 216 B/op 4 allocs/op
BenchmarkSort_fnv_100-8 434767 2600 ns/op 1848 B/op 4 allocs/op
BenchmarkSort_fnv_1000-8 20428 66116 ns/op 16440 B/op 4 allocs/op
BenchmarkSortByIndex_fnv_10-8 2505410 486.5 ns/op 352 B/op 7 allocs/op
BenchmarkSortByIndex_fnv_100-8 254556 4697 ns/op 1984 B/op 7 allocs/op
BenchmarkSortByIndex_fnv_1000-8 13581 88334 ns/op 16576 B/op 7 allocs/op
BenchmarkSortByValue_fnv_10-8 1761030 682.1 ns/op 592 B/op 18 allocs/op
BenchmarkSortByValue_fnv_100-8 258838 4675 ns/op 4480 B/op 108 allocs/op
BenchmarkSortByValue_fnv_1000-8 27027 44649 ns/op 40768 B/op 1008 allocs/op
BenchmarkSortHashersByValue_Reflection_fnv_10-8 1013560 1249 ns/op 768 B/op 29 allocs/op
BenchmarkSortHashersByValue_Reflection_fnv_100-8 106029 11414 ns/op 6096 B/op 209 allocs/op
BenchmarkSortHashersByValue_Reflection_fnv_1000-8 10000 108977 ns/op 56784 B/op 2009 allocs/op
BenchmarkSortHashersByValue_Typed_fnv_10-8 1577814 700.3 ns/op 584 B/op 17 allocs/op
BenchmarkSortHashersByValue_Typed_fnv_100-8 215938 5024 ns/op 4472 B/op 107 allocs/op
BenchmarkSortHashersByValue_Typed_fnv_1000-8 24447 46889 ns/op 40760 B/op 1007 allocs/op
BenchmarkSortByWeight_fnv_10-8 2924833 370.6 ns/op 448 B/op 8 allocs/op
BenchmarkSortByWeight_fnv_100-8 816069 1516 ns/op 2896 B/op 8 allocs/op
BenchmarkSortByWeight_fnv_1000-8 80391 17478 ns/op 24784 B/op 8 allocs/op
BenchmarkSortByWeightIndex_fnv_10-8 1945612 550.3 ns/op 368 B/op 7 allocs/op
BenchmarkSortByWeightIndex_fnv_100-8 140473 8084 ns/op 2000 B/op 7 allocs/op
BenchmarkSortByWeightIndex_fnv_1000-8 5518 200949 ns/op 16592 B/op 7 allocs/op
BenchmarkSortByWeightValue_fnv_10-8 1305580 909.8 ns/op 608 B/op 18 allocs/op
BenchmarkSortByWeightValue_fnv_100-8 165410 6796 ns/op 4496 B/op 108 allocs/op
BenchmarkSortByWeightValue_fnv_1000-8 17922 78555 ns/op 40784 B/op 1008 allocs/op
BenchmarkSortHashersByWeightValueReflection_fnv_10-8 454976 2229 ns/op 784 B/op 29 allocs/op
BenchmarkSortHashersByWeightValueReflection_fnv_100-8 76264 15332 ns/op 6112 B/op 209 allocs/op
BenchmarkSortHashersByWeightValueReflection_fnv_1000-8 80288 13192 ns/op 6112 B/op 209 allocs/op
BenchmarkSortHashersByWeightValueTyped_fnv_10-8 1433113 901.4 ns/op 600 B/op 17 allocs/op
BenchmarkSortHashersByWeightValueTyped_fnv_100-8 188626 5896 ns/op 4488 B/op 107 allocs/op
BenchmarkSortHashersByWeightValueTyped_fnv_1000-8 178131 6518 ns/op 4488 B/op 107 allocs/op
```
## Example
@ -44,7 +55,7 @@ package main
import (
"fmt"
"github.com/nspcc-dev/hrw"
"git.frostfs.info/TrueCloudLab/hrw"
)
func main() {
@ -71,11 +82,11 @@ func main() {
}
// Output:
// trying GET four.example.com/examples/object-key
// trying GET three.example.com/examples/object-key
// trying GET one.example.com/examples/object-key
// trying GET two.example.com/examples/object-key
// trying GET six.example.com/examples/object-key
// trying GET five.example.com/examples/object-key
// trying GET six.example.com/examples/object-key
// trying GET one.example.com/examples/object-key
// trying GET four.example.com/examples/object-key
}
```

14
go.mod
View file

@ -1,3 +1,13 @@
module github.com/nspcc-dev/hrw
module git.frostfs.info/TrueCloudLab/hrw
require github.com/spaolacci/murmur3 v1.1.0
go 1.18
require (
github.com/stretchr/testify v1.3.0
github.com/twmb/murmur3 v1.1.8
)
require (
github.com/davecgh/go-spew v1.1.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
)

11
go.sum
View file

@ -1,2 +1,9 @@
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/twmb/murmur3 v1.1.8 h1:8Yt9taO/WN3l08xErzjeschgZU2QSrwm1kclYq+0aRg=
github.com/twmb/murmur3 v1.1.8/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ=

340
hrw.go
View file

@ -4,31 +4,54 @@ package hrw
import (
"encoding/binary"
"errors"
"math"
"reflect"
"sort"
"github.com/spaolacci/murmur3"
"github.com/twmb/murmur3"
)
type (
swapper func(i, j int)
// Hasher interface used by SortSliceByValue
Hasher interface{ Hash() uint64 }
hashed struct {
length int
sorted []uint64
weight []uint64
sorter struct {
l int
less func(i, j int) bool
swap func(i, j int)
}
weighted struct {
h hashed
normal []float64 // normalized input weights
hasherSorter[T Hasher, N interface{ ~uint64 | ~float64 }] struct {
slice []T
dist []N
asc bool
}
)
func weight(x uint64, y uint64) uint64 {
// Boundaries of valid normalized weights
const (
NormalizedMaxWeight = 1.0
NormalizedMinWeight = 0.0
)
func (s *sorter) Len() int { return s.l }
func (s *sorter) Less(i, j int) bool { return s.less(i, j) }
func (s *sorter) Swap(i, j int) { s.swap(i, j) }
func (s *hasherSorter[T, N]) Len() int { return len(s.slice) }
func (s *hasherSorter[T, N]) Less(i, j int) bool {
if s.asc {
return s.dist[i] < s.dist[j]
}
return s.dist[i] > s.dist[j]
}
func (s *hasherSorter[T, N]) Swap(i, j int) {
s.slice[i], s.slice[j] = s.slice[j], s.slice[i]
s.dist[i], s.dist[j] = s.dist[j], s.dist[i]
}
func distance(x uint64, y uint64) uint64 {
acc := x ^ y
// here used mmh3 64 bit finalizer
// https://github.com/aappleby/smhasher/blob/61a0530f28277f2e850bfc39600ce61d02b518de/src/MurmurHash3.cpp#L81
@ -40,161 +63,128 @@ func weight(x uint64, y uint64) uint64 {
return acc
}
func (h hashed) Len() int { return h.length }
func (h hashed) Less(i, j int) bool { return h.weight[i] < h.weight[j] }
func (h hashed) Swap(i, j int) {
h.sorted[i], h.sorted[j] = h.sorted[j], h.sorted[i]
h.weight[i], h.weight[j] = h.weight[j], h.weight[i]
}
func (w weighted) Len() int { return w.h.length }
func (w weighted) Less(i, j int) bool {
// `maxUint64 - weight` makes least weight most valuable
// it is necessary for operation with normalized values
wi := float64(^uint64(0)-w.h.weight[i]) * w.normal[i]
wj := float64(^uint64(0)-w.h.weight[j]) * w.normal[j]
return wi > wj // higher weight must be placed lower to be first
}
func (w weighted) Swap(i, j int) { w.normal[i], w.normal[j] = w.normal[j], w.normal[i]; w.h.Swap(i, j) }
// Hash uses murmur3 hash to return uint64
func Hash(key []byte) uint64 {
return murmur3.Sum64(key)
}
// Sort receive nodes and hash, and sort it by weight
// StringHash uses murmur3 hash to return uint64
func StringHash(key string) uint64 {
return murmur3.StringSum64(key)
}
// Sort receive nodes and hash, and sort it by distance
func Sort(nodes []uint64, hash uint64) []uint64 {
var (
l = len(nodes)
h = hashed{
length: l,
sorted: make([]uint64, 0, l),
weight: make([]uint64, 0, l),
}
)
for i, node := range nodes {
h.sorted = append(h.sorted, uint64(i))
h.weight = append(h.weight, weight(node, hash))
l := len(nodes)
sorted := make([]uint64, l)
dist := make([]uint64, l)
for i := range nodes {
sorted[i] = uint64(i)
dist[i] = distance(nodes[i], hash)
}
sort.Sort(h)
return h.sorted
sort.Slice(sorted, func(i, j int) bool {
return dist[sorted[i]] < dist[sorted[j]]
})
return sorted
}
// SortByWeight receive nodes and hash, and sort it by weight
func SortByWeight(nodes []uint64, weights []uint64, hash uint64) []uint64 {
var (
maxWeight uint64
l = len(nodes)
w = weighted{
h: hashed{
length: l,
sorted: make([]uint64, 0, l),
weight: make([]uint64, 0, l),
},
normal: make([]float64, 0, l),
}
)
// finding max weight to perform normalization
for i := range weights {
if maxWeight < weights[i] {
maxWeight = weights[i]
}
// SortByWeight receive nodes, weights and hash, and sort it by distance * weight
func SortByWeight(nodes []uint64, weights []float64, hash uint64) []uint64 {
result := make([]uint64, len(nodes))
copy(nodes, result)
sortByWeight(len(nodes), false, nodes, weights, hash, reflect.Swapper(result))
return result
}
// if all nodes have 0-weights or weights are incorrect then sort uniformly
if maxWeight == 0 || l != len(nodes) {
return Sort(nodes, hash)
}
fMaxWeight := float64(maxWeight)
for i, node := range nodes {
w.h.sorted = append(w.h.sorted, uint64(i))
w.h.weight = append(w.h.weight, weight(node, hash))
w.normal = append(w.normal, float64(weights[i])/fMaxWeight)
}
sort.Sort(w)
return w.h.sorted
}
// SortSliceByValue received []T and hash to sort by value-weight
// SortSliceByValue received []T and hash to sort by value-distance
func SortSliceByValue(slice interface{}, hash uint64) {
rule := prepareRule(slice)
if rule != nil {
swap := reflect.Swapper(slice)
rule = Sort(rule, hash)
sortByRuleInverse(swap, uint64(len(rule)), rule)
sortByDistance(len(rule), false, rule, hash, swap)
}
}
// SortSliceByWeightValue received []T, weights and hash to sort by value-weight
func SortSliceByWeightValue(slice interface{}, weight []uint64, hash uint64) {
// SortHasherSliceByValue receives []Hasher and hash to sort by value-distance.
func SortHasherSliceByValue[T Hasher](slice []T, hash uint64) {
if len(slice) == 0 {
return
}
dist := make([]uint64, len(slice))
for i := range dist {
dist[i] = distance(slice[i].Hash(), hash)
}
sortHasherByDistance(slice, false, dist)
}
// SortSliceByWeightValue received []T, weights and hash to sort by value-distance * weights
func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) {
rule := prepareRule(slice)
if rule != nil {
swap := reflect.Swapper(slice)
rule = SortByWeight(rule, weight, hash)
sortByRuleInverse(swap, uint64(len(rule)), rule)
sortByWeight(reflect.ValueOf(slice).Len(), false, rule, weights, hash, swap)
}
}
// SortSliceByIndex received []T and hash to sort by index-weight
// SortHasherSliceByWeightValue receives []Hasher, weights and hash to sort by value-distance * weights.
func SortHasherSliceByWeightValue[T Hasher](slice []T, weights []float64, hash uint64) {
if len(slice) == 0 {
return
}
if allSameF64(weights) {
dist := make([]uint64, len(slice))
for i := range dist {
dist[i] = distance(slice[i].Hash(), hash)
}
sortHasherByDistance(slice, false, dist)
return
}
dist := make([]float64, len(slice))
for i := range dist {
d := distance(slice[i].Hash(), hash)
// `maxUint64 - distance` makes the shorter distance more valuable
// it is necessary for operation with normalized values
dist[i] = float64(^uint64(0)-d) * weights[i]
}
sort.Sort(&hasherSorter[T, float64]{
slice: slice,
dist: dist,
asc: false,
})
}
// sortHasherByDistance is similar to sortByDistance but accepts slice directly.
func sortHasherByDistance[T Hasher](slice []T, byIndex bool, dist []uint64) {
sort.Sort(&hasherSorter[T, uint64]{
slice: slice,
dist: dist,
asc: true,
})
}
// SortSliceByIndex received []T and hash to sort by index-distance
func SortSliceByIndex(slice interface{}, hash uint64) {
length := uint64(reflect.ValueOf(slice).Len())
length := reflect.ValueOf(slice).Len()
swap := reflect.Swapper(slice)
rule := make([]uint64, 0, length)
for i := uint64(0); i < length; i++ {
rule = append(rule, i)
}
rule = Sort(rule, hash)
sortByRuleInverse(swap, length, rule)
sortByDistance(length, true, nil, hash, swap)
}
// SortSliceByWeightIndex received []T, weights and hash to sort by index-weight
func SortSliceByWeightIndex(slice interface{}, weight []uint64, hash uint64) {
length := uint64(reflect.ValueOf(slice).Len())
// SortSliceByWeightIndex received []T, weights and hash to sort by index-distance * weights
func SortSliceByWeightIndex(slice interface{}, weights []float64, hash uint64) {
length := reflect.ValueOf(slice).Len()
swap := reflect.Swapper(slice)
rule := make([]uint64, 0, length)
for i := uint64(0); i < length; i++ {
rule = append(rule, i)
}
rule = SortByWeight(rule, weight, hash)
sortByRuleInverse(swap, length, rule)
}
func sortByRuleDirect(swap swapper, length uint64, rule []uint64) {
done := make([]bool, length)
for i := uint64(0); i < length; i++ {
if done[i] {
continue
}
for j := rule[i]; !done[rule[j]]; j = rule[j] {
swap(int(i), int(j))
done[j] = true
}
}
}
func sortByRuleInverse(swap swapper, length uint64, rule []uint64) {
done := make([]bool, length)
for i := uint64(0); i < length; i++ {
if done[i] {
continue
}
for j := i; !done[rule[j]]; j = rule[j] {
swap(int(j), int(rule[j]))
done[j] = true
}
}
sortByWeight(length, true, nil, weights, hash, swap)
}
func prepareRule(slice interface{}) []uint64 {
t := reflect.TypeOf(slice)
if t.Kind() != reflect.Slice {
return nil
panic("HRW sort expects slice, got " + t.Kind().String())
}
var (
@ -273,7 +263,7 @@ func prepareRule(slice interface{}) []uint64 {
default:
if _, ok := val.Index(0).Interface().(Hasher); !ok {
return nil
panic("slice elements must implement hrw.Hasher")
}
for i := 0; i < length; i++ {
@ -283,3 +273,89 @@ func prepareRule(slice interface{}) []uint64 {
}
return rule
}
// ValidateWeights checks if weights are normalized between 0.0 and 1.0
func ValidateWeights(weights []float64) error {
for i := range weights {
if math.IsNaN(weights[i]) || weights[i] > NormalizedMaxWeight || weights[i] < NormalizedMinWeight {
return errors.New("weights are not normalized")
}
}
return nil
}
// sortByWeight sorts nodes by weight using provided swapper.
// nodes contains hrw hashes. If it is nil, indices are used.
func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash uint64, swap func(i, j int)) {
// if all nodes have the same distance then sort uniformly
if allSameF64(weights) {
sortByDistance(l, byIndex, nodes, hash, swap)
return
}
dist := make([]float64, l)
for i := 0; i < l; i++ {
d := getDistance(byIndex, i, nodes, hash)
// `maxUint64 - distance` makes the shorter distance more valuable
// it is necessary for operation with normalized values
dist[i] = float64(^uint64(0)-d) * weights[i]
}
s := &sorter{
l: l,
swap: func(i, j int) {
swap(i, j)
dist[i], dist[j] = dist[j], dist[i]
},
less: func(i, j int) bool {
return dist[i] > dist[j] // higher distance must be placed lower to be first
},
}
sort.Sort(s)
}
// sortByDistance sorts nodes by hrw distance using provided swapper.
// nodes contains hrw hashes. If it is nil, indices are used.
func sortByDistance(l int, byIndex bool, nodes []uint64, hash uint64, swap func(i, j int)) {
dist := make([]uint64, l)
for i := 0; i < l; i++ {
dist[i] = getDistance(byIndex, i, nodes, hash)
}
s := &sorter{
l: l,
swap: func(i, j int) {
swap(i, j)
dist[i], dist[j] = dist[j], dist[i]
},
less: func(i, j int) bool {
return dist[i] < dist[j]
},
}
sort.Sort(s)
}
// getDistance return distance from nodes[i] to h.
// If byIndex is true, nodes index is used.
// Else if nodes[i] != nil, distance is calculated from this value.
// Otherwise, and hash from node index is taken.
func getDistance(byIndex bool, i int, nodes []uint64, h uint64) uint64 {
if nodes != nil {
return distance(nodes[i], h)
} else if byIndex {
return distance(uint64(i), h)
} else {
buf := make([]byte, 8)
binary.LittleEndian.PutUint64(buf, uint64(i))
return distance(Hash(buf), h)
}
}
func allSameF64(fs []float64) bool {
for i := range fs {
if fs[i] != fs[0] {
return false
}
}
return true
}

View file

@ -5,9 +5,10 @@ import (
"fmt"
"math"
"math/rand"
"reflect"
"strconv"
"testing"
"github.com/stretchr/testify/require"
)
type (
@ -60,7 +61,7 @@ func Example() {
}
func (h hashString) Hash() uint64 {
return Hash([]byte(h))
return StringHash(string(h))
}
func TestSortSliceByIndex(t *testing.T) {
@ -68,20 +69,28 @@ func TestSortSliceByIndex(t *testing.T) {
expect := []string{"e", "a", "c", "f", "d", "b"}
hash := Hash(testKey)
SortSliceByIndex(actual, hash)
if !reflect.DeepEqual(actual, expect) {
t.Errorf("Was %#v, but expected %#v", actual, expect)
require.Equal(t, expect, actual)
}
func TestValidateWeights(t *testing.T) {
weights := []float64{10, 10, 10, 2, 2, 2}
err := ValidateWeights(weights)
require.Error(t, err)
weights = []float64{math.NaN(), 1, 1, 0.2, 0.2, 0.2}
err = ValidateWeights(weights)
require.Error(t, err)
weights = []float64{1, 1, 1, 0.2, 0.2, 0.2}
err = ValidateWeights(weights)
require.NoError(t, err)
}
func TestSortSliceByWeightIndex(t *testing.T) {
actual := []string{"a", "b", "c", "d", "e", "f"}
weights := []uint64{10, 10, 10, 2, 2, 2}
weights := []float64{1, 1, 1, 0.2, 0.2, 0.2}
expect := []string{"a", "c", "b", "e", "f", "d"}
hash := Hash(testKey)
SortSliceByWeightIndex(actual, weights, hash)
if !reflect.DeepEqual(actual, expect) {
t.Errorf("Was %#v, but expected %#v", actual, expect)
}
require.Equal(t, expect, actual)
}
func TestSortSliceByValue(t *testing.T) {
@ -89,43 +98,7 @@ func TestSortSliceByValue(t *testing.T) {
expect := []string{"d", "f", "c", "b", "a", "e"}
hash := Hash(testKey)
SortSliceByValue(actual, hash)
if !reflect.DeepEqual(actual, expect) {
t.Errorf("Was %#v, but expected %#v", actual, expect)
}
}
func TestSortByRule(t *testing.T) {
t.Run("direct", func(t *testing.T) {
// 0 1 2 3 4 5
actual := []string{"a", "b", "c", "d", "e", "f"}
// 4 2 0 5 3 1
expect := []string{"c", "f", "b", "e", "a", "d"}
rule := []uint64{4, 2, 0, 5, 3, 1}
sortByRuleDirect(
func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
6, rule)
if !reflect.DeepEqual(actual, expect) {
t.Errorf("Was %#v, but expected %#v", actual, expect)
}
})
t.Run("inverse", func(t *testing.T) {
// 0 1 2 3 4 5
actual := []string{"a", "b", "c", "d", "e", "f"}
// 4 2 0 5 3 1
expect := []string{"e", "c", "a", "f", "d", "b"}
rule := []uint64{4, 2, 0, 5, 3, 1}
sortByRuleInverse(
func(i, j int) { actual[i], actual[j] = actual[j], actual[i] },
6, rule)
if !reflect.DeepEqual(actual, expect) {
t.Errorf("Was %#v, but expected %#v", actual, expect)
}
})
require.Equal(t, expect, actual)
}
func TestSortSliceByValueFail(t *testing.T) {
@ -134,23 +107,19 @@ func TestSortSliceByValueFail(t *testing.T) {
actual []int
hash = Hash(testKey)
)
SortSliceByValue(actual, hash)
require.NotPanics(t, func() { SortSliceByValue(actual, hash) })
})
t.Run("must be slice", func(t *testing.T) {
actual := 10
hash := Hash(testKey)
SortSliceByValue(actual, hash)
require.Panics(t, func() { SortSliceByValue(actual, hash) })
})
t.Run("must 'fail' for unknown type", func(t *testing.T) {
actual := []unknown{1, 2, 3, 4, 5}
expect := []unknown{1, 2, 3, 4, 5}
hash := Hash(testKey)
SortSliceByValue(actual, hash)
if !reflect.DeepEqual(actual, expect) {
t.Errorf("Was %#v, but expected %#v", actual, expect)
}
require.Panics(t, func() { SortSliceByValue(actual, hash) })
})
}
@ -159,9 +128,24 @@ func TestSortSliceByValueHasher(t *testing.T) {
expect := []hashString{"d", "f", "c", "b", "a", "e"}
hash := Hash(testKey)
SortSliceByValue(actual, hash)
if !reflect.DeepEqual(actual, expect) {
t.Errorf("Was %#v, but expected %#v", actual, expect)
require.Equal(t, expect, actual)
}
func TestSortHasherSliceByValue(t *testing.T) {
actual := []hashString{"a", "b", "c", "d", "e", "f"}
expect := []hashString{"d", "f", "c", "b", "a", "e"}
hash := Hash(testKey)
SortHasherSliceByValue(actual, hash)
require.EqualValues(t, expect, actual)
}
func TestSortHasherSliceByWeightValue(t *testing.T) {
actual := []hashString{"a", "b", "c", "d", "e", "f"}
weights := []float64{1.0, 1.0, 1.0, 1.0, 1.0, 1.0}
expect := []hashString{"d", "f", "c", "b", "a", "e"}
hash := Hash(testKey)
SortHasherSliceByWeightValue(actual, weights, hash)
require.EqualValues(t, expect, actual)
}
func TestSortSliceByValueIntSlice(t *testing.T) {
@ -206,11 +190,6 @@ func TestSortSliceByValueIntSlice(t *testing.T) {
expect: []uint32{5, 1, 2, 0, 3, 4},
},
{
actual: Uint32Slice{0, 1, 2, 3, 4, 5},
expect: Uint32Slice{0, 1, 2, 3, 4, 5},
},
{
actual: []int64{0, 1, 2, 3, 4, 5},
expect: []int64{5, 3, 0, 1, 4, 2},
@ -225,9 +204,7 @@ func TestSortSliceByValueIntSlice(t *testing.T) {
for _, tc := range cases {
SortSliceByValue(tc.actual, hash)
if !reflect.DeepEqual(tc.actual, tc.expect) {
t.Errorf("Was %#v, but expected %#v", tc.actual, tc.expect)
}
require.Equal(t, tc.expect, tc.actual)
}
}
@ -236,9 +213,7 @@ func TestSort(t *testing.T) {
hash := Hash(testKey)
actual := Sort(nodes, hash)
expected := []uint64{3, 1, 4, 2, 0}
if !reflect.DeepEqual(actual, expected) {
t.Errorf("Was %#v, but expected %#v", actual, expected)
}
require.Equal(t, expected, actual)
}
func TestDistribution(t *testing.T) {
@ -276,18 +251,11 @@ func TestDistribution(t *testing.T) {
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
if d > delta || (0-d) > delta {
t.Errorf(
"Node %d received %d keys, expected %.0f (+/- %.2f)",
node, count, mean, delta,
)
}
}
if chi2 > chiTable[size-1] {
t.Errorf(
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
chi2, chiTable[size-1])
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByIndex", func(t *testing.T) {
@ -317,18 +285,11 @@ func TestDistribution(t *testing.T) {
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
if d > delta || (0-d) > delta {
t.Errorf(
"Node %d received %d keys, expected %.0f (+/- %.2f)",
node, count, mean, delta,
)
}
}
if chi2 > chiTable[size-1] {
t.Errorf(
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
chi2, chiTable[size-1])
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByValue", func(t *testing.T) {
@ -357,18 +318,11 @@ func TestDistribution(t *testing.T) {
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
if d > delta || (0-d) > delta {
t.Errorf(
"Node %d received %d keys, expected %.0f (+/- %.2f)",
node, count, mean, delta,
)
}
}
if chi2 > chiTable[size-1] {
t.Errorf(
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
chi2, chiTable[size-1])
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByStringValue", func(t *testing.T) {
@ -397,18 +351,11 @@ func TestDistribution(t *testing.T) {
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
if d > delta || (0-d) > delta {
t.Errorf(
"Node %s received %d keys, expected %.0f (+/- %.2f)",
node, count, mean, delta,
)
}
}
if chi2 > chiTable[size-1] {
t.Errorf(
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
chi2, chiTable[size-1])
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByInt32Value", func(t *testing.T) {
@ -437,31 +384,24 @@ func TestDistribution(t *testing.T) {
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
if d > delta || (0-d) > delta {
t.Errorf(
"Node %d received %d keys, expected %.0f (+/- %.2f)",
node, count, mean, delta,
)
}
}
if chi2 > chiTable[size-1] {
t.Errorf(
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
chi2, chiTable[size-1])
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByWeightValue", func(t *testing.T) {
var (
i uint64
a, b, result [size]int
w [size]uint64
w [size]float64
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = int(i)
w[i] = size - i
w[i] = float64(size-i) / float64(size)
}
for i = 0; i < keys; i++ {
copy(b[:], a[:])
@ -470,24 +410,24 @@ func TestDistribution(t *testing.T) {
SortSliceByWeightValue(b[:], w[:], hash)
result[b[0]]++
}
for i := 0; i < size-1; i++ {
if bool(w[i] > w[i+1]) != bool(result[i] > result[i+1]) {
t.Fatalf("result array %v must be corresponded to weights %v", result, w)
}
require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]),
"result array %v must be corresponded to weights %v", result, w)
}
})
t.Run("sortByWeightValueShuffledW", func(t *testing.T) {
t.Run("sortByWeightValueShuffledWeight", func(t *testing.T) {
var (
i uint64
a, b, result [size]int
w [size]uint64
w [size]float64
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = int(i)
w[i] = size - i
w[i] = float64(size-i) / float64(size)
}
rand.Shuffle(size, func(i, j int) {
@ -501,17 +441,16 @@ func TestDistribution(t *testing.T) {
result[b[0]]++
}
for i := 0; i < size-1; i++ {
if bool(w[i] > w[i+1]) != bool(result[i] > result[i+1]) {
t.Fatalf("result array %v must be corresponded to weights %v", result, w)
}
require.True(t, bool(w[i] > w[i+1]) == bool(result[i] > result[i+1]),
"result array %v must be corresponded to weights %v", result, w)
}
})
t.Run("sortByWeightValueEmptyW", func(t *testing.T) {
t.Run("sortByWeightValueEmptyWeight", func(t *testing.T) {
var (
i uint64
a, b [size]int
w [size]uint64
w [size]float64
counts = make(map[int]int, size)
key = make([]byte, 16)
)
@ -534,32 +473,25 @@ func TestDistribution(t *testing.T) {
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
if d > delta || (0-d) > delta {
t.Errorf(
"Node %d received %d keys, expected %.0f (+/- %.2f)",
node, count, mean, delta,
)
}
}
if chi2 > chiTable[size-1] {
t.Errorf(
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
chi2, chiTable[size-1])
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByWeightValueUniformW", func(t *testing.T) {
t.Run("sortByWeightValueUniformWeight", func(t *testing.T) {
var (
i uint64
a, b [size]int
w [size]uint64
w [size]float64
counts = make(map[int]int, size)
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = int(i)
w[i] = 10
w[i] = 0.5
}
for i = 0; i < keys; i++ {
@ -576,45 +508,85 @@ func TestDistribution(t *testing.T) {
for node, count := range counts {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
if d > delta || (0-d) > delta {
t.Errorf(
"Node %d received %d keys, expected %.0f (+/- %.2f)",
node, count, mean, delta,
)
}
}
if chi2 > chiTable[size-1] {
t.Errorf(
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)",
chi2, chiTable[size-1])
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("sortByWeightValueAbsoluteW", func(t *testing.T) {
const keys = 1
var (
i uint64
a, b [size]int
w [size]uint64
w [size]float64
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = int(i)
}
w[size-1] = 10
w[size-1] = 1
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByWeightValue(b[:], w[:], hash)
if b[0] != a[size-1] {
t.Fatalf("expected last value of %v to be the first with highest weight", a)
}
require.True(t, b[0] == a[size-1],
"expected last value of %v to be the first with highest distance", a)
}
})
t.Run("sortByWeightValueNormalizedWeight", func(t *testing.T) {
var (
i uint64
a, b, result [size]uint64
w, normalizedW [size]float64
key = make([]byte, 16)
)
for i = 0; i < size; i++ {
a[i] = i
w[int(i)] = 10
}
w[0] = 100
// Here let's use logarithm normalization
for i = 0; i < size; i++ {
normalizedW[i] = math.Log2(w[i]) / math.Log2(w[0])
}
for i = 0; i < keys; i++ {
copy(b[:], a[:])
binary.BigEndian.PutUint64(key, i+size)
hash := Hash(key)
SortSliceByWeightValue(b[:], normalizedW[:], hash)
for j := range b {
result[b[j]] += uint64(len(b) - j)
}
}
cutResult := result[1:]
var total uint64
for i := range cutResult {
total += cutResult[i]
}
var chi2 float64
mean := float64(total) / float64(len(cutResult))
delta := mean * percent
for node, count := range cutResult {
d := mean - float64(count)
chi2 += math.Pow(float64(count)-mean, 2) / mean
require.True(t, d < delta && (0-d) < delta,
"Node %d received %d keys, expected %.0f (+/- %.2f)", node, count, mean, delta)
}
require.True(t, chi2 < chiTable[size-1],
"Chi2 condition for .9 is not met (expected %.2f <= %.2f)", chi2, chiTable[size-1])
})
t.Run("hash collision", func(t *testing.T) {
var (
i uint64
@ -681,6 +653,36 @@ func BenchmarkSortByValue_fnv_1000(b *testing.B) {
benchmarkSortByValue(b, 1000, hash)
}
func BenchmarkSortHashersByValue_Reflection_fnv_10(b *testing.B) {
hash := Hash(testKey)
benchmarkSortHashersByValueReflection(b, 10, hash)
}
func BenchmarkSortHashersByValue_Reflection_fnv_100(b *testing.B) {
hash := Hash(testKey)
benchmarkSortHashersByValueReflection(b, 100, hash)
}
func BenchmarkSortHashersByValue_Reflection_fnv_1000(b *testing.B) {
hash := Hash(testKey)
benchmarkSortHashersByValueReflection(b, 1000, hash)
}
func BenchmarkSortHashersByValue_Typed_fnv_10(b *testing.B) {
hash := Hash(testKey)
benchmarkSortHashersByValueTyped(b, 10, hash)
}
func BenchmarkSortHashersByValue_Typed_fnv_100(b *testing.B) {
hash := Hash(testKey)
benchmarkSortHashersByValueTyped(b, 100, hash)
}
func BenchmarkSortHashersByValue_Typed_fnv_1000(b *testing.B) {
hash := Hash(testKey)
benchmarkSortHashersByValueTyped(b, 1000, hash)
}
func BenchmarkSortByWeight_fnv_10(b *testing.B) {
hash := Hash(testKey)
_ = benchmarkSortByWeight(b, 10, hash)
@ -726,6 +728,30 @@ func BenchmarkSortByWeightValue_fnv_1000(b *testing.B) {
benchmarkSortByWeightValue(b, 1000, hash)
}
func BenchmarkSortHashersByWeightValueReflection_fnv_10(b *testing.B) {
benchmarkSortHashersByWeightValueRelection(b, 10, Hash(testKey))
}
func BenchmarkSortHashersByWeightValueReflection_fnv_100(b *testing.B) {
benchmarkSortHashersByWeightValueRelection(b, 100, Hash(testKey))
}
func BenchmarkSortHashersByWeightValueReflection_fnv_1000(b *testing.B) {
benchmarkSortHashersByWeightValueRelection(b, 1000, Hash(testKey))
}
func BenchmarkSortHashersByWeightValueTyped_fnv_10(b *testing.B) {
benchmarkSortHashersByWeightValueTyped(b, 10, Hash(testKey))
}
func BenchmarkSortHashersByWeightValueTyped_fnv_100(b *testing.B) {
benchmarkSortHashersByWeightValueTyped(b, 100, Hash(testKey))
}
func BenchmarkSortHashersByWeightValueTyped_fnv_1000(b *testing.B) {
benchmarkSortHashersByWeightValueTyped(b, 1000, Hash(testKey))
}
func benchmarkSort(b *testing.B, n int, hash uint64) uint64 {
servers := make([]uint64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
@ -772,9 +798,9 @@ func benchmarkSortByValue(b *testing.B, n int, hash uint64) {
func benchmarkSortByWeight(b *testing.B, n int, hash uint64) uint64 {
servers := make([]uint64, n)
weights := make([]uint64, n)
weights := make([]float64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
weights[i] = uint64(n) - i
weights[i] = float64(uint64(n)-i) / float64(n)
servers[i] = i
}
@ -790,9 +816,9 @@ func benchmarkSortByWeight(b *testing.B, n int, hash uint64) uint64 {
func benchmarkSortByWeightIndex(b *testing.B, n int, hash uint64) {
servers := make([]uint64, n)
weights := make([]uint64, n)
weights := make([]float64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
weights[i] = uint64(n) - i
weights[i] = float64(uint64(n)-i) / float64(n)
servers[i] = i
}
@ -806,9 +832,9 @@ func benchmarkSortByWeightIndex(b *testing.B, n int, hash uint64) {
func benchmarkSortByWeightValue(b *testing.B, n int, hash uint64) {
servers := make([]string, n)
weights := make([]uint64, n)
weights := make([]float64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
weights[i] = uint64(n) - i
weights[i] = float64(uint64(n)-i) / float64(n)
servers[i] = "localhost:" + strconv.FormatUint(60000-i, 10)
}
@ -819,3 +845,63 @@ func benchmarkSortByWeightValue(b *testing.B, n int, hash uint64) {
SortSliceByWeightValue(servers, weights, hash)
}
}
func benchmarkSortHashersByWeightValueRelection(b *testing.B, n int, hash uint64) {
servers := make([]hashString, n)
weights := make([]float64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
weights[i] = float64(uint64(n)-i) / float64(n)
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortSliceByWeightValue(servers, weights, hash)
}
}
func benchmarkSortHashersByWeightValueTyped(b *testing.B, n int, hash uint64) {
servers := make([]hashString, n)
weights := make([]float64, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
weights[i] = float64(uint64(n)-i) / float64(n)
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortHasherSliceByWeightValue(servers, weights, hash)
}
}
func benchmarkSortHashersByValueReflection(b *testing.B, n int, hash uint64) {
servers := make([]hashString, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortSliceByValue(servers, hash)
}
}
func benchmarkSortHashersByValueTyped(b *testing.B, n int, hash uint64) {
servers := make([]hashString, n)
for i := uint64(0); i < uint64(len(servers)); i++ {
servers[i] = hashString("localhost:" + strconv.FormatUint(60000-i, 10))
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SortHasherSliceByValue(servers, hash)
}
}