Compare commits

..

10 commits

Author SHA1 Message Date
3a8489bfe7 [#13] Restore deleted copyright notice
Signed-off-by: Vitaliy Potyarkin <v.potyarkin@yadro.com>
2024-11-08 15:33:45 +03:00
1b7ec474c9 [#9] *: Remove nspcc mentions
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-08-29 13:53:15 +03:00
78c3f718b1 [#9] doc: Remove nspcc mentions from README
Signed-off-by: Airat Arifullin <a.arifullin@yadro.com>
2023-06-13 10:55:55 +03:00
16a7740ccd [#8] hrw: Introduce StringHash() for hashing strings directly
```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/hrw
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                         │      6      │                  7                   │
                                         │   sec/op    │    sec/op     vs base                │
SortHashersByValue_Typed_fnv_10-8          248.8n ± 1%   166.9n ±  9%  -32.93% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         2.195µ ± 6%   1.297µ ±  6%  -40.93% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        22.47µ ± 4%   12.42µ ± 10%  -44.73% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     301.7n ± 6%   180.8n ±  4%  -40.09% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    2.526µ ± 1%   1.378µ ±  5%  -45.47% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   24.37µ ± 2%   12.80µ ±  4%  -47.48% (p=0.000 n=10)
geomean                                    2.472µ        1.430µ        -42.13%

                                         │      6       │                   7                   │
                                         │     B/op     │     B/op      vs base                 │
SortHashersByValue_Typed_fnv_10-8            144.0 ± 0%     144.0 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByValue_Typed_fnv_100-8           960.0 ± 0%     960.0 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByValue_Typed_fnv_1000-8        8.062Ki ± 0%   8.062Ki ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByWeightValueTyped_fnv_10-8       144.0 ± 0%     144.0 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByWeightValueTyped_fnv_100-8      960.0 ± 0%     960.0 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByWeightValueTyped_fnv_1000-8   8.062Ki ± 0%   8.062Ki ± 0%       ~ (p=1.000 n=10) ¹
geomean                                    1.021Ki        1.021Ki       +0.00%
¹ all samples are equal

                                         │     6      │                  7                  │
                                         │ allocs/op  │ allocs/op   vs base                 │
SortHashersByValue_Typed_fnv_10-8          2.000 ± 0%   2.000 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByValue_Typed_fnv_100-8         2.000 ± 0%   2.000 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByValue_Typed_fnv_1000-8        2.000 ± 0%   2.000 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByWeightValueTyped_fnv_10-8     2.000 ± 0%   2.000 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByWeightValueTyped_fnv_100-8    2.000 ± 0%   2.000 ± 0%       ~ (p=1.000 n=10) ¹
SortHashersByWeightValueTyped_fnv_1000-8   2.000 ± 0%   2.000 ± 0%       ~ (p=1.000 n=10) ¹
geomean                                    2.000        2.000       +0.00%
¹ all samples are equal
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-02 10:56:24 +03:00
2ac89c82b6 [#8] hrw/test: Fix typo in benchmarks
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-02 10:56:24 +03:00
266da7c69a [#8] hrw: Do not allocate for swap()/less() helpers
```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/hrw
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                         │      4      │                  5                  │
                                         │   sec/op    │   sec/op     vs base                │
SortHashersByValue_Typed_fnv_10-8          309.2n ± 2%   294.4n ± 1%   -4.75% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         2.306µ ± 1%   2.549µ ± 1%  +10.54% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        21.73µ ± 1%   24.80µ ± 3%  +14.14% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     347.1n ± 1%   334.8n ± 2%   -3.56% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    2.668µ ± 1%   2.954µ ± 3%  +10.72% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   2.673µ ± 1%   2.957µ ± 4%  +10.63% (p=0.000 n=10)
geomean                                    1.836µ        1.947µ        +6.01%

                                         │      4       │                  5                   │
                                         │     B/op     │     B/op      vs base                │
SortHashersByValue_Typed_fnv_10-8            216.0 ± 0%     144.0 ± 0%  -33.33% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8          1032.0 ± 0%     960.0 ± 0%   -6.98% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        8.133Ki ± 0%   8.062Ki ± 0%   -0.86% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8       216.0 ± 0%     144.0 ± 0%  -33.33% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8     1032.0 ± 0%     960.0 ± 0%   -6.98% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8    1032.0 ± 0%     960.0 ± 0%   -6.98% (p=0.000 n=10)
geomean                                      867.8          730.1       -15.87%

                                         │     4      │                 5                  │
                                         │ allocs/op  │ allocs/op   vs base                │
SortHashersByValue_Typed_fnv_10-8          4.000 ± 0%   2.000 ± 0%  -50.00% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         4.000 ± 0%   2.000 ± 0%  -50.00% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        4.000 ± 0%   2.000 ± 0%  -50.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     4.000 ± 0%   2.000 ± 0%  -50.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    4.000 ± 0%   2.000 ± 0%  -50.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   4.000 ± 0%   2.000 ± 0%  -50.00% (p=0.000 n=10)
geomean                                    4.000        2.000       -50.00%
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-02 10:56:23 +03:00
c52f74d8e1 [#8] hrw: Do not allocate 2 slices for sort
Currently we allocate `rule` and then create `dist` which depends on it.
In this commit we create `dist` directly.

```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/hrw
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                         │      3      │                 4                  │
                                         │   sec/op    │   sec/op     vs base               │
SortHashersByValue_Typed_fnv_10-8          336.3n ± 3%   309.2n ± 2%  -8.06% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         2.424µ ± 3%   2.306µ ± 1%  -4.87% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        22.35µ ± 1%   21.73µ ± 1%  -2.75% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     346.6n ± 3%   347.1n ± 1%       ~ (p=0.631 n=10)
SortHashersByWeightValueTyped_fnv_100-8    2.637µ ± 6%   2.668µ ± 1%       ~ (p=0.481 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   2.609µ ± 4%   2.673µ ± 1%  +2.43% (p=0.000 n=10)
geomean                                    1.875µ        1.836µ       -2.06%

                                         │       3       │                  4                   │
                                         │     B/op      │     B/op      vs base                │
SortHashersByValue_Typed_fnv_10-8             296.0 ± 0%     216.0 ± 0%  -27.03% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8          1.883Ki ± 0%   1.008Ki ± 0%  -46.47% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        16.133Ki ± 0%   8.133Ki ± 0%  -49.59% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8        296.0 ± 0%     216.0 ± 0%  -27.03% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8     1.883Ki ± 0%   1.008Ki ± 0%  -46.47% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8    1.883Ki ± 0%   1.008Ki ± 0%  -46.47% (p=0.000 n=10)
geomean                                     1.442Ki          867.8       -41.24%

                                         │     3      │                 4                  │
                                         │ allocs/op  │ allocs/op   vs base                │
SortHashersByValue_Typed_fnv_10-8          5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   5.000 ± 0%   4.000 ± 0%  -20.00% (p=0.000 n=10)
geomean                                    5.000        4.000       -20.00%
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-02 10:53:44 +03:00
895ecf150f [#8] hrw: Inline swap() when slice is known
```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/hrw
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                         │      2      │                 3                  │
                                         │   sec/op    │   sec/op     vs base               │
SortHashersByValue_Typed_fnv_10-8          368.5n ± 2%   336.3n ± 3%  -8.75% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         2.411µ ± 4%   2.424µ ± 3%       ~ (p=0.853 n=10)
SortHashersByValue_Typed_fnv_1000-8        22.19µ ± 2%   22.35µ ± 1%       ~ (p=0.247 n=10)
SortHashersByWeightValueTyped_fnv_10-8     364.3n ± 2%   346.6n ± 3%  -4.86% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    2.541µ ± 3%   2.637µ ± 6%       ~ (p=0.055 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   2.483µ ± 1%   2.609µ ± 4%  +5.07% (p=0.003 n=10)
geomean                                    1.888µ        1.875µ       -0.71%

                                         │      2       │                  3                  │
                                         │     B/op     │     B/op      vs base               │
SortHashersByValue_Typed_fnv_10-8            312.0 ± 0%     296.0 ± 0%  -5.13% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         1.898Ki ± 0%   1.883Ki ± 0%  -0.82% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        16.15Ki ± 0%   16.13Ki ± 0%  -0.10% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8       312.0 ± 0%     296.0 ± 0%  -5.13% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    1.898Ki ± 0%   1.883Ki ± 0%  -0.82% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   1.898Ki ± 0%   1.883Ki ± 0%  -0.82% (p=0.000 n=10)
geomean                                    1.474Ki        1.442Ki       -2.16%

                                         │     2      │                 3                  │
                                         │ allocs/op  │ allocs/op   vs base                │
SortHashersByValue_Typed_fnv_10-8          6.000 ± 0%   5.000 ± 0%  -16.67% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8         6.000 ± 0%   5.000 ± 0%  -16.67% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8        6.000 ± 0%   5.000 ± 0%  -16.67% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8     6.000 ± 0%   5.000 ± 0%  -16.67% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8    6.000 ± 0%   5.000 ± 0%  -16.67% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8   6.000 ± 0%   5.000 ± 0%  -16.67% (p=0.000 n=10)
geomean                                    6.000        5.000       -16.67%
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-01 21:41:16 +03:00
213c105ac1 [#8] go.mod: Use faster murmur3 lib
Specifically, the following escape analysis result became possible because of
the noescape annotations for the assembly code:
```
./hrw.go:307:14: make([]byte, 8) does not escape
```

```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/hrw
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                              │      1       │                   2                   │
                                              │    sec/op    │   sec/op     vs base                  │
SortHashersByValue_Typed_fnv_10-8               580.1n ±  1%   368.5n ± 2%  -36.47% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8              4.215µ ±  2%   2.411µ ± 4%  -42.79% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8             39.40µ ±  1%   22.19µ ± 2%  -43.68% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8          599.6n ±  2%   364.3n ± 2%  -39.25% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8         4.337µ ±  5%   2.541µ ± 3%  -41.41% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8        4.344µ ±  3%   2.483µ ± 1%  -42.84% (p=0.000 n=10)
geomean                                         4.400µ         1.888µ       -41.13%                ¹
¹ benchmark set differs from baseline; geomeans may not be comparable

                                              │      1       │                   2                    │
                                              │     B/op     │     B/op      vs base                  │
SortHashersByValue_Typed_fnv_10-8                 472.0 ± 0%     312.0 ± 0%  -33.90% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8              3.461Ki ± 0%   1.898Ki ± 0%  -45.15% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8             31.77Ki ± 0%   16.15Ki ± 0%  -49.18% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8            472.0 ± 0%     312.0 ± 0%  -33.90% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8         3.461Ki ± 0%   1.898Ki ± 0%  -45.15% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8        3.461Ki ± 0%   1.898Ki ± 0%  -45.15% (p=0.000 n=10)
geomean                                         3.070Ki        1.474Ki       -42.37%                ¹
¹ benchmark set differs from baseline; geomeans may not be comparable

                                              │       1       │                  2                   │
                                              │   allocs/op   │ allocs/op   vs base                  │
SortHashersByValue_Typed_fnv_10-8                 16.000 ± 0%   6.000 ± 0%  -62.50% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8               106.000 ± 0%   6.000 ± 0%  -94.34% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8             1006.000 ± 0%   6.000 ± 0%  -99.40% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8            16.000 ± 0%   6.000 ± 0%  -62.50% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8          106.000 ± 0%   6.000 ± 0%  -94.34% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8         106.000 ± 0%   6.000 ± 0%  -94.34% (p=0.000 n=10)
geomean                                            113.0        6.000       -92.69%                ¹
¹ benchmark set differs from baseline; geomeans may not be comparable
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-01 21:41:00 +03:00
c175ef4099 [#8] hrw: Do not create index slice for sorter
`ind` is only needed to index `dist` or `weights`; swap those slices directly instead.

```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/hrw
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                              │      0      │                  1                   │
                                              │   sec/op    │    sec/op     vs base                │
SortHashersByValue_Typed_fnv_10-8               596.2n ± 4%   580.1n ±  1%   -2.72% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8              4.453µ ± 2%   4.215µ ±  2%   -5.35% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8             41.58µ ± 4%   39.40µ ±  1%   -5.23% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8          624.5n ± 2%   599.6n ±  2%   -3.99% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8         4.593µ ± 2%   4.337µ ±  5%   -5.56% (p=0.003 n=10)
SortHashersByWeightValueTyped_fnv_1000-8        4.896µ ± 8%   4.344µ ±  3%  -11.27% (p=0.000 n=10)
geomean                                         4.668µ        4.400µ         -5.75%

                                              │      0       │                  1                   │
                                              │     B/op     │     B/op      vs base                │
SortHashersByValue_Typed_fnv_10-8                 584.0 ± 0%     472.0 ± 0%  -19.18% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8              4.367Ki ± 0%   3.461Ki ± 0%  -20.75% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8             39.80Ki ± 0%   31.77Ki ± 0%  -20.18% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8            600.0 ± 0%     472.0 ± 0%  -21.33% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8         4.383Ki ± 0%   3.461Ki ± 0%  -21.03% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8        4.383Ki ± 0%   3.461Ki ± 0%  -21.03% (p=0.000 n=10)
geomean                                         3.742Ki        3.070Ki       -17.96%

                                              │      0      │                 1                  │
                                              │  allocs/op  │  allocs/op   vs base               │
SortHashersByValue_Typed_fnv_10-8                17.00 ± 0%    16.00 ± 0%  -5.88% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_100-8               107.0 ± 0%    106.0 ± 0%  -0.93% (p=0.000 n=10)
SortHashersByValue_Typed_fnv_1000-8             1.007k ± 0%   1.006k ± 0%  -0.10% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_10-8           17.00 ± 0%    16.00 ± 0%  -5.88% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_100-8          107.0 ± 0%    106.0 ± 0%  -0.93% (p=0.000 n=10)
SortHashersByWeightValueTyped_fnv_1000-8         107.0 ± 0%    106.0 ± 0%  -0.93% (p=0.000 n=10)
geomean                                          115.3         113.0       -1.94%
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-06-01 21:40:46 +03:00
6 changed files with 104 additions and 65 deletions

View file

@ -1,6 +1,7 @@
MIT License
Copyright (c) 2019 NSPCC
Copyright (c) 2023-2024 TrueCloudLab
Copyright (c) 2019-2023 NSPCC
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View file

@ -1,15 +1,10 @@
# Golang HRW implementation
[![Build Status](https://travis-ci.org/nspcc-dev/hrw.svg?branch=master)](https://travis-ci.org/nspcc-dev/hrw)
[![codecov](https://codecov.io/gh/nspcc-dev/hrw/badge.svg)](https://codecov.io/gh/nspcc-dev/hrw)
[![Report](https://goreportcard.com/badge/github.com/nspcc-dev/hrw)](https://goreportcard.com/report/github.com/nspcc-dev/hrw)
[![GitHub release](https://img.shields.io/github/release/nspcc-dev/hrw.svg)](https://github.com/nspcc-dev/hrw)
[Rendezvous or highest random weight](https://en.wikipedia.org/wiki/Rendezvous_hashing) (HRW) hashing is an algorithm that allows clients to achieve distributed agreement on a set of k options out of a possible set of n options. A typical application is when clients need to agree on which sites (or proxies) objects are assigned to. When k is 1, it subsumes the goals of consistent hashing, using an entirely different method.
## Install
`go get github.com/nspcc-dev/hrw`
`go get git.frostfs.info/TrueCloudLab/hrw`
## Benchmark:

2
go.mod
View file

@ -3,8 +3,8 @@ module git.frostfs.info/TrueCloudLab/hrw
go 1.18
require (
github.com/spaolacci/murmur3 v1.1.0
github.com/stretchr/testify v1.3.0
github.com/twmb/murmur3 v1.1.8
)
require (

4
go.sum
View file

@ -2,8 +2,8 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/twmb/murmur3 v1.1.8 h1:8Yt9taO/WN3l08xErzjeschgZU2QSrwm1kclYq+0aRg=
github.com/twmb/murmur3 v1.1.8/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ=

147
hrw.go
View file

@ -9,7 +9,7 @@ import (
"reflect"
"sort"
"github.com/spaolacci/murmur3"
"github.com/twmb/murmur3"
)
type (
@ -21,6 +21,12 @@ type (
less func(i, j int) bool
swap func(i, j int)
}
hasherSorter[T Hasher, N interface{ ~uint64 | ~float64 }] struct {
slice []T
dist []N
asc bool
}
)
// Boundaries of valid normalized weights
@ -33,6 +39,18 @@ func (s *sorter) Len() int { return s.l }
func (s *sorter) Less(i, j int) bool { return s.less(i, j) }
func (s *sorter) Swap(i, j int) { s.swap(i, j) }
func (s *hasherSorter[T, N]) Len() int { return len(s.slice) }
func (s *hasherSorter[T, N]) Less(i, j int) bool {
if s.asc {
return s.dist[i] < s.dist[j]
}
return s.dist[i] > s.dist[j]
}
func (s *hasherSorter[T, N]) Swap(i, j int) {
s.slice[i], s.slice[j] = s.slice[j], s.slice[i]
s.dist[i], s.dist[j] = s.dist[j], s.dist[i]
}
func distance(x uint64, y uint64) uint64 {
acc := x ^ y
// here used mmh3 64 bit finalizer
@ -50,6 +68,11 @@ func Hash(key []byte) uint64 {
return murmur3.Sum64(key)
}
// StringHash returns the 64-bit murmur3 hash of the given string.
func StringHash(key string) uint64 {
return murmur3.StringSum64(key)
}
// Sort receives nodes and a hash, and sorts the nodes by distance
func Sort(nodes []uint64, hash uint64) []uint64 {
l := len(nodes)
@ -85,13 +108,15 @@ func SortSliceByValue(slice interface{}, hash uint64) {
// SortHasherSliceByValue receives []Hasher and hash to sort by value-distance.
func SortHasherSliceByValue[T Hasher](slice []T, hash uint64) {
rule := prepareHasherRule(slice)
if rule != nil {
swap := func(i, j int) {
slice[i], slice[j] = slice[j], slice[i]
}
sortByDistance(len(rule), false, rule, hash, swap)
if len(slice) == 0 {
return
}
dist := make([]uint64, len(slice))
for i := range dist {
dist[i] = distance(slice[i].Hash(), hash)
}
sortHasherByDistance(slice, false, dist)
}
// SortSliceByWeightValue receives []T, weights and hash to sort by value-distance * weights
@ -105,13 +130,41 @@ func SortSliceByWeightValue(slice interface{}, weights []float64, hash uint64) {
// SortHasherSliceByWeightValue receives []Hasher, weights and hash to sort by value-distance * weights.
func SortHasherSliceByWeightValue[T Hasher](slice []T, weights []float64, hash uint64) {
rule := prepareHasherRule(slice)
if rule != nil {
swap := func(i, j int) {
slice[i], slice[j] = slice[j], slice[i]
}
sortByWeight(len(slice), false, rule, weights, hash, swap)
if len(slice) == 0 {
return
}
if allSameF64(weights) {
dist := make([]uint64, len(slice))
for i := range dist {
dist[i] = distance(slice[i].Hash(), hash)
}
sortHasherByDistance(slice, false, dist)
return
}
dist := make([]float64, len(slice))
for i := range dist {
d := distance(slice[i].Hash(), hash)
// `maxUint64 - distance` makes the shorter distance more valuable
// it is necessary for operation with normalized values
dist[i] = float64(^uint64(0)-d) * weights[i]
}
sort.Sort(&hasherSorter[T, float64]{
slice: slice,
dist: dist,
asc: false,
})
}
// sortHasherByDistance is similar to sortByDistance but accepts slice directly.
func sortHasherByDistance[T Hasher](slice []T, byIndex bool, dist []uint64) {
sort.Sort(&hasherSorter[T, uint64]{
slice: slice,
dist: dist,
asc: true,
})
}
// SortSliceByIndex receives []T and hash to sort by index-distance
@ -221,18 +274,6 @@ func prepareRule(slice interface{}) []uint64 {
return rule
}
func prepareHasherRule[T Hasher](hashers []T) []uint64 {
length := len(hashers)
if length == 0 {
return nil
}
result := make([]uint64, length)
for i := 0; i < length; i++ {
result[i] = hashers[i].Hash()
}
return result
}
// ValidateWeights checks if weights are normalized between 0.0 and 1.0
func ValidateWeights(weights []float64) error {
for i := range weights {
@ -243,24 +284,6 @@ func ValidateWeights(weights []float64) error {
return nil
}
func newSorter(l int, byIndex bool, nodes []uint64, h uint64,
swap func(i, j int)) (*sorter, []int, []uint64) {
ind := make([]int, l)
dist := make([]uint64, l)
for i := 0; i < l; i++ {
ind[i] = i
dist[i] = getDistance(byIndex, i, nodes, h)
}
return &sorter{
l: l,
swap: func(i, j int) {
swap(i, j)
ind[i], ind[j] = ind[j], ind[i]
},
}, ind, dist
}
// sortByWeight sorts nodes by weight using provided swapper.
// nodes contains hrw hashes. If it is nil, indices are used.
func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash uint64, swap func(i, j int)) {
@ -270,14 +293,23 @@ func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash u
return
}
s, ind, dist := newSorter(l, byIndex, nodes, hash, swap)
s.less = func(i, j int) bool {
ii, jj := ind[i], ind[j]
dist := make([]float64, l)
for i := 0; i < l; i++ {
d := getDistance(byIndex, i, nodes, hash)
// `maxUint64 - distance` makes the shorter distance more valuable
// it is necessary for operation with normalized values
wi := float64(^uint64(0)-dist[ii]) * weights[ii]
wj := float64(^uint64(0)-dist[jj]) * weights[jj]
return wi > wj // higher distance must be placed lower to be first
dist[i] = float64(^uint64(0)-d) * weights[i]
}
s := &sorter{
l: l,
swap: func(i, j int) {
swap(i, j)
dist[i], dist[j] = dist[j], dist[i]
},
less: func(i, j int) bool {
return dist[i] > dist[j] // higher distance must be placed lower to be first
},
}
sort.Sort(s)
}
@ -285,9 +317,20 @@ func sortByWeight(l int, byIndex bool, nodes []uint64, weights []float64, hash u
// sortByDistance sorts nodes by hrw distance using provided swapper.
// nodes contains hrw hashes. If it is nil, indices are used.
func sortByDistance(l int, byIndex bool, nodes []uint64, hash uint64, swap func(i, j int)) {
s, ind, dist := newSorter(l, byIndex, nodes, hash, swap)
s.less = func(i, j int) bool {
return dist[ind[i]] < dist[ind[j]]
dist := make([]uint64, l)
for i := 0; i < l; i++ {
dist[i] = getDistance(byIndex, i, nodes, hash)
}
s := &sorter{
l: l,
swap: func(i, j int) {
swap(i, j)
dist[i], dist[j] = dist[j], dist[i]
},
less: func(i, j int) bool {
return dist[i] < dist[j]
},
}
sort.Sort(s)
}

View file

@ -61,7 +61,7 @@ func Example() {
}
func (h hashString) Hash() uint64 {
return Hash([]byte(h))
return StringHash(string(h))
}
func TestSortSliceByIndex(t *testing.T) {
@ -737,7 +737,7 @@ func BenchmarkSortHashersByWeightValueReflection_fnv_100(b *testing.B) {
}
func BenchmarkSortHashersByWeightValueReflection_fnv_1000(b *testing.B) {
benchmarkSortHashersByWeightValueRelection(b, 100, Hash(testKey))
benchmarkSortHashersByWeightValueRelection(b, 1000, Hash(testKey))
}
func BenchmarkSortHashersByWeightValueTyped_fnv_10(b *testing.B) {
@ -749,7 +749,7 @@ func BenchmarkSortHashersByWeightValueTyped_fnv_100(b *testing.B) {
}
func BenchmarkSortHashersByWeightValueTyped_fnv_1000(b *testing.B) {
benchmarkSortHashersByWeightValueTyped(b, 100, Hash(testKey))
benchmarkSortHashersByWeightValueTyped(b, 1000, Hash(testKey))
}
func benchmarkSort(b *testing.B, n int, hash uint64) uint64 {