Replace siphash by hash/maphash

As of Go 1.17.1, hash/maphash is quite a bit faster than siphash, so we
can switch to it and drop one third-party dependency. maphash is just an
interface to the hash function used by Go's built-in maps, which we
already trust for other use cases.

Benchmark results on linux/amd64, -benchtime=3s:

name                                             old time/op    new time/op    delta
IndexHasUnknown-8                                  50.6ns ±10%    41.0ns ±19%  -18.92%  (p=0.000 n=9+10)
IndexHasKnown-8                                    52.6ns ±12%    41.5ns ±12%  -21.13%  (p=0.000 n=9+10)
IndexMapHash-8                                     3.64µs ± 1%    2.00µs ± 0%  -45.09%  (p=0.000 n=10+9)
IndexAlloc-8                                        700ms ± 1%     601ms ± 6%  -14.18%  (p=0.000 n=8+10)
IndexAllocParallel-8                                205ms ± 5%     192ms ± 8%   -6.18%  (p=0.043 n=10+10)
MasterIndexAlloc-8                                  319ms ± 1%     279ms ± 5%  -12.58%  (p=0.000 n=10+10)
MasterIndexLookupSingleIndex-8                      156ns ± 8%     147ns ± 6%   -5.46%  (p=0.023 n=10+10)
MasterIndexLookupMultipleIndex-8                    150ns ± 7%     142ns ± 8%   -5.69%  (p=0.007 n=10+10)
MasterIndexLookupSingleIndexUnknown-8              74.4ns ± 6%    72.0ns ± 9%     ~     (p=0.175 n=10+9)
MasterIndexLookupMultipleIndexUnknown-8            67.4ns ± 9%    65.5ns ± 7%     ~     (p=0.340 n=9+9)
MasterIndexLookupParallel/known,indices=25-8        461ns ± 2%     445ns ± 2%   -3.49%  (p=0.000 n=10+10)
MasterIndexLookupParallel/unknown,indices=25-8      408ns ±11%     378ns ± 5%   -7.22%  (p=0.035 n=10+9)
MasterIndexLookupParallel/known,indices=50-8        479ns ± 1%     437ns ± 4%   -8.82%  (p=0.000 n=10+10)
MasterIndexLookupParallel/unknown,indices=50-8      406ns ± 8%     343ns ±15%  -15.44%  (p=0.001 n=10+10)
MasterIndexLookupParallel/known,indices=100-8       480ns ± 1%     455ns ± 5%   -5.15%  (p=0.000 n=8+10)
MasterIndexLookupParallel/unknown,indices=100-8     391ns ±18%     382ns ± 8%     ~     (p=0.315 n=10+10)
MasterIndexLookupBlobSize-8                        71.0ns ± 8%    57.2ns ±11%  -19.36%  (p=0.000 n=9+10)
PackerManager-8                                     254ms ± 1%     254ms ± 1%     ~     (p=0.285 n=15+15)

name                                             old speed      new speed      delta
IndexMapHash-8                                   1.12GB/s ± 1%  2.05GB/s ± 0%  +82.13%  (p=0.000 n=10+9)
PackerManager-8                                   208MB/s ± 1%   207MB/s ± 1%     ~     (p=0.281 n=15+15)

name                                             old alloc/op   new alloc/op   delta
IndexMapHash-8                                      0.00B          0.00B          ~     (all equal)
IndexAlloc-8                                        400MB ± 0%     400MB ± 0%     ~     (p=1.000 n=9+10)
IndexAllocParallel-8                                401MB ± 0%     401MB ± 0%   +0.00%  (p=0.000 n=10+10)
MasterIndexAlloc-8                                  258MB ± 0%     262MB ± 0%   +1.42%  (p=0.000 n=9+10)
PackerManager-8                                    73.1kB ± 0%    73.1kB ± 0%     ~     (p=0.382 n=13+13)

name                                             old allocs/op  new allocs/op  delta
IndexMapHash-8                                       0.00           0.00          ~     (all equal)
IndexAlloc-8                                         907k ± 0%      907k ± 0%   -0.00%  (p=0.000 n=10+10)
IndexAllocParallel-8                                 907k ± 0%      907k ± 0%   +0.00%  (p=0.009 n=10+10)
MasterIndexAlloc-8                                   327k ± 0%      317k ± 0%   -3.06%  (p=0.000 n=10+10)
PackerManager-8                                       744 ± 0%       744 ± 0%     ~     (all equal)
This commit is contained in:
greatroar 2021-09-17 12:38:17 +02:00
parent bf9c8771a4
commit 8d2996eaaa
4 changed files with 7 additions and 44 deletions

1
go.mod
View file

@ -8,7 +8,6 @@ require (
github.com/Azure/go-autorest/autorest/to v0.4.0 // indirect
github.com/cenkalti/backoff/v4 v4.1.1
github.com/cespare/xxhash/v2 v2.1.1
github.com/dchest/siphash v1.2.2
github.com/dnaeon/go-vcr v1.2.0 // indirect
github.com/elithrar/simple-scrypt v1.3.0
github.com/go-ole/go-ole v1.2.5

2
go.sum
View file

@ -89,8 +89,6 @@ github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsr
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dchest/siphash v1.2.2 h1:9DFz8tQwl9pTVt5iok/9zKyzA1Q6bRGiF3HPiEEVr9I=
github.com/dchest/siphash v1.2.2/go.mod h1:q+IRvb2gOSrUnYoPqHiyHXS0FOBBOdl6tONBlVnOnt4=
github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI=
github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ=
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=

View file

@ -1,12 +1,9 @@
package repository
import (
"crypto/rand"
"encoding/binary"
"hash/maphash"
"github.com/restic/restic/internal/restic"
"github.com/dchest/siphash"
)
// An indexMap is a chained hash table that maps blob IDs to indexEntries.
@ -23,7 +20,7 @@ type indexMap struct {
buckets []*indexEntry
numentries uint
key0, key1 uint64 // Key for hash randomization.
mh maphash.Hash
free *indexEntry // Free list.
}
@ -113,25 +110,20 @@ func (m *indexMap) grow() {
}
func (m *indexMap) hash(id restic.ID) uint {
// We use siphash with a randomly generated 128-bit key, to prevent
// backups of specially crafted inputs from degrading performance.
// We use maphash to prevent backups of specially crafted inputs
// from degrading performance.
// While SHA-256 should be collision-resistant, for hash table indices
// we use only a few bits of it and finding collisions for those is
// much easier than breaking the whole algorithm.
h := uint(siphash.Hash(m.key0, m.key1, id[:]))
m.mh.Reset()
_, _ = m.mh.Write(id[:])
h := uint(m.mh.Sum64())
return h & uint(len(m.buckets)-1)
}
func (m *indexMap) init() {
const initialBuckets = 64
m.buckets = make([]*indexEntry, initialBuckets)
var buf [16]byte
if _, err := rand.Read(buf[:]); err != nil {
panic(err) // Very little we can do here.
}
m.key0 = binary.LittleEndian.Uint64(buf[:8])
m.key1 = binary.LittleEndian.Uint64(buf[8:])
}
func (m *indexMap) len() uint { return m.numentries }

View file

@ -107,32 +107,6 @@ func TestIndexMapForeachWithID(t *testing.T) {
}
}
func TestIndexMapHash(t *testing.T) {
t.Parallel()
var m1, m2 indexMap
id := restic.NewRandomID()
// Add to both maps to initialize them.
m1.add(id, 0, 0, 0)
m2.add(id, 0, 0, 0)
h1 := m1.hash(id)
h2 := m2.hash(id)
rtest.Equals(t, len(m1.buckets), len(m2.buckets)) // just to be sure
if h1 == h2 {
// The probability of the zero key should be 2^(-128).
if m1.key0 == 0 && m1.key1 == 0 {
t.Error("siphash key not set for m1")
}
if m2.key0 == 0 && m2.key1 == 0 {
t.Error("siphash key not set for m2")
}
}
}
func BenchmarkIndexMapHash(b *testing.B) {
var m indexMap
m.add(restic.ID{}, 0, 0, 0) // Trigger lazy initialization.