diff --git a/README.md b/README.md index 9d3b687..97c0385 100644 --- a/README.md +++ b/README.md @@ -13,16 +13,16 @@ make auto # Homomorphic hashing in golang -Package **tz** containts pure-Go implementation of hashing function described by Tillich and Źemor in [1] . +Package `tz` contains a pure-Go implementation of the hashing function described by Tillich and Zémor in [1]. -There are existing implementations already (e.g. [2]), however it is written in C. +There are existing implementations already (e.g. [2]), however they are written in C. -Package **gf127** contains arithmetic in GF(2^127) with _x^127+x^63+1_ as reduction polynomial. +Package `gf127` contains arithmetic in `GF(2^127)` with `x^127+x^63+1` as the reduction polynomial. # Description It can be used instead of Merkle-tree for data-validation, because homomorphic hashes -are concatable: hash sum of data can be calculated based on hashes of chunks. +are concatenable: hash sum of data can be calculated based on hashes of chunks. The example of how it works can be seen in tests. 
diff --git a/benchmark b/benchmark index 57fef3d..36a75d0 100755 --- a/benchmark +++ b/benchmark @@ -6,8 +6,8 @@ tmpfile=$1 go build ./cmd/tzsum && \ -for c in tzC tz tzbits; do - ./tzsum -cpuprofile cpu.prof -name $tmpfile -hash $c - echo top | go tool pprof cpu.prof - echo +for impl in avx avx2 avx2inline; do + echo $impl implementation: + /usr/bin/env time -f "time: %e seconds" ./tzsum -name $tmpfile -impl $impl + echo done diff --git a/cmd/tzsum/main.go b/cmd/tzsum/main.go index 1663165..b62992f 100644 --- a/cmd/tzsum/main.go +++ b/cmd/tzsum/main.go @@ -3,6 +3,7 @@ package main import ( "flag" "fmt" + "hash" "io" "log" "os" @@ -16,13 +17,13 @@ var ( cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`") memprofile = flag.String("memprofile", "", "write memory profile to `file`") filename = flag.String("name", "-", "file to use") + hashimpl = flag.String("impl", "avx2inline", "implementation to use") ) func main() { var ( f io.Reader err error - h = tz.New() ) flag.Parse() @@ -45,6 +46,18 @@ func main() { f = os.Stdin } + var h hash.Hash + switch *hashimpl { + case "avx": + h = tz.NewWith(tz.AVX) + case "avx2": + h = tz.NewWith(tz.AVX2) + case "avx2inline": + h = tz.NewWith(tz.AVX2Inline) + default: + h = tz.New() + } + if _, err := io.Copy(h, f); err != nil { log.Fatal("error while reading file: ", err) } diff --git a/tz/avx.go b/tz/avx.go new file mode 100644 index 0000000..4a61702 --- /dev/null +++ b/tz/avx.go @@ -0,0 +1,80 @@ +// Copyright 2018 (c) NSPCC +// +// This file contains AVX implementation. 
+package tz + +import ( + "hash" + "math" + + "github.com/nspcc-dev/tzhash/gf127" +) + +type digest struct { + x [4]gf127.GF127 +} + +// type assertion +var _ hash.Hash = (*digest)(nil) + +var ( + minmax = [2]gf127.GF127{{0, 0}, {math.MaxUint64, math.MaxUint64}} + x127x63 = gf127.GF127{1 << 63, 1 << 63} +) + +func newAVX() *digest { + d := new(digest) + d.Reset() + return d +} + +func (d *digest) Sum(in []byte) []byte { + // Make a copy of d so that caller can keep writing and summing. + d0 := *d + h := d0.checkSum() + return append(in, h[:]...) +} + +func (d *digest) checkSum() [hashSize]byte { + return d.byteArray() +} + +func (d *digest) byteArray() (b [hashSize]byte) { + copy(b[:], d.x[0].ByteArray()) + copy(b[16:], d.x[1].ByteArray()) + copy(b[32:], d.x[2].ByteArray()) + copy(b[48:], d.x[3].ByteArray()) + return +} + +func (d *digest) Reset() { + d.x[0] = gf127.GF127{1, 0} + d.x[1] = gf127.GF127{0, 0} + d.x[2] = gf127.GF127{0, 0} + d.x[3] = gf127.GF127{1, 0} +} + +func (d *digest) Write(data []byte) (n int, err error) { + n = len(data) + for _, b := range data { + mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>7)&1]) + mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>6)&1]) + mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>5)&1]) + mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>4)&1]) + mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>3)&1]) + mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>2)&1]) + mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>1)&1]) + mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>0)&1]) + } + return +} + +func (d *digest) Size() int { + return hashSize +} + +func (d *digest) BlockSize() int { + return hashBlockSize +} + +func mulBitRight(c00, c01, c10, c11, e *gf127.GF127) diff --git a/tz/hash_avx2.go b/tz/avx2.go similarity index 92% rename from tz/hash_avx2.go rename to tz/avx2.go index d686f86..2fe54e0 100644 --- a/tz/hash_avx2.go +++ 
b/tz/avx2.go @@ -1,3 +1,6 @@ +// Copyright 2019 (c) NSPCC +// +// This file contains AVX2 implementation. package tz import ( @@ -10,9 +13,10 @@ type digest2 struct { x [2]gf127.GF127x2 } +// type assertion var _ hash.Hash = (*digest2)(nil) -func NewAVX2() hash.Hash { +func newAVX2() *digest2 { d := new(digest2) d.Reset() return d diff --git a/tz/hash_avx2_inline.go b/tz/avx2_inline.go similarity index 87% rename from tz/hash_avx2_inline.go rename to tz/avx2_inline.go index c2800f2..de863b7 100644 --- a/tz/hash_avx2_inline.go +++ b/tz/avx2_inline.go @@ -1,3 +1,7 @@ +// Copyright 2019 (c) NSPCC +// +// This file contains AVX2 implementation with inlined +// assembly calls. package tz import ( @@ -10,9 +14,10 @@ type digest3 struct { x [2]gf127.GF127x2 } +// type assertion var _ hash.Hash = (*digest3)(nil) -func NewAVX2Inline() hash.Hash { +func newAVX2Inline() *digest3 { d := new(digest3) d.Reset() return d diff --git a/tz/avx2_unroll_amd64.s b/tz/avx2_inline_amd64.s similarity index 100% rename from tz/avx2_unroll_amd64.s rename to tz/avx2_inline_amd64.s diff --git a/tz/tzbits_amd64.s b/tz/avx_amd64.s similarity index 100% rename from tz/tzbits_amd64.s rename to tz/avx_amd64.s diff --git a/tz/hash.go b/tz/hash.go index b8b9e50..7396690 100644 --- a/tz/hash.go +++ b/tz/hash.go @@ -6,97 +6,54 @@ package tz import ( "errors" "hash" - "math" - - "github.com/nspcc-dev/tzhash/gf127" ) +type Implementation int + const ( hashSize = 64 hashBlockSize = 128 + + _ Implementation = iota + AVX + AVX2 + AVX2Inline ) -type digest struct { - x [4]gf127.GF127 +func (impl Implementation) String() string { + switch impl { + case AVX: + return "AVX" + case AVX2: + return "AVX2" + case AVX2Inline: + return "AVX2Inline" + default: + return "UNKNOWN" + } } -// type assertion -var _ hash.Hash = (*digest)(nil) - -var ( - minmax = [2]gf127.GF127{{0, 0}, {math.MaxUint64, math.MaxUint64}} - x127x63 = gf127.GF127{1 << 63, 1 << 63} -) +func NewWith(impl Implementation) hash.Hash { + switch impl 
{ + case AVX: + return newAVX() + case AVX2: + return newAVX2() + case AVX2Inline: + return newAVX2Inline() + default: + return New() + } +} // New returns a new hash.Hash computing the Tillich-Zémor checksum. func New() hash.Hash { - d := new(digest) - d.Reset() - return d -} - -func (d *digest) Sum(in []byte) []byte { - // Make a copy of d so that caller can keep writing and summing. - d0 := *d - h := d0.checkSum() - return append(in, h[:]...) -} - -func (d *digest) checkSum() [hashSize]byte { - return d.byteArray() -} - -func (d *digest) byteArray() (b [hashSize]byte) { - copy(b[:], d.x[0].ByteArray()) - copy(b[16:], d.x[1].ByteArray()) - copy(b[32:], d.x[2].ByteArray()) - copy(b[48:], d.x[3].ByteArray()) - return -} - -func (d *digest) Reset() { - d.x[0] = gf127.GF127{1, 0} - d.x[1] = gf127.GF127{0, 0} - d.x[2] = gf127.GF127{0, 0} - d.x[3] = gf127.GF127{1, 0} -} - -func (d *digest) Write(data []byte) (n int, err error) { - n = len(data) - for _, b := range data { - mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>7)&1]) - mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>6)&1]) - mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>5)&1]) - mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>4)&1]) - mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>3)&1]) - mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>2)&1]) - mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>1)&1]) - mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>0)&1]) - } - return -} - -func (d *digest) Size() int { - return hashSize -} - -func (d *digest) BlockSize() int { - return hashBlockSize -} - -// Sum returnz Tillich-Zémor checksum of data. -// It uses only AVX instructions (no AVX2). -func SumAVX(data []byte) [hashSize]byte { - d := new(digest) - d.Reset() - _, _ = d.Write(data) // no errors - return d.checkSum() + return newAVX2Inline() } // Sum returns Tillich-Zémor checksum of data. 
func Sum(data []byte) [hashSize]byte { - d := new(digest2) - d.Reset() + d := newAVX2Inline() _, _ = d.Write(data) // no errors return d.checkSum() } @@ -178,5 +135,3 @@ func SubtractL(c, a []byte) (b []byte, err error) { return p2.MarshalBinary() } - -func mulBitRight(c00, c01, c10, c11, e *gf127.GF127) diff --git a/tz/hash_test.go b/tz/hash_test.go index 750d1c3..25a1df8 100644 --- a/tz/hash_test.go +++ b/tz/hash_test.go @@ -11,6 +11,23 @@ import ( const benchDataSize = 100000 +var providers = []Implementation{ + AVX, + AVX2, + AVX2Inline, +} + +func TestNewWith(t *testing.T) { + d := NewWith(AVX) + require.IsType(t, (*digest)(nil), d) + + d = NewWith(AVX2) + require.IsType(t, (*digest2)(nil), d) + + d = NewWith(AVX2Inline) + require.IsType(t, (*digest3)(nil), d) +} + var testCases = []struct { input []byte hash string @@ -30,38 +47,18 @@ var testCases = []struct { } func TestHash(t *testing.T) { - t.Run("test AVX digest", func(t *testing.T) { - d := new(digest) - for _, tc := range testCases { - d.Reset() - _, _ = d.Write(tc.input) - sum := d.checkSum() - - require.Equal(t, tc.hash, hex.EncodeToString(sum[:])) - } - }) - - t.Run("test AVX2 digest", func(t *testing.T) { - d := new(digest2) - for _, tc := range testCases { - d.Reset() - _, _ = d.Write(tc.input) - sum := d.checkSum() - - require.Equal(t, tc.hash, hex.EncodeToString(sum[:])) - } - }) - - t.Run("test AVX2 digest with inline asm function", func(t *testing.T) { - d := new(digest3) - for _, tc := range testCases { - d.Reset() - _, _ = d.Write(tc.input) - sum := d.checkSum() - - require.Equal(t, tc.hash, hex.EncodeToString(sum[:])) - } - }) + for i := range providers { + p := providers[i] + t.Run("test "+p.String()+" digest", func(t *testing.T) { + d := NewWith(p) + for _, tc := range testCases { + d.Reset() + _, _ = d.Write(tc.input) + sum := d.Sum(nil) + require.Equal(t, tc.hash, hex.EncodeToString(sum[:])) + } + }) + } } func newBuffer() (data []byte) { @@ -75,46 +72,24 @@ func newBuffer() (data 
[]byte) { return } -func BenchmarkAVX(b *testing.B) { +func BenchmarkSum(b *testing.B) { data := newBuffer() + size := int64(len(data)) - b.ResetTimer() - b.ReportAllocs() - d := new(digest) - for i := 0; i < b.N; i++ { - d.Reset() - _, _ = d.Write(data) - d.checkSum() + for i := range providers { + p := providers[i] + b.Run("bench"+p.String()+"digest", func(b *testing.B) { + b.ResetTimer() + b.ReportAllocs() + d := NewWith(p) + for i := 0; i < b.N; i++ { + d.Reset() + _, _ = d.Write(data) + d.Sum(nil) + } + b.SetBytes(size) + }) } - b.SetBytes(int64(len(data))) -} - -func BenchmarkAVX2(b *testing.B) { - data := newBuffer() - - b.ResetTimer() - b.ReportAllocs() - d := new(digest2) - for i := 0; i < b.N; i++ { - d.Reset() - _, _ = d.Write(data) - d.checkSum() - } - b.SetBytes(int64(len(data))) -} - -func BenchmarkAVX2Inline(b *testing.B) { - data := newBuffer() - - b.ResetTimer() - b.ReportAllocs() - d := new(digest3) - for i := 0; i < b.N; i++ { - d.Reset() - _, _ = d.Write(data) - d.checkSum() - } - b.SetBytes(int64(len(data))) } func TestHomomorphism(t *testing.T) { @@ -189,14 +164,14 @@ func TestConcat(t *testing.T) { func TestValidate(t *testing.T) { var ( - hash []byte - ps [][]byte - got bool - err error + h []byte + ps [][]byte + got bool + err error ) for _, tc := range testCasesConcat { - hash, _ = hex.DecodeString(tc.Hash) + h, err = hex.DecodeString(tc.Hash) require.NoError(t, err) ps = make([][]byte, len(tc.Parts)) @@ -205,7 +180,7 @@ func TestValidate(t *testing.T) { require.NoError(t, err) } - got, err = Validate(hash, ps) + got, err = Validate(h, ps) require.NoError(t, err) require.True(t, got) }