Add possibility to use different implementations in cli

Also make API smaller and more consistent and fix typos in documentation.
This commit is contained in:
Evgenii 2019-07-19 17:52:46 +03:00
parent 826ed77561
commit c3cfe63e64
10 changed files with 195 additions and 163 deletions

View file

@ -13,16 +13,16 @@ make auto
# Homomorphic hashing in golang # Homomorphic hashing in golang
Package **tz** contains pure-Go implementation of hashing function described by Tillich and Zémor in [1] . Package `tz` contains pure-Go implementation of hashing function described by Tillich and Zémor in [1] .
There are existing implementations already (e.g. [2]), however it is written in C. There are existing implementations already (e.g. [2]), however they are written in C.
Package **gf127** contains arithmetic in GF(2^127) with _x^127+x^63+1_ as reduction polynomial. Package `gf127` contains arithmetic in `GF(2^127)` with `x^127+x^63+1` as reduction polynomial.
# Description # Description
It can be used instead of Merkle-tree for data-validation, because homomorphic hashes It can be used instead of Merkle-tree for data-validation, because homomorphic hashes
are concatable: hash sum of data can be calculated based on hashes of chunks. are concatenable: hash sum of data can be calculated based on hashes of chunks.
The example of how it works can be seen in tests. The example of how it works can be seen in tests.

View file

@ -6,8 +6,8 @@
tmpfile=$1 tmpfile=$1
go build ./cmd/tzsum && \ go build ./cmd/tzsum && \
for c in tzC tz tzbits; do for impl in avx avx2 avx2inline; do
./tzsum -cpuprofile cpu.prof -name $tmpfile -hash $c echo $impl implementation:
echo top | go tool pprof cpu.prof /usr/bin/env time -f "time: %e seconds" ./tzsum -name $tmpfile -impl $impl
echo echo
done done

View file

@ -3,6 +3,7 @@ package main
import ( import (
"flag" "flag"
"fmt" "fmt"
"hash"
"io" "io"
"log" "log"
"os" "os"
@ -16,13 +17,13 @@ var (
cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`") cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`")
memprofile = flag.String("memprofile", "", "write memory profile to `file`") memprofile = flag.String("memprofile", "", "write memory profile to `file`")
filename = flag.String("name", "-", "file to use") filename = flag.String("name", "-", "file to use")
hashimpl = flag.String("impl", "avx2inline", "implementation to use")
) )
func main() { func main() {
var ( var (
f io.Reader f io.Reader
err error err error
h = tz.New()
) )
flag.Parse() flag.Parse()
@ -45,6 +46,18 @@ func main() {
f = os.Stdin f = os.Stdin
} }
var h hash.Hash
switch *hashimpl {
case "avx":
h = tz.NewWith(tz.AVX)
case "avx2":
h = tz.NewWith(tz.AVX2)
case "avx2inline":
h = tz.NewWith(tz.AVX2Inline)
default:
h = tz.New()
}
if _, err := io.Copy(h, f); err != nil { if _, err := io.Copy(h, f); err != nil {
log.Fatal("error while reading file: ", err) log.Fatal("error while reading file: ", err)
} }

80
tz/avx.go Normal file
View file

@ -0,0 +1,80 @@
// Copyright 2018 (c) NSPCC
//
// This file contains AVX implementation.
package tz
import (
"hash"
"math"
"github.com/nspcc-dev/tzhash/gf127"
)
// digest is the running state of the AVX implementation of the hash.
// Its four elements are handed to mulBitRight as c00, c01, c10 and c11
// (in that order) for every input bit, and are serialized in the same
// order by byteArray.
type digest struct {
x [4]gf127.GF127
}
// Compile-time check that *digest satisfies hash.Hash.
var _ hash.Hash = (*digest)(nil)
var (
// minmax is indexed by a single input bit in Write: index 0 is the
// all-zero value and index 1 has every bit of both words set.
minmax = [2]gf127.GF127{{0, 0}, {math.MaxUint64, math.MaxUint64}}
// x127x63 has the top bit of each word set.
// NOTE(review): presumably the x^127+x^63 part of the reduction
// polynomial; it is not referenced by the Go code in this file — confirm
// where it is used.
x127x63 = gf127.GF127{1 << 63, 1 << 63}
)
// newAVX allocates a digest for the AVX implementation and brings it
// to its initial state by calling Reset.
func newAVX() *digest {
	state := &digest{}
	state.Reset()
	return state
}
// Sum appends the current checksum to in and returns the resulting slice.
// The checksum is computed on a copy of the digest so that the caller can
// keep writing and summing afterwards.
func (d *digest) Sum(in []byte) []byte {
	snapshot := *d
	sum := snapshot.checkSum()
	return append(in, sum[:]...)
}
// checkSum returns the current state serialized into a hashSize-byte array.
// It delegates to byteArray.
func (d *digest) checkSum() [hashSize]byte {
	sum := d.byteArray()
	return sum
}
// byteArray serializes the four state elements back to back: element i is
// copied to offset 16*i of the result, in the order x[0], x[1], x[2], x[3].
func (d *digest) byteArray() [hashSize]byte {
	var out [hashSize]byte
	for i := range d.x {
		copy(out[i*16:], d.x[i].ByteArray())
	}
	return out
}
// Reset restores the digest to its initial state: x[0] and x[3] are set to
// {1, 0}, x[1] and x[2] to {0, 0}.
// NOTE(review): presumably this is the 2x2 identity matrix — confirm
// against the gf127 element encoding.
func (d *digest) Reset() {
	d.x = [4]gf127.GF127{
		{1, 0},
		{0, 0},
		{0, 0},
		{1, 0},
	}
}
// Write feeds data into the digest. Every byte is processed bit by bit,
// most significant bit first: each bit selects an entry of minmax, which is
// passed to mulBitRight together with the four state elements.
// It always reports len(data) bytes consumed and a nil error.
func (d *digest) Write(data []byte) (n int, err error) {
	c00, c01, c10, c11 := &d.x[0], &d.x[1], &d.x[2], &d.x[3]
	for _, octet := range data {
		// The eight steps are kept unrolled, as in the original hot loop.
		mulBitRight(c00, c01, c10, c11, &minmax[(octet>>7)&1])
		mulBitRight(c00, c01, c10, c11, &minmax[(octet>>6)&1])
		mulBitRight(c00, c01, c10, c11, &minmax[(octet>>5)&1])
		mulBitRight(c00, c01, c10, c11, &minmax[(octet>>4)&1])
		mulBitRight(c00, c01, c10, c11, &minmax[(octet>>3)&1])
		mulBitRight(c00, c01, c10, c11, &minmax[(octet>>2)&1])
		mulBitRight(c00, c01, c10, c11, &minmax[(octet>>1)&1])
		mulBitRight(c00, c01, c10, c11, &minmax[(octet>>0)&1])
	}
	return len(data), nil
}
// Size returns hashSize, the number of bytes Sum appends.
func (d *digest) Size() int {
return hashSize
}
// BlockSize returns hashBlockSize.
func (d *digest) BlockSize() int {
return hashBlockSize
}
// mulBitRight updates the four state elements c00, c01, c10, c11 in place
// for one input bit; Write passes e as a pointer into minmax (all zeros for
// a 0 bit, all ones for a 1 bit).
// The function is declared without a body, so its implementation lives
// outside this file (presumably in assembly — confirm).
func mulBitRight(c00, c01, c10, c11, e *gf127.GF127)

View file

@ -1,3 +1,6 @@
// Copyright 2019 (c) NSPCC
//
// This file contains AVX2 implementation.
package tz package tz
import ( import (
@ -10,9 +13,10 @@ type digest2 struct {
x [2]gf127.GF127x2 x [2]gf127.GF127x2
} }
// type assertion
var _ hash.Hash = (*digest2)(nil) var _ hash.Hash = (*digest2)(nil)
func NewAVX2() hash.Hash { func newAVX2() *digest2 {
d := new(digest2) d := new(digest2)
d.Reset() d.Reset()
return d return d

View file

@ -1,3 +1,7 @@
// Copyright 2019 (c) NSPCC
//
// This file contains AVX2 implementation with inlined
// assembly calls.
package tz package tz
import ( import (
@ -10,9 +14,10 @@ type digest3 struct {
x [2]gf127.GF127x2 x [2]gf127.GF127x2
} }
// type assertion
var _ hash.Hash = (*digest3)(nil) var _ hash.Hash = (*digest3)(nil)
func NewAVX2Inline() hash.Hash { func newAVX2Inline() *digest3 {
d := new(digest3) d := new(digest3)
d.Reset() d.Reset()
return d return d

View file

@ -6,97 +6,54 @@ package tz
import ( import (
"errors" "errors"
"hash" "hash"
"math"
"github.com/nspcc-dev/tzhash/gf127"
) )
type Implementation int
const ( const (
hashSize = 64 hashSize = 64
hashBlockSize = 128 hashBlockSize = 128
_ Implementation = iota
AVX
AVX2
AVX2Inline
) )
type digest struct { func (impl Implementation) String() string {
x [4]gf127.GF127 switch impl {
case AVX:
return "AVX"
case AVX2:
return "AVX2"
case AVX2Inline:
return "AVX2Inline"
default:
return "UNKNOWN"
}
} }
// type assertion func NewWith(impl Implementation) hash.Hash {
var _ hash.Hash = (*digest)(nil) switch impl {
case AVX:
var ( return newAVX()
minmax = [2]gf127.GF127{{0, 0}, {math.MaxUint64, math.MaxUint64}} case AVX2:
x127x63 = gf127.GF127{1 << 63, 1 << 63} return newAVX2()
) case AVX2Inline:
return newAVX2Inline()
default:
return New()
}
}
// New returns a new hash.Hash computing the Tillich-Zémor checksum. // New returns a new hash.Hash computing the Tillich-Zémor checksum.
func New() hash.Hash { func New() hash.Hash {
d := new(digest) return newAVX2Inline()
d.Reset()
return d
}
func (d *digest) Sum(in []byte) []byte {
// Make a copy of d so that caller can keep writing and summing.
d0 := *d
h := d0.checkSum()
return append(in, h[:]...)
}
func (d *digest) checkSum() [hashSize]byte {
return d.byteArray()
}
func (d *digest) byteArray() (b [hashSize]byte) {
copy(b[:], d.x[0].ByteArray())
copy(b[16:], d.x[1].ByteArray())
copy(b[32:], d.x[2].ByteArray())
copy(b[48:], d.x[3].ByteArray())
return
}
func (d *digest) Reset() {
d.x[0] = gf127.GF127{1, 0}
d.x[1] = gf127.GF127{0, 0}
d.x[2] = gf127.GF127{0, 0}
d.x[3] = gf127.GF127{1, 0}
}
func (d *digest) Write(data []byte) (n int, err error) {
n = len(data)
for _, b := range data {
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>7)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>6)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>5)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>4)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>3)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>2)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>1)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>0)&1])
}
return
}
func (d *digest) Size() int {
return hashSize
}
func (d *digest) BlockSize() int {
return hashBlockSize
}
// Sum returnz Tillich-Zémor checksum of data.
// It uses only AVX instructions (no AVX2).
func SumAVX(data []byte) [hashSize]byte {
d := new(digest)
d.Reset()
_, _ = d.Write(data) // no errors
return d.checkSum()
} }
// Sum returns Tillich-Zémor checksum of data. // Sum returns Tillich-Zémor checksum of data.
func Sum(data []byte) [hashSize]byte { func Sum(data []byte) [hashSize]byte {
d := new(digest2) d := newAVX2Inline()
d.Reset()
_, _ = d.Write(data) // no errors _, _ = d.Write(data) // no errors
return d.checkSum() return d.checkSum()
} }
@ -178,5 +135,3 @@ func SubtractL(c, a []byte) (b []byte, err error) {
return p2.MarshalBinary() return p2.MarshalBinary()
} }
func mulBitRight(c00, c01, c10, c11, e *gf127.GF127)

View file

@ -11,6 +11,23 @@ import (
const benchDataSize = 100000 const benchDataSize = 100000
// providers lists the implementations that the tests and benchmarks
// iterate over.
var providers = []Implementation{
AVX,
AVX2,
AVX2Inline,
}
func TestNewWith(t *testing.T) {
d := NewWith(AVX)
require.IsType(t, (*digest)(nil), d)
d = NewWith(AVX2)
require.IsType(t, (*digest2)(nil), d)
d = NewWith(AVX2Inline)
require.IsType(t, (*digest3)(nil), d)
}
var testCases = []struct { var testCases = []struct {
input []byte input []byte
hash string hash string
@ -30,38 +47,18 @@ var testCases = []struct {
} }
func TestHash(t *testing.T) { func TestHash(t *testing.T) {
t.Run("test AVX digest", func(t *testing.T) { for i := range providers {
d := new(digest) p := providers[i]
t.Run("test "+p.String()+" digest", func(t *testing.T) {
d := NewWith(p)
for _, tc := range testCases { for _, tc := range testCases {
d.Reset() d.Reset()
_, _ = d.Write(tc.input) _, _ = d.Write(tc.input)
sum := d.checkSum() sum := d.Sum(nil)
require.Equal(t, tc.hash, hex.EncodeToString(sum[:])) require.Equal(t, tc.hash, hex.EncodeToString(sum[:]))
} }
}) })
t.Run("test AVX2 digest", func(t *testing.T) {
d := new(digest2)
for _, tc := range testCases {
d.Reset()
_, _ = d.Write(tc.input)
sum := d.checkSum()
require.Equal(t, tc.hash, hex.EncodeToString(sum[:]))
} }
})
t.Run("test AVX2 digest with inline asm function", func(t *testing.T) {
d := new(digest3)
for _, tc := range testCases {
d.Reset()
_, _ = d.Write(tc.input)
sum := d.checkSum()
require.Equal(t, tc.hash, hex.EncodeToString(sum[:]))
}
})
} }
func newBuffer() (data []byte) { func newBuffer() (data []byte) {
@ -75,46 +72,24 @@ func newBuffer() (data []byte) {
return return
} }
func BenchmarkAVX(b *testing.B) { func BenchmarkSum(b *testing.B) {
data := newBuffer() data := newBuffer()
size := int64(len(data))
for i := range providers {
p := providers[i]
b.Run("bench"+p.String()+"digest", func(b *testing.B) {
b.ResetTimer() b.ResetTimer()
b.ReportAllocs() b.ReportAllocs()
d := new(digest) d := NewWith(p)
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
d.Reset() d.Reset()
_, _ = d.Write(data) _, _ = d.Write(data)
d.checkSum() d.Sum(nil)
} }
b.SetBytes(int64(len(data))) b.SetBytes(size)
})
} }
func BenchmarkAVX2(b *testing.B) {
data := newBuffer()
b.ResetTimer()
b.ReportAllocs()
d := new(digest2)
for i := 0; i < b.N; i++ {
d.Reset()
_, _ = d.Write(data)
d.checkSum()
}
b.SetBytes(int64(len(data)))
}
func BenchmarkAVX2Inline(b *testing.B) {
data := newBuffer()
b.ResetTimer()
b.ReportAllocs()
d := new(digest3)
for i := 0; i < b.N; i++ {
d.Reset()
_, _ = d.Write(data)
d.checkSum()
}
b.SetBytes(int64(len(data)))
} }
func TestHomomorphism(t *testing.T) { func TestHomomorphism(t *testing.T) {
@ -189,14 +164,14 @@ func TestConcat(t *testing.T) {
func TestValidate(t *testing.T) { func TestValidate(t *testing.T) {
var ( var (
hash []byte h []byte
ps [][]byte ps [][]byte
got bool got bool
err error err error
) )
for _, tc := range testCasesConcat { for _, tc := range testCasesConcat {
hash, _ = hex.DecodeString(tc.Hash) h, _ = hex.DecodeString(tc.Hash)
require.NoError(t, err) require.NoError(t, err)
ps = make([][]byte, len(tc.Parts)) ps = make([][]byte, len(tc.Parts))
@ -205,7 +180,7 @@ func TestValidate(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
} }
got, err = Validate(hash, ps) got, err = Validate(h, ps)
require.NoError(t, err) require.NoError(t, err)
require.True(t, got) require.True(t, got)
} }