Add possibility to use different implementations in cli

Also make API smaller and more consistent and fix typos in documentation.
This commit is contained in:
Evgenii 2019-07-19 17:52:46 +03:00
parent 826ed77561
commit c3cfe63e64
10 changed files with 195 additions and 163 deletions

View file

@ -13,16 +13,16 @@ make auto
# Homomorphic hashing in golang
Package **tz** contains a pure-Go implementation of the hashing function described by Tillich and Zémor in [1].
Package `tz` contains a pure-Go implementation of the hashing function described by Tillich and Zémor in [1].
There are existing implementations already (e.g. [2]), however it is written in C.
There are existing implementations already (e.g. [2]), however they are written in C.
Package **gf127** contains arithmetic in GF(2^127) with _x^127+x^63+1_ as reduction polynomial.
Package `gf127` contains arithmetic in `GF(2^127)` with `x^127+x^63+1` as reduction polynomial.
# Description
It can be used instead of Merkle-tree for data-validation, because homomorphic hashes
are concatable: hash sum of data can be calculated based on hashes of chunks.
are concatenable: hash sum of data can be calculated based on hashes of chunks.
The example of how it works can be seen in tests.

View file

@ -6,8 +6,8 @@
tmpfile=$1
go build ./cmd/tzsum && \
for c in tzC tz tzbits; do
./tzsum -cpuprofile cpu.prof -name $tmpfile -hash $c
echo top | go tool pprof cpu.prof
for impl in avx avx2 avx2inline; do
echo $impl implementation:
/usr/bin/env time -f "time: %e seconds" ./tzsum -name $tmpfile -impl $impl
echo
done

View file

@ -3,6 +3,7 @@ package main
import (
"flag"
"fmt"
"hash"
"io"
"log"
"os"
@ -16,13 +17,13 @@ var (
cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`")
memprofile = flag.String("memprofile", "", "write memory profile to `file`")
filename = flag.String("name", "-", "file to use")
hashimpl = flag.String("impl", "avx2inline", "implementation to use")
)
func main() {
var (
f io.Reader
err error
h = tz.New()
)
flag.Parse()
@ -45,6 +46,18 @@ func main() {
f = os.Stdin
}
var h hash.Hash
switch *hashimpl {
case "avx":
h = tz.NewWith(tz.AVX)
case "avx2":
h = tz.NewWith(tz.AVX2)
case "avx2inline":
h = tz.NewWith(tz.AVX2Inline)
default:
h = tz.New()
}
if _, err := io.Copy(h, f); err != nil {
log.Fatal("error while reading file: ", err)
}

80
tz/avx.go Normal file
View file

@ -0,0 +1,80 @@
// Copyright 2018 (c) NSPCC
//
// This file contains AVX implementation.
package tz
import (
"hash"
"math"
"github.com/nspcc-dev/tzhash/gf127"
)
// digest is the hash state of the AVX implementation.
// It keeps four gf127.GF127 elements: Reset initialises them to
// {1, 0, 0, 1} and Write hands them to mulBitRight as c00, c01, c10, c11,
// i.e. as the entries of a 2x2 matrix in row-major order.
type digest struct {
x [4]gf127.GF127
}
// Compile-time check that *digest implements hash.Hash.
var _ hash.Hash = (*digest)(nil)
var (
// minmax is indexed in Write by a single message bit: entry 0 has every
// bit clear, entry 1 has every bit set.
// NOTE(review): presumably consumed by mulBitRight as a branch-free
// mask - confirm against the assembly.
minmax = [2]gf127.GF127{{0, 0}, {math.MaxUint64, math.MaxUint64}}
// x127x63 has only the top bit of each 64-bit word set.
// NOTE(review): presumably encodes x^127 + x^63 for the reduction step
// and is referenced from the assembly; no Go code visible here uses
// it - confirm.
x127x63 = gf127.GF127{1 << 63, 1 << 63}
)
// newAVX returns a digest of the AVX implementation that has already been
// reset to its initial state and is ready to accept data.
func newAVX() *digest {
	var state digest
	state.Reset()
	return &state
}
// Sum appends the checksum of everything written so far to in and returns
// the resulting slice. The checksum is computed on a copy of the state, so
// the caller can keep writing and summing afterwards.
func (d *digest) Sum(in []byte) []byte {
	snapshot := *d
	sum := snapshot.checkSum()
	return append(in, sum[:]...)
}
// checkSum returns the current state as a fixed-size byte array.
// It delegates to byteArray without modifying the digest.
func (d *digest) checkSum() [hashSize]byte {
return d.byteArray()
}
// byteArray serialises the state: the byte form of x[i] is copied to offset
// 16*i of the result, for i = 0..3.
func (d *digest) byteArray() (b [hashSize]byte) {
	const step = 16 // byte offset between consecutive elements
	for i := range d.x {
		copy(b[i*step:], d.x[i].ByteArray())
	}
	return b
}
// Reset puts the digest back into its initial state: elements 0 and 3 are
// set to {1, 0}, elements 1 and 2 to {0, 0}.
func (d *digest) Reset() {
	d.x = [4]gf127.GF127{
		{1, 0},
		{0, 0},
		{0, 0},
		{1, 0},
	}
}
// Write feeds data into the hash. Every byte is consumed bit by bit, from
// the most significant bit (b>>7) down to the least significant one (b>>0);
// each bit selects an entry of minmax that is passed to mulBitRight together
// with the four state elements. It always returns len(data) and a nil error.
func (d *digest) Write(data []byte) (n int, err error) {
n = len(data)
for _, b := range data {
// The eight per-bit steps are written out by hand; keep their order.
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>7)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>6)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>5)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>4)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>3)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>2)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>1)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>0)&1])
}
return
}
// Size returns hashSize, the number of bytes that Sum appends.
func (d *digest) Size() int {
return hashSize
}
// BlockSize returns hashBlockSize, the value reported as the block size of
// the hash for the hash.Hash interface.
func (d *digest) BlockSize() int {
return hashBlockSize
}
// mulBitRight is declared without a Go body; its implementation is supplied
// outside of Go source.
// NOTE(review): presumably an assembly routine that updates the 2x2 matrix
// (c00 c01; c10 c11) in place by multiplying it on the right with the
// generator selected through the mask e - confirm against the assembly.
func mulBitRight(c00, c01, c10, c11, e *gf127.GF127)

View file

@ -1,3 +1,6 @@
// Copyright 2019 (c) NSPCC
//
// This file contains AVX2 implementation.
package tz
import (
@ -10,9 +13,10 @@ type digest2 struct {
x [2]gf127.GF127x2
}
// type assertion
var _ hash.Hash = (*digest2)(nil)
func NewAVX2() hash.Hash {
func newAVX2() *digest2 {
d := new(digest2)
d.Reset()
return d

View file

@ -1,3 +1,7 @@
// Copyright 2019 (c) NSPCC
//
// This file contains AVX2 implementation with inlined
// assembly calls.
package tz
import (
@ -10,9 +14,10 @@ type digest3 struct {
x [2]gf127.GF127x2
}
// type assertion
var _ hash.Hash = (*digest3)(nil)
func NewAVX2Inline() hash.Hash {
func newAVX2Inline() *digest3 {
d := new(digest3)
d.Reset()
return d

View file

@ -6,97 +6,54 @@ package tz
import (
"errors"
"hash"
"math"
"github.com/nspcc-dev/tzhash/gf127"
)
// Implementation identifies one of the available hash implementations.
// Pass a value of this type to NewWith to select the implementation.
type Implementation int
const (
hashSize = 64
hashBlockSize = 128
_ Implementation = iota
AVX
AVX2
AVX2Inline
)
type digest struct {
x [4]gf127.GF127
// String returns the name of impl: "AVX", "AVX2" or "AVX2Inline" for the
// known implementations and "UNKNOWN" for any other value.
func (impl Implementation) String() string {
	if impl == AVX {
		return "AVX"
	}
	if impl == AVX2 {
		return "AVX2"
	}
	if impl == AVX2Inline {
		return "AVX2Inline"
	}
	return "UNKNOWN"
}
// Compile-time check that *digest implements hash.Hash.
var _ hash.Hash = (*digest)(nil)
var (
// minmax is indexed in Write by a single message bit: entry 0 has every
// bit clear, entry 1 has every bit set.
// NOTE(review): presumably consumed by mulBitRight as a branch-free
// mask - confirm against the assembly.
minmax = [2]gf127.GF127{{0, 0}, {math.MaxUint64, math.MaxUint64}}
// x127x63 has only the top bit of each 64-bit word set.
// NOTE(review): presumably encodes x^127 + x^63 for the reduction step
// and is referenced from the assembly - confirm.
x127x63 = gf127.GF127{1 << 63, 1 << 63}
)
// NewWith returns a new hash.Hash backed by the requested implementation.
// Any value other than AVX, AVX2 or AVX2Inline yields the hash returned by
// New.
func NewWith(impl Implementation) hash.Hash {
	if impl == AVX {
		return newAVX()
	}
	if impl == AVX2 {
		return newAVX2()
	}
	if impl == AVX2Inline {
		return newAVX2Inline()
	}
	return New()
}
// New returns a new hash.Hash computing the Tillich-Zémor checksum.
// The returned hash is already reset to its initial state.
func New() hash.Hash {
	var state digest
	state.Reset()
	return &state
}
// Sum appends the checksum of everything written so far to in and returns
// the resulting slice. The checksum is computed on a copy of the state, so
// the caller can keep writing and summing afterwards.
func (d *digest) Sum(in []byte) []byte {
	snapshot := *d
	sum := snapshot.checkSum()
	return append(in, sum[:]...)
}
// checkSum returns the current state as a fixed-size byte array.
// It delegates to byteArray without modifying the digest.
func (d *digest) checkSum() [hashSize]byte {
return d.byteArray()
}
// byteArray serialises the state: the byte form of x[i] is copied to offset
// 16*i of the result, for i = 0..3.
func (d *digest) byteArray() (b [hashSize]byte) {
	const step = 16 // byte offset between consecutive elements
	for i := range d.x {
		copy(b[i*step:], d.x[i].ByteArray())
	}
	return b
}
// Reset puts the digest back into its initial state: elements 0 and 3 are
// set to {1, 0}, elements 1 and 2 to {0, 0}.
func (d *digest) Reset() {
	d.x = [4]gf127.GF127{
		{1, 0},
		{0, 0},
		{0, 0},
		{1, 0},
	}
}
// Write feeds data into the hash. Every byte is consumed bit by bit, from
// the most significant bit (b>>7) down to the least significant one (b>>0);
// each bit selects an entry of minmax that is passed to mulBitRight together
// with the four state elements. It always returns len(data) and a nil error.
func (d *digest) Write(data []byte) (n int, err error) {
n = len(data)
for _, b := range data {
// The eight per-bit steps are written out by hand; keep their order.
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>7)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>6)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>5)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>4)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>3)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>2)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>1)&1])
mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>0)&1])
}
return
}
// Size returns hashSize, the number of bytes that Sum appends.
func (d *digest) Size() int {
return hashSize
}
// BlockSize returns hashBlockSize, the value reported as the block size of
// the hash for the hash.Hash interface.
func (d *digest) BlockSize() int {
return hashBlockSize
}
// SumAVX returns the Tillich-Zémor checksum of data.
// It uses only AVX instructions (no AVX2).
func SumAVX(data []byte) [hashSize]byte {
d := new(digest)
d.Reset()
_, _ = d.Write(data) // no errors
return d.checkSum()
return newAVX2Inline()
}
// Sum returns Tillich-Zémor checksum of data.
func Sum(data []byte) [hashSize]byte {
d := new(digest2)
d.Reset()
d := newAVX2Inline()
_, _ = d.Write(data) // no errors
return d.checkSum()
}
@ -178,5 +135,3 @@ func SubtractL(c, a []byte) (b []byte, err error) {
return p2.MarshalBinary()
}
// mulBitRight is declared without a Go body; its implementation is supplied
// outside of Go source.
// NOTE(review): presumably an assembly routine that updates the 2x2 matrix
// (c00 c01; c10 c11) in place by multiplying it on the right with the
// generator selected through the mask e - confirm against the assembly.
func mulBitRight(c00, c01, c10, c11, e *gf127.GF127)

View file

@ -11,6 +11,23 @@ import (
const benchDataSize = 100000
// providers lists every Implementation value that the tests and benchmarks
// in this file iterate over.
var providers = []Implementation{
AVX,
AVX2,
AVX2Inline,
}
func TestNewWith(t *testing.T) {
d := NewWith(AVX)
require.IsType(t, (*digest)(nil), d)
d = NewWith(AVX2)
require.IsType(t, (*digest2)(nil), d)
d = NewWith(AVX2Inline)
require.IsType(t, (*digest3)(nil), d)
}
var testCases = []struct {
input []byte
hash string
@ -30,38 +47,18 @@ var testCases = []struct {
}
func TestHash(t *testing.T) {
t.Run("test AVX digest", func(t *testing.T) {
d := new(digest)
for _, tc := range testCases {
d.Reset()
_, _ = d.Write(tc.input)
sum := d.checkSum()
require.Equal(t, tc.hash, hex.EncodeToString(sum[:]))
}
})
t.Run("test AVX2 digest", func(t *testing.T) {
d := new(digest2)
for _, tc := range testCases {
d.Reset()
_, _ = d.Write(tc.input)
sum := d.checkSum()
require.Equal(t, tc.hash, hex.EncodeToString(sum[:]))
}
})
t.Run("test AVX2 digest with inline asm function", func(t *testing.T) {
d := new(digest3)
for _, tc := range testCases {
d.Reset()
_, _ = d.Write(tc.input)
sum := d.checkSum()
require.Equal(t, tc.hash, hex.EncodeToString(sum[:]))
}
})
for i := range providers {
p := providers[i]
t.Run("test "+p.String()+" digest", func(t *testing.T) {
d := NewWith(p)
for _, tc := range testCases {
d.Reset()
_, _ = d.Write(tc.input)
sum := d.Sum(nil)
require.Equal(t, tc.hash, hex.EncodeToString(sum[:]))
}
})
}
}
func newBuffer() (data []byte) {
@ -75,46 +72,24 @@ func newBuffer() (data []byte) {
return
}
func BenchmarkAVX(b *testing.B) {
func BenchmarkSum(b *testing.B) {
data := newBuffer()
size := int64(len(data))
b.ResetTimer()
b.ReportAllocs()
d := new(digest)
for i := 0; i < b.N; i++ {
d.Reset()
_, _ = d.Write(data)
d.checkSum()
for i := range providers {
p := providers[i]
b.Run("bench"+p.String()+"digest", func(b *testing.B) {
b.ResetTimer()
b.ReportAllocs()
d := NewWith(p)
for i := 0; i < b.N; i++ {
d.Reset()
_, _ = d.Write(data)
d.Sum(nil)
}
b.SetBytes(size)
})
}
b.SetBytes(int64(len(data)))
}
// BenchmarkAVX2 measures hashing of the buffer returned by newBuffer with a
// digest2: each iteration resets the digest, writes the whole buffer and
// computes the checksum.
func BenchmarkAVX2(b *testing.B) {
data := newBuffer()
// Exclude the buffer preparation from the measured time.
b.ResetTimer()
b.ReportAllocs()
d := new(digest2)
for i := 0; i < b.N; i++ {
d.Reset()
_, _ = d.Write(data)
d.checkSum()
}
// Bytes processed per iteration, so that throughput is reported.
b.SetBytes(int64(len(data)))
}
// BenchmarkAVX2Inline measures hashing of the buffer returned by newBuffer
// with a digest3: each iteration resets the digest, writes the whole buffer
// and computes the checksum.
func BenchmarkAVX2Inline(b *testing.B) {
data := newBuffer()
// Exclude the buffer preparation from the measured time.
b.ResetTimer()
b.ReportAllocs()
d := new(digest3)
for i := 0; i < b.N; i++ {
d.Reset()
_, _ = d.Write(data)
d.checkSum()
}
// Bytes processed per iteration, so that throughput is reported.
b.SetBytes(int64(len(data)))
}
func TestHomomorphism(t *testing.T) {
@ -189,14 +164,14 @@ func TestConcat(t *testing.T) {
func TestValidate(t *testing.T) {
var (
hash []byte
ps [][]byte
got bool
err error
h []byte
ps [][]byte
got bool
err error
)
for _, tc := range testCasesConcat {
hash, _ = hex.DecodeString(tc.Hash)
h, _ = hex.DecodeString(tc.Hash)
require.NoError(t, err)
ps = make([][]byte, len(tc.Parts))
@ -205,7 +180,7 @@ func TestValidate(t *testing.T) {
require.NoError(t, err)
}
got, err = Validate(hash, ps)
got, err = Validate(h, ps)
require.NoError(t, err)
require.True(t, got)
}