onedrive: improve speed of quickxorhash
This commit ports a fast C implementation from https://github.com/namazso/QuickXorHash. It uses the new crypto/subtle code from go1.20 to avoid the use of unsafe. Typical speedups are about 25x when using go1.20.

goos: linux
goarch: amd64
cpu: Intel(R) Celeron(R) N5105 @ 2.00GHz

QuickXorHash-Before   2.49ms    422MB/s  ±11%    100.00%
QuickXorHash-Subtle   87.9µs  11932MB/s  ± 5%  +2730.83% + 42.17%

Co-Author: @namazso
commit dc5d5de35c (parent 41cc4530f3)
4 changed files with 93 additions and 116 deletions
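The speedup comes from restructuring Write so that the hot loop is a plain XOR of two byte slices — which crypto/subtle.XORBytes (new in go1.20) implements with optimised code — while the QuickXorHash bit placement is deferred to checkSum. The following is a rough, self-contained Go sketch of why that split is valid; it is illustrative only, not the rclone code (the real implementation folds into a dataSize-byte buffer, a multiple of 160 bytes, but the principle is identical):

package main

import (
    "bytes"
    "fmt"
)

// place XORs one byte into a 160-bit (20-byte) circular vector starting at
// the given bit offset — the core QuickXorHash placement step.
func place(vec []byte, b byte, bitOff int) {
    vec[bitOff/8] ^= b << (bitOff % 8)
    vec[(bitOff/8+1)%len(vec)] ^= b >> (8 - bitOff%8)
}

func main() {
    data := []byte("The quick brown fox jumps over the lazy dog")

    // Old shape: shift every input byte into place individually.
    a := make([]byte, 20)
    for j, b := range data {
        place(a, b, (j*11)%160)
    }

    // New shape: byte j always lands at bit offset (j*11)%160, which only
    // depends on j%160, so bytes sharing that residue can be XORed together
    // first (the cheap slice XOR that subtle.XORBytes accelerates) and the
    // expensive placement is done once per buffer position at the end.
    folded := make([]byte, 160)
    for j, b := range data {
        folded[j%160] ^= b
    }
    c := make([]byte, 20)
    for j, b := range folded {
        place(c, b, (j*11)%160)
    }

    // XOR is commutative and associative, so both orders agree.
    fmt.Println(bytes.Equal(a, c)) // true
}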
backend/onedrive/quickxorhash/quickxorhash.go

@@ -7,51 +7,40 @@
 // See: https://docs.microsoft.com/en-us/onedrive/developer/code-snippets/quickxorhash
 package quickxorhash

-// This code was ported from the code snippet linked from
-// https://docs.microsoft.com/en-us/onedrive/developer/code-snippets/quickxorhash
-// Which has the copyright
+// This code was ported from a fast C-implementation from
+// https://github.com/namazso/QuickXorHash
+// which is licensed under the BSD Zero Clause License
+//
+// BSD Zero Clause License
+//
+// Copyright (c) 2022 namazso <admin@namazso.eu>
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+// OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.

-// ------------------------------------------------------------------------------
-// Copyright (c) 2016 Microsoft Corporation
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-// ------------------------------------------------------------------------------
-
-import (
-    "hash"
-)
+import "hash"

 const (
     // BlockSize is the preferred size for hashing
     BlockSize = 64
     // Size of the output checksum
     Size = 20
-    bitsInLastCell = 32
-    shift          = 11
-    widthInBits    = 8 * Size
-    dataSize       = (widthInBits-1)/64 + 1
+    shift       = 11
+    widthInBits = 8 * Size
+    dataSize    = shift * widthInBits
 )

 type quickXorHash struct {
-    data        [dataSize]uint64
-    lengthSoFar uint64
-    shiftSoFar  int
+    data [dataSize]byte
+    size uint64
 }

 // New returns a new hash.Hash computing the quickXorHash checksum.
@@ -70,94 +59,37 @@ func New() hash.Hash {
 //
 // Implementations must not retain p.
 func (q *quickXorHash) Write(p []byte) (n int, err error) {
-    currentshift := q.shiftSoFar
-    // The bitvector where we'll start xoring
-    vectorArrayIndex := currentshift / 64
-    // The position within the bit vector at which we begin xoring
-    vectorOffset := currentshift % 64
-    iterations := len(p)
-    if iterations > widthInBits {
-        iterations = widthInBits
+    var i int
+    // fill last remain
+    lastRemain := int(q.size) % dataSize
+    if lastRemain != 0 {
+        i += xorBytes(q.data[lastRemain:], p)
     }

-    for i := 0; i < iterations; i++ {
-        isLastCell := vectorArrayIndex == len(q.data)-1
-        var bitsInVectorCell int
-        if isLastCell {
-            bitsInVectorCell = bitsInLastCell
-        } else {
-            bitsInVectorCell = 64
-        }
-
-        // There's at least 2 bitvectors before we reach the end of the array
-        if vectorOffset <= bitsInVectorCell-8 {
-            for j := i; j < len(p); j += widthInBits {
-                q.data[vectorArrayIndex] ^= uint64(p[j]) << uint(vectorOffset)
-            }
-        } else {
-            index1 := vectorArrayIndex
-            var index2 int
-            if isLastCell {
-                index2 = 0
-            } else {
-                index2 = vectorArrayIndex + 1
-            }
-            low := byte(bitsInVectorCell - vectorOffset)
-
-            xoredByte := byte(0)
-            for j := i; j < len(p); j += widthInBits {
-                xoredByte ^= p[j]
-            }
-            q.data[index1] ^= uint64(xoredByte) << uint(vectorOffset)
-            q.data[index2] ^= uint64(xoredByte) >> low
-        }
-        vectorOffset += shift
-        for vectorOffset >= bitsInVectorCell {
-            if isLastCell {
-                vectorArrayIndex = 0
-            } else {
-                vectorArrayIndex = vectorArrayIndex + 1
-            }
-            vectorOffset -= bitsInVectorCell
+    if i != len(p) {
+        for len(p)-i >= dataSize {
+            i += xorBytes(q.data[:], p[i:])
         }
+        xorBytes(q.data[:], p[i:])
     }
-
-    // Update the starting position in a circular shift pattern
-    q.shiftSoFar = (q.shiftSoFar + shift*(len(p)%widthInBits)) % widthInBits
-
-    q.lengthSoFar += uint64(len(p))
-
+    q.size += uint64(len(p))
     return len(p), nil
 }

 // Calculate the current checksum
-func (q *quickXorHash) checkSum() (h [Size]byte) {
-    // Output the data as little endian bytes
-    ph := 0
-    for i := 0; i < len(q.data)-1; i++ {
-        d := q.data[i]
-        _ = h[ph+7] // bounds check
-        h[ph+0] = byte(d >> (8 * 0))
-        h[ph+1] = byte(d >> (8 * 1))
-        h[ph+2] = byte(d >> (8 * 2))
-        h[ph+3] = byte(d >> (8 * 3))
-        h[ph+4] = byte(d >> (8 * 4))
-        h[ph+5] = byte(d >> (8 * 5))
-        h[ph+6] = byte(d >> (8 * 6))
-        h[ph+7] = byte(d >> (8 * 7))
-        ph += 8
+func (q *quickXorHash) checkSum() (h [Size + 1]byte) {
+    for i := 0; i < dataSize; i++ {
+        shift := (i * 11) % 160
+        shiftBytes := shift / 8
+        shiftBits := shift % 8
+        shifted := int(q.data[i]) << shiftBits
+        h[shiftBytes] ^= byte(shifted)
+        h[shiftBytes+1] ^= byte(shifted >> 8)
     }
-    // remaining 32 bits
-    d := q.data[len(q.data)-1]
-    h[Size-4] = byte(d >> (8 * 0))
-    h[Size-3] = byte(d >> (8 * 1))
-    h[Size-2] = byte(d >> (8 * 2))
-    h[Size-1] = byte(d >> (8 * 3))
+    h[0] ^= h[20]

     // XOR the file length with the least significant bits in little endian format
-    d = q.lengthSoFar
+    d := q.size
     h[Size-8] ^= byte(d >> (8 * 0))
     h[Size-7] ^= byte(d >> (8 * 1))
     h[Size-6] ^= byte(d >> (8 * 2))
@@ -174,7 +106,7 @@ func (q *quickXorHash) checkSum() (h [Size]byte) {
 // It does not change the underlying hash state.
 func (q *quickXorHash) Sum(b []byte) []byte {
     hash := q.checkSum()
-    return append(b, hash[:]...)
+    return append(b, hash[:Size]...)
 }

 // Reset resets the Hash to its initial state.

@@ -196,8 +128,10 @@ func (q *quickXorHash) BlockSize() int {
 }

 // Sum returns the quickXorHash checksum of the data.
-func Sum(data []byte) [Size]byte {
+func Sum(data []byte) (h [Size]byte) {
     var d quickXorHash
     _, _ = d.Write(data)
-    return d.checkSum()
+    s := d.checkSum()
+    copy(h[:], s[:])
+    return h
 }
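This commit does not change the package's exported surface (New, Sum, Size, BlockSize), so existing callers are unaffected. A minimal usage sketch, assuming the usual rclone module path (OneDrive transmits this hash base64-encoded, hence the encoding below):

package main

import (
    "encoding/base64"
    "fmt"

    "github.com/rclone/rclone/backend/onedrive/quickxorhash"
)

func main() {
    // Streaming use via the hash.Hash interface.
    h := quickxorhash.New()
    _, _ = h.Write([]byte("hello, world"))
    fmt.Println(base64.StdEncoding.EncodeToString(h.Sum(nil)))

    // One-shot convenience function; prints the same digest.
    sum := quickxorhash.Sum([]byte("hello, world"))
    fmt.Println(base64.StdEncoding.EncodeToString(sum[:]))
}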
backend/onedrive/quickxorhash/quickxorhash_test.go

@@ -4,6 +4,7 @@ import (
     "encoding/base64"
     "fmt"
     "hash"
+    "math/rand"
     "testing"

     "github.com/stretchr/testify/assert"

@@ -166,3 +167,16 @@ func TestReset(t *testing.T) {

 // check interface
 var _ hash.Hash = (*quickXorHash)(nil)
+
+func BenchmarkQuickXorHash(b *testing.B) {
+    b.SetBytes(1 << 20)
+    buf := make([]byte, 1<<20)
+    rand.Read(buf)
+    h := New()
+    b.ResetTimer()
+    for i := 0; i < b.N; i++ {
+        h.Reset()
+        h.Write(buf)
+        h.Sum(nil)
+    }
+}
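For reference, the BenchmarkQuickXorHash added above can be run with the standard tooling from the rclone source tree, e.g. go test -bench QuickXorHash ./backend/onedrive/quickxorhash; a go1.20 toolchain is needed to exercise the crypto/subtle path (see xor_1.20.go below) rather than the portable fallback.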
backend/onedrive/quickxorhash/xor.go (new file, 20 additions)

@@ -0,0 +1,20 @@
+//go:build !go1.20
+
+package quickxorhash
+
+func xorBytes(dst, src []byte) int {
+    n := len(dst)
+    if len(src) < n {
+        n = len(src)
+    }
+    if n == 0 {
+        return 0
+    }
+    dst = dst[:n]
+    //src = src[:n]
+    src = src[:len(dst)] // remove bounds check in loop
+    for i := range dst {
+        dst[i] ^= src[i]
+    }
+    return n
+}
backend/onedrive/quickxorhash/xor_1.20.go (new file, 9 additions)

@@ -0,0 +1,9 @@
+//go:build go1.20
+
+package quickxorhash
+
+import "crypto/subtle"
+
+func xorBytes(dst, src []byte) int {
+    return subtle.XORBytes(dst, src, dst)
+}
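Of the two new files, xor_1.20.go is selected by its //go:build constraint on go1.20 and later, where crypto/subtle.XORBytes is available; xor.go is the portable byte-loop fallback for older toolchains. This is why the commit message quotes the ~25x figure specifically "when using go1.20".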