onedrive: improve speed of quickxorhash
This commit ports a fast C implementation from https://github.com/namazso/QuickXorHash. It uses the new crypto/subtle code from go1.20 to avoid the use of unsafe. Typical speedups are about 25x when using go1.20.

goos: linux
goarch: amd64
cpu: Intel(R) Celeron(R) N5105 @ 2.00GHz

QuickXorHash-Before   2.49ms    422MB/s  ±11%    100.00%
QuickXorHash-Subtle   87.9µs  11932MB/s  ± 5%  +2730.83% + 42.17%

Co-Author: @namazso
commit dc5d5de35c (parent 41cc4530f3)
4 changed files with 93 additions and 116 deletions
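The speedup comes from restructuring Write so that the hot loop is a plain XOR of two byte slices — which crypto/subtle.XORBytes (new in go1.20) implements with optimised code — while the QuickXorHash bit placement is deferred to checkSum. The following is a rough, self-contained Go sketch of why that split is valid; it is illustrative only, not the rclone code (the real implementation folds into a dataSize-byte buffer, a multiple of 160 bytes, but the principle is identical):

package main

import (
    "bytes"
    "fmt"
)

// place XORs one byte into a 160-bit (20-byte) circular vector starting at
// the given bit offset — the core QuickXorHash placement step.
func place(vec []byte, b byte, bitOff int) {
    vec[bitOff/8] ^= b << (bitOff % 8)
    vec[(bitOff/8+1)%len(vec)] ^= b >> (8 - bitOff%8)
}

func main() {
    data := []byte("The quick brown fox jumps over the lazy dog")

    // Old shape: shift every input byte into place individually.
    a := make([]byte, 20)
    for j, b := range data {
        place(a, b, (j*11)%160)
    }

    // New shape: byte j always lands at bit offset (j*11)%160, which only
    // depends on j%160, so bytes sharing that residue can be XORed together
    // first (the cheap slice XOR that subtle.XORBytes accelerates) and the
    // expensive placement is done once per buffer position at the end.
    folded := make([]byte, 160)
    for j, b := range data {
        folded[j%160] ^= b
    }
    c := make([]byte, 20)
    for j, b := range folded {
        place(c, b, (j*11)%160)
    }

    // XOR is commutative and associative, so both orders agree.
    fmt.Println(bytes.Equal(a, c)) // true
}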
backend/onedrive/quickxorhash/quickxorhash.go

@@ -7,51 +7,40 @@
 // See: https://docs.microsoft.com/en-us/onedrive/developer/code-snippets/quickxorhash
 package quickxorhash

-// This code was ported from the code snippet linked from
-// https://docs.microsoft.com/en-us/onedrive/developer/code-snippets/quickxorhash
-// Which has the copyright
+// This code was ported from a fast C-implementation from
+// https://github.com/namazso/QuickXorHash
+// which is licensed under the BSD Zero Clause License
+//
+// BSD Zero Clause License
+//
+// Copyright (c) 2022 namazso <admin@namazso.eu>
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+// OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.

-// ------------------------------------------------------------------------------
-// Copyright (c) 2016 Microsoft Corporation
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-// ------------------------------------------------------------------------------
-
-import (
-    "hash"
-)
+import "hash"

 const (
     // BlockSize is the preferred size for hashing
     BlockSize = 64
     // Size of the output checksum
     Size = 20
-    bitsInLastCell = 32
-    shift          = 11
-    widthInBits    = 8 * Size
-    dataSize       = (widthInBits-1)/64 + 1
+    shift       = 11
+    widthInBits = 8 * Size
+    dataSize    = shift * widthInBits
 )

 type quickXorHash struct {
-    data        [dataSize]uint64
-    lengthSoFar uint64
-    shiftSoFar  int
+    data [dataSize]byte
+    size uint64
 }

 // New returns a new hash.Hash computing the quickXorHash checksum.
@@ -70,94 +59,37 @@ func New() hash.Hash {
 //
 // Implementations must not retain p.
 func (q *quickXorHash) Write(p []byte) (n int, err error) {
-    currentshift := q.shiftSoFar
-    // The bitvector where we'll start xoring
-    vectorArrayIndex := currentshift / 64
-    // The position within the bit vector at which we begin xoring
-    vectorOffset := currentshift % 64
-    iterations := len(p)
-    if iterations > widthInBits {
-        iterations = widthInBits
+    var i int
+    // fill last remain
+    lastRemain := int(q.size) % dataSize
+    if lastRemain != 0 {
+        i += xorBytes(q.data[lastRemain:], p)
     }

-    for i := 0; i < iterations; i++ {
-        isLastCell := vectorArrayIndex == len(q.data)-1
-        var bitsInVectorCell int
-        if isLastCell {
-            bitsInVectorCell = bitsInLastCell
-        } else {
-            bitsInVectorCell = 64
-        }
-
-        // There's at least 2 bitvectors before we reach the end of the array
-        if vectorOffset <= bitsInVectorCell-8 {
-            for j := i; j < len(p); j += widthInBits {
-                q.data[vectorArrayIndex] ^= uint64(p[j]) << uint(vectorOffset)
-            }
-        } else {
-            index1 := vectorArrayIndex
-            var index2 int
-            if isLastCell {
-                index2 = 0
-            } else {
-                index2 = vectorArrayIndex + 1
-            }
-            low := byte(bitsInVectorCell - vectorOffset)
-
-            xoredByte := byte(0)
-            for j := i; j < len(p); j += widthInBits {
-                xoredByte ^= p[j]
-            }
-            q.data[index1] ^= uint64(xoredByte) << uint(vectorOffset)
-            q.data[index2] ^= uint64(xoredByte) >> low
-        }
-        vectorOffset += shift
-        for vectorOffset >= bitsInVectorCell {
-            if isLastCell {
-                vectorArrayIndex = 0
-            } else {
-                vectorArrayIndex = vectorArrayIndex + 1
-            }
-            vectorOffset -= bitsInVectorCell
+    if i != len(p) {
+        for len(p)-i >= dataSize {
+            i += xorBytes(q.data[:], p[i:])
         }
+        xorBytes(q.data[:], p[i:])
     }
-
-    // Update the starting position in a circular shift pattern
-    q.shiftSoFar = (q.shiftSoFar + shift*(len(p)%widthInBits)) % widthInBits
-
-    q.lengthSoFar += uint64(len(p))
-
+    q.size += uint64(len(p))
     return len(p), nil
 }

 // Calculate the current checksum
-func (q *quickXorHash) checkSum() (h [Size]byte) {
-    // Output the data as little endian bytes
-    ph := 0
-    for i := 0; i < len(q.data)-1; i++ {
-        d := q.data[i]
-        _ = h[ph+7] // bounds check
-        h[ph+0] = byte(d >> (8 * 0))
-        h[ph+1] = byte(d >> (8 * 1))
-        h[ph+2] = byte(d >> (8 * 2))
-        h[ph+3] = byte(d >> (8 * 3))
-        h[ph+4] = byte(d >> (8 * 4))
-        h[ph+5] = byte(d >> (8 * 5))
-        h[ph+6] = byte(d >> (8 * 6))
-        h[ph+7] = byte(d >> (8 * 7))
-        ph += 8
+func (q *quickXorHash) checkSum() (h [Size + 1]byte) {
+    for i := 0; i < dataSize; i++ {
+        shift := (i * 11) % 160
+        shiftBytes := shift / 8
+        shiftBits := shift % 8
+        shifted := int(q.data[i]) << shiftBits
+        h[shiftBytes] ^= byte(shifted)
+        h[shiftBytes+1] ^= byte(shifted >> 8)
     }
-    // remaining 32 bits
-    d := q.data[len(q.data)-1]
-    h[Size-4] = byte(d >> (8 * 0))
-    h[Size-3] = byte(d >> (8 * 1))
-    h[Size-2] = byte(d >> (8 * 2))
-    h[Size-1] = byte(d >> (8 * 3))
+    h[0] ^= h[20]

     // XOR the file length with the least significant bits in little endian format
-    d = q.lengthSoFar
+    d := q.size
     h[Size-8] ^= byte(d >> (8 * 0))
     h[Size-7] ^= byte(d >> (8 * 1))
     h[Size-6] ^= byte(d >> (8 * 2))
@@ -174,7 +106,7 @@ func (q *quickXorHash) checkSum() (h [Size]byte) {
 // It does not change the underlying hash state.
 func (q *quickXorHash) Sum(b []byte) []byte {
     hash := q.checkSum()
-    return append(b, hash[:]...)
+    return append(b, hash[:Size]...)
 }

 // Reset resets the Hash to its initial state.

@@ -196,8 +128,10 @@ func (q *quickXorHash) BlockSize() int {
 }

 // Sum returns the quickXorHash checksum of the data.
-func Sum(data []byte) [Size]byte {
+func Sum(data []byte) (h [Size]byte) {
     var d quickXorHash
     _, _ = d.Write(data)
-    return d.checkSum()
+    s := d.checkSum()
+    copy(h[:], s[:])
+    return h
 }
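This commit does not change the package's exported surface (New, Sum, Size, BlockSize), so existing callers are unaffected. A minimal usage sketch, assuming the usual rclone module path (OneDrive transmits this hash base64-encoded, hence the encoding below):

package main

import (
    "encoding/base64"
    "fmt"

    "github.com/rclone/rclone/backend/onedrive/quickxorhash"
)

func main() {
    // Streaming use via the hash.Hash interface.
    h := quickxorhash.New()
    _, _ = h.Write([]byte("hello, world"))
    fmt.Println(base64.StdEncoding.EncodeToString(h.Sum(nil)))

    // One-shot convenience function; prints the same digest.
    sum := quickxorhash.Sum([]byte("hello, world"))
    fmt.Println(base64.StdEncoding.EncodeToString(sum[:]))
}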
backend/onedrive/quickxorhash/quickxorhash_test.go

@@ -4,6 +4,7 @@ import (
     "encoding/base64"
     "fmt"
     "hash"
+    "math/rand"
     "testing"

     "github.com/stretchr/testify/assert"

@@ -166,3 +167,16 @@ func TestReset(t *testing.T) {

 // check interface
 var _ hash.Hash = (*quickXorHash)(nil)
+
+func BenchmarkQuickXorHash(b *testing.B) {
+    b.SetBytes(1 << 20)
+    buf := make([]byte, 1<<20)
+    rand.Read(buf)
+    h := New()
+    b.ResetTimer()
+    for i := 0; i < b.N; i++ {
+        h.Reset()
+        h.Write(buf)
+        h.Sum(nil)
+    }
+}
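For reference, the BenchmarkQuickXorHash added above can be run with the standard tooling from the rclone source tree, e.g. go test -bench QuickXorHash ./backend/onedrive/quickxorhash; a go1.20 toolchain is needed to exercise the crypto/subtle path (see xor_1.20.go below) rather than the portable fallback.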
backend/onedrive/quickxorhash/xor.go (new file, 20 additions)

@@ -0,0 +1,20 @@
+//go:build !go1.20
+
+package quickxorhash
+
+func xorBytes(dst, src []byte) int {
+    n := len(dst)
+    if len(src) < n {
+        n = len(src)
+    }
+    if n == 0 {
+        return 0
+    }
+    dst = dst[:n]
+    //src = src[:n]
+    src = src[:len(dst)] // remove bounds check in loop
+    for i := range dst {
+        dst[i] ^= src[i]
+    }
+    return n
+}
backend/onedrive/quickxorhash/xor_1.20.go (new file, 9 additions)

@@ -0,0 +1,9 @@
+//go:build go1.20
+
+package quickxorhash
+
+import "crypto/subtle"
+
+func xorBytes(dst, src []byte) int {
+    return subtle.XORBytes(dst, src, dst)
+}
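Of the two new files, xor_1.20.go is selected by its //go:build constraint on go1.20 and later, where crypto/subtle.XORBytes is available; xor.go is the portable byte-loop fallback for older toolchains. This is why the commit message quotes the ~25x figure specifically "when using go1.20".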