From 7e72d638dfc4a159a8a454f16b1819d18c6c153b Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Fri, 24 Apr 2020 20:56:45 +0200 Subject: [PATCH] Add patched chunker Referenced in https://forum.restic.net/t/restic-slice-bounds-out-of-range/2617/20 --- chunker/.travis.yml | 25 +++ chunker/LICENSE | 23 ++ chunker/README.md | 12 + chunker/chunker.go | 383 ++++++++++++++++++++++++++++++++ chunker/chunker_test.go | 347 +++++++++++++++++++++++++++++ chunker/doc.go | 82 +++++++ chunker/example_test.go | 39 ++++ chunker/go.mod | 3 + chunker/polynomials.go | 310 ++++++++++++++++++++++++++ chunker/polynomials_test.go | 424 ++++++++++++++++++++++++++++++++++++ go.mod | 2 + 11 files changed, 1650 insertions(+) create mode 100644 chunker/.travis.yml create mode 100644 chunker/LICENSE create mode 100644 chunker/README.md create mode 100644 chunker/chunker.go create mode 100644 chunker/chunker_test.go create mode 100644 chunker/doc.go create mode 100644 chunker/example_test.go create mode 100644 chunker/go.mod create mode 100644 chunker/polynomials.go create mode 100644 chunker/polynomials_test.go diff --git a/chunker/.travis.yml b/chunker/.travis.yml new file mode 100644 index 000000000..71fc89384 --- /dev/null +++ b/chunker/.travis.yml @@ -0,0 +1,25 @@ +language: go +sudo: false + +matrix: + include: + - os: linux + go: "1.9.x" + - os: linux + go: "1.10.x" + - os: linux + go: "tip" + - os: osx + go: "1.10.x" + +install: + - go get -t ./... + - go get -u golang.org/x/lint/golint + - go get -u golang.org/x/tools/cmd/goimports + +script: + - go vet ./... + - go test -v -cpu=2 ./... + - go test -v -cpu=1,2,4 -short -race ./... + - diff -au <(goimports -d .) <(printf "") + - diff -au <(golint ./...) <(printf "") diff --git a/chunker/LICENSE b/chunker/LICENSE new file mode 100644 index 000000000..04f854350 --- /dev/null +++ b/chunker/LICENSE @@ -0,0 +1,23 @@ +Copyright (c) 2014, Alexander Neumann +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/chunker/README.md b/chunker/README.md new file mode 100644 index 000000000..ae84c5497 --- /dev/null +++ b/chunker/README.md @@ -0,0 +1,12 @@ +[![GoDoc](https://godoc.org/github.com/restic/chunker?status.svg)](http://godoc.org/github.com/restic/chunker) +[![Build Status](https://travis-ci.com/restic/chunker.svg?branch=master)](https://travis-ci.com/restic/chunker) + +The package `chunker` implements content-defined-chunking (CDC) based on a +rolling Rabin Hash. The library is part of the [restic backup +program](https://github.com/restic/restic). + +An introduction to Content Defined Chunking can be found in the restic blog +post [Foundation - Introducing Content Defined Chunking (CDC)](https://restic.github.io/blog/2015-09-12/restic-foundation1-cdc). + +You can find the API documentation at +https://godoc.org/github.com/restic/chunker diff --git a/chunker/chunker.go b/chunker/chunker.go new file mode 100644 index 000000000..563249825 --- /dev/null +++ b/chunker/chunker.go @@ -0,0 +1,383 @@ +package chunker + +import ( + "errors" + "fmt" + "io" + "sync" +) + +const ( + kiB = 1024 + miB = 1024 * kiB + + // WindowSize is the size of the sliding window. + windowSize = 64 + + // MinSize is the default minimal size of a chunk. + MinSize = 512 * kiB + // MaxSize is the default maximal size of a chunk. + MaxSize = 8 * miB + + chunkerBufSize = 512 * kiB +) + +type tables struct { + out [256]Pol + mod [256]Pol +} + +// cache precomputed tables, these are read-only anyway +var cache struct { + entries map[Pol]tables + sync.Mutex +} + +func init() { + cache.entries = make(map[Pol]tables) +} + +// Chunk is one content-dependent chunk of bytes whose end was cut when the +// Rabin Fingerprint had the value stored in Cut. +type Chunk struct { + Start uint + Length uint + Cut uint64 + Data []byte +} + +type chunkerState struct { + window [windowSize]byte + wpos uint + + buf []byte + bpos uint + bmax uint + + start uint + count uint + pos uint + + pre uint // wait for this many bytes before start calculating an new chunk + + digest uint64 +} + +type chunkerConfig struct { + MinSize, MaxSize uint + + pol Pol + polShift uint + tables tables + tablesInitialized bool + splitmask uint64 + + rd io.Reader + closed bool +} + +// Chunker splits content with Rabin Fingerprints. +type Chunker struct { + chunkerConfig + chunkerState +} + +// SetAverageBits allows to control the frequency of chunk discovery: +// the lower averageBits, the higher amount of chunks will be identified. +// The default value is 20 bits, so chunks will be of 1MiB size on average. +func (c *Chunker) SetAverageBits(averageBits int) { + c.splitmask = (1 << uint64(averageBits)) - 1 +} + +// New returns a new Chunker based on polynomial p that reads from rd. +func New(rd io.Reader, pol Pol) *Chunker { + return NewWithBoundaries(rd, pol, MinSize, MaxSize) +} + +// NewWithBoundaries returns a new Chunker based on polynomial p that reads from +// rd and custom min and max size boundaries. +func NewWithBoundaries(rd io.Reader, pol Pol, min, max uint) *Chunker { + c := &Chunker{ + chunkerState: chunkerState{ + buf: make([]byte, chunkerBufSize), + }, + chunkerConfig: chunkerConfig{ + pol: pol, + rd: rd, + MinSize: min, + MaxSize: max, + splitmask: (1 << 20) - 1, // aim to create chunks of 20 bits or about 1MiB on average. + }, + } + + c.reset() + + return c +} + +// Reset reinitializes the chunker with a new reader and polynomial. +func (c *Chunker) Reset(rd io.Reader, pol Pol) { + c.ResetWithBoundaries(rd, pol, MinSize, MaxSize) +} + +// ResetWithBoundaries reinitializes the chunker with a new reader, polynomial +// and custom min and max size boundaries. +func (c *Chunker) ResetWithBoundaries(rd io.Reader, pol Pol, min, max uint) { + *c = Chunker{ + chunkerState: chunkerState{ + buf: c.buf, + }, + chunkerConfig: chunkerConfig{ + pol: pol, + rd: rd, + MinSize: min, + MaxSize: max, + splitmask: (1 << 20) - 1, + }, + } + + c.reset() +} + +func (c *Chunker) reset() { + c.polShift = uint(c.pol.Deg() - 8) + c.fillTables() + + for i := 0; i < windowSize; i++ { + c.window[i] = 0 + } + + c.closed = false + c.digest = 0 + c.wpos = 0 + c.count = 0 + c.digest = c.slide(c.digest, 1) + c.start = c.pos + + // do not start a new chunk unless at least MinSize bytes have been read + c.pre = c.MinSize - windowSize +} + +// fillTables calculates out_table and mod_table for optimization. This +// implementation uses a cache in the global variable cache. +func (c *Chunker) fillTables() { + // if polynomial hasn't been specified, do not compute anything for now + if c.pol == 0 { + return + } + + c.tablesInitialized = true + + // test if the tables are cached for this polynomial + cache.Lock() + defer cache.Unlock() + if t, ok := cache.entries[c.pol]; ok { + c.tables = t + return + } + + // calculate table for sliding out bytes. The byte to slide out is used as + // the index for the table, the value contains the following: + // out_table[b] = Hash(b || 0 || ... || 0) + // \ windowsize-1 zero bytes / + // To slide out byte b_0 for window size w with known hash + // H := H(b_0 || ... || b_w), it is sufficient to add out_table[b_0]: + // H(b_0 || ... || b_w) + H(b_0 || 0 || ... || 0) + // = H(b_0 + b_0 || b_1 + 0 || ... || b_w + 0) + // = H( 0 || b_1 || ... || b_w) + // + // Afterwards a new byte can be shifted in. + for b := 0; b < 256; b++ { + var h Pol + + h = appendByte(h, byte(b), c.pol) + for i := 0; i < windowSize-1; i++ { + h = appendByte(h, 0, c.pol) + } + c.tables.out[b] = h + } + + // calculate table for reduction mod Polynomial + k := c.pol.Deg() + for b := 0; b < 256; b++ { + // mod_table[b] = A | B, where A = (b(x) * x^k mod pol) and B = b(x) * x^k + // + // The 8 bits above deg(Polynomial) determine what happens next and so + // these bits are used as a lookup to this table. The value is split in + // two parts: Part A contains the result of the modulus operation, part + // B is used to cancel out the 8 top bits so that one XOR operation is + // enough to reduce modulo Polynomial + c.tables.mod[b] = Pol(uint64(b)<> polShift' + if polShift > 53-8 { + return Chunk{}, errors.New("the polynomial must have a degree less than or equal 53") + } + minSize := c.MinSize + maxSize := c.MaxSize + buf := c.buf + for { + if c.bpos >= c.bmax { + n, err := io.ReadFull(c.rd, buf[:]) + + if err == io.ErrUnexpectedEOF { + err = nil + } + + // io.ReadFull only returns io.EOF when no bytes could be read. If + // this is the case and we're in this branch, there are no more + // bytes to buffer, so this was the last chunk. If a different + // error has occurred, return that error and abandon the current + // chunk. + if err == io.EOF && !c.closed { + c.closed = true + + // return current chunk, if any bytes have been processed + if c.count > 0 { + return Chunk{ + Start: c.start, + Length: c.count, + Cut: c.digest, + Data: data, + }, nil + } + } + + if err != nil { + return Chunk{}, err + } + + if n < 0 { + return Chunk{}, fmt.Errorf("ReadFull returned negative number of bytes read: %v", n) + } + + c.bpos = 0 + c.bmax = uint(n) + } + + // check if bytes have to be dismissed before starting a new chunk + if c.pre > 0 { + n := c.bmax - c.bpos + if c.pre > uint(n) { + c.pre -= uint(n) + data = append(data, buf[c.bpos:c.bmax]...) + + c.count += uint(n) + c.pos += uint(n) + c.bpos = c.bmax + + continue + } + + data = append(data, buf[c.bpos:c.bpos+c.pre]...) + + c.bpos += c.pre + c.count += c.pre + c.pos += c.pre + c.pre = 0 + } + + add := c.count + digest := c.digest + win := c.window + wpos := c.wpos + for _, b := range buf[c.bpos:c.bmax] { + // slide(b) + // limit wpos before to elide array bound checks + wpos = wpos % windowSize + out := win[wpos] + win[wpos] = b + digest ^= uint64(tabout[out]) + wpos++ + + // updateDigest + index := byte(digest >> polShift) + digest <<= 8 + digest |= uint64(b) + + digest ^= uint64(tabmod[index]) + // end manual inline + + add++ + if add < minSize { + continue + } + + if (digest&c.splitmask) == 0 || add >= maxSize { + i := add - c.count - 1 + data = append(data, c.buf[c.bpos:c.bpos+uint(i)+1]...) + c.count = add + c.pos += uint(i) + 1 + c.bpos += uint(i) + 1 + c.buf = buf + + chunk := Chunk{ + Start: c.start, + Length: c.count, + Cut: digest, + Data: data, + } + + c.reset() + + return chunk, nil + } + } + c.digest = digest + c.window = win + c.wpos = wpos % windowSize + + steps := c.bmax - c.bpos + if steps > 0 { + data = append(data, c.buf[c.bpos:c.bpos+steps]...) + } + c.count += steps + c.pos += steps + c.bpos = c.bmax + } +} + +func updateDigest(digest uint64, polShift uint, tab tables, b byte) (newDigest uint64) { + index := digest >> polShift + digest <<= 8 + digest |= uint64(b) + + digest ^= uint64(tab.mod[index]) + return digest +} + +func (c *Chunker) slide(digest uint64, b byte) (newDigest uint64) { + out := c.window[c.wpos] + c.window[c.wpos] = b + digest ^= uint64(c.tables.out[out]) + c.wpos = (c.wpos + 1) % windowSize + + digest = updateDigest(digest, c.polShift, c.tables, b) + return digest +} + +func appendByte(hash Pol, b byte, pol Pol) Pol { + hash <<= 8 + hash |= Pol(b) + + return hash.Mod(pol) +} diff --git a/chunker/chunker_test.go b/chunker/chunker_test.go new file mode 100644 index 000000000..0c9535715 --- /dev/null +++ b/chunker/chunker_test.go @@ -0,0 +1,347 @@ +package chunker + +import ( + "bytes" + "crypto/sha256" + "encoding/hex" + "io" + "math/rand" + "testing" + "time" +) + +func parseDigest(s string) []byte { + d, err := hex.DecodeString(s) + if err != nil { + panic(err) + } + + return d +} + +type chunk struct { + Length uint + CutFP uint64 + Digest []byte +} + +// polynomial used for all the tests below +const testPol = Pol(0x3DA3358B4DC173) + +// created for 32MB of random data out of math/rand's Uint32() seeded by +// constant 23 +// +// chunking configuration: +// window size 64, avg chunksize 1<<20, min chunksize 1<<19, max chunksize 1<<23 +// polynom 0x3DA3358B4DC173 +var chunks1 = []chunk{ + chunk{2163460, 0x000b98d4cdf00000, parseDigest("4b94cb2cf293855ea43bf766731c74969b91aa6bf3c078719aabdd19860d590d")}, + chunk{643703, 0x000d4e8364d00000, parseDigest("5727a63c0964f365ab8ed2ccf604912f2ea7be29759a2b53ede4d6841e397407")}, + chunk{1528956, 0x0015a25c2ef00000, parseDigest("a73759636a1e7a2758767791c69e81b69fb49236c6929e5d1b654e06e37674ba")}, + chunk{1955808, 0x00102a8242e00000, parseDigest("c955fb059409b25f07e5ae09defbbc2aadf117c97a3724e06ad4abd2787e6824")}, + chunk{2222372, 0x00045da878000000, parseDigest("6ba5e9f7e1b310722be3627716cf469be941f7f3e39a4c3bcefea492ec31ee56")}, + chunk{2538687, 0x00198a8179900000, parseDigest("8687937412f654b5cfe4a82b08f28393a0c040f77c6f95e26742c2fc4254bfde")}, + chunk{609606, 0x001d4e8d17100000, parseDigest("5da820742ff5feb3369112938d3095785487456f65a8efc4b96dac4be7ebb259")}, + chunk{1205738, 0x000a7204dd600000, parseDigest("cc70d8fad5472beb031b1aca356bcab86c7368f40faa24fe5f8922c6c268c299")}, + chunk{959742, 0x00183e71e1400000, parseDigest("4065bdd778f95676c92b38ac265d361f81bff17d76e5d9452cf985a2ea5a4e39")}, + chunk{4036109, 0x001fec043c700000, parseDigest("b9cf166e75200eb4993fc9b6e22300a6790c75e6b0fc8f3f29b68a752d42f275")}, + chunk{1525894, 0x000b1574b1500000, parseDigest("2f238180e4ca1f7520a05f3d6059233926341090f9236ce677690c1823eccab3")}, + chunk{1352720, 0x00018965f2e00000, parseDigest("afd12f13286a3901430de816e62b85cc62468c059295ce5888b76b3af9028d84")}, + chunk{811884, 0x00155628aa100000, parseDigest("42d0cdb1ee7c48e552705d18e061abb70ae7957027db8ae8db37ec756472a70a")}, + chunk{1282314, 0x001909a0a1400000, parseDigest("819721c2457426eb4f4c7565050c44c32076a56fa9b4515a1c7796441730eb58")}, + chunk{1318021, 0x001cceb980000000, parseDigest("842eb53543db55bacac5e25cb91e43cc2e310fe5f9acc1aee86bdf5e91389374")}, + chunk{948640, 0x0011f7a470a00000, parseDigest("b8e36bf7019bb96ac3fb7867659d2167d9d3b3148c09fe0de45850b8fe577185")}, + chunk{645464, 0x00030ce2d9400000, parseDigest("5584bd27982191c3329f01ed846bfd266e96548dfa87018f745c33cfc240211d")}, + chunk{533758, 0x0004435c53c00000, parseDigest("4da778a25b72a9a0d53529eccfe2e5865a789116cb1800f470d8df685a8ab05d")}, + chunk{1128303, 0x0000c48517800000, parseDigest("08c6b0b38095b348d80300f0be4c5184d2744a17147c2cba5cc4315abf4c048f")}, + chunk{800374, 0x000968473f900000, parseDigest("820284d2c8fd243429674c996d8eb8d3450cbc32421f43113e980f516282c7bf")}, + chunk{2453512, 0x001e197c92600000, parseDigest("5fa870ed107c67704258e5e50abe67509fb73562caf77caa843b5f243425d853")}, + chunk{2651975, 0x000ae6c868000000, parseDigest("181347d2bbec32bef77ad5e9001e6af80f6abcf3576549384d334ee00c1988d8")}, + chunk{237392, 0x0000000000000001, parseDigest("fcd567f5d866357a8e299fd5b2359bb2c8157c30395229c4e9b0a353944a7978")}, +} + +// test if nullbytes are correctly split, even if length is a multiple of MinSize. +var chunks2 = []chunk{ + chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, + chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, + chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, + chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, +} + +// the same as chunks1, but avg chunksize is 1<<19 +var chunks3 = []chunk{ + chunk{1491586, 0x00023e586ea80000, parseDigest("4c008237df602048039287427171cef568a6cb965d1b5ca28dc80504a24bb061")}, + chunk{671874, 0x000b98d4cdf00000, parseDigest("fa8a42321b90c3d4ce9dd850562b2fd0c0fe4bdd26cf01a24f22046a224225d3")}, + chunk{643703, 0x000d4e8364d00000, parseDigest("5727a63c0964f365ab8ed2ccf604912f2ea7be29759a2b53ede4d6841e397407")}, + chunk{1284146, 0x0012b527e4780000, parseDigest("16d04cafecbeae9eaedd49da14c7ad7cdc2b1cc8569e5c16c32c9fb045aa899a")}, + chunk{823366, 0x000d1d6752180000, parseDigest("48662c118514817825ad4761e8e2e5f28f9bd8281b07e95dcafc6d02e0aa45c3")}, + chunk{810134, 0x0016071b6e180000, parseDigest("f629581aa05562f97f2c359890734c8574c5575da32f9289c5ba70bfd05f3f46")}, + chunk{567118, 0x00102a8242e00000, parseDigest("d4f0797c56c60d01bac33bfd49957a4816b6c067fc155b026de8a214cab4d70a")}, + chunk{821315, 0x001b3e42c8180000, parseDigest("8ebd0fd5db0293bd19140da936eb8b1bbd3cd6ffbec487385b956790014751ca")}, + chunk{1401057, 0x00045da878000000, parseDigest("001360af59adf4871ef138cfa2bb49007e86edaf5ac2d6f0b3d3014510991848")}, + chunk{2311122, 0x0005cbd885380000, parseDigest("8276d489b566086d9da95dc5c5fe6fc7d72646dd3308ced6b5b6ddb8595f0aa1")}, + chunk{608723, 0x001cfcd86f280000, parseDigest("518db33ba6a79d4f3720946f3785c05b9611082586d47ea58390fc2f6de9449e")}, + chunk{980456, 0x0013edb7a7f80000, parseDigest("0121b1690738395e15fecba1410cd0bf13fde02225160cad148829f77e7b6c99")}, + chunk{1140278, 0x0001f9f017e80000, parseDigest("28ca7c74804b5075d4f5eeb11f0845d99f62e8ea3a42b9a05c7bd5f2fca619dd")}, + chunk{2015542, 0x00097bf5d8180000, parseDigest("6fe8291f427d48650a5f0f944305d3a2dbc649bd401d2655fc0bdd42e890ca5a")}, + chunk{904752, 0x000e1863eff80000, parseDigest("62af1f1eb3f588d18aff28473303cc4731fc3cafcc52ce818fee3c4c2820854d")}, + chunk{713072, 0x001f3bb1b9b80000, parseDigest("4bda9dc2e3031d004d87a5cc93fe5207c4b0843186481b8f31597dc6ffa1496c")}, + chunk{675937, 0x001fec043c700000, parseDigest("5299c8c5acec1b90bb020cd75718aab5e12abb9bf66291465fd10e6a823a8b4a")}, + chunk{1525894, 0x000b1574b1500000, parseDigest("2f238180e4ca1f7520a05f3d6059233926341090f9236ce677690c1823eccab3")}, + chunk{1352720, 0x00018965f2e00000, parseDigest("afd12f13286a3901430de816e62b85cc62468c059295ce5888b76b3af9028d84")}, + chunk{811884, 0x00155628aa100000, parseDigest("42d0cdb1ee7c48e552705d18e061abb70ae7957027db8ae8db37ec756472a70a")}, + chunk{1282314, 0x001909a0a1400000, parseDigest("819721c2457426eb4f4c7565050c44c32076a56fa9b4515a1c7796441730eb58")}, + chunk{1093738, 0x0017f5d048880000, parseDigest("5dddfa7a241b68f65d267744bdb082ee865f3c2f0d8b946ea0ee47868a01bbff")}, + chunk{962003, 0x000b921f7ef80000, parseDigest("0cb5c9ebba196b441c715c8d805f6e7143a81cd5b0d2c65c6aacf59ca9124af9")}, + chunk{856384, 0x00030ce2d9400000, parseDigest("7734b206d46f3f387e8661e81edf5b1a91ea681867beb5831c18aaa86632d7fb")}, + chunk{533758, 0x0004435c53c00000, parseDigest("4da778a25b72a9a0d53529eccfe2e5865a789116cb1800f470d8df685a8ab05d")}, + chunk{1128303, 0x0000c48517800000, parseDigest("08c6b0b38095b348d80300f0be4c5184d2744a17147c2cba5cc4315abf4c048f")}, + chunk{800374, 0x000968473f900000, parseDigest("820284d2c8fd243429674c996d8eb8d3450cbc32421f43113e980f516282c7bf")}, + chunk{2453512, 0x001e197c92600000, parseDigest("5fa870ed107c67704258e5e50abe67509fb73562caf77caa843b5f243425d853")}, + chunk{665901, 0x00118c842cb80000, parseDigest("deceec26163842fdef6560311c69bf8a9871a56e16d719e2c4b7e4d668ceb61f")}, + chunk{1986074, 0x000ae6c868000000, parseDigest("64cd64bf3c3bc389eb20df8310f0427d1c36ab2eaaf09e346bfa7f0453fc1a18")}, + chunk{237392, 0x0000000000000001, parseDigest("fcd567f5d866357a8e299fd5b2359bb2c8157c30395229c4e9b0a353944a7978")}, +} + +func testWithData(t *testing.T, chnker *Chunker, testChunks []chunk, checkDigest bool) []Chunk { + chunks := []Chunk{} + + pos := uint(0) + for i, chunk := range testChunks { + c, err := chnker.Next(nil) + + if err != nil { + t.Fatalf("Error returned with chunk %d: %v", i, err) + } + + if c.Start != pos { + t.Fatalf("Start for chunk %d does not match: expected %d, got %d", + i, pos, c.Start) + } + + if c.Length != chunk.Length { + t.Fatalf("Length for chunk %d does not match: expected %d, got %d", + i, chunk.Length, c.Length) + } + + if c.Cut != chunk.CutFP { + t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x", + i, len(chunks)-1, chunk.CutFP, c.Cut) + } + + if checkDigest { + digest := hashData(c.Data) + if !bytes.Equal(chunk.Digest, digest) { + t.Fatalf("Digest fingerprint for chunk %d/%d does not match: expected %02x, got %02x", + i, len(chunks)-1, chunk.Digest, digest) + } + } + + pos += c.Length + chunks = append(chunks, c) + } + + _, err := chnker.Next(nil) + if err != io.EOF { + t.Fatal("Wrong error returned after last chunk") + } + + if len(chunks) != len(testChunks) { + t.Fatal("Amounts of test and resulting chunks do not match") + } + + return chunks +} + +func getRandom(seed int64, count int) []byte { + buf := make([]byte, count) + + rnd := rand.New(rand.NewSource(seed)) + for i := 0; i < count; i += 4 { + r := rnd.Uint32() + buf[i] = byte(r) + buf[i+1] = byte(r >> 8) + buf[i+2] = byte(r >> 16) + buf[i+3] = byte(r >> 24) + } + + return buf +} + +func hashData(d []byte) []byte { + h := sha256.New() + h.Write(d) + return h.Sum(nil) +} + +func TestChunker(t *testing.T) { + // setup data source + buf := getRandom(23, 32*1024*1024) + ch := New(bytes.NewReader(buf), testPol) + testWithData(t, ch, chunks1, true) + + // setup nullbyte data source + buf = bytes.Repeat([]byte{0}, len(chunks2)*MinSize) + ch = New(bytes.NewReader(buf), testPol) + + testWithData(t, ch, chunks2, true) +} + +func TestChunkerWithCustomAverageBits(t *testing.T) { + buf := getRandom(23, 32*1024*1024) + ch := New(bytes.NewReader(buf), testPol) + + // sligthly decrease averageBits to get more chunks + ch.SetAverageBits(19) + testWithData(t, ch, chunks3, true) +} + +func TestChunkerReset(t *testing.T) { + buf := getRandom(23, 32*1024*1024) + ch := New(bytes.NewReader(buf), testPol) + testWithData(t, ch, chunks1, true) + + ch.Reset(bytes.NewReader(buf), testPol) + testWithData(t, ch, chunks1, true) +} + +func TestChunkerWithRandomPolynomial(t *testing.T) { + // setup data source + buf := getRandom(23, 32*1024*1024) + + // generate a new random polynomial + start := time.Now() + p, err := RandomPolynomial() + if err != nil { + t.Fatal(err) + } + t.Logf("generating random polynomial took %v", time.Since(start)) + + start = time.Now() + ch := New(bytes.NewReader(buf), p) + t.Logf("creating chunker took %v", time.Since(start)) + + // make sure that first chunk is different + c, err := ch.Next(nil) + if err != nil { + t.Fatal(err.Error()) + } + + if c.Cut == chunks1[0].CutFP { + t.Fatal("Cut point is the same") + } + + if c.Length == chunks1[0].Length { + t.Fatal("Length is the same") + } + + if bytes.Equal(hashData(c.Data), chunks1[0].Digest) { + t.Fatal("Digest is the same") + } +} + +func TestChunkerWithoutHash(t *testing.T) { + // setup data source + buf := getRandom(23, 32*1024*1024) + + ch := New(bytes.NewReader(buf), testPol) + chunks := testWithData(t, ch, chunks1, false) + + // test reader + for i, c := range chunks { + if uint(len(c.Data)) != chunks1[i].Length { + t.Fatalf("reader returned wrong number of bytes: expected %d, got %d", + chunks1[i].Length, len(c.Data)) + } + + if !bytes.Equal(buf[c.Start:c.Start+c.Length], c.Data) { + t.Fatalf("invalid data for chunk returned: expected %02x, got %02x", + buf[c.Start:c.Start+c.Length], c.Data) + } + } + + // setup nullbyte data source + buf = bytes.Repeat([]byte{0}, len(chunks2)*MinSize) + ch = New(bytes.NewReader(buf), testPol) + + testWithData(t, ch, chunks2, false) +} + +func benchmarkChunker(b *testing.B, checkDigest bool) { + size := 32 * 1024 * 1024 + rd := bytes.NewReader(getRandom(23, size)) + ch := New(rd, testPol) + buf := make([]byte, MaxSize) + + b.ResetTimer() + b.SetBytes(int64(size)) + + var chunks int + for i := 0; i < b.N; i++ { + chunks = 0 + + _, err := rd.Seek(0, 0) + if err != nil { + b.Fatalf("Seek() return error %v", err) + } + + ch.Reset(rd, testPol) + + cur := 0 + for { + chunk, err := ch.Next(buf) + + if err == io.EOF { + break + } + + if err != nil { + b.Fatalf("Unexpected error occurred: %v", err) + } + + if chunk.Length != chunks1[cur].Length { + b.Errorf("wrong chunk length, want %d, got %d", + chunks1[cur].Length, chunk.Length) + } + + if chunk.Cut != chunks1[cur].CutFP { + b.Errorf("wrong cut fingerprint, want 0x%x, got 0x%x", + chunks1[cur].CutFP, chunk.Cut) + } + + if checkDigest { + h := hashData(chunk.Data) + if !bytes.Equal(h, chunks1[cur].Digest) { + b.Errorf("wrong digest, want %x, got %x", + chunks1[cur].Digest, h) + } + } + + chunks++ + cur++ + } + } + + b.Logf("%d chunks, average chunk size: %d bytes", chunks, size/chunks) +} + +func BenchmarkChunkerWithSHA256(b *testing.B) { + benchmarkChunker(b, true) +} + +func BenchmarkChunker(b *testing.B) { + benchmarkChunker(b, false) +} + +func BenchmarkNewChunker(b *testing.B) { + p, err := RandomPolynomial() + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + New(bytes.NewBuffer(nil), p) + } +} diff --git a/chunker/doc.go b/chunker/doc.go new file mode 100644 index 000000000..5537c172c --- /dev/null +++ b/chunker/doc.go @@ -0,0 +1,82 @@ +// Copyright 2014 Alexander Neumann. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package chunker implements Content Defined Chunking (CDC) based on a rolling +Rabin Checksum. + +Choosing a Random Irreducible Polynomial + +The function RandomPolynomial() returns a new random polynomial of degree 53 +for use with the chunker. The degree 53 is chosen because it is the largest +prime below 64-8 = 56, so that the top 8 bits of an uint64 can be used for +optimising calculations in the chunker. + +A random polynomial is chosen selecting 64 random bits, masking away bits +64..54 and setting bit 53 to one (otherwise the polynomial is not of the +desired degree) and bit 0 to one (otherwise the polynomial is trivially +reducible), so that 51 bits are chosen at random. + +This process is repeated until Irreducible() returns true, then this +polynomials is returned. If this doesn't happen after 1 million tries, the +function returns an error. The probability for selecting an irreducible +polynomial at random is about 7.5% ( (2^53-2)/53 / 2^51), so the probability +that no irreducible polynomial has been found after 100 tries is lower than +0.04%. + +Verifying Irreducible Polynomials + +During development the results have been verified using the computational +discrete algebra system GAP, which can be obtained from the website at +http://www.gap-system.org/. + +For filtering a given list of polynomials in hexadecimal coefficient notation, +the following script can be used: + + # create x over F_2 = GF(2) + x := Indeterminate(GF(2), "x"); + + # test if polynomial is irreducible, i.e. the number of factors is one + IrredPoly := function (poly) + return (Length(Factors(poly)) = 1); + end;; + + # create a polynomial in x from the hexadecimal representation of the + # coefficients + Hex2Poly := function (s) + return ValuePol(CoefficientsQadic(IntHexString(s), 2), x); + end;; + + # list of candidates, in hex + candidates := [ "3DA3358B4DC173" ]; + + # create real polynomials + L := List(candidates, Hex2Poly); + + # filter and display the list of irreducible polynomials contained in L + Display(Filtered(L, x -> (IrredPoly(x)))); + +All irreducible polynomials from the list are written to the output. + +Background Literature + +An introduction to Rabin Fingerprints/Checksums can be found in the following articles: + +Michael O. Rabin (1981): "Fingerprinting by Random Polynomials" +http://www.xmailserver.org/rabin.pdf + +Ross N. Williams (1993): "A Painless Guide to CRC Error Detection Algorithms" +http://www.zlib.net/crc_v3.txt + +Andrei Z. Broder (1993): "Some Applications of Rabin's Fingerprinting Method" +http://www.xmailserver.org/rabin_apps.pdf + +Shuhong Gao and Daniel Panario (1997): "Tests and Constructions of Irreducible Polynomials over Finite Fields" +http://www.math.clemson.edu/~sgao/papers/GP97a.pdf + +Andrew Kadatch, Bob Jenkins (2007): "Everything we know about CRC but afraid to forget" +http://crcutil.googlecode.com/files/crc-doc.1.0.pdf + +*/ +package chunker diff --git a/chunker/example_test.go b/chunker/example_test.go new file mode 100644 index 000000000..42f475f87 --- /dev/null +++ b/chunker/example_test.go @@ -0,0 +1,39 @@ +package chunker + +import ( + "bytes" + "crypto/sha256" + "fmt" + "io" +) + +func ExampleChunker() { + // generate 32MiB of deterministic pseudo-random data + data := getRandom(23, 32*1024*1024) + + // create a chunker + chunker := New(bytes.NewReader(data), Pol(0x3DA3358B4DC173)) + + // reuse this buffer + buf := make([]byte, 8*1024*1024) + + for i := 0; i < 5; i++ { + chunk, err := chunker.Next(buf) + if err == io.EOF { + break + } + + if err != nil { + panic(err) + } + + fmt.Printf("%d %02x\n", chunk.Length, sha256.Sum256(chunk.Data)) + } + + // Output: + // 2163460 4b94cb2cf293855ea43bf766731c74969b91aa6bf3c078719aabdd19860d590d + // 643703 5727a63c0964f365ab8ed2ccf604912f2ea7be29759a2b53ede4d6841e397407 + // 1528956 a73759636a1e7a2758767791c69e81b69fb49236c6929e5d1b654e06e37674ba + // 1955808 c955fb059409b25f07e5ae09defbbc2aadf117c97a3724e06ad4abd2787e6824 + // 2222372 6ba5e9f7e1b310722be3627716cf469be941f7f3e39a4c3bcefea492ec31ee56 +} diff --git a/chunker/go.mod b/chunker/go.mod new file mode 100644 index 000000000..593d04268 --- /dev/null +++ b/chunker/go.mod @@ -0,0 +1,3 @@ +module github.com/restic/chunker + +go 1.14 diff --git a/chunker/polynomials.go b/chunker/polynomials.go new file mode 100644 index 000000000..8b126296f --- /dev/null +++ b/chunker/polynomials.go @@ -0,0 +1,310 @@ +package chunker + +import ( + "crypto/rand" + "encoding/binary" + "errors" + "fmt" + "io" + "strconv" +) + +// Pol is a polynomial from F_2[X]. +type Pol uint64 + +// Add returns x+y. +func (x Pol) Add(y Pol) Pol { + r := Pol(uint64(x) ^ uint64(y)) + return r +} + +// mulOverflows returns true if the multiplication would overflow uint64. +// Code by Rob Pike, see +// https://groups.google.com/d/msg/golang-nuts/h5oSN5t3Au4/KaNQREhZh0QJ +func mulOverflows(a, b Pol) bool { + if a <= 1 || b <= 1 { + return false + } + c := a.mul(b) + d := c.Div(b) + if d != a { + return true + } + + return false +} + +func (x Pol) mul(y Pol) Pol { + if x == 0 || y == 0 { + return 0 + } + + var res Pol + for i := 0; i <= y.Deg(); i++ { + if (y & (1 << uint(i))) > 0 { + res = res.Add(x << uint(i)) + } + } + + return res +} + +// Mul returns x*y. When an overflow occurs, Mul panics. +func (x Pol) Mul(y Pol) Pol { + if mulOverflows(x, y) { + panic("multiplication would overflow uint64") + } + + return x.mul(y) +} + +// Deg returns the degree of the polynomial x. If x is zero, -1 is returned. +func (x Pol) Deg() int { + // the degree of 0 is -1 + if x == 0 { + return -1 + } + + // see https://graphics.stanford.edu/~seander/bithacks.html#IntegerLog + + r := 0 + if uint64(x)&0xffffffff00000000 > 0 { + x >>= 32 + r |= 32 + } + + if uint64(x)&0xffff0000 > 0 { + x >>= 16 + r |= 16 + } + + if uint64(x)&0xff00 > 0 { + x >>= 8 + r |= 8 + } + + if uint64(x)&0xf0 > 0 { + x >>= 4 + r |= 4 + } + + if uint64(x)&0xc > 0 { + x >>= 2 + r |= 2 + } + + if uint64(x)&0x2 > 0 { + r |= 1 + } + + return r +} + +// String returns the coefficients in hex. +func (x Pol) String() string { + return "0x" + strconv.FormatUint(uint64(x), 16) +} + +// Expand returns the string representation of the polynomial x. +func (x Pol) Expand() string { + if x == 0 { + return "0" + } + + s := "" + for i := x.Deg(); i > 1; i-- { + if x&(1< 0 { + s += fmt.Sprintf("+x^%d", i) + } + } + + if x&2 > 0 { + s += "+x" + } + + if x&1 > 0 { + s += "+1" + } + + return s[1:] +} + +// DivMod returns x / d = q, and remainder r, +// see https://en.wikipedia.org/wiki/Division_algorithm +func (x Pol) DivMod(d Pol) (Pol, Pol) { + if x == 0 { + return 0, 0 + } + + if d == 0 { + panic("division by zero") + } + + D := d.Deg() + diff := x.Deg() - D + if diff < 0 { + return 0, x + } + + var q Pol + for diff >= 0 { + m := d << uint(diff) + q |= (1 << uint(diff)) + x = x.Add(m) + + diff = x.Deg() - D + } + + return q, x +} + +// Div returns the integer division result x / d. +func (x Pol) Div(d Pol) Pol { + q, _ := x.DivMod(d) + return q +} + +// Mod returns the remainder of x / d +func (x Pol) Mod(d Pol) Pol { + _, r := x.DivMod(d) + return r +} + +// I really dislike having a function that does not terminate, so specify a +// really large upper bound for finding a new irreducible polynomial, and +// return an error when no irreducible polynomial has been found within +// randPolMaxTries. +const randPolMaxTries = 1e6 + +// RandomPolynomial returns a new random irreducible polynomial +// of degree 53 using the default System CSPRNG as source. +// It is equivalent to calling DerivePolynomial(rand.Reader). +func RandomPolynomial() (Pol, error) { + return DerivePolynomial(rand.Reader) +} + +// DerivePolynomial returns an irreducible polynomial of degree 53 +// (largest prime number below 64-8) by reading bytes from source. +// There are (2^53-2/53) irreducible polynomials of degree 53 in +// F_2[X], c.f. Michael O. Rabin (1981): "Fingerprinting by Random +// Polynomials", page 4. If no polynomial could be found in one +// million tries, an error is returned. +func DerivePolynomial(source io.Reader) (Pol, error) { + for i := 0; i < randPolMaxTries; i++ { + var f Pol + + // choose polynomial at (pseudo)random + err := binary.Read(source, binary.LittleEndian, &f) + if err != nil { + return 0, err + } + + // mask away bits above bit 53 + f &= Pol((1 << 54) - 1) + + // set highest and lowest bit so that the degree is 53 and the + // polynomial is not trivially reducible + f |= (1 << 53) | 1 + + // test if f is irreducible + if f.Irreducible() { + return f, nil + } + } + + // If this is reached, we haven't found an irreducible polynomial in + // randPolMaxTries. This error is very unlikely to occur. + return 0, errors.New("unable to find new random irreducible polynomial") +} + +// GCD computes the Greatest Common Divisor x and f. +func (x Pol) GCD(f Pol) Pol { + if f == 0 { + return x + } + + if x == 0 { + return f + } + + if x.Deg() < f.Deg() { + x, f = f, x + } + + return f.GCD(x.Mod(f)) +} + +// Irreducible returns true iff x is irreducible over F_2. This function +// uses Ben Or's reducibility test. +// +// For details see "Tests and Constructions of Irreducible Polynomials over +// Finite Fields". +func (x Pol) Irreducible() bool { + for i := 1; i <= x.Deg()/2; i++ { + if x.GCD(qp(uint(i), x)) != 1 { + return false + } + } + + return true +} + +// MulMod computes x*f mod g +func (x Pol) MulMod(f, g Pol) Pol { + if x == 0 || f == 0 { + return 0 + } + + var res Pol + for i := 0; i <= f.Deg(); i++ { + if (f & (1 << uint(i))) > 0 { + a := x + for j := 0; j < i; j++ { + a = a.Mul(2).Mod(g) + } + res = res.Add(a).Mod(g) + } + } + + return res +} + +// qp computes the polynomial (x^(2^p)-x) mod g. This is needed for the +// reducibility test. +func qp(p uint, g Pol) Pol { + num := (1 << p) + i := 1 + + // start with x + res := Pol(2) + + for i < num { + // repeatedly square res + res = res.MulMod(res, g) + i *= 2 + } + + // add x + return res.Add(2).Mod(g) +} + +// MarshalJSON returns the JSON representation of the Pol. +func (x Pol) MarshalJSON() ([]byte, error) { + buf := strconv.AppendUint([]byte{'"'}, uint64(x), 16) + buf = append(buf, '"') + return buf, nil +} + +// UnmarshalJSON parses a Pol from the JSON data. +func (x *Pol) UnmarshalJSON(data []byte) error { + if len(data) < 2 { + return errors.New("invalid string for polynomial") + } + n, err := strconv.ParseUint(string(data[1:len(data)-1]), 16, 64) + if err != nil { + return err + } + *x = Pol(n) + + return nil +} diff --git a/chunker/polynomials_test.go b/chunker/polynomials_test.go new file mode 100644 index 000000000..741b0ea18 --- /dev/null +++ b/chunker/polynomials_test.go @@ -0,0 +1,424 @@ +package chunker + +import ( + "strconv" + "testing" +) + +var polAddTests = []struct { + x, y Pol + sum Pol +}{ + {23, 16, 23 ^ 16}, + {0x9a7e30d1e855e0a0, 0x670102a1f4bcd414, 0xfd7f32701ce934b4}, + {0x9a7e30d1e855e0a0, 0x9a7e30d1e855e0a0, 0}, +} + +func TestPolAdd(t *testing.T) { + for i, test := range polAddTests { + if test.sum != test.x.Add(test.y) { + t.Errorf("test %d failed: sum != x+y", i) + } + + if test.sum != test.y.Add(test.x) { + t.Errorf("test %d failed: sum != y+x", i) + } + } +} + +func parseBin(s string) Pol { + i, err := strconv.ParseUint(s, 2, 64) + if err != nil { + panic(err) + } + + return Pol(i) +} + +var polMulTests = []struct { + x, y Pol + res Pol +}{ + {1, 2, 2}, + { + parseBin("1101"), + parseBin("10"), + parseBin("11010"), + }, + { + parseBin("1101"), + parseBin("11"), + parseBin("10111"), + }, + { + 0x40000000, + 0x40000000, + 0x1000000000000000, + }, + { + parseBin("1010"), + parseBin("100100"), + parseBin("101101000"), + }, + { + parseBin("100"), + parseBin("11"), + parseBin("1100"), + }, + { + parseBin("11"), + parseBin("110101"), + parseBin("1011111"), + }, + { + parseBin("10011"), + parseBin("110101"), + parseBin("1100001111"), + }, +} + +func TestPolMul(t *testing.T) { + for i, test := range polMulTests { + m := test.x.Mul(test.y) + if test.res != m { + t.Errorf("TestPolMul failed for test %d: %v * %v: want %v, got %v", + i, test.x, test.y, test.res, m) + } + m = test.y.Mul(test.x) + if test.res != test.y.Mul(test.x) { + t.Errorf("TestPolMul failed for %d: %v * %v: want %v, got %v", + i, test.x, test.y, test.res, m) + } + } +} + +func TestPolMulOverflow(t *testing.T) { + defer func() { + // try to recover overflow error + err := recover() + + if e, ok := err.(string); ok && e == "multiplication would overflow uint64" { + return + } + + t.Logf("invalid error raised: %v", err) + // re-raise error if not overflow + panic(err) + }() + + x := Pol(1 << 63) + x.Mul(2) + t.Fatal("overflow test did not panic") +} + +var polDivTests = []struct { + x, y Pol + res Pol +}{ + {10, 50, 0}, + {0, 1, 0}, + { + parseBin("101101000"), // 0x168 + parseBin("1010"), // 0xa + parseBin("100100"), // 0x24 + }, + {2, 2, 1}, + { + 0x8000000000000000, + 0x8000000000000000, + 1, + }, + { + parseBin("1100"), + parseBin("100"), + parseBin("11"), + }, + { + parseBin("1100001111"), + parseBin("10011"), + parseBin("110101"), + }, +} + +func TestPolDiv(t *testing.T) { + for i, test := range polDivTests { + m := test.x.Div(test.y) + if test.res != m { + t.Errorf("TestPolDiv failed for test %d: %v * %v: want %v, got %v", + i, test.x, test.y, test.res, m) + } + } +} + +func TestPolDeg(t *testing.T) { + var x Pol + if x.Deg() != -1 { + t.Errorf("deg(0) is not -1: %v", x.Deg()) + } + + x = 1 + if x.Deg() != 0 { + t.Errorf("deg(1) is not 0: %v", x.Deg()) + } + + for i := 0; i < 64; i++ { + x = 1 << uint(i) + if x.Deg() != i { + t.Errorf("deg(1<<%d) is not %d: %v", i, i, x.Deg()) + } + } +} + +var polModTests = []struct { + x, y Pol + res Pol +}{ + {10, 50, 10}, + {0, 1, 0}, + { + parseBin("101101001"), + parseBin("1010"), + parseBin("1"), + }, + {2, 2, 0}, + { + 0x8000000000000000, + 0x8000000000000000, + 0, + }, + { + parseBin("1100"), + parseBin("100"), + parseBin("0"), + }, + { + parseBin("1100001111"), + parseBin("10011"), + parseBin("0"), + }, +} + +func TestPolModt(t *testing.T) { + for i, test := range polModTests { + res := test.x.Mod(test.y) + if test.res != res { + t.Errorf("test %d failed: want %v, got %v", i, test.res, res) + } + } +} + +func BenchmarkPolDivMod(t *testing.B) { + f := Pol(0x2482734cacca49) + g := Pol(0x3af4b284899) + + for i := 0; i < t.N; i++ { + g.DivMod(f) + } +} + +func BenchmarkPolDiv(t *testing.B) { + f := Pol(0x2482734cacca49) + g := Pol(0x3af4b284899) + + for i := 0; i < t.N; i++ { + g.Div(f) + } +} + +func BenchmarkPolMod(t *testing.B) { + f := Pol(0x2482734cacca49) + g := Pol(0x3af4b284899) + + for i := 0; i < t.N; i++ { + g.Mod(f) + } +} + +func BenchmarkPolDeg(t *testing.B) { + f := Pol(0x3af4b284899) + d := f.Deg() + if d != 41 { + t.Fatalf("BenchmalPolDeg: Wrong degree %d returned, expected %d", + d, 41) + } + + for i := 0; i < t.N; i++ { + f.Deg() + } +} + +func TestRandomPolynomial(t *testing.T) { + _, err := RandomPolynomial() + if err != nil { + t.Fatal(err) + } +} + +func BenchmarkRandomPolynomial(t *testing.B) { + for i := 0; i < t.N; i++ { + _, err := RandomPolynomial() + if err != nil { + t.Fatal(err) + } + } +} + +func TestExpandPolynomial(t *testing.T) { + pol := Pol(0x3DA3358B4DC173) + s := pol.Expand() + if s != "x^53+x^52+x^51+x^50+x^48+x^47+x^45+x^41+x^40+x^37+x^36+x^34+x^32+x^31+x^27+x^25+x^24+x^22+x^19+x^18+x^16+x^15+x^14+x^8+x^6+x^5+x^4+x+1" { + t.Fatal("wrong result") + } +} + +var polIrredTests = []struct { + f Pol + irred bool +}{ + {0x38f1e565e288df, false}, + {0x3DA3358B4DC173, true}, + {0x30a8295b9d5c91, false}, + {0x255f4350b962cb, false}, + {0x267f776110a235, false}, + {0x2f4dae10d41227, false}, + {0x2482734cacca49, true}, + {0x312daf4b284899, false}, + {0x29dfb6553d01d1, false}, + {0x3548245eb26257, false}, + {0x3199e7ef4211b3, false}, + {0x362f39017dae8b, false}, + {0x200d57aa6fdacb, false}, + {0x35e0a4efa1d275, false}, + {0x2ced55b026577f, false}, + {0x260b012010893d, false}, + {0x2df29cbcd59e9d, false}, + {0x3f2ac7488bd429, false}, + {0x3e5cb1711669fb, false}, + {0x226d8de57a9959, false}, + {0x3c8de80aaf5835, false}, + {0x2026a59efb219b, false}, + {0x39dfa4d13fb231, false}, + {0x3143d0464b3299, false}, +} + +func TestPolIrreducible(t *testing.T) { + for _, test := range polIrredTests { + if test.f.Irreducible() != test.irred { + t.Errorf("Irreducibility test for Polynomial %v failed: got %v, wanted %v", + test.f, test.f.Irreducible(), test.irred) + } + } +} + +func BenchmarkPolIrreducible(b *testing.B) { + // find first irreducible polynomial + var pol Pol + for _, test := range polIrredTests { + if test.irred { + pol = test.f + break + } + } + + for i := 0; i < b.N; i++ { + if !pol.Irreducible() { + b.Errorf("Irreducibility test for Polynomial %v failed", pol) + } + } +} + +var polGCDTests = []struct { + f1 Pol + f2 Pol + gcd Pol +}{ + {10, 50, 2}, + {0, 1, 1}, + { + parseBin("101101001"), + parseBin("1010"), + parseBin("1"), + }, + {2, 2, 2}, + { + parseBin("1010"), + parseBin("11"), + parseBin("11"), + }, + { + 0x8000000000000000, + 0x8000000000000000, + 0x8000000000000000, + }, + { + parseBin("1100"), + parseBin("101"), + parseBin("11"), + }, + { + parseBin("1100001111"), + parseBin("10011"), + parseBin("10011"), + }, + { + 0x3DA3358B4DC173, + 0x3DA3358B4DC173, + 0x3DA3358B4DC173, + }, + { + 0x3DA3358B4DC173, + 0x230d2259defd, + 1, + }, + { + 0x230d2259defd, + 0x51b492b3eff2, + parseBin("10011"), + }, +} + +func TestPolGCD(t *testing.T) { + for i, test := range polGCDTests { + gcd := test.f1.GCD(test.f2) + if test.gcd != gcd { + t.Errorf("GCD test %d (%+v) failed: got %v, wanted %v", + i, test, gcd, test.gcd) + } + + gcd = test.f2.GCD(test.f1) + if test.gcd != gcd { + t.Errorf("GCD test %d (%+v) failed: got %v, wanted %v", + i, test, gcd, test.gcd) + } + } +} + +var polMulModTests = []struct { + f1 Pol + f2 Pol + g Pol + mod Pol +}{ + { + 0x1230, + 0x230, + 0x55, + 0x22, + }, + { + 0x0eae8c07dbbb3026, + 0xd5d6db9de04771de, + 0xdd2bda3b77c9, + 0x425ae8595b7a, + }, +} + +func TestPolMulMod(t *testing.T) { + for i, test := range polMulModTests { + mod := test.f1.MulMod(test.f2, test.g) + if mod != test.mod { + t.Errorf("MulMod test %d (%+v) failed: got %v, wanted %v", + i, test, mod, test.mod) + } + } +} diff --git a/go.mod b/go.mod index 7bc073c64..7e7089e26 100644 --- a/go.mod +++ b/go.mod @@ -49,3 +49,5 @@ require ( ) go 1.13 + +replace github.com/restic/chunker => ./chunker