chunker: Require a random irreducible polynomial

This also implements the necessary polynomial arithmetic in F_2[X].
This commit is contained in:
Alexander Neumann 2015-04-05 11:09:59 +02:00
parent 094ca7e635
commit 3cdf3a25b9
6 changed files with 749 additions and 10 deletions

View file

@ -10,10 +10,6 @@ const (
KiB = 1024 KiB = 1024
MiB = 1024 * KiB MiB = 1024 * KiB
// Polynomial is a randomly generated irreducible polynomial of degree 53
// in Z_2[X]. All rabin fingerprints are calculated with this polynomial.
Polynomial = 0x3DA3358B4DC173
// WindowSize is the size of the sliding window. // WindowSize is the size of the sliding window.
WindowSize = 64 WindowSize = 64
@ -28,10 +24,17 @@ const (
) )
var ( var (
pol_shift = deg(Polynomial) - 8 // pol is a randomly generated irreducible polynomial of degree 53
// in Z_2[X]. All rabin fingerprints are calculated with this polynomial.
pol = uint64(0x3DA3358B4DC173)
pol_shift = deg(pol) - 8
once sync.Once once sync.Once
mod_table [256]uint64 mod_table [256]uint64
out_table [256]uint64 out_table [256]uint64
// tables have been filled, do not allow changing the polynomial afterwards
filled bool
) )
// A chunk is one content-dependent chunk of bytes whose end was cut when the // A chunk is one content-dependent chunk of bytes whose end was cut when the
@ -69,6 +72,16 @@ type Chunker struct {
h hash.Hash h hash.Hash
} }
// SetPolynomial sets the polynomial that is used for calculating the rabin
// fingerprints. It must be called before the lookup tables have been filled
// (fill_tables sets the filled flag); once that has happened the polynomial
// can no longer be changed and SetPolynomial panics.
//
// NOTE(review): pol_shift is derived from pol once at package initialisation
// and is not recomputed here, so a polynomial whose degree differs from the
// default's would leave it stale -- confirm callers only pass polynomials of
// the same degree.
func SetPolynomial(f uint64) {
	if !filled {
		pol = f
		return
	}

	panic("polynomial changed after chunker has already been used")
}
// New returns a new Chunker that reads from data from rd with bufsize and pass // New returns a new Chunker that reads from data from rd with bufsize and pass
// all data to hash along the way. // all data to hash along the way.
func New(rd io.Reader, bufsize int, hash hash.Hash) *Chunker { func New(rd io.Reader, bufsize int, hash hash.Hash) *Chunker {
@ -109,6 +122,8 @@ func (c *Chunker) Reset(rd io.Reader) {
// Calculate out_table and mod_table for optimization. Must be called only once. // Calculate out_table and mod_table for optimization. Must be called only once.
func fill_tables() { func fill_tables() {
filled = true
// calculate table for sliding out bytes. The byte to slide out is used as // calculate table for sliding out bytes. The byte to slide out is used as
// the index for the table, the value contains the following: // the index for the table, the value contains the following:
// out_table[b] = Hash(b || 0 || ... || 0) // out_table[b] = Hash(b || 0 || ... || 0)
@ -123,15 +138,15 @@ func fill_tables() {
for b := 0; b < 256; b++ { for b := 0; b < 256; b++ {
var hash uint64 var hash uint64
hash = append_byte(hash, byte(b), Polynomial) hash = append_byte(hash, byte(b), pol)
for i := 0; i < WindowSize-1; i++ { for i := 0; i < WindowSize-1; i++ {
hash = append_byte(hash, 0, Polynomial) hash = append_byte(hash, 0, pol)
} }
out_table[b] = hash out_table[b] = hash
} }
// calculate table for reduction mod Polynomial // calculate table for reduction mod Polynomial
k := deg(Polynomial) k := deg(pol)
for b := 0; b < 256; b++ { for b := 0; b < 256; b++ {
// mod_table[b] = A | B, where A = (b(x) * x^k mod pol) and B = b(x) * x^k // mod_table[b] = A | B, where A = (b(x) * x^k mod pol) and B = b(x) * x^k
// //
@ -140,7 +155,7 @@ func fill_tables() {
// two parts: Part A contains the result of the modulus operation, part // two parts: Part A contains the result of the modulus operation, part
// B is used to cancel out the 8 top bits so that one XOR operation is // B is used to cancel out the 8 top bits so that one XOR operation is
// enough to reduce modulo Polynomial // enough to reduce modulo Polynomial
mod_table[b] = mod(uint64(b)<<uint(k), Polynomial) | (uint64(b) << uint(k)) mod_table[b] = mod(uint64(b)<<uint(k), pol) | (uint64(b) << uint(k))
} }
} }
@ -308,7 +323,7 @@ func append_byte(hash uint64, b byte, pol uint64) uint64 {
return mod(hash, pol) return mod(hash, pol)
} }
// Mod calculates the remainder of x divided by p. // Mod calculates the remainder of x divided by p in F_2[X].
func mod(x, p uint64) uint64 { func mod(x, p uint64) uint64 {
for deg(x) >= deg(p) { for deg(x) >= deg(p) {
shift := uint(deg(x) - deg(p)) shift := uint(deg(x) - deg(p))

View file

@ -6,6 +6,59 @@
Package chunker implements Content Defined Chunking (CDC) based on a rolling Package chunker implements Content Defined Chunking (CDC) based on a rolling
Rabin Checksum. Rabin Checksum.
Choosing a Random Irreducible Polynomial
The function RandomPolynomial() returns a new random polynomial of degree 53
for use with the chunker. The degree 53 is chosen because it is the largest
prime below 64-8 = 56, so that the top 8 bits of a uint64 can be used for
optimising calculations in the chunker.
A random polynomial is chosen by selecting 64 random bits, masking away bits
63..54 and setting bit 53 to one (otherwise the polynomial is not of the
desired degree) and bit 0 to one (otherwise the polynomial is trivially
reducible), so that 52 bits (bits 1 to 52) are chosen at random.
This process is repeated until Irreducible() returns true, then this
polynomial is returned. If this doesn't happen after 1 million tries, the
function returns an error. The probability for selecting an irreducible
polynomial at random is about 3.8% ( (2^53-2)/53 / 2^52), so the probability
that no irreducible polynomial has been found after 100 tries is about
2.1%.
Verifying Irreducible Polynomials
During development the results have been verified using the computational
discrete algebra system GAP, which can be obtained from the website at
http://www.gap-system.org/.
For filtering a given list of polynomials in hexadecimal coefficient notation,
the following script can be used:
# create x over F_2 = GF(2)
x := Indeterminate(GF(2), "x");
# test if polynomial is irreducible, i.e. the number of factors is one
IrredPoly := function (poly)
return (Length(Factors(poly)) = 1);
end;;
# create a polynomial in x from the hexadecimal representation of the
# coefficients
Hex2Poly := function (s)
return ValuePol(CoefficientsQadic(IntHexString(s), 2), x);
end;;
# list of candidates, in hex
candidates := [ "3DA3358B4DC173" ];
# create real polynomials
L := List(candidates, Hex2Poly);
# filter and display the list of irreducible polynomials contained in L
Display(Filtered(L, x -> (IrredPoly(x))));
All irreducible polynomials from the list are written to the output.
Background Literature Background Literature
An introduction to Rabin Fingerprints/Checksums can be found in the following articles: An introduction to Rabin Fingerprints/Checksums can be found in the following articles:
@ -19,6 +72,9 @@ http://www.zlib.net/crc_v3.txt
Andrei Z. Broder (1993): "Some Applications of Rabin's Fingerprinting Method" Andrei Z. Broder (1993): "Some Applications of Rabin's Fingerprinting Method"
http://www.xmailserver.org/rabin_apps.pdf http://www.xmailserver.org/rabin_apps.pdf
Shuhong Gao and Daniel Panario (1997): "Tests and Constructions of Irreducible Polynomials over Finite Fields"
http://www.math.clemson.edu/~sgao/papers/GP97a.pdf
Andrew Kadatch, Bob Jenkins (2007): "Everything we know about CRC but afraid to forget" Andrew Kadatch, Bob Jenkins (2007): "Everything we know about CRC but afraid to forget"
http://crcutil.googlecode.com/files/crc-doc.1.0.pdf http://crcutil.googlecode.com/files/crc-doc.1.0.pdf

36
chunker/generic_test.go Normal file
View file

@ -0,0 +1,36 @@
package chunker_test
import (
"fmt"
"path/filepath"
"reflect"
"runtime"
"testing"
)
// assert aborts the test via tb.FailNow when condition is false. Before doing
// so it prints msg, formatted with v, in red and prefixed with the file name
// and line number of the caller.
func assert(tb testing.TB, condition bool, msg string, v ...interface{}) {
	if condition {
		return
	}

	// Caller(1) is the function that invoked assert.
	_, file, line, _ := runtime.Caller(1)

	args := make([]interface{}, 0, len(v)+2)
	args = append(args, filepath.Base(file), line)
	args = append(args, v...)

	fmt.Printf("\033[31m%s:%d: "+msg+"\033[39m\n\n", args...)
	tb.FailNow()
}
// ok aborts the test via tb.FailNow when err is non-nil, after printing the
// error in red together with the file name and line number of the caller.
func ok(tb testing.TB, err error) {
	if err == nil {
		return
	}

	// Caller(1) is the function that invoked ok.
	_, file, line, _ := runtime.Caller(1)
	location := filepath.Base(file)

	fmt.Printf("\033[31m%s:%d: unexpected error: %s\033[39m\n\n", location, line, err.Error())
	tb.FailNow()
}
// equals aborts the test via tb.FailNow when exp and act are not deeply equal
// according to reflect.DeepEqual. Both values are printed in red, prefixed
// with the file name and line number of the caller.
func equals(tb testing.TB, exp, act interface{}) {
	if reflect.DeepEqual(exp, act) {
		return
	}

	// Caller(1) is the function that invoked equals.
	_, file, line, _ := runtime.Caller(1)
	location := filepath.Base(file)

	fmt.Printf("\033[31m%s:%d:\n\n\texp: %#v\n\n\tgot: %#v\033[39m\n\n", location, line, exp, act)
	tb.FailNow()
}

257
chunker/polynomials.go Normal file
View file

@ -0,0 +1,257 @@
package chunker
import (
"crypto/rand"
"encoding/binary"
"errors"
"fmt"
"strconv"
)
// Pol is a polynomial from F_2[X]. Bit i of the underlying uint64 is the
// coefficient of X^i, so only polynomials of degree 63 or less can be
// represented.
type Pol uint64

// Add returns x+y. Adding two polynomials over F_2 is the bitwise XOR of
// their coefficients.
func (x Pol) Add(y Pol) Pol {
	return x ^ y
}

// mulOverflows returns true if the product a*b does not fit into a Pol.
//
// The degree of a product is the sum of the degrees of its factors, so the
// product overflows exactly when that sum exceeds 63. A zero factor has
// degree -1 and therefore never reports an overflow.
func mulOverflows(a, b Pol) bool {
	return a.Deg()+b.Deg() > 63
}

// mul returns x*y without any overflow check: coefficients of X^64 and above
// are silently dropped.
func (x Pol) mul(y Pol) Pol {
	if x == 0 || y == 0 {
		return 0
	}

	var res Pol
	for i := 0; i <= y.Deg(); i++ {
		// add x*X^i for every term X^i contained in y
		if y&(1<<uint(i)) > 0 {
			res = res.Add(x << uint(i))
		}
	}

	return res
}

// Mul returns x*y. When an overflow occurs, Mul panics.
func (x Pol) Mul(y Pol) Pol {
	if mulOverflows(x, y) {
		panic("multiplication would overflow uint64")
	}

	return x.mul(y)
}

// Deg returns the degree of the polynomial x. If x is zero, -1 is returned.
func (x Pol) Deg() int {
	// the degree of 0 is -1
	if x == 0 {
		return -1
	}

	// the degree is the index of the highest bit that is set
	for i := 63; i >= 0; i-- {
		if x&(1<<uint(i)) > 0 {
			return i
		}
	}

	// not reachable for x != 0
	return -1
}

// String returns the coefficients in hex.
func (x Pol) String() string {
	return "0x" + strconv.FormatUint(uint64(x), 16)
}

// Expand returns the string representation of the polynomial x as a sum of
// powers of x, highest power first, e.g. "x^4+x+1". The zero polynomial is
// rendered as "0".
func (x Pol) Expand() string {
	if x == 0 {
		return "0"
	}

	s := ""
	for i := x.Deg(); i > 1; i-- {
		if x&(1<<uint(i)) > 0 {
			s += fmt.Sprintf("+x^%d", i)
		}
	}

	// the two lowest terms are written without an exponent
	if x&2 > 0 {
		s += "+x"
	}

	if x&1 > 0 {
		s += "+1"
	}

	// x is not zero, so s starts with a "+" that has to be removed
	return s[1:]
}

// DivMod returns x / d = q, and remainder r,
// see https://en.wikipedia.org/wiki/Division_algorithm
//
// A zero dividend yields (0, 0) without looking at d. For every other x,
// DivMod panics if d is zero.
func (x Pol) DivMod(d Pol) (Pol, Pol) {
	if x == 0 {
		return 0, 0
	}

	if d == 0 {
		panic("division by zero")
	}

	D := d.Deg()
	diff := x.Deg() - D
	if diff < 0 {
		return 0, x
	}

	var q Pol
	for diff >= 0 {
		// cancel the leading term of x with d*X^diff and record X^diff in
		// the quotient
		m := d << uint(diff)
		q |= (1 << uint(diff))
		x = x.Add(m)

		diff = x.Deg() - D
	}

	return q, x
}

// Div returns the integer division result x / d.
func (x Pol) Div(d Pol) Pol {
	q, _ := x.DivMod(d)
	return q
}

// Mod returns the remainder of x / d
func (x Pol) Mod(d Pol) Pol {
	_, r := x.DivMod(d)
	return r
}

// I really dislike having a function that does not terminate, so specify a
// really large upper bound for finding a new irreducible polynomial, and
// return an error when no irreducible polynomial has been found within
// randPolMaxTries.
const randPolMaxTries = 1e6

// RandomPolynomial returns a new random irreducible polynomial of degree 53
// (largest prime number below 64-8). There are (2^53-2)/53 irreducible
// polynomials of degree 53 in F_2[X], c.f. Michael O. Rabin (1981):
// "Fingerprinting by Random Polynomials", page 4. If no polynomial could be
// found in one million tries, an error is returned. An error from the random
// source is returned unchanged.
func RandomPolynomial() (Pol, error) {
	for i := 0; i < randPolMaxTries; i++ {
		var f Pol

		// choose polynomial at random
		err := binary.Read(rand.Reader, binary.LittleEndian, &f)
		if err != nil {
			return 0, err
		}

		// mask away bits above bit 53
		f &= Pol((1 << 54) - 1)

		// set highest and lowest bit so that the degree is 53 and the
		// polynomial is not trivially reducible
		f |= (1 << 53) | 1

		// test if f is irreducible
		if f.Irreducible() {
			return f, nil
		}
	}

	// If this is reached, we haven't found an irreducible polynomial in
	// randPolMaxTries. This error is very unlikely to occur.
	return 0, errors.New("unable to find new random irreducible polynomial")
}

// GCD computes the Greatest Common Divisor x and f. If one of the arguments
// is zero, the other one is returned.
func (x Pol) GCD(f Pol) Pol {
	if f == 0 {
		return x
	}

	if x == 0 {
		return f
	}

	// make sure the polynomial of higher degree is the dividend
	if x.Deg() < f.Deg() {
		x, f = f, x
	}

	return f.GCD(x.Mod(f))
}

// Irreducible returns true iff x is irreducible over F_2. This function
// uses Ben Or's reducibility test.
//
// The constant polynomials 0 and 1 are not irreducible, so false is returned
// for them.
//
// For details see "Tests and Constructions of Irreducible Polynomials over
// Finite Fields".
func (x Pol) Irreducible() bool {
	// the loop below never runs for constants and would accept them
	if x.Deg() < 1 {
		return false
	}

	for i := 1; i <= x.Deg()/2; i++ {
		if x.GCD(qp(uint(i), x)) != 1 {
			return false
		}
	}

	return true
}

// MulMod computes x*f mod g. If x or f is zero, zero is returned without
// looking at g; otherwise MulMod panics if g is zero.
func (x Pol) MulMod(f, g Pol) Pol {
	if x == 0 || f == 0 {
		return 0
	}

	// a holds x*X^i mod g for the current i. Reducing x first keeps the
	// degree of a below 63, so that the multiplication by X below can never
	// overflow, even if x itself uses bit 63.
	a := x.Mod(g)

	var res Pol
	for i := 0; i <= f.Deg(); i++ {
		if f&(1<<uint(i)) > 0 {
			// a sum of reduced polynomials is reduced as well
			res = res.Add(a)
		}

		a = a.Mul(2).Mod(g)
	}

	return res
}

// qp computes the polynomial (x^(2^p)-x) mod g. This is needed for the
// reducibility test.
func qp(p uint, g Pol) Pol {
	// start with x
	res := Pol(2)

	// squaring p times yields x^(2^p)
	for i := uint(0); i < p; i++ {
		res = res.MulMod(res, g)
	}

	// add x
	return res.Add(2).Mod(g)
}

350
chunker/polynomials_test.go Normal file
View file

@ -0,0 +1,350 @@
package chunker_test
import (
"strconv"
"testing"
"github.com/restic/restic/chunker"
)
// polAddTests lists pairs of polynomials x and y together with the sum that
// Add is expected to return for them.
var polAddTests = []struct {
x, y chunker.Pol
sum chunker.Pol
}{
{23, 16, 23 ^ 16},
{0x9a7e30d1e855e0a0, 0x670102a1f4bcd414, 0xfd7f32701ce934b4},
// adding a polynomial to itself yields zero
{0x9a7e30d1e855e0a0, 0x9a7e30d1e855e0a0, 0},
}
// TestPolAdd checks Add against every entry of polAddTests, once for each
// order of the two operands.
func TestPolAdd(t *testing.T) {
	for i := range polAddTests {
		tc := polAddTests[i]

		equals(t, tc.sum, tc.x.Add(tc.y))
		equals(t, tc.sum, tc.y.Add(tc.x))
	}
}
// parseBin converts a string of binary digits (highest coefficient first)
// into a polynomial. It panics if s is not a valid 64 bit binary number.
func parseBin(s string) chunker.Pol {
	coeffs, err := strconv.ParseUint(s, 2, 64)
	if err == nil {
		return chunker.Pol(coeffs)
	}

	panic(err)
}
// polMulTests lists pairs of polynomials x and y together with the product
// that Mul is expected to return for them. None of the products overflows.
var polMulTests = []struct {
x, y chunker.Pol
res chunker.Pol
}{
{1, 2, 2},
{
parseBin("1101"),
parseBin("10"),
parseBin("11010"),
},
{
parseBin("1101"),
parseBin("11"),
parseBin("10111"),
},
{
0x40000000,
0x40000000,
0x1000000000000000,
},
{
parseBin("1010"),
parseBin("100100"),
parseBin("101101000"),
},
{
parseBin("100"),
parseBin("11"),
parseBin("1100"),
},
{
parseBin("11"),
parseBin("110101"),
parseBin("1011111"),
},
{
parseBin("10011"),
parseBin("110101"),
parseBin("1100001111"),
},
}
// TestPolMul checks Mul against every entry of polMulTests, once for each
// order of the two factors.
func TestPolMul(t *testing.T) {
	for i, test := range polMulTests {
		m := test.x.Mul(test.y)
		assert(t, test.res == m,
			"TestPolMul failed for test %d: %v * %v: want %v, got %v",
			i, test.x, test.y, test.res, m)

		// the product must not depend on the order of the factors; compare
		// and report the value that was actually computed for this order
		m = test.y.Mul(test.x)
		assert(t, test.res == m,
			"TestPolMul failed for test %d: %v * %v: want %v, got %v",
			i, test.y, test.x, test.res, m)
	}
}
// TestPolMulOverflow checks that Mul panics with the documented message when
// the product does not fit into 64 bits.
func TestPolMulOverflow(t *testing.T) {
	defer func() {
		// try to recover overflow error
		err := recover()
		if err == nil {
			// Nothing panicked: the t.Fatal below is ending the test and has
			// already recorded the failure, so there is nothing to re-raise.
			return
		}

		if msg, isString := err.(string); isString && msg == "multiplication would overflow uint64" {
			return
		}

		t.Logf("invalid error raised: %v", err)
		// re-raise error if not overflow
		panic(err)
	}()

	x := chunker.Pol(1 << 63)
	x.Mul(2)
	t.Fatal("overflow test did not panic")
}
// polDivTests lists dividends x and divisors y together with the quotient
// that Div is expected to return for them.
var polDivTests = []struct {
x, y chunker.Pol
res chunker.Pol
}{
// the dividend has a lower degree than the divisor
{10, 50, 0},
{0, 1, 0},
{
parseBin("101101000"), // 0x168
parseBin("1010"), // 0xa
parseBin("100100"), // 0x24
},
{2, 2, 1},
{
0x8000000000000000,
0x8000000000000000,
1,
},
{
parseBin("1100"),
parseBin("100"),
parseBin("11"),
},
{
parseBin("1100001111"),
parseBin("10011"),
parseBin("110101"),
},
}
// TestPolDiv checks Div against every entry of polDivTests.
func TestPolDiv(t *testing.T) {
	for i, test := range polDivTests {
		m := test.x.Div(test.y)
		assert(t, test.res == m,
			"TestPolDiv failed for test %d: %v / %v: want %v, got %v",
			i, test.x, test.y, test.res, m)
	}
}
// polModTests lists dividends x and divisors y together with the remainder
// that Mod is expected to return for them.
var polModTests = []struct {
x, y chunker.Pol
res chunker.Pol
}{
// the dividend has a lower degree than the divisor
{10, 50, 10},
{0, 1, 0},
{
parseBin("101101001"),
parseBin("1010"),
parseBin("1"),
},
{2, 2, 0},
{
0x8000000000000000,
0x8000000000000000,
0,
},
{
parseBin("1100"),
parseBin("100"),
parseBin("0"),
},
{
parseBin("1100001111"),
parseBin("10011"),
parseBin("0"),
},
}
// TestPolModt checks Mod against every entry of polModTests.
func TestPolModt(t *testing.T) {
	for i := 0; i < len(polModTests); i++ {
		tc := polModTests[i]
		equals(t, tc.res, tc.x.Mod(tc.y))
	}
}
// BenchmarkPolDivMod measures the division of a polynomial of degree 53 by
// a polynomial of degree 41.
func BenchmarkPolDivMod(t *testing.B) {
	f := chunker.Pol(0x2482734cacca49)
	g := chunker.Pol(0x3af4b284899)

	for i := 0; i < t.N; i++ {
		// f has the higher degree and must be the dividend: dividing g by f
		// returns before the division loop is entered
		f.DivMod(g)
	}
}
// BenchmarkPolDeg measures Deg for a polynomial of degree 41. The result is
// verified once before the measured loop starts.
func BenchmarkPolDeg(t *testing.B) {
	f := chunker.Pol(0x3af4b284899)
	d := f.Deg()
	if d != 41 {
		t.Fatalf("BenchmarkPolDeg: Wrong degree %d returned, expected %d",
			d, 41)
	}

	for i := 0; i < t.N; i++ {
		f.Deg()
	}
}
// TestRandomPolynomial checks that RandomPolynomial succeeds and that the
// polynomial it returns has degree 53 and is irreducible.
func TestRandomPolynomial(t *testing.T) {
	p, err := chunker.RandomPolynomial()
	ok(t, err)

	if d := p.Deg(); d != 53 {
		t.Fatalf("RandomPolynomial returned %v with degree %d, want degree 53", p, d)
	}

	if !p.Irreducible() {
		t.Fatalf("RandomPolynomial returned reducible polynomial %v", p)
	}
}
// BenchmarkRandomPolynomial measures how long it takes to find a new random
// irreducible polynomial. The benchmark is aborted on the first error.
func BenchmarkRandomPolynomial(t *testing.B) {
	for remaining := t.N; remaining > 0; remaining-- {
		_, err := chunker.RandomPolynomial()
		ok(t, err)
	}
}
func TestExpandPolynomial(t *testing.T) {
pol := chunker.Pol(0x3DA3358B4DC173)
s := pol.Expand()
equals(t, "x^53+x^52+x^51+x^50+x^48+x^47+x^45+x^41+x^40+x^37+x^36+x^34+x^32+x^31+x^27+x^25+x^24+x^22+x^19+x^18+x^16+x^15+x^14+x^8+x^6+x^5+x^4+x+1", s)
}
// polIrredTests lists polynomials f together with the result that
// Irreducible is expected to return for them.
var polIrredTests = []struct {
f chunker.Pol
irred bool
}{
{0x38f1e565e288df, false},
{0x3DA3358B4DC173, true},
{0x30a8295b9d5c91, false},
{0x255f4350b962cb, false},
{0x267f776110a235, false},
{0x2f4dae10d41227, false},
{0x2482734cacca49, true},
{0x312daf4b284899, false},
{0x29dfb6553d01d1, false},
{0x3548245eb26257, false},
{0x3199e7ef4211b3, false},
{0x362f39017dae8b, false},
{0x200d57aa6fdacb, false},
{0x35e0a4efa1d275, false},
{0x2ced55b026577f, false},
{0x260b012010893d, false},
{0x2df29cbcd59e9d, false},
{0x3f2ac7488bd429, false},
{0x3e5cb1711669fb, false},
{0x226d8de57a9959, false},
{0x3c8de80aaf5835, false},
{0x2026a59efb219b, false},
{0x39dfa4d13fb231, false},
{0x3143d0464b3299, false},
}
// TestPolIrreducible checks Irreducible against every entry of
// polIrredTests.
func TestPolIrreducible(t *testing.T) {
	for _, tc := range polIrredTests {
		got := tc.f.Irreducible()

		assert(t, got == tc.irred,
			"Irreducibility test for Polynomial %v failed: got %v, wanted %v",
			tc.f, got, tc.irred)
	}
}
// polGCDTests lists pairs of polynomials f1 and f2 together with the
// greatest common divisor that GCD is expected to return for them.
var polGCDTests = []struct {
f1 chunker.Pol
f2 chunker.Pol
gcd chunker.Pol
}{
{10, 50, 2},
{0, 1, 1},
{
parseBin("101101001"),
parseBin("1010"),
parseBin("1"),
},
{2, 2, 2},
{
parseBin("1010"),
parseBin("11"),
parseBin("11"),
},
{
0x8000000000000000,
0x8000000000000000,
0x8000000000000000,
},
{
parseBin("1100"),
parseBin("101"),
parseBin("11"),
},
{
parseBin("1100001111"),
parseBin("10011"),
parseBin("10011"),
},
{
0x3DA3358B4DC173,
0x3DA3358B4DC173,
0x3DA3358B4DC173,
},
{
0x3DA3358B4DC173,
0x230d2259defd,
1,
},
{
0x230d2259defd,
0x51b492b3eff2,
parseBin("10011"),
},
}
// TestPolGCD checks GCD against every entry of polGCDTests, once for each
// order of the two arguments.
func TestPolGCD(t *testing.T) {
	for i, test := range polGCDTests {
		orders := [2][2]chunker.Pol{
			{test.f1, test.f2},
			{test.f2, test.f1},
		}

		for _, pair := range orders {
			gcd := pair[0].GCD(pair[1])
			assert(t, test.gcd == gcd,
				"GCD test %d (%+v) failed: got %v, wanted %v",
				i, test, gcd, test.gcd)
		}
	}
}
// polMulModTests lists factors f1 and f2 and a modulus g together with the
// result mod that f1.MulMod(f2, g) is expected to return.
var polMulModTests = []struct {
f1 chunker.Pol
f2 chunker.Pol
g chunker.Pol
mod chunker.Pol
}{
{
0x1230,
0x230,
0x55,
0x22,
},
{
0x0eae8c07dbbb3026,
0xd5d6db9de04771de,
0xdd2bda3b77c9,
0x425ae8595b7a,
},
}
// TestPolMulMod checks MulMod against every entry of polMulModTests.
func TestPolMulMod(t *testing.T) {
	for i := range polMulModTests {
		test := polMulModTests[i]

		mod := test.f1.MulMod(test.f2, test.g)
		assert(t, mod == test.mod,
			"MulMod test %d (%+v) failed: got %v, wanted %v",
			i, test, mod, test.mod)
	}
}

View file

@ -0,0 +1,25 @@
# This file is a script for GAP and tests a list of polynomials in hexadecimal
# for irreducibility over F_2. To test other polynomials, replace the entries
# of the list "candidates" below.
# create x over F_2 = GF(2)
x := Indeterminate(GF(2), "x");
# test if polynomial is irreducible, i.e. the number of factors is one
IrredPoly := function (poly)
return (Length(Factors(poly)) = 1);
end;;
# create a polynomial in x from the hexadecimal representation of the
# coefficients: the number is split into its binary digits, which are then
# used as the coefficients of the polynomial
Hex2Poly := function (s)
return ValuePol(CoefficientsQadic(IntHexString(s), 2), x);
end;;
# list of candidates, in hex
candidates := [ "3DA3358B4DC173" ];
# create real polynomials
L := List(candidates, Hex2Poly);
# filter and display the list of irreducible polynomials contained in L
# (the x in "x -> ..." is the parameter of the filter function, i.e. one
# element of L)
Display(Filtered(L, x -> (IrredPoly(x))));