From 7258fda98a033732325d2ef71c856a755197f46a Mon Sep 17 00:00:00 2001 From: Derek McGowan Date: Thu, 12 Mar 2015 19:24:35 -0700 Subject: [PATCH] Add digest set implementation Set represents a unique set of digests which allow for efficient lookup. Dumping short codes is a function which takes in a digest set. Any operation involving short codes may be considered secure if the list of digests added to the set is the complete list of referenceable digests. Contains benchmarks for Add, Lookup, and Dump. Signed-off-by: Derek McGowan (github: dmcgowan) --- digest/set.go | 195 ++++++++++++++++++++++++++++++++ digest/set_test.go | 272 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 467 insertions(+) create mode 100644 digest/set.go create mode 100644 digest/set_test.go diff --git a/digest/set.go b/digest/set.go new file mode 100644 index 00000000..7a9a0380 --- /dev/null +++ b/digest/set.go @@ -0,0 +1,195 @@ +package digest + +import ( + "errors" + "sort" + "strings" +) + +var ( + // ErrDigestNotFound is used when a matching digest + // could not be found in a set. + ErrDigestNotFound = errors.New("digest not found") + + // ErrDigestAmbiguous is used when multiple digests + // are found in a set. None of the matching digests + // should be considered valid matches. + ErrDigestAmbiguous = errors.New("ambiguous digest string") +) + +// Set is used to hold a unique set of digests which +// may be easily referenced by easily referenced by a string +// representation of the digest as well as short representation. +// The uniqueness of the short representation is based on other +// digests in the set. If digests are ommited from this set, +// collisions in a larger set may not be detected, therefore it +// is important to always do short representation lookups on +// the complete set of digests. To mitigate collisions, an +// appropriately long short code should be used. +type Set struct { + entries digestEntries +} + +// NewSet creates an empty set of digests +// which may have digests added. +func NewSet() *Set { + return &Set{ + entries: digestEntries{}, + } +} + +// checkShortMatch checks whether two digests match as either whole +// values or short values. This function does not test equality, +// rather whether the second value could match against the first +// value. +func checkShortMatch(alg, hex, shortAlg, shortHex string) bool { + if len(hex) == len(shortHex) { + if hex != shortHex { + return false + } + if len(shortAlg) > 0 && alg != shortAlg { + return false + } + } else if !strings.HasPrefix(hex, shortHex) { + return false + } else if len(shortAlg) > 0 && alg != shortAlg { + return false + } + return true +} + +// Lookup looks for a digest matching the given string representation. +// If no digests could be found ErrDigestNotFound will be returned +// with an empty digest value. If multiple matches are found +// ErrDigestAmbiguous will be returned with an empty digest value. +func (dst *Set) Lookup(d string) (Digest, error) { + if len(dst.entries) == 0 { + return "", ErrDigestNotFound + } + var ( + searchFunc func(int) bool + alg string + hex string + ) + dgst, err := ParseDigest(d) + if err == ErrDigestInvalidFormat { + hex = d + searchFunc = func(i int) bool { + return dst.entries[i].val >= d + } + } else { + hex = dgst.Hex() + alg = dgst.Algorithm() + searchFunc = func(i int) bool { + if dst.entries[i].val == hex { + return dst.entries[i].alg >= alg + } + return dst.entries[i].val >= hex + } + } + idx := sort.Search(len(dst.entries), searchFunc) + if idx == len(dst.entries) || !checkShortMatch(dst.entries[idx].alg, dst.entries[idx].val, alg, hex) { + return "", ErrDigestNotFound + } + if dst.entries[idx].alg == alg && dst.entries[idx].val == hex { + return dst.entries[idx].digest, nil + } + if idx+1 < len(dst.entries) && checkShortMatch(dst.entries[idx+1].alg, dst.entries[idx+1].val, alg, hex) { + return "", ErrDigestAmbiguous + } + + return dst.entries[idx].digest, nil +} + +// Add adds the given digests to the set. An error will be returned +// if the given digest is invalid. If the digest already exists in the +// table, this operation will be a no-op. +func (dst *Set) Add(d Digest) error { + if err := d.Validate(); err != nil { + return err + } + entry := &digestEntry{alg: d.Algorithm(), val: d.Hex(), digest: d} + searchFunc := func(i int) bool { + if dst.entries[i].val == entry.val { + return dst.entries[i].alg >= entry.alg + } + return dst.entries[i].val >= entry.val + } + idx := sort.Search(len(dst.entries), searchFunc) + if idx == len(dst.entries) { + dst.entries = append(dst.entries, entry) + return nil + } else if dst.entries[idx].digest == d { + return nil + } + + entries := append(dst.entries, nil) + copy(entries[idx+1:], entries[idx:len(entries)-1]) + entries[idx] = entry + dst.entries = entries + return nil +} + +// ShortCodeTable returns a map of Digest to unique short codes. The +// length represents the minimum value, the maximum length may be the +// entire value of digest if uniqueness cannot be achieved without the +// full value. This function will attempt to make short codes as short +// as possible to be unique. +func ShortCodeTable(dst *Set, length int) map[Digest]string { + m := make(map[Digest]string, len(dst.entries)) + l := length + resetIdx := 0 + for i := 0; i < len(dst.entries); i++ { + var short string + extended := true + for extended { + extended = false + if len(dst.entries[i].val) <= l { + short = dst.entries[i].digest.String() + } else { + short = dst.entries[i].val[:l] + for j := i + 1; j < len(dst.entries); j++ { + if checkShortMatch(dst.entries[j].alg, dst.entries[j].val, "", short) { + if j > resetIdx { + resetIdx = j + } + extended = true + } else { + break + } + } + if extended { + l++ + } + } + } + m[dst.entries[i].digest] = short + if i >= resetIdx { + l = length + } + } + return m +} + +type digestEntry struct { + alg string + val string + digest Digest +} + +type digestEntries []*digestEntry + +func (d digestEntries) Len() int { + return len(d) +} + +func (d digestEntries) Less(i, j int) bool { + if d[i].val != d[j].val { + return d[i].val < d[j].val + } + return d[i].alg < d[j].alg +} + +func (d digestEntries) Swap(i, j int) { + d[i], d[j] = d[j], d[i] +} diff --git a/digest/set_test.go b/digest/set_test.go new file mode 100644 index 00000000..faeba6d3 --- /dev/null +++ b/digest/set_test.go @@ -0,0 +1,272 @@ +package digest + +import ( + "crypto/sha256" + "encoding/binary" + "math/rand" + "testing" +) + +func assertEqualDigests(t *testing.T, d1, d2 Digest) { + if d1 != d2 { + t.Fatalf("Digests do not match:\n\tActual: %s\n\tExpected: %s", d1, d2) + } +} + +func TestLookup(t *testing.T) { + digests := []Digest{ + "sha256:12345", + "sha256:1234", + "sha256:12346", + "sha256:54321", + "sha256:65431", + "sha256:64321", + "sha256:65421", + "sha256:65321", + } + + dset := NewSet() + for i := range digests { + if err := dset.Add(digests[i]); err != nil { + t.Fatal(err) + } + } + + dgst, err := dset.Lookup("54") + if err != nil { + t.Fatal(err) + } + assertEqualDigests(t, dgst, digests[3]) + + dgst, err = dset.Lookup("1234") + if err == nil { + t.Fatal("Expected ambiguous error looking up: 1234") + } + if err != ErrDigestAmbiguous { + t.Fatal(err) + } + + dgst, err = dset.Lookup("9876") + if err == nil { + t.Fatal("Expected ambiguous error looking up: 9876") + } + if err != ErrDigestNotFound { + t.Fatal(err) + } + + dgst, err = dset.Lookup("sha256:1234") + if err != nil { + t.Fatal(err) + } + assertEqualDigests(t, dgst, digests[1]) + + dgst, err = dset.Lookup("sha256:12345") + if err != nil { + t.Fatal(err) + } + assertEqualDigests(t, dgst, digests[0]) + + dgst, err = dset.Lookup("sha256:12346") + if err != nil { + t.Fatal(err) + } + assertEqualDigests(t, dgst, digests[2]) + + dgst, err = dset.Lookup("12346") + if err != nil { + t.Fatal(err) + } + assertEqualDigests(t, dgst, digests[2]) + + dgst, err = dset.Lookup("12345") + if err != nil { + t.Fatal(err) + } + assertEqualDigests(t, dgst, digests[0]) +} + +func TestAddDuplication(t *testing.T) { + digests := []Digest{ + "sha256:1234", + "sha256:12345", + "sha256:12346", + "sha256:54321", + "sha256:65431", + "sha512:65431", + "sha512:65421", + "sha512:65321", + } + + dset := NewSet() + for i := range digests { + if err := dset.Add(digests[i]); err != nil { + t.Fatal(err) + } + } + + if len(dset.entries) != 8 { + t.Fatal("Invalid dset size") + } + + if err := dset.Add(Digest("sha256:12345")); err != nil { + t.Fatal(err) + } + + if len(dset.entries) != 8 { + t.Fatal("Duplicate digest insert allowed") + } + + if err := dset.Add(Digest("sha384:12345")); err != nil { + t.Fatal(err) + } + + if len(dset.entries) != 9 { + t.Fatal("Insert with different algorithm not allowed") + } +} + +func assertEqualShort(t *testing.T, actual, expected string) { + if actual != expected { + t.Fatalf("Unexpected short value:\n\tExpected: %s\n\tActual: %s", expected, actual) + } +} + +func TestShortCodeTable(t *testing.T) { + digests := []Digest{ + "sha256:1234", + "sha256:12345", + "sha256:12346", + "sha256:54321", + "sha256:65431", + "sha256:64321", + "sha256:65421", + "sha256:65321", + } + + dset := NewSet() + for i := range digests { + if err := dset.Add(digests[i]); err != nil { + t.Fatal(err) + } + } + + dump := ShortCodeTable(dset, 2) + + if len(dump) < len(digests) { + t.Fatalf("Error unexpected size: %d, expecting %d", len(dump), len(digests)) + } + + assertEqualShort(t, dump[digests[0]], "sha256:1234") + assertEqualShort(t, dump[digests[1]], "sha256:12345") + assertEqualShort(t, dump[digests[2]], "sha256:12346") + assertEqualShort(t, dump[digests[3]], "54") + assertEqualShort(t, dump[digests[4]], "6543") + assertEqualShort(t, dump[digests[5]], "64") + assertEqualShort(t, dump[digests[6]], "6542") + assertEqualShort(t, dump[digests[7]], "653") +} + +func createDigests(count int) ([]Digest, error) { + r := rand.New(rand.NewSource(25823)) + digests := make([]Digest, count) + for i := range digests { + h := sha256.New() + if err := binary.Write(h, binary.BigEndian, r.Int63()); err != nil { + return nil, err + } + digests[i] = NewDigest("sha256", h) + } + return digests, nil +} + +func benchAddNTable(b *testing.B, n int) { + digests, err := createDigests(n) + if err != nil { + b.Fatal(err) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))} + for j := range digests { + if err = dset.Add(digests[j]); err != nil { + b.Fatal(err) + } + } + } +} + +func benchLookupNTable(b *testing.B, n int, shortLen int) { + digests, err := createDigests(n) + if err != nil { + b.Fatal(err) + } + dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))} + for i := range digests { + if err := dset.Add(digests[i]); err != nil { + b.Fatal(err) + } + } + shorts := make([]string, 0, n) + for _, short := range ShortCodeTable(dset, shortLen) { + shorts = append(shorts, short) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + if _, err = dset.Lookup(shorts[i%n]); err != nil { + b.Fatal(err) + } + } +} + +func benchShortCodeNTable(b *testing.B, n int, shortLen int) { + digests, err := createDigests(n) + if err != nil { + b.Fatal(err) + } + dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))} + for i := range digests { + if err := dset.Add(digests[i]); err != nil { + b.Fatal(err) + } + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + ShortCodeTable(dset, shortLen) + } +} + +func BenchmarkAdd10(b *testing.B) { + benchAddNTable(b, 10) +} + +func BenchmarkAdd100(b *testing.B) { + benchAddNTable(b, 100) +} + +func BenchmarkAdd1000(b *testing.B) { + benchAddNTable(b, 1000) +} + +func BenchmarkLookup10(b *testing.B) { + benchLookupNTable(b, 10, 12) +} + +func BenchmarkLookup100(b *testing.B) { + benchLookupNTable(b, 100, 12) +} + +func BenchmarkLookup1000(b *testing.B) { + benchLookupNTable(b, 1000, 12) +} + +func BenchmarkShortCode10(b *testing.B) { + benchShortCodeNTable(b, 10, 12) +} +func BenchmarkShortCode100(b *testing.B) { + benchShortCodeNTable(b, 100, 12) +} +func BenchmarkShortCode1000(b *testing.B) { + benchShortCodeNTable(b, 1000, 12) +}