Add digest set implementation

Set represents a unique set of digests which allow for efficient lookup.
Dumping short codes is a function which takes in a digest set.
Any operation involving short codes may be considered secure if the list of digests added to the set is the complete list of referenceable digests.
Contains benchmarks for Add, Lookup, and Dump.

Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan)
pull/260/head
Derek McGowan 2015-03-12 19:24:35 -07:00
parent 8aacddd095
commit 7258fda98a
2 changed files with 467 additions and 0 deletions

195
digest/set.go 100644
View File

@ -0,0 +1,195 @@
package digest
import (
"errors"
"sort"
"strings"
)
var (
// ErrDigestNotFound is used when a matching digest
// could not be found in a set.
ErrDigestNotFound = errors.New("digest not found")
// ErrDigestAmbiguous is used when multiple digests
// are found in a set. None of the matching digests
// should be considered valid matches.
ErrDigestAmbiguous = errors.New("ambiguous digest string")
)
// Set is used to hold a unique set of digests which
// may be easily referenced by easily referenced by a string
// representation of the digest as well as short representation.
// The uniqueness of the short representation is based on other
// digests in the set. If digests are ommited from this set,
// collisions in a larger set may not be detected, therefore it
// is important to always do short representation lookups on
// the complete set of digests. To mitigate collisions, an
// appropriately long short code should be used.
type Set struct {
entries digestEntries
}
// NewSet creates an empty set of digests
// which may have digests added.
func NewSet() *Set {
return &Set{
entries: digestEntries{},
}
}
// checkShortMatch checks whether two digests match as either whole
// values or short values. This function does not test equality,
// rather whether the second value could match against the first
// value.
func checkShortMatch(alg, hex, shortAlg, shortHex string) bool {
if len(hex) == len(shortHex) {
if hex != shortHex {
return false
}
if len(shortAlg) > 0 && alg != shortAlg {
return false
}
} else if !strings.HasPrefix(hex, shortHex) {
return false
} else if len(shortAlg) > 0 && alg != shortAlg {
return false
}
return true
}
// Lookup looks for a digest matching the given string representation.
// If no digests could be found ErrDigestNotFound will be returned
// with an empty digest value. If multiple matches are found
// ErrDigestAmbiguous will be returned with an empty digest value.
func (dst *Set) Lookup(d string) (Digest, error) {
if len(dst.entries) == 0 {
return "", ErrDigestNotFound
}
var (
searchFunc func(int) bool
alg string
hex string
)
dgst, err := ParseDigest(d)
if err == ErrDigestInvalidFormat {
hex = d
searchFunc = func(i int) bool {
return dst.entries[i].val >= d
}
} else {
hex = dgst.Hex()
alg = dgst.Algorithm()
searchFunc = func(i int) bool {
if dst.entries[i].val == hex {
return dst.entries[i].alg >= alg
}
return dst.entries[i].val >= hex
}
}
idx := sort.Search(len(dst.entries), searchFunc)
if idx == len(dst.entries) || !checkShortMatch(dst.entries[idx].alg, dst.entries[idx].val, alg, hex) {
return "", ErrDigestNotFound
}
if dst.entries[idx].alg == alg && dst.entries[idx].val == hex {
return dst.entries[idx].digest, nil
}
if idx+1 < len(dst.entries) && checkShortMatch(dst.entries[idx+1].alg, dst.entries[idx+1].val, alg, hex) {
return "", ErrDigestAmbiguous
}
return dst.entries[idx].digest, nil
}
// Add adds the given digests to the set. An error will be returned
// if the given digest is invalid. If the digest already exists in the
// table, this operation will be a no-op.
func (dst *Set) Add(d Digest) error {
if err := d.Validate(); err != nil {
return err
}
entry := &digestEntry{alg: d.Algorithm(), val: d.Hex(), digest: d}
searchFunc := func(i int) bool {
if dst.entries[i].val == entry.val {
return dst.entries[i].alg >= entry.alg
}
return dst.entries[i].val >= entry.val
}
idx := sort.Search(len(dst.entries), searchFunc)
if idx == len(dst.entries) {
dst.entries = append(dst.entries, entry)
return nil
} else if dst.entries[idx].digest == d {
return nil
}
entries := append(dst.entries, nil)
copy(entries[idx+1:], entries[idx:len(entries)-1])
entries[idx] = entry
dst.entries = entries
return nil
}
// ShortCodeTable returns a map of Digest to unique short codes. The
// length represents the minimum value, the maximum length may be the
// entire value of digest if uniqueness cannot be achieved without the
// full value. This function will attempt to make short codes as short
// as possible to be unique.
func ShortCodeTable(dst *Set, length int) map[Digest]string {
m := make(map[Digest]string, len(dst.entries))
l := length
resetIdx := 0
for i := 0; i < len(dst.entries); i++ {
var short string
extended := true
for extended {
extended = false
if len(dst.entries[i].val) <= l {
short = dst.entries[i].digest.String()
} else {
short = dst.entries[i].val[:l]
for j := i + 1; j < len(dst.entries); j++ {
if checkShortMatch(dst.entries[j].alg, dst.entries[j].val, "", short) {
if j > resetIdx {
resetIdx = j
}
extended = true
} else {
break
}
}
if extended {
l++
}
}
}
m[dst.entries[i].digest] = short
if i >= resetIdx {
l = length
}
}
return m
}
type digestEntry struct {
alg string
val string
digest Digest
}
type digestEntries []*digestEntry
func (d digestEntries) Len() int {
return len(d)
}
func (d digestEntries) Less(i, j int) bool {
if d[i].val != d[j].val {
return d[i].val < d[j].val
}
return d[i].alg < d[j].alg
}
func (d digestEntries) Swap(i, j int) {
d[i], d[j] = d[j], d[i]
}

272
digest/set_test.go 100644
View File

@ -0,0 +1,272 @@
package digest
import (
"crypto/sha256"
"encoding/binary"
"math/rand"
"testing"
)
func assertEqualDigests(t *testing.T, d1, d2 Digest) {
if d1 != d2 {
t.Fatalf("Digests do not match:\n\tActual: %s\n\tExpected: %s", d1, d2)
}
}
func TestLookup(t *testing.T) {
digests := []Digest{
"sha256:12345",
"sha256:1234",
"sha256:12346",
"sha256:54321",
"sha256:65431",
"sha256:64321",
"sha256:65421",
"sha256:65321",
}
dset := NewSet()
for i := range digests {
if err := dset.Add(digests[i]); err != nil {
t.Fatal(err)
}
}
dgst, err := dset.Lookup("54")
if err != nil {
t.Fatal(err)
}
assertEqualDigests(t, dgst, digests[3])
dgst, err = dset.Lookup("1234")
if err == nil {
t.Fatal("Expected ambiguous error looking up: 1234")
}
if err != ErrDigestAmbiguous {
t.Fatal(err)
}
dgst, err = dset.Lookup("9876")
if err == nil {
t.Fatal("Expected ambiguous error looking up: 9876")
}
if err != ErrDigestNotFound {
t.Fatal(err)
}
dgst, err = dset.Lookup("sha256:1234")
if err != nil {
t.Fatal(err)
}
assertEqualDigests(t, dgst, digests[1])
dgst, err = dset.Lookup("sha256:12345")
if err != nil {
t.Fatal(err)
}
assertEqualDigests(t, dgst, digests[0])
dgst, err = dset.Lookup("sha256:12346")
if err != nil {
t.Fatal(err)
}
assertEqualDigests(t, dgst, digests[2])
dgst, err = dset.Lookup("12346")
if err != nil {
t.Fatal(err)
}
assertEqualDigests(t, dgst, digests[2])
dgst, err = dset.Lookup("12345")
if err != nil {
t.Fatal(err)
}
assertEqualDigests(t, dgst, digests[0])
}
func TestAddDuplication(t *testing.T) {
digests := []Digest{
"sha256:1234",
"sha256:12345",
"sha256:12346",
"sha256:54321",
"sha256:65431",
"sha512:65431",
"sha512:65421",
"sha512:65321",
}
dset := NewSet()
for i := range digests {
if err := dset.Add(digests[i]); err != nil {
t.Fatal(err)
}
}
if len(dset.entries) != 8 {
t.Fatal("Invalid dset size")
}
if err := dset.Add(Digest("sha256:12345")); err != nil {
t.Fatal(err)
}
if len(dset.entries) != 8 {
t.Fatal("Duplicate digest insert allowed")
}
if err := dset.Add(Digest("sha384:12345")); err != nil {
t.Fatal(err)
}
if len(dset.entries) != 9 {
t.Fatal("Insert with different algorithm not allowed")
}
}
func assertEqualShort(t *testing.T, actual, expected string) {
if actual != expected {
t.Fatalf("Unexpected short value:\n\tExpected: %s\n\tActual: %s", expected, actual)
}
}
func TestShortCodeTable(t *testing.T) {
digests := []Digest{
"sha256:1234",
"sha256:12345",
"sha256:12346",
"sha256:54321",
"sha256:65431",
"sha256:64321",
"sha256:65421",
"sha256:65321",
}
dset := NewSet()
for i := range digests {
if err := dset.Add(digests[i]); err != nil {
t.Fatal(err)
}
}
dump := ShortCodeTable(dset, 2)
if len(dump) < len(digests) {
t.Fatalf("Error unexpected size: %d, expecting %d", len(dump), len(digests))
}
assertEqualShort(t, dump[digests[0]], "sha256:1234")
assertEqualShort(t, dump[digests[1]], "sha256:12345")
assertEqualShort(t, dump[digests[2]], "sha256:12346")
assertEqualShort(t, dump[digests[3]], "54")
assertEqualShort(t, dump[digests[4]], "6543")
assertEqualShort(t, dump[digests[5]], "64")
assertEqualShort(t, dump[digests[6]], "6542")
assertEqualShort(t, dump[digests[7]], "653")
}
func createDigests(count int) ([]Digest, error) {
r := rand.New(rand.NewSource(25823))
digests := make([]Digest, count)
for i := range digests {
h := sha256.New()
if err := binary.Write(h, binary.BigEndian, r.Int63()); err != nil {
return nil, err
}
digests[i] = NewDigest("sha256", h)
}
return digests, nil
}
func benchAddNTable(b *testing.B, n int) {
digests, err := createDigests(n)
if err != nil {
b.Fatal(err)
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))}
for j := range digests {
if err = dset.Add(digests[j]); err != nil {
b.Fatal(err)
}
}
}
}
func benchLookupNTable(b *testing.B, n int, shortLen int) {
digests, err := createDigests(n)
if err != nil {
b.Fatal(err)
}
dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))}
for i := range digests {
if err := dset.Add(digests[i]); err != nil {
b.Fatal(err)
}
}
shorts := make([]string, 0, n)
for _, short := range ShortCodeTable(dset, shortLen) {
shorts = append(shorts, short)
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
if _, err = dset.Lookup(shorts[i%n]); err != nil {
b.Fatal(err)
}
}
}
func benchShortCodeNTable(b *testing.B, n int, shortLen int) {
digests, err := createDigests(n)
if err != nil {
b.Fatal(err)
}
dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))}
for i := range digests {
if err := dset.Add(digests[i]); err != nil {
b.Fatal(err)
}
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
ShortCodeTable(dset, shortLen)
}
}
func BenchmarkAdd10(b *testing.B) {
benchAddNTable(b, 10)
}
func BenchmarkAdd100(b *testing.B) {
benchAddNTable(b, 100)
}
func BenchmarkAdd1000(b *testing.B) {
benchAddNTable(b, 1000)
}
func BenchmarkLookup10(b *testing.B) {
benchLookupNTable(b, 10, 12)
}
func BenchmarkLookup100(b *testing.B) {
benchLookupNTable(b, 100, 12)
}
func BenchmarkLookup1000(b *testing.B) {
benchLookupNTable(b, 1000, 12)
}
func BenchmarkShortCode10(b *testing.B) {
benchShortCodeNTable(b, 10, 12)
}
func BenchmarkShortCode100(b *testing.B) {
benchShortCodeNTable(b, 100, 12)
}
func BenchmarkShortCode1000(b *testing.B) {
benchShortCodeNTable(b, 1000, 12)
}