Merge pull request #260 from dmcgowan/short-digests
Add short digest implementation
This commit is contained in:
commit
5556cd1ba1
2 changed files with 467 additions and 0 deletions
195
digest/set.go
Normal file
195
digest/set.go
Normal file
|
@ -0,0 +1,195 @@
|
||||||
|
package digest
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Sentinel errors reported by Set.Lookup.
var (
	// ErrDigestNotFound is returned when no digest in a set matches
	// the requested value.
	ErrDigestNotFound = errors.New("digest not found")

	// ErrDigestAmbiguous is returned when more than one digest in a set
	// matches the requested value. None of the candidates should be
	// treated as a valid match.
	ErrDigestAmbiguous = errors.New("ambiguous digest string")
)
|
||||||
|
|
||||||
|
// Set is used to hold a unique set of digests which
|
||||||
|
// may be easily referenced by easily referenced by a string
|
||||||
|
// representation of the digest as well as short representation.
|
||||||
|
// The uniqueness of the short representation is based on other
|
||||||
|
// digests in the set. If digests are ommited from this set,
|
||||||
|
// collisions in a larger set may not be detected, therefore it
|
||||||
|
// is important to always do short representation lookups on
|
||||||
|
// the complete set of digests. To mitigate collisions, an
|
||||||
|
// appropriately long short code should be used.
|
||||||
|
type Set struct {
|
||||||
|
entries digestEntries
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewSet creates an empty set of digests
|
||||||
|
// which may have digests added.
|
||||||
|
func NewSet() *Set {
|
||||||
|
return &Set{
|
||||||
|
entries: digestEntries{},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// checkShortMatch reports whether the (shortAlg, shortHex) pair could refer
// to the digest described by (alg, hex). It is not an equality test: the
// short value matches when shortHex is a prefix of hex (or the whole of
// it) and, if shortAlg is non-empty, when the algorithms are identical.
func checkShortMatch(alg, hex, shortAlg, shortHex string) bool {
	// An empty short algorithm acts as a wildcard.
	if shortAlg != "" && shortAlg != alg {
		return false
	}
	// For equal lengths a prefix test is the same as an equality test, so
	// one check covers both the full-value and the short-value case.
	return strings.HasPrefix(hex, shortHex)
}
|
||||||
|
|
||||||
|
// Lookup looks for a digest matching the given string representation.
|
||||||
|
// If no digests could be found ErrDigestNotFound will be returned
|
||||||
|
// with an empty digest value. If multiple matches are found
|
||||||
|
// ErrDigestAmbiguous will be returned with an empty digest value.
|
||||||
|
func (dst *Set) Lookup(d string) (Digest, error) {
|
||||||
|
if len(dst.entries) == 0 {
|
||||||
|
return "", ErrDigestNotFound
|
||||||
|
}
|
||||||
|
var (
|
||||||
|
searchFunc func(int) bool
|
||||||
|
alg string
|
||||||
|
hex string
|
||||||
|
)
|
||||||
|
dgst, err := ParseDigest(d)
|
||||||
|
if err == ErrDigestInvalidFormat {
|
||||||
|
hex = d
|
||||||
|
searchFunc = func(i int) bool {
|
||||||
|
return dst.entries[i].val >= d
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
hex = dgst.Hex()
|
||||||
|
alg = dgst.Algorithm()
|
||||||
|
searchFunc = func(i int) bool {
|
||||||
|
if dst.entries[i].val == hex {
|
||||||
|
return dst.entries[i].alg >= alg
|
||||||
|
}
|
||||||
|
return dst.entries[i].val >= hex
|
||||||
|
}
|
||||||
|
}
|
||||||
|
idx := sort.Search(len(dst.entries), searchFunc)
|
||||||
|
if idx == len(dst.entries) || !checkShortMatch(dst.entries[idx].alg, dst.entries[idx].val, alg, hex) {
|
||||||
|
return "", ErrDigestNotFound
|
||||||
|
}
|
||||||
|
if dst.entries[idx].alg == alg && dst.entries[idx].val == hex {
|
||||||
|
return dst.entries[idx].digest, nil
|
||||||
|
}
|
||||||
|
if idx+1 < len(dst.entries) && checkShortMatch(dst.entries[idx+1].alg, dst.entries[idx+1].val, alg, hex) {
|
||||||
|
return "", ErrDigestAmbiguous
|
||||||
|
}
|
||||||
|
|
||||||
|
return dst.entries[idx].digest, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add adds the given digests to the set. An error will be returned
|
||||||
|
// if the given digest is invalid. If the digest already exists in the
|
||||||
|
// table, this operation will be a no-op.
|
||||||
|
func (dst *Set) Add(d Digest) error {
|
||||||
|
if err := d.Validate(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
entry := &digestEntry{alg: d.Algorithm(), val: d.Hex(), digest: d}
|
||||||
|
searchFunc := func(i int) bool {
|
||||||
|
if dst.entries[i].val == entry.val {
|
||||||
|
return dst.entries[i].alg >= entry.alg
|
||||||
|
}
|
||||||
|
return dst.entries[i].val >= entry.val
|
||||||
|
}
|
||||||
|
idx := sort.Search(len(dst.entries), searchFunc)
|
||||||
|
if idx == len(dst.entries) {
|
||||||
|
dst.entries = append(dst.entries, entry)
|
||||||
|
return nil
|
||||||
|
} else if dst.entries[idx].digest == d {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
entries := append(dst.entries, nil)
|
||||||
|
copy(entries[idx+1:], entries[idx:len(entries)-1])
|
||||||
|
entries[idx] = entry
|
||||||
|
dst.entries = entries
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ShortCodeTable returns a map of Digest to unique short codes. The
|
||||||
|
// length represents the minimum value, the maximum length may be the
|
||||||
|
// entire value of digest if uniqueness cannot be achieved without the
|
||||||
|
// full value. This function will attempt to make short codes as short
|
||||||
|
// as possible to be unique.
|
||||||
|
func ShortCodeTable(dst *Set, length int) map[Digest]string {
|
||||||
|
m := make(map[Digest]string, len(dst.entries))
|
||||||
|
l := length
|
||||||
|
resetIdx := 0
|
||||||
|
for i := 0; i < len(dst.entries); i++ {
|
||||||
|
var short string
|
||||||
|
extended := true
|
||||||
|
for extended {
|
||||||
|
extended = false
|
||||||
|
if len(dst.entries[i].val) <= l {
|
||||||
|
short = dst.entries[i].digest.String()
|
||||||
|
} else {
|
||||||
|
short = dst.entries[i].val[:l]
|
||||||
|
for j := i + 1; j < len(dst.entries); j++ {
|
||||||
|
if checkShortMatch(dst.entries[j].alg, dst.entries[j].val, "", short) {
|
||||||
|
if j > resetIdx {
|
||||||
|
resetIdx = j
|
||||||
|
}
|
||||||
|
extended = true
|
||||||
|
} else {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if extended {
|
||||||
|
l++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m[dst.entries[i].digest] = short
|
||||||
|
if i >= resetIdx {
|
||||||
|
l = length
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
|
||||||
|
type digestEntry struct {
|
||||||
|
alg string
|
||||||
|
val string
|
||||||
|
digest Digest
|
||||||
|
}
|
||||||
|
|
||||||
|
type digestEntries []*digestEntry
|
||||||
|
|
||||||
|
func (d digestEntries) Len() int {
|
||||||
|
return len(d)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d digestEntries) Less(i, j int) bool {
|
||||||
|
if d[i].val != d[j].val {
|
||||||
|
return d[i].val < d[j].val
|
||||||
|
}
|
||||||
|
return d[i].alg < d[j].alg
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d digestEntries) Swap(i, j int) {
|
||||||
|
d[i], d[j] = d[j], d[i]
|
||||||
|
}
|
272
digest/set_test.go
Normal file
272
digest/set_test.go
Normal file
|
@ -0,0 +1,272 @@
|
||||||
|
package digest
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/binary"
|
||||||
|
"math/rand"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func assertEqualDigests(t *testing.T, d1, d2 Digest) {
|
||||||
|
if d1 != d2 {
|
||||||
|
t.Fatalf("Digests do not match:\n\tActual: %s\n\tExpected: %s", d1, d2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLookup(t *testing.T) {
|
||||||
|
digests := []Digest{
|
||||||
|
"sha256:12345",
|
||||||
|
"sha256:1234",
|
||||||
|
"sha256:12346",
|
||||||
|
"sha256:54321",
|
||||||
|
"sha256:65431",
|
||||||
|
"sha256:64321",
|
||||||
|
"sha256:65421",
|
||||||
|
"sha256:65321",
|
||||||
|
}
|
||||||
|
|
||||||
|
dset := NewSet()
|
||||||
|
for i := range digests {
|
||||||
|
if err := dset.Add(digests[i]); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dgst, err := dset.Lookup("54")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
assertEqualDigests(t, dgst, digests[3])
|
||||||
|
|
||||||
|
dgst, err = dset.Lookup("1234")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("Expected ambiguous error looking up: 1234")
|
||||||
|
}
|
||||||
|
if err != ErrDigestAmbiguous {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
dgst, err = dset.Lookup("9876")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("Expected ambiguous error looking up: 9876")
|
||||||
|
}
|
||||||
|
if err != ErrDigestNotFound {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
dgst, err = dset.Lookup("sha256:1234")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
assertEqualDigests(t, dgst, digests[1])
|
||||||
|
|
||||||
|
dgst, err = dset.Lookup("sha256:12345")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
assertEqualDigests(t, dgst, digests[0])
|
||||||
|
|
||||||
|
dgst, err = dset.Lookup("sha256:12346")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
assertEqualDigests(t, dgst, digests[2])
|
||||||
|
|
||||||
|
dgst, err = dset.Lookup("12346")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
assertEqualDigests(t, dgst, digests[2])
|
||||||
|
|
||||||
|
dgst, err = dset.Lookup("12345")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
assertEqualDigests(t, dgst, digests[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAddDuplication(t *testing.T) {
|
||||||
|
digests := []Digest{
|
||||||
|
"sha256:1234",
|
||||||
|
"sha256:12345",
|
||||||
|
"sha256:12346",
|
||||||
|
"sha256:54321",
|
||||||
|
"sha256:65431",
|
||||||
|
"sha512:65431",
|
||||||
|
"sha512:65421",
|
||||||
|
"sha512:65321",
|
||||||
|
}
|
||||||
|
|
||||||
|
dset := NewSet()
|
||||||
|
for i := range digests {
|
||||||
|
if err := dset.Add(digests[i]); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(dset.entries) != 8 {
|
||||||
|
t.Fatal("Invalid dset size")
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := dset.Add(Digest("sha256:12345")); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(dset.entries) != 8 {
|
||||||
|
t.Fatal("Duplicate digest insert allowed")
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := dset.Add(Digest("sha384:12345")); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(dset.entries) != 9 {
|
||||||
|
t.Fatal("Insert with different algorithm not allowed")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// assertEqualShort fails the test immediately unless the actual short code
// equals the expected one.
func assertEqualShort(t *testing.T, actual, expected string) {
	if actual == expected {
		return
	}
	t.Fatalf("Unexpected short value:\n\tExpected: %s\n\tActual: %s", expected, actual)
}
|
||||||
|
|
||||||
|
func TestShortCodeTable(t *testing.T) {
|
||||||
|
digests := []Digest{
|
||||||
|
"sha256:1234",
|
||||||
|
"sha256:12345",
|
||||||
|
"sha256:12346",
|
||||||
|
"sha256:54321",
|
||||||
|
"sha256:65431",
|
||||||
|
"sha256:64321",
|
||||||
|
"sha256:65421",
|
||||||
|
"sha256:65321",
|
||||||
|
}
|
||||||
|
|
||||||
|
dset := NewSet()
|
||||||
|
for i := range digests {
|
||||||
|
if err := dset.Add(digests[i]); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dump := ShortCodeTable(dset, 2)
|
||||||
|
|
||||||
|
if len(dump) < len(digests) {
|
||||||
|
t.Fatalf("Error unexpected size: %d, expecting %d", len(dump), len(digests))
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEqualShort(t, dump[digests[0]], "sha256:1234")
|
||||||
|
assertEqualShort(t, dump[digests[1]], "sha256:12345")
|
||||||
|
assertEqualShort(t, dump[digests[2]], "sha256:12346")
|
||||||
|
assertEqualShort(t, dump[digests[3]], "54")
|
||||||
|
assertEqualShort(t, dump[digests[4]], "6543")
|
||||||
|
assertEqualShort(t, dump[digests[5]], "64")
|
||||||
|
assertEqualShort(t, dump[digests[6]], "6542")
|
||||||
|
assertEqualShort(t, dump[digests[7]], "653")
|
||||||
|
}
|
||||||
|
|
||||||
|
func createDigests(count int) ([]Digest, error) {
|
||||||
|
r := rand.New(rand.NewSource(25823))
|
||||||
|
digests := make([]Digest, count)
|
||||||
|
for i := range digests {
|
||||||
|
h := sha256.New()
|
||||||
|
if err := binary.Write(h, binary.BigEndian, r.Int63()); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
digests[i] = NewDigest("sha256", h)
|
||||||
|
}
|
||||||
|
return digests, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func benchAddNTable(b *testing.B, n int) {
|
||||||
|
digests, err := createDigests(n)
|
||||||
|
if err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))}
|
||||||
|
for j := range digests {
|
||||||
|
if err = dset.Add(digests[j]); err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func benchLookupNTable(b *testing.B, n int, shortLen int) {
|
||||||
|
digests, err := createDigests(n)
|
||||||
|
if err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))}
|
||||||
|
for i := range digests {
|
||||||
|
if err := dset.Add(digests[i]); err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
shorts := make([]string, 0, n)
|
||||||
|
for _, short := range ShortCodeTable(dset, shortLen) {
|
||||||
|
shorts = append(shorts, short)
|
||||||
|
}
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
if _, err = dset.Lookup(shorts[i%n]); err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func benchShortCodeNTable(b *testing.B, n int, shortLen int) {
|
||||||
|
digests, err := createDigests(n)
|
||||||
|
if err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))}
|
||||||
|
for i := range digests {
|
||||||
|
if err := dset.Add(digests[i]); err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
ShortCodeTable(dset, shortLen)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkAdd10(b *testing.B) {
|
||||||
|
benchAddNTable(b, 10)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkAdd100(b *testing.B) {
|
||||||
|
benchAddNTable(b, 100)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkAdd1000(b *testing.B) {
|
||||||
|
benchAddNTable(b, 1000)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkLookup10(b *testing.B) {
|
||||||
|
benchLookupNTable(b, 10, 12)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkLookup100(b *testing.B) {
|
||||||
|
benchLookupNTable(b, 100, 12)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkLookup1000(b *testing.B) {
|
||||||
|
benchLookupNTable(b, 1000, 12)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkShortCode10(b *testing.B) {
|
||||||
|
benchShortCodeNTable(b, 10, 12)
|
||||||
|
}
|
||||||
|
func BenchmarkShortCode100(b *testing.B) {
|
||||||
|
benchShortCodeNTable(b, 100, 12)
|
||||||
|
}
|
||||||
|
func BenchmarkShortCode1000(b *testing.B) {
|
||||||
|
benchShortCodeNTable(b, 1000, 12)
|
||||||
|
}
|
Loading…
Reference in a new issue