Merge pull request #260 from dmcgowan/short-digests
Add short digest implementation
This commit is contained in:
commit
5556cd1ba1
2 changed files with 467 additions and 0 deletions
195
digest/set.go
Normal file
195
digest/set.go
Normal file
|
@ -0,0 +1,195 @@
|
|||
package digest
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrDigestNotFound is used when a matching digest
|
||||
// could not be found in a set.
|
||||
ErrDigestNotFound = errors.New("digest not found")
|
||||
|
||||
// ErrDigestAmbiguous is used when multiple digests
|
||||
// are found in a set. None of the matching digests
|
||||
// should be considered valid matches.
|
||||
ErrDigestAmbiguous = errors.New("ambiguous digest string")
|
||||
)
|
||||
|
||||
// Set is used to hold a unique set of digests which
|
||||
// may be easily referenced by easily referenced by a string
|
||||
// representation of the digest as well as short representation.
|
||||
// The uniqueness of the short representation is based on other
|
||||
// digests in the set. If digests are ommited from this set,
|
||||
// collisions in a larger set may not be detected, therefore it
|
||||
// is important to always do short representation lookups on
|
||||
// the complete set of digests. To mitigate collisions, an
|
||||
// appropriately long short code should be used.
|
||||
type Set struct {
|
||||
entries digestEntries
|
||||
}
|
||||
|
||||
// NewSet creates an empty set of digests
|
||||
// which may have digests added.
|
||||
func NewSet() *Set {
|
||||
return &Set{
|
||||
entries: digestEntries{},
|
||||
}
|
||||
}
|
||||
|
||||
// checkShortMatch checks whether two digests match as either whole
|
||||
// values or short values. This function does not test equality,
|
||||
// rather whether the second value could match against the first
|
||||
// value.
|
||||
func checkShortMatch(alg, hex, shortAlg, shortHex string) bool {
|
||||
if len(hex) == len(shortHex) {
|
||||
if hex != shortHex {
|
||||
return false
|
||||
}
|
||||
if len(shortAlg) > 0 && alg != shortAlg {
|
||||
return false
|
||||
}
|
||||
} else if !strings.HasPrefix(hex, shortHex) {
|
||||
return false
|
||||
} else if len(shortAlg) > 0 && alg != shortAlg {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Lookup looks for a digest matching the given string representation.
|
||||
// If no digests could be found ErrDigestNotFound will be returned
|
||||
// with an empty digest value. If multiple matches are found
|
||||
// ErrDigestAmbiguous will be returned with an empty digest value.
|
||||
func (dst *Set) Lookup(d string) (Digest, error) {
|
||||
if len(dst.entries) == 0 {
|
||||
return "", ErrDigestNotFound
|
||||
}
|
||||
var (
|
||||
searchFunc func(int) bool
|
||||
alg string
|
||||
hex string
|
||||
)
|
||||
dgst, err := ParseDigest(d)
|
||||
if err == ErrDigestInvalidFormat {
|
||||
hex = d
|
||||
searchFunc = func(i int) bool {
|
||||
return dst.entries[i].val >= d
|
||||
}
|
||||
} else {
|
||||
hex = dgst.Hex()
|
||||
alg = dgst.Algorithm()
|
||||
searchFunc = func(i int) bool {
|
||||
if dst.entries[i].val == hex {
|
||||
return dst.entries[i].alg >= alg
|
||||
}
|
||||
return dst.entries[i].val >= hex
|
||||
}
|
||||
}
|
||||
idx := sort.Search(len(dst.entries), searchFunc)
|
||||
if idx == len(dst.entries) || !checkShortMatch(dst.entries[idx].alg, dst.entries[idx].val, alg, hex) {
|
||||
return "", ErrDigestNotFound
|
||||
}
|
||||
if dst.entries[idx].alg == alg && dst.entries[idx].val == hex {
|
||||
return dst.entries[idx].digest, nil
|
||||
}
|
||||
if idx+1 < len(dst.entries) && checkShortMatch(dst.entries[idx+1].alg, dst.entries[idx+1].val, alg, hex) {
|
||||
return "", ErrDigestAmbiguous
|
||||
}
|
||||
|
||||
return dst.entries[idx].digest, nil
|
||||
}
|
||||
|
||||
// Add adds the given digests to the set. An error will be returned
|
||||
// if the given digest is invalid. If the digest already exists in the
|
||||
// table, this operation will be a no-op.
|
||||
func (dst *Set) Add(d Digest) error {
|
||||
if err := d.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
entry := &digestEntry{alg: d.Algorithm(), val: d.Hex(), digest: d}
|
||||
searchFunc := func(i int) bool {
|
||||
if dst.entries[i].val == entry.val {
|
||||
return dst.entries[i].alg >= entry.alg
|
||||
}
|
||||
return dst.entries[i].val >= entry.val
|
||||
}
|
||||
idx := sort.Search(len(dst.entries), searchFunc)
|
||||
if idx == len(dst.entries) {
|
||||
dst.entries = append(dst.entries, entry)
|
||||
return nil
|
||||
} else if dst.entries[idx].digest == d {
|
||||
return nil
|
||||
}
|
||||
|
||||
entries := append(dst.entries, nil)
|
||||
copy(entries[idx+1:], entries[idx:len(entries)-1])
|
||||
entries[idx] = entry
|
||||
dst.entries = entries
|
||||
return nil
|
||||
}
|
||||
|
||||
// ShortCodeTable returns a map of Digest to unique short codes. The
|
||||
// length represents the minimum value, the maximum length may be the
|
||||
// entire value of digest if uniqueness cannot be achieved without the
|
||||
// full value. This function will attempt to make short codes as short
|
||||
// as possible to be unique.
|
||||
func ShortCodeTable(dst *Set, length int) map[Digest]string {
|
||||
m := make(map[Digest]string, len(dst.entries))
|
||||
l := length
|
||||
resetIdx := 0
|
||||
for i := 0; i < len(dst.entries); i++ {
|
||||
var short string
|
||||
extended := true
|
||||
for extended {
|
||||
extended = false
|
||||
if len(dst.entries[i].val) <= l {
|
||||
short = dst.entries[i].digest.String()
|
||||
} else {
|
||||
short = dst.entries[i].val[:l]
|
||||
for j := i + 1; j < len(dst.entries); j++ {
|
||||
if checkShortMatch(dst.entries[j].alg, dst.entries[j].val, "", short) {
|
||||
if j > resetIdx {
|
||||
resetIdx = j
|
||||
}
|
||||
extended = true
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
if extended {
|
||||
l++
|
||||
}
|
||||
}
|
||||
}
|
||||
m[dst.entries[i].digest] = short
|
||||
if i >= resetIdx {
|
||||
l = length
|
||||
}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
type digestEntry struct {
|
||||
alg string
|
||||
val string
|
||||
digest Digest
|
||||
}
|
||||
|
||||
type digestEntries []*digestEntry
|
||||
|
||||
func (d digestEntries) Len() int {
|
||||
return len(d)
|
||||
}
|
||||
|
||||
func (d digestEntries) Less(i, j int) bool {
|
||||
if d[i].val != d[j].val {
|
||||
return d[i].val < d[j].val
|
||||
}
|
||||
return d[i].alg < d[j].alg
|
||||
}
|
||||
|
||||
func (d digestEntries) Swap(i, j int) {
|
||||
d[i], d[j] = d[j], d[i]
|
||||
}
|
272
digest/set_test.go
Normal file
272
digest/set_test.go
Normal file
|
@ -0,0 +1,272 @@
|
|||
package digest
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/binary"
|
||||
"math/rand"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func assertEqualDigests(t *testing.T, d1, d2 Digest) {
|
||||
if d1 != d2 {
|
||||
t.Fatalf("Digests do not match:\n\tActual: %s\n\tExpected: %s", d1, d2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLookup(t *testing.T) {
|
||||
digests := []Digest{
|
||||
"sha256:12345",
|
||||
"sha256:1234",
|
||||
"sha256:12346",
|
||||
"sha256:54321",
|
||||
"sha256:65431",
|
||||
"sha256:64321",
|
||||
"sha256:65421",
|
||||
"sha256:65321",
|
||||
}
|
||||
|
||||
dset := NewSet()
|
||||
for i := range digests {
|
||||
if err := dset.Add(digests[i]); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
dgst, err := dset.Lookup("54")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
assertEqualDigests(t, dgst, digests[3])
|
||||
|
||||
dgst, err = dset.Lookup("1234")
|
||||
if err == nil {
|
||||
t.Fatal("Expected ambiguous error looking up: 1234")
|
||||
}
|
||||
if err != ErrDigestAmbiguous {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
dgst, err = dset.Lookup("9876")
|
||||
if err == nil {
|
||||
t.Fatal("Expected ambiguous error looking up: 9876")
|
||||
}
|
||||
if err != ErrDigestNotFound {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
dgst, err = dset.Lookup("sha256:1234")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
assertEqualDigests(t, dgst, digests[1])
|
||||
|
||||
dgst, err = dset.Lookup("sha256:12345")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
assertEqualDigests(t, dgst, digests[0])
|
||||
|
||||
dgst, err = dset.Lookup("sha256:12346")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
assertEqualDigests(t, dgst, digests[2])
|
||||
|
||||
dgst, err = dset.Lookup("12346")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
assertEqualDigests(t, dgst, digests[2])
|
||||
|
||||
dgst, err = dset.Lookup("12345")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
assertEqualDigests(t, dgst, digests[0])
|
||||
}
|
||||
|
||||
func TestAddDuplication(t *testing.T) {
|
||||
digests := []Digest{
|
||||
"sha256:1234",
|
||||
"sha256:12345",
|
||||
"sha256:12346",
|
||||
"sha256:54321",
|
||||
"sha256:65431",
|
||||
"sha512:65431",
|
||||
"sha512:65421",
|
||||
"sha512:65321",
|
||||
}
|
||||
|
||||
dset := NewSet()
|
||||
for i := range digests {
|
||||
if err := dset.Add(digests[i]); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(dset.entries) != 8 {
|
||||
t.Fatal("Invalid dset size")
|
||||
}
|
||||
|
||||
if err := dset.Add(Digest("sha256:12345")); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(dset.entries) != 8 {
|
||||
t.Fatal("Duplicate digest insert allowed")
|
||||
}
|
||||
|
||||
if err := dset.Add(Digest("sha384:12345")); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(dset.entries) != 9 {
|
||||
t.Fatal("Insert with different algorithm not allowed")
|
||||
}
|
||||
}
|
||||
|
||||
func assertEqualShort(t *testing.T, actual, expected string) {
|
||||
if actual != expected {
|
||||
t.Fatalf("Unexpected short value:\n\tExpected: %s\n\tActual: %s", expected, actual)
|
||||
}
|
||||
}
|
||||
|
||||
func TestShortCodeTable(t *testing.T) {
|
||||
digests := []Digest{
|
||||
"sha256:1234",
|
||||
"sha256:12345",
|
||||
"sha256:12346",
|
||||
"sha256:54321",
|
||||
"sha256:65431",
|
||||
"sha256:64321",
|
||||
"sha256:65421",
|
||||
"sha256:65321",
|
||||
}
|
||||
|
||||
dset := NewSet()
|
||||
for i := range digests {
|
||||
if err := dset.Add(digests[i]); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
dump := ShortCodeTable(dset, 2)
|
||||
|
||||
if len(dump) < len(digests) {
|
||||
t.Fatalf("Error unexpected size: %d, expecting %d", len(dump), len(digests))
|
||||
}
|
||||
|
||||
assertEqualShort(t, dump[digests[0]], "sha256:1234")
|
||||
assertEqualShort(t, dump[digests[1]], "sha256:12345")
|
||||
assertEqualShort(t, dump[digests[2]], "sha256:12346")
|
||||
assertEqualShort(t, dump[digests[3]], "54")
|
||||
assertEqualShort(t, dump[digests[4]], "6543")
|
||||
assertEqualShort(t, dump[digests[5]], "64")
|
||||
assertEqualShort(t, dump[digests[6]], "6542")
|
||||
assertEqualShort(t, dump[digests[7]], "653")
|
||||
}
|
||||
|
||||
func createDigests(count int) ([]Digest, error) {
|
||||
r := rand.New(rand.NewSource(25823))
|
||||
digests := make([]Digest, count)
|
||||
for i := range digests {
|
||||
h := sha256.New()
|
||||
if err := binary.Write(h, binary.BigEndian, r.Int63()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
digests[i] = NewDigest("sha256", h)
|
||||
}
|
||||
return digests, nil
|
||||
}
|
||||
|
||||
func benchAddNTable(b *testing.B, n int) {
|
||||
digests, err := createDigests(n)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))}
|
||||
for j := range digests {
|
||||
if err = dset.Add(digests[j]); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func benchLookupNTable(b *testing.B, n int, shortLen int) {
|
||||
digests, err := createDigests(n)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))}
|
||||
for i := range digests {
|
||||
if err := dset.Add(digests[i]); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
shorts := make([]string, 0, n)
|
||||
for _, short := range ShortCodeTable(dset, shortLen) {
|
||||
shorts = append(shorts, short)
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
if _, err = dset.Lookup(shorts[i%n]); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func benchShortCodeNTable(b *testing.B, n int, shortLen int) {
|
||||
digests, err := createDigests(n)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))}
|
||||
for i := range digests {
|
||||
if err := dset.Add(digests[i]); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
ShortCodeTable(dset, shortLen)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAdd10(b *testing.B) {
|
||||
benchAddNTable(b, 10)
|
||||
}
|
||||
|
||||
func BenchmarkAdd100(b *testing.B) {
|
||||
benchAddNTable(b, 100)
|
||||
}
|
||||
|
||||
func BenchmarkAdd1000(b *testing.B) {
|
||||
benchAddNTable(b, 1000)
|
||||
}
|
||||
|
||||
func BenchmarkLookup10(b *testing.B) {
|
||||
benchLookupNTable(b, 10, 12)
|
||||
}
|
||||
|
||||
func BenchmarkLookup100(b *testing.B) {
|
||||
benchLookupNTable(b, 100, 12)
|
||||
}
|
||||
|
||||
func BenchmarkLookup1000(b *testing.B) {
|
||||
benchLookupNTable(b, 1000, 12)
|
||||
}
|
||||
|
||||
func BenchmarkShortCode10(b *testing.B) {
|
||||
benchShortCodeNTable(b, 10, 12)
|
||||
}
|
||||
func BenchmarkShortCode100(b *testing.B) {
|
||||
benchShortCodeNTable(b, 100, 12)
|
||||
}
|
||||
func BenchmarkShortCode1000(b *testing.B) {
|
||||
benchShortCodeNTable(b, 1000, 12)
|
||||
}
|
Loading…
Reference in a new issue