check: Simplify blob status tracking

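UnusedBlobs now reads the list of existing blobs directly from the
repository index instead of from the checker's own bookkeeping. This
removes the need for the blobStatusExists flag; with only the
"referenced" state left to track, set membership is sufficient, which
in turn allows converting the blobRefs map into a BlobSet.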
This commit is contained in:
Michael Eischer 2020-11-06 23:41:04 +01:00
parent b8c7543a55
commit 3500f9490c
4 changed files with 18 additions and 24 deletions

View file

@ -255,7 +255,7 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
} }
if opts.CheckUnused { if opts.CheckUnused {
for _, id := range chkr.UnusedBlobs() { for _, id := range chkr.UnusedBlobs(gopts.ctx) {
Verbosef("unused blob %v\n", id) Verbosef("unused blob %v\n", id)
errorsFound = true errorsFound = true
} }

View file

@ -25,8 +25,7 @@ type Checker struct {
packs map[restic.ID]int64 packs map[restic.ID]int64
blobRefs struct { blobRefs struct {
sync.Mutex sync.Mutex
// see flags below M restic.BlobSet
M map[restic.BlobHandle]blobStatus
} }
masterIndex *repository.MasterIndex masterIndex *repository.MasterIndex
@ -34,13 +33,6 @@ type Checker struct {
repo restic.Repository repo restic.Repository
} }
type blobStatus uint8
const (
blobStatusExists blobStatus = 1 << iota
blobStatusReferenced
)
// New returns a new checker which runs on repo. // New returns a new checker which runs on repo.
func New(repo restic.Repository) *Checker { func New(repo restic.Repository) *Checker {
c := &Checker{ c := &Checker{
@ -49,7 +41,7 @@ func New(repo restic.Repository) *Checker {
repo: repo, repo: repo,
} }
c.blobRefs.M = make(map[restic.BlobHandle]blobStatus) c.blobRefs.M = restic.NewBlobSet()
return c return c
} }
@ -162,8 +154,6 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
debug.Log("process blobs") debug.Log("process blobs")
cnt := 0 cnt := 0
for blob := range res.Index.Each(wgCtx) { for blob := range res.Index.Each(wgCtx) {
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
c.blobRefs.M[h] = blobStatusExists
cnt++ cnt++
if _, ok := packToIndex[blob.PackID]; !ok { if _, ok := packToIndex[blob.PackID]; !ok {
@ -529,9 +519,9 @@ func (c *Checker) filterTrees(ctx context.Context, backlog restic.IDs, loaderCha
// even when a file references a tree blob // even when a file references a tree blob
c.blobRefs.Lock() c.blobRefs.Lock()
h := restic.BlobHandle{ID: nextTreeID, Type: restic.TreeBlob} h := restic.BlobHandle{ID: nextTreeID, Type: restic.TreeBlob}
status := c.blobRefs.M[h] blobReferenced := c.blobRefs.M.Has(h)
c.blobRefs.Unlock() c.blobRefs.Unlock()
if (status & blobStatusReferenced) != 0 { if blobReferenced {
continue continue
} }
@ -552,7 +542,7 @@ func (c *Checker) filterTrees(ctx context.Context, backlog restic.IDs, loaderCha
loadCh = nil loadCh = nil
c.blobRefs.Lock() c.blobRefs.Lock()
h := restic.BlobHandle{ID: nextTreeID, Type: restic.TreeBlob} h := restic.BlobHandle{ID: nextTreeID, Type: restic.TreeBlob}
c.blobRefs.M[h] |= blobStatusReferenced c.blobRefs.M.Insert(h)
c.blobRefs.Unlock() c.blobRefs.Unlock()
case j, ok := <-inCh: case j, ok := <-inCh:
@ -687,7 +677,7 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
for _, blobID := range blobs { for _, blobID := range blobs {
c.blobRefs.Lock() c.blobRefs.Lock()
h := restic.BlobHandle{ID: blobID, Type: restic.DataBlob} h := restic.BlobHandle{ID: blobID, Type: restic.DataBlob}
c.blobRefs.M[h] |= blobStatusReferenced c.blobRefs.M.Insert(h)
debug.Log("blob %v is referenced", blobID) debug.Log("blob %v is referenced", blobID)
c.blobRefs.Unlock() c.blobRefs.Unlock()
} }
@ -696,15 +686,19 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
} }
// UnusedBlobs returns all blobs that have never been referenced. // UnusedBlobs returns all blobs that have never been referenced.
func (c *Checker) UnusedBlobs() (blobs restic.BlobHandles) { func (c *Checker) UnusedBlobs(ctx context.Context) (blobs restic.BlobHandles) {
c.blobRefs.Lock() c.blobRefs.Lock()
defer c.blobRefs.Unlock() defer c.blobRefs.Unlock()
debug.Log("checking %d blobs", len(c.blobRefs.M)) debug.Log("checking %d blobs", len(c.blobRefs.M))
for id, flags := range c.blobRefs.M { ctx, cancel := context.WithCancel(ctx)
if (flags & blobStatusReferenced) == 0 { defer cancel()
debug.Log("blob %v not referenced", id)
blobs = append(blobs, id) for blob := range c.repo.Index().Each(ctx) {
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
if !c.blobRefs.M.Has(h) {
debug.Log("blob %v not referenced", h)
blobs = append(blobs, h)
} }
} }

View file

@ -181,7 +181,7 @@ func TestUnreferencedBlobs(t *testing.T) {
test.OKs(t, checkPacks(chkr)) test.OKs(t, checkPacks(chkr))
test.OKs(t, checkStruct(chkr)) test.OKs(t, checkStruct(chkr))
blobs := chkr.UnusedBlobs() blobs := chkr.UnusedBlobs(context.TODO())
sort.Sort(blobs) sort.Sort(blobs)
test.Equals(t, unusedBlobsBySnapshot, blobs) test.Equals(t, unusedBlobsBySnapshot, blobs)

View file

@ -37,7 +37,7 @@ func TestCheckRepo(t testing.TB, repo restic.Repository) {
} }
// unused blobs // unused blobs
blobs := chkr.UnusedBlobs() blobs := chkr.UnusedBlobs(context.TODO())
if len(blobs) > 0 { if len(blobs) > 0 {
t.Errorf("unused blobs found: %v", blobs) t.Errorf("unused blobs found: %v", blobs)
} }