Reimplement rebuild-index

This commit is contained in:
Alexander Weiss 2020-10-10 21:51:11 +02:00
parent 187c8fb259
commit 30b6a0878a
3 changed files with 110 additions and 55 deletions

View file

@ -1,10 +1,8 @@
package main package main
import ( import (
"context" "github.com/restic/restic/internal/pack"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/index"
"github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/restic"
"github.com/spf13/cobra" "github.com/spf13/cobra"
@ -12,7 +10,7 @@ import (
var cmdRebuildIndex = &cobra.Command{ var cmdRebuildIndex = &cobra.Command{
Use: "rebuild-index [flags]", Use: "rebuild-index [flags]",
Short: "Build a new index file", Short: "Build a new index",
Long: ` Long: `
The "rebuild-index" command creates a new index based on the pack files in the The "rebuild-index" command creates a new index based on the pack files in the
repository. repository.
@ -24,15 +22,25 @@ Exit status is 0 if the command was successful, and non-zero if there was any er
`, `,
DisableAutoGenTag: true, DisableAutoGenTag: true,
RunE: func(cmd *cobra.Command, args []string) error { RunE: func(cmd *cobra.Command, args []string) error {
return runRebuildIndex(globalOptions) return runRebuildIndex(rebuildIndexOptions, globalOptions)
}, },
} }
func init() { // RebuildIndexOptions collects all options for the rebuild-index command.
cmdRoot.AddCommand(cmdRebuildIndex) type RebuildIndexOptions struct {
ReadAllPacks bool
} }
func runRebuildIndex(gopts GlobalOptions) error { var rebuildIndexOptions RebuildIndexOptions
func init() {
cmdRoot.AddCommand(cmdRebuildIndex)
f := cmdRebuildIndex.Flags()
f.BoolVar(&rebuildIndexOptions.ReadAllPacks, "read-all-packs", false, "read all pack files to generate new index from scratch")
}
func runRebuildIndex(opts RebuildIndexOptions, gopts GlobalOptions) error {
repo, err := OpenRepository(gopts) repo, err := OpenRepository(gopts)
if err != nil { if err != nil {
return err return err
@ -44,59 +52,100 @@ func runRebuildIndex(gopts GlobalOptions) error {
return err return err
} }
ctx, cancel := context.WithCancel(gopts.ctx) return rebuildIndex(opts, gopts, repo, restic.NewIDSet())
defer cancel()
return rebuildIndex(ctx, repo, restic.NewIDSet())
} }
func rebuildIndex(ctx context.Context, repo restic.Repository, ignorePacks restic.IDSet) error { func rebuildIndex(opts RebuildIndexOptions, gopts GlobalOptions, repo *repository.Repository, ignorePacks restic.IDSet) error {
Verbosef("counting files in repo\n") ctx := gopts.ctx
var packs uint64 var obsolete restic.IDs
err := repo.List(ctx, restic.PackFile, func(restic.ID, int64) error { packSizeFromList := make(map[restic.ID]int64)
packs++ packs := restic.NewIDSet()
return nil totalPacks := 0
})
if err != nil {
return err
}
bar := newProgressMax(!globalOptions.Quiet, packs-uint64(len(ignorePacks)), "packs") if opts.ReadAllPacks {
idx, invalidFiles, err := index.New(ctx, repo, ignorePacks, bar) // get old index files
bar.Done() err := repo.List(ctx, restic.IndexFile, func(id restic.ID, size int64) error {
if err != nil { obsolete = append(obsolete, id)
return err return nil
} })
if err != nil {
return err
}
if globalOptions.verbosity >= 2 { Verbosef("finding pack files in repo...\n")
for _, id := range invalidFiles { err = repo.List(ctx, restic.PackFile, func(id restic.ID, size int64) error {
Printf("skipped incomplete pack file: %v\n", id) packSizeFromList[id] = size
packs.Insert(id)
totalPacks++
return nil
})
if err != nil {
return err
}
} else {
Verbosef("loading indexes...\n")
err := repo.LoadIndex(gopts.ctx)
if err != nil {
return err
}
packSizeFromIndex := make(map[restic.ID]int64)
Verbosef("getting pack files to read...\n")
// iterate over all blobs in index
for blob := range repo.Index().Each(ctx) {
size, ok := packSizeFromIndex[blob.PackID]
if !ok {
size = pack.HeaderSize
}
size += int64(pack.PackedSizeOfBlob(blob.Length))
// update packSizeFromIndex
packSizeFromIndex[blob.PackID] = size
}
err = repo.List(ctx, restic.PackFile, func(id restic.ID, packSize int64) error {
size, ok := packSizeFromIndex[id]
if !ok || size != packSize {
// Pack was not referenced in index or size does not match
packSizeFromList[id] = size
packs.Insert(id)
}
totalPacks++
delete(packSizeFromIndex, id)
return nil
})
if err != nil {
return err
}
for id := range packSizeFromIndex {
// ignore pack files that are referenced in the index but do not exist
// when rebuilding the index
packs.Insert(id)
} }
} }
Verbosef("finding old index files\n") if len(packSizeFromList) > 0 {
Verbosef("reading pack files\n")
bar := newProgressMax(!globalOptions.Quiet, uint64(len(packSizeFromList)), "packs")
invalidFiles, err := repo.LoadIndexFromPacks(ctx, packSizeFromList, bar)
if err != nil {
return err
}
var supersedes restic.IDs for _, id := range invalidFiles {
err = repo.List(ctx, restic.IndexFile, func(id restic.ID, size int64) error { Verboseff("skipped incomplete pack file: %v\n", id)
supersedes = append(supersedes, id) totalPacks--
return nil }
}) } else {
Verbosef("no need to read any pack file\n")
}
err := rebuildIndexFiles(gopts, repo, packs, obsolete, uint64(totalPacks))
if err != nil { if err != nil {
return err return err
} }
Verbosef("done\n")
ids, err := idx.Save(ctx, repo, supersedes)
if err != nil {
return errors.Fatalf("unable to save index, last error was: %v", err)
}
Verbosef("saved new indexes as %v\n", ids)
Verbosef("remove %d old index files\n", len(supersedes))
err = DeleteFilesChecked(globalOptions, repo, restic.NewIDSet(supersedes...), restic.IndexFile)
if err != nil {
return errors.Fatalf("unable to remove an old index: %v\n", err)
}
return nil return nil
} }

View file

@ -175,7 +175,7 @@ func testRunRebuildIndex(t testing.TB, gopts GlobalOptions) {
globalOptions.stdout = os.Stdout globalOptions.stdout = os.Stdout
}() }()
rtest.OK(t, runRebuildIndex(gopts)) rtest.OK(t, runRebuildIndex(RebuildIndexOptions{}, gopts))
} }
func testRunLs(t testing.TB, gopts GlobalOptions, snapshotID string) []string { func testRunLs(t testing.TB, gopts GlobalOptions, snapshotID string) []string {
@ -1351,7 +1351,7 @@ func TestRebuildIndexFailsOnAppendOnly(t *testing.T) {
env.gopts.backendTestHook = func(r restic.Backend) (restic.Backend, error) { env.gopts.backendTestHook = func(r restic.Backend) (restic.Backend, error) {
return &appendOnlyBackend{r}, nil return &appendOnlyBackend{r}, nil
} }
err := runRebuildIndex(env.gopts) err := runRebuildIndex(RebuildIndexOptions{}, env.gopts)
if err == nil { if err == nil {
t.Error("expected rebuildIndex to fail") t.Error("expected rebuildIndex to fail")
} }
@ -1583,7 +1583,7 @@ func (be *listOnceBackend) List(ctx context.Context, t restic.FileType, fn func(
return be.Backend.List(ctx, t, fn) return be.Backend.List(ctx, t, fn)
} }
func TestPruneListOnce(t *testing.T) { func TestListOnce(t *testing.T) {
env, cleanup := withTestEnvironment(t) env, cleanup := withTestEnvironment(t)
defer cleanup() defer cleanup()
@ -1613,6 +1613,9 @@ func TestPruneListOnce(t *testing.T) {
testRunForget(t, env.gopts, firstSnapshot[0].String()) testRunForget(t, env.gopts, firstSnapshot[0].String())
testRunPrune(t, env.gopts, pruneOpts) testRunPrune(t, env.gopts, pruneOpts)
rtest.OK(t, runCheck(checkOpts, env.gopts, nil)) rtest.OK(t, runCheck(checkOpts, env.gopts, nil))
rtest.OK(t, runRebuildIndex(RebuildIndexOptions{}, env.gopts))
rtest.OK(t, runRebuildIndex(RebuildIndexOptions{ReadAllPacks: true}, env.gopts))
} }
func TestHardLink(t *testing.T) { func TestHardLink(t *testing.T) {

View file

@ -281,7 +281,10 @@ type EachByPackResult struct {
} }
// EachByPack returns a channel that yields all blobs known to the index // EachByPack returns a channel that yields all blobs known to the index
// grouped by packID but ignoring blobs with a packID in packBlacklist. // grouped by packID but ignoring blobs with a packID in packBlacklist for
// finalized indexes.
// This filtering is used when rebuilding the index where we need to ignore packs
// from the finalized index which have been re-read into a non-finalized index.
// When the context is cancelled, the background goroutine // When the context is cancelled, the background goroutine
// terminates. This blocks any modification of the index. // terminates. This blocks any modification of the index.
func (idx *Index) EachByPack(ctx context.Context, packBlacklist restic.IDSet) <-chan EachByPackResult { func (idx *Index) EachByPack(ctx context.Context, packBlacklist restic.IDSet) <-chan EachByPackResult {
@ -300,7 +303,7 @@ func (idx *Index) EachByPack(ctx context.Context, packBlacklist restic.IDSet) <-
m := &idx.byType[typ] m := &idx.byType[typ]
m.foreach(func(e *indexEntry) bool { m.foreach(func(e *indexEntry) bool {
packID := idx.packs[e.packIndex] packID := idx.packs[e.packIndex]
if !packBlacklist.Has(packID) { if !idx.final || !packBlacklist.Has(packID) {
byPack[packID] = append(byPack[packID], e) byPack[packID] = append(byPack[packID], e)
} }
return true return true