forked from TrueCloudLab/restic
Reimplement rebuild-index
This commit is contained in:
parent
187c8fb259
commit
30b6a0878a
3 changed files with 110 additions and 55 deletions
|
@ -1,10 +1,8 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"github.com/restic/restic/internal/pack"
|
||||||
|
"github.com/restic/restic/internal/repository"
|
||||||
"github.com/restic/restic/internal/errors"
|
|
||||||
"github.com/restic/restic/internal/index"
|
|
||||||
"github.com/restic/restic/internal/restic"
|
"github.com/restic/restic/internal/restic"
|
||||||
|
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
|
@ -12,7 +10,7 @@ import (
|
||||||
|
|
||||||
var cmdRebuildIndex = &cobra.Command{
|
var cmdRebuildIndex = &cobra.Command{
|
||||||
Use: "rebuild-index [flags]",
|
Use: "rebuild-index [flags]",
|
||||||
Short: "Build a new index file",
|
Short: "Build a new index",
|
||||||
Long: `
|
Long: `
|
||||||
The "rebuild-index" command creates a new index based on the pack files in the
|
The "rebuild-index" command creates a new index based on the pack files in the
|
||||||
repository.
|
repository.
|
||||||
|
@ -24,15 +22,25 @@ Exit status is 0 if the command was successful, and non-zero if there was any er
|
||||||
`,
|
`,
|
||||||
DisableAutoGenTag: true,
|
DisableAutoGenTag: true,
|
||||||
RunE: func(cmd *cobra.Command, args []string) error {
|
RunE: func(cmd *cobra.Command, args []string) error {
|
||||||
return runRebuildIndex(globalOptions)
|
return runRebuildIndex(rebuildIndexOptions, globalOptions)
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func init() {
|
// RebuildIndexOptions collects all options for the rebuild-index command.
|
||||||
cmdRoot.AddCommand(cmdRebuildIndex)
|
type RebuildIndexOptions struct {
|
||||||
|
ReadAllPacks bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func runRebuildIndex(gopts GlobalOptions) error {
|
var rebuildIndexOptions RebuildIndexOptions
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
cmdRoot.AddCommand(cmdRebuildIndex)
|
||||||
|
f := cmdRebuildIndex.Flags()
|
||||||
|
f.BoolVar(&rebuildIndexOptions.ReadAllPacks, "read-all-packs", false, "read all pack files to generate new index from scratch")
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func runRebuildIndex(opts RebuildIndexOptions, gopts GlobalOptions) error {
|
||||||
repo, err := OpenRepository(gopts)
|
repo, err := OpenRepository(gopts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -44,59 +52,100 @@ func runRebuildIndex(gopts GlobalOptions) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx, cancel := context.WithCancel(gopts.ctx)
|
return rebuildIndex(opts, gopts, repo, restic.NewIDSet())
|
||||||
defer cancel()
|
|
||||||
return rebuildIndex(ctx, repo, restic.NewIDSet())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func rebuildIndex(ctx context.Context, repo restic.Repository, ignorePacks restic.IDSet) error {
|
func rebuildIndex(opts RebuildIndexOptions, gopts GlobalOptions, repo *repository.Repository, ignorePacks restic.IDSet) error {
|
||||||
Verbosef("counting files in repo\n")
|
ctx := gopts.ctx
|
||||||
|
|
||||||
var packs uint64
|
var obsolete restic.IDs
|
||||||
err := repo.List(ctx, restic.PackFile, func(restic.ID, int64) error {
|
packSizeFromList := make(map[restic.ID]int64)
|
||||||
packs++
|
packs := restic.NewIDSet()
|
||||||
return nil
|
totalPacks := 0
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
bar := newProgressMax(!globalOptions.Quiet, packs-uint64(len(ignorePacks)), "packs")
|
if opts.ReadAllPacks {
|
||||||
idx, invalidFiles, err := index.New(ctx, repo, ignorePacks, bar)
|
// get old index files
|
||||||
bar.Done()
|
err := repo.List(ctx, restic.IndexFile, func(id restic.ID, size int64) error {
|
||||||
if err != nil {
|
obsolete = append(obsolete, id)
|
||||||
return err
|
return nil
|
||||||
}
|
})
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
if globalOptions.verbosity >= 2 {
|
Verbosef("finding pack files in repo...\n")
|
||||||
for _, id := range invalidFiles {
|
err = repo.List(ctx, restic.PackFile, func(id restic.ID, size int64) error {
|
||||||
Printf("skipped incomplete pack file: %v\n", id)
|
packSizeFromList[id] = size
|
||||||
|
packs.Insert(id)
|
||||||
|
totalPacks++
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Verbosef("loading indexes...\n")
|
||||||
|
err := repo.LoadIndex(gopts.ctx)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
packSizeFromIndex := make(map[restic.ID]int64)
|
||||||
|
|
||||||
|
Verbosef("getting pack files to read...\n")
|
||||||
|
// iterate over all blobs in index
|
||||||
|
for blob := range repo.Index().Each(ctx) {
|
||||||
|
size, ok := packSizeFromIndex[blob.PackID]
|
||||||
|
if !ok {
|
||||||
|
size = pack.HeaderSize
|
||||||
|
}
|
||||||
|
size += int64(pack.PackedSizeOfBlob(blob.Length))
|
||||||
|
// update packSizeFromIndex
|
||||||
|
packSizeFromIndex[blob.PackID] = size
|
||||||
|
}
|
||||||
|
|
||||||
|
err = repo.List(ctx, restic.PackFile, func(id restic.ID, packSize int64) error {
|
||||||
|
size, ok := packSizeFromIndex[id]
|
||||||
|
if !ok || size != packSize {
|
||||||
|
// Pack was not referenced in index or size does not match
|
||||||
|
packSizeFromList[id] = size
|
||||||
|
packs.Insert(id)
|
||||||
|
}
|
||||||
|
totalPacks++
|
||||||
|
delete(packSizeFromIndex, id)
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
for id := range packSizeFromIndex {
|
||||||
|
// ignore pack files that are referenced in the index but do not exist
|
||||||
|
// when rebuilding the index
|
||||||
|
packs.Insert(id)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Verbosef("finding old index files\n")
|
if len(packSizeFromList) > 0 {
|
||||||
|
Verbosef("reading pack files\n")
|
||||||
|
bar := newProgressMax(!globalOptions.Quiet, uint64(len(packSizeFromList)), "packs")
|
||||||
|
invalidFiles, err := repo.LoadIndexFromPacks(ctx, packSizeFromList, bar)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
var supersedes restic.IDs
|
for _, id := range invalidFiles {
|
||||||
err = repo.List(ctx, restic.IndexFile, func(id restic.ID, size int64) error {
|
Verboseff("skipped incomplete pack file: %v\n", id)
|
||||||
supersedes = append(supersedes, id)
|
totalPacks--
|
||||||
return nil
|
}
|
||||||
})
|
} else {
|
||||||
|
Verbosef("no need to read any pack file\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
err := rebuildIndexFiles(gopts, repo, packs, obsolete, uint64(totalPacks))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
Verbosef("done\n")
|
||||||
ids, err := idx.Save(ctx, repo, supersedes)
|
|
||||||
if err != nil {
|
|
||||||
return errors.Fatalf("unable to save index, last error was: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
Verbosef("saved new indexes as %v\n", ids)
|
|
||||||
|
|
||||||
Verbosef("remove %d old index files\n", len(supersedes))
|
|
||||||
err = DeleteFilesChecked(globalOptions, repo, restic.NewIDSet(supersedes...), restic.IndexFile)
|
|
||||||
if err != nil {
|
|
||||||
return errors.Fatalf("unable to remove an old index: %v\n", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -175,7 +175,7 @@ func testRunRebuildIndex(t testing.TB, gopts GlobalOptions) {
|
||||||
globalOptions.stdout = os.Stdout
|
globalOptions.stdout = os.Stdout
|
||||||
}()
|
}()
|
||||||
|
|
||||||
rtest.OK(t, runRebuildIndex(gopts))
|
rtest.OK(t, runRebuildIndex(RebuildIndexOptions{}, gopts))
|
||||||
}
|
}
|
||||||
|
|
||||||
func testRunLs(t testing.TB, gopts GlobalOptions, snapshotID string) []string {
|
func testRunLs(t testing.TB, gopts GlobalOptions, snapshotID string) []string {
|
||||||
|
@ -1351,7 +1351,7 @@ func TestRebuildIndexFailsOnAppendOnly(t *testing.T) {
|
||||||
env.gopts.backendTestHook = func(r restic.Backend) (restic.Backend, error) {
|
env.gopts.backendTestHook = func(r restic.Backend) (restic.Backend, error) {
|
||||||
return &appendOnlyBackend{r}, nil
|
return &appendOnlyBackend{r}, nil
|
||||||
}
|
}
|
||||||
err := runRebuildIndex(env.gopts)
|
err := runRebuildIndex(RebuildIndexOptions{}, env.gopts)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Error("expected rebuildIndex to fail")
|
t.Error("expected rebuildIndex to fail")
|
||||||
}
|
}
|
||||||
|
@ -1583,7 +1583,7 @@ func (be *listOnceBackend) List(ctx context.Context, t restic.FileType, fn func(
|
||||||
return be.Backend.List(ctx, t, fn)
|
return be.Backend.List(ctx, t, fn)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestPruneListOnce(t *testing.T) {
|
func TestListOnce(t *testing.T) {
|
||||||
env, cleanup := withTestEnvironment(t)
|
env, cleanup := withTestEnvironment(t)
|
||||||
defer cleanup()
|
defer cleanup()
|
||||||
|
|
||||||
|
@ -1613,6 +1613,9 @@ func TestPruneListOnce(t *testing.T) {
|
||||||
testRunForget(t, env.gopts, firstSnapshot[0].String())
|
testRunForget(t, env.gopts, firstSnapshot[0].String())
|
||||||
testRunPrune(t, env.gopts, pruneOpts)
|
testRunPrune(t, env.gopts, pruneOpts)
|
||||||
rtest.OK(t, runCheck(checkOpts, env.gopts, nil))
|
rtest.OK(t, runCheck(checkOpts, env.gopts, nil))
|
||||||
|
|
||||||
|
rtest.OK(t, runRebuildIndex(RebuildIndexOptions{}, env.gopts))
|
||||||
|
rtest.OK(t, runRebuildIndex(RebuildIndexOptions{ReadAllPacks: true}, env.gopts))
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestHardLink(t *testing.T) {
|
func TestHardLink(t *testing.T) {
|
||||||
|
|
|
@ -281,7 +281,10 @@ type EachByPackResult struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
// EachByPack returns a channel that yields all blobs known to the index
|
// EachByPack returns a channel that yields all blobs known to the index
|
||||||
// grouped by packID but ignoring blobs with a packID in packBlacklist.
|
// grouped by packID but ignoring blobs with a packID in packBlacklist for
|
||||||
|
// finalized indexes.
|
||||||
|
// This filtering is used when rebuilding the index where we need to ignore packs
|
||||||
|
// from the finalized index which have been re-read into a non-finalized index.
|
||||||
// When the context is cancelled, the background goroutine
|
// When the context is cancelled, the background goroutine
|
||||||
// terminates. This blocks any modification of the index.
|
// terminates. This blocks any modification of the index.
|
||||||
func (idx *Index) EachByPack(ctx context.Context, packBlacklist restic.IDSet) <-chan EachByPackResult {
|
func (idx *Index) EachByPack(ctx context.Context, packBlacklist restic.IDSet) <-chan EachByPackResult {
|
||||||
|
@ -300,7 +303,7 @@ func (idx *Index) EachByPack(ctx context.Context, packBlacklist restic.IDSet) <-
|
||||||
m := &idx.byType[typ]
|
m := &idx.byType[typ]
|
||||||
m.foreach(func(e *indexEntry) bool {
|
m.foreach(func(e *indexEntry) bool {
|
||||||
packID := idx.packs[e.packIndex]
|
packID := idx.packs[e.packIndex]
|
||||||
if !packBlacklist.Has(packID) {
|
if !idx.final || !packBlacklist.Has(packID) {
|
||||||
byPack[packID] = append(byPack[packID], e)
|
byPack[packID] = append(byPack[packID], e)
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
|
|
Loading…
Reference in a new issue