forked from TrueCloudLab/restic
Merge pull request #3548 from gum3ng/issue_3490
Support for specifying amount of data in read-data-subset
This commit is contained in:
commit
6c84ea1412
4 changed files with 98 additions and 9 deletions
9
changelog/unreleased/issue-3490
Normal file
9
changelog/unreleased/issue-3490
Normal file
|
@ -0,0 +1,9 @@
|
|||
Enhancement: Support for specifying file size in `check --read-data-subset`
|
||||
|
||||
To check a subset of repository files, the `check --read-data-subset` command
|
||||
used to support two ways to select a subset: a specific range of pack files,
|
||||
or a random percentage of pack files. We have added a third method to select pack
|
||||
files: by specifying a file size. This new option is available with the `restic check` command.
|
||||
|
||||
https://github.com/restic/restic/issues/3490
|
||||
https://github.com/restic/restic/pull/3548
|
|
@ -65,7 +65,7 @@ func checkFlags(opts CheckOptions) error {
|
|||
}
|
||||
if opts.ReadDataSubset != "" {
|
||||
dataSubset, err := stringToIntSlice(opts.ReadDataSubset)
|
||||
argumentError := errors.Fatal("check flag --read-data-subset must have two positive integer values or a percentage, e.g. --read-data-subset=1/2 or --read-data-subset=2.5%%")
|
||||
argumentError := errors.Fatal("check flag --read-data-subset must have two positive integer values or a percentage or a file size, e.g. --read-data-subset=1/2 or --read-data-subset=2.5%% or --read-data-subset=10G")
|
||||
if err == nil {
|
||||
if len(dataSubset) != 2 {
|
||||
return argumentError
|
||||
|
@ -76,7 +76,7 @@ func checkFlags(opts CheckOptions) error {
|
|||
if dataSubset[1] > totalBucketsMax {
|
||||
return errors.Fatalf("check flag --read-data-subset=n/t t must be at most %d", totalBucketsMax)
|
||||
}
|
||||
} else {
|
||||
} else if strings.HasSuffix(opts.ReadDataSubset, "%") {
|
||||
percentage, err := parsePercentage(opts.ReadDataSubset)
|
||||
if err != nil {
|
||||
return argumentError
|
||||
|
@ -86,6 +86,17 @@ func checkFlags(opts CheckOptions) error {
|
|||
return errors.Fatal(
|
||||
"check flag --read-data-subset=n% n must be above 0.0% and at most 100.0%")
|
||||
}
|
||||
|
||||
} else {
|
||||
fileSize, err := parseSizeStr(opts.ReadDataSubset)
|
||||
if err != nil {
|
||||
return argumentError
|
||||
}
|
||||
if fileSize <= 0.0 {
|
||||
return errors.Fatal(
|
||||
"check flag --read-data-subset=n n must be above 0.0")
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -294,11 +305,28 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
|
|||
packs = selectPacksByBucket(chkr.GetPacks(), bucket, totalBuckets)
|
||||
packCount := uint64(len(packs))
|
||||
Verbosef("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets)
|
||||
} else {
|
||||
percentage, _ := parsePercentage(opts.ReadDataSubset)
|
||||
} else if strings.HasSuffix(opts.ReadDataSubset, "%") {
|
||||
percentage, err := parsePercentage(opts.ReadDataSubset)
|
||||
if err == nil {
|
||||
packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage)
|
||||
Verbosef("read %.1f%% of data packs\n", percentage)
|
||||
}
|
||||
} else {
|
||||
repoSize := int64(0)
|
||||
allPacks := chkr.GetPacks()
|
||||
for _, size := range allPacks {
|
||||
repoSize += size
|
||||
}
|
||||
if repoSize == 0 {
|
||||
return errors.Fatal("Cannot read from a repository having size 0")
|
||||
}
|
||||
subsetSize, _ := parseSizeStr(opts.ReadDataSubset)
|
||||
if subsetSize > repoSize {
|
||||
subsetSize = repoSize
|
||||
}
|
||||
packs = selectRandomPacksByFileSize(chkr.GetPacks(), subsetSize, repoSize)
|
||||
Verbosef("read %d bytes of data packs\n", subsetSize)
|
||||
}
|
||||
if packs == nil {
|
||||
return errors.Fatal("internal error: failed to select packs to check")
|
||||
}
|
||||
|
@ -349,6 +377,11 @@ func selectRandomPacksByPercentage(allPacks map[restic.ID]int64, percentage floa
|
|||
id := keys[idx[i]]
|
||||
packs[id] = allPacks[id]
|
||||
}
|
||||
|
||||
return packs
|
||||
}
|
||||
|
||||
func selectRandomPacksByFileSize(allPacks map[restic.ID]int64, subsetSize int64, repoSize int64) map[restic.ID]int64 {
|
||||
subsetPercentage := (float64(subsetSize) / float64(repoSize)) * 100.0
|
||||
packs := selectRandomPacksByPercentage(allPacks, subsetPercentage)
|
||||
return packs
|
||||
}
|
||||
|
|
|
@ -129,3 +129,37 @@ func TestSelectNoRandomPacksByPercentage(t *testing.T) {
|
|||
selectedPacks := selectRandomPacksByPercentage(testPacks, 10.0)
|
||||
rtest.Assert(t, len(selectedPacks) == 0, "Expected 0 selected packs")
|
||||
}
|
||||
|
||||
func TestSelectRandomPacksByFileSize(t *testing.T) {
|
||||
var testPacks = make(map[restic.ID]int64)
|
||||
for i := 1; i <= 10; i++ {
|
||||
id := restic.NewRandomID()
|
||||
// ensure unique ids
|
||||
id[0] = byte(i)
|
||||
testPacks[id] = 0
|
||||
}
|
||||
|
||||
selectedPacks := selectRandomPacksByFileSize(testPacks, 10, 500)
|
||||
rtest.Assert(t, len(selectedPacks) == 1, "Expected 1 selected packs")
|
||||
|
||||
selectedPacks = selectRandomPacksByFileSize(testPacks, 10240, 51200)
|
||||
rtest.Assert(t, len(selectedPacks) == 2, "Expected 2 selected packs")
|
||||
for pack := range selectedPacks {
|
||||
_, ok := testPacks[pack]
|
||||
rtest.Assert(t, ok, "Unexpected selection")
|
||||
}
|
||||
|
||||
selectedPacks = selectRandomPacksByFileSize(testPacks, 500, 500)
|
||||
rtest.Assert(t, len(selectedPacks) == 10, "Expected 10 selected packs")
|
||||
for pack := range selectedPacks {
|
||||
_, ok := testPacks[pack]
|
||||
rtest.Assert(t, ok, "Unexpected item in selection")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSelectNoRandomPacksByFileSize(t *testing.T) {
|
||||
// that the a repository without pack files works
|
||||
var testPacks = make(map[restic.ID]int64)
|
||||
selectedPacks := selectRandomPacksByFileSize(testPacks, 10, 500)
|
||||
rtest.Assert(t, len(selectedPacks) == 0, "Expected 0 selected packs")
|
||||
}
|
||||
|
|
|
@ -249,9 +249,9 @@ integrity of the pack files in the repository, use the ``--read-data`` flag:
|
|||
and also that it takes more time than the default ``check``.
|
||||
|
||||
Alternatively, use the ``--read-data-subset`` parameter to check only a
|
||||
subset of the repository pack files at a time. It supports two ways to select a
|
||||
subset. One selects a specific range of pack files, the other selects a random
|
||||
percentage of pack files.
|
||||
subset of the repository pack files at a time. It supports three ways to select a
|
||||
subset. One selects a specific range of pack files, the second selects a random
|
||||
percentage of pack files, and the third selects a random subset of pack files based on a specified amount of data.
|
||||
|
||||
Use ``--read-data-subset=n/t`` to check only a subset of the repository pack
|
||||
files at a time. The parameter takes two values, ``n`` and ``t``. When the check
|
||||
|
@ -285,3 +285,16 @@ integer:
|
|||
.. code-block:: console
|
||||
|
||||
$ restic -r /srv/restic-repo check --read-data-subset=10%
|
||||
|
||||
Use ``--read-data-subset=NS`` to check a randomly chosen subset of the repository pack files.
|
||||
It takes one parameter, ``NS``, where ``N`` is a whole number representing file size and ``S`` is the unit
|
||||
of file size (B/K/M/G/T) of pack files to check. Behind the scenes, the specified size will be converted
|
||||
to a percentage of the total repository size. The behaviour of the check command following this conversion
|
||||
will be the same as the percentage option above. For a file size value the following commands may be used:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ restic -r /srv/restic-repo check --read-data-subset=50M
|
||||
$ restic -r /srv/restic-repo check --read-data-subset=10G
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue