forked from TrueCloudLab/restic
stats: Add debug mode to collect repository statistics
This commit is contained in:
parent
170e495334
commit
325fa916b5
2 changed files with 216 additions and 0 deletions
|
@ -5,11 +5,15 @@ import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/restic/chunker"
|
||||||
"github.com/restic/restic/internal/backend"
|
"github.com/restic/restic/internal/backend"
|
||||||
"github.com/restic/restic/internal/crypto"
|
"github.com/restic/restic/internal/crypto"
|
||||||
|
"github.com/restic/restic/internal/repository"
|
||||||
"github.com/restic/restic/internal/restic"
|
"github.com/restic/restic/internal/restic"
|
||||||
"github.com/restic/restic/internal/ui"
|
"github.com/restic/restic/internal/ui"
|
||||||
|
"github.com/restic/restic/internal/ui/table"
|
||||||
"github.com/restic/restic/internal/walker"
|
"github.com/restic/restic/internal/walker"
|
||||||
|
|
||||||
"github.com/minio/sha256-simd"
|
"github.com/minio/sha256-simd"
|
||||||
|
@ -99,6 +103,10 @@ func runStats(ctx context.Context, opts StatsOptions, gopts GlobalOptions, args
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if opts.countMode == countModeDebug {
|
||||||
|
return statsDebug(ctx, repo)
|
||||||
|
}
|
||||||
|
|
||||||
if !gopts.JSON {
|
if !gopts.JSON {
|
||||||
Printf("scanning...\n")
|
Printf("scanning...\n")
|
||||||
}
|
}
|
||||||
|
@ -291,6 +299,7 @@ func verifyStatsInput(opts StatsOptions) error {
|
||||||
case countModeUniqueFilesByContents:
|
case countModeUniqueFilesByContents:
|
||||||
case countModeBlobsPerFile:
|
case countModeBlobsPerFile:
|
||||||
case countModeRawData:
|
case countModeRawData:
|
||||||
|
case countModeDebug:
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("unknown counting mode: %s (use the -h flag to get a list of supported modes)", opts.countMode)
|
return fmt.Errorf("unknown counting mode: %s (use the -h flag to get a list of supported modes)", opts.countMode)
|
||||||
}
|
}
|
||||||
|
@ -335,4 +344,149 @@ const (
|
||||||
countModeUniqueFilesByContents = "files-by-contents"
|
countModeUniqueFilesByContents = "files-by-contents"
|
||||||
countModeBlobsPerFile = "blobs-per-file"
|
countModeBlobsPerFile = "blobs-per-file"
|
||||||
countModeRawData = "raw-data"
|
countModeRawData = "raw-data"
|
||||||
|
countModeDebug = "debug"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func statsDebug(ctx context.Context, repo restic.Repository) error {
|
||||||
|
Warnf("Collecting size statistics\n\n")
|
||||||
|
for _, t := range []restic.FileType{restic.KeyFile, restic.LockFile, restic.IndexFile, restic.PackFile} {
|
||||||
|
hist, err := statsDebugFileType(ctx, repo, t)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
Warnf("File Type: %v\n%v\n", t, hist)
|
||||||
|
}
|
||||||
|
|
||||||
|
hist := statsDebugBlobs(ctx, repo)
|
||||||
|
for _, t := range []restic.BlobType{restic.DataBlob, restic.TreeBlob} {
|
||||||
|
Warnf("Blob Type: %v\n%v\n\n", t, hist[t])
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func statsDebugFileType(ctx context.Context, repo restic.Repository, tpe restic.FileType) (*sizeHistogram, error) {
|
||||||
|
hist := newSizeHistogram(2 * repository.MaxPackSize)
|
||||||
|
err := repo.List(ctx, tpe, func(id restic.ID, size int64) error {
|
||||||
|
hist.Add(uint64(size))
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
|
||||||
|
return hist, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func statsDebugBlobs(ctx context.Context, repo restic.Repository) [restic.NumBlobTypes]*sizeHistogram {
|
||||||
|
var hist [restic.NumBlobTypes]*sizeHistogram
|
||||||
|
for i := 0; i < len(hist); i++ {
|
||||||
|
hist[i] = newSizeHistogram(2 * chunker.MaxSize)
|
||||||
|
}
|
||||||
|
|
||||||
|
repo.Index().Each(ctx, func(pb restic.PackedBlob) {
|
||||||
|
hist[pb.Type].Add(uint64(pb.Length))
|
||||||
|
})
|
||||||
|
|
||||||
|
return hist
|
||||||
|
}
|
||||||
|
|
||||||
|
// sizeClass is a single histogram bucket. It counts the sizes that fall into
// the inclusive range [lower, upper].
type sizeClass struct {
	lower, upper uint64
	count        int64
}

// sizeHistogram counts sizes in buckets whose width grows by a factor of ten.
type sizeHistogram struct {
	// count is the number of sizes added so far.
	count int64
	// totalSize is the sum of all sizes added so far.
	totalSize uint64
	// buckets are ordered by ascending size range; the first one only
	// matches the size 0.
	buckets []sizeClass
	// oversized collects every added size that did not fit into any bucket.
	oversized []uint64
}

// newSizeHistogram creates a histogram covering the sizes 0 to sizeLimit
// (inclusive). The first bucket only holds the size 0, the following buckets
// cover 1-9, 10-99, 100-999 and so on. The upper bound of the last bucket is
// capped at sizeLimit.
func newSizeHistogram(sizeLimit uint64) *sizeHistogram {
	const growthFactor = 10

	h := &sizeHistogram{
		buckets: []sizeClass{{lower: 0, upper: 0}},
	}

	// Use <= so that a sizeLimit which is an exact power of ten still gets its
	// own (single value) bucket instead of being reported as oversized.
	for lower := uint64(1); lower <= sizeLimit; lower *= growthFactor {
		// lower*growthFactor > sizeLimit, expressed as a division so that the
		// multiplication cannot overflow for limits close to the uint64
		// maximum. This is the final bucket; its upper bound is the limit.
		if lower > sizeLimit/growthFactor {
			h.buckets = append(h.buckets, sizeClass{lower: lower, upper: sizeLimit})
			break
		}

		h.buckets = append(h.buckets, sizeClass{lower: lower, upper: lower*growthFactor - 1})
	}

	return h
}

// Add records size in the histogram. The overall count and total size are
// always updated. If no bucket covers size, it is appended to the list of
// oversized values instead of being counted in a bucket.
func (s *sizeHistogram) Add(size uint64) {
	s.count++
	s.totalSize += size

	for i := range s.buckets {
		b := &s.buckets[i]
		if b.lower <= size && size <= b.upper {
			b.count++
			return
		}
	}

	s.oversized = append(s.oversized, size)
}
|
||||||
|
|
||||||
|
func (s sizeHistogram) String() string {
|
||||||
|
var out strings.Builder
|
||||||
|
|
||||||
|
out.WriteString(fmt.Sprintf("Count: %d\n", s.count))
|
||||||
|
out.WriteString(fmt.Sprintf("Total Size: %s\n", ui.FormatBytes(s.totalSize)))
|
||||||
|
|
||||||
|
t := table.New()
|
||||||
|
t.AddColumn("Size", "{{.SizeRange}}")
|
||||||
|
t.AddColumn("Count", "{{.Count}}")
|
||||||
|
type line struct {
|
||||||
|
SizeRange string
|
||||||
|
Count int64
|
||||||
|
}
|
||||||
|
|
||||||
|
// only print up to the highest used bucket size
|
||||||
|
lastFilledIdx := 0
|
||||||
|
for i := 0; i < len(s.buckets); i++ {
|
||||||
|
if s.buckets[i].count != 0 {
|
||||||
|
lastFilledIdx = i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var lines []line
|
||||||
|
hasStarted := false
|
||||||
|
for i, b := range s.buckets {
|
||||||
|
if i > lastFilledIdx {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
if b.count > 0 {
|
||||||
|
hasStarted = true
|
||||||
|
}
|
||||||
|
if hasStarted {
|
||||||
|
lines = append(lines, line{
|
||||||
|
SizeRange: fmt.Sprintf("%d - %d Byte", b.lower, b.upper),
|
||||||
|
Count: b.count,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
longestRange := 0
|
||||||
|
for _, l := range lines {
|
||||||
|
if longestRange < len(l.SizeRange) {
|
||||||
|
longestRange = len(l.SizeRange)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i := range lines {
|
||||||
|
lines[i].SizeRange = strings.Repeat(" ", longestRange-len(lines[i].SizeRange)) + lines[i].SizeRange
|
||||||
|
t.AddRow(lines[i])
|
||||||
|
}
|
||||||
|
|
||||||
|
_ = t.Write(&out)
|
||||||
|
|
||||||
|
if len(s.oversized) > 0 {
|
||||||
|
out.WriteString(fmt.Sprintf("Oversized: %v\n", s.oversized))
|
||||||
|
}
|
||||||
|
return out.String()
|
||||||
|
}
|
||||||
|
|
62
cmd/restic/cmd_stats_test.go
Normal file
62
cmd/restic/cmd_stats_test.go
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
rtest "github.com/restic/restic/internal/test"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestSizeHistogramNew(t *testing.T) {
|
||||||
|
h := newSizeHistogram(42)
|
||||||
|
|
||||||
|
exp := &sizeHistogram{
|
||||||
|
count: 0,
|
||||||
|
totalSize: 0,
|
||||||
|
buckets: []sizeClass{
|
||||||
|
{0, 0, 0},
|
||||||
|
{1, 9, 0},
|
||||||
|
{10, 42, 0},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
rtest.Equals(t, exp, h)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSizeHistogramAdd(t *testing.T) {
|
||||||
|
h := newSizeHistogram(42)
|
||||||
|
for i := uint64(0); i < 45; i++ {
|
||||||
|
h.Add(i)
|
||||||
|
}
|
||||||
|
|
||||||
|
exp := &sizeHistogram{
|
||||||
|
count: 45,
|
||||||
|
totalSize: 990,
|
||||||
|
buckets: []sizeClass{
|
||||||
|
{0, 0, 1},
|
||||||
|
{1, 9, 9},
|
||||||
|
{10, 42, 33},
|
||||||
|
},
|
||||||
|
oversized: []uint64{43, 44},
|
||||||
|
}
|
||||||
|
|
||||||
|
rtest.Equals(t, exp, h)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSizeHistogramString(t *testing.T) {
|
||||||
|
t.Run("overflow", func(t *testing.T) {
|
||||||
|
h := newSizeHistogram(42)
|
||||||
|
h.Add(8)
|
||||||
|
h.Add(50)
|
||||||
|
|
||||||
|
rtest.Equals(t, "Count: 2\nTotal Size: 58 B\nSize Count\n-----------------\n1 - 9 Byte 1\n-----------------\nOversized: [50]\n", h.String())
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("withZero", func(t *testing.T) {
|
||||||
|
h := newSizeHistogram(42)
|
||||||
|
h.Add(0)
|
||||||
|
h.Add(1)
|
||||||
|
h.Add(10)
|
||||||
|
|
||||||
|
rtest.Equals(t, "Count: 3\nTotal Size: 11 B\nSize Count\n-------------------\n 0 - 0 Byte 1\n 1 - 9 Byte 1\n10 - 42 Byte 1\n-------------------\n", h.String())
|
||||||
|
})
|
||||||
|
}
|
Loading…
Reference in a new issue