Merge pull request #2255 from Kidswiss/tar

Fix dumping issues with / and the first sub level
This commit is contained in:
MichaelEischer 2020-09-01 21:52:17 +02:00 committed by GitHub
commit 2ddb7ffb7e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 420 additions and 143 deletions

View file

@ -0,0 +1,9 @@
Bugfix: Fix tar issues when dumping `/`
We've fixed an issue with dumping either `/` or files on the first sublevel
e.g. `/foo` to tar. This also fixes tar dumping issues on Windows where this
issue could also happen.
https://github.com/restic/restic/issues/2254
https://github.com/restic/restic/issues/2357
https://github.com/restic/restic/pull/2255

View file

@ -1,19 +1,16 @@
package main package main
import ( import (
"archive/tar"
"context" "context"
"fmt" "fmt"
"io"
"os" "os"
"path" "path"
"path/filepath" "path/filepath"
"strings"
"github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/dump"
"github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/walker"
"github.com/spf13/cobra" "github.com/spf13/cobra"
) )
@ -22,8 +19,10 @@ var cmdDump = &cobra.Command{
Use: "dump [flags] snapshotID file", Use: "dump [flags] snapshotID file",
Short: "Print a backed-up file to stdout", Short: "Print a backed-up file to stdout",
Long: ` Long: `
The "dump" command extracts a single file from a snapshot from the repository and The "dump" command extracts files from a snapshot from the repository. If a
prints its contents to stdout. single file is selected, it prints its contents to stdout. Folders are output
as a tar file containing the contents of the specified folder. Pass "/" as
file name to dump the whole snapshot as a tar file.
The special snapshot "latest" can be used to use the latest snapshot in the The special snapshot "latest" can be used to use the latest snapshot in the
repository. repository.
@ -59,17 +58,14 @@ func init() {
func splitPath(p string) []string { func splitPath(p string) []string {
d, f := path.Split(p) d, f := path.Split(p)
if d == "" { if d == "" || d == "/" {
return []string{f} return []string{f}
} }
if d == "/" { s := splitPath(path.Join("/", d))
return []string{d}
}
s := splitPath(path.Clean(d))
return append(s, f) return append(s, f)
} }
func printFromTree(ctx context.Context, tree *restic.Tree, repo restic.Repository, prefix string, pathComponents []string, pathToPrint string) error { func printFromTree(ctx context.Context, tree *restic.Tree, repo restic.Repository, prefix string, pathComponents []string) error {
if tree == nil { if tree == nil {
return fmt.Errorf("called with a nil tree") return fmt.Errorf("called with a nil tree")
@ -81,24 +77,42 @@ func printFromTree(ctx context.Context, tree *restic.Tree, repo restic.Repositor
if l == 0 { if l == 0 {
return fmt.Errorf("empty path components") return fmt.Errorf("empty path components")
} }
// If we print / we need to assume that there are multiple nodes at that
// level in the tree.
if pathComponents[0] == "" {
if err := checkStdoutTar(); err != nil {
return err
}
return dump.WriteTar(ctx, repo, tree, "/", os.Stdout)
}
item := filepath.Join(prefix, pathComponents[0]) item := filepath.Join(prefix, pathComponents[0])
for _, node := range tree.Nodes { for _, node := range tree.Nodes {
if node.Name == pathComponents[0] || pathComponents[0] == "/" { // If dumping something in the highest level it will just take the
// first item it finds and dump that according to the switch case below.
if node.Name == pathComponents[0] {
switch { switch {
case l == 1 && node.Type == "file": case l == 1 && dump.IsFile(node):
return getNodeData(ctx, os.Stdout, repo, node) return dump.GetNodeData(ctx, os.Stdout, repo, node)
case l > 1 && node.Type == "dir": case l > 1 && dump.IsDir(node):
subtree, err := repo.LoadTree(ctx, *node.Subtree) subtree, err := repo.LoadTree(ctx, *node.Subtree)
if err != nil { if err != nil {
return errors.Wrapf(err, "cannot load subtree for %q", item) return errors.Wrapf(err, "cannot load subtree for %q", item)
} }
return printFromTree(ctx, subtree, repo, item, pathComponents[1:], pathToPrint) return printFromTree(ctx, subtree, repo, item, pathComponents[1:])
case node.Type == "dir": case dump.IsDir(node):
node.Path = pathToPrint if err := checkStdoutTar(); err != nil {
return tarTree(ctx, repo, node, pathToPrint) return err
}
subtree, err := repo.LoadTree(ctx, *node.Subtree)
if err != nil {
return err
}
return dump.WriteTar(ctx, repo, subtree, item, os.Stdout)
case l > 1: case l > 1:
return fmt.Errorf("%q should be a dir, but is a %q", item, node.Type) return fmt.Errorf("%q should be a dir, but is a %q", item, node.Type)
case node.Type != "file": case !dump.IsFile(node):
return fmt.Errorf("%q should be a file, but is a %q", item, node.Type) return fmt.Errorf("%q should be a file, but is a %q", item, node.Type)
} }
} }
@ -162,7 +176,7 @@ func runDump(opts DumpOptions, gopts GlobalOptions, args []string) error {
Exitf(2, "loading tree for snapshot %q failed: %v", snapshotIDString, err) Exitf(2, "loading tree for snapshot %q failed: %v", snapshotIDString, err)
} }
err = printFromTree(ctx, tree, repo, "", splittedPath, pathToPrint) err = printFromTree(ctx, tree, repo, "/", splittedPath)
if err != nil { if err != nil {
Exitf(2, "cannot dump file: %v", err) Exitf(2, "cannot dump file: %v", err)
} }
@ -170,126 +184,9 @@ func runDump(opts DumpOptions, gopts GlobalOptions, args []string) error {
return nil return nil
} }
func getNodeData(ctx context.Context, output io.Writer, repo restic.Repository, node *restic.Node) error { func checkStdoutTar() error {
var (
buf []byte
err error
)
for _, id := range node.Content {
buf, err = repo.LoadBlob(ctx, restic.DataBlob, id, buf)
if err != nil {
return err
}
_, err = output.Write(buf)
if err != nil {
return errors.Wrap(err, "Write")
}
}
return nil
}
func tarTree(ctx context.Context, repo restic.Repository, rootNode *restic.Node, rootPath string) error {
if stdoutIsTerminal() { if stdoutIsTerminal() {
return fmt.Errorf("stdout is the terminal, please redirect output") return fmt.Errorf("stdout is the terminal, please redirect output")
} }
return nil
tw := tar.NewWriter(os.Stdout)
defer tw.Close()
// If we want to dump "/" we'll need to add the name of the first node, too
// as it would get lost otherwise.
if rootNode.Path == "/" {
rootNode.Path = path.Join(rootNode.Path, rootNode.Name)
rootPath = rootNode.Path
}
// we know that rootNode is a folder and walker.Walk will already process
// the next node, so we have to tar this one first, too
if err := tarNode(ctx, tw, rootNode, repo); err != nil {
return err
}
err := walker.Walk(ctx, repo, *rootNode.Subtree, nil, func(_ restic.ID, nodepath string, node *restic.Node, err error) (bool, error) {
if err != nil {
return false, err
}
if node == nil {
return false, nil
}
node.Path = path.Join(rootPath, nodepath)
if node.Type == "file" || node.Type == "symlink" || node.Type == "dir" {
err := tarNode(ctx, tw, node, repo)
if err != nil {
return false, err
}
}
return false, nil
})
return err
}
func tarNode(ctx context.Context, tw *tar.Writer, node *restic.Node, repo restic.Repository) error {
header := &tar.Header{
Name: node.Path,
Size: int64(node.Size),
Mode: int64(node.Mode),
Uid: int(node.UID),
Gid: int(node.GID),
ModTime: node.ModTime,
AccessTime: node.AccessTime,
ChangeTime: node.ChangeTime,
PAXRecords: parseXattrs(node.ExtendedAttributes),
}
if node.Type == "symlink" {
header.Typeflag = tar.TypeSymlink
header.Linkname = node.LinkTarget
}
if node.Type == "dir" {
header.Typeflag = tar.TypeDir
}
err := tw.WriteHeader(header)
if err != nil {
return errors.Wrap(err, "TarHeader ")
}
return getNodeData(ctx, tw, repo, node)
}
func parseXattrs(xattrs []restic.ExtendedAttribute) map[string]string {
tmpMap := make(map[string]string)
for _, attr := range xattrs {
attrString := string(attr.Value)
if strings.HasPrefix(attr.Name, "system.posix_acl_") {
na := acl{}
na.decode(attr.Value)
if na.String() != "" {
if strings.Contains(attr.Name, "system.posix_acl_access") {
tmpMap["SCHILY.acl.access"] = na.String()
} else if strings.Contains(attr.Name, "system.posix_acl_default") {
tmpMap["SCHILY.acl.default"] = na.String()
}
}
} else {
tmpMap["SCHILY.xattr."+attr.Name] = attrString
}
}
return tmpMap
} }

View file

@ -0,0 +1,27 @@
package main
import (
"testing"
rtest "github.com/restic/restic/internal/test"
)
func TestDumpSplitPath(t *testing.T) {
testPaths := []struct {
path string
result []string
}{
{"", []string{""}},
{"test", []string{"test"}},
{"test/dir", []string{"test", "dir"}},
{"test/dir/sub", []string{"test", "dir", "sub"}},
{"/", []string{""}},
{"/test", []string{"test"}},
{"/test/dir", []string{"test", "dir"}},
{"/test/dir/sub", []string{"test", "dir", "sub"}},
}
for _, path := range testPaths {
parts := splitPath(path.path)
rtest.Equals(t, path.result, parts)
}
}

View file

@ -1,4 +1,4 @@
package main package dump
// Adapted from https://github.com/maxymania/go-system/blob/master/posix_acl/posix_acl.go // Adapted from https://github.com/maxymania/go-system/blob/master/posix_acl/posix_acl.go

View file

@ -1,4 +1,4 @@
package main package dump
import ( import (
"reflect" "reflect"

164
internal/dump/tar.go Normal file
View file

@ -0,0 +1,164 @@
package dump
import (
"archive/tar"
"context"
"io"
"path"
"path/filepath"
"strings"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/walker"
)
// WriteTar will write the contents of the given tree, encoded as a tar to the given destination.
// It will loop over all nodes in the tree and dump them recursively.
func WriteTar(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dst io.Writer) error {
tw := tar.NewWriter(dst)
for _, rootNode := range tree.Nodes {
rootNode.Path = rootPath
err := tarTree(ctx, repo, rootNode, rootPath, tw)
if err != nil {
_ = tw.Close()
return err
}
}
return tw.Close()
}
func tarTree(ctx context.Context, repo restic.Repository, rootNode *restic.Node, rootPath string, tw *tar.Writer) error {
rootNode.Path = path.Join(rootNode.Path, rootNode.Name)
rootPath = rootNode.Path
if err := tarNode(ctx, tw, rootNode, repo); err != nil {
return err
}
// If this is no directory we are finished
if !IsDir(rootNode) {
return nil
}
err := walker.Walk(ctx, repo, *rootNode.Subtree, nil, func(_ restic.ID, nodepath string, node *restic.Node, err error) (bool, error) {
if err != nil {
return false, err
}
if node == nil {
return false, nil
}
node.Path = path.Join(rootPath, nodepath)
if IsFile(node) || IsLink(node) || IsDir(node) {
err := tarNode(ctx, tw, node, repo)
if err != nil {
return false, err
}
}
return false, nil
})
return err
}
func tarNode(ctx context.Context, tw *tar.Writer, node *restic.Node, repo restic.Repository) error {
relPath, err := filepath.Rel("/", node.Path)
if err != nil {
return err
}
header := &tar.Header{
Name: filepath.ToSlash(relPath),
Size: int64(node.Size),
Mode: int64(node.Mode),
Uid: int(node.UID),
Gid: int(node.GID),
ModTime: node.ModTime,
AccessTime: node.AccessTime,
ChangeTime: node.ChangeTime,
PAXRecords: parseXattrs(node.ExtendedAttributes),
}
if IsLink(node) {
header.Typeflag = tar.TypeSymlink
header.Linkname = node.LinkTarget
}
if IsDir(node) {
header.Typeflag = tar.TypeDir
}
err = tw.WriteHeader(header)
if err != nil {
return errors.Wrap(err, "TarHeader ")
}
return GetNodeData(ctx, tw, repo, node)
}
func parseXattrs(xattrs []restic.ExtendedAttribute) map[string]string {
tmpMap := make(map[string]string)
for _, attr := range xattrs {
attrString := string(attr.Value)
if strings.HasPrefix(attr.Name, "system.posix_acl_") {
na := acl{}
na.decode(attr.Value)
if na.String() != "" {
if strings.Contains(attr.Name, "system.posix_acl_access") {
tmpMap["SCHILY.acl.access"] = na.String()
} else if strings.Contains(attr.Name, "system.posix_acl_default") {
tmpMap["SCHILY.acl.default"] = na.String()
}
}
} else {
tmpMap["SCHILY.xattr."+attr.Name] = attrString
}
}
return tmpMap
}
// GetNodeData will write the contents of the node to the given output
func GetNodeData(ctx context.Context, output io.Writer, repo restic.Repository, node *restic.Node) error {
var (
buf []byte
err error
)
for _, id := range node.Content {
buf, err = repo.LoadBlob(ctx, restic.DataBlob, id, buf)
if err != nil {
return err
}
_, err = output.Write(buf)
if err != nil {
return errors.Wrap(err, "Write")
}
}
return nil
}
// IsDir checks if the given node is a directory
func IsDir(node *restic.Node) bool {
return node.Type == "dir"
}
// IsLink checks if the given node as a link
func IsLink(node *restic.Node) bool {
return node.Type == "symlink"
}
// IsFile checks if the given node is a file
func IsFile(node *restic.Node) bool {
return node.Type == "file"
}

180
internal/dump/tar_test.go Normal file
View file

@ -0,0 +1,180 @@
package dump
import (
"archive/tar"
"bytes"
"context"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"testing"
"time"
"github.com/restic/restic/internal/archiver"
"github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
rtest "github.com/restic/restic/internal/test"
)
func prepareTempdirRepoSrc(t testing.TB, src archiver.TestDir) (tempdir string, repo restic.Repository, cleanup func()) {
tempdir, removeTempdir := rtest.TempDir(t)
repo, removeRepository := repository.TestRepository(t)
archiver.TestCreateFiles(t, tempdir, src)
cleanup = func() {
removeRepository()
removeTempdir()
}
return tempdir, repo, cleanup
}
func TestWriteTar(t *testing.T) {
tests := []struct {
name string
args archiver.TestDir
target string
}{
{
name: "single file in root",
args: archiver.TestDir{
"file": archiver.TestFile{Content: "string"},
},
target: "/",
},
{
name: "multiple files in root",
args: archiver.TestDir{
"file1": archiver.TestFile{Content: "string"},
"file2": archiver.TestFile{Content: "string"},
},
target: "/",
},
{
name: "multiple files and folders in root",
args: archiver.TestDir{
"file1": archiver.TestFile{Content: "string"},
"file2": archiver.TestFile{Content: "string"},
"firstDir": archiver.TestDir{
"another": archiver.TestFile{Content: "string"},
},
"secondDir": archiver.TestDir{
"another2": archiver.TestFile{Content: "string"},
},
},
target: "/",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
tmpdir, repo, cleanup := prepareTempdirRepoSrc(t, tt.args)
defer cleanup()
arch := archiver.New(repo, fs.Track{FS: fs.Local{}}, archiver.Options{})
back := fs.TestChdir(t, tmpdir)
defer back()
sn, _, err := arch.Snapshot(ctx, []string{"."}, archiver.SnapshotOptions{})
rtest.OK(t, err)
tree, err := repo.LoadTree(ctx, *sn.Tree)
rtest.OK(t, err)
dst := &bytes.Buffer{}
if err := WriteTar(ctx, repo, tree, tt.target, dst); err != nil {
t.Fatalf("WriteTar() error = %v", err)
}
if err := checkTar(t, tmpdir, dst); err != nil {
t.Errorf("WriteTar() = tar does not match: %v", err)
}
})
}
}
func checkTar(t *testing.T, testDir string, srcTar *bytes.Buffer) error {
tr := tar.NewReader(srcTar)
fileNumber := 0
tarFiles := 0
err := filepath.Walk(testDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if info.Name() != filepath.Base(testDir) {
fileNumber++
}
return nil
})
if err != nil {
return err
}
for {
hdr, err := tr.Next()
if err == io.EOF {
break
}
if err != nil {
t.Fatal(err)
}
matchPath := filepath.Join(testDir, hdr.Name)
match, err := os.Stat(matchPath)
if err != nil {
return err
}
// check metadata, tar header contains time rounded to seconds
fileTime := match.ModTime().Round(time.Second)
tarTime := hdr.ModTime
if !fileTime.Equal(tarTime) {
return fmt.Errorf("modTime does not match, got: %s, want: %s", fileTime, tarTime)
}
if hdr.Typeflag == tar.TypeDir {
// this is a folder
if hdr.Name == "." {
// we don't need to check the root folder
continue
}
filebase := filepath.ToSlash(match.Name())
if filepath.Base(hdr.Name) != filebase {
return fmt.Errorf("foldernames don't match got %v want %v", filepath.Base(hdr.Name), filebase)
}
} else {
if match.Size() != hdr.Size {
return fmt.Errorf("size does not match got %v want %v", hdr.Size, match.Size())
}
contentsFile, err := ioutil.ReadFile(matchPath)
if err != nil {
t.Fatal(err)
}
contentsTar := &bytes.Buffer{}
_, err = io.Copy(contentsTar, tr)
if err != nil {
t.Fatal(err)
}
if contentsTar.String() != string(contentsFile) {
return fmt.Errorf("contents does not match, got %s want %s", contentsTar, contentsFile)
}
}
tarFiles++
}
if tarFiles != fileNumber {
return fmt.Errorf("not the same amount of files got %v want %v", tarFiles, fileNumber)
}
return nil
}