Merge pull request #2255 from Kidswiss/tar

Fix dumping issues with / and the first sub level
This commit is contained in:
MichaelEischer 2020-09-01 21:52:17 +02:00 committed by GitHub
commit 2ddb7ffb7e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 420 additions and 143 deletions

View file

@ -0,0 +1,9 @@
Bugfix: Fix tar issues when dumping `/`
We've fixed an issue with dumping either `/` or files on the first sublevel
(e.g. `/foo`) to tar. This also fixes tar dumping issues on Windows, where the
same issue could occur.
https://github.com/restic/restic/issues/2254
https://github.com/restic/restic/issues/2357
https://github.com/restic/restic/pull/2255

View file

@ -1,19 +1,16 @@
package main
import (
"archive/tar"
"context"
"fmt"
"io"
"os"
"path"
"path/filepath"
"strings"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/dump"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/walker"
"github.com/spf13/cobra"
)
@ -22,8 +19,10 @@ var cmdDump = &cobra.Command{
Use: "dump [flags] snapshotID file",
Short: "Print a backed-up file to stdout",
Long: `
The "dump" command extracts a single file from a snapshot from the repository and
prints its contents to stdout.
The "dump" command extracts files from a snapshot from the repository. If a
single file is selected, it prints its contents to stdout. Folders are output
as a tar file containing the contents of the specified folder. Pass "/" as
file name to dump the whole snapshot as a tar file.
The special snapshot "latest" can be used to use the latest snapshot in the
repository.
@ -59,17 +58,14 @@ func init() {
func splitPath(p string) []string {
d, f := path.Split(p)
if d == "" {
if d == "" || d == "/" {
return []string{f}
}
if d == "/" {
return []string{d}
}
s := splitPath(path.Clean(d))
s := splitPath(path.Join("/", d))
return append(s, f)
}
func printFromTree(ctx context.Context, tree *restic.Tree, repo restic.Repository, prefix string, pathComponents []string, pathToPrint string) error {
func printFromTree(ctx context.Context, tree *restic.Tree, repo restic.Repository, prefix string, pathComponents []string) error {
if tree == nil {
return fmt.Errorf("called with a nil tree")
@ -81,24 +77,42 @@ func printFromTree(ctx context.Context, tree *restic.Tree, repo restic.Repositor
if l == 0 {
return fmt.Errorf("empty path components")
}
// If we print / we need to assume that there are multiple nodes at that
// level in the tree.
if pathComponents[0] == "" {
if err := checkStdoutTar(); err != nil {
return err
}
return dump.WriteTar(ctx, repo, tree, "/", os.Stdout)
}
item := filepath.Join(prefix, pathComponents[0])
for _, node := range tree.Nodes {
if node.Name == pathComponents[0] || pathComponents[0] == "/" {
// If dumping something in the highest level it will just take the
// first item it finds and dump that according to the switch case below.
if node.Name == pathComponents[0] {
switch {
case l == 1 && node.Type == "file":
return getNodeData(ctx, os.Stdout, repo, node)
case l > 1 && node.Type == "dir":
case l == 1 && dump.IsFile(node):
return dump.GetNodeData(ctx, os.Stdout, repo, node)
case l > 1 && dump.IsDir(node):
subtree, err := repo.LoadTree(ctx, *node.Subtree)
if err != nil {
return errors.Wrapf(err, "cannot load subtree for %q", item)
}
return printFromTree(ctx, subtree, repo, item, pathComponents[1:], pathToPrint)
case node.Type == "dir":
node.Path = pathToPrint
return tarTree(ctx, repo, node, pathToPrint)
return printFromTree(ctx, subtree, repo, item, pathComponents[1:])
case dump.IsDir(node):
if err := checkStdoutTar(); err != nil {
return err
}
subtree, err := repo.LoadTree(ctx, *node.Subtree)
if err != nil {
return err
}
return dump.WriteTar(ctx, repo, subtree, item, os.Stdout)
case l > 1:
return fmt.Errorf("%q should be a dir, but is a %q", item, node.Type)
case node.Type != "file":
case !dump.IsFile(node):
return fmt.Errorf("%q should be a file, but is a %q", item, node.Type)
}
}
@ -162,7 +176,7 @@ func runDump(opts DumpOptions, gopts GlobalOptions, args []string) error {
Exitf(2, "loading tree for snapshot %q failed: %v", snapshotIDString, err)
}
err = printFromTree(ctx, tree, repo, "", splittedPath, pathToPrint)
err = printFromTree(ctx, tree, repo, "/", splittedPath)
if err != nil {
Exitf(2, "cannot dump file: %v", err)
}
@ -170,126 +184,9 @@ func runDump(opts DumpOptions, gopts GlobalOptions, args []string) error {
return nil
}
func getNodeData(ctx context.Context, output io.Writer, repo restic.Repository, node *restic.Node) error {
var (
buf []byte
err error
)
for _, id := range node.Content {
buf, err = repo.LoadBlob(ctx, restic.DataBlob, id, buf)
if err != nil {
return err
}
_, err = output.Write(buf)
if err != nil {
return errors.Wrap(err, "Write")
}
}
return nil
}
func tarTree(ctx context.Context, repo restic.Repository, rootNode *restic.Node, rootPath string) error {
func checkStdoutTar() error {
if stdoutIsTerminal() {
return fmt.Errorf("stdout is the terminal, please redirect output")
}
tw := tar.NewWriter(os.Stdout)
defer tw.Close()
// If we want to dump "/" we'll need to add the name of the first node, too
// as it would get lost otherwise.
if rootNode.Path == "/" {
rootNode.Path = path.Join(rootNode.Path, rootNode.Name)
rootPath = rootNode.Path
}
// we know that rootNode is a folder and walker.Walk will already process
// the next node, so we have to tar this one first, too
if err := tarNode(ctx, tw, rootNode, repo); err != nil {
return err
}
err := walker.Walk(ctx, repo, *rootNode.Subtree, nil, func(_ restic.ID, nodepath string, node *restic.Node, err error) (bool, error) {
if err != nil {
return false, err
}
if node == nil {
return false, nil
}
node.Path = path.Join(rootPath, nodepath)
if node.Type == "file" || node.Type == "symlink" || node.Type == "dir" {
err := tarNode(ctx, tw, node, repo)
if err != nil {
return false, err
}
}
return false, nil
})
return err
}
func tarNode(ctx context.Context, tw *tar.Writer, node *restic.Node, repo restic.Repository) error {
header := &tar.Header{
Name: node.Path,
Size: int64(node.Size),
Mode: int64(node.Mode),
Uid: int(node.UID),
Gid: int(node.GID),
ModTime: node.ModTime,
AccessTime: node.AccessTime,
ChangeTime: node.ChangeTime,
PAXRecords: parseXattrs(node.ExtendedAttributes),
}
if node.Type == "symlink" {
header.Typeflag = tar.TypeSymlink
header.Linkname = node.LinkTarget
}
if node.Type == "dir" {
header.Typeflag = tar.TypeDir
}
err := tw.WriteHeader(header)
if err != nil {
return errors.Wrap(err, "TarHeader ")
}
return getNodeData(ctx, tw, repo, node)
}
func parseXattrs(xattrs []restic.ExtendedAttribute) map[string]string {
tmpMap := make(map[string]string)
for _, attr := range xattrs {
attrString := string(attr.Value)
if strings.HasPrefix(attr.Name, "system.posix_acl_") {
na := acl{}
na.decode(attr.Value)
if na.String() != "" {
if strings.Contains(attr.Name, "system.posix_acl_access") {
tmpMap["SCHILY.acl.access"] = na.String()
} else if strings.Contains(attr.Name, "system.posix_acl_default") {
tmpMap["SCHILY.acl.default"] = na.String()
}
}
} else {
tmpMap["SCHILY.xattr."+attr.Name] = attrString
}
}
return tmpMap
return nil
}

View file

@ -0,0 +1,27 @@
package main
import (
"testing"
rtest "github.com/restic/restic/internal/test"
)
// TestDumpSplitPath checks that splitPath breaks slash-separated paths into
// their components, for relative as well as absolute paths. The empty path
// and "/" are both expected to yield a single empty component.
func TestDumpSplitPath(t *testing.T) {
	type testCase struct {
		path   string
		result []string
	}

	cases := []testCase{
		{"", []string{""}},
		{"test", []string{"test"}},
		{"test/dir", []string{"test", "dir"}},
		{"test/dir/sub", []string{"test", "dir", "sub"}},
		{"/", []string{""}},
		{"/test", []string{"test"}},
		{"/test/dir", []string{"test", "dir"}},
		{"/test/dir/sub", []string{"test", "dir", "sub"}},
	}

	for i := range cases {
		tc := cases[i]
		got := splitPath(tc.path)
		rtest.Equals(t, tc.result, got)
	}
}

View file

@ -1,4 +1,4 @@
package main
package dump
// Adapted from https://github.com/maxymania/go-system/blob/master/posix_acl/posix_acl.go

View file

@ -1,4 +1,4 @@
package main
package dump
import (
"reflect"

164
internal/dump/tar.go Normal file
View file

@ -0,0 +1,164 @@
package dump
import (
"archive/tar"
"context"
"io"
"path"
"path/filepath"
"strings"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/walker"
)
// WriteTar encodes the contents of tree as a tar archive and writes it to dst.
// Every node of the tree is placed below rootPath and dumped recursively.
// The tar writer is closed before returning; if dumping a node fails, that
// error is returned and any error from closing the writer is discarded.
func WriteTar(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dst io.Writer) error {
	tw := tar.NewWriter(dst)

	dumpNodes := func() error {
		for _, node := range tree.Nodes {
			// tarTree joins node.Path with node.Name, so seed the path with the root.
			node.Path = rootPath
			if err := tarTree(ctx, repo, node, rootPath, tw); err != nil {
				return err
			}
		}
		return nil
	}

	if err := dumpNodes(); err != nil {
		// The dump error is the interesting one; a close failure is ignored.
		_ = tw.Close()
		return err
	}

	return tw.Close()
}
// tarTree writes rootNode to tw and, if it is a directory, walks its subtree
// and writes every file, symlink and directory found there as well.
//
// rootNode.Path is extended by rootNode.Name before the node is written; the
// paths of all nodes below it are built relative to that extended path. The
// rootPath argument is overwritten with that extended path before use.
func tarTree(ctx context.Context, repo restic.Repository, rootNode *restic.Node, rootPath string, tw *tar.Writer) error {
	rootPath = path.Join(rootNode.Path, rootNode.Name)
	rootNode.Path = rootPath

	if err := tarNode(ctx, tw, rootNode, repo); err != nil {
		return err
	}

	// Anything that is not a directory has no children to walk.
	if !IsDir(rootNode) {
		return nil
	}

	visit := func(_ restic.ID, nodepath string, node *restic.Node, err error) (bool, error) {
		switch {
		case err != nil:
			return false, err
		case node == nil:
			return false, nil
		}

		node.Path = path.Join(rootPath, nodepath)

		// Only files, symlinks and directories end up in the archive.
		if !(IsFile(node) || IsLink(node) || IsDir(node)) {
			return false, nil
		}
		return false, tarNode(ctx, tw, node, repo)
	}

	return walker.Walk(ctx, repo, *rootNode.Subtree, nil, visit)
}
// tarNode writes a single node to tw: first a tar header built from the
// node's metadata, then the node's content via GetNodeData.
//
// The entry name is node.Path made relative to "/" and converted to forward
// slashes. Symlinks and directories get the matching tar type flag; for all
// other nodes the type flag is left at its zero value.
func tarNode(ctx context.Context, tw *tar.Writer, node *restic.Node, repo restic.Repository) error {
	relPath, err := filepath.Rel("/", node.Path)
	if err != nil {
		return err
	}
	entryName := filepath.ToSlash(relPath)

	header := &tar.Header{
		Name:       entryName,
		Size:       int64(node.Size),
		Mode:       int64(node.Mode),
		Uid:        int(node.UID),
		Gid:        int(node.GID),
		ModTime:    node.ModTime,
		AccessTime: node.AccessTime,
		ChangeTime: node.ChangeTime,
		PAXRecords: parseXattrs(node.ExtendedAttributes),
	}

	switch {
	case IsDir(node):
		header.Typeflag = tar.TypeDir
	case IsLink(node):
		header.Typeflag = tar.TypeSymlink
		header.Linkname = node.LinkTarget
	}

	if err := tw.WriteHeader(header); err != nil {
		return errors.Wrap(err, "TarHeader ")
	}

	return GetNodeData(ctx, tw, repo, node)
}
// parseXattrs converts extended attributes into a map of PAX records.
//
// Attributes whose name starts with "system.posix_acl_" are decoded as ACLs
// and stored in their string form under "SCHILY.acl.access" or
// "SCHILY.acl.default"; ACLs with an empty string form are dropped. Every
// other attribute is stored verbatim under "SCHILY.xattr.<name>".
// The returned map is never nil.
func parseXattrs(xattrs []restic.ExtendedAttribute) map[string]string {
	records := make(map[string]string)

	for _, attr := range xattrs {
		if !strings.HasPrefix(attr.Name, "system.posix_acl_") {
			records["SCHILY.xattr."+attr.Name] = string(attr.Value)
			continue
		}

		na := acl{}
		na.decode(attr.Value)
		if na.String() == "" {
			continue
		}

		switch {
		case strings.Contains(attr.Name, "system.posix_acl_access"):
			records["SCHILY.acl.access"] = na.String()
		case strings.Contains(attr.Name, "system.posix_acl_default"):
			records["SCHILY.acl.default"] = na.String()
		}
	}

	return records
}
// GetNodeData writes the content of node to output. The blobs listed in
// node.Content are loaded from repo one after another and written in that
// order. The first load or write error aborts the operation; write errors are
// wrapped with the message "Write".
func GetNodeData(ctx context.Context, output io.Writer, repo restic.Repository, node *restic.Node) error {
	// buf is handed back to LoadBlob on every iteration.
	var buf []byte

	for _, id := range node.Content {
		var err error

		buf, err = repo.LoadBlob(ctx, restic.DataBlob, id, buf)
		if err != nil {
			return err
		}

		if _, err = output.Write(buf); err != nil {
			return errors.Wrap(err, "Write")
		}
	}

	return nil
}
// IsDir reports whether the given node is a directory, i.e. whether its
// Type is "dir".
func IsDir(node *restic.Node) bool {
	const dirType = "dir"
	return node.Type == dirType
}
// IsLink reports whether the given node is a symbolic link, i.e. whether its
// Type is "symlink".
func IsLink(node *restic.Node) bool {
	const linkType = "symlink"
	return node.Type == linkType
}
// IsFile reports whether the given node is a regular file, i.e. whether its
// Type is "file".
func IsFile(node *restic.Node) bool {
	const fileType = "file"
	return node.Type == fileType
}

180
internal/dump/tar_test.go Normal file
View file

@ -0,0 +1,180 @@
package dump
import (
"archive/tar"
"bytes"
"context"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"testing"
"time"
"github.com/restic/restic/internal/archiver"
"github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
rtest "github.com/restic/restic/internal/test"
)
// prepareTempdirRepoSrc sets up the fixtures for a dump test: a temporary
// directory populated with the files described by src, and a test repository.
// The returned cleanup function removes the repository first and the
// temporary directory afterwards.
func prepareTempdirRepoSrc(t testing.TB, src archiver.TestDir) (tempdir string, repo restic.Repository, cleanup func()) {
	tempdir, removeTempdir := rtest.TempDir(t)
	repo, removeRepository := repository.TestRepository(t)

	archiver.TestCreateFiles(t, tempdir, src)

	return tempdir, repo, func() {
		removeRepository()
		removeTempdir()
	}
}
// TestWriteTar snapshots several small directory layouts, dumps the snapshot
// tree with WriteTar and verifies the resulting archive against the files on
// disk using checkTar.
func TestWriteTar(t *testing.T) {
	type testCase struct {
		name   string
		args   archiver.TestDir
		target string
	}

	cases := []testCase{
		{
			name: "single file in root",
			args: archiver.TestDir{
				"file": archiver.TestFile{Content: "string"},
			},
			target: "/",
		},
		{
			name: "multiple files in root",
			args: archiver.TestDir{
				"file1": archiver.TestFile{Content: "string"},
				"file2": archiver.TestFile{Content: "string"},
			},
			target: "/",
		},
		{
			name: "multiple files and folders in root",
			args: archiver.TestDir{
				"file1": archiver.TestFile{Content: "string"},
				"file2": archiver.TestFile{Content: "string"},
				"firstDir": archiver.TestDir{
					"another": archiver.TestFile{Content: "string"},
				},
				"secondDir": archiver.TestDir{
					"another2": archiver.TestFile{Content: "string"},
				},
			},
			target: "/",
		},
	}

	for i := range cases {
		tc := cases[i]

		t.Run(tc.name, func(t *testing.T) {
			ctx, cancel := context.WithCancel(context.Background())
			defer cancel()

			tmpdir, repo, cleanup := prepareTempdirRepoSrc(t, tc.args)
			defer cleanup()

			arch := archiver.New(repo, fs.Track{FS: fs.Local{}}, archiver.Options{})

			// Snapshot "." from inside the temp dir; restore the working
			// directory when the subtest is done.
			back := fs.TestChdir(t, tmpdir)
			defer back()

			sn, _, err := arch.Snapshot(ctx, []string{"."}, archiver.SnapshotOptions{})
			rtest.OK(t, err)

			tree, err := repo.LoadTree(ctx, *sn.Tree)
			rtest.OK(t, err)

			var archive bytes.Buffer
			err = WriteTar(ctx, repo, tree, tc.target, &archive)
			if err != nil {
				t.Fatalf("WriteTar() error = %v", err)
			}

			err = checkTar(t, tmpdir, &archive)
			if err != nil {
				t.Errorf("WriteTar() = tar does not match: %v", err)
			}
		})
	}
}
// checkTar compares the tar archive in srcTar with the directory tree rooted
// at testDir.
//
// For every tar entry the file of the same name below testDir is looked up
// and its modification time, rounded to seconds, is compared with the header.
// Directory entries additionally have their base name checked (the entry "."
// is skipped and not counted); all other entries have their size and content
// compared. Finally, the number of checked entries must equal the number of
// items found by walking testDir, not counting items whose name equals the
// base name of testDir.
//
// Mismatches are returned as an error in "got <tar> want <disk>" form.
// Failures to read the archive or a file's content abort the test via t.Fatal.
func checkTar(t *testing.T, testDir string, srcTar *bytes.Buffer) error {
	tr := tar.NewReader(srcTar)

	fileNumber := 0
	tarFiles := 0

	// Count what is on disk so missing tar entries are detected below.
	err := filepath.Walk(testDir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.Name() != filepath.Base(testDir) {
			fileNumber++
		}
		return nil
	})
	if err != nil {
		return err
	}

	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			t.Fatal(err)
		}

		matchPath := filepath.Join(testDir, hdr.Name)
		match, err := os.Stat(matchPath)
		if err != nil {
			return err
		}

		// check metadata, tar header contains time rounded to seconds
		fileTime := match.ModTime().Round(time.Second)
		tarTime := hdr.ModTime
		if !fileTime.Equal(tarTime) {
			// "got" is the value from the archive under test, "want" the
			// value on disk, consistent with the other messages below.
			return fmt.Errorf("modTime does not match, got: %s, want: %s", tarTime, fileTime)
		}

		if hdr.Typeflag == tar.TypeDir {
			// this is a folder
			if hdr.Name == "." {
				// we don't need to check the root folder
				continue
			}

			filebase := filepath.ToSlash(match.Name())
			if filepath.Base(hdr.Name) != filebase {
				return fmt.Errorf("foldernames don't match got %v want %v", filepath.Base(hdr.Name), filebase)
			}
		} else {
			if match.Size() != hdr.Size {
				return fmt.Errorf("size does not match got %v want %v", hdr.Size, match.Size())
			}

			contentsFile, err := ioutil.ReadFile(matchPath)
			if err != nil {
				t.Fatal(err)
			}

			// Reading from tr yields the content of the current entry only.
			contentsTar := &bytes.Buffer{}
			_, err = io.Copy(contentsTar, tr)
			if err != nil {
				t.Fatal(err)
			}

			if contentsTar.String() != string(contentsFile) {
				return fmt.Errorf("contents does not match, got %s want %s", contentsTar, contentsFile)
			}
		}
		tarFiles++
	}

	if tarFiles != fileNumber {
		return fmt.Errorf("not the same amount of files got %v want %v", tarFiles, fileNumber)
	}

	return nil
}