Merge pull request #3081 from DRON-666/dump-zip

Add zip support to the `dump` command
This commit is contained in:
MichaelEischer 2020-12-19 11:33:33 +01:00 committed by GitHub
commit d32949ee54
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 455 additions and 200 deletions

View file

@ -0,0 +1,8 @@
Enhancement: Add zip format support to dump
Previously, restic could dump the contents of a whole folder structure only
in the tar format. The `dump` command now has a new flag to change the output
format to zip: pass `--archive zip` as an option to `restic dump`.
https://github.com/restic/restic/pull/2433
https://github.com/restic/restic/pull/3081

View file

@ -21,8 +21,8 @@ var cmdDump = &cobra.Command{
Long: `
The "dump" command extracts files from a snapshot from the repository. If a
single file is selected, it prints its contents to stdout. Folders are output
as a tar file containing the contents of the specified folder. Pass "/" as
file name to dump the whole snapshot as a tar file.
as a tar (default) or zip file containing the contents of the specified folder.
Pass "/" as file name to dump the whole snapshot as an archive file.
The special snapshot "latest" can be used to use the latest snapshot in the
repository.
@ -43,6 +43,7 @@ type DumpOptions struct {
Hosts []string
Paths []string
Tags restic.TagLists
Archive string
}
var dumpOptions DumpOptions
@ -54,6 +55,7 @@ func init() {
flags.StringArrayVarP(&dumpOptions.Hosts, "host", "H", nil, `only consider snapshots for this host when the snapshot ID is "latest" (can be specified multiple times)`)
flags.Var(&dumpOptions.Tags, "tag", "only consider snapshots which include this `taglist` for snapshot ID \"latest\"")
flags.StringArrayVar(&dumpOptions.Paths, "path", nil, "only consider snapshots which include this (absolute) `path` for snapshot ID \"latest\"")
flags.StringVarP(&dumpOptions.Archive, "archive", "a", "tar", "set archive `format` as \"tar\" or \"zip\"")
}
func splitPath(p string) []string {
@ -65,8 +67,7 @@ func splitPath(p string) []string {
return append(s, f)
}
func printFromTree(ctx context.Context, tree *restic.Tree, repo restic.Repository, prefix string, pathComponents []string) error {
func printFromTree(ctx context.Context, tree *restic.Tree, repo restic.Repository, prefix string, pathComponents []string, writeDump dump.WriteDump) error {
if tree == nil {
return fmt.Errorf("called with a nil tree")
}
@ -81,10 +82,10 @@ func printFromTree(ctx context.Context, tree *restic.Tree, repo restic.Repositor
// If we print / we need to assume that there are multiple nodes at that
// level in the tree.
if pathComponents[0] == "" {
if err := checkStdoutTar(); err != nil {
if err := checkStdoutArchive(); err != nil {
return err
}
return dump.WriteTar(ctx, repo, tree, "/", os.Stdout)
return writeDump(ctx, repo, tree, "/", os.Stdout)
}
item := filepath.Join(prefix, pathComponents[0])
@ -100,16 +101,16 @@ func printFromTree(ctx context.Context, tree *restic.Tree, repo restic.Repositor
if err != nil {
return errors.Wrapf(err, "cannot load subtree for %q", item)
}
return printFromTree(ctx, subtree, repo, item, pathComponents[1:])
return printFromTree(ctx, subtree, repo, item, pathComponents[1:], writeDump)
case dump.IsDir(node):
if err := checkStdoutTar(); err != nil {
if err := checkStdoutArchive(); err != nil {
return err
}
subtree, err := repo.LoadTree(ctx, *node.Subtree)
if err != nil {
return err
}
return dump.WriteTar(ctx, repo, subtree, item, os.Stdout)
return writeDump(ctx, repo, subtree, item, os.Stdout)
case l > 1:
return fmt.Errorf("%q should be a dir, but is a %q", item, node.Type)
case !dump.IsFile(node):
@ -127,6 +128,16 @@ func runDump(opts DumpOptions, gopts GlobalOptions, args []string) error {
return errors.Fatal("no file and no snapshot ID specified")
}
var wd dump.WriteDump
switch opts.Archive {
case "tar":
wd = dump.WriteTar
case "zip":
wd = dump.WriteZip
default:
return fmt.Errorf("unknown archive format %q", opts.Archive)
}
snapshotIDString := args[0]
pathToPrint := args[1]
@ -176,7 +187,7 @@ func runDump(opts DumpOptions, gopts GlobalOptions, args []string) error {
Exitf(2, "loading tree for snapshot %q failed: %v", snapshotIDString, err)
}
err = printFromTree(ctx, tree, repo, "/", splittedPath)
err = printFromTree(ctx, tree, repo, "/", splittedPath, wd)
if err != nil {
Exitf(2, "cannot dump file: %v", err)
}
@ -184,7 +195,7 @@ func runDump(opts DumpOptions, gopts GlobalOptions, args []string) error {
return nil
}
func checkStdoutTar() error {
func checkStdoutArchive() error {
if stdoutIsTerminal() {
return fmt.Errorf("stdout is the terminal, please redirect output")
}

View file

@ -128,10 +128,13 @@ e.g.:
It is also possible to ``dump`` the contents of a whole folder structure to
stdout. To retain the information about the files and folders Restic will
output the contents in the tar format:
output the contents in the tar (default) or zip format:
.. code-block:: console
$ restic -r /srv/restic-repo dump latest /home/other/work > restore.tar
.. code-block:: console
$ restic -r /srv/restic-repo dump -a zip latest /home/other/work > restore.zip

107
internal/dump/common.go Normal file
View file

@ -0,0 +1,107 @@
package dump
import (
"context"
"io"
"path"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/walker"
)
// dumper implements saving node data.
// writeDump hands every node to dumpNode and calls Close once, either after
// the last node or after the first dumpNode error.
type dumper interface {
io.Closer
// dumpNode writes a single node; repo is the repository the node belongs to.
dumpNode(ctx context.Context, node *restic.Node, repo restic.Repository) error
}
// WriteDump will write the contents of the given tree to the given destination.
// It will loop over all nodes in the tree and dump them recursively.
// rootPath is the parent path that the names of the tree's nodes are joined
// onto; dst receives the encoded output.
type WriteDump func(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dst io.Writer) error
// writeDump dumps every node of tree (recursively, via dumpTree) through dmp
// and closes dmp afterwards.
//
// rootPath is assigned to each top-level node as its parent path before the
// node is dumped. dst is not referenced by this function.
//
// It returns the first dump error, or otherwise the result of closing dmp.
func writeDump(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dmp dumper, dst io.Writer) error {
	for _, rootNode := range tree.Nodes {
		rootNode.Path = rootPath
		if err := dumpTree(ctx, repo, rootNode, rootPath, dmp); err != nil {
			// The dump has already failed; an error from Close would only
			// mask the root cause, so it is deliberately discarded.
			_ = dmp.Close()

			return err
		}
	}

	return dmp.Close()
}
// dumpTree dumps rootNode through dmp and, if rootNode is a directory, every
// file, symlink and directory found below it.
//
// On entry rootNode.Path holds the parent path; it is replaced by the node's
// own path (parent joined with the node name), which also serves as the
// prefix for all nodes visited beneath it.
func dumpTree(ctx context.Context, repo restic.Repository, rootNode *restic.Node, rootPath string, dmp dumper) error {
	base := path.Join(rootNode.Path, rootNode.Name)
	rootNode.Path = base

	if err := dmp.dumpNode(ctx, rootNode, repo); err != nil {
		return err
	}

	if !IsDir(rootNode) {
		// Only directories have a subtree to descend into.
		return nil
	}

	visit := func(_ restic.ID, nodepath string, node *restic.Node, walkErr error) (bool, error) {
		switch {
		case walkErr != nil:
			return false, walkErr
		case node == nil:
			return false, nil
		}

		node.Path = path.Join(base, nodepath)

		// Node types other than file, symlink and directory are skipped.
		if !IsFile(node) && !IsLink(node) && !IsDir(node) {
			return false, nil
		}

		return false, dmp.dumpNode(ctx, node, repo)
	}

	return walker.Walk(ctx, repo, *rootNode.Subtree, nil, visit)
}
// GetNodeData loads the data blobs listed in node.Content from repo, in
// order, and writes each of them to output.
//
// It stops at the first error: load errors are returned as is, write errors
// are wrapped with "Write".
func GetNodeData(ctx context.Context, output io.Writer, repo restic.Repository, node *restic.Node) error {
	// The slice returned by LoadBlob is passed back in on the next call
	// (presumably so its storage can be reused).
	var scratch []byte

	for _, blobID := range node.Content {
		loaded, err := repo.LoadBlob(ctx, restic.DataBlob, blobID, scratch)
		if err != nil {
			return err
		}
		scratch = loaded

		if _, err := output.Write(scratch); err != nil {
			return errors.Wrap(err, "Write")
		}
	}

	return nil
}
// IsDir reports whether the given node is a directory (node.Type == "dir").
func IsDir(node *restic.Node) bool {
return node.Type == "dir"
}
// IsLink reports whether the given node is a symbolic link (node.Type == "symlink").
func IsLink(node *restic.Node) bool {
return node.Type == "symlink"
}
// IsFile reports whether the given node is a regular file (node.Type == "file").
func IsFile(node *restic.Node) bool {
return node.Type == "file"
}

View file

@ -0,0 +1,103 @@
package dump
import (
"bytes"
"context"
"testing"
"github.com/restic/restic/internal/archiver"
"github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
rtest "github.com/restic/restic/internal/test"
)
// prepareTempdirRepoSrc creates a temporary directory populated with the
// files described by src, plus a test repository. The returned cleanup
// function removes the repository first and the temporary directory second.
func prepareTempdirRepoSrc(t testing.TB, src archiver.TestDir) (tempdir string, repo restic.Repository, cleanup func()) {
	dir, removeDir := rtest.TempDir(t)
	testRepo, removeRepo := repository.TestRepository(t)

	archiver.TestCreateFiles(t, dir, src)

	return dir, testRepo, func() {
		removeRepo()
		removeDir()
	}
}
// CheckDump verifies a dump produced by a WriteDump function. WriteTest calls
// it with the directory that was backed up (testDir) and the buffer holding
// the dump output (testDump); a non-nil error is reported as a test failure.
type CheckDump func(t *testing.T, testDir string, testDump *bytes.Buffer) error
// WriteTest is the shared test driver for WriteDump implementations.
//
// For every test case it creates the described files in a temporary
// directory, snapshots that directory into a fresh test repository, dumps the
// snapshot's root tree with wd into an in-memory buffer and passes the buffer
// to cd for verification against the files on disk.
func WriteTest(t *testing.T, wd WriteDump, cd CheckDump) {
	type testCase struct {
		name   string
		args   archiver.TestDir
		target string
	}

	cases := []testCase{
		{
			name: "single file in root",
			args: archiver.TestDir{
				"file": archiver.TestFile{Content: "string"},
			},
			target: "/",
		},
		{
			name: "multiple files in root",
			args: archiver.TestDir{
				"file1": archiver.TestFile{Content: "string"},
				"file2": archiver.TestFile{Content: "string"},
			},
			target: "/",
		},
		{
			name: "multiple files and folders in root",
			args: archiver.TestDir{
				"file1": archiver.TestFile{Content: "string"},
				"file2": archiver.TestFile{Content: "string"},
				"firstDir": archiver.TestDir{
					"another": archiver.TestFile{Content: "string"},
				},
				"secondDir": archiver.TestDir{
					"another2": archiver.TestFile{Content: "string"},
				},
			},
			target: "/",
		},
		{
			name: "file and symlink in root",
			args: archiver.TestDir{
				"file1": archiver.TestFile{Content: "string"},
				"file2": archiver.TestSymlink{Target: "file1"},
			},
			target: "/",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			ctx, cancel := context.WithCancel(context.Background())
			defer cancel()

			srcDir, repo, cleanup := prepareTempdirRepoSrc(t, tc.args)
			defer cleanup()

			arch := archiver.New(repo, fs.Track{FS: fs.Local{}}, archiver.Options{})

			// Snapshot "." from inside the source directory.
			back := rtest.Chdir(t, srcDir)
			defer back()

			sn, _, err := arch.Snapshot(ctx, []string{"."}, archiver.SnapshotOptions{})
			rtest.OK(t, err)

			tree, err := repo.LoadTree(ctx, *sn.Tree)
			rtest.OK(t, err)

			var out bytes.Buffer
			if err := wd(ctx, repo, tree, tc.target, &out); err != nil {
				t.Fatalf("WriteDump() error = %v", err)
			}

			if err := cd(t, srcDir, &out); err != nil {
				t.Errorf("WriteDump() = does not match: %v", err)
			}
		})
	}
}

View file

@ -5,77 +5,41 @@ import (
"context"
"io"
"os"
"path"
"path/filepath"
"strings"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/walker"
)
type tarDumper struct {
w *tar.Writer
}
// Statically ensure that tarDumper implements dumper.
var _ dumper = tarDumper{}
// WriteTar will write the contents of the given tree, encoded as a tar to the given destination.
// It will loop over all nodes in the tree and dump them recursively.
func WriteTar(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dst io.Writer) error {
tw := tar.NewWriter(dst)
dmp := tarDumper{w: tar.NewWriter(dst)}
for _, rootNode := range tree.Nodes {
rootNode.Path = rootPath
err := tarTree(ctx, repo, rootNode, rootPath, tw)
if err != nil {
_ = tw.Close()
return err
}
}
return tw.Close()
return writeDump(ctx, repo, tree, rootPath, dmp, dst)
}
func tarTree(ctx context.Context, repo restic.Repository, rootNode *restic.Node, rootPath string, tw *tar.Writer) error {
rootNode.Path = path.Join(rootNode.Path, rootNode.Name)
rootPath = rootNode.Path
if err := tarNode(ctx, tw, rootNode, repo); err != nil {
return err
}
// If this is no directory we are finished
if !IsDir(rootNode) {
return nil
}
err := walker.Walk(ctx, repo, *rootNode.Subtree, nil, func(_ restic.ID, nodepath string, node *restic.Node, err error) (bool, error) {
if err != nil {
return false, err
}
if node == nil {
return false, nil
}
node.Path = path.Join(rootPath, nodepath)
if IsFile(node) || IsLink(node) || IsDir(node) {
err := tarNode(ctx, tw, node, repo)
if err != nil {
return false, err
}
}
return false, nil
})
return err
func (dmp tarDumper) Close() error {
return dmp.w.Close()
}
// copied from archive/tar.FileInfoHeader
const (
// Mode constants from the USTAR spec:
// See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
c_ISUID = 04000 // Set uid
c_ISGID = 02000 // Set gid
c_ISVTX = 01000 // Save text (sticky bit)
cISUID = 0o4000 // Set uid
cISGID = 0o2000 // Set gid
cISVTX = 0o1000 // Save text (sticky bit)
)
func tarNode(ctx context.Context, tw *tar.Writer, node *restic.Node, repo restic.Repository) error {
func (dmp tarDumper) dumpNode(ctx context.Context, node *restic.Node, repo restic.Repository) error {
relPath, err := filepath.Rel("/", node.Path)
if err != nil {
return err
@ -84,7 +48,7 @@ func tarNode(ctx context.Context, tw *tar.Writer, node *restic.Node, repo restic
header := &tar.Header{
Name: filepath.ToSlash(relPath),
Size: int64(node.Size),
Mode: int64(node.Mode.Perm()), // c_IS* constants are added later
Mode: int64(node.Mode.Perm()), // cIS* constants are added later
Uid: int(node.UID),
Gid: int(node.GID),
Uname: node.User,
@ -97,13 +61,13 @@ func tarNode(ctx context.Context, tw *tar.Writer, node *restic.Node, repo restic
// adapted from archive/tar.FileInfoHeader
if node.Mode&os.ModeSetuid != 0 {
header.Mode |= c_ISUID
header.Mode |= cISUID
}
if node.Mode&os.ModeSetgid != 0 {
header.Mode |= c_ISGID
header.Mode |= cISGID
}
if node.Mode&os.ModeSticky != 0 {
header.Mode |= c_ISVTX
header.Mode |= cISVTX
}
if IsFile(node) {
@ -120,13 +84,13 @@ func tarNode(ctx context.Context, tw *tar.Writer, node *restic.Node, repo restic
header.Name += "/"
}
err = tw.WriteHeader(header)
err = dmp.w.WriteHeader(header)
if err != nil {
return errors.Wrap(err, "TarHeader")
}
return GetNodeData(ctx, tw, repo, node)
return GetNodeData(ctx, dmp.w, repo, node)
}
func parseXattrs(xattrs []restic.ExtendedAttribute) map[string]string {
@ -146,7 +110,6 @@ func parseXattrs(xattrs []restic.ExtendedAttribute) map[string]string {
tmpMap["SCHILY.acl.default"] = na.String()
}
}
} else {
tmpMap["SCHILY.xattr."+attr.Name] = attrString
}
@ -154,39 +117,3 @@ func parseXattrs(xattrs []restic.ExtendedAttribute) map[string]string {
return tmpMap
}
// GetNodeData will write the contents of the node to the given output
func GetNodeData(ctx context.Context, output io.Writer, repo restic.Repository, node *restic.Node) error {
var (
buf []byte
err error
)
for _, id := range node.Content {
buf, err = repo.LoadBlob(ctx, restic.DataBlob, id, buf)
if err != nil {
return err
}
_, err = output.Write(buf)
if err != nil {
return errors.Wrap(err, "Write")
}
}
return nil
}
// IsDir checks if the given node is a directory
func IsDir(node *restic.Node) bool {
return node.Type == "dir"
}
// IsLink checks if the given node as a link
func IsLink(node *restic.Node) bool {
return node.Type == "symlink"
}
// IsFile checks if the given node is a file
func IsFile(node *restic.Node) bool {
return node.Type == "file"
}

View file

@ -3,7 +3,6 @@ package dump
import (
"archive/tar"
"bytes"
"context"
"fmt"
"io"
"io/ioutil"
@ -13,99 +12,11 @@ import (
"testing"
"time"
"github.com/restic/restic/internal/archiver"
"github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
rtest "github.com/restic/restic/internal/test"
)
func prepareTempdirRepoSrc(t testing.TB, src archiver.TestDir) (tempdir string, repo restic.Repository, cleanup func()) {
tempdir, removeTempdir := rtest.TempDir(t)
repo, removeRepository := repository.TestRepository(t)
archiver.TestCreateFiles(t, tempdir, src)
cleanup = func() {
removeRepository()
removeTempdir()
}
return tempdir, repo, cleanup
}
func TestWriteTar(t *testing.T) {
tests := []struct {
name string
args archiver.TestDir
target string
}{
{
name: "single file in root",
args: archiver.TestDir{
"file": archiver.TestFile{Content: "string"},
},
target: "/",
},
{
name: "multiple files in root",
args: archiver.TestDir{
"file1": archiver.TestFile{Content: "string"},
"file2": archiver.TestFile{Content: "string"},
},
target: "/",
},
{
name: "multiple files and folders in root",
args: archiver.TestDir{
"file1": archiver.TestFile{Content: "string"},
"file2": archiver.TestFile{Content: "string"},
"firstDir": archiver.TestDir{
"another": archiver.TestFile{Content: "string"},
},
"secondDir": archiver.TestDir{
"another2": archiver.TestFile{Content: "string"},
},
},
target: "/",
},
{
name: "file and symlink in root",
args: archiver.TestDir{
"file1": archiver.TestFile{Content: "string"},
"file2": archiver.TestSymlink{Target: "file1"},
},
target: "/",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
tmpdir, repo, cleanup := prepareTempdirRepoSrc(t, tt.args)
defer cleanup()
arch := archiver.New(repo, fs.Track{FS: fs.Local{}}, archiver.Options{})
back := rtest.Chdir(t, tmpdir)
defer back()
sn, _, err := arch.Snapshot(ctx, []string{"."}, archiver.SnapshotOptions{})
rtest.OK(t, err)
tree, err := repo.LoadTree(ctx, *sn.Tree)
rtest.OK(t, err)
dst := &bytes.Buffer{}
if err := WriteTar(ctx, repo, tree, tt.target, dst); err != nil {
t.Fatalf("WriteTar() error = %v", err)
}
if err := checkTar(t, tmpdir, dst); err != nil {
t.Errorf("WriteTar() = tar does not match: %v", err)
}
})
}
WriteTest(t, WriteTar, checkTar)
}
func checkTar(t *testing.T, testDir string, srcTar *bytes.Buffer) error {

62
internal/dump/zip.go Normal file
View file

@ -0,0 +1,62 @@
package dump
import (
"archive/zip"
"context"
"io"
"path/filepath"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/restic"
)
// zipDumper is the dumper implementation that writes nodes into a zip archive.
type zipDumper struct {
w *zip.Writer // archive writer that receives all entries; closed by Close
}
// Statically ensure that zipDumper implements dumper.
var _ dumper = zipDumper{}
// WriteZip dumps the contents of the given tree to dst, encoded as a zip
// archive. It wraps dst in a zip.Writer and delegates the traversal and the
// final Close to writeDump.
func WriteZip(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dst io.Writer) error {
	zw := zip.NewWriter(dst)

	return writeDump(ctx, repo, tree, rootPath, zipDumper{w: zw}, dst)
}
// Close closes the underlying zip.Writer and returns its error.
func (dmp zipDumper) Close() error {
return dmp.w.Close()
}
// dumpNode adds a single node to the zip archive.
//
// The entry name is node.Path made relative to "/" and converted to forward
// slashes; directories get a trailing "/". Size, modification time and mode
// are copied from the node. A symlink is stored as an entry whose content is
// the link target; for every other node the content is streamed from repo
// with GetNodeData.
func (dmp zipDumper) dumpNode(ctx context.Context, node *restic.Node, repo restic.Repository) error {
	relPath, err := filepath.Rel("/", node.Path)
	if err != nil {
		return err
	}

	name := filepath.ToSlash(relPath)
	if IsDir(node) {
		name += "/"
	}

	header := &zip.FileHeader{
		Name:               name,
		UncompressedSize64: node.Size,
		Modified:           node.ModTime,
	}
	header.SetMode(node.Mode)

	entry, err := dmp.w.CreateHeader(header)
	if err != nil {
		return errors.Wrap(err, "ZipHeader")
	}

	if !IsLink(node) {
		return GetNodeData(ctx, entry, repo, node)
	}

	if _, err := entry.Write([]byte(node.LinkTarget)); err != nil {
		return errors.Wrap(err, "Write")
	}

	return nil
}

123
internal/dump/zip_test.go Normal file
View file

@ -0,0 +1,123 @@
package dump
import (
"archive/zip"
"bytes"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
"testing"
"time"
"github.com/restic/restic/internal/fs"
)
// TestWriteZip runs the shared WriteTest cases against WriteZip and verifies
// every resulting archive with checkZip.
func TestWriteZip(t *testing.T) {
WriteTest(t, WriteZip, checkZip)
}
// readZipFile opens the given zip entry and returns its complete contents.
// On any error the returned slice is nil.
func readZipFile(f *zip.File) ([]byte, error) {
	rc, err := f.Open()
	if err != nil {
		return nil, err
	}
	defer rc.Close()

	var content bytes.Buffer
	if _, err := content.ReadFrom(rc); err != nil {
		return nil, err
	}

	return content.Bytes(), nil
}
// checkZip compares the zip archive held in srcZip with the files below
// testDir.
//
// Every archive entry must have a counterpart on disk with the same
// modification time (truncated to seconds) and mode. In addition, directory
// entries must carry the directory's name and end in "/", symlink entries
// must contain the link target, and all other entries must match the file's
// size and contents. Finally the number of archive entries must equal the
// number of entries found by walking testDir (entries named like testDir
// itself are not counted).
//
// Mismatches are returned as errors; failures to read file or entry contents
// abort the test via t.Fatal.
func checkZip(t *testing.T, testDir string, srcZip *bytes.Buffer) error {
	raw := srcZip.Bytes()
	archive, err := zip.NewReader(bytes.NewReader(raw), int64(len(raw)))
	if err != nil {
		return err
	}
	gotEntries := len(archive.File)

	// Count what is on disk, leaving out the walk root.
	rootName := filepath.Base(testDir)
	wantEntries := 0
	countEntry := func(_ string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if info.Name() != rootName {
			wantEntries++
		}
		return nil
	}
	if err := filepath.Walk(testDir, countEntry); err != nil {
		return err
	}

	for _, entry := range archive.File {
		diskPath := filepath.Join(testDir, entry.Name)
		onDisk, err := os.Lstat(diskPath)
		if err != nil {
			return err
		}

		// check metadata, zip header contains time rounded to seconds
		wantTime := onDisk.ModTime().Truncate(time.Second)
		if gotTime := entry.Modified; !wantTime.Equal(gotTime) {
			return fmt.Errorf("modTime does not match, got: %s, want: %s", gotTime, wantTime)
		}

		gotMode, wantMode := entry.Mode(), onDisk.Mode()
		if gotMode != wantMode {
			return fmt.Errorf("mode does not match, got: %v [%08x], want: %v [%08x]",
				gotMode, uint32(gotMode), wantMode, uint32(wantMode))
		}
		t.Logf("Mode is %v [%08x] for %s", gotMode, uint32(gotMode), entry.Name)

		if entry.FileInfo().IsDir() {
			wantName := filepath.ToSlash(onDisk.Name())
			if gotName := filepath.Base(entry.Name); gotName != wantName {
				return fmt.Errorf("foldernames don't match got %v want %v", gotName, wantName)
			}
			if !strings.HasSuffix(entry.Name, "/") {
				return fmt.Errorf("foldernames must end with separator got %v", entry.Name)
			}
			continue
		}

		if gotMode&os.ModeSymlink != 0 {
			target, err := fs.Readlink(diskPath)
			if err != nil {
				return err
			}
			linkName, err := readZipFile(entry)
			if err != nil {
				t.Fatal(err)
			}
			if target != string(linkName) {
				return fmt.Errorf("symlink target does not match, got %s want %s", string(linkName), target)
			}
			continue
		}

		// Everything else is compared by size and content.
		if uint64(onDisk.Size()) != entry.UncompressedSize64 {
			return fmt.Errorf("size does not match got %v want %v", entry.UncompressedSize64, onDisk.Size())
		}
		contentsFile, err := ioutil.ReadFile(diskPath)
		if err != nil {
			t.Fatal(err)
		}
		contentsZip, err := readZipFile(entry)
		if err != nil {
			t.Fatal(err)
		}
		if !bytes.Equal(contentsZip, contentsFile) {
			return fmt.Errorf("contents does not match, got %s want %s", contentsZip, contentsFile)
		}
	}

	if gotEntries != wantEntries {
		return fmt.Errorf("not the same amount of files got %v want %v", gotEntries, wantEntries)
	}

	return nil
}