forked from TrueCloudLab/restic
Merge pull request #3802 from MichaelEischer/support-non-utf-symlinks
Support non utf symlink targets
This commit is contained in:
commit
98fb56baa6
3 changed files with 86 additions and 16 deletions
12
changelog/unreleased/issue-3311
Normal file
12
changelog/unreleased/issue-3311
Normal file
|
@ -0,0 +1,12 @@
|
|||
Bugfix: Support non-UTF8 paths as symlink target
|
||||
|
||||
Restic versions before 0.16.0 did not correctly back up and restore symlinks
|
||||
that contain a non-UTF8 target. Note that this only affects systems that still
|
||||
use a non-Unicode encoding for filesystem paths.
|
||||
|
||||
We have extended the repository format to add support for such symlinks. Please
|
||||
note that at least restic version 0.16.0 must be used for both backup and
|
||||
restore to correctly handle non-UTF8 symlink targets.
|
||||
|
||||
https://github.com/restic/restic/issues/3311
|
||||
https://github.com/restic/restic/pull/3802
|
|
@ -10,6 +10,7 @@ import (
|
|||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/restic/restic/internal/errors"
|
||||
|
||||
|
@ -27,21 +28,26 @@ type ExtendedAttribute struct {
|
|||
|
||||
// Node is a file, directory or other item in a backup.
|
||||
type Node struct {
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
Mode os.FileMode `json:"mode,omitempty"`
|
||||
ModTime time.Time `json:"mtime,omitempty"`
|
||||
AccessTime time.Time `json:"atime,omitempty"`
|
||||
ChangeTime time.Time `json:"ctime,omitempty"`
|
||||
UID uint32 `json:"uid"`
|
||||
GID uint32 `json:"gid"`
|
||||
User string `json:"user,omitempty"`
|
||||
Group string `json:"group,omitempty"`
|
||||
Inode uint64 `json:"inode,omitempty"`
|
||||
DeviceID uint64 `json:"device_id,omitempty"` // device id of the file, stat.st_dev
|
||||
Size uint64 `json:"size,omitempty"`
|
||||
Links uint64 `json:"links,omitempty"`
|
||||
LinkTarget string `json:"linktarget,omitempty"`
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
Mode os.FileMode `json:"mode,omitempty"`
|
||||
ModTime time.Time `json:"mtime,omitempty"`
|
||||
AccessTime time.Time `json:"atime,omitempty"`
|
||||
ChangeTime time.Time `json:"ctime,omitempty"`
|
||||
UID uint32 `json:"uid"`
|
||||
GID uint32 `json:"gid"`
|
||||
User string `json:"user,omitempty"`
|
||||
Group string `json:"group,omitempty"`
|
||||
Inode uint64 `json:"inode,omitempty"`
|
||||
DeviceID uint64 `json:"device_id,omitempty"` // device id of the file, stat.st_dev
|
||||
Size uint64 `json:"size,omitempty"`
|
||||
Links uint64 `json:"links,omitempty"`
|
||||
LinkTarget string `json:"linktarget,omitempty"`
|
||||
// implicitly base64-encoded field. Only used while encoding, `linktarget_raw` will overwrite LinkTarget if present.
|
||||
// This allows storing arbitrary byte-sequences, which are possible as symlink targets on unix systems,
|
||||
// as LinkTarget without breaking backwards-compatibility.
|
||||
// Must only be set if the linktarget cannot be encoded as valid utf8.
|
||||
LinkTargetRaw []byte `json:"linktarget_raw,omitempty"`
|
||||
ExtendedAttributes []ExtendedAttribute `json:"extended_attributes,omitempty"`
|
||||
Device uint64 `json:"device,omitempty"` // in case of Type == "dev", stat.st_rdev
|
||||
Content IDs `json:"content"`
|
||||
|
@ -344,6 +350,13 @@ func (node Node) MarshalJSON() ([]byte, error) {
|
|||
nj := nodeJSON(node)
|
||||
name := strconv.Quote(node.Name)
|
||||
nj.Name = name[1 : len(name)-1]
|
||||
if nj.LinkTargetRaw != nil {
|
||||
panic("LinkTargetRaw must not be set manually")
|
||||
}
|
||||
if !utf8.ValidString(node.LinkTarget) {
|
||||
// store raw bytes if invalid utf8
|
||||
nj.LinkTargetRaw = []byte(node.LinkTarget)
|
||||
}
|
||||
|
||||
return json.Marshal(nj)
|
||||
}
|
||||
|
@ -358,7 +371,14 @@ func (node *Node) UnmarshalJSON(data []byte) error {
|
|||
}
|
||||
|
||||
nj.Name, err = strconv.Unquote(`"` + nj.Name + `"`)
|
||||
return errors.Wrap(err, "Unquote")
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "Unquote")
|
||||
}
|
||||
if nj.LinkTargetRaw != nil {
|
||||
nj.LinkTarget = string(nj.LinkTargetRaw)
|
||||
nj.LinkTargetRaw = nil
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (node Node) Equals(other Node) bool {
|
||||
|
|
|
@ -2,6 +2,8 @@ package restic_test
|
|||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
|
@ -10,6 +12,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/restic/restic/internal/restic"
|
||||
"github.com/restic/restic/internal/test"
|
||||
rtest "github.com/restic/restic/internal/test"
|
||||
)
|
||||
|
||||
|
@ -334,3 +337,38 @@ func TestFixTime(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSymlinkSerialization(t *testing.T) {
|
||||
for _, link := range []string{
|
||||
"válîd \t Üñi¢òde \n śẗŕinǵ",
|
||||
string([]byte{0, 1, 2, 0xfa, 0xfb, 0xfc}),
|
||||
} {
|
||||
n := restic.Node{
|
||||
LinkTarget: link,
|
||||
}
|
||||
ser, err := json.Marshal(n)
|
||||
test.OK(t, err)
|
||||
var n2 restic.Node
|
||||
err = json.Unmarshal(ser, &n2)
|
||||
test.OK(t, err)
|
||||
fmt.Println(string(ser))
|
||||
|
||||
test.Equals(t, n.LinkTarget, n2.LinkTarget)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSymlinkSerializationFormat(t *testing.T) {
|
||||
for _, d := range []struct {
|
||||
ser string
|
||||
linkTarget string
|
||||
}{
|
||||
{`{"linktarget":"test"}`, "test"},
|
||||
{`{"linktarget":"\u0000\u0001\u0002\ufffd\ufffd\ufffd","linktarget_raw":"AAEC+vv8"}`, string([]byte{0, 1, 2, 0xfa, 0xfb, 0xfc})},
|
||||
} {
|
||||
var n2 restic.Node
|
||||
err := json.Unmarshal([]byte(d.ser), &n2)
|
||||
test.OK(t, err)
|
||||
test.Equals(t, d.linkTarget, n2.LinkTarget)
|
||||
test.Assert(t, n2.LinkTargetRaw == nil, "quoted link target is just a helper field and must be unset after decoding")
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue