Merge pull request #3802 from MichaelEischer/support-non-utf-symlinks

Support non utf symlink targets
This commit is contained in:
Michael Eischer 2023-07-23 00:15:01 +02:00 committed by GitHub
commit 98fb56baa6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 86 additions and 16 deletions

View file

@ -0,0 +1,12 @@
Bugfix: Support non-UTF8 paths as symlink target
Restic versions before 0.16.0 did not correctly back up and restore symlinks
that contain a non-UTF8 target. Note that this only affects systems that still
use a non-Unicode encoding for filesystem paths.
We have extended the repository format to add support for such symlinks. Please
note that at least restic version 0.16.0 must be used for both backup and
restore to correctly handle non-UTF8 symlink targets.
https://github.com/restic/restic/issues/3311
https://github.com/restic/restic/pull/3802

View file

@ -10,6 +10,7 @@ import (
"sync" "sync"
"syscall" "syscall"
"time" "time"
"unicode/utf8"
"github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/errors"
@ -27,21 +28,26 @@ type ExtendedAttribute struct {
// Node is a file, directory or other item in a backup. // Node is a file, directory or other item in a backup.
type Node struct { type Node struct {
Name string `json:"name"` Name string `json:"name"`
Type string `json:"type"` Type string `json:"type"`
Mode os.FileMode `json:"mode,omitempty"` Mode os.FileMode `json:"mode,omitempty"`
ModTime time.Time `json:"mtime,omitempty"` ModTime time.Time `json:"mtime,omitempty"`
AccessTime time.Time `json:"atime,omitempty"` AccessTime time.Time `json:"atime,omitempty"`
ChangeTime time.Time `json:"ctime,omitempty"` ChangeTime time.Time `json:"ctime,omitempty"`
UID uint32 `json:"uid"` UID uint32 `json:"uid"`
GID uint32 `json:"gid"` GID uint32 `json:"gid"`
User string `json:"user,omitempty"` User string `json:"user,omitempty"`
Group string `json:"group,omitempty"` Group string `json:"group,omitempty"`
Inode uint64 `json:"inode,omitempty"` Inode uint64 `json:"inode,omitempty"`
DeviceID uint64 `json:"device_id,omitempty"` // device id of the file, stat.st_dev DeviceID uint64 `json:"device_id,omitempty"` // device id of the file, stat.st_dev
Size uint64 `json:"size,omitempty"` Size uint64 `json:"size,omitempty"`
Links uint64 `json:"links,omitempty"` Links uint64 `json:"links,omitempty"`
LinkTarget string `json:"linktarget,omitempty"` LinkTarget string `json:"linktarget,omitempty"`
// implicitly base64-encoded field. Only used while encoding, `linktarget_raw` will overwrite LinkTarget if present.
// This allows storing arbitrary byte-sequences, which are possible as symlink targets on unix systems,
// as LinkTarget without breaking backwards-compatibility.
// Must only be set if the linktarget cannot be encoded as valid utf8.
LinkTargetRaw []byte `json:"linktarget_raw,omitempty"`
ExtendedAttributes []ExtendedAttribute `json:"extended_attributes,omitempty"` ExtendedAttributes []ExtendedAttribute `json:"extended_attributes,omitempty"`
Device uint64 `json:"device,omitempty"` // in case of Type == "dev", stat.st_rdev Device uint64 `json:"device,omitempty"` // in case of Type == "dev", stat.st_rdev
Content IDs `json:"content"` Content IDs `json:"content"`
@ -344,6 +350,13 @@ func (node Node) MarshalJSON() ([]byte, error) {
nj := nodeJSON(node) nj := nodeJSON(node)
name := strconv.Quote(node.Name) name := strconv.Quote(node.Name)
nj.Name = name[1 : len(name)-1] nj.Name = name[1 : len(name)-1]
if nj.LinkTargetRaw != nil {
panic("LinkTargetRaw must not be set manually")
}
if !utf8.ValidString(node.LinkTarget) {
// store raw bytes if invalid utf8
nj.LinkTargetRaw = []byte(node.LinkTarget)
}
return json.Marshal(nj) return json.Marshal(nj)
} }
@ -358,7 +371,14 @@ func (node *Node) UnmarshalJSON(data []byte) error {
} }
nj.Name, err = strconv.Unquote(`"` + nj.Name + `"`) nj.Name, err = strconv.Unquote(`"` + nj.Name + `"`)
return errors.Wrap(err, "Unquote") if err != nil {
return errors.Wrap(err, "Unquote")
}
if nj.LinkTargetRaw != nil {
nj.LinkTarget = string(nj.LinkTargetRaw)
nj.LinkTargetRaw = nil
}
return nil
} }
func (node Node) Equals(other Node) bool { func (node Node) Equals(other Node) bool {

View file

@ -2,6 +2,8 @@ package restic_test
import ( import (
"context" "context"
"encoding/json"
"fmt"
"os" "os"
"path/filepath" "path/filepath"
"reflect" "reflect"
@ -10,6 +12,7 @@ import (
"time" "time"
"github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/test"
rtest "github.com/restic/restic/internal/test" rtest "github.com/restic/restic/internal/test"
) )
@ -334,3 +337,38 @@ func TestFixTime(t *testing.T) {
}) })
} }
} }
// TestSymlinkSerialization checks that a Node's LinkTarget survives a JSON
// marshal/unmarshal round trip, both for a valid UTF-8 string and for a byte
// sequence that is not valid UTF-8.
func TestSymlinkSerialization(t *testing.T) {
	targets := []string{
		"válîd \t Üñi¢òde \n śẗŕinǵ",
		string([]byte{0, 1, 2, 0xfa, 0xfb, 0xfc}),
	}

	for i := range targets {
		original := restic.Node{LinkTarget: targets[i]}

		encoded, err := json.Marshal(original)
		test.OK(t, err)

		var decoded restic.Node
		test.OK(t, json.Unmarshal(encoded, &decoded))

		// Print the serialized form so it is visible in the test output.
		fmt.Println(string(encoded))

		test.Equals(t, original.LinkTarget, decoded.LinkTarget)
	}
}
// TestSymlinkSerializationFormat decodes fixed JSON documents and verifies the
// resulting LinkTarget. It covers a plain "linktarget" document and one that
// also carries "linktarget_raw", and asserts that LinkTargetRaw is nil once
// decoding has finished.
func TestSymlinkSerializationFormat(t *testing.T) {
	type formatCase struct {
		ser        string
		linkTarget string
	}

	cases := []formatCase{
		{
			ser:        `{"linktarget":"test"}`,
			linkTarget: "test",
		},
		{
			ser:        `{"linktarget":"\u0000\u0001\u0002\ufffd\ufffd\ufffd","linktarget_raw":"AAEC+vv8"}`,
			linkTarget: string([]byte{0, 1, 2, 0xfa, 0xfb, 0xfc}),
		},
	}

	for _, tc := range cases {
		var decoded restic.Node
		test.OK(t, json.Unmarshal([]byte(tc.ser), &decoded))

		test.Equals(t, tc.linkTarget, decoded.LinkTarget)
		test.Assert(t, decoded.LinkTargetRaw == nil, "quoted link target is just a helper field and must be unset after decoding")
	}
}