Add support for non-utf8 symlink targets

This commit is contained in:
Michael Eischer 2022-05-26 22:32:44 +02:00
parent 25ff9fa893
commit 6adb629608
2 changed files with 48 additions and 16 deletions

View file

@ -0,0 +1,12 @@
Bugfix: Support non-UTF8 paths as symlink target
Restic versions before 0.16.0 did not correctly back up and restore symlinks
that contain a non-UTF8 target. Note that this only affects systems that still
use a non-Unicode encoding for filesystem paths.
We have extended the repository format to add support for such symlinks. Please
note that at least restic version 0.16.0 must be used for both backup and
restore to correctly handle non-UTF8 symlink targets.
https://github.com/restic/restic/issues/3311
https://github.com/restic/restic/pull/3802

View file

@ -10,6 +10,7 @@ import (
"sync" "sync"
"syscall" "syscall"
"time" "time"
"unicode/utf8"
"github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/errors"
@ -27,21 +28,26 @@ type ExtendedAttribute struct {
// Node is a file, directory or other item in a backup. // Node is a file, directory or other item in a backup.
type Node struct { type Node struct {
Name string `json:"name"` Name string `json:"name"`
Type string `json:"type"` Type string `json:"type"`
Mode os.FileMode `json:"mode,omitempty"` Mode os.FileMode `json:"mode,omitempty"`
ModTime time.Time `json:"mtime,omitempty"` ModTime time.Time `json:"mtime,omitempty"`
AccessTime time.Time `json:"atime,omitempty"` AccessTime time.Time `json:"atime,omitempty"`
ChangeTime time.Time `json:"ctime,omitempty"` ChangeTime time.Time `json:"ctime,omitempty"`
UID uint32 `json:"uid"` UID uint32 `json:"uid"`
GID uint32 `json:"gid"` GID uint32 `json:"gid"`
User string `json:"user,omitempty"` User string `json:"user,omitempty"`
Group string `json:"group,omitempty"` Group string `json:"group,omitempty"`
Inode uint64 `json:"inode,omitempty"` Inode uint64 `json:"inode,omitempty"`
DeviceID uint64 `json:"device_id,omitempty"` // device id of the file, stat.st_dev DeviceID uint64 `json:"device_id,omitempty"` // device id of the file, stat.st_dev
Size uint64 `json:"size,omitempty"` Size uint64 `json:"size,omitempty"`
Links uint64 `json:"links,omitempty"` Links uint64 `json:"links,omitempty"`
LinkTarget string `json:"linktarget,omitempty"` LinkTarget string `json:"linktarget,omitempty"`
// implicitly base64-encoded field. Only used while encoding, `linktarget_raw` will overwrite LinkTarget if present.
// This allows storing arbitrary byte-sequences, which are possible as symlink targets on unix systems,
// as LinkTarget without breaking backwards-compatibility.
// Must only be set if the linktarget cannot be encoded as valid utf8.
LinkTargetRaw []byte `json:"linktarget_raw,omitempty"`
ExtendedAttributes []ExtendedAttribute `json:"extended_attributes,omitempty"` ExtendedAttributes []ExtendedAttribute `json:"extended_attributes,omitempty"`
Device uint64 `json:"device,omitempty"` // in case of Type == "dev", stat.st_rdev Device uint64 `json:"device,omitempty"` // in case of Type == "dev", stat.st_rdev
Content IDs `json:"content"` Content IDs `json:"content"`
@ -344,6 +350,13 @@ func (node Node) MarshalJSON() ([]byte, error) {
nj := nodeJSON(node) nj := nodeJSON(node)
name := strconv.Quote(node.Name) name := strconv.Quote(node.Name)
nj.Name = name[1 : len(name)-1] nj.Name = name[1 : len(name)-1]
if nj.LinkTargetRaw != nil {
panic("LinkTargetRaw must not be set manually")
}
if !utf8.ValidString(node.LinkTarget) {
// store raw bytes if invalid utf8
nj.LinkTargetRaw = []byte(node.LinkTarget)
}
return json.Marshal(nj) return json.Marshal(nj)
} }
@ -358,7 +371,14 @@ func (node *Node) UnmarshalJSON(data []byte) error {
} }
nj.Name, err = strconv.Unquote(`"` + nj.Name + `"`) nj.Name, err = strconv.Unquote(`"` + nj.Name + `"`)
return errors.Wrap(err, "Unquote") if err != nil {
return errors.Wrap(err, "Unquote")
}
if nj.LinkTargetRaw != nil {
nj.LinkTarget = string(nj.LinkTargetRaw)
nj.LinkTargetRaw = nil
}
return nil
} }
func (node Node) Equals(other Node) bool { func (node Node) Equals(other Node) bool {