operations: fix invalid UTF-8 when truncating file names when not using --inplace

Before this change, when not using --inplace, rclone could generate
invalid file names when truncating file names to fit within the
character size limits.

This fixes it by taking care to truncate on UTF-8 character
boundaries.

See: https://forum.rclone.org/t/ssh-fx-failure-when-copying-file-with-nonstandard-characters-to-sftp-remote-with-ntfs-drive/42560/
This commit is contained in:
Nick Craig-Wood 2023-10-28 17:12:03 +01:00
parent 179f978f75
commit e64be7652a
2 changed files with 102 additions and 1 deletions

View file

@ -12,6 +12,7 @@ import (
"path"
"strings"
"time"
"unicode/utf8"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/accounting"
@ -65,6 +66,26 @@ func (c *copy) removeFailedPartialCopy(ctx context.Context, f fs.Fs, remote stri
c.removeFailedCopy(ctx, o)
}
// TruncateString s to n bytes.
//
// If s is valid UTF-8 then this may truncate to fewer than n bytes to
// make the returned string also valid UTF-8.
func TruncateString(s string, n int) string {
truncated := s[:n]
if !utf8.ValidString(s) {
// If input string wasn't valid UTF-8 then just return the truncation
return truncated
}
for len(truncated) > 0 {
if utf8.ValidString(truncated) {
return truncated
}
// Remove 1 byte until valid
truncated = truncated[:len(truncated)-1]
}
return truncated
}
// Check to see if we should be using a partial name and return the name for the copy and the inplace flag
func (c *copy) checkPartial() (remoteForCopy string, inplace bool, err error) {
remoteForCopy = c.remote
@ -79,7 +100,7 @@ func (c *copy) checkPartial() (remoteForCopy string, inplace bool, err error) {
suffix := "." + random.String(8) + c.ci.PartialSuffix
base := path.Base(c.remoteForCopy)
if len(base) > 100 {
remoteForCopy = c.remoteForCopy[:len(c.remoteForCopy)-len(suffix)] + suffix
remoteForCopy = TruncateString(c.remoteForCopy, len(c.remoteForCopy)-len(suffix)) + suffix
} else {
remoteForCopy += suffix
}

View file

@ -4,6 +4,7 @@ import (
"context"
"crypto/rand"
"errors"
"fmt"
"strings"
"testing"
@ -15,6 +16,85 @@ import (
"github.com/stretchr/testify/require"
)
func TestTruncateString(t *testing.T) {
for _, test := range []struct {
in string
n int
want string
}{
{
in: "",
n: 0,
want: "",
}, {
in: "Hello World",
n: 5,
want: "Hello",
}, {
in: "ááááá",
n: 5,
want: "áá",
}, {
in: "ááááá\xFF\xFF",
n: 5,
want: "áá\xc3",
}, {
in: "世世世世世",
n: 7,
want: "世世",
}, {
in: "🙂🙂🙂🙂🙂",
n: 16,
want: "🙂🙂🙂🙂",
}, {
in: "🙂🙂🙂🙂🙂",
n: 15,
want: "🙂🙂🙂",
}, {
in: "🙂🙂🙂🙂🙂",
n: 14,
want: "🙂🙂🙂",
}, {
in: "🙂🙂🙂🙂🙂",
n: 13,
want: "🙂🙂🙂",
}, {
in: "🙂🙂🙂🙂🙂",
n: 12,
want: "🙂🙂🙂",
}, {
in: "🙂🙂🙂🙂🙂",
n: 11,
want: "🙂🙂",
}, {
in: "𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽⁱˢⁱᵒⁿᵃʳʸ",
n: 100,
want: "𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽⁱˢ",
}, {
in: "a𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽⁱˢⁱᵒⁿᵃʳʸ",
n: 100,
want: "a𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽⁱˢ",
}, {
in: "aa𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽⁱˢⁱᵒⁿᵃʳʸ",
n: 100,
want: "aa𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽⁱ",
}, {
in: "aaa𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽⁱˢⁱᵒⁿᵃʳʸ",
n: 100,
want: "aaa𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽⁱ",
}, {
in: "aaaa𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽⁱˢⁱᵒⁿᵃʳʸ",
n: 100,
want: "aaaa𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽ",
},
} {
got := operations.TruncateString(test.in, test.n)
assert.Equal(t, test.want, got, fmt.Sprintf("In %q", test.in))
assert.LessOrEqual(t, len(got), test.n)
assert.GreaterOrEqual(t, len(got), test.n-3)
}
}
func TestCopyFile(t *testing.T) {
ctx := context.Background()
r := fstest.NewRun(t)