forked from TrueCloudLab/rclone
chunker: fix case-insensitive NewObject, test metadata detection #4902
- fix test case FsNewObjectCaseInsensitive (PR #4830)
- continue PR #4917, add comments in metadata detection code
- add warning about metadata detection in user documentation
- change metadata size limits, make room for future development
- hide critical chunker parameters from command line
This commit is contained in:
parent
847625822f
commit
35a4de2030
3 changed files with 134 additions and 12 deletions
|
@ -97,7 +97,8 @@ var (
|
||||||
//
|
//
|
||||||
// And still chunker's primary function is to chunk large files
|
// And still chunker's primary function is to chunk large files
|
||||||
// rather than serve as a generic metadata container.
|
// rather than serve as a generic metadata container.
|
||||||
const maxMetadataSize = 255
|
const maxMetadataSize = 1023
|
||||||
|
const maxMetadataSizeWritten = 255
|
||||||
|
|
||||||
// Current/highest supported metadata format.
|
// Current/highest supported metadata format.
|
||||||
const metadataVersion = 1
|
const metadataVersion = 1
|
||||||
|
@ -152,6 +153,7 @@ Normally should contain a ':' and a path, e.g. "myremote:path/to/dir",
|
||||||
}, {
|
}, {
|
||||||
Name: "name_format",
|
Name: "name_format",
|
||||||
Advanced: true,
|
Advanced: true,
|
||||||
|
Hide: fs.OptionHideCommandLine,
|
||||||
Default: `*.rclone_chunk.###`,
|
Default: `*.rclone_chunk.###`,
|
||||||
Help: `String format of chunk file names.
|
Help: `String format of chunk file names.
|
||||||
The two placeholders are: base file name (*) and chunk number (#...).
|
The two placeholders are: base file name (*) and chunk number (#...).
|
||||||
|
@ -162,12 +164,14 @@ Possible chunk files are ignored if their name does not match given format.`,
|
||||||
}, {
|
}, {
|
||||||
Name: "start_from",
|
Name: "start_from",
|
||||||
Advanced: true,
|
Advanced: true,
|
||||||
|
Hide: fs.OptionHideCommandLine,
|
||||||
Default: 1,
|
Default: 1,
|
||||||
Help: `Minimum valid chunk number. Usually 0 or 1.
|
Help: `Minimum valid chunk number. Usually 0 or 1.
|
||||||
By default chunk numbers start from 1.`,
|
By default chunk numbers start from 1.`,
|
||||||
}, {
|
}, {
|
||||||
Name: "meta_format",
|
Name: "meta_format",
|
||||||
Advanced: true,
|
Advanced: true,
|
||||||
|
Hide: fs.OptionHideCommandLine,
|
||||||
Default: "simplejson",
|
Default: "simplejson",
|
||||||
Help: `Format of the metadata object or "none". By default "simplejson".
|
Help: `Format of the metadata object or "none". By default "simplejson".
|
||||||
Metadata is a small JSON file named after the composite file.`,
|
Metadata is a small JSON file named after the composite file.`,
|
||||||
|
@ -725,6 +729,9 @@ func (f *Fs) processEntries(ctx context.Context, origEntries fs.DirEntries, dirP
|
||||||
fs.Infof(f, "ignore non-data chunk %q", remote)
|
fs.Infof(f, "ignore non-data chunk %q", remote)
|
||||||
}
|
}
|
||||||
// need to read metadata to ensure actual object type
|
// need to read metadata to ensure actual object type
|
||||||
|
// no need to read if metaobject is too big or absent,
|
||||||
|
// use the fact that before calling validate()
|
||||||
|
// the `size` field caches metaobject size, if any
|
||||||
if f.useMeta && mainObject != nil && mainObject.size <= maxMetadataSize {
|
if f.useMeta && mainObject != nil && mainObject.size <= maxMetadataSize {
|
||||||
mainObject.unsure = true
|
mainObject.unsure = true
|
||||||
}
|
}
|
||||||
|
@ -802,9 +809,10 @@ func (f *Fs) scanObject(ctx context.Context, remote string, quickScan bool) (fs.
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
o *Object
|
o *Object
|
||||||
baseObj fs.Object
|
baseObj fs.Object
|
||||||
err error
|
err error
|
||||||
|
sameMain bool
|
||||||
)
|
)
|
||||||
|
|
||||||
if f.useMeta {
|
if f.useMeta {
|
||||||
|
@ -818,6 +826,7 @@ func (f *Fs) scanObject(ctx context.Context, remote string, quickScan bool) (fs.
|
||||||
// as a hard limit. Anything larger than that is treated as a
|
// as a hard limit. Anything larger than that is treated as a
|
||||||
// non-chunked file without even checking its contents, so it's
|
// non-chunked file without even checking its contents, so it's
|
||||||
// paramount to prevent metadata from exceeding the maximum size.
|
// paramount to prevent metadata from exceeding the maximum size.
|
||||||
|
// Anything smaller is additionally checked for format.
|
||||||
o = f.newObject("", baseObj, nil)
|
o = f.newObject("", baseObj, nil)
|
||||||
if o.size > maxMetadataSize {
|
if o.size > maxMetadataSize {
|
||||||
return o, nil
|
return o, nil
|
||||||
|
@ -847,18 +856,27 @@ func (f *Fs) scanObject(ctx context.Context, remote string, quickScan bool) (fs.
|
||||||
return nil, errors.Wrap(err, "can't detect composite file")
|
return nil, errors.Wrap(err, "can't detect composite file")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
caseInsensitive := f.features.CaseInsensitive
|
||||||
for _, dirOrObject := range entries {
|
for _, dirOrObject := range entries {
|
||||||
entry, ok := dirOrObject.(fs.Object)
|
entry, ok := dirOrObject.(fs.Object)
|
||||||
if !ok {
|
if !ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
entryRemote := entry.Remote()
|
entryRemote := entry.Remote()
|
||||||
if !strings.Contains(entryRemote, remote) {
|
if !caseInsensitive && !strings.Contains(entryRemote, remote) {
|
||||||
continue // bypass regexp to save cpu
|
continue // bypass regexp to save cpu
|
||||||
}
|
}
|
||||||
mainRemote, chunkNo, ctrlType, xactID := f.parseChunkName(entryRemote)
|
mainRemote, chunkNo, ctrlType, xactID := f.parseChunkName(entryRemote)
|
||||||
if mainRemote == "" || mainRemote != remote {
|
if mainRemote == "" {
|
||||||
continue // skip non-conforming chunks
|
continue // skip non-chunks
|
||||||
|
}
|
||||||
|
if caseInsensitive {
|
||||||
|
sameMain = strings.EqualFold(mainRemote, remote)
|
||||||
|
} else {
|
||||||
|
sameMain = mainRemote == remote
|
||||||
|
}
|
||||||
|
if !sameMain {
|
||||||
|
continue // skip alien chunks
|
||||||
}
|
}
|
||||||
if ctrlType != "" || xactID != "" {
|
if ctrlType != "" || xactID != "" {
|
||||||
if f.useMeta {
|
if f.useMeta {
|
||||||
|
@ -906,11 +924,22 @@ func (f *Fs) scanObject(ctx context.Context, remote string, quickScan bool) (fs.
|
||||||
return o, nil
|
return o, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// readMetadata reads composite object metadata and caches results,
|
||||||
|
// in case of critical errors metadata is not cached.
|
||||||
|
// Returns ErrMetaUnknown if an unsupported metadata format is detected.
|
||||||
|
// If object is not chunked but marked by List or NewObject for recheck,
|
||||||
|
// readMetadata will attempt to parse object as composite with fallback
|
||||||
|
// to non-chunked representation if the attempt fails.
|
||||||
func (o *Object) readMetadata(ctx context.Context) error {
|
func (o *Object) readMetadata(ctx context.Context) error {
|
||||||
|
// return quickly if metadata is absent or has been already cached
|
||||||
|
if !o.f.useMeta {
|
||||||
|
o.isFull = true
|
||||||
|
}
|
||||||
if o.isFull {
|
if o.isFull {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if !o.f.useMeta || (!o.isComposite() && !o.unsure) {
|
if !o.isComposite() && !o.unsure {
|
||||||
|
// this for sure is a non-chunked standalone file
|
||||||
o.isFull = true
|
o.isFull = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -928,6 +957,7 @@ func (o *Object) readMetadata(ctx context.Context) error {
|
||||||
return ErrMetaTooBig
|
return ErrMetaTooBig
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// size is within limits, perform consistency checks
|
||||||
reader, err := metaObject.Open(ctx)
|
reader, err := metaObject.Open(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -965,7 +995,7 @@ func (o *Object) readMetadata(ctx context.Context) error {
|
||||||
o.sha1 = metaInfo.sha1
|
o.sha1 = metaInfo.sha1
|
||||||
}
|
}
|
||||||
|
|
||||||
o.isFull = true
|
o.isFull = true // cache results
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -974,11 +1004,14 @@ func (f *Fs) put(
|
||||||
ctx context.Context, in io.Reader, src fs.ObjectInfo, remote string, options []fs.OpenOption,
|
ctx context.Context, in io.Reader, src fs.ObjectInfo, remote string, options []fs.OpenOption,
|
||||||
basePut putFn, action string, target fs.Object) (obj fs.Object, err error) {
|
basePut putFn, action string, target fs.Object) (obj fs.Object, err error) {
|
||||||
|
|
||||||
|
// Perform consistency checks
|
||||||
if err := f.forbidChunk(src, remote); err != nil {
|
if err := f.forbidChunk(src, remote); err != nil {
|
||||||
return nil, errors.Wrap(err, action+" refused")
|
return nil, errors.Wrap(err, action+" refused")
|
||||||
}
|
}
|
||||||
if target == nil {
|
if target == nil {
|
||||||
// Get target object with a quick directory scan
|
// Get target object with a quick directory scan
|
||||||
|
// skip metadata check if target object does not exist.
|
||||||
|
// ignore not-chunked objects, skip chunk size checks.
|
||||||
if obj, err := f.scanObject(ctx, remote, true); err == nil {
|
if obj, err := f.scanObject(ctx, remote, true); err == nil {
|
||||||
target = obj
|
target = obj
|
||||||
}
|
}
|
||||||
|
@ -991,6 +1024,7 @@ func (f *Fs) put(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Prepare to upload
|
||||||
c := f.newChunkingReader(src)
|
c := f.newChunkingReader(src)
|
||||||
wrapIn := c.wrapStream(ctx, in, src)
|
wrapIn := c.wrapStream(ctx, in, src)
|
||||||
|
|
||||||
|
@ -1593,6 +1627,8 @@ func (f *Fs) okForServerSide(ctx context.Context, src fs.Object, opName string)
|
||||||
diff = "chunk sizes"
|
diff = "chunk sizes"
|
||||||
case f.opt.NameFormat != obj.f.opt.NameFormat:
|
case f.opt.NameFormat != obj.f.opt.NameFormat:
|
||||||
diff = "chunk name formats"
|
diff = "chunk name formats"
|
||||||
|
case f.opt.StartFrom != obj.f.opt.StartFrom:
|
||||||
|
diff = "chunk numbering"
|
||||||
case f.opt.MetaFormat != obj.f.opt.MetaFormat:
|
case f.opt.MetaFormat != obj.f.opt.MetaFormat:
|
||||||
diff = "meta formats"
|
diff = "meta formats"
|
||||||
}
|
}
|
||||||
|
@ -1821,6 +1857,9 @@ func (o *Object) addChunk(chunk fs.Object, chunkNo int) error {
|
||||||
copy(newChunks, o.chunks)
|
copy(newChunks, o.chunks)
|
||||||
o.chunks = newChunks
|
o.chunks = newChunks
|
||||||
}
|
}
|
||||||
|
if o.chunks[chunkNo] != nil {
|
||||||
|
return fmt.Errorf("duplicate chunk number %d", chunkNo+o.f.opt.StartFrom)
|
||||||
|
}
|
||||||
o.chunks[chunkNo] = chunk
|
o.chunks[chunkNo] = chunk
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -2248,15 +2287,17 @@ func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 s
|
||||||
SHA1: sha1,
|
SHA1: sha1,
|
||||||
}
|
}
|
||||||
data, err := json.Marshal(&metadata)
|
data, err := json.Marshal(&metadata)
|
||||||
if err == nil && data != nil && len(data) >= maxMetadataSize {
|
if err == nil && data != nil && len(data) >= maxMetadataSizeWritten {
|
||||||
// be a nitpicker, never produce something you can't consume
|
// be a nitpicker, never produce something you can't consume
|
||||||
return nil, errors.New("metadata can't be this big, please report to rclone developers")
|
return nil, errors.New("metadata can't be this big, please report to rclone developers")
|
||||||
}
|
}
|
||||||
return data, err
|
return data, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// unmarshalSimpleJSON
|
// unmarshalSimpleJSON parses metadata.
|
||||||
//
|
//
|
||||||
|
// In case of errors, returns a flag telling whether the input was
|
||||||
|
// produced by an incompatible version of rclone or was not metadata at all.
|
||||||
// Only metadata format version 1 is supported atm.
|
// Only metadata format version 1 is supported atm.
|
||||||
// Future releases will transparently migrate older metadata objects.
|
// Future releases will transparently migrate older metadata objects.
|
||||||
// New format will have a higher version number and cannot be correctly
|
// New format will have a higher version number and cannot be correctly
|
||||||
|
@ -2266,7 +2307,7 @@ func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 s
|
||||||
func unmarshalSimpleJSON(ctx context.Context, metaObject fs.Object, data []byte) (info *ObjectInfo, madeByChunker bool, err error) {
|
func unmarshalSimpleJSON(ctx context.Context, metaObject fs.Object, data []byte) (info *ObjectInfo, madeByChunker bool, err error) {
|
||||||
// Be strict about JSON format
|
// Be strict about JSON format
|
||||||
// to reduce possibility that a random small file resembles metadata.
|
// to reduce possibility that a random small file resembles metadata.
|
||||||
if data != nil && len(data) > maxMetadataSize {
|
if data != nil && len(data) > maxMetadataSizeWritten {
|
||||||
return nil, false, ErrMetaTooBig
|
return nil, false, ErrMetaTooBig
|
||||||
}
|
}
|
||||||
if data == nil || len(data) < 2 || data[0] != '{' || data[len(data)-1] != '}' {
|
if data == nil || len(data) < 2 || data[0] != '{' || data[len(data)-1] != '}' {
|
||||||
|
|
|
@ -13,6 +13,7 @@ import (
|
||||||
|
|
||||||
"github.com/rclone/rclone/fs"
|
"github.com/rclone/rclone/fs"
|
||||||
"github.com/rclone/rclone/fs/hash"
|
"github.com/rclone/rclone/fs/hash"
|
||||||
|
"github.com/rclone/rclone/fs/object"
|
||||||
"github.com/rclone/rclone/fs/operations"
|
"github.com/rclone/rclone/fs/operations"
|
||||||
"github.com/rclone/rclone/fstest"
|
"github.com/rclone/rclone/fstest"
|
||||||
"github.com/rclone/rclone/fstest/fstests"
|
"github.com/rclone/rclone/fstest/fstests"
|
||||||
|
@ -663,6 +664,80 @@ func testMetadataInput(t *testing.T, f *Fs) {
|
||||||
runSubtest(futureMeta, "future")
|
runSubtest(futureMeta, "future")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// test that chunker refuses to change objects with future/unknown metadata
|
||||||
|
func testFutureProof(t *testing.T, f *Fs) {
|
||||||
|
if f.opt.MetaFormat == "none" {
|
||||||
|
t.Skip("this test requires metadata support")
|
||||||
|
}
|
||||||
|
|
||||||
|
saveOpt := f.opt
|
||||||
|
ctx := context.Background()
|
||||||
|
f.opt.FailHard = true
|
||||||
|
const dir = "future"
|
||||||
|
const file = dir + "/test"
|
||||||
|
defer func() {
|
||||||
|
f.opt.FailHard = false
|
||||||
|
_ = operations.Purge(ctx, f.base, dir)
|
||||||
|
f.opt = saveOpt
|
||||||
|
}()
|
||||||
|
|
||||||
|
modTime := fstest.Time("2001-02-03T04:05:06.499999999Z")
|
||||||
|
putPart := func(name string, part int, data, msg string) {
|
||||||
|
if part > 0 {
|
||||||
|
name = f.makeChunkName(name, part-1, "", "")
|
||||||
|
}
|
||||||
|
item := fstest.Item{Path: name, ModTime: modTime}
|
||||||
|
_, obj := fstests.PutTestContents(ctx, t, f.base, &item, data, true)
|
||||||
|
assert.NotNil(t, obj, msg)
|
||||||
|
}
|
||||||
|
|
||||||
|
// simulate chunked object from future
|
||||||
|
meta := `{"ver":999,"nchunks":3,"size":9,"garbage":"litter","sha1":"0707f2970043f9f7c22029482db27733deaec029"}`
|
||||||
|
putPart(file, 0, meta, "metaobject")
|
||||||
|
putPart(file, 1, "abc", "chunk1")
|
||||||
|
putPart(file, 2, "def", "chunk2")
|
||||||
|
putPart(file, 3, "ghi", "chunk3")
|
||||||
|
|
||||||
|
// List should succeed
|
||||||
|
ls, err := f.List(ctx, dir)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, 1, len(ls))
|
||||||
|
assert.Equal(t, int64(9), ls[0].Size())
|
||||||
|
|
||||||
|
// NewObject should succeed
|
||||||
|
obj, err := f.NewObject(ctx, file)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, file, obj.Remote())
|
||||||
|
assert.Equal(t, int64(9), obj.Size())
|
||||||
|
|
||||||
|
// Hash must fail
|
||||||
|
_, err = obj.Hash(ctx, hash.SHA1)
|
||||||
|
assert.Equal(t, ErrMetaUnknown, err)
|
||||||
|
|
||||||
|
// Move must fail
|
||||||
|
mobj, err := operations.Move(ctx, f, nil, file+"2", obj)
|
||||||
|
assert.Nil(t, mobj)
|
||||||
|
assert.Error(t, err)
|
||||||
|
if err != nil {
|
||||||
|
assert.Contains(t, err.Error(), "please upgrade rclone")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Put must fail
|
||||||
|
oi := object.NewStaticObjectInfo(file, modTime, 3, true, nil, nil)
|
||||||
|
buf := bytes.NewBufferString("abc")
|
||||||
|
_, err = f.Put(ctx, buf, oi)
|
||||||
|
assert.Error(t, err)
|
||||||
|
|
||||||
|
// Rcat must fail
|
||||||
|
in := ioutil.NopCloser(bytes.NewBufferString("abc"))
|
||||||
|
robj, err := operations.Rcat(ctx, f, file, in, modTime)
|
||||||
|
assert.Nil(t, robj)
|
||||||
|
assert.NotNil(t, err)
|
||||||
|
if err != nil {
|
||||||
|
assert.Contains(t, err.Error(), "please upgrade rclone")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// InternalTest dispatches all internal tests
|
// InternalTest dispatches all internal tests
|
||||||
func (f *Fs) InternalTest(t *testing.T) {
|
func (f *Fs) InternalTest(t *testing.T) {
|
||||||
t.Run("PutLarge", func(t *testing.T) {
|
t.Run("PutLarge", func(t *testing.T) {
|
||||||
|
@ -686,6 +761,9 @@ func (f *Fs) InternalTest(t *testing.T) {
|
||||||
t.Run("MetadataInput", func(t *testing.T) {
|
t.Run("MetadataInput", func(t *testing.T) {
|
||||||
testMetadataInput(t, f)
|
testMetadataInput(t, f)
|
||||||
})
|
})
|
||||||
|
t.Run("FutureProof", func(t *testing.T) {
|
||||||
|
testFutureProof(t, f)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
var _ fstests.InternalTester = (*Fs)(nil)
|
var _ fstests.InternalTester = (*Fs)(nil)
|
||||||
|
|
|
@ -299,6 +299,9 @@ If wrapped remote is case insensitive, the chunker overlay will inherit
|
||||||
that property (so you can't have a file called "Hello.doc" and "hello.doc"
|
that property (so you can't have a file called "Hello.doc" and "hello.doc"
|
||||||
in the same directory).
|
in the same directory).
|
||||||
|
|
||||||
|
Chunker included in rclone releases up to `v1.54` can sometimes fail to
|
||||||
|
detect metadata produced by recent versions of rclone. We recommend that users
|
||||||
|
keep rclone up-to-date to avoid data corruption.
|
||||||
|
|
||||||
{{< rem autogenerated options start" - DO NOT EDIT - instead edit fs.RegInfo in backend/chunker/chunker.go then run make backenddocs" >}}
|
{{< rem autogenerated options start" - DO NOT EDIT - instead edit fs.RegInfo in backend/chunker/chunker.go then run make backenddocs" >}}
|
||||||
### Standard Options
|
### Standard Options
|
||||||
|
|
Loading…
Reference in a new issue