chunker: prevent chunk corruption, survive meta-like input

This commit is contained in:
Ivan Andreev 2019-10-09 12:21:45 +03:00 committed by Nick Craig-Wood
parent 7aa2b4191c
commit 9049bb62ca
4 changed files with 506 additions and 79 deletions

View file

@ -98,6 +98,14 @@ const optimizeFirstChunk = false
// revealHidden is a stub until chunker lands the `reveal hidden` option. // revealHidden is a stub until chunker lands the `reveal hidden` option.
const revealHidden = false const revealHidden = false
// maxSafeChunkNumber is the largest chunk number the chunker accepts.
// It prevents memory overflow due to a specially crafted chunk name:
// a chunk number above this limit is rejected with ErrChunkOverflow
// instead of being used to size the per-object chunk list.
const maxSafeChunkNumber = 10000000
// standard chunker errors
var (
// ErrChunkOverflow is returned when a chunk number or a chunk count
// read from metadata exceeds maxSafeChunkNumber.
ErrChunkOverflow = errors.New("chunk number overflow")
)
// Note: metadata logic is tightly coupled with chunker code in many // Note: metadata logic is tightly coupled with chunker code in many
// places, eg. in checks whether a file should have meta object or is // places, eg. in checks whether a file should have meta object or is
// eligible for chunking. // eligible for chunking.
@ -176,18 +184,17 @@ falling back to SHA1 if unsupported. Requires "simplejson".`,
Help: `Similar to "md5quick" but prefers SHA1 over MD5. Requires "simplejson".`, Help: `Similar to "md5quick" but prefers SHA1 over MD5. Requires "simplejson".`,
}}, }},
}, { }, {
Name: "fail_on_bad_chunks", Name: "fail_hard",
Advanced: true, Advanced: true,
Default: false, Default: false,
Help: `The list command might encounter files with missinng or invalid chunks. Help: `Choose how chunker should handle files with missing or invalid chunks.`,
This boolean flag tells what rclone should do in such cases.`,
Examples: []fs.OptionExample{ Examples: []fs.OptionExample{
{ {
Value: "true", Value: "true",
Help: "Fail with error.", Help: "Report errors and abort current command.",
}, { }, {
Value: "false", Value: "false",
Help: "Silently ignore invalid object.", Help: "Warn user, skip incomplete file and proceed.",
}, },
}, },
}}, }},
@ -231,6 +238,7 @@ func NewFs(name, rpath string, m configmap.Mapper) (fs.Fs, error) {
root: rpath, root: rpath,
opt: *opt, opt: *opt,
} }
f.dirSort = true // processEntries requires that meta Objects prerun data chunks atm.
switch opt.MetaFormat { switch opt.MetaFormat {
case "none": case "none":
@ -298,13 +306,13 @@ func NewFs(name, rpath string, m configmap.Mapper) (fs.Fs, error) {
// Options defines the configuration for this backend // Options defines the configuration for this backend
type Options struct { type Options struct {
Remote string `config:"remote"` Remote string `config:"remote"`
ChunkSize fs.SizeSuffix `config:"chunk_size"` ChunkSize fs.SizeSuffix `config:"chunk_size"`
NameFormat string `config:"name_format"` NameFormat string `config:"name_format"`
StartFrom int `config:"start_from"` StartFrom int `config:"start_from"`
MetaFormat string `config:"meta_format"` MetaFormat string `config:"meta_format"`
HashType string `config:"hash_type"` HashType string `config:"hash_type"`
FailOnBadChunks bool `config:"fail_on_bad_chunks"` FailHard bool `config:"fail_hard"`
} }
// Fs represents a wrapped fs.Fs // Fs represents a wrapped fs.Fs
@ -322,6 +330,7 @@ type Fs struct {
nameRegexp *regexp.Regexp // regular expression to match chunk names nameRegexp *regexp.Regexp // regular expression to match chunk names
opt Options // copy of Options opt Options // copy of Options
features *fs.Features // optional features features *fs.Features // optional features
dirSort bool // reserved for future, ignored
} }
// setChunkNameFormat converts pattern based chunk name format // setChunkNameFormat converts pattern based chunk name format
@ -454,6 +463,20 @@ func (f *Fs) parseChunkName(filePath string) (mainPath string, chunkNo int, ctrl
return return
} }
// forbidChunk refuses to operate on paths that parse as chunk names.
//
// When parseChunkName reports a non-empty main path for filePath, the
// outcome depends on the fail_hard option: in hard mode an error is
// returned, otherwise the overlap is only logged and nil is returned.
// The first argument is used as the log prefix; pass the boolean
// `false` to suppress the log message.
func (f *Fs) forbidChunk(o interface{}, filePath string) error {
	mainPath, _, _, _ := f.parseChunkName(filePath)
	if mainPath == "" {
		// Not a chunk name, nothing to forbid.
		return nil
	}
	if f.opt.FailHard {
		return fmt.Errorf("chunk overlap with %q", mainPath)
	}
	// A literal `false` prefix silences the message; any other value
	// (including `true`) is handed to the logger as is.
	if flag, isBool := o.(bool); isBool && !flag {
		return nil
	}
	fs.Errorf(o, "chunk overlap with %q", mainPath)
	return nil
}
// List the objects and directories in dir into entries. // List the objects and directories in dir into entries.
// The entries can be returned in any order but should be // The entries can be returned in any order but should be
// for a complete directory. // for a complete directory.
@ -480,7 +503,7 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e
if err != nil { if err != nil {
return nil, err return nil, err
} }
return f.chunkEntries(ctx, entries, f.opt.FailOnBadChunks) return f.processEntries(ctx, entries, dir)
} }
// ListR lists the objects and directories of the Fs starting // ListR lists the objects and directories of the Fs starting
@ -498,11 +521,11 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e
// immediately. // immediately.
// //
// Don't implement this unless you have a more efficient way // Don't implement this unless you have a more efficient way
// of listing recursively that doing a directory traversal. // of listing recursively than doing a directory traversal.
func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (err error) { func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (err error) {
do := f.base.Features().ListR do := f.base.Features().ListR
return do(ctx, dir, func(entries fs.DirEntries) error { return do(ctx, dir, func(entries fs.DirEntries) error {
newEntries, err := f.chunkEntries(ctx, entries, f.opt.FailOnBadChunks) newEntries, err := f.processEntries(ctx, entries, dir)
if err != nil { if err != nil {
return err return err
} }
@ -510,13 +533,15 @@ func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (
}) })
} }
// chunkEntries is called by List(R). It assembles chunk entries from // processEntries assembles chunk entries into composite entries
// wrapped remote into composite directory entries. func (f *Fs) processEntries(ctx context.Context, origEntries fs.DirEntries, dirPath string) (newEntries fs.DirEntries, err error) {
func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardErrors bool) (chunkedEntries fs.DirEntries, err error) {
// sort entries, so that meta objects (if any) appear before their chunks // sort entries, so that meta objects (if any) appear before their chunks
sortedEntries := make(fs.DirEntries, len(origEntries)) sortedEntries := origEntries
copy(sortedEntries, origEntries) if f.dirSort {
sort.Sort(sortedEntries) sortedEntries := make(fs.DirEntries, len(origEntries))
copy(sortedEntries, origEntries)
sort.Sort(sortedEntries)
}
byRemote := make(map[string]*Object) byRemote := make(map[string]*Object)
badEntry := make(map[string]bool) badEntry := make(map[string]bool)
@ -554,7 +579,7 @@ func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardEr
} }
} }
if err := mainObject.addChunk(entry, chunkNo); err != nil { if err := mainObject.addChunk(entry, chunkNo); err != nil {
if hardErrors { if f.opt.FailHard {
return nil, err return nil, err
} }
badEntry[mainRemote] = true badEntry[mainRemote] = true
@ -570,7 +595,7 @@ func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardEr
wrapDir.SetRemote(entry.Remote()) wrapDir.SetRemote(entry.Remote())
tempEntries = append(tempEntries, wrapDir) tempEntries = append(tempEntries, wrapDir)
default: default:
if hardErrors { if f.opt.FailHard {
return nil, fmt.Errorf("Unknown object type %T", entry) return nil, fmt.Errorf("Unknown object type %T", entry)
} }
fs.Debugf(f, "unknown object type %T", entry) fs.Debugf(f, "unknown object type %T", entry)
@ -581,7 +606,7 @@ func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardEr
if object, ok := entry.(*Object); ok { if object, ok := entry.(*Object); ok {
remote := object.Remote() remote := object.Remote()
if isSubdir[remote] { if isSubdir[remote] {
if hardErrors { if f.opt.FailHard {
return nil, fmt.Errorf("%q is both meta object and directory", remote) return nil, fmt.Errorf("%q is both meta object and directory", remote)
} }
badEntry[remote] = true // fall thru badEntry[remote] = true // fall thru
@ -591,17 +616,20 @@ func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardEr
continue continue
} }
if err := object.validate(); err != nil { if err := object.validate(); err != nil {
if hardErrors { if f.opt.FailHard {
return nil, err return nil, err
} }
fs.Debugf(f, "invalid chunks in object %q", remote) fs.Debugf(f, "invalid chunks in object %q", remote)
continue continue
} }
} }
chunkedEntries = append(chunkedEntries, entry) newEntries = append(newEntries, entry)
} }
return chunkedEntries, nil if f.dirSort {
sort.Sort(newEntries)
}
return newEntries, nil
} }
// NewObject finds the Object at remote. // NewObject finds the Object at remote.
@ -615,8 +643,8 @@ func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardEr
// but opening even a small file can be slow on some backends. // but opening even a small file can be slow on some backends.
// //
func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) { func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
if mainRemote, _, _, _ := f.parseChunkName(remote); mainRemote != "" { if err := f.forbidChunk(false, remote); err != nil {
return nil, fmt.Errorf("%q should be meta object, not a chunk", remote) return nil, errors.Wrap(err, "can't access")
} }
var ( var (
@ -734,12 +762,12 @@ func (o *Object) readMetadata(ctx context.Context) error {
if err != nil { if err != nil {
return err return err
} }
_ = reader.Close() // ensure file handle is freed on windows
switch o.f.opt.MetaFormat { switch o.f.opt.MetaFormat {
case "simplejson": case "simplejson":
metaInfo, err := unmarshalSimpleJSON(ctx, metaObject, metadata) metaInfo, err := unmarshalSimpleJSON(ctx, metaObject, metadata, true)
if err != nil { if err != nil {
// TODO: in a rare case we might mistake a small file for metadata
return errors.Wrap(err, "invalid metadata") return errors.Wrap(err, "invalid metadata")
} }
if o.size != metaInfo.Size() || len(o.chunks) != metaInfo.nChunks { if o.size != metaInfo.Size() || len(o.chunks) != metaInfo.nChunks {
@ -775,8 +803,12 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st
baseRemote := remote baseRemote := remote
// Transfer chunks data // Transfer chunks data
for chunkNo := 0; !c.done; chunkNo++ { for c.chunkNo = 0; !c.done; c.chunkNo++ {
tempRemote := f.makeChunkName(baseRemote, chunkNo, "", xactNo) if c.chunkNo > maxSafeChunkNumber {
return nil, ErrChunkOverflow
}
tempRemote := f.makeChunkName(baseRemote, c.chunkNo, "", xactNo)
size := c.sizeLeft size := c.sizeLeft
if size > c.chunkSize { if size > c.chunkSize {
size = c.chunkSize size = c.chunkSize
@ -785,7 +817,7 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st
// If a single chunk is expected, avoid the extra rename operation // If a single chunk is expected, avoid the extra rename operation
chunkRemote := tempRemote chunkRemote := tempRemote
if c.expectSingle && chunkNo == 0 && optimizeFirstChunk { if c.expectSingle && c.chunkNo == 0 && optimizeFirstChunk {
chunkRemote = baseRemote chunkRemote = baseRemote
} }
info := f.wrapInfo(src, chunkRemote, size) info := f.wrapInfo(src, chunkRemote, size)
@ -836,8 +868,17 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st
return nil, fmt.Errorf("Incorrect upload size %d != %d", c.readCount, c.sizeTotal) return nil, fmt.Errorf("Incorrect upload size %d != %d", c.readCount, c.sizeTotal)
} }
// Finalize the non-chunked object // Check for input that looks like valid metadata
if len(c.chunks) == 1 { needMeta := len(c.chunks) > 1
if c.readCount <= maxMetadataSize && len(c.chunks) == 1 {
_, err := unmarshalSimpleJSON(ctx, c.chunks[0], c.smallHead, false)
needMeta = err == nil
}
// Finalize small object as non-chunked.
// This can be bypassed, and single chunk with metadata will be
// created due to unsafe input.
if !needMeta && f.useMeta {
// If previous object was chunked, remove its chunks // If previous object was chunked, remove its chunks
f.removeOldChunks(ctx, baseRemote) f.removeOldChunks(ctx, baseRemote)
@ -918,10 +959,12 @@ type chunkingReader struct {
readCount int64 readCount int64
chunkSize int64 chunkSize int64
chunkLimit int64 chunkLimit int64
chunkNo int
err error err error
done bool done bool
chunks []fs.Object chunks []fs.Object
expectSingle bool expectSingle bool
smallHead []byte
fs *Fs fs *Fs
hasher gohash.Hash hasher gohash.Hash
md5 string md5 string
@ -1001,6 +1044,9 @@ func (c *chunkingReader) Read(buf []byte) (bytesRead int, err error) {
return return
} }
c.accountBytes(int64(bytesRead)) c.accountBytes(int64(bytesRead))
if c.chunkNo == 0 && c.expectSingle && bytesRead > 0 && c.readCount <= maxMetadataSize {
c.smallHead = append(c.smallHead, buf[:bytesRead]...)
}
if bytesRead == 0 && c.sizeLeft == 0 { if bytesRead == 0 && c.sizeLeft == 0 {
err = io.EOF // Force EOF when no data left. err = io.EOF // Force EOF when no data left.
} }
@ -1048,16 +1094,25 @@ func (f *Fs) removeOldChunks(ctx context.Context, remote string) {
// will return the object and the error, otherwise will return // will return the object and the error, otherwise will return
// nil and the error // nil and the error
func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
if err := f.forbidChunk(src, src.Remote()); err != nil {
return nil, errors.Wrap(err, "refusing to put")
}
return f.put(ctx, in, src, src.Remote(), options, f.base.Put) return f.put(ctx, in, src, src.Remote(), options, f.base.Put)
} }
// PutStream uploads to the remote path with the modTime given of indeterminate size // PutStream uploads to the remote path with the modTime given of indeterminate size
func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
if err := f.forbidChunk(src, src.Remote()); err != nil {
return nil, errors.Wrap(err, "refusing to upload")
}
return f.put(ctx, in, src, src.Remote(), options, f.base.Features().PutStream) return f.put(ctx, in, src, src.Remote(), options, f.base.Features().PutStream)
} }
// Update in to the object with the modTime given of the given size // Update in to the object with the modTime given of the given size
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error { func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
if err := o.f.forbidChunk(o, o.Remote()); err != nil {
return errors.Wrap(err, "update refused")
}
if err := o.readMetadata(ctx); err != nil { if err := o.readMetadata(ctx); err != nil {
// refuse to update a file of unsupported format // refuse to update a file of unsupported format
return errors.Wrap(err, "refusing to update") return errors.Wrap(err, "refusing to update")
@ -1080,13 +1135,12 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
// //
// This will create a duplicate if we upload a new file without // This will create a duplicate if we upload a new file without
// checking to see if there is one already - use Put() for that. // checking to see if there is one already - use Put() for that.
// TODO: really split stream here
func (f *Fs) PutUnchecked(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { func (f *Fs) PutUnchecked(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
do := f.base.Features().PutUnchecked do := f.base.Features().PutUnchecked
if do == nil { if do == nil {
return nil, errors.New("can't PutUnchecked") return nil, errors.New("can't PutUnchecked")
} }
// TODO: handle options and chunking! // TODO: handle range/limit options and really chunk stream here!
o, err := do(ctx, in, f.wrapInfo(src, "", -1)) o, err := do(ctx, in, f.wrapInfo(src, "", -1))
if err != nil { if err != nil {
return nil, err return nil, err
@ -1117,6 +1171,9 @@ func (f *Fs) Hashes() hash.Set {
// //
// Shouldn't return an error if it already exists // Shouldn't return an error if it already exists
func (f *Fs) Mkdir(ctx context.Context, dir string) error { func (f *Fs) Mkdir(ctx context.Context, dir string) error {
if err := f.forbidChunk(dir, dir); err != nil {
return errors.Wrap(err, "can't mkdir")
}
return f.base.Mkdir(ctx, dir) return f.base.Mkdir(ctx, dir)
} }
@ -1181,6 +1238,11 @@ func (f *Fs) Purge(ctx context.Context) error {
// the `delete hidden` flag above or at least the user has been warned. // the `delete hidden` flag above or at least the user has been warned.
// //
func (o *Object) Remove(ctx context.Context) (err error) { func (o *Object) Remove(ctx context.Context) (err error) {
if err := o.f.forbidChunk(o, o.Remote()); err != nil {
// operations.Move can still call Remove if chunker's Move refuses
// to corrupt file in hard mode. Hence, refuse to Remove, too.
return errors.Wrap(err, "refuse to corrupt")
}
if err := o.readMetadata(ctx); err != nil { if err := o.readMetadata(ctx); err != nil {
// Proceed but warn user that unexpected things can happen. // Proceed but warn user that unexpected things can happen.
fs.Errorf(o, "Removing a file with unsupported metadata: %v", err) fs.Errorf(o, "Removing a file with unsupported metadata: %v", err)
@ -1206,6 +1268,9 @@ func (o *Object) Remove(ctx context.Context) (err error) {
// copyOrMove implements copy or move // copyOrMove implements copy or move
func (f *Fs) copyOrMove(ctx context.Context, o *Object, remote string, do copyMoveFn, md5, sha1, opName string) (fs.Object, error) { func (f *Fs) copyOrMove(ctx context.Context, o *Object, remote string, do copyMoveFn, md5, sha1, opName string) (fs.Object, error) {
if err := f.forbidChunk(o, remote); err != nil {
return nil, errors.Wrapf(err, "can't %s", opName)
}
if !o.isComposite() { if !o.isComposite() {
fs.Debugf(o, "%s non-chunked object...", opName) fs.Debugf(o, "%s non-chunked object...", opName)
oResult, err := do(ctx, o.mainChunk(), remote) // chain operation to a single wrapped chunk oResult, err := do(ctx, o.mainChunk(), remote) // chain operation to a single wrapped chunk
@ -1493,6 +1558,9 @@ func (o *Object) addChunk(chunk fs.Object, chunkNo int) error {
o.chunks = append(o.chunks, chunk) o.chunks = append(o.chunks, chunk)
return nil return nil
} }
if chunkNo > maxSafeChunkNumber {
return ErrChunkOverflow
}
if chunkNo > len(o.chunks) { if chunkNo > len(o.chunks) {
newChunks := make([]fs.Object, (chunkNo + 1), (chunkNo+1)*2) newChunks := make([]fs.Object, (chunkNo + 1), (chunkNo+1)*2)
copy(newChunks, o.chunks) copy(newChunks, o.chunks)
@ -1897,20 +1965,31 @@ func (o *Object) ID() string {
// Meta format `simplejson` // Meta format `simplejson`
type metaSimpleJSON struct { type metaSimpleJSON struct {
Version int `json:"ver"` // required core fields
Size int64 `json:"size"` // total size of data chunks Version *int `json:"ver"`
NChunks int `json:"nchunks"` // number of data chunks Size *int64 `json:"size"` // total size of data chunks
MD5 string `json:"md5"` ChunkNum *int `json:"nchunks"` // number of data chunks
SHA1 string `json:"sha1"` // optional extra fields
MD5 string `json:"md5,omitempty"`
SHA1 string `json:"sha1,omitempty"`
} }
// marshalSimpleJSON
//
// Current implementation creates metadata in two cases:
// - for files larger than chunk size
// - if file contents can be mistaken as meta object
//
func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 string) ([]byte, error) { func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 string) ([]byte, error) {
version := metadataVersion
metadata := metaSimpleJSON{ metadata := metaSimpleJSON{
Version: metadataVersion, // required core fields
Size: size, Version: &version,
NChunks: nChunks, Size: &size,
MD5: md5, ChunkNum: &nChunks,
SHA1: sha1, // optional extra fields
MD5: md5,
SHA1: sha1,
} }
data, err := json.Marshal(&metadata) data, err := json.Marshal(&metadata)
if err == nil && data != nil && len(data) >= maxMetadataSize { if err == nil && data != nil && len(data) >= maxMetadataSize {
@ -1920,6 +1999,7 @@ func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 s
return data, err return data, err
} }
// unmarshalSimpleJSON
// Note: only metadata format version 1 is supported atm. // Note: only metadata format version 1 is supported atm.
// //
// Current implementation creates metadata only for files larger than // Current implementation creates metadata only for files larger than
@ -1931,22 +2011,37 @@ func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 s
// handled by current implementation. // handled by current implementation.
// The version check below will then explicitly ask user to upgrade rclone. // The version check below will then explicitly ask user to upgrade rclone.
// //
func unmarshalSimpleJSON(ctx context.Context, metaObject fs.Object, data []byte) (info *ObjectInfo, err error) { func unmarshalSimpleJSON(ctx context.Context, metaObject fs.Object, data []byte, strictChecks bool) (info *ObjectInfo, err error) {
if len(data) > maxMetadataSize { // Be strict about JSON format
// to reduce possibility that a random small file resembles metadata.
if data != nil && len(data) > maxMetadataSize {
return nil, errors.New("too big") return nil, errors.New("too big")
} }
if data == nil || len(data) < 2 || data[0] != '{' || data[len(data)-1] != '}' {
return nil, errors.New("invalid json")
}
var metadata metaSimpleJSON var metadata metaSimpleJSON
err = json.Unmarshal(data, &metadata) err = json.Unmarshal(data, &metadata)
if err != nil { if err != nil {
return nil, err return nil, err
} }
// Basic fields are strictly required
// to reduce possibility that a random small file resembles metadata.
if metadata.Version == nil || metadata.Size == nil || metadata.ChunkNum == nil {
return nil, errors.New("missing required field")
}
// Perform strict checks, avoid corruption of future metadata formats. // Perform strict checks, avoid corruption of future metadata formats.
if metadata.Size < 0 { if *metadata.Version < 1 {
return nil, errors.New("wrong version")
}
if *metadata.Size < 0 {
return nil, errors.New("negative file size") return nil, errors.New("negative file size")
} }
if metadata.NChunks <= 0 { if *metadata.ChunkNum < 0 {
return nil, errors.New("wrong number of chunks") return nil, errors.New("negative number of chunks")
}
if *metadata.ChunkNum > maxSafeChunkNumber {
return nil, ErrChunkOverflow
} }
if metadata.MD5 != "" { if metadata.MD5 != "" {
_, err = hex.DecodeString(metadata.MD5) _, err = hex.DecodeString(metadata.MD5)
@ -1960,18 +2055,20 @@ func unmarshalSimpleJSON(ctx context.Context, metaObject fs.Object, data []byte)
return nil, errors.New("wrong sha1 hash") return nil, errors.New("wrong sha1 hash")
} }
} }
if metadata.Version <= 0 { // ChunkNum is allowed to be 0 in future versions
return nil, errors.New("wrong version number") if *metadata.ChunkNum < 1 && *metadata.Version <= metadataVersion {
return nil, errors.New("wrong number of chunks")
} }
if metadata.Version != metadataVersion { // Non-strict mode also accepts future metadata versions
return nil, errors.Errorf("version %d is not supported, please upgrade rclone", metadata.Version) if *metadata.Version > metadataVersion && strictChecks {
return nil, fmt.Errorf("version %d is not supported, please upgrade rclone", metadata.Version)
} }
var nilFs *Fs // nil object triggers appropriate type method var nilFs *Fs // nil object triggers appropriate type method
info = nilFs.wrapInfo(metaObject, "", metadata.Size) info = nilFs.wrapInfo(metaObject, "", *metadata.Size)
info.nChunks = *metadata.ChunkNum
info.md5 = metadata.MD5 info.md5 = metadata.MD5
info.sha1 = metadata.SHA1 info.sha1 = metadata.SHA1
info.nChunks = metadata.NChunks
return info, nil return info, nil
} }

View file

@ -1,15 +1,23 @@
package chunker package chunker
import ( import (
"bytes"
"context" "context"
"flag" "flag"
"fmt" "fmt"
"io/ioutil"
"path"
"regexp"
"strings"
"testing" "testing"
"github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/operations"
"github.com/rclone/rclone/fstest" "github.com/rclone/rclone/fstest"
"github.com/rclone/rclone/fstest/fstests" "github.com/rclone/rclone/fstest/fstests"
"github.com/rclone/rclone/lib/random"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
) )
// Command line flags // Command line flags
@ -240,6 +248,307 @@ func testChunkNameFormat(t *testing.T, f *Fs) {
assertMakeNamePanics("fish", -2, "bind.", 0) assertMakeNamePanics("fish", -2, "bind.", 0)
} }
// testSmallFileInternals uploads small (including empty) files through
// the chunker and verifies both their internal representation and their
// contents, for the object returned by the upload as well as for the
// object looked up afterwards with NewObject.
func testSmallFileInternals(t *testing.T, f *Fs) {
	const dir = "small"
	ctx := context.Background()

	// Restore the options and clean up the test directory on exit.
	savedOpt := f.opt
	defer func() {
		f.opt.FailHard = false
		_ = operations.Purge(ctx, f.base, dir)
		f.opt = savedOpt
	}()
	f.opt.FailHard = false

	modTime := fstest.Time("2001-02-03T04:05:06.499999999Z")

	// verifyLayout asserts how a small file is represented inside Object.
	verifyLayout := func(obj fs.Object) {
		assert.NotNil(t, obj)
		chunked, isChunkerObject := obj.(*Object)
		assert.True(t, isChunkerObject)
		assert.NotNil(t, chunked)
		if chunked == nil {
			return
		}
		if !f.useMeta {
			// If meta format is "none", non-chunked file (even empty)
			// internally is a single chunk without meta object.
			assert.Nil(t, chunked.main)
			assert.True(t, chunked.isComposite()) // sorry, sometimes a name is misleading
			assert.Equal(t, 1, len(chunked.chunks))
		} else {
			// normally non-chunked file is kept in the Object's main field
			assert.NotNil(t, chunked.main)
			assert.False(t, chunked.isComposite())
			assert.Equal(t, 0, len(chunked.chunks))
		}
	}

	// verifyData asserts that the object has the expected size and body.
	verifyData := func(obj fs.Object, want string) {
		assert.NotNil(t, obj)
		assert.Equal(t, int64(len(want)), obj.Size())
		stream, err := obj.Open(ctx)
		assert.NoError(t, err)
		assert.NotNil(t, stream)
		if stream == nil {
			return
		}
		got, err := ioutil.ReadAll(stream)
		assert.NoError(t, err)
		assert.Equal(t, want, string(got))
		_ = stream.Close()
	}

	// roundTrip uploads one file, checks it, then looks it up and checks again.
	roundTrip := func(name, contents string) {
		remote := path.Join(dir, name)
		item := fstest.Item{Path: remote, ModTime: modTime}
		_, uploaded := fstests.PutTestContents(ctx, t, f, &item, contents, false)
		assert.NotNil(t, uploaded)
		verifyLayout(uploaded)
		verifyData(uploaded, contents)

		// objects returned by Put and NewObject must have similar structure
		found, err := f.NewObject(ctx, remote)
		assert.NoError(t, err)
		assert.NotNil(t, found)
		verifyLayout(found)
		verifyData(found, contents)

		_ = found.Remove(ctx)
		_ = uploaded.Remove(ctx) // for good
	}

	cases := []struct {
		name     string
		contents string
	}{
		{"emptyfile", ""},
		{"smallfile", "Ok"},
	}
	for _, tc := range cases {
		roundTrip(tc.name, tc.contents)
	}
}
// testPreventCorruption checks that, with the fail_hard option enabled,
// the chunker refuses operations whose target path parses as a chunk of
// an existing file (Mkdir, Move, Copy, NewObject, Put, Update, Remove),
// while reading an already obtained chunk object stays possible.
// The statements below toggle f.opt.FailHard in a specific order, so
// their sequence matters.
func testPreventCorruption(t *testing.T, f *Fs) {
// The scenario needs chunk sizes of at most 50, otherwise it is skipped.
if f.opt.ChunkSize > 50 {
t.Skip("this test requires small chunks")
}
const dir = "corrupted"
ctx := context.Background()
// Restore the original options and purge the test directory on exit.
saveOpt := f.opt
defer func() {
f.opt.FailHard = false
_ = operations.Purge(ctx, f.base, dir)
f.opt = saveOpt
}()
// In hard mode forbidChunk returns an error instead of only logging.
f.opt.FailHard = true
contents := random.String(250)
modTime := fstest.Time("2001-02-03T04:05:06.499999999Z")
const overlapMessage = "chunk overlap"
// assertOverlapError expects a non-nil error mentioning the overlap.
assertOverlapError := func(err error) {
assert.Error(t, err)
if err != nil {
assert.Contains(t, err.Error(), overlapMessage)
}
}
// newFile uploads a file with the shared test contents under dir.
newFile := func(name string) fs.Object {
item := fstest.Item{Path: path.Join(dir, name), ModTime: modTime}
_, obj := fstests.PutTestContents(ctx, t, f, &item, contents, true)
require.NotNil(t, obj)
return obj
}
billyObj := newFile("billy")
// billyChunkName builds the name of the given data chunk of billy.
billyChunkName := func(chunkNo int) string {
return f.makeChunkName(billyObj.Remote(), chunkNo, "", -1)
}
// creating a directory named like a chunk must be refused
err := f.Mkdir(ctx, billyChunkName(1))
assertOverlapError(err)
// moving onto a chunk name must fail, either because the operation
// is unavailable (fs.ErrorCantMove) or because of the overlap
_, err = f.Move(ctx, newFile("silly1"), billyChunkName(2))
assert.Error(t, err)
assert.True(t, err == fs.ErrorCantMove || (err != nil && strings.Contains(err.Error(), overlapMessage)))
// the same applies to copying onto a chunk name
_, err = f.Copy(ctx, newFile("silly2"), billyChunkName(3))
assert.Error(t, err)
assert.True(t, err == fs.ErrorCantCopy || (err != nil && strings.Contains(err.Error(), overlapMessage)))
// accessing chunks in strict mode is prohibited
f.opt.FailHard = true
billyChunk4Name := billyChunkName(4)
billyChunk4, err := f.NewObject(ctx, billyChunk4Name)
assertOverlapError(err)
// in soft mode the chunk object can be obtained; it is used below
f.opt.FailHard = false
billyChunk4, err = f.NewObject(ctx, billyChunk4Name)
assert.NoError(t, err)
require.NotNil(t, billyChunk4)
// back in hard mode, uploading over the chunk must be refused
f.opt.FailHard = true
_, err = f.Put(ctx, bytes.NewBufferString(contents), billyChunk4)
assertOverlapError(err)
// you can freely read chunks (if you have an object)
r, err := billyChunk4.Open(ctx)
assert.NoError(t, err)
var chunkContents []byte
assert.NotPanics(t, func() {
chunkContents, err = ioutil.ReadAll(r)
})
assert.NoError(t, err)
// what is read through the chunk object must differ from the whole file
assert.NotEqual(t, contents, string(chunkContents))
// but you can't change them
err = billyChunk4.Update(ctx, bytes.NewBufferString(contents), newFile("silly3"))
assertOverlapError(err)
// Remove isn't special, you can't corrupt files even if you have an object
err = billyChunk4.Remove(ctx)
assertOverlapError(err)
// continue with a fresh file (willy) in case billy was anyhow corrupted
willyObj := newFile("willy")
willyChunkName := f.makeChunkName(willyObj.Remote(), 1, "", -1)
// obtain the chunk object in soft mode, then switch back to hard mode
f.opt.FailHard = false
willyChunk, err := f.NewObject(ctx, willyChunkName)
f.opt.FailHard = true
assert.NoError(t, err)
require.NotNil(t, willyChunk)
_, err = operations.Copy(ctx, f, willyChunk, willyChunkName, newFile("silly4"))
assertOverlapError(err)
// operations.Move will return error when chunker's Move refused
// to corrupt target file, but reverts to copy/delete method
// still trying to delete target chunk. Chunker must come to rescue.
_, err = operations.Move(ctx, f, willyChunk, willyChunkName, newFile("silly5"))
assertOverlapError(err)
// the chunk must still be readable after the refused operations
r, err = willyChunk.Open(ctx)
assert.NoError(t, err)
assert.NotPanics(t, func() {
_, err = ioutil.ReadAll(r)
})
assert.NoError(t, err)
}
// testChunkNumberOverflow plants, directly on the wrapped remote, a file
// whose name looks like a chunk with a number above maxSafeChunkNumber
// and asserts that the chunker reports errors for the affected file
// rather than trying to handle such a chunk.
func testChunkNumberOverflow(t *testing.T, f *Fs) {
	if f.opt.ChunkSize > 50 {
		t.Skip("this test requires small chunks")
	}
	const (
		dir         = "wreaked"
		wreakNumber = 10200300 // above maxSafeChunkNumber
	)
	ctx := context.Background()

	// Restore the options and clean up the test directory on exit.
	savedOpt := f.opt
	defer func() {
		f.opt.FailHard = false
		_ = operations.Purge(ctx, f.base, dir)
		f.opt = savedOpt
	}()

	modTime := fstest.Time("2001-02-03T04:05:06.499999999Z")
	contents := random.String(100)

	// upload puts the shared test contents under dir on the given remote
	// and returns the resulting object together with its full path.
	upload := func(target fs.Fs, name string) (fs.Object, string) {
		remote := path.Join(dir, name)
		item := fstest.Item{Path: remote, ModTime: modTime}
		_, created := fstests.PutTestContents(ctx, t, target, &item, contents, true)
		require.NotNil(t, created)
		return created, remote
	}

	f.opt.FailHard = false
	victim, victimName := upload(f, "wreaker")
	// The rogue chunk bypasses the chunker and goes to the base remote.
	rogueName := f.makeChunkName("wreaker", wreakNumber, "", -1)
	rogue, _ := upload(f.base, rogueName)

	// Soft mode: the listing is checked against nil expected items and
	// directories, and direct access to the file must fail.
	f.opt.FailHard = false
	fstest.CheckListingWithRoot(t, f, dir, nil, nil, f.Precision())
	_, err := f.NewObject(ctx, victimName)
	assert.Error(t, err)

	// Hard mode: both listing and direct access must fail.
	f.opt.FailHard = true
	_, err = f.List(ctx, dir)
	assert.Error(t, err)
	_, err = f.NewObject(ctx, victimName)
	assert.Error(t, err)

	f.opt.FailHard = false
	_ = rogue.Remove(ctx)
	_ = victim.Remove(ctx)
}
// testMetadataInput uploads user files whose contents look like chunker
// metadata (current, past and future versions of the simple JSON format)
// and checks that each of them is stored as a composite object with exactly
// one chunk and can be read back unchanged.
//
// The test is skipped when the configured chunk size is below
// minChunkForTest, because the metadata-like contents must fit in one chunk.
func testMetadataInput(t *testing.T, f *Fs) {
	const minChunkForTest = 50
	if f.opt.ChunkSize < minChunkForTest {
		t.Skip("this test requires chunks that fit metadata")
	}

	const dir = "usermeta"
	ctx := context.Background()

	// Restore the original options once the test directory is purged.
	saveOpt := f.opt
	defer func() {
		f.opt.FailHard = false
		_ = operations.Purge(ctx, f.base, dir)
		f.opt = saveOpt
	}()
	f.opt.FailHard = false

	modTime := fstest.Time("2001-02-03T04:05:06.499999999Z")

	// putFile uploads contents as name onto the given remote; check is
	// passed through to PutTestContents. message annotates the non-nil
	// assertion on the resulting object.
	putFile := func(f fs.Fs, name, contents, message string, check bool) fs.Object {
		item := fstest.Item{Path: name, ModTime: modTime}
		_, obj := fstests.PutTestContents(ctx, t, f, &item, contents, check)
		assert.NotNil(t, obj, message)
		return obj
	}

	// runSubtest uploads contents as dir/name through chunker and verifies
	// size, composite layout and read-back of the resulting object.
	runSubtest := func(contents, name string) {
		description := fmt.Sprintf("file with %s metadata", name)
		filename := path.Join(dir, name)
		require.True(t, len(contents) > 2 && len(contents) < minChunkForTest, description+" test data is correct")

		// Plant a stray chunk number 0 for the target name directly on the
		// base remote before uploading through chunker.
		part := putFile(f.base, f.makeChunkName(filename, 0, "", -1), "oops", "", true)
		defer func() {
			if part != nil {
				_ = part.Remove(ctx)
			}
		}()
		_ = putFile(f, filename, contents, "upload "+description, false)

		// Stop this subtest right away if the object is inaccessible:
		// calling methods on a nil fs.Object below would panic.
		obj, err := f.NewObject(ctx, filename)
		require.NoError(t, err, "access "+description)
		require.NotNil(t, obj)
		// Registered after the stray chunk cleanup so that the composite
		// object is removed first (deferred calls run in reverse order).
		defer func() {
			_ = obj.Remove(ctx)
		}()
		assert.Equal(t, int64(len(contents)), obj.Size(), "size "+description)

		// The type assertion result is a bool, so it is checked with
		// assert.True; assert.NotNil on a bool can never fail.
		o, ok := obj.(*Object)
		assert.True(t, ok, description+" is a chunker object")
		if ok && o != nil {
			assert.True(t, o.isComposite() && len(o.chunks) == 1, description+" is forced composite")
		}

		r, err := obj.Open(ctx)
		assert.NoError(t, err, "open "+description)
		assert.NotNil(t, r, "open stream of "+description)
		if err == nil && r != nil {
			data, err := ioutil.ReadAll(r)
			assert.NoError(t, err, "read all of "+description)
			assert.Equal(t, contents, string(data), description+" contents is ok")
			_ = r.Close()
		}
	}

	// NOTE(review): the numeric arguments presumably are size=3 and
	// nchunks=1, and the empty strings the hash sums - confirm against
	// marshalSimpleJSON.
	metaData, err := marshalSimpleJSON(ctx, 3, 1, "", "")
	require.NoError(t, err)
	todaysMeta := string(metaData)
	runSubtest(todaysMeta, "today")

	// The version field is rewritten for both variants below, so compile
	// its pattern once.
	verRe := regexp.MustCompile(`"ver":[0-9]+`)

	pastMeta := verRe.ReplaceAllLiteralString(todaysMeta, `"ver":1`)
	pastMeta = regexp.MustCompile(`"size":[0-9]+`).ReplaceAllLiteralString(pastMeta, `"size":0`)
	runSubtest(pastMeta, "past")

	// The future variant also carries an extra unknown field.
	futureMeta := verRe.ReplaceAllLiteralString(todaysMeta, `"ver":999`)
	futureMeta = regexp.MustCompile(`"nchunks":[0-9]+`).ReplaceAllLiteralString(futureMeta, `"nchunks":0,"x":"y"`)
	runSubtest(futureMeta, "future")
}
// InternalTest dispatches all internal tests // InternalTest dispatches all internal tests
func (f *Fs) InternalTest(t *testing.T) { func (f *Fs) InternalTest(t *testing.T) {
t.Run("PutLarge", func(t *testing.T) { t.Run("PutLarge", func(t *testing.T) {
@ -251,6 +560,18 @@ func (f *Fs) InternalTest(t *testing.T) {
t.Run("ChunkNameFormat", func(t *testing.T) { t.Run("ChunkNameFormat", func(t *testing.T) {
testChunkNameFormat(t, f) testChunkNameFormat(t, f)
}) })
t.Run("SmallFileInternals", func(t *testing.T) {
testSmallFileInternals(t, f)
})
t.Run("PreventCorruption", func(t *testing.T) {
testPreventCorruption(t, f)
})
t.Run("ChunkNumberOverflow", func(t *testing.T) {
testChunkNumberOverflow(t, f)
})
t.Run("MetadataInput", func(t *testing.T) {
testMetadataInput(t, f)
})
} }
var _ fstests.InternalTester = (*Fs)(nil) var _ fstests.InternalTester = (*Fs)(nil)

View file

@ -115,11 +115,16 @@ original content.
When the `list` rclone command scans a directory on wrapped remote, When the `list` rclone command scans a directory on wrapped remote,
the potential chunk files are accounted for, grouped and assembled into the potential chunk files are accounted for, grouped and assembled into
composite directory entries. Any temporary chunks are hidden. composite directory entries. Any temporary chunks are hidden.
`list` can sometimes come across composite files with missing or invalid
chunks, eg if wrapped file system has been tampered with or damaged. List and other commands can sometimes come across composite files with
If chunker detects a missing chunk it will by default silently ignore missing or invalid chunks, eg. shadowed by like-named directory or
the whole group. You can use the `--chunker-fail-on-bad-chunks` another file. This usually means that wrapped file system has been directly
rclone option to make `list` fail with a loud error message. tampered with or damaged. If chunker detects a missing chunk it will
by default print a warning, skip the whole incomplete group of chunks and
proceed with the current command.
You can set the `--chunker-fail-hard` flag to have commands abort with
an error message in such cases.
#### Chunk names #### Chunk names
@ -368,19 +373,18 @@ Metadata is a small JSON file named after the composite file.
- Simple JSON supports hash sums and chunk validation. - Simple JSON supports hash sums and chunk validation.
- It has the following fields: ver, size, nchunks, md5, sha1. - It has the following fields: ver, size, nchunks, md5, sha1.
#### --chunker-fail-on-bad-chunks #### --chunker-fail-hard
The list command might encounter files with missing or invalid chunks. Choose how chunker should handle files with missing or invalid chunks.
This boolean flag tells what rclone should do in such cases.
- Config: fail_on_bad_chunks - Config: fail_hard
- Env Var: RCLONE_CHUNKER_FAIL_ON_BAD_CHUNKS - Env Var: RCLONE_CHUNKER_FAIL_HARD
- Type: bool - Type: bool
- Default: false - Default: false
- Examples: - Examples:
- "true" - "true"
- Fail with error. - Report errors and abort current command.
- "false" - "false"
- Silently ignore invalid object. - Warn user, skip incomplete file and proceed.
<!--- autogenerated options stop --> <!--- autogenerated options stop -->

View file

@ -151,16 +151,19 @@ func retry(t *testing.T, what string, f func() error) {
require.NoError(t, err, what) require.NoError(t, err, what)
} }
// testPut puts file to the remote // testPut puts file with random contents to the remote
func testPut(ctx context.Context, t *testing.T, f fs.Fs, file *fstest.Item) (string, fs.Object) { func testPut(ctx context.Context, t *testing.T, f fs.Fs, file *fstest.Item) (string, fs.Object) {
return PutTestContents(ctx, t, f, file, random.String(100), true)
}
// PutTestContents puts file with given contents to the remote and, when check is true, verifies it; unlike TestPutLarge it doesn't remove the file
func PutTestContents(ctx context.Context, t *testing.T, f fs.Fs, file *fstest.Item, contents string, check bool) (string, fs.Object) {
var ( var (
err error err error
obj fs.Object obj fs.Object
uploadHash *hash.MultiHasher uploadHash *hash.MultiHasher
contents string
) )
retry(t, "Put", func() error { retry(t, "Put", func() error {
contents = random.String(100)
buf := bytes.NewBufferString(contents) buf := bytes.NewBufferString(contents)
uploadHash = hash.NewMultiHasher() uploadHash = hash.NewMultiHasher()
in := io.TeeReader(buf, uploadHash) in := io.TeeReader(buf, uploadHash)
@ -171,10 +174,12 @@ func testPut(ctx context.Context, t *testing.T, f fs.Fs, file *fstest.Item) (str
return err return err
}) })
file.Hashes = uploadHash.Sums() file.Hashes = uploadHash.Sums()
file.Check(t, obj, f.Precision()) if check {
// Re-read the object and check again file.Check(t, obj, f.Precision())
obj = findObject(ctx, t, f, file.Path) // Re-read the object and check again
file.Check(t, obj, f.Precision()) obj = findObject(ctx, t, f, file.Path)
file.Check(t, obj, f.Precision())
}
return contents, obj return contents, obj
} }