chunker: finish meta-format before release
changes: - chunker: remove GetTier and SetTier - remove wdmrcompat metaformat - remove fastopen strategy - make hash_type option non-advanced - advertise hash support when possible - add metadata field "ver", run strict checks - describe internal behavior in comments - improve documentation note: wdmrcompat used to write file name in the metadata, so maximum metadata size was 1K; removing it allows capping the size at 200 bytes now.
This commit is contained in:
parent
c41812fc88
commit
ccecfa9cb1
5 changed files with 303 additions and 312 deletions
|
@ -36,13 +36,11 @@ const (
|
||||||
// WARNING: this optimization is not transaction safe!
|
// WARNING: this optimization is not transaction safe!
|
||||||
optimizeFirstChunk = false
|
optimizeFirstChunk = false
|
||||||
|
|
||||||
// Normally metadata is a small (less than 1KB) piece of JSON.
|
// Normally metadata is a small (100-200 bytes) piece of JSON.
|
||||||
// Valid metadata size should not exceed this limit.
|
// Valid metadata size should not exceed this limit.
|
||||||
maxMetaDataSize = 1023
|
maxMetaDataSize = 199
|
||||||
|
|
||||||
// fastopen strategy opens all chunks immediately, but reads sequentially.
|
metaDataVersion = 1
|
||||||
// linear strategy opens and reads chunks sequentially, without read-ahead.
|
|
||||||
downloadStrategy = "linear"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Formatting of temporary chunk names. Temporary suffix *follows* chunk
|
// Formatting of temporary chunk names. Temporary suffix *follows* chunk
|
||||||
|
@ -52,6 +50,13 @@ var (
|
||||||
tempChunkRegexp = regexp.MustCompile(`^(.+)\.\.tmp_([0-9]{10,19})$`)
|
tempChunkRegexp = regexp.MustCompile(`^(.+)\.\.tmp_([0-9]{10,19})$`)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Note: metadata logic is tightly coupled with chunker code in many
|
||||||
|
// places of the code, eg. in checks whether a file can have meta object
|
||||||
|
// or is eligible for chunking.
|
||||||
|
// If more metadata formats (or versions of a format) are added in future,
|
||||||
|
// it may be advisable to factor it into a "metadata strategy" interface
|
||||||
|
// similar to chunkingReader or linearReader below.
|
||||||
|
|
||||||
// Register with Fs
|
// Register with Fs
|
||||||
func init() {
|
func init() {
|
||||||
fs.Register(&fs.RegInfo{
|
fs.Register(&fs.RegInfo{
|
||||||
|
@ -98,16 +103,10 @@ Metadata is a small JSON file named after the composite file.`,
|
||||||
Value: "simplejson",
|
Value: "simplejson",
|
||||||
Help: `Simple JSON supports hash sums and chunk validation.
|
Help: `Simple JSON supports hash sums and chunk validation.
|
||||||
It has the following fields: size, nchunks, md5, sha1.`,
|
It has the following fields: size, nchunks, md5, sha1.`,
|
||||||
}, {
|
|
||||||
Value: "wdmrcompat",
|
|
||||||
Help: `This format brings compatibility with WebDavMailRuCloud.
|
|
||||||
It does not support hash sums or validation, most fields are ignored.
|
|
||||||
It has the following fields: Name, Size, PublicKey, CreationDate.
|
|
||||||
Requires hash type "none".`,
|
|
||||||
}},
|
}},
|
||||||
}, {
|
}, {
|
||||||
Name: "hash_type",
|
Name: "hash_type",
|
||||||
Advanced: true,
|
Advanced: false,
|
||||||
Default: "md5",
|
Default: "md5",
|
||||||
Help: `Choose how chunker handles hash sums.`,
|
Help: `Choose how chunker handles hash sums.`,
|
||||||
Examples: []fs.OptionExample{{
|
Examples: []fs.OptionExample{{
|
||||||
|
@ -122,8 +121,8 @@ for a single-chunk file but returns nothing otherwise.`,
|
||||||
Help: `SHA1 for multi-chunk files. Requires "simplejson".`,
|
Help: `SHA1 for multi-chunk files. Requires "simplejson".`,
|
||||||
}, {
|
}, {
|
||||||
Value: "md5quick",
|
Value: "md5quick",
|
||||||
Help: `When a file is copied on to chunker, MD5 is taken from its source
|
Help: `Copying a file to chunker will request MD5 from the source
|
||||||
falling back to SHA1 if the source doesn't support it. Requires "simplejson".`,
|
falling back to SHA1 if unsupported. Requires "simplejson".`,
|
||||||
}, {
|
}, {
|
||||||
Value: "sha1quick",
|
Value: "sha1quick",
|
||||||
Help: `Similar to "md5quick" but prefers SHA1 over MD5. Requires "simplejson".`,
|
Help: `Similar to "md5quick" but prefers SHA1 over MD5. Requires "simplejson".`,
|
||||||
|
@ -188,7 +187,7 @@ func NewFs(name, rpath string, m configmap.Mapper) (fs.Fs, error) {
|
||||||
switch opt.MetaFormat {
|
switch opt.MetaFormat {
|
||||||
case "none":
|
case "none":
|
||||||
f.useMeta = false
|
f.useMeta = false
|
||||||
case "simplejson", "wdmrcompat":
|
case "simplejson":
|
||||||
f.useMeta = true
|
f.useMeta = true
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("unsupported meta format '%s'", opt.MetaFormat)
|
return nil, fmt.Errorf("unsupported meta format '%s'", opt.MetaFormat)
|
||||||
|
@ -243,8 +242,6 @@ func NewFs(name, rpath string, m configmap.Mapper) (fs.Fs, error) {
|
||||||
WriteMimeType: true,
|
WriteMimeType: true,
|
||||||
BucketBased: true,
|
BucketBased: true,
|
||||||
CanHaveEmptyDirectories: true,
|
CanHaveEmptyDirectories: true,
|
||||||
SetTier: true,
|
|
||||||
GetTier: true,
|
|
||||||
ServerSideAcrossConfigs: true,
|
ServerSideAcrossConfigs: true,
|
||||||
}).Fill(f).Mask(baseFs).WrapsFs(f, baseFs)
|
}).Fill(f).Mask(baseFs).WrapsFs(f, baseFs)
|
||||||
|
|
||||||
|
@ -393,6 +390,19 @@ func (f *Fs) parseChunkName(name string) (mainName string, chunkNo int, tempNo i
|
||||||
//
|
//
|
||||||
// This should return ErrDirNotFound if the directory isn't
|
// This should return ErrDirNotFound if the directory isn't
|
||||||
// found.
|
// found.
|
||||||
|
//
|
||||||
|
// Commands normally cleanup all temporary chunks in case of a failure.
|
||||||
|
// However, if rclone dies unexpectedly, it can leave behind a bunch of
|
||||||
|
// hidden temporary chunks. List and its underlying chunkEntries()
|
||||||
|
// silently skip all temporary chunks in the directory. It's okay if
|
||||||
|
// they belong to an unfinished command running in parallel.
|
||||||
|
//
|
||||||
|
// However, there is no way to discover dead temporary chunks a.t.m.
|
||||||
|
// As a workaround users can use `purge` to forcibly remove the whole
|
||||||
|
// directory together with dead chunks.
|
||||||
|
// In future a flag named like `--chunker-list-hidden` may be added to
|
||||||
|
// rclone that will tell List to reveal hidden chunks.
|
||||||
|
//
|
||||||
func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
|
func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
|
||||||
entries, err = f.base.List(ctx, dir)
|
entries, err = f.base.List(ctx, dir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -428,7 +438,8 @@ func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add some directory entries. This alters entries returning it as newEntries.
|
// chunkEntries is called by List(R). It merges chunk entries from
|
||||||
|
// wrapped remote into composite directory entries.
|
||||||
func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardErrors bool) (chunkedEntries fs.DirEntries, err error) {
|
func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardErrors bool) (chunkedEntries fs.DirEntries, err error) {
|
||||||
// sort entries, so that meta objects (if any) appear before their chunks
|
// sort entries, so that meta objects (if any) appear before their chunks
|
||||||
sortedEntries := make(fs.DirEntries, len(origEntries))
|
sortedEntries := make(fs.DirEntries, len(origEntries))
|
||||||
|
@ -514,6 +525,11 @@ func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardEr
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewObject finds the Object at remote.
|
// NewObject finds the Object at remote.
|
||||||
|
//
|
||||||
|
// Please note that every NewObject invocation will scan the whole directory.
|
||||||
|
// Using here something like fs.DirCache might improve performance (and make
|
||||||
|
// logic more complex though).
|
||||||
|
//
|
||||||
func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
|
func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
|
||||||
if mainRemote, _, _ := f.parseChunkName(remote); mainRemote != "" {
|
if mainRemote, _, _ := f.parseChunkName(remote); mainRemote != "" {
|
||||||
return nil, fmt.Errorf("%q should be meta object, not a chunk", remote)
|
return nil, fmt.Errorf("%q should be meta object, not a chunk", remote)
|
||||||
|
@ -622,23 +638,14 @@ func (o *Object) readMetaData(ctx context.Context) error {
|
||||||
case "simplejson":
|
case "simplejson":
|
||||||
metaInfo, err := unmarshalSimpleJSON(ctx, metaObject, metaData)
|
metaInfo, err := unmarshalSimpleJSON(ctx, metaObject, metaData)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// TODO: maybe it's a small single chunk?
|
// TODO: in a rare case we might mistake a small file for metadata
|
||||||
return err
|
return errors.Wrap(err, "invalid metadata")
|
||||||
}
|
}
|
||||||
if o.size != metaInfo.Size() || len(o.chunks) != metaInfo.nChunks {
|
if o.size != metaInfo.Size() || len(o.chunks) != metaInfo.nChunks {
|
||||||
return errors.New("invalid simplejson metadata")
|
return errors.New("metadata doesn't match file size")
|
||||||
}
|
}
|
||||||
o.md5 = metaInfo.md5
|
o.md5 = metaInfo.md5
|
||||||
o.sha1 = metaInfo.sha1
|
o.sha1 = metaInfo.sha1
|
||||||
case "wdmrcompat":
|
|
||||||
metaInfo, err := unmarshalWDMRCompat(ctx, metaObject, metaData)
|
|
||||||
if err != nil {
|
|
||||||
// TODO: maybe it's a small single chunk?
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if o.size != metaInfo.Size() {
|
|
||||||
return errors.New("invalid wdmrcompat metadata")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
o.isFull = true
|
o.isFull = true
|
||||||
|
@ -784,9 +791,6 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st
|
||||||
case "simplejson":
|
case "simplejson":
|
||||||
c.updateHashes()
|
c.updateHashes()
|
||||||
metaData, err = marshalSimpleJSON(ctx, sizeTotal, len(c.chunks), c.md5, c.sha1)
|
metaData, err = marshalSimpleJSON(ctx, sizeTotal, len(c.chunks), c.md5, c.sha1)
|
||||||
case "wdmrcompat":
|
|
||||||
fileInfo := f.wrapInfo(src, baseRemote, sizeTotal)
|
|
||||||
metaData, err = marshalWDMRCompat(ctx, fileInfo)
|
|
||||||
}
|
}
|
||||||
if err == nil {
|
if err == nil {
|
||||||
metaInfo := f.wrapInfo(src, baseRemote, int64(len(metaData)))
|
metaInfo := f.wrapInfo(src, baseRemote, int64(len(metaData)))
|
||||||
|
@ -951,6 +955,9 @@ func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, opt
|
||||||
|
|
||||||
// Update in to the object with the modTime given of the given size
|
// Update in to the object with the modTime given of the given size
|
||||||
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
|
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
|
||||||
|
if err := o.readMetaData(ctx); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
basePut := o.f.base.Put
|
basePut := o.f.base.Put
|
||||||
if src.Size() < 0 {
|
if src.Size() < 0 {
|
||||||
basePut = o.f.base.Features().PutStream
|
basePut = o.f.base.Features().PutStream
|
||||||
|
@ -989,8 +996,17 @@ func (f *Fs) Precision() time.Duration {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Hashes returns the supported hash sets.
|
// Hashes returns the supported hash sets.
|
||||||
|
// Chunker advertises a hash type if and only if it can be calculated
|
||||||
|
// for files of any size, multi-chunked or small.
|
||||||
func (f *Fs) Hashes() hash.Set {
|
func (f *Fs) Hashes() hash.Set {
|
||||||
return hash.Set(hash.None)
|
// advertise a hash only if it is kept for composites, for all of them (not quick mode), and the wrapped remote supports it for small files
|
||||||
|
if f.useMD5 && !f.quickHash && f.base.Hashes().Contains(hash.MD5) {
|
||||||
|
return hash.NewHashSet(hash.MD5)
|
||||||
|
}
|
||||||
|
if f.useSHA1 && !f.quickHash && f.base.Hashes().Contains(hash.SHA1) {
|
||||||
|
return hash.NewHashSet(hash.SHA1)
|
||||||
|
}
|
||||||
|
return hash.NewHashSet() // can't provide strong guarantees
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mkdir makes the directory (container, bucket)
|
// Mkdir makes the directory (container, bucket)
|
||||||
|
@ -1012,7 +1028,12 @@ func (f *Fs) Rmdir(ctx context.Context, dir string) error {
|
||||||
// Implement this if you have a way of deleting all the files
|
// Implement this if you have a way of deleting all the files
|
||||||
// quicker than just running Remove() on the result of List()
|
// quicker than just running Remove() on the result of List()
|
||||||
//
|
//
|
||||||
// Return an error if it doesn't exist
|
// Return an error if it doesn't exist.
|
||||||
|
//
|
||||||
|
// This command will chain to `purge` from wrapped remote.
|
||||||
|
// As a result it removes not only chunker files with their
|
||||||
|
// active chunks but also all hidden chunks in the directory.
|
||||||
|
//
|
||||||
func (f *Fs) Purge(ctx context.Context) error {
|
func (f *Fs) Purge(ctx context.Context) error {
|
||||||
do := f.base.Features().Purge
|
do := f.base.Features().Purge
|
||||||
if do == nil {
|
if do == nil {
|
||||||
|
@ -1021,7 +1042,25 @@ func (f *Fs) Purge(ctx context.Context) error {
|
||||||
return do(ctx)
|
return do(ctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove an object
|
// Remove an object (chunks and metadata, if any)
|
||||||
|
//
|
||||||
|
// Remove deletes only active chunks of the object.
|
||||||
|
// It does not try to look for temporary chunks because they could belong
|
||||||
|
// to another command modifying this composite file in parallel.
|
||||||
|
//
|
||||||
|
// Commands normally cleanup all temporary chunks in case of a failure.
|
||||||
|
// However, if rclone dies unexpectedly, it can leave hidden temporary
|
||||||
|
// chunks, which cannot be discovered using the `list` command.
|
||||||
|
// Remove does not try to search for such chunks or delete them.
|
||||||
|
// Sometimes this can lead to strange results eg. when `list` shows that
|
||||||
|
// directory is empty but `rmdir` refuses to remove it because on the
|
||||||
|
// level of wrapped remote it's actually *not* empty.
|
||||||
|
// As a workaround users can use `purge` to forcibly remove it.
|
||||||
|
//
|
||||||
|
// In future, a flag `--chunker-delete-hidden` may be added which tells
|
||||||
|
// Remove to search directory for hidden chunks and remove them too
|
||||||
|
// (at the risk of breaking parallel commands).
|
||||||
|
//
|
||||||
func (o *Object) Remove(ctx context.Context) (err error) {
|
func (o *Object) Remove(ctx context.Context) (err error) {
|
||||||
if o.main != nil {
|
if o.main != nil {
|
||||||
err = o.main.Remove(ctx)
|
err = o.main.Remove(ctx)
|
||||||
|
@ -1095,13 +1134,6 @@ func (f *Fs) copyOrMove(ctx context.Context, o *Object, remote string, do copyMo
|
||||||
metaInfo := f.wrapInfo(metaObject, "", int64(len(metaData)))
|
metaInfo := f.wrapInfo(metaObject, "", int64(len(metaData)))
|
||||||
err = newObj.main.Update(ctx, bytes.NewReader(metaData), metaInfo)
|
err = newObj.main.Update(ctx, bytes.NewReader(metaData), metaInfo)
|
||||||
}
|
}
|
||||||
case "wdmrcompat":
|
|
||||||
newInfo := f.wrapInfo(metaObject, "", newObj.size)
|
|
||||||
metaData, err = marshalWDMRCompat(ctx, newInfo)
|
|
||||||
if err == nil {
|
|
||||||
metaInfo := f.wrapInfo(metaObject, "", int64(len(metaData)))
|
|
||||||
err = newObj.main.Update(ctx, bytes.NewReader(metaData), metaInfo)
|
|
||||||
}
|
|
||||||
case "none":
|
case "none":
|
||||||
if newObj.main != nil {
|
if newObj.main != nil {
|
||||||
err = newObj.main.Remove(ctx)
|
err = newObj.main.Remove(ctx)
|
||||||
|
@ -1436,7 +1468,22 @@ func (o *Object) SetModTime(ctx context.Context, mtime time.Time) error {
|
||||||
|
|
||||||
// Hash returns the selected checksum of the file.
|
// Hash returns the selected checksum of the file.
|
||||||
// If no checksum is available it returns "".
|
// If no checksum is available it returns "".
|
||||||
// It prefers the wrapped hashsum for a non-chunked file, then tries saved one.
|
//
|
||||||
|
// Hash prefers wrapped hashsum for a non-chunked file, then tries to
|
||||||
|
// read it from metadata. This in theory handles an unusual case when
|
||||||
|
// a small file is modified on the lower level by wrapped remote
|
||||||
|
// but chunker is not yet aware of changes.
|
||||||
|
//
|
||||||
|
// Currently metadata (if not configured as 'none') is kept only for
|
||||||
|
// multi-chunk files, but for small files chunker obtains hashsums from
|
||||||
|
// wrapped remote. If a particular hashsum type is not supported,
|
||||||
|
// chunker won't fail with `unsupported` error but return empty hash.
|
||||||
|
//
|
||||||
|
// In future metadata logic can be extended: if a normal (non-quick)
|
||||||
|
// hash type is configured, chunker will check whether wrapped remote
|
||||||
|
// supports it (see Fs.Hashes as an example). If not, it will add metadata
|
||||||
|
// to small files as well, thus providing hashsums for all files.
|
||||||
|
//
|
||||||
func (o *Object) Hash(ctx context.Context, hashType hash.Type) (string, error) {
|
func (o *Object) Hash(ctx context.Context, hashType hash.Type) (string, error) {
|
||||||
if !o.isChunked() {
|
if !o.isChunked() {
|
||||||
// First, chain to the single wrapped chunk, if possible.
|
// First, chain to the single wrapped chunk, if possible.
|
||||||
|
@ -1500,78 +1547,10 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.Read
|
||||||
limit = o.size - offset
|
limit = o.size - offset
|
||||||
}
|
}
|
||||||
|
|
||||||
switch downloadStrategy {
|
|
||||||
case "linear":
|
|
||||||
return o.newLinearReader(ctx, offset, limit, openOptions)
|
return o.newLinearReader(ctx, offset, limit, openOptions)
|
||||||
case "fastopen":
|
|
||||||
return o.newFastopenReader(ctx, offset, limit, openOptions)
|
|
||||||
default:
|
|
||||||
return nil, errors.New("invalid download strategy")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// fastopenReader opens all chunks immediately, but reads sequentially
|
// linearReader opens and reads file chunks sequentially, without read-ahead
|
||||||
type fastopenReader struct {
|
|
||||||
readClosers []io.ReadCloser
|
|
||||||
multiReader io.Reader
|
|
||||||
}
|
|
||||||
|
|
||||||
func (o *Object) newFastopenReader(ctx context.Context, offset, limit int64, options []fs.OpenOption) (io.ReadCloser, error) {
|
|
||||||
var (
|
|
||||||
readers []io.Reader
|
|
||||||
readClosers []io.ReadCloser
|
|
||||||
)
|
|
||||||
for _, chunk := range o.chunks {
|
|
||||||
if limit <= 0 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
count := chunk.Size()
|
|
||||||
if offset >= count {
|
|
||||||
offset -= count
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
count -= offset
|
|
||||||
if limit < count {
|
|
||||||
count = limit
|
|
||||||
}
|
|
||||||
|
|
||||||
end := offset + count - 1
|
|
||||||
chunkOptions := append(options, &fs.RangeOption{Start: offset, End: end})
|
|
||||||
rc, err := chunk.Open(ctx, chunkOptions...)
|
|
||||||
if err != nil {
|
|
||||||
r := fastopenReader{readClosers: readClosers}
|
|
||||||
_ = r.Close() // ignore error
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
readClosers = append(readClosers, rc)
|
|
||||||
readers = append(readers, rc)
|
|
||||||
|
|
||||||
offset = 0
|
|
||||||
limit -= count
|
|
||||||
}
|
|
||||||
|
|
||||||
r := &fastopenReader{
|
|
||||||
readClosers: readClosers,
|
|
||||||
multiReader: io.MultiReader(readers...),
|
|
||||||
}
|
|
||||||
return r, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *fastopenReader) Read(p []byte) (n int, err error) {
|
|
||||||
return r.multiReader.Read(p)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *fastopenReader) Close() (err error) {
|
|
||||||
for _, rc := range r.readClosers {
|
|
||||||
chunkErr := rc.Close()
|
|
||||||
if err == nil {
|
|
||||||
err = chunkErr
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// linearReader opens and reads chunks sequentially, without read-ahead
|
|
||||||
type linearReader struct {
|
type linearReader struct {
|
||||||
ctx context.Context
|
ctx context.Context
|
||||||
chunks []fs.Object
|
chunks []fs.Object
|
||||||
|
@ -1771,25 +1750,9 @@ func (o *Object) ID() string {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetTier performs changing storage tier of the Object if
|
|
||||||
// multiple storage classes supported
|
|
||||||
func (o *Object) SetTier(tier string) error {
|
|
||||||
if doer, ok := o.mainChunk().(fs.SetTierer); ok {
|
|
||||||
return doer.SetTier(tier)
|
|
||||||
}
|
|
||||||
return errors.New("chunker: wrapped remote does not support SetTier")
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetTier returns storage tier or class of the Object
|
|
||||||
func (o *Object) GetTier() string {
|
|
||||||
if doer, ok := o.mainChunk().(fs.GetTierer); ok {
|
|
||||||
return doer.GetTier()
|
|
||||||
}
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
// Meta format `simplejson`
|
// Meta format `simplejson`
|
||||||
type metaSimpleJSON struct {
|
type metaSimpleJSON struct {
|
||||||
|
Version int `json:"ver"`
|
||||||
Size int64 `json:"size"`
|
Size int64 `json:"size"`
|
||||||
NChunks int `json:"nchunks"`
|
NChunks int `json:"nchunks"`
|
||||||
MD5 string `json:"md5"`
|
MD5 string `json:"md5"`
|
||||||
|
@ -1798,6 +1761,7 @@ type metaSimpleJSON struct {
|
||||||
|
|
||||||
func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 string) (data []byte, err error) {
|
func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 string) (data []byte, err error) {
|
||||||
metaData := &metaSimpleJSON{
|
metaData := &metaSimpleJSON{
|
||||||
|
Version: metaDataVersion,
|
||||||
Size: size,
|
Size: size,
|
||||||
NChunks: nChunks,
|
NChunks: nChunks,
|
||||||
MD5: md5,
|
MD5: md5,
|
||||||
|
@ -1806,47 +1770,56 @@ func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 s
|
||||||
return json.Marshal(&metaData)
|
return json.Marshal(&metaData)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Note: only metadata format version 1 is supported a.t.m.
|
||||||
|
//
|
||||||
|
// Current implementation creates metadata only for files larger than
|
||||||
|
// configured chunk size. This approach has a drawback: availability of
|
||||||
|
// configured hashsum type for small files depends on the wrapped remote.
|
||||||
|
// Future versions of chunker may change approach as described in comment
|
||||||
|
// to the Hash method. They can transparently migrate older metadata.
|
||||||
|
// New format will have a higher version number and cannot be correctly
|
||||||
|
// handled by current implementation.
|
||||||
|
// The version check below will then explicitly ask user to upgrade rclone.
|
||||||
|
//
|
||||||
func unmarshalSimpleJSON(ctx context.Context, metaObject fs.Object, data []byte) (info *ObjectInfo, err error) {
|
func unmarshalSimpleJSON(ctx context.Context, metaObject fs.Object, data []byte) (info *ObjectInfo, err error) {
|
||||||
var metaData *metaSimpleJSON
|
var metaData *metaSimpleJSON
|
||||||
err = json.Unmarshal(data, &metaData)
|
err = json.Unmarshal(data, &metaData)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Perform strict checks, avoid corruption of future metadata formats.
|
||||||
|
if metaData.Size < 0 {
|
||||||
|
return nil, errors.New("negative file size")
|
||||||
|
}
|
||||||
|
if metaData.NChunks <= 0 {
|
||||||
|
return nil, errors.New("wrong number of chunks")
|
||||||
|
}
|
||||||
|
if metaData.MD5 != "" {
|
||||||
|
_, err = hex.DecodeString(metaData.MD5)
|
||||||
|
if len(metaData.MD5) != 32 || err != nil {
|
||||||
|
return nil, errors.New("wrong md5 hash")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if metaData.SHA1 != "" {
|
||||||
|
_, err = hex.DecodeString(metaData.SHA1)
|
||||||
|
if len(metaData.SHA1) != 40 || err != nil {
|
||||||
|
return nil, errors.New("wrong sha1 hash")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if metaData.Version <= 0 {
|
||||||
|
return nil, errors.New("wrong version number")
|
||||||
|
}
|
||||||
|
if metaData.Version != metaDataVersion {
|
||||||
|
return nil, errors.Errorf("version %d is not supported, please upgrade rclone", metaData.Version)
|
||||||
|
}
|
||||||
|
|
||||||
var nilFs *Fs // nil object triggers appropriate type method
|
var nilFs *Fs // nil object triggers appropriate type method
|
||||||
info = nilFs.wrapInfo(metaObject, "", metaData.Size)
|
info = nilFs.wrapInfo(metaObject, "", metaData.Size)
|
||||||
info.md5 = metaData.MD5
|
info.md5 = metaData.MD5
|
||||||
info.sha1 = metaData.SHA1
|
info.sha1 = metaData.SHA1
|
||||||
info.nChunks = metaData.NChunks
|
info.nChunks = metaData.NChunks
|
||||||
return
|
return info, nil
|
||||||
}
|
|
||||||
|
|
||||||
// Meta format `wdmrcompat`
|
|
||||||
type metaWDMRCompat struct {
|
|
||||||
Name string `json:"Name"`
|
|
||||||
Size int64 `json:"Size"`
|
|
||||||
PublicKey interface{} `json:"PublicKey"` // ignored, can be nil
|
|
||||||
CreationDate time.Time `json:"CreationDate"` // modification time, ignored
|
|
||||||
}
|
|
||||||
|
|
||||||
func marshalWDMRCompat(ctx context.Context, srcInfo fs.ObjectInfo) (data []byte, err error) {
|
|
||||||
metaData := &metaWDMRCompat{
|
|
||||||
Name: path.Base(srcInfo.Remote()),
|
|
||||||
Size: srcInfo.Size(),
|
|
||||||
PublicKey: nil,
|
|
||||||
CreationDate: srcInfo.ModTime(ctx).UTC(),
|
|
||||||
}
|
|
||||||
return json.Marshal(&metaData)
|
|
||||||
}
|
|
||||||
|
|
||||||
func unmarshalWDMRCompat(ctx context.Context, metaObject fs.Object, data []byte) (info *ObjectInfo, err error) {
|
|
||||||
var metaData *metaWDMRCompat
|
|
||||||
err = json.Unmarshal(data, &metaData)
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
var nilFs *Fs // nil object triggers appropriate type method
|
|
||||||
info = nilFs.wrapInfo(metaObject, "", metaData.Size)
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check the interfaces are satisfied
|
// Check the interfaces are satisfied
|
||||||
|
@ -1868,6 +1841,4 @@ var (
|
||||||
_ fs.Object = (*Object)(nil)
|
_ fs.Object = (*Object)(nil)
|
||||||
_ fs.ObjectUnWrapper = (*Object)(nil)
|
_ fs.ObjectUnWrapper = (*Object)(nil)
|
||||||
_ fs.IDer = (*Object)(nil)
|
_ fs.IDer = (*Object)(nil)
|
||||||
_ fs.SetTierer = (*Object)(nil)
|
|
||||||
_ fs.GetTierer = (*Object)(nil)
|
|
||||||
)
|
)
|
||||||
|
|
|
@ -31,7 +31,11 @@ func TestIntegration(t *testing.T) {
|
||||||
RemoteName: *fstest.RemoteName,
|
RemoteName: *fstest.RemoteName,
|
||||||
NilObject: (*chunker.Object)(nil),
|
NilObject: (*chunker.Object)(nil),
|
||||||
SkipBadWindowsCharacters: !*UseBadChars,
|
SkipBadWindowsCharacters: !*UseBadChars,
|
||||||
UnimplementableObjectMethods: []string{"MimeType"},
|
UnimplementableObjectMethods: []string{
|
||||||
|
"MimeType",
|
||||||
|
"GetTier",
|
||||||
|
"SetTier",
|
||||||
|
},
|
||||||
UnimplementableFsMethods: []string{
|
UnimplementableFsMethods: []string{
|
||||||
"PublicLink",
|
"PublicLink",
|
||||||
"OpenWriterAt",
|
"OpenWriterAt",
|
||||||
|
|
|
@ -4,11 +4,11 @@ description: "Split-chunking overlay remote"
|
||||||
date: "2019-08-30"
|
date: "2019-08-30"
|
||||||
---
|
---
|
||||||
|
|
||||||
<i class="fa fa-cut"></i>Chunker
|
<i class="fa fa-cut"></i>Chunker (BETA)
|
||||||
----------------------------------------
|
----------------------------------------
|
||||||
|
|
||||||
The `chunker` overlay transparently splits large files into smaller chunks
|
The `chunker` overlay transparently splits large files into smaller chunks
|
||||||
during the upload to wrapped remote and transparently assembles them back
|
during upload to wrapped remote and transparently assembles them back
|
||||||
when the file is downloaded. This allows users to effectively overcome size limits
|
when the file is downloaded. This allows users to effectively overcome size limits
|
||||||
imposed by storage providers.
|
imposed by storage providers.
|
||||||
|
|
||||||
|
@ -41,10 +41,27 @@ Storage> chunker
|
||||||
Remote to chunk/unchunk.
|
Remote to chunk/unchunk.
|
||||||
Normally should contain a ':' and a path, eg "myremote:path/to/dir",
|
Normally should contain a ':' and a path, eg "myremote:path/to/dir",
|
||||||
"myremote:bucket" or maybe "myremote:" (not recommended).
|
"myremote:bucket" or maybe "myremote:" (not recommended).
|
||||||
|
Enter a string value. Press Enter for the default ("").
|
||||||
remote> remote:path
|
remote> remote:path
|
||||||
Files larger than chunk_size will be split in chunks. By default 2 Gb.
|
Files larger than chunk size will be split in chunks.
|
||||||
Enter a size with suffix k,M,G,T. Press Enter for the default ("2G").
|
Enter a size with suffix k,M,G,T. Press Enter for the default ("2G").
|
||||||
chunk_size> 1G
|
chunk_size> 100M
|
||||||
|
Choose how chunker handles hash sums.
|
||||||
|
Enter a string value. Press Enter for the default ("md5").
|
||||||
|
Choose a number from below, or type in your own value
|
||||||
|
/ Chunker can pass any hash supported by wrapped remote
|
||||||
|
1 | for a single-chunk file but returns nothing otherwise.
|
||||||
|
\ "none"
|
||||||
|
2 / MD5 for multi-chunk files. Requires "simplejson".
|
||||||
|
\ "md5"
|
||||||
|
3 / SHA1 for multi-chunk files. Requires "simplejson".
|
||||||
|
\ "sha1"
|
||||||
|
/ Copying a file to chunker will request MD5 from the source
|
||||||
|
4 | falling back to SHA1 if unsupported. Requires "simplejson".
|
||||||
|
\ "md5quick"
|
||||||
|
5 / Similar to "md5quick" but prefers SHA1 over MD5. Requires "simplejson".
|
||||||
|
\ "sha1quick"
|
||||||
|
hash_type> md5
|
||||||
Edit advanced config? (y/n)
|
Edit advanced config? (y/n)
|
||||||
y) Yes
|
y) Yes
|
||||||
n) No
|
n) No
|
||||||
|
@ -53,8 +70,9 @@ Remote config
|
||||||
--------------------
|
--------------------
|
||||||
[overlay]
|
[overlay]
|
||||||
type = chunker
|
type = chunker
|
||||||
remote = TestLocal:
|
remote = remote:bucket
|
||||||
chunk_size = 2G
|
chunk_size = 100M
|
||||||
|
hash_type = md5
|
||||||
--------------------
|
--------------------
|
||||||
y) Yes this is OK
|
y) Yes this is OK
|
||||||
e) Edit this remote
|
e) Edit this remote
|
||||||
|
@ -73,8 +91,8 @@ will put files in a directory called `name` in the current directory.
|
||||||
|
|
||||||
### Chunking
|
### Chunking
|
||||||
|
|
||||||
When rclone starts a file upload, chunker checks the file size.
|
When rclone starts a file upload, chunker checks the file size. If it
|
||||||
If it doesn't exceed the configured chunk size, chunker will just pass it
|
doesn't exceed the configured chunk size, chunker will just pass the file
|
||||||
to the wrapped remote. If a file is large, chunker will transparently cut
|
to the wrapped remote. If a file is large, chunker will transparently cut
|
||||||
data in pieces with temporary names and stream them one by one, on the fly.
|
data in pieces with temporary names and stream them one by one, on the fly.
|
||||||
Each chunk will contain the specified number of data bytes, except for the
|
Each chunk will contain the specified number of data bytes, except for the
|
||||||
|
@ -84,7 +102,7 @@ a temporary copy, record its size and repeat the above process.
|
||||||
When upload completes, temporary chunk files are finally renamed.
|
When upload completes, temporary chunk files are finally renamed.
|
||||||
This scheme guarantees that operations look from outside as atomic.
|
This scheme guarantees that operations look from outside as atomic.
|
||||||
A similar method with hidden temporary chunks is used for other operations
|
A similar method with hidden temporary chunks is used for other operations
|
||||||
(copy/move/rename etc). If operation fails, hidden chunks are normally
|
(copy/move/rename etc). If an operation fails, hidden chunks are normally
|
||||||
destroyed, and the destination composite file stays intact.
|
destroyed, and the destination composite file stays intact.
|
||||||
|
|
||||||
#### Chunk names
|
#### Chunk names
|
||||||
|
@ -94,58 +112,52 @@ By default chunk names are `BIG_FILE_NAME.rclone-chunk.001`,
|
||||||
format is `*.rclone-chunk.###`. You can configure another name format
|
format is `*.rclone-chunk.###`. You can configure another name format
|
||||||
using the `--chunker-name-format` option. The format uses asterisk
|
using the `--chunker-name-format` option. The format uses asterisk
|
||||||
`*` as a placeholder for the base file name and one or more consecutive
|
`*` as a placeholder for the base file name and one or more consecutive
|
||||||
hash characters `#` as a placeholder for the chunk number. There must be
|
hash characters `#` as a placeholder for the sequential chunk number.
|
||||||
one and only one asterisk. The number of consecutive hashes defines the
|
There must be one and only one asterisk. The number of consecutive hash
|
||||||
minimum length of a string representing a chunk number. If a chunk number
|
characters defines the minimum length of a string representing a chunk number.
|
||||||
has less digits than the number of hashes, it is left-padded by zeros.
|
If a decimal chunk number has fewer digits than the number of hashes, it is
|
||||||
If there are more digits in the number, they are left as is.
|
left-padded by zeros. If the number string is longer, it is left intact.
|
||||||
By default numbering starts from 1 but there is another option that allows
|
By default numbering starts from 1 but there is another option that allows
|
||||||
user to start from 0, eg. for compatibility with legacy software.
|
user to start from 0, eg. for compatibility with legacy software.
|
||||||
|
|
||||||
For example, if name format is `big_*-##.part`, and original file was
|
For example, if name format is `big_*-##.part` and original file name is
|
||||||
named `data.txt` and numbering starts from 0, then the first chunk will be
|
`data.txt` and numbering starts from 0, then the first chunk will be named
|
||||||
named `big_data.txt-00.part`, the 99th chunk will be `big_data.txt-98.part`
|
`big_data.txt-00.part`, the 99th chunk will be `big_data.txt-98.part`
|
||||||
and the 302nd chunk will be `big_data.txt-301.part`.
|
and the 302nd chunk will become `big_data.txt-301.part`.
|
||||||
|
|
||||||
Would-be chunk files are ignored if their name does not match given format.
|
When the `list` rclone command scans a directory on wrapped remote, the
|
||||||
The list command might encounter composite files with missinng or invalid
|
potential chunk files are accounted for and merged into composite directory
|
||||||
chunks. By default, if chunker detects a missing chunk it will silently
|
entries only if their names match the configured format. All other files
|
||||||
ignore the whole group. Use the `--chunker-fail-on-bad-chunks` flag
|
are ignored, including temporary chunks.
|
||||||
to make it fail with an error message.
|
The list command might encounter composite files with missing or invalid
|
||||||
|
chunks. If chunker detects a missing chunk it will by default silently
|
||||||
|
ignore the whole group. You can use the `--chunker-fail-on-bad-chunks`
|
||||||
|
command line flag to make `list` fail with an error message.
|
||||||
|
|
||||||
|
|
||||||
### Metadata
|
### Metadata
|
||||||
|
|
||||||
By default when a file is large enough, chunker will create a metadata
|
By default when a file is large enough, chunker will create a metadata
|
||||||
object besides data chunks. The object is named after the original file.
|
object besides data chunks. The object is named after the original file.
|
||||||
Chunker allows to choose between few metadata formats. Please note that
|
Chunker allows user to disable metadata completely (the `none` format).
|
||||||
currently metadata is not created for files smaller than configured
|
Please note that currently metadata is not created for files smaller
|
||||||
chunk size. This may change in future as new formats are developed.
|
than configured chunk size. This may change in future as new formats
|
||||||
|
are developed.
|
||||||
|
|
||||||
#### Simple JSON metadata format
|
#### Simple JSON metadata format
|
||||||
|
|
||||||
This is the default format. It supports hash sums and chunk validation
|
This is the default format. It supports hash sums and chunk validation
|
||||||
for composite files. Meta objects carry the following fields:
|
for composite files. Meta objects carry the following fields:
|
||||||
|
|
||||||
- `size` - total size of chunks
|
- `ver` - version of format, currently `1`
|
||||||
- `nchunks` - number of chunks
|
- `size` - total size of composite file
|
||||||
- `md5` - MD5 hashsum (if present)
|
- `nchunks` - number of chunks in the file
|
||||||
|
- `md5` - MD5 hashsum of composite file (if present)
|
||||||
- `sha1` - SHA1 hashsum (if present)
|
- `sha1` - SHA1 hashsum (if present)
|
||||||
|
|
||||||
There is no field for composite file name as it's simply equal to the name
|
There is no field for composite file name as it's simply equal to the name
|
||||||
of meta object on the wrapped remote. Please refer to respective sections
|
of meta object on the wrapped remote. Please refer to respective sections
|
||||||
for details on hashsums and modified time handling.
|
for details on hashsums and handling of modified time.
|
||||||
|
|
||||||
#### WedDavMailRu compatible metadata format
|
|
||||||
|
|
||||||
The `wdmrcompat` metadata format is only useful to support historical files
|
|
||||||
created by [WebDriveMailru](https://github.com/yar229/WebDavMailRuCloud).
|
|
||||||
It keeps the following fields (most are ignored, though):
|
|
||||||
|
|
||||||
- `Name` - name of the composite file (always equal to the meta file name)
|
|
||||||
- `Size` - total size of chunks
|
|
||||||
- `PublicKey` - ignored, always "null"
|
|
||||||
- `CreationDate` - last modification (sic!) time, ignored.
|
|
||||||
|
|
||||||
#### No metadata
|
#### No metadata
|
||||||
|
|
||||||
|
@ -161,8 +173,8 @@ errors (especially missing last chunk) than metadata-enabled formats.
|
||||||
### Hashsums
|
### Hashsums
|
||||||
|
|
||||||
Chunker supports hashsums only when a compatible metadata is present.
|
Chunker supports hashsums only when a compatible metadata is present.
|
||||||
Thus, if you choose metadata format of `none` or `wdmrcompat`, chunker
|
Thus, if you choose metadata format of `none`, chunker will return
|
||||||
will return `UNSUPPORTED` as hashsum.
|
`UNSUPPORTED` as hashsum.
|
||||||
|
|
||||||
Please note that metadata is stored only for composite files. If a file
|
Please note that metadata is stored only for composite files. If a file
|
||||||
is small (smaller than configured chunk size), chunker will transparently
|
is small (smaller than configured chunk size), chunker will transparently
|
||||||
|
@ -175,16 +187,16 @@ Currently you can choose one or another but not both.
|
||||||
MD5 is set by default as the most supported type.
|
MD5 is set by default as the most supported type.
|
||||||
Since chunker keeps hashes for composite files and falls back to the
|
Since chunker keeps hashes for composite files and falls back to the
|
||||||
wrapped remote hash for small ones, we advise you to choose the same
|
wrapped remote hash for small ones, we advise you to choose the same
|
||||||
hash type as wrapped remote, so your file listings look coherent.
|
hash type as wrapped remote so that your file listings look coherent.
|
||||||
|
|
||||||
Normally, when a file is copied to chunker controlled remote, chunker
|
Normally, when a file is copied to a chunker controlled remote, chunker
|
||||||
will ask its source for compatible file hash and revert to on-the-fly
|
will ask the file source for compatible file hash and revert to on-the-fly
|
||||||
calculation if none is found. This involves some CPU overhead but provides
|
calculation if none is found. This involves some CPU overhead but provides
|
||||||
a guarantee that given hashsum is available. Also, chunker will reject
|
a guarantee that given hashsum is available. Also, chunker will reject
|
||||||
a server-side copy or move operation if source and destination hashsum
|
a server-side copy or move operation if source and destination hashsum
|
||||||
types are different, resulting in the extra network bandwidth, too.
|
types are different, resulting in the extra network bandwidth, too.
|
||||||
In some rare cases this may be undesired, so chunker provides two optional
|
In some rare cases this may be undesired, so chunker provides two optional
|
||||||
choices: `sha1quick` and `md5quick`. If source does not have the primary
|
choices: `sha1quick` and `md5quick`. If the source does not support primary
|
||||||
hash type and the quick mode is enabled, chunker will try to fall back to
|
hash type and the quick mode is enabled, chunker will try to fall back to
|
||||||
the secondary type. This will save CPU and bandwidth but can result in empty
|
the secondary type. This will save CPU and bandwidth but can result in empty
|
||||||
hashsums at destination. Beware of consequences: the `sync` command will
|
hashsums at destination. Beware of consequences: the `sync` command will
|
||||||
|
@ -215,13 +227,14 @@ chunk naming scheme is to:
|
||||||
hash type, chunk naming etc.
|
hash type, chunk naming etc.
|
||||||
- Now run `rclone sync oldchunks: newchunks:` and all your data
|
- Now run `rclone sync oldchunks: newchunks:` and all your data
|
||||||
will be transparently converted at transfer.
|
will be transparently converted at transfer.
|
||||||
This may take some time.
|
This may take some time, yet chunker will try server-side
|
||||||
|
copy if possible.
|
||||||
- After checking data integrity you may remove configuration section
|
- After checking data integrity you may remove configuration section
|
||||||
of the old remote.
|
of the old remote.
|
||||||
|
|
||||||
If rclone gets killed during a long operation on a big composite file,
|
If rclone gets killed during a long operation on a big composite file,
|
||||||
hidden temporary chunks may stay in the directory. They will not be
|
hidden temporary chunks may stay in the directory. They will not be
|
||||||
shown by the list command but will eat up your account quota.
|
shown by the `list` command but will eat up your account quota.
|
||||||
Please note that the `deletefile` rclone command deletes only active
|
Please note that the `deletefile` rclone command deletes only active
|
||||||
chunks of a file. As a workaround, you can use remote of the wrapped
|
chunks of a file. As a workaround, you can use remote of the wrapped
|
||||||
file system to see them.
|
file system to see them.
|
||||||
|
@ -234,17 +247,18 @@ remove everything including garbage.
|
||||||
### Caveats and Limitations
|
### Caveats and Limitations
|
||||||
|
|
||||||
Chunker requires wrapped remote to support server side `move` (or `copy` +
|
Chunker requires wrapped remote to support server side `move` (or `copy` +
|
||||||
delete) operations, otherwise it will explicitly refuse to start.
|
`delete`) operations, otherwise it will explicitly refuse to start.
|
||||||
This is because it internally renames temporary chunk files to their final
|
This is because it internally renames temporary chunk files to their final
|
||||||
names when an operation completes successfully.
|
names when an operation completes successfully.
|
||||||
|
|
||||||
Note that moves done using the copy-and-delete method may incur double
|
Note that a move implemented using the copy-and-delete method may incur
|
||||||
charging with some cloud storage providers.
|
double charging with some cloud storage providers.
|
||||||
|
|
||||||
Chunker will not automatically rename existing chunks when you change the
|
Chunker will not automatically rename existing chunks when you run
|
||||||
chunk name format. Beware that in result of this some files which have been
|
`rclone config` on a live remote and change the chunk name format.
|
||||||
treated as chunks before the change can pop up in directory listings as
|
Beware that as a result of this some files which have been treated as chunks
|
||||||
normal files and vice versa. The same warning holds for the chunk size.
|
before the change can pop up in directory listings as normal files
|
||||||
|
and vice versa. The same warning holds for the chunk size.
|
||||||
If you desperately need to change critical chunking settings, you should
|
If you desperately need to change critical chunking settings, you should
|
||||||
run data migration as described in a dedicated section.
|
run data migration as described in a dedicated section.
|
||||||
|
|
||||||
|
@ -278,6 +292,28 @@ Files larger than chunk size will be split in chunks.
|
||||||
- Type: SizeSuffix
|
- Type: SizeSuffix
|
||||||
- Default: 2G
|
- Default: 2G
|
||||||
|
|
||||||
|
#### --chunker-hash-type
|
||||||
|
|
||||||
|
Choose how chunker handles hash sums.
|
||||||
|
|
||||||
|
- Config: hash_type
|
||||||
|
- Env Var: RCLONE_CHUNKER_HASH_TYPE
|
||||||
|
- Type: string
|
||||||
|
- Default: "md5"
|
||||||
|
- Examples:
|
||||||
|
- "none"
|
||||||
|
- Chunker can pass any hash supported by wrapped remote
|
||||||
|
- for a single-chunk file but returns nothing otherwise.
|
||||||
|
- "md5"
|
||||||
|
- MD5 for multi-chunk files. Requires "simplejson".
|
||||||
|
- "sha1"
|
||||||
|
- SHA1 for multi-chunk files. Requires "simplejson".
|
||||||
|
- "md5quick"
|
||||||
|
- Copying a file to chunker will request MD5 from the source
|
||||||
|
- falling back to SHA1 if unsupported. Requires "simplejson".
|
||||||
|
- "sha1quick"
|
||||||
|
- Similar to "md5quick" but prefers SHA1 over MD5. Requires "simplejson".
|
||||||
|
|
||||||
### Advanced Options
|
### Advanced Options
|
||||||
|
|
||||||
Here are the advanced options specific to chunker (Transparently chunk/split large files).
|
Here are the advanced options specific to chunker (Transparently chunk/split large files).
|
||||||
|
@ -321,33 +357,6 @@ Metadata is a small JSON file named after the composite file.
|
||||||
- "simplejson"
|
- "simplejson"
|
||||||
- Simple JSON supports hash sums and chunk validation.
|
- Simple JSON supports hash sums and chunk validation.
|
||||||
- It has the following fields: size, nchunks, md5, sha1.
|
- It has the following fields: size, nchunks, md5, sha1.
|
||||||
- "wdmrcompat"
|
|
||||||
- This format brings compatibility with WebDavMailRuCloud.
|
|
||||||
- It does not support hash sums or validation, most fields are ignored.
|
|
||||||
- It has the following fields: Name, Size, PublicKey, CreationDate.
|
|
||||||
- Requires hash type "none".
|
|
||||||
|
|
||||||
#### --chunker-hash-type
|
|
||||||
|
|
||||||
Choose how chunker handles hash sums.
|
|
||||||
|
|
||||||
- Config: hash_type
|
|
||||||
- Env Var: RCLONE_CHUNKER_HASH_TYPE
|
|
||||||
- Type: string
|
|
||||||
- Default: "md5"
|
|
||||||
- Examples:
|
|
||||||
- "none"
|
|
||||||
- Chunker can pass any hash supported by wrapped remote
|
|
||||||
- for a single-chunk file but returns nothing otherwise.
|
|
||||||
- "md5"
|
|
||||||
- MD5 for multi-chunk files. Requires "simplejson".
|
|
||||||
- "sha1"
|
|
||||||
- SHA1 for multi-chunk files. Requires "simplejson".
|
|
||||||
- "md5quick"
|
|
||||||
- When a file is copied on to chunker, MD5 is taken from its source
|
|
||||||
- falling back to SHA1 if the source doesn't support it. Requires "simplejson".
|
|
||||||
- "sha1quick"
|
|
||||||
- Similar to "md5quick" but prefers SHA1 over MD5. Requires "simplejson".
|
|
||||||
|
|
||||||
#### --chunker-fail-on-bad-chunks
|
#### --chunker-fail-on-bad-chunks
|
||||||
|
|
||||||
|
|
|
@ -986,7 +986,6 @@ func TestSyncWithTrackRenames(t *testing.T) {
|
||||||
fs.Config.TrackRenames = true
|
fs.Config.TrackRenames = true
|
||||||
defer func() {
|
defer func() {
|
||||||
fs.Config.TrackRenames = false
|
fs.Config.TrackRenames = false
|
||||||
|
|
||||||
}()
|
}()
|
||||||
|
|
||||||
haveHash := r.Fremote.Hashes().Overlap(r.Flocal.Hashes()).GetOne() != hash.None
|
haveHash := r.Fremote.Hashes().Overlap(r.Flocal.Hashes()).GetOne() != hash.None
|
||||||
|
@ -1010,45 +1009,64 @@ func TestSyncWithTrackRenames(t *testing.T) {
|
||||||
|
|
||||||
fstest.CheckItems(t, r.Fremote, f1, f2)
|
fstest.CheckItems(t, r.Fremote, f1, f2)
|
||||||
|
|
||||||
if canTrackRenames {
|
// As currently there is no Fs interface providing number of chunks
|
||||||
if r.Fremote.Features().Move == nil || r.Fremote.Name() == "TestUnion" { // union remote can Move but returns CantMove error
|
// in a file, this test depends on the well-known names of test remotes.
|
||||||
// If no server side Move, we are falling back to Copy + Delete
|
|
||||||
assert.Equal(t, int64(1), accounting.GlobalStats().GetTransfers()) // 1 copy
|
|
||||||
assert.Equal(t, int64(4), accounting.GlobalStats().GetChecks()) // 2 file checks + 1 move + 1 delete
|
|
||||||
} else {
|
|
||||||
assert.Equal(t, int64(0), accounting.GlobalStats().GetTransfers()) // 0 copy
|
|
||||||
assert.Equal(t, int64(3), accounting.GlobalStats().GetChecks()) // 2 file checks + 1 move
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if toyFileChecks(r) != -1 {
|
|
||||||
assert.Equal(t, toyFileChecks(r), accounting.GlobalStats().GetChecks())
|
|
||||||
}
|
|
||||||
assert.Equal(t, toyFileTransfers(r), accounting.GlobalStats().GetTransfers())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func toyFileChecks(r *fstest.Run) int64 {
|
|
||||||
remote := r.Fremote.Name()
|
remote := r.Fremote.Name()
|
||||||
// Numbers below are calculated for a 14 byte file.
|
|
||||||
if !strings.HasPrefix(remote, "TestChunker") {
|
// Union remote can Move but returns CantMove error.
|
||||||
return 2
|
moveAsCopyDelete := r.Fremote.Features().Move == nil || remote == "TestUnion"
|
||||||
}
|
|
||||||
// Chunker makes more internal checks.
|
chunker := strings.HasPrefix(remote, "TestChunker")
|
||||||
|
wrappedMoveAsCopyDelete := chunker && strings.HasSuffix(remote, "S3")
|
||||||
|
|
||||||
|
chunk3b := chunker && strings.Contains(remote, "Chunk3b") // chunker with 3 byte chunks
|
||||||
|
chunk50b := chunker && strings.Contains(remote, "Chunk50b") // chunker with 50 byte chunks
|
||||||
|
chunkDefault := chunker && !strings.Contains(remote, "ChunkerChunk") // default big chunk size
|
||||||
|
chunkBig := chunk50b || chunkDefault // file is smaller than chunk size
|
||||||
|
|
||||||
|
// Verify number of checks for a toy 14 byte file.
|
||||||
|
// The order of cases matters!
|
||||||
var checks int
|
var checks int
|
||||||
switch {
|
switch {
|
||||||
case strings.Contains(remote, "Chunk3b"): // chunk 3 bytes
|
case canTrackRenames && chunk3b:
|
||||||
checks = 6
|
checks = 8 // chunker makes extra checks for each small chunk
|
||||||
case strings.Contains(remote, "Chunk50b"): // chunk 50 bytes
|
case canTrackRenames && chunkBig:
|
||||||
checks = 3
|
checks = 4 // chunker makes 1 extra check for a single big chunk
|
||||||
case strings.Contains(remote, "ChunkerChunk"): // unknown chunk size
|
case canTrackRenames && moveAsCopyDelete:
|
||||||
return -1
|
checks = 4 // 2 file checks + 1 move + 1 delete
|
||||||
|
case canTrackRenames:
|
||||||
|
checks = 3 // 2 file checks + 1 move
|
||||||
|
case !chunker:
|
||||||
|
checks = 2 // 2 file checks on a generic non-chunking remote
|
||||||
|
case chunk3b:
|
||||||
|
checks = 6 // chunker makes extra checks for each small chunk
|
||||||
|
case chunkBig && wrappedMoveAsCopyDelete:
|
||||||
|
checks = 4 // one more extra check because S3 emulates Move as Copy+Delete
|
||||||
|
case chunkBig:
|
||||||
|
checks = 3 // chunker makes 1 extra check for a single big chunk
|
||||||
default:
|
default:
|
||||||
checks = 3 // large chunks (eventually no chunking)
|
checks = -1 // skip verification for chunker with unknown chunk size
|
||||||
}
|
}
|
||||||
if strings.HasSuffix(remote, "S3") {
|
if checks != -1 { // "-1" allows remotes to bypass this check
|
||||||
checks++ // Extra check because S3 emulates Move as Copy+Delete.
|
assert.Equal(t, int64(checks), accounting.GlobalStats().GetChecks())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify number of copy operations for a toy 14 byte file.
|
||||||
|
// The order of cases matters!
|
||||||
|
var copies int64
|
||||||
|
switch {
|
||||||
|
case canTrackRenames && moveAsCopyDelete:
|
||||||
|
copies = 1 // 1 copy
|
||||||
|
case canTrackRenames:
|
||||||
|
copies = 0 // 0 copy
|
||||||
|
case chunkBig && wrappedMoveAsCopyDelete:
|
||||||
|
copies = 2 // extra Copy because S3 emulates Move as Copy+Delete.
|
||||||
|
default:
|
||||||
|
copies = 1
|
||||||
|
}
|
||||||
|
if copies != -1 { // "-1" allows remotes to bypass this check
|
||||||
|
assert.Equal(t, copies, accounting.GlobalStats().GetTransfers())
|
||||||
}
|
}
|
||||||
return int64(checks)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func toyFileTransfers(r *fstest.Run) int64 {
|
func toyFileTransfers(r *fstest.Run) int64 {
|
||||||
|
|
|
@ -33,9 +33,6 @@ backends:
|
||||||
- backend: "chunker"
|
- backend: "chunker"
|
||||||
remote: "TestChunkerNometaLocal:"
|
remote: "TestChunkerNometaLocal:"
|
||||||
fastlist: true
|
fastlist: true
|
||||||
- backend: "chunker"
|
|
||||||
remote: "TestChunkerCompatLocal:"
|
|
||||||
fastlist: true
|
|
||||||
- backend: "chunker"
|
- backend: "chunker"
|
||||||
remote: "TestChunkerChunk3bLocal:"
|
remote: "TestChunkerChunk3bLocal:"
|
||||||
fastlist: true
|
fastlist: true
|
||||||
|
@ -44,10 +41,6 @@ backends:
|
||||||
remote: "TestChunkerChunk3bNometaLocal:"
|
remote: "TestChunkerChunk3bNometaLocal:"
|
||||||
fastlist: true
|
fastlist: true
|
||||||
maxfile: 6k
|
maxfile: 6k
|
||||||
- backend: "chunker"
|
|
||||||
remote: "TestChunkerChunk3bCompatLocal:"
|
|
||||||
fastlist: true
|
|
||||||
maxfile: 6k
|
|
||||||
- backend: "chunker"
|
- backend: "chunker"
|
||||||
remote: "TestChunkerMailru:"
|
remote: "TestChunkerMailru:"
|
||||||
fastlist: true
|
fastlist: true
|
||||||
|
@ -66,30 +59,26 @@ backends:
|
||||||
- backend: "chunker"
|
- backend: "chunker"
|
||||||
remote: "TestChunkerS3:"
|
remote: "TestChunkerS3:"
|
||||||
fastlist: true
|
fastlist: true
|
||||||
ignore:
|
|
||||||
- TestIntegration/FsMkdir/FsPutFiles/SetTier
|
|
||||||
- backend: "chunker"
|
- backend: "chunker"
|
||||||
remote: "TestChunkerChunk50bS3:"
|
remote: "TestChunkerChunk50bS3:"
|
||||||
fastlist: true
|
fastlist: true
|
||||||
maxfile: 1k
|
maxfile: 1k
|
||||||
ignore:
|
- backend: "chunker"
|
||||||
- TestIntegration/FsMkdir/FsPutFiles/SetTier
|
remote: "TestChunkerChunk50bMD5HashS3:"
|
||||||
#- backend: "chunker"
|
fastlist: true
|
||||||
# remote: "TestChunkerChunk50bMD5HashS3:"
|
maxfile: 1k
|
||||||
# fastlist: true
|
- backend: "chunker"
|
||||||
# maxfile: 1k
|
remote: "TestChunkerChunk50bSHA1HashS3:"
|
||||||
#- backend: "chunker"
|
fastlist: true
|
||||||
# remote: "TestChunkerChunk50bMD5QuickS3:"
|
maxfile: 1k
|
||||||
# fastlist: true
|
- backend: "chunker"
|
||||||
# maxfile: 1k
|
remote: "TestChunkerChunk50bMD5QuickS3:"
|
||||||
#- backend: "chunker"
|
fastlist: true
|
||||||
# remote: "TestChunkerChunk50bSHA1HashS3:"
|
maxfile: 1k
|
||||||
# fastlist: true
|
- backend: "chunker"
|
||||||
# maxfile: 1k
|
remote: "TestChunkerChunk50bSHA1QuickS3:"
|
||||||
#- backend: "chunker"
|
fastlist: true
|
||||||
# remote: "TestChunkerChunk50bSHA1QuickS3:"
|
maxfile: 1k
|
||||||
# fastlist: true
|
|
||||||
# maxfile: 1k
|
|
||||||
## end chunker
|
## end chunker
|
||||||
- backend: "drive"
|
- backend: "drive"
|
||||||
remote: "TestDrive:"
|
remote: "TestDrive:"
|
||||||
|
|
Loading…
Reference in a new issue