chunker: reduce length of temporary suffix

This commit is contained in:
Ivan Andreev 2019-12-04 13:43:58 +03:00 committed by Nick Craig-Wood
parent 50bb9b7bdd
commit 41ba1bba2b
3 changed files with 384 additions and 206 deletions

View file

@ -12,11 +12,13 @@ import (
gohash "hash" gohash "hash"
"io" "io"
"io/ioutil" "io/ioutil"
"math/rand"
"path" "path"
"regexp" "regexp"
"sort" "sort"
"strconv" "strconv"
"strings" "strings"
"sync"
"time" "time"
"github.com/pkg/errors" "github.com/pkg/errors"
@ -34,46 +36,57 @@ import (
// and optional metadata object. If it's present, // and optional metadata object. If it's present,
// meta object is named after the original file. // meta object is named after the original file.
// //
// The only metadata format supported at the moment is simplejson.
// It supports only per-file meta objects that are rudimentary,
// used mostly for consistency checks (lazily for performance reasons).
// Other formats can be developed that use an external meta store
// free of these limitations, but this needs some support from
// rclone core (e.g. metadata store interfaces).
//
// The following types of chunks are supported: // The following types of chunks are supported:
// data and control, active and temporary. // data and control, active and temporary.
// Chunk type is identified by matching chunk file name // Chunk type is identified by matching chunk file name
// based on the chunk name format configured by user. // based on the chunk name format configured by user.
// //
// Both data and control chunks can be either temporary or // Both data and control chunks can be either temporary (aka hidden)
// active (non-temporary). // or active (non-temporary aka normal aka permanent).
// An operation creates temporary chunks while it runs. // An operation creates temporary chunks while it runs.
// By completion it removes temporary and leaves active // By completion it removes temporary and leaves active chunks.
// (aka normal aka permanent) chunks.
// //
// Temporary (aka hidden) chunks have a special hardcoded suffix // Temporary chunks have a special hardcoded suffix in addition
// in addition to the configured name pattern. The suffix comes last // to the configured name pattern.
// to prevent name collisions with non-temporary chunks. // Temporary suffix includes so called transaction identifier
// Temporary suffix includes so called transaction number usually // (abbreviated as `xactID` below), a generic non-negative base-36 "number"
// abbreviated as `xactNo` below, a generic non-negative integer
// used by parallel operations to share a composite object. // used by parallel operations to share a composite object.
// Chunker also accepts the longer decimal temporary suffix (obsolete),
// which is transparently converted to the new format. At its maximum
// length of 13 decimals it makes a 9-digit base-36 number.
// //
// Chunker can tell data chunks from control chunks by the characters // Chunker can tell data chunks from control chunks by the characters
// located in the "hash placeholder" position of configured format. // located in the "hash placeholder" position of configured format.
// Data chunks have decimal digits there. // Data chunks have decimal digits there.
// Control chunks have a short lowercase literal prepended by underscore // Control chunks have in that position a short lowercase alphanumeric
// in that position. // string (starting with a letter) prepended by underscore.
// //
// Metadata format v1 does not define any control chunk types, // Metadata format v1 does not define any control chunk types,
// they are currently ignored aka reserved. // they are currently ignored aka reserved.
// In future they can be used to implement resumable uploads etc. // In future they can be used to implement resumable uploads etc.
// //
const ( const (
ctrlTypeRegStr = `[a-z]{3,9}` ctrlTypeRegStr = `[a-z][a-z0-9]{2,6}`
tempChunkFormat = `%s..tmp_%010d` tempSuffixFormat = `_%04s`
tempChunkRegStr = `\.\.tmp_([0-9]{10,19})` tempSuffixRegStr = `_([0-9a-z]{4,9})`
tempSuffixRegOld = `\.\.tmp_([0-9]{10,13})`
) )
var ( var (
ctrlTypeRegexp = regexp.MustCompile(`^` + ctrlTypeRegStr + `$`) // regular expressions to validate control type and temporary suffix
ctrlTypeRegexp = regexp.MustCompile(`^` + ctrlTypeRegStr + `$`)
tempSuffixRegexp = regexp.MustCompile(`^` + tempSuffixRegStr + `$`)
) )
// Normally metadata is a small piece of JSON (about 100-300 bytes). // Normally metadata is a small piece of JSON (about 100-300 bytes).
// The size of valid metadata size must never exceed this limit. // The size of valid metadata must never exceed this limit.
// Current maximum provides a reasonable room for future extensions. // Current maximum provides a reasonable room for future extensions.
// //
// Please refrain from increasing it, this can cause old rclone versions // Please refrain from increasing it, this can cause old rclone versions
@ -101,6 +114,9 @@ const revealHidden = false
// Prevent memory overflow due to specially crafted chunk name // Prevent memory overflow due to specially crafted chunk name
const maxSafeChunkNumber = 10000000 const maxSafeChunkNumber = 10000000
// Number of attempts to find unique transaction identifier
const maxTransactionProbes = 100
// standard chunker errors // standard chunker errors
var ( var (
ErrChunkOverflow = errors.New("chunk number overflow") ErrChunkOverflow = errors.New("chunk number overflow")
@ -113,13 +129,6 @@ const (
delFailed = 2 // move, then delete and try again if failed delFailed = 2 // move, then delete and try again if failed
) )
// Note: metadata logic is tightly coupled with chunker code in many
// places, eg. in checks whether a file should have meta object or is
// eligible for chunking.
// If more metadata formats (or versions of a format) are added in future,
// it may be advisable to factor it into a "metadata strategy" interface
// similar to chunkingReader or linearReader below.
// Register with Fs // Register with Fs
func init() { func init() {
fs.Register(&fs.RegInfo{ fs.Register(&fs.RegInfo{
@ -261,7 +270,7 @@ func NewFs(name, rpath string, m configmap.Mapper) (fs.Fs, error) {
// detects a composite file because it finds the first chunk! // detects a composite file because it finds the first chunk!
// (yet can't satisfy fstest.CheckListing, will ignore) // (yet can't satisfy fstest.CheckListing, will ignore)
if err == nil && !f.useMeta && strings.Contains(rpath, "/") { if err == nil && !f.useMeta && strings.Contains(rpath, "/") {
firstChunkPath := f.makeChunkName(remotePath, 0, "", -1) firstChunkPath := f.makeChunkName(remotePath, 0, "", "")
_, testErr := baseInfo.NewFs(baseName, firstChunkPath, baseConfig) _, testErr := baseInfo.NewFs(baseName, firstChunkPath, baseConfig)
if testErr == fs.ErrorIsFile { if testErr == fs.ErrorIsFile {
err = testErr err = testErr
@ -310,12 +319,16 @@ type Fs struct {
dataNameFmt string // name format of data chunks dataNameFmt string // name format of data chunks
ctrlNameFmt string // name format of control chunks ctrlNameFmt string // name format of control chunks
nameRegexp *regexp.Regexp // regular expression to match chunk names nameRegexp *regexp.Regexp // regular expression to match chunk names
xactIDRand *rand.Rand // generator of random transaction identifiers
xactIDMutex sync.Mutex // mutex for the source of randomness
opt Options // copy of Options opt Options // copy of Options
features *fs.Features // optional features features *fs.Features // optional features
dirSort bool // reserved for future, ignored dirSort bool // reserved for future, ignored
} }
// configure must be called only from NewFs or by unit tests // configure sets up chunker for given name format, meta format and hash type.
// It also seeds the source of random transaction identifiers.
// configure must be called only from NewFs or by unit tests.
func (f *Fs) configure(nameFormat, metaFormat, hashType string) error { func (f *Fs) configure(nameFormat, metaFormat, hashType string) error {
if err := f.setChunkNameFormat(nameFormat); err != nil { if err := f.setChunkNameFormat(nameFormat); err != nil {
return errors.Wrapf(err, "invalid name format '%s'", nameFormat) return errors.Wrapf(err, "invalid name format '%s'", nameFormat)
@ -326,6 +339,10 @@ func (f *Fs) configure(nameFormat, metaFormat, hashType string) error {
if err := f.setHashType(hashType); err != nil { if err := f.setHashType(hashType); err != nil {
return err return err
} }
randomSeed := time.Now().UnixNano()
f.xactIDRand = rand.New(rand.NewSource(randomSeed))
return nil return nil
} }
@ -414,13 +431,13 @@ func (f *Fs) setChunkNameFormat(pattern string) error {
} }
reDataOrCtrl := fmt.Sprintf("(?:(%s)|_(%s))", reDigits, ctrlTypeRegStr) reDataOrCtrl := fmt.Sprintf("(?:(%s)|_(%s))", reDigits, ctrlTypeRegStr)
// this must be non-greedy or else it can eat up temporary suffix // this must be non-greedy or else it could eat up temporary suffix
const mainNameRegStr = "(.+?)" const mainNameRegStr = "(.+?)"
strRegex := regexp.QuoteMeta(pattern) strRegex := regexp.QuoteMeta(pattern)
strRegex = reHashes.ReplaceAllLiteralString(strRegex, reDataOrCtrl) strRegex = reHashes.ReplaceAllLiteralString(strRegex, reDataOrCtrl)
strRegex = strings.Replace(strRegex, "\\*", mainNameRegStr, -1) strRegex = strings.Replace(strRegex, "\\*", mainNameRegStr, -1)
strRegex = fmt.Sprintf("^%s(?:%s)?$", strRegex, tempChunkRegStr) strRegex = fmt.Sprintf("^%s(?:%s|%s)?$", strRegex, tempSuffixRegStr, tempSuffixRegOld)
f.nameRegexp = regexp.MustCompile(strRegex) f.nameRegexp = regexp.MustCompile(strRegex)
// craft printf formats for active data/control chunks // craft printf formats for active data/control chunks
@ -435,34 +452,36 @@ func (f *Fs) setChunkNameFormat(pattern string) error {
return nil return nil
} }
// makeChunkName produces chunk name (or path) for given file. // makeChunkName produces chunk name (or path) for a given file.
// //
// mainPath can be name, relative or absolute path of main file. // filePath can be name, relative or absolute path of main file.
// //
// chunkNo must be a zero based index of data chunk. // chunkNo must be a zero based index of data chunk.
// Negative chunkNo eg. -1 indicates a control chunk. // Negative chunkNo eg. -1 indicates a control chunk.
// ctrlType is type of control chunk (must be valid). // ctrlType is type of control chunk (must be valid).
// ctrlType must be "" for data chunks. // ctrlType must be "" for data chunks.
// //
// xactNo is a transaction number. // xactID is a transaction identifier. Empty xactID denotes active chunk,
// Negative xactNo eg. -1 indicates an active chunk, // otherwise temporary chunk name is produced.
// otherwise produce temporary chunk name.
// //
func (f *Fs) makeChunkName(mainPath string, chunkNo int, ctrlType string, xactNo int64) string { func (f *Fs) makeChunkName(filePath string, chunkNo int, ctrlType, xactID string) string {
dir, mainName := path.Split(mainPath) dir, parentName := path.Split(filePath)
var name string var name, tempSuffix string
switch { switch {
case chunkNo >= 0 && ctrlType == "": case chunkNo >= 0 && ctrlType == "":
name = fmt.Sprintf(f.dataNameFmt, mainName, chunkNo+f.opt.StartFrom) name = fmt.Sprintf(f.dataNameFmt, parentName, chunkNo+f.opt.StartFrom)
case chunkNo < 0 && ctrlTypeRegexp.MatchString(ctrlType): case chunkNo < 0 && ctrlTypeRegexp.MatchString(ctrlType):
name = fmt.Sprintf(f.ctrlNameFmt, mainName, ctrlType) name = fmt.Sprintf(f.ctrlNameFmt, parentName, ctrlType)
default: default:
panic("makeChunkName: invalid argument") // must not produce something we can't consume panic("makeChunkName: invalid argument") // must not produce something we can't consume
} }
if xactNo >= 0 { if xactID != "" {
name = fmt.Sprintf(tempChunkFormat, name, xactNo) tempSuffix = fmt.Sprintf(tempSuffixFormat, xactID)
if !tempSuffixRegexp.MatchString(tempSuffix) {
panic("makeChunkName: invalid argument")
}
} }
return dir + name return dir + name + tempSuffix
} }
// parseChunkName checks whether given file path belongs to // parseChunkName checks whether given file path belongs to
@ -470,20 +489,21 @@ func (f *Fs) makeChunkName(mainPath string, chunkNo int, ctrlType string, xactNo
// //
// filePath can be name, relative or absolute path of a file. // filePath can be name, relative or absolute path of a file.
// //
// Returned mainPath is a non-empty string if valid chunk name // Returned parentPath is path of the composite file owning the chunk.
// is detected or "" if it's not a chunk. // It's a non-empty string if valid chunk name is detected
// or "" if it's not a chunk.
// Other returned values depend on detected chunk type: // Other returned values depend on detected chunk type:
// data or control, active or temporary: // data or control, active or temporary:
// //
// data chunk - the returned chunkNo is non-negative and ctrlType is "" // data chunk - the returned chunkNo is non-negative and ctrlType is ""
// control chunk - the chunkNo is -1 and ctrlType is non-empty string // control chunk - the chunkNo is -1 and ctrlType is a non-empty string
// active chunk - the returned xactNo is -1 // active chunk - the returned xactID is ""
// temporary chunk - the xactNo is non-negative integer // temporary chunk - the xactID is a non-empty string
func (f *Fs) parseChunkName(filePath string) (mainPath string, chunkNo int, ctrlType string, xactNo int64) { func (f *Fs) parseChunkName(filePath string) (parentPath string, chunkNo int, ctrlType, xactID string) {
dir, name := path.Split(filePath) dir, name := path.Split(filePath)
match := f.nameRegexp.FindStringSubmatch(name) match := f.nameRegexp.FindStringSubmatch(name)
if match == nil || match[1] == "" { if match == nil || match[1] == "" {
return "", -1, "", -1 return "", -1, "", ""
} }
var err error var err error
@ -494,19 +514,26 @@ func (f *Fs) parseChunkName(filePath string) (mainPath string, chunkNo int, ctrl
} }
if chunkNo -= f.opt.StartFrom; chunkNo < 0 { if chunkNo -= f.opt.StartFrom; chunkNo < 0 {
fs.Infof(f, "invalid data chunk number in file %q", name) fs.Infof(f, "invalid data chunk number in file %q", name)
return "", -1, "", -1 return "", -1, "", ""
} }
} }
xactNo = -1
if match[4] != "" { if match[4] != "" {
if xactNo, err = strconv.ParseInt(match[4], 10, 64); err != nil || xactNo < 0 { xactID = match[4]
fs.Infof(f, "invalid transaction number in file %q", name) }
return "", -1, "", -1 if match[5] != "" {
// old-style temporary suffix
number, err := strconv.ParseInt(match[5], 10, 64)
if err != nil || number < 0 {
fs.Infof(f, "invalid old-style transaction number in file %q", name)
return "", -1, "", ""
} }
// convert old-style transaction number to base-36 transaction ID
xactID = fmt.Sprintf(tempSuffixFormat, strconv.FormatInt(number, 36))
xactID = xactID[1:] // strip leading underscore
} }
mainPath = dir + match[1] parentPath = dir + match[1]
ctrlType = match[3] ctrlType = match[3]
return return
} }
@ -514,17 +541,74 @@ func (f *Fs) parseChunkName(filePath string) (mainPath string, chunkNo int, ctrl
// forbidChunk prints error message or raises error if file is chunk. // forbidChunk prints error message or raises error if file is chunk.
// First argument sets log prefix, use `false` to suppress message. // First argument sets log prefix, use `false` to suppress message.
func (f *Fs) forbidChunk(o interface{}, filePath string) error { func (f *Fs) forbidChunk(o interface{}, filePath string) error {
if mainPath, _, _, _ := f.parseChunkName(filePath); mainPath != "" { if parentPath, _, _, _ := f.parseChunkName(filePath); parentPath != "" {
if f.opt.FailHard { if f.opt.FailHard {
return fmt.Errorf("chunk overlap with %q", mainPath) return fmt.Errorf("chunk overlap with %q", parentPath)
} }
if boolVal, isBool := o.(bool); !isBool || boolVal { if boolVal, isBool := o.(bool); !isBool || boolVal {
fs.Errorf(o, "chunk overlap with %q", mainPath) fs.Errorf(o, "chunk overlap with %q", parentPath)
} }
} }
return nil return nil
} }
// newXactID generates a transaction identifier for temporary chunk names.
//
// The result is always 6 base-36 characters (digits 0-9, lowercase a-z),
// which fits the 4-9 character range accepted by the temporary suffix
// pattern. With the leading underscore the temporary suffix is therefore
// 7 characters long.
//
// Layout of the identifier:
//
//   - Characters 1-4 encode the current unix time in seconds, reduced
//     modulo closestPrimeZzzzSeconds (1679609) and zero-padded.
//     Four base-36 digits can hold at most "zzzz" = 1,679,615, so the
//     modulus is picked just below that bound; it is named as a prime so
//     that periodic invocations (eg. from cron) are less likely to line up
//     with the wrap-around period, which is a little over 19 days 10 hours
//     33 minutes.
//   - Characters 5-6 are a zero-padded random number in the range
//     0..1295 drawn from the Fs-local random source under xactIDMutex.
//     Two operations started within the same second on the same file
//     thus collide with probability 1/1296 (below 0.1%).
//
// If filePath is empty the first candidate is returned without probing.
// Otherwise the function looks up the first temporary data chunk that the
// candidate would produce on the base remote; the candidate is accepted
// as soon as that lookup fails with any error, and a new random part is
// drawn if the chunk is found. After maxTransactionProbes unsuccessful
// attempts an error is returned together with an empty identifier.
func (f *Fs) newXactID(ctx context.Context, filePath string) (xactID string, err error) {
	const closestPrimeZzzzSeconds = 1679609
	const maxTwoBase36Digits = 1295

	// Time-based part: identical for all attempts within this call.
	now := time.Now().Unix()
	if now < 0 {
		now = -now // unlikely but the number must be positive
	}
	timePart := strconv.FormatInt(now%closestPrimeZzzzSeconds, 36)

	probing := filePath != ""
	for attempt := 0; attempt < maxTransactionProbes; attempt++ {
		// The random source is shared by all operations on this Fs,
		// so hold the mutex only for the duration of the draw.
		f.xactIDMutex.Lock()
		draw := f.xactIDRand.Int63n(maxTwoBase36Digits + 1)
		f.xactIDMutex.Unlock()

		xactID = fmt.Sprintf("%04s%02s", timePart, strconv.FormatInt(draw, 36))
		if !probing {
			return xactID, nil
		}

		// A failed lookup of the would-be first chunk means the
		// identifier is considered free for this file.
		candidate := f.makeChunkName(filePath, 0, "", xactID)
		if _, probeErr := f.base.NewObject(ctx, candidate); probeErr != nil {
			return xactID, nil
		}
	}

	return "", fmt.Errorf("can't setup transaction for %s", filePath)
}
// List the objects and directories in dir into entries. // List the objects and directories in dir into entries.
// The entries can be returned in any order but should be // The entries can be returned in any order but should be
// for a complete directory. // for a complete directory.
@ -602,8 +686,8 @@ func (f *Fs) processEntries(ctx context.Context, origEntries fs.DirEntries, dirP
switch entry := dirOrObject.(type) { switch entry := dirOrObject.(type) {
case fs.Object: case fs.Object:
remote := entry.Remote() remote := entry.Remote()
if mainRemote, chunkNo, ctrlType, xactNo := f.parseChunkName(remote); mainRemote != "" { if mainRemote, chunkNo, ctrlType, xactID := f.parseChunkName(remote); mainRemote != "" {
if xactNo != -1 { if xactID != "" {
if revealHidden { if revealHidden {
fs.Infof(f, "ignore temporary chunk %q", remote) fs.Infof(f, "ignore temporary chunk %q", remote)
} }
@ -686,7 +770,7 @@ func (f *Fs) processEntries(ctx context.Context, origEntries fs.DirEntries, dirP
// //
// Please note that every NewObject invocation will scan the whole directory. // Please note that every NewObject invocation will scan the whole directory.
// Using here something like fs.DirCache might improve performance // Using here something like fs.DirCache might improve performance
// (but will make logic more complex, though). // (yet making the logic more complex).
// //
// Note that chunker prefers analyzing file names rather than reading // Note that chunker prefers analyzing file names rather than reading
// the content of meta object assuming that directory scans are fast // the content of meta object assuming that directory scans are fast
@ -752,8 +836,8 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
if !strings.Contains(entryRemote, remote) { if !strings.Contains(entryRemote, remote) {
continue // bypass regexp to save cpu continue // bypass regexp to save cpu
} }
mainRemote, chunkNo, ctrlType, xactNo := f.parseChunkName(entryRemote) mainRemote, chunkNo, ctrlType, xactID := f.parseChunkName(entryRemote)
if mainRemote == "" || mainRemote != remote || ctrlType != "" || xactNo != -1 { if mainRemote == "" || mainRemote != remote || ctrlType != "" || xactID != "" {
continue // skip non-conforming, temporary and control chunks continue // skip non-conforming, temporary and control chunks
} }
//fs.Debugf(f, "%q belongs to %q as chunk %d", entryRemote, mainRemote, chunkNo) //fs.Debugf(f, "%q belongs to %q as chunk %d", entryRemote, mainRemote, chunkNo)
@ -786,7 +870,7 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
// This is either a composite object with metadata or a non-chunked // This is either a composite object with metadata or a non-chunked
// file without metadata. Validate it and update the total data size. // file without metadata. Validate it and update the total data size.
// As an optimization, skip metadata reading here - we will call // As an optimization, skip metadata reading here - we will call
// readMetadata lazily when needed. // readMetadata lazily when needed (reading can be expensive).
if err := o.validate(); err != nil { if err := o.validate(); err != nil {
return nil, err return nil, err
} }
@ -843,14 +927,11 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st
} }
}() }()
// Use system timer as a trivial source of transaction numbers,
// don't try hard to safeguard against chunk collisions between
// parallel transactions.
xactNo := time.Now().Unix()
if xactNo < 0 {
xactNo = -xactNo // unlikely but transaction number must be positive
}
baseRemote := remote baseRemote := remote
xactID, errXact := f.newXactID(ctx, baseRemote)
if errXact != nil {
return nil, errXact
}
// Transfer chunks data // Transfer chunks data
for c.chunkNo = 0; !c.done; c.chunkNo++ { for c.chunkNo = 0; !c.done; c.chunkNo++ {
@ -858,7 +939,7 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st
return nil, ErrChunkOverflow return nil, ErrChunkOverflow
} }
tempRemote := f.makeChunkName(baseRemote, c.chunkNo, "", xactNo) tempRemote := f.makeChunkName(baseRemote, c.chunkNo, "", xactID)
size := c.sizeLeft size := c.sizeLeft
if size > c.chunkSize { if size > c.chunkSize {
size = c.chunkSize size = c.chunkSize
@ -962,7 +1043,7 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st
// Rename data chunks from temporary to final names // Rename data chunks from temporary to final names
for chunkNo, chunk := range c.chunks { for chunkNo, chunk := range c.chunks {
chunkRemote := f.makeChunkName(baseRemote, chunkNo, "", -1) chunkRemote := f.makeChunkName(baseRemote, chunkNo, "", "")
chunkMoved, errMove := f.baseMove(ctx, chunk, chunkRemote, delFailed) chunkMoved, errMove := f.baseMove(ctx, chunk, chunkRemote, delFailed)
if errMove != nil { if errMove != nil {
return nil, errMove return nil, errMove
@ -1221,11 +1302,6 @@ func (f *Fs) PutUnchecked(ctx context.Context, in io.Reader, src fs.ObjectInfo,
return f.newObject("", o, nil), nil return f.newObject("", o, nil), nil
} }
// Precision returns the precision of this Fs
func (f *Fs) Precision() time.Duration {
return f.base.Precision()
}
// Hashes returns the supported hash sets. // Hashes returns the supported hash sets.
// Chunker advertises a hash type if and only if it can be calculated // Chunker advertises a hash type if and only if it can be calculated
// for files of any size, non-chunked or composite. // for files of any size, non-chunked or composite.
@ -1613,8 +1689,8 @@ func (f *Fs) ChangeNotify(ctx context.Context, notifyFunc func(string, fs.EntryT
wrappedNotifyFunc := func(path string, entryType fs.EntryType) { wrappedNotifyFunc := func(path string, entryType fs.EntryType) {
//fs.Debugf(f, "ChangeNotify: path %q entryType %d", path, entryType) //fs.Debugf(f, "ChangeNotify: path %q entryType %d", path, entryType)
if entryType == fs.EntryObject { if entryType == fs.EntryObject {
mainPath, _, _, xactNo := f.parseChunkName(path) mainPath, _, _, xactID := f.parseChunkName(path)
if mainPath != "" && xactNo == -1 { if mainPath != "" && xactID == "" {
path = mainPath path = mainPath
} }
} }
@ -2063,7 +2139,7 @@ type metaSimpleJSON struct {
// Current implementation creates metadata in three cases: // Current implementation creates metadata in three cases:
// - for files larger than chunk size // - for files larger than chunk size
// - if file contents can be mistaken as meta object // - if file contents can be mistaken as meta object
// - if consistent hashing is on but wrapped remote can't provide given hash // - if consistent hashing is On but wrapped remote can't provide given hash
// //
func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 string) ([]byte, error) { func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 string) ([]byte, error) {
version := metadataVersion version := metadataVersion
@ -2177,6 +2253,11 @@ func (f *Fs) String() string {
return fmt.Sprintf("Chunked '%s:%s'", f.name, f.root) return fmt.Sprintf("Chunked '%s:%s'", f.name, f.root)
} }
// Precision returns the precision of this Fs.
// The value is not computed here: the call is forwarded unchanged
// to the base Fs held in f.base and its result is returned as is.
func (f *Fs) Precision() time.Duration {
return f.base.Precision()
}
// Check the interfaces are satisfied // Check the interfaces are satisfied
var ( var (
_ fs.Fs = (*Fs)(nil) _ fs.Fs = (*Fs)(nil)

View file

@ -64,35 +64,40 @@ func testChunkNameFormat(t *testing.T, f *Fs) {
assert.Error(t, err) assert.Error(t, err)
} }
assertMakeName := func(wantChunkName, mainName string, chunkNo int, ctrlType string, xactNo int64) { assertMakeName := func(wantChunkName, mainName string, chunkNo int, ctrlType, xactID string) {
gotChunkName := f.makeChunkName(mainName, chunkNo, ctrlType, xactNo) gotChunkName := ""
assert.Equal(t, wantChunkName, gotChunkName) assert.NotPanics(t, func() {
gotChunkName = f.makeChunkName(mainName, chunkNo, ctrlType, xactID)
}, "makeChunkName(%q,%d,%q,%q) must not panic", mainName, chunkNo, ctrlType, xactID)
if gotChunkName != "" {
assert.Equal(t, wantChunkName, gotChunkName)
}
} }
assertMakeNamePanics := func(mainName string, chunkNo int, ctrlType string, xactNo int64) { assertMakeNamePanics := func(mainName string, chunkNo int, ctrlType, xactID string) {
assert.Panics(t, func() { assert.Panics(t, func() {
_ = f.makeChunkName(mainName, chunkNo, ctrlType, xactNo) _ = f.makeChunkName(mainName, chunkNo, ctrlType, xactID)
}, "makeChunkName(%q,%d,%q,%d) should panic", mainName, chunkNo, ctrlType, xactNo) }, "makeChunkName(%q,%d,%q,%q) should panic", mainName, chunkNo, ctrlType, xactID)
} }
assertParseName := func(fileName, wantMainName string, wantChunkNo int, wantCtrlType string, wantXactNo int64) { assertParseName := func(fileName, wantMainName string, wantChunkNo int, wantCtrlType, wantXactID string) {
gotMainName, gotChunkNo, gotCtrlType, gotXactNo := f.parseChunkName(fileName) gotMainName, gotChunkNo, gotCtrlType, gotXactID := f.parseChunkName(fileName)
assert.Equal(t, wantMainName, gotMainName) assert.Equal(t, wantMainName, gotMainName)
assert.Equal(t, wantChunkNo, gotChunkNo) assert.Equal(t, wantChunkNo, gotChunkNo)
assert.Equal(t, wantCtrlType, gotCtrlType) assert.Equal(t, wantCtrlType, gotCtrlType)
assert.Equal(t, wantXactNo, gotXactNo) assert.Equal(t, wantXactID, gotXactID)
} }
const newFormatSupported = false // support for patterns not starting with base name (*) const newFormatSupported = false // support for patterns not starting with base name (*)
// valid formats // valid formats
assertFormat(`*.rclone_chunk.###`, `%s.rclone_chunk.%03d`, `%s.rclone_chunk._%s`, `^(.+?)\.rclone_chunk\.(?:([0-9]{3,})|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`) assertFormat(`*.rclone_chunk.###`, `%s.rclone_chunk.%03d`, `%s.rclone_chunk._%s`, `^(.+?)\.rclone_chunk\.(?:([0-9]{3,})|_([a-z][a-z0-9]{2,6}))(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`)
assertFormat(`*.rclone_chunk.#`, `%s.rclone_chunk.%d`, `%s.rclone_chunk._%s`, `^(.+?)\.rclone_chunk\.(?:([0-9]+)|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`) assertFormat(`*.rclone_chunk.#`, `%s.rclone_chunk.%d`, `%s.rclone_chunk._%s`, `^(.+?)\.rclone_chunk\.(?:([0-9]+)|_([a-z][a-z0-9]{2,6}))(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`)
assertFormat(`*_chunk_#####`, `%s_chunk_%05d`, `%s_chunk__%s`, `^(.+?)_chunk_(?:([0-9]{5,})|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`) assertFormat(`*_chunk_#####`, `%s_chunk_%05d`, `%s_chunk__%s`, `^(.+?)_chunk_(?:([0-9]{5,})|_([a-z][a-z0-9]{2,6}))(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`)
assertFormat(`*-chunk-#`, `%s-chunk-%d`, `%s-chunk-_%s`, `^(.+?)-chunk-(?:([0-9]+)|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`) assertFormat(`*-chunk-#`, `%s-chunk-%d`, `%s-chunk-_%s`, `^(.+?)-chunk-(?:([0-9]+)|_([a-z][a-z0-9]{2,6}))(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`)
assertFormat(`*-chunk-#-%^$()[]{}.+-!?:\`, `%s-chunk-%d-%%^$()[]{}.+-!?:\`, `%s-chunk-_%s-%%^$()[]{}.+-!?:\`, `^(.+?)-chunk-(?:([0-9]+)|_([a-z]{3,9}))-%\^\$\(\)\[\]\{\}\.\+-!\?:\\(?:\.\.tmp_([0-9]{10,19}))?$`) assertFormat(`*-chunk-#-%^$()[]{}.+-!?:\`, `%s-chunk-%d-%%^$()[]{}.+-!?:\`, `%s-chunk-_%s-%%^$()[]{}.+-!?:\`, `^(.+?)-chunk-(?:([0-9]+)|_([a-z][a-z0-9]{2,6}))-%\^\$\(\)\[\]\{\}\.\+-!\?:\\(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`)
if newFormatSupported { if newFormatSupported {
assertFormat(`_*-chunk-##,`, `_%s-chunk-%02d,`, `_%s-chunk-_%s,`, `^_(.+?)-chunk-(?:([0-9]{2,})|_([a-z]{3,9})),(?:\.\.tmp_([0-9]{10,19}))?$`) assertFormat(`_*-chunk-##,`, `_%s-chunk-%02d,`, `_%s-chunk-_%s,`, `^_(.+?)-chunk-(?:([0-9]{2,})|_([a-z][a-z0-9]{2,6})),(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`)
} }
// invalid formats // invalid formats
@ -111,142 +116,223 @@ func testChunkNameFormat(t *testing.T, f *Fs) {
// quick tests // quick tests
if newFormatSupported { if newFormatSupported {
assertFormat(`part_*_#`, `part_%s_%d`, `part_%s__%s`, `^part_(.+?)_(?:([0-9]+)|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`) assertFormat(`part_*_#`, `part_%s_%d`, `part_%s__%s`, `^part_(.+?)_(?:([0-9]+)|_([a-z][a-z0-9]{2,6}))(?:_([0-9][0-9a-z]{3,8})\.\.tmp_([0-9]{10,13}))?$`)
f.opt.StartFrom = 1 f.opt.StartFrom = 1
assertMakeName(`part_fish_1`, "fish", 0, "", -1) assertMakeName(`part_fish_1`, "fish", 0, "", "")
assertParseName(`part_fish_43`, "fish", 42, "", -1) assertParseName(`part_fish_43`, "fish", 42, "", "")
assertMakeName(`part_fish_3..tmp_0000000004`, "fish", 2, "", 4) assertMakeName(`part_fish__locks`, "fish", -2, "locks", "")
assertParseName(`part_fish_4..tmp_0000000005`, "fish", 3, "", 5) assertParseName(`part_fish__locks`, "fish", -1, "locks", "")
assertMakeName(`part_fish__locks`, "fish", -2, "locks", -3) assertMakeName(`part_fish__x2y`, "fish", -2, "x2y", "")
assertParseName(`part_fish__locks`, "fish", -1, "locks", -1) assertParseName(`part_fish__x2y`, "fish", -1, "x2y", "")
assertMakeName(`part_fish__blockinfo..tmp_1234567890123456789`, "fish", -3, "blockinfo", 1234567890123456789) assertMakeName(`part_fish_3_0004`, "fish", 2, "", "4")
assertParseName(`part_fish__blockinfo..tmp_1234567890123456789`, "fish", -1, "blockinfo", 1234567890123456789) assertParseName(`part_fish_4_0005`, "fish", 3, "", "0005")
assertMakeName(`part_fish__blkinfo_jj5fvo3wr`, "fish", -3, "blkinfo", "jj5fvo3wr")
assertParseName(`part_fish__blkinfo_zz9fvo3wr`, "fish", -1, "blkinfo", "zz9fvo3wr")
// old-style temporary suffix (parse only)
assertParseName(`part_fish_4..tmp_0000000011`, "fish", 3, "", "000b")
assertParseName(`part_fish__blkinfo_jj5fvo3wr`, "fish", -1, "blkinfo", "jj5fvo3wr")
} }
// prepare format for long tests // prepare format for long tests
assertFormat(`*.chunk.###`, `%s.chunk.%03d`, `%s.chunk._%s`, `^(.+?)\.chunk\.(?:([0-9]{3,})|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`) assertFormat(`*.chunk.###`, `%s.chunk.%03d`, `%s.chunk._%s`, `^(.+?)\.chunk\.(?:([0-9]{3,})|_([a-z][a-z0-9]{2,6}))(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`)
f.opt.StartFrom = 2 f.opt.StartFrom = 2
// valid data chunks // valid data chunks
assertMakeName(`fish.chunk.003`, "fish", 1, "", -1) assertMakeName(`fish.chunk.003`, "fish", 1, "", "")
assertMakeName(`fish.chunk.011..tmp_0000054321`, "fish", 9, "", 54321) assertParseName(`fish.chunk.003`, "fish", 1, "", "")
assertMakeName(`fish.chunk.011..tmp_1234567890`, "fish", 9, "", 1234567890) assertMakeName(`fish.chunk.021`, "fish", 19, "", "")
assertMakeName(`fish.chunk.1916..tmp_123456789012345`, "fish", 1914, "", 123456789012345) assertParseName(`fish.chunk.021`, "fish", 19, "", "")
assertParseName(`fish.chunk.003`, "fish", 1, "", -1) // valid temporary data chunks
assertParseName(`fish.chunk.004..tmp_0000000021`, "fish", 2, "", 21) assertMakeName(`fish.chunk.011_4321`, "fish", 9, "", "4321")
assertParseName(`fish.chunk.021`, "fish", 19, "", -1) assertParseName(`fish.chunk.011_4321`, "fish", 9, "", "4321")
assertParseName(`fish.chunk.323..tmp_1234567890123456789`, "fish", 321, "", 1234567890123456789) assertMakeName(`fish.chunk.011_00bc`, "fish", 9, "", "00bc")
assertParseName(`fish.chunk.011_00bc`, "fish", 9, "", "00bc")
assertMakeName(`fish.chunk.1916_5jjfvo3wr`, "fish", 1914, "", "5jjfvo3wr")
assertParseName(`fish.chunk.1916_5jjfvo3wr`, "fish", 1914, "", "5jjfvo3wr")
assertMakeName(`fish.chunk.1917_zz9fvo3wr`, "fish", 1915, "", "zz9fvo3wr")
assertParseName(`fish.chunk.1917_zz9fvo3wr`, "fish", 1915, "", "zz9fvo3wr")
// valid temporary data chunks (old temporary suffix, only parse)
assertParseName(`fish.chunk.004..tmp_0000000047`, "fish", 2, "", "001b")
assertParseName(`fish.chunk.323..tmp_9994567890123`, "fish", 321, "", "3jjfvo3wr")
// parsing invalid data chunk names // parsing invalid data chunk names
assertParseName(`fish.chunk.3`, "", -1, "", -1) assertParseName(`fish.chunk.3`, "", -1, "", "")
assertParseName(`fish.chunk.001`, "", -1, "", -1) assertParseName(`fish.chunk.001`, "", -1, "", "")
assertParseName(`fish.chunk.21`, "", -1, "", -1) assertParseName(`fish.chunk.21`, "", -1, "", "")
assertParseName(`fish.chunk.-21`, "", -1, "", -1) assertParseName(`fish.chunk.-21`, "", -1, "", "")
assertParseName(`fish.chunk.004.tmp_0000000021`, "", -1, "", -1) assertParseName(`fish.chunk.004abcd`, "", -1, "", "") // missing underscore delimiter
assertParseName(`fish.chunk.003..tmp_123456789`, "", -1, "", -1) assertParseName(`fish.chunk.004__1234`, "", -1, "", "") // extra underscore delimiter
assertParseName(`fish.chunk.003..tmp_012345678901234567890123456789`, "", -1, "", -1) assertParseName(`fish.chunk.004_123`, "", -1, "", "") // too short temporary suffix
assertParseName(`fish.chunk.003..tmp_-1`, "", -1, "", -1) assertParseName(`fish.chunk.004_1234567890`, "", -1, "", "") // too long temporary suffix
assertParseName(`fish.chunk.004_-1234`, "", -1, "", "") // temporary suffix must be positive
assertParseName(`fish.chunk.004_123E`, "", -1, "", "") // uppercase not allowed
assertParseName(`fish.chunk.004_12.3`, "", -1, "", "") // punctuation not allowed
// parsing invalid data chunk names (old temporary suffix)
assertParseName(`fish.chunk.004.tmp_0000000021`, "", -1, "", "")
assertParseName(`fish.chunk.003..tmp_123456789`, "", -1, "", "")
assertParseName(`fish.chunk.003..tmp_012345678901234567890123456789`, "", -1, "", "")
assertParseName(`fish.chunk.323..tmp_12345678901234`, "", -1, "", "")
assertParseName(`fish.chunk.003..tmp_-1`, "", -1, "", "")
// valid control chunks // valid control chunks
assertMakeName(`fish.chunk._info`, "fish", -1, "info", -1) assertMakeName(`fish.chunk._info`, "fish", -1, "info", "")
assertMakeName(`fish.chunk._locks`, "fish", -2, "locks", -1) assertMakeName(`fish.chunk._locks`, "fish", -2, "locks", "")
assertMakeName(`fish.chunk._blockinfo`, "fish", -3, "blockinfo", -1) assertMakeName(`fish.chunk._blkinfo`, "fish", -3, "blkinfo", "")
assertMakeName(`fish.chunk._x2y`, "fish", -4, "x2y", "")
assertParseName(`fish.chunk._info`, "fish", -1, "info", -1) assertParseName(`fish.chunk._info`, "fish", -1, "info", "")
assertParseName(`fish.chunk._locks`, "fish", -1, "locks", -1) assertParseName(`fish.chunk._locks`, "fish", -1, "locks", "")
assertParseName(`fish.chunk._blockinfo`, "fish", -1, "blockinfo", -1) assertParseName(`fish.chunk._blkinfo`, "fish", -1, "blkinfo", "")
assertParseName(`fish.chunk._x2y`, "fish", -1, "x2y", "")
// valid temporary control chunks // valid temporary control chunks
assertMakeName(`fish.chunk._info..tmp_0000000021`, "fish", -1, "info", 21) assertMakeName(`fish.chunk._info_0001`, "fish", -1, "info", "1")
assertMakeName(`fish.chunk._locks..tmp_0000054321`, "fish", -2, "locks", 54321) assertMakeName(`fish.chunk._locks_4321`, "fish", -2, "locks", "4321")
assertMakeName(`fish.chunk._uploads..tmp_0000000000`, "fish", -3, "uploads", 0) assertMakeName(`fish.chunk._uploads_abcd`, "fish", -3, "uploads", "abcd")
assertMakeName(`fish.chunk._blockinfo..tmp_1234567890123456789`, "fish", -4, "blockinfo", 1234567890123456789) assertMakeName(`fish.chunk._blkinfo_xyzabcdef`, "fish", -4, "blkinfo", "xyzabcdef")
assertMakeName(`fish.chunk._x2y_1aaa`, "fish", -5, "x2y", "1aaa")
assertParseName(`fish.chunk._info..tmp_0000000021`, "fish", -1, "info", 21) assertParseName(`fish.chunk._info_0001`, "fish", -1, "info", "0001")
assertParseName(`fish.chunk._locks..tmp_0000054321`, "fish", -1, "locks", 54321) assertParseName(`fish.chunk._locks_4321`, "fish", -1, "locks", "4321")
assertParseName(`fish.chunk._uploads..tmp_0000000000`, "fish", -1, "uploads", 0) assertParseName(`fish.chunk._uploads_9abc`, "fish", -1, "uploads", "9abc")
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789`, "fish", -1, "blockinfo", 1234567890123456789) assertParseName(`fish.chunk._blkinfo_xyzabcdef`, "fish", -1, "blkinfo", "xyzabcdef")
assertParseName(`fish.chunk._x2y_1aaa`, "fish", -1, "x2y", "1aaa")
// valid temporary control chunks (old temporary suffix, parse only)
assertParseName(`fish.chunk._info..tmp_0000000047`, "fish", -1, "info", "001b")
assertParseName(`fish.chunk._locks..tmp_0000054321`, "fish", -1, "locks", "15wx")
assertParseName(`fish.chunk._uploads..tmp_0000000000`, "fish", -1, "uploads", "0000")
assertParseName(`fish.chunk._blkinfo..tmp_9994567890123`, "fish", -1, "blkinfo", "3jjfvo3wr")
assertParseName(`fish.chunk._x2y..tmp_0000000000`, "fish", -1, "x2y", "0000")
// parsing invalid control chunk names // parsing invalid control chunk names
assertParseName(`fish.chunk.info`, "", -1, "", -1) assertParseName(`fish.chunk.metadata`, "", -1, "", "") // must be prepended by underscore
assertParseName(`fish.chunk.locks`, "", -1, "", -1) assertParseName(`fish.chunk.info`, "", -1, "", "")
assertParseName(`fish.chunk.uploads`, "", -1, "", -1) assertParseName(`fish.chunk.locks`, "", -1, "", "")
assertParseName(`fish.chunk.blockinfo`, "", -1, "", -1) assertParseName(`fish.chunk.uploads`, "", -1, "", "")
assertParseName(`fish.chunk._os`, "", -1, "", -1) assertParseName(`fish.chunk._os`, "", -1, "", "") // too short
assertParseName(`fish.chunk._futuredata`, "", -1, "", -1) assertParseName(`fish.chunk._metadata`, "", -1, "", "") // too long
assertParseName(`fish.chunk._me_ta`, "", -1, "", -1) assertParseName(`fish.chunk._blockinfo`, "", -1, "", "") // way too long
assertParseName(`fish.chunk._in-fo`, "", -1, "", -1) assertParseName(`fish.chunk._4me`, "", -1, "", "") // cannot start with digit
assertParseName(`fish.chunk._.bin`, "", -1, "", -1) assertParseName(`fish.chunk._567`, "", -1, "", "") // cannot be all digits
assertParseName(`fish.chunk._me_ta`, "", -1, "", "") // punctuation not allowed
assertParseName(`fish.chunk._in-fo`, "", -1, "", "")
assertParseName(`fish.chunk._.bin`, "", -1, "", "")
assertParseName(`fish.chunk._.2xy`, "", -1, "", "")
assertParseName(`fish.chunk._locks..tmp_123456789`, "", -1, "", -1) // parsing invalid temporary control chunks
assertParseName(`fish.chunk._meta..tmp_-1`, "", -1, "", -1) assertParseName(`fish.chunk._blkinfo1234`, "", -1, "", "") // missing underscore delimiter
assertParseName(`fish.chunk._blockinfo..tmp_012345678901234567890123456789`, "", -1, "", -1) assertParseName(`fish.chunk._info__1234`, "", -1, "", "") // extra underscore delimiter
assertParseName(`fish.chunk._info_123`, "", -1, "", "") // too short temporary suffix
assertParseName(`fish.chunk._info_1234567890`, "", -1, "", "") // too long temporary suffix
assertParseName(`fish.chunk._info_-1234`, "", -1, "", "") // temporary suffix must be positive
assertParseName(`fish.chunk._info_123E`, "", -1, "", "") // uppercase not allowed
assertParseName(`fish.chunk._info_12.3`, "", -1, "", "") // punctuation not allowed
assertParseName(`fish.chunk._locks..tmp_123456789`, "", -1, "", "")
assertParseName(`fish.chunk._meta..tmp_-1`, "", -1, "", "")
assertParseName(`fish.chunk._blockinfo..tmp_012345678901234567890123456789`, "", -1, "", "")
// short control chunk names: 3 letters ok, 1-2 letters not allowed // short control chunk names: 3 letters ok, 1-2 letters not allowed
assertMakeName(`fish.chunk._ext`, "fish", -1, "ext", -1) assertMakeName(`fish.chunk._ext`, "fish", -1, "ext", "")
assertMakeName(`fish.chunk._ext..tmp_0000000021`, "fish", -1, "ext", 21) assertParseName(`fish.chunk._int`, "fish", -1, "int", "")
assertParseName(`fish.chunk._int`, "fish", -1, "int", -1)
assertParseName(`fish.chunk._int..tmp_0000000021`, "fish", -1, "int", 21) assertMakeNamePanics("fish", -1, "in", "")
assertMakeNamePanics("fish", -1, "in", -1) assertMakeNamePanics("fish", -1, "up", "4")
assertMakeNamePanics("fish", -1, "up", 4) assertMakeNamePanics("fish", -1, "x", "")
assertMakeNamePanics("fish", -1, "x", -1) assertMakeNamePanics("fish", -1, "c", "1z")
assertMakeNamePanics("fish", -1, "c", 4)
assertMakeName(`fish.chunk._ext_0000`, "fish", -1, "ext", "0")
assertMakeName(`fish.chunk._ext_0026`, "fish", -1, "ext", "26")
assertMakeName(`fish.chunk._int_0abc`, "fish", -1, "int", "abc")
assertMakeName(`fish.chunk._int_9xyz`, "fish", -1, "int", "9xyz")
assertMakeName(`fish.chunk._out_jj5fvo3wr`, "fish", -1, "out", "jj5fvo3wr")
assertMakeName(`fish.chunk._out_jj5fvo3wr`, "fish", -1, "out", "jj5fvo3wr")
assertParseName(`fish.chunk._ext_0000`, "fish", -1, "ext", "0000")
assertParseName(`fish.chunk._ext_0026`, "fish", -1, "ext", "0026")
assertParseName(`fish.chunk._int_0abc`, "fish", -1, "int", "0abc")
assertParseName(`fish.chunk._int_9xyz`, "fish", -1, "int", "9xyz")
assertParseName(`fish.chunk._out_jj5fvo3wr`, "fish", -1, "out", "jj5fvo3wr")
assertParseName(`fish.chunk._out_jj5fvo3wr`, "fish", -1, "out", "jj5fvo3wr")
// base file name can sometimes look like a valid chunk name // base file name can sometimes look like a valid chunk name
assertParseName(`fish.chunk.003.chunk.004`, "fish.chunk.003", 2, "", -1) assertParseName(`fish.chunk.003.chunk.004`, "fish.chunk.003", 2, "", "")
assertParseName(`fish.chunk.003.chunk.005..tmp_0000000021`, "fish.chunk.003", 3, "", 21) assertParseName(`fish.chunk.003.chunk._info`, "fish.chunk.003", -1, "info", "")
assertParseName(`fish.chunk.003.chunk._info`, "fish.chunk.003", -1, "info", -1) assertParseName(`fish.chunk.003.chunk._Meta`, "", -1, "", "")
assertParseName(`fish.chunk.003.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk.003", -1, "blockinfo", 1234567890123456789)
assertParseName(`fish.chunk.003.chunk._Meta`, "", -1, "", -1)
assertParseName(`fish.chunk.003.chunk._x..tmp_0000054321`, "", -1, "", -1)
assertParseName(`fish.chunk.004..tmp_0000000021.chunk.004`, "fish.chunk.004..tmp_0000000021", 2, "", -1) assertParseName(`fish.chunk._info.chunk.004`, "fish.chunk._info", 2, "", "")
assertParseName(`fish.chunk.004..tmp_0000000021.chunk.005..tmp_0000000021`, "fish.chunk.004..tmp_0000000021", 3, "", 21) assertParseName(`fish.chunk._info.chunk._info`, "fish.chunk._info", -1, "info", "")
assertParseName(`fish.chunk.004..tmp_0000000021.chunk._info`, "fish.chunk.004..tmp_0000000021", -1, "info", -1) assertParseName(`fish.chunk._info.chunk._info.chunk._Meta`, "", -1, "", "")
assertParseName(`fish.chunk.004..tmp_0000000021.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk.004..tmp_0000000021", -1, "blockinfo", 1234567890123456789)
assertParseName(`fish.chunk.004..tmp_0000000021.chunk._Meta`, "", -1, "", -1)
assertParseName(`fish.chunk.004..tmp_0000000021.chunk._x..tmp_0000054321`, "", -1, "", -1)
assertParseName(`fish.chunk._info.chunk.004`, "fish.chunk._info", 2, "", -1) // base file name looking like a valid chunk name (old temporary suffix)
assertParseName(`fish.chunk._info.chunk.005..tmp_0000000021`, "fish.chunk._info", 3, "", 21) assertParseName(`fish.chunk.003.chunk.005..tmp_0000000022`, "fish.chunk.003", 3, "", "000m")
assertParseName(`fish.chunk._info.chunk._info`, "fish.chunk._info", -1, "info", -1) assertParseName(`fish.chunk.003.chunk._x..tmp_0000054321`, "", -1, "", "")
assertParseName(`fish.chunk._info.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk._info", -1, "blockinfo", 1234567890123456789) assertParseName(`fish.chunk._info.chunk.005..tmp_0000000023`, "fish.chunk._info", 3, "", "000n")
assertParseName(`fish.chunk._info.chunk._info.chunk._Meta`, "", -1, "", -1) assertParseName(`fish.chunk._info.chunk._info.chunk._x..tmp_0000054321`, "", -1, "", "")
assertParseName(`fish.chunk._info.chunk._info.chunk._x..tmp_0000054321`, "", -1, "", -1)
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk.004`, "fish.chunk._blockinfo..tmp_1234567890123456789", 2, "", -1) assertParseName(`fish.chunk.003.chunk._blkinfo..tmp_9994567890123`, "fish.chunk.003", -1, "blkinfo", "3jjfvo3wr")
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk.005..tmp_0000000021`, "fish.chunk._blockinfo..tmp_1234567890123456789", 3, "", 21) assertParseName(`fish.chunk._info.chunk._blkinfo..tmp_9994567890123`, "fish.chunk._info", -1, "blkinfo", "3jjfvo3wr")
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._info`, "fish.chunk._blockinfo..tmp_1234567890123456789", -1, "info", -1)
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk._blockinfo..tmp_1234567890123456789", -1, "blockinfo", 1234567890123456789) assertParseName(`fish.chunk.004..tmp_0000000021.chunk.004`, "fish.chunk.004..tmp_0000000021", 2, "", "")
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._info.chunk._Meta`, "", -1, "", -1) assertParseName(`fish.chunk.004..tmp_0000000021.chunk.005..tmp_0000000025`, "fish.chunk.004..tmp_0000000021", 3, "", "000p")
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._info.chunk._x..tmp_0000054321`, "", -1, "", -1) assertParseName(`fish.chunk.004..tmp_0000000021.chunk._info`, "fish.chunk.004..tmp_0000000021", -1, "info", "")
assertParseName(`fish.chunk.004..tmp_0000000021.chunk._blkinfo..tmp_9994567890123`, "fish.chunk.004..tmp_0000000021", -1, "blkinfo", "3jjfvo3wr")
assertParseName(`fish.chunk.004..tmp_0000000021.chunk._Meta`, "", -1, "", "")
assertParseName(`fish.chunk.004..tmp_0000000021.chunk._x..tmp_0000054321`, "", -1, "", "")
assertParseName(`fish.chunk._blkinfo..tmp_9994567890123.chunk.004`, "fish.chunk._blkinfo..tmp_9994567890123", 2, "", "")
assertParseName(`fish.chunk._blkinfo..tmp_9994567890123.chunk.005..tmp_0000000026`, "fish.chunk._blkinfo..tmp_9994567890123", 3, "", "000q")
assertParseName(`fish.chunk._blkinfo..tmp_9994567890123.chunk._info`, "fish.chunk._blkinfo..tmp_9994567890123", -1, "info", "")
assertParseName(`fish.chunk._blkinfo..tmp_9994567890123.chunk._blkinfo..tmp_9994567890123`, "fish.chunk._blkinfo..tmp_9994567890123", -1, "blkinfo", "3jjfvo3wr")
assertParseName(`fish.chunk._blkinfo..tmp_9994567890123.chunk._info.chunk._Meta`, "", -1, "", "")
assertParseName(`fish.chunk._blkinfo..tmp_9994567890123.chunk._info.chunk._x..tmp_0000054321`, "", -1, "", "")
assertParseName(`fish.chunk._blkinfo..tmp_1234567890123456789.chunk.004`, "fish.chunk._blkinfo..tmp_1234567890123456789", 2, "", "")
assertParseName(`fish.chunk._blkinfo..tmp_1234567890123456789.chunk.005..tmp_0000000022`, "fish.chunk._blkinfo..tmp_1234567890123456789", 3, "", "000m")
assertParseName(`fish.chunk._blkinfo..tmp_1234567890123456789.chunk._info`, "fish.chunk._blkinfo..tmp_1234567890123456789", -1, "info", "")
assertParseName(`fish.chunk._blkinfo..tmp_1234567890123456789.chunk._blkinfo..tmp_9994567890123`, "fish.chunk._blkinfo..tmp_1234567890123456789", -1, "blkinfo", "3jjfvo3wr")
assertParseName(`fish.chunk._blkinfo..tmp_1234567890123456789.chunk._info.chunk._Meta`, "", -1, "", "")
assertParseName(`fish.chunk._blkinfo..tmp_1234567890123456789.chunk._info.chunk._x..tmp_0000054321`, "", -1, "", "")
// attempts to make invalid chunk names // attempts to make invalid chunk names
assertMakeNamePanics("fish", -1, "", -1) // neither data nor control assertMakeNamePanics("fish", -1, "", "") // neither data nor control
assertMakeNamePanics("fish", 0, "info", -1) // both data and control assertMakeNamePanics("fish", 0, "info", "") // both data and control
assertMakeNamePanics("fish", -1, "futuredata", -1) // control type too long assertMakeNamePanics("fish", -1, "metadata", "") // control type too long
assertMakeNamePanics("fish", -1, "123", -1) // digits not allowed assertMakeNamePanics("fish", -1, "blockinfo", "") // control type way too long
assertMakeNamePanics("fish", -1, "Meta", -1) // only lower case letters allowed assertMakeNamePanics("fish", -1, "2xy", "") // first digit not allowed
assertMakeNamePanics("fish", -1, "in-fo", -1) // punctuation not allowed assertMakeNamePanics("fish", -1, "123", "") // all digits not allowed
assertMakeNamePanics("fish", -1, "_info", -1) assertMakeNamePanics("fish", -1, "Meta", "") // only lower case letters allowed
assertMakeNamePanics("fish", -1, "info_", -1) assertMakeNamePanics("fish", -1, "in-fo", "") // punctuation not allowed
assertMakeNamePanics("fish", -2, ".bind", -3) assertMakeNamePanics("fish", -1, "_info", "")
assertMakeNamePanics("fish", -2, "bind.", -3) assertMakeNamePanics("fish", -1, "info_", "")
assertMakeNamePanics("fish", -2, ".bind", "")
assertMakeNamePanics("fish", -2, "bind.", "")
assertMakeNamePanics("fish", -1, "", 1) // neither data nor control assertMakeNamePanics("fish", -1, "", "1") // neither data nor control
assertMakeNamePanics("fish", 0, "info", 12) // both data and control assertMakeNamePanics("fish", 0, "info", "23") // both data and control
assertMakeNamePanics("fish", -1, "futuredata", 45) // control type too long assertMakeNamePanics("fish", -1, "metadata", "45") // control type too long
assertMakeNamePanics("fish", -1, "123", 123) // digits not allowed assertMakeNamePanics("fish", -1, "blockinfo", "7") // control type way too long
assertMakeNamePanics("fish", -1, "Meta", 456) // only lower case letters allowed assertMakeNamePanics("fish", -1, "2xy", "abc") // first digit not allowed
assertMakeNamePanics("fish", -1, "in-fo", 321) // punctuation not allowed assertMakeNamePanics("fish", -1, "123", "def") // all digits not allowed
assertMakeNamePanics("fish", -1, "_info", 15678) assertMakeNamePanics("fish", -1, "Meta", "mnk") // only lower case letters allowed
assertMakeNamePanics("fish", -1, "info_", 999) assertMakeNamePanics("fish", -1, "in-fo", "xyz") // punctuation not allowed
assertMakeNamePanics("fish", -2, ".bind", 0) assertMakeNamePanics("fish", -1, "_info", "5678")
assertMakeNamePanics("fish", -2, "bind.", 0) assertMakeNamePanics("fish", -1, "info_", "999")
assertMakeNamePanics("fish", -2, ".bind", "0")
assertMakeNamePanics("fish", -2, "bind.", "0")
assertMakeNamePanics("fish", 0, "", "1234567890") // temporary suffix too long
assertMakeNamePanics("fish", 0, "", "123F4") // uppercase not allowed
assertMakeNamePanics("fish", 0, "", "123.") // punctuation not allowed
assertMakeNamePanics("fish", 0, "", "_123")
} }
func testSmallFileInternals(t *testing.T, f *Fs) { func testSmallFileInternals(t *testing.T, f *Fs) {
@ -383,7 +469,7 @@ func testPreventCorruption(t *testing.T, f *Fs) {
billyObj := newFile("billy") billyObj := newFile("billy")
billyChunkName := func(chunkNo int) string { billyChunkName := func(chunkNo int) string {
return f.makeChunkName(billyObj.Remote(), chunkNo, "", -1) return f.makeChunkName(billyObj.Remote(), chunkNo, "", "")
} }
err := f.Mkdir(ctx, billyChunkName(1)) err := f.Mkdir(ctx, billyChunkName(1))
@ -433,7 +519,7 @@ func testPreventCorruption(t *testing.T, f *Fs) {
// recreate billy in case it was anyhow corrupted // recreate billy in case it was anyhow corrupted
willyObj := newFile("willy") willyObj := newFile("willy")
willyChunkName := f.makeChunkName(willyObj.Remote(), 1, "", -1) willyChunkName := f.makeChunkName(willyObj.Remote(), 1, "", "")
f.opt.FailHard = false f.opt.FailHard = false
willyChunk, err := f.NewObject(ctx, willyChunkName) willyChunk, err := f.NewObject(ctx, willyChunkName)
f.opt.FailHard = true f.opt.FailHard = true
@ -484,7 +570,7 @@ func testChunkNumberOverflow(t *testing.T, f *Fs) {
f.opt.FailHard = false f.opt.FailHard = false
file, fileName := newFile(f, "wreaker") file, fileName := newFile(f, "wreaker")
wreak, _ := newFile(f.base, f.makeChunkName("wreaker", wreakNumber, "", -1)) wreak, _ := newFile(f.base, f.makeChunkName("wreaker", wreakNumber, "", ""))
f.opt.FailHard = false f.opt.FailHard = false
fstest.CheckListingWithRoot(t, f, dir, nil, nil, f.Precision()) fstest.CheckListingWithRoot(t, f, dir, nil, nil, f.Precision())
@ -532,7 +618,7 @@ func testMetadataInput(t *testing.T, f *Fs) {
filename := path.Join(dir, name) filename := path.Join(dir, name)
require.True(t, len(contents) > 2 && len(contents) < minChunkForTest, description+" test data is correct") require.True(t, len(contents) > 2 && len(contents) < minChunkForTest, description+" test data is correct")
part := putFile(f.base, f.makeChunkName(filename, 0, "", -1), "oops", "", true) part := putFile(f.base, f.makeChunkName(filename, 0, "", ""), "oops", "", true)
_ = putFile(f, filename, contents, "upload "+description, false) _ = putFile(f, filename, contents, "upload "+description, false)
obj, err := f.NewObject(ctx, filename) obj, err := f.NewObject(ctx, filename)

View file

@ -130,10 +130,10 @@ error message in such cases.
#### Chunk names #### Chunk names
The default chunk name format is `*.rclone-chunk.###`, hence by default The default chunk name format is `*.rclone_chunk.###`, hence by default
chunk names are `BIG_FILE_NAME.rclone-chunk.001`, chunk names are `BIG_FILE_NAME.rclone_chunk.001`,
`BIG_FILE_NAME.rclone-chunk.002` etc. You can configure a different name `BIG_FILE_NAME.rclone_chunk.002` etc. You can configure another name format
format using the `--chunker-name-format` option. The format uses asterisk using the `name_format` configuration file option. The format uses asterisk
`*` as a placeholder for the base file name and one or more consecutive `*` as a placeholder for the base file name and one or more consecutive
hash characters `#` as a placeholder for sequential chunk number. hash characters `#` as a placeholder for sequential chunk number.
There must be one and only one asterisk. The number of consecutive hash There must be one and only one asterisk. The number of consecutive hash
@ -211,6 +211,9 @@ file hashing, configure chunker with `md5all` or `sha1all`. These two modes
guarantee given hash for all files. If wrapped remote doesn't support it, guarantee given hash for all files. If wrapped remote doesn't support it,
chunker will then add metadata to all files, even small. However, this can chunker will then add metadata to all files, even small. However, this can
double the amount of small files in storage and incur additional service charges. double the amount of small files in storage and incur additional service charges.
You can even use chunker to force md5/sha1 support in any other remote
at the expense of sidecar meta objects by setting e.g. `hash_type=sha1all`
to force hashsums and `chunk_size=1P` to effectively disable chunking.
Normally, when a file is copied to chunker controlled remote, chunker Normally, when a file is copied to chunker controlled remote, chunker
will ask the file source for compatible file hash and revert to on-the-fly will ask the file source for compatible file hash and revert to on-the-fly
@ -274,6 +277,14 @@ Chunker requires wrapped remote to support server side `move` (or `copy` +
This is because it internally renames temporary chunk files to their final This is because it internally renames temporary chunk files to their final
names when an operation completes successfully. names when an operation completes successfully.
Chunker encodes the chunk number in the file name, so with the default
`name_format` setting it adds 17 characters. Chunker also adds 7 characters of
temporary suffix during operations. Many file systems limit the base file name
(without path) to 255 characters. Using rclone's crypt remote as a base file
system limits file names to 143 characters. Thus, the maximum name length is 231
for most files and 119 for chunker-over-crypt. A user in need can change the name
format to e.g. `*.rcc##` and save 10 characters (provided at most 99 chunks per file).
Note that a move implemented using the copy-and-delete method may incur Note that a move implemented using the copy-and-delete method may incur
double charging with some cloud storage providers. double charging with some cloud storage providers.