forked from TrueCloudLab/rclone
chunker: reservations for future extensions
This commit is contained in:
parent
41ed33b08e
commit
7aa2b4191c
3 changed files with 690 additions and 394 deletions
File diff suppressed because it is too large
Load diff
|
@ -18,7 +18,7 @@ var (
|
|||
)
|
||||
|
||||
// test that chunking does not break large uploads
|
||||
func (f *Fs) InternalTestPutLarge(t *testing.T, kilobytes int) {
|
||||
func testPutLarge(t *testing.T, f *Fs, kilobytes int) {
|
||||
t.Run(fmt.Sprintf("PutLarge%dk", kilobytes), func(t *testing.T) {
|
||||
fstests.TestPutLarge(context.Background(), t, f, &fstest.Item{
|
||||
ModTime: fstest.Time("2001-02-03T04:05:06.499999999Z"),
|
||||
|
@ -28,119 +28,228 @@ func (f *Fs) InternalTestPutLarge(t *testing.T, kilobytes int) {
|
|||
})
|
||||
}
|
||||
|
||||
func (f *Fs) InternalTestChunkNameFormat(t *testing.T) {
|
||||
savedNameFormat := f.opt.NameFormat
|
||||
savedStartFrom := f.opt.StartFrom
|
||||
// test chunk name parser
|
||||
func testChunkNameFormat(t *testing.T, f *Fs) {
|
||||
saveOpt := f.opt
|
||||
defer func() {
|
||||
// restore original settings
|
||||
_ = f.parseNameFormat(savedNameFormat)
|
||||
f.opt.StartFrom = savedStartFrom
|
||||
// restore original settings (f is pointer, f.opt is struct)
|
||||
f.opt = saveOpt
|
||||
_ = f.setChunkNameFormat(f.opt.NameFormat)
|
||||
}()
|
||||
var err error
|
||||
|
||||
err = f.parseNameFormat("*.rclone_chunk.###")
|
||||
assertFormat := func(pattern, wantDataFormat, wantCtrlFormat, wantNameRegexp string) {
|
||||
err := f.setChunkNameFormat(pattern)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, `%s.rclone_chunk.%03d`, f.nameFormat)
|
||||
assert.Equal(t, `^(.+)\.rclone_chunk\.([0-9]{3,})$`, f.nameRegexp.String())
|
||||
|
||||
err = f.parseNameFormat("*.rclone_chunk.#")
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, `%s.rclone_chunk.%d`, f.nameFormat)
|
||||
assert.Equal(t, `^(.+)\.rclone_chunk\.([0-9]+)$`, f.nameRegexp.String())
|
||||
|
||||
err = f.parseNameFormat("*_chunk_#####")
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, `%s_chunk_%05d`, f.nameFormat)
|
||||
assert.Equal(t, `^(.+)_chunk_([0-9]{5,})$`, f.nameRegexp.String())
|
||||
|
||||
err = f.parseNameFormat("*-chunk-#")
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, `%s-chunk-%d`, f.nameFormat)
|
||||
assert.Equal(t, `^(.+)-chunk-([0-9]+)$`, f.nameRegexp.String())
|
||||
|
||||
err = f.parseNameFormat("_*-chunk-##,")
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, `_%s-chunk-%02d,`, f.nameFormat)
|
||||
assert.Equal(t, `^_(.+)-chunk-([0-9]{2,}),$`, f.nameRegexp.String())
|
||||
|
||||
err = f.parseNameFormat(`*-chunk-#-%^$()[]{}.+-!?:\/`)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, `%s-chunk-%d-%%^$()[]{}.+-!?:\/`, f.nameFormat)
|
||||
assert.Equal(t, `^(.+)-chunk-([0-9]+)-%\^\$\(\)\[\]\{\}\.\+-!\?:\\/$`, f.nameRegexp.String())
|
||||
|
||||
err = f.parseNameFormat("chunk-#")
|
||||
assert.Error(t, err)
|
||||
|
||||
err = f.parseNameFormat("*-chunk")
|
||||
assert.Error(t, err)
|
||||
|
||||
err = f.parseNameFormat("*-*-chunk-#")
|
||||
assert.Error(t, err)
|
||||
|
||||
err = f.parseNameFormat("*-chunk-#-#")
|
||||
assert.Error(t, err)
|
||||
|
||||
err = f.parseNameFormat("#-chunk-*")
|
||||
assert.Error(t, err)
|
||||
|
||||
err = f.parseNameFormat("*#")
|
||||
assert.NoError(t, err)
|
||||
|
||||
err = f.parseNameFormat("**#")
|
||||
assert.Error(t, err)
|
||||
err = f.parseNameFormat("#*")
|
||||
assert.Error(t, err)
|
||||
err = f.parseNameFormat("")
|
||||
assert.Error(t, err)
|
||||
err = f.parseNameFormat("-")
|
||||
assert.Error(t, err)
|
||||
|
||||
f.opt.StartFrom = 2
|
||||
err = f.parseNameFormat("*.chunk.###")
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, `%s.chunk.%03d`, f.nameFormat)
|
||||
assert.Equal(t, `^(.+)\.chunk\.([0-9]{3,})$`, f.nameRegexp.String())
|
||||
|
||||
assert.Equal(t, "fish.chunk.003", f.makeChunkName("fish", 1, -1))
|
||||
assert.Equal(t, "fish.chunk.011..tmp_0000054321", f.makeChunkName("fish", 9, 54321))
|
||||
assert.Equal(t, "fish.chunk.011..tmp_1234567890", f.makeChunkName("fish", 9, 1234567890))
|
||||
assert.Equal(t, "fish.chunk.1916..tmp_123456789012345", f.makeChunkName("fish", 1914, 123456789012345))
|
||||
|
||||
name, chunkNo, tempNo := f.parseChunkName("fish.chunk.003")
|
||||
assert.True(t, name == "fish" && chunkNo == 1 && tempNo == -1)
|
||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.004..tmp_0000000021")
|
||||
assert.True(t, name == "fish" && chunkNo == 2 && tempNo == 21)
|
||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.021")
|
||||
assert.True(t, name == "fish" && chunkNo == 19 && tempNo == -1)
|
||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.323..tmp_1234567890123456789")
|
||||
assert.True(t, name == "fish" && chunkNo == 321 && tempNo == 1234567890123456789)
|
||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.3")
|
||||
assert.True(t, name == "" && chunkNo == -1 && tempNo == -1)
|
||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.001")
|
||||
assert.True(t, name == "" && chunkNo == -1 && tempNo == -1)
|
||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.21")
|
||||
assert.True(t, name == "" && chunkNo == -1 && tempNo == -1)
|
||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.-21")
|
||||
assert.True(t, name == "" && chunkNo == -1 && tempNo == -1)
|
||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.004.tmp_0000000021")
|
||||
assert.True(t, name == "" && chunkNo == -1 && tempNo == -1)
|
||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.003..tmp_123456789")
|
||||
assert.True(t, name == "" && chunkNo == -1 && tempNo == -1)
|
||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.003..tmp_012345678901234567890123456789")
|
||||
assert.True(t, name == "" && chunkNo == -1 && tempNo == -1)
|
||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.003..tmp_-1")
|
||||
assert.True(t, name == "" && chunkNo == -1 && tempNo == -1)
|
||||
assert.Equal(t, wantDataFormat, f.dataNameFmt)
|
||||
assert.Equal(t, wantCtrlFormat, f.ctrlNameFmt)
|
||||
assert.Equal(t, wantNameRegexp, f.nameRegexp.String())
|
||||
}
|
||||
|
||||
assertFormatValid := func(pattern string) {
|
||||
err := f.setChunkNameFormat(pattern)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
||||
assertFormatInvalid := func(pattern string) {
|
||||
err := f.setChunkNameFormat(pattern)
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
assertMakeName := func(wantChunkName, mainName string, chunkNo int, ctrlType string, xactNo int64) {
|
||||
gotChunkName := f.makeChunkName(mainName, chunkNo, ctrlType, xactNo)
|
||||
assert.Equal(t, wantChunkName, gotChunkName)
|
||||
}
|
||||
|
||||
assertMakeNamePanics := func(mainName string, chunkNo int, ctrlType string, xactNo int64) {
|
||||
assert.Panics(t, func() {
|
||||
_ = f.makeChunkName(mainName, chunkNo, ctrlType, xactNo)
|
||||
}, "makeChunkName(%q,%d,%q,%d) should panic", mainName, chunkNo, ctrlType, xactNo)
|
||||
}
|
||||
|
||||
assertParseName := func(fileName, wantMainName string, wantChunkNo int, wantCtrlType string, wantXactNo int64) {
|
||||
gotMainName, gotChunkNo, gotCtrlType, gotXactNo := f.parseChunkName(fileName)
|
||||
assert.Equal(t, wantMainName, gotMainName)
|
||||
assert.Equal(t, wantChunkNo, gotChunkNo)
|
||||
assert.Equal(t, wantCtrlType, gotCtrlType)
|
||||
assert.Equal(t, wantXactNo, gotXactNo)
|
||||
}
|
||||
|
||||
const newFormatSupported = false // support for patterns not starting with base name (*)
|
||||
|
||||
// valid formats
|
||||
assertFormat(`*.rclone_chunk.###`, `%s.rclone_chunk.%03d`, `%s.rclone_chunk._%s`, `^(.+?)\.rclone_chunk\.(?:([0-9]{3,})|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`)
|
||||
assertFormat(`*.rclone_chunk.#`, `%s.rclone_chunk.%d`, `%s.rclone_chunk._%s`, `^(.+?)\.rclone_chunk\.(?:([0-9]+)|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`)
|
||||
assertFormat(`*_chunk_#####`, `%s_chunk_%05d`, `%s_chunk__%s`, `^(.+?)_chunk_(?:([0-9]{5,})|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`)
|
||||
assertFormat(`*-chunk-#`, `%s-chunk-%d`, `%s-chunk-_%s`, `^(.+?)-chunk-(?:([0-9]+)|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`)
|
||||
assertFormat(`*-chunk-#-%^$()[]{}.+-!?:\`, `%s-chunk-%d-%%^$()[]{}.+-!?:\`, `%s-chunk-_%s-%%^$()[]{}.+-!?:\`, `^(.+?)-chunk-(?:([0-9]+)|_([a-z]{3,9}))-%\^\$\(\)\[\]\{\}\.\+-!\?:\\(?:\.\.tmp_([0-9]{10,19}))?$`)
|
||||
if newFormatSupported {
|
||||
assertFormat(`_*-chunk-##,`, `_%s-chunk-%02d,`, `_%s-chunk-_%s,`, `^_(.+?)-chunk-(?:([0-9]{2,})|_([a-z]{3,9})),(?:\.\.tmp_([0-9]{10,19}))?$`)
|
||||
}
|
||||
|
||||
// invalid formats
|
||||
assertFormatInvalid(`chunk-#`)
|
||||
assertFormatInvalid(`*-chunk`)
|
||||
assertFormatInvalid(`*-*-chunk-#`)
|
||||
assertFormatInvalid(`*-chunk-#-#`)
|
||||
assertFormatInvalid(`#-chunk-*`)
|
||||
assertFormatInvalid(`*/#`)
|
||||
|
||||
assertFormatValid(`*#`)
|
||||
assertFormatInvalid(`**#`)
|
||||
assertFormatInvalid(`#*`)
|
||||
assertFormatInvalid(``)
|
||||
assertFormatInvalid(`-`)
|
||||
|
||||
// quick tests
|
||||
if newFormatSupported {
|
||||
assertFormat(`part_*_#`, `part_%s_%d`, `part_%s__%s`, `^part_(.+?)_(?:([0-9]+)|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`)
|
||||
f.opt.StartFrom = 1
|
||||
|
||||
assertMakeName(`part_fish_1`, "fish", 0, "", -1)
|
||||
assertParseName(`part_fish_43`, "fish", 42, "", -1)
|
||||
assertMakeName(`part_fish_3..tmp_0000000004`, "fish", 2, "", 4)
|
||||
assertParseName(`part_fish_4..tmp_0000000005`, "fish", 3, "", 5)
|
||||
assertMakeName(`part_fish__locks`, "fish", -2, "locks", -3)
|
||||
assertParseName(`part_fish__locks`, "fish", -1, "locks", -1)
|
||||
assertMakeName(`part_fish__blockinfo..tmp_1234567890123456789`, "fish", -3, "blockinfo", 1234567890123456789)
|
||||
assertParseName(`part_fish__blockinfo..tmp_1234567890123456789`, "fish", -1, "blockinfo", 1234567890123456789)
|
||||
}
|
||||
|
||||
// prepare format for long tests
|
||||
assertFormat(`*.chunk.###`, `%s.chunk.%03d`, `%s.chunk._%s`, `^(.+?)\.chunk\.(?:([0-9]{3,})|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`)
|
||||
f.opt.StartFrom = 2
|
||||
|
||||
// valid data chunks
|
||||
assertMakeName(`fish.chunk.003`, "fish", 1, "", -1)
|
||||
assertMakeName(`fish.chunk.011..tmp_0000054321`, "fish", 9, "", 54321)
|
||||
assertMakeName(`fish.chunk.011..tmp_1234567890`, "fish", 9, "", 1234567890)
|
||||
assertMakeName(`fish.chunk.1916..tmp_123456789012345`, "fish", 1914, "", 123456789012345)
|
||||
|
||||
assertParseName(`fish.chunk.003`, "fish", 1, "", -1)
|
||||
assertParseName(`fish.chunk.004..tmp_0000000021`, "fish", 2, "", 21)
|
||||
assertParseName(`fish.chunk.021`, "fish", 19, "", -1)
|
||||
assertParseName(`fish.chunk.323..tmp_1234567890123456789`, "fish", 321, "", 1234567890123456789)
|
||||
|
||||
// parsing invalid data chunk names
|
||||
assertParseName(`fish.chunk.3`, "", -1, "", -1)
|
||||
assertParseName(`fish.chunk.001`, "", -1, "", -1)
|
||||
assertParseName(`fish.chunk.21`, "", -1, "", -1)
|
||||
assertParseName(`fish.chunk.-21`, "", -1, "", -1)
|
||||
|
||||
assertParseName(`fish.chunk.004.tmp_0000000021`, "", -1, "", -1)
|
||||
assertParseName(`fish.chunk.003..tmp_123456789`, "", -1, "", -1)
|
||||
assertParseName(`fish.chunk.003..tmp_012345678901234567890123456789`, "", -1, "", -1)
|
||||
assertParseName(`fish.chunk.003..tmp_-1`, "", -1, "", -1)
|
||||
|
||||
// valid control chunks
|
||||
assertMakeName(`fish.chunk._info`, "fish", -1, "info", -1)
|
||||
assertMakeName(`fish.chunk._locks`, "fish", -2, "locks", -1)
|
||||
assertMakeName(`fish.chunk._blockinfo`, "fish", -3, "blockinfo", -1)
|
||||
|
||||
assertParseName(`fish.chunk._info`, "fish", -1, "info", -1)
|
||||
assertParseName(`fish.chunk._locks`, "fish", -1, "locks", -1)
|
||||
assertParseName(`fish.chunk._blockinfo`, "fish", -1, "blockinfo", -1)
|
||||
|
||||
// valid temporary control chunks
|
||||
assertMakeName(`fish.chunk._info..tmp_0000000021`, "fish", -1, "info", 21)
|
||||
assertMakeName(`fish.chunk._locks..tmp_0000054321`, "fish", -2, "locks", 54321)
|
||||
assertMakeName(`fish.chunk._uploads..tmp_0000000000`, "fish", -3, "uploads", 0)
|
||||
assertMakeName(`fish.chunk._blockinfo..tmp_1234567890123456789`, "fish", -4, "blockinfo", 1234567890123456789)
|
||||
|
||||
assertParseName(`fish.chunk._info..tmp_0000000021`, "fish", -1, "info", 21)
|
||||
assertParseName(`fish.chunk._locks..tmp_0000054321`, "fish", -1, "locks", 54321)
|
||||
assertParseName(`fish.chunk._uploads..tmp_0000000000`, "fish", -1, "uploads", 0)
|
||||
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789`, "fish", -1, "blockinfo", 1234567890123456789)
|
||||
|
||||
// parsing invalid control chunk names
|
||||
assertParseName(`fish.chunk.info`, "", -1, "", -1)
|
||||
assertParseName(`fish.chunk.locks`, "", -1, "", -1)
|
||||
assertParseName(`fish.chunk.uploads`, "", -1, "", -1)
|
||||
assertParseName(`fish.chunk.blockinfo`, "", -1, "", -1)
|
||||
|
||||
assertParseName(`fish.chunk._os`, "", -1, "", -1)
|
||||
assertParseName(`fish.chunk._futuredata`, "", -1, "", -1)
|
||||
assertParseName(`fish.chunk._me_ta`, "", -1, "", -1)
|
||||
assertParseName(`fish.chunk._in-fo`, "", -1, "", -1)
|
||||
assertParseName(`fish.chunk._.bin`, "", -1, "", -1)
|
||||
|
||||
assertParseName(`fish.chunk._locks..tmp_123456789`, "", -1, "", -1)
|
||||
assertParseName(`fish.chunk._meta..tmp_-1`, "", -1, "", -1)
|
||||
assertParseName(`fish.chunk._blockinfo..tmp_012345678901234567890123456789`, "", -1, "", -1)
|
||||
|
||||
// short control chunk names: 3 letters ok, 1-2 letters not allowed
|
||||
assertMakeName(`fish.chunk._ext`, "fish", -1, "ext", -1)
|
||||
assertMakeName(`fish.chunk._ext..tmp_0000000021`, "fish", -1, "ext", 21)
|
||||
assertParseName(`fish.chunk._int`, "fish", -1, "int", -1)
|
||||
assertParseName(`fish.chunk._int..tmp_0000000021`, "fish", -1, "int", 21)
|
||||
assertMakeNamePanics("fish", -1, "in", -1)
|
||||
assertMakeNamePanics("fish", -1, "up", 4)
|
||||
assertMakeNamePanics("fish", -1, "x", -1)
|
||||
assertMakeNamePanics("fish", -1, "c", 4)
|
||||
|
||||
// base file name can sometimes look like a valid chunk name
|
||||
assertParseName(`fish.chunk.003.chunk.004`, "fish.chunk.003", 2, "", -1)
|
||||
assertParseName(`fish.chunk.003.chunk.005..tmp_0000000021`, "fish.chunk.003", 3, "", 21)
|
||||
assertParseName(`fish.chunk.003.chunk._info`, "fish.chunk.003", -1, "info", -1)
|
||||
assertParseName(`fish.chunk.003.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk.003", -1, "blockinfo", 1234567890123456789)
|
||||
assertParseName(`fish.chunk.003.chunk._Meta`, "", -1, "", -1)
|
||||
assertParseName(`fish.chunk.003.chunk._x..tmp_0000054321`, "", -1, "", -1)
|
||||
|
||||
assertParseName(`fish.chunk.004..tmp_0000000021.chunk.004`, "fish.chunk.004..tmp_0000000021", 2, "", -1)
|
||||
assertParseName(`fish.chunk.004..tmp_0000000021.chunk.005..tmp_0000000021`, "fish.chunk.004..tmp_0000000021", 3, "", 21)
|
||||
assertParseName(`fish.chunk.004..tmp_0000000021.chunk._info`, "fish.chunk.004..tmp_0000000021", -1, "info", -1)
|
||||
assertParseName(`fish.chunk.004..tmp_0000000021.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk.004..tmp_0000000021", -1, "blockinfo", 1234567890123456789)
|
||||
assertParseName(`fish.chunk.004..tmp_0000000021.chunk._Meta`, "", -1, "", -1)
|
||||
assertParseName(`fish.chunk.004..tmp_0000000021.chunk._x..tmp_0000054321`, "", -1, "", -1)
|
||||
|
||||
assertParseName(`fish.chunk._info.chunk.004`, "fish.chunk._info", 2, "", -1)
|
||||
assertParseName(`fish.chunk._info.chunk.005..tmp_0000000021`, "fish.chunk._info", 3, "", 21)
|
||||
assertParseName(`fish.chunk._info.chunk._info`, "fish.chunk._info", -1, "info", -1)
|
||||
assertParseName(`fish.chunk._info.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk._info", -1, "blockinfo", 1234567890123456789)
|
||||
assertParseName(`fish.chunk._info.chunk._info.chunk._Meta`, "", -1, "", -1)
|
||||
assertParseName(`fish.chunk._info.chunk._info.chunk._x..tmp_0000054321`, "", -1, "", -1)
|
||||
|
||||
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk.004`, "fish.chunk._blockinfo..tmp_1234567890123456789", 2, "", -1)
|
||||
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk.005..tmp_0000000021`, "fish.chunk._blockinfo..tmp_1234567890123456789", 3, "", 21)
|
||||
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._info`, "fish.chunk._blockinfo..tmp_1234567890123456789", -1, "info", -1)
|
||||
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk._blockinfo..tmp_1234567890123456789", -1, "blockinfo", 1234567890123456789)
|
||||
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._info.chunk._Meta`, "", -1, "", -1)
|
||||
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._info.chunk._x..tmp_0000054321`, "", -1, "", -1)
|
||||
|
||||
// attempts to make invalid chunk names
|
||||
assertMakeNamePanics("fish", -1, "", -1) // neither data nor control
|
||||
assertMakeNamePanics("fish", 0, "info", -1) // both data and control
|
||||
assertMakeNamePanics("fish", -1, "futuredata", -1) // control type too long
|
||||
assertMakeNamePanics("fish", -1, "123", -1) // digits not allowed
|
||||
assertMakeNamePanics("fish", -1, "Meta", -1) // only lower case letters allowed
|
||||
assertMakeNamePanics("fish", -1, "in-fo", -1) // punctuation not allowed
|
||||
assertMakeNamePanics("fish", -1, "_info", -1)
|
||||
assertMakeNamePanics("fish", -1, "info_", -1)
|
||||
assertMakeNamePanics("fish", -2, ".bind", -3)
|
||||
assertMakeNamePanics("fish", -2, "bind.", -3)
|
||||
|
||||
assertMakeNamePanics("fish", -1, "", 1) // neither data nor control
|
||||
assertMakeNamePanics("fish", 0, "info", 12) // both data and control
|
||||
assertMakeNamePanics("fish", -1, "futuredata", 45) // control type too long
|
||||
assertMakeNamePanics("fish", -1, "123", 123) // digits not allowed
|
||||
assertMakeNamePanics("fish", -1, "Meta", 456) // only lower case letters allowed
|
||||
assertMakeNamePanics("fish", -1, "in-fo", 321) // punctuation not allowed
|
||||
assertMakeNamePanics("fish", -1, "_info", 15678)
|
||||
assertMakeNamePanics("fish", -1, "info_", 999)
|
||||
assertMakeNamePanics("fish", -2, ".bind", 0)
|
||||
assertMakeNamePanics("fish", -2, "bind.", 0)
|
||||
}
|
||||
|
||||
// InternalTest dispatches all internal tests
|
||||
func (f *Fs) InternalTest(t *testing.T) {
|
||||
t.Run("PutLarge", func(t *testing.T) {
|
||||
if *UploadKilobytes <= 0 {
|
||||
t.Skip("-upload-kilobytes is not set")
|
||||
}
|
||||
f.InternalTestPutLarge(t, *UploadKilobytes)
|
||||
testPutLarge(t, f, *UploadKilobytes)
|
||||
})
|
||||
t.Run("ChunkNameFormat", func(t *testing.T) {
|
||||
f.InternalTestChunkNameFormat(t)
|
||||
testChunkNameFormat(t, f)
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ won't. This means that if you are using a bucket based remote (eg S3, B2, swift)
|
|||
then you should probably put the bucket in the remote `s3:bucket`.
|
||||
|
||||
Now configure `chunker` using `rclone config`. We will call this one `overlay`
|
||||
to separate it from the `remote`.
|
||||
to separate it from the `remote` itself.
|
||||
|
||||
```
|
||||
No remotes found - make a new one
|
||||
|
@ -50,11 +50,11 @@ Choose how chunker handles hash sums.
|
|||
Enter a string value. Press Enter for the default ("md5").
|
||||
Choose a number from below, or type in your own value
|
||||
/ Chunker can pass any hash supported by wrapped remote
|
||||
1 | for a single-chunk file but returns nothing otherwise.
|
||||
1 | for non-chunked files but returns nothing otherwise.
|
||||
\ "none"
|
||||
2 / MD5 for multi-chunk files. Requires "simplejson".
|
||||
2 / MD5 for composite files. Requires "simplejson".
|
||||
\ "md5"
|
||||
3 / SHA1 for multi-chunk files. Requires "simplejson".
|
||||
3 / SHA1 for composite files. Requires "simplejson".
|
||||
\ "sha1"
|
||||
/ Copying a file to chunker will request MD5 from the source
|
||||
4 | falling back to SHA1 if unsupported. Requires "simplejson".
|
||||
|
@ -95,28 +95,44 @@ When rclone starts a file upload, chunker checks the file size. If it
|
|||
doesn't exceed the configured chunk size, chunker will just pass the file
|
||||
to the wrapped remote. If a file is large, chunker will transparently cut
|
||||
data in pieces with temporary names and stream them one by one, on the fly.
|
||||
Each chunk will contain the specified number of data byts, except for the
|
||||
Each data chunk will contain the specified number of bytes, except for the
|
||||
last one which may have less data. If file size is unknown in advance
|
||||
(this is called a streaming upload), chunker will internally create
|
||||
a temporary copy, record its size and repeat the above process.
|
||||
|
||||
When upload completes, temporary chunk files are finally renamed.
|
||||
This scheme guarantees that operations look from outside as atomic.
|
||||
This scheme guarantees that operations can be run in parallel and look
|
||||
from outside as atomic.
|
||||
A similar method with hidden temporary chunks is used for other operations
|
||||
(copy/move/rename etc). If an operation fails, hidden chunks are normally
|
||||
destroyed, and the destination composite file stays intact.
|
||||
destroyed, and the target composite file stays intact.
|
||||
|
||||
When a composite file download is requested, chunker transparently
|
||||
assembles it by concatenating data chunks in order. As the split is trivial
|
||||
one could even manually concatenate data chunks together to obtain the
|
||||
original content.
|
||||
|
||||
When the `list` rclone command scans a directory on wrapped remote,
|
||||
the potential chunk files are accounted for, grouped and assembled into
|
||||
composite directory entries. Any temporary chunks are hidden.
|
||||
`list` can sometimes come across composite files with missing or invalid
|
||||
chunks, eg if wrapped file system has been tampered with or damaged.
|
||||
If chunker detects a missing chunk it will by default silently ignore
|
||||
the whole group. You can use the `--chunker-fail-on-bad-chunks`
|
||||
rclone option to make `list` fail with a loud error message.
|
||||
|
||||
#### Chunk names
|
||||
|
||||
By default chunk names are `BIG_FILE_NAME.rclone-chunk.001`,
|
||||
`BIG_FILE_NAME.rclone-chunk.002` etc, because the default chunk name
|
||||
format is `*.rclone-chunk.###`. You can configure another name format
|
||||
using the `--chunker-name-format` option. The format uses asterisk
|
||||
The default chunk name format is `*.rclone-chunk.###`, hence by default
|
||||
chunk names are `BIG_FILE_NAME.rclone-chunk.001`,
|
||||
`BIG_FILE_NAME.rclone-chunk.002` etc. You can configure a different name
|
||||
format using the `--chunker-name-format` option. The format uses asterisk
|
||||
`*` as a placeholder for the base file name and one or more consecutive
|
||||
hash characters `#` as a placeholder for sequential chunk number.
|
||||
There must be one and only one asterisk. The number of consecutive hash
|
||||
characters defines the minimum length of a string representing a chunk number.
|
||||
If the decimal chunk number has fewer digits than the number of hashes, it is
|
||||
left-padded by zeros. If the number stringis longer, it is left intact.
|
||||
left-padded by zeros. If the decimal string is longer, it is left intact.
|
||||
By default numbering starts from 1 but there is another option that allows
|
||||
user to start from 0, eg. for compatibility with legacy software.
|
||||
|
||||
|
@ -125,24 +141,18 @@ For example, if name format is `big_*-##.part` and original file name is
|
|||
`big_data.txt-00.part`, the 99th chunk will be `big_data.txt-98.part`
|
||||
and the 302nd chunk will become `big_data.txt-301.part`.
|
||||
|
||||
When the `list` rclone command scans a directory on wrapped remote, the
|
||||
potential chunk files are accounted for and merged into composite directory
|
||||
entries only if their names match the configured format. All other files
|
||||
are ignored, including temporary chunks.
|
||||
The list command might encounter composite files with missing or invalid
|
||||
chunks. If chunker detects a missing chunk it will by default silently
|
||||
ignore the whole group. You can use the `--chunker-fail-on-bad-chunks`
|
||||
command line flag to make `list` fail with an error message.
|
||||
Note that `list` assembles composite directory entries only when chunk names
|
||||
match the configured format and treats non-conforming file names as normal
|
||||
non-chunked files.
|
||||
|
||||
|
||||
### Metadata
|
||||
|
||||
By default when a file is large enough, chunker will create a metadata
|
||||
object besides data chunks. The object is named after the original file.
|
||||
Besides data chunks, chunker will by default create a metadata object for
|
||||
a composite file. The object is named after the original file.
|
||||
Chunker allows user to disable metadata completely (the `none` format).
|
||||
Please note that currently metadata is not created for files smaller
|
||||
than configured chunk size. This may change in future as new formats
|
||||
are developed.
|
||||
Note that metadata is normally not created for files smaller than the
|
||||
configured chunk size. This may change in future rclone releases.
|
||||
|
||||
#### Simple JSON metadata format
|
||||
|
||||
|
@ -151,13 +161,13 @@ for composite files. Meta objects carry the following fields:
|
|||
|
||||
- `ver` - version of format, currently `1`
|
||||
- `size` - total size of composite file
|
||||
- `nchunks` - number of chunks in the file
|
||||
- `nchunks` - number of data chunks in file
|
||||
- `md5` - MD5 hashsum of composite file (if present)
|
||||
- `sha1` - SHA1 hashsum (if present)
|
||||
|
||||
There is no field for composite file name as it's simply equal to the name
|
||||
of meta object on the wrapped remote. Please refer to respective sections
|
||||
for detils on hashsums and handling of modified time.
|
||||
for details on hashsums and modified time handling.
|
||||
|
||||
#### No metadata
|
||||
|
||||
|
@ -165,16 +175,15 @@ You can disable meta objects by setting the meta format option to `none`.
|
|||
In this mode chunker will scan directory for all files that follow
|
||||
configured chunk name format, group them by detecting chunks with the same
|
||||
base name and show group names as virtual composite files.
|
||||
When a download is requested, chunker will transparently assemble compound
|
||||
files by merging chunks in order. This method is more prone to missing chunk
|
||||
errors (especially missing last chunk) than metadata-enabled formats.
|
||||
This method is more prone to missing chunk errors (especially missing
|
||||
last chunk) than a format with metadata enabled.
|
||||
|
||||
|
||||
### Hashsums
|
||||
|
||||
Chunker supports hashsums only when a compatible metadata is present.
|
||||
Thus, if you choose metadata format of `none`, chunker will return
|
||||
`UNSUPPORTED` as hashsum.
|
||||
Hence, if you choose metadata format of `none`, chunker will report hashsum
|
||||
as `UNSUPPORTED`.
|
||||
|
||||
Please note that metadata is stored only for composite files. If a file
|
||||
is small (smaller than configured chunk size), chunker will transparently
|
||||
|
@ -183,18 +192,19 @@ You will see the empty string as a hashsum of requested type for small
|
|||
files if the wrapped remote doesn't support it.
|
||||
|
||||
Many storage backends support MD5 and SHA1 hash types, so does chunker.
|
||||
Currently you can choose one or another but not both.
|
||||
With chunker you can choose one or another but not both.
|
||||
MD5 is set by default as the most supported type.
|
||||
Since chunker keeps hashes for composite files and falls back to the
|
||||
wrapped remote hash for small ones, we advise you to choose the same
|
||||
hash type as wrapped remote so that your file listings look coherent.
|
||||
wrapped remote hash for non-chunked ones, we advise you to choose the same
|
||||
hash type as supported by wrapped remote so that your file listings
|
||||
look coherent.
|
||||
|
||||
Normally, when a file is copied to a chunker controlled remote, chunker
|
||||
Normally, when a file is copied to a chunker-controlled remote, chunker
|
||||
will ask the file source for compatible file hash and revert to on-the-fly
|
||||
calculation if none is found. This involves some CPU overhead but provides
|
||||
a guarantee that given hashsum is available. Also, chunker will reject
|
||||
a server-side copy or move operation if source and destination hashsum
|
||||
types are different, resulting in the extra network bandwidth, too.
|
||||
types are different resulting in the extra network bandwidth, too.
|
||||
In some rare cases this may be undesired, so chunker provides two optional
|
||||
choices: `sha1quick` and `md5quick`. If the source does not support primary
|
||||
hash type and the quick mode is enabled, chunker will try to fall back to
|
||||
|
@ -209,10 +219,10 @@ between source and target are not found.
|
|||
Chunker stores modification times using the wrapped remote so support
|
||||
depends on that. For a small non-chunked file the chunker overlay simply
|
||||
manipulates modification time of the wrapped remote file.
|
||||
If file is large and metadata is present, then chunker will get and set
|
||||
For a composite file with metadata chunker will get and set
|
||||
modification time of the metadata object on the wrapped remote.
|
||||
If file is chunked but metadata format is `none` then chunker will
|
||||
use modification time of the first chunk.
|
||||
use modification time of the first data chunk.
|
||||
|
||||
|
||||
### Migrations
|
||||
|
@ -222,11 +232,11 @@ chunk naming scheme is to:
|
|||
|
||||
- Collect all your chunked files under a directory and have your
|
||||
chunker remote point to it.
|
||||
- Create another directory (possibly on the same cloud storage)
|
||||
- Create another directory (most probably on the same cloud storage)
|
||||
and configure a new remote with desired metadata format,
|
||||
hash type, chunk naming etc.
|
||||
- Now run `rclone sync oldchunks: newchunks:` and all your data
|
||||
will be transparently converted at transfer.
|
||||
will be transparently converted in transfer.
|
||||
This may take some time, yet chunker will try server-side
|
||||
copy if possible.
|
||||
- After checking data integrity you may remove configuration section
|
||||
|
@ -235,11 +245,11 @@ chunk naming scheme is to:
|
|||
If rclone gets killed during a long operation on a big composite file,
|
||||
hidden temporary chunks may stay in the directory. They will not be
|
||||
shown by the `list` command but will eat up your account quota.
|
||||
Please note that the `deletefile` rclone command deletes only active
|
||||
Please note that the `deletefile` command deletes only active
|
||||
chunks of a file. As a workaround, you can use remote of the wrapped
|
||||
file system to see them.
|
||||
An easy way to get rid of hidden garbage is to copy littered directory
|
||||
somewhere using the chunker remote and purge original directory.
|
||||
somewhere using the chunker remote and purge the original directory.
|
||||
The `copy` command will copy only active chunks while the `purge` will
|
||||
remove everything including garbage.
|
||||
|
||||
|
@ -260,7 +270,7 @@ Beware that in result of this some files which have been treated as chunks
|
|||
before the change can pop up in directory listings as normal files
|
||||
and vice versa. The same warning holds for the chunk size.
|
||||
If you desperately need to change critical chunking settings, you should
|
||||
run data migration as described in a dedicated section.
|
||||
run data migration as described above.
|
||||
|
||||
If wrapped remote is case insensitive, the chunker overlay will inherit
|
||||
that property (so you can't have a file called "Hello.doc" and "hello.doc"
|
||||
|
@ -303,11 +313,11 @@ Choose how chunker handles hash sums.
|
|||
- Examples:
|
||||
- "none"
|
||||
- Chunker can pass any hash supported by wrapped remote
|
||||
- for a single-chunk file but returns nothing otherwise.
|
||||
- for non-chunked files but returns nothing otherwise.
|
||||
- "md5"
|
||||
- MD5 for multi-chunk files. Requires "simplejson".
|
||||
- MD5 for composite files. Requires "simplejson".
|
||||
- "sha1"
|
||||
- SHA1 for multi-chunk files. Requires "simplejson".
|
||||
- SHA1 for composite files. Requires "simplejson".
|
||||
- "md5quick"
|
||||
- Copying a file to chunker will request MD5 from the source
|
||||
- falling back to SHA1 if unsupported. Requires "simplejson".
|
||||
|
@ -316,7 +326,7 @@ Choose how chunker handles hash sums.
|
|||
|
||||
### Advanced Options
|
||||
|
||||
Here are the advanced options specific to chunker (Transparently chunk/split large files).
|
||||
Here are the advanced options specific to chunker.
|
||||
|
||||
#### --chunker-name-format
|
||||
|
||||
|
@ -356,7 +366,7 @@ Metadata is a small JSON file named after the composite file.
|
|||
- Do not use metadata files at all. Requires hash type "none".
|
||||
- "simplejson"
|
||||
- Simple JSON supports hash sums and chunk validation.
|
||||
- It has the following fields: size, nchunks, md5, sha1.
|
||||
- It has the following fields: ver, size, nchunks, md5, sha1.
|
||||
|
||||
#### --chunker-fail-on-bad-chunks
|
||||
|
||||
|
|
Loading…
Reference in a new issue