chunker: reservations for future extensions
This commit is contained in:
parent
41ed33b08e
commit
7aa2b4191c
3 changed files with 690 additions and 394 deletions
File diff suppressed because it is too large
Load diff
|
@ -18,7 +18,7 @@ var (
|
||||||
)
|
)
|
||||||
|
|
||||||
// test that chunking does not break large uploads
|
// test that chunking does not break large uploads
|
||||||
func (f *Fs) InternalTestPutLarge(t *testing.T, kilobytes int) {
|
func testPutLarge(t *testing.T, f *Fs, kilobytes int) {
|
||||||
t.Run(fmt.Sprintf("PutLarge%dk", kilobytes), func(t *testing.T) {
|
t.Run(fmt.Sprintf("PutLarge%dk", kilobytes), func(t *testing.T) {
|
||||||
fstests.TestPutLarge(context.Background(), t, f, &fstest.Item{
|
fstests.TestPutLarge(context.Background(), t, f, &fstest.Item{
|
||||||
ModTime: fstest.Time("2001-02-03T04:05:06.499999999Z"),
|
ModTime: fstest.Time("2001-02-03T04:05:06.499999999Z"),
|
||||||
|
@ -28,119 +28,228 @@ func (f *Fs) InternalTestPutLarge(t *testing.T, kilobytes int) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *Fs) InternalTestChunkNameFormat(t *testing.T) {
|
// test chunk name parser
|
||||||
savedNameFormat := f.opt.NameFormat
|
func testChunkNameFormat(t *testing.T, f *Fs) {
|
||||||
savedStartFrom := f.opt.StartFrom
|
saveOpt := f.opt
|
||||||
defer func() {
|
defer func() {
|
||||||
// restore original settings
|
// restore original settings (f is pointer, f.opt is struct)
|
||||||
_ = f.parseNameFormat(savedNameFormat)
|
f.opt = saveOpt
|
||||||
f.opt.StartFrom = savedStartFrom
|
_ = f.setChunkNameFormat(f.opt.NameFormat)
|
||||||
}()
|
}()
|
||||||
var err error
|
|
||||||
|
|
||||||
err = f.parseNameFormat("*.rclone_chunk.###")
|
assertFormat := func(pattern, wantDataFormat, wantCtrlFormat, wantNameRegexp string) {
|
||||||
|
err := f.setChunkNameFormat(pattern)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Equal(t, `%s.rclone_chunk.%03d`, f.nameFormat)
|
assert.Equal(t, wantDataFormat, f.dataNameFmt)
|
||||||
assert.Equal(t, `^(.+)\.rclone_chunk\.([0-9]{3,})$`, f.nameRegexp.String())
|
assert.Equal(t, wantCtrlFormat, f.ctrlNameFmt)
|
||||||
|
assert.Equal(t, wantNameRegexp, f.nameRegexp.String())
|
||||||
err = f.parseNameFormat("*.rclone_chunk.#")
|
|
||||||
assert.NoError(t, err)
|
|
||||||
assert.Equal(t, `%s.rclone_chunk.%d`, f.nameFormat)
|
|
||||||
assert.Equal(t, `^(.+)\.rclone_chunk\.([0-9]+)$`, f.nameRegexp.String())
|
|
||||||
|
|
||||||
err = f.parseNameFormat("*_chunk_#####")
|
|
||||||
assert.NoError(t, err)
|
|
||||||
assert.Equal(t, `%s_chunk_%05d`, f.nameFormat)
|
|
||||||
assert.Equal(t, `^(.+)_chunk_([0-9]{5,})$`, f.nameRegexp.String())
|
|
||||||
|
|
||||||
err = f.parseNameFormat("*-chunk-#")
|
|
||||||
assert.NoError(t, err)
|
|
||||||
assert.Equal(t, `%s-chunk-%d`, f.nameFormat)
|
|
||||||
assert.Equal(t, `^(.+)-chunk-([0-9]+)$`, f.nameRegexp.String())
|
|
||||||
|
|
||||||
err = f.parseNameFormat("_*-chunk-##,")
|
|
||||||
assert.NoError(t, err)
|
|
||||||
assert.Equal(t, `_%s-chunk-%02d,`, f.nameFormat)
|
|
||||||
assert.Equal(t, `^_(.+)-chunk-([0-9]{2,}),$`, f.nameRegexp.String())
|
|
||||||
|
|
||||||
err = f.parseNameFormat(`*-chunk-#-%^$()[]{}.+-!?:\/`)
|
|
||||||
assert.NoError(t, err)
|
|
||||||
assert.Equal(t, `%s-chunk-%d-%%^$()[]{}.+-!?:\/`, f.nameFormat)
|
|
||||||
assert.Equal(t, `^(.+)-chunk-([0-9]+)-%\^\$\(\)\[\]\{\}\.\+-!\?:\\/$`, f.nameRegexp.String())
|
|
||||||
|
|
||||||
err = f.parseNameFormat("chunk-#")
|
|
||||||
assert.Error(t, err)
|
|
||||||
|
|
||||||
err = f.parseNameFormat("*-chunk")
|
|
||||||
assert.Error(t, err)
|
|
||||||
|
|
||||||
err = f.parseNameFormat("*-*-chunk-#")
|
|
||||||
assert.Error(t, err)
|
|
||||||
|
|
||||||
err = f.parseNameFormat("*-chunk-#-#")
|
|
||||||
assert.Error(t, err)
|
|
||||||
|
|
||||||
err = f.parseNameFormat("#-chunk-*")
|
|
||||||
assert.Error(t, err)
|
|
||||||
|
|
||||||
err = f.parseNameFormat("*#")
|
|
||||||
assert.NoError(t, err)
|
|
||||||
|
|
||||||
err = f.parseNameFormat("**#")
|
|
||||||
assert.Error(t, err)
|
|
||||||
err = f.parseNameFormat("#*")
|
|
||||||
assert.Error(t, err)
|
|
||||||
err = f.parseNameFormat("")
|
|
||||||
assert.Error(t, err)
|
|
||||||
err = f.parseNameFormat("-")
|
|
||||||
assert.Error(t, err)
|
|
||||||
|
|
||||||
f.opt.StartFrom = 2
|
|
||||||
err = f.parseNameFormat("*.chunk.###")
|
|
||||||
assert.NoError(t, err)
|
|
||||||
assert.Equal(t, `%s.chunk.%03d`, f.nameFormat)
|
|
||||||
assert.Equal(t, `^(.+)\.chunk\.([0-9]{3,})$`, f.nameRegexp.String())
|
|
||||||
|
|
||||||
assert.Equal(t, "fish.chunk.003", f.makeChunkName("fish", 1, -1))
|
|
||||||
assert.Equal(t, "fish.chunk.011..tmp_0000054321", f.makeChunkName("fish", 9, 54321))
|
|
||||||
assert.Equal(t, "fish.chunk.011..tmp_1234567890", f.makeChunkName("fish", 9, 1234567890))
|
|
||||||
assert.Equal(t, "fish.chunk.1916..tmp_123456789012345", f.makeChunkName("fish", 1914, 123456789012345))
|
|
||||||
|
|
||||||
name, chunkNo, tempNo := f.parseChunkName("fish.chunk.003")
|
|
||||||
assert.True(t, name == "fish" && chunkNo == 1 && tempNo == -1)
|
|
||||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.004..tmp_0000000021")
|
|
||||||
assert.True(t, name == "fish" && chunkNo == 2 && tempNo == 21)
|
|
||||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.021")
|
|
||||||
assert.True(t, name == "fish" && chunkNo == 19 && tempNo == -1)
|
|
||||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.323..tmp_1234567890123456789")
|
|
||||||
assert.True(t, name == "fish" && chunkNo == 321 && tempNo == 1234567890123456789)
|
|
||||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.3")
|
|
||||||
assert.True(t, name == "" && chunkNo == -1 && tempNo == -1)
|
|
||||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.001")
|
|
||||||
assert.True(t, name == "" && chunkNo == -1 && tempNo == -1)
|
|
||||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.21")
|
|
||||||
assert.True(t, name == "" && chunkNo == -1 && tempNo == -1)
|
|
||||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.-21")
|
|
||||||
assert.True(t, name == "" && chunkNo == -1 && tempNo == -1)
|
|
||||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.004.tmp_0000000021")
|
|
||||||
assert.True(t, name == "" && chunkNo == -1 && tempNo == -1)
|
|
||||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.003..tmp_123456789")
|
|
||||||
assert.True(t, name == "" && chunkNo == -1 && tempNo == -1)
|
|
||||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.003..tmp_012345678901234567890123456789")
|
|
||||||
assert.True(t, name == "" && chunkNo == -1 && tempNo == -1)
|
|
||||||
name, chunkNo, tempNo = f.parseChunkName("fish.chunk.003..tmp_-1")
|
|
||||||
assert.True(t, name == "" && chunkNo == -1 && tempNo == -1)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assertFormatValid := func(pattern string) {
|
||||||
|
err := f.setChunkNameFormat(pattern)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
assertFormatInvalid := func(pattern string) {
|
||||||
|
err := f.setChunkNameFormat(pattern)
|
||||||
|
assert.Error(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
assertMakeName := func(wantChunkName, mainName string, chunkNo int, ctrlType string, xactNo int64) {
|
||||||
|
gotChunkName := f.makeChunkName(mainName, chunkNo, ctrlType, xactNo)
|
||||||
|
assert.Equal(t, wantChunkName, gotChunkName)
|
||||||
|
}
|
||||||
|
|
||||||
|
assertMakeNamePanics := func(mainName string, chunkNo int, ctrlType string, xactNo int64) {
|
||||||
|
assert.Panics(t, func() {
|
||||||
|
_ = f.makeChunkName(mainName, chunkNo, ctrlType, xactNo)
|
||||||
|
}, "makeChunkName(%q,%d,%q,%d) should panic", mainName, chunkNo, ctrlType, xactNo)
|
||||||
|
}
|
||||||
|
|
||||||
|
assertParseName := func(fileName, wantMainName string, wantChunkNo int, wantCtrlType string, wantXactNo int64) {
|
||||||
|
gotMainName, gotChunkNo, gotCtrlType, gotXactNo := f.parseChunkName(fileName)
|
||||||
|
assert.Equal(t, wantMainName, gotMainName)
|
||||||
|
assert.Equal(t, wantChunkNo, gotChunkNo)
|
||||||
|
assert.Equal(t, wantCtrlType, gotCtrlType)
|
||||||
|
assert.Equal(t, wantXactNo, gotXactNo)
|
||||||
|
}
|
||||||
|
|
||||||
|
const newFormatSupported = false // support for patterns not starting with base name (*)
|
||||||
|
|
||||||
|
// valid formats
|
||||||
|
assertFormat(`*.rclone_chunk.###`, `%s.rclone_chunk.%03d`, `%s.rclone_chunk._%s`, `^(.+?)\.rclone_chunk\.(?:([0-9]{3,})|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`)
|
||||||
|
assertFormat(`*.rclone_chunk.#`, `%s.rclone_chunk.%d`, `%s.rclone_chunk._%s`, `^(.+?)\.rclone_chunk\.(?:([0-9]+)|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`)
|
||||||
|
assertFormat(`*_chunk_#####`, `%s_chunk_%05d`, `%s_chunk__%s`, `^(.+?)_chunk_(?:([0-9]{5,})|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`)
|
||||||
|
assertFormat(`*-chunk-#`, `%s-chunk-%d`, `%s-chunk-_%s`, `^(.+?)-chunk-(?:([0-9]+)|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`)
|
||||||
|
assertFormat(`*-chunk-#-%^$()[]{}.+-!?:\`, `%s-chunk-%d-%%^$()[]{}.+-!?:\`, `%s-chunk-_%s-%%^$()[]{}.+-!?:\`, `^(.+?)-chunk-(?:([0-9]+)|_([a-z]{3,9}))-%\^\$\(\)\[\]\{\}\.\+-!\?:\\(?:\.\.tmp_([0-9]{10,19}))?$`)
|
||||||
|
if newFormatSupported {
|
||||||
|
assertFormat(`_*-chunk-##,`, `_%s-chunk-%02d,`, `_%s-chunk-_%s,`, `^_(.+?)-chunk-(?:([0-9]{2,})|_([a-z]{3,9})),(?:\.\.tmp_([0-9]{10,19}))?$`)
|
||||||
|
}
|
||||||
|
|
||||||
|
// invalid formats
|
||||||
|
assertFormatInvalid(`chunk-#`)
|
||||||
|
assertFormatInvalid(`*-chunk`)
|
||||||
|
assertFormatInvalid(`*-*-chunk-#`)
|
||||||
|
assertFormatInvalid(`*-chunk-#-#`)
|
||||||
|
assertFormatInvalid(`#-chunk-*`)
|
||||||
|
assertFormatInvalid(`*/#`)
|
||||||
|
|
||||||
|
assertFormatValid(`*#`)
|
||||||
|
assertFormatInvalid(`**#`)
|
||||||
|
assertFormatInvalid(`#*`)
|
||||||
|
assertFormatInvalid(``)
|
||||||
|
assertFormatInvalid(`-`)
|
||||||
|
|
||||||
|
// quick tests
|
||||||
|
if newFormatSupported {
|
||||||
|
assertFormat(`part_*_#`, `part_%s_%d`, `part_%s__%s`, `^part_(.+?)_(?:([0-9]+)|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`)
|
||||||
|
f.opt.StartFrom = 1
|
||||||
|
|
||||||
|
assertMakeName(`part_fish_1`, "fish", 0, "", -1)
|
||||||
|
assertParseName(`part_fish_43`, "fish", 42, "", -1)
|
||||||
|
assertMakeName(`part_fish_3..tmp_0000000004`, "fish", 2, "", 4)
|
||||||
|
assertParseName(`part_fish_4..tmp_0000000005`, "fish", 3, "", 5)
|
||||||
|
assertMakeName(`part_fish__locks`, "fish", -2, "locks", -3)
|
||||||
|
assertParseName(`part_fish__locks`, "fish", -1, "locks", -1)
|
||||||
|
assertMakeName(`part_fish__blockinfo..tmp_1234567890123456789`, "fish", -3, "blockinfo", 1234567890123456789)
|
||||||
|
assertParseName(`part_fish__blockinfo..tmp_1234567890123456789`, "fish", -1, "blockinfo", 1234567890123456789)
|
||||||
|
}
|
||||||
|
|
||||||
|
// prepare format for long tests
|
||||||
|
assertFormat(`*.chunk.###`, `%s.chunk.%03d`, `%s.chunk._%s`, `^(.+?)\.chunk\.(?:([0-9]{3,})|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`)
|
||||||
|
f.opt.StartFrom = 2
|
||||||
|
|
||||||
|
// valid data chunks
|
||||||
|
assertMakeName(`fish.chunk.003`, "fish", 1, "", -1)
|
||||||
|
assertMakeName(`fish.chunk.011..tmp_0000054321`, "fish", 9, "", 54321)
|
||||||
|
assertMakeName(`fish.chunk.011..tmp_1234567890`, "fish", 9, "", 1234567890)
|
||||||
|
assertMakeName(`fish.chunk.1916..tmp_123456789012345`, "fish", 1914, "", 123456789012345)
|
||||||
|
|
||||||
|
assertParseName(`fish.chunk.003`, "fish", 1, "", -1)
|
||||||
|
assertParseName(`fish.chunk.004..tmp_0000000021`, "fish", 2, "", 21)
|
||||||
|
assertParseName(`fish.chunk.021`, "fish", 19, "", -1)
|
||||||
|
assertParseName(`fish.chunk.323..tmp_1234567890123456789`, "fish", 321, "", 1234567890123456789)
|
||||||
|
|
||||||
|
// parsing invalid data chunk names
|
||||||
|
assertParseName(`fish.chunk.3`, "", -1, "", -1)
|
||||||
|
assertParseName(`fish.chunk.001`, "", -1, "", -1)
|
||||||
|
assertParseName(`fish.chunk.21`, "", -1, "", -1)
|
||||||
|
assertParseName(`fish.chunk.-21`, "", -1, "", -1)
|
||||||
|
|
||||||
|
assertParseName(`fish.chunk.004.tmp_0000000021`, "", -1, "", -1)
|
||||||
|
assertParseName(`fish.chunk.003..tmp_123456789`, "", -1, "", -1)
|
||||||
|
assertParseName(`fish.chunk.003..tmp_012345678901234567890123456789`, "", -1, "", -1)
|
||||||
|
assertParseName(`fish.chunk.003..tmp_-1`, "", -1, "", -1)
|
||||||
|
|
||||||
|
// valid control chunks
|
||||||
|
assertMakeName(`fish.chunk._info`, "fish", -1, "info", -1)
|
||||||
|
assertMakeName(`fish.chunk._locks`, "fish", -2, "locks", -1)
|
||||||
|
assertMakeName(`fish.chunk._blockinfo`, "fish", -3, "blockinfo", -1)
|
||||||
|
|
||||||
|
assertParseName(`fish.chunk._info`, "fish", -1, "info", -1)
|
||||||
|
assertParseName(`fish.chunk._locks`, "fish", -1, "locks", -1)
|
||||||
|
assertParseName(`fish.chunk._blockinfo`, "fish", -1, "blockinfo", -1)
|
||||||
|
|
||||||
|
// valid temporary control chunks
|
||||||
|
assertMakeName(`fish.chunk._info..tmp_0000000021`, "fish", -1, "info", 21)
|
||||||
|
assertMakeName(`fish.chunk._locks..tmp_0000054321`, "fish", -2, "locks", 54321)
|
||||||
|
assertMakeName(`fish.chunk._uploads..tmp_0000000000`, "fish", -3, "uploads", 0)
|
||||||
|
assertMakeName(`fish.chunk._blockinfo..tmp_1234567890123456789`, "fish", -4, "blockinfo", 1234567890123456789)
|
||||||
|
|
||||||
|
assertParseName(`fish.chunk._info..tmp_0000000021`, "fish", -1, "info", 21)
|
||||||
|
assertParseName(`fish.chunk._locks..tmp_0000054321`, "fish", -1, "locks", 54321)
|
||||||
|
assertParseName(`fish.chunk._uploads..tmp_0000000000`, "fish", -1, "uploads", 0)
|
||||||
|
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789`, "fish", -1, "blockinfo", 1234567890123456789)
|
||||||
|
|
||||||
|
// parsing invalid control chunk names
|
||||||
|
assertParseName(`fish.chunk.info`, "", -1, "", -1)
|
||||||
|
assertParseName(`fish.chunk.locks`, "", -1, "", -1)
|
||||||
|
assertParseName(`fish.chunk.uploads`, "", -1, "", -1)
|
||||||
|
assertParseName(`fish.chunk.blockinfo`, "", -1, "", -1)
|
||||||
|
|
||||||
|
assertParseName(`fish.chunk._os`, "", -1, "", -1)
|
||||||
|
assertParseName(`fish.chunk._futuredata`, "", -1, "", -1)
|
||||||
|
assertParseName(`fish.chunk._me_ta`, "", -1, "", -1)
|
||||||
|
assertParseName(`fish.chunk._in-fo`, "", -1, "", -1)
|
||||||
|
assertParseName(`fish.chunk._.bin`, "", -1, "", -1)
|
||||||
|
|
||||||
|
assertParseName(`fish.chunk._locks..tmp_123456789`, "", -1, "", -1)
|
||||||
|
assertParseName(`fish.chunk._meta..tmp_-1`, "", -1, "", -1)
|
||||||
|
assertParseName(`fish.chunk._blockinfo..tmp_012345678901234567890123456789`, "", -1, "", -1)
|
||||||
|
|
||||||
|
// short control chunk names: 3 letters ok, 1-2 letters not allowed
|
||||||
|
assertMakeName(`fish.chunk._ext`, "fish", -1, "ext", -1)
|
||||||
|
assertMakeName(`fish.chunk._ext..tmp_0000000021`, "fish", -1, "ext", 21)
|
||||||
|
assertParseName(`fish.chunk._int`, "fish", -1, "int", -1)
|
||||||
|
assertParseName(`fish.chunk._int..tmp_0000000021`, "fish", -1, "int", 21)
|
||||||
|
assertMakeNamePanics("fish", -1, "in", -1)
|
||||||
|
assertMakeNamePanics("fish", -1, "up", 4)
|
||||||
|
assertMakeNamePanics("fish", -1, "x", -1)
|
||||||
|
assertMakeNamePanics("fish", -1, "c", 4)
|
||||||
|
|
||||||
|
// base file name can sometimes look like a valid chunk name
|
||||||
|
assertParseName(`fish.chunk.003.chunk.004`, "fish.chunk.003", 2, "", -1)
|
||||||
|
assertParseName(`fish.chunk.003.chunk.005..tmp_0000000021`, "fish.chunk.003", 3, "", 21)
|
||||||
|
assertParseName(`fish.chunk.003.chunk._info`, "fish.chunk.003", -1, "info", -1)
|
||||||
|
assertParseName(`fish.chunk.003.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk.003", -1, "blockinfo", 1234567890123456789)
|
||||||
|
assertParseName(`fish.chunk.003.chunk._Meta`, "", -1, "", -1)
|
||||||
|
assertParseName(`fish.chunk.003.chunk._x..tmp_0000054321`, "", -1, "", -1)
|
||||||
|
|
||||||
|
assertParseName(`fish.chunk.004..tmp_0000000021.chunk.004`, "fish.chunk.004..tmp_0000000021", 2, "", -1)
|
||||||
|
assertParseName(`fish.chunk.004..tmp_0000000021.chunk.005..tmp_0000000021`, "fish.chunk.004..tmp_0000000021", 3, "", 21)
|
||||||
|
assertParseName(`fish.chunk.004..tmp_0000000021.chunk._info`, "fish.chunk.004..tmp_0000000021", -1, "info", -1)
|
||||||
|
assertParseName(`fish.chunk.004..tmp_0000000021.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk.004..tmp_0000000021", -1, "blockinfo", 1234567890123456789)
|
||||||
|
assertParseName(`fish.chunk.004..tmp_0000000021.chunk._Meta`, "", -1, "", -1)
|
||||||
|
assertParseName(`fish.chunk.004..tmp_0000000021.chunk._x..tmp_0000054321`, "", -1, "", -1)
|
||||||
|
|
||||||
|
assertParseName(`fish.chunk._info.chunk.004`, "fish.chunk._info", 2, "", -1)
|
||||||
|
assertParseName(`fish.chunk._info.chunk.005..tmp_0000000021`, "fish.chunk._info", 3, "", 21)
|
||||||
|
assertParseName(`fish.chunk._info.chunk._info`, "fish.chunk._info", -1, "info", -1)
|
||||||
|
assertParseName(`fish.chunk._info.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk._info", -1, "blockinfo", 1234567890123456789)
|
||||||
|
assertParseName(`fish.chunk._info.chunk._info.chunk._Meta`, "", -1, "", -1)
|
||||||
|
assertParseName(`fish.chunk._info.chunk._info.chunk._x..tmp_0000054321`, "", -1, "", -1)
|
||||||
|
|
||||||
|
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk.004`, "fish.chunk._blockinfo..tmp_1234567890123456789", 2, "", -1)
|
||||||
|
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk.005..tmp_0000000021`, "fish.chunk._blockinfo..tmp_1234567890123456789", 3, "", 21)
|
||||||
|
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._info`, "fish.chunk._blockinfo..tmp_1234567890123456789", -1, "info", -1)
|
||||||
|
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk._blockinfo..tmp_1234567890123456789", -1, "blockinfo", 1234567890123456789)
|
||||||
|
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._info.chunk._Meta`, "", -1, "", -1)
|
||||||
|
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._info.chunk._x..tmp_0000054321`, "", -1, "", -1)
|
||||||
|
|
||||||
|
// attempts to make invalid chunk names
|
||||||
|
assertMakeNamePanics("fish", -1, "", -1) // neither data nor control
|
||||||
|
assertMakeNamePanics("fish", 0, "info", -1) // both data and control
|
||||||
|
assertMakeNamePanics("fish", -1, "futuredata", -1) // control type too long
|
||||||
|
assertMakeNamePanics("fish", -1, "123", -1) // digits not allowed
|
||||||
|
assertMakeNamePanics("fish", -1, "Meta", -1) // only lower case letters allowed
|
||||||
|
assertMakeNamePanics("fish", -1, "in-fo", -1) // punctuation not allowed
|
||||||
|
assertMakeNamePanics("fish", -1, "_info", -1)
|
||||||
|
assertMakeNamePanics("fish", -1, "info_", -1)
|
||||||
|
assertMakeNamePanics("fish", -2, ".bind", -3)
|
||||||
|
assertMakeNamePanics("fish", -2, "bind.", -3)
|
||||||
|
|
||||||
|
assertMakeNamePanics("fish", -1, "", 1) // neither data nor control
|
||||||
|
assertMakeNamePanics("fish", 0, "info", 12) // both data and control
|
||||||
|
assertMakeNamePanics("fish", -1, "futuredata", 45) // control type too long
|
||||||
|
assertMakeNamePanics("fish", -1, "123", 123) // digits not allowed
|
||||||
|
assertMakeNamePanics("fish", -1, "Meta", 456) // only lower case letters allowed
|
||||||
|
assertMakeNamePanics("fish", -1, "in-fo", 321) // punctuation not allowed
|
||||||
|
assertMakeNamePanics("fish", -1, "_info", 15678)
|
||||||
|
assertMakeNamePanics("fish", -1, "info_", 999)
|
||||||
|
assertMakeNamePanics("fish", -2, ".bind", 0)
|
||||||
|
assertMakeNamePanics("fish", -2, "bind.", 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
// InternalTest dispatches all internal tests
|
||||||
func (f *Fs) InternalTest(t *testing.T) {
|
func (f *Fs) InternalTest(t *testing.T) {
|
||||||
t.Run("PutLarge", func(t *testing.T) {
|
t.Run("PutLarge", func(t *testing.T) {
|
||||||
if *UploadKilobytes <= 0 {
|
if *UploadKilobytes <= 0 {
|
||||||
t.Skip("-upload-kilobytes is not set")
|
t.Skip("-upload-kilobytes is not set")
|
||||||
}
|
}
|
||||||
f.InternalTestPutLarge(t, *UploadKilobytes)
|
testPutLarge(t, f, *UploadKilobytes)
|
||||||
})
|
})
|
||||||
t.Run("ChunkNameFormat", func(t *testing.T) {
|
t.Run("ChunkNameFormat", func(t *testing.T) {
|
||||||
f.InternalTestChunkNameFormat(t)
|
testChunkNameFormat(t, f)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,7 @@ won't. This means that if you are using a bucket based remote (eg S3, B2, swift)
|
||||||
then you should probably put the bucket in the remote `s3:bucket`.
|
then you should probably put the bucket in the remote `s3:bucket`.
|
||||||
|
|
||||||
Now configure `chunker` using `rclone config`. We will call this one `overlay`
|
Now configure `chunker` using `rclone config`. We will call this one `overlay`
|
||||||
to separate it from the `remote`.
|
to separate it from the `remote` itself.
|
||||||
|
|
||||||
```
|
```
|
||||||
No remotes found - make a new one
|
No remotes found - make a new one
|
||||||
|
@ -50,11 +50,11 @@ Choose how chunker handles hash sums.
|
||||||
Enter a string value. Press Enter for the default ("md5").
|
Enter a string value. Press Enter for the default ("md5").
|
||||||
Choose a number from below, or type in your own value
|
Choose a number from below, or type in your own value
|
||||||
/ Chunker can pass any hash supported by wrapped remote
|
/ Chunker can pass any hash supported by wrapped remote
|
||||||
1 | for a single-chunk file but returns nothing otherwise.
|
1 | for non-chunked files but returns nothing otherwise.
|
||||||
\ "none"
|
\ "none"
|
||||||
2 / MD5 for multi-chunk files. Requires "simplejson".
|
2 / MD5 for composite files. Requires "simplejson".
|
||||||
\ "md5"
|
\ "md5"
|
||||||
3 / SHA1 for multi-chunk files. Requires "simplejson".
|
3 / SHA1 for composite files. Requires "simplejson".
|
||||||
\ "sha1"
|
\ "sha1"
|
||||||
/ Copying a file to chunker will request MD5 from the source
|
/ Copying a file to chunker will request MD5 from the source
|
||||||
4 | falling back to SHA1 if unsupported. Requires "simplejson".
|
4 | falling back to SHA1 if unsupported. Requires "simplejson".
|
||||||
|
@ -95,28 +95,44 @@ When rclone starts a file upload, chunker checks the file size. If it
|
||||||
doesn't exceed the configured chunk size, chunker will just pass the file
|
doesn't exceed the configured chunk size, chunker will just pass the file
|
||||||
to the wrapped remote. If a file is large, chunker will transparently cut
|
to the wrapped remote. If a file is large, chunker will transparently cut
|
||||||
data in pieces with temporary names and stream them one by one, on the fly.
|
data in pieces with temporary names and stream them one by one, on the fly.
|
||||||
Each chunk will contain the specified number of data byts, except for the
|
Each data chunk will contain the specified number of bytes, except for the
|
||||||
last one which may have less data. If file size is unknown in advance
|
last one which may have less data. If file size is unknown in advance
|
||||||
(this is called a streaming upload), chunker will internally create
|
(this is called a streaming upload), chunker will internally create
|
||||||
a temporary copy, record its size and repeat the above process.
|
a temporary copy, record its size and repeat the above process.
|
||||||
|
|
||||||
When upload completes, temporary chunk files are finally renamed.
|
When upload completes, temporary chunk files are finally renamed.
|
||||||
This scheme guarantees that operations look from outside as atomic.
|
This scheme guarantees that operations can be run in parallel and look
|
||||||
|
from outside as atomic.
|
||||||
A similar method with hidden temporary chunks is used for other operations
|
A similar method with hidden temporary chunks is used for other operations
|
||||||
(copy/move/rename etc). If an operation fails, hidden chunks are normally
|
(copy/move/rename etc). If an operation fails, hidden chunks are normally
|
||||||
destroyed, and the destination composite file stays intact.
|
destroyed, and the target composite file stays intact.
|
||||||
|
|
||||||
|
When a composite file download is requested, chunker transparently
|
||||||
|
assembles it by concatenating data chunks in order. As the split is trivial,
|
||||||
|
one could even manually concatenate data chunks together to obtain the
|
||||||
|
original content.
|
||||||
|
|
||||||
|
When the `list` rclone command scans a directory on wrapped remote,
|
||||||
|
the potential chunk files are accounted for, grouped and assembled into
|
||||||
|
composite directory entries. Any temporary chunks are hidden.
|
||||||
|
`list` can sometimes come across composite files with missing or invalid
|
||||||
|
chunks, eg if wrapped file system has been tampered with or damaged.
|
||||||
|
If chunker detects a missing chunk it will by default silently ignore
|
||||||
|
the whole group. You can use the `--chunker-fail-on-bad-chunks`
|
||||||
|
rclone option to make `list` fail with a loud error message.
|
||||||
|
|
||||||
#### Chunk names
|
#### Chunk names
|
||||||
|
|
||||||
By default chunk names are `BIG_FILE_NAME.rclone-chunk.001`,
|
The default chunk name format is `*.rclone-chunk.###`, hence by default
|
||||||
`BIG_FILE_NAME.rclone-chunk.002` etc, because the default chunk name
|
chunk names are `BIG_FILE_NAME.rclone-chunk.001`,
|
||||||
format is `*.rclone-chunk.###`. You can configure another name format
|
`BIG_FILE_NAME.rclone-chunk.002` etc. You can configure a different name
|
||||||
using the `--chunker-name-format` option. The format uses asterisk
|
format using the `--chunker-name-format` option. The format uses asterisk
|
||||||
`*` as a placeholder for the base file name and one or more consecutive
|
`*` as a placeholder for the base file name and one or more consecutive
|
||||||
hash characters `#` as a placeholder for sequential chunk number.
|
hash characters `#` as a placeholder for sequential chunk number.
|
||||||
There must be one and only one asterisk. The number of consecutive hash
|
There must be one and only one asterisk. The number of consecutive hash
|
||||||
characters defines the minimum length of a string representing a chunk number.
|
characters defines the minimum length of a string representing a chunk number.
|
||||||
If decimal chunk number has fewer digits than the number of hashes, it is
|
If decimal chunk number has fewer digits than the number of hashes, it is
|
||||||
left-padded by zeros. If the number stringis longer, it is left intact.
|
left-padded by zeros. If the decimal string is longer, it is left intact.
|
||||||
By default numbering starts from 1 but there is another option that allows
|
By default numbering starts from 1 but there is another option that allows
|
||||||
user to start from 0, eg. for compatibility with legacy software.
|
user to start from 0, eg. for compatibility with legacy software.
|
||||||
|
|
||||||
|
@ -125,24 +141,18 @@ For example, if name format is `big_*-##.part` and original file name is
|
||||||
`big_data.txt-00.part`, the 99th chunk will be `big_data.txt-98.part`
|
`big_data.txt-00.part`, the 99th chunk will be `big_data.txt-98.part`
|
||||||
and the 302nd chunk will become `big_data.txt-301.part`.
|
and the 302nd chunk will become `big_data.txt-301.part`.
|
||||||
|
|
||||||
When the `list` rclone command scans a directory on wrapped remote, the
|
Note that `list` assembles composite directory entries only when chunk names
|
||||||
potential chunk files are accounted for and merged into composite directory
|
match the configured format and treats non-conforming file names as normal
|
||||||
entries only if their names match the configured format. All other files
|
non-chunked files.
|
||||||
are ignored, including temporary chunks.
|
|
||||||
The list command might encounter composite files with missing or invalid
|
|
||||||
chunks. If chunker detects a missing chunk it will by default silently
|
|
||||||
ignore the whole group. You can use the `--chunker-fail-on-bad-chunks`
|
|
||||||
command line flag to make `list` fail with an error message.
|
|
||||||
|
|
||||||
|
|
||||||
### Metadata
|
### Metadata
|
||||||
|
|
||||||
By default when a file is large enough, chunker will create a metadata
|
Besides data chunks, chunker will by default create a metadata object for
|
||||||
object besides data chunks. The object is named after the original file.
|
a composite file. The object is named after the original file.
|
||||||
Chunker allows user to disable metadata completely (the `none` format).
|
Chunker allows user to disable metadata completely (the `none` format).
|
||||||
Please note that currently metadata is not created for files smaller
|
Note that metadata is normally not created for files smaller than the
|
||||||
than configured chunk size. This may change in future as new formats
|
configured chunk size. This may change in future rclone releases.
|
||||||
are developed.
|
|
||||||
|
|
||||||
#### Simple JSON metadata format
|
#### Simple JSON metadata format
|
||||||
|
|
||||||
|
@ -151,13 +161,13 @@ for composite files. Meta objects carry the following fields:
|
||||||
|
|
||||||
- `ver` - version of format, currently `1`
|
- `ver` - version of format, currently `1`
|
||||||
- `size` - total size of composite file
|
- `size` - total size of composite file
|
||||||
- `nchunks` - number of chunks in the file
|
- `nchunks` - number of data chunks in file
|
||||||
- `md5` - MD5 hashsum of composite file (if present)
|
- `md5` - MD5 hashsum of composite file (if present)
|
||||||
- `sha1` - SHA1 hashsum (if present)
|
- `sha1` - SHA1 hashsum (if present)
|
||||||
|
|
||||||
There is no field for composite file name as it's simply equal to the name
|
There is no field for composite file name as it's simply equal to the name
|
||||||
of meta object on the wrapped remote. Please refer to respective sections
|
of meta object on the wrapped remote. Please refer to respective sections
|
||||||
for detils on hashsums and handling of modified time.
|
for details on hashsums and modified time handling.
|
||||||
|
|
||||||
#### No metadata
|
#### No metadata
|
||||||
|
|
||||||
|
@ -165,16 +175,15 @@ You can disable meta objects by setting the meta format option to `none`.
|
||||||
In this mode chunker will scan directory for all files that follow
|
In this mode chunker will scan directory for all files that follow
|
||||||
configured chunk name format, group them by detecting chunks with the same
|
configured chunk name format, group them by detecting chunks with the same
|
||||||
base name and show group names as virtual composite files.
|
base name and show group names as virtual composite files.
|
||||||
When a download is requested, chunker will transparently assemble compound
|
This method is more prone to missing chunk errors (especially missing
|
||||||
files by merging chunks in order. This method is more prone to missing chunk
|
last chunk) than formats with metadata enabled.
|
||||||
errors (especially missing last chunk) than metadata-enabled formats.
|
|
||||||
|
|
||||||
|
|
||||||
### Hashsums
|
### Hashsums
|
||||||
|
|
||||||
Chunker supports hashsums only when a compatible metadata is present.
|
Chunker supports hashsums only when a compatible metadata is present.
|
||||||
Thus, if you choose metadata format of `none`, chunker will return
|
Hence, if you choose metadata format of `none`, chunker will report hashsum
|
||||||
`UNSUPPORTED` as hashsum.
|
as `UNSUPPORTED`.
|
||||||
|
|
||||||
Please note that metadata is stored only for composite files. If a file
|
Please note that metadata is stored only for composite files. If a file
|
||||||
is small (smaller than configured chunk size), chunker will transparently
|
is small (smaller than configured chunk size), chunker will transparently
|
||||||
|
@ -183,18 +192,19 @@ You will see the empty string as a hashsum of requested type for small
|
||||||
files if the wrapped remote doesn't support it.
|
files if the wrapped remote doesn't support it.
|
||||||
|
|
||||||
Many storage backends support MD5 and SHA1 hash types, so does chunker.
|
Many storage backends support MD5 and SHA1 hash types, so does chunker.
|
||||||
Currently you can choose one or another but not both.
|
With chunker you can choose one or another but not both.
|
||||||
MD5 is set by default as the most supported type.
|
MD5 is set by default as the most supported type.
|
||||||
Since chunker keeps hashes for composite files and falls back to the
|
Since chunker keeps hashes for composite files and falls back to the
|
||||||
wrapped remote hash for small ones, we advise you to choose the same
|
wrapped remote hash for non-chunked ones, we advise you to choose the same
|
||||||
hash type as wrapped remote so that your file listings look coherent.
|
hash type as supported by wrapped remote so that your file listings
|
||||||
|
look coherent.
|
||||||
|
|
||||||
Normally, when a file is copied to a chunker controlled remote, chunker
|
Normally, when a file is copied to a chunker-controlled remote, chunker
|
||||||
will ask the file source for compatible file hash and revert to on-the-fly
|
will ask the file source for compatible file hash and revert to on-the-fly
|
||||||
calculation if none is found. This involves some CPU overhead but provides
|
calculation if none is found. This involves some CPU overhead but provides
|
||||||
a guarantee that given hashsum is available. Also, chunker will reject
|
a guarantee that given hashsum is available. Also, chunker will reject
|
||||||
a server-side copy or move operation if source and destination hashsum
|
a server-side copy or move operation if source and destination hashsum
|
||||||
types are different, resulting in the extra network bandwidth, too.
|
types are different, resulting in the extra network bandwidth, too.
|
||||||
In some rare cases this may be undesired, so chunker provides two optional
|
In some rare cases this may be undesired, so chunker provides two optional
|
||||||
choices: `sha1quick` and `md5quick`. If the source does not support primary
|
choices: `sha1quick` and `md5quick`. If the source does not support primary
|
||||||
hash type and the quick mode is enabled, chunker will try to fall back to
|
hash type and the quick mode is enabled, chunker will try to fall back to
|
||||||
|
@ -209,10 +219,10 @@ between source and target are not found.
|
||||||
Chunker stores modification times using the wrapped remote so support
|
Chunker stores modification times using the wrapped remote so support
|
||||||
depends on that. For a small non-chunked file the chunker overlay simply
|
depends on that. For a small non-chunked file the chunker overlay simply
|
||||||
manipulates modification time of the wrapped remote file.
|
manipulates modification time of the wrapped remote file.
|
||||||
If file is large and metadata is present, then chunker will get and set
|
For a composite file with metadata chunker will get and set
|
||||||
modification time of the metadata object on the wrapped remote.
|
modification time of the metadata object on the wrapped remote.
|
||||||
If file is chunked but metadata format is `none` then chunker will
|
If file is chunked but metadata format is `none` then chunker will
|
||||||
use modification time of the first chunk.
|
use modification time of the first data chunk.
|
||||||
|
|
||||||
|
|
||||||
### Migrations
|
### Migrations
|
||||||
|
@ -222,11 +232,11 @@ chunk naming scheme is to:
|
||||||
|
|
||||||
- Collect all your chunked files under a directory and have your
|
- Collect all your chunked files under a directory and have your
|
||||||
chunker remote point to it.
|
chunker remote point to it.
|
||||||
- Create another directory (possibly on the same cloud storage)
|
- Create another directory (most probably on the same cloud storage)
|
||||||
and configure a new remote with desired metadata format,
|
and configure a new remote with desired metadata format,
|
||||||
hash type, chunk naming etc.
|
hash type, chunk naming etc.
|
||||||
- Now run `rclone sync oldchunks: newchunks:` and all your data
|
- Now run `rclone sync oldchunks: newchunks:` and all your data
|
||||||
will be transparently converted at transfer.
|
will be transparently converted in transfer.
|
||||||
This may take some time, yet chunker will try server-side
|
This may take some time, yet chunker will try server-side
|
||||||
copy if possible.
|
copy if possible.
|
||||||
- After checking data integrity you may remove configuration section
|
- After checking data integrity you may remove configuration section
|
||||||
|
@ -235,11 +245,11 @@ chunk naming scheme is to:
|
||||||
If rclone gets killed during a long operation on a big composite file,
|
If rclone gets killed during a long operation on a big composite file,
|
||||||
hidden temporary chunks may stay in the directory. They will not be
|
hidden temporary chunks may stay in the directory. They will not be
|
||||||
shown by the `list` command but will eat up your account quota.
|
shown by the `list` command but will eat up your account quota.
|
||||||
Please note that the `deletefile` rclone command deletes only active
|
Please note that the `deletefile` command deletes only active
|
||||||
chunks of a file. As a workaround, you can use remote of the wrapped
|
chunks of a file. As a workaround, you can use remote of the wrapped
|
||||||
file system to see them.
|
file system to see them.
|
||||||
An easy way to get rid of hidden garbage is to copy littered directory
|
An easy way to get rid of hidden garbage is to copy littered directory
|
||||||
somewhere using the chunker remote and purge original directory.
|
somewhere using the chunker remote and purge the original directory.
|
||||||
The `copy` command will copy only active chunks while the `purge` will
|
The `copy` command will copy only active chunks while the `purge` will
|
||||||
remove everything including garbage.
|
remove everything including garbage.
|
||||||
|
|
||||||
|
@ -260,7 +270,7 @@ Beware that in result of this some files which have been treated as chunks
|
||||||
before the change can pop up in directory listings as normal files
|
before the change can pop up in directory listings as normal files
|
||||||
and vice versa. The same warning holds for the chunk size.
|
and vice versa. The same warning holds for the chunk size.
|
||||||
If you desperately need to change critical chunking settings, you should
|
If you desperately need to change critical chunking settings, you should
|
||||||
run data migration as described in a dedicated section.
|
run data migration as described above.
|
||||||
|
|
||||||
If wrapped remote is case insensitive, the chunker overlay will inherit
|
If wrapped remote is case insensitive, the chunker overlay will inherit
|
||||||
that property (so you can't have a file called "Hello.doc" and "hello.doc"
|
that property (so you can't have a file called "Hello.doc" and "hello.doc"
|
||||||
|
@ -303,11 +313,11 @@ Choose how chunker handles hash sums.
|
||||||
- Examples:
|
- Examples:
|
||||||
- "none"
|
- "none"
|
||||||
- Chunker can pass any hash supported by wrapped remote
|
- Chunker can pass any hash supported by wrapped remote
|
||||||
- for a single-chunk file but returns nothing otherwise.
|
- for non-chunked files but returns nothing otherwise.
|
||||||
- "md5"
|
- "md5"
|
||||||
- MD5 for multi-chunk files. Requires "simplejson".
|
- MD5 for composite files. Requires "simplejson".
|
||||||
- "sha1"
|
- "sha1"
|
||||||
- SHA1 for multi-chunk files. Requires "simplejson".
|
- SHA1 for composite files. Requires "simplejson".
|
||||||
- "md5quick"
|
- "md5quick"
|
||||||
- Copying a file to chunker will request MD5 from the source
|
- Copying a file to chunker will request MD5 from the source
|
||||||
- falling back to SHA1 if unsupported. Requires "simplejson".
|
- falling back to SHA1 if unsupported. Requires "simplejson".
|
||||||
|
@ -316,7 +326,7 @@ Choose how chunker handles hash sums.
|
||||||
|
|
||||||
### Advanced Options
|
### Advanced Options
|
||||||
|
|
||||||
Here are the advanced options specific to chunker (Transparently chunk/split large files).
|
Here are the advanced options specific to chunker.
|
||||||
|
|
||||||
#### --chunker-name-format
|
#### --chunker-name-format
|
||||||
|
|
||||||
|
@ -356,7 +366,7 @@ Metadata is a small JSON file named after the composite file.
|
||||||
- Do not use metadata files at all. Requires hash type "none".
|
- Do not use metadata files at all. Requires hash type "none".
|
||||||
- "simplejson"
|
- "simplejson"
|
||||||
- Simple JSON supports hash sums and chunk validation.
|
- Simple JSON supports hash sums and chunk validation.
|
||||||
- It has the following fields: size, nchunks, md5, sha1.
|
- It has the following fields: ver, size, nchunks, md5, sha1.
|
||||||
|
|
||||||
#### --chunker-fail-on-bad-chunks
|
#### --chunker-fail-on-bad-chunks
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue