From 9b4b3033da25ec41aabb35b858a765dd734a5f09 Mon Sep 17 00:00:00 2001 From: nielash Date: Sat, 9 Mar 2024 23:36:57 -0500 Subject: [PATCH] fs/cache: fix parent not getting pinned when remote is a file Before this change, when cache.GetFn was called on a file rather than a directory, two cache entries would be added (the file + its parent) but only one of them would get pinned if the caller then called Pin(f). This left the other one exposed to expiration if the ci.FsCacheExpireDuration was reached. This was problematic because both entries point to the same Fs, and if one entry expires while the other is pinned, the Shutdown method gets erroneously called on an Fs that is still in use. An example of the problem showed up in the Hasher backend, which uses the Shutdown method to stop the bolt db used to store hashes. If a command was run on a Hasher file (ex. `rclone md5sum --download hasher:somelargefile.zip`) and hashing the file took longer than the --fs-cache-expire-duration (5m by default), the bolt db was stopped before the hashing operation completed, resulting in an error. This change fixes the issue by ensuring that: 1. only one entry is added to the cache (the file's parent, not the file). 2. future lookups correctly find the entry regardless of whether they are called with the parent name or one of its children. 3. fs.ErrorIsFile is returned when (and only when) fsString points to a file (preserving the fix from https://github.com/rclone/rclone/commit/8d5bc7f28b8459a0e04fdbfd2dca43c5d9ec4555). Note that f.Root() should always point to the parent dir as of https://github.com/rclone/rclone/commit/c69eb84573c85206ab028eda2987180e049ef2e4 --- fs/cache/cache.go | 111 +++++++++++++++++++++++++++++++++++------ fs/cache/cache_test.go | 95 ++++++++++++++++++++++++++++++----- fs/rc/cache_test.go | 2 +- lib/cache/cache.go | 16 ++++++ 4 files changed, 197 insertions(+), 27 deletions(-) diff --git a/fs/cache/cache.go b/fs/cache/cache.go index ae86ac082..e2c104395 100644 --- a/fs/cache/cache.go +++ b/fs/cache/cache.go @@ -4,6 +4,7 @@ package cache import ( "context" "runtime" + "strings" "sync" "github.com/rclone/rclone/fs" @@ -12,10 +13,11 @@ import ( ) var ( - once sync.Once // creation - c *cache.Cache - mu sync.Mutex // mutex to protect remap - remap = map[string]string{} // map user supplied names to canonical names + once sync.Once // creation + c *cache.Cache + mu sync.Mutex // mutex to protect remap + remap = map[string]string{} // map user supplied names to canonical names - [fsString]canonicalName + childParentMap = map[string]string{} // tracks a one-to-many relationship between parent dirs and their direct children files - [child]parent ) // Create the cache just once @@ -57,6 +59,39 @@ func addMapping(fsString, canonicalName string) { mu.Unlock() } +// addChild tracks known file (child) to directory (parent) relationships. +// Note that the canonicalName of a child will always equal that of its parent, +// but not everything with an equal canonicalName is a child. +// It could be an alias or overridden version of a directory. +func addChild(child, parent string) { + if child == parent { + return + } + mu.Lock() + childParentMap[child] = parent + mu.Unlock() +} + +// returns true if name is definitely known to be a child (i.e. a file, not a dir). +// returns false if name is a dir or if we don't know. +func isChild(child string) bool { + mu.Lock() + _, found := childParentMap[child] + mu.Unlock() + return found +} + +// ensures that we return fs.ErrorIsFile when necessary +func getError(fsString string, err error) error { + if err != nil && err != fs.ErrorIsFile { + return err + } + if isChild(fsString) { + return fs.ErrorIsFile + } + return nil +} + // GetFn gets an fs.Fs named fsString either from the cache or creates // it afresh with the create function func GetFn(ctx context.Context, fsString string, create func(ctx context.Context, fsString string) (fs.Fs, error)) (f fs.Fs, err error) { @@ -69,31 +104,39 @@ func GetFn(ctx context.Context, fsString string, create func(ctx context.Context created = ok return f, ok, err }) + f, ok := value.(fs.Fs) if err != nil && err != fs.ErrorIsFile { + if ok { + return f, err // for possible future uses of PutErr + } return nil, err } - f = value.(fs.Fs) // Check we stored the Fs at the canonical name if created { canonicalName := fs.ConfigString(f) if canonicalName != canonicalFsString { - // Note that if err == fs.ErrorIsFile at this moment - // then we can't rename the remote as it will have the - // wrong error status, we need to add a new one. - if err == nil { + if err == nil { // it's a dir fs.Debugf(nil, "fs cache: renaming cache item %q to be canonical %q", canonicalFsString, canonicalName) value, found := c.Rename(canonicalFsString, canonicalName) if found { f = value.(fs.Fs) } addMapping(canonicalFsString, canonicalName) - } else { - fs.Debugf(nil, "fs cache: adding new entry for parent of %q, %q", canonicalFsString, canonicalName) - Put(canonicalName, f) + } else { // it's a file + // the fs we cache is always the file's parent, never the file, + // but we use the childParentMap to return the correct error status based on the fsString passed in. + fs.Debugf(nil, "fs cache: renaming child cache item %q to be canonical for parent %q", canonicalFsString, canonicalName) + value, found := c.Rename(canonicalFsString, canonicalName) // rename the file entry to parent + if found { + f = value.(fs.Fs) // if parent already exists, use it + } + Put(canonicalName, f) // force err == nil for the cache + addMapping(canonicalFsString, canonicalName) // note the fsString-canonicalName connection for future lookups + addChild(fsString, canonicalName) // note the file-directory connection for future lookups } } } - return f, err + return f, getError(fsString, err) // ensure fs.ErrorIsFile is returned when necessary } // Pin f into the cache until Unpin is called @@ -111,7 +154,6 @@ func PinUntilFinalized(f fs.Fs, x interface{}) { runtime.SetFinalizer(x, func(_ interface{}) { Unpin(f) }) - } // Unpin f from the cache @@ -174,6 +216,9 @@ func PutErr(fsString string, f fs.Fs, err error) { canonicalName := fs.ConfigString(f) c.PutErr(canonicalName, f, err) addMapping(fsString, canonicalName) + if err == fs.ErrorIsFile { + addChild(fsString, canonicalName) + } } // Put puts an fs.Fs named fsString into the cache @@ -186,6 +231,7 @@ func Put(fsString string, f fs.Fs) { // Returns number of entries deleted func ClearConfig(name string) (deleted int) { createOnFirstUse() + ClearMappingsPrefix(name) return c.DeletePrefix(name + ":") } @@ -193,6 +239,7 @@ func ClearConfig(name string) (deleted int) { func Clear() { createOnFirstUse() c.Clear() + ClearMappings() } // Entries returns the number of entries in the cache @@ -200,3 +247,39 @@ func Entries() int { createOnFirstUse() return c.Entries() } + +// ClearMappings removes everything from remap and childParentMap +func ClearMappings() { + mu.Lock() + defer mu.Unlock() + remap = map[string]string{} + childParentMap = map[string]string{} +} + +// ClearMappingsPrefix deletes all mappings to parents with given prefix +// +// Returns number of entries deleted +func ClearMappingsPrefix(prefix string) (deleted int) { + mu.Lock() + do := func(mapping map[string]string) { + for key, val := range mapping { + if !strings.HasPrefix(val, prefix) { + continue + } + delete(mapping, key) + deleted++ + } + } + do(remap) + do(childParentMap) + mu.Unlock() + return deleted +} + +// EntriesWithPinCount returns the number of pinned and unpinned entries in the cache +// +// Each entry is counted only once, regardless of entry.pinCount +func EntriesWithPinCount() (pinned, unpinned int) { + createOnFirstUse() + return c.EntriesWithPinCount() +} diff --git a/fs/cache/cache_test.go b/fs/cache/cache_test.go index acddac80c..d9d54fa2e 100644 --- a/fs/cache/cache_test.go +++ b/fs/cache/cache_test.go @@ -24,7 +24,7 @@ func mockNewFs(t *testing.T) func(ctx context.Context, path string) (fs.Fs, erro switch path { case "mock:/": return mockfs.NewFs(ctx, "mock", "/", nil) - case "mock:/file.txt", "mock:file.txt": + case "mock:/file.txt", "mock:file.txt", "mock:/file2.txt", "mock:file2.txt": fMock, err := mockfs.NewFs(ctx, "mock", "/", nil) require.NoError(t, err) return fMock, fs.ErrorIsFile @@ -55,6 +55,7 @@ func TestGet(t *testing.T) { } func TestGetFile(t *testing.T) { + defer ClearMappings() create := mockNewFs(t) assert.Equal(t, 0, Entries()) @@ -63,7 +64,7 @@ func TestGetFile(t *testing.T) { require.Equal(t, fs.ErrorIsFile, err) require.NotNil(t, f) - assert.Equal(t, 2, Entries()) + assert.Equal(t, 1, Entries()) f2, err := GetFn(context.Background(), "mock:/file.txt", create) require.Equal(t, fs.ErrorIsFile, err) @@ -71,7 +72,7 @@ func TestGetFile(t *testing.T) { assert.Equal(t, f, f2) - // check parent is there too + // check it is also found when referred to by parent name f2, err = GetFn(context.Background(), "mock:/", create) require.Nil(t, err) require.NotNil(t, f2) @@ -80,6 +81,7 @@ func TestGetFile(t *testing.T) { } func TestGetFile2(t *testing.T) { + defer ClearMappings() create := mockNewFs(t) assert.Equal(t, 0, Entries()) @@ -88,7 +90,7 @@ func TestGetFile2(t *testing.T) { require.Equal(t, fs.ErrorIsFile, err) require.NotNil(t, f) - assert.Equal(t, 2, Entries()) + assert.Equal(t, 1, Entries()) f2, err := GetFn(context.Background(), "mock:file.txt", create) require.Equal(t, fs.ErrorIsFile, err) @@ -96,7 +98,7 @@ func TestGetFile2(t *testing.T) { assert.Equal(t, f, f2) - // check parent is there too + // check it is also found when referred to by parent name f2, err = GetFn(context.Background(), "mock:/", create) require.Nil(t, err) require.NotNil(t, f2) @@ -124,22 +126,22 @@ func TestPutErr(t *testing.T) { assert.Equal(t, 0, Entries()) - PutErr("mock:file.txt", f, fs.ErrorIsFile) + PutErr("mock:/", f, fs.ErrorNotFoundInConfigFile) assert.Equal(t, 1, Entries()) - fNew, err := GetFn(context.Background(), "mock:file.txt", create) - require.Equal(t, fs.ErrorIsFile, err) + fNew, err := GetFn(context.Background(), "mock:/", create) + require.Equal(t, fs.ErrorNotFoundInConfigFile, err) require.Equal(t, f, fNew) assert.Equal(t, 1, Entries()) // Check canonicalisation - PutErr("mock:/file.txt", f, fs.ErrorIsFile) + PutErr("mock:/file.txt", f, fs.ErrorNotFoundInConfigFile) fNew, err = GetFn(context.Background(), "mock:/file.txt", create) - require.Equal(t, fs.ErrorIsFile, err) + require.Equal(t, fs.ErrorNotFoundInConfigFile, err) require.Equal(t, f, fNew) assert.Equal(t, 1, Entries()) @@ -190,6 +192,75 @@ func TestPin(t *testing.T) { Unpin(f2) } +func TestPinFile(t *testing.T) { + defer ClearMappings() + create := mockNewFs(t) + + // Test pinning and unpinning nonexistent + f, err := mockfs.NewFs(context.Background(), "mock", "/file.txt", nil) + require.NoError(t, err) + Pin(f) + Unpin(f) + + // Now test pinning an existing + f2, err := GetFn(context.Background(), "mock:/file.txt", create) + require.Equal(t, fs.ErrorIsFile, err) + assert.Equal(t, 1, len(childParentMap)) + + Pin(f2) + assert.Equal(t, 1, Entries()) + pinned, unpinned := EntriesWithPinCount() + assert.Equal(t, 1, pinned) + assert.Equal(t, 0, unpinned) + + Unpin(f2) + assert.Equal(t, 1, Entries()) + pinned, unpinned = EntriesWithPinCount() + assert.Equal(t, 0, pinned) + assert.Equal(t, 1, unpinned) + + // try a different child of the same parent, and parent + // should not add additional cache items + called = 0 // this one does create() because we haven't seen it before and don't yet know it's a file + f3, err := GetFn(context.Background(), "mock:/file2.txt", create) + assert.Equal(t, fs.ErrorIsFile, err) + assert.Equal(t, 1, Entries()) + assert.Equal(t, 2, len(childParentMap)) + + parent, err := GetFn(context.Background(), "mock:/", create) + assert.NoError(t, err) + assert.Equal(t, 1, Entries()) + assert.Equal(t, 2, len(childParentMap)) + + Pin(f3) + assert.Equal(t, 1, Entries()) + pinned, unpinned = EntriesWithPinCount() + assert.Equal(t, 1, pinned) + assert.Equal(t, 0, unpinned) + + Unpin(f3) + assert.Equal(t, 1, Entries()) + pinned, unpinned = EntriesWithPinCount() + assert.Equal(t, 0, pinned) + assert.Equal(t, 1, unpinned) + + Pin(parent) + assert.Equal(t, 1, Entries()) + pinned, unpinned = EntriesWithPinCount() + assert.Equal(t, 1, pinned) + assert.Equal(t, 0, unpinned) + + Unpin(parent) + assert.Equal(t, 1, Entries()) + pinned, unpinned = EntriesWithPinCount() + assert.Equal(t, 0, pinned) + assert.Equal(t, 1, unpinned) + + // all 3 should have equal configstrings + assert.Equal(t, fs.ConfigString(f2), fs.ConfigString(f3)) + assert.Equal(t, fs.ConfigString(f2), fs.ConfigString(parent)) +} + func TestClearConfig(t *testing.T) { create := mockNewFs(t) @@ -198,9 +269,9 @@ func TestClearConfig(t *testing.T) { _, err := GetFn(context.Background(), "mock:/file.txt", create) require.Equal(t, fs.ErrorIsFile, err) - assert.Equal(t, 2, Entries()) // file + parent + assert.Equal(t, 1, Entries()) - assert.Equal(t, 2, ClearConfig("mock")) + assert.Equal(t, 1, ClearConfig("mock")) assert.Equal(t, 0, Entries()) } diff --git a/fs/rc/cache_test.go b/fs/rc/cache_test.go index 0a351cee7..15952fa04 100644 --- a/fs/rc/cache_test.go +++ b/fs/rc/cache_test.go @@ -22,7 +22,7 @@ func mockNewFs(t *testing.T) func() { require.NoError(t, err) cache.Put("mock:/", f) cache.Put(":mock:/", f) - f, err = mockfs.NewFs(ctx, "mock", "dir/file.txt", nil) + f, err = mockfs.NewFs(ctx, "mock", "dir/", nil) require.NoError(t, err) cache.PutErr("mock:dir/file.txt", f, fs.ErrorIsFile) return func() { diff --git a/lib/cache/cache.go b/lib/cache/cache.go index fc73eab68..6f841d182 100644 --- a/lib/cache/cache.go +++ b/lib/cache/cache.go @@ -260,3 +260,19 @@ func (c *Cache) SetFinalizer(finalize func(interface{})) { c.finalize = finalize c.mu.Unlock() } + +// EntriesWithPinCount returns the number of pinned and unpinned entries in the cache +// +// Each entry is counted only once, regardless of entry.pinCount +func (c *Cache) EntriesWithPinCount() (pinned, unpinned int) { + c.mu.Lock() + for _, entry := range c.cache { + if entry.pinCount <= 0 { + unpinned++ + } else { + pinned++ + } + } + c.mu.Unlock() + return pinned, unpinned +}