fs/cache: fix parent not getting pinned when remote is a file

Before this change, when cache.GetFn was called on a file rather than a
directory, two cache entries would be added (the file + its parent) but only one
of them would get pinned if the caller then called Pin(f). This left the other
one exposed to expiration if the ci.FsCacheExpireDuration was reached. This was
problematic because both entries point to the same Fs, and if one entry expires
while the other is pinned, the Shutdown method gets erroneously called on an Fs
that is still in use.

An example of the problem showed up in the Hasher backend, which uses the
Shutdown method to stop the bolt db used to store hashes. If a command was run
on a Hasher file (ex. `rclone md5sum --download hasher:somelargefile.zip`) and
hashing the file took longer than the --fs-cache-expire-duration (5m by default), the
bolt db was stopped before the hashing operation completed, resulting in an
error.

This change fixes the issue by ensuring that:
1. only one entry is added to the cache (the file's parent, not the file).
2. future lookups correctly find the entry regardless of whether they are called
	with the parent name or one of its children.
3. fs.ErrorIsFile is returned when (and only when) fsString points to a file
	(preserving the fix from 8d5bc7f28b).

Note that f.Root() should always point to the parent dir as of c69eb84573
This commit is contained in:
nielash 2024-03-09 23:36:57 -05:00 committed by Nick Craig-Wood
parent 94997d25d2
commit 9b4b3033da
4 changed files with 197 additions and 27 deletions

105
fs/cache/cache.go vendored
View file

@ -4,6 +4,7 @@ package cache
import (
"context"
"runtime"
"strings"
"sync"
"github.com/rclone/rclone/fs"
@ -15,7 +16,8 @@ var (
once sync.Once // creation
c *cache.Cache
mu sync.Mutex // mutex to protect remap
remap = map[string]string{} // map user supplied names to canonical names
remap = map[string]string{} // map user supplied names to canonical names - [fsString]canonicalName
childParentMap = map[string]string{} // tracks a one-to-many relationship between parent dirs and their direct children files - [child]parent
)
// Create the cache just once
@ -57,6 +59,39 @@ func addMapping(fsString, canonicalName string) {
mu.Unlock()
}
// addChild tracks known file (child) to directory (parent) relationships.
// Note that the canonicalName of a child will always equal that of its parent,
// but not everything with an equal canonicalName is a child.
// It could be an alias or overridden version of a directory.
func addChild(child, parent string) {
if child == parent {
return
}
mu.Lock()
childParentMap[child] = parent
mu.Unlock()
}
// returns true if name is definitely known to be a child (i.e. a file, not a dir).
// returns false if name is a dir or if we don't know.
func isChild(child string) bool {
mu.Lock()
_, found := childParentMap[child]
mu.Unlock()
return found
}
// ensures that we return fs.ErrorIsFile when necessary
func getError(fsString string, err error) error {
if err != nil && err != fs.ErrorIsFile {
return err
}
if isChild(fsString) {
return fs.ErrorIsFile
}
return nil
}
// GetFn gets an fs.Fs named fsString either from the cache or creates
// it afresh with the create function
func GetFn(ctx context.Context, fsString string, create func(ctx context.Context, fsString string) (fs.Fs, error)) (f fs.Fs, err error) {
@ -69,31 +104,39 @@ func GetFn(ctx context.Context, fsString string, create func(ctx context.Context
created = ok
return f, ok, err
})
f, ok := value.(fs.Fs)
if err != nil && err != fs.ErrorIsFile {
if ok {
return f, err // for possible future uses of PutErr
}
return nil, err
}
f = value.(fs.Fs)
// Check we stored the Fs at the canonical name
if created {
canonicalName := fs.ConfigString(f)
if canonicalName != canonicalFsString {
// Note that if err == fs.ErrorIsFile at this moment
// then we can't rename the remote as it will have the
// wrong error status, we need to add a new one.
if err == nil {
if err == nil { // it's a dir
fs.Debugf(nil, "fs cache: renaming cache item %q to be canonical %q", canonicalFsString, canonicalName)
value, found := c.Rename(canonicalFsString, canonicalName)
if found {
f = value.(fs.Fs)
}
addMapping(canonicalFsString, canonicalName)
} else {
fs.Debugf(nil, "fs cache: adding new entry for parent of %q, %q", canonicalFsString, canonicalName)
Put(canonicalName, f)
} else { // it's a file
// the fs we cache is always the file's parent, never the file,
// but we use the childParentMap to return the correct error status based on the fsString passed in.
fs.Debugf(nil, "fs cache: renaming child cache item %q to be canonical for parent %q", canonicalFsString, canonicalName)
value, found := c.Rename(canonicalFsString, canonicalName) // rename the file entry to parent
if found {
f = value.(fs.Fs) // if parent already exists, use it
}
Put(canonicalName, f) // force err == nil for the cache
addMapping(canonicalFsString, canonicalName) // note the fsString-canonicalName connection for future lookups
addChild(fsString, canonicalName) // note the file-directory connection for future lookups
}
}
}
return f, err
return f, getError(fsString, err) // ensure fs.ErrorIsFile is returned when necessary
}
// Pin f into the cache until Unpin is called
@ -111,7 +154,6 @@ func PinUntilFinalized(f fs.Fs, x interface{}) {
runtime.SetFinalizer(x, func(_ interface{}) {
Unpin(f)
})
}
// Unpin f from the cache
@ -174,6 +216,9 @@ func PutErr(fsString string, f fs.Fs, err error) {
canonicalName := fs.ConfigString(f)
c.PutErr(canonicalName, f, err)
addMapping(fsString, canonicalName)
if err == fs.ErrorIsFile {
addChild(fsString, canonicalName)
}
}
// Put puts an fs.Fs named fsString into the cache
@ -186,6 +231,7 @@ func Put(fsString string, f fs.Fs) {
// Returns number of entries deleted
func ClearConfig(name string) (deleted int) {
createOnFirstUse()
ClearMappingsPrefix(name)
return c.DeletePrefix(name + ":")
}
@ -193,6 +239,7 @@ func ClearConfig(name string) (deleted int) {
func Clear() {
createOnFirstUse()
c.Clear()
ClearMappings()
}
// Entries returns the number of entries in the cache
@ -200,3 +247,39 @@ func Entries() int {
createOnFirstUse()
return c.Entries()
}
// ClearMappings removes everything from remap and childParentMap
func ClearMappings() {
mu.Lock()
defer mu.Unlock()
remap = map[string]string{}
childParentMap = map[string]string{}
}
// ClearMappingsPrefix deletes all mappings to parents with given prefix
//
// Returns number of entries deleted
func ClearMappingsPrefix(prefix string) (deleted int) {
mu.Lock()
do := func(mapping map[string]string) {
for key, val := range mapping {
if !strings.HasPrefix(val, prefix) {
continue
}
delete(mapping, key)
deleted++
}
}
do(remap)
do(childParentMap)
mu.Unlock()
return deleted
}
// EntriesWithPinCount returns the number of pinned and unpinned entries in the cache
//
// Each entry is counted only once, regardless of entry.pinCount
func EntriesWithPinCount() (pinned, unpinned int) {
createOnFirstUse()
return c.EntriesWithPinCount()
}

View file

@ -24,7 +24,7 @@ func mockNewFs(t *testing.T) func(ctx context.Context, path string) (fs.Fs, erro
switch path {
case "mock:/":
return mockfs.NewFs(ctx, "mock", "/", nil)
case "mock:/file.txt", "mock:file.txt":
case "mock:/file.txt", "mock:file.txt", "mock:/file2.txt", "mock:file2.txt":
fMock, err := mockfs.NewFs(ctx, "mock", "/", nil)
require.NoError(t, err)
return fMock, fs.ErrorIsFile
@ -55,6 +55,7 @@ func TestGet(t *testing.T) {
}
func TestGetFile(t *testing.T) {
defer ClearMappings()
create := mockNewFs(t)
assert.Equal(t, 0, Entries())
@ -63,7 +64,7 @@ func TestGetFile(t *testing.T) {
require.Equal(t, fs.ErrorIsFile, err)
require.NotNil(t, f)
assert.Equal(t, 2, Entries())
assert.Equal(t, 1, Entries())
f2, err := GetFn(context.Background(), "mock:/file.txt", create)
require.Equal(t, fs.ErrorIsFile, err)
@ -71,7 +72,7 @@ func TestGetFile(t *testing.T) {
assert.Equal(t, f, f2)
// check parent is there too
// check it is also found when referred to by parent name
f2, err = GetFn(context.Background(), "mock:/", create)
require.Nil(t, err)
require.NotNil(t, f2)
@ -80,6 +81,7 @@ func TestGetFile(t *testing.T) {
}
func TestGetFile2(t *testing.T) {
defer ClearMappings()
create := mockNewFs(t)
assert.Equal(t, 0, Entries())
@ -88,7 +90,7 @@ func TestGetFile2(t *testing.T) {
require.Equal(t, fs.ErrorIsFile, err)
require.NotNil(t, f)
assert.Equal(t, 2, Entries())
assert.Equal(t, 1, Entries())
f2, err := GetFn(context.Background(), "mock:file.txt", create)
require.Equal(t, fs.ErrorIsFile, err)
@ -96,7 +98,7 @@ func TestGetFile2(t *testing.T) {
assert.Equal(t, f, f2)
// check parent is there too
// check it is also found when referred to by parent name
f2, err = GetFn(context.Background(), "mock:/", create)
require.Nil(t, err)
require.NotNil(t, f2)
@ -124,22 +126,22 @@ func TestPutErr(t *testing.T) {
assert.Equal(t, 0, Entries())
PutErr("mock:file.txt", f, fs.ErrorIsFile)
PutErr("mock:/", f, fs.ErrorNotFoundInConfigFile)
assert.Equal(t, 1, Entries())
fNew, err := GetFn(context.Background(), "mock:file.txt", create)
require.Equal(t, fs.ErrorIsFile, err)
fNew, err := GetFn(context.Background(), "mock:/", create)
require.Equal(t, fs.ErrorNotFoundInConfigFile, err)
require.Equal(t, f, fNew)
assert.Equal(t, 1, Entries())
// Check canonicalisation
PutErr("mock:/file.txt", f, fs.ErrorIsFile)
PutErr("mock:/file.txt", f, fs.ErrorNotFoundInConfigFile)
fNew, err = GetFn(context.Background(), "mock:/file.txt", create)
require.Equal(t, fs.ErrorIsFile, err)
require.Equal(t, fs.ErrorNotFoundInConfigFile, err)
require.Equal(t, f, fNew)
assert.Equal(t, 1, Entries())
@ -190,6 +192,75 @@ func TestPin(t *testing.T) {
Unpin(f2)
}
func TestPinFile(t *testing.T) {
defer ClearMappings()
create := mockNewFs(t)
// Test pinning and unpinning nonexistent
f, err := mockfs.NewFs(context.Background(), "mock", "/file.txt", nil)
require.NoError(t, err)
Pin(f)
Unpin(f)
// Now test pinning an existing
f2, err := GetFn(context.Background(), "mock:/file.txt", create)
require.Equal(t, fs.ErrorIsFile, err)
assert.Equal(t, 1, len(childParentMap))
Pin(f2)
assert.Equal(t, 1, Entries())
pinned, unpinned := EntriesWithPinCount()
assert.Equal(t, 1, pinned)
assert.Equal(t, 0, unpinned)
Unpin(f2)
assert.Equal(t, 1, Entries())
pinned, unpinned = EntriesWithPinCount()
assert.Equal(t, 0, pinned)
assert.Equal(t, 1, unpinned)
// try a different child of the same parent, and parent
// should not add additional cache items
called = 0 // this one does create() because we haven't seen it before and don't yet know it's a file
f3, err := GetFn(context.Background(), "mock:/file2.txt", create)
assert.Equal(t, fs.ErrorIsFile, err)
assert.Equal(t, 1, Entries())
assert.Equal(t, 2, len(childParentMap))
parent, err := GetFn(context.Background(), "mock:/", create)
assert.NoError(t, err)
assert.Equal(t, 1, Entries())
assert.Equal(t, 2, len(childParentMap))
Pin(f3)
assert.Equal(t, 1, Entries())
pinned, unpinned = EntriesWithPinCount()
assert.Equal(t, 1, pinned)
assert.Equal(t, 0, unpinned)
Unpin(f3)
assert.Equal(t, 1, Entries())
pinned, unpinned = EntriesWithPinCount()
assert.Equal(t, 0, pinned)
assert.Equal(t, 1, unpinned)
Pin(parent)
assert.Equal(t, 1, Entries())
pinned, unpinned = EntriesWithPinCount()
assert.Equal(t, 1, pinned)
assert.Equal(t, 0, unpinned)
Unpin(parent)
assert.Equal(t, 1, Entries())
pinned, unpinned = EntriesWithPinCount()
assert.Equal(t, 0, pinned)
assert.Equal(t, 1, unpinned)
// all 3 should have equal configstrings
assert.Equal(t, fs.ConfigString(f2), fs.ConfigString(f3))
assert.Equal(t, fs.ConfigString(f2), fs.ConfigString(parent))
}
func TestClearConfig(t *testing.T) {
create := mockNewFs(t)
@ -198,9 +269,9 @@ func TestClearConfig(t *testing.T) {
_, err := GetFn(context.Background(), "mock:/file.txt", create)
require.Equal(t, fs.ErrorIsFile, err)
assert.Equal(t, 2, Entries()) // file + parent
assert.Equal(t, 1, Entries())
assert.Equal(t, 2, ClearConfig("mock"))
assert.Equal(t, 1, ClearConfig("mock"))
assert.Equal(t, 0, Entries())
}

View file

@ -22,7 +22,7 @@ func mockNewFs(t *testing.T) func() {
require.NoError(t, err)
cache.Put("mock:/", f)
cache.Put(":mock:/", f)
f, err = mockfs.NewFs(ctx, "mock", "dir/file.txt", nil)
f, err = mockfs.NewFs(ctx, "mock", "dir/", nil)
require.NoError(t, err)
cache.PutErr("mock:dir/file.txt", f, fs.ErrorIsFile)
return func() {

16
lib/cache/cache.go vendored
View file

@ -260,3 +260,19 @@ func (c *Cache) SetFinalizer(finalize func(interface{})) {
c.finalize = finalize
c.mu.Unlock()
}
// EntriesWithPinCount returns the number of pinned and unpinned entries in the cache
//
// Each entry is counted only once, regardless of entry.pinCount
func (c *Cache) EntriesWithPinCount() (pinned, unpinned int) {
c.mu.Lock()
for _, entry := range c.cache {
if entry.pinCount <= 0 {
unpinned++
} else {
pinned++
}
}
c.mu.Unlock()
return pinned, unpinned
}