Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

File locking module data store #3139

Merged
merged 45 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
c4cad75
Add test for concurrent cache reads + writes
doriable Jul 2, 2024
d4eb36a
Implement file locking on module.yaml in for ModuleDataStore
doriable Jul 3, 2024
a9dffdd
Skip test
doriable Jul 8, 2024
584b8f1
Fix lint
doriable Jul 8, 2024
f1953ef
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Jul 8, 2024
c40e2fa
Updates from conversations + comments
doriable Jul 8, 2024
e9589be
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Jul 8, 2024
89a076d
Close moduleDir before other delete operations
doriable Jul 9, 2024
0e49421
Refactor test away from errgroup
doriable Jul 9, 2024
812da0c
Use a lock file instead of locking on module.yaml
doriable Jul 9, 2024
caaad59
Check module.yaml during PutModules to avoid writing extra data
doriable Jul 9, 2024
1ac3afd
Fix delete invalid to use storage APIs
doriable Jul 9, 2024
ad470e5
Add locker options for filelock.Locker
doriable Jul 9, 2024
05ea4bb
Address comments + fix delete in delete invalid files
doriable Jul 10, 2024
7332a7d
Remove extra comment
doriable Jul 10, 2024
e7bf3cd
Only avoid calling deleteInvalidModuleData if module.yaml was not found
doriable Jul 10, 2024
2b73dc4
Address small comments
doriable Jul 11, 2024
cbf7021
Change `filelocker` as a required arg and adjust tests.
doriable Jul 11, 2024
427e0b5
Remove deleteInvalidModuleData
doriable Jul 11, 2024
f146b60
Add storageos test to module_data_store_test.go
doriable Jul 11, 2024
d162eec
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Jul 11, 2024
8829be0
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Jul 11, 2024
1f6c0d2
Small clean-up
doriable Jul 11, 2024
7dfa33e
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Jul 15, 2024
c44c7ee
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Jul 17, 2024
3916950
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Jul 18, 2024
e98467d
Separate dir for file locking
doriable Jul 18, 2024
6a9416a
Apply suggestions from code review
doriable Jul 25, 2024
b52c371
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Jul 25, 2024
4266bd0
Delete lock files
doriable Jul 25, 2024
d4340a7
Address comments
doriable Jul 25, 2024
baa4f31
Fix lint
doriable Jul 25, 2024
1939643
Remove nil checks for filelocker and use NopLocker when passed nil in…
doriable Jul 25, 2024
ede95e9
Merge branch 'main' into file-locking-module-data-store
bufdev Aug 1, 2024
d5d9178
Address some comments
doriable Aug 1, 2024
5c3baa2
Fix tests
doriable Aug 1, 2024
44f3966
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Aug 8, 2024
2e46398
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Aug 13, 2024
7fa96d1
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Aug 21, 2024
a6c476a
Address comments
doriable Aug 21, 2024
c520685
Unexpand dockerignore glob
doriable Aug 21, 2024
723e85c
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Aug 21, 2024
61a38bb
Add in-line comments to document module read/write processes.
doriable Aug 21, 2024
fbd040f
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Aug 21, 2024
4598d8c
Add comment explaining module.yaml as the source of validity at the m…
doriable Aug 21, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -281,4 +281,7 @@ issues:
path: private/buf/cmd/buf/buf_test.go
# The LsModules tests call chdir and cannot be parallelized.
text: "LsModules"

- linters:
- forbidigo
# We are using errgroup in a test for concurrent reads/writes to the cache.
path: private/bufpkg/bufmodule/bufmodulecache/bufmodulecache_test.go
doriable marked this conversation as resolved.
Show resolved Hide resolved
6 changes: 6 additions & 0 deletions private/buf/bufcli/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/bufbuild/buf/private/bufpkg/bufmodule/bufmodulestore"
"github.com/bufbuild/buf/private/pkg/app/appext"
"github.com/bufbuild/buf/private/pkg/command"
"github.com/bufbuild/buf/private/pkg/filelock"
"github.com/bufbuild/buf/private/pkg/normalpath"
"github.com/bufbuild/buf/private/pkg/storage/storageos"
)
Expand Down Expand Up @@ -166,12 +167,17 @@ func newModuleDataProvider(
if err != nil {
return nil, err
}
filelocker, err := filelock.NewLocker(fullCacheDirPath)
doriable marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return nil, err
}
return bufmodulecache.NewModuleDataProvider(
container.Logger(),
delegateModuleDataProvider,
bufmodulestore.NewModuleDataStore(
container.Logger(),
cacheBucket,
bufmodulestore.ModuleDataStoreWithFileLocker(filelocker),
doriable marked this conversation as resolved.
Show resolved Hide resolved
),
), nil
}
Expand Down
62 changes: 61 additions & 1 deletion private/bufpkg/bufmodule/bufmodulecache/bufmodulecache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,21 @@ package bufmodulecache

import (
"context"
"os"
"path/filepath"
"testing"

"github.com/bufbuild/buf/private/bufpkg/bufmodule"
"github.com/bufbuild/buf/private/bufpkg/bufmodule/bufmodulestore"
"github.com/bufbuild/buf/private/bufpkg/bufmodule/bufmoduletesting"
"github.com/bufbuild/buf/private/pkg/filelock"
"github.com/bufbuild/buf/private/pkg/slicesext"
"github.com/bufbuild/buf/private/pkg/storage/storagemem"
"github.com/bufbuild/buf/private/pkg/storage/storageos"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/zap"
"golang.org/x/sync/errgroup"
)

func TestCommitProviderForModuleKeyBasic(t *testing.T) {
Expand Down Expand Up @@ -214,6 +220,57 @@ func TestModuleDataProviderBasic(t *testing.T) {
)
}

func TestConcurrentCacheReadWrite(t *testing.T) {
t.Skip("skipping expensive test for concurrent cache reads/writes")
doriable marked this conversation as resolved.
Show resolved Hide resolved
t.Parallel()

bsrProvider, moduleKeys := testGetBSRProviderAndModuleKeys(t, context.Background())
tempDir := t.TempDir()
cacheDir := filepath.Join(tempDir, "cache")

for i := 0; i < 20; i++ {
require.NoError(t, os.MkdirAll(cacheDir, 0755))
errs, ctx := errgroup.WithContext(context.Background())

for j := 0; j < 5; j++ {
bucket, err := storageos.NewProvider().NewReadWriteBucket(cacheDir)
require.NoError(t, err)
filelocker, err := filelock.NewLocker(cacheDir)
doriable marked this conversation as resolved.
Show resolved Hide resolved
require.NoError(t, err)

cacheProvider := newModuleDataProvider(
zap.NewNop(),
doriable marked this conversation as resolved.
Show resolved Hide resolved
bsrProvider,
bufmodulestore.NewModuleDataStore(
zap.NewNop(),
bucket,
bufmodulestore.ModuleDataStoreWithFileLocker(filelocker),
),
)

errs.Go(func() error {
moduleDatas, err := cacheProvider.GetModuleDatasForModuleKeys(
ctx,
moduleKeys,
)
if err != nil {
return err
}
for _, moduleData := range moduleDatas {
// Calling moduleData.Bucket() checks the digest
if _, err := moduleData.Bucket(); err != nil {
return err
}
}
return nil
})
}

assert.NoError(t, errs.Wait()) // Waits for all go routines to finish and returns the first error, if any
require.NoError(t, os.RemoveAll(cacheDir))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no need to do this, and it's somewhat unsafe - temporary directories are automatically cleaned up.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a subdirectory of the temp dir created by the test and we want to clear it after each attempt (to start fresh). This exposed race conditions that wouldn't be hit once we managed to successfully cache everything during an attempt.

}
}

func testGetBSRProviderAndModuleKeys(t *testing.T, ctx context.Context) (bufmoduletesting.OmniProvider, []bufmodule.ModuleKey) {
bsrProvider, err := bufmoduletesting.NewOmniProvider(
bufmoduletesting.ModuleData{
Expand All @@ -235,7 +292,10 @@ func testGetBSRProviderAndModuleKeys(t *testing.T, ctx context.Context) (bufmodu
bufmoduletesting.ModuleData{
Name: "buf.build/foo/mod3",
PathToData: map[string][]byte{
"mod3.proto": []byte(
"mod3a.proto": []byte(
pkwarren marked this conversation as resolved.
Show resolved Hide resolved
`syntax = proto3; package mod3;`,
),
"mod3b.proto": []byte(
`syntax = proto3; package mod3;`,
),
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,6 @@ func newModuleDataProvider(
func (p *moduleDataProvider) GetModuleDatasForModuleKeys(
ctx context.Context,
moduleKeys []bufmodule.ModuleKey,
) ([]bufmodule.ModuleData, error) {
) (_ []bufmodule.ModuleData, retErr error) {
doriable marked this conversation as resolved.
Show resolved Hide resolved
return p.baseProvider.getValuesForKeys(ctx, moduleKeys)
}
79 changes: 59 additions & 20 deletions private/bufpkg/bufmodule/bufmodulestore/module_data_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

"github.com/bufbuild/buf/private/bufpkg/bufmodule"
"github.com/bufbuild/buf/private/pkg/encoding"
"github.com/bufbuild/buf/private/pkg/filelock"
"github.com/bufbuild/buf/private/pkg/normalpath"
"github.com/bufbuild/buf/private/pkg/slicesext"
"github.com/bufbuild/buf/private/pkg/storage"
Expand Down Expand Up @@ -96,13 +97,27 @@ func ModuleDataStoreWithTar() ModuleDataStoreOption {
}
}

// TODO(doria): clean up GoDoc.
// ModuleDataStoreWithFileLocker returns a new ModuleDataStoreOption that sets a filelock.Locker.
//
// If a filelocker is set, then the module data store will use this to grab a file lock on
// module.yaml to synchronize reading and writing to the cache.
//
// If ModuleDataStoreWithTar is set, then this is ignored.
func ModuleDataStoreWithFileLocker(filelocker filelock.Locker) ModuleDataStoreOption {
return func(moduleDataStore *moduleDataStore) {
moduleDataStore.filelocker = filelocker
}
}

/// *** PRIVATE ***

type moduleDataStore struct {
logger *zap.Logger
bucket storage.ReadWriteBucket

tar bool
tar bool
filelocker filelock.Locker
}

func newModuleDataStore(
Expand Down Expand Up @@ -167,11 +182,26 @@ func (p *moduleDataStore) getModuleDataForModuleKey(
return nil, err
}
} else {
moduleCacheBucket, err = p.getReadWriteBucketForDir(moduleKey)
// Not checking for fs.ErrNotExist. Function only returns error on actual system error.
dirPath, err := getModuleDataStoreDirPath(moduleKey)
if err != nil {
return nil, err
}
p.logDebugModuleKey(
moduleKey,
"module data store dir read write bucket",
zap.String("dirPath", dirPath),
)
moduleCacheBucket = storage.MapReadWriteBucket(p.bucket, storage.MapOnPrefix(dirPath))
// Only attempt to get a file lock when storing individual files
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know what this means - what is an "individual file"? It should be clear from the code comments as to what this code does. This may take a paragraph or two of comments.

if p.filelocker != nil {
unlocker, err := p.filelocker.RLock(ctx, dirPath+"/"+externalModuleDataFileName)
if err != nil {
return nil, err
}
defer func() {
retErr = multierr.Append(retErr, unlocker.Unlock())
}()
}
}
defer func() {
if retErr != nil {
Expand Down Expand Up @@ -297,6 +327,15 @@ func (p *moduleDataStore) deleteInvalidModuleData(
if err != nil {
return err
}
if p.filelocker != nil {
unlocker, err := p.filelocker.Lock(ctx, dirPath+"/"+externalModuleDataFileName)
if err != nil {
return err
}
defer func() {
retErr = multierr.Append(retErr, unlocker.Unlock())
doriable marked this conversation as resolved.
Show resolved Hide resolved
}()
}
return p.bucket.DeleteAll(ctx, dirPath)
pkwarren marked this conversation as resolved.
Show resolved Hide resolved
}

Expand All @@ -317,10 +356,26 @@ func (p *moduleDataStore) putModuleData(
}
}()
} else {
moduleCacheBucket, err = p.getReadWriteBucketForDir(moduleKey)
dirPath, err := getModuleDataStoreDirPath(moduleKey)
if err != nil {
return err
}
p.logDebugModuleKey(
moduleKey,
"module data store dir read write bucket",
zap.String("dirPath", dirPath),
)
moduleCacheBucket = storage.MapReadWriteBucket(p.bucket, storage.MapOnPrefix(dirPath))
// Only attempt to get a file lock when storing individual files
if p.filelocker != nil {
unlocker, err := p.filelocker.Lock(ctx, dirPath+"/"+externalModuleDataFileName)
if err != nil {
return err
}
defer func() {
retErr = multierr.Append(retErr, unlocker.Unlock())
}()
}
}
depModuleKeys, err := moduleData.DeclaredDepModuleKeys()
if err != nil {
Expand Down Expand Up @@ -391,22 +446,6 @@ func (p *moduleDataStore) putModuleData(
return storage.PutPath(ctx, moduleCacheBucket, externalModuleDataFileName, data)
doriable marked this conversation as resolved.
Show resolved Hide resolved
}

// Only returns error on actual system error.
func (p *moduleDataStore) getReadWriteBucketForDir(
moduleKey bufmodule.ModuleKey,
) (storage.ReadWriteBucket, error) {
dirPath, err := getModuleDataStoreDirPath(moduleKey)
if err != nil {
return nil, err
}
p.logDebugModuleKey(
moduleKey,
"module data store dir read write bucket",
zap.String("dirPath", dirPath),
)
return storage.MapReadWriteBucket(p.bucket, storage.MapOnPrefix(dirPath)), nil
}

// May return fs.ErrNotExist error if tar not found.
func (p *moduleDataStore) getReadBucketForTar(
ctx context.Context,
Expand Down