Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

File locking module data store #3139

Merged
merged 45 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
c4cad75
Add test for concurrent cache reads + writes
doriable Jul 2, 2024
d4eb36a
Implement file locking on module.yaml in for ModuleDataStore
doriable Jul 3, 2024
a9dffdd
Skip test
doriable Jul 8, 2024
584b8f1
Fix lint
doriable Jul 8, 2024
f1953ef
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Jul 8, 2024
c40e2fa
Updates from conversations + comments
doriable Jul 8, 2024
e9589be
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Jul 8, 2024
89a076d
Close moduleDir before other delete operations
doriable Jul 9, 2024
0e49421
Refactor test away from errgroup
doriable Jul 9, 2024
812da0c
Use a lock file instead of locking on module.yaml
doriable Jul 9, 2024
caaad59
Check module.yaml during PutModules to avoid writing extra data
doriable Jul 9, 2024
1ac3afd
Fix delete invalid to use storage APIs
doriable Jul 9, 2024
ad470e5
Add locker options for filelock.Locker
doriable Jul 9, 2024
05ea4bb
Address comments + fix delete in delete invalid files
doriable Jul 10, 2024
7332a7d
Remove extra comment
doriable Jul 10, 2024
e7bf3cd
Only avoid calling deleteInvalidModuleData if module.yaml was not found
doriable Jul 10, 2024
2b73dc4
Address small comments
doriable Jul 11, 2024
cbf7021
Change `filelocker` as a required arg and adjust tests.
doriable Jul 11, 2024
427e0b5
Remove deleteInvalidModuleData
doriable Jul 11, 2024
f146b60
Add storageos test to module_data_store_test.go
doriable Jul 11, 2024
d162eec
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Jul 11, 2024
8829be0
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Jul 11, 2024
1f6c0d2
Small clean-up
doriable Jul 11, 2024
7dfa33e
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Jul 15, 2024
c44c7ee
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Jul 17, 2024
3916950
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Jul 18, 2024
e98467d
Separate dir for file locking
doriable Jul 18, 2024
6a9416a
Apply suggestions from code review
doriable Jul 25, 2024
b52c371
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Jul 25, 2024
4266bd0
Delete lock files
doriable Jul 25, 2024
d4340a7
Address comments
doriable Jul 25, 2024
baa4f31
Fix lint
doriable Jul 25, 2024
1939643
Remove nil checks for filelocker and use NopLocker when passed nil in…
doriable Jul 25, 2024
ede95e9
Merge branch 'main' into file-locking-module-data-store
bufdev Aug 1, 2024
d5d9178
Address some comments
doriable Aug 1, 2024
5c3baa2
Fix tests
doriable Aug 1, 2024
44f3966
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Aug 8, 2024
2e46398
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Aug 13, 2024
7fa96d1
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Aug 21, 2024
a6c476a
Address comments
doriable Aug 21, 2024
c520685
Unexpand dockerignore glob
doriable Aug 21, 2024
723e85c
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Aug 21, 2024
61a38bb
Add in-line comments to document module read/write processes.
doriable Aug 21, 2024
fbd040f
Merge remote-tracking branch 'origin/main' into file-locking-module-d…
doriable Aug 21, 2024
4598d8c
Add comment explaining module.yaml as the source of validity at the m…
doriable Aug 21, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions private/bufpkg/bufmodule/bufmodulecache/bufmodulecache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@ package bufmodulecache

import (
"context"
"fmt"
"os"
"path/filepath"
"testing"
"time"

"github.com/bufbuild/buf/private/bufpkg/bufmodule"
"github.com/bufbuild/buf/private/bufpkg/bufmodule/bufmodulestore"
Expand Down Expand Up @@ -232,12 +234,16 @@ func TestConcurrentCacheReadWrite(t *testing.T) {
require.NoError(t, os.MkdirAll(cacheDir, 0755))
jobs, err := slicesext.MapError(
[]int{0, 1, 2, 3, 4},
func(_ int) (func(ctx context.Context) error, error) {
func(i int) (func(ctx context.Context) error, error) {
logger := logger.Named(fmt.Sprintf("job-%d", i))
bucket, err := storageos.NewProvider().NewReadWriteBucket(cacheDir)
if err != nil {
return nil, err
}
filelocker, err := filelock.NewLocker(cacheDir)
filelocker, err := filelock.NewLocker(
cacheDir,
filelock.LockerWithLockRetryDelay(10*time.Millisecond),
doriable marked this conversation as resolved.
Show resolved Hide resolved
)
if err != nil {
return nil, err
}
Expand Down
48 changes: 20 additions & 28 deletions private/bufpkg/bufmodule/bufmodulestore/module_data_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"errors"
"fmt"
"io/fs"
"os"

"github.com/bufbuild/buf/private/bufpkg/bufmodule"
"github.com/bufbuild/buf/private/pkg/encoding"
Expand Down Expand Up @@ -345,35 +344,29 @@ func (p *moduleDataStore) deleteInvalidModuleData(
}
}()
}
moduleDir, err := os.Open(dirPath)
if err != nil {
return err
}
defer func() {
// If the moduleDir was already closed, that is fine
if err := moduleDir.Close(); err != nil && err != fs.ErrClosed {
p.logger.Debug("failed_delete_close_module_dir", zap.Error(err))
retErr = multierr.Append(retErr, err)
var deleted map[string]struct{}
doriable marked this conversation as resolved.
Show resolved Hide resolved
if err := p.bucket.Walk(ctx, dirPath, func(objectInfo storage.ObjectInfo) error {
components := normalpath.Components(objectInfo.Path())
if len(components) < 1 {
return fmt.Errorf("invalid object info path: %s", objectInfo.Path())
}
}()
fileNames, err := moduleDir.Readdirnames(-1)
if err != nil {
return err
}
// Close the moduleDir first before doing other operations
if err := moduleDir.Close(); err != nil {
return err
}
// Delete all contents except the lock file
for _, fileName := range fileNames {
if fileName != externalModuleDataLockFileName {
if err := p.bucket.Delete(ctx, dirPath+"/"+fileName); err != nil {
return err
}
topLevelPath := components[0]
if topLevelPath == externalModuleDataLockFileName {
return nil
}
if _, ok := deleted[topLevelPath]; ok {
return nil
}
if err := p.bucket.DeleteAll(ctx, topLevelPath); err != nil {
return err
}
deleted[topLevelPath] = struct{}{}
return nil
}); err != nil {
return err
}
// Delete the lock file
return p.bucket.Delete(ctx, dirPath+"/"+externalModuleDataLockFileName)
// Delete the entire dirPath
return p.bucket.DeleteAll(ctx, dirPath)
pkwarren marked this conversation as resolved.
Show resolved Hide resolved
}

func (p *moduleDataStore) putModuleData(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's really difficult to understand what the code in here is trying to accomplish now. We need specific documentation as we go, and comment within explaining what this code block is trying to accomplish. Something like "we're going to first check for X, if that's not the case, we're going to do this overwrite like this, etc etc".

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So getModuleDataForModuleKey currently only does read operations, and each step of that read is documented... we can add more documentation, but I need some clarification on which parts.

As the last comment in getModuleDataForModuleKey addresses, validity of content is always determined by module.yaml, and that is always expected to be written last.

Any error and or invalid module.yaml is returned as an error, and then handled by fetching new data and putting it into the cache. There is nothing being overwritten/changed by getModuleDataForModuleKey, it is only reading the cache.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you point me to the documentation? I can't follow what is going on here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see anywhere where invalid data is cleaned up anymore. Perhaps I missed it. What happens if I have invalid data, say file "a.proto", and the valid data is only the file "b.proto", both written to same cache directory...will "a.proto" not be there anymore? Basically, I don't understand how the deleteInvalidModuleData case is now handled. It appears to only be handled for tar files (getModuleDataForModuleKey calls bucket.Delete).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the validity of the contents of v3/modules/<digest_type>/<module_key> is determined by the presence of a valid module.yaml file. As documented at the end of getModuleDataForModuleKey, we rely on module.yaml to be written last.

So the case you're describing, where there is an invalid a.proto but a valid b.proto, this occurs because of some interruption to the process while writing a.proto. In which case, no module.yaml would be written, this would not be considered valid, and then new data is fetched, etc.

Expand Down Expand Up @@ -517,7 +510,6 @@ func (p *moduleDataStore) putModuleData(
ctx,
filesBucket,
storage.MapWriteBucket(moduleCacheBucket, storage.MapOnPrefix(externalModuleDataFilesDir)),
storage.CopyWithAtomic(),
); err != nil {
return err
}
Expand Down
28 changes: 26 additions & 2 deletions private/pkg/filelock/filelock.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,32 @@ type Locker interface {
//
// The root directory path should generally be a data directory path.
// The root directory must exist.
func NewLocker(rootDirPath string) (Locker, error) {
return newLocker(rootDirPath)
func NewLocker(rootDirPath string, options ...LockerOption) (Locker, error) {
return newLocker(rootDirPath, options...)
}

// LockerOption is an option for a new Locker.
type LockerOption func(*lockerOptions)

// LockerWithLockTimeout sets the default lock timeout for the Locker.
//
// If Lock/RLock is called with LockWithTimeout, that will override this default timeout.
// If this is not set, the default lock timeout is 3s.
doriable marked this conversation as resolved.
Show resolved Hide resolved
func LockerWithLockTimeout(lockTimeout time.Duration) LockerOption {
return func(lockerOptions *lockerOptions) {
lockerOptions.lockTimeout = lockTimeout
}
}

// LockerWithLockRetryDelay sets the default lock retry delay for the Locker.
//
// If Lock/RLock is called with LockWithRetryDelay, that will override the default lock
// retry delay.
// If this is not set, the default lock retry delay is 200ms.
func LockerWithLockRetryDelay(lockRetryDelay time.Duration) LockerOption {
return func(lockerOptions *lockerOptions) {
lockerOptions.lockRetryDelay = lockRetryDelay
}
}

// LockOption is an option for lock.
Expand Down
43 changes: 39 additions & 4 deletions private/pkg/filelock/locker.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,18 @@ import (
"context"
"fmt"
"os"
"time"

"github.com/bufbuild/buf/private/pkg/normalpath"
)

type locker struct {
rootDirPath string
rootDirPath string
lockTimeout time.Duration
lockRetryDelay time.Duration
}

func newLocker(rootDirPath string) (*locker, error) {
func newLocker(rootDirPath string, options ...LockerOption) (*locker, error) {
// allow symlinks
fileInfo, err := os.Stat(normalpath.Unnormalize(rootDirPath))
if err != nil {
Expand All @@ -35,16 +38,29 @@ func newLocker(rootDirPath string) (*locker, error) {
if !fileInfo.IsDir() {
return nil, fmt.Errorf("%q is not a directory", rootDirPath)
}
lockerOptions := newLockerOptions()
for _, option := range options {
option(lockerOptions)
}
return &locker{
// do not validate - allow anything including absolute paths and jumping context
rootDirPath: normalpath.Normalize(rootDirPath),
rootDirPath: normalpath.Normalize(rootDirPath),
lockTimeout: lockerOptions.lockTimeout,
lockRetryDelay: lockerOptions.lockRetryDelay,
}, nil
}

func (l *locker) Lock(ctx context.Context, path string, options ...LockOption) (Unlocker, error) {
if err := validatePath(path); err != nil {
return nil, err
}
options = append(
doriable marked this conversation as resolved.
Show resolved Hide resolved
[]LockOption{
LockWithTimeout(l.lockTimeout),
LockWithRetryDelay(l.lockRetryDelay),
},
options..., // Any additional options set will be applied last
)
return lock(
ctx,
normalpath.Unnormalize(normalpath.Join(l.rootDirPath, path)),
Expand All @@ -56,10 +72,17 @@ func (l *locker) RLock(ctx context.Context, path string, options ...LockOption)
if err := validatePath(path); err != nil {
return nil, err
}
options = append(
[]LockOption{
LockWithTimeout(l.lockTimeout),
LockWithRetryDelay(l.lockRetryDelay),
},
options..., // Any additional options set will be applied last
)
return rlock(
ctx,
normalpath.Unnormalize(normalpath.Join(l.rootDirPath, path)),
options...,
options..., // Any additional options set will be applied last
doriable marked this conversation as resolved.
Show resolved Hide resolved
)
}

Expand All @@ -74,3 +97,15 @@ func validatePath(path string) error {
}
return nil
}

type lockerOptions struct {
lockTimeout time.Duration
lockRetryDelay time.Duration
}

func newLockerOptions() *lockerOptions {
return &lockerOptions{
lockTimeout: DefaultLockTimeout,
lockRetryDelay: DefaultLockRetryDelay,
}
}
Loading