From a150a4b84ee52fc799d3a9ab4334e2bd37307821 Mon Sep 17 00:00:00 2001 From: Bert Date: Mon, 29 Jan 2024 16:53:57 -0500 Subject: [PATCH] fix: improve ability to control dynamo creds cache (#1870) Exposes `AwsCredentialAdapter` for the purpose of allowing its cache to be used by the dynamo client. This is necessary to get around a timeout loading credentials ... Currently the dynamo client defaults to lazy_builder https://docs.rs/aws-sdk-dynamodb/0.34.0/src/aws_sdk_dynamodb/config.rs.html#790 This lazy cache has a 5-second timeout when loading credentials: https://github.com/smithy-lang/smithy-rs/blob/e78c60dbf169403eedceb1b718b862b0c5e5ee09/aws/rust-runtime/aws-credential-types/src/cache/lazy_caching.rs#L93 This change allows the caller to more easily pass their own `ProvideCredentials` implementation and rely on the caching built into `AwsCredentialAdapter`. ```rs use aws_config::default_provider::credentials::DefaultCredentialsChain; use aws_credential_types::provider::SharedCredentialsProvider; use lance::dataset::ReadParams; use lance_io::object_store::{AwsCredentialAdapter, ObjectStoreParams}; use vectordb::Database; let creds_provider = Arc::new(AwsCredentialAdapter::new( Arc::new(SharedCredentialsProvider::new(DefaultCredentialsChain::builder().build().await)), ObjectStoreParams::default().s3_credentials_refresh_offset, )); let db = Database::connect("s3://my-bucket/my-db?engine=ddb&ddbTableName=my-dyn-table").await.unwrap(); let table = db.open_table_with_params("my-table", ReadParams { store_options: Some(ObjectStoreParams { aws_credentials: Some(creds_provider), ..Default::default() }), ..ReadParams::default() }).await.unwrap(); ``` --- rust/lance-io/src/object_store.rs | 7 +++++-- rust/lance-table/src/io/commit.rs | 6 +++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/rust/lance-io/src/object_store.rs b/rust/lance-io/src/object_store.rs index c874e67267..9e94e42b24 100644 --- a/rust/lance-io/src/object_store.rs +++ 
b/rust/lance-io/src/object_store.rs @@ -108,7 +108,7 @@ const AWS_CREDS_CACHE_KEY: &str = "aws_credentials"; /// Adapt an AWS SDK cred into object_store credentials #[derive(Debug)] -struct AwsCredentialAdapter { +pub struct AwsCredentialAdapter { pub inner: Arc, // RefCell can't be shared accross threads, so we use HashMap @@ -119,7 +119,10 @@ struct AwsCredentialAdapter { } impl AwsCredentialAdapter { - fn new(provider: Arc, credentials_refresh_offset: Duration) -> Self { + pub fn new( + provider: Arc, + credentials_refresh_offset: Duration, + ) -> Self { Self { inner: provider, cache: Arc::new(RwLock::new(HashMap::new())), diff --git a/rust/lance-table/src/io/commit.rs b/rust/lance-table/src/io/commit.rs index 5e15fb40d3..5823d1091d 100644 --- a/rust/lance-table/src/io/commit.rs +++ b/rust/lance-table/src/io/commit.rs @@ -61,6 +61,7 @@ use lance_io::object_store::ObjectStoreParams; #[cfg(feature = "dynamodb")] use { self::external_manifest::{ExternalManifestCommitHandler, ExternalManifestStore}, + aws_credential_types::cache::CredentialsCache, lance_io::object_store::{build_aws_credential, StorageOptions}, object_store::aws::AmazonS3ConfigKey, std::borrow::Cow, @@ -295,7 +296,10 @@ async fn build_dynamodb_external_store( let dynamodb_config = aws_sdk_dynamodb::config::Builder::new() .region(Some(Region::new(region.to_string()))) - .credentials_provider(OSObjectStoreToAwsCredAdaptor(creds)); + .credentials_provider(OSObjectStoreToAwsCredAdaptor(creds)) + // caching should be handled by passed AwsCredentialProvider + .credentials_cache(CredentialsCache::no_caching()); + let dynamodb_config = match env::var("DYNAMODB_ENDPOINT") { Ok(endpoint) => dynamodb_config.endpoint_url(endpoint), _ => dynamodb_config,