Skip to content

Commit

Permalink
AWS S3: consolidate environment and defaults
Browse files Browse the repository at this point in the history
* single `api/env/aws.go`
* cleanup, comment, and document

Signed-off-by: Alex Aizman <[email protected]>
  • Loading branch information
alex-aizman committed Mar 2, 2024
1 parent f6ad143 commit 833ba65
Show file tree
Hide file tree
Showing 8 changed files with 78 additions and 56 deletions.
20 changes: 6 additions & 14 deletions ais/backend/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (

aiss3 "github.com/NVIDIA/aistore/ais/s3"
"github.com/NVIDIA/aistore/api/apc"
"github.com/NVIDIA/aistore/api/env"
"github.com/NVIDIA/aistore/cmn"
"github.com/NVIDIA/aistore/cmn/cos"
"github.com/NVIDIA/aistore/cmn/debug"
Expand All @@ -35,17 +36,6 @@ import (
"github.com/aws/smithy-go"
)

const (
// environment variable to globally override the default 'https://s3.amazonaws.com' endpoint
// NOTE: the same can be done on a per-bucket basis, via bucket prop `Extra.AWS.Endpoint`
// (bucket override will always take precedence)
awsEnvS3Endpoint = "S3_ENDPOINT"

// ditto non-default profile (the default is [default] in ~/.aws/credentials)
// same NOTE in re precedence
awsEnvConfigProfile = "AWS_PROFILE"
)

type (
awsProvider struct {
t core.TargetPut
Expand All @@ -68,9 +58,11 @@ var (
// interface guard
var _ core.BackendProvider = (*awsProvider)(nil)

// environment variables => static defaults that can still be overridden via bck.Props.Extra.AWS
// in addition to these two (below), default bucket region = env.AwsDefaultRegion()
func NewAWS(t core.TargetPut) (core.BackendProvider, error) {
s3Endpoint = os.Getenv(awsEnvS3Endpoint)
awsProfile = os.Getenv(awsEnvConfigProfile)
s3Endpoint = os.Getenv(env.AWS.Endpoint)
awsProfile = os.Getenv(env.AWS.Profile)
return &awsProvider{t: t}, nil
}

Expand Down Expand Up @@ -663,7 +655,7 @@ func getBucketLocation(svc *s3.Client, bckName string) (region string, err error
}
region = string(resp.LocationConstraint)
if region == "" {
region = cmn.AwsDefaultRegion // Buckets in region `us-east-1` have a LocationConstraint of null.
region = env.AwsDefaultRegion()
}
return
}
Expand Down
7 changes: 4 additions & 3 deletions ais/test/s3_compat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (

"github.com/NVIDIA/aistore/api"
"github.com/NVIDIA/aistore/api/apc"
"github.com/NVIDIA/aistore/api/env"
"github.com/NVIDIA/aistore/cmn"
"github.com/NVIDIA/aistore/cmn/cos"
"github.com/NVIDIA/aistore/cmn/feat"
Expand Down Expand Up @@ -102,7 +103,7 @@ func TestS3PassThroughPutGet(t *testing.T) {
cfg.HTTPClient = newS3Client()
s3Client := s3.NewFromConfig(cfg)
*/
s3Client := s3.New(s3.Options{HTTPClient: newS3Client(), Region: cmn.AwsDefaultRegion})
s3Client := s3.New(s3.Options{HTTPClient: newS3Client(), Region: env.AwsDefaultRegion()})

putOutput, err := s3Client.PutObject(context.Background(), &s3.PutObjectInput{
Bucket: aws.String(bck.Name),
Expand Down Expand Up @@ -139,7 +140,7 @@ func TestS3PassThroughMultipart(t *testing.T) {

setPresignedS3(t, bck)

s3Client := s3.New(s3.Options{HTTPClient: newS3Client(), Region: cmn.AwsDefaultRegion})
s3Client := s3.New(s3.Options{HTTPClient: newS3Client(), Region: env.AwsDefaultRegion()})

createMultipartUploadOutput, err := s3Client.CreateMultipartUpload(context.Background(), &s3.CreateMultipartUploadInput{
Bucket: aws.String(bck.Name),
Expand Down Expand Up @@ -211,7 +212,7 @@ func TestWriteThroughCacheNoColdGet(t *testing.T) {

setPresignedS3(t, bck)

s3Client := s3.New(s3.Options{HTTPClient: newS3Client(), Region: cmn.AwsDefaultRegion})
s3Client := s3.New(s3.Options{HTTPClient: newS3Client(), Region: env.AwsDefaultRegion()})

putOutput, err := s3Client.PutObject(context.Background(), &s3.PutObjectInput{
Bucket: aws.String(bck.Name),
Expand Down
34 changes: 34 additions & 0 deletions api/env/aws.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Package env contains environment variables
/*
* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
*/
package env

import "os"

// AwsDefaultRegion returns the value of the environment variable named by
// AWS.Region or, when that variable is unset or empty, "us-east-1".
//
// from https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketLocation.html:
// "Buckets in region `us-east-1` have a LocationConstraint of null."
func AwsDefaultRegion() (region string) {
	const fallback = "us-east-1"

	region = os.Getenv(AWS.Region)
	if region != "" {
		return region
	}
	return fallback
}

// AWS holds the names (not the values) of the AWS S3 related environment variables.
var AWS = struct {
	Endpoint string
	Region   string
	Profile  string
}{
	// use S3_ENDPOINT to globally override the default 'https://s3.amazonaws.com' endpoint
	// NOTE: the same can be done on a per-bucket basis, via bucket prop `Extra.AWS.Endpoint`
	// (bucket override will always take precedence)
	Endpoint: "S3_ENDPOINT",

	// consulted by AwsDefaultRegion (above)
	Region: "AWS_REGION",

	// ditto non-default profile via "AWS_PROFILE" (the default one is called [default])
	Profile: "AWS_PROFILE",
}
10 changes: 7 additions & 3 deletions bench/tools/aisloader/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (

"github.com/NVIDIA/aistore/api"
"github.com/NVIDIA/aistore/api/apc"
"github.com/NVIDIA/aistore/api/env"
"github.com/NVIDIA/aistore/cmn"
"github.com/NVIDIA/aistore/cmn/cos"
"github.com/NVIDIA/aistore/cmn/debug"
Expand Down Expand Up @@ -466,8 +467,11 @@ func listObjectNames(baseParams api.BaseParams, bck cmn.Bck, prefix string, cach
}

func initS3Svc() error {
if s3Profile == "" && os.Getenv(awsEnvConfigProfile) != "" {
s3Profile = os.Getenv(awsEnvConfigProfile)
// '--s3profile' takes precedence
if s3Profile == "" {
if profile := os.Getenv(env.AWS.Profile); profile != "" {
s3Profile = profile
}
}
cfg, err := config.LoadDefaultConfig(
context.Background(),
Expand All @@ -480,7 +484,7 @@ func initS3Svc() error {
cfg.BaseEndpoint = aws.String(s3Endpoint)
}
if cfg.Region == "" {
cfg.Region = cmn.AwsDefaultRegion // Buckets in region `us-east-1` have a LocationConstraint of null.
cfg.Region = env.AwsDefaultRegion()
}

s3svc = s3.NewFromConfig(cfg)
Expand Down
16 changes: 5 additions & 11 deletions bench/tools/aisloader/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,6 @@ const (

defaultClusterIP = "localhost"
defaultClusterIPv4 = "127.0.0.1"

// environment variable to globally override the default 'https://s3.amazonaws.com' endpoint
// NOTE: the same can be done by passing a flag to aisloader `-s3endpoint`
// (`-s3endpoint` flag will always take precedence)
awsEnvS3Endpoint = "S3_ENDPOINT"

// ditto non-default profile (the default is [default] in ~/.aws/credentials)
// same NOTE in re precedence
awsEnvConfigProfile = "AWS_PROFILE"
)

type (
Expand Down Expand Up @@ -633,8 +624,11 @@ func addCmdLine(f *flag.FlagSet, p *params) {

// validate command line and finish initialization
func _init(p *params) (err error) {
if s3Endpoint == "" && os.Getenv(awsEnvS3Endpoint) != "" {
s3Endpoint = os.Getenv(awsEnvS3Endpoint)
// '--s3endpoint' takes precedence
if s3Endpoint == "" {
if ep := os.Getenv(env.AWS.Endpoint); ep != "" {
s3Endpoint = ep
}
}
if p.bck.Name != "" {
if p.cleanUp.Val && isDirectS3() {
Expand Down
19 changes: 0 additions & 19 deletions cmn/aws.go

This file was deleted.

9 changes: 9 additions & 0 deletions cmn/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@ type backendFuncs struct {
EncodeCksum func(v any) (cksumValue string, isSet bool)
}

// AwsMultipartDelim is the substring that IsS3MultipartEtag looks for in an ETag.
//
// from https://docs.aws.amazon.com/AmazonS3/latest/API/API_Object.html
// "The ETag may or may not be an MD5 digest of the object data. Whether or
// not it is depends on how the object was created and how it is encrypted..."
//
// NOTE(review): presumably this is the separator S3 places before the part count
// in ETags of multipart-uploaded objects - confirm against the AWS documentation.
const AwsMultipartDelim = "-"

// IsS3MultipartEtag reports whether etag contains AwsMultipartDelim.
// This is a plain substring check: the ETag is not unquoted and whatever
// follows the delimiter is not validated.
func IsS3MultipartEtag(etag string) bool {
return strings.Contains(etag, AwsMultipartDelim)
}

// awsIsVersionSet reports whether version points to a usable version string:
// it returns false for a nil pointer, for the empty string, and for the
// literal "null"; any other value yields true.
func awsIsVersionSet(version *string) bool {
	if version == nil {
		return false
	}
	switch *version {
	case "", "null":
		return false
	}
	return true
}
Expand Down
19 changes: 13 additions & 6 deletions docs/environment-vars.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,16 @@ t[fXbarEnn] 3.08% 367.66GiB 51% 8.414TiB [0.9 1.1
See related:
* [AIS K8s Operator: environment variables](https://github.com/NVIDIA/ais-k8s/blob/master/operator/pkg/resources/cmn/env.go)

## AWS S3

**NOTE:** for the most recent updates, please refer to the [source](https://github.com/NVIDIA/aistore/blob/main/api/env/aws.go).

| name | comment |
| ---- | ------- |
| `S3_ENDPOINT` | global S3 endpoint to be used instead of `s3.amazonaws.com` |
| `AWS_REGION` | default bucket region; can be set to override the global default 'us-east-1' location |
| `AWS_PROFILE` | global AWS profile with alternative (i.e., other than `[default]`) credentials and/or AWS region |

## Package: backend

AIS natively supports 3 (three) [Cloud storages](/docs/providers.md).
Expand All @@ -137,12 +147,9 @@ The corresponding environment "belongs" to the internal [backend](https://github

| name | comment |
| ---- | ------- |
| `S3_ENDPOINT` | global S3 endpoint to be used instead of `s3.amazonaws.com` |
| `AWS_PROFILE` | global AWS profiles with alternative account credentials and/or AWS region |
| `GOOGLE_CLOUD_PROJECT` | GCP account with permissions to access your Google Cloud Storage buckets |
| `GOOGLE_APPLICATION_CREDENTIALS` | (ditto) |
| `AZURE_STORAGE_ACCOUNT` | Azure account |
| `AZURE_STORAGE_KEY` | (ditto) |
| `S3_ENDPOINT`, `AWS_PROFILE`, and `AWS_REGION` | see the previous section (AWS S3) |
| `GOOGLE_CLOUD_PROJECT`, `GOOGLE_APPLICATION_CREDENTIALS` | GCP account with permissions to access Google Cloud Storage buckets |
| `AZURE_STORAGE_ACCOUNT`, `AZURE_STORAGE_KEY` | Azure account with permissions to access Blob Storage containers |
| `AIS_AZURE_URL` | Azure endpoint, e.g. `http://<account_name>.blob.core.windows.net` |

Notice that the variables `S3_ENDPOINT` and `AWS_PROFILE` (see the AWS S3 section above) are designated as _global_, that is, cluster-wide.
Expand Down

0 comments on commit 833ba65

Please sign in to comment.