From a33cb3dc2759e9f94381ce92e480183aef1bae49 Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Tue, 12 Nov 2024 09:08:04 -0700 Subject: [PATCH 01/32] fwv --- go.mod | 25 +++++ go.sum | 38 +++++++ main.go | 314 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 377 insertions(+) create mode 100644 go.mod create mode 100644 go.sum create mode 100644 main.go diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..526a311 --- /dev/null +++ b/go.mod @@ -0,0 +1,25 @@ +module github.com/lsst-dm/s3daemon-go + +go 1.22.7 + +require ( + github.com/aws/aws-sdk-go-v2 v1.32.4 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.6 // indirect + github.com/aws/aws-sdk-go-v2/config v1.28.3 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.17.44 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.19 // indirect + github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.37 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.23 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.23 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.23 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.0 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.4.4 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.4 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.4 // indirect + github.com/aws/aws-sdk-go-v2/service/s3 v1.66.3 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.24.5 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.4 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.32.4 // indirect + github.com/aws/smithy-go v1.22.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..53a63f5 --- /dev/null +++ b/go.sum @@ -0,0 +1,38 @@ +github.com/aws/aws-sdk-go-v2 
v1.32.4 h1:S13INUiTxgrPueTmrm5DZ+MiAo99zYzHEFh1UNkOxNE= +github.com/aws/aws-sdk-go-v2 v1.32.4/go.mod h1:2SK5n0a2karNTv5tbP1SjsX0uhttou00v/HpXKM1ZUo= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.6 h1:pT3hpW0cOHRJx8Y0DfJUEQuqPild8jRGmSFmBgvydr0= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.6/go.mod h1:j/I2++U0xX+cr44QjHay4Cvxj6FUbnxrgmqN3H1jTZA= +github.com/aws/aws-sdk-go-v2/config v1.28.3 h1:kL5uAptPcPKaJ4q0sDUjUIdueO18Q7JDzl64GpVwdOM= +github.com/aws/aws-sdk-go-v2/config v1.28.3/go.mod h1:SPEn1KA8YbgQnwiJ/OISU4fz7+F6Fe309Jf0QTsRCl4= +github.com/aws/aws-sdk-go-v2/credentials v1.17.44 h1:qqfs5kulLUHUEXlHEZXLJkgGoF3kkUeFUTVA585cFpU= +github.com/aws/aws-sdk-go-v2/credentials v1.17.44/go.mod h1:0Lm2YJ8etJdEdw23s+q/9wTpOeo2HhNE97XcRa7T8MA= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.19 h1:woXadbf0c7enQ2UGCi8gW/WuKmE0xIzxBF/eD94jMKQ= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.19/go.mod h1:zminj5ucw7w0r65bP6nhyOd3xL6veAUMc3ElGMoLVb4= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.37 h1:jHKR76E81sZvz1+x1vYYrHMxphG5LFBJPhSqEr4CLlE= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.37/go.mod h1:iMkyPkmoJWQKzSOtaX+8oEJxAuqr7s8laxcqGDSHeII= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.23 h1:A2w6m6Tmr+BNXjDsr7M90zkWjsu4JXHwrzPg235STs4= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.23/go.mod h1:35EVp9wyeANdujZruvHiQUAo9E3vbhnIO1mTCAxMlY0= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.23 h1:pgYW9FCabt2M25MoHYCfMrVY2ghiiBKYWUVXfwZs+sU= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.23/go.mod h1:c48kLgzO19wAu3CPkDWC28JbaJ+hfQlsdl7I2+oqIbk= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 h1:VaRN3TlFdd6KxX1x3ILT5ynH6HvKgqdiXoTxAF4HQcQ= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1/go.mod h1:FbtygfRFze9usAadmnGJNc8KsP346kEe+y2/oyhGAGc= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.23 h1:1SZBDiRzzs3sNhOMVApyWPduWYGAX0imGy06XiBnCAM= 
+github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.23/go.mod h1:i9TkxgbZmHVh2S0La6CAXtnyFhlCX/pJ0JsOvBAS6Mk= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.0 h1:TToQNkvGguu209puTojY/ozlqy2d/SFNcoLIqTFi42g= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.0/go.mod h1:0jp+ltwkf+SwG2fm/PKo8t4y8pJSgOCO4D8Lz3k0aHQ= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.4.4 h1:aaPpoG15S2qHkWm4KlEyF01zovK1nW4BBbyXuHNSE90= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.4.4/go.mod h1:eD9gS2EARTKgGr/W5xwgY/ik9z/zqpW+m/xOQbVxrMk= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.4 h1:tHxQi/XHPK0ctd/wdOw0t7Xrc2OxcRCnVzv8lwWPu0c= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.4/go.mod h1:4GQbF1vJzG60poZqWatZlhP31y8PGCCVTvIGPdaaYJ0= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.4 h1:E5ZAVOmI2apR8ADb72Q63KqwwwdW1XcMeXIlrZ1Psjg= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.4/go.mod h1:wezzqVUOVVdk+2Z/JzQT4NxAU0NbhRe5W8pIE72jsWI= +github.com/aws/aws-sdk-go-v2/service/s3 v1.66.3 h1:neNOYJl72bHrz9ikAEED4VqWyND/Po0DnEx64RW6YM4= +github.com/aws/aws-sdk-go-v2/service/s3 v1.66.3/go.mod h1:TMhLIyRIyoGVlaEMAt+ITMbwskSTpcGsCPDq91/ihY0= +github.com/aws/aws-sdk-go-v2/service/sso v1.24.5 h1:HJwZwRt2Z2Tdec+m+fPjvdmkq2s9Ra+VR0hjF7V2o40= +github.com/aws/aws-sdk-go-v2/service/sso v1.24.5/go.mod h1:wrMCEwjFPms+V86TCQQeOxQF/If4vT44FGIOFiMC2ck= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.4 h1:zcx9LiGWZ6i6pjdcoE9oXAB6mUdeyC36Ia/QEiIvYdg= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.4/go.mod h1:Tp/ly1cTjRLGBBmNccFumbZ8oqpZlpdhFf80SrRh4is= +github.com/aws/aws-sdk-go-v2/service/sts v1.32.4 h1:yDxvkz3/uOKfxnv8YhzOi9m+2OGIxF+on3KOISbK5IU= +github.com/aws/aws-sdk-go-v2/service/sts v1.32.4/go.mod h1:9XEUty5v5UAsMiFOBJrNibZgwCeOma73jgGwwhgffa8= +github.com/aws/smithy-go v1.22.0 h1:uunKnWlcoL3zO7q+gG2Pk53joueEOsnNB28QdMsmiMM= +github.com/aws/smithy-go v1.22.0/go.mod 
h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= diff --git a/main.go b/main.go new file mode 100644 index 0000000..de61e30 --- /dev/null +++ b/main.go @@ -0,0 +1,314 @@ +package main + +import ( + "bytes" + "context" + "errors" + "flag" + "fmt" + "html" + "io/ioutil" + "log" + "net/http" + "net/url" + "os" + "path/filepath" + "strconv" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + awshttp "github.com/aws/aws-sdk-go-v2/aws/transport/http" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/feature/s3/manager" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/aws/aws-sdk-go-v2/service/s3/types" + "github.com/aws/smithy-go" +) + +type s3dConf struct { + host *string + port *int + endpoint_url *string + // access_key *string + // secret_key *string +} + +type S3DHandler struct { + Ctx context.Context + AwsConfig *aws.Config + S3Client *s3.Client + Conf *s3dConf +} + +// UploadFile reads from a file and puts the data into an object in a bucket. +func (h *S3DHandler) UploadFile(ctx context.Context, bucketName string, objectKey string, fileName string) error { + start := time.Now() + file, err := os.Open(fileName) + if err != nil { + log.Printf("Couldn't open file %v to upload. Here's why: %v\n", fileName, err) + } else { + defer file.Close() + _, err = h.S3Client.PutObject(ctx, &s3.PutObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + Body: file, + }) + if err != nil { + var apiErr smithy.APIError + if errors.As(err, &apiErr) && apiErr.ErrorCode() == "EntityTooLarge" { + log.Printf("Error while uploading object to %s. The object is too large.\n"+ + "To upload objects larger than 5GB, use the S3 console (160GB max)\n"+ + "or the multipart upload API (5TB max).", bucketName) + } else { + log.Printf("Couldn't upload file %v to %v:%v. 
Here's why: %v\n", + fileName, bucketName, objectKey, err) + } + } else { + /* + err = s3.NewObjectExistsWaiter(h.S3Client).Wait( + ctx, &s3.HeadObjectInput{Bucket: aws.String(bucketName), Key: aws.String(objectKey)}, time.Minute) + if err != nil { + log.Printf("Failed attempt to wait for object %s to exist.\n", objectKey) + } + */ + } + } + fmt.Printf("uploaded %v to %v:%v in %s\n", fileName, bucketName, objectKey, time.Now().Sub(start)) + return err +} + +// UploadObject uses the S3 upload manager to upload an object to a bucket. +func (h *S3DHandler) UploadFileMultipart(bucket string, key string, fileName string) error { + start := time.Now() + // file, err := os.Open(fileName) + data, err := ioutil.ReadFile(fileName) + if err != nil { + log.Printf("Couldn't open file %v to upload. Here's why: %v\n", fileName, err) + return err + } + // defer file.Close() + fmt.Printf("slurped %v:%v in %s\n", bucket, key, time.Now().Sub(start)) + + s3Client := s3.NewFromConfig(*h.getAwsConfig(), func(o *s3.Options) { + o.UsePathStyle = true + }) + fmt.Printf("NewFromConfig %v:%v in %s\n", bucket, key, time.Now().Sub(start)) + uploader := manager.NewUploader(s3Client, func(u *manager.Uploader) { + u.Concurrency = 1000 + u.MaxUploadParts = 1000 + u.PartSize = 1024 * 1024 * 5 + }) + fmt.Printf("NewUploader %v:%v in %s\n", bucket, key, time.Now().Sub(start)) + + _, err = uploader.Upload(h.Ctx, &s3.PutObjectInput{ + Bucket: aws.String(bucket), + Key: aws.String(key), + Body: bytes.NewReader([]byte(data)), + // Body: file, + // ChecksumAlgorithm: types.ChecksumAlgorithmSha256, + }) + if err != nil { + var noBucket *types.NoSuchBucket + if errors.As(err, &noBucket) { + log.Printf("Bucket %s does not exist.\n", bucket) + err = noBucket + } + } + fmt.Printf("uploaded %v:%v in %s\n", bucket, key, time.Now().Sub(start)) + return err +} + +func (h *S3DHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + file := r.PostFormValue("file") + if file == "" { + 
w.Header().Set("x-missing-field", "file") + w.WriteHeader(http.StatusBadRequest) + return + } + uri := r.PostFormValue("uri") + if uri == "" { + w.Header().Set("x-missing-field", "uri") + w.WriteHeader(http.StatusBadRequest) + return + } + + fmt.Println("file:", file) + fmt.Println("uri:", uri) + + if !filepath.IsAbs(file) { + w.WriteHeader(http.StatusBadRequest) + fmt.Fprintf(w, "Only absolute file paths are supported, %q", html.EscapeString(file)) + return + } + + u, err := url.Parse(uri) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + fmt.Fprintf(w, "Unable to parse URI, %q", html.EscapeString(uri)) + return + } + + if u.Scheme != "s3" { + w.WriteHeader(http.StatusBadRequest) + fmt.Fprintf(w, "Only s3 scheme is supported, %q", html.EscapeString(uri)) + return + } + + bucket := u.Host + if bucket == "" { + w.WriteHeader(http.StatusBadRequest) + fmt.Fprintf(w, "Unable to parse bucket from URI, %q", html.EscapeString(uri)) + return + } + key := u.Path[1:] // Remove leading slash + + // fmt.Println("Bucket:", bucket) + // fmt.Println("Key:", key) + + // err = h.UploadFile(context.Background(), bucket, key, file) + // if err != nil { + // w.WriteHeader(http.StatusBadRequest) + // fmt.Printf("error uploading file: %s\n", err) + // return + // } + err = h.UploadFileMultipart(bucket, key, file) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + fmt.Printf("error uploading file: %s\n", err) + return + } + + fmt.Fprintf(w, "Successful put %q", html.EscapeString(uri)) +} + +func getConf() s3dConf { + conf := s3dConf{} + conf.host = flag.String("host", os.Getenv("S3DAEMON_HOST"), "S3 Daemon Host") + defaultPort, _ := strconv.Atoi(os.Getenv("S3DAEMON_PORT")) + if defaultPort == 0 { + defaultPort = 15555 + } + conf.port = flag.Int("port", defaultPort, "S3 Daemon Port") + conf.endpoint_url = flag.String("s3-endpoint-url", os.Getenv("S3_ENDPOINT_URL"), "S3 Endpoint URL") + flag.Parse() + + if *conf.endpoint_url == "" { + log.Fatal("s3-endpoint-url is 
required") + } + + log.Println("host:", *conf.host) + log.Println("port:", *conf.port) + log.Println("s3-endpoint-url:", *conf.endpoint_url) + + return conf +} + +func (h *S3DHandler) getAwsConfig() *aws.Config { + httpClient := awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { + t.ExpectContinueTimeout = 0 + t.IdleConnTimeout = 0 + t.MaxIdleConns = 1000 + t.MaxConnsPerHost = 1000 + t.MaxIdleConnsPerHost = 1000 + // disable http/2 to prevent muxing over a single tcp connection + t.TLSClientConfig.NextProtos = []string{"http/1.1"} + }) + + cfg, err := config.LoadDefaultConfig( + context.TODO(), + config.WithBaseEndpoint(*h.Conf.endpoint_url), + config.WithHTTPClient(httpClient), + // config.WithRetryer(func() aws.Retryer { + // return retry.NewStandard(func(o *retry.StandardOptions) { + // o.MaxAttempts = 10 + // o.MaxBackoff = time.Millisecond * 500 + // o.RateLimiter = ratelimit.None + // }) + // }), + ) + if err != nil { + log.Fatal(err) + } + + return &cfg +} + +func main() { + conf := getConf() + + // httpClient := &http.Client{ + // Transport: &http.Transport{ + // ExpectContinueTimeout: 0, + // IdleConnTimeout: 0, + // MaxConnsPerHost: 1000, + // MaxIdleConns: 1000, + // MaxIdleConnsPerHost: 1000, + // }, + // } + + // httpClient := awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { + // t.ExpectContinueTimeout = 0 + // t.IdleConnTimeout = 0 + // t.MaxIdleConns = 1000 + // t.MaxConnsPerHost = 1000 + // t.MaxIdleConnsPerHost = 1000 + // }) + + ctx := context.TODO() + + cfg, err := config.LoadDefaultConfig( + ctx, + config.WithBaseEndpoint(*conf.endpoint_url), + //config.WithHTTPClient(httpClient), + // config.WithRetryer(func() aws.Retryer { + // return retry.NewStandard(func(o *retry.StandardOptions) { + // o.MaxAttempts = 10 + // o.MaxBackoff = time.Millisecond * 500 + // o.RateLimiter = ratelimit.None + // }) + // }), + ) + if err != nil { + log.Fatal(err) + } + + 
//cfg.HTTPClient.(*http.Client).Transport.(*http.Transport).ExpectContinueTimeout = 0 + + // v := reflect.ValueOf(cfg.HTTPClient) + // fmt.Println("Type:", v.Type()) + // fmt.Println("Kind:", v.Kind()) + + s3Client := s3.NewFromConfig(cfg, func(o *s3.Options) { + o.UsePathStyle = true + }) + resp, err := s3Client.ListBuckets(context.TODO(), nil) + if err != nil { + log.Fatal(err) + } + + handler := S3DHandler{ + Ctx: ctx, + AwsConfig: &cfg, + S3Client: s3Client, + Conf: &conf, + } + + // Print out the list of buckets + fmt.Println("Buckets:") + for _, bucket := range resp.Buckets { + fmt.Println(*bucket.Name) + } + + http.Handle("/", &handler) + + addr := fmt.Sprintf("%s:%d", *conf.host, *conf.port) + fmt.Println("Listening on", addr) + + err = http.ListenAndServe(addr, nil) + if errors.Is(err, http.ErrServerClosed) { + fmt.Printf("server closed\n") + } else if err != nil { + fmt.Printf("error starting server: %s\n", err) + os.Exit(1) + } +} From 9ea211eaa80e95d38a8514767904774d4dd50d8e Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Tue, 12 Nov 2024 09:36:07 -0700 Subject: [PATCH 02/32] one pool --- main.go | 123 ++++++++++++++++++-------------------------------------- 1 file changed, 40 insertions(+), 83 deletions(-) diff --git a/main.go b/main.go index de61e30..10cef49 100644 --- a/main.go +++ b/main.go @@ -25,7 +25,7 @@ import ( "github.com/aws/smithy-go" ) -type s3dConf struct { +type S3DConf struct { host *string port *int endpoint_url *string @@ -34,10 +34,10 @@ type s3dConf struct { } type S3DHandler struct { - Ctx context.Context + Conf *S3DConf AwsConfig *aws.Config S3Client *s3.Client - Conf *s3dConf + Uploader *manager.Uploader } // UploadFile reads from a file and puts the data into an object in a bucket. 
@@ -89,18 +89,7 @@ func (h *S3DHandler) UploadFileMultipart(bucket string, key string, fileName str // defer file.Close() fmt.Printf("slurped %v:%v in %s\n", bucket, key, time.Now().Sub(start)) - s3Client := s3.NewFromConfig(*h.getAwsConfig(), func(o *s3.Options) { - o.UsePathStyle = true - }) - fmt.Printf("NewFromConfig %v:%v in %s\n", bucket, key, time.Now().Sub(start)) - uploader := manager.NewUploader(s3Client, func(u *manager.Uploader) { - u.Concurrency = 1000 - u.MaxUploadParts = 1000 - u.PartSize = 1024 * 1024 * 5 - }) - fmt.Printf("NewUploader %v:%v in %s\n", bucket, key, time.Now().Sub(start)) - - _, err = uploader.Upload(h.Ctx, &s3.PutObjectInput{ + _, err = h.Uploader.Upload(context.TODO(), &s3.PutObjectInput{ Bucket: aws.String(bucket), Key: aws.String(key), Body: bytes.NewReader([]byte(data)), @@ -181,8 +170,8 @@ func (h *S3DHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { fmt.Fprintf(w, "Successful put %q", html.EscapeString(uri)) } -func getConf() s3dConf { - conf := s3dConf{} +func getConf() S3DConf { + conf := S3DConf{} conf.host = flag.String("host", os.Getenv("S3DAEMON_HOST"), "S3 Daemon Host") defaultPort, _ := strconv.Atoi(os.Getenv("S3DAEMON_PORT")) if defaultPort == 0 { @@ -203,7 +192,11 @@ func getConf() s3dConf { return conf } -func (h *S3DHandler) getAwsConfig() *aws.Config { +func New(conf *S3DConf) *S3DHandler { + handler := &S3DHandler{ + Conf: conf, + } + httpClient := awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { t.ExpectContinueTimeout = 0 t.IdleConnTimeout = 0 @@ -211,12 +204,13 @@ func (h *S3DHandler) getAwsConfig() *aws.Config { t.MaxConnsPerHost = 1000 t.MaxIdleConnsPerHost = 1000 // disable http/2 to prevent muxing over a single tcp connection + t.ForceAttemptHTTP2 = false t.TLSClientConfig.NextProtos = []string{"http/1.1"} }) - cfg, err := config.LoadDefaultConfig( + awsCfg, err := config.LoadDefaultConfig( context.TODO(), - config.WithBaseEndpoint(*h.Conf.endpoint_url), + 
config.WithBaseEndpoint(*conf.endpoint_url), config.WithHTTPClient(httpClient), // config.WithRetryer(func() aws.Retryer { // return retry.NewStandard(func(o *retry.StandardOptions) { @@ -230,81 +224,44 @@ func (h *S3DHandler) getAwsConfig() *aws.Config { log.Fatal(err) } - return &cfg -} - -func main() { - conf := getConf() - - // httpClient := &http.Client{ - // Transport: &http.Transport{ - // ExpectContinueTimeout: 0, - // IdleConnTimeout: 0, - // MaxConnsPerHost: 1000, - // MaxIdleConns: 1000, - // MaxIdleConnsPerHost: 1000, - // }, - // } - - // httpClient := awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { - // t.ExpectContinueTimeout = 0 - // t.IdleConnTimeout = 0 - // t.MaxIdleConns = 1000 - // t.MaxConnsPerHost = 1000 - // t.MaxIdleConnsPerHost = 1000 - // }) - - ctx := context.TODO() + handler.AwsConfig = &awsCfg - cfg, err := config.LoadDefaultConfig( - ctx, - config.WithBaseEndpoint(*conf.endpoint_url), - //config.WithHTTPClient(httpClient), - // config.WithRetryer(func() aws.Retryer { - // return retry.NewStandard(func(o *retry.StandardOptions) { - // o.MaxAttempts = 10 - // o.MaxBackoff = time.Millisecond * 500 - // o.RateLimiter = ratelimit.None - // }) - // }), - ) - if err != nil { - log.Fatal(err) - } + handler.S3Client = s3.NewFromConfig(awsCfg, func(o *s3.Options) { + o.UsePathStyle = true + }) - //cfg.HTTPClient.(*http.Client).Transport.(*http.Transport).ExpectContinueTimeout = 0 + /* + resp, err := s3Client.ListBuckets(context.TODO(), nil) + if err != nil { + log.Fatal(err) + } - // v := reflect.ValueOf(cfg.HTTPClient) - // fmt.Println("Type:", v.Type()) - // fmt.Println("Kind:", v.Kind()) + // Print out the list of buckets + fmt.Println("Buckets:") + for _, bucket := range resp.Buckets { + fmt.Println(*bucket.Name) + } + */ - s3Client := s3.NewFromConfig(cfg, func(o *s3.Options) { - o.UsePathStyle = true + handler.Uploader = manager.NewUploader(handler.S3Client, func(u *manager.Uploader) { + u.Concurrency = 1000 + 
u.MaxUploadParts = 1000 + u.PartSize = 1024 * 1024 * 5 }) - resp, err := s3Client.ListBuckets(context.TODO(), nil) - if err != nil { - log.Fatal(err) - } - handler := S3DHandler{ - Ctx: ctx, - AwsConfig: &cfg, - S3Client: s3Client, - Conf: &conf, - } + return handler +} - // Print out the list of buckets - fmt.Println("Buckets:") - for _, bucket := range resp.Buckets { - fmt.Println(*bucket.Name) - } +func main() { + conf := getConf() - http.Handle("/", &handler) + handler := New(&conf) + http.Handle("/", handler) addr := fmt.Sprintf("%s:%d", *conf.host, *conf.port) fmt.Println("Listening on", addr) - err = http.ListenAndServe(addr, nil) + err := http.ListenAndServe(addr, nil) if errors.Is(err, http.ErrServerClosed) { fmt.Printf("server closed\n") } else if err != nil { From 8321484aa7ee72144e187574aae1b96bde11c0bf Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Tue, 12 Nov 2024 10:27:29 -0700 Subject: [PATCH 03/32] disable file slurping --- main.go | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/main.go b/main.go index 10cef49..b881070 100644 --- a/main.go +++ b/main.go @@ -1,13 +1,11 @@ package main import ( - "bytes" "context" "errors" "flag" "fmt" "html" - "io/ioutil" "log" "net/http" "net/url" @@ -80,21 +78,20 @@ func (h *S3DHandler) UploadFile(ctx context.Context, bucketName string, objectKe // UploadObject uses the S3 upload manager to upload an object to a bucket. func (h *S3DHandler) UploadFileMultipart(bucket string, key string, fileName string) error { start := time.Now() - // file, err := os.Open(fileName) - data, err := ioutil.ReadFile(fileName) + file, err := os.Open(fileName) if err != nil { log.Printf("Couldn't open file %v to upload. 
Here's why: %v\n", fileName, err) return err } - // defer file.Close() - fmt.Printf("slurped %v:%v in %s\n", bucket, key, time.Now().Sub(start)) + defer file.Close() + // data, err := ioutil.ReadFile(fileName) + // fmt.Printf("slurped %v:%v in %s\n", bucket, key, time.Now().Sub(start)) _, err = h.Uploader.Upload(context.TODO(), &s3.PutObjectInput{ Bucket: aws.String(bucket), Key: aws.String(key), - Body: bytes.NewReader([]byte(data)), - // Body: file, - // ChecksumAlgorithm: types.ChecksumAlgorithmSha256, + // Body: bytes.NewReader([]byte(data)), + Body: file, }) if err != nil { var noBucket *types.NoSuchBucket From ba454a0ac3460db90d66ea1adad00367e047fa89 Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Tue, 12 Nov 2024 11:18:53 -0700 Subject: [PATCH 04/32] add S3DAEMON_MAX_PARALLEL_UPLOADS --- go.mod | 1 + go.sum | 2 ++ main.go | 37 +++++++++++++++++++++++++++++++------ 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 526a311..9935431 100644 --- a/go.mod +++ b/go.mod @@ -22,4 +22,5 @@ require ( github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.4 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.32.4 // indirect github.com/aws/smithy-go v1.22.0 // indirect + golang.org/x/sync v0.9.0 // indirect ) diff --git a/go.sum b/go.sum index 53a63f5..07f8cf9 100644 --- a/go.sum +++ b/go.sum @@ -36,3 +36,5 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.32.4 h1:yDxvkz3/uOKfxnv8YhzOi9m+2OGI github.com/aws/aws-sdk-go-v2/service/sts v1.32.4/go.mod h1:9XEUty5v5UAsMiFOBJrNibZgwCeOma73jgGwwhgffa8= github.com/aws/smithy-go v1.22.0 h1:uunKnWlcoL3zO7q+gG2Pk53joueEOsnNB28QdMsmiMM= github.com/aws/smithy-go v1.22.0/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= +golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= +golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= diff --git a/main.go b/main.go index b881070..b7b886f 100644 --- a/main.go +++ b/main.go @@ -21,6 +21,7 @@ import ( 
"github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/s3/types" "github.com/aws/smithy-go" + "golang.org/x/sync/semaphore" ) type S3DConf struct { @@ -29,13 +30,15 @@ type S3DConf struct { endpoint_url *string // access_key *string // secret_key *string + maxParallelUploads *int64 } type S3DHandler struct { - Conf *S3DConf - AwsConfig *aws.Config - S3Client *s3.Client - Uploader *manager.Uploader + Conf *S3DConf + AwsConfig *aws.Config + S3Client *s3.Client + Uploader *manager.Uploader + ParallelUploads *semaphore.Weighted } // UploadFile reads from a file and puts the data into an object in a bucket. @@ -148,6 +151,14 @@ func (h *S3DHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } key := u.Path[1:] // Remove leading slash + // limit the number of parallel uploads + if err := h.ParallelUploads.Acquire(context.Background(), 1); err != nil { + w.WriteHeader(http.StatusServiceUnavailable) + fmt.Fprintf(w, "error acquiring semaphore: %s\n", err) + return + } + defer h.ParallelUploads.Release(1) + // fmt.Println("Bucket:", bucket) // fmt.Println("Key:", key) @@ -169,13 +180,24 @@ func (h *S3DHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { func getConf() S3DConf { conf := S3DConf{} + conf.host = flag.String("host", os.Getenv("S3DAEMON_HOST"), "S3 Daemon Host") + defaultPort, _ := strconv.Atoi(os.Getenv("S3DAEMON_PORT")) if defaultPort == 0 { defaultPort = 15555 } conf.port = flag.Int("port", defaultPort, "S3 Daemon Port") + conf.endpoint_url = flag.String("s3-endpoint-url", os.Getenv("S3_ENDPOINT_URL"), "S3 Endpoint URL") + + var defaultMaxParallelUploads int64 + defaultMaxParallelUploads, _ = strconv.ParseInt(os.Getenv("S3DAEMON_MAX_PARALLEL_UPLOADS"), 10, 64) + if defaultMaxParallelUploads == 0 { + defaultMaxParallelUploads = 100 + } + conf.maxParallelUploads = flag.Int64("max-parallel-uploads", defaultMaxParallelUploads, "Max Parallel Uploads") + flag.Parse() if *conf.endpoint_url == "" { @@ -185,11 +207,12 @@ func 
getConf() S3DConf { log.Println("host:", *conf.host) log.Println("port:", *conf.port) log.Println("s3-endpoint-url:", *conf.endpoint_url) + log.Println("max-parallel-uploads:", *conf.maxParallelUploads) return conf } -func New(conf *S3DConf) *S3DHandler { +func NewHandler(conf *S3DConf) *S3DHandler { handler := &S3DHandler{ Conf: conf, } @@ -246,13 +269,15 @@ func New(conf *S3DConf) *S3DHandler { u.PartSize = 1024 * 1024 * 5 }) + handler.ParallelUploads = semaphore.NewWeighted(*conf.maxParallelUploads) + return handler } func main() { conf := getConf() - handler := New(&conf) + handler := NewHandler(&conf) http.Handle("/", handler) addr := fmt.Sprintf("%s:%d", *conf.host, *conf.port) From b99de7d90c2ef7dc68ea2780653e7a95e615762c Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Tue, 12 Nov 2024 14:28:57 -0700 Subject: [PATCH 05/32] add S3DAEMON_UPLOAD_TIMEOUT --- main.go | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/main.go b/main.go index b7b886f..0eef667 100644 --- a/main.go +++ b/main.go @@ -31,6 +31,7 @@ type S3DConf struct { // access_key *string // secret_key *string maxParallelUploads *int64 + uploadTimeout time.Duration } type S3DHandler struct { @@ -126,33 +127,35 @@ func (h *S3DHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { if !filepath.IsAbs(file) { w.WriteHeader(http.StatusBadRequest) - fmt.Fprintf(w, "Only absolute file paths are supported, %q", html.EscapeString(file)) + fmt.Fprintf(w, "Only absolute file paths are supported, %q\n", html.EscapeString(file)) return } u, err := url.Parse(uri) if err != nil { w.WriteHeader(http.StatusBadRequest) - fmt.Fprintf(w, "Unable to parse URI, %q", html.EscapeString(uri)) + fmt.Fprintf(w, "Unable to parse URI, %q\n", html.EscapeString(uri)) return } if u.Scheme != "s3" { w.WriteHeader(http.StatusBadRequest) - fmt.Fprintf(w, "Only s3 scheme is supported, %q", html.EscapeString(uri)) + fmt.Fprintf(w, "Only s3 scheme is supported, %q\n", 
html.EscapeString(uri)) return } bucket := u.Host if bucket == "" { w.WriteHeader(http.StatusBadRequest) - fmt.Fprintf(w, "Unable to parse bucket from URI, %q", html.EscapeString(uri)) + fmt.Fprintf(w, "Unable to parse bucket from URI, %q\n", html.EscapeString(uri)) return } key := u.Path[1:] // Remove leading slash // limit the number of parallel uploads - if err := h.ParallelUploads.Acquire(context.Background(), 1); err != nil { + ctx, cancel := context.WithTimeout(context.Background(), h.Conf.uploadTimeout) + defer cancel() + if err := h.ParallelUploads.Acquire(ctx, 1); err != nil { w.WriteHeader(http.StatusServiceUnavailable) fmt.Fprintf(w, "error acquiring semaphore: %s\n", err) return @@ -175,7 +178,7 @@ func (h *S3DHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { return } - fmt.Fprintf(w, "Successful put %q", html.EscapeString(uri)) + fmt.Fprintf(w, "Successful put %q\n", html.EscapeString(uri)) } func getConf() S3DConf { @@ -198,16 +201,29 @@ func getConf() S3DConf { } conf.maxParallelUploads = flag.Int64("max-parallel-uploads", defaultMaxParallelUploads, "Max Parallel Uploads") + defaultUploadTimeout := os.Getenv("S3DAEMON_UPLOAD_TIMEOUT") + if defaultUploadTimeout == "" { + defaultUploadTimeout = "10s" + } + uploadTimeout := flag.String("upload-timeout", defaultUploadTimeout, "Upload Timeout (go duration)") + flag.Parse() if *conf.endpoint_url == "" { log.Fatal("s3-endpoint-url is required") } + uploadTimeoutDuration, err := time.ParseDuration(*uploadTimeout) + if err != nil { + log.Fatal("upload-timeout is invalid") + } + conf.uploadTimeout = uploadTimeoutDuration + log.Println("host:", *conf.host) log.Println("port:", *conf.port) log.Println("s3-endpoint-url:", *conf.endpoint_url) log.Println("max-parallel-uploads:", *conf.maxParallelUploads) + log.Println("upload-timeout:", conf.uploadTimeout) return conf } From 7830ec108e9e31a89d924fc40cce9e9d07bd62e8 Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Tue, 12 Nov 2024 14:29:11 -0700 
Subject: [PATCH 06/32] add basic .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6555f62 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/s3daemon-go From 2618514a245d3c89f9651817c6debe006aa8eefd Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Tue, 12 Nov 2024 14:29:21 -0700 Subject: [PATCH 07/32] add Dockerfile --- Dockerfile | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..c6d5c4b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,14 @@ +FROM golang:1.22-alpine as builder + +ARG BIN=s3daemon-go +RUN apk --update --no-cache add \ + binutils \ + && rm -rf /root/.cache +WORKDIR /go/src/github.com/jhoblitt/s3daemon-go +COPY . . +RUN go build && strip "$BIN" + +FROM alpine:3 +WORKDIR /root/ +COPY --from=builder /go/src/github.com/jhoblitt/s3daemon-go/$BIN /bin/$BIN +ENTRYPOINT ["/bin/s3daemon-go"] From 0822c1633aaa416b98609c77a8ca18ec7b42d6cb Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Tue, 12 Nov 2024 15:30:48 -0700 Subject: [PATCH 08/32] set 5MiB http WriteBufferSize --- main.go | 1 + 1 file changed, 1 insertion(+) diff --git a/main.go b/main.go index 0eef667..c133d2d 100644 --- a/main.go +++ b/main.go @@ -239,6 +239,7 @@ func NewHandler(conf *S3DConf) *S3DHandler { t.MaxIdleConns = 1000 t.MaxConnsPerHost = 1000 t.MaxIdleConnsPerHost = 1000 + t.WriteBufferSize = 1024 * 1024 * 5 // disable http/2 to prevent muxing over a single tcp connection t.ForceAttemptHTTP2 = false t.TLSClientConfig.NextProtos = []string{"http/1.1"} From fb8e9336400936d7f058e8ca6e7df6e17bfd1b0c Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Tue, 12 Nov 2024 16:53:38 -0700 Subject: [PATCH 09/32] rm unused S3DHandler.UploadFile --- main.go | 43 ------------------------------------------- 1 file changed, 43 deletions(-) diff --git a/main.go b/main.go index 
c133d2d..b686bb0 100644 --- a/main.go +++ b/main.go @@ -42,43 +42,6 @@ type S3DHandler struct { ParallelUploads *semaphore.Weighted } -// UploadFile reads from a file and puts the data into an object in a bucket. -func (h *S3DHandler) UploadFile(ctx context.Context, bucketName string, objectKey string, fileName string) error { - start := time.Now() - file, err := os.Open(fileName) - if err != nil { - log.Printf("Couldn't open file %v to upload. Here's why: %v\n", fileName, err) - } else { - defer file.Close() - _, err = h.S3Client.PutObject(ctx, &s3.PutObjectInput{ - Bucket: aws.String(bucketName), - Key: aws.String(objectKey), - Body: file, - }) - if err != nil { - var apiErr smithy.APIError - if errors.As(err, &apiErr) && apiErr.ErrorCode() == "EntityTooLarge" { - log.Printf("Error while uploading object to %s. The object is too large.\n"+ - "To upload objects larger than 5GB, use the S3 console (160GB max)\n"+ - "or the multipart upload API (5TB max).", bucketName) - } else { - log.Printf("Couldn't upload file %v to %v:%v. Here's why: %v\n", - fileName, bucketName, objectKey, err) - } - } else { - /* - err = s3.NewObjectExistsWaiter(h.S3Client).Wait( - ctx, &s3.HeadObjectInput{Bucket: aws.String(bucketName), Key: aws.String(objectKey)}, time.Minute) - if err != nil { - log.Printf("Failed attempt to wait for object %s to exist.\n", objectKey) - } - */ - } - } - fmt.Printf("uploaded %v to %v:%v in %s\n", fileName, bucketName, objectKey, time.Now().Sub(start)) - return err -} - // UploadObject uses the S3 upload manager to upload an object to a bucket. 
func (h *S3DHandler) UploadFileMultipart(bucket string, key string, fileName string) error { start := time.Now() @@ -165,12 +128,6 @@ func (h *S3DHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { // fmt.Println("Bucket:", bucket) // fmt.Println("Key:", key) - // err = h.UploadFile(context.Background(), bucket, key, file) - // if err != nil { - // w.WriteHeader(http.StatusBadRequest) - // fmt.Printf("error uploading file: %s\n", err) - // return - // } err = h.UploadFileMultipart(bucket, key, file) if err != nil { w.WriteHeader(http.StatusBadRequest) From fc898b16d59796b54ad67c3533fff1b6c295f9bb Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Wed, 13 Nov 2024 10:30:56 -0700 Subject: [PATCH 10/32] add S3DAEMON_QUEUE_TIMEOUT & S3DAEMON_UPLOAD_TRIES + cleanup logs --- go.mod | 13 +++--- main.go | 134 ++++++++++++++++++++++++++++++++++++-------------------- 2 files changed, 94 insertions(+), 53 deletions(-) diff --git a/go.mod b/go.mod index 9935431..9117013 100644 --- a/go.mod +++ b/go.mod @@ -3,12 +3,17 @@ module github.com/lsst-dm/s3daemon-go go 1.22.7 require ( - github.com/aws/aws-sdk-go-v2 v1.32.4 // indirect + github.com/aws/aws-sdk-go-v2 v1.32.4 + github.com/aws/aws-sdk-go-v2/config v1.28.3 + github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.37 + github.com/aws/aws-sdk-go-v2/service/s3 v1.66.3 + golang.org/x/sync v0.9.0 +) + +require ( github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.6 // indirect - github.com/aws/aws-sdk-go-v2/config v1.28.3 // indirect github.com/aws/aws-sdk-go-v2/credentials v1.17.44 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.19 // indirect - github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.37 // indirect github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.23 // indirect github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.23 // indirect github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 // indirect @@ -17,10 +22,8 @@ require ( github.com/aws/aws-sdk-go-v2/service/internal/checksum 
v1.4.4 // indirect github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.4 // indirect github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.4 // indirect - github.com/aws/aws-sdk-go-v2/service/s3 v1.66.3 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.24.5 // indirect github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.4 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.32.4 // indirect github.com/aws/smithy-go v1.22.0 // indirect - golang.org/x/sync v0.9.0 // indirect ) diff --git a/main.go b/main.go index b686bb0..3f875c1 100644 --- a/main.go +++ b/main.go @@ -20,7 +20,6 @@ import ( "github.com/aws/aws-sdk-go-v2/feature/s3/manager" "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/s3/types" - "github.com/aws/smithy-go" "golang.org/x/sync/semaphore" ) @@ -31,7 +30,9 @@ type S3DConf struct { // access_key *string // secret_key *string maxParallelUploads *int64 - uploadTimeout time.Duration + uploadTimeout *time.Duration + queueTimeout *time.Duration + uploadTries *int } type S3DHandler struct { @@ -47,31 +48,50 @@ func (h *S3DHandler) UploadFileMultipart(bucket string, key string, fileName str start := time.Now() file, err := os.Open(fileName) if err != nil { - log.Printf("Couldn't open file %v to upload. 
Here's why: %v\n", fileName, err) + log.Printf("upload %v:%v | Couldn't open file %v to upload because: %v\n", bucket, key, fileName, err) return err } defer file.Close() // data, err := ioutil.ReadFile(fileName) - // fmt.Printf("slurped %v:%v in %s\n", bucket, key, time.Now().Sub(start)) - - _, err = h.Uploader.Upload(context.TODO(), &s3.PutObjectInput{ - Bucket: aws.String(bucket), - Key: aws.String(key), - // Body: bytes.NewReader([]byte(data)), - Body: file, - }) - if err != nil { - var noBucket *types.NoSuchBucket - if errors.As(err, &noBucket) { - log.Printf("Bucket %s does not exist.\n", bucket) - err = noBucket + // log.Printf("slurped %v:%v in %s\n", bucket, key, time.Now().Sub(start)) + + maxAttempts := *h.Conf.uploadTries + var attempt int + for attempt = 1; attempt <= maxAttempts; attempt++ { + ctx, cancel := context.WithTimeout(context.TODO(), *h.Conf.uploadTimeout) + defer cancel() + _, err = h.Uploader.Upload(ctx, &s3.PutObjectInput{ + Bucket: aws.String(bucket), + Key: aws.String(key), + // Body: bytes.NewReader([]byte(data)), + Body: file, + }) + if err != nil { + var noBucket *types.NoSuchBucket + if errors.As(err, &noBucket) { + log.Printf("upload %v:%v | Bucket %s does not exist.\n", bucket, key, bucket) + return noBucket // Don't retry if the bucket doesn't exist. 
+ } + + log.Printf("upload %v:%v | failed after %s -- try %v/%v\n", bucket, key, time.Now().Sub(start), attempt, maxAttempts) + log.Printf("upload %v:%v | failed because: %v\n", bucket, key, err) + + // bubble up the error if we've exhausted our attempts + if attempt == maxAttempts { + return err + } + } else { + break } } - fmt.Printf("uploaded %v:%v in %s\n", bucket, key, time.Now().Sub(start)) - return err + + log.Printf("upload %v:%v | success in %s after %v/%v tries\n", bucket, key, time.Now().Sub(start), attempt, maxAttempts) + return nil } func (h *S3DHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + start := time.Now() + file := r.PostFormValue("file") if file == "" { w.Header().Set("x-missing-field", "file") @@ -85,9 +105,6 @@ func (h *S3DHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { return } - fmt.Println("file:", file) - fmt.Println("uri:", uri) - if !filepath.IsAbs(file) { w.WriteHeader(http.StatusBadRequest) fmt.Fprintf(w, "Only absolute file paths are supported, %q\n", html.EscapeString(file)) @@ -115,23 +132,23 @@ func (h *S3DHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } key := u.Path[1:] // Remove leading slash + log.Printf("queuing %v:%v | source %v\n", bucket, key, file) + // limit the number of parallel uploads - ctx, cancel := context.WithTimeout(context.Background(), h.Conf.uploadTimeout) + ctx, cancel := context.WithTimeout(context.Background(), *h.Conf.queueTimeout) defer cancel() if err := h.ParallelUploads.Acquire(ctx, 1); err != nil { w.WriteHeader(http.StatusServiceUnavailable) fmt.Fprintf(w, "error acquiring semaphore: %s\n", err) + log.Printf("queue %v:%v | failed after %s: %s\n", bucket, key, time.Now().Sub(start), err) return } defer h.ParallelUploads.Release(1) - // fmt.Println("Bucket:", bucket) - // fmt.Println("Key:", key) - err = h.UploadFileMultipart(bucket, key, file) if err != nil { w.WriteHeader(http.StatusBadRequest) - fmt.Printf("error uploading file: %s\n", err) + fmt.Fprintf(w, 
"error uploading file: %s\n", err) return } @@ -141,46 +158,68 @@ func (h *S3DHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { func getConf() S3DConf { conf := S3DConf{} - conf.host = flag.String("host", os.Getenv("S3DAEMON_HOST"), "S3 Daemon Host") + // start flags + conf.host = flag.String("host", os.Getenv("S3DAEMON_HOST"), "S3 Daemon Host (S3DAEMON_HOST)") defaultPort, _ := strconv.Atoi(os.Getenv("S3DAEMON_PORT")) if defaultPort == 0 { defaultPort = 15555 } - conf.port = flag.Int("port", defaultPort, "S3 Daemon Port") + conf.port = flag.Int("port", defaultPort, "S3 Daemon Port (S3DAEMON_PORT)") - conf.endpoint_url = flag.String("s3-endpoint-url", os.Getenv("S3_ENDPOINT_URL"), "S3 Endpoint URL") + conf.endpoint_url = flag.String("s3-endpoint-url", os.Getenv("S3_ENDPOINT_URL"), "S3 Endpoint URL (S3_ENDPOINT_URL)") var defaultMaxParallelUploads int64 defaultMaxParallelUploads, _ = strconv.ParseInt(os.Getenv("S3DAEMON_MAX_PARALLEL_UPLOADS"), 10, 64) if defaultMaxParallelUploads == 0 { defaultMaxParallelUploads = 100 } - conf.maxParallelUploads = flag.Int64("max-parallel-uploads", defaultMaxParallelUploads, "Max Parallel Uploads") + conf.maxParallelUploads = flag.Int64("max-parallel-uploads", defaultMaxParallelUploads, "Max Parallel Uploads (S3DAEMON_MAX_PARALLEL_UPLOADS)") defaultUploadTimeout := os.Getenv("S3DAEMON_UPLOAD_TIMEOUT") if defaultUploadTimeout == "" { defaultUploadTimeout = "10s" } - uploadTimeout := flag.String("upload-timeout", defaultUploadTimeout, "Upload Timeout (go duration)") + uploadTimeout := flag.String("upload-timeout", defaultUploadTimeout, "Upload Timeout (S3DAEMON_UPLOAD_TIMEOUT)") + + defaultQueueTimeout := os.Getenv("S3DAEMON_QUEUE_TIMEOUT") + if defaultQueueTimeout == "" { + defaultQueueTimeout = "10s" + } + queueTimeout := flag.String("queue-timeout", defaultQueueTimeout, "Queue Timeout waiting for transfer to start (S3DAEMON_QUEUE_TIMEOUT)") + + defaultUploadTries, _ := strconv.Atoi(os.Getenv("S3DAEMON_UPLOAD_TRIES")) + 
if defaultUploadTries == 0 { + defaultUploadTries = 1 + } + conf.uploadTries = flag.Int("upload-tries", defaultUploadTries, "Max number of upload tries (S3DAEMON_UPLOAD_TRIES)") flag.Parse() + // end flags if *conf.endpoint_url == "" { - log.Fatal("s3-endpoint-url is required") + log.Fatal("S3_ENDPOINT_URL is required") } uploadTimeoutDuration, err := time.ParseDuration(*uploadTimeout) if err != nil { - log.Fatal("upload-timeout is invalid") + log.Fatal("S3DAEMON_UPLOAD_TIMEOUT is invalid") + } + conf.uploadTimeout = &uploadTimeoutDuration + + queueTimeoutDuration, err := time.ParseDuration(*queueTimeout) + if err != nil { + log.Fatal("S3DAEMON_QUEUE_TIMEOUT is invalid") } - conf.uploadTimeout = uploadTimeoutDuration + conf.queueTimeout = &queueTimeoutDuration - log.Println("host:", *conf.host) - log.Println("port:", *conf.port) - log.Println("s3-endpoint-url:", *conf.endpoint_url) - log.Println("max-parallel-uploads:", *conf.maxParallelUploads) - log.Println("upload-timeout:", conf.uploadTimeout) + log.Println("S3DAEMON_HOST:", *conf.host) + log.Println("S3DAEMON_PORT:", *conf.port) + log.Println("S3DAEMON_ENDPOINT_URL:", *conf.endpoint_url) + log.Println("S3DAEMON_MAX_PARALLEL_UPLOADS:", *conf.maxParallelUploads) + log.Println("S3DAEMON_UPLOAD_TIMEOUT:", *conf.uploadTimeout) + log.Println("S3DAEMON_QUEUE_TIMEOUT:", *conf.queueTimeout) + log.Println("S3DAEMON_UPLOAD_TRIES:", *conf.uploadTries) return conf } @@ -193,9 +232,9 @@ func NewHandler(conf *S3DConf) *S3DHandler { httpClient := awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { t.ExpectContinueTimeout = 0 t.IdleConnTimeout = 0 - t.MaxIdleConns = 1000 - t.MaxConnsPerHost = 1000 - t.MaxIdleConnsPerHost = 1000 + t.MaxIdleConns = int(*conf.maxParallelUploads * 4) + t.MaxConnsPerHost = int(*conf.maxParallelUploads * 5) // allow for multipart upload creation + t.MaxIdleConnsPerHost = int(*conf.maxParallelUploads * 4) t.WriteBufferSize = 1024 * 1024 * 5 // disable http/2 to prevent muxing 
over a single tcp connection t.ForceAttemptHTTP2 = false @@ -231,9 +270,9 @@ func NewHandler(conf *S3DConf) *S3DHandler { } // Print out the list of buckets - fmt.Println("Buckets:") + log.Println("Buckets:") for _, bucket := range resp.Buckets { - fmt.Println(*bucket.Name) + log.Println(*bucket.Name) } */ @@ -255,13 +294,12 @@ func main() { http.Handle("/", handler) addr := fmt.Sprintf("%s:%d", *conf.host, *conf.port) - fmt.Println("Listening on", addr) + log.Println("Listening on", addr) err := http.ListenAndServe(addr, nil) if errors.Is(err, http.ErrServerClosed) { - fmt.Printf("server closed\n") + log.Printf("server closed\n") } else if err != nil { - fmt.Printf("error starting server: %s\n", err) - os.Exit(1) + log.Fatal("error starting server: %s\n", err) } } From 75b62099727a6a96b3e3cd6ea777b96f4dbf3656 Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Wed, 13 Nov 2024 10:54:53 -0700 Subject: [PATCH 11/32] switch to github.com/hyperledger/fabric/common/semaphore golang.org/x/sync would block trying to acquire the transfer semaphore, which could result in an http status not being returned to the client until well after the queue timeout had been exceeded. 
--- go.mod | 4 +++- go.sum | 20 ++++++++++++++++++-- main.go | 11 ++++++----- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/go.mod b/go.mod index 9117013..2c6a65b 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/aws/aws-sdk-go-v2/config v1.28.3 github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.37 github.com/aws/aws-sdk-go-v2/service/s3 v1.66.3 - golang.org/x/sync v0.9.0 + github.com/hyperledger/fabric v2.1.1+incompatible ) require ( @@ -26,4 +26,6 @@ require ( github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.4 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.32.4 // indirect github.com/aws/smithy-go v1.22.0 // indirect + github.com/onsi/gomega v1.35.1 // indirect + github.com/stretchr/testify v1.9.0 // indirect ) diff --git a/go.sum b/go.sum index 07f8cf9..6e38cf0 100644 --- a/go.sum +++ b/go.sum @@ -36,5 +36,21 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.32.4 h1:yDxvkz3/uOKfxnv8YhzOi9m+2OGI github.com/aws/aws-sdk-go-v2/service/sts v1.32.4/go.mod h1:9XEUty5v5UAsMiFOBJrNibZgwCeOma73jgGwwhgffa8= github.com/aws/smithy-go v1.22.0 h1:uunKnWlcoL3zO7q+gG2Pk53joueEOsnNB28QdMsmiMM= github.com/aws/smithy-go v1.22.0/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= -golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= -golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/hyperledger/fabric v2.1.1+incompatible h1:cYYRv3vVg4kA6DmrixLxwn1nwBEUuYda8DsMwlaMKbY= +github.com/hyperledger/fabric v2.1.1+incompatible/go.mod h1:tGFAOCT696D3rG0Vofd2dyWYLySHlh0aQjf7Q1HAju0= +github.com/onsi/gomega v1.35.1 
h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4= +github.com/onsi/gomega v1.35.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= +golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= +golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= +golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/main.go b/main.go index 3f875c1..264a8c4 100644 --- a/main.go +++ b/main.go @@ -20,7 +20,7 @@ import ( "github.com/aws/aws-sdk-go-v2/feature/s3/manager" "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/s3/types" - "golang.org/x/sync/semaphore" + "github.com/hyperledger/fabric/common/semaphore" ) type S3DConf struct { @@ -40,7 +40,7 @@ type S3DHandler struct { AwsConfig *aws.Config S3Client *s3.Client Uploader *manager.Uploader - ParallelUploads *semaphore.Weighted + ParallelUploads *semaphore.Semaphore } // UploadObject uses the S3 upload manager to upload an object to a bucket. 
@@ -137,13 +137,13 @@ func (h *S3DHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { // limit the number of parallel uploads ctx, cancel := context.WithTimeout(context.Background(), *h.Conf.queueTimeout) defer cancel() - if err := h.ParallelUploads.Acquire(ctx, 1); err != nil { + if err := h.ParallelUploads.Acquire(ctx); err != nil { w.WriteHeader(http.StatusServiceUnavailable) fmt.Fprintf(w, "error acquiring semaphore: %s\n", err) log.Printf("queue %v:%v | failed after %s: %s\n", bucket, key, time.Now().Sub(start), err) return } - defer h.ParallelUploads.Release(1) + defer h.ParallelUploads.Release() err = h.UploadFileMultipart(bucket, key, file) if err != nil { @@ -282,7 +282,8 @@ func NewHandler(conf *S3DConf) *S3DHandler { u.PartSize = 1024 * 1024 * 5 }) - handler.ParallelUploads = semaphore.NewWeighted(*conf.maxParallelUploads) + sema := semaphore.New(int(*conf.maxParallelUploads)) + handler.ParallelUploads = &sema return handler } From 773360f7be1183f182d3061b944da5a877373ec1 Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Thu, 14 Nov 2024 09:32:09 -0700 Subject: [PATCH 12/32] increase max idle conns to allow for multipart create/complete --- main.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/main.go b/main.go index 264a8c4..84b19d5 100644 --- a/main.go +++ b/main.go @@ -229,12 +229,14 @@ func NewHandler(conf *S3DConf) *S3DHandler { Conf: conf, } + maxConns := int(*conf.maxParallelUploads * 5) // allow for multipart upload creation + httpClient := awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { t.ExpectContinueTimeout = 0 t.IdleConnTimeout = 0 - t.MaxIdleConns = int(*conf.maxParallelUploads * 4) - t.MaxConnsPerHost = int(*conf.maxParallelUploads * 5) // allow for multipart upload creation - t.MaxIdleConnsPerHost = int(*conf.maxParallelUploads * 4) + t.MaxIdleConns = maxConns + t.MaxConnsPerHost = maxConns + t.MaxIdleConnsPerHost = maxConns t.WriteBufferSize = 1024 * 1024 * 5 // disable 
http/2 to prevent muxing over a single tcp connection t.ForceAttemptHTTP2 = false From efc17d72e332d57f0c7da23be0f37d44dbcd7d76 Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Thu, 14 Nov 2024 11:57:16 -0700 Subject: [PATCH 13/32] add S3DAEMON_UPLOAD_PARTSIZE --- go.mod | 6 +++++- go.sum | 51 +++++++++++++++++++++++++++++++++++++++++++++++---- main.go | 17 ++++++++++++++++- 3 files changed, 68 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 2c6a65b..2f3d35d 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.37 github.com/aws/aws-sdk-go-v2/service/s3 v1.66.3 github.com/hyperledger/fabric v2.1.1+incompatible + k8s.io/apimachinery v0.31.2 ) require ( @@ -26,6 +27,9 @@ require ( github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.4 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.32.4 // indirect github.com/aws/smithy-go v1.22.0 // indirect + github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/gogo/protobuf v1.3.2 // indirect github.com/onsi/gomega v1.35.1 // indirect - github.com/stretchr/testify v1.9.0 // indirect + github.com/x448/float16 v0.8.4 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect ) diff --git a/go.sum b/go.sum index 6e38cf0..22c8cfb 100644 --- a/go.sum +++ b/go.sum @@ -36,21 +36,64 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.32.4 h1:yDxvkz3/uOKfxnv8YhzOi9m+2OGI github.com/aws/aws-sdk-go-v2/service/sts v1.32.4/go.mod h1:9XEUty5v5UAsMiFOBJrNibZgwCeOma73jgGwwhgffa8= github.com/aws/smithy-go v1.22.0 h1:uunKnWlcoL3zO7q+gG2Pk53joueEOsnNB28QdMsmiMM= github.com/aws/smithy-go v1.22.0/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew 
v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= +github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/hyperledger/fabric v2.1.1+incompatible h1:cYYRv3vVg4kA6DmrixLxwn1nwBEUuYda8DsMwlaMKbY= github.com/hyperledger/fabric v2.1.1+incompatible/go.mod h1:tGFAOCT696D3rG0Vofd2dyWYLySHlh0aQjf7Q1HAju0= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4= github.com/onsi/gomega v1.35.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys 
v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/apimachinery v0.31.2 h1:i4vUt2hPK56W6mlT7Ry+AO8eEsyxMD1U44NR22CLTYw= +k8s.io/apimachinery v0.31.2/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= diff --git a/main.go b/main.go index 84b19d5..a0373be 100644 --- a/main.go +++ b/main.go @@ -21,6 +21,7 @@ import ( 
"github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/s3/types" "github.com/hyperledger/fabric/common/semaphore" + k8sresource "k8s.io/apimachinery/pkg/api/resource" ) type S3DConf struct { @@ -33,6 +34,7 @@ type S3DConf struct { uploadTimeout *time.Duration queueTimeout *time.Duration uploadTries *int + uploadPartsize *k8sresource.Quantity } type S3DHandler struct { @@ -194,6 +196,12 @@ func getConf() S3DConf { } conf.uploadTries = flag.Int("upload-tries", defaultUploadTries, "Max number of upload tries (S3DAEMON_UPLOAD_TRIES)") + defaultUploadPartsize := os.Getenv("S3DAEMON_UPLOAD_PARTSIZE") + if defaultUploadPartsize == "" { + defaultUploadPartsize = "5Mi" + } + uploadPartsize := flag.String("upload-partsize", defaultUploadPartsize, "Upload Part Size (S3DAEMON_UPLOAD_PARTSIZE)") + flag.Parse() // end flags @@ -213,6 +221,12 @@ func getConf() S3DConf { } conf.queueTimeout = &queueTimeoutDuration + uploadPartsizeQuantity, err := k8sresource.ParseQuantity(*uploadPartsize) + if err != nil { + log.Fatal("S3DAEMON_UPLOAD_PARTSIZE is invalid") + } + conf.uploadPartsize = &uploadPartsizeQuantity + log.Println("S3DAEMON_HOST:", *conf.host) log.Println("S3DAEMON_PORT:", *conf.port) log.Println("S3DAEMON_ENDPOINT_URL:", *conf.endpoint_url) @@ -220,6 +234,7 @@ func getConf() S3DConf { log.Println("S3DAEMON_UPLOAD_TIMEOUT:", *conf.uploadTimeout) log.Println("S3DAEMON_QUEUE_TIMEOUT:", *conf.queueTimeout) log.Println("S3DAEMON_UPLOAD_TRIES:", *conf.uploadTries) + log.Println("S3DAEMON_UPLOAD_PARTSIZE:", conf.uploadPartsize.String()) return conf } @@ -281,7 +296,7 @@ func NewHandler(conf *S3DConf) *S3DHandler { handler.Uploader = manager.NewUploader(handler.S3Client, func(u *manager.Uploader) { u.Concurrency = 1000 u.MaxUploadParts = 1000 - u.PartSize = 1024 * 1024 * 5 + u.PartSize = conf.uploadPartsize.Value() }) sema := semaphore.New(int(*conf.maxParallelUploads)) From 5abb9dcb248fa9f071faff8eabd3b8fbb01368e5 Mon Sep 17 00:00:00 2001 From: Joshua 
Hoblitt Date: Mon, 18 Nov 2024 10:01:30 -0700 Subject: [PATCH 14/32] decrease write buffer to 64MiB --- main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.go b/main.go index a0373be..c7e5dd1 100644 --- a/main.go +++ b/main.go @@ -252,7 +252,7 @@ func NewHandler(conf *S3DConf) *S3DHandler { t.MaxIdleConns = maxConns t.MaxConnsPerHost = maxConns t.MaxIdleConnsPerHost = maxConns - t.WriteBufferSize = 1024 * 1024 * 5 + t.WriteBufferSize = 64 * 1024 // disable http/2 to prevent muxing over a single tcp connection t.ForceAttemptHTTP2 = false t.TLSClientConfig.NextProtos = []string{"http/1.1"} From 97e93509d468c524c0b73168386aa3f13690c059 Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Mon, 18 Nov 2024 10:02:22 -0700 Subject: [PATCH 15/32] disable aws retrying (aws.NopRetryer) --- main.go | 1 + 1 file changed, 1 insertion(+) diff --git a/main.go b/main.go index c7e5dd1..99ac719 100644 --- a/main.go +++ b/main.go @@ -278,6 +278,7 @@ func NewHandler(conf *S3DConf) *S3DHandler { handler.S3Client = s3.NewFromConfig(awsCfg, func(o *s3.Options) { o.UsePathStyle = true + o.Retryer = aws.NopRetryer{} }) /* From 52b7f54593a36f86cb57fe10de5c8bec97fdeda6 Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Mon, 18 Nov 2024 10:04:24 -0700 Subject: [PATCH 16/32] explicitly enable http keep alive --- main.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/main.go b/main.go index 99ac719..a3c2c15 100644 --- a/main.go +++ b/main.go @@ -7,6 +7,7 @@ import ( "fmt" "html" "log" + "net" "net/http" "net/url" "os" @@ -256,6 +257,8 @@ func NewHandler(conf *S3DConf) *S3DHandler { // disable http/2 to prevent muxing over a single tcp connection t.ForceAttemptHTTP2 = false t.TLSClientConfig.NextProtos = []string{"http/1.1"} + }).WithDialerOptions(func(d *net.Dialer) { + d.KeepAlive = 0 // 0 means enabled, -1 means disabled }) awsCfg, err := config.LoadDefaultConfig( From 144fcd246a0e1daf41ff04a6cc9c62967c0e43bc Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: 
Mon, 18 Nov 2024 10:11:33 -0700 Subject: [PATCH 17/32] statically link docker build --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c6d5c4b..2d57a0a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ RUN apk --update --no-cache add \ && rm -rf /root/.cache WORKDIR /go/src/github.com/jhoblitt/s3daemon-go COPY . . -RUN go build && strip "$BIN" +RUN CGO_ENABLED=0 go build -ldflags "-extldflags '-static'" -o s3daemon-go && strip "$BIN" FROM alpine:3 WORKDIR /root/ From 887ade2f0168b4dcfbbcb415a976b766c46e5190 Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Mon, 18 Nov 2024 12:31:21 -0700 Subject: [PATCH 18/32] add 100mbit/s upload rate limit --- go.mod | 2 ++ go.sum | 4 ++++ main.go | 9 +++++++-- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 2f3d35d..8bcbed2 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/aws/aws-sdk-go-v2/config v1.28.3 github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.37 github.com/aws/aws-sdk-go-v2/service/s3 v1.66.3 + github.com/conduitio/bwlimit v0.1.0 github.com/hyperledger/fabric v2.1.1+incompatible k8s.io/apimachinery v0.31.2 ) @@ -31,5 +32,6 @@ require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/onsi/gomega v1.35.1 // indirect github.com/x448/float16 v0.8.4 // indirect + golang.org/x/time v0.3.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect ) diff --git a/go.sum b/go.sum index 22c8cfb..380c062 100644 --- a/go.sum +++ b/go.sum @@ -36,6 +36,8 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.32.4 h1:yDxvkz3/uOKfxnv8YhzOi9m+2OGI github.com/aws/aws-sdk-go-v2/service/sts v1.32.4/go.mod h1:9XEUty5v5UAsMiFOBJrNibZgwCeOma73jgGwwhgffa8= github.com/aws/smithy-go v1.22.0 h1:uunKnWlcoL3zO7q+gG2Pk53joueEOsnNB28QdMsmiMM= github.com/aws/smithy-go v1.22.0/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= +github.com/conduitio/bwlimit v0.1.0 h1:x3ijON0TSghQob4tFKaEvKixFmYKfVJQeSpXluC2JvE= 
+github.com/conduitio/bwlimit v0.1.0/go.mod h1:E+ASZ1/5L33MTb8hJTERs5Xnmh6Ulq3jbRh7LrdbXWU= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= @@ -83,6 +85,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= +golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= diff --git a/main.go b/main.go index a3c2c15..a40f7cf 100644 --- a/main.go +++ b/main.go @@ -21,6 +21,7 @@ import ( "github.com/aws/aws-sdk-go-v2/feature/s3/manager" "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/s3/types" + "github.com/conduitio/bwlimit" "github.com/hyperledger/fabric/common/semaphore" k8sresource "k8s.io/apimachinery/pkg/api/resource" ) @@ -247,6 +248,11 @@ func NewHandler(conf *S3DConf) *S3DHandler { maxConns := int(*conf.maxParallelUploads * 5) // allow for multipart upload creation + dialer := bwlimit.NewDialer(&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 0, + }, 100*(bwlimit.Mebibyte/8), 0) + httpClient := awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { 
t.ExpectContinueTimeout = 0 t.IdleConnTimeout = 0 @@ -257,8 +263,7 @@ func NewHandler(conf *S3DConf) *S3DHandler { // disable http/2 to prevent muxing over a single tcp connection t.ForceAttemptHTTP2 = false t.TLSClientConfig.NextProtos = []string{"http/1.1"} - }).WithDialerOptions(func(d *net.Dialer) { - d.KeepAlive = 0 // 0 means enabled, -1 means disabled + t.DialContext = dialer.DialContext }) awsCfg, err := config.LoadDefaultConfig( From fde8fadf4a42c2decb9e559fbcf3671b36868f27 Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Mon, 18 Nov 2024 12:56:09 -0700 Subject: [PATCH 19/32] add S3DAEMON_UPLOAD_BWLIMIT --- main.go | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/main.go b/main.go index a40f7cf..9c7c77a 100644 --- a/main.go +++ b/main.go @@ -37,6 +37,7 @@ type S3DConf struct { queueTimeout *time.Duration uploadTries *int uploadPartsize *k8sresource.Quantity + uploadBwlimit *k8sresource.Quantity } type S3DHandler struct { @@ -202,7 +203,13 @@ func getConf() S3DConf { if defaultUploadPartsize == "" { defaultUploadPartsize = "5Mi" } - uploadPartsize := flag.String("upload-partsize", defaultUploadPartsize, "Upload Part Size (S3DAEMON_UPLOAD_PARTSIZE)") + uploadPartsizeRaw := flag.String("upload-partsize", defaultUploadPartsize, "Upload Part Size (S3DAEMON_UPLOAD_PARTSIZE)") + + defaultUploadBwlimit := os.Getenv("S3DAEMON_UPLOAD_BWLIMIT") + if defaultUploadBwlimit == "" { + defaultUploadBwlimit = "10Mi" + } + uploadBwlimitRaw := flag.String("upload-bwlimit", defaultUploadBwlimit, "Upload bandwidth limit in bytes per second (S3DAEMON_UPLOAD_BWLIMIT)") flag.Parse() // end flags @@ -223,11 +230,17 @@ func getConf() S3DConf { } conf.queueTimeout = &queueTimeoutDuration - uploadPartsizeQuantity, err := k8sresource.ParseQuantity(*uploadPartsize) + uploadPartsize, err := k8sresource.ParseQuantity(*uploadPartsizeRaw) if err != nil { log.Fatal("S3DAEMON_UPLOAD_PARTSIZE is invalid") } - conf.uploadPartsize = 
&uploadPartsizeQuantity + conf.uploadPartsize = &uploadPartsize + + uploadBwlimit, err := k8sresource.ParseQuantity(*uploadBwlimitRaw) + if err != nil { + log.Fatal("S3DAEMON_UPLOAD_BWLIMIT is invalid") + } + conf.uploadBwlimit = &uploadBwlimit log.Println("S3DAEMON_HOST:", *conf.host) log.Println("S3DAEMON_PORT:", *conf.port) @@ -237,6 +250,7 @@ func getConf() S3DConf { log.Println("S3DAEMON_QUEUE_TIMEOUT:", *conf.queueTimeout) log.Println("S3DAEMON_UPLOAD_TRIES:", *conf.uploadTries) log.Println("S3DAEMON_UPLOAD_PARTSIZE:", conf.uploadPartsize.String()) + log.Println("S3DAEMON_UPLOAD_BWLIMIT:", conf.uploadBwlimit.String()) return conf } @@ -251,7 +265,7 @@ func NewHandler(conf *S3DConf) *S3DHandler { dialer := bwlimit.NewDialer(&net.Dialer{ Timeout: 30 * time.Second, KeepAlive: 0, - }, 100*(bwlimit.Mebibyte/8), 0) + }, bwlimit.Byte(conf.uploadBwlimit.Value()), 0) httpClient := awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { t.ExpectContinueTimeout = 0 From b1de9a9cb7c43dcf2a89cfb063624edafc43a5b7 Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Mon, 18 Nov 2024 16:36:35 -0700 Subject: [PATCH 20/32] pass through client request context If the client disconnects while an upload is in progress, it should be aborted. --- main.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/main.go b/main.go index 9c7c77a..42239e6 100644 --- a/main.go +++ b/main.go @@ -49,7 +49,7 @@ type S3DHandler struct { } // UploadObject uses the S3 upload manager to upload an object to a bucket. 
-func (h *S3DHandler) UploadFileMultipart(bucket string, key string, fileName string) error { +func (h *S3DHandler) UploadFileMultipart(ctx context.Context, bucket string, key string, fileName string) error { start := time.Now() file, err := os.Open(fileName) if err != nil { @@ -63,9 +63,9 @@ func (h *S3DHandler) UploadFileMultipart(bucket string, key string, fileName str maxAttempts := *h.Conf.uploadTries var attempt int for attempt = 1; attempt <= maxAttempts; attempt++ { - ctx, cancel := context.WithTimeout(context.TODO(), *h.Conf.uploadTimeout) + uploadCtx, cancel := context.WithTimeout(ctx, *h.Conf.uploadTimeout) defer cancel() - _, err = h.Uploader.Upload(ctx, &s3.PutObjectInput{ + _, err = h.Uploader.Upload(uploadCtx, &s3.PutObjectInput{ Bucket: aws.String(bucket), Key: aws.String(key), // Body: bytes.NewReader([]byte(data)), @@ -140,9 +140,9 @@ func (h *S3DHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { log.Printf("queuing %v:%v | source %v\n", bucket, key, file) // limit the number of parallel uploads - ctx, cancel := context.WithTimeout(context.Background(), *h.Conf.queueTimeout) + semaCtx, cancel := context.WithTimeout(r.Context(), *h.Conf.queueTimeout) defer cancel() - if err := h.ParallelUploads.Acquire(ctx); err != nil { + if err := h.ParallelUploads.Acquire(semaCtx); err != nil { w.WriteHeader(http.StatusServiceUnavailable) fmt.Fprintf(w, "error acquiring semaphore: %s\n", err) log.Printf("queue %v:%v | failed after %s: %s\n", bucket, key, time.Now().Sub(start), err) @@ -150,7 +150,7 @@ func (h *S3DHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } defer h.ParallelUploads.Release() - err = h.UploadFileMultipart(bucket, key, file) + err = h.UploadFileMultipart(r.Context(), bucket, key, file) if err != nil { w.WriteHeader(http.StatusBadRequest) fmt.Fprintf(w, "error uploading file: %s\n", err) From 89113b7008aec4e38fb51044ceb7b4a07c6678df Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Mon, 18 Nov 2024 17:46:06 -0700 
Subject: [PATCH 21/32] mv http form parsing into parseRequest() --- main.go | 84 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 45 insertions(+), 39 deletions(-) diff --git a/main.go b/main.go index 42239e6..8234c05 100644 --- a/main.go +++ b/main.go @@ -48,12 +48,19 @@ type S3DHandler struct { ParallelUploads *semaphore.Semaphore } +type S3DUploadTask struct { + uri *url.URL + bucket *string + key *string + file *string +} + // UploadObject uses the S3 upload manager to upload an object to a bucket. -func (h *S3DHandler) UploadFileMultipart(ctx context.Context, bucket string, key string, fileName string) error { +func (h *S3DHandler) UploadFileMultipart(ctx context.Context, task *S3DUploadTask) error { start := time.Now() - file, err := os.Open(fileName) + file, err := os.Open(*task.file) if err != nil { - log.Printf("upload %v:%v | Couldn't open file %v to upload because: %v\n", bucket, key, fileName, err) + log.Printf("upload %v:%v | Couldn't open file %v to upload because: %v\n", *task.bucket, *task.key, *task.file, err) return err } defer file.Close() @@ -66,20 +73,20 @@ func (h *S3DHandler) UploadFileMultipart(ctx context.Context, bucket string, key uploadCtx, cancel := context.WithTimeout(ctx, *h.Conf.uploadTimeout) defer cancel() _, err = h.Uploader.Upload(uploadCtx, &s3.PutObjectInput{ - Bucket: aws.String(bucket), - Key: aws.String(key), + Bucket: aws.String(*task.bucket), + Key: aws.String(*task.key), // Body: bytes.NewReader([]byte(data)), Body: file, }) if err != nil { var noBucket *types.NoSuchBucket if errors.As(err, &noBucket) { - log.Printf("upload %v:%v | Bucket %s does not exist.\n", bucket, key, bucket) + log.Printf("upload %v:%v | Bucket does not exist.\n", *task.bucket, *task.key) return noBucket // Don't retry if the bucket doesn't exist. 
} - log.Printf("upload %v:%v | failed after %s -- try %v/%v\n", bucket, key, time.Now().Sub(start), attempt, maxAttempts) - log.Printf("upload %v:%v | failed because: %v\n", bucket, key, err) + log.Printf("upload %v:%v | failed after %s -- try %v/%v\n", *task.bucket, *task.key, time.Now().Sub(start), attempt, maxAttempts) + log.Printf("upload %v:%v | failed because: %v\n", *task.bucket, *task.key, err) // bubble up the error if we've exhausted our attempts if attempt == maxAttempts { @@ -90,54 +97,54 @@ func (h *S3DHandler) UploadFileMultipart(ctx context.Context, bucket string, key } } - log.Printf("upload %v:%v | success in %s after %v/%v tries\n", bucket, key, time.Now().Sub(start), attempt, maxAttempts) + log.Printf("upload %v:%v | success in %s after %v/%v tries\n", *task.bucket, *task.key, time.Now().Sub(start), attempt, maxAttempts) return nil } -func (h *S3DHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { - start := time.Now() - +func (h *S3DHandler) parseRequest(r *http.Request) (*S3DUploadTask, error) { file := r.PostFormValue("file") if file == "" { - w.Header().Set("x-missing-field", "file") - w.WriteHeader(http.StatusBadRequest) - return + return nil, fmt.Errorf("missing field: file") } - uri := r.PostFormValue("uri") - if uri == "" { - w.Header().Set("x-missing-field", "uri") - w.WriteHeader(http.StatusBadRequest) - return + uriRaw := r.PostFormValue("uri") + if uriRaw == "" { + return nil, fmt.Errorf("missing field: uri") } if !filepath.IsAbs(file) { - w.WriteHeader(http.StatusBadRequest) - fmt.Fprintf(w, "Only absolute file paths are supported, %q\n", html.EscapeString(file)) - return + return nil, fmt.Errorf("Only absolute file paths are supported: %q", html.EscapeString(file)) } - u, err := url.Parse(uri) + uri, err := url.Parse(uriRaw) if err != nil { - w.WriteHeader(http.StatusBadRequest) - fmt.Fprintf(w, "Unable to parse URI, %q\n", html.EscapeString(uri)) - return + return nil, fmt.Errorf("Unable to parse URI: %q", 
html.EscapeString(uriRaw)) } - if u.Scheme != "s3" { - w.WriteHeader(http.StatusBadRequest) - fmt.Fprintf(w, "Only s3 scheme is supported, %q\n", html.EscapeString(uri)) - return + if uri.Scheme != "s3" { + return nil, fmt.Errorf("Only s3 scheme is supported: %q", html.EscapeString(uriRaw)) } - bucket := u.Host + bucket := uri.Host if bucket == "" { + return nil, fmt.Errorf("Unable to parse bucket from URI: %q", html.EscapeString(uriRaw)) + } + key := uri.Path[1:] // Remove leading slash + + return &S3DUploadTask{uri: uri, bucket: &bucket, key: &key, file: &file}, nil +} + +func (h *S3DHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + start := time.Now() + + task, err := h.parseRequest(r) + if err != nil { + w.Header().Set("x-error", err.Error()) w.WriteHeader(http.StatusBadRequest) - fmt.Fprintf(w, "Unable to parse bucket from URI, %q\n", html.EscapeString(uri)) + fmt.Fprintf(w, "error parsing request: %s\n", err) return } - key := u.Path[1:] // Remove leading slash - log.Printf("queuing %v:%v | source %v\n", bucket, key, file) + log.Printf("queuing %v:%v | source %v\n", *task.bucket, *task.key, *task.file) // limit the number of parallel uploads semaCtx, cancel := context.WithTimeout(r.Context(), *h.Conf.queueTimeout) @@ -145,19 +152,18 @@ func (h *S3DHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { if err := h.ParallelUploads.Acquire(semaCtx); err != nil { w.WriteHeader(http.StatusServiceUnavailable) fmt.Fprintf(w, "error acquiring semaphore: %s\n", err) - log.Printf("queue %v:%v | failed after %s: %s\n", bucket, key, time.Now().Sub(start), err) + log.Printf("queue %v:%v | failed after %s: %s\n", *task.bucket, *task.key, time.Now().Sub(start), err) return } defer h.ParallelUploads.Release() - err = h.UploadFileMultipart(r.Context(), bucket, key, file) - if err != nil { + if err := h.UploadFileMultipart(r.Context(), task); err != nil { w.WriteHeader(http.StatusBadRequest) fmt.Fprintf(w, "error uploading file: %s\n", err) return } - 
fmt.Fprintf(w, "Successful put %q\n", html.EscapeString(uri)) + fmt.Fprintf(w, "Successful put %q\n", html.EscapeString(task.uri.String())) } func getConf() S3DConf { From ee4ac10b3fccef20f52783570158f68b4e3ba2ce Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Tue, 19 Nov 2024 09:01:10 -0700 Subject: [PATCH 22/32] stop upload tries when client disconnects --- main.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/main.go b/main.go index 8234c05..f7b251c 100644 --- a/main.go +++ b/main.go @@ -79,13 +79,20 @@ func (h *S3DHandler) UploadFileMultipart(ctx context.Context, task *S3DUploadTas Body: file, }) if err != nil { + log.Printf("upload %v:%v | failed after %s -- try %v/%v\n", *task.bucket, *task.key, time.Now().Sub(start), attempt, maxAttempts) var noBucket *types.NoSuchBucket if errors.As(err, &noBucket) { log.Printf("upload %v:%v | Bucket does not exist.\n", *task.bucket, *task.key) - return noBucket // Don't retry if the bucket doesn't exist. + // Don't retry if the bucket doesn't exist. 
+ return noBucket + } + + if errors.Is(err, context.Canceled) { + log.Printf("upload %v:%v | context cancelled\n", *task.bucket, *task.key) + // Don't retry if the client disconnected + return err } - log.Printf("upload %v:%v | failed after %s -- try %v/%v\n", *task.bucket, *task.key, time.Now().Sub(start), attempt, maxAttempts) log.Printf("upload %v:%v | failed because: %v\n", *task.bucket, *task.key, err) // bubble up the error if we've exhausted our attempts From 627a0bcb5740f38ff95f763778e414e18e895858 Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Tue, 19 Nov 2024 10:04:56 -0700 Subject: [PATCH 23/32] convert S3DAEMON_UPLOAD_BWLIMIT to bit/s; 0 disables --- main.go | 54 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/main.go b/main.go index f7b251c..42c99a3 100644 --- a/main.go +++ b/main.go @@ -220,9 +220,9 @@ func getConf() S3DConf { defaultUploadBwlimit := os.Getenv("S3DAEMON_UPLOAD_BWLIMIT") if defaultUploadBwlimit == "" { - defaultUploadBwlimit = "10Mi" + defaultUploadBwlimit = "0" } - uploadBwlimitRaw := flag.String("upload-bwlimit", defaultUploadBwlimit, "Upload bandwidth limit in bytes per second (S3DAEMON_UPLOAD_BWLIMIT)") + uploadBwlimitRaw := flag.String("upload-bwlimit", defaultUploadBwlimit, "Upload bandwidth limit in bits per second (S3DAEMON_UPLOAD_BWLIMIT)") flag.Parse() // end flags @@ -275,23 +275,39 @@ func NewHandler(conf *S3DConf) *S3DHandler { maxConns := int(*conf.maxParallelUploads * 5) // allow for multipart upload creation - dialer := bwlimit.NewDialer(&net.Dialer{ - Timeout: 30 * time.Second, - KeepAlive: 0, - }, bwlimit.Byte(conf.uploadBwlimit.Value()), 0) - - httpClient := awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { - t.ExpectContinueTimeout = 0 - t.IdleConnTimeout = 0 - t.MaxIdleConns = maxConns - t.MaxConnsPerHost = maxConns - t.MaxIdleConnsPerHost = maxConns - t.WriteBufferSize = 64 * 1024 - // disable http/2 to prevent 
muxing over a single tcp connection - t.ForceAttemptHTTP2 = false - t.TLSClientConfig.NextProtos = []string{"http/1.1"} - t.DialContext = dialer.DialContext - }) + var httpClient *awshttp.BuildableClient + + if conf.uploadBwlimit.Value() != 0 { + dialer := bwlimit.NewDialer(&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 0, + }, bwlimit.Byte(conf.uploadBwlimit.Value()/8), 0) + + httpClient = awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { + t.ExpectContinueTimeout = 0 + t.IdleConnTimeout = 0 + t.MaxIdleConns = maxConns + t.MaxConnsPerHost = maxConns + t.MaxIdleConnsPerHost = maxConns + t.WriteBufferSize = 64 * 1024 + // disable http/2 to prevent muxing over a single tcp connection + t.ForceAttemptHTTP2 = false + t.TLSClientConfig.NextProtos = []string{"http/1.1"} + t.DialContext = dialer.DialContext + }) + } else { + httpClient = awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { + t.ExpectContinueTimeout = 0 + t.IdleConnTimeout = 0 + t.MaxIdleConns = maxConns + t.MaxConnsPerHost = maxConns + t.MaxIdleConnsPerHost = maxConns + t.WriteBufferSize = 64 * 1024 + // disable http/2 to prevent muxing over a single tcp connection + t.ForceAttemptHTTP2 = false + t.TLSClientConfig.NextProtos = []string{"http/1.1"} + }) + } awsCfg, err := config.LoadDefaultConfig( context.TODO(), From 1b65e168c6405a2f676e1d78774a1b6fa3e05abf Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Tue, 19 Nov 2024 11:35:45 -0700 Subject: [PATCH 24/32] add fq bwlimit as socket option and S3DAEMON_UPLOAD_BWLIMIT_INTERNAL --- go.mod | 1 + go.sum | 2 ++ main.go | 49 ++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/go.mod b/go.mod index 8bcbed2..0a58027 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/aws/aws-sdk-go-v2/service/s3 v1.66.3 github.com/conduitio/bwlimit v0.1.0 github.com/hyperledger/fabric v2.1.1+incompatible + golang.org/x/sys v0.26.0 
k8s.io/apimachinery v0.31.2 ) diff --git a/go.sum b/go.sum index 380c062..52bcae1 100644 --- a/go.sum +++ b/go.sum @@ -81,6 +81,8 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= diff --git a/main.go b/main.go index 42c99a3..30c3942 100644 --- a/main.go +++ b/main.go @@ -13,6 +13,7 @@ import ( "os" "path/filepath" "strconv" + "syscall" "time" "github.com/aws/aws-sdk-go-v2/aws" @@ -23,6 +24,7 @@ import ( "github.com/aws/aws-sdk-go-v2/service/s3/types" "github.com/conduitio/bwlimit" "github.com/hyperledger/fabric/common/semaphore" + "golang.org/x/sys/unix" k8sresource "k8s.io/apimachinery/pkg/api/resource" ) @@ -32,12 +34,13 @@ type S3DConf struct { endpoint_url *string // access_key *string // secret_key *string - maxParallelUploads *int64 - uploadTimeout *time.Duration - queueTimeout *time.Duration - uploadTries *int - uploadPartsize *k8sresource.Quantity - uploadBwlimit *k8sresource.Quantity + maxParallelUploads *int64 + uploadTimeout *time.Duration + queueTimeout *time.Duration + uploadTries *int + uploadPartsize *k8sresource.Quantity + uploadBwlimit *k8sresource.Quantity + uploadBwlimitInteral bool } type S3DHandler struct { @@ -224,6 +227,9 @@ func getConf() S3DConf { } uploadBwlimitRaw := flag.String("upload-bwlimit", 
defaultUploadBwlimit, "Upload bandwidth limit in bits per second (S3DAEMON_UPLOAD_BWLIMIT)") + defaultUploadBwlimitInternal, _ := strconv.ParseBool(os.Getenv("S3DAEMON_UPLOAD_BWLIMIT_INTERNAL")) + uploadBwlimitInternal := flag.Bool("upload-bwlimit-internal", defaultUploadBwlimitInternal, "Use internal tcp pacing instead of fq (S3DAEMON_UPLOAD_BWLIMIT_INTERNAL)") + flag.Parse() // end flags @@ -255,6 +261,8 @@ func getConf() S3DConf { } conf.uploadBwlimit = &uploadBwlimit + conf.uploadBwlimitInteral = *uploadBwlimitInternal + log.Println("S3DAEMON_HOST:", *conf.host) log.Println("S3DAEMON_PORT:", *conf.port) log.Println("S3DAEMON_ENDPOINT_URL:", *conf.endpoint_url) @@ -264,6 +272,7 @@ func getConf() S3DConf { log.Println("S3DAEMON_UPLOAD_TRIES:", *conf.uploadTries) log.Println("S3DAEMON_UPLOAD_PARTSIZE:", conf.uploadPartsize.String()) log.Println("S3DAEMON_UPLOAD_BWLIMIT:", conf.uploadBwlimit.String()) + log.Println("S3DAEMON_UPLOAD_BWLIMIT_INTERNAL:", conf.uploadBwlimitInteral) return conf } @@ -278,10 +287,28 @@ func NewHandler(conf *S3DConf) *S3DHandler { var httpClient *awshttp.BuildableClient if conf.uploadBwlimit.Value() != 0 { - dialer := bwlimit.NewDialer(&net.Dialer{ - Timeout: 30 * time.Second, - KeepAlive: 0, - }, bwlimit.Byte(conf.uploadBwlimit.Value()/8), 0) + var dialCtx func(ctx context.Context, network, address string) (net.Conn, error) + + if conf.uploadBwlimitInteral { + dialCtx = bwlimit.NewDialer(&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 0, + }, bwlimit.Byte(conf.uploadBwlimit.Value()/8), 0).DialContext + } else { + dialer := &net.Dialer{ + Control: func(network, address string, conn syscall.RawConn) error { + // https://pkg.go.dev/syscall#RawConn + var operr error + if err := conn.Control(func(fd uintptr) { + operr = syscall.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_MAX_PACING_RATE, int(conf.uploadBwlimit.Value()/8)) + }); err != nil { + return err + } + return operr + }, + } + dialCtx = dialer.DialContext + } httpClient = 
awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { t.ExpectContinueTimeout = 0 @@ -293,7 +320,7 @@ func NewHandler(conf *S3DConf) *S3DHandler { // disable http/2 to prevent muxing over a single tcp connection t.ForceAttemptHTTP2 = false t.TLSClientConfig.NextProtos = []string{"http/1.1"} - t.DialContext = dialer.DialContext + t.DialContext = dialCtx }) } else { httpClient = awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { From 28120ee91e33115af3d39a829679c9d193b98a53 Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Thu, 21 Nov 2024 11:33:07 -0700 Subject: [PATCH 25/32] add S3DAEMON_UPLOAD_WRITE_BUFFER_SIZE --- main.go | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/main.go b/main.go index 30c3942..c77c2f5 100644 --- a/main.go +++ b/main.go @@ -34,13 +34,14 @@ type S3DConf struct { endpoint_url *string // access_key *string // secret_key *string - maxParallelUploads *int64 - uploadTimeout *time.Duration - queueTimeout *time.Duration - uploadTries *int - uploadPartsize *k8sresource.Quantity - uploadBwlimit *k8sresource.Quantity - uploadBwlimitInteral bool + maxParallelUploads *int64 + uploadTimeout *time.Duration + queueTimeout *time.Duration + uploadTries *int + uploadPartsize *k8sresource.Quantity + uploadBwlimit *k8sresource.Quantity + uploadBwlimitInteral bool + uploadWriteBufferSize *k8sresource.Quantity } type S3DHandler struct { @@ -230,6 +231,12 @@ func getConf() S3DConf { defaultUploadBwlimitInternal, _ := strconv.ParseBool(os.Getenv("S3DAEMON_UPLOAD_BWLIMIT_INTERNAL")) uploadBwlimitInternal := flag.Bool("upload-bwlimit-internal", defaultUploadBwlimitInternal, "Use internal tcp pacing instead of fq (S3DAEMON_UPLOAD_BWLIMIT_INTERNAL)") + defaultUploadWriteBufferSize := os.Getenv("S3DAEMON_UPLOAD_WRITE_BUFFER_SIZE") + if defaultUploadWriteBufferSize == "" { + defaultUploadWriteBufferSize = "64Ki" + } + uploadWriteBufferSizeRaw := 
flag.String("upload-write-buffer-size", defaultUploadWriteBufferSize, "Upload Write Buffer Size (S3DAEMON_UPLOAD_WRITE_BUFFER_SIZE)") + flag.Parse() // end flags @@ -263,6 +270,12 @@ func getConf() S3DConf { conf.uploadBwlimitInteral = *uploadBwlimitInternal + uploadWriteBufferSize, err := k8sresource.ParseQuantity(*uploadWriteBufferSizeRaw) + if err != nil { + log.Fatal("S3DAEMON_UPLOAD_WRITE_BUFFER_SIZE is invalid") + } + conf.uploadWriteBufferSize = &uploadWriteBufferSize + log.Println("S3DAEMON_HOST:", *conf.host) log.Println("S3DAEMON_PORT:", *conf.port) log.Println("S3DAEMON_ENDPOINT_URL:", *conf.endpoint_url) @@ -273,6 +286,7 @@ func getConf() S3DConf { log.Println("S3DAEMON_UPLOAD_PARTSIZE:", conf.uploadPartsize.String()) log.Println("S3DAEMON_UPLOAD_BWLIMIT:", conf.uploadBwlimit.String()) log.Println("S3DAEMON_UPLOAD_BWLIMIT_INTERNAL:", conf.uploadBwlimitInteral) + log.Println("S3DAEMON_UPLOAD_WRITE_BUFFER_SIZE:", conf.uploadWriteBufferSize.String()) return conf } @@ -316,7 +330,7 @@ func NewHandler(conf *S3DConf) *S3DHandler { t.MaxIdleConns = maxConns t.MaxConnsPerHost = maxConns t.MaxIdleConnsPerHost = maxConns - t.WriteBufferSize = 64 * 1024 + t.WriteBufferSize = int(conf.uploadWriteBufferSize.Value()) // disable http/2 to prevent muxing over a single tcp connection t.ForceAttemptHTTP2 = false t.TLSClientConfig.NextProtos = []string{"http/1.1"} @@ -329,7 +343,7 @@ func NewHandler(conf *S3DConf) *S3DHandler { t.MaxIdleConns = maxConns t.MaxConnsPerHost = maxConns t.MaxIdleConnsPerHost = maxConns - t.WriteBufferSize = 64 * 1024 + t.WriteBufferSize = int(conf.uploadWriteBufferSize.Value()) // disable http/2 to prevent muxing over a single tcp connection t.ForceAttemptHTTP2 = false t.TLSClientConfig.NextProtos = []string{"http/1.1"} From 1b2ba2373ecf65f1d2d4470289148451b56a40c3 Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Fri, 22 Nov 2024 11:40:07 -0700 Subject: [PATCH 26/32] rm commented out code and stale comments --- main.go | 36 
+++++------------------------------- 1 file changed, 5 insertions(+), 31 deletions(-) diff --git a/main.go b/main.go index c77c2f5..411363a 100644 --- a/main.go +++ b/main.go @@ -29,11 +29,9 @@ import ( ) type S3DConf struct { - host *string - port *int - endpoint_url *string - // access_key *string - // secret_key *string + host *string + port *int + endpoint_url *string maxParallelUploads *int64 uploadTimeout *time.Duration queueTimeout *time.Duration @@ -59,7 +57,6 @@ type S3DUploadTask struct { file *string } -// UploadObject uses the S3 upload manager to upload an object to a bucket. func (h *S3DHandler) UploadFileMultipart(ctx context.Context, task *S3DUploadTask) error { start := time.Now() file, err := os.Open(*task.file) @@ -68,8 +65,6 @@ func (h *S3DHandler) UploadFileMultipart(ctx context.Context, task *S3DUploadTas return err } defer file.Close() - // data, err := ioutil.ReadFile(fileName) - // log.Printf("slurped %v:%v in %s\n", bucket, key, time.Now().Sub(start)) maxAttempts := *h.Conf.uploadTries var attempt int @@ -79,8 +74,7 @@ func (h *S3DHandler) UploadFileMultipart(ctx context.Context, task *S3DUploadTas _, err = h.Uploader.Upload(uploadCtx, &s3.PutObjectInput{ Bucket: aws.String(*task.bucket), Key: aws.String(*task.key), - // Body: bytes.NewReader([]byte(data)), - Body: file, + Body: file, }) if err != nil { log.Printf("upload %v:%v | failed after %s -- try %v/%v\n", *task.bucket, *task.key, time.Now().Sub(start), attempt, maxAttempts) @@ -354,13 +348,6 @@ func NewHandler(conf *S3DConf) *S3DHandler { context.TODO(), config.WithBaseEndpoint(*conf.endpoint_url), config.WithHTTPClient(httpClient), - // config.WithRetryer(func() aws.Retryer { - // return retry.NewStandard(func(o *retry.StandardOptions) { - // o.MaxAttempts = 10 - // o.MaxBackoff = time.Millisecond * 500 - // o.RateLimiter = ratelimit.None - // }) - // }), ) if err != nil { log.Fatal(err) @@ -370,22 +357,9 @@ func NewHandler(conf *S3DConf) *S3DHandler { handler.S3Client = 
s3.NewFromConfig(awsCfg, func(o *s3.Options) { o.UsePathStyle = true - o.Retryer = aws.NopRetryer{} + o.Retryer = aws.NopRetryer{} // we handle retries ourselves }) - /* - resp, err := s3Client.ListBuckets(context.TODO(), nil) - if err != nil { - log.Fatal(err) - } - - // Print out the list of buckets - log.Println("Buckets:") - for _, bucket := range resp.Buckets { - log.Println(*bucket.Name) - } - */ - handler.Uploader = manager.NewUploader(handler.S3Client, func(u *manager.Uploader) { u.Concurrency = 1000 u.MaxUploadParts = 1000 From 70fb4b1b7f08bec837518526dbdfae3292b56b81 Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Fri, 22 Nov 2024 11:40:56 -0700 Subject: [PATCH 27/32] add Makefile for static build --- Makefile | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..dcaf729 --- /dev/null +++ b/Makefile @@ -0,0 +1,2 @@ +all: + CGO_ENABLED=0 go build -ldflags "-extldflags '-static'" -o s3daemon-go From 37021b5fd7f2b59eb4ac7fb4eccb82d584da7f57 Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Fri, 22 Nov 2024 12:17:36 -0700 Subject: [PATCH 28/32] rename s3daemon-go -> s3nd --- .gitignore | 2 +- Dockerfile | 10 ++--- Makefile | 2 +- go.mod | 2 +- main.go | 116 ++++++++++++++++++++++++++--------------------------- 5 files changed, 66 insertions(+), 66 deletions(-) diff --git a/.gitignore b/.gitignore index 6555f62..08e7509 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -/s3daemon-go +/s3nd diff --git a/Dockerfile b/Dockerfile index 2d57a0a..95dc57d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,14 @@ FROM golang:1.22-alpine as builder -ARG BIN=s3daemon-go +ARG BIN=s3nd RUN apk --update --no-cache add \ binutils \ && rm -rf /root/.cache -WORKDIR /go/src/github.com/jhoblitt/s3daemon-go +WORKDIR /go/src/github.com/lsst-dm/s3nd COPY . . 
-RUN CGO_ENABLED=0 go build -ldflags "-extldflags '-static'" -o s3daemon-go && strip "$BIN" +RUN CGO_ENABLED=0 go build -ldflags "-extldflags '-static'" -o s3nd && strip "$BIN" FROM alpine:3 WORKDIR /root/ -COPY --from=builder /go/src/github.com/jhoblitt/s3daemon-go/$BIN /bin/$BIN -ENTRYPOINT ["/bin/s3daemon-go"] +COPY --from=builder /go/src/github.com/lsst-dm/s3nd/$BIN /bin/$BIN +ENTRYPOINT ["/bin/s3nd"] diff --git a/Makefile b/Makefile index dcaf729..1692a9d 100644 --- a/Makefile +++ b/Makefile @@ -1,2 +1,2 @@ all: - CGO_ENABLED=0 go build -ldflags "-extldflags '-static'" -o s3daemon-go + CGO_ENABLED=0 go build -ldflags "-extldflags '-static'" -o s3nd diff --git a/go.mod b/go.mod index 0a58027..9cc1ab2 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module github.com/lsst-dm/s3daemon-go +module github.com/lsst-dm/s3nd go 1.22.7 diff --git a/main.go b/main.go index 411363a..c540dd7 100644 --- a/main.go +++ b/main.go @@ -28,11 +28,11 @@ import ( k8sresource "k8s.io/apimachinery/pkg/api/resource" ) -type S3DConf struct { +type S3ndConf struct { host *string port *int - endpoint_url *string - maxParallelUploads *int64 + endpointUrl *string + uploadMaxParallel *int64 uploadTimeout *time.Duration queueTimeout *time.Duration uploadTries *int @@ -42,22 +42,22 @@ type S3DConf struct { uploadWriteBufferSize *k8sresource.Quantity } -type S3DHandler struct { - Conf *S3DConf +type S3ndHandler struct { + Conf *S3ndConf AwsConfig *aws.Config S3Client *s3.Client Uploader *manager.Uploader ParallelUploads *semaphore.Semaphore } -type S3DUploadTask struct { +type S3ndUploadTask struct { uri *url.URL bucket *string key *string file *string } -func (h *S3DHandler) UploadFileMultipart(ctx context.Context, task *S3DUploadTask) error { +func (h *S3ndHandler) UploadFileMultipart(ctx context.Context, task *S3ndUploadTask) error { start := time.Now() file, err := os.Open(*task.file) if err != nil { @@ -106,7 +106,7 @@ func (h *S3DHandler) UploadFileMultipart(ctx context.Context, task 
*S3DUploadTas return nil } -func (h *S3DHandler) parseRequest(r *http.Request) (*S3DUploadTask, error) { +func (h *S3ndHandler) parseRequest(r *http.Request) (*S3ndUploadTask, error) { file := r.PostFormValue("file") if file == "" { return nil, fmt.Errorf("missing field: file") @@ -135,10 +135,10 @@ func (h *S3DHandler) parseRequest(r *http.Request) (*S3DUploadTask, error) { } key := uri.Path[1:] // Remove leading slash - return &S3DUploadTask{uri: uri, bucket: &bucket, key: &key, file: &file}, nil + return &S3ndUploadTask{uri: uri, bucket: &bucket, key: &key, file: &file}, nil } -func (h *S3DHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { +func (h *S3ndHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { start := time.Now() task, err := h.parseRequest(r) @@ -171,94 +171,94 @@ func (h *S3DHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { fmt.Fprintf(w, "Successful put %q\n", html.EscapeString(task.uri.String())) } -func getConf() S3DConf { - conf := S3DConf{} +func getConf() S3ndConf { + conf := S3ndConf{} // start flags - conf.host = flag.String("host", os.Getenv("S3DAEMON_HOST"), "S3 Daemon Host (S3DAEMON_HOST)") + conf.host = flag.String("host", os.Getenv("S3ND_HOST"), "S3 Daemon Host (S3ND_HOST)") - defaultPort, _ := strconv.Atoi(os.Getenv("S3DAEMON_PORT")) + defaultPort, _ := strconv.Atoi(os.Getenv("S3ND_PORT")) if defaultPort == 0 { defaultPort = 15555 } - conf.port = flag.Int("port", defaultPort, "S3 Daemon Port (S3DAEMON_PORT)") + conf.port = flag.Int("port", defaultPort, "S3 Daemon Port (S3ND_PORT)") - conf.endpoint_url = flag.String("s3-endpoint-url", os.Getenv("S3_ENDPOINT_URL"), "S3 Endpoint URL (S3_ENDPOINT_URL)") + conf.endpointUrl = flag.String("endpoint-url", os.Getenv("S3ND_ENDPOINT_URL"), "S3 Endpoint URL (S3ND_ENDPOINT_URL)") - var defaultMaxParallelUploads int64 - defaultMaxParallelUploads, _ = strconv.ParseInt(os.Getenv("S3DAEMON_MAX_PARALLEL_UPLOADS"), 10, 64) - if defaultMaxParallelUploads == 0 { - 
defaultMaxParallelUploads = 100 + var defaultUploadMaxParallel int64 + defaultUploadMaxParallel, _ = strconv.ParseInt(os.Getenv("S3ND_UPLOAD_MAX_PARALLEL"), 10, 64) + if defaultUploadMaxParallel == 0 { + defaultUploadMaxParallel = 100 } - conf.maxParallelUploads = flag.Int64("max-parallel-uploads", defaultMaxParallelUploads, "Max Parallel Uploads (S3DAEMON_MAX_PARALLEL_UPLOADS)") + conf.uploadMaxParallel = flag.Int64("upload-max-parallel", defaultUploadMaxParallel, "Maximum number of parallel object uploads (S3ND_UPLOAD_MAX_PARALLEL)") - defaultUploadTimeout := os.Getenv("S3DAEMON_UPLOAD_TIMEOUT") + defaultUploadTimeout := os.Getenv("S3ND_UPLOAD_TIMEOUT") if defaultUploadTimeout == "" { defaultUploadTimeout = "10s" } - uploadTimeout := flag.String("upload-timeout", defaultUploadTimeout, "Upload Timeout (S3DAEMON_UPLOAD_TIMEOUT)") + uploadTimeout := flag.String("upload-timeout", defaultUploadTimeout, "Upload Timeout (S3ND_UPLOAD_TIMEOUT)") - defaultQueueTimeout := os.Getenv("S3DAEMON_QUEUE_TIMEOUT") + defaultQueueTimeout := os.Getenv("S3ND_QUEUE_TIMEOUT") if defaultQueueTimeout == "" { defaultQueueTimeout = "10s" } - queueTimeout := flag.String("queue-timeout", defaultQueueTimeout, "Queue Timeout waiting for transfer to start (S3DAEMON_QUEUE_TIMEOUT)") + queueTimeout := flag.String("queue-timeout", defaultQueueTimeout, "Queue Timeout waiting for transfer to start (S3ND_QUEUE_TIMEOUT)") - defaultUploadTries, _ := strconv.Atoi(os.Getenv("S3DAEMON_UPLOAD_TRIES")) + defaultUploadTries, _ := strconv.Atoi(os.Getenv("S3ND_UPLOAD_TRIES")) if defaultUploadTries == 0 { defaultUploadTries = 1 } - conf.uploadTries = flag.Int("upload-tries", defaultUploadTries, "Max number of upload tries (S3DAEMON_UPLOAD_TRIES)") + conf.uploadTries = flag.Int("upload-tries", defaultUploadTries, "Max number of upload tries (S3ND_UPLOAD_TRIES)") - defaultUploadPartsize := os.Getenv("S3DAEMON_UPLOAD_PARTSIZE") + defaultUploadPartsize := os.Getenv("S3ND_UPLOAD_PARTSIZE") if defaultUploadPartsize == 
"" { defaultUploadPartsize = "5Mi" } - uploadPartsizeRaw := flag.String("upload-partsize", defaultUploadPartsize, "Upload Part Size (S3DAEMON_UPLOAD_PARTSIZE)") + uploadPartsizeRaw := flag.String("upload-partsize", defaultUploadPartsize, "Upload Part Size (S3ND_UPLOAD_PARTSIZE)") - defaultUploadBwlimit := os.Getenv("S3DAEMON_UPLOAD_BWLIMIT") + defaultUploadBwlimit := os.Getenv("S3ND_UPLOAD_BWLIMIT") if defaultUploadBwlimit == "" { defaultUploadBwlimit = "0" } - uploadBwlimitRaw := flag.String("upload-bwlimit", defaultUploadBwlimit, "Upload bandwidth limit in bits per second (S3DAEMON_UPLOAD_BWLIMIT)") + uploadBwlimitRaw := flag.String("upload-bwlimit", defaultUploadBwlimit, "Upload bandwidth limit in bits per second (S3ND_UPLOAD_BWLIMIT)") - defaultUploadBwlimitInternal, _ := strconv.ParseBool(os.Getenv("S3DAEMON_UPLOAD_BWLIMIT_INTERNAL")) - uploadBwlimitInternal := flag.Bool("upload-bwlimit-internal", defaultUploadBwlimitInternal, "Use internal tcp pacing instead of fq (S3DAEMON_UPLOAD_BWLIMIT_INTERNAL)") + defaultUploadBwlimitInternal, _ := strconv.ParseBool(os.Getenv("S3ND_UPLOAD_BWLIMIT_INTERNAL")) + uploadBwlimitInternal := flag.Bool("upload-bwlimit-internal", defaultUploadBwlimitInternal, "Use internal tcp pacing instead of fq (S3ND_UPLOAD_BWLIMIT_INTERNAL)") - defaultUploadWriteBufferSize := os.Getenv("S3DAEMON_UPLOAD_WRITE_BUFFER_SIZE") + defaultUploadWriteBufferSize := os.Getenv("S3ND_UPLOAD_WRITE_BUFFER_SIZE") if defaultUploadWriteBufferSize == "" { defaultUploadWriteBufferSize = "64Ki" } - uploadWriteBufferSizeRaw := flag.String("upload-write-buffer-size", defaultUploadWriteBufferSize, "Upload Write Buffer Size (S3DAEMON_UPLOAD_WRITE_BUFFER_SIZE)") + uploadWriteBufferSizeRaw := flag.String("upload-write-buffer-size", defaultUploadWriteBufferSize, "Upload Write Buffer Size (S3ND_UPLOAD_WRITE_BUFFER_SIZE)") flag.Parse() // end flags - if *conf.endpoint_url == "" { - log.Fatal("S3_ENDPOINT_URL is required") + if *conf.endpointUrl == "" { + 
log.Fatal("S3ND_ENDPOINT_URL is required") } uploadTimeoutDuration, err := time.ParseDuration(*uploadTimeout) if err != nil { - log.Fatal("S3DAEMON_UPLOAD_TIMEOUT is invalid") + log.Fatal("S3ND_UPLOAD_TIMEOUT is invalid") } conf.uploadTimeout = &uploadTimeoutDuration queueTimeoutDuration, err := time.ParseDuration(*queueTimeout) if err != nil { - log.Fatal("S3DAEMON_QUEUE_TIMEOUT is invalid") + log.Fatal("S3ND_QUEUE_TIMEOUT is invalid") } conf.queueTimeout = &queueTimeoutDuration uploadPartsize, err := k8sresource.ParseQuantity(*uploadPartsizeRaw) if err != nil { - log.Fatal("S3DAEMON_UPLOAD_PARTSIZE is invalid") + log.Fatal("S3ND_UPLOAD_PARTSIZE is invalid") } conf.uploadPartsize = &uploadPartsize uploadBwlimit, err := k8sresource.ParseQuantity(*uploadBwlimitRaw) if err != nil { - log.Fatal("S3DAEMON_UPLOAD_BWLIMIT is invalid") + log.Fatal("S3ND_UPLOAD_BWLIMIT is invalid") } conf.uploadBwlimit = &uploadBwlimit @@ -266,31 +266,31 @@ func getConf() S3DConf { uploadWriteBufferSize, err := k8sresource.ParseQuantity(*uploadWriteBufferSizeRaw) if err != nil { - log.Fatal("S3DAEMON_UPLOAD_WRITE_BUFFER_SIZE is invalid") + log.Fatal("S3ND_UPLOAD_WRITE_BUFFER_SIZE is invalid") } conf.uploadWriteBufferSize = &uploadWriteBufferSize - log.Println("S3DAEMON_HOST:", *conf.host) - log.Println("S3DAEMON_PORT:", *conf.port) - log.Println("S3DAEMON_ENDPOINT_URL:", *conf.endpoint_url) - log.Println("S3DAEMON_MAX_PARALLEL_UPLOADS:", *conf.maxParallelUploads) - log.Println("S3DAEMON_UPLOAD_TIMEOUT:", *conf.uploadTimeout) - log.Println("S3DAEMON_QUEUE_TIMEOUT:", *conf.queueTimeout) - log.Println("S3DAEMON_UPLOAD_TRIES:", *conf.uploadTries) - log.Println("S3DAEMON_UPLOAD_PARTSIZE:", conf.uploadPartsize.String()) - log.Println("S3DAEMON_UPLOAD_BWLIMIT:", conf.uploadBwlimit.String()) - log.Println("S3DAEMON_UPLOAD_BWLIMIT_INTERNAL:", conf.uploadBwlimitInteral) - log.Println("S3DAEMON_UPLOAD_WRITE_BUFFER_SIZE:", conf.uploadWriteBufferSize.String()) + log.Println("S3ND_HOST:", *conf.host) + 
log.Println("S3ND_PORT:", *conf.port) + log.Println("S3ND_ENDPOINT_URL:", *conf.endpointUrl) + log.Println("S3ND_UPLOAD_MAX_PARALLEL:", *conf.uploadMaxParallel) + log.Println("S3ND_UPLOAD_TIMEOUT:", *conf.uploadTimeout) + log.Println("S3ND_QUEUE_TIMEOUT:", *conf.queueTimeout) + log.Println("S3ND_UPLOAD_TRIES:", *conf.uploadTries) + log.Println("S3ND_UPLOAD_PARTSIZE:", conf.uploadPartsize.String()) + log.Println("S3ND_UPLOAD_BWLIMIT:", conf.uploadBwlimit.String()) + log.Println("S3ND_UPLOAD_BWLIMIT_INTERNAL:", conf.uploadBwlimitInteral) + log.Println("S3ND_UPLOAD_WRITE_BUFFER_SIZE:", conf.uploadWriteBufferSize.String()) return conf } -func NewHandler(conf *S3DConf) *S3DHandler { - handler := &S3DHandler{ +func NewHandler(conf *S3ndConf) *S3ndHandler { + handler := &S3ndHandler{ Conf: conf, } - maxConns := int(*conf.maxParallelUploads * 5) // allow for multipart upload creation + maxConns := int(*conf.uploadMaxParallel * 5) // allow for multipart upload creation var httpClient *awshttp.BuildableClient @@ -346,7 +346,7 @@ func NewHandler(conf *S3DConf) *S3DHandler { awsCfg, err := config.LoadDefaultConfig( context.TODO(), - config.WithBaseEndpoint(*conf.endpoint_url), + config.WithBaseEndpoint(*conf.endpointUrl), config.WithHTTPClient(httpClient), ) if err != nil { @@ -366,7 +366,7 @@ func NewHandler(conf *S3DConf) *S3DHandler { u.PartSize = conf.uploadPartsize.Value() }) - sema := semaphore.New(int(*conf.maxParallelUploads)) + sema := semaphore.New(int(*conf.uploadMaxParallel)) handler.ParallelUploads = &sema return handler From 09ee251a36eb857a3c3e0384314b85f222465e7b Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Fri, 22 Nov 2024 12:24:34 -0700 Subject: [PATCH 29/32] add gha ci workflow --- .github/workflows/ci.yaml | 138 ++++++++++++++++++++++++++++++++++++++ .hadolint.yaml | 4 ++ .yamllint.yaml | 17 +++++ Dockerfile | 7 +- main.go | 2 +- 5 files changed, 163 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/ci.yaml create mode 100644 
.hadolint.yaml create mode 100644 .yamllint.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..3694010 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,138 @@ +--- +name: Build and Publish + +"on": + push: + branches: + - "**" + tags: + - "v*.*.*" + pull_request: + branches: + - main + - master + workflow_dispatch: + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: "1.22" + + - name: Build + run: go build -v ./... + + - name: Vet + run: go vet -v ./... + + - name: Test + run: go test -v ./... + + hadolint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: hadolint/hadolint-action@v3.1.0 + with: + dockerfile: Dockerfile + + yamllint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Run yamllint + uses: bewuethr/yamllint-action@v1 + + shellcheck: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Run ShellCheck + uses: ludeeus/action-shellcheck@master + + oci_image: + name: Build OCI Image + needs: build + if: github.repository == 'lsst-dm/s3nd' + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=schedule + type=ref,event=branch + type=ref,event=pr + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=sha + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ 
secrets.GITHUB_TOKEN }} + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: . + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + gh-release: + name: Create GitHub Release + needs: build + if: startsWith(github.ref, 'refs/tags/') + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Release + uses: softprops/action-gh-release@v2 + + go-release: + name: Release Go Binaries + needs: gh-release + if: startsWith(github.ref, 'refs/tags/') + runs-on: ubuntu-latest + strategy: + matrix: + goos: [linux, darwin] + goarch: [amd64, arm64] + steps: + - uses: actions/checkout@v4 + - uses: wangyoucao577/go-release-action@v1 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + goos: ${{ matrix.goos }} + goarch: ${{ matrix.goarch }} + goversion: "1.22" + asset_name: '${{ github.event.repository.name }}-${{ matrix.goos }}-${{ matrix.goarch }}' diff --git a/.hadolint.yaml b/.hadolint.yaml new file mode 100644 index 0000000..430835c --- /dev/null +++ b/.hadolint.yaml @@ -0,0 +1,4 @@ +--- +ignored: + # disable pinning apk package versions + - DL3018 diff --git a/.yamllint.yaml b/.yamllint.yaml new file mode 100644 index 0000000..8074ccb --- /dev/null +++ b/.yamllint.yaml @@ -0,0 +1,17 @@ +--- +extends: "default" + +rules: + # 80 chars should be enough, but don't fail if a line is longer + line-length: false + indentation: + level: "error" + spaces: 2 + indent-sequences: true + # do not obsess over comment formatting + comments-indentation: false + comments: + level: "error" + require-starting-space: false + document-start: + level: "error" diff --git a/Dockerfile b/Dockerfile index 95dc57d..33ba332 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,13 @@ -FROM golang:1.22-alpine as builder +FROM golang:1.22-alpine AS builder -ARG BIN=s3nd RUN apk --update --no-cache add \ binutils \ && rm -rf /root/.cache WORKDIR 
/go/src/github.com/lsst-dm/s3nd COPY . . -RUN CGO_ENABLED=0 go build -ldflags "-extldflags '-static'" -o s3nd && strip "$BIN" +RUN CGO_ENABLED=0 go build -ldflags "-extldflags '-static'" -o s3nd && strip s3nd FROM alpine:3 WORKDIR /root/ -COPY --from=builder /go/src/github.com/lsst-dm/s3nd/$BIN /bin/$BIN +COPY --from=builder /go/src/github.com/lsst-dm/s3nd/s3nd /bin/s3nd ENTRYPOINT ["/bin/s3nd"] diff --git a/main.go b/main.go index c540dd7..ad3464e 100644 --- a/main.go +++ b/main.go @@ -385,6 +385,6 @@ func main() { if errors.Is(err, http.ErrServerClosed) { log.Printf("server closed\n") } else if err != nil { - log.Fatal("error starting server: %s\n", err) + log.Fatalf("error starting server: %s\n", err) } } From 06e0b70e9ad24a2618e117f05cfba11d56d69bc6 Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Fri, 22 Nov 2024 15:03:10 -0700 Subject: [PATCH 30/32] rm S3ND_UPLOAD_BWLIMIT_INTERNAL & github.com/conduitio/bwlimit github.com/conduitio/bwlimit's approach to trying to time writes to the socket proved to be fairly inaccurate in terms of actual data rate and seems inferior to SO_MAX_PACING_RATE. 
--- go.mod | 2 -- go.sum | 4 ---- main.go | 42 ++++++++++++------------------------------ 3 files changed, 12 insertions(+), 36 deletions(-) diff --git a/go.mod b/go.mod index 9cc1ab2..7186fe5 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,6 @@ require ( github.com/aws/aws-sdk-go-v2/config v1.28.3 github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.37 github.com/aws/aws-sdk-go-v2/service/s3 v1.66.3 - github.com/conduitio/bwlimit v0.1.0 github.com/hyperledger/fabric v2.1.1+incompatible golang.org/x/sys v0.26.0 k8s.io/apimachinery v0.31.2 @@ -33,6 +32,5 @@ require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/onsi/gomega v1.35.1 // indirect github.com/x448/float16 v0.8.4 // indirect - golang.org/x/time v0.3.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect ) diff --git a/go.sum b/go.sum index 52bcae1..babecfa 100644 --- a/go.sum +++ b/go.sum @@ -36,8 +36,6 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.32.4 h1:yDxvkz3/uOKfxnv8YhzOi9m+2OGI github.com/aws/aws-sdk-go-v2/service/sts v1.32.4/go.mod h1:9XEUty5v5UAsMiFOBJrNibZgwCeOma73jgGwwhgffa8= github.com/aws/smithy-go v1.22.0 h1:uunKnWlcoL3zO7q+gG2Pk53joueEOsnNB28QdMsmiMM= github.com/aws/smithy-go v1.22.0/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= -github.com/conduitio/bwlimit v0.1.0 h1:x3ijON0TSghQob4tFKaEvKixFmYKfVJQeSpXluC2JvE= -github.com/conduitio/bwlimit v0.1.0/go.mod h1:E+ASZ1/5L33MTb8hJTERs5Xnmh6Ulq3jbRh7LrdbXWU= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= @@ -87,8 +85,6 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= golang.org/x/text 
v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= -golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= -golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= diff --git a/main.go b/main.go index ad3464e..1b9dee7 100644 --- a/main.go +++ b/main.go @@ -22,7 +22,6 @@ import ( "github.com/aws/aws-sdk-go-v2/feature/s3/manager" "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/s3/types" - "github.com/conduitio/bwlimit" "github.com/hyperledger/fabric/common/semaphore" "golang.org/x/sys/unix" k8sresource "k8s.io/apimachinery/pkg/api/resource" @@ -38,7 +37,6 @@ type S3ndConf struct { uploadTries *int uploadPartsize *k8sresource.Quantity uploadBwlimit *k8sresource.Quantity - uploadBwlimitInteral bool uploadWriteBufferSize *k8sresource.Quantity } @@ -222,9 +220,6 @@ func getConf() S3ndConf { } uploadBwlimitRaw := flag.String("upload-bwlimit", defaultUploadBwlimit, "Upload bandwidth limit in bits per second (S3ND_UPLOAD_BWLIMIT)") - defaultUploadBwlimitInternal, _ := strconv.ParseBool(os.Getenv("S3ND_UPLOAD_BWLIMIT_INTERNAL")) - uploadBwlimitInternal := flag.Bool("upload-bwlimit-internal", defaultUploadBwlimitInternal, "Use internal tcp pacing instead of fq (S3ND_UPLOAD_BWLIMIT_INTERNAL)") - defaultUploadWriteBufferSize := os.Getenv("S3ND_UPLOAD_WRITE_BUFFER_SIZE") if defaultUploadWriteBufferSize == "" { defaultUploadWriteBufferSize = "64Ki" @@ -262,8 +257,6 @@ func getConf() S3ndConf { } conf.uploadBwlimit = &uploadBwlimit - conf.uploadBwlimitInteral = *uploadBwlimitInternal - uploadWriteBufferSize, err := k8sresource.ParseQuantity(*uploadWriteBufferSizeRaw) if err 
!= nil { log.Fatal("S3ND_UPLOAD_WRITE_BUFFER_SIZE is invalid") @@ -279,7 +272,6 @@ func getConf() S3ndConf { log.Println("S3ND_UPLOAD_TRIES:", *conf.uploadTries) log.Println("S3ND_UPLOAD_PARTSIZE:", conf.uploadPartsize.String()) log.Println("S3ND_UPLOAD_BWLIMIT:", conf.uploadBwlimit.String()) - log.Println("S3ND_UPLOAD_BWLIMIT_INTERNAL:", conf.uploadBwlimitInteral) log.Println("S3ND_UPLOAD_WRITE_BUFFER_SIZE:", conf.uploadWriteBufferSize.String()) return conf @@ -295,27 +287,17 @@ func NewHandler(conf *S3ndConf) *S3ndHandler { var httpClient *awshttp.BuildableClient if conf.uploadBwlimit.Value() != 0 { - var dialCtx func(ctx context.Context, network, address string) (net.Conn, error) - - if conf.uploadBwlimitInteral { - dialCtx = bwlimit.NewDialer(&net.Dialer{ - Timeout: 30 * time.Second, - KeepAlive: 0, - }, bwlimit.Byte(conf.uploadBwlimit.Value()/8), 0).DialContext - } else { - dialer := &net.Dialer{ - Control: func(network, address string, conn syscall.RawConn) error { - // https://pkg.go.dev/syscall#RawConn - var operr error - if err := conn.Control(func(fd uintptr) { - operr = syscall.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_MAX_PACING_RATE, int(conf.uploadBwlimit.Value()/8)) - }); err != nil { - return err - } - return operr - }, - } - dialCtx = dialer.DialContext + dialer := &net.Dialer{ + Control: func(network, address string, conn syscall.RawConn) error { + // https://pkg.go.dev/syscall#RawConn + var operr error + if err := conn.Control(func(fd uintptr) { + operr = syscall.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_MAX_PACING_RATE, int(conf.uploadBwlimit.Value()/8)) + }); err != nil { + return err + } + return operr + }, } httpClient = awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { @@ -328,7 +310,7 @@ func NewHandler(conf *S3ndConf) *S3ndHandler { // disable http/2 to prevent muxing over a single tcp connection t.ForceAttemptHTTP2 = false t.TLSClientConfig.NextProtos = []string{"http/1.1"} - t.DialContext = dialCtx + 
t.DialContext = dialer.DialContext }) } else { httpClient = awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { From 3ad74f97a36c6bad6e1291ae2fca18692b40f9a2 Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Fri, 22 Nov 2024 15:57:14 -0700 Subject: [PATCH 31/32] mv env var and flag handling into conf package --- conf/conf.go | 134 +++++++++++++++++++++++++++++++++++++++++++ main.go | 156 ++++++--------------------------------------------- 2 files changed, 151 insertions(+), 139 deletions(-) create mode 100644 conf/conf.go diff --git a/conf/conf.go b/conf/conf.go new file mode 100644 index 0000000..113c988 --- /dev/null +++ b/conf/conf.go @@ -0,0 +1,134 @@ +package conf + +import ( + "flag" + "log" + "os" + "strconv" + "time" + + k8sresource "k8s.io/apimachinery/pkg/api/resource" +) + +type S3ndConf struct { + Host *string + Port *int + EndpointUrl *string + UploadMaxParallel *int64 + UploadTimeout *time.Duration + QueueTimeout *time.Duration + UploadTries *int + UploadPartsize *k8sresource.Quantity + UploadBwlimit *k8sresource.Quantity + UploadWriteBufferSize *k8sresource.Quantity +} + +// Parse the environment variables and flags. If a flag is not set, the +// environment variable is used. Errors are fatal. 
+func NewConf() S3ndConf { + conf := S3ndConf{} + + // start flags + conf.Host = flag.String("host", os.Getenv("S3ND_HOST"), "S3 Daemon Host (S3ND_HOST)") + + defaultPort, _ := strconv.Atoi(os.Getenv("S3ND_PORT")) + if defaultPort == 0 { + defaultPort = 15555 + } + conf.Port = flag.Int("port", defaultPort, "S3 Daemon Port (S3ND_PORT)") + + conf.EndpointUrl = flag.String("endpoint-url", os.Getenv("S3ND_ENDPOINT_URL"), "S3 Endpoint URL (S3ND_ENDPOINT_URL)") + + var defaultUploadMaxParallel int64 + defaultUploadMaxParallel, _ = strconv.ParseInt(os.Getenv("S3ND_UPLOAD_MAX_PARALLEL"), 10, 64) + if defaultUploadMaxParallel == 0 { + defaultUploadMaxParallel = 100 + } + conf.UploadMaxParallel = flag.Int64("upload-max-parallel", defaultUploadMaxParallel, "Maximum number of parallel object uploads (S3ND_UPLOAD_MAX_PARALLEL)") + + defaultUploadTimeout := os.Getenv("S3ND_UPLOAD_TIMEOUT") + if defaultUploadTimeout == "" { + defaultUploadTimeout = "10s" + } + uploadTimeout := flag.String("upload-timeout", defaultUploadTimeout, "Upload Timeout (S3ND_UPLOAD_TIMEOUT)") + + defaultQueueTimeout := os.Getenv("S3ND_QUEUE_TIMEOUT") + if defaultQueueTimeout == "" { + defaultQueueTimeout = "10s" + } + queueTimeout := flag.String("queue-timeout", defaultQueueTimeout, "Queue Timeout waiting for transfer to start (S3ND_QUEUE_TIMEOUT)") + + defaultUploadTries, _ := strconv.Atoi(os.Getenv("S3ND_UPLOAD_TRIES")) + if defaultUploadTries == 0 { + defaultUploadTries = 1 + } + conf.UploadTries = flag.Int("upload-tries", defaultUploadTries, "Max number of upload tries (S3ND_UPLOAD_TRIES)") + + defaultUploadPartsize := os.Getenv("S3ND_UPLOAD_PARTSIZE") + if defaultUploadPartsize == "" { + defaultUploadPartsize = "5Mi" + } + uploadPartsizeRaw := flag.String("upload-partsize", defaultUploadPartsize, "Upload Part Size (S3ND_UPLOAD_PARTSIZE)") + + defaultUploadBwlimit := os.Getenv("S3ND_UPLOAD_BWLIMIT") + if defaultUploadBwlimit == "" { + defaultUploadBwlimit = "0" + } + uploadBwlimitRaw := 
flag.String("upload-bwlimit", defaultUploadBwlimit, "Upload bandwidth limit in bits per second (S3ND_UPLOAD_BWLIMIT)") + + defaultUploadWriteBufferSize := os.Getenv("S3ND_UPLOAD_WRITE_BUFFER_SIZE") + if defaultUploadWriteBufferSize == "" { + defaultUploadWriteBufferSize = "64Ki" + } + uploadWriteBufferSizeRaw := flag.String("upload-write-buffer-size", defaultUploadWriteBufferSize, "Upload Write Buffer Size (S3ND_UPLOAD_WRITE_BUFFER_SIZE)") + + flag.Parse() + // end flags + + if *conf.EndpointUrl == "" { + log.Fatal("S3ND_ENDPOINT_URL is required") + } + + uploadTimeoutDuration, err := time.ParseDuration(*uploadTimeout) + if err != nil { + log.Fatal("S3ND_UPLOAD_TIMEOUT is invalid") + } + conf.UploadTimeout = &uploadTimeoutDuration + + queueTimeoutDuration, err := time.ParseDuration(*queueTimeout) + if err != nil { + log.Fatal("S3ND_QUEUE_TIMEOUT is invalid") + } + conf.QueueTimeout = &queueTimeoutDuration + + uploadPartsize, err := k8sresource.ParseQuantity(*uploadPartsizeRaw) + if err != nil { + log.Fatal("S3ND_UPLOAD_PARTSIZE is invalid") + } + conf.UploadPartsize = &uploadPartsize + + uploadBwlimit, err := k8sresource.ParseQuantity(*uploadBwlimitRaw) + if err != nil { + log.Fatal("S3ND_UPLOAD_BWLIMIT is invalid") + } + conf.UploadBwlimit = &uploadBwlimit + + uploadWriteBufferSize, err := k8sresource.ParseQuantity(*uploadWriteBufferSizeRaw) + if err != nil { + log.Fatal("S3ND_UPLOAD_WRITE_BUFFER_SIZE is invalid") + } + conf.UploadWriteBufferSize = &uploadWriteBufferSize + + log.Println("S3ND_HOST:", *conf.Host) + log.Println("S3ND_PORT:", *conf.Port) + log.Println("S3ND_ENDPOINT_URL:", *conf.EndpointUrl) + log.Println("S3ND_UPLOAD_MAX_PARALLEL:", *conf.UploadMaxParallel) + log.Println("S3ND_UPLOAD_TIMEOUT:", *conf.UploadTimeout) + log.Println("S3ND_QUEUE_TIMEOUT:", *conf.QueueTimeout) + log.Println("S3ND_UPLOAD_TRIES:", *conf.UploadTries) + log.Println("S3ND_UPLOAD_PARTSIZE:", conf.UploadPartsize.String()) + log.Println("S3ND_UPLOAD_BWLIMIT:", 
conf.UploadBwlimit.String()) + log.Println("S3ND_UPLOAD_WRITE_BUFFER_SIZE:", conf.UploadWriteBufferSize.String()) + + return conf +} diff --git a/main.go b/main.go index 1b9dee7..2e4eec9 100644 --- a/main.go +++ b/main.go @@ -3,7 +3,6 @@ package main import ( "context" "errors" - "flag" "fmt" "html" "log" @@ -12,10 +11,11 @@ import ( "net/url" "os" "path/filepath" - "strconv" "syscall" "time" + "github.com/lsst-dm/s3nd/conf" + "github.com/aws/aws-sdk-go-v2/aws" awshttp "github.com/aws/aws-sdk-go-v2/aws/transport/http" "github.com/aws/aws-sdk-go-v2/config" @@ -24,24 +24,10 @@ import ( "github.com/aws/aws-sdk-go-v2/service/s3/types" "github.com/hyperledger/fabric/common/semaphore" "golang.org/x/sys/unix" - k8sresource "k8s.io/apimachinery/pkg/api/resource" ) -type S3ndConf struct { - host *string - port *int - endpointUrl *string - uploadMaxParallel *int64 - uploadTimeout *time.Duration - queueTimeout *time.Duration - uploadTries *int - uploadPartsize *k8sresource.Quantity - uploadBwlimit *k8sresource.Quantity - uploadWriteBufferSize *k8sresource.Quantity -} - type S3ndHandler struct { - Conf *S3ndConf + Conf *conf.S3ndConf AwsConfig *aws.Config S3Client *s3.Client Uploader *manager.Uploader @@ -64,10 +50,10 @@ func (h *S3ndHandler) UploadFileMultipart(ctx context.Context, task *S3ndUploadT } defer file.Close() - maxAttempts := *h.Conf.uploadTries + maxAttempts := *h.Conf.UploadTries var attempt int for attempt = 1; attempt <= maxAttempts; attempt++ { - uploadCtx, cancel := context.WithTimeout(ctx, *h.Conf.uploadTimeout) + uploadCtx, cancel := context.WithTimeout(ctx, *h.Conf.UploadTimeout) defer cancel() _, err = h.Uploader.Upload(uploadCtx, &s3.PutObjectInput{ Bucket: aws.String(*task.bucket), @@ -150,7 +136,7 @@ func (h *S3ndHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { log.Printf("queuing %v:%v | source %v\n", *task.bucket, *task.key, *task.file) // limit the number of parallel uploads - semaCtx, cancel := context.WithTimeout(r.Context(), 
*h.Conf.queueTimeout) + semaCtx, cancel := context.WithTimeout(r.Context(), *h.Conf.QueueTimeout) defer cancel() if err := h.ParallelUploads.Acquire(semaCtx); err != nil { w.WriteHeader(http.StatusServiceUnavailable) @@ -169,130 +155,22 @@ func (h *S3ndHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { fmt.Fprintf(w, "Successful put %q\n", html.EscapeString(task.uri.String())) } -func getConf() S3ndConf { - conf := S3ndConf{} - - // start flags - conf.host = flag.String("host", os.Getenv("S3ND_HOST"), "S3 Daemon Host (S3ND_HOST)") - - defaultPort, _ := strconv.Atoi(os.Getenv("S3ND_PORT")) - if defaultPort == 0 { - defaultPort = 15555 - } - conf.port = flag.Int("port", defaultPort, "S3 Daemon Port (S3ND_PORT)") - - conf.endpointUrl = flag.String("endpoint-url", os.Getenv("S3ND_ENDPOINT_URL"), "S3 Endpoint URL (S3ND_ENDPOINT_URL)") - - var defaultUploadMaxParallel int64 - defaultUploadMaxParallel, _ = strconv.ParseInt(os.Getenv("S3ND_UPLOAD_MAX_PARALLEL"), 10, 64) - if defaultUploadMaxParallel == 0 { - defaultUploadMaxParallel = 100 - } - conf.uploadMaxParallel = flag.Int64("upload-max-parallel", defaultUploadMaxParallel, "Maximum number of parallel object uploads (S3ND_UPLOAD_MAX_PARALLEL)") - - defaultUploadTimeout := os.Getenv("S3ND_UPLOAD_TIMEOUT") - if defaultUploadTimeout == "" { - defaultUploadTimeout = "10s" - } - uploadTimeout := flag.String("upload-timeout", defaultUploadTimeout, "Upload Timeout (S3ND_UPLOAD_TIMEOUT)") - - defaultQueueTimeout := os.Getenv("S3ND_QUEUE_TIMEOUT") - if defaultQueueTimeout == "" { - defaultQueueTimeout = "10s" - } - queueTimeout := flag.String("queue-timeout", defaultQueueTimeout, "Queue Timeout waiting for transfer to start (S3ND_QUEUE_TIMEOUT)") - - defaultUploadTries, _ := strconv.Atoi(os.Getenv("S3ND_UPLOAD_TRIES")) - if defaultUploadTries == 0 { - defaultUploadTries = 1 - } - conf.uploadTries = flag.Int("upload-tries", defaultUploadTries, "Max number of upload tries (S3ND_UPLOAD_TRIES)") - - defaultUploadPartsize 
:= os.Getenv("S3ND_UPLOAD_PARTSIZE") - if defaultUploadPartsize == "" { - defaultUploadPartsize = "5Mi" - } - uploadPartsizeRaw := flag.String("upload-partsize", defaultUploadPartsize, "Upload Part Size (S3ND_UPLOAD_PARTSIZE)") - - defaultUploadBwlimit := os.Getenv("S3ND_UPLOAD_BWLIMIT") - if defaultUploadBwlimit == "" { - defaultUploadBwlimit = "0" - } - uploadBwlimitRaw := flag.String("upload-bwlimit", defaultUploadBwlimit, "Upload bandwidth limit in bits per second (S3ND_UPLOAD_BWLIMIT)") - - defaultUploadWriteBufferSize := os.Getenv("S3ND_UPLOAD_WRITE_BUFFER_SIZE") - if defaultUploadWriteBufferSize == "" { - defaultUploadWriteBufferSize = "64Ki" - } - uploadWriteBufferSizeRaw := flag.String("upload-write-buffer-size", defaultUploadWriteBufferSize, "Upload Write Buffer Size (S3ND_UPLOAD_WRITE_BUFFER_SIZE)") - - flag.Parse() - // end flags - - if *conf.endpointUrl == "" { - log.Fatal("S3ND_ENDPOINT_URL is required") - } - - uploadTimeoutDuration, err := time.ParseDuration(*uploadTimeout) - if err != nil { - log.Fatal("S3ND_UPLOAD_TIMEOUT is invalid") - } - conf.uploadTimeout = &uploadTimeoutDuration - - queueTimeoutDuration, err := time.ParseDuration(*queueTimeout) - if err != nil { - log.Fatal("S3ND_QUEUE_TIMEOUT is invalid") - } - conf.queueTimeout = &queueTimeoutDuration - - uploadPartsize, err := k8sresource.ParseQuantity(*uploadPartsizeRaw) - if err != nil { - log.Fatal("S3ND_UPLOAD_PARTSIZE is invalid") - } - conf.uploadPartsize = &uploadPartsize - - uploadBwlimit, err := k8sresource.ParseQuantity(*uploadBwlimitRaw) - if err != nil { - log.Fatal("S3ND_UPLOAD_BWLIMIT is invalid") - } - conf.uploadBwlimit = &uploadBwlimit - - uploadWriteBufferSize, err := k8sresource.ParseQuantity(*uploadWriteBufferSizeRaw) - if err != nil { - log.Fatal("S3ND_UPLOAD_WRITE_BUFFER_SIZE is invalid") - } - conf.uploadWriteBufferSize = &uploadWriteBufferSize - - log.Println("S3ND_HOST:", *conf.host) - log.Println("S3ND_PORT:", *conf.port) - log.Println("S3ND_ENDPOINT_URL:", 
*conf.endpointUrl) - log.Println("S3ND_UPLOAD_MAX_PARALLEL:", *conf.uploadMaxParallel) - log.Println("S3ND_UPLOAD_TIMEOUT:", *conf.uploadTimeout) - log.Println("S3ND_QUEUE_TIMEOUT:", *conf.queueTimeout) - log.Println("S3ND_UPLOAD_TRIES:", *conf.uploadTries) - log.Println("S3ND_UPLOAD_PARTSIZE:", conf.uploadPartsize.String()) - log.Println("S3ND_UPLOAD_BWLIMIT:", conf.uploadBwlimit.String()) - log.Println("S3ND_UPLOAD_WRITE_BUFFER_SIZE:", conf.uploadWriteBufferSize.String()) - - return conf -} - -func NewHandler(conf *S3ndConf) *S3ndHandler { +func NewHandler(conf *conf.S3ndConf) *S3ndHandler { handler := &S3ndHandler{ Conf: conf, } - maxConns := int(*conf.uploadMaxParallel * 5) // allow for multipart upload creation + maxConns := int(*conf.UploadMaxParallel * 5) // allow for multipart upload creation var httpClient *awshttp.BuildableClient - if conf.uploadBwlimit.Value() != 0 { + if conf.UploadBwlimit.Value() != 0 { dialer := &net.Dialer{ Control: func(network, address string, conn syscall.RawConn) error { // https://pkg.go.dev/syscall#RawConn var operr error if err := conn.Control(func(fd uintptr) { - operr = syscall.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_MAX_PACING_RATE, int(conf.uploadBwlimit.Value()/8)) + operr = syscall.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_MAX_PACING_RATE, int(conf.UploadBwlimit.Value()/8)) }); err != nil { return err } @@ -306,7 +184,7 @@ func NewHandler(conf *S3ndConf) *S3ndHandler { t.MaxIdleConns = maxConns t.MaxConnsPerHost = maxConns t.MaxIdleConnsPerHost = maxConns - t.WriteBufferSize = int(conf.uploadWriteBufferSize.Value()) + t.WriteBufferSize = int(conf.UploadWriteBufferSize.Value()) // disable http/2 to prevent muxing over a single tcp connection t.ForceAttemptHTTP2 = false t.TLSClientConfig.NextProtos = []string{"http/1.1"} @@ -319,7 +197,7 @@ func NewHandler(conf *S3ndConf) *S3ndHandler { t.MaxIdleConns = maxConns t.MaxConnsPerHost = maxConns t.MaxIdleConnsPerHost = maxConns - t.WriteBufferSize = 
int(conf.uploadWriteBufferSize.Value()) + t.WriteBufferSize = int(conf.UploadWriteBufferSize.Value()) // disable http/2 to prevent muxing over a single tcp connection t.ForceAttemptHTTP2 = false t.TLSClientConfig.NextProtos = []string{"http/1.1"} @@ -328,7 +206,7 @@ func NewHandler(conf *S3ndConf) *S3ndHandler { awsCfg, err := config.LoadDefaultConfig( context.TODO(), - config.WithBaseEndpoint(*conf.endpointUrl), + config.WithBaseEndpoint(*conf.EndpointUrl), config.WithHTTPClient(httpClient), ) if err != nil { @@ -345,22 +223,22 @@ func NewHandler(conf *S3ndConf) *S3ndHandler { handler.Uploader = manager.NewUploader(handler.S3Client, func(u *manager.Uploader) { u.Concurrency = 1000 u.MaxUploadParts = 1000 - u.PartSize = conf.uploadPartsize.Value() + u.PartSize = conf.UploadPartsize.Value() }) - sema := semaphore.New(int(*conf.uploadMaxParallel)) + sema := semaphore.New(int(*conf.UploadMaxParallel)) handler.ParallelUploads = &sema return handler } func main() { - conf := getConf() + conf := conf.NewConf() handler := NewHandler(&conf) http.Handle("/", handler) - addr := fmt.Sprintf("%s:%d", *conf.host, *conf.port) + addr := fmt.Sprintf("%s:%d", *conf.Host, *conf.Port) log.Println("Listening on", addr) err := http.ListenAndServe(addr, nil) From 1a94702b2dc98450a91e4ff989fcc86c25b74b05 Mon Sep 17 00:00:00 2001 From: Joshua Hoblitt Date: Fri, 22 Nov 2024 16:10:21 -0700 Subject: [PATCH 32/32] mv http.Handler implementation into handler package --- handler/handler.go | 233 +++++++++++++++++++++++++++++++++++++++++++++ main.go | 226 +------------------------------------------ 2 files changed, 235 insertions(+), 224 deletions(-) create mode 100644 handler/handler.go diff --git a/handler/handler.go b/handler/handler.go new file mode 100644 index 0000000..0fec39e --- /dev/null +++ b/handler/handler.go @@ -0,0 +1,233 @@ +package handler + +import ( + "context" + "errors" + "fmt" + "html" + "log" + "net" + "net/http" + "net/url" + "os" + "path/filepath" + "syscall" + "time" + + 
"github.com/lsst-dm/s3nd/conf" + + "github.com/aws/aws-sdk-go-v2/aws" + awshttp "github.com/aws/aws-sdk-go-v2/aws/transport/http" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/feature/s3/manager" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/aws/aws-sdk-go-v2/service/s3/types" + "github.com/hyperledger/fabric/common/semaphore" + "golang.org/x/sys/unix" +) + +type S3ndHandler struct { + conf *conf.S3ndConf + awsConfig *aws.Config + s3Client *s3.Client + uploader *manager.Uploader + parallelUploads *semaphore.Semaphore +} + +type s3ndUploadTask struct { + uri *url.URL + bucket *string + key *string + file *string +} + +func NewHandler(conf *conf.S3ndConf) *S3ndHandler { + handler := &S3ndHandler{ + conf: conf, + } + + maxConns := int(*conf.UploadMaxParallel * 5) // allow for multipart upload creation + + var httpClient *awshttp.BuildableClient + + if conf.UploadBwlimit.Value() != 0 { + dialer := &net.Dialer{ + Control: func(network, address string, conn syscall.RawConn) error { + // https://pkg.go.dev/syscall#RawConn + var operr error + if err := conn.Control(func(fd uintptr) { + operr = syscall.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_MAX_PACING_RATE, int(conf.UploadBwlimit.Value()/8)) + }); err != nil { + return err + } + return operr + }, + } + + httpClient = awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { + t.ExpectContinueTimeout = 0 + t.IdleConnTimeout = 0 + t.MaxIdleConns = maxConns + t.MaxConnsPerHost = maxConns + t.MaxIdleConnsPerHost = maxConns + t.WriteBufferSize = int(conf.UploadWriteBufferSize.Value()) + // disable http/2 to prevent muxing over a single tcp connection + t.ForceAttemptHTTP2 = false + t.TLSClientConfig.NextProtos = []string{"http/1.1"} + t.DialContext = dialer.DialContext + }) + } else { + httpClient = awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { + t.ExpectContinueTimeout = 0 + t.IdleConnTimeout = 0 + t.MaxIdleConns = maxConns + 
t.MaxConnsPerHost = maxConns + t.MaxIdleConnsPerHost = maxConns + t.WriteBufferSize = int(conf.UploadWriteBufferSize.Value()) + // disable http/2 to prevent muxing over a single tcp connection + t.ForceAttemptHTTP2 = false + t.TLSClientConfig.NextProtos = []string{"http/1.1"} + }) + } + + awsCfg, err := config.LoadDefaultConfig( + context.TODO(), + config.WithBaseEndpoint(*conf.EndpointUrl), + config.WithHTTPClient(httpClient), + ) + if err != nil { + log.Fatal(err) + } + + handler.awsConfig = &awsCfg + + handler.s3Client = s3.NewFromConfig(awsCfg, func(o *s3.Options) { + o.UsePathStyle = true + o.Retryer = aws.NopRetryer{} // we handle retries ourselves + }) + + handler.uploader = manager.NewUploader(handler.s3Client, func(u *manager.Uploader) { + u.Concurrency = 1000 + u.MaxUploadParts = 1000 + u.PartSize = conf.UploadPartsize.Value() + }) + + sema := semaphore.New(int(*conf.UploadMaxParallel)) + handler.parallelUploads = &sema + + return handler +} + +func (h *S3ndHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + start := time.Now() + + task, err := h.parseRequest(r) + if err != nil { + w.Header().Set("x-error", err.Error()) + w.WriteHeader(http.StatusBadRequest) + fmt.Fprintf(w, "error parsing request: %s\n", err) + return + } + + log.Printf("queuing %v:%v | source %v\n", *task.bucket, *task.key, *task.file) + + // limit the number of parallel uploads + semaCtx, cancel := context.WithTimeout(r.Context(), *h.conf.QueueTimeout) + defer cancel() + if err := h.parallelUploads.Acquire(semaCtx); err != nil { + w.WriteHeader(http.StatusServiceUnavailable) + fmt.Fprintf(w, "error acquiring semaphore: %s\n", err) + log.Printf("queue %v:%v | failed after %s: %s\n", *task.bucket, *task.key, time.Now().Sub(start), err) + return + } + defer h.parallelUploads.Release() + + if err := h.uploadFileMultipart(r.Context(), task); err != nil { + w.WriteHeader(http.StatusBadRequest) + fmt.Fprintf(w, "error uploading file: %s\n", err) + return + } + + fmt.Fprintf(w, 
"Successful put %q\n", html.EscapeString(task.uri.String())) +} + +func (h *S3ndHandler) parseRequest(r *http.Request) (*s3ndUploadTask, error) { + file := r.PostFormValue("file") + if file == "" { + return nil, fmt.Errorf("missing field: file") + } + uriRaw := r.PostFormValue("uri") + if uriRaw == "" { + return nil, fmt.Errorf("missing field: uri") + } + + if !filepath.IsAbs(file) { + return nil, fmt.Errorf("Only absolute file paths are supported: %q", html.EscapeString(file)) + } + + uri, err := url.Parse(uriRaw) + if err != nil { + return nil, fmt.Errorf("Unable to parse URI: %q", html.EscapeString(uriRaw)) + } + + if uri.Scheme != "s3" { + return nil, fmt.Errorf("Only s3 scheme is supported: %q", html.EscapeString(uriRaw)) + } + + bucket := uri.Host + if bucket == "" { + return nil, fmt.Errorf("Unable to parse bucket from URI: %q", html.EscapeString(uriRaw)) + } + key := uri.Path[1:] // Remove leading slash + + return &s3ndUploadTask{uri: uri, bucket: &bucket, key: &key, file: &file}, nil +} + +func (h *S3ndHandler) uploadFileMultipart(ctx context.Context, task *s3ndUploadTask) error { + start := time.Now() + file, err := os.Open(*task.file) + if err != nil { + log.Printf("upload %v:%v | Couldn't open file %v to upload because: %v\n", *task.bucket, *task.key, *task.file, err) + return err + } + defer file.Close() + + maxAttempts := *h.conf.UploadTries + var attempt int + for attempt = 1; attempt <= maxAttempts; attempt++ { + uploadCtx, cancel := context.WithTimeout(ctx, *h.conf.UploadTimeout) + defer cancel() + _, err = h.uploader.Upload(uploadCtx, &s3.PutObjectInput{ + Bucket: aws.String(*task.bucket), + Key: aws.String(*task.key), + Body: file, + }) + if err != nil { + log.Printf("upload %v:%v | failed after %s -- try %v/%v\n", *task.bucket, *task.key, time.Now().Sub(start), attempt, maxAttempts) + var noBucket *types.NoSuchBucket + if errors.As(err, &noBucket) { + log.Printf("upload %v:%v | Bucket does not exist.\n", *task.bucket, *task.key) + // Don't retry 
if the bucket doesn't exist. + return noBucket + } + + if errors.Is(err, context.Canceled) { + log.Printf("upload %v:%v | context cancelled\n", *task.bucket, *task.key) + // Don't retry if the client disconnected + return err + } + + log.Printf("upload %v:%v | failed because: %v\n", *task.bucket, *task.key, err) + + // bubble up the error if we've exhausted our attempts + if attempt == maxAttempts { + return err + } + } else { + break + } + } + + log.Printf("upload %v:%v | success in %s after %v/%v tries\n", *task.bucket, *task.key, time.Now().Sub(start), attempt, maxAttempts) + return nil +} diff --git a/main.go b/main.go index 2e4eec9..b427cdf 100644 --- a/main.go +++ b/main.go @@ -1,241 +1,19 @@ package main import ( - "context" "errors" "fmt" - "html" "log" - "net" "net/http" - "net/url" - "os" - "path/filepath" - "syscall" - "time" "github.com/lsst-dm/s3nd/conf" - - "github.com/aws/aws-sdk-go-v2/aws" - awshttp "github.com/aws/aws-sdk-go-v2/aws/transport/http" - "github.com/aws/aws-sdk-go-v2/config" - "github.com/aws/aws-sdk-go-v2/feature/s3/manager" - "github.com/aws/aws-sdk-go-v2/service/s3" - "github.com/aws/aws-sdk-go-v2/service/s3/types" - "github.com/hyperledger/fabric/common/semaphore" - "golang.org/x/sys/unix" + "github.com/lsst-dm/s3nd/handler" ) -type S3ndHandler struct { - Conf *conf.S3ndConf - AwsConfig *aws.Config - S3Client *s3.Client - Uploader *manager.Uploader - ParallelUploads *semaphore.Semaphore -} - -type S3ndUploadTask struct { - uri *url.URL - bucket *string - key *string - file *string -} - -func (h *S3ndHandler) UploadFileMultipart(ctx context.Context, task *S3ndUploadTask) error { - start := time.Now() - file, err := os.Open(*task.file) - if err != nil { - log.Printf("upload %v:%v | Couldn't open file %v to upload because: %v\n", *task.bucket, *task.key, *task.file, err) - return err - } - defer file.Close() - - maxAttempts := *h.Conf.UploadTries - var attempt int - for attempt = 1; attempt <= maxAttempts; attempt++ { - uploadCtx, 
cancel := context.WithTimeout(ctx, *h.Conf.UploadTimeout) - defer cancel() - _, err = h.Uploader.Upload(uploadCtx, &s3.PutObjectInput{ - Bucket: aws.String(*task.bucket), - Key: aws.String(*task.key), - Body: file, - }) - if err != nil { - log.Printf("upload %v:%v | failed after %s -- try %v/%v\n", *task.bucket, *task.key, time.Now().Sub(start), attempt, maxAttempts) - var noBucket *types.NoSuchBucket - if errors.As(err, &noBucket) { - log.Printf("upload %v:%v | Bucket does not exist.\n", *task.bucket, *task.key) - // Don't retry if the bucket doesn't exist. - return noBucket - } - - if errors.Is(err, context.Canceled) { - log.Printf("upload %v:%v | context cancelled\n", *task.bucket, *task.key) - // Don't retry if the client disconnected - return err - } - - log.Printf("upload %v:%v | failed because: %v\n", *task.bucket, *task.key, err) - - // bubble up the error if we've exhausted our attempts - if attempt == maxAttempts { - return err - } - } else { - break - } - } - - log.Printf("upload %v:%v | success in %s after %v/%v tries\n", *task.bucket, *task.key, time.Now().Sub(start), attempt, maxAttempts) - return nil -} - -func (h *S3ndHandler) parseRequest(r *http.Request) (*S3ndUploadTask, error) { - file := r.PostFormValue("file") - if file == "" { - return nil, fmt.Errorf("missing field: file") - } - uriRaw := r.PostFormValue("uri") - if uriRaw == "" { - return nil, fmt.Errorf("missing field: uri") - } - - if !filepath.IsAbs(file) { - return nil, fmt.Errorf("Only absolute file paths are supported: %q", html.EscapeString(file)) - } - - uri, err := url.Parse(uriRaw) - if err != nil { - return nil, fmt.Errorf("Unable to parse URI: %q", html.EscapeString(uriRaw)) - } - - if uri.Scheme != "s3" { - return nil, fmt.Errorf("Only s3 scheme is supported: %q", html.EscapeString(uriRaw)) - } - - bucket := uri.Host - if bucket == "" { - return nil, fmt.Errorf("Unable to parse bucket from URI: %q", html.EscapeString(uriRaw)) - } - key := uri.Path[1:] // Remove leading slash - 
- return &S3ndUploadTask{uri: uri, bucket: &bucket, key: &key, file: &file}, nil -} - -func (h *S3ndHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { - start := time.Now() - - task, err := h.parseRequest(r) - if err != nil { - w.Header().Set("x-error", err.Error()) - w.WriteHeader(http.StatusBadRequest) - fmt.Fprintf(w, "error parsing request: %s\n", err) - return - } - - log.Printf("queuing %v:%v | source %v\n", *task.bucket, *task.key, *task.file) - - // limit the number of parallel uploads - semaCtx, cancel := context.WithTimeout(r.Context(), *h.Conf.QueueTimeout) - defer cancel() - if err := h.ParallelUploads.Acquire(semaCtx); err != nil { - w.WriteHeader(http.StatusServiceUnavailable) - fmt.Fprintf(w, "error acquiring semaphore: %s\n", err) - log.Printf("queue %v:%v | failed after %s: %s\n", *task.bucket, *task.key, time.Now().Sub(start), err) - return - } - defer h.ParallelUploads.Release() - - if err := h.UploadFileMultipart(r.Context(), task); err != nil { - w.WriteHeader(http.StatusBadRequest) - fmt.Fprintf(w, "error uploading file: %s\n", err) - return - } - - fmt.Fprintf(w, "Successful put %q\n", html.EscapeString(task.uri.String())) -} - -func NewHandler(conf *conf.S3ndConf) *S3ndHandler { - handler := &S3ndHandler{ - Conf: conf, - } - - maxConns := int(*conf.UploadMaxParallel * 5) // allow for multipart upload creation - - var httpClient *awshttp.BuildableClient - - if conf.UploadBwlimit.Value() != 0 { - dialer := &net.Dialer{ - Control: func(network, address string, conn syscall.RawConn) error { - // https://pkg.go.dev/syscall#RawConn - var operr error - if err := conn.Control(func(fd uintptr) { - operr = syscall.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_MAX_PACING_RATE, int(conf.UploadBwlimit.Value()/8)) - }); err != nil { - return err - } - return operr - }, - } - - httpClient = awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { - t.ExpectContinueTimeout = 0 - t.IdleConnTimeout = 0 - t.MaxIdleConns = 
maxConns - t.MaxConnsPerHost = maxConns - t.MaxIdleConnsPerHost = maxConns - t.WriteBufferSize = int(conf.UploadWriteBufferSize.Value()) - // disable http/2 to prevent muxing over a single tcp connection - t.ForceAttemptHTTP2 = false - t.TLSClientConfig.NextProtos = []string{"http/1.1"} - t.DialContext = dialer.DialContext - }) - } else { - httpClient = awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { - t.ExpectContinueTimeout = 0 - t.IdleConnTimeout = 0 - t.MaxIdleConns = maxConns - t.MaxConnsPerHost = maxConns - t.MaxIdleConnsPerHost = maxConns - t.WriteBufferSize = int(conf.UploadWriteBufferSize.Value()) - // disable http/2 to prevent muxing over a single tcp connection - t.ForceAttemptHTTP2 = false - t.TLSClientConfig.NextProtos = []string{"http/1.1"} - }) - } - - awsCfg, err := config.LoadDefaultConfig( - context.TODO(), - config.WithBaseEndpoint(*conf.EndpointUrl), - config.WithHTTPClient(httpClient), - ) - if err != nil { - log.Fatal(err) - } - - handler.AwsConfig = &awsCfg - - handler.S3Client = s3.NewFromConfig(awsCfg, func(o *s3.Options) { - o.UsePathStyle = true - o.Retryer = aws.NopRetryer{} // we handle retries ourselves - }) - - handler.Uploader = manager.NewUploader(handler.S3Client, func(u *manager.Uploader) { - u.Concurrency = 1000 - u.MaxUploadParts = 1000 - u.PartSize = conf.UploadPartsize.Value() - }) - - sema := semaphore.New(int(*conf.UploadMaxParallel)) - handler.ParallelUploads = &sema - - return handler -} - func main() { conf := conf.NewConf() - handler := NewHandler(&conf) + handler := handler.NewHandler(&conf) http.Handle("/", handler) addr := fmt.Sprintf("%s:%d", *conf.Host, *conf.Port)