diff --git a/src/containers/nextflow/Dockerfile b/src/containers/nextflow/Dockerfile
new file mode 100644
index 000000000..873929af9
--- /dev/null
+++ b/src/containers/nextflow/Dockerfile
@@ -0,0 +1,21 @@
+FROM centos:7 AS build
+
+RUN yum update -y \
+ && yum install -y \
+    curl \
+    java-1.8.0-openjdk \
+    awscli \
+ && yum clean -y all
+
+ENV JAVA_HOME /usr/lib/jvm/jre-openjdk/
+
+WORKDIR /opt/inst
+RUN curl -s https://get.nextflow.io | bash
+RUN mv nextflow /usr/local/bin
+
+COPY nextflow.aws.sh /opt/bin/nextflow.aws.sh
+RUN chmod +x /opt/bin/nextflow.aws.sh
+
+WORKDIR /opt/work
+ENTRYPOINT ["/opt/bin/nextflow.aws.sh"]
+
diff --git a/src/containers/nextflow/nextflow.aws.sh b/src/containers/nextflow/nextflow.aws.sh
new file mode 100644
index 000000000..98cf3839b
--- /dev/null
+++ b/src/containers/nextflow/nextflow.aws.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+NEXTFLOW_SCRIPT=$1
+
+# Create the default config using environment variables
+# passed into the container
+mkdir -p /opt/config
+NF_CONFIG=/opt/config/nextflow.config
+
+cat << EOF > "$NF_CONFIG"
+workDir = "$NF_WORKDIR"
+process.executor = "awsbatch"
+process.queue = "$NF_JOB_QUEUE"
+executor.awscli = "/home/ec2-user/miniconda/bin/aws"
+EOF
+
+# AWS Batch places multiple jobs on an instance
+# To avoid file path clobbering use the JobID and JobAttempt
+# to create a unique path
+GUID="$AWS_BATCH_JOB_ID/$AWS_BATCH_JOB_ATTEMPT"
+
+mkdir -p "/opt/work/$GUID"
+cd "/opt/work/$GUID" || exit 1
+
+# stage workflow definition (abort if the script cannot be downloaded)
+aws s3 cp --no-progress "$NEXTFLOW_SCRIPT" . || exit 1
+
+NF_FILE=$(find . -name "*.nf")
+
+echo "== Nextflow Configuration =="
+cat "$NF_CONFIG"
+
+echo "== Running Workflow =="
+nextflow -c "$NF_CONFIG" run "$NF_FILE"
diff --git a/src/templates/nextflow/nextflow-aio.template.yaml b/src/templates/nextflow/nextflow-aio.template.yaml
index 22238021b..84b5d8526 100644
--- a/src/templates/nextflow/nextflow-aio.template.yaml
+++ b/src/templates/nextflow/nextflow-aio.template.yaml
@@ -27,7 +27,6 @@ Metadata:
           - ExistingDataBucket
           - KeyPairName
           - AvailabilityZones
-          - NextflowContainerImage
       - Label:
           default: "AWS Batch"
         Parameters:
@@ -39,6 +38,7 @@ Metadata:
       - Label:
           default: "Nextflow Resources"
         Parameters:
+          - NextflowContainerImage
           - S3NextflowBucketName
           - ExistingNextflowBucket
           - S3NextflowScriptPrefix
@@ -139,8 +139,10 @@ Parameters:
   NextflowContainerImage:
     Type: String
     Description: >-
-      Container image for nextflow with custom entrypoint for workflow
-      script staging. (Example, "/nextflow:latest")
+      (Optional) Container image for nextflow with custom entrypoint for config and workflow
+      script staging. (Example, "/nextflow:latest").
+      Provide this if you have a specific version of nextflow you want to use, otherwise a
+      container will be built using the latest version.
 
   TemplateRootUrl:
     Type: String
@@ -214,6 +216,9 @@ Resources:
 
 
 Outputs:
+  NextflowContainerImage:
+    Value: !GetAtt NextflowStack.Outputs.NextflowContainerImage
+
   NextflowJobDefinition:
     Value: !GetAtt NextflowStack.Outputs.NextflowJobDefinition
     Description: >-
diff --git a/src/templates/nextflow/nextflow-resources.template.yaml b/src/templates/nextflow/nextflow-resources.template.yaml
index 26608a163..d6987c49d 100644
--- a/src/templates/nextflow/nextflow-resources.template.yaml
+++ b/src/templates/nextflow/nextflow-resources.template.yaml
@@ -25,10 +25,11 @@ Metadata:
           - S3DataBucketName
           - S3NextflowBucketName
           - ExistingBucket
-          - NextflowContainerImage
+          - BatchDefaultJobQueue
       - Label:
           default: "Optional"
         Parameters:
+          - NextflowContainerImage
           - S3ScriptPrefix
           - S3WorkDirPrefix
 
@@ -67,8 +68,10 @@ Parameters:
   NextflowContainerImage:
     Type: String
     Description: >-
-      Container image for nextflow with custom entrypoint for config and workflow
-      script staging. (Example, "/nextflow:latest")
+      (Optional) Container image for nextflow with custom entrypoint for config and workflow
+      script staging. (Example, "/nextflow:latest").
+      Provide this if you have a specific version of nextflow you want to use, otherwise a
+      container will be built using the latest version.
 
   BatchDefaultJobQueue:
     Type: String
@@ -80,7 +83,12 @@ Conditions:
     Fn::Equals:
       - !Ref S3NextflowBucketName
       - !Ref S3DataBucketName
-  
+
+  BuildNextflowContainer:
+    Fn::Equals:
+      - !Ref NextflowContainerImage
+      - ""
+
   NextflowBucketDoesNotExist:
     Fn::Equals:
       - !Ref ExistingBucket
@@ -110,6 +118,70 @@ Resources:
               SSEAlgorithm: AES256
       Tags: !FindInMap ["TagMap", "default", "tags"]
 
+  IAMCodeBuildRole:
+    Type: AWS::IAM::Role
+    Condition: BuildNextflowContainer
+    Properties:
+      AssumeRolePolicyDocument:
+        Version: '2012-10-17'
+        Statement:
+          - Effect: Allow
+            Principal:
+              Service: codebuild.amazonaws.com
+            Action: sts:AssumeRole
+
+      Policies:
+        - PolicyName: !Sub codebuild-ecr-access-${AWS::Region}
+          PolicyDocument:
+            Version: "2012-10-17"
+            Statement:
+              Effect: Allow
+              Resource: "*"
+              Action:
+                - "ecr:CreateRepository"
+                - "ecr:BatchCheckLayerAvailability"
+                - "ecr:CompleteLayerUpload"
+                - "ecr:GetAuthorizationToken"
+                - "ecr:InitiateLayerUpload"
+                - "ecr:PutImage"
+                - "ecr:UploadLayerPart"
+        - PolicyName: !Sub codebuild-logs-access-${AWS::Region}
+          PolicyDocument:
+            Version: "2012-10-17"
+            Statement:
+              Effect: Allow
+              Resource: "*"
+              Action:
+                - logs:CreateLogGroup
+                - logs:CreateLogStream
+                - logs:PutLogEvents
+
+  IAMLambdaExecutionRole:
+    Type: AWS::IAM::Role
+    Condition: BuildNextflowContainer
+    Properties:
+      AssumeRolePolicyDocument:
+        Version: "2012-10-17"
+        Statement:
+          - Effect: Allow
+            Principal:
+              Service: lambda.amazonaws.com
+            Action: "sts:AssumeRole"
+      Path: /
+      ManagedPolicyArns:
+        - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole
+        - arn:aws:iam::aws:policy/service-role/AWSLambdaRole
+      Policies:
+        - PolicyName: !Sub codebuild-access-${AWS::Region}
+          PolicyDocument:
+            Version: "2012-10-17"
+            Statement:
+              - Effect: Allow
+                Action:
+                  - "codebuild:StartBuild"
+                  - "codebuild:BatchGetBuilds"
+                Resource: "*"
+
   IAMNextflowJobRole:
     Type: AWS::IAM::Role
     Properties:
@@ -152,6 +224,105 @@ Resources:
       ManagedPolicyArns:
         - "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"
 
+  CodeBuildProject:
+    Type: AWS::CodeBuild::Project
+    Condition: BuildNextflowContainer
+    Properties:
+      Name: !Sub
+        - nextflow-container-${GUID}
+        - GUID: !Select [ 2, !Split [ "/", !Ref "AWS::StackId" ]]
+      Description: >-
+        Builds a nextflow container for running genomics workflows
+      Artifacts:
+        Type: NO_ARTIFACTS
+      Environment:
+        Type: LINUX_CONTAINER
+        Image: aws/codebuild/standard:1.0
+        ComputeType: BUILD_GENERAL1_SMALL
+        PrivilegedMode: True
+
+      ServiceRole: !GetAtt IAMCodeBuildRole.Arn
+      Source:
+        Type: NO_SOURCE
+        BuildSpec: !Sub
+          - |-
+            version: 0.2
+            phases:
+              pre_build:
+                commands:
+                  - echo "Docker Login to ECR"
+                  - $(aws ecr get-login --no-include-email --region ${AWS::Region})
+                  - echo "Creating ECR image repository"
+                  - aws ecr create-repository --repository-name nextflow || true
+                  - echo "Getting source code from Github"
+                  - git clone https://github.com/aws-samples/aws-genomics-workflows.git
+                  - cd aws-genomics-workflows
+                  - cd src/containers/nextflow
+              build:
+                commands:
+                  - echo "Building container"
+                  - docker build -t nextflow .
+              post_build:
+                commands:
+                  - echo "Tagging container image"
+                  - docker tag nextflow:latest ${REGISTRY}/nextflow:latest
+                  - echo "Pushing container image to ECR"
+                  - docker push ${REGISTRY}/nextflow:latest
+          - REGISTRY: !Sub ${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com
+
+      Tags: !FindInMap ["TagMap", "default", "tags"]
+
+  CodeBuildInvocation:
+    Type: Custom::CodeBuildInvocation
+    Condition: BuildNextflowContainer
+    Properties:
+      ServiceToken: !GetAtt CodeBuildInvocationFunction.Arn
+      BuildProject: !Ref CodeBuildProject
+
+  CodeBuildInvocationFunction:
+    Type: AWS::Lambda::Function
+    Condition: BuildNextflowContainer
+    Properties:
+      Handler: index.handler
+      Role: !GetAtt IAMLambdaExecutionRole.Arn
+      Runtime: python3.7
+      Timeout: 600
+      Code:
+        ZipFile: |
+          from time import sleep
+
+          import boto3
+          import cfnresponse
+
+          def handler(event, context):
+              """Run the CodeBuild project and send exactly one result to CloudFormation."""
+              result = cfnresponse.SUCCESS
+              try:
+                  if event['RequestType'] in ("Create", "Update"):
+                      codebuild = boto3.client('codebuild')
+                      build = codebuild.start_build(
+                          projectName=event["ResourceProperties"]["BuildProject"]
+                      )['build']
+
+                      build_id = build['id']
+                      status = build['buildStatus']
+                      while status == 'IN_PROGRESS':
+                          # Stop polling before Lambda kills the function unanswered.
+                          if context.get_remaining_time_in_millis() < 30000:
+                              raise RuntimeError("build still running at function timeout")
+                          sleep(10)
+                          build = codebuild.batch_get_builds(ids=[build_id])['builds'][0]
+                          status = build['buildStatus']
+
+                      if status != "SUCCEEDED":
+                          result = cfnresponse.FAILED
+              except Exception as err:
+                  # An unanswered custom resource stalls the stack, so always reply.
+                  print(err)
+                  result = cfnresponse.FAILED
+
+              cfnresponse.send(event, context, result, None)
+
   BatchNextflowJobDefinition:
     Type: AWS::Batch::JobDefinition
     Properties:
@@ -178,7 +349,11 @@ Resources:
         Memory: 1024
         JobRoleArn: !GetAtt IAMNextflowJobRole.Arn
         Vcpus: 2
-        Image: !Ref NextflowContainerImage
+        Image:
+          Fn::If:
+            - BuildNextflowContainer
+            - !Sub ${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com/nextflow:latest
+            - !Ref NextflowContainerImage
         Environment:
           - Name: "NF_JOB_QUEUE"
             Value: !Ref BatchDefaultJobQueue
@@ -218,6 +393,15 @@ Outputs:
       the bucket.
     Value: !Ref S3WorkDirPrefix
 
+  NextflowContainerImage:
+    Description: >-
+      The nextflow container used.
+    Value:
+      Fn::If:
+        - BuildNextflowContainer
+        - !Sub ${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com/nextflow:latest
+        - !Ref NextflowContainerImage
+
   NextflowJobDefinition:
     Description: >-
       Batch Job Definition that creates a nextflow head node for running workflows