Skip to content

Commit

Permalink
Fix race-condition when IAM Role has been created previously (#82)
Browse files Browse the repository at this point in the history
Signed-off-by: Matt Wise <[email protected]>
  • Loading branch information
diranged authored Jul 18, 2021
1 parent bd213ca commit feddadf
Show file tree
Hide file tree
Showing 3 changed files with 208 additions and 153 deletions.
15 changes: 7 additions & 8 deletions controllers/iamrole_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,29 +229,28 @@ func (r *IamroleReconciler) HandleReconcile(ctx context.Context, req ctrl.Reques
fallthrough
default:

resp, err := r.IAMClient.CreateRole(ctx, *input)
// Default behavior on new Iamrole resource state is to go off and create it
resp, err := r.IAMClient.EnsureRole(ctx, *input)
if err != nil {
log.Error(err, "error in creating a role")
state := iammanagerv1alpha1.Error

if strings.Contains(err.Error(), awsapi.RoleAlreadyExistsError) {
// This check verifies whether or not the IAM Role somehow already exists, but is allocated to another namespace based on the tag applied to it.
if strings.Contains(err.Error(), awsapi.RoleExistsAlreadyForOtherNamespace) {
state = iammanagerv1alpha1.RoleNameNotAvailable

//Role itself is not created
roleName = ""
}
r.Recorder.Event(iamRole, v1.EventTypeWarning, string(state), "Unable to create/update iam role due to error "+err.Error())
return r.UpdateStatus(ctx, iamRole, iammanagerv1alpha1.IamroleStatus{RetryCount: iamRole.Status.RetryCount + 1, RoleName: roleName, ErrorDescription: err.Error(), State: state, LastUpdatedTimestamp: metav1.Now()}, requeueTime)
}

//OK. Successful!!
// Is this IRSA role? If yes, Create/update Service Account with required annotation
flag, saName := utils.ParseIRSAAnnotation(ctx, iamRole)
if flag {
//CreateOrUpdateServiceAccount
roleARN := resp.RoleARN
if roleARN == "" {
roleARN = iamRole.Status.RoleARN
}
if err := k8s.NewK8sManagerClient(r.Client).CreateOrUpdateServiceAccount(ctx, saName, iamRole.Namespace, roleARN); err != nil {
if err := k8s.NewK8sManagerClient(r.Client).CreateOrUpdateServiceAccount(ctx, saName, iamRole.Namespace, resp.RoleARN); err != nil {
log.Error(err, "error in updating service account for IRSA role")
r.Recorder.Event(iamRole, v1.EventTypeWarning, string(iammanagerv1alpha1.Error), "Unable to create/update service account for IRSA role due to error "+err.Error())
return r.UpdateStatus(ctx, iamRole, iammanagerv1alpha1.IamroleStatus{RetryCount: iamRole.Status.RetryCount + 1, RoleName: roleName, ErrorDescription: err.Error(), State: iammanagerv1alpha1.Error, LastUpdatedTimestamp: metav1.Now()}, requeueTime)
Expand Down
162 changes: 105 additions & 57 deletions pkg/awsapi/iam.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,17 @@ import (
//IAMIface defines interface methods
type IAMIface interface {
CreateRole(ctx context.Context, req IAMRoleRequest)
EnsureRole(ctx context.Context, req IAMRoleRequest)
UpdateRole(ctx context.Context, req IAMRoleRequest)
DeleteRole(ctx context.Context, roleName string)
GetRole(ctx context.Context, roleName string)
AttachInlineRolePolicy(ctx context.Context, req IAMRoleRequest)
AddPermissionBoundary(ctx context.Context, req IAMRoleRequest) error
GetRolePolicy(ctx context.Context, req IAMRoleRequest) bool
}

const (
RoleAlreadyExistsError = "Please choose a different name"
RoleExistsAlreadyForOtherNamespace = "Please choose a different name"
)

//IAMRoleRequest struct
Expand All @@ -48,6 +50,20 @@ type IAMRoleResponse struct {
RoleID string
}

func NewIAMRoleResponseFromGetRole(output iam.GetRoleOutput) *IAMRoleResponse {
return &IAMRoleResponse{
RoleARN: aws.StringValue(output.Role.Arn),
RoleID: aws.StringValue(output.Role.RoleId),
}
}

func NewIAMRoleResponseFromCreateRole(output iam.CreateRoleOutput) *IAMRoleResponse {
return &IAMRoleResponse{
RoleARN: aws.StringValue(output.Role.Arn),
RoleID: aws.StringValue(output.Role.RoleId),
}
}

type IAM struct {
Client iamiface.IAMAPI
}
Expand All @@ -63,53 +79,16 @@ func NewIAM(region string) *IAM {
}
}

// CreateRole creates/updates the role
func (i *IAM) CreateRole(ctx context.Context, req IAMRoleRequest) (*IAMRoleResponse, error) {
log := log.Logger(ctx, "awsapi", "iam", "CreateRole")
// EnsureRole ensures that a role exists, and that it has the appropriate configuration
func (i *IAM) EnsureRole(ctx context.Context, req IAMRoleRequest) (*IAMRoleResponse, error) {
log := log.Logger(ctx, "awsapi", "iam", "EnsureRole")
log = log.WithValues("roleName", req.Name)
log.V(1).Info("Initiating api call")
input := &iam.CreateRoleInput{
AssumeRolePolicyDocument: aws.String(req.TrustPolicy),
RoleName: aws.String(req.Name),
Description: aws.String(req.Description),
MaxSessionDuration: aws.Int64(req.SessionDuration),
PermissionsBoundary: aws.String(req.ManagedPermissionBoundaryPolicy),
}

if err := input.Validate(); err != nil {
log.Error(err, "input validation failed")
return nil, err
}

roleAlreadyExists := false
iResp, err := i.Client.CreateRole(input)
// Get the role, or create it.
log.V(1).Info("Verifying that IAM Role exists")
role, err := i.GetOrCreateRole(ctx, req)
if err != nil {
if aerr, ok := err.(awserr.Error); ok {
switch aerr.Code() {
// Update the role to the latest spec if it is existed already
case iam.ErrCodeEntityAlreadyExistsException:
roleAlreadyExists = true
log.Info(iam.ErrCodeEntityAlreadyExistsException)
case iam.ErrCodeLimitExceededException:
log.Error(err, iam.ErrCodeLimitExceededException)
case iam.ErrCodeNoSuchEntityException:
log.Error(err, iam.ErrCodeNoSuchEntityException)
case iam.ErrCodeServiceFailureException:
log.Error(err, iam.ErrCodeServiceFailureException)
default:
log.Error(err, aerr.Error())
}
}
if !roleAlreadyExists {
return nil, err
}
}

resp := &IAMRoleResponse{}

if !roleAlreadyExists {
resp.RoleARN = aws.StringValue(iResp.Role.Arn)
resp.RoleID = aws.StringValue(iResp.Role.RoleId)
return &IAMRoleResponse{}, err
}

//Verify tags
Expand Down Expand Up @@ -156,16 +135,51 @@ func (i *IAM) CreateRole(ctx context.Context, req IAMRoleRequest) (*IAMRoleRespo
return &IAMRoleResponse{}, err
}

return resp, nil
return role, nil
}

// GetOrCreateRole will try to create a new IAM Role in AWS. If it exists already, it will
// use that role. In either case we return back an IAMRoleResponse{} object.
func (i *IAM) GetOrCreateRole(ctx context.Context, req IAMRoleRequest) (*IAMRoleResponse, error) {
log := log.Logger(ctx, "awsapi", "iam", "GetOrCreateRole")
log = log.WithValues("roleName", req.Name)

// Try getting the role. If the role already exists, just return it.
getResp, err := i.GetRole(ctx, req)
if err != nil {
if aerr, ok := err.(awserr.Error); ok {
switch aerr.Code() {
case iam.ErrCodeNoSuchEntityException:
log.Info("Role does not already exist")
default:
return nil, err
}
}
}
if getResp != nil {
return NewIAMRoleResponseFromGetRole(*getResp), nil
}

// If the role already exists - we'll figure that out based on the error that is returned by AWS.
createResp, err := i.CreateRole(ctx, req)
if err != nil {
return nil, err
}
if createResp != nil {
return NewIAMRoleResponseFromCreateRole(*createResp), nil
}

// if we got here, something really strange happened.. we got no errors, but also no responses?
return nil, nil
}

//VerifyTags function verifies the tags attached to the role
func (i *IAM) VerifyTags(ctx context.Context, req IAMRoleRequest) (*IAMRoleResponse, error) {
log := log.Logger(ctx, "awsapi", "iam", "VerifyTags")
log = log.WithValues("roleName", req.Name)
log.V(1).Info("Initiating api call")
//Lets first list the tags and look for namespace and cluster tags

//Lets first list the tags and look for namespace and cluster tags
listTags, err := i.Client.ListRoleTags(&iam.ListRoleTagsInput{
RoleName: aws.String(req.Name),
})
Expand All @@ -191,7 +205,7 @@ func (i *IAM) VerifyTags(ctx context.Context, req IAMRoleRequest) (*IAMRoleRespo
}

if flag {
return nil, fmt.Errorf("role name %s in AWS is not available. %s", req.Name, RoleAlreadyExistsError)
return nil, fmt.Errorf("role name %s in AWS is not available. %s", req.Name, RoleExistsAlreadyForOtherNamespace)
}

return &IAMRoleResponse{}, nil
Expand Down Expand Up @@ -405,27 +419,36 @@ func (i *IAM) AttachInlineRolePolicy(ctx context.Context, req IAMRoleRequest) (*
return &IAMRoleResponse{}, nil
}

//GetRole gets the role from aws iam
func (i *IAM) GetRole(ctx context.Context, req IAMRoleRequest) (*iam.GetRoleOutput, error) {
log := log.Logger(ctx, "awsapi", "iam", "GetRole")
// CreateRole will try to create an IAM Role, or return back Nil if it can not be created
func (i *IAM) CreateRole(ctx context.Context, req IAMRoleRequest) (*iam.CreateRoleOutput, error) {
log := log.Logger(ctx, "awsapi", "iam", "CreateRole")
log = log.WithValues("roleName", req.Name)
log.V(1).Info("Initiating api call")
// First get the iam role policy on the AWS IAM side
input := &iam.GetRoleInput{
RoleName: aws.String(req.Name),

input := &iam.CreateRoleInput{
AssumeRolePolicyDocument: aws.String(req.TrustPolicy),
RoleName: aws.String(req.Name),
Description: aws.String(req.Description),
MaxSessionDuration: aws.Int64(req.SessionDuration),
PermissionsBoundary: aws.String(req.ManagedPermissionBoundaryPolicy),
}

if err := input.Validate(); err != nil {
log.Error(err, "input validation failed")
//should log the error
return nil, err
}

resp, err := i.Client.GetRole(input)
// If the role already exists - we'll figure that out based on the error that is returned by AWS.
log.V(1).Info("Initiating api call")
resp, err := i.Client.CreateRole(input)

if err != nil {
if aerr, ok := err.(awserr.Error); ok {
switch aerr.Code() {
// Update the role to the latest spec if it is existed already
case iam.ErrCodeEntityAlreadyExistsException:
log.Info(iam.ErrCodeEntityAlreadyExistsException)
case iam.ErrCodeLimitExceededException:
log.Error(err, iam.ErrCodeLimitExceededException)
case iam.ErrCodeNoSuchEntityException:
log.Error(err, iam.ErrCodeNoSuchEntityException)
case iam.ErrCodeServiceFailureException:
Expand All @@ -437,6 +460,31 @@ func (i *IAM) GetRole(ctx context.Context, req IAMRoleRequest) (*iam.GetRoleOutp

return nil, err
}
log.V(1).Info("Successfully created the role")

return resp, nil
}

//GetRole gets the role from aws iam
func (i *IAM) GetRole(ctx context.Context, req IAMRoleRequest) (*iam.GetRoleOutput, error) {
log := log.Logger(ctx, "awsapi", "iam", "GetRole")
log = log.WithValues("roleName", req.Name)
log.V(1).Info("Initiating api call")
// First get the iam role policy on the AWS IAM side
input := &iam.GetRoleInput{
RoleName: aws.String(req.Name),
}

if err := input.Validate(); err != nil {
log.Error(err, "input validation failed")
//should log the error
return nil, err
}

resp, err := i.Client.GetRole(input)
if err != nil {
return nil, err
}
log.V(1).Info("Successfully able to get the role")

return resp, nil
Expand Down
Loading

0 comments on commit feddadf

Please sign in to comment.