Skip to content

Commit

Permalink
feature/remove-redundant-sso-config (#3362)
Browse files Browse the repository at this point in the history
* As this task is now live, there is no need to have a separate config value

* add full sync check
  • Loading branch information
chopkinsmade authored Nov 19, 2024
1 parent a9817ac commit 140008c
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 67 deletions.
58 changes: 24 additions & 34 deletions dataworkspace/dataworkspace/apps/applications/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1125,8 +1125,6 @@ def create_user_from_sso(
if changed:
logger.info("User %s with email %s has changed, saving updates", user.username, user.email)
user.save()
else:
logger.info("User %s with email %s has NOT changed", user.username, user.email)

return user

Expand Down Expand Up @@ -1220,31 +1218,24 @@ def _process_staff_sso_file(
if published_date > new_last_processed_datetime:
new_last_processed_datetime = published_date

if settings.S3_SSO_IMPORT_ENABLED:
logger.info(
"sync_s3_sso_users: User id %s published date %s is after previous date %s, creating the user from sso",
logger.info(
"sync_s3_sso_users: User id %s published date %s is after previous date %s, creating the user from sso",
user_id,
published_date,
last_processed_datetime,
)
try:
user = create_user_from_sso(
user_id,
published_date,
last_processed_datetime,
primary_email,
first_name,
last_name,
status,
check_tools_access_if_user_exists=True,
)
try:
user = create_user_from_sso(
user_id,
primary_email,
first_name,
last_name,
status,
check_tools_access_if_user_exists=True,
)

except IntegrityError:
logger.exception("sync_s3_sso_users: Failed to create user record")
else:
logger.info(
"sync_s3_sso_users: S3_SSO_IMPORT_ENABLED is disabled, user %s with published date %s will not be added",
user_id,
published_date,
)
except IntegrityError:
logger.exception("sync_s3_sso_users: Failed to create user record")

seen_user_ids.append(user_id)

Expand All @@ -1266,6 +1257,12 @@ def _get_seen_ids_and_last_processed(
return list(set(seen_user_ids)), latest_processed_datetime


def _is_full_sync(files):
    """Return True when every supplied SSO export object is a full export.

    An object counts as a full export when the substring "full" appears
    anywhere in its S3 key. Vacuously True for an empty file list.
    """
    full_sync = True
    for s3_object in files:
        if "full" not in s3_object.key:
            full_sync = False
            break
    logger.info("sync_s3_sso_users: is full sync: %s", full_sync)
    return full_sync


def _do_sync_s3_sso_users():
last_published = cache.get(
"s3_sso_sync_last_published",
Expand All @@ -1284,7 +1281,7 @@ def _do_sync_s3_sso_users():
files, s3_resource.meta.client, new_last_processed
)
new_last_processed = seen_result[1]
if len(seen_result[0]) > 0:
if len(seen_result[0]) > 0 and _is_full_sync(files):
unseen_user_profiles = (
Profile.objects.exclude(user__username__in=seen_result[0])
.filter(sso_status="active")
Expand All @@ -1294,15 +1291,8 @@ def _do_sync_s3_sso_users():
"sync_s3_sso_users: active users exist locally but not in SSO %s",
list(unseen_user_profiles.values_list("user__id", flat=True)),
)
if settings.S3_SSO_IMPORT_ENABLED:
logger.info(
"sync_s3_sso_users: Marking users as inactive",
)
unseen_user_profiles.update(sso_status="inactive")
else:
logger.info(
"sync_s3_sso_users: S3_SSO_IMPORT_ENABLED is FALSE, no changes to active user",
)

unseen_user_profiles.update(sso_status="inactive")

logger.info("sync_s3_sso_users: New last_published date for cache %s", new_last_processed)

Expand Down
1 change: 0 additions & 1 deletion dataworkspace/dataworkspace/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,6 @@ def aws_fargate_private_ip():
S3_PERMISSIONS_BOUNDARY_ARN = env["S3_PERMISSIONS_BOUNDARY_ARN"]
S3_ROLE_PREFIX = env["S3_ROLE_PREFIX"]
S3_NOTEBOOKS_BUCKET_ARN = env["S3_NOTEBOOKS_BUCKET_ARN"]
S3_SSO_IMPORT_ENABLED = env.get("S3_SSO_IMPORT_ENABLED", False)
EFS_ID = env["EFS_ID"]

YOUR_FILES_ENABLED = env.get("YOUR_FILES_ENABLED", "False") == "True"
Expand Down
109 changes: 77 additions & 32 deletions dataworkspace/dataworkspace/tests/applications/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
_do_get_staff_sso_s3_object_summaries,
remove_tools_access_for_users_with_expired_cert,
self_certify_renewal_email_notification,
_is_full_sync,
)

from dataworkspace.apps.datasets.constants import UserAccessType
Expand Down Expand Up @@ -545,15 +546,16 @@ def test_sync_with_cache_uses_previous_date(
)

@pytest.mark.django_db
@override_settings(S3_SSO_IMPORT_ENABLED=True)
def test_sync_with_active_user_not_in_file_set_to_inactive(
def test_sync_with_active_user_not_in_file_set_to_inactive_for_full_sync(
self,
):
with mock.patch("dataworkspace.apps.applications.utils.get_s3_resource"), mock.patch(
"dataworkspace.apps.applications.utils._do_get_staff_sso_s3_object_summaries"
) as mock_get_s3_files, mock.patch(
"dataworkspace.apps.applications.utils._get_seen_ids_and_last_processed"
) as mock_get_seen_ids_and_last_processed:
) as mock_get_seen_ids_and_last_processed, mock.patch(
"dataworkspace.apps.applications.utils._is_full_sync", return_value=True
):
mock_get_s3_files.return_value = [mock.MagicMock(key="a/today.jsonl.gz")]

inactive_user = UserFactory()
Expand Down Expand Up @@ -594,6 +596,39 @@ def test_sync_with_active_user_not_in_file_set_to_inactive(
== "inactive"
)

@pytest.mark.django_db
def test_sync_with_active_user_not_in_file_not_changed_for_partial_sync(
self,
):
with mock.patch("dataworkspace.apps.applications.utils.get_s3_resource"), mock.patch(
"dataworkspace.apps.applications.utils._do_get_staff_sso_s3_object_summaries"
) as mock_get_s3_files, mock.patch(
"dataworkspace.apps.applications.utils._get_seen_ids_and_last_processed"
) as mock_get_seen_ids_and_last_processed, mock.patch(
"dataworkspace.apps.applications.utils._is_full_sync", return_value=False
):
mock_get_s3_files.return_value = [mock.MagicMock(key="a/today.jsonl.gz")]

active_user_not_in_file = UserFactory()
active_user_not_in_file.profile.sso_status = "active"
active_user_not_in_file.profile.save()

mock_get_seen_ids_and_last_processed.return_value = (
[],
datetime.datetime.now(),
)

_do_sync_s3_sso_users()

User = get_user_model()

assert (
User.objects.filter(username=active_user_not_in_file.username)
.first()
.profile.sso_status
== "active"
)

@override_settings(
CACHES={"default": {"BACKEND": "django.core.cache.backends.dummy.DummyCache"}}
)
Expand Down Expand Up @@ -690,30 +725,6 @@ def test_get_seen_ids_and_last_processed_returns_unique_set_when_duplicate_ids_i
CACHES={"default": {"BACKEND": "django.core.cache.backends.dummy.DummyCache"}}
)
@pytest.mark.django_db
def test_process_staff_sso_with_s3_import_disabled_doesnt_add_user(self, sso_user_factory):
user_1 = sso_user_factory()
user_2 = sso_user_factory()
m_open = mock.mock_open(read_data="\n".join([json.dumps(user_1), json.dumps(user_2)]))

with mock.patch(
"dataworkspace.apps.applications.utils.smart_open",
m_open,
create=True,
):
_process_staff_sso_file(
mock.MagicMock(),
"file.jsonl.gz",
datetime.datetime.fromtimestamp(0, tz=datetime.datetime.now().astimezone().tzinfo),
)

User = get_user_model()
assert not User.objects.all()

@override_settings(
CACHES={"default": {"BACKEND": "django.core.cache.backends.dummy.DummyCache"}}
)
@override_settings(S3_SSO_IMPORT_ENABLED=True)
@pytest.mark.django_db
def test_process_staff_sso_file_without_cache_creates_all_users(self, sso_user_factory):
user_1 = sso_user_factory()
user_2 = sso_user_factory()
Expand All @@ -731,7 +742,7 @@ def test_process_staff_sso_file_without_cache_creates_all_users(self, sso_user_f
)

User = get_user_model()
all_users = User.objects.all()
all_users = User.objects.all().order_by("date_joined")

assert len(all_users) == 2
assert str(all_users[0].profile.sso_id) == user_1["object"]["dit:StaffSSO:User:userId"]
Expand All @@ -758,7 +769,6 @@ def test_process_staff_sso_file_without_cache_creates_all_users(self, sso_user_f
CACHES={"default": {"BACKEND": "django.core.cache.backends.dummy.DummyCache"}}
)
@pytest.mark.django_db
@override_settings(S3_SSO_IMPORT_ENABLED=True)
def test_process_staff_sso_file_with_cache_ignores_users_before_last_published(
self, sso_user_factory
):
Expand Down Expand Up @@ -803,7 +813,6 @@ def test_process_staff_sso_file_with_cache_ignores_users_before_last_published(
CACHES={"default": {"BACKEND": "django.core.cache.backends.dummy.DummyCache"}}
)
@pytest.mark.django_db
@override_settings(S3_SSO_IMPORT_ENABLED=True)
def test_process_staff_sso_updates_existing_users_email(self, sso_user_factory):
user_1 = sso_user_factory()

Expand Down Expand Up @@ -834,7 +843,6 @@ def test_process_staff_sso_updates_existing_users_email(self, sso_user_factory):
CACHES={"default": {"BACKEND": "django.core.cache.backends.dummy.DummyCache"}}
)
@pytest.mark.django_db
@override_settings(S3_SSO_IMPORT_ENABLED=True)
def test_process_staff_sso_creates_role_if_user_can_access_tools(self, sso_user_factory):
user_1 = sso_user_factory()

Expand Down Expand Up @@ -997,7 +1005,6 @@ def test_process_staff_sso_returns_same_last_published_when_user_older_than_prev
CACHES={"default": {"BACKEND": "django.core.cache.backends.dummy.DummyCache"}}
)
@pytest.mark.django_db
@override_settings(S3_SSO_IMPORT_ENABLED=True)
def test_process_staff_sso_returns_latest_user_last_published_when_newer_than_previous(
self, sso_user_factory
):
Expand All @@ -1019,6 +1026,44 @@ def test_process_staff_sso_returns_latest_user_last_published_when_newer_than_pr
assert process_staff_results[0] == [get_user_model().objects.all().first().username]
assert process_staff_results[1] == parser.parse(user_1["published"])

@override_settings(
CACHES={"default": {"BACKEND": "django.core.cache.backends.dummy.DummyCache"}}
)
@pytest.mark.parametrize(
("filenames"),
(
(["full"]),
(["full_", "full"]),
),
)
def test_is_full_sync_returns_true_when_all_files_contain_full(self, filenames):
files = [
mock.MagicMock(
key=filename,
)
for filename in filenames
]
assert _is_full_sync(files)

@override_settings(
CACHES={"default": {"BACKEND": "django.core.cache.backends.dummy.DummyCache"}}
)
@pytest.mark.parametrize(
("filenames"),
(
(["anything"]),
(["anything", "full"]),
),
)
def test_is_full_sync_returns_false_when_all_files_do_not_contain_full(self, filenames):
files = [
mock.MagicMock(
key=filename,
)
for filename in filenames
]
assert _is_full_sync(files) is False


class TestCreateToolsAccessIAMRoleTask:
@pytest.mark.django_db
Expand Down

0 comments on commit 140008c

Please sign in to comment.