From ea86a3c7cc3116c22d69fbce9cfe431ba2b29dd4 Mon Sep 17 00:00:00 2001 From: Zach Waterfield Date: Thu, 2 Jan 2025 10:33:51 -0500 Subject: [PATCH] feat: add a custom google backend for sso (#27082) --- ee/api/authentication.py | 58 ++++++++++++++++++++++++++++ ee/api/test/test_authentication.py | 61 ++++++++++++++++++++++++++++++ ee/settings.py | 2 +- posthog/api/test/test_signup.py | 6 +++ 4 files changed, 126 insertions(+), 1 deletion(-) diff --git a/ee/api/authentication.py b/ee/api/authentication.py index f2850bcfb5f61..92b0f5f470547 100644 --- a/ee/api/authentication.py +++ b/ee/api/authentication.py @@ -14,12 +14,14 @@ SAMLAuth, SAMLIdentityProvider, ) +from social_core.backends.google import GoogleOAuth2 from social_core.exceptions import AuthFailed, AuthMissingParameter from social_django.utils import load_backend, load_strategy from posthog.constants import AvailableFeature from posthog.models.organization import OrganizationMembership from posthog.models.organization_domain import OrganizationDomain +from social_django.models import UserSocialAuth @api_view(["GET"]) @@ -165,3 +167,59 @@ def get_user_id(self, details, response): USER_ID_ATTRIBUTES = ["name_id", "NAME_ID", "nameId", OID_USERID] uid = self._get_attr(response["attributes"], USER_ID_ATTRIBUTES) return f"{response['idp_name']}:{uid}" + + +class CustomGoogleOAuth2(GoogleOAuth2): + def get_user_id(self, details, response): + """ + Retrieve and migrate Google OAuth user identification. + + Note: While social-auth-core supports using Google's sub claim via + settings.USE_UNIQUE_USER_ID = True, this setting was not enabled historically + in our application. This led to emails being stored as uids instead of the + more stable Google sub identifier. + + 'sub' (subject identifier) is part of OpenID Connect and is guaranteed to be a + stable, unique identifier for the user within Google's system. It's designed + specifically for authentication purposes and won't change even if the user changes + their email or other profile details. + + This method handles two types of user identification: + 1. Legacy users: Originally stored with email as their uid (due to missing USE_UNIQUE_USER_ID) + 2. New users: Using Google's sub claim (unique identifier) as uid + + The method first checks if a user exists with the sub as uid. If not found, + it looks for a legacy user with email as uid and migrates them to use sub. + This ensures a smooth transition from email-based to sub-based identification + while maintaining backward compatibility. + + Args: + details: User details dictionary from OAuth response + response: Full OAuth response from Google + + Returns: + str: The Google sub claim to be used as uid + """ + email = response.get("email") + sub = response.get("sub") + + if not sub: + raise ValueError("Google OAuth response missing 'sub' claim") + + try: + # First try: Find user by sub (preferred method) + social_auth = UserSocialAuth.objects.get(provider="google-oauth2", uid=sub) + return sub + except UserSocialAuth.DoesNotExist: + pass + + try: + # Second try: Find and migrate legacy user using email as uid + social_auth = UserSocialAuth.objects.get(provider="google-oauth2", uid=email) + # Migrate user from email to sub + social_auth.uid = sub + social_auth.save() + return sub + except UserSocialAuth.DoesNotExist: + # No existing user found - use sub for new account + return sub diff --git a/ee/api/test/test_authentication.py b/ee/api/test/test_authentication.py index f9f8eec7d2d30..4f8a3adb5afd1 100644 --- a/ee/api/test/test_authentication.py +++ b/ee/api/test/test_authentication.py @@ -12,12 +12,14 @@ from freezegun.api import freeze_time from rest_framework import status from social_core.exceptions import AuthFailed, AuthMissingParameter +from social_django.models import UserSocialAuth from ee.api.test.base import APILicensedTest from ee.models.license import License from posthog.constants import AvailableFeature from posthog.models import OrganizationMembership, User from posthog.models.organization_domain import OrganizationDomain +from ee.api.authentication import CustomGoogleOAuth2 SAML_MOCK_SETTINGS = { "SOCIAL_AUTH_SAML_SECURITY_CONFIG": { @@ -707,3 +709,62 @@ def test_xmlsec_and_lxml(self): assert "1.3.13" == xmlsec.__version__ assert "4.9.4" == lxml.__version__ + + +class TestCustomGoogleOAuth2(APILicensedTest): + def setUp(self): + super().setUp() + self.google_oauth = CustomGoogleOAuth2() + self.details = {"email": "test@posthog.com"} + self.sub = "google-oauth2|123456789" + + def test_get_user_id_existing_user_with_sub(self): + """Test that a user with sub as uid continues using that sub.""" + # Create user with sub as uid + UserSocialAuth.objects.create(provider="google-oauth2", uid=self.sub, user=self.user) + + response = {"email": "test@posthog.com", "sub": self.sub} + + uid = self.google_oauth.get_user_id(self.details, response) + + self.assertEqual(uid, self.sub) + # Verify no migration occurred (count should be 1) + self.assertEqual(UserSocialAuth.objects.filter(provider="google-oauth2").count(), 1) + # Verify uid is still sub + self.assertEqual(UserSocialAuth.objects.get(provider="google-oauth2").uid, self.sub) + + def test_get_user_id_migrates_email_to_sub(self): + """Test that a user with email as uid gets migrated to using sub.""" + # Create user with email as uid (legacy format) + social_auth = UserSocialAuth.objects.create(provider="google-oauth2", uid="test@posthog.com", user=self.user) + + response = {"email": "test@posthog.com", "sub": self.sub} + + uid = self.google_oauth.get_user_id(self.details, response) + + self.assertEqual(uid, self.sub) + # Verify the uid was updated + social_auth.refresh_from_db() + self.assertEqual(social_auth.uid, self.sub) + + def test_get_user_id_new_user_uses_sub(self): + """Test that a new user gets sub as uid.""" + response = {"email": "test@posthog.com", "sub": self.sub} + + uid = self.google_oauth.get_user_id(self.details, response) + + self.assertEqual(uid, self.sub) + # Verify no UserSocialAuth objects were created + self.assertEqual(UserSocialAuth.objects.filter(provider="google-oauth2").count(), 0) + + def test_get_user_id_missing_sub_raises_error(self): + """Test that missing sub in response raises ValueError.""" + response = { + "email": "test@posthog.com", + # no sub provided + } + + with self.assertRaises(ValueError) as e: + self.google_oauth.get_user_id(self.details, response) + + self.assertEqual(str(e.exception), "Google OAuth response missing 'sub' claim") diff --git a/ee/settings.py b/ee/settings.py index 64e3bfc5b8b3c..0a2be3cb508ef 100644 --- a/ee/settings.py +++ b/ee/settings.py @@ -12,7 +12,7 @@ AUTHENTICATION_BACKENDS = [ *AUTHENTICATION_BACKENDS, "ee.api.authentication.MultitenantSAMLAuth", - "social_core.backends.google.GoogleOAuth2", + "ee.api.authentication.CustomGoogleOAuth2", ] # SAML base attributes diff --git a/posthog/api/test/test_signup.py b/posthog/api/test/test_signup.py index 4d832a42791ec..a84a4cd9555b0 100644 --- a/posthog/api/test/test_signup.py +++ b/posthog/api/test/test_signup.py @@ -655,6 +655,7 @@ def run_test_for_allowed_domain( mock_request.return_value.json.return_value = { "access_token": "123", "email": "jane@hogflix.posthog.com", + "sub": "123", } response = self.client.get(url, follow=True) @@ -789,6 +790,7 @@ def test_social_signup_with_allowed_domain_on_cloud_reverse(self, mock_sso_provi mock_request.return_value.json.return_value = { "access_token": "123", "email": "jane@hogflix.posthog.com", + "sub": "123", } response = self.client.get(url, follow=True) @@ -828,6 +830,7 @@ def test_cannot_social_signup_with_allowed_but_jit_provisioning_disabled(self, m mock_request.return_value.json.return_value = { "access_token": "123", "email": "alice@posthog.net", + "sub": "123", } response = self.client.get(url, follow=True) @@ -857,6 +860,7 @@ def test_cannot_social_signup_with_allowed_but_unverified_domain(self, mock_sso_ mock_request.return_value.json.return_value = { "access_token": "123", "email": "alice@posthog.net", + "sub": "123", } response = self.client.get(url, follow=True) @@ -886,6 +890,7 @@ def test_api_cannot_use_allow_list_for_different_domain(self, mock_sso_providers mock_request.return_value.json.return_value = { "access_token": "123", "email": "alice@evil.com", + "sub": "123", } # note evil.com response = self.client.get(url, follow=True) @@ -911,6 +916,7 @@ def test_social_signup_to_existing_org_without_allowed_domain_on_cloud(self, moc mock_request.return_value.json.return_value = { "access_token": "123", "email": "jane@hogflix.posthog.com", + "sub": "123", } response = self.client.get(url, follow=True)