From 674dd2bec64604e1846325c3f0a125eaa7cda3c2 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 31 Oct 2024 23:51:03 +0000 Subject: [PATCH 01/14] Add Google Sheets integration for GitHub user verification - Add Google Sheets client for fetching allowed usernames - Modify GitHub authentication to support both text file and Google Sheets - Add setup documentation for Google Sheets integration - Maintain backward compatibility with existing text file approach --- GOOGLE_SHEETS_SETUP.md | 36 +++++++++++++++++ openhands/server/github.py | 64 ++++++++++++++++++++++++------- openhands/server/sheets_client.py | 48 +++++++++++++++++++++++ 3 files changed, 134 insertions(+), 14 deletions(-) create mode 100644 GOOGLE_SHEETS_SETUP.md create mode 100644 openhands/server/sheets_client.py diff --git a/GOOGLE_SHEETS_SETUP.md b/GOOGLE_SHEETS_SETUP.md new file mode 100644 index 000000000000..3e268dfe6f9d --- /dev/null +++ b/GOOGLE_SHEETS_SETUP.md @@ -0,0 +1,36 @@ +# Setting up Google Sheets Integration + +To use the Google Sheets integration for GitHub user verification, follow these steps: + +1. Add the required dependencies to your project: +```bash +poetry add google-api-python-client google-auth-httplib2 google-auth-oauthlib +``` + +2. Set up environment variables: +```bash +# Existing variables +GITHUB_CLIENT_ID=your_github_client_id +GITHUB_CLIENT_SECRET=your_github_client_secret +GITHUB_USER_LIST_FILE=/path/to/users.txt # Optional: Keep for backwards compatibility + +# New variables for Google Sheets +GOOGLE_CREDENTIALS_FILE=/path/to/service-account-credentials.json +GITHUB_USERS_SHEET_ID=your_google_sheet_id +``` + +3. Create a Google Cloud Project and enable the Google Sheets API: + - Go to Google Cloud Console + - Create a new project or select an existing one + - Enable the Google Sheets API + - Create a service account and download the JSON credentials + - Save the credentials file securely and set its path in GOOGLE_CREDENTIALS_FILE + +4. Set up your Google Sheet: + - Create a new Google Sheet + - Share it with the service account email (found in the credentials JSON) + - Put GitHub usernames in column A + - Copy the Sheet ID from the URL (the long string between /d/ and /edit) + - Set the Sheet ID in GITHUB_USERS_SHEET_ID + +The system will now check both the text file (if configured) and Google Sheet for valid GitHub usernames. A user will be allowed if they appear in either source. \ No newline at end of file diff --git a/openhands/server/github.py b/openhands/server/github.py index a7bdef3543d4..f18754d6e8ed 100644 --- a/openhands/server/github.py +++ b/openhands/server/github.py @@ -1,30 +1,65 @@ import os +from typing import List, Optional import httpx from openhands.core.logger import openhands_logger as logger +from openhands.server.sheets_client import GoogleSheetsClient GITHUB_CLIENT_ID = os.getenv('GITHUB_CLIENT_ID', '').strip() GITHUB_CLIENT_SECRET = os.getenv('GITHUB_CLIENT_SECRET', '').strip() -GITHUB_USER_LIST = None - - -def load_github_user_list(): - global GITHUB_USER_LIST - waitlist = os.getenv('GITHUB_USER_LIST_FILE') - if waitlist: - with open(waitlist, 'r') as f: - GITHUB_USER_LIST = [line.strip() for line in f if line.strip()] +class UserVerifier: + def __init__(self): + self.file_users: Optional[List[str]] = None + self.sheets_client: Optional[GoogleSheetsClient] = None + self.spreadsheet_id: Optional[str] = None + + # Initialize from environment variables + self._init_file_users() + self._init_sheets_client() + + def _init_file_users(self): + """Load users from text file if configured""" + waitlist = os.getenv('GITHUB_USER_LIST_FILE') + if waitlist and os.path.exists(waitlist): + with open(waitlist, 'r') as f: + self.file_users = [line.strip() for line in f if line.strip()] + + def _init_sheets_client(self): + """Initialize Google Sheets client if configured""" + creds_path = os.getenv('GOOGLE_CREDENTIALS_FILE') + sheet_id = os.getenv('GITHUB_USERS_SHEET_ID') + + if creds_path and sheet_id: + self.sheets_client = GoogleSheetsClient(creds_path) + self.spreadsheet_id = sheet_id + + def is_user_allowed(self, username: str) -> bool: + """Check if user is allowed based on file and/or sheet configuration""" + # If no verification sources are configured, allow all users + if not self.file_users and not self.sheets_client: + return True + + # Check file-based users + if self.file_users and username in self.file_users: + return True + + # Check Google Sheets users + if self.sheets_client and self.spreadsheet_id: + sheet_users = self.sheets_client.get_usernames(self.spreadsheet_id) + if username in sheet_users: + return True + + return False -load_github_user_list() +# Global instance of user verifier +user_verifier = UserVerifier() async def authenticate_github_user(auth_token) -> bool: logger.info('Checking GitHub token') - if not GITHUB_USER_LIST: - return True - + if not auth_token: logger.warning('No GitHub token provided') return False @@ -33,7 +68,8 @@ async def authenticate_github_user(auth_token) -> bool: if error: logger.warning(f'Invalid GitHub token: {error}') return False - if login not in GITHUB_USER_LIST: + + if not user_verifier.is_user_allowed(login): logger.warning(f'GitHub user {login} not in allow list') return False diff --git a/openhands/server/sheets_client.py b/openhands/server/sheets_client.py new file mode 100644 index 000000000000..ccb62e3104bb --- /dev/null +++ b/openhands/server/sheets_client.py @@ -0,0 +1,48 @@ +from typing import List, Optional +import os +from google.oauth2 import service_account +from googleapiclient.discovery import build +from googleapiclient.errors import HttpError + +class GoogleSheetsClient: + def __init__(self, credentials_path: str): + """Initialize Google Sheets client with service account credentials. + + Args: + credentials_path: Path to the service account JSON credentials file + """ + self.credentials = None + self.service = None + if os.path.exists(credentials_path): + self.credentials = service_account.Credentials.from_service_account_file( + credentials_path, + scopes=['https://www.googleapis.com/auth/spreadsheets.readonly'] + ) + self.service = build('sheets', 'v4', credentials=self.credentials) + + def get_usernames(self, spreadsheet_id: str, range_name: str = 'A:A') -> List[str]: + """Get list of usernames from specified Google Sheet. + + Args: + spreadsheet_id: The ID of the Google Sheet + range_name: The A1 notation of the range to fetch + + Returns: + List of usernames from the sheet + """ + if not self.service: + return [] + + try: + result = self.service.spreadsheets().values().get( + spreadsheetId=spreadsheet_id, + range=range_name + ).execute() + + values = result.get('values', []) + # Flatten the list and remove empty strings + return [str(cell[0]).strip() for cell in values if cell and cell[0].strip()] + + except HttpError as err: + print(f"Error accessing Google Sheet: {err}") + return [] \ No newline at end of file From 714aa5bf21b6f7b2a3c3880ac66ec3770e341175 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 31 Oct 2024 23:57:28 +0000 Subject: [PATCH 02/14] Remove setup documentation file - will add to main docs instead --- GOOGLE_SHEETS_SETUP.md | 36 ------------------------------------ 1 file changed, 36 deletions(-) delete mode 100644 GOOGLE_SHEETS_SETUP.md diff --git a/GOOGLE_SHEETS_SETUP.md b/GOOGLE_SHEETS_SETUP.md deleted file mode 100644 index 3e268dfe6f9d..000000000000 --- a/GOOGLE_SHEETS_SETUP.md +++ /dev/null @@ -1,36 +0,0 @@ -# Setting up Google Sheets Integration - -To use the Google Sheets integration for GitHub user verification, follow these steps: - -1. Add the required dependencies to your project: -```bash -poetry add google-api-python-client google-auth-httplib2 google-auth-oauthlib -``` - -2. Set up environment variables: -```bash -# Existing variables -GITHUB_CLIENT_ID=your_github_client_id -GITHUB_CLIENT_SECRET=your_github_client_secret -GITHUB_USER_LIST_FILE=/path/to/users.txt # Optional: Keep for backwards compatibility - -# New variables for Google Sheets -GOOGLE_CREDENTIALS_FILE=/path/to/service-account-credentials.json -GITHUB_USERS_SHEET_ID=your_google_sheet_id -``` - -3. Create a Google Cloud Project and enable the Google Sheets API: - - Go to Google Cloud Console - - Create a new project or select an existing one - - Enable the Google Sheets API - - Create a service account and download the JSON credentials - - Save the credentials file securely and set its path in GOOGLE_CREDENTIALS_FILE - -4. Set up your Google Sheet: - - Create a new Google Sheet - - Share it with the service account email (found in the credentials JSON) - - Put GitHub usernames in column A - - Copy the Sheet ID from the URL (the long string between /d/ and /edit) - - Set the Sheet ID in GITHUB_USERS_SHEET_ID - -The system will now check both the text file (if configured) and Google Sheet for valid GitHub usernames. A user will be allowed if they appear in either source. \ No newline at end of file From 46f8cc8382a9ff66bd307bfbfc2cbe0da4d40cd8 Mon Sep 17 00:00:00 2001 From: openhands Date: Fri, 1 Nov 2024 00:00:42 +0000 Subject: [PATCH 03/14] Add Google Sheets API dependencies --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index b07fc0aa29a8..28f54017b30e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,9 @@ datasets = "*" pandas = "*" litellm = "^1.51.1" google-generativeai = "*" # To use litellm with Gemini Pro API +google-api-python-client = "*" # For Google Sheets API +google-auth-httplib2 = "*" # For Google Sheets authentication +google-auth-oauthlib = "*" # For Google Sheets OAuth termcolor = "*" seaborn = "*" docker = "*" From e26c9dd9f764bc8f51d5195dbdc873669d1a77bd Mon Sep 17 00:00:00 2001 From: openhands Date: Fri, 1 Nov 2024 00:04:34 +0000 Subject: [PATCH 04/14] Update poetry.lock with Google Sheets dependencies --- poetry.lock | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index 6481fe5bafa5..6a2791471358 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. [[package]] name = "aenum" @@ -2319,6 +2319,24 @@ files = [ google-auth = "*" httplib2 = ">=0.19.0" +[[package]] +name = "google-auth-oauthlib" +version = "1.2.1" +description = "Google Authentication Library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "google_auth_oauthlib-1.2.1-py2.py3-none-any.whl", hash = "sha256:2d58a27262d55aa1b87678c3ba7142a080098cbc2024f903c62355deb235d91f"}, + {file = "google_auth_oauthlib-1.2.1.tar.gz", hash = "sha256:afd0cad092a2eaa53cd8e8298557d6de1034c6cb4a740500b5357b648af97263"}, +] + +[package.dependencies] +google-auth = ">=2.15.0" +requests-oauthlib = ">=0.7.0" + +[package.extras] +tool = ["click (>=6.0.0)"] + [[package]] name = "google-cloud-aiplatform" version = "1.70.0" @@ -10109,4 +10127,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "2b268ef696ace0d8170276407dbdeb414134477839ebe4b7ecf29b1a1fe2cef3" +content-hash = "2a4f90bb5c7f7d82160f57d71af7e81c7acef69426d0e1e46e1da09972a6215f" From 29e0c6223e318e38d10f08baaeb556d3c3013e97 Mon Sep 17 00:00:00 2001 From: openhands Date: Fri, 1 Nov 2024 00:06:15 +0000 Subject: [PATCH 05/14] Simplify Google Sheets auth to use workload identity --- openhands/server/github.py | 5 ++--- openhands/server/sheets_client.py | 23 +++++++---------------- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/openhands/server/github.py b/openhands/server/github.py index f18754d6e8ed..68d1a487c404 100644 --- a/openhands/server/github.py +++ b/openhands/server/github.py @@ -28,11 +28,10 @@ def _init_file_users(self): def _init_sheets_client(self): """Initialize Google Sheets client if configured""" - creds_path = os.getenv('GOOGLE_CREDENTIALS_FILE') sheet_id = os.getenv('GITHUB_USERS_SHEET_ID') - if creds_path and sheet_id: - self.sheets_client = GoogleSheetsClient(creds_path) + if sheet_id: + self.sheets_client = GoogleSheetsClient() self.spreadsheet_id = sheet_id def is_user_allowed(self, username: str) -> bool: diff --git a/openhands/server/sheets_client.py b/openhands/server/sheets_client.py index ccb62e3104bb..652305f7396e 100644 --- a/openhands/server/sheets_client.py +++ b/openhands/server/sheets_client.py @@ -1,24 +1,15 @@ -from typing import List, Optional -import os -from google.oauth2 import service_account +from typing import List +from google.auth import default from googleapiclient.discovery import build from googleapiclient.errors import HttpError class GoogleSheetsClient: - def __init__(self, credentials_path: str): - """Initialize Google Sheets client with service account credentials. - - Args: - credentials_path: Path to the service account JSON credentials file + def __init__(self): + """Initialize Google Sheets client using workload identity. + Uses application default credentials which supports workload identity when running in GCP. """ - self.credentials = None - self.service = None - if os.path.exists(credentials_path): - self.credentials = service_account.Credentials.from_service_account_file( - credentials_path, - scopes=['https://www.googleapis.com/auth/spreadsheets.readonly'] - ) - self.service = build('sheets', 'v4', credentials=self.credentials) + credentials, _ = default(scopes=['https://www.googleapis.com/auth/spreadsheets.readonly']) + self.service = build('sheets', 'v4', credentials=credentials) def get_usernames(self, spreadsheet_id: str, range_name: str = 'A:A') -> List[str]: """Get list of usernames from specified Google Sheet. From 8efc56136c03c85c84b58081e01ec788baa282be Mon Sep 17 00:00:00 2001 From: openhands Date: Fri, 1 Nov 2024 00:12:18 +0000 Subject: [PATCH 06/14] Add extensive logging to GitHub auth and Google Sheets integration --- openhands/server/github.py | 64 +++++++++++++++++++++++++------ openhands/server/sheets_client.py | 25 +++++++++--- 2 files changed, 72 insertions(+), 17 deletions(-) diff --git a/openhands/server/github.py b/openhands/server/github.py index 68d1a487c404..3cc727d741d3 100644 --- a/openhands/server/github.py +++ b/openhands/server/github.py @@ -11,6 +11,7 @@ class UserVerifier: def __init__(self): + logger.info('Initializing UserVerifier') self.file_users: Optional[List[str]] = None self.sheets_client: Optional[GoogleSheetsClient] = None self.spreadsheet_id: Optional[str] = None @@ -18,38 +19,70 @@ def __init__(self): # Initialize from environment variables self._init_file_users() self._init_sheets_client() + + # Log initialization status + if self.file_users: + logger.info(f'Text file verification enabled with {len(self.file_users)} users') + if self.sheets_client and self.spreadsheet_id: + logger.info(f'Google Sheets verification enabled with sheet ID: {self.spreadsheet_id}') + if not self.file_users and not self.sheets_client: + logger.warning('No verification sources configured - all users will be allowed') def _init_file_users(self): """Load users from text file if configured""" waitlist = os.getenv('GITHUB_USER_LIST_FILE') - if waitlist and os.path.exists(waitlist): + if not waitlist: + logger.info('GITHUB_USER_LIST_FILE not configured') + return + + if not os.path.exists(waitlist): + logger.warning(f'User list file not found: {waitlist}') + return + + try: with open(waitlist, 'r') as f: self.file_users = [line.strip() for line in f if line.strip()] + logger.info(f'Successfully loaded {len(self.file_users)} users from {waitlist}') + except Exception as e: + logger.error(f'Error reading user list file {waitlist}: {str(e)}') def _init_sheets_client(self): """Initialize Google Sheets client if configured""" sheet_id = os.getenv('GITHUB_USERS_SHEET_ID') - if sheet_id: - self.sheets_client = GoogleSheetsClient() - self.spreadsheet_id = sheet_id + if not sheet_id: + logger.info('GITHUB_USERS_SHEET_ID not configured') + return + + logger.info('Initializing Google Sheets integration') + self.sheets_client = GoogleSheetsClient() + self.spreadsheet_id = sheet_id def is_user_allowed(self, username: str) -> bool: """Check if user is allowed based on file and/or sheet configuration""" + logger.info(f'Checking if user {username} is allowed') + # If no verification sources are configured, allow all users if not self.file_users and not self.sheets_client: + logger.info('No verification sources configured - allowing all users') return True # Check file-based users - if self.file_users and username in self.file_users: - return True + if self.file_users: + if username in self.file_users: + logger.info(f'User {username} found in text file allowlist') + return True + logger.debug(f'User {username} not found in text file allowlist') # Check Google Sheets users if self.sheets_client and self.spreadsheet_id: sheet_users = self.sheets_client.get_usernames(self.spreadsheet_id) if username in sheet_users: + logger.info(f'User {username} found in Google Sheets allowlist') return True + logger.debug(f'User {username} not found in Google Sheets allowlist') + logger.info(f'User {username} not found in any allowlist') return False # Global instance of user verifier @@ -87,6 +120,7 @@ async def get_github_user(token: str) -> tuple[str | None, str | None]: If successful, error_message is None If failed, login is None and error_message contains the error """ + logger.info('Fetching GitHub user info from token') headers = { 'Accept': 'application/vnd.github+json', 'Authorization': f'Bearer {token}', @@ -94,14 +128,20 @@ async def get_github_user(token: str) -> tuple[str | None, str | None]: } try: async with httpx.AsyncClient() as client: + logger.debug('Making request to GitHub API') response = await client.get('https://api.github.com/user', headers=headers) + if response.status_code == 200: user_data = response.json() - return user_data.get('login'), None + login = user_data.get('login') + logger.info(f'Successfully retrieved GitHub user: {login}') + return login, None else: - return ( - None, - f'GitHub API error: {response.status_code} - {response.text}', - ) + error = f'GitHub API error: {response.status_code} - {response.text}' + logger.error(error) + return None, error + except Exception as e: - return None, f'Error connecting to GitHub: {str(e)}' + error = f'Error connecting to GitHub: {str(e)}' + logger.error(error) + return None, error diff --git a/openhands/server/sheets_client.py b/openhands/server/sheets_client.py index 652305f7396e..47e1ca5694cf 100644 --- a/openhands/server/sheets_client.py +++ b/openhands/server/sheets_client.py @@ -3,13 +3,22 @@ from googleapiclient.discovery import build from googleapiclient.errors import HttpError +from openhands.core.logger import openhands_logger as logger + class GoogleSheetsClient: def __init__(self): """Initialize Google Sheets client using workload identity. Uses application default credentials which supports workload identity when running in GCP. """ - credentials, _ = default(scopes=['https://www.googleapis.com/auth/spreadsheets.readonly']) - self.service = build('sheets', 'v4', credentials=credentials) + logger.info('Initializing Google Sheets client with workload identity') + try: + credentials, project = default(scopes=['https://www.googleapis.com/auth/spreadsheets.readonly']) + logger.info(f'Successfully obtained credentials for project: {project}') + self.service = build('sheets', 'v4', credentials=credentials) + logger.info('Successfully initialized Google Sheets API service') + except Exception as e: + logger.error(f'Failed to initialize Google Sheets client: {str(e)}') + self.service = None def get_usernames(self, spreadsheet_id: str, range_name: str = 'A:A') -> List[str]: """Get list of usernames from specified Google Sheet. @@ -22,18 +31,24 @@ def get_usernames(self, spreadsheet_id: str, range_name: str = 'A:A') -> List[st List of usernames from the sheet """ if not self.service: + logger.error('Google Sheets service not initialized') return [] try: + logger.info(f'Fetching usernames from sheet {spreadsheet_id}, range {range_name}') result = self.service.spreadsheets().values().get( spreadsheetId=spreadsheet_id, range=range_name ).execute() values = result.get('values', []) - # Flatten the list and remove empty strings - return [str(cell[0]).strip() for cell in values if cell and cell[0].strip()] + usernames = [str(cell[0]).strip() for cell in values if cell and cell[0].strip()] + logger.info(f'Successfully fetched {len(usernames)} usernames from Google Sheet') + return usernames except HttpError as err: - print(f"Error accessing Google Sheet: {err}") + logger.error(f'Error accessing Google Sheet {spreadsheet_id}: {err}') + return [] + except Exception as e: + logger.error(f'Unexpected error accessing Google Sheet {spreadsheet_id}: {str(e)}') return [] \ No newline at end of file From 715fe01797ee511e50d69674daf5b7b9b2a60932 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 20:51:33 -0400 Subject: [PATCH 07/14] fix some stuff --- openhands/server/github.py | 86 ++++++++++++++------------------------ 1 file changed, 32 insertions(+), 54 deletions(-) diff --git a/openhands/server/github.py b/openhands/server/github.py index 3cc727d741d3..03a835f7df9f 100644 --- a/openhands/server/github.py +++ b/openhands/server/github.py @@ -9,98 +9,87 @@ GITHUB_CLIENT_ID = os.getenv('GITHUB_CLIENT_ID', '').strip() GITHUB_CLIENT_SECRET = os.getenv('GITHUB_CLIENT_SECRET', '').strip() + class UserVerifier: def __init__(self): logger.info('Initializing UserVerifier') self.file_users: Optional[List[str]] = None self.sheets_client: Optional[GoogleSheetsClient] = None self.spreadsheet_id: Optional[str] = None - + # Initialize from environment variables self._init_file_users() self._init_sheets_client() - - # Log initialization status - if self.file_users: - logger.info(f'Text file verification enabled with {len(self.file_users)} users') - if self.sheets_client and self.spreadsheet_id: - logger.info(f'Google Sheets verification enabled with sheet ID: {self.spreadsheet_id}') - if not self.file_users and not self.sheets_client: - logger.warning('No verification sources configured - all users will be allowed') - + def _init_file_users(self): """Load users from text file if configured""" waitlist = os.getenv('GITHUB_USER_LIST_FILE') if not waitlist: logger.info('GITHUB_USER_LIST_FILE not configured') return - + if not os.path.exists(waitlist): - logger.warning(f'User list file not found: {waitlist}') - return - + logger.error(f'User list file not found: {waitlist}') + raise FileNotFoundError(f'User list file not found: {waitlist}') + try: with open(waitlist, 'r') as f: self.file_users = [line.strip() for line in f if line.strip()] - logger.info(f'Successfully loaded {len(self.file_users)} users from {waitlist}') + logger.info( + f'Successfully loaded {len(self.file_users)} users from {waitlist}' + ) except Exception as e: logger.error(f'Error reading user list file {waitlist}: {str(e)}') - + def _init_sheets_client(self): """Initialize Google Sheets client if configured""" sheet_id = os.getenv('GITHUB_USERS_SHEET_ID') - + if not sheet_id: logger.info('GITHUB_USERS_SHEET_ID not configured') return - + logger.info('Initializing Google Sheets integration') self.sheets_client = GoogleSheetsClient() self.spreadsheet_id = sheet_id - + def is_user_allowed(self, username: str) -> bool: """Check if user is allowed based on file and/or sheet configuration""" - logger.info(f'Checking if user {username} is allowed') - - # If no verification sources are configured, allow all users if not self.file_users and not self.sheets_client: - logger.info('No verification sources configured - allowing all users') + logger.debug('No verification sources configured - allowing all users') return True - - # Check file-based users + logger.info(f'Checking if GitHub user {username} is allowed') + if self.file_users: if username in self.file_users: logger.info(f'User {username} found in text file allowlist') return True logger.debug(f'User {username} not found in text file allowlist') - - # Check Google Sheets users + if self.sheets_client and self.spreadsheet_id: sheet_users = self.sheets_client.get_usernames(self.spreadsheet_id) if username in sheet_users: logger.info(f'User {username} found in Google Sheets allowlist') return True logger.debug(f'User {username} not found in Google Sheets allowlist') - + logger.info(f'User {username} not found in any allowlist') return False + # Global instance of user verifier user_verifier = UserVerifier() async def authenticate_github_user(auth_token) -> bool: logger.info('Checking GitHub token') - + if not auth_token: logger.warning('No GitHub token provided') return False - login, error = await get_github_user(auth_token) - if error: - logger.warning(f'Invalid GitHub token: {error}') - return False - + login = await get_github_user(auth_token) + if not user_verifier.is_user_allowed(login): logger.warning(f'GitHub user {login} not in allow list') return False @@ -109,7 +98,7 @@ async def authenticate_github_user(auth_token) -> bool: return True -async def get_github_user(token: str) -> tuple[str | None, str | None]: +async def get_github_user(token: str) -> str: """Get GitHub user info from token. Args: @@ -126,22 +115,11 @@ async def get_github_user(token: str) -> tuple[str | None, str | None]: 'Authorization': f'Bearer {token}', 'X-GitHub-Api-Version': '2022-11-28', } - try: - async with httpx.AsyncClient() as client: - logger.debug('Making request to GitHub API') - response = await client.get('https://api.github.com/user', headers=headers) - - if response.status_code == 200: - user_data = response.json() - login = user_data.get('login') - logger.info(f'Successfully retrieved GitHub user: {login}') - return login, None - else: - error = f'GitHub API error: {response.status_code} - {response.text}' - logger.error(error) - return None, error - - except Exception as e: - error = f'Error connecting to GitHub: {str(e)}' - logger.error(error) - return None, error + async with httpx.AsyncClient() as client: + logger.debug('Making request to GitHub API') + response = await client.get('https://api.github.com/user', headers=headers) + response.raise_for_status() + user_data = response.json() + login = user_data.get('login') + logger.info(f'Successfully retrieved GitHub user: {login}') + return login From 9983711c2199f44b3e0b1ea3ea158c8d3d4525cd Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 20:53:11 -0400 Subject: [PATCH 08/14] empty commit From 03bd146044c97dace1c4cd2edd1cee4932f717f0 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 20:54:11 -0400 Subject: [PATCH 09/14] revert --- openhands/server/github.py | 111 ++++++++++--------------------------- 1 file changed, 29 insertions(+), 82 deletions(-) diff --git a/openhands/server/github.py b/openhands/server/github.py index 03a835f7df9f..a7bdef3543d4 100644 --- a/openhands/server/github.py +++ b/openhands/server/github.py @@ -1,96 +1,39 @@ import os -from typing import List, Optional import httpx from openhands.core.logger import openhands_logger as logger -from openhands.server.sheets_client import GoogleSheetsClient GITHUB_CLIENT_ID = os.getenv('GITHUB_CLIENT_ID', '').strip() GITHUB_CLIENT_SECRET = os.getenv('GITHUB_CLIENT_SECRET', '').strip() +GITHUB_USER_LIST = None -class UserVerifier: - def __init__(self): - logger.info('Initializing UserVerifier') - self.file_users: Optional[List[str]] = None - self.sheets_client: Optional[GoogleSheetsClient] = None - self.spreadsheet_id: Optional[str] = None - - # Initialize from environment variables - self._init_file_users() - self._init_sheets_client() - - def _init_file_users(self): - """Load users from text file if configured""" - waitlist = os.getenv('GITHUB_USER_LIST_FILE') - if not waitlist: - logger.info('GITHUB_USER_LIST_FILE not configured') - return - - if not os.path.exists(waitlist): - logger.error(f'User list file not found: {waitlist}') - raise FileNotFoundError(f'User list file not found: {waitlist}') - - try: - with open(waitlist, 'r') as f: - self.file_users = [line.strip() for line in f if line.strip()] - logger.info( - f'Successfully loaded {len(self.file_users)} users from {waitlist}' - ) - except Exception as e: - logger.error(f'Error reading user list file {waitlist}: {str(e)}') - - def _init_sheets_client(self): - """Initialize Google Sheets client if configured""" - sheet_id = os.getenv('GITHUB_USERS_SHEET_ID') - - if not sheet_id: - logger.info('GITHUB_USERS_SHEET_ID not configured') - return - - logger.info('Initializing Google Sheets integration') - self.sheets_client = GoogleSheetsClient() - self.spreadsheet_id = sheet_id - - def is_user_allowed(self, username: str) -> bool: - """Check if user is allowed based on file and/or sheet configuration""" - if not self.file_users and not self.sheets_client: - logger.debug('No verification sources configured - allowing all users') - return True - logger.info(f'Checking if GitHub user {username} is allowed') - - if self.file_users: - if username in self.file_users: - logger.info(f'User {username} found in text file allowlist') - return True - logger.debug(f'User {username} not found in text file allowlist') - - if self.sheets_client and self.spreadsheet_id: - sheet_users = self.sheets_client.get_usernames(self.spreadsheet_id) - if username in sheet_users: - logger.info(f'User {username} found in Google Sheets allowlist') - return True - logger.debug(f'User {username} not found in Google Sheets allowlist') - - logger.info(f'User {username} not found in any allowlist') - return False +def load_github_user_list(): + global GITHUB_USER_LIST + waitlist = os.getenv('GITHUB_USER_LIST_FILE') + if waitlist: + with open(waitlist, 'r') as f: + GITHUB_USER_LIST = [line.strip() for line in f if line.strip()] -# Global instance of user verifier -user_verifier = UserVerifier() +load_github_user_list() async def authenticate_github_user(auth_token) -> bool: logger.info('Checking GitHub token') + if not GITHUB_USER_LIST: + return True if not auth_token: logger.warning('No GitHub token provided') return False - login = await get_github_user(auth_token) - - if not user_verifier.is_user_allowed(login): + login, error = await get_github_user(auth_token) + if error: + logger.warning(f'Invalid GitHub token: {error}') + return False + if login not in GITHUB_USER_LIST: logger.warning(f'GitHub user {login} not in allow list') return False @@ -98,7 +41,7 @@ async def authenticate_github_user(auth_token) -> bool: return True -async def get_github_user(token: str) -> str: +async def get_github_user(token: str) -> tuple[str | None, str | None]: """Get GitHub user info from token. Args: @@ -109,17 +52,21 @@ async def get_github_user(token: str) -> str: If successful, error_message is None If failed, login is None and error_message contains the error """ - logger.info('Fetching GitHub user info from token') headers = { 'Accept': 'application/vnd.github+json', 'Authorization': f'Bearer {token}', 'X-GitHub-Api-Version': '2022-11-28', } - async with httpx.AsyncClient() as client: - logger.debug('Making request to GitHub API') - response = await client.get('https://api.github.com/user', headers=headers) - response.raise_for_status() - user_data = response.json() - login = user_data.get('login') - logger.info(f'Successfully retrieved GitHub user: {login}') - return login + try: + async with httpx.AsyncClient() as client: + response = await client.get('https://api.github.com/user', headers=headers) + if response.status_code == 200: + user_data = response.json() + return user_data.get('login'), None + else: + return ( + None, + f'GitHub API error: {response.status_code} - {response.text}', + ) + except Exception as e: + return None, f'Error connecting to GitHub: {str(e)}' From c90ac31d6c6af88889c6792bfb64675a94da1e24 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 20:54:28 -0400 Subject: [PATCH 10/14] unrevert --- openhands/server/github.py | 111 +++++++++++++++++++++++++++---------- 1 file changed, 82 insertions(+), 29 deletions(-) diff --git a/openhands/server/github.py b/openhands/server/github.py index a7bdef3543d4..03a835f7df9f 100644 --- a/openhands/server/github.py +++ b/openhands/server/github.py @@ -1,39 +1,96 @@ import os +from typing import List, Optional import httpx from openhands.core.logger import openhands_logger as logger +from openhands.server.sheets_client import GoogleSheetsClient GITHUB_CLIENT_ID = os.getenv('GITHUB_CLIENT_ID', '').strip() GITHUB_CLIENT_SECRET = os.getenv('GITHUB_CLIENT_SECRET', '').strip() -GITHUB_USER_LIST = None -def load_github_user_list(): - global GITHUB_USER_LIST - waitlist = os.getenv('GITHUB_USER_LIST_FILE') - if waitlist: - with open(waitlist, 'r') as f: - GITHUB_USER_LIST = [line.strip() for line in f if line.strip()] +class UserVerifier: + def __init__(self): + logger.info('Initializing UserVerifier') + self.file_users: Optional[List[str]] = None + self.sheets_client: Optional[GoogleSheetsClient] = None + self.spreadsheet_id: Optional[str] = None + + # Initialize from environment variables + self._init_file_users() + self._init_sheets_client() + + def _init_file_users(self): + """Load users from text file if configured""" + waitlist = os.getenv('GITHUB_USER_LIST_FILE') + if not waitlist: + logger.info('GITHUB_USER_LIST_FILE not configured') + return + + if not os.path.exists(waitlist): + logger.error(f'User list file not found: {waitlist}') + raise FileNotFoundError(f'User list file not found: {waitlist}') + + try: + with open(waitlist, 'r') as f: + self.file_users = [line.strip() for line in f if line.strip()] + logger.info( + f'Successfully loaded {len(self.file_users)} users from {waitlist}' + ) + except Exception as e: + logger.error(f'Error reading user list file {waitlist}: {str(e)}') + + def _init_sheets_client(self): + """Initialize Google Sheets client if configured""" + sheet_id = os.getenv('GITHUB_USERS_SHEET_ID') + + if not sheet_id: + logger.info('GITHUB_USERS_SHEET_ID not configured') + return + + logger.info('Initializing Google Sheets integration') + self.sheets_client = GoogleSheetsClient() + self.spreadsheet_id = sheet_id + + def is_user_allowed(self, username: str) -> bool: + """Check if user is allowed based on file and/or sheet configuration""" + if not self.file_users and not self.sheets_client: + logger.debug('No verification sources configured - allowing all users') + return True + logger.info(f'Checking if GitHub user {username} is allowed') + + if self.file_users: + if username in self.file_users: + logger.info(f'User {username} found in text file allowlist') + return True + logger.debug(f'User {username} not found in text file allowlist') + + if self.sheets_client and self.spreadsheet_id: + sheet_users = self.sheets_client.get_usernames(self.spreadsheet_id) + if username in sheet_users: + logger.info(f'User {username} found in Google Sheets allowlist') + return True + logger.debug(f'User {username} not found in Google Sheets allowlist') + + logger.info(f'User {username} not found in any allowlist') + return False -load_github_user_list() +# Global instance of user verifier +user_verifier = UserVerifier() async def authenticate_github_user(auth_token) -> bool: logger.info('Checking GitHub token') - if not GITHUB_USER_LIST: - return True if not auth_token: logger.warning('No GitHub token provided') return False - login, error = await get_github_user(auth_token) - if error: - logger.warning(f'Invalid GitHub token: {error}') - return False - if login not in GITHUB_USER_LIST: + login = await get_github_user(auth_token) + + if not user_verifier.is_user_allowed(login): logger.warning(f'GitHub user {login} not in allow list') return False @@ -41,7 +98,7 @@ async def authenticate_github_user(auth_token) -> bool: return True -async def get_github_user(token: str) -> tuple[str | None, str | None]: +async def get_github_user(token: str) -> str: """Get GitHub user info from token. Args: @@ -52,21 +109,17 @@ async def get_github_user(token: str) -> tuple[str | None, str | None]: If successful, error_message is None If failed, login is None and error_message contains the error """ + logger.info('Fetching GitHub user info from token') headers = { 'Accept': 'application/vnd.github+json', 'Authorization': f'Bearer {token}', 'X-GitHub-Api-Version': '2022-11-28', } - try: - async with httpx.AsyncClient() as client: - response = await client.get('https://api.github.com/user', headers=headers) - if response.status_code == 200: - user_data = response.json() - return user_data.get('login'), None - else: - return ( - None, - f'GitHub API error: {response.status_code} - {response.text}', - ) - except Exception as e: - return None, f'Error connecting to GitHub: {str(e)}' + async with httpx.AsyncClient() as client: + logger.debug('Making request to GitHub API') + response = await client.get('https://api.github.com/user', headers=headers) + response.raise_for_status() + user_data = response.json() + login = user_data.get('login') + logger.info(f'Successfully retrieved GitHub user: {login}') + return login From 8395334d8f481e9ce21e51d4fd0fba065a475585 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 20:55:03 -0400 Subject: [PATCH 11/14] remove sheets client --- openhands/server/sheets_client.py | 54 ------------------------------- 1 file changed, 54 deletions(-) delete mode 100644 openhands/server/sheets_client.py diff --git a/openhands/server/sheets_client.py b/openhands/server/sheets_client.py deleted file mode 100644 index 47e1ca5694cf..000000000000 --- a/openhands/server/sheets_client.py +++ /dev/null @@ -1,54 +0,0 @@ -from typing import List -from google.auth import default -from googleapiclient.discovery import build -from googleapiclient.errors import HttpError - -from openhands.core.logger import openhands_logger as logger - -class GoogleSheetsClient: - def __init__(self): - """Initialize Google Sheets client using workload identity. - Uses application default credentials which supports workload identity when running in GCP. - """ - logger.info('Initializing Google Sheets client with workload identity') - try: - credentials, project = default(scopes=['https://www.googleapis.com/auth/spreadsheets.readonly']) - logger.info(f'Successfully obtained credentials for project: {project}') - self.service = build('sheets', 'v4', credentials=credentials) - logger.info('Successfully initialized Google Sheets API service') - except Exception as e: - logger.error(f'Failed to initialize Google Sheets client: {str(e)}') - self.service = None - - def get_usernames(self, spreadsheet_id: str, range_name: str = 'A:A') -> List[str]: - """Get list of usernames from specified Google Sheet. - - Args: - spreadsheet_id: The ID of the Google Sheet - range_name: The A1 notation of the range to fetch - - Returns: - List of usernames from the sheet - """ - if not self.service: - logger.error('Google Sheets service not initialized') - return [] - - try: - logger.info(f'Fetching usernames from sheet {spreadsheet_id}, range {range_name}') - result = self.service.spreadsheets().values().get( - spreadsheetId=spreadsheet_id, - range=range_name - ).execute() - - values = result.get('values', []) - usernames = [str(cell[0]).strip() for cell in values if cell and cell[0].strip()] - logger.info(f'Successfully fetched {len(usernames)} usernames from Google Sheet') - return usernames - - except HttpError as err: - logger.error(f'Error accessing Google Sheet {spreadsheet_id}: {err}') - return [] - except Exception as e: - logger.error(f'Unexpected error accessing Google Sheet {spreadsheet_id}: {str(e)}') - return [] \ No newline at end of file From b6142173eb67971aa78f75e92b820d7f1476fb40 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 20:55:22 -0400 Subject: [PATCH 12/14] unrevert --- openhands/server/sheets_client.py | 68 +++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 openhands/server/sheets_client.py diff --git a/openhands/server/sheets_client.py b/openhands/server/sheets_client.py new file mode 100644 index 000000000000..c2db1a343477 --- /dev/null +++ b/openhands/server/sheets_client.py @@ -0,0 +1,68 @@ +from typing import List + +from google.auth import default +from googleapiclient.discovery import build +from googleapiclient.errors import HttpError + +from openhands.core.logger import openhands_logger as logger + + +class GoogleSheetsClient: + def __init__(self): + """Initialize Google Sheets client using workload identity. + Uses application default credentials which supports workload identity when running in GCP. + """ + logger.info('Initializing Google Sheets client with workload identity') + try: + credentials, project = default( + scopes=['https://www.googleapis.com/auth/spreadsheets.readonly'] + ) + logger.info(f'Successfully obtained credentials for project: {project}') + self.service = build('sheets', 'v4', credentials=credentials) + logger.info('Successfully initialized Google Sheets API service') + except Exception as e: + logger.error(f'Failed to initialize Google Sheets client: {str(e)}') + self.service = None + + def get_usernames(self, spreadsheet_id: str, range_name: str = 'A:A') -> List[str]: + """Get list of usernames from specified Google Sheet. + + Args: + spreadsheet_id: The ID of the Google Sheet + range_name: The A1 notation of the range to fetch + + Returns: + List of usernames from the sheet + """ + if not self.service: + logger.error('Google Sheets service not initialized') + return [] + + try: + logger.info( + f'Fetching usernames from sheet {spreadsheet_id}, range {range_name}' + ) + result = ( + self.service.spreadsheets() + .values() + .get(spreadsheetId=spreadsheet_id, range=range_name) + .execute() + ) + + values = result.get('values', []) + usernames = [ + str(cell[0]).strip() for cell in values if cell and cell[0].strip() + ] + logger.info( + f'Successfully fetched {len(usernames)} usernames from Google Sheet' + ) + return usernames + + except HttpError as err: + logger.error(f'Error accessing Google Sheet {spreadsheet_id}: {err}') + return [] + except Exception as e: + logger.error( + f'Unexpected error accessing Google Sheet {spreadsheet_id}: {str(e)}' + ) + return [] From dd89ce7d306dfbacaf2df9d77fffd6a451da0001 Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Fri, 1 Nov 2024 17:10:39 -0400 Subject: [PATCH 13/14] Update typing --- openhands/server/github.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/openhands/server/github.py b/openhands/server/github.py index 03a835f7df9f..26d06cd3c8a1 100644 --- a/openhands/server/github.py +++ b/openhands/server/github.py @@ -1,6 +1,4 @@ import os -from typing import List, Optional - import httpx from openhands.core.logger import openhands_logger as logger @@ -11,17 +9,17 @@ class UserVerifier: - def __init__(self): + def __init__(self) -> None: logger.info('Initializing UserVerifier') - self.file_users: Optional[List[str]] = None - self.sheets_client: Optional[GoogleSheetsClient] = None - self.spreadsheet_id: Optional[str] = None + self.file_users: list[str] | None = None + self.sheets_client: GoogleSheetsClient | None = None + self.spreadsheet_id: str | None = None # Initialize from environment variables self._init_file_users() self._init_sheets_client() - def _init_file_users(self): + def _init_file_users(self) -> None: """Load users from text file if configured""" waitlist = os.getenv('GITHUB_USER_LIST_FILE') if not waitlist: @@ -41,7 +39,7 @@ def _init_file_users(self): except Exception as e: logger.error(f'Error reading user list file {waitlist}: {str(e)}') - def _init_sheets_client(self): + def _init_sheets_client(self) -> None: """Initialize Google Sheets client if configured""" sheet_id = os.getenv('GITHUB_USERS_SHEET_ID') From b86cffaaba422f532382e823e0d55f986d886a2b Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Fri, 1 Nov 2024 17:14:04 -0400 Subject: [PATCH 14/14] Lint --- openhands/server/github.py | 1 + 1 file changed, 1 insertion(+) diff --git a/openhands/server/github.py b/openhands/server/github.py index 26d06cd3c8a1..14ef93cfeb97 100644 --- a/openhands/server/github.py +++ b/openhands/server/github.py @@ -1,4 +1,5 @@ import os + import httpx from openhands.core.logger import openhands_logger as logger