diff --git a/.pylintrc b/.pylintrc
index 9191f08..1bdae4a 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -4,11 +4,11 @@ init-hook='import sys; sys.path.append("/usr/local/lib/python3.10/site-packages"
[MESSAGES CONTROL]
-disable=duplicate-code
+disable=duplicate-code, f-string-without-interpolation
[DESIGN]
-max-locals=19
+max-locals=35
max-attributes=12
[FORMAT]
diff --git a/README.md b/README.md
index 68c9d9f..1c5836d 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,21 @@ Select launch instance from template
Select `LowEndOrchestrator` and use the default template.
![OrchTemplaceSelect](docs/images/CDOrchTemplateSelect.png)
+## Configuring OAuth
+Authentication and Access control is managed through an OAuth to GitHub. Starting the system for the first time requires a file named `env` in current working directory. An example `env.development` is provided that you may copy, and update to match the `secret`, `client_id`, and `callback_url` of your OAuth app.
+
+If the `env` file is not present, the application will not start, and it will emit the error `Can't find file env in current directory, not able to parse env properties, exiting.` If no `env` file is present in the working directory, when you deploy a new orchestration instances in AWS, the AWS User Data script will create one, in the home directory, using the contents of `env.defaults`. The default configuration is not correct, and OAuth will fail. Using the default configuration will allow the application to start, and respond to healthchecks. Please make sure to review the `env` file if you have any issues with authentication.
+
+### Access Control
+To gain access to the application, a user must have membership in specific GitHub teams. The org and teams checked for membership are found in the `env` file. You may use multiple teams for access control by providing a comma separated list in the `env` file. Access to the application is checked on every HTTP request, and the application makes HTTP calls to GitHub to ensure the user has sufficient privileges to perform the requested action. There are two methods of access control:
+- Using a web browser via OAuth: Click on the person icon in the top right corner to login. You will be redirected to GitHub to authenticate.
+- Using HTTP command line: Pass the header `Authorization` with your valid GitHub token. The GitHub token must have `read:org` scope for the organization specified in the `env` file.
+
+Example of command line access
+```
+curl -H 'Accept: application/json' -H 'Authorization: gho_bBB1bB1BBbbBbb1BBbBbBB1bbbb1BbbBB' http://127.0.0.1:4000/status
+```
+
## Updating Orchestrator Job Configuration
By default the setup will spin up a webservice with [Production Run from Jan 2024](meta-data/full-production-run-20240101.json). To change the job configuration you need to create your own JSON configuration, and restart the service to use the new JSON. **Note** need to use `nohup` on python webservice to keep the process running after ssh-shell exit.
- Create your own JSON following the example formate from `test-simple-jobs.json`
@@ -31,7 +46,7 @@ By default the setup will spin up a webservice with [Production Run from Jan 202
## Replay Setup
You can spin up as many replay nodes as you need. Replay nodes will continuously pick and process new jobs. Each replay host works on one job at a time before picking up the next job. Therefore a small number of replay hosts will process all the jobs given enough time. For example, if there are 100 replay slices configured at most 100 replay hosts, and as few as 1 replay host, may be utilized.
-Before running the script for the first time you must populate the correct subnet, security group, and region information into a file on the orchestration node. You will find that file `~/replay-test/scripts/replayhost/env`. Not setting the correct values will prevent the script from starting instances.
+Before running the script for the first time you must populate the correct subnet, security group, and region information into a file on the orchestration node. You will find that file `~/replay-test/scripts/replayhost/env`. Not setting the correct values will prevent the script from starting instances.
To run the replay nodes ssh into the orchestrator node and run [run-replay-instance.sh](scripts/replayhost/run-replay-instance.sh). The script takes two arguments the first is the number of replay hosts to spin up. The second argument indicates this is a dry run, and don't start up the hosts.
```
diff --git a/config/nginx-replay-test.conf b/config/nginx-replay-test.conf
index e6c9473..b597c3d 100644
--- a/config/nginx-replay-test.conf
+++ b/config/nginx-replay-test.conf
@@ -11,13 +11,19 @@ server {
#
root /var/www/html;
- index progress.html;
+ index progress;
server_name _;
# pass these URLs to app
- location ~ ^/(status|config|job|summary|healthcheck|replayhost|metrics|jobtimeoutcheck) {
+ location ~ ^/(oauthback|progress|grid|control|detail|status|config|job|summary|healthcheck|replayhost|metrics|jobtimeoutcheck|logout) {
proxy_buffering off;
proxy_pass http://127.0.0.1:4000;
+ proxy_set_header Host $host;
+ proxy_set_header X-Forwarded-For $remote_addr;
+ }
+
+ location = / {
+ return 301 /progress;
}
# everything else serve static content
diff --git a/docs/http-service-calls.md b/docs/http-service-calls.md
index 49bf9ee..1e775f9 100644
--- a/docs/http-service-calls.md
+++ b/docs/http-service-calls.md
@@ -5,6 +5,11 @@
- status - gets a replay nodes progress and state
- config - get/sets the configuration data used to initialize the job
- summary - progress of current run and reports any failed jobs
+- oauthback - login callback from OAuth provider
+- logout
+- process - Dynamic HTML for summary page
+- grid - Dynamic HTML with grid of jobs
+- control - Dynamic HTML with controls to operate replays
- healthcheck - gets 200/0K always
## Job
@@ -53,7 +58,7 @@ When running replay tests we don't always known the expected integrity hash. For
## Summary (Progress)
### GET
-Returns the following
+`/summary` Returns the following
- number of blocks processed
- total number of blocks to process
- jobs completed
@@ -66,9 +71,15 @@ Content Type Support.
- If the Accepts header is text-html returns html
- If Accepts header is application/json returns json
+## Authentication
+
+There are two request, `/oauthback` and `/logout`.
+- `/oauthback` is the call back from the OAuth provider, and it is used to set the authentication cookie. This call performs separate web calls to make sure the user has the correct privileges and may be allowed access.
+- `/logout` clears the cookie preventing access to the application.
+
## Healthcheck
-Always returns same value used for healthchecks
+`/healthcheck` Always returns same value used for healthchecks
### GET
Only get request is supported. Always returns body of `OK` with status `200`
diff --git a/docs/operating-details.md b/docs/operating-details.md
index 4a23c38..392c135 100644
--- a/docs/operating-details.md
+++ b/docs/operating-details.md
@@ -17,7 +17,6 @@ All scripts are under the `ubunutu` user. The `replay-test` github repository is
- /home/ubuntu/scripts/process_orchestration_log.py : parses log to produce stats on timing
## Replay hosts
-
All scripts are under the `enf-replay` user. The `replay-test` github repository is cloned into this directory.
### `Top Level Items`
@@ -33,7 +32,7 @@ All scripts are under the `enf-replay` user. The `replay-test` github repository
- nodoes.log : log from syncing runing
- nodeos-readonly.log : log from readonly spinup of nodoes
- ### `Additional Items`
+### `Additional Items`
- /home/enf-replay/replay-test/replay-client/background_status_update.sh : background job that send progress updates to orchestration service
- /home/enf-replay/replay-test/replay-client/config_operations.py : python script to HTTP POST integrity hash updates
- /home/enf-replay/replay-test/replay-client/create-nodeos-dir-struct.sh : init dir structure
diff --git a/env.development b/env.development
new file mode 100644
index 0000000..8a22f4a
--- /dev/null
+++ b/env.development
@@ -0,0 +1,8 @@
+client_id=11111111111111111111
+scope=read:org
+client_secret=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+authorize_url=https://github.com/login/oauth/authorize
+registered_callback=https://example.com/oauthback
+access_token=https://github.com/login/oauth/access_token
+user_info_url=https://api.github.com/user
+team=ORG/TEAM_1, ORG/TEAM_2
diff --git a/orchestration-service/env_store.py b/orchestration-service/env_store.py
new file mode 100644
index 0000000..1782e44
--- /dev/null
+++ b/orchestration-service/env_store.py
@@ -0,0 +1,48 @@
+"""Module writes and puts env vars"""
+import sys
+import os
+
+class EnvStore():
+ """Class to manage env vars"""
+
+ def __init__(self, file):
+ """initialize from file"""
+ self.env_name_values = {}
+ if os.path.exists(file):
+ with open('env', 'r', encoding='utf-8') as properties_file:
+ # Read and parse each line into a list of tuples (name, value)
+ for line in properties_file:
+ line = line.strip()
+ # Skip empty lines
+ if not line:
+ continue
+ # Assuming the line format is "name=value" or "name:value"
+ if '=' in line:
+ name, value = line.split('=', 1)
+ elif ':' in line:
+ name, value = line.split(':', 1)
+ else:
+ print(f"Line format in env file not recognized: {line}")
+ continue
+ self.env_name_values[name.strip()] = value.strip()
+ else:
+ sys.exit(f"Can't find file {file} in current directory, not able to parse env properties, exiting.")
+
+ def get(self, key):
+ """get values"""
+ return self.env_name_values[key]
+
+ def has(self,key):
+ """false if key not set or no value; otherwise true"""
+ if key not in self.env_name_values or not self.env_name_values[key]:
+ return False
+ return True
+
+ def set(self, key, value):
+ """set values"""
+ self.env_name_values[key] = value
+
+ def set_default(self, key, default_value):
+ """if key does not exist nor has previous value set"""
+ if key not in self.env_name_values or not self.env_name_values[key]:
+ self.env_name_values[key] = default_value
diff --git a/orchestration-service/github_oauth.py b/orchestration-service/github_oauth.py
new file mode 100644
index 0000000..c95a757
--- /dev/null
+++ b/orchestration-service/github_oauth.py
@@ -0,0 +1,126 @@
+"""modules to support oauth functions"""
+import json
+import requests
+
+class GitHubOauth():
+ """helper functions to manage git hub oauth"""
+
+ @staticmethod
+ def assemble_oauth_url(state, properties):
+ """assemble url for initial oauth request"""
+ return properties.get('authorize_url') \
+ + f"?client_id={properties.get('client_id')}" \
+ + f"&redirect_uri={properties.get('registered_callback')}" \
+ + f"&scope={properties.get('scope')}" \
+ + f"&state={state}" \
+ + f"&allow_signup=false"
+
+ @staticmethod
+ def get_oauth_access_token(code, properties):
+ """build url for the get token request"""
+ # construct http call to exchange tempory code for access token
+ params = {
+ 'client_id': properties.get('client_id'),
+ 'client_secret': properties.get('client_secret'),
+ 'code': code,
+ 'redirect_uri': properties.get('registered_callback')
+ }
+ # make post call to do exchange
+ exchange_response = requests.post(properties.get('access_token'),
+ params=params,
+ timeout=3,
+ headers={
+ 'Accept': 'application/json',
+ 'Content-Type': 'application/json'
+ })
+ # if good get the token otherwise fail
+ # returns following params access_token, scope, token_type
+ if exchange_response.status_code == 200:
+ exchange_data = json.loads(exchange_response.content.decode('utf-8'))
+ return exchange_data['access_token']
+ return None
+
+ @staticmethod
+ def create_auth_string(bearer_token, user_info_url):
+ """get public profile information using token"""
+ # https request to get public profile data, login and avatar_url
+ user_avatar_response = requests.get(user_info_url,
+ timeout=3,
+ headers={
+ 'Accept': 'application/vnd.github+json',
+ 'Authorization': f'Bearer {bearer_token}',
+ 'X-GitHub-Api-Version': '2022-11-28'
+ })
+ if user_avatar_response.status_code == 200:
+ user_data = json.loads(user_avatar_response.content.decode('utf-8'))
+ return GitHubOauth.credentials_to_str(user_data['login'],user_data['avatar_url'],bearer_token)
+ return None
+
+ @staticmethod
+ def check_membership(bearer_token, login, team_string):
+ """Check for team membership"""
+ if not login:
+ return False
+ # many contain many teams
+ for unit in team_string.split(','):
+ org, team = unit.split('/',1)
+ org = org.strip()
+ team = team.strip()
+ url = f'https://api.github.com/orgs/{org}/teams/{team}/members'
+ membership_check = requests.get(url,
+ timeout=3,
+ headers={
+ 'Accept': 'application/vnd.github+json',
+ 'Authorization': f'Bearer {bearer_token}',
+ 'X-GitHub-Api-Version': '2022-11-28',
+ 'User-Agent': 'App/OAuth/ReplayTest'
+ })
+ if membership_check.status_code == 200:
+ members_list = json.loads(membership_check.content.decode('utf-8'))
+ for member in members_list:
+ if member['login'] == login:
+ return True
+ return False
+
+ @staticmethod
+ def is_authorized(cookies, header_token, user_info_url, team_string):
+ """check for authorized token or cookie"""
+
+ token = None
+ if 'replay_auth' in cookies and cookies['replay_auth']:
+ token = GitHubOauth.extract_token(cookies['replay_auth'])
+ elif header_token:
+ token = header_token.replace("Bearer ","")
+ if not token:
+ return False
+
+ auth_string = GitHubOauth.create_auth_string(token, user_info_url)
+ login = GitHubOauth.extract_login(auth_string)
+ return GitHubOauth.check_membership(token, login, team_string)
+
+ @staticmethod
+ def credentials_to_str(login, avatar_url, token):
+ """converts profile data to string sep by :"""
+ return token + ":" + login + ":" + avatar_url
+
+ @staticmethod
+ def str_to_public_profile(data):
+ """converts str to array of profile data"""
+ if not data:
+ return []
+ # return public profile data leaving off bearer token
+ return data.split(':', 2)[1:]
+
+ @staticmethod
+ def extract_token(data):
+ """grabs the bearer token from string"""
+ if not data:
+ return []
+ return data.split(':', 2)[0]
+
+ @staticmethod
+ def extract_login(data):
+ """grabs the bearer token from string"""
+ if not data:
+ return []
+ return data.split(':', 2)[1]
diff --git a/orchestration-service/html_page.py b/orchestration-service/html_page.py
new file mode 100644
index 0000000..2fd2360
--- /dev/null
+++ b/orchestration-service/html_page.py
@@ -0,0 +1,49 @@
+"""modules for assembling HTML pages from files"""
+
+class HtmlPage:
+ """class for assembling HTML pages from files"""
+ def __init__(self, html_dir):
+ self.html_dir = html_dir
+ if not self.html_dir.endswith('/'):
+ self.html_dir = self.html_dir + '/'
+
+ def contents(self, file_name="progress.html"):
+ """Return contents of html files"""
+
+ if file_name == '/progress':
+ file_name = 'progress.html'
+ elif file_name == '/grid':
+ file_name = 'grid.html'
+ elif file_name == '/control':
+ file_name = 'control.html'
+ elif file_name == '/detail':
+ file_name = 'detail.html'
+
+ file_path = self.html_dir + file_name
+ with open(file_path, 'r', encoding='utf-8') as file:
+ # Read the file's contents into a string
+ file_contents = file.read()
+ return file_contents
+
+ def profile_top_bar_html(self, login, avatar_url):
+ """return top bar with profile"""
+ return f'''