Adding endpoint to get job output (#18)
* adding endpoint to get job output
* support for flux-restful-cli in Python for the same endpoints
* ensure we add test for log to authenticated set
* submit jobs form in UI
* finishing up early work for jobs table and info pages
* finishing up tweaks to add to python client

Signed-off-by: vsoch <[email protected]>
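For a sense of how the new job output endpoint might be consumed, here is a minimal sketch using requests. The endpoint path, server address, credentials, and response shape are illustrative assumptions, not confirmed by this diff; only the idea of an authenticated GET that returns a job's output lines comes from the commit.

import requests

base_url = "http://localhost:5000"   # assumed server address
jobid = 123456                       # a jobid returned by an earlier submit

# Hypothetical path for the new job output endpoint
response = requests.get(
    f"{base_url}/jobs/{jobid}/output",
    auth=("fluxuser", "fluxpass"),   # assumed basic-auth credentials
)
response.raise_for_status()

# get_job_output() on the server collects output lines into a list, so a
# JSON list (or an object wrapping one) is a reasonable guess for the payload
print(response.json())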
Showing 45 changed files with 1,593 additions and 203 deletions.
@@ -0,0 +1,58 @@
from typing import List, Optional

from fastapi import Request

import app.library.flux as flux_cli


class SubmitForm:
    def __init__(self, request: Request):
        self.request: Request = request
        self.errors: List = []
        self.command: Optional[str] = None
        self.workdir: Optional[str] = None
        self.num_tasks: Optional[int] = None
        self.num_nodes: Optional[int] = None
        self.runtime: Optional[int] = None
        self.cores_per_task: Optional[int] = None
        self.gpus_per_task: Optional[int] = None
        self.exclusive: Optional[bool] = False

    # STOPPED HERE - serialize in jquery from form, submit as application/json.
    async def load_data(self):
        form = await self.request.form()
        self.command = form.get("command")
        self.workdir = form.get("workdir") or None
        self.num_tasks = form.get("num_tasks") or 1
        self.num_nodes = form.get("num_nodes") or 1
        self.runtime = form.get("runtime") or 0
        self.cores_per_task = form.get("cores_per_task") or None
        self.gpus_per_task = form.get("gpus_per_task") or None
        self.exclusive = form.get("exclusive") or False

    @property
    def kwargs(self):
        """
        Prepared key value dictionary of items.
        """
        kwargs = {}
        for key in [
            "command",
            "num_tasks",
            "num_nodes",
            "cores_per_task",
            "gpus_per_task",
            "exclusive",
        ]:
            if getattr(self, key, None) is not None:
                kwargs[key] = getattr(self, key)
        return kwargs

    def is_valid(self):
        """
        Determine if the form is valid (devoid of errors).
        """
        self.errors = flux_cli.validate_submit_kwargs(self.kwargs, runtime=self.runtime)
        if not self.errors:
            return True
        return False
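SubmitForm is designed to be constructed from an incoming FastAPI request. A minimal usage sketch follows; the route path, response handling, and import location are assumptions for illustration, and only the load_data / is_valid / kwargs calls mirror the class above.

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse

app = FastAPI()


@app.post("/jobs/submit")  # assumed route path
async def submit_job(request: Request):
    # SubmitForm as defined above (its import path is an assumption)
    form = SubmitForm(request)
    await form.load_data()

    # is_valid() fills form.errors via flux_cli.validate_submit_kwargs
    if not form.is_valid():
        return JSONResponse({"errors": form.errors}, status_code=400)

    # form.kwargs includes only the fields that were actually set
    return JSONResponse({"submitted": form.kwargs})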
@@ -0,0 +1,190 @@
import json
import os
import re
import shlex

import flux
import flux.job

from app.core.config import settings


def validate_submit_kwargs(kwargs, envars=None, runtime=None):
    """
    Shared function to validate submit, from API or web UI.
    Kwargs are expected to be given to JobspecV1, and
    everything else is added to the fluxjob.
    """
    errors = []
    if "command" not in kwargs or not kwargs["command"]:
        errors.append("'command' is required.")

    # We can't ask for more nodes than available!
    num_nodes = kwargs.get("num_nodes")
    if num_nodes and num_nodes > settings.flux_nodes:
        errors.append(
            f"The server only has {settings.flux_nodes} nodes, you requested {num_nodes}"
        )

    # If the user asks for gpus and we don't have any, no go
    if "gpus_per_task" in kwargs and not settings.has_gpus:
        errors.append("This server does not support gpus: gpus_per_task cannot be set.")

    # Minimum value of zero
    if runtime and runtime < 0:
        errors.append(f"Runtime must be >= 0, found {runtime}")

    # Minimum values of 1
    for key in ["cores_per_task", "gpus_per_task"]:
        if key in kwargs and kwargs[key] < 1:
            errors.append(f"Parameter {key} must be >= 1")

    if envars and not isinstance(envars, dict):
        errors.append("Environment variables must be key/value pairs (dict)")
    return errors


def prepare_job(kwargs, runtime=0, workdir=None, envars=None):
    """
    After validation, prepare the job (shared function).
    """
    envars = envars or {}

    # Generate the flux job
    command = kwargs["command"]
    if isinstance(command, str):
        command = shlex.split(command)

    # Delete command from the kwargs (we added it because it is required and validated that way)
    del kwargs["command"]
    fluxjob = flux.job.JobspecV1.from_command(command, **kwargs)

    if workdir is not None:
        fluxjob.workdir = workdir

    # A duration of zero (the default) means unlimited
    fluxjob.duration = runtime

    # Additional envars in the payload?
    environment = dict(os.environ)
    environment.update(envars)
    fluxjob.environment = environment
    return fluxjob


def query_job(jobinfo, query):
    """
    This would be better suited for a database, but should work for small numbers.
    """
    searchstr = "".join([str(x) for x in list(jobinfo.values())])
    return re.search(query, searchstr)


def query_jobs(contenders, query):
    """
    Wrapper to query more than one job.
    """
    jobs = []
    for contender in contenders:
        if not query_job(contender, query):
            continue
        jobs.append(contender)
    return jobs


def get_job_output(jobid):
    """
    Given a jobid, get the output.
    """
    lines = []
    from app.main import app

    # If the submit is too close to the log request, it cannot find the file handle.
    # It could also be that the jobid cannot be found.
    try:
        for line in flux.job.event_watch(app.handle, jobid, "guest.output"):
            if "data" in line.context:
                lines.append(line.context["data"])
    except Exception:
        pass
    return lines


def list_jobs_detailed(limit=None, query=None):
    """
    Get a detailed listing of jobs.
    """
    listing = list_jobs()
    ids = listing.get()["jobs"]
    jobs = {}
    for job in ids:

        # Stop if a limit is defined and we have hit it!
        if limit is not None and len(jobs) >= limit:
            break

        try:
            jobinfo = get_job(job["id"])

            # Best effort hack to do a query
            if query and not query_job(jobinfo, query):
                continue
            jobs[job["id"]] = jobinfo
        except Exception:
            pass
    return jobs


def list_jobs():
    """
    Get a simple listing of jobs (just the ids).
    """
    from app.main import app

    return flux.job.job_list(app.handle)


def get_simple_job(jobid):
    """
    Not used - an original (simpler) implementation.
    """
    from app.main import app

    info = flux.job.job_list_id(app.handle, jobid, attrs=["all"])
    return json.loads(info.get_str())["job"]


def get_job(jobid):
    """
    Get details for a job.
    """
    from app.main import app

    payload = {"id": int(jobid), "attrs": ["all"]}
    rpc = flux.job.list.JobListIdRPC(app.handle, "job-list.list-id", payload)
    try:
        jobinfo = rpc.get()

    # The job does not exist!
    except FileNotFoundError:
        return None

    jobinfo = jobinfo["job"]

    # User friendly string from integer
    state = jobinfo["state"]
    jobinfo["state"] = flux.job.info.statetostr(state)

    # Get job info to add to result
    info = rpc.get_jobinfo()
    jobinfo["nnodes"] = info._nnodes
    jobinfo["result"] = info.result
    jobinfo["returncode"] = info.returncode
    jobinfo["runtime"] = info.runtime
    jobinfo["priority"] = info._priority
    jobinfo["waitstatus"] = info._waitstatus
    jobinfo["nodelist"] = info._nodelist
    jobinfo["exception"] = info._exception.__dict__
    return jobinfo
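For context, here is a rough end-to-end sketch of how these helpers combine: validate the kwargs, prepare a jobspec, submit it with the Flux Python bindings, and then watch the output eventlog the same way get_job_output does. The inline flux.Flux() handle and the example command are assumptions; inside the server the handle is stored on app.handle.

import flux
import flux.job

kwargs = {"command": "sleep 10", "num_tasks": 1, "num_nodes": 1}

# Validation mirrors what both the API and the web UI form go through
errors = validate_submit_kwargs(kwargs, runtime=60)
if errors:
    raise ValueError(errors)

# prepare_job consumes kwargs["command"] and returns a JobspecV1
jobspec = prepare_job(kwargs, runtime=60, envars={"MY_VAR": "value"})

# The server keeps its handle on app.handle; a local handle is used here instead
handle = flux.Flux()
jobid = flux.job.submit(handle, jobspec)

# Collect output the same way get_job_output does (event_watch yields events
# from the "guest.output" eventlog until it closes)
lines = []
for event in flux.job.event_watch(handle, jobid, "guest.output"):
    if "data" in event.context:
        lines.append(event.context["data"])
print("".join(lines))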