From abedadfcdd3fe35b0cf2f959026c29d1f04bd378 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Thu, 5 Sep 2024 15:07:09 -0700 Subject: [PATCH 01/43] initialize dask cluster --- jupyter_scheduler/extension.py | 22 ++++++++++++++++++++++ jupyter_scheduler/scheduler.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/jupyter_scheduler/extension.py b/jupyter_scheduler/extension.py index 1a4ba3736..f83f5a3d7 100644 --- a/jupyter_scheduler/extension.py +++ b/jupyter_scheduler/extension.py @@ -91,3 +91,25 @@ def initialize_settings(self): if scheduler.task_runner: loop = asyncio.get_event_loop() loop.create_task(scheduler.task_runner.start()) + + async def stop_extension(self): + """ + Public method called by Jupyter Server when the server is stopping. + This calls the cleanup code defined in `self._stop_exception()` inside + an exception handler, as the server halts if this method raises an + exception. + """ + try: + await self._stop_extension() + except Exception as e: + self.log.error("Jupyter Scheduler raised an exception while stopping:") + self.log.exception(e) + + async def _stop_extension(self): + """ + Private method that defines the cleanup code to run when the server is + stopping. + """ + if "scheduler" in self.settings: + scheduler: SchedulerApp = self.settings["scheduler"] + await scheduler.stop_extension() diff --git a/jupyter_scheduler/scheduler.py b/jupyter_scheduler/scheduler.py index 867034c60..b32769eb1 100644 --- a/jupyter_scheduler/scheduler.py +++ b/jupyter_scheduler/scheduler.py @@ -6,6 +6,8 @@ import fsspec import psutil +from dask.distributed import Client as DaskClient +from distributed import LocalCluster from jupyter_core.paths import jupyter_data_dir from jupyter_server.transutils import _i18n from jupyter_server.utils import to_os_path @@ -381,6 +383,12 @@ def get_local_output_path( else: return os.path.join(self.root_dir, self.output_directory, output_dir_name) + async def stop_extension(self): + """ + Placeholder method for a cleanup code to run when the server is stopping. + """ + pass + class Scheduler(BaseScheduler): _db_session = None @@ -395,6 +403,12 @@ class Scheduler(BaseScheduler): ), ) + dask_cluster_url = Unicode( + allow_none=True, + config=True, + help="URL of the Dask cluster to connect to.", + ) + db_url = Unicode(help=_i18n("Scheduler database url")) task_runner = Instance(allow_none=True, klass="jupyter_scheduler.task_runner.BaseTaskRunner") @@ -414,6 +428,15 @@ def __init__( if self.task_runner_class: self.task_runner = self.task_runner_class(scheduler=self, config=config) + self.dask_client: DaskClient = self._get_dask_client() + + def _get_dask_client(self): + """Creates and configures a Dask client.""" + if self.dask_cluster_url: + return DaskClient(self.dask_cluster_url) + cluster = LocalCluster(processes=True) + return DaskClient(cluster) + @property def db_session(self): if not self._db_session: @@ -777,6 +800,13 @@ def get_staging_paths(self, model: Union[DescribeJob, DescribeJobDefinition]) -> return staging_paths + async def stop_extension(self): + """ + Cleanup code to run when the server is stopping. 
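        Awaited by ``SchedulerApp._stop_extension`` when the server shuts down;
        closes the Dask client created in ``__init__``.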
+ """ + if self.dask_client: + await self.dask_client.close() + class ArchivingScheduler(Scheduler): """Scheduler that captures all files in output directory in an archive.""" From 6c9de648b8c00fbd0ff5c547237120429ee4c525 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Mon, 9 Sep 2024 09:56:43 -0700 Subject: [PATCH 02/43] add workflow handler and endpoint --- jupyter_scheduler/executors.py | 93 ++++++++++++++++++++++++++++++++-- jupyter_scheduler/extension.py | 2 + jupyter_scheduler/orm.py | 10 ++++ jupyter_scheduler/scheduler.py | 28 +++++++++- jupyter_scheduler/workflows.py | 57 +++++++++++++++++++++ 5 files changed, 186 insertions(+), 4 deletions(-) create mode 100644 jupyter_scheduler/workflows.py diff --git a/jupyter_scheduler/executors.py b/jupyter_scheduler/executors.py index 7e1a9974e..e6a290cdf 100644 --- a/jupyter_scheduler/executors.py +++ b/jupyter_scheduler/executors.py @@ -4,17 +4,22 @@ import tarfile import traceback from abc import ABC, abstractmethod -from typing import Dict +from functools import lru_cache +from typing import Dict, List import fsspec import nbconvert import nbformat from nbconvert.preprocessors import CellExecutionError, ExecutePreprocessor +from prefect import flow, task +from prefect.futures import as_completed +from prefect_dask.task_runners import DaskTaskRunner from jupyter_scheduler.models import DescribeJob, JobFeature, Status -from jupyter_scheduler.orm import Job, create_session +from jupyter_scheduler.orm import Job, Workflow, create_session from jupyter_scheduler.parameterize import add_parameters from jupyter_scheduler.utils import get_utc_timestamp +from jupyter_scheduler.workflows import DescribeWorkflow class ExecutionManager(ABC): @@ -29,14 +34,29 @@ class ExecutionManager(ABC): _model = None _db_session = None - def __init__(self, job_id: str, root_dir: str, db_url: str, staging_paths: Dict[str, str]): + def __init__( + self, + job_id: str, + workflow_id: str, + root_dir: str, + db_url: str, + staging_paths: Dict[str, str], + ): self.job_id = job_id + self.workflow_id = workflow_id self.staging_paths = staging_paths self.root_dir = root_dir self.db_url = db_url @property def model(self): + if self.workflow_id: + with self.db_session() as session: + workflow = ( + session.query(Workflow).filter(Workflow.workflow_id == self.workflow_id).first() + ) + self._model = DescribeWorkflow.from_orm(workflow) + return self._model if self._model is None: with self.db_session() as session: job = session.query(Job).filter(Job.job_id == self.job_id).first() @@ -65,6 +85,18 @@ def process(self): else: self.on_complete() + def process_workflow(self): + + self.before_start_workflow() + try: + self.execute_workflow() + except CellExecutionError as e: + self.on_failure_workflow(e) + except Exception as e: + self.on_failure_workflow(e) + else: + self.on_complete_workflow() + @abstractmethod def execute(self): """Performs notebook execution, @@ -74,6 +106,11 @@ def execute(self): """ pass + @abstractmethod + def execute_workflow(self): + """Performs workflow execution""" + pass + @classmethod @abstractmethod def supported_features(cls) -> Dict[JobFeature, bool]: @@ -98,6 +135,15 @@ def before_start(self): ) session.commit() + def before_start_workflow(self): + """Called before start of execute""" + workflow = self.model + with self.db_session() as session: + session.query(Workflow).filter(Workflow.workflow_id == workflow.workflow_id).update( + {"status": Status.IN_PROGRESS} + ) + session.commit() + def on_failure(self, e: Exception): """Called after 
failure of execute""" job = self.model @@ -109,6 +155,17 @@ def on_failure(self, e: Exception): traceback.print_exc() + def on_failure_workflow(self, e: Exception): + """Called after failure of execute""" + workflow = self.model + with self.db_session() as session: + session.query(Workflow).filter(Workflow.workflow_id == workflow.workflow_id).update( + {"status": Status.FAILED, "status_message": str(e)} + ) + session.commit() + + traceback.print_exc() + def on_complete(self): """Called after job is completed""" job = self.model @@ -118,10 +175,40 @@ def on_complete(self): ) session.commit() + def on_complete_workflow(self): + workflow = self.model + with self.db_session() as session: + session.query(Workflow).filter(Workflow.workflow_id == workflow.workflow_id).update( + {"status": Status.COMPLETED} + ) + session.commit() + class DefaultExecutionManager(ExecutionManager): """Default execution manager that executes notebooks""" + @task(task_run_name="{task_id}") + def execute_task(task_id: str): + print(f"Task {task_id} executed") + return task_id + + @flow(task_runner=DaskTaskRunner()) + def execute_workflow(self): + workflow: DescribeWorkflow = self.model + tasks = {task["id"]: task for task in workflow.tasks} + + # create Prefect tasks, use caching to ensure Prefect tasks are created before wait_for is called on them + @lru_cache(maxsize=None) + def make_task(task_id, execute_task): + deps = tasks[task_id]["dependsOn"] + return execute_task.submit( + task_id, wait_for=[make_task(dep_id, execute_task) for dep_id in deps] + ) + + final_tasks = [make_task(task_id, self.execute_task) for task_id in tasks] + for future in as_completed(final_tasks): + print(future.result()) + def execute(self): job = self.model diff --git a/jupyter_scheduler/extension.py b/jupyter_scheduler/extension.py index f83f5a3d7..49a5d9c0d 100644 --- a/jupyter_scheduler/extension.py +++ b/jupyter_scheduler/extension.py @@ -6,6 +6,7 @@ from traitlets import Bool, Type, Unicode, default from jupyter_scheduler.orm import create_tables +from jupyter_scheduler.workflows import WorkflowHandler from .handlers import ( BatchJobHandler, @@ -35,6 +36,7 @@ class SchedulerApp(ExtensionApp): (r"scheduler/job_definitions/%s/jobs" % JOB_DEFINITION_ID_REGEX, JobFromDefinitionHandler), (r"scheduler/runtime_environments", RuntimeEnvironmentsHandler), (r"scheduler/config", ConfigHandler), + (r"scheduler/worklows", WorkflowHandler), ] drop_tables = Bool(False, config=True, help="Drop the database tables before starting.") diff --git a/jupyter_scheduler/orm.py b/jupyter_scheduler/orm.py index dbbbfad8e..9347ac201 100644 --- a/jupyter_scheduler/orm.py +++ b/jupyter_scheduler/orm.py @@ -107,6 +107,16 @@ class Job(CommonColumns, Base): # Any default values specified for new columns will be ignored during the migration process. +class Workflow(Base): + __tablename__ = "workflows" + __table_args__ = {"extend_existing": True} + workflow_id = Column(String(36), primary_key=True, default=generate_uuid) + tasks = Column(JsonType(1024)) + status = Column(String(64), default=Status.STOPPED) + # All new columns added to this table must be nullable to ensure compatibility during database migrations. + # Any default values specified for new columns will be ignored during the migration process. 
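    # ``tasks`` is stored as JSON; at this point in the series it holds whatever list the
    # ``CreateWorkflow`` payload supplies, and later patches populate it with the job_ids of
    # tasks added through ``create_workflow_task``.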
+ + class JobDefinition(CommonColumns, Base): __tablename__ = "job_definitions" __table_args__ = {"extend_existing": True} diff --git a/jupyter_scheduler/scheduler.py b/jupyter_scheduler/scheduler.py index b32769eb1..ee56c9bc5 100644 --- a/jupyter_scheduler/scheduler.py +++ b/jupyter_scheduler/scheduler.py @@ -40,12 +40,13 @@ UpdateJob, UpdateJobDefinition, ) -from jupyter_scheduler.orm import Job, JobDefinition, create_session +from jupyter_scheduler.orm import Job, JobDefinition, Workflow, create_session from jupyter_scheduler.utils import ( copy_directory, create_output_directory, create_output_filename, ) +from jupyter_scheduler.workflows import CreateWorkflow class BaseScheduler(LoggingConfigurable): @@ -111,6 +112,10 @@ def create_job(self, model: CreateJob) -> str: """ raise NotImplementedError("must be implemented by subclass") + def create_workflow(self, model: CreateWorkflow) -> str: + """Creates a new workflow record, may trigger execution of the workflow.""" + raise NotImplementedError("must be implemented by subclass") + def update_job(self, job_id: str, model: UpdateJob): """Updates job metadata in the persistence store, for example name, status etc. In case of status @@ -526,6 +531,27 @@ def create_job(self, model: CreateJob) -> str: return job_id + def create_workflow(self, model: CreateWorkflow) -> str: + + with self.db_session() as session: + + workflow = Workflow(**model.dict(exclude_none=True)) + + session.add(workflow) + session.commit() + + execution_manager = self.execution_manager_class( + workflow_id=workflow.workflow_id, + root_dir=self.root_dir, + db_url=self.db_url, + ) + execution_manager.process_workflow() + session.commit() + + workflow_id = workflow.workflow_id + + return workflow_id + def update_job(self, job_id: str, model: UpdateJob): with self.db_session() as session: session.query(Job).filter(Job.job_id == job_id).update(model.dict(exclude_none=True)) diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py new file mode 100644 index 000000000..57926f094 --- /dev/null +++ b/jupyter_scheduler/workflows.py @@ -0,0 +1,57 @@ +import json +from typing import List + +from jupyter_server.utils import ensure_async +from tornado.web import HTTPError, authenticated + +from jupyter_scheduler.exceptions import ( + IdempotencyTokenError, + InputUriError, + SchedulerError, +) +from jupyter_scheduler.handlers import ( + APIHandler, + ExtensionHandlerMixin, + JobHandlersMixin, +) +from jupyter_scheduler.models import Status +from jupyter_scheduler.pydantic_v1 import BaseModel, ValidationError + + +class WorkflowHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler): + @authenticated + async def post(self): + payload = self.get_json_body() + try: + workflow_id = await ensure_async( + self.scheduler.create_workflow(CreateWorkflow(**payload)) + ) + self.log.info(payload) + print(payload) + except ValidationError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except InputUriError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except IdempotencyTokenError as e: + self.log.exception(e) + raise HTTPError(409, str(e)) from e + except SchedulerError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except Exception as e: + self.log.exception(e) + raise HTTPError(500, "Unexpected error occurred during creation of a workflow.") from e + else: + self.finish(json.dumps(dict(workflow_id=workflow_id))) + + +class CreateWorkflow(BaseModel): + tasks: List[str] + + +class 
DescribeWorkflow(BaseModel): + workflow_id: str + tasks: List[str] = None + status: Status = Status.CREATED From 71aa7fe430477ed841399dd57a885bfbb4b8466b Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Tue, 10 Sep 2024 19:50:39 -0700 Subject: [PATCH 03/43] make execute_workflow a flow --- jupyter_scheduler/executors.py | 34 ++++++++++++++++++++++++---------- jupyter_scheduler/scheduler.py | 2 ++ jupyter_scheduler/workflows.py | 11 +++++++++++ 3 files changed, 37 insertions(+), 10 deletions(-) diff --git a/jupyter_scheduler/executors.py b/jupyter_scheduler/executors.py index e6a290cdf..b7d6ac7f7 100644 --- a/jupyter_scheduler/executors.py +++ b/jupyter_scheduler/executors.py @@ -19,7 +19,7 @@ from jupyter_scheduler.orm import Job, Workflow, create_session from jupyter_scheduler.parameterize import add_parameters from jupyter_scheduler.utils import get_utc_timestamp -from jupyter_scheduler.workflows import DescribeWorkflow +from jupyter_scheduler.workflows import DescribeTask, DescribeWorkflow class ExecutionManager(ABC): @@ -188,26 +188,40 @@ class DefaultExecutionManager(ExecutionManager): """Default execution manager that executes notebooks""" @task(task_run_name="{task_id}") - def execute_task(task_id: str): + def execute_task(self, task_id: str): print(f"Task {task_id} executed") return task_id - @flow(task_runner=DaskTaskRunner()) + @task + def get_task_data(self, task_ids: List[str] = []): + # TODO: get orm objects from Task table of the db, create DescribeTask for each + tasks_data_obj = [ + {"id": "task0", "dependsOn": ["task3"]}, + {"id": "task4", "dependsOn": ["task0", "task1", "task2", "task3"]}, + {"id": "task1", "dependsOn": []}, + {"id": "task2", "dependsOn": ["task1"]}, + {"id": "task3", "dependsOn": ["task1", "task2"]}, + ] + + return tasks_data_obj + + @flow() def execute_workflow(self): - workflow: DescribeWorkflow = self.model - tasks = {task["id"]: task for task in workflow.tasks} + + tasks_info = self.get_task_data() + tasks = {task["id"]: task for task in tasks_info} # create Prefect tasks, use caching to ensure Prefect tasks are created before wait_for is called on them @lru_cache(maxsize=None) - def make_task(task_id, execute_task): + def make_task(task_id): deps = tasks[task_id]["dependsOn"] - return execute_task.submit( - task_id, wait_for=[make_task(dep_id, execute_task) for dep_id in deps] + return self.execute_task.submit( + task_id, wait_for=[make_task(dep_id) for dep_id in deps] ) - final_tasks = [make_task(task_id, self.execute_task) for task_id in tasks] + final_tasks = [make_task(task_id) for task_id in tasks] for future in as_completed(final_tasks): - print(future.result()) + future.result() def execute(self): job = self.model diff --git a/jupyter_scheduler/scheduler.py b/jupyter_scheduler/scheduler.py index ee56c9bc5..18aacc5a1 100644 --- a/jupyter_scheduler/scheduler.py +++ b/jupyter_scheduler/scheduler.py @@ -541,6 +541,8 @@ def create_workflow(self, model: CreateWorkflow) -> str: session.commit() execution_manager = self.execution_manager_class( + job_id="123", + staging_paths=dict(), workflow_id=workflow.workflow_id, root_dir=self.root_dir, db_url=self.db_url, diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py index 57926f094..40cfb2f77 100644 --- a/jupyter_scheduler/workflows.py +++ b/jupyter_scheduler/workflows.py @@ -55,3 +55,14 @@ class DescribeWorkflow(BaseModel): workflow_id: str tasks: List[str] = None status: Status = Status.CREATED + + class Config: + orm_mode = True + + +class DescribeTask(BaseModel): + 
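    """A single task in a workflow DAG: the ids of the tasks it depends on and its current status."""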
dependsOn: List[str] = [] + status: Status = Status.CREATED + + class Config: + orm_mode = True From c8e0bf6d2fe888aa8f1458e591635caaab780bc3 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Wed, 11 Sep 2024 09:23:47 -0700 Subject: [PATCH 04/43] add execute_workflow option to DefaultExecutionManager --- jupyter_scheduler/executors.py | 11 +++++------ jupyter_scheduler/scheduler.py | 3 --- jupyter_scheduler/workflows.py | 3 --- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/jupyter_scheduler/executors.py b/jupyter_scheduler/executors.py index b7d6ac7f7..d94f53012 100644 --- a/jupyter_scheduler/executors.py +++ b/jupyter_scheduler/executors.py @@ -36,11 +36,11 @@ class ExecutionManager(ABC): def __init__( self, - job_id: str, - workflow_id: str, - root_dir: str, db_url: str, - staging_paths: Dict[str, str], + job_id: str = None, + workflow_id: str = None, + root_dir: str = None, + staging_paths: Dict[str, str] = None, ): self.job_id = job_id self.workflow_id = workflow_id @@ -86,7 +86,6 @@ def process(self): self.on_complete() def process_workflow(self): - self.before_start_workflow() try: self.execute_workflow() @@ -205,7 +204,7 @@ def get_task_data(self, task_ids: List[str] = []): return tasks_data_obj - @flow() + @flow def execute_workflow(self): tasks_info = self.get_task_data() diff --git a/jupyter_scheduler/scheduler.py b/jupyter_scheduler/scheduler.py index 18aacc5a1..4489b80f0 100644 --- a/jupyter_scheduler/scheduler.py +++ b/jupyter_scheduler/scheduler.py @@ -541,10 +541,7 @@ def create_workflow(self, model: CreateWorkflow) -> str: session.commit() execution_manager = self.execution_manager_class( - job_id="123", - staging_paths=dict(), workflow_id=workflow.workflow_id, - root_dir=self.root_dir, db_url=self.db_url, ) execution_manager.process_workflow() diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py index 40cfb2f77..74df95f93 100644 --- a/jupyter_scheduler/workflows.py +++ b/jupyter_scheduler/workflows.py @@ -3,7 +3,6 @@ from jupyter_server.utils import ensure_async from tornado.web import HTTPError, authenticated - from jupyter_scheduler.exceptions import ( IdempotencyTokenError, InputUriError, @@ -26,8 +25,6 @@ async def post(self): workflow_id = await ensure_async( self.scheduler.create_workflow(CreateWorkflow(**payload)) ) - self.log.info(payload) - print(payload) except ValidationError as e: self.log.exception(e) raise HTTPError(500, str(e)) from e From b79b91738a71b222663f2a489e9643c94d86c92e Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Wed, 11 Sep 2024 10:51:44 -0700 Subject: [PATCH 05/43] Update job model to be used with workflows --- jupyter_scheduler/executors.py | 2 +- jupyter_scheduler/models.py | 6 ++++++ jupyter_scheduler/orm.py | 4 +++- jupyter_scheduler/workflows.py | 7 ++++--- src/handler.ts | 5 +++++ src/model.ts | 8 +++++++- 6 files changed, 26 insertions(+), 6 deletions(-) diff --git a/jupyter_scheduler/executors.py b/jupyter_scheduler/executors.py index d94f53012..41db52c84 100644 --- a/jupyter_scheduler/executors.py +++ b/jupyter_scheduler/executors.py @@ -19,7 +19,7 @@ from jupyter_scheduler.orm import Job, Workflow, create_session from jupyter_scheduler.parameterize import add_parameters from jupyter_scheduler.utils import get_utc_timestamp -from jupyter_scheduler.workflows import DescribeTask, DescribeWorkflow +from jupyter_scheduler.workflows import DescribeWorkflow class ExecutionManager(ABC): diff --git a/jupyter_scheduler/models.py b/jupyter_scheduler/models.py index 38e240e0e..8b1564c2c 
100644 --- a/jupyter_scheduler/models.py +++ b/jupyter_scheduler/models.py @@ -42,6 +42,7 @@ def __str__(self) -> str: class Status(str, Enum): + DRAFT = "DRAFT" CREATED = "CREATED" QUEUED = "QUEUED" IN_PROGRESS = "IN_PROGRESS" @@ -86,6 +87,8 @@ class CreateJob(BaseModel): output_filename_template: Optional[str] = OUTPUT_FILENAME_TEMPLATE compute_type: Optional[str] = None package_input_folder: Optional[bool] = None + depends_on: Optional[str] = None + workflow_id: str = None @root_validator def compute_input_filename(cls, values) -> Dict: @@ -148,6 +151,8 @@ class DescribeJob(BaseModel): downloaded: bool = False package_input_folder: Optional[bool] = None packaged_files: Optional[List[str]] = [] + depends_on: Optional[str] = None + workflow_id: str = None class Config: orm_mode = True @@ -193,6 +198,7 @@ class UpdateJob(BaseModel): status: Optional[Status] = None name: Optional[str] = None compute_type: Optional[str] = None + depends_on: Optional[str] = None class DeleteJob(BaseModel): diff --git a/jupyter_scheduler/orm.py b/jupyter_scheduler/orm.py index 9347ac201..cf155db4c 100644 --- a/jupyter_scheduler/orm.py +++ b/jupyter_scheduler/orm.py @@ -103,6 +103,8 @@ class Job(CommonColumns, Base): url = Column(String(256), default=generate_jobs_url) pid = Column(Integer) idempotency_token = Column(String(256)) + depends_on = Column(JsonType(1024)) + workflow_id = Column(String(36)) # All new columns added to this table must be nullable to ensure compatibility during database migrations. # Any default values specified for new columns will be ignored during the migration process. @@ -112,7 +114,7 @@ class Workflow(Base): __table_args__ = {"extend_existing": True} workflow_id = Column(String(36), primary_key=True, default=generate_uuid) tasks = Column(JsonType(1024)) - status = Column(String(64), default=Status.STOPPED) + status = Column(String(64), default=Status.DRAFT) # All new columns added to this table must be nullable to ensure compatibility during database migrations. # Any default values specified for new columns will be ignored during the migration process. diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py index 74df95f93..2b207fb4f 100644 --- a/jupyter_scheduler/workflows.py +++ b/jupyter_scheduler/workflows.py @@ -57,9 +57,10 @@ class Config: orm_mode = True -class DescribeTask(BaseModel): - dependsOn: List[str] = [] - status: Status = Status.CREATED +class UpdateWorkflow(BaseModel): + workflow_id: str + tasks: List[str] = None + status: Status = None class Config: orm_mode = True diff --git a/src/handler.ts b/src/handler.ts index 4381bbd30..285e44ef5 100644 --- a/src/handler.ts +++ b/src/handler.ts @@ -372,6 +372,7 @@ export namespace Scheduler { timezone?: string; active?: boolean; input_uri?: string; + depends_on?: string[]; } export interface IDescribeJobDefinition { @@ -418,6 +419,8 @@ export namespace Scheduler { output_formats?: string[]; compute_type?: string; package_input_folder?: boolean; + depends_on?: string[]; + workflow_id?: string; } export interface ICreateJobFromDefinition { @@ -467,6 +470,8 @@ export namespace Scheduler { end_time?: number; downloaded: boolean; package_input_folder?: boolean; + depends_on?: string[]; + workflow_id?: string; } export interface ICreateJobResponse { diff --git a/src/model.ts b/src/model.ts index 01b501cb8..f4ed1326e 100644 --- a/src/model.ts +++ b/src/model.ts @@ -100,6 +100,8 @@ export interface ICreateJobModel // Is the create button disabled due to a submission in progress? 
createInProgress?: boolean; packageInputFolder?: boolean; + dependsOn?: string[]; + workflowId?: string; } export const defaultScheduleFields: ModelWithScheduleFields = { @@ -312,6 +314,8 @@ export interface IJobDetailModel { job_files: Scheduler.IJobFile[]; downloaded: boolean; packageInputFolder?: boolean; + dependsOn?: string[]; + workflowId?: string; } export interface IJobDefinitionModel { @@ -388,7 +392,9 @@ export function convertDescribeJobtoJobDetail( startTime: describeJob.start_time, endTime: describeJob.end_time, downloaded: describeJob.downloaded, - packageInputFolder: describeJob.package_input_folder + packageInputFolder: describeJob.package_input_folder, + dependsOn: describeJob.depends_on, + workflowId: describeJob.workflow_id }; } From 4ef2373fb0dccfa72ddae03b1e078870f7d75d01 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Wed, 11 Sep 2024 10:52:15 -0700 Subject: [PATCH 06/43] Add workflow run and jobs endpoints --- jupyter_scheduler/extension.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/jupyter_scheduler/extension.py b/jupyter_scheduler/extension.py index 49a5d9c0d..70e732b77 100644 --- a/jupyter_scheduler/extension.py +++ b/jupyter_scheduler/extension.py @@ -21,6 +21,7 @@ JOB_DEFINITION_ID_REGEX = r"(?P\w+(?:-\w+)+)" JOB_ID_REGEX = r"(?P\w+(?:-\w+)+)" +WORKFLOW_ID_REGEX = r"(?P\w+(?:-\w+)+)" class SchedulerApp(ExtensionApp): @@ -37,6 +38,9 @@ class SchedulerApp(ExtensionApp): (r"scheduler/runtime_environments", RuntimeEnvironmentsHandler), (r"scheduler/config", ConfigHandler), (r"scheduler/worklows", WorkflowHandler), + (r"scheduler/worklows/%s/run" % (WORKFLOW_ID_REGEX, JOB_ID_REGEX), WorkflowHandler), + (r"scheduler/worklows/%s/jobs" % WORKFLOW_ID_REGEX, WorkflowHandler), + (r"scheduler/worklows/%s/jobs/%s" % (WORKFLOW_ID_REGEX, JOB_ID_REGEX), WorkflowHandler), ] drop_tables = Bool(False, config=True, help="Drop the database tables before starting.") From d77310ec17bf630c9da9b92af65418253b43a3c6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 11 Sep 2024 17:53:30 +0000 Subject: [PATCH 07/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- jupyter_scheduler/extension.py | 4 ++-- jupyter_scheduler/workflows.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/jupyter_scheduler/extension.py b/jupyter_scheduler/extension.py index 70e732b77..b55cd3eec 100644 --- a/jupyter_scheduler/extension.py +++ b/jupyter_scheduler/extension.py @@ -38,9 +38,9 @@ class SchedulerApp(ExtensionApp): (r"scheduler/runtime_environments", RuntimeEnvironmentsHandler), (r"scheduler/config", ConfigHandler), (r"scheduler/worklows", WorkflowHandler), - (r"scheduler/worklows/%s/run" % (WORKFLOW_ID_REGEX, JOB_ID_REGEX), WorkflowHandler), + (r"scheduler/worklows/{}/run".format(WORKFLOW_ID_REGEX, JOB_ID_REGEX), WorkflowHandler), (r"scheduler/worklows/%s/jobs" % WORKFLOW_ID_REGEX, WorkflowHandler), - (r"scheduler/worklows/%s/jobs/%s" % (WORKFLOW_ID_REGEX, JOB_ID_REGEX), WorkflowHandler), + (r"scheduler/worklows/{}/jobs/{}".format(WORKFLOW_ID_REGEX, JOB_ID_REGEX), WorkflowHandler), ] drop_tables = Bool(False, config=True, help="Drop the database tables before starting.") diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py index 2b207fb4f..2ef05729e 100644 --- a/jupyter_scheduler/workflows.py +++ b/jupyter_scheduler/workflows.py @@ -3,6 +3,7 @@ from jupyter_server.utils import ensure_async from tornado.web import 
HTTPError, authenticated + from jupyter_scheduler.exceptions import ( IdempotencyTokenError, InputUriError, From c5da5b0d24f6d39d1216544bd51b9b648896333d Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Wed, 11 Sep 2024 16:12:47 -0700 Subject: [PATCH 08/43] split create_workflow and run_workflow endpoints and methods --- jupyter_scheduler/extension.py | 9 +++++---- jupyter_scheduler/scheduler.py | 19 +++++++------------ jupyter_scheduler/workflows.py | 24 ++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/jupyter_scheduler/extension.py b/jupyter_scheduler/extension.py index b55cd3eec..725435c78 100644 --- a/jupyter_scheduler/extension.py +++ b/jupyter_scheduler/extension.py @@ -6,7 +6,7 @@ from traitlets import Bool, Type, Unicode, default from jupyter_scheduler.orm import create_tables -from jupyter_scheduler.workflows import WorkflowHandler +from jupyter_scheduler.workflows import WorkflowHandler, WorkflowRunHandler from .handlers import ( BatchJobHandler, @@ -38,9 +38,10 @@ class SchedulerApp(ExtensionApp): (r"scheduler/runtime_environments", RuntimeEnvironmentsHandler), (r"scheduler/config", ConfigHandler), (r"scheduler/worklows", WorkflowHandler), - (r"scheduler/worklows/{}/run".format(WORKFLOW_ID_REGEX, JOB_ID_REGEX), WorkflowHandler), - (r"scheduler/worklows/%s/jobs" % WORKFLOW_ID_REGEX, WorkflowHandler), - (r"scheduler/worklows/{}/jobs/{}".format(WORKFLOW_ID_REGEX, JOB_ID_REGEX), WorkflowHandler), + ( + r"scheduler/worklows/{}/run".format(WORKFLOW_ID_REGEX), + WorkflowRunHandler, + ), ] drop_tables = Bool(False, config=True, help="Drop the database tables before starting.") diff --git a/jupyter_scheduler/scheduler.py b/jupyter_scheduler/scheduler.py index 4489b80f0..c294871f9 100644 --- a/jupyter_scheduler/scheduler.py +++ b/jupyter_scheduler/scheduler.py @@ -532,23 +532,18 @@ def create_job(self, model: CreateJob) -> str: return job_id def create_workflow(self, model: CreateWorkflow) -> str: - with self.db_session() as session: - workflow = Workflow(**model.dict(exclude_none=True)) - session.add(workflow) session.commit() + return workflow.workflow_id - execution_manager = self.execution_manager_class( - workflow_id=workflow.workflow_id, - db_url=self.db_url, - ) - execution_manager.process_workflow() - session.commit() - - workflow_id = workflow.workflow_id - + def run_workflow(self, workflow_id: str) -> str: + execution_manager = self.execution_manager_class( + workflow_id=workflow_id, + db_url=self.db_url, + ) + execution_manager.process_workflow() return workflow_id def update_job(self, job_id: str, model: UpdateJob): diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py index 2ef05729e..1917399fb 100644 --- a/jupyter_scheduler/workflows.py +++ b/jupyter_scheduler/workflows.py @@ -45,6 +45,30 @@ async def post(self): self.finish(json.dumps(dict(workflow_id=workflow_id))) +class WorkflowRunHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler): + @authenticated + async def post(self, workflow_id: str): + try: + workflow_id = await ensure_async(self.scheduler.run_workflow(workflow_id)) + except ValidationError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except InputUriError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except IdempotencyTokenError as e: + self.log.exception(e) + raise HTTPError(409, str(e)) from e + except SchedulerError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except Exception as e: + self.log.exception(e) + 
raise HTTPError(500, "Unexpected error occurred during creation of a workflow.") from e + else: + self.finish(json.dumps(dict(workflow_id=workflow_id))) + + class CreateWorkflow(BaseModel): tasks: List[str] From c64a53b8026bf66e61e1c587db07a6349f649941 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 11 Sep 2024 23:13:00 +0000 Subject: [PATCH 09/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- jupyter_scheduler/extension.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter_scheduler/extension.py b/jupyter_scheduler/extension.py index 725435c78..fdc3ded4f 100644 --- a/jupyter_scheduler/extension.py +++ b/jupyter_scheduler/extension.py @@ -39,7 +39,7 @@ class SchedulerApp(ExtensionApp): (r"scheduler/config", ConfigHandler), (r"scheduler/worklows", WorkflowHandler), ( - r"scheduler/worklows/{}/run".format(WORKFLOW_ID_REGEX), + fr"scheduler/worklows/{WORKFLOW_ID_REGEX}/run", WorkflowRunHandler, ), ] From b33191c81b3a533cc4711bebae118b77f9ddc646 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Wed, 11 Sep 2024 23:12:12 -0700 Subject: [PATCH 10/43] add GET workflow/{id} --- jupyter_scheduler/extension.py | 1 + jupyter_scheduler/models.py | 1 - jupyter_scheduler/orm.py | 2 +- jupyter_scheduler/scheduler.py | 20 ++++++++++++++++++-- jupyter_scheduler/workflows.py | 15 +++++++++++++++ 5 files changed, 35 insertions(+), 4 deletions(-) diff --git a/jupyter_scheduler/extension.py b/jupyter_scheduler/extension.py index fdc3ded4f..2d07c04a3 100644 --- a/jupyter_scheduler/extension.py +++ b/jupyter_scheduler/extension.py @@ -38,6 +38,7 @@ class SchedulerApp(ExtensionApp): (r"scheduler/runtime_environments", RuntimeEnvironmentsHandler), (r"scheduler/config", ConfigHandler), (r"scheduler/worklows", WorkflowHandler), + (r"scheduler/worklows/{}".format(WORKFLOW_ID_REGEX), WorkflowHandler), ( fr"scheduler/worklows/{WORKFLOW_ID_REGEX}/run", WorkflowRunHandler, diff --git a/jupyter_scheduler/models.py b/jupyter_scheduler/models.py index 8b1564c2c..d118673f7 100644 --- a/jupyter_scheduler/models.py +++ b/jupyter_scheduler/models.py @@ -42,7 +42,6 @@ def __str__(self) -> str: class Status(str, Enum): - DRAFT = "DRAFT" CREATED = "CREATED" QUEUED = "QUEUED" IN_PROGRESS = "IN_PROGRESS" diff --git a/jupyter_scheduler/orm.py b/jupyter_scheduler/orm.py index cf155db4c..edbb2d812 100644 --- a/jupyter_scheduler/orm.py +++ b/jupyter_scheduler/orm.py @@ -114,7 +114,7 @@ class Workflow(Base): __table_args__ = {"extend_existing": True} workflow_id = Column(String(36), primary_key=True, default=generate_uuid) tasks = Column(JsonType(1024)) - status = Column(String(64), default=Status.DRAFT) + status = Column(String(64), default=Status.CREATED) # All new columns added to this table must be nullable to ensure compatibility during database migrations. # Any default values specified for new columns will be ignored during the migration process. 
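As a quick illustration, the workflow endpoints added up to this point could be exercised
along the following lines. This is a sketch rather than part of any patch: it assumes a local
Jupyter Server at http://localhost:8888 and a placeholder API token, and it keeps the literal
"worklows" spelling used by the registered routes.

    import requests

    BASE = "http://localhost:8888/scheduler/worklows"
    HEADERS = {"Authorization": "token <your-token>"}  # placeholder token

    # create a workflow record (tasks can be an empty list for now)
    workflow_id = requests.post(BASE, json={"tasks": []}, headers=HEADERS).json()["workflow_id"]

    # trigger execution, then fetch the DescribeWorkflow record
    requests.post(f"{BASE}/{workflow_id}/run", headers=HEADERS)
    print(requests.get(f"{BASE}/{workflow_id}", headers=HEADERS).json())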
diff --git a/jupyter_scheduler/scheduler.py b/jupyter_scheduler/scheduler.py index c294871f9..64f1c0f4f 100644 --- a/jupyter_scheduler/scheduler.py +++ b/jupyter_scheduler/scheduler.py @@ -46,7 +46,7 @@ create_output_directory, create_output_filename, ) -from jupyter_scheduler.workflows import CreateWorkflow +from jupyter_scheduler.workflows import CreateWorkflow, DescribeWorkflow class BaseScheduler(LoggingConfigurable): @@ -113,7 +113,15 @@ def create_job(self, model: CreateJob) -> str: raise NotImplementedError("must be implemented by subclass") def create_workflow(self, model: CreateWorkflow) -> str: - """Creates a new workflow record, may trigger execution of the workflow.""" + """Creates a new workflow record.""" + raise NotImplementedError("must be implemented by subclass") + + def run_workflow(self, workflow_id: str) -> str: + """Triggers execution of the workflow.""" + raise NotImplementedError("must be implemented by subclass") + + def get_workflow(self, workflow_id: str) -> DescribeWorkflow: + """Returns workflow record for a single workflow.""" raise NotImplementedError("must be implemented by subclass") def update_job(self, job_id: str, model: UpdateJob): @@ -546,6 +554,14 @@ def run_workflow(self, workflow_id: str) -> str: execution_manager.process_workflow() return workflow_id + def get_workflow(self, workflow_id: str) -> DescribeWorkflow: + with self.db_session() as session: + workflow_record = ( + session.query(Workflow).filter(Workflow.workflow_id == workflow_id).one() + ) + model = DescribeWorkflow.from_orm(workflow_record) + return model + def update_job(self, job_id: str, model: UpdateJob): with self.db_session() as session: session.query(Job).filter(Job.job_id == job_id).update(model.dict(exclude_none=True)) diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py index 1917399fb..980f3e1d4 100644 --- a/jupyter_scheduler/workflows.py +++ b/jupyter_scheduler/workflows.py @@ -44,6 +44,21 @@ async def post(self): else: self.finish(json.dumps(dict(workflow_id=workflow_id))) + @authenticated + async def get(self, workflow_id: str = None): + if not workflow_id: + raise HTTPError(400, "Missing workflow_id in the request URL.") + try: + workflow = await ensure_async(self.scheduler.get_workflow(workflow_id)) + except SchedulerError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except Exception as e: + self.log.exception(e) + raise HTTPError(500, "Unexpected error occurred while getting workflow details.") from e + else: + self.finish(workflow.json()) + class WorkflowRunHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler): @authenticated From b07a76543f16401621b1dafe5c6f08e9550ea7ec Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 06:12:30 +0000 Subject: [PATCH 11/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- jupyter_scheduler/extension.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jupyter_scheduler/extension.py b/jupyter_scheduler/extension.py index 2d07c04a3..2a88a3d4e 100644 --- a/jupyter_scheduler/extension.py +++ b/jupyter_scheduler/extension.py @@ -38,9 +38,9 @@ class SchedulerApp(ExtensionApp): (r"scheduler/runtime_environments", RuntimeEnvironmentsHandler), (r"scheduler/config", ConfigHandler), (r"scheduler/worklows", WorkflowHandler), - (r"scheduler/worklows/{}".format(WORKFLOW_ID_REGEX), WorkflowHandler), + 
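        # NOTE: "worklows" (sic, missing the "f") is the spelling these routes register
        # throughout the series; clients have to match it until the paths are corrected.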
(fr"scheduler/worklows/{WORKFLOW_ID_REGEX}", WorkflowHandler), ( - fr"scheduler/worklows/{WORKFLOW_ID_REGEX}/run", + rf"scheduler/worklows/{WORKFLOW_ID_REGEX}/run", WorkflowRunHandler, ), ] From 044e3bb18f4724487bcb83fd9d2eae4f120854f1 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Thu, 12 Sep 2024 10:51:45 -0700 Subject: [PATCH 12/43] add workflow/{workflow_id}/tasks endpoint --- jupyter_scheduler/extension.py | 12 ++++-- jupyter_scheduler/scheduler.py | 5 ++- jupyter_scheduler/workflows.py | 79 ++++++++++++++++++++++++++++++++-- 3 files changed, 87 insertions(+), 9 deletions(-) diff --git a/jupyter_scheduler/extension.py b/jupyter_scheduler/extension.py index 2a88a3d4e..8d979fde7 100644 --- a/jupyter_scheduler/extension.py +++ b/jupyter_scheduler/extension.py @@ -6,7 +6,7 @@ from traitlets import Bool, Type, Unicode, default from jupyter_scheduler.orm import create_tables -from jupyter_scheduler.workflows import WorkflowHandler, WorkflowRunHandler +from jupyter_scheduler.workflows import WorkflowsHandler, WorkflowsRunHandler, WorkflowsTasksHandler from .handlers import ( BatchJobHandler, @@ -37,11 +37,15 @@ class SchedulerApp(ExtensionApp): (r"scheduler/job_definitions/%s/jobs" % JOB_DEFINITION_ID_REGEX, JobFromDefinitionHandler), (r"scheduler/runtime_environments", RuntimeEnvironmentsHandler), (r"scheduler/config", ConfigHandler), - (r"scheduler/worklows", WorkflowHandler), - (fr"scheduler/worklows/{WORKFLOW_ID_REGEX}", WorkflowHandler), + (r"scheduler/worklows", WorkflowsHandler), + (rf"scheduler/worklows/{WORKFLOW_ID_REGEX}", WorkflowsHandler), ( rf"scheduler/worklows/{WORKFLOW_ID_REGEX}/run", - WorkflowRunHandler, + WorkflowsRunHandler, + ), + ( + rf"scheduler/worklows/{WORKFLOW_ID_REGEX}/tasks", + WorkflowsTasksHandler, ), ] diff --git a/jupyter_scheduler/scheduler.py b/jupyter_scheduler/scheduler.py index 64f1c0f4f..d9c008757 100644 --- a/jupyter_scheduler/scheduler.py +++ b/jupyter_scheduler/scheduler.py @@ -473,7 +473,7 @@ def copy_input_folder(self, input_uri: str, nb_copy_to_path: str) -> List[str]: destination_dir=staging_dir, ) - def create_job(self, model: CreateJob) -> str: + def create_job(self, model: CreateJob, run: bool = True) -> str: if not model.job_definition_id and not self.file_exists(model.input_uri): raise InputUriError(model.input_uri) @@ -514,6 +514,9 @@ def create_job(self, model: CreateJob) -> str: else: self.copy_input_file(model.input_uri, staging_paths["input"]) + if not run: + return job.job_id + # The MP context forces new processes to not be forked on Linux. # This is necessary because `asyncio.get_event_loop()` is bugged in # forked processes in Python versions below 3.12. 
This method is diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py index 980f3e1d4..fd4b63754 100644 --- a/jupyter_scheduler/workflows.py +++ b/jupyter_scheduler/workflows.py @@ -14,11 +14,11 @@ ExtensionHandlerMixin, JobHandlersMixin, ) -from jupyter_scheduler.models import Status +from jupyter_scheduler.models import CreateJob, Status, UpdateJob from jupyter_scheduler.pydantic_v1 import BaseModel, ValidationError -class WorkflowHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler): +class WorkflowsHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler): @authenticated async def post(self): payload = self.get_json_body() @@ -60,7 +60,76 @@ async def get(self, workflow_id: str = None): self.finish(workflow.json()) -class WorkflowRunHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler): +class WorkflowsTasksHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler): + @authenticated + async def post(self, workflow_id: str): + print("WorkflowsTasksHandler post") + payload = self.get_json_body() + if workflow_id != payload.get("workflow_id"): + raise HTTPError( + 400, + "Error during workflow job creation. workflow_id in the URL and payload don't match.", + ) + try: + job_id = await ensure_async(self.scheduler.create_job(CreateJob(**payload), run=False)) + except ValidationError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except InputUriError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except IdempotencyTokenError as e: + self.log.exception(e) + raise HTTPError(409, str(e)) from e + except SchedulerError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except Exception as e: + self.log.exception(e) + raise HTTPError( + 500, "Unexpected error occurred during creation of workflow job." + ) from e + else: + self.finish(json.dumps(dict(job_id=job_id))) + + @authenticated + async def patch(self, workflow_id: str, job_id: str): + payload = self.get_json_body() + if workflow_id != payload.get("workflow_id", None): + raise HTTPError( + 400, + "Error during workflow job creation. workflow_id in the URL and payload don't match.", + ) + status = payload.get("status") + status = Status(status) if status else None + + if status and status != Status.STOPPED: + raise HTTPError( + 500, + "Invalid value for field 'status'. Workflow job status can only be updated to status 'STOPPED' after creation.", + ) + try: + if status: + await ensure_async(self.scheduler.stop_job(job_id)) + else: + await ensure_async(self.scheduler.update_job(job_id, UpdateJob(**payload))) + except ValidationError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except SchedulerError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except Exception as e: + self.log.exception(e) + raise HTTPError( + 500, "Unexpected error occurred while updating the workflow job." + ) from e + else: + self.set_status(204) + self.finish() + + +class WorkflowsRunHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler): @authenticated async def post(self, workflow_id: str): try: @@ -79,7 +148,9 @@ async def post(self, workflow_id: str): raise HTTPError(500, str(e)) from e except Exception as e: self.log.exception(e) - raise HTTPError(500, "Unexpected error occurred during creation of a workflow.") from e + raise HTTPError( + 500, "Unexpected error occurred during attempt to run a workflow." 
+ ) from e else: self.finish(json.dumps(dict(workflow_id=workflow_id))) From 5c351ac20635299e411e62837c615d42549cfe09 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 17:52:05 +0000 Subject: [PATCH 13/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- jupyter_scheduler/extension.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/jupyter_scheduler/extension.py b/jupyter_scheduler/extension.py index 8d979fde7..d06e6dc0c 100644 --- a/jupyter_scheduler/extension.py +++ b/jupyter_scheduler/extension.py @@ -6,7 +6,11 @@ from traitlets import Bool, Type, Unicode, default from jupyter_scheduler.orm import create_tables -from jupyter_scheduler.workflows import WorkflowsHandler, WorkflowsRunHandler, WorkflowsTasksHandler +from jupyter_scheduler.workflows import ( + WorkflowsHandler, + WorkflowsRunHandler, + WorkflowsTasksHandler, +) from .handlers import ( BatchJobHandler, From 4aa3046df9805e065fac5cb8c87bec3e5075c59a Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Thu, 12 Sep 2024 13:42:13 -0700 Subject: [PATCH 14/43] add create_workflow_task to handler and scheduler --- jupyter_scheduler/scheduler.py | 14 +++++++++++++- jupyter_scheduler/workflows.py | 15 +++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/jupyter_scheduler/scheduler.py b/jupyter_scheduler/scheduler.py index d9c008757..5aece2bec 100644 --- a/jupyter_scheduler/scheduler.py +++ b/jupyter_scheduler/scheduler.py @@ -46,7 +46,7 @@ create_output_directory, create_output_filename, ) -from jupyter_scheduler.workflows import CreateWorkflow, DescribeWorkflow +from jupyter_scheduler.workflows import CreateWorkflow, DescribeWorkflow, UpdateWorkflow class BaseScheduler(LoggingConfigurable): @@ -124,6 +124,10 @@ def get_workflow(self, workflow_id: str) -> DescribeWorkflow: """Returns workflow record for a single workflow.""" raise NotImplementedError("must be implemented by subclass") + def create_workflow_task(self, workflow_id: str, model: CreateJob) -> str: + """Adds a task to a workflow.""" + raise NotImplementedError("must be implemented by subclass") + def update_job(self, job_id: str, model: UpdateJob): """Updates job metadata in the persistence store, for example name, status etc. 
In case of status @@ -565,6 +569,14 @@ def get_workflow(self, workflow_id: str) -> DescribeWorkflow: model = DescribeWorkflow.from_orm(workflow_record) return model + def create_workflow_task(self, workflow_id: str, model: CreateJob) -> str: + job_id = self.scheduler.create_job(model, run=False) + workflow: DescribeWorkflow = self.scheduler.get_workflow(workflow_id) + updated_tasks = (workflow.tasks or [])[:] + updated_tasks.append(job_id) + self.scheduler.update_workflow(workflow_id, UpdateWorkflow(depends_on=updated_tasks)) + return job_id + def update_job(self, job_id: str, model: UpdateJob): with self.db_session() as session: session.query(Job).filter(Job.job_id == job_id).update(model.dict(exclude_none=True)) diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py index fd4b63754..3b7707c32 100644 --- a/jupyter_scheduler/workflows.py +++ b/jupyter_scheduler/workflows.py @@ -1,5 +1,5 @@ import json -from typing import List +from typing import List, Optional from jupyter_server.utils import ensure_async from tornado.web import HTTPError, authenticated @@ -71,7 +71,11 @@ async def post(self, workflow_id: str): "Error during workflow job creation. workflow_id in the URL and payload don't match.", ) try: - job_id = await ensure_async(self.scheduler.create_job(CreateJob(**payload), run=False)) + job_id = await ensure_async( + self.scheduler.create_workflow_task( + workflow_id=workflow_id, model=CreateJob(**payload) + ) + ) except ValidationError as e: self.log.exception(e) raise HTTPError(500, str(e)) from e @@ -175,3 +179,10 @@ class UpdateWorkflow(BaseModel): class Config: orm_mode = True + + +class UpdateWorkflow(BaseModel): + status: Optional[Status] = None + name: Optional[str] = None + compute_type: Optional[str] = None + depends_on: Optional[str] = None From c276385ea7222cbe35cb8174c4086b88fc994102 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Thu, 12 Sep 2024 16:28:14 -0700 Subject: [PATCH 15/43] make CreateWorkflow.tasks optional --- jupyter_scheduler/workflows.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py index 3b7707c32..33388c4b7 100644 --- a/jupyter_scheduler/workflows.py +++ b/jupyter_scheduler/workflows.py @@ -21,7 +21,7 @@ class WorkflowsHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler): @authenticated async def post(self): - payload = self.get_json_body() + payload = self.get_json_body() or {} try: workflow_id = await ensure_async( self.scheduler.create_workflow(CreateWorkflow(**payload)) @@ -160,7 +160,7 @@ async def post(self, workflow_id: str): class CreateWorkflow(BaseModel): - tasks: List[str] + tasks: List[str] = [] class DescribeWorkflow(BaseModel): From 2f6938b40f8835c8bb500761cc203194a1eb89de Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Fri, 13 Sep 2024 14:20:05 -0700 Subject: [PATCH 16/43] add update workflow functionality --- jupyter_scheduler/models.py | 6 +++--- jupyter_scheduler/scheduler.py | 13 ++++++++++--- jupyter_scheduler/workflows.py | 12 ++---------- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/jupyter_scheduler/models.py b/jupyter_scheduler/models.py index d118673f7..d8ad3d626 100644 --- a/jupyter_scheduler/models.py +++ b/jupyter_scheduler/models.py @@ -86,7 +86,7 @@ class CreateJob(BaseModel): output_filename_template: Optional[str] = OUTPUT_FILENAME_TEMPLATE compute_type: Optional[str] = None package_input_folder: Optional[bool] = None - depends_on: Optional[str] = None + depends_on: 
Optional[List[str]] = None workflow_id: str = None @root_validator @@ -150,7 +150,7 @@ class DescribeJob(BaseModel): downloaded: bool = False package_input_folder: Optional[bool] = None packaged_files: Optional[List[str]] = [] - depends_on: Optional[str] = None + depends_on: Optional[List[str]] = None workflow_id: str = None class Config: @@ -197,7 +197,7 @@ class UpdateJob(BaseModel): status: Optional[Status] = None name: Optional[str] = None compute_type: Optional[str] = None - depends_on: Optional[str] = None + depends_on: Optional[List[str]] = None class DeleteJob(BaseModel): diff --git a/jupyter_scheduler/scheduler.py b/jupyter_scheduler/scheduler.py index 5aece2bec..0906c62bc 100644 --- a/jupyter_scheduler/scheduler.py +++ b/jupyter_scheduler/scheduler.py @@ -570,13 +570,20 @@ def get_workflow(self, workflow_id: str) -> DescribeWorkflow: return model def create_workflow_task(self, workflow_id: str, model: CreateJob) -> str: - job_id = self.scheduler.create_job(model, run=False) - workflow: DescribeWorkflow = self.scheduler.get_workflow(workflow_id) + job_id = self.create_job(model, run=False) + workflow: DescribeWorkflow = self.get_workflow(workflow_id) updated_tasks = (workflow.tasks or [])[:] updated_tasks.append(job_id) - self.scheduler.update_workflow(workflow_id, UpdateWorkflow(depends_on=updated_tasks)) + self.update_workflow(workflow_id, UpdateWorkflow(tasks=updated_tasks)) return job_id + def update_workflow(self, workflow_id: str, model: UpdateWorkflow): + with self.db_session() as session: + session.query(Workflow).filter(Workflow.workflow_id == workflow_id).update( + model.dict(exclude_none=True) + ) + session.commit() + def update_job(self, job_id: str, model: UpdateJob): with self.db_session() as session: session.query(Job).filter(Job.job_id == job_id).update(model.dict(exclude_none=True)) diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py index 33388c4b7..be1e231f5 100644 --- a/jupyter_scheduler/workflows.py +++ b/jupyter_scheduler/workflows.py @@ -173,16 +173,8 @@ class Config: class UpdateWorkflow(BaseModel): - workflow_id: str - tasks: List[str] = None - status: Status = None + tasks: Optional[List[str]] = None + status: Optional[Status] = None class Config: orm_mode = True - - -class UpdateWorkflow(BaseModel): - status: Optional[Status] = None - name: Optional[str] = None - compute_type: Optional[str] = None - depends_on: Optional[str] = None From 3ae89971971eeb4f64c84b80e30e13fe5692d5c3 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Mon, 16 Sep 2024 12:30:24 -0700 Subject: [PATCH 17/43] execute notebook as task via DefaultExecutionManager --- jupyter_scheduler/executors.py | 54 ++++++++++++++++++++-------------- jupyter_scheduler/scheduler.py | 28 ++++++++++++++++++ 2 files changed, 60 insertions(+), 22 deletions(-) diff --git a/jupyter_scheduler/executors.py b/jupyter_scheduler/executors.py index 41db52c84..9515b9c9e 100644 --- a/jupyter_scheduler/executors.py +++ b/jupyter_scheduler/executors.py @@ -3,6 +3,7 @@ import shutil import tarfile import traceback +import multiprocessing as mp from abc import ABC, abstractmethod from functools import lru_cache from typing import Dict, List @@ -15,9 +16,10 @@ from prefect.futures import as_completed from prefect_dask.task_runners import DaskTaskRunner -from jupyter_scheduler.models import DescribeJob, JobFeature, Status +from jupyter_scheduler.models import CreateJob, DescribeJob, JobFeature, Status from jupyter_scheduler.orm import Job, Workflow, create_session from 
jupyter_scheduler.parameterize import add_parameters +from jupyter_scheduler.scheduler import Scheduler from jupyter_scheduler.utils import get_utc_timestamp from jupyter_scheduler.workflows import DescribeWorkflow @@ -186,36 +188,44 @@ def on_complete_workflow(self): class DefaultExecutionManager(ExecutionManager): """Default execution manager that executes notebooks""" - @task(task_run_name="{task_id}") - def execute_task(self, task_id: str): - print(f"Task {task_id} executed") - return task_id + @task + def execute_task(self, job: Job): + with self.db_session() as session: + staging_paths = Scheduler.get_staging_paths(DescribeJob.from_orm(job)) + + execution_manager = DefaultExecutionManager( + job_id=job.job_id, + staging_paths=staging_paths, + root_dir=self.root_dir, + db_url=self.db_url, + ) + execution_manager.process() + + job.pid = 1 # TODO: fix pid hardcode + job_id = job.job_id + + return job_id @task - def get_task_data(self, task_ids: List[str] = []): - # TODO: get orm objects from Task table of the db, create DescribeTask for each - tasks_data_obj = [ - {"id": "task0", "dependsOn": ["task3"]}, - {"id": "task4", "dependsOn": ["task0", "task1", "task2", "task3"]}, - {"id": "task1", "dependsOn": []}, - {"id": "task2", "dependsOn": ["task1"]}, - {"id": "task3", "dependsOn": ["task1", "task2"]}, - ] - - return tasks_data_obj + def get_tasks_records(self, task_ids: List[str]) -> List[Job]: + with self.db_session() as session: + tasks = session.query(Job).filter(Job.job_id.in_(task_ids)).all() + + return tasks @flow def execute_workflow(self): + tasks_info: List[Job] = self.get_tasks_records(self.model.tasks) + tasks = {task.job_id: task for task in tasks_info} - tasks_info = self.get_task_data() - tasks = {task["id"]: task for task in tasks_info} - - # create Prefect tasks, use caching to ensure Prefect tasks are created before wait_for is called on them @lru_cache(maxsize=None) def make_task(task_id): - deps = tasks[task_id]["dependsOn"] + """Create a delayed object for the given task recursively creating delayed objects for all tasks it depends on""" + deps = tasks[task_id].depends_on or [] + name = tasks[task_id].name + job_id = tasks[task_id].job_id return self.execute_task.submit( - task_id, wait_for=[make_task(dep_id) for dep_id in deps] + tasks[task_id], wait_for=[make_task(dep_id) for dep_id in deps] ) final_tasks = [make_task(task_id) for task_id in tasks] diff --git a/jupyter_scheduler/scheduler.py b/jupyter_scheduler/scheduler.py index 0906c62bc..ccfd7b6ed 100644 --- a/jupyter_scheduler/scheduler.py +++ b/jupyter_scheduler/scheduler.py @@ -521,6 +521,11 @@ def create_job(self, model: CreateJob, run: bool = True) -> str: if not run: return job.job_id + job_id = self.run_job(job=job, staging_paths=staging_paths) + return job_id + + def run_job(self, job: Job, staging_paths: Dict[str, str]) -> str: + with self.db_session() as session: # The MP context forces new processes to not be forked on Linux. # This is necessary because `asyncio.get_event_loop()` is bugged in # forked processes in Python versions below 3.12. 
This method is @@ -556,6 +561,7 @@ def create_workflow(self, model: CreateWorkflow) -> str: def run_workflow(self, workflow_id: str) -> str: execution_manager = self.execution_manager_class( workflow_id=workflow_id, + root_dir=self.root_dir, db_url=self.db_url, ) execution_manager.process_workflow() @@ -858,6 +864,28 @@ def get_staging_paths(self, model: Union[DescribeJob, DescribeJobDefinition]) -> return staging_paths + @staticmethod + def get_staging_paths(model: Union[DescribeJob, DescribeJobDefinition]) -> Dict[str, str]: + staging_paths = {} + if not model: + return staging_paths + + id = model.job_id if isinstance(model, DescribeJob) else model.job_definition_id + + for output_format in model.output_formats: + filename = create_output_filename( + model.input_filename, model.create_time, output_format + ) + staging_paths[output_format] = os.path.join( + os.path.join(jupyter_data_dir(), "scheduler_staging_area"), id, filename + ) + + staging_paths["input"] = os.path.join( + os.path.join(jupyter_data_dir(), "scheduler_staging_area"), id, model.input_filename + ) + + return staging_paths + async def stop_extension(self): """ Cleanup code to run when the server is stopping. From b9c466b8daf632797001283520d45a7fab675756 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 16 Sep 2024 19:30:38 +0000 Subject: [PATCH 18/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- jupyter_scheduler/executors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter_scheduler/executors.py b/jupyter_scheduler/executors.py index 9515b9c9e..800bf9ae8 100644 --- a/jupyter_scheduler/executors.py +++ b/jupyter_scheduler/executors.py @@ -1,9 +1,9 @@ import io +import multiprocessing as mp import os import shutil import tarfile import traceback -import multiprocessing as mp from abc import ABC, abstractmethod from functools import lru_cache from typing import Dict, List From 69b34ca5ecef35aca3f12ba38f7f5889656ca961 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Mon, 16 Sep 2024 13:16:17 -0700 Subject: [PATCH 19/43] Use prefect task to execute jobs, download files. 
Add flow and run names --- jupyter_scheduler/executors.py | 9 ++++++--- jupyter_scheduler/job_files_manager.py | 21 ++++++++++----------- jupyter_scheduler/scheduler.py | 25 +++++++------------------ 3 files changed, 23 insertions(+), 32 deletions(-) diff --git a/jupyter_scheduler/executors.py b/jupyter_scheduler/executors.py index 800bf9ae8..81aef3d97 100644 --- a/jupyter_scheduler/executors.py +++ b/jupyter_scheduler/executors.py @@ -188,7 +188,7 @@ def on_complete_workflow(self): class DefaultExecutionManager(ExecutionManager): """Default execution manager that executes notebooks""" - @task + @task(name="Execute workflow task") def execute_task(self, job: Job): with self.db_session() as session: staging_paths = Scheduler.get_staging_paths(DescribeJob.from_orm(job)) @@ -206,14 +206,14 @@ def execute_task(self, job: Job): return job_id - @task + @task(name="Get workflow task records") def get_tasks_records(self, task_ids: List[str]) -> List[Job]: with self.db_session() as session: tasks = session.query(Job).filter(Job.job_id.in_(task_ids)).all() return tasks - @flow + @flow(name="Execute workflow", flow_run_name="Execute workflow run") def execute_workflow(self): tasks_info: List[Job] = self.get_tasks_records(self.model.tasks) tasks = {task.job_id: task for task in tasks_info} @@ -232,6 +232,7 @@ def make_task(task_id): for future in as_completed(final_tasks): future.result() + @flow(name="Execute job", flow_run_name="Execute job run") def execute(self): job = self.model @@ -254,6 +255,7 @@ def execute(self): self.add_side_effects_files(staging_dir) self.create_output_files(job, nb) + @task(name="Check for and add side effect files") def add_side_effects_files(self, staging_dir: str): """Scan for side effect files potentially created after input file execution and update the job's packaged_files with these files""" input_notebook = os.path.relpath(self.staging_paths["input"]) @@ -276,6 +278,7 @@ def add_side_effects_files(self, staging_dir: str): ) session.commit() + @task(name="Create output files") def create_output_files(self, job: DescribeJob, notebook_node): for output_format in job.output_formats: cls = nbconvert.get_exporter(output_format) diff --git a/jupyter_scheduler/job_files_manager.py b/jupyter_scheduler/job_files_manager.py index 0e39c2b76..e0774d8a8 100644 --- a/jupyter_scheduler/job_files_manager.py +++ b/jupyter_scheduler/job_files_manager.py @@ -6,6 +6,7 @@ import fsspec from jupyter_server.utils import ensure_async +from prefect import task from jupyter_scheduler.exceptions import SchedulerError from jupyter_scheduler.scheduler import BaseScheduler @@ -23,17 +24,14 @@ async def copy_from_staging(self, job_id: str, redownload: Optional[bool] = Fals output_filenames = self.scheduler.get_job_filenames(job) output_dir = self.scheduler.get_local_output_path(model=job, root_dir_relative=True) - p = Process( - target=Downloader( - output_formats=job.output_formats, - output_filenames=output_filenames, - staging_paths=staging_paths, - output_dir=output_dir, - redownload=redownload, - include_staging_files=job.package_input_folder, - ).download - ) - p.start() + target = Downloader( + output_formats=job.output_formats, + output_filenames=output_filenames, + staging_paths=staging_paths, + output_dir=output_dir, + redownload=redownload, + include_staging_files=job.package_input_folder, + ).download class Downloader: @@ -77,6 +75,7 @@ def download_tar(self, archive_format: str = "tar"): with tarfile.open(fileobj=f, mode=read_mode) as tar: tar.extractall(self.output_dir) + 
@task(name="Download job files") def download(self): # ensure presence of staging paths if not self.staging_paths: diff --git a/jupyter_scheduler/scheduler.py b/jupyter_scheduler/scheduler.py index ccfd7b6ed..f2745ca0e 100644 --- a/jupyter_scheduler/scheduler.py +++ b/jupyter_scheduler/scheduler.py @@ -1,4 +1,3 @@ -import multiprocessing as mp import os import random import shutil @@ -526,25 +525,15 @@ def create_job(self, model: CreateJob, run: bool = True) -> str: def run_job(self, job: Job, staging_paths: Dict[str, str]) -> str: with self.db_session() as session: - # The MP context forces new processes to not be forked on Linux. - # This is necessary because `asyncio.get_event_loop()` is bugged in - # forked processes in Python versions below 3.12. This method is - # called by `jupyter_core` by `nbconvert` in the default executor. - # - # See: https://github.com/python/cpython/issues/66285 - # See also: https://github.com/jupyter/jupyter_core/pull/362 - mp_ctx = mp.get_context("spawn") - p = mp_ctx.Process( - target=self.execution_manager_class( - job_id=job.job_id, - staging_paths=staging_paths, - root_dir=self.root_dir, - db_url=self.db_url, - ).process + execution_manager = self.execution_manager_class( + job_id=job.job_id, + staging_paths=staging_paths, + root_dir=self.root_dir, + db_url=self.db_url, ) - p.start() + execution_manager.process() - job.pid = p.pid + job.pid = 1 # TODO: fix pid hardcode session.commit() job_id = job.job_id From f18071e962392940439d378130c84aa9d2fb98d2 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Mon, 16 Sep 2024 13:45:05 -0700 Subject: [PATCH 20/43] Remove Dask --- jupyter_scheduler/executors.py | 1 - jupyter_scheduler/extension.py | 22 ---------------------- jupyter_scheduler/scheduler.py | 29 ----------------------------- jupyter_scheduler/workflows.py | 1 - 4 files changed, 53 deletions(-) diff --git a/jupyter_scheduler/executors.py b/jupyter_scheduler/executors.py index 81aef3d97..7be02242b 100644 --- a/jupyter_scheduler/executors.py +++ b/jupyter_scheduler/executors.py @@ -14,7 +14,6 @@ from nbconvert.preprocessors import CellExecutionError, ExecutePreprocessor from prefect import flow, task from prefect.futures import as_completed -from prefect_dask.task_runners import DaskTaskRunner from jupyter_scheduler.models import CreateJob, DescribeJob, JobFeature, Status from jupyter_scheduler.orm import Job, Workflow, create_session diff --git a/jupyter_scheduler/extension.py b/jupyter_scheduler/extension.py index d06e6dc0c..086fc34a0 100644 --- a/jupyter_scheduler/extension.py +++ b/jupyter_scheduler/extension.py @@ -107,25 +107,3 @@ def initialize_settings(self): if scheduler.task_runner: loop = asyncio.get_event_loop() loop.create_task(scheduler.task_runner.start()) - - async def stop_extension(self): - """ - Public method called by Jupyter Server when the server is stopping. - This calls the cleanup code defined in `self._stop_exception()` inside - an exception handler, as the server halts if this method raises an - exception. - """ - try: - await self._stop_extension() - except Exception as e: - self.log.error("Jupyter Scheduler raised an exception while stopping:") - self.log.exception(e) - - async def _stop_extension(self): - """ - Private method that defines the cleanup code to run when the server is - stopping. 
- """ - if "scheduler" in self.settings: - scheduler: SchedulerApp = self.settings["scheduler"] - await scheduler.stop_extension() diff --git a/jupyter_scheduler/scheduler.py b/jupyter_scheduler/scheduler.py index f2745ca0e..724e18b07 100644 --- a/jupyter_scheduler/scheduler.py +++ b/jupyter_scheduler/scheduler.py @@ -5,7 +5,6 @@ import fsspec import psutil -from dask.distributed import Client as DaskClient from distributed import LocalCluster from jupyter_core.paths import jupyter_data_dir from jupyter_server.transutils import _i18n @@ -399,12 +398,6 @@ def get_local_output_path( else: return os.path.join(self.root_dir, self.output_directory, output_dir_name) - async def stop_extension(self): - """ - Placeholder method for a cleanup code to run when the server is stopping. - """ - pass - class Scheduler(BaseScheduler): _db_session = None @@ -419,12 +412,6 @@ class Scheduler(BaseScheduler): ), ) - dask_cluster_url = Unicode( - allow_none=True, - config=True, - help="URL of the Dask cluster to connect to.", - ) - db_url = Unicode(help=_i18n("Scheduler database url")) task_runner = Instance(allow_none=True, klass="jupyter_scheduler.task_runner.BaseTaskRunner") @@ -444,15 +431,6 @@ def __init__( if self.task_runner_class: self.task_runner = self.task_runner_class(scheduler=self, config=config) - self.dask_client: DaskClient = self._get_dask_client() - - def _get_dask_client(self): - """Creates and configures a Dask client.""" - if self.dask_cluster_url: - return DaskClient(self.dask_cluster_url) - cluster = LocalCluster(processes=True) - return DaskClient(cluster) - @property def db_session(self): if not self._db_session: @@ -875,13 +853,6 @@ def get_staging_paths(model: Union[DescribeJob, DescribeJobDefinition]) -> Dict[ return staging_paths - async def stop_extension(self): - """ - Cleanup code to run when the server is stopping. - """ - if self.dask_client: - await self.dask_client.close() - class ArchivingScheduler(Scheduler): """Scheduler that captures all files in output directory in an archive.""" diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py index be1e231f5..ec45b00e0 100644 --- a/jupyter_scheduler/workflows.py +++ b/jupyter_scheduler/workflows.py @@ -63,7 +63,6 @@ async def get(self, workflow_id: str = None): class WorkflowsTasksHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler): @authenticated async def post(self, workflow_id: str): - print("WorkflowsTasksHandler post") payload = self.get_json_body() if workflow_id != payload.get("workflow_id"): raise HTTPError( From 440dfd16a010f77136362339dfab806807eb414a Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Mon, 23 Sep 2024 13:14:22 -0700 Subject: [PATCH 21/43] change depends_on and tasks data type to not limit the size --- jupyter_scheduler/orm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jupyter_scheduler/orm.py b/jupyter_scheduler/orm.py index edbb2d812..b37961406 100644 --- a/jupyter_scheduler/orm.py +++ b/jupyter_scheduler/orm.py @@ -103,7 +103,7 @@ class Job(CommonColumns, Base): url = Column(String(256), default=generate_jobs_url) pid = Column(Integer) idempotency_token = Column(String(256)) - depends_on = Column(JsonType(1024)) + depends_on = Column(JsonType) workflow_id = Column(String(36)) # All new columns added to this table must be nullable to ensure compatibility during database migrations. # Any default values specified for new columns will be ignored during the migration process. 
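Note on the hunk above: it drops the explicit length from Job.depends_on (JsonType(1024) becomes JsonType), and the next hunk applies the same change to Workflow.tasks, so the serialized ID lists are no longer capped at 1024 characters. The project's actual JsonType is defined elsewhere in orm.py and is not shown in this patch; the sketch below only illustrates the usual SQLAlchemy TypeDecorator pattern such a change relies on, where omitting the length maps the column to an unbounded TEXT type instead of a sized VARCHAR. All names in the sketch are hypothetical.

# Illustrative sketch, not the project's actual JsonType: shows why passing no
# length removes the size limit under the common TypeDecorator pattern.
import json

from sqlalchemy import String, Text
from sqlalchemy.types import TypeDecorator


class JsonTypeSketch(TypeDecorator):
    impl = Text
    cache_ok = True

    def __init__(self, length=None, *args, **kwargs):
        self.length = length
        super().__init__(*args, **kwargs)

    def load_dialect_impl(self, dialect):
        # JsonTypeSketch(1024) -> VARCHAR(1024); JsonTypeSketch -> TEXT (unbounded)
        if self.length:
            return dialect.type_descriptor(String(self.length))
        return dialect.type_descriptor(Text())

    def process_bind_param(self, value, dialect):
        return None if value is None else json.dumps(value)

    def process_result_value(self, value, dialect):
        return None if value is None else json.loads(value)
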
@@ -113,7 +113,7 @@ class Workflow(Base): __tablename__ = "workflows" __table_args__ = {"extend_existing": True} workflow_id = Column(String(36), primary_key=True, default=generate_uuid) - tasks = Column(JsonType(1024)) + tasks = Column(JsonType) status = Column(String(64), default=Status.CREATED) # All new columns added to this table must be nullable to ensure compatibility during database migrations. # Any default values specified for new columns will be ignored during the migration process. From 6f9ec65b2ff0b00dcfa2ccc994dc0bdb8ea1dde5 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Mon, 23 Sep 2024 13:14:47 -0700 Subject: [PATCH 22/43] make workflow_id optional --- jupyter_scheduler/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jupyter_scheduler/models.py b/jupyter_scheduler/models.py index d8ad3d626..c7d2e6bb5 100644 --- a/jupyter_scheduler/models.py +++ b/jupyter_scheduler/models.py @@ -87,7 +87,7 @@ class CreateJob(BaseModel): compute_type: Optional[str] = None package_input_folder: Optional[bool] = None depends_on: Optional[List[str]] = None - workflow_id: str = None + workflow_id: Optional[str] = None @root_validator def compute_input_filename(cls, values) -> Dict: @@ -151,7 +151,7 @@ class DescribeJob(BaseModel): package_input_folder: Optional[bool] = None packaged_files: Optional[List[str]] = [] depends_on: Optional[List[str]] = None - workflow_id: str = None + workflow_id: Optional[str] = None class Config: orm_mode = True From 5a29d0343b145a41d1942a1ef4a14ebaf610318f Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Mon, 23 Sep 2024 13:15:07 -0700 Subject: [PATCH 23/43] remove distributed dependency --- jupyter_scheduler/scheduler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/jupyter_scheduler/scheduler.py b/jupyter_scheduler/scheduler.py index 724e18b07..5ee99de44 100644 --- a/jupyter_scheduler/scheduler.py +++ b/jupyter_scheduler/scheduler.py @@ -5,7 +5,6 @@ import fsspec import psutil -from distributed import LocalCluster from jupyter_core.paths import jupyter_data_dir from jupyter_server.transutils import _i18n from jupyter_server.utils import to_os_path From cdafb4f2805c60fad1bad5d6231f83649c9e2848 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Mon, 23 Sep 2024 13:16:01 -0700 Subject: [PATCH 24/43] don't require workflow_id in the task creation body, only as a part of URL --- jupyter_scheduler/workflows.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py index ec45b00e0..ba4515871 100644 --- a/jupyter_scheduler/workflows.py +++ b/jupyter_scheduler/workflows.py @@ -64,13 +64,8 @@ class WorkflowsTasksHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler) @authenticated async def post(self, workflow_id: str): payload = self.get_json_body() - if workflow_id != payload.get("workflow_id"): - raise HTTPError( - 400, - "Error during workflow job creation. workflow_id in the URL and payload don't match.", - ) try: - job_id = await ensure_async( + task_id = await ensure_async( self.scheduler.create_workflow_task( workflow_id=workflow_id, model=CreateJob(**payload) ) @@ -93,16 +88,11 @@ async def post(self, workflow_id: str): 500, "Unexpected error occurred during creation of workflow job." 
) from e else: - self.finish(json.dumps(dict(job_id=job_id))) + self.finish(json.dumps(dict(task_id=task_id))) @authenticated - async def patch(self, workflow_id: str, job_id: str): + async def patch(self, _: str, task_id: str): payload = self.get_json_body() - if workflow_id != payload.get("workflow_id", None): - raise HTTPError( - 400, - "Error during workflow job creation. workflow_id in the URL and payload don't match.", - ) status = payload.get("status") status = Status(status) if status else None @@ -113,9 +103,9 @@ async def patch(self, workflow_id: str, job_id: str): ) try: if status: - await ensure_async(self.scheduler.stop_job(job_id)) + await ensure_async(self.scheduler.stop_job(task_id)) else: - await ensure_async(self.scheduler.update_job(job_id, UpdateJob(**payload))) + await ensure_async(self.scheduler.update_job(task_id, UpdateJob(**payload))) except ValidationError as e: self.log.exception(e) raise HTTPError(500, str(e)) from e From 650a5661f22c54843f1344ddf6f8ca2b3196f35b Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Mon, 23 Sep 2024 13:40:32 -0700 Subject: [PATCH 25/43] add status to the workflow models --- jupyter_scheduler/orm.py | 1 + jupyter_scheduler/workflows.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/jupyter_scheduler/orm.py b/jupyter_scheduler/orm.py index b37961406..4c3ae0e01 100644 --- a/jupyter_scheduler/orm.py +++ b/jupyter_scheduler/orm.py @@ -115,6 +115,7 @@ class Workflow(Base): workflow_id = Column(String(36), primary_key=True, default=generate_uuid) tasks = Column(JsonType) status = Column(String(64), default=Status.CREATED) + active = Column(Boolean, default=False) # All new columns added to this table must be nullable to ensure compatibility during database migrations. # Any default values specified for new columns will be ignored during the migration process. 
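Note on the hunk above: the new active flag on the Workflow table is exposed on the Pydantic models in the workflows.py hunk that follows, and it travels through the same model.dict(exclude_none=True) update path the scheduler uses for workflows elsewhere in this series. A minimal sketch of toggling the flag, assuming a placeholder db_url and an existing workflow_id:

# Sketch only: mirrors the Scheduler.update_workflow pattern from this patch series.
from jupyter_scheduler.orm import Workflow, create_session
from jupyter_scheduler.workflows import UpdateWorkflow

db_session = create_session("sqlite:///scheduler.sqlite")  # placeholder db_url


def set_workflow_active(workflow_id: str, active: bool) -> None:
    model = UpdateWorkflow(active=active)
    with db_session() as session:
        # exclude_none=True leaves unset fields (tasks, status) untouched
        session.query(Workflow).filter(Workflow.workflow_id == workflow_id).update(
            model.dict(exclude_none=True)
        )
        session.commit()
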
diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py index ba4515871..7656936a5 100644 --- a/jupyter_scheduler/workflows.py +++ b/jupyter_scheduler/workflows.py @@ -156,6 +156,7 @@ class DescribeWorkflow(BaseModel): workflow_id: str tasks: List[str] = None status: Status = Status.CREATED + active: Optional[bool] = None class Config: orm_mode = True @@ -164,6 +165,7 @@ class Config: class UpdateWorkflow(BaseModel): tasks: Optional[List[str]] = None status: Optional[Status] = None + active: Optional[bool] = None class Config: orm_mode = True From 6466c48da4c487e9a2385f363cb2bfa8eb736119 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Mon, 23 Sep 2024 13:54:00 -0700 Subject: [PATCH 26/43] add workflow definitions --- jupyter_scheduler/extension.py | 15 +++++ jupyter_scheduler/orm.py | 13 ++++ jupyter_scheduler/workflows.py | 105 +++++++++++++++++++++++++++++++++ 3 files changed, 133 insertions(+) diff --git a/jupyter_scheduler/extension.py b/jupyter_scheduler/extension.py index 086fc34a0..57d934ada 100644 --- a/jupyter_scheduler/extension.py +++ b/jupyter_scheduler/extension.py @@ -7,6 +7,8 @@ from jupyter_scheduler.orm import create_tables from jupyter_scheduler.workflows import ( + WorkflowDefinitionsHandler, + WorkflowDefinitionsTasksHandler, WorkflowsHandler, WorkflowsRunHandler, WorkflowsTasksHandler, @@ -25,6 +27,7 @@ JOB_DEFINITION_ID_REGEX = r"(?P\w+(?:-\w+)+)" JOB_ID_REGEX = r"(?P\w+(?:-\w+)+)" +WORKFLOW_DEFINITION_ID_REGEX = r"(?P\w+(?:-\w+)+)" WORKFLOW_ID_REGEX = r"(?P\w+(?:-\w+)+)" @@ -51,6 +54,18 @@ class SchedulerApp(ExtensionApp): rf"scheduler/worklows/{WORKFLOW_ID_REGEX}/tasks", WorkflowsTasksHandler, ), + ( + rf"scheduler/worklow_definitions/{WORKFLOW_DEFINITION_ID_REGEX}", + WorkflowDefinitionsHandler, + ), + ( + rf"scheduler/worklows/{WORKFLOW_DEFINITION_ID_REGEX}/run", + WorkflowDefinitionsHandler, + ), + ( + rf"scheduler/worklows/{WORKFLOW_ID_REGEX}/tasks", + WorkflowDefinitionsTasksHandler, + ), ] drop_tables = Bool(False, config=True, help="Drop the database tables before starting.") diff --git a/jupyter_scheduler/orm.py b/jupyter_scheduler/orm.py index 4c3ae0e01..3d14dede6 100644 --- a/jupyter_scheduler/orm.py +++ b/jupyter_scheduler/orm.py @@ -120,6 +120,19 @@ class Workflow(Base): # Any default values specified for new columns will be ignored during the migration process. +class WorkflowDefinition(Base): + __tablename__ = "workflow_definitions" + __table_args__ = {"extend_existing": True} + workflow_id = Column(String(36), primary_key=True, default=generate_uuid) + tasks = Column(JsonType) + status = Column(String(64), default=Status.CREATED) + active = Column(Boolean, default=False) + schedule = Column(String(256)) + timezone = Column(String(36)) + # All new columns added to this table must be nullable to ensure compatibility during database migrations. + # Any default values specified for new columns will be ignored during the migration process. 
+ + class JobDefinition(CommonColumns, Base): __tablename__ = "job_definitions" __table_args__ = {"extend_existing": True} diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py index 7656936a5..bc599ac36 100644 --- a/jupyter_scheduler/workflows.py +++ b/jupyter_scheduler/workflows.py @@ -148,6 +148,79 @@ async def post(self, workflow_id: str): self.finish(json.dumps(dict(workflow_id=workflow_id))) +class WorkflowDefinitionsHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler): + @authenticated + async def post(self): + payload = self.get_json_body() or {} + try: + workflow_id = await ensure_async( + self.scheduler.create_workflow(CreateWorkflow(**payload)) + ) + except ValidationError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except InputUriError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except IdempotencyTokenError as e: + self.log.exception(e) + raise HTTPError(409, str(e)) from e + except SchedulerError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except Exception as e: + self.log.exception(e) + raise HTTPError(500, "Unexpected error occurred during creation of a workflow.") from e + else: + self.finish(json.dumps(dict(workflow_id=workflow_id))) + + @authenticated + async def get(self, workflow_id: str = None): + if not workflow_id: + raise HTTPError(400, "Missing workflow_id in the request URL.") + try: + workflow = await ensure_async(self.scheduler.get_workflow(workflow_id)) + except SchedulerError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except Exception as e: + self.log.exception(e) + raise HTTPError(500, "Unexpected error occurred while getting workflow details.") from e + else: + self.finish(workflow.json()) + + +class WorkflowDefinitionsTasksHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler): + @authenticated + async def post(self, workflow_id: str): + payload = self.get_json_body() + try: + task_id = await ensure_async( + self.scheduler.create_workflow_task( + workflow_id=workflow_id, model=CreateJob(**payload) + ) + ) + except ValidationError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except InputUriError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except IdempotencyTokenError as e: + self.log.exception(e) + raise HTTPError(409, str(e)) from e + except SchedulerError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except Exception as e: + self.log.exception(e) + raise HTTPError( + 500, "Unexpected error occurred during creation of workflow job." 
+ ) from e + else: + self.finish(json.dumps(dict(task_id=task_id))) + + class CreateWorkflow(BaseModel): tasks: List[str] = [] @@ -169,3 +242,35 @@ class UpdateWorkflow(BaseModel): class Config: orm_mode = True + + +class CreateWorkflowDefinition(BaseModel): + tasks: List[str] = [] + # any field added to CreateWorkflow should also be added to this model as well + schedule: Optional[str] = None + timezone: Optional[str] = None + + class Config: + orm_mode = True + + +class DescribeWorkflowDefinition(BaseModel): + workflow_definition_id: str + tasks: List[str] = None + schedule: Optional[str] = None + timezone: Optional[str] = None + status: Status = Status.CREATED + active: Optional[bool] = None + + class Config: + orm_mode = True + + +class UpdateWorkflowDefinition(BaseModel): + tasks: Optional[List[str]] = None + schedule: Optional[str] = None + timezone: Optional[str] = None + active: Optional[bool] = None + + class Config: + orm_mode = True From 99947670431ea6514bcdedf02479e17248baeab6 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Mon, 7 Oct 2024 14:07:22 -0700 Subject: [PATCH 27/43] add workflow execution handlers and endpoints --- jupyter_scheduler/executors.py | 34 ++++++++++- jupyter_scheduler/extension.py | 8 ++- jupyter_scheduler/orm.py | 2 +- jupyter_scheduler/scheduler.py | 87 ++++++++++++++++++++++++++-- jupyter_scheduler/workflows.py | 102 +++++++++++++++++++++++++++------ 5 files changed, 206 insertions(+), 27 deletions(-) diff --git a/jupyter_scheduler/executors.py b/jupyter_scheduler/executors.py index 7be02242b..159ce4cce 100644 --- a/jupyter_scheduler/executors.py +++ b/jupyter_scheduler/executors.py @@ -16,11 +16,11 @@ from prefect.futures import as_completed from jupyter_scheduler.models import CreateJob, DescribeJob, JobFeature, Status -from jupyter_scheduler.orm import Job, Workflow, create_session +from jupyter_scheduler.orm import Job, Workflow, WorkflowDefinition, create_session from jupyter_scheduler.parameterize import add_parameters from jupyter_scheduler.scheduler import Scheduler from jupyter_scheduler.utils import get_utc_timestamp -from jupyter_scheduler.workflows import DescribeWorkflow +from jupyter_scheduler.workflows import DescribeWorkflow, DescribeWorkflowDefinition class ExecutionManager(ABC): @@ -40,11 +40,13 @@ def __init__( db_url: str, job_id: str = None, workflow_id: str = None, + workflow_definition_id: str = None, root_dir: str = None, staging_paths: Dict[str, str] = None, ): self.job_id = job_id self.workflow_id = workflow_id + self.workflow_definition_id = workflow_definition_id self.staging_paths = staging_paths self.root_dir = root_dir self.db_url = db_url @@ -58,6 +60,17 @@ def model(self): ) self._model = DescribeWorkflow.from_orm(workflow) return self._model + if self.workflow_definition_id: + with self.db_session() as session: + workflow_definition = ( + session.query(WorkflowDefinition) + .filter( + WorkflowDefinition.workflow_definition_id == self.workflow_definition_id + ) + .first() + ) + self._model = DescribeWorkflowDefinition.from_orm(workflow_definition) + return self._model if self._model is None: with self.db_session() as session: job = session.query(Job).filter(Job.job_id == self.job_id).first() @@ -187,6 +200,23 @@ def on_complete_workflow(self): class DefaultExecutionManager(ExecutionManager): """Default execution manager that executes notebooks""" + def activate_workflow_definition(self): + workflow_definition = self.model + with self.db_session() as session: + session.query(WorkflowDefinition).filter( + 
WorkflowDefinition.workflow_definition_id + == workflow_definition.workflow_definition_id + ).update({"active": True}) + session.commit() + workflow_definition = ( + session.query(WorkflowDefinition) + .filter( + WorkflowDefinition.workflow_definition_id + == workflow_definition.workflow_definition_id + ) + .first() + ) + @task(name="Execute workflow task") def execute_task(self, job: Job): with self.db_session() as session: diff --git a/jupyter_scheduler/extension.py b/jupyter_scheduler/extension.py index 57d934ada..4aea3430b 100644 --- a/jupyter_scheduler/extension.py +++ b/jupyter_scheduler/extension.py @@ -7,6 +7,7 @@ from jupyter_scheduler.orm import create_tables from jupyter_scheduler.workflows import ( + WorkflowDefinitionsActivationHandler, WorkflowDefinitionsHandler, WorkflowDefinitionsTasksHandler, WorkflowsHandler, @@ -54,16 +55,17 @@ class SchedulerApp(ExtensionApp): rf"scheduler/worklows/{WORKFLOW_ID_REGEX}/tasks", WorkflowsTasksHandler, ), + (r"scheduler/worklow_definitions", WorkflowDefinitionsHandler), ( rf"scheduler/worklow_definitions/{WORKFLOW_DEFINITION_ID_REGEX}", WorkflowDefinitionsHandler, ), ( - rf"scheduler/worklows/{WORKFLOW_DEFINITION_ID_REGEX}/run", - WorkflowDefinitionsHandler, + rf"scheduler/worklow_definitions/{WORKFLOW_DEFINITION_ID_REGEX}/activate", + WorkflowDefinitionsActivationHandler, ), ( - rf"scheduler/worklows/{WORKFLOW_ID_REGEX}/tasks", + rf"scheduler/worklow_definitions/{WORKFLOW_DEFINITION_ID_REGEX}/tasks", WorkflowDefinitionsTasksHandler, ), ] diff --git a/jupyter_scheduler/orm.py b/jupyter_scheduler/orm.py index 3d14dede6..08bfa4e7d 100644 --- a/jupyter_scheduler/orm.py +++ b/jupyter_scheduler/orm.py @@ -123,7 +123,7 @@ class Workflow(Base): class WorkflowDefinition(Base): __tablename__ = "workflow_definitions" __table_args__ = {"extend_existing": True} - workflow_id = Column(String(36), primary_key=True, default=generate_uuid) + workflow_definition_id = Column(String(36), primary_key=True, default=generate_uuid) tasks = Column(JsonType) status = Column(String(64), default=Status.CREATED) active = Column(Boolean, default=False) diff --git a/jupyter_scheduler/scheduler.py b/jupyter_scheduler/scheduler.py index 5ee99de44..e8dc1c150 100644 --- a/jupyter_scheduler/scheduler.py +++ b/jupyter_scheduler/scheduler.py @@ -37,13 +37,20 @@ UpdateJob, UpdateJobDefinition, ) -from jupyter_scheduler.orm import Job, JobDefinition, Workflow, create_session +from jupyter_scheduler.orm import Job, JobDefinition, Workflow, WorkflowDefinition, create_session from jupyter_scheduler.utils import ( copy_directory, create_output_directory, create_output_filename, ) -from jupyter_scheduler.workflows import CreateWorkflow, DescribeWorkflow, UpdateWorkflow +from jupyter_scheduler.workflows import ( + CreateWorkflow, + CreateWorkflowDefinition, + DescribeWorkflow, + DescribeWorkflowDefinition, + UpdateWorkflow, + UpdateWorkflowDefinition, +) class BaseScheduler(LoggingConfigurable): @@ -117,6 +124,10 @@ def run_workflow(self, workflow_id: str) -> str: """Triggers execution of the workflow.""" raise NotImplementedError("must be implemented by subclass") + def activate_workflow_definition(self, workflow_definition_id: str) -> str: + """Activates workflow marking it as ready for execution.""" + raise NotImplementedError("must be implemented by subclass") + def get_workflow(self, workflow_id: str) -> DescribeWorkflow: """Returns workflow record for a single workflow.""" raise NotImplementedError("must be implemented by subclass") @@ -125,6 +136,12 @@ def 
create_workflow_task(self, workflow_id: str, model: CreateJob) -> str: """Adds a task to a workflow.""" raise NotImplementedError("must be implemented by subclass") + def create_workflow_definition_task( + self, workflow_definition_id: str, model: CreateJobDefinition + ) -> str: + """Adds a task to a workflow definition.""" + raise NotImplementedError("must be implemented by subclass") + def update_job(self, job_id: str, model: UpdateJob): """Updates job metadata in the persistence store, for example name, status etc. In case of status @@ -176,6 +193,13 @@ def create_job_definition(self, model: CreateJobDefinition) -> str: """ raise NotImplementedError("must be implemented by subclass") + def create_workflow_definition(self, model: CreateWorkflowDefinition) -> str: + """Creates a new workflow definition record, + consider this as the template for creating + recurring/scheduled workflows. + """ + raise NotImplementedError("must be implemented by subclass") + def update_job_definition(self, job_definition_id: str, model: UpdateJobDefinition): """Updates job definition metadata in the persistence store, should only impact all future jobs. @@ -192,6 +216,10 @@ def get_job_definition(self, job_definition_id: str) -> DescribeJobDefinition: """Returns job definition record for a single job definition""" raise NotImplementedError("must be implemented by subclass") + def get_workflow_definition(self, workflow_definition_id: str) -> DescribeWorkflowDefinition: + """Returns workflow definition record for a single workflow definition""" + raise NotImplementedError("must be implemented by subclass") + def list_job_definitions(self, query: ListJobDefinitionsQuery) -> ListJobDefinitionsResponse: """Returns list of all job definitions filtered by query""" raise NotImplementedError("must be implemented by subclass") @@ -524,6 +552,13 @@ def create_workflow(self, model: CreateWorkflow) -> str: session.commit() return workflow.workflow_id + def create_workflow_definition(self, model: CreateWorkflowDefinition) -> str: + with self.db_session() as session: + workflow_definition = WorkflowDefinition(**model.dict(exclude_none=True)) + session.add(workflow_definition) + session.commit() + return workflow_definition.workflow_definition_id + def run_workflow(self, workflow_id: str) -> str: execution_manager = self.execution_manager_class( workflow_id=workflow_id, @@ -533,6 +568,15 @@ def run_workflow(self, workflow_id: str) -> str: execution_manager.process_workflow() return workflow_id + def activate_workflow_definition(self, workflow_definition_id: str) -> str: + execution_manager = self.execution_manager_class( + workflow_definition_id=workflow_definition_id, + root_dir=self.root_dir, + db_url=self.db_url, + ) + execution_manager.activate_workflow_definition() + return workflow_definition_id + def get_workflow(self, workflow_id: str) -> DescribeWorkflow: with self.db_session() as session: workflow_record = ( @@ -541,6 +585,16 @@ def get_workflow(self, workflow_id: str) -> DescribeWorkflow: model = DescribeWorkflow.from_orm(workflow_record) return model + def get_workflow_definition(self, workflow_definition_id: str) -> DescribeWorkflowDefinition: + with self.db_session() as session: + workflow_definition_record = ( + session.query(WorkflowDefinition) + .filter(WorkflowDefinition.workflow_definition_id == workflow_definition_id) + .one() + ) + model = DescribeWorkflowDefinition.from_orm(workflow_definition_record) + return model + def create_workflow_task(self, workflow_id: str, model: CreateJob) -> str: job_id = 
self.create_job(model, run=False) workflow: DescribeWorkflow = self.get_workflow(workflow_id) @@ -549,6 +603,20 @@ def create_workflow_task(self, workflow_id: str, model: CreateJob) -> str: self.update_workflow(workflow_id, UpdateWorkflow(tasks=updated_tasks)) return job_id + def create_workflow_definition_task( + self, workflow_definition_id: str, model: CreateJobDefinition + ) -> str: + job_definition_id = self.create_job_definition(model, add_to_task_runner=False) + workflow_definition: DescribeWorkflowDefinition = self.get_workflow_definition( + workflow_definition_id + ) + updated_tasks = (workflow_definition.tasks or [])[:] + updated_tasks.append(job_definition_id) + self.update_workflow_definition( + workflow_definition_id, UpdateWorkflowDefinition(tasks=updated_tasks) + ) + return job_definition_id + def update_workflow(self, workflow_id: str, model: UpdateWorkflow): with self.db_session() as session: session.query(Workflow).filter(Workflow.workflow_id == workflow_id).update( @@ -556,6 +624,15 @@ def update_workflow(self, workflow_id: str, model: UpdateWorkflow): ) session.commit() + def update_workflow_definition( + self, workflow_definition_id: str, model: UpdateWorkflowDefinition + ): + with self.db_session() as session: + session.query(WorkflowDefinition).filter( + WorkflowDefinition.workflow_definition_id == workflow_definition_id + ).update(model.dict(exclude_none=True)) + session.commit() + def update_job(self, job_id: str, model: UpdateJob): with self.db_session() as session: session.query(Job).filter(Job.job_id == job_id).update(model.dict(exclude_none=True)) @@ -657,7 +734,9 @@ def stop_job(self, job_id): session.commit() break - def create_job_definition(self, model: CreateJobDefinition) -> str: + def create_job_definition( + self, model: CreateJobDefinition, add_to_task_runner: bool = True + ) -> str: with self.db_session() as session: if not self.file_exists(model.input_uri): raise InputUriError(model.input_uri) @@ -681,7 +760,7 @@ def create_job_definition(self, model: CreateJobDefinition) -> str: else: self.copy_input_file(model.input_uri, staging_paths["input"]) - if self.task_runner and job_definition_schedule: + if add_to_task_runner and self.task_runner and job_definition_schedule: self.task_runner.add_job_definition(job_definition_id) return job_definition_id diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py index bc599ac36..5f9774131 100644 --- a/jupyter_scheduler/workflows.py +++ b/jupyter_scheduler/workflows.py @@ -14,7 +14,13 @@ ExtensionHandlerMixin, JobHandlersMixin, ) -from jupyter_scheduler.models import CreateJob, Status, UpdateJob +from jupyter_scheduler.models import ( + CreateJob, + CreateJobDefinition, + Status, + UpdateJob, + UpdateJobDefinition, +) from jupyter_scheduler.pydantic_v1 import BaseModel, ValidationError @@ -99,7 +105,7 @@ async def patch(self, _: str, task_id: str): if status and status != Status.STOPPED: raise HTTPError( 500, - "Invalid value for field 'status'. Workflow job status can only be updated to status 'STOPPED' after creation.", + "Invalid value for field 'status'. 
Workflow task status can only be updated to status 'STOPPED' after creation.", ) try: if status: @@ -153,8 +159,8 @@ class WorkflowDefinitionsHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHan async def post(self): payload = self.get_json_body() or {} try: - workflow_id = await ensure_async( - self.scheduler.create_workflow(CreateWorkflow(**payload)) + workflow_definition_id = await ensure_async( + self.scheduler.create_workflow_definition(CreateWorkflowDefinition(**payload)) ) except ValidationError as e: self.log.exception(e) @@ -170,34 +176,41 @@ async def post(self): raise HTTPError(500, str(e)) from e except Exception as e: self.log.exception(e) - raise HTTPError(500, "Unexpected error occurred during creation of a workflow.") from e + raise HTTPError( + 500, "Unexpected error occurred during creation of a workflow definition." + ) from e else: - self.finish(json.dumps(dict(workflow_id=workflow_id))) + self.finish(json.dumps(dict(workflow_definition_id=workflow_definition_id))) @authenticated - async def get(self, workflow_id: str = None): - if not workflow_id: + async def get(self, workflow_definition_id: str = None): + if not workflow_definition_id: raise HTTPError(400, "Missing workflow_id in the request URL.") try: - workflow = await ensure_async(self.scheduler.get_workflow(workflow_id)) + workflow_definition = await ensure_async( + self.scheduler.get_workflow_definition(workflow_definition_id) + ) except SchedulerError as e: self.log.exception(e) raise HTTPError(500, str(e)) from e except Exception as e: self.log.exception(e) - raise HTTPError(500, "Unexpected error occurred while getting workflow details.") from e + raise HTTPError( + 500, "Unexpected error occurred while getting workflow definition details." + ) from e else: - self.finish(workflow.json()) + self.finish(workflow_definition.json()) class WorkflowDefinitionsTasksHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler): @authenticated - async def post(self, workflow_id: str): + async def post(self, workflow_definition_id: str): payload = self.get_json_body() try: - task_id = await ensure_async( - self.scheduler.create_workflow_task( - workflow_id=workflow_id, model=CreateJob(**payload) + task_defintion_id = await ensure_async( + self.scheduler.create_workflow_definition_task( + workflow_definition_id=workflow_definition_id, + model=CreateJobDefinition(**payload), ) ) except ValidationError as e: @@ -215,10 +228,65 @@ async def post(self, workflow_id: str): except Exception as e: self.log.exception(e) raise HTTPError( - 500, "Unexpected error occurred during creation of workflow job." + 500, "Unexpected error occurred during creation of workflow definition task." ) from e else: - self.finish(json.dumps(dict(task_id=task_id))) + self.finish(json.dumps(dict(task_defintion_id=task_defintion_id))) + + @authenticated + async def patch(self, _: str, task_definition_id: str): + payload = self.get_json_body() + status = payload.get("status") + status = Status(status) if status else None + + try: + await ensure_async( + self.scheduler.update_job_definition( + task_definition_id, UpdateJobDefinition(**payload) + ) + ) + except ValidationError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except SchedulerError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except Exception as e: + self.log.exception(e) + raise HTTPError( + 500, "Unexpected error occurred while updating the workflow definition task." 
+ ) from e + else: + self.set_status(204) + self.finish() + + +class WorkflowDefinitionsActivationHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler): + @authenticated + async def post(self, workflow_definition_id: str): + try: + workflow_definition_id = await ensure_async( + self.scheduler.activate_workflow_definition(workflow_definition_id) + ) + except ValidationError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except InputUriError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except IdempotencyTokenError as e: + self.log.exception(e) + raise HTTPError(409, str(e)) from e + except SchedulerError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except Exception as e: + self.log.exception(e) + raise HTTPError( + 500, "Unexpected error occurred during attempt to run a workflow." + ) from e + else: + self.finish(json.dumps(dict(workflow_definition_id=workflow_definition_id))) class CreateWorkflow(BaseModel): From be6a01f92539a28625c953a4c546a66eed6d2d87 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 7 Oct 2024 21:07:43 +0000 Subject: [PATCH 28/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- jupyter_scheduler/scheduler.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/jupyter_scheduler/scheduler.py b/jupyter_scheduler/scheduler.py index e8dc1c150..9dd9b1e38 100644 --- a/jupyter_scheduler/scheduler.py +++ b/jupyter_scheduler/scheduler.py @@ -37,7 +37,13 @@ UpdateJob, UpdateJobDefinition, ) -from jupyter_scheduler.orm import Job, JobDefinition, Workflow, WorkflowDefinition, create_session +from jupyter_scheduler.orm import ( + Job, + JobDefinition, + Workflow, + WorkflowDefinition, + create_session, +) from jupyter_scheduler.utils import ( copy_directory, create_output_directory, From 417f392d98478255ff6a0828f10ddd90874996c3 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Wed, 9 Oct 2024 11:03:28 -0700 Subject: [PATCH 29/43] serve workflow definition on schedule --- jupyter_scheduler/executors.py | 54 +++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/jupyter_scheduler/executors.py b/jupyter_scheduler/executors.py index 159ce4cce..3161d595f 100644 --- a/jupyter_scheduler/executors.py +++ b/jupyter_scheduler/executors.py @@ -1,6 +1,7 @@ import io import multiprocessing as mp import os +from pathlib import Path import shutil import tarfile import traceback @@ -20,7 +21,7 @@ from jupyter_scheduler.parameterize import add_parameters from jupyter_scheduler.scheduler import Scheduler from jupyter_scheduler.utils import get_utc_timestamp -from jupyter_scheduler.workflows import DescribeWorkflow, DescribeWorkflowDefinition +from jupyter_scheduler.workflows import CreateWorkflow, DescribeWorkflow, DescribeWorkflowDefinition class ExecutionManager(ABC): @@ -197,25 +198,56 @@ def on_complete_workflow(self): session.commit() +@flow(name="Create and run a new workflow`") +def create_and_run_workflow(tasks: List[str], root_dir, db_url): + db_session = create_session(db_url) + with db_session() as session: + workflow = Workflow(tasks=tasks) + session.add(workflow) + session.commit() + workflow_id = workflow.workflow_id + execution_manager = DefaultExecutionManager( + workflow_id=workflow_id, + root_dir=root_dir, + db_url=db_url, + ) + execution_manager.process_workflow() + + class 
DefaultExecutionManager(ExecutionManager): """Default execution manager that executes notebooks""" def activate_workflow_definition(self): - workflow_definition = self.model + describe_workflow_definition: DescribeWorkflowDefinition = self.model with self.db_session() as session: session.query(WorkflowDefinition).filter( WorkflowDefinition.workflow_definition_id - == workflow_definition.workflow_definition_id + == describe_workflow_definition.workflow_definition_id ).update({"active": True}) session.commit() - workflow_definition = ( - session.query(WorkflowDefinition) - .filter( - WorkflowDefinition.workflow_definition_id - == workflow_definition.workflow_definition_id - ) - .first() - ) + self.serve_workflow_definition() + + @flow + def serve_workflow_definition(self): + describe_workflow_definition: DescribeWorkflowDefinition = self.model + attributes = describe_workflow_definition.dict( + exclude={"schedule", "timezone"}, exclude_none=True + ) + create_workflow = CreateWorkflow(**attributes) + flow_path = Path( + "/Users/aieroshe/Documents/jupyter-scheduler/jupyter_scheduler/executors.py" + ) + create_and_run_workflow.from_source( + source=str(flow_path.parent), + entrypoint="executors.py:create_and_run_workflow", + ).serve( + cron=self.model.schedule, + parameters={ + "model": create_workflow, + "root_dir": self.root_dir, + "db_url": self.db_url, + }, + ) @task(name="Execute workflow task") def execute_task(self, job: Job): From 5a2621bcb5e9c4c65e81bd68f066d19ab24abca5 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Mon, 14 Oct 2024 06:48:46 -0700 Subject: [PATCH 30/43] fix parameters passed to create_and_run_workflow when served with schedule --- jupyter_scheduler/executors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter_scheduler/executors.py b/jupyter_scheduler/executors.py index 3161d595f..9b5656e13 100644 --- a/jupyter_scheduler/executors.py +++ b/jupyter_scheduler/executors.py @@ -243,7 +243,7 @@ def serve_workflow_definition(self): ).serve( cron=self.model.schedule, parameters={ - "model": create_workflow, + "tasks": create_workflow.tasks, "root_dir": self.root_dir, "db_url": self.db_url, }, From 87306a8d29cc668237ae95993d50d4e0195b49e4 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Mon, 14 Oct 2024 13:45:47 -0700 Subject: [PATCH 31/43] change worklow_definitions/{id}/activate endpoint to worklow_definitions/{id}/deploy --- jupyter_scheduler/extension.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter_scheduler/extension.py b/jupyter_scheduler/extension.py index 4aea3430b..a09a5c725 100644 --- a/jupyter_scheduler/extension.py +++ b/jupyter_scheduler/extension.py @@ -61,7 +61,7 @@ class SchedulerApp(ExtensionApp): WorkflowDefinitionsHandler, ), ( - rf"scheduler/worklow_definitions/{WORKFLOW_DEFINITION_ID_REGEX}/activate", + rf"scheduler/worklow_definitions/{WORKFLOW_DEFINITION_ID_REGEX}/deploy", WorkflowDefinitionsActivationHandler, ), ( From e35607fdab96a8c1e7d40a7c715091de6c26a61d Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Mon, 14 Oct 2024 13:46:11 -0700 Subject: [PATCH 32/43] add name, parameters fields to Workflow and WorkflowDefinition --- jupyter_scheduler/orm.py | 4 ++++ jupyter_scheduler/workflows.py | 14 +++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/jupyter_scheduler/orm.py b/jupyter_scheduler/orm.py index 08bfa4e7d..f3cadb748 100644 --- a/jupyter_scheduler/orm.py +++ b/jupyter_scheduler/orm.py @@ -116,6 +116,8 @@ class Workflow(Base): tasks = 
Column(JsonType) status = Column(String(64), default=Status.CREATED) active = Column(Boolean, default=False) + name = Column(String(256)) + parameters = Column(JsonType(1024)) # All new columns added to this table must be nullable to ensure compatibility during database migrations. # Any default values specified for new columns will be ignored during the migration process. @@ -129,6 +131,8 @@ class WorkflowDefinition(Base): active = Column(Boolean, default=False) schedule = Column(String(256)) timezone = Column(String(36)) + name = Column(String(256)) + parameters = Column(JsonType(1024)) # All new columns added to this table must be nullable to ensure compatibility during database migrations. # Any default values specified for new columns will be ignored during the migration process. diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py index 5f9774131..bfe0f155d 100644 --- a/jupyter_scheduler/workflows.py +++ b/jupyter_scheduler/workflows.py @@ -1,5 +1,5 @@ import json -from typing import List, Optional +from typing import Dict, List, Optional from jupyter_server.utils import ensure_async from tornado.web import HTTPError, authenticated @@ -291,9 +291,13 @@ async def post(self, workflow_definition_id: str): class CreateWorkflow(BaseModel): tasks: List[str] = [] + name: str + parameters: Optional[Dict[str, str]] = None class DescribeWorkflow(BaseModel): + name: str + parameters: Optional[Dict[str, str]] = None workflow_id: str tasks: List[str] = None status: Status = Status.CREATED @@ -304,6 +308,8 @@ class Config: class UpdateWorkflow(BaseModel): + name: str + parameters: Optional[Dict[str, str]] = None tasks: Optional[List[str]] = None status: Optional[Status] = None active: Optional[bool] = None @@ -315,6 +321,8 @@ class Config: class CreateWorkflowDefinition(BaseModel): tasks: List[str] = [] # any field added to CreateWorkflow should also be added to this model as well + name: str + parameters: Optional[Dict[str, str]] = None schedule: Optional[str] = None timezone: Optional[str] = None @@ -323,6 +331,8 @@ class Config: class DescribeWorkflowDefinition(BaseModel): + name: str + parameters: Optional[Dict[str, str]] = None workflow_definition_id: str tasks: List[str] = None schedule: Optional[str] = None @@ -335,6 +345,8 @@ class Config: class UpdateWorkflowDefinition(BaseModel): + name: str + parameters: Optional[Dict[str, str]] = None tasks: Optional[List[str]] = None schedule: Optional[str] = None timezone: Optional[str] = None From 78e0818dce060fe89d94f738f5ba57ef9bb5edee Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 14 Oct 2024 20:46:28 +0000 Subject: [PATCH 33/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- jupyter_scheduler/executors.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/jupyter_scheduler/executors.py b/jupyter_scheduler/executors.py index 9b5656e13..b9c745737 100644 --- a/jupyter_scheduler/executors.py +++ b/jupyter_scheduler/executors.py @@ -1,12 +1,12 @@ import io import multiprocessing as mp import os -from pathlib import Path import shutil import tarfile import traceback from abc import ABC, abstractmethod from functools import lru_cache +from pathlib import Path from typing import Dict, List import fsspec @@ -21,7 +21,11 @@ from jupyter_scheduler.parameterize import add_parameters from jupyter_scheduler.scheduler import Scheduler from jupyter_scheduler.utils import get_utc_timestamp 
-from jupyter_scheduler.workflows import CreateWorkflow, DescribeWorkflow, DescribeWorkflowDefinition +from jupyter_scheduler.workflows import ( + CreateWorkflow, + DescribeWorkflow, + DescribeWorkflowDefinition, +) class ExecutionManager(ABC): From e9d22e1443a0fdeb5c1437326379beaf86bdcf20 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Fri, 18 Oct 2024 10:37:08 -0700 Subject: [PATCH 34/43] replace prefect with dask --- jupyter_scheduler/executors.py | 57 +++++++++++--------------- jupyter_scheduler/job_files_manager.py | 4 +- jupyter_scheduler/workflows.py | 4 +- 3 files changed, 28 insertions(+), 37 deletions(-) diff --git a/jupyter_scheduler/executors.py b/jupyter_scheduler/executors.py index b9c745737..cc1003423 100644 --- a/jupyter_scheduler/executors.py +++ b/jupyter_scheduler/executors.py @@ -9,12 +9,11 @@ from pathlib import Path from typing import Dict, List +import dask import fsspec import nbconvert import nbformat from nbconvert.preprocessors import CellExecutionError, ExecutePreprocessor -from prefect import flow, task -from prefect.futures import as_completed from jupyter_scheduler.models import CreateJob, DescribeJob, JobFeature, Status from jupyter_scheduler.orm import Job, Workflow, WorkflowDefinition, create_session @@ -202,7 +201,7 @@ def on_complete_workflow(self): session.commit() -@flow(name="Create and run a new workflow`") +@dask.delayed(name="Create and run a new workflow`") def create_and_run_workflow(tasks: List[str], root_dir, db_url): db_session = create_session(db_url) with db_session() as session: @@ -231,30 +230,30 @@ def activate_workflow_definition(self): session.commit() self.serve_workflow_definition() - @flow + @dask.delayed(name="Serve workflow definition") def serve_workflow_definition(self): describe_workflow_definition: DescribeWorkflowDefinition = self.model attributes = describe_workflow_definition.dict( exclude={"schedule", "timezone"}, exclude_none=True ) create_workflow = CreateWorkflow(**attributes) - flow_path = Path( - "/Users/aieroshe/Documents/jupyter-scheduler/jupyter_scheduler/executors.py" - ) - create_and_run_workflow.from_source( - source=str(flow_path.parent), - entrypoint="executors.py:create_and_run_workflow", - ).serve( - cron=self.model.schedule, - parameters={ - "tasks": create_workflow.tasks, - "root_dir": self.root_dir, - "db_url": self.db_url, - }, - ) - - @task(name="Execute workflow task") - def execute_task(self, job: Job): + # flow_path = Path( + # "/Users/aieroshe/Documents/jupyter-scheduler/jupyter_scheduler/executors.py" + # ) + # create_and_run_workflow.from_source( + # source=str(flow_path.parent), + # entrypoint="executors.py:create_and_run_workflow", + # ).serve( + # cron=self.model.schedule, + # parameters={ + # "tasks": create_workflow.tasks, + # "root_dir": self.root_dir, + # "db_url": self.db_url, + # }, + # ) + + @dask.delayed(name="Execute workflow task") + def execute_task(self, job: Job, dependencies: List[str]) -> str: with self.db_session() as session: staging_paths = Scheduler.get_staging_paths(DescribeJob.from_orm(job)) @@ -271,14 +270,12 @@ def execute_task(self, job: Job): return job_id - @task(name="Get workflow task records") def get_tasks_records(self, task_ids: List[str]) -> List[Job]: with self.db_session() as session: tasks = session.query(Job).filter(Job.job_id.in_(task_ids)).all() return tasks - @flow(name="Execute workflow", flow_run_name="Execute workflow run") def execute_workflow(self): tasks_info: List[Job] = self.get_tasks_records(self.model.tasks) tasks = {task.job_id: task for 
task in tasks_info} @@ -287,17 +284,11 @@ def execute_workflow(self): def make_task(task_id): """Create a delayed object for the given task recursively creating delayed objects for all tasks it depends on""" deps = tasks[task_id].depends_on or [] - name = tasks[task_id].name - job_id = tasks[task_id].job_id - return self.execute_task.submit( - tasks[task_id], wait_for=[make_task(dep_id) for dep_id in deps] - ) + return self.execute_task(tasks[task_id], [make_task(dep_id) for dep_id in deps]) final_tasks = [make_task(task_id) for task_id in tasks] - for future in as_completed(final_tasks): - future.result() + dask.compute(*final_tasks) - @flow(name="Execute job", flow_run_name="Execute job run") def execute(self): job = self.model @@ -320,7 +311,7 @@ def execute(self): self.add_side_effects_files(staging_dir) self.create_output_files(job, nb) - @task(name="Check for and add side effect files") + @dask.delayed(name="Check for and add side effect files") def add_side_effects_files(self, staging_dir: str): """Scan for side effect files potentially created after input file execution and update the job's packaged_files with these files""" input_notebook = os.path.relpath(self.staging_paths["input"]) @@ -343,7 +334,7 @@ def add_side_effects_files(self, staging_dir: str): ) session.commit() - @task(name="Create output files") + @dask.delayed(name="Create output files") def create_output_files(self, job: DescribeJob, notebook_node): for output_format in job.output_formats: cls = nbconvert.get_exporter(output_format) diff --git a/jupyter_scheduler/job_files_manager.py b/jupyter_scheduler/job_files_manager.py index e0774d8a8..748ce3041 100644 --- a/jupyter_scheduler/job_files_manager.py +++ b/jupyter_scheduler/job_files_manager.py @@ -4,9 +4,9 @@ from multiprocessing import Process from typing import Dict, List, Optional, Type +import dask import fsspec from jupyter_server.utils import ensure_async -from prefect import task from jupyter_scheduler.exceptions import SchedulerError from jupyter_scheduler.scheduler import BaseScheduler @@ -75,7 +75,7 @@ def download_tar(self, archive_format: str = "tar"): with tarfile.open(fileobj=f, mode=read_mode) as tar: tar.extractall(self.output_dir) - @task(name="Download job files") + @dask.delayed(name="Download job files") def download(self): # ensure presence of staging paths if not self.staging_paths: diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py index bfe0f155d..bd6f5f95b 100644 --- a/jupyter_scheduler/workflows.py +++ b/jupyter_scheduler/workflows.py @@ -308,7 +308,7 @@ class Config: class UpdateWorkflow(BaseModel): - name: str + name: Optional[str] = None parameters: Optional[Dict[str, str]] = None tasks: Optional[List[str]] = None status: Optional[Status] = None @@ -345,7 +345,7 @@ class Config: class UpdateWorkflowDefinition(BaseModel): - name: str + name: Optional[str] = None parameters: Optional[Dict[str, str]] = None tasks: Optional[List[str]] = None schedule: Optional[str] = None From 14f574063cd0a4166c68bd51b3e6b8f1562e075c Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Fri, 18 Oct 2024 13:29:36 -0700 Subject: [PATCH 35/43] add workflow runner --- jupyter_scheduler/workflow_runner.py | 345 +++++++++++++++++++++++++++ 1 file changed, 345 insertions(+) create mode 100644 jupyter_scheduler/workflow_runner.py diff --git a/jupyter_scheduler/workflow_runner.py b/jupyter_scheduler/workflow_runner.py new file mode 100644 index 000000000..daa685ca1 --- /dev/null +++ b/jupyter_scheduler/workflow_runner.py @@ -0,0 +1,345 
@@ +import asyncio +from dataclasses import dataclass +from datetime import datetime +from heapq import heappop, heappush +from typing import List, Optional + +import traitlets +from jupyter_server.transutils import _i18n +from sqlalchemy import Boolean, Column, Integer, String, create_engine +from sqlalchemy.orm import sessionmaker +from traitlets.config import LoggingConfigurable + +from jupyter_scheduler.orm import WorkflowDefinition, declarative_base +from jupyter_scheduler.pydantic_v1 import BaseModel +from jupyter_scheduler.utils import ( + compute_next_run_time, + get_localized_timestamp, + get_utc_timestamp, +) +from jupyter_scheduler.workflows import CreateWorkflow, UpdateWorkflowDefinition + +Base = declarative_base() + + +class WorkflowDefinitionCache(Base): + __tablename__ = "workflow_definitions_cache" + workflow_definition_id = Column(String(36), primary_key=True) + next_run_time = Column(Integer) + active = Column(Boolean) + timezone = Column(String(36)) + schedule = Column(String(256)) + + +class DescribeWorkflowDefinitionCache(BaseModel): + workflow_definition_id: str + next_run_time: int + active: bool + timezone: Optional[str] = None + schedule: str + + class Config: + orm_mode = True + + +class UpdateWorkflowDefinitionCache(BaseModel): + next_run_time: Optional[int] = None + active: Optional[bool] = None + timezone: Optional[str] = None + schedule: Optional[str] = None + + +@dataclass +class WorkflowDefinitionTask: + workflow_definition_id: str + next_run_time: int + + def __lt__(self, other): + return self.next_run_time < other.next_run_time + + def __str__(self): + next_run_time = datetime.fromtimestamp(self.next_run_time / 1e3) + return f"Id: {self.workflow_definition_id}, Run-time: {next_run_time}" + + +class PriorityQueue: + """A priority queue using heapq""" + + def __init__(self): + self._heap = [] + + def peek(self): + if self.isempty(): + raise "Queue is empty" + + return self._heap[0] + + def push(self, task: WorkflowDefinitionTask): + heappush(self._heap, task) + + def pop(self): + task = heappop(self._heap) + return task + + def __len__(self): + return len(self._heap) + + def isempty(self): + return len(self._heap) < 1 + + def __str__(self): + tasks = [] + for task in self._heap: + tasks.append(str(task)) + + return "\n".join(tasks) + + +class Cache: + def __init__(self) -> None: + self.cache_url = "sqlite://" + engine = create_engine(self.cache_url, echo=False) + Base.metadata.create_all(engine) + self.session = sessionmaker(bind=engine) + + def load(self, models: List[DescribeWorkflowDefinitionCache]): + with self.session() as session: + for model in models: + session.add(WorkflowDefinitionCache(**model.dict())) + session.commit() + + def get(self, job_definition_id: str) -> DescribeWorkflowDefinitionCache: + with self.session() as session: + definition = ( + session.query(WorkflowDefinitionCache) + .filter(WorkflowDefinitionCache.job_definition_id == job_definition_id) + .first() + ) + + if definition: + return DescribeWorkflowDefinitionCache.from_orm(definition) + else: + return None + + def put(self, model: DescribeWorkflowDefinitionCache): + with self.session() as session: + session.add(WorkflowDefinitionCache(**model.dict())) + session.commit() + + def update(self, job_definition_id: str, model: UpdateWorkflowDefinitionCache): + with self.session() as session: + session.query(WorkflowDefinitionCache).filter( + WorkflowDefinitionCache.job_definition_id == job_definition_id + ).update(model.dict(exclude_none=True)) + session.commit() + + def 
delete(self, job_definition_id: str): + with self.session() as session: + session.query(WorkflowDefinitionCache).filter( + WorkflowDefinitionCache.job_definition_id == job_definition_id + ).delete() + session.commit() + + +class BaseTaskRunner(LoggingConfigurable): + """Base task runner, this class's start method is called + at the start of jupyter server, and is responsible for + polling for the workflow definitions and creating new workflows + based on the schedule/timezone in the workflow definition. + """ + + def __init__(self, config=None, **kwargs): + super().__init__(config=config) + + poll_interval = traitlets.Integer( + default_value=10, + config=True, + help=_i18n( + "The interval in seconds that the task runner polls for scheduled workflows to run." + ), + ) + + async def start(self): + """Async method that is called by extension at server start""" + raise NotImplementedError("must be implemented by subclass") + + def add_workflow_definition(self, workflow_definition_id: str): + """This should handle adding data for new + workflow definition to the PriorityQueue and Cache.""" + raise NotImplementedError("must be implemented by subclass") + + def update_workflow_definition( + self, workflow_definition_id: str, model: UpdateWorkflowDefinition + ): + """This should handles updates to workflow definitions""" + NotImplementedError("must be implemented by subclass") + + def delete_workflow_definition(self, workflow_definition_id: str): + """Handles deletion of workflow definitions""" + NotImplementedError("must be implemented by subclass") + + def pause_workflows(self, workflow_definition_id: str): + """Handles pausing a workflow definition""" + NotImplementedError("must be implemented by subclass") + + def resume_workflows(self, workflow_definition_id: str): + """Handles resuming of a workflow definition""" + NotImplementedError("must be implemented by subclass") + + +class TaskRunner(BaseTaskRunner): + """Default task runner that maintains a workflow definition cache and a + priority queue, and polls the queue every `poll_interval` seconds + for new jobs to create. 
+ """ + + def __init__(self, scheduler, config=None) -> None: + super().__init__(config=config) + self.scheduler = scheduler + self.db_session = scheduler.db_session + self.cache = Cache() + self.queue = PriorityQueue() + + def compute_next_run_time(self, schedule: str, timezone: Optional[str] = None): + return compute_next_run_time(schedule, timezone) + + def populate_cache(self): + with self.db_session() as session: + definitions = ( + session.query(WorkflowDefinition).filter(WorkflowDefinition.schedule != None).all() + ) + + for definition in definitions: + next_run_time = self.compute_next_run_time(definition.schedule, definition.timezone) + self.cache.put( + DescribeWorkflowDefinitionCache( + workflow_definition_id=definition.workflow_definition_id, + next_run_time=next_run_time, + active=definition.active, + timezone=definition.timezone, + schedule=definition.schedule, + ) + ) + if definition.active: + self.queue.push( + WorkflowDefinitionTask( + job_definition_id=definition.workflow_definition_id, + next_run_time=next_run_time, + ) + ) + + def add_workflow_definition(self, workflow_definition_id: str): + with self.db_session() as session: + definition = ( + session.query(WorkflowDefinition) + .filter(WorkflowDefinition.workflow_definition_id == workflow_definition_id) + .first() + ) + + next_run_time = self.compute_next_run_time(definition.schedule, definition.timezone) + + self.cache.put( + DescribeWorkflowDefinitionCache( + workflow_definition_id=definition.workflow_definition_id, + active=definition.active, + next_run_time=next_run_time, + timezone=definition.timezone, + schedule=definition.schedule, + ) + ) + if definition.active: + self.queue.push( + WorkflowDefinitionTask( + workflow_definition_id=definition.workflow_definition_id, + next_run_time=next_run_time, + ) + ) + + def update_workflow_definition( + self, workflow_definition_id: str, model: UpdateWorkflowDefinition + ): + cache = self.cache.get(workflow_definition_id) + schedule = model.schedule or cache.schedule + timezone = model.timezone or cache.timezone + active = model.active if model.active is not None else cache.active + cached_next_run_time = cache.next_run_time + next_run_time = self.compute_next_run_time(schedule, timezone) + + self.cache.update( + workflow_definition_id, + UpdateWorkflowDefinitionCache( + timezone=timezone, next_run_time=next_run_time, active=active, schedule=schedule + ), + ) + + next_run_time_changed = cached_next_run_time != next_run_time and active + resumed_job = model.active and not cache.active + + if next_run_time_changed or resumed_job: + self.log.debug("Updating queue...") + task = WorkflowDefinitionTask( + job_definition_id=workflow_definition_id, next_run_time=next_run_time + ) + self.queue.push(task) + self.log.debug(f"Updated queue, {task}") + + def delete_workflow_definition(self, workflow_definition_id: str): + self.cache.delete(workflow_definition_id) + + def create_workflow(self, workflow_definition_id: str): + definition = self.scheduler.get_workflow_definition(workflow_definition_id) + if definition and definition.active: + self.scheduler.create_workflow( + CreateWorkflow( + **definition.dict(exclude={"schedule", "timezone"}, exclude_none=True), + ) + ) + + def compute_time_diff(self, queue_run_time: int, timezone: str): + local_time = get_localized_timestamp(timezone) if timezone else get_utc_timestamp() + return local_time - queue_run_time + + def process_queue(self): + self.log.debug(self.queue) + while not self.queue.isempty(): + task = self.queue.peek() + cache = 
self.cache.get(task.workflow_definition_id) + + if not cache: + self.queue.pop() + continue + + cache_run_time = cache.next_run_time + queue_run_time = task.next_run_time + + if not cache.active or queue_run_time != cache_run_time: + self.queue.pop() + continue + + time_diff = self.compute_time_diff(queue_run_time, cache.timezone) + + # if run time is in future + if time_diff < 0: + break + else: + try: + self.create_workflow(task.workflow_definition_id) + except Exception as e: + self.log.exception(e) + self.queue.pop() + run_time = self.compute_next_run_time(cache.schedule, cache.timezone) + self.cache.update( + task.workflow_definition_id, + UpdateWorkflowDefinitionCache(next_run_time=run_time), + ) + self.queue.push( + WorkflowDefinitionTask( + job_definition_id=task.job_definition_id, next_run_time=run_time + ) + ) + + async def start(self): + self.populate_cache() + while True: + self.process_queue() + await asyncio.sleep(self.poll_interval) From 9321d4115ca62ebefdf0b804ffbe1060eedf657f Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Fri, 18 Oct 2024 13:30:09 -0700 Subject: [PATCH 36/43] use dask instead of prefect --- jupyter_scheduler/executors.py | 39 +++++++++++++++++++---- jupyter_scheduler/extension.py | 23 ++++++++++++++ jupyter_scheduler/job_files_manager.py | 2 +- jupyter_scheduler/scheduler.py | 44 ++++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 7 deletions(-) diff --git a/jupyter_scheduler/executors.py b/jupyter_scheduler/executors.py index cc1003423..e0f126ce4 100644 --- a/jupyter_scheduler/executors.py +++ b/jupyter_scheduler/executors.py @@ -201,7 +201,7 @@ def on_complete_workflow(self): session.commit() -@dask.delayed(name="Create and run a new workflow`") +# @dask.delayed(name="Create and run a new workflow`") def create_and_run_workflow(tasks: List[str], root_dir, db_url): db_session = create_session(db_url) with db_session() as session: @@ -230,7 +230,7 @@ def activate_workflow_definition(self): session.commit() self.serve_workflow_definition() - @dask.delayed(name="Serve workflow definition") + # @dask.delayed(name="Serve workflow definition") def serve_workflow_definition(self): describe_workflow_definition: DescribeWorkflowDefinition = self.model attributes = describe_workflow_definition.dict( @@ -253,8 +253,9 @@ def serve_workflow_definition(self): # ) @dask.delayed(name="Execute workflow task") - def execute_task(self, job: Job, dependencies: List[str]) -> str: + def execute_task(self, job: Job, dependencies: List[str] = []) -> str: with self.db_session() as session: + print(f"executing task {job.job_id} with dependencies {dependencies}") staging_paths = Scheduler.get_staging_paths(DescribeJob.from_orm(job)) execution_manager = DefaultExecutionManager( @@ -265,12 +266,21 @@ def execute_task(self, job: Job, dependencies: List[str]) -> str: ) execution_manager.process() + self.dask_client.submit( + DefaultExecutionManager( + job_id=job.job_id, + root_dir=self.root_dir, + db_url=self.db_url, + ).process + ) + job.pid = 1 # TODO: fix pid hardcode job_id = job.job_id return job_id def get_tasks_records(self, task_ids: List[str]) -> List[Job]: + print("getting task records for task: {task_ids}") with self.db_session() as session: tasks = session.query(Job).filter(Job.job_id.in_(task_ids)).all() @@ -278,15 +288,32 @@ def get_tasks_records(self, task_ids: List[str]) -> List[Job]: def execute_workflow(self): tasks_info: List[Job] = self.get_tasks_records(self.model.tasks) + print(f"tasks_info in execute_workflow: {tasks_info}") tasks = 
{task.job_id: task for task in tasks_info} + print(f"tasks in execute_workflow: {tasks}") @lru_cache(maxsize=None) def make_task(task_id): """Create a delayed object for the given task recursively creating delayed objects for all tasks it depends on""" + print("making task for") + print(task_id) deps = tasks[task_id].depends_on or [] - return self.execute_task(tasks[task_id], [make_task(dep_id) for dep_id in deps]) + print(deps) + print(f"dependencies in make_task for {task_id}") + print(deps) + + execute_task_result = self.execute_task( + tasks[task_id], [make_task(dep_id) for dep_id in deps] + ) + print("execute task result from make_task") + print(execute_task_result) + + return execute_task_result final_tasks = [make_task(task_id) for task_id in tasks] + print("Final tasks:") + print(final_tasks) + print(f"Calling compute after loops") dask.compute(*final_tasks) def execute(self): @@ -311,7 +338,7 @@ def execute(self): self.add_side_effects_files(staging_dir) self.create_output_files(job, nb) - @dask.delayed(name="Check for and add side effect files") + # @dask.delayed(name="Check for and add side effect files") def add_side_effects_files(self, staging_dir: str): """Scan for side effect files potentially created after input file execution and update the job's packaged_files with these files""" input_notebook = os.path.relpath(self.staging_paths["input"]) @@ -334,7 +361,7 @@ def add_side_effects_files(self, staging_dir: str): ) session.commit() - @dask.delayed(name="Create output files") + # @dask.delayed(name="Create output files") def create_output_files(self, job: DescribeJob, notebook_node): for output_format in job.output_formats: cls = nbconvert.get_exporter(output_format) diff --git a/jupyter_scheduler/extension.py b/jupyter_scheduler/extension.py index a09a5c725..7ef2b09f2 100644 --- a/jupyter_scheduler/extension.py +++ b/jupyter_scheduler/extension.py @@ -124,3 +124,26 @@ def initialize_settings(self): if scheduler.task_runner: loop = asyncio.get_event_loop() loop.create_task(scheduler.task_runner.start()) + + async def stop_extension(self): + """ + Public method called by Jupyter Server when the server is stopping. + This calls the cleanup code defined in `self._stop_exception()` inside + an exception handler, as the server halts if this method raises an + exception. + """ + try: + await self._stop_extension() + except Exception as e: + self.log.error("Jupyter Scheduler raised an exception while stopping:") + + self.log.exception(e) + + async def _stop_extension(self): + """ + Private method that defines the cleanup code to run when the server is + stopping. 
+ """ + if "scheduler" in self.settings: + scheduler: SchedulerApp = self.settings["scheduler"] + await scheduler.stop_extension() diff --git a/jupyter_scheduler/job_files_manager.py b/jupyter_scheduler/job_files_manager.py index 748ce3041..77d0e8114 100644 --- a/jupyter_scheduler/job_files_manager.py +++ b/jupyter_scheduler/job_files_manager.py @@ -75,7 +75,7 @@ def download_tar(self, archive_format: str = "tar"): with tarfile.open(fileobj=f, mode=read_mode) as tar: tar.extractall(self.output_dir) - @dask.delayed(name="Download job files") + # @dask.delayed(name="Download job files") def download(self): # ensure presence of staging paths if not self.staging_paths: diff --git a/jupyter_scheduler/scheduler.py b/jupyter_scheduler/scheduler.py index 9dd9b1e38..bbc546b05 100644 --- a/jupyter_scheduler/scheduler.py +++ b/jupyter_scheduler/scheduler.py @@ -5,6 +5,8 @@ import fsspec import psutil +from dask.distributed import Client as DaskClient +from distributed import LocalCluster from jupyter_core.paths import jupyter_data_dir from jupyter_server.transutils import _i18n from jupyter_server.utils import to_os_path @@ -431,6 +433,12 @@ def get_local_output_path( else: return os.path.join(self.root_dir, self.output_directory, output_dir_name) + async def stop_extension(self): + """ + Placeholder method for a cleanup code to run when the server is stopping. + """ + pass + class Scheduler(BaseScheduler): _db_session = None @@ -445,6 +453,12 @@ class Scheduler(BaseScheduler): ), ) + dask_cluster_url = Unicode( + allow_none=True, + config=True, + help="URL of the Dask cluster to connect to.", + ) + db_url = Unicode(help=_i18n("Scheduler database url")) task_runner = Instance(allow_none=True, klass="jupyter_scheduler.task_runner.BaseTaskRunner") @@ -463,6 +477,17 @@ def __init__( self.db_url = db_url if self.task_runner_class: self.task_runner = self.task_runner_class(scheduler=self, config=config) + self.dask_client: DaskClient = self._get_dask_client() + + def _get_dask_client(self): + """Creates and configures a Dask client.""" + if self.dask_cluster_url: + return DaskClient(self.dask_cluster_url) + print("Starting local Dask cluster") + cluster = LocalCluster(processes=True) + client = DaskClient(cluster) + print(client) + return client @property def db_session(self): @@ -566,6 +591,13 @@ def create_workflow_definition(self, model: CreateWorkflowDefinition) -> str: return workflow_definition.workflow_definition_id def run_workflow(self, workflow_id: str) -> str: + # self.dask_client.submit( + # self.execution_manager_class( + # workflow_id=workflow_id, + # root_dir=self.root_dir, + # db_url=self.db_url, + # ).process_workflow + # ) execution_manager = self.execution_manager_class( workflow_id=workflow_id, root_dir=self.root_dir, @@ -603,9 +635,14 @@ def get_workflow_definition(self, workflow_definition_id: str) -> DescribeWorkfl def create_workflow_task(self, workflow_id: str, model: CreateJob) -> str: job_id = self.create_job(model, run=False) + print(f"create_workflow_task job_id: {job_id}") workflow: DescribeWorkflow = self.get_workflow(workflow_id) + print(f"workflow in create_workflow_task: {workflow}") updated_tasks = (workflow.tasks or [])[:] + print(f"updated_tasks before update: {updated_tasks}") updated_tasks.append(job_id) + print(f"updated_tasks after update: {updated_tasks}") + self.update_workflow(workflow_id, UpdateWorkflow(tasks=updated_tasks)) return job_id @@ -937,6 +974,13 @@ def get_staging_paths(model: Union[DescribeJob, DescribeJobDefinition]) -> Dict[ return staging_paths + 
async def stop_extension(self):
+        """
+        Cleanup code to run when the server is stopping.
+        """
+        if self.dask_client:
+            await self.dask_client.close()
+
 
 class ArchivingScheduler(Scheduler):
     """Scheduler that captures all files in output directory in an archive."""

From 2b25c019a5adcd5ef5de26c281ff82b39f0a5a1d Mon Sep 17 00:00:00 2001
From: Andrii Ieroshenko
Date: Fri, 18 Oct 2024 13:47:13 -0700
Subject: [PATCH 37/43] add trigger_rule enum, field to Jobs and JobDefinitions

---
 jupyter_scheduler/models.py | 26 ++++++++++++++++++++++
 jupyter_scheduler/orm.py    |  7 ++++---
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/jupyter_scheduler/models.py b/jupyter_scheduler/models.py
index c7d2e6bb5..ab3bf1695 100644
--- a/jupyter_scheduler/models.py
+++ b/jupyter_scheduler/models.py
@@ -70,6 +70,20 @@ def __str__(self):
 OUTPUT_FILENAME_TEMPLATE = "{{input_filename}}-{{create_time}}"
 
 
+class TriggerRule(str, Enum):
+    ALL_SUCCESS = "all_success"
+    ALL_FAILED = "all_failed"
+    ALL_DONE = "all_done"
+    ONE_FAILED = "one_failed"
+    ONE_SUCCESS = "one_success"
+    NONE_FAILED = "none_failed"
+    NONE_SKIPPED = "none_skipped"
+    DUMMY = "dummy"
+
+    def __str__(self):
+        return self.value
+
+
 class CreateJob(BaseModel):
     """Defines the model for creating a new job"""
 
@@ -88,6 +102,7 @@ class CreateJob(BaseModel):
     package_input_folder: Optional[bool] = None
     depends_on: Optional[List[str]] = None
     workflow_id: Optional[str] = None
+    trigger_rule: Optional[TriggerRule] = None
 
     @root_validator
     def compute_input_filename(cls, values) -> Dict:
@@ -152,6 +167,7 @@ class DescribeJob(BaseModel):
     packaged_files: Optional[List[str]] = []
     depends_on: Optional[List[str]] = None
     workflow_id: Optional[str] = None
+    trigger_rule: Optional[TriggerRule] = None
 
     class Config:
         orm_mode = True
@@ -198,6 +214,7 @@ class UpdateJob(BaseModel):
     name: Optional[str] = None
     compute_type: Optional[str] = None
     depends_on: Optional[List[str]] = None
+    trigger_rule: Optional[TriggerRule] = None
 
 
 class DeleteJob(BaseModel):
@@ -218,6 +235,9 @@ class CreateJobDefinition(BaseModel):
     schedule: Optional[str] = None
     timezone: Optional[str] = None
     package_input_folder: Optional[bool] = None
+    depends_on: Optional[List[str]] = None
+    workflow_id: Optional[str] = None
+    trigger_rule: Optional[TriggerRule] = None
 
     @root_validator
     def compute_input_filename(cls, values) -> Dict:
@@ -245,6 +265,9 @@ class DescribeJobDefinition(BaseModel):
     active: bool
     package_input_folder: Optional[bool] = None
     packaged_files: Optional[List[str]] = []
+    depends_on: Optional[List[str]] = None
+    workflow_id: Optional[str] = None
+    trigger_rule: Optional[TriggerRule] = None
 
     class Config:
         orm_mode = True
@@ -264,6 +287,9 @@ class UpdateJobDefinition(BaseModel):
     active: Optional[bool] = None
     compute_type: Optional[str] = None
     input_uri: Optional[str] = None
+    depends_on: Optional[List[str]] = None
+    workflow_id: Optional[str] = None
+    trigger_rule: Optional[TriggerRule] = None
 
 
 class ListJobDefinitionsQuery(BaseModel):
diff --git a/jupyter_scheduler/orm.py b/jupyter_scheduler/orm.py
index f3cadb748..0a61358f0 100644
--- a/jupyter_scheduler/orm.py
+++ b/jupyter_scheduler/orm.py
@@ -7,7 +7,7 @@
 from sqlalchemy.orm import declarative_base, declarative_mixin, registry, sessionmaker
 from sqlalchemy.sql import text
 
-from jupyter_scheduler.models import EmailNotifications, Status
+from jupyter_scheduler.models import EmailNotifications, Status, TriggerRule
 from jupyter_scheduler.utils import get_utc_timestamp
 
 Base = declarative_base()
@@ -89,6 +89,9 @@ class CommonColumns:
     # Any
default values specified for new columns will be ignored during the migration process. package_input_folder = Column(Boolean) packaged_files = Column(JsonType, default=[]) + depends_on = Column(JsonType) + workflow_id = Column(String(36)) + trigger_rule = Column(String(64)) class Job(CommonColumns, Base): @@ -103,8 +106,6 @@ class Job(CommonColumns, Base): url = Column(String(256), default=generate_jobs_url) pid = Column(Integer) idempotency_token = Column(String(256)) - depends_on = Column(JsonType) - workflow_id = Column(String(36)) # All new columns added to this table must be nullable to ensure compatibility during database migrations. # Any default values specified for new columns will be ignored during the migration process. From c2bcf13954026d01e1e393421ab5c59b56bbd25f Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Tue, 22 Oct 2024 10:46:06 -0700 Subject: [PATCH 38/43] add DRAFT Status enum and set it as a default workflow status --- jupyter_scheduler/models.py | 1 + jupyter_scheduler/orm.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/jupyter_scheduler/models.py b/jupyter_scheduler/models.py index ab3bf1695..5c1e7a958 100644 --- a/jupyter_scheduler/models.py +++ b/jupyter_scheduler/models.py @@ -42,6 +42,7 @@ def __str__(self) -> str: class Status(str, Enum): + DRAFT = "DRAFT" CREATED = "CREATED" QUEUED = "QUEUED" IN_PROGRESS = "IN_PROGRESS" diff --git a/jupyter_scheduler/orm.py b/jupyter_scheduler/orm.py index 0a61358f0..4c1e80b26 100644 --- a/jupyter_scheduler/orm.py +++ b/jupyter_scheduler/orm.py @@ -115,7 +115,7 @@ class Workflow(Base): __table_args__ = {"extend_existing": True} workflow_id = Column(String(36), primary_key=True, default=generate_uuid) tasks = Column(JsonType) - status = Column(String(64), default=Status.CREATED) + status = Column(String(64), default=Status.DRAFT) active = Column(Boolean, default=False) name = Column(String(256)) parameters = Column(JsonType(1024)) @@ -128,7 +128,7 @@ class WorkflowDefinition(Base): __table_args__ = {"extend_existing": True} workflow_definition_id = Column(String(36), primary_key=True, default=generate_uuid) tasks = Column(JsonType) - status = Column(String(64), default=Status.CREATED) + status = Column(String(64), default=Status.DRAFT) active = Column(Boolean, default=False) schedule = Column(String(256)) timezone = Column(String(36)) From 6e162cbfe28aeef684445135c8b8613abaf25794 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Thu, 24 Oct 2024 13:40:15 -0700 Subject: [PATCH 39/43] add DEPLOYED status --- jupyter_scheduler/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/jupyter_scheduler/models.py b/jupyter_scheduler/models.py index 5c1e7a958..8697e008f 100644 --- a/jupyter_scheduler/models.py +++ b/jupyter_scheduler/models.py @@ -43,6 +43,7 @@ def __str__(self) -> str: class Status(str, Enum): DRAFT = "DRAFT" + DEPLOYED = "DEPLOYED" CREATED = "CREATED" QUEUED = "QUEUED" IN_PROGRESS = "IN_PROGRESS" From 8866d24ce1f66df2c6500f2908e572afe8b08f8f Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Thu, 24 Oct 2024 13:41:37 -0700 Subject: [PATCH 40/43] rename activate_workflow_definition into deploy_workflow_definition --- jupyter_scheduler/executors.py | 79 ++++++++++------------------------ jupyter_scheduler/extension.py | 4 +- jupyter_scheduler/scheduler.py | 35 +++++++-------- jupyter_scheduler/workflows.py | 6 +-- 4 files changed, 43 insertions(+), 81 deletions(-) diff --git a/jupyter_scheduler/executors.py b/jupyter_scheduler/executors.py index e0f126ce4..e65ba0ec3 100644 
--- a/jupyter_scheduler/executors.py +++ b/jupyter_scheduler/executors.py @@ -220,64 +220,14 @@ def create_and_run_workflow(tasks: List[str], root_dir, db_url): class DefaultExecutionManager(ExecutionManager): """Default execution manager that executes notebooks""" - def activate_workflow_definition(self): + def deploy_workflow_definition(self): describe_workflow_definition: DescribeWorkflowDefinition = self.model with self.db_session() as session: session.query(WorkflowDefinition).filter( WorkflowDefinition.workflow_definition_id == describe_workflow_definition.workflow_definition_id - ).update({"active": True}) + ).update({"active": True, "status": Status.DEPLOYED}) session.commit() - self.serve_workflow_definition() - - # @dask.delayed(name="Serve workflow definition") - def serve_workflow_definition(self): - describe_workflow_definition: DescribeWorkflowDefinition = self.model - attributes = describe_workflow_definition.dict( - exclude={"schedule", "timezone"}, exclude_none=True - ) - create_workflow = CreateWorkflow(**attributes) - # flow_path = Path( - # "/Users/aieroshe/Documents/jupyter-scheduler/jupyter_scheduler/executors.py" - # ) - # create_and_run_workflow.from_source( - # source=str(flow_path.parent), - # entrypoint="executors.py:create_and_run_workflow", - # ).serve( - # cron=self.model.schedule, - # parameters={ - # "tasks": create_workflow.tasks, - # "root_dir": self.root_dir, - # "db_url": self.db_url, - # }, - # ) - - @dask.delayed(name="Execute workflow task") - def execute_task(self, job: Job, dependencies: List[str] = []) -> str: - with self.db_session() as session: - print(f"executing task {job.job_id} with dependencies {dependencies}") - staging_paths = Scheduler.get_staging_paths(DescribeJob.from_orm(job)) - - execution_manager = DefaultExecutionManager( - job_id=job.job_id, - staging_paths=staging_paths, - root_dir=self.root_dir, - db_url=self.db_url, - ) - execution_manager.process() - - self.dask_client.submit( - DefaultExecutionManager( - job_id=job.job_id, - root_dir=self.root_dir, - db_url=self.db_url, - ).process - ) - - job.pid = 1 # TODO: fix pid hardcode - job_id = job.job_id - - return job_id def get_tasks_records(self, task_ids: List[str]) -> List[Job]: print("getting task records for task: {task_ids}") @@ -286,6 +236,7 @@ def get_tasks_records(self, task_ids: List[str]) -> List[Job]: return tasks + # @dask.delayed(name="Execute workflow") def execute_workflow(self): tasks_info: List[Job] = self.get_tasks_records(self.model.tasks) print(f"tasks_info in execute_workflow: {tasks_info}") @@ -302,13 +253,16 @@ def make_task(task_id): print(f"dependencies in make_task for {task_id}") print(deps) - execute_task_result = self.execute_task( - tasks[task_id], [make_task(dep_id) for dep_id in deps] + execute_task_delayed = execute_task( + job=tasks[task_id], + root_dir=self.root_dir, + db_url=self.db_url, + dependencies=[make_task(dep_id) for dep_id in deps], ) print("execute task result from make_task") - print(execute_task_result) + print(execute_task_delayed) - return execute_task_result + return execute_task_delayed final_tasks = [make_task(task_id) for task_id in tasks] print("Final tasks:") @@ -397,6 +351,19 @@ def validate(cls, input_path: str) -> bool: return True +@dask.delayed(name="Execute workflow task") +def execute_task(job: Job, root_dir: str, db_url: str, dependencies: List[str] = []): + print(f"executing task {job.job_id} with dependencies {dependencies}") + staging_paths = Scheduler.get_staging_paths(DescribeJob.from_orm(job)) + process_job = 
DefaultExecutionManager( + job_id=job.job_id, + staging_paths=staging_paths, + root_dir=root_dir, + db_url=db_url, + ).process + return process_job() + + class ArchivingExecutionManager(DefaultExecutionManager): """Execution manager that archives all output files in and under the output directory into a single archive file diff --git a/jupyter_scheduler/extension.py b/jupyter_scheduler/extension.py index 7ef2b09f2..70d5cece1 100644 --- a/jupyter_scheduler/extension.py +++ b/jupyter_scheduler/extension.py @@ -7,7 +7,7 @@ from jupyter_scheduler.orm import create_tables from jupyter_scheduler.workflows import ( - WorkflowDefinitionsActivationHandler, + WorkflowDefinitionsDeploymentHandler, WorkflowDefinitionsHandler, WorkflowDefinitionsTasksHandler, WorkflowsHandler, @@ -62,7 +62,7 @@ class SchedulerApp(ExtensionApp): ), ( rf"scheduler/worklow_definitions/{WORKFLOW_DEFINITION_ID_REGEX}/deploy", - WorkflowDefinitionsActivationHandler, + WorkflowDefinitionsDeploymentHandler, ), ( rf"scheduler/worklow_definitions/{WORKFLOW_DEFINITION_ID_REGEX}/tasks", diff --git a/jupyter_scheduler/scheduler.py b/jupyter_scheduler/scheduler.py index bbc546b05..11f2305f3 100644 --- a/jupyter_scheduler/scheduler.py +++ b/jupyter_scheduler/scheduler.py @@ -132,7 +132,7 @@ def run_workflow(self, workflow_id: str) -> str: """Triggers execution of the workflow.""" raise NotImplementedError("must be implemented by subclass") - def activate_workflow_definition(self, workflow_definition_id: str) -> str: + def deploy_workflow_definition(self, workflow_definition_id: str) -> str: """Activates workflow marking it as ready for execution.""" raise NotImplementedError("must be implemented by subclass") @@ -487,6 +487,7 @@ def _get_dask_client(self): cluster = LocalCluster(processes=True) client = DaskClient(cluster) print(client) + print(f"Dask dashboard link: {client.dashboard_link}") return client @property @@ -561,15 +562,14 @@ def create_job(self, model: CreateJob, run: bool = True) -> str: def run_job(self, job: Job, staging_paths: Dict[str, str]) -> str: with self.db_session() as session: - execution_manager = self.execution_manager_class( + process_job = self.execution_manager_class( job_id=job.job_id, staging_paths=staging_paths, root_dir=self.root_dir, db_url=self.db_url, - ) - execution_manager.process() - - job.pid = 1 # TODO: fix pid hardcode + ).process + future = self.dask_client.submit(process_job) + job.pid = future.key session.commit() job_id = job.job_id @@ -591,28 +591,21 @@ def create_workflow_definition(self, model: CreateWorkflowDefinition) -> str: return workflow_definition.workflow_definition_id def run_workflow(self, workflow_id: str) -> str: - # self.dask_client.submit( - # self.execution_manager_class( - # workflow_id=workflow_id, - # root_dir=self.root_dir, - # db_url=self.db_url, - # ).process_workflow - # ) - execution_manager = self.execution_manager_class( + process_workflow = self.execution_manager_class( workflow_id=workflow_id, root_dir=self.root_dir, db_url=self.db_url, - ) - execution_manager.process_workflow() + ).process_workflow + self.dask_client.submit(process_workflow) return workflow_id - def activate_workflow_definition(self, workflow_definition_id: str) -> str: + def deploy_workflow_definition(self, workflow_definition_id: str) -> str: execution_manager = self.execution_manager_class( workflow_definition_id=workflow_definition_id, root_dir=self.root_dir, db_url=self.db_url, ) - execution_manager.activate_workflow_definition() + execution_manager.deploy_workflow_definition() return 
workflow_definition_id
 
     def get_workflow(self, workflow_id: str) -> DescribeWorkflow:
@@ -978,8 +971,9 @@ async def stop_extension(self):
         """
         Cleanup code to run when the server is stopping.
         """
-        if self.dask_client:
-            await self.dask_client.close()
+        if self.dask_client is None:
+            return
+        self.dask_client.close()
 
 
 class ArchivingScheduler(Scheduler):
diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py
index bd6f5f95b..1e0c11c38 100644
--- a/jupyter_scheduler/workflows.py
+++ b/jupyter_scheduler/workflows.py
@@ -261,12 +261,12 @@ async def patch(self, _: str, task_definition_id: str):
         self.finish()
 
 
-class WorkflowDefinitionsActivationHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler):
+class WorkflowDefinitionsDeploymentHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler):
     @authenticated
     async def post(self, workflow_definition_id: str):
         try:
             workflow_definition_id = await ensure_async(
-                self.scheduler.activate_workflow_definition(workflow_definition_id)
+                self.scheduler.deploy_workflow_definition(workflow_definition_id)
             )
         except ValidationError as e:
             self.log.exception(e)
@@ -321,7 +321,7 @@ class Config:
 class CreateWorkflowDefinition(BaseModel):
     tasks: List[str] = []
     # any field added to CreateWorkflow should also be added to this model as well
-    name: str
+    name: str = ""
     parameters: Optional[Dict[str, str]] = None
     schedule: Optional[str] = None
     timezone: Optional[str] = None

From d49f9305717afb996b458d3c5a88168e2751e686 Mon Sep 17 00:00:00 2001
From: Andrii Ieroshenko
Date: Mon, 28 Oct 2024 13:35:39 -0700
Subject: [PATCH 41/43] add create_time to Workflow model

---
 jupyter_scheduler/orm.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/jupyter_scheduler/orm.py b/jupyter_scheduler/orm.py
index 4c1e80b26..f4ca13530 100644
--- a/jupyter_scheduler/orm.py
+++ b/jupyter_scheduler/orm.py
@@ -119,6 +119,7 @@ class Workflow(Base):
     active = Column(Boolean, default=False)
     name = Column(String(256))
     parameters = Column(JsonType(1024))
+    create_time = Column(Integer, default=get_utc_timestamp)
 
     # All new columns added to this table must be nullable to ensure compatibility during database migrations.
     # Any default values specified for new columns will be ignored during the migration process.
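
Note on the execution model used by the patches above: execute_workflow builds each workflow run as a Dask task graph. It walks every task's depends_on list, memoizes one dask.delayed node per task id, and hands the resulting leaves to dask.compute, which runs the DAG in dependency order. The snippet below is a minimal, standalone sketch of that memoized-recursion pattern; it is not part of any patch, and run_task and build_graph are hypothetical names used only for illustration.

import dask
from functools import lru_cache
from typing import Dict, List


@dask.delayed
def run_task(task_id: str, dependencies: List[str]) -> str:
    # Stand-in for DefaultExecutionManager(...).process(); returning the id lets
    # downstream delayed nodes consume this node as a dependency.
    print(f"running {task_id} after {dependencies}")
    return task_id


def build_graph(depends_on: Dict[str, List[str]]) -> list:
    @lru_cache(maxsize=None)
    def make_task(task_id: str):
        # Delayed objects inside the list argument are treated by Dask as upstream
        # dependencies, so each task waits for everything it depends on.
        return run_task(task_id, [make_task(dep) for dep in depends_on.get(task_id, [])])

    return [make_task(task_id) for task_id in depends_on]


# Example DAG: "b" waits for "a"; "c" waits for "a" and "b".
dask.compute(*build_graph({"a": [], "b": ["a"], "c": ["a", "b"]}))
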
From ee9e067e7126a07f624ec1ab02c03c58e3f84d41 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Mon, 28 Oct 2024 13:42:49 -0700 Subject: [PATCH 42/43] create and run workflow definitions including on schedule --- jupyter_scheduler/executors.py | 30 +------ jupyter_scheduler/extension.py | 4 + jupyter_scheduler/scheduler.py | 101 +++++++++++++++++++---- jupyter_scheduler/workflow_runner.py | 55 +++++++------ jupyter_scheduler/workflows.py | 119 +++++++++++++++++++++++---- 5 files changed, 228 insertions(+), 81 deletions(-) diff --git a/jupyter_scheduler/executors.py b/jupyter_scheduler/executors.py index e65ba0ec3..d1e41dd27 100644 --- a/jupyter_scheduler/executors.py +++ b/jupyter_scheduler/executors.py @@ -104,6 +104,7 @@ def process(self): self.on_complete() def process_workflow(self): + print(f"calling ExecutionManager(ABC).process_workflow for {self.model}") self.before_start_workflow() try: self.execute_workflow() @@ -154,6 +155,7 @@ def before_start(self): def before_start_workflow(self): """Called before start of execute""" + print(f"calling ExecutionManager(ABC).before_start_workflow for {self.model}") workflow = self.model with self.db_session() as session: session.query(Workflow).filter(Workflow.workflow_id == workflow.workflow_id).update( @@ -201,38 +203,14 @@ def on_complete_workflow(self): session.commit() -# @dask.delayed(name="Create and run a new workflow`") -def create_and_run_workflow(tasks: List[str], root_dir, db_url): - db_session = create_session(db_url) - with db_session() as session: - workflow = Workflow(tasks=tasks) - session.add(workflow) - session.commit() - workflow_id = workflow.workflow_id - execution_manager = DefaultExecutionManager( - workflow_id=workflow_id, - root_dir=root_dir, - db_url=db_url, - ) - execution_manager.process_workflow() - - class DefaultExecutionManager(ExecutionManager): """Default execution manager that executes notebooks""" - def deploy_workflow_definition(self): - describe_workflow_definition: DescribeWorkflowDefinition = self.model - with self.db_session() as session: - session.query(WorkflowDefinition).filter( - WorkflowDefinition.workflow_definition_id - == describe_workflow_definition.workflow_definition_id - ).update({"active": True, "status": Status.DEPLOYED}) - session.commit() - def get_tasks_records(self, task_ids: List[str]) -> List[Job]: - print("getting task records for task: {task_ids}") + print(f"getting task records for task: {task_ids}") with self.db_session() as session: tasks = session.query(Job).filter(Job.job_id.in_(task_ids)).all() + print(f"gotten task records for task {task_ids}: {tasks}") return tasks diff --git a/jupyter_scheduler/extension.py b/jupyter_scheduler/extension.py index 70d5cece1..f6cafb637 100644 --- a/jupyter_scheduler/extension.py +++ b/jupyter_scheduler/extension.py @@ -125,6 +125,10 @@ def initialize_settings(self): loop = asyncio.get_event_loop() loop.create_task(scheduler.task_runner.start()) + if scheduler.workflow_runner: + loop = asyncio.get_event_loop() + loop.create_task(scheduler.workflow_runner.start()) + async def stop_extension(self): """ Public method called by Jupyter Server when the server is stopping. 
diff --git a/jupyter_scheduler/scheduler.py b/jupyter_scheduler/scheduler.py index 11f2305f3..a34f5c2af 100644 --- a/jupyter_scheduler/scheduler.py +++ b/jupyter_scheduler/scheduler.py @@ -453,6 +453,16 @@ class Scheduler(BaseScheduler): ), ) + workflow_runner_class = TType( + allow_none=True, + config=True, + default_value="jupyter_scheduler.workflow_runner.WorkflowRunner", + klass="jupyter_scheduler.workflow_runner.BaseWorkflowRunner", + help=_i18n( + "The class that handles the workflow creation of scheduled workflows from workflow definitions." + ), + ) + dask_cluster_url = Unicode( allow_none=True, config=True, @@ -463,6 +473,10 @@ class Scheduler(BaseScheduler): task_runner = Instance(allow_none=True, klass="jupyter_scheduler.task_runner.BaseTaskRunner") + workflow_runner = Instance( + allow_none=True, klass="jupyter_scheduler.workflow_runner.BaseWorkflowRunner" + ) + def __init__( self, root_dir: str, @@ -477,6 +491,8 @@ def __init__( self.db_url = db_url if self.task_runner_class: self.task_runner = self.task_runner_class(scheduler=self, config=config) + if self.workflow_runner_class: + self.workflow_runner = self.workflow_runner_class(scheduler=self, config=config) self.dask_client: DaskClient = self._get_dask_client() def _get_dask_client(self): @@ -576,21 +592,32 @@ def run_job(self, job: Job, staging_paths: Dict[str, str]) -> str: return job_id + def run_workflow_from_definition(self, model: DescribeWorkflowDefinition) -> str: + print(f"scheduler.calling create_and_run_workflow with {model}") + workflow_id = self.create_workflow( + CreateWorkflow( + **model.dict(exclude={"schedule", "timezone", "tasks"}, exclude_none=True), + ) + ) + task_definitions = self.get_workflow_definition_tasks(model.workflow_definition_id) + for task_definition in task_definitions: + self.create_workflow_task( + workflow_id=workflow_id, + model=CreateJob(**task_definition.dict(exclude={"schedule", "timezone"})), + ) + return workflow_id + def create_workflow(self, model: CreateWorkflow) -> str: + print(f"calling create_workflow with {model}") + print(model.dict) with self.db_session() as session: workflow = Workflow(**model.dict(exclude_none=True)) session.add(workflow) session.commit() return workflow.workflow_id - def create_workflow_definition(self, model: CreateWorkflowDefinition) -> str: - with self.db_session() as session: - workflow_definition = WorkflowDefinition(**model.dict(exclude_none=True)) - session.add(workflow_definition) - session.commit() - return workflow_definition.workflow_definition_id - def run_workflow(self, workflow_id: str) -> str: + print(f"calling run_workflow for workflow {workflow_id}") process_workflow = self.execution_manager_class( workflow_id=workflow_id, root_dir=self.root_dir, @@ -599,13 +626,30 @@ def run_workflow(self, workflow_id: str) -> str: self.dask_client.submit(process_workflow) return workflow_id + def create_workflow_definition(self, model: CreateWorkflowDefinition) -> str: + with self.db_session() as session: + workflow_definition = WorkflowDefinition(**model.dict(exclude_none=True)) + session.add(workflow_definition) + session.commit() + return workflow_definition.workflow_definition_id + def deploy_workflow_definition(self, workflow_definition_id: str) -> str: - execution_manager = self.execution_manager_class( - workflow_definition_id=workflow_definition_id, - root_dir=self.root_dir, - db_url=self.db_url, - ) - execution_manager.deploy_workflow_definition() + with self.db_session() as session: + workflow_definition = ( + 
session.query(WorkflowDefinition) + .filter(WorkflowDefinition.workflow_definition_id == workflow_definition_id) + .with_for_update() + .one() + ) + workflow_definition_schedule = workflow_definition.schedule + session.query(WorkflowDefinition).filter( + WorkflowDefinition.workflow_definition_id == workflow_definition_id + ).update({"active": True, "status": Status.DEPLOYED}) + session.commit() + + if self.workflow_runner and workflow_definition_schedule: + self.workflow_runner.add_workflow_definition(workflow_definition_id) + return workflow_definition_id def get_workflow(self, workflow_id: str) -> DescribeWorkflow: @@ -616,7 +660,15 @@ def get_workflow(self, workflow_id: str) -> DescribeWorkflow: model = DescribeWorkflow.from_orm(workflow_record) return model - def get_workflow_definition(self, workflow_definition_id: str) -> DescribeWorkflowDefinition: + def get_all_workflows(self) -> List[DescribeWorkflow]: + with self.db_session() as session: + workflow_records = session.query(Workflow).all() + models = [ + DescribeWorkflow.from_orm(workflow_record) for workflow_record in workflow_records + ] + return models + + def get_workflow_definition(self, workflow_definition_id: str) -> List[Workflow]: with self.db_session() as session: workflow_definition_record = ( session.query(WorkflowDefinition) @@ -626,6 +678,18 @@ def get_workflow_definition(self, workflow_definition_id: str) -> DescribeWorkfl model = DescribeWorkflowDefinition.from_orm(workflow_definition_record) return model + def get_workflow_definition_tasks( + self, workflow_definition_id: str + ) -> List[DescribeJobDefinition]: + with self.db_session() as session: + task_records = ( + session.query(JobDefinition) + .filter(JobDefinition.workflow_id == workflow_definition_id) + .all() + ) + tasks = [DescribeJobDefinition.from_orm(task_record) for task_record in task_records] + return tasks + def create_workflow_task(self, workflow_id: str, model: CreateJob) -> str: job_id = self.create_job(model, run=False) print(f"create_workflow_task job_id: {job_id}") @@ -653,6 +717,15 @@ def create_workflow_definition_task( ) return job_definition_id + def get_all_workflow_definition_tasks(self) -> List[DescribeWorkflowDefinition]: + with self.db_session() as session: + workflow_definition_records = session.query(WorkflowDefinition).all() + models = [ + DescribeWorkflowDefinition.from_orm(workflow_definition_record) + for workflow_definition_record in workflow_definition_records + ] + return models + def update_workflow(self, workflow_id: str, model: UpdateWorkflow): with self.db_session() as session: session.query(Workflow).filter(Workflow.workflow_id == workflow_id).update( diff --git a/jupyter_scheduler/workflow_runner.py b/jupyter_scheduler/workflow_runner.py index daa685ca1..a23aedddc 100644 --- a/jupyter_scheduler/workflow_runner.py +++ b/jupyter_scheduler/workflow_runner.py @@ -17,7 +17,11 @@ get_localized_timestamp, get_utc_timestamp, ) -from jupyter_scheduler.workflows import CreateWorkflow, UpdateWorkflowDefinition +from jupyter_scheduler.workflows import ( + CreateWorkflow, + DescribeWorkflowDefinition, + UpdateWorkflowDefinition, +) Base = declarative_base() @@ -108,11 +112,11 @@ def load(self, models: List[DescribeWorkflowDefinitionCache]): session.add(WorkflowDefinitionCache(**model.dict())) session.commit() - def get(self, job_definition_id: str) -> DescribeWorkflowDefinitionCache: + def get(self, workflow_definition_id: str) -> DescribeWorkflowDefinitionCache: with self.session() as session: definition = ( 
session.query(WorkflowDefinitionCache) - .filter(WorkflowDefinitionCache.job_definition_id == job_definition_id) + .filter(WorkflowDefinitionCache.workflow_definition_id == workflow_definition_id) .first() ) @@ -126,22 +130,22 @@ def put(self, model: DescribeWorkflowDefinitionCache): session.add(WorkflowDefinitionCache(**model.dict())) session.commit() - def update(self, job_definition_id: str, model: UpdateWorkflowDefinitionCache): + def update(self, workflow_definition_id: str, model: UpdateWorkflowDefinitionCache): with self.session() as session: session.query(WorkflowDefinitionCache).filter( - WorkflowDefinitionCache.job_definition_id == job_definition_id + WorkflowDefinitionCache.workflow_definition_id == workflow_definition_id ).update(model.dict(exclude_none=True)) session.commit() - def delete(self, job_definition_id: str): + def delete(self, workflow_definition_id: str): with self.session() as session: session.query(WorkflowDefinitionCache).filter( - WorkflowDefinitionCache.job_definition_id == job_definition_id + WorkflowDefinitionCache.workflow_definition_id == workflow_definition_id ).delete() session.commit() -class BaseTaskRunner(LoggingConfigurable): +class BaseWorkflowRunner(LoggingConfigurable): """Base task runner, this class's start method is called at the start of jupyter server, and is responsible for polling for the workflow definitions and creating new workflows @@ -187,10 +191,10 @@ def resume_workflows(self, workflow_definition_id: str): NotImplementedError("must be implemented by subclass") -class TaskRunner(BaseTaskRunner): - """Default task runner that maintains a workflow definition cache and a +class WorkflowRunner(BaseWorkflowRunner): + """Default workflow runner that maintains a workflow definition cache and a priority queue, and polls the queue every `poll_interval` seconds - for new jobs to create. + for new workflows to create. 
""" def __init__(self, scheduler, config=None) -> None: @@ -205,7 +209,7 @@ def compute_next_run_time(self, schedule: str, timezone: Optional[str] = None): def populate_cache(self): with self.db_session() as session: - definitions = ( + definitions: List[WorkflowDefinition] = ( session.query(WorkflowDefinition).filter(WorkflowDefinition.schedule != None).all() ) @@ -223,7 +227,7 @@ def populate_cache(self): if definition.active: self.queue.push( WorkflowDefinitionTask( - job_definition_id=definition.workflow_definition_id, + workflow_definition_id=definition.workflow_definition_id, next_run_time=next_run_time, ) ) @@ -273,12 +277,12 @@ def update_workflow_definition( ) next_run_time_changed = cached_next_run_time != next_run_time and active - resumed_job = model.active and not cache.active + resumed_workflow = model.active and not cache.active - if next_run_time_changed or resumed_job: + if next_run_time_changed or resumed_workflow: self.log.debug("Updating queue...") task = WorkflowDefinitionTask( - job_definition_id=workflow_definition_id, next_run_time=next_run_time + workflow_definition_id=workflow_definition_id, next_run_time=next_run_time ) self.queue.push(task) self.log.debug(f"Updated queue, {task}") @@ -286,14 +290,13 @@ def update_workflow_definition( def delete_workflow_definition(self, workflow_definition_id: str): self.cache.delete(workflow_definition_id) - def create_workflow(self, workflow_definition_id: str): - definition = self.scheduler.get_workflow_definition(workflow_definition_id) + def create_and_run_workflow(self, workflow_definition_id: str): + definition: DescribeWorkflowDefinition = self.scheduler.get_workflow_definition( + workflow_definition_id + ) + print(f"calling workflow_runner.create_and_run_workflow with {definition.dict}") if definition and definition.active: - self.scheduler.create_workflow( - CreateWorkflow( - **definition.dict(exclude={"schedule", "timezone"}, exclude_none=True), - ) - ) + self.scheduler.run_workflow_from_definition(definition) def compute_time_diff(self, queue_run_time: int, timezone: str): local_time = get_localized_timestamp(timezone) if timezone else get_utc_timestamp() @@ -302,7 +305,7 @@ def compute_time_diff(self, queue_run_time: int, timezone: str): def process_queue(self): self.log.debug(self.queue) while not self.queue.isempty(): - task = self.queue.peek() + task: WorkflowDefinitionTask = self.queue.peek() cache = self.cache.get(task.workflow_definition_id) if not cache: @@ -323,7 +326,7 @@ def process_queue(self): break else: try: - self.create_workflow(task.workflow_definition_id) + self.create_and_run_workflow(task.workflow_definition_id) except Exception as e: self.log.exception(e) self.queue.pop() @@ -334,7 +337,7 @@ def process_queue(self): ) self.queue.push( WorkflowDefinitionTask( - job_definition_id=task.job_definition_id, next_run_time=run_time + workflow_definition_id=task.workflow_definition_id, next_run_time=run_time ) ) diff --git a/jupyter_scheduler/workflows.py b/jupyter_scheduler/workflows.py index 1e0c11c38..60c281bc3 100644 --- a/jupyter_scheduler/workflows.py +++ b/jupyter_scheduler/workflows.py @@ -65,6 +65,36 @@ async def get(self, workflow_id: str = None): else: self.finish(workflow.json()) + @authenticated + async def get(self, workflow_id: str = None): + if workflow_id: + try: + workflow = await ensure_async(self.scheduler.get_workflow(workflow_id)) + except SchedulerError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except Exception as e: + self.log.exception(e) + raise 
HTTPError( + 500, "Unexpected error occurred while getting workflow details." + ) from e + else: + self.finish(workflow.json()) + else: + try: + workflows = await ensure_async(self.scheduler.get_all_workflows()) + workflows_json = [workflow.dict() for workflow in workflows] + except SchedulerError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except Exception as e: + self.log.exception(e) + raise HTTPError( + 500, "Unexpected error occurred while getting all workflows details." + ) from e + else: + self.finish(json.dumps(workflows_json)) + class WorkflowsTasksHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler): @authenticated @@ -184,22 +214,40 @@ async def post(self): @authenticated async def get(self, workflow_definition_id: str = None): - if not workflow_definition_id: - raise HTTPError(400, "Missing workflow_id in the request URL.") - try: - workflow_definition = await ensure_async( - self.scheduler.get_workflow_definition(workflow_definition_id) - ) - except SchedulerError as e: - self.log.exception(e) - raise HTTPError(500, str(e)) from e - except Exception as e: - self.log.exception(e) - raise HTTPError( - 500, "Unexpected error occurred while getting workflow definition details." - ) from e + if workflow_definition_id: + try: + workflow_definition = await ensure_async( + self.scheduler.get_workflow_definition(workflow_definition_id) + ) + except SchedulerError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except Exception as e: + self.log.exception(e) + raise HTTPError( + 500, "Unexpected error occurred while getting workflow definition details." + ) from e + else: + self.finish(workflow_definition.json()) else: - self.finish(workflow_definition.json()) + try: + workflow_definitions = await ensure_async( + self.scheduler.get_all_workflow_definitions() + ) + workflow_definitions_json = [ + workflow_definition.dict() for workflow_definition in workflow_definitions + ] + except SchedulerError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except Exception as e: + self.log.exception(e) + raise HTTPError( + 500, + "Unexpected error occurred while getting all workflows definitions details.", + ) from e + else: + self.finish(json.dumps(workflow_definitions_json)) class WorkflowDefinitionsTasksHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler): @@ -260,6 +308,47 @@ async def patch(self, _: str, task_definition_id: str): self.set_status(204) self.finish() + @authenticated + async def get(self, workflow_definition_id: str = None): + if workflow_definition_id: + try: + task_definitions = await ensure_async( + self.scheduler.get_workflow_definition_tasks(workflow_definition_id) + ) + task_definitions_json = [ + task_definition.dict() for task_definition in task_definitions + ] + except SchedulerError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except Exception as e: + self.log.exception(e) + raise HTTPError( + 500, + "Unexpected error occurred while getting workflow task definitions details.", + ) from e + else: + self.finish(json.dumps(task_definitions_json)) + else: + try: + task_definitions = await ensure_async( + self.scheduler.get_all_workflow_definition_tasks() + ) + task_definitions_json = [ + task_definition.dict() for task_definition in task_definitions + ] + except SchedulerError as e: + self.log.exception(e) + raise HTTPError(500, str(e)) from e + except Exception as e: + self.log.exception(e) + raise HTTPError( + 500, + "Unexpected error occurred while getting all 
task definitions details.", + ) from e + else: + self.finish(json.dumps(task_definitions_json)) + class WorkflowDefinitionsDeploymentHandler(ExtensionHandlerMixin, JobHandlersMixin, APIHandler): @authenticated From 97ac5306fa1ad3c69f385ac2fdda6d47bf04d5a4 Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Tue, 29 Oct 2024 12:07:12 -0700 Subject: [PATCH 43/43] add more print statements --- jupyter_scheduler/scheduler.py | 12 +++++++++--- jupyter_scheduler/workflow_runner.py | 3 +++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/jupyter_scheduler/scheduler.py b/jupyter_scheduler/scheduler.py index a34f5c2af..4bad98847 100644 --- a/jupyter_scheduler/scheduler.py +++ b/jupyter_scheduler/scheduler.py @@ -593,7 +593,9 @@ def run_job(self, job: Job, staging_paths: Dict[str, str]) -> str: return job_id def run_workflow_from_definition(self, model: DescribeWorkflowDefinition) -> str: - print(f"scheduler.calling create_and_run_workflow with {model}") + print( + f"calling scheduler.run_workflow_from_definition with DescribeWorkflowDefinition {model}" + ) workflow_id = self.create_workflow( CreateWorkflow( **model.dict(exclude={"schedule", "timezone", "tasks"}, exclude_none=True), @@ -608,7 +610,7 @@ def run_workflow_from_definition(self, model: DescribeWorkflowDefinition) -> str return workflow_id def create_workflow(self, model: CreateWorkflow) -> str: - print(f"calling create_workflow with {model}") + print(f"calling scheduler.create_workflow with {model}") print(model.dict) with self.db_session() as session: workflow = Workflow(**model.dict(exclude_none=True)) @@ -617,7 +619,7 @@ def create_workflow(self, model: CreateWorkflow) -> str: return workflow.workflow_id def run_workflow(self, workflow_id: str) -> str: - print(f"calling run_workflow for workflow {workflow_id}") + print(f"calling scheduler.run_workflow for {workflow_id}") process_workflow = self.execution_manager_class( workflow_id=workflow_id, root_dir=self.root_dir, @@ -681,6 +683,7 @@ def get_workflow_definition(self, workflow_definition_id: str) -> List[Workflow] def get_workflow_definition_tasks( self, workflow_definition_id: str ) -> List[DescribeJobDefinition]: + print(f"calling scheduler.get_workflow_definition_tasks for{workflow_definition_id}") with self.db_session() as session: task_records = ( session.query(JobDefinition) @@ -691,6 +694,9 @@ def get_workflow_definition_tasks( return tasks def create_workflow_task(self, workflow_id: str, model: CreateJob) -> str: + print( + f"calling scheduler.create_workflow_task with workflow_id {workflow_id},\n CreateJob {model},\n about to call scheduler.create_job" + ) job_id = self.create_job(model, run=False) print(f"create_workflow_task job_id: {job_id}") workflow: DescribeWorkflow = self.get_workflow(workflow_id) diff --git a/jupyter_scheduler/workflow_runner.py b/jupyter_scheduler/workflow_runner.py index a23aedddc..a569349b3 100644 --- a/jupyter_scheduler/workflow_runner.py +++ b/jupyter_scheduler/workflow_runner.py @@ -296,6 +296,9 @@ def create_and_run_workflow(self, workflow_definition_id: str): ) print(f"calling workflow_runner.create_and_run_workflow with {definition.dict}") if definition and definition.active: + print( + f"calling self.scheduler.run_workflow_from_definition from workflow_runner.create_and_run_workflow with {definition.dict}" + ) self.scheduler.run_workflow_from_definition(definition) def compute_time_diff(self, queue_run_time: int, timezone: str):
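
Note on the scheduling loop: the WorkflowRunner added in these patches keeps a SQLite-backed cache of schedule metadata and a heap-based priority queue ordered by next_run_time, and its start() coroutine polls the queue every poll_interval seconds, running any definition whose run time has passed and pushing it back with a freshly computed next run. The following is a minimal standalone sketch of that poll-and-reschedule idea under simplified assumptions (hypothetical names; the real process_queue also consults the cache to skip stale or paused entries).

import asyncio
import heapq
import time
from dataclasses import dataclass, field
from typing import Callable, List


@dataclass(order=True)
class ScheduledRun:
    next_run_time: float                      # heap is ordered by this field only
    definition_id: str = field(compare=False)


async def poll_loop(
    queue: List[ScheduledRun],
    run_workflow: Callable[[str], None],      # e.g. scheduler.run_workflow_from_definition
    next_run_after: Callable[[str], float],   # e.g. a croniter-based compute_next_run_time
    poll_interval: float = 10.0,
) -> None:
    while True:
        # Drain every entry whose scheduled time has already passed.
        while queue and queue[0].next_run_time <= time.time():
            item = heapq.heappop(queue)
            run_workflow(item.definition_id)
            heapq.heappush(queue, ScheduledRun(next_run_after(item.definition_id), item.definition_id))
        await asyncio.sleep(poll_interval)
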