Skip to content

Commit

Permalink
EWMS Sidecar Pt-3 (#112)
Browse files Browse the repository at this point in the history
* trim down `ewms_sidecar.py`

* trim `act.py`

* wording

* add `methos` pos var

* adapt k8s args for testing with ewms_sidecar

* add `unstable_testing__starter_exc` key under `classifiers` for testing

* refactor `tms` & `clientmananger` to `cluster_starter` (no DB changes)

* typo

* fix ci

* revert test changes

* <bot> update dependencies*.log files(s)

* fix ci env var

* call it `direct-remote-condor`

---------

Co-authored-by: github-actions <[email protected]>
  • Loading branch information
ric-evans and github-actions authored Dec 20, 2023
1 parent 44335c2 commit 2cd3470
Show file tree
Hide file tree
Showing 8 changed files with 153 additions and 206 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/wipac-cicd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on: [push]

env:
CI_TEST: 'yes'
CLIENTMANAGER_IMAGE_WITH_TAG: 'ghcr.io/wipacrepo/skydriver:latest'
THIS_IMAGE_WITH_TAG: 'ghcr.io/wipacrepo/skydriver:latest'
EWMS_PILOT_TASK_TIMEOUT: 999
SCAN_BACKLOG_RUNNER_SHORT_DELAY: 1
SCAN_BACKLOG_RUNNER_DELAY: 1
Expand Down Expand Up @@ -135,6 +135,7 @@ jobs:
docker run --network="host" --rm -i --name test \
--env LATEST_TAG=$LATEST_TAG \
--env THIS_IMAGE_WITH_TAG=$THIS_IMAGE_WITH_TAG \
$(env | grep '^SKYSCAN_' | awk '$0="--env "$0') \
$(env | grep '^EWMS_' | awk '$0="--env "$0') \
$(env | grep '^CLIENTMANAGER_' | awk '$0="--env "$0') \
Expand Down
10 changes: 5 additions & 5 deletions dependencies-from-Dockerfile.log
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
# pip freeze
########################################################################
backoff==2.2.1
boto3==1.34.3
botocore==1.34.3
boto3==1.34.4
botocore==1.34.4
cachetools==5.3.2
certifi==2023.11.17
cffi==1.16.0
Expand Down Expand Up @@ -75,15 +75,15 @@ pip==23.2.1
pipdeptree==2.13.1
setuptools==65.5.1
skydriver-clientmanager-ewms-sidecar
├── boto3 [required: Any, installed: 1.34.3]
│ ├── botocore [required: >=1.34.3,<1.35.0, installed: 1.34.3]
├── boto3 [required: Any, installed: 1.34.4]
│ ├── botocore [required: >=1.34.4,<1.35.0, installed: 1.34.4]
│ │ ├── jmespath [required: >=0.7.1,<2.0.0, installed: 1.0.1]
│ │ ├── python-dateutil [required: >=2.1,<3.0.0, installed: 2.8.2]
│ │ │ └── six [required: >=1.5, installed: 1.16.0]
│ │ └── urllib3 [required: >=1.25.4,<2.1, installed: 1.26.18]
│ ├── jmespath [required: >=0.7.1,<2.0.0, installed: 1.0.1]
│ └── s3transfer [required: >=0.9.0,<0.10.0, installed: 0.9.0]
│ └── botocore [required: >=1.33.2,<2.0a.0, installed: 1.34.3]
│ └── botocore [required: >=1.33.2,<2.0a.0, installed: 1.34.4]
│ ├── jmespath [required: >=0.7.1,<2.0.0, installed: 1.0.1]
│ ├── python-dateutil [required: >=2.1,<3.0.0, installed: 2.8.2]
│ │ └── six [required: >=1.5, installed: 1.16.0]
Expand Down
6 changes: 3 additions & 3 deletions ewms_sidecar/__main__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Entry-point to start up clientmanager service."""
"""Entry-point to start up EWMS Sidecar."""

from . import clientmanager, config
from . import config, ewms_sidecar

if __name__ == "__main__":
clientmanager.main()
ewms_sidecar.main()
config.LOGGER.info("Done.")
125 changes: 57 additions & 68 deletions ewms_sidecar/condor/act.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from .. import utils
from ..config import ENV, LOGGER
from . import condor_tools, starter, stopper, watcher
from . import condor_tools, starter, watcher


def act(args: argparse.Namespace) -> None:
Expand All @@ -26,70 +26,59 @@ def act(args: argparse.Namespace) -> None:


def _act(args: argparse.Namespace, schedd_obj: htcondor.Schedd) -> None:
match args.action:
case "start":
LOGGER.info(
f"Starting {args.n_workers} Skymap Scanner client workers on {args.collector} / {args.schedd}"
)
# make connections -- do now so we don't have any surprises downstream
skydriver_rc = utils.connect_to_skydriver()
# start
submit_dict = starter.prep(
spool=args.spool,
# starter CL args -- worker
worker_memory_bytes=args.worker_memory_bytes,
worker_disk_bytes=args.worker_disk_bytes,
n_cores=args.n_cores,
max_worker_runtime=args.max_worker_runtime,
priority=args.priority,
# starter CL args -- client
client_args=args.client_args,
client_startup_json_s3=utils.s3ify(args.client_startup_json),
image=args.image,
)
# final checks
if args.dryrun:
LOGGER.critical("Script Aborted: dryrun enabled")
return
if utils.skydriver_aborted_scan(skydriver_rc):
LOGGER.critical("Script Aborted: SkyDriver aborted scan")
return
# start
submit_result_obj = starter.start(
schedd_obj=schedd_obj,
n_workers=args.n_workers,
submit_dict=submit_dict,
spool=args.spool,
)
# report to SkyDriver
skydriver_cluster_obj = dict(
orchestrator="condor",
location={
"collector": args.collector,
"schedd": args.schedd,
},
uuid=args.uuid,
cluster_id=submit_result_obj.cluster(),
n_workers=submit_result_obj.num_procs(),
starter_info=submit_dict,
)
utils.update_skydriver(skydriver_rc, **skydriver_cluster_obj)
LOGGER.info("Sent cluster info to SkyDriver")
watcher.watch(
args.collector,
args.schedd,
submit_result_obj.cluster(),
schedd_obj,
submit_result_obj.num_procs(),
skydriver_rc,
skydriver_cluster_obj,
)
case "stop":
stopper.stop(
args.collector,
args.schedd,
args.cluster_id,
schedd_obj,
)
case _:
raise RuntimeError(f"Unknown action: {args.action}")
LOGGER.info(
f"Starting {args.n_workers} Skymap Scanner client workers on {args.collector} / {args.schedd}"
)
# make connections -- do now so we don't have any surprises downstream
skydriver_rc = utils.connect_to_skydriver()
# start
submit_dict = starter.prep(
spool=args.spool,
# starter CL args -- worker
worker_memory_bytes=args.worker_memory_bytes,
worker_disk_bytes=args.worker_disk_bytes,
n_cores=args.n_cores,
max_worker_runtime=args.max_worker_runtime,
priority=args.priority,
# starter CL args -- client
client_args=args.client_args,
client_startup_json_s3=utils.s3ify(args.client_startup_json),
image=args.image,
)
# final checks
if args.dryrun:
LOGGER.critical("Script Aborted: dryrun enabled")
return
if utils.skydriver_aborted_scan(skydriver_rc):
LOGGER.critical("Script Aborted: SkyDriver aborted scan")
return
# start
submit_result_obj = starter.start(
schedd_obj=schedd_obj,
n_workers=args.n_workers,
submit_dict=submit_dict,
spool=args.spool,
)
# report to SkyDriver
skydriver_cluster_obj = dict(
orchestrator="condor",
location={
"collector": args.collector,
"schedd": args.schedd,
},
uuid=args.uuid,
cluster_id=submit_result_obj.cluster(),
n_workers=submit_result_obj.num_procs(),
starter_info=submit_dict,
)
utils.update_skydriver(skydriver_rc, **skydriver_cluster_obj)
LOGGER.info("Sent cluster info to SkyDriver")
watcher.watch(
args.collector,
args.schedd,
submit_result_obj.cluster(),
schedd_obj,
submit_result_obj.num_procs(),
skydriver_rc,
skydriver_cluster_obj,
)
Loading

0 comments on commit 2cd3470

Please sign in to comment.