Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add args for k8s_glue_example.py #196

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions clearml_agent/glue/k8s.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ def __init__(
:param str extra_bash_init_script: Additional bash script to run before starting the Task inside the container
:param str namespace: K8S namespace to be used when creating the new pods (default: clearml)
:param int max_pods_limit: Maximum number of pods that K8S glue can run at the same time
:param str pod_name_prefix: Define pod name prefix for k8s (default: clearml-id-)
:param str limit_pod_label: Define limit pod label for k8s (default: ai.allegro.agent.serial=pod-{pod_number})
:param bool force_system_packages: Should be true when running tasks in containers (i.e. docker mode or k8s glue)
(default: true)
"""
super(K8sIntegration, self).__init__()
self.kind = os.environ.get("CLEARML_K8S_GLUE_KIND", "pod").strip().lower()
Expand Down
34 changes: 33 additions & 1 deletion examples/k8s_glue_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,18 @@ def parse_args():
parser = ArgumentParser()
group = parser.add_mutually_exclusive_group()

parser.add_argument(
"--k8s-pending-queue-name", type=str,
help="Queue name to use when task is pending in the k8s scheduler (default: %(default)s)", default="k8s_scheduler"
)
parser.add_argument(
"--container-bash-script", type=str,
help="Path to the file with container bash script to be executed in k8s", default=None
)
parser.add_argument(
"--debug", action="store_true", default=False,
help="Switch logging on (default: %(default)s)"
)
parser.add_argument(
"--queue", type=str, help="Queues to pull tasks from. If multiple queues, use comma separated list, e.g. 'queue1,queue2'",
)
Expand Down Expand Up @@ -65,6 +77,18 @@ def parse_args():
help="Limit the maximum number of pods that this service can run at the same time."
"Should not be used with ports-mode"
)
parser.add_argument(
"--pod-name-prefix", type=str,
help="Define pod name prefix for k8s (default: %(default)s)", default="clearml-id-"
)
parser.add_argument(
"--limit-pod-label", type=str,
help="Define limit pod label for k8s (default: %(default)s)", default="ai.allegro.agent.serial=pod-{pod_number}"
)
parser.add_argument(
"--no-system-packages", action="store_true", default=False,
help="False when running tasks in containers (default: %(default)s)"
)
parser.add_argument(
"--use-owner-token", action="store_true", default=False,
help="Generate and use task owner token for the execution of each task"
Expand All @@ -88,12 +112,20 @@ def k8s_user_props_cb(pod_number=0):
return user_prop
user_props_cb = k8s_user_props_cb

if args.container_bash_script:
with open(args.container_bash_script, "r") as file:
container_bash_script = file.read().splitlines()
else:
container_bash_script = None

k8s = K8sIntegration(
k8s_pending_queue_name=args.k8s_pending_queue_name, container_bash_script=container_bash_script,
ports_mode=args.ports_mode, num_of_services=args.num_of_services, base_pod_num=args.base_pod_num,
user_props_cb=user_props_cb, overrides_yaml=args.overrides_yaml, clearml_conf_file=args.pod_clearml_conf,
template_yaml=args.template_yaml, extra_bash_init_script=K8sIntegration.get_ssh_server_bash(
ssh_port_number=args.ssh_server_port) if args.ssh_server_port else None,
namespace=args.namespace, max_pods_limit=args.max_pods or None,
namespace=args.namespace, max_pods_limit=args.max_pods or None, pod_name_prefix=args.pod_name_prefix,
limit_pod_label=args.limit_pod_label, force_system_packages=not args.no_system_packages, debug=args.debug,
)
args.queue = [q.strip() for q in args.queue.split(",") if q.strip()]

Expand Down