Skip to content

Commit

Permalink
WIP: more CLI to control the remote
Browse files Browse the repository at this point in the history
- Change command to aiida-hq
- add aiida-hq install <computer>
- [ ] add tests
- [ ] start server
- [ ] pre-commit lint
  • Loading branch information
unkcpz committed Jun 5, 2024
1 parent fd11895 commit 8d5bb61
Show file tree
Hide file tree
Showing 19 changed files with 471 additions and 51 deletions.
8 changes: 8 additions & 0 deletions aiida_hyperqueue/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
from aiida.cmdline.params import options as core_options
from aiida.cmdline.params import types as core_types

from .root import cmd_root
from .install import cmd_install
from .server import cmd_info, cmd_start, cmd_stop
from .alloc import cmd_list, cmd_add, cmd_remove
96 changes: 96 additions & 0 deletions aiida_hyperqueue/cli/alloc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import click

from aiida.cmdline.params import options, arguments
from aiida.cmdline.utils import echo

from .root import cmd_root

# Sub-group ``alloc`` of the root command. The ``add``, ``list`` and ``remove``
# commands in this module register themselves on it via ``@alloc_group.command``.
# The docstring is kept short on purpose: click shows it as the group's help text.
@cmd_root.group("alloc")
def alloc_group():
    """Commands to configure HQ allocations."""


@alloc_group.command("add")
@click.argument("slurm-options", nargs=-1)
@options.COMPUTER(required=True)
@click.option(
    "-t",
    "--time-limit",
    type=str,
    required=True,
    help=(
        "Time limit for each job run by the allocation. The duration can be expressed using various shortcuts "
        "recognised by HyperQueue, e.g. 30m, 2h, ... For the full list, see https://tinyurl.com/hq-duration."
    ),
)
@click.option(
    "--hyper-threading/--no-hyper-threading",
    default=True,
    type=click.BOOL,
    help=("Allow HyperQueue to consider hyperthreads when assigning resources."),
)
@click.option(
    "-b",
    "--backlog",
    type=click.INT,
    required=False,
    default=1,
    help=(
        "Set the backlog for the allocator. This is the number of allocations HyperQueue will make sure is waiting with"
        " the job manager."
    ),
)
@click.option(
    "-w",
    "--workers-per-alloc",
    type=click.INT,
    required=False,
    default=1,
    help=("Option to allow pooled jobs to launch on multiple nodes."),
)
def cmd_add(
    slurm_options, computer, time_limit, hyper_threading, backlog, workers_per_alloc
):
    """Add a new allocation to the HQ server."""
    # Parameters (all supplied by click):
    #   slurm_options     -- tuple of extra arguments forwarded verbatim after ``--``
    #   computer          -- computer whose transport is used to run ``hq``
    #   time_limit        -- duration string handed to ``--time-limit``
    #   hyper_threading   -- when False, ``--cpus no-ht`` is added
    #   backlog           -- integer handed to ``--backlog``
    #   workers_per_alloc -- integer handed to ``--workers-per-alloc``
    #
    # The docstring above is intentionally one line: click prints it as help text.

    # Local import so this command does not depend on a module-level import.
    import shlex

    # Build the command word by word. Free-form user strings are shell-quoted so
    # that a value containing spaces or shell metacharacters reaches ``hq`` as a
    # single argument instead of being re-split by the remote shell.
    words = [
        "hq",
        "alloc",
        "add",
        "slurm",
        "--backlog",
        str(backlog),
        "--time-limit",
        shlex.quote(time_limit),
        "--name",
        "aiida",
    ]
    if not hyper_threading:
        words += ["--cpus", "no-ht"]
    words += ["--workers-per-alloc", str(workers_per_alloc), "--"]
    words += [shlex.quote(option) for option in slurm_options]

    with computer.get_transport() as transport:
        retval, _, stderr = transport.exec_command_wait(" ".join(words))

    if retval != 0:
        echo.echo_critical(f"failed to create new allocation: {stderr}\n")

    # The message from ``hq`` is taken from stderr, as in the original code.
    echo.echo_success(f"{stderr}")


@alloc_group.command("list")
@arguments.COMPUTER()
def cmd_list(computer):
    """List the allocations on the HQ server."""
    # Run the query over the computer's transport and keep all three results.
    with computer.get_transport() as remote:
        outcome = remote.exec_command_wait("hq alloc list")

    exit_code, listing, error_text = outcome

    # A non-zero exit code is reported as a critical error together with stderr.
    succeeded = exit_code == 0
    if not succeeded:
        echo.echo_critical(f"failed to list allocations: {error_text}\n")

    echo.echo(listing)


@alloc_group.command("remove")
@click.argument("alloc_id")
@options.COMPUTER(required=True)
def cmd_remove(alloc_id, computer):
    """Remove an allocation from the HQ server."""
    # ``alloc_id`` is a free-form string argument, so it is shell-quoted before
    # being placed in the remote command line; plain numeric ids are unchanged.
    import shlex

    with computer.get_transport() as transport:
        retval, _, stderr = transport.exec_command_wait(
            f"hq alloc remove {shlex.quote(alloc_id)}"
        )

    if retval != 0:
        echo.echo_critical(f"failed to remove allocation: {stderr}\n")

    # The message from ``hq`` is taken from stderr, as in the original code.
    echo.echo_success(f"{stderr}")
35 changes: 31 additions & 4 deletions aiida_hyperqueue/cli.py → aiida_hyperqueue/cli/cli.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,42 @@
# -*- coding: utf-8 -*-
"""Command line interface (CLI) for aiida_hyperqueue."""
"""Command line interface `aiida-hq` for aiida-hyperqueue.
The CLI implementation prototype from `aiida-pseudo`.
"""

import click
from aiida.cmdline.groups.verdi import VerdiCommandGroup
from aiida.cmdline.params import options, arguments
from aiida.cmdline.utils import decorators, echo
from aiida.cmdline.commands.cmd_data import verdi_data

from .params import options

# Registers a ``hyperqueue`` sub-group on the ``verdi_data`` group imported
# from ``aiida.cmdline.commands.cmd_data``.
@verdi_data.group("hyperqueue")
def data_cli():
    """Command line interface for aiida-hyperqueue"""

class CustomVerdiCommandGroup(VerdiCommandGroup):
    """Command group for this CLI, derived from aiida-core's ``VerdiCommandGroup``.

    The only customisation is the verbosity option: the CLI's own variant is
    attached, which has no ``-v`` short flag because other options of this CLI
    use that flag and the two would clash.
    """

    @staticmethod
    def add_verbosity_option(cmd):
        """Return ``cmd`` with the ``verbosity`` option attached unless it already has one."""
        if cmd is None:
            return cmd

        # Leave commands alone that already declare a ``verbosity`` parameter.
        if any(param.name == "verbosity" for param in cmd.params):
            return cmd

        return options.VERBOSITY()(cmd)


# Root ``aiida-hq`` click group. It uses ``CustomVerdiCommandGroup`` as group
# class, accepts ``-h`` as well as ``--help``, and carries a verbosity and a
# profile option.
@click.group(
    "aiida-hq",
    cls=CustomVerdiCommandGroup,
    context_settings={"help_option_names": ["-h", "--help"]},
)
@options.VERBOSITY()
@options.PROFILE()
def cmd_root():
    """CLI for the ``aiida-hyperqueue`` plugin."""


@data_cli.group("server")
Expand Down
89 changes: 89 additions & 0 deletions aiida_hyperqueue/cli/install.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
import click
import tempfile
import requests
import tarfile
from pathlib import Path

from aiida import orm
from aiida.cmdline.utils import echo

from .params import arguments
from .root import cmd_root


@cmd_root.command("install")
@arguments.COMPUTER()
# FIXME: ``$HOME`` does not work for the remote directory: a folder literally
# named '$HOME' is created. Need to understand how makedirs behaves over ssh.
@click.option(
    "-p",
    "--remote-bin-dir",
    type=click.Path(),
    default=Path("bin/"),
    help="remote bin path hq will stored.",
)
@click.option(
    "--hq-version", type=str, default="0.19.0", help="the hq version will be installed."
)
# TODO: separate the bashrc write and make it optional.
# TODO: should also support different arch binary??
def cmd_install(computer: orm.Computer, remote_bin_dir, hq_version):
    """Install the hq binary to the computer through the transport"""
    # Steps: download the release tarball for ``hq_version`` into a local
    # temporary directory, unpack it, upload the ``hq`` binary into
    # ``remote_bin_dir`` on ``computer``, make it executable and add the bin
    # directory to the remote ``~/.bashrc``.
    #
    # Any failing step ends the command through ``echo.echo_critical``.

    # Local imports so the fix does not depend on new module-level imports.
    import shlex
    from pathlib import PurePosixPath

    # ``click.Path()`` returns a plain ``str`` for a user-supplied value and only
    # the ``Path`` default is a path object, so normalise before using ``/``.
    # A POSIX path is used because the target is a remote Linux machine.
    remote_bin_dir = PurePosixPath(remote_bin_dir)
    remote_hq_path = str(remote_bin_dir / "hq")

    with tempfile.TemporaryDirectory() as temp_dir:
        url = f"https://github.com/It4innovations/hyperqueue/releases/download/v{hq_version}/hq-v{hq_version}-linux-x64.tar.gz"
        response = requests.get(url, stream=True)
        rcode = response.status_code

        if rcode != 200:
            # Abort here: continuing would write the HTTP error body to disk
            # and then fail while trying to open it as a tarball.
            echo.echo_critical(
                "Cannot download the hq, please check the version is exist."
            )

        temp_path = Path(temp_dir)
        tar_path = temp_path / "hq.tar.gz"

        with open(tar_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

        with tarfile.open(tar_path, "r") as tar:
            tar.extractall(path=temp_path)

        echo.echo_success(f"The hq version {hq_version} binary downloaded.")

        bin_path = temp_path / "hq"

        # Upload while the temporary directory (and thus ``bin_path``) still exists.
        # TODO: try not override if the binary exist, put has overwrite=True as default
        with computer.get_transport() as transport:
            # first check if the hq exist in the target folder
            if transport.isfile(remote_hq_path):
                echo.echo_info(
                    f"hq exist in the {remote_bin_dir} on remote, will override it."
                )

            # The transport is given plain strings rather than path objects.
            transport.makedirs(path=str(remote_bin_dir), ignore_existing=True)
            transport.put(localpath=str(bin_path.resolve()), remotepath=remote_hq_path)

            # XXX: should transport.put take care of this already??
            retval, _, stderr = transport.exec_command_wait(
                f"chmod +x {shlex.quote(remote_hq_path)}"
            )
            if retval != 0:
                echo.echo_critical(
                    f"Not able to make the remote hq binary executable.\nInfo: {stderr}"
                )

            # Write to bashrc, once. ``||`` appends the export only when the
            # marker comment is NOT yet present; ``&&`` did the opposite, so a
            # first install appended nothing and returned a non-zero code.
            # NOTE: the exported directory is hard-coded to ``$HOME/bin`` and
            # does not follow ``--remote-bin-dir``.
            identity_str = "by aiida-hq"
            retval, _, stderr = transport.exec_command_wait(
                f"grep -q '# {identity_str}' ~/.bashrc || echo '# {identity_str}\nexport PATH=$HOME/bin:$PATH' >> ~/.bashrc"
            )

            if retval != 0:
                echo.echo_critical(
                    f"Not able to set set the path $HOME/bin to your remote bashrc, try to do it manually.\n"
                    f"Info: {stderr}"
                )

    echo.echo_success("The hq binary installed in remote")
Empty file.
4 changes: 4 additions & 0 deletions aiida_hyperqueue/cli/params/arguments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# -*- coding: utf-8 -*-
from aiida.cmdline.params import arguments as core_arguments

# Re-export of aiida-core's ``COMPUTER`` argument under this package's
# ``params.arguments`` namespace.
COMPUTER = core_arguments.COMPUTER
33 changes: 33 additions & 0 deletions aiida_hyperqueue/cli/params/options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
"""Reusable options for CLI commands."""

import functools

import click
from aiida.cmdline.params import options as core_options
from aiida.cmdline.params import types as core_types

__all__ = (
"PROFILE",
"VERBOSITY",
"VERSION",
)

# Pre-configured variant of aiida-core's ``PROFILE`` option. ``expose_value=False``
# means the value is not passed on to the command function; ``load_profile=True``
# presumably makes the parameter type load the selected profile -- confirm
# against aiida-core's ``ProfileParamType``.
PROFILE = functools.partial(
    core_options.PROFILE,
    type=core_types.ProfileParamType(load_profile=True),
    expose_value=False,
)

# Clone the ``VERBOSITY`` option from ``aiida-core`` so the ``-v`` short flag can be removed, since that overlaps with
# the flag of the ``VERSION`` option of this CLI.
# The clone is taken first so that the ``args`` of the aiida-core original are left untouched.
VERBOSITY = core_options.VERBOSITY.clone()
VERBOSITY.args = ("--verbosity",)

# Optional string option ``-v`` / ``--version``.
VERSION = core_options.OverridableOption(
    "-v",
    "--version",
    type=click.STRING,
    required=False,
    help="Select the version of the installed configuration.",
)
37 changes: 37 additions & 0 deletions aiida_hyperqueue/cli/root.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
"""Command line interface `aiida-hq` for aiida-hyperqueue.
The CLI implementation prototype from `aiida-pseudo`.
"""

import click

from aiida.cmdline.groups.verdi import VerdiCommandGroup

from .params import options


class CustomVerdiCommandGroup(VerdiCommandGroup):
    """``VerdiCommandGroup`` variant used by the ``aiida-hq`` command line.

    It swaps the verbosity option for the CLI's own one, which drops the ``-v``
    short flag: that flag is taken by other options of this CLI and keeping
    both would clash.
    """

    @staticmethod
    def add_verbosity_option(cmd):
        """Attach the shared ``verbosity`` option to ``cmd`` when it is missing, then return ``cmd``."""
        needs_option = cmd is not None and all(
            param.name != "verbosity" for param in cmd.params
        )
        if needs_option:
            # Decorate with the CLI's own verbosity option (no ``-v`` flag).
            decorate = options.VERBOSITY()
            cmd = decorate(cmd)

        return cmd


# Root ``aiida-hq`` click group on which the other CLI modules register their
# commands and sub-groups. It uses ``CustomVerdiCommandGroup`` as group class,
# accepts ``-h`` as well as ``--help``, and carries the ``--verbosity`` and
# profile options from ``.params.options``.
@click.group(
    "aiida-hq",
    cls=CustomVerdiCommandGroup,
    context_settings={"help_option_names": ["-h", "--help"]},
)
@options.VERBOSITY()
@options.PROFILE()
def cmd_root():
    """CLI for the ``aiida-hyperqueue`` plugin."""
76 changes: 76 additions & 0 deletions aiida_hyperqueue/cli/server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from .root import cmd_root

from aiida.cmdline.utils import echo

from .params import arguments

# Sub-group ``server`` of the root command. The ``start``, ``stop`` and ``info``
# commands in this module register themselves on it via ``@server_group.command``.
# The docstring is kept short on purpose: click shows it as the group's help text.
@cmd_root.group("server")
def server_group():
    """Commands for interacting with the HQ server."""


@server_group.command("start")
@arguments.COMPUTER()
def cmd_start(computer):
    """Start the HyperQueue server."""
    # Probe first: a zero exit code from ``hq server info`` is taken to mean
    # that a server is already up, in which case nothing is launched.
    with computer.get_transport() as probe:
        probe_code, _, _ = probe.exec_command_wait("hq server info")

    already_running = probe_code == 0
    if already_running:
        echo.echo_info("server is already running!")
        return

    launch_command = "nohup hq server start 1>$HOME/.hq-stdout 2>$HOME/.hq-stderr &"

    with computer.get_transport() as launcher:
        # FIXME: a short sleep is needed after the ``nohup``, see
        # https://github.com/aiidateam/aiida-core/issues/6377
        # The sleep workaround is not correct though, because the sleep always
        # returns 0. This does not rely on
        # https://github.com/aiidateam/aiida-core/pull/6452
        launch_code, _, launch_error = launcher.exec_command_wait(
            launch_command,
            timeout=0.1,
        )

    if launch_code != 0:
        echo.echo_critical(f"unable to start the server: {launch_error}")

    echo.echo_success("HQ server started!")

@server_group.command("stop")
@arguments.COMPUTER()
def cmd_stop(computer):
    """Stop the HyperQueue server."""
    # The docstring is what click prints as help for ``server stop``; it
    # previously said "Start", which mislabelled the command.

    # One transport serves both the probe and the stop request, so the
    # connection to the computer is opened only once.
    with computer.get_transport() as transport:
        retval, _, _ = transport.exec_command_wait("hq server info")

        # A non-zero exit code from ``hq server info`` is taken to mean that
        # no server is running, so there is nothing to stop.
        if retval != 0:
            echo.echo_info("server is not running!")
            return

        retval, _, stderr = transport.exec_command_wait("hq server stop")

    if retval != 0:
        echo.echo_critical(f"unable to stop the server: {stderr}")

    echo.echo_success("HQ server stopped!")


@server_group.command("info")
@arguments.COMPUTER()
def cmd_info(computer):
    """Get information on the HyperQueue server."""
    # Runs ``hq server info`` on the computer and prints its stdout. A non-zero
    # exit code ends the command with a critical error that includes stderr.

    with computer.get_transport() as transport:
        retval, stdout, stderr = transport.exec_command_wait("hq server info")

    if retval != 0:
        # The hint must name this CLI (``aiida-hq``); it previously pointed to
        # a non-existent ``aiida-qe`` command.
        echo.echo_critical(
            f"cannot obtain HyperQueue server information: {stderr}\n"
            "Try starting the server with `aiida-hq server start`."
        )

    echo.echo(stdout)
Loading

0 comments on commit 8d5bb61

Please sign in to comment.