-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Change command to aiida-hq - add aiida-hq install <computer> - [ ] add tests - [ ] start server - [ ] pre-commit lint
- Loading branch information
Showing
19 changed files
with
471 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# -*- coding: utf-8 -*- | ||
from aiida.cmdline.params import options as core_options | ||
from aiida.cmdline.params import types as core_types | ||
|
||
from .root import cmd_root | ||
from .install import cmd_install | ||
from .server import cmd_info, cmd_start, cmd_stop | ||
from .alloc import cmd_list, cmd_add, cmd_remove |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
import click | ||
|
||
from aiida.cmdline.params import options, arguments | ||
from aiida.cmdline.utils import echo | ||
|
||
from .root import cmd_root | ||
|
||
# Sub-group ``alloc`` of the root CLI; the ``add``/``list``/``remove`` commands below attach to it.
# The docstring doubles as the help text shown by click, so it is kept short.
@cmd_root.group("alloc")
def alloc_group():
    """Commands to configure HQ allocations."""
|
||
|
||
@alloc_group.command("add")
@click.argument("slurm-options", nargs=-1)
@options.COMPUTER(required=True)
@click.option(
    "-t",
    "--time-limit",
    type=str,
    required=True,
    help=(
        "Time limit for each job run by the allocation. The duration can be expressed using various shortcuts "
        "recognised by HyperQueue, e.g. 30m, 2h, ... For the full list, see https://tinyurl.com/hq-duration."
    ),
)
@click.option(
    "--hyper-threading/--no-hyper-threading",
    default=True,
    type=click.BOOL,
    help=("Allow HyperQueue to consider hyperthreads when assigning resources."),
)
@click.option(
    "-b",
    "--backlog",
    type=click.INT,
    required=False,
    default=1,
    help=(
        "Set the backlog for the allocator. This is the number of allocations HyperQueue will make sure is waiting with"
        " the job manager."
    ),
)
@click.option(
    "-w",
    "--workers-per-alloc",
    type=click.INT,
    required=False,
    default=1,
    help=("Option to allow pooled jobs to launch on multiple nodes."),
)
def cmd_add(
    slurm_options, computer, time_limit, hyper_threading, backlog, workers_per_alloc
):
    """Add a new allocation to the HQ server."""
    # Imported locally so the block is self-contained with respect to the file's import section.
    import shlex

    # Build the command as an argv list and let ``shlex.join`` quote every token. Interpolating the
    # raw CLI values into the shell string would let a value containing spaces or shell
    # metacharacters be re-split or interpreted by the remote shell.
    argv = [
        "hq",
        "alloc",
        "add",
        "slurm",
        "--backlog",
        str(backlog),
        "--time-limit",
        str(time_limit),
        "--name",
        "aiida",
    ]

    if not hyper_threading:
        # Ask HyperQueue not to count hyperthreads as CPUs.
        argv += ["--cpus", "no-ht"]

    # Everything after ``--`` is forwarded verbatim to the job manager (one token per CLI argument).
    argv += ["--workers-per-alloc", str(workers_per_alloc), "--", *slurm_options]

    with computer.get_transport() as transport:
        retval, _, stderr = transport.exec_command_wait(shlex.join(argv))

    if retval != 0:
        echo.echo_critical(f"failed to create new allocation: {stderr}\n")

    # The command's stderr stream is what gets reported to the user on success.
    echo.echo_success(f"{stderr}")
|
||
|
||
@alloc_group.command("list")
@arguments.COMPUTER()
def cmd_list(computer):
    """List the allocations on the HQ server."""
    # Run ``hq alloc list`` on the computer and capture exit code, stdout and stderr.
    with computer.get_transport() as remote:
        outcome = remote.exec_command_wait("hq alloc list")

    exit_code, listing, errors = outcome
    failed = exit_code != 0

    if failed:
        echo.echo_critical(f"failed to list allocations: {errors}\n")

    # Print the allocation listing returned by the command.
    echo.echo(listing)
|
||
|
||
@alloc_group.command("remove")
@click.argument("alloc_id")
@options.COMPUTER(required=True)
def cmd_remove(alloc_id, computer):
    """Remove an allocation from the HQ server."""
    # Imported locally so the block is self-contained with respect to the file's import section.
    import shlex

    # ``alloc_id`` comes straight from the command line; quote it so the remote shell always sees
    # it as a single literal token.
    command = f"hq alloc remove {shlex.quote(alloc_id)}"

    with computer.get_transport() as transport:
        retval, _, stderr = transport.exec_command_wait(command)

    if retval != 0:
        echo.echo_critical(f"failed to remove allocation: {stderr}\n")

    # The command's stderr stream is what gets reported to the user on success.
    echo.echo_success(f"{stderr}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
# -*- coding: utf-8 -*- | ||
import click | ||
import tempfile | ||
import requests | ||
import tarfile | ||
from pathlib import Path | ||
|
||
from aiida import orm | ||
from aiida.cmdline.utils import echo | ||
|
||
from .params import arguments | ||
from .root import cmd_root | ||
|
||
|
||
@cmd_root.command("install")
@arguments.COMPUTER()
# FIXME: the $HOME is not working for remote, it will create a folder named '$HOME', try to understand how ssh makedirs works
@click.option(
    "-p",
    "--remote-bin-dir",
    type=click.Path(),
    default=Path("bin/"),
    help="remote bin path hq will stored.",
)
@click.option(
    "--hq-version", type=str, default="0.19.0", help="the hq version will be installed."
)
# TODO: separate the bashrc write and make it optional.
# TODO: should also support different arch binary??
def cmd_install(computer: orm.Computer, remote_bin_dir, hq_version):
    """Install the hq binary to the computer through the transport"""
    # ``click.Path()`` returns a plain ``str`` when the option is given on the command line (only
    # the default is a ``Path``), so normalise before using the ``/`` operator.
    remote_bin_dir = Path(remote_bin_dir)
    remote_hq_path = remote_bin_dir / "hq"

    # Download the hq binary with specific version to local temp folder,
    # abort if the version is not found,
    # then upload to the remote using an opened transport of the computer.
    with tempfile.TemporaryDirectory() as temp_dir:
        url = f"https://github.com/It4innovations/hyperqueue/releases/download/v{hq_version}/hq-v{hq_version}-linux-x64.tar.gz"
        response = requests.get(url, stream=True)
        rcode = response.status_code

        if rcode != 200:
            # Abort here: continuing would write the error response to disk and try to open it as
            # a tarball.
            echo.echo_critical(
                "Cannot download the hq, please check the version is exist."
            )

        temp_dir = Path(temp_dir)
        tar_path = temp_dir / "hq.tar.gz"

        # Stream the archive to disk in chunks.
        with open(tar_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

        # NOTE(review): ``extractall`` trusts the member paths inside the downloaded archive.
        with tarfile.open(tar_path, "r") as tar:
            tar.extractall(path=temp_dir)

        echo.echo_success(f"The hq version {hq_version} binary downloaded.")

        bin_path = temp_dir / "hq"

        # upload the binary to remote
        # TODO: try not override if the binary exist, put has overwrite=True as default
        with computer.get_transport() as transport:
            # first check if the hq exist in the target folder
            if transport.isfile(str(remote_hq_path)):
                echo.echo_info(
                    f"hq exist in the {remote_bin_dir} on remote, will override it."
                )

            # Pass the path as ``str``, like the other transport calls in this function.
            transport.makedirs(path=str(remote_bin_dir), ignore_existing=True)
            transport.put(
                localpath=str(bin_path.resolve()), remotepath=str(remote_bin_dir)
            )

            # XXX: should transport.put take care of this already??
            transport.exec_command_wait(f"chmod +x {remote_hq_path}")

            # Write to bashrc. ``grep -q`` succeeds when the marker is already present, so the
            # append must be chained with ``||`` (append only when the marker is missing); with
            # ``&&`` a first install never writes the line and reports a failure.
            # NOTE(review): the exported directory is hard-coded to ``$HOME/bin`` regardless of
            # ``--remote-bin-dir``.
            identity_str = "by aiida-hq"
            retval, _, stderr = transport.exec_command_wait(
                f"grep -q '# {identity_str}' ~/.bashrc || echo '# {identity_str}\nexport PATH=$HOME/bin:$PATH' >> ~/.bashrc"
            )

            if retval != 0:
                echo.echo_critical(
                    f"Not able to set set the path $HOME/bin to your remote bashrc, try to do it manually.\n"
                    f"Info: {stderr}"
                )

    echo.echo_success("The hq binary installed in remote")
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# -*- coding: utf-8 -*- | ||
from aiida.cmdline.params import arguments as core_arguments | ||
|
||
# Re-export of the ``COMPUTER`` argument from ``aiida.cmdline.params.arguments`` under this
# package's own ``arguments`` module.
COMPUTER = core_arguments.COMPUTER
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# -*- coding: utf-8 -*- | ||
"""Reusable options for CLI commands.""" | ||
|
||
import functools | ||
|
||
import click | ||
from aiida.cmdline.params import options as core_options | ||
from aiida.cmdline.params import types as core_types | ||
|
||
# Public names of this module.
__all__ = (
    "PROFILE",
    "VERBOSITY",
    "VERSION",
)

# ``PROFILE`` option from ``aiida-core`` with its parameter type and ``expose_value`` pre-set.
# With ``expose_value=False`` click does not pass the value on to the command function;
# presumably ``load_profile=True`` makes the type load the selected profile -- confirm in aiida-core.
PROFILE = functools.partial(
    core_options.PROFILE,
    type=core_types.ProfileParamType(load_profile=True),
    expose_value=False,
)

# Clone the ``VERBOSITY`` option from ``aiida-core`` so the ``-v`` short flag can be removed, since that overlaps with
# the flag of the ``VERSION`` option of this CLI.
VERBOSITY = core_options.VERBOSITY.clone()
# Replace the option names on the clone so that only the long ``--verbosity`` form remains.
VERBOSITY.args = ("--verbosity",)

# Optional string option ``-v/--version``; this is the option that claims the ``-v`` short flag.
VERSION = core_options.OverridableOption(
    "-v",
    "--version",
    type=click.STRING,
    required=False,
    help="Select the version of the installed configuration.",
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# -*- coding: utf-8 -*- | ||
"""Command line interface `aiida-hq` for aiida-hyperqueue. | ||
The CLI implementation prototype from `aiida-pseudo`. | ||
""" | ||
|
||
import click | ||
|
||
from aiida.cmdline.groups.verdi import VerdiCommandGroup | ||
|
||
from .params import options | ||
|
||
|
||
class CustomVerdiCommandGroup(VerdiCommandGroup):
    """Command group for this CLI, derived from :class:`aiida.cmdline.groups.verdi.VerdiCommandGroup`.

    The only difference with the parent is the verbosity option attached to commands: the variant
    defined in this package is used, which has no ``-v`` short flag because that flag is taken by
    another option of this CLI.
    """

    @staticmethod
    def add_verbosity_option(cmd):
        """Attach the package's ``verbosity`` option to ``cmd`` unless it is ``None`` or already has one."""
        if cmd is None:
            return cmd

        # Names of the parameters already declared on the command.
        declared = [param.name for param in cmd.params]
        if "verbosity" in declared:
            return cmd

        decorate = options.VERBOSITY()
        return decorate(cmd)
|
||
|
||
# Root command group ``aiida-hq``. It uses ``CustomVerdiCommandGroup`` as the group class and
# accepts ``-h`` in addition to ``--help``. The ``VERBOSITY`` and ``PROFILE`` options come from
# this package's ``params.options`` module.
@click.group(
    "aiida-hq",
    cls=CustomVerdiCommandGroup,
    context_settings={"help_option_names": ["-h", "--help"]},
)
@options.VERBOSITY()
@options.PROFILE()
def cmd_root():
    """CLI for the ``aiida-hyperqueue`` plugin."""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
from .root import cmd_root | ||
|
||
from aiida.cmdline.utils import echo | ||
|
||
from .params import arguments | ||
|
||
# Sub-group ``server`` of the root CLI; the ``start``/``stop``/``info`` commands below attach to it.
# The docstring doubles as the help text shown by click, so it is kept short.
@cmd_root.group("server")
def server_group():
    """Commands for interacting with the HQ server."""
|
||
|
||
@server_group.command("start")
@arguments.COMPUTER()
def cmd_start(computer):
    """Start the HyperQueue server."""
    # Probe first: ``hq server info`` exiting with 0 is taken to mean a server is already up.
    with computer.get_transport() as probe:
        info_result = probe.exec_command_wait("hq server info")

    info_code = info_result[0]
    if info_code == 0:
        echo.echo_info("server is already running!")
        return

    launch_command = "nohup hq server start 1>$HOME/.hq-stdout 2>$HOME/.hq-stderr &"

    with computer.get_transport() as launcher:
        # FIXME: It requires to sleep a bit after the nohup
        # see https://github.com/aiidateam/aiida-core/issues/6377
        # but the sleep solution is incorrect!!! Since the sleep will always return 0.
        # this not rely on https://github.com/aiidateam/aiida-core/pull/6452
        launch_result = launcher.exec_command_wait(launch_command, timeout=0.1)

    launch_code, _, launch_errors = launch_result
    if launch_code != 0:
        echo.echo_critical(f"unable to start the server: {launch_errors}")

    echo.echo_success("HQ server started!")
|
||
@server_group.command("stop")
@arguments.COMPUTER()
def cmd_stop(computer):
    """Stop the HyperQueue server."""
    # Probe first: a non-zero exit code from ``hq server info`` is taken to mean no server is
    # running, in which case there is nothing to stop.
    with computer.get_transport() as transport:
        retval, _, _ = transport.exec_command_wait("hq server info")

    if retval != 0:
        echo.echo_info("server is not running!")
        return

    with computer.get_transport() as transport:
        retval, _, stderr = transport.exec_command_wait("hq server stop")

    if retval != 0:
        echo.echo_critical(f"unable to stop the server: {stderr}")

    echo.echo_success("HQ server stopped!")
|
||
|
||
@server_group.command("info")
@arguments.COMPUTER()
def cmd_info(computer):
    """Get information on the HyperQueue server."""
    # Run ``hq server info`` on the computer and capture exit code, stdout and stderr.
    with computer.get_transport() as transport:
        retval, stdout, stderr = transport.exec_command_wait("hq server info")

    if retval != 0:
        # The hint must name this CLI (``aiida-hq``), whose ``server start`` command is defined
        # in this module.
        echo.echo_critical(
            f"cannot obtain HyperQueue server information: {stderr}\n"
            "Try starting the server with `aiida-hq server start`."
        )

    echo.echo(stdout)
Oops, something went wrong.