Merge pull request #1055 from trws/first-nodex
Add first nodex policy
mergify[bot] authored Sep 6, 2023
2 parents 5ede1d4 + 8fce829 commit c619fbe
Showing 4 changed files with 163 additions and 172 deletions.
4 changes: 3 additions & 1 deletion resource/policies/dfu_match_policy_factory.cpp
@@ -38,11 +38,13 @@ std::shared_ptr<dfu_match_cb_t> create_match_cb (const std::string &policy)
     std::shared_ptr<dfu_match_cb_t> matcher = nullptr;
 
     try {
-        if (policy == FIRST_MATCH) {
+        if (policy == FIRST_MATCH || policy == FIRST_NODEX_MATCH) {
             std::shared_ptr<high_first_t> ptr
                 = std::make_shared<high_first_t> ();
             ptr->add_score_factor (std::string ("node"), 1, 10000);
             ptr->set_stop_on_k_matches (1);
+            if (policy == FIRST_NODEX_MATCH)
+                ptr->add_exclusive_resource_type ("node");
             matcher = ptr;
         } else if (policy == HIGH_ID_FIRST) {
             matcher = std::make_shared<high_first_t> ();
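For context, here is a minimal sketch of how a caller might exercise the new
policy through this factory. Only the create_match_cb signature and the policy
constants come from this commit; the standalone main wrapper and the include
path are illustrative assumptions.

    // Illustrative sketch -- not part of this commit.
    #include <iostream>
    #include <memory>
    #include "resource/policies/dfu_match_policy_factory.hpp" // assumed path

    int main ()
    {
        using namespace Flux::resource_model;

        // "firstnodex" selects like "first" (stop after one match) but also
        // marks each selected node as exclusively allocated.
        std::shared_ptr<dfu_match_cb_t> matcher =
            create_match_cb (FIRST_NODEX_MATCH);
        if (matcher == nullptr) {
            std::cerr << "unknown policy" << std::endl;
            return 1;
        }
        return 0;
    }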
1 change: 1 addition & 0 deletions resource/policies/dfu_match_policy_factory.hpp
@@ -25,6 +25,7 @@ namespace Flux {
 namespace resource_model {
 
 const std::string FIRST_MATCH = "first";
+const std::string FIRST_NODEX_MATCH = "firstnodex";
 const std::string HIGH_ID_FIRST = "high";
 const std::string LOW_ID_FIRST = "low";
 const std::string LOW_NODE_FIRST = "lonode";
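A hypothetical helper sketch (not in this commit) shows how such policy-name
constants are typically consumed; the function name and the set-based lookup
are assumptions, and the list is truncated to the constants visible above.

    // Hypothetical sketch -- not part of this commit.
    #include <set>
    #include <string>

    namespace Flux {
    namespace resource_model {

    // True if the string names a policy constant from this header
    // (only the constants shown in the excerpt above are listed).
    bool is_known_policy (const std::string &policy)
    {
        static const std::set<std::string> known = {FIRST_MATCH,
                                                    FIRST_NODEX_MATCH,
                                                    HIGH_ID_FIRST,
                                                    LOW_ID_FIRST,
                                                    LOW_NODE_FIRST};
        return known.count (policy) > 0;
    }

    } // namespace resource_model
    } // namespace Flux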
251 changes: 126 additions & 125 deletions resource/utilities/resource-query.cpp
@@ -54,132 +54,133 @@ static const struct option longopts[] = {

 static void usage (int code)
 {
-    std::cerr <<
-"usage: resource-query [OPTIONS...]\n"
-"\n"
-"Command-line utility that takes in an HPC resource request written in\n"
-"Flux's Canonical Job Specification (or simply a jobspec) (RFC 14) and\n"
-"selects the best-matching compute and other resources in accordance\n"
-"with a selection policy.\n"
-"\n"
-"Read in a resource-graph generation recipe written in the GRUG format\n"
-"and populate the resource-graph data store representing the compute and\n"
-"other HPC resources and their relationships (RFC 4).\n"
-"\n"
-"Provide a simple command-line interface (cli) to allow users to allocate\n"
-"or reserve the resource set in this resource-graph data store \n"
-"using a jobspec as an input.\n"
-"Traverse the resource graph in a predefined order for resource selection.\n"
-"Currently only support one traversal type: depth-first traversal on the\n"
-"dominant subsystem and up-walk traversal on one or more auxiliary \n"
-"subsystems.\n"
-"\n"
-"OPTIONS allow for using a predefined matcher that is configured\n"
-"to use a different set of subsystems as its dominant and/or auxiliary\n"
-"ones to perform the matches on.\n"
-"\n"
-"OPTIONS also allow for instantiating a different resource-matching\n"
-"selection policy--e.g., select resources with high or low IDs first.\n"
-"\n"
-"OPTIONS allow for exporting the filtered graph of the used matcher\n"
-"in a selected graph format at the end of the cli session.\n"
-"\n"
-"To see cli commands, type in \"help\" in the cli: i.e.,\n"
-" % resource-query> help\n"
-"\n"
-"\n"
-"\n"
-"OPTIONS:\n"
-" -h, --help\n"
-" Display this usage information\n"
-"\n"
-" -L, --load-file=filepath\n"
-" Input file from which to load the resource graph data store\n"
-" (default=conf/default)\n"
-"\n"
-" -f, --load-format=<grug|hwloc|jgf|rv1exec>\n"
-" Format of the load file (default=grug)\n"
-"\n"
-" -W, --load-allowlist=<resource1[,resource2[,resource3...]]>\n"
-" Allowlist of resource types to be loaded\n"
-" Resources that are not included in this list will be filtered out\n"
-"\n"
-" -S, --match-subsystems="
+    std::cerr << R"(
+usage: resource-query [OPTIONS...]
+
+Command-line utility that takes in an HPC resource request written in
+Flux's Canonical Job Specification (or simply a jobspec) (RFC 14) and
+selects the best-matching compute and other resources in accordance
+with a selection policy.
+
+Read in a resource-graph generation recipe written in the GRUG format
+and populate the resource-graph data store representing the compute and
+other HPC resources and their relationships (RFC 4).
+
+Provide a simple command-line interface (cli) to allow users to allocate
+or reserve the resource set in this resource-graph data store
+using a jobspec as an input.
+Traverse the resource graph in a predefined order for resource selection.
+Currently only support one traversal type: depth-first traversal on the
+dominant subsystem and up-walk traversal on one or more auxiliary
+subsystems.
+
+OPTIONS allow for using a predefined matcher that is configured
+to use a different set of subsystems as its dominant and/or auxiliary
+ones to perform the matches on.
+
+OPTIONS also allow for instantiating a different resource-matching
+selection policy--e.g., select resources with high or low IDs first.
+
+OPTIONS allow for exporting the filtered graph of the used matcher
+in a selected graph format at the end of the cli session.
+
+To see cli commands, type in "help" in the cli: i.e.,
+    % resource-query> help
+
+
+OPTIONS:
+    -h, --help
+            Display this usage information
+
+    -L, --load-file=filepath
+            Input file from which to load the resource graph data store
+            (default=conf/default)
+
+    -f, --load-format=<grug|hwloc|jgf|rv1exec>
+            Format of the load file (default=grug)
+
+    -W, --load-allowlist=<resource1[,resource2[,resource3...]]>
+            Allowlist of resource types to be loaded
+            Resources that are not included in this list will be filtered out
+
+    -S, --match-subsystems="
 "<CA|IBA|IBBA|PFS1BA|PA|C+IBA|C+PFS1BA|C+PA|IB+IBBA|"
-"C+P+IBA|VA|V+PFS1BA|ALL>\n"
-" Set the predefined matcher to use. Available matchers are:\n"
-" CA: Containment Aware\n"
-" IBA: InfiniBand connection-Aware\n"
-" IBBA: InfiniBand Bandwidth-Aware\n"
-" PFS1BA: Parallel File System 1 Bandwidth-aware\n"
-" PA: Power-Aware\n"
-" C+IBA: Containment- and InfiniBand connection-Aware\n"
-" C+PFS1BA: Containment- and PFS1 Bandwidth-Aware\n"
-" C+PA: Containment- and Power-Aware\n"
-" IB+IBBA: InfiniBand connection and Bandwidth-Aware\n"
-" C+P+IBA: Containment-, Power- and InfiniBand connection-Aware\n"
-" VA: Virtual Hierarchy-Aware\n"
-" V+PFS1BA: Virtual Hierarchy and PFS1 Bandwidth-Aware \n"
-" ALL: Aware of everything.\n"
-" (default=CA).\n"
-"\n"
-" -P, --match-policy=<low|high|lonode|hinode|lonodex|hinodex|first|locality|variation>\n"
-" Set the resource match selection policy. Available policies are:\n"
-" low: Select resources with low ID first\n"
-" high: Select resources with high ID first\n"
-" lonode: Select resources with lowest node ID first, \n"
-" low ID first otherwise (e.g., node-local resource types) \n"
-" hinode: Select resources with highest node ID first, \n"
-" high ID first otherwise (e.g., node-local resource types) \n"
-" lonodex: Same as lonode except each node is exclusively allocated \n"
-" hinodex: Same as hinode except each node is exclusively allocated \n"
-" first: Select the first matching resources and stop the search\n"
-" locality: Select contiguous resources first in their ID space\n"
-" variation: Allocate resources based on performance classes.\n"
-" (perf_class must be set using set-property).\n"
-" (default=first).\n"
-"\n"
-" -F, --match-format=<simple|pretty_simple|jgf|rlite|rv1|rv1_nosched>\n"
-" Specify the emit format of the matched resource set.\n"
-" (default=simple).\n"
-"\n"
-" -p, --prune-filters=<HL-resource1:LL-resource1[,HL-resource2:LL-resource2...]...]>\n"
-" Install a planner-based filter at each High-Level (HL) resource\n"
-" vertex which tracks the state of the Low-Level (LL) resources\n"
-" in aggregate, residing under its subtree. If a jobspec requests\n"
-" 1 node with 4 cores, and the visiting compute-node vertex has\n"
-" only a total of 2 available cores in aggregate at its\n"
-" subtree, this filter allows the traverser to prune a further descent\n"
-" to accelerate the search.\n"
-" Use the ALL keyword for HL-resource if you want LL-resource to be\n"
-" tracked at all of the HL-resource vertices. Examples:\n"
-" rack:node,node:core\n"
-" ALL:core,cluster:node,rack:node\n"
-" (default=ALL:core).\n"
-"\n"
-" -g, --graph-format=<dot|graphml>\n"
-" Specify the graph format of the output file\n"
-" (default=dot).\n"
-"\n"
-" -r, --reserve-vtx-vec=<size>\n"
-" Reserve the graph vertex size to optimize resource graph loading.\n"
-" The size value must be a non-zero integer up to 2000000.\n"
-"\n"
-" -e, --elapse-time\n"
-" Print the elapse time per scheduling operation.\n"
-"\n"
-" -d, --disable-prompt\n"
-" Don't print the prompt.\n"
-"\n"
-" -o, --graph-output=<basename>\n"
-" Set the basename of the graph output file\n"
-" For AT&T Graphviz dot, <basename>.dot\n"
-" For GraphML, <basename>.graphml.\n"
-"\n"
-" -t, --test-output=<filename>\n"
-" Set the output filename where allocated or reserved resource\n"
-" information is stored into.\n"
-"\n";
+"C+P+IBA|VA|V+PFS1BA|ALL>
+            Set the predefined matcher to use. Available matchers are:
+                CA: Containment Aware
+                IBA: InfiniBand connection-Aware
+                IBBA: InfiniBand Bandwidth-Aware
+                PFS1BA: Parallel File System 1 Bandwidth-aware
+                PA: Power-Aware
+                C+IBA: Containment- and InfiniBand connection-Aware
+                C+PFS1BA: Containment- and PFS1 Bandwidth-Aware
+                C+PA: Containment- and Power-Aware
+                IB+IBBA: InfiniBand connection and Bandwidth-Aware
+                C+P+IBA: Containment-, Power- and InfiniBand connection-Aware
+                VA: Virtual Hierarchy-Aware
+                V+PFS1BA: Virtual Hierarchy and PFS1 Bandwidth-Aware
+                ALL: Aware of everything.
+            (default=CA).
+
+    -P, --match-policy=<low|high|lonode|hinode|lonodex|hinodex|first|firstnodex|locality|variation>
+            Set the resource match selection policy. Available policies are:
+                low: Select resources with low ID first
+                high: Select resources with high ID first
+                lonode: Select resources with lowest node ID first,
+                        low ID first otherwise (e.g., node-local resource types)
+                hinode: Select resources with highest node ID first,
+                        high ID first otherwise (e.g., node-local resource types)
+                lonodex: Same as lonode except each node is exclusively allocated
+                hinodex: Same as hinode except each node is exclusively allocated
+                first: Select the first matching resources and stop the search
+                firstnodex: Select the first matching resources, node exclusive,
+                        and stop the search
+                locality: Select contiguous resources first in their ID space
+                variation: Allocate resources based on performance classes.
+                        (perf_class must be set using set-property).
+            (default=first).
+
+    -F, --match-format=<simple|pretty_simple|jgf|rlite|rv1|rv1_nosched>
+            Specify the emit format of the matched resource set.
+            (default=simple).
+
+    -p, --prune-filters=<HL-resource1:LL-resource1[,HL-resource2:LL-resource2...]...]>
+            Install a planner-based filter at each High-Level (HL) resource
+            vertex which tracks the state of the Low-Level (LL) resources
+            in aggregate, residing under its subtree. If a jobspec requests
+            1 node with 4 cores, and the visiting compute-node vertex has
+            only a total of 2 available cores in aggregate at its
+            subtree, this filter allows the traverser to prune a further descent
+            to accelerate the search.
+            Use the ALL keyword for HL-resource if you want LL-resource to be
+            tracked at all of the HL-resource vertices. Examples:
+                rack:node,node:core
+                ALL:core,cluster:node,rack:node
+            (default=ALL:core).
+
+    -g, --graph-format=<dot|graphml>
+            Specify the graph format of the output file
+            (default=dot).
+
+    -r, --reserve-vtx-vec=<size>
+            Reserve the graph vertex size to optimize resource graph loading.
+            The size value must be a non-zero integer up to 2000000.
+
+    -e, --elapse-time
+            Print the elapse time per scheduling operation.
+
+    -d, --disable-prompt
+            Don't print the prompt.
+
+    -o, --graph-output=<basename>
+            Set the basename of the graph output file
+            For AT&T Graphviz dot, <basename>.dot
+            For GraphML, <basename>.graphml.
+
+    -t, --test-output=<filename>
+            Set the output filename where allocated or reserved resource
+            information is stored into.
+)";
     exit (code);
 }

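The rewrite above replaces dozens of concatenated "...\n" fragments with a
single C++11 raw string literal, so the help text can be edited as plain
text. A minimal, self-contained sketch of the technique (toy program with a
hypothetical tool name, independent of this commit):

    #include <iostream>

    static void usage_before ()
    {
        // Before: each line is a separate quoted fragment with explicit \n.
        std::cerr <<
            "usage: mytool [OPTIONS...]\n"
            "    -h, --help    Display this usage information\n";
    }

    static void usage_after ()
    {
        // After: one raw string literal; newlines and indentation inside
        // R"( ... )" are taken literally.
        std::cerr << R"(
    usage: mytool [OPTIONS...]
        -h, --help    Display this usage information
    )";
    }

    int main ()
    {
        usage_before ();
        usage_after ();
        return 0;
    }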
79 changes: 33 additions & 46 deletions t/t3033-resource-nodex.t
@@ -9,7 +9,36 @@ exp_dir="${SHARNESS_TEST_SRCDIR}/data/resource/expected/nodex"
 grugs="${SHARNESS_TEST_SRCDIR}/data/resource/grugs/small.graphml"
 query="../../resource/utilities/resource-query"
 
 #
+# Takes policy and cmd outfile prefix
+run_tests_with_policy() {
+    pol=$1
+    prefix=$2
+
+    cmds001="${cmd_dir}/cmds01.in"
+    test001_desc="allocate 7 jobs with node-level constraint (pol=$pol)"
+    test_expect_success "${test001_desc}" '
+        sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds001} > cmds001 &&
+        ${query} -L ${grugs} -S CA -P $pol -t ${prefix}1.R.out < cmds001 &&
+        test_cmp ${prefix}1.R.out ${exp_dir}/${prefix}1.R.out
+    '
+
+    cmds002="${cmd_dir}/cmds02.in"
+    test002_desc="allocate 7 jobs with no node-level constraint (pol=$pol)"
+    test_expect_success "${test002_desc}" '
+        sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds002} > cmds002 &&
+        ${query} -L ${grugs} -S CA -P $pol -t ${prefix}2.R.out < cmds002 &&
+        test_cmp ${prefix}2.R.out ${exp_dir}/${prefix}2.R.out
+    '
+
+    cmds003="${cmd_dir}/cmds03.in"
+    test003_desc="match allocate 7 jobs -- last fails (pol=$pol)"
+    test_expect_success "${test003_desc}" '
+        sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds003} > cmds003 &&
+        ${query} -L ${grugs} -S CA -P $pol -t ${prefix}3.R.out < cmds003 &&
+        test_cmp ${prefix}3.R.out ${exp_dir}/${prefix}3.R.out
+    '
+}

 # Selection Policy -- High node first with node exclusivity (-P hinodex)
 # Selection behavior is identical to hinode except that
 # it marks each selected node as exclusive even if the
@@ -25,29 +54,7 @@ query="../../resource/utilities/resource-query"
 # again all 36 cores from the current available highest node.
 #

-cmds001="${cmd_dir}/cmds01.in"
-test001_desc="allocate 7 jobs with node-level constraint (pol=hinodex)"
-test_expect_success "${test001_desc}" '
-    sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds001} > cmds001 &&
-    ${query} -L ${grugs} -S CA -P hinodex -t 001.R.out < cmds001 &&
-    test_cmp 001.R.out ${exp_dir}/001.R.out
-'
-
-cmds002="${cmd_dir}/cmds02.in"
-test002_desc="allocate 7 jobs with no node-level constraint (pol=hinodex)"
-test_expect_success "${test002_desc}" '
-    sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds002} > cmds002 &&
-    ${query} -L ${grugs} -S CA -P hinodex -t 002.R.out < cmds002 &&
-    test_cmp 002.R.out ${exp_dir}/002.R.out
-'
-
-cmds003="${cmd_dir}/cmds03.in"
-test003_desc="match allocate 7 jobs -- last fails (pol=hinodex)"
-test_expect_success "${test003_desc}" '
-    sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds003} > cmds003 &&
-    ${query} -L ${grugs} -S CA -P hinodex -t 003.R.out < cmds003 &&
-    test_cmp 003.R.out ${exp_dir}/003.R.out
-'
+run_tests_with_policy hinodex 00

 #
 # Selection Policy -- Low node first with node exclusivity (-P lonodex)
@@ -65,28 +72,8 @@ test_expect_success "${test003_desc}" '
 # again all 36 cores from the current available lowest node.
 #

-cmds011="${cmd_dir}/cmds01.in"
-test011_desc="allocate 7 jobs with node-level constraint (pol=lonodex)"
-test_expect_success "${test011_desc}" '
-    sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds011} > cmds011 &&
-    ${query} -L ${grugs} -S CA -P lonodex -t 011.R.out < cmds011 &&
-    test_cmp 011.R.out ${exp_dir}/011.R.out
-'
-
-cmds012="${cmd_dir}/cmds02.in"
-test012_desc="allocate 7 jobs with no node-level constraint (pol=lonodex)"
-test_expect_success "${test012_desc}" '
-    sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds012} > cmds012 &&
-    ${query} -L ${grugs} -S CA -P lonodex -t 012.R.out < cmds012 &&
-    test_cmp 012.R.out ${exp_dir}/012.R.out
-'
+run_tests_with_policy lonodex 01

-cmds013="${cmd_dir}/cmds03.in"
-test013_desc="match allocate 7 jobs -- last fails (pol=lonodex)"
-test_expect_success "${test013_desc}" '
-    sed "s~@TEST_SRCDIR@~${SHARNESS_TEST_SRCDIR}~g" ${cmds013} > cmds013 &&
-    ${query} -L ${grugs} -S CA -P lonodex -t 013.R.out < cmds013 &&
-    test_cmp 013.R.out ${exp_dir}/013.R.out
-'
+run_tests_with_policy firstnodex 00

 test_done
