From 119acdf2218252c0010c18a99d7a7ef41c775c35 Mon Sep 17 00:00:00 2001 From: Christopher Moussa Date: Tue, 7 Jan 2025 15:23:56 -0800 Subject: [PATCH 1/4] association_table: add max_cores column Problem: The association_table has no way to track the max number of cores an association can have across all of their running jobs. This is needed in order to track the max number of resources an association has across all of their running jobs. Add a new column to the association_table called "max_cores". Update the schema version number for the flux-accounting database to account for the new column addition. --- src/bindings/python/fluxacct/accounting/__init__.py.in | 3 ++- src/bindings/python/fluxacct/accounting/create_db.py | 1 + t/t1017-update-db.t | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/bindings/python/fluxacct/accounting/__init__.py.in b/src/bindings/python/fluxacct/accounting/__init__.py.in index 9c5745fc2..3df9d3ffa 100644 --- a/src/bindings/python/fluxacct/accounting/__init__.py.in +++ b/src/bindings/python/fluxacct/accounting/__init__.py.in @@ -1,6 +1,6 @@ DB_DIR = "@X_LOCALSTATEDIR@/lib/flux/" DB_PATH = "@X_LOCALSTATEDIR@/lib/flux/FluxAccounting.db" -DB_SCHEMA_VERSION = 24 +DB_SCHEMA_VERSION = 25 # flux-accounting DB table column names ASSOCIATION_TABLE = [ @@ -17,6 +17,7 @@ ASSOCIATION_TABLE = [ "max_running_jobs", "max_active_jobs", "max_nodes", + "max_cores", "queues", "projects", "default_project", diff --git a/src/bindings/python/fluxacct/accounting/create_db.py b/src/bindings/python/fluxacct/accounting/create_db.py index 77a8d4820..127072c1a 100755 --- a/src/bindings/python/fluxacct/accounting/create_db.py +++ b/src/bindings/python/fluxacct/accounting/create_db.py @@ -105,6 +105,7 @@ def create_db( max_running_jobs int(11) DEFAULT 5 NOT NULL ON CONFLICT REPLACE DEFAULT 5, max_active_jobs int(11) DEFAULT 7 NOT NULL ON CONFLICT REPLACE DEFAULT 7, max_nodes int(11) DEFAULT 2147483647 NOT NULL ON CONFLICT REPLACE DEFAULT 2147483647, + max_cores int(11) DEFAULT 2147483647 NOT NULL ON CONFLICT REPLACE DEFAULT 2147483647, queues tinytext DEFAULT '' NOT NULL ON CONFLICT REPLACE DEFAULT '', projects tinytext DEFAULT '*' NOT NULL ON CONFLICT REPLACE DEFAULT '*', default_project tinytext DEFAULT '*' NOT NULL ON CONFLICT REPLACE DEFAULT '*', diff --git a/t/t1017-update-db.t b/t/t1017-update-db.t index 974aeaf75..069a75b7f 100755 --- a/t/t1017-update-db.t +++ b/t/t1017-update-db.t @@ -83,6 +83,7 @@ test_expect_success 'get all the columns of the updated table in the DB and chec max_running_jobs max_active_jobs max_nodes + max_cores queues projects default_project From 9f04c000647ccf4927e760c47b8e05c5fc2bb91d Mon Sep 17 00:00:00 2001 From: Christopher Moussa Date: Tue, 7 Jan 2025 15:27:19 -0800 Subject: [PATCH 2/4] add-user/edit-user: add max-cores as a field Problem: max_cores is an attribute for every row in the association_table, but it cannot be configured with the add-user and edit-user commands. Add max_cores as a configurable field in the add-user and edit-user commands. --- .../python/fluxacct/accounting/user_subcommands.py | 10 ++++++++-- src/cmd/flux-account-service.py | 2 ++ src/cmd/flux-account.py | 14 +++++++++++++- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/bindings/python/fluxacct/accounting/user_subcommands.py b/src/bindings/python/fluxacct/accounting/user_subcommands.py index ddd3a2291..7835f1e4e 100755 --- a/src/bindings/python/fluxacct/accounting/user_subcommands.py +++ b/src/bindings/python/fluxacct/accounting/user_subcommands.py @@ -274,6 +274,7 @@ def add_user( max_running_jobs=5, max_active_jobs=7, max_nodes=2147483647, + max_cores=2147483647, queues="", projects="*", ): @@ -330,8 +331,9 @@ def add_user( INSERT INTO association_table (creation_time, mod_time, username, userid, bank, default_bank, shares, max_running_jobs, max_active_jobs, - max_nodes, queues, projects, default_project) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + max_nodes, max_cores, queues, projects, + default_project) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( int(time.time()), @@ -344,6 +346,7 @@ def add_user( max_running_jobs, max_active_jobs, max_nodes, + max_cores, queues, projects, default_project, @@ -417,6 +420,8 @@ def edit_user(conn, username, bank=None, **kwargs): can have at any given time. max_nodes: The man number of nodes an association can have across all of their running jobs. + max_cores: The max number of cores an association can have across all of their + running jobs. queues: A comma-separated list of all of the queues an association can run jobs under. projects: A comma-separated list of all of the projects an association can run jobs @@ -437,6 +442,7 @@ def edit_user(conn, username, bank=None, **kwargs): "max_running_jobs", "max_active_jobs", "max_nodes", + "max_cores", "queues", "projects", "default_project", diff --git a/src/cmd/flux-account-service.py b/src/cmd/flux-account-service.py index 64c8ef166..9db7056ac 100755 --- a/src/cmd/flux-account-service.py +++ b/src/cmd/flux-account-service.py @@ -179,6 +179,7 @@ def add_user(self, handle, watcher, msg, arg): max_running_jobs=msg.payload["max_running_jobs"], max_active_jobs=msg.payload["max_active_jobs"], max_nodes=msg.payload["max_nodes"], + max_cores=msg.payload["max_cores"], queues=msg.payload["queues"], projects=msg.payload["projects"], ) @@ -223,6 +224,7 @@ def edit_user(self, handle, watcher, msg, arg): max_running_jobs=msg.payload["max_running_jobs"], max_active_jobs=msg.payload["max_active_jobs"], max_nodes=msg.payload["max_nodes"], + max_cores=msg.payload["max_cores"], queues=msg.payload["queues"], projects=msg.payload["projects"], default_project=msg.payload["default_project"], diff --git a/src/cmd/flux-account.py b/src/cmd/flux-account.py index 6917ab75d..9478218b5 100755 --- a/src/cmd/flux-account.py +++ b/src/cmd/flux-account.py @@ -54,7 +54,7 @@ def add_view_user_arg(subparsers): metavar=( "CREATION_TIME,MOD_TIME,ACTIVE,USERNAME,USERID,BANK,DEFAULT_BANK," "SHARES,JOB_USAGE,FAIRSHARE,MAX_RUNNING_JOBS,MAX_ACTIVE_JOBS,MAX_NODES," - "QUEUES,PROJECTS,DEFAULT_PROJECT" + "MAX_CORES,QUEUES,PROJECTS,DEFAULT_PROJECT" ), ) @@ -108,6 +108,12 @@ def add_add_user_arg(subparsers): default=2147483647, metavar="MAX_NODES", ) + subparser_add_user.add_argument( + "--max-cores", + help="max number of cores a user can have across all of their running jobs", + default=2147483647, + metavar="MAX_CORES", + ) subparser_add_user.add_argument( "--queues", help="queues the user is allowed to run jobs in", @@ -191,6 +197,12 @@ def add_edit_user_arg(subparsers): default=None, metavar="MAX_NODES", ) + subparser_edit_user.add_argument( + "--max-cores", + help="max number of cores a user can have across all of their running jobs", + default=None, + metavar="MAX_CORES", + ) subparser_edit_user.add_argument( "--queues", help="queues the user is allowed to run jobs in", From 6b4210a2f735bfe3df1d590451a4795fa12a02d2 Mon Sep 17 00:00:00 2001 From: Christopher Moussa Date: Tue, 7 Jan 2025 15:28:57 -0800 Subject: [PATCH 3/4] t: add flux-accounting schema v24 to testsuite Problem: The flux-accounting database from before the addition of the "max_cores" column is not in the testsuite, but it should be a part of the update-db tests since the schema has changed. Add the flux-accounting DB schema version 24 to the testsuite. --- t/expected/test_dbs/FluxAccountingv0-40-0.db | Bin 0 -> 61440 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 t/expected/test_dbs/FluxAccountingv0-40-0.db diff --git a/t/expected/test_dbs/FluxAccountingv0-40-0.db b/t/expected/test_dbs/FluxAccountingv0-40-0.db new file mode 100644 index 0000000000000000000000000000000000000000..425483d28c9832adec62364999b7ccb3f3325142 GIT binary patch literal 61440 zcmeI*&2Q6Y90zc_Y2&0Nq?%M^FNYa1DGgN6^o=2eMAikVBD7tX9?&#-PU;jx;uOc3 z;53bageIgNcHy}Dq}?X*7wo#@w9~XZ+GUraUG_YW{lt&s$E6)3qTSb`_1oj$^E{vD zagvIkm5)EF*`B4hx^Bnx^hIS_QB~!Nt}BX?4}QpsEmP$8c(6@ZD*1U+*q3eQm2*vX zf)>vIP0#23m8>X^K>z{}fB*y_009U<00Izz00bcLY6MQCay0PttBH=N5P$##AOHaf zKmY;|fB*y_009WR9s-lJ8TkAE^%xAa0s;_#00bZa0SG_<0uX=z1R(H=1nB?&tNA|_ z@(&9HAOHafKmY;|fB*y_009U<00PHHU`o}LnQShbal2iQ{{26e|4qsNL4IL@00bZa z0SG_<0uX=z1Rwwb2teQ{3gpI8r?L&xxovo6qis#AsbY4|?cTDQo__?-{~yI+MVlc2 z0SG_<0uX=z1Rwwb2tWV=hZDf_|A+HJX$U|70uX=z1Rwwb2tWV=5P-l@6!4$_f2QQ0 z9mUq8%@BY91Rwwb2tWV=5P$##AOHaf3=6!Wrryz-?S9X*+{&-NfB)^3&!&<8Kdgj( z2tWV=5P$##AOHafKmY;|fWYw+K>q*n8&mWM0uX=z1Rwwb2tWV=5P$##ATTW8=l?%f z^3R9QU>^byfB*y_009U<00Izz00bZafg>+4p^hv0%$Z#NFD0ctRkGXpAGN1iYy2ne zt<*2-X6ieoL{5o|_KmoQZ3sXB0uX=z1V%3K)!QkpaO#x$@C)|EKH*pVgj?gQTh&Uv zs@E$Y)T(-TT%VoPhZc6y-eE^=$Fp`VSKr)PyI$G4pZjkR?W?|OB;J}14L zNA#?_9^dBc^^Ll|zFn)uuQZ#U{ki3rBNfk<%O(B8>eb42t*)2J;p8j#OxJQegSK4f zb@D5V#duA-l%&%eZQox1z(x6Mj8_8YlbaxCd z2*j=uqYe5Rjh7rv98`7K4jN z7na^%Tv_t#J!o~|W$}9N^sRnaDQUMjUHOO?i(wzh)TwR~#uY7at&ASx7zf{K3h3)gIVmmw5-QJzH?OBG|_quG~ zh`oIn<=CMV`M{E)A6oGI|HwbOp+X2i00Izz00bZa0SG_<0uX?}$OZ8H|H#QvAp{@* z0SG_<0uX=z1Rwwb2tZ)u0=bN>WG5ag`5$th-~a|d`+Pw-TW6pY-czZ`OraJl3*UD z9QsX4$UaW)&el*@=nvqb44l>AP6vEIT@yXDjne{^pEjdU-q~}V-6Y0XG znWR)i+GL|lf1f0GHA^_W|K6llC>GTR8k^)nd;Glk$j($`+DA@F^5OY;arz2VSDgPZ zc$guG6zRE`*(#h!9&rA=fKXVRzdSou`uG67rnH<*7uHTExw6z--^?frJChHtXSBlU z)9NFW=?bTW&1^(gI3aOmXT@HS?L%vHY{y_3%Eq4M(wt}#L`cEr8sY;Vw95VRpv&^I$!g zcznhbgL0DOZOhq-HCA{J;StCkI5~JlD_u*c3&zEyyN*m;|3M(5Mc!7rF24cg$F&EW z6I$WoMfK4oW+X2qKbnrX$;(L8c*3~IHZT(p2HSk*#HU}vCjs_+mL$+%4#p!%-$wR% zf+x96ZL{am7Z3KNQqMCxd%}2 Date: Tue, 7 Jan 2025 12:18:09 -0800 Subject: [PATCH 4/4] plugin: add max_cores attribute per association Problem: The priority plugin does not store max_cores information per-association in the plugin, which it will need in order to do resource tracking across all of an association's running jobs. Add max_cores to the list of information sent to the priority plugin per-association. Add max_cores as an attribute to the Association class so that it can be stored and accessed per-association in the plugin. Set a default for this attribute when creating a temporary association in the plugin. Edit the accounting.cpp unit test to account for the addition of the new max_cores attribute. --- src/cmd/flux-account-priority-update.py | 4 +++- src/plugins/accounting.cpp | 5 +++-- src/plugins/accounting.hpp | 1 + src/plugins/mf_priority.cpp | 9 ++++++--- src/plugins/test/accounting_test01.cpp | 9 +++++---- 5 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/cmd/flux-account-priority-update.py b/src/cmd/flux-account-priority-update.py index 5f6051048..fba29cbc3 100755 --- a/src/cmd/flux-account-priority-update.py +++ b/src/cmd/flux-account-priority-update.py @@ -78,7 +78,7 @@ def bulk_update(path): for row in cur.execute( """SELECT userid, bank, default_bank, fairshare, max_running_jobs, max_active_jobs, - queues, active, projects, default_project, max_nodes + queues, active, projects, default_project, max_nodes, max_cores FROM association_table""" ): # create a JSON payload with the results of the query @@ -94,6 +94,7 @@ def bulk_update(path): "projects": str(row[8]), "def_project": str(row[9]), "max_nodes": int(row[10]), + "max_cores": int(row[11]), } bulk_user_data.append(single_user_data) @@ -159,6 +160,7 @@ def send_instance_owner_info(): "projects": "*", "def_project": "*", "max_nodes": 1000000, + "max_cores": 1000000, } flux.Flux().rpc( diff --git a/src/plugins/accounting.cpp b/src/plugins/accounting.cpp index 7a02638c8..7b463a74f 100644 --- a/src/plugins/accounting.cpp +++ b/src/plugins/accounting.cpp @@ -85,8 +85,8 @@ json_t* Association::to_json () const } // 'o' steals the reference for both held_job_ids and user_queues - json_t *u = json_pack ("{s:s, s:f, s:i, s:i, s:i, s:i," - " s:o, s:o, s:i, s:o, s:s, s:i, s:i}", + json_t *u = json_pack ("{s:s, s:f, s:i, s:i, s:i, s:i, s:o," + " s:o, s:i, s:o, s:s, s:i, s:i, s:i}", "bank_name", bank_name.c_str (), "fairshare", fairshare, "max_run_jobs", max_run_jobs, @@ -99,6 +99,7 @@ json_t* Association::to_json () const "projects", user_projects, "def_project", def_project.c_str (), "max_nodes", max_nodes, + "max_cores", max_cores, "active", active); if (!u) diff --git a/src/plugins/accounting.hpp b/src/plugins/accounting.hpp index 4cbec507a..f83884db3 100644 --- a/src/plugins/accounting.hpp +++ b/src/plugins/accounting.hpp @@ -45,6 +45,7 @@ class Association { std::vector projects; // list of accessible projects std::string def_project; // default project int max_nodes; // max num nodes across all running jobs + int max_cores; // max num cores across all running jobs // methods json_t* to_json () const; // convert object to JSON string diff --git a/src/plugins/mf_priority.cpp b/src/plugins/mf_priority.cpp index e1825d04e..36b5853cf 100644 --- a/src/plugins/mf_priority.cpp +++ b/src/plugins/mf_priority.cpp @@ -187,6 +187,7 @@ static void add_special_association (flux_plugin_t *p, flux_t *h, int userid) a->active = 1; a->held_jobs = std::vector(); a->max_nodes = INT16_MAX; + a->max_cores = INT16_MAX; if (flux_jobtap_job_aux_set (p, FLUX_JOBTAP_CURRENT_JOB, @@ -277,7 +278,7 @@ static void rec_update_cb (flux_t *h, void *arg) { char *bank, *def_bank, *assoc_queues, *assoc_projects, *def_project = NULL; - int uid, max_running_jobs, max_active_jobs, max_nodes = 0; + int uid, max_running_jobs, max_active_jobs, max_nodes, max_cores = 0; double fshare = 0.0; json_t *data, *jtemp = NULL; json_error_t error; @@ -301,7 +302,7 @@ static void rec_update_cb (flux_t *h, if (json_unpack_ex (el, &error, 0, "{s:i, s:s, s:s, s:F, s:i," - " s:i, s:s, s:i, s:s, s:s, s:i}", + " s:i, s:s, s:i, s:s, s:s, s:i, s:i}", "userid", &uid, "bank", &bank, "def_bank", &def_bank, @@ -312,7 +313,8 @@ static void rec_update_cb (flux_t *h, "active", &active, "projects", &assoc_projects, "def_project", &def_project, - "max_nodes", &max_nodes) < 0) + "max_nodes", &max_nodes, + "max_cores", &max_cores) < 0) flux_log (h, LOG_ERR, "mf_priority unpack: %s", error.text); Association *b; @@ -325,6 +327,7 @@ static void rec_update_cb (flux_t *h, b->active = active; b->def_project = def_project; b->max_nodes = max_nodes; + b->max_cores = max_cores; // split queues comma-delimited string and add it to b->queues vector b->queues.clear (); diff --git a/src/plugins/test/accounting_test01.cpp b/src/plugins/test/accounting_test01.cpp index 5d56b65d1..b444b1846 100644 --- a/src/plugins/test/accounting_test01.cpp +++ b/src/plugins/test/accounting_test01.cpp @@ -55,7 +55,8 @@ void add_user_to_map ( a.active, a.projects, a.def_project, - a.max_nodes + a.max_nodes, + a.max_cores }; } @@ -67,9 +68,9 @@ void initialize_map ( std::map> &users) { Association user1 = {"bank_A", 0.5, 5, 0, 7, 0, {}, - {}, 0, 1, {"*"}, "*", 2147483647}; + {}, 0, 1, {"*"}, "*", 2147483647, 2147483647}; Association user2 = {"bank_A", 0.5, 5, 0, 7, 0, {}, - {}, 0, 1, {"*"}, "*", 2147483647}; + {}, 0, 1, {"*"}, "*", 2147483647, 2147483647}; add_user_to_map (users, 1001, "bank_A", user1); users_def_bank[1001] = "bank_A"; @@ -271,7 +272,7 @@ static void test_check_map_dne_true () users_def_bank.clear (); Association tmp_user = {"DNE", 0.5, 5, 0, 7, 0, {}, - {}, 0, 1, {"*"}, "*", 2147483647}; + {}, 0, 1, {"*"}, "*", 2147483647, 2147483647}; add_user_to_map (users, 9999, "DNE", tmp_user); users_def_bank[9999] = "DNE";