export-db: refactor to allow custom formatting
Problem: The export_db_info() function statically defines the columns
extracted from both the bank_table and the association_table, but a user
exporting DB information may want to customize which fields are retrieved
and how the exported data is formatted.

Refactor the export_db_info() function to accept optional arguments that
customize which fields are extracted from association_table and bank_table.
Dynamically build each SQLite query from these custom fields and write the
selected columns as the header row of each .csv file.
cmoussa1 committed Nov 7, 2024
1 parent a29e5a1 commit bb5533b
Showing 3 changed files with 69 additions and 23 deletions.
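As a quick illustration of the refactored interface, here is a minimal sketch of calling export_db_info() with custom columns from Python. The database path and the specific column choices below are assumptions for illustration only; the actual signature, defaults, and validation appear in the diff that follows.

import sqlite3

from fluxacct.accounting import db_info_subcommands as d

# connect to the flux-accounting DB (path is an assumption)
conn = sqlite3.connect("FluxAccounting.db")

# export only a subset of columns from each table; if user_cols/bank_cols are
# omitted, the function falls back to every column in ASSOCIATION_TABLE/BANK_TABLE
d.export_db_info(
    conn,
    users="users.csv",
    banks="banks.csv",
    user_cols=["username", "bank", "shares"],
    bank_cols=["bank", "parent_bank", "shares"],
)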
66 changes: 43 additions & 23 deletions src/bindings/python/fluxacct/accounting/db_info_subcommands.py
@@ -10,45 +10,65 @@
 # SPDX-License-Identifier: LGPL-3.0
 ###############################################################
 import csv
+import sqlite3
 
 import fluxacct.accounting
 from fluxacct.accounting import bank_subcommands as b
 from fluxacct.accounting import user_subcommands as u
+from fluxacct.accounting import sql_util as sql
 
 
-def export_db_info(conn, users=None, banks=None):
+def export_db_info(conn, users=None, banks=None, bank_cols=None, user_cols=None):
     """
     Export information from association_table and bank_table and place them into .csv
     files. If the "users" or "banks" optional arguments are not specified, "users.csv"
     and "banks.csv" will be created and placed in the current working directory.
     Args:
         users: an optional specified path to a .csv file to hold all user information.
         banks: an optional specified path to a .csv file to hold all bank information.
     """
+    # use all column names if none are passed in
+    bank_cols = bank_cols or fluxacct.accounting.BANK_TABLE
+    user_cols = user_cols or fluxacct.accounting.ASSOCIATION_TABLE
     try:
+        # validate custom columns if any were passed in; execute queries to get DB info
         cur = conn.cursor()
-        select_users_stmt = """
-            SELECT username, userid, bank, shares, max_running_jobs, max_active_jobs,
-            max_nodes, queues FROM association_table
-        """
-        cur.execute(select_users_stmt)
-        table = cur.fetchall()
+        sql.validate_columns(user_cols, fluxacct.accounting.ASSOCIATION_TABLE)
+        select_stmt = f"SELECT {', '.join(user_cols)} FROM association_table"
+        cur.execute(select_stmt)
+        association_table = cur.fetchall()
+        association_table_headers = [description[0] for description in cur.description]
+
+        sql.validate_columns(bank_cols, fluxacct.accounting.BANK_TABLE)
+        select_stmt = f"SELECT {', '.join(bank_cols)} FROM bank_table"
+        cur.execute(select_stmt)
+        bank_table = cur.fetchall()
+        bank_table_headers = [description[0] for description in cur.description]
 
-        # open a .csv file for writing
-        users_filepath = users if users else "users.csv"
-        users_file = open(users_filepath, "w")
+        # open .csv files for writing
+        users_file = open(users if users else "users.csv", "w")
         with users_file:
             writer = csv.writer(users_file)
 
-            for row in table:
+            writer.writerow(association_table_headers)
+            for row in association_table:
                 writer.writerow(row)
 
-        select_banks_stmt = """
-            SELECT bank, parent_bank, shares FROM bank_table
-        """
-        cur.execute(select_banks_stmt)
-        table = cur.fetchall()
-
-        banks_filepath = banks if banks else "banks.csv"
-        banks_file = open(banks_filepath, "w")
+        banks_file = open(banks if banks else "banks.csv", "w")
         with banks_file:
             writer = csv.writer(banks_file)
 
-            for row in table:
+            writer.writerow(bank_table_headers)
+            for row in bank_table:
                 writer.writerow(row)
+    except ValueError as err:
+        raise ValueError(f"export-db: {err}")
     except IOError as err:
-        print(err)
+        raise IOError(f"export-db: {err}")
+    except sqlite3.OperationalError as exc:
+        raise sqlite3.OperationalError(
+            f"export-db: an sqlite3.OperationalError occurred: {exc}"
+        )
 
 
 def populate_db(conn, users=None, banks=None):
12 changes: 12 additions & 0 deletions src/cmd/flux-account-service.py
@@ -548,13 +548,25 @@ def export_db(self, handle, watcher, msg, arg):
                 self.conn,
                 msg.payload["users"],
                 msg.payload["banks"],
+                msg.payload["bank_fields"].split(",")
+                if msg.payload.get("bank_fields")
+                else None,
+                msg.payload["user_fields"].split(",")
+                if msg.payload.get("user_fields")
+                else None,
             )
 
             payload = {"export_db": val}
 
             handle.respond(msg, payload)
         except KeyError as exc:
             handle.respond_error(msg, 0, f"missing key in payload: {exc}")
+        except IOError as exc:
+            handle.respond_error(msg, 0, f"an IOError occurred: {exc}")
+        except ValueError as exc:
+            handle.respond_error(msg, 0, f"{exc}")
+        except sqlite3.OperationalError as exc:
+            handle.respond_error(msg, 0, f"{exc}")
         except Exception as exc:
             handle.respond_error(
                 msg, 0, f"a non-OSError exception was caught: {str(exc)}"
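For reference, a hypothetical payload for this export-db service request, matching the msg.payload keys accessed in the handler above; the field lists travel as comma-separated strings and are split into column lists before reaching export_db_info(). The specific values are examples, not part of the change.

# hypothetical request payload; keys mirror the msg.payload accesses above
payload = {
    "users": "users.csv",
    "banks": "banks.csv",
    "user_fields": "username,bank,shares",      # split on "," into user_cols
    "bank_fields": "bank,parent_bank,shares",   # split on "," into bank_cols
}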
14 changes: 14 additions & 0 deletions src/cmd/flux-account.py
@@ -582,6 +582,20 @@ def add_export_db_arg(subparsers):
     subparser.add_argument(
         "-b", "--banks", help="path to a .csv file containing bank information"
     )
+    subparser.add_argument(
+        "--bank-fields",
+        help="list of fields from bank_table to include",
+        metavar="BANK_ID,BANK,ACTIVE,PARENT_BANK,SHARES,JOB_USAGE",
+    )
+    subparser.add_argument(
+        "--user-fields",
+        help="list of fields from association_table to include",
+        metavar=(
+            "CREATION_TIME,MOD_TIME,ACTIVE,USERNAME,USERID,BANK,DEFAULT_BANK,"
+            "SHARES,JOB_USAGE,FAIRSHARE,MAX_RUNNING_JOBS,MAX_ACTIVE_JOBS,MAX_NODES,"
+            "QUEUES,PROJECTS,DEFAULT_PROJECT"
+        ),
+    )
 
 
 def add_pop_db_arg(subparsers):
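With these options added, an invocation that limits the export to specific fields might look something like: flux account export-db --banks=banks.csv --bank-fields=bank,parent_bank,shares --user-fields=username,bank,shares. The exact command-line form is an assumption based on the argument names added above; any fields left unspecified fall back to all columns of the corresponding table.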
