diff --git a/docs/manual/developer/05_tools_and_utilities.md b/docs/manual/developer/05_tools_and_utilities.md index 7c6817b7594..fcb0adb7f32 100644 --- a/docs/manual/developer/05_tools_and_utilities.md +++ b/docs/manual/developer/05_tools_and_utilities.md @@ -743,3 +743,24 @@ An example of how to execute the script to generate roles locally: $ ./build_product rhel9 $ ./utils/ansible_playbook_to_role.py --dry-run output ``` + +### `utils/find_unused_rules.py` – List Rules That Are Not Used In Any Data stream + +This script will output rules are not in any data streams. +To prevent false positives the script will not run if the number of build datas treams less than the total number of products in the project. +The script assumes that `./build_project --derivatives` was executed before the script is used. +This script does require that `./utils/rule_dir_json.py` was executed before this script is used as well. + +This script works by comparing rules in the data streams to the rules in the `rule_dirs.json` file. +The script works by adding off the rule ids from the data streams to a `set`. +Then the script converts the keys of `rule_dirs.json` to a set. +The set of rules in the data stream is subtracted to from the set of rules in `rule_dirs.json`. +The difference is then output to the user. + +Example usage: + +```bash +$ ./build_product --derivatives +$ ./utils/rule_dir_json.py +$ ./utils/find_unused_rules.py +``` diff --git a/utils/find_unused_rules.py b/utils/find_unused_rules.py new file mode 100755 index 00000000000..45427554837 --- /dev/null +++ b/utils/find_unused_rules.py @@ -0,0 +1,92 @@ +#!/usr/bin/python3 +import argparse +import json +import pathlib +import sys +import xml.etree.ElementTree as ET + +from ssg.constants import OSCAP_RULE, XCCDF12_NS + +SSG_ROOT = pathlib.Path(__file__).resolve().parent.parent +BUILD_DIR = SSG_ROOT.joinpath("build") +RULE_DIR_JSON = BUILD_DIR.joinpath("rule_dirs.json") +EPILOG = """ +This script lists rules that are not used in any data streams. +It requires that all products (and derivatives) are built. +To do this run ./build_product --derivatives +The script has the following return codes: + 0 - All rules are used in the data streams, + 1 - Some rules are not used in the data streams, + 2 - Not all products are built, and + 3 - rule_dirs.json does not exist. +""" + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="List rules that are not used in any " + "data streams." + "Note that script requires that all products " + "(and derivatives) are built.", epilog=EPILOG) + parser.add_argument("--root", + help="Root directory of the SSG git repository", default=SSG_ROOT) + parser.add_argument("--json", help="Path to rule_dir.json file", + default=RULE_DIR_JSON.absolute()) + parser.add_argument("--force", + help="Force the operation even if all products are not built", + action="store_true") + return parser.parse_args() + + +def _get_ds_rules(datastream_files): + ds_rules = set() + for ds in datastream_files: + root = ET.parse(ds).getroot() + root_elements = list(root.findall(".//{%s}Rule" % XCCDF12_NS)) + for rule in root_elements: + rule_id = rule.get("id").removeprefix(OSCAP_RULE) + ds_rules.add(rule_id) + return ds_rules + + +def _get_product_count(products_path): + products_count = 0 + for product in products_path.iterdir(): + if product.is_dir() and product.name != "example": + products_count += 1 + return products_count + + +def main() -> int: + args = _parse_args() + root_path = pathlib.Path(args.root) + products_path = root_path.joinpath("products") + build_dir = root_path.joinpath("build") + rule_dir_path = pathlib.Path(args.json) + if not rule_dir_path.exists(): + print(f"Rule directory {rule_dir_path} does not exist.", file=sys.stderr) + print("Hint run: ./utils/rule_dir_json.py", file=sys.stderr) + return 3 + rule_dir_json = json.loads(rule_dir_path.read_text()) + all_rules = set(rule_dir_json.keys()) + products_count = _get_product_count(products_path) + datastream_files = list(build_dir.glob("ssg-*-ds.xml")) + ds_products = set() + for ds in datastream_files: + ds_products.add(ds.name.split("-")[1]) + if products_count > len(datastream_files): + print("Not all products are built, cowardly refusing to continue.", file=sys.stderr) + print(f"Products: {products_count}, data streams: {len(datastream_files)}", file=sys.stderr) + print("Hint: run ./build_product --derivatives", file=sys.stderr) + return 2 + ds_rules = _get_ds_rules(datastream_files) + disuse_rules = all_rules - ds_rules + if not disuse_rules: + print("All rules are used in the datastream files.") + return 0 + print("The following rules are not used in ANY of the provided data stream files:") + print("\n".join(disuse_rules)) + return 1 + + +if __name__ == "__main__": + raise SystemExit(main())