[feat] Guideline statistics (#4365)

The new `Guideline statistics` tab on the Statistics page can list all rules for the selected guidelines. The user can select multiple guidelines (but currently the only one is `sei-cert` and it is the default). The table can show the checker statistics that are related to the specified guideline rule. Rules may connect to more than one checker or may not have any checker. The checker statistics are calculated for the runs that are selected in the report filter (or for all runs if no run is selected). It can show the guideline name, guideline rule, checker name, checker severity, checker status, and the number of closed and outstanding reports. The status informs the user about how many runs the given checker was enabled or disabled in. Closed and outstanding report counts depend on review and detection status. A new config dir was created to store guideline files. Each yaml file represents a guideline and contains its rules. The `Guidelines` class can parse the yamls. We can reach the guideline data via the `getGuidelineRules` API endpoint that can return a list of `Rules`.
Ericsson · Dec 10, 2024 · 890654a · 890654a
1 parent 4f14816
commit 890654a
Show file tree

Hide file tree

Showing 28 changed files with 1,584 additions and 28 deletions.
diff --git a/analyzer/codechecker_analyzer/analyzer_context.py b/analyzer/codechecker_analyzer/analyzer_context.py
@@ -19,6 +19,7 @@
 from codechecker_analyzer.arg import analyzer_binary
 from codechecker_common import logger
 from codechecker_common.checker_labels import CheckerLabels
+from codechecker_common.guidelines import Guidelines
 from codechecker_common.singleton import Singleton
 from codechecker_common.util import load_json
 from pathlib import Path
@@ -52,13 +53,17 @@ def __init__(self):
         if 'CC_TEST_LABELS_DIR' in os.environ:
             labels_dir = os.environ['CC_TEST_LABELS_DIR']
 
+        guidelines_dir = os.path.join(self._data_files_dir_path,
+                                      'config', 'guidelines')
+
         cfg_dict = self.__get_package_config()
         self.env_vars = cfg_dict['environment_variables']
 
         lcfg_dict = self.__get_package_layout()
         self.pckg_layout = lcfg_dict['runtime']
 
         self._checker_labels = CheckerLabels(labels_dir)
+        self._guidelines = Guidelines(guidelines_dir)
         self.__package_version = None
         self.__package_build_date = None
         self.__package_git_hash = None
@@ -378,6 +383,10 @@ def checker_plugin(self):
     def checker_labels(self):
         return self._checker_labels
 
+    @property
+    def guideline(self):
+        """Return the Guidelines object created from the guidelines dir."""
+        return self._guidelines
+
 
 def get_context():
     try:

diff --git a/analyzer/tests/unit/test_guidelines.py b/analyzer/tests/unit/test_guidelines.py
@@ -0,0 +1,104 @@
+# -------------------------------------------------------------------------
+#
+#  Part of the CodeChecker project, under the Apache License v2.0 with
+#  LLVM Exceptions. See LICENSE for license information.
+#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
+
+"""Tests for Guidelines class."""
+
+
+import yaml
+import os
+import tempfile
+import unittest
+
+from codechecker_common.guidelines import Guidelines
+
+
+class TestGuidelines(unittest.TestCase):
+    """Check that Guidelines parses a guideline YAML file of a directory."""
+
+    # Rule id -> remaining fields of the rule. The fixture file is generated
+    # from this table and the parsed result is expected to be equal to it.
+    RULES = {
+        "con50-cpp": {
+            "rule_url": "https://wiki.sei.cmu.edu/confluence/display"
+                        "/cplusplus/CON50-CPP.+Do+not+destroy+a+mutex"
+                        "+while+it+is+locked",
+            "rule_title": ""
+        },
+        "con51-cpp": {
+            "rule_url": "https://wiki.sei.cmu.edu/confluence/display"
+                        "/cplusplus/CON51-CPP.+Ensure+actively+held+"
+                        "locks+are+released+on+exceptional+conditions",
+            "rule_title": ""
+        },
+        "con52-cpp": {
+            "rule_url": "https://wiki.sei.cmu.edu/confluence/display"
+                        "/cplusplus/CON52-CPP.+Prevent+data+races+when"
+                        "+accessing+bit-fields+from+multiple+threads",
+            "rule_title": ""
+        },
+        "con53-cpp": {
+            "rule_url": "https://wiki.sei.cmu.edu/confluence/display"
+                        "/cplusplus/CON53-CPP.+Avoid+deadlock+by+"
+                        "locking+in+a+predefined+order",
+            "rule_title": ""
+        },
+    }
+
+    def setUp(self) -> None:
+        # Every test works on its own temporary guideline directory.
+        self.guidelines_dir = tempfile.TemporaryDirectory()
+        self.initialize_guidelines_dir()
+
+    def tearDown(self) -> None:
+        self.guidelines_dir.cleanup()
+
+    def initialize_guidelines_dir(self):
+        """Write the 'sei-cert.yaml' fixture into the temporary directory."""
+        rule_list = [
+            {"rule_id": rule_id, **fields}
+            for rule_id, fields in self.RULES.items()]
+
+        guideline = {
+            "guideline": "sei-cert",
+            "guideline_title": "SEI CERT Coding Standard",
+            "rules": rule_list
+        }
+
+        yaml_path = os.path.join(self.guidelines_dir.name, 'sei-cert.yaml')
+        with open(yaml_path, 'w', encoding='utf-8') as fp:
+            yaml.safe_dump(guideline, fp, default_flow_style=False)
+
+    def test_guidelines(self):
+        parsed_rules = \
+            Guidelines(self.guidelines_dir.name).rules_of_guideline("sei-cert")
+
+        self.assertNotEqual(len(parsed_rules), 0)
+
+        self.assertEqual(
+            sorted(parsed_rules.keys()),
+            ["con50-cpp", "con51-cpp", "con52-cpp", "con53-cpp"])
+
+        # The "rule_id" field becomes the key, the other fields are kept.
+        self.assertEqual(parsed_rules, self.RULES)
diff --git a/codechecker_common/guidelines.py b/codechecker_common/guidelines.py
@@ -0,0 +1,116 @@
+# -------------------------------------------------------------------------
+#
+#  Part of the CodeChecker project, under the Apache License v2.0 with
+#  LLVM Exceptions. See LICENSE for license information.
+#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
+import os
+from typing import DefaultDict, Dict, Iterable
+from collections import defaultdict
+
+from codechecker_common.util import load_yaml
+from codechecker_common.logger import get_logger
+
+LOG = get_logger('system')
+
+
+class Guidelines:
+    """
+    Collection of coding guidelines loaded from a directory of YAML files.
+
+    Each file is expected to describe one guideline: a "guideline" name, a
+    "guideline_title" and a list of "rules" where every rule has a "rule_id".
+    Files that cannot be loaded or do not follow this format are skipped
+    with a warning.
+    """
+
+    def __init__(self, guidelines_dir: str):
+        """
+        Load every guideline file that is directly in 'guidelines_dir'.
+
+        Raises NotADirectoryError if 'guidelines_dir' is not a directory.
+        """
+        if not os.path.isdir(guidelines_dir):
+            raise NotADirectoryError(
+                f'{guidelines_dir} is not a directory.')
+
+        # os.listdir() returns the entries in arbitrary order, so they are
+        # sorted to make the result deterministic when several files describe
+        # the same guideline. Sub-directories cannot be guideline files.
+        candidate_paths = (
+            os.path.join(guidelines_dir, f)
+            for f in sorted(os.listdir(guidelines_dir)))
+        guideline_yaml_files = [
+            path for path in candidate_paths if os.path.isfile(path)]
+
+        self.__all_rules = self.__union_guideline_files(guideline_yaml_files)
+
+    def __check_guideline_format(self, guideline_data: dict):
+        """
+        Check the format of a guideline. It must contain specific values with
+        specific types. In case of any format error a ValueError exception is
+        thrown with the description of the wrong format.
+        """
+
+        # load_yaml() returns None if the file cannot be read or parsed, and
+        # a YAML document may also be a list or a scalar value.
+        if not isinstance(guideline_data, dict):
+            raise ValueError(
+                "The content of a guideline file must be a dictionary.")
+
+        if not isinstance(guideline_data.get("guideline"), str):
+            raise ValueError(
+                "The 'guideline' field must exist and be a string.")
+
+        if not isinstance(guideline_data.get("guideline_title"), str):
+            raise ValueError(
+                "The 'guideline_title' field must exist and be a string.")
+
+        rules = guideline_data.get("rules")
+        if not isinstance(rules, list) \
+           or not all(isinstance(r, dict) for r in rules):
+            raise ValueError(
+                "The 'rules' field must exist and be a list of dictionaries.")
+
+        if not all(isinstance(rule.get("rule_id"), str) for rule in rules):
+            raise ValueError(
+                "All rules must have 'rule_id' that is a string.")
+
+    def __union_guideline_files(
+        self,
+        guideline_files: Iterable[str]
+    ) -> DefaultDict[str, Dict[str, Dict[str, str]]]:
+        """
+        This function creates a union object of the given guideline files. The
+        resulting object maps guidelines to the collection of their rules.
+        Files with an incorrect format are skipped with a warning. If more
+        files describe the same guideline then the last one is used.
+        E.g.:
+        {
+            "guideline1": {
+                "rule_id1": {
+                    "rule_url": ...
+                    "rule_title": ...
+                },
+                "rule_id2": {
+                    ...
+                }
+            },
+            "guideline2": {
+                ...
+            },
+        }
+        """
+        all_rules: DefaultDict[
+            str, Dict[str, Dict[str, str]]] = defaultdict(dict)
+
+        for guideline_file in guideline_files:
+            guideline_data = load_yaml(guideline_file)
+
+            try:
+                self.__check_guideline_format(guideline_data)
+            except ValueError as ex:
+                LOG.warning("%s does not have a correct guideline format.",
+                            guideline_file)
+                LOG.warning(ex)
+                continue
+
+            # The rule ID becomes the key, every other field of the rule is
+            # kept as its value. The loaded data itself is not modified.
+            all_rules[guideline_data["guideline"]] = {
+                rule["rule_id"]: {
+                    key: value for key, value in rule.items()
+                    if key != "rule_id"}
+                for rule in guideline_data["rules"]}
+
+        return all_rules
+
+    def rules_of_guideline(
+        self,
+        guideline_name: str,
+    ) -> Dict[str, Dict[str, str]]:
+        """
+        Return the rules of a guideline as a dictionary that maps rule IDs to
+        the other fields of the rule. The result is an empty dictionary if
+        the guideline is not known.
+        """
+
+        # Indexing the defaultdict would insert an empty entry for every
+        # unknown guideline name, so .get() is used for the lookup.
+        return self.__all_rules.get(guideline_name, {})
+
+    def all_guideline_rules(
+        self
+    ) -> DefaultDict[str, Dict[str, Dict[str, str]]]:
+        """
+        Return the rules of all loaded guidelines, keyed by guideline name.
+        """
+        return self.__all_rules
diff --git a/codechecker_common/util.py b/codechecker_common/util.py
@@ -10,6 +10,7 @@
 """
 import itertools
 import json
+import yaml
 import os
 from typing import TextIO
 
@@ -89,6 +90,32 @@ def load_json(path: str, default=None, lock=False, display_warning=True):
     return ret
 
 
+def load_yaml(path: str):
+    """
+    Parse the file at 'path' as YAML and return the resulting value.
+
+    If the file cannot be opened, parsed or processed then the failure is
+    logged as a warning and None is returned.
+    """
+    # Checked in this order: the first matching type selects the message.
+    warnings_by_error = (
+        (OSError, "Failed to open YAML file: %s"),
+        (yaml.YAMLError, "Failed to parse YAML file: %s"),
+        (ValueError, "%s is not a valid YAML file."),
+        (TypeError, "Failed to process YAML file: %s"),
+    )
+
+    try:
+        with open(path, "r", encoding="utf-8") as yaml_file:
+            content = yaml.safe_load(yaml_file)
+    except tuple(error for error, _ in warnings_by_error) as ex:
+        message = next(text for error, text in warnings_by_error
+                       if isinstance(ex, error))
+        LOG.warning(message, path)
+        LOG.warning(ex)
+        return None
+
+    return content
+
+
 def get_linef(fp: TextIO, line_no: int) -> str:
     """'fp' should be (readable) file object.
     Return the line content at line_no or an empty line